/* UFS server routines for fs.defs interface
   Copyright (C) 1992 Free Software Foundation

This file is part of the GNU Hurd.

The GNU Hurd is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.

The GNU Hurd is distributed in the hope that it will be useful, 
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with the GNU Hurd; see the file COPYING.  If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* Written by Michael I. Bushnell. */

#include "ufs.h"
#include "fsdefs.h"
#include "ioudefs.h"
#include "iodefs.h"
#include "fsysudefs.h"
#include "inode.h"
#include "dinode.h"
#include "fs.h"
#include "dir.h"
#include <limits.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/file.h>
#include <sys/stat.h>
#include <errno.h>
#include <hurd/hurd_types.h>
#include <hurd/paths.h>
#include <string.h>
#include <stdio.h>

/* Forward declaration; the definition appears later in this file.  It
   creates a new inode and links it into a directory under a given name.  */
static error_t
create_and_link (struct inode *, char *, mode_t, struct inode **,
		 struct protid *, struct dirstat *);


/* Operations on regular files and directories */

/* Execute the file referenced by CRED in TASK.  Not implemented by this
   server: every argument is ignored and EOPNOTSUPP is always returned.  */
error_t
file_exec (struct protid *cred,
	   task_t task,
	   int flags,
	   char *argv,
	   u_int argvlen,
	   char *envp,
	   u_int envplen,
	   mach_port_t *fds,
	   u_int fdslen,
	   mach_port_t *portarray,
	   u_int portarraylen,
	   int *intarray,
	   u_int intarraylen)
{
  return EOPNOTSUPP;
}

/* Copy the stat information for inode IP into STATBUF.  The on-disk
   inode is read under catch_exception; if that reports an error, the
   error is returned and STATBUF may be only partly filled in.
   Returns 0 on success.  */
error_t
do_stat (struct inode *ip,
	 io_statbuf_t *statbuf)
{
  error_t error;

  error = catch_exception ();
  if (error)
    return error;

  /* Filesystem and node identity.  */
  statbuf->st_fstype = FSTYPE_UFS;
  statbuf->st_fsid.val[0] = 0;
  statbuf->st_fsid.val[1] = /* DEVICE! XXX */0;
  statbuf->st_ino = ip->i_number;
  statbuf->st_gen = ip->di->di_gen;
  statbuf->st_rdev = 0;

  /* Mode, link count and ownership.  */
  statbuf->st_mode = DI_MODE(ip->di);
  statbuf->st_nlink = ip->di->di_nlink;
  statbuf->st_uid = DI_UID(ip->di);
  statbuf->st_gid = DI_GID(ip->di);

  statbuf->st_size = ip->di->di_size;

  /* Times; only whole seconds are taken from the inode here.  */
  statbuf->st_atime = ip->di->di_atime;
  statbuf->st_atime_usec = 0;
  statbuf->st_mtime = ip->di->di_mtime;
  statbuf->st_mtime_usec = 0;
  statbuf->st_ctime = ip->di->di_ctime;
  statbuf->st_ctime_usec = 0;

  /* Block accounting and author.  */
  statbuf->st_blksize = sblock->fs_bsize;
  statbuf->st_blocks = ip->di->di_blocks;
  statbuf->st_author = ip->di->di_author;

  end_catch_exception ();
  return 0;
}  

/* Common body for the RPCs that modify a single inode.  This macro
   expands to the ENTIRE body of the calling function: every path through
   it executes a `return', so nothing placed after it is reached.

   PROTID is the caller's credential; if it is null the function returns
   EOPNOTSUPP, and if `readonly' is true it returns EROFS.  Otherwise the
   inode PROTID->po->ip is locked through i_toplock, OPERATION is run
   under catch_exception, and the lock is dropped again.

   OPERATION may refer to the locals `ip' (the inode) and `err' (the
   value that will be returned; it is 0 when OPERATION starts).  Note
   that inode_update (ip, 0) is called whenever catch_exception returned
   0, even when OPERATION itself stored an error in `err'.  */
#define CHANGE_IP_FIELD(PROTID, OPERATION)				    \
({									    \
  error_t err;								    \
  struct inode *ip;							    \
  									    \
  if (!(PROTID))							    \
    return EOPNOTSUPP;							    \
  									    \
  if (readonly)								    \
    return EROFS;							    \
									    \
  ip = (PROTID)->po->ip;						    \
  									    \
  mutex_lock (&ip->i_toplock);						    \
   									    \
  err = catch_exception ();						    \
  if (!err)								    \
    {									    \
      (OPERATION);							    \
      end_catch_exception ();						    \
      inode_update (ip, 0);						    \
    }									    \
  mutex_unlock (&ip->i_toplock);					    \
  return err;								    \
})

/* Change the owner of the file referenced by CRED to UID, change the
   group to GID.

   The caller must pass the isowner check on the file.  In addition,
   unless the caller holds uid 0, UID must be one of the caller's uids
   and GID one of the caller's groups; otherwise EPERM is returned.
   Errors from the CHANGE_IP_FIELD wrapper (EOPNOTSUPP, EROFS, faults
   while touching the inode) are returned as well.  */
error_t
file_chown (struct protid *cred,
	    uid_t uid,
	    gid_t gid)
{
  CHANGE_IP_FIELD (cred,
		   ({
		     /* isowner returns 0 when the check passes, exactly as
			the other file_ch* routines in this file use it.
			Testing `!isowner (...)' here used to refuse the
			owner and let everybody else through.  */
		     err = isowner (ip, cred);
		     if (!err
			 && (!isuid (uid, cred)
			     || !groupmember (gid, cred))
			 && !isuid (0, cred))
		       err = EPERM;

		     if (!err)
		       {
			 /* Ids are stored as two 16-bit halves.  */
			 ip->di->di_uidh = uid >> 16;
			 ip->di->di_uidl = uid & 0xffff;
			 ip->di->di_gidh = gid >> 16;
			 ip->di->di_gidl = gid & 0xffff;
			 ip->di->di_ctime = time->seconds;
		       }
		   }));
}

/* Change the author of the file referenced by CRED to AUTHOR.  The
   caller must pass the isowner check; its error is returned otherwise.
   The inode change time is updated on success.  */
error_t
file_chauthor (struct protid *cred,
	       uid_t author)
{
  CHANGE_IP_FIELD (cred,
		   ({
		     err = isowner (ip, cred);
		     if (err == 0)
		       {
			 ip->di->di_author = author;
			 ip->di->di_ctime = time->seconds;
		       }
		   }));
}

/* Change the permission bits of the file referenced by CRED to MODE.
   The file-type and spare bits of MODE are ignored; those bits are
   always taken from the existing inode.  The caller must pass the
   isowner check.  A caller without uid 0 cannot set the sticky bit on a
   non-directory, the setgid bit unless it is in the file's group, or
   the setuid bit unless it has the file's uid.  */
error_t
file_chmod (struct protid *cred,
	    mode_t mode)
{
  /* Discard any type/spare bits the caller passed in.  */
  mode &= ~(IFMT | ISPARE);
  
  CHANGE_IP_FIELD (cred,
		   ({
		     err = isowner (ip, cred);
		     if (err == 0)
		       {
			 if (!isuid (0, cred))
			   {
			     /* Strip the bits this caller may not set.  */
			     if ((DI_MODE (ip->di) & IFMT) != IFDIR)
			       mode &= ~ISVTX;
			     if (!groupmember (DI_GID (ip->di), cred))
			       mode &= ~ISGID;
			     if (!isuid (DI_UID (ip->di), cred))
			       mode &= ~ISUID;
			   }

			 /* Keep the inode's own type and spare bits.  */
			 mode |= DI_MODE (ip->di) & (IFMT | ISPARE);

			 /* The mode is stored as two 16-bit halves.  */
			 ip->di->di_model = mode & 0xffff;
			 ip->di->di_modeh = mode >> 16;
			 ip->di->di_ctime = time->seconds;
		       }
		   }));
}

/* Set the flags word of the file referenced by CRED to FLAGS.  The
   caller must pass the isowner check; its error is returned otherwise.
   The inode change time is updated on success.  */
error_t
file_chflags (struct protid *cred,
	      int flags)
{
  CHANGE_IP_FIELD (cred,
		   ({
		     err = isowner (ip, cred);
		     if (err == 0)
		       {
			 ip->di->di_flags = flags;
			 ip->di->di_ctime = time->seconds;
		       }
		   }));
}

/* Set the access and modification times of the file referenced by CRED
   to ATIME and MTIME.  Only the seconds field of each is stored.  The
   caller must pass the isowner check; its error is returned otherwise.
   The inode change time is set to the current time on success.  */
error_t
file_utimes (struct protid *cred,
	     time_value_t atime,
	     time_value_t mtime)
{
  CHANGE_IP_FIELD (cred,
		   ({
		     err = isowner (ip, cred);
		     if (err == 0)
		       {
			 ip->di->di_atime = atime.seconds;
			 ip->di->di_mtime = mtime.seconds;
			 ip->di->di_ctime = time->seconds;
		       }
		   }));
}


/* Move the file pointer of the open file CRED according to OFFSET and
   WHENCE (SEEK_SET, SEEK_CUR or SEEK_END) and store the resulting file
   pointer in *NEWOFFSET.  An unknown WHENCE yields EINVAL.  *NEWOFFSET
   is stored even when an error is returned.  */
error_t
file_seek (struct protid *cred,
	   int offset,
	   int whence,
	   int *newoffset)
{
  /* CHANGE_IP_FIELD refuses to run when `readonly' is set, but seeking
     must work on a read-only filesystem too.  Shadow the name with a
     constant 0 for the duration of the macro expansion.  */
#define readonly 0

  CHANGE_IP_FIELD (cred,
		   ({
		     err = fs_get_it (ip);
		     if (!err)
		       {
			 if (whence == SEEK_SET)
			   cred->po->filepointer = offset;
			 else if (whence == SEEK_CUR)
			   cred->po->filepointer += offset;
			 else if (whence == SEEK_END)
			   cred->po->filepointer = ip->di->di_size + offset;
			 else
			   err = EINVAL;
		       }
		     *newoffset = cred->po->filepointer;
		   }));
#undef readonly
}

/* Truncate the file referenced by CRED to SIZE bytes.  The file must
   have been opened with FS_LOOKUP_WRITE; otherwise EINVAL is returned.  */
error_t
file_truncate (struct protid *cred,
	       int size)
{
  CHANGE_IP_FIELD (cred,
		   ({
		     if (cred->po->openstat & FS_LOOKUP_WRITE)
		       inode_truncate (ip, size);
		     else
		       err = EINVAL;
		   }));
}

/* Apply an advisory lock as specified by FLAGS to CRED.

   FLAGS is a combination of LOCK_SH, LOCK_EX, LOCK_UN and LOCK_NB.
   If none of LOCK_SH, LOCK_EX and LOCK_UN is given, nothing is done.
   LOCK_UN may not be combined with LOCK_SH or LOCK_EX (EINVAL).  If
   both LOCK_SH and LOCK_EX are given, LOCK_EX wins.

   Returns 0 on success, EOPNOTSUPP if CRED is null, EBADF when asked to
   unlock while holding no lock, and EWOULDBLOCK when LOCK_NB is set and
   the request would have to wait.  Any lock this open already holds is
   given up before the new one is taken, so a failed non-blocking
   upgrade can leave the caller with no lock at all.

   The inode's i_toplock protects the lock state; it is held for the
   whole operation (condition_wait is handed the mutex while waiting)
   and is released on every return path.  */
error_t
file_lock (struct protid *cred,
	   int flags)
{
  struct inode *ip;
  struct peropen *po;
  error_t err = 0;
  
  if (!cred)
    return EOPNOTSUPP;
  
  po = cred->po;
  ip = cred->po->ip;
  if (!(flags & (LOCK_UN|LOCK_EX|LOCK_SH)))
    return 0;
  
  if ((flags & LOCK_UN)
      && (flags & (LOCK_SH|LOCK_EX)))
    return EINVAL;

  if (flags & LOCK_EX)
    flags &= ~LOCK_SH;

  mutex_lock (&ip->i_toplock);
  if (flags & LOCK_UN)
    {
      if (po->flock_status == LOCK_UN)
	{
	  err = EBADF;
	  goto out;
	}
      if (po->flock_status != ip->flock_type)
	panic ("flock");
      
      if (po->flock_status == LOCK_SH)
	{
	  /* The last shared holder unlocks the inode.  */
	  if (!--ip->shlock_count)
	    ip->flock_type = LOCK_UN;
	}
      else if (po->flock_status == LOCK_EX)
	ip->flock_type = LOCK_UN;
      else
	panic ("flock 2");

      /* Wake anybody waiting for the inode to become unlocked.  */
      if (ip->flock_type == LOCK_UN && ip->needflock)
	{
	  ip->needflock = 0;
	  condition_broadcast (&ip->i_flockwait);
	}
      po->flock_status = LOCK_UN;
    }
  else
    {
      /* If we have an exclusive lock, release it */
      if (po->flock_status == LOCK_EX)
	{
	  po->flock_status = LOCK_UN;
	  ip->flock_type = LOCK_UN;
	  if (ip->needflock)
	    {
	      ip->needflock = 0;
	      condition_broadcast (&ip->i_flockwait);
	    }
	}
	  
      /* If there is an exclusive lock, wait for it to end */
      while (ip->flock_type == LOCK_EX)
	{
	  if (flags & LOCK_NB)
	    {
	      err = EWOULDBLOCK;
	      goto out;
	    }
	  ip->needflock = 1;
	  condition_wait (&ip->i_flockwait, &ip->i_toplock);
	}

      /* If we have a shared lock, release it */
      if (po->flock_status == LOCK_SH)
	{
	  po->flock_status = LOCK_UN;
	  if (!--ip->shlock_count)
	    {
	      ip->flock_type = LOCK_UN;
	      if (ip->needflock)
		{
		  ip->needflock = 0;
		  condition_broadcast (&ip->i_flockwait);
		}
	    }
	}
      
      if (flags & LOCK_SH)
	{
	  if (ip->flock_type == LOCK_EX)
	    panic ("flock 3");

	  po->flock_status = LOCK_SH;
	  ip->flock_type = LOCK_SH;
	  ip->shlock_count++;
	}
      else if (flags & LOCK_EX)
	{
	  /* Wait for any shared locks to finish */
	  while (ip->flock_type == LOCK_SH)
	    {
	      if (flags & LOCK_NB)
		{
		  err = EWOULDBLOCK;
		  goto out;
		}
	      ip->needflock = 1;
	      condition_wait (&ip->i_flockwait, &ip->i_toplock);
	    }	

	  if (ip->flock_type == LOCK_EX)
	    panic ("flock 4");
	  
	  ip->flock_type = LOCK_EX;
	  po->flock_status = LOCK_EX;
	}
      else
	panic ("flock 5");
    }

 out:
  /* The successful path used to return with i_toplock still held, which
     blocked every later operation on this inode.  */
  mutex_unlock (&ip->i_toplock);
  return err;
}

/* Find out the current lock held on the file, and what
   kind of lock we have.  */
error_t
file_lock_stat (struct protid *cred,
		int *mystat,
		int *allstat)
{
  struct inode *ip;
  
  if (!cred)
    return EOPNOTSUPP;
  ip = cred->po->ip;
  mutex_lock (&ip->i_toplock);
  *mystat = cred->po->flock_status;
  *allstat = ip->flock_type;
  mutex_unlock (&ip->i_toplock);
  return 0;
}


/* Return the control port for the filesystem in *CONTROL.  Only a
   caller holding uid 0 may have it; anybody else gets EPERM and
   *CONTROL is left untouched.  Returns EOPNOTSUPP if CRED is null.  */
error_t
file_getcontrol (struct protid *cred,
		 mach_port_t *control)
{
  if (!cred)
    return EOPNOTSUPP;

  if (!isuid (0, cred))
    return EPERM;

  *control = ufs_control_port;
  return 0;
}

/* Fill STATBUF with statistics about the whole filesystem, taken from
   the superblock.  FILE is only checked for being non-null
   (EOPNOTSUPP otherwise).  Returns 0 on success.  */
error_t
file_statfs (struct protid *file,
	     fsys_statfsbuf_t *statbuf)
{
  if (!file)
    return EOPNOTSUPP;

  /* Identity.  */
  statbuf->fsys_stb_type = FSTYPE_UFS;
  statbuf->fsys_stb_fsid_1 = 0;
  statbuf->fsys_stb_fsid_2 = /* DEVICE! XXX */0;

  /* Block and fragment sizes, total data blocks.  */
  statbuf->fsys_stb_bsize = sblock->fs_bsize;
  statbuf->fsys_stb_fsize = sblock->fs_fsize;
  statbuf->fsys_stb_blocks = sblock->fs_dsize;

  /* Free space: whole free blocks converted with fs_frag, plus the
     free fragment count.  */
  statbuf->fsys_stb_bfree = sblock->fs_cstotal.cs_nbfree
    * sblock->fs_frag + sblock ->fs_cstotal.cs_nffree;

  /* Available space: the (100 - fs_minfree) percent share of the data
     area, minus what is already in use.  Must be computed after
     fsys_stb_bfree, which it reads back.  */
  statbuf->fsys_stb_bavail
    = (sblock->fs_dsize * (100 - sblock->fs_minfree) / 100) 
      - (sblock->fs_dsize - statbuf->fsys_stb_bfree);

  /* Inode counts.  */
  statbuf->fsys_stb_files = sblock->fs_ncg * sblock->fs_ipg - ROOTINO;
  statbuf->fsys_stb_ffree = sblock->fs_cstotal.cs_nifree;

  return 0;
}

/* Cause the disk copy of CRED to be updated.  If WAIT is true, don't
   return until the disk is up to date.  Returns EOPNOTSUPP if CRED is
   null and 0 otherwise.  */
error_t
file_sync (struct protid *cred,
	   int wait)
{
  struct inode *node;

  if (!cred)
    return EOPNOTSUPP;

  node = cred->po->ip;

  /* fs_get_it runs with the inode locked; file_update runs after the
     lock has been dropped.
     NOTE(review): the result of fs_get_it is discarded here although
     file_seek treats it as an error code -- confirm that is intended.  */
  mutex_lock (&node->i_toplock);
  (void) fs_get_it (node);
  mutex_unlock (&node->i_toplock);

  file_update (node, wait);
  return 0;
}

/* Cause the disk copy of the entire filesystem to be updated.  If
   WAIT is true, don't return until the disk is up to date.  DOCHILDREN
   is meant to request the same of all child filesystems, but it is not
   used yet.  CRED is only checked for being non-null.  */
error_t
file_syncfs (struct protid *cred,
	     int wait,
	     int dochildren)	/* not used yet XXX */
{
  if (cred)
    {
      sync_everything (wait);
      return 0;
    }

  return EOPNOTSUPP;
}

/* Fill CONF with the Posix pathconf values for this filesystem.  The
   values are constants and do not depend on which file CRED names;
   CRED is only checked for being non-null (EOPNOTSUPP otherwise).  */
error_t
file_pathconf (struct protid *cred,
	       struct pathconf *conf)
{
  if (!cred)
    return EOPNOTSUPP;

  /* Limits that have a real value here.  */
  conf->link_max = LINK_MAX;
  conf->name_max = MAXNAMLEN;

  /* Terminal limits.  */
  conf->max_canon = 0;
  conf->max_input = 0;
  conf->_posix_vdisable = -1;

  /* Reported as -1.  */
  conf->path_max = -1;
  conf->pipe_buf = -1;

  /* Options.  */
  conf->_posix_chown_restricted = 1;
  conf->_posix_no_trunc = 1;

  return 0;
}

/* Return in *NEWPORT a node suitable as the file argument of dir_link.
   That is CRED itself, unless CRED names the root inode of the
   filesystem, in which case EBUSY is returned.  Returns EOPNOTSUPP if
   CRED is null.  */
error_t
file_getlinknode (struct protid *cred,
		  struct protid **newport)
{
  if (!cred)
    return EOPNOTSUPP;

  if (cred->po->ip->i_number == ROOTINO)
    return EBUSY;

  *newport = cred;
  return 0;
}

/* Return a file handle for the file referenced by CRED.  Not implemented
   by this server: DATA and DATALEN are left untouched and EOPNOTSUPP is
   always returned.  */
error_t
file_getfh (struct protid *cred,
	    char **data,
	    u_int *datalen)
{
  return EOPNOTSUPP;
}




/* Simple directory operations */
error_t
dir_pathtrans (struct protid *dircred,
	       char *path,
	       int flags,
	       mode_t mode,
	       enum retry_type *retry,
	       char *retryname,
	       struct protid **result)
{
  struct inode *volatile dip;
  struct inode *ip;
  volatile int nsymlink = 0;
  char *volatile nextname;
  int nextnamelen;
  int error = 0;
  char *volatile pathbuf;
  int pathbuflen = 0;
  int newnamelen;
  int create, excl;
  volatile int lastcomp = 0;
  volatile int newnode = 0;
  struct dirstat ds;

  if (!dircred)
    return EOPNOTSUPP;
  
  create = (flags & FS_LOOKUP_CREATE);
  excl = (flags & FS_LOOKUP_EXCL);
  
  /* Catch a simple, but potentially common, error */
  if (path[0] == '/')
    return EINVAL;

  dip = dircred->po->ip;
  mutex_lock (&dip->i_toplock);
  ip = 0;
  *retry = FS_RETRY_NONE;

  dip->i_refcnt++;		/* acquire a reference for later iput */

  do
    {
      if (lastcomp)
	panic ("pathtrans: multiple final components?");

      nextname = index (path, '/');

      if (nextname)
	{
	  *nextname++ = '\0';
	  lastcomp = 0;
	}
      else
	lastcomp = 1;
	  
      ip = 0;

      if (lastcomp && create)
	error = lookup (dip, path, CREATE, &ip, &ds, dircred);
      else
	error = lookup (dip, path, LOOKUP, &ip, 0, dircred);

      if (lastcomp && create && excl && (!error || error == EAGAIN))
	error = EEXIST;

      if (error == EAGAIN)
	{
	  *retry = FS_RETRY_REAUTH;
	  *result = 0;		/* convert_protid_to_port will fix this up */
	  strcpy (retryname, nextname);
	  error = 0;
	  goto out;
	}

      if (lastcomp && create && error == ENOENT)
	{
	  mode &= ~(IFMT | ISPARE | ISVTX);
	  mode |= IFREG;
	  error = create_and_link (dip, path, mode, &ip, dircred, &ds);
	  newnode = 1;
	}
      
      if (error)
	goto out;

      if (!(error = catch_exception ()))
	{
	  if (error)
	    {
	      end_catch_exception ();
	      goto out;
	    }

	  switch (DI_MODE(ip->di) & IFMT)
	    {
	    default:
	      panic ("unknown open format");
	      
	    case IFREG:
	    case IFDIR:
	    case IFSOCK:
	    case IFIFO:
	    case IFCHR:
	    case IFBLK:
	      path = nextname;
	      if (ip == dip)
		irele (dip);
	      else
		iput (dip);
	      if (!lastcomp)
		{
		  dip = ip;
		  ip = 0;
		}
	      break;
	      
	    case IFLNK:
	      if (nsymlink++ > MAXSYMLINKS)
		{
		  error = ELOOP;
		  end_catch_exception ();
		  goto out;
		}
	      
#if 0
	      /* Check if there's room to hold it all */
	      if (strlen (nextname) + ip->di->di_size + 2 > MAXPATHLEN)
		{
		  error = ENAMETOOLONG;
		  end_catch_exception ();
		  goto out;
		}
#endif
	      nextnamelen = strlen (nextname);
	      newnamelen = nextnamelen + ip->di->di_size + 2;
	      if (pathbuflen < newnamelen)
		{
		  pathbuf = alloca (newnamelen);
		  pathbuflen = newnamelen;
		}
	      
	      error = fs_rdwr (ip, pathbuf + nextnamelen + 1, 
			       0, ip->di->di_size, 0, dircred);
	      if (error)
		goto out;
	      
	      if (pathbuf[nextnamelen + 1] == '/')
		{
		  /* Punt to the caller */
		  *retry = FS_RETRY_NORMAL;
		  *result = MACH_PORT_NULL;
		  pathbuf[nextnamelen + ip->di->di_size + 1] = '\0';
		  strcpy (retryname, pathbuf + nextnamelen + 1);
		  end_catch_exception ();
		  goto out;
		}
	      
	      bcopy (pathbuf, nextname, nextnamelen);
	      pathbuf[nextnamelen] = '/';
	      
	      path = pathbuf;
	      iput (ip);
	      ip = 0;
	      break;
	    }
	  end_catch_exception ();
	}
      else
	/* Got an error reading the inode */
	goto out;  
    } while (path && *path);
  
  if (!(error = catch_exception ()))
    {
      if (!newnode)		/* don't check perms if we just created it */
	{
	  int type = DI_MODE(ip->di) & IFMT;
	  if ((type == IFSOCK || type == IFBLK 
	       || type == IFCHR || type == IFIFO)
	      && (flags & (FS_LOOKUP_READ|FS_LOOKUP_WRITE|FS_LOOKUP_EXEC)))
	    {
	      error = EOPNOTSUPP;
	      goto out1;
	    }
	  if (flags & FS_LOOKUP_READ)
	    {
	      if (error = ufs_access (ip, IREAD, dircred))
		goto out1;
	    }
	  
	  if (flags & FS_LOOKUP_EXEC)
	    {
	      if (error = ufs_access (ip, IEXEC, dircred))
		goto out1;
	    }
	  
	  if (flags & (FS_LOOKUP_WRITE|FS_LOOKUP_TRUNCATE))
	    {
	      if (type == IFDIR)
		{
		  error = EISDIR;
		  goto out1;
		}
	      if (readonly)
		{
		  error = EROFS;
		  goto out1;
		}
	      if (error = ufs_access (ip, IWRITE, dircred))
		goto out1;
	    }
	}
      
      if (flags & FS_LOOKUP_TRUNCATE)
	inode_truncate (ip, 0);
      
      flags &= FS_LOOKUP_LONGLIFE;
      
      printf ("ip: 0x%x\n", (int)ip);
      *result = make_protid (ip, flags, dircred);
    out1:
      end_catch_exception ();
    }

 out:
  if (ip)
    {
      if (dip == ip)
	irele (ip);
      else
	iput (ip);
    }
  if (dip)
    iput (dip);
  if (ds.type != LOOKUP)
    dsrelease (&ds);
  return error;
}

/* Read directory data from DIRCRED starting at OFFSET, by way of
   io_read, into DATA/DATALEN (at most MAXREAD).  On success *NEWOFFSET
   is OFFSET advanced by the amount read.  An OFFSET of -1 is rejected
   with EINVAL; errors from io_read are returned unchanged and leave
   *NEWOFFSET untouched.  */
error_t 
dir_readdir (struct protid *dircred,
	     char **data,
	     unsigned int *datalen,
	     int offset,
	     int *newoffset,
	     int maxread)
{
  error_t err;

  if (offset == -1)
    return EINVAL;

  err = io_read (dircred, data, datalen, offset, maxread);
  if (err)
    return err;

  *newoffset = offset + *datalen;
  return 0;
}

error_t
dir_mkdir (struct protid *dircred,
	   char *name,
	   mode_t mode)
{
  struct inode *dip;
  struct inode *ip = 0;
  struct dirstat ds;
  int error;

  if (!dircred)
    return EOPNOTSUPP;
  
  dip = dircred->po->ip;
  if (readonly)
    return EROFS;

  mutex_lock (&dip->i_toplock);

  error = lookup (dip, name, CREATE, 0, &ds, dircred);

  if (error == EAGAIN)
    error = EEXIST;
  if (!error)
    error =  EEXIST;
  if (error != ENOENT)
    {
      mutex_unlock (&dip->i_toplock);
      return error;
    }

  mode &= ~(ISPARE | IFMT);
  mode |= IFDIR;

  error = create_and_link (dip, name, mode, &ip, dircred, &ds);

  if (!error)
    iput (ip);

  mutex_unlock (&dip->i_toplock);
  return error;
}

/* Clear the `.' and `..' entries from directory DP.  Its parent is PDP,
   and the user responsible for this is identified by CRED.  Both 
   directories must be locked.

   For each entry removed the matching link count is decremented and the
   change time updated: DP's for `.', PDP's for `..'.  The first error
   met is returned and nothing further is done; a missing `.' or `..'
   is a panic.  Returns 0 when both entries are gone.  */
static error_t
clear_directory (struct inode *dp,
		 struct inode *pdp,
		 struct protid *cred)
{
  error_t err;
  struct dirstat ds;
  struct inode *found;

  /* Find and remove the `.' entry. */
  err = lookup (dp, ".", REMOVE, &found, &ds, cred);
  if (err == ENOENT)
    panic ("clear_directory no `.' entry");
  if (err)
    return err;

  err = dirremove (dp, &ds);
  if (err)
    return err;

  /* Decrement the link count */
  err = catch_exception ();
  if (err)
    return err;
  dp->di->di_nlink--;
  dp->di->di_ctime = time->seconds;
  end_catch_exception ();

  /* Find and remove the `..' entry. */
  err = ufs_checkdirmod (dp, DI_MODE (dp->di), pdp, cred);
  if (!err)
    err = lookup (dp, "..", REMOVE | SPEC_DOTDOT, &found, &ds, cred);
  if (err == ENOENT)
    panic ("clear_directory no `..' entry");
  if (err)
    return err;

  err = dirremove (dp, &ds);
  if (err)
    return err;

  /* Decrement the link count on the parent */
  err = catch_exception ();
  if (err)
    return err;
  pdp->di->di_nlink--;
  pdp->di->di_ctime = time->seconds;
  end_catch_exception ();

  return 0;
}


error_t
dir_rmdir (struct protid *dircred,
	   char *name)
{
  struct inode *dip;
  struct inode *ip = 0;
  struct dirstat ds;
  error_t error;

  if (!dircred)
    return EOPNOTSUPP;
  
  dip = dircred->po->ip;
  if (readonly)
    return EROFS;

  mutex_lock (&dip->i_toplock);

  error = lookup (dip, name, REMOVE, &ip, &ds, dircred);
  if (error = EAGAIN)
    error = ENOTEMPTY;
  if (error)
    {
      mutex_unlock (&dip->i_toplock);
      return error;
    }

  /* Attempt to rmdir(".") */
  if (dip == ip)
    {
      irele (ip);
      mutex_unlock (&dip->i_toplock);
      dsrelease (&ds);
      return EINVAL;
    }

  /* Verify the directory is empty (and valid).  (Rmdir ".." won't be
     valid since ".." will contain a reference to the current directory and
     thus be non-empty). */
  if (!dirempty (ip, dircred))
    {
      iput (ip);
      dsrelease (&ds);
      mutex_unlock (&dip->i_toplock);
      return ENOTEMPTY;
    }

  if (error = dirremove (dip, &ds))
    {
      iput (ip);
      mutex_unlock (&dip->i_toplock);
      return error;
    }

  clear_directory (ip, dip, dircred);
  iput (ip);
  mutex_unlock (&dip->i_toplock);
  return 0;
}

/* Remove the directory entry NAME from the directory referenced by
   DIRCRED and drop one link from the node it named.

   Returns EOPNOTSUPP if DIRCRED is null, EROFS on a read-only
   filesystem, EISDIR if NAME is a directory (or the lookup answers
   EAGAIN), and otherwise any error from the lookup, from removing the
   entry, or from touching the inode.  */
error_t
dir_unlink (struct protid *dircred,
	    char *name)
{
  struct inode *dip;
  struct inode *ip;
  struct dirstat ds;
  error_t error;

  if (!dircred)
    return EOPNOTSUPP;
  
  dip = dircred->po->ip;
  if (readonly)
    return EROFS;

  mutex_lock (&dip->i_toplock);

  error = lookup (dip, name, REMOVE, &ip, &ds, dircred);
  if (error == EAGAIN)
    error = EISDIR;
  if (error)
    {
      mutex_unlock (&dip->i_toplock);
      return error;
    }
  
  /* Every path through the protected region below reaches
     end_catch_exception; the early returns that used to sit inside it
     left the region open.  */
  if (!(error = catch_exception ()))
    {
      /* This isn't the BSD behavior, but it is Posix compliant and saves
	 us on several race conditions.*/
      if ((DI_MODE(ip->di) & IFMT) == IFDIR)
	{
	  /* Nothing will be removed; give up the pending removal.  */
	  dsrelease (&ds);
	  error = EISDIR;
	}
      else
	{  
	  error = dirremove (dip, &ds);
	  if (!error)
	    {
	      ip->di->di_nlink--;
	      ip->di->di_ctime = time->seconds;
	    }
	}
      end_catch_exception ();
    }

  /* This check is necessary because the node may be `.' itself (a
     directory, or a fault while checking the mode of one), in which
     case only the extra reference is dropped and the lock is released
     once, below.  */
  if (ip == dip)
    irele (ip);	
  else
    iput (ip);
  mutex_unlock (&dip->i_toplock);
  return error;
}

/* Make a new link called NAME, in the directory referenced by DIRCRED,
   to the file referenced by FILECRED.

   Returns EOPNOTSUPP if FILECRED is null, EROFS on a read-only
   filesystem, EXDEV if DIRCRED is null, EISDIR if the file is a
   directory, EMLINK if it has too many links, EEXIST if NAME already
   exists (or the lookup answers EAGAIN), and otherwise any error from
   the lookup, from entering the name, or from touching the inode.  If
   entering the name fails, the link count is put back and the error
   from direnter is returned.  */
error_t
dir_link (struct protid *filecred,
	  struct protid *dircred,
	  char *name)
{
  struct inode *ip;
  struct inode *dip;
  struct dirstat ds;
  error_t error;

  if (!filecred)
    return EOPNOTSUPP;
  
  ip = filecred->po->ip;
  if (readonly)
    return EROFS;
  
  if (!dircred)
    return EXDEV;
  
  dip = dircred->po->ip;
  mutex_lock (&dip->i_toplock);
  /* This lock is safe since a non-directory is inherently a leaf.  If
     both ports name the same inode it is already locked, and locking
     it a second time would never return.  */
  if (ip != dip)
    mutex_lock (&ip->i_toplock);

  ds.type = LOOKUP;

  if (!(error = catch_exception ()))
    {
      if ((DI_MODE(ip->di) & IFMT) == IFDIR)
	error = EISDIR;
      else if (ip->di->di_nlink == LINK_MAX - 1)
	error = EMLINK;
      end_catch_exception ();
    }
  if (error)
    /* Leave through `out' so that both locks are released.  */
    goto out;

  error = lookup (dip, name, CREATE, 0, &ds, dircred);

  if (error == EAGAIN)
    error = EEXIST;
  if (!error)
    error = EEXIST;
  if (error != ENOENT)
    goto out;
  
  /* Count the new link before the name becomes visible.  */
  if (!(error = catch_exception ()))
    {
      ip->di->di_nlink++;
      ip->di->di_ctime = time->seconds;
      end_catch_exception ();
    }
  else
    goto out;
  
  inode_update (ip, 1);

  error =  direnter (dip, name, ip, &ds, dircred);
  if (error)
    {
      /* Undo the link count.  The error reported to the caller stays
	 the one from direnter, whatever happens here.  */
      if (!catch_exception ())
	{
	  ip->di->di_nlink--;
	  ip->di->di_ctime = time->seconds;
	  end_catch_exception ();
	}
    }

 out:
  mutex_unlock (&dip->i_toplock);
  if (ip != dip)
    mutex_unlock (&ip->i_toplock);
  return error;
}

/* To avoid races in checkpath, and to prevent a directory from being
   simultaneously renamed by two processes, we serialize all renames of
   directories with this lock.  RENAMEDIRINIT is nonzero once the lock
   has been initialized; dir_rename initializes it on first use.  */
static struct mutex renamedirlock;
static int renamedirinit;

error_t
dir_rename (struct protid *fromcred,
	    char *fromname,
	    struct protid *tocred,
	    char *toname)
{
  struct inode *fdp, *tdp, *fip, *tip, *tmpip;
  error_t err;
  int isdir;
  struct dirstat ds;
  

  if (!fromcred)
    return EOPNOTSUPP;

  /* Verify that tocred really is a port to us XXX */
  if (!tocred)
    return EXDEV;

  if (readonly)
    return EROFS;

  fdp = fromcred->po->ip;
  tdp = tocred->po->ip;

 try_again:
  /* Acquire the source; hold a reference to it.  This 
     will prevent anyone from deleting it before we create
     the new link. */
  mutex_lock (&fdp->i_toplock);
  err = lookup (fdp, fromname, LOOKUP, &fip, 0, fromcred);
  mutex_unlock (&fdp->i_toplock);
  if (err)
    return err;

  if (!(err = catch_exception ()))
    isdir = (DI_MODE (fip->di) & IFMT) == IFDIR;
  else
    {
      iput (fip);
      return err;
    }

  mutex_unlock (&fip->i_toplock);

  if (isdir)
    {
      /* We have to serialize all renames of directories.  If we can't
	 immediately proceed, then the node we are renaming might have
	 changed, and we have to start again. */
      if (!renamedirinit)
	{
	  mutex_init (&renamedirlock);
	  renamedirinit++;
	}
      if (!mutex_try_lock (&renamedirlock))
	{
	  mutex_lock (&renamedirlock);
	  goto try_again;
	}
    }

  if (isdir)
    {
      /* Make sure that we won't make a loop in the directory 
	 structure with this rename. */
      mutex_lock (&tdp->i_toplock);
      err = checkpath (fip, tdp, tocred);
      if (err)
	{
	  irele (fip);
	  return err;
	}
    }
  
  /* Link the node into the new directory. */
  mutex_lock (&tdp->i_toplock);
  
  err = lookup (tdp, toname, RENAME, &tip, &ds, tocred);
  if (err && err != ENOENT)
    {
      irele (fip);
      mutex_unlock (&tdp->i_toplock);
      return err;
    }

  if (tip == fip)
    {
      irele (fip);
      iput (tip);
      mutex_unlock (&tdp->i_toplock);
      return 0;
    }

  /* This is guaranteed to be safe even if fip is 
     a directory, because we know that it isn't
     an ancestor of tdp, thanks to checkpath */
  mutex_lock (&fip->i_toplock);
  if (!(err = catch_exception()))
    {
      if (isdir && tip)
	{
	  if ((DI_MODE (tip->di) & IFMT) != IFDIR)
	    {
	      iput (fip);
	      iput (tip);
	      mutex_unlock (&tdp->i_toplock);
	      return ENOTDIR;
	    }
	  
	  if (!dirempty (tip, tocred))
	    {
	      iput (fip);
	      iput (tip);
	      mutex_unlock (&tdp->i_toplock);
	      return ENOTEMPTY;
	    }
	}
      
      if (fip->di->di_nlink == LINK_MAX - 1)
	{
	  end_catch_exception ();
	  iput (fip);
	  if (tip)
	    iput (tip);
	  mutex_unlock (&tdp->i_toplock);
	  return EMLINK;
	}
      fip->di->di_nlink++;
      fip->di->di_ctime = time->seconds;
      end_catch_exception ();
    }
  else
    {
      iput (fip);
      iput (tip);
      mutex_unlock (&tdp->i_toplock);
      return err;
    }
  if (isdir)
    {
      /* Set the ".." entry to point at the new directory. */
      if (!(err = catch_exception ()))
	{
	  if (tdp->di->di_nlink == LINK_MAX - 1)
	    {
	      end_catch_exception ();
	      iput (fip);
	      iput (tip);
	      mutex_unlock (&tdp->i_toplock);
	      return EMLINK;
	    }
	  tdp->di->di_nlink++;
	  tdp->di->di_ctime = time->seconds;
	  end_catch_exception ();
	}
      if (!err)
	{
	  struct dirstat tmpds;
	  err = ufs_checkdirmod (fip, DI_MODE (fip->di), fdp, fromcred);
	  if (!err)
	    err = lookup (fip, "..", RENAME | SPEC_DOTDOT, 
			  0, &tmpds, fromcred);
	  if (err == ENOENT)
	    panic ("rename of directory missing `..'");
	  if (!err)
	    err = dirrewrite (fip, tdp, &tmpds);
	  else
	    dsrelease (&tmpds);
	}
      if (err)
	{
	  iput (fip);
	  iput (tip);
	  mutex_unlock (&tdp->i_toplock);
	  return err;
	}
    }

  inode_update (fip, 1);
	  
  if (tip)
    {
      err = dirrewrite (tdp, fip, &ds);
      if (!err
	  && !(err = catch_exception ()))
	{
	  tip->di->di_nlink--;
	  tip->di->di_ctime = time->seconds;
	  end_catch_exception ();
	}
      if (isdir)
	clear_directory (tip, tdp, tocred);
      iput (tip);
    }
  else
    err = direnter (tdp, toname, fip, &ds, tocred);

  mutex_unlock (&tdp->i_toplock);
  mutex_unlock (&fip->i_toplock);
  if (err)
    {
      irele (fip);
      return err;
    }

  
  /* Now we remove the source.  Unfortunately, we haven't held 
     fdp locked (nor could we), so someone else might have already
     removed it.  This should never happen for directories, because
     our reference should prevent dirempty from returning true.  */
  mutex_lock (&fdp->i_toplock);
  err = lookup (fdp, fromname, REMOVE, &tmpip, &ds, fromcred);
  if (err)
    {
      mutex_unlock (&fdp->i_toplock);
      irele (fip);
      return err;
    }

  if (tmpip != fip)
    {
      dsrelease (&ds);
      if (isdir)
	panic ("rename / rmdir collision");
      iput (tmpip);
      irele (fip);
      mutex_unlock (&fdp->i_toplock);
      return 0;
    }
  
  irele (tmpip);

  if (!err)
    err = dirremove (fdp, &ds);
  else
    {
      dsrelease (&ds);
      iput (fip);
      mutex_unlock (&fdp->i_toplock);
      return err;
    }

  if (!(err = catch_exception ()))
    {
      fip->di->di_nlink--;
      fip->di->di_ctime = time->seconds;
      if (isdir)
	{
	  fdp->di->di_nlink--;	/* Old `..' link */
	  fdp->di->di_ctime = time->seconds;
	}
      end_catch_exception ();
    }

  iput (fip);
  mutex_unlock (&fdp->i_toplock);
  return err;
}

/* Create a new inode and link it into DIR as NAME.  Give it MODE;
   if that includes IFDIR, also initialize `.' and `..' in the new
   directory.  Return the node in *NEWNODE.  CRED identifies the user
   responsible for the call, and DS is the result of a prior lookup for
   creation; it is handed to direnter when the new name is entered in DIR.

   Returns the result of the final direnter on the normal path.  On the
   failure paths that jump to `dumpit' the new inode's link count is set
   to zero, the inode is released with iput, and the error is returned;
   note that *NEWNODE is not reset in that case.  */
static error_t
create_and_link (struct inode *dir,
		 char *name,
		 mode_t mode,
		 struct inode **newnode,
		 struct protid *cred,
		 struct dirstat *ds)
{
  struct inode *ip;
  error_t err;
  /* Set once DIR's link count has been bumped for the new `..', so that
     the cleanup code knows to undo it.  Presumably volatile because
     catch_exception behaves like setjmp -- TODO confirm.  */
  volatile int dirinc = 0;
  
  /* Make the inode.  Directories get their placement hint from
     dirpref; everything else is hinted at DIR's own inode number.
     NOTE(review): whatever ialloc returns is not examined here, so IP is
     used below even if allocation failed -- confirm ialloc's contract.  */
  ialloc (((mode & IFMT) == IFDIR) ? dirpref () : dir->i_number,
	  mode, newnode);
  ip = *newnode;

  /* Initialize the on-disk fields that are not taken care of
     by ialloc. */
  if (!(err = catch_exception ()))
    {
      /* di_blocks is done by ialloc; di_gen by iget */
      ip->di->di_nlink = 1;
      ip->di->di_qsize.val[0] = 0;
      ip->di->di_qsize.val[1] = 0;
      ip->di->di_flags = 0;
      ip->di->di_trans = 0;

      /* Set the uid.  If the user is anonymous, then make it
	 the same as the directory, but forestall fraud by dropping
	 the set-uid bit from MODE. */
      if (cred->nuids == 0)
	{
	  ip->di->di_uidl = dir->di->di_uidl;
	  ip->di->di_uidh = dir->di->di_uidh;
	  mode &= ~(ISUID);
	}
      else
	{
	  /* The first uid of CRED becomes the owner; the on-disk uid is
	     stored as separate low and high 16-bit halves.  */
	  ip->di->di_uidl = (cred->uids->ids[0] & 0xffff);
	  ip->di->di_uidh = (cred->uids->ids[0] & 0xffff0000) >> 16;
	}
      
      /* Set the group.  Inherit from the directory if we can.
	 If there is no group, inherit from the directory and
	 forestall fraud by dropping the set-gid bit from MODE. */
      if (cred->ngids == 0
	  || groupmember (DI_GID (dir->di), cred))
	{
	  ip->di->di_gidl = dir->di->di_gidl;
	  ip->di->di_gidh = dir->di->di_gidh;
	  if (cred->ngids == 0)
	    mode &= ~ISGID;
	}
      else
	{
	  /* Otherwise use the first gid of CRED, again split in halves.  */
	  ip->di->di_gidl = (cred->gids->ids[0] & 0xffff);
	  ip->di->di_gidh = (cred->gids->ids[0] & 0xffff0000) >> 16;
	}
      
      /* Store MODE (possibly with ISUID/ISGID stripped above) as
	 low and high 16-bit halves.  */
      ip->di->di_model = (mode & 0xffff);
      ip->di->di_modeh = (mode & 0xffff0000) >> 16;

      /* Do this last... */
      ip->di->di_atime = ip->di->di_mtime = ip->di->di_ctime = time->seconds;

      end_catch_exception ();
    }
  else
    goto dumpit;

  if ((mode & IFMT) == IFDIR)
    {
      struct dirstat dirds;
      struct inode *foo;

      /* New links */
      if (!(err = catch_exception ()))
	{
	  /* Refuse to add a `..' link if DIR's link count is already at
	     LINK_MAX - 1.  */
	  if (dir->di->di_nlink == LINK_MAX - 1)
	    {
	      end_catch_exception ();
	      err = EMLINK;
	      goto dumpit;
	    }
	  ip->di->di_nlink++;	/* for `.' */
	  dir->di->di_nlink++;	/* for `..' */
	  dirinc = 1;
	  ip->di->di_ctime = time->seconds;
	  dir->di->di_ctime = time->seconds;
	  end_catch_exception ();
	}
      else
	goto dumpit;
      
      /* Update both inodes before any directory entry refers to the
	 new counts.  */
      inode_update (ip, 1);
      inode_update (dir, 1);

      /* Initialize directory */
      
      /* Each lookup is done in CREATE mode and is expected to fail with
	 ENOENT; it fills in DIRDS for the direnter call that follows.  */
      err = lookup (ip, ".", CREATE, &foo, &dirds, cred);
      if (err != ENOENT)
	panic ("new directory already contains `.'");
      err = direnter (ip, ".", ip, &dirds, cred);
      if (err)
	goto dumpit;
      
      err = lookup (ip, "..", CREATE, &foo, &dirds, cred);
      if (err != ENOENT)
	panic ("new directory already contains `..'");
      err = direnter (ip, "..", dir, &dirds, cred);
      if (err)
	goto dumpit;
    }
  else
    inode_update (ip, 1);
  
  /* Finally enter the new node in DIR under NAME using the caller's DS.
     NOTE(review): a failure here is returned directly without going
     through `dumpit', so the link counts set above are not undone and
     IP is not released -- confirm whether the caller handles that.  */
  err = direnter (dir, name, ip, ds, cred);
  return err;

  /* Failure cleanup: undo the `..' link on DIR if it was added, mark the
     new inode as having no links, and release it.  ERR holds the error
     that brought us here.  If catch_exception itself reports an error
     the on-disk undo is skipped.  */
 dumpit:
  if (!catch_exception ())
    {
      if (dirinc)
	{
	  dir->di->di_nlink--;
	  dir->di->di_ctime = time->seconds;
	}
      ip->di->di_nlink = 0;
      ip->di->di_ctime = time->seconds;
      end_catch_exception ();
    }
  iput (ip);
  return err;
}

/* Set the translator for the file identified by CRED to TRANSNAME.

   CRED must own the file (checked with isowner).  TRANSNAME must be a
   NUL-terminated string lying within its first TRANSNAMELEN bytes;
   otherwise EINVAL is returned.  If FS_TRANS_EXCL is set in FLAGS and
   the file already has an active translator (ip->i_translator) or a
   translator block recorded on disk (di_trans), EBUSY is returned.  An
   existing active translator is otherwise shut down with
   destroy_translator, which is handed KILLTRANS_FLAGS.

   Unless FS_TRANS_FORCE is set, the well-known translators
   (_HURD_SYMLINK, _HURD_CHRDEV, _HURD_BLKDEV, _HURD_FIFO, _HURD_IFSOCK)
   are short-circuited: the file's type bits are changed instead of a
   translator name being stored.  This is refused with EISDIR for
   directories; for the device types the file must be IFREG (else
   EINVAL) and its first 20 bytes are parsed as the decimal device
   number.

   Otherwise one fragment is allocated, TRANSNAME is written into it
   (zero padded), and its block number is stored in di_trans.
   ENAMETOOLONG is returned if the name and its terminator do not fit
   in one fragment.

   EXISTING is not used by this implementation.  Returns 0 on success or
   an error code.

   NOTE(review): every exit after the isowner check calls iput (ip), but
   the two earliest returns do not, and no lock or extra reference is
   visibly taken in this function -- confirm the intended protocol.  */
error_t
file_set_translator (struct protid *cred,
		     int flags,
		     int killtrans_flags,
		     char *transname,
		     u_int transnamelen,
		     fsys_t existing)
{
  struct inode *ip;
  error_t error;
  daddr_t blkno;
  char blkbuf[sblock->fs_fsize];
  char *nameend;
  size_t namesize;

  if (!cred)
    return EOPNOTSUPP;

  /* Everything below treats TRANSNAME as a C string, so insist that the
     terminator is really inside the bytes we were handed before doing
     anything with side effects.  */
  nameend = memchr (transname, '\0', transnamelen);
  if (!nameend)
    return EINVAL;
  namesize = (size_t) (nameend - transname) + 1; /* includes the NUL */
  
  ip = cred->po->ip;

  if ((error = isowner (ip, cred)))
    return error;

  if (ip->i_translator)
    {
      if  (flags & FS_TRANS_EXCL)
	{
	  iput (ip);
	  return EBUSY;
	}
      destroy_translator (ip, killtrans_flags);
    }

  /* Also honor FS_TRANS_EXCL against a translator recorded on disk.
     A failure of catch_exception here is deliberately not fatal; the
     later on-disk accesses establish their own handlers.  */
  if (!(error = catch_exception ()))
    {
      if ((flags & FS_TRANS_EXCL) && ip->di->di_trans)
	{
	  end_catch_exception ();
	  iput (ip);
	  return EBUSY;
	}
      end_catch_exception ();
    }
	  
  /* Handle the short-circuited translators */
  if (!(flags & FS_TRANS_FORCE))
    {
      volatile mode_t newmode = 0;
      
      if (!strcmp (transname, _HURD_SYMLINK))
	newmode = IFLNK;
      else if (!strcmp (transname, _HURD_CHRDEV))
	newmode = IFCHR;
      else if (!strcmp (transname, _HURD_BLKDEV))
	newmode = IFBLK;
      else if (!strcmp (transname, _HURD_FIFO))
	newmode = IFIFO;
      else if (!strcmp (transname, _HURD_IFSOCK))
	newmode = IFSOCK;
      
      if (newmode)
	{
	  volatile dev_t devno = 0;
	  if (!(error = catch_exception ()))
	    {
	      if ((DI_MODE (ip->di) & IFMT) == IFDIR)
		{
		  /* We can't allow this, because if the mode of the directory
		     changes, the links will be lost.  Perhaps it might be 
		     allowed for empty directories, but that's too much of a
		     pain.  */
		  end_catch_exception ();
		  iput (ip);
		  return EISDIR;
		}
	      if (newmode == IFBLK || newmode == IFCHR)
		{
		  /* Here we need to read the device number from the
		     contents of the file (which must be IFREG).  The
		     buffer has one spare, pre-zeroed byte so that atoi
		     always sees a terminated string, even when the file
		     supplies all 20 bytes or fewer than 20.  */
		  char buf[20 + 1];
		  
		  if ((DI_MODE (ip->di) & IFMT) != IFREG)
		    {
		      end_catch_exception ();
		      iput (ip);
		      return EINVAL;
		    }
		  memset (buf, 0, sizeof buf);
		  error = fs_rdwr (ip, buf, 0, 20, 0, 0);
		  if (error)
		    {
		      end_catch_exception ();
		      iput (ip);
		      return error;
		    }
		  devno = atoi (buf);
		}
	      /* A symlink keeps the file contents; every other short-circuited
		 type discards them.  */
	      if (newmode != IFLNK)
		inode_truncate (ip, 0);
	      ip->di->di_model = (ip->di->di_model & ~IFMT) | newmode;
	      if (newmode == IFBLK || newmode == IFCHR)
		ip->di->di_rdev = devno;
	      end_catch_exception ();
	    }
	  inode_update (ip, 1);
	  iput (ip);
	  /* ERROR is zero on the success path and otherwise holds what
	     catch_exception reported; don't claim success in that case.  */
	  return error;
	}
    }

  /* The name, terminator included, must fit in the single fragment
     that holds it.  Check before allocating so that nothing has to be
     given back.  */
  if (namesize > sizeof blkbuf)
    {
      iput (ip);
      return ENAMETOOLONG;
    }

  /* Allocate a block for the translator */
  error = alloc (ip, 0, 0, sblock->fs_fsize, &blkno, cred);
  if (error)
    {
      iput (ip);
      return error;
    }
  
  /* Write the name of the translator into the block.  The remainder of
     the fragment is zeroed so that no stack contents reach the disk.  */
  memset (blkbuf, 0, sizeof blkbuf);
  memcpy (blkbuf, transname, namesize);
  dev_write_sync (blkno, blkbuf, sblock->fs_fsize);
  
  /* Update the inode */
  if (!(error = catch_exception ()))
    {
      ip->di->di_trans = blkno;
      end_catch_exception ();
    }
  iput (ip);
  return error;
}

/* Hand back in *CTL the control port of the active translator on the
   file identified by CRED.  Fails with EOPNOTSUPP when CRED is null,
   with ENXIO when the file has no active translator, and with whatever
   isowner reports when CRED is not permitted.  *CTL is written only on
   success.  */
error_t
file_get_translator_cntl (struct protid *cred,
			  mach_port_t *ctl)
{
  struct inode *node;
  error_t err = ENXIO;		/* result when there is no translator */

  if (cred == 0)
    return EOPNOTSUPP;

  node = cred->po->ip;

  if (node->i_translator)
    {
      /* Only the owner may obtain the control port.  */
      err = isowner (node, cred);
      if (err == 0)
	*ctl = node->i_translator;
    }

  iput (node);
  return err;
}

/* Fetch the translator name of the file identified by CRED into *TRANS
   and *TRANSLEN.  Not implemented yet: none of the arguments is
   examined and EOPNOTSUPP is always returned.  XXX */
error_t
file_get_translator (struct protid *cred,
		     char **trans,
		     u_int *translen)
{
  error_t unimplemented = EOPNOTSUPP;

  return unimplemented;
}
  
void
destroy_translator (struct inode *ip,
		    int flags)
{
  U_fsys_goaway (ip->i_translator, flags);
  mach_port_deallocate (mach_task_self (), ip->i_translator);
  ip->i_translator = 0;
}

/* Answer a stat-translator request by reporting the same information
   as file_statfs for CRED, stored into STATBUF.  A null CRED yields
   EOPNOTSUPP.  */
error_t
file_stat_translator (struct protid *cred,
		      fsys_statfsbuf_t *statbuf)
{
  return cred ? file_statfs (cred, statbuf) : EOPNOTSUPP;
}

