1
0
Files
irix-657m-src/irix/kern/fs/efs/efs_vnodeops.c
T
2022-09-29 17:59:04 +03:00

2538 lines
56 KiB
C

/*
* EFS vnode operations.
*
* Copyright 1992, Silicon Graphics, Inc.
* All Rights Reserved.
*
* This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.;
* the contents of this file may not be disclosed to third parties, copied or
* duplicated in any form, in whole or in part, without the prior written
* permission of Silicon Graphics, Inc.
*
* RESTRICTED RIGHTS LEGEND:
* Use, duplication or disclosure by the Government is subject to restrictions
* as set forth in subdivision (c)(1)(ii) of the Rights in Technical Data
* and Computer Software clause at DFARS 252.227-7013, and/or in similar or
* successor clauses in the FAR, DOD or NASA FAR Supplement. Unpublished -
* rights reserved under the Copyright Laws of the United States.
*/
#ident "$Revision: 1.213 $"
#include <sys/types.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/debug.h>
#include <sys/dirent.h>
#include <sys/dnlc.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <ksys/vfile.h>
#include <sys/fs_subr.h>
#include <sys/iograph.h>
#include <sys/kabi.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/mode.h>
#include <sys/param.h>
#include <sys/pathname.h>
#include <sys/pfdat.h> /* page flushing prototypes */
#include <sys/poll.h>
#include <sys/quota.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/sysinfo.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/sat.h>
#include <sys/capability.h>
#include <sys/flock.h>
#include <sys/kfcntl.h>
#include <fs/specfs/spec_lsnode.h>
#include <string.h>
#ifdef _SHAREII
#include <sys/shareIIstubs.h>
#endif /* _SHAREII */
#include "efs_inode.h"
#include "efs_dir.h"
#include "efs_sb.h"
#ifdef DATAPIPE
/* data pipe functions */
extern int fspe_get_ops(void *);
int efs_fspe_dioinfo(struct vnode *, struct dioattr *);
#endif
static int efs_readi(struct inode *, struct uio *, int, u_short, struct cred *,
struct flid *);
static int efs_writei(struct inode *, struct uio *, int, struct cred *,
struct flid *);
static void efs_rwunlock(bhv_desc_t *, vrwlock_t);
static void efs_rwlock(bhv_desc_t *, vrwlock_t);
static int efs_fid(bhv_desc_t *, struct fid **);
static int efs_fid2(bhv_desc_t *, struct fid *);
static int efs_setattr(bhv_desc_t *, struct vattr *,int ,struct cred *);
#if _MIPS_SIM == _ABI64
int irix5_to_flock(enum xlate_mode, void *, int, xlate_info_t *);
int flock_to_irix5(void *, int, xlate_info_t *);
int irix5_n32_to_flock(enum xlate_mode, void *, int, xlate_info_t *);
int flock_to_irix5_n32(void *, int, xlate_info_t *);
#endif
/*
* EFS direct I/O can be other than page aligned as long as we report
* the maximum transfer size as the maximum number of pages minus 1.
* This takes care of the case where the I/O is not page aligned, but
* it is of maxdmasz size. We go with BBSIZE for the alignment, because
* that is what it has always been.
*/
#define FDIRIOALIGN BBSIZE
#define EFS_INVALIDOFF(off) (((off) < 0) || ((off) > SEEKLIMIT32))
/*
* No open action is required for regular files. Devices are handled
* through the specfs file system, pipes through fifofs. Device and
* fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
* when a new vnode is first looked up or created.
*/
/* ARGSUSED */
/*
 * efs_close -- VOP_CLOSE for EFS.
 *
 * No close processing is needed for regular files; device and fifo
 * vnodes are wrapped by specfs/fifofs (see the comment above), which
 * field their own close operations.  Always succeeds.
 */
static int
efs_close(
	bhv_desc_t *bdp,
	int flag,
	lastclose_t lastclose,
	struct cred *cr)
{
	return 0;
}
/* ARGSUSED */
/*
 * efs_read -- VOP_READ for EFS.
 *
 * Takes the inode rwlock shared unless the caller already holds it
 * (IO_ISLOCKED), then hands the request to efs_readi() along with the
 * inode's IFMT format bits.
 */
static int
efs_read(
	bhv_desc_t *bdp,
	struct uio *uiop,
	int ioflag,
	struct cred *cr,
	struct flid *fl)
{
	struct inode *ip;
	int error;
	int need_lock = !(ioflag & IO_ISLOCKED);

	if (need_lock)
		efs_rwlock(bdp, VRWLOCK_READ);
	ip = bhvtoi(bdp);
	ASSERT(ip->i_flags & IRWLOCK);
	error = efs_readi(ip, uiop, ioflag,
			  (u_short)(ip->i_mode & IFMT), cr, fl);
	if (need_lock)
		efs_rwunlock(bdp, VRWLOCK_READ);
	return error;
}
/* ARGSUSED */
/*
 * efs_write -- VOP_WRITE for EFS.
 *
 * Takes the inode rwlock exclusively unless the caller already holds
 * it (IO_ISLOCKED).  For regular files written with IO_APPEND the uio
 * offset is repositioned to the current end of file -- under the
 * rwlock, so concurrent writers cannot interleave -- before the write
 * proper is delegated to efs_writei().
 */
static int
efs_write(
	bhv_desc_t *bdp,
	struct uio *uiop,
	int ioflag,
	struct cred *cr,
	struct flid *fl)
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	struct inode *ip;
	int error;
	int need_lock = !(ioflag & IO_ISLOCKED);

	if (need_lock)
		efs_rwlock(bdp, VRWLOCK_WRITE);
	ip = bhvtoi(bdp);
	ASSERT(ip->i_flags & IRWLOCK);
	/* append mode: start at end of file */
	if (vp->v_type == VREG && (ioflag & IO_APPEND))
		uiop->uio_offset = ip->i_size;
	error = efs_writei(ip, uiop, ioflag, cr, fl);
	if (need_lock)
		efs_rwunlock(bdp, VRWLOCK_WRITE);
	return error;
}
/*
 * Maximum number of split-up physical I/Os diostrat() issues per batch
 * before stopping to reap (biowait) the outstanding buffers.
 */
#define MAXDIOSPLIT 20
/*
 * Direct-I/O request context, passed from efs_diordwr() to diostrat()
 * through the buf's b_private field: the EFS behavior descriptor,
 * the requester's credentials, and the original I/O flags.
 */
struct dio_s {
	bhv_desc_t *bdp;
	struct cred *cr;
	int ioflag;
};
/*
 * diostrat -- strategy routine driving EFS direct I/O.
 *
 * Invoked through biophysio() with a buf describing the user's mapped
 * buffer.  The request is carved along extent boundaries into batches
 * of up to MAXDIOSPLIT physical I/Os, each issued to the underlying
 * device and then reaped with biowait().  Writes may grow the file;
 * if any sub-I/O fails, the file is trimmed back to its original size
 * and the whole request fails.  Writes also update mtime/ctime and
 * strip setuid/setgid, mirroring efs_writei().  Errors are posted on
 * bp via bioerror(); the function itself always returns 0.
 *
 * Fix: the delwri flush start offset was computed with "& ~NBPP",
 * which clears only one bit and does not page-align the offset; it is
 * now rounded down to a page boundary with "& ~(NBPP - 1)".
 */
static int
diostrat(buf_t *bp)
{
	struct dio_s *dp;
	bhv_desc_t *bdp;
	struct vnode *vp;
	scoff_t offset, pushstart;
	int count, i, j, n, totxfer;
	buf_t *bps[MAXDIOSPLIT];
	int dlen = 1, dbn;
	struct bmapval bmv;
	buf_t *nbp;
	caddr_t base;
	int error, resid, totresid;
	int trail = 0;		/* nonzero: partial-sector read at EOF */
	struct inode *ip;
	scoff_t o_size;

	ASSERT(!(bp->b_flags & B_DONE));
	dp = (struct dio_s*) bp->b_private;
	bdp = dp->bdp;
	vp = BHV_TO_VNODE(bdp);
	ip = bhvtoi(bdp);
	o_size = ip->i_size;	/* remembered for trim-back on error */
	offset = BBTOB(bp->b_blkno);
	totresid = count = bp->b_bcount;
	base = bp->b_un.b_addr;
	error = resid = totxfer = 0;
	while ( !error && count && !trail && dlen ) {
		for ( i = 0 ; (i < MAXDIOSPLIT) && count && !trail ; i++ ) {
			/* build an io: coalesce physically contiguous extents */
			for ( dlen = 0 ; dlen < count ; ) {
				n = 1;
				error = efs_bmap(bdp, offset+dlen, count-dlen,
						bp->b_flags&B_READ, dp->cr, &bmv, &n);
				if (error || (bmv.pbsize == 0))
					break;
				/* prime the pump */
				if (dlen == 0) {
					dbn = bmv.bn + BTOBB(bmv.pboff);
					/* round flush start down to a page boundary */
					pushstart = BBTOB(bmv.offset) & ~(NBPP - 1);
				} else if (dbn + BTOBB(dlen) != bmv.bn + BTOBB(bmv.pboff))
					break;	/* not contiguous: stop coalescing */
				dlen += bmv.pbsize;
				/* see if the file grew from writes */
				if ((bp->b_flags & B_READ) == 0 &&
				    offset + dlen > ip->i_size ) {
					ASSERT((vp->v_flag & VISSWAP) == 0);
					ip->i_size = offset + dlen;
					ip->i_flags |= ITRUNC;
				}
			}
			/* end of file or an error */
			if ( (dlen == 0) || error )
				break;
			/*
			 * Flush out delwri data so cached pages cannot
			 * shadow the direct transfer.
			 */
			if (!(dp->ioflag & IO_IGNCACHE)) {
				off_t end = BBTOB(bmv.offset + bmv.length);
				VOP_FLUSHINVAL_PAGES(vp, (off_t)pushstart, ctob(btoc(end)) - 1,
					FI_NONE);
			}
			/* check for partial reads at end of file */
			if ( dlen & BBMASK ) {
				ASSERT(bp->b_flags & B_READ);
				trail = dlen;
				dlen &= ~BBMASK;
				dlen += BBSIZE;	/* round up to a full sector */
			}
			/* trim back xfer */
			if ( dlen > count )
				dlen = count;
			/* get the information from disk */
			bps[i] = nbp = getphysbuf(bp->b_edev);
			nbp->b_flags = bp->b_flags;
			nbp->b_error = 0;
			nbp->b_blkno = dbn;
			nbp->b_bcount = dlen;
			nbp->b_un.b_addr = base;
			VOP_STRATEGY(ip->i_mount->m_devvp,nbp);
			if (error = geterror(nbp)) {
				/* submission failed: reap this buf right here */
				biowait(nbp);
				nbp->b_flags = 0;
				putphysbuf(nbp);
				break;
			}
			/* correct for partial reads */
			if( trail )
				dlen = trail;
			base += dlen;
			offset += dlen;
			count -= dlen;
		}
		/* recover the buffers issued this batch */
		for ( j = 0 ; j < i ; j++ ) {
			nbp = bps[j];
			biowait(nbp);
			/* check for an error */
			if ( !error )
				error = geterror(nbp);
			if ( !error && !resid ) {
				resid = nbp->b_resid;
				/* prevent adding up partial xfers */
				if( trail && (j == (i-1)) ) {
					/* correct for partial reads */
					if( resid <= nbp->b_bcount - trail )
						totxfer += trail;
				}
				else
					totxfer += nbp->b_bcount - resid;
			}
			nbp->b_flags = 0;
			putphysbuf(nbp);
		}
	}
	/* if any of the io's fail, the whole thing fails */
	if ( error ) {
		totxfer = 0;
		/* undo any file growth done above */
		if (((bp->b_flags & B_READ) == 0) && !(vp->v_flag & VISSWAP))
			efs_itrunc(ip, o_size, 0);
	}
	bp->b_resid = totresid - totxfer;
	/* writes update timestamps and strip setuid/setgid, as efs_writei does */
	if ( (bp->b_flags & B_READ) == 0 ) {
		timespec_t tv;
		if ((ip->i_mode & (ISUID|ISGID)) &&
		    !cap_able_cred(dp->cr, CAP_FSETID)) {
			ip->i_mode &= ~ISUID;
			if (ip->i_mode & (IEXEC >> 3))
				ip->i_mode &= ~ISGID;
		}
		nanotime_syscall(&tv);
		ip->i_flags |= IMOD;
		ip->i_mtime = ip->i_ctime = tv.tv_sec;
		ip->i_umtime = tv.tv_nsec;
	}
	bioerror(bp,error);
	biodone(bp);
	/* make the compiler happy */
	return 0;
}
/*
 * efs_diordwr -- set up and run a direct (uncached) read or write.
 *
 * dirflg is B_READ for reads, B_WRITE for writes.  The user buffer
 * address must be FDIRIOALIGN (BBSIZE) aligned, and both the file
 * offset and byte count must be BBSIZE multiples; the request must
 * also fit in a single DMA (v_maxdmasz - 1 pages).  Violations return
 * EINVAL.  The transfer itself is performed by diostrat(), invoked
 * through biophysio() with the request context hung off b_private.
 */
static int
efs_diordwr(bhv_desc_t *bdp, struct uio *uiop, struct cred *cr, int ioflag,
	uint64_t dirflg)
{
	struct inode *ip = bhvtoi(bdp);
	struct dio_s dp;
	buf_t *bp;
	int error;
	/* special case caused by trailing reads: a read at EOF returns 0 */
	if (dirflg & B_READ ){
		if (ip->i_size == uiop->uio_offset)
			return 0;
	}
	/* do alignment checks */
	if (((__psint_t)uiop->uio_iov->iov_base & (FDIRIOALIGN-1))
	    || (uiop->uio_offset & BBMASK) || (uiop->uio_resid & BBMASK))
		return EINVAL;
	/* do maxio check */
	if (uiop->uio_resid > ctob(v.v_maxdmasz - 1))
		return EINVAL;
	bp = getphysbuf(ip->i_dev);
	/* save the info for later... diostrat() retrieves it via b_private */
	dp.bdp = bdp;
	dp.cr = cr;
	dp.ioflag = ioflag;
	bp->b_private = &dp;
	error = biophysio(diostrat, bp, bp->b_edev, dirflg,
			(daddr_t)BTOBB(uiop->uio_offset), uiop);
	bp->b_flags = 0;
	putphysbuf(bp);
	return error;
}
#define COPYOUT(bp,off,len,uio) biomove(bp,off,len,UIO_READ,uio)
#define COPYIN(bp,off,len,uio) biomove(bp,off,len,UIO_WRITE,uio)
#define NREADIMAPS 4
/*
 * efs_readi -- common read path for regular files, directories and
 * symlinks.
 *
 * 'type' is the IFMT format of the inode.  Enforces mandatory file
 * locking when enabled, validates the offset against SEEKLIMIT32,
 * updates the access time (on writeable file systems only, per POSIX),
 * then copies data out through the buffer/chunk cache -- or, for
 * IO_DIRECT regular-file reads, through efs_diordwr().  Sockets get
 * ENODEV.  The caller holds the appropriate inode locking.
 */
static int
efs_readi(struct inode *ip,
	struct uio *uio,
	int ioflag,
	u_short type,
	struct cred *cr,
	struct flid *fl)
{
	register off_t offset;
	int error, n, i;
	struct vnode *vp;
	struct bmapval bmv[NREADIMAPS];
	int nmaps;
	struct buf *bp;
	timespec_t tv;

	ASSERT(type == IFREG || type == IFDIR || ISLINK(type) ||
	    type == IFSOCK);
	vp = itov(ip);
	offset = uio->uio_offset;
	/* check for locks if some exist and mandatory locking is enabled */
	if ((vp->v_flag & (VENF_LOCKING|VFRLOCKS)) ==
	    (VENF_LOCKING|VFRLOCKS)) {
		error = fs_checklock(vp, FREAD, offset, uio->uio_resid,
				uio->uio_fmode, cr, fl, VRWLOCK_READ);
		if (error)
			return error;
	}
	if (EFS_INVALIDOFF(offset))
		return EINVAL;
	if (uio->uio_resid <= 0)
		return 0;
	/*
	 * Do the following only for writeable file systems.
	 * This closes a POSIX conformance bug which says that a read on a
	 * file in a read-only file system should not update its access
	 * time. Also, logically, there's no point in updating the atime
	 * as it is never going to be written back to disk.
	 */
	if (!(itovfs(ip)->vfs_flag & VFS_RDONLY)) {
		nanotime_syscall(&tv);
		ip->i_flags |= IMOD;
		ip->i_atime = tv.tv_sec;
	}
	switch (type) {
	case IFLNK:
	case IFCHRLNK:
	case IFBLKLNK:
		/* in-line sym link? (contents stored in the extent area) */
		if (ip->i_numextents == 0) {
			ASSERT(ip->i_size <= EFS_MAX_INLINE);
			/* paranoia when asserts are gone... */
			n = MIN(ip->i_size, EFS_MAX_INLINE);
			if ((n -= uio->uio_offset) <= 0) {
				error = 0;
				break;
			}
			n = MIN(uio->uio_resid, n);
			error = uiomove((char *)ip->i_extents, n, UIO_READ,
					uio);
			break;
		}
		/* fall through: out-of-line links are read like directories */
	case IFDIR:
		do {
			nmaps = 2;
			error = efs_bmap(itobhv(ip), uio->uio_offset, uio->uio_resid,
					B_READ, cr, bmv, &nmaps);
			if (error || bmv[0].pbsize == 0)
				break;
			ASSERT(bmv[0].bn >= 0);
			/* read ahead the second mapping if one was returned */
			if (nmaps > 1)
				bp = breada(bmv[0].pbdev,
					bmv[0].bn, bmv[0].length,
					bmv[1].bn, bmv[1].length);
			else
				bp = bread(bmv[0].pbdev,
					bmv[0].bn, bmv[0].length);
			if (bp->b_flags & B_ERROR)
				error = bp->b_error;
			else if (bp->b_resid)
				n = 0;	/* short read: terminate the loop */
			else {
				n = bmv[0].pbsize;
				error = COPYOUT(bp, bmv[0].pboff, n, uio);
			}
			brelse(bp);
		} while (!error && uio->uio_resid != 0 && n != 0);
		break;
	case IFREG:
		if (ioflag & IO_RSYNC) {
			/* First we sync the data */
			if ((ioflag & IO_SYNC) || (ioflag & IO_DSYNC)) {
				VOP_FLUSH_PAGES(vp, (off_t)0, ip->i_size - 1, 0, FI_NONE, error );
				error = 0;
			}
			/* then push the inode if it isn't already permanent */
			if ((ip->i_remember < ip->i_size) || (ioflag & IO_SYNC)) {
				ip->i_flags |= ISYN;
				ip->i_remember = ip->i_size;
				efs_iupdat(ip);
			}
		}
		if (ioflag & IO_DIRECT) {
			error = efs_diordwr(itobhv(ip), uio, cr, ioflag,
					B_READ);
			break;
		}
		do {
			nmaps = NREADIMAPS;
			error = efs_bmap(itobhv(ip), uio->uio_offset, uio->uio_resid,
					B_READ, cr, bmv, &nmaps);
			if (error || (n = bmv[0].pbsize) == 0)
				break;
			/*
			 * Pass on the policy modules from our caller
			 * to the chunk cache.
			 */
			for (i = 0; i < nmaps; i++) {
				bmv[i].pmp = uio->uio_pmp;
			}
			bp = chunkread(vp, bmv, nmaps, cr);
			if (bp->b_flags & B_ERROR)
				error = bp->b_error;
			else if (bp->b_resid)
				n = 0;	/* short read: terminate the loop */
			else
				error = COPYOUT(bp, bmv[0].pboff, n, uio);
			brelse(bp);
		} while (!error && uio->uio_resid != 0 && n != 0);
		break;
	case IFSOCK:
		error = ENODEV;
	}
	return error;
}
extern int efs_inline;
/*
 * efs_writei -- common write path for regular files, directories and
 * symlink creation.
 *
 * Enforces mandatory file locking, validates offsets against
 * SEEKLIMIT32, and honors the process file-size limit: a write that
 * crosses the limit is silently shortened, with the remainder added
 * back into uio_resid.  Small symlinks are stored in-line in the
 * extent area.  Regular-file writes go through the chunk cache, or
 * through efs_diordwr() for IO_DIRECT.  mtime/ctime are stamped once
 * at the end if anything changed, and the inode is pushed
 * synchronously when IO_SYNC/IO_DSYNC writes changed the file size.
 * The caller holds the inode rwlock exclusively.
 */
static int
efs_writei(struct inode *ip,
	struct uio *uio,
	int ioflag,
	struct cred *cr,
	struct flid *fl)
{
	int type, error, n, count, resid;
	struct vnode * vp;
	register off_t offset;
	struct bmapval bmv;
	int nmaps;
	struct buf * bp;
	int dotime = 0;		/* set once data is written: stamp times at end */
	off_t limit;

	type = ip->i_mode & IFMT;
	ASSERT(type == IFREG || type == IFDIR || ISLINK(type) ||
	    type == IFSOCK);
	vp = itov(ip);
	offset = uio->uio_offset;
	count = uio->uio_resid;
	/* check for locks if some exist and mandatory locking is enabled */
	if ((vp->v_flag & (VENF_LOCKING|VFRLOCKS)) ==
	    (VENF_LOCKING|VFRLOCKS)) {
		error = fs_checklock(vp, FWRITE, offset, count, uio->uio_fmode,
				cr, fl, VRWLOCK_WRITE);
		if (error)
			return error;
	}
	if ( EFS_INVALIDOFF(offset) || EFS_INVALIDOFF(offset + count) )
		return EINVAL;
	if (count <= 0)
		return 0;
	switch (type) {
	case IFLNK:
	case IFCHRLNK:
	case IFBLKLNK:
		/*
		 * Create an in-line sym link iff there's room.
		 */
		ASSERT(offset == 0);
		ASSERT(ip->i_numextents == 0);
		if (efs_inline && count <= EFS_MAX_INLINE) {
			irealloc(ip, count);
			error = uiomove((char *)ip->i_extents, count,
					UIO_WRITE, uio);
			if (!error) {
				ip->i_size = count;
				dotime = 1;
			}
			break;
		}
		/* FALLTHROUGH: too big for in-line, write like a directory */
	case IFDIR:
		do {
			nmaps = 1;
			if (error = efs_bmap(itobhv(ip), uio->uio_offset,
					     uio->uio_resid, B_WRITE, cr,
					     &bmv, &nmaps))
				break;
			/* whole-block writes need not read the old contents */
			bp = ((n = bmv.pbsize) == bmv.bsize) ?
				getblk(bmv.pbdev, bmv.bn, bmv.length) :
				bread(bmv.pbdev, bmv.bn, bmv.length);
			if (error = COPYIN(bp, bmv.pboff, n, uio)) {
				brelse(bp);
				break;
			}
			if (uio->uio_offset > ip->i_size)
				ip->i_size = uio->uio_offset;
			dotime = 1;
			if ((ioflag & IO_SYNC) || (ioflag & IO_DSYNC))
				bwrite(bp);
			else
				bdwrite(bp);
		} while (uio->uio_resid != 0 && n != 0);
		break;
	case IFREG:
		limit = MIN(uio->uio_limit, (off_t)EFS_MAX_FILE_OFFSET);
		n = (int)(limit - uio->uio_offset);
		if (n <= 0)
			return EFBIG;
		if (n < uio->uio_resid) { /* only do partial write */
			resid = uio->uio_resid - n;
			uio->uio_resid = n;
		} else {
			resid = 0;
		}
		if (ioflag & IO_DIRECT) {
			error = efs_diordwr(itobhv(ip), uio, cr, ioflag,
					B_WRITE);
			/* add back remainder of write */
			uio->uio_resid += resid;
			break;
		}
		do {
			nmaps = 1;
			if (error = efs_bmap(itobhv(ip), uio->uio_offset,
					     uio->uio_resid, B_WRITE, cr,
					     &bmv, &nmaps))
				break;
			/*
			 * We must bread the buffer if the write doesn't
			 * completely overwrite the buffer and the write
			 * either begins after the start of the buffer or
			 * ends before the current end of file.
			 */
			bmv.pmp = uio->uio_pmp;
			if ((n = bmv.pbsize) != bmv.bsize
			    && (bmv.pboff != 0 || uio->uio_offset != ip->i_size))
				bp = chunkread(vp, &bmv, 1, cr);
			else
				bp = getchunk(vp, &bmv, cr);
			if (bp->b_flags & B_ERROR) {
				error = bp->b_error;
				brelse(bp);
				break;
			}
			if (error = COPYIN(bp, bmv.pboff, n, uio)) {
				/* poison the chunk so stale data is not reused */
				if (!(bp->b_flags & B_DONE))
					bp->b_flags |= B_STALE|B_DONE|B_ERROR;
				brelse(bp);
				break;
			}
			/*
			 * Update file size if COPYIN extended uio_offset.
			 */
			if (uio->uio_offset > ip->i_size) {
				ip->i_size = uio->uio_offset;
				ip->i_flags |= ITRUNC;
			}
			/*
			 * Mark inode modified and clear suid and sgid if
			 * not superuser.
			 */
			dotime = 1;
			if ((ip->i_mode & (ISUID|ISGID)) &&
			    !cap_able_cred(cr, CAP_FSETID)) {
				ip->i_mode &= ~ISUID;
				if (ip->i_mode & (IEXEC >> 3))
					ip->i_mode &= ~ISGID;
			}
			if ((ioflag & IO_SYNC) || (ioflag & IO_DSYNC))
				bwrite(bp);
			else
				bdwrite(bp);
		} while (uio->uio_resid != 0 && n != 0);
		uio->uio_resid += resid; /* add back remainder of write */
		break;
	case IFSOCK:
		error = ENODEV;
		break;
	}
	/*
	 * If we've already done a partial write, terminate
	 * the write but return no error.
	 */
	if (count != uio->uio_resid) {
		error = 0;
	}
	/*
	 * Set timestamps. Don't put it off, we want the time to
	 * be reasonably accurate.
	 */
	if (dotime) {
		timespec_t tv;
		nanotime_syscall(&tv);
		ip->i_flags |= IMOD;
		ip->i_mtime = ip->i_ctime = tv.tv_sec;
		ip->i_umtime = tv.tv_nsec;
	}
	/*
	 * Update the inode only if inode changed.
	 * We set i_remember to i_size to ensure that the data
	 * written is actually permanent in the inode.
	 */
	if ((ioflag & (IO_SYNC | IO_DSYNC)) &&
	    (ip->i_flags & ITRUNC) &&
	    !(vp->v_flag & VISSWAP) &&
	    !error) {
		ip->i_flags |= ISYN;
		ip->i_remember = ip->i_size;
		error = efs_iupdat(ip);
	}
	return error;
}
/* ARGSUSED */
/*
 * efs_ioctl -- VOP_IOCTL for EFS.  EFS implements no file ioctls;
 * every request fails with ENOTTY (device ioctls are handled by
 * specfs on the wrapping vnode).
 */
static int
efs_ioctl(
	bhv_desc_t *bdp,
	int cmd,
	void *arg,
	int flag,
	struct cred *cr,
	int *rvalp,
	struct vopbd *vbds)
{
	return ENOTTY;
}
/* ARGSUSED */
/*
 * efs_getattr -- VOP_GETATTR for EFS.
 *
 * Fills in *vap from the in-core inode.  Short-circuits the cheap
 * AT_SIZE-only and id/link-count-only queries before doing the full
 * copy.  Pending time-update flags (IACC/IUPD/ICHG) are applied and
 * cleared under the inode lock first, since POSIX stat() requires
 * deferred updates to be visible.  Always returns 0.
 */
static int
efs_getattr(bdp, vap, flags, cr)
	bhv_desc_t *bdp;
	struct vattr *vap;
	int flags;
	struct cred *cr;
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	struct inode *ip;
	u_short type;

	ip = bhvtoi(bdp);
	vap->va_size = ip->i_size;
	/* fast path: caller only wants the size */
	if (vap->va_mask == AT_SIZE)
		return 0;
	vap->va_fsid = ip->i_dev;
	vap->va_nodeid = ip->i_number;
	vap->va_nlink = ip->i_nlink;
	vap->va_gencount = ip->i_gen;
	/* fast path: ids, link count and size satisfy the request */
	if (!(vap->va_mask & ~(AT_FSID|AT_NODEID|AT_NLINK|AT_GENCOUNT|AT_SIZE)))
		return 0;
	/*
	 * POSIX stat etc. require that any pending update flags
	 * be dealt with and cleared upon return from stat.
	 * Since we defer updating the inode on setting these flags
	 * we must pay now. Rather than really go through the
	 * entire efs_iupdat, we simply get the times up to date.
	 * This emulates the setattr code below.
	 */
	if (ip->i_flags & (IACC|IUPD|ICHG)) {
		timespec_t tv;
		nanotime_syscall(&tv);
		ilock(ip);
		if (ip->i_flags & IACC)
			ip->i_atime = tv.tv_sec;
		if (ip->i_flags & IUPD) {
			ip->i_mtime = tv.tv_sec;
			ip->i_umtime = tv.tv_nsec;
		}
		if (ip->i_flags & ICHG)
			ip->i_ctime = tv.tv_sec;
		ip->i_flags &= ~(IACC|IUPD|ICHG);
		ip->i_updtimes = 0;
		ip->i_flags |= IMOD;
		iunlock(ip);
	} else if (ip->i_updtimes) {
		ilock(ip);
		ip->i_updtimes = 0;
		ip->i_flags |= IMOD;
		iunlock(ip);
	}
	/*
	 * Copy from in-core inode.
	 */
	vap->va_type = vp->v_type;
	vap->va_mode = ip->i_mode & MODEMASK;
	vap->va_uid = ip->i_uid;
	vap->va_gid = ip->i_gid;
	vap->va_vcode = ip->i_vcode;
	if (vp->v_type == VCHR || vp->v_type == VBLK)
		vap->va_rdev = ip->i_rdev;
	else
		vap->va_rdev = 0;	/* not a b/c spec. */
	vap->va_atime.tv_sec = ip->i_atime;
	vap->va_atime.tv_nsec = 0;
	vap->va_mtime.tv_sec = ip->i_mtime;
	vap->va_mtime.tv_nsec = ip->i_umtime;
	vap->va_ctime.tv_sec = ip->i_ctime;
	vap->va_ctime.tv_nsec = 0;
	type = ip->i_mode & IFMT;
	switch (type) {
	case IFBLK:
	case IFCHR:
		vap->va_blksize = BLKDEV_IOSIZE;
		break;
	case IFCHRLNK:
	case IFBLKLNK:
		vap->va_rdev = HWGRAPH_STRING_DEV;
		vap->va_blksize = BLKDEV_IOSIZE;
		break;
	default:
		/* regular files etc. report the file system block size */
		vap->va_blksize = 1 << itoefs(ip)->fs_lbshift;
	}
	vap->va_nblocks = BTOBB(ip->i_size);
	/* EFS has no extended attributes, extents exposed, or project ids */
	vap->va_xflags = 0;
	vap->va_extsize = 0;
	vap->va_nextents = 0;
	vap->va_anextents = 0;
	vap->va_projid = 0;
	return 0;
}
/*
 * efs_setattr -- VOP_SETATTR for EFS.
 *
 * Applies the attributes selected by vap->va_mask: mode, owner/group
 * (with restricted_chown and quota migration), size (truncate), and
 * access/modification times.  AT_UPDTIMES requests are handled
 * without taking the inode lock -- they only latch pending time
 * updates.  Permission checks follow POSIX with the CAP_FOWNER,
 * CAP_FSETID and CAP_CHOWN capability overrides noted inline.  On
 * success the inode is pushed to disk unless the caller asked for
 * lazy/exec-time updates.  Changes to the mandatory-locking mode are
 * propagated to the vnode under the inode lock.
 */
static int
efs_setattr(bdp, vap, flags, cr)
	bhv_desc_t *bdp;
	struct vattr *vap;
	int flags;
	struct cred *cr;
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	int mask;
	struct inode *ip;
	int error;
	timespec_t tv;
	int mandlock_before, mandlock_after;
	int file_owner;

	/*
	 * Cannot set certain attributes.
	 */
	mask = vap->va_mask;
	if (mask & AT_NOSET)
		return EINVAL;
	ip = bhvtoi(bdp);
	/* lockless fast path: just latch pending time updates */
	if (mask & AT_UPDTIMES) {
		ASSERT((mask & ~AT_UPDTIMES) == 0);
		nanotime_syscall(&tv);
		if (mask & AT_UPDATIME)
			ip->i_atime = tv.tv_sec;
		if (mask & AT_UPDCTIME)
			ip->i_ctime = tv.tv_sec;
		if (mask & AT_UPDMTIME) {
			ip->i_mtime = tv.tv_sec;
			ip->i_umtime = tv.tv_nsec;
		}
		ip->i_updtimes = 1;
		return 0;
	}
	ilock(ip);
	error = 0;
	/* determine whether mandatory locking mode changes */
	mandlock_before = MANDLOCK(vp, ip->i_mode);
	file_owner = (cr->cr_uid == ip->i_uid);
	if (mask & (AT_MODE|AT_UID|AT_GID)) {
		/*
		 * CAP_FOWNER overrides the following restrictions:
		 *
		 * The user ID of the calling process must be equal
		 * to the file owner ID, except in cases where the
		 * CAP_FSETID capability is applicable.
		 */
		if (!file_owner && !cap_able_cred(cr, CAP_FOWNER)) {
			error = EPERM;
			goto out;
		}
	}
	/*
	 * Change file access modes. Must be owner or privileged.
	 */
	if (mask & AT_MODE) {
		mode_t m = 0;
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The effective user ID of the calling process shall match
		 * the file owner when setting the set-user-ID and
		 * set-group-ID bits on that file.
		 *
		 * The effective group ID or one of the supplementary group
		 * IDs of the calling process shall match the group owner of
		 * the file when setting the set-group-ID bit on that file
		 */
		if ((vap->va_mode & ISUID) && !file_owner)
			m |= ISUID;
		if ((vap->va_mode & ISGID) &&
		    !groupmember(ip->i_gid, cr))
			m |= ISGID;
		if ((vap->va_mode & ISVTX) && vp->v_type != VDIR)
			m |= ISVTX;
		/* silently drop the disallowed bits rather than fail */
		if (m && !cap_able_cred(cr, CAP_FSETID))
			vap->va_mode &= ~m;
		ip->i_mode &= IFMT;
		ip->i_mode |= vap->va_mode & ~IFMT;
		ip->i_flags |= ICHG;
	}
	/*
	 * Change file ownership. Must be the owner or privileged.
	 * If the system was configured with the "restricted_chown"
	 * option, the owner is not permitted to give away the file,
	 * and can change the group id only to a group of which he
	 * or she is a member.
	 */
	if (mask & (AT_UID|AT_GID)) {
		uid_t uid = (mask & AT_UID) ? vap->va_uid : ip->i_uid;
		gid_t gid = (mask & AT_GID) ? vap->va_gid : ip->i_gid;
		/* Prevent long uids from being silently truncated to 16bits */
		if (uid > 0xffff || gid > 0xffff)
		{
			error = EOVERFLOW;
			goto out;
		}
		/*
		 * CAP_CHOWN overrides the following restrictions:
		 *
		 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
		 * shall override the restriction that a process cannot
		 * change the user ID of a file it owns and the restriction
		 * that the group ID supplied to the chown() function
		 * shall be equal to either the group ID or one of the
		 * supplementary group IDs of the calling process.
		 */
		if (restricted_chown &&
		    (ip->i_uid != uid || (ip->i_gid != gid &&
		    !groupmember(gid, cr))) &&
		    !cap_able_cred(cr, CAP_CHOWN)) {
			error = EPERM;
			goto out;
		}
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The set-user-ID and set-group-ID bits of a file will be
		 * cleared upon successful return from chown()
		 */
		if ((ip->i_mode & (ISUID|ISGID)) &&
		    !cap_able_cred(cr, CAP_FSETID)) {
			ip->i_mode &= ~(ISUID|ISGID);
		}
		if (ip->i_uid == uid) {
			/*
			 * XXX This won't work once we have group quotas
			 */
			ip->i_gid = gid;
		} else {
			/* owner changes: migrate disk/inode quota charges */
			long change = BTOBB(ip->i_size);
#ifdef _SHAREII
			if ((error = SHR_CHOWNDISK
				(
					itovfs(ip),
					ip->i_uid,
					vap->va_uid,
					(u_long)ip->i_blocks,
					DEV_BSIZE,
					cr
				)
			    )
			)
				goto out;
#endif /* _SHAREII */
			/*
			 * We force the changes to the quota structure, hence we
			 * cannot fail because of want of space! Kludgy.
			 */
			(void) qt_chkdq(ip, -change, 1, NULL);
			(void) qt_chkiq(ip->i_mount, ip, (u_int)ip->i_uid, 1);
			qt_dqrele(ip->i_dquot);
			ip->i_uid = uid;
			ip->i_gid = gid;
			ip->i_dquot = qt_getinoquota(ip);
			(void) qt_chkdq(ip, change, 1, NULL);
			(void) qt_chkiq(ip->i_mount, (struct inode *)NULL,
					(u_int)ip->i_uid, 1);
		}
		ip->i_flags |= ICHG;
	}
	/*
	 * Truncate file. Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		} else if (vp->v_type != VREG) {
			error = EINVAL;
			goto out;
		}
		if (vp->v_flag & VISSWAP) {
			error = EACCES;
			goto out;
		}
		if (!(mask & AT_SIZE_NOPERM)) {
			if (error = efs_iaccess(ip, IWRITE, cr))
				goto out;
		}
		/* must UPD|CHG even though efs_itrunc may not do anything */
		ip->i_flags |= IUPD|ICHG;
		if (error = efs_itrunc(ip, (scoff_t)vap->va_size, 0))
			goto out;
	}
	/*
	 * Change file access or modified times.
	 */
	if (mask & (AT_ATIME|AT_MTIME)) {
		/*
		 * We turn off I* bits to denote that our 'vap' time
		 * not the current time is the most up-to-date
		 * We turn on IMOD to be sure that sooner or later
		 * the inode will still get pushed. Future access, mod, changes
		 * will simply turn on the respective I* bit and overwrite our
		 * value
		 */
		if (!file_owner && !cap_able_cred(cr, CAP_FOWNER)) {
			if (error = (flags & ATTR_UTIME) ?
			    EPERM : efs_iaccess(ip, IWRITE, cr))
				goto out;
		}
		/*
		 * since utime() always updates both mtime and atime
		 * ctime will always be set, as it need to be so there
		 * no reason to set ICHG
		 */
		ip->i_flags |= IMOD;
		if (mask & AT_ATIME) {
			ip->i_atime = vap->va_atime.tv_sec;
			ip->i_flags &= ~IACC;
		}
		if (mask & AT_MTIME) {
			nanotime_syscall(&tv);
			ip->i_mtime = vap->va_mtime.tv_sec;
			ip->i_umtime = vap->va_mtime.tv_nsec;
			ip->i_ctime = tv.tv_sec;
			ip->i_flags &= ~(IUPD|ICHG);
		}
	}
out:
	if (!error && (flags & (ATTR_EXEC|ATTR_LAZY)) == 0 &&
	    (ip->i_flags & (IACC|IUPD|ICHG|IMOD))) {
		/* XXXjwag ordering issue w.r.t delwri */
		/* XXXjwag - why do we really have to call iupdat here?? */
		IGETINFO.ig_attrchg++;
		error = efs_iupdat(ip);
	}
	/*
	 * If the (regular) file's mandatory locking mode changed, then
	 * notify the vnode. We do this under the inode lock to prevent
	 * racing calls to vop_vnode_change.
	 */
	mandlock_after = MANDLOCK(vp, ip->i_mode);
	if (mandlock_before != mandlock_after) {
		VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING,
				 mandlock_after);
	}
	iunlock(ip);
	return error;
}
/*
 * efs_access -- VOP_ACCESS for EFS.
 *
 * Vnode mode bits are really inode mode bits, so the requested access
 * is simply checked against the inode under the inode lock.
 */
/* ARGSUSED */
static int
efs_access(
	bhv_desc_t *bdp,
	int mode,
	struct cred *cr)
{
	struct inode *ip = bhvtoi(bdp);
	int error;

	ilock(ip);
	error = efs_iaccess(ip, mode, cr);
	iunlock(ip);
	return error;
}
/* ARGSUSED */
/*
 * efs_readlink -- VOP_READLINK for EFS.
 *
 * Rejects inodes that are not one of the symlink formats, then reads
 * the link contents through efs_readi() with the inode locked.
 */
static int
efs_readlink(
	bhv_desc_t *bdp,
	struct uio *uiop,
	struct cred *cr)
{
	struct inode *ip = bhvtoi(bdp);
	u_short fmt = ip->i_mode & IFMT;
	int error;

	if (!ISLINK(fmt))
		return EINVAL;
	ilock(ip);
	error = efs_readi(ip, uiop, 0, fmt, cr, NULL);
	iunlock(ip);
	return error;
}
/* ARGSUSED */
/*
 * efs_fsync -- VOP_FSYNC for EFS.
 *
 * FSYNC_INVAL flushes and invalidates every cached page covering the
 * file's extents; otherwise dirty pages are flushed (synchronously if
 * FSYNC_WAIT).  The inode itself is then pushed to disk unless this
 * is a data-only sync (FSYNC_DATA) and the inode's permanent size
 * (i_remember) already covers all written data.
 */
static int
efs_fsync(bdp, flag, cr, start, stop)
	bhv_desc_t *bdp;
	int flag;
	struct cred *cr;
	off_t start;
	off_t stop;
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	struct inode *ip;
	int error = 0;

	ip = bhvtoi(bdp);
	ilock(ip);
	if (flag & FSYNC_INVAL) {
		/* flush+invalidate everything up to the last extent's end */
		if (ip->i_flags & IINCORE && ip->i_numextents > 0) {
			struct extent *ex = &ip->i_extents[ip->i_numextents-1];
			VOP_FLUSHINVAL_PAGES(vp,0,BBTOB(ex->ex_offset+ex->ex_length) - 1,
				FI_REMAPF_LOCKED);
		}
	} else {
		VOP_FLUSH_PAGES(vp, (off_t)0, ip->i_size - 1,
			(flag & FSYNC_WAIT) ? 0 : B_ASYNC, FI_NONE, error);
		error = 0;
	}
	if (!(flag & FSYNC_DATA) ||
	    (((ip->i_mode & IFMT) == IFREG) &&
	    (ip->i_remember < ip->i_size))) {
		if (flag & FSYNC_WAIT)
			ip->i_flags |= ISYN;
		/*
		 * Since we just flushed all the data in the file, go ahead
		 * and bump i_remember all the way up to i_size. This will
		 * ensure that all of our data blocks are permanent.
		 */
		ip->i_remember = ip->i_size;
		error = efs_iupdat(ip);
	}
	iunlock(ip);
	return error;	/* XXX should start all and sleep on v_sync */
}
/* ARGSUSED */
/*
 * efs_inactive -- VOP_INACTIVE for EFS: the last reference to the
 * vnode went away.  Hand the inode to iinactive() and ask the VFS
 * layer to keep the vnode cached for possible reuse.
 */
static int
efs_inactive(bdp, cr)
	bhv_desc_t *bdp;
	struct cred *cr;
{
	iinactive(bhvtoi(bdp));
	return VN_INACTIVE_CACHE;
}
/*
 * Unix file system operations having to do with directory manipulation.
 */
/* ARGSUSED */
/*
 * efs_lookup -- VOP_LOOKUP for EFS.
 *
 * Looks 'nm' up in directory dvp.  On success *vpp holds a referenced
 * vnode; when the found inode is a device, the specfs "shadow" vnode
 * is returned instead of the raw EFS vnode.  The child inode comes
 * back locked from efs_dirlookup (DLF_IGET) and is unlocked here
 * unless it is the directory itself (".").
 */
static int
efs_lookup(bdp, nm, vpp, pnp, flags, rdir, cr)
	bhv_desc_t *bdp;
	char *nm;
	struct vnode **vpp;
	struct pathname *pnp;
	int flags;
	struct vnode *rdir;
	struct cred *cr;
{
	vnode_t *dvp = BHV_TO_VNODE(bdp);
	struct inode *dp, *ip;
	struct entry ent;
	int error;
	struct vnode *vp, *newvp;

	if (dvp->v_type != VDIR)
		return ENOTDIR;
	dp = bhvtoi(bdp);
	ilock(dp);
	error = efs_dirlookup(dp, nm, pnp, DLF_IGET|DLF_MUSTHAVE, &ent, cr);
	iunlock(dp);
	if (error)
		return error;
	ip = ent.e_ip;
	vp = itov(ip);
#ifdef _IRIX_LATER
	if ((ip->i_mode & ISVTX) && !(ip->i_mode & (IEXEC | IFDIR))
	    && efs_stickyhack) {
		VN_FLAGSET(vp, VISSWAP);
	}
#endif
	/* "." lookups return dp itself; don't unlock it twice */
	if (ip != dp)
		iunlock(ip);
	/*
	 * If vnode is a device return special vnode instead.
	 */
	if (ISVDEV(vp->v_type)) {
		newvp = spec_vp(vp, vp->v_rdev, vp->v_type, cr);
		VN_RELE(vp);
		if (newvp == NULL)
			return ENOSYS;
		vp = newvp;
	}
	*vpp = vp;
	return 0;
}
/*
 * efs_create -- VOP_CREATE for EFS.
 *
 * Creates or reuses the entry 'name' in directory bdp.  If no entry
 * exists, a new inode is allocated and entered (failing with EFBIG
 * when the caller's file size limit is zero, per XPG4).  If the entry
 * exists: VEXCL fails with EEXIST, writing a directory fails with
 * EISDIR, access is checked, and an existing regular file is
 * truncated when AT_SIZE was requested (timestamps updated regardless
 * of whether the size changed, per POSIX).  Devices come back wrapped
 * in their specfs vnode.  On success *vpp holds a referenced vnode.
 */
static int
efs_create(
	bhv_desc_t *bdp,
	char *name,
	struct vattr *vap,
	int flags,
	int mode,
	struct vnode **vpp,
	struct cred *cr)
{
	struct inode *dp, *ip;
	int error;
#ifdef CELL
	int truncated = 0;
#endif
	struct entry ent;
	struct vnode *vp, *newvp;

	/* drop any vnode the caller passed in; we return our own */
	if (*vpp) {
		VN_RELE(*vpp);
		*vpp = NULL;
	}
	dp = bhvtoi(bdp);
	ip = NULL;
	ilock(dp);
	if (error = efs_dirlookup(dp, name, NULL, DLF_IGET, &ent, cr))
		goto bad;
	/*
	 * If no entry was found, allocate an inode and enter it in dp.
	 * If an entry already exists and this is a non-exclusive create,
	 * check permissions and allow access for non-directory inodes.
	 * Read-only create of an existing directory is also allowed.
	 * Fail an exclusive create of anything which already exists.
	 */
	ip = ent.e_ip;
	if (ip == NULL) {
		if (error = efs_iaccess(dp, IWRITE, cr))
			goto bad;
		/*
		 * XPG4 says create cannot allocate a file if the
		 * file size limit is set to 0.
		 */
		if (flags & VZFS) {
			error = EFBIG;
			goto bad;
		}
		if (error = efs_ialloc(dp, MAKEIMODE(vap->va_type,vap->va_mode),
				       1, (vap->va_mask & AT_RDEV) ?
				       vap->va_rdev : NODEV, &ip, cr)) {
			goto bad;
		}
		if (error = efs_direnter(dp, ip, &ent, cr)) {
			/* undo the allocation: zero link count frees it */
			ip->i_nlink = 0;
			ip->i_flags |= ICHG;
			goto bad;
		}
		vp = itov(ip);
	} else {
		vp = itov(ip);
		if (flags & VEXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & IWRITE))
			error = EISDIR;
		else if (mode)
			error = efs_iaccess(ip, mode, cr);
		if (!error && vp->v_type == VREG && (vap->va_mask & AT_SIZE)) {
			/*
			 * Truncate regular file, if requested by caller.
			 * POSIX requires the time stamps be updated
			 * regardless of whether file actually changes.
			 */
			ip->i_flags |= IUPD|ICHG;
			error = efs_itrunc(ip, (scoff_t)vap->va_size, 0);
#ifdef CELL
			truncated = 1;
#endif
		}
		if (error)
			goto bad;
	}
	iunlock(dp);
	/* "." creates return dp itself; don't unlock it twice */
	if (ip != dp)
		iunlock(ip);
#ifdef CELL
	if (truncated)
		VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 0);
#endif
	/*
	 * If vnode is a device, return special vnode instead.
	 */
	if (ISVDEV(vp->v_type)) {
		newvp = spec_vp(vp, vp->v_rdev, vp->v_type, cr);
		VN_RELE(vp);
		if (newvp == NULL)
			return ENOSYS;
		vp = newvp;
	}
	*vpp = vp;
	return 0;
bad:
	iunlock(dp);
	if (ip) {
		if (ip == dp)
			irele(ip);
		else
			iput(ip);
	}
	return error;
}
/*
 * efs_bumplink -- raise ip's link count by one and push the inode to
 * disk.  The count is incremented even when it already stands at
 * MAXLINK (EMLINK is returned in that case), so the caller can always
 * recover unconditionally with a single efs_droplink().
 */
static int
efs_bumplink(struct inode *ip)
{
	int oldlink;

	ASSERT(ip->i_lockid == get_thread_id());
	ASSERT(ip->i_nlink >= 0);
	oldlink = ip->i_nlink;
	ip->i_nlink = oldlink + 1;
	if (oldlink >= MAXLINK)
		return EMLINK;
	ip->i_flags |= ICHG;
	return efs_iupdat(ip);
}
/*
 * efs_droplink -- lower ip's link count by one and mark the inode
 * changed, deferring the disk update.  The asymmetry with
 * efs_bumplink is deliberate: too many on-disk links is a harmless
 * fsck fixup, but too few could leave a directory entry pointing at
 * an unallocated inode.
 */
static void
efs_droplink(struct inode *ip)
{
	ASSERT(ip->i_lockid == get_thread_id());
	ASSERT(ip->i_nlink > 0);
	ip->i_flags |= ICHG;
	ip->i_nlink--;
}
/* ARGSUSED */
/*
 * efs_remove -- VOP_REMOVE for EFS.
 *
 * Removes entry 'nm' from directory bdp.  Mount points are busy
 * (EBUSY) and directories must go through rmdir (EPERM).  On a
 * successful unlink the target's link count is dropped and the inode
 * pushed to disk; interposed file systems are notified via
 * VOP_LINK_REMOVED after both inode locks are released.
 */
static int
efs_remove(
	bhv_desc_t *bdp,
	char *nm,
	struct cred *cr)
{
	struct inode *dp, *ip;
	int error;
	struct entry ent;
	int link_removed = 0;

	dp = bhvtoi(bdp);
	ilock(dp);
	error = efs_dirlookup(dp, nm, NULL, DLF_IGET|DLF_MUSTHAVE|DLF_REMOVE,
			&ent, cr);
	if (error) {
		iunlock(dp);
		return error;
	}
	ip = ent.e_ip;
	if (itov(ip)->v_vfsmountedhere)
		error = EBUSY;
	else if ((ip->i_mode & IFMT) == IFDIR)
		error = EPERM;
	else {
		error = efs_dirremove(dp, &ent, cr);
		if (!error) {
			efs_droplink(ip);
			link_removed = 1;
			error = efs_iupdat(ip);
		}
	}
	/* "." removes return dp itself; don't unlock it twice */
	if (ip != dp)
		iunlock(ip);
	iunlock(dp);
	if (link_removed) {
		/*
		 * Let interposed file systems know about removed links.
		 */
		VOP_LINK_REMOVED(itov(ip), itov(dp), (ip)->i_nlink==0);
	}
	irele(ip);
	return error;
}
/*
 * Link a file or a directory. Only the superuser is allowed to make a
 * link to a directory. Take pains to increment the source inode's link
 * count and update it before entering it in the target directory.
 */
/*
 * efs_link -- VOP_LINK for EFS.
 *
 * Makes target-directory entry 'tnm' refer to svp's inode.  Directory
 * hard links are refused (EPERM) and cross-file-system links fail
 * with EXDEV (the source vnode must carry an EFS behavior).  The
 * source link count is bumped and written to disk first, so a crash
 * mid-operation leaves at worst an overcounted link (fsck-fixable);
 * on any later failure the count is dropped again.
 */
static int
efs_link(
	bhv_desc_t *tbdp,
	struct vnode *svp,
	char *tnm,
	struct cred *cr)
{
	struct vnode *realvp;
	struct inode *tdp, *sip;
	struct entry ent;
	int error;
	bhv_desc_t *src_bdp;
	vn_bhv_head_t *src_bhp;

	/* link to the underlying vnode if svp is a wrapper */
	VOP_REALVP(svp, &realvp, error);
	if (error == 0)
		svp = realvp;
	if (svp->v_type == VDIR)
		return EPERM;
	/*
	 * For now, manually find the EFS behavior descriptor for
	 * the source vnode. If it doesn't exist then something
	 * is wrong and we should just return an error.
	 * Eventually we need to figure out how link is going to
	 * work in the face of stacked vnodes.
	 */
	src_bhp = VN_BHV_HEAD(svp);
	src_bdp = vn_bhv_lookup_unlocked(src_bhp, &efs_vnodeops);
	if (src_bdp == NULL) {
		return EXDEV;
	}
	sip = bhvtoi(src_bdp);
	ilock(sip);
	error = efs_bumplink(sip);
	iunlock(sip);
	if (!error) {
		tdp = bhvtoi(tbdp);
		ilock(tdp);
		error = efs_dirlookup(tdp, tnm, NULL, DLF_ENTER|DLF_EXCL,
				&ent, cr);
		if (!error)
			error = efs_direnter(tdp, sip, &ent, cr);
		iunlock(tdp);
	}
	/* entering failed: undo the link count bump */
	if (error) {
		ilock(sip);
		efs_droplink(sip);
		iunlock(sip);
	}
	return error;
}
/*
* Rename the file named by snm in source directory sdvp to tnm in tdvp.
* We can't do two-phase commit without extra state in the inode, but we
* can guarantee that tnm exists throughout the operation. Unlock the
* source inodes to avoid deadlock (this means the source entry can be
* unlinked while we're working). Keep the target directory locked from
* lookup through enter (rewrite).
*
* Sketch:
*
* 1. Bump the source inode's link count right away to keep it
* from being unlinked while it is unlocked.
*
* 2. Link the source inode into the target directory. If the
* target exists, rewrite its entry in-place (efs_direnter uses
* the offset discovered by efs_dirlookup; the target directory
* must remain locked across lookup and enter). If the source
* is a directory and it moved to a different parent, rewrite
* its ".." entry to point at the target directory.
*
* 3. Unlink the source directory entry, if it's still around.
* When renaming one hard link over another link to the same
* inode, only steps 1 and 3 are executed.
*/
/* ARGSUSED */
static int
efs_rename(
	bhv_desc_t *sbdp,	/* old (source) parent vnode */
	char *snm,		/* old (source) entry name */
	struct vnode *tdvp,	/* new (target) parent vnode */
	char *tnm,		/* new (target) entry name */
	struct pathname *tpnp,	/* new (target) pathname or null */
	struct cred *cr)
{
	int error, dflag;	/* error and efs_dirisempty result */
	int directory;		/* simple flags, see below */
	struct inode *sdp, *tdp;	/* source and target directories */
	struct inode *sip, *tip;	/* source and target inodes */
	struct entry sent, tent;	/* source and target entries */
	efs_ino_t newparent;	/* inumber of new parent directory */
	int tip_dropped = 0;	/* tip link dropped? */
	int tdp_dropped = 0;	/* tdp link dropped? */
	int sip_dropped = 0;	/* sip link dropped? */
	int sdp_dropped = 0;	/* sdp link dropped? */
	bhv_desc_t *tdbdp;
	/*
	 * Lookup the source inode (again -- it's a shame we can't keep
	 * a handle on what rename has already looked up). Increment its
	 * link count and update it on disk right now, to prevent someone
	 * else from removing it behind our back.
	 */
	sdp = bhvtoi(sbdp);
	ilock(sdp);
	error = efs_dirlookup(sdp, snm, NULL, DLF_IGET|DLF_MUSTHAVE|DLF_REMOVE,
		&sent, cr);
	iunlock(sdp);
	if (error)
		return error;
	sip = sent.e_ip;	/* locked and referenced by DLF_IGET */
	if (sip == sdp) {
		/* renaming "." is not allowed */
		irele(sip);
		return EINVAL;
	}
	directory = ((sip->i_mode & IFMT) == IFDIR);
	/* Bump failure is deliberately not checked until tdp is locked. */
	error = efs_bumplink(sip);
	iunlock(sip);
	/*
	 * 1. Lock target directory, check for an efs_bumplink error, and
	 * then lookup the target name, in case an inode is already linked
	 * under it in tdp. Tell efs_dirlookup to check for permission to
	 * unlink as well as permission to enter.
	 *
	 * Find the EFS behavior descriptor for the target directory
	 * vnode since it was not handed to us.
	 */
	tdbdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(tdvp), &efs_vnodeops);
	if (tdbdp == NULL) {
		/*
		 * NOTE(review): returning here leaks the sip reference
		 * taken by DLF_IGET above and leaves the bumped link
		 * count in place -- confirm whether cross-fs renames can
		 * actually reach this point.
		 */
		return EXDEV;
	}
	tdp = bhvtoi(tdbdp);
	tip = NULL;
	ilock(tdp);
	if (error)	/* efs_bumplink(sip) failed above */
		goto bad;
	if (error = efs_dirlookup(tdp, tnm, NULL, DLF_IGET|DLF_ENTER|DLF_REMOVE,
		    &tent, cr))
		goto bad;
	tip = tent.e_ip;
	if (tip == tdp) {
		/* target name is "." in the target directory */
		error = EINVAL;
		goto bad;
	}
	ASSERT(!(sent.e_flags & PN_ISDOT) && !(tent.e_flags & PN_ISDOT));
	/*
	 * Source and target are identical.
	 */
	if (sip == tip) {
		ASSERT(sip != sdp);
		error = 0;	/* no-op */
		goto bad;
	}
	/*
	 * Directory rename requires special error checks. We do not
	 * rely on the system call layer to check these cases, because
	 * there may be novel system call layers like the NFS server,
	 * which should not all have to do the same checks.
	 */
	newparent = 0;
	if (directory) {
		/*
		 * Renaming ".." is illegal.
		 */
		if ((sent.e_flags & PN_ISDOTDOT)
		    || (tent.e_flags & PN_ISDOTDOT)) {
			error = EINVAL;
			goto bad;
		}
		/*
		 * Check whether this rename would orphan the tree rooted at
		 * sip by moving it under itself. Note that efs_notancestor
		 * unlocks tdp, so we must lookup tip again afterwards. All
		 * calls to efs_notancestor go single-file through a monitor,
		 * to ensure that "mv /a/b /c/d/b2" won't lose the race with
		 * "mv /c/d /a/b/d2", resulting in "mv /a/b /a/b/d2/b2" and
		 * disconnection of the tree at /a/b.
		 */
		if (sdp != tdp) {
			newparent = tdp->i_number;
			if (tip) {
				iput(tip);
				tip = NULL;
			}
			if (error = efs_notancestor(sip, tdp, cr))
				goto bad;
			if (error = efs_dirlookup(tdp, tnm, NULL,
					DLF_IGET|DLF_ENTER|DLF_REMOVE,
					&tent, cr))
				goto bad;
			tip = tent.e_ip;
		}
	}
	if (tip == NULL) {
		/*
		 * If no target exists and the rename crosses directories,
		 * adjust the target directory link count to include the new
		 * ".." reference being added.
		 */
		if (newparent && (error = efs_bumplink(tdp)))
			goto bad;
		if (error = efs_direnter(tdp, sip, &tent, cr)) {
			if (newparent) {
				efs_droplink(tdp);
				tdp_dropped = 1;
			}
			goto bad;
		}
	} else {
		/*
		 * If target exists and it's a directory, check that both
		 * target and source are directories and that target can be
		 * destroyed, or that neither is a directory.
		 */
		if ((tip->i_mode & IFMT) == IFDIR) {
			if ((error = efs_dirisempty(tip, &dflag, cr))
			    || tip->i_nlink > 2) {
				if (error == ENOTEMPTY)
					error = EEXIST;	/* XXX */
				goto bad;
			}
			if (!directory) {
				error = EISDIR;
				goto bad;
			}
			if (itov(tip)->v_vfsmountedhere) {
				/* a file system is mounted on the target */
				error = EBUSY;
				goto bad;
			}
		} else {
			if (directory) {
				error = ENOTDIR;
				goto bad;
			}
		}
		/*
		 * Purge all name cache references to the old target.
		 */
		dnlc_purge_vp(itov(tip));
		/*
		 * 2. Link the source inode under the target name. This
		 * is atomic, but if the source inode is a directory, and
		 * if the rename isn't local to a directory, the source's
		 * ".." entry will be inconsistent till the efs_dirinit().
		 * Now that the target entry has been rewritten, drop the
		 * old target's link count.
		 */
		if (error = efs_dirrewrite(tdp, sip, &tent, cr))
			goto bad;
		efs_droplink(tip);
		tip_dropped = 1;
		if (directory && (dflag & DIR_HASDOT)) {
			/*
			 * If the source is a directory and the target
			 * existed already, drop the target's link count
			 * again to deallocate it.
			 */
			efs_droplink(tip);
		}
	}
	iunlock(tdp);
	if (tip) {
		iunlock(tip);
		/* tell interposed file systems about removed links */
		if (tip_dropped)
			VOP_LINK_REMOVED(itov(tip), tdvp, (tip)->i_nlink==0);
		irele(tip);
	}
	/*
	 * 3. Finally, remove the source. Since sdp and sip have
	 * been unlocked, someone else may have already unlinked sip,
	 * so we ignore ENOENT. If we're moving an inode over top of
	 * one of its hard links, remember to drop the link count we
	 * added in step 1. Also remember to drop the source dir's
	 * link count if renaming a directory to a new parent.
	 */
	ilock(sdp);
	ilock(sip);
	error = efs_dirlookup(sdp, snm, NULL, DLF_REMOVE, &sent, cr);
	if (error == ENOENT)
		error = 0;
	else if (!error) {
		if (sent.e_inum != sip->i_number) {
			/* a directory entry can't move behind our back */
			if (directory)
				panic("rename: lost directory");
		} else {
			/* rewrite ".." if the directory changed parents */
			if (newparent)
				error = efs_dirinit(sip, newparent, cr);
			if (!error && (newparent ||
			    (directory && tip != NULL))) {
				efs_droplink(sdp);
				sdp_dropped = 1;
			}
			if (!error) {
				error = efs_dirremove(sdp, &sent, cr);
				if (!error) {
					efs_droplink(sip);
					sip_dropped = 1;
				}
			}
		}
	}
	iunlock(sip);
	iunlock(sdp);
	/* tell interposed file systems about removed links */
	if (sdp_dropped)
		VOP_LINK_REMOVED(itov(sdp), itov(sip), (sdp)->i_nlink==0);
	if (sip_dropped)
		VOP_LINK_REMOVED(itov(sip), itov(sdp), (sip)->i_nlink==0);
	irele(sip);
	return error;
bad:
	/*
	 * Release old target inode if any and unlock target directory.
	 * Restore source's link count and iput it.
	 *
	 * NOTE(review): this path is also reached when efs_bumplink(sip)
	 * failed at the top; confirm efs_bumplink leaves i_nlink unchanged
	 * on failure, otherwise the efs_droplink below decrements a count
	 * that was never incremented.
	 */
	if (tip) {
		if (tip == tdp)
			irele(tip);
		else
			iput(tip);
	}
	iunlock(tdp);
	if (tdp_dropped)
		VOP_LINK_REMOVED(itov(tdp), itov(sip), (tdp)->i_nlink==0);
	ilock(sip);
	efs_droplink(sip);
	iunlock(sip);
	VOP_LINK_REMOVED(itov(sip), itov(sdp), (sip)->i_nlink==0);
	irele(sip);
	return error;
}
/* ARGSUSED */
static int
efs_mkdir(
	bhv_desc_t *bdp,	/* parent directory vnode behavior */
	char *dirname,		/* name of the new directory */
	struct vattr *vap,	/* attributes (va_mode) for the new dir */
	struct vnode **vpp,	/* OUT: vnode of the new directory */
	struct cred *cr)	/* user credentials */
{
	struct inode *dp, *cdp;
	struct entry ent;
	int error;
	dp = bhvtoi(bdp);
	ilock(dp);
	/*
	 * Since dp is not locked between the lookup and this mkdir,
	 * it could have been removed.
	 */
	if (dp->i_nlink <= 0) {
		error = ENOENT;
		goto out2;
	}
	if (error = efs_iaccess(dp, IEXEC | IWRITE, cr)) {
		goto out2;
	}
	/*
	 * Account up front for the child's ".." reference to dp;
	 * the count is dropped again at "out" on any later failure.
	 */
	if (error = efs_bumplink(dp))
		goto out;
	if (error =
	    efs_dirlookup(dp, dirname, NULL, DLF_ENTER|DLF_EXCL, &ent, cr))
		goto out;
	/* Allocate the child with link count 2: its name entry plus ".". */
	error = efs_ialloc(dp, IFDIR | (vap->va_mode & ~IFMT), 2, 0, &cdp, cr);
	if (!error) {
		/* write "." and ".." into the new directory */
		error = efs_dirinit(cdp, dp->i_number, cr);
		if (!error) {
			error = efs_direnter(dp, cdp, &ent, cr);
		}
		if (!error) {
			*vpp = itov(cdp);
			iunlock(cdp);
		} else {
			/* undo the allocation: iput will deallocate */
			cdp->i_nlink = 0;
			cdp->i_flags |= ICHG;
			iput(cdp);
		}
	}
out:
	if (error)
		efs_droplink(dp);
out2:
	iunlock(dp);
	return error;
}
/* ARGSUSED */
static int
efs_rmdir(
	bhv_desc_t *bdp,	/* parent directory vnode behavior */
	char *nm,		/* name of directory to remove */
	struct vnode *cdir,	/* caller's current directory */
	struct cred *cr)	/* user credentials */
{
	struct inode *dp, *cdp;
	struct entry ent;
	int error, dflag;
	int link_removed = 0;
	dp = bhvtoi(bdp);
	ilock(dp);
	error = efs_dirlookup(dp, nm, NULL, DLF_IGET|DLF_MUSTHAVE|DLF_REMOVE,
		&ent, cr);
	if (error) {
		iunlock(dp);
		return error;
	}
	cdp = ent.e_ip;	/* victim, locked and referenced by DLF_IGET */
	if (cdp == dp || itov(cdp) == cdir) {
		/* can't remove "." or the caller's current directory */
		error = EINVAL;
	} else if ((cdp->i_mode & IFMT) != IFDIR) {
		error = ENOTDIR;
	} else if (itov(cdp)->v_vfsmountedhere) {
		/* a file system is mounted on the victim */
		error = EBUSY;
	} else if (cdp->i_nlink > 2) {
		/* subdirectories still reference it via ".." */
		error = EEXIST;	/* XXX ENOTEMPTY */
	} else if (error = efs_dirisempty(cdp, &dflag, cr)) {
		if (error == ENOTEMPTY)
			error = EEXIST;	/* XXX */
	} else {
		error = efs_dirremove(dp, &ent, cr);
		if (!error) {
			link_removed = 1;
			/* drop the parent's count for the victim's ".." */
			if (dflag & DIR_HASDOTDOT) {
				efs_droplink(dp);
				(void) efs_iupdat(dp);
			}
			/* drop the victim's "." and name-entry links */
			if (dflag & DIR_HASDOT)
				cdp->i_nlink -= 2;
			else
				cdp->i_nlink--;
			cdp->i_flags |= ICHG;
			error = efs_iupdat(cdp);
		}
	}
	if (cdp != dp)
		iunlock(cdp);
	iunlock(dp);
	if (link_removed) {
		/*
		 * Let interposed file systems know about removed links.
		 */
		VOP_LINK_REMOVED(itov(dp), itov(cdp), (dp)->i_nlink==0);
		VOP_LINK_REMOVED(itov(cdp), itov(dp), (cdp)->i_nlink==0);
	}
	irele(cdp);
	return error;
}
/*
* efs_readdir is in efs_dir.c
*/
/* ARGSUSED */
static int
efs_symlink(
	bhv_desc_t *bdp,	/* ptr to parent dir vnode */
	char *linkname,		/* name of symbolic link */
	struct vattr *vap,	/* attributes */
	char *target,		/* target path */
	struct cred *cr)	/* user credentials */
{
	struct inode *dp, *ip;
	struct entry ent;
	int error = 0, pathlen;
	struct uio uio;
	struct iovec iov;
	struct pathname cpn, ccpn;
	/*
	 * Check component lengths of the target path name: the whole
	 * path must fit in MAXPATHLEN and each component in MAXNAMELEN.
	 */
	pathlen = strlen(target);
	if (pathlen >= MAXPATHLEN)		/* total string too long */
		return ENAMETOOLONG;
	if (pathlen >= MAXNAMELEN) {		/* is any component too long? */
		pn_alloc(&cpn);
		pn_alloc(&ccpn);
		bcopy(target, cpn.pn_path, pathlen);
		cpn.pn_pathlen = pathlen;
		while (cpn.pn_pathlen > 0 && !error) {
			error = pn_getcomponent(&cpn, ccpn.pn_path, 0);
			if (!error && cpn.pn_pathlen) {
				/* advance past slash */
				cpn.pn_path++;
				cpn.pn_pathlen--;
			}
		}
		/*
		 * Free the scratch pathnames exactly once.  The previous
		 * code freed them inside the loop on error and then again
		 * after the loop, double-freeing on any pn_getcomponent
		 * error other than ENAMETOOLONG.
		 */
		pn_free(&cpn);
		pn_free(&ccpn);
		if (error == ENAMETOOLONG)
			return error;
		error = 0;	/* other scan errors were always ignored */
	}
	dp = bhvtoi(bdp);
	ilock(dp);
	error = efs_dirlookup(dp, linkname, NULL, DLF_ENTER|DLF_EXCL, &ent, cr);
	if (!error) {
		error = efs_ialloc(dp, IFLNK | (vap->va_mode&~IFMT), 1, 0,
				&ip, cr);
	}
	if (!error) {
		/*
		 * Write the target path as the link's data, then enter
		 * the new inode into the parent directory.  (This also
		 * fixes the old "!error & newfile" bitwise-& typo; the
		 * newfile flag was constant 1 and has been removed.)
		 */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = 0;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_resid = iov.iov_len = pathlen;
		uio.uio_pmp = NULL;
		uio.uio_pio = 0;
		uio.uio_readiolog = 0;
		uio.uio_writeiolog = 0;
		uio.uio_pbuf = 0;
		iov.iov_base = target;
		error = efs_writei(ip, &uio, 0, cr, NULL);
		if (!error) {
			error = efs_direnter(dp, ip, &ent, cr);
		}
		if (error) {
			/* undo: mark the new inode for deallocation */
			ip->i_nlink = 0;
			ip->i_flags |= ICHG;
		}
		iput(ip);
	}
	iunlock(dp);
	return error;
}
/*
 * Build a file identifier (inode number + generation count) for NFS
 * and friends.  The efid is allocated here; the caller frees it.
 */
static int
efs_fid(
	bhv_desc_t *bdp,
	struct fid **fidpp)
{
	struct inode *ip = bhvtoi(bdp);
	struct efid *ep;
	ep = kmem_alloc(sizeof(struct efid), KM_SLEEP);
	ep->efid_len = sizeof(struct efid) - sizeof(ep->efid_len);
	ep->efid_pad = 0;
	ep->efid_ino = ip->i_number;
	ep->efid_gen = ip->i_gen;
	*fidpp = (struct fid *)ep;
	return 0;
}
/*
 * Fill in a caller-supplied fid with this inode's number and
 * generation count (no allocation, unlike efs_fid).
 */
static int
efs_fid2(
	bhv_desc_t *bdp,
	struct fid *fidp)
{
	struct inode *ip = bhvtoi(bdp);
	struct efid *ep = (struct efid *)fidp;
	/* the generic fid must be large enough to hold an efid */
	ASSERT(sizeof(fid_t) >= sizeof(struct efid));
	ep->efid_len = sizeof(struct efid) - sizeof(ep->efid_len);
	ep->efid_pad = 0;
	ep->efid_ino = ip->i_number;
	ep->efid_gen = ip->i_gen;
	return 0;
}
/* ARGSUSED */
static void
efs_rwlock(bhv_desc_t *bdp, vrwlock_t write_lock)
{
	/* EFS uses the single inode lock for both read and write locking. */
	struct inode *ip = bhvtoi(bdp);
	ilock(ip);
	ip->i_flags |= IRWLOCK;
}
/* ARGSUSED */
static void
efs_rwunlock(bhv_desc_t *bdp, vrwlock_t write_lock)
{
	/* Drop the rwlock flag while still holding the inode lock. */
	struct inode *ip = bhvtoi(bdp);
	ip->i_flags &= ~IRWLOCK;
	iunlock(ip);
}
/* ARGSUSED */
/*
 * Validate a proposed seek offset: any non-negative offset is legal.
 * Converted from the lone K&R-style definition in this file to an
 * ANSI prototype for consistency with the other vnode operations.
 */
static int
efs_seek(
	bhv_desc_t *bdp,
	off_t ooff,		/* old offset (unused) */
	off_t *noffp)		/* proposed new offset */
{
	if (*noffp < 0)
		return EINVAL;
	return 0;
}
/*
 * File/record locking entry point.  Normalize the lock range and hand
 * the request to the generic fs_frlock code, taking the inode rwlock
 * ourselves when the caller doesn't already hold it.
 */
static int
efs_frlock(
	bhv_desc_t *bdp,
	int cmd,
	struct flock *lfp,
	int flag,
	off_t offset,
	vrwlock_t vrwlock,
	cred_t *cr)
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	int locked_here = 0;
	int error = 0;
	if (vrwlock == VRWLOCK_NONE) {
		/* caller holds no lock: take it for the duration */
		efs_rwlock(bdp, VRWLOCK_WRITE);
		vrwlock = VRWLOCK_WRITE;
		locked_here = 1;
	}
	/* lock-cleanup requests carry no range to normalize */
	if (cmd != F_CLNLK)
		error = convoff(vp, lfp, lfp->l_whence, offset,
				SEEKLIMIT32, cr);
	if (!error)
		error = fs_frlock(bdp, cmd, lfp, flag, offset, vrwlock, cr);
	if (locked_here)
		efs_rwunlock(bdp, VRWLOCK_WRITE);
	return error;
}
/*
* efs_bmap is defined in efs_bmap.c, oddly enough.
*/
/*
 * Buffer I/O strategy routine.  Reads that begin at or beyond end of
 * file are completed immediately instead of being sent to the device;
 * everything else is passed down to the underlying device vnode.
 */
static void
efs_strategy(bhv_desc_t *bdp, struct buf *bp)
{
	struct inode *ip = bhvtoi(bdp);
	if (bp->b_flags & B_READ) {
		scoff_t size;
		/* reads require the caller to hold the inode lock */
		ASSERT(mutex_mine(&ip->i_lock));
		ASSERT(ip->i_lockid == get_thread_id());
		size = ip->i_size;
		if (size <= BBTOB(bp->b_offset)) {
			/* read entirely past EOF: cancel it */
			IGETINFO.ig_readcancel++;
			iodone(bp);
			return;
		}
	}
	VOP_STRATEGY(ip->i_mount->m_devvp, bp);
}
/* ARGSUSED */
/*
 * Validate a proposed mapping: reject any range that would extend
 * past the largest legal EFS file offset.
 */
static int
efs_map(
	bhv_desc_t *bdp,
	off_t off,
	size_t len,
	mprot_t prot,
	u_int flags,
	struct cred *cr,
	vnode_t **nvp)
{
	off_t end = off + (off_t)len;
	if (end > (off_t)EFS_MAX_FILE_OFFSET)
		return EINVAL;
	return 0;
}
/* ARGSUSED */
static int
efs_reclaim(
	bhv_desc_t *bdp,
	int flag)
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	struct inode *ip;
	/* a vnode being reclaimed must have no live mappings */
	ASSERT(!VN_MAPPED(vp));
	ip = bhvtoi(bdp);
	/*
	 * Toss any cached pages covering the file's extents before the
	 * inode is torn down.  The flush range runs from offset 0 to
	 * the end of the last extent.
	 */
	if (ip->i_flags & IINCORE && ip->i_numextents > 0) {
		struct extent *ex = &ip->i_extents[ip->i_numextents - 1];
		VOP_FLUSHINVAL_PAGES(vp, 0, BBTOB(ex->ex_offset+ex->ex_length) - 1,
			FI_NONE);
	}
	/* remove stale name cache entries referencing this vnode */
	dnlc_purge_vp(vp);
	/* the inode must already be clean at this point */
	ASSERT((ip->i_flags & (IMOD|IACC|IUPD|ICHG)) == 0);
	ireclaim(ip);
	return 0;
}
/* ARGSUSED */
/*
 * Change of file status flags (fcntl F_SETFL).  EFS imposes no
 * per-filesystem restrictions on flag changes, so always succeed.
 */
int
efs_setfl(
	bhv_desc_t *bdp,
	int oflags,
	int nflags,
	cred_t *cr)
{
	return 0;
}
#ifdef DATAPIPE
/* ARGSUSED */
/*
 * Report direct-I/O alignment and size constraints to the datapipe
 * code.  This is a copy from fcntl - F_DIOINFO cmd.
 */
int
efs_fspe_dioinfo(
	struct vnode *vp,
	struct dioattr *da)
{
#ifdef R10000_SPECULATION_WAR
	/* page alignment defends against R10000 speculative references */
	da->d_mem = _PAGESZ;
#else
	da->d_mem = FDIRIOALIGN;
#endif
	da->d_miniosz = BBSIZE;			/* one basic block minimum */
	da->d_maxiosz = ctob(v.v_maxdmasz - 1);	/* largest DMA transfer */
	return 0;
}
#endif
/* ARGSUSED */
/*
 * File-system-specific fcntl commands: direct-I/O info queries and
 * space allocation/freeing (F_ALLOCSP/F_FREESP and 64-bit variants,
 * all of which set the file size to l_start via efs_setattr).
 */
int
efs_fcntl(
	bhv_desc_t *bdp,
	int cmd,
	void *arg,
	int flags,
	off_t offset,
	cred_t *cr,
	rval_t *rvp)
{
	int error = 0;
	struct flock bf;
	struct irix5_flock i5_bf;
	vnode_t *vp = BHV_TO_VNODE(bdp);
	char abi = get_current_abi();
	switch (cmd) {
#ifdef DATAPIPE
	case F_GETOPS:
		fspe_get_ops(arg);
		break;
#endif
	case F_DIOINFO: {
		struct dioattr da;
		/* only works on files opened for direct I/O */
		if (!(flags & FDIRECT)) {
			error = EINVAL;
			break;
		}
#ifdef MH_R10000_SPECULATION_WAR
		if (IS_R10000())
			da.d_mem = _PAGESZ;
		else
			da.d_mem = FDIRIOALIGN;
#elif R10000_SPECULATION_WAR /* makes tlb invalidate during dma more
	effective, by decreasing the likelihood of a valid reference in the
	same page as dma user address space; leaving the tlb invalid avoids
	the speculative reference.  We return the more stringent
	"requirements" on the fcntl(), but do *NOT* enforce them
	in the read/write code, to be sure we don't break apps... */
		da.d_mem = _PAGESZ;
#else
		da.d_mem = FDIRIOALIGN;
#endif
		da.d_miniosz = BBSIZE;
		da.d_maxiosz = ctob(v.v_maxdmasz - 1);
		if (copyout(&da, arg, sizeof da))
			error = EFAULT;
		break;
	}
	case F_ALLOCSP:
	case F_FREESP:
	case F_ALLOCSP64:
	case F_FREESP64:
		if ((flags & FWRITE) == 0) {
			error = EBADF;
		} else if (vp->v_type != VREG) {
			error = EINVAL;
		} else if (vp->v_flag & VISSWAP) {
			/* never resize an active swap file */
			error = EACCES;
#if _MIPS_SIM == _ABI64
		} else if (ABI_IS_IRIX5_64(abi)) {
			/* 64-bit ABI flock is already the native layout */
			if (copyin((caddr_t)arg, &bf, sizeof bf)) {
				error = EFAULT;
				break;
			}
#endif
		} else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64 ||
			   ABI_IS_IRIX5_N32(abi)) {
			/*
			 * The n32 flock structure is the same size as the
			 * o32 flock64 structure.  So the copyin_xlate
			 * with irix5_n32_to_flock works here.
			 */
			if (COPYIN_XLATE((caddr_t)arg, &bf, sizeof bf,
					 irix5_n32_to_flock,
					 abi, 1)) {
				error = EFAULT;
				break;
			}
		} else {
			if (copyin((caddr_t)arg, &i5_bf, sizeof i5_bf)) {
				error = EFAULT;
				break;
			}
			/*
			 * Now expand to 64 bit sizes.
			 */
			bf.l_type = i5_bf.l_type;
			bf.l_whence = i5_bf.l_whence;
			bf.l_start = i5_bf.l_start;
			bf.l_len = i5_bf.l_len;
		}
		/* normalize l_start, then set the file size to it */
		if ((error = convoff(vp, &bf, 0, offset, SEEKLIMIT32, cr)) == 0) {
			struct vattr vattr;
			vattr.va_size = bf.l_start;
			vattr.va_mask = AT_SIZE;
			error = efs_setattr(bdp, &vattr, 0, cr);
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	return error;
}
/*
* EFS doesn't fully support attributes. We allow getting/setting one
* particular attribute, though: _DEVNAME_ATTR is an attribute
* for special device files stored on EFS file systems. The value of this
* attribute is a hwgraph device path, and it's stored on disk the same
* way a symbolic link is stored. The on-disk EFS type is changed to CHRLNK
* or BLKLNK respectively. This is seen by upper layers as VCHR or VBLK.
*/
/*ARGSUSED*/
int	/* error */
efs_attr_get(bhv_desc_t *bdp, char *name, char *value, int *valuelenp,
	     int flags, struct cred *cred)
{
	struct inode *ip;
	u_short type;
	struct uio uio;
	struct iovec iov;
	int error;
	/* Is it a MAC label */
	if (strcmp(name, SGI_MAC_FILE) == 0)
		return _MAC_EFS_ATTR_GET(bdp, name, value, valuelenp, flags, cred);
	/* Make sure we're getting the only permissible attribute */
	if (strcmp(name, _DEVNAME_ATTR))
		return(ENOSYS);
	ip = bhvtoi(bdp);
	/*
	 * Make sure we're only trying to get this attribute on
	 * an appropriate hwgraph special device file.
	 */
	type = ip->i_mode & IFMT;
	if ((type != IFCHRLNK) && (type != IFBLKLNK))
		return(ENOSYS);
	/*
	 * The attribute value is stored like symlink data; read it
	 * into the caller's buffer with efs_readi.
	 *
	 * NOTE(review): *valuelenp is never updated with the number of
	 * bytes actually read -- confirm callers tolerate this.
	 */
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_resid = iov.iov_len = *valuelenp;
	uio.uio_pmp = NULL;
	uio.uio_pio = 0;
	uio.uio_readiolog = 0;
	uio.uio_writeiolog = 0;
	uio.uio_pbuf = 0;
	iov.iov_base = value;
	ilock(ip);
	error = efs_readi(ip, &uio, 0, type, cred, NULL);
	iunlock(ip);
	return(error);
}
/*ARGSUSED */
int
efs_attr_set(bhv_desc_t *bdp, char *name, char *value, int valuelen, int flags,
	     struct cred *cred)
{
	struct inode *ip;
	u_short type;
	int error;
	/* Is it a MAC Label */
	if (strcmp(name, SGI_MAC_FILE) == 0)
		return (_MAC_EFS_ATTR_SET(bdp, name, value, valuelen,
					  flags, cred));
	/* Make sure we're setting the only permissible attribute */
	if (strcmp(name, _DEVNAME_ATTR))
		return(ENOSYS);
	/*
	 * Make sure we've got permission to make special files,
	 * since by changing this attribute we're essentially creating
	 * a new special file.
	 */
	if (!cap_able_cred(cred, CAP_MKNOD))
		return(EPERM);
	ip = bhvtoi(bdp);
	ilock(ip);
	/*
	 * Only allow attribute to be written on hwgraph special device files.
	 * A plain IFCHR/IFBLK inode is first converted to the CHRLNK/BLKLNK
	 * on-disk type so the device path can be stored like symlink data.
	 */
	type = ip->i_mode & IFMT;
	if ((type == IFCHR) &&
	    IS_HWGRAPH_STRING_DEV(ip->i_rdev)) {
		error = efs_ichange_type(ip, IFCHRLNK);
	} else if ((type == IFBLK) &&
		   IS_HWGRAPH_STRING_DEV(ip->i_rdev)) {
		error = efs_ichange_type(ip, IFBLKLNK);
	} else if ((type == IFCHRLNK) || (type == IFBLKLNK))
		error = 0;
	else
		error = ENOSYS;
	if (!error) {
		struct uio uio;
		struct iovec iov;
		/* write the hwgraph device path as the inode's data */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = 0;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_resid = iov.iov_len = valuelen;
		uio.uio_pmp = NULL;
		uio.uio_pio = 0;
		uio.uio_readiolog = 0;
		uio.uio_writeiolog = 0;
		uio.uio_pbuf = 0;
		iov.iov_base = value;
		error = efs_writei(ip, &uio, 0, cred, NULL);
		if (error) {
			/* On failure, restore old file type */
			efs_ichange_type(ip, type);
		}
	}
	iunlock(ip);
	return(error);
}
/*
 * Vnode operations vector for EFS.  Operations EFS does not implement
 * are filled with the generic stubs from sys/fs_subr.h (fs_nosys,
 * fs_noerr, fs_noval, fs_poll, ...).
 */
vnodeops_t efs_vnodeops = {
	BHV_IDENTITY_INIT_POSITION(VNODE_POSITION_BASE),
	(vop_open_t)fs_noerr,
	efs_close,
	efs_read,
	efs_write,
	efs_ioctl,
	efs_setfl,
	efs_getattr,
	efs_setattr,
	efs_access,
	efs_lookup,
	efs_create,
	efs_remove,
	efs_link,
	efs_rename,
	efs_mkdir,
	efs_rmdir,
	efs_readdir,
	efs_symlink,
	efs_readlink,
	efs_fsync,
	efs_inactive,
	efs_fid,
	efs_fid2,
	efs_rwlock,
	efs_rwunlock,
	efs_seek,
	fs_cmp,
	efs_frlock,
	(vop_realvp_t)fs_nosys,
	efs_bmap,
	efs_strategy,
	efs_map,
	(vop_addmap_t)fs_noerr,
	(vop_delmap_t)fs_noerr,
	fs_poll,
	(vop_dump_t)fs_nosys,
	fs_pathconf,
	(vop_allocstore_t)fs_nosys,
	efs_fcntl,
	efs_reclaim,
	efs_attr_get,
	efs_attr_set,
	(vop_attr_remove_t)fs_nosys,
	(vop_attr_list_t)fs_nosys,
	fs_cover,
	(vop_link_removed_t)fs_noval,
	fs_vnode_change,
	fs_tosspages,
	fs_flushinval_pages,
	fs_flush_pages,
	fs_invalfree_pages,
	fs_pages_sethole,
	(vop_commit_t)fs_nosys,
	(vop_readbuf_t)fs_nosys,
	fs_strgetmsg,
	fs_strputmsg,
};