1
0
Files
irix-657m-src/irix/kern/fs/efs/efs_vnodeops.c
T
2022-09-29 17:59:04 +03:00

2538 lines
56 KiB
C

/*
* EFS vnode operations.
*
* Copyright 1992, Silicon Graphics, Inc.
* All Rights Reserved.
*
* This is UNPUBLISHED PROPRIETARY SOURCE CODE of Silicon Graphics, Inc.;
* the contents of this file may not be disclosed to third parties, copied or
* duplicated in any form, in whole or in part, without the prior written
* permission of Silicon Graphics, Inc.
*
* RESTRICTED RIGHTS LEGEND:
* Use, duplication or disclosure by the Government is subject to restrictions
* as set forth in subdivision (c)(1)(ii) of the Rights in Technical Data
* and Computer Software clause at DFARS 252.227-7013, and/or in similar or
* successor clauses in the FAR, DOD or NASA FAR Supplement. Unpublished -
* rights reserved under the Copyright Laws of the United States.
*/
#ident "$Revision: 1.213 $"
#include <sys/types.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/debug.h>
#include <sys/dirent.h>
#include <sys/dnlc.h>
#include <sys/errno.h>
#include <sys/fcntl.h>
#include <ksys/vfile.h>
#include <sys/fs_subr.h>
#include <sys/iograph.h>
#include <sys/kabi.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/mode.h>
#include <sys/param.h>
#include <sys/pathname.h>
#include <sys/pfdat.h> /* page flushing prototypes */
#include <sys/poll.h>
#include <sys/quota.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/sysinfo.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/sat.h>
#include <sys/capability.h>
#include <sys/flock.h>
#include <sys/kfcntl.h>
#include <fs/specfs/spec_lsnode.h>
#include <string.h>
#ifdef _SHAREII
#include <sys/shareIIstubs.h>
#endif /* _SHAREII */
#include "efs_inode.h"
#include "efs_dir.h"
#include "efs_sb.h"
#ifdef DATAPIPE
/* data pipe functions */
extern int fspe_get_ops(void *);
int efs_fspe_dioinfo(struct vnode *, struct dioattr *);
#endif
static int efs_readi(struct inode *, struct uio *, int, u_short, struct cred *,
struct flid *);
static int efs_writei(struct inode *, struct uio *, int, struct cred *,
struct flid *);
static void efs_rwunlock(bhv_desc_t *, vrwlock_t);
static void efs_rwlock(bhv_desc_t *, vrwlock_t);
static int efs_fid(bhv_desc_t *, struct fid **);
static int efs_fid2(bhv_desc_t *, struct fid *);
static int efs_setattr(bhv_desc_t *, struct vattr *,int ,struct cred *);
#if _MIPS_SIM == _ABI64
int irix5_to_flock(enum xlate_mode, void *, int, xlate_info_t *);
int flock_to_irix5(void *, int, xlate_info_t *);
int irix5_n32_to_flock(enum xlate_mode, void *, int, xlate_info_t *);
int flock_to_irix5_n32(void *, int, xlate_info_t *);
#endif
/*
* EFS direct I/O can be other than page aligned as long as we report
* the maximum transfer size as the maximum number of pages minus 1.
* This takes care of the case where the I/O is not page aligned, but
* it is of maxdmasz size. We go with BBSIZE for the alignment, because
* that is what it has always been.
*/
#define FDIRIOALIGN BBSIZE
#define EFS_INVALIDOFF(off) (((off) < 0) || ((off) > SEEKLIMIT32))
/*
* No open action is required for regular files. Devices are handled
* through the specfs file system, pipes through fifofs. Device and
* fifo vnodes are "wrapped" by specfs and fifofs vnodes, respectively,
* when a new vnode is first looked up or created.
*/
/* ARGSUSED */
/*
 * efs_close -- VOP_CLOSE for EFS.
 *
 * No close processing is needed for regular files; device and fifo
 * vnodes are wrapped by specfs/fifofs (see the comment above), which
 * field their own close operations.  Always succeeds.
 */
static int
efs_close(
	bhv_desc_t *bdp,
	int flag,
	lastclose_t lastclose,
	struct cred *cr)
{
	return 0;
}
/* ARGSUSED */
/*
 * efs_read -- VOP_READ for EFS.
 *
 * Takes the inode rwlock shared unless the caller already holds it
 * (IO_ISLOCKED), then hands the request to efs_readi() along with the
 * inode's IFMT format bits.
 */
static int
efs_read(
	bhv_desc_t *bdp,
	struct uio *uiop,
	int ioflag,
	struct cred *cr,
	struct flid *fl)
{
	struct inode *ip;
	int error;
	int need_lock = !(ioflag & IO_ISLOCKED);

	if (need_lock)
		efs_rwlock(bdp, VRWLOCK_READ);
	ip = bhvtoi(bdp);
	ASSERT(ip->i_flags & IRWLOCK);
	error = efs_readi(ip, uiop, ioflag,
			  (u_short)(ip->i_mode & IFMT), cr, fl);
	if (need_lock)
		efs_rwunlock(bdp, VRWLOCK_READ);
	return error;
}
/* ARGSUSED */
/*
 * efs_write -- VOP_WRITE for EFS.
 *
 * Takes the inode rwlock exclusively unless the caller already holds
 * it (IO_ISLOCKED).  For regular files written with IO_APPEND the uio
 * offset is repositioned to the current end of file -- under the
 * rwlock, so concurrent writers cannot interleave -- before the write
 * proper is delegated to efs_writei().
 */
static int
efs_write(
	bhv_desc_t *bdp,
	struct uio *uiop,
	int ioflag,
	struct cred *cr,
	struct flid *fl)
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	struct inode *ip;
	int error;
	int need_lock = !(ioflag & IO_ISLOCKED);

	if (need_lock)
		efs_rwlock(bdp, VRWLOCK_WRITE);
	ip = bhvtoi(bdp);
	ASSERT(ip->i_flags & IRWLOCK);
	/* append mode: start at end of file */
	if (vp->v_type == VREG && (ioflag & IO_APPEND))
		uiop->uio_offset = ip->i_size;
	error = efs_writei(ip, uiop, ioflag, cr, fl);
	if (need_lock)
		efs_rwunlock(bdp, VRWLOCK_WRITE);
	return error;
}
/*
 * Maximum number of split-up physical I/Os diostrat() issues per batch
 * before stopping to reap (biowait) the outstanding buffers.
 */
#define MAXDIOSPLIT 20
/*
 * Direct-I/O request context, passed from efs_diordwr() to diostrat()
 * through the buf's b_private field: the EFS behavior descriptor,
 * the requester's credentials, and the original I/O flags.
 */
struct dio_s {
	bhv_desc_t *bdp;
	struct cred *cr;
	int ioflag;
};
/*
 * diostrat -- strategy routine driving EFS direct I/O.
 *
 * Invoked through biophysio() with a buf describing the user's mapped
 * buffer.  The request is carved along extent boundaries into batches
 * of up to MAXDIOSPLIT physical I/Os, each issued to the underlying
 * device and then reaped with biowait().  Writes may grow the file;
 * if any sub-I/O fails, the file is trimmed back to its original size
 * and the whole request fails.  Writes also update mtime/ctime and
 * strip setuid/setgid, mirroring efs_writei().  Errors are posted on
 * bp via bioerror(); the function itself always returns 0.
 *
 * Fix: the delwri flush start offset was computed with "& ~NBPP",
 * which clears only one bit and does not page-align the offset; it is
 * now rounded down to a page boundary with "& ~(NBPP - 1)".
 */
static int
diostrat(buf_t *bp)
{
	struct dio_s *dp;
	bhv_desc_t *bdp;
	struct vnode *vp;
	scoff_t offset, pushstart;
	int count, i, j, n, totxfer;
	buf_t *bps[MAXDIOSPLIT];
	int dlen = 1, dbn;
	struct bmapval bmv;
	buf_t *nbp;
	caddr_t base;
	int error, resid, totresid;
	int trail = 0;		/* nonzero: partial-sector read at EOF */
	struct inode *ip;
	scoff_t o_size;

	ASSERT(!(bp->b_flags & B_DONE));
	dp = (struct dio_s*) bp->b_private;
	bdp = dp->bdp;
	vp = BHV_TO_VNODE(bdp);
	ip = bhvtoi(bdp);
	o_size = ip->i_size;	/* remembered for trim-back on error */
	offset = BBTOB(bp->b_blkno);
	totresid = count = bp->b_bcount;
	base = bp->b_un.b_addr;
	error = resid = totxfer = 0;
	while ( !error && count && !trail && dlen ) {
		for ( i = 0 ; (i < MAXDIOSPLIT) && count && !trail ; i++ ) {
			/* build an io: coalesce physically contiguous extents */
			for ( dlen = 0 ; dlen < count ; ) {
				n = 1;
				error = efs_bmap(bdp, offset+dlen, count-dlen,
						bp->b_flags&B_READ, dp->cr, &bmv, &n);
				if (error || (bmv.pbsize == 0))
					break;
				/* prime the pump */
				if (dlen == 0) {
					dbn = bmv.bn + BTOBB(bmv.pboff);
					/* round flush start down to a page boundary */
					pushstart = BBTOB(bmv.offset) & ~(NBPP - 1);
				} else if (dbn + BTOBB(dlen) != bmv.bn + BTOBB(bmv.pboff))
					break;	/* not contiguous: stop coalescing */
				dlen += bmv.pbsize;
				/* see if the file grew from writes */
				if ((bp->b_flags & B_READ) == 0 &&
				    offset + dlen > ip->i_size ) {
					ASSERT((vp->v_flag & VISSWAP) == 0);
					ip->i_size = offset + dlen;
					ip->i_flags |= ITRUNC;
				}
			}
			/* end of file or an error */
			if ( (dlen == 0) || error )
				break;
			/*
			 * Flush out delwri data so cached pages cannot
			 * shadow the direct transfer.
			 */
			if (!(dp->ioflag & IO_IGNCACHE)) {
				off_t end = BBTOB(bmv.offset + bmv.length);
				VOP_FLUSHINVAL_PAGES(vp, (off_t)pushstart, ctob(btoc(end)) - 1,
					FI_NONE);
			}
			/* check for partial reads at end of file */
			if ( dlen & BBMASK ) {
				ASSERT(bp->b_flags & B_READ);
				trail = dlen;
				dlen &= ~BBMASK;
				dlen += BBSIZE;	/* round up to a full sector */
			}
			/* trim back xfer */
			if ( dlen > count )
				dlen = count;
			/* get the information from disk */
			bps[i] = nbp = getphysbuf(bp->b_edev);
			nbp->b_flags = bp->b_flags;
			nbp->b_error = 0;
			nbp->b_blkno = dbn;
			nbp->b_bcount = dlen;
			nbp->b_un.b_addr = base;
			VOP_STRATEGY(ip->i_mount->m_devvp,nbp);
			if (error = geterror(nbp)) {
				/* submission failed: reap this buf right here */
				biowait(nbp);
				nbp->b_flags = 0;
				putphysbuf(nbp);
				break;
			}
			/* correct for partial reads */
			if( trail )
				dlen = trail;
			base += dlen;
			offset += dlen;
			count -= dlen;
		}
		/* recover the buffers issued this batch */
		for ( j = 0 ; j < i ; j++ ) {
			nbp = bps[j];
			biowait(nbp);
			/* check for an error */
			if ( !error )
				error = geterror(nbp);
			if ( !error && !resid ) {
				resid = nbp->b_resid;
				/* prevent adding up partial xfers */
				if( trail && (j == (i-1)) ) {
					/* correct for partial reads */
					if( resid <= nbp->b_bcount - trail )
						totxfer += trail;
				}
				else
					totxfer += nbp->b_bcount - resid;
			}
			nbp->b_flags = 0;
			putphysbuf(nbp);
		}
	}
	/* if any of the io's fail, the whole thing fails */
	if ( error ) {
		totxfer = 0;
		/* undo any file growth done above */
		if (((bp->b_flags & B_READ) == 0) && !(vp->v_flag & VISSWAP))
			efs_itrunc(ip, o_size, 0);
	}
	bp->b_resid = totresid - totxfer;
	/* writes update timestamps and strip setuid/setgid, as efs_writei does */
	if ( (bp->b_flags & B_READ) == 0 ) {
		timespec_t tv;
		if ((ip->i_mode & (ISUID|ISGID)) &&
		    !cap_able_cred(dp->cr, CAP_FSETID)) {
			ip->i_mode &= ~ISUID;
			if (ip->i_mode & (IEXEC >> 3))
				ip->i_mode &= ~ISGID;
		}
		nanotime_syscall(&tv);
		ip->i_flags |= IMOD;
		ip->i_mtime = ip->i_ctime = tv.tv_sec;
		ip->i_umtime = tv.tv_nsec;
	}
	bioerror(bp,error);
	biodone(bp);
	/* make the compiler happy */
	return 0;
}
/*
 * efs_diordwr -- set up and run a direct (uncached) read or write.
 *
 * dirflg is B_READ for reads, B_WRITE for writes.  The user buffer
 * address must be FDIRIOALIGN (BBSIZE) aligned, and both the file
 * offset and byte count must be BBSIZE multiples; the request must
 * also fit in a single DMA (v_maxdmasz - 1 pages).  Violations return
 * EINVAL.  The transfer itself is performed by diostrat(), invoked
 * through biophysio() with the request context hung off b_private.
 */
static int
efs_diordwr(bhv_desc_t *bdp, struct uio *uiop, struct cred *cr, int ioflag,
	uint64_t dirflg)
{
	struct inode *ip = bhvtoi(bdp);
	struct dio_s dp;
	buf_t *bp;
	int error;
	/* special case caused by trailing reads: a read at EOF returns 0 */
	if (dirflg & B_READ ){
		if (ip->i_size == uiop->uio_offset)
			return 0;
	}
	/* do alignment checks */
	if (((__psint_t)uiop->uio_iov->iov_base & (FDIRIOALIGN-1))
	    || (uiop->uio_offset & BBMASK) || (uiop->uio_resid & BBMASK))
		return EINVAL;
	/* do maxio check */
	if (uiop->uio_resid > ctob(v.v_maxdmasz - 1))
		return EINVAL;
	bp = getphysbuf(ip->i_dev);
	/* save the info for later... diostrat() retrieves it via b_private */
	dp.bdp = bdp;
	dp.cr = cr;
	dp.ioflag = ioflag;
	bp->b_private = &dp;
	error = biophysio(diostrat, bp, bp->b_edev, dirflg,
			(daddr_t)BTOBB(uiop->uio_offset), uiop);
	bp->b_flags = 0;
	putphysbuf(bp);
	return error;
}
#define COPYOUT(bp,off,len,uio) biomove(bp,off,len,UIO_READ,uio)
#define COPYIN(bp,off,len,uio) biomove(bp,off,len,UIO_WRITE,uio)
#define NREADIMAPS 4
/*
 * efs_readi -- common read path for regular files, directories and
 * symlinks.
 *
 * 'type' is the IFMT format of the inode.  Enforces mandatory file
 * locking when enabled, validates the offset against SEEKLIMIT32,
 * updates the access time (on writeable file systems only, per POSIX),
 * then copies data out through the buffer/chunk cache -- or, for
 * IO_DIRECT regular-file reads, through efs_diordwr().  Sockets get
 * ENODEV.  The caller holds the appropriate inode locking.
 */
static int
efs_readi(struct inode *ip,
	struct uio *uio,
	int ioflag,
	u_short type,
	struct cred *cr,
	struct flid *fl)
{
	register off_t offset;
	int error, n, i;
	struct vnode *vp;
	struct bmapval bmv[NREADIMAPS];
	int nmaps;
	struct buf *bp;
	timespec_t tv;

	ASSERT(type == IFREG || type == IFDIR || ISLINK(type) ||
	    type == IFSOCK);
	vp = itov(ip);
	offset = uio->uio_offset;
	/* check for locks if some exist and mandatory locking is enabled */
	if ((vp->v_flag & (VENF_LOCKING|VFRLOCKS)) ==
	    (VENF_LOCKING|VFRLOCKS)) {
		error = fs_checklock(vp, FREAD, offset, uio->uio_resid,
				uio->uio_fmode, cr, fl, VRWLOCK_READ);
		if (error)
			return error;
	}
	if (EFS_INVALIDOFF(offset))
		return EINVAL;
	if (uio->uio_resid <= 0)
		return 0;
	/*
	 * Do the following only for writeable file systems.
	 * This closes a POSIX conformance bug which says that a read on a
	 * file in a read-only file system should not update its access
	 * time. Also, logically, there's no point in updating the atime
	 * as it is never going to be written back to disk.
	 */
	if (!(itovfs(ip)->vfs_flag & VFS_RDONLY)) {
		nanotime_syscall(&tv);
		ip->i_flags |= IMOD;
		ip->i_atime = tv.tv_sec;
	}
	switch (type) {
	case IFLNK:
	case IFCHRLNK:
	case IFBLKLNK:
		/* in-line sym link? (contents stored in the extent area) */
		if (ip->i_numextents == 0) {
			ASSERT(ip->i_size <= EFS_MAX_INLINE);
			/* paranoia when asserts are gone... */
			n = MIN(ip->i_size, EFS_MAX_INLINE);
			if ((n -= uio->uio_offset) <= 0) {
				error = 0;
				break;
			}
			n = MIN(uio->uio_resid, n);
			error = uiomove((char *)ip->i_extents, n, UIO_READ,
					uio);
			break;
		}
		/* fall through: out-of-line links are read like directories */
	case IFDIR:
		do {
			nmaps = 2;
			error = efs_bmap(itobhv(ip), uio->uio_offset, uio->uio_resid,
					B_READ, cr, bmv, &nmaps);
			if (error || bmv[0].pbsize == 0)
				break;
			ASSERT(bmv[0].bn >= 0);
			/* read ahead the second mapping if one was returned */
			if (nmaps > 1)
				bp = breada(bmv[0].pbdev,
					bmv[0].bn, bmv[0].length,
					bmv[1].bn, bmv[1].length);
			else
				bp = bread(bmv[0].pbdev,
					bmv[0].bn, bmv[0].length);
			if (bp->b_flags & B_ERROR)
				error = bp->b_error;
			else if (bp->b_resid)
				n = 0;	/* short read: terminate the loop */
			else {
				n = bmv[0].pbsize;
				error = COPYOUT(bp, bmv[0].pboff, n, uio);
			}
			brelse(bp);
		} while (!error && uio->uio_resid != 0 && n != 0);
		break;
	case IFREG:
		if (ioflag & IO_RSYNC) {
			/* First we sync the data */
			if ((ioflag & IO_SYNC) || (ioflag & IO_DSYNC)) {
				VOP_FLUSH_PAGES(vp, (off_t)0, ip->i_size - 1, 0, FI_NONE, error );
				error = 0;
			}
			/* then push the inode if it isn't already permanent */
			if ((ip->i_remember < ip->i_size) || (ioflag & IO_SYNC)) {
				ip->i_flags |= ISYN;
				ip->i_remember = ip->i_size;
				efs_iupdat(ip);
			}
		}
		if (ioflag & IO_DIRECT) {
			error = efs_diordwr(itobhv(ip), uio, cr, ioflag,
					B_READ);
			break;
		}
		do {
			nmaps = NREADIMAPS;
			error = efs_bmap(itobhv(ip), uio->uio_offset, uio->uio_resid,
					B_READ, cr, bmv, &nmaps);
			if (error || (n = bmv[0].pbsize) == 0)
				break;
			/*
			 * Pass on the policy modules from our caller
			 * to the chunk cache.
			 */
			for (i = 0; i < nmaps; i++) {
				bmv[i].pmp = uio->uio_pmp;
			}
			bp = chunkread(vp, bmv, nmaps, cr);
			if (bp->b_flags & B_ERROR)
				error = bp->b_error;
			else if (bp->b_resid)
				n = 0;	/* short read: terminate the loop */
			else
				error = COPYOUT(bp, bmv[0].pboff, n, uio);
			brelse(bp);
		} while (!error && uio->uio_resid != 0 && n != 0);
		break;
	case IFSOCK:
		error = ENODEV;
	}
	return error;
}
extern int efs_inline;
/*
 * efs_writei -- common write path for regular files, directories and
 * symlink creation.
 *
 * Enforces mandatory file locking, validates offsets against
 * SEEKLIMIT32, and honors the process file-size limit: a write that
 * crosses the limit is silently shortened, with the remainder added
 * back into uio_resid.  Small symlinks are stored in-line in the
 * extent area.  Regular-file writes go through the chunk cache, or
 * through efs_diordwr() for IO_DIRECT.  mtime/ctime are stamped once
 * at the end if anything changed, and the inode is pushed
 * synchronously when IO_SYNC/IO_DSYNC writes changed the file size.
 * The caller holds the inode rwlock exclusively.
 */
static int
efs_writei(struct inode *ip,
	struct uio *uio,
	int ioflag,
	struct cred *cr,
	struct flid *fl)
{
	int type, error, n, count, resid;
	struct vnode * vp;
	register off_t offset;
	struct bmapval bmv;
	int nmaps;
	struct buf * bp;
	int dotime = 0;		/* set once data is written: stamp times at end */
	off_t limit;

	type = ip->i_mode & IFMT;
	ASSERT(type == IFREG || type == IFDIR || ISLINK(type) ||
	    type == IFSOCK);
	vp = itov(ip);
	offset = uio->uio_offset;
	count = uio->uio_resid;
	/* check for locks if some exist and mandatory locking is enabled */
	if ((vp->v_flag & (VENF_LOCKING|VFRLOCKS)) ==
	    (VENF_LOCKING|VFRLOCKS)) {
		error = fs_checklock(vp, FWRITE, offset, count, uio->uio_fmode,
				cr, fl, VRWLOCK_WRITE);
		if (error)
			return error;
	}
	if ( EFS_INVALIDOFF(offset) || EFS_INVALIDOFF(offset + count) )
		return EINVAL;
	if (count <= 0)
		return 0;
	switch (type) {
	case IFLNK:
	case IFCHRLNK:
	case IFBLKLNK:
		/*
		 * Create an in-line sym link iff there's room.
		 */
		ASSERT(offset == 0);
		ASSERT(ip->i_numextents == 0);
		if (efs_inline && count <= EFS_MAX_INLINE) {
			irealloc(ip, count);
			error = uiomove((char *)ip->i_extents, count,
					UIO_WRITE, uio);
			if (!error) {
				ip->i_size = count;
				dotime = 1;
			}
			break;
		}
		/* FALLTHROUGH: too big for in-line, write like a directory */
	case IFDIR:
		do {
			nmaps = 1;
			if (error = efs_bmap(itobhv(ip), uio->uio_offset,
					     uio->uio_resid, B_WRITE, cr,
					     &bmv, &nmaps))
				break;
			/* whole-block writes need not read the old contents */
			bp = ((n = bmv.pbsize) == bmv.bsize) ?
				getblk(bmv.pbdev, bmv.bn, bmv.length) :
				bread(bmv.pbdev, bmv.bn, bmv.length);
			if (error = COPYIN(bp, bmv.pboff, n, uio)) {
				brelse(bp);
				break;
			}
			if (uio->uio_offset > ip->i_size)
				ip->i_size = uio->uio_offset;
			dotime = 1;
			if ((ioflag & IO_SYNC) || (ioflag & IO_DSYNC))
				bwrite(bp);
			else
				bdwrite(bp);
		} while (uio->uio_resid != 0 && n != 0);
		break;
	case IFREG:
		limit = MIN(uio->uio_limit, (off_t)EFS_MAX_FILE_OFFSET);
		n = (int)(limit - uio->uio_offset);
		if (n <= 0)
			return EFBIG;
		if (n < uio->uio_resid) { /* only do partial write */
			resid = uio->uio_resid - n;
			uio->uio_resid = n;
		} else {
			resid = 0;
		}
		if (ioflag & IO_DIRECT) {
			error = efs_diordwr(itobhv(ip), uio, cr, ioflag,
					B_WRITE);
			/* add back remainder of write */
			uio->uio_resid += resid;
			break;
		}
		do {
			nmaps = 1;
			if (error = efs_bmap(itobhv(ip), uio->uio_offset,
					     uio->uio_resid, B_WRITE, cr,
					     &bmv, &nmaps))
				break;
			/*
			 * We must bread the buffer if the write doesn't
			 * completely overwrite the buffer and the write
			 * either begins after the start of the buffer or
			 * ends before the current end of file.
			 */
			bmv.pmp = uio->uio_pmp;
			if ((n = bmv.pbsize) != bmv.bsize
			    && (bmv.pboff != 0 || uio->uio_offset != ip->i_size))
				bp = chunkread(vp, &bmv, 1, cr);
			else
				bp = getchunk(vp, &bmv, cr);
			if (bp->b_flags & B_ERROR) {
				error = bp->b_error;
				brelse(bp);
				break;
			}
			if (error = COPYIN(bp, bmv.pboff, n, uio)) {
				/* poison the chunk so stale data is not reused */
				if (!(bp->b_flags & B_DONE))
					bp->b_flags |= B_STALE|B_DONE|B_ERROR;
				brelse(bp);
				break;
			}
			/*
			 * Update file size if COPYIN extended uio_offset.
			 */
			if (uio->uio_offset > ip->i_size) {
				ip->i_size = uio->uio_offset;
				ip->i_flags |= ITRUNC;
			}
			/*
			 * Mark inode modified and clear suid and sgid if
			 * not superuser.
			 */
			dotime = 1;
			if ((ip->i_mode & (ISUID|ISGID)) &&
			    !cap_able_cred(cr, CAP_FSETID)) {
				ip->i_mode &= ~ISUID;
				if (ip->i_mode & (IEXEC >> 3))
					ip->i_mode &= ~ISGID;
			}
			if ((ioflag & IO_SYNC) || (ioflag & IO_DSYNC))
				bwrite(bp);
			else
				bdwrite(bp);
		} while (uio->uio_resid != 0 && n != 0);
		uio->uio_resid += resid; /* add back remainder of write */
		break;
	case IFSOCK:
		error = ENODEV;
		break;
	}
	/*
	 * If we've already done a partial write, terminate
	 * the write but return no error.
	 */
	if (count != uio->uio_resid) {
		error = 0;
	}
	/*
	 * Set timestamps. Don't put it off, we want the time to
	 * be reasonably accurate.
	 */
	if (dotime) {
		timespec_t tv;
		nanotime_syscall(&tv);
		ip->i_flags |= IMOD;
		ip->i_mtime = ip->i_ctime = tv.tv_sec;
		ip->i_umtime = tv.tv_nsec;
	}
	/*
	 * Update the inode only if inode changed.
	 * We set i_remember to i_size to ensure that the data
	 * written is actually permanent in the inode.
	 */
	if ((ioflag & (IO_SYNC | IO_DSYNC)) &&
	    (ip->i_flags & ITRUNC) &&
	    !(vp->v_flag & VISSWAP) &&
	    !error) {
		ip->i_flags |= ISYN;
		ip->i_remember = ip->i_size;
		error = efs_iupdat(ip);
	}
	return error;
}
/* ARGSUSED */
/*
 * efs_ioctl -- VOP_IOCTL for EFS.  EFS implements no file ioctls;
 * every request fails with ENOTTY (device ioctls are handled by
 * specfs on the wrapping vnode).
 */
static int
efs_ioctl(
	bhv_desc_t *bdp,
	int cmd,
	void *arg,
	int flag,
	struct cred *cr,
	int *rvalp,
	struct vopbd *vbds)
{
	return ENOTTY;
}
/* ARGSUSED */
/*
 * efs_getattr -- VOP_GETATTR for EFS.
 *
 * Fills in *vap from the in-core inode.  Short-circuits the cheap
 * AT_SIZE-only and id/link-count-only queries before doing the full
 * copy.  Pending time-update flags (IACC/IUPD/ICHG) are applied and
 * cleared under the inode lock first, since POSIX stat() requires
 * deferred updates to be visible.  Always returns 0.
 */
static int
efs_getattr(bdp, vap, flags, cr)
	bhv_desc_t *bdp;
	struct vattr *vap;
	int flags;
	struct cred *cr;
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	struct inode *ip;
	u_short type;

	ip = bhvtoi(bdp);
	vap->va_size = ip->i_size;
	/* fast path: caller only wants the size */
	if (vap->va_mask == AT_SIZE)
		return 0;
	vap->va_fsid = ip->i_dev;
	vap->va_nodeid = ip->i_number;
	vap->va_nlink = ip->i_nlink;
	vap->va_gencount = ip->i_gen;
	/* fast path: ids, link count and size satisfy the request */
	if (!(vap->va_mask & ~(AT_FSID|AT_NODEID|AT_NLINK|AT_GENCOUNT|AT_SIZE)))
		return 0;
	/*
	 * POSIX stat etc. require that any pending update flags
	 * be dealt with and cleared upon return from stat.
	 * Since we defer updating the inode on setting these flags
	 * we must pay now. Rather than really go through the
	 * entire efs_iupdat, we simply get the times up to date.
	 * This emulates the setattr code below.
	 */
	if (ip->i_flags & (IACC|IUPD|ICHG)) {
		timespec_t tv;
		nanotime_syscall(&tv);
		ilock(ip);
		if (ip->i_flags & IACC)
			ip->i_atime = tv.tv_sec;
		if (ip->i_flags & IUPD) {
			ip->i_mtime = tv.tv_sec;
			ip->i_umtime = tv.tv_nsec;
		}
		if (ip->i_flags & ICHG)
			ip->i_ctime = tv.tv_sec;
		ip->i_flags &= ~(IACC|IUPD|ICHG);
		ip->i_updtimes = 0;
		ip->i_flags |= IMOD;
		iunlock(ip);
	} else if (ip->i_updtimes) {
		ilock(ip);
		ip->i_updtimes = 0;
		ip->i_flags |= IMOD;
		iunlock(ip);
	}
	/*
	 * Copy from in-core inode.
	 */
	vap->va_type = vp->v_type;
	vap->va_mode = ip->i_mode & MODEMASK;
	vap->va_uid = ip->i_uid;
	vap->va_gid = ip->i_gid;
	vap->va_vcode = ip->i_vcode;
	if (vp->v_type == VCHR || vp->v_type == VBLK)
		vap->va_rdev = ip->i_rdev;
	else
		vap->va_rdev = 0;	/* not a b/c spec. */
	vap->va_atime.tv_sec = ip->i_atime;
	vap->va_atime.tv_nsec = 0;
	vap->va_mtime.tv_sec = ip->i_mtime;
	vap->va_mtime.tv_nsec = ip->i_umtime;
	vap->va_ctime.tv_sec = ip->i_ctime;
	vap->va_ctime.tv_nsec = 0;
	type = ip->i_mode & IFMT;
	switch (type) {
	case IFBLK:
	case IFCHR:
		vap->va_blksize = BLKDEV_IOSIZE;
		break;
	case IFCHRLNK:
	case IFBLKLNK:
		vap->va_rdev = HWGRAPH_STRING_DEV;
		vap->va_blksize = BLKDEV_IOSIZE;
		break;
	default:
		/* regular files etc. report the file system block size */
		vap->va_blksize = 1 << itoefs(ip)->fs_lbshift;
	}
	vap->va_nblocks = BTOBB(ip->i_size);
	/* EFS has no extended attributes, extents exposed, or project ids */
	vap->va_xflags = 0;
	vap->va_extsize = 0;
	vap->va_nextents = 0;
	vap->va_anextents = 0;
	vap->va_projid = 0;
	return 0;
}
/*
 * efs_setattr -- VOP_SETATTR for EFS.
 *
 * Applies the attributes selected by vap->va_mask: mode, owner/group
 * (with restricted_chown and quota migration), size (truncate), and
 * access/modification times.  AT_UPDTIMES requests are handled
 * without taking the inode lock -- they only latch pending time
 * updates.  Permission checks follow POSIX with the CAP_FOWNER,
 * CAP_FSETID and CAP_CHOWN capability overrides noted inline.  On
 * success the inode is pushed to disk unless the caller asked for
 * lazy/exec-time updates.  Changes to the mandatory-locking mode are
 * propagated to the vnode under the inode lock.
 */
static int
efs_setattr(bdp, vap, flags, cr)
	bhv_desc_t *bdp;
	struct vattr *vap;
	int flags;
	struct cred *cr;
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	int mask;
	struct inode *ip;
	int error;
	timespec_t tv;
	int mandlock_before, mandlock_after;
	int file_owner;

	/*
	 * Cannot set certain attributes.
	 */
	mask = vap->va_mask;
	if (mask & AT_NOSET)
		return EINVAL;
	ip = bhvtoi(bdp);
	/* lockless fast path: just latch pending time updates */
	if (mask & AT_UPDTIMES) {
		ASSERT((mask & ~AT_UPDTIMES) == 0);
		nanotime_syscall(&tv);
		if (mask & AT_UPDATIME)
			ip->i_atime = tv.tv_sec;
		if (mask & AT_UPDCTIME)
			ip->i_ctime = tv.tv_sec;
		if (mask & AT_UPDMTIME) {
			ip->i_mtime = tv.tv_sec;
			ip->i_umtime = tv.tv_nsec;
		}
		ip->i_updtimes = 1;
		return 0;
	}
	ilock(ip);
	error = 0;
	/* determine whether mandatory locking mode changes */
	mandlock_before = MANDLOCK(vp, ip->i_mode);
	file_owner = (cr->cr_uid == ip->i_uid);
	if (mask & (AT_MODE|AT_UID|AT_GID)) {
		/*
		 * CAP_FOWNER overrides the following restrictions:
		 *
		 * The user ID of the calling process must be equal
		 * to the file owner ID, except in cases where the
		 * CAP_FSETID capability is applicable.
		 */
		if (!file_owner && !cap_able_cred(cr, CAP_FOWNER)) {
			error = EPERM;
			goto out;
		}
	}
	/*
	 * Change file access modes. Must be owner or privileged.
	 */
	if (mask & AT_MODE) {
		mode_t m = 0;
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The effective user ID of the calling process shall match
		 * the file owner when setting the set-user-ID and
		 * set-group-ID bits on that file.
		 *
		 * The effective group ID or one of the supplementary group
		 * IDs of the calling process shall match the group owner of
		 * the file when setting the set-group-ID bit on that file
		 */
		if ((vap->va_mode & ISUID) && !file_owner)
			m |= ISUID;
		if ((vap->va_mode & ISGID) &&
		    !groupmember(ip->i_gid, cr))
			m |= ISGID;
		if ((vap->va_mode & ISVTX) && vp->v_type != VDIR)
			m |= ISVTX;
		/* silently drop the disallowed bits rather than fail */
		if (m && !cap_able_cred(cr, CAP_FSETID))
			vap->va_mode &= ~m;
		ip->i_mode &= IFMT;
		ip->i_mode |= vap->va_mode & ~IFMT;
		ip->i_flags |= ICHG;
	}
	/*
	 * Change file ownership. Must be the owner or privileged.
	 * If the system was configured with the "restricted_chown"
	 * option, the owner is not permitted to give away the file,
	 * and can change the group id only to a group of which he
	 * or she is a member.
	 */
	if (mask & (AT_UID|AT_GID)) {
		uid_t uid = (mask & AT_UID) ? vap->va_uid : ip->i_uid;
		gid_t gid = (mask & AT_GID) ? vap->va_gid : ip->i_gid;
		/* Prevent long uids from being silently truncated to 16bits */
		if (uid > 0xffff || gid > 0xffff)
		{
			error = EOVERFLOW;
			goto out;
		}
		/*
		 * CAP_CHOWN overrides the following restrictions:
		 *
		 * If _POSIX_CHOWN_RESTRICTED is defined, this capability
		 * shall override the restriction that a process cannot
		 * change the user ID of a file it owns and the restriction
		 * that the group ID supplied to the chown() function
		 * shall be equal to either the group ID or one of the
		 * supplementary group IDs of the calling process.
		 */
		if (restricted_chown &&
		    (ip->i_uid != uid || (ip->i_gid != gid &&
		    !groupmember(gid, cr))) &&
		    !cap_able_cred(cr, CAP_CHOWN)) {
			error = EPERM;
			goto out;
		}
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The set-user-ID and set-group-ID bits of a file will be
		 * cleared upon successful return from chown()
		 */
		if ((ip->i_mode & (ISUID|ISGID)) &&
		    !cap_able_cred(cr, CAP_FSETID)) {
			ip->i_mode &= ~(ISUID|ISGID);
		}
		if (ip->i_uid == uid) {
			/*
			 * XXX This won't work once we have group quotas
			 */
			ip->i_gid = gid;
		} else {
			/* owner changes: migrate disk/inode quota charges */
			long change = BTOBB(ip->i_size);
#ifdef _SHAREII
			if ((error = SHR_CHOWNDISK
				(
					itovfs(ip),
					ip->i_uid,
					vap->va_uid,
					(u_long)ip->i_blocks,
					DEV_BSIZE,
					cr
				)
			    )
			)
				goto out;
#endif /* _SHAREII */
			/*
			 * We force the changes to the quota structure, hence we
			 * cannot fail because of want of space! Kludgy.
			 */
			(void) qt_chkdq(ip, -change, 1, NULL);
			(void) qt_chkiq(ip->i_mount, ip, (u_int)ip->i_uid, 1);
			qt_dqrele(ip->i_dquot);
			ip->i_uid = uid;
			ip->i_gid = gid;
			ip->i_dquot = qt_getinoquota(ip);
			(void) qt_chkdq(ip, change, 1, NULL);
			(void) qt_chkiq(ip->i_mount, (struct inode *)NULL,
					(u_int)ip->i_uid, 1);
		}
		ip->i_flags |= ICHG;
	}
	/*
	 * Truncate file. Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		} else if (vp->v_type != VREG) {
			error = EINVAL;
			goto out;
		}
		if (vp->v_flag & VISSWAP) {
			error = EACCES;
			goto out;
		}
		if (!(mask & AT_SIZE_NOPERM)) {
			if (error = efs_iaccess(ip, IWRITE, cr))
				goto out;
		}
		/* must UPD|CHG even though efs_itrunc may not do anything */
		ip->i_flags |= IUPD|ICHG;
		if (error = efs_itrunc(ip, (scoff_t)vap->va_size, 0))
			goto out;
	}
	/*
	 * Change file access or modified times.
	 */
	if (mask & (AT_ATIME|AT_MTIME)) {
		/*
		 * We turn off I* bits to denote that our 'vap' time
		 * not the current time is the most up-to-date
		 * We turn on IMOD to be sure that sooner or later
		 * the inode will still get pushed. Future access, mod, changes
		 * will simply turn on the respective I* bit and overwrite our
		 * value
		 */
		if (!file_owner && !cap_able_cred(cr, CAP_FOWNER)) {
			if (error = (flags & ATTR_UTIME) ?
			    EPERM : efs_iaccess(ip, IWRITE, cr))
				goto out;
		}
		/*
		 * since utime() always updates both mtime and atime
		 * ctime will always be set, as it need to be so there
		 * no reason to set ICHG
		 */
		ip->i_flags |= IMOD;
		if (mask & AT_ATIME) {
			ip->i_atime = vap->va_atime.tv_sec;
			ip->i_flags &= ~IACC;
		}
		if (mask & AT_MTIME) {
			nanotime_syscall(&tv);
			ip->i_mtime = vap->va_mtime.tv_sec;
			ip->i_umtime = vap->va_mtime.tv_nsec;
			ip->i_ctime = tv.tv_sec;
			ip->i_flags &= ~(IUPD|ICHG);
		}
	}
out:
	if (!error && (flags & (ATTR_EXEC|ATTR_LAZY)) == 0 &&
	    (ip->i_flags & (IACC|IUPD|ICHG|IMOD))) {
		/* XXXjwag ordering issue w.r.t delwri */
		/* XXXjwag - why do we really have to call iupdat here?? */
		IGETINFO.ig_attrchg++;
		error = efs_iupdat(ip);
	}
	/*
	 * If the (regular) file's mandatory locking mode changed, then
	 * notify the vnode. We do this under the inode lock to prevent
	 * racing calls to vop_vnode_change.
	 */
	mandlock_after = MANDLOCK(vp, ip->i_mode);
	if (mandlock_before != mandlock_after) {
		VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_ENF_LOCKING,
				 mandlock_after);
	}
	iunlock(ip);
	return error;
}
/*
 * efs_access -- VOP_ACCESS for EFS.
 *
 * Vnode mode bits are really inode mode bits, so the requested access
 * is simply checked against the inode under the inode lock.
 */
/* ARGSUSED */
static int
efs_access(
	bhv_desc_t *bdp,
	int mode,
	struct cred *cr)
{
	struct inode *ip = bhvtoi(bdp);
	int error;

	ilock(ip);
	error = efs_iaccess(ip, mode, cr);
	iunlock(ip);
	return error;
}
/* ARGSUSED */
/*
 * efs_readlink -- VOP_READLINK for EFS.
 *
 * Rejects inodes that are not one of the symlink formats, then reads
 * the link contents through efs_readi() with the inode locked.
 */
static int
efs_readlink(
	bhv_desc_t *bdp,
	struct uio *uiop,
	struct cred *cr)
{
	struct inode *ip = bhvtoi(bdp);
	u_short fmt = ip->i_mode & IFMT;
	int error;

	if (!ISLINK(fmt))
		return EINVAL;
	ilock(ip);
	error = efs_readi(ip, uiop, 0, fmt, cr, NULL);
	iunlock(ip);
	return error;
}
/* ARGSUSED */
/*
 * efs_fsync -- VOP_FSYNC for EFS.
 *
 * FSYNC_INVAL flushes and invalidates every cached page covering the
 * file's extents; otherwise dirty pages are flushed (synchronously if
 * FSYNC_WAIT).  The inode itself is then pushed to disk unless this
 * is a data-only sync (FSYNC_DATA) and the inode's permanent size
 * (i_remember) already covers all written data.
 */
static int
efs_fsync(bdp, flag, cr, start, stop)
	bhv_desc_t *bdp;
	int flag;
	struct cred *cr;
	off_t start;
	off_t stop;
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	struct inode *ip;
	int error = 0;

	ip = bhvtoi(bdp);
	ilock(ip);
	if (flag & FSYNC_INVAL) {
		/* flush+invalidate everything up to the last extent's end */
		if (ip->i_flags & IINCORE && ip->i_numextents > 0) {
			struct extent *ex = &ip->i_extents[ip->i_numextents-1];
			VOP_FLUSHINVAL_PAGES(vp,0,BBTOB(ex->ex_offset+ex->ex_length) - 1,
				FI_REMAPF_LOCKED);
		}
	} else {
		VOP_FLUSH_PAGES(vp, (off_t)0, ip->i_size - 1,
			(flag & FSYNC_WAIT) ? 0 : B_ASYNC, FI_NONE, error);
		error = 0;
	}
	if (!(flag & FSYNC_DATA) ||
	    (((ip->i_mode & IFMT) == IFREG) &&
	    (ip->i_remember < ip->i_size))) {
		if (flag & FSYNC_WAIT)
			ip->i_flags |= ISYN;
		/*
		 * Since we just flushed all the data in the file, go ahead
		 * and bump i_remember all the way up to i_size. This will
		 * ensure that all of our data blocks are permanent.
		 */
		ip->i_remember = ip->i_size;
		error = efs_iupdat(ip);
	}
	iunlock(ip);
	return error;	/* XXX should start all and sleep on v_sync */
}
/* ARGSUSED */
/*
 * efs_inactive -- VOP_INACTIVE for EFS: the last reference to the
 * vnode went away.  Hand the inode to iinactive() and ask the VFS
 * layer to keep the vnode cached for possible reuse.
 */
static int
efs_inactive(bdp, cr)
	bhv_desc_t *bdp;
	struct cred *cr;
{
	iinactive(bhvtoi(bdp));
	return VN_INACTIVE_CACHE;
}
/*
 * Unix file system operations having to do with directory manipulation.
 */
/* ARGSUSED */
/*
 * efs_lookup -- VOP_LOOKUP for EFS.
 *
 * Looks 'nm' up in directory dvp.  On success *vpp holds a referenced
 * vnode; when the found inode is a device, the specfs "shadow" vnode
 * is returned instead of the raw EFS vnode.  The child inode comes
 * back locked from efs_dirlookup (DLF_IGET) and is unlocked here
 * unless it is the directory itself (".").
 */
static int
efs_lookup(bdp, nm, vpp, pnp, flags, rdir, cr)
	bhv_desc_t *bdp;
	char *nm;
	struct vnode **vpp;
	struct pathname *pnp;
	int flags;
	struct vnode *rdir;
	struct cred *cr;
{
	vnode_t *dvp = BHV_TO_VNODE(bdp);
	struct inode *dp, *ip;
	struct entry ent;
	int error;
	struct vnode *vp, *newvp;

	if (dvp->v_type != VDIR)
		return ENOTDIR;
	dp = bhvtoi(bdp);
	ilock(dp);
	error = efs_dirlookup(dp, nm, pnp, DLF_IGET|DLF_MUSTHAVE, &ent, cr);
	iunlock(dp);
	if (error)
		return error;
	ip = ent.e_ip;
	vp = itov(ip);
#ifdef _IRIX_LATER
	if ((ip->i_mode & ISVTX) && !(ip->i_mode & (IEXEC | IFDIR))
	    && efs_stickyhack) {
		VN_FLAGSET(vp, VISSWAP);
	}
#endif
	/* "." lookups return dp itself; don't unlock it twice */
	if (ip != dp)
		iunlock(ip);
	/*
	 * If vnode is a device return special vnode instead.
	 */
	if (ISVDEV(vp->v_type)) {
		newvp = spec_vp(vp, vp->v_rdev, vp->v_type, cr);
		VN_RELE(vp);
		if (newvp == NULL)
			return ENOSYS;
		vp = newvp;
	}
	*vpp = vp;
	return 0;
}
/*
 * efs_create -- VOP_CREATE for EFS.
 *
 * Creates or reuses the entry 'name' in directory bdp.  If no entry
 * exists, a new inode is allocated and entered (failing with EFBIG
 * when the caller's file size limit is zero, per XPG4).  If the entry
 * exists: VEXCL fails with EEXIST, writing a directory fails with
 * EISDIR, access is checked, and an existing regular file is
 * truncated when AT_SIZE was requested (timestamps updated regardless
 * of whether the size changed, per POSIX).  Devices come back wrapped
 * in their specfs vnode.  On success *vpp holds a referenced vnode.
 */
static int
efs_create(
	bhv_desc_t *bdp,
	char *name,
	struct vattr *vap,
	int flags,
	int mode,
	struct vnode **vpp,
	struct cred *cr)
{
	struct inode *dp, *ip;
	int error;
#ifdef CELL
	int truncated = 0;
#endif
	struct entry ent;
	struct vnode *vp, *newvp;

	/* drop any vnode the caller passed in; we return our own */
	if (*vpp) {
		VN_RELE(*vpp);
		*vpp = NULL;
	}
	dp = bhvtoi(bdp);
	ip = NULL;
	ilock(dp);
	if (error = efs_dirlookup(dp, name, NULL, DLF_IGET, &ent, cr))
		goto bad;
	/*
	 * If no entry was found, allocate an inode and enter it in dp.
	 * If an entry already exists and this is a non-exclusive create,
	 * check permissions and allow access for non-directory inodes.
	 * Read-only create of an existing directory is also allowed.
	 * Fail an exclusive create of anything which already exists.
	 */
	ip = ent.e_ip;
	if (ip == NULL) {
		if (error = efs_iaccess(dp, IWRITE, cr))
			goto bad;
		/*
		 * XPG4 says create cannot allocate a file if the
		 * file size limit is set to 0.
		 */
		if (flags & VZFS) {
			error = EFBIG;
			goto bad;
		}
		if (error = efs_ialloc(dp, MAKEIMODE(vap->va_type,vap->va_mode),
				       1, (vap->va_mask & AT_RDEV) ?
				       vap->va_rdev : NODEV, &ip, cr)) {
			goto bad;
		}
		if (error = efs_direnter(dp, ip, &ent, cr)) {
			/* undo the allocation: zero link count frees it */
			ip->i_nlink = 0;
			ip->i_flags |= ICHG;
			goto bad;
		}
		vp = itov(ip);
	} else {
		vp = itov(ip);
		if (flags & VEXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & IWRITE))
			error = EISDIR;
		else if (mode)
			error = efs_iaccess(ip, mode, cr);
		if (!error && vp->v_type == VREG && (vap->va_mask & AT_SIZE)) {
			/*
			 * Truncate regular file, if requested by caller.
			 * POSIX requires the time stamps be updated
			 * regardless of whether file actually changes.
			 */
			ip->i_flags |= IUPD|ICHG;
			error = efs_itrunc(ip, (scoff_t)vap->va_size, 0);
#ifdef CELL
			truncated = 1;
#endif
		}
		if (error)
			goto bad;
	}
	iunlock(dp);
	/* "." creates return dp itself; don't unlock it twice */
	if (ip != dp)
		iunlock(ip);
#ifdef CELL
	if (truncated)
		VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 0);
#endif
	/*
	 * If vnode is a device, return special vnode instead.
	 */
	if (ISVDEV(vp->v_type)) {
		newvp = spec_vp(vp, vp->v_rdev, vp->v_type, cr);
		VN_RELE(vp);
		if (newvp == NULL)
			return ENOSYS;
		vp = newvp;
	}
	*vpp = vp;
	return 0;
bad:
	iunlock(dp);
	if (ip) {
		if (ip == dp)
			irele(ip);
		else
			iput(ip);
	}
	return error;
}
/*
 * efs_bumplink -- raise ip's link count by one and push the inode to
 * disk.  The count is incremented even when it already stands at
 * MAXLINK (EMLINK is returned in that case), so the caller can always
 * recover unconditionally with a single efs_droplink().
 */
static int
efs_bumplink(struct inode *ip)
{
	int oldlink;

	ASSERT(ip->i_lockid == get_thread_id());
	ASSERT(ip->i_nlink >= 0);
	oldlink = ip->i_nlink;
	ip->i_nlink = oldlink + 1;
	if (oldlink >= MAXLINK)
		return EMLINK;
	ip->i_flags |= ICHG;
	return efs_iupdat(ip);
}
/*
 * efs_droplink -- lower ip's link count by one and mark the inode
 * changed, deferring the disk update.  The asymmetry with
 * efs_bumplink is deliberate: too many on-disk links is a harmless
 * fsck fixup, but too few could leave a directory entry pointing at
 * an unallocated inode.
 */
static void
efs_droplink(struct inode *ip)
{
	ASSERT(ip->i_lockid == get_thread_id());
	ASSERT(ip->i_nlink > 0);
	ip->i_flags |= ICHG;
	ip->i_nlink--;
}
/* ARGSUSED */
/*
 * efs_remove -- VOP_REMOVE for EFS.
 *
 * Removes entry 'nm' from directory bdp.  Mount points are busy
 * (EBUSY) and directories must go through rmdir (EPERM).  On a
 * successful unlink the target's link count is dropped and the inode
 * pushed to disk; interposed file systems are notified via
 * VOP_LINK_REMOVED after both inode locks are released.
 */
static int
efs_remove(
	bhv_desc_t *bdp,
	char *nm,
	struct cred *cr)
{
	struct inode *dp, *ip;
	int error;
	struct entry ent;
	int link_removed = 0;

	dp = bhvtoi(bdp);
	ilock(dp);
	error = efs_dirlookup(dp, nm, NULL, DLF_IGET|DLF_MUSTHAVE|DLF_REMOVE,
			&ent, cr);
	if (error) {
		iunlock(dp);
		return error;
	}
	ip = ent.e_ip;
	if (itov(ip)->v_vfsmountedhere)
		error = EBUSY;
	else if ((ip->i_mode & IFMT) == IFDIR)
		error = EPERM;
	else {
		error = efs_dirremove(dp, &ent, cr);
		if (!error) {
			efs_droplink(ip);
			link_removed = 1;
			error = efs_iupdat(ip);
		}
	}
	/* "." removes return dp itself; don't unlock it twice */
	if (ip != dp)
		iunlock(ip);
	iunlock(dp);
	if (link_removed) {
		/*
		 * Let interposed file systems know about removed links.
		 */
		VOP_LINK_REMOVED(itov(ip), itov(dp), (ip)->i_nlink==0);
	}
	irele(ip);
	return error;
}
/*
 * Link a file or a directory. Only the superuser is allowed to make a
 * link to a directory. Take pains to increment the source inode's link
 * count and update it before entering it in the target directory.
 */
/*
 * efs_link -- VOP_LINK for EFS.
 *
 * Makes target-directory entry 'tnm' refer to svp's inode.  Directory
 * hard links are refused (EPERM) and cross-file-system links fail
 * with EXDEV (the source vnode must carry an EFS behavior).  The
 * source link count is bumped and written to disk first, so a crash
 * mid-operation leaves at worst an overcounted link (fsck-fixable);
 * on any later failure the count is dropped again.
 */
static int
efs_link(
	bhv_desc_t *tbdp,
	struct vnode *svp,
	char *tnm,
	struct cred *cr)
{
	struct vnode *realvp;
	struct inode *tdp, *sip;
	struct entry ent;
	int error;
	bhv_desc_t *src_bdp;
	vn_bhv_head_t *src_bhp;

	/* link to the underlying vnode if svp is a wrapper */
	VOP_REALVP(svp, &realvp, error);
	if (error == 0)
		svp = realvp;
	if (svp->v_type == VDIR)
		return EPERM;
	/*
	 * For now, manually find the EFS behavior descriptor for
	 * the source vnode. If it doesn't exist then something
	 * is wrong and we should just return an error.
	 * Eventually we need to figure out how link is going to
	 * work in the face of stacked vnodes.
	 */
	src_bhp = VN_BHV_HEAD(svp);
	src_bdp = vn_bhv_lookup_unlocked(src_bhp, &efs_vnodeops);
	if (src_bdp == NULL) {
		return EXDEV;
	}
	sip = bhvtoi(src_bdp);
	ilock(sip);
	error = efs_bumplink(sip);
	iunlock(sip);
	if (!error) {
		tdp = bhvtoi(tbdp);
		ilock(tdp);
		error = efs_dirlookup(tdp, tnm, NULL, DLF_ENTER|DLF_EXCL,
				&ent, cr);
		if (!error)
			error = efs_direnter(tdp, sip, &ent, cr);
		iunlock(tdp);
	}
	/* entering failed: undo the link count bump */
	if (error) {
		ilock(sip);
		efs_droplink(sip);
		iunlock(sip);
	}
	return error;
}
/*
* Rename the file named by snm in source directory sdvp to tnm in tdvp.
* We can't do two-phase commit without extra state in the inode, but we
* can guarantee that tnm exists throughout the operation. Unlock the
* source inodes to avoid deadlock (this means the source entry can be
* unlinked while we're working). Keep the target directory locked from
* lookup through enter (rewrite).
*
* Sketch:
*
* 1. Bump the source inode's link count right away to keep it
* from being unlinked while it is unlocked.
*
* 2. Link the source inode into the target directory. If the
* target exists, rewrite its entry in-place (efs_direnter uses
* the offset discovered by efs_dirlookup; the target directory
* must remain locked across lookup and enter). If the source
* is a directory and it moved to a different parent, rewrite
* its ".." entry to point at the target directory.
*
* 3. Unlink the source directory entry, if it's still around.
* When renaming one hard link over another link to the same
* inode, only steps 1 and 3 are executed.
*/
/* ARGSUSED */
static int
efs_rename(
	bhv_desc_t *sbdp,	/* old (source) parent vnode */
	char *snm,		/* old (source) entry name */
	struct vnode *tdvp,	/* new (target) parent vnode */
	char *tnm,		/* new (target) entry name */
	struct pathname *tpnp,	/* new (target) pathname or null */
	struct cred *cr)
{
	int error, dflag;	/* error and efs_dirisempty result */
	int directory;		/* simple flags, see below */
	struct inode *sdp, *tdp;	/* source and target directories */
	struct inode *sip, *tip;	/* source and target inodes */
	struct entry sent, tent;	/* source and target entries */
	efs_ino_t newparent;	/* inumber of new parent directory */
	int tip_dropped = 0;	/* tip link dropped? */
	int tdp_dropped = 0;	/* tdp link dropped? */
	int sip_dropped = 0;	/* sip link dropped? */
	int sdp_dropped = 0;	/* sdp link dropped? */
	bhv_desc_t *tdbdp;
	/*
	 * Lookup the source inode (again -- it's a shame we can't keep
	 * a handle on what rename has already looked up). Increment its
	 * link count and update it on disk right now, to prevent someone
	 * else from removing it behind our back.
	 */
	sdp = bhvtoi(sbdp);
	ilock(sdp);
	error = efs_dirlookup(sdp, snm, NULL, DLF_IGET|DLF_MUSTHAVE|DLF_REMOVE,
		&sent, cr);
	iunlock(sdp);
	if (error)
		return error;
	sip = sent.e_ip;	/* locked and referenced by DLF_IGET */
	if (sip == sdp) {
		/* renaming "." is not allowed */
		irele(sip);
		return EINVAL;
	}
	directory = ((sip->i_mode & IFMT) == IFDIR);
	/* Bump failure is deliberately not checked until tdp is locked. */
	error = efs_bumplink(sip);
	iunlock(sip);
	/*
	 * 1. Lock target directory, check for an efs_bumplink error, and
	 * then lookup the target name, in case an inode is already linked
	 * under it in tdp. Tell efs_dirlookup to check for permission to
	 * unlink as well as permission to enter.
	 *
	 * Find the EFS behavior descriptor for the target directory
	 * vnode since it was not handed to us.
	 */
	tdbdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(tdvp), &efs_vnodeops);
	if (tdbdp == NULL) {
		/*
		 * NOTE(review): returning here leaks the sip reference
		 * taken by DLF_IGET above and leaves the bumped link
		 * count in place -- confirm whether cross-fs renames can
		 * actually reach this point.
		 */
		return EXDEV;
	}
	tdp = bhvtoi(tdbdp);
	tip = NULL;
	ilock(tdp);
	if (error)	/* efs_bumplink(sip) failed above */
		goto bad;
	if (error = efs_dirlookup(tdp, tnm, NULL, DLF_IGET|DLF_ENTER|DLF_REMOVE,
		    &tent, cr))
		goto bad;
	tip = tent.e_ip;
	if (tip == tdp) {
		/* target name is "." in the target directory */
		error = EINVAL;
		goto bad;
	}
	ASSERT(!(sent.e_flags & PN_ISDOT) && !(tent.e_flags & PN_ISDOT));
	/*
	 * Source and target are identical.
	 */
	if (sip == tip) {
		ASSERT(sip != sdp);
		error = 0;	/* no-op */
		goto bad;
	}
	/*
	 * Directory rename requires special error checks. We do not
	 * rely on the system call layer to check these cases, because
	 * there may be novel system call layers like the NFS server,
	 * which should not all have to do the same checks.
	 */
	newparent = 0;
	if (directory) {
		/*
		 * Renaming ".." is illegal.
		 */
		if ((sent.e_flags & PN_ISDOTDOT)
		    || (tent.e_flags & PN_ISDOTDOT)) {
			error = EINVAL;
			goto bad;
		}
		/*
		 * Check whether this rename would orphan the tree rooted at
		 * sip by moving it under itself. Note that efs_notancestor
		 * unlocks tdp, so we must lookup tip again afterwards. All
		 * calls to efs_notancestor go single-file through a monitor,
		 * to ensure that "mv /a/b /c/d/b2" won't lose the race with
		 * "mv /c/d /a/b/d2", resulting in "mv /a/b /a/b/d2/b2" and
		 * disconnection of the tree at /a/b.
		 */
		if (sdp != tdp) {
			newparent = tdp->i_number;
			if (tip) {
				iput(tip);
				tip = NULL;
			}
			if (error = efs_notancestor(sip, tdp, cr))
				goto bad;
			if (error = efs_dirlookup(tdp, tnm, NULL,
					DLF_IGET|DLF_ENTER|DLF_REMOVE,
					&tent, cr))
				goto bad;
			tip = tent.e_ip;
		}
	}
	if (tip == NULL) {
		/*
		 * If no target exists and the rename crosses directories,
		 * adjust the target directory link count to include the new
		 * ".." reference being added.
		 */
		if (newparent && (error = efs_bumplink(tdp)))
			goto bad;
		if (error = efs_direnter(tdp, sip, &tent, cr)) {
			if (newparent) {
				efs_droplink(tdp);
				tdp_dropped = 1;
			}
			goto bad;
		}
	} else {
		/*
		 * If target exists and it's a directory, check that both
		 * target and source are directories and that target can be
		 * destroyed, or that neither is a directory.
		 */
		if ((tip->i_mode & IFMT) == IFDIR) {
			if ((error = efs_dirisempty(tip, &dflag, cr))
			    || tip->i_nlink > 2) {
				if (error == ENOTEMPTY)
					error = EEXIST;	/* XXX */
				goto bad;
			}
			if (!directory) {
				error = EISDIR;
				goto bad;
			}
			if (itov(tip)->v_vfsmountedhere) {
				/* a file system is mounted on the target */
				error = EBUSY;
				goto bad;
			}
		} else {
			if (directory) {
				error = ENOTDIR;
				goto bad;
			}
		}
		/*
		 * Purge all name cache references to the old target.
		 */
		dnlc_purge_vp(itov(tip));
		/*
		 * 2. Link the source inode under the target name. This
		 * is atomic, but if the source inode is a directory, and
		 * if the rename isn't local to a directory, the source's
		 * ".." entry will be inconsistent till the efs_dirinit().
		 * Now that the target entry has been rewritten, drop the
		 * old target's link count.
		 */
		if (error = efs_dirrewrite(tdp, sip, &tent, cr))
			goto bad;
		efs_droplink(tip);
		tip_dropped = 1;
		if (directory && (dflag & DIR_HASDOT)) {
			/*
			 * If the source is a directory and the target
			 * existed already, drop the target's link count
			 * again to deallocate it.
			 */
			efs_droplink(tip);
		}
	}
	iunlock(tdp);
	if (tip) {
		iunlock(tip);
		/* tell interposed file systems about removed links */
		if (tip_dropped)
			VOP_LINK_REMOVED(itov(tip), tdvp, (tip)->i_nlink==0);
		irele(tip);
	}
	/*
	 * 3. Finally, remove the source. Since sdp and sip have
	 * been unlocked, someone else may have already unlinked sip,
	 * so we ignore ENOENT. If we're moving an inode over top of
	 * one of its hard links, remember to drop the link count we
	 * added in step 1. Also remember to drop the source dir's
	 * link count if renaming a directory to a new parent.
	 */
	ilock(sdp);
	ilock(sip);
	error = efs_dirlookup(sdp, snm, NULL, DLF_REMOVE, &sent, cr);
	if (error == ENOENT)
		error = 0;
	else if (!error) {
		if (sent.e_inum != sip->i_number) {
			/* a directory entry can't move behind our back */
			if (directory)
				panic("rename: lost directory");
		} else {
			/* rewrite ".." if the directory changed parents */
			if (newparent)
				error = efs_dirinit(sip, newparent, cr);
			if (!error && (newparent ||
			    (directory && tip != NULL))) {
				efs_droplink(sdp);
				sdp_dropped = 1;
			}
			if (!error) {
				error = efs_dirremove(sdp, &sent, cr);
				if (!error) {
					efs_droplink(sip);
					sip_dropped = 1;
				}
			}
		}
	}
	iunlock(sip);
	iunlock(sdp);
	/* tell interposed file systems about removed links */
	if (sdp_dropped)
		VOP_LINK_REMOVED(itov(sdp), itov(sip), (sdp)->i_nlink==0);
	if (sip_dropped)
		VOP_LINK_REMOVED(itov(sip), itov(sdp), (sip)->i_nlink==0);
	irele(sip);
	return error;
bad:
	/*
	 * Release old target inode if any and unlock target directory.
	 * Restore source's link count and iput it.
	 *
	 * NOTE(review): this path is also reached when efs_bumplink(sip)
	 * failed at the top; confirm efs_bumplink leaves i_nlink unchanged
	 * on failure, otherwise the efs_droplink below decrements a count
	 * that was never incremented.
	 */
	if (tip) {
		if (tip == tdp)
			irele(tip);
		else
			iput(tip);
	}
	iunlock(tdp);
	if (tdp_dropped)
		VOP_LINK_REMOVED(itov(tdp), itov(sip), (tdp)->i_nlink==0);
	ilock(sip);
	efs_droplink(sip);
	iunlock(sip);
	VOP_LINK_REMOVED(itov(sip), itov(sdp), (sip)->i_nlink==0);
	irele(sip);
	return error;
}
/* ARGSUSED */
static int
efs_mkdir(
	bhv_desc_t *bdp,	/* parent directory vnode behavior */
	char *dirname,		/* name of the new directory */
	struct vattr *vap,	/* attributes (va_mode) for the new dir */
	struct vnode **vpp,	/* OUT: vnode of the new directory */
	struct cred *cr)	/* user credentials */
{
	struct inode *dp, *cdp;
	struct entry ent;
	int error;
	dp = bhvtoi(bdp);
	ilock(dp);
	/*
	 * Since dp is not locked between the lookup and this mkdir,
	 * it could have been removed.
	 */
	if (dp->i_nlink <= 0) {
		error = ENOENT;
		goto out2;
	}
	if (error = efs_iaccess(dp, IEXEC | IWRITE, cr)) {
		goto out2;
	}
	/*
	 * Account up front for the child's ".." reference to dp;
	 * the count is dropped again at "out" on any later failure.
	 */
	if (error = efs_bumplink(dp))
		goto out;
	if (error =
	    efs_dirlookup(dp, dirname, NULL, DLF_ENTER|DLF_EXCL, &ent, cr))
		goto out;
	/* Allocate the child with link count 2: its name entry plus ".". */
	error = efs_ialloc(dp, IFDIR | (vap->va_mode & ~IFMT), 2, 0, &cdp, cr);
	if (!error) {
		/* write "." and ".." into the new directory */
		error = efs_dirinit(cdp, dp->i_number, cr);
		if (!error) {
			error = efs_direnter(dp, cdp, &ent, cr);
		}
		if (!error) {
			*vpp = itov(cdp);
			iunlock(cdp);
		} else {
			/* undo the allocation: iput will deallocate */
			cdp->i_nlink = 0;
			cdp->i_flags |= ICHG;
			iput(cdp);
		}
	}
out:
	if (error)
		efs_droplink(dp);
out2:
	iunlock(dp);
	return error;
}
/* ARGSUSED */
static int
efs_rmdir(
	bhv_desc_t *bdp,	/* parent directory vnode behavior */
	char *nm,		/* name of directory to remove */
	struct vnode *cdir,	/* caller's current directory */
	struct cred *cr)	/* user credentials */
{
	struct inode *dp, *cdp;
	struct entry ent;
	int error, dflag;
	int link_removed = 0;
	dp = bhvtoi(bdp);
	ilock(dp);
	error = efs_dirlookup(dp, nm, NULL, DLF_IGET|DLF_MUSTHAVE|DLF_REMOVE,
		&ent, cr);
	if (error) {
		iunlock(dp);
		return error;
	}
	cdp = ent.e_ip;	/* victim, locked and referenced by DLF_IGET */
	if (cdp == dp || itov(cdp) == cdir) {
		/* can't remove "." or the caller's current directory */
		error = EINVAL;
	} else if ((cdp->i_mode & IFMT) != IFDIR) {
		error = ENOTDIR;
	} else if (itov(cdp)->v_vfsmountedhere) {
		/* a file system is mounted on the victim */
		error = EBUSY;
	} else if (cdp->i_nlink > 2) {
		/* subdirectories still reference it via ".." */
		error = EEXIST;	/* XXX ENOTEMPTY */
	} else if (error = efs_dirisempty(cdp, &dflag, cr)) {
		if (error == ENOTEMPTY)
			error = EEXIST;	/* XXX */
	} else {
		error = efs_dirremove(dp, &ent, cr);
		if (!error) {
			link_removed = 1;
			/* drop the parent's count for the victim's ".." */
			if (dflag & DIR_HASDOTDOT) {
				efs_droplink(dp);
				(void) efs_iupdat(dp);
			}
			/* drop the victim's "." and name-entry links */
			if (dflag & DIR_HASDOT)
				cdp->i_nlink -= 2;
			else
				cdp->i_nlink--;
			cdp->i_flags |= ICHG;
			error = efs_iupdat(cdp);
		}
	}
	if (cdp != dp)
		iunlock(cdp);
	iunlock(dp);
	if (link_removed) {
		/*
		 * Let interposed file systems know about removed links.
		 */
		VOP_LINK_REMOVED(itov(dp), itov(cdp), (dp)->i_nlink==0);
		VOP_LINK_REMOVED(itov(cdp), itov(dp), (cdp)->i_nlink==0);
	}
	irele(cdp);
	return error;
}
/*
* efs_readdir is in efs_dir.c
*/
/* ARGSUSED */
static int
efs_symlink(
	bhv_desc_t *bdp,	/* ptr to parent dir vnode */
	char *linkname,		/* name of symbolic link */
	struct vattr *vap,	/* attributes */
	char *target,		/* target path */
	struct cred *cr)	/* user credentials */
{
	struct inode *dp, *ip;
	struct entry ent;
	int error = 0, pathlen;
	struct uio uio;
	struct iovec iov;
	struct pathname cpn, ccpn;
	/*
	 * Check component lengths of the target path name: the whole
	 * path must fit in MAXPATHLEN and each component in MAXNAMELEN.
	 */
	pathlen = strlen(target);
	if (pathlen >= MAXPATHLEN)		/* total string too long */
		return ENAMETOOLONG;
	if (pathlen >= MAXNAMELEN) {		/* is any component too long? */
		pn_alloc(&cpn);
		pn_alloc(&ccpn);
		bcopy(target, cpn.pn_path, pathlen);
		cpn.pn_pathlen = pathlen;
		while (cpn.pn_pathlen > 0 && !error) {
			error = pn_getcomponent(&cpn, ccpn.pn_path, 0);
			if (!error && cpn.pn_pathlen) {
				/* advance past slash */
				cpn.pn_path++;
				cpn.pn_pathlen--;
			}
		}
		/*
		 * Free the scratch pathnames exactly once.  The previous
		 * code freed them inside the loop on error and then again
		 * after the loop, double-freeing on any pn_getcomponent
		 * error other than ENAMETOOLONG.
		 */
		pn_free(&cpn);
		pn_free(&ccpn);
		if (error == ENAMETOOLONG)
			return error;
		error = 0;	/* other scan errors were always ignored */
	}
	dp = bhvtoi(bdp);
	ilock(dp);
	error = efs_dirlookup(dp, linkname, NULL, DLF_ENTER|DLF_EXCL, &ent, cr);
	if (!error) {
		error = efs_ialloc(dp, IFLNK | (vap->va_mode&~IFMT), 1, 0,
				&ip, cr);
	}
	if (!error) {
		/*
		 * Write the target path as the link's data, then enter
		 * the new inode into the parent directory.  (This also
		 * fixes the old "!error & newfile" bitwise-& typo; the
		 * newfile flag was constant 1 and has been removed.)
		 */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = 0;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_resid = iov.iov_len = pathlen;
		uio.uio_pmp = NULL;
		uio.uio_pio = 0;
		uio.uio_readiolog = 0;
		uio.uio_writeiolog = 0;
		uio.uio_pbuf = 0;
		iov.iov_base = target;
		error = efs_writei(ip, &uio, 0, cr, NULL);
		if (!error) {
			error = efs_direnter(dp, ip, &ent, cr);
		}
		if (error) {
			/* undo: mark the new inode for deallocation */
			ip->i_nlink = 0;
			ip->i_flags |= ICHG;
		}
		iput(ip);
	}
	iunlock(dp);
	return error;
}
/*
 * Build a file identifier (inode number + generation count) for NFS
 * and friends.  The efid is allocated here; the caller frees it.
 */
static int
efs_fid(
	bhv_desc_t *bdp,
	struct fid **fidpp)
{
	struct inode *ip = bhvtoi(bdp);
	struct efid *ep;
	ep = kmem_alloc(sizeof(struct efid), KM_SLEEP);
	ep->efid_len = sizeof(struct efid) - sizeof(ep->efid_len);
	ep->efid_pad = 0;
	ep->efid_ino = ip->i_number;
	ep->efid_gen = ip->i_gen;
	*fidpp = (struct fid *)ep;
	return 0;
}
/*
 * Fill in a caller-supplied fid with this inode's number and
 * generation count (no allocation, unlike efs_fid).
 */
static int
efs_fid2(
	bhv_desc_t *bdp,
	struct fid *fidp)
{
	struct inode *ip = bhvtoi(bdp);
	struct efid *ep = (struct efid *)fidp;
	/* the generic fid must be large enough to hold an efid */
	ASSERT(sizeof(fid_t) >= sizeof(struct efid));
	ep->efid_len = sizeof(struct efid) - sizeof(ep->efid_len);
	ep->efid_pad = 0;
	ep->efid_ino = ip->i_number;
	ep->efid_gen = ip->i_gen;
	return 0;
}
/* ARGSUSED */
static void
efs_rwlock(bhv_desc_t *bdp, vrwlock_t write_lock)
{
	/* EFS uses the single inode lock for both read and write locking. */
	struct inode *ip = bhvtoi(bdp);
	ilock(ip);
	ip->i_flags |= IRWLOCK;
}
/* ARGSUSED */
static void
efs_rwunlock(bhv_desc_t *bdp, vrwlock_t write_lock)
{
	/* Drop the rwlock flag while still holding the inode lock. */
	struct inode *ip = bhvtoi(bdp);
	ip->i_flags &= ~IRWLOCK;
	iunlock(ip);
}
/* ARGSUSED */
/*
 * Validate a proposed seek offset: any non-negative offset is legal.
 * Converted from the lone K&R-style definition in this file to an
 * ANSI prototype for consistency with the other vnode operations.
 */
static int
efs_seek(
	bhv_desc_t *bdp,
	off_t ooff,		/* old offset (unused) */
	off_t *noffp)		/* proposed new offset */
{
	if (*noffp < 0)
		return EINVAL;
	return 0;
}
/*
 * File/record locking entry point.  Normalize the lock range and hand
 * the request to the generic fs_frlock code, taking the inode rwlock
 * ourselves when the caller doesn't already hold it.
 */
static int
efs_frlock(
	bhv_desc_t *bdp,
	int cmd,
	struct flock *lfp,
	int flag,
	off_t offset,
	vrwlock_t vrwlock,
	cred_t *cr)
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	int locked_here = 0;
	int error = 0;
	if (vrwlock == VRWLOCK_NONE) {
		/* caller holds no lock: take it for the duration */
		efs_rwlock(bdp, VRWLOCK_WRITE);
		vrwlock = VRWLOCK_WRITE;
		locked_here = 1;
	}
	/* lock-cleanup requests carry no range to normalize */
	if (cmd != F_CLNLK)
		error = convoff(vp, lfp, lfp->l_whence, offset,
				SEEKLIMIT32, cr);
	if (!error)
		error = fs_frlock(bdp, cmd, lfp, flag, offset, vrwlock, cr);
	if (locked_here)
		efs_rwunlock(bdp, VRWLOCK_WRITE);
	return error;
}
/*
* efs_bmap is defined in efs_bmap.c, oddly enough.
*/
/*
 * Buffer I/O strategy routine.  Reads that begin at or beyond end of
 * file are completed immediately instead of being sent to the device;
 * everything else is passed down to the underlying device vnode.
 */
static void
efs_strategy(bhv_desc_t *bdp, struct buf *bp)
{
	struct inode *ip = bhvtoi(bdp);
	if (bp->b_flags & B_READ) {
		scoff_t size;
		/* reads require the caller to hold the inode lock */
		ASSERT(mutex_mine(&ip->i_lock));
		ASSERT(ip->i_lockid == get_thread_id());
		size = ip->i_size;
		if (size <= BBTOB(bp->b_offset)) {
			/* read entirely past EOF: cancel it */
			IGETINFO.ig_readcancel++;
			iodone(bp);
			return;
		}
	}
	VOP_STRATEGY(ip->i_mount->m_devvp, bp);
}
/* ARGSUSED */
/*
 * Validate a proposed mapping: reject any range that would extend
 * past the largest legal EFS file offset.
 */
static int
efs_map(
	bhv_desc_t *bdp,
	off_t off,
	size_t len,
	mprot_t prot,
	u_int flags,
	struct cred *cr,
	vnode_t **nvp)
{
	off_t end = off + (off_t)len;
	if (end > (off_t)EFS_MAX_FILE_OFFSET)
		return EINVAL;
	return 0;
}
/* ARGSUSED */
static int
efs_reclaim(
	bhv_desc_t *bdp,
	int flag)
{
	vnode_t *vp = BHV_TO_VNODE(bdp);
	struct inode *ip;
	/* a vnode being reclaimed must have no live mappings */
	ASSERT(!VN_MAPPED(vp));
	ip = bhvtoi(bdp);
	/*
	 * Toss any cached pages covering the file's extents before the
	 * inode is torn down.  The flush range runs from offset 0 to
	 * the end of the last extent.
	 */
	if (ip->i_flags & IINCORE && ip->i_numextents > 0) {
		struct extent *ex = &ip->i_extents[ip->i_numextents - 1];
		VOP_FLUSHINVAL_PAGES(vp, 0, BBTOB(ex->ex_offset+ex->ex_length) - 1,
			FI_NONE);
	}
	/* remove stale name cache entries referencing this vnode */
	dnlc_purge_vp(vp);
	/* the inode must already be clean at this point */
	ASSERT((ip->i_flags & (IMOD|IACC|IUPD|ICHG)) == 0);
	ireclaim(ip);
	return 0;
}
/* ARGSUSED */
/*
 * Change of file status flags (fcntl F_SETFL).  EFS imposes no
 * per-filesystem restrictions on flag changes, so always succeed.
 */
int
efs_setfl(
	bhv_desc_t *bdp,
	int oflags,
	int nflags,
	cred_t *cr)
{
	return 0;
}
#ifdef DATAPIPE
/* ARGSUSED */
/*
 * Report direct-I/O alignment and size constraints to the datapipe
 * code.  This is a copy from fcntl - F_DIOINFO cmd.
 */
int
efs_fspe_dioinfo(
	struct vnode *vp,
	struct dioattr *da)
{
#ifdef R10000_SPECULATION_WAR
	/* page alignment defends against R10000 speculative references */
	da->d_mem = _PAGESZ;
#else
	da->d_mem = FDIRIOALIGN;
#endif
	da->d_miniosz = BBSIZE;			/* one basic block minimum */
	da->d_maxiosz = ctob(v.v_maxdmasz - 1);	/* largest DMA transfer */
	return 0;
}
#endif
/* ARGSUSED */
/*
 * File-system-specific fcntl commands: direct-I/O info queries and
 * space allocation/freeing (F_ALLOCSP/F_FREESP and 64-bit variants,
 * all of which set the file size to l_start via efs_setattr).
 */
int
efs_fcntl(
	bhv_desc_t *bdp,
	int cmd,
	void *arg,
	int flags,
	off_t offset,
	cred_t *cr,
	rval_t *rvp)
{
	int error = 0;
	struct flock bf;
	struct irix5_flock i5_bf;
	vnode_t *vp = BHV_TO_VNODE(bdp);
	char abi = get_current_abi();
	switch (cmd) {
#ifdef DATAPIPE
	case F_GETOPS:
		fspe_get_ops(arg);
		break;
#endif
	case F_DIOINFO: {
		struct dioattr da;
		/* only works on files opened for direct I/O */
		if (!(flags & FDIRECT)) {
			error = EINVAL;
			break;
		}
#ifdef MH_R10000_SPECULATION_WAR
		if (IS_R10000())
			da.d_mem = _PAGESZ;
		else
			da.d_mem = FDIRIOALIGN;
#elif R10000_SPECULATION_WAR /* makes tlb invalidate during dma more
	effective, by decreasing the likelihood of a valid reference in the
	same page as dma user address space; leaving the tlb invalid avoids
	the speculative reference.  We return the more stringent
	"requirements" on the fcntl(), but do *NOT* enforce them
	in the read/write code, to be sure we don't break apps... */
		da.d_mem = _PAGESZ;
#else
		da.d_mem = FDIRIOALIGN;
#endif
		da.d_miniosz = BBSIZE;
		da.d_maxiosz = ctob(v.v_maxdmasz - 1);
		if (copyout(&da, arg, sizeof da))
			error = EFAULT;
		break;
	}
	case F_ALLOCSP:
	case F_FREESP:
	case F_ALLOCSP64:
	case F_FREESP64:
		if ((flags & FWRITE) == 0) {
			error = EBADF;
		} else if (vp->v_type != VREG) {
			error = EINVAL;
		} else if (vp->v_flag & VISSWAP) {
			/* never resize an active swap file */
			error = EACCES;
#if _MIPS_SIM == _ABI64
		} else if (ABI_IS_IRIX5_64(abi)) {
			/* 64-bit ABI flock is already the native layout */
			if (copyin((caddr_t)arg, &bf, sizeof bf)) {
				error = EFAULT;
				break;
			}
#endif
		} else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64 ||
			   ABI_IS_IRIX5_N32(abi)) {
			/*
			 * The n32 flock structure is the same size as the
			 * o32 flock64 structure.  So the copyin_xlate
			 * with irix5_n32_to_flock works here.
			 */
			if (COPYIN_XLATE((caddr_t)arg, &bf, sizeof bf,
					 irix5_n32_to_flock,
					 abi, 1)) {
				error = EFAULT;
				break;
			}
		} else {
			if (copyin((caddr_t)arg, &i5_bf, sizeof i5_bf)) {
				error = EFAULT;
				break;
			}
			/*
			 * Now expand to 64 bit sizes.
			 */
			bf.l_type = i5_bf.l_type;
			bf.l_whence = i5_bf.l_whence;
			bf.l_start = i5_bf.l_start;
			bf.l_len = i5_bf.l_len;
		}
		/* normalize l_start, then set the file size to it */
		if ((error = convoff(vp, &bf, 0, offset, SEEKLIMIT32, cr)) == 0) {
			struct vattr vattr;
			vattr.va_size = bf.l_start;
			vattr.va_mask = AT_SIZE;
			error = efs_setattr(bdp, &vattr, 0, cr);
		}
		break;
	default:
		error = EINVAL;
		break;
	}
	return error;
}
/*
* EFS doesn't fully support attributes. We allow getting/setting one
* particular attribute, though: _DEVNAME_ATTR is an attribute
* for special device files stored on EFS file systems. The value of this
* attribute is a hwgraph device path, and it's stored on disk the same
* way a symbolic link is stored. The on-disk EFS type is changed to CHRLNK
* or BLKLNK respectively. This is seen by upper layers as VCHR or VBLK.
*/
/*ARGSUSED*/
int	/* error */
efs_attr_get(bhv_desc_t *bdp, char *name, char *value, int *valuelenp,
	     int flags, struct cred *cred)
{
	struct inode *ip;
	u_short type;
	struct uio uio;
	struct iovec iov;
	int error;
	/* Is it a MAC label */
	if (strcmp(name, SGI_MAC_FILE) == 0)
		return _MAC_EFS_ATTR_GET(bdp, name, value, valuelenp, flags, cred);
	/* Make sure we're getting the only permissible attribute */
	if (strcmp(name, _DEVNAME_ATTR))
		return(ENOSYS);
	ip = bhvtoi(bdp);
	/*
	 * Make sure we're only trying to get this attribute on
	 * an appropriate hwgraph special device file.
	 */
	type = ip->i_mode & IFMT;
	if ((type != IFCHRLNK) && (type != IFBLKLNK))
		return(ENOSYS);
	/*
	 * The attribute value is stored like symlink data; read it
	 * into the caller's buffer with efs_readi.
	 *
	 * NOTE(review): *valuelenp is never updated with the number of
	 * bytes actually read -- confirm callers tolerate this.
	 */
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_resid = iov.iov_len = *valuelenp;
	uio.uio_pmp = NULL;
	uio.uio_pio = 0;
	uio.uio_readiolog = 0;
	uio.uio_writeiolog = 0;
	uio.uio_pbuf = 0;
	iov.iov_base = value;
	ilock(ip);
	error = efs_readi(ip, &uio, 0, type, cred, NULL);
	iunlock(ip);
	return(error);
}
/*ARGSUSED */
int
efs_attr_set(bhv_desc_t *bdp, char *name, char *value, int valuelen, int flags,
	     struct cred *cred)
{
	struct inode *ip;
	u_short type;
	int error;
	/* Is it a MAC Label */
	if (strcmp(name, SGI_MAC_FILE) == 0)
		return (_MAC_EFS_ATTR_SET(bdp, name, value, valuelen,
					  flags, cred));
	/* Make sure we're setting the only permissible attribute */
	if (strcmp(name, _DEVNAME_ATTR))
		return(ENOSYS);
	/*
	 * Make sure we've got permission to make special files,
	 * since by changing this attribute we're essentially creating
	 * a new special file.
	 */
	if (!cap_able_cred(cred, CAP_MKNOD))
		return(EPERM);
	ip = bhvtoi(bdp);
	ilock(ip);
	/*
	 * Only allow attribute to be written on hwgraph special device files.
	 * A plain IFCHR/IFBLK inode is first converted to the CHRLNK/BLKLNK
	 * on-disk type so the device path can be stored like symlink data.
	 */
	type = ip->i_mode & IFMT;
	if ((type == IFCHR) &&
	    IS_HWGRAPH_STRING_DEV(ip->i_rdev)) {
		error = efs_ichange_type(ip, IFCHRLNK);
	} else if ((type == IFBLK) &&
		   IS_HWGRAPH_STRING_DEV(ip->i_rdev)) {
		error = efs_ichange_type(ip, IFBLKLNK);
	} else if ((type == IFCHRLNK) || (type == IFBLKLNK))
		error = 0;
	else
		error = ENOSYS;
	if (!error) {
		struct uio uio;
		struct iovec iov;
		/* write the hwgraph device path as the inode's data */
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = 0;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_resid = iov.iov_len = valuelen;
		uio.uio_pmp = NULL;
		uio.uio_pio = 0;
		uio.uio_readiolog = 0;
		uio.uio_writeiolog = 0;
		uio.uio_pbuf = 0;
		iov.iov_base = value;
		error = efs_writei(ip, &uio, 0, cred, NULL);
		if (error) {
			/* On failure, restore old file type */
			efs_ichange_type(ip, type);
		}
	}
	iunlock(ip);
	return(error);
}
/*
 * Vnode operations vector for EFS.  Operations EFS does not implement
 * are filled with the generic stubs from sys/fs_subr.h (fs_nosys,
 * fs_noerr, fs_noval, fs_poll, ...).
 */
vnodeops_t efs_vnodeops = {
	BHV_IDENTITY_INIT_POSITION(VNODE_POSITION_BASE),
	(vop_open_t)fs_noerr,
	efs_close,
	efs_read,
	efs_write,
	efs_ioctl,
	efs_setfl,
	efs_getattr,
	efs_setattr,
	efs_access,
	efs_lookup,
	efs_create,
	efs_remove,
	efs_link,
	efs_rename,
	efs_mkdir,
	efs_rmdir,
	efs_readdir,
	efs_symlink,
	efs_readlink,
	efs_fsync,
	efs_inactive,
	efs_fid,
	efs_fid2,
	efs_rwlock,
	efs_rwunlock,
	efs_seek,
	fs_cmp,
	efs_frlock,
	(vop_realvp_t)fs_nosys,
	efs_bmap,
	efs_strategy,
	efs_map,
	(vop_addmap_t)fs_noerr,
	(vop_delmap_t)fs_noerr,
	fs_poll,
	(vop_dump_t)fs_nosys,
	fs_pathconf,
	(vop_allocstore_t)fs_nosys,
	efs_fcntl,
	efs_reclaim,
	efs_attr_get,
	efs_attr_set,
	(vop_attr_remove_t)fs_nosys,
	(vop_attr_list_t)fs_nosys,
	fs_cover,
	(vop_link_removed_t)fs_noval,
	fs_vnode_change,
	fs_tosspages,
	fs_flushinval_pages,
	fs_flush_pages,
	fs_invalfree_pages,
	fs_pages_sethole,
	(vop_commit_t)fs_nosys,
	(vop_readbuf_t)fs_nosys,
	fs_strgetmsg,
	fs_strputmsg,
};