1
0
Files
irix-657m-src/irix/kern/os/exec.c
2022-09-29 17:59:04 +03:00

1227 lines
29 KiB
C

/*************************************************************************
* *
* Copyright (C) 1986-1996 Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
**************************************************************************/
/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
/* All Rights Reserved */
/* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF */
/* UNIX System Laboratories, Inc. */
/* The copyright notice above does not evidence any */
/* actual or intended publication of such source code. */
#ident "$Revision: 3.355 $"
#include <sys/types.h>
#include <sys/acct.h>
#include <ksys/as.h>
#include <sys/atomic_ops.h>
#include <sys/buf.h>
#include <sys/capability.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <ksys/exception.h>
#include <sys/exec.h>
#include <ksys/fdt.h>
#include <sys/kabi.h>
#include <sys/kmem.h>
#include <sys/ksignal.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/pathname.h>
#include <sys/prctl.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/runq.h>
#include <sys/sema.h>
#include <sys/sat.h>
#include <sys/attributes.h>
#include <sys/signal.h>
#include <sys/sysinfo.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <ksys/vfile.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <ksys/vproc.h>
#include <string.h>
#include <sys/rtmon.h>
#include <sys/par.h>
#include <sys/imon.h>
#include <procfs/prsystm.h>
#include <sys/numa.h>
#include <sys/ckpt.h>
#ifdef _SYSTEM_SIMULATION
#include <sys/kopt.h>
#endif
#include "os/proc/pproc_private.h" /* XXX bogus */
#if CELL_IRIX
#include <ksys/cell.h>
#include <ksys/cell/membership.h>
#endif
#include <sys/dmi.h>
#include <sys/dmi_kern.h>
#ifdef R10000
#include <sys/hwperftypes.h>
#include <sys/hwperfmacros.h>
#endif
/*
 * Externally defined tunables and hooks used by exec.
 * These are dynamically changeable.
 */
/* Snapshotted once per exec by iexec() and recorded in ua_ncargs. */
extern int ncargs;
/* Not referenced in the part of this file visible here. */
extern int reset_limits_on_exec;
/* Not referenced in the part of this file visible here. */
extern sv_t nexit;
/* Called by remove_proc() when the exec'ing thread has RQF_GFX set. */
extern int gfx_exit(void);
/*
 * Length of /dev/fd prefix and suffix.
 *
 * remove_proc() rewrites a "/dev/fd/XXXXXXX" placeholder string: it skips
 * DEV_FD_PREFIX_LEN bytes, zeroes DEV_FD_SUFFIX_LEN bytes and then copies
 * the decimal descriptor number into that space.
 */
#define DEV_FD_PREFIX_LEN 8
#define DEV_FD_SUFFIX_LEN 8
/*
 * The exec switch table.  gexec() walks the entries in order and stops at
 * the first handler that returns something other than ENOEXEC, i.e. the
 * first one that recognizes the a.out type.
 */
int (*execsw[])(vnode_t *, vattr_t *, struct uarg *, int) = {
	elfexec,
	intpexec,
};

/* Number of entries in execsw[]. */
int nexectype = sizeof(execsw) / sizeof(execsw[0]);
/*
 * System call argument block for rexec().
 */
struct rexeca {
sysarg_t cell;	/* cell to run the new image on; validated by rexec() */
char *fname;	/* path of the image, handed to iexec() as fname */
char **argp;	/* argument vector, handed to iexec() as argp */
char **envp;	/* environment vector, handed to iexec() as envp */
};
/*
 * System call argument block for exec() and exece().
 */
struct execa {
char *fname;	/* path of the image, handed to iexec() as fname */
char **argp;	/* argument vector, handed to iexec() as argp */
char **envp;	/* environment vector; exec() ignores it and passes NULL */
};
/* Common worker behind exec(), exece() and rexec(). */
static int iexec(char *fname, char **argp, char **envp, cell_t cell);
#if CELL_IRIX
/*
 * TESTING - Set exec_rotor_low/exec_rotor_high to the min & max cells
 * you want to run on. The default is all cells. Set do_rexec to
 * turn it on using a combination of the REXEC bit flags.
 */
/* Last cell chosen by pick_cell(); also the value it returns. */
int exec_rotor = 0;
/* Inclusive bounds used when the round-robin rotor wraps. */
int exec_rotor_low=0, exec_rotor_high=MAX_CELLS - 1;
#define REXEC_ON 0x1	/* enable the selection loop */
#define REXEC_SKIP_GOLDEN 0x2	/* never pick golden_cell */
#define REXEC_RANDOM 0x4	/* derive the candidate from get_timestamp() */
int do_rexec = 0;
/*
 * Choose the cell an exec()/exece() should run on.
 *
 * With do_rexec == 0 the current cell (cellid()) is returned.  Otherwise,
 * while REXEC_ON is set, candidates are generated until one is found that
 * is in the cell membership and, if REXEC_SKIP_GOLDEN is set, is not
 * golden_cell.  If do_rexec is non-zero but REXEC_ON is clear, the loop is
 * not entered and the current value of exec_rotor is returned.
 *
 * NOTE(review): exec_rotor is a global that is updated here without any
 * locking visible in this file - confirm that is acceptable for this
 * testing hook.
 */
static cell_t
pick_cell(void)
{
/* pick a cell to run on */
if (!do_rexec) {
return(cellid());
}
while (do_rexec & REXEC_ON) {
if (do_rexec & REXEC_RANDOM) {
/* random mode: note the local cell is not excluded on this path */
exec_rotor = get_timestamp() %
((cell_membership >> 1) + 1);
}
else {
/* round-robin */
exec_rotor++;
if (exec_rotor > exec_rotor_high) {
exec_rotor = exec_rotor_low;
}
/* round-robin never selects the cell we are already on */
if (exec_rotor == cellid())
continue;
}
if (cell_in_membership(exec_rotor)) {
if ((do_rexec & REXEC_SKIP_GOLDEN) &&
exec_rotor == golden_cell) {
continue;
}
break;
}
}
return(exec_rotor);
}
#else
/* Without CELL_IRIX there is nothing to choose: use the current cell. */
#define pick_cell() cellid()
#endif
/*
 * exec system call: execute uap->fname with argument vector uap->argp
 * and no explicit environment (NULL is passed to iexec()).
 * The target cell is chosen by pick_cell().
 * Returns whatever iexec() returns.
 */
/* ARGSUSED */
int
exec(struct execa *uap, rval_t *rvp)
{
	cell_t target = pick_cell();

	return iexec(uap->fname, uap->argp, NULL, target);
}
/*
 * exece system call: like exec(), but the environment vector uap->envp
 * is passed through to iexec() as well.
 * The target cell is chosen by pick_cell().
 * Returns whatever iexec() returns.
 */
/* ARGSUSED */
int
exece(struct execa *uap, rval_t *rvp)
{
	cell_t target = pick_cell();

	return iexec(uap->fname, uap->argp, uap->envp, target);
}
/*
 * rexec - new process executes on the cell named in uap->cell.
 *
 * A negative cell is rejected with EINVAL.  On CELL_IRIX kernels the cell
 * must be in the current membership, otherwise ECELLDOWN is returned; on
 * other kernels only cell 0 is accepted (anything else is EINVAL).
 * On acceptance the result of iexec() is returned.
 */
/* ARGSUSED */
int
rexec(struct rexeca *uap, rval_t *rvp)
{
	cell_t target;

	target = uap->cell;
	if (target < 0)
		return EINVAL;
#if CELL_IRIX
	if (!cell_in_membership(target))
		return ECELLDOWN;
#else
	if (target != 0)
		return EINVAL;
#endif
	return iexec(uap->fname, uap->argp, uap->envp, target);
}
#ifdef DEBUG
/*
 * Debug knob read by iexec(): -1 logs every exec, any other value logs
 * only execs done by the process whose pid equals it.
 */
int exectrace = 0;
#endif
/*
 * Common implementation of exec(), exece() and rexec().
 *
 * Copies in the path name, resolves it to a vnode, fills in a struct uarg
 * describing the request and hands it to gexec() at level 0.
 *
 * fname - user-space pointer to the path of the image (UIO_USERSPACE)
 * argp  - argument vector, stored in ua_argp
 * envp  - environment vector, stored in ua_envp (exec() passes NULL)
 * cell  - cell to run on, stored in ua_cell
 *
 * Returns 0 on success or an errno value.  If gexec() fails while
 * ua_exec_cleanup is still set, the vnode in ua_exec_vp is released here.
 */
static int
iexec(char *fname, char **argp, char **envp, cell_t cell)
{
int error = 0;
vnode_t *vp;
struct pathname pn;
/* take one snapshot of the tunable so a concurrent change cannot be seen mid-exec */
int snapncargs = ncargs;
struct uarg args;
ckpt_handle_t *ckptp = NULL;
#ifdef CKPT
ckpt_handle_t ckpt = NULL;
/* only ask lookuppn() for a checkpoint handle when checkpointing is on */
if (ckpt_enabled)
ckptp = &ckpt;
#endif
SYSINFO.sysexec++;
/*
 * Lookup path name and remember last component for later.
 */
if (error = pn_get(fname, UIO_USERSPACE, &pn))
return error;
_SAT_PN_SAVE(&pn, curuthread);
#ifdef DEBUG
{
proc_t *p = curprocp;
if (exectrace == -1 || exectrace == p->p_pid)
cmn_err(CE_CONT, "exec of (pid=%d) %s by %s\n",
p->p_pid, pn.pn_buf, get_current_name());
}
#endif
/* start from an all-zero uarg; only the fields set below are non-zero */
bzero (&args, sizeof(args));
_SAT_PN_BOOK(SAT_EXEC, curuthread);
if (error = lookuppn(&pn, FOLLOW, NULLVPP, &vp, ckptp)) {
pn_free(&pn);
return error;
}
/*
 * Save audit information about the old state.
 * When the record's generated for real it'll be
 * after the exec has changed attributes.
 */
_SAT_SAVE_ATTR(SAT_CAP_SET_TOKEN, curuthread);
_SAT_SAVE_ATTR(SAT_UGID_TOKEN, curuthread);
/* bounded copy of pn_path, explicitly terminated in case it was truncated */
strncpy(args.ua_exec_file, pn.pn_path, PSCOMSIZ);
args.ua_exec_file[PSCOMSIZ - 1] = '\0';
args.ua_argp = argp;
args.ua_envp = envp;
args.ua_fname = fname;
args.ua_fnameseg = UIO_USERSPACE;
args.ua_ncargs = snapncargs;
args.ua_cell = cell;
args.ua_exec_vp = vp;
#ifdef CKPT
/* -1 means no checkpoint handle was obtained for this lookup */
args.ua_ckpt = (ckptp && *ckptp)? ckpt_lookup_add(vp, *ckptp) : -1;
#endif
#ifdef _SYSTEM_SIMULATION
{
proc_t *p = curprocp;
if (is_enabled(arg_sableexectrace))
printf("Exec of %s (PID %d) by %s\n", pn.pn_path, p->p_pid,
p->p_comm);
}
#endif
/* last use of pn was above; everything needed has been copied into args */
pn_free(&pn);
/* We have state that needs to be cleaned up on error */
args.ua_exec_cleanup = 1;
error = gexec(&vp, &args, 0);
if (error == 0) {
/*
 * Tell rtmond our new name. This actually generates more
 * events than we typically want since this will cause
 * events to be generated for every process in the system
 * when we often are just tracing a small handful of
 * processes. We may need to think about ways of trying to
 * avoid unneeded events ...
 */
if (IS_TSTAMP_EVENT_ACTIVE(RTMON_PIDAWARE)) {
#pragma mips_frequency_hint NEVER
fawlty_exec(args.ua_exec_file);
}
} else if (args.ua_exec_cleanup) { /* handle error */
#pragma mips_frequency_hint NEVER
/*
 * ua_exec_cleanup is still set, so cleanup is ours to do.
 * NOTE(review): presumably something below gexec() clears the flag
 * once it has taken over cleanup - that code is not visible here.
 */
if (error == EAGAIN)
nomemmsg("exec");
/* release via args.ua_exec_vp, not the local vp that gexec() may have updated */
VN_RELE(args.ua_exec_vp);
}
_SAT_EXEC(error);
return(error);
}
/*
 * Check that the current thread is allowed to exec vnode vp.
 *
 * vp     - candidate executable
 * vattrp - filled in with the vnode's mode, uid, gid and size
 * args   - exec state; ua_traceinval is set when a /proc-traced process
 *          execs an image it cannot read
 *
 * Returns 0 if the exec may proceed.  Otherwise returns the error from
 * VOP_GETATTR or VOP_ACCESS, EACCES if vp is not a regular file or has no
 * execute bit set for anyone, or ENOEXEC if the process is traced via
 * ptrace(2) and has no read access to the image.
 */
int
execpermissions(struct vnode *vp,
		struct vattr *vattrp,
		struct uarg *args)
{
	uthread_t *ut = curuthread;
	proc_t *p = UT_TO_PROC(ut);
	int error;

	vattrp->va_mask = AT_MODE|AT_UID|AT_GID|AT_SIZE;
	VOP_GETATTR(vp, vattrp, ATTR_EXEC, ut->ut_cred, error);
	if (error)
		return error;

	/*
	 * Check the access mode: an access failure is reported as is,
	 * anything else that rules the file out becomes EACCES.
	 */
	VOP_ACCESS(vp, VEXEC, ut->ut_cred, error);
	if (error != 0)
		return error;
	if (vp->v_type != VREG)
		return EACCES;
	if ((vattrp->va_mode & (VEXEC|(VEXEC>>3)|(VEXEC>>6))) == 0)
		return EACCES;

	if (p->p_trace || PTRACED(p)) {
#pragma mips_frequency_hint NEVER
		/*
		 * A traced process may exec freely as long as it could
		 * have read the image anyway.
		 */
		VOP_ACCESS(vp, VREAD, ut->ut_cred, error);
		if (error) {
			/*
			 * No read access.  A ptrace(2)'d process fails the
			 * exec(2); a /proc-traced one continues, but we
			 * arrange to invalidate the /proc vnode.
			 */
			if (p->p_flag & STRC)
				return ENOEXEC;
			args->ua_traceinval = 1;
		}
	}
	return 0;
}
/*
 * Decide whether the image is a permitted setuid/setgid/setcap program.
 *
 * On return args->ua_setid is:
 * 0 - not a setuid/setgid/setcap program or setuid/setgid/setcap disallowed
 * !0 - a permitted setuid/setgid/setcap program
 * It is written to 0 only on the early "nothing to do" path and to 1 when
 * a change of credentials is accepted; on the remaining path it is left
 * as the caller initialized it.
 *
 * Also reads any capabilities attached to the executable into args->ua_cap.
 * We need them here to determine if the image has attached capabilities and
 * later on we'll need them to recalculate our capabilities. If an image does
 * not have attached capabilities, we mark args->ua_cap as invalid for that
 * capability recalculation.
 *
 * If the image is a permitted setuid/setgid/setcap program, then we also set
 * args->ua_uid and args->ua_gid.
 *
 * If there are no errors, 0 is returned; otherwise an error code is returned.
 * The only error produced here is EPERM, for a setid image exec'd by an
 * unprivileged process that is being debugged (STRC).
 */
static int
execsetid(vnode_t *vp, vattr_t *vattrp, struct uarg *args)
{
uthread_t *ut = curuthread;
proc_t *p = UT_TO_PROC(ut);
cap_set_t *acap = &args->ua_cap;
int setid, capsize, error;
uid_t uid;
gid_t gid;
/*
 * Grab any capabilities attached to the executable.
 */
capsize = sizeof(cap_set_t);
VOP_ATTR_GET(vp, SGI_CAP_FILE, (char *)acap, &capsize,
ATTR_ROOT, sys_cred, error);
if (error ||
(acap->cap_effective & CAP_INVALID) ||
(acap->cap_permitted & CAP_INVALID) ||
(acap->cap_inheritable & CAP_INVALID)) {
#pragma mips_frequency_hint FREQUENT
/*
 * A non-zero error indicates that there is either no
 * capability set on the file or that if there was one, we
 * couldn't get it. Mark as an invalid capability set to
 * keep track of the fact the file had no capabilities
 * attached to it. Treat an invalid capability set as a
 * missing one.
 */
acap->cap_effective = CAP_INVALID;
acap->cap_permitted = CAP_INVALID;
acap->cap_inheritable = CAP_INVALID;
}
/*
 * If neither SUID, SGID or SCAP simply return successfully. Also
 * silently ignore SUID/SGID/SCAP for file systems mounted with
 * "nosuid" option.
 */
if (((vattrp->va_mode & (VSUID|VSGID)) == 0 &&
(acap->cap_effective & CAP_INVALID)) ||
(vp->v_vfsp->vfs_flag & VFS_NOSUID)) {
#pragma mips_frequency_hint FREQUENT
args->ua_setid = 0;
return 0;
}
/*
 * Compute proposed execution credentials.  setid is raised only if
 * something would actually change relative to the current credentials.
 */
setid = 0;
uid = ut->ut_cred->cr_uid;
gid = ut->ut_cred->cr_gid;
if (vattrp->va_mode & VSUID && vattrp->va_uid != uid) {
uid = vattrp->va_uid;
setid = 1;
}
if (vattrp->va_mode & VSGID && vattrp->va_gid != gid) {
gid = vattrp->va_gid;
setid = 1;
}
if (!(acap->cap_effective & CAP_INVALID)) {
/* the file carries a valid capability set: compare it to ours */
cap_set_t *ucapp = &ut->ut_cred->cr_cap;
if (acap->cap_effective != ucapp->cap_effective ||
acap->cap_permitted != ucapp->cap_permitted ||
acap->cap_inheritable != ucapp->cap_inheritable) {
setid = 1;
}
}
/*
 * Set setuid/setgid/setcap protections, if not tracing. If the
 * process is being debugged (STRC) we never allow SUID/SGID/SCAP.
 * If the process has system call and/or context switch tracing
 * enabled, we allow the tracing to remain active if the process is
 * privileged (CAP_PROC_MGT) or if it's being traced by a privileged
 * tracer (SPARPRIV).
 */
if (setid) {
if ((p->p_flag & (STRC|SPARSYS|SPARSWTCH)) &&
!(p->p_flag & SPARPRIV) && !_CAP_ABLE(CAP_PROC_MGT)) {
#pragma mips_frequency_hint NEVER
int s;
if (p->p_flag & STRC)
return EPERM;
/* unprivileged syscall/switch tracing: turn it off rather than fail */
s = p_lock(p);
p->p_flag &= ~(SPARSYS|SPARSWTCH|SPARINH);
p->p_parcookie = 0;
p_unlock(p, s);
}
args->ua_uid = uid;
args->ua_gid = gid;
args->ua_setid = 1;
}
return 0;
}
/*
 * Generic exec: validate and open the image, then offer it to each
 * handler in execsw[] until one does not answer ENOEXEC.
 *
 * vpp   - in/out vnode of the image; VOP_OPEN may store a different vnode
 * args  - exec state shared with the handlers
 * level - nesting level of this call; iexec() passes 0.  It indexes
 *         ua_prev_script[], is recorded in ua_level and is handed to the
 *         handlers.
 *
 * Returns 0 on success, otherwise an errno value.  On failure after the
 * open, and unless ua_exec_cleanup has been cleared, the vnode is closed
 * again and the process' p_script is restored to its previous value.
 */
int
gexec(struct vnode **vpp, struct uarg *args, int level)
{
proc_t *pp = curprocp;
uthread_t *ut = curuthread;
int i, error;
struct vnode *vp;
struct vattr vattr;
vp = *vpp;
/* nothing has been opened yet, so a permission failure needs no cleanup */
if ((error = execpermissions(vp, &vattr, args)) != 0)
return error;
VOP_OPEN(vp, vpp, FREAD, ut->ut_cred, error);
if (error)
return error;
/* pick up whatever vnode the open left in *vpp */
vp = *vpp;
/* remember the current script so it can be restored if this level fails */
args->ua_prev_script[level] = pp->p_script;
args->ua_level = level;
pp->p_script = NULL;
repl_interpose(vp, "ReplDefault");
/* size comes from the attributes fetched by execpermissions() */
if (vattr.va_size < MAGIC_SIZE) {
#pragma mips_frequency_hint NEVER
error = ENOEXEC;
goto closevp;
}
if (level == 0) {
#pragma mips_frequency_hint FREQUENT
/*
 * We only check for suid/sgid/scap on the thing being
 * exec()'d. In particular this means that for script
 * execution, we only check the script and not its
 * interpreter.
 */
error = execsetid(vp, &vattr, args);
if (error)
goto closevp;
}
/* unless P_FP_PRESERVE is set, reset the floating point flags for the new image */
if (!(ut->ut_pproxy->prxy_fp.pfp_fpflags & P_FP_PRESERVE)) {
#if TFP
ut->ut_pproxy->prxy_fp.pfp_fpflags = P_FP_IMPRECISE_EXCP;
#else
ut->ut_pproxy->prxy_fp.pfp_fpflags = 0;
#endif
}
error = check_dmapi_file(vp);
if (error)
goto closevp;
/*
 * Loop through the switch table looking for the module that
 * can handle this executable.
 */
for (i = 0; i < nexectype; i++) {
error = (*execsw[i]) (vp, &vattr, args, level);
if (error != ENOEXEC)
break;
}
if (!error) {
#pragma mips_frequency_hint FREQUENT
pp->p_exec_cnt++;
return 0;
}
if (!(args->ua_exec_cleanup)) {
/* don't handle error */
return error;
}
closevp:
ASSERT(error);
/* i is reused as the status output of VOP_CLOSE so that error is preserved */
VOP_CLOSE(vp, FREAD, L_TRUE, ut->ut_cred, i);
/*
 * Decrement use of new script, restore old one.
 */
if (pp->p_script) {
int s = VN_LOCK(pp->p_script);
int c = --pp->p_script->v_intpcount;
ASSERT(c >= 0);
VN_UNLOCK(pp->p_script, s);
/* last interpreter user gone: report it to imon */
if (!c)
IMON_EVENT(pp->p_script, ut->ut_cred, IMON_EXIT);
VN_RELE(pp->p_script);
}
pp->p_script = args->ua_prev_script[level];
return error;
}
/*
 * Read len bytes starting at file offset off of vp into a freshly
 * allocated kernel buffer and return that buffer through *addrp.
 *
 * Returns 0 when exactly len bytes were read.  Returns ENOEXEC when the
 * read fails or comes up short.  The buffer is not freed here on failure;
 * *addrp still points at it.
 * NOTE(review): confirm that callers pass *addrp to exhead_free() on the
 * failure path as well.
 */
int
exrdhead(struct vnode *vp, off_t off, size_t len, caddr_t *addrp)
{
	ssize_t resid;
	int rderr;

	*addrp = kern_malloc(len);
	rderr = vn_rdwr(UIO_READ, vp, *addrp, len, off, UIO_SYSSPACE, 0, 0L,
			get_current_cred(), &resid, &curuthread->ut_flid);
	if (rderr != 0)
		return ENOEXEC;
	/* a short read means the header is not all there */
	if (resid)
		return ENOEXEC;
	return 0;
}
/*
 * Free a header buffer; exrdhead() in this file allocates such buffers
 * with kern_malloc().
 */
void
exhead_free(caddr_t addr)
{
kern_free(addr);
}
/*
 * Add one segment of an executable to the address space vasid.
 *
 * vp      - vnode backing the segment
 * addr    - user virtual address of the data (need not be page aligned)
 * len     - length of the file data
 * zfodlen - passed to the address space layer as as_exec_zfodlen or
 *           as_load_zfodlen; when non-zero, the rest of the page after
 *           the end of the data is zeroed here
 * offset  - file offset of the data
 * prot    - PROT_* protections
 * flags   - MAP_* flags; MAP_FIXED is always added
 * vasid   - address space to add to; the code unlocks and relocks it with
 *           AS_EXCL around operations that must run unlocked
 * ckpt    - passed through unchanged in the as_*_ckpt field
 *
 * When the file offset and the address have the same offset within a page
 * (and the length is non-zero) the segment is mapped: AS_ADD_MMAPDEV for
 * character/block devices, AS_ADD_EXEC for regular files.  Otherwise the
 * data is loaded with AS_ADD_LOAD.
 *
 * Returns 0 on success, ENODEV for an unsupported vnode type, EFAULT if
 * zeroing the tail fails, or the error from VOP_MAP / VAS_ADDSPACE.
 */
int
execmap(vnode_t *vp,
caddr_t addr,
size_t len,
size_t zfodlen,
off_t offset,
int prot,
int flags,
vasid_t vasid,
int ckpt)
{
caddr_t oldaddr;
size_t oldlen;
off_t oldoffset;
vnode_t *oldvp;
as_addspace_t asadd;
/* REFERENCED */
as_addspaceres_t asres;
int error = 0;
int ismappable = 0;
oldvp = vp; /* for VN_RELE check at the end */
/*
 * Keep the caller's exact values, then round the address and the offset
 * down to a page boundary and grow len by the amount the address moved.
 */
oldaddr = addr;
addr = (caddr_t)((long)addr & ~POFFMASK);
oldlen = len;
len += ((size_t)oldaddr - (size_t)addr);
oldoffset = offset;
offset = (off_t)((long)offset & ~POFFMASK);
flags |= MAP_FIXED;
if (poff(oldoffset) == poff(oldaddr) && len) {
/* potentially map-able */
ismappable = 1;
/*
 * Non-writable mappings are assumed to be text. For these,
 * we set them up as MAP_SHARED so that we don't have to
 * allocate smem. If we need to write to them later on,
 * like when setting a breakpoint, we convert them to
 * MAP_PRIVATE. The MAP_TEXT flag tells us when to do this.
 */
if ((vp->v_type == VCHR) || (vp->v_type == VBLK))
/* always PRIVATE */
flags |= MAP_PRIVATE;
else if (vp->v_type == VREG) {
if (prot & PROT_WRITE)
flags |= MAP_PRIVATE;
else
flags |= MAP_SHARED|MAP_TEXT;
} else
return ENODEV;
/*
 * check if ok with file system to be mapped
 * We could get back a different vp ... (lofs)
 *
 * If a new vp is returned, then it has a reference
 * that we must VN_RELE.
 */
VOP_MAP(vp, offset, len, prot, flags, get_current_cred(),
&vp, error);
if (error)
return error;
}
if (ismappable) {
/*
 * this can occur when rld maps /dev/zero ...
 */
if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
asadd.as_op = AS_ADD_MMAPDEV;
asadd.as_addr = addr;
asadd.as_length = len;
asadd.as_prot = prot;
asadd.as_maxprot = PROT_ALL;
asadd.as_mmap_off = offset;
asadd.as_mmap_vp = vp;
asadd.as_mmap_flags = flags;
asadd.as_mmap_ckpt = ckpt;
/* XXX really shouldn't have to do this -
 * we shouldn't be called with aspacelock held
 */
VAS_UNLOCK(vasid);
error = VAS_ADDSPACE(vasid, &asadd, &asres);
ASSERT(error || asres.as_addr == addr);
VAS_LOCK(vasid, AS_EXCL);
/* device mappings skip the zero-fill step below */
goto out;
}
/*
 * If vnode is being mapped for execution, mark it for
 * replication also.
 * This is done here instead of at gexec() time to allow
 * mapping libraries for replication.
 * If it's a one-node system, don't interpose the
 * replication layer on vnode.
 */
if (((prot & (PROT_WRITE|PROT_EXEC)) == PROT_EXEC) &&
curprocp->p_shaddr == 0 && numnodes > 1) {
repl_interpose(vp, "ReplDefault");
}
asadd.as_op = AS_ADD_EXEC;
asadd.as_addr = addr;
asadd.as_length = len;
asadd.as_prot = prot;
asadd.as_maxprot = PROT_ALL;
asadd.as_exec_off = offset;
asadd.as_exec_vp = vp;
asadd.as_exec_flags = flags;
asadd.as_exec_ckpt = ckpt;
asadd.as_exec_zfodlen = zfodlen;
error = VAS_ADDSPACE(vasid, &asadd, &asres);
if (error) {
/*
 * don't repl_dispose - there could be others using
 * it since nothing here is locked. Shouldn't
 * really do any harm...
 */
goto out;
}
ASSERT(asres.as_addr == addr);
} else {
/* page offsets differ (or nothing to map): the data has to be loaded */
if (vp->v_type != VREG) {
error = ENODEV;
goto out;
}
asadd.as_op = AS_ADD_LOAD;
asadd.as_addr = addr;
asadd.as_length = len;
asadd.as_prot = prot;
asadd.as_maxprot = PROT_ALL;
/* the load request carries the caller's unrounded offset, address and length */
asadd.as_load_off = oldoffset;
asadd.as_load_vp = vp;
asadd.as_load_flags = flags;
asadd.as_load_ckpt = ckpt;
asadd.as_load_laddr = oldaddr;
asadd.as_load_llength = oldlen;
asadd.as_load_zfodlen = zfodlen;
error = VAS_ADDSPACE(vasid, &asadd, &asres);
if (error)
goto out;
ASSERT(asres.as_addr == addr);
}
if (zfodlen) {
/*
 * space already set up - all we have to do is zero the
 * portion from the end of the load data to the end of the
 * page
 */
size_t end;
ASSERT(error == 0);
end = (size_t)addr + len;
if (poff(end)) {
/* need to unlock so can fault */
VAS_UNLOCK(vasid);
if (uzero((caddr_t)end, NBPP - poff(end)) != 0)
error = EFAULT;
VAS_LOCK(vasid, AS_EXCL);
if (error)
goto out;
}
}
out:
if (oldvp != vp)
VN_RELE(vp); /* release ref. from VOP_MAP */
return error;
}
/*
 * Machine independent final setup goes here.
 *
 * Records vp as the process' exec vnode (taking a hold on it when it is
 * non-NULL), clears the signal-stack state kept in the proc proxy and
 * resets signal dispositions for the new image.
 *
 * vp - vnode of the new image; p->p_exec must be NULL on entry (asserted)
 */
void
setexecenv(struct vnode *vp)
{
int i;
uthread_t *ut = curuthread;
struct proc *p = curprocp;
proc_proxy_t *prxy = ut->ut_pproxy;
sigvec_t *sigvp = &p->p_sigvec;
/* p_exec is changed under p_who held for update */
mrlock(&p->p_who, MR_UPDATE, PZERO);
ASSERT(p->p_exec == NULL);
p->p_exec = vp;
if (p->p_exec)
VN_HOLD(p->p_exec); /* in with the new */
mrunlock(&p->p_who);
/* forget the old image's signal stack / context state */
prxy->prxy_oldcontext = 0;
sigemptyset(&prxy->prxy_sigonstack);
prxy->prxy_ssflags = 0;
prxy->prxy_sigsp = 0;
prxy->prxy_siglb = (caddr_t)0;
/*
 * In 1003.1b, section 3.1.2.2:
 *
 * "Signals set to the default action (SIG_DFL) in the calling
 * process shall be set to the default action in the new process
 * image. Signals set to be ignored (SIG_IGN) by the calling
 * process image shall be set to be ignored by the new process
 * image. Signals set to be caught by the calling process image
 * shall be set to the default action in the new process image."
 *
 * XXX Push all this into sig.c!
 */
sigvec_lock(&p->p_sigvec);
for (i = 0; i < NUMSIGS; i++) {
/* only caught signals (neither SIG_DFL nor SIG_IGN) need work */
if (sigvp->sv_hndlr[i] != SIG_DFL &&
sigvp->sv_hndlr[i] != SIG_IGN) {
sigvp->sv_hndlr[i] = SIG_DFL;
sigemptyset(&sigvp->sv_sigmasks[i]);
/* nothing queued for the process or this thread: done with this signal */
if (!sigvp->sv_sigpend.s_sigqueue &&
!ut->ut_sigpend.s_sigqueue)
continue;
if (sigismember(&ignoredefault, i + 1)) {
/* default action is "ignore": drop any queued instances */
sigdelq(&sigvp->sv_sigqueue, i + 1, sigvp);
sigdelq(&ut->ut_sigqueue, i + 1, sigvp);
} else
if (sigvp->sv_sigpend.s_sigqueue) {
/* move process-level queued entries onto this thread's queue */
sigqueue_t *sqp;
while (sqp = sigdeq(&sigvp->sv_sigpend,
i+1, sigvp))
{
ASSERT(sqp->sq_info.si_signo == i + 1);
sigaddq(&ut->ut_sigqueue, sqp, sigvp);
}
}
}
}
/*
 * Don't clear sv_sigrestart. It should be inherited from the
 * parent.
 */
sigorset(&sigvp->sv_sigign, &ignoredefault);
sigemptyset(&sigvp->sv_sigcatch);
sigemptyset(&sigvp->sv_signodefer);
sigemptyset(&sigvp->sv_sigresethand);
sigdiffset(&sigvp->sv_sainfo, &ignoredefault);
/*
 * If the user asked for NOCLDSTOP, they should still get it. We
 * turn off SNOWAIT, though, since in general we want children to
 * zombify (POSIX assumes this behavior). If the process wants
 * SNOWAIT on, it will need to explicitly set it via
 * sigaction().
 */
sigvp->sv_flags &= ~SNOWAIT;
/*
 * Pending signals remain pending and held signals remain held, so
 * don't clear p_phold or p_sig. We should clear out any 'default
 * == ignore' signals from p_sig, though.
 */
/* fold the process-level pending set into the thread's, then empty the former */
sigorset(&ut->ut_sig, &sigvp->sv_sigpend.s_sig);
sigemptyset(&sigvp->sv_sigpend.s_sig);
sigdiffset(&ut->ut_sig, &ignoredefault);
sigvec_unlock(&p->p_sigvec);
}
/*
 * Remove the old process image and address space.
 * At this point the exec no longer 'fails'; on error the new process
 * is simply killed (by the caller).
 *
 * p    - the exec'ing process; must be the current process (asserted)
 * args - exec state; ua_intpvp, ua_intpstkloc, ua_intppsloc, ua_ckpt and
 *        ua_prev_script[] are consumed here
 * vp   - not used in this function
 * rmp  - passed through to the address space layer as as_exec_rmp
 *
 * Returns 0 on success, EBUSY if the THRD_EXEC thread state request
 * reports a non-zero status, or the error from execopen().
 */
/*ARGSUSED4*/
int
remove_proc(
struct proc *p,
struct uarg *args,
struct vnode *vp,
int rmp)
{
int s;
int c, level;
int error;
vasid_t vasid;
as_deletespace_t asd;
uthread_t *ut = curuthread;
vproc_t *vpr;
int oldf;
ASSERT(p == curprocp);
/*
 * If the process is a graphics process, call the
 * graphics exit routine since we are giving up graphics.
 */
if (UT_TO_KT(ut)->k_runcond & RQF_GFX)
gfx_exit();
/*
 * Kill off other uthreads if this is pthreaded app.
 * This discards pshare structure, too.
 */
if (ut->ut_flags & UT_PTHREAD) {
vpr = UT_TO_VPROC(ut);
VPROC_HOLD(vpr);
/* s receives the status of the state change */
VPROC_THREAD_STATE(vpr, THRD_EXEC, s);
VPROC_RELE(vpr);
if (s)
return EBUSY;
}
/* if unblock on exec/exit flag is set, do that now */
if (p->p_unblkonexecpid) {
vpr = VPROC_LOOKUP(p->p_unblkonexecpid);
if (vpr != NULL) {
VPROC_UNBLKPID(vpr);
VPROC_RELE(vpr);
}
p->p_unblkonexecpid = 0;
}
/*
 * certain other things like profiling and single stepping are not
 * held across an exec
 */
#if R10000
/*
 * If we have a hardware event counter CPU monitoring structure
 * attached to the uthread and it is set up for PC profiling, disable
 * it because we're going to be tearing down the p_profp array below
 * which PC profiling depends on. We don't bother freeing it up here
 * since the eventual exit path for the process/uthread will handle
 * that and there are some complications with the tear down because
 * the last uthread in a process is torn down after the proc/proxy
 * are.
 */
if (ut->ut_cpumon && (ut->ut_cpumon->cm_flags & HWPERF_CM_PROFILING))
hwperf_disable_counters(ut->ut_cpumon);
#endif
s = p_lock(p);
/*
 * POSIX says to inherit all non-specified attributes.
 * XXX FIXADE is of marginal use to inherit..
 */
if (p->p_flag & SPROFFAST)
stopprfclk();
if (p->p_flag & SABORTSIG)
p->p_exitsig = 0;
p->p_flag &= ~(SPROF|SPROF32|SPROFFAST|SABORTSIG);
p->p_flag |= SEXECED; /* for setpgid() */
p_unlock(p, s);
prxy_flagclr(&p->p_proxy, PRXY_USERVME);
s = ut_lock(ut);
/* remember the old flags so UT_SRIGHT can be acted on after the lock is dropped */
oldf = ut->ut_flags;
ut->ut_flags &= ~(UT_STEP|UT_SRIGHT|UT_PTHREAD|UT_PTPSCOPE|UT_OWEUPC);
ut_unlock(ut, s);
if (oldf & UT_SRIGHT)
prsright_release(&p->p_proxy);
if (p->p_profp) {
(void) kern_free(p->p_profp);
p->p_profp = NULL;
p->p_profn = 0;
} else {
ASSERT(p->p_profn == 0);
}
/*
 * If ut_sighold points to prda, change back to kernel space.
 */
if (ut->ut_sighold != &p->p_sigvec.sv_sighold) {
s = ut_lock(ut);
/* copy the current hold mask before repointing ut_sighold */
p->p_sigvec.sv_sighold = *ut->ut_sighold;
ut->ut_sighold = &p->p_sigvec.sv_sighold;
ut_unlock(ut, s);
#if (_MIPS_SIM != _ABIO32)
ut->ut_prda->t_sys.t_flags &= ~T_HOLD_VALID;
#else
ut->ut_prda->t_sys.t_flags &= ~(T_HOLD_VALID|T_HOLD_KSIG_O32);
#endif
}
/*
 * if part of a share group - get rid of that
 */
if (IS_SPROC(&p->p_proxy)) {
/* Notify scheduler that we are leaving share group */
leaveshaddrRunq(ut);
asd.as_exec_detachstk = detachshaddr(p, SHDEXEC);
} else
asd.as_exec_detachstk = 0;
/*
 * Now that we've detached from the share group, we can check
 * to see if this is an intp suid exec. If so, we can do the
 * open and adjust the arg list appropriately.
 */
if (args->ua_intpvp) {
/*
 * Note that in fuexarg(), the args->ua_intpstkloc has been
 * set up to point at a location that contains args->ua_fname.
 * In intp, we set up args->ua_name to be "/dev/fd/XXXXXXX".
 *
 * Things would be a lot easier if we set up the
 * stack after sloughing the share group.
 */
int fd;
char fdnum[32];
int len;
if (error = execopen(&args->ua_intpvp, &fd)) {
/*
 * Caller will kill the process.
 */
return error;
}
#ifdef CKPT
/* a negative ua_ckpt means there is no checkpoint handle to attach */
if (args->ua_ckpt >= 0)
ckpt_setfd(fd, args->ua_ckpt);
#endif
/*
 * Substitute the name of the /dev/fd node into
 * the args structure.
 */
ASSERT(args->ua_intpstkloc);
/*
 * Get the fd value.
 */
numtos(fd, fdnum);
/*
 * Wipe out the "XXXXXXX" in the stack location.
 */
bzero(&args->ua_intpstkloc[DEV_FD_PREFIX_LEN],
DEV_FD_SUFFIX_LEN);
/*
 * Now insert the new descriptor value into the stack
 * location.
 * NOTE(review): this relies on the text of fd fitting in the
 * DEV_FD_SUFFIX_LEN bytes cleared above - confirm.
 */
strcpy(&args->ua_intpstkloc[DEV_FD_PREFIX_LEN], fdnum);
/*
 * Fix up the p_psargs location. We know that this must
 * be the last argument in the list according to how
 * intp sets up arguments.
 */
if (args->ua_intppsloc) {
/*
 * We need to make sure we don't stomp off the end
 * of the psargs array when copying the fd number.
 */
len = ((PSARGSZ - 1) -
(&args->ua_intppsloc[DEV_FD_PREFIX_LEN] -
&p->p_psargs[0]));
if (len > 0) {
/* copy as much of the number as fits and terminate it explicitly */
len = MIN(strlen(fdnum), len);
strncpy(&args->ua_intppsloc[DEV_FD_PREFIX_LEN],
fdnum, len);
args->ua_intppsloc[DEV_FD_PREFIX_LEN+len] = '\0';
}
}
}
/*
 * Perform exec processing, including close-on-exec.
 *
 * Note that for sprocs the fdt_exec logic assumes that
 * detachshaddr (and hence fdt_detach_shaddr) has already
 * been called. This ensures that close-on-exec
 * processing occurs for a sproc after it has gotten
 * its own copy of the fdt.
 */
fdt_exec();
/* remove old exec image */
if (p->p_exec) {
mrlock(&p->p_who, MR_UPDATE, PZERO);
VN_RELE(p->p_exec); /* out with the old */
p->p_exec = NULL;
mrunlock(&p->p_who);
}
/*
 * release old scripts before rexec - ok to do here because
 * any failures from this point on cause the process
 * to be killed
 */
for (level = 0; level < 2; level++) {
if (args->ua_prev_script[level]) {
s = VN_LOCK(args->ua_prev_script[level]);
c = --args->ua_prev_script[level]->v_intpcount;
ASSERT(c >= 0);
VN_UNLOCK(args->ua_prev_script[level], s);
/* last interpreter user gone: report it to imon */
if (!c)
IMON_EVENT(args->ua_prev_script[level],
ut->ut_cred, IMON_EXIT);
VN_RELE(args->ua_prev_script[level]);
args->ua_prev_script[level] = NULL;
}
}
/* Remove old address space */
asd.as_op = AS_DEL_EXEC;
asd.as_exec_rmp = rmp;
/* hand the prda to the address space layer and forget our pointer to it */
asd.as_exec_prda = ut->ut_prda;
ut->ut_prda = 0;
as_lookup_current(&vasid);
VAS_DELETESPACE(vasid, &asd, NULL);
ASSERT(ut->ut_sharena == NULL);
/* The new tlbpid is needed after we're done with the
 * old process and before we need anything for the new one.
 * This effectively flushes the tlb of any pages that
 * were released by execbld.
 * Getxfile may read in the text of the new process and
 * will need to use the new tlbpid.
 */
new_tlbpid(&ut->ut_as, VM_TLBINVAL);
/*
 * Initialize the wired tlb entries for the new process.
 */
setup_wired_tlb(1);
return 0;
}
int
execopen(struct vnode **vpp, int *fdp)
{
struct vnode *vp = *vpp;
struct vnode *openvp = vp;
vfile_t *fp;
int error = 0;
int filemode = FREAD;
VN_HOLD(vp); /* open reference */
if (error = vfile_alloc(filemode, &fp, fdp)) {
VN_RELE(vp);
*fdp = -1; /* just in case vfile_alloc changed value */
return error;
}
VOP_OPEN(openvp, &vp, filemode, sys_cred, error);
if (error) {
VN_RELE(vp);
vfile_alloc_undo(*fdp, fp);
*fdp = -1;
return error;
}
vfile_ready(fp, vp);
*vpp = vp; /* vnode should not have changed */
return 0;
}
int
execclose(int fd)
{
int error;
auto vfile_t *fp;
if (error = closefd(fd, &fp))
return error;
return vfile_close(fp);
}
/*
 * Check if the file has DMAPI managed regions/events. If so,
 * generate a DMAPI read event for the entire file.
 *
 * Only a "read" event will be generated since check_dmapi_file
 * is intended to be called only for gexec() and elfmap() files.
 * In these cases, if the process later uses the mprotect() syscall
 * to upgrade the page protection to include PROT_WRITE, the mapping
 * type is changed to private.
 *
 * Note that the VOP_FCNTL used here returns an error if the
 * underlying file system is unaware of the F_DMAPI subfunction
 * being used. This causes no problems, since a non-zero return
 * status is simply ignored. Only in the case of a zero return status
 * can we be sure that the VOP_FCNTL F_DMAPI subfunction
 * DM_FCNTL_MAPEVENT is implemented for this file system, and then
 * interpret the maprq.error field.
 *
 * vp - vnode about to be exec'd or mapped
 *
 * Returns 0 if the file system does not implement the request or the
 * event was delivered without error, otherwise the maprq.error value.
 * On CELL_IRIX kernels the check is skipped and 0 is always returned.
 *
 * The cell test uses "#if CELL_IRIX", like every other cell conditional
 * in this file, so that a build with CELL_IRIX defined as 0 gets the
 * non-cell (DMAPI aware) variant here as well.
 */
/* ARGSUSED */
int
check_dmapi_file(vnode_t *vp)
{
#if CELL_IRIX
	/*
	 * This code doesn't work with cells for the following reasons:
	 * 1) idl cannot tolerate null rval pointer
	 * 2) stack variable dmfcntl is passed as an in/out param; server
	 *    cell accesses it directly (read and write); this will fail
	 *    on sn0 due to the firewall protections
	 */
	return(0);
#else
	int error;
	dm_fcntl_t dmfcntl;

	dmfcntl.dmfc_subfunc = DM_FCNTL_MAPEVENT;
	dmfcntl.u_fcntl.maprq.length = 0; /* length = 0 for whole file */
	dmfcntl.u_fcntl.maprq.max_event = DM_EVENT_READ;
	VOP_FCNTL(vp, F_DMAPI, &dmfcntl, 0, (off_t)0, sys_cred, NULL, error);
	if (error == 0) {
		/* request understood: the real status is in maprq.error */
		if ((error = dmfcntl.u_fcntl.maprq.error) != 0)
			return error;
	}
	/* a VOP_FCNTL failure only means "no DMAPI here" and is ignored */
	return 0;
#endif
}