/************************************************************************* * * * Copyright (C) 1986-1996 Silicon Graphics, Inc. * * * * These coded instructions, statements, and computer programs contain * * unpublished proprietary information of Silicon Graphics, Inc., and * * are protected by Federal copyright law. They may not be disclosed * * to third parties or copied or duplicated in any form, in whole or * * in part, without the prior written consent of Silicon Graphics, Inc. * * * **************************************************************************/ /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ /* All Rights Reserved */ /* THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF */ /* UNIX System Laboratories, Inc. */ /* The copyright notice above does not evidence any */ /* actual or intended publication of such source code. */ #ident "$Revision: 3.355 $" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _SYSTEM_SIMULATION #include #endif #include "os/proc/pproc_private.h" /* XXX bogus */ #if CELL_IRIX #include #include #endif #include #include #ifdef R10000 #include #include #endif /* these are dynamically changable */ extern int ncargs; extern int reset_limits_on_exec; extern sv_t nexit; extern int gfx_exit(void); /* * Length of /dev/fd prefix and suffix */ #define DEV_FD_PREFIX_LEN 8 #define DEV_FD_SUFFIX_LEN 8 /* * The exec switch table. Called in order to try and exec a * particular a.out type. 
*/
int (*execsw[])(vnode_t *, vattr_t *, struct uarg *, int) = {
	elfexec,	/* native ELF images */
	intpexec,	/* "#!" interpreter scripts */
};
int nexectype = sizeof(execsw) / sizeof(int (*)());

/*
 * System call argument block for rexec(2): target cell plus the
 * usual path/argv/envp triple.
 */
struct rexeca {
	sysarg_t cell;
	char *fname;
	char **argp;
	char **envp;
};

/*
 * System call argument block for exec(2)/exece(2).
 */
struct execa {
	char *fname;
	char **argp;
	char **envp;
};

static int iexec(char *fname, char **argp, char **envp, cell_t cell);

#if CELL_IRIX
/*
 * TESTING - Set exec_rotor_low/exec_rotor_high to the min & max cells
 * you want to run on. The default is all cells. Set do_rexec to
 * turn it on using a combination of the REXEC bit flags.
 */
int exec_rotor = 0;
int exec_rotor_low=0, exec_rotor_high=MAX_CELLS - 1;
#define REXEC_ON 0x1		/* rotor enabled */
#define REXEC_SKIP_GOLDEN 0x2	/* never pick the golden cell */
#define REXEC_RANDOM 0x4	/* random rather than round-robin */
int do_rexec = 0;

/*
 * Choose the cell a newly exec'd image should run on.  With do_rexec
 * off this is simply the local cell; otherwise cells are picked
 * either at random or round-robin, skipping cells not in the current
 * membership (and optionally the golden cell).
 */
static cell_t
pick_cell(void)
{
	/* pick a cell to run on */
	if (!do_rexec) {
		return(cellid());
	}
	while (do_rexec & REXEC_ON) {
		if (do_rexec & REXEC_RANDOM) {
			exec_rotor = get_timestamp() %
				((cell_membership >> 1) + 1);
		} else {
			/* round-robin */
			exec_rotor++;
			if (exec_rotor > exec_rotor_high) {
				exec_rotor = exec_rotor_low;
			}
			/* never round-robin onto ourselves */
			if (exec_rotor == cellid())
				continue;
		}
		if (cell_in_membership(exec_rotor)) {
			if ((do_rexec & REXEC_SKIP_GOLDEN) &&
			    exec_rotor == golden_cell) {
				continue;
			}
			break;
		}
	}
	return(exec_rotor);
}
#else
#define pick_cell() cellid()
#endif

/*
 * exec(2) - execute a file, no explicit environment pointer.
 */
/* ARGSUSED */
int
exec(struct execa *uap, rval_t *rvp)
{
	cell_t cell;

	cell = pick_cell();
	return (iexec(uap->fname, uap->argp, NULL, cell));
}

/*
 * exece(2) - execute a file with an explicit environment.
 */
/* ARGSUSED */
int
exece(struct execa *uap, rval_t *rvp)
{
	cell_t cell;

	cell = pick_cell();
	return (iexec(uap->fname, uap->argp, uap->envp, cell));
}

/*
 * rexec - new process executes on specified cell
 */
/* ARGSUSED */
int
rexec(struct rexeca *uap, rval_t *rvp)
{
	cell_t cell;

	cell = uap->cell;
	if (cell < 0)
		return EINVAL;
#if CELL_IRIX
	if (!cell_in_membership(cell))
		return ECELLDOWN;
#else
	/* single-cell kernel: only cell 0 exists */
	if (cell != 0)
		return EINVAL;
#endif
	return (iexec(uap->fname, uap->argp, uap->envp, cell));
}

#ifdef DEBUG
/* set to a pid (or -1 for all processes) to log execs via cmn_err() */
int exectrace = 0;
#endif

/*
 * Common body of the exec family: resolve the path name, fill in the
 * uarg block, and hand off to gexec().  On failure the vnode
 * reference obtained from lookuppn() is released here, unless a
 * lower layer already consumed it (indicated by ua_exec_cleanup
 * having been cleared).
 */
static int
iexec(char *fname, char **argp, char **envp, cell_t cell)
{
	int error = 0;
	vnode_t *vp;
	struct pathname pn;
	int snapncargs = ncargs;	/* snapshot; ncargs is tunable */
	struct uarg args;
	ckpt_handle_t *ckptp = NULL;
#ifdef CKPT
	ckpt_handle_t ckpt = NULL;
	if (ckpt_enabled)
		ckptp = &ckpt;
#endif
	SYSINFO.sysexec++;
	/*
	 * Lookup path name and remember last component for later.
	 */
	if (error = pn_get(fname, UIO_USERSPACE, &pn))
		return error;
	_SAT_PN_SAVE(&pn, curuthread);
#ifdef DEBUG
	{
	proc_t *p = curprocp;
	if (exectrace == -1 || exectrace == p->p_pid)
		cmn_err(CE_CONT, "exec of (pid=%d) %s by %s\n",
			p->p_pid, pn.pn_buf, get_current_name());
	}
#endif
	bzero (&args, sizeof(args));
	_SAT_PN_BOOK(SAT_EXEC, curuthread);
	if (error = lookuppn(&pn, FOLLOW, NULLVPP, &vp, ckptp)) {
		pn_free(&pn);
		return error;
	}
	/*
	 * Save audit information about the old state.
	 * When the record's generated for real it'll be
	 * after the exec has changed attributes.
	 */
	_SAT_SAVE_ATTR(SAT_CAP_SET_TOKEN, curuthread);
	_SAT_SAVE_ATTR(SAT_UGID_TOKEN, curuthread);
	strncpy(args.ua_exec_file, pn.pn_path, PSCOMSIZ);
	args.ua_exec_file[PSCOMSIZ - 1] = '\0';
	args.ua_argp = argp;
	args.ua_envp = envp;
	args.ua_fname = fname;
	args.ua_fnameseg = UIO_USERSPACE;
	args.ua_ncargs = snapncargs;
	args.ua_cell = cell;
	args.ua_exec_vp = vp;
#ifdef CKPT
	args.ua_ckpt = (ckptp && *ckptp)? ckpt_lookup_add(vp, *ckptp) : -1;
#endif
#ifdef _SYSTEM_SIMULATION
	{
	proc_t *p = curprocp;
	if (is_enabled(arg_sableexectrace))
		printf("Exec of %s (PID %d) by %s\n",
			pn.pn_path, p->p_pid, p->p_comm);
	}
#endif
	pn_free(&pn);
	/* We have state that needs to be cleaned up on error */
	args.ua_exec_cleanup = 1;
	error = gexec(&vp, &args, 0);
	if (error == 0) {
		/*
		 * Tell rtmond our new name. This actually generates more
		 * events than we typically want since this will cause
		 * events to be generated for every process in the system
		 * when we often are just tracing a small handful of
		 * processes. We may need to think about ways of trying to
		 * avoid unneeded events ...
		 */
		if (IS_TSTAMP_EVENT_ACTIVE(RTMON_PIDAWARE)) {
#pragma mips_frequency_hint NEVER
			fawlty_exec(args.ua_exec_file);
		}
	} else if (args.ua_exec_cleanup) {
		/* handle error */
#pragma mips_frequency_hint NEVER
		if (error == EAGAIN)
			nomemmsg("exec");
		VN_RELE(args.ua_exec_vp);
	}
	_SAT_EXEC(error);
	return(error);
}

/*
 * Get executable permissions (and capabilities if requested) of a vnode.
 * Check for various access/execute/etc. issues and return an appropriate
 * error if things are amiss.
 */
int
execpermissions(struct vnode *vp, struct vattr *vattrp, struct uarg *args)
{
	int error;
	uthread_t *ut = curuthread;
	proc_t *p = UT_TO_PROC(ut);

	vattrp->va_mask = AT_MODE|AT_UID|AT_GID|AT_SIZE;
	VOP_GETATTR(vp, vattrp, ATTR_EXEC, ut->ut_cred, error);
	if (error)
		return error;
	/*
	 * Check the access mode.
	 * Must be a regular file with at least one of the user/group/other
	 * execute bits set.
	 */
	VOP_ACCESS(vp, VEXEC, ut->ut_cred, error);
	if (error != 0 || vp->v_type != VREG ||
	    (vattrp->va_mode & (VEXEC|(VEXEC>>3)|(VEXEC>>6))) == 0) {
		if (error == 0)
			error = EACCES;
		return error;
	}
	if (p->p_trace || PTRACED(p)) {
#pragma mips_frequency_hint NEVER
		/*
		 * If we have read access then it's okay to let the exec()
		 * happen.
		 */
		VOP_ACCESS(vp, VREAD, ut->ut_cred, error);
		if (!error)
			return 0;
		/*
		 * If process is traced via ptrace(2), fail the exec(2).
		 */
		if (p->p_flag & STRC)
			return ENOEXEC;
		/*
		 * Process is traced via /proc.
		 * Arrange to invalidate the /proc vnode.
		 */
		args->ua_traceinval = 1;
	}
	return 0;
}

/*
 * Returns with args->ua_setid[level] set to:
 *	0 - not a setuid/setgid/setcap program or setuid/setgid/setcap
 *	    disallowed
 *	!0 - a permitted setuid/setgid/setcap program
 *
 * Also reads any capabilities attached to the executable into args->ua_cap.
 * We need them here to determine if the image has attached capabilities and
 * later on we'll need them to recalculate our capabilities. If an image does
 * not have attached capabilities, we mark args->ua_cap as invalid for that
 * capability recalculation.
*
 * If the image is a permitted setuid/setgid/setcap program, then we also set
 * args->ua_uid and args->ua_gid.
 *
 * If there are no errors, 0 is returned; otherwise an error code is returned.
 */
static int
execsetid(vnode_t *vp, vattr_t *vattrp, struct uarg *args)
{
	uthread_t *ut = curuthread;
	proc_t *p = UT_TO_PROC(ut);
	cap_set_t *acap = &args->ua_cap;
	int setid, capsize, error;
	uid_t uid;
	gid_t gid;

	/*
	 * Grab any capabilities attached to the executable.
	 */
	capsize = sizeof(cap_set_t);
	VOP_ATTR_GET(vp, SGI_CAP_FILE, (char *)acap, &capsize,
		ATTR_ROOT, sys_cred, error);
	if (error ||
	    (acap->cap_effective & CAP_INVALID) ||
	    (acap->cap_permitted & CAP_INVALID) ||
	    (acap->cap_inheritable & CAP_INVALID)) {
#pragma mips_frequency_hint FREQUENT
		/*
		 * A non-zero error indicates that there is either no
		 * capability set on the file or that if there was one, we
		 * couldn't get it. Mark as an invalid capability set to
		 * keep track of the fact the file had no capabilities
		 * attached to it. Treat an invalid capability set as a
		 * missing one.
		 */
		acap->cap_effective = CAP_INVALID;
		acap->cap_permitted = CAP_INVALID;
		acap->cap_inheritable = CAP_INVALID;
	}
	/*
	 * If neither SUID, SGID or SCAP simply return successfully. Also
	 * silently ignore SUID/SGID/SCAP for file systems mounted with
	 * "nosuid" option.
	 */
	if (((vattrp->va_mode & (VSUID|VSGID)) == 0 &&
	     (acap->cap_effective & CAP_INVALID)) ||
	    (vp->v_vfsp->vfs_flag & VFS_NOSUID)) {
#pragma mips_frequency_hint FREQUENT
		args->ua_setid = 0;
		return 0;
	}
	/*
	 * Compute proposed execution credentials.
	 */
	setid = 0;
	uid = ut->ut_cred->cr_uid;
	gid = ut->ut_cred->cr_gid;
	if (vattrp->va_mode & VSUID && vattrp->va_uid != uid) {
		uid = vattrp->va_uid;
		setid = 1;
	}
	if (vattrp->va_mode & VSGID && vattrp->va_gid != gid) {
		gid = vattrp->va_gid;
		setid = 1;
	}
	/* a valid attached cap set differing from ours is also "setid" */
	if (!(acap->cap_effective & CAP_INVALID)) {
		cap_set_t *ucapp = &ut->ut_cred->cr_cap;
		if (acap->cap_effective != ucapp->cap_effective ||
		    acap->cap_permitted != ucapp->cap_permitted ||
		    acap->cap_inheritable != ucapp->cap_inheritable) {
			setid = 1;
		}
	}
	/*
	 * Set setuid/setgid/setcap protections, if not tracing. If the
	 * process is being debugged (STRC) we never allow SUID/SGID/SCAP.
	 * If the process has system call and/or context switch tracing
	 * enabled, we allow the tracing to remain active if the process is
	 * privileged (CAP_PROC_MGT) or if it's being traced by a privileged
	 * tracer (SPARPRIV).
	 */
	if (setid) {
		if ((p->p_flag & (STRC|SPARSYS|SPARSWTCH)) &&
		    !(p->p_flag & SPARPRIV) && !_CAP_ABLE(CAP_PROC_MGT)) {
#pragma mips_frequency_hint NEVER
			int s;
			if (p->p_flag & STRC)
				return EPERM;
			s = p_lock(p);
			p->p_flag &= ~(SPARSYS|SPARSWTCH|SPARINH);
			p->p_parcookie = 0;
			p_unlock(p, s);
		}
		args->ua_uid = uid;
		args->ua_gid = gid;
		args->ua_setid = 1;
	}
	return 0;
}

/*
 * gexec: common exec engine -- check permissions, handle
 * setuid/setgid/setcap, then try each handler in the exec switch
 * until one accepts the image.  Reinvoked (with level > 0) for the
 * interpreter of a "#!" script.
 */
int
gexec(struct vnode **vpp, struct uarg *args, int level)
{
	proc_t *pp = curprocp;
	uthread_t *ut = curuthread;
	int i, error;
	struct vnode *vp;
	struct vattr vattr;

	vp = *vpp;
	if ((error = execpermissions(vp, &vattr, args)) != 0)
		return error;
	VOP_OPEN(vp, vpp, FREAD, ut->ut_cred, error);
	if (error)
		return error;
	vp = *vpp;
	/* stash the old script vnode so it can be restored on failure */
	args->ua_prev_script[level] = pp->p_script;
	args->ua_level = level;
	pp->p_script = NULL;
	repl_interpose(vp, "ReplDefault");
	if (vattr.va_size < MAGIC_SIZE) {
#pragma mips_frequency_hint NEVER
		/* too small to even hold a magic number */
		error = ENOEXEC;
		goto closevp;
	}
	if (level == 0) {
#pragma mips_frequency_hint FREQUENT
		/*
		 * We only check for suid/sgid/scap on the thing being
		 * exec()'d. In particular this means that for script
		 * execution, we only check the script and not its
		 * interpreter.
*/
		error = execsetid(vp, &vattr, args);
		if (error)
			goto closevp;
	}
	/* reset FP exception mode unless the app asked to preserve it */
	if (!(ut->ut_pproxy->prxy_fp.pfp_fpflags & P_FP_PRESERVE)) {
#if TFP
		ut->ut_pproxy->prxy_fp.pfp_fpflags = P_FP_IMPRECISE_EXCP;
#else
		ut->ut_pproxy->prxy_fp.pfp_fpflags = 0;
#endif
	}
	error = check_dmapi_file(vp);
	if (error)
		goto closevp;
	/*
	 * Loop through the switch table looking for the module that
	 * can handle this executable.
	 */
	for (i = 0; i < nexectype; i++) {
		error = (*execsw[i]) (vp, &vattr, args, level);
		if (error != ENOEXEC)
			break;
	}
	if (!error) {
#pragma mips_frequency_hint FREQUENT
		pp->p_exec_cnt++;
		return 0;
	}
	if (!(args->ua_exec_cleanup)) {
		/* don't handle error */
		return error;
	}
closevp:
	ASSERT(error);
	VOP_CLOSE(vp, FREAD, L_TRUE, ut->ut_cred, i);
	/*
	 * Decrement use of new script, restore old one.
	 */
	if (pp->p_script) {
		int s = VN_LOCK(pp->p_script);
		int c = --pp->p_script->v_intpcount;
		ASSERT(c >= 0);
		VN_UNLOCK(pp->p_script, s);
		if (!c)
			IMON_EVENT(pp->p_script, ut->ut_cred, IMON_EXIT);
		VN_RELE(pp->p_script);
	}
	pp->p_script = args->ua_prev_script[level];
	return error;
}

/*
 * Read 'len' bytes of executable header at offset 'off' into a
 * freshly allocated kernel buffer returned through *addrp; free it
 * with exhead_free().  NOTE(review): on a short read or I/O error
 * the buffer is still returned through *addrp with ENOEXEC, so the
 * caller presumably frees it in that case too -- confirm against
 * the execsw handlers.
 */
int
exrdhead(struct vnode *vp, off_t off, size_t len, caddr_t *addrp)
{
	ssize_t resid;

	*addrp = kern_malloc(len);
	if (vn_rdwr(UIO_READ, vp, *addrp, len, off, UIO_SYSSPACE, 0, 0L,
	    get_current_cred(), &resid, &curuthread->ut_flid) != 0 ||
	    resid) {
		return ENOEXEC;
	}
	return 0;
}

/*
 * Free a header buffer allocated by exrdhead().
 */
void
exhead_free(caddr_t addr)
{
	kern_free(addr);
}

/*
 * Map a region of an executable into the address space 'vasid'.
 * The address/offset pair is page-aligned here; when the two have
 * the same page offset the file can be mapped directly, otherwise
 * the data is loaded (copied) instead.  'zfodlen' is the length of
 * trailing zero-fill (bss) beyond the file data.
 */
int
execmap(vnode_t *vp, caddr_t addr, size_t len, size_t zfodlen,
	off_t offset, int prot, int flags, vasid_t vasid, int ckpt)
{
	caddr_t oldaddr;
	size_t oldlen;
	off_t oldoffset;
	vnode_t *oldvp;
	as_addspace_t asadd;
	/* REFERENCED */
	as_addspaceres_t asres;
	int error = 0;
	int ismappable = 0;

	oldvp = vp;	/* for VN_RELE check at the end */
	/* page-align address and offset, remembering the originals */
	oldaddr = addr;
	addr = (caddr_t)((long)addr & ~POFFMASK);
	oldlen = len;
	len += ((size_t)oldaddr - (size_t)addr);
	oldoffset = offset;
	offset = (off_t)((long)offset & ~POFFMASK);
	flags |= MAP_FIXED;
	if (poff(oldoffset) == poff(oldaddr) && len) {
		/* potentially map-able */
		ismappable = 1;
		/*
		 * Non-writable mappings are assumed to be text. For these,
		 * we set them up as MAP_SHARED so that we don't have to
		 * allocate smem. If we need to write to them later on,
		 * like when setting a breakpoint, we convert them to
		 * MAP_PRIVATE. The MAP_TEXT flag tells us when to do this.
		 */
		if ((vp->v_type == VCHR) || (vp->v_type == VBLK))
			/* always PRIVATE */
			flags |= MAP_PRIVATE;
		else if (vp->v_type == VREG) {
			if (prot & PROT_WRITE)
				flags |= MAP_PRIVATE;
			else
				flags |= MAP_SHARED|MAP_TEXT;
		} else
			return ENODEV;
		/*
		 * check if ok with file system to be mapped
		 * We could get back a different vp ... (lofs)
		 *
		 * If a new vp is returned, then it has a reference
		 * that we must VN_RELE.
		 */
		VOP_MAP(vp, offset, len, prot, flags,
			get_current_cred(), &vp, error);
		if (error)
			return error;
	}
	if (ismappable) {
		/*
		 * this can occur when rld maps /dev/zero ...
		 */
		if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
			asadd.as_op = AS_ADD_MMAPDEV;
			asadd.as_addr = addr;
			asadd.as_length = len;
			asadd.as_prot = prot;
			asadd.as_maxprot = PROT_ALL;
			asadd.as_mmap_off = offset;
			asadd.as_mmap_vp = vp;
			asadd.as_mmap_flags = flags;
			asadd.as_mmap_ckpt = ckpt;
			/* XXX really shouldn't have to do this -
			 * we shouldn't be called with aspacelock held
			 */
			VAS_UNLOCK(vasid);
			error = VAS_ADDSPACE(vasid, &asadd, &asres);
			ASSERT(error || asres.as_addr == addr);
			VAS_LOCK(vasid, AS_EXCL);
			goto out;
		}
		/*
		 * If vnode is being mapped for execution, mark it for
		 * replication also.
		 * This is done here instead of at gexec() time to allow
		 * mapping libraries for replication.
		 * If it's a one-node system, don't interpose the
		 * replication layer on vnode.
		 */
		if (((prot & (PROT_WRITE|PROT_EXEC)) == PROT_EXEC) &&
		    curprocp->p_shaddr == 0 && numnodes > 1) {
			repl_interpose(vp, "ReplDefault");
		}
		asadd.as_op = AS_ADD_EXEC;
		asadd.as_addr = addr;
		asadd.as_length = len;
		asadd.as_prot = prot;
		asadd.as_maxprot = PROT_ALL;
		asadd.as_exec_off = offset;
		asadd.as_exec_vp = vp;
		asadd.as_exec_flags = flags;
		asadd.as_exec_ckpt = ckpt;
		asadd.as_exec_zfodlen = zfodlen;
		error = VAS_ADDSPACE(vasid, &asadd, &asres);
		if (error) {
			/*
			 * don't repl_dispose - there could be others using
			 * it since nothing here is locked. Shouldn't
			 * really do any harm...
			 */
			goto out;
		}
		ASSERT(asres.as_addr == addr);
	} else {
		/* not page-alignable: load (copy) the data instead */
		if (vp->v_type != VREG) {
			error = ENODEV;
			goto out;
		}
		asadd.as_op = AS_ADD_LOAD;
		asadd.as_addr = addr;
		asadd.as_length = len;
		asadd.as_prot = prot;
		asadd.as_maxprot = PROT_ALL;
		asadd.as_load_off = oldoffset;
		asadd.as_load_vp = vp;
		asadd.as_load_flags = flags;
		asadd.as_load_ckpt = ckpt;
		asadd.as_load_laddr = oldaddr;
		asadd.as_load_llength = oldlen;
		asadd.as_load_zfodlen = zfodlen;
		error = VAS_ADDSPACE(vasid, &asadd, &asres);
		if (error)
			goto out;
		ASSERT(asres.as_addr == addr);
	}
	if (zfodlen) {
		/*
		 * space already set up - all we have to do is zero the
		 * portion from the end of the load data to the end of the
		 * page
		 */
		size_t end;

		ASSERT(error == 0);
		end = (size_t)addr + len;
		if (poff(end)) {
			/* need to unlock so can fault */
			VAS_UNLOCK(vasid);
			if (uzero((caddr_t)end, NBPP - poff(end)) != 0)
				error = EFAULT;
			VAS_LOCK(vasid, AS_EXCL);
			if (error)
				goto out;
		}
	}
out:
	if (oldvp != vp)
		VN_RELE(vp);	/* release ref.
from VOP_MAP */
	return error;
}

/*
 * Machine independent final setup goes here
 */
void
setexecenv(struct vnode *vp)
{
	int i;
	uthread_t *ut = curuthread;
	struct proc *p = curprocp;
	proc_proxy_t *prxy = ut->ut_pproxy;
	sigvec_t *sigvp = &p->p_sigvec;

	/* install the new exec vnode (remove_proc dropped the old one) */
	mrlock(&p->p_who, MR_UPDATE, PZERO);
	ASSERT(p->p_exec == NULL);
	p->p_exec = vp;
	if (p->p_exec)
		VN_HOLD(p->p_exec);	/* in with the new */
	mrunlock(&p->p_who);

	/* reset alternate signal stack state */
	prxy->prxy_oldcontext = 0;
	sigemptyset(&prxy->prxy_sigonstack);
	prxy->prxy_ssflags = 0;
	prxy->prxy_sigsp = 0;
	prxy->prxy_siglb = (caddr_t)0;
	/*
	 * In 1003.1b, section 3.1.2.2:
	 *
	 * "Signals set to the default action (SIG_DFL) in the calling
	 * process shall be set to the default action in the new process
	 * image. Signals set to be ignored (SIG_IGN) by the calling
	 * process image shall be set to be ignored by the new process
	 * image. Signals set to be caught by the calling process image
	 * shall be set to the default action in the new process image."
	 *
	 * XXX Push all this into sig.c!
	 */
	sigvec_lock(&p->p_sigvec);
	for (i = 0; i < NUMSIGS; i++) {
		if (sigvp->sv_hndlr[i] != SIG_DFL &&
		    sigvp->sv_hndlr[i] != SIG_IGN) {
			sigvp->sv_hndlr[i] = SIG_DFL;
			sigemptyset(&sigvp->sv_sigmasks[i]);
			if (!sigvp->sv_sigpend.s_sigqueue &&
			    !ut->ut_sigpend.s_sigqueue)
				continue;
			if (sigismember(&ignoredefault, i + 1)) {
				/* default action is ignore: discard */
				sigdelq(&sigvp->sv_sigqueue, i + 1, sigvp);
				sigdelq(&ut->ut_sigqueue, i + 1, sigvp);
			} else if (sigvp->sv_sigpend.s_sigqueue) {
				sigqueue_t *sqp;
				/* move queued siginfo to the uthread */
				while (sqp = sigdeq(&sigvp->sv_sigpend,
						i+1, sigvp)) {
					ASSERT(sqp->sq_info.si_signo == i + 1);
					sigaddq(&ut->ut_sigqueue, sqp, sigvp);
				}
			}
		}
	}
	/*
	 * Don't clear sv_sigrestart. It should be inherited from the
	 * parent.
	 */
	sigorset(&sigvp->sv_sigign, &ignoredefault);
	sigemptyset(&sigvp->sv_sigcatch);
	sigemptyset(&sigvp->sv_signodefer);
	sigemptyset(&sigvp->sv_sigresethand);
	sigdiffset(&sigvp->sv_sainfo, &ignoredefault);
	/*
	 * If the user asked for NOCLDSTOP, they should still get it.  We
	 * turn off SNOWAIT, though, since in general we want children to
	 * zombify (POSIX assumes this behavior). If the process wants
	 * SNOWAIT on, it will need to explicitly set it via
	 * sigaction().
	 */
	sigvp->sv_flags &= ~SNOWAIT;
	/*
	 * Pending signals remain pending and held signals remain held, so
	 * don't clear p_phold or p_sig. We should clear out any 'default
	 * == ignore' signals from p_sig, though.
	 */
	sigorset(&ut->ut_sig, &sigvp->sv_sigpend.s_sig);
	sigemptyset(&sigvp->sv_sigpend.s_sig);
	sigdiffset(&ut->ut_sig, &ignoredefault);
	sigvec_unlock(&p->p_sigvec);
}

/*
 * remove old process address space.
 * At this point, the exec no longer 'fails' the new process
 * is simply killed
 */
/*ARGSUSED4*/
int
remove_proc(
	struct proc *p,
	struct uarg *args,
	struct vnode *vp,
	int rmp)
{
	int s;
	int c, level;
	int error;
	vasid_t vasid;
	as_deletespace_t asd;
	uthread_t *ut = curuthread;
	vproc_t *vpr;
	int oldf;

	ASSERT(p == curprocp);
	/*
	 * If the process is a graphics process, call the
	 * graphics exit routine since we are giving up graphics.
	 */
	if (UT_TO_KT(ut)->k_runcond & RQF_GFX)
		gfx_exit();
	/*
	 * Kill off other uthreads if this is pthreaded app.
	 * This discards pshare structure, too.
	 */
	if (ut->ut_flags & UT_PTHREAD) {
		vpr = UT_TO_VPROC(ut);
		VPROC_HOLD(vpr);
		VPROC_THREAD_STATE(vpr, THRD_EXEC, s);
		VPROC_RELE(vpr);
		if (s)
			return EBUSY;
	}
	/* if unblock on exec/exit flag is set, do that now */
	if (p->p_unblkonexecpid) {
		vpr = VPROC_LOOKUP(p->p_unblkonexecpid);
		if (vpr != NULL) {
			VPROC_UNBLKPID(vpr);
			VPROC_RELE(vpr);
		}
		p->p_unblkonexecpid = 0;
	}
	/*
	 * certain other things like profiling and single stepping are not
	 * held across an exec
	 */
#if R10000
	/*
	 * If we have a hardware event counter CPU monitoring structure
	 * attached to the uthread and it is set up for PC profiling, disable
	 * it because we're going to be tearing down the p_profp array below
	 * which PC profiling depends on.  We don't bother freeing it up here
	 * since the eventual exit path for the process/uthread will handle
	 * that and there are some complications with the tear down because
	 * the last uthread in a process is torn down after the proc/proxy
	 * are.
	 */
	if (ut->ut_cpumon && (ut->ut_cpumon->cm_flags & HWPERF_CM_PROFILING))
		hwperf_disable_counters(ut->ut_cpumon);
#endif
	s = p_lock(p);
	/*
	 * POSIX says to inherit all non-specified attributes.
	 * XXX FIXADE is of marginal use to inherit..
	 */
	if (p->p_flag & SPROFFAST)
		stopprfclk();
	if (p->p_flag & SABORTSIG)
		p->p_exitsig = 0;
	p->p_flag &= ~(SPROF|SPROF32|SPROFFAST|SABORTSIG);
	p->p_flag |= SEXECED;	/* for setpgid() */
	p_unlock(p, s);
	prxy_flagclr(&p->p_proxy, PRXY_USERVME);
	s = ut_lock(ut);
	oldf = ut->ut_flags;
	ut->ut_flags &= ~(UT_STEP|UT_SRIGHT|UT_PTHREAD|UT_PTPSCOPE|UT_OWEUPC);
	ut_unlock(ut, s);
	if (oldf & UT_SRIGHT)
		prsright_release(&p->p_proxy);
	/* drop the profiling buffer array */
	if (p->p_profp) {
		(void) kern_free(p->p_profp);
		p->p_profp = NULL;
		p->p_profn = 0;
	} else {
		ASSERT(p->p_profn == 0);
	}
	/*
	 * If ut_sighold points to prda, change back to kernel space.
	 */
	if (ut->ut_sighold != &p->p_sigvec.sv_sighold) {
		s = ut_lock(ut);
		p->p_sigvec.sv_sighold = *ut->ut_sighold;
		ut->ut_sighold = &p->p_sigvec.sv_sighold;
		ut_unlock(ut, s);
#if (_MIPS_SIM != _ABIO32)
		ut->ut_prda->t_sys.t_flags &= ~T_HOLD_VALID;
#else
		ut->ut_prda->t_sys.t_flags &= ~(T_HOLD_VALID|T_HOLD_KSIG_O32);
#endif
	}
	/*
	 * if part of a share group - get rid of that
	 */
	if (IS_SPROC(&p->p_proxy)) {
		/* Notify scheduler that we are leaving share group */
		leaveshaddrRunq(ut);
		asd.as_exec_detachstk = detachshaddr(p, SHDEXEC);
	} else
		asd.as_exec_detachstk = 0;
	/*
	 * Now that we've detached from the share group, we can check
	 * to see if this is an intp suid exec. If so, we can do the
	 * open and adjust the arg list appropriately.
	 */
	if (args->ua_intpvp) {
		/*
		 * Note that in fuexarg(), the args->ua_intpstkloc has been
		 * set up to point at a location that contains args->ua_fname.
		 * In intp, we set up args->ua_name to be "/dev/fd/XXXXXXX".
		 *
		 * Things would be a lot easier if we set up the
		 * stack after sloughing the share group.
		 */
		int fd;
		char fdnum[32];
		int len;

		if (error = execopen(&args->ua_intpvp, &fd)) {
			/*
			 * Caller will kill the process.
			 */
			return error;
		}
#ifdef CKPT
		if (args->ua_ckpt >= 0)
			ckpt_setfd(fd, args->ua_ckpt);
#endif
		/*
		 * Substitute the name of the /dev/fd node into
		 * the args structure.
		 */
		ASSERT(args->ua_intpstkloc);
		/*
		 * Get the fd value.
		 */
		numtos(fd, fdnum);
		/*
		 * Wipe out the "XXXXXXX" in the stack location.
		 */
		bzero(&args->ua_intpstkloc[DEV_FD_PREFIX_LEN],
			DEV_FD_SUFFIX_LEN);
		/*
		 * Now insert the new descriptor value into the stack
		 * location.
		 */
		strcpy(&args->ua_intpstkloc[DEV_FD_PREFIX_LEN], fdnum);
		/*
		 * Fix up the p_psargs location. We know that this must
		 * be the last argument in the list according to how
		 * intp sets up arguments.
		 */
		if (args->ua_intppsloc) {
			/*
			 * We need to make sure we don't stomp off the end
			 * of the psargs array when copying the fd number.
			 */
			len = ((PSARGSZ - 1) -
				(&args->ua_intppsloc[DEV_FD_PREFIX_LEN] -
				 &p->p_psargs[0]));
			if (len > 0) {
				len = MIN(strlen(fdnum), len);
				strncpy(&args->ua_intppsloc[DEV_FD_PREFIX_LEN],
					fdnum, len);
				args->ua_intppsloc[DEV_FD_PREFIX_LEN+len] =
					'\0';
			}
		}
	}
	/*
	 * Perform exec processing, including close-on-exec.
	 *
	 * Note that for sprocs the fdt_exec logic assumes that
	 * detachshaddr (and hence fdt_detach_shaddr) has already
	 * been called. This ensures that close-on-exec
	 * processing occurs for a sproc after it has gotten
	 * its own copy of the fdt.
	 */
	fdt_exec();
	/* remove old exec image */
	if (p->p_exec) {
		mrlock(&p->p_who, MR_UPDATE, PZERO);
		VN_RELE(p->p_exec);	/* out with the old */
		p->p_exec = NULL;
		mrunlock(&p->p_who);
	}
	/*
	 * release old scripts before rexec - ok to do here because
	 * any failures from this point on cause the process
	 * to be killed
	 */
	for (level = 0; level < 2; level++) {
		if (args->ua_prev_script[level]) {
			s = VN_LOCK(args->ua_prev_script[level]);
			c = --args->ua_prev_script[level]->v_intpcount;
			ASSERT(c >= 0);
			VN_UNLOCK(args->ua_prev_script[level], s);
			if (!c)
				IMON_EVENT(args->ua_prev_script[level],
					ut->ut_cred, IMON_EXIT);
			VN_RELE(args->ua_prev_script[level]);
			args->ua_prev_script[level] = NULL;
		}
	}
	/* Remove old address space */
	asd.as_op = AS_DEL_EXEC;
	asd.as_exec_rmp = rmp;
	asd.as_exec_prda = ut->ut_prda;
	ut->ut_prda = 0;
	as_lookup_current(&vasid);
	VAS_DELETESPACE(vasid, &asd, NULL);
	ASSERT(ut->ut_sharena == NULL);
	/* The new tlbpid is needed after we're done with the
	 * old process and before we need anything for the new one.
	 * This effectively flushes the tlb of any pages that
	 * were released by execbld.
	 * Getxfile may read in the text of the new process and
	 * will need to use the new tlbpid.
	 */
	new_tlbpid(&ut->ut_as, VM_TLBINVAL);
	/*
	 * Initialize the wired tlb entries for the new process.
*/ setup_wired_tlb(1); return 0; } int execopen(struct vnode **vpp, int *fdp) { struct vnode *vp = *vpp; struct vnode *openvp = vp; vfile_t *fp; int error = 0; int filemode = FREAD; VN_HOLD(vp); /* open reference */ if (error = vfile_alloc(filemode, &fp, fdp)) { VN_RELE(vp); *fdp = -1; /* just in case vfile_alloc changed value */ return error; } VOP_OPEN(openvp, &vp, filemode, sys_cred, error); if (error) { VN_RELE(vp); vfile_alloc_undo(*fdp, fp); *fdp = -1; return error; } vfile_ready(fp, vp); *vpp = vp; /* vnode should not have changed */ return 0; } int execclose(int fd) { int error; auto vfile_t *fp; if (error = closefd(fd, &fp)) return error; return vfile_close(fp); } /* * Check if the file has DMAPI managed regions/events. If so, * generate a DMAPI read event for the entire file. * * Only a "read" event will be generated since check_dmapi_file * is intended to be called only for gexec() and elfmap() files. * In these cases, if the process later uses the mprotect() syscall * to upgrade the page protection to include PROT_WRITE, the mapping * type is changed to private. * * Note that the VOP_FCNTL used here returns an error if the * underlying file system is unaware of the F_DMAPI subfunction * being used. This causes no problems, since a non-zero return * status is simply ignored. Only in the case of a zero return status * can we be sure that the VOP_FCNTL F_DMAPI subfunction * DM_FCNTL_MAPEVENT is implemented for this file system, and then * interpret the maprq.error field. 
*/ /* ARGSUSED */ int check_dmapi_file(vnode_t *vp) { #ifdef CELL_IRIX /* * This code doesn't work with cells for the following reasons: * 1) idl cannot tolerate null rval pointer * 2) stack variable dmfcntl is pass as an in/out param; server cell * accesses it directly (read and write); this will fail on * sn0 due to the firewall protections */ return(0); #else int error; dm_fcntl_t dmfcntl; dmfcntl.dmfc_subfunc = DM_FCNTL_MAPEVENT; dmfcntl.u_fcntl.maprq.length = 0; /* length = 0 for whole file */ dmfcntl.u_fcntl.maprq.max_event = DM_EVENT_READ; VOP_FCNTL(vp, F_DMAPI, &dmfcntl, 0, (off_t)0, sys_cred, NULL, error); if (error == 0) { if ((error = dmfcntl.u_fcntl.maprq.error) != 0) return error; } return 0; #endif }