/* 1502 lines, 42 KiB, C -- extraction metadata; not part of the original source */
/**************************************************************************
|
|
* *
|
|
* Copyright (C) 1988-1996 Silicon Graphics, Inc. *
|
|
* *
|
|
* These coded instructions, statements, and computer programs contain *
|
|
* unpublished proprietary information of Silicon Graphics, Inc., and *
|
|
* are protected by Federal copyright law. They may not be disclosed *
|
|
* to third parties or copied or duplicated in any form, in whole or *
|
|
* in part, without the prior written consent of Silicon Graphics, Inc. *
|
|
* *
|
|
**************************************************************************/
|
|
|
|
#ident "$Revision: 1.152 $"
|
|
|
|
/*
|
|
* par -- process activity reporter
|
|
*
|
|
* It generates a report either by taking padc's output
|
|
* or by invoking padc to get process activity data.
|
|
*
|
|
* usage: par <collect_functions> <display_options> <command + argument list>
|
|
* par <collect_functions> <display_options> -p pid
|
|
* par <collect_functions> <display_options> -t 'time'
|
|
* par <collect_functions> <display_options>
|
|
* par <display_options>
|
|
*
|
|
* In the first instance, par execs the supplied command with the
|
|
* specified arguments and monitors the activity of that process.
|
|
*
|
|
* If -p is specified, then par reports on the activity of the
|
|
* specified process.
|
|
*
|
|
* If -t is specified, then par records information for 'time' seconds
|
|
 * If no -t is given, continuous information is collected
|
|
*
|
|
* In the last instance, par reads a padc output file from
|
|
* standard input and reports on its contents.
|
|
*
|
|
* The types of activity which can be reported are controlled
|
|
* by the following functions and options:
|
|
*
|
|
* collection functions:
|
|
* -s - trace system calls with parameters
|
|
* -r - trace scheduler
|
|
* -x - trace network queueing
|
|
* -X - trace network throughput
|
|
* -k - trace disk activity
|
|
* -p - trace a specific pid
|
|
* -i - inherit tracing on fork
|
|
* -O file - copy trace data to specified file
|
|
*
|
|
* display options:
|
|
* -l list system call in long format
|
|
* -n syscall-num select system call by system call number/name
|
|
* -e syscall-num exclude system call by system call number/name
|
|
* -N syscall-name select system call by system call name/number
|
|
* -P pid trace named process only
|
|
* -S print summary of system call and signal counts
|
|
* -SS print system calls and signals trace
|
|
* -Q print scheduling summary
|
|
* -QQ print scheduling trace
|
|
* -QQQ print full runq context scheduling trace
|
|
* -c do not print cpuid
|
|
* -d show any syscall time delta
|
|
* -u show times in uS
|
|
* -o file output to file
|
|
* -a len max ascii length to print
|
|
* -b len max binary length to print
|
|
* -B force binary output rather than ascii
|
|
* -A force ascii output rather than binary
|
|
* -z sort syscall summary by syscall name
|
|
*
|
|
* NB: event processing and display is handled by common code found
|
|
* in -lrtmon; go there to add new support for decoding system
|
|
* calls and other events.
|
|
*/
|
|
#include <stdio.h>
|
|
#include <errno.h>
|
|
#include <unistd.h>
|
|
#include <signal.h>
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <stdarg.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <rtmon.h>
|
|
#include <sys/par.h>
|
|
#include <sys/param.h>
|
|
#include <sys/schedctl.h>
|
|
|
|
#define PADC "/usr/sbin/padc" /* name of event collector app */
|
|
|
|
typedef __uint64_t ptime_t; /* time base in par */
|
|
|
|
struct kidlist { /* linked list of processes */
|
|
struct kidlist* next;
|
|
struct kidlist* prev;
|
|
};
|
|
|
|
struct kidq { /* queue of processes */
|
|
struct kidlist h; /* list of processes */
|
|
int qlen; /* queue length */
|
|
int qmaxlen; /* max queue length over time */
|
|
};
|
|
struct kidq runq; /* global run queue */
|
|
struct kidq localqs[RTMOND_MAXCPU]; /* per-cpu affinity queue */
|
|
|
|
struct cpurec { /* per-CPU statistics */
|
|
ptime_t idletimes; /* total number of times went idle */
|
|
ptime_t idleticks; /* total mS spent idle */
|
|
ptime_t wentidle; /* tick number last went idle */
|
|
int disps; /* total times dispatched */
|
|
int hotdisps; /* times dispatched from local/affinity q */
|
|
ptime_t runticks; /* total mS spent running */
|
|
int swtchs; /* number of calls to swtch */
|
|
int64_t curkid; /* running kid */
|
|
int64_t lastkid; /* last running kid */
|
|
int runsame; /* times called swtch but ran same guy */
|
|
int nothighest; /* ran guy other than top of run q */
|
|
int curpri; /* priority of proc running at dispatch */
|
|
};
|
|
struct cpurec cpus[RTMOND_MAXCPU]; /* cpu statistics table */
|
|
|
|
struct kidrec { /* per-thread state+statistics */
|
|
struct kidlist h; /* queue links */
|
|
struct kidq* curq; /* current q process is on */
|
|
struct kidrec* hnext; /* hash chain */
|
|
int traced; /* 1 if being traced */
|
|
char* name; /* process name string */
|
|
int64_t kid; /* thread id */
|
|
/* pid_t pid; process id */
|
|
int pri; /* scheduling priority */
|
|
int rtpri; /* priority if realtime */
|
|
int lastrun; /* last CPU proc ran on */
|
|
int slices; /* # times switched in */
|
|
ptime_t starttick; /* time last switched in */
|
|
ptime_t runtime; /* total time spent running */
|
|
int queued; /* # times placed on global run queue */
|
|
int lqueued; /* # times placed on local run queue */
|
|
ptime_t queuedtick; /* time last placed on global queue */
|
|
ptime_t queuedtime; /* total time spent on global queue */
|
|
ptime_t lqueuedtime; /* total time spent on local queue */
|
|
ptime_t sleeptick; /* time last put to sleep */
|
|
ptime_t sleeptime; /* total time spent asleep */
|
|
int cpuswitch; /* # times moved between CPUs */
|
|
int preempt_short; /* # times preempted for <10 ticks */
|
|
int preempt_long; /* # times preempted for >=10 ticks */
|
|
int swtch; /* # times switched out */
|
|
int sleep; /* # times put to sleep */
|
|
int yield; /* # times voluntarily yielding CPU */
|
|
int mustrun; /* # times switched out 'cuz of mustrun proc */
|
|
};
|
|
|
|
/*
|
|
* Per-process state+statistics are maintained separately from
|
|
* the state maintained by the library support code. We should
|
|
* be able to hook our data on to that of the library but there's
|
|
* currently no way to do that.
|
|
*
|
|
* We use an egregiously large hash table size in order to handle
|
|
* cases where we may be keeping track of hundreds or even thousands
|
|
* of processes. Since pids tend to clump together we probably will
|
|
* only use a few cache lines out of each table when we're working
|
|
* with a small number of processes.
|
|
*/
|
|
#define KIDHASHSIZE 4096 /* must be power of 2 */
|
|
#define KIDHASH(kid) ((kid) & (KIDHASHSIZE-1)) /* hash pid to hash index */
|
|
struct kidrec *kidhash[KIDHASHSIZE]; /* process statistics table */
|
|
|
|
char funs[512];
|
|
FILE *nfd = stdin;
|
|
int debug = 0; /* print debug information */
|
|
int collect; /* we're collecting data */
|
|
int spawn; /* should we spawn process to monitor */
|
|
int Qflag; /* scheduling display flag */
|
|
int Sflag; /* syscall display flag */
|
|
int display; /* disk/net/misc. collect/display flag */
|
|
int cflag = 0; /* 1 if -c option specified */
|
|
ptime_t msecs; /* current time */
|
|
int syscallp; /* report restricted to pids?? */
|
|
int longl = 0;
|
|
int inherit = 0; /* have tracing inherited on fork */
|
|
FILE* outf = stdout; /* output file */
|
|
FILE* errf = stderr; /* error file */
|
|
const char* padcfile = NULL; /* name of optional event data file */
|
|
FILE* dataf = NULL; /* event data file */
|
|
rtmonPrintState* rs; /* decoder state */
|
|
int syscalls = 0; /* # syscalls specified */
|
|
int zflag = 0; /* sort syscall summary by name */
|
|
u_int contextswtch = 0; /* number of context switches */
|
|
u_int lost = 0; /* number of events lost */
|
|
|
|
void usage(char *progname);
|
|
void readpadc(FILE*);
|
|
void copypadc(FILE*);
|
|
void setsyscalltrace(const char* arg, int onoff);
|
|
void cpuidle(const tstamp_event_entry_t*);
|
|
void cpusched(const tstamp_event_entry_t*);
|
|
void kidexit(const tstamp_event_entry_t*);
|
|
void cpusummary(void);
|
|
void runqsummary(void);
|
|
void printkidsum(register struct kidrec *kp);
|
|
void kidsummary(void);
|
|
void schedsummary(void);
|
|
void dumpcpushort(void);
|
|
void dumprunq(void);
|
|
void showstat(void);
|
|
void dumperrors(char *msg);
|
|
void dumpsums(void);
|
|
void sys_summary(void);
|
|
void Qinit(struct kidq *);
|
|
void Qinsert(struct kidq *qp, struct kidrec *entp);
|
|
void Qremove(struct kidrec *entp);
|
|
void kidinsert(struct kidrec *kp);
|
|
struct kidrec *newkid(int64_t pid);
|
|
struct kidrec *getkid(int64_t pid);
|
|
struct kidrec *kidfind(register int64_t pid);
|
|
int getpri(struct kidrec *kp);
|
|
void kidwalk(void (*)(struct kidrec *));
|
|
void handleevent(const tstamp_event_entry_t* ev);
|
|
void merge(uint64_t, const tstamp_event_entry_t* pending[], int npending[]);
|
|
|
|
/*
 * No-op signal handler: installed for SIGUSR1 so that pause()
 * returns when padc signals the child to proceed.
 */
void
sigcatcher(int sig)
{
	(void) sig;		/* unused */
}
|
|
|
|
static void
|
|
error(const char* fmt, ...)
|
|
{
|
|
va_list ap;
|
|
va_start(ap, fmt);
|
|
fprintf(errf, "par: ");
|
|
vfprintf(errf, fmt, ap);
|
|
va_end(ap);
|
|
fputc('\n', errf);
|
|
}
|
|
|
|
/*
 * Program entry point.
 *
 * Parses the command line, then runs in one of three modes:
 *  - spawn a command and trace it (extra args after the options),
 *  - drive padc to collect live event data (any "collect" option),
 *  - read a previously-recorded padc data stream from stdin.
 * Analysis/display is performed when -Q/-S/display options are set;
 * otherwise raw event data is copied to the -O file.
 */
int
main(int argc, char **argv)
{
	int monitoring = 0;	/* number of pids we're monitoring */
	pid_t childid;		/* pid of child executing command */
	int c;			/* option switch */
	const char* optstr = "Aa:Bb:cdDe:iKklN:n:O:o:P:p:rst:uSQXxz";

	rs = rtmon_printBegin();
	rs->eventmask = 0;	/* by default, display nothing */
	strcpy(funs, PADC);	/* funs accumulates the padc command line */
	while ((c = getopt(argc, argv, optstr)) != -1) {
		switch(c) {
		case 'i':	/* inherit tracing */
			inherit++;
			strcat(funs, " -i");
			break;
#ifdef NET_DEBUG
		case 'X':	/* network */
			rs->eventmask |= RTMON_NETFLOW;
			display=1;
			goto collect_arg;
		case 'x':	/* network */
			rs->eventmask |= RTMON_NETSCHED;
			display=1;
			goto collect_arg;
#endif
		case 'K':	/* show kernel thread ids as well as pids */
			rs->flags |= (RTPRINT_SHOWKID | RTPRINT_SHOWPID);
			display=1;
			break;
		case 'k':	/* disk */
			rs->eventmask |= RTMON_DISK;
			display=1;
			goto collect_arg;
		case 'r':	/* scheduler */
		case 's':	/* system calls+signals */
	collect_arg:	/* shared tail: record option and pass it on to padc */
			collect++;
			sprintf(strchr(funs,'\0'), " -%c", c);
			break;
		case 't':	/* collect for a period of time */
			collect++;
			sprintf(strchr(funs,'\0'), " -t %s", optarg);
			break;
		case 'p':	/* collect events from a specific pid */
			collect++;
			monitoring++;
			sprintf(strchr(funs,'\0'), " -p %s", optarg);
			/* simulate previous behaviour--limit reporting */
			rtmon_traceProcess(rs, atoi(optarg));
			break;
		case 'l':	/* force long listing */
			++longl;
			break;
		case 'a':	/* constrain max ascii string length */
			rs->max_asciilen = atoi(optarg);
			break;
		case 'A':	/* print data as ascii */
			rs->flags |= RTPRINT_ASCII;
			rs->flags &= ~RTPRINT_BINARY;
			break;
		case 'b':	/* constrain max binary data length */
			rs->max_binlen = atoi(optarg);
			break;
		case 'B':	/* print data as binary */
			rs->flags &= ~RTPRINT_ASCII;
			rs->flags |= RTPRINT_BINARY;
			break;
		case 'P':	/* filter reporting by pid */
			rtmon_traceProcess(rs, atoi(optarg));
			syscallp++;
			break;
		case 'Q':	/* report scheduling activity */
			/* second -Q enables per-event trace, not just summary */
			if (++Qflag > 1)
				rs->eventmask |= RTMON_TASK;
			break;
		case 'S':	/* report system call activity */
			/* second -S enables per-event trace, not just summary */
			if (++Sflag > 1)
				rs->eventmask |= RTMON_SYSCALL|RTMON_SIGNAL;
			break;
		case 'n':	/* filter reporting by sys call name/number */
		case 'e':	/* exclude sys call name/number */
		case 'N':	/* for compatibility */
			setsyscalltrace(optarg, c != 'e');
			break;
		case 'c':	/* disable CPU printing */
			rs->flags &= ~RTPRINT_SHOWCPU;
			cflag = 1;
			break;
		case 'd':	/* show system calls as begin+end */
			rs->syscalldelta = 0;
			break;
		case 'u':	/* show times as ms+us delta */
			rs->flags |= RTPRINT_USEUS;
			break;
		case 'D':	/* internal debugging output */
			rs->flags |= RTPRINT_INTERNAL;
			debug++;
			break;
		case 'o':	/* send report output to specified file */
			outf = fopen(optarg, "w+");
			if (outf == NULL) {
				error("%s: Cannot open: %s", optarg, strerror(errno));
				exit(1);
			}
			errf = outf;	/* diagnostics follow the report */
			break;
		case 'O':	/* send event data to specified file */
			padcfile = optarg;
			break;
		case 'z':	/* sort syscall summary by name, not time */
			zflag = 1;
			break;
		default:
			usage(argv[0]);
			exit(1);
		}
	}
	/* if any args left then we should spawn a process */
	if (optind < argc) {
		if (!Qflag && !Sflag && !display && !collect) {
			/*
			 * If no analysis or collection options were specified
			 * and a command line appears to be present, then do a
			 * system call trace since that's what most people seem
			 * to want (e.g. par ls -> par -sSS ls).
			 */
			Sflag = 2;
			rs->eventmask |= RTMON_SYSCALL|RTMON_SIGNAL;
			strcat(funs, " -s");
		}
		collect++, spawn++;
	}

	if (!(Qflag || Sflag || display)) {	/* no analysis options */
		if (!collect) {
			error("No statistics collection or analysis options specified.");
			usage(argv[0]);
			exit(1);
		} else if (monitoring) {
			/*
			 * If no analysis or collection options were specified
			 * then do a system call trace since that's what most
			 * people seem to want (e.g. par -p 178 -> par -sSS -p 178).
			 */
			Sflag = 2;
			rs->eventmask |= RTMON_SYSCALL|RTMON_SIGNAL;
			strcat(funs, " -s");
		} else if (padcfile == NULL) {
			/* collecting with no analysis: data must go somewhere */
			error("No event data file specified "
			    "for data collection; use the -O option.");
			usage(argv[0]);
			exit(1);
		}
	}
	if (padcfile != NULL) {
		dataf = fopen(padcfile, "w");
		if (dataf == NULL) {
			error("Cannot open event data file %s: %s.",
			    padcfile, strerror(errno));
			exit(1);
		}
	}

	if (isatty(fileno(outf)))	/* line buffer output to tty */
		setlinebuf(outf);

	/*
	 * Force a long listing if more than one
	 * process might appear in the traces.
	 */
	if (!longl)
		longl = (Qflag > 1 || syscallp > 1 || inherit ||
		    (!spawn && monitoring != 1)) ||
		    (rs->flags & RTPRINT_SHOWKID) ;
	if (longl)
		rs->flags |= RTPRINT_SHOWPID;
	else
		rs->flags &= ~RTPRINT_SHOWPID;

	if (collect) {		/* collect event data */
		if (spawn) {	/* fork a child to exec command */
			switch (childid = fork()) {
			case 0:		/* child */
				/*
				 * Synchronize w/ padc by pausing until we
				 * receive SIGUSR1.  This lets padc start up
				 * event collection before the command is exec'd.
				 */
				signal(SIGUSR1, sigcatcher);
				pause();
				execvp(argv[optind], &argv[optind]);
				dumperrors("exec failed");
				/*NOTREACHED*/
			case -1:
				dumperrors("command fork failed");
				/*NOTREACHED*/
			default:	/* parent */
				/*
				 * Pass child's pid to padc so it can send
				 * it SIGUSR1 when it's setup to collect
				 * event data for the command.
				 */
				sprintf(strchr(funs,'\0'), " -P %d", childid);
				/* restrict tracing to child proc */
				rtmon_traceProcess(rs, childid);
				syscallp++;
				break;
			}
		}
		/*
		 * Spawn padc to collect data.  We add args
		 * to force padc to ``line buffer'' its output
		 * so long-running processes w/ small amounts
		 * of event data don't have their results buffered
		 * indefinitely; and pass the max # bytes to
		 * collect for indirect params.  Note that the
		 * latter is actually a system-wide parameter
		 * so one padc can affect another (sigh).
		 */
		sprintf(strchr(funs,'\0'), " -b 0 -I %d",
		    rs->max_asciilen > rs->max_binlen ?
		    rs->max_asciilen : rs->max_binlen);
		if (debug > 1)
			strcat(funs, debug > 2 ? " -DD" : " -D");
		if ((nfd = popen(funs, "r")) == 0)
			dumperrors("padc exec failed");
	}

	if (Qflag || Sflag || display) {	/* analyze the event stream */
		if (Qflag) {
			u_int i;
			Qinit(&runq);
			for (i = 0; i < RTMOND_MAXCPU; i++)
				Qinit(&localqs[i]);
		}

		readpadc(nfd);
		if (Sflag)
			sys_summary();

		if (Qflag) {
			dumpsums();
			schedsummary();
#ifdef NET_DEBUG
			netsummary();
#endif
		}
	} else		/* no analysis: just copy raw data to -O file */
		copypadc(nfd);

	if (collect) {
		if (spawn)	/* in case padc neglected to do this */
			kill(childid, SIGUSR1);
		pclose(nfd);
	}
	if (outf != stdout)
		fclose(outf);
	if (dataf != NULL)
		fclose(dataf);
	return (0);
}
|
|
|
|
void
|
|
setsyscalltrace(const char* arg, int onoff)
|
|
{
|
|
if (isdigit(arg[0])) {
|
|
if (rtmon_settracebynum(rs, atoi(arg), onoff) ||
|
|
rtmon_settracebyname(rs, arg, onoff))
|
|
goto done;
|
|
} else if (rtmon_settracebyname(rs, arg, onoff))
|
|
goto done;
|
|
error("Unknown system call name/number %s.", arg);
|
|
exit(1);
|
|
/*NOTREACHED*/
|
|
done:
|
|
syscalls++;
|
|
if (Sflag < 2) /* enable syscall reporting */
|
|
Sflag = 2;
|
|
rs->eventmask |= RTMON_SYSCALL|RTMON_SIGNAL;
|
|
strcat(funs, " -s");
|
|
}
|
|
|
|
/*
|
|
* Read the remainder of a partial event.
|
|
*/
|
|
static int
|
|
readrem(FILE* fd, char* buf, size_t cc, size_t max)
|
|
{
|
|
size_t rem = max - (cc % sizeof (tstamp_event_entry_t));
|
|
do {
|
|
size_t n = fread(&buf[cc], 1, rem, fd);
|
|
if (n == 0)
|
|
break;
|
|
rem -= n;
|
|
} while (rem > 0);
|
|
return (rem == 0);
|
|
}
|
|
|
|
/*
 * Read and process the padc event stream from fd.
 *
 * Simple events are handled immediately.  TSTAMP_EV_SORECORD events
 * carry a per-CPU chunk of buffered events; those are accumulated in
 * per-CPU buffers and merged into a single time-ordered stream by
 * merge().  If -O was given (dataf != NULL) all raw data is also
 * copied to the event data file.
 */
void
readpadc(FILE* fd)
{
	const tstamp_event_entry_t* pending[RTMOND_MAXCPU]; /* next unmerged event per CPU */
	int npending[RTMOND_MAXCPU];	/* # unmerged events per CPU */
	char* data[RTMOND_MAXCPU];	/* per-CPU event buffer */
	size_t datalen[RTMOND_MAXCPU];	/* per-CPU buffer capacity (bytes) */
	uint64_t tlast[RTMOND_MAXCPU];	/* timestamp of last event seen per CPU */

	memset(pending, 0, sizeof (pending));
	memset(npending, 0, sizeof (npending));
	memset(data, 0, sizeof (data));
	memset(datalen, 0, sizeof (datalen));
	memset(tlast, 0, sizeof (tlast));
	for (;;) {
		union {
			tstamp_event_entry_t ev[NUMB_KERNEL_TSTAMPS];
			char buf[1];
		} u;
		size_t n;
		tstamp_event_entry_t* ev;

		/* read one event header; a short read is completed below */
		n = fread(u.buf, 1, sizeof (u.ev[0]), fd);
		if (n == 0)
			break;
		if (n != sizeof (u.ev[0]) && !readrem(fd, u.buf, n, sizeof (u.ev[0]))) {
			/* NOTE(review): %d with size_t args is a latent
			 * format-string bug on 64-bit; message kept as-is. */
			error("Partial event read. n is %d and size is %d evt is %d", n,sizeof(u.ev[0]),u.ev[0].evt);
			break;
		}
		ev = u.ev;
		if (ev->jumbocnt > 0) {	/* jumbo event, read the remainder */
			n = ev->jumbocnt * sizeof (*ev);
			if (!readrem(fd, (char*) u.buf, sizeof (u.ev[0]), n)) {
				error("Partial event read, terminating.");
				break;
			}
		}
		/* mirror the raw event to the -O data file, if any */
		if (dataf != NULL &&
		    fwrite(u.buf, (1+ev->jumbocnt)*sizeof (*ev), 1, dataf) != 1) {
			error("%s: Output write error: %s.", padcfile, strerror(errno));
			break;	/* XXX? */
		}
		if (ev->evt == TSTAMP_EV_SORECORD) {
			int cpu;
			size_t chunksize;	/* bytes of event data following */
			size_t nevts;		/* events in this chunk */
			size_t off;		/* offset of pending data in buffer */
			uint64_t threshold;	/* safe merge horizon */
			/*
			 * A new "record" of data to process.  Read the
			 * new chunk of event data into the per-CPU buffer.
			 * We move data or expand the buffer as needed to
			 * accomodate the new events.
			 */
			cpu = ev->cpu;
			assert(cpu < RTMOND_MAXCPU);
			chunksize = (size_t) ev->qual[0];
			nevts = chunksize / sizeof (*ev);
			off = ((const char*) pending[cpu]) - data[cpu];
			if (off + (npending[cpu]+nevts)*sizeof (*ev) > datalen[cpu]) {
				if (chunksize <= off) {
					/*
					 * Space is available at the front of the buffer;
					 * just copy the pending events down.
					 */
					if (npending[cpu])
						memmove(data[cpu], pending[cpu],
						    npending[cpu]*sizeof (*ev));
					pending[cpu] = (const tstamp_event_entry_t*) data[cpu];
				} else {
					char* dp;
					/*
					 * Grow the data buffer to hold this chunk of
					 * events and any that are pending; and reset
					 * the pending reference to the buffer.  We
					 * allocate a new buffer and copy pending data
					 * to the front instead of realloc'ing the
					 * existing buffer because we're likely to
					 * get more data soon and that would just
					 * cause us to copy lots of data to the front
					 * anyway.  This way we copy (hopefully) less
					 * data and read the new chunk directly into
					 * the right spot--eliminating a copy.
					 */
					datalen[cpu] = off+(npending[cpu]+nevts)*sizeof (*ev);
					dp = (char*) malloc(datalen[cpu]);
					if (dp == NULL) {
						error("Out of memory (event data).");
						break;
					}
					if (data[cpu]) {
						memcpy(dp, pending[cpu], npending[cpu]*sizeof (*ev));
						free(data[cpu]);
					}
					data[cpu] = dp;
					pending[cpu] = (const tstamp_event_entry_t*) dp;
				}
			}
			/*
			 * Read the chunk of event data.
			 */
			if (chunksize && fread((char*)(pending[cpu]+npending[cpu]), chunksize, 1, fd) != 1) {
				if (debug)
					fprintf(outf, "short read of %d bytes of event data\n",
					    chunksize);
				break;
			}
			npending[cpu] += nevts;
			/*
			 * Calculate the latest time that we can merge
			 * events up to.  We know each stream of events
			 * is ordered so by comparing the time of the
			 * last event for each CPU we can select a time
			 * that is safe to use in selecting events to merge.
			 */
			tlast[cpu] = ev->qual[2];
			threshold = tlast[0];
			if (debug)
				fprintf(stdout,"threashold %lld\n",threshold);
			for (cpu = rtmon_ncpus(rs)-1; cpu > 0; cpu--)
				if (tlast[cpu] < threshold)
					threshold = tlast[cpu];
			merge(threshold, pending, npending);
		} else
			handleevent(ev);
		if (debug)
			fflush(outf);
	}
	if (debug)
		fprintf(stdout,"threashold %lld\n",-1LL);
	merge(-1LL, pending, npending);	/* flush any remainder */
}
|
|
|
|
/*
 * Dispatch a single event record to the appropriate handler and/or
 * print it.  Also establishes the base timestamp from the first
 * non-config event, used for all relative-time reporting via the
 * global msecs.
 */
void
handleevent(const tstamp_event_entry_t* ev)
{
	/* NOTE(review): this static shadows the file-scope u_int `lost`
	 * global; only this local accumulator is actually updated here. */
	static int lost = 0;
	static ptime_t starttime = 0;	/* timestamp of first real event */

	/*
	 * Use the first non-config event to set the
	 * base time for calculating relative time
	 * and to decide whether or not to display
	 * CPU numbers in the listing.
	 */
	if (!starttime && ev->evt != TSTAMP_EV_CONFIG) {
		starttime = ev->tstamp;
		if (!cflag) {	/* user didn't force CPU display off */
			if (rtmon_ncpus(rs) > 1)
				rs->flags |= RTPRINT_SHOWCPU;
			else
				rs->flags &= ~RTPRINT_SHOWCPU;
		}
	}
	msecs = ev->tstamp - starttime;

	switch (ev->evt) {
	case TSTAMP_EV_EXIT:		/* process exit'd */
		kidexit(ev);
		rtmon_printEvent(rs, outf, ev);
		break;
	case TSTAMP_EV_FORK: {		/* process fork'd */
		const tstamp_event_fork_data_t* fdp =
		    (const tstamp_event_fork_data_t*) &ev->qual[0];

		/* with -i, a traced parent's children become traced too */
		if (inherit && _rtmon_kid_istraced(&rs->ds, (int64_t) fdp->pkid))
			rtmon_traceKid(rs, (int64_t)fdp->ckid);

		rtmon_printEvent(rs, outf, ev);
		break;
	}
	case EVENT_TASK_STATECHANGE:	/* proc state change */
		contextswtch++;
		cpusched(ev);
		break;
	case TSTAMP_EV_EOSWITCH:	/* new proc to run */
	case TSTAMP_EV_EODISP:		/* put proc on runq */
		cpusched(ev);
		break;
	case EVENT_WIND_EXIT_IDLE:	/* idle processor */
		contextswtch++;
		cpuidle(ev);
		break;
	case TSTAMP_EV_LOST_TSTAMP:	/* server lost events */
		lost += (int) ev->qual[0];
		fprintf(outf, "OVERFLOW: CPU %d lost %d so far\n", ev->cpu, lost);
		break;
	default:			/* everything else: just print it */
		rtmon_printEvent(rs, outf, ev);
		break;
	}
}
|
|
|
|
/*
 * Merge event data up to tmax time.  Merged events
 * are written to stdout (w/ buffering).  This code
 * assumes that each CPU's set of events are already
 * ordered in time; we just merge the event streams
 * from different CPUs.
 *
 * On return, pending[]/npending[] are updated to reference
 * whatever events could not yet be safely merged.
 */
void
merge(uint64_t tmax, const tstamp_event_entry_t* pending[], int npending[])
{
	const tstamp_event_entry_t* candidate[RTMOND_MAXCPU]; /* heads, sorted by time */
	const tstamp_event_entry_t* ev;
	int nc, i, j, k;	/* nc = # CPUs with candidate events */

	/*
	 * Find CPUs with events to merge and do a
	 * first-level sort of the top events for
	 * each CPU.  We assume below that each CPU's
	 * events are already sorted.
	 */
	nc = 0;
	if (debug > 1)
		fprintf(outf, "par: merge to %llu: ", tmax);
	for (i = rtmon_ncpus(rs)-1; i >= 0; i--) {
		if (npending[i] && (ev = pending[i])->tstamp < tmax) {
			/* insertion sort of this CPU's head event */
			for (j = 0; j < nc && ev->tstamp > candidate[j]->tstamp; j++)
				;
			if (j < nc) {	/* insert in middle */
				for (k = nc-1; k >= j; k--)
					candidate[k+1] = candidate[k];
			}
			candidate[j] = ev, nc++;
			if (debug > 1)
				fprintf(outf, " CPU[%d] %d", i, npending[i]);
		} else if (debug > 1)
			fprintf(outf, " !CPU[%d](%d,%lld)", i, npending[i],
			    npending[i] ? pending[i]->tstamp : 0);
	}
	if (debug > 1)
		fprintf(outf, "\n");
	while (nc > 0) {
		ev = candidate[0];	/* sorted event */
		if (ev->tstamp > tmax) {
			/* past the safe horizon: stash candidates back */
			for (i = 0; i < nc; i++)
				pending[candidate[i]->cpu] = candidate[i];
			break;
		}
		handleevent(ev);	/* process event */
		j = 1+ev->jumbocnt;	/* slots used by event */
		assert(j <= npending[ev->cpu]);
		npending[ev->cpu] -= j;
		if (npending[ev->cpu]) {	/* merge next event for CPU */
			/*
			 * Advance to next event for this CPU and
			 * re-sort the top events based on time.
			 * Since we know that all other candidate
			 * events are already sorted by time this
			 * just entails inserting the new event in
			 * the correct place.
			 */
			ev += j;
			for (i = 0; i < nc-1 && ev->tstamp > candidate[i+1]->tstamp; i++)
				candidate[i] = candidate[i+1];
			candidate[i] = ev;
		} else {		/* no more events for CPU */
			pending[ev->cpu] = ev+j;
			nc--;
			for (i = 1; i <= nc; i++)	/* shift down, already sorted */
				candidate[i-1] = candidate[i];
		}
	}
}
|
|
|
|
/*
|
|
* Check a server-calculated checksum. By
|
|
* default data does *not* have checksums;
|
|
* these are added only for debugging.
|
|
*/
|
|
void
|
|
checksum(int cpu, const tstamp_event_entry_t* ev, size_t cc0, uint64_t esum)
|
|
{
|
|
const uint64_t* lp = (const uint64_t*) ev;
|
|
uint64_t sum = 0;
|
|
size_t cc;
|
|
for (cc = cc0; (ssize_t) cc > 0; cc -= sizeof (*lp))
|
|
sum += *lp++;
|
|
if (sum != esum) {
|
|
fprintf(stderr,
|
|
"padc: checksum mismatch, cpu %d, count %d, got %#llx, expected %#llx\n",
|
|
cpu, cc0, sum, esum);
|
|
lp = (const uint64_t*) ev;
|
|
for (cc = 0; cc != cc0; cc += sizeof (*lp))
|
|
fprintf(stderr, "%4d: %#llx\n", cc, *lp++);
|
|
}
|
|
}
|
|
|
|
void
|
|
copypadc(FILE* fd)
|
|
{
|
|
for (;;) {
|
|
char buf[128*1024];
|
|
size_t n = fread(buf, 1, sizeof (buf), fd);
|
|
if (n == 0)
|
|
break;
|
|
if (fwrite(buf, n, 1, dataf) != 1) {
|
|
error("%s: Output write error: %s.", padcfile, strerror(errno));
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
dumpsums(void)
|
|
{
|
|
if (contextswtch && msecs) {
|
|
u_int secs = (u_int)(rtmon_toms(rs,msecs) / 1000);
|
|
fprintf(outf, " %d context switches / sec\n",
|
|
contextswtch / (secs ? secs : 1));
|
|
}
|
|
}
|
|
|
|
typedef struct {
|
|
const char* name;
|
|
int count;
|
|
ptime_t time;
|
|
} scallstat_t;
|
|
static scallstat_t* stats = NULL;
|
|
static int nstats;
|
|
|
|
/* dump system calls' summary */
|
|
static void
|
|
recordstat(rtmonPrintState* rs, const char* name, int count, __int64_t time)
|
|
{
|
|
(void) rs;
|
|
if (nstats == 0)
|
|
stats = (scallstat_t*) malloc(sizeof (*stats));
|
|
else
|
|
stats = (scallstat_t*) realloc(stats, (nstats+1) * sizeof (*stats));
|
|
stats[nstats].name = name;
|
|
stats[nstats].count = count;
|
|
stats[nstats].time = time;
|
|
nstats++;
|
|
}
|
|
static int
|
|
statcompar(const void* a, const void* b)
|
|
{
|
|
const scallstat_t* sa = (const scallstat_t*) a;
|
|
const scallstat_t* sb = (const scallstat_t*) b;
|
|
|
|
if (sa->time != sb->time)
|
|
return ((int)(sb->time - sa->time));
|
|
if (sa->count != sb->count)
|
|
return (sb->count - sa->count);
|
|
return (strcmp(sa->name, sb->name));
|
|
}
|
|
static int
|
|
namecompar(const void* a, const void* b)
|
|
{
|
|
const scallstat_t* sa = (const scallstat_t*) a;
|
|
const scallstat_t* sb = (const scallstat_t*) b;
|
|
|
|
return (strcmp(sa->name, sb->name));
|
|
}
|
|
static void
|
|
banner(const char* c1, const char* c2, const char* c3, const char* c4)
|
|
{
|
|
fprintf(outf, "%-14s %6s %9s %9s\n", c1, c2, c3, c4);
|
|
}
|
|
void
|
|
sys_summary(void)
|
|
{
|
|
int i;
|
|
|
|
rtmon_walksyscalls(rs, recordstat);
|
|
qsort(stats, nstats, sizeof (scallstat_t), zflag ? namecompar : statcompar);
|
|
if (nstats > 0) {
|
|
if (Sflag > 1)
|
|
fputc('\n', outf);
|
|
fprintf(outf, "System call summary:\n");
|
|
banner( "", "", "Average", "Total");
|
|
banner("Name", "#Calls", "Time(ms)", "Time(ms)");
|
|
fprintf(outf, "-----------------------------------------\n");
|
|
for (i = 0; i < nstats; i++) {
|
|
const scallstat_t* st = &stats[i];
|
|
fprintf(outf, "%-14s %6d %9.2f %9.2f\n"
|
|
, st->name
|
|
, st->count
|
|
, rtmon_tomsf(rs, st->time/st->count)
|
|
, rtmon_tomsf(rs, st->time)
|
|
);
|
|
}
|
|
free(stats);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Queue manipulation primitives
|
|
*/
|
|
void
|
|
Qinit(struct kidq* qp)
|
|
{
|
|
qp->h.next = qp->h.prev = &qp->h;
|
|
qp->qlen = 0;
|
|
qp->qmaxlen = 0;
|
|
}
|
|
|
|
#define Qempty(qp) (qp->next == qp)
|
|
|
|
int
|
|
getpri(struct kidrec* kp)
|
|
{
|
|
return (kp->pri <= PZERO || kp->rtpri == 0 ? kp->pri : kp->rtpri);
|
|
}
|
|
|
|
/*
 * Insert a thread record into a run queue, keeping the queue sorted
 * by ascending effective priority (getpri).  Updates the queue
 * length and its running maximum.
 */
void
Qinsert(struct kidq* qp, struct kidrec* entp)
{
	int newpri = getpri(entp);
	struct kidlist* wp;

	/* find the first entry with a higher priority value */
	for (wp = qp->h.next; wp != &qp->h; wp = wp->next) {
		int workpri = getpri((struct kidrec*) wp);
		if (newpri < workpri)
			break;
	}

	/* link entp in just before wp (or at the tail if none found) */
	entp->curq = qp;
	entp->h.next = wp;
	entp->h.prev = wp->prev;
	entp->h.prev->next = wp->prev = &entp->h;
	if (++(qp->qlen) > qp->qmaxlen)
		qp->qmaxlen = qp->qlen;
}
|
|
|
|
void
|
|
Qremove(struct kidrec* entp)
|
|
{
|
|
struct kidq* qp = entp->curq;
|
|
|
|
assert(qp != NULL);
|
|
entp->h.next->prev = entp->h.prev;
|
|
entp->h.prev->next = entp->h.next;
|
|
entp->h.next = entp->h.prev = NULL;
|
|
entp->curq = NULL;
|
|
qp->qlen--;
|
|
assert(qp->qlen >= 0);
|
|
}
|
|
|
|
void
|
|
kidinsert(struct kidrec* kp)
|
|
{
|
|
struct kidrec **phead = &kidhash[KIDHASH(kp->kid)];
|
|
while (*phead != NULL) {
|
|
assert((*phead)->kid != kp->kid);
|
|
phead = &(*phead)->hnext;
|
|
}
|
|
assert(kp->hnext == NULL);
|
|
*phead = kp;
|
|
}
|
|
|
|
struct kidrec*
|
|
kidfind(register int64_t kid)
|
|
{
|
|
struct kidrec *kp = kidhash[KIDHASH(kid)];
|
|
for (; kp != NULL && kp->kid != kid; kp = kp->hnext)
|
|
;
|
|
return (kp);
|
|
}
|
|
|
|
void
|
|
kidwalk(void (*func)(struct kidrec*))
|
|
{
|
|
int i;
|
|
for (i = 0; i < KIDHASHSIZE; i++) {
|
|
struct kidrec* kp = kidhash[i];
|
|
for (; kp != NULL; kp = kp->hnext)
|
|
(*func)(kp);
|
|
}
|
|
}
|
|
|
|
struct kidrec*
|
|
newkid(int64_t kid)
|
|
{
|
|
register struct kidrec* kp;
|
|
|
|
if ((kp = (struct kidrec*) malloc(sizeof(*kp))) == NULL) {
|
|
error("Out of memory: can not allocate %d bytes for KID %d.",
|
|
sizeof (*kp), kid);
|
|
exit(1);
|
|
}
|
|
|
|
memset(kp, 0, sizeof(*kp));
|
|
kp->traced =
|
|
(rs->flags & RTPRINT_ALLPROCS) != 0 || _rtmon_kid_istraced(&rs->ds, kid);
|
|
kp->kid = kid;
|
|
kp->pri = PIDLE;
|
|
kp->rtpri = PIDLE;
|
|
kp->lastrun = -1;
|
|
kp->starttick = -1;
|
|
kp->queuedtick = -1;
|
|
kp->sleeptick = -1;
|
|
|
|
/*
|
|
* Insert the new record
|
|
*/
|
|
kidinsert(kp);
|
|
|
|
return (kp);
|
|
}
|
|
|
|
struct kidrec*
|
|
getkid(int64_t kid)
|
|
{
|
|
struct kidrec* kp = kidfind(kid);
|
|
return (kp == NULL ? newkid(kid) : kp);
|
|
}
|
|
|
|
void
|
|
cpuidle(const tstamp_event_entry_t* ev)
|
|
{
|
|
struct cpurec* crp = &cpus[ev->cpu];
|
|
|
|
if (!Qflag)
|
|
return;
|
|
if (crp->wentidle == 0) {
|
|
crp->idletimes++;
|
|
crp->curkid = -1;
|
|
crp->curpri = PIDLE;
|
|
crp->wentidle = ev->tstamp;
|
|
rtmon_printEvent(rs, outf, ev);
|
|
}
|
|
}
|
|
|
|
#define RTMON_UNPACKPRI(v, f, p, bp) { \
|
|
f = (int)(v>>32); p = (short)(v&0xffff); bp = (short)((v>>16)&0xffff); \
|
|
}
|
|
|
|
/*
 * handle scheduling event
 * Always maintain all statistics even if only tracing a single pid
 * The display options only handle what is printed.
 * This lets us print reasonable summary stats
 *
 * Three event types are handled:
 *   TSTAMP_EV_EODISP       - thread placed on a run queue
 *   EVENT_TASK_STATECHANGE - thread switched out (reason in flags)
 *   TSTAMP_EV_EOSWITCH     - thread switched in on ev->cpu
 */
void
cpusched(const tstamp_event_entry_t* ev)
{
	register struct kidrec *kp;
	register struct cpurec *crp;
	ptime_t now;
	cpuid_t onrq;
	int64_t kid;
	int flags, pri, basepri;

	/* Scheduling statistics are only kept when -Q was given. */
	if (!Qflag)
		return;

	kid = (int64_t) ev->qual[0];
	kp = getkid(kid);		/* creates the record on first sight */
	crp = &cpus[ev->cpu];
	now = ev->tstamp;
	RTMON_UNPACKPRI(ev->qual[1], flags, pri, basepri);

	switch (ev->evt) {
	case TSTAMP_EV_EODISP:	/* put on a run q */
		kp->pri = pri;
		kp->rtpri = basepri;
		if (kp->sleeptick != -1) {
			/* Clamp so an out-of-order timestamp never yields
			 * a negative sleep interval. */
			if (! (now >= kp->sleeptick))
				now = kp->sleeptick;
			kp->sleeptime += now - kp->sleeptick;
			kp->sleeptick = -1;
		}
		kp->queuedtick = now;
		/*
		 * Processor affinity is implemented by placing
		 * processes/threads on a per-cpu local q first,
		 * then moving them to the global run q if not
		 * re-scheduled quickly.  This can result in
		 * multiple event records, one for each move.
		 *
		 * NB: cpu identifies the processor where the event
		 *     record was generated.  The target processor
		 *     (or ncpus if none) is identified separately
		 *     since they need not be the same.
		 */
		if (kp->curq != NULL)		/* remove from previous q */
			Qremove(kp);
		onrq = (cpuid_t) flags;		/* NB: packed in w/ pri's */
		assert(onrq != (cpuid_t) -1);
		if (onrq < rtmon_ncpus(rs)) {
			/* target < ncpus: a per-cpu local queue */
			kp->lqueued++;
			Qinsert(&localqs[onrq], kp);
		} else if (onrq == rtmon_ncpus(rs)) {
			/* target == ncpus: the global run queue */
			kp->queued++;
			Qinsert(&runq, kp);
		} else {
			error(
		"Scheduling event for invalid CPU %u (ncpus %u); max CPUs %u.",
			    onrq, rtmon_ncpus(rs), RTMOND_MAXCPU);
			exit(1);
		}
		break;
	case EVENT_TASK_STATECHANGE:	/* switched out */
		crp->swtchs++;
		crp->lastkid = kp->kid;
		/*
		 * set up for idle here - only really important
		 * when not tracing everyone - so runq stuff still looks
		 * right
		 */
		crp->curkid = -1;
		crp->curpri = PIDLE;
		kp->pri = pri;
		kp->rtpri = basepri;
		if (kp->starttick != -1) {
			ptime_t runtime;
			/* Clamp against out-of-order timestamps. */
			if (! (now >= kp->starttick))
				now = kp->starttick;
			runtime = now - kp->starttick;
			kp->runtime += runtime;
			crp->runticks += runtime;
		}
		kp->swtch++;
		/* Classify the switch-out by the reason packed in flags. */
		switch (flags) {
		case SEMAWAIT:
		case MUTEXWAIT:
		case SVWAIT:
		case MRWAIT:
		case GANGWAIT:
			/* Voluntary block; start the sleep clock. */
			kp->sleep++;
			kp->sleeptick = now;
			break;
		case MUSTRUNCPU:
			kp->mustrun++;
			break;
		case RESCHED_P:
		case RESCHED_KP:
			/* Preemption: "short" if it ran < 10 mS. */
			if (kp->starttick != -1 && rtmon_toms(rs,now - kp->starttick) < 10)
				kp->preempt_short++;
			else
				kp->preempt_long++;
			break;
		case RESCHED_S:
		case RESCHED_Y:
		case RESCHED_D:
			kp->yield++;
			break;
		default:
			fprintf(outf, "Unknown sched reason %d\n", flags);
			break;
		}
		kp->starttick = -1;

		break;
	case TSTAMP_EV_EOSWITCH:	/* switched in */
		kp->pri = pri;
		kp->rtpri = basepri;
		if (kp->queuedtick != -1 && now > kp->queuedtick) {
			/*
			 * NB: We cannot properly attribute queue time
			 *     if we lose the set on run q event that
			 *     tells us which queue the process is on.
			 */
			if (kp->curq == &runq || kp->curq == NULL)
				kp->queuedtime += now - kp->queuedtick;
			else
				kp->lqueuedtime += now - kp->queuedtick;
		}
		kp->slices++;
		kp->starttick = now;
		kp->queuedtick = -1;
		/* Count migrations to a different processor. */
		if (kp->lastrun != -1 && kp->lastrun != ev->cpu)
			kp->cpuswitch++;
		kp->lastrun = ev->cpu;
		if (crp->wentidle > 0) {
			/* Close the CPU's idle interval (clamped as above). */
			if (! (now >= crp->wentidle))
				now = crp->wentidle;
			crp->idleticks += now - crp->wentidle;
			crp->wentidle = 0;
		}
		crp->curpri = getpri(kp);
		if (crp->lastkid == kp->kid)
			crp->runsame++;
		crp->curkid = kp->kid;
		crp->disps++;
		if (kp->curq != NULL) {
			/* Dispatched from somewhere other than the queue head. */
			if (kp != (struct kidrec*) kp->curq->h.next)
				crp->nothighest++;
			if (kp->curq != &runq)	/* processor affinity win */
				crp->hotdisps++;
			Qremove(kp);
		}
		break;
	}
	rtmon_printEvent(rs, outf, ev);
	if (Qflag > 2)
		showstat();
}
|
|
|
|
/*
|
|
* Handle process termination event; if process
|
|
* was running complete run time calculation.
|
|
*/
|
|
void
|
|
kidexit(const tstamp_event_entry_t* ev)
|
|
{
|
|
struct kidrec* kp = getkid(ev->qual[0]);
|
|
assert(kp != NULL);
|
|
if (kp->starttick != -1) {
|
|
ptime_t runtime;
|
|
ptime_t now = ev->tstamp;
|
|
if (! (now >= kp->starttick))
|
|
now = kp->starttick;
|
|
runtime = now - kp->starttick;
|
|
kp->runtime += runtime;
|
|
kp->starttick = -1;
|
|
}
|
|
if (Qflag && kp->traced) {
|
|
/*
|
|
* Sigh, if we're going to generate a summary
|
|
* we must copy the process name string from
|
|
* the library because it'll free it when the
|
|
* process terminates.
|
|
*/
|
|
kp->name = strdup(rtmon_kidLookup(&rs->ds, kp->kid));
|
|
}
|
|
}
|
|
|
|
/* NZ: avoid division by zero by substituting 1 for a zero denominator. */
#define NZ(x) ((x) ? (x) : 1)
/* PCT: a as a percentage of a+b (safe when both are zero). */
#define PCT(a,b) ((100.0 * (double) (a)) / (double)NZ((a) + (b)))

/*
 * Print a per-CPU summary (idle vs. running time, dispatch
 * counts, affinity statistics) for every CPU that showed any
 * scheduling activity.  Uses the global collection-end time
 * <msecs> to close out any still-open idle or run interval.
 */
void
cpusummary(void)
{
	register int i;
	register struct cpurec *cp;
	register struct kidrec *kp;

	for (i = 0, cp = cpus; i < RTMOND_MAXCPU; i++, cp++) {
		/* Skip CPUs that never generated any scheduling events. */
		if (cp->idletimes == 0 && cp->disps == 0 && cp->swtchs == 0)
			continue;
		/*
		 * Tally the final execution segment.
		 */
		if (cp->wentidle > 0 && cp->wentidle <= msecs) {
			/* Still idle at end of trace: close the idle interval. */
			assert(cp->curpri == PIDLE);
			cp->idleticks += msecs - cp->wentidle;
		} else if (cp->curkid > 0) {
			/* Still running something: credit it up to end of trace. */
			kp = getkid(cp->curkid);
			assert(kp != NULL);
			if (kp->starttick != -1 && kp->starttick <= msecs)
				cp->runticks += msecs - kp->starttick;
		}
		fprintf(outf, "\nCPU %d summary:\n", i);
		fprintf(outf, "	Total mS idle		= %lld\n",
		    rtmon_toms(rs,cp->idleticks));
		fprintf(outf, "	Total mS running	= %lld (saturation = %0.1f%%)\n",
		    rtmon_toms(rs,cp->runticks),
		    PCT(cp->runticks, cp->idleticks));
		fprintf(outf, "	Processes dispatched	= %d (local %d global %d)\n",
		    cp->disps, cp->hotdisps, cp->disps - cp->hotdisps);
		fprintf(outf, "	Ran same		= %d (%0.1f%%)\n",
		    cp->runsame, (100.0 * cp->runsame)/NZ(cp->disps));
		fprintf(outf, "	Ran not highest		= %d (%0.1f%%)\n",
		    cp->nothighest, (100.0 * cp->nothighest)/NZ(cp->disps));
	}

}
|
|
|
|
void
|
|
runqsummary(void)
|
|
{
|
|
fprintf(outf, "\nRun queue summary:\n");
|
|
fprintf(outf, " Max queue len\t= %d\n", runq.qmaxlen);
|
|
}
|
|
|
|
/*
 * Print a per-process scheduling summary for one kidrec
 * (called from kidwalk via kidsummary).  Does nothing for
 * untraced records.  Any run/queue/sleep interval still open
 * at end of trace (global <msecs>) is tallied first.
 */
void
printkidsum(register struct kidrec *kp)
{
	pid_t pid = 0;
/* AVG: average mS per event, 0 if there were no events. */
#define AVG(a,b) ((a) > 0 ? rtmon_tomsf(rs,(b)/(a)) : 0.0)
	if (!kp->traced)
		return;
	/* Close out whichever interval (at most one) is still open. */
	if (kp->starttick != -1 && kp->starttick <= msecs)
		kp->runtime += msecs - kp->starttick;
	else if (kp->queuedtick != -1 && kp->queuedtick <= msecs) {
		/* Attribute to global or local queue, as in cpusched. */
		if (kp->curq == &runq || kp->curq == NULL)
			kp->queuedtime += msecs - kp->queuedtick;
		else
			kp->lqueuedtime += msecs - kp->queuedtick;
	} else if (kp->sleeptick != -1 && kp->sleeptick <= msecs)
		kp->sleeptime += msecs - kp->sleeptick;
	pid = rtmon_pidLookup(&rs->ds, kp->kid);
	/* kp->name was saved at exit time; otherwise look it up live. */
	if (rs->flags & RTPRINT_SHOWKID)
		fprintf(outf, "\nProcess summary for %s (pid:kid %d:%lld):\n",
		    kp->name != NULL ? kp->name : rtmon_kidLookup(&rs->ds, kp->kid),
		    pid, kp->kid);
	else
		fprintf(outf, "\nProcess summary for %s (%s %lld):\n",
		    kp->name != NULL ? kp->name : rtmon_kidLookup(&rs->ds, kp->kid),
		    (pid != 0) ? "pid" : "kid",
		    (pid != 0) ? pid : kp->kid);

	fprintf(outf, "	Time slices		%d\n", kp->slices);
	/* NOTE(review): some lines below print the time values with %llu
	 * while others use %lld — presumably rtmon_toms returns a 64-bit
	 * value either way; confirm its signedness against the library. */
	fprintf(outf, "	Total run mS		%lld (avg mS/slice %0.2f)\n",
	    rtmon_toms(rs,kp->runtime), AVG(kp->slices, kp->runtime));
	if (rtmon_ncpus(rs) > 1) {
		fprintf(outf, "	Times queued		%d (local %d global %d)\n",
		    kp->queued+kp->lqueued, kp->lqueued, kp->queued);
		fprintf(outf, "	Total mS queued locally	%llu (avg residence %0.2f)\n",
		    rtmon_toms(rs,kp->lqueuedtime), AVG(kp->lqueued, kp->lqueuedtime));
		fprintf(outf, "	Total mS queued globally %llu (avg residence %0.2f)\n",
		    rtmon_toms(rs,kp->queuedtime), AVG(kp->queued, kp->queuedtime));
	} else {
		fprintf(outf, "	Times queued		%d\n", kp->queued+kp->lqueued);
		fprintf(outf, "	Total mS queued		%llu (avg queue residence %0.2f)\n",
		    rtmon_toms(rs,kp->queuedtime+kp->lqueuedtime),
		    AVG(kp->lqueued+kp->queued, kp->lqueuedtime+kp->queuedtime));
	}
	fprintf(outf, "	Total mS sleeping	%llu (avg wait time %0.2f)\n",
	    rtmon_toms(rs,kp->sleeptime), AVG(kp->sleep, kp->sleeptime));
	fprintf(outf, "	Total context switches	%d (counts by reason for swtch):\n",
	    kp->swtch);
	fprintf(outf, "	    preempt short\t%d (%0.1f%%)\n", kp->preempt_short,
	    (100.0*kp->preempt_short)/NZ(kp->swtch));
	fprintf(outf, "	    preempt long\t%d (%0.1f%%)\n", kp->preempt_long,
	    (100.0*kp->preempt_long)/NZ(kp->swtch));
	fprintf(outf, "	    sleep\t\t%d (%0.1f%%)\n", kp->sleep,
	    (100.0*kp->sleep)/NZ(kp->swtch));
	fprintf(outf, "	    yield\t\t%d (%0.1f%%)\n", kp->yield,
	    (100.0*kp->yield)/NZ(kp->swtch));
	fprintf(outf, "	    mustrun\t\t%d (%0.1f%%)\n", kp->mustrun,
	    (100.0*kp->mustrun)/NZ(kp->swtch));
#undef AVG
}
|
|
|
|
/*
 * Print the per-process summary (printkidsum) for every
 * process record in the KID hash table.
 */
void
kidsummary(void)
{
	kidwalk(printkidsum);
}
|
|
|
|
/*
 * Emit the complete -Q scheduling report: per-CPU statistics,
 * the run queue high-water mark, then per-process summaries.
 */
void
schedsummary(void)
{
	cpusummary();
	runqsummary();
	kidsummary();
}
|
|
|
|
/*
 * Print a one-line snapshot of what each active CPU is doing:
 * IDLE, or the pid/kid and priority of the current process.
 * CPUs with no recorded scheduling activity are skipped.
 */
void
dumpcpushort(void)
{
	register int i;
	register struct cpurec *cp;

	fprintf(outf, " CPU Status:");
	for (i = 0, cp = cpus; i < RTMOND_MAXCPU; i++, cp++) {
		pid_t pid;
		/* Skip CPUs that never generated any scheduling events. */
		if (cp->idletimes == 0 && cp->disps == 0 && cp->swtchs == 0)
			continue;
		if (cp->curpri == PIDLE) {
			fprintf(outf, " [%d] IDLE", i);
			continue;
		}
		pid = rtmon_pidLookup(&rs->ds, cp->curkid);
		/* Fall back to the kid when no pid mapping exists. */
		if (rs->flags & RTPRINT_SHOWKID)
			fprintf(outf, " [%d] %d:%lld(pri %d)", i, pid, cp->curkid, cp->curpri);
		else
			fprintf(outf, " [%d] %lld(pri %d)", i, pid ? (int64_t)pid : cp->curkid, cp->curpri);
	}
	fprintf(outf, "\n");
}
|
|
|
|
/*
 * Print the contents of every non-empty run queue: each
 * per-CPU local queue ([0]..[ncpus-1]) and, last, the global
 * queue ([G]).  Each entry shows pid (or kid) and priority.
 */
void
dumprunq(void)
{
	register int i;

	fprintf(outf, " Run Queue:");

	/* Index RTMOND_MAXCPU doubles as the global queue slot. */
	for (i = 0; i <= RTMOND_MAXCPU; i++) {
		register struct kidlist *lp;
		register struct kidlist *kp;

		lp = (i == RTMOND_MAXCPU) ? &runq.h : &localqs[i].h;
		/* A list whose next points at itself is empty. */
		if (!lp || lp->next == lp)
			continue;

		if (i == RTMOND_MAXCPU)
			fprintf(outf, " [G]");
		else
			fprintf(outf, " [%d]", i);

		for (kp = lp->next; kp != lp; kp = kp->next) {
			pid_t pid;
			/* List links are embedded at the head of kidrec. */
			struct kidrec* pkp = (struct kidrec*) kp;

			pid = rtmon_pidLookup(&rs->ds, pkp->kid);

			if (rs->flags & RTPRINT_SHOWKID)
				fprintf(outf, " %d:%lld(pri %d)", pid, pkp->kid, getpri(pkp));
			else
				/* The ternary promotes pid to int64_t, matching %lld. */
				fprintf(outf, " %lld(pri %d)", pid ? pid : pkp->kid, getpri(pkp));
		}
	}

	fprintf(outf, "\n");
}
|
|
|
|
/*
 * Dump the instantaneous scheduler state (CPU status line and
 * run queue contents); used for -QQQ runq tracing.
 */
void
showstat(void)
{
	dumpcpushort();
	dumprunq();
}
|
|
|
|
/*
 * Print the command-line usage message to the error stream.
 * <progname> is argv[0]; it appears in each synopsis line.
 */
void
usage(char *progname)
{
	fprintf(errf,
"Usage:	%s <functions> <options> <command + args>\n"
"	%s <functions> <options> -p pid\n"
"	%s <functions> <options> -t time\n"
"	%s <Display Options> <logfile\n",
	progname, progname, progname, progname);
	fprintf(errf,
"Collection Functions:\n"
"	-s         collect system call events\n"
"	-r         collect process scheduling events\n"
"	-k         collect disk activity events\n"
"	-i         inherit event collection on fork\n"
"	-O file    write event data to file\n"
#ifdef NET_DEBUG
"	-x         collect network queueing events\n"
"	-X         collect network throughput events\n"
#endif
	);
	fprintf(errf,
"Display Options:\n"
"	-l         list system calls in long format\n"
"	-n syscall select system call by name or number\n"
"	-e syscall exclude system call by name or number\n"
"	-P pid     list system calls for <pid> only\n"
"	-d         show time delta when printing calls\n"
"	-c         do not show cpu id when printing calls\n"
"	-u         show delta time in uSec\n"
"	-k         print disk I/O trace (same as collection flag)\n"
"	-S         print system call summary\n"
"	-SS        print system call trace\n"
"	-Q         print scheduling summary\n"
"	-QQ        print scheduling trace\n"
"	-QQQ       print runq trace also\n"
"	-o file    write analysis listing to file\n"
"	-a len     max bytes to print for ascii data\n"
"	-b len     max bytes to print for binary data\n"
"	-A         force character data printing\n"
"	-B         force hex data printing\n"
"	-z         sort system call summary by syscall name\n"
	);
}
|
|
|
|
void
|
|
dumperrors(char *msg)
|
|
{
|
|
const char* emsg = strerror(errno);
|
|
fflush(outf);
|
|
fflush(errf);
|
|
error("%s: %s", msg, emsg);
|
|
if (collect)
|
|
pclose(nfd);
|
|
exit(1);
|
|
}
|