/* irix-657m-src/eoe/cmd/rtmon/rtmond/tstamp.c */
/**************************************************************************
* *
* Copyright (C) 1997, Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
**************************************************************************/
/*
* Timestamp support; everything that deals with the kernel's
* tstamp support. The server creates a thread for each CPU
* where events are being collected. These events are then
* filtered according to the events a client has registered
* interest in and formatted according to a client-specified
* protocol (e.g. WindView). Data is then written on the
* client's network connection. Note that we do not (at
* present) merge event data received on multiple CPUs; the
* client is responsible for doing that.
*/
#include "rtmond.h"
#include "timer.h"
#include <stdlib.h>
#include <fcntl.h>
#include <sys/syssgi.h>
#include <sys/mman.h>
#include <sys/time.h>
/*#define DEBUG_PERFORMANCE*/
/*
* When no new events are available in either the user or
* kernel queues we do a syssgi call to wait for a period
* of time or for the kernel queue to reach half full.
*/
int waittime = _CONFIG_WAITTIME;
/*
* Interval for issuing "null records" to help clients in merging
* event streams from multiple CPUs. If a client hasn't received
* any data for this period of time then we send it a SORECORD event
* with no real events. The client can then use the timestamp info
* in the record to trigger event merging. If we didn't do this
* then clients might have to buffer very large amounts of data to
* ensure merged events are properly ordered by time.
*/
int quiettime = _CONFIG_QUIETTIME;
/*
* Convert milliseconds to RTC ticks.
*/
#define MStoTicks(ms) ((ms) * clockfreq)
#define TicksToUS(t) (1000*(t)/clockfreq)
#define TSTAMP_TIME_THRESH_TICKS MStoTicks(TSTAMP_TIME_THRESHOLD)
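/*
 * A worked example (illustrative only, assuming cc.cycleval is the
 * cycle-counter period in picoseconds, which is the interpretation
 * under which these units are consistent): with cc.cycleval == 800,
 * clockfreq = NSEC_PER_SEC/800 = 1,250,000 ticks per millisecond, so
 * MStoTicks(100) yields 125,000,000 ticks and TicksToUS(125000000)
 * recovers 100,000 microseconds (100 ms).
 */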
#define WATER_MARK_NUM 3
#define WATER_MARK_DEN 2
#define TSTAMP_KERN_IX(dp, count) \
(dp->kern_index+count)%NUMB_KERNEL_TSTAMPS
#define TSTAMP_USER_IX(dp, count) \
(dp->user_index+count)%NUMB_USER_TSTAMPS
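/*
 * The index macros wrap the per-CPU read positions around the
 * fixed-size rings shared with the kernel and with user-level event
 * writers. Purely for illustration (the real sizes come from the
 * headers): if NUMB_KERNEL_TSTAMPS were 4096 and dp->kern_index were
 * 4094, TSTAMP_KERN_IX(dp, 3) would be (4094+3)%4096 == 1, i.e. the
 * scan continues from the front of the buffer.
 */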
#define NZ(x) ((x) ? (x) : 1)
#ifdef DEBUG_PERFORMANCE
#define DEBUG_DECL uint64_t start_time, mid_time
#define DEBUG_STEP1 start_time = readcc()
#define DEBUG_STEP2 mid_time = readcc()
#define DEBUG_DONE(dp) { \
uint64_t end_time = readcc(); \
IFTRACE(PERF)(dp, "processed %d tstamps at a %lluus per event", \
dp->kern_count + dp->user_count, \
TicksToUS(mid_time-start_time)/NZ(dp->kern_count+dp->user_count));\
IFTRACE(PERF)(dp, "write and update %lluus/event total %lluus/event", \
TicksToUS(end_time-mid_time)/NZ(dp->kern_count+dp->user_count), \
TicksToUS(end_time-start_time)/NZ(dp->kern_count+dp->user_count));\
}
#else
#define DEBUG_DECL
#define DEBUG_STEP1
#define DEBUG_STEP2
#define DEBUG_DONE(dp)
#endif
static void init_user_queue(daemon_info_t*);
static void cleanup_user_queue(daemon_info_t*);
static void enter_user_event(daemon_info_t*, tstamp_event_entry_t*);
static void user_tstamp_update(user_queue_t* uq, int nread);
static void init_kernel_queue(daemon_info_t *dp);
static void cleanup_kernel_queue(daemon_info_t *dp);
static void enter_kernel_event(daemon_info_t *dp, tstamp_event_entry_t* ke);
static void scan_to_pid_event(daemon_info_t*, tstamp_event_entry_t*,
kern_queue_t*);
static void notify_lost_events(daemon_info_t*, __int64_t, int);
static sem_t threadstart; /* sema for starting threads */
void
init_thread(void)
{
uint64_t clockfreq;
clockfreq = NSEC_PER_SEC / cc.cycleval;
assert(clockfreq != 0);
if (sem_init(&threadstart, 0, 0) != 0)
Fatal(NULL, "Cannot initialize thread startup semaphore: %s",
strerror(errno));
IFTRACE(DEBUG)(NULL, "Clock frequency %llu (cycleval %lu)",
clockfreq, (unsigned long) cc.cycleval);
IFTRACE(DEBUG)(NULL,
"Quiet time %llu Wait time %llu tstamp merge time %llu",
MStoTicks(quiettime), MStoTicks(waittime), TSTAMP_TIME_THRESH_TICKS);
}
static void
sem_assert(daemon_info_t* dp, sem_t* sem, int vexpect)
{
int v;
sem_getvalue(sem, &v);
if (v != vexpect)
Fatal(dp, "sem_assert: expected %d, got %d", vexpect, v);
}
static void
syssgi_error(daemon_info_t* dp, const char* op)
{
Fatal(dp, "syssgi(%s): %s", op, strerror(errno));
}
#ifdef USE_SPROC
static void
tstamp_merge(void* vdp, size_t stacksize)
#else
static void*
tstamp_merge(void* vdp)
#endif
{
daemon_info_t* dp = (daemon_info_t*) vdp;
kern_queue_t* kq;
user_queue_t* uq;
tstamp_event_entry_t* ke;
tstamp_event_entry_t* ue;
client_t* cp;
uint64_t clockfreq;
uint64_t quiet_interval;
uint64_t tstamp_merge_threshold;
DEBUG_DECL;
IFTRACE(THREAD)(dp, "Starting event collection thread");
dp->isrunning = 1;
#ifdef USE_SPROC
(void) stacksize;
/*
* Immediately turn off par tracing for this thread.
*/
disallow_tracing(dp);
#else
(void) pthread_detach(pthread_self());
#endif
init_user_queue(dp);
init_kernel_queue(dp);
sem_post(&threadstart); /* wakeup master thread */
kq = &dp->kern_queue;
uq = dp->user_queue;
ke = &kq->tstamp_event_entry[0];
ue = &uq->tstamp_event_entry[0];
dp->kern_index = 0;
dp->user_index = 0;
/*
* Setup various time intervals dependent
* on the resolution of the cycle counter.
*/
clockfreq = NSEC_PER_SEC / cc.cycleval;
quiet_interval = MStoTicks(quiettime);
tstamp_merge_threshold = TSTAMP_TIME_THRESH_TICKS;
/*
* Lock the client list for this thread to guard
* against the master thread adding or removing clients.
* We unlock the list each time through the loop so the
* master thread can get in. The locking also is used to
* guarantee that kernel state cleanup done at thread
* shutdown is completed before the master thread starts
* up any new event collection thread for this CPU.
*/
sem_wait(&dp->clients_sem);
while ((cp = (client_t*) dp->clients) != NULL) {
int kevts, uevts;
uint64_t recent_time;
/*
* If no data has been pushed to a client in a while
* (nominally 100ms) then push anything currently
* buffered (or send a null record) *if* another buffer
* is available on the free list. This helps clients
* that need to collate events from multiple CPUs
* (otherwise they'd need to buffer potentially
* infinite amounts of data).
*/
recent_time = readcc();
do {
ioblock_t* io = cp->io;
if (io != NULL && cp->events) {
if (recent_time - cp->lastpush > quiet_interval && cp->iofree) {
IFTRACE(EVENTIO)(dp,
"Flush data, %llu us since last event, %llu us since last push",
TicksToUS(recent_time - cp->lastevt),
TicksToUS(recent_time - cp->lastpush));
cp->pushes++;
if (io->off > cp->lowmark)
(*cp->proto->flush)(dp, cp,
io->u.ev[1].tstamp, cp->lastevt);
else
(*cp->proto->flush)(dp, cp,
recent_time, recent_time);
}
}
} while ((cp = cp->next) != NULL);
/*
* Pause waiting for new events or a timeout.
*
* Note also that we unlock the client list so the
* master thread has a chance to add and delete clients.
* If the master thread is waiting to manipulate the
* client list, we should be preempted as a result of
* the sem_post call because the master thread will
* have the same rt priority and have been waiting
* to run (so have a longer wait time).
*/
dp->tstampwaits++;
sem_post(&dp->clients_sem); /* unlock before blocking */
if (syssgi(SGI_RT_TSTAMP_WAIT, dp->cpu, waittime) < 0)
syssgi_error(dp, "SGI_RT_TSTAMP_WAIT");
sem_wait(&dp->clients_sem); /* lock list again */
kevts = kq->state->tstamp_counter;
uevts = uq->state.tstamp_counter;
recent_time = readcc();
#ifdef notdef
/*
* Events are stamped with the value of the cycle counter.
* When processing the queues below we delay processing the
* most recent events until they've "aged" at least
* TSTAMP_TIME_THRESH_TICKS ticks so that we accurately
* merge user and kernel events (either of which might be
* delayed slightly by queueing overhead and/or contention
* in their delivery to the server).
*
* XXX This should be optimized according to whether
* or not any user timestamps might be received;
* delaying things here may cause inefficiency in
* sending data to clients as well as delays.
*/
if (uq->state.nclients != 0)
recent_time -= tstamp_merge_threshold;
#endif
IFTRACE(EVENTS)(dp, "%d kernel events %d user events", kevts, uevts);
DEBUG_STEP1;
dp->kern_count = dp->user_count = 0;
if (uevts > 0 && kevts > 0) {
/*
* Kernel & user events, merge based on time stamps.
*/
while (dp->kern_count < kevts && dp->user_count < uevts) {
if (ke[dp->kern_index].evt == 0 || ue[dp->user_index].evt == 0)
break;
if (ke[dp->kern_index].tstamp < ue[dp->user_index].tstamp)
enter_kernel_event(dp, &ke[dp->kern_index]);
else
enter_user_event(dp, &ue[dp->user_index]);
}
}
if (kevts > 0) {
while (dp->kern_count < kevts) {
tstamp_event_entry_t* ev = &ke[dp->kern_index];
if (ev->evt == 0) {
/*
* Two possibilities: either kernel has yet to
* fill in the event (curr_index is incremented
* before the event record is written); or a
* multi-record event was discarded because the
* second or later record was unavailable for use.
*
* In the first case we terminate scanning the
* event queue so the kernel has time to put things
* in a consistent state.
*
* In the second case the kernel will have set
* the lost_counter field to indicate that it
* dropped an event; we can check this and if
* non-zero discard all records up to curr_index.
* When we do an update call below we'll get back
* in sync since the kernel should stop entering
* events in the queue past this point.
*/
IFTRACE(LOSTEVENTS)(dp,
"Zero event, kevts %d index %d lost_counter %d"
, kevts
, dp->kern_index
, kq->state->lost_counter
);
break;
}
if (ev->tstamp >= recent_time &&
kevts - dp->kern_count <= TSTAMP_NUMB_THRESHOLD) {
IFTRACE(EVENTS)(dp,
"Delay processing %d kernel events; tstamp %llu threshold %llu"
, kevts - dp->kern_count
, ev->tstamp
, recent_time
);
break;
}
enter_kernel_event(dp, ev);
}
} else if (uevts > 0) {
while (dp->user_count < uevts) {
if (ue[dp->user_index].tstamp >= recent_time) {
IFTRACE(EVENTS)(dp,
"Delay processing %d user events; tstamp %llu recent time %llu"
, uevts - dp->user_count
, ue[dp->user_index].tstamp
, recent_time
);
break;
}
enter_user_event(dp, &ue[dp->user_index]);
}
}
DEBUG_STEP2;
/*
* Check and update the kernel count.
*/
if (kq->state->lost_counter > 0) {
int klost;
/*
* Setup to resynchronize with the kernel. We
* know that once lost_counter is non-zero the kernel
* will discard all events until we do an "update"
* call (below) to indicate we've removed some events
* from the queue. We first flush any events that
* have been added to the queue since we grabbed the
* value of tstamp_counter above.
*/
if (kevts != kq->state->tstamp_counter) {
kevts = kq->state->tstamp_counter;
while (dp->kern_count < kevts && ke[dp->kern_index].evt != 0)
enter_kernel_event(dp, &ke[dp->kern_index]);
}
klost = kq->state->lost_counter;
IFTRACE(LOSTEVENTS)(dp,
"Lost %d kernel timestamps; tstamp_counter %d"
, klost
, kq->state->tstamp_counter
);
notify_lost_events(dp, recent_time, klost);
dp->klost += klost;
dp->kevents += dp->kern_count;
/*
* We now know the queue is empty and that no new
* events will be inserted until we do an update call
* below. Setup our state to reflect the kernel's
* and so that the update will clear out any bogus
* kernel state (such as when curr_index is bumped
* for a multi-record event but the event doesn't fit).
*/
dp->kern_count = kq->state->tstamp_counter; /* clear kernel */
dp->kern_index = kq->state->curr_index; /* resync index */
} else {
dp->kevents += dp->kern_count;
dp->kdelayed += kevts - dp->kern_count;
}
if (dp->kern_count > 0) {
IFTRACE(EVENTS)(dp, "tstamp update count %u", dp->kern_count);
if (syssgi(SGI_RT_TSTAMP_UPDATE, dp->cpu, dp->kern_count) < 0)
Log(LOG_WARNING, dp,
"Error resetting tstamp counter; count %d: %s",
dp->kern_count, strerror(errno));
}
/*
* Check and update the user count.
*/
if (uq->state.lost_counter > 0) {
IFTRACE(LOSTEVENTS)(dp, "Lost %d user timestamps",
uq->state.lost_counter);
notify_lost_events(dp,
ue[dp->user_index].tstamp, uq->state.lost_counter);
dp->ulost += uq->state.lost_counter;
dp->uevents += dp->user_count;
dp->user_count = uq->state.tstamp_counter;
dp->user_index = uq->state.curr_index; /* resync index */
} else {
dp->uevents += dp->user_count;
dp->udelayed += uevts - dp->user_count;
}
if (dp->user_count > 0)
user_tstamp_update(dp->user_queue, dp->user_count);
DEBUG_DONE(dp);
}
/*
* No more clients to process, cleanup kernel state and
* terminate the thread. The client list is locked so
* we can manipulate daemon_info_t state (e.g. the statistics)
* w/o problems. Note that we clear the isrunning flag
* when we're done (but after cleaning up state). If the
* master thread is waiting on the list lock it should
* start up a new thread for this CPU after we're done.
*/
cleanup_kernel_queue(dp);
cleanup_user_queue(dp);
kid_flush(dp);
if ((trace & TRACE_PERF) || dp->klost != 0 || dp->ulost != 0)
Trace(dp,
"Kernel events: %lld total, %lld delayed, %lld lost (%d%%) "
"User events: %lld total, %lld delayed, %lld lost (%d%%) "
"Waits: %lld"
, dp->kevents, dp->kdelayed, dp->klost
, (100*dp->klost)/NZ(dp->kevents+dp->klost)
, dp->uevents, dp->udelayed, dp->ulost
, (100*dp->ulost)/NZ(dp->uevents+dp->ulost)
, dp->tstampwaits
);
IFTRACE(THREAD)(dp, "Event processing thread done; clients %#x", dp->clients);
dp->isrunning = 0;
sem_post(&dp->clients_sem);
#ifdef USE_SPROC
exit(EXIT_SUCCESS);
#else
return (EXIT_SUCCESS);
#endif
}
/*
* Notify all clients that events were lost.
*/
static void
notify_lost_events(daemon_info_t* dp, __int64_t time, int counter)
{
client_t* cp = (client_t*) dp->clients;
while (cp) {
(*cp->proto->lostEvent)(dp, cp, time, counter);
cp->lastevt = time;
cp = cp->next;
}
}
/*
* Start a new thread that runs the event processing/merging
* procedure. We assume that we're called with the client list
* semaphore locked; this guards against clobbering state that
* is currently in use by a thread that's terminating.
*/
void
startMerge(daemon_info_t* dp)
{
assert(dp->isrunning == 0);
dp->mask = 0;
/* zero statistics */
dp->kevents = dp->uevents = 0;
dp->tstampwaits = 0;
dp->kdelayed = dp->udelayed = 0;
dp->klost = dp->ulost = 0;
dp->clientwrites = 0;
dp->clientdata = 0;
sem_assert(dp, &threadstart, 0);
#ifdef USE_SPROC
if (sprocsp(tstamp_merge, PR_SALL, dp, NULL, _SPROC_EVTHREAD_STACKSIZE) >= 0) {
#else
errno = pthread_create(&dp->pthread, NULL, tstamp_merge, dp);
if (errno == 0) {
#endif
/*
* Wait for the thread to initialize the kernel event queue.
* This does several things, including avoiding a race where
* the collection thread may delete an existing tstamp buffer
* in the kernel after the master thread has installed an
* event collection mask. This also ensures that clients
* don't get notified that their data connection is setup
* until all the collection threads are ready to go--which
* is important for clients that need to do things like
* enable system call tracing on one or more processes.
*/
if (sem_wait(&threadstart) != 0)
Log(LOG_ERR, dp, "sem_wait(threadstart): %s", strerror(errno));
} else
Log(LOG_ERR, dp, "Could not start event collection thread: %s",
strerror(errno));
}
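/*
 * A minimal sketch of the expected caller side (hypothetical; the
 * actual driver lives elsewhere in rtmond). The master thread holds
 * the client list semaphore, attaches the new client, starts a
 * collection thread for the CPU if one is not already running, and
 * only then installs the union event mask:
 *
 *	sem_wait(&dp->clients_sem);
 *	link_client(dp, cp);		// hypothetical helper
 *	if (!dp->isrunning)
 *		startMerge(dp);		// returns once the thread has
 *					// initialized the kernel queue
 *	new_cpu_mask(dp);		// install the union event mask
 *	sem_post(&dp->clients_sem);
 */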
static void
init_user_queue(daemon_info_t* dp)
{
user_queue_t* uq = dp->user_queue;
int i;
uq->state.lost_counter = 0;
uq->state.tstamp_counter = 0;
uq->state.curr_index = 0;
uq->nentries = NUMB_USER_TSTAMPS;
uq->water_mark = (uq->nentries*WATER_MARK_NUM)/WATER_MARK_DEN;
/* clear all valid bits */
for (i = 0; i < uq->nentries; i++)
uq->tstamp_event_entry[i].evt = 0;
uq->enabled = TRUE;
}
static void
cleanup_user_queue(daemon_info_t* dp)
{
user_queue_t* uq = dp->user_queue;
uq->enabled = FALSE;
}
static void
enter_user_event(daemon_info_t *dp, tstamp_event_entry_t *ue)
{
client_t* cp;
/*
* XXX filter based on interest?
*/
for (cp = (client_t*) dp->clients; cp; cp = cp->next) {
(*cp->proto->userEvent)(dp, cp, ue);
cp->lastevt = ue->tstamp;
}
ue->evt = 0;
dp->user_index = TSTAMP_USER_IX(dp, 1);
dp->user_count++;
}
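/*
 * Consume nread entries from the user queue's shared counters. Once
 * the number of outstanding (unread) entries drops back below the
 * queue size, any overrun previously flagged by the writer side is
 * treated as resolved and lost_counter is cleared.
 */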
static void
user_tstamp_update(user_queue_t* uq, int nread)
{
if ((uq->state.tstamp_counter -= nread) < uq->nentries)
uq->state.lost_counter = 0;
}
/*
* Establish and initialize the kernel timestamp buffers
* and map them into our address space for reading.
*
* Error recovery is annoying since we exit() fatally
* on everything. This means we have to tear down
* whatever state we've built up along the way and also
* anything that our caller built up. So far the big
* issue outside of our own state is the need to call
* cleanup_user_queue() and issue a sem_post() against
* threadstart ...
*
* If rtmond is to coexist with UTRACE (kernel event collection
* using wrap-around mode) then it has to save and restore the event
* mask and EOB mode and carefully set up event collection to avoid
* losing events already collected by the kernel. The downside
* to doing this is that if rtmond is interrupted such that it
* leaves the system in a state where events are being collected
* then this (bogus) state will be propagated instead of it being
* reset to a default (good) state.
*
* XXX Need to rework stuff for better interaction
* with UTRACE collection.
*/
static void
init_kernel_queue(daemon_info_t* dp)
{
int addr;
int i;
char *sbase;
int fd;
/*
* Force a known state: clear the event mask to
* stop any event collection. This should be ok
* since the master thread should set it after
* we're properly initialized. If we don't clear
* the mask then we may inherit an old mask and
* receive unexpected/unwanted events until the
* desired mask is installed.
*/
IFTRACE(TSTAMP)(dp, "Zero tstamp mask");
/* NB: can't really check error return here */
(void) syssgi(SGI_RT_TSTAMP_MASK, dp->cpu, (uint64_t) 0, &dp->omask);
IFTRACE(TSTAMP)(dp, "Create %d tstamp buffer", NUMB_KERNEL_TSTAMPS);
addr = syssgi(SGI_RT_TSTAMP_CREATE, dp->cpu, NUMB_KERNEL_TSTAMPS);
if (addr == -1) {
if (errno != EEXIST) {
cleanup_user_queue(dp);
sem_post(&threadstart);
syssgi_error(dp, "SGI_RT_TSTAMP_CREATE");
}
/*
* A timestamp buffer already exists. If it's from
* a previous instance of this server, delete it and
* install our own buffer. If it's for UTRACEs,
* reuse the existing buffer. Find out by attempting
* to delete it.
*/
IFTRACE(TSTAMP)(dp, "Try to delete existing tstamp buffer");
if (syssgi(SGI_RT_TSTAMP_DELETE, dp->cpu) < 0) {
if (errno != EBUSY) {
cleanup_user_queue(dp);
sem_post(&threadstart);
syssgi_error(dp, "SGI_RT_TSTAMP_DELETE");
}
/*
* Use existing event buffer; should exist for
* UTRACE collection.
*/
IFTRACE(TSTAMP)(dp, "Reuse existing tstamp buffer");
addr = syssgi(SGI_RT_TSTAMP_ADDR, dp->cpu);
if (addr == -1) {
cleanup_user_queue(dp);
sem_post(&threadstart);
syssgi_error(dp, "SGI_RT_TSTAMP_ADDR");
}
/*XXX don't know how large the buffer is */
}
else {
/*
* The buffer must have been from an old instance of
* this server. Create a new one.
*/
IFTRACE(TSTAMP)(dp, "Create %d tstamp buffer", NUMB_KERNEL_TSTAMPS);
addr = syssgi(SGI_RT_TSTAMP_CREATE, dp->cpu, NUMB_KERNEL_TSTAMPS);
if (addr == -1) {
cleanup_user_queue(dp);
sem_post(&threadstart);
syssgi_error(dp, "SGI_RT_TSTAMP_CREATE");
}
}
}
IFTRACE(TSTAMP)(dp, "Map tstamp buffer at %#x", addr);
fd = open("/dev/mmem", O_RDONLY);
if (fd < 0) {
int err = errno;
syssgi(SGI_RT_TSTAMP_DELETE, dp->cpu);
cleanup_user_queue(dp);
sem_post(&threadstart);
Fatal(dp, "/dev/mmem: open: %s", strerror(err));
}
sbase = (char*) mmap(0,
NUMB_KERNEL_TSTAMPS*sizeof (tstamp_event_entry_t) +
TSTAMP_SHARED_STATE_LEN,
PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, addr);
if ((void*) sbase == MAP_FAILED) {
int err = errno;
close(fd);
syssgi(SGI_RT_TSTAMP_DELETE, dp->cpu);
cleanup_user_queue(dp);
sem_post(&threadstart);
Fatal(dp, "mmap(tstamp buffer): %s", strerror(err));
}
close(fd);
dp->kern_queue.state = (tstamp_shared_state_t*) sbase;
dp->kern_queue.tstamp_event_entry =
(tstamp_event_entry_t*)(sbase + TSTAMP_SHARED_STATE_LEN);
IFTRACE(TSTAMP)(dp, "Set tstamp EOB mode to %#x", RT_TSTAMP_EOB_STOP);
/* set the system to stop collection when end_of_buffer is
reached. Also makes sure the buffer and shared state are
initialized properly */
if (syssgi(SGI_RT_TSTAMP_EOB_MODE, dp->cpu, RT_TSTAMP_EOB_STOP, &dp->eobmode) < 0) {
syssgi(SGI_RT_TSTAMP_DELETE, dp->cpu);
cleanup_user_queue(dp);
sem_post(&threadstart);
syssgi_error(dp, "SGI_RT_TSTAMP_EOB_MODE");
}
}
/*
* Restore kernel state to how it was prior
* to our starting up event collection.
*/
static void
cleanup_kernel_queue(daemon_info_t* dp)
{
IFTRACE(TSTAMP)(dp, "Restore EOB mode to %#x", dp->eobmode);
if (syssgi(SGI_RT_TSTAMP_EOB_MODE, dp->cpu, dp->eobmode, (caddr_t) NULL) < 0)
Log(LOG_WARNING, dp, "syssgi(SGI_RT_TSTAMP_EOB_MODE): %s",
strerror(errno));
IFTRACE(TSTAMP)(dp, "Restore tstamp mask to %#llx", dp->omask);
if (syssgi(SGI_RT_TSTAMP_MASK, dp->cpu, dp->omask, (caddr_t) NULL) < 0)
Log(LOG_WARNING, dp, "syssgi(SGI_RT_TSTAMP_MASK): %s", strerror(errno));
dp->user_queue->enabled = FALSE;
if (munmap(dp->kern_queue.state,
NUMB_KERNEL_TSTAMPS*sizeof (tstamp_event_entry_t) +
TSTAMP_SHARED_STATE_LEN) != 0)
Log(LOG_WARNING, dp, "munmap(kernel event queue): %s", strerror(errno));
if (dp->eobmode != RT_TSTAMP_EOB_WRAP) {
IFTRACE(TSTAMP)(dp, "Delete tstamp buffer");
if (syssgi(SGI_RT_TSTAMP_DELETE, dp->cpu) < 0)
Log(LOG_WARNING, dp, "syssgi(SGI_RT_TSTAMP_DELETE): %s",
strerror(errno));
}
}
#define IN_RANGE(b,ev) (KERNEL_EVENT(b) <= ev && ev < KERNEL_EVENT(b+100))
/*
* Return the classes that a client must be
* interested in in order to receive the
* specified event.
*/
static uint64_t
event_to_class(const tstamp_event_entry_t* ev)
{
switch (ev->evt) {
case TSTAMP_EV_EODISP:
case TSTAMP_EV_EOSWITCH:
case EVENT_TASK_STATECHANGE:
case EVENT_WIND_EXIT_IDLE:
return (RTMON_TASK|RTMON_TASKPROC);
case TSTAMP_EV_SYSCALL_BEGIN:
case TSTAMP_EV_SYSCALL_END:
return (RTMON_SYSCALL);
case TSTAMP_EV_SIGSEND:
case TSTAMP_EV_SIGRECV:
return (RTMON_SIGNAL);
case TSTAMP_EV_PROF_STACK32:
case TSTAMP_EV_PROF_STACK64:
return (RTMON_PROFILE);
case TSTAMP_EV_ALLOC:
return (RTMON_ALLOC);
case TSTAMP_EV_EXIT:
case TSTAMP_EV_EXEC:
case TSTAMP_EV_FORK:
/* anything potentially process-oriented gets these */
return (RTMON_PIDAWARE);
case UTRACE_EVENT:
case JUMBO_EVENT:
/* nobody needs to see UTRACEs */
return 0;
}
if (MIN_INT_ID-2 <= ev->evt && ev->evt <= MAX_INT_ID)
return (RTMON_INTR);
if (IN_RANGE(100, ev->evt)) /* XXX */
return (RTMON_VM);
if (IN_RANGE(200, ev->evt)) /* XXX */
return (RTMON_SCHEDULER);
if (IN_RANGE(DISK_EVENT_BASE, ev->evt))
return (RTMON_DISK);
if (IN_RANGE(NET_EVENT_BASE, ev->evt))
return (RTMON_NETFLOW|RTMON_NETSCHED); /* XXX */
return (RTMON_ALL); /* ??? XXX */
}
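/*
 * For example, a client whose registered events are
 * (RTMON_SYSCALL|RTMON_PIDAWARE) matches a TSTAMP_EV_EXEC entry
 * (class RTMON_PIDAWARE) in the dispatch loop of enter_kernel_event
 * below, but not a DISK_EVENT_* entry (class RTMON_DISK), since
 * (cp->events & cmask) is zero for the latter.
 */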
static void
enter_kernel_event(daemon_info_t* dp, tstamp_event_entry_t* ke)
{
uint64_t cmask;
client_t* cp;
/*
* The kernel may not get this right, so we do it since
* it's easy and there's no difference in cost doing it
* one place versus another.
*/
ke->cpu = dp->cpu;
/*
* Track pid-related events and update the pid
* data structures. We also cache the pid of
* the last running task for the old WindView
* protocol.
*/
switch (ke->evt) {
case TSTAMP_EV_EOSWITCH:
case DISK_EVENT_QUEUED:
case DISK_EVENT_START:
case DISK_EVENT_DONE:
case EVENT_TASK_STATECHANGE:
case TSTAMP_EV_EODISP:
case TSTAMP_EV_SIGRECV:
kid_check_kid(dp, (int64_t) ke->qual[0], ke->tstamp);
break;
case TSTAMP_EV_SYSCALL_BEGIN:
case TSTAMP_EV_SYSCALL_END: {
const tstamp_event_syscall_data_t* sdp =
(const tstamp_event_syscall_data_t*) &ke->qual[0];
kid_check_kid(dp, (int64_t) sdp->k_id, ke->tstamp);
/*
* Process system call events specially. Compare
* the magic cookie passed by the kernel to the
* client's cookie; if they match pass the event on.
*/
for (cp = (client_t*) dp->clients; cp; cp = cp->next)
if (cp->events & RTMON_SYSCALL) {
if (sdp->cookie && sdp->cookie == cp->cookie) {
(*cp->proto->kernelEvent)(dp, cp, ke);
cp->lastevt = ke->tstamp;
break; /* should be only one client */
}
}
goto done;
}
case TSTAMP_EV_FORK: {
const tstamp_event_fork_data_t* fdp =
(const tstamp_event_fork_data_t*) &ke->qual[0];
kid_rename_kid(dp, fdp->pkid, fdp->name, ke->tstamp, -1);
kid_dup_kid(dp, fdp->pkid, fdp->ckid);
kid_check_kid(dp,fdp->ckid, ke->tstamp);
break;
}
case TSTAMP_EV_EXEC: {
const tstamp_event_exec_data_t* edp =
(const tstamp_event_exec_data_t*) &ke->qual[0];
kid_rename_kid(dp, (int64_t) edp->k_id, edp->name, ke->tstamp, edp->pid);
break;
}
case TSTAMP_EV_EXIT:
kid_purge_kid(dp, (int64_t) ke->qual[0]);
break;
}
/*
* Dispatch event to protocol-specific handler
* based on client interests.
*
* XXX more fine-grained filter support?
*/
cmask = event_to_class(ke);
for (cp = (client_t*) dp->clients; cp; cp = cp->next)
if (cp->events & cmask) {
(*cp->proto->kernelEvent)(dp, cp, ke);
cp->lastevt = ke->tstamp;
}
done:
/*
* Mark entry as available so kernel will reuse it.
*/
if (ke->jumbocnt != 0) {
/*
* Jumbo event, mark extended entries free first.
*/
int n = ke->jumbocnt;
int ix = TSTAMP_KERN_IX(dp, n);
do {
dp->kern_queue.tstamp_event_entry[ix].jumbocnt = 0;
dp->kern_queue.tstamp_event_entry[ix].evt = 0;
if (--ix < 0)
ix += NUMB_KERNEL_TSTAMPS;
} while (--n);
dp->kern_index = TSTAMP_KERN_IX(dp, 1+ke->jumbocnt);
dp->kern_count += 1+ke->jumbocnt;
ke->jumbocnt = 0;
} else {
dp->kern_index = TSTAMP_KERN_IX(dp, 1);
dp->kern_count++;
}
ke->evt = 0;
}
/*
* Compute a new event mask for a CPU as the union of all
* the event masks specified by the CPU's clients. If it's
* different from the mask currently installed, then install
* the new mask.
*
* NB: Caller is assumed to have the client list locked.
*/
void
new_cpu_mask(daemon_info_t *dp)
{
uint64_t mask = dp->omask; /* preserve inherited mask */
client_t* cp;
for (cp = (client_t*) dp->clients; cp != NULL; cp = cp->next) {
/*
* If the client event mask is zero, then propagate
* the value from the common state before calculating
* the union value over all clients. This is used
* when starting up a client to avoid having events
* dispatched to a client before everything is set up
* and the client request is acknowledged.
*/
if (cp->events == 0) /* propagate common state */
cp->events = cp->com->events;
mask |= cp->events;
}
if (dp->mask != mask) {
IFTRACE(TSTAMP)(dp, "Set tstamp mask; old %#llx, new %#llx",
dp->mask, mask);
if (syssgi(SGI_RT_TSTAMP_MASK, dp->cpu, mask, (caddr_t) NULL) < 0)
Log(LOG_WARNING, dp,
"syssgi(SGI_RT_TSTAMP_MASK): mask %#llx: %s",
mask, strerror(errno));
else
dp->mask = mask;
}
}