496 lines
10 KiB
Perl
Executable File
496 lines
10 KiB
Perl
Executable File
#! /usr/sbin/perl
|
|
|
|
## massage data into the appropriate format for graphing, etc.
|
|
|
|
# %work contains workloads
|
|
# %work_to_usec contains map from workload to microseconds
|
|
# $nprocs contains number of processors
|
|
#
|
|
# ASSUMPTIONS:
|
|
# - There is a data file for every combination of processors and
|
|
# workload.
|
|
|
|
require "getopts.pl";
|
|
|
|
## Index of each field within a whitespace-split data record.  The
## index of a name is simply its position in the list below, counting
## from zero.
%field_number = ();
{
    local($index) = 0;
    local($name);

    for $name (
	"proc", "wall", "user", "sys", "nops", "fops",		# 0 - 5
	"wall_op", "user_op", "sys_op",				# 6 - 8
	"pgrec", "pgflt", "vcsw", "ivcsw", "work",		# 9 - 13

	## synthetic fields
	"totalops", "user_totalop", "wall_totalop",		# 14 - 16
    ) {
	$field_number{$name} = $index++;
    }
}
|
|
|
|
## Human-readable description of each field, used in the usage text,
## the error messages and the chart title.
%field_english = (
    "proc",         "processor number",
    "wall",         "wall clock time (sec)",
    "user",         "user time (sec)",
    "sys",          "system time (sec)",
    "nops",         "successful operations",
    "fops",         "failed operations",
    "wall_op",      "wall clock per operation (usec)",
    "user_op",      "user time per operation (usec)",
    "sys_op",       "system time per operation (usec)",
    "pgrec",        "page reclaims",
    "pgflt",        "page faults",
    "vcsw",         "voluntary context switches",
    "ivcsw",        "involuntary context switches",
    "work",         "parallel workload (usec)",

    ## synthetic fields
    "totalops",     "successful and failed operations",
    "user_totalop", "user time per partial operation (usec)",
    "wall_totalop", "wall clock time per partial operation (usec)",
);
|
|
|
|
## Chart-title wording for each statistic that can be requested as
## <type> on the command line.
%type_english = (
    "min",        "Minimum",
    "max",        "Maximum",
    "avg",        "Average",
    "sum",        "Total",
    "sumsq",      "Total squared",
    "stddev",     "Standard deviation of",
    "var",        "Variance of",
    "max_vs_min", "Difference between max and min of",
);
|
|
|
|
## Highest processor count found among the data file names; raised by
## process_filenames.
$nprocs = 0;
@shownprocs = "1-";		# default to all numbers of processors

## print_field_list
##
## Print every known field name with its English description, one per
## line.  Shared by the usage text and the field error messages.

sub print_field_list {
    local($name);

    for $name (keys %field_english) {
	print " " x 10, sprintf("%10s", $name),
	    "\t - $field_english{$name}\n";
    }
}

if (!&Getopts("d:nh:gwat:p:M:") || @ARGV < 1) {
    print <<EOM;

usage: procs_vs_workload [options] <field> [type]

where options can be:

    -d <dir>    -- directory containing data files
    -n          -- normalize vs. 1 proc at same workload
    -h <host>   -- hostname for output
    -g          -- gnuplot output
    -w          -- wingz output (default)
    -a          -- don't subtract parallel work times
                   (only affects <field> = wall_op|user_op|sys_op)
    -t <title>  -- graph title
    -p <cpus>   -- only show data for a certain number of processors
    -M <max>    -- clamp values to an upper limit

where <type> is one of:

    min, max, avg, sum, sumsq, stddev, var, max_vs_min

and <field> is one of:

EOM

    &print_field_list();

    exit(1);
}

## The two output formats are mutually exclusive; treat asking for both
## as a usage error instead of silently falling back to wingz.
if ($opt_w && $opt_g) {
    print "Only one of -w or -g may be specified.\n";
    exit(1);
}

## ARGUMENTS
$hostname = $opt_h;
$field = $ARGV[0];
$min_max_avg = $ARGV[1] || "avg";
$normalizep = $opt_n;
$dir = $opt_d || ".";
$wingz = $opt_w || !$opt_g;
$gnuplot = !$wingz;
$no_sub_workload = $opt_a;
$title = $opt_t;
@shownprocs = split(/,/, $opt_p) if defined($opt_p);
$clamp_max = $opt_M if defined($opt_M);

## Reject an unknown <type> here, before any data file is read.
if (!defined($type_english{$min_max_avg})) {
    print "<type> must be one of:\n";
    print " " x 10,
	"min, max, avg, sum, sumsq, stddev, var, max_vs_min\n";
    exit(1);
}

## A plain <field> must be a known name; a field starting with ":" is
## an arithmetic expression over field names and is checked below.
if (!defined($field_number{$field}) && $field !~ /^:/) {
    print "<field> must be one of:\n";
    &print_field_list();
    exit(1);
}

if ($field =~ /^:/) {
    local($rest) = $field;
    local(@notfound);

    ## Strip one run of lowercase letters at a time and look each one
    ## up.  Only [a-z] counts as part of a name here, so field names
    ## containing "_" are rejected in expressions.
    while ($rest =~ s/[^a-z]*([a-z]+)//) {
	if (!defined($field_number{$1})) {
	    push(@notfound, $1);
	}
    }

    if (@notfound) {
	print join(", ", @notfound), " not valid field name",
	    (@notfound > 1 ? "s" : ""), ".\n";
	print "Must be one of:\n\n";
	&print_field_list();
	exit(1);
    }
}

@files = &process_directory($dir);

if (@files == 0) {
    print "No files to work with!\n";
    exit(1);
}

&process_filenames(@files);
%work_to_usec = &load_workload_times($hostname);

&print_procs_vs_workload($field, $min_max_avg, $normalizep);
|
|
|
|
#--------------------------------------------------------------
|
|
|
|
## print_procs_vs_workload
##
## Print the table of <field> values to stdout: one row per shown
## processor count, one column per workload.  Output is, in order:
##   - a chart-info line (number of rows, number of columns and, for
##     wingz, the axis labels and a .gif file name),
##   - the chart title followed by the column headings taken from
##     %work_to_usec,
##   - the data rows, each value printed with five decimals,
##   - the row/column legend.
## In gnuplot mode the chart-info, title and legend lines are prefixed
## with "# "; in wingz mode labels are prefixed with a double quote.
##
## field -- field to observe
## min_max_avg - min, max, or avg of per-processor values
## normalizep -- normalize value wrt. 1 processor case
##
## Reads the globals $nprocs, @shownprocs, $wingz, $gnuplot, $title,
## $hostname, $clamp_max, %work_to_usec, %type_english, %field_english.

sub print_procs_vs_workload {
    local($field, $min_max_avg, $normalizep) = @_;

    local(%uniproc_val);
    local(@vals);
    local($val);
    local($first_proc) = 1;
    local(@workloads) = &sorted_work();	# column order, computed once
    local($work, $pp, $nshown);

    ## The single-processor baseline is only consulted when
    ## normalizing, so only read those data files in that case.
    if ($normalizep) {
	for $work (@workloads) {
	    @vals = &get_eval(1, $work, $min_max_avg);
	    $uniproc_val{$work} = &get_val($field, @vals);
	}
    }

    ## chart info
    print "# " if $gnuplot;
    $nshown = 0;
    for (1..$nprocs) {
	++$nshown if &number_in_list($_, @shownprocs);
    }
    print $nshown;
    print "\t";
    print scalar(@workloads);
    print "\t\"Number of processors" if $wingz;
    print "\t\"Parallel work (usec)" if $wingz;
    if ($wingz) {
	## file name: the title if one was given, otherwise built
	## from host, field and type; whitespace becomes "_"
	local($filename) = $title;
	if ($filename eq "") {
	    $filename = $hostname . "_" . $field .
		$min_max_avg;
	}
	$filename =~ s/\s+/_/g;
	print "\t\"$filename.gif";
    }
    print "\n\n";

    ## chart title
    print "# " if $gnuplot;
    print "\"" if $wingz;
    print "$title - " if defined($title);
    print "$hostname - " if !defined($title);
    print "Normalized " if $normalizep;
    print $type_english{$min_max_avg}, " ", $field_english{$field};
    print "\n" if $gnuplot;

    ## column titles
    for $work (@workloads) {
	print "\t";
	print "\"" if $wingz;
	print $work_to_usec{$work};
    }
    print "\n";

    ## rows contain a fixed number of processors
    ## columns contain a fixed workload
    for ($pp = $first_proc; $pp <= $nprocs; $pp++) {
	next if !&number_in_list($pp, @shownprocs);

	print "\"" if $wingz;
	print "$pp";

	for $work (@workloads) {
	    @vals = &get_eval($pp, $work, $min_max_avg);

	    $val = &get_val($field, @vals);

	    if ($normalizep) {
		if ($uniproc_val{$work} != 0) {
		    $val /= $uniproc_val{$work};
		}
	    }

	    if (defined($clamp_max) && $val > $clamp_max) {
		$val = $clamp_max;
	    }

	    print("\t", sprintf("%.5f", $val));
	}

	print "\n";
    }

    print "\n";

    ## legend
    for ("row\t", "Number of processors\n", "col\t", "Workload (us)\n") {
	print "# " if $gnuplot;
	print "\"" if $wingz;
	print;
    }
}
|
|
|
|
#--------------------------------------------------------------
|
|
|
|
## get_val
##
## Extract the value of one field from a record.
##
## field -- a field name from %field_number, or ":" followed by an
##          arithmetic expression over field names
## vals  -- record values indexed by %field_number
##
## For wall_op, user_op and sys_op the record's "work" value is
## subtracted (clamped at zero) unless $no_sub_workload is set.  A
## result of zero is replaced by 0.00001.  Field names inside an
## expression are resolved through this same sub, so both adjustments
## apply to each operand as well as to the final result.

sub get_val {
    local($field, @vals) = @_;
    local($val);

    if ($field =~ /^:/) {
	$field =~ s/^://;

	## Replace each field name by its value.  "_" is part of a
	## name, so that e.g. wall_op is looked up as one field
	## rather than as "wall" and "op".
	$field =~ s/[a-z_]+/&get_val($&, @vals)/ge;

	## NOTE(review): this evaluates text that came from the
	## command line as Perl code.  Do not pass untrusted input.
	$val = eval($field);
	warn "unable to evaluate \"$field\": $@" if $@;
    }
    else {
	$val = $vals[$field_number{$field}];
    }

    ## After the substitution above $field holds numbers, not a name,
    ## so this only applies to a plain per-operation field.
    if ($field eq "user_op" || $field eq "wall_op"
	|| $field eq "sys_op") {
	if (!$no_sub_workload) {
	    $val -= $vals[$field_number{"work"}];
	    $val = 0.0 if $val < 0.0;
	}
    }

    # kludge to prevent zeros in the data, s.t. log scale graphs
    # will work.  The non-zero value here should be less than the
    # estimated error in the data, but more than the precision of the
    # output.
    $val = 0.00001 if $val == 0.0;

    return $val;
}
|
|
|
|
## load_workload_times
##
## Build the map from workload to microseconds.
##
## hostname -- host whose "times-<hostname>" file in $dir is read
##
## Each line of that file that does not start with "#" supplies a
## workload and its time as the first two whitespace-separated columns.
## Lines with fewer than two columns (blank lines included) are
## skipped.  When the file cannot be opened every key of %work maps to
## itself.  Returns the map as a flat key/value list.

sub load_workload_times {
    local($hostname) = @_;
    local(%work_to_usec);
    local($work, $usec);

    if (open(FILE, "$dir/times-$hostname")) {
	while (<FILE>) {
	    next if /^#/;
	    ($work, $usec) = split;

	    ## a blank or truncated line would otherwise create an
	    ## entry with an empty key or an undefined time
	    next if !defined($usec);

	    $work_to_usec{$work} = $usec;
	}
	close(FILE);
    }
    else {
	for (keys %work) {
	    $work_to_usec{$_} = $_;
	}
    }

    return %work_to_usec;
}
|
|
|
|
## process_directory
##
## Find the data files for the selected host.
##
## dir -- directory to scan
##
## Hosts are discovered from entries named "hinv-<host>".  If the
## global $hostname is undefined it is set to the only host found; with
## zero or several hosts the script prints a message and exits.  It
## also exits when $hostname is not among the hosts found.  Returns the
## directory entries whose names start with "data-<hostname>-".
## Dies if the directory cannot be opened.

sub process_directory {
    local($dir) = @_;
    local(@files, @files_raw, @hosts);
    local($host_pattern);

    opendir(DIR, $dir) || die "unable to opendir $dir: $!\n";
    @files_raw = readdir(DIR);
    closedir(DIR);

    @hosts = grep(/^hinv\-.*/, @files_raw);
    for (@hosts) {
	s/^hinv\-//;
    }

    if (!defined($hostname)) {
	if (@hosts == 1) {
	    $hostname = $hosts[0];
	}
	else {
	    print "Must specify hostname!\n";
	    exit(1);
	}
    }

    ## compare each host name; a bare grep($hostname, ...) would only
    ## test whether $hostname itself is a true value
    if (!grep($_ eq $hostname, @hosts)) {
	print "Data for $hostname doesn't exist in directory $dir!\n";
	exit(1);
    }

    ## escape the host name so that characters such as "." match
    ## literally in the file name pattern
    ($host_pattern = $hostname) =~ s/(\W)/\\$1/g;
    @files = grep(/^data\-$host_pattern\-/, @files_raw);

    return @files;
}
|
|
|
|
## sorted_work
##
## Return the keys of %work in ascending numeric order.

sub sorted_work {
    local(@workloads) = keys %work;

    return sort { $a <=> $b } @workloads;
}
|
|
|
|
## get_eval
##
## Read one data file and reduce its records to a single record.
##
## nprocs -- processor count of the run
## work   -- workload of the run
## type   -- min, max, avg, sum, sumsq, var, stddev or max_vs_min
##
## The file is "$dir/data-$hostname-w<work>-t<nprocs>".  Every line is
## split on whitespace into a record indexed by %field_number; blank
## lines and lines whose first non-blank character is "#" are skipped.
## A value of "inf" is replaced by 1.0e10, and the synthetic fields
## totalops, user_totalop and wall_totalop are appended to each record.
## Returns a list with the requested statistic computed per column over
## all records (an empty list if the file holds no records).  var and
## stddev read the file twice: once for the averages, once for the
## squared deviations.
##
## Dies if the file cannot be opened, if a record has no operations at
## all (nops + fops == 0), or if the type is unknown.

sub get_eval {
    local($nprocs, $work, $type) = @_;
    local($file) = "data-$hostname-w$work-t$nprocs";
    local(@line, $nlines);
    local(@sumsq, @sum, @max, @min);
    local(@avgs, @var);
    local($ii);

    if ($type eq "stddev" || $type eq "var") {
	## yuck, has to be two-pass
	@avgs = &get_eval($nprocs, $work, "avg");
    }

    $nlines = 0;

    open(FILE, $dir . "/" . $file)
	|| die "unable to open $dir/$file: $!\n";

    while (<FILE>) {
	s/^\s+//;		# remove leading whitespace
	s/\s+$//;		# remove trailing whitespace
	next if $_ eq "";	# skip blank lines
	next if /^\#/;		# skip comments

	@line = split;
	$nlines++;

	## XXX kludge to deal with infinity
	## this will really only happen with user_op and wall_op
	for (@line) {
	    if ($_ eq "inf") {
		$_ = 1.0e10;
	    }
	}

	# fill in synthetic fields
	$line[$field_number{"totalops"}] =
	    $line[$field_number{"nops"}] +
	    $line[$field_number{"fops"}];
	die "$dir/$file, line " . $. . ": no operations recorded\n"
	    if $line[$field_number{"totalops"}] == 0;
	$line[$field_number{"user_totalop"}] =
	    $line[$field_number{"user"}] * 1.0e6 /
	    $line[$field_number{"totalops"}];
	$line[$field_number{"wall_totalop"}] =
	    $line[$field_number{"wall"}] * 1.0e6 /
	    $line[$field_number{"totalops"}];

	# set initial values
	if ($nlines == 1) {
	    @sum = @max = @min = @line;
	    for ($ii = 0; $ii < @line; $ii++) {
		$sumsq[$ii] = $line[$ii] ** 2;
		if ($type eq "var" || $type eq "stddev") {
		    $var[$ii] = ($line[$ii] -
				 $avgs[$ii]) ** 2;
		}
	    }
	    next;
	}

	# fold this record into the running statistics
	for ($ii = 0; $ii < @line; $ii++) {
	    $max[$ii] = $line[$ii] if $max[$ii] < $line[$ii];
	    $min[$ii] = $line[$ii] if $min[$ii] > $line[$ii];
	    $sum[$ii] += $line[$ii];
	    $sumsq[$ii] += $line[$ii] ** 2;

	    if ($type eq "var" || $type eq "stddev") {
		$var[$ii] += ($line[$ii] - $avgs[$ii]) ** 2;
	    }
	}
    }

    close(FILE);

    if ($type eq "var" || $type eq "stddev") {
	## sample variance: divide by n - 1; a single record is
	## reported as zero variance
	for ($ii = 0; $ii < @var; $ii++) {
	    if ($nlines != 1) {
		$var[$ii] /= ($nlines - 1.0);
	    }
	    else {
		$var[$ii] = 0;
	    }
	}
    }

    if ($type eq "min") { return @min; }
    elsif ($type eq "max") { return @max; }
    elsif ($type eq "sum") { return @sum; }
    elsif ($type eq "sumsq") { return @sumsq; }
    elsif ($type eq "var") { return @var; }
    elsif ($type eq "avg") {
	for ($ii = 0; $ii < @sum; $ii++) {
	    $sum[$ii] /= ($nlines * 1.0); # floating pt. division
	}
	return @sum;
    }
    elsif ($type eq "stddev") {
	for ($ii = 0; $ii < @var; $ii++) {
	    $var[$ii] = sqrt($var[$ii]);
	}
	return @var;
    }
    elsif ($type eq "max_vs_min") {
	for ($ii = 0; $ii < @max; $ii++) {
	    $max[$ii] -= $min[$ii];
	}
	return @max;
    }
    else {
	die "unknown statistic type \"$type\"\n";
    }
}
|
|
|
|
## process_filenames
##
## Record which workloads and how many processors the data files cover.
##
## files -- data file names; names ending in "-w<work>-t<nprocs>" are
##          used, anything else is ignored
##
## Adds each workload as a key of the global %work and raises the
## global $nprocs to the largest processor count seen.

sub process_filenames {
    local(@files) = @_;
    local($this_work, $this_nprocs);
    local($file);

    for $file (@files) {
	## A name that does not end in the expected suffix would
	## otherwise register an empty workload, for which no data
	## file can be opened later.
	next unless $file =~ /\-w(\d+)\-t(\d+)$/;
	($this_work, $this_nprocs) = ($1, $2);

	$nprocs = $this_nprocs if $this_nprocs > $nprocs;
	$work{$this_work} = 1;
    }
}
|
|
|
|
## number_in_list
##
## Tell whether a number is selected by a list of specifications.
##
## n    -- number to look for
## list -- each element is either a range "<low>-<high>", where either
##         bound may be omitted, or a single value compared numerically
##
## Returns 1 as soon as one element selects n, 0 if none does.

sub number_in_list {
    local($n, @list) = @_;
    local($spec, $low, $high);

    foreach $spec (@list) {
	if ($spec =~ /^(\d+)?\-(\d+)?$/) {
	    ($low, $high) = ($1, $2);

	    ## selected unless n violates a bound that was given
	    return 1 unless (defined($low) && $n < $low)
		|| (defined($high) && $n > $high);
	}
	elsif ($n == $spec) {
	    return 1;
	}
    }

    return 0;
}
|