#! /usr/sbin/perl

## massage data into the appropriate format for graphing, etc.

# %work contains workloads
# %work_to_usec contains map from workload to microseconds
# $nprocs contains number of processors
#
# ASSUMPTIONS:
# - There is a data file for every combination of processors and
#   workload.
#
# Files read from the data directory (-d):
#   hinv-<host>               -- presence marks <host> as a known host
#   data-<host>-w<work>-t<n>  -- one row per line, whitespace-separated
#                                columns in the order given by
#                                %field_number ("proc" .. "work")
#   times-<host>              -- optional; "<work> <usec>" per line

use strict;
use warnings;
use Getopt::Std;

# Option variables filled in by getopts().
our ($opt_d, $opt_n, $opt_h, $opt_g, $opt_w, $opt_a, $opt_t, $opt_p, $opt_M);

# Column index of every field in a (possibly extended) data row.
my %field_number = (
    "proc"         => 0,
    "wall"         => 1,
    "user"         => 2,
    "sys"          => 3,
    "nops"         => 4,
    "fops"         => 5,
    "wall_op"      => 6,
    "user_op"      => 7,
    "sys_op"       => 8,
    "pgrec"        => 9,
    "pgflt"        => 10,
    "vcsw"         => 11,
    "ivcsw"        => 12,
    "work"         => 13,
    ## synthetic fields
    "totalops"     => 14,
    "user_totalop" => 15,
    "wall_totalop" => 16,
);

# Human-readable description of every field.
my %field_english = (
    "proc"         => "processor number",
    "wall"         => "wall clock time (sec)",
    "user"         => "user time (sec)",
    "sys"          => "system time (sec)",
    "nops"         => "successful operations",
    "fops"         => "failed operations",
    "wall_op"      => "wall clock per operation (usec)",
    "user_op"      => "user time per operation (usec)",
    "sys_op"       => "system time per operation (usec)",
    "pgrec"        => "page reclaims",
    "pgflt"        => "page faults",
    "vcsw"         => "voluntary context switches",
    "ivcsw"        => "involuntary context switches",
    "work"         => "parallel workload (usec)",
    ## synthetic fields
    "totalops"     => "successful and failed operations",
    "user_totalop" => "user time per partial operation (usec)",
    "wall_totalop" => "wall clock time per partial operation (usec)",
);

# Human-readable prefix for every statistic type.
my %type_english = (
    "min"        => "Minimum",
    "max"        => "Maximum",
    "avg"        => "Average",
    "sum"        => "Total",
    "sumsq"      => "Total squared",
    "stddev"     => "Standard deviation of",
    "var"        => "Variance of",
    "max_vs_min" => "Difference between max and min of",
);

my $nprocs     = 0;
my @shownprocs = ("1-");    # default to all numbers of processors
my %work;
my %work_to_usec;

if (!getopts("d:nh:gwat:p:M:") || @ARGV < 1) {
    print <<"EOM";
usage: $0 [options] <field> [type]

where options can be:
  -d <dir>    -- directory containing data files
  -n          -- normalize vs. 1 proc at same workload
  -h <host>   -- hostname for output
  -g          -- gnuplot output
  -w          -- wingz output (default)
  -a          -- don't subtract parallel work times
                 (only affects <field> = wall_op|user_op|sys_op)
  -t <title>  -- graph title
  -p <list>   -- only show data for a certain number of processors
  -M <max>    -- clamp values to an upper limit

where <type> is one of:
    min, max, avg, sum, sumsq, stddev, var, max_vs_min

and <field> is one of:
EOM
    print_field_list();
    exit(1);
}

if ($opt_w && $opt_g) {
    print "Only one of -w or -g may be specified.\n";
    exit(1);
}

## ARGUMENTS
my $hostname        = $opt_h;
my $field           = $ARGV[0];
my $min_max_avg     = $ARGV[1] || "avg";
my $normalizep      = $opt_n;
my $dir             = $opt_d || ".";
my $wingz           = $opt_w || !$opt_g;
my $gnuplot         = !$wingz;
my $no_sub_workload = $opt_a;
my $title           = $opt_t;
my $clamp_max       = $opt_M;    # stays undef when -M was not given
@shownprocs = split(/,/, $opt_p) if defined($opt_p);

if ($field =~ /^:/) {
    # ":<expr>" is an arithmetic expression over field names; every
    # run of lowercase letters/underscores must name a known field.
    my @notfound = grep { !defined($field_number{$_}) } ($field =~ /([a-z_]+)/g);
    if (@notfound) {
        print join(", ", @notfound), " not valid field name",
              (@notfound > 1 ? "s" : ""), ".\n";
        print "Must be one of:\n\n";
        print_field_list();
        exit(1);
    }
}
elsif (!defined($field_number{$field})) {
    print "<field> must be one of:\n";
    print_field_list();
    exit(1);
}

if (!defined($type_english{$min_max_avg})) {
    print "[type] must be one of: ",
          join(", ", qw(min max avg sum sumsq stddev var max_vs_min)), "\n";
    exit(1);
}

my @files = process_directory($dir);
if (@files == 0) {
    print "No files to work with!\n";
    exit(1);
}
process_filenames(@files);
%work_to_usec = load_workload_times($hostname);

print_procs_vs_workload($field, $min_max_avg, $normalizep);

#--------------------------------------------------------------
## print_field_list
##
## Print every known field name with its description, in column order.
sub print_field_list {
    my @names = sort { $field_number{$a} <=> $field_number{$b} }
                keys %field_english;
    for my $name (@names) {
        print " " x 10, sprintf("%10s", $name), "\t - $field_english{$name}\n";
    }
}

#--------------------------------------------------------------
## print_procs_vs_workload
##
## Print a table with one row per processor count and one column per
## workload, preceded by chart header/title lines and followed by
## row/column labels.
##
## field -- field to observe (or ":<expr>" over field names)
## min_max_avg - min, max, or avg of per-processor values
## normalizep -- normalize value wrt. 1 processor case
sub print_procs_vs_workload {
    my ($field, $min_max_avg, $normalizep) = @_;
    my @works = sorted_work();

    # Single-processor baselines are only needed when normalizing.
    my %uniproc_val;
    if ($normalizep) {
        for my $work (@works) {
            my @vals = get_eval(1, $work, $min_max_avg);
            $uniproc_val{$work} = get_val($field, @vals);
        }
    }

    ## chart info
    print "# " if $gnuplot;
    my $shown = grep { number_in_list($_, @shownprocs) } 1 .. $nprocs;
    print $shown;
    print "\t";
    print scalar(@works);
    print "\t\"Number of processors" if $wingz;
    print "\t\"Parallel work (usec)" if $wingz;
    if ($wingz) {
        my $filename = $title;
        if (!defined($filename) || $filename eq "") {
            $filename = $hostname . "_" . $field . $min_max_avg;
        }
        $filename =~ s/\s+/_/g;
        print "\t\"$filename.gif";
    }
    print "\n\n";

    ## chart title
    print "# "          if $gnuplot;
    print "\""          if $wingz;
    print "$title - "    if defined($title);
    print "$hostname - " if !defined($title);
    print "Normalized "  if $normalizep;
    # Expression fields have no canned description; show the expression.
    my $what = defined($field_english{$field}) ? $field_english{$field} : $field;
    print $type_english{$min_max_avg}, " ", $what;
    print "\n" if $gnuplot;

    ## column titles
    for my $work (@works) {
        print "\t";
        print "\"" if $wingz;
        print defined($work_to_usec{$work}) ? $work_to_usec{$work} : "";
    }
    print "\n";

    ## rows contain a fixed number of processors
    ## columns contain a fixed workload
    for my $pp (1 .. $nprocs) {
        next if !number_in_list($pp, @shownprocs);

        print "\"" if $wingz;
        print $pp;
        for my $work (@works) {
            my @vals = get_eval($pp, $work, $min_max_avg);
            my $val  = get_val($field, @vals);
            if ($normalizep && $uniproc_val{$work} != 0) {
                $val /= $uniproc_val{$work};
            }
            if (defined($clamp_max) && $val > $clamp_max) {
                $val = $clamp_max;
            }
            print "\t", sprintf("%.5f", $val);
        }
        print "\n";
    }
    print "\n";

    for my $piece ("row\t", "Number of processors\n", "col\t", "Workload (us)\n") {
        print "# " if $gnuplot;
        print "\"" if $wingz;
        print $piece;
    }
}

#--------------------------------------------------------------
## get_val
##
## Extract one value from a row of per-field statistics.
##
## field -- a field name, or ":<expr>" where <expr> is an arithmetic
##          expression whose field names are replaced by their values
## vals  -- statistics row as returned by get_eval
##
## Returns the value; never returns zero (see kludge below).
sub get_val {
    my ($field, @vals) = @_;
    my $val;

    if ($field =~ /^:/) {
        (my $expr = $field) =~ s/^://;
        # Parenthesise each substituted value so that a negative number
        # cannot fuse with a preceding operator (e.g. "a--3").
        $expr =~ s/([a-z_]+)/"(" . get_val($1, @vals) . ")"/ge;
        # NOTE: string eval of a command-line argument; field names are
        # validated at startup but the operators are not restricted.
        $val = eval($expr);
        warn "unable to evaluate \"$field\": $@" if $@;
    }
    else {
        $val = $vals[$field_number{$field}];
        $val = 0 if !defined($val);

        if (!$no_sub_workload
            && ($field eq "user_op" || $field eq "wall_op" || $field eq "sys_op")) {
            my $work_usec = $vals[$field_number{"work"}];
            $val -= $work_usec if defined($work_usec);
            $val = 0.0 if $val < 0.0;
        }
    }

    # kludge to prevent zeros in the data, s.t. log scale graphs
    # will work.  The non-zero value here should be less than the
    # estimated error in the data, but more than the precision of the
    # output.
    $val = 0.00001 if !defined($val) || $val == 0.0;

    return $val;
}

#--------------------------------------------------------------
## load_workload_times
##
## Read "<dir>/times-<hostname>" ("<work> <usec>" per line, '#' lines
## ignored) and return it as a hash.  If the file cannot be opened,
## every known workload maps to itself.
sub load_workload_times {
    my ($hostname) = @_;
    my %usec_for;

    if (open(my $fh, '<', "$dir/times-$hostname")) {
        while (my $line = <$fh>) {
            next if $line =~ /^#/;
            my ($work, $usec) = split(' ', $line);
            next if !defined($work);    # blank line
            $usec_for{$work} = $usec;
        }
        close($fh);
    }
    else {
        %usec_for = map { $_ => $_ } keys %work;
    }

    return %usec_for;
}

#--------------------------------------------------------------
## process_directory
##
## Scan dir for "hinv-<host>" and "data-<host>-..." files.  If no
## hostname was given with -h and exactly one host is present, that
## host is selected (sets the file-level $hostname).  Exits with a
## message if the host is ambiguous or unknown.
##
## Returns the names of the data files for the selected host.
sub process_directory {
    my ($dir) = @_;

    opendir(my $dh, $dir) || die "unable to opendir $dir: $!\n";
    my @files_raw = readdir($dh);
    closedir($dh);

    my @hosts = map { substr($_, length("hinv-")) }
                grep { /^hinv-/ } @files_raw;

    if (!defined($hostname)) {
        if (@hosts == 1) {
            $hostname = $hosts[0];
        }
        else {
            print "Must specify hostname!\n";
            exit(1);
        }
    }

    if (!grep { $_ eq $hostname } @hosts) {
        print "Data for $hostname doesn't exist in directory $dir!\n";
        exit(1);
    }

    # \Q..\E: the hostname is literal text, not a pattern.
    my @files = grep { /^data-\Q$hostname\E-/ } @files_raw;
    return @files;
}

#--------------------------------------------------------------
## sorted_work
##
## Returns the known workloads in ascending numeric order.
sub sorted_work {
    return sort { $a <=> $b } keys %work;
}

#--------------------------------------------------------------
## get_eval
##
## Read the data file for one (processors, workload) combination and
## reduce its rows column-by-column.
##
## nprocs -- number of processors (selects the file)
## work   -- workload (selects the file)
## type   -- min, max, avg, sum, sumsq, stddev, var, or max_vs_min
##
## Returns one value per column, indexed as in %field_number
## (including the synthetic fields).  Dies if the file cannot be
## opened, if a row records no operations, or if type is unknown.
sub get_eval {
    my ($np, $work, $type) = @_;
    my $file = "data-$hostname-w$work-t$np";
    my $path = $dir . "/" . $file;

    ## yuck, variance has to be two-pass: the averages come first
    my $want_var = ($type eq "stddev" || $type eq "var");
    my @avgs = $want_var ? get_eval($np, $work, "avg") : ();

    my (@sum, @sumsq, @max, @min, @var);
    my $nlines = 0;

    open(my $fh, '<', $path) || die "unable to open $dir/$file: $!\n";
    while (my $row = <$fh>) {
        next if $row =~ /^\#/;    # skip comments
        $row =~ s/^\s+//;         # remove leading whitespace
        $row =~ s/\s+$//;         # remove trailing whitespace
        next if $row eq "";       # skip blank lines

        ## XXX kludge to deal with infinity
        ## this will really only happen with user_op and wall_op
        my @line = map { $_ eq "inf" ? 1.0e10 : $_ } split(' ', $row);
        $nlines++;

        # fill in synthetic fields
        my $totalops = ($line[$field_number{"nops"}] || 0)
                     + ($line[$field_number{"fops"}] || 0);
        die "$dir/$file: line $.: no operations recorded\n" if $totalops == 0;
        $line[$field_number{"totalops"}]     = $totalops;
        $line[$field_number{"user_totalop"}] =
            ($line[$field_number{"user"}] || 0) * 1.0e6 / $totalops;
        $line[$field_number{"wall_totalop"}] =
            ($line[$field_number{"wall"}] || 0) * 1.0e6 / $totalops;

        for my $ii (0 .. $#line) {
            my $x = defined($line[$ii]) ? $line[$ii] : 0;

            # An undefined accumulator means this is the first value
            # seen for the column, so it initialises min and max.
            $max[$ii] = $x if !defined($max[$ii]) || $max[$ii] < $x;
            $min[$ii] = $x if !defined($min[$ii]) || $min[$ii] > $x;
            $sum[$ii]   += $x;
            $sumsq[$ii] += $x ** 2;
            if ($want_var) {
                my $mean = defined($avgs[$ii]) ? $avgs[$ii] : 0;
                $var[$ii] += ($x - $mean) ** 2;
            }
        }
    }
    close($fh);

    if ($want_var) {
        # sample variance; a single row has no spread
        for my $v (@var) {
            $v = ($nlines != 1) ? $v / ($nlines - 1.0) : 0;
        }
    }

    return @min   if $type eq "min";
    return @max   if $type eq "max";
    return @sum   if $type eq "sum";
    return @sumsq if $type eq "sumsq";
    return @var   if $type eq "var";
    if ($type eq "avg") {
        return map { $_ / ($nlines * 1.0) } @sum;
    }
    if ($type eq "stddev") {
        return map { sqrt($_) } @var;
    }
    if ($type eq "max_vs_min") {
        return map { $max[$_] - $min[$_] } 0 .. $#max;
    }
    die "unknown statistic type \"$type\"\n";
}

#--------------------------------------------------------------
## process_filenames
##
## Record every workload seen in the given data file names in %work
## and raise $nprocs to the largest processor count seen.  Names that
## do not contain "-w<digits>-t<digits>" are ignored.
sub process_filenames {
    my (@files) = @_;

    for my $file (@files) {
        next unless $file =~ /-w(\d+)-t(\d+)/;
        my ($this_work, $this_nprocs) = ($1, $2);

        $nprocs = $this_nprocs if $this_nprocs > $nprocs;
        $work{$this_work} = 1;
    }
}

#--------------------------------------------------------------
## number_in_list
##
## n    -- number to test
## list -- items that are either a single number or a range
##         "<lo>-<hi>" where either end may be omitted
##
## Returns 1 if n equals an item or falls inside a range, else 0.
sub number_in_list {
    my ($n, @list) = @_;

    for my $item (@list) {
        if ($item =~ /^(\d+)?-(\d+)?$/) {
            my ($lo, $hi) = ($1, $2);
            next if defined($lo) && $n < $lo;
            next if defined($hi) && $n > $hi;
            return 1;
        }
        elsif ($n == $item) {
            return 1;
        }
    }
    return 0;
}