diff --git a/m1/perf/Makefile b/m1/perf/Makefile deleted file mode 100644 index 565ea3a..0000000 --- a/m1/perf/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -M1=/home/qi/m1 -COMPILER_DIR=$(M1)/flickernoise/src -M1SWINC_DIR=$(M1)/milkymist/software/include -M1SWLIB_DIR=$(M1)/milkymist/software/libfpvm/x86-linux - -#CFLAGS_EXTRA=-DCOMP_DEBUG -CFLAGS_EXTRA= -CFLAGS_PROF=-pg -CFLAGS_COMMON=-Wall -g $(CFLAGS_PROF) $(CFLAGS_EXTRA) -DPRINTF_FLOAT -CFLAGS_M=-fno-builtin #-nostdinc -fno-builtin -CFLAGS=$(CFLAGS_COMMON) \ - -I$(COMPILER_DIR) \ - -Ifakes -I$(M1SWINC_DIR) -LDFLAGS=$(CFLAGS_PROF) -LDLIBS=-L$(M1SWLIB_DIR) -lfpvm - - -COMPILER_O = $(COMPILER_DIR)/compiler.o -LIBFPVM_A = $(M1SWLIB_DIR)/libfpvm.a -OBJS = main.o $(COMPILER_O) - -.PHONY: all clean spotless path - -all: main - -main: $(OBJS) $(LIBFPVM_A) - -$(COMPILER_O): - $(MAKE) -C $(COMPILER_DIR) CC=gcc \ - CFLAGS="$(CFLAGS_COMMON) $(CFLAGS_M) -I$(M1SWINC_DIR) \ - -I$(shell pwd)/fakes" \ - compiler.o - -$(LIBFPVM_A): - $(MAKE) -C $(M1SWLIB_DIR) CC=gcc \ - CFLAGS='$(CFLAGS_COMMON) $(CFLAGS_M) -I$(M1SWINC_DIR)' - -clean: - $(MAKE) -C $(M1SWLIB_DIR) clean - $(MAKE) -C $(COMPILER_DIR) clean - rm -f $(M1SWLIB_DIR)/sched.o - rm -f $(OBJS) - -spotless: clean - rm -f main - -path: - @echo $(M1) diff --git a/m1/perf/TODO b/m1/perf/TODO deleted file mode 100644 index aeff81a..0000000 --- a/m1/perf/TODO +++ /dev/null @@ -1,14 +0,0 @@ -Done: -- dynamically allocate scheduler context -- see if preferring critical path can improve code efficiency (YES !) - -Pending: -- see if dynamically adjusting the critical path leads to further improvements -- test IF -- run result comparison against full set of patches -- check if result comparison actually compares meaningful data -- compare run time and code size for all patches -- see what optimization changes (may interfere with profiling) -- build into Flickernoise (some things may need adapting, e.g., abort()) -- review code, see if things can be simplified -- see if valgrind can do something useful diff --git a/m1/perf/all-runs b/m1/perf/all-runs deleted file mode 100755 index bbd35da..0000000 --- a/m1/perf/all-runs +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/sh -ex - -rm -rf data - -mkdir data -for n in ref new opt; do - - case $n in - ref) flags=;; - new) flags=-n;; - opt) flags="-n -o";; - esac - - mkdir data/$n - for m in out expr prof; do - case $m in - out) more=-s;; - expr) more=-e;; - prof) more=-p;; - esac - - mkdir data/$n/$m - ./runs $flags $more data/$n/$m || exit - done -done - diff --git a/m1/perf/eval.pl b/m1/perf/eval.pl deleted file mode 100755 index f2bcad8..0000000 --- a/m1/perf/eval.pl +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/perl - - -sub flush -{ - if ($nregs) { - print 0+keys %reg, "/", (sort { $b cmp $a } keys %reg)[0], - "\n"; - return; - } - for (sort keys %use) { - print "$_ = ".$reg{$_}."\n"; - } - print $res; -} - - -if ($ARGV[0] eq "-r") { - shift @ARGV; - $nregs = 1; -} - - -while (<>) { - if (/FPVM fragment:/) { - &flush if $i; - undef %tmp; - undef $i; - } - if (/PFPU fragment:/) { - undef $res; - undef %reg; - undef @val; - %use = %tmp; - $i = 0; - } - - $tmp{"R$1"} = 1 if /^\d+:.*-> R0(\d+)/; - next unless defined $i; - - next unless - /^(\d+):\s+(\S+)\s+(R\d+)?(,(R\d+))?.*?(->\s+(R\d+))?\s*$/; - # 1 2 3 4 5 6 7 - ($c, $op, $a, $b, $d) = ($1, $2, $3, $5, $7); - undef $e; - $e = $1 if /E=(\d+)>/; - die "($i) $_" if $c != $i; - - $reg{$a} = 1 if $nregs && defined $a; - $reg{$b} = 1 if $nregs && defined $b; - - print STDERR "$i: concurrent read/write on $a (A)\n" - if defined $d && $a eq $d; - print STDERR "$i: concurrent read/write on $b (B)\n" - if defined $d && $b eq $d; - - $a = $reg{$a} if defined $reg{$a}; - $b = $reg{$b} if defined $reg{$b}; - - if ($op eq "IF") { - $expr = "(IF ".$reg{"R002"}." $a $b)"; - $reg{"R002"} = 1 if $nregs; - } elsif ($op eq "VECTOUT") { - $res = "A = $a\nB = $b\n"; - } elsif (defined $b) { - $expr = "($op $a $b)"; - } elsif (defined $a) { - $expr = "($op $a)"; - } else { - $expr = "($op)"; - } - - $val[$e] = $expr if defined $e; - $reg{$d} = $val[$i] if defined $d; - $i++; -} -&flush; diff --git a/m1/perf/fakes/bsp b/m1/perf/fakes/bsp deleted file mode 120000 index 945c9b4..0000000 --- a/m1/perf/fakes/bsp +++ /dev/null @@ -1 +0,0 @@ -. \ No newline at end of file diff --git a/m1/perf/fakes/milkymist_pfpu.h b/m1/perf/fakes/milkymist_pfpu.h deleted file mode 100644 index 1a89b60..0000000 --- a/m1/perf/fakes/milkymist_pfpu.h +++ /dev/null @@ -1 +0,0 @@ -#include "hw/pfpu.h" diff --git a/m1/perf/fakes/rtems.h b/m1/perf/fakes/rtems.h deleted file mode 100644 index e69de29..0000000 diff --git a/m1/perf/favg b/m1/perf/favg deleted file mode 100755 index 7495f15..0000000 --- a/m1/perf/favg +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/perl -$sel = $ARGV[0]; -shift @ARGV; -for (@ARGV) { - $s = 0; - $n = 0; - open(FILE, $_) || die "$_: $!"; - while () { - $c = 0 if //; - if (/]*>([0-9.]+)/ ) { - $c++; - next unless $c == $sel; - $s += $1; - $n++; - } - } - close FILE; - print "$s/$n = ", $s/$n, "\n"; -} diff --git a/m1/perf/main.c b/m1/perf/main.c deleted file mode 100644 index 105ddcd..0000000 --- a/m1/perf/main.c +++ /dev/null @@ -1,61 +0,0 @@ -#include -#include - -#include "compiler.h" - - -#define BUF_SIZE 1000000 - - -static void report(const char *s) -{ - fprintf(stderr, "%s\n", s); -} - - -static void usage(const char *name) -{ - fprintf(stderr, "usage: %s patch-file [loops]\n", name); - exit(1); -} - - -int main(int argc, char **argv) -{ - char buf[BUF_SIZE]; - const char *name; - FILE *file; - size_t got; - int loops = 1; - int i; - - switch (argc) { - case 2: - break; - case 3: - loops = atoi(argv[2]); - break; - default: - usage(*argv); - } - - name = argv[1]; - file = fopen(name, "r"); - if (!file) { - perror(name); - exit(1); - } - got = fread(buf, 1, sizeof(buf)-1, file); - if (got < 0) { - perror(name); - exit(1); - } - buf[got] = 0; - fclose(file); - - for (i = 0; i != loops; i++) - if (!patch_compile(buf, report)) - return 1; - - return 0; -} diff --git a/m1/perf/runs b/m1/perf/runs deleted file mode 100755 index ac626e0..0000000 --- a/m1/perf/runs +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/sh -x - - -sanitize() -{ - basename "$1" .fnp | tr ' ' _ | tr -cd 'A-Za-z0-9_-' | tr -s _ _ -} - - -usage() -{ -cat <&2 -usage: $0 [-e|-p] [-n [-o] [-s]] dir - - -e generate the calculated expression (default: just dump debug output) - -p profile 10000 runs (default: just dump debug output) - -n use "new" scheduler - -o enable LCPF optimizer - -s enable register pressure statistics -EOF - -} - - -M1=`make path` - -extra= -sched= -profile=false -evaluate=false - -while [ "${1#-}" != "$1" ]; do - case "$1" in - -e) evaluate=true;; - -p) profile=true;; - -n) sched=SCHED=sched.o;; - -o) extra="$extra -DLCPF";; - -s) extra="$extra -DREG_STATS";; - *) usage;; - esac - shift -done - -[ -z "$1" -o "$2" ] && usage -if [ ! -d "$1" ]; then - echo "$1: directory not found" 1>&2 - exit 1 -fi - -$profile || extra="$extra -DCOMP_DEBUG" - -make spotless -make CFLAGS_EXTRA="$extra" $sched all - -for n in $M1/flickernoise/patches/*/*.fnp; do - s=`sanitize "$n"` - if $profile; then - ./main "$n" 10000 || exit - gprof main >"$1"/$s - elif $evaluate; then - ./main "$n" | ./eval.pl >"$1"/$s || exit - else - ./main "$n" >"$1"/$s || exit - fi -done diff --git a/m1/perf/sched.c b/m1/perf/sched.c deleted file mode 100644 index d2ff819..0000000 --- a/m1/perf/sched.c +++ /dev/null @@ -1,655 +0,0 @@ -/* - * lnfpus.c - O(n) ... O(n^2) scheduler - * - * Copyright (C) 2011 Werner Almesberger - * - * Based on gfpus.c - * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, version 3 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -#include - - -//#define REG_STATS -#define LCPF /* longest critical path first */ - -//#define DEBUG -#ifdef DEBUG -#define Dprintf printf -#else -#define Dprintf(...) -#endif - - -#define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */ - -#define CODE(n) (((pfpu_instruction *) (code+(n)))->i) - - -struct list { - struct list *next, *prev; -}; - - -struct insn { - struct list more; /* more insns on same schedule */ - struct fpvm_instruction *vm_insn; - struct data_ref { - struct list more; /* more refs sharing the data */ - struct insn *insn; /* insn this is part of */ - struct insn *dep; /* insn we depend on */ - } opa, opb, dest, cond; - int arity; - int latency; - int rmw; /* non-zero if instruction is read-modify-write */ - int unresolved; /* number of data refs we need before we can sched */ - int earliest; /* earliest cycle dependencies seen so far are met */ - struct list dependants; /* list of dependencies (constant) */ - int num_dependants; /* number of dependencies */ - struct insn *next_setter; /* next setter of the same register */ -#ifdef LCPF - int distance; /* minimum cycles on this path until the end */ -#endif -}; - - -struct vm_reg { - struct insn *setter; /* instruction setting it; NULL if none */ - struct insn *first_setter; /* first setter */ - int pfpu_reg; /* underlying PFPU register */ - int refs; /* usage count */ -}; - - -struct pfpu_reg { - struct list more; /* list of unallocated PFPU registers */ - int vm_reg; /* corresponding FPVM register if allocated */ - int used; /* used somewhere in the program */ -}; - - -static struct sched_ctx { - struct fpvm_fragment *frag; - struct insn insns[FPVM_MAXCODELEN]; - struct vm_reg *regs; /* dynamically allocated */ - struct pfpu_reg pfpu_regs[PFPU_REG_COUNT]; - struct list unallocated; /* unallocated registers */ - struct list unscheduled; /* unscheduled insns */ - struct list waiting; /* insns waiting to be scheduled */ - struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */ - int cycle; /* the current cycle */ -#ifdef REG_STATS - int max_regs, curr_regs; /* allocation statistics */ -#endif -} *sc; - - -/* ----- Register initialization ------------------------------------------- */ - - -/* - * Straight from gfpus.c, only with some whitespace changes. - */ - -static void get_registers(struct fpvm_fragment *fragment, - unsigned int *registers) -{ - int i; - union { - float f; - unsigned int n; - } fconv; - - for(i = 0; i < fragment->nbindings; i++) - if(fragment->bindings[i].isvar) - registers[i] = 0; - else { - fconv.f = fragment->bindings[i].b.c; - registers[i] = fconv.n; - } - for(; i < PFPU_REG_COUNT; i++) - registers[i] = 0; -} - - -/* ----- Doubly-linked list ------------------------------------------------ */ - - -/* - * Use the naming conventions of include/linux/list.h - */ - - -#ifdef DEBUG - -static void list_poison(struct list *list) -{ - list->next = list->prev = NULL; -} - -#else /* DEBUG */ - -#define list_poison(list) - -#endif /* !DEBUG */ - - -static void list_init(struct list *list) -{ - list->next = list->prev = list; -} - - -static void list_del(struct list *item) -{ - assert(item->next != item); - item->prev->next = item->next; - item->next->prev = item->prev; - list_poison(item); -} - - -static void *list_pop(struct list *list) -{ - struct list *first; - - first = list->next; - if(first == list) - return NULL; - list_del(first); - return first; -} - - -static void list_add_tail(struct list *list, struct list *item) -{ - item->next = list; - item->prev = list->prev; - list->prev->next = item; - list->prev = item; -} - - -static void list_add(struct list *list, struct list *item) -{ - item->next = list->next; - item->prev = list; - list->next->prev = item; - list->next = item; -} - - -static void list_concat(struct list *a, struct list *b) -{ - if(b->next != b) { - a->prev->next = b->next; - b->next->prev = a->prev; - b->prev->next = a; - a->prev = b->prev; - } - list_poison(b); -} - - -/* - * Do not delete elements from the list while traversing it with foreach ! - */ - -#define foreach(var, head) \ - for(var = (void *) ((head))->next; \ - (var) != (void *) (head); \ - var = (void *) ((struct list *) (var))->next) - - -/* ----- Register management ----------------------------------------------- */ - - -static int vm_reg2idx(int reg) -{ - return reg >= 0 ? reg : sc->frag->nbindings-reg; -} - - -static int alloc_reg(struct insn *setter) -{ - struct pfpu_reg *reg; - int vm_reg, pfpu_reg, vm_idx; - - vm_reg = setter->vm_insn->dest; - if(vm_reg >= 0) { - pfpu_reg = vm_reg; - sc->pfpu_regs[vm_reg].vm_reg = vm_reg; /* @@@ global init */ - } else { - reg = list_pop(&sc->unallocated); - if(!reg) - return -1; - - #ifdef REG_STATS - sc->curr_regs++; - if(sc->curr_regs > sc->max_regs) - sc->max_regs = sc->curr_regs; - #endif - - reg->vm_reg = vm_reg; - pfpu_reg = reg-sc->pfpu_regs; - } - - Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg); - - vm_idx = vm_reg2idx(vm_reg); - sc->regs[vm_idx].setter = setter; - sc->regs[vm_idx].pfpu_reg = pfpu_reg; - sc->regs[vm_idx].refs = setter->num_dependants+1; - - return pfpu_reg; -} - - -static void put_reg(int vm_reg) -{ - int vm_idx; - struct vm_reg *reg; - - if(vm_reg >= 0) - return; - - vm_idx = vm_reg2idx(vm_reg); - reg = sc->regs+vm_idx; - - assert(reg->refs); - if(--reg->refs) - return; - - Dprintf(" free reg %d\n", reg->pfpu_reg); - -#ifdef REG_STATS - assert(sc->curr_regs); - sc->curr_regs--; -#endif - - /* - * Prepend so that register numbers stay small and bugs reveal - * themselves more rapidly. - */ - list_add(&sc->unallocated, &sc->pfpu_regs[reg->pfpu_reg].more); - - /* clear it for style only */ - reg->setter = NULL; - reg->pfpu_reg = 0; -} - - -static int lookup_pfpu_reg(int vm_reg) -{ - return vm_reg >= 0 ? vm_reg : sc->regs[vm_reg2idx(vm_reg)].pfpu_reg; -} - - -static void mark(int vm_reg) -{ - if(vm_reg > 0) - sc->pfpu_regs[vm_reg].used = 1; -} - - -static int init_registers(struct fpvm_fragment *frag, - unsigned int *registers) -{ - int i; - - get_registers(frag, registers); - - for(i = 0; i != frag->ninstructions; i++) { - mark(frag->code[i].opa); - mark(frag->code[i].opb); - mark(frag->code[i].dest); - } - - list_init(&sc->unallocated); - for(i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++) - if(!sc->pfpu_regs[i].used) - list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more); - - return 0; -} - - -/* ----- Instruction scheduler --------------------------------------------- */ - - -static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref, - int reg_num) -{ - struct vm_reg *reg; - - reg = sc->regs+vm_reg2idx(reg_num); - ref->insn = insn; - ref->dep = reg->setter; - if(insn->vm_insn->dest == reg_num) - insn->rmw = 1; - if(!ref->dep) - reg->refs++; - else { - list_add_tail(&ref->dep->dependants, &ref->more); - ref->dep->num_dependants++; - insn->unresolved++; - - Dprintf("insn %lu: reg %d setter %lu unresolved %d\n", - insn-sc->insns, reg_num, reg->setter-sc->insns, - insn->unresolved); - } - return reg; -} - - -static void init_scheduler(struct fpvm_fragment *frag) -{ - int i; - struct insn *insn; - struct vm_reg *reg; - struct data_ref *ref; - - list_init(&sc->unscheduled); - list_init(&sc->waiting); - for(i = 0; i != PFPU_PROGSIZE; i++) - list_init(sc->ready+i); - - for(i = 0; i != frag->ninstructions; i++) { - insn = sc->insns+i; - insn->vm_insn = frag->code+i; - insn->arity = fpvm_get_arity(frag->code[i].opcode); - insn->latency = pfpu_get_latency(frag->code[i].opcode); - list_init(&insn->dependants); - switch (insn->arity) { - case 3: - add_data_ref(insn, &insn->cond, FPVM_REG_IFB); - /* fall through */ - case 2: - add_data_ref(insn, &insn->opb, frag->code[i].opb); - /* fall through */ - case 1: - add_data_ref(insn, &insn->opa, frag->code[i].opa); - /* fall through */ - case 0: - reg = sc->regs+vm_reg2idx(frag->code[i].dest); - if(reg->setter) { - reg->setter->next_setter = insn; - foreach(ref, ®->setter->dependants) - if(ref->insn != insn) - insn->unresolved++; - if(!insn->rmw) - insn->unresolved++; - } else { - if(!insn->rmw) - insn->unresolved += reg->refs; - reg->first_setter = insn; - } - reg->setter = insn; - break; - default: - abort(); - } - if(insn->unresolved) - list_add_tail(&sc->unscheduled, &insn->more); - else - list_add_tail(&sc->ready[0], &insn->more); - } - -#ifdef LCPF - struct data_ref *dep; - - for(i = frag->ninstructions-1; i >= 0; i--) { - insn = sc->insns+i; -#if 0 - /* - * Theoretically, we should consider the distance through - * write-write dependencies too. In practice, this would - * mainly matter if we had operations whose result is ignored. - * This is a degenerate case that's probably not worth - * spending much effort on. - */ - if(insn->next_setter) { - insn->distance = - insn->next_setter->distance-insn->distance+1; - if(insn->distance < 1) - insn->distance = 1; - } -#endif - foreach(dep, &insn->dependants) - if(dep->insn->distance > insn->distance) - insn->distance = dep->insn->distance; - /* - * While it would be more correct to add one for the cycle - * following the write cycle, this also has the effect of - * producing slighly worse results on the example set of - * patches. Let's thus keep this "bug" for now. - */ -// insn->distance += insn->latency+1; - insn->distance += insn->latency; - } -#endif -} - - -static void unblock(struct insn *insn) -{ - int slot; - - assert(insn->unresolved); - if(--insn->unresolved) - return; - Dprintf(" unblocked %lu -> %u\n", insn-sc->insns, insn->earliest); - list_del(&insn->more); - slot = insn->earliest; - if(slot <= sc->cycle) - slot = sc->cycle+1; - list_add_tail(sc->ready+slot, &insn->more); -} - - -static void put_reg_by_ref(struct data_ref *ref, int vm_reg) -{ - struct insn *setter = ref->dep; - struct vm_reg *reg; - - if(setter) { - put_reg(setter->vm_insn->dest); - if(setter->next_setter && setter->next_setter != ref->insn) - unblock(setter->next_setter); - } else { - reg = sc->regs+vm_reg2idx(vm_reg); - if(reg->first_setter && !reg->first_setter->rmw) - unblock(reg->first_setter); - } -} - - -static void unblock_after(struct insn *insn, int cycle) -{ - if(insn->earliest <= cycle) - insn->earliest = cycle+1; - unblock(insn); -} - - -static int issue(struct insn *insn, unsigned *code) -{ - struct data_ref *ref; - int end, reg; - - end = sc->cycle+insn->latency; - - Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", sc->cycle, - insn-sc->insns, insn->latency, insn->vm_insn->opa, - insn->vm_insn->opb); - - switch (insn->arity) { - case 3: - put_reg_by_ref(&insn->cond, FPVM_REG_IFB); - /* fall through */ - case 2: - CODE(sc->cycle).opb = lookup_pfpu_reg(insn->vm_insn->opb); - put_reg_by_ref(&insn->opb, insn->vm_insn->opb); - /* fall through */ - case 1: - CODE(sc->cycle).opa = lookup_pfpu_reg(insn->vm_insn->opa); - put_reg_by_ref(&insn->opa, insn->vm_insn->opa); - break; - case 0: - break; - default: - abort(); - } - - reg = alloc_reg(insn); - if(reg < 0) - return -1; - CODE(end).dest = reg; - CODE(sc->cycle).opcode = fpvm_to_pfpu(insn->vm_insn->opcode); - - foreach(ref, &insn->dependants) - unblock_after(ref->insn, end); - if(insn->next_setter && !insn->next_setter->rmw) - unblock_after(insn->next_setter, - end-insn->next_setter->latency); - - return 0; -} - - -#ifdef DEBUG -static int count(const struct list *list) -{ - int n = 0; - const struct list *p; - - for(p = list->next; p != list; p = p->next) - n++; - return n; -} -#endif - - -static int schedule(unsigned int *code) -{ - int remaining; - int i, last, end; - struct insn *insn; - struct insn *best; - - remaining = sc->frag->ninstructions; - for(i = 0; remaining; i++) { - if(i == PFPU_PROGSIZE) - return -1; - - sc->cycle = i; - Dprintf("@%d --- remaining %d, waiting %d + ready %d\n", - i, remaining, count(&sc->waiting), count(&sc->ready[i])); - - list_concat(&sc->waiting, sc->ready+i); - best = NULL; - foreach(insn, &sc->waiting) { - end = i+insn->latency; - if(end >= PFPU_PROGSIZE) - return -1; - if(!CODE(end).dest) { -#ifdef LCPF - if(!best || best->distance < insn->distance) - best = insn; -#else - best = insn; - break; -#endif - } - } - if(best) { - if(issue(best, code) < 0) - return -1; - list_del(&best->more); - remaining--; - } - if(CODE(i).dest) - put_reg(sc->pfpu_regs[CODE(i).dest].vm_reg); - } - - /* - * Add NOPs to cover unfinished instructions. - */ - last = i; - end = i+MAX_LATENCY; - if(end > PFPU_PROGSIZE) - end = PFPU_PROGSIZE; - while(i != end) { - if(CODE(i).dest) - last = i+1; - i++; - } - return last; -} - - -int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code, - unsigned int *reg) -{ - /* - * allocate context and registers on stack because standalone FN has no - * memory allocator - */ - struct sched_ctx sc_alloc; - struct vm_reg regs[frag->nbindings-frag->next_sur]; - pfpu_instruction vecout; - int res; - -printf("greetings %lu %lu\n", sizeof(*sc), sizeof(regs)); - sc = &sc_alloc; - memset(sc, 0, sizeof(*sc)); - sc->frag = frag; - sc->regs = regs; - memset(regs, 0, sizeof(regs)); - - if(init_registers(frag, reg) < 0) - return -1; - init_scheduler(frag); - - memset(code, 0, PFPU_PROGSIZE*sizeof(*code)); - res = schedule(code); - -#ifdef REG_STATS - printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs); -#endif - - if(res < 0) - return res; - if(frag->vector_mode) - return res; - if(res == PFPU_PROGSIZE) - return -1; - - vecout.w = 0; - vecout.i.opcode = FPVM_OPCODE_VECTOUT; - code[res] = vecout.w; - - return res+1; -} diff --git a/m1/perf/tabulate b/m1/perf/tabulate deleted file mode 100755 index f051670..0000000 --- a/m1/perf/tabulate +++ /dev/null @@ -1,173 +0,0 @@ -#!/bin/sh - -M1=`make path` - - -time() -{ - sed '/^.*of \([0-9.]*\) seconds.*/s//\1/p;d' $2" -} - - -rank() -{ - op=$1 - txt=`$2 $3 $6` - a=`echo "$txt" | trim` - b=`$2 $4 $6 | trim` - c=`$2 $5 $6 | trim` - - if [ $a $op $b -a $a $op $c ]; then - col=$green - elif [ $b $op $a -a $c $op $a ]; then - col=$red - else - col=white - fi - echo "$txt$7" -} - - -red="#ffb0b0" -green="#a0ffa0" - -html=false -if [ $1 = -h ]; then - html=true - shift -fi - -if $html; then - cat < -Scheduler comparison - - - - - - - - - - - - - - - -EOF - for n in `ls -1 data/ref/out`; do - ref=`sum ref $n` - new=`sum new $n` - opt=`sum opt $n` - same=`eq $ref $new`/`eq $ref $opt`/`eq $new $opt` - - echo "" - rank -lt time ref new opt $n - rank -lt size ref new opt $n - rank -gt eff ref new opt $n % - rank -lt regs ref new opt $n - - echo "
OriginalNew (no optimizer)New (LCPF)EquivName
TimeSizeEffRegs TimeSizeEffRegs TimeSizeEffRegs 
" - rank -lt time new ref opt $n - rank -lt size new ref opt $n - rank -gt eff new ref opt $n % - rank -lt regs new ref opt $n - - echo "" - rank -lt time opt ref new $n - rank -lt size opt ref new $n - rank -gt eff opt ref new $n % - rank -lt regs opt ref new $n - - echo "" - if [ $same = Y/Y/Y ]; then - cfield $green $same - else - cfield $red $same - fi - cfield white "$n" - done - cat < - - -EOF - exit -fi - - -echo "Original New sched (no opt) New sched (LCPF) Equiv Name" -echo "Time Size Eff Regs Time Size Eff Regs Time Size Eff Regs" - -tref=0 -tnew=0 -topt=0 - -for n in `ls -1 data/ref/out`; do - ref=`sum ref $n` - new=`sum new $n` - opt=`sum opt $n` - printf "%5.1f %4d%3d%% %4d %5.1f %4d%3d%% %4d %5.1f %4d%3d%% %4d %s " \ - `time ref $n` `size ref $n` `eff ref $n` `regs ref $n` \ - `time new $n` `size new $n` `eff new $n` `regs new $n` \ - `time opt $n` `size opt $n` `eff opt $n` `regs opt $n` \ - `eq $ref $new`/`eq $ref $opt`/`eq $new $opt` - echo $n - tref="$tref `time ref $n` +" - tnew="$tnew `time new $n` +" - topt="$topt `time opt $n` +" -done - -printf "Original time: %9.1f s\n" `dc -e "$tref p"` -printf "New (unopt) time: %6.1f s\n" `dc -e "$tnew p"` -printf "New (opt) time: %8.1f s\n" `dc -e "$topt p"` diff --git a/m1/perf/try b/m1/perf/try deleted file mode 100755 index 37b5334..0000000 --- a/m1/perf/try +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -x - -M1=`make path` - -make CFLAGS_EXTRA=-DCOMP_DEBUG CFLAGS_PROF= SCHED=sched.o -gdb --args ./main $M1/flickernoise/patches/*/*Godhead*.fnp