diff --git a/m1/perf/Makefile b/m1/perf/Makefile index 20abdc7..5a36cdc 100644 --- a/m1/perf/Makefile +++ b/m1/perf/Makefile @@ -3,7 +3,9 @@ COMPILER_DIR=$(M1)/flickernoise/src M1SWINC_DIR=$(M1)/milkymist/software/include M1SWLIB_DIR=$(M1)/milkymist/software/libfpvm -CFLAGS_COMMON=-Wall -g -pg # -DCOMP_DEBUG +#CFLAGS_EXTRA=-DCOMP_DEBUG +CFLAGS_EXTRA= +CFLAGS_COMMON=-Wall -g -pg $(CFLAGS_EXTRA) CFLAGS=$(CFLAGS_COMMON) \ -I$(COMPILER_DIR) \ -Ifakes -I$(M1SWINC_DIR) @@ -15,6 +17,8 @@ COMPILER_O = $(COMPILER_DIR)/compiler.o LIBFPVM_A = $(M1SWLIB_DIR)/libfpvm.a OBJS = main.o $(COMPILER_O) +.PHONY: all clean path + all: main main: $(OBJS) $(LIBFPVM_A) @@ -33,3 +37,6 @@ clean: $(MAKE) -C $(M1SWLIB_DIR) clean $(MAKE) -C $(COMPILER_DIR) clean rm -f $(OBJS) + +path: + @echo $(M1)
diff --git a/m1/perf/eval.pl b/m1/perf/eval.pl
new file mode 100755
index 0000000..6c80b02
--- /dev/null
+++ b/m1/perf/eval.pl
@@ -0,0 +1,65 @@
+#!/usr/bin/perl
+#
+# eval.pl - Rebuild symbolic expressions from a PFPU instruction listing
+#
+# Reads the listing from standard input or from the files named on the
+# command line. Lines that do not match the instruction pattern below are
+# skipped. For each instruction, an S-expression is built from its opcode
+# and the expressions currently associated with its operand registers.
+# When the input is exhausted, the two operands of the last VECTOUT
+# instruction are printed, one per line.
+#
+
+# Uncomment to skip all input up to and including the first line that
+# contains "per-vertex PFPU fragment".
+#while (<>) {
+#    last if /per-vertex PFPU fragment/;
+#}
+
+$i = 0;
+while (<>) {
+    # Instruction number, opcode, up to two operand registers, and an
+    # optional "-> Rn" naming the register that is written on this line.
+    next unless
+      /^(\d+):\s+(\S+)\s+(R\d+)?(,(R\d+))?.*?(->\s+(R\d+))?\s*$/;
+    #   1        2       3      4 5          6     7
+    ($c, $op, $a, $b, $d) = ($1, $2, $3, $5, $7);
+
+    # "E=n>" gives the number of the line whose "-> Rn" receives the
+    # result of this instruction.
+    undef $e;
+    $e = $1 if /E=(\d+)>/;
+
+    # Instructions must be numbered consecutively, starting at zero.
+    die "expected instruction $i, found $c (input line $.)\n" if $c != $i;
+
+    # Substitute the expressions held by the operand registers. A register
+    # that has not been written so far is shown by its name.
+    $a = $reg{$a} if defined $reg{$a};
+    $b = $reg{$b} if defined $reg{$b};
+
+    if ($op eq "IF") {
+        # IF takes an additional operand, shown first, from register 2.
+        # %reg is keyed by register name, not by register number.
+        # NOTE(review): assumes the listing uses three-digit register
+        # names ("R002") - confirm against the tool that prints it.
+        $cond = defined $reg{"R002"} ? $reg{"R002"} : "R002";
+        $expr = "(IF $cond $a $b)";
+    } elsif ($op eq "VECTOUT") {
+        $res = "$a\n$b\n";
+    } elsif (defined $b) {
+        $expr = "($op $a $b)";
+    } elsif (defined $a) {
+        $expr = "($op $a)";
+    } else {
+        $expr = "($op)";
+    }
+
+    # File the result under the line on which it is written back, then
+    # perform the write-back that is due on the current line.
+    $val[$e] = $expr if defined $e;
+    $reg{$d} = $val[$i] if defined $d;
+    $i++;
+}
+
+print $res;
diff --git a/m1/perf/sched.c b/m1/perf/sched.c index 0123e9a..82eaeca 100644 --- a/m1/perf/sched.c +++ b/m1/perf/sched.c @@ -31,10 +31,12 @@ #include +#define Dprintf(...) + + #define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */ - -#define Dprintf(...) 
+#define FIELD(w) (((pfpu_instruction *) &(w))->i) struct list { @@ -91,7 +93,7 @@ static void get_registers(struct fpvm_fragment *fragment, /* - * Use naming conventions of include/linux/list.h + * Use the naming conventions of include/linux/list.h */ static void list_init(struct list *list) @@ -168,7 +170,12 @@ static struct vm_reg { int refs; /* usage count */ } *regs; -static struct list pfpu_regs[PFPU_REG_COUNT]; +static struct pfpu_reg { + struct list more; /* list of unallocated PFPU registers */ + int vm_reg; /* corresponding FPVM register if allocated */ + int used; /* used somewhere in the program */ +} pfpu_regs[PFPU_REG_COUNT]; + static struct list unallocated; /* unallocated registers */ static int nbindings; /* "public" bindings */ @@ -181,7 +188,7 @@ static int reg2idx(int reg) static int alloc_reg(struct insn *setter) { - struct list *reg; + struct pfpu_reg *reg; int vm_reg, pfpu_reg, vm_idx; vm_reg = setter->vm_insn->dest; @@ -190,6 +197,7 @@ static int alloc_reg(struct insn *setter) reg = list_pop(&unallocated); if (!reg) abort(); + reg->vm_reg = vm_reg; pfpu_reg = reg-pfpu_regs; Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg); vm_idx = reg2idx(vm_reg); @@ -200,14 +208,10 @@ Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg); } -static void put_reg(struct insn *setter) +static void put_reg(int vm_reg) { - int vm_reg, vm_idx; + int vm_idx; - if (!setter) - return; - - vm_reg = setter->vm_insn->dest; if (vm_reg >= 0) return; @@ -220,7 +224,7 @@ Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg); * Prepend so that register numbers stay small and bugs reveal * themselves more rapidly. 
*/ - list_add(&unallocated, pfpu_regs+regs[vm_idx].pfpu_reg); + list_add(&unallocated, &pfpu_regs[regs[vm_idx].pfpu_reg].more); /* clear it for style only */ regs[vm_idx].setter = NULL; @@ -228,12 +232,26 @@ Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg); } +static void put_reg_by_setter(struct insn *setter) +{ + if (setter) + put_reg(setter->vm_insn->dest); +} + + static int lookup_pfpu_reg(int vm_reg) { return vm_reg >= 0 ? vm_reg : regs[reg2idx(vm_reg)].pfpu_reg; } +static void mark(int vm_reg) +{ + if (vm_reg > 0) + pfpu_regs[vm_reg].used = 1; +} + + static void init_registers(struct fpvm_fragment *fragment, unsigned int *registers) { @@ -247,14 +265,17 @@ static void init_registers(struct fpvm_fragment *fragment, regs = malloc(regs_size); memset(regs, 0, regs_size); - list_init(&unallocated); - for (i = fragment->nbindings; i != PFPU_REG_COUNT; i++) - list_add_tail(&unallocated, pfpu_regs+i); + memset(pfpu_regs, 0, sizeof(pfpu_regs)); + for (i = 0; i != fragment->ninstructions; i++) { + mark(fragment->code[i].opa); + mark(fragment->code[i].opb); + mark(fragment->code[i].dest); + } -/* - * @@@ the rules are more complex, see use of dont_touch in - * init_scheduler_state - */ + list_init(&unallocated); + for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++) + if (!pfpu_regs[i].used) + list_add_tail(&unallocated, &pfpu_regs[i].more); } @@ -264,9 +285,6 @@ static void init_registers(struct fpvm_fragment *fragment, static struct list unscheduled; /* unscheduled insns */ static struct list waiting; /* insns waiting to be scheduled */ static struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */ -static struct insn *exits[PFPU_PROGSIZE+MAX_LATENCY]; - /* insn writing at nth cycle */ -static struct insn dummy_insn; /* dummy, to signal occupancy */ static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref, @@ -335,29 +353,15 @@ catch = 1; list_add_tail(&unscheduled, &insn->more); else list_add_tail(&ready[0], &insn->more); - } - - /* - * We 
add a few dummy instructions at the end so that we don't need to - * check array boundaries for the unlikely case of overrunning the - * schedule. - */ - for (i = 0; i != PFPU_PROGSIZE; i++) - exits[i] = NULL; - for (; i != PFPU_PROGSIZE+MAX_LATENCY; i++) - exits[i] = &dummy_insn; } -static unsigned issue(struct insn *insn, int cycle) +static void issue(struct insn *insn, int cycle, unsigned *code) { - pfpu_instruction code; struct data_ref *ref; int end; -int nada = 0; end = cycle+insn->latency; - exits[end] = insn; Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle, insn-insns, insn->latency, insn->vm_insn->opa, insn->vm_insn->opb); @@ -365,12 +369,12 @@ Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", case 3: /* fall through */ case 2: - code.i.opb = lookup_pfpu_reg(insn->vm_insn->opb); - put_reg(insn->opb.dep); + FIELD(code[cycle]).opb = lookup_pfpu_reg(insn->vm_insn->opb); + put_reg_by_setter(insn->opb.dep); /* fall through */ case 1: - code.i.opa = lookup_pfpu_reg(insn->vm_insn->opa); - put_reg(insn->opa.dep); + FIELD(code[cycle]).opa = lookup_pfpu_reg(insn->vm_insn->opa); + put_reg_by_setter(insn->opa.dep); break; case 0: break; @@ -378,23 +382,19 @@ Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", abort(); } - code.i.dest = alloc_reg(insn); - code.i.opcode = fpvm_to_pfpu(insn->vm_insn->opcode); + FIELD(code[end]).dest = alloc_reg(insn); + FIELD(code[cycle]).opcode = fpvm_to_pfpu(insn->vm_insn->opcode); foreach (ref, &insn->dependants) { if (ref->insn->earliest <= end) ref->insn->earliest = end+1; if (!--ref->insn->unresolved) { Dprintf(" unlocked %lu -> %u\n", ref->insn-insns, ref->insn->earliest); -nada = 0; list_del(&ref->insn->more); list_add_tail(ready+ref->insn->earliest, &ref->insn->more); } } -if (nada && catch) *(volatile int *) 0 = 1; - - return code.w; } @@ -423,15 +423,19 @@ Dprintf("@%d --- remaining %d, waiting %d + ready %d = ", i, remaining, count(&waiting), count(&ready[i])); list_concat(&waiting, &ready[i]); Dprintf("%d\n", 
count(&waiting)); - foreach (insn, &waiting) - if (!exits[i+insn->latency]) { - code[i] = issue(insn, i); + foreach (insn, &waiting) { + end = i+insn->latency; + if (end >= PFPU_PROGSIZE) + return -1; + if (!FIELD(code[end]).dest) { + issue(insn, i, code); list_del(&insn->more); remaining--; break; } - if (exits[i]) - put_reg(exits[i]); + } + if (FIELD(code[i]).dest) + put_reg(pfpu_regs[FIELD(code[i]).dest].vm_reg); } /* @@ -442,7 +446,7 @@ Dprintf("%d\n", count(&waiting)); if (end > PFPU_PROGSIZE) end = PFPU_PROGSIZE; while (i != end) { - if (exits[i]) + if (FIELD(code[i]).dest) last = i+1; /* @@@ ? */ i++; }