diff --git a/m1/perf/Makefile b/m1/perf/Makefile index 5a36cdc..8c85070 100644 --- a/m1/perf/Makefile +++ b/m1/perf/Makefile @@ -36,6 +36,7 @@ $(LIBFPVM_A): clean: $(MAKE) -C $(M1SWLIB_DIR) clean $(MAKE) -C $(COMPILER_DIR) clean + rm -f $(M1SWLIB_DIR)/sched.o rm -f $(OBJS) path: diff --git a/m1/perf/TODO b/m1/perf/TODO index 5935d5e..aeff81a 100644 --- a/m1/perf/TODO +++ b/m1/perf/TODO @@ -1,8 +1,9 @@ Done: - dynamically allocate scheduler context +- see if preferring critical path can improve code efficiency (YES !) Pending: -- see if preferring critical path can improve code efficiency +- see if dynamically adjusting the critical path leads to further improvements - test IF - run result comparison against full set of patches - check if result comparison actually compares meaningful data diff --git a/m1/perf/sched.c b/m1/perf/sched.c index 49c6ed6..ec1ebda 100644 --- a/m1/perf/sched.c +++ b/m1/perf/sched.c @@ -31,7 +31,8 @@ #include -#define REG_STATS +//#define REG_STATS +//#define LCPF /* longest critical path first */ #ifdef DEBUG #define Dprintf printf @@ -64,6 +65,9 @@ struct insn { int earliest; /* earliest cycle dependencies seen so far are met */ struct list dependants; /* list of dependencies (constant) */ int num_dependants; /* number of unresolved dependencies */ +#ifdef LCPF + int distance; /* minimum cycles on this path until the end */ +#endif }; @@ -376,6 +380,18 @@ static void init_scheduler(struct fpvm_fragment *frag) else list_add_tail(&sc->ready[0], &insn->more); } + +#ifdef LCPF + struct data_ref *dep; + + for (i = frag->ninstructions-1; i >= 0; i--) { + insn = sc->insns+i; + foreach (dep, &insn->dependants) + if (dep->insn->distance > insn->distance) + insn->distance = dep->insn->distance; + insn->distance += insn->latency; + } +#endif } @@ -441,6 +457,9 @@ static int schedule(unsigned int *code) int remaining; int i, last, end; struct insn *insn; +#ifdef LCPF + struct insn *best; +#endif remaining = sc->frag->ninstructions; for (i = 0; remaining; i++) { @@ -451,17 +470,32 @@ static int schedule(unsigned int *code) i, remaining, count(&sc->waiting), count(&sc->ready[i])); list_concat(&sc->waiting, &sc->ready[i]); +#ifdef LCPF + best = NULL; +#endif foreach (insn, &sc->waiting) { end = i+insn->latency; if (end >= PFPU_PROGSIZE) return -1; if (!FIELD(code[end]).dest) { +#ifdef LCPF + if (!best || best->distance < insn->distance) + best = insn; +#else issue(insn, i, code); list_del(&insn->more); remaining--; break; +#endif } } +#ifdef LCPF + if (best) { + issue(best, i, code); + list_del(&best->more); + remaining--; + } +#endif if (FIELD(code[i]).dest) put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg); }