From b4abaffa5cd51c4b0d73c2c79a0095330dfa7b4e Mon Sep 17 00:00:00 2001 From: Werner Almesberger Date: Sun, 18 Sep 2011 06:12:00 -0300 Subject: [PATCH] m1/perf/: allocate scheduler context (about 0.5 MB) dynamically --- m1/perf/TODO | 5 ++ m1/perf/sched.c | 161 +++++++++++++++++++++++++++--------------------- 2 files changed, 95 insertions(+), 71 deletions(-) diff --git a/m1/perf/TODO b/m1/perf/TODO index f4f212e..5935d5e 100644 --- a/m1/perf/TODO +++ b/m1/perf/TODO @@ -1,3 +1,7 @@ +Done: +- dynamically allocate scheduler context + +Pending: - see if preferring critical path can improve code efficiency - test IF - run result comparison against full set of patches @@ -6,3 +10,4 @@ - see what optimization changes (may interfere with profiling) - build into Flickernoise (some things may need adapting, e.g., abort()) - review code, see if things can be simplified +- see if valgrind can do something useful diff --git a/m1/perf/sched.c b/m1/perf/sched.c index 0732b8f..8d09e0c 100644 --- a/m1/perf/sched.c +++ b/m1/perf/sched.c @@ -48,7 +48,7 @@ struct list { }; -static struct insn { +struct insn { struct list more; /* more insns on same schedule */ struct fpvm_instruction *vm_insn; struct data_ref { @@ -62,7 +62,33 @@ static struct insn { int earliest; /* earliest cycle dependencies seen so far are met */ struct list dependants; /* list of dependencies (constant) */ int num_dependants; /* number of unresolved dependencies */ -} insns[FPVM_MAXCODELEN]; +}; + + +struct vm_reg { + struct insn *setter; /* instruction setting it; NULL if none */ + int pfpu_reg; /* underlying PFPU register */ + int refs; /* usage count */ +}; + + +struct pfpu_reg { + struct list more; /* list of unallocated PFPU registers */ + int vm_reg; /* corresponding FPVM register if allocated */ + int used; /* used somewhere in the program */ +}; + + +static struct sched_ctx { + struct fpvm_fragment *frag; + struct insn insns[FPVM_MAXCODELEN]; + struct vm_reg *regs; /* dynamically allocated */ + struct 
pfpu_reg pfpu_regs[PFPU_REG_COUNT]; + struct list unallocated; /* unallocated registers */ + struct list unscheduled; /* unscheduled insns */ + struct list waiting; /* insns waiting to be scheduled */ + struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */ +} *sc; /* ----- Register initialization ------------------------------------------- */ @@ -168,25 +194,9 @@ static void list_concat(struct list *a, struct list *b) /* ----- Register management ----------------------------------------------- */ -static struct vm_reg { - struct insn *setter; /* instruction setting it; NULL if none */ - int pfpu_reg; /* underlying PFPU register */ - int refs; /* usage count */ -} *regs; - -static struct pfpu_reg { - struct list more; /* list of unallocated PFPU registers */ - int vm_reg; /* corresponding FPVM register if allocated */ - int used; /* used somewhere in the program */ -} pfpu_regs[PFPU_REG_COUNT]; - -static struct list unallocated; /* unallocated registers */ -static int nbindings; /* "public" bindings */ - - static int reg2idx(int reg) { - return reg >= 0 ? reg : nbindings-reg; + return reg >= 0 ? 
reg : sc->frag->nbindings-reg; } @@ -198,18 +208,19 @@ static int alloc_reg(struct insn *setter) vm_reg = setter->vm_insn->dest; if (vm_reg >= 0) return vm_reg; - reg = list_pop(&unallocated); + reg = list_pop(&sc->unallocated); if (!reg) abort(); reg->vm_reg = vm_reg; - pfpu_reg = reg-pfpu_regs; + pfpu_reg = reg-sc->pfpu_regs; Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg); vm_idx = reg2idx(vm_reg); - regs[vm_idx].setter = setter; - regs[vm_idx].pfpu_reg = pfpu_reg; - regs[vm_idx].refs = setter->num_dependants+1; + sc->regs[vm_idx].setter = setter; + sc->regs[vm_idx].pfpu_reg = pfpu_reg; + sc->regs[vm_idx].refs = setter->num_dependants+1; + return pfpu_reg; } @@ -222,19 +233,21 @@ static void put_reg(int vm_reg) return; vm_idx = reg2idx(vm_reg); - if (--regs[vm_idx].refs) + if (--sc->regs[vm_idx].refs) return; -Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg); + Dprintf(" free reg %d\n", sc->regs[vm_idx].pfpu_reg); + /* * Prepend so that register numbers stay small and bugs reveal * themselves more rapidly. */ - list_add(&unallocated, &pfpu_regs[regs[vm_idx].pfpu_reg].more); + list_add(&sc->unallocated, + &sc->pfpu_regs[sc->regs[vm_idx].pfpu_reg].more); /* clear it for style only */ - regs[vm_idx].setter = NULL; - regs[vm_idx].pfpu_reg = 0; + sc->regs[vm_idx].setter = NULL; + sc->regs[vm_idx].pfpu_reg = 0; } @@ -247,58 +260,51 @@ static void put_reg_by_setter(struct insn *setter) static int lookup_pfpu_reg(int vm_reg) { - return vm_reg >= 0 ? vm_reg : regs[reg2idx(vm_reg)].pfpu_reg; + return vm_reg >= 0 ? 
vm_reg : sc->regs[reg2idx(vm_reg)].pfpu_reg; } static void mark(int vm_reg) { if (vm_reg > 0) - pfpu_regs[vm_reg].used = 1; + sc->pfpu_regs[vm_reg].used = 1; } -static void init_registers(struct fpvm_fragment *fragment, +static void init_registers(struct fpvm_fragment *frag, unsigned int *registers) { size_t regs_size; int i; - get_registers(fragment, registers); - nbindings = fragment->nbindings; + get_registers(frag, registers); - regs_size = sizeof(struct vm_reg)*(nbindings-fragment->next_sur); - regs = malloc(regs_size); - memset(regs, 0, regs_size); + regs_size = sizeof(struct vm_reg)*(frag->nbindings-frag->next_sur); + sc->regs = malloc(regs_size); + memset(sc->regs, 0, regs_size); - memset(pfpu_regs, 0, sizeof(pfpu_regs)); - for (i = 0; i != fragment->ninstructions; i++) { - mark(fragment->code[i].opa); - mark(fragment->code[i].opb); - mark(fragment->code[i].dest); + for (i = 0; i != frag->ninstructions; i++) { + mark(frag->code[i].opa); + mark(frag->code[i].opb); + mark(frag->code[i].dest); } - list_init(&unallocated); + list_init(&sc->unallocated); for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++) - if (!pfpu_regs[i].used) - list_add_tail(&unallocated, &pfpu_regs[i].more); + if (!sc->pfpu_regs[i].used) + list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more); } /* ----- Instruction scheduler --------------------------------------------- */ -static struct list unscheduled; /* unscheduled insns */ -static struct list waiting; /* insns waiting to be scheduled */ -static struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */ - - static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref, int reg_num) { struct vm_reg *reg; - reg = regs+reg2idx(reg_num); + reg = sc->regs+reg2idx(reg_num); ref->insn = insn; ref->dep = reg->setter; if (ref->dep) { @@ -307,7 +313,8 @@ static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref, insn->unresolved++; Dprintf("insn %lu: reg %d setter %lu unresolved %d\n", - insn-insns, 
reg_num, reg->setter-insns, insn->unresolved); + insn-sc->insns, reg_num, reg->setter-sc->insns, + insn->unresolved); } else { list_init(&ref->more); } @@ -320,14 +327,13 @@ static void init_scheduler(struct fpvm_fragment *frag) int i; struct insn *insn; - list_init(&unscheduled); - list_init(&waiting); + list_init(&sc->unscheduled); + list_init(&sc->waiting); for (i = 0; i != PFPU_PROGSIZE; i++) - list_init(&ready[i]); + list_init(&sc->ready[i]); for (i = 0; i != frag->ninstructions; i++) { - insn = insns+i; - memset(insn, 0, sizeof(struct insn)); + insn = sc->insns+i; insn->vm_insn = frag->code+i; insn->arity = fpvm_get_arity(frag->code[i].opcode); insn->latency = pfpu_get_latency(frag->code[i].opcode); @@ -350,9 +356,9 @@ static void init_scheduler(struct fpvm_fragment *frag) abort(); } if (insn->unresolved) - list_add_tail(&unscheduled, &insn->more); + list_add_tail(&sc->unscheduled, &insn->more); else - list_add_tail(&ready[0], &insn->more); + list_add_tail(&sc->ready[0], &insn->more); } } @@ -364,7 +370,8 @@ static void issue(struct insn *insn, int cycle, unsigned *code) end = cycle+insn->latency; Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle, - insn-insns, insn->latency, insn->vm_insn->opa, insn->vm_insn->opb); + insn-sc->insns, insn->latency, insn->vm_insn->opa, + insn->vm_insn->opb); switch (insn->arity) { case 3: @@ -393,7 +400,7 @@ static void issue(struct insn *insn, int cycle, unsigned *code) Dprintf(" unlocked %lu -> %u\n", - ref->insn-insns, ref->insn->earliest); + ref->insn-sc->insns, ref->insn->earliest); list_del(&ref->insn->more); - list_add_tail(ready+ref->insn->earliest, + list_add_tail(sc->ready+ref->insn->earliest, &ref->insn->more); } } @@ -413,22 +420,22 @@ static int count(const struct list *list) #endif -static int schedule(struct fpvm_fragment *frag, unsigned int *code) +static int schedule(unsigned int *code) { int remaining; int i, last, end; struct insn *insn; - remaining = frag->ninstructions; + remaining = sc->frag->ninstructions; for (i = 0; remaining; i++) { if (i == 
PFPU_PROGSIZE) return -1; Dprintf("@%d --- remaining %d, waiting %d + ready %d\n", - i, remaining, count(&waiting), count(&ready[i])); + i, remaining, count(&sc->waiting), count(&sc->ready[i])); - list_concat(&waiting, &ready[i]); - foreach (insn, &waiting) { + list_concat(&sc->waiting, &sc->ready[i]); + foreach (insn, &sc->waiting) { end = i+insn->latency; if (end >= PFPU_PROGSIZE) return -1; @@ -440,7 +447,7 @@ static int schedule(struct fpvm_fragment *frag, unsigned int *code) } } if (FIELD(code[i]).dest) - put_reg(pfpu_regs[FIELD(code[i]).dest].vm_reg); + put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg); } /* @@ -459,17 +466,30 @@ static int schedule(struct fpvm_fragment *frag, unsigned int *code) } +static void init_scheduler_context(struct fpvm_fragment *frag, + unsigned int *reg) +{ + sc = malloc(sizeof(*sc)); + memset(sc, 0, sizeof(*sc)); + + sc->frag = frag; + + init_registers(frag, reg); + init_scheduler(frag); +} + + int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code, unsigned int *reg) { pfpu_instruction vecout; int res; - init_registers(frag, reg); + init_scheduler_context(frag, reg); memset(code, 0, PFPU_PROGSIZE*sizeof(*code)); - init_scheduler(frag); - res = schedule(frag, code); - free(regs); + res = schedule(code); + free(sc->regs); + free(sc); if (res < 0) return res; if (frag->vector_mode)