From 8f82a0e8d4e23ab9f769e23e862936cff93fca6e Mon Sep 17 00:00:00 2001
From: Werner Almesberger
Date: Wed, 21 Sep 2011 17:44:52 -0300
Subject: [PATCH] m1/perf/sched.c: revamped to handle static registers
 correctly as well

---
 m1/perf/sched.c | 116 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 85 insertions(+), 31 deletions(-)

diff --git a/m1/perf/sched.c b/m1/perf/sched.c
index e9e5097..ed77d44 100644
--- a/m1/perf/sched.c
+++ b/m1/perf/sched.c
@@ -62,10 +62,12 @@ struct insn {
 	} opa, opb, dest, cond;
 	int arity;
 	int latency;
+	int rmw;		/* non-zero if instruction is read-modify-write */
 	int unresolved;		/* number of data refs we need before we can sched */
 	int earliest;		/* earliest cycle dependencies seen so far are met */
 	struct list dependants;	/* list of dependencies (constant) */
 	int num_dependants;	/* number of dependencies */
+	struct insn *next_setter; /* next setter of the same register */
 #ifdef LCPF
 	int distance;		/* minimum cycles on this path until the end */
 #endif
@@ -74,6 +76,7 @@ struct insn {
 
 struct vm_reg {
 	struct insn *setter;	/* instruction setting it; NULL if none */
+	struct insn *first_setter; /* first setter */
 	int pfpu_reg;		/* underlying PFPU register */
 	int refs;		/* usage count */
 };
@@ -95,6 +98,7 @@ static struct sched_ctx {
 	struct list unscheduled;	/* unscheduled insns */
 	struct list waiting;		/* insns waiting to be scheduled */
 	struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
+	int cycle;			/* the current cycle */
 #ifdef REG_STATS
 	int max_regs, curr_regs;	/* allocation statistics */
 #endif
@@ -233,20 +237,23 @@ static int alloc_reg(struct insn *setter)
 	int vm_reg, pfpu_reg, vm_idx;
 
 	vm_reg = setter->vm_insn->dest;
-	if (vm_reg >= 0)
-		return vm_reg;
-	reg = list_pop(&sc->unallocated);
-	if (!reg)
-		abort();
+	if (vm_reg >= 0) {
+		pfpu_reg = vm_reg;
+		sc->pfpu_regs[vm_reg].vm_reg = vm_reg; /* @@@ global init */
+	} else {
+		reg = list_pop(&sc->unallocated);
+		if (!reg)
+			abort();
 
-#ifdef REG_STATS
-	sc->curr_regs++;
-	if (sc->curr_regs > sc->max_regs)
-		sc->max_regs = sc->curr_regs;
-#endif
+	#ifdef REG_STATS
+		sc->curr_regs++;
+		if (sc->curr_regs > sc->max_regs)
+			sc->max_regs = sc->curr_regs;
+	#endif
 
-	reg->vm_reg = vm_reg;
-	pfpu_reg = reg-sc->pfpu_regs;
+		reg->vm_reg = vm_reg;
+		pfpu_reg = reg-sc->pfpu_regs;
+	}
 
 	Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
@@ -291,10 +298,20 @@ static void put_reg(int vm_reg)
 }
 
 
-static void put_reg_by_setter(struct insn *setter)
+static void unblock(struct insn *insn);
+static void put_reg_by_setter(struct insn *setter, int vm_reg)
 {
-	if (setter)
+	struct vm_reg *reg;
+
+	if (setter) {
 		put_reg(setter->vm_insn->dest);
+		if (setter->next_setter)
+			unblock(setter->next_setter);
+	} else {
+		reg = sc->regs+vm_reg2idx(vm_reg);
+		if (reg->first_setter && !reg->first_setter->rmw)
+			unblock(reg->first_setter);
+	}
 }
 
 
@@ -347,7 +364,11 @@ static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
 	reg = sc->regs+vm_reg2idx(reg_num);
 	ref->insn = insn;
 	ref->dep = reg->setter;
-	if (ref->dep) {
+	if (insn->vm_insn->dest == reg_num)
+		insn->rmw = 1;
+	if (!ref->dep)
+		reg->refs++;
+	else {
 		list_add_tail(&ref->dep->dependants, &ref->more);
 		ref->dep->num_dependants++;
 		insn->unresolved++;
@@ -364,6 +385,8 @@ static void init_scheduler(struct fpvm_fragment *frag)
 {
 	int i;
 	struct insn *insn;
+	struct vm_reg *reg;
+	struct data_ref *ref;
 
 	list_init(&sc->unscheduled);
 	list_init(&sc->waiting);
@@ -387,8 +410,20 @@ static void init_scheduler(struct fpvm_fragment *frag)
 			add_data_ref(insn, &insn->opa, frag->code[i].opa);
 			/* fall through */
 		case 0:
-			add_data_ref(insn,
-			    &insn->dest, frag->code[i].dest)->setter = insn;
+			reg = sc->regs+vm_reg2idx(frag->code[i].dest);
+			if (reg->setter) {
+				reg->setter->next_setter = insn;
+				foreach (ref, &reg->setter->dependants)
+					if (ref->insn != insn)
+						insn->unresolved++;
+				if (!insn->rmw)
+					insn->unresolved++;
+			} else {
+				if (!insn->rmw)
+					insn->unresolved += reg->refs;
+				reg->first_setter = insn;
+			}
+			reg->setter = insn;
 			break;
 		default:
 			abort();
@@ -413,6 +448,30 @@ static void init_scheduler(struct fpvm_fragment *frag)
 }
 
 
+static void unblock(struct insn *insn)
+{
+	int slot;
+
+	assert(insn->unresolved);
+	if (--insn->unresolved)
+		return;
+	Dprintf(" unblocked %lu -> %u\n", insn-sc->insns, insn->earliest);
+	list_del(&insn->more);
+	slot = insn->earliest;
+	if (slot <= sc->cycle)
+		slot = sc->cycle+1;
+	list_add_tail(sc->ready+slot, &insn->more);
+}
+
+
+static void unblock_after(struct insn *insn, int cycle)
+{
+	if (insn->earliest <= cycle)
+		insn->earliest = cycle+1;
+	unblock(insn);
+}
+
+
 static void issue(struct insn *insn, int cycle, unsigned *code)
 {
 	struct data_ref *ref;
@@ -425,14 +484,15 @@ static void issue(struct insn *insn, int cycle, unsigned *code)
 
 	switch (insn->arity) {
 	case 3:
+		put_reg_by_setter(insn->cond.dep, FPVM_REG_IFB);
 		/* fall through */
 	case 2:
 		CODE(cycle).opb = lookup_pfpu_reg(insn->vm_insn->opb);
-		put_reg_by_setter(insn->opb.dep);
+		put_reg_by_setter(insn->opb.dep, insn->vm_insn->opb);
 		/* fall through */
 	case 1:
 		CODE(cycle).opa = lookup_pfpu_reg(insn->vm_insn->opa);
-		put_reg_by_setter(insn->opa.dep);
+		put_reg_by_setter(insn->opa.dep, insn->vm_insn->opa);
 		break;
 	case 0:
 		break;
@@ -443,18 +503,11 @@ static void issue(struct insn *insn, int cycle, unsigned *code)
 
 	CODE(end).dest = alloc_reg(insn);
 	CODE(cycle).opcode = fpvm_to_pfpu(insn->vm_insn->opcode);
-	foreach (ref, &insn->dependants) {
-		if (ref->insn->earliest <= end)
-			ref->insn->earliest = end+1;
-		assert(ref->insn->unresolved);
-		if (!--ref->insn->unresolved) {
-			Dprintf(" unlocked %lu -> %u\n", ref->insn-sc->insns,
-			    ref->insn->earliest);
-			list_del(&ref->insn->more);
-			list_add_tail(sc->ready+ref->insn->earliest,
-			    &ref->insn->more);
-		}
-	}
+	foreach (ref, &insn->dependants)
+		unblock_after(ref->insn, end);
+	if (insn->next_setter && !insn->next_setter->rmw)
+		unblock_after(insn->next_setter,
+		    end-insn->next_setter->latency);
 }
 
 
@@ -485,6 +538,7 @@ static int schedule(unsigned int *code)
 		if (i == PFPU_PROGSIZE)
 			return -1;
 
+		sc->cycle = i;
 		Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
 		    i, remaining, count(&sc->waiting), count(&sc->ready[i]));
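
Note (illustration only, not part of the patch): the stand-alone sketch below models the counter-based unblocking scheme the hunks above introduce. Every instruction carries an "unresolved" count; unblock() files it on a per-cycle ready list once the count reaches zero, unblock_after() first pushes its "earliest" cycle past the cycle in which the awaited value becomes available, and the rmw/next_setter bookkeeping extends the same counting to write-after-read hazards on static registers. All names here (struct node, demo_unblock, DEMO_PROGSIZE) and the simplified data layout are assumptions made for this example and do not appear in sched.c.

/*
 * Illustrative sketch only (not from sched.c).  A node becomes ready when
 * its last outstanding dependency is released, and is filed under the first
 * cycle at or after its "earliest" cycle that is still in the future.
 */

#include <assert.h>
#include <stdio.h>

#define DEMO_PROGSIZE 8			/* stand-in for PFPU_PROGSIZE */

struct node {
	const char *name;
	int unresolved;			/* dependencies still outstanding */
	int earliest;			/* earliest cycle at which it may issue */
};

static struct node *ready[DEMO_PROGSIZE];	/* one slot per cycle, for brevity */
static int cycle;				/* current scheduling cycle */

static void demo_unblock(struct node *n)
{
	int slot;

	assert(n->unresolved);
	if (--n->unresolved)
		return;			/* still waiting on other dependencies */
	slot = n->earliest;
	if (slot <= cycle)
		slot = cycle+1;		/* never schedule into the past */
	ready[slot] = n;
	printf("unblocked %s -> cycle %d\n", n->name, slot);
}

static void demo_unblock_after(struct node *n, int c)
{
	if (n->earliest <= c)
		n->earliest = c+1;	/* result only available after cycle c */
	demo_unblock(n);
}

int main(void)
{
	/*
	 * "b" reads the result of some instruction "a" (one dependency).
	 * "c" overwrites the register "a" wrote, so it waits both for "a"
	 * to issue and for the last read by "b" (write after read),
	 * hence two dependencies.
	 */
	struct node b = { "b", 1, 0 };
	struct node c = { "c", 2, 0 };

	cycle = 0;
	demo_unblock_after(&b, 3);	/* a's result lands at the end of cycle 3 */
	demo_unblock(&c);		/* released when a issues */
	demo_unblock(&c);		/* released when b, the last reader, issues */
	return 0;
}

The real scheduler keeps linked lists per cycle (sc->ready[]) instead of the single pointer used here, and releases the second kind of dependency from put_reg_by_setter() and issue() as shown in the patch; the sketch only demonstrates the counting and the "not before the current cycle" clamp.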