/* * sched.c - O(n) ... O(n^2) scheduler * * Written 2011 by Werner Almesberger * * Based on gfpus.c * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include #include //#define REG_STATS //#define LCPF /* longest critical path first */ #ifdef DEBUG #define Dprintf printf #else #define Dprintf(...) #endif #define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */ #define FIELD(w) (((pfpu_instruction *) &(w))->i) struct list { struct list *next, *prev; }; struct insn { struct list more; /* more insns on same schedule */ struct fpvm_instruction *vm_insn; struct data_ref { struct list more; /* more refs sharing the data */ struct insn *insn; /* insn this is part of */ struct insn *dep; /* insn we depend on */ } opa, opb, dest, cond; int arity; int latency; int unresolved; /* number of data refs we need before we can sched */ int earliest; /* earliest cycle dependencies seen so far are met */ struct list dependants; /* list of dependencies (constant) */ int num_dependants; /* number of unresolved dependencies */ #ifdef LCPF int distance; /* minimum cycles on this path until the end */ #endif }; struct vm_reg { struct insn *setter; /* instruction setting it; NULL if none */ int pfpu_reg; /* underlying PFPU register */ int refs; /* usage count */ }; struct pfpu_reg { struct list more; /* list of unallocated PFPU registers */ int vm_reg; /* corresponding FPVM register if allocated */ int used; /* used somewhere in the program */ }; static struct sched_ctx { struct fpvm_fragment *frag; struct insn insns[FPVM_MAXCODELEN]; struct vm_reg *regs; /* dynamically allocated */ struct pfpu_reg pfpu_regs[PFPU_REG_COUNT]; struct list unallocated; /* unallocated registers */ struct list unscheduled; /* unscheduled insns */ struct list waiting; /* insns waiting to be scheduled */ struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */ #ifdef REG_STATS int max_regs, curr_regs; /* allocation statistics */ #endif } *sc; /* ----- Register initialization ------------------------------------------- */ /* * Straight from gfpus.c, only with some whitespace changes. */ static void get_registers(struct fpvm_fragment *fragment, unsigned int *registers) { int i; union { float f; unsigned int n; } fconv; for(i = 0; i < fragment->nbindings; i++) if (fragment->bindings[i].isvar) registers[i] = 0; else { fconv.f = fragment->bindings[i].b.c; registers[i] = fconv.n; } for(; i < PFPU_REG_COUNT; i++) registers[i] = 0; } /* ----- Doubly-linked list ------------------------------------------------ */ /* * Use the naming conventions of include/linux/list.h */ static void list_init(struct list *list) { list->next = list->prev = list; } static void list_del(struct list *item) { item->prev->next = item->next; item->next->prev = item->prev; } static void *list_pop(struct list *list) { struct list *first; first = list->next; if (first == list) return NULL; list_del(first); return first; } static void list_add_tail(struct list *list, struct list *item) { item->next = list; item->prev = list->prev; list->prev->next = item; list->prev = item; } static void list_add(struct list *list, struct list *item) { item->next = list->next; item->prev = list; list->next->prev = item; list->next = item; } static void list_concat(struct list *a, struct list *b) { if (b->next != b) { a->prev->next = b->next; b->next->prev = a->prev; b->prev->next = a; a->prev = b->prev; } list_init(b); } /* * Do not delete elements from the list while traversing it with foreach ! */ #define foreach(var, head) \ for (var = (void *) ((struct list *) (head))->next; \ (var) != (void *) (head); \ var = (void *) ((struct list *) (var))->next) /* ----- Register management ----------------------------------------------- */ static int reg2idx(int reg) { return reg >= 0 ? reg : sc->frag->nbindings-reg; } static int alloc_reg(struct insn *setter) { struct pfpu_reg *reg; int vm_reg, pfpu_reg, vm_idx; vm_reg = setter->vm_insn->dest; if (vm_reg >= 0) return vm_reg; reg = list_pop(&sc->unallocated); if (!reg) abort(); #ifdef REG_STATS sc->curr_regs++; if (sc->curr_regs > sc->max_regs) sc->max_regs = sc->curr_regs; #endif reg->vm_reg = vm_reg; pfpu_reg = reg-sc->pfpu_regs; Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg); vm_idx = reg2idx(vm_reg); sc->regs[vm_idx].setter = setter; sc->regs[vm_idx].pfpu_reg = pfpu_reg; sc->regs[vm_idx].refs = setter->num_dependants+1; return pfpu_reg; } static void put_reg(int vm_reg) { int vm_idx; if (vm_reg >= 0) return; vm_idx = reg2idx(vm_reg); if (--sc->regs[vm_idx].refs) return; Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg); #ifdef REG_STATS sc->curr_regs--; #endif /* * Prepend so that register numbers stay small and bugs reveal * themselves more rapidly. */ list_add(&sc->unallocated, &sc->pfpu_regs[sc->regs[vm_idx].pfpu_reg].more); /* clear it for style only */ sc->regs[vm_idx].setter = NULL; sc->regs[vm_idx].pfpu_reg = 0; } static void put_reg_by_setter(struct insn *setter) { if (setter) put_reg(setter->vm_insn->dest); } static int lookup_pfpu_reg(int vm_reg) { return vm_reg >= 0 ? vm_reg : sc->regs[reg2idx(vm_reg)].pfpu_reg; } static void mark(int vm_reg) { if (vm_reg > 0) sc->pfpu_regs[vm_reg].used = 1; } static void init_registers(struct fpvm_fragment *frag, unsigned int *registers) { size_t regs_size; int i; get_registers(frag, registers); regs_size = sizeof(struct vm_reg)*(frag->nbindings-frag->next_sur); sc->regs = malloc(regs_size); memset(sc->regs, 0, regs_size); for (i = 0; i != frag->ninstructions; i++) { mark(frag->code[i].opa); mark(frag->code[i].opb); mark(frag->code[i].dest); } list_init(&sc->unallocated); for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++) if (!sc->pfpu_regs[i].used) list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more); } /* ----- Instruction scheduler --------------------------------------------- */ static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref, int reg_num) { struct vm_reg *reg; reg = sc->regs+reg2idx(reg_num); ref->insn = insn; ref->dep = reg->setter; if (ref->dep) { list_add_tail(&ref->dep->dependants, &ref->more); ref->dep->num_dependants++; insn->unresolved++; Dprintf("insn %lu: reg %d setter %lu unresolved %d\n", insn-sc->insns, reg_num, reg->setter-sc->insns, insn->unresolved); } else { list_init(&ref->more); } return reg; } static void init_scheduler(struct fpvm_fragment *frag) { int i; struct insn *insn; list_init(&sc->unscheduled); list_init(&sc->waiting); for (i = 0; i != PFPU_PROGSIZE; i++) list_init(&sc->ready[i]); for (i = 0; i != frag->ninstructions; i++) { insn = sc->insns+i; insn->vm_insn = frag->code+i; insn->arity = fpvm_get_arity(frag->code[i].opcode); insn->latency = pfpu_get_latency(frag->code[i].opcode); list_init(&insn->dependants); switch (insn->arity) { case 3: add_data_ref(insn, &insn->opb, FPVM_REG_IFB); /* fall through */ case 2: add_data_ref(insn, &insn->opb, frag->code[i].opb); /* fall through */ case 1: add_data_ref(insn, &insn->opa, frag->code[i].opa); /* fall through */ case 0: add_data_ref(insn, &insn->dest, frag->code[i].dest)->setter = insn; break; default: abort(); } if (insn->unresolved) list_add_tail(&sc->unscheduled, &insn->more); else list_add_tail(&sc->ready[0], &insn->more); } #ifdef LCPF struct data_ref *dep; for (i = frag->ninstructions-1; i >= 0; i--) { insn = sc->insns+i; foreach (dep, &insn->dependants) if (dep->insn->distance > insn->distance) insn->distance = dep->insn->distance; insn->distance += insn->latency; } #endif } static void issue(struct insn *insn, int cycle, unsigned *code) { struct data_ref *ref; int end; end = cycle+insn->latency; Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle, insn-sc->insns, insn->latency, insn->vm_insn->opa, insn->vm_insn->opb); switch (insn->arity) { case 3: /* fall through */ case 2: FIELD(code[cycle]).opb = lookup_pfpu_reg(insn->vm_insn->opb); put_reg_by_setter(insn->opb.dep); /* fall through */ case 1: FIELD(code[cycle]).opa = lookup_pfpu_reg(insn->vm_insn->opa); put_reg_by_setter(insn->opa.dep); break; case 0: break; default: abort(); } FIELD(code[end]).dest = alloc_reg(insn); FIELD(code[cycle]).opcode = fpvm_to_pfpu(insn->vm_insn->opcode); foreach (ref, &insn->dependants) { if (ref->insn->earliest <= end) ref->insn->earliest = end+1; if (!--ref->insn->unresolved) { Dprintf(" unlocked %lu -> %u\n", ref->insn-insns, ref->insn->earliest); list_del(&ref->insn->more); list_add_tail(sc->ready+ref->insn->earliest, &ref->insn->more); } } } #ifdef DEBUG static int count(const struct list *list) { int n = 0; const struct list *p; for (p = list->next; p != list; p = p->next) n++; return n; } #endif static int schedule(unsigned int *code) { int remaining; int i, last, end; struct insn *insn; #ifdef LCPF struct insn *best; #endif remaining = sc->frag->ninstructions; for (i = 0; remaining; i++) { if (i == PFPU_PROGSIZE) return -1; Dprintf("@%d --- remaining %d, waiting %d + ready %d\n", i, remaining, count(&sc->waiting), count(&sc->ready[i])); list_concat(&sc->waiting, &sc->ready[i]); #ifdef LCPF best = NULL; #endif foreach (insn, &sc->waiting) { end = i+insn->latency; if (end >= PFPU_PROGSIZE) return -1; if (!FIELD(code[end]).dest) { #ifdef LCPF if (!best || best->distance < insn->distance) best = insn; #else issue(insn, i, code); list_del(&insn->more); remaining--; break; #endif } } #ifdef LCPF if (best) { issue(best, i, code); list_del(&best->more); remaining--; } #endif if (FIELD(code[i]).dest) put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg); } /* * Add NOPs to cover unfinished instructions. */ last = i; end = i+MAX_LATENCY; if (end > PFPU_PROGSIZE) end = PFPU_PROGSIZE; while (i != end) { if (FIELD(code[i]).dest) last = i+1; /* @@@ ? */ i++; } return last; } static void init_scheduler_context(struct fpvm_fragment *frag, unsigned int *reg) { sc = malloc(sizeof(*sc)); memset(sc, 0, sizeof(*sc)); sc->frag = frag; init_registers(frag, reg); init_scheduler(frag); } int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code, unsigned int *reg) { pfpu_instruction vecout; int res; init_scheduler_context(frag, reg); memset(code, 0, PFPU_PROGSIZE*sizeof(*code)); res = schedule(code); #ifdef REG_STATS printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs); #endif free(sc->regs); free(sc); if (res < 0) return res; if (frag->vector_mode) return res; if (res == PFPU_PROGSIZE) return -1; vecout.w = 0; vecout.i.opcode = FPVM_OPCODE_VECTOUT; code[res] = vecout.w; return res+1; }