/*
 * sched.c - O(n) ... O(n^2) scheduler
 *
 * Written 2011 by Werner Almesberger
 *
 * Based on gfpus.c
 * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <fpvm/is.h>
#include <fpvm/fpvm.h>
#include <fpvm/gfpus.h>
#include <fpvm/pfpu.h>

#include <hw/pfpu.h>


#define	MAX_LATENCY	8	/* maximum latency; okay to make this bigger */

#define	Dprintf(...)


struct list {
	struct list *next, *prev;
};

static struct insn {
	struct list more;		/* more insns on same schedule */
	struct fpvm_instruction *vm_insn;
	struct data_ref {
		struct list more;	/* more refs sharing the data */
		struct insn *insn;	/* insn this is part of */
		struct insn *dep;	/* insn we depend on */
	} opa, opb, dest, cond;
	int arity;
	int latency;
	int unresolved;	/* number of data refs we need before we can sched */
	int earliest;	/* earliest cycle dependencies seen so far are met */
	struct list dependants;	/* insns that depend on us (constant) */
	int num_dependants;	/* number of insns that depend on us */
} insns[FPVM_MAXCODELEN];


/* ----- Register initialization ------------------------------------------- */


/*
 * Straight from gfpus.c, only with some whitespace changes.
 */

static void get_registers(struct fpvm_fragment *fragment,
    unsigned int *registers)
{
	int i;
	union {
		float f;
		unsigned int n;
	} fconv;

	for(i = 0; i < fragment->nbindings; i++)
		if (fragment->bindings[i].isvar)
			registers[i] = 0;
		else {
			fconv.f = fragment->bindings[i].b.c;
			registers[i] = fconv.n;
		}
	for(; i < PFPU_REG_COUNT; i++)
		registers[i] = 0;
}


/* ----- Doubly-linked list ------------------------------------------------ */


/*
 * Use naming conventions of include/linux/list.h
 */

static void list_init(struct list *list)
{
	list->next = list->prev = list;
}

static void list_del(struct list *item)
{
	item->prev->next = item->next;
	item->next->prev = item->prev;
}

static void *list_pop(struct list *list)
{
	struct list *first;

	first = list->next;
	if (first == list)
		return NULL;
	list_del(first);
	return first;
}

static void list_add_tail(struct list *list, struct list *item)
{
	item->next = list;
	item->prev = list->prev;
	list->prev->next = item;
	list->prev = item;
}

static void list_add(struct list *list, struct list *item)
{
	item->next = list->next;
	item->prev = list;
	list->next->prev = item;
	list->next = item;
}

static void list_concat(struct list *a, struct list *b)
{
	if (b->next != b) {
		a->prev->next = b->next;
		b->next->prev = a->prev;
		b->prev->next = a;
		a->prev = b->prev;
	}
	list_init(b);
}
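/*
 * Usage sketch for the list helpers above; it is not part of the scheduler
 * and stays disabled. "struct demo_item" and "list_demo" are made-up names
 * for illustration only. As with the "more" field of struct insn, the
 * struct list must be the first member so that the pointer returned by
 * list_pop can double as a pointer to the containing item.
 */

#if 0
struct demo_item {
	struct list more;	/* must come first, see above */
	int value;
};

static void list_demo(void)
{
	struct list head;
	struct demo_item item[3];
	struct demo_item *it;
	int i;

	list_init(&head);
	for (i = 0; i != 3; i++) {
		item[i].value = i;
		list_add_tail(&head, &item[i].more);
	}
	while ((it = list_pop(&head)))
		printf("popped %d\n", it->value);	/* 0, 1, 2 */
}
#endif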
/*
 * Do not delete elements from the list while traversing it with foreach !
 */

#define foreach(var, head) \
	for (var = (void *) ((struct list *) (head))->next; \
	    (var) != (void *) (head); \
	    var = (void *) ((struct list *) (var))->next)


/* ----- Register management ----------------------------------------------- */


static struct vm_reg {
	struct insn *setter;	/* instruction setting it; NULL if none */
	int pfpu_reg;		/* underlying PFPU register */
	int refs;		/* usage count */
} *regs;

static struct list pfpu_regs[PFPU_REG_COUNT];
static struct list unallocated;	/* unallocated registers */
static int nbindings;		/* "public" bindings */


static int reg2idx(int reg)
{
	return reg >= 0 ? reg : nbindings-reg;
}

static int alloc_reg(struct insn *setter)
{
	struct list *reg;
	int vm_reg, pfpu_reg, vm_idx;

	vm_reg = setter->vm_insn->dest;
	if (vm_reg >= 0)
		return vm_reg;
	reg = list_pop(&unallocated);
	if (!reg)
		abort();
	pfpu_reg = reg-pfpu_regs;
	Dprintf("  alloc reg %d -> %d\n", vm_reg, pfpu_reg);
	vm_idx = reg2idx(vm_reg);
	regs[vm_idx].setter = setter;
	regs[vm_idx].pfpu_reg = pfpu_reg;
	regs[vm_idx].refs = setter->num_dependants+1;
	return pfpu_reg;
}

static void put_reg(struct insn *setter)
{
	int vm_reg, vm_idx;

	if (!setter)
		return;
	vm_reg = setter->vm_insn->dest;
	if (vm_reg >= 0)
		return;
	vm_idx = reg2idx(vm_reg);
	if (--regs[vm_idx].refs)
		return;
	Dprintf("  free reg %d\n", regs[vm_idx].pfpu_reg);

	/*
	 * Prepend so that register numbers stay small and bugs reveal
	 * themselves more rapidly.
	 */
	list_add(&unallocated, pfpu_regs+regs[vm_idx].pfpu_reg);

	/* clear it for style only */
	regs[vm_idx].setter = NULL;
	regs[vm_idx].pfpu_reg = 0;
}

static int lookup_pfpu_reg(int vm_reg)
{
	return vm_reg >= 0 ? vm_reg : regs[reg2idx(vm_reg)].pfpu_reg;
}

static void init_registers(struct fpvm_fragment *fragment,
    unsigned int *registers)
{
	size_t regs_size;
	int i;

	get_registers(fragment, registers);

	nbindings = fragment->nbindings;
	regs_size = sizeof(struct vm_reg)*(nbindings-fragment->next_sur);
	regs = malloc(regs_size);
	memset(regs, 0, regs_size);

	list_init(&unallocated);
	for (i = fragment->nbindings; i != PFPU_REG_COUNT; i++)
		list_add_tail(&unallocated, pfpu_regs+i);
	/*
	 * @@@ the rules are more complex, see use of dont_touch in
	 * init_scheduler_state
	 */
}


/* ----- Instruction scheduler --------------------------------------------- */


static struct list unscheduled;		/* unscheduled insns */
static struct list waiting;		/* insns waiting to be scheduled */
static struct list ready[PFPU_PROGSIZE];	/* insns ready at nth cycle */
static struct insn *exits[PFPU_PROGSIZE+MAX_LATENCY];
					/* insn writing at nth cycle */
static struct insn dummy_insn;		/* dummy, to signal occupancy */


static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
    int reg_num)
{
	struct vm_reg *reg;

	reg = regs+reg2idx(reg_num);
	ref->insn = insn;
	ref->dep = reg->setter;
	if (ref->dep) {
		list_add_tail(&ref->dep->dependants, &ref->more);
		ref->dep->num_dependants++;
		insn->unresolved++;
		Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
		    insn-insns, reg_num, reg->setter-insns, insn->unresolved);
	} else {
		list_init(&ref->more);
	}
	return reg;
}


int catch = 0;
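/*
 * Worked example of the dependency bookkeeping (register names and numbers
 * are made up for illustration): for the fragment
 *
 *	insn 0:  t = a + b	(t is a temporary, i.e. a negative register)
 *	insn 1:  out = t * c
 *
 * init_scheduler below marks insn 0 as the setter of t's register entry
 * when it adds the dest ref. When insn 1's opa ref is added, add_data_ref
 * finds that setter, links the ref into insn 0's "dependants" list and
 * bumps insn 1's "unresolved" count. Insn 0 therefore starts on ready[0]
 * and insn 1 on "unscheduled"; once insn 0 is issued at some cycle c,
 * issue() drops the count to zero and moves insn 1 to ready[c+latency+1],
 * the first cycle at which t's value can be read.
 */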
static void init_scheduler(struct fpvm_fragment *frag)
{
	int i;
	struct insn *insn;

	list_init(&unscheduled);
	list_init(&waiting);
	for (i = 0; i != PFPU_PROGSIZE; i++)
		list_init(&ready[i]);

#if 0
	if (frag->ninstructions > 10) {
		frag->ninstructions = 10;
		catch = 1;
	}
#endif
	for (i = 0; i != frag->ninstructions; i++) {
		insn = insns+i;
		memset(insn, 0, sizeof(struct insn));
		insn->vm_insn = frag->code+i;
		insn->arity = fpvm_get_arity(frag->code[i].opcode);
		insn->latency = pfpu_get_latency(frag->code[i].opcode);
		list_init(&insn->dependants);
		switch (insn->arity) {
		case 3:
			add_data_ref(insn, &insn->cond, FPVM_REG_IFB);
			/* fall through */
		case 2:
			add_data_ref(insn, &insn->opb, frag->code[i].opb);
			/* fall through */
		case 1:
			add_data_ref(insn, &insn->opa, frag->code[i].opa);
			/* fall through */
		case 0:
			add_data_ref(insn, &insn->dest,
			    frag->code[i].dest)->setter = insn;
			break;
		default:
			abort();
		}
		if (insn->unresolved)
			list_add_tail(&unscheduled, &insn->more);
		else
			list_add_tail(&ready[0], &insn->more);
	}

	/*
	 * We add a few dummy instructions at the end so that we don't need to
	 * check array boundaries for the unlikely case of overrunning the
	 * schedule.
	 */
	for (i = 0; i != PFPU_PROGSIZE; i++)
		exits[i] = NULL;
	for (; i != PFPU_PROGSIZE+MAX_LATENCY; i++)
		exits[i] = &dummy_insn;
}


static unsigned issue(struct insn *insn, int cycle)
{
	pfpu_instruction code;
	struct data_ref *ref;
	int end;
	int nada = 0;

	end = cycle+insn->latency;
	exits[end] = insn;

	Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle, insn-insns,
	    insn->latency, insn->vm_insn->opa, insn->vm_insn->opb);

	switch (insn->arity) {
	case 3:
		/* fall through */
	case 2:
		code.i.opb = lookup_pfpu_reg(insn->vm_insn->opb);
		put_reg(insn->opb.dep);
		/* fall through */
	case 1:
		code.i.opa = lookup_pfpu_reg(insn->vm_insn->opa);
		put_reg(insn->opa.dep);
		break;
	case 0:
		break;
	default:
		abort();
	}
	code.i.dest = alloc_reg(insn);
	code.i.opcode = fpvm_to_pfpu(insn->vm_insn->opcode);

	foreach (ref, &insn->dependants) {
		if (ref->insn->earliest <= end)
			ref->insn->earliest = end+1;
		if (!--ref->insn->unresolved) {
			Dprintf("  unlocked %lu -> %u\n", ref->insn-insns,
			    ref->insn->earliest);
			nada = 0;
			list_del(&ref->insn->more);
			list_add_tail(ready+ref->insn->earliest,
			    &ref->insn->more);
		}
	}
	if (nada && catch)
		*(volatile int *) 0 = 1;

	return code.w;
}


static int count(const struct list *list)
{
	int n = 0;
	const struct list *p;

	for (p = list->next; p != list; p = p->next)
		n++;
	return n;
}


static int schedule(struct fpvm_fragment *frag, unsigned int *code)
{
	int remaining;
	int i, last, end;
	struct insn *insn;

	remaining = frag->ninstructions;
	for (i = 0; remaining; i++) {
		if (i == PFPU_PROGSIZE)
			return -1;
		Dprintf("@%d --- remaining %d, waiting %d + ready %d = ",
		    i, remaining, count(&waiting), count(&ready[i]));
		list_concat(&waiting, &ready[i]);
		Dprintf("%d\n", count(&waiting));
		foreach (insn, &waiting)
			if (!exits[i+insn->latency]) {
				code[i] = issue(insn, i);
				list_del(&insn->more);
				remaining--;
				break;
			}
		if (exits[i])
			put_reg(exits[i]);
	}

	/*
	 * Add NOPs to cover unfinished instructions.
	 */
	last = i;
	end = i+MAX_LATENCY;
	if (end > PFPU_PROGSIZE)
		end = PFPU_PROGSIZE;
	while (i != end) {
		if (exits[i])
			last = i+1;	/* @@@ ? */
		i++;
	}
	return last;
}


int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
    unsigned int *reg)
{
	pfpu_instruction vecout;
	int res;

	init_registers(frag, reg);
	memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
	init_scheduler(frag);
	res = schedule(frag, code);
	free(regs);
	if (res < 0)
		return res;
	if (frag->vector_mode)
		return res;
	if (res == PFPU_PROGSIZE)
		return -1;
	vecout.w = 0;
	vecout.i.opcode = FPVM_OPCODE_VECTOUT;
	code[res] = vecout.w;
	return res+1;
}
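/*
 * Caller sketch, kept disabled: "compile_to_pfpu" is a made-up wrapper name,
 * not part of this file or of the fpvm API. It only illustrates the calling
 * convention: the caller provides a PFPU_PROGSIZE code buffer and a
 * PFPU_REG_COUNT register file, and gets back the number of instruction
 * words to load (including the VECTOUT appended in scalar mode), or a
 * negative value if the fragment does not fit.
 */

#if 0
static int compile_to_pfpu(struct fpvm_fragment *frag,
    unsigned int code[PFPU_PROGSIZE], unsigned int reg[PFPU_REG_COUNT])
{
	int words;

	words = gfpus_schedule(frag, code, reg);
	if (words < 0)
		return -1;	/* fragment doesn't fit into PFPU_PROGSIZE */
	/* code[0..words-1] and reg[] can now be loaded into the PFPU */
	return words;
}
#endif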