/*
* sched.c - O(n) ... O(n^2) scheduler
*
* Written 2011 by Werner Almesberger
*
* Based on gfpus.c
* Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include
#include
#include
#include
#include
/*
 * Build-time switches (both disabled by default):
 * REG_STATS - track the current and maximum number of allocated PFPU
 * registers and print them at the end of gfpus_schedule().
 * LCPF - among the insns that can be issued in a cycle, pick the one with
 * the largest "distance" instead of the first one found.
 */
//#define REG_STATS
//#define LCPF /* longest critical path first */
/*
 * Dprintf: debug tracing. Maps to printf when DEBUG is defined and expands to
 * nothing otherwise, so its arguments are only compiled in DEBUG builds.
 */
#ifdef DEBUG
#define Dprintf printf
#else
#define Dprintf(...)
#endif
/* Number of cycles schedule() scans past the last issue for pending writes. */
#define MAX_LATENCY 8 /* maximum latency; okay to make this bigger */
/*
 * FIELD(w): access the instruction word w (which must be an lvalue) through
 * the "i" member of pfpu_instruction, e.g. FIELD(code[n]).dest.
 */
#define FIELD(w) (((pfpu_instruction *) &(w))->i)
/*
 * Link element of a circular doubly-linked list. An empty list is a head
 * whose two pointers refer back to the head itself.
 */
struct list {
	struct list *next;	/* following element, or the head */
	struct list *prev;	/* preceding element, or the head */
};
/*
 * Scheduler-side state of one FPVM instruction.
 *
 * "more" has to stay the first member (here and in struct data_ref): the list
 * helpers and foreach() convert between a struct list pointer and a pointer
 * to the containing structure by a plain cast.
 */
struct insn {
struct list more; /* more insns on same schedule (unscheduled, ready[n] or waiting) */
struct fpvm_instruction *vm_insn; /* the FPVM instruction this entry describes */
struct data_ref {
struct list more; /* more refs sharing the data; linked into dep->dependants */
struct insn *insn; /* insn this is part of */
struct insn *dep; /* insn we depend on; NULL if the register has no setter */
} opa, opb, dest, cond; /* operand A, operand B, destination, condition */
int arity; /* result of fpvm_get_arity() for the opcode */
int latency; /* result of pfpu_get_latency(); the result lands at issue cycle + latency */
int unresolved; /* number of data refs we need before we can sched */
int earliest; /* earliest cycle dependencies seen so far are met */
struct list dependants; /* data refs of later insns that depend on this insn */
int num_dependants; /* number of data refs ever added to "dependants" (not decremented) */
#ifdef LCPF
int distance; /* own latency plus the largest distance among the dependants */
#endif
};
/*
 * State of one FPVM register. Entries live in sc->regs and are indexed with
 * reg2idx().
 */
struct vm_reg {
struct insn *setter; /* instruction setting it; NULL if none */
int pfpu_reg; /* underlying PFPU register */
int refs; /* usage count: set to num_dependants+1 by alloc_reg(), dropped by put_reg() */
};
/*
 * State of one physical PFPU register. "more" must stay the first member
 * because alloc_reg() uses the pointer returned by list_pop() directly as a
 * struct pfpu_reg pointer.
 */
struct pfpu_reg {
struct list more; /* list of unallocated PFPU registers */
int vm_reg; /* corresponding FPVM register if allocated */
int used; /* used somewhere in the program */
};
/*
 * All scheduler state. A single instance is allocated per gfpus_schedule()
 * call and reached through the file-scope pointer "sc".
 */
static struct sched_ctx {
struct fpvm_fragment *frag; /* fragment being scheduled */
struct insn insns[FPVM_MAXCODELEN]; /* one entry per instruction of the fragment */
struct vm_reg *regs; /* dynamically allocated */
struct pfpu_reg pfpu_regs[PFPU_REG_COUNT]; /* indexed by PFPU register number */
struct list unallocated; /* unallocated registers */
struct list unscheduled; /* unscheduled insns */
struct list waiting; /* insns waiting to be scheduled */
struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
#ifdef REG_STATS
int max_regs, curr_regs; /* allocation statistics */
#endif
} *sc;
/* ----- Register initialization ------------------------------------------- */
/*
* Straight from gfpus.c, only with some whitespace changes.
*/
/*
 * Build the initial register file image for a fragment.
 *
 * Every binding that is a variable starts out as 0; every constant binding
 * is stored as the raw bit pattern of its float value. The entries after the
 * last binding, up to PFPU_REG_COUNT, are cleared.
 */
static void get_registers(struct fpvm_fragment *fragment,
    unsigned int *registers)
{
	union {
		float f;
		unsigned int n;
	} bits;
	int idx = 0;

	while (idx < fragment->nbindings) {
		if (fragment->bindings[idx].isvar) {
			registers[idx] = 0;
		} else {
			/* reinterpret the float constant as an integer word */
			bits.f = fragment->bindings[idx].b.c;
			registers[idx] = bits.n;
		}
		idx++;
	}

	while (idx < PFPU_REG_COUNT) {
		registers[idx] = 0;
		idx++;
	}
}
/* ----- Doubly-linked list ------------------------------------------------ */
/*
* Use the naming conventions of include/linux/list.h
*/
/* Make "list" an empty list: both links point back at the head. */
static void list_init(struct list *list)
{
	list->prev = list;
	list->next = list;
}
/*
 * Unlink "item" from the list it is on. The item's own pointers are left
 * untouched.
 */
static void list_del(struct list *item)
{
	struct list *before = item->prev;
	struct list *after = item->next;

	before->next = after;
	after->prev = before;
}
/*
 * Remove and return the first element of "list", or NULL if the list is
 * empty. The returned pointer is the address of the element's struct list.
 */
static void *list_pop(struct list *list)
{
	struct list *head_item = list->next;

	if (head_item != list) {
		list_del(head_item);
		return head_item;
	}
	return NULL;
}
/* Append "item" at the end of "list" (i.e. just before the head). */
static void list_add_tail(struct list *list, struct list *item)
{
	struct list *tail = list->prev;

	item->next = list;
	item->prev = tail;
	tail->next = item;
	list->prev = item;
}
/* Insert "item" at the front of "list" (i.e. right after the head). */
static void list_add(struct list *list, struct list *item)
{
	struct list *old_first = list->next;

	item->next = old_first;
	item->prev = list;
	old_first->prev = item;
	list->next = item;
}
/*
 * Move all elements of list "b" to the end of list "a", keeping their order.
 * "b" is empty afterwards.
 */
static void list_concat(struct list *a, struct list *b)
{
	struct list *b_first = b->next;

	if (b_first != b) {
		struct list *a_last = a->prev;
		struct list *b_last = b->prev;

		/* splice b's chain between a's last element and a's head */
		a_last->next = b_first;
		b_first->prev = a_last;
		b_last->next = a;
		a->prev = b_last;
	}
	list_init(b);
}
/*
 * foreach(var, head): run the following statement once for every element of
 * the list "head", with "var" pointing to the current element.
 *
 * "var" receives the address of the element's struct list through a void
 * pointer cast, so the list link has to be the first member of the element
 * type.
 *
 * Do not delete elements from the list while traversing it with foreach !
 * The step expression follows var->next after the loop body has run.
 */
#define foreach(var, head) \
for (var = (void *) ((struct list *) (head))->next; \
(var) != (void *) (head); \
var = (void *) ((struct list *) (var))->next)
/* ----- Register management ----------------------------------------------- */
/*
 * Map an FPVM register number to its index in sc->regs. Non-negative numbers
 * map to themselves; negative numbers are placed after the fragment's
 * bindings (nbindings - reg).
 */
static int reg2idx(int reg)
{
	if (reg < 0)
		return sc->frag->nbindings-reg;
	return reg;
}
static int alloc_reg(struct insn *setter)
{
struct pfpu_reg *reg;
int vm_reg, pfpu_reg, vm_idx;
vm_reg = setter->vm_insn->dest;
if (vm_reg >= 0)
return vm_reg;
reg = list_pop(&sc->unallocated);
if (!reg)
abort();
#ifdef REG_STATS
sc->curr_regs++;
if (sc->curr_regs > sc->max_regs)
sc->max_regs = sc->curr_regs;
#endif
reg->vm_reg = vm_reg;
pfpu_reg = reg-sc->pfpu_regs;
Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
vm_idx = reg2idx(vm_reg);
sc->regs[vm_idx].setter = setter;
sc->regs[vm_idx].pfpu_reg = pfpu_reg;
sc->regs[vm_idx].refs = setter->num_dependants+1;
return pfpu_reg;
}
static void put_reg(int vm_reg)
{
int vm_idx;
if (vm_reg >= 0)
return;
vm_idx = reg2idx(vm_reg);
if (--sc->regs[vm_idx].refs)
return;
Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg);
#ifdef REG_STATS
sc->curr_regs--;
#endif
/*
* Prepend so that register numbers stay small and bugs reveal
* themselves more rapidly.
*/
list_add(&sc->unallocated,
&sc->pfpu_regs[sc->regs[vm_idx].pfpu_reg].more);
/* clear it for style only */
sc->regs[vm_idx].setter = NULL;
sc->regs[vm_idx].pfpu_reg = 0;
}
/*
 * Drop one reference to the destination register of "setter". Does nothing
 * when "setter" is NULL.
 */
static void put_reg_by_setter(struct insn *setter)
{
	if (!setter)
		return;
	put_reg(setter->vm_insn->dest);
}
/*
 * Return the PFPU register backing FPVM register "vm_reg": the number itself
 * if it is non-negative, otherwise the register recorded in sc->regs.
 */
static int lookup_pfpu_reg(int vm_reg)
{
	if (vm_reg < 0)
		return sc->regs[reg2idx(vm_reg)].pfpu_reg;
	return vm_reg;
}
/*
 * Flag the PFPU register with number "vm_reg" as used by the program.
 * Zero and negative numbers are ignored.
 */
static void mark(int vm_reg)
{
	if (vm_reg <= 0)
		return;
	sc->pfpu_regs[vm_reg].used = 1;
}
/*
 * Set up register state for a fragment.
 *
 * - fills "registers" with the initial register values (get_registers)
 * - allocates the zeroed FPVM register table sc->regs with
 *   nbindings - next_sur entries
 * - marks every PFPU register referenced directly by the code as used
 * - puts all remaining registers from PFPU_SPREG_COUNT upwards on the
 *   unallocated list, in ascending order
 *
 * Aborts if the register table cannot be allocated.
 */
static void init_registers(struct fpvm_fragment *frag,
    unsigned int *registers)
{
	size_t count;
	int i;

	get_registers(frag, registers);

	count = frag->nbindings-frag->next_sur;
	sc->regs = calloc(count, sizeof(*sc->regs));
	/* a zero-sized request may legitimately yield NULL */
	if (!sc->regs && count)
		abort();

	for (i = 0; i != frag->ninstructions; i++) {
		mark(frag->code[i].opa);
		mark(frag->code[i].opb);
		mark(frag->code[i].dest);
	}

	list_init(&sc->unallocated);
	for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++)
		if (!sc->pfpu_regs[i].used)
			list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more);
}
/* ----- Instruction scheduler --------------------------------------------- */
static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
int reg_num)
{
struct vm_reg *reg;
reg = sc->regs+reg2idx(reg_num);
ref->insn = insn;
ref->dep = reg->setter;
if (ref->dep) {
list_add_tail(&ref->dep->dependants, &ref->more);
ref->dep->num_dependants++;
insn->unresolved++;
Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
insn-sc->insns, reg_num, reg->setter-sc->insns,
insn->unresolved);
} else {
list_init(&ref->more);
}
return reg;
}
/*
 * Build the dependency graph of the fragment and the initial scheduling
 * lists.
 *
 * For every instruction, one data reference is registered per operand (and
 * for the condition register FPVM_REG_IFB if the arity is 3), followed by
 * the destination; the instruction then becomes the setter of its
 * destination register. Instructions without unresolved dependencies go to
 * sc->ready[0], all others to sc->unscheduled.
 *
 * Relies on sc->insns having been zeroed beforehand.
 */
static void init_scheduler(struct fpvm_fragment *frag)
{
	int i;
	struct insn *insn;
#ifdef LCPF
	struct data_ref *dep;
#endif

	list_init(&sc->unscheduled);
	list_init(&sc->waiting);
	for (i = 0; i != PFPU_PROGSIZE; i++)
		list_init(&sc->ready[i]);

	for (i = 0; i != frag->ninstructions; i++) {
		insn = sc->insns+i;
		insn->vm_insn = frag->code+i;
		insn->arity = fpvm_get_arity(frag->code[i].opcode);
		insn->latency = pfpu_get_latency(frag->code[i].opcode);
		list_init(&insn->dependants);

		switch (insn->arity) {
		case 3:
			/*
			 * The condition gets its own data reference. Sharing
			 * opb would relink a node that is already on the
			 * condition setter's list when operand B is added.
			 */
			add_data_ref(insn, &insn->cond, FPVM_REG_IFB);
			/* fall through */
		case 2:
			add_data_ref(insn, &insn->opb, frag->code[i].opb);
			/* fall through */
		case 1:
			add_data_ref(insn, &insn->opa, frag->code[i].opa);
			/* fall through */
		case 0:
			/*
			 * Depend on the previous setter of the destination,
			 * then take over as its setter.
			 */
			add_data_ref(insn,
			    &insn->dest, frag->code[i].dest)->setter = insn;
			break;
		default:
			abort();
		}

		if (insn->unresolved)
			list_add_tail(&sc->unscheduled, &insn->more);
		else
			list_add_tail(&sc->ready[0], &insn->more);
	}

#ifdef LCPF
	/*
	 * Walk backwards so that the distance of every dependant is final
	 * before it is propagated to the insn it depends on.
	 */
	for (i = frag->ninstructions-1; i >= 0; i--) {
		insn = sc->insns+i;
		foreach (dep, &insn->dependants)
			if (dep->insn->distance > insn->distance)
				insn->distance = dep->insn->distance;
		insn->distance += insn->latency;
	}
#endif
}
/*
 * Issue "insn" in cycle "cycle" of the program "code".
 *
 * - encodes the operands and the opcode into code[cycle]
 * - drops the references this insn held on the setters of its inputs
 * - allocates the destination register and encodes it into the word of the
 *   cycle in which the result is written (cycle + latency)
 * - updates every dependant: its earliest start is pushed past the write
 *   cycle, and once its last dependency is resolved it moves from the
 *   unscheduled list to the ready list of its earliest cycle
 *
 * A dependant whose earliest cycle lies beyond the program is left on the
 * unscheduled list, so it is never issued.
 */
static void issue(struct insn *insn, int cycle, unsigned *code)
{
	struct data_ref *ref;
	struct insn *user;
	int end;

	end = cycle+insn->latency;

	Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle,
	    (unsigned long) (insn-sc->insns), insn->latency,
	    insn->vm_insn->opa, insn->vm_insn->opb);

	switch (insn->arity) {
	case 3:
		/* no-op unless a condition dependency was recorded */
		put_reg_by_setter(insn->cond.dep);
		/* fall through */
	case 2:
		FIELD(code[cycle]).opb = lookup_pfpu_reg(insn->vm_insn->opb);
		put_reg_by_setter(insn->opb.dep);
		/* fall through */
	case 1:
		FIELD(code[cycle]).opa = lookup_pfpu_reg(insn->vm_insn->opa);
		put_reg_by_setter(insn->opa.dep);
		break;
	case 0:
		break;
	default:
		abort();
	}

	FIELD(code[end]).dest = alloc_reg(insn);
	FIELD(code[cycle]).opcode = fpvm_to_pfpu(insn->vm_insn->opcode);

	foreach (ref, &insn->dependants) {
		user = ref->insn;
		if (user->earliest <= end)
			user->earliest = end+1;
		if (--user->unresolved)
			continue;

		/*
		 * sc->ready has PFPU_PROGSIZE entries. An insn that cannot
		 * start before the end of the program has no ready slot.
		 */
		if (user->earliest >= PFPU_PROGSIZE)
			continue;

		Dprintf(" unlocked %lu -> %d\n",
		    (unsigned long) (user-sc->insns), user->earliest);
		list_del(&user->more);
		list_add_tail(sc->ready+user->earliest, &user->more);
	}
}
#ifdef DEBUG
/* Return the number of elements on "list" (the head is not counted). */
static int count(const struct list *list)
{
	const struct list *pos = list->next;
	int total = 0;

	while (pos != list) {
		total++;
		pos = pos->next;
	}
	return total;
}
#endif
/*
 * Run the scheduler cycle by cycle and emit the program into "code".
 *
 * In each cycle, the insns that become ready join the waiting list and at
 * most one waiting insn is issued. An insn can only be issued if the code
 * word at its write cycle (current cycle + latency) has no destination yet.
 *
 * Returns the index following the last cycle that is still needed (including
 * cycles in which pending results are written), or -1 if the program does not
 * fit into PFPU_PROGSIZE cycles.
 */
static int schedule(unsigned int *code)
{
int remaining;
int i, last, end;
struct insn *insn;
#ifdef LCPF
struct insn *best;
#endif
remaining = sc->frag->ninstructions;
/* one iteration per cycle, until every insn has been issued */
for (i = 0; remaining; i++) {
if (i == PFPU_PROGSIZE)
return -1;
Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
i, remaining, count(&sc->waiting), count(&sc->ready[i]));
/* insns becoming ready in this cycle are appended to the waiting list */
list_concat(&sc->waiting, &sc->ready[i]);
#ifdef LCPF
best = NULL;
#endif
foreach (insn, &sc->waiting) {
end = i+insn->latency;
if (end >= PFPU_PROGSIZE)
return -1;
/* a zero dest field means no result is written in cycle "end" yet */
if (!FIELD(code[end]).dest) {
#ifdef LCPF
/* remember the candidate with the largest distance; issue it after the scan */
if (!best || best->distance < insn->distance)
best = insn;
#else
/* issue the first insn that fits, then stop: the list was just modified */
issue(insn, i, code);
list_del(&insn->more);
remaining--;
break;
#endif
}
}
#ifdef LCPF
if (best) {
issue(best, i, code);
list_del(&best->more);
remaining--;
}
#endif
/*
 * A result is written in this cycle: drop the extra reference that
 * alloc_reg() added on top of the dependants.
 */
if (FIELD(code[i]).dest)
put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg);
}
/*
* Add NOPs to cover unfinished instructions.
*/
/* look at up to MAX_LATENCY further cycles for results still to be written */
last = i;
end = i+MAX_LATENCY;
if (end > PFPU_PROGSIZE)
end = PFPU_PROGSIZE;
while (i != end) {
if (FIELD(code[i]).dest)
last = i+1; /* @@@ ? */
i++;
}
return last;
}
/*
 * Allocate a zeroed scheduler context for "frag", make it current (sc) and
 * initialize the register and instruction state. "reg" receives the initial
 * register values.
 *
 * Aborts if the context cannot be allocated.
 */
static void init_scheduler_context(struct fpvm_fragment *frag,
    unsigned int *reg)
{
	sc = calloc(1, sizeof(*sc));
	if (!sc)
		abort();

	sc->frag = frag;
	init_registers(frag, reg);
	init_scheduler(frag);
}
/*
 * Schedule the fragment "frag" into the PFPU program "code" and fill "reg"
 * with the initial register values.
 *
 * "code" is cleared over PFPU_PROGSIZE words before scheduling. Unless the
 * fragment is in vector mode, a VECTOUT instruction is appended after the
 * scheduled code.
 *
 * Returns the program length in instructions, or a negative value if the
 * program does not fit.
 */
int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
    unsigned int *reg)
{
	pfpu_instruction vecout;
	int len;

	init_scheduler_context(frag, reg);
	memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
	len = schedule(code);

#ifdef REG_STATS
	printf("regs: %d/%d\n", sc->curr_regs, sc->max_regs);
#endif

	/* the context is only needed while scheduling */
	free(sc->regs);
	free(sc);

	if (len < 0 || frag->vector_mode)
		return len;

	/* no room left for the trailing VECTOUT */
	if (len == PFPU_PROGSIZE)
		return -1;

	vecout.w = 0;
	vecout.i.opcode = FPVM_OPCODE_VECTOUT;
	code[len] = vecout.w;
	return len+1;
}