mirror of
git://projects.qi-hardware.com/wernermisc.git
synced 2024-11-15 13:30:38 +02:00
m1/perf/: allocate scheduler context (about 0.5 MB) dynamically
This commit is contained in:
parent
c009a13f5c
commit
b4abaffa5c
@ -1,3 +1,7 @@
|
|||||||
|
Done:
|
||||||
|
- dynamically allocate scheduler context
|
||||||
|
|
||||||
|
Pending:
|
||||||
- see if preferring critical path can improve code efficiency
|
- see if preferring critical path can improve code efficiency
|
||||||
- test IF
|
- test IF
|
||||||
- run result comparison against full set of patches
|
- run result comparison against full set of patches
|
||||||
@ -6,3 +10,4 @@
|
|||||||
- see what optimization changes (may interfere with profiling)
|
- see what optimization changes (may interfere with profiling)
|
||||||
- build into Flickernoise (some things may need adapting, e.g., abort())
|
- build into Flickernoise (some things may need adapting, e.g., abort())
|
||||||
- review code, see if things can be simplified
|
- review code, see if things can be simplified
|
||||||
|
- see if valgrind can do something useful
|
||||||
|
161
m1/perf/sched.c
161
m1/perf/sched.c
@ -48,7 +48,7 @@ struct list {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static struct insn {
|
struct insn {
|
||||||
struct list more; /* more insns on same schedule */
|
struct list more; /* more insns on same schedule */
|
||||||
struct fpvm_instruction *vm_insn;
|
struct fpvm_instruction *vm_insn;
|
||||||
struct data_ref {
|
struct data_ref {
|
||||||
@ -62,7 +62,33 @@ static struct insn {
|
|||||||
int earliest; /* earliest cycle dependencies seen so far are met */
|
int earliest; /* earliest cycle dependencies seen so far are met */
|
||||||
struct list dependants; /* list of dependants, i.e., data refs that depend on this insn (constant) */
|
struct list dependants; /* list of dependants, i.e., data refs that depend on this insn (constant) */
|
||||||
int num_dependants; /* number of unresolved dependants */
|
int num_dependants; /* number of unresolved dependants */
|
||||||
} insns[FPVM_MAXCODELEN];
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct vm_reg {
|
||||||
|
struct insn *setter; /* instruction setting it; NULL if none */
|
||||||
|
int pfpu_reg; /* underlying PFPU register */
|
||||||
|
int refs; /* usage count */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct pfpu_reg {
|
||||||
|
struct list more; /* list of unallocated PFPU registers */
|
||||||
|
int vm_reg; /* corresponding FPVM register if allocated */
|
||||||
|
int used; /* used somewhere in the program */
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static struct sched_ctx {
|
||||||
|
struct fpvm_fragment *frag;
|
||||||
|
struct insn insns[FPVM_MAXCODELEN];
|
||||||
|
struct vm_reg *regs; /* dynamically allocated */
|
||||||
|
struct pfpu_reg pfpu_regs[PFPU_REG_COUNT];
|
||||||
|
struct list unallocated; /* unallocated registers */
|
||||||
|
struct list unscheduled; /* unscheduled insns */
|
||||||
|
struct list waiting; /* insns waiting to be scheduled */
|
||||||
|
struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
|
||||||
|
} *sc;
|
||||||
|
|
||||||
|
|
||||||
/* ----- Register initialization ------------------------------------------- */
|
/* ----- Register initialization ------------------------------------------- */
|
||||||
@ -168,25 +194,9 @@ static void list_concat(struct list *a, struct list *b)
|
|||||||
/* ----- Register management ----------------------------------------------- */
|
/* ----- Register management ----------------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
static struct vm_reg {
|
|
||||||
struct insn *setter; /* instruction setting it; NULL if none */
|
|
||||||
int pfpu_reg; /* underlying PFPU register */
|
|
||||||
int refs; /* usage count */
|
|
||||||
} *regs;
|
|
||||||
|
|
||||||
static struct pfpu_reg {
|
|
||||||
struct list more; /* list of unallocated PFPU registers */
|
|
||||||
int vm_reg; /* corresponding FPVM register if allocated */
|
|
||||||
int used; /* used somewhere in the program */
|
|
||||||
} pfpu_regs[PFPU_REG_COUNT];
|
|
||||||
|
|
||||||
static struct list unallocated; /* unallocated registers */
|
|
||||||
static int nbindings; /* "public" bindings */
|
|
||||||
|
|
||||||
|
|
||||||
static int reg2idx(int reg)
|
static int reg2idx(int reg)
|
||||||
{
|
{
|
||||||
return reg >= 0 ? reg : nbindings-reg;
|
return reg >= 0 ? reg : sc->frag->nbindings-reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -198,18 +208,19 @@ static int alloc_reg(struct insn *setter)
|
|||||||
vm_reg = setter->vm_insn->dest;
|
vm_reg = setter->vm_insn->dest;
|
||||||
if (vm_reg >= 0)
|
if (vm_reg >= 0)
|
||||||
return vm_reg;
|
return vm_reg;
|
||||||
reg = list_pop(&unallocated);
|
reg = list_pop(&sc->unallocated);
|
||||||
if (!reg)
|
if (!reg)
|
||||||
abort();
|
abort();
|
||||||
reg->vm_reg = vm_reg;
|
reg->vm_reg = vm_reg;
|
||||||
pfpu_reg = reg-pfpu_regs;
|
pfpu_reg = reg-sc->pfpu_regs;
|
||||||
|
|
||||||
Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
|
Dprintf(" alloc reg %d -> %d\n", vm_reg, pfpu_reg);
|
||||||
|
|
||||||
vm_idx = reg2idx(vm_reg);
|
vm_idx = reg2idx(vm_reg);
|
||||||
regs[vm_idx].setter = setter;
|
sc->regs[vm_idx].setter = setter;
|
||||||
regs[vm_idx].pfpu_reg = pfpu_reg;
|
sc->regs[vm_idx].pfpu_reg = pfpu_reg;
|
||||||
regs[vm_idx].refs = setter->num_dependants+1;
|
sc->regs[vm_idx].refs = setter->num_dependants+1;
|
||||||
|
|
||||||
return pfpu_reg;
|
return pfpu_reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -222,19 +233,21 @@ static void put_reg(int vm_reg)
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
vm_idx = reg2idx(vm_reg);
|
vm_idx = reg2idx(vm_reg);
|
||||||
if (--regs[vm_idx].refs)
|
if (--sc->regs[vm_idx].refs)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg);
|
Dprintf(" free reg %d\n", regs[vm_idx].pfpu_reg);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Prepend so that register numbers stay small and bugs reveal
|
* Prepend so that register numbers stay small and bugs reveal
|
||||||
* themselves more rapidly.
|
* themselves more rapidly.
|
||||||
*/
|
*/
|
||||||
list_add(&unallocated, &pfpu_regs[regs[vm_idx].pfpu_reg].more);
|
list_add(&sc->unallocated,
|
||||||
|
&sc->pfpu_regs[sc->regs[vm_idx].pfpu_reg].more);
|
||||||
|
|
||||||
/* clear it for style only */
|
/* clear it for style only */
|
||||||
regs[vm_idx].setter = NULL;
|
sc->regs[vm_idx].setter = NULL;
|
||||||
regs[vm_idx].pfpu_reg = 0;
|
sc->regs[vm_idx].pfpu_reg = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -247,58 +260,51 @@ static void put_reg_by_setter(struct insn *setter)
|
|||||||
|
|
||||||
static int lookup_pfpu_reg(int vm_reg)
|
static int lookup_pfpu_reg(int vm_reg)
|
||||||
{
|
{
|
||||||
return vm_reg >= 0 ? vm_reg : regs[reg2idx(vm_reg)].pfpu_reg;
|
return vm_reg >= 0 ? vm_reg : sc->regs[reg2idx(vm_reg)].pfpu_reg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void mark(int vm_reg)
|
static void mark(int vm_reg)
|
||||||
{
|
{
|
||||||
if (vm_reg > 0)
|
if (vm_reg > 0)
|
||||||
pfpu_regs[vm_reg].used = 1;
|
sc->pfpu_regs[vm_reg].used = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void init_registers(struct fpvm_fragment *fragment,
|
static void init_registers(struct fpvm_fragment *frag,
|
||||||
unsigned int *registers)
|
unsigned int *registers)
|
||||||
{
|
{
|
||||||
size_t regs_size;
|
size_t regs_size;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
get_registers(fragment, registers);
|
get_registers(frag, registers);
|
||||||
nbindings = fragment->nbindings;
|
|
||||||
|
|
||||||
regs_size = sizeof(struct vm_reg)*(nbindings-fragment->next_sur);
|
regs_size = sizeof(struct vm_reg)*(frag->nbindings-frag->next_sur);
|
||||||
regs = malloc(regs_size);
|
sc->regs = malloc(regs_size);
|
||||||
memset(regs, 0, regs_size);
|
memset(sc->regs, 0, regs_size);
|
||||||
|
|
||||||
memset(pfpu_regs, 0, sizeof(pfpu_regs));
|
for (i = 0; i != frag->ninstructions; i++) {
|
||||||
for (i = 0; i != fragment->ninstructions; i++) {
|
mark(frag->code[i].opa);
|
||||||
mark(fragment->code[i].opa);
|
mark(frag->code[i].opb);
|
||||||
mark(fragment->code[i].opb);
|
mark(frag->code[i].dest);
|
||||||
mark(fragment->code[i].dest);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
list_init(&unallocated);
|
list_init(&sc->unallocated);
|
||||||
for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++)
|
for (i = PFPU_SPREG_COUNT; i != PFPU_REG_COUNT; i++)
|
||||||
if (!pfpu_regs[i].used)
|
if (!sc->pfpu_regs[i].used)
|
||||||
list_add_tail(&unallocated, &pfpu_regs[i].more);
|
list_add_tail(&sc->unallocated, &sc->pfpu_regs[i].more);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* ----- Instruction scheduler --------------------------------------------- */
|
/* ----- Instruction scheduler --------------------------------------------- */
|
||||||
|
|
||||||
|
|
||||||
static struct list unscheduled; /* unscheduled insns */
|
|
||||||
static struct list waiting; /* insns waiting to be scheduled */
|
|
||||||
static struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
|
|
||||||
|
|
||||||
|
|
||||||
static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
|
static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
|
||||||
int reg_num)
|
int reg_num)
|
||||||
{
|
{
|
||||||
struct vm_reg *reg;
|
struct vm_reg *reg;
|
||||||
|
|
||||||
reg = regs+reg2idx(reg_num);
|
reg = sc->regs+reg2idx(reg_num);
|
||||||
ref->insn = insn;
|
ref->insn = insn;
|
||||||
ref->dep = reg->setter;
|
ref->dep = reg->setter;
|
||||||
if (ref->dep) {
|
if (ref->dep) {
|
||||||
@ -307,7 +313,8 @@ static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
|
|||||||
insn->unresolved++;
|
insn->unresolved++;
|
||||||
|
|
||||||
Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
|
Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
|
||||||
insn-insns, reg_num, reg->setter-insns, insn->unresolved);
|
insn-sc->insns, reg_num, reg->setter-sc->insns,
|
||||||
|
insn->unresolved);
|
||||||
} else {
|
} else {
|
||||||
list_init(&ref->more);
|
list_init(&ref->more);
|
||||||
}
|
}
|
||||||
@ -320,14 +327,13 @@ static void init_scheduler(struct fpvm_fragment *frag)
|
|||||||
int i;
|
int i;
|
||||||
struct insn *insn;
|
struct insn *insn;
|
||||||
|
|
||||||
list_init(&unscheduled);
|
list_init(&sc->unscheduled);
|
||||||
list_init(&waiting);
|
list_init(&sc->waiting);
|
||||||
for (i = 0; i != PFPU_PROGSIZE; i++)
|
for (i = 0; i != PFPU_PROGSIZE; i++)
|
||||||
list_init(&ready[i]);
|
list_init(&sc->ready[i]);
|
||||||
|
|
||||||
for (i = 0; i != frag->ninstructions; i++) {
|
for (i = 0; i != frag->ninstructions; i++) {
|
||||||
insn = insns+i;
|
insn = sc->insns+i;
|
||||||
memset(insn, 0, sizeof(struct insn));
|
|
||||||
insn->vm_insn = frag->code+i;
|
insn->vm_insn = frag->code+i;
|
||||||
insn->arity = fpvm_get_arity(frag->code[i].opcode);
|
insn->arity = fpvm_get_arity(frag->code[i].opcode);
|
||||||
insn->latency = pfpu_get_latency(frag->code[i].opcode);
|
insn->latency = pfpu_get_latency(frag->code[i].opcode);
|
||||||
@ -350,9 +356,9 @@ static void init_scheduler(struct fpvm_fragment *frag)
|
|||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
if (insn->unresolved)
|
if (insn->unresolved)
|
||||||
list_add_tail(&unscheduled, &insn->more);
|
list_add_tail(&sc->unscheduled, &insn->more);
|
||||||
else
|
else
|
||||||
list_add_tail(&ready[0], &insn->more);
|
list_add_tail(&sc->ready[0], &insn->more);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -364,7 +370,8 @@ static void issue(struct insn *insn, int cycle, unsigned *code)
|
|||||||
end = cycle+insn->latency;
|
end = cycle+insn->latency;
|
||||||
|
|
||||||
Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle,
|
Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n", cycle,
|
||||||
insn-insns, insn->latency, insn->vm_insn->opa, insn->vm_insn->opb);
|
insn-sc->insns, insn->latency, insn->vm_insn->opa,
|
||||||
|
insn->vm_insn->opb);
|
||||||
|
|
||||||
switch (insn->arity) {
|
switch (insn->arity) {
|
||||||
case 3:
|
case 3:
|
||||||
@ -393,7 +400,7 @@ static void issue(struct insn *insn, int cycle, unsigned *code)
|
|||||||
Dprintf(" unlocked %lu -> %u\n", ref->insn-insns,
|
Dprintf(" unlocked %lu -> %u\n", ref->insn-insns,
|
||||||
ref->insn->earliest);
|
ref->insn->earliest);
|
||||||
list_del(&ref->insn->more);
|
list_del(&ref->insn->more);
|
||||||
list_add_tail(ready+ref->insn->earliest,
|
list_add_tail(sc->ready+ref->insn->earliest,
|
||||||
&ref->insn->more);
|
&ref->insn->more);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -413,22 +420,22 @@ static int count(const struct list *list)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
static int schedule(struct fpvm_fragment *frag, unsigned int *code)
|
static int schedule(unsigned int *code)
|
||||||
{
|
{
|
||||||
int remaining;
|
int remaining;
|
||||||
int i, last, end;
|
int i, last, end;
|
||||||
struct insn *insn;
|
struct insn *insn;
|
||||||
|
|
||||||
remaining = frag->ninstructions;
|
remaining = sc->frag->ninstructions;
|
||||||
for (i = 0; remaining; i++) {
|
for (i = 0; remaining; i++) {
|
||||||
if (i == PFPU_PROGSIZE)
|
if (i == PFPU_PROGSIZE)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
|
Dprintf("@%d --- remaining %d, waiting %d + ready %d\n",
|
||||||
i, remaining, count(&waiting), count(&ready[i]));
|
i, remaining, count(&sc->waiting), count(&sc->ready[i]));
|
||||||
|
|
||||||
list_concat(&waiting, &ready[i]);
|
list_concat(&sc->waiting, &sc->ready[i]);
|
||||||
foreach (insn, &waiting) {
|
foreach (insn, &sc->waiting) {
|
||||||
end = i+insn->latency;
|
end = i+insn->latency;
|
||||||
if (end >= PFPU_PROGSIZE)
|
if (end >= PFPU_PROGSIZE)
|
||||||
return -1;
|
return -1;
|
||||||
@ -440,7 +447,7 @@ static int schedule(struct fpvm_fragment *frag, unsigned int *code)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (FIELD(code[i]).dest)
|
if (FIELD(code[i]).dest)
|
||||||
put_reg(pfpu_regs[FIELD(code[i]).dest].vm_reg);
|
put_reg(sc->pfpu_regs[FIELD(code[i]).dest].vm_reg);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -459,17 +466,29 @@ static int schedule(struct fpvm_fragment *frag, unsigned int *code)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void init_scheduler_context(struct fpvm_fragment *frag,
|
||||||
|
unsigned int *reg)
|
||||||
|
{
|
||||||
|
sc = malloc(sizeof(*sc));
|
||||||
|
memset(sc, 0, sizeof(*sc));
|
||||||
|
|
||||||
|
sc->frag = frag;
|
||||||
|
|
||||||
|
init_registers(frag, reg);
|
||||||
|
init_scheduler(frag);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
|
int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
|
||||||
unsigned int *reg)
|
unsigned int *reg)
|
||||||
{
|
{
|
||||||
pfpu_instruction vecout;
|
pfpu_instruction vecout;
|
||||||
int res;
|
int res;
|
||||||
|
|
||||||
init_registers(frag, reg);
|
init_scheduler_context(frag, reg);
|
||||||
memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
|
memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
|
||||||
init_scheduler(frag);
|
res = schedule(code);
|
||||||
res = schedule(frag, code);
|
free(sc->regs);
|
||||||
free(regs);
|
|
||||||
if (res < 0)
|
if (res < 0)
|
||||||
return res;
|
return res;
|
||||||
if (frag->vector_mode)
|
if (frag->vector_mode)
|
||||||
|
Loading…
Reference in New Issue
Block a user