m1/perf/: main.c was missing. Added on-going work.

2025-04-21 12:27:27 +03:00 · 2011-09-17 23:02:34 -03:00
parent 001ca49cc6
commit 9a5a22eda5
3 changed files with 542 additions and 0 deletions
--- a/m1/perf/main.c
+++ b/m1/perf/main.c
@@ -0,0 +1,60 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "compiler.h"
 #define	BUF_SIZE	1000000
 /*
 * report - diagnostic callback handed to patch_compile
 *
 * Writes the message s to stderr and terminates it with a newline.
 */
 static void report(const char *s)
 {
 	fputs(s, stderr);
 	fputc('\n', stderr);
 }
 /*
 * usage - print the command-line synopsis to stderr and exit with status 1
 *
 * name is the program name to show in the synopsis. Does not return.
 */
 static void usage(const char *name)
 {
 	static const char synopsis[] = "usage: %s patch-file [loops]\n";
 	fprintf(stderr, synopsis, name);
 	exit(1);
 }
 /*
 * main - read a patch file and compile it "loops" times
 *
 * Usage: main patch-file [loops]
 *
 * The file is read into a NUL-terminated buffer (at most BUF_SIZE-1 bytes,
 * anything beyond that is silently dropped) and handed to patch_compile
 * once per loop. "loops" defaults to 1; a value of zero or less means that
 * the patch is not compiled at all.
 *
 * Exits with status 1 on usage or I/O errors, returns 0 otherwise.
 */
 int main(int argc, char **argv)
 {
 	/* static: a 1 MB automatic array risks overflowing the stack */
 	static char buf[BUF_SIZE];
 	const char *name;
 	FILE *file;
 	size_t got;
 	int loops = 1;
 	int i;
 	switch (argc) {
 	case 2:
 		break;
 	case 3:
 		loops = atoi(argv[2]);
 		break;
 	default:
 		usage(*argv);
 	}
 	name = argv[1];
 	file = fopen(name, "r");
 	if (!file) {
 		perror(name);
 		exit(1);
 	}
 	/*
 	 * Read with an element size of 1 so that fread returns the number of
 	 * bytes read. With (size, 1) it returns 0 or 1 items, which put the
 	 * terminating NUL at the very beginning of the buffer.
 	 */
 	got = fread(buf, 1, sizeof(buf)-1, file);
 	/* size_t is unsigned: errors must be detected with ferror */
 	if (ferror(file)) {
 		perror(name);
 		exit(1);
 	}
 	buf[got] = 0;
 	fclose(file);
 	/* "<" instead of "!=" so that a negative count terminates */
 	for (i = 0; i < loops; i++)
 		patch_compile(buf, report);
 	return 0;
 }
--- a/m1/perf/sched.c
+++ b/m1/perf/sched.c
@@ -0,0 +1,476 @@
 /*
 * sched.c - O(n) ... O(n^2) scheduler
 *
 * Written 2011 by Werner Almesberger
 *
 * Based on gfpus.c
 * Copyright (C) 2007, 2008, 2009, 2010 Sebastien Bourdeauducq
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <fpvm/is.h>
 #include <fpvm/fpvm.h>
 #include <fpvm/pfpu.h>
 #include <fpvm/gfpus.h>
 #include <hw/pfpu.h>
 #define	MAX_LATENCY	8	/* maximum latency; okay to make this bigger */
 /*
 * Debug trace hook. It expands to nothing here, so every "Dprintf(...);" in
 * this file is reduced to an empty statement and its arguments are never
 * evaluated.
 */
 #define	Dprintf(...)
 /*
 * Link of a circular doubly-linked list. The same structure is used for
 * list heads and for the links embedded in list members; an empty list is
 * a head whose next and prev point back to itself (see list_init).
 */
 struct list {
 	struct list *next, *prev;
 };
 /*
 * Scheduler state of one instruction. insns[i] describes frag->code[i] and
 * is (re)initialized by init_scheduler.
 *
 * "more" must remain the first member of struct insn and of struct
 * data_ref: the foreach macro casts list links straight to the containing
 * structure.
 */
 static struct insn {
 	struct list more;		/* more insns on same schedule */
 	struct fpvm_instruction *vm_insn; /* the instruction being scheduled */
 	struct data_ref {
 		struct list more;	/* more refs sharing the data */
 		struct insn *insn;	/* insn this is part of */
 		struct insn *dep;	/* insn we depend on; NULL if none */
 	} opa, opb, dest, cond;
 	int arity;		/* from fpvm_get_arity */
 	int latency;		/* from pfpu_get_latency */
 	int unresolved;	/* number of data refs we need before we can sched */
 	int earliest;	/* earliest cycle dependencies seen so far are met */
 	struct list dependants;	/* data refs (of other insns) that have us as dep */
 	int num_dependants;	/* number of data refs added to "dependants" */
 } insns[FPVM_MAXCODELEN];
 /* ----- Register initialization ------------------------------------------- */
 /*
 * Straight from gfpus.c, only with some whitespace changes.
 */
 /*
 * get_registers - compute the initial contents of the PFPU register file
 *
 * For each binding of the fragment, registers[] receives 0 if the binding
 * is a variable, else the bit pattern of the bound float constant. All
 * remaining entries up to PFPU_REG_COUNT are cleared.
 */
 static void get_registers(struct fpvm_fragment *fragment,
    unsigned int *registers)
 {
 	union {
 		float f;
 		unsigned int n;
 	} bits;
 	int reg = 0;
 	while (reg < fragment->nbindings) {
 		if (fragment->bindings[reg].isvar) {
 			registers[reg] = 0;
 		} else {
 			/* reinterpret the float constant as raw bits */
 			bits.f = fragment->bindings[reg].b.c;
 			registers[reg] = bits.n;
 		}
 		reg++;
 	}
 	while (reg < PFPU_REG_COUNT)
 		registers[reg++] = 0;
 }
 /* ----- Doubly-linked list ------------------------------------------------ */
 /*
 * Use naming conventions of include/linux/list.h
 */
 /*
 * list_init - turn "list" into an empty list (a head linked to itself)
 */
 static void list_init(struct list *list)
 {
 	list->prev = list;
 	list->next = list;
 }
 /*
 * list_del - unlink "item" from the list it is on
 *
 * The neighbours are joined to each other; the links stored in "item"
 * itself are left untouched.
 */
 static void list_del(struct list *item)
 {
 	struct list *before = item->prev;
 	struct list *after = item->next;
 	before->next = after;
 	after->prev = before;
 }
 /*
 * list_pop - remove and return the first element of "list"
 *
 * Returns a pointer to the removed link, or NULL if the list is empty.
 */
 static void *list_pop(struct list *list)
 {
 	struct list *head_item = list->next;
 	if (head_item != list) {
 		list_del(head_item);
 		return head_item;
 	}
 	return NULL;
 }
 /*
 * list_add_tail - append "item" at the end of "list"
 */
 static void list_add_tail(struct list *list, struct list *item)
 {
 	struct list *tail = list->prev;
 	item->next = list;
 	item->prev = tail;
 	tail->next = item;
 	list->prev = item;
 }
 /*
 * list_add - insert "item" at the beginning of "list"
 */
 static void list_add(struct list *list, struct list *item)
 {
 	struct list *first = list->next;
 	item->next = first;
 	item->prev = list;
 	first->prev = item;
 	list->next = item;
 }
 /*
 * list_concat - move all elements of list "b" to the end of list "a"
 *
 * The elements keep their order. "b" is always left empty.
 */
 static void list_concat(struct list *a, struct list *b)
 {
 	struct list *first = b->next;
 	struct list *last = b->prev;
 	struct list *tail = a->prev;
 	if (first != b) {
 		/* splice first..last in between the old tail of a and a */
 		tail->next = first;
 		first->prev = tail;
 		last->next = a;
 		a->prev = last;
 	}
 	list_init(b);
 }
 /*
 * foreach - visit each element of the list with head "head", first to last
 *
 * "var" is assigned every link in turn, converted through void *, so it can
 * be a pointer to any structure that begins with its struct list link.
 *
 * Do not delete elements from the list while traversing it with foreach !
 * The step expression follows the "next" pointer of the current element
 * after the loop body has run. Deleting is only safe if the loop is left
 * right away, e.g., with "break".
 */
 #define	foreach(var, head) \
 	for (var = (void *) ((struct list *) (head))->next; \
 	    (var) != (void *) (head); \
 	    var = (void *) ((struct list *) (var))->next)
 /* ----- Register management ----------------------------------------------- */
 /*
 * Per-register bookkeeping. "regs" is allocated by init_registers, freed by
 * gfpus_schedule, and indexed with reg2idx(register number).
 */
 static struct vm_reg {
 	struct insn *setter;	/* instruction setting it; NULL if none */
 	int pfpu_reg;		/* underlying PFPU register */
 	int refs;		/* usage count */
 } *regs;
 /*
 * One list link per PFPU register. The position of a link in this array is
 * the register number (alloc_reg computes it as link-pfpu_regs).
 */
 static struct list pfpu_regs[PFPU_REG_COUNT];
 static struct list unallocated;	/* unallocated registers */
 static int nbindings;		/* "public" bindings */
 /*
 * reg2idx - map a register number to its index in "regs"
 *
 * Non-negative numbers are used as they are. A negative number -k maps to
 * nbindings+k.
 */
 static int reg2idx(int reg)
 {
 	if (reg < 0)
 		return nbindings-reg;
 	return reg;
 }
 static int alloc_reg(struct insn *setter)
 {
 	struct list *reg;
 	int vm_reg, pfpu_reg, vm_idx;
 	vm_reg = setter->vm_insn->dest;
 	if (vm_reg >= 0)
 		return vm_reg;
 	reg = list_pop(&unallocated);
 	if (!reg)
 		abort();
 	pfpu_reg = reg-pfpu_regs;
 Dprintf("  alloc reg %d -> %d\n", vm_reg, pfpu_reg);
 	vm_idx = reg2idx(vm_reg);
 	regs[vm_idx].setter = setter;
 	regs[vm_idx].pfpu_reg = pfpu_reg;
 	regs[vm_idx].refs = setter->num_dependants+1;
 	return pfpu_reg;
 }
 static void put_reg(struct insn *setter)
 {
 	int vm_reg, vm_idx;
 	if (!setter)
 		return;
 	vm_reg = setter->vm_insn->dest;
 	if (vm_reg >= 0)
 		return;
 	vm_idx = reg2idx(vm_reg);
 	if (--regs[vm_idx].refs)
 		return;
 Dprintf("  free reg %d\n", regs[vm_idx].pfpu_reg);
 	/*
 	 * Prepend so that register numbers stay small and bugs reveal
 	 * themselves more rapidly.
 	 */
 	list_add(&unallocated, pfpu_regs+regs[vm_idx].pfpu_reg);
 	/* clear it for style only */
 	regs[vm_idx].setter = NULL;
 	regs[vm_idx].pfpu_reg = 0;
 }
 /*
 * lookup_pfpu_reg - return the PFPU register currently backing "vm_reg"
 *
 * Non-negative register numbers are returned as they are; negative ones are
 * looked up in "regs".
 */
 static int lookup_pfpu_reg(int vm_reg)
 {
 	if (vm_reg >= 0)
 		return vm_reg;
 	return regs[reg2idx(vm_reg)].pfpu_reg;
 }
 static void init_registers(struct fpvm_fragment *fragment,
    unsigned int *registers)
 {
 	size_t regs_size;
 	int i;
 	get_registers(fragment, registers);
 	nbindings = fragment->nbindings;
 	regs_size = sizeof(struct vm_reg)*(nbindings-fragment->next_sur);
 	regs = malloc(regs_size);
 	memset(regs, 0, regs_size);
 	list_init(&unallocated);
 	for (i = fragment->nbindings; i != PFPU_REG_COUNT; i++)
 		list_add_tail(&unallocated, pfpu_regs+i);
 /*
 * @@@ the rules are more complex, see use of dont_touch in
 * init_scheduler_state
 */
 }
 /* ----- Instruction scheduler --------------------------------------------- */
 /*
 * Lists an instruction passes through, linked via insn->more:
 * init_scheduler puts it on "unscheduled" if it has unresolved data refs,
 * else on ready[0]. issue() moves it from "unscheduled" to ready[earliest]
 * when its last data ref is resolved. schedule() merges ready[n] into
 * "waiting" at cycle n and removes the instruction when it is issued.
 */
 static struct list unscheduled;		/* unscheduled insns */
 static struct list waiting;		/* insns waiting to be scheduled */
 static struct list ready[PFPU_PROGSIZE]; /* insns ready at nth cycle */
 /*
 * exits[n] is NULL while no instruction writes its result at cycle n. The
 * MAX_LATENCY entries past PFPU_PROGSIZE are preset to &dummy_insn by
 * init_scheduler.
 */
 static struct insn *exits[PFPU_PROGSIZE+MAX_LATENCY];
 					/* insn writing at nth cycle */
 static struct insn dummy_insn;		/* dummy, to signal occupancy */
 /*
 * add_data_ref - record that "insn" accesses register "reg_num" via "ref"
 *
 * If the register currently has a setter, "ref" is appended to the setter's
 * list of dependants and "insn" gets one more unresolved data ref. Without
 * a setter, "ref" only becomes an empty list of its own.
 *
 * Returns the "regs" entry of the register.
 */
 static struct vm_reg *add_data_ref(struct insn *insn, struct data_ref *ref,
    int reg_num)
 {
 	struct vm_reg *reg = &regs[reg2idx(reg_num)];
 	struct insn *producer = reg->setter;
 	ref->insn = insn;
 	ref->dep = producer;
 	if (!producer) {
 		list_init(&ref->more);
 		return reg;
 	}
 	list_add_tail(&producer->dependants, &ref->more);
 	producer->num_dependants++;
 	insn->unresolved++;
 	Dprintf("insn %lu: reg %d setter %lu unresolved %d\n",
 	    insn-insns, reg_num, reg->setter-insns, insn->unresolved);
 	return reg;
 }
 /* debugging aid; only set by the disabled "#if 0" block in init_scheduler */
 int catch = 0;
 /*
 * init_scheduler - build the dependency graph for all instructions of "frag"
 *
 * Resets the scheduling lists, initializes insns[i] for every instruction
 * frag->code[i], and adds one data ref per operand plus one for the
 * destination. The instruction then becomes the setter of its destination
 * register. Instructions without unresolved data refs go to ready[0], all
 * others to "unscheduled". Finally, the "exits" table is cleared and its
 * MAX_LATENCY overflow entries are marked as occupied.
 */
 static void init_scheduler(struct fpvm_fragment *frag)
 {
 	int i;
 	struct insn *insn;
 	list_init(&unscheduled);
 	list_init(&waiting);
 	for (i = 0; i != PFPU_PROGSIZE; i++)
 		list_init(&ready[i]);
 #if 0
 if (frag->ninstructions > 10) {
  frag->ninstructions = 10;
 catch = 1;
 }
 #endif
 	for (i = 0; i != frag->ninstructions; i++) {
 		insn = insns+i;
 		memset(insn, 0, sizeof(struct insn));
 		insn->vm_insn = frag->code+i;
 		insn->arity = fpvm_get_arity(frag->code[i].opcode);
 		insn->latency = pfpu_get_latency(frag->code[i].opcode);
 		list_init(&insn->dependants);
 		switch (insn->arity) {
 		case 3:
 			/*
 			 * The implicit FPVM_REG_IFB operand needs a data ref of
 			 * its own. Using "opb" here, which case 2 initializes
 			 * again, would overwrite the dependency and relink a
 			 * list node that is still on another list.
 			 */
 			add_data_ref(insn, &insn->cond, FPVM_REG_IFB);
 			/* fall through */
 		case 2:
 			add_data_ref(insn, &insn->opb, frag->code[i].opb);
 			/* fall through */
 		case 1:
 			add_data_ref(insn, &insn->opa, frag->code[i].opa);
 			/* fall through */
 		case 0:
 			/* depend on the previous setter, then take over */
 			add_data_ref(insn,
 			    &insn->dest, frag->code[i].dest)->setter = insn;
 			break;
 		default:
 			abort();
 		}
 		if (insn->unresolved)
 			list_add_tail(&unscheduled, &insn->more);
 		else
 			list_add_tail(&ready[0], &insn->more);
 	}
 	/*
 	 * We add a few dummy instructions at the end so that we don't need to
 	 * check array boundaries for the unlikely case of overrunning the
 	 * schedule.
 	 */
 	for (i = 0; i != PFPU_PROGSIZE; i++)
 		exits[i] = NULL;
 	for (; i != PFPU_PROGSIZE+MAX_LATENCY; i++)
 		exits[i] = &dummy_insn;
 }
 /*
 * issue - issue "insn" at cycle "cycle" and return its PFPU code word
 *
 * Reserves the exit slot at cycle+latency, translates the operand registers
 * and drops the references this instruction held on them, allocates the
 * destination register, and resolves one data ref of every dependant.
 * Dependants without unresolved data refs left move to the ready list of
 * the first cycle after our result is written.
 *
 * The caller must have checked that exits[cycle+insn->latency] is free.
 */
 static unsigned issue(struct insn *insn, int cycle)
 {
 	pfpu_instruction code;
 	struct data_ref *ref;
 	struct insn *user;
 	int end;
 	/* start from zero so that fields we do not set are well-defined */
 	code.w = 0;
 	end = cycle+insn->latency;
 	exits[end] = insn;
 	Dprintf("cycle %d: insn %lu L %d (A %d B %d)\n",
 	    cycle, insn-insns, insn->latency, insn->vm_insn->opa,
 	    insn->vm_insn->opb);
 	switch (insn->arity) {
 	case 3:
 		/* fall through */
 	case 2:
 		code.i.opb = lookup_pfpu_reg(insn->vm_insn->opb);
 		put_reg(insn->opb.dep);
 		/* fall through */
 	case 1:
 		code.i.opa = lookup_pfpu_reg(insn->vm_insn->opa);
 		put_reg(insn->opa.dep);
 		break;
 	case 0:
 		break;
 	default:
 		abort();
 	}
 	code.i.dest = alloc_reg(insn);
 	code.i.opcode = fpvm_to_pfpu(insn->vm_insn->opcode);
 	foreach (ref, &insn->dependants) {
 		user = ref->insn;
 		if (user->earliest <= end)
 			user->earliest = end+1;
 		if (--user->unresolved)
 			continue;
 		Dprintf("  unlocked %lu -> %u\n", user-insns, user->earliest);
 		/*
 		 * "earliest" can reach PFPU_PROGSIZE, which is one past the
 		 * end of ready[]. Such an instruction cannot be scheduled
 		 * anyway: leave it on "unscheduled" and let schedule() fail
 		 * when it runs out of cycles.
 		 */
 		if (user->earliest >= PFPU_PROGSIZE)
 			continue;
 		list_del(&user->more);
 		list_add_tail(ready+user->earliest, &user->more);
 	}
 	return code.w;
 }
 /*
 * count - return the number of elements on "list" (the head is not counted)
 */
 static int count(const struct list *list)
 {
 	const struct list *node = list->next;
 	int total = 0;
 	while (node != list) {
 		total++;
 		node = node->next;
 	}
 	return total;
 }
 /*
 * schedule - assign the instructions of "frag" to cycles
 *
 * code[i] receives the word returned by issue() for the instruction issued
 * at cycle i. Entries of cycles without an issue are not written.
 *
 * Returns -1 if instructions remain when cycle PFPU_PROGSIZE is reached.
 * Otherwise, returns one past the last cycle in which an instruction is
 * issued or writes its result, limited to PFPU_PROGSIZE.
 */
 static int schedule(struct fpvm_fragment *frag, unsigned int *code)
 {
 	int remaining;
 	int i, last, end;
 	struct insn *insn;
 	remaining = frag->ninstructions;
 	for (i = 0; remaining; i++) {
 		if (i == PFPU_PROGSIZE)
 			return -1;
 Dprintf("@%d --- remaining %d, waiting %d + ready %d = ", i, remaining,
  count(&waiting), count(&ready[i]));
 		/* insns becoming ready now join those still waiting */
 		list_concat(&waiting, &ready[i]);
 Dprintf("%d\n", count(&waiting));
 		/*
 		 * Issue the first waiting insn whose exit slot is still
 		 * free, at most one per cycle. We leave the loop right
 		 * after list_del, so deleting during foreach is safe here.
 		 */
 		foreach (insn, &waiting)
 			if (!exits[i+insn->latency]) {
 				code[i] = issue(insn, i);
 				list_del(&insn->more);
 				remaining--;
 				break;
 			}
 		/*
 		 * The insn writing its result in this cycle drops one
 		 * reference to its destination register.
 		 */
 		if (exits[i])
 			put_reg(exits[i]);
 	}
 	/*
 	 * Add NOPs to cover unfinished instructions.
 	 */
 	last = i;
 	end = i+MAX_LATENCY;
 	if (end > PFPU_PROGSIZE)
 		end = PFPU_PROGSIZE;
 	/* extend the program up to the last cycle with a pending exit */
 	while (i != end) {
 		if (exits[i])
 			last = i+1; /* @@@ ? */
 		i++;
 	}
 	return last;
 }
 int gfpus_schedule(struct fpvm_fragment *frag, unsigned int *code,
    unsigned int *reg)
 {
 	pfpu_instruction vecout;
 	int res;
 	init_registers(frag, reg);
 	memset(code, 0, PFPU_PROGSIZE*sizeof(*code));
 	init_scheduler(frag);
 	res = schedule(frag, code);
 	free(regs);
 	if (res < 0)
 		return res;
 	if (frag->vector_mode)
 		return res;
 	if (res == PFPU_PROGSIZE)
 		return -1;
 	vecout.w = 0;
 	vecout.i.opcode = FPVM_OPCODE_VECTOUT;
 	code[res] = vecout.w;
 	return res+1;
 }
--- a/m1/perf/try
+++ b/m1/perf/try
@@ -0,0 +1,6 @@
 #!/bin/sh -x
 # Build ./main with sched.o as the scheduler object and run it under gdb
 # on the "Godhead" patch(es).
 # "make path" presumably prints the root of the M1 tree - TODO confirm.
 M1=$(make path)
 make CFLAGS_EXTRA=-DCOMP_DEBUG SCHED=sched.o
 # $M1 is deliberately unquoted, as the pattern has to be expanded
 gdb --args ./main $M1/flickernoise/patches/*/*Godhead*.fnp