more

2025-04-21 12:27:27 +03:00 · 2009-05-22 22:48:49 +02:00
parent 60ea570aaa
commit 12637f5695
18 changed files with 567 additions and 386 deletions
@@ -6,3 +6,5 @@ uimage
 *.o
 *.cc
 *.hh
+thread0
+thread1
@@ -1,6 +1,6 @@
 load = 0x80000000

-CXXFLAGS = -Wno-unused-parameter -fno-strict-aliasing -fno-builtin -nostdinc -DNUM_THREADS=0
+CXXFLAGS = -Wno-unused-parameter -fno-strict-aliasing -fno-builtin -nostdinc -DNUM_THREADS=0 -I/usr/include
 CPPFLAGS = -O5 -Wa,-mips32
 CROSS = mipsel-linux-gnu-
 CC = $(CROSS)gcc
@@ -22,15 +22,22 @@ PYPP = /usr/bin/pypp
 uimage: all.raw Makefile
 	mkimage -A MIPS -O Linux -C none -a $(load) -e 0x$(shell /bin/sh -c '$(OBJDUMP) -t all | grep __start$$ | cut -b-8') -n "Shevek's kernel" -d $< $@ | sed -e 's/:/;/g'

-arch.%: mips.%
+arch.hh: mips.hh
+	ln -s $< $@ || true
+arch.cc: mips.cc
 	ln -s $< $@ || true

 %.o:%.cc Makefile kernel.hh arch.hh
 	$(CC) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@

+entry.o: thread0 thread1
+
 %.o:%.S Makefile
 	$(CC) $(CPPFLAGS) -DKERNEL_STACK_SIZE=0x2000 -c $< -o $@

+%: boot-helper.o boot-programs/%.o
+	$(LD) $^ -o $@
+
 # entry.o must be the first file.  boot.o must be the first of the init objects (which can be dumped after loading).
 all: entry.o $(subst .cc,.o,$(kernel_sources)) boot.o $(subst .cc,.o,$(boot_sources))
 	$(LD) --omagic -Ttext $(load) $^ -o $@
@@ -43,6 +50,6 @@ junk = mdebug.abi32 reginfo comment pdr
 	gzip < $< > $@

 clean:
-	rm -f all uimage *.o all.raw.gz arch.hh
+	rm -f all uimage *.o all.raw.gz arch.hh arch.cc

 .PHONY: clean
@@ -11,10 +11,10 @@ bool Memory::use ():
 	return false

 void Memory::unuse ():
-	--used;
+	--used
 	return parent->unuse ()

-void *Memory::palloc ():
+unsigned Memory::palloc ():
 	if !use ():
 		return NULL
 	FreePage *ret = junk_pages
@@ -23,9 +23,9 @@ void *Memory::palloc ():
 		zero_pages = ret->next
 	else:
 		junk_pages = ret->next
-	return ret
+	return (unsigned)ret
 	
-void *Memory::zalloc ():
+unsigned Memory::zalloc ():
 	if !use ():
 		return NULL
 	FreePage *ret = zero_pages
@@ -37,14 +37,14 @@ void *Memory::zalloc ():
 	else:
 		zero_pages = ret->next
 	ret->next = NULL
-	return ret
+	return (unsigned)ret

-void Memory::pfree (void *page):
+void Memory::pfree (unsigned page):
 	FreePage *p = (FreePage *)page
 	p->next = junk_pages
 	junk_pages = p

-void Memory::zfree (void *page):
+void Memory::zfree (unsigned page):
 	FreePage *p = (FreePage *)page
 	p->next = zero_pages
 	zero_pages = p
@@ -125,13 +125,11 @@ void Object_base::free_obj (Memory *parent):
 			self->prev->next = self->next
 		else:
 			parent->frees = self->next
-		parent->pfree (self)
+		parent->pfree ((unsigned)self)

 Page *Memory::alloc_page ():
 	Page *ret = (Page *)search_free (sizeof (Page), (void **)&pages)
-	ret->physical = zalloc ()
-	if !ret->physical:
-		free_page (ret)
+	ret->physical = 0
 	return ret

 Thread *Memory::alloc_thread ():
@@ -0,0 +1,28 @@
+	.globl __start	// Start-up stub for boot programs: set $gp, save $a0-$a3, jump to main.
+
+__start:
+	bal 1f	// Leaves the address of the .word below in $ra (assumes the assembler fills the delay slot; no .set noreorder here).
+	.word _gp	// Data, never executed: the bal above jumps over it.
+1:
+	lw $gp, 0($ra)	// $gp = _gp, read through the address left in $ra.
+	la $v0, __my_receiver
+	sw $a0, ($v0)	// __my_receiver = $a0 (presumably the receiver set up by the kernel's init_threads).
+	la $v0, __top_memory
+	sw $a1, ($v0)	// __top_memory = $a1.
+	la $v0, __my_memory
+	sw $a2, ($v0)	// __my_memory = $a2.
+	la $v0, __my_admin
+	sw $a3, ($v0)	// __my_admin = $a3.
+	la $t9, main
+	la $ra, 1f	// If main returns, it lands on the faulting load below.
+	jr $t9
+	nop
+
+1:
+	// Generate an address fault.
+	lw $a0, -4($zero)
+
+	.comm __my_receiver, 4	// 4-byte slots for the four values saved from $a0-$a3 above.
+	.comm __top_memory, 4
+	.comm __my_memory, 4
+	.comm __my_admin, 4
@@ -0,0 +1,57 @@
+#ifndef __SOS_H
+#define __SOS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define KERNEL_MASK 0xfff
+#define CAPTYPE_MASK 0xe00
+#define CAPTYPE_ADMIN 0x000
+#define CAPTYPE_RECEIVER 0x200
+#define CAPTYPE_MEMORY 0x400
+#define CAPTYPE_THREAD 0x600
+#define CAPTYPE_PAGE 0x800
+#define CAPTYPE_CAPABILITY 0xa00
+#define CAPTYPE_CAPPAGE 0xc00
+/*#define CAPTYPE_??? 0xe00*/
+
+/* This works on all kernel capabilities.  */
+#define CAP_DEGRADE 0
+
+/* Operations */
+#define CAP_ADMIN_SCHEDULE 1
+/* TODO: add privileged operations.  */
+
+#define CAP_RECEIVER_SET_OWNER 1
+#define CAP_RECEIVER_CREATE_CAPABILITY 2
+#define CAP_RECEIVER_CREATE_CALL_CAPABILITY 3
+
+#define CAP_MEMORY_CREATE 1
+#define CAP_MEMORY_DESTROY 2
+#define CAP_MEMORY_LIST 3
+#define CAP_MEMORY_MAPPING 4
+#define CAP_MEMORY_DROP 5
+
+#define CAP_THREAD_RUN 1
+#define CAP_THREAD_RUN_CONDITIONAL 2
+#define CAP_THREAD_SLEEP 3
+#define CAP_THREAD_GET_INFO 4	/* Details of this are arch-specific.  */
+#define CAP_THREAD_SET_INFO 5	/* Details of this are arch-specific.  */
+
+#define CAP_PAGE_MAP 1
+#define CAP_PAGE_SHARE 2
+#define CAP_PAGE_SHARE_COW 3
+#define CAP_PAGE_FORGET 4
+
+#define CAP_CAPABILITY_GET 1
+#define CAP_CAPABILITY_SET_DEATH_NOTIFY 2
+
+#define CAP_CAPPAGE_SET 1
+#define CAP_CAPPAGE_GET 2
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
@@ -0,0 +1,6 @@
+#pypp 0
+#include "sos.h"
+
+// Test thread 0: loops forever issuing a syscall with $v0 = 0 and $a0-$a2 = 0.
+// When $v0 does not resolve to a capability, the kernel's current debug path
+// (Thread_arch_invoke) passes $a0-$a2 to led (), so this thread requests all leds off.
+int main ():
+	while true:
+		__asm__ volatile ("move $v0, $zero; move $a0, $zero ; move $a1, $zero ; move $a2, $zero ; syscall")
@@ -0,0 +1,6 @@
+#pypp 0
+#include "sos.h"
+
+// Test thread 1: loops forever issuing a syscall with $v0 = 0 and $a0-$a2 = 1.
+// When $v0 does not resolve to a capability, the kernel's current debug path
+// (Thread_arch_invoke) passes $a0-$a2 to led (), so this thread requests all leds on.
+int main ():
+	while true:
+		__asm__ volatile ("move $v0, $zero; li $a0, 1 ; move $a1, $a0 ; move $a2, $a0 ; syscall")
@@ -2,6 +2,7 @@
 	.lcomm kernel_stack, KERNEL_STACK_SIZE

 	.globl __start
+	.globl thread_start
 	.set noreorder

 #define Status 12
@@ -43,3 +44,8 @@ start_hack_for_disassembler:
 	la $t9, init
 	jr $t9
 	nop
+
+thread_start:		// Table of boot-thread image addresses; exported via .globl thread_start above.
+	.word thread0	// Start of the first embedded image.
+	.word thread1	// Start of the second embedded image (also the end of the first).
+	.word thread2	// End marker: address just past the last image.
@@ -4,5 +4,5 @@

 // This is needed to make gcc happy to compile c++ code without
 // its standard library.
-char __gxx_personality_v0[] = "hack";
+char __gxx_personality_v0[] = "hack"

@@ -32,7 +32,15 @@
 #define SAVE_T7 (SAVE_T6 + 4)
 #define SAVE_T8 (SAVE_T7 + 4)
 #define SAVE_T9 (SAVE_T8 + 4)
-#define SAVE_GP (SAVE_T9 + 4)
+#define SAVE_S0 (SAVE_T9 + 4)
+#define SAVE_S1 (SAVE_S0 + 4)
+#define SAVE_S2 (SAVE_S1 + 4)
+#define SAVE_S3 (SAVE_S2 + 4)
+#define SAVE_S4 (SAVE_S3 + 4)
+#define SAVE_S5 (SAVE_S4 + 4)
+#define SAVE_S6 (SAVE_S5 + 4)
+#define SAVE_S7 (SAVE_S6 + 4)
+#define SAVE_GP (SAVE_S7 + 4)
 #define SAVE_FP (SAVE_GP + 4)
 #define SAVE_RA (SAVE_FP + 4)
 #define SAVE_HI (SAVE_RA + 4)
@@ -44,12 +52,13 @@ addr_000:
 	// TLB refill
 	// TODO: this should probably be assembly-only for speed reasons

-	li $a0, 0xffff0000
+	//mfc0 $a0, $EPC
+	li $a0, 0x11992288
 	la $t9, panic
 	jr $t9
 	nop

-	sw $ra, -0x188($zero)
+	sw $ra, -0xd88($zero)
 	bal save_regs
 	la $t9, tlb_refill
 	jr $t9
@@ -64,7 +73,7 @@ addr_100:
 	jr $t9
 	nop

-	sw $ra, -0x188($zero)
+	sw $ra, -0xd88($zero)
 	bal save_regs
 	la $t9, cache_error
 	jr $t9
@@ -72,7 +81,7 @@ addr_100:
 	.fill 0x180 - (. - addr_000)
 addr_180:
 	// General exception
-	sw $ra, -0x188($zero)
+	sw $ra, -0xd88($zero)
 	bal save_regs
 	la $t9, exception
 	jr $t9
@@ -87,7 +96,7 @@ addr_200:
 	jr $t9
 	nop

-	sw $ra, -0x188($zero)
+	sw $ra, -0xd88($zero)
 	bal save_regs
 	la $t9, interrupt
 	jr $t9
@@ -95,10 +104,10 @@ addr_200:
 	.fill 0x280 - (. - addr_000) - 16

 	// space for save_regs: k0; current Thread; ra; gp
-	.word 0
-	.word 0
-	.word 0
-	.word _gp
+	.word 0			// -d90 == k0
+	.word idle		// -d8c == current
+	.word 0			// -d88	== ra
+	.word _gp		// -d84 == gp

 	.word idle_page		// 280
 	.word 0x80000000	// 284  A pointer to the current page.
@@ -106,7 +115,7 @@ start_idle:			// 288
 	// Wait for the next interrupt, then the first thread will be scheduled.
 	// It is impractical to try to call schedule, because for that the
 	// idle task would need to own capabilities.
-	mfc0 $a0, $9
+	move $v0, $zero
 	syscall
 1:	wait
 	b 1b
@@ -135,32 +144,41 @@ kernel_exit:
 	lw $t7, SAVE_T7($v0)
 	lw $t8, SAVE_T8($v0)
 	lw $t9, SAVE_T9($v0)
-	lw $gp, SAVE_GP($v0)
+	lw $s0, SAVE_S0($v0)
+	lw $s1, SAVE_S1($v0)
+	lw $s2, SAVE_S2($v0)
+	lw $s3, SAVE_S3($v0)
+	lw $s4, SAVE_S4($v0)
+	lw $s5, SAVE_S5($v0)
+	lw $s6, SAVE_S6($v0)
+	lw $s7, SAVE_S7($v0)
 	lw $sp, SAVE_SP($v0)
 	lw $fp, SAVE_FP($v0)
 	lw $ra, SAVE_RA($v0)
 	lw $at, SAVE_AT($v0)
 	lw $k0, SAVE_K0($v0)
 	lw $k1, SAVE_V0($v0)
-	sw $k1, -0x190($zero)
+	sw $k1, -0xd90($zero)
 	lw $k1, SAVE_K1($v0)
-	sw $v0, -0x18c($zero)
-	lw $v0, -0x190($zero)
+	sw $v0, -0xd8c($zero)
+	lw $gp, SAVE_GP($v0)
+	lw $v0, -0xd90($zero)
 	eret

 save_regs:
-	sw $k0, -0x190($zero)
-	lw $k0, -0x18c($zero)
+	sw $k0, -0xd90($zero)
+	lw $k0, -0xd8c($zero)
+
 	sw $at, SAVE_AT($k0)
 	sw $gp, SAVE_GP($k0)
 	sw $sp, SAVE_SP($k0)
 	sw $fp, SAVE_FP($k0)

 	sw $k1, SAVE_K1($k0)
-	lw $k1, -0x190($zero)
+	lw $k1, -0xd90($zero)
 	sw $k1, SAVE_K0($k0)

-	lw $k1, -0x188($zero)
+	lw $k1, -0xd88($zero)
 	sw $k1, SAVE_RA($k0)
 	sw $v0, SAVE_V0($k0)
 	sw $v1, SAVE_V1($k0)
@@ -178,6 +196,14 @@ save_regs:
 	sw $t7, SAVE_T7($k0)
 	sw $t8, SAVE_T8($k0)
 	sw $t9, SAVE_T9($k0)
+	sw $s0, SAVE_S0($k0)
+	sw $s1, SAVE_S1($k0)
+	sw $s2, SAVE_S2($k0)
+	sw $s3, SAVE_S3($k0)
+	sw $s4, SAVE_S4($k0)
+	sw $s5, SAVE_S5($k0)
+	sw $s6, SAVE_S6($k0)
+	sw $s7, SAVE_S7($k0)
 	mfhi $v0
 	mflo $v1
 	sw $v0, SAVE_HI($k0)
@@ -185,9 +211,20 @@ save_regs:
 	mfc0 $k1, $EPC
 	sw $k1, SAVE_PC($k0)

-	lw $gp, -0x184($zero)
+	lw $gp, -0xd84($zero)
 	la $sp, kernel_stack + KERNEL_STACK_SIZE
 	move $t9, $ra
 	la $ra, kernel_exit
 	jr $t9
 	move $a0, $k0
+
+	.globl thread0
+	.globl thread1
+	.globl thread2
+	.balign 0x1000	// Align first, then label: a label placed before .balign names the address before the padding.
+thread0:		// Page-aligned start of the embedded "thread0" image.
+	.incbin "thread0"
+	.balign 0x1000	// Pad so the next image also starts on a page boundary.
+thread1:		// Page-aligned start of the embedded "thread1" image.
+	.incbin "thread1"
+thread2:		// End marker (not page-aligned): first byte after the last image.
@@ -1,26 +1,9 @@
 #pypp 0
 // Also declare things which only work during kernel init.
 #define INIT
+#define ARCH
 #include "kernel.hh"
-
-#define reg_hack(x) #x
-#define cp0_get(reg, sel, target) do { __asm__ volatile ("mfc0 %0, $" reg_hack(reg) ", " #sel : "=r" (target)); } while (0)
-#define cp0_set(reg, value) do { __asm__ volatile ("mtc0 %0, $" reg_hack(reg) :: "r" (value)); } while (0)
-#define cp0_set0(reg) do { __asm__ volatile ("mtc0 $zero, $" reg_hack(reg)); } while (0)
-
-// cp0 registers.
-#define CP0_INDEX 0
-#define CP0_ENTRY_LO0 2
-#define CP0_ENTRY_LO1 3
-#define CP0_PAGE_MASK 5
-#define CP0_WIRED 6
-#define CP0_COUNT 9
-#define CP0_ENTRY_HI 10
-#define CP0_COMPARE 11
-#define CP0_STATUS 12
-#define CP0_CAUSE 13
-#define CP0_EPC 14
-#define CP0_CONFIG 16
+#include "elf.h"

 static void init_idle ():
 	// initialize idle task as if it is currently running.
@@ -48,7 +31,7 @@ static void init_idle ():
 	idle_page.next_obj = NULL
 	idle_page.prev = NULL
 	idle_page.next = NULL
-	idle_page.physical = (void *)0
+	idle_page.physical = 0

 static void init_cp0 ():
 	// Set timer to a defined value
@@ -67,21 +50,19 @@ static void init_cp0 ():
 	cp0_set0 (CP0_ENTRY_LO0)
 	cp0_set0 (CP0_ENTRY_LO1)
 	// Get number of tlb entries (is 31).
-	unsigned num;
-	cp0_get (CP0_CONFIG, 1, num)
+	unsigned num
+	cp0_get (CP0_CONFIG1, num)
 	num >>= 25
 	num &= 0x3f
 	// Clear the tlb.
-	#if 0
-	for unsigned i = 1; i < num; ++i:
-		// this address doesn't reach the tlb, so it can't trigger exceptions.
-		cp0_set (CP0_ENTRY_HI, 0x70000000 + 0x1000 * i)
+	for unsigned i = 1; i <= num; ++i:
+		// with asid 0, no page faults will be triggered, so it's safe to map memory anywhere.
+		cp0_set (CP0_ENTRY_HI, 0x2000 * i)
 		cp0_set (CP0_INDEX, i)
 		// write the data.
 		__asm__ volatile ("tlbwi")
-	#endif
 	// Fill the upper page in kseg3.
-	cp0_set (CP0_ENTRY_HI, 0xfffff000)
+	cp0_set (CP0_ENTRY_HI, 0xffffe000)
 	cp0_set (CP0_ENTRY_LO0, 0x1d)
 	cp0_set (CP0_ENTRY_LO1, 0x1f)
 	cp0_set0 (CP0_INDEX)
@@ -101,9 +82,72 @@ static void init_threads ():
 	for unsigned i = 0; i < NUM_THREADS; ++i:
 		Memory *mem = top_memory.alloc_memory ()
 		Thread *thread = mem->alloc_thread ()
-		// TODO
+		Page **pages = (Page **)mem->palloc ()
+		Elf32_Ehdr *header = (Elf32_Ehdr *)thread_start[i]
+		for unsigned j = 0; j < SELFMAG; ++j:
+			if header->e_ident[j] != ELFMAG[j]:
+				panic (i * 0x1000 + j, "invalid ELF magic")
+		if header->e_ident[EI_CLASS] != ELFCLASS32:
+			panic (i * 0x1000 + EI_CLASS, "invalid ELF class")
+		if header->e_ident[EI_DATA] != ELFDATA2LSB:
+			panic (i * 0x1000 + EI_DATA, "invalid ELF data")
+		if header->e_ident[EI_VERSION] != EV_CURRENT:
+			panic (i * 0x1000 + EI_VERSION, "invalid ELF version")
+		if header->e_type != ET_EXEC:
+			panic (i * 0x1000 + 0x10, "invalid ELF type")
+		if header->e_machine != EM_MIPS_RS3_LE:
+			panic (i * 0x1000 + 0x10, "invalid ELF machine")
+		thread->pc = header->e_entry
+		thread->sp = 0x80000000
+		for unsigned section = 0; section < header->e_shnum; ++section:
+			Elf32_Shdr *shdr = (Elf32_Shdr *)(thread_start[i] + header->e_shoff + section * header->e_shentsize)
+			if !(shdr->sh_flags & SHF_ALLOC):
+				continue
+			bool writable = shdr->sh_flags & SHF_WRITE
+			//bool executable = shdr->sh_flags & SHF_EXEC_INSTR
+			if shdr->sh_type != SHT_NOBITS:
+				for unsigned p = (shdr->sh_addr & PAGE_MASK); p <= ((shdr->sh_addr + shdr->sh_size - 1) & PAGE_MASK); p += PAGE_SIZE:
+					unsigned idx = (p - (shdr->sh_addr & PAGE_MASK)) >> PAGE_BITS
+					if !pages[idx]:
+						pages[idx] = mem->alloc_page ()
+						pages[idx]->physical = thread_start[i] + (idx << PAGE_BITS)
+						++top_memory.limit
+					if !mem->map (pages[idx], p, writable):
+						panic (0x22446688, "unable to map initial page")
+			else:
+				for unsigned p = (shdr->sh_addr & PAGE_MASK); p <= ((shdr->sh_addr + shdr->sh_size - 1) & PAGE_MASK); p += PAGE_SIZE:
+					Page *page = mem->get_mapping (p)
+					if !page:
+						page = mem->alloc_page ()
+						if !page:
+							panic (0x00220022, "out of memory")
+						page->physical = mem->zalloc ()
+						if !page->physical || !mem->map (page, p, true):
+							panic (0x33557799, "unable to map initial bss page")
+					else:
+						for unsigned a = p; a < p + PAGE_SIZE; a += 4:
+							if a >= shdr->sh_addr + shdr->sh_size:
+								break
+							if a < shdr->sh_addr:
+								continue
+							*(unsigned *)a = 0
+		for unsigned p = 0; p <= ((thread_start[i + 1] - thread_start[i] - 1) >> PAGE_BITS); ++p:
+			if pages[p]:
+				continue
+			++top_memory.limit
+			top_memory.zfree (thread_start[i] + (p << PAGE_BITS))
+		Page *stackpage = mem->alloc_page ()
+		stackpage->physical = mem->zalloc ()
+		if !stackpage || !mem->map (stackpage, 0x7ffff000, true):
+			panic (0x13151719, "unable to map initial stack page")
+		thread->arch.a0 = (unsigned)mem->alloc_receiver ()
+		thread->arch.a1 = (unsigned)&top_memory
+		thread->arch.a2 = (unsigned)mem
+		Capability *admin = mem->alloc_capability ((Receiver *)(CAPTYPE_ADMIN | ~PAGE_MASK), &mem->capabilities, ~0)
+		thread->arch.a3 = (unsigned)admin
+		mem->pfree ((unsigned)pages)

-/// Initialize the kernel, finish by falling into the idle task.
+// Initialize the kernel, finish by falling into the idle task.
 extern unsigned _end
 void init ():
 	// Initialize kernel variables to empty.
@@ -127,6 +171,7 @@ void init ():
 	top_memory.next_obj = NULL
 	top_memory.prev = NULL
 	top_memory.next = NULL
+	top_memory.parent = NULL
 	top_memory.pages = NULL
 	top_memory.threads = NULL
 	top_memory.memories = NULL
@@ -1,8 +1,10 @@
 #pypp 0
+#define ARCH
 #include "kernel.hh"

 /// A TLB miss has occurred.  This should eventually move to entry.S.
 Thread *tlb_refill (Thread *current, unsigned EntryHi):
+	panic (0x88776655, "TLB refill")
 	Page *page0 = current->address_space->get_mapping (EntryHi & ~(1 << 12))
 	Page *page1 = current->address_space->get_mapping (EntryHi | (1 << 12))
 	if (!(EntryHi & (1 << 12)) && !page0) || ((EntryHi & (1 << 12)) && !page1):
@@ -16,13 +18,15 @@ Thread *tlb_refill (Thread *current, unsigned EntryHi):
 		low1 = (unsigned)page1->physical | 0x18 | 0x4 | 0x2
 	else
 		low1 = 0
-	__asm__ volatile ("mtc0 %0, $2; mtc0 %1, $3; tlbwr" :: "r"(low0), "r"(low1))
+	cp0_set (CP0_ENTRY_LO0, low0)
+	cp0_set (CP0_ENTRY_LO1, low1)
+	__asm__ volatile ("tlbwr")
 	return current

 /// An interrupt which is not an exception has occurred.
 Thread *interrupt (Thread *current):
 	unsigned cause
-	__asm__ volatile ("mfc0 %0, $13" : "=r"(cause))
+	cp0_get (CP0_CAUSE, cause)
 	for unsigned i = 0; i < 8; ++i:
 		if cause & (1 << (i + 8)):
 			// TODO: Handle interrupt.
@@ -37,6 +41,7 @@ Thread *interrupt (Thread *current):
 /// A general exception has occurred.
 Thread *exception (Thread *current):
 	unsigned cause
+	led (true, true, true)
 	__asm__ volatile ("mfc0 %0, $13" : "=r"(cause))
 	switch (cause >> 2) & 0x1f:
 		case 0:
@@ -44,56 +49,61 @@ Thread *exception (Thread *current):
 			panic (0x11223344, "Interrupt.")
 		case 1:
 			// TLB modification.
-			panic (0x11223344, "TLB modification.")
+			panic (0x21223344, "TLB modification.")
 		case 2:
+			unsigned a
+			cp0_get (CP0_EPC, a)
+			panic (a)
 			// TLB load or instruction fetch.
-			panic (0x11223344, "TLB load or instruction fetch.")
+			panic (0x31223344, "TLB load or instruction fetch.")
 		case 3:
 			// TLB store.
-			panic (0x11223344, "TLB store.")
+			panic (0x41223344, "TLB store.")
 		case 4:
 			// Address error load or instruction fetch.
-			panic (0x11223344, "Address error load or instruction fetch.")
+			panic (0x51223344, "Address error load or instruction fetch.")
 		case 5:
 			// Address error store.
-			panic (0x11223344, "Address error store.")
+			panic (0x61223344, "Address error store.")
 		case 6:
 			// Bus error instruction fetch.
-			panic (0x11223344, "Bus error instruction fetch.")
+			panic (0x71223344, "Bus error instruction fetch.")
 		case 7:
 			// Bus error load or store.
-			panic (0x11223344, "Bus error load or store.")
+			panic (0x81223344, "Bus error load or store.")
 		case 8:
 			// Syscall.
+			// DEBUG: allow new exceptions.
+			//cp0_set (CP0_STATUS, 0x1000ff00)
 			Thread_arch_invoke ()
 			return current
 		case 9:
 			// Breakpoint.
-			panic (0x11223344, "Breakpoint.")
+			panic (0x91223344, "Breakpoint.")
 		case 10:
 			// Reserved instruction.
-			panic (0x11223344, "Reserved instruction.")
+			panic (0xa1223344, "Reserved instruction.")
 		case 11:
 			// Coprocessor unusable.
-			panic (0x11223344, "Coprocessor unusable.")
+			panic (0xb1223344, "Coprocessor unusable.")
 		case 12:
 			// Arithmetic overflow.
-			panic (0x11223344, "Arithmetic overflow.")
+			panic (0xc1223344, "Arithmetic overflow.")
 		case 13:
 			// Trap.
-			panic (0x11223344, "Trap.")
+			panic (0xe1223344, "Trap.")
 		case 15:
 			// Floating point exception.
-			panic (0x11223344, "Floating point exception.")
+			panic (0xf1223344, "Floating point exception.")
 		case 23:
 			// Reference to WatchHi/WatchLo address.
-			panic (0x11223344, "Reference to WatchHi/WatchLo address.")
+			panic (0xf2223344, "Reference to WatchHi/WatchLo address.")
 		case 24:
 			// Machine check.
-			panic (0x11223344, "Machine check.")
+			panic (0xf3223344, "Machine check.")
 		case 30:
 			// Cache error (EJTAG only).
-			panic (0x11223344, "Cache error (EJTAG only).")
+			panic (0xf4223344, "Cache error (EJTAG only).")
 		case 14:
 		case 16:
 		case 17:
@@ -109,7 +119,7 @@ Thread *exception (Thread *current):
 		case 29:
 		case 31:
 			// Reserved.
-			panic (0x11223344, "Reserved.")
+			panic (0xf5223344, "Reserved.")
 	return current

 /// There's a cache error.  Big trouble.  Probably not worth trying to recover.
@@ -2,6 +2,8 @@
 #ifndef _KERNEL_HH
 #define _KERNEL_HH

+#include "boot-programs/sos.h"
+
 #ifndef EXTERN
 #define EXTERN extern
 #endif
@@ -38,7 +40,7 @@ bool Object_base::is_free ():
 	return ((Free *)this)->marker == ~0

 struct Page : public Object <Page>:
-	void *physical
+	unsigned physical

 struct Thread : public Object <Thread>:
 	Memory *address_space
@@ -84,10 +86,10 @@ struct Memory : public Object <Memory>:
 	// Allocation of pages.
 	bool use ()
 	void unuse ()
-	void *palloc ()
-	void *zalloc ()
-	void pfree (void *page)
-	void zfree (void *page)
+	unsigned palloc ()
+	unsigned zalloc ()
+	void pfree (unsigned page)
+	void zfree (unsigned page)

 	// Allocation routines for kernel structures
 	void *search_free (unsigned size, void **first)
@@ -110,9 +112,10 @@ struct Memory : public Object <Memory>:
 // Functions which can be called from assembly must not be mangled.
 extern "C":
 	// Panic.  n is sent over caps led.  message is currently ignored.
-	void panic (unsigned n, char const *message)
+	void panic (unsigned n, char const *message = "")
 	// Debug: switch caps led
 	void led (bool one, bool two, bool three)
+	void dbg_sleep (unsigned ms)

 void schedule ()

@@ -136,6 +139,7 @@ void Memory_arch_free (Memory *mem)
 bool Memory_arch_map (Memory *mem, Page *page, unsigned address, bool write)
 void Memory_arch_unmap (Memory *mem, Page *page, unsigned address)
 Page *Memory_arch_get_mapping (Memory *mem, unsigned address)
+void arch_schedule (Thread *previous, Thread *target)

 bool Memory::map (Page *page, unsigned address, bool write):
 	return Memory_arch_map (this, page, address, write)
@@ -1,4 +1,5 @@
 #pypp 0
+#define ARCH
 #include "kernel.hh"

 void Thread_arch_init (Thread *thread):
@@ -29,9 +30,8 @@ void Thread_arch_init (Thread *thread):

 void Memory_arch_init (Memory *mem):
 	++g_asid
-	g_asid &= 0x3f
-	if !g_asid:
-		++g_asid
+	if g_asid > 0x3f:
+		g_asid = 1
 	mem->arch.asid = g_asid
 	mem->arch.directory = NULL

@@ -48,10 +48,10 @@ void Memory_arch_free (Memory *mem):
 				continue
 			mem->unmap (page, i * 0x1000 * 0x400 + j * 0x1000)
 		mem->unuse ()
-		mem->zfree (table)
+		mem->zfree ((unsigned)table)
 		mem->arch.directory[i] = NULL
 	mem->unuse ()
-	mem->zfree (mem->arch.directory)
+	mem->zfree ((unsigned)mem->arch.directory)

 bool Memory_arch_map (Memory *mem, Page *page, unsigned address, bool write):
 	unsigned *table = mem->arch.directory[(unsigned)address >> 22]
@@ -75,10 +75,15 @@ Page *Memory_arch_get_mapping (Memory *mem, unsigned address):

 void Thread_arch_invoke ():
 	Capability *target, *c0, *c1, *c2, *c3
-	target = current->address_space->find_capability (current->arch.v0)
+	if current:
+		target = current->address_space->find_capability (current->arch.v0)
+	else:
+		target = NULL
 	if !target:
 		// TODO: there must be no action here.  This is just because the rest doesn't work yet.
-		led (current->arch.a0, current->arch.a1, current->arch.a2)
+		if current:
+			led (current->arch.a0, current->arch.a1, current->arch.a2)
+			dbg_sleep (1000)
 		schedule ()
 		return
 	c0 = current->address_space->find_capability (current->arch.a0)
@@ -86,3 +91,11 @@ void Thread_arch_invoke ():
 	c2 = current->address_space->find_capability (current->arch.a2)
 	c3 = current->address_space->find_capability (current->arch.a3)
 	target->invoke (current->arch.t0, current->arch.t1, current->arch.t2, current->arch.t3, c0, c1, c2, c3)
+
+// Architecture-specific part of a thread switch: load CP0 EntryHi with the
+// asid of the target's address space, or with 0 when there is no target.
+// The previous thread is currently not used.
+void arch_schedule (Thread *previous, Thread *target):
+	if target:
+		cp0_set (CP0_ENTRY_HI, target->address_space->arch.asid)
+	else:
+		// The idle task's asid is 0.
+		cp0_set (CP0_ENTRY_HI, 0)
+	// TODO: flush TLB if the asid is already taken.
@@ -2,6 +2,48 @@
 #ifndef _ARCH_HH
 #define _ARCH_HH

+#ifdef ARCH
+// Helpers for accessing coprocessor 0 registers through inline assembly.
+// reg_hack stringifies its whole argument list (named variadic parameter), so a
+// "register, select" pair such as CP0_CONFIG1 (16, 1) becomes the text "16, 1".
+#define reg_hack(x...) #x
+// cp0_get: read cp0 register reg into target (mfc0).
+#define cp0_get(reg, target) do { __asm__ volatile ("mfc0 %0, $" reg_hack(reg) : "=r" (target)); } while (0)
+// cp0_set: write value to cp0 register reg (mtc0).
+#define cp0_set(reg, value) do { __asm__ volatile ("mtc0 %0, $" reg_hack(reg) :: "r" (value)); } while (0)
+// cp0_set0: write zero to cp0 register reg, using $zero as the source.
+#define cp0_set0(reg) do { __asm__ volatile ("mtc0 $zero, $" reg_hack(reg)); } while (0)
+
+// cp0 registers.
+#define CP0_INDEX 0
+#define CP0_RANDOM 1
+#define CP0_ENTRY_LO0 2
+#define CP0_ENTRY_LO1 3
+#define CP0_CONTEXT 4
+#define CP0_PAGE_MASK 5
+#define CP0_WIRED 6
+#define CP0_BAD_V_ADDR 8
+#define CP0_COUNT 9
+#define CP0_ENTRY_HI 10
+#define CP0_COMPARE 11
+#define CP0_STATUS 12
+#define CP0_CAUSE 13
+#define CP0_EPC 14
+#define CP0_P_R_ID 15
+#define CP0_CONFIG 16
+#define CP0_CONFIG1 16, 1
+#define CP0_CONFIG2 16, 2
+#define CP0_CONFIG3 16, 3
+#define CP0_L_L_ADDR 17
+#define CP0_WATCH_LO 18
+#define CP0_WATCH_HI 19
+#define CP0_DEBUG 23
+#define CP0_DEPC 24
+#define CP0_PERF_CNT 25
+#define CP0_ERR_CTL 26
+#define CP0_CACHE_ERR 27
+#define CP0_TAG_LO 28, 0
+#define CP0_DATA_LO 28, 1
+#define CP0_TAG_HI 29, 0
+#define CP0_DATA_HI 29, 1
+#define CP0_ERROR_EPC 30
+#define CP0_DESAVE 31
+#endif
+
 #define PAGE_BITS (12)
 #define PAGE_SIZE (1 << PAGE_BITS)
 #define PAGE_MASK (~(PAGE_SIZE - 1))
@@ -9,6 +51,7 @@
 struct Thread_arch:
 	unsigned at, v0, v1, a0, a1, a2, a3
 	unsigned t0, t1, t2, t3, t4, t5, t6, t7, t8, t9
+	unsigned s0, s1, s2, s3, s4, s5, s6, s7
 	unsigned gp, fp, ra, hi, lo, k0, k1

 struct Memory_arch:
@@ -32,4 +75,9 @@ extern "C":
 	void run_idle (Thread *self)
 	#endif

+#ifdef INIT
+// This is "extern", not "EXTERN", because it really is defined elsewhere.
+extern unsigned thread_start[NUM_THREADS + 1]
+#endif
+
 #endif
@@ -1,311 +1,218 @@
 \documentclass{shevek}
 \begin{document}
-\title{Writing a kernel from scratch}
+\title{Overview of my kernel}
 \author{Bas Wijnen}
 \date{\today}
 \maketitle
 \begin{abstract}
-This is a report of the process of writing a kernel from scratch for
-the cheap (€150) Trendtac laptop.  In a following report I shall write about
-the operating system on top of it.  It is written while writing the system, so
-that no steps are forgotten.  Choices are explained and problems (and their
-solutions) are shown.  After reading this, you should have a thorough
-understanding of the kernel, and (with significant effort) be able to write a
-similar kernel yourself.  This document assumes a working Debian system with
-root access (for installing packages), and some knowledge about computer
-architectures.  (If you lack that knowledge, you can try to read it anyway and
-check other sources when you see something new.)
+This document briefly describes the inner workings of my kernel, including the
+reasons for the choices that were made.  It is meant to be understandable (with
+effort) for people who know nothing of operating systems.  On the other hand,
+it should also be readable for people who know about computer architecture, but
+want to know about this kernel.
 \end{abstract}

 \tableofcontents

-\section{Hardware details}
-The first step in the process of writing an operating system is finding out
-what the system is you're going to program for.  While most of the work is
-supposed to be platform--independant, some parts, especially in the beginning,
-will depend very much on the actual hardware.  So I searched the net and found:
+\section{Operating systems}
+This section describes what the purpose of an operating system is, and defines
+what I call an ``operating system''\footnote{Different people use very
+different definitions, so this is not as trivial as it sounds.}.  It also goes
+into some detail about microkernels and capabilities.  If you already know, you
+can safely skip this section.  It contains no information about my kernel.
+
+\subsection{The goal of an operating system}
+In the 1980s, a computer could only run one program at a time.  When the
+program had finished, the next one could be started.  This follows the
+processor itself: it runs a program, from the beginning until the end, and
+can't run more than one program simultaneously\footnote{Multi-core processors
+technically can run multiple programs simultaneously, but I'm not talking about
+those here.}.  In those days, an \textit{operating system} was the program that
+allowed other programs to be started.  The best known operating systems were
+called \textit{Disk operating system}, or \textit{DOS} (of which there were
+several).
+
+At some point, there was a need for programs that would ``help'' other programs
+in some way.  For example, they could provide a calculator which would pop up
+when the user pressed a certain key combination.  Such programs were called
+\textit{terminate and stay resident} programs, or TSRs.  This name came from
+the fact that they terminated, in the sense that they would allow the next
+program to be run, but they would stay resident and do their job in the
+background.
+
+At some point, people wanted to do \textit{multitasking}.  That is, multiple
+``real'' programs should run concurrently, not just some helpers.  The easiest
+way to implement this is with \textit{cooperative multitasking}.  Every program
+returns control to the system every now and then.  The system switches between
+all the running programs.  The result is that every program runs for a short
+time, several times per second.  For the user, this looks like the programs are
+all running simultaneously, while in reality it is similar to a chess master
+playing simultaneously on many boards: he really plays on one board at a time,
+but switches a lot.  On such a system, the \textit{kernel} is the program that
+chooses which program to run next.  The \textit{operating system} is the kernel
+plus some support programs which allow the user to control the system.
+
+On a system where multiple programs all think they ``own'' the computer, there
+is another problem: if more than one program tries to access the same device,
+it is very likely that at least one of them, and probably both, will fail.  For
+this reason, \textit{device drivers} on a multitasking system must not only
+allow the device to be controlled, but they must also make sure that concurrent
+access doesn't fail.  The simplest way to achieve this is simply to disallow
+it (let all operations fail that don't come from the first program using the
+driver).  A better way, if the device can handle it, is to somehow make sure
+that both work.
+
+There is one problem with cooperative multitasking: when one program crashes,
+or for some other reason doesn't return control to the system, the other
+programs stop running as well.  The solution to this is \textit{preemptive
+multitasking}.  This means that every program is interrupted every now and
+then, without asking for it, and the system switches to a different program.
+This makes the kernel slightly more complex, because it must take care to store
+every aspect of the running programs.  After all, the program doesn't expect to
+be interrupted, so it can't expect its state to change either.  This shouldn't
+be a problem though.  It's just something to remember when writing the kernel.
+
+Concluding, every modern desktop kernel uses preemptive multitasking.  This
+requires a timer interrupt.  The operating system consists of this kernel, plus
+the support programs that allow the user to control the system.
+
+\subsection{Microkernel}
+Most modern kernels are so-called \textit{monolithic} kernels: they include
+most of the operating system.  In particular, they include the device drivers.
+This is useful, because the device drivers need special attention anyway, and
+they are very kernel-specific.  Modern processors allow the kernel to protect
+access to the hardware, so that programs can't interfere with each other.  A
+device driver which doesn't properly ask the kernel will simply not be allowed
+to control the device.
+
+However, adding device drivers and everything that comes with them
+(filesystems, for example) to the kernel makes it a very large program.
+Furthermore, it makes it an ever-changing program: as new devices are built,
+new drivers must be added.  Such a program can never become stable and
+bug-free.
+
+Conceptually much nicer is the microkernel.  It includes the minimum that is
+needed for a kernel, and nothing more.  It does include task switching and some
+method for tasks to communicate with each other.  It also ``handles'' hardware
+interrupts, but all it really does is pass them to the device driver, which
+is mostly a normal program.  Some microkernels don't do memory management
+(deciding which programs get how much and which memory), while others do.
+
+The drawback of a microkernel is that it requires much more communication
+between tasks.  Where a monolithic kernel can serve a driver request from a
+task directly, a microkernel must pass it to a device driver.  Usually there
+will be an answer, which must be passed back to the task.  This means more task
+switches.  This doesn't need to be a big problem, if task switching is
+optimized: because of the simpler structure of the microkernel, it can be much
+faster at this than a monolithic kernel.  And even if the end result is
+slightly slower, in my opinion the stability is still enough reason to prefer a
+microkernel over a monolithic one.
+
+Summarizing, a microkernel needs to do task switching and inter-process
+communication.  Because mapping memory into an address space is closely related
+to task switching, it is possible to include memory management as well.  The
+kernel must accept hardware interrupts, but doesn't handle them (except the
+timer interrupt).
+
+\subsection{Capabilities}
+Above I explained that the kernel must allow processes to communicate.  Many
+systems allow communication through the filesystem: one process writes to a
+file, and another process reads from it.  This implies that any process can
+communicate with any other process, if they only have a place to write in the
+filesystem, where the other can read.
+
+This is a problem because of security.  If a process cannot communicate with
+any part of the system, except the parts that it really needs to perform its
+operation, it cannot leak or damage the other parts of the system either.  The
+reason that this is relevant is not that users will run programs that try to
+ruin their system (although this may happen as well), but that programs may
+break and damage random parts of the system, or be taken over by crackers.  If
+the broken or malicious process has fewer rights, it will also do less damage
+to the system.
+
+This leads to the goal of giving each process as few rights as possible.
+For this, it is best to have rights in a very fine-grained way.  Every
+operation of a driver (be it a hardware device driver, or just a shared program
+such as a file system) should have its own key, which can be given out without
+giving keys to the entire driver (or even multiple drivers).  Such a key is
+called a capability.
+
+Some operations are performed directly on the kernel itself.  For those, the
+kernel can provide its own capabilities.  Processes can create their own
+objects which can receive capability calls, and capabilities for those can be
+generated by them.  Processes can copy capabilities to other processes, if they
+have a channel to send them (using an existing capability).  This way, any
+operation of the process with the external world goes through a capability, and
+only one system call is needed, namely \textit{invoke}.
+
+This has a very nice side-effect, namely that it becomes very easy to tap
+communication of a task you control.  This means that a user can redirect
+certain requests from programs which don't do exactly what is desired to do
+nicer things.  For example, a program can be prevented from opening pop-up
+windows.  In other words, it moves control of the computer from the programmer
+into the hands of the user (as far as allowed by the system administrator).
+This is a very good thing.
+
+\section{Kernel objects}
+This section describes all the kernel objects, and the operations that can be
+performed on them.
+
+\subsection{Memory}
+A memory object is a container for storing things.  All objects live inside a
+memory object.  A memory object can contain other memory objects, capabilities,
+receivers, threads and pages.
+
+A memory object is also an address space.  Pages can be mapped (and unmapped).
+Any Thread in a memory object uses this address space while it is running.
+
+Every memory object has a limit.  When this limit is reached, no more pages can
+be allocated for it (including pages which it uses to store other objects).
+Using a new page in a memory object implies using it in all ancestor memory
+objects.  This means that setting a limit which is higher than the parent's
+limit means that the parent's limit applies anyway.
+
+Operations on memory objects:
 \begin{itemize}
-\item There's a \textbf{Jz4730} chip inside, which implements most
-functionality.  It has a mips core, an OHCI USB host controller (so no USB2),
-an AC97 audio device, a TFT display controller, an SD card reader, a network
-device, and lots of general purpose I/O pins, which are used for the LEDs and
-the keyboard.  There are also two PWM outputs, one of which seems to be used
-with the display.  It also has some other features, such as a digital camera
-controller, which are not used in the design.
-\item There's a separate 4-port USB hub inside.
-\item There's a serial port which is accessible with a tiny connector inside
-the battery compartiment.  It uses TTL signals, so to use it with a PC serial
-port, the signals must be converted with a MAX232.  That is normal for these
-boards, so I already have one handy.  The main problem in this case is that the
-connector is an unusual one, so it may take some time until I can actually
-connect things to the serial port.
+\item
 \end{itemize}

-First problem is how to write code which can be booted.  This seems easy: put a
-file named \textbf{uimage} on the first partition on an SD card, which must be
-formatted FAT or ext3, and hold down Fn, left shift and left control while
-booting.  The partition must also not be larger than 32 MB.
+\subsection{Page}
+A page can be used to store user data.  It can be mapped into an address space (a memory object).  Threads can then use the data directly.

-The boot program is u-boot, which has good documentation on the web.  Also,
-there is a Debian package named uboot-mkimage, which has the mkimage executable
-to create images that can be booted using u-boot.  uimage should be in this
-format.
+A page has no operations of itself; mapping a page is achieved using an
+operation on a memory object.

-To understand at least something of addresses, it's important to understand the
-memory model of the mips architecture:
+\subsection{Receiver}
+A receiver object is used for inter-process communication.  Capabilities can be
+created from it.  When those are invoked, the receiver can be used to retrieve
+the message.
+
+Operations on receiver objects:
 \begin{itemize}
-\item usermode code will never reference anything in the upper half of the memory (above 0x80000000).  If it does, it receives a segmentation fault.
-\item access in the lower half is paged and can be cached.  This is called
-kuseg when used from kernel code.  It will access the same pages as non-kernel
-code finds there.
-\item the upper half is divided in 3 segments.
-\item kseg0 runs from 0x80000000 to 0xa0000000.  Access to this memory will
-access physical memory from 0x00000000 to 0x20000000.  It is cached, but not
-mapped (meaning it accesses physical, not virtual, memory)
-\item kseg1 runs from 0xa0000000 to 0xc0000000.  It is identical to kseg0,
-except that is is not cached.
-\item kseg2 runs from 0xc0000000 to the top.  It is mapped like user memory,
-differently for each process, and can be cached.  It is intended for
-per-address space kernel structures.  I shall not use it in my kernel.
+\item
 \end{itemize}
-U-boot has some standard commands.  It can load the image from the SD card at
-0x80600000.  Even though the Linux image seems to use a different address, I'll
-go with this one for now.

-\section{Cross-compiler}
-Next thing to do is build a cross-compiler so it is possible to try out some
-things.  This shouldn't need to be very complex, but it is.  I wrote a separate
-document about how to do this.  Please read that if you don't have a working
-cross-compiler, or if you would like to install libraries for cross-building
-more easily.
+\subsection{Capability}
+A capability object can be invoked to send a message to a receiver or the
+kernel.  The owner cannot see from the capability where it points.  This is
+important, because the user must be able to substitute the capability for a
+different one, without the program noticing.

-\section{Making things run}
-For loading a program, it must be a binary executable with a header.  The
-header is inserted by mkimage.  It needs a load address and an entry point.
-Initially at least, the load address is 0x80600000.  The entry point must be
-computed from the executable.  The easiest way to do this is by making sure
-that it is the first byte in the executable.  The file can then be linked as
-binary, so without any headers.  This is done by giving the
-\verb+--oformat binary+ switch to ld.  I think the image is loaded without the
-header, so that can be completely ignored while building.  However, it might
-include it.  In that case, the entry point should be 0x40 higher, because
-that's the size of the header.
+Operations on capability objects:
+\begin{itemize}
+\item
+\end{itemize}

-\section{The first version of the kernel}
-This sounds better than it is.  The first version will be able to boot, and
-somehow show that it did that.  Not too impressive at all, and certainly not
-usable.  It is meant to find out if everything I wrote above actually works.
+\subsection{Thread}
+Thread objects hold the information about the current state of a thread.  This
+state is used to continue running the thread.  The address space is used to map
+the memory for the thread.  Different threads in the same address space have
+the same memory mapping.  All threads in one address space (often just one)
+together are called a process.

-For this kernel I need several things: a program which can boot, and a way to
-tell the user.  As the way to tell the user, I decided to use the caps-lock
-LED.  The display is quite complex to program, I suppose, so I won't even try
-at this stage.  The LED should be easy.  Especially because Linux can use it
-too.  I copied the code from the Linux kernel patch that seemed to be about the
-LED, and that gave me the macros \verb+__gpio_as_output+, \verb+__gpio_set_pin+
-and \verb+__gpio_clear_pin+.  And of course there's \verb+CAPSLOCKLED_IO+,
-which is the pin to set or clear.
-
-I used these macros in a function I called \verb+kernel_entry+.  In an endless
-loop, it switches the LED on 1000000 times, then off 1000000 times.  If the
-time required to set the led is in the order of microseconds, the LED should be
-blinking in the order of seconds.  I tried with 1000 first, but that left the
-LED on seemingly permanently, so it was appearantly way too fast.
-
-This is the code I want to run, but it isn't quite ready for that yet.  A C
-function needs to have a stack when it is called.  It is possible that u-boot
-provides one, but it may also not do that.  To be sure, it's best to use some
-assembly as the real entry point, which sets up the stack and calls the
-function.
-
-The symbol that ld will use as its entry point must be called \verb+__start+
-(on some other architectures with just one underscore).  So I created a simple
-assembly file which defines some stack space and does the setting up.  It also
-sets \$gp to the so-called \textit{global offset table}, and clears the .bss
-section.  This is needed to make compiler-generated code run properly.
-
-Now how to build the image file?  This is a problem.  The ELF format allows
-paged memory, which means that simply loading the file may not put everything
-at its proper address.  ld has an option for this, \verb+--omagic+.  This is
-meant for the a.out format, which isn't supported by mipsel binutils, but that
-doesn't matter.  The result is still that the .text section (with the
-executable code) is first in the file, immediately followed by the .data
-section.  So that means that loading the file into memory at the right address
-results in all parts of the file in the proper place.  Adding
-\verb+-Ttext 0x80600000+ makes everything right.  However, the result is still
-an ELF file.  So I use objcopy with \verb+-Obinary+ to create a binary file
-from it.  At this point, I also extract the start address (the location of
-\verb+__start+) from the ELF file, and use that for building uimage.  That
-way it is no longer needed that \_\_start is at the first byte of the file.
-
-Booting from the SD card is as easy as it seemed, except that I first tried an
-mmc card (which fits in the same slot, and usually works when SD is accepted)
-and that didn't work.  So you really need an SD card.
-
-\section{Context switching}
-One very central thing in the kernel is context switching.  That is, we need to
-know how the registers and the memory are organized when a user program is
-running.  In order to understand that, we must know how paging is done.  I
-already found that it is done by coprocessor 0, so now I need to find out how
-that works.
-
-On the net I found the \textit{MIPS32 architecture for developers}, version 3
-of which is sub-titled \textit{the MIPS32 priviledged resource architecture}.
-It explains everything there is to know about things which are not accessible
-from normal programs.  In other words, it is exactly the right book for
-programming a kernel or device driver using this processor.  How nice.
-
-It explains that memory accesses to the lower 2GB are (almost always) mapped
-through a TLB (translation lookaside buffer).  This is an array of some records
-where virtual to physical address mappings are stored.  In case of a TLB-miss
-(the virtual address cannot be found in the table), an exception is generated
-and the kernel must insert the mapping into the TLB.
-
-This is very flexible, because I get to decide how I write the kernel.  I shall
-use something similar to the hardware implementation of the IBM PC: a page
-directory which contains links to page tables, with each page table filled with
-pointers to page information.  It is useful to have a direct mapping from
-virtual address to kernel data as well.  There are several ways how this can be
-achieved.  The two simplest ones each have their own drawback: making a shadow
-page directory with shadow page tables with links to the kernel structures
-instead of the pages wastes some memory.  Using only the shadow, and doing a
-lookup of the physical address in the kernel structure (where it must be stored
-anyway) wastes some cpu time during the lookup.  At this moment I do not know
-what is more expensive.  I'll initially go for the cpu time wasting approach.
-
-\section{Kernel entry}
-Now that I have an idea of how a process looks in memory, I need to implement
-kernel entry and exit.  A process is preempted or makes a request, then the
-kernel responds, and then a process (possibly the same) is started again.
-
-The main problem of kernel entry is to save all registers in the kernel
-structure which is associated with the thread.  In case of the MIPS processor,
-there is a simple solution: there are two registers, k0 and k1, which cannot be
-used by the thread.  So they can be set before starting the thread, and will
-still have their values when the kernel is entered again.  By pointing one of
-them to the place to save the data, it becomes easy to perform the save and
-restore.
-
-As with the bootstrap process, this must be done in assembly.  In this case
-this is because the user stack must not be used, and a C function will use the
-current stack.  It will also mess up some registers before you can save them.
-
-The next problem is how to get the interrupt code at its address.  I'll try to
-load the thing at address 0x80000000.  It seems to work, which is good.  Linux
-probably has some reason to do things differently, but if this works, it is the
-easiest way.
-
-\section{Memory organization}
-Now I've reached the point where I need to create some memory structures.  To
-do that, I first need to decide how to organize the memory.  There's one very
-simple rule in my system: everyone must pay for what they use.  For memory,
-this means that a process brings its own memory where the kernel can write
-things about it.  The kernel does not need its own allocation system, because
-it always works for some process.  If the process doesn't provide the memory,
-the operation will fail.
-
-Memory will be organized hierarchically.  It belongs to a container, which I
-shall call \textit{memory}.  The entire memory is the property of another
-memory, its parent.  This is true for all but one, which is the top level
-memory.  The top level memory owns all memory in the system.  Some of it
-directly, most of it through other memories.
-
-The kernel will have a list of unclaimed pages.  For optimization, it actually
-has two lists: one with pages containing only zeroes, one with pages containing
-junk.  When idle, the junk pages can be filled with zeroes.
-
-Because the kernel starts at address 0, building up the list of pages is very
-easy: starting from the first page above the top of the kernel, everything is
-free space.  Initially, all pages are added to the junk list.
-
-\section{The idle task}
-When there is nothing to do, an endless loop should be waiting for interrupts.
-This loop is called the idle task.  I use it also to exit bootstrapping, by
-enabling interrupts after everything is set up as if we're running the idle
-task, and then jumping to it.
-
-There are two options for the idle task, again with their own drawbacks.  The
-idle task can run in kernel mode.  This is easy, it doesn't need any paging
-machinery then.  However, this means that the kernel must read-modify-write the
-status register of coprocessor 0, which contains the operating mode, on every
-context switch.  That's quite an expensive operation for such a critical path.
-
-The other option is to run it in user mode.  The drawback there is that it
-needs a page directory and a page table.  However, since the code is completely
-trusted, it may be possible to sneak that in through some unused space between
-two interrupt handlers.  That means there's no fault when accessing some memory
-owned by others, but the idle task is so trivial that it can be assumed to run
-without affecting them.
-
-\section{Intermezzo: some problems}
-Some problems came up while working.  First, I found that the code sometimes
-didn't work and sometimes it did.  It seemed that it had problems when the
-functions I called became more complex.  Looking at the disassembly, it appears
-that I didn't fully understand the calling convention used by the compiler.
-Appearantly, it always needs to have register t9 set to the called function.
-In all compiled code, functions are called as \verb+jalr $t9+.  It took quite
-some time to figure this out, but setting t9 to the called function in my
-assembly code does indeed solve the problem.
-
-The other problem is that the machine was still doing unexpected things.
-Appearantly, u-boot enables interrupts and handles them.  This is not very nice
-when I'm busy setting up interrupt handlers.  So before doing anything else, I
-first switch off all interrupts by writing 0 to the status register of CP0.
-
-This also reminded me that I need to flush the cache, so that I can be sure
-everything is correct.  For that reason, I need to start at 0xa0000000, not
-0x80000000, so that the startup code is not cached.  It should be fine to load
-the kernel at 0x80000000, but jump in at the non-cached location anyway, if I
-make sure the initial code, which clears the cache, can handle it.  After that,
-I jump to the cached region, and everything should be fine.  However, at this
-moment I first link the kernel at the non-cached address, so I don't need to
-worry about it.
-
-Finally, I read in the books that k0 and k1 are in fact normal general purpose
-registers.  So while they are by convention used for kernel purposes, and
-compilers will likely not touch them.  However, the kernel can't actually rely
-on them not being changed by user code.  So I'll need to use a different
-approach for saving the processor state.  The solution is trivial: use k1 as
-before, but first load it from a fixed memory location.  To be able to store k1
-itself, a page must be mapped in kseg3 (wired into the tlb), which can then be
-accessed with a negative index to \$zero.
-
-At this point, I was completely startled by crashes depending on seemingly
-irrelevant changes.  After a lot of investigation, I saw that I had forgotten
-that mips jumps have a delay slot, which is executed after the jump, before the
-first new instruction is executed.  I was executing random instructions, which
-lead to random behaviour.
-
-\section{Back to the idle task}
-With all this out of the way, I continued to implement the idle task.  I hoped
-to be able to never write to the status register.  However, this is not
-possible.  The idle task must be in user mode, and it must call wait.  That
-means it needs the coprocessor 0 usable bit set.  This bit may not be set for
-normal processes, however, or they would be able to change the tlb and all
-protection would be lost.  However, writing to the status register is not a
-problem.  First of all, it is only needed during a task switch, and they aren't
-as frequent as context switches (every entry to the kernel is a context switch,
-only when a different task is entered from the kernel than exited to the kernel
-is it a task switch).  Furthermore, and more importantly, coprocessor 0 is
-intgrated into the cpu, and writing to it is actually a very fast operation and
-not something to be avoided at all.
-
-So to switch to user mode, I set up the status register so that it looks like
-it's handling an exception, set EPC to the address of the idle task, and use
-eret to ``return'' to it.
-
-\section{Timer interrupts}
-This worked well.  Now I expected to get a timer interrupt soon after jumping
-to the idle task.  After all, I have set up the compare register, the timer
-should be running and I enabled the interrupts.  However, nothing happened.  I
-looked at the contents of the count register, and found that it was 0.  This
-means that it is not actually counting at all.  Looking at the Linux sources,
-they don't use this timer either, but instead use the cpu-external (but
-integrated in the chip) timer.  The documentation says that they have a
-different reason for this than a non-functional cpu timer.  Still, it means it
-can be used as an alternative.
-
-Having a timer is important for preemptive multitasking: a process needs to be
-interrupted in order to be preempted, so there needs to be a periodic interrupt
-source.
+Operations on thread objects:
+\begin{itemize}
+\item
+\end{itemize}

 \end{document}
@@ -2,9 +2,12 @@
 #include "kernel.hh"

 void schedule ():
+	// Choose the next thread to run and store it in current.
+	// Remember the thread that was running, to detect an actual switch below.
+	Thread *old = current
+	// First choice: the successor of the current thread in the schedule list.
 	if current:
 		current = current->schedule_next
+	// There was no current thread, or it had no successor: restart at first_scheduled.
 	if !current:
 		current = first_scheduled
+	// Nothing is scheduled at all: fall back to the idle thread.
 	if !current:
 		current = &idle
+	// Only do the architecture-specific switch when the thread really changes.
+	// NOTE(review): arch_schedule treats a null target as the idle task, but
+	// this function passes &idle and never null; confirm that idle has a
+	// valid address_space.
+	if old != current:
+		arch_schedule (old, current)
@@ -22,7 +22,7 @@
 #define REG_GPIO_GPDR(n)       REG32(GPIO_GPDR((n)))

 static void __gpio_port_as_gpiofn (unsigned p, unsigned o, unsigned fn):
-	unsigned int tmp;
+	unsigned int tmp
 	if o < 16:
 		tmp = REG_GPIO_GPSLR(p)
 		tmp &= ~(3 << ((o) << 1))
@@ -74,3 +74,7 @@ void led (bool one, bool two, bool three):
 		__gpio_clear_pin (NETWORK_IO)
 	else:
 		__gpio_set_pin (NETWORK_IO)
+
+void dbg_sleep (unsigned ms):
+	for unsigned i = 0; i < 1000 * ms; ++i:
+		__gpio_as_output (CAPSLOCKLED_IO)