407 lines
8.7 KiB
ArmAsm
407 lines
8.7 KiB
ArmAsm
#if TFP
|
|
|
|
/* block_div.s
|
|
* This file defines void block_div(struct test_block *)
|
|
*/
|
|
|
|
#define numerator 0x0
|
|
#define denominator 0x20
|
|
#define result 0x40
|
|
#define remainder 0xc0
|
|
#define denom_step 0x140
|
|
#define inexact 0x158
|
|
#define big_rem 0x15c
|
|
#define min_rem 0x168
|
|
#define max_rem 0x188
|
|
#define ds 0x148
|
|
#define points 0x150
|
|
#define halfulp 0x160
|
|
|
|
#define zero $0 /* wired zero */
|
|
#define AT $at /* assembler temp */
|
|
#define v0 $2 /* return value */
|
|
#define v1 $3
|
|
#define a0 $4 /* argument registers */
|
|
#define a1 $5
|
|
#define a2 $6
|
|
#define a3 $7
|
|
#define t0 $8 /* caller saved */
|
|
#define t1 $9
|
|
#define t2 $10
|
|
#define t3 $11
|
|
#define t4 $12
|
|
#define t5 $13
|
|
#define t6 $14
|
|
#define t7 $15
|
|
#define s0 $16 /* callee saved */
|
|
#define s1 $17
|
|
#define s2 $18
|
|
#define s3 $19
|
|
#define s4 $20
|
|
#define s5 $21
|
|
#define s6 $22
|
|
#define s7 $23
|
|
#define t8 $24 /* code generator */
|
|
#define t9 $25
|
|
#define k0 $26 /* kernel temporary */
|
|
#define k1 $27
|
|
#define gp $28 /* global pointer */
|
|
#define sp $29 /* stack pointer */
|
|
#define fp $30 /* frame pointer */
|
|
#define ra $31 /* return address */
|
|
|
|
#define r0 $0
|
|
#define r1 $1
|
|
#define r2 $2
|
|
#define r3 $3
|
|
#define r4 $4
|
|
#define r5 $5
|
|
#define r6 $6
|
|
#define r7 $7
|
|
#define r8 $8
|
|
#define r9 $9
|
|
#define r10 $10
|
|
#define r11 $11
|
|
#define r12 $12
|
|
#define r13 $13
|
|
#define r14 $14
|
|
#define r15 $15
|
|
#define r16 $16
|
|
#define r17 $17
|
|
#define r18 $18
|
|
#define r19 $19
|
|
#define r20 $20
|
|
#define r21 $21
|
|
#define r22 $22
|
|
#define r23 $23
|
|
#define r24 $24
|
|
#define r25 $25
|
|
#define r26 $26
|
|
#define r27 $27
|
|
#define r28 $28
|
|
#define r29 $29
|
|
#define r30 $30
|
|
#define r31 $31
|
|
|
|
#define fp0 $f0
|
|
#define fp1 $f1
|
|
#define fp2 $f2
|
|
#define fp3 $f3
|
|
#define fp4 $f4
|
|
#define fp5 $f5
|
|
#define fp6 $f6
|
|
#define fp7 $f7
|
|
#define fp8 $f8
|
|
#define fp9 $f9
|
|
#define fp10 $f10
|
|
#define fp11 $f11
|
|
#define fp12 $f12
|
|
#define fp13 $f13
|
|
#define fp14 $f14
|
|
#define fp15 $f15
|
|
#define fp16 $f16
|
|
#define fp17 $f17
|
|
#define fp18 $f18
|
|
#define fp19 $f19
|
|
#define fp20 $f20
|
|
#define fp21 $f21
|
|
#define fp22 $f22
|
|
#define fp23 $f23
|
|
#define fp24 $f24
|
|
#define fp25 $f25
|
|
#define fp26 $f26
|
|
#define fp27 $f27
|
|
#define fp28 $f28
|
|
#define fp29 $f29
|
|
#define fp30 $f30
|
|
#define fp31 $f31
|
|
|
|
#define cc0 $fcc0
|
|
#define cc1 $fcc1
|
|
#define cc2 $fcc2
|
|
#define cc3 $fcc3
|
|
#define cc4 $fcc4
|
|
#define cc5 $fcc5
|
|
#define cc6 $fcc6
|
|
#define cc7 $fcc7
|
|
|
|
#define fconfig $0
|
|
#define fsr $31
|
|
|
|
|
|
.text
|
|
.globl loop_block_div
|
|
.ent loop_block_div
|
|
.frame sp, 40, $31
|
|
.mask 0x80000000, -48
|
|
/* .set noreorder */
|
|
.set reorder
|
|
loop_block_div:
|
|
daddiu sp, -80
|
|
sd $31, 0(sp)
|
|
s.d $f24, 8(sp)
|
|
s.d $f25, 16(sp)
|
|
s.d $f26, 24(sp)
|
|
s.d $f27, 32(sp)
|
|
s.d $f28, 40(sp)
|
|
s.d $f29, 48(sp)
|
|
s.d $f30, 56(sp)
|
|
s.d $f31, 64(sp)
|
|
l.d $f0, (numerator+0)(a0)
|
|
l.d $f1, (numerator+8)(a0)
|
|
l.d $f2, (numerator+16)(a0)
|
|
l.d $f3, (numerator+24)(a0)
|
|
l.d $f4, (denominator+0)(a0)
|
|
l.d $f5, (denominator+8)(a0)
|
|
l.d $f6, (denominator+16)(a0)
|
|
l.d $f7, (denominator+24)(a0)
|
|
dmfc1 t1, $f7
|
|
ld t2, denom_step(a0)
|
|
|
|
denominator_loop:
|
|
div.d $f8, $f0, $f4
|
|
div.d $f9, $f0, $f5
|
|
div.d $f10, $f0, $f6
|
|
div.d $f11, $f0, $f7
|
|
s.d $f8, (result+0)(a0)
|
|
s.d $f9, (result+8)(a0)
|
|
div.d $f12, $f1, $f4
|
|
div.d $f13, $f1, $f5
|
|
s.d $f10, (result+16)(a0)
|
|
s.d $f11, (result+24)(a0)
|
|
div.d $f14, $f1, $f6
|
|
div.d $f15, $f1, $f7
|
|
s.d $f12, (result+32)(a0)
|
|
s.d $f13, (result+40)(a0)
|
|
div.d $f16, $f2, $f4
|
|
div.d $f17, $f2, $f5
|
|
s.d $f14, (result+48)(a0)
|
|
s.d $f15, (result+56)(a0)
|
|
div.d $f18, $f2, $f6
|
|
div.d $f19, $f2, $f7
|
|
s.d $f16, (result+64)(a0)
|
|
s.d $f17, (result+72)(a0)
|
|
div.d $f20, $f3, $f4
|
|
div.d $f21, $f3, $f5
|
|
s.d $f18, (result+80)(a0)
|
|
s.d $f19, (result+88)(a0)
|
|
div.d $f22, $f3, $f6
|
|
div.d $f23, $f3, $f7
|
|
li t4, 0x1000000
|
|
l.d $f24, (min_rem+0)(a0)
|
|
l.d $f25, (min_rem+8)(a0)
|
|
l.d $f26, (min_rem+16)(a0)
|
|
l.d $f27, (min_rem+24)(a0)
|
|
s.d $f20, (result+96)(a0)
|
|
s.d $f21, (result+104)(a0)
|
|
ctc1 t4, $31
|
|
msub.d $f8, $f0, $f8, $f4
|
|
msub.d $f9, $f0, $f9, $f5
|
|
s.d $f22, (result+112)(a0)
|
|
s.d $f23, (result+120)(a0)
|
|
msub.d $f10, $f0, $f10, $f6
|
|
msub.d $f11, $f0, $f11, $f7
|
|
msub.d $f12, $f1, $f12, $f4
|
|
msub.d $f13, $f1, $f13, $f5
|
|
msub.d $f14, $f1, $f14, $f6
|
|
msub.d $f15, $f1, $f15, $f7
|
|
msub.d $f16, $f2, $f16, $f4
|
|
msub.d $f17, $f2, $f17, $f5
|
|
s.d $f8, (remainder+0)(a0)
|
|
s.d $f9, (remainder+8)(a0)
|
|
msub.d $f18, $f2, $f18, $f6
|
|
msub.d $f19, $f2, $f19, $f7
|
|
s.d $f10, (remainder+16)(a0)
|
|
s.d $f11, (remainder+24)(a0)
|
|
msub.d $f20, $f3, $f20, $f4
|
|
msub.d $f21, $f3, $f21, $f5
|
|
s.d $f12, (remainder+32)(a0)
|
|
s.d $f13, (remainder+40)(a0)
|
|
msub.d $f22, $f3, $f22, $f6
|
|
msub.d $f23, $f3, $f23, $f7
|
|
/* Find minimum remainder */
|
|
s.d $f14, (remainder+48)(a0)
|
|
s.d $f15, (remainder+56)(a0)
|
|
c.le.d cc0, $f8, $f24
|
|
c.le.d cc1, $f12, $f25
|
|
s.d $f16, (remainder+64)(a0)
|
|
s.d $f17, (remainder+72)(a0)
|
|
c.le.d cc2, $f16, $f26
|
|
c.le.d cc3, $f20, $f27
|
|
s.d $f18, (remainder+80)(a0)
|
|
s.d $f19, (remainder+88)(a0)
|
|
movt.d $f24, $f8, cc0
|
|
movt.d $f25, $f12, cc1
|
|
s.d $f20, (remainder+96)(a0)
|
|
s.d $f21, (remainder+104)(a0)
|
|
movt.d $f26, $f16, cc2
|
|
movt.d $f27, $f20, cc3
|
|
s.d $f22, (remainder+112)(a0)
|
|
s.d $f23, (remainder+120)(a0)
|
|
c.le.d cc4, $f9, $f24
|
|
c.le.d cc5, $f13, $f25
|
|
c.le.d cc6, $f17, $f26
|
|
c.le.d cc7, $f21, $f27
|
|
movt.d $f24, $f9, cc4
|
|
movt.d $f25, $f13, cc5
|
|
movt.d $f26, $f17, cc6
|
|
movt.d $f27, $f21, cc7
|
|
c.le.d cc0, $f10, $f24
|
|
c.le.d cc1, $f14, $f25
|
|
c.le.d cc2, $f18, $f26
|
|
c.le.d cc3, $f22, $f27
|
|
movt.d $f24, $f10, cc0
|
|
movt.d $f25, $f14, cc1
|
|
movt.d $f26, $f18, cc2
|
|
movt.d $f27, $f22, cc3
|
|
c.le.d cc4, $f11, $f24
|
|
c.le.d cc5, $f15, $f25
|
|
c.le.d cc6, $f19, $f26
|
|
c.le.d cc7, $f23, $f27
|
|
movt.d $f24, $f11, cc4
|
|
movt.d $f25, $f15, cc5
|
|
movt.d $f26, $f19, cc6
|
|
movt.d $f27, $f23, cc7
|
|
/* Find if any of the msub's were inexact.
|
|
* This is down here so that the stores can
|
|
* be spread across the computations above.
|
|
* Compares cannot generate an inexact flag.
|
|
*/
|
|
cfc1 t6, $31
|
|
andi t6, 4
|
|
sw t6, inexact(a0)
|
|
/* Save away the minimum remainders found. */
|
|
s.d $f24, (min_rem+0)(a0)
|
|
s.d $f25, (min_rem+8)(a0)
|
|
/* We'll be checking if any (abs.d(test.rem[][]) > halfulp*test.num[])
|
|
* later, and we've done a fair bit of the work already in finding
|
|
* the minimum remainder, so stash it away.
|
|
*/
|
|
abs.d $f28,$f24
|
|
abs.d $f29,$f25
|
|
s.d $f26, (min_rem+16)(a0)
|
|
s.d $f27, (min_rem+24)(a0)
|
|
abs.d $f30,$f26
|
|
abs.d $f31,$f27
|
|
/* find maximum remainder */
|
|
l.d $f24, (max_rem+0)(a0)
|
|
l.d $f25, (max_rem+8)(a0)
|
|
c.le.d cc0, $f8, $f24
|
|
c.le.d cc1, $f12, $f25
|
|
l.d $f26, (max_rem+16)(a0)
|
|
l.d $f27, (max_rem+24)(a0)
|
|
c.le.d cc2, $f16, $f26
|
|
c.le.d cc3, $f20, $f27
|
|
movf.d $f24, $f8, cc0
|
|
movf.d $f25, $f12, cc1
|
|
movf.d $f26, $f16, cc2
|
|
movf.d $f27, $f20, cc3
|
|
c.le.d cc4, $f9, $f24
|
|
c.le.d cc5, $f13, $f25
|
|
c.le.d cc6, $f17, $f26
|
|
c.le.d cc7, $f21, $f27
|
|
movf.d $f24, $f9, cc4
|
|
movf.d $f25, $f13, cc5
|
|
movf.d $f26, $f17, cc6
|
|
movf.d $f27, $f21, cc7
|
|
c.le.d cc0, $f10, $f24
|
|
c.le.d cc1, $f14, $f25
|
|
c.le.d cc2, $f18, $f26
|
|
c.le.d cc3, $f22, $f27
|
|
movf.d $f24, $f10, cc0
|
|
movf.d $f25, $f14, cc1
|
|
movf.d $f26, $f18, cc2
|
|
movf.d $f27, $f22, cc3
|
|
c.le.d cc4, $f11, $f24
|
|
c.le.d cc5, $f15, $f25
|
|
c.le.d cc6, $f19, $f26
|
|
c.le.d cc7, $f23, $f27
|
|
movf.d $f24, $f11, cc4
|
|
movf.d $f25, $f15, cc5
|
|
s.d $f24, (max_rem+0)(a0)
|
|
s.d $f25, (max_rem+8)(a0)
|
|
movf.d $f26, $f19, cc6
|
|
movf.d $f27, $f23, cc7
|
|
s.d $f26, (max_rem+16)(a0)
|
|
s.d $f27, (max_rem+24)(a0)
|
|
|
|
/* Collect maximum absolute remainder values in $f28..$f31 */
|
|
abs.d $f24, $f24
|
|
abs.d $f25, $f25
|
|
abs.d $f26, $f26
|
|
abs.d $f27, $f27
|
|
c.le.d cc0, $f24, $f28
|
|
c.le.d cc1, $f25, $f29
|
|
c.le.d cc2, $f26, $f30
|
|
c.le.d cc3, $f27, $f31
|
|
movf.d $f28, $f24, cc0
|
|
movf.d $f29, $f25, cc1
|
|
l.d $f11, (halfulp)(a0)
|
|
mul.d $f8, $f11, $f0
|
|
mul.d $f9, $f11, $f1
|
|
mul.d $f10, $f11, $f2
|
|
mul.d $f11, $f11, $f3
|
|
movf.d $f30, $f26, cc2
|
|
movf.d $f31, $f27, cc3
|
|
|
|
/* Check if absolute value of any remainders were excessive by
|
|
* checking if the absolute value of the largest were excessive.
|
|
* Use f8..f11 to hold "excessive" boundaries.
|
|
*/
|
|
c.le.d cc4, $f28, $f8
|
|
c.le.d cc5, $f29, $f9
|
|
c.le.d cc6, $f30, $f10
|
|
c.le.d cc7, $f31, $f11
|
|
move t4, zero
|
|
li t5, 1
|
|
movf t4, t5, cc4
|
|
movf t4, t5, cc5
|
|
movf t4, t5, cc6
|
|
movf t4, t5, cc7
|
|
sw t4, big_rem(a0)
|
|
|
|
/* Update Denominators and ds
|
|
*/
|
|
daddu t1,t1,t2
|
|
sd t1,(denominator+0)(a0)
|
|
dmtc1 t1, $f4
|
|
daddu t1,t1,t2
|
|
sd t1,(denominator+8)(a0)
|
|
dmtc1 t1, $f5
|
|
daddu t1,t1,t2
|
|
sd t1,(denominator+16)(a0)
|
|
dmtc1 t1, $f6
|
|
daddu t1,t1,t2
|
|
sd t1,(denominator+24)(a0)
|
|
dmtc1 t1, $f7
|
|
ld t5, ds(a0)
|
|
ld t7, points(a0)
|
|
daddi t5, t5, 4
|
|
sd t5, ds(a0)
|
|
slt t7, t5, t7
|
|
|
|
/* If any remainders were inexact or too large, return, otherwise,
|
|
* loop.
|
|
*/
|
|
bne t4, zero, DropOut
|
|
bne t6, zero, DropOut
|
|
bne t7, zero, denominator_loop
|
|
|
|
DropOut:
|
|
l.d $f24, 8(sp)
|
|
l.d $f25, 16(sp)
|
|
l.d $f26, 24(sp)
|
|
l.d $f27, 32(sp)
|
|
l.d $f28, 40(sp)
|
|
l.d $f29, 48(sp)
|
|
l.d $f30, 56(sp)
|
|
l.d $f31, 64(sp)
|
|
ld $31, 0(sp)
|
|
daddiu sp, 80
|
|
j $31
|
|
nada
|
|
/* .set reorder */
|
|
.end loop_block_div
|
|
|
|
#endif
|