1868 lines
43 KiB
ArmAsm
1868 lines
43 KiB
ArmAsm
/**************************************************************************
|
|
* *
|
|
* Copyright (C) 1989-1993, Silicon Graphics, Inc. *
|
|
* *
|
|
* These coded instructions, statements, and computer programs contain *
|
|
* unpublished proprietary information of Silicon Graphics, Inc., and *
|
|
* are protected by Federal copyright law. They may not be disclosed *
|
|
* to third parties or copied or duplicated in any form, in whole or *
|
|
* in part, without the prior written consent of Silicon Graphics, Inc. *
|
|
* *
|
|
**************************************************************************/
|
|
/* Copyright(C) 1986, MIPS Computer Systems */
|
|
|
|
|
|
#include "ml/ml.h"
|
|
|
|
#define BZERO64
|
|
|
|
/* It turns out better to think of LWS/LWB and SWS/SWB as
|
|
* smaller-vs-bigger address rather than left-vs-right.
|
|
* Such a representation makes the code endian-independent.
|
|
*/
|
|
|
|
/*
 * Endian-neutral spellings of the unaligned load/store partner
 * instructions.  The "S" variant is the one applied to the smaller byte
 * address of the datum, the "B" variant the one applied to the bigger
 * byte address; which of left/right that is depends on the byte order.
 */
#ifdef _MIPSEB
	/* big-endian: "left" instructions take the smaller address */
#	define	LWS	lwl
#	define	LWB	lwr
#	define	LDS	ldl
#	define	LDB	ldr
#	define	SWS	swl
#	define	SWB	swr
#	define	SDS	sdl
#	define	SDB	sdr
#else
	/* little-endian: "right" instructions take the smaller address */
#	define	LWS	lwr
#	define	LWB	lwl
#	define	LDS	ldr
#	define	LDB	ldl
#	define	SWS	swr
#	define	SWB	swl
#	define	SDS	sdr
#	define	SDB	sdl
#endif	/* _MIPSEB */
|
|
|
|
/*
|
|
* Normal version of copyin.
|
|
*
|
|
* int copyin(user_src, kernel_dst, bcount)
|
|
* long user_src, kernel_dst;
|
|
* long bcount;
|
|
*/
|
|
LOCALSZ=	1				# one local slot: the saved ra
COPYIOFRM=	FRAMESZ((NARGSAVE+LOCALSZ)*SZREG)	# frame size used by the copy routines
RAOFF=		COPYIOFRM-(1*SZREG)		# ra is kept in the top slot of the frame

/*
 * copyin: a0 = user source, a1 = kernel destination, a2 = byte count.
 * v0 is 0 after a completed copy.  Argument failures branch to cerror
 * with this frame still open; cerror unwinds it and returns -1.
 */
NESTED(copyin, COPYIOFRM, zero)
	PTR_SUBU	sp,COPYIOFRM		# open the frame
	REG_S		ra,RAOFF(sp)		# ra must survive the jal below

#if CELL
	/*
	 * An xthread (K_TYPE == KT_XTHREAD) whose X_INFO field is set is a
	 * message thread: hand the request to do_ucopy_copyin and return
	 * whatever it leaves in v0.
	 */
	PTR_L		t0,VPDA_CURKTHREAD(zero)
	lbu		t1,K_TYPE(t0)
	bne		t1,KT_XTHREAD,1f	# not an xthread: ordinary copy
	PTR_L		t2,X_INFO(t0)
	beq		t2,zero,1f		# no xt_info: ordinary copy
	jal		do_ucopy_copyin
	j		2f
1:
#endif

	/*
	 * Argument checks: the count and the user address must not be
	 * negative, and the end of the user range must not lie above
	 * MAXHIUSRATTACH (chosen over K0BASE as the more conservative limit).
	 */
	bltz		a2,cerror		# bcount is negative
	bltz		a0,cerror		# user_src is negative
	PTR_ADDU	v0,a0,a2		# v0 = user_src + bcount
	LI		t0,MAXHIUSRATTACH	# LI: no sign extension in 64 bit mode
	bgtu		v0,t0,cerror		# range ends above the limit

	.set	noreorder
	/* the K_NOFAULT store depends on the t0 load and cannot speculate */
	AUTO_CACHE_BARRIERS_DISABLE
	PTR_L		t0,VPDA_CURKTHREAD(zero)
	li		v0,NF_COPYIO		# LDSLOT
	jal		bcopy			# a0/a1/a2 are passed through untouched
	sh		v0,K_NOFAULT(t0)	# BDSLOT: K_NOFAULT = NF_COPYIO during bcopy

	PTR_L		t0,VPDA_CURKTHREAD(zero)	# t0 is not assumed to survive bcopy
	move		v0,zero			# LDSLOT: success return value
	sh		zero,K_NOFAULT(t0)	# clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE

2:
	.set	reorder
	REG_L		ra,RAOFF(sp)
	PTR_ADDU	sp,COPYIOFRM		# close the frame
	j		ra
	END(copyin)
|
|
|
|
/*
|
|
* Normal version of copyout.
|
|
*
|
|
* int copyout(kernel_src, user_dst, bcount);
|
|
* long kernel_src, user_dst, bcount;
|
|
*/
|
|
/*
 * Copy primitive used by copyout: nowar_bcopy when only
 * R10000_SPECULATION_WAR is configured (safe, as the destination is
 * mapped), plain bcopy otherwise.
 */
#if defined(R10000_SPECULATION_WAR) && (! defined(MH_R10000_SPECULATION_WAR))
#define	COPYOUT_BCOPY	nowar_bcopy
#else
#define	COPYOUT_BCOPY	bcopy
#endif

/*
 * copyout: a0 = kernel source, a1 = user destination, a2 = byte count.
 * v0 is 0 after a completed copy.  Argument failures branch to cerror
 * with this frame still open; cerror unwinds it and returns -1.
 */
NESTED(copyout, COPYIOFRM, zero)
	PTR_SUBU	sp,COPYIOFRM		# open the frame
	REG_S		ra,RAOFF(sp)		# ra must survive the jal below

#if CELL
	/*
	 * An xthread (K_TYPE == KT_XTHREAD) whose X_INFO field is set is a
	 * message thread: hand the request to do_ucopy_copyout and return
	 * whatever it leaves in v0.
	 */
	PTR_L		t0,VPDA_CURKTHREAD(zero)
	lbu		t1,K_TYPE(t0)
	bne		t1,KT_XTHREAD,1f	# not an xthread: ordinary copy
	PTR_L		t2,X_INFO(t0)
	beq		t2,zero,1f		# no xt_info: ordinary copy
	jal		do_ucopy_copyout
	j		2f
1:
#endif

	/*
	 * Argument checks: the count and the user address must not be
	 * negative, and the end of the user range must not lie above
	 * MAXHIUSRATTACH (chosen over K0BASE as the more conservative limit).
	 */
	bltz		a2,cerror		# bcount is negative
	bltz		a1,cerror		# user_dst is negative
	PTR_ADDU	v0,a1,a2		# v0 = user_dst + bcount
	LI		t0,MAXHIUSRATTACH	# LI: no sign extension in 64 bit mode
	bgtu		v0,t0,cerror		# range ends above the limit

	.set	noreorder
	/* the K_NOFAULT store depends on the t0 load and cannot speculate */
	AUTO_CACHE_BARRIERS_DISABLE
	PTR_L		t0,VPDA_CURKTHREAD(zero)
	li		v0,NF_COPYIO		# LDSLOT
	jal		COPYOUT_BCOPY		# a0/a1/a2 are passed through untouched
	sh		v0,K_NOFAULT(t0)	# BDSLOT: K_NOFAULT = NF_COPYIO during the copy

	PTR_L		t0,VPDA_CURKTHREAD(zero)	# t0 is not assumed to survive the call
	move		v0,zero			# LDSLOT: success return value
	sh		zero,K_NOFAULT(t0)	# clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE

2:
	.set	reorder
	REG_L		ra,RAOFF(sp)
	PTR_ADDU	sp,COPYIOFRM		# close the frame
	j		ra
	END(copyout)
|
|
|
|
/*
|
|
* Byte swapping version of copyin.
|
|
*
|
|
* int swcopyin(user_src, kernel_dst, bcount)
|
|
* long user_src, kernel_dst;
|
|
* long bcount;
|
|
*/
|
|
/*
 * swcopyin: a0 = user source, a1 = kernel destination, a2 = byte count.
 * Same argument checks as the plain copy-in path, but the data is moved
 * by swbcopy.  v0 is 0 after a completed copy; argument failures branch
 * to cerror, which unwinds this frame and returns -1.
 */
NESTED(swcopyin, COPYIOFRM, zero)
	PTR_SUBU	sp,COPYIOFRM		# open the frame
	REG_S		ra,RAOFF(sp)		# ra must survive the jal below

	/*
	 * Argument checks: the count and the user address must not be
	 * negative, and the end of the user range must not lie above
	 * MAXHIUSRATTACH (chosen over K0BASE as the more conservative limit).
	 */
	bltz		a2,cerror		# bcount is negative
	bltz		a0,cerror		# user_src is negative
	PTR_ADDU	v0,a0,a2		# v0 = user_src + bcount
	LI		t0,MAXHIUSRATTACH	# LI: no sign extension in 64 bit mode
	bgtu		v0,t0,cerror		# range ends above the limit

	.set	noreorder
	/* the K_NOFAULT store depends on the t0 load and cannot speculate */
	AUTO_CACHE_BARRIERS_DISABLE
	PTR_L		t0,VPDA_CURKTHREAD(zero)
	li		v0,NF_COPYIO		# LDSLOT
	jal		swbcopy			# a0/a1/a2 are passed through untouched
	sh		v0,K_NOFAULT(t0)	# BDSLOT: K_NOFAULT = NF_COPYIO during the copy

	PTR_L		t0,VPDA_CURKTHREAD(zero)	# t0 is not assumed to survive the call
	move		v0,zero			# LDSLOT: success return value
	sh		zero,K_NOFAULT(t0)	# clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE

	.set	reorder
	REG_L		ra,RAOFF(sp)
	PTR_ADDU	sp,COPYIOFRM		# close the frame
	j		ra
	END(swcopyin)
|
|
|
|
/*
|
|
* Byte swapping version of copyout().
|
|
*
|
|
* int swcopyout(kernel_src, user_dst, bcount);
|
|
* long kernel_src, user_dst, bcount;
|
|
*/
|
|
/*
 * swcopyout: a0 = kernel source, a1 = user destination, a2 = byte count.
 * Same argument checks as the plain copy-out path, but the data is moved
 * by swbcopy.  v0 is 0 after a completed copy; argument failures branch
 * to cerror, which unwinds this frame and returns -1.
 */
NESTED(swcopyout, COPYIOFRM, zero)
	PTR_SUBU	sp,COPYIOFRM		# open the frame
	REG_S		ra,RAOFF(sp)		# ra must survive the jal below

	/*
	 * Argument checks: the count and the user address must not be
	 * negative, and the end of the user range must not lie above
	 * MAXHIUSRATTACH (chosen over K0BASE as the more conservative limit).
	 */
	bltz		a2,cerror		# bcount is negative
	bltz		a1,cerror		# user_dst is negative
	PTR_ADDU	v0,a1,a2		# v0 = user_dst + bcount
	LI		t0,MAXHIUSRATTACH	# LI: no sign extension in 64 bit mode
	bgtu		v0,t0,cerror		# range ends above the limit

	.set	noreorder
	/* the K_NOFAULT store depends on the t0 load and cannot speculate */
	AUTO_CACHE_BARRIERS_DISABLE
	PTR_L		t0,VPDA_CURKTHREAD(zero)
	li		v0,NF_COPYIO		# LDSLOT
	jal		swbcopy			# a0/a1/a2 are passed through untouched
	sh		v0,K_NOFAULT(t0)	# BDSLOT: K_NOFAULT = NF_COPYIO during the copy

	PTR_L		t0,VPDA_CURKTHREAD(zero)	# t0 is not assumed to survive the call
	move		v0,zero			# LDSLOT: success return value
	sh		zero,K_NOFAULT(t0)	# clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE

	.set	reorder
	REG_L		ra,RAOFF(sp)
	PTR_ADDU	sp,COPYIOFRM		# close the frame
	j		ra
	END(swcopyout)
|
|
|
|
/*
|
|
* Zero user memory.
|
|
*
|
|
* int uzero(user_dst, bcount);
|
|
* long user_dst, bcount;
|
|
*/
|
|
/*
 * uzero: a0 = user destination, a1 = byte count.
 * Zeroes the range with bzero while K_NOFAULT holds NF_COPYIO.
 * v0 is 0 on completion; argument failures branch to cerror, which
 * unwinds this frame and returns -1.
 */
NESTED(uzero, COPYIOFRM, zero)
	PTR_SUBU	sp,COPYIOFRM		# open the frame
	REG_S		ra,RAOFF(sp)		# ra must survive the jal below

#if CELL
	/*
	 * An xthread (K_TYPE == KT_XTHREAD) whose X_INFO field is set is a
	 * message thread: hand the request to do_ucopy_zero and return
	 * whatever it leaves in v0.
	 */
	PTR_L		t0,VPDA_CURKTHREAD(zero)
	lbu		t1,K_TYPE(t0)
	bne		t1,KT_XTHREAD,1f	# not an xthread: ordinary zero
	PTR_L		t2,X_INFO(t0)
	beq		t2,zero,1f		# no xt_info: ordinary zero
	jal		do_ucopy_zero
	j		2f
1:
#endif

	/*
	 * Argument checks: the count and the user address must not be
	 * negative, and the end of the user range must not lie above
	 * MAXHIUSRATTACH (chosen over K0BASE as the more conservative limit).
	 */
	bltz		a1,cerror		# bcount is negative
	bltz		a0,cerror		# user_dst is negative
	PTR_ADDU	v0,a0,a1		# v0 = user_dst + bcount
	LI		t0,MAXHIUSRATTACH	# LI: no sign extension in 64 bit mode
	bgtu		v0,t0,cerror		# range ends above the limit

	.set	noreorder
	/* the K_NOFAULT store depends on the t0 load and cannot speculate */
	AUTO_CACHE_BARRIERS_DISABLE
	PTR_L		t0,VPDA_CURKTHREAD(zero)
	li		v0,NF_COPYIO		# LDSLOT
	jal		bzero			# a0/a1 are passed through untouched
	sh		v0,K_NOFAULT(t0)	# BDSLOT: K_NOFAULT = NF_COPYIO during bzero

	PTR_L		t0,VPDA_CURKTHREAD(zero)	# t0 is not assumed to survive bzero
	move		v0,zero			# LDSLOT: success return value
	sh		zero,K_NOFAULT(t0)	# clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE

2:
	.set	reorder
	REG_L		ra,RAOFF(sp)
	PTR_ADDU	sp,COPYIOFRM		# close the frame
	j		ra
	END(uzero)
|
|
|
|
/*
 * cerror: common failure exit for the routines that build a COPYIOFRM
 * frame.  It is entered by a branch with that frame still open, so it
 * sets v0 = -1, reloads ra from the frame, pops the frame and returns
 * to the original caller.
 * NOTE(review): presumably also the resume point after a fault taken
 * under NF_COPYIO -- confirm against the fault handler.
 */
NESTED(cerror, COPYIOFRM, zero)
	li		v0,-1			# error return value
	REG_L		ra,RAOFF(sp)		# ra saved by the routine that branched here
	PTR_ADDU	sp,COPYIOFRM		# pop that routine's frame
	j		ra
	END(cerror)
|
|
|
|
#ifdef _MIPSEL
|
|
/* swapl - swap bytes within a buffer of words
|
|
* buffer must be word aligned, or an address error will be generated
|
|
* first arg is address, second is number of words (not bytes).
|
|
*/
|
|
/*
 * swapl: a0 = word-aligned buffer, a1 = number of words.
 * Reverses the byte order of every 32-bit word in place.  In the
 * comments the four bytes of the loaded word are written "abcd".
 */
LEAF(swapl)
	.set	noreorder
	beq	a1,zero,2f		# nothing to do for a zero count
	nop				# BDSLOT
1:	lw	t0,0(a0)		# t0 = abcd
	subu	a1,1			# one word fewer to go
	sll	v0,t0,24		# v0 = d000
	srl	v1,t0,24		# v1 = 000a
	or	v0,v0,v1		# v0 = d00a
	and	v1,t0,0xff00		# v1 = 00c0
	sll	v1,v1,8			# v1 = 0c00
	or	v0,v0,v1		# v0 = dc0a
	srl	v1,t0,8			# v1 = 0abc
	and	v1,v1,0xff00		# v1 = 00b0
	or	v0,v0,v1		# v0 = dcba
	sw	v0,0(a0)		# write the swapped word back
	bne	a1,zero,1b		# loop while words remain
	addiu	a0,4			# BDSLOT: step to the next word
2:
	.set	reorder
	j	ra
	END(swapl)
|
|
|
|
/* swaps - swap bytes within a buffer of halfwords
|
|
* buffer must be halfword aligned, or an address error will be generated
|
|
* first arg is address, second is number of halfwords (not bytes).
|
|
*/
|
|
/*
 * swaps: a0 = halfword-aligned buffer, a1 = number of halfwords.
 * Exchanges the two bytes of every 16-bit halfword in place.  In the
 * comments the two bytes of the loaded halfword are written "ab".
 */
LEAF(swaps)
	.set	noreorder
	beq	a1,zero,2f		# nothing to do for a zero count
	nop				# BDSLOT
1:	lhu	t0,0(a0)		# t0 = 00ab
	subu	a1,1			# one halfword fewer to go
	srl	v0,t0,8			# v0 = 000a
	sll	v1,t0,8			# v1 = 0ab0
	or	v0,v1			# v0 = 0aba; sh keeps only the low "ba"
	sh	v0,0(a0)		# write the swapped halfword back
	bne	a1,zero,1b		# loop while halfwords remain
	addiu	a0,2			# BDSLOT: step to the next halfword
2:
	.set	reorder
	j	ra
	END(swaps)
|
|
|
|
#endif /* _MIPSEL */
|
|
|
|
/* bzero(dst, bcount)
|
|
* Zero a block of memory.
|
|
* This code assumes most blocks are aligned and larger than 12 bytes.
|
|
* This code is used so often that it is generally in the cache.
|
|
*
|
|
* We let the assembler reorder instructions, since the choices it makes
|
|
* in this particular code favor the common, aligned big-block case.
|
|
* Use many registers to give the assembler plenty of choices to move
|
|
* things up.
|
|
*/
|
|
/* register names used by bzero */
#define	dst	a0
#define	count	a1

/*
 * bzero / blkclr: dst (a0) = start address, count (a1) = byte count.
 *
 * Layout of the routine:
 *   - optional platform front ends (IP20/IP22 bookkeeping, IP32 cdx
 *     block fill, IP26/IP28/IP30 _pagezero hand-off);
 *   - BZERO64 body: align dst to 8 bytes with one partial doubleword
 *     store, clear large chunks in an unrolled loop, then finish the
 *     remaining bytes by testing individual bits of count;
 *   - non-BZERO64 body: the same scheme with 32-bit stores;
 *   - bytezero: plain byte loop used for short requests.
 *
 * The instruction sequences are left exactly as they were; the
 * multiple-bit ECC recovery code (see the WARNING below) depends on the
 * registers used by the store loops.
 */
LEAF(bzero)
XLEAF(blkclr)
#if IP20 || IP22
	/* record the arguments of the current call */
	PTR_S	a0,kv_initial_to
	PTR_S	a1,initial_count
#endif	/* IP20 || IP22 */
#if IP32
	/*
	 * R5000 cdx block fill: used if the request is aligned well.
	 * NOTE(review): as assembled, v0 is forced to 1 so the branch to 1f
	 * is always taken; the exported bzero_cdx_entry label suggests this
	 * sequence is patched at run time -- confirm.
	 */
	or	v0,a0,a1
EXPORT(bzero_cdx_entry)
	.set	noreorder
	li	v0,1
	bnez	v0,1f
	nop				# BDSLOT
	j	__cdx_blkfill
	move	a2,zero			# BDSLOT: third argument = 0
	.set	reorder
1:
#endif	/* IP32 */
#if IP26 || IP28 || (IP30 && (HEART_COHERENCY_WAR || HEART_INVALIDATE_WAR))
	/*
	 * If dst is line aligned and count is a multiple of the block size,
	 * hand the request to _pagezero; otherwise continue at bzero_nopage.
	 */
#ifdef IP26
	andi	v0,a0,TCC_LINESIZE-1		# if aligned well use
	andi	v1,a1,(8*TCC_LINESIZE)-1	# prefetching zero
#else
	andi	v0,a0,CACHE_SLINE_SIZE-1	# if aligned well use
	andi	v1,a1,(4*CACHE_SLINE_SIZE)-1	# blocked/unrolled zero
#endif
	or	v0,v0,v1
	.set	noreorder		# assume small copy for R10K
	bnezl	v0,bzero_nopage
	nop				# BDSLOT
	.set	reorder
	j	_pagezero
EXPORT(bzero_nopage)
#endif
#ifdef BZERO64
	.set	noreorder
	slti	v0,count,8
	bne	v0,zero,bytezero	# fewer than 8 bytes: use the byte loop
	PTR_SUBU v1,zero,dst		# BDSLOT: v1 = -dst, used for alignment

	/*
	 * One partial doubleword store clears from dst up to the next 64-bit
	 * boundary (one to eight bytes).  If dst is already aligned the store
	 * clears a whole doubleword, dst and count are adjusted by zero, and
	 * the same doubleword is cleared again by the code below.
	 */
	CACHE_BARRIER			# barrier for 0(dst)
	SDS	zero,0(dst)
	and	v0,v1,7			# number of bytes until aligned
	subu	count,v0
	PTR_ADDU dst,v0

	/* dst is aligned on a 64-bit boundary from here on */

blkzero:
#if TFP
	/*
	 * TFP: clear 16 bytes per half iteration with $f4 = 0.
	 * a3 = count rounded down to a multiple of 16; t2 and t3 are the
	 * two store cursors and t1 is the value that ends the loop.
	 */
	and	a3,count,~(16-1)
	beq	a3,zero,8f
	dmtc1	zero,$f4		# BDSLOT, $f4 = 0

	daddu	t1,dst,a3		# t1 = last aligned address
	daddiu	t2,dst,16		# t2 = dst + 16
	daddiu	t3,dst,32		# t3 = dst + 32
	daddiu	t1,16			# t1 = end + 16

	.align	4
32:
	/*
	 * WARNING: Code to recover from multiple bit errors in memory ASSUMES
	 * it knows which registers are used here.  Check ml/error.c before
	 * modifying this code.
	 */
#if ECC_RECOVER
EXPORT(bzero_stores)
#endif	/* ECC_RECOVER */
	sdc1	$f4,-16(t2)
	sdc1	$f4,-8(t2)
	beq	t3,t1,8f		# done after the t2 half
	daddiu	t2,t3,16		# BDSLOT

	sdc1	$f4,-16(t3)
	sdc1	$f4,-8(t3)
	bne	t2,t1,32b		# more 16-byte chunks remain
	daddiu	t3,t2,16		# BDSLOT

8:	dadd	dst, a3			# update destination address
	and	v1, count, 8
	beq	v1, zero, 7f		# bit 3 of count clear: no 8-byte piece
	and	v0, count, 7		# BDSLOT: v0 = trailing byte count
	sdc1	$f4, (dst)
	daddi	dst, 8

7:	PTR_ADDU dst,v0			# dst = one past the last byte
	beq	v0,zero,zdone		# no trailing bytes
	nop				# BDSLOT
	j	ra
	SDB	zero,-1(dst)		# BDSLOT: partial store ending at the last byte

#else	/* !TFP */
	/*
	 * Clear 32 bytes per iteration.  a3 becomes the value of dst at which
	 * the loop stops (dst + count rounded down to a multiple of 32).
	 */
	and	a3,count,~(32-1)
	beq	a3,zero,16f
	PTR_ADDU a3,a3,dst		# BDSLOT

32:	PTR_ADDIU dst,32
#if ECC_RECOVER
EXPORT(bzero_stores)
#endif	/* ECC_RECOVER */
	CACHE_BARRIER			# barrier for X(dst)
	sd	zero,-32(dst)
#if R10000	/* hit d$ banks right */
	sd	zero,-16(dst)
	sd	zero,-24(dst)
#else
	sd	zero,-24(dst)
	sd	zero,-16(dst)
#endif
#if IP25 || IP27 || IP30	/* R10000 machines with prefetch */
EXPORT(bzero_pref)
	pref	7, 5*128(dst)		# prefetch 5 cachelines from now
#endif	/* IP25 || IP27 || IP30 */
	bne	dst,a3,32b
	sd	zero,-8(dst)		# BDSLOT

	/*
	 * Fewer than 32 bytes remain, so only limited adjustments of the
	 * count are needed: bits 4 and 3 of count select a 16-byte and an
	 * 8-byte piece, and the low three bits give the trailing bytes.
	 * This has overhead that is always less than the original MIPS code,
	 * and often much less: under 10+n instructions, where n is the
	 * number of bits in the count.  The simple byte loop needs about
	 * 2.75*c instructions of overhead, where c is the count (3 of the sb
	 * instructions per word count as overhead).  Ignoring I-cache
	 * misses, the break even point is around 5 bytes.
	 */
16:	and	v0,count,16
	beq	v0,zero,8f
	and	v1,count,8		# BDSLOT

	CACHE_BARRIER			# barrier for 0(dst)
	sd	zero,0(dst)
	sd	zero,8(dst)
	PTR_ADDIU dst,16

8:	beq	v1,zero,7f
	and	v0,count,7		# BDSLOT
	CACHE_BARRIER			# barrier for 0(dst)
	sd	zero,0(dst)
	PTR_ADDIU dst,8

7:	PTR_ADDU dst,v0			# dst = one past the last byte
	beq	v0,zero,zdone		# no trailing bytes
	nop				# BDSLOT
	CACHE_BARRIER			# barrier for -1(dst)
	j	ra
	SDB	zero,-1(dst)		# BDSLOT: partial store ending at the last byte
#endif
	.set	reorder
#else
	PTR_SUBU v1,zero,dst		# v1 = -dst, used for alignment

	blt	count,7,bytezero	# long enough to make the code work

	and	v1,NBPW-1		# number of bytes until aligned
	beq	v1,zero,blkzero		# already aligned
	SWS	zero,0(dst)		# partial store up to the word boundary
	subu	count,v1
	PTR_ADDU dst,v1

	/*
	 * Zero a 32 byte, aligned block.
	 *
	 * Extra cycles help some machines, presumably because it keeps us
	 * from overrunning the write buffer.
	 */
blkzero:
	and	a3,count,~(32-1)
	beq	a3,zero,16f
	PTR_ADDU a3,a3,dst		# a3 = value of dst that ends the loop
32:	sw	zero,0(dst)
	sw	zero,4(dst)
	sw	zero,8(dst)
	sw	zero,12(dst)
	sw	zero,16(dst)
	sw	zero,20(dst)
	sw	zero,24(dst)
	sw	zero,28(dst)
	PTR_ADDIU dst,32		# (as moves this way up)
	bne	dst,a3,32b

	/*
	 * Fewer than 32 bytes remain, so only limited adjustments of the
	 * count are needed: bits 4, 3 and 2 of count select a 16-, 8- and
	 * 4-byte piece, and the low two bits give the trailing bytes.  See
	 * the cost discussion in the BZERO64 variant; the break even point
	 * against the simple byte loop is around 5 bytes.
	 */
16:	and	v0,count,16
	beq	v0,zero,8f
	sw	zero,0(dst)
	sw	zero,4(dst)
	sw	zero,8(dst)
	sw	zero,12(dst)
	PTR_ADDIU dst,16

8:	and	v1,count,8
	beq	v1,zero,4f
	sw	zero,0(dst)
	sw	zero,4(dst)
	PTR_ADDIU dst,8

4:	and	v0,count,4
	beq	v0,zero,3f
	sw	zero,0(dst)
	PTR_ADDIU dst,4

3:	and	v1,count,3
	addu	dst,v1			# dst = one past the last byte
	beq	v1,zero,zdone		# no trailing bytes
	SWB	zero,-1(dst)		# partial store ending at the last byte
	j	ra
#endif	/* !BZERO64 */

	/* short requests: clear one byte at a time up to a3 = dst + count */
bytezero:
	PTR_ADDU a3,dst,count
	ble	count,zero,zdone	# nothing to clear
1:	sb	zero,0(dst)
	PTR_ADDIU dst,1
	bne	dst,a3,1b
zdone:	j	ra

XLEAF(bzero_end)
	END(bzero)
|
|
|
|
/*
 * bzerror: returns immediately without touching any register.
 * NOTE(review): presumably a resume point for faults taken inside
 * bzero -- confirm against the fault handler.
 */
LEAF(bzerror)
	j	ra
	END(bzerror)

/* the bzero register names are not needed past this point */
#undef	dst
#undef	count
|
|
|
|
|
|
/* bcmp(src, dst, count)
|
|
*
|
|
* Most comparisons are short, and most are aligned. The answer is found
|
|
* in the first few bytes or not until the end of the strings.
|
|
*
|
|
* There's a bug in the R12KS CPU that will sometimes cause a LWR or LWL
|
|
* instruction to corrupt the portion of the destination register that it's
|
|
* not supposed to touch. In general LWR and LWL instructions are always
|
|
* paired so this isn't a problem. bcmp() was being fancy with the initial
|
|
* and trailing compares of unaligned comparisons when the two source strings
|
|
* had matching misalignments and only doing single LWL's for the initial and
|
|
* single LWR's for the trailing comparisons. Since we know that the bcmp()
|
|
* is for at least 11 bytes at the leading comparison point and at least 7
|
|
* bytes at the trailing comparison point, it's easy to just put the paired
|
|
* instruction in with very little performance impact.
|
|
*/
|
|
/* register names used by bcmp */
#define	src	a0
#define	dst	a1
#define	count	a2

/*
 * bcmp: src (a0), dst (a1) = the two buffers, count (a2) = byte count.
 * Returns v0 = 0 if the buffers are equal, non-zero (1) otherwise.
 *
 * Strategy:
 *   - fewer than 11 bytes: byte loop (bytecmp);
 *   - both pointers share the same misalignment: compare one unaligned
 *     word, step both to a word boundary, then compare words (wordcmp);
 *   - otherwise (unalgncmp): byte-compare until dst is aligned, then
 *     compare aligned dst words against unaligned src words
 *     (partaligncmp), and finish with the byte loop.
 *
 * Unaligned words are always fetched with the LWS/LWB pair, never with
 * a single partial load (R12KS workaround).
 */
LEAF(bcmp)
	xor	v0,src,dst		# low bits set where alignments differ
	blt	count,11,bytecmp	# too short, just byte cmp

	and	v0,NBPW-1
	PTR_SUBU t8,zero,src		# number of bytes until aligned
	bne	v0,zero,unalgncmp	# src and dst not alignable together

	/* same misalignment: word-align src and dst */
	and	t8,NBPW-1
	beq	t8,zero,wordcmp		# already aligned
	subu	count,t8
	LWS	t0,0(src)		# cmp unaligned portion
	LWB	t0,3(src)		# WAR: the R12KS needs LWL/LWR paired
	LWS	t1,0(dst)
	LWB	t1,3(dst)		# WAR: the R12KS needs LWL/LWR paired
	PTR_ADDU src,t8
	PTR_ADDU dst,t8
	bne	t0,t1,cmpne

	/*
	 * Compare 4 words per iteration.  Each word pair is tested before the
	 * next pair is loaded, which avoids unneeded fetches (and therefore
	 * unneeded cache misses).  v0 counts down the bytes left for this
	 * loop.
	 */
wordcmp:
	and	v0,count,~(16-1)
	beq	v0,zero,8f
16:	lw	t0,0(src)
	lw	t1,0(dst)
	PTR_ADDIU src,16
	PTR_ADDIU dst,16
	bne	t0,t1,cmpne

	lw	t0,4-16(src)
	lw	t1,4-16(dst)
	subu	v0,16
	bne	t0,t1,cmpne

	lw	t0,8-16(src)		# nothing to fill the delay slots
	lw	t1,8-16(dst)
	bne	t0,t1,cmpne

	lw	t0,12-16(src)
	lw	t1,12-16(dst)
	bne	t0,t1,cmpne
	bne	v0,zero,16b

	/*
	 * Fewer than 16 bytes remain.  Bits 3 and 2 of count select an 8-byte
	 * and a 4-byte piece.  Many registers are used so the assembler can
	 * fill the delay slots.
	 */
8:	and	t9,count,8
	and	t8,count,4
	beq	t9,zero,4f

	lw	t0,0(src)
	lw	t1,0(dst)
	PTR_ADDIU src,8
	PTR_ADDIU dst,8
	bne	t0,t1,cmpne

	lw	t0,4-8(src)
	lw	t1,4-8(dst)
	bne	t0,t1,cmpne

4:	and	t9,count,3		# t9 = trailing byte count
	beq	t8,zero,3f

	lw	t0,0(src)
	lw	t1,0(dst)
	PTR_ADDIU src,4
	PTR_ADDIU dst,4
	bne	t0,t1,cmpne

	/*
	 * 0 to 3 bytes remain, starting at a word boundary.  The request was
	 * long enough that the last four bytes could be compared even when
	 * nothing remains, saving the test; since most comparisons are of
	 * even numbers of words, that shortcut is not taken.  When bytes do
	 * remain, the word ending at the last byte is compared.
	 */
3:	PTR_ADDU src,t9
	beq	t9,zero,cmpeq
	PTR_ADDU dst,t9
	LWS	t0,-4(src)		# WAR: the R12KS needs LWL/LWR paired
	LWB	t0,-1(src)
	LWS	t1,-4(dst)		# WAR: the R12KS needs LWL/LWR paired
	LWB	t1,-1(dst)
	sne	v0,t0,t1		# v0 = 1 if the last words differ, else 0
	j	ra

cmpne:	li	v0,1			# buffers differ
	j	ra

	/*
	 * src and dst cannot be aligned together: align dst only.
	 * Assume this is rare, and do not unroll it.
	 */
unalgncmp:
	PTR_SUBU a3,zero,dst		# calc byte cnt to get dst aligned
	and	a3,NBPW-1
	subu	count,a3
	beq	a3,zero,partaligncmp	# already aligned
	PTR_ADDU a3,src			# src endpoint
1:	lbu	v0,0(src)
	lbu	v1,0(dst)
	PTR_ADDIU src,1
	PTR_ADDIU dst,1
	bne	v0,v1,cmpne
	bne	src,a3,1b

	/*
	 * src unaligned, dst aligned loop: one word per iteration.
	 * Assume this is rare, and do not unroll it.
	 */
partaligncmp:
	and	a3,count,~(NBPW-1)	# whole words left
	subu	count,a3		# count = bytes left after the words
	beq	a3,zero,bytecmp
	PTR_ADDU a3,src			# src endpoint
4:
	LWS	v0,0(src)
	LWB	v0,3(src)
	lw	v1,0(dst)
	PTR_ADDIU src,NBPW
	PTR_ADDIU dst,NBPW
	bne	v0,v1,cmpne
	bne	src,a3,4b

	/* brute force byte cmp loop */
bytecmp:
	PTR_ADDU a3,count,src		# src endpoint
	ble	count,zero,cmpeq	# nothing left to compare
1:	lbu	v0,0(src)
	lbu	v1,0(dst)
	PTR_ADDIU src,1
	PTR_ADDIU dst,1
	bne	v0,v1,cmpne
	bne	src,a3,1b

cmpeq:	move	v0,zero			# buffers are equal
	j	ra

	END(bcmp)
|
|
|
|
|
|
/*
|
|
* addupc(pc, &(struct prof), ticks, use_32bit)
|
|
* return value
|
|
* -1 addupc() failed because pc was outside the offset range in u_prof
|
|
* 0 either addupc() succeeded or it failed due to invalid buffer address
|
|
*/
|
|
/*
 * addupc: a0 = pc, a1 = profile descriptor, a2 = ticks,
 * a3 = non-zero for 32-bit buckets, zero for 16-bit buckets.
 *
 * The bucket offset is ((pc - PR_OFF) * PR_SCALE) >> 16, or 0 when
 * PR_SCALE is 2.  Returns v0 = -1 if pc is below PR_OFF or the offset
 * is not below PR_SIZE, and 0 after the bucket has been updated.  A
 * bucket address that is negative (outside kuseg) goes to adderr.
 * The update is an ll/sc retry loop performed while K_NOFAULT holds
 * NF_ADDUPC.
 */
LEAF(addupc)
	PTR_L	v1,PR_OFF(a1)		# base of profile region
	PTR_SUBU a0,v1			# corrected pc
	bltz	a0,1f			# below profile region
	lw	v0,PR_SCALE(a1)		# fixed point scale factor
	bne	v0,2,2f			# if scale == 2, only use 1st bucket
	li	v0,0
	b	3f
2:	multu	v0,a0
	mflo	v0			# shift 64 bit result right 16
	srl	v0,16
	mfhi	v1
	sll	v1,16
	or	v0,v1
3:
	lw	v1,PR_SIZE(a1)
	PTR_L	a0,PR_BASE(a1)		# base of profile buckets
	bne	a3,zero,4f		# 32-bit buckets?

	/* 16-bit buckets */
	and	v0,~1			# halfword-align the offset
	bgeu	v0,v1,1f		# above profile region
	PTR_ADDU v0,a0			# v0 = bucket address
	bltz	v0,adderr		# outside kuseg

	.set	noreorder
	PTR_L	t0,VPDA_CURKTHREAD(zero)
	li	v1,NF_ADDUPC		# LDSLOT
	CACHE_BARRIER_AT(0,v0)		# barrier for v0 (t0 is dependent)
	sh	v1,K_NOFAULT(t0)

	/*
	 * ll/sc work on whole words: round the address down to its word and,
	 * when the bucket is the halfword at offset 0 of that word, move the
	 * increment into the upper 16 bits.
	 */
	andi	t1,v0,2			# short- or int-aligned?
	ori	v0,v0,2
	bne	t1,zero,7f
	xori	v0,2			# BDSLOT -- align to int
	sll	a2,16			# adjust increment value
7:
	ll	v1,0(v0)		# add ticks to bucket
	addu	v1,a2
	/* try to set the new one */
	AUTO_CACHE_BARRIERS_DISABLE
	sc	v1,0(v0)
	AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
	beql	v1,zero,7b		# sc failed: retry
#else
	beq	v1,zero,7b		# sc failed: retry
#endif
	nop				# BDSLOT
	b	5f
	nop				# BDSLOT: explicit, so that the first
					# instruction at label 4 is not
					# executed in this delay slot
	.set	reorder

4:	/* 32-bit buckets */
	and	v0,~3			# mask off lower bits
	bgeu	v0,v1,1f		# above profile region
	PTR_ADDU v0,a0			# v0 = bucket address
	bltz	v0,adderr		# outside kuseg

	.set	noreorder
	PTR_L	t0,VPDA_CURKTHREAD(zero)
	li	v1,NF_ADDUPC		# LDSLOT
	CACHE_BARRIER_AT(0,v0)		# barrier for v0 (t0 is dependent)
	sh	v1,K_NOFAULT(t0)
6:
	ll	v1,0(v0)		# add ticks to bucket
	addu	v1,a2
	/* try to set the new one */
	AUTO_CACHE_BARRIERS_DISABLE
	sc	v1,0(v0)
	AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
	beql	v1,zero,6b		# sc failed: retry
#else
	beq	v1,zero,6b		# sc failed: retry
#endif
	nop				# BDSLOT
	.set	reorder
5:
	sh	zero,K_NOFAULT(t0)	# clear K_NOFAULT again
	li	v0,0			# bucket updated
	j	ra

1:	li	v0,-1			# pc outside the profiled range
	j	ra
	END(addupc)
|
|
|
|
/*
 * adderr: reached from addupc with a1 still pointing at the profile
 * descriptor when the bucket address is unusable.  Stores 0 into the
 * descriptor's PR_SCALE field and returns v0 = 0.
 * NOTE(review): presumably also the resume point for faults taken under
 * NF_ADDUPC -- confirm against the fault handler.
 */
LEAF(adderr)
	move	v0,zero			# return value 0
	sw	zero,PR_SCALE(a1)	# PR_SCALE = 0
	j	ra
	END(adderr)
|
|
|
|
/*
 * fubyte / fuibyte: a0 = user address.
 * Returns the byte at a0, zero-extended, in v0.  A negative address
 * goes to uerror (v0 = -1).  The load is done while K_NOFAULT holds
 * NF_FSUMEM.  When _MIPS_SZLONG is 32 the entry point is also
 * exported as fulong.
 */
LEAF(fubyte)
XLEAF(fuibyte)
#if (_MIPS_SZLONG == 32)
XLEAF(fulong)
#endif
	.set	noreorder
	bltz	a0,uerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_FSUMEM			# LDSLOT
	AUTO_CACHE_BARRIERS_DISABLE		# t0 is dependent
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FSUMEM
	lbu	v0,0(a0)			# fetch the user byte
	j	ra				# LDSLOT
	sh	zero,K_NOFAULT(t0)		# BDSLOT: clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE
	.set	reorder
	END(fubyte)
|
|
|
|
/*
|
|
* upath(from, to, maxbufsize)
|
|
* Read in a pathname from user space.
|
|
* RETURNS:
|
|
* -1 - if supplied address was not valid
|
|
* -2 - if pathname length is > maxbufsize - 1
|
|
* length otherwise (including '\0')
|
|
* Assume maxbufsize > 0
|
|
*/
|
|
/*
 * upath: a0 = user source, a1 = destination buffer, a2 = buffer size.
 * Copies bytes up to and including the terminating NUL.  Returns in v0
 * the number of bytes copied (NUL included), -2 if no NUL was found in
 * the first a2 bytes, or -1 (via uerror) for a negative source address.
 * v1 keeps the original buffer size; a2 counts down the space left.
 */
LEAF(upath)
	.set	noreorder
	bltz	a0,uerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_FSUMEM			# LDSLOT
	AUTO_CACHE_BARRIERS_DISABLE		# t0 is dependent
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FSUMEM
	AUTO_CACHE_BARRIERS_ENABLE
	move	v1,a2				# remember the buffer size
1:
	CACHE_BARRIER				# barrier for incrementing a1
	lbu	v0,0(a0)			# next source byte
	LONG_SUBU a2,1				# LDSLOT: one slot consumed
	beq	v0,zero,2f			# NUL: go compute the length
	sb	v0,0(a1)			# BDSLOT: store it (the NUL too)
	PTR_ADDU a0,1
	bne	a2,zero,1b			# space left: keep copying
	PTR_ADDU a1,1				# BDSLOT
	b	3f				# buffer exhausted without a NUL
	li	v0,-2				# BDSLOT
2:
	LONG_SUBU v0,v1,a2			# bytes copied, including the NUL
3:
	AUTO_CACHE_BARRIERS_DISABLE		# t0 is dependent
	j	ra
	sh	zero,K_NOFAULT(t0)		# BDSLOT: clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE
	.set	reorder
	END(upath)
|
|
|
|
/*
|
|
* we don't worry about flushing the write buffer, because we assume that
|
|
* the s* routines are ONLY called for talking to user address space which
|
|
* we assume is either not mapped or mapped to real live memory
|
|
*/
|
|
/*
 * subyte / suibyte: a0 = user address, a1 = value.
 * Stores the low byte of a1 at a0 while K_NOFAULT holds NF_FSUMEM.
 * Returns v0 = 0; a negative address goes to uerror (v0 = -1).
 */
LEAF(subyte)
XLEAF(suibyte)
	.set	noreorder
	bltz	a0,uerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_FSUMEM			# LDSLOT
	CACHE_BARRIER				# barrier for a0 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FSUMEM
	sb	a1,0(a0)			# the user store
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	.set	reorder
	move	v0,zero				# success
	j	ra
	END(subyte)
|
|
|
|
/*
 * fuword / fuiword: a0 = user address.
 * Returns the 32-bit word at a0 in v0, loaded with lw while K_NOFAULT
 * holds NF_FSUMEM.  A negative address goes to uerror (v0 = -1).
 */
LEAF(fuword)
XLEAF(fuiword)
	.set	noreorder
	bltz	a0,uerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_FSUMEM			# LDSLOT
	AUTO_CACHE_BARRIERS_DISABLE		# t0 is dependent
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FSUMEM
	lw	v0,0(a0)			# fetch the user word
	sh	zero,K_NOFAULT(t0)		# LDSLOT: clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE
	.set	reorder
	j	ra
	END(fuword)
|
|
|
|
/*
|
|
* Get unsigned 32 bit vector (a0 = errptr, a1 = srcv, a2 = dstv, a3 = cnt)
|
|
*/
|
|
/*
 * sfu32v: a0 = errptr, a1 = source vector, a2 = destination vector,
 * a3 = count.  Copies 32-bit words from a1 to a2 while K_NOFAULT holds
 * NF_SUERROR and returns v0 = 1.  A negative source address goes to
 * suerror.
 * NOTE(review): a3 is decremented by 4 per word and the loop ends only
 * when it reaches exactly zero, so a3 looks like a byte count that must
 * be a non-zero multiple of 4 -- confirm against the callers.
 */
LEAF(sfu32v)
	.set	noreorder
	bltz	a1,suerror			# negative source address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_SUERROR			# LDSLOT
	AUTO_CACHE_BARRIERS_DISABLE		# t0 is dependent
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_SUERROR
	AUTO_CACHE_BARRIERS_ENABLE
1:
	CACHE_BARRIER				# protect incrementing a2 in loop
	lw	v0,0(a1)			# next source word
	subu	a3,4				# four bytes fewer to go
	sw	v0,0(a2)			# store it in the destination
	addu	a1,4
	bne	a3,zero,1b			# loop until a3 is zero
	addu	a2,4				# BDSLOT

	AUTO_CACHE_BARRIERS_DISABLE		# t0 is dependent
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE
	.set	reorder
	li	v0,1				# success
	j	ra
	END(sfu32v)
|
|
|
|
/*
|
|
* Get signed 32 bit number and return in v0 (on fault return zero)
|
|
*/
|
|
/*
 * sfu32: a0 = errptr, a1 = source address.
 * Returns in v0 the 32-bit word at a1, loaded with lw while K_NOFAULT
 * holds NF_SUERROR.  A negative address goes to suerror, which stores
 * EFAULT through a0 and returns 0.
 */
LEAF(sfu32)
	.set	noreorder
	bltz	a1,suerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_SUERROR			# LDSLOT
	AUTO_CACHE_BARRIERS_DISABLE		# t0 is dependent on PTR_L
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_SUERROR
	lw	v0,0(a1)			# fetch the word
	sh	zero,K_NOFAULT(t0)		# LDSLOT: clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE
	.set	reorder
	j	ra
	END(sfu32)
|
|
|
|
/*
|
|
* Put unsigned 32 bit number and return TRUE on success
|
|
*/
|
|
/*
 * spu32: a0 = errptr, a1 = destination address, a2 = value.
 * Stores the 32-bit value a2 at a1 while K_NOFAULT holds NF_SUERROR and
 * returns v0 = 1.  A negative address goes to suerror, which stores
 * EFAULT through a0 and returns 0.
 */
LEAF(spu32)
	.set	noreorder
	bltz	a1,suerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_SUERROR			# LDSLOT
	CACHE_BARRIER				# barrier for a1 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_SUERROR
	sw	a2,0(a1)			# the store itself
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	.set	reorder
	li	v0,1				# success
	j	ra
	END(spu32)
|
|
|
|
/*
|
|
* On error tramp, set @(a0) to EFAULT and return zero in v0.
|
|
*/
|
|
/*
 * suerror: failure exit of the sfu32v/sfu32/spu32 family; a0 still
 * holds the caller's errptr.  Stores EFAULT through a0 and returns
 * v0 = 0.
 */
LEAF(suerror)
	li	v1,EFAULT
	sw	v1,0(a0)		# *errptr = EFAULT
	move	v0,zero			# return 0
	j	ra
	END(suerror)
|
|
|
|
#if (_MIPS_SZLONG == 64)
|
|
/*
 * fulong (64-bit long variant): a0 = user address.
 * Returns the 64-bit doubleword at a0 in v0, loaded with ld while
 * K_NOFAULT holds NF_FSUMEM.  A negative address goes to uerror
 * (v0 = -1).
 */
LEAF(fulong)
	.set	noreorder
	bltz	a0,uerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_FSUMEM			# LDSLOT
	AUTO_CACHE_BARRIERS_DISABLE		# t0 is dependent on PTR_L
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FSUMEM
	ld	v0,0(a0)			# fetch the user doubleword
	sh	zero,K_NOFAULT(t0)		# LDSLOT: clear K_NOFAULT again
	AUTO_CACHE_BARRIERS_ENABLE
	.set	reorder
	j	ra
	END(fulong)
|
|
#endif
|
|
|
|
/*
 * suword / suiword: a0 = user address, a1 = value.
 * Stores the 32-bit value a1 at a0 while K_NOFAULT holds NF_FSUMEM.
 * Returns v0 = 0; a negative address goes to uerror (v0 = -1).
 */
LEAF(suword)
XLEAF(suiword)
	.set	noreorder
	bltz	a0,uerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_FSUMEM			# LDSLOT
	CACHE_BARRIER				# barrier for a0 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FSUMEM
	sw	a1,0(a0)			# the user store
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	.set	reorder
	move	v0,zero				# success
	j	ra
	END(suword)
|
|
|
|
|
|
/*
 * suhalf: a0 = user address, a1 = value.
 * Stores the low 16 bits of a1 at a0 while K_NOFAULT holds NF_FSUMEM.
 * Returns v0 = 0; a negative address goes to uerror (v0 = -1).
 */
LEAF(suhalf)
	.set	noreorder
	bltz	a0,uerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_FSUMEM			# LDSLOT
	CACHE_BARRIER				# barrier for a0 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FSUMEM
	sh	a1,0(a0)			# the user store
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	.set	reorder
	move	v0,zero				# success
	j	ra
	END(suhalf)
|
|
|
|
#if (_MIPS_SZLONG == 64)
|
|
/*
 * sulong (64-bit long variant): a0 = user address, a1 = value.
 * Stores the 64-bit value a1 at a0 while K_NOFAULT holds NF_FSUMEM.
 * Returns v0 = 0; a negative address goes to uerror (v0 = -1).
 */
LEAF(sulong)
	.set	noreorder
	bltz	a0,uerror			# negative address: reject
	PTR_L	t0,VPDA_CURKTHREAD(zero)	# BDSLOT
	li	v0,NF_FSUMEM			# LDSLOT
	CACHE_BARRIER				# barrier for a0 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FSUMEM
	sd	a1,0(a0)			# the user store
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	.set	reorder
	move	v0,zero				# success
	j	ra
	END(sulong)
|
|
#endif
|
|
|
|
/*
|
|
* Fetch an instruction word at a KSEG2 address-
|
|
* used for loadable drivers which may have hit the
|
|
* R4K badvaddr chip bug.
|
|
*/
|
|
/*
 * fkiword: a0 = address of the instruction word to fetch.
 * Returns the 32-bit word at a0 in v0.  Unlike the user-space fetch
 * routines, this one stores NF_BADVADDR as a word into VPDA_NOFAULT
 * (addressed off register zero) and does no address check.
 */
LEAF(fkiword)
	.set	noreorder
	li	v0,NF_BADVADDR
	sw	v0,VPDA_NOFAULT(zero)		# VPDA_NOFAULT = NF_BADVADDR
	lw	v0,0(a0)			# fetch the word
	sw	zero,VPDA_NOFAULT(zero)		# LDSLOT: clear VPDA_NOFAULT again
	.set	reorder
	j	ra
	END(fkiword)
|
|
|
|
/*
 * uerror: failure exit of the fu*/su*/upath routines, which branch
 * here when handed a negative address.  Returns v0 = -1.
 */
LEAF(uerror)
	li	v0,-1			# error return
	j	ra
	END(uerror)
|
|
|
|
|
|
|
|
/*
 * strlen: a0 = NUL-terminated string.
 * Returns in v0 the number of bytes before the terminating NUL.
 */
LEAF(strlen)
	move	v0,a0			# v0 = start of the string
1:	lb	v1,0(a0)		# examine the current byte
	PTR_ADDIU a0,1			# step past it
	bne	v1,zero,1b		# keep going until the NUL was read
	PTR_SUBU v0,a0,v0		# bytes scanned, NUL included
	LONG_SUBU v0,1			# do not count the NUL
	j	ra
	END(strlen)
|
|
|
|
/*
|
|
* The following routines uload_word(), uload_uword(), uload_half(),
* uload_uhalf(), uload_double(), ustore_word(), ustore_half() and
* ustore_double() load and store unaligned items.
|
|
* The "addr" parameter is the address at which the reference is to be
|
|
* made. For load routines the value is returned indirectly through
|
|
* the "pword" parameter. For store routines the "value" parameter
|
|
* is stored. All routines indicate an error by returning a non-zero
|
|
* value. If no error occurs a zero is returned.
|
|
* ASSUME that test for KUSEG has already occurred
|
|
*/
|
|
|
|
/*
|
|
* int uload_word(caddr_t addr, k_machreg_t *pword)
|
|
*/
|
|
/*
 * uload_word: a0 = possibly unaligned address, a1 = result pointer.
 * Loads a 32-bit word from a0 with the ulw macro while K_NOFAULT holds
 * NF_FIXADE, stores it through a1 with sreg, and returns v0 = 0.
 */
LEAF(uload_word)
	.set	noreorder
	PTR_L	t0,VPDA_CURKTHREAD(zero)
	li	v0,NF_FIXADE			# LDSLOT
	CACHE_BARRIER				# barrier for a1 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FIXADE
	ulw	v1,0(a0)			# the unaligned load
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	sreg	v1,0(a1)			# *pword = loaded value
	.set	reorder				# after sreg to avoid extra BARRIER
	move	v0,zero				# success
	j	ra
	END(uload_word)
|
|
|
|
/*
|
|
* int uload_half(caddr_t addr, k_machreg_t *pword)
|
|
*/
|
|
/*
 * uload_half: a0 = possibly unaligned address, a1 = result pointer.
 * Loads a sign-extended 16-bit halfword from a0 one byte at a time
 * while K_NOFAULT holds NF_FIXADE, stores it through a1 with sreg, and
 * returns v0 = 0.
 *
 * The ulh macro is expanded by hand because the 3.3 assembler gets it
 * wrong (no nop).  The expansion must fetch the most significant byte
 * with lb (for the sign) and the least significant byte with lbu;
 * which address holds which byte depends on the byte order, so the two
 * offsets are selected with _MIPSEB like the LWS/LWB macros.
 */
LEAF(uload_half)
	.set	noreorder
	PTR_L	t0,VPDA_CURKTHREAD(zero)
	li	v0,NF_FIXADE			# LDSLOT
	CACHE_BARRIER				# barrier for a1 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FIXADE
	/* begin expansion of: ulh v1,0(a0) */
	.set	noat
#ifdef _MIPSEB
	lb	v1,0(a0)			# high byte, sign-extended
	lbu	AT,1(a0)			# low byte
#else
	lb	v1,1(a0)			# high byte, sign-extended
	lbu	AT,0(a0)			# low byte
#endif
	sll	v1,v1,8
	or	v1,v1,AT			# v1 = (high byte, shifted up 8) | low
	.set	at
	/* end expansion of ulh */
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	sreg	v1,0(a1)			# *pword = loaded value
	.set	reorder				# after sreg to avoid extra BARRIER
	move	v0,zero				# success
	j	ra
	END(uload_half)
|
|
|
|
/*
|
|
* int uload_uhalf(caddr_t addr, k_machreg_t *pword)
|
|
*/
|
|
/*
 * uload_uhalf: a0 = possibly unaligned address, a1 = result pointer.
 * Loads a zero-extended 16-bit halfword from a0 one byte at a time
 * while K_NOFAULT holds NF_FIXADE, stores it through a1 with sreg, and
 * returns v0 = 0.
 *
 * The ulhu macro is expanded by hand because the 3.3 assembler gets it
 * wrong (no nop).  Which address holds the most significant byte
 * depends on the byte order, so the two offsets are selected with
 * _MIPSEB like the LWS/LWB macros.
 */
LEAF(uload_uhalf)
	.set	noreorder
	PTR_L	t0,VPDA_CURKTHREAD(zero)
	li	v0,NF_FIXADE			# LDSLOT
	CACHE_BARRIER				# barrier for a1 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FIXADE
	/* begin expansion of: ulhu v1,0(a0) */
	.set	noat
#ifdef _MIPSEB
	lbu	v1,0(a0)			# high byte
	lbu	AT,1(a0)			# low byte
#else
	lbu	v1,1(a0)			# high byte
	lbu	AT,0(a0)			# low byte
#endif
	sll	v1,v1,8
	or	v1,v1,AT			# v1 = (high byte, shifted up 8) | low
	.set	at
	/* end expansion of ulhu */
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	sreg	v1,0(a1)			# *pword = loaded value
	.set	reorder				# after sreg to avoid extra BARRIER
	move	v0,zero				# success
	j	ra
	END(uload_uhalf)
|
|
|
|
/*
|
|
* int uload_uword(caddr_t addr, k_machreg_t *pword)
|
|
*/
|
|
/*
 * uload_uword: a0 = possibly unaligned address, a1 = result pointer.
 * Loads a 32-bit word from a0 with the ulwu macro while K_NOFAULT holds
 * NF_FIXADE, stores it through a1 with sreg, and returns v0 = 0.
 */
LEAF(uload_uword)
	.set	noreorder
	PTR_L	t0,VPDA_CURKTHREAD(zero)
	li	v0,NF_FIXADE			# LDSLOT
	CACHE_BARRIER				# barrier for a1 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FIXADE
	ulwu	v1,0(a0)			# the unaligned load
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	CACHE_BARRIER				# guard a1
	sreg	v1,0(a1)			# *pword = loaded value
	.set	reorder				# after sreg to avoid extra BARRIER
	move	v0,zero				# success
	j	ra
	END(uload_uword)
|
|
|
|
/*
|
|
* int uload_double(caddr_t addr, k_machreg_t *pword)
|
|
*/
|
|
/*
 * uload_double: a0 = possibly unaligned address, a1 = result pointer.
 * Loads a 64-bit doubleword from a0 with the uld macro while K_NOFAULT
 * holds NF_FIXADE, stores it through a1 with sd, and returns v0 = 0.
 */
LEAF(uload_double)
	.set	noreorder
	PTR_L	t0,VPDA_CURKTHREAD(zero)
	li	v0,NF_FIXADE			# LDSLOT
	CACHE_BARRIER				# barrier for a1 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FIXADE
	uld	v1,0(a0)			# the unaligned load
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	sd	v1,0(a1)			# *pword = loaded value
	.set	reorder				# after sd to avoid extra BARRIER
	move	v0,zero				# success
	j	ra
	END(uload_double)
|
|
|
|
/*
|
|
* ustore_double(caddr_t addr, k_machreg_t value)
|
|
*/
|
|
/*
 * ustore_double: a0 = possibly unaligned address, value = 64-bit datum.
 * Stores the doubleword at a0 with the usd macro while K_NOFAULT holds
 * NF_FIXADE and returns v0 = 0.
 *
 * Under _MIPS_SIM_ABI32 the value arrives as a register pair starting
 * on an even register: high word in a2, low word in a3.  The low word
 * is zero-extended before the two halves are merged; a 32-bit value
 * with bit 31 set is held sign-extended in a 64-bit register, and ORing
 * it in unmodified would overwrite the high word with ones.
 * Otherwise the value is taken from a1 as is.
 */
LEAF(ustore_double)
	.set	noreorder
	PTR_L	t0,VPDA_CURKTHREAD(zero)
	li	v0,NF_FIXADE			# LDSLOT
	CACHE_BARRIER				# barrier for a0 (t0 is dependent)
	sh	v0,K_NOFAULT(t0)		# K_NOFAULT = NF_FIXADE
#if (_MIPS_SIM == _MIPS_SIM_ABI32)
	dsll32	a2,0				# high word into bits 63..32
	dsll32	a3,0				# drop the upper half of a3 ...
	dsrl32	a3,0				# ... leaving the low word zero-extended
	or	a1,a2,a3			# a1 = full 64-bit value
#endif
	usd	a1,0(a0)			# the unaligned store
	sh	zero,K_NOFAULT(t0)		# clear K_NOFAULT again
	.set	reorder
	move	v0,zero				# success
	j	ra
	END(ustore_double)
|
|
|
|
/*
 * ustore_word(caddr_t addr, k_machreg_t value)
 *
 * Store the low 32 bits of 'value' at the possibly misaligned address
 * 'addr' (a0) with the usw assembler macro.  v0 is 0 on the
 * straight-line return path.
 *
 * NF_FIXADE is written to K_NOFAULT of the kthread found through
 * VPDA_CURKTHREAD for the duration of the store and cleared afterwards.
 * NOTE(review): what the caller sees when the store faults is decided
 * by the NF_FIXADE handling, which is not visible in this routine.
 */
|

LEAF(ustore_word)
|

.set noreorder
|

PTR_L t0,VPDA_CURKTHREAD(zero)
|

li v0,NF_FIXADE # LDSLOT
|

CACHE_BARRIER # barrier for a0 (t0 is dependent)
|

/* Arm the nofault code before addr is touched. */
sh v0,K_NOFAULT(t0)
|

#if (_MIPS_SIM == _MIPS_SIM_ABI32)
|

/*
 * 'value' is a long long, and _MIPS_SIM_ABI32 passes long longs in
 * 2 registers.  The register pair starts on an even register, hence
 * the low order word is in reg a3.
 */
|

usw a3,0(a0)
|

#else
|

/* Otherwise the whole value arrives in a1. */
usw a1,0(a0)
|

#endif
|

/* The store completed: disarm the nofault code again. */
sh zero,K_NOFAULT(t0)
|

.set reorder
|

/* Straight-line path: return 0. */
move v0,zero
|

j ra
|

END(ustore_word)
|
|
|
|
/*
 * ustore_half(caddr_t addr, k_machreg_t value)
 *
 * Store the low 16 bits of 'value' at the possibly misaligned address
 * 'addr' (a0) with the ush assembler macro.  v0 is 0 on the
 * straight-line return path.
 *
 * NF_FIXADE is written to K_NOFAULT of the kthread found through
 * VPDA_CURKTHREAD for the duration of the store and cleared afterwards.
 * NOTE(review): what the caller sees when the store faults is decided
 * by the NF_FIXADE handling, which is not visible in this routine.
 */
|

LEAF(ustore_half)
|

.set noreorder
|

PTR_L t0,VPDA_CURKTHREAD(zero)
|

li v0,NF_FIXADE # LDSLOT
|

CACHE_BARRIER # barrier for a0 (t0 is dependent)
|

/* Arm the nofault code before addr is touched. */
sh v0,K_NOFAULT(t0)
|

#if (_MIPS_SIM == _MIPS_SIM_ABI32)
|

/*
 * 'value' is a long long, and _MIPS_SIM_ABI32 passes long longs in
 * 2 registers.  The register pair starts on an even register, hence
 * the low order word is in reg a3.
 */
|

ush a3,0(a0)
|

#else
|

/* Otherwise the whole value arrives in a1. */
ush a1,0(a0)
|

#endif
|

/* The store completed: disarm the nofault code again. */
sh zero,K_NOFAULT(t0)
|

.set reorder
|

/* Straight-line path: return 0. */
move v0,zero
|

j ra
|

END(ustore_half)
|
|
|
|
/*
 * fixade_error: return to the address in ra with v0 set to the current
 * contents of gp.
 * NOTE(review): presumably this is where a faulting NF_FIXADE access in
 * the uload_* and ustore_* routines resumes, with the fault code having
 * left a status value in gp -- confirm against the nofault dispatch code.
 */
LEAF(fixade_error)
|

move v0,gp
|

j ra
|

END(fixade_error)
|
|
|
|
|
|
|
|
/* void bcopy(from, to, count);
|
|
* unsigned char *from, *to;
|
|
* unsigned long count;
|
|
*/
|
|
|
|
#define MINCOPY 12
|
|
|
|
/* registers used */
|
|
#define from a0
|
|
#define to a1
|
|
#define count a2
|
|
|
|
#if !TFP
|
|
|
|
/* Use backwards copying code if the from and to regions overlap.
|
|
* Do not worry about zero-length or other silly copies. They are not
|
|
* worth the time to optimize.
|
|
*/
|
|
LEAF(bcopy)
XLEAF(ovbcopy)
/*
 * Register use throughout this routine:
 *	from (a0)	source pointer, advanced as bytes are consumed
 *	to   (a1)	destination pointer, advanced in step with from
 *	count (a2)	bytes still to be copied
 *	a3		alignment byte count, then the loop end address
 *	v0, v1, t0-t3, ta0-ta3	scratch / data in flight
 *
 * count is a pointer-width quantity (it is added to from and to with
 * PTR_ADDU), so every adjustment of count below uses PTR_SUBU rather
 * than the 32-bit subu.  On 32-bit builds the two are the same
 * instruction.
 */
#if IP20 || IP22
	PTR_S	a0,kv_initial_from
	PTR_S	a1,kv_initial_to
	PTR_S	a2,initial_count
#endif	/* IP20 || IP22 */
#ifdef IP32
	or	v0,a0,a1		# if aligned well use the
	or	v0,a2			# faster R5000 cdx block copy
EXPORT(bcopy_cdx_entry)
	.set	noreorder
	li	v0,1
	bnez	v0,1f
	nop
	j	__cdx_blkcopy
	nop
	.set	reorder
1:
#endif	/* IP32 */
#if IP28 || (IP30 && (HEART_COHERENCY_WAR || HEART_INVALIDATE_WAR))
	andi	v1,a1,CACHE_SLINE_SIZE-1	# if aligned well use the
	andi	v0,a0,CACHE_SLINE_SIZE-1	# faster blocked/unrolled
	andi	t0,a2,(4*CACHE_SLINE_SIZE)-1	# and cache tuned copy
	or	v0,v0,v1
	or	v0,v0,t0
	.set	noreorder			# assume small copy for R10K
	bnezl	v0,bcopy_nopage
	nop					# BDSLOT
	.set	reorder
	j	_pagecopy
EXPORT(bcopy_nopage)
#endif

	PTR_ADDU v0,from,count		# v0 := from + count
	ble	to,from,goforwards	# If to <= from then copy forwards
	blt	to,v0,gobackwards	# backwards if from<to<from+count

/* Forward copy code.  Check for pointer alignment and try to get both
 * pointers aligned on a long boundary.
 */
goforwards:
	blt	count,MINCOPY,fbcopy
	/* If possible, align source & destination on 64-bit boundary. */
	and	v0,from,7
	and	v1,to,7
	li	a3,8
	bne	v0,v1,align32		# low bits are different

/* Pointers 64-bit alignable (may be aligned).  Since v0 == v1, we need only
 * check what value v0 has to see how to get aligned.  Also, since we have
 * eliminated tiny copies, we know that the count is large enough to
 * encompass the alignment copies.
 */
	beq	v0,zero,1f		# If v0==0 then aligned
	subu	a3,a3,v1		# a3 = # bytes to get aligned
	LDS	v0,0(from)
	SDS	v0,0(to)		# copy partial word
	PTR_ADDU from,a3
	PTR_ADDU to,a3
	PTR_SUBU count,a3
1:
/* When we get here, source and destination are 64-bit aligned.  Check if
 * we have at least 64 bytes to move.
 */
	and	a3,count,~(64-1)
	beq	a3,zero,forwards	# go do 32-bit copy
	PTR_ADDU a3,a3,to
64:
/* Splitting d$ banks is faster on the R10000 */
#if R10000
	ld	t0,0(from);	ld	t2,16(from)
#if IP25 || IP27 || IP30
	.set	noreorder
EXPORT(bcopy_pref1)
	pref	4,384(from)		# R10000 machines with prefetch
	.set	reorder
#endif	/* IP25 || IP27 || IP30 (no IP28) */
	ld	t1,8(from);	ld	t3,24(from)
	ld	ta0,32(from);	ld	ta2,48(from)
	ld	ta1,40(from);	ld	ta3,56(from)
#if ECC_RECOVER
	.set	noreorder
EXPORT(bcopy_stores)
#endif	/* ECC_RECOVER */
	sd	t0,0(to);	sd	t2,16(to)
	sd	t1,8(to);	sd	t3,24(to)
	sd	ta0,32(to);	sd	ta2,48(to)
	sd	ta1,40(to);	sd	ta3,56(to)
#if ECC_RECOVER
	.set	reorder
#endif	/* ECC_RECOVER */
#else
	ld	t0,0(from); ld t1,8(from); ld t2,16(from); ld t3,24(from)
	ld	ta0,32(from);ld ta1,40(from);ld ta2,48(from);ld ta3,56(from)
#if ECC_RECOVER
	.set	noreorder
EXPORT(bcopy_stores)
#endif	/* ECC_RECOVER */
	sd	t0,0(to); sd t1,8(to); sd t2,16(to); sd t3,24(to)
#if ECC_RECOVER
	.set	reorder
#endif	/* ECC_RECOVER */
	sd	ta0,32(to); sd ta1,40(to); sd ta2,48(to); sd ta3,56(to)
#endif	/* R10000 */
	PTR_ADDU from,64
	PTR_ADDU to,64

#if !defined(IP25) && !defined(IP27) && !defined(IP30)
	bne	a3,to,64b
#else	/* IP25 || IP27 || IP30 */
	beq	a3,to,pref64end

/* we unroll r10k another time so we have a big loop which brings
 * in cachelines.  This way we can also perform two different
 * prefetch operations (one for source and one for destination).
 */

	ld	t0,0(from);	ld	t2,16(from)
	.set	noreorder
EXPORT(bcopy_pref2)
	pref	7,384(to)
	.set	reorder
	ld	t1,8(from);	ld	t3,24(from)
	ld	ta0,32(from); ld ta2,48(from); ld ta1,40(from); ld ta3,56(from)
	sd	t0,0(to); sd t2,16(to); sd t1,8(to); sd t3,24(to)
	sd	ta0,32(to); sd ta2,48(to); sd ta1,40(to); sd ta3,56(to)
	PTR_ADDU from,64
	PTR_ADDU to,64
	bne	a3,to,64b
pref64end:
#endif	/* IP25 || IP27 || IP30 */

	and	count,64-1	# still have to copy non-64 multiple bytes
	b	forwards	# complete with 32-bit copy

align32:
	and	v0,from,3
	and	v1,to,3
	li	a3,4
	bne	v0,v1,fmcopy		# low bits are different

/* Pointers are alignable and may be aligned.  Since v0 == v1, we need only
 * check what value v0 has to see how to get aligned.  Also, since we have
 * eliminated tiny copies, we know that the count is large enough to
 * encompass the alignment copies.
 */
	beq	v0,zero,forwards	# If v0==0 then aligned
	subu	a3,a3,v1		# a3 = # bytes to get aligned
	LWS	v0,0(from)
	SWS	v0,0(to)		# copy partial word
	PTR_ADDU from,a3
	PTR_ADDU to,a3
	PTR_SUBU count,a3

/* Once we are here, the pointers are aligned on 32-bit boundaries
 */
forwards:

	and	a3,count,~(32-1)
	beq	a3,zero,16f
	PTR_ADDU a3,a3,to
32:
	lw	t0,0(from); lw t1,4(from); lw t2,8(from); lw t3,12(from)
	lw	ta0,16(from); lw ta1,20(from); lw ta2,24(from); lw ta3,28(from)
	sw	t0,0(to); sw t1,4(to); sw t2,8(to); sw t3,12(to)
	sw	ta0,16(to); sw ta1,20(to); sw ta2,24(to); sw ta3,28(to)
	PTR_ADDU from,32
	PTR_ADDU to,32
	bne	a3,to,32b

/* We know we have fewer than 32 bytes remaining, so we do no more
 * adjustments of the count.
 */
16:	and	v0,count,16
	beq	v0,zero,8f
	lw	t0,0(from); lw t1,4(from); lw t2,8(from); lw t3,12(from)
	sw	t0,0(to); sw t1,4(to); sw t2,8(to); sw t3,12(to)
	PTR_ADDU from,16
	PTR_ADDU to,16

8:	and	v1,count,8
	beq	v1,zero,4f
	lw	t0,0(from)
	lw	t1,4(from)
	sw	t0,0(to)
	sw	t1,4(to)
	PTR_ADDU from,8
	PTR_ADDU to,8

4:	and	v0,count,4
	beq	v0,zero,3f
	lw	t0,0(from)
	sw	t0,0(to)
	PTR_ADDU from,4
	PTR_ADDU to,4

/* Last 1-3 bytes: step both pointers past the end and move the tail
 * with one partial-word load/store pair addressed at the final byte.
 */
3:	and	v1,count,3
	PTR_ADDU from,v1
	beq	v1,zero,ret
	PTR_ADDU to,v1
	LWB	t0,-1(from)
	SWB	t0,-1(to)
	j	ra


fmcopy:
/* Misaligned, non-overlap copy of many bytes.  This happens too often.
 * Align the destination for machines with write-thru caches.
 *
 * This code is always for machines that prefer nops between stores.
 *
 * Here v1=low bits of destination, a3=4.
 */
	beq	v1,zero,fmcopy4	# If v1==0 then destination is aligned
	subu	a3,a3,v1	# a3 = # bytes to align destination
	PTR_SUBU count,a3
	PTR_ADDU a3,to
1:	lb	v0,0(from)
	PTR_ADDU from,1
	sb	v0,0(to)
	PTR_ADDU to,1
	bne	to,a3,1b

fmcopy4:
	and	a3,count,~(16-1)
	beq	a3,zero,8f
	PTR_ADDU a3,a3,to
16:	LWS	t0,0(from); LWB t0,0+3(from)
	LWS	t1,4(from); LWB t1,4+3(from); sw t0,0(to)
	LWS	t2,8(from); LWB t2,8+3(from); sw t1,4(to)
	LWS	t3,12(from); LWB t3,12+3(from); sw t2,8(to)
	sw	t3,12(to)
	PTR_ADDU from,16
	PTR_ADDU to,16
	bne	a3,to,16b

8:	and	v1,count,8
	beq	v1,zero,4f
	LWS	t0,0(from); LWB t0,0+3(from)
	LWS	t1,4(from); LWB t1,4+3(from); sw t0,0(to)
	sw	t1,4(to)
	PTR_ADDU from,8
	PTR_ADDU to,8

4:	and	v0,count,4
	and	count,3
	beq	v0,zero,fbcopy
	LWS	t0,0(from); LWB t0,0+3(from); sw t0,0(to)
	PTR_ADDU from,4
	PTR_ADDU to,4


/* Byte at a time copy code.  This is used when the byte count is small.
 */
fbcopy:
	PTR_ADDU a3,from,count		# a3 = end+1
	beq	count,zero,ret		# If count is zero, then we are done

1:	lb	v0,0(from)		# v0 = *from
	PTR_ADDU from,1			# advance pointer
	sb	v0,0(to)		# Store byte
	PTR_ADDU to,1			# advance pointer
	bne	from,a3,1b		# Loop until done
ret:	j	ra			# return to caller


/*****************************************************************************/

/*
 * Backward copy code.  Check for pointer alignment and try to get both
 * pointers aligned on a long boundary.
 */
gobackwards:
	PTR_ADDU from,count		# Advance to end + 1
	PTR_ADDU to,count		# Advance to end + 1

	/* small byte counts use byte at a time copy */
	blt	count,MINCOPY,backwards_bytecopy
	and	v0,from,3		# v0 := from & 3
	and	v1,to,3			# v1 := to & 3
	beq	v0,v1,backalignable	# low bits are identical
/*
 * Byte at a time copy code.  This is used when the pointers are not
 * alignable, when the byte count is small, or when cleaning up any
 * remaining bytes on a larger transfer.
 */
backwards_bytecopy:
	beq	count,zero,ret		# If count is zero quit
	PTR_SUBU from,1			# Reduce by one (point at byte)
	PTR_SUBU to,1			# Reduce by one (point at byte)
	PTR_SUBU v1,from,count		# v1 := original from - 1

99:	lb	v0,0(from)		# v0 = *from
	PTR_SUBU from,1			# backup pointer
	sb	v0,0(to)		# Store byte
	PTR_SUBU to,1			# backup pointer
	bne	from,v1,99b		# Loop until done
	j	ra			# return to caller

/*
 * Pointers are alignable, and may be aligned.  Since v0 == v1, we need only
 * check what value v0 has to see how to get aligned.  Also, since we have
 * eliminated tiny copies, we know that the count is large enough to
 * encompass the alignment copies.
 */
backalignable:
	beq	v0,zero,backwards	# If v0==v1 && v0==0 then aligned
	beq	v0,3,back_copy3		# Need to copy 3 bytes to get aligned
	beq	v0,2,back_copy2		# Need to copy 2 bytes to get aligned

	/* need to copy 1 byte */
	lb	v0,-1(from)		# get one byte
	PTR_SUBU from,1			# backup pointer
	sb	v0,-1(to)		# store one byte
	PTR_SUBU to,1			# backup pointer
	PTR_SUBU count,1		# and reduce count
	b	backwards		# Now pointers are aligned

	/* need to copy 2 bytes */
back_copy2:
	lh	v0,-2(from)		# get one short
	PTR_SUBU from,2			# backup pointer
	sh	v0,-2(to)		# store one short
	PTR_SUBU to,2			# backup pointer
	PTR_SUBU count,2		# and reduce count
	b	backwards

	/* need to copy 3 bytes */
back_copy3:
	lb	v0,-1(from)		# get one byte
	lh	v1,-3(from)		# and one short
	PTR_SUBU from,3			# backup pointer
	sb	v0,-1(to)		# store one byte
	sh	v1,-3(to)		# and one short
	PTR_SUBU to,3			# backup pointer
	PTR_SUBU count,3		# and reduce count
	/* FALLTHROUGH */
/*
 * Once we are here, the pointers are aligned on long boundaries.
 * Begin copying in large chunks.
 */
backwards:

	/* 32 byte at a time loop */
backwards_32:
	blt	count,32,backwards_16	# do 16 bytes at a time
	lw	v0,-4(from)
	lw	v1,-8(from)
	lw	t0,-12(from)
	lw	t1,-16(from)
	lw	t2,-20(from)
	lw	t3,-24(from)
	lw	ta0,-28(from)
	lw	ta1,-32(from)		# Fetch 8*4 bytes
	PTR_SUBU from,32		# backup from pointer now
	sw	v0,-4(to)
	sw	v1,-8(to)
	sw	t0,-12(to)
	sw	t1,-16(to)
	sw	t2,-20(to)
	sw	t3,-24(to)
	sw	ta0,-28(to)
	sw	ta1,-32(to)		# Store 8*4 bytes
	PTR_SUBU to,32			# backup to pointer now
	PTR_SUBU count,32		# Reduce count
	b	backwards_32		# Try some more

	/* 16 byte at a time loop */
backwards_16:
	blt	count,16,backwards_4	# Do rest in words
	lw	v0,-4(from)
	lw	v1,-8(from)
	lw	t0,-12(from)
	lw	t1,-16(from)
	PTR_SUBU from,16		# backup from pointer now
	sw	v0,-4(to)
	sw	v1,-8(to)
	sw	t0,-12(to)
	sw	t1,-16(to)
	PTR_SUBU to,16			# backup to pointer now
	PTR_SUBU count,16		# Reduce count
	b	backwards_16		# Try some more

	/* 4 byte at a time loop */
backwards_4:
	blt	count,4,backwards_bytecopy	# Do rest
	lw	v0,-4(from)
	PTR_SUBU from,4			# backup from pointer
	sw	v0,-4(to)
	PTR_SUBU to,4			# backup to pointer
	PTR_SUBU count,4		# Reduce count
	b	backwards_4
XLEAF(bcopy_end)
	END(bcopy)
|
|
|
|
#endif /* !TFP */
|
|
|
|
#undef from
|
|
#undef to
|
|
#undef count
|
|
|
|
/*
 * void swbcopy(from, to, count);
 *	unsigned char *from, *to;
 *	unsigned int count;
 *
 * Copy 'count' bytes from 'from' to 'to', exchanging the two bytes of
 * every 16-bit pair on the way.
 *
 * The caller must guarantee that
 *	- count is even
 *	- from & to do **not** overlap
 *
 * Nothing is copied when count is zero or when from == to.
 */
#define from a0
#define to a1
#define count a2

LEAF(swbcopy)
	beqz	count,9f		# zero length: done
	beq	from,to,9f		# identical buffers: done

	/* One 16-bit pair per trip; the two bytes trade places. */
8:	lb	v0,0(from)
	lb	v1,1(from)
	sb	v1,0(to)
	sb	v0,1(to)
	PTR_ADDU from,2
	PTR_ADDU to,2
	subu	count,2
	bgtz	count,8b		# more pairs left

9:	j	ra
	END(swbcopy)
#undef from
#undef to
#undef count
|
|
|
|
#ifdef IPMHSIM
|

/*
 * IPMHSIM builds only: the six entry points orb_rmw, orh_rmw, orw_rmw,
 * andb_rmw, andh_rmw and andw_rmw are exported here as empty routines.
 * Each one returns to its caller immediately without touching its
 * arguments or any memory.
 */
EXPORT(orb_rmw)
|

j ra
|

EXPORT(orh_rmw)
|

j ra
|

EXPORT(orw_rmw)
|

j ra
|

EXPORT(andb_rmw)
|

j ra
|

EXPORT(andh_rmw)
|

j ra
|

EXPORT(andw_rmw)
|

j ra
|

#endif /* IPMHSIM */
|
|
|
|
/*
 * int rtlock_ownerstamp(caddr_t user_addr, unsigned int pid)
 *
 * Write 'pid' (a1) into the 32-bit word at 'user_addr' (a0) with an
 * ll/sc pair, retrying until the sc succeeds, then return 0 in v0.
 * The value fetched by the ll is not used; t1 is overwritten with the
 * pid before the sc.
 *
 * A negative user_addr branches to uerror (defined elsewhere in this
 * file) before anything is stored.  NF_FSUMEM is held in K_NOFAULT of
 * the kthread found through VPDA_CURKTHREAD while the ll/sc loop runs.
 *
 * Note: This routine assumes a 64bit data field in the rtlock structure,
 *       where the high 32bits represent the owner and the low 32bits
 *       (not modified here) represent the wait count.
 * NOTE(review): the store goes to offset 0 of that field, which is the
 * high word only with big-endian byte order -- confirm for other builds.
 */
|

LEAF(rtlock_ownerstamp)
|

.set noreorder
|

/* Reject addresses with the sign bit set. */
bltz a0,uerror
|

PTR_L t0,VPDA_CURKTHREAD(zero) # BDSLOT
|

li v0,NF_FSUMEM # LDSLOT
|

CACHE_BARRIER # t0 dependent, and top factors a0
|

sh v0,K_NOFAULT(t0)
|

/* ll only establishes the link for the sc; its result is discarded. */
1: ll t1, 0(a0)
|

or t1, a1, zero
|

/* t0 dependency on VPDA_CURKTHREAD, and a0 covered by barrier above */
|

AUTO_CACHE_BARRIERS_DISABLE
|

sc t1, 0(a0)
|

/* sc leaves 0 in t1 when the store did not happen: go round again. */
#ifdef R10K_LLSC_WAR
|

beql t1, zero, 1b
|

#else
|

beqz t1, 1b
|

#endif
|

nop
|

sh zero,K_NOFAULT(t0)
|

AUTO_CACHE_BARRIERS_ENABLE
|

j ra
|

/* Delay slot of the return: v0 = 0. */
move v0, zero
|

.set reorder
|

END(rtlock_ownerstamp)
|
|
|
|
/*
 * int dumpcopy(kernel_src, kernel_dst, bcount)
 *	long kernel_src, kernel_dst;
 *	long bcount;
 *
 * Call bcopy with the incoming a0-a2 unchanged while the nofault code
 * NF_DUMPCOPY is armed, then disarm it and return 0 in v0.
 *
 * The code is kept in K_NOFAULT of the kthread found through
 * VPDA_CURKTHREAD when that pointer is non-zero, and in VPDA_NOFAULT
 * otherwise.  ra is saved in a COPYIOFRM-sized stack frame across the
 * call.
 * NOTE(review): the return value after a fault inside bcopy is produced
 * by the NF_DUMPCOPY handling, which is not part of this routine.
 */
|

NESTED(dumpcopy, COPYIOFRM, zero)
|

PTR_SUBU sp,COPYIOFRM
|

REG_S ra,RAOFF(sp)
|

.set noreorder
|



AUTO_CACHE_BARRIERS_DISABLE
|

PTR_L t0,VPDA_CURKTHREAD(zero)
|

beq t0,zero, 1f
|

li v0,NF_DUMPCOPY # BDSLOT: v0 is set on both paths
|

b 2f
|

/* Delay slot of the branch above: arm the code in the kthread. */
sh v0,K_NOFAULT(t0)
|

/*
 * If we're not being called from a thread, we're being called
 * from the error handling code and interrupts are disabled.
 */
|

1: sw v0,VPDA_NOFAULT(zero)
|

2:
|

jal bcopy
|

nop
|



/* Reload the kthread pointer and disarm whichever field was armed. */
PTR_L t0,VPDA_CURKTHREAD(zero)
|

beq t0,zero, 3f
|

move v0,zero # BDSLOT
|

b 4f
|

sh zero,K_NOFAULT(t0)
|

/*
 * If we're not being called from a thread, we're being called
 * from the error handling code and interrupts are disabled.
 */
|

3: sw zero,VPDA_NOFAULT(zero)
|

4:
|

AUTO_CACHE_BARRIERS_ENABLE
|



.set reorder
|

/* Restore ra, pop the frame and return. */
REG_L ra,RAOFF(sp)
|

PTR_ADDU sp,COPYIOFRM
|

j ra
|

END(dumpcopy)
|