/*
* r4kcache.s -- R4000-specific Cache manipulation primitives
*/
#ident "$Revision: 1.1 $"
#include <regdef.h>
#include <sys/cpu.h>
#include <sys/loaddrs.h>
#include <sys/sbd.h>
#include <asm.h>
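/*
 * Cache geometry variables; presumably filled in by boot-time
 * cache sizing code before any of these routines run.
 */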
BSS(_icache_size, 4) # bytes of icache
BSS(_dcache_size, 4) # bytes of dcache
BSS(cachewrback, 4) # writeback secondary cache?
BSS(_sidcache_size, 4) # bytes of secondary cache
BSS(_scache_linesize, 4) # secondary cache line size
BSS(_scache_linemask, 4) # secondary cache line mask
BSS(_icache_linesize, 4) # primary I cache line size
BSS(_icache_linemask, 4) # primary I cache line mask
BSS(_dcache_linesize, 4) # primary D cache line size
BSS(_dcache_linemask, 4) # primary D cache line mask
#define NBPP 4096
#define SCACHE_LINESIZE (32*4) /* 32 words = 128 bytes */
#define SCACHE_LINEMASK ((32*4)-1)
#ifdef BLOCKSIZE_8WORDS
#define DCACHE_LINESIZE (8*4)
#define ICACHE_LINESIZE (8*4)
#define DCACHE_LINEMASK ((8*4)-1)
#define ICACHE_LINEMASK ((8*4)-1)
#else
#define DCACHE_LINESIZE (4*4)
#define ICACHE_LINESIZE (4*4)
#define DCACHE_LINEMASK ((4*4)-1)
#define ICACHE_LINEMASK ((4*4)-1)
#endif /* BLOCKSIZE_8WORDS */
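/*
 * Line alignment idiom used throughout: "and t3,addr,LINEMASK"
 * extracts the offset within the line, and the following subtract
 * rounds addr down to a line boundary.  E.g. with a 16-byte line
 * (mask 0xf), addr 0x80000013 -> t3 = 0x3 -> addr 0x80000010.
 */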
#ifdef R4600
#define R4600_DCACHE_LINESIZE (8*4)
#define R4600_ICACHE_LINESIZE (8*4)
#define R4600_DCACHE_LINEMASK ((8*4)-1)
#define R4600_ICACHE_LINEMASK ((8*4)-1)
BSS(_two_set_pcaches, 4) # 0 if one-set; set offset if two-set
#endif /* R4600 */
#if IP20 || IP22
#define K0_CACHEFLUSHBASE K0_RAMBASE
#else
#define K0_CACHEFLUSHBASE K0BASE
#endif
/*
* __cache_wb_inval(addr, len)
*
* Uses the INDEX_INVALIDATE and INDEX_WRITEBACK_INVALIDATE
* cacheops. Should be called with K0 addresses, to avoid
* the tlb translation (and tlb miss fault) overhead.
*
* Since we are shooting down 'live' lines in the 2nd cache, these
* lines have to be written back to memory before invalidating.
* Since we are invalidating lines in the secondary cache which
* will correspond to primary data cache lines, the primary data
* cache has to be invalidated also (a line cannot be valid in
* the primary but invalid in secondary.)
* We have to invalidate the primary instruction cache for the
* same reason.
*
* Since we are using index (writeback) invalidate cache ops on
* the primary caches, and since the address we are given generally
* has nothing to do with the virtual address at which the data may
* have been used (otherwise we would use the 'hit' operations), we
* perform the cache op at both indices in the primary caches. (Assumes
* 4K page size and 8K caches.) This is necessary since the address
* is basically meaningless.
*
* XXX Assumes 32 word line size for secondary cache.
* XXX Assumes primary data and primary inst. cache are the same size.
* XXX Assumes primary data and primary inst. line size are the same.
* XXX Assumes 8k primary cache size and 4k page size.
*/
LEAF(__cache_wb_inval)
XLEAF(clear_cache)
/*
* This routine is not preemptable, to avoid creating a non-subset
* cache situation, since the algorithm is to do the primary caches,
* then the secondary.
* Also, load any memory references into a register
* before flushing the primary caches, again to avoid a non-subset
* situation.
* Also, run uncached, for the same reason.
*/
/* NOTE: The hardware mavens assure me that it is OK to
* violate the subset rule for the i-cache. For this reason,
* run this routine cached, since uncached instruction accesses
* are about 48 times slower than cached.
*/
srl t0,a0,29 # isolate top 3 address bits: 0x4 => K0 (cached)
beq t0,0x4,1f
j ra
1:
#if defined(IP20) || defined(EVEREST)
j rmi_cacheflush
#else
#if !defined(IP22)
blez a1, 42f # nothing to flush --> return
#endif
#ifdef R4000PC
li t4, 0x100000 # XXX hardcoded 1MB scache size; branch below always taken
/* do we have an scache? */
bnez t4,101f # yes --> just use scache operations
lw t7, _dcache_size
lw v0, _icache_size
#ifdef R4600
lw t6,_two_set_pcaches
beqz t6,17f
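/*
 * The R4600 primary caches are organized as two sets;
 * _two_set_pcaches holds the byte offset between them, so the
 * "xor t5,t0,t6" in the loops below derives the address of the
 * same index in the other set, and each cacheop is issued twice,
 * once per set.
 */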
/* do dcache */
beqz t7, 31f # no dcache --> check icache
/* clean dcache using indexed invalidate */
move t0, a0 # set up current address
PTR_ADDU t1, t0, a1 # set up limit address
PTR_SUBU t1, R4600_DCACHE_LINESIZE # (base + count - R4600_DCACHE_LINESIZE)
and t3, t0, R4600_DCACHE_LINEMASK # align start to dcache line
PTR_SUBU t0, t3 # pull off extra
.set noreorder
2: cache CACH_PD|C_IWBINV, (t0) # Invalidate cache line
xor t5,t0,t6
cache CACH_PD|C_IWBINV, (t5) # Invalidate cache line
bltu t0, t1, 2b
PTR_ADDU t0, R4600_DCACHE_LINESIZE # BDSLOT
.set reorder
/* now do icache */
31: beqz v0, 41f
/* clean icache using indexed invalidate */
move t0, a0 # set up current address
PTR_ADDU t1, t0, a1 # set up target address
PTR_SUBU t1, R4600_ICACHE_LINESIZE # (base + size - R4600_ICACHE_LINESIZE)
and t3, t0, R4600_ICACHE_LINEMASK # align start to icache line
PTR_SUBU t0, t3 # pull off extra
and t3, t1, R4600_ICACHE_LINEMASK # align ending to icache line
PTR_SUBU t1, t3
.set noreorder
4: cache CACH_PI|C_IINV, (t0) # Invalidate cache line
xor t5,t0,t6
cache CACH_PI|C_IINV, (t5) # Invalidate cache line
bltu t0, t1, 4b
PTR_ADDU t0, R4600_ICACHE_LINESIZE # BDSLOT
.set reorder
41:
j ra
17:
#endif /* R4600 */
/* do dcache */
beqz t7, 31f # no dcache --> check icache
/* clean dcache using indexed invalidate */
move t0, a0 # set up current address
PTR_ADDU t1, t0, a1 # set up limit address
PTR_SUBU t1, DCACHE_LINESIZE # (base + count - DCACHE_LINESIZE)
and t3, t0, DCACHE_LINEMASK # align start to dcache line
PTR_SUBU t0, t3 # pull off extra
.set noreorder
2: cache CACH_PD|C_IWBINV, (t0) # Invalidate cache line
bltu t0, t1, 2b
PTR_ADDU t0, DCACHE_LINESIZE # BDSLOT
.set reorder
/* now do icache */
31: beqz v0, 41f
/* clean icache using indexed invalidate */
move t0, a0 # set up current address
PTR_ADDU t1, t0, a1 # set up target address
PTR_SUBU t1, ICACHE_LINESIZE # (base + size - ICACHE_LINESIZE)
and t3, t0, ICACHE_LINEMASK # align start to icache line
PTR_SUBU t0, t3 # pull off extra
and t3, t1, ICACHE_LINEMASK # align ending to icache line
PTR_SUBU t1, t3
.set noreorder
4: cache CACH_PI|C_IINV, (t0) # Invalidate cache line
bltu t0, t1, 4b
PTR_ADDU t0, ICACHE_LINESIZE # BDSLOT
.set reorder
41:
j ra
101:
#endif /* R4000PC */
#ifdef IP22
j rmi_cacheflush # XXX - broken on IP24
#else
# Load this before flushing the primary dcache.
lw t4,_sidcache_size
lw t1,_dcache_size
srl t1,t1,1 # primary cache size/2
move t0,a0 # starting address
bltu t1,a1,1f # cache is smaller than count
move t1,a1
1:
# t1 has min(_dcache_size/2, len)
addu t1,t0 # ending addr + 1
# align the starting address to a 2nd cache line
and t2,t0,SCACHE_LINEMASK
subu t0,t2
1: # top of loop
.set noreorder
.set noat
# loop unrolled over one 128-byte secondary cache line per pass
# (4 primary lines with 8-word blocks, 8 with 4-word blocks)
# invalidate the primary inst. lines
cache CACH_PI|C_IINV,0(t0)
cache CACH_PI|C_IINV,(ICACHE_LINESIZE)(t0)
cache CACH_PI|C_IINV,(ICACHE_LINESIZE*2)(t0)
cache CACH_PI|C_IINV,(ICACHE_LINESIZE*3)(t0)
#ifndef BLOCKSIZE_8WORDS
cache CACH_PI|C_IINV,(ICACHE_LINESIZE*4)(t0)
cache CACH_PI|C_IINV,(ICACHE_LINESIZE*5)(t0)
cache CACH_PI|C_IINV,(ICACHE_LINESIZE*6)(t0)
cache CACH_PI|C_IINV,(ICACHE_LINESIZE*7)(t0)
#endif
# Do the other index in the primary I cache
cache CACH_PI|C_IINV,NBPP(t0)
cache CACH_PI|C_IINV,(NBPP+ICACHE_LINESIZE)(t0)
cache CACH_PI|C_IINV,(NBPP+ICACHE_LINESIZE*2)(t0)
cache CACH_PI|C_IINV,(NBPP+ICACHE_LINESIZE*3)(t0)
#ifndef BLOCKSIZE_8WORDS
cache CACH_PI|C_IINV,(NBPP+ICACHE_LINESIZE*4)(t0)
cache CACH_PI|C_IINV,(NBPP+ICACHE_LINESIZE*5)(t0)
cache CACH_PI|C_IINV,(NBPP+ICACHE_LINESIZE*6)(t0)
cache CACH_PI|C_IINV,(NBPP+ICACHE_LINESIZE*7)(t0)
#endif
# writeback + inval 4 primary data
cache CACH_PD|C_IWBINV,0(t0)
cache CACH_PD|C_IWBINV,(DCACHE_LINESIZE)(t0)
cache CACH_PD|C_IWBINV,(DCACHE_LINESIZE*2)(t0)
cache CACH_PD|C_IWBINV,(DCACHE_LINESIZE*3)(t0)
#ifndef BLOCKSIZE_8WORDS
cache CACH_PD|C_IWBINV,(DCACHE_LINESIZE*4)(t0)
cache CACH_PD|C_IWBINV,(DCACHE_LINESIZE*5)(t0)
cache CACH_PD|C_IWBINV,(DCACHE_LINESIZE*6)(t0)
cache CACH_PD|C_IWBINV,(DCACHE_LINESIZE*7)(t0)
#endif
# do the other index
cache CACH_PD|C_IWBINV,NBPP(t0)
cache CACH_PD|C_IWBINV,(NBPP+DCACHE_LINESIZE)(t0)
cache CACH_PD|C_IWBINV,(NBPP+DCACHE_LINESIZE*2)(t0)
cache CACH_PD|C_IWBINV,(NBPP+DCACHE_LINESIZE*3)(t0)
#ifndef BLOCKSIZE_8WORDS
cache CACH_PD|C_IWBINV,(NBPP+DCACHE_LINESIZE*4)(t0)
cache CACH_PD|C_IWBINV,(NBPP+DCACHE_LINESIZE*5)(t0)
cache CACH_PD|C_IWBINV,(NBPP+DCACHE_LINESIZE*6)(t0)
cache CACH_PD|C_IWBINV,(NBPP+DCACHE_LINESIZE*7)(t0)
#endif
.set reorder
.set at
#ifdef BLOCKSIZE_8WORDS
addu t0,DCACHE_LINESIZE*4
#else
addu t0,DCACHE_LINESIZE*8
#endif
bltu t0,t1,1b
# now do 2nd cache
move t0,a0 # starting addr.
bltu t4,a1,1f # 2nd cache is smaller than count
move t4,a1
1:
addu t4,t0 # ending addr + 1
subu t0,t2 # line-align (t2 still holds the alignment remainder from above)
1:
.set noreorder
.set noat
cache CACH_SD|C_IWBINV,0(t0) # writeback + inval 2nd cache lines
.set reorder
.set at
addu t0,SCACHE_LINESIZE
bltu t0,t4,1b
42:
j ra # restores cached mode.
#endif /* IP22 */
#endif /* IP20 || EVEREST */
END(__cache_wb_inval)
/*
 * flush_cache2()
 * flush entire I & D cache
 */
#define ALSZ 15 /* align on 16 byte boundary */
#define ALMASK ~0xf
#define SZREG 4
#define FRAMESZ(size) (((size)+ALSZ) & ALMASK)
FLUSHFRM= FRAMESZ((4+1)*SZREG) # 4 arg saves, and ra
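# FRAMESZ((4+1)*SZREG) = (5*4 + 15) & ~0xf = 32-byte frame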
NESTED(flush_cache2, FLUSHFRM, zero)
XLEAF(FlushAllCaches)
subu sp,FLUSHFRM
sw ra,FLUSHFRM-4(sp)
li a0,K0_RAMBASE
li a1, 0x100000 # XXX hardcoded 1MB scache size as the flush length
#ifdef R4000PC
bnez a1, 1f
lw a1, _dcache_size
#ifdef R4600
lw a2, _two_set_pcaches
beqz a2,1f
move a1,a2
#endif /* R4600 */
1:
#endif /* R4000PC */
jal __cache_wb_inval
lw ra,FLUSHFRM-4(sp)
addu sp,FLUSHFRM
j ra
END(flush_cache2)
#if defined(IP20) || defined(EVEREST) || defined(IP22)
/*
* rmi_cacheflush(addr,len)
*/
LEAF(rmi_cacheflush)
move t0,a0 # starting addr.
move t5,a1
addu t5,t0 # ending addr + 1
#ifdef R4000PC_XXXX
lw t2,_sidcache_size
bnez t2,101f
#endif
# align the starting address to a 2nd cache line
and t0,~SCACHE_LINEMASK
1:
.set noreorder
/*
** The RMI workaround is for a bug in the RMI/MC chip on IP17/IP20.
** The problem occurs when we try to do writes without a
** preceding read. Such an operation only occurs when we
** do a cache instruction with a WRITEBACK from secondary
** cache.
**
** The workaround is to:
** 1) read from the location about to be flushed
** 2) read from the "companion location", which is
** a location that will hit at the same index in the
** secondary cache and cause an implicit writeback, if
** the old line is dirty.
** 3) invalidate the companion line.
**
** Step 1 is needed in case the companion line is already
** in the cache and is dirty, so that it is written back to
** memory first.
** Step 2 is needed to cause the desired line to be written back.
** Step 3 is needed to avoid upsetting assumptions about cache
** lines that, say, a driver might make.
**
*/
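/*
 * Illustrative C sketch (not part of the build) of the loop below;
 * load() and sd_hit_inval() are hypothetical stand-ins for the lw
 * and CACH_SD|C_HINV operations, and the IP20/IP22 K0_RAMBASE
 * fixup on the companion address is omitted:
 *
 *	unsigned long p, c;
 *	for (p = addr & ~(unsigned long)SCACHE_LINEMASK;
 *	     p < addr + len; p += SCACHE_LINESIZE) {
 *		load(p);		// 1) writeback companion if dirty
 *		c = p ^ COMPANION_BIT;	//    same index, different tag
 *		load(c);		// 2) implicit writeback of p's line
 *		sd_hit_inval(c);	// 3) invalidate the companion line
 *	}
 */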
lw zero,(t0) # read from line to flush
#define COMPANION_BIT 0x00400000
li t1,COMPANION_BIT # generate companion
xor t1,t0
#if defined(IP20) || defined(IP22)
li t2,K0_RAMBASE
bgeu t1,t2,2f
nop
or t1,K0_RAMBASE
2:
#endif
lw zero,(t1) # cause writeback
cache CACH_SD|C_HINV,0(t1) # invalidate cache line
.set reorder
addu t0,SCACHE_LINESIZE
bltu t0,t5,1b
j ra
#ifdef R4000PC_XXXX
101:
# align the starting address.
and t2,t0,ICACHE_LINEMASK
subu t0,t2
1:
.set noreorder
cache CACH_PI|C_IINV,0(t0)
.set reorder
addu t0,ICACHE_LINESIZE
bltu t0,t5,1b
move t0,a0
# align the starting address.
and t2,t0,DCACHE_LINEMASK
subu t0,t2
1:
.set noreorder
cache CACH_PD|C_IWBINV,0(t0)
.set reorder
addu t0,DCACHE_LINESIZE
bltu t0,t5,1b
j ra
#endif
END(rmi_cacheflush)
#endif /* IP20 || EVEREST || IP22 */