1
0
Files
irix-657m-src/irix/kern/ml/atomic_ops.s
2022-09-29 17:59:04 +03:00

1609 lines
34 KiB
ArmAsm

/**************************************************************************
* *
* Copyright (C) 1993-1996 Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
**************************************************************************/
/*
* R4000 atomic operations based on link-load, store-conditional instructions.
*/
#ident "$Revision: 1.59 $"
#include <ml/ml.h>
/*
* Implementation of fetch-and-add.
* a0 - address of 32-bit integer to operate on
* a1 - value to add to integer
* new value returned in v0
*/
LEAF(atomicAddInt)
XLEAF(atomicAddUint)
#if (_MIPS_SZLONG == 32)
XLEAF(atomicAddLong)
XLEAF(atomicAddUlong)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
ll t2,0(a0)
addu t1,t2,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
sc t1,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t1,zero,1b
nop
#else
beq t1,zero,1b
#endif
# set return value in branch-delay slot
addu v0,t2,a1
# Return to caller
j ra
nop
.set reorder
END(atomicAddInt)
#if MP
/*
* int atomicAddIntHot(int *ip, int val, int *pref)
* uint atomicAddUintHot(uint *ip, uint val, int *pref)
*
* Atomically add val to *ip and return the new value.
*
* This is a special variant of the normal version of this routine that is
* used when *ip is a ``hot'' (frequently updated) word on an MP system. In
* addition to a pointer to an int to atomically manipulate, we also take a
* pointer to another byte in the same cache line which we will store into
* once per LL/SC loop in order to acquire the cache line in dirty exclusive
* mode. This helps ensure the success of the SC ...
*/
LEAF(atomicAddIntHot2)
XLEAF(atomicAddUintHot2)
.set noreorder
# Force cache line to be dirty/exclusive mode because we know that
# we're going to store into it we want a reasonable chance of having
# the SC below to succeed when the word is highly contended. When
# it's not contended, this extra SW instruction won't introduce a lot
# of overhead because the cache line will be in our primary cache
# already or will simply be dragged in exclusive a few instructions
# sooner than it would have been by the SC.
sb zero,0(a2) # cause prefetch exclusive of cacheline
1: ll t2,0(a0)
addu t1,t2,a1
sc t1,0(a0)
#ifdef R10K_LLSC_WAR
beql t1,zero,1b
#else
beq t1,zero,1b
#endif
sb zero,0(a2) # BDSLOT cause prefetch exclusive of cacheline
j ra
addu v0,t2,a1 # BDSLOT
.set reorder
END(atomicAddIntHot2)
#endif /* MP */
#if !defined(_COMPILER_VERSION) || (_COMPILER_VERSION<700) || defined(IP28)
/*
* Use compiler atomic operation intrinsics for the MipsPRO 7.0 and later
* compilers.
*
* We don't enable the compiler intrinsics for atomic operations on the IP28
* (Indigo2/R10K) because of a bug in the compiler: #763380,
* ``t5_no_spec_stores not working for atomic intrinsics.'' When this bug is
* fixed we can reenable the intrinsics for IP28.
*/
#if (_MIPS_SZLONG == 64)
/*
* Implementation of long-word fetch-and-add.
* a0 - address of 32- or64-bit integer to operate on
* a1 - value to add to integer
* new value returned in v0
*/
LEAF(atomicAddLong)
XLEAF(atomicAddUlong)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
LONG_LL t2,0(a0)
LONG_ADDU t1,t2,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
LONG_SC t1,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t1,zero,1b
nop
#else
beq t1,zero,1b
#endif
# set return value in branch-delay slot
LONG_ADDU v0,t2,a1
# Return to caller
j ra
nop
.set reorder
END(atomicAddLong)
#endif
/*
* Atomically set bits in a word (atomic version of "word |= arg;")
* a0 - address of 32-bit integer to operate on
* a1 - value to 'or' into integer
* returns old value
*/
LEAF(atomicSetInt)
XLEAF(atomicSetUint)
#if (_MIPS_SZLONG == 32)
XLEAF(atomicSetLong)
XLEAF(atomicSetUlong)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
ll v0,0(a0)
or t2,v0,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
sc t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(atomicSetInt)
/*
* Atomically set bits in a 64-bit int (atomic version of "long |= arg;")
* a0 - address of long to operate on
* a1 - value to 'or' into integer
* returns old value
*/
LEAF(atomicSetInt64)
XLEAF(atomicSetUint64)
#if (_MIPS_SZLONG == 64)
XLEAF(atomicSetLong)
XLEAF(atomicSetUlong)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
LONG_LL v0,0(a0)
or t2,v0,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
LONG_SC t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(atomicSetInt64)
#endif /* !defined(_COMPILER_VERSION) || (_COMPILER_VERSION<700) || IP28 */
/*
* Atomically set bits in a cpumask_t (atomic version of "cpumask_t |= arg;")
* a0 - address of cpumask_t to operate on
* a1 - value to 'or' into cpumask_t
*/
LEAF(atomicSetCpumask)
.set noreorder
LoadCpumask a1,0(a1) # get the bit mask value into a1
CACHE_BARRIER_AT(0,a0) # top foactored for sc
# Load the old value
1:
LLcpumask v0,0(a0) # load current value
or t2,v0,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
SCcpumask t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(atomicSetCpumask)
#if 1
/*
* The above if *should be:
*
* #if !defined(_COMPILER_VERSION) || (_COMPILER_VERSION<700) || defined(IP28)
*
* In order to signify that the following code is only needed for compiles
* that aren't using the compiler intrinsics from the MipsPRO 7.0 and later
* compilers. Because of a mistake in the #if condition at 6.5 MR the
* following code was accidentally included for all platforms. On the off
* chance that some 3rd party code is now using these routines we need to
* leave them in till IRIX 6.6 when we're allowed to break binary
* compatibility in the kernel.
*
* Also note that we don't enable the compiler intrinsics for atomic
* operations on the IP28 (Indigo2/R10K) because of a bug in the compiler:
* #763380, ``t5_no_spec_stores not working for atomic intrinsics.'' When
* this bug is fixed we can reenable the intrinsics for IP28.
*/
/*
* Atomically clear bits in a word (atomic version of "word &= ~arg;")
* a0 - address of 32-bit integer to operate on
* a1 - mask of bits to clear in integer
* returns old value
*
* bitlock_release_32bit must use ll/sc even though the lock bit is held
* because the lock bit isn't protecting _all_ the bits in the (integer) word.
*/
LEAF(atomicClearInt)
XLEAF(atomicClearUint)
XLEAF(bitlock_release_32bit)
#if (_MIPS_SZLONG == 32)
XLEAF(atomicClearLong)
XLEAF(atomicClearUlong)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Negate argument
nor a1,a1,zero
# Load the old value
1:
ll v0,0(a0)
and t2,v0,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
sc t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(atomicClearInt)
/*
* Atomically clear bits in a 64-bit int (atomic version of "long &= ~arg;")
* a0 - address of 64-bit word to operate on
* a1 - mask of bits to clear
* returns old value
*
* bitlock_release must use ll/sc even though the lock bit is held because
* the lock bit isn't protecting _all_ the bits in the (long) word.
*/
LEAF(atomicClearInt64)
XLEAF(atomicClearUint64)
XLEAF(bitlock_release)
#if (_MIPS_SZLONG == 64)
XLEAF(atomicClearLong)
XLEAF(atomicClearUlong)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Negate argument
nor a1,a1,zero
# Load the old value
1:
LONG_LL v0,0(a0)
and t2,v0,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
LONG_SC t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(atomicClearInt64)
#endif /* !defined(_COMPILER_VERSION) || (_COMPILER_VERSION<700) */
/*
* Atomically clear a bit-field in a long, and set it to a specific value
* (atomic version of (l) = ((l) & ~(f)) | (b))
* a0 - address of long to operate on (l)
* a1 - mask of bits to clear in long (f)
* a2 - value to or into bit-field (b)
* returns old value
*/
LEAF(atomicFieldAssignLong)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Negate argument (f)
nor a1, a1, zero
# Load the old value
1:
LONG_LL v0, 0(a0)
and t2, v0, a1
or t2, t2, a2
# Try to write new one
AUTO_CACHE_BARRIERS_DISABLE
LONG_SC t2, 0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2, zero, 1b
#else
beq t2, zero, 1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(atomicFieldAssignLong)
/*
* Atomically clear a bit-field in a 32 bit entity
* and set it to a specific value
* (atomic version of (l) = ((l) & ~(f)) | (b))
* a0 - address of long to operate on (l)
* a1 - mask of bits to clear in Uint (f)
* a2 - value to or into bit-field (b)
* returns old value
*/
LEAF(atomicFieldAssignUint)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# negate argument.
nor a1, a1, zero
1:
# Load old value.
ll v0, 0(a0)
and t2, v0, a1
or t2, t2, a2
# Try to write new value.
AUTO_CACHE_BARRIERS_DISABLE
sc t2, 0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2, zero, 1b
#else
beq t2, zero, 1b
#endif
nop
# Return
j ra
nop
END(atomicFieldAssignUint)
/*
* Atomically clear bits in a cpumask_t (atomic version of "cpumask_t &= ~arg;")
* a0 - address of cpumask_t to operate on
* a1 - value to clear into cpumask_t
*/
LEAF(atomicClearCpumask)
.set noreorder
LoadCpumask a1,0(a1) # get the bit mask value into a1
nor a1,a1,zero # negate argument
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
LLcpumask v0,0(a0) # load current value
and t2,v0,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
SCcpumask t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(atomicClearCpumask)
/*
* Implementation of fetch-and-add.
* a0 - address of 64-bit integer to operate on
* a1 - value to add to integer
* new value returned in v0
*/
LEAF(atomicAddInt64)
XLEAF(atomicAddUint64)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
#if _MIPS_SIM == _MIPS_SIM_ABI32
dsll a1,a2,32
dsll a3,32
dsrl a3,32
or a1,a3
#endif
# Load the old value
1:
lld t2,0(a0)
daddu t1,t2,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
scd t1,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t1,zero,1b
nop
#else
beq t1,zero,1b
#endif
# set return value in branch-delay slot
daddu v0,t2,a1
#if _MIPS_SIM == _MIPS_SIM_ABI32
dsll v1,v0,32
dsra v1,32
dsra v0,32
#endif
# Return to caller
j ra
nop
.set reorder
END(atomicAddInt64)
#if MP
/*
* int64_t atomicAddInt64Hot(int64_t *ip, int64_t val, int *pref)
* uint64_t atomicAddUint64Hot(uint64_t *ip, uint64_t val, int *pref)
*
* Atomically add val to *ip and return the new value.
*
* This is a special variant of the normal version of this routine that is
* used when *ip is a ``hot'' (frequently updated) word on an MP system. In
* addition to a pointer to an int to atomically manipulate, we also take a
* pointer to another byte in the same cache line which we will store into
* once per LL/SC loop in order to acquire the cache line in dirty exclusive
* mode. This helps ensure the success of the SC ...
*/
LEAF(atomicAddInt64Hot2)
XLEAF(atomicAddUint64Hot2)
.set noreorder
# Force cache line to be dirty/exclusive mode because we know that
# we're going to store into it we want a reasonable chance of having
# the SC below to succeed when the word is highly contended. When
# it's not contended, this extra SW instruction won't introduce a lot
# of overhead because the cache line will be in our primary cache
# already or will simply be dragged in exclusive a few instructions
# sooner than it would have been by the SC.
sb zero,0(a2) # cause prefetch exclusive of cacheline
1: lld t2,0(a0)
daddu t1,t2,a1
scd t1,0(a0)
#ifdef R10K_LLSC_WAR
beql t1,zero,1b
#else
beq t1,zero,1b
#endif
sb zero,0(a2) # BDSLOT cause prefetch exclusive of cacheline
j ra
daddu v0,t2,a1 # BDSLOT
.set reorder
END(atomicAddInt64Hot2)
#endif /* MP */
/*
* Implementation of Push operation.
*
* a0 - address of queue head
* a1 - address of head of items to enqueue
* a2 - address of tail pointer
*
* Since head of list and address of tail pointer are specified separately,
* one can pass a list of items, already linked, to enqueue.
*/
LEAF(atomicPush)
.set noreorder
# Load the current queue head value
1:
PTR_L t0,0(a0)
#if MP
andi t1,t0,1
bne t1,zero,2f
#endif
CACHE_BARRIER_AT(0,a2) # for store to a2
# Store the current queue head pointer in tail pointer
PTR_S t0,0(a2)
# Link load the current queue head pointer.
# If the low bit is set, it is in use by alloc-routine -- try again.
# If current queue head pointer is the same as what we
# stored into our tail pointer, try to store it into the list head.
PTR_LL v0,0(a0)
bne v0,t0,1b
# copy item pointer into temp register -- the stored-from
# register gets changed as a result of the store-conditional
# instruction, and we may have to retry the load/store
move t2, a1
AUTO_CACHE_BARRIERS_DISABLE
PTR_SC t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
2: nop
#if MP
b 1b
nop
#endif
.set reorder
END(atomicPush)
/*
* Grab entire queue.
*
* a0 - address of queue head
*
*/
LEAF(atomicPull)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for PTR_SC (barrier for a0)
1:
PTR_LL v0,0(a0) # load the address of the first list item
#if MP
andi t1,v0,1
bne t1,zero,2f
#endif
move t0,zero # bdslot
AUTO_CACHE_BARRIERS_DISABLE
PTR_SC t0,0(a0) # null the queue header
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t0,zero,1b
#else
beq t0,zero,1b
#endif
nop
# Return to caller
j ra
2: nop
#if MP
b 1b
nop
#endif
.set reorder
END(atomicPull)
/*
* Implementation of atomicIncWithWrap.
* Function useful for circular queues
* a0 - address of 32-bit integer to inc
* a1 - wrap value. When a0 gets to this value we instead set it to 0
* Original value is returned in v0
*/
LEAF(atomicIncWithWrap)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
ll t2,0(a0)
addiu t1,t2,1
bne t1,a1,norm
nop
move t1,zero
norm:
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
sc t1,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t1,zero,1b
nop
#else
beq t1,zero,1b
#endif
# set return value in branch-delay slot
move v0,t2 # BDSLOT
# Return to caller
j ra
nop
.set reorder
END(atomicIncWithWrap)
/*
* Implementation of atomicAddWithWrap. Similar
* to atomicIncWithWrap, but add an aribtrary value
* to the index before wrapping *and* return the
* original value (not the new value).
*
* a0 - address of 32-bit integer to inc
* a1 - amount of add
* a2 - wrap value. When a0 gets to this value we instead set it to 0
* old value returned in v0
*/
LEAF(atomicAddWithWrap)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
ll t2,0(a0)
addu t1,t2,a1
blt t1,a2,2f
nop
subu t1,t1,a2
2:
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
sc t1,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t1,zero,1b
nop
#else
beq t1,zero,1b
#endif
# set return value in branch-delay slot
move v0,t2 # BDSLOT
# Return to caller
j ra
nop
.set reorder
END(atomicAddWithWrap)
/*
* Atomically set bits in an int/long word that contains a locking bit.
*
* a0 - address of integer/long word on which to operate
* a1 - lock bit
* a2 - value to 'or' into memory word
*
* returns old value
*/
LEAF(bitlock_set)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1: ll v0,0(a0)
#if MP
and ta1,v0,a1 # test that lock bit is clear
bne ta1,zero,1b # ain't used on SP systems
#endif
or t2,v0,a2
# Try to set the new bits
AUTO_CACHE_BARRIERS_DISABLE
sc t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(bitlock_set)
/*
* Atomically clear bits in an int/long word that contains a locking bit.
*
* a0 - address of integer/long word on which to operate
* a1 - lock bit
* a2 - value to 'or' into memory word
* a2 - mask of bits to clear in word
*
* returns old value
*/
LEAF(bitlock_clr)
.set noreorder
nor a2,a2,zero # a2 = ~a2
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1: ll v0,0(a0)
#if MP
and ta1,v0,a1 # test that lock bit is clear
bne ta1,zero,1b # ain't used on SP systems
#endif
and t2,v0,a2
# Try to set the new bits
AUTO_CACHE_BARRIERS_DISABLE
sc t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(bitlock_clr)
/*
* Acquire a bit lock in a long
*
* a0 - address of long word containing the bitlock
* a1 - bit used as lock
* returns old value
*/
LEAF(bitlock_acquire)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load current value
1:
LONG_LL v0, 0(a0)
and t2, v0, a1
bne t2, zero, 1b
# bit is zero, we can now try to set it
or t2, v0, a1
AUTO_CACHE_BARRIERS_DISABLE
LONG_SC t2, 0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2, zero, 1b
#else
beq t2, zero, 1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(bitlock_acquire)
/*
* Acquire a bit lock in a 32-bit word
*
* a0 - address of the word containing the bitlock
* a1 - bit used as lock
* returns old value
*/
LEAF(bitlock_acquire_32bit)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load current value
1:
ll v0, 0(a0)
and t2, v0, a1
bne t2, zero, 1b
# bit is zero, we can now try to set it
or t2, v0, a1
AUTO_CACHE_BARRIERS_DISABLE
sc t2, 0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2, zero, 1b
#else
beq t2, zero, 1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(bitlock_acquire_32bit)
/*
* Conditionally acquire a bit lock in a long
* (Try to acquire only a few times, and then give up)
* a0 - address of long word containing the bitlock
* a1 - bit used as lock
* returns 0 if acquired, 1 if NOT acquired
*/
LEAF(bitlock_condacq)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Initialize retry counter
li t3, 5
1: nop
bltz t3, 2f # jump to "too many retries"
subu t3, 1
# Load current value
3:
LONG_LL v0, 0(a0)
and t2, v0, a1
bne t2, zero, 4f # lock taken, jump to "spin a bit"
# bit is zero, we can now try to set it
or t2, v0, a1 # BDSLOT
AUTO_CACHE_BARRIERS_DISABLE
LONG_SC t2, 0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2, zero, 3b
#else
beq t2, zero, 3b
#endif
nop # BDSLOT
# Return to caller with success code (lock acquired)
j ra
or v0, zero, zero # BDSLOT
4:
# Lock is busy, spin a bit
li ta1, 15
5:
nop
bgez ta1, 5b
subu ta1, 1
# Now try again
b 1b
nop
2:
#ifdef R10K_LLSC_WAR
# We need 32 instructions between an LL and any possible path to
# another LL to avoid the LL/SC hazard in early R10K parts.
# We use 18 NOP's here since we already have 14 instructions
# in this speculative path root.
#
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop;
#endif
# We exhausted our allowed number of retries
# Return to caller with failure code (not acquired)
j ra
li v0, 1 # BDSLOT
.set reorder
END(bitlock_condacq)
LEAF(bitlock_condacq_32bit)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Initialize retry counter
li t3, 5
1: nop
bltz t3, 2f # jump to "too many retries"
subu t3, 1
# Load current value
3:
ll v0, 0(a0)
and t2, v0, a1
bne t2, zero, 4f # lock taken, jump to "spin a bit"
# bit is zero, we can now try to set it
or t2, v0, a1 # BDSLOT
AUTO_CACHE_BARRIERS_DISABLE
sc t2, 0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2, zero, 3b
#else
beq t2, zero, 3b
#endif
nop # BDSLOT
# Return to caller with success code (lock acquired)
j ra
or v0, zero, zero # BDSLOT
4:
# Lock is busy, spin a bit
li ta1, 15
5:
nop
bgez ta1, 5b
subu ta1, 1
# Now try again
b 1b
nop
2:
#ifdef R10K_LLSC_WAR
# We need 32 instructions between an LL and any possible path to
# another LL to avoid the LL/SC hazard in early R10K parts.
# We use 18 NOP's here since we already have 14 instructions
# in this speculative path root.
#
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop;
#endif
# We exhausted our allowed number of retries
# Return to caller with failure code (not acquired)
j ra
li v0, 1 # BDSLOT
.set reorder
END(bitlock_condacq_32bit)
/*
* void *mutex_set_always(__psunsigned_t *p, proc_t *replace)
*
* Replaces p's contents with "replace", regardless of whether
* p's contents were null or not. This is used by rtlocks to
* enable a process to assign an owner to a mutex.
*
* Returns: old contents.
*/
LEAF(mutex_set_always)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1: PTR_LL v0,0(a0)
move t0,a1
AUTO_CACHE_BARRIERS_DISABLE
PTR_SC t0,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t0,zero,1b
#else
beq t0,zero,1b
#endif
nop
j ra
nop
.set reorder
END(mutex_set_always)
/*
* Set a word and return the old value.
* a0 - address of word
* a1 - new value
*/
LEAF(test_and_set_int)
#if (_MIPS_SZLONG == 32)
XLEAF(test_and_set_long)
#endif
#if (_MIPS_SZPTR == 32)
XLEAF(test_and_set_ptr)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
ll v0,0(a0)
move t2,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
sc t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(test_and_set_int)
/*
* Set a word and return the old value.
* a0 - address of word
* a1 - new value
*/
#if (_MIPS_SZPTR == 64)
LEAF(test_and_set_ptr)
#if (_MIPS_SZLONG == 64)
XLEAF(test_and_set_long)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
# Load the old value
1:
lld v0,0(a0)
move t2,a1
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
scd t2,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t2,zero,1b
#else
beq t2,zero,1b
#endif
nop
# Return to caller
j ra
nop
.set reorder
END(test_and_set_ptr)
#endif
/*
* Swap data with memory
* a0 - address of word
* a1 - new value
* returns old value
*/
LEAF(swap_int)
#if (_MIPS_SZPTR == 32)
XLEAF(swap_ptr)
#endif
#if (_MIPS_SZLONG == 32)
XLEAF(swap_long)
#endif
.set noreorder
AUTO_CACHE_BARRIERS_DISABLE
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1: ll v0,0(a0)
move t0,a1
sc t0,0(a0)
#ifdef R10K_LLSC_WAR
beql t0,zero,1b
#else
beq t0,zero,1b
#endif
nop
j ra
nop
AUTO_CACHE_BARRIERS_ENABLE
.set reorder
END(swap_int)
/*
* Set a word to a new value if it equals a particular value
* a0 - address of word
* a1 - old value
* a2 - new value
* returns 1 if the values were swapped
*/
LEAF(compare_and_swap_int)
#if (_MIPS_SZPTR == 32)
XLEAF(compare_and_swap_ptr)
XLEAF(compare_and_swap_kt)
#endif
#if (_MIPS_SZLONG == 32)
XLEAF(compare_and_swap_long)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1:
# Load the old value
ll t0,0(a0)
# Check for match
bne t0,a1,2f
move v0,a2
# Try to set to the new value
AUTO_CACHE_BARRIERS_DISABLE
sc v0,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql v0,zero,1b
#else
beq v0,zero,1b
#endif
nop
# Return success to caller
j ra
nop
2:
#ifdef R10K_LLSC_WAR
# We need 32 instructions between an LL and any possible path to
# another LL to avoid the LL/SC hazard in early R10K parts.
# We use 28 NOP's here since we already have 4 instructions
# in this speculative path root.
#
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop;
#endif
# Return failure to caller
j ra
move v0,zero
.set reorder
END(compare_and_swap_int)
/*
* Increment a word in memory only if it is > 0.
* a0 - address of word
* returns 0 if the value was not incremented, non-zero otherwise
*/
LEAF(compare_and_inc_int_gt_zero)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1:
# Load the old value
ll v0,0(a0)
# Return failure if value is zero
ble v0,zero,2f
addu v0,1
# Try to set to the new value
AUTO_CACHE_BARRIERS_DISABLE
sc v0,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql v0,zero,1b
#else
beq v0,zero,1b
#endif
nop
# Return success to caller
j ra
nop
2:
#ifdef R10K_LLSC_WAR
# We need 32 instructions between an LL and any possible path to
# another LL to avoid the LL/SC hazard in early R10K parts.
# We use 28 NOP's here since we already have 4 instructions
# in this speculative path root.
#
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop;
#endif
# Return failure to caller
j ra
move v0,zero
.set reorder
END(compare_and_inc_int_gt_zero)
/*
* Decrement a word in memory only if it is > a specified value
* a0 - address of word
* a1 - compare value
* returns 1 if the value was decremented, zero otherwise
*/
LEAF(compare_and_dec_int_gt)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1:
# Load the old value
ll v0,0(a0)
# Return failure if value was less than or equal to compare value
addiu v0,-1
blt v0,a1,2f
nop
# Try to set to the new value
AUTO_CACHE_BARRIERS_DISABLE
sc v0,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql v0,zero,1b
#else
beq v0,zero,1b
#endif
nop
# Return success to caller
j ra
nop
2:
#ifdef R10K_LLSC_WAR
# We need 32 instructions between an LL and any possible path to
# another LL to avoid the LL/SC hazard in early R10K parts.
# We use 27 NOP's here since we already have 5 instructions
# in this speculative path root.
#
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop;
#endif
# Return failure to caller
j ra
move v0,zero
.set reorder
END(compare_and_dec_int_gt)
#if MP
/*
* int compare_and_dec_int_gt_hot2(volatile int *ip, int val, volatile int *jp)
*
* Decrement *ip only if it is greater than val. Returns 1 if *ip was
* decremented, zero otherwise.
*
* This is a special variant of the normal version of this routine that is
* used when *ip is a ``hot'' (frequently updated) word on an MP system. In
* addition to a pointer to an int to atomically manipulate, we also take a
* pointer to another word in the same cache line which we will store into
* once per LL/SC loop in order to acquire the cache line in dirty exclusive
* mode. This helps ensure the success of the SC ...
*/
LEAF(compare_and_dec_int_gt_hot2)
.set noreorder
CACHE_BARRIER_AT(0, a0) # top factored for sc (barrier for a0)
1:
ll v0, 0(a0) # Load the old value
addiu v0, -1 # Return failure if value was less
blt v0, a1, 2f # than or equal to compare value
nop
AUTO_CACHE_BARRIERS_DISABLE
sc v0, 0(a0) # Try to set the new value
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql v0, zero, 1b # Loop on SC failure
#else
beq v0, zero, 1b # Loop on SC failure
#endif
# On SC failure, force cache line to be dirty/exclusive mode in the
# branch delay slot of the LL/SC loop branch. We do this because
# we're probably going to want to try to store into it on the next
# LL/SC loop also and we want a reasonable chance of having the SC
# in the next loop succeed when the word is highly contended. When
# it's not contended, this extra SW instruction won't introduce a lot
# of overhead because the cache line will be in our primary cache
# dirty exclusive already because of the SC instruction. Note in
# particular that we *don't* want to do this at the head of the LL/SC
# loop as we do for some of the other ``hot word'' atomic routine
# versions because we sometimes don't update the word (when it is less
# than or equal to the comparison value).
#
sw zero, 0(a2) # BDS: force cache line dirty/exclusive
j ra # Return success to caller
nop # BDS (v0 already equal to 1 from SC)
2:
#ifdef R10K_LLSC_WAR
# We need 32 instructions between an LL and any possible path to
# another LL to avoid the LL/SC hazard in early R10K parts.
# We use 27 NOP's here since we already have 5 instructions
# in this speculative path root.
#
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop;
#endif
j ra # Return failure to caller
move v0, zero # BDS
.set reorder
END(compare_and_dec_int_gt_hot2)
#endif /* MP */
/*
* Swap data with memory
* a0 - address of word
* a1 - new value
* returns old value
*/
LEAF(swap_int64)
#if (_MIPS_SZPTR == 64)
XLEAF(swap_ptr)
#endif
#if (_MIPS_SZLONG == 64)
XLEAF(swap_long)
#endif
.set noreorder
AUTO_CACHE_BARRIERS_DISABLE
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1: lld v0,0(a0)
move t0,a1
scd t0,0(a0)
#ifdef R10K_LLSC_WAR
beql t0,zero,1b
#else
beq t0,zero,1b
#endif
nop
j ra
nop
AUTO_CACHE_BARRIERS_ENABLE
.set reorder
END(swap_int64)
/*
* Set a word to a new value if it equals a particular value
* a0 - address of word
* a1 - old value
* a2 - new value
* returns 1 if the values were swapped
*/
LEAF(compare_and_swap_int64)
#if (_MIPS_SZPTR == 64)
XLEAF(compare_and_swap_ptr)
XLEAF(compare_and_swap_kt)
#endif
#if (_MIPS_SZLONG == 64)
XLEAF(compare_and_swap_long)
#endif
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1:
# Load the old value
lld t0,0(a0)
# Check for match
bne t0,a1,2f
move v0,a2
# Try to set to the new value
AUTO_CACHE_BARRIERS_DISABLE
scd v0,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql v0,zero,1b
#else
beq v0,zero,1b
#endif
nop
# Return success to caller
j ra
nop
2:
#ifdef R10K_LLSC_WAR
# We need 32 instructions between an LL and any possible path to
# another LL to avoid the LL/SC hazard in early R10K parts.
# We use 28 NOP's here since we already have 4 instructions
# in this speculative path root.
#
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop;
#endif
# Return failure to caller
j ra
move v0,zero
.set reorder
END(compare_and_swap_int64)
/*
* Load the word from given address, and swap it to new value only if
* the current value is > parameter a1.
* if (*a0 > a1) {
* *a0 = a2;
* return 1;
* } else {
* return 0;
* }
*
* a0 - address of word
* a1 - value to compare for conditional check.
* a2 - new value to be Swapped.
* Return 1 is successful in swapping, 0 otherwise.
*/
LEAF(comparegt_and_swap_int)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1:
# Load the old value
ll t1,0(a0) # BD Slot
# Conditional check -- else part.
ble t1,a1,2f
move v0,a2
# Try to set the new one
AUTO_CACHE_BARRIERS_DISABLE
sc v0,0(a0) # BD Slot
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql v0,zero,1b
#else
beq v0,zero,1b
#endif
nop
# Return success.
j ra
nop
# Return to caller
2:
#ifdef R10K_LLSC_WAR
# We need 32 instructions between an LL and any possible path to
# another LL to avoid the LL/SC hazard in early R10K parts.
# We use 28 NOP's here since we already have 4 instructions
# in this speculative path root.
#
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop; nop; nop; nop; nop;
nop; nop; nop; nop;
#endif
j ra
move v0,zero # BD slot
.set reorder
END(comparegt_and_swap_int)
LEAF(store_atomic)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1:
ll ta0,0(a0)
AUTO_CACHE_BARRIERS_DISABLE
sc ta0,0(a0)
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql ta0,zero,1b
#else
beq ta0,zero,1b
#endif
nop
j ra
nop
.set reorder
END(store_atomic)
/*
* Implementation of atomicIncPort
* Used to avoid locking overhead on MP systems
*
* a0 -- address of 32-bit integer to increment
* a1 -- wrap value
* a2 -- new value
* When *a0 becomes greater than a1, it is set to a2
* v0 -- return value; previous contents of *a0
*/
LEAF(atomicIncPort)
.set noreorder
CACHE_BARRIER_AT(0,a0) # top factored for sc (barrier for a0)
1:
ll t2,0(a0) # get old value
addiu t1,t2,1
ble t1,a1,nrm
nop
move t1, a2 # reset if wrapped
nrm:
AUTO_CACHE_BARRIERS_DISABLE
sc t1,0(a0) # set new value
AUTO_CACHE_BARRIERS_ENABLE
#ifdef R10K_LLSC_WAR
beql t1,zero,1b
nop
#else
beq t1,zero,1b
#endif
move v0,t2 # BDSLOT
j ra
nop
.set reorder
END(atomicIncPort)