irix-657m-src/irix/kern/sys/atomic_ops.h
/**************************************************************************
 *                                                                        *
 *               Copyright (C) 1993, Silicon Graphics, Inc.               *
 *                                                                        *
 *  These coded instructions, statements, and computer programs contain   *
 *  unpublished proprietary information of Silicon Graphics, Inc., and    *
 *  are protected by Federal copyright law. They may not be disclosed     *
 *  to third parties or copied or duplicated in any form, in whole or     *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *                                                                        *
 **************************************************************************/
#ifndef __SYS_ATOMIC_OPS_H__
#define __SYS_ATOMIC_OPS_H__
#ident "$Revision: 1.43 $"
#if _KERNEL && !_STANDALONE && !LANGUAGE_ASSEMBLY
/*
* MIPS atomic operations based on link-load, store-conditional instructions.
*/
struct kthread;
#if defined(_COMPILER_VERSION) && (_COMPILER_VERSION>=700) && !defined(IP28)
/*
* Use compiler atomic operation intrinsics for the MIPSpro 7.0 and later
* compilers.
*
* We don't enable the compiler intrinsics for atomic operations on the IP28
* (Indigo2/R10K) because of a bug in the compiler: #763380,
* ``t5_no_spec_stores not working for atomic intrinsics.'' When this bug is
* fixed we can reenable the intrinsics for IP28.
*/
/*
* Add, set or clear bits in a (4-byte) int.
*/
#define atomicAddInt(a, b) __add_and_fetch(a, b)
#define atomicSetInt(a, b) __fetch_and_or(a, b)
#define atomicClearInt(a, b) __fetch_and_and(a, ~(b))
#define atomicAddUint(a, b) __add_and_fetch(a, b)
#define atomicSetUint(a, b) __fetch_and_or(a, b)
#define atomicClearUint(a, b) __fetch_and_and(a, ~(b))
/*
* Add, set or clear bits in a long -- works for native size of long.
*/
#define atomicAddLong(a, b) __add_and_fetch(a, b)
#define atomicSetLong(a, b) __fetch_and_or(a, b)
#define atomicClearLong(a, b) __fetch_and_and(a, ~(b))
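/*
* Atomically replace the bits selected by the mask "f" with the bits in "b",
* returning the previous value of the word.
*/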
__inline uint atomicFieldAssignUint(uint *l, uint f, uint b)
{
	uint tmp;

	do {
		tmp = *l;
	} while (!__compare_and_swap((l), (tmp),
				     (((tmp) & (~((uint)f))) | (b))));
	return tmp;
}

__inline long atomicFieldAssignLong(long *l, long f, long b)
{
	long tmp;

	do {
		tmp = *l;
	} while (!__compare_and_swap((l), (tmp),
				     (((tmp) & (~((__uint64_t)f))) | (b))));
	return tmp;
}
#define atomicAddUlong(a, b) __add_and_fetch(a, b)
#define atomicSetUlong(a, b) __fetch_and_or(a, b)
#define atomicClearUlong(a, b) __fetch_and_and(a, ~(b))
/*
* Add, set or clear bits in a 64 bit int
*/
#define atomicAddInt64(a, b) __add_and_fetch(a, b)
#define atomicAddUint64(a, b) __add_and_fetch(a, b)
#define atomicSetInt64(a, b) __fetch_and_or(a, b)
#define atomicSetUint64(a, b) __fetch_and_or(a, b)
#define atomicClearInt64(a, b) __fetch_and_and(a, ~(b))
#define atomicClearUint64(a, b) __fetch_and_and(a, ~(b))
#define bitlock_release(a, b) __fetch_and_and(a, ~(b))
#define bitlock_release_32bit(a, b) __fetch_and_and(a, ~(b))
#else /* ucode or ragnarok compilers or IP28 */
/*
* Add, set or clear bits in a (4-byte) int.
*/
extern int atomicAddInt(volatile int *, int);
extern int atomicSetInt(volatile int *, int);
extern int atomicClearInt(volatile int *, int);
extern uint atomicAddUint(volatile uint *, uint);
extern uint atomicSetUint(volatile uint *, uint);
extern uint atomicClearUint(volatile uint *, uint);
/*
* Add, set or clear bits in a long -- works for native size of long.
*/
extern long atomicAddLong(volatile long *, long);
extern long atomicSetLong(volatile long *, long);
extern long atomicClearLong(volatile long *, long);
extern long atomicFieldAssignLong(volatile long *, long, long);
extern int atomicFieldAssignUint(volatile uint *, uint, uint);
extern unsigned long atomicAddUlong(volatile unsigned long *, unsigned long);
extern unsigned long atomicSetUlong(volatile unsigned long *, unsigned long);
extern unsigned long atomicClearUlong(volatile unsigned long *, unsigned long);
/*
* Add, set or clear bits in a 64 bit int
*/
extern int64_t atomicAddInt64(volatile int64_t *, int64_t);
extern uint64_t atomicAddUint64(volatile uint64_t *, uint64_t);
extern int64_t atomicSetInt64(volatile int64_t *, int64_t);
extern uint64_t atomicSetUint64(volatile uint64_t *, uint64_t);
extern int64_t atomicClearInt64(volatile int64_t *, int64_t);
extern uint64_t atomicClearUint64(volatile uint64_t *, uint64_t);
extern uint64_t bitlock_release(volatile uint64_t *, uint64_t);
extern uint32_t bitlock_release_32bit(volatile uint32_t *, uint32_t);
#endif /* ucode or ragnarok compilers or IP28 */
/*
* Increment a (4-byte) int; wrap back to 0 when the value matches the second
* argument.
*/
extern int atomicIncWithWrap(int *, int);
extern int atomicAddWithWrap(int *, int, int);
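/*
* E.g., advancing an index into an N-entry ring ("cursor" and "N" are
* illustrative names only, not part of this interface):
*
*	atomicIncWithWrap(&cursor, N);
*/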
/* PCB anon port manipulation */
extern int atomicIncPort(int *, int, int);
/*
* Set or clear bits in a cpumask
*
* Note: These functions have always returned incorrect values on platforms
* that support more than 64 cpus. Since no kernel code uses the return
* value, the safest way to fix this problem is to change the function
* prototype to "void".
* The problem is related to:
*	- the register conventions for returning values
*	  greater than 128 bits
*	- the functions return the wrong value for systems that
*	  support > 64 cpus
*/
extern void atomicSetCpumask(cpumask_t *, cpumask_t *);
extern void atomicClearCpumask(cpumask_t *, cpumask_t *);
/*
* Atomic assembler routines used by heap allocators.
*/
extern void *atomicPush(void *, void *, void *);
extern void *atomicPull(void *);
/*
* Exchange values in memory. test_and_set is unconditional.
* compare_and_swap is conditional, returning 1 and swapping if the
* memory value equals "old".
*/
extern int test_and_set_int(int *loc, int new);
extern long test_and_set_long(long *loc, long new);
extern void * test_and_set_ptr(void **loc, void *new);
extern int compare_and_swap_int(int *loc, int old, int new);
extern int compare_and_swap_long(long *loc, long old, long new);
extern int compare_and_swap_ptr(void **loc, void *old, void *new);
extern int compare_and_swap_int64(__int64_t *loc, __int64_t old, __uint64_t new);
extern int comparegt_and_swap_int(int *loc, int compare_val, int new);
extern int compare_and_inc_int_gt_zero(int *loc);
extern int compare_and_dec_int_gt(int *loc, int compare_val);
int compare_and_swap_kt(struct kthread **, struct kthread *,
			struct kthread *);
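/*
* Typical compare_and_swap usage is a retry loop, e.g. ("counter", "old" and
* "new" below are illustrative only, not part of this interface):
*
*	int old, new;
*
*	do {
*		old = counter;
*		new = old + 1;
*	} while (!compare_and_swap_int(&counter, old, new));
*/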
#if MP
#define compare_and_dec_int_gt_hot(cp,v) compare_and_dec_int_gt_hot2(&(cp)->value, v, &(cp)->pref)
extern int compare_and_dec_int_gt_hot2(volatile int *loc, int compare_val, char *pref);
#else /* MP */
#define compare_and_dec_int_gt_hot(cp,v) compare_and_dec_int_gt((int*)&(cp)->value, v)
#endif /* MP */
extern int swap_int(int *loc, int new);
extern long swap_long(long *loc, long new);
extern void * swap_ptr(void **loc, void *new);
extern __uint64_t swap_int64(__int64_t *loc, __uint64_t new);
/*
* Primitives to lock the pfn field in ptes
*/
extern long bitlock_acquire(long *, long);
extern long bitlock_condacq(long *, long);
/*
* Primitives to lock a 32-bit word by a bit field inside the word
*/
extern void bitlock_acquire_32bit(__uint32_t *word, __uint32_t locking_bit);
extern long bitlock_condacq_32bit(__uint32_t *, __uint32_t);
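/*
* E.g., using bit 0x1 of "word" as the lock bit ("word" is an illustrative
* variable only, not part of this interface):
*
*	bitlock_acquire_32bit(&word, 0x1);
*	... update the fields guarded by the lock bit ...
*	bitlock_release_32bit(&word, 0x1);
*/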
/*
* Primitives for atomic add on hot cachelines.
*
* Sometimes it's either impossible to avoid global counters, etc. or simply
* too hard, for various scheduling and resource reasons, to do a distributed
* rewrite of a piece of code. This is bad because if the counter is
* frequently updated from lots of CPUs we'll take an enormous performance hit
* because of cache coherency overhead. Worse yet, if the counter is updated
* in an atomic manner with LL/SC instructions, the ``throughput'' rate on
* counter updates can go to near zero as the CPUs fight for control of the
* cache line containing the counter. One relatively cheap hack that helps in
* the MP cache contention case and doesn't hurt SP updates appreciably is to
* do a store into another word in the same cache line as the counter before
* each LL/SC loop. This works because it forces the cache line to be in the
* local CPU's cache in Dirty Exclusive mode, letting the cache coherency
* hardware figure out how to arbitrate this. Then, we have a short time
* window in which we can do a LL/SC with a high probability of success
* because our CPU ``owns'' the cache line.
*
* We encode this idea via several ``hot counter'' variants of the standard
* atomic counter routines. The lowest level routines take all of the same
* arguments that the standard atomic counter routines do plus an additional
* pointer to a ``junk'' word that will be stored to just before the LL in the
* LL/SC loop. We also provide a convenience data type for hot counters and
* some inline routines that take pointers to this data type and call the low
* level routines.
*
* Most of these ideas only apply to MP platforms, since SP platforms don't
* have the contention problem and we can save the space and the small amount
* of extra time there ...
*
* The counter (int, uint, etc) is embedded in a structure that contains
* a byte to use in the LL/SC loop to cause a prefetch-exclusive
* of the cacheline. This avoids extra coherency traffic & substantially
* improves performance of updating highly contended cache lines.
*
* NOTE: use the structure definitions for counters when possible. The actual
*	assembly language primitives take 3 arguments: the increment value,
*	the pointer to the counter & the pointer to the char to use for
*	the prefetch. This allows the byte that is used for the prefetch to
*	be allocated separately if needed.
*/
typedef struct {
	volatile int value;
#if MP
	char pref;
#endif
} hotIntCounter_t;

typedef struct {
	volatile uint value;
#if MP
	char pref;
#endif
} hotUintCounter_t;

typedef struct {
	volatile int64_t value;
#if MP
	char pref;
#endif
} hotInt64Counter_t;

typedef struct {
	volatile uint64_t value;
#if MP
	char pref;
#endif
} hotUint64Counter_t;
#pragma set type attribute hotIntCounter_t align=8
#pragma set type attribute hotUintCounter_t align=8
#pragma set type attribute hotInt64Counter_t align=16
#pragma set type attribute hotUint64Counter_t align=16
#define fetchIntHot(c) ((c)->value)
#define fetchUintHot(c) ((c)->value)
#define fetchInt64Hot(c) ((c)->value)
#define fetchUint64Hot(c) ((c)->value)
#define setIntHot(c,v) (c)->value = (v)
#define setUintHot(c,v) (c)->value = (v)
#define setInt64Hot(c,v) (c)->value = (v)
#define setUint64Hot(c,v) (c)->value = (v)
#define atomicAddIntHot(c,n) atomicAddIntHot2(&(c)->value, n, &(c)->pref)
#define atomicAddUintHot(c,n) atomicAddUintHot2(&(c)->value, n, &(c)->pref)
#define atomicAddInt64Hot(c,n) atomicAddInt64Hot2(&(c)->value, n, &(c)->pref)
#define atomicAddUint64Hot(c,n) atomicAddUint64Hot2(&(c)->value, n, &(c)->pref)
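/*
* E.g., a frequently-updated global counter ("nrequests" and "n" are
* illustrative names only, not part of this interface):
*
*	hotIntCounter_t	nrequests;	(aligned per the pragmas above)
*	int		n;
*
*	atomicAddIntHot(&nrequests, 1);
*	n = fetchIntHot(&nrequests);	(ordinary, non-atomic read)
*/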
#if MP
extern int atomicAddIntHot2(volatile int *, int, char *);
extern uint atomicAddUintHot2(volatile uint *, uint, char *);
extern int64_t atomicAddInt64Hot2(volatile int64_t *, int64_t, char *);
extern uint64_t atomicAddUint64Hot2(volatile uint64_t *, uint64_t, char *);
#else /* !MP */
/*
* A small bug in the 7.2.1.3 compiler suite (#767819) causes cpp tokens to be
* re-replaced in the original replacement list if a nested macro resolves to
* one of the compiler intrinsics. The hack below is designed to get around
* this till the bug is fixed.
*/
#if defined(_COMPILER_VERSION) && (_COMPILER_VERSION>=700) && !defined(IP28)
#define atomicAddIntHot2(cp,val,pp) __add_and_fetch(cp,val)
#define atomicAddUintHot2(cp,val,pp) __add_and_fetch(cp,val)
#define atomicAddInt64Hot2(cp,val,pp) __add_and_fetch(cp,val)
#define atomicAddUint64Hot2(cp,val,pp) __add_and_fetch(cp,val)
#else
#define atomicAddIntHot2(cp,val,pp) atomicAddInt(cp, val)
#define atomicAddUintHot2(cp,val,pp) atomicAddUint(cp, val)
#define atomicAddInt64Hot2(cp,val,pp) atomicAddInt64(cp, val)
#define atomicAddUint64Hot2(cp,val,pp) atomicAddUint64(cp,val)
#endif
#endif /* !MP */
#if (_MIPS_SZLONG == 32)
typedef hotIntCounter_t hotLongCounter_t;
typedef hotUintCounter_t hotUlongCounter_t;
#define fetchLongHot(c) fetchIntHot(c)
#define fetchUlongHot(c) fetchUintHot(c)
#define setLongHot(c,v) setIntHot(c,v)
#define setUlongHot(c,v) setUintHot(c,v)
#define atomicAddLongHot(c,n) atomicAddIntHot(c,n)
#define atomicAddUlongHot(c,n) atomicAddUintHot(c,n)
#else /* _MIPS_SZLONG == 64 */
typedef hotInt64Counter_t hotLongCounter_t;
typedef hotUint64Counter_t hotUlongCounter_t;
#define fetchLongHot(c) fetchInt64Hot(c)
#define fetchUlongHot(c) fetchUint64Hot(c)
#define setLongHot(c,v) setInt64Hot(c,v)
#define setUlongHot(c,v) setUint64Hot(c,v)
#define atomicAddLongHot(c,n) atomicAddInt64Hot(c,n)
#define atomicAddUlongHot(c,n) atomicAddUint64Hot(c,n)
#endif /* _MIPS_SZLONG == 64 */
#endif /* _KERNEL && !_STANDALONE && !LANGUAGE_ASSEMBLY */
#endif /* !__SYS_ATOMIC_OPS_H__ */