608 lines
19 KiB
C
608 lines
19 KiB
C
|
|
/*
|
|
** Basic definitions and data structures for the compiler MP library.
|
|
*/
|
|
|
|
/*************************************************************************/
|
|
/* Symbolic constants and useful values (usable by asm routines as well) */
|
|
/*************************************************************************/
|
|
/* Number of per-thread slots; sizes every per-thread array in this header. */
/* Currently, MAX_THREADS is limited to CACHE_LINE_SIZE - 8 (see the */
/* c4DataType definition): two int32 fields plus one flag byte per thread */
/* have to fit in a single cache line. */
#define MAX_THREADS 64

/* Threads taken in groups of 4 ( == sizeof(int32)) */
#define MAX_GROUPS (MAX_THREADS / 4)
|
|
|
|
/* This many threads needs extra space in the shm arena */
#define ARENA_SIZE (128*1024)

/* Biggest line size on any machine (i.e. Everest) */
/* LOG2_CACHE_LINE_SIZE must stay equal to log2(CACHE_LINE_SIZE). */
#define CACHE_LINE_SIZE      128
#define LOG2_CACHE_LINE_SIZE 7

/* An 8K alignment would be desirable because the R4400 maps 2 4K pages into
** one tlb slot.  So an 8K alignment increases the chances that we will
** only consume one tlb (admittedly a minor point, but why not).
*/
/* #define DESIRABLE_ALIGNMENT 8192 */
/* #define LOG2_DESIRABLE_ALIGNMENT 13 */

/* As of this writing, the assembler only permits alignment up to 4K */
/* LOG2_DESIRABLE_ALIGNMENT must stay equal to log2(DESIRABLE_ALIGNMENT). */
#define DESIRABLE_ALIGNMENT      4096
#define LOG2_DESIRABLE_ALIGNMENT 12
|
|
|
|
|
|
/* "join" area info */
|
|
/* Each join line carries this many flags; one flag per thread overall. */
#define NUM_JOIN_FLAGS_PER_LINE      4   /* Currently must be a power of 2 */
#define LOG2_NUM_JOIN_FLAGS_PER_LINE 2   /* log2(NUM_JOIN_FLAGS_PER_LINE) */
#define NUM_JOIN_LINES (MAX_THREADS/NUM_JOIN_FLAGS_PER_LINE)

/* Size of the taskCommon data structure, in bytes.
** The 8 counts the fixed cache lines of taskCommonStructType (c1..c7 and
** single_construct_cb); they are followed by NUM_JOIN_LINES join lines and
** one cache line per thread.
*/
#define TCOM_SIZE ((8 + NUM_JOIN_LINES + MAX_THREADS)*CACHE_LINE_SIZE)
|
|
|
|
/* Info for the 64bit counters that control the syncs */
#define INITIAL_CONSTRUCT_INSTANCE 2
/* 1024^6, i.e. 2^60 */
#define INITIAL_FLAG_VALUE (1024LL * 1024LL * 1024LL * 1024LL * 1024LL * 1024LL)
#define UNUSED_FLAG_VALUE 1
/* Values from INITIAL_CONSTRUCT_INSTANCE up to INITIAL_FLAG_VALUE are used
** for controlling PCF style constructs inside of pregions.  Values greater
** than INITIAL_FLAG_VALUE control "outer" parallelism (e.g. doacross and
** the entry/exit from a PCF style region).  It is assumed the values
** never collide (we don't check).
*/

/* Number of pregion construct cb's initially allocated */
#define INITIAL_CB_ALLOCATION 100
|
|
|
|
|
|
/* The various bit values for the "state" word (c2DataType.state, which is
** declared as a uint32 even though only the low byte is assigned here).
** Each flag is a distinct bit so they can be OR-ed together.
*/
#define INITIALIZED      0x01
#define USER_BLOCKED     0x02
#define MULTI_PROCESSING 0x04
#define PROFILE_MODE     0x08
#define SOFT_LOCKS       0x10
#define NORMAL_STATE     (INITIALIZED)
|
|
|
|
|
|
/*************************************************************************/
|
|
|
|
|
|
|
|
#ifdef _LANGUAGE_C
|
|
|
|
#include <stdio.h>
|
|
#include <sys/param.h>
|
|
#include <sys/types.h>
|
|
#include <ulocks.h>
|
|
|
|
/* pollute my name space a bit with nicer names */
typedef __int32_t  int32;
typedef __uint32_t uint32;
typedef __int64_t  int64;
typedef __uint64_t uint64;
typedef unsigned char uint8;
typedef signed char   int8;

/* One-byte truth value; TRUE and FALSE evaluate to the int values 1 and 0. */
typedef uint8 boolean;
#define TRUE  (1==1)
#define FALSE (1==0)

/* An unsigned int that is the same size as a pointer. */
/* Must work for both 32 and 64 bit worlds! (note that ptrdiff_t is signed) */
typedef unsigned long int uint_ptr;
|
|
|
|
|
|
|
|
|
|
/* Max # of constructs within a parallel region. */
/* This really only affects mpc style construct locks; control blocks
** are always allocated dynamically.
*/
#define MAX_CONSTRUCTS 256

/* Classic two-argument min/max, only defined when nothing earlier has
** supplied them.  Both are function-like macros: the selected argument is
** evaluated twice, so do not pass expressions with side effects.
*/
#ifndef min
#define min(x,y) (((x)<(y)) ? (x) : (y))
#endif
#ifndef max
#define max(x,y) (((x)>(y)) ? (x) : (y))
#endif
|
|
|
|
/* For 32/64bit compatibility. This type ensures we always allocate
|
|
** an aligned 64bit space for a pointer.
|
|
*/
|
|
typedef struct {
|
|
union {
|
|
volatile void *ptr;
|
|
uint64 padding;
|
|
} data;
|
|
} vptr_rec;
|
|
typedef struct {
|
|
union {
|
|
void *ptr;
|
|
uint64 padding;
|
|
} data;
|
|
} ptr_rec;
|
|
|
|
|
|
typedef char cacheLineType[CACHE_LINE_SIZE];
|
|
|
|
|
|
/* A line in the "join" area */
|
|
struct joinDataType {
|
|
volatile uint64 flag[NUM_JOIN_FLAGS_PER_LINE];
|
|
};
|
|
struct joinLineType {
|
|
union {
|
|
struct joinDataType data;
|
|
cacheLineType padding;
|
|
} node;
|
|
};
|
|
|
|
|
|
|
|
/* Types for X3H5 parallel regions ("cb" means "control block") */
|
|
typedef struct {
|
|
/* boolean done; Is this used ?? */
|
|
/* uint8 region_threads; */
|
|
|
|
/* All the stuff that used to go here has slowly migrated to
|
|
** other places.
|
|
*/
|
|
uint64 dummy;
|
|
} region_cb_type;
|
|
|
|
|
|
typedef struct {
|
|
ulock_t controlLock; /* Used by Power C interface */
|
|
|
|
/* The instance most recently started (possibly still going) */
|
|
volatile uint64 construct_instance;
|
|
/* The most recent instance that has had all work assigned */
|
|
volatile uint64 all_allocated_instance;
|
|
/* Note that this is NOT the same as "complete"; the assigned
|
|
** work may still be executing. What it actually signals
|
|
** is that the given instance is now done using the construct_cb,
|
|
** and so the cb is free for another instance to use.
|
|
*/
|
|
|
|
|
|
/* sched_type kept in thread_cb, not construct_cb */
|
|
volatile int64 base; /* "current base" for dynamic schedules */
|
|
volatile int64 tripcount; /* in "chunks" for dynamic schedules */
|
|
volatile int64 stride; /* "stride between chunks" for dyn sched */
|
|
uint64 original_tripcount;
|
|
|
|
/* info for dynamic schedules */
|
|
uint32 full_chunk_size;
|
|
uint32 last_chunk_size;
|
|
|
|
/* "zero" is volatile so that when a store to it appears first in a
|
|
** basic block (to get exclusive access) the store won't be moved.
|
|
*/
|
|
volatile uint8 zero;
|
|
|
|
/* Used for enter/exit gate */
|
|
volatile uint8 thread_count;
|
|
|
|
/* info for gss schedules */
|
|
uint8 shift_amount;
|
|
boolean correction_needed;
|
|
|
|
} construct_cb_type;
|
|
|
|
|
|
typedef struct {
|
|
union {
|
|
region_cb_type region_cb;
|
|
construct_cb_type construct_cb;
|
|
cacheLineType padding;
|
|
} data;
|
|
} aligned_cb_type;
|
|
|
|
|
|
|
|
|
|
|
|
/* Definition of the mp data area */
|
|
typedef struct {
|
|
|
|
/* first cache line */
|
|
/* Info used by the slave threads for "outer level" paralleism
|
|
** (doacross or pregion).
|
|
*/
|
|
|
|
/* Flag that all slaves spin on while waiting for a parallel region */
|
|
volatile uint64 startFlag;
|
|
|
|
/* Since 64bit values may not be written atomically, we set this value
|
|
** first, then set the startFlag. Even if a slave sees a partial
|
|
** update of startFlag, this will reliably hold the full value
|
|
*/
|
|
volatile uint64 reliableStartFlagValue;
|
|
|
|
/* Info about the parallel region */
|
|
vptr_rec proc;
|
|
vptr_rec staticLink;
|
|
|
|
volatile int64 base;
|
|
volatile int64 stride;
|
|
volatile uint64 totalTripcount;
|
|
volatile uint64 chunkSize; /* Also trips/threads for F_SIMPLE_DOALL */
|
|
|
|
volatile uint8 currentNumthreads;
|
|
volatile uint8 schedType;
|
|
volatile uint8 remainder; /* rem(trips/threads) for F_SIMPLE_DOALL */
|
|
volatile uint8 interfaceType; /* 32 or 64bit */
|
|
volatile uint8 unused[4];
|
|
|
|
/* Used to make memory consistent */
|
|
vptr_rec memorySyncLock; /* ulock_t memorySyncLock */
|
|
/* EVent Counter location */
|
|
vptr_rec evcPtr;
|
|
|
|
/* Pointer to the array of construct cb's for a
|
|
** parallel region (only used by pregions, not by doacross).
|
|
*/
|
|
vptr_rec global_cb_array_ptr;
|
|
volatile uint64 construct_instance_counter;
|
|
|
|
} c1DataType;
|
|
|
|
|
|
typedef struct {
|
|
|
|
/* second cache line */
|
|
|
|
/* Info that is (usually) looked at or changed only by
|
|
** the master thread.
|
|
*/
|
|
volatile uint32 zero;
|
|
volatile uint32 state;
|
|
volatile uint32 suggestedNumthreads;
|
|
volatile uint32 previousNumthreads;
|
|
volatile uint32 maxNumthreads;
|
|
|
|
volatile uint32 evcValue;
|
|
uint64 flagValue;
|
|
|
|
/* These make the special case asm code a tiny bit easier */
|
|
ptr_rec waitEntry;
|
|
ptr_rec forkEntry;
|
|
ptr_rec wakeEntry;
|
|
ptr_rec resetEntry;
|
|
|
|
ptr_rec masters_cb_array_ptr;
|
|
uint64 masters_construct_instance_counter;
|
|
uint32 max_num_constructs;
|
|
|
|
} c2DataType;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
/* Third cache line */
|
|
/* Info for dynamic/gss scheduling */
|
|
|
|
/* Almost everything that used to be in this line is now obsolete */
|
|
|
|
/* volatile uint8 d_zero1; */
|
|
/* volatile uint8 d_unused1; */
|
|
/* volatile uint8 shiftAmount; */ /* Used in gss computation */
|
|
/* volatile boolean correctionNeeded; */ /* ditto */
|
|
|
|
/* volatile int64 currentBase; */
|
|
/* volatile int64 remainingTrips; */
|
|
|
|
ptr_rec iterationLockHandle; /* ulock_t iterationLockHandle */
|
|
ptr_rec internalLockHandle; /* ulock_t internalLockHandle */
|
|
|
|
} c3DataType;
|
|
|
|
|
|
typedef struct {
|
|
|
|
/* Forth and fifth cache lines */
|
|
/* Flags to deal with auto-blocking */
|
|
volatile int32 itersTillBlock;
|
|
volatile int32 iterIncrement;
|
|
union {
|
|
volatile boolean intendsToBlock[MAX_THREADS];
|
|
volatile uint32 intendsToBlockGroup[MAX_GROUPS];
|
|
} autoBlockFlags;
|
|
} c4DataType;
|
|
typedef struct {
|
|
union {
|
|
volatile boolean isNowUnblocked[MAX_THREADS];
|
|
volatile uint32 isNowUnblockedGroup[MAX_GROUPS];
|
|
} autoUnblockFlags;
|
|
} c5DataType;
|
|
|
|
|
|
|
|
|
|
|
|
/* Special locks for "user friendly" locking routines */
|
|
typedef struct {
|
|
ulock_t userLock;
|
|
barrier_t *userBarrier;
|
|
} userSyncType;
|
|
extern userSyncType __mp_userSync;
|
|
|
|
/*
** Definitions for "Control Blocks" used to control parallelism.
**
** Each *thread* has its own cb used for holding state info.  Some
** info is duplicated here as well for convenience.
**
** Each *region* has a cb for holding info about that region.
**
** Each *construct* in a region has a cb for holding info about
** that construct.
**
** The region cb is kept in the place where "construct cb #0"
** would go (this makes it easier to find things).
**
** For example, when using dynamic scheduling, the construct cb
** has info about the current state of the iterations, and all
** threads use it.  With interleaved scheduling, each thread
** keeps track of where it is on its own.
**
** (Minor note: we let "current" tripcounts be "int" rather than "uint"
** because it is sometimes convenient (e.g. during dynamic scheduling)
** to let them go negative during the course of calculations.  Note that
** "original" tripcounts are uint.)
*/
|
|
|
|
/* Scopes for locking within a parallel region */
#define GLOBAL_LOCK 1   /* Whole program */
#define REGION_LOCK 2   /* Whole region */
#define BLOCK_LOCK  3   /* This construct */
|
|
|
|
extern ulock_t __mp_global_lock;
|
|
extern ulock_t __mp_region_lock[MAX_THREADS];
|
|
extern ulock_t __mp_construct_lock[MAX_CONSTRUCTS];
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
volatile uint64 thread_instance; /* volatile for __mp_exit_gate */
|
|
uint64 gate_instance; /* used for enter/exit gate */
|
|
|
|
int64 base; /* This is "current base" for interleave */
|
|
int64 tripcount; /* This is "remaining chunks" for interleave */
|
|
int64 stride; /* This is "inter-chunk-distance" for interleave */
|
|
|
|
volatile uint64 *done_flag_addr;
|
|
aligned_cb_type *my_copy_cb_array_ptr;
|
|
|
|
/* info for interleave schedules */
|
|
uint32 full_chunk_size;
|
|
uint32 last_chunk_size;
|
|
|
|
volatile uint8 zero;
|
|
uint8 sched_type;
|
|
boolean done;
|
|
boolean i_do_last_iteration;
|
|
uint8 interface_type;
|
|
uint8 num_threads;
|
|
} thread_cb_type;
|
|
|
|
typedef struct {
|
|
union {
|
|
thread_cb_type thread_cb;
|
|
cacheLineType padding;
|
|
} data;
|
|
} aligned_thread_cb_type;
|
|
|
|
|
|
|
|
|
|
/* Info about GSS scheduling for different numbers of threads */
|
|
typedef struct gss_info {
|
|
uint8 shiftAmount;
|
|
boolean correctionNeeded;
|
|
} gss_info_type;
|
|
extern gss_info_type __mp_gss_info[MAX_THREADS+1];
|
|
|
|
|
|
|
|
/* Type for "ordinal" synchronization */
|
|
typedef struct {
|
|
volatile int32 ord_value;
|
|
int32 ord_increment;
|
|
} ordinal_type;
|
|
|
|
|
|
|
|
/* Random variables needed in more than one place */
|
|
extern uint32 __mp_runtime_sched_type;
|
|
extern uint32 __mp_runtime_chunk_size;
|
|
|
|
|
|
|
|
/* Random defines */
|
|
|
|
/* This thread's number, read from the auto_mp_id member of PRDALIB. */
/* Works as long as uninitialized PRDA is set to zero */
#define M_my_threadnum (PRDALIB->auto_mp_id)

/* If we are on a machine without strongly consistent memory (e.g. IP5, IP7)
** this brings memory up to date.  On machines that don't need it (e.g.
** everything else) we arrange for it to be harmless and fast.
**
** The expansion is a volatile int32 load through the M_memorySyncLock
** pointer.
*/
#define SYNC_MEM (*((volatile int32 *) M_memorySyncLock))
|
|
|
|
|
|
|
|
/********************************************************************/
|
|
/* The major MP data structure */
|
|
/* Pretty much everything we need is all crammed into this structure.
|
|
** This gives us a single thing to dereference (which makes -xgot
|
|
** cheaper), and ensures that everything gets the alignment we want by
|
|
** forcing the alignment of just this one thing.
|
|
*/
|
|
|
|
/* If more cache lines are added to this structure, you also
|
|
** need to update the TCOM_SIZE #define
|
|
*/
|
|
typedef struct {
|
|
|
|
union {
|
|
c1DataType c1Data;
|
|
cacheLineType padding;
|
|
} c1;
|
|
|
|
union {
|
|
c2DataType c2Data;
|
|
cacheLineType padding;
|
|
} c2;
|
|
|
|
union {
|
|
c3DataType c3Data;
|
|
cacheLineType padding;
|
|
} c3;
|
|
|
|
union {
|
|
c4DataType c4Data;
|
|
cacheLineType padding;
|
|
} c4;
|
|
|
|
union {
|
|
c5DataType c5Data;
|
|
cacheLineType padding;
|
|
} c5;
|
|
|
|
/* Having this copy can sometimes save a bus xact, and avoids
|
|
** the potential problem of cache line conflicts in a direct
|
|
** mapped cache when doing the "fast write" to c1.
|
|
*/
|
|
union {
|
|
c1DataType c1Data;
|
|
cacheLineType padding;
|
|
} c6;
|
|
|
|
union {
|
|
/* Used by __mp_barrier */
|
|
volatile uint64 barrier_flag;
|
|
cacheLineType padding;
|
|
} c7;
|
|
|
|
/* c8: a "pre-allocated" construct_cb */
|
|
aligned_cb_type single_construct_cb;
|
|
|
|
|
|
struct joinLineType joinArea[NUM_JOIN_LINES];
|
|
|
|
aligned_thread_cb_type thread_cb[MAX_THREADS];
|
|
|
|
} taskCommonStructType;
|
|
|
|
|
|
extern taskCommonStructType __mp_taskCommon;
|
|
|
|
/* Field access macros */

/* Accessors for the c1 record of __mp_taskCommon.  Every macro expands to an
** lvalue.  The vptr_rec members are reached through .data.ptr.
**
** M_threadOfLastIter used to be defined here, but c1DataType has no
** threadOfLastIter member, so the macro could never compile when used.
** It has been dropped.
*/
#define M_syncData                   (__mp_taskCommon.c1.c1Data)
#define M_startFlag                  (__mp_taskCommon.c1.c1Data.startFlag)
#define M_reliableStartFlagValue     (__mp_taskCommon.c1.c1Data.reliableStartFlagValue)
#define M_currentNumthreads          (__mp_taskCommon.c1.c1Data.currentNumthreads)
#define M_schedType                  (__mp_taskCommon.c1.c1Data.schedType)
#define M_interfaceType              (__mp_taskCommon.c1.c1Data.interfaceType)
#define M_remainder                  (__mp_taskCommon.c1.c1Data.remainder)
#define M_chunkSize                  (__mp_taskCommon.c1.c1Data.chunkSize)
#define M_proc                       (__mp_taskCommon.c1.c1Data.proc.data.ptr)
#define M_staticLink                 (__mp_taskCommon.c1.c1Data.staticLink.data.ptr)
#define M_base                       (__mp_taskCommon.c1.c1Data.base)
#define M_stride                     (__mp_taskCommon.c1.c1Data.stride)
#define M_totalTripcount             (__mp_taskCommon.c1.c1Data.totalTripcount)
#define M_memorySyncLock             (__mp_taskCommon.c1.c1Data.memorySyncLock.data.ptr)
#define M_evcPtr                     (__mp_taskCommon.c1.c1Data.evcPtr.data.ptr)
#define M_global_cb_array_ptr        (__mp_taskCommon.c1.c1Data.global_cb_array_ptr.data.ptr)
#define M_construct_instance_counter (__mp_taskCommon.c1.c1Data.construct_instance_counter)

/* The same accessors for c6, the second copy of the c1 record.
** M_copy_threadOfLastIter was dropped for the same reason as above.
*/
#define M_copy_syncData                   (__mp_taskCommon.c6.c1Data)
#define M_copy_startFlag                  (__mp_taskCommon.c6.c1Data.startFlag)
#define M_copy_reliableStartFlagValue     (__mp_taskCommon.c6.c1Data.reliableStartFlagValue)
#define M_copy_currentNumthreads          (__mp_taskCommon.c6.c1Data.currentNumthreads)
#define M_copy_schedType                  (__mp_taskCommon.c6.c1Data.schedType)
#define M_copy_interfaceType              (__mp_taskCommon.c6.c1Data.interfaceType)
#define M_copy_remainder                  (__mp_taskCommon.c6.c1Data.remainder)
#define M_copy_chunkSize                  (__mp_taskCommon.c6.c1Data.chunkSize)
#define M_copy_proc                       (__mp_taskCommon.c6.c1Data.proc.data.ptr)
#define M_copy_staticLink                 (__mp_taskCommon.c6.c1Data.staticLink.data.ptr)
#define M_copy_base                       (__mp_taskCommon.c6.c1Data.base)
#define M_copy_stride                     (__mp_taskCommon.c6.c1Data.stride)
#define M_copy_totalTripcount             (__mp_taskCommon.c6.c1Data.totalTripcount)
#define M_copy_memorySyncLock             (__mp_taskCommon.c6.c1Data.memorySyncLock.data.ptr)
#define M_copy_evcPtr                     (__mp_taskCommon.c6.c1Data.evcPtr.data.ptr)
#define M_copy_global_cb_array_ptr        (__mp_taskCommon.c6.c1Data.global_cb_array_ptr.data.ptr)
#define M_copy_construct_instance_counter (__mp_taskCommon.c6.c1Data.construct_instance_counter)
|
|
|
|
|
|
/* Accessors for the c2 record of __mp_taskCommon.  Every macro expands to an
** lvalue.  The ptr_rec members are reached through .data.ptr.
*/
#define M_zero                               (__mp_taskCommon.c2.c2Data.zero)
#define M_state                              (__mp_taskCommon.c2.c2Data.state)
#define M_suggestedNumthreads                (__mp_taskCommon.c2.c2Data.suggestedNumthreads)
#define M_previousNumthreads                 (__mp_taskCommon.c2.c2Data.previousNumthreads)
#define M_maxNumthreads                      (__mp_taskCommon.c2.c2Data.maxNumthreads)
#define M_evcValue                           (__mp_taskCommon.c2.c2Data.evcValue)
#define M_flagValue                          (__mp_taskCommon.c2.c2Data.flagValue)
#define M_waitEntry                          (__mp_taskCommon.c2.c2Data.waitEntry.data.ptr)
#define M_forkEntry                          (__mp_taskCommon.c2.c2Data.forkEntry.data.ptr)
#define M_wakeEntry                          (__mp_taskCommon.c2.c2Data.wakeEntry.data.ptr)
#define M_resetEntry                         (__mp_taskCommon.c2.c2Data.resetEntry.data.ptr)
#define M_masters_cb_array_ptr               (__mp_taskCommon.c2.c2Data.masters_cb_array_ptr.data.ptr)
#define M_masters_construct_instance_counter (__mp_taskCommon.c2.c2Data.masters_construct_instance_counter)
#define M_max_num_constructs                 (__mp_taskCommon.c2.c2Data.max_num_constructs)
|
|
|
|
/* Accessors for the c3 record of __mp_taskCommon.
**
** c3DataType now holds only the two lock handles.  The former accessors
** M_d_zero1, M_d_unused1, M_shiftAmount, M_correctionNeeded, M_currentBase
** and M_remainingTrips named members that are commented out of the struct,
** so they could never compile when used.  They have been dropped.
*/
#define M_iterationLockHandle (__mp_taskCommon.c3.c3Data.iterationLockHandle.data.ptr)
#define M_internalLockHandle  (__mp_taskCommon.c3.c3Data.internalLockHandle.data.ptr)
|
|
|
|
/* Accessors for the c4 record (auto-block flags) */
#define M_itersTillBlock      (__mp_taskCommon.c4.c4Data.itersTillBlock)
#define M_iterIncrement       (__mp_taskCommon.c4.c4Data.iterIncrement)
#define M_intendsToBlock      (__mp_taskCommon.c4.c4Data.autoBlockFlags.intendsToBlock)
#define M_intendsToBlockGroup (__mp_taskCommon.c4.c4Data.autoBlockFlags.intendsToBlockGroup)

/* Accessors for the c5 record (auto-unblock flags) */
#define M_isNowUnblocked      (__mp_taskCommon.c5.c5Data.autoUnblockFlags.isNowUnblocked)
#define M_isNowUnblockedGroup (__mp_taskCommon.c5.c5Data.autoUnblockFlags.isNowUnblockedGroup)

/* Accessor for c7; barrier_flag is a direct member of the c7 union */
#define M_barrier_flag        (__mp_taskCommon.c7.barrier_flag)
|
|
|
|
/* Accessors for the pre-allocated construct cb
** (__mp_taskCommon.single_construct_cb).  Every macro expands to an lvalue.
*/
#define M_cb                        (__mp_taskCommon.single_construct_cb.data.construct_cb)
#define M_cb_controlLock            (__mp_taskCommon.single_construct_cb.data.construct_cb.controlLock)
#define M_cb_construct_instance     (__mp_taskCommon.single_construct_cb.data.construct_cb.construct_instance)
#define M_cb_all_allocated_instance (__mp_taskCommon.single_construct_cb.data.construct_cb.all_allocated_instance)
#define M_cb_base                   (__mp_taskCommon.single_construct_cb.data.construct_cb.base)
#define M_cb_tripcount              (__mp_taskCommon.single_construct_cb.data.construct_cb.tripcount)
#define M_cb_stride                 (__mp_taskCommon.single_construct_cb.data.construct_cb.stride)
#define M_cb_original_tripcount     (__mp_taskCommon.single_construct_cb.data.construct_cb.original_tripcount)
#define M_cb_full_chunk_size        (__mp_taskCommon.single_construct_cb.data.construct_cb.full_chunk_size)
#define M_cb_last_chunk_size        (__mp_taskCommon.single_construct_cb.data.construct_cb.last_chunk_size)
#define M_cb_zero                   (__mp_taskCommon.single_construct_cb.data.construct_cb.zero)
#define M_cb_thread_count           (__mp_taskCommon.single_construct_cb.data.construct_cb.thread_count)
/* construct_cb_type has no member called "threads"; the per-construct
** thread counter is thread_count.  The old name is kept as an alias so any
** existing spelling resolves to the real field.
** NOTE(review): presumably a leftover from a field rename -- confirm.
*/
#define M_cb_threads                M_cb_thread_count
#define M_cb_shift_amount           (__mp_taskCommon.single_construct_cb.data.construct_cb.shift_amount)
#define M_cb_correction_needed      (__mp_taskCommon.single_construct_cb.data.construct_cb.correction_needed)
|
|
|
|
|
|
/* Whole-array and per-element accessors for the join area and the
** per-thread control blocks.  The element forms expand to lvalues.
*/
#define M_joinArea_all        (__mp_taskCommon.joinArea)
#define M_joinArea(_row,_col) (__mp_taskCommon.joinArea[(_row)].node.data.flag[(_col)])
#define M_thread_cb_all       (__mp_taskCommon.thread_cb)
#define M_thread_cb(_thread)  (__mp_taskCommon.thread_cb[(_thread)].data.thread_cb)
|
|
|
|
|
|
/* Routines defined outside this header.
**
** The first two parameters are not spelled "min" and "max" because this
** header defines function-like macros with those names; different names
** keep the prototype safe if it is ever reformatted so that a parenthesis
** follows one of them.  Parameter names in a prototype are not part of the
** interface, so callers and the definition are unaffected.
*/
void __mp_sugnumthd_init(int32 min_value, int32 max_value, int32 now);

/* "(void)" makes this a real prototype; "()" would leave the argument list
** unchecked.
*/
void __mp_sugnumthd_exit(void);
|
|
|
|
#endif /* ifdef _LANGUAGE_C */
|
|
|