irix-657m-src/irix/kern/ml/delay.c

/**************************************************************************
 *									  *
 * 		 Copyright (C) 1989, Silicon Graphics, Inc.		  *
 *									  *
 *  These coded instructions, statements, and computer programs  contain  *
 *  unpublished  proprietary  information of Silicon Graphics, Inc., and  *
 *  are protected by Federal copyright law.  They  may  not be disclosed  *
 *  to  third  parties  or copied or duplicated in any form, in whole or  *
 *  in part, without the prior written consent of Silicon Graphics, Inc.  *
 *									  *
 **************************************************************************/
#ident	"$Revision: 1.64 $"

#include "limits.h"
#include "sys/types.h"
#include "sys/systm.h"
#include "sys/sbd.h"
#include "sys/param.h"
#include "sys/debug.h"
#include "sys/i8254clock.h"
#include "sys/immu.h"
#include "sys/pda.h"
#include "sys/cmn_err.h"
#if IPMHSIM
#include "sys/kopt.h"
#endif

#if IP22			/* for extra hacks for bad IOC1 timer */
#include <sys/cpu.h>
#endif

extern unsigned int _cpuclkper100ticks(unsigned int *);
extern int _ticksper1024inst(void);

#ifndef IP26	/* uses seperate counter for us_delay() */
/*
 * Instructions per DELAY loop.  The loop body is a branch with a decrement
 * in the delay slot:
 *	1:	bgtz	s0,1b
 *		subu	s0,1
 */
#define	IPL	2
#ifndef IP32	/* uses seperate delay_calibrate */
/*
 * Calibrate private.decinsperloop, the per-CPU constant us_delay()
 * hands to its spin loop.
 *
 * Simulator (IPMHSIM) builds store a fixed 10.  All other builds time
 * 1024 instructions with _ticksper1024inst() and scale the result; a
 * reading of 0 is taken to mean the timer is broken and a fixed
 * fallback is stored instead.  On IP22, machines that are not fullhouse
 * and report is_ioc1() < 2 skip the measurement and use the fallback.
 */
void
delay_calibrate(void)
{
	register int ticks;

#ifdef IPMHSIM
	private.decinsperloop = 10;
#else /* !IPMHSIM */
#if IP22
	/*
	 * Early Indies had a broken 8254 (but they also only had
	 * 100Mhz cpus), so only trust the timer on everything else.
	 */
	if (is_fullhouse() || is_ioc1() >= 2)
		ticks = _ticksper1024inst();
	else
		ticks = 0;
#else
	ticks = _ticksper1024inst();
#endif

	if (ticks != 0) {
		/*
		 * Scale ticks-per-1024-instructions into the per-loop
		 * constant:
		 *
		 *   (time units/tick) * (inst/loop) * (ticks/1024 inst) / 1024
		 *
		 * The leading factor of 10 keeps an extra significant
		 * digit; without it fast processors rounded to only 3 or
		 * 4.  The value lives in the pda because processors of
		 * different speeds can share one system.
		 */
		private.decinsperloop =
			(10 * (100000000/MASTER_FREQ) * IPL * ticks)/1024;
	} else {
		/*
		 * The timer's broken.  Use a reasonable value that is on
		 * the large side (4 == ~50Mhz R4000).
		 */
		private.decinsperloop = 10 * 4;
	}
#endif /* !IPMHSIM */
}
#endif /* !IP32 */

#ifdef R4600SC
/*
 * When nonzero, us_delay() uses a fixed loop constant instead of reading
 * private.decinsperloop.
 */
int early_delay_flag = 0;
#endif

/*
** Spin in a tight loop for "us" microseconds.  It is expected that
** decinsperloop will be properly calibrated (via a call to
** delay_calibrate) sometime before us_delay is called.
**
** XXX: for R4600SC we need to call this early, before PDA is set up
**      so we set the flag and use the value of 40, which is taken
**      from delay_calibrate() above.  early_delay_flag is set and
**	unset in size_2nd_cache() [cacheops.s].  This should probably
**	really be done in mlsetup() but I didn't want to screw w/it
**	since size_2nd_cache is the only thing in the kernel which
**	needs this support.
**
** XXX: oops, I got this wrong... the value of decinsperloop goes DOWN
**      as the CPU gets faster -- I thought it represented the number
**      of times to go through the loop directly (as it does in the
**      USL routines in SVR4).  This value is 17 on a 4600PC 100Mhz,
**	and is 13 on a 4600SC 133Mhz.
*/
void
us_delay(register uint us)
{
	void	delayloop(int,int);
	int	i;
#ifdef R4600SC
	extern int early_delay_flag;
	int n;
	int d = early_delay_flag ? 7 : private.decinsperloop;

	ASSERT(d!=0);
#else
	int n;
	int d = private.decinsperloop;

	ASSERT(private.decinsperloop!=0);
#endif

	/*
	 * delayloop wants nanoseconds so we need to do several of them
	 * if the delay is greater than ~2 seconds.  Why is it ever more
	 * than 2 seconds?  Only the sleep gods know.
	 */
	for (i = 1; us > INT_MAX/1000; i++, us >>= 1)
		;
	n = (1000) * us;
	while (i--)
		delayloop(n,d);
}
#endif	/* ndef IP26 */

/*
 * delay, but first make sure bus is clear, so delay is guaranteed
 * to be relative to bus, not CPU
 */
void us_delay(register uint us);

/*
 * Flush the bus, then spin for the requested number of microseconds.
 * One microsecond is deducted as an approximation of the time taken by
 * flushbus() itself; a request of zero only flushes the bus.
 */
void
us_delaybus(register uint us)
{
	flushbus();

	if (us == 0)
		return;

	us_delay(us - 1);	/* sub off time for flushbus (approx) */
}

#ifndef TFP
/*
** Figure out the max # of passes in clean_dcache() / clean_icache()
** before allowing interrupts back on.
** Assuming a 3.6MHz crystal --> 277ns per tick.
** Originally we wanted to preempt every 250us; CACHE_PREEMPT_LIMIT
** below is currently set to 500us.
*/
#define INST_PER_PASS	15	/* the observed number via disassembler */
#define CACHE_PREEMPT_LIMIT	500000	/* 500 us */
/*
 * Work out how many instructions may be executed within
 * CACHE_PREEMPT_LIMIT nanoseconds, i.e. before interrupts should be
 * turned back on during a cache operation.
 *
 * Simulator (IPMHSIM) builds derive the figure from the arg_cpufreq
 * option (MHz), defaulting to 240 MHz and assuming 2 cycles per
 * instruction.  Other builds time 1024 instructions with
 * _ticksper1024inst(), convert that to a per-instruction time and
 * divide the limit by it.
 *
 * If the per-instruction time comes out as zero (a timer that reports
 * 0 ticks, or a value that truncates to 0) the division is skipped
 * rather than dividing by zero.
 *
 * NOTE(review): the computed limit is left in a local variable; nothing
 * in this function stores or returns it.
 */
void
cache_preempt_limit(void)
{
	int tmp;

#ifdef IPMHSIM
	if (is_specified(arg_cpufreq) &&
	    (tmp = atoi(arg_cpufreq)) > 0) {
		tmp *= 1000000;
		tmp /= (1000000000 / CACHE_PREEMPT_LIMIT);
	} else
		tmp = 120000; /* assume 240 MHZ */
	tmp /= 2; /* assume 2 cycles per instruction */
#else /* IPMHSIM */
#if IP22
	if (!is_fullhouse() && is_ioc1() < 2)
	    tmp = 19;	/* Early Indies had a broken 8254 */
			/* (but they also only had 100Mhz cpus) */
	else
#endif
	tmp = _ticksper1024inst();
	/* time in nano sec per cached inst */
#ifndef IP32
	tmp = (tmp*(1000000000/MASTER_FREQ))/1024;
#else
	tmp = (DNS_PER_TICK * IPL * tmp)/1024;
#endif
	/* no usable measurement: avoid dividing by zero below */
	if (tmp <= 0)
		return;
	tmp = CACHE_PREEMPT_LIMIT/tmp;
					/* max # of insts that can be executed
					   before turning intr back on */
#endif /* IPMHSIM */
}
#endif	/* TFP */


#ifdef IP26

#define SYSAD_COUNT     20000		/* SysAD cycles to measure over */
#define SYSAD_FREQ      50000000	/* SysAD bus frequency, in Hz */

/* absolute difference of two values; each argument is evaluated twice */
#define abssub(x,y)          ((x)>(y) ? (x)-(y) : (y)-(x))

int CountTccVsSysAdCycles(int);

/*  Teton always has a 50.0Mhz SysAD bus and a counter on TCC running at this
 * speed.  We can use the ratio of C0_COUNT to this to calculate the processor
 * frequency.
 *
 * Returns a frequency in Hz: the freq_table entry closest to the
 * measured value, or SYSAD_FREQ if the measured count is implausibly
 * small.  The fallback is returned in Hz like every other path, so that
 * callers which scale the result to MHz do not round it down to zero.
 */
int
findcpufreq_raw(void)
{
    /* known frequencies in Hz, terminated by 0 */
    static unsigned freq_table[] = {
        65000000, 70000000, 75000000, 80000000, 84000000, 90000000, 0
    };
    unsigned count = (unsigned) CountTccVsSysAdCycles(SYSAD_COUNT);
    unsigned freq;
    unsigned best = SYSAD_FREQ;		/* smallest distance seen so far */
    int i, entry = 0;

    /* if the count is less than the minimum, return SysAd freq */
    /*  this shouldn't happen */
    if (count < SYSAD_COUNT)
        return SYSAD_FREQ;

    /* calculate true frequency */
    freq = count * (SYSAD_FREQ/SYSAD_COUNT);

    /* pick the table entry closest to the measured frequency */
    for (i = 0; freq_table[i] > 0; i++) {
        unsigned diff = abssub(freq_table[i], freq);

        if (diff < best) {
            best = diff;
            entry = i;
        }
    }

    return freq_table[entry];
}

#else /* !IP26 */

/* this is the table of known cpu freq that we support
** table is in the order of increasing freq
**
** XXX- please add comments for supported systems
** round to 1000 so we get integer divide
*/

#ifndef IP32
/*
 * Known clock frequencies, in Hz, that findcpufreq_raw() snaps its
 * measurement to.  Per the entry comments each value is half of the
 * rated speed of the part (e.g. 50000000 for a "100mhz" R4000).  The
 * last entry is also used by findcpufreq_raw() as the highest known
 * frequency.
 */
static int freq[] = {
#if 0
	20000000,
	25000000,
	30000000,
#endif
	33000000,
	36000000,
	40000000,
	50000000,		/* 100mhz R4000 */
#ifdef IP28
	60000000,		/* 120mhz T5 prototype */
	70000000,		/* 140mhz T5 prototype */
#else
	66666000,		/* 133mhz R4600 */
#endif
	75000000,		/* 150mhz R4400 */
	80000000,
	85000000,
#if IP22
	87370000,		/* 175mhz R4400 (PM6) */
#else
	87500000,		/* 175mhz T5 prototype */
#endif
	90000000,
#ifdef R10000
	95000000,		/* 190mhz T5 prototype */
	96000000,		/* 192mhz T5 prototype */
	97500000,		/* 195mhz T5 (T5I2 MR) */
#else
	94339000,
	95250000,
#endif /* R10000 */
       100000000,		/* 200mhz R4400 (PM3; PM7) */
       120000000,               /* 240mhz R5000 (Triton) */
       125000000		/* 250mhz R4400 (PM5) */
};

#else /* IP32 */
/* see ml/MOOSEHEAD/README.clocks */
/*
 * Known IP32 clock frequencies, in Hz, that findcpufreq_raw() snaps its
 * measurement to.  Per the entry comments each value is half of the
 * rated speed of the part.  The last entry is also used by
 * findcpufreq_raw() as the highest known frequency.
 *
 * NOTE(review): 150345000 is listed before 150339000, so the table is
 * not strictly ascending at that point.
 */
static int freq[] = {
	66580000,		/* R4600 PROTOTYPE 133 MHz */
	75121000,		/* FPGA R10K 150 MHz */
	75170000,		/* R5K 150 MHz */
	87552000,		/* JUICE R10K 175 MHz */
	87641000,               /* FPGA R10K 175 MHz */
	90000000,		/* R5K 180 MHz */
	97953000,		/* JUICE R10K 195 MHz */
       100227000,		/* R5K 200 MHz (100.227260*2 Mhz) */
       112500000,		/* R10K 225 */
       125284000,		/* R5K/R10K 250 MHz (125.284075*2 Mhz) */
       135000000,		/* R12K 270 MHz */
       137750000,		/* R10k 275 Mhz (275.500 Mhz) */
       150345000,		/* R10k 300 Mhz (300.690 Mhz) */
       150339000,		/* R5k  300 Mhz (300.678 Mhz) */
       175000000,		/* R12K Shrink - 350 MHz */
       180000000,		/* R12K Shrink - 360 MHz */
       187500000,		/* R12K Shrink - 375 MHz */
       192500000,		/* R12K Shrink - 385 MHz */
       195000000,		/* R12K Shrink - 390 MHz */
       200000000,		/* R12K Shrink - 400 MHz */
};
#endif /* IP32 */

#if IP22 /* XXX DEBUG - figure out why cpu count is not repeatable! */
/*
 * Debug state written by findcpufreq_raw() on IP22:
 *   _findcpufreq_c      - 8-entry ring; each call stores the cpu count
 *                         and then the tick count it used
 *   _findcpufreq_x      - running store index (used modulo 8); zero
 *                         until the first call
 *   _findcpufreq_repeat - when nonzero, every call re-runs the
 *                         measurement instead of reusing the first one
 */
int _findcpufreq_c[8],
    _findcpufreq_x = 0,
    _findcpufreq_repeat = 0;
#endif

/*
** find the frequency of the running CPU
** should only be invoked on the master
**
** findcpufreq_raw() returns the frequency of the processor's master
** clock, in Hz. It should be used wherever knowing this value to any
** decent precision is required.
**
** findcpufreq() returns the frequency of the processor's master
** clock, in MHz.
**
** cpu_mhz_rating() returns the "rated" speed of the processor, which
** should always match the numbers in our marketing literature. For
** instance, a "PM5" processor module has a 125MHz crystal, and
** findcpufreq() will return 125, but this is really a "250MHz"
** R4400. This function should be used wherever the findcpufreq()
** number would have been doubled, as it handles things like 133MHz
** and 175MHz correctly.
*/
#if (defined(IP22) && defined(TRITON))
extern int get_cpu_irr(void);
#endif /* (defined(IP22) && defined(TRITON)) */

/*
 * Measure the CPU clock and return it in Hz, snapped to the nearest
 * entry of freq[].
 *
 * How the raw measurement ("ticks", in Hz once converted) is obtained
 * depends on the platform:
 *   IPMHSIM       - no measurement; returns arg_cpufreq (MHz) * 1000000,
 *                   or 240 MHz if the option is absent or not positive.
 *   IP28          - scaled from _ticksper80ms().
 *   IP20/22/32    - cpu clocks counted over a number of timer ticks via
 *                   _cpuclkper100ticks(); early Indies (IP22, not
 *                   fullhouse, is_ioc1() < 2) are assumed to be 50 MHz.
 *   everything else - derived from _ticksper1024inst(), retried while
 *                   the reading is outside 10..1000 ticks; returns 0 if
 *                   ten retries were needed.
 *
 * After the table lookup, some platforms use the processor
 * implementation number to separate parts whose clocks are too close to
 * tell apart.  Finally, a measurement more than 4 MHz above the last
 * table entry is returned as measured rather than rounded down.
 */
int
findcpufreq_raw(void)
{
	int ticks;
	int i, closest,tmp,clock, freq_ret;
	int freqtbl_siz = sizeof(freq)/ sizeof(int);
#if (defined(IP22) && defined(TRITON)) || defined(IP32)
	rev_id_t ri;

	/* processor revision/implementation id, used after the lookup */
	ri.ri_uint = get_cpu_irr();
#endif

#ifdef IPMHSIM
	if (! (is_specified(arg_cpufreq) &&
	       (ticks = atoi(arg_cpufreq)) > 0))
		ticks = 240; /* assume 240 MHZ */
	return(ticks * 1000000);
#elif IP28
	extern unsigned int _ticksper80ms(void);
	/* * 100 >> 3 == * 12.5, i.e. an 80ms count scaled up to one second */
	ticks = (_ticksper80ms() * 100) >> 3;
#else /* IPMHSIM */
#if !(defined(IP20) || defined(IP22) || defined(IP28) || defined(IP32))
	extern int _ticksper1024inst();
	int fcount= 0;		/* number of retries so far */

freq_loop:
	ticks = _ticksper1024inst();	/* # of ticks to execute 1024 inst */
	/*
	** 8MHZ takes about 472 ticks for 1024 NOPs
	** 12MHZ   ""	    315		""
	** 16MHZ   ""	    236		""
	** 25MHZ   ""	    151		""
	** 50MHZ  R4000      20         ""
	** 75MHZ  R4400     ~15 (est)   ""
        ** 100Mhz R5000     ~10 (est)   ""
	*/
	/* implausible reading: measure again, at most 10 retries */
	if ((ticks > 1000 || ticks < 10) && fcount<10 ) {
		fcount++;
		goto freq_loop;
	}
	/* retries exhausted: give up, whatever the last reading was */
	if (fcount >=10)
		return(0);
	/* based on MASTER_FREQ */
	/* time in ns to execute 1 NOP inst , 1 tick is about 271ns */
	ticks = (ticks*(1000000000/MASTER_FREQ))/1024;
	ticks = 1000000000/ticks;	/* convert to HZ */
#else
#if IP22
	/* Early Indies can't call _cpuclkper100ticks() because of a
	 * broken i8254 timer.
	 */
	if (!is_fullhouse() && is_ioc1() < 2)
		ticks = 50000000;
	else
#endif	/* IP22 */
	{
		/*
		 * On IP22 the two counters are static so that a previous
		 * measurement can be reused on later calls.
		 */
#if IP22 /* XXX DEBUG - figure out why cpu count is not repeatable! */
		static
#endif  /* IP22 */
		unsigned int cpu_count, tick_count;

		/*
		 * On IP22 the measurement below is only taken on the first
		 * call, or on every call when _findcpufreq_repeat is set.
		 */
#if IP22 /* XXX DEBUG - figure out why cpu count is not repeatable! */
		if (_findcpufreq_repeat || _findcpufreq_x == 0)
#endif  /* IP22 */
		cpu_count = _cpuclkper100ticks(&tick_count);
		/* do division first to avoid overflow */
		ticks = cpu_count * (MASTER_FREQ / tick_count);	/* to HZ */

		/* record both counts in the debug ring */
#if IP22 /* XXX DEBUG - figure out why cpu count is not repeatable! */
		_findcpufreq_c[(_findcpufreq_x++) % 8] = cpu_count;
		_findcpufreq_c[(_findcpufreq_x++) % 8] = tick_count;
#endif  /* IP22 */
	}
#endif	/* !(IP20 || IP22 || IP28 || IP32) */
#endif  /* IPMHSIM */

	/*
	 * Find the table entry with the smallest absolute difference from
	 * the measurement.  closest == -1 marks "no candidate yet"; on a
	 * tie the earlier entry is kept because only a strictly smaller
	 * difference replaces it.
	 */
	closest = -1;
	for (i=0; i<(sizeof(freq)/sizeof(int)); i++) {
		if ( ticks > freq[i] )
			tmp = ticks - freq[i];
		else
			tmp = freq[i] - ticks;
		if ( closest == -1 ) {
			clock = i;
			closest = tmp;
			continue;
		}
		if ( tmp < closest ) {
			closest = tmp;
			clock = i;
		}

	}
	freq_ret = freq[clock];

#if defined(IP22) && defined(TRITON)
	/*
	 * There isn't enough difference in the clock speeds
	 * between the 175Mhz R4400 and the 180Mhz R5000 for
	 * this code to reliably differentiate between them.
	 * Check to see if we have a 180Mhz R4400 or a 175Mhz
	 * R5000.
	 */
	if (freq_ret == 90000000 && ri.ri_imp == C0_IMP_R4400)
	    freq_ret = 87370000;

	if (freq_ret == 87370000 && ri.ri_imp == C0_IMP_TRITON)
	    freq_ret = 90000000;
#elif defined(IP32)
	/*
	 * Similarly, make sure that clock speeds of O2 (IP32)
	 * 175Mhz R10000 vs 180Mhz R5000 and
	 * 195Mhz R10000 vs 200Mhz R5000
	 * are reliably differentiated.
	 */
	if (ri.ri_imp == C0_IMP_R10000 || ri.ri_imp == C0_IMP_R12000) {
	    /*
	     * there is no R10K 180, give the higher of 175 Mhz (FPGA)
	     * since we went over
	     */
	    if (freq_ret == 90000000)
		freq_ret = 87641000;
	    /*
	     * there is no R10K 200, return 195
	     */
	    else if (freq_ret == 100227000)
		freq_ret = 97953000;
	    /*
	     * Make sure R10K/R12K does NOT get R5k 300 MHz
	     */
	    else if (freq_ret == 150339000)
		freq_ret = 150345000;
	}

	if (ri.ri_imp == C0_IMP_TRITON) {
	    /*
	     * there is no R5k 175, return 180
	     */
	    if (freq_ret > 87000000 && freq_ret < 88000000)
		freq_ret = 90000000;
	    /*
	     * there is no R5k 195, return 200
	     */
	    else if (freq_ret == 97953000)
		freq_ret = 100227000;
	    /*
	     * Make sure R5K does NOT get R10k/R12k 300 MHz
	     */
	    else if (freq_ret == 150345000)
		freq_ret = 150339000;
	}
#endif

	/*
	** if the calculated freq is greater than the max that we know about
	** and if the margin is more than 4Mhz then lets use the calculated
	** freq instead of rounding it down to what's in the table
	*/
	if (ticks > freq[freqtbl_siz-1] &&
			(ticks - freq[freqtbl_siz-1]) > 4000000)
		freq_ret = ticks;

	return(freq_ret);
}

#endif /* !IP26 */

/*
 * Divide v by r, rounding to the nearest integer by adding half of r
 * before the (truncating) integer division; intended for non-negative
 * operands.  Both parameters are parenthesized so that expression
 * arguments expand correctly.  r is evaluated twice.
 */
#define	ROUND(v,r)	(((v)+((r)/2))/(r))

/*
 * Return the processor's master clock frequency in MHz: the value from
 * findcpufreq_raw() (Hz) rounded to the nearest million.
 */
int
findcpufreq(void)
{
    int hz = findcpufreq_raw();

    return ROUND(hz, 1000000);
}

/*
 * Return the "rated" speed of the processor in MHz.
 *
 * Starts from findcpufreq_raw() (Hz).  On IP32 a value that falls in
 * one of three narrow bands is first replaced by the nominal frequency
 * for that band.  On R4000 and R10000 builds the value is then doubled.
 * The result is rounded to the nearest MHz.
 */
int
cpu_mhz_rating(void)
{
    int		hz = findcpufreq_raw();

#if IP32
    /* snap near-nominal measurements onto the nominal clock */
    if (hz >= 97000000 && hz <= 98000000) {
	    hz = 97500000;		/* 195MHz */
    } else if (hz >= 124000000 && hz <= 126000000) {
	    hz = 125000000;		/* 250 MHz */
    } else if (hz >= 149000000 && hz <= 151000000) {
	    hz = 150000000;		/* 300 MHz */
    }
#endif /* IP32 */

#if R4000 || R10000
    /* rated speed is twice the master clock on these processors */
    hz *= 2;
#endif

    hz = ROUND(hz, 1000000);
    return hz;
}