1
0
Files
irix-657m-src/irix/kern/ml/error.c
2022-09-29 17:59:04 +03:00

5595 lines
165 KiB
C

/**************************************************************************
* *
* Copyright (C) 1989-1994 Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
**************************************************************************/
#ident "$Revision: 1.125 $"
#include <sys/types.h>
#include <sys/cachectl.h>
#include <sys/callo.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/debug.h>
#include <sys/dump.h>
#include <sys/errno.h>
#include <sys/loaddrs.h>
#include <sys/map.h>
#include <sys/pda.h>
#include <sys/proc.h>
#include <ksys/vproc.h>
#include <sys/kthread.h>
#include <sys/uthread.h>
#include <sys/sbd.h>
#include <sys/schedctl.h>
#include <sys/sysmacros.h>
#include <sys/syssgi.h>
#include <sys/systm.h>
#include <ksys/exception.h>
#include <sys/atomic_ops.h>
#include <sys/inst.h>
#include <sys/ecc.h>
#include <ksys/cacheops.h>
#include <ksys/as.h>
#include <sys/runq.h>
#ifdef EVEREST
#include <sys/EVEREST/io4.h>
#include <sys/EVEREST/vmecc.h>
#include <sys/EVEREST/everror.h>
#include <sys/EVEREST/mc3.h>
/* Until we know code is correct, report cache errors
* but consider them FATAL and panic. Removing this
* define will re-enable correction.
*/
/* #define IP19_CACHEERRS_FATAL 1 */
/* #define FORCE_CACHERR_ON_STORE 1 */
/* Following are flags which can be turned on to test very specific error
* conditions.
* ECC_TEST_EW_BIT attempts to generate an EW condition (unsuccessfully)
* ECC_TEST_TWO_BAD causes two bad cachelines to be setup so we can see
* what happens when another cpu references the other line.
*/
/* #define ECC_TEST_EW_BIT 1 */
/* #define ECC_TEST_TWO_BAD 1 */
#endif /* EVEREST */
#include <sys/ioerror.h>
#ifdef _MEM_PARITY_WAR
#if IP20 || IP22
#include <sys/mc.h>
#endif /* IP20 || IP22 */
#include <sys/pfdat.h>
#endif /* _MEM_PARITY_WAR */
#if IP20 || IP22 || IPMHSIM
#define GIO_ERRMASK 0xff00
extern int perr_mem_init(caddr_t);
#endif /* IP20 || IP22 */
extern struct reg_desc sr_desc[], cause_desc[];
#if R4000 && R10000
extern struct reg_desc r10k_sr_desc[];
#endif /* R4000 && R10000 */
extern int picache_size;
extern int pdcache_size;
#ifdef R4000
static void init_ecc_info(void);
#endif
void ecc_cleanup(void);
#define SET_CBITS_IN 0x80
#if EVEREST
void dump_hwstate(int);
#endif
#ifdef R4000PC
extern int get_r4k_config(void);
int r4000_config;
#endif /* R4000PC */
extern char bytetab[];
#define BYTEOFF(bl) ((bl&0xf0)?(bytetab[bl>>4]):(bytetab[bl]+4))
/*
* CP0 status register description
*/
struct reg_values imask_values[] = {
{ SR_IMASK8, "8" },
{ SR_IMASK7, "7" },
{ SR_IMASK6, "6" },
{ SR_IMASK5, "5" },
{ SR_IMASK4, "4" },
{ SR_IMASK3, "3" },
{ SR_IMASK2, "2" },
{ SR_IMASK1, "1" },
{ SR_IMASK0, "0" },
{ 0, NULL },
};
struct reg_values mode_values[] = {
{ SR_KSU_USR, "USER" },
#if R4000 || R10000
{ SR_KSU_KS, "SPRVSR" },
#endif
{ 0, "KERNEL" },
{ 0, NULL },
};
#if TFP
struct reg_values kps_values[] = {
{ SR_KPS_4K, "4k" },
{ SR_KPS_8K, "8k" },
{ SR_KPS_16K, "16k" },
{ SR_KPS_64K, "64k" },
{ SR_KPS_1M, "1m" },
{ SR_KPS_4M, "4m" },
{ SR_KPS_16M, "16m" },
{ 0, NULL },
};
struct reg_values ups_values[] = {
{ SR_UPS_4K, "4k" },
{ SR_UPS_8K, "8k" },
{ SR_UPS_16K, "16k" },
{ SR_UPS_64K, "64k" },
{ SR_UPS_1M, "1m" },
{ SR_UPS_4M, "4m" },
{ SR_UPS_16M, "16m" },
{ 0, NULL },
};
struct reg_desc sr_desc[] = {
/* mask shift name format values */
{ SR_DM, 0, "DM", NULL, NULL },
{ SR_KPSMASK, 0, "KPS", NULL, kps_values },
{ SR_UPSMASK, 0, "UPS", NULL, ups_values },
{ SR_CU1, 0, "CU1", NULL, NULL },
{ SR_CU0, 0, "CU0", NULL, NULL },
{ SR_FR, 0, "FR", NULL, NULL },
{ SR_RE, 0, "RE", NULL, NULL },
{ SR_IBIT8, 0, "IM8", NULL, NULL },
{ SR_IBIT7, 0, "IM7", NULL, NULL },
{ SR_IBIT6, 0, "IM6", NULL, NULL },
{ SR_IBIT5, 0, "IM5", NULL, NULL },
{ SR_IBIT4, 0, "IM4", NULL, NULL },
{ SR_IBIT3, 0, "IM3", NULL, NULL },
{ SR_IBIT2, 0, "IM2", NULL, NULL },
{ SR_IBIT1, 0, "IM1", NULL, NULL },
{ SR_IMASK, 0, "IPL", NULL, imask_values },
{ SR_XX, 0, "XX", NULL, NULL },
{ SR_UX, 0, "UX", NULL, NULL },
{ SR_KSU_MSK, 0, "MODE", NULL, mode_values },
{ SR_EXL, 0, "EXL", NULL, NULL },
{ SR_IE, 0, "IE", NULL, NULL },
{ 0, 0, NULL, NULL, NULL },
};
#elif defined (BEAST)
struct reg_desc sr_desc[] = {
/* mask shift name format values */
{ SR_CU2, 0, "CU2", NULL, NULL },
{ SR_CU1, 0, "CU1", NULL, NULL },
{ SR_CU0, 0, "CU0", NULL, NULL },
{ SR_FR, 0, "FR", NULL, NULL },
{ SR_RE, 0, "RE", NULL, NULL },
{ SR_SR, 0, "SR", NULL, NULL },
{ SR_NMI, 0, "NMI", NULL, NULL },
{ SR_CE, 0, "CE", NULL, NULL },
{ SR_IBIT10, 0, "IM10", NULL, NULL },
{ SR_IBIT9, 0, "IM9", NULL, NULL },
{ SR_IBIT8, 0, "IM8", NULL, NULL },
{ SR_IBIT7, 0, "IM7", NULL, NULL },
{ SR_IBIT6, 0, "IM6", NULL, NULL },
{ SR_IBIT5, 0, "IM5", NULL, NULL },
{ SR_IBIT4, 0, "IM4", NULL, NULL },
{ SR_IBIT3, 0, "IM3", NULL, NULL },
{ SR_IBIT2, 0, "IM2", NULL, NULL },
{ SR_IBIT1, 0, "IM1", NULL, NULL },
{ SR_IMASK, 0, "IPL", NULL, imask_values },
{ SR_KSU_MSK, 0, "MODE", NULL, mode_values },
{ SR_EXL, 0, "EXL", NULL, NULL },
{ SR_IE, 0, "IE", NULL, NULL },
{ 0, 0, NULL, NULL, NULL },
};
#else /* !TFP && !BEAST */
struct reg_desc sr_desc[] = {
#if R4000 && R10000
/* mask shift name format values */
{ SR_CU3, 0, "CU3", NULL, NULL },
{ SR_CU2, 0, "CU2", NULL, NULL },
{ SR_CU1, 0, "CU1", NULL, NULL },
{ SR_CU0, 0, "CU0", NULL, NULL },
{ SR_RP, 0, "RP", NULL, NULL },
{ SR_FR, 0, "FR", NULL, NULL },
{ SR_RE, 0, "RE", NULL, NULL },
{ SR_RE, 0, "RE", NULL, NULL },
{ SR_BEV, 0, "BEV", NULL, NULL },
{ SR_TS, 0, "TS", NULL, NULL },
{ SR_SR, 0, "SR", NULL, NULL },
{ SR_CH, 0, "CH", NULL, NULL },
{ SR_CE, 0, "CE", NULL, NULL },
{ SR_DE, 0, "DE", NULL, NULL },
{ SR_IBIT8, 0, "IM8", NULL, NULL },
{ SR_IBIT7, 0, "IM7", NULL, NULL },
{ SR_IBIT6, 0, "IM6", NULL, NULL },
{ SR_IBIT5, 0, "IM5", NULL, NULL },
{ SR_IBIT4, 0, "IM4", NULL, NULL },
{ SR_IBIT3, 0, "IM3", NULL, NULL },
{ SR_IBIT2, 0, "IM2", NULL, NULL },
{ SR_IBIT1, 0, "IM1", NULL, NULL },
{ SR_IMASK, 0, "IPL", NULL, imask_values },
{ SR_KX, 0, "KX", NULL, NULL },
{ SR_SX, 0, "SX", NULL, NULL },
{ SR_UX, 0, "UX", NULL, NULL },
{ SR_KSU_MSK, 0, "MODE", NULL, mode_values },
{ SR_ERL, 0, "ERL", NULL, NULL },
{ SR_EXL, 0, "EXL", NULL, NULL },
{ SR_IE, 0, "IE", NULL, NULL },
{ 0, 0, NULL, NULL, NULL },
};
struct reg_desc r10k_sr_desc[] = {
#endif /* R4000 && R10000 */
/* mask shift name format values */
#ifdef R10000
{ SR_XX, 0, "XX", NULL, NULL },
#else
{ SR_CU3, 0, "CU3", NULL, NULL },
#endif
{ SR_CU2, 0, "CU2", NULL, NULL },
{ SR_CU1, 0, "CU1", NULL, NULL },
{ SR_CU0, 0, "CU0", NULL, NULL },
#ifndef R10000
{ SR_RP, 0, "RP", NULL, NULL },
#endif
{ SR_FR, 0, "FR", NULL, NULL },
{ SR_RE, 0, "RE", NULL, NULL },
{ SR_RE, 0, "RE", NULL, NULL },
{ SR_BEV, 0, "BEV", NULL, NULL },
{ SR_TS, 0, "TS", NULL, NULL },
{ SR_SR, 0, "SR", NULL, NULL },
{ SR_CH, 0, "CH", NULL, NULL },
#ifdef R10000
{ SR_NMI, 0, "NMI", NULL, NULL },
#else
{ SR_CE, 0, "CE", NULL, NULL },
#endif
{ SR_DE, 0, "DE", NULL, NULL },
{ SR_IBIT8, 0, "IM8", NULL, NULL },
{ SR_IBIT7, 0, "IM7", NULL, NULL },
{ SR_IBIT6, 0, "IM6", NULL, NULL },
{ SR_IBIT5, 0, "IM5", NULL, NULL },
{ SR_IBIT4, 0, "IM4", NULL, NULL },
{ SR_IBIT3, 0, "IM3", NULL, NULL },
{ SR_IBIT2, 0, "IM2", NULL, NULL },
{ SR_IBIT1, 0, "IM1", NULL, NULL },
{ SR_IMASK, 0, "IPL", NULL, imask_values },
{ SR_KX, 0, "KX", NULL, NULL },
{ SR_SX, 0, "SX", NULL, NULL },
{ SR_UX, 0, "UX", NULL, NULL },
{ SR_KSU_MSK, 0, "MODE", NULL, mode_values },
{ SR_ERL, 0, "ERL", NULL, NULL },
{ SR_EXL, 0, "EXL", NULL, NULL },
{ SR_IE, 0, "IE", NULL, NULL },
{ 0, 0, NULL, NULL, NULL },
};
#endif
/*
* CP0 cause register description
*/
struct reg_values exc_values[] = {
{ EXC_INT, "INT" },
{ EXC_MOD, "MOD" },
{ EXC_RMISS, "RMISS" },
{ EXC_WMISS, "WMISS" },
{ EXC_RADE, "RADE" },
{ EXC_WADE, "WADE" },
#if !TFP
{ EXC_IBE, "IBE" },
{ EXC_DBE, "DBE" },
#endif
{ EXC_SYSCALL, "SYSCALL" },
{ EXC_BREAK, "BREAK" },
{ EXC_II, "II" },
{ EXC_CPU, "CPU" },
{ EXC_OV, "OV" },
{ EXC_TRAP, "TRAP" },
#if R4000
{ EXC_VCEI, "VCEI" },
{ EXC_FPE, "FPE" },
{ EXC_WATCH, "WATCH" },
{ EXC_VCED, "VCED" },
#endif
#if R10000
{ EXC_FPE, "FPE" },
#ifndef R4000
{ EXC_WATCH, "WATCH" },
#endif /* !R4000 */
#endif /* R10000 */
{ 0, NULL },
};
struct reg_desc cause_desc[] = {
/* mask shift name format values */
{ CAUSE_BD, 0, "BD", NULL, NULL },
{ CAUSE_CEMASK, -CAUSE_CESHIFT, "CE", "%d", NULL },
#if TFP
{ CAUSE_NMI, 0, "NMI", NULL, NULL },
{ CAUSE_BE, 0, "BE", NULL, NULL },
{ CAUSE_VCI, 0, "VCI/TLBX", NULL, NULL },
{ CAUSE_FPI, 0, "FPI", NULL, NULL },
{ CAUSE_IP11, 0, "IP11", NULL, NULL },
{ CAUSE_IP10, 0, "IP10", NULL, NULL },
{ CAUSE_IP9, 0, "IP9", NULL, NULL },
#endif
{ CAUSE_IP8, 0, "IP8", NULL, NULL },
{ CAUSE_IP7, 0, "IP7", NULL, NULL },
{ CAUSE_IP6, 0, "IP6", NULL, NULL },
{ CAUSE_IP5, 0, "IP5", NULL, NULL },
{ CAUSE_IP4, 0, "IP4", NULL, NULL },
{ CAUSE_IP3, 0, "IP3", NULL, NULL },
{ CAUSE_SW2, 0, "SW2", NULL, NULL },
{ CAUSE_SW1, 0, "SW1", NULL, NULL },
{ CAUSE_EXCMASK,0, "EXC", NULL, exc_values },
{ 0, 0, NULL, NULL, NULL },
};
#if !defined (TFP) && !defined (BEAST)
#if ((!defined(R10000)) || defined(R4000))
struct reg_desc cache_err_desc[] = {
/* mask shift name format values */
{ CACHERR_ER, 0, "ER", NULL, NULL },
{ CACHERR_EC, 0, "EC", NULL, NULL },
{ CACHERR_ED, 0, "ED", NULL, NULL },
{ CACHERR_ET, 0, "ET", NULL, NULL },
{ CACHERR_ES, 0, "ES", NULL, NULL },
{ CACHERR_EE, 0, "EE", NULL, NULL },
{ CACHERR_EB, 0, "EB", NULL, NULL },
{ CACHERR_EI, 0, "EI", NULL, NULL },
#if IP19
{ CACHERR_EW, 0, "EW", NULL, NULL },
#endif
{ CACHERR_SIDX_MASK, 0, "SIDX", "0x%x", NULL },
{ CACHERR_PIDX_MASK, CACHERR_PIDX_SHIFT, "PIDX", "0x%x", NULL },
{ 0, 0, NULL, NULL, NULL },
};
#endif /* ((!defined(R10000)) || defined(R4000)) */
#define SSTATE_INVALID 0
#define SSTATE_CLEX 4
#define SSTATE_DIRTEX 5
#define SSTATE_SHARED 6
#define SSTATE_DIRTSHAR 7
struct reg_values scache_states[] = {
#if R4000 && R10000
{ SSTATE_INVALID, "INVAL" },
{ SSTATE_CLEX, "CE" },
{ SSTATE_DIRTEX, "DE" },
{ SSTATE_SHARED, "shared" },
{ SSTATE_DIRTSHAR, "dirty-shared" },
{ 0, NULL },
};
struct reg_values r10k_scache_states[] = {
#endif /* R4000 && R10000 */
#ifdef R10000
{ SSTATE_INVALID, "INVAL" },
{ SSTATE_SHARED, "shared" },
{ SSTATE_CLEX, "CE" },
{ SSTATE_DIRTEX, "DE" },
#else
{ SSTATE_INVALID, "INVAL" },
{ SSTATE_CLEX, "CE" },
{ SSTATE_DIRTEX, "DE" },
{ SSTATE_SHARED, "shared" },
{ SSTATE_DIRTSHAR, "dirty-shared" },
#endif
{ 0, NULL },
};
#define PSTATE_INVALID 0
#define PSTATE_SHARED 1
#define PSTATE_CLEX 2
#define PSTATE_DIRTEX 3
struct reg_values pcache_states[] = {
{ PSTATE_INVALID, "INVAL" },
{ PSTATE_SHARED, "shared" },
{ PSTATE_CLEX, "CE" },
{ PSTATE_DIRTEX, "DE" },
{ 0, NULL },
};
#define STAG_LO 0xffffe000
#define STAG_STATE 0x00001c00
#define STAG_STATE_SHIFT -10
#define STAG_VINDEX 0x00000380
#define STAG_ECC SECC_MASK
#define STAG_VIND_SHIFT 5 /* taglo bits 31..13 << 35..17 */
#ifdef R10000
#define SECC_MASK 0x0000007f
#define SADDR_SHIFT 4
#endif
struct reg_desc s_taglo_desc[] = {
/* mask shift name format values */
{ STAG_LO, SADDR_SHIFT, "paddr","0x%x", NULL },
{ STAG_STATE, STAG_STATE_SHIFT, NULL, NULL, scache_states },
{ STAG_VINDEX, STAG_VIND_SHIFT, "vind", "0x%x", NULL },
{ STAG_ECC, 0, "ecc", "0x%x", NULL },
{ 0, 0, NULL, NULL, NULL },
};
#if R4000 && R10000
struct reg_desc r10k_s_taglo_desc[] = {
/* mask shift name format values */
{ STAG_LO, SADDR_SHIFT, "paddr","0x%x", NULL },
{ STAG_STATE, STAG_STATE_SHIFT, NULL, NULL, r10k_scache_states },
{ STAG_VINDEX, STAG_VIND_SHIFT, "vind", "0x%x", NULL },
{ STAG_ECC, 0, "ecc", "0x%x", NULL },
{ 0, 0, NULL, NULL, NULL },
};
#endif /* R4000 && R10000 */
#define PTAG_LO 0xffffff00
#define PTAG_STATE 0x000000c0
#define PTAG_STATE_SHIFT -6
#define PTAG_PARITY 0x00000001
#ifdef R10000
#define PTAG_WAY 0x00000002
#define PTAG_SP 0x00000004
#define PTAG_LRU 0x00000008
#define PADDR_SHIFT 4
#endif
struct reg_desc p_taglo_desc[] = {
/* mask shift name format values */
{ PTAG_LO, PADDR_SHIFT, "paddr","0x%x", NULL },
{ PTAG_STATE, PTAG_STATE_SHIFT,NULL, NULL, pcache_states },
#ifdef R10000
{ PTAG_LRU, 0, "LRU", NULL, NULL },
{ PTAG_SP, 2, "SP", "%d", NULL },
{ PTAG_WAY, 1, "WAY", "%d", NULL },
#endif
{ PTAG_PARITY, 0, "parity","%x", NULL },
{ 0, 0, NULL, NULL, NULL },
};
#if IP19
#undef PHYS_TO_K0
#undef K0_TO_PHYS
extern __psunsigned_t ecc_phys_to_k0( __psunsigned_t);
extern __psunsigned_t ecc_k0_to_phys( __psunsigned_t);
#define PHYS_TO_K0 ecc_phys_to_k0
#define K0_TO_PHYS ecc_k0_to_phys
/* The standard ECC_INTERRUPT macro makes cached references and
* we have ERL and DE set, so cache errors during these kernel
* routines would go un-reported.
*/
#define ECC_INTERRUPT
/* only routines called during ecc handling use the following macro, and
* they all execute at splhi with SR_DE set, so no locking is necessary */
#define MARK_FOR_CLEANUP ecc_info_param->needs_cleanup = 1
/* as handler exits, if cleanup is needed it raises an interrupt; else
* it decrements the w_index */
#define CLEANUP_IS_NEEDED (ecc_info_param->needs_cleanup)
#else /* !IP19 */
/* ecc_handler mustn't do anything that could cause exceptions (printing,
* for example) since we aren't on a stack that the exception code
* recognizes. It therefore raises a software interrupt that invokes
* ecc_cleanup() to do its dirty work. */
#define ECC_INTERRUPT timeout(ecc_cleanup, 0, TIMEPOKE_NOW); \
ecc_info.needs_cleanup = 0; \
call_cleanup = 1 \
/* only routines called during ecc handling use the following macro, and
* they all execute at splhi with SR_DE set, so no locking is necessary */
#define MARK_FOR_CLEANUP ecc_info.needs_cleanup = 1
/* as handler exits, if cleanup is needed it raises an interrupt; else
* it decrements the w_index */
#define CLEANUP_IS_NEEDED (ecc_info.needs_cleanup)
#endif
char *etstrings[] = {"OK", "DB", "CB", "2 Bit", "3 Bit", "4 Bit", "Fatal"};
eccdesc_t real_data_eccsyns[] = {
/* 0|8 1|9 2|A 3|B 4|C 5|D 6|E 7|F */
/* 0*/ {OK, 0},{CB, 0},{CB, 1},{B2, 0},{CB, 2},{B2, 0},{B2, 0},{DB, 7},
/* 8*/ {CB, 3},{B2, 0},{B2, 0},{DB,54},{B2, 0},{DB, 6},{DB,55},{B2, 0},
/*10*/ {CB, 4},{B2, 0},{B2, 0},{DB, 0},{B2, 0},{DB,20},{DB,48},{B2, 0},
/*18*/ {B2, 0},{DB,24},{DB,28},{B2, 0},{DB,16},{B2, 0},{B2, 0},{DB,52},
/*20*/ {CB, 5},{B2, 0},{B2, 0},{DB, 1},{B2, 0},{DB,21},{DB,49},{B2, 0},
/*28*/ {B2, 0},{DB,25},{DB,29},{B2, 0},{DB,17},{B2, 0},{B2, 0},{DB, 4},
/*30*/ {B2, 0},{DB,44},{DB,45},{B2, 0},{DB,46},{B2, 0},{B2, 0},{B3, 0},
/*38*/ {DB,47},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*40*/ {CB, 6},{B2, 0},{B2, 0},{DB, 2},{B2, 0},{DB,22},{DB,50},{B2, 0},
/*48*/ {B2, 0},{DB,26},{DB,30},{B2, 0},{DB,18},{B2, 0},{B2, 0},{DB,10},
/*50*/ {B2, 0},{DB,32},{DB,33},{B2, 0},{DB,34},{B2, 0},{B2, 0},{B3, 0},
/*58*/ {DB,35},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*60*/ {B2, 0},{DB,12},{DB,13},{B2, 0},{DB,14},{B2, 0},{B2, 0},{B3, 0},
/*68*/ {DB,15},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*70*/ {DB, 9},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*78*/ {B2, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{UN, 0},
/*80*/ {CB, 7},{B2, 0},{B2, 0},{DB, 3},{B2, 0},{DB,23},{DB,51},{B2, 0},
/*88*/ {B2, 0},{DB,27},{DB,31},{B2, 0},{DB,19},{B2, 0},{B2, 0},{DB,58},
/*90*/ {B2, 0},{DB,36},{DB,37},{B2, 0},{DB,38},{B2, 0},{B2, 0},{B3, 0},
/*98*/ {DB,39},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*a0*/ {B2, 0},{DB,40},{DB,41},{B2, 0},{DB,42},{B2, 0},{B2, 0},{B3, 0},
/*a8*/ {DB,43},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*b0*/ {DB,56},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*b8*/ {B2, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{UN, 0},
/*c0*/ {B2, 0},{DB,60},{DB,61},{B2, 0},{DB,62},{B2, 0},{B2, 0},{B3, 0},
/*c8*/ {DB,63},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*d0*/ {DB, 8},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*d8*/ {B2, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{UN, 0},
/*e8*/ {DB,57},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*e8*/ {B2, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{UN, 0},
/*f8*/ {B2, 0},{DB, 5},{DB,53},{B2, 0},{DB,59},{B2, 0},{B2, 0},{UN, 0},
/*f8*/ {DB,11},{B2, 0},{B2, 0},{UN, 0},{B2, 0},{UN, 0},{UN, 0},{B2, 0},
};
eccdesc_t real_tag_eccsyns[] = {
/* 0|8 1|9 2|A 3|B 4|C 5|D 6|E 7|F */
/* 0 */ {OK, 0},{CB, 0},{CB, 1},{B2, 0},{CB, 2},{B2, 0},{B2, 0},{DB, 0},
/* 8 */ {CB, 3},{B2, 0},{B2, 0},{DB,16},{B2, 0},{DB, 4},{DB, 5},{B2, 0},
/*10*/ {CB, 4},{B2, 0},{B2, 0},{DB,22},{B2, 0},{DB,17},{DB, 1},{B2, 0},
#ifdef R4000
/*18*/ {B2, 0},{UN, 0},{UN, 0},{B2, 0},{DB, 6},{B2, 0},{B2, 0},{B3, 0},
#else
/*18*/ {B2, 0},{UN, 0},{DB,25},{B2, 0},{DB, 6},{B2, 0},{B2, 0},{B3, 0},
#endif /* R4000 */
/*20*/ {CB, 5},{B2, 0},{B2, 0},{DB,18},{B2, 0},{DB,24},{DB, 2},{B2, 0},
/*28*/ {B2, 0},{DB,20},{UN, 0},{B2, 0},{UN, 0},{B2, 0},{B2, 0},{B3, 0},
/*30*/ {B2, 0},{DB, 8},{DB, 9},{B2, 0},{UN, 0},{B2, 0},{B2, 0},{B3, 0},
/*38*/ {DB,10},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*40*/ {CB, 6},{B2, 0},{B2, 0},{UN, 0},{B2, 0},{DB,19},{DB, 3},{B2, 0},
/*48*/ {B2, 0},{DB,23},{UN, 0},{B2, 0},{DB, 7},{B2, 0},{B2, 0},{B3, 0},
/*50*/ {B2, 0},{DB,21},{UN, 0},{B2, 0},{UN, 0},{B2, 0},{B2, 0},{B3, 0},
/*58*/ {UN, 0},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*60*/ {B2, 0},{DB,12},{DB,13},{B2, 0},{DB,14},{B2, 0},{B2, 0},{B3, 0},
/*68*/ {DB,15},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*70*/ {DB,11},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*78*/ {B3, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{B3, 0},
};
#ifdef IP19
/* really need to access all data uncached while processing a cache error
* exception in order to not perturb the state of the cache.
*/
#define data_eccsyns ecc_info_param->ecc_data_eccsyns
#define tag_eccsyns ecc_info_param->ecc_tag_eccsyns
#else /* !IP19 */
#define data_eccsyns real_data_eccsyns
#define tag_eccsyns real_tag_eccsyns
#endif /* !IP19 */
#ifdef R4000
/* calc_err_info() computes the checkbits for the incoming value(s)
* (two data uints if data, one uint (STagLo) if tag. It then derives
* the syndrome and uses it to fetch the eccdesc entry from the proper
* table. The following #defines and structures allow it to determine
* which ecc to compute, and to return the info to the calling routine.
*/
#define DATA_CBITS 1
#define TAG_CBITS 2
/* Both data and tag ecc submit checkbits and receive computed checkbits,
* the resulting syndrome, and syn_info. Data ecc, however, is computed
* from the values of two uints; tag ecc needs a portion of the STagLo
* register.
*/
typedef struct error_info {
unchar cbits_in;
unchar cbits_out;
unchar syndrome;
eccdesc_t syn_info;
union {
struct {
uint d_lo;
uint d_hi;
} data_in;
struct {
uint s_tlo;
} tag_in;
} ecc_type_t;
} error_info_t;
#define eidata_lo ecc_type_t.data_in.d_lo
#define eidata_hi ecc_type_t.data_in.d_hi
#define eis_taglo ecc_type_t.tag_in.s_tlo
/* Bits in the CacheErr reg tell the handler where the ECC error occurred.
* The sbd.h CACH_XX defines, plus the following SYSAD describe location;
* ecc_t_or_d tells whether the error was in the data field, the tag, or
* both. (The primary caches have separate parity bits for data & tags;
* the 2ndary has separate ecc checkbits for each, 7-bit for tags, 8 for data).
*/
#define SYSAD (CACH_SD + 1)
#define BAD_LOC (SYSAD + 1)
static char *error_loc_names[] = {
"primary i-cache", /* CACH_PI */
"primary d-cache", /* CACH_PD */
"secondary i-cache", /* CACH_SI */
"secondary d-cache", /* CACH_SD */
"CPU SysAD bus", /* SYSAD */
"<bad loc>", /* invalid */
};
enum ecc_t_or_d { DATA_ERR = 1, TAG_ERR, D_AND_T_ERR };
#define BYTESPERWD (sizeof(int))
#define BYTESPERDBLWD (2 * BYTESPERWD)
#define BYTESPERQUADWD (4 * BYTESPERWD)
#define NUM_TAGS 2
#define TAGLO_IDX 0 /* load & store cachops: lo == [0], hi [1] */
#define TAGHI_IDX 1 /* not used on IP17 (taghi must be zero!) */
/* one error mandates that the data caches be flushed (not
* just lines that 'hit'. Since we aren't trying to match
* any particular virtual address, pick an arbitrary address
* that maps to the beginning of the secondary cache. */
#define FOUR_MEG (0x400000l)
#define FLUSH_ADDR FOUR_MEG
/* the taglo register has different formats depending on whether the
* tag info is from a primary or secondary tag. The following macros
* return the state of the cacheline: clean or dirty, which are the
* only valid choices on the IP17. */
#define CLEAN_P_TAG(p_tlo) ((p_tlo & PSTATEMASK) == PCLEANEXCL)
#define CLEAN_S_TAG(s_tlo) ((s_tlo & SSTATEMASK) == SCLEANEXCL)
#define DIRTY_P_TAG(p_tlo) ((p_tlo & PSTATEMASK) == PDIRTYEXCL)
#define DIRTY_S_TAG(s_tlo) ((s_tlo & SSTATEMASK) == SDIRTYEXCL)
/* In order to allow more than one ECC exception to be handled before
* the cleanup-interrupt invokes ecc_cleanup(), define a structure
* that contains all info relevant to each ecc exception. An array
* of these allows multiple exceptions. Use two pointers, a 'writing'
* pointer for the handler to write the frames, and a 'reading'
* pointer for ecc_cleanup and ecc_panic to display the frames.
* Implement them as circular buffers.
*/
#ifdef IP19
#define ECC_FRAMES 64
#else
#define ECC_FRAMES 10
#endif
/* #define ECC_DEBUG */
/* Keep a tally of the ECC errors in each part (tag or data) of each
* cache. Since any errors in either of the primary caches means the
* entire R4K must be discarded, we don't track primary errors by address:
* frequency of occurrence is sufficiently detailed. Cache Errors
* may be in data or tag, SysAD errors are in data only (tag ecc
* is computed when the data is put into the cache lines). NO_ERROR
* keeps a count of the number of times the handler found no error
* in the indicated spot. This is the 'err_cnts' field in ecc_info.
* the #define of ECC_ERR_TYPES and the ecc_err_types enum are declared
* in IP17.h to allow cmd/ecc to determine the array size needed when
* doing an SGI_R4K_CERRS syscall for cache-error tallies. */
/* +++++++++++++ sys/IP17.h +++++++++++++++ */
/*
#define ECC_ERR_TYPES 8
enum ecc_err_types { PI_DERRS = 0, PI_TERRS, PD_DERRS, PD_TERRS,
SC_DERRS, SC_TERRS, SYSAD_ERRS, NO_ERROR };
*/
/* +++++++++++++ sys/IP17.h +++++++++++++++ */
static char *err_type_names[] = {
"pi-d","pi-t","pd-d","pd-t",
"sc-d","sc-t", "sysad", "noerr" };
#define ECC_ALL_MSGS -1
#define ECC_PANIC_MSG 0
#define ECC_INFO_MSG 1
#define ECC_ERROR_MSG 2
volatile char panic_str[] = "PANIC MSG: ";
volatile char info_str[] = "INFO MSG: ";
volatile char error_str[] = "ERROR MSG: ";
volatile char *msg_strs[] = { panic_str, info_str, error_str };
/* each ecc_handler invokation saves lots of info: */
typedef struct err_desc {
volatile k_machreg_t e_sr;
volatile uint e_cache_err;
volatile k_machreg_t e_error_epc;
volatile int e_location; /* CACH_{ PI, PD, SI, SD } or SYSAD */
volatile uint e_tag_or_data; /* DATA_ERR, TAG_ERR, or D_AND_T_ERR */
volatile __uint64_t e_paddr; /* entire physical addr of error (16 GB)*/
volatile k_machreg_t e_vaddr; /* p-cache virtual addr of error */
volatile uint e_s_taglo;
volatile uint e_p_taglo;
volatile uint e_badecc;
volatile uint e_lo_badval;
volatile uint e_hi_badval;
volatile uint e_syndrome;
volatile uint e_2nd_syn;
volatile uint e_syn_info;
volatile uint e_user;
volatile uint e_prevbadecc;
volatile pid_t e_pid;
volatile cpuid_t e_cpuid;
volatile uint e_sbe_dblwrds; /* bit mask of double-words with SBE */
volatile uint e_mbe_dblwrds; /* bit mask of double-words with DBE */
#ifdef _MEM_PARITY_WAR
volatile eframe_t *e_eframep;
volatile k_machreg_t *e_eccframep;
#endif /* _MEM_PARITY_WAR */
volatile uchar_t e_flags;
} err_desc_t;
/* definitions for e_flags */
#define E_PADDR_VALID 1 /* we are certain of the physical address */
#define E_VADDR_VALID 2 /* we are certain of pidx */
#define E_PADDR_MC 4 /* bad address reported by MC */
#define E_PADDR_GIO 8 /* bad address reported by HPC3 */
typedef struct ecc_info {
#ifdef IP19
/* rest of data is referenced uncached so need to be sure no cached data
* is in same cacheline.
*/
char cacheline_pad1[128];
#endif /* IP19 */
#ifdef ECC_TEST_EW_BIT
/* The following variable will be set to "1" when the ecc_handler has
* reached an "interesting place" and where it will wait for the "master
* cpu" to perform a cached access to the error location.
*/
int ecc_wait_for_external;
/* following fields setup by cpu2 which accesses the second bad line in
* cpu1's cache.
*/
int ecc_err2_datahi;
int ecc_err2_datalo;
int ecc_err2_cpuid;
/* cpu1 sets up the address of the second error */
int *ecc_err2_ptr;
/* cpu1 logs its' cacheErr register value after the second error has been
* accessed by cpu2.
*/
int ecc_cpu1_cacheerr2;
#endif
volatile int ecc_w_index; /* writing index (used by ecc_handler) */
volatile int ecc_r_index; /* reading index (ecc_cleanup &ecc_panic) */
volatile uint needs_cleanup;
volatile uint cleanup_cnt;
volatile uint ecc_flags;
#ifndef _MEM_PARITY_WAR
volatile k_machreg_t eframep;
volatile k_machreg_t eccframep;
#endif /* _MEM_PARITY_WAR */
volatile uint ecc_err_cnts[ECC_ERR_TYPES];
volatile err_desc_t desc[ECC_FRAMES];
#ifndef IP19
volatile char *ecc_panic_msg[ECC_FRAMES];
volatile char *ecc_info_msg[ECC_FRAMES];
volatile char *ecc_error_msg[ECC_FRAMES];
#else /* IP19 */
volatile int ecc_info_inited;
volatile int ecc_inval_eloc_where;
volatile int ecc_panic;
volatile int ecc_panic_cpuid; /* cpuid of panicing cpu */
volatile int ecc_panic_newmaster;
volatile int ecc_panic_recoverable;
volatile char ecc_panic_msg[ECC_FRAMES];
volatile char ecc_info_msg[ECC_FRAMES];
volatile char ecc_error_msg[ECC_FRAMES];
/* ecc_entry_state indicates current state of the ECC_FRAME entry:
* 0 == unused
* 1 == ecc_handler is currently active on entry
* 2 == ecc_handler has completed entry, awaiting ecc_cleanup
*/
volatile uint ecc_entry_state[ECC_FRAMES];
/* Following set of virtual addresses will be used by the kernel during
* ECC error processing in order to access the data at the point of the
* error without causing a VCE exception.
*/
__psunsigned_t ecc_vcecolor;
/* Following location will hold copy of EVERROR_EXT so it can be picked
* up by the ecc_handler wihtout the compiler generating loads to cached
* global address space.
*/
everror_ext_t *everror_ext;
/* global data items which need to be references uncached */
uint *ecc_tag_dbpos; /* avoid cached or gp-rel reference */
struct d_emask *ecc_d_ptrees;
struct t_emask *ecc_t_ptrees;
eccdesc_t *ecc_data_eccsyns;
eccdesc_t *ecc_tag_eccsyns;
__psunsigned_t ecc_dummyline;
__psunsigned_t ecc_k0size_less1;
pfn_t ecc_physmem;
int ecc_picache_size;
int ecc_pdcache_size;
int ecc_attempt_recovery;
/* rest of data is referenced uncached so need to be sure no cached data
* is in same cacheline.
*/
char cacheline_pad2[128];
#endif /* IP19 */
} ecc_info_t;
#ifdef IP19
/* Can't load from PDA since that would be a cached access.
* Most usage of SCACHE_PADDR is passed to "indexed load" routines
* which will automatically size to the secondary cache in HW>
*/
#define SCACHE_PADDR(edp) (edp->e_paddr)
#else
#define SCACHE_PADDR(edp) (edp->e_paddr & (private.p_scachesize-1))
#endif
#define POFFSET_PADDR(edp) (edp->e_paddr & ~(NBPP-1))
#ifdef _MEM_PARITY_WAR
#define ecc_info (*((volatile ecc_info_t *) CACHE_ERR_ECCINFO_P))
#define ecc_info_ptr ecc_info
#define ECC_INFO(a) ecc_info.a
#else /* _MEM_PARITY_WAR */
#ifdef IP19
volatile ecc_info_t real_ecc_info;
/* the following macro should NOT be used when in ecc_handler since compiler
* generates "gp" relative constants to perform this conversion and loading
* these constants results in cached accesses.
*/
#define ecc_info_ptr (*(volatile ecc_info_t*)(K0_TO_K1(&real_ecc_info)))
#define ECC_INFO(a) ecc_info_param->a
/* dummy cacheline is 3 cachelines long and we use the middle to
* guarentee it's not on the same line as any other cached data.
*/
static long long dummy_cacheline[48];
#else /* !IP19 */
volatile ecc_info_t ecc_info;
#define ecc_info_ptr ecc_info
#define ECC_INFO(a) ecc_info.a
#endif /* !IP19 */
#endif /* _MEM_PARITY_WAR */
#ifndef IP19
volatile int ecc_info_initialized = 0;
#endif
volatile int call_cleanup = 0;
volatile int in_cleanup = 0;
#if DEBUG_ECC
volatile uint f_ptaglo;
volatile uint f_staglo;
volatile uint f_loval;
volatile uint f_hival;
volatile __psunsigned_t f_p_caddr;
volatile __psunsigned_t f_s_caddr;
volatile uint f_cooked_ecc;
volatile uint f_d_ecc;
volatile uint f_ptaglo1;
volatile uint f_staglo1;
#endif /* DEBUG_ECC */
/* when calling print_ecc_info from symmon must use qprintf to avoid
* scrogging the kernel buffers. When non-zero, this global directs
* all display routines to use qprintf, else printf */
volatile int pm_use_qprintf = 0;
typedef void (*pfunc)(char *, ...);
extern void qprintf(char *, ...);
#define K_ECC_PANIC 0x1
#define HANDLER_OVERRAN 0x2
#ifdef IP19
extern int ecc_check_cache( __psunsigned_t );
#else /* !IP19 */
#ifdef _MEM_PARITY_WAR
extern int log_perr(uint addr, uint bytes, int no_console, int print_help);
extern int ecc_find_pidx(int, paddr_t);
volatile char **msg_addrs[] = { (volatile char **)NULL,
(volatile char **)NULL,
(volatile char **)NULL };
#else /* _MEM_PARITY_WAR */
volatile char **msg_addrs[] = { (volatile char **)&ecc_info.ecc_panic_msg[0],
(volatile char **)&ecc_info.ecc_info_msg[0],
(volatile char **)&ecc_info.ecc_error_msg[0] };
#endif /* _MEM_PARITY_WAR */
#endif /* !IP19 */
#define NEXT_INDEX(x) if (x+1 >= ECC_FRAMES) \
x = 0; \
else \
x += 1
#define PREV_INDEX(x) if (x-1 < 0) \
x = (ECC_FRAMES-1); \
else \
x -= 1
#if MP
#define PRINT_CPUID(id) cmn_err(CE_CONT, "CPU %d: ", id)
#else
#define PRINT_CPUID(id)
#endif
/* ecc handling prototypes */
static int print_ecctype(int, int, uint, __uint64_t, int, uint);
#if IP19
int real_calc_err_info(int, error_info_t *, volatile ecc_info_t *);
static int real_ecc_print_msg(int, uint, int, int, uint, volatile ecc_info_t *);
static int real_ecc_assign_msg(int, int, char, volatile ecc_info_t *);
static int real_ecc_fixmem(uint, eframe_t *, k_machreg_t *, uint, k_machreg_t,
volatile ecc_info_t *);
static int real_ecc_fixcache(uint, eframe_t *, k_machreg_t *, uint,
k_machreg_t, volatile ecc_info_t *);
int real_ecc_fixctag(uint, int, volatile ecc_info_t *);
int real_ecc_fixcdata(uint, int, k_machreg_t *, volatile ecc_info_t *);
static int real_ecc_log_error(int, int, volatile ecc_info_t *);
int real_xlate_bit(enum error_type, uint, volatile ecc_info_t *);
#define ecc_print_msg(a0,a1,a2,a3,a4) real_ecc_print_msg(a0,a1,a2,a3,a4,ecc_info_param)
#define ecc_log_error(a0,a1) real_ecc_log_error(a0,a1,ecc_info_param)
#define ecc_assign_msg(a0,a1,a2) real_ecc_assign_msg(a0,a1,a2,ecc_info_param)
#define ecc_fixmem(a0,a1,a2,a3,a4) real_ecc_fixmem(a0,a1,a2,a3,a4,ecc_info_param)
#define ecc_fixcache(a0,a1,a2,a3,a4) real_ecc_fixcache(a0,a1,a2,a3,a4,ecc_info_param)
#define ecc_fixctag(a0,a1) real_ecc_fixctag(a0,a1,ecc_info_param)
#define ecc_fixcdata(a0,a1,a2) real_ecc_fixcdata(a0,a1,a2,ecc_info_param)
#define xlate_bit(a0,a1) real_xlate_bit(a0,a1,ecc_info_param)
#define calc_err_info(a0,a1) real_calc_err_info(a0,a1,ecc_info_param)
#else /* !IP19 */
int calc_err_info(int, error_info_t *);
static int ecc_print_msg(int, uint, int, int, uint);
static int ecc_assign_msg(int, int, char *);
#ifndef MCCHIP
static int ecc_fixmem(uint, eframe_t *, k_machreg_t *, uint, k_machreg_t);
#endif /* ! MCCHIP */
static int ecc_fixcache(uint, eframe_t *, k_machreg_t *, uint, k_machreg_t);
int ecc_fixctag(uint, int);
int ecc_fixcdata(uint, int, k_machreg_t *);
static int ecc_log_error(int, int);
int xlate_bit(enum error_type, uint);
#endif /* !IP19 */
static int ecc_bad_ptag(uint);
int _c_hwbinv(int, __psunsigned_t);
int _c_hinv(int, __psunsigned_t);
int _c_ilt_n_ecc(int, __psunsigned_t, uint[], uint *);
int _c_ilt(int, __psunsigned_t, uint[]);
int _c_ist(int, __psunsigned_t, uint[]);
int _munge_decc(__psunsigned_t, uint);
#ifndef SCACHE_LINESIZE
#define SCACHE_LINESIZE (32*4)
#endif
#ifdef IP19
static char real_ecc_overrun_msg[] = "ECC error overrun!";
static char real_ecc_eb_not_i[] = "ecc_handler: EB bit set but error not i-cache";
static char real_ecc_incons_err[] = "ECC error not SysAD or either cache!";
static char real_ecc_ew_err[] = "double ECC error, incomplete information!";
static char
real_ecc_kernel_err[] = "Uncorrectable HARDWARE ECC error, in kernel mode";
static char
real_ecc_user_err[] = "Uncorrectable HARDWARE ECC error, in user mode";
static char real_ecc_inval_loc[] = "Invalid 'location' parameter in fixcache";
static char real_ecc_no_ptagerr[] = "No ecc tag error found in primary cacheline";
static char real_ecc_no_stagerr[] = "No ecc tag error found in secondary cacheline";
static char real_ecc_ptfix_failed[] = "ECC repair on primary tag unsuccessful";
static char real_ecc_stfix_failed[] = "ECC repair on secondary tag unsuccessful";
static char real_ecc_no_pdataerr[] = "No ecc data error found in primary cacheline";
static char real_ecc_no_sdataerr[]= "No ecc data error found in secondary cacheline";
static char real_ecc_sinvalid_noerr[]= "Secondary cacheline invalid, OK on re-read";
static char real_ecc_sinvalid_err[]= "Secondary cacheline invalid, ERROR on re-read";
static char real_ecc_sdcfix_failed[]="Data repair on clean secondary cache-line failed";
static char real_ecc_sdcfix_good[]="Data repair on clean 2nd cache-line SUCCESSFUL";
static char real_ecc_sddfix_failed[]="Data repair on dirty secondary cache-line failed";
static char real_ecc_sddfix_good[]="Data repair on dirty 2nd cache-line SUCCESSFUL";
static char real_ecc_md_sddfix_failed[]="Multi-bit data fix on dirty S-line failed";
static char real_ecc_p_data_err[] = "Data parity error in primary cache";
static char real_ecc_inval_eloc[] = "ecc_log_error: bad error location";
static char real_ecc_bad_s_tag[] = "Uncorrectable error in secondary cache tag";
static char real_ecc_ft_hinv_m_sc[] = "fixtag: _c_hinv missed cache";
static char real_ecc_scerr_too_early[] = "Scache error before recovery is possible";
static char real_ecc_ei_notdirty[] = "Scache error on store-miss but line not dirty";
static char real_ecc_mixed_psize[] = "ecc_handler expects primary linesizes equal";
static char real_ecc_ei_norecover[] = "Scache error on store-miss, recovery not possible ";
static char real_ecc_possible_ei[] = "cache test failed, possible store-miss?";
#define ecc_overrun_msg 1
#define ecc_eb_not_i 2
#define ecc_incons_err 3
#define ecc_ew_err 4
#define ecc_kernel_err 5
#define ecc_user_err 6
#define ecc_inval_loc 7
#define ecc_no_ptagerr 8
#define ecc_no_stagerr 9
#define ecc_ptfix_failed 10
#define ecc_stfix_failed 11
#define ecc_no_pdataerr 12
#define ecc_no_sdataerr 13
#define ecc_sinvalid_noerr 14
#define ecc_sinvalid_err 15
#define ecc_sdcfix_failed 16
#define ecc_sdcfix_good 17
#define ecc_sddfix_failed 18
#define ecc_sddfix_good 19
#define ecc_md_sddfix_failed 20
#define ecc_p_data_err 21
#define ecc_inval_eloc 22
#define ecc_bad_s_tag 23
#define ecc_ft_hinv_m_sc 24
#define ecc_scerr_too_early 25
#define ecc_ei_notdirty 26
#define ecc_mixed_psize 27
#define ecc_ei_norecover 28
#define ecc_possible_ei 29
#else /* !IP19 */
static char ecc_overrun_msg[] = "ECC error overrun!";
#if !MCCHIP && !IP32
static char ecc_eb_not_i[] = "ecc_handler: EB bit set but error not i-cache";
#endif
static char ecc_incons_err[] = "ECC error not SysAD or either cache!";
static char
ecc_kernel_err[] = "Uncorrectable HARDWARE ECC error, in kernel mode";
static char
ecc_user_err[] = "Uncorrectable HARDWARE ECC error, in user mode";
static char ecc_inval_loc[] = "Invalid 'location' parameter in fixcache";
static char ecc_no_ptagerr[] = "No ecc tag error found in primary cacheline";
static char ecc_no_stagerr[] = "No ecc tag error found in secondary cacheline";
static char ecc_ptfix_failed[] = "ECC repair on primary tag unsuccessful";
static char ecc_stfix_failed[] = "ECC repair on secondary tag unsuccessful";
static char ecc_no_pdataerr[] = "No ecc data error found in primary cacheline";
static char ecc_no_sdataerr[]= "No ecc data error found in secondary cacheline";
static char ecc_sinvalid_noerr[]= "Secondary cacheline invalid, OK on re-read";
static char ecc_sinvalid_err[]= "Secondary cacheline invalid, ERROR on re-read";
static char ecc_sdcfix_failed[]="Data repair on clean secondary cache-line failed";
static char ecc_sdcfix_good[]="Data repair on clean 2nd cache-line SUCCESSFUL";
static char ecc_sddfix_failed[]="Data repair on dirty secondary cache-line failed";
static char ecc_sddfix_good[]="Data repair on dirty 2nd cache-line SUCCESSFUL";
static char ecc_md_sddfix_failed[]="Multi-bit data fix on dirty S-line failed";
static char ecc_p_data_err[] = "Data parity error in primary cache";
static char ecc_inval_eloc[] = "ecc_log_error: bad error location";
static char ecc_bad_s_tag[] = "Uncorrectable error in secondary cache tag";
static char ecc_ft_hinv_m_sc[] = "fixtag: _c_hinv missed cache";
#if IP20 || IP22 || IP32 || IPMHSIM
static char ecc_extreq[] = "ecc_handler: ECC error result of external request";
#endif
#endif /* !IP19 */
#ifdef IP19_CACHEERRS_FATAL
volatile int verbose_ecc = 1; /* get lots of info on the (single) error */
#else
#ifdef IP19
volatile int verbose_ecc = 1; /* for now, get lots of info on correction too */
#else /* !IP19 */
volatile int verbose_ecc = 0;
#endif /* !IP19 */
#endif
volatile int syslog_ecctype = 1;
#if IP19
extern real_ecc_panic(void);
/* This routine is invoked from doacvec() when a cpu is checking for
* cpuvactions and it finds no pending actions.
* This solves the problem of a cpu which is trying to panic due to an ecc
* error in the cache. We know that continuing to execute on that cpu causes
* problems in some circumstances, so we attempt to change processors for
* the actual panic.
*
* Now, if the original failing cpu is the "master" cpu (which normally
* checks for ecc_cleanup() calls and panics, then we assign a new "master"
* and send it a cpuaction interrupt with NO pending actions (since we
* don't want a cpu with a bad cache to start fetching lines from another
* cpu). We don't want the other cpus to always perform this check since
* it involves an uncached access which is very slow.
*/
void
doacvec_check_ecc_logs(void)
{
if (ecc_info_ptr.ecc_panic == 1)
real_ecc_panic( );
}
/* This routine is invoked from ducons_write() in order to determine if the
* master cpu has moved. If it has, it returns the new master cpuid.
* Otherwise ducons_write will run the system out of cpuaction blocks trying
* to send console actions to cpu 0.
*/
int
ecc_panic_newmaster(void)
{
if (ecc_info_ptr.ecc_panic_newmaster)
return(ecc_info_ptr.ecc_panic_newmaster - 1);
return(0);
}
/* Following routine primarily used by do_mprboot() in order to determine
* that a cpu has died due to an ecc_panic. This is needed to avoid delaying
* the system restart waiting for all cpus to enter do_mprboot() since the
* cpu which died with an ecc error will never enter that routine since we
* attempt to keep it busy "idling" so it does no further damage.
*
* NOTE: only returns an indication that we're in ecc_panic. In the future
* it might be a good idea to return the number of cpus which have invoked
* ecc_panic in case we have several simulataneous failures (maybe due to
* bad memory).
*/
int
ecc_panic_deadcpus(void)
{
if (ecc_info_ptr.ecc_panic)
return(1);
else
return(0);
}
/* this routine is invoked from system clock processing to check
* if we need to perform ecc cleanup. This avoids having the ecc_handler
* making cached references while ERL and DE are set.
*/
void
ecc_interrupt_check(void)
{
if (ecc_info_ptr.needs_cleanup) {
extern real_ecc_panic(void);
if (ecc_info_ptr.ecc_panic)
real_ecc_panic( );
ecc_info_ptr.needs_cleanup = 0;
timeout(ecc_cleanup, 0, TIMEPOKE_NOW);
call_cleanup = 1;
}
#ifdef ECC_TEST_EW_BIT
/* This code assumes that certain locations are useable. Should be
* generalized.
*/
if (ecc_info_ptr.ecc_wait_for_external == 1) {
ecc_info_ptr.ecc_err2_datahi =
*ecc_info_ptr.ecc_err2_ptr;
ecc_info_ptr.ecc_err2_datalo =
*(ecc_info_ptr.ecc_err2_ptr+1);
ecc_info_ptr.ecc_err2_cpuid = cpuid();
ecc_info_ptr.ecc_wait_for_external = 2;
}
#endif /* ECC_TEST_EW_BIT */
}
#endif /* IP19 */
/* called from os/clock.c:timein, which is invoked due to a software
* interrupt (see #define of ECC_INTERRUPT), ecc_cleanup does all the
* work that ecc_handler can't finish because it is a) executing with
* ecc exceptions and interrupts disabled, and b) on an isolated
* stack which won't work with nested exceptions such as K2 tlbfaults.
* These cleanup actions are primarily printing and more detailed
* logging of errors. */
void
ecc_cleanup(void)
{
int index;
uint ospl;
err_desc_t *edp; /* ptr to set of variables to set this time */
int i;
#if IP19
volatile ecc_info_t *ecc_info_param;
ecc_info_param = (volatile ecc_info_t *)(K0_TO_K1(&real_ecc_info));
/* Only let one cpu at a time enter this code */
if (atomicSetInt((int *)&in_cleanup, 1))
return;
#endif
while (1) {
ospl = splecc(); /* lock during index incr and test */
#ifdef IP19
/* We check to see if the ecc_handler has finished updating the
* entry we're about to read (it updates the ecc_w_index before
* the entry is complete)
*/
index = ecc_info_ptr.ecc_r_index;
NEXT_INDEX(index);
if ((ecc_info_ptr.ecc_r_index == ecc_info_ptr.ecc_w_index) ||
(ecc_info_ptr.ecc_entry_state[index] != 2)) {
if (ecc_info_ptr.ecc_entry_state[index] != 0)
ecc_info_ptr.needs_cleanup = 1; /* try again later */
in_cleanup = 0;
/* if the handler hasn't bumped the w_index, call_cleanup
* should still be 0 from the last time through the while */
ASSERT(!call_cleanup);
splxecc(ospl);
return;
} else {
call_cleanup = 0;
}
#else /* !IP19 */
if (ecc_info_ptr.ecc_r_index == ecc_info_ptr.ecc_w_index) {
in_cleanup = 0;
/* if the handler hasn't bumped the w_index, call_cleanup
* should still be 0 from the last time through the while */
ASSERT(!call_cleanup);
splxecc(ospl);
return;
} else {
in_cleanup = 1;
call_cleanup = 0;
}
#endif /* !IP19 */
/* cleanup uses the (trailing) read-index */
NEXT_INDEX(ecc_info_ptr.ecc_r_index);
index = ecc_info_ptr.ecc_r_index;
ecc_info_ptr.cleanup_cnt++;
splxecc(ospl);
/* point edb to set of variables to use */
edp = (err_desc_t *)&(ecc_info_ptr.desc[index]);
if (verbose_ecc || edp->e_user) {
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," ecc_cleanup: %d times (r_index %d w_index %d)\n",
ecc_info_ptr.cleanup_cnt,
ecc_info_ptr.ecc_r_index, ecc_info_ptr.ecc_w_index);
}
/* always display error msgs for SYSLOG */
ecc_print_msg(ECC_ERROR_MSG,index,1,1,edp->e_cpuid);
#ifdef IP19
if (ecc_info_param->ecc_attempt_recovery)
cmn_err(CE_WARN,"Data may have been corrupted by scache error\n");
#endif /* IP19 */
if (syslog_ecctype)
print_ecctype(edp->e_location, edp->e_tag_or_data,
edp->e_syndrome, edp->e_paddr, 1, edp->e_cpuid);
if (edp->e_user || verbose_ecc) {
for (i = ECC_INFO_MSG; i >= ECC_PANIC_MSG; i--)
ecc_print_msg(i,index,1,1,edp->e_cpuid);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," c_err %R, err_epc 0x%x\n",
edp->e_cache_err, cache_err_desc,
edp->e_error_epc);
if (edp->e_user || verbose_ecc) {
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," s_taglo %R%secc 0x%x e_pc 0x%x\n",
edp->e_s_taglo,
#if R4000 && R10000
IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
s_taglo_desc,
(edp->e_s_taglo ? "\n " : " "),
edp->e_badecc, edp->e_error_epc);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," data_lo 0x%x data_hi 0x%x sbe dblwrds 0x%x mbe dblwrds 0x%x\n",
edp->e_lo_badval, edp->e_hi_badval,
edp->e_sbe_dblwrds, edp->e_mbe_dblwrds);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT,
" Err SR %R%spaddr 0x%llx, vaddr 0x%x\n",
edp->e_sr,
#if R4000 && R10000
IS_R10000() ? r10k_sr_desc :
#endif /* R4000 && R10000 */
sr_desc,
(edp->e_sr ? "\n " : " "),
edp->e_paddr,edp->e_vaddr);
}
}
if (edp->e_user) {
#ifdef _MEM_PARITY_WAR
dobuserr((struct eframe_s *)edp->e_eframep,
(inst_t *)edp->e_error_epc, 2);
#else
dobuserr((struct eframe_s *)ecc_info_ptr.eframep,
(inst_t *)edp->e_error_epc, 2);
#endif
if (edp->e_pid) {
while (sigtopid(edp->e_pid, SIGBUS,
SIG_ISKERN|SIG_NOSLEEP,
0, 0, 0) == EAGAIN)
;
PCB(pcb_resched) = 1;
} else
cmn_err(CE_WARN,
"NULL curuthread with user ecc error!\n");
edp->e_user = 0;
edp->e_pid = 0;
}
#ifdef IP19
ecc_info_ptr.ecc_entry_state[index] = 0;
#endif
} /* while */
} /* ecc_cleanup */
#if IP19
void
ecc_panic(volatile ecc_info_t *ecc_info_param)
{
/* We keep cached access to a minimum, though it appaears that stores
* are the real problem (cached ones that is).
*/
ecc_info_param->ecc_panic = 1;
ecc_info_param->needs_cleanup = 1;
ecc_info_param->ecc_panic_cpuid = cpuid();
/* Not much we can do if only one cpu, go ahead and try to panic */
if (maxcpus == 1) {
real_ecc_panic();
}
/* If we're the master cpu, try nominating a new master and send
* it an interrupt. Otherwise no-one notices the cache error since
* it is only the master cpu which polls the uncached location
* "needs_cleanup" once per second.
*/
if (private.p_flags & PDAF_MASTER) {
cpuid_t who;
if (cpuid() == 0)
who = 1;
else
who = 0;
ecc_info_param->ecc_panic_newmaster = who+1;
sendintr(who, DOACTION);
} else
/* attempt to wakeup the master more quickly than its'
* one second maintaince processing. May not work, but
* will be noticed eventually.
*/
sendintr(masterpda->p_cpuid, DOACTION);
/* wait for other cpu to actually report the panic. If this cpu
* does ANYTHING cached it may corrupt other data if this error
* was due to a store-miss.
*/
/* wait for reset signal (HW) from master cpu */
while (1)
;
}
real_ecc_panic()
{
volatile ecc_info_t *ecc_info_param =
(volatile ecc_info_t *)(K0_TO_K1(&real_ecc_info));
#else /* !IP19 */
/* ARGSUSED */
ecc_panic(
uint cache_err,
uint errorepc)
{
#endif /* !IP19 */
err_desc_t *edp; /* ptr to set of variables to set this time */
/* use w_index, handler was working there when it panic'ed */
int index = ECC_INFO(ecc_w_index);
#if defined (IP19)
if (ecc_info_param->ecc_info_inited != 1)
/* NOTE: No machine_error_dump() called in this case, but I've
* seen cpus "hang" trying to print that info and nothing comes
* out on the console. So get simple error message out first.
*/
cmn_err_tag(69,CE_PANIC|CE_CPUID,
"CPU cache error occurred before handler inited\n");
/* Set ecc_panic to 2 to indicate that we're already processing
* the panic condition. Decreases the chance that another cpu will
* try this at the same time. Not MP safe, but the cache error recovery
* logic is not MP safe for other reasons, so just keep risk to a minimum.
*/
ecc_info_param->ecc_panic = 2;
/* See if we're supposed to become the "master" due to a cache error
* on the real "master" which is now unresponsive in an
* "idle forever" loop.
*/
if ((ecc_info_param->ecc_panic_newmaster) &&
(ecc_info_param->ecc_panic_newmaster-1 == cpuid())){
private.p_flags |= PDAF_MASTER;
/* need to update masterpda gloabl so that we can reboot
* after panic is complete.
*/
masterpda = pdaindr[cpuid()].pda;
cmn_err(CE_CONT|CE_CPUID,
"ecc_panic: assuming role of master cpu (due to cache error)\n");
}
#endif /* IP19 */
{
extern int ecc_panic_cpu;
/* this flag lets icmn_err know that we're panic-ing so it
* avoids performing some operations which may lead to
* tlbmisses.
* Also, make first message give as much info as possible
* in case only first message makes it into console buffer.
*/
#ifdef IP19
ecc_panic_cpu = ecc_info_param->ecc_panic_cpuid;
#else
ecc_panic_cpu = cpuid();
#endif
if (ECC_INFO(ecc_flags) & HANDLER_OVERRAN)
cmn_err(CE_CONT|CE_CPUID,
"ecc_panic initiated, ECC error overrun!\n");
else
cmn_err(CE_CONT|CE_CPUID,"ecc_panic initiated! (for cpu %d)\n", ecc_panic_cpu);
}
#if defined (IP19)
/* Empirical evidence suggests that this machine_err_dump should come
* after the preceding "panic" variables and initial error message.
* I saw many cases where the cpu "hung" attempting to pring the
* machine error state, quite possible due to holding the putbuflck.
* With this placement we always seem to get the panic cleanly.
*/
machine_error_dump("");
#endif /* IP19 */
/* point edb to set of variables to use */
edp = (err_desc_t *)&(ECC_INFO(desc[index]));
if (ECC_INFO(ecc_flags & (K_ECC_PANIC | HANDLER_OVERRAN))) {
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT,"ECC PANIC: %s\n",
((ECC_INFO(ecc_flags) & K_ECC_PANIC)
#ifdef IP19
? real_ecc_kernel_err
: real_ecc_overrun_msg));
#else /* !IP19 */
? ecc_kernel_err
: ecc_overrun_msg));
#endif /* !IP19 */
}
#if IP19
real_ecc_print_msg(ECC_ALL_MSGS,index,0,1,edp->e_cpuid, ecc_info_param);
#else
ecc_print_msg(ECC_ALL_MSGS,index,0,1,edp->e_cpuid);
#endif
if (edp->e_location != CACH_PI && edp->e_location != CACH_PD)
print_ecctype(edp->e_location, edp->e_tag_or_data,
edp->e_syndrome,edp->e_paddr, 1, edp->e_cpuid);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," CacheErr %R\n", edp->e_cache_err, cache_err_desc);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," Status %R\n", edp->e_sr,
#if R4000 && R10000
IS_R10000() ? r10k_sr_desc :
#endif /* R4000 && R10000 */
sr_desc);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT,
" ErrorEPC 0x%x, Exception Frame 0x%x, ECC Frame 0x%x\n",
#ifdef _MEM_PARITY_WAR
edp->e_error_epc, (__psunsigned_t)edp->e_eframep,
(__psunsigned_t)edp->e_eccframep);
#else
edp->e_error_epc, ECC_INFO(eframep), ECC_INFO(eccframep));
#endif /* _MEM_PARITY_WAR */
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," PhysAddr 0x%llx, VirtAddr 0x%x\n",
edp->e_paddr, edp->e_vaddr);
#if _MEM_PARITY_WAR
if (edp->e_flags & E_PADDR_MC) {
PRINT_CPUID(edp->e_cpuid);
log_perr(edp->e_paddr,
edp->e_eccframep[ECCF_CPU_ERR_STAT] & 0xff,
0, 1);
}
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," cpu_err_stat: 0x%x, cpu_err_addr: 0x%x\n",
edp->e_eccframep[ECCF_CPU_ERR_STAT],
edp->e_eccframep[ECCF_CPU_ERR_ADDR]);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," gio_err_stat: 0x%x, gio_err_addr: 0x%x\n",
edp->e_eccframep[ECCF_GIO_ERR_STAT],
edp->e_eccframep[ECCF_GIO_ERR_ADDR]);
#if IP22
if (is_fullhouse())
cmn_err(CE_CONT," hpc3_buserr_stat: 0x%x\n",
PHYS_TO_K1(HPC3_BUSERR_STAT_ADDR));
#endif /* IP22 */
#endif /* _MEM_PARITY_WAR */
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," ECC cbits 0x%x data_lo 0x%x data_hi 0x%x\n",
edp->e_badecc,edp->e_lo_badval, edp->e_hi_badval);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," sbe dblwrds 0x%x mbe dblwrds 0x%x\n",
edp->e_sbe_dblwrds, edp->e_mbe_dblwrds);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," s_taglo %R%s\n",
edp->e_s_taglo,s_taglo_desc,
(edp->e_s_taglo ? "\n " : " "));
if (edp->e_2nd_syn)
cmn_err(CE_CONT,"2nd_syn 0x%x\n",edp->e_2nd_syn);
else
cmn_err(CE_CONT,"\n");
#if DEBUG_ECC
if (f_s_caddr) {
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT,
" f_ vars:\n lov 0x%x hiv 0x%x pcad %x scad %x\n",
f_loval, f_hival, f_p_caddr, f_s_caddr);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," P-lo %R%sS-lo %R\n",
f_ptaglo,p_taglo_desc, (f_ptaglo ? "\n " : " "),
f_staglo,
#if R4000 && R10000
IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
s_taglo_desc);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," cooked 0x%x, f_d_ecc 0x%x\n",
f_cooked_ecc,f_d_ecc);
PRINT_CPUID(edp->e_cpuid);
cmn_err(CE_CONT," P-lo1 %R%sS-lo1 %R\n",
f_ptaglo1,p_taglo_desc, (f_ptaglo1 ? "\n " : " "),
f_staglo1,
#if R4000 && R10000
IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
s_taglo_desc);
}
#endif /* DEBUG_ECC */
#ifdef IP19
if (ecc_info_param->ecc_panic_recoverable == 1)
cmn_err_tag(70,CE_PANIC, "Single-bit cache error but recovery disabled\n");
else if (ecc_info_param->ecc_panic_recoverable == 2)
cmn_err_tag(71,CE_PANIC, "Store-miss cache error, possibly recoverable\n");
else
cmn_err_tag(72,CE_PANIC, "Uncorrectable cache ecc/parity error\n");
#else /* !IP19 */
cmn_err_tag(73,CE_PANIC, "Uncorrectable cache ecc/parity error\n");
#endif /* !IP19 */
/*NOTREACHED*/
} /* ecc_panic */
volatile int did_it = 0;
#ifdef _MEM_PARITY_WAR
extern int utlbmiss[], eutlbmiss[];
#ifdef R4600
extern int utlbmiss_r4600[];
extern int eutlbmiss_r4600[];
#endif /* R4600 */
#ifdef _R5000_BADVADDR_WAR
extern int utlbmiss_r5000[];
extern int eutlbmiss_r5000[];
extern int utlbmiss2_r5000[];
extern int eutlbmiss2_r5000[];
extern int utlbmiss1_r5000[];
extern int eutlbmiss1_r5000[];
extern int utlbmiss3_r5000[];
extern int eutlbmiss3_r5000[];
#endif /* _R5000_BADVADDR_WAR */
#if R4000 && (IP19 || IP22)
extern int utlbmiss_250mhz[], eutlbmiss_250mhz[];
extern int utlbmiss2_250mhz[], eutlbmiss3_250mhz[];
#endif /* R4000 && (IP19 || IP22) */
extern int utlbmiss1[], eutlbmiss1[];
extern int utlbmiss2[], eutlbmiss2[];
extern int utlbmiss3[], eutlbmiss3[];
#ifndef _NO_R4000
extern int locore_exl_0[], elocore_exl_0[];
extern int locore_exl_1[], elocore_exl_1[];
extern int locore_exl_2[], elocore_exl_2[];
extern int locore_exl_3[], elocore_exl_3[];
extern int locore_exl_4[], elocore_exl_4[];
extern int locore_exl_5[], elocore_exl_5[];
extern int locore_exl_6[], elocore_exl_6[];
extern int locore_exl_7[], elocore_exl_7[];
extern int locore_exl_8[], elocore_exl_8[];
extern int locore_exl_9[], elocore_exl_9[];
extern int locore_exl_10[], elocore_exl_10[];
extern int locore_exl_11[], elocore_exl_11[];
extern int locore_exl_12[], elocore_exl_12[];
extern int locore_exl_13[], elocore_exl_13[];
#ifdef _R5000_CVT_WAR
extern int locore_exl_14[], elocore_exl_14[];
extern int locore_exl_15[], elocore_exl_15[];
#endif /* _R5000_CVT_WAR */
extern int locore_exl_16[], elocore_exl_16[];
#ifdef USE_PTHREAD_RSA
extern int locore_exl_17[], elocore_exl_17[];
#endif /* USE_PTHREAD_RSA */
extern int locore_exl_18[], elocore_exl_18[];
extern int locore_exl_19[], elocore_exl_19[];
extern int locore_exl_20[], elocore_exl_20[];
extern int locore_exl_21[], elocore_exl_21[];
extern int locore_exl_22[], elocore_exl_22[];
extern int locore_exl_23[], elocore_exl_23[];
extern int locore_exl_24[], elocore_exl_24[];
extern int locore_exl_25[], elocore_exl_25[];
struct exl_handler_table_s {
int *base;
int *limit;
} exl_handler_table[] = {
{ (int *) K0BASE, (int *) K0BASE + NBPP }, /* exception handlers */
{ (int *) K1BASE, (int *) K1BASE + NBPP },
{ utlbmiss, eutlbmiss },
#ifdef R4600
{ utlbmiss_r4600, eutlbmiss_r4600 },
#endif /* R4600 */
#ifdef _R5000_BADVADDR_WAR
{ utlbmiss_r5000, eutlbmiss_r5000 },
{ utlbmiss2_r5000, eutlbmiss2_r5000 },
{ utlbmiss1_r5000, eutlbmiss1_r5000 },
{ utlbmiss3_r5000, eutlbmiss3_r5000 },
#endif /* _R5000_BADVADDR_WAR */
{ utlbmiss2, eutlbmiss3 }, /* includes utlbmiss1 and sharedseg */
#if R4000 && (IP19 || IP22)
{ utlbmiss_250mhz, eutlbmiss_250mhz },
{ utlbmiss2_250mhz, eutlbmiss3_250mhz },
#endif
{ locore_exl_0, elocore_exl_0 },
{ locore_exl_1, elocore_exl_1 },
{ locore_exl_2, elocore_exl_2 },
{ locore_exl_3, elocore_exl_3 },
{ locore_exl_4, elocore_exl_4 },
{ locore_exl_5, elocore_exl_5 },
{ locore_exl_6, elocore_exl_6 },
{ locore_exl_7, elocore_exl_7 },
{ locore_exl_8, elocore_exl_8 },
{ locore_exl_9, elocore_exl_9 },
{ locore_exl_10, elocore_exl_10 },
{ locore_exl_11, elocore_exl_11 },
{ locore_exl_12, elocore_exl_12 },
{ locore_exl_13, elocore_exl_13 },
#ifdef _R5000_CVT_WAR
{ locore_exl_14, elocore_exl_14 },
{ locore_exl_15, elocore_exl_15 },
#endif /* _R5000_CVT_WAR */
{ locore_exl_16, elocore_exl_16 },
#ifdef USE_PTHREAD_RSA
{ locore_exl_17, elocore_exl_17 },
#endif /* USE_PTHREAD_RSA */
{ locore_exl_18, elocore_exl_18 },
{ locore_exl_19, elocore_exl_19 },
{ locore_exl_20, elocore_exl_20 },
{ locore_exl_21, elocore_exl_21 },
{ locore_exl_22, elocore_exl_22 },
{ locore_exl_23, elocore_exl_23 },
{ locore_exl_24, elocore_exl_24 },
{ locore_exl_25, elocore_exl_25 },
{ NULL, NULL }
};
#define exl_handler_table_uc ((struct exl_handler_table_s *) K0_TO_K1((ulong) exl_handler_table))
#endif /* _NO_R4000 */
extern int perr_save_info(eframe_t *, k_machreg_t *, uint, k_machreg_t, int);
#ifdef R4600SC
extern int _r4600sc_enable_scache_erl(void);
extern int _r4600sc_disable_scache_erl(void);
#endif /* R4600SC */
extern int ecc_same_cache_block(int, paddr_t, paddr_t);
extern int tlb_to_phys(k_machreg_t , paddr_t *, int *);
extern unsigned int r_phys_word(paddr_t);
extern int _read_tag(int, caddr_t, int *);
extern ecc_fixup_caches(int, paddr_t, k_machreg_t, uchar_t);
extern int decode_inst(eframe_t *, int, int *, k_machreg_t *, int *);
#endif /* MEM_PARITY_WAR */
#if defined(_MEM_PARITY_WAR) || defined(IP32)
static int
ecc_is_branch(inst_t inst)
{
union mips_instruction i;
unsigned int op;
i.word = inst;
op = i.j_format.opcode;
if (op == spec_op) {
if (i.r_format.func == jr_op || i.r_format.func == jalr_op)
return(1);
return(0);
} else if (op == bcond_op) {
op = i.i_format.rt;
if ((/* op >= bltz_op && */ op <= bgezl_op) ||
(op >= bltzal_op && op <= bgezall_op))
return(1);
return(0);
} else if (op >= cop0_op && op <= cop3_op) {
if (i.r_format.rs == bc_op)
return(1);
return(0);
} else if (((op >= j_op) && (op <= bgtz_op)) ||
((op >= beql_op && op <= bgtzl_op)))
return(1);
return(0);
}
#define LOAD_INSTR 1
#define STORE_INSTR 2
static paddr_t
ecc_get_perr_addr(eframe_t *ep, k_machreg_t errorepc, int *cache_err)
{
paddr_t instaddr, bdaddr, paddr = 0;
k_machreg_t vaddr;
int cached, width, ldst;
#ifdef WRONG
int pidx, sidx;
#endif
inst_t inst, bdinst;
int is_bdslot = 0;
if (!tlb_to_phys(errorepc, &instaddr, &cached))
/* can't translate this address, fail */
return(-1);
#ifdef _MEM_PARITY_WAR
inst = (inst_t)r_phys_word(instaddr);
#else
inst = (inst_t)r_phys_word_erl(instaddr);
#endif
if (ecc_is_branch(inst)) {
if (!tlb_to_phys(errorepc+sizeof(inst_t), &bdaddr, &cached))
return(-1);
#ifdef _MEM_PARITY_WAR
bdinst = (inst_t)r_phys_word(bdaddr);
#else
bdinst = (inst_t)r_phys_word_erl(bdaddr);
#endif
is_bdslot = 1;
}
if (!(*cache_err & CACHERR_ER)) {
/*
* we got an error on an instruction access
* so errorepc points to the offending instruction
* or a branch instruction which may be the offending
* instruction or the offender may be the instruction
* in the branch delay slot.
*
* If only one instruction is involved, or if both
* instructions are in the same cache line, we can
* synthesize a physaddr and build a corresponding
* ce_sidx and ce_pidx and add them to the contents
* of the cache_err register from this exception.
*/
if (is_bdslot) {
if (!ecc_same_cache_block(CACH_PI, instaddr, bdaddr))
return(-1);
}
#ifdef WRONG
/*
* build a reasonable facsimile of ce_pidx and ce_pidx
* and place them in the cache_err register image
*/
pidx = (errorepc >> CACHERR_PIDX_SHIFT) & CACHERR_PIDX_MASK;
sidx = instaddr & CACHERR_SIDX_MASK;
*cache_err &= ~(CACHERR_PIDX_MASK | CACHERR_SIDX_MASK);
*cache_err |= (sidx | pidx);
#endif
return((__uint64_t)instaddr);
} else {
/*
* we got a data error, look at the instruction
* at errorepc -- if it is a load/store calculate
* the physical address of the target. If it is
* a branch, the instruction in the branch delay
* slot should be the offending instruction.
* calculate the physical address of the target
* of this instruction and use it as the physical
* address.
*
* if neither of these situations holds true, we've
* got a real problem.
*/
if (bdaddr)
inst = bdinst;
/*
* see if we can decode the instruction which
* caused the fault, if not, we can't get a physaddr.
*/
if (!decode_inst(ep, inst, &ldst, &vaddr, &width))
return(-1);
if (!tlb_to_phys(vaddr, &paddr, &cached))
return(-1);
#ifdef WRONG
/*
* build a reasonable facsimile of ce_pidx and ce_pidx
* and place them in the cache_err register image
*/
pidx = (vaddr >> CACHERR_PIDX_SHIFT) & CACHERR_PIDX_MASK;
sidx = paddr & CACHERR_SIDX_MASK;
*cache_err &= ~(CACHERR_PIDX_MASK | CACHERR_SIDX_MASK);
*cache_err |= (sidx | pidx);
#endif
return((__uint64_t)paddr);
}
/*NOTREACHED*/
}
#endif /* _MEM_PARITY_WAR || IP32 */
ecc_handler(
eframe_t *efp,
k_machreg_t *eccfp,
uint cache_err,
#if IP19
k_machreg_t errorepc,
volatile ecc_info_t *ecc_info_param,
cpuid_t ecc_cpuid)
#else
k_machreg_t errorepc)
#endif
{
int location;
err_desc_t *edp; /* ptr to set of variables to set this time */
uint ce_sidx = (cache_err & CACHERR_SIDX_MASK);
uint ce_pidx = (cache_err & CACHERR_PIDX_MASK); /* must be shifted */
__uint64_t physaddr;
#if IP32
_crmreg_t regval;
#endif
register int t_or_d = 0;
uint tags[NUM_TAGS], s_data_ecc;
int res = 0;
uint index = 0;
#if _MEM_PARITY_WAR
static time_t last_time;
#endif /* _MEM_PARITY_WAR */
#ifdef R4600SC
extern int two_set_pcaches;
int r4600sc_scache_disabled = 1;
int _r4600sc_disable_scache(void);
void _r4600sc_enable_scache(void);
if (two_set_pcaches && private.p_scachesize)
#ifdef _MEM_PARITY_WAR
r4600sc_scache_disabled = _r4600sc_disable_scache_erl();
#else /* _MEM_PARITY_WAR */
r4600sc_scache_disabled = _r4600sc_disable_scache();
#endif /* _MEM_PARITY_WAR */
#endif
#if defined (EVEREST)
/* Now save the cache error in the extended everror structure
* for future use by the FRU analyzer
*/
if (ecc_info_param->ecc_info_inited != 1)
return(1);
ecc_info_param->everror_ext->eex_cpu[ecc_cpuid].cpu_cache_err =
cache_err;
ecc_info_param->ecc_panic_recoverable = 0;
#endif /* EVEREST */
#ifdef _MEM_PARITY_WAR
#ifndef CMPLR_BUG_277906_FIXED
errorepc = eccfp[ECCF_ERROREPC];
#endif
#if R4000 && (! _NO_R4000)
if (efp->ef_sr & SR_EXL) {
/* check if we need to clear SR_EXL due to an R4000 bug:
* we clear SR_EXL if $errorepc was not in one of the
* SR_EXL handlers.
*/
k_machreg_t errorepc_k0;
if (IS_KSEG1(errorepc))
errorepc_k0 = K1_TO_K0(errorepc);
else
errorepc_k0 = errorepc;
for (index = 0; exl_handler_table_uc[index].base != 0; index++) {
if (((int *) errorepc_k0) >= exl_handler_table_uc[index].base &&
((int *) errorepc_k0) < exl_handler_table_uc[index].limit)
break;
}
if (exl_handler_table_uc[index].base == NULL)
/* errorepc not found in table */
efp->ef_sr &= ~SR_EXL;
}
#endif /* R4000 && (! _NO_R4000) */
#if (defined(IP20) || defined(IP22) || defined(IPMHSIM))
/* save bus error status */
eccfp[ECCF_CPU_ERR_STAT] = *(volatile uint *)PHYS_TO_K1(CPU_ERR_STAT);
eccfp[ECCF_CPU_ERR_ADDR] = *(volatile uint *)PHYS_TO_K1(CPU_ERR_ADDR);
eccfp[ECCF_GIO_ERR_STAT] = *(volatile uint *)PHYS_TO_K1(GIO_ERR_STAT);
eccfp[ECCF_GIO_ERR_ADDR] = *(volatile uint *)PHYS_TO_K1(GIO_ERR_ADDR);
/* clear possible errors */
*(volatile uint *)PHYS_TO_K1(CPU_ERR_STAT) = 0x0;
*(volatile uint *)PHYS_TO_K1(GIO_ERR_STAT) = 0x0;
flushbus();
/* retry if CPU see SYSAD error but MC did not see any */
if ((cache_err & CACHERR_EE) &&
!eccfp[ECCF_CPU_ERR_STAT] && !eccfp[ECCF_GIO_ERR_STAT] &&
(time - last_time > 5)) {
ecc_info.ecc_err_cnts[SYSAD_ERRS]++;
last_time = time;
return 0;
}
/* save_perr_info checks to see if it is a memory error
* we might be able to workaround, and saves away enough
* information to be able to fix it.
*/
if (perr_save_info(efp, eccfp, cache_err, errorepc,
((cache_err & CACHERR_EE)
? PERC_CACHE_SYSAD
: PERC_CACHE_LOCAL))) {
#ifdef R4600SC
if (!r4600sc_scache_disabled)
_r4600sc_enable_scache_erl();
#endif /* R4600SC */
return(-1); /* force an exception */
}
#endif /* IP20 || IP22 */
#endif /* _MEM_PARITY_WAR */
#ifdef IP19
/* On IP19 all memory (BSS) is zeroed at boot time, so we don't
* really have much to initialize. We really want to avoid
* referencing global variables which are cached.
*/
ecc_info_param->eframep = CACHE_ERR_EFRAME;
ecc_info_param->eccframep = CACHE_ERR_ECCFRAME;
#else /* !IP19 */
if (!ecc_info_initialized)
init_ecc_info();
#endif /* !Ip19 */
/* if this error was 'forced' the CE bit will be set--clear it
* in the eframe SR */
efp->ef_sr &= ~SR_CE;
/* Check if we have handled too many ecc errors without
* allowing the cleanup routine to execute. (splhi and
* DE bit set ensures we won't be interrupted during this
* test) */
NEXT_INDEX(ECC_INFO(ecc_w_index));
#ifdef IP19
/* On IP19 we have a state indicator built-in to the entry.
* Make sure entry is free before using it.
*/
if (ecc_info_param->ecc_entry_state[ecc_info_param->ecc_w_index] != 0) {
ecc_info_param->ecc_flags |= HANDLER_OVERRAN;
/* back up write index so ecc_panic() will do something */
PREV_INDEX(ecc_info_param->ecc_w_index);
return(1); /* ecc_panic will print proper msg */
} else {
index = ecc_info_param->ecc_w_index;
ecc_info_param->ecc_entry_state[index] = 1;
}
#else /* !IP19 */
if (ecc_info.ecc_w_index == ecc_info.ecc_r_index) {
ecc_info.ecc_flags |= HANDLER_OVERRAN;
/* back up write index so ecc_panic() will do something */
PREV_INDEX(ecc_info.ecc_w_index);
#ifdef R4600SC
/*
* on R4600SC there is no need to renable cache
* since we are going to panic anyway.
*/
#endif /* R4600SC */
return(1); /* ecc_panic will print proper msg */
} else {
index = ecc_info.ecc_w_index;
}
#endif /* !IP19 */
/* point edb to set of variables to use */
edp = (err_desc_t *)&(ECC_INFO(desc[index]));
#ifdef IP19
edp->e_cpuid = ecc_cpuid;
#else
edp->e_cpuid = cpuid();
#endif
#ifdef _MEM_PARITY_WAR
edp->e_eframep = efp;
edp->e_eccframep = eccfp;
#endif /* _MEM_PARITY_WAR */
edp->e_flags = E_PADDR_VALID|E_VADDR_VALID;
#ifdef R4000PC
if ((r4000_config & CONFIG_SC) == 0) { /* 0 == scache present */
#endif /* R4000PC */
/* use CacheErr sidx to fetch 2ndary tag of the line mapping it,
* and ecc checkbits of the data in that line */
_c_ilt_n_ecc(CACH_SD, PHYS_TO_K0(ce_sidx), tags, &s_data_ecc);
edp->e_badecc = eccfp[ECCF_ECC] = s_data_ecc;
edp->e_s_taglo = tags[TAGLO_IDX];
/* ce_sidx has paddr[21..3], 2ndary taglo has paddr[35..17] but
* must be shifted to proper position */
physaddr = (ce_sidx | ((edp->e_s_taglo & SADDRMASK) << SADDR_SHIFT));
#ifdef R4000PC
} else {
#ifdef _MEM_PARITY_WAR
if (((eccfp[ECCF_CPU_ERR_STAT] & CPU_ERR_STAT_RD_PAR) ==
CPU_ERR_STAT_RD_PAR)) {
physaddr = eccfp[ECCF_CPU_ERR_ADDR] & ~0x7;
physaddr += BYTEOFF(eccfp[ECCF_CPU_ERR_STAT] & 0xff);
edp->e_flags |= E_PADDR_MC;
} else if (eccfp[ECCF_GIO_ERR_STAT] & GIO_ERRMASK) {
physaddr = eccfp[ECCF_GIO_ERR_ADDR] & ~0x7;
physaddr |= BYTEOFF(eccfp[ECCF_GIO_ERR_STAT] & 0xff);
edp->e_flags |= E_PADDR_GIO;
}
else {
physaddr = ecc_get_perr_addr(efp,errorepc,(int*)&cache_err);
/* no physaddr, can't go on */
if (physaddr == -1)
return(1);
if (cache_err & CACHERR_ER)
/*
* if fault occured on a data reference
* and was *not* reported by MC, we can't
* be certain that the physaddr we got
* from decoding the instruction is correct.
*/
edp->e_flags &= ~E_PADDR_VALID;
}
/*
* this allows us to work around the rather persistent
* bug in the R4000 which causes it to report an incorrect
* pidx when it takes a primary cache parity error. This
* workaround is not necessary on the R4600 so we skip it
* if we are running on one. If we can't find it in the
* cache (ce_pidx == -1) we continue to collect information
* for error reporting, but we will not attempt to fix the
* error, we will either kill the process or we will panic.
*/
if (!(cache_err & CACHERR_ET) && !two_set_pcaches) {
ce_pidx = ecc_find_pidx((cache_err & CACHERR_ER) ?
CACH_PD :
CACH_PI, physaddr);
if (ce_pidx == -1) {
edp->e_flags &= ~E_VADDR_VALID;
} else {
cache_err = (cache_err & ~CACHERR_PIDX_MASK)
| ce_pidx;
}
}
#elif IP32
#define CPU_ERR (CRM_CPU_ERROR_CPU_ILL_ADDR | \
CRM_CPU_ERROR_CPU_WRT_PRTY)
#define CPU_ERR_REV0 (CRM_CPU_ERROR_CPU_INV_ADDR_RD | \
CRM_CPU_ERROR_CPU_INV_REG_ADDR)
#define MEM_ERR (CRM_MEM_ERROR_CPU_ACCESS | \
CRM_MEM_ERROR_HARD_ERR)
regval = READ_REG64(PHYS_TO_K1(CRM_CPU_ERROR_STAT), _crmreg_t);
eccfp[ECCF_CPU_ERR_STAT] = (uint)(regval & 0xffffffff);
regval = READ_REG64(PHYS_TO_K1(CRM_MEM_ERROR_STAT), _crmreg_t);
eccfp[ECCF_MEM_ERR_STAT] = (uint)(regval & 0xffffffff);
regval = READ_REG64(PHYS_TO_K1(CRM_CPU_ERROR_ADDR), _crmreg_t);
eccfp[ECCF_CPU_ERR_ADDR] = regval;
regval = READ_REG64(PHYS_TO_K1(CRM_MEM_ERROR_ADDR), _crmreg_t);
eccfp[ECCF_MEM_ERR_ADDR] = (uint)(regval & 0xffffffff);
if ((eccfp[ECCF_MEM_ERR_STAT] & MEM_ERR) == MEM_ERR) {
WRITE_REG64(0LL,
PHYS_TO_K1(CRM_MEM_ERROR_STAT), _crmreg_t);
physaddr = (__uint64_t)eccfp[ECCF_MEM_ERR_ADDR];
/*
* we access all memory below 256Mb at the 0 based
* alias. Memory at or above 256Mb is accessed above
* 1Gb. Correct for this since CRIME only reports
* memory error address bits 29:0.
*/
if (physaddr >= 0x10000000)
physaddr += 0x40000000;
} else if ((eccfp[ECCF_CPU_ERR_STAT] & CPU_ERR_REV0) == CPU_ERR_REV0) {
WRITE_REG64(0LL,
PHYS_TO_K1(CRM_CPU_ERROR_STAT), _crmreg_t);
physaddr = eccfp[ECCF_CPU_ERR_ADDRHI];
physaddr = (physaddr << 32) | eccfp[ECCF_CPU_ERR_ADDR];
} else if ((eccfp[ECCF_CPU_ERR_STAT] & CPU_ERR) == CPU_ERR) {
WRITE_REG64(0LL,
PHYS_TO_K1(CRM_CPU_ERROR_STAT), _crmreg_t);
physaddr = eccfp[ECCF_CPU_ERR_ADDRHI];
physaddr = (physaddr << 32) | eccfp[ECCF_CPU_ERR_ADDR];
}
else {
physaddr = (long long)ecc_get_perr_addr(efp,errorepc,
(int *)&cache_err);
/* no physaddr, can't go on */
if (physaddr == -1)
return(1);
if (cache_err & CACHERR_ER)
/*
* if fault occured on a data reference
* and was *not* reported by CRIME, we can't
* be certain that the physaddr we got
* from decoding the instruction is correct.
*/
edp->e_flags &= ~E_PADDR_VALID;
}
edp->e_paddr = physaddr;
#else
physaddr = 0;
#endif /* _MEM_PARITY_WAR */
}
#endif /* R4000PC */
edp->e_paddr = physaddr;
/* set the eccframe paddr to physaddr for now; later routines will
* change it if needed (e.g. a p-cache error needs e_vaddr */
#if IP19 || IP32
eccfp[ECCF_PADDR] = physaddr & 0x0ffffffff;
eccfp[ECCF_PADDRHI] = physaddr>>32;
#else
eccfp[ECCF_PADDR] = physaddr;
#endif
/* primary caches are virtually tagged; build & save vaddr */
edp->e_vaddr = (ce_pidx << CACHERR_PIDX_SHIFT) | (ce_sidx & (NBPP-1));
edp->e_cache_err = cache_err,
edp->e_error_epc = errorepc,
edp->e_sr = efp->ef_sr;
/* There is an R4k chip bug which mistakenly turns on CACHERR_EB
* under convoluted circumstances. The workaround is to believe
* CACHERR_EB only if CACHERR_ER indicates an instruction error.
* So have edp->e_cache_err contain the original cache err register
* contents, but fix up our local cache_err value.
*/
if ((cache_err & (CACHERR_EB|CACHERR_ER)) == (CACHERR_EB|CACHERR_ER))
cache_err &= ~CACHERR_EB; /* not an instruction err */
ASSERT(cache_err & (CACHERR_ED|CACHERR_ET));
if (cache_err & CACHERR_ED) /* Error in data */
t_or_d = DATA_ERR;
if (cache_err & CACHERR_ET) { /* Error in tag or both */
if (t_or_d == DATA_ERR)
t_or_d = D_AND_T_ERR;
else
t_or_d = TAG_ERR;
}
if (cache_err & CACHERR_EE) { /* wrong from SysAD bus */
location = SYSAD;
ecc_log_error(SYSAD_ERRS, index);
} else if ( (cache_err & CACHERR_EC) && (cache_err & CACHERR_ER) )
location = CACH_SD;
else if ( !(cache_err & CACHERR_EC) && (cache_err & CACHERR_ER) )
location = CACH_PD;
else if ( (cache_err & CACHERR_EC) && !(cache_err & CACHERR_ER) )
location = CACH_SI;
else if ( !(cache_err & CACHERR_EC) && !(cache_err & CACHERR_ER) )
location = CACH_PI;
else {
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_incons_err);
#if IP19
ecc_info_param->ecc_entry_state[index] = 2;
#endif
return(1);
}
#ifdef IP19
/* check for occurance of cache error exception while already
* in cache error handler (or double error due to error on both
* out-going and in-coming cacheline).
* NOTE: EW bit only defined for R4400 processors.
*/
if ( (cache_err & CACHERR_EW) ) {
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_ew_err);
goto uncorrectable;
}
#ifdef ECC_TEST_EW_BIT
{
extern int get_cacheerr(void);
ecc_info_param->ecc_wait_for_external = 1;
while (ecc_info_param->ecc_wait_for_external != 2)
/* NOP */ ;
ecc_info_param->ecc_cpu1_cacheerr2 = get_cacheerr();
}
#endif
edp->e_location = location;
edp->e_tag_or_data = t_or_d;
if (location == CACH_PD || location == CACH_PI)
eccfp[ECCF_PADDR] = edp->e_vaddr;
if (location == SYSAD) {
res = ecc_fixmem(index,efp,eccfp,cache_err,errorepc);
} else { /* it's error(s) in cache(s) */
res = ecc_fixcache(index,efp,eccfp,cache_err,errorepc);
}
/* if cache_err EB bit is set, a data error occurred in addition to
* i-cache error indicated by the other cache_err bits. Flush both
* data caches after sanity-checking that main error was in icache.
* Note: if a) error is in clean data, the line won't be written-
* back, so the error will be fixed. b) if error is in dirty line,
* line will flush out through RMI, which will fix 1-bit errors,
* pass over > 1-bit errors. Therefore, two cases: 1) 1-bit data
* error: transparently fixed (not logged, unfortunately);
* 2) multibit data error: it will be stored in memory with the
* errors; if it is written to disk it is dealt-with then; if it
* is re-read the R4K will raise an exception and we'll take action
* then, so this is sufficient. */
if (cache_err & CACHERR_EB) {
/*
* XXX: we *may* be able to recover from the data error
* with great difficulty, for now we will just die.
*/
res = 1;
/* We must avoid cached accesses since that might force
* corrupted data out from primary cache (if error is due
* to a store-miss). So just flush an area large enough
* to guarentee we've flushed the entire cache, rather
* than loading p_scachesize, which is a cached variable.
*/
__cache_wb_inval((void *)FLUSH_ADDR, FOUR_MEG);
}
if (res) { /* failed to correct error: kill process or IRIX */
/* For now we will panic on IP19 machines. The error
* may have occurred in user mode, but perhaps the data
* destroyed (forced out corrupted ?) may be kernel data.
*/
if (USERMODE(efp->ef_sr)) {
ecc_assign_msg(ECC_INFO_MSG, index, ecc_user_err);
edp->e_user = 1;
#if 0
/* Following code is bogus since it will make a
* cached reference. Even if error processing
* is complete, we're still running with ERL and
* DE set, so a cache error here would be ignored.
*/
if (private.p_curproc)
edp->e_curprocp = private.p_curproc;
else
edp->e_curprocp = NULL;
goto handler_exit;
#endif
goto uncorrectable;
} else { /* BOOM! kernel encountered the ecc error */
ecc_info_param->ecc_flags |= K_ECC_PANIC;
goto uncorrectable;
}
}
/* if any cleanup work is necessary, the requesting routine
* did a 'MARK_FOR_CLEANUP'. If so, raise an interrupt. Else
* decrement the index for re-use. */
if (CLEANUP_IS_NEEDED) {
ECC_INTERRUPT;
} else
#ifdef ECC_DEBUG
/* keep frame for reference let ecc_cleanup sync ptrs */
ECC_INTERRUPT;
#else
PREV_INDEX(ecc_info_param->ecc_w_index); /* overwrite frame */
#endif
/* give an indication as to whether the error is theorectically
* recoverable (or more correctly, try to report uncorrectable only
* if it's an MBE and we should replace the CPU).
*/
if ((!res) && (!ecc_info_param->ecc_panic_recoverable))
ecc_info_param->ecc_panic_recoverable = 1;
ecc_info_param->ecc_entry_state[index] = 2;
#ifdef IP19_CACHEERRS_FATAL
return(1);
#else
if (!ecc_info_param->ecc_attempt_recovery)
return(1);
/* Error may have been due to store-miss which did not set the EI
* bit. Indications are that the following test should fail
* and return "one" in that case, which should be considered
* fatal.
*/
if ((!res) && (ecc_check_cache(ecc_info_param->ecc_dummyline))) {
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_possible_ei);
res = 1;
}
return(res);
#endif
uncorrectable:
ecc_info_param->ecc_entry_state[index] = 2;
return(1);
#else /* !IP19 */
edp->e_location = location;
edp->e_tag_or_data = t_or_d;
if (location == CACH_PD || location == CACH_PI)
eccfp[ECCF_PADDR] = edp->e_vaddr;
#if IP20 || IP22 || IPMHSIM
/* The SP IP20/22 should never encounter external requests,
* so there'd better not be any cache errors as a result of them:
* panic.
*
* XXX: on the R4600(!two_set_pcaches) ES does not mean an error
* caused by an external request, it means that the error occured
* on a cache miss in the first doubleword of read response data.
*/
#if R4600
if ((cache_err & CACHERR_ES) && !two_set_pcaches) {
#else
if ((cache_err & CACHERR_ES)) {
#endif
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_extreq);
return(1);
}
#endif
if (location == SYSAD) {
#ifdef MCCHIP
res = 1;
#else
res = ecc_fixmem(index,efp,eccfp,cache_err,errorepc);
#endif /* MCCHIP */
} else { /* it's error(s) in cache(s) */
#if _MEM_PARITY_WAR || IP32
if ((edp->e_flags & (E_PADDR_VALID|E_VADDR_VALID)) !=
(E_PADDR_VALID|E_VADDR_VALID)) {
/*
* we must be sure of both the physaddr and the cache
* index to attempt a fix.
*/
res = 1;
} else
#endif
res = ecc_fixcache(index,efp,eccfp,cache_err,errorepc);
}
/* if cache_err EB bit is set, a data error occurred in addition to
* i-cache error indicated by the other cache_err bits. Flush both
* data caches after sanity-checking that main error was in icache.
* Note: if a) error is in clean data, the line won't be written-
* back, so the error will be fixed. b) if error is in dirty line,
* line will flush out through RMI, which will fix 1-bit errors,
* pass over > 1-bit errors. Therefore, two cases: 1) 1-bit data
* error: transparently fixed (not logged, unfortunately);
* 2) multibit data error: it will be stored in memory with the
* errors; if it is written to disk it is dealt-with then; if it
* is re-read the R4K will raise an exception and we'll take action
* then, so this is sufficient. */
if (cache_err & CACHERR_EB) {
#if MCCHIP || IP32
/*
* XXX: we *may* be able to recover from the data error
* with great difficulty, for now we will just die.
*/
res = 1;
#else
if (edp->e_location != CACH_PI && edp->e_location != CACH_SI) {
ecc_assign_msg(ECC_ERROR_MSG, index, ecc_eb_not_i);
}
__cache_wb_inval((void *)FLUSH_ADDR, private.p_scachesize);
#endif
}
if (res) { /* failed to correct error: kill process or IRIX */
if (USERMODE(efp->ef_sr)) {
ecc_assign_msg(ECC_INFO_MSG, index, ecc_user_err);
edp->e_user = 1;
#ifdef _MEM_PARITY_WAR
allow_nofault_error:
#endif /* _MEM_PARITY_WAR */
edp->e_pid = current_pid();
goto handler_exit;
} else { /* BOOM! kernel encountered the ecc error */
#ifdef _MEM_PARITY_WAR
if (private.p_nofault || (curthreadp->k_nofault))
goto allow_nofault_error;
#endif /* _MEM_PARITY_WAR */
ecc_info.ecc_flags |= K_ECC_PANIC;
return(1); /* we're dead meat--panic now */
}
}
handler_exit:
#ifdef R4600SC
if (!r4600sc_scache_disabled)
#ifdef _MEM_PARITY_WAR
_r4600sc_enable_scache_erl();
#else /* _MEM_PARITY_WAR */
_r4600sc_enable_scache();
#endif /* _MEM_PARITY_WAR */
#endif /* R4600SC */
#ifdef _MEM_PARITY_WAR
if (res) {
return(-1);
}
#endif /* _MEM_PARITY_WAR */
/* if any cleanup work is necessary, the requesting routine
* did a 'MARK_FOR_CLEANUP'. If so, raise an interrupt. Else
* decrement the index for re-use. */
if (CLEANUP_IS_NEEDED) {
ECC_INTERRUPT;
} else
#ifdef ECC_DEBUG
/* keep frame for reference let ecc_cleanup sync ptrs */
ECC_INTERRUPT;
#else
PREV_INDEX(ecc_info.ecc_w_index); /* overwrite frame */
#endif
#ifdef _MEM_PARITY_WAR
ASSERT(!res);
#endif
return(res);
#endif /* !IP19 */
} /* ecc_handler */
#ifndef MCCHIP
/*ARGSUSED*/
static
#if IP19
real_ecc_fixmem(
uint index,
eframe_t *efp,
k_machreg_t *eccfp,
uint cache_err,
k_machreg_t errorepc,
volatile ecc_info_t *ecc_info_param )
#else
ecc_fixmem(
uint index,
eframe_t *efp,
k_machreg_t *eccfp,
uint cache_err,
k_machreg_t errorepc)
#endif
{
err_desc_t *edp = (err_desc_t *)&(ECC_INFO(desc[index]));
uint tags[NUM_TAGS];
__psunsigned_t physaddr = edp->e_paddr;
__psunsigned_t k0addr, k0oneoff;
error_info_t err_info;
unsigned char hi_syn;
#if IP19
__psunsigned_t pmem = (ecc_info_param->ecc_physmem * NBPP);
#else
__psunsigned_t pmem = (physmem * NBPP);
#endif
eccdesc_t syn_info;
uint hi_taglo;
#ifdef SYNDROME_CHECKING
__psunsigned_t addr;
int foundone = 0;
#endif
/* since it came in wrong off the bus, the s_taglo register is the
* one we're interested in; shove it on the eccframe. */
#ifdef R4000PC
if ((r4000_config & CONFIG_SC) != 0) /* 0 == scache present */
eccfp[ECCF_TAGLO] = edp->e_p_taglo;
else
#endif /* R4000PC */
eccfp[ECCF_TAGLO] = edp->e_s_taglo;
#ifdef _MEM_PARITY_WAR
k0addr = physaddr;
/* This works up to 2 GB of memory, because KUSEG is physical
* memory when SR_ERL is set in $sr.
*/
#else /* _MEM_PARITY_WAR */
#if IP19
k0addr = PHYS_TO_K0(physaddr & (ecc_info_param->ecc_k0size_less1));
/* XXX This won't work if physaddr >= K0SIZE */
/* flush the bad line out through the RMI (which will fix it
* in memory if possible) by reading an address one 2nd-cache-
* size higher or lower, whichever is within physical mem.
*
* NOTE: we need to avoid cached accesses on IP19 so loading
* anything from pda (like p_scachesize) is a no-no.
* Exact number is not important as long as it is at least as
* large as the scachesize. So we just use 4MB.
*/
if ((physaddr + FOUR_MEG) >= pmem)
k0oneoff = (k0addr - FOUR_MEG);
else
k0oneoff = (k0addr + FOUR_MEG);
#else
k0addr = PHYS_TO_K0(physaddr & (K0SIZE-1));
/* XXX This won't work if physaddr >= K0SIZE */
/* flush the bad line out through the RMI (which will fix it
* in memory if possible) by reading an address one 2nd-cache-
* size higher or lower, whichever is within physical mem.
*/
if ((physaddr + private.p_scachesize) >= pmem)
k0oneoff = (k0addr - private.p_scachesize);
else
k0oneoff = (k0addr + private.p_scachesize);
#endif
#endif /* _MEM_PARITY_WAR */
#if IP19 || IP32
err_info.eidata_lo = 0xdeadbeef;
err_info.eidata_hi = 0xdeadbeef;
#else
err_info.eidata_lo = *(uint *)(k0addr);
err_info.eidata_hi = *(uint *)(k0addr+BYTESPERWD);
#endif
/* ASSERT(edp->e_badecc); */
err_info.cbits_in = edp->e_badecc;
edp->e_lo_badval = err_info.eidata_lo;
edp->e_hi_badval = err_info.eidata_hi;
#if IP20 || IP22 || IPMHSIM
/*
* The IP20 and IP22 (MC-based) systems have only parity
* memory, so correction is not possible, except when an
* instruction overwrites the memory. Therefore, we just
* reflect this error to trap(), for appropriate disposition.
*/
/* force a software trap */
return(-1);
#elif IP32
/*
* the only errors which will get reflected in the cache on
* IP32 are bad address errors and non-correctable ECC errors
* in any case, none of these are fixable.
* XXX: this is probably wrong, we need to extract the correct
* syndrome for the appropriate byte, but I'm lazy right now.
*/
edp->e_syndrome = (uint)
(READ_REG64(PHYS_TO_K1(CRM_MEM_ERROR_ECC_SYN), _crmreg_t) &
0xffffffff);
return(1);
#else /* !(IP20 || IP22) */
hi_taglo = edp->e_s_taglo;
tags[TAGLO_IDX] = hi_taglo;
tags[TAGHI_IDX] = 0;
/* change line-state from clean to dirty so that the cached read
* we'll do one 2nd-cache-size-segment up from the bad addr will
* flush the current line through the RMI, fixing memory */
tags[TAGLO_IDX] = ((tags[TAGLO_IDX] & ~SSTATEMASK) | SDIRTYEXCL);
_c_ist(CACH_SD, k0addr, tags);
hi_syn = calc_err_info(DATA_CBITS, &err_info);
#ifdef SYNDROME_CHECKING
if (!hi_syn) { /* NO ERROR! */
printf("WEIRDITY!!! Check ecc on all dbl wds in line!\n");
startaddr = k0addr & ~(SCACHE_LINESIZE-1);
printf("k0addr 0x%x, startaddr 0x%x\n",k0addr,startaddr);
for (addr = startaddr;addr < startaddr+SCACHE_LINESIZE;addr += 8) {
alt_err_info.eidata_lo = *(uint *)addr;
alt_err_info.eidata_hi = *(uint *)(addr+BYTESPERWD);
_c_ilt_n_ecc(CACH_SD, addr, tags, &data_ecc);
alt_err_info.cbits_in = data_ecc;
lo_syn = calc_err_info(DATA_CBITS, &alt_err_info);
if (lo_syn) {
foundone++;
printf("addr 0x%x, w0 0x%x, w1 0x%x, ",
addr, alt_err_info.eidata_lo,
alt_err_info.eidata_hi);
printf("cbin 0x%x, cbout 0x%x, syn 0x%x\n",
alt_err_info.cbits_in,
alt_err_info.cbits_out,lo_syn);
}
}
if (!foundone) {
printf("NO SYNDROMES IN LINE BEGINNING AT 0x%x ",addr);
printf("were non-zero\n");
}
ecc_log_error(NO_ERROR, index);
return(1);
}
#else
if (!hi_syn) { /* NO ERROR! */
ecc_log_error(NO_ERROR, index);
return(0);
}
#endif
/* use the syndrome to determine the severity of the error */
edp->e_syndrome = hi_syn;
syn_info = err_info.syn_info;
/* if it is a correctable error (DBx or CBx), force it back
* through the RMI to scrub memory. */
if (syn_info.type == DB || syn_info.type == CB) {
edp->e_user = 0;
*(volatile uint *)k0oneoff;
/* XXXXXXXXXXXXXXXXXXX SHOULD I CHECK IF THE FIX WORKED??? */
return(0);
} else { /* 2-bit or greater: can't fix it */
#if IP19 || IP32
eccfp[ECCF_PADDR] = physaddr & 0x0ffffffff;
eccfp[ECCF_PADDRHI] = physaddr>>32;
#else
eccfp[ECCF_PADDR] = physaddr;
#endif
return(1); /* ecc_handler will kill process or IRIX */
}
#endif /* !(IP20 || IP22) */
/*NOTREACHED*/
} /* ecc_fixmem */
#endif /* ! MCCHIP */
volatile int cache_hit = -1;
/* ARGSUSED */
#if IP19
static
real_ecc_fixcache(
uint index,
eframe_t *efp,
k_machreg_t *eccfp,
uint cache_err,
k_machreg_t errorepc,
volatile ecc_info_t *ecc_info_param )
#else
static
ecc_fixcache(
uint index,
eframe_t *efp,
k_machreg_t *eccfp,
uint cache_err,
k_machreg_t errorepc)
#endif
{
int offset;
err_desc_t *edp = (err_desc_t *)&(ECC_INFO(desc[index]));
__psunsigned_t s_caddr = PHYS_TO_K0(SCACHE_PADDR(edp));
__psunsigned_t p_caddr = PHYS_TO_K0(edp->e_vaddr);
uint tags[NUM_TAGS];
uint data_ecc;
uint res = 0;
/* XXXXXXXXXXXXXXXXXXXXXXXX SET ALL e_ VALUES! */
/* set e_p_taglo to PI tag if main error is in PI or SI (i.e.
* *instruction* error in in either cache); if PD or SD ==> PD.
* (Use computed virtual address when accessing the P-caches.) */
_c_ilt_n_ecc((((edp->e_location == CACH_PI) ||
(edp->e_location == CACH_SI))
? CACH_PI
: CACH_PD),
p_caddr, tags, &data_ecc);
edp->e_p_taglo = tags[TAGLO_IDX];
#ifdef R4000PC
if ((r4000_config & CONFIG_SC) != 0) /* 0 == scache present */
edp->e_s_taglo = 0;
else
#endif /* R4000PC */
{
_c_ilt_n_ecc(CACH_SD, s_caddr, tags, &data_ecc);
edp->e_s_taglo = tags[TAGLO_IDX];
/* if EI bit set, there is corrupted data in primary Dcache.
* Invalidate the line by zeroing-out the tag */
if (cache_err & CACHERR_EI) {
#ifdef IP19
/* Various wierd errors afflict an IP19 after a store-miss
* cache-error. It appears that the state of the cache
* is really confused. The cpu rarely recovers and other
* cpus seem to get errors when accessing this cpu's
* cache. So simply panic now.
*/
ecc_info_param->ecc_panic_recoverable = 2;
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_ei_norecover);
return(1);
#ifdef DO_NOT_ENABLE
/* Theorectically correct IP19 store-miss recover code */
tags[TAGLO_IDX] = 0;
_c_ilt(CACH_PD,p_caddr,tags);
tags[TAGLO_IDX] &= ~PSTATEMASK; /* change state to invalid */
_c_ist(CACH_PD,p_caddr,tags);
/* On an MP system, an intervention from another cpu could
* cause that cpu to get this cacheline with corrupt data
* and good ECC (intervention will flush data from primary
* to secondary and since DW bit is set will update secondary
* with good ECC). To make sure that we don't silently
* consume bad data we check that the secondary cacheline
* is still marked "dirty" after we've invalidated the
* primary cache.
*/
_c_ilt(CACH_SD, s_caddr, tags);
if (!(DIRTY_S_TAG(tags[TAGLO_IDX]))) {
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_ei_notdirty);
return(1);
}
#endif /* DO_NOT_ENABLE */
#else /* !IP19 */
tags[TAGLO_IDX] = 0;
_c_ist(CACH_PD,p_caddr,tags);
#endif /* !IP19 */
}
}
/* NOTE: in all correctable-cases we must verify that the fix
* succeeded in order to avoid an infinite-loop of instruction-
* restarts re-raising the ecc exception in the event of stuck
* cache bits. Otherwise we could just invalidate the line and
* let the restart refill the line.
* If there are errors in both tag and data, start with the
* tag. Depending on how we fix the tag error, the data error may
* be corrected also. If not, see comment for CACHERR_EB at end
* of ecc_handler. (Either the tag will be successfully repaired
* or we will panic, so if the data error remains, a subsequent
* exception will spotlight it).
*/
ASSERT(edp->e_location >= CACH_PI && edp->e_location <= CACH_SD);
/* set index into error-counting array to proper cache
* ( 2x cuz tag-data pairs for each cache) */
if (edp->e_location == CACH_SD) /* 2ndary is I and D combined */
offset = (2 * CACH_SI);
else
offset = (2 * edp->e_location);
switch(edp->e_location) {
case CACH_PD:
case CACH_PI:
/* err is in primary: p_taglo is useful. Poke it into frame */
eccfp[ECCF_TAGLO] = edp->e_p_taglo;
/* At this point the R4K doesn't return the correct
* checkbits for data in either of the primary caches,
* so the only potentially-relevant ecc value is
* contained in the p_taglo. */
break;
case CACH_SD:
#ifndef R10000
case CACH_SI:
#endif /* !R10000 */
/* error is in 2ndary; save s_taglo on eccframe */
eccfp[ECCF_TAGLO] = edp->e_s_taglo;
break;
default:
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_inval_loc);
return(1);
}
ASSERT(edp->e_tag_or_data>=DATA_ERR && edp->e_tag_or_data<=D_AND_T_ERR);
if (edp->e_tag_or_data != DATA_ERR) { /* tag or both */
res = ecc_fixctag(edp->e_location, index);
if (res == 2) { /* unfixable 2nd-level tag: panic */
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_bad_s_tag);
res = 1;
}
} else /* error in data field */
res = ecc_fixcdata(edp->e_location, index, eccfp);
if (edp->e_tag_or_data == DATA_ERR)
ecc_log_error(offset, index);
else if (edp->e_tag_or_data == TAG_ERR)
ecc_log_error((offset+1), index);
else { /* errors in both data and tag */
ecc_log_error(offset, index);
ecc_log_error((offset+1), index);
}
return(res);
} /* ecc_fixcache */
#ifdef R4600
/*
* ecc_find_bad_cline -- searchs for tag which caused a cache tag
* error. returns the index in the parameter
* *idx.
*
* returns 1 if it found a tag which had bad parity at the correct
* index, 0 if not.
*
* XXX: this routine has a hidden assumption that loc is CACH_PI or
* CACH_PD.
*/
ecc_find_bad_cline(int loc, __psunsigned_t p_caddr, uint *idx)
{
uint tags[NUM_TAGS];
extern int two_set_pcaches;
ASSERT(loc == CACH_PI || loc == CACH_PD);
_read_tag(loc,(caddr_t)p_caddr,(int *)tags);
if (ecc_bad_ptag(tags[TAGLO_IDX])) {
*idx = p_caddr;
return(1);
}
_read_tag(loc,(caddr_t)(p_caddr^two_set_pcaches),(int *)tags);
if (ecc_bad_ptag(tags[TAGLO_IDX])) {
*idx = p_caddr^two_set_pcaches;
return(1);
}
return(0);
}
#endif /* R4600 */
/* the ptaglo field of the taglo register holds bits 35..12 of the
* physaddr that the line contains. This mask grabs that field
* from a virtual address, which is then shifted to its correct
* position in ptaglo (>> 4) */
#define PTAG_ADDRMASK 0xFFFFF000
/* fix tag error in 'loc' cache. 'index' indicates the
* frame of variables being used during this invokation
* of ecc_handler(). */
#if IP19
real_ecc_fixctag(uint loc, int index, volatile ecc_info_t *ecc_info_param)
#else
ecc_fixctag(uint loc, int index)
#endif
{
err_desc_t *edp = (err_desc_t *)&(ECC_INFO(desc[index]));
uint tags[NUM_TAGS];
uint p_taglo = edp->e_p_taglo;
uint s_taglo = edp->e_s_taglo;
uint new_p_taglo;
error_info_t err_info;
uint tag_syndrome;
eccdesc_t tag_syn_info;
uint ce_sidx = (edp->e_cache_err & CACHERR_SIDX_MASK);
__psunsigned_t s_caddr = PHYS_TO_K0(SCACHE_PADDR(edp));
__psunsigned_t p_caddr = PHYS_TO_K0(edp->e_vaddr);
__uint64_t physaddr; /* must be 64-bits always (16 GB memory) */
#ifdef R4000PC
int pidx = 0;
#if R4600
extern int two_set_pcaches;
#endif
#endif
/* uncorrectable errors in 2ndary tags (i.e. > 1 bit) cause a
* fatal enigma regardless of whether the data in the line is
* dirty or clean: with a corrupted 2ndary tag we can't identify
* the (also possibly corrupted) primary line(s) associated with
* it. This means that none of the cacheops are guaranteed, and
* the state of the caching-system is or may be indeterminate.
* Panic. Note that if the bad 2ndary line is holding an
* instruction (and is therefore clean) we could blow out the
* primary I-cache and invalidate this 2ndary line; at this
* time, however, we're just going to panic on all uncorrectable
* errors in 2ndary tags.
*/
if (loc == CACH_SI || loc == CACH_SD) {
/* since the error is in the tag, all the e_values we set
* in ecc_handler using it are suspect. We know that the
* sidx field in cache_err is correct: use it to fetch the
* 2ndary line. e_s_taglo, e_p_taglo, e_paddr and e_vaddr
* may be wrong. Calculate the syndrome, then either
* a) fix it and recalculate e_paddr and c_vaddr if the
* bad bit was a data-bit (not a checkbit), or
* b) panic if the error is uncorrectable.
*/
_c_ilt(loc, PHYS_TO_K0(ce_sidx), tags);
err_info.eis_taglo = tags[TAGLO_IDX];
err_info.cbits_in = SET_CBITS_IN;
tag_syndrome = calc_err_info(TAG_CBITS, &err_info);
ASSERT(err_info.cbits_in == (tags[TAGLO_IDX] & SECC_MASK));
edp->e_prevbadecc = edp->e_badecc;
edp->e_badecc = err_info.cbits_in; /* from s_taglo */
edp->e_syndrome = tag_syndrome;
if (!tag_syndrome) { /* NO ERROR! */
ecc_assign_msg(ECC_ERROR_MSG, index, ecc_no_stagerr);
ecc_log_error(NO_ERROR, index);
return(1);
}
/* use the syndrome to determine the severity of the error */
tag_syn_info = err_info.syn_info;
/* DBx and CBx errors are correctable; all others panic */
if (tag_syn_info.type != DB && tag_syn_info.type != CB)
return(2);
/* if the error is in a databit, fix it and recalculate
* all values that were set relying upon possibly-bogus
* tag values. If the error is in a checkbit, let the
* R4K correct it when we store the tag. Note that the
* syndrome identifies the bad bit number *in the internal
* format* (i.e. as it is stored in the 2ndary cache),
* not as it appears in the taglo register. If it is a
* data-bit error we will fix it; the syndrome bitposition
* must therefore be translated to taglo format. */
if (tag_syn_info.type == DB) {
uint bitpos, badbit;
bitpos=xlate_bit(tag_syn_info.type,tag_syn_info.value);
badbit = (0x1 << bitpos);
tags[TAGLO_IDX] ^= badbit;
edp->e_s_taglo = tags[TAGLO_IDX];
/* Now that we have a correct 2ndary tag, recalculate
* all values that were based on the bad one.
* ce_sidx is paddr[21..3], 2nd taglo is paddr[35..17]
* but must be shifted to proper position. Together
* they make up the full vaddress. */
physaddr = (ce_sidx |
((edp->e_s_taglo & SADDRMASK) << SADDR_SHIFT));
edp->e_paddr = physaddr;
s_caddr = PHYS_TO_K0(SCACHE_PADDR(edp));
#if IP19
edp->e_vaddr = (physaddr & (ecc_info_param->ecc_picache_size-1));
#else
edp->e_vaddr = (physaddr & (picache_size-1));
#endif
p_caddr = PHYS_TO_K0(edp->e_vaddr);
/* set e_p_taglo to PI tag if error is in PI or SI
* (i.e. *instruction* error in either cache);
* if PD or SD ==> PD. */
if (loc == CACH_PI || loc == CACH_SI)
_c_ilt(CACH_PI, p_caddr, tags);
else
_c_ilt(CACH_PD, p_caddr, tags);
edp->e_p_taglo = tags[TAGLO_IDX];
} /* error in tag data bit */
/* now store the corrected tag */
tags[TAGLO_IDX] = edp->e_s_taglo;
tags[TAGHI_IDX] = 0;
_c_ist(loc,s_caddr,tags);
/* Check that the newly-computed tag is correct */
_c_ilt(loc,s_caddr,tags);
err_info.eis_taglo = tags[TAGLO_IDX];
tag_syndrome = calc_err_info(TAG_CBITS, &err_info);
if (tag_syndrome) { /* panic/kill user */
ecc_assign_msg(ECC_PANIC_MSG,index,
ecc_stfix_failed);
return(1);
}
return(0);
} else { /* it is a primary-tag error. We can reconstruct
* the bad tag from the info we have, even if the
* data in the line is dirty. 2ndary-tag ecc is
* checked each time a line is transferred to primary,
* and traps at that point. Therefore, primary lines
* already transferred from this 2ndary line are
* valid (because the ecc-check didn't cause a trap
* during those fills), and the current primary-fill
* didn't occur because of this trap
*/
tags[TAGLO_IDX] = tags[TAGHI_IDX] = 0;
#ifdef R4000PC
if ((r4000_config & CONFIG_SC) != 0) { /* 0 == scache present */
/* must invalidate the line */
if (loc == CACH_PI) {
_c_ist(loc,p_caddr,tags);
#ifdef R4600
_c_ist(loc,p_caddr^two_set_pcaches,tags);
#endif
} else {
#ifdef R4600
uint ttags[NUM_TAGS];
int numcleaned = 0;
/*
* XXX: if we don't find a bad line at the
* appropriate index what should we do? We
* can't really go on because we can't invalidate
* the line. I suppose that we should panic.
* First, though we'll see if both lines are clean
* we'll just invalidate them, 'case this just
* means that one of them had a parity error in
* the w or w' bit of the tag.
*/
if (ecc_find_bad_cline(loc,p_caddr,(uint *)&pidx)) {
#endif
_c_ist(loc,pidx,tags);
goto send_bad_ptag_msg;
}
_read_tag(loc,(caddr_t)p_caddr,(int *)ttags);
if ((ttags[TAGLO_IDX] & PSTATEMASK)
== PCLEANEXCL) {
_c_ist(loc,pidx,tags);
numcleaned++;
}
#ifdef R4600
_read_tag(loc,(caddr_t)(p_caddr^two_set_pcaches),(int *)ttags);
if ((ttags[TAGLO_IDX] & PSTATEMASK)
== PCLEANEXCL) {
_c_ist(loc,pidx,tags);
numcleaned++;
}
#endif
/*
* although we didn't find the bad tag
* we were able to invalidate both elements
* of the set. We can consider this to be
* success.
*/
if (numcleaned > 1)
return(0);
}
/* primary cache only: fail if in data cache */
if (edp->e_cache_err & (CACHERR_ER | CACHERR_EB))
goto send_bad_ptag_msg; /* unrecoverable */
return(0);
}
#endif /* R4000PC */
if (!ecc_bad_ptag(p_taglo)) {
ecc_assign_msg(ECC_ERROR_MSG, index, ecc_no_ptagerr);
ecc_log_error(NO_ERROR, index);
return(1);
}
/* construct a new ptag from e_vaddr and set state
* depending on scache */
new_p_taglo = ((edp->e_vaddr&PTAG_ADDRMASK)>>PADDR_SHIFT);
if ((s_taglo & SSTATEMASK) == SCLEANEXCL)
new_p_taglo |= PCLEANEXCL;
else
new_p_taglo |= PDIRTYEXCL;
tags[TAGLO_IDX] = new_p_taglo;
tags[TAGHI_IDX] = 0;
_c_ist(loc,p_caddr,tags);
_c_ilt(loc,p_caddr,tags);
if (ecc_bad_ptag(tags[TAGLO_IDX])) {
#ifdef R4000PC
send_bad_ptag_msg:
#endif /* R4000PC */
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_ptfix_failed);
return(1);
}
return(0);
} /* primary-tag error */
} /* ecc_fixctag */
#ifdef ECC_DEBUG
int eccdebug_foundone=0;
__psunsigned_t eccdebug_badaddr[128], eccdebug_loc[128];
int eccdebug_syndrome[128];
int eccdebug_datalo[128], eccdebug_datahi[128], eccdebug_ecc[128], eccdebug_cnt[128];
int eccdebug_entry_cnt=0;
__psunsigned_t eccdebug_entry_loc[128];
#endif /* ECC_DEBUG */
#ifdef IP19
extern k_machreg_t get_config(void);
int
ecc_check_correctable(volatile uint * loc, err_desc_t *edp,
volatile ecc_info_t *ecc_info_param)
{
__psunsigned_t addr, startaddr, paddr;
int lo_syn;
error_info_t alt_err_info;
uint tags[NUM_TAGS], data_ecc, dblwrd_mask;
int foundone=0, mbe=0, plinesize=0;
/* if not IP19 or no datap, then really can't check */
if (loc == 0)
return(0);
#ifdef ECC_DEBUG
eccdebug_entry_loc[eccdebug_entry_cnt] = (__psunsigned_t)loc;
eccdebug_entry_cnt++;
if (eccdebug_entry_cnt == 128)
eccdebug_entry_cnt = 0;
#endif /* ECC_DEBUG */
if (get_config() & CONFIG_DB) {
plinesize = 32;
/* primary cacheline size is four double-words, which will be
* the resolution of the cache error exception.
*/
startaddr = (__psunsigned_t)loc & ~0x1f;
paddr = PHYS_TO_K0(edp->e_paddr & ~0x1f);
/* This following statement sets the correct initial bit
* position in the doubleword mask.
*/
dblwrd_mask = (1 << (((__psunsigned_t)loc & 0x60) >> 3));
} else {
plinesize = 16;
/* primary cacheline size is two double-words, which will be
* the resolution of the cache error exception.
*/
startaddr = (__psunsigned_t)loc & ~0x0f;
paddr = PHYS_TO_K0(edp->e_paddr & ~0x0f);
/* This following statement sets the correct initial bit
* position in the doubleword mask.
*/
dblwrd_mask = (1 << (((__psunsigned_t)loc & 0x70) >> 3));
}
/* We scan that portion of the secondary cacheline which caused the
* cache error exception. This corresponds to a primary cacheline
* which is 16 bytes (two doublewords) on the IP19. Either
* doubleword may contain an error.
*/
for (addr = startaddr;addr < startaddr+plinesize;addr += 8,paddr +=8) {
tags[TAGLO_IDX] = tags[TAGHI_IDX] = 0;
alt_err_info.eidata_lo = *(uint *)addr;
alt_err_info.eidata_hi = *(uint *)(addr+BYTESPERWD);
_c_ilt_n_ecc(CACH_SD, paddr, tags, &data_ecc);
alt_err_info.cbits_in = data_ecc;
lo_syn = calc_err_info(DATA_CBITS, &alt_err_info);
if (lo_syn) {
#ifdef ECC_DEBUG
eccdebug_loc[eccdebug_foundone] = (__psunsigned_t)loc;
eccdebug_cnt[eccdebug_foundone] = eccdebug_entry_cnt;
eccdebug_badaddr[eccdebug_foundone] = addr;
eccdebug_syndrome[eccdebug_foundone] = lo_syn;
eccdebug_datalo[eccdebug_foundone] = alt_err_info.eidata_lo;
eccdebug_datahi[eccdebug_foundone] = alt_err_info.eidata_hi;
eccdebug_ecc[eccdebug_foundone] = alt_err_info.cbits_in;
eccdebug_foundone++;
if (eccdebug_foundone == 128)
eccdebug_foundone = 0;
#endif /* ECC_DEBUG */
switch (alt_err_info.syn_info.type) {
case DB: /* 1-bit err in data */
case CB:
if (!mbe) {
edp->e_s_taglo = tags[TAGLO_IDX];
edp->e_prevbadecc = edp->e_badecc;
edp->e_badecc = data_ecc;
edp->e_syndrome = lo_syn;
edp->e_lo_badval = alt_err_info.eidata_lo;
edp->e_hi_badval = alt_err_info.eidata_hi;
edp->e_paddr = K0_TO_PHYS(paddr);
}
edp->e_sbe_dblwrds |= dblwrd_mask;
foundone++;
break;
case B2: /* error is 2-bit or greater */
case B3:
case B4:
case UN:
default:
edp->e_s_taglo = tags[TAGLO_IDX];
edp->e_prevbadecc = edp->e_badecc;
edp->e_badecc = data_ecc;
edp->e_syndrome = lo_syn;
edp->e_lo_badval = alt_err_info.eidata_lo;
edp->e_hi_badval = alt_err_info.eidata_hi;
edp->e_paddr = K0_TO_PHYS(paddr);
edp->e_mbe_dblwrds |= dblwrd_mask;
mbe++;
} /* switch */
}
dblwrd_mask = dblwrd_mask << 1;
}
if (mbe)
return(-1);
else
return(foundone);
}
#endif /* IP19 */
/* fix data error in 'loc' cache. 'index' indicates the frame of
* variables being used during this invokation of ecc_handler. */
#if IP19
real_ecc_fixcdata(uint loc, int index, k_machreg_t *eccfp,
volatile ecc_info_t *ecc_info_param)
#else
ecc_fixcdata(uint loc, int index, k_machreg_t *eccfp)
#endif
{
err_desc_t *edp = (err_desc_t *)&(ECC_INFO(desc[index]));
uint tags[NUM_TAGS];
__psunsigned_t p_caddr = PHYS_TO_K0(edp->e_vaddr);
uint pidx_test, pidx_max;
error_info_t err_info;
#ifndef IP19
uint data_syndrome, data_ecc;
__psunsigned_t s_caddr = PHYS_TO_K0(SCACHE_PADDR(edp));
volatile uint *p_cptr = (volatile uint *)p_caddr;
eccdesc_t d_syn_info;
#endif
__psunsigned_t prim_addr;
volatile uint *datap=0;
__psunsigned_t local_ecc_kvaddr;
#ifdef R4000PC
/*
* If we are on an R4000PC or R4600, we only have primary
* caches, and we only have parity, so the best we can
* do is to invalidate the line, if it is clean. Otherwise,
* we give up.
*
* However, we have higher level routines which can recover
* from some data errors. Since neither the R4000 Rev. 2.2
* nor the R4600 Rev. 1.7 return the correct ECC check bits
* for the data cache, we cannot simply look at the check bits
* to find bad words.
*/
if ((r4000_config & CONFIG_SC) != 0) { /* 0 == scache present */
#ifdef _MEM_PARITY_WAR
if (ecc_fixup_caches(loc, edp->e_paddr, edp->e_vaddr,
edp->e_flags & E_PADDR_MC))
return(0);
else
#endif
return(-1);
}
#endif /* R4000PC */
/* Currently the R4K does not return the ecc byte-checkbits
* for the double-word of data at the specified address during
* the index load tag cacheop. The desired algorithm for
* dealing with primary cache data errors would basically
* panic if the line was dirty (since there is only parity,
* so we can't fix it), and invalidate the tag and refetch
* if it was clean (obviously always the case if the error
* was in the I-cache). However, we must avoid infinite-ECC-
* exception-loops which would occur when the error was due
* to a stuck bit, for example, and wasn't fixed during the
* refetch. To do this we must either save a 'sufficient'
* amount of history--whatever that amount might be--or be
* able to check whether the refetch fixed the error. With
* this bug we can't do the latter, and the former is a
* rather unpalatable alternative, so we'll just panic for now,
* but only after determining that the paddrs match (otherwise
* it is probably either a) a 'phantom' exception caused
* when the bad line was replaced by a new one: the exception
* still occurs but the error is no longer in the line, or
* b) a manifestation of the R4K bug (fixed in 2.0) in which
* the vidx (and apparently the sidx also) info in the
* cacherr was bogus when a parity error was detected in
* the primary).
*/
if ((loc == CACH_PI) || (loc == CACH_PD)) {
/* Try every possible PIDX */
#if IP19
pidx_max = ((loc == CACH_PI) ?
ecc_info_param->ecc_picache_size :
ecc_info_param->ecc_pdcache_size) - NBPP;
#else
pidx_max = ((loc == CACH_PI) ? picache_size : pdcache_size) - NBPP;
#endif
for (pidx_test = 0; pidx_test <= pidx_max; pidx_test += NBPP) {
edp->e_vaddr = pidx_test | (edp->e_vaddr & (NBPP-1));
p_caddr = PHYS_TO_K0(edp->e_vaddr);
_c_ilt(loc, p_caddr, tags);
prim_addr = ((tags[TAGLO_IDX] & PADDRMASK) << PADDR_SHIFT);
/* just check that the bits from taglo match the physaddr */
if (prim_addr == (POFFSET_PADDR(edp))) {
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_p_data_err);
return(1);
}
}
/* if get to here, then no error found!!?? */
ecc_assign_msg(ECC_ERROR_MSG, index, ecc_no_pdataerr);
ecc_log_error(NO_ERROR, index);
return(0);
}
/* error is in CACH_SI or CACH_SD */
tags[TAGLO_IDX] = tags[TAGHI_IDX] = 0;
/* do cached read to get values of dbl-words. This would
* cause another ecc exception but we have the SR_DE bit set. */
#ifndef IP19
err_info.eidata_lo = *p_cptr;
err_info.eidata_hi = *(p_cptr+1);
/* but fetch tag by 2ndary (physical) addr */
_c_ilt_n_ecc(loc, s_caddr, tags, &data_ecc);
edp->e_s_taglo = tags[TAGLO_IDX];
edp->e_prevbadecc = edp->e_badecc;
edp->e_badecc = data_ecc;
err_info.cbits_in = (unchar)data_ecc;
data_syndrome = calc_err_info(DATA_CBITS, &err_info);
edp->e_syndrome = data_syndrome;
edp->e_lo_badval = err_info.eidata_lo;
edp->e_hi_badval = err_info.eidata_hi;
d_syn_info = err_info.syn_info;
edp->e_sbe_dblwrds = edp->e_mbe_dblwrds = 0;
if (!data_syndrome) {
/* no error in this dbl word */
ecc_assign_msg(ECC_ERROR_MSG, index, ecc_no_sdataerr);
ecc_log_error(NO_ERROR, index);
return(0);
}
/* If the line is clean we don't need to protect the data:
* invalidate, refill, and check it again. Must invalidate
* all the primary lines it maps too.
*/
if (loc==CACH_SI || (loc==CACH_SD && CLEAN_S_TAG(edp->e_s_taglo))) {
if (!_c_hinv(loc,s_caddr)) { /* missed 2ndary! */
ecc_assign_msg(ECC_ERROR_MSG, index,
ecc_ft_hinv_m_sc);
return(1);
}
err_info.eidata_lo = *p_cptr;
err_info.eidata_hi = *(p_cptr+1);
_c_ilt_n_ecc(loc, s_caddr, tags, &data_ecc);
err_info.cbits_in = data_ecc;
data_syndrome = calc_err_info(DATA_CBITS, &err_info);
if (data_syndrome) { /* didn't fix it: panic */
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_sdcfix_failed);
edp->e_prevbadecc = data_ecc;
edp->e_2nd_syn = data_syndrome;
#ifdef DEBUG_ECC
f_staglo = tags[TAGLO_IDX];
_c_ilt(CACH_PD, p_caddr, tags);
f_ptaglo = tags[TAGLO_IDX];
f_loval = err_info.eidata_lo;
f_hival = err_info.eidata_hi;
f_p_caddr = p_caddr;
f_s_caddr = s_caddr;
#endif
return(1);
} else {
ecc_assign_msg(ECC_INFO_MSG, index, ecc_sdcfix_good);
return(0);
}
} else { /* dirty line: can't invalidate line and refetch */
/* Now the severity of the error becomes relevant.
* If it is a one bit error we can force the line
* out to memory through the RMI--which corrects
* single-bit errors--then read it back and check
* if it is now correct. Panic if not--probably
* a stuck bit.
*/
ASSERT(loc != CACH_SI);
switch(d_syn_info.type) {
case 0: /* no error found */
case DB: /* 1-bit err in data: write out then refetch */
case CB:
break;
case B2: /* error is 2-bit or greater: can't fix it */
case B3:
case B4:
case UN:
default:
eccfp[ECCF_PADDR] = edp->e_paddr;
ecc_assign_msg(ECC_PANIC_MSG, index,
ecc_md_sddfix_failed);
return(1);
} /* switch */
_c_hwbinv(CACH_SD, s_caddr);
/* now refetch the info and ensure that it is fixed */
err_info.eidata_lo = *p_cptr;
err_info.eidata_hi = *(p_cptr+1);
_c_ilt_n_ecc(loc, s_caddr, tags, &data_ecc);
err_info.cbits_in = data_ecc;
data_syndrome = calc_err_info(DATA_CBITS, &err_info);
if (data_syndrome) { /* didn't fix it: panic */
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_sddfix_failed);
edp->e_prevbadecc = data_ecc;
edp->e_2nd_syn = data_syndrome;
#ifdef DEBUG_ECC
f_staglo = tags[TAGLO_IDX];
_c_ilt(CACH_PD, p_caddr, tags);
f_ptaglo = tags[TAGLO_IDX];
f_loval = err_info.eidata_lo;
f_hival = err_info.eidata_hi;
f_p_caddr = p_caddr;
f_s_caddr = s_caddr;
#endif
return(1);
} else {
ecc_assign_msg(ECC_INFO_MSG, index, ecc_sddfix_good);
return(0);
}
} /* else dirty line */
#else /* IP19 */
/* Currently code assumes that the primary-icache and primary-dcache
* linesizes are the same. This is used to determine the number
* of doublewords which must be read from the secondary in order
* to check ECC values. All IP19 systems currently have a
* primary cache linesize of 16 bytes (IB == DB == 0) so just
* verify this assumption in case it changes.
*/
if (get_config() & CONFIG_IB) {
if (!(get_config() & CONFIG_DB)) {
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_mixed_psize);
return(1);
}
} else {
if (get_config() & CONFIG_DB) {
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_mixed_psize);
return(1);
}
}
/* The following code allows us to pickup the virtual address using
* an uncached load so as not to interfere with the state of the
* cache while we're examining the cause of the error.
*/
#if 0
local_ecc_kvaddr = *(__psunsigned_t *)((K0_TO_K1(&ecc_kvaddr_vcecolor)));
#endif
local_ecc_kvaddr = ecc_info_param->ecc_vcecolor;
if (local_ecc_kvaddr) {
int vcecolor=0;
char *vceaddr=0;
pde_t pde;
extern uint ecc_tlbdropin(unsigned char *, caddr_t, pte_t);
vcecolor = (edp->e_s_taglo & STAG_VINDEX)<<STAG_VIND_SHIFT;
vceaddr = (char*)(local_ecc_kvaddr + vcecolor);
pde.pgi = mkpde(PG_VR|PG_M|PG_G|PG_SV|pte_cachebits(),
btoct(edp->e_paddr));
ecc_tlbdropin(0, vceaddr, pde.pte);
datap = (uint *)(((__psunsigned_t)vceaddr & ~POFFMASK)
+poff(edp->e_paddr));
err_info.eidata_lo = *datap;
err_info.eidata_hi = *(datap+1);
} else {
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_scerr_too_early);
return(1);
}
/* but fetch tag by 2ndary (physical) addr */
edp->e_sbe_dblwrds = edp->e_mbe_dblwrds = 0;
if (ecc_check_correctable(datap,edp,ecc_info_param)==0) {
/* no error in this dbl word */
ecc_assign_msg(ECC_INFO_MSG, index, ecc_no_sdataerr);
ecc_log_error(NO_ERROR, index);
return(0);
}
/* If the line is clean we don't need to protect the data:
* invalidate, refill, and check it again. Must invalidate
* all the primary lines it maps too.
*
* NOTE: If the cache error is reported due to an external request
* (i.e. ES is set), then we can actually have loc == CACH_SI but
* the actual problem might be in some other cacheline which is
* "dirty" (that other cacheline address is indicated by s_taglo).
* So checking for loc == CACH_SI is not sufficient unless we
* qualify it with ES==0.
*
* It's better to replace this check with a "simple" check for
* CLEAN_S_TAG().
*/
if (CLEAN_S_TAG(edp->e_s_taglo)) {
if (!_c_hinv(loc,(__psunsigned_t)datap)) {
/* missed 2ndary! */
/* We can miss here IFF another cpu (or I/O) has
* referenced this line after we performed the
* ecc check which explicitly loaded data from this
* scacheline.
* So we check that the line is currently invalid,
* then we reload and check the ECC to make sure
* that a multiple-bit error did not occur.
*/
_c_ilt(loc, (__psunsigned_t)datap, tags);
if ((tags[TAGLO_IDX] & SSTATEMASK) == SINVALID) {
if (ecc_check_correctable(datap,edp,ecc_info_param)==0) {
ecc_assign_msg(ECC_INFO_MSG, index,
ecc_sinvalid_noerr);
ecc_log_error(NO_ERROR, index);
return(0);
}
ecc_assign_msg(ECC_ERROR_MSG, index,
ecc_sinvalid_err);
return(1);
} else {
ecc_assign_msg(ECC_ERROR_MSG, index,
ecc_ft_hinv_m_sc);
return(1);
}
}
err_info.eidata_lo = *datap;
err_info.eidata_hi = *(datap+1);
if (ecc_check_correctable(datap,edp,ecc_info_param) !=0 ) {
/* didn't fix it: panic */
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_sdcfix_failed);
#ifdef DEBUG_ECC
f_staglo = tags[TAGLO_IDX];
_c_ilt(CACH_PD, p_caddr, tags);
f_ptaglo = tags[TAGLO_IDX];
f_loval = err_info.eidata_lo;
f_hival = err_info.eidata_hi;
f_p_caddr = p_caddr;
f_s_caddr = s_caddr;
#endif
return(1);
} else {
ecc_assign_msg(ECC_INFO_MSG, index, ecc_sdcfix_good);
return(0);
}
} else { /* dirty line: can't invalidate line and refetch */
/* Now the severity of the error becomes relevant.
* If it is a one bit error we can force the line
* out to memory through the CC --which corrects
* single-bit errors--then read it back and check
* if it is now correct. Panic if not--probably
* a stuck bit.
*/
if (ecc_check_correctable(datap,edp,ecc_info_param) < 0) {
eccfp[ECCF_PADDR] = edp->e_paddr;
ecc_assign_msg(ECC_PANIC_MSG, index,
ecc_md_sddfix_failed);
return(1);
}
_c_hwbinv(CACH_SD, (__psunsigned_t)datap);
/* now refetch the info and ensure that it is fixed */
err_info.eidata_lo = *datap;
err_info.eidata_hi = *(datap+1);
if (ecc_check_correctable(datap,edp,ecc_info_param) != 0) { /* didn't fix it: panic */
ecc_assign_msg(ECC_PANIC_MSG, index, ecc_sddfix_failed);
#ifdef DEBUG_ECC
f_staglo = tags[TAGLO_IDX];
_c_ilt(CACH_PD, p_caddr, tags);
f_ptaglo = tags[TAGLO_IDX];
f_loval = err_info.eidata_lo;
f_hival = err_info.eidata_hi;
f_p_caddr = p_caddr;
f_s_caddr = s_caddr;
#endif
return(1);
} else {
ecc_assign_msg(ECC_INFO_MSG, index, ecc_sddfix_good);
return(0);
}
} /* else dirty line */
#endif /* IP19 */
} /* ecc_fixcdata */
#if defined(_MEM_PARITY_WAR) || IP20 || IP22
pfn_t
allocate_ecc_info(pfn_t fpage)
{
/*
* Allocate stack and log area for cache error exception handler
* in dedicated uncached pages.
*/
#ifdef _MEM_PARITY_WAR
bzero((void *)PHYS_TO_K1(ECCF_ADDR(0)),ECCF_SIZE);
CACHE_ERR_STACK_BASE_P = PHYS_TO_K1((ctob(fpage)+CACHE_ERR_STACK_SIZE));
fpage += btoc(CACHE_ERR_STACK_SIZE);
CACHE_ERR_ECCINFO_P = PHYS_TO_K1(ctob(fpage));
fpage += btoc((sizeof(ecc_info) +
perr_mem_init(((caddr_t) CACHE_ERR_ECCINFO_P) +
sizeof(ecc_info))));
init_ecc_info();
#else /* _MEM_PARITY_WAR */
fpage += btoc(perr_mem_init((caddr_t) (PHYS_TO_K1(ctob(fpage)))));
#endif /* _MEM_PARITY_WAR */
return(fpage);
}
#endif /* _MEM_PARITY_WAR */
#ifndef IP19
static void
init_ecc_info(void)
{
#ifdef _MEM_PARITY_WAR
msg_addrs[ECC_PANIC_MSG] = (volatile char **)&ecc_info.ecc_panic_msg[0];
msg_addrs[ECC_INFO_MSG] = (volatile char **)&ecc_info.ecc_info_msg[0];
msg_addrs[ECC_ERROR_MSG] = (volatile char **)&ecc_info.ecc_error_msg[0];
#endif /* _MEM_PARITY_WAR */
/* doing a bzero will cause both ecc_handler and
* ecc_cleanup/ecc_panic to skip the 0th slot 1st
* time around the circular buffer. who cares. */
bzero((void *)&ecc_info, sizeof(ecc_info));
#ifndef _MEM_PARITY_WAR
ecc_info.eframep = CACHE_ERR_EFRAME;
ecc_info.eccframep = CACHE_ERR_ECCFRAME;
#endif /* _MEM_PARITY_WAR */
ecc_info_initialized = 1;
#ifdef R4000PC
r4000_config = get_r4k_config();
#endif /* R4000PC */
} /* init_ecc_info */
#endif /* !IP19 */
#ifdef IP19
static
real_ecc_assign_msg(
int msg_type,
int index,
char msg,
volatile ecc_info_t *ecc_info_param)
{
switch(msg_type) {
case ECC_PANIC_MSG:
ecc_info_param->ecc_panic_msg[index] = msg;
break;
case ECC_INFO_MSG:
ecc_info_param->ecc_info_msg[index] = msg;
break;
case ECC_ERROR_MSG:
ecc_info_param->ecc_error_msg[index] = msg;
break;
default:
;
}
MARK_FOR_CLEANUP; /* msg queued ==> need ecc_cleanup */
return(0);
} /* ecc_assign_msg */
static int
real_ecc_print_msg(
int msg_type, /* message-type to print; -1 for all msgs at 'index' */
uint index, /* message-array index to print */
int clear_it, /* if panic'ing don't clear msg--can use symmon */
int disp_hdr, /* if non-zero print message-type before msg */
uint cpu, /* cpuid of failing cpu */
volatile ecc_info_t *ecc_info_param)
{
char *ppc;
char *nameptr, ppindex;
pfunc pptr = (pm_use_qprintf ? (pfunc)qprintf : printf);
int i;
switch (msg_type) {
case ECC_ALL_MSGS:
for (i = ECC_PANIC_MSG; i <= ECC_ERROR_MSG; i++)
real_ecc_print_msg(i,index,clear_it,disp_hdr,cpu,ecc_info_param);
return(0);
case ECC_PANIC_MSG:
case ECC_INFO_MSG:
case ECC_ERROR_MSG:
break;
default:
return(-1);
}
switch(msg_type) {
case ECC_PANIC_MSG:
ppindex = ecc_info_param->ecc_panic_msg[index];
break;
case ECC_INFO_MSG:
ppindex = ecc_info_param->ecc_info_msg[index];
break;
case ECC_ERROR_MSG:
ppindex = ecc_info_param->ecc_error_msg[index];
break;
default:
ppindex = 0;
}
switch(ppindex) {
case ecc_overrun_msg:
ppc = real_ecc_overrun_msg;
break;
case ecc_eb_not_i:
ppc = real_ecc_eb_not_i;
break;
case ecc_incons_err:
ppc = real_ecc_incons_err;
break;
case ecc_ew_err:
ppc = real_ecc_ew_err;
break;
case ecc_kernel_err:
ppc = real_ecc_kernel_err;
break;
case ecc_user_err:
ppc = real_ecc_user_err;
break;
case ecc_inval_loc:
ppc = real_ecc_inval_loc;
break;
case ecc_no_ptagerr:
ppc = real_ecc_no_ptagerr;
break;
case ecc_no_stagerr:
ppc = real_ecc_no_stagerr;
break;
case ecc_ptfix_failed:
ppc = real_ecc_ptfix_failed;
break;
case ecc_stfix_failed:
ppc = real_ecc_stfix_failed;
break;
case ecc_no_pdataerr:
ppc = real_ecc_no_pdataerr;
break;
case ecc_no_sdataerr:
ppc = real_ecc_no_sdataerr;
break;
case ecc_sinvalid_noerr:
ppc = real_ecc_sinvalid_noerr;
break;
case ecc_sinvalid_err:
ppc = real_ecc_sinvalid_err;
break;
case ecc_sdcfix_failed:
ppc = real_ecc_sdcfix_failed;
break;
case ecc_sdcfix_good:
ppc = real_ecc_sdcfix_good;
break;
case ecc_sddfix_failed:
ppc = real_ecc_sddfix_failed;
break;
case ecc_sddfix_good:
ppc = real_ecc_sddfix_good;
break;
case ecc_md_sddfix_failed:
ppc = real_ecc_md_sddfix_failed;
break;
case ecc_p_data_err:
ppc = real_ecc_p_data_err;
break;
case ecc_inval_eloc:
ppc = real_ecc_inval_eloc;
break;
case ecc_bad_s_tag:
ppc = real_ecc_bad_s_tag;
break;
case ecc_ft_hinv_m_sc:
ppc = real_ecc_ft_hinv_m_sc;
break;
case ecc_scerr_too_early:
ppc = real_ecc_scerr_too_early;
break;
case ecc_ei_notdirty:
ppc = real_ecc_ei_notdirty;
break;
case ecc_mixed_psize:
ppc = real_ecc_mixed_psize;
break;
case ecc_ei_norecover:
ppc = real_ecc_ei_norecover;
break;
case ecc_possible_ei:
ppc = real_ecc_possible_ei;
break;
default:
ppc = NULL;
}
nameptr = (char *)msg_strs[msg_type];
if (ppc) {
if (maxcpus > 1)
pptr("CPU %d: ",cpu);
pptr(" %s %s\n",(disp_hdr?nameptr : " "),ppc);
if (clear_it) {
switch(msg_type) {
case ECC_PANIC_MSG:
ecc_info_param->ecc_panic_msg[index] = 0;
break;
case ECC_INFO_MSG:
ecc_info_param->ecc_info_msg[index] = 0;
break;
case ECC_ERROR_MSG:
ecc_info_param->ecc_error_msg[index] = 0;
break;
default:
;
}
}
}
return(0);
} /* ecc_print_msg */
#else /* !IP19 */
static
ecc_assign_msg(
int msg_type,
int index,
char *msg)
{
msg_addrs[msg_type][index] = msg;
MARK_FOR_CLEANUP; /* msg queued ==> need ecc_cleanup */
return(0);
} /* ecc_assign_msg */
static int
ecc_print_msg(
int msg_type, /* message-type to print; -1 for all msgs at 'index' */
uint index, /* message-array index to print */
int clear_it, /* if panic'ing don't clear msg--can use symmon */
int disp_hdr, /* if non-zero print message-type before msg */
uint cpu) /* cpuid of failing cpu */
{
char **ppc;
char *nameptr;
pfunc pptr = (pm_use_qprintf ? (pfunc)qprintf : printf);
int i;
switch (msg_type) {
case ECC_ALL_MSGS:
for (i = ECC_PANIC_MSG; i <= ECC_ERROR_MSG; i++)
ecc_print_msg(i,index,clear_it,disp_hdr,cpu);
return(0);
case ECC_PANIC_MSG:
case ECC_INFO_MSG:
case ECC_ERROR_MSG:
break;
default:
return(-1);
}
ppc = (char **)msg_addrs[msg_type];
nameptr = (char *)msg_strs[msg_type];
if (ppc[index]) {
#if MP
if (maxcpus > 1)
pptr("CPU %d: ",cpu);
#endif
pptr(" %s %s\n",(disp_hdr?nameptr : " "),ppc[index]);
if (clear_it)
ppc[index] = NULL;
}
return(0);
} /* ecc_print_msg */
#endif /* !IP19 */
#define PTAG_PARITY_BIT 0x1 /* ptaglo parity bit is #0 */
#define PTAG_1ST_DATA_BIT 6 /* low 6 bits are undefined + parity */
#define PTAG_PTAGLO_BITS 24
#define PTAG_PSTATE_BITS 2
#define PTAG_DATA_BITS (PTAG_PTAGLO_BITS+PTAG_PTAG_PSTATE_BITS)
/* ecc_bad_ptag(taglo): Determine if the ecc/parity in 'taglo' is
* correct. Calculate the even parity for the 26-bit field (5 undefined
* bits plus low bit == parity bit). Return 0 if parity is correct, else 1.
*/
static
ecc_bad_ptag(uint taglo)
{
uint bit = (0x1 << PTAG_1ST_DATA_BIT);
int numsetbits = 0;
uint pbit = 0;
int i;
for (i = PTAG_1ST_DATA_BIT; i < BITSPERWORD; bit <<= 1, i++) {
if (taglo & bit)
numsetbits++;
}
if (numsetbits & 0x1) /* odd # of bits; set p to even it */
pbit = 1;
if ((taglo & 0x1) == pbit)
return(0); /* computed parity matches ptaglo's */
else
return(1);
} /* ecc_bad_ptag */
#if IP19
static
real_ecc_log_error(int where, int index, volatile ecc_info_t *ecc_info_param)
#else
volatile int inval_eloc = 0;
static
ecc_log_error(int where, int index)
#endif
{
if (where < 0 || where >= ECC_ERR_TYPES) {
ecc_assign_msg(ECC_ERROR_MSG, index, ecc_inval_eloc);
#if IP19
/* avoid global references which generate cached accesses */
ecc_info_param->ecc_inval_eloc_where = where;
#else
inval_eloc = where;
#endif
return(1);
}
ECC_INFO(ecc_err_cnts)[where]++;
return(0);
} /* ecc_log_error */
/* First set of trees and structs are for computing the 8 checkbits
* that accompany each set of double-words in memory and secondary
* cache: i.e. the data trees */
#define ECC8B_DTREE7H 0xff280ff0
#define ECC8B_DTREE7L 0x88880928
#define ECC8B_DTREE6H 0xfa24000f
#define ECC8B_DTREE6L 0x4444ff24
#define ECC8B_DTREE5H 0x0b22ff00
#define ECC8B_DTREE5L 0x2222fa32
#define ECC8B_DTREE4H 0x0931f0ff
#define ECC8B_DTREE4L 0x11110b21
#define ECC8B_DTREE3H 0x84d08888
#define ECC8B_DTREE3L 0xff0f8c50
#define ECC8B_DTREE2H 0x4c9f4444
#define ECC8B_DTREE2L 0x00ff44d0
#define ECC8B_DTREE1H 0x24ff2222
#define ECC8B_DTREE1L 0xf000249f
#define ECC8B_DTREE0H 0x14501111
#define ECC8B_DTREE0L 0x0ff014ff
struct d_emask {
uint d_emaskhi;
uint d_emasklo;
};
struct d_emask d_ptrees[] = {
{ ECC8B_DTREE0H, ECC8B_DTREE0L },
{ ECC8B_DTREE1H, ECC8B_DTREE1L },
{ ECC8B_DTREE2H, ECC8B_DTREE2L },
{ ECC8B_DTREE3H, ECC8B_DTREE3L },
{ ECC8B_DTREE4H, ECC8B_DTREE4L },
{ ECC8B_DTREE5H, ECC8B_DTREE5L },
{ ECC8B_DTREE6H, ECC8B_DTREE6L },
{ ECC8B_DTREE7H, ECC8B_DTREE7L },
};
/* Next, the data necessary for computing the 7 checkbits
* for the 25-bit secondary cache tags: i.e. the tag trees. */
#define ECC7B_TTREE6 0x0a8f888
#define ECC7B_TTREE5 0x114ff04
#define ECC7B_TTREE4 0x2620f42
#define ECC7B_TTREE3 0x29184f0
#define ECC7B_TTREE2 0x10a40ff
#define ECC7B_TTREE1 0x245222f
#define ECC7B_TTREE0 0x1ff1111
struct t_emask {
uint t_emask;
};
struct t_emask t_ptrees[] = {
ECC7B_TTREE0,
ECC7B_TTREE1,
ECC7B_TTREE2,
ECC7B_TTREE3,
ECC7B_TTREE4,
ECC7B_TTREE5,
ECC7B_TTREE6,
};
/* 2ndary cache tags consist of 25 data bits monitored by 7 checkbits */
#define STAG_DBIT_SIZE 25
#define STAG_CBIT_SIZE 7
#define STAG_SIZE (STAG_DBIT_SIZE+STAG_CBIT_SIZE)
/* S_taglo field format:
* bitpositions--> 31..13 12..10 9..7 6..0
* fields --> < p_addr, cstate, vind, ecc >.
* Internal format:
* 31..25 24..22 21..19 18..0
* < ecc, cstate, vind, p_addr >.
* the following defines tell ecc_swap_s_tag() how to shift the fields to
* create the internal format from the s_taglo format.
*/
/* sizes of the fields */
#define S_TAG_PADDR_BITS 19
#define S_TAG_CS_BITS 3
#define S_TAG_VIND_BITS 3
#define S_TAG_ECC_CBITS 7
/* bit positions of the fields in the s_taglo format */
#define S_TAG_ECC_BITPOS 0
#define S_TAG_VIND_BITPOS (S_TAG_ECC_BITPOS+S_TAG_ECC_CBITS) /* 7 */
#define S_TAG_CS_BITPOS (S_TAG_VIND_BITPOS+S_TAG_VIND_BITS) /* 10 */
#define S_TAG_PADDR_BITPOS (S_TAG_CS_BITPOS+S_TAG_CS_BITS) /* 13 */
/* bit positions of the fields in the internal format */
#define S_INT_PADDR_BITPOS 0
#define S_INT_VIND_BITPOS (S_INT_PADDR_BITPOS+S_TAG_PADDR_BITS) /* 19 */
#define S_INT_CS_BITPOS (S_INT_VIND_BITPOS+S_TAG_VIND_BITS) /* 22 */
#define S_INT_ECC_BITPOS (S_INT_CS_BITPOS+S_TAG_CS_BITS) /* 25 */
/* masks for the four fields in the 2ndary-cache-internal format:
* < ecc, cstate, vindex, p_addr > */
#define S_INT_PADDR_MASK 0x0007ffff
#define S_INT_VIND_MASK 0x00380000
#define S_INT_CS_MASK 0x01c00000
#define S_INT_ECC_MASK 0xfe000000
/* Below macros enable easy swapping of each of the 4 2ndary tag fields.
* Note that the mask used by the conversion macros in extracting the
* bits of the field depends on the direction of the swap */
/* paddr: tag bit 13 <--> internal bit 0 */
#define SADDR_SWAP_ROLL (S_TAG_PADDR_BITPOS-S_INT_PADDR_BITPOS)/* 13 */
/* the saddr conversion rolls opposite from the other 3 fields: TagTOInternal
* rolls addr DOWN to bottom */
#define SADDR_TTOI(S_TAG) ((S_TAG & SADDRMASK) >> SADDR_SWAP_ROLL)
#define SADDR_ITOT(S_TAG) ((S_TAG & S_INT_PADDR_MASK) << SADDR_SWAP_ROLL)
/* cache state: tag bit 10 <--> internal bit 22 */
#define SSTATE_SWAP_ROLL (S_INT_CS_BITPOS-S_TAG_CS_BITPOS) /* 12 */
#define SSTATE_TTOI(S_TAG) ((S_TAG & SSTATEMASK) << SSTATE_SWAP_ROLL)
#define SSTATE_ITOT(S_TAG) ((S_TAG & S_INT_CS_MASK) >> SSTATE_SWAP_ROLL)
/* vindex: tag bit 7 <--> internal bit 19 */
#define SVIND_SWAP_ROLL (S_INT_VIND_BITPOS-S_TAG_VIND_BITPOS) /* 12 */
#define SVIND_TTOI(S_TAG) ((S_TAG & SVINDEXMASK) << SVIND_SWAP_ROLL)
#define SVIND_ITOT(S_TAG) ((S_TAG & S_INT_VIND_MASK) >> SVIND_SWAP_ROLL)
/* ecc: tag bit 0 <--> internal bit 25 */
#define SECC_SWAP_ROLL (S_INT_ECC_BITPOS-S_TAG_ECC_BITPOS) /* 25 */
#define SECC_TTOI(S_TAG) ((S_TAG & SECC_MASK) << SECC_SWAP_ROLL)
#define SECC_ITOT(S_TAG) ((S_TAG & S_INT_ECC_MASK) >> SECC_SWAP_ROLL)
/* ecc_swap_s_tag() converts between the field-ordering of the taglo
* register (holding a 2ndary cache tag) and the internal format
* actually used in the secondary caches. The conversion may be
* done in either direction. The routine requires the ctag_swap_info
* structure */
#define TAG_TO_INTERNAL 1
#define INTERNAL_TO_TAG 2
typedef struct tag_swap_info {
uint ts_in_val; /* value to be swapped */
uint ts_out_32; /* INTERNAL_TO_TAG sets 32-bit val (including cbits) */
uint ts_out_25; /* TAG_TO_INTERNAl sets 25-bit val (excluding cbits) */
uint ts_cbits; /* both directions set this field */
} tag_swap_info_t;
int ecc_swap_s_tag(uint, tag_swap_info_t *);
/* tag_dbpos is a lookup-table which translates the bit-positions of data
* errors as indicated by syndromes to their counterparts in the taglo format.
* Internally the low 19 bits contain the paddr; in taglo the paddr field
* begins at 13. The next 6 bits internally contain the vindex and state
* fields; in the tag reg these are ordered the same but begin at bit 7.
*/
uint tag_dbpos[] = {
/* paddr --> */ 13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
/* vind & state--> */ 7, 8, 9, 10,11,12 };
/* computes relevant info about ECC errors, and returns it in an
* error_info_t struct. ecc_type is DATA_CBITS or TAG_CBITS and
* determines whether calc_err_info will compute the 8-bit checkbit
* and syndrome of two data-words, or the 7-bit info for a 25-bit
* second-level tag.
*/
#ifdef IP19
real_calc_err_info(int ecc_type, error_info_t *e_infop,
volatile ecc_info_t *ecc_info_param)
#else
calc_err_info(int ecc_type, error_info_t *e_infop)
#endif
{
uint shi, slo;
uint true_val = 0;
register int pbithi, pbitlo, pbit;
register int i;
register int j;
struct d_emask *dep;
struct t_emask *tep;
unchar cbits = 0;
uint lo_in, hi_in;
tag_swap_info_t swap_info;
if (ecc_type == DATA_CBITS) {
/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
lo_in = e_infop->eidata_hi;
hi_in = e_infop->eidata_lo;
/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
#if IP19
dep = &ecc_info_param->ecc_d_ptrees[0];
#else
dep = &d_ptrees[0];
#endif
for (i = 0; i < 8; i++, dep++) {
shi = hi_in & dep->d_emaskhi;
slo = lo_in & dep->d_emasklo;
pbithi = 0;
pbitlo = 0;
for (j = 0; j < 32; j++) {
if (shi & (1 << j))
pbithi++;
if (slo & (1 << j))
pbitlo++;
}
if ((pbithi + pbitlo) & 1)
cbits |= 1 << i;
}
e_infop->cbits_out = cbits;
e_infop->syndrome = e_infop->cbits_in ^ cbits;
e_infop->syn_info = data_eccsyns[(int)e_infop->syndrome];
return(e_infop->syndrome);
} else if (ecc_type == TAG_CBITS) {
/* Internally the R4k stores the fields comprising
* secondary tags differently than the format it
* uses for s_ptaglo. The cache format is:
* <ECC, CS, Vidx, Stag>; the STaglo format is:
* <STag, CS, Vidx, ECC>. The ECC is computed and
* checked with the fields arranged as they are
* internally; therefore we must swap them before
* computing. TAG_TO_INTERNAL sets ts_out_25 to
* the 25-bit data value and ts_cbits to the 7-bit
* ecc field from the tag.
*/
swap_info.ts_in_val = e_infop->eis_taglo;
ecc_swap_s_tag(TAG_TO_INTERNAL, &swap_info);
true_val = swap_info.ts_out_25;
#ifdef ECC_DEBUG
printf("after tag swap, value 0x%x, cbits 0x%x\n",true_val,
swap_info.ts_cbits);
#endif
/* if caller set high bit (never valid in 7-bit cbit field),
* use the cbits in the taglo as cbits_in for xor; else
* it holds the cbits for the bad tag */
if (SET_CBITS_IN & e_infop->cbits_in)
e_infop->cbits_in = (unchar)swap_info.ts_cbits;
#if IP19
tep = &ecc_info_param->ecc_t_ptrees[0];
#else
tep = &t_ptrees[0];
#endif
for (i = 0; i < 7; i++, tep++) {
shi = true_val & tep->t_emask;
pbit = 0;
for (j = 0; j < 25; j++) {
if (shi & (1 << j))
pbit++;
}
if (pbit & 1)
cbits |= 1 << i;
}
e_infop->cbits_out = cbits;
e_infop->syndrome = e_infop->cbits_in ^ cbits;
e_infop->syn_info = tag_eccsyns[(int)e_infop->syndrome];
return(e_infop->syndrome);
} else
return(0x80000000);
} /* calc_err_info */
/* the format of the taglo register when it contains a 2ndary tag is
* <paddr, state, vindex, ecc>. Internally the fields are ordered
* <ecc, state, vindex, paddr>, and the checkbits are generated and
* checked with the fields ordered in the internal format.
* ecc_swap_s_tag() converts the fields into either format.
*/
ecc_swap_s_tag(
uint which_way,
tag_swap_info_t *swap_infop)
{
uint swapped_val;
uint in_val = swap_infop->ts_in_val;
switch (which_way) {
case TAG_TO_INTERNAL: /* swap, then set 25-bit value and cbits */
swapped_val = SADDR_TTOI(in_val);
swapped_val |= SSTATE_TTOI(in_val);
swapped_val |= SVIND_TTOI(in_val);
swap_infop->ts_out_25 = swapped_val;
swap_infop->ts_out_32 = (swapped_val | SECC_TTOI(in_val));
/* low 7 bits of TagLo reg are checkbits */
swap_infop->ts_cbits = (in_val & SECC_MASK);
return(0);
case INTERNAL_TO_TAG: /* set entire 32-bits plus cbits */
swapped_val = SADDR_ITOT(in_val);
swapped_val |= SSTATE_ITOT(in_val);
swapped_val |= SVIND_ITOT(in_val);
swapped_val |= SECC_ITOT(in_val);
swap_infop->ts_out_32 = swapped_val;
/* high 7 bits of internal format are checkbits */
swap_infop->ts_cbits = SECC_ITOT(in_val);
return(0);
default:
printf("ecc_swap_s_tag: illegal direction (%d)\n",which_way);
return(-1);
}
} /* ecc_swap_s_tag */
/* given a single-bit error type (CB or DB) and the bit position of that
* error in the R4K's internal format (i.e. as it is stored in the 2ndary
* cache), xlate_bit returns the bitposition of its counterpart in the
* taglo format. */
#if IP19
real_xlate_bit(enum error_type etype, uint bitpos,
volatile ecc_info_t *ecc_info_param)
#else
xlate_bit(enum error_type etype, uint bitpos)
#endif
{
/* ecc field is 6..0 in taglo; the syndrome differentiates between
* data and checkbit errors, (numbering them 0..24 and 0..6 resp.)
* so no translation is necessary for cbit errors. */
if (etype == CB) {
ASSERT(bitpos < STAG_CBIT_SIZE);
return(bitpos);
} else {
ASSERT(bitpos < STAG_DBIT_SIZE);
#if IP19
return(ecc_info_param->ecc_tag_dbpos[bitpos]);
#else
return(tag_dbpos[bitpos]);
#endif
}
} /* xlate_bit */
#ifdef IP19
void
ip19_init_ecc_info( __psunsigned_t vceaddr )
{
ecc_info_ptr.ecc_vcecolor = vceaddr;
/* these fields are only necessary because the compiler tends to
* generate the constants needed and place them in a globally
* addressed location (either K0 or off of "gp"), so loading the
* constants involves cached accesses. So we perform the
* conversions once and just load the (uncached) pointer from
* the ecc_info array which is accessed uncached too.
*/
ecc_info_ptr.everror_ext = EVERROR_EXT;
/* Following global structures need to be reference uncached by
* ecc_handler and friends.
*/
ecc_info_ptr.ecc_tag_dbpos = (uint *)K0_TO_K1(&tag_dbpos);
ecc_info_ptr.ecc_d_ptrees = (struct d_emask *)K0_TO_K1(&d_ptrees);
ecc_info_ptr.ecc_t_ptrees = (struct t_emask *)K0_TO_K1(&t_ptrees);
ecc_info_ptr.ecc_data_eccsyns = (eccdesc_t*)K0_TO_K1(&real_data_eccsyns);
ecc_info_ptr.ecc_tag_eccsyns = (eccdesc_t*)K0_TO_K1(&real_tag_eccsyns);
ecc_info_ptr.ecc_k0size_less1 = K0SIZE-1;
ecc_info_ptr.ecc_physmem = physmem;
ecc_info_ptr.ecc_picache_size = picache_size;
ecc_info_ptr.ecc_pdcache_size = pdcache_size;
ecc_info_ptr.ecc_attempt_recovery = 0;
#ifndef IP19_CACHEERRS_FATAL
{
extern int r4k_corrupt_scache_data;
if (r4k_corrupt_scache_data)
ecc_info_ptr.ecc_attempt_recovery = 1;
}
#endif
/* Following address should be cached for test to work properly */
ecc_info_ptr.ecc_dummyline =
((__psunsigned_t)(&dummy_cacheline[16]) & ~(SCACHE_LINESIZE-1));
ecc_info_ptr.ecc_info_inited = 1;
}
#endif /* IP19 */
#define NUM_CE_BITS 8
#define SIDX_VAL(x) (x & CACHERR_SIDX_MASK)
#define PIDX_VAL(x) ((x & CACHERR_PIDX_MASK) << 12)
#define CEBUFSIZ 180
/* if sindex == -1, print all frames from read ptr to write ptr;
* else just the specified frame */
void
print_ecc_info(sindex,eindex)
int sindex;
int eindex;
{
ecc_info_t *eip = (ecc_info_t *)&ecc_info_ptr;
err_desc_t *edp; /* ptr to set of variables to set this time */
__uint64_t eaddr;
int i, loc;
if (sindex == -1) {
sindex = eip->ecc_r_index;
eindex = eip->ecc_w_index;
}
if (sindex < 0) sindex = 0;
if (eindex < 0) eindex = 0;
if (sindex >= ECC_FRAMES) sindex = ECC_FRAMES-1;
if (eindex >= ECC_FRAMES) eindex = ECC_FRAMES-1;
if (eindex < sindex) eindex = sindex;
if (sindex != eindex)
qprintf("\necc_info for slots %d through %d\n",sindex,eindex);
else
qprintf("\necc_info for slot %d\n",sindex);
#ifndef _MEM_PARITY_WAR
qprintf(" efptr 0x%x eccfptr 0x%x, ",
eip->eframep, eip->eccframep);
#endif /* _MEM_PARITY_WAR */
qprintf(" w_ind %d r_ind %d clean %d c_cnt %d flags 0x%x\n",
eip->ecc_w_index, eip->ecc_r_index, eip->needs_cleanup,
eip->cleanup_cnt, eip->ecc_flags);
qprintf(" err cnts: ");
for (i = 0; i < ECC_ERR_TYPES; i++ )
qprintf("%s %d ",err_type_names[i],
ecc_info_ptr.ecc_err_cnts[i]);
qprintf("\n\n");
for (i = sindex; i <= eindex; i++) {
edp = (err_desc_t *)&(ecc_info_ptr.desc[i]);
if (!edp->e_cache_err) {
qprintf("SLOT #%d empty\n",i);
continue;
}
qprintf("SLOT #%d:\n",i);
pm_use_qprintf = 1;
#if IP19
real_ecc_print_msg(ECC_ALL_MSGS, i, 0, 1, edp->e_cpuid,
&ecc_info_ptr);
#else
ecc_print_msg(ECC_ALL_MSGS, i, 0, 1, edp->e_cpuid);
#endif
pm_use_qprintf = 0;
eaddr = edp->e_paddr;
loc = edp->e_location;
if (loc < 0 || loc > SYSAD)
loc = BAD_LOC;
qprintf(" %s (%d) %s (%d) error:\n",
error_loc_names[loc], edp->e_location,
(edp->e_tag_or_data == DATA_ERR ? "data" : "tag"),
edp->e_tag_or_data);
qprintf(" sr %R\n",edp->e_sr,
#if R4000 && R10000
IS_R10000() ? r10k_sr_desc :
#endif
sr_desc);
qprintf(" cache_err %R, epc 0x%x\n",
edp->e_cache_err, cache_err_desc, edp->e_error_epc);
qprintf(" S-taglo %R%sP-taglo %R\n", edp->e_s_taglo,
#if R4000 && R10000
IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
s_taglo_desc, (edp->e_p_taglo ? "\n " : " "),
edp->e_p_taglo, p_taglo_desc);
qprintf(" paddr %llx vaddr %x syn 0x%x user %d pid %d\n",
edp->e_paddr, edp->e_vaddr, edp->e_syndrome,
edp->e_user, (__psint_t)edp->e_pid);
#ifdef _MEM_PARITY_WAR
qprintf(" efptr 0x%x eccfptr 0x%x, ",
(__psunsigned_t)edp->e_eframep,
(__psunsigned_t)edp->e_eccframep);
#endif /* _MEM_PARITY_WAR */
if (edp->e_prevbadecc)
qprintf(" prevbadecc %x ",edp->e_prevbadecc);
if (edp->e_2nd_syn)
qprintf(" 2nd_syn %x\n",edp->e_2nd_syn);
else
qprintf("\n");
if (edp->e_tag_or_data == DATA_ERR)
qprintf(" lo_val 0x%x hi_val 0x%x badecc %x syn 0x%x\n",
edp->e_lo_badval, edp->e_hi_badval,
edp->e_badecc, edp->e_syndrome);
else if (edp->e_location==CACH_SI || edp->e_location==CACH_SD)
/* secondary tag: print ecc, syndrome and staglo */
qprintf(" S_Tag %R badecc 0x%x, syn 0x%x, addr %llx\n",
edp->e_s_taglo,
#if R4000 && R10000
IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
s_taglo_desc, edp->e_badecc,
edp->e_syndrome, edp->e_paddr);
else if (edp->e_location==CACH_PI || edp->e_location==CACH_PD)
/* primary tag: print p_taglo */
qprintf(" PTagLo %R, Vaddr 0x%x\n",
edp->e_p_taglo,p_taglo_desc,edp->e_vaddr);
if (edp->e_location == CACH_PI || edp->e_location == CACH_PD)
eaddr = edp->e_vaddr;
pm_use_qprintf = 1;
print_ecctype(edp->e_location, edp->e_tag_or_data,
edp->e_syndrome, eaddr, 1, edp->e_cpuid);
pm_use_qprintf = 0;
}
#if DEBUG_ECC
if (f_s_caddr) {
qprintf(" f_ vars:\n lov 0x%x hiv 0x%x pcad %x scad %x\n",
f_loval, f_hival, f_p_caddr, f_s_caddr);
qprintf(" P-lo %R%sS-lo %R\n",
f_ptaglo,p_taglo_desc,
(f_ptaglo ? "\n " : " "),
f_staglo,
#if R4000 && R10000
IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
s_taglo_desc);
qprintf(" cooked 0x%x, f_d_ecc 0x%x\n",f_cooked_ecc,f_d_ecc);
qprintf(" P-lo1 %R%sS-lo1 %R\n",
f_ptaglo1,p_taglo_desc,
(f_ptaglo1 ? "\n " : " "),
f_staglo1,
#if R4000 && R10000
IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
s_taglo_desc);
}
#endif /* DBEUG_ECC */
} /* print_ecc_info */
void
idbg_ecc_info(void)
{
register int i;
qprintf(" err cnts:\n ");
for (i = 0; i < ECC_ERR_TYPES; i++ )
#if IP19
qprintf("%s %d ",err_type_names[i],ecc_info_ptr.ecc_err_cnts[i]);
#else
qprintf("%s %d ",err_type_names[i],ecc_info.ecc_err_cnts[i]);
#endif
qprintf("\n\n");
}
static int
print_ecctype(
int loc,
int ecc_type,
uint syndrome,
__uint64_t eaddr,
int printerr,
uint cpu)
{
eccdesc_t syn_info, *syntab_ptr;
uint es_tsize;
pfunc pptr = (pm_use_qprintf ? (pfunc)qprintf : printf);
if (ecc_type == D_AND_T_ERR) /* ecc info will be on the tag error */
ecc_type = TAG_CBITS;
if (loc < 0 || loc > SYSAD)
loc = BAD_LOC;
if (ecc_type == TAG_CBITS) {
es_tsize = ECCSYN_TABSIZE(real_tag_eccsyns);
#if IP19
/* It's safe to use the ecc_info_ptr since this routine is
* invoked from ecc_cleanup so it's safe to perform the
* 'gp' relative accesses the compiler generates in the
* K0_TO_K1 macro expansion. Note that referencing the
* tag_eccsyns array is uncached.
*/
syntab_ptr = ecc_info_ptr.ecc_tag_eccsyns;
#else
syntab_ptr = tag_eccsyns;
#endif
} else {
es_tsize = ECCSYN_TABSIZE(real_data_eccsyns);
#if IP19
/* It's safe to use the ecc_info_ptr since this routine is
* invoked from ecc_cleanup so it's safe to perform the
* 'gp' relative accesses the compiler generates in the
* K0_TO_K1 macro expansion. Note that referencing the
* data_eccsyns array is uncached.
*/
syntab_ptr = ecc_info_ptr.ecc_data_eccsyns;
#else
syntab_ptr = data_eccsyns;
#endif
}
if (syndrome >= es_tsize) {
if (printerr) {
#if MP
if (maxcpus > 1)
pptr("CPU %d: ",cpu);
#endif
pptr("print_ecctype(): invalid %s syndrome (%d)\n",
(ecc_type == TAG_CBITS ? "tag" : "data"),es_tsize);
}
return(-1);
}
syn_info = syntab_ptr[syndrome];
#ifdef ECC_DEBUG
#if MP
if (maxcpus > 1)
pptr("CPU %d: ",cpu);
#endif
pptr("syndrome 0x%x, type 0x%x, value 0x%x\n",syndrome,
syn_info.type, syn_info.value);
#endif /* ECC_DEBUG */
#if MP
if (maxcpus > 1)
pptr("CPU %d: ",cpu);
#endif
pptr(" %s: ", error_loc_names[loc]);
switch (syn_info.type) {
case OK:
#ifdef IP19
pptr("Syndrome at addr 0x%llx normal! Error in evicted line handled by CC\n",eaddr);
#else
pptr("??!?!Syndrome at addr 0x%llx normal!\n",eaddr);
#endif
return(-2);
case UN:
case B2:
case B3:
if (ecc_type == TAG_CBITS)
pptr("%s TAG error in secondary cache at addr 0x%llx\n",
etstrings[syn_info.type],eaddr);
else
pptr("%s DATA error in doubleword at addr 0x%llx\n",
etstrings[syn_info.type],eaddr);
return(0);
case DB:
case CB:
if (ecc_type == TAG_CBITS)
pptr("One-bit (%s%d) TAG err; 2nd cache: addr 0x%llx\n",
etstrings[syn_info.type],syn_info.value,eaddr);
else
pptr("One-bit (%s%d) DATA err: dbl-word addr 0x%llx\n",
etstrings[syn_info.type],syn_info.value,eaddr);
return(0);
default:
if (printerr)
pptr("Unknown eccdesc_t type (%d)\n",syn_info.type);
return(-1);
}
} /* print_ecctype */
#endif /* R4000 */
#if (defined(R4000) && defined(_FORCE_ECC))
/* each double-word in memory has an 8-bit ECC checkbit value that
* is computed and stored with it. */
typedef struct ecc_data_word {
uint hi_word;
uint lo_word;
u_char ecc_val;
} ecc_data_word_t;
#define IN_PD 0
#define IN_PI 1
#define IN_SD 2
#define IN_SI 3
#define IN_MEM 4
volatile int force_verbose = 0;
volatile int missed_2nd = 0;
volatile uint v_orig_ecc, n_ecc, xor_ecc;
volatile uint used_sr = -1;
extern void uncached(void);
extern void setecc(int);
extern void runcached(void);
/* 'force_ecc_where' enum and 'ecc_data_word_t' typedef in sys/syssgi.h */
int
_force_ecc(inwhat, k1addr, ecc_info_param)
int inwhat; /* IN_{PD,PI,SD,SI,MEM, or IO3 (MEM via IO3)} */
__psunsigned_t k1addr; /* force ecc error at this K1SEG address */
ecc_data_word_t *ecc_info_param;
{
ecc_data_word_t new_ecc;
__psunsigned_t k0addr;
__psunsigned_t physaddr;
volatile int *k1ptr;
volatile int *k0ptr;
volatile int k0oneoff;
__psunsigned_t pmem = (physmem * NBPP);
uint tags[NUM_TAGS];
char *cptr;
uint no_ints_sr, ce_no_ints_sr, oldsr, oneoffval;
uint orig_ecc;
uint lo_val, hi_val;
k1addr &= ~(BYTESPERDBLWD-1); /* rnd down to a dbl-word boundry */
k0addr = K1_TO_K0(k1addr);
physaddr = K1_TO_PHYS(k1addr);
k1ptr = (volatile int *)k1addr;
k0ptr = (volatile int *)k0addr;
if (copyin((caddr_t)ecc_info_param,(caddr_t)&new_ecc,
sizeof(ecc_data_word_t))) {
return EFAULT;
}
if (force_verbose)
printf("What %d: k1 0x%x k0 0x%x hi 0x%x lo 0x%x cbits 0x%x\n",
inwhat,k1addr,k0addr,new_ecc.hi_word,
new_ecc.lo_word,(uint)new_ecc.ecc_val);
switch(inwhat) {
case IN_MEM: /* force ecc error in memory via cache-munge */
case IN_SD:
if (inwhat == IN_SD) {
cptr = error_loc_names[CACH_SD];
if (force_verbose)
printf(" Force %s ecc error (%d)\n",cptr,inwhat);
} else {
cptr = error_loc_names[SYSAD];
if (force_verbose)
printf(" Force %s ecc err (%d) via secondary cbit-munge\n",
cptr,inwhat);
}
if ((physaddr + private.p_scachesize) >= pmem)
k0oneoff = (k0addr - private.p_scachesize);
else
k0oneoff = (k0addr + private.p_scachesize);
oldsr = no_ints_sr = getsr();
/* disable interrupts for entire time */
no_ints_sr &= ~SR_IE;
/* next sr will allow us to 'cook' the 2ndary ecc */
ce_no_ints_sr = (no_ints_sr | SR_CE);
setsr(no_ints_sr); /* no ints while running uncached */
uncached(); /* uncached instr stream: line won't be replaced */
/* get valid cache line and write the specified dbl-word */
*k0ptr = new_ecc.lo_word;
*(k0ptr+1) = new_ecc.hi_word;
/* force it into 2ndary to init correct ecc */
_c_hwbinv(CACH_PD, k0addr);
/* and read it back into primary */
lo_val = *k0ptr;
hi_val = *(k0ptr+1);
/* now make it dirty again, with the same data so the ecc in
* the 2ndary is correct until we xor in the change; the
* pd_hwbinv will hit since it is again dirty */
*k0ptr = lo_val;
_c_ilt_n_ecc(CACH_SD, k0addr,tags, &orig_ecc);
/* with SR_CE bit set, the ecc reg contributes to the generated
* value. Contrary to the current documentation (which says that
* the ecc register is xor'ed into the existing checkbits), the
* R4K appears to first do a one's complement on the ECC register;
* THEN it's xor'ed into the cbits. Therefore, for us to end up
* with the specified cbits we must xor the old and new, then
* NOT it. Nice documentation... */
v_orig_ecc = orig_ecc;
n_ecc = (uint)new_ecc.ecc_val;
xor_ecc = ~(orig_ecc ^ n_ecc);
setecc((int)xor_ecc);
_munge_decc(k0addr, ce_no_ints_sr);
#ifdef ECC_TEST_TWO_BAD
/* corrupt a second word so we can test EW bit in ecc_handler */
/* Assumes that we're corrupting 0x300 first, and second error is
* at 0x500 so you better back sure that's OK !
*/
ecc_info_ptr.ecc_err2_ptr = (k0ptr+128); /* add 4 cachelines */
/* get valid cache line and write the specified dbl-word */
*(k0ptr+128) = new_ecc.lo_word;
*(k0ptr+129) = new_ecc.hi_word;
/* force it into 2ndary to init correct ecc */
_c_hwbinv(CACH_PD, (k0addr+128*sizeof(int)));
/* and read it back into primary */
lo_val = *(k0ptr+128);
hi_val = *(k0ptr+129);
/* now make it dirty again, with the same data so the ecc in
* the 2ndary is correct until we xor in the change; the
* pd_hwbinv will hit since it is again dirty */
*(k0ptr+128) = lo_val;
_c_ilt_n_ecc(CACH_SD, (k0addr+128*sizeof(int)),tags, &orig_ecc);
/* with SR_CE bit set, the ecc reg contributes to the generated
* value. Contrary to the current documentation (which says that
* the ecc register is xor'ed into the existing checkbits), the
* R4K appears to first do a one's complement on the ECC register;
* THEN it's xor'ed into the cbits. Therefore, for us to end up
* with the specified cbits we must xor the old and new, then
* NOT it. Nice documentation... */
v_orig_ecc = orig_ecc;
n_ecc = (uint)new_ecc.ecc_val;
xor_ecc = ~(orig_ecc ^ n_ecc);
setecc((int)xor_ecc);
_munge_decc(k0addr+128*sizeof(int), ce_no_ints_sr);
#endif /* ECC_TEST_TWO_BAD */
setsr(no_ints_sr); /* clear CE bit before going cached */
runcached();
setsr(oldsr); /* now enable interrupts */
missed_2nd = 0;
if (inwhat == IN_MEM) {
/* now flush this line to memory by reading an address
* one 2nd cache-size above K0addr
oneoffval = *(uint *)k0oneoff;
*/
/* flush the bad line out to memory. Since the rmi fixes
* all one bit errors unconditionally on writes, this
* must be at least a 2-bit error */
/* prevent ecc error now; this way it'll get out there
* flawed and the next cached-read will get a SysAD ECC */
oldsr = getsr();
setsr(ce_no_ints_sr);
if (!_c_hwbinv(CACH_SD, k0addr))
missed_2nd = 1; /* mustn't print with CE bit on */
setsr(oldsr);
}
if (inwhat == IN_MEM && missed_2nd)
printf("!!?force_ecc: addr 0x%x 2ndary hwbinv missed cache!\n",
k0addr);
if (inwhat == IN_SD) { /* reading into primary will check ecc */
if (force_verbose)
printf("IN_SD: here we go!\n");
#ifndef FORCE_CACHERR_ON_STORE
/* Force cache error on load */
lo_val = *k0ptr;
#else
/* force cache error on store (should turn on EI)
*
* Two interesting case. In one we write completely new
* data into one of the words of the doubleword. This will
* most likely cause us to report an MBE if the EI bit
* does not get set since the ECC will be computed on
* this value (in the PD) and comareed to the ECC in the
* secondary.
* The other case stores the same data (test program is
* generating error in the other word of the double word).
*/
#if 0
/* this test tends to generate FATAL MBE if EI not set */
ecc_store_err(0x1234, k0addr); /* write some new data */
#else
/* this test replaces with same data, so looks like SBE */
ecc_store_err(lo_val, k0addr);
#endif
#if 0
/* for now we use more controlled environment of
* assembly language code.
*/
/* Force cache error on store (EI) */
*k0ptr = lo_val;;
#endif /* 0 */
#endif /* force cache error on store */
}
if (force_verbose)
printf("force_ecc %s exits\n",cptr);
return 0;
case IN_PD:
cptr = error_loc_names[CACH_PD];
if (force_verbose)
printf(" Force PD cache ecc error (%d)\n",inwhat);
/* get valid cache line and write the specified dbl-word */
*k0ptr = new_ecc.lo_word;
*(k0ptr+1) = new_ecc.hi_word;
_c_ilt_n_ecc(CACH_PD, k0addr, tags, &orig_ecc);
if (force_verbose)
printf(" f_ecc IN_PD: addr 0x%x: taglo 0x%x, ecc 0x%x\n",
k0addr,tags[TAGLO_IDX],orig_ecc);
orig_ecc ^= 0x1; /* toggle parity bit */
if (force_verbose)
printf("new ecc: 0x%x\n",orig_ecc);
setecc(orig_ecc);
/* set CE status bit--cachops will use contents of ecc register
* for data parity instead of computing the correct one. */
oldsr = no_ints_sr = getsr();
/* disable interrupts for entire time */
no_ints_sr &= ~SR_IMASK8;
ce_no_ints_sr = (no_ints_sr | SR_CE);
setsr(no_ints_sr); /* no ints while running uncached */
uncached(); /* uncached instr stream: line won't be replaced */
/* get line again in case instr. forced it out */
*k0ptr = new_ecc.lo_word;
*(k0ptr+1) = new_ecc.hi_word;
setsr(ce_no_ints_sr);
/* storing the same value as above with SR_CE bit set, using the
* ECC register with the parity bit toggled forces incorrect
* data parity and causes an ecc exception. */
*k0ptr = new_ecc.lo_word;
setsr(no_ints_sr); /* clear CE bit */
runcached();
setsr(oldsr); /* and enable interrupts */
if (force_verbose)
printf(" exiting force_ecc, case IN_PD (%d)\n",IN_PD);
return 0;
case IN_PI:
cptr = error_loc_names[CACH_PI];
break;
case IN_SI:
cptr = error_loc_names[CACH_SI];
break;
case 120:
ecc_cleanup();
return 0;
default:
printf("Illegal inwhat (%d)\n",inwhat);
return 0;
} /* switch */
if (force_verbose)
printf(" force ecc in %s (%d)\n",cptr,inwhat);
return 0;
}
#endif /* IP19 && _FORCE_ECC */
#endif /* !TFP && !BEAST */
#if EVEREST
#include <sys/inst.h>
#define EFRAME_REG(efp,reg) (((k_machreg_t *)(efp))[reg])
#define REGVAL(efp,x) ((x)?EFRAME_REG((efp),(x)+EF_AT-1):0)
/* ARGSUSED */
int
find_buserror_info(eframe_t *ep, inst_t **epcp, int *ldstp,
void **vaddrp, uint *paddrhip, uint *paddrlop)
{
inst_t *epc;
#ifndef TFP
union mips_instruction inst;
void *vaddr;
int ldst;
pfn_t pfn;
uint paddrlo, paddrhi;
#endif
epc = (inst_t *)EFRAME_REG(ep,EF_EPC);
if ((long)EFRAME_REG(ep,EF_CAUSE) & CAUSE_BD)
epc +=4;
#if TFP
/*
* Bus errors are imprecise on TFP, so the EPC is meaningless. Printing
* out information based on the EPC will only confuse the user. Just
* return the EPC in the exception frame so the panic message matches
* the warning message.
*/
*epcp = epc;
return 0;
#else /* ! TFP */
if (IS_KUSEG((long)epc))
inst.word = fuword(epc);
else
inst.word = *epc;
vaddr = (void *)((__psint_t)REGVAL(ep, inst.i_format.rs) +
inst.i_format.simmediate);
switch (inst.i_format.opcode) {
/* Loads */
case ld_op:
case lwu_op:
case lw_op:
case lhu_op:
case lh_op:
case lbu_op:
case lb_op:
ldst = 1;
break;
/* Stores */
case sd_op:
case sw_op:
case sh_op:
case sb_op:
ldst = 0;
break;
/* XXX What do we do about these? */
/* Cop1 instructions */
case lwc1_op:
case ldc1_op:
case swc1_op:
case sdc1_op:
/* Unaligned load/stores */
case ldl_op:
case ldr_op:
case lwl_op:
case lwr_op:
case sdl_op:
case sdr_op:
case swl_op:
case swr_op:
/* Load linked/store conditional */
case lld_op:
case scd_op:
case ll_op:
case sc_op:
default:
return 0;
}
if (IS_KUSEG(vaddr)) {
vtop(vaddr, 1, &pfn, 1);
} else
pfn = kvtophyspnum((void *)vaddr);
paddrhi = (pfn>>20);
paddrlo = (pfn << 12) | ((long)vaddr & 0xfff);
*epcp = epc;
*ldstp = ldst;
*vaddrp = vaddr;
*paddrhip = paddrhi;
*paddrlop = paddrlo;
return 1;
#endif /* TFP */
}
#if ECC_RECOVER
static void *last_ecc_recoverable = 0;
/*
* We introduce here the arbitrary concept of a "flurry" of recoverable
* multibit errors. We want to survive instances of isolated flurries (e.g.,
* lots of errors on a single page), but not continue to "recover" from truly
* hard errors which cause endless bus errors which just happen to appear to
* be "recoverable". What we do is to timestamp the first recoverable error
* of a flurry, allow some number of additional recoveries in a short period
* of time, then refuse to "recover" more than some max number occuring in
* that "short period".
*/
#define ECC_RECOVERABLE_FLURRY_MAX 32 /* s-cache lines per 4096 byte page */
static int time_ecc_recoverable_flurry = 0; /* time, in secs, of flurry */
static int count_ecc_recoverable_flurry = 0; /* count recoveries in flurry */
#endif /* ECC_RECOVER */
/*
* See if we can recover from an ECC error:
* IF the PC points to kernel "block zero" or "block copy" code AND
* IF we were just crossing into a secondary cache line AND
* IF we planned to update the entire secondary cache line with new data AND
* IF we did not fault on this same location recently AND
* IF the systune parameter "ecc_recover_enable" is nonzero, which specifies
* a time interval (in seconds) within which we will keep trying to recover
* a maximum of ECC_RECOVERABLE_FLURRY_MAX errors.
* THEN we can recover.
*
* Returns:
* 0 if cannot recover
* 1 if can recover
*
* Side effect: sets global last_ecc_recoverable to faulting virtual addr.
*
* NOTE: This code counts on the bcopy/bzero routines to supply the
* appropriate lables and to use register A3 to hold the upper bound
* for destination addresses!
*/
/* ARGSUSED */
ecc_recoverable(eframe_t *ep, inst_t *epc, void *vaddr)
{
#if ECC_RECOVER
extern char bzero_stores[];
extern char bcopy_stores[];
extern int ecc_recover_enable;
long destination_limit;
if (!ecc_recover_enable)
return 0;
if (((long)epc != (long)bzero_stores) &&
((long)epc != (long)bcopy_stores))
return 0;
if (!SCACHE_ALIGNED((long)vaddr))
return 0;
/* The following code assumes certain details of the bcopy/bzero
* code in order to determine if we will be storing the entire
* cacheline. If we get a multibit error on the first store into
* a cacheline AND if we will be storing the entire line THEN
* we can safely ignore the error.
*/
destination_limit = (long)EFRAME_REG(ep,EF_A3);
if (destination_limit-(long)vaddr < SCACHE_LINESIZE)
return 0;
if (last_ecc_recoverable == vaddr)
return 0;
if (!ecc_recover_enable)
return 0;
last_ecc_recoverable = vaddr;
if (time - time_ecc_recoverable_flurry > ecc_recover_enable) {
/*
* It has been a sufficiently "long time" since the latest
* flurry of recoverable multibit errors, so reset the
* count/time.
*/
time_ecc_recoverable_flurry = time;
count_ecc_recoverable_flurry = 1;
} else {
/*
* This is another recoverable error in a "short" period of
* time. Allow a certain number of these in that time, then
* give up and stop trying to recover.
*/
if (++count_ecc_recoverable_flurry > ECC_RECOVERABLE_FLURRY_MAX)
return 0;
}
return 1;
#else /* ECC_RECOVER */
return 0;
#endif /* ECC_RECOVER */
}
#endif /* EVEREST */
#if !MCCHIP && !IP30 && !IP32
/* MC based systems don not currently have ECC */
#if !defined (SN) /* SN has its own bus error processing */
/*
* dobuserre - handle bus error exception
*/
int ecc_recover_count = 0;
static volatile cpumask_t buserr_panic_pending = {0};
/* 0: kernel; 1: kernel - no print; 2: user */
int
dobuserre(register eframe_t *ep, inst_t *epc, uint flag)
{
#if TFP
unsigned ev_ile;
#endif
#ifdef EVEREST
#if IP19 || IP25
cpu_cookie_t err_info;
#endif
#endif
int s = splhi(); /* Prevent preemption from now on */
#ifdef EVEREST
#if IP19 || IP25
if (curthreadp) {
err_info = setmustrun(ep->ef_cpuid);
}
ASSERT(cpuid() == ep->ef_cpuid);
#endif
#endif
#if TFP
ev_ile = EV_GET_LOCAL(EV_ILE); /* Current ILE register */
#endif /* TFP */
switch (flag) {
case 0:
default:
#ifdef EVEREST
cmn_err(CE_WARN|CE_CPUID,
"%s Bus Error, Kernel mode, eframe:0x%x EPC:0x%x",
((ep->ef_cause & CAUSE_EXCMASK) == EXC_IBE)
? "Instruction" : "Data", ep, epc);
#endif
/*
* If we're not already panicing, then start to panic.
* If we're already panicing on another cpu, then just
* silently spin here, waiting for an intercpu command.
* If we're already panicing on this cpu, then go ahead and
* double-panic.
*/
while (CPUMASK_IS_NONZERO(buserr_panic_pending) &&
(!CPUMASK_TSTM(buserr_panic_pending, private.p_cpumask)))
; /* sit and spin */
{
inst_t *epc;
int ldst;
void *vaddr;
uint paddrhi, paddrlo;
CPUMASK_ATOMSET(buserr_panic_pending, cpumask());
#ifdef EVEREST
dump_hwstate(1);
if (find_buserror_info(ep,&epc,&ldst,&vaddr,&paddrhi, &paddrlo)) {
cmn_err(CE_WARN,
"BUSERR: %s instruction, virtual address 0x%x (addrhi=0x%x addrlo=0x%x)\n",
ldst ? "LOAD" : "STORE", vaddr, paddrhi, paddrlo);
printf("BUSERR: ");
mc3_decode_addr(printf, paddrhi, paddrlo);
if (ecc_recoverable(ep, epc, vaddr)) {
cmn_err(CE_WARN,
"ECC RECOVERED -- CONTINUE NORMAL OPERATION.\n");
/*
* We can try to recover this error.
* Clear our recollection of the
* event, to avoid future confusion.
*/
everest_error_clear(0);
ecc_recover_count++;
CPUMASK_ATOMCLR(buserr_panic_pending, cpumask());
#ifdef EVEREST
#if IP19 || IP25
if(curthreadp)
restoremustrun(err_info);
#endif
#endif
splx(s);
return 1; /* problem handled */
}
}
#endif
cmn_err_tag(74,CE_PANIC,
"Bus Error in Kernel mode, eframe:0x%x EPC:0x%x",
ep, epc);
}
case 1:
/* nofault */
#if TFP
EV_SET_REG(EV_CERTOIP, 0xffff); /* Clear Bus Error */
EV_SET_LOCAL(EV_ILE, ev_ile|EV_ERTOINT_MASK); /* re-enable BE */
tfp_clear_gparity_error();
#endif
#ifdef EVEREST
#if IP19 || IP25
if(curthreadp)
restoremustrun(err_info);
#endif
#endif
splx(s);
return 0;
case 2:
#ifdef EVEREST
#if IP19 || IP25
if( uvme_errclr(ep) == 1) {
if(curthreadp)
restoremustrun(err_info);
splx(s);
return 0;
}
#endif /* IP19 || IP25 */
cmn_err(CE_WARN|CE_CPUID,
"%s Bus Error, User mode, eframe:0x%x EPC:0x%x",
((ep->ef_cause & CAUSE_EXCMASK) == EXC_IBE)
? "Instruction" : "Data", ep, epc);
#endif /* EVEREST */
/*
* If we're not already panicing, then start to panic.
* If we're already panicing on another cpu, then just
* silently spin here, waiting for an intercpu command.
* If we're already panicing on this cpu, then go ahead and
* double-panic.
*/
while (CPUMASK_IS_NONZERO(buserr_panic_pending) &&
(!CPUMASK_TSTM(buserr_panic_pending, private.p_cpumask)))
; /* sit and spin */
{
inst_t *epc;
int ldst;
void *vaddr;
uint paddrhi, paddrlo;
CPUMASK_ATOMSET(buserr_panic_pending, cpumask());
#ifdef EVEREST
dump_hwstate(1);
if (find_buserror_info(ep,&epc,&ldst,&vaddr,&paddrhi, &paddrlo)) {
cmn_err(CE_WARN,
"BUSERR: %s instruction: virtual address 0x%x (physical 0x%x%x)\n", ldst ? "LOAD" : "STORE", vaddr, paddrhi, paddrlo);
printf("BUSERR: ");
mc3_decode_addr(printf, paddrhi, paddrlo);
}
#endif
cmn_err_tag(75,CE_PANIC,
"Bus Error in User mode, eframe:0x%x EPC:0x%x",
ep, epc);
}
} /* switch */
/* NOTREACHED */
}
#endif /* SN0 */
#endif /* !MCCHIP && !IP30 */
/* The R4000 has a built-in floating-point unit. These 2 functions
* are used by floating-point emulation (nofphw.s), which is not
* included in R4000 kernels. So these routines stub out the
* unresolved externals.
*/
int
softfp_adderr()
{
cmn_err(CE_PANIC, "softfp_adderr for R4000?");
return 0;
}
int
softfp_insterr()
{
cmn_err(CE_PANIC, "softfp_insterr for R4000?");
return 0;
}
#ifdef _MEM_PARITY_WAR
/*
* ecc_create_exception
*
* Create an exception frame from an ecc exception frame,
* including changing the state of the system to common
* exception state. The new frame is allocated on the
* appropriate kernel stack. The code runs with SR_ERL set
* in $sr, so it must avoid taking any exceptions.
*/
extern void ecc_map_uarea(void);
u_long *
ecc_create_exception(eframe_t *ep)
{
eframe_t *nep;
u_long *nsp;
if (private.p_kstackflag == PDA_CURUSRSTK) {
/* map the u-area */
ecc_map_uarea();
/* allocate the frame */
nep = &curexceptionp->u_eframe;
private.p_kstackflag = PDA_CURKERSTK;
nsp = ((u_long *) (KERNELSTACK)) - 4;
} else {
/* was on kernel, idle, or interrupt stack */
nep = ((eframe_t *) ep->ef_sp) - 1;
nsp = (u_long *) nep;
}
*nep = *ep;
nsp[0] = (u_long) nep;
nep->ef_sr &= ~SR_ERL; /* turn off SR_ERL in frame */
return(nsp);
}
#endif /* _MEM_PARITY_WAR */
#if defined (IP19)
pfn_t
init_ecc_sp(fpage)
pfn_t fpage;
{
__psunsigned_t *cache_sp_k1ptr;
cache_sp_k1ptr = (__psunsigned_t *)(PHYS_TO_K1(CACHE_ERR_SP_PTR));
*cache_sp_k1ptr = PHYS_TO_K1(ctob(fpage) + CACHE_ERR_STACK_SIZE
- sizeof(void *));
return (fpage + btoc(CACHE_ERR_STACK_SIZE));
}
#endif
/*
* Interface to dump stuff in ioerror
*/
char *error_mode_string[] =
{ "probe", "kernel", "user", "reenable" };
extern void
ioerror_dump(char *name, int error_code, int error_mode, ioerror_t *ioerror)
{
printf("%s%s%s%s%s error in %s mode\n",
name,
(error_code & IOECODE_PIO) ? " PIO" : "",
(error_code & IOECODE_DMA) ? " DMA" : "",
(error_code & IOECODE_READ) ? " Read" : "",
(error_code & IOECODE_WRITE) ? " Write" : "",
error_mode_string[error_mode]);
#define PRFIELD(f) \
if (IOERROR_FIELDVALID(ioerror,f)) \
printf("\t%20s: 0x%X\n", #f, IOERROR_GETVALUE(ioerror,f));
PRFIELD(errortype); /* error type: extra info about error */
PRFIELD(widgetnum); /* Widget number that's in error */
PRFIELD(widgetdev); /* Device within widget in error */
PRFIELD(srccpu); /* CPU on srcnode generating error */
PRFIELD(srcnode); /* Node which caused the error */
PRFIELD(errnode); /* Node where error was noticed */
PRFIELD(sysioaddr); /* Sys specific IO address */
PRFIELD(xtalkaddr); /* Xtalk (48bit) addr of Error */
PRFIELD(busspace); /* Bus specific address space */
PRFIELD(busaddr); /* Bus specific address */
PRFIELD(vaddr); /* Virtual address of error */
PRFIELD(memaddr); /* Physical memory address */
PRFIELD(epc); /* pc when error reported */
PRFIELD(ef); /* eframe when error reported */
#undef PRFIELD
printf("\n");
}
/*
* machine dependent code for error handling. Mark a page inaccessible and
* later clean and put it back in VM circulation if possible.
*/
/* ARGSUSED */
void
error_mark_page(paddr_t paddr)
{
#if defined (SN0)
extern void sn0_error_mark_page(paddr_t);
sn0_error_mark_page(paddr);
#else
cmn_err(CE_NOTE, "error_mark_page: not supported");
#endif
}
/* ARGSUSED */
int
error_reclaim_page(paddr_t paddr, int flag)
{
#if defined (SN0)
extern int sn0_error_reclaim_page(paddr_t, int);
return sn0_error_reclaim_page(paddr, flag);
#else
cmn_err(CE_NOTE, "error_reclaim_page: not supported");
return 0;
#endif
}