/************************************************************************** * * * Copyright (C) 1989-1994 Silicon Graphics, Inc. * * * * These coded instructions, statements, and computer programs contain * * unpublished proprietary information of Silicon Graphics, Inc., and * * are protected by Federal copyright law. They may not be disclosed * * to third parties or copied or duplicated in any form, in whole or * * in part, without the prior written consent of Silicon Graphics, Inc. * * * **************************************************************************/ #ident "$Revision: 1.125 $" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef EVEREST #include #include #include #include /* Until we know code is correct, report cache errors * but consider them FATAL and panic. Removing this * define will re-enable correction. */ /* #define IP19_CACHEERRS_FATAL 1 */ /* #define FORCE_CACHERR_ON_STORE 1 */ /* Following are flags which can be turned on to test very specific error * conditions. * ECC_TEST_EW_BIT attempts to generate an EW condition (unsuccessfully) * ECC_TEST_TWO_BAD causes two bad cachelines to be setup so we can see * what happens when another cpu references the other line. 
*/ /* #define ECC_TEST_EW_BIT 1 */ /* #define ECC_TEST_TWO_BAD 1 */ #endif /* EVEREST */ #include #ifdef _MEM_PARITY_WAR #if IP20 || IP22 #include #endif /* IP20 || IP22 */ #include #endif /* _MEM_PARITY_WAR */ #if IP20 || IP22 || IPMHSIM #define GIO_ERRMASK 0xff00 extern int perr_mem_init(caddr_t); #endif /* IP20 || IP22 */ extern struct reg_desc sr_desc[], cause_desc[]; #if R4000 && R10000 extern struct reg_desc r10k_sr_desc[]; #endif /* R4000 && R10000 */ extern int picache_size; extern int pdcache_size; #ifdef R4000 static void init_ecc_info(void); #endif void ecc_cleanup(void); #define SET_CBITS_IN 0x80 #if EVEREST void dump_hwstate(int); #endif #ifdef R4000PC extern int get_r4k_config(void); int r4000_config; #endif /* R4000PC */ extern char bytetab[]; #define BYTEOFF(bl) ((bl&0xf0)?(bytetab[bl>>4]):(bytetab[bl]+4)) /* * CP0 status register description */ struct reg_values imask_values[] = { { SR_IMASK8, "8" }, { SR_IMASK7, "7" }, { SR_IMASK6, "6" }, { SR_IMASK5, "5" }, { SR_IMASK4, "4" }, { SR_IMASK3, "3" }, { SR_IMASK2, "2" }, { SR_IMASK1, "1" }, { SR_IMASK0, "0" }, { 0, NULL }, }; struct reg_values mode_values[] = { { SR_KSU_USR, "USER" }, #if R4000 || R10000 { SR_KSU_KS, "SPRVSR" }, #endif { 0, "KERNEL" }, { 0, NULL }, }; #if TFP struct reg_values kps_values[] = { { SR_KPS_4K, "4k" }, { SR_KPS_8K, "8k" }, { SR_KPS_16K, "16k" }, { SR_KPS_64K, "64k" }, { SR_KPS_1M, "1m" }, { SR_KPS_4M, "4m" }, { SR_KPS_16M, "16m" }, { 0, NULL }, }; struct reg_values ups_values[] = { { SR_UPS_4K, "4k" }, { SR_UPS_8K, "8k" }, { SR_UPS_16K, "16k" }, { SR_UPS_64K, "64k" }, { SR_UPS_1M, "1m" }, { SR_UPS_4M, "4m" }, { SR_UPS_16M, "16m" }, { 0, NULL }, }; struct reg_desc sr_desc[] = { /* mask shift name format values */ { SR_DM, 0, "DM", NULL, NULL }, { SR_KPSMASK, 0, "KPS", NULL, kps_values }, { SR_UPSMASK, 0, "UPS", NULL, ups_values }, { SR_CU1, 0, "CU1", NULL, NULL }, { SR_CU0, 0, "CU0", NULL, NULL }, { SR_FR, 0, "FR", NULL, NULL }, { SR_RE, 0, "RE", NULL, NULL }, { SR_IBIT8, 
0, "IM8", NULL, NULL }, { SR_IBIT7, 0, "IM7", NULL, NULL }, { SR_IBIT6, 0, "IM6", NULL, NULL }, { SR_IBIT5, 0, "IM5", NULL, NULL }, { SR_IBIT4, 0, "IM4", NULL, NULL }, { SR_IBIT3, 0, "IM3", NULL, NULL }, { SR_IBIT2, 0, "IM2", NULL, NULL }, { SR_IBIT1, 0, "IM1", NULL, NULL }, { SR_IMASK, 0, "IPL", NULL, imask_values }, { SR_XX, 0, "XX", NULL, NULL }, { SR_UX, 0, "UX", NULL, NULL }, { SR_KSU_MSK, 0, "MODE", NULL, mode_values }, { SR_EXL, 0, "EXL", NULL, NULL }, { SR_IE, 0, "IE", NULL, NULL }, { 0, 0, NULL, NULL, NULL }, }; #elif defined (BEAST) struct reg_desc sr_desc[] = { /* mask shift name format values */ { SR_CU2, 0, "CU2", NULL, NULL }, { SR_CU1, 0, "CU1", NULL, NULL }, { SR_CU0, 0, "CU0", NULL, NULL }, { SR_FR, 0, "FR", NULL, NULL }, { SR_RE, 0, "RE", NULL, NULL }, { SR_SR, 0, "SR", NULL, NULL }, { SR_NMI, 0, "NMI", NULL, NULL }, { SR_CE, 0, "CE", NULL, NULL }, { SR_IBIT10, 0, "IM10", NULL, NULL }, { SR_IBIT9, 0, "IM9", NULL, NULL }, { SR_IBIT8, 0, "IM8", NULL, NULL }, { SR_IBIT7, 0, "IM7", NULL, NULL }, { SR_IBIT6, 0, "IM6", NULL, NULL }, { SR_IBIT5, 0, "IM5", NULL, NULL }, { SR_IBIT4, 0, "IM4", NULL, NULL }, { SR_IBIT3, 0, "IM3", NULL, NULL }, { SR_IBIT2, 0, "IM2", NULL, NULL }, { SR_IBIT1, 0, "IM1", NULL, NULL }, { SR_IMASK, 0, "IPL", NULL, imask_values }, { SR_KSU_MSK, 0, "MODE", NULL, mode_values }, { SR_EXL, 0, "EXL", NULL, NULL }, { SR_IE, 0, "IE", NULL, NULL }, { 0, 0, NULL, NULL, NULL }, }; #else /* !TFP && !BEAST */ struct reg_desc sr_desc[] = { #if R4000 && R10000 /* mask shift name format values */ { SR_CU3, 0, "CU3", NULL, NULL }, { SR_CU2, 0, "CU2", NULL, NULL }, { SR_CU1, 0, "CU1", NULL, NULL }, { SR_CU0, 0, "CU0", NULL, NULL }, { SR_RP, 0, "RP", NULL, NULL }, { SR_FR, 0, "FR", NULL, NULL }, { SR_RE, 0, "RE", NULL, NULL }, { SR_RE, 0, "RE", NULL, NULL }, { SR_BEV, 0, "BEV", NULL, NULL }, { SR_TS, 0, "TS", NULL, NULL }, { SR_SR, 0, "SR", NULL, NULL }, { SR_CH, 0, "CH", NULL, NULL }, { SR_CE, 0, "CE", NULL, NULL }, { SR_DE, 0, "DE", NULL, NULL }, 
{ SR_IBIT8, 0, "IM8", NULL, NULL }, { SR_IBIT7, 0, "IM7", NULL, NULL }, { SR_IBIT6, 0, "IM6", NULL, NULL }, { SR_IBIT5, 0, "IM5", NULL, NULL }, { SR_IBIT4, 0, "IM4", NULL, NULL }, { SR_IBIT3, 0, "IM3", NULL, NULL }, { SR_IBIT2, 0, "IM2", NULL, NULL }, { SR_IBIT1, 0, "IM1", NULL, NULL }, { SR_IMASK, 0, "IPL", NULL, imask_values }, { SR_KX, 0, "KX", NULL, NULL }, { SR_SX, 0, "SX", NULL, NULL }, { SR_UX, 0, "UX", NULL, NULL }, { SR_KSU_MSK, 0, "MODE", NULL, mode_values }, { SR_ERL, 0, "ERL", NULL, NULL }, { SR_EXL, 0, "EXL", NULL, NULL }, { SR_IE, 0, "IE", NULL, NULL }, { 0, 0, NULL, NULL, NULL }, }; struct reg_desc r10k_sr_desc[] = { #endif /* R4000 && R10000 */ /* mask shift name format values */ #ifdef R10000 { SR_XX, 0, "XX", NULL, NULL }, #else { SR_CU3, 0, "CU3", NULL, NULL }, #endif { SR_CU2, 0, "CU2", NULL, NULL }, { SR_CU1, 0, "CU1", NULL, NULL }, { SR_CU0, 0, "CU0", NULL, NULL }, #ifndef R10000 { SR_RP, 0, "RP", NULL, NULL }, #endif { SR_FR, 0, "FR", NULL, NULL }, { SR_RE, 0, "RE", NULL, NULL }, { SR_RE, 0, "RE", NULL, NULL }, { SR_BEV, 0, "BEV", NULL, NULL }, { SR_TS, 0, "TS", NULL, NULL }, { SR_SR, 0, "SR", NULL, NULL }, { SR_CH, 0, "CH", NULL, NULL }, #ifdef R10000 { SR_NMI, 0, "NMI", NULL, NULL }, #else { SR_CE, 0, "CE", NULL, NULL }, #endif { SR_DE, 0, "DE", NULL, NULL }, { SR_IBIT8, 0, "IM8", NULL, NULL }, { SR_IBIT7, 0, "IM7", NULL, NULL }, { SR_IBIT6, 0, "IM6", NULL, NULL }, { SR_IBIT5, 0, "IM5", NULL, NULL }, { SR_IBIT4, 0, "IM4", NULL, NULL }, { SR_IBIT3, 0, "IM3", NULL, NULL }, { SR_IBIT2, 0, "IM2", NULL, NULL }, { SR_IBIT1, 0, "IM1", NULL, NULL }, { SR_IMASK, 0, "IPL", NULL, imask_values }, { SR_KX, 0, "KX", NULL, NULL }, { SR_SX, 0, "SX", NULL, NULL }, { SR_UX, 0, "UX", NULL, NULL }, { SR_KSU_MSK, 0, "MODE", NULL, mode_values }, { SR_ERL, 0, "ERL", NULL, NULL }, { SR_EXL, 0, "EXL", NULL, NULL }, { SR_IE, 0, "IE", NULL, NULL }, { 0, 0, NULL, NULL, NULL }, }; #endif /* * CP0 cause register description */ struct reg_values exc_values[] = { { 
EXC_INT, "INT" }, { EXC_MOD, "MOD" }, { EXC_RMISS, "RMISS" }, { EXC_WMISS, "WMISS" }, { EXC_RADE, "RADE" }, { EXC_WADE, "WADE" }, #if !TFP { EXC_IBE, "IBE" }, { EXC_DBE, "DBE" }, #endif { EXC_SYSCALL, "SYSCALL" }, { EXC_BREAK, "BREAK" }, { EXC_II, "II" }, { EXC_CPU, "CPU" }, { EXC_OV, "OV" }, { EXC_TRAP, "TRAP" }, #if R4000 { EXC_VCEI, "VCEI" }, { EXC_FPE, "FPE" }, { EXC_WATCH, "WATCH" }, { EXC_VCED, "VCED" }, #endif #if R10000 { EXC_FPE, "FPE" }, #ifndef R4000 { EXC_WATCH, "WATCH" }, #endif /* !R4000 */ #endif /* R10000 */ { 0, NULL }, }; struct reg_desc cause_desc[] = { /* mask shift name format values */ { CAUSE_BD, 0, "BD", NULL, NULL }, { CAUSE_CEMASK, -CAUSE_CESHIFT, "CE", "%d", NULL }, #if TFP { CAUSE_NMI, 0, "NMI", NULL, NULL }, { CAUSE_BE, 0, "BE", NULL, NULL }, { CAUSE_VCI, 0, "VCI/TLBX", NULL, NULL }, { CAUSE_FPI, 0, "FPI", NULL, NULL }, { CAUSE_IP11, 0, "IP11", NULL, NULL }, { CAUSE_IP10, 0, "IP10", NULL, NULL }, { CAUSE_IP9, 0, "IP9", NULL, NULL }, #endif { CAUSE_IP8, 0, "IP8", NULL, NULL }, { CAUSE_IP7, 0, "IP7", NULL, NULL }, { CAUSE_IP6, 0, "IP6", NULL, NULL }, { CAUSE_IP5, 0, "IP5", NULL, NULL }, { CAUSE_IP4, 0, "IP4", NULL, NULL }, { CAUSE_IP3, 0, "IP3", NULL, NULL }, { CAUSE_SW2, 0, "SW2", NULL, NULL }, { CAUSE_SW1, 0, "SW1", NULL, NULL }, { CAUSE_EXCMASK,0, "EXC", NULL, exc_values }, { 0, 0, NULL, NULL, NULL }, }; #if !defined (TFP) && !defined (BEAST) #if ((!defined(R10000)) || defined(R4000)) struct reg_desc cache_err_desc[] = { /* mask shift name format values */ { CACHERR_ER, 0, "ER", NULL, NULL }, { CACHERR_EC, 0, "EC", NULL, NULL }, { CACHERR_ED, 0, "ED", NULL, NULL }, { CACHERR_ET, 0, "ET", NULL, NULL }, { CACHERR_ES, 0, "ES", NULL, NULL }, { CACHERR_EE, 0, "EE", NULL, NULL }, { CACHERR_EB, 0, "EB", NULL, NULL }, { CACHERR_EI, 0, "EI", NULL, NULL }, #if IP19 { CACHERR_EW, 0, "EW", NULL, NULL }, #endif { CACHERR_SIDX_MASK, 0, "SIDX", "0x%x", NULL }, { CACHERR_PIDX_MASK, CACHERR_PIDX_SHIFT, "PIDX", "0x%x", NULL }, { 0, 0, NULL, NULL, NULL 
}, }; #endif /* ((!defined(R10000)) || defined(R4000)) */ #define SSTATE_INVALID 0 #define SSTATE_CLEX 4 #define SSTATE_DIRTEX 5 #define SSTATE_SHARED 6 #define SSTATE_DIRTSHAR 7 struct reg_values scache_states[] = { #if R4000 && R10000 { SSTATE_INVALID, "INVAL" }, { SSTATE_CLEX, "CE" }, { SSTATE_DIRTEX, "DE" }, { SSTATE_SHARED, "shared" }, { SSTATE_DIRTSHAR, "dirty-shared" }, { 0, NULL }, }; struct reg_values r10k_scache_states[] = { #endif /* R4000 && R10000 */ #ifdef R10000 { SSTATE_INVALID, "INVAL" }, { SSTATE_SHARED, "shared" }, { SSTATE_CLEX, "CE" }, { SSTATE_DIRTEX, "DE" }, #else { SSTATE_INVALID, "INVAL" }, { SSTATE_CLEX, "CE" }, { SSTATE_DIRTEX, "DE" }, { SSTATE_SHARED, "shared" }, { SSTATE_DIRTSHAR, "dirty-shared" }, #endif { 0, NULL }, }; #define PSTATE_INVALID 0 #define PSTATE_SHARED 1 #define PSTATE_CLEX 2 #define PSTATE_DIRTEX 3 struct reg_values pcache_states[] = { { PSTATE_INVALID, "INVAL" }, { PSTATE_SHARED, "shared" }, { PSTATE_CLEX, "CE" }, { PSTATE_DIRTEX, "DE" }, { 0, NULL }, }; #define STAG_LO 0xffffe000 #define STAG_STATE 0x00001c00 #define STAG_STATE_SHIFT -10 #define STAG_VINDEX 0x00000380 #define STAG_ECC SECC_MASK #define STAG_VIND_SHIFT 5 /* taglo bits 31..13 << 35..17 */ #ifdef R10000 #define SECC_MASK 0x0000007f #define SADDR_SHIFT 4 #endif struct reg_desc s_taglo_desc[] = { /* mask shift name format values */ { STAG_LO, SADDR_SHIFT, "paddr","0x%x", NULL }, { STAG_STATE, STAG_STATE_SHIFT, NULL, NULL, scache_states }, { STAG_VINDEX, STAG_VIND_SHIFT, "vind", "0x%x", NULL }, { STAG_ECC, 0, "ecc", "0x%x", NULL }, { 0, 0, NULL, NULL, NULL }, }; #if R4000 && R10000 struct reg_desc r10k_s_taglo_desc[] = { /* mask shift name format values */ { STAG_LO, SADDR_SHIFT, "paddr","0x%x", NULL }, { STAG_STATE, STAG_STATE_SHIFT, NULL, NULL, r10k_scache_states }, { STAG_VINDEX, STAG_VIND_SHIFT, "vind", "0x%x", NULL }, { STAG_ECC, 0, "ecc", "0x%x", NULL }, { 0, 0, NULL, NULL, NULL }, }; #endif /* R4000 && R10000 */ #define PTAG_LO 0xffffff00 #define 
PTAG_STATE 0x000000c0 #define PTAG_STATE_SHIFT -6 #define PTAG_PARITY 0x00000001 #ifdef R10000 #define PTAG_WAY 0x00000002 #define PTAG_SP 0x00000004 #define PTAG_LRU 0x00000008 #define PADDR_SHIFT 4 #endif struct reg_desc p_taglo_desc[] = { /* mask shift name format values */ { PTAG_LO, PADDR_SHIFT, "paddr","0x%x", NULL }, { PTAG_STATE, PTAG_STATE_SHIFT,NULL, NULL, pcache_states }, #ifdef R10000 { PTAG_LRU, 0, "LRU", NULL, NULL }, { PTAG_SP, 2, "SP", "%d", NULL }, { PTAG_WAY, 1, "WAY", "%d", NULL }, #endif { PTAG_PARITY, 0, "parity","%x", NULL }, { 0, 0, NULL, NULL, NULL }, }; #if IP19 #undef PHYS_TO_K0 #undef K0_TO_PHYS extern __psunsigned_t ecc_phys_to_k0( __psunsigned_t); extern __psunsigned_t ecc_k0_to_phys( __psunsigned_t); #define PHYS_TO_K0 ecc_phys_to_k0 #define K0_TO_PHYS ecc_k0_to_phys /* The standard ECC_INTERRUPT macro makes cached references and * we have ERL and DE set, so cache errors during these kernel * routines would go un-reported. */ #define ECC_INTERRUPT /* only routines called during ecc handling use the following macro, and * they all execute at splhi with SR_DE set, so no locking is necessary */ #define MARK_FOR_CLEANUP ecc_info_param->needs_cleanup = 1 /* as handler exits, if cleanup is needed it raises an interrupt; else * it decrements the w_index */ #define CLEANUP_IS_NEEDED (ecc_info_param->needs_cleanup) #else /* !IP19 */ /* ecc_handler mustn't do anything that could cause exceptions (printing, * for example) since we aren't on a stack that the exception code * recognizes. It therefore raises a software interrupt that invokes * ecc_cleanup() to do its dirty work. 
*/ #define ECC_INTERRUPT timeout(ecc_cleanup, 0, TIMEPOKE_NOW); \ ecc_info.needs_cleanup = 0; \ call_cleanup = 1 \ /* only routines called during ecc handling use the following macro, and * they all execute at splhi with SR_DE set, so no locking is necessary */ #define MARK_FOR_CLEANUP ecc_info.needs_cleanup = 1 /* as handler exits, if cleanup is needed it raises an interrupt; else * it decrements the w_index */ #define CLEANUP_IS_NEEDED (ecc_info.needs_cleanup) #endif char *etstrings[] = {"OK", "DB", "CB", "2 Bit", "3 Bit", "4 Bit", "Fatal"}; eccdesc_t real_data_eccsyns[] = { /* 0|8 1|9 2|A 3|B 4|C 5|D 6|E 7|F */ /* 0*/ {OK, 0},{CB, 0},{CB, 1},{B2, 0},{CB, 2},{B2, 0},{B2, 0},{DB, 7}, /* 8*/ {CB, 3},{B2, 0},{B2, 0},{DB,54},{B2, 0},{DB, 6},{DB,55},{B2, 0}, /*10*/ {CB, 4},{B2, 0},{B2, 0},{DB, 0},{B2, 0},{DB,20},{DB,48},{B2, 0}, /*18*/ {B2, 0},{DB,24},{DB,28},{B2, 0},{DB,16},{B2, 0},{B2, 0},{DB,52}, /*20*/ {CB, 5},{B2, 0},{B2, 0},{DB, 1},{B2, 0},{DB,21},{DB,49},{B2, 0}, /*28*/ {B2, 0},{DB,25},{DB,29},{B2, 0},{DB,17},{B2, 0},{B2, 0},{DB, 4}, /*30*/ {B2, 0},{DB,44},{DB,45},{B2, 0},{DB,46},{B2, 0},{B2, 0},{B3, 0}, /*38*/ {DB,47},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0}, /*40*/ {CB, 6},{B2, 0},{B2, 0},{DB, 2},{B2, 0},{DB,22},{DB,50},{B2, 0}, /*48*/ {B2, 0},{DB,26},{DB,30},{B2, 0},{DB,18},{B2, 0},{B2, 0},{DB,10}, /*50*/ {B2, 0},{DB,32},{DB,33},{B2, 0},{DB,34},{B2, 0},{B2, 0},{B3, 0}, /*58*/ {DB,35},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0}, /*60*/ {B2, 0},{DB,12},{DB,13},{B2, 0},{DB,14},{B2, 0},{B2, 0},{B3, 0}, /*68*/ {DB,15},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0}, /*70*/ {DB, 9},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0}, /*78*/ {B2, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{UN, 0}, /*80*/ {CB, 7},{B2, 0},{B2, 0},{DB, 3},{B2, 0},{DB,23},{DB,51},{B2, 0}, /*88*/ {B2, 0},{DB,27},{DB,31},{B2, 0},{DB,19},{B2, 0},{B2, 0},{DB,58}, /*90*/ {B2, 0},{DB,36},{DB,37},{B2, 0},{DB,38},{B2, 0},{B2, 0},{B3, 0}, /*98*/ 
/* remainder of the data-ECC syndrome table (rows 0x98..0xff).
 * NOTE: the row-index comments originally read /*e8*/ /*e8*/ /*f8*/ /*f8*/;
 * rows advance by 8 entries, so the first of each pair is relabeled
 * /*e0*/ and /*f0*/ below.  Code tokens are unchanged. */
	{DB,39},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*a0*/	{B2, 0},{DB,40},{DB,41},{B2, 0},{DB,42},{B2, 0},{B2, 0},{B3, 0},
/*a8*/	{DB,43},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*b0*/	{DB,56},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*b8*/	{B2, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{UN, 0},
/*c0*/	{B2, 0},{DB,60},{DB,61},{B2, 0},{DB,62},{B2, 0},{B2, 0},{B3, 0},
/*c8*/	{DB,63},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*d0*/	{DB, 8},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*d8*/	{B2, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{UN, 0},
/*e0*/	{DB,57},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*e8*/	{B2, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{UN, 0},
/*f0*/	{B2, 0},{DB, 5},{DB,53},{B2, 0},{DB,59},{B2, 0},{B2, 0},{UN, 0},
/*f8*/	{DB,11},{B2, 0},{B2, 0},{UN, 0},{B2, 0},{UN, 0},{UN, 0},{B2, 0},
};
/* syndrome decode table for the 7-bit secondary-cache TAG ecc
 * (indexed by syndrome; entries give error class and bit position) */
eccdesc_t real_tag_eccsyns[] = {
	/* 0|8	1|9	2|A	3|B	4|C	5|D	6|E	7|F */
/* 0 */	{OK, 0},{CB, 0},{CB, 1},{B2, 0},{CB, 2},{B2, 0},{B2, 0},{DB, 0},
/* 8 */	{CB, 3},{B2, 0},{B2, 0},{DB,16},{B2, 0},{DB, 4},{DB, 5},{B2, 0},
/*10*/	{CB, 4},{B2, 0},{B2, 0},{DB,22},{B2, 0},{DB,17},{DB, 1},{B2, 0},
#ifdef R4000
/*18*/	{B2, 0},{UN, 0},{UN, 0},{B2, 0},{DB, 6},{B2, 0},{B2, 0},{B3, 0},
#else
/*18*/	{B2, 0},{UN, 0},{DB,25},{B2, 0},{DB, 6},{B2, 0},{B2, 0},{B3, 0},
#endif /* R4000 */
/*20*/	{CB, 5},{B2, 0},{B2, 0},{DB,18},{B2, 0},{DB,24},{DB, 2},{B2, 0},
/*28*/	{B2, 0},{DB,20},{UN, 0},{B2, 0},{UN, 0},{B2, 0},{B2, 0},{B3, 0},
/*30*/	{B2, 0},{DB, 8},{DB, 9},{B2, 0},{UN, 0},{B2, 0},{B2, 0},{B3, 0},
/*38*/	{DB,10},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*40*/	{CB, 6},{B2, 0},{B2, 0},{UN, 0},{B2, 0},{DB,19},{DB, 3},{B2, 0},
/*48*/	{B2, 0},{DB,23},{UN, 0},{B2, 0},{DB, 7},{B2, 0},{B2, 0},{B3, 0},
/*50*/	{B2, 0},{DB,21},{UN, 0},{B2, 0},{UN, 0},{B2, 0},{B2, 0},{B3, 0},
/*58*/	{UN, 0},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0},
/*60*/	{B2,
0},{DB,12},{DB,13},{B2, 0},{DB,14},{B2, 0},{B2, 0},{B3, 0}, /*68*/ {DB,15},{B2, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0}, /*70*/ {DB,11},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B3, 0},{B3, 0},{B2, 0}, /*78*/ {B3, 0},{B3, 0},{B3, 0},{B2, 0},{B3, 0},{B2, 0},{B2, 0},{B3, 0}, }; #ifdef IP19 /* really need to access all data uncached while processing a cache error * exception in order to not perturb the state of the cache. */ #define data_eccsyns ecc_info_param->ecc_data_eccsyns #define tag_eccsyns ecc_info_param->ecc_tag_eccsyns #else /* !IP19 */ #define data_eccsyns real_data_eccsyns #define tag_eccsyns real_tag_eccsyns #endif /* !IP19 */ #ifdef R4000 /* calc_err_info() computes the checkbits for the incoming value(s) * (two data uints if data, one uint (STagLo) if tag. It then derives * the syndrome and uses it to fetch the eccdesc entry from the proper * table. The following #defines and structures allow it to determine * which ecc to compute, and to return the info to the calling routine. */ #define DATA_CBITS 1 #define TAG_CBITS 2 /* Both data and tag ecc submit checkbits and receive computed checkbits, * the resulting syndrome, and syn_info. Data ecc, however, is computed * from the values of two uints; tag ecc needs a portion of the STagLo * register. */ typedef struct error_info { unchar cbits_in; unchar cbits_out; unchar syndrome; eccdesc_t syn_info; union { struct { uint d_lo; uint d_hi; } data_in; struct { uint s_tlo; } tag_in; } ecc_type_t; } error_info_t; #define eidata_lo ecc_type_t.data_in.d_lo #define eidata_hi ecc_type_t.data_in.d_hi #define eis_taglo ecc_type_t.tag_in.s_tlo /* Bits in the CacheErr reg tell the handler where the ECC error occurred. * The sbd.h CACH_XX defines, plus the following SYSAD describe location; * ecc_t_or_d tells whether the error was in the data field, the tag, or * both. (The primary caches have separate parity bits for data & tags; * the 2ndary has separate ecc checkbits for each, 7-bit for tags, 8 for data). 
*/ #define SYSAD (CACH_SD + 1) #define BAD_LOC (SYSAD + 1) static char *error_loc_names[] = { "primary i-cache", /* CACH_PI */ "primary d-cache", /* CACH_PD */ "secondary i-cache", /* CACH_SI */ "secondary d-cache", /* CACH_SD */ "CPU SysAD bus", /* SYSAD */ "", /* invalid */ }; enum ecc_t_or_d { DATA_ERR = 1, TAG_ERR, D_AND_T_ERR }; #define BYTESPERWD (sizeof(int)) #define BYTESPERDBLWD (2 * BYTESPERWD) #define BYTESPERQUADWD (4 * BYTESPERWD) #define NUM_TAGS 2 #define TAGLO_IDX 0 /* load & store cachops: lo == [0], hi [1] */ #define TAGHI_IDX 1 /* not used on IP17 (taghi must be zero!) */ /* one error mandates that the data caches be flushed (not * just lines that 'hit'. Since we aren't trying to match * any particular virtual address, pick an arbitrary address * that maps to the beginning of the secondary cache. */ #define FOUR_MEG (0x400000l) #define FLUSH_ADDR FOUR_MEG /* the taglo register has different formats depending on whether the * tag info is from a primary or secondary tag. The following macros * return the state of the cacheline: clean or dirty, which are the * only valid choices on the IP17. */ #define CLEAN_P_TAG(p_tlo) ((p_tlo & PSTATEMASK) == PCLEANEXCL) #define CLEAN_S_TAG(s_tlo) ((s_tlo & SSTATEMASK) == SCLEANEXCL) #define DIRTY_P_TAG(p_tlo) ((p_tlo & PSTATEMASK) == PDIRTYEXCL) #define DIRTY_S_TAG(s_tlo) ((s_tlo & SSTATEMASK) == SDIRTYEXCL) /* In order to allow more than one ECC exception to be handled before * the cleanup-interrupt invokes ecc_cleanup(), define a structure * that contains all info relevant to each ecc exception. An array * of these allows multiple exceptions. Use two pointers, a 'writing' * pointer for the handler to write the frames, and a 'reading' * pointer for ecc_cleanup and ecc_panic to display the frames. * Implement them as circular buffers. */ #ifdef IP19 #define ECC_FRAMES 64 #else #define ECC_FRAMES 10 #endif /* #define ECC_DEBUG */ /* Keep a tally of the ECC errors in each part (tag or data) of each * cache. 
Since any errors in either of the primary caches means the * entire R4K must be discarded, we don't track primary errors by address: * frequency of occurrence is sufficiently detailed. Cache Errors * may be in data or tag, SysAD errors are in data only (tag ecc * is computed when the data is put into the cache lines). NO_ERROR * keeps a count of the number of times the handler found no error * in the indicated spot. This is the 'err_cnts' field in ecc_info. * the #define of ECC_ERR_TYPES and the ecc_err_types enum are declared * in IP17.h to allow cmd/ecc to determine the array size needed when * doing an SGI_R4K_CERRS syscall for cache-error tallies. */ /* +++++++++++++ sys/IP17.h +++++++++++++++ */ /* #define ECC_ERR_TYPES 8 enum ecc_err_types { PI_DERRS = 0, PI_TERRS, PD_DERRS, PD_TERRS, SC_DERRS, SC_TERRS, SYSAD_ERRS, NO_ERROR }; */ /* +++++++++++++ sys/IP17.h +++++++++++++++ */ static char *err_type_names[] = { "pi-d","pi-t","pd-d","pd-t", "sc-d","sc-t", "sysad", "noerr" }; #define ECC_ALL_MSGS -1 #define ECC_PANIC_MSG 0 #define ECC_INFO_MSG 1 #define ECC_ERROR_MSG 2 volatile char panic_str[] = "PANIC MSG: "; volatile char info_str[] = "INFO MSG: "; volatile char error_str[] = "ERROR MSG: "; volatile char *msg_strs[] = { panic_str, info_str, error_str }; /* each ecc_handler invokation saves lots of info: */ typedef struct err_desc { volatile k_machreg_t e_sr; volatile uint e_cache_err; volatile k_machreg_t e_error_epc; volatile int e_location; /* CACH_{ PI, PD, SI, SD } or SYSAD */ volatile uint e_tag_or_data; /* DATA_ERR, TAG_ERR, or D_AND_T_ERR */ volatile __uint64_t e_paddr; /* entire physical addr of error (16 GB)*/ volatile k_machreg_t e_vaddr; /* p-cache virtual addr of error */ volatile uint e_s_taglo; volatile uint e_p_taglo; volatile uint e_badecc; volatile uint e_lo_badval; volatile uint e_hi_badval; volatile uint e_syndrome; volatile uint e_2nd_syn; volatile uint e_syn_info; volatile uint e_user; volatile uint e_prevbadecc; volatile pid_t e_pid; 
volatile cpuid_t e_cpuid; volatile uint e_sbe_dblwrds; /* bit mask of double-words with SBE */ volatile uint e_mbe_dblwrds; /* bit mask of double-words with DBE */ #ifdef _MEM_PARITY_WAR volatile eframe_t *e_eframep; volatile k_machreg_t *e_eccframep; #endif /* _MEM_PARITY_WAR */ volatile uchar_t e_flags; } err_desc_t; /* definitions for e_flags */ #define E_PADDR_VALID 1 /* we are certain of the physical address */ #define E_VADDR_VALID 2 /* we are certain of pidx */ #define E_PADDR_MC 4 /* bad address reported by MC */ #define E_PADDR_GIO 8 /* bad address reported by HPC3 */ typedef struct ecc_info { #ifdef IP19 /* rest of data is referenced uncached so need to be sure no cached data * is in same cacheline. */ char cacheline_pad1[128]; #endif /* IP19 */ #ifdef ECC_TEST_EW_BIT /* The following variable will be set to "1" when the ecc_handler has * reached an "interesting place" and where it will wait for the "master * cpu" to perform a cached access to the error location. */ int ecc_wait_for_external; /* following fields setup by cpu2 which accesses the second bad line in * cpu1's cache. */ int ecc_err2_datahi; int ecc_err2_datalo; int ecc_err2_cpuid; /* cpu1 sets up the address of the second error */ int *ecc_err2_ptr; /* cpu1 logs its' cacheErr register value after the second error has been * accessed by cpu2. 
*/ int ecc_cpu1_cacheerr2; #endif volatile int ecc_w_index; /* writing index (used by ecc_handler) */ volatile int ecc_r_index; /* reading index (ecc_cleanup &ecc_panic) */ volatile uint needs_cleanup; volatile uint cleanup_cnt; volatile uint ecc_flags; #ifndef _MEM_PARITY_WAR volatile k_machreg_t eframep; volatile k_machreg_t eccframep; #endif /* _MEM_PARITY_WAR */ volatile uint ecc_err_cnts[ECC_ERR_TYPES]; volatile err_desc_t desc[ECC_FRAMES]; #ifndef IP19 volatile char *ecc_panic_msg[ECC_FRAMES]; volatile char *ecc_info_msg[ECC_FRAMES]; volatile char *ecc_error_msg[ECC_FRAMES]; #else /* IP19 */ volatile int ecc_info_inited; volatile int ecc_inval_eloc_where; volatile int ecc_panic; volatile int ecc_panic_cpuid; /* cpuid of panicing cpu */ volatile int ecc_panic_newmaster; volatile int ecc_panic_recoverable; volatile char ecc_panic_msg[ECC_FRAMES]; volatile char ecc_info_msg[ECC_FRAMES]; volatile char ecc_error_msg[ECC_FRAMES]; /* ecc_entry_state indicates current state of the ECC_FRAME entry: * 0 == unused * 1 == ecc_handler is currently active on entry * 2 == ecc_handler has completed entry, awaiting ecc_cleanup */ volatile uint ecc_entry_state[ECC_FRAMES]; /* Following set of virtual addresses will be used by the kernel during * ECC error processing in order to access the data at the point of the * error without causing a VCE exception. */ __psunsigned_t ecc_vcecolor; /* Following location will hold copy of EVERROR_EXT so it can be picked * up by the ecc_handler wihtout the compiler generating loads to cached * global address space. 
*/ everror_ext_t *everror_ext; /* global data items which need to be references uncached */ uint *ecc_tag_dbpos; /* avoid cached or gp-rel reference */ struct d_emask *ecc_d_ptrees; struct t_emask *ecc_t_ptrees; eccdesc_t *ecc_data_eccsyns; eccdesc_t *ecc_tag_eccsyns; __psunsigned_t ecc_dummyline; __psunsigned_t ecc_k0size_less1; pfn_t ecc_physmem; int ecc_picache_size; int ecc_pdcache_size; int ecc_attempt_recovery; /* rest of data is referenced uncached so need to be sure no cached data * is in same cacheline. */ char cacheline_pad2[128]; #endif /* IP19 */ } ecc_info_t; #ifdef IP19 /* Can't load from PDA since that would be a cached access. * Most usage of SCACHE_PADDR is passed to "indexed load" routines * which will automatically size to the secondary cache in HW> */ #define SCACHE_PADDR(edp) (edp->e_paddr) #else #define SCACHE_PADDR(edp) (edp->e_paddr & (private.p_scachesize-1)) #endif #define POFFSET_PADDR(edp) (edp->e_paddr & ~(NBPP-1)) #ifdef _MEM_PARITY_WAR #define ecc_info (*((volatile ecc_info_t *) CACHE_ERR_ECCINFO_P)) #define ecc_info_ptr ecc_info #define ECC_INFO(a) ecc_info.a #else /* _MEM_PARITY_WAR */ #ifdef IP19 volatile ecc_info_t real_ecc_info; /* the following macro should NOT be used when in ecc_handler since compiler * generates "gp" relative constants to perform this conversion and loading * these constants results in cached accesses. */ #define ecc_info_ptr (*(volatile ecc_info_t*)(K0_TO_K1(&real_ecc_info))) #define ECC_INFO(a) ecc_info_param->a /* dummy cacheline is 3 cachelines long and we use the middle to * guarentee it's not on the same line as any other cached data. 
*/ static long long dummy_cacheline[48]; #else /* !IP19 */ volatile ecc_info_t ecc_info; #define ecc_info_ptr ecc_info #define ECC_INFO(a) ecc_info.a #endif /* !IP19 */ #endif /* _MEM_PARITY_WAR */ #ifndef IP19 volatile int ecc_info_initialized = 0; #endif volatile int call_cleanup = 0; volatile int in_cleanup = 0; #if DEBUG_ECC volatile uint f_ptaglo; volatile uint f_staglo; volatile uint f_loval; volatile uint f_hival; volatile __psunsigned_t f_p_caddr; volatile __psunsigned_t f_s_caddr; volatile uint f_cooked_ecc; volatile uint f_d_ecc; volatile uint f_ptaglo1; volatile uint f_staglo1; #endif /* DEBUG_ECC */ /* when calling print_ecc_info from symmon must use qprintf to avoid * scrogging the kernel buffers. When non-zero, this global directs * all display routines to use qprintf, else printf */ volatile int pm_use_qprintf = 0; typedef void (*pfunc)(char *, ...); extern void qprintf(char *, ...); #define K_ECC_PANIC 0x1 #define HANDLER_OVERRAN 0x2 #ifdef IP19 extern int ecc_check_cache( __psunsigned_t ); #else /* !IP19 */ #ifdef _MEM_PARITY_WAR extern int log_perr(uint addr, uint bytes, int no_console, int print_help); extern int ecc_find_pidx(int, paddr_t); volatile char **msg_addrs[] = { (volatile char **)NULL, (volatile char **)NULL, (volatile char **)NULL }; #else /* _MEM_PARITY_WAR */ volatile char **msg_addrs[] = { (volatile char **)&ecc_info.ecc_panic_msg[0], (volatile char **)&ecc_info.ecc_info_msg[0], (volatile char **)&ecc_info.ecc_error_msg[0] }; #endif /* _MEM_PARITY_WAR */ #endif /* !IP19 */ #define NEXT_INDEX(x) if (x+1 >= ECC_FRAMES) \ x = 0; \ else \ x += 1 #define PREV_INDEX(x) if (x-1 < 0) \ x = (ECC_FRAMES-1); \ else \ x -= 1 #if MP #define PRINT_CPUID(id) cmn_err(CE_CONT, "CPU %d: ", id) #else #define PRINT_CPUID(id) #endif /* ecc handling prototypes */ static int print_ecctype(int, int, uint, __uint64_t, int, uint); #if IP19 int real_calc_err_info(int, error_info_t *, volatile ecc_info_t *); static int real_ecc_print_msg(int, uint, int, int, 
uint, volatile ecc_info_t *); static int real_ecc_assign_msg(int, int, char, volatile ecc_info_t *); static int real_ecc_fixmem(uint, eframe_t *, k_machreg_t *, uint, k_machreg_t, volatile ecc_info_t *); static int real_ecc_fixcache(uint, eframe_t *, k_machreg_t *, uint, k_machreg_t, volatile ecc_info_t *); int real_ecc_fixctag(uint, int, volatile ecc_info_t *); int real_ecc_fixcdata(uint, int, k_machreg_t *, volatile ecc_info_t *); static int real_ecc_log_error(int, int, volatile ecc_info_t *); int real_xlate_bit(enum error_type, uint, volatile ecc_info_t *); #define ecc_print_msg(a0,a1,a2,a3,a4) real_ecc_print_msg(a0,a1,a2,a3,a4,ecc_info_param) #define ecc_log_error(a0,a1) real_ecc_log_error(a0,a1,ecc_info_param) #define ecc_assign_msg(a0,a1,a2) real_ecc_assign_msg(a0,a1,a2,ecc_info_param) #define ecc_fixmem(a0,a1,a2,a3,a4) real_ecc_fixmem(a0,a1,a2,a3,a4,ecc_info_param) #define ecc_fixcache(a0,a1,a2,a3,a4) real_ecc_fixcache(a0,a1,a2,a3,a4,ecc_info_param) #define ecc_fixctag(a0,a1) real_ecc_fixctag(a0,a1,ecc_info_param) #define ecc_fixcdata(a0,a1,a2) real_ecc_fixcdata(a0,a1,a2,ecc_info_param) #define xlate_bit(a0,a1) real_xlate_bit(a0,a1,ecc_info_param) #define calc_err_info(a0,a1) real_calc_err_info(a0,a1,ecc_info_param) #else /* !IP19 */ int calc_err_info(int, error_info_t *); static int ecc_print_msg(int, uint, int, int, uint); static int ecc_assign_msg(int, int, char *); #ifndef MCCHIP static int ecc_fixmem(uint, eframe_t *, k_machreg_t *, uint, k_machreg_t); #endif /* ! 
MCCHIP */ static int ecc_fixcache(uint, eframe_t *, k_machreg_t *, uint, k_machreg_t); int ecc_fixctag(uint, int); int ecc_fixcdata(uint, int, k_machreg_t *); static int ecc_log_error(int, int); int xlate_bit(enum error_type, uint); #endif /* !IP19 */ static int ecc_bad_ptag(uint); int _c_hwbinv(int, __psunsigned_t); int _c_hinv(int, __psunsigned_t); int _c_ilt_n_ecc(int, __psunsigned_t, uint[], uint *); int _c_ilt(int, __psunsigned_t, uint[]); int _c_ist(int, __psunsigned_t, uint[]); int _munge_decc(__psunsigned_t, uint); #ifndef SCACHE_LINESIZE #define SCACHE_LINESIZE (32*4) #endif #ifdef IP19 static char real_ecc_overrun_msg[] = "ECC error overrun!"; static char real_ecc_eb_not_i[] = "ecc_handler: EB bit set but error not i-cache"; static char real_ecc_incons_err[] = "ECC error not SysAD or either cache!"; static char real_ecc_ew_err[] = "double ECC error, incomplete information!"; static char real_ecc_kernel_err[] = "Uncorrectable HARDWARE ECC error, in kernel mode"; static char real_ecc_user_err[] = "Uncorrectable HARDWARE ECC error, in user mode"; static char real_ecc_inval_loc[] = "Invalid 'location' parameter in fixcache"; static char real_ecc_no_ptagerr[] = "No ecc tag error found in primary cacheline"; static char real_ecc_no_stagerr[] = "No ecc tag error found in secondary cacheline"; static char real_ecc_ptfix_failed[] = "ECC repair on primary tag unsuccessful"; static char real_ecc_stfix_failed[] = "ECC repair on secondary tag unsuccessful"; static char real_ecc_no_pdataerr[] = "No ecc data error found in primary cacheline"; static char real_ecc_no_sdataerr[]= "No ecc data error found in secondary cacheline"; static char real_ecc_sinvalid_noerr[]= "Secondary cacheline invalid, OK on re-read"; static char real_ecc_sinvalid_err[]= "Secondary cacheline invalid, ERROR on re-read"; static char real_ecc_sdcfix_failed[]="Data repair on clean secondary cache-line failed"; static char real_ecc_sdcfix_good[]="Data repair on clean 2nd cache-line SUCCESSFUL"; 
static char real_ecc_sddfix_failed[]="Data repair on dirty secondary cache-line failed"; static char real_ecc_sddfix_good[]="Data repair on dirty 2nd cache-line SUCCESSFUL"; static char real_ecc_md_sddfix_failed[]="Multi-bit data fix on dirty S-line failed"; static char real_ecc_p_data_err[] = "Data parity error in primary cache"; static char real_ecc_inval_eloc[] = "ecc_log_error: bad error location"; static char real_ecc_bad_s_tag[] = "Uncorrectable error in secondary cache tag"; static char real_ecc_ft_hinv_m_sc[] = "fixtag: _c_hinv missed cache"; static char real_ecc_scerr_too_early[] = "Scache error before recovery is possible"; static char real_ecc_ei_notdirty[] = "Scache error on store-miss but line not dirty"; static char real_ecc_mixed_psize[] = "ecc_handler expects primary linesizes equal"; static char real_ecc_ei_norecover[] = "Scache error on store-miss, recovery not possible "; static char real_ecc_possible_ei[] = "cache test failed, possible store-miss?"; #define ecc_overrun_msg 1 #define ecc_eb_not_i 2 #define ecc_incons_err 3 #define ecc_ew_err 4 #define ecc_kernel_err 5 #define ecc_user_err 6 #define ecc_inval_loc 7 #define ecc_no_ptagerr 8 #define ecc_no_stagerr 9 #define ecc_ptfix_failed 10 #define ecc_stfix_failed 11 #define ecc_no_pdataerr 12 #define ecc_no_sdataerr 13 #define ecc_sinvalid_noerr 14 #define ecc_sinvalid_err 15 #define ecc_sdcfix_failed 16 #define ecc_sdcfix_good 17 #define ecc_sddfix_failed 18 #define ecc_sddfix_good 19 #define ecc_md_sddfix_failed 20 #define ecc_p_data_err 21 #define ecc_inval_eloc 22 #define ecc_bad_s_tag 23 #define ecc_ft_hinv_m_sc 24 #define ecc_scerr_too_early 25 #define ecc_ei_notdirty 26 #define ecc_mixed_psize 27 #define ecc_ei_norecover 28 #define ecc_possible_ei 29 #else /* !IP19 */ static char ecc_overrun_msg[] = "ECC error overrun!"; #if !MCCHIP && !IP32 static char ecc_eb_not_i[] = "ecc_handler: EB bit set but error not i-cache"; #endif static char ecc_incons_err[] = "ECC error not SysAD or either 
cache!"; static char ecc_kernel_err[] = "Uncorrectable HARDWARE ECC error, in kernel mode"; static char ecc_user_err[] = "Uncorrectable HARDWARE ECC error, in user mode"; static char ecc_inval_loc[] = "Invalid 'location' parameter in fixcache"; static char ecc_no_ptagerr[] = "No ecc tag error found in primary cacheline"; static char ecc_no_stagerr[] = "No ecc tag error found in secondary cacheline"; static char ecc_ptfix_failed[] = "ECC repair on primary tag unsuccessful"; static char ecc_stfix_failed[] = "ECC repair on secondary tag unsuccessful"; static char ecc_no_pdataerr[] = "No ecc data error found in primary cacheline"; static char ecc_no_sdataerr[]= "No ecc data error found in secondary cacheline"; static char ecc_sinvalid_noerr[]= "Secondary cacheline invalid, OK on re-read"; static char ecc_sinvalid_err[]= "Secondary cacheline invalid, ERROR on re-read"; static char ecc_sdcfix_failed[]="Data repair on clean secondary cache-line failed"; static char ecc_sdcfix_good[]="Data repair on clean 2nd cache-line SUCCESSFUL"; static char ecc_sddfix_failed[]="Data repair on dirty secondary cache-line failed"; static char ecc_sddfix_good[]="Data repair on dirty 2nd cache-line SUCCESSFUL"; static char ecc_md_sddfix_failed[]="Multi-bit data fix on dirty S-line failed"; static char ecc_p_data_err[] = "Data parity error in primary cache"; static char ecc_inval_eloc[] = "ecc_log_error: bad error location"; static char ecc_bad_s_tag[] = "Uncorrectable error in secondary cache tag"; static char ecc_ft_hinv_m_sc[] = "fixtag: _c_hinv missed cache"; #if IP20 || IP22 || IP32 || IPMHSIM static char ecc_extreq[] = "ecc_handler: ECC error result of external request"; #endif #endif /* !IP19 */ #ifdef IP19_CACHEERRS_FATAL volatile int verbose_ecc = 1; /* get lots of info on the (single) error */ #else #ifdef IP19 volatile int verbose_ecc = 1; /* for now, get lots of info on correction too */ #else /* !IP19 */ volatile int verbose_ecc = 0; #endif /* !IP19 */ #endif volatile int 
syslog_ecctype = 1;

#if IP19
extern real_ecc_panic(void);

/* doacvec_check_ecc_logs:
 * This routine is invoked from doacvec() when a cpu is checking for
 * cpuvactions and it finds no pending actions.
 * This solves the problem of a cpu which is trying to panic due to an ecc
 * error in the cache. We know that continuing to execute on that cpu causes
 * problems in some circumstances, so we attempt to change processors for
 * the actual panic.
 *
 * Now, if the original failing cpu is the "master" cpu (which normally
 * checks for ecc_cleanup() calls and panics, then we assign a new "master"
 * and send it a cpuaction interrupt with NO pending actions (since we
 * don't want a cpu with a bad cache to start fetching lines from another
 * cpu). We don't want the other cpus to always perform this check since
 * it involves an uncached access which is very slow.
 */
void
doacvec_check_ecc_logs(void)
{
	/* ecc_panic == 1 means a cpu has flagged a panic but not yet
	 * had it reported; take over the reporting here.
	 */
	if (ecc_info_ptr.ecc_panic == 1)
		real_ecc_panic( );
}

/* ecc_panic_newmaster:
 * This routine is invoked from ducons_write() in order to determine if the
 * master cpu has moved. If it has, it returns the new master cpuid.
 * Otherwise ducons_write will run the system out of cpuaction blocks trying
 * to send console actions to cpu 0.
 *
 * Returns: new master cpuid if reassigned, else 0.
 * (ecc_panic_newmaster is stored biased by +1 so that 0 means "unset".)
 */
int
ecc_panic_newmaster(void)
{
	if (ecc_info_ptr.ecc_panic_newmaster)
		return(ecc_info_ptr.ecc_panic_newmaster - 1);
	return(0);
}

/* ecc_panic_deadcpus:
 * Following routine primarily used by do_mprboot() in order to determine
 * that a cpu has died due to an ecc_panic. This is needed to avoid delaying
 * the system restart waiting for all cpus to enter do_mprboot() since the
 * cpu which died with an ecc error will never enter that routine since we
 * attempt to keep it busy "idling" so it does no further damage.
 *
 * NOTE: only returns an indication that we're in ecc_panic. In the future
 * it might be a good idea to return the number of cpus which have invoked
 * ecc_panic in case we have several simultaneous failures (maybe due to
 * bad memory).
 */
int
ecc_panic_deadcpus(void)
{
	if (ecc_info_ptr.ecc_panic)
		return(1);
	else
		return(0);
}

/* ecc_interrupt_check:
 * this routine is invoked from system clock processing to check
 * if we need to perform ecc cleanup. This avoids having the ecc_handler
 * making cached references while ERL and DE are set.
 */
void
ecc_interrupt_check(void)
{
	if (ecc_info_ptr.needs_cleanup) {
		extern real_ecc_panic(void);

		/* A pending panic takes priority over routine cleanup */
		if (ecc_info_ptr.ecc_panic)
			real_ecc_panic( );
		ecc_info_ptr.needs_cleanup = 0;
		/* defer the actual cleanup work to timeout context */
		timeout(ecc_cleanup, 0, TIMEPOKE_NOW);
		call_cleanup = 1;
	}
#ifdef ECC_TEST_EW_BIT
	/* This code assumes that certain locations are useable. Should be
	 * generalized.
	 */
	if (ecc_info_ptr.ecc_wait_for_external == 1) {
		ecc_info_ptr.ecc_err2_datahi = *ecc_info_ptr.ecc_err2_ptr;
		ecc_info_ptr.ecc_err2_datalo = *(ecc_info_ptr.ecc_err2_ptr+1);
		ecc_info_ptr.ecc_err2_cpuid = cpuid();
		ecc_info_ptr.ecc_wait_for_external = 2;
	}
#endif /* ECC_TEST_EW_BIT */
}
#endif /* IP19 */

/* called from os/clock.c:timein, which is invoked due to a software
 * interrupt (see #define of ECC_INTERRUPT), ecc_cleanup does all the
 * work that ecc_handler can't finish because it is a) executing with
 * ecc exceptions and interrupts disabled, and b) on an isolated
 * stack which won't work with nested exceptions such as K2 tlbfaults.
 * These cleanup actions are primarily printing and more detailed
 * logging of errors.
 */
void
ecc_cleanup(void)
{
	int index;
	uint ospl;
	err_desc_t *edp;	/* ptr to set of variables to set this time */
	int i;
#if IP19
	volatile ecc_info_t *ecc_info_param;

	/* Always go through the uncached (K1) alias of the log so we
	 * never take a cached reference to state a failing cpu wrote.
	 */
	ecc_info_param = (volatile ecc_info_t *)(K0_TO_K1(&real_ecc_info));

	/* Only let one cpu at a time enter this code */
	if (atomicSetInt((int *)&in_cleanup, 1))
		return;
#endif
	/* Drain every completed log entry; each iteration consumes one. */
	while (1) {
		ospl = splecc();	/* lock during index incr and test */
#ifdef IP19
		/* We check to see if the ecc_handler has finished updating the
		 * entry we're about to read (it updates the ecc_w_index before
		 * the entry is complete)
		 */
		index = ecc_info_ptr.ecc_r_index;
		NEXT_INDEX(index);
		if ((ecc_info_ptr.ecc_r_index == ecc_info_ptr.ecc_w_index) ||
		    (ecc_info_ptr.ecc_entry_state[index] != 2)) {
			if (ecc_info_ptr.ecc_entry_state[index] != 0)
				ecc_info_ptr.needs_cleanup = 1;	/* try again later */
			in_cleanup = 0;
			/* if the handler hasn't bumped the w_index, call_cleanup
			 * should still be 0 from the last time through the while
			 */
			ASSERT(!call_cleanup);
			splxecc(ospl);
			return;
		} else {
			call_cleanup = 0;
		}
#else /* !IP19 */
		if (ecc_info_ptr.ecc_r_index == ecc_info_ptr.ecc_w_index) {
			in_cleanup = 0;
			/* if the handler hasn't bumped the w_index, call_cleanup
			 * should still be 0 from the last time through the while
			 */
			ASSERT(!call_cleanup);
			splxecc(ospl);
			return;
		} else {
			in_cleanup = 1;
			call_cleanup = 0;
		}
#endif /* !IP19 */
		/* cleanup uses the (trailing) read-index */
		NEXT_INDEX(ecc_info_ptr.ecc_r_index);
		index = ecc_info_ptr.ecc_r_index;
		ecc_info_ptr.cleanup_cnt++;
		splxecc(ospl);

		/* point edb to set of variables to use */
		edp = (err_desc_t *)&(ecc_info_ptr.desc[index]);

		if (verbose_ecc || edp->e_user) {
			PRINT_CPUID(edp->e_cpuid);
			cmn_err(CE_CONT," ecc_cleanup: %d times (r_index %d w_index %d)\n",
				ecc_info_ptr.cleanup_cnt, ecc_info_ptr.ecc_r_index,
				ecc_info_ptr.ecc_w_index);
		}
		/* always display error msgs for SYSLOG */
		ecc_print_msg(ECC_ERROR_MSG,index,1,1,edp->e_cpuid);
#ifdef IP19
		if (ecc_info_param->ecc_attempt_recovery)
			cmn_err(CE_WARN,"Data may have been corrupted by scache error\n");
#endif /* IP19 */
		if (syslog_ecctype)
			print_ecctype(edp->e_location, edp->e_tag_or_data,
				edp->e_syndrome, edp->e_paddr, 1, edp->e_cpuid);
		/* Detailed register dumps only for user-mode errors or when
		 * verbose reporting has been requested.
		 */
		if (edp->e_user || verbose_ecc) {
			for (i = ECC_INFO_MSG; i >= ECC_PANIC_MSG; i--)
				ecc_print_msg(i,index,1,1,edp->e_cpuid);
			PRINT_CPUID(edp->e_cpuid);
			cmn_err(CE_CONT," c_err %R, err_epc 0x%x\n",
				edp->e_cache_err, cache_err_desc, edp->e_error_epc);
			if (edp->e_user || verbose_ecc) {
				PRINT_CPUID(edp->e_cpuid);
				cmn_err(CE_CONT," s_taglo %R%secc 0x%x e_pc 0x%x\n",
					edp->e_s_taglo,
#if R4000 && R10000
					IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
					s_taglo_desc,
					(edp->e_s_taglo ? "\n " : " "),
					edp->e_badecc, edp->e_error_epc);
				PRINT_CPUID(edp->e_cpuid);
				cmn_err(CE_CONT," data_lo 0x%x data_hi 0x%x sbe dblwrds 0x%x mbe dblwrds 0x%x\n",
					edp->e_lo_badval, edp->e_hi_badval,
					edp->e_sbe_dblwrds, edp->e_mbe_dblwrds);
				PRINT_CPUID(edp->e_cpuid);
				cmn_err(CE_CONT, " Err SR %R%spaddr 0x%llx, vaddr 0x%x\n",
					edp->e_sr,
#if R4000 && R10000
					IS_R10000() ? r10k_sr_desc :
#endif /* R4000 && R10000 */
					sr_desc,
					(edp->e_sr ? "\n " : " "),
					edp->e_paddr,edp->e_vaddr);
			}
		}
		if (edp->e_user) {
			/* error hit a user-mode context: log the bus error and
			 * post SIGBUS to the victim process.
			 */
#ifdef _MEM_PARITY_WAR
			dobuserr((struct eframe_s *)edp->e_eframep,
				(inst_t *)edp->e_error_epc, 2);
#else
			dobuserr((struct eframe_s *)ecc_info_ptr.eframep,
				(inst_t *)edp->e_error_epc, 2);
#endif
			if (edp->e_pid) {
				/* retry until the signal is queued */
				while (sigtopid(edp->e_pid, SIGBUS,
					SIG_ISKERN|SIG_NOSLEEP, 0, 0, 0) == EAGAIN)
					;
				PCB(pcb_resched) = 1;
			} else
				cmn_err(CE_WARN, "NULL curuthread with user ecc error!\n");
			edp->e_user = 0;
			edp->e_pid = 0;
		}
#ifdef IP19
		/* entry fully consumed -- mark it free for the handler */
		ecc_info_ptr.ecc_entry_state[index] = 0;
#endif
	} /* while */
} /* ecc_cleanup */

#if IP19
void
ecc_panic(volatile ecc_info_t *ecc_info_param)
{
	/* We keep cached access to a minimum, though it appears that stores
	 * are the real problem (cached ones that is).
	 */
	ecc_info_param->ecc_panic = 1;
	ecc_info_param->needs_cleanup = 1;
	ecc_info_param->ecc_panic_cpuid = cpuid();

	/* Not much we can do if only one cpu, go ahead and try to panic */
	if (maxcpus == 1) {
		real_ecc_panic();
	}

	/* If we're the master cpu, try nominating a new master and send
	 * it an interrupt. Otherwise no-one notices the cache error since
	 * it is only the master cpu which polls the uncached location
	 * "needs_cleanup" once per second.
	 */
	if (private.p_flags & PDAF_MASTER) {
		cpuid_t who;

		if (cpuid() == 0)
			who = 1;
		else
			who = 0;
		ecc_info_param->ecc_panic_newmaster = who+1;
		sendintr(who, DOACTION);
	} else
		/* attempt to wakeup the master more quickly than its
		 * one second maintenance processing. May not work, but
		 * will be noticed eventually.
		 */
		sendintr(masterpda->p_cpuid, DOACTION);

	/* wait for other cpu to actually report the panic. If this cpu
	 * does ANYTHING cached it may corrupt other data if this error
	 * was due to a store-miss.
	 */
	/* wait for reset signal (HW) from master cpu */
	while (1)
		;
}

real_ecc_panic()
{
	volatile ecc_info_t *ecc_info_param =
		(volatile ecc_info_t *)(K0_TO_K1(&real_ecc_info));
#else /* !IP19 */
/* ARGSUSED */
ecc_panic(
	uint cache_err,
	uint errorepc)
{
#endif /* !IP19 */
	err_desc_t *edp;	/* ptr to set of variables to set this time */
	/* use w_index, handler was working there when it panic'ed */
	int index = ECC_INFO(ecc_w_index);

#if defined (IP19)
	if (ecc_info_param->ecc_info_inited != 1)
		/* NOTE: No machine_error_dump() called in this case, but I've
		 * seen cpus "hang" trying to print that info and nothing comes
		 * out on the console. So get simple error message out first.
		 */
		cmn_err_tag(69,CE_PANIC|CE_CPUID,
			"CPU cache error occurred before handler inited\n");

	/* Set ecc_panic to 2 to indicate that we're already processing
	 * the panic condition. Decreases the chance that another cpu will
	 * try this at the same time. Not MP safe, but the cache error recovery
	 * logic is not MP safe for other reasons, so just keep risk to a minimum.
	 */
	ecc_info_param->ecc_panic = 2;

	/* See if we're supposed to become the "master" due to a cache error
	 * on the real "master" which is now unresponsive in an
	 * "idle forever" loop.
	 */
	if ((ecc_info_param->ecc_panic_newmaster) &&
	    (ecc_info_param->ecc_panic_newmaster-1 == cpuid())){
		private.p_flags |= PDAF_MASTER;
		/* need to update masterpda global so that we can reboot
		 * after panic is complete.
		 */
		masterpda = pdaindr[cpuid()].pda;
		cmn_err(CE_CONT|CE_CPUID,
			"ecc_panic: assuming role of master cpu (due to cache error)\n");
	}
#endif /* IP19 */
	{
		extern int ecc_panic_cpu;
		/* this flag lets icmn_err know that we're panic-ing so it
		 * avoids performing some operations which may lead to
		 * tlbmisses.
		 * Also, make first message give as much info as possible
		 * in case only first message makes it into console buffer.
		 */
#ifdef IP19
		ecc_panic_cpu = ecc_info_param->ecc_panic_cpuid;
#else
		ecc_panic_cpu = cpuid();
#endif
		if (ECC_INFO(ecc_flags) & HANDLER_OVERRAN)
			cmn_err(CE_CONT|CE_CPUID,
				"ecc_panic initiated, ECC error overrun!\n");
		else
			cmn_err(CE_CONT|CE_CPUID,"ecc_panic initiated! (for cpu %d)\n",
				ecc_panic_cpu);
	}
#if defined (IP19)
	/* Empirical evidence suggests that this machine_err_dump should come
	 * after the preceding "panic" variables and initial error message.
	 * I saw many cases where the cpu "hung" attempting to print the
	 * machine error state, quite possible due to holding the putbuflck.
	 * With this placement we always seem to get the panic cleanly.
	 */
	machine_error_dump("");
#endif /* IP19 */
	/* point edb to set of variables to use */
	edp = (err_desc_t *)&(ECC_INFO(desc[index]));

	if (ECC_INFO(ecc_flags & (K_ECC_PANIC | HANDLER_OVERRAN))) {
		PRINT_CPUID(edp->e_cpuid);
		cmn_err(CE_CONT,"ECC PANIC: %s\n",
			((ECC_INFO(ecc_flags) & K_ECC_PANIC)
#ifdef IP19
			? real_ecc_kernel_err : real_ecc_overrun_msg));
#else /* !IP19 */
			? ecc_kernel_err : ecc_overrun_msg));
#endif /* !IP19 */
	}
	/* Dump every queued message plus the full register/address state
	 * for the entry the handler was filling in when it gave up.
	 */
#if IP19
	real_ecc_print_msg(ECC_ALL_MSGS,index,0,1,edp->e_cpuid, ecc_info_param);
#else
	ecc_print_msg(ECC_ALL_MSGS,index,0,1,edp->e_cpuid);
#endif
	if (edp->e_location != CACH_PI && edp->e_location != CACH_PD)
		print_ecctype(edp->e_location, edp->e_tag_or_data,
			edp->e_syndrome,edp->e_paddr, 1, edp->e_cpuid);
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT," CacheErr %R\n", edp->e_cache_err, cache_err_desc);
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT," Status %R\n", edp->e_sr,
#if R4000 && R10000
		IS_R10000() ? r10k_sr_desc :
#endif /* R4000 && R10000 */
		sr_desc);
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT, " ErrorEPC 0x%x, Exception Frame 0x%x, ECC Frame 0x%x\n",
#ifdef _MEM_PARITY_WAR
		edp->e_error_epc, (__psunsigned_t)edp->e_eframep,
		(__psunsigned_t)edp->e_eccframep);
#else
		edp->e_error_epc, ECC_INFO(eframep), ECC_INFO(eccframep));
#endif /* _MEM_PARITY_WAR */
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT," PhysAddr 0x%llx, VirtAddr 0x%x\n",
		edp->e_paddr, edp->e_vaddr);
#if _MEM_PARITY_WAR
	if (edp->e_flags & E_PADDR_MC) {
		PRINT_CPUID(edp->e_cpuid);
		log_perr(edp->e_paddr,
			edp->e_eccframep[ECCF_CPU_ERR_STAT] & 0xff, 0, 1);
	}
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT," cpu_err_stat: 0x%x, cpu_err_addr: 0x%x\n",
		edp->e_eccframep[ECCF_CPU_ERR_STAT],
		edp->e_eccframep[ECCF_CPU_ERR_ADDR]);
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT," gio_err_stat: 0x%x, gio_err_addr: 0x%x\n",
		edp->e_eccframep[ECCF_GIO_ERR_STAT],
		edp->e_eccframep[ECCF_GIO_ERR_ADDR]);
#if IP22
	if (is_fullhouse())
		cmn_err(CE_CONT," hpc3_buserr_stat: 0x%x\n",
			PHYS_TO_K1(HPC3_BUSERR_STAT_ADDR));
#endif /* IP22 */
#endif /* _MEM_PARITY_WAR */
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT," ECC cbits 0x%x data_lo 0x%x data_hi 0x%x\n",
		edp->e_badecc,edp->e_lo_badval, edp->e_hi_badval);
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT," sbe dblwrds 0x%x mbe dblwrds 0x%x\n",
		edp->e_sbe_dblwrds, edp->e_mbe_dblwrds);
	PRINT_CPUID(edp->e_cpuid);
	cmn_err(CE_CONT," s_taglo %R%s\n",
		edp->e_s_taglo,s_taglo_desc,
		(edp->e_s_taglo ? "\n " : " "));
	if (edp->e_2nd_syn)
		cmn_err(CE_CONT,"2nd_syn 0x%x\n",edp->e_2nd_syn);
	else
		cmn_err(CE_CONT,"\n");
#if DEBUG_ECC
	if (f_s_caddr) {
		PRINT_CPUID(edp->e_cpuid);
		cmn_err(CE_CONT, " f_ vars:\n lov 0x%x hiv 0x%x pcad %x scad %x\n",
			f_loval, f_hival, f_p_caddr, f_s_caddr);
		PRINT_CPUID(edp->e_cpuid);
		cmn_err(CE_CONT," P-lo %R%sS-lo %R\n",
			f_ptaglo,p_taglo_desc,
			(f_ptaglo ? "\n " : " "),
			f_staglo,
#if R4000 && R10000
			IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
			s_taglo_desc);
		PRINT_CPUID(edp->e_cpuid);
		cmn_err(CE_CONT," cooked 0x%x, f_d_ecc 0x%x\n",
			f_cooked_ecc,f_d_ecc);
		PRINT_CPUID(edp->e_cpuid);
		cmn_err(CE_CONT," P-lo1 %R%sS-lo1 %R\n",
			f_ptaglo1,p_taglo_desc,
			(f_ptaglo1 ? "\n " : " "),
			f_staglo1,
#if R4000 && R10000
			IS_R10000() ? r10k_s_taglo_desc :
#endif /* R4000 && R10000 */
			s_taglo_desc);
	}
#endif /* DEBUG_ECC */
	/* All state is logged; now take the system down with the most
	 * specific panic message we can justify.
	 */
#ifdef IP19
	if (ecc_info_param->ecc_panic_recoverable == 1)
		cmn_err_tag(70,CE_PANIC,
			"Single-bit cache error but recovery disabled\n");
	else if (ecc_info_param->ecc_panic_recoverable == 2)
		cmn_err_tag(71,CE_PANIC,
			"Store-miss cache error, possibly recoverable\n");
	else
		cmn_err_tag(72,CE_PANIC,
			"Uncorrectable cache ecc/parity error\n");
#else /* !IP19 */
	cmn_err_tag(73,CE_PANIC,
		"Uncorrectable cache ecc/parity error\n");
#endif /* !IP19 */
	/*NOTREACHED*/
} /* ecc_panic */

volatile int did_it = 0;

#ifdef _MEM_PARITY_WAR
extern int utlbmiss[], eutlbmiss[];
#ifdef R4600
extern int utlbmiss_r4600[];
extern int eutlbmiss_r4600[];
#endif /* R4600 */
#ifdef _R5000_BADVADDR_WAR
extern int utlbmiss_r5000[];
extern int eutlbmiss_r5000[];
extern int utlbmiss2_r5000[];
extern int eutlbmiss2_r5000[];
extern int utlbmiss1_r5000[];
extern int eutlbmiss1_r5000[];
extern int utlbmiss3_r5000[];
extern int eutlbmiss3_r5000[];
#endif /* _R5000_BADVADDR_WAR */
#if R4000 && (IP19 || IP22)
extern int utlbmiss_250mhz[], eutlbmiss_250mhz[];
extern int utlbmiss2_250mhz[], eutlbmiss3_250mhz[];
#endif /* R4000
&& (IP19 || IP22) */ extern int utlbmiss1[], eutlbmiss1[]; extern int utlbmiss2[], eutlbmiss2[]; extern int utlbmiss3[], eutlbmiss3[]; #ifndef _NO_R4000 extern int locore_exl_0[], elocore_exl_0[]; extern int locore_exl_1[], elocore_exl_1[]; extern int locore_exl_2[], elocore_exl_2[]; extern int locore_exl_3[], elocore_exl_3[]; extern int locore_exl_4[], elocore_exl_4[]; extern int locore_exl_5[], elocore_exl_5[]; extern int locore_exl_6[], elocore_exl_6[]; extern int locore_exl_7[], elocore_exl_7[]; extern int locore_exl_8[], elocore_exl_8[]; extern int locore_exl_9[], elocore_exl_9[]; extern int locore_exl_10[], elocore_exl_10[]; extern int locore_exl_11[], elocore_exl_11[]; extern int locore_exl_12[], elocore_exl_12[]; extern int locore_exl_13[], elocore_exl_13[]; #ifdef _R5000_CVT_WAR extern int locore_exl_14[], elocore_exl_14[]; extern int locore_exl_15[], elocore_exl_15[]; #endif /* _R5000_CVT_WAR */ extern int locore_exl_16[], elocore_exl_16[]; #ifdef USE_PTHREAD_RSA extern int locore_exl_17[], elocore_exl_17[]; #endif /* USE_PTHREAD_RSA */ extern int locore_exl_18[], elocore_exl_18[]; extern int locore_exl_19[], elocore_exl_19[]; extern int locore_exl_20[], elocore_exl_20[]; extern int locore_exl_21[], elocore_exl_21[]; extern int locore_exl_22[], elocore_exl_22[]; extern int locore_exl_23[], elocore_exl_23[]; extern int locore_exl_24[], elocore_exl_24[]; extern int locore_exl_25[], elocore_exl_25[]; struct exl_handler_table_s { int *base; int *limit; } exl_handler_table[] = { { (int *) K0BASE, (int *) K0BASE + NBPP }, /* exception handlers */ { (int *) K1BASE, (int *) K1BASE + NBPP }, { utlbmiss, eutlbmiss }, #ifdef R4600 { utlbmiss_r4600, eutlbmiss_r4600 }, #endif /* R4600 */ #ifdef _R5000_BADVADDR_WAR { utlbmiss_r5000, eutlbmiss_r5000 }, { utlbmiss2_r5000, eutlbmiss2_r5000 }, { utlbmiss1_r5000, eutlbmiss1_r5000 }, { utlbmiss3_r5000, eutlbmiss3_r5000 }, #endif /* _R5000_BADVADDR_WAR */ { utlbmiss2, eutlbmiss3 }, /* includes utlbmiss1 and sharedseg */ #if 
R4000 && (IP19 || IP22) { utlbmiss_250mhz, eutlbmiss_250mhz }, { utlbmiss2_250mhz, eutlbmiss3_250mhz }, #endif { locore_exl_0, elocore_exl_0 }, { locore_exl_1, elocore_exl_1 }, { locore_exl_2, elocore_exl_2 }, { locore_exl_3, elocore_exl_3 }, { locore_exl_4, elocore_exl_4 }, { locore_exl_5, elocore_exl_5 }, { locore_exl_6, elocore_exl_6 }, { locore_exl_7, elocore_exl_7 }, { locore_exl_8, elocore_exl_8 }, { locore_exl_9, elocore_exl_9 }, { locore_exl_10, elocore_exl_10 }, { locore_exl_11, elocore_exl_11 }, { locore_exl_12, elocore_exl_12 }, { locore_exl_13, elocore_exl_13 }, #ifdef _R5000_CVT_WAR { locore_exl_14, elocore_exl_14 }, { locore_exl_15, elocore_exl_15 }, #endif /* _R5000_CVT_WAR */ { locore_exl_16, elocore_exl_16 }, #ifdef USE_PTHREAD_RSA { locore_exl_17, elocore_exl_17 }, #endif /* USE_PTHREAD_RSA */ { locore_exl_18, elocore_exl_18 }, { locore_exl_19, elocore_exl_19 }, { locore_exl_20, elocore_exl_20 }, { locore_exl_21, elocore_exl_21 }, { locore_exl_22, elocore_exl_22 }, { locore_exl_23, elocore_exl_23 }, { locore_exl_24, elocore_exl_24 }, { locore_exl_25, elocore_exl_25 }, { NULL, NULL } }; #define exl_handler_table_uc ((struct exl_handler_table_s *) K0_TO_K1((ulong) exl_handler_table)) #endif /* _NO_R4000 */ extern int perr_save_info(eframe_t *, k_machreg_t *, uint, k_machreg_t, int); #ifdef R4600SC extern int _r4600sc_enable_scache_erl(void); extern int _r4600sc_disable_scache_erl(void); #endif /* R4600SC */ extern int ecc_same_cache_block(int, paddr_t, paddr_t); extern int tlb_to_phys(k_machreg_t , paddr_t *, int *); extern unsigned int r_phys_word(paddr_t); extern int _read_tag(int, caddr_t, int *); extern ecc_fixup_caches(int, paddr_t, k_machreg_t, uchar_t); extern int decode_inst(eframe_t *, int, int *, k_machreg_t *, int *); #endif /* MEM_PARITY_WAR */ #if defined(_MEM_PARITY_WAR) || defined(IP32) static int ecc_is_branch(inst_t inst) { union mips_instruction i; unsigned int op; i.word = inst; op = i.j_format.opcode; if (op == spec_op) { if 
(i.r_format.func == jr_op || i.r_format.func == jalr_op) return(1); return(0); } else if (op == bcond_op) { op = i.i_format.rt; if ((/* op >= bltz_op && */ op <= bgezl_op) || (op >= bltzal_op && op <= bgezall_op)) return(1); return(0); } else if (op >= cop0_op && op <= cop3_op) { if (i.r_format.rs == bc_op) return(1); return(0); } else if (((op >= j_op) && (op <= bgtz_op)) || ((op >= beql_op && op <= bgtzl_op))) return(1); return(0); } #define LOAD_INSTR 1 #define STORE_INSTR 2 static paddr_t ecc_get_perr_addr(eframe_t *ep, k_machreg_t errorepc, int *cache_err) { paddr_t instaddr, bdaddr, paddr = 0; k_machreg_t vaddr; int cached, width, ldst; #ifdef WRONG int pidx, sidx; #endif inst_t inst, bdinst; int is_bdslot = 0; if (!tlb_to_phys(errorepc, &instaddr, &cached)) /* can't translate this address, fail */ return(-1); #ifdef _MEM_PARITY_WAR inst = (inst_t)r_phys_word(instaddr); #else inst = (inst_t)r_phys_word_erl(instaddr); #endif if (ecc_is_branch(inst)) { if (!tlb_to_phys(errorepc+sizeof(inst_t), &bdaddr, &cached)) return(-1); #ifdef _MEM_PARITY_WAR bdinst = (inst_t)r_phys_word(bdaddr); #else bdinst = (inst_t)r_phys_word_erl(bdaddr); #endif is_bdslot = 1; } if (!(*cache_err & CACHERR_ER)) { /* * we got an error on an instruction access * so errorepc points to the offending instruction * or a branch instruction which may be the offending * instruction or the offender may be the instruction * in the branch delay slot. * * If only one instruction is involved, or if both * instructions are in the same cache line, we can * synthesize a physaddr and build a corresponding * ce_sidx and ce_pidx and add them to the contents * of the cache_err register from this exception. 
*/ if (is_bdslot) { if (!ecc_same_cache_block(CACH_PI, instaddr, bdaddr)) return(-1); } #ifdef WRONG /* * build a reasonable facsimile of ce_pidx and ce_pidx * and place them in the cache_err register image */ pidx = (errorepc >> CACHERR_PIDX_SHIFT) & CACHERR_PIDX_MASK; sidx = instaddr & CACHERR_SIDX_MASK; *cache_err &= ~(CACHERR_PIDX_MASK | CACHERR_SIDX_MASK); *cache_err |= (sidx | pidx); #endif return((__uint64_t)instaddr); } else { /* * we got a data error, look at the instruction * at errorepc -- if it is a load/store calculate * the physical address of the target. If it is * a branch, the instruction in the branch delay * slot should be the offending instruction. * calculate the physical address of the target * of this instruction and use it as the physical * address. * * if neither of these situations holds true, we've * got a real problem. */ if (bdaddr) inst = bdinst; /* * see if we can decode the instruction which * caused the fault, if not, we can't get a physaddr. */ if (!decode_inst(ep, inst, &ldst, &vaddr, &width)) return(-1); if (!tlb_to_phys(vaddr, &paddr, &cached)) return(-1); #ifdef WRONG /* * build a reasonable facsimile of ce_pidx and ce_pidx * and place them in the cache_err register image */ pidx = (vaddr >> CACHERR_PIDX_SHIFT) & CACHERR_PIDX_MASK; sidx = paddr & CACHERR_SIDX_MASK; *cache_err &= ~(CACHERR_PIDX_MASK | CACHERR_SIDX_MASK); *cache_err |= (sidx | pidx); #endif return((__uint64_t)paddr); } /*NOTREACHED*/ } #endif /* _MEM_PARITY_WAR || IP32 */ ecc_handler( eframe_t *efp, k_machreg_t *eccfp, uint cache_err, #if IP19 k_machreg_t errorepc, volatile ecc_info_t *ecc_info_param, cpuid_t ecc_cpuid) #else k_machreg_t errorepc) #endif { int location; err_desc_t *edp; /* ptr to set of variables to set this time */ uint ce_sidx = (cache_err & CACHERR_SIDX_MASK); uint ce_pidx = (cache_err & CACHERR_PIDX_MASK); /* must be shifted */ __uint64_t physaddr; #if IP32 _crmreg_t regval; #endif register int t_or_d = 0; uint tags[NUM_TAGS], s_data_ecc; int 
res = 0; uint index = 0; #if _MEM_PARITY_WAR static time_t last_time; #endif /* _MEM_PARITY_WAR */ #ifdef R4600SC extern int two_set_pcaches; int r4600sc_scache_disabled = 1; int _r4600sc_disable_scache(void); void _r4600sc_enable_scache(void); if (two_set_pcaches && private.p_scachesize) #ifdef _MEM_PARITY_WAR r4600sc_scache_disabled = _r4600sc_disable_scache_erl(); #else /* _MEM_PARITY_WAR */ r4600sc_scache_disabled = _r4600sc_disable_scache(); #endif /* _MEM_PARITY_WAR */ #endif #if defined (EVEREST) /* Now save the cache error in the extended everror structure * for future use by the FRU analyzer */ if (ecc_info_param->ecc_info_inited != 1) return(1); ecc_info_param->everror_ext->eex_cpu[ecc_cpuid].cpu_cache_err = cache_err; ecc_info_param->ecc_panic_recoverable = 0; #endif /* EVEREST */ #ifdef _MEM_PARITY_WAR #ifndef CMPLR_BUG_277906_FIXED errorepc = eccfp[ECCF_ERROREPC]; #endif #if R4000 && (! _NO_R4000) if (efp->ef_sr & SR_EXL) { /* check if we need to clear SR_EXL due to an R4000 bug: * we clear SR_EXL if $errorepc was not in one of the * SR_EXL handlers. */ k_machreg_t errorepc_k0; if (IS_KSEG1(errorepc)) errorepc_k0 = K1_TO_K0(errorepc); else errorepc_k0 = errorepc; for (index = 0; exl_handler_table_uc[index].base != 0; index++) { if (((int *) errorepc_k0) >= exl_handler_table_uc[index].base && ((int *) errorepc_k0) < exl_handler_table_uc[index].limit) break; } if (exl_handler_table_uc[index].base == NULL) /* errorepc not found in table */ efp->ef_sr &= ~SR_EXL; } #endif /* R4000 && (! 
_NO_R4000) */ #if (defined(IP20) || defined(IP22) || defined(IPMHSIM)) /* save bus error status */ eccfp[ECCF_CPU_ERR_STAT] = *(volatile uint *)PHYS_TO_K1(CPU_ERR_STAT); eccfp[ECCF_CPU_ERR_ADDR] = *(volatile uint *)PHYS_TO_K1(CPU_ERR_ADDR); eccfp[ECCF_GIO_ERR_STAT] = *(volatile uint *)PHYS_TO_K1(GIO_ERR_STAT); eccfp[ECCF_GIO_ERR_ADDR] = *(volatile uint *)PHYS_TO_K1(GIO_ERR_ADDR); /* clear possible errors */ *(volatile uint *)PHYS_TO_K1(CPU_ERR_STAT) = 0x0; *(volatile uint *)PHYS_TO_K1(GIO_ERR_STAT) = 0x0; flushbus(); /* retry if CPU see SYSAD error but MC did not see any */ if ((cache_err & CACHERR_EE) && !eccfp[ECCF_CPU_ERR_STAT] && !eccfp[ECCF_GIO_ERR_STAT] && (time - last_time > 5)) { ecc_info.ecc_err_cnts[SYSAD_ERRS]++; last_time = time; return 0; } /* save_perr_info checks to see if it is a memory error * we might be able to workaround, and saves away enough * information to be able to fix it. */ if (perr_save_info(efp, eccfp, cache_err, errorepc, ((cache_err & CACHERR_EE) ? PERC_CACHE_SYSAD : PERC_CACHE_LOCAL))) { #ifdef R4600SC if (!r4600sc_scache_disabled) _r4600sc_enable_scache_erl(); #endif /* R4600SC */ return(-1); /* force an exception */ } #endif /* IP20 || IP22 */ #endif /* _MEM_PARITY_WAR */ #ifdef IP19 /* On IP19 all memory (BSS) is zeroed at boot time, so we don't * really have much to initialize. We really want to avoid * referencing global variables which are cached. */ ecc_info_param->eframep = CACHE_ERR_EFRAME; ecc_info_param->eccframep = CACHE_ERR_ECCFRAME; #else /* !IP19 */ if (!ecc_info_initialized) init_ecc_info(); #endif /* !Ip19 */ /* if this error was 'forced' the CE bit will be set--clear it * in the eframe SR */ efp->ef_sr &= ~SR_CE; /* Check if we have handled too many ecc errors without * allowing the cleanup routine to execute. (splhi and * DE bit set ensures we won't be interrupted during this * test) */ NEXT_INDEX(ECC_INFO(ecc_w_index)); #ifdef IP19 /* On IP19 we have a state indicator built-in to the entry. 
* Make sure entry is free before using it. */ if (ecc_info_param->ecc_entry_state[ecc_info_param->ecc_w_index] != 0) { ecc_info_param->ecc_flags |= HANDLER_OVERRAN; /* back up write index so ecc_panic() will do something */ PREV_INDEX(ecc_info_param->ecc_w_index); return(1); /* ecc_panic will print proper msg */ } else { index = ecc_info_param->ecc_w_index; ecc_info_param->ecc_entry_state[index] = 1; } #else /* !IP19 */ if (ecc_info.ecc_w_index == ecc_info.ecc_r_index) { ecc_info.ecc_flags |= HANDLER_OVERRAN; /* back up write index so ecc_panic() will do something */ PREV_INDEX(ecc_info.ecc_w_index); #ifdef R4600SC /* * on R4600SC there is no need to renable cache * since we are going to panic anyway. */ #endif /* R4600SC */ return(1); /* ecc_panic will print proper msg */ } else { index = ecc_info.ecc_w_index; } #endif /* !IP19 */ /* point edb to set of variables to use */ edp = (err_desc_t *)&(ECC_INFO(desc[index])); #ifdef IP19 edp->e_cpuid = ecc_cpuid; #else edp->e_cpuid = cpuid(); #endif #ifdef _MEM_PARITY_WAR edp->e_eframep = efp; edp->e_eccframep = eccfp; #endif /* _MEM_PARITY_WAR */ edp->e_flags = E_PADDR_VALID|E_VADDR_VALID; #ifdef R4000PC if ((r4000_config & CONFIG_SC) == 0) { /* 0 == scache present */ #endif /* R4000PC */ /* use CacheErr sidx to fetch 2ndary tag of the line mapping it, * and ecc checkbits of the data in that line */ _c_ilt_n_ecc(CACH_SD, PHYS_TO_K0(ce_sidx), tags, &s_data_ecc); edp->e_badecc = eccfp[ECCF_ECC] = s_data_ecc; edp->e_s_taglo = tags[TAGLO_IDX]; /* ce_sidx has paddr[21..3], 2ndary taglo has paddr[35..17] but * must be shifted to proper position */ physaddr = (ce_sidx | ((edp->e_s_taglo & SADDRMASK) << SADDR_SHIFT)); #ifdef R4000PC } else { #ifdef _MEM_PARITY_WAR if (((eccfp[ECCF_CPU_ERR_STAT] & CPU_ERR_STAT_RD_PAR) == CPU_ERR_STAT_RD_PAR)) { physaddr = eccfp[ECCF_CPU_ERR_ADDR] & ~0x7; physaddr += BYTEOFF(eccfp[ECCF_CPU_ERR_STAT] & 0xff); edp->e_flags |= E_PADDR_MC; } else if (eccfp[ECCF_GIO_ERR_STAT] & GIO_ERRMASK) { physaddr 
= eccfp[ECCF_GIO_ERR_ADDR] & ~0x7; physaddr |= BYTEOFF(eccfp[ECCF_GIO_ERR_STAT] & 0xff); edp->e_flags |= E_PADDR_GIO; } else { physaddr = ecc_get_perr_addr(efp,errorepc,(int*)&cache_err); /* no physaddr, can't go on */ if (physaddr == -1) return(1); if (cache_err & CACHERR_ER) /* * if fault occured on a data reference * and was *not* reported by MC, we can't * be certain that the physaddr we got * from decoding the instruction is correct. */ edp->e_flags &= ~E_PADDR_VALID; } /* * this allows us to work around the rather persistent * bug in the R4000 which causes it to report an incorrect * pidx when it takes a primary cache parity error. This * workaround is not necessary on the R4600 so we skip it * if we are running on one. If we can't find it in the * cache (ce_pidx == -1) we continue to collect information * for error reporting, but we will not attempt to fix the * error, we will either kill the process or we will panic. */ if (!(cache_err & CACHERR_ET) && !two_set_pcaches) { ce_pidx = ecc_find_pidx((cache_err & CACHERR_ER) ? 
CACH_PD : CACH_PI, physaddr); if (ce_pidx == -1) { edp->e_flags &= ~E_VADDR_VALID; } else { cache_err = (cache_err & ~CACHERR_PIDX_MASK) | ce_pidx; } } #elif IP32 #define CPU_ERR (CRM_CPU_ERROR_CPU_ILL_ADDR | \ CRM_CPU_ERROR_CPU_WRT_PRTY) #define CPU_ERR_REV0 (CRM_CPU_ERROR_CPU_INV_ADDR_RD | \ CRM_CPU_ERROR_CPU_INV_REG_ADDR) #define MEM_ERR (CRM_MEM_ERROR_CPU_ACCESS | \ CRM_MEM_ERROR_HARD_ERR) regval = READ_REG64(PHYS_TO_K1(CRM_CPU_ERROR_STAT), _crmreg_t); eccfp[ECCF_CPU_ERR_STAT] = (uint)(regval & 0xffffffff); regval = READ_REG64(PHYS_TO_K1(CRM_MEM_ERROR_STAT), _crmreg_t); eccfp[ECCF_MEM_ERR_STAT] = (uint)(regval & 0xffffffff); regval = READ_REG64(PHYS_TO_K1(CRM_CPU_ERROR_ADDR), _crmreg_t); eccfp[ECCF_CPU_ERR_ADDR] = regval; regval = READ_REG64(PHYS_TO_K1(CRM_MEM_ERROR_ADDR), _crmreg_t); eccfp[ECCF_MEM_ERR_ADDR] = (uint)(regval & 0xffffffff); if ((eccfp[ECCF_MEM_ERR_STAT] & MEM_ERR) == MEM_ERR) { WRITE_REG64(0LL, PHYS_TO_K1(CRM_MEM_ERROR_STAT), _crmreg_t); physaddr = (__uint64_t)eccfp[ECCF_MEM_ERR_ADDR]; /* * we access all memory below 256Mb at the 0 based * alias. Memory at or above 256Mb is accessed above * 1Gb. Correct for this since CRIME only reports * memory error address bits 29:0. 
*/ if (physaddr >= 0x10000000) physaddr += 0x40000000; } else if ((eccfp[ECCF_CPU_ERR_STAT] & CPU_ERR_REV0) == CPU_ERR_REV0) { WRITE_REG64(0LL, PHYS_TO_K1(CRM_CPU_ERROR_STAT), _crmreg_t); physaddr = eccfp[ECCF_CPU_ERR_ADDRHI]; physaddr = (physaddr << 32) | eccfp[ECCF_CPU_ERR_ADDR]; } else if ((eccfp[ECCF_CPU_ERR_STAT] & CPU_ERR) == CPU_ERR) { WRITE_REG64(0LL, PHYS_TO_K1(CRM_CPU_ERROR_STAT), _crmreg_t); physaddr = eccfp[ECCF_CPU_ERR_ADDRHI]; physaddr = (physaddr << 32) | eccfp[ECCF_CPU_ERR_ADDR]; } else { physaddr = (long long)ecc_get_perr_addr(efp,errorepc, (int *)&cache_err); /* no physaddr, can't go on */ if (physaddr == -1) return(1); if (cache_err & CACHERR_ER) /* * if fault occured on a data reference * and was *not* reported by CRIME, we can't * be certain that the physaddr we got * from decoding the instruction is correct. */ edp->e_flags &= ~E_PADDR_VALID; } edp->e_paddr = physaddr; #else physaddr = 0; #endif /* _MEM_PARITY_WAR */ } #endif /* R4000PC */ edp->e_paddr = physaddr; /* set the eccframe paddr to physaddr for now; later routines will * change it if needed (e.g. a p-cache error needs e_vaddr */ #if IP19 || IP32 eccfp[ECCF_PADDR] = physaddr & 0x0ffffffff; eccfp[ECCF_PADDRHI] = physaddr>>32; #else eccfp[ECCF_PADDR] = physaddr; #endif /* primary caches are virtually tagged; build & save vaddr */ edp->e_vaddr = (ce_pidx << CACHERR_PIDX_SHIFT) | (ce_sidx & (NBPP-1)); edp->e_cache_err = cache_err, edp->e_error_epc = errorepc, edp->e_sr = efp->ef_sr; /* There is an R4k chip bug which mistakenly turns on CACHERR_EB * under convoluted circumstances. The workaround is to believe * CACHERR_EB only if CACHERR_ER indicates an instruction error. * So have edp->e_cache_err contain the original cache err register * contents, but fix up our local cache_err value. 
*/ if ((cache_err & (CACHERR_EB|CACHERR_ER)) == (CACHERR_EB|CACHERR_ER)) cache_err &= ~CACHERR_EB; /* not an instruction err */ ASSERT(cache_err & (CACHERR_ED|CACHERR_ET)); if (cache_err & CACHERR_ED) /* Error in data */ t_or_d = DATA_ERR; if (cache_err & CACHERR_ET) { /* Error in tag or both */ if (t_or_d == DATA_ERR) t_or_d = D_AND_T_ERR; else t_or_d = TAG_ERR; } if (cache_err & CACHERR_EE) { /* wrong from SysAD bus */ location = SYSAD; ecc_log_error(SYSAD_ERRS, index); } else if ( (cache_err & CACHERR_EC) && (cache_err & CACHERR_ER) ) location = CACH_SD; else if ( !(cache_err & CACHERR_EC) && (cache_err & CACHERR_ER) ) location = CACH_PD; else if ( (cache_err & CACHERR_EC) && !(cache_err & CACHERR_ER) ) location = CACH_SI; else if ( !(cache_err & CACHERR_EC) && !(cache_err & CACHERR_ER) ) location = CACH_PI; else { ecc_assign_msg(ECC_PANIC_MSG, index, ecc_incons_err); #if IP19 ecc_info_param->ecc_entry_state[index] = 2; #endif return(1); } #ifdef IP19 /* check for occurance of cache error exception while already * in cache error handler (or double error due to error on both * out-going and in-coming cacheline). * NOTE: EW bit only defined for R4400 processors. */ if ( (cache_err & CACHERR_EW) ) { ecc_assign_msg(ECC_PANIC_MSG, index, ecc_ew_err); goto uncorrectable; } #ifdef ECC_TEST_EW_BIT { extern int get_cacheerr(void); ecc_info_param->ecc_wait_for_external = 1; while (ecc_info_param->ecc_wait_for_external != 2) /* NOP */ ; ecc_info_param->ecc_cpu1_cacheerr2 = get_cacheerr(); } #endif edp->e_location = location; edp->e_tag_or_data = t_or_d; if (location == CACH_PD || location == CACH_PI) eccfp[ECCF_PADDR] = edp->e_vaddr; if (location == SYSAD) { res = ecc_fixmem(index,efp,eccfp,cache_err,errorepc); } else { /* it's error(s) in cache(s) */ res = ecc_fixcache(index,efp,eccfp,cache_err,errorepc); } /* if cache_err EB bit is set, a data error occurred in addition to * i-cache error indicated by the other cache_err bits. 
Flush both * data caches after sanity-checking that main error was in icache. * Note: if a) error is in clean data, the line won't be written- * back, so the error will be fixed. b) if error is in dirty line, * line will flush out through RMI, which will fix 1-bit errors, * pass over > 1-bit errors. Therefore, two cases: 1) 1-bit data * error: transparently fixed (not logged, unfortunately); * 2) multibit data error: it will be stored in memory with the * errors; if it is written to disk it is dealt-with then; if it * is re-read the R4K will raise an exception and we'll take action * then, so this is sufficient. */ if (cache_err & CACHERR_EB) { /* * XXX: we *may* be able to recover from the data error * with great difficulty, for now we will just die. */ res = 1; /* We must avoid cached accesses since that might force * corrupted data out from primary cache (if error is due * to a store-miss). So just flush an area large enough * to guarentee we've flushed the entire cache, rather * than loading p_scachesize, which is a cached variable. */ __cache_wb_inval((void *)FLUSH_ADDR, FOUR_MEG); } if (res) { /* failed to correct error: kill process or IRIX */ /* For now we will panic on IP19 machines. The error * may have occurred in user mode, but perhaps the data * destroyed (forced out corrupted ?) may be kernel data. */ if (USERMODE(efp->ef_sr)) { ecc_assign_msg(ECC_INFO_MSG, index, ecc_user_err); edp->e_user = 1; #if 0 /* Following code is bogus since it will make a * cached reference. Even if error processing * is complete, we're still running with ERL and * DE set, so a cache error here would be ignored. */ if (private.p_curproc) edp->e_curprocp = private.p_curproc; else edp->e_curprocp = NULL; goto handler_exit; #endif goto uncorrectable; } else { /* BOOM! kernel encountered the ecc error */ ecc_info_param->ecc_flags |= K_ECC_PANIC; goto uncorrectable; } } /* if any cleanup work is necessary, the requesting routine * did a 'MARK_FOR_CLEANUP'. 
If so, raise an interrupt. Else * decrement the index for re-use. */ if (CLEANUP_IS_NEEDED) { ECC_INTERRUPT; } else #ifdef ECC_DEBUG /* keep frame for reference let ecc_cleanup sync ptrs */ ECC_INTERRUPT; #else PREV_INDEX(ecc_info_param->ecc_w_index); /* overwrite frame */ #endif /* give an indication as to whether the error is theorectically * recoverable (or more correctly, try to report uncorrectable only * if it's an MBE and we should replace the CPU). */ if ((!res) && (!ecc_info_param->ecc_panic_recoverable)) ecc_info_param->ecc_panic_recoverable = 1; ecc_info_param->ecc_entry_state[index] = 2; #ifdef IP19_CACHEERRS_FATAL return(1); #else if (!ecc_info_param->ecc_attempt_recovery) return(1); /* Error may have been due to store-miss which did not set the EI * bit. Indications are that the following test should fail * and return "one" in that case, which should be considered * fatal. */ if ((!res) && (ecc_check_cache(ecc_info_param->ecc_dummyline))) { ecc_assign_msg(ECC_PANIC_MSG, index, ecc_possible_ei); res = 1; } return(res); #endif uncorrectable: ecc_info_param->ecc_entry_state[index] = 2; return(1); #else /* !IP19 */ edp->e_location = location; edp->e_tag_or_data = t_or_d; if (location == CACH_PD || location == CACH_PI) eccfp[ECCF_PADDR] = edp->e_vaddr; #if IP20 || IP22 || IPMHSIM /* The SP IP20/22 should never encounter external requests, * so there'd better not be any cache errors as a result of them: * panic. * * XXX: on the R4600(!two_set_pcaches) ES does not mean an error * caused by an external request, it means that the error occured * on a cache miss in the first doubleword of read response data. 
*/ #if R4600 if ((cache_err & CACHERR_ES) && !two_set_pcaches) { #else if ((cache_err & CACHERR_ES)) { #endif ecc_assign_msg(ECC_PANIC_MSG, index, ecc_extreq); return(1); } #endif if (location == SYSAD) { #ifdef MCCHIP res = 1; #else res = ecc_fixmem(index,efp,eccfp,cache_err,errorepc); #endif /* MCCHIP */ } else { /* it's error(s) in cache(s) */ #if _MEM_PARITY_WAR || IP32 if ((edp->e_flags & (E_PADDR_VALID|E_VADDR_VALID)) != (E_PADDR_VALID|E_VADDR_VALID)) { /* * we must be sure of both the physaddr and the cache * index to attempt a fix. */ res = 1; } else #endif res = ecc_fixcache(index,efp,eccfp,cache_err,errorepc); } /* if cache_err EB bit is set, a data error occurred in addition to * i-cache error indicated by the other cache_err bits. Flush both * data caches after sanity-checking that main error was in icache. * Note: if a) error is in clean data, the line won't be written- * back, so the error will be fixed. b) if error is in dirty line, * line will flush out through RMI, which will fix 1-bit errors, * pass over > 1-bit errors. Therefore, two cases: 1) 1-bit data * error: transparently fixed (not logged, unfortunately); * 2) multibit data error: it will be stored in memory with the * errors; if it is written to disk it is dealt-with then; if it * is re-read the R4K will raise an exception and we'll take action * then, so this is sufficient. */ if (cache_err & CACHERR_EB) { #if MCCHIP || IP32 /* * XXX: we *may* be able to recover from the data error * with great difficulty, for now we will just die. 
*/ res = 1; #else if (edp->e_location != CACH_PI && edp->e_location != CACH_SI) { ecc_assign_msg(ECC_ERROR_MSG, index, ecc_eb_not_i); } __cache_wb_inval((void *)FLUSH_ADDR, private.p_scachesize); #endif } if (res) { /* failed to correct error: kill process or IRIX */ if (USERMODE(efp->ef_sr)) { ecc_assign_msg(ECC_INFO_MSG, index, ecc_user_err); edp->e_user = 1; #ifdef _MEM_PARITY_WAR allow_nofault_error: #endif /* _MEM_PARITY_WAR */ edp->e_pid = current_pid(); goto handler_exit; } else { /* BOOM! kernel encountered the ecc error */ #ifdef _MEM_PARITY_WAR if (private.p_nofault || (curthreadp->k_nofault)) goto allow_nofault_error; #endif /* _MEM_PARITY_WAR */ ecc_info.ecc_flags |= K_ECC_PANIC; return(1); /* we're dead meat--panic now */ } } handler_exit: #ifdef R4600SC if (!r4600sc_scache_disabled) #ifdef _MEM_PARITY_WAR _r4600sc_enable_scache_erl(); #else /* _MEM_PARITY_WAR */ _r4600sc_enable_scache(); #endif /* _MEM_PARITY_WAR */ #endif /* R4600SC */ #ifdef _MEM_PARITY_WAR if (res) { return(-1); } #endif /* _MEM_PARITY_WAR */ /* if any cleanup work is necessary, the requesting routine * did a 'MARK_FOR_CLEANUP'. If so, raise an interrupt. Else * decrement the index for re-use. 
*/ if (CLEANUP_IS_NEEDED) { ECC_INTERRUPT; } else #ifdef ECC_DEBUG /* keep frame for reference let ecc_cleanup sync ptrs */ ECC_INTERRUPT; #else PREV_INDEX(ecc_info.ecc_w_index); /* overwrite frame */ #endif #ifdef _MEM_PARITY_WAR ASSERT(!res); #endif return(res); #endif /* !IP19 */ } /* ecc_handler */ #ifndef MCCHIP /*ARGSUSED*/ static #if IP19 real_ecc_fixmem( uint index, eframe_t *efp, k_machreg_t *eccfp, uint cache_err, k_machreg_t errorepc, volatile ecc_info_t *ecc_info_param ) #else ecc_fixmem( uint index, eframe_t *efp, k_machreg_t *eccfp, uint cache_err, k_machreg_t errorepc) #endif { err_desc_t *edp = (err_desc_t *)&(ECC_INFO(desc[index])); uint tags[NUM_TAGS]; __psunsigned_t physaddr = edp->e_paddr; __psunsigned_t k0addr, k0oneoff; error_info_t err_info; unsigned char hi_syn; #if IP19 __psunsigned_t pmem = (ecc_info_param->ecc_physmem * NBPP); #else __psunsigned_t pmem = (physmem * NBPP); #endif eccdesc_t syn_info; uint hi_taglo; #ifdef SYNDROME_CHECKING __psunsigned_t addr; int foundone = 0; #endif /* since it came in wrong off the bus, the s_taglo register is the * one we're interested in; shove it on the eccframe. */ #ifdef R4000PC if ((r4000_config & CONFIG_SC) != 0) /* 0 == scache present */ eccfp[ECCF_TAGLO] = edp->e_p_taglo; else #endif /* R4000PC */ eccfp[ECCF_TAGLO] = edp->e_s_taglo; #ifdef _MEM_PARITY_WAR k0addr = physaddr; /* This works up to 2 GB of memory, because KUSEG is physical * memory when SR_ERL is set in $sr. */ #else /* _MEM_PARITY_WAR */ #if IP19 k0addr = PHYS_TO_K0(physaddr & (ecc_info_param->ecc_k0size_less1)); /* XXX This won't work if physaddr >= K0SIZE */ /* flush the bad line out through the RMI (which will fix it * in memory if possible) by reading an address one 2nd-cache- * size higher or lower, whichever is within physical mem. * * NOTE: we need to avoid cached accesses on IP19 so loading * anything from pda (like p_scachesize) is a no-no. 
* Exact number is not important as long as it is at least as * large as the scachesize. So we just use 4MB. */ if ((physaddr + FOUR_MEG) >= pmem) k0oneoff = (k0addr - FOUR_MEG); else k0oneoff = (k0addr + FOUR_MEG); #else k0addr = PHYS_TO_K0(physaddr & (K0SIZE-1)); /* XXX This won't work if physaddr >= K0SIZE */ /* flush the bad line out through the RMI (which will fix it * in memory if possible) by reading an address one 2nd-cache- * size higher or lower, whichever is within physical mem. */ if ((physaddr + private.p_scachesize) >= pmem) k0oneoff = (k0addr - private.p_scachesize); else k0oneoff = (k0addr + private.p_scachesize); #endif #endif /* _MEM_PARITY_WAR */ #if IP19 || IP32 err_info.eidata_lo = 0xdeadbeef; err_info.eidata_hi = 0xdeadbeef; #else err_info.eidata_lo = *(uint *)(k0addr); err_info.eidata_hi = *(uint *)(k0addr+BYTESPERWD); #endif /* ASSERT(edp->e_badecc); */ err_info.cbits_in = edp->e_badecc; edp->e_lo_badval = err_info.eidata_lo; edp->e_hi_badval = err_info.eidata_hi; #if IP20 || IP22 || IPMHSIM /* * The IP20 and IP22 (MC-based) systems have only parity * memory, so correction is not possible, except when an * instruction overwrites the memory. Therefore, we just * reflect this error to trap(), for appropriate disposition. */ /* force a software trap */ return(-1); #elif IP32 /* * the only errors which will get reflected in the cache on * IP32 are bad address errors and non-correctable ECC errors * in any case, none of these are fixable. * XXX: this is probably wrong, we need to extract the correct * syndrome for the appropriate byte, but I'm lazy right now. 
*/ edp->e_syndrome = (uint) (READ_REG64(PHYS_TO_K1(CRM_MEM_ERROR_ECC_SYN), _crmreg_t) & 0xffffffff); return(1); #else /* !(IP20 || IP22) */ hi_taglo = edp->e_s_taglo; tags[TAGLO_IDX] = hi_taglo; tags[TAGHI_IDX] = 0; /* change line-state from clean to dirty so that the cached read * we'll do one 2nd-cache-size-segment up from the bad addr will * flush the current line through the RMI, fixing memory */ tags[TAGLO_IDX] = ((tags[TAGLO_IDX] & ~SSTATEMASK) | SDIRTYEXCL); _c_ist(CACH_SD, k0addr, tags); hi_syn = calc_err_info(DATA_CBITS, &err_info); #ifdef SYNDROME_CHECKING if (!hi_syn) { /* NO ERROR! */ printf("WEIRDITY!!! Check ecc on all dbl wds in line!\n"); startaddr = k0addr & ~(SCACHE_LINESIZE-1); printf("k0addr 0x%x, startaddr 0x%x\n",k0addr,startaddr); for (addr = startaddr;addr < startaddr+SCACHE_LINESIZE;addr += 8) { alt_err_info.eidata_lo = *(uint *)addr; alt_err_info.eidata_hi = *(uint *)(addr+BYTESPERWD); _c_ilt_n_ecc(CACH_SD, addr, tags, &data_ecc); alt_err_info.cbits_in = data_ecc; lo_syn = calc_err_info(DATA_CBITS, &alt_err_info); if (lo_syn) { foundone++; printf("addr 0x%x, w0 0x%x, w1 0x%x, ", addr, alt_err_info.eidata_lo, alt_err_info.eidata_hi); printf("cbin 0x%x, cbout 0x%x, syn 0x%x\n", alt_err_info.cbits_in, alt_err_info.cbits_out,lo_syn); } } if (!foundone) { printf("NO SYNDROMES IN LINE BEGINNING AT 0x%x ",addr); printf("were non-zero\n"); } ecc_log_error(NO_ERROR, index); return(1); } #else if (!hi_syn) { /* NO ERROR! */ ecc_log_error(NO_ERROR, index); return(0); } #endif /* use the syndrome to determine the severity of the error */ edp->e_syndrome = hi_syn; syn_info = err_info.syn_info; /* if it is a correctable error (DBx or CBx), force it back * through the RMI to scrub memory. */ if (syn_info.type == DB || syn_info.type == CB) { edp->e_user = 0; *(volatile uint *)k0oneoff; /* XXXXXXXXXXXXXXXXXXX SHOULD I CHECK IF THE FIX WORKED??? 
*/ return(0); } else { /* 2-bit or greater: can't fix it */ #if IP19 || IP32 eccfp[ECCF_PADDR] = physaddr & 0x0ffffffff; eccfp[ECCF_PADDRHI] = physaddr>>32; #else eccfp[ECCF_PADDR] = physaddr; #endif return(1); /* ecc_handler will kill process or IRIX */ } #endif /* !(IP20 || IP22) */ /*NOTREACHED*/ } /* ecc_fixmem */ #endif /* ! MCCHIP */ volatile int cache_hit = -1; /* ARGSUSED */ #if IP19 static real_ecc_fixcache( uint index, eframe_t *efp, k_machreg_t *eccfp, uint cache_err, k_machreg_t errorepc, volatile ecc_info_t *ecc_info_param ) #else static ecc_fixcache( uint index, eframe_t *efp, k_machreg_t *eccfp, uint cache_err, k_machreg_t errorepc) #endif { int offset; err_desc_t *edp = (err_desc_t *)&(ECC_INFO(desc[index])); __psunsigned_t s_caddr = PHYS_TO_K0(SCACHE_PADDR(edp)); __psunsigned_t p_caddr = PHYS_TO_K0(edp->e_vaddr); uint tags[NUM_TAGS]; uint data_ecc; uint res = 0; /* XXXXXXXXXXXXXXXXXXXXXXXX SET ALL e_ VALUES! */ /* set e_p_taglo to PI tag if main error is in PI or SI (i.e. * *instruction* error in in either cache); if PD or SD ==> PD. * (Use computed virtual address when accessing the P-caches.) */ _c_ilt_n_ecc((((edp->e_location == CACH_PI) || (edp->e_location == CACH_SI)) ? CACH_PI : CACH_PD), p_caddr, tags, &data_ecc); edp->e_p_taglo = tags[TAGLO_IDX]; #ifdef R4000PC if ((r4000_config & CONFIG_SC) != 0) /* 0 == scache present */ edp->e_s_taglo = 0; else #endif /* R4000PC */ { _c_ilt_n_ecc(CACH_SD, s_caddr, tags, &data_ecc); edp->e_s_taglo = tags[TAGLO_IDX]; /* if EI bit set, there is corrupted data in primary Dcache. * Invalidate the line by zeroing-out the tag */ if (cache_err & CACHERR_EI) { #ifdef IP19 /* Various wierd errors afflict an IP19 after a store-miss * cache-error. It appears that the state of the cache * is really confused. The cpu rarely recovers and other * cpus seem to get errors when accessing this cpu's * cache. So simply panic now. 
*/ ecc_info_param->ecc_panic_recoverable = 2; ecc_assign_msg(ECC_PANIC_MSG, index, ecc_ei_norecover); return(1); #ifdef DO_NOT_ENABLE /* Theorectically correct IP19 store-miss recover code */ tags[TAGLO_IDX] = 0; _c_ilt(CACH_PD,p_caddr,tags); tags[TAGLO_IDX] &= ~PSTATEMASK; /* change state to invalid */ _c_ist(CACH_PD,p_caddr,tags); /* On an MP system, an intervention from another cpu could * cause that cpu to get this cacheline with corrupt data * and good ECC (intervention will flush data from primary * to secondary and since DW bit is set will update secondary * with good ECC). To make sure that we don't silently * consume bad data we check that the secondary cacheline * is still marked "dirty" after we've invalidated the * primary cache. */ _c_ilt(CACH_SD, s_caddr, tags); if (!(DIRTY_S_TAG(tags[TAGLO_IDX]))) { ecc_assign_msg(ECC_PANIC_MSG, index, ecc_ei_notdirty); return(1); } #endif /* DO_NOT_ENABLE */ #else /* !IP19 */ tags[TAGLO_IDX] = 0; _c_ist(CACH_PD,p_caddr,tags); #endif /* !IP19 */ } } /* NOTE: in all correctable-cases we must verify that the fix * succeeded in order to avoid an infinite-loop of instruction- * restarts re-raising the ecc exception in the event of stuck * cache bits. Otherwise we could just invalidate the line and * let the restart refill the line. * If there are errors in both tag and data, start with the * tag. Depending on how we fix the tag error, the data error may * be corrected also. If not, see comment for CACHERR_EB at end * of ecc_handler. (Either the tag will be successfully repaired * or we will panic, so if the data error remains, a subsequent * exception will spotlight it). 
*/ ASSERT(edp->e_location >= CACH_PI && edp->e_location <= CACH_SD); /* set index into error-counting array to proper cache * ( 2x cuz tag-data pairs for each cache) */ if (edp->e_location == CACH_SD) /* 2ndary is I and D combined */ offset = (2 * CACH_SI); else offset = (2 * edp->e_location); switch(edp->e_location) { case CACH_PD: case CACH_PI: /* err is in primary: p_taglo is useful. Poke it into frame */ eccfp[ECCF_TAGLO] = edp->e_p_taglo; /* At this point the R4K doesn't return the correct * checkbits for data in either of the primary caches, * so the only potentially-relevant ecc value is * contained in the p_taglo. */ break; case CACH_SD: #ifndef R10000 case CACH_SI: #endif /* !R10000 */ /* error is in 2ndary; save s_taglo on eccframe */ eccfp[ECCF_TAGLO] = edp->e_s_taglo; break; default: ecc_assign_msg(ECC_PANIC_MSG, index, ecc_inval_loc); return(1); } ASSERT(edp->e_tag_or_data>=DATA_ERR && edp->e_tag_or_data<=D_AND_T_ERR); if (edp->e_tag_or_data != DATA_ERR) { /* tag or both */ res = ecc_fixctag(edp->e_location, index); if (res == 2) { /* unfixable 2nd-level tag: panic */ ecc_assign_msg(ECC_PANIC_MSG, index, ecc_bad_s_tag); res = 1; } } else /* error in data field */ res = ecc_fixcdata(edp->e_location, index, eccfp); if (edp->e_tag_or_data == DATA_ERR) ecc_log_error(offset, index); else if (edp->e_tag_or_data == TAG_ERR) ecc_log_error((offset+1), index); else { /* errors in both data and tag */ ecc_log_error(offset, index); ecc_log_error((offset+1), index); } return(res); } /* ecc_fixcache */ #ifdef R4600 /* * ecc_find_bad_cline -- searchs for tag which caused a cache tag * error. returns the index in the parameter * *idx. * * returns 1 if it found a tag which had bad parity at the correct * index, 0 if not. * * XXX: this routine has a hidden assumption that loc is CACH_PI or * CACH_PD. 
/*
 * ecc_find_bad_cline -- locate the primary-cache tag whose bad parity
 * caused a cache tag error.  Checks the line at the computed index
 * first, then the other element of the two-way set (R4600 primary
 * caches are two-way; two_set_pcaches is the XOR mask selecting the
 * set partner).  On success the index of the bad line is returned
 * through *idx.
 *
 * Returns 1 if a tag with bad parity was found at the expected index
 * (or at its set partner), 0 if not.
 *
 * NOTE(review): hidden assumption that loc is CACH_PI or CACH_PD,
 * enforced only by the ASSERT below.
 */
ecc_find_bad_cline(int loc, __psunsigned_t p_caddr, uint *idx)
{
	uint tags[NUM_TAGS];
	extern int two_set_pcaches;

	ASSERT(loc == CACH_PI || loc == CACH_PD);
	/* first try the line at the index we computed */
	_read_tag(loc,(caddr_t)p_caddr,(int *)tags);
	if (ecc_bad_ptag(tags[TAGLO_IDX])) {
		*idx = p_caddr;
		return(1);
	}
	/* not there: try the other element of the two-way set */
	_read_tag(loc,(caddr_t)(p_caddr^two_set_pcaches),(int *)tags);
	if (ecc_bad_ptag(tags[TAGLO_IDX])) {
		*idx = p_caddr^two_set_pcaches;
		return(1);
	}
	return(0);
}
#endif /* R4600 */

/* the ptaglo field of the taglo register holds bits 35..12 of the
 * physaddr that the line contains. This mask grabs that field
 * from a virtual address, which is then shifted to its correct
 * position in ptaglo (>> 4)
 */
#define PTAG_ADDRMASK 0xFFFFF000

/* fix tag error in 'loc' cache. 'index' indicates the
 * frame of variables being used during this invocation
 * of ecc_handler().
 */
#if IP19
real_ecc_fixctag(uint loc, int index, volatile ecc_info_t *ecc_info_param)
#else
ecc_fixctag(uint loc, int index)
#endif
{
	err_desc_t *edp = (err_desc_t *)&(ECC_INFO(desc[index]));
	uint tags[NUM_TAGS];
	uint p_taglo = edp->e_p_taglo;
	uint s_taglo = edp->e_s_taglo;
	uint new_p_taglo;
	error_info_t err_info;
	uint tag_syndrome;
	eccdesc_t tag_syn_info;
	uint ce_sidx = (edp->e_cache_err & CACHERR_SIDX_MASK);
	__psunsigned_t s_caddr = PHYS_TO_K0(SCACHE_PADDR(edp));
	__psunsigned_t p_caddr = PHYS_TO_K0(edp->e_vaddr);
	__uint64_t physaddr; /* must be 64-bits always (16 GB memory) */
#ifdef R4000PC
	int pidx = 0;
#if R4600
	extern int two_set_pcaches;
#endif
#endif

	/* uncorrectable errors in 2ndary tags (i.e. > 1 bit) cause a
	 * fatal enigma regardless of whether the data in the line is
	 * dirty or clean: with a corrupted 2ndary tag we can't identify
	 * the (also possibly corrupted) primary line(s) associated with
	 * it. This means that none of the cacheops are guaranteed, and
	 * the state of the caching-system is or may be indeterminate.
	 * Panic.
Note that if the bad 2ndary line is holding an * instruction (and is therefore clean) we could blow out the * primary I-cache and invalidate this 2ndary line; at this * time, however, we're just going to panic on all uncorrectable * errors in 2ndary tags. */ if (loc == CACH_SI || loc == CACH_SD) { /* since the error is in the tag, all the e_values we set * in ecc_handler using it are suspect. We know that the * sidx field in cache_err is correct: use it to fetch the * 2ndary line. e_s_taglo, e_p_taglo, e_paddr and e_vaddr * may be wrong. Calculate the syndrome, then either * a) fix it and recalculate e_paddr and c_vaddr if the * bad bit was a data-bit (not a checkbit), or * b) panic if the error is uncorrectable. */ _c_ilt(loc, PHYS_TO_K0(ce_sidx), tags); err_info.eis_taglo = tags[TAGLO_IDX]; err_info.cbits_in = SET_CBITS_IN; tag_syndrome = calc_err_info(TAG_CBITS, &err_info); ASSERT(err_info.cbits_in == (tags[TAGLO_IDX] & SECC_MASK)); edp->e_prevbadecc = edp->e_badecc; edp->e_badecc = err_info.cbits_in; /* from s_taglo */ edp->e_syndrome = tag_syndrome; if (!tag_syndrome) { /* NO ERROR! */ ecc_assign_msg(ECC_ERROR_MSG, index, ecc_no_stagerr); ecc_log_error(NO_ERROR, index); return(1); } /* use the syndrome to determine the severity of the error */ tag_syn_info = err_info.syn_info; /* DBx and CBx errors are correctable; all others panic */ if (tag_syn_info.type != DB && tag_syn_info.type != CB) return(2); /* if the error is in a databit, fix it and recalculate * all values that were set relying upon possibly-bogus * tag values. If the error is in a checkbit, let the * R4K correct it when we store the tag. Note that the * syndrome identifies the bad bit number *in the internal * format* (i.e. as it is stored in the 2ndary cache), * not as it appears in the taglo register. If it is a * data-bit error we will fix it; the syndrome bitposition * must therefore be translated to taglo format. 
*/ if (tag_syn_info.type == DB) { uint bitpos, badbit; bitpos=xlate_bit(tag_syn_info.type,tag_syn_info.value); badbit = (0x1 << bitpos); tags[TAGLO_IDX] ^= badbit; edp->e_s_taglo = tags[TAGLO_IDX]; /* Now that we have a correct 2ndary tag, recalculate * all values that were based on the bad one. * ce_sidx is paddr[21..3], 2nd taglo is paddr[35..17] * but must be shifted to proper position. Together * they make up the full vaddress. */ physaddr = (ce_sidx | ((edp->e_s_taglo & SADDRMASK) << SADDR_SHIFT)); edp->e_paddr = physaddr; s_caddr = PHYS_TO_K0(SCACHE_PADDR(edp)); #if IP19 edp->e_vaddr = (physaddr & (ecc_info_param->ecc_picache_size-1)); #else edp->e_vaddr = (physaddr & (picache_size-1)); #endif p_caddr = PHYS_TO_K0(edp->e_vaddr); /* set e_p_taglo to PI tag if error is in PI or SI * (i.e. *instruction* error in either cache); * if PD or SD ==> PD. */ if (loc == CACH_PI || loc == CACH_SI) _c_ilt(CACH_PI, p_caddr, tags); else _c_ilt(CACH_PD, p_caddr, tags); edp->e_p_taglo = tags[TAGLO_IDX]; } /* error in tag data bit */ /* now store the corrected tag */ tags[TAGLO_IDX] = edp->e_s_taglo; tags[TAGHI_IDX] = 0; _c_ist(loc,s_caddr,tags); /* Check that the newly-computed tag is correct */ _c_ilt(loc,s_caddr,tags); err_info.eis_taglo = tags[TAGLO_IDX]; tag_syndrome = calc_err_info(TAG_CBITS, &err_info); if (tag_syndrome) { /* panic/kill user */ ecc_assign_msg(ECC_PANIC_MSG,index, ecc_stfix_failed); return(1); } return(0); } else { /* it is a primary-tag error. We can reconstruct * the bad tag from the info we have, even if the * data in the line is dirty. 2ndary-tag ecc is * checked each time a line is transferred to primary, * and traps at that point. 
Therefore, primary lines * already transferred from this 2ndary line are * valid (because the ecc-check didn't cause a trap * during those fills), and the current primary-fill * didn't occur because of this trap */ tags[TAGLO_IDX] = tags[TAGHI_IDX] = 0; #ifdef R4000PC if ((r4000_config & CONFIG_SC) != 0) { /* 0 == scache present */ /* must invalidate the line */ if (loc == CACH_PI) { _c_ist(loc,p_caddr,tags); #ifdef R4600 _c_ist(loc,p_caddr^two_set_pcaches,tags); #endif } else { #ifdef R4600 uint ttags[NUM_TAGS]; int numcleaned = 0; /* * XXX: if we don't find a bad line at the * appropriate index what should we do? We * can't really go on because we can't invalidate * the line. I suppose that we should panic. * First, though we'll see if both lines are clean * we'll just invalidate them, 'case this just * means that one of them had a parity error in * the w or w' bit of the tag. */ if (ecc_find_bad_cline(loc,p_caddr,(uint *)&pidx)) { #endif _c_ist(loc,pidx,tags); goto send_bad_ptag_msg; } _read_tag(loc,(caddr_t)p_caddr,(int *)ttags); if ((ttags[TAGLO_IDX] & PSTATEMASK) == PCLEANEXCL) { _c_ist(loc,pidx,tags); numcleaned++; } #ifdef R4600 _read_tag(loc,(caddr_t)(p_caddr^two_set_pcaches),(int *)ttags); if ((ttags[TAGLO_IDX] & PSTATEMASK) == PCLEANEXCL) { _c_ist(loc,pidx,tags); numcleaned++; } #endif /* * although we didn't find the bad tag * we were able to invalidate both elements * of the set. We can consider this to be * success. 
*/
		if (numcleaned > 1)
			return(0);
	}
	/* primary cache only: fail if in data cache */
	if (edp->e_cache_err & (CACHERR_ER | CACHERR_EB))
		goto send_bad_ptag_msg;	/* unrecoverable */
	return(0);
	}
#endif /* R4000PC */
	if (!ecc_bad_ptag(p_taglo)) {
		/* recomputed parity actually matches: no ptag error here */
		ecc_assign_msg(ECC_ERROR_MSG, index, ecc_no_ptagerr);
		ecc_log_error(NO_ERROR, index);
		return(1);
	}
	/* construct a new ptag from e_vaddr and set state
	 * depending on scache */
	new_p_taglo = ((edp->e_vaddr&PTAG_ADDRMASK)>>PADDR_SHIFT);
	if ((s_taglo & SSTATEMASK) == SCLEANEXCL)
		new_p_taglo |= PCLEANEXCL;
	else
		new_p_taglo |= PDIRTYEXCL;
	tags[TAGLO_IDX] = new_p_taglo;
	tags[TAGHI_IDX] = 0;
	/* store the rebuilt tag, then read it back to verify its parity */
	_c_ist(loc,p_caddr,tags);
	_c_ilt(loc,p_caddr,tags);
	if (ecc_bad_ptag(tags[TAGLO_IDX])) {
#ifdef R4000PC
send_bad_ptag_msg:
#endif /* R4000PC */
		ecc_assign_msg(ECC_PANIC_MSG, index, ecc_ptfix_failed);
		return(1);
	}
	return(0);
	} /* primary-tag error */
} /* ecc_fixctag */

#ifdef ECC_DEBUG
/* circular trace buffers recording ecc_check_correctable() entries/hits */
int eccdebug_foundone=0;
__psunsigned_t eccdebug_badaddr[128], eccdebug_loc[128];
int eccdebug_syndrome[128];
int eccdebug_datalo[128], eccdebug_datahi[128], eccdebug_ecc[128], eccdebug_cnt[128];
int eccdebug_entry_cnt=0;
__psunsigned_t eccdebug_entry_loc[128];
#endif /* ECC_DEBUG */

#ifdef IP19
extern k_machreg_t get_config(void);

/* Scan the primary-line-sized portion of the secondary cacheline at
 * 'loc' for ECC errors, recording details of what was found in *edp.
 * Returns 0 when no error is found, the count of single-bit
 * (correctable) errors when only those are present, or -1 if any
 * multi-bit (uncorrectable) error was seen.
 */
int ecc_check_correctable(volatile uint * loc, err_desc_t *edp, volatile ecc_info_t *ecc_info_param)
{
	__psunsigned_t addr, startaddr, paddr;
	int lo_syn;
	error_info_t alt_err_info;
	uint tags[NUM_TAGS], data_ecc, dblwrd_mask;
	int foundone=0, mbe=0, plinesize=0;

	/* if not IP19 or no datap, then really can't check */
	if (loc == 0)
		return(0);
#ifdef ECC_DEBUG
	eccdebug_entry_loc[eccdebug_entry_cnt] = (__psunsigned_t)loc;
	eccdebug_entry_cnt++;
	if (eccdebug_entry_cnt == 128)
		eccdebug_entry_cnt = 0;
#endif /* ECC_DEBUG */
	if (get_config() & CONFIG_DB) {
		plinesize = 32;
		/* primary cacheline size is four double-words, which will be
		 * the resolution of the cache error exception.
		 */
		startaddr = (__psunsigned_t)loc & ~0x1f;
		paddr = PHYS_TO_K0(edp->e_paddr & ~0x1f);
		/* This following statement sets the correct initial bit
		 * position in the doubleword mask.
		 */
		dblwrd_mask = (1 << (((__psunsigned_t)loc & 0x60) >> 3));
	} else {
		plinesize = 16;
		/* primary cacheline size is two double-words, which will be
		 * the resolution of the cache error exception.
		 */
		startaddr = (__psunsigned_t)loc & ~0x0f;
		paddr = PHYS_TO_K0(edp->e_paddr & ~0x0f);
		/* This following statement sets the correct initial bit
		 * position in the doubleword mask.
		 */
		dblwrd_mask = (1 << (((__psunsigned_t)loc & 0x70) >> 3));
	}
	/* We scan that portion of the secondary cacheline which caused the
	 * cache error exception.  This corresponds to a primary cacheline
	 * which is 16 bytes (two doublewords) on the IP19.  Either
	 * doubleword may contain an error.
	 */
	for (addr = startaddr;addr < startaddr+plinesize;addr += 8,paddr +=8) {
		tags[TAGLO_IDX] = tags[TAGHI_IDX] = 0;
		alt_err_info.eidata_lo = *(uint *)addr;
		alt_err_info.eidata_hi = *(uint *)(addr+BYTESPERWD);
		_c_ilt_n_ecc(CACH_SD, paddr, tags, &data_ecc);
		alt_err_info.cbits_in = data_ecc;
		lo_syn = calc_err_info(DATA_CBITS, &alt_err_info);
		if (lo_syn) {
#ifdef ECC_DEBUG
			eccdebug_loc[eccdebug_foundone] = (__psunsigned_t)loc;
			eccdebug_cnt[eccdebug_foundone] = eccdebug_entry_cnt;
			eccdebug_badaddr[eccdebug_foundone] = addr;
			eccdebug_syndrome[eccdebug_foundone] = lo_syn;
			eccdebug_datalo[eccdebug_foundone] = alt_err_info.eidata_lo;
			eccdebug_datahi[eccdebug_foundone] = alt_err_info.eidata_hi;
			eccdebug_ecc[eccdebug_foundone] = alt_err_info.cbits_in;
			eccdebug_foundone++;
			if (eccdebug_foundone == 128)
				eccdebug_foundone = 0;
#endif /* ECC_DEBUG */
			switch (alt_err_info.syn_info.type) {
			case DB:	/* 1-bit err in data */
			case CB:
				/* record details only if no multi-bit error
				 * has already claimed the *edp fields */
				if (!mbe) {
					edp->e_s_taglo = tags[TAGLO_IDX];
					edp->e_prevbadecc = edp->e_badecc;
					edp->e_badecc = data_ecc;
					edp->e_syndrome = lo_syn;
					edp->e_lo_badval = alt_err_info.eidata_lo;
					edp->e_hi_badval = alt_err_info.eidata_hi;
					edp->e_paddr = K0_TO_PHYS(paddr);
				}
				edp->e_sbe_dblwrds |= dblwrd_mask;
				foundone++;
				break;
			case B2:	/* error is 2-bit or greater */
			case B3:
			case B4:
			case UN:
			default:
				edp->e_s_taglo = tags[TAGLO_IDX];
				edp->e_prevbadecc = edp->e_badecc;
				edp->e_badecc = data_ecc;
				edp->e_syndrome = lo_syn;
				edp->e_lo_badval = alt_err_info.eidata_lo;
				edp->e_hi_badval = alt_err_info.eidata_hi;
				edp->e_paddr = K0_TO_PHYS(paddr);
				edp->e_mbe_dblwrds |= dblwrd_mask;
				mbe++;
			} /* switch */
		}
		dblwrd_mask = dblwrd_mask << 1;
	}
	if (mbe)
		return(-1);
	else
		return(foundone);
}
#endif /* IP19 */

/* fix data error in 'loc' cache. 'index' indicates the frame of
 * variables being used during this invokation of ecc_handler.
 */
#if IP19
real_ecc_fixcdata(uint loc, int index, k_machreg_t *eccfp,
	volatile ecc_info_t *ecc_info_param)
#else
ecc_fixcdata(uint loc, int index, k_machreg_t *eccfp)
#endif
{
	err_desc_t *edp = (err_desc_t *)&(ECC_INFO(desc[index]));
	uint tags[NUM_TAGS];
	__psunsigned_t p_caddr = PHYS_TO_K0(edp->e_vaddr);
	uint pidx_test, pidx_max;
	error_info_t err_info;
#ifndef IP19
	uint data_syndrome, data_ecc;
	__psunsigned_t s_caddr = PHYS_TO_K0(SCACHE_PADDR(edp));
	volatile uint *p_cptr = (volatile uint *)p_caddr;
	eccdesc_t d_syn_info;
#endif
	__psunsigned_t prim_addr;
	volatile uint *datap=0;
	__psunsigned_t local_ecc_kvaddr;
#ifdef R4000PC
	/*
	 * If we are on an R4000PC or R4600, we only have primary
	 * caches, and we only have parity, so the best we can
	 * do is to invalidate the line, if it is clean.  Otherwise,
	 * we give up.
	 *
	 * However, we have higher level routines which can recover
	 * from some data errors.  Since neither the R4000 Rev. 2.2
	 * nor the R4600 Rev. 1.7 return the correct ECC check bits
	 * for the data cache, we cannot simply look at the check bits
	 * to find bad words.
*/
	if ((r4000_config & CONFIG_SC) != 0) {	/* 0 == scache present */
#ifdef _MEM_PARITY_WAR
		/* let the parity-workaround machinery attempt recovery */
		if (ecc_fixup_caches(loc, edp->e_paddr, edp->e_vaddr,
				edp->e_flags & E_PADDR_MC))
			return(0);
		else
#endif
			return(-1);
	}
#endif /* R4000PC */
	/* Currently the R4K does not return the ecc byte-checkbits
	 * for the double-word of data at the specified address during
	 * the index load tag cacheop.  The desired algorithm for
	 * dealing with primary cache data errors would basically
	 * panic if the line was dirty (since there is only parity,
	 * so we can't fix it), and invalidate the tag and refetch
	 * if it was clean (obviously always the case if the error
	 * was in the I-cache).  However, we must avoid infinite-ECC-
	 * exception-loops which would occur when the error was due
	 * to a stuck bit, for example, and wasn't fixed during the
	 * refetch.  To do this we must either save a 'sufficient'
	 * amount of history--whatever that amount might be--or be
	 * able to check whether the refetch fixed the error.  With
	 * this bug we can't do the latter, and the former is a
	 * rather unpalatable alternative, so we'll just panic for now,
	 * but only after determining that the paddrs match (otherwise
	 * it is probably either a) a 'phantom' exception caused
	 * when the bad line was replaced by a new one: the exception
	 * still occurs but the error is no longer in the line, or
	 * b) a manifestation of the R4K bug (fixed in 2.0) in which
	 * the vidx (and apparently the sidx also) info in the
	 * cacherr was bogus when a parity error was detected in
	 * the primary).
	 */
	if ((loc == CACH_PI) || (loc == CACH_PD)) {
		/* Try every possible PIDX */
#if IP19
		pidx_max = ((loc == CACH_PI) ?
			ecc_info_param->ecc_picache_size :
			ecc_info_param->ecc_pdcache_size) - NBPP;
#else
		pidx_max = ((loc == CACH_PI) ?
			picache_size : pdcache_size) - NBPP;
#endif
		for (pidx_test = 0; pidx_test <= pidx_max; pidx_test += NBPP) {
			edp->e_vaddr = pidx_test | (edp->e_vaddr & (NBPP-1));
			p_caddr = PHYS_TO_K0(edp->e_vaddr);
			_c_ilt(loc, p_caddr, tags);
			prim_addr = ((tags[TAGLO_IDX] & PADDRMASK) << PADDR_SHIFT);
			/* just check that the bits from taglo match the physaddr */
			if (prim_addr == (POFFSET_PADDR(edp))) {
				ecc_assign_msg(ECC_PANIC_MSG, index, ecc_p_data_err);
				return(1);
			}
		}
		/* if get to here, then no error found!!?? */
		ecc_assign_msg(ECC_ERROR_MSG, index, ecc_no_pdataerr);
		ecc_log_error(NO_ERROR, index);
		return(0);
	}

	/* error is in CACH_SI or CACH_SD */
	tags[TAGLO_IDX] = tags[TAGHI_IDX] = 0;
	/* do cached read to get values of dbl-words. This would
	 * cause another ecc exception but we have the SR_DE bit set.
	 */
#ifndef IP19
	err_info.eidata_lo = *p_cptr;
	err_info.eidata_hi = *(p_cptr+1);
	/* but fetch tag by 2ndary (physical) addr */
	_c_ilt_n_ecc(loc, s_caddr, tags, &data_ecc);
	edp->e_s_taglo = tags[TAGLO_IDX];
	edp->e_prevbadecc = edp->e_badecc;
	edp->e_badecc = data_ecc;
	err_info.cbits_in = (unchar)data_ecc;
	data_syndrome = calc_err_info(DATA_CBITS, &err_info);
	edp->e_syndrome = data_syndrome;
	edp->e_lo_badval = err_info.eidata_lo;
	edp->e_hi_badval = err_info.eidata_hi;
	d_syn_info = err_info.syn_info;
	edp->e_sbe_dblwrds = edp->e_mbe_dblwrds = 0;
	if (!data_syndrome) {	/* no error in this dbl word */
		ecc_assign_msg(ECC_ERROR_MSG, index, ecc_no_sdataerr);
		ecc_log_error(NO_ERROR, index);
		return(0);
	}
	/* If the line is clean we don't need to protect the data:
	 * invalidate, refill, and check it again.  Must invalidate
	 * all the primary lines it maps too.
	 */
	if (loc==CACH_SI || (loc==CACH_SD && CLEAN_S_TAG(edp->e_s_taglo))) {
		if (!_c_hinv(loc,s_caddr)) {	/* missed 2ndary! */
			ecc_assign_msg(ECC_ERROR_MSG, index, ecc_ft_hinv_m_sc);
			return(1);
		}
		/* refill via cached loads, then re-verify the checkbits */
		err_info.eidata_lo = *p_cptr;
		err_info.eidata_hi = *(p_cptr+1);
		_c_ilt_n_ecc(loc, s_caddr, tags, &data_ecc);
		err_info.cbits_in = data_ecc;
		data_syndrome = calc_err_info(DATA_CBITS, &err_info);
		if (data_syndrome) {	/* didn't fix it: panic */
			ecc_assign_msg(ECC_PANIC_MSG, index, ecc_sdcfix_failed);
			edp->e_prevbadecc = data_ecc;
			edp->e_2nd_syn = data_syndrome;
#ifdef DEBUG_ECC
			f_staglo = tags[TAGLO_IDX];
			_c_ilt(CACH_PD, p_caddr, tags);
			f_ptaglo = tags[TAGLO_IDX];
			f_loval = err_info.eidata_lo;
			f_hival = err_info.eidata_hi;
			f_p_caddr = p_caddr;
			f_s_caddr = s_caddr;
#endif
			return(1);
		} else {
			ecc_assign_msg(ECC_INFO_MSG, index, ecc_sdcfix_good);
			return(0);
		}
	} else {	/* dirty line: can't invalidate line and refetch */
		/* Now the severity of the error becomes relevant.
		 * If it is a one bit error we can force the line
		 * out to memory through the RMI--which corrects
		 * single-bit errors--then read it back and check
		 * if it is now correct.  Panic if not--probably
		 * a stuck bit.
		 */
		ASSERT(loc != CACH_SI);
		switch(d_syn_info.type) {
		case 0:		/* no error found */
		case DB:	/* 1-bit err in data: write out then refetch */
		case CB:
			break;
		case B2:	/* error is 2-bit or greater: can't fix it */
		case B3:
		case B4:
		case UN:
		default:
			eccfp[ECCF_PADDR] = edp->e_paddr;
			ecc_assign_msg(ECC_PANIC_MSG, index, ecc_md_sddfix_failed);
			return(1);
		} /* switch */
		_c_hwbinv(CACH_SD, s_caddr);
		/* now refetch the info and ensure that it is fixed */
		err_info.eidata_lo = *p_cptr;
		err_info.eidata_hi = *(p_cptr+1);
		_c_ilt_n_ecc(loc, s_caddr, tags, &data_ecc);
		err_info.cbits_in = data_ecc;
		data_syndrome = calc_err_info(DATA_CBITS, &err_info);
		if (data_syndrome) {	/* didn't fix it: panic */
			ecc_assign_msg(ECC_PANIC_MSG, index, ecc_sddfix_failed);
			edp->e_prevbadecc = data_ecc;
			edp->e_2nd_syn = data_syndrome;
#ifdef DEBUG_ECC
			f_staglo = tags[TAGLO_IDX];
			_c_ilt(CACH_PD, p_caddr, tags);
			f_ptaglo = tags[TAGLO_IDX];
			f_loval = err_info.eidata_lo;
			f_hival = err_info.eidata_hi;
			f_p_caddr = p_caddr;
			f_s_caddr = s_caddr;
#endif
			return(1);
		} else {
			ecc_assign_msg(ECC_INFO_MSG, index, ecc_sddfix_good);
			return(0);
		}
	} /* else dirty line */
#else /* IP19 */
	/* Currently code assumes that the primary-icache and primary-dcache
	 * linesizes are the same.  This is used to determine the number
	 * of doublewords which must be read from the secondary in order
	 * to check ECC values.  All IP19 systems currently have a
	 * primary cache linesize of 16 bytes (IB == DB == 0) so just
	 * verify this assumption in case it changes.
	 */
	if (get_config() & CONFIG_IB) {
		if (!(get_config() & CONFIG_DB)) {
			ecc_assign_msg(ECC_PANIC_MSG, index, ecc_mixed_psize);
			return(1);
		}
	} else {
		if (get_config() & CONFIG_DB) {
			ecc_assign_msg(ECC_PANIC_MSG, index, ecc_mixed_psize);
			return(1);
		}
	}
	/* The following code allows us to pickup the virtual address using
	 * an uncached load so as not to interfere with the state of the
	 * cache while we're examining the cause of the error.
*/
#if 0
	local_ecc_kvaddr = *(__psunsigned_t *)((K0_TO_K1(&ecc_kvaddr_vcecolor)));
#endif
	local_ecc_kvaddr = ecc_info_param->ecc_vcecolor;
	if (local_ecc_kvaddr) {
		int vcecolor=0;
		char *vceaddr=0;
		pde_t pde;
		extern uint ecc_tlbdropin(unsigned char *, caddr_t, pte_t);

		/* NOTE(review): the following statement is garbled in this
		 * copy of the file -- text appears to have been lost between
		 * '<' and 'e_paddr' (likely the vindex shift, the vceaddr
		 * computation and the pde/pte setup).  Recover the original
		 * from revision history before modifying this code.
		 */
		vcecolor = (edp->e_s_taglo & STAG_VINDEX)<e_paddr));
		ecc_tlbdropin(0, vceaddr, pde.pte);
		datap = (uint *)(((__psunsigned_t)vceaddr & ~POFFMASK)
			+poff(edp->e_paddr));
		err_info.eidata_lo = *datap;
		err_info.eidata_hi = *(datap+1);
	} else {
		/* VCE-color mapping not yet set up: too early to recover */
		ecc_assign_msg(ECC_PANIC_MSG, index, ecc_scerr_too_early);
		return(1);
	}
	/* but fetch tag by 2ndary (physical) addr */
	edp->e_sbe_dblwrds = edp->e_mbe_dblwrds = 0;
	if (ecc_check_correctable(datap,edp,ecc_info_param)==0) {
		/* no error in this dbl word */
		ecc_assign_msg(ECC_INFO_MSG, index, ecc_no_sdataerr);
		ecc_log_error(NO_ERROR, index);
		return(0);
	}
	/* If the line is clean we don't need to protect the data:
	 * invalidate, refill, and check it again.  Must invalidate
	 * all the primary lines it maps too.
	 *
	 * NOTE: If the cache error is reported due to an external request
	 * (i.e. ES is set), then we can actually have loc == CACH_SI but
	 * the actual problem might be in some other cacheline which is
	 * "dirty" (that other cacheline address is indicated by s_taglo).
	 * So checking for loc == CACH_SI is not sufficient unless we
	 * qualify it with ES==0.
	 *
	 * It's better to replace this check with a "simple" check for
	 * CLEAN_S_TAG().
	 */
	if (CLEAN_S_TAG(edp->e_s_taglo)) {
		if (!_c_hinv(loc,(__psunsigned_t)datap)) { /* missed 2ndary! */
			/* We can miss here IFF another cpu (or I/O) has
			 * referenced this line after we performed the
			 * ecc check which explicitly loaded data from this
			 * scacheline.
			 * So we check that the line is currently invalid,
			 * then we reload and check the ECC to make sure
			 * that a multiple-bit error did not occur.
			 */
			_c_ilt(loc, (__psunsigned_t)datap, tags);
			if ((tags[TAGLO_IDX] & SSTATEMASK) == SINVALID) {
				if (ecc_check_correctable(datap,edp,ecc_info_param)==0) {
					ecc_assign_msg(ECC_INFO_MSG, index, ecc_sinvalid_noerr);
					ecc_log_error(NO_ERROR, index);
					return(0);
				}
				ecc_assign_msg(ECC_ERROR_MSG, index, ecc_sinvalid_err);
				return(1);
			} else {
				ecc_assign_msg(ECC_ERROR_MSG, index, ecc_ft_hinv_m_sc);
				return(1);
			}
		}
		/* refill via cached loads, then re-verify the checkbits */
		err_info.eidata_lo = *datap;
		err_info.eidata_hi = *(datap+1);
		if (ecc_check_correctable(datap,edp,ecc_info_param) !=0 ) {
			/* didn't fix it: panic */
			ecc_assign_msg(ECC_PANIC_MSG, index, ecc_sdcfix_failed);
#ifdef DEBUG_ECC
			f_staglo = tags[TAGLO_IDX];
			_c_ilt(CACH_PD, p_caddr, tags);
			f_ptaglo = tags[TAGLO_IDX];
			f_loval = err_info.eidata_lo;
			f_hival = err_info.eidata_hi;
			f_p_caddr = p_caddr;
			f_s_caddr = s_caddr;
#endif
			return(1);
		} else {
			ecc_assign_msg(ECC_INFO_MSG, index, ecc_sdcfix_good);
			return(0);
		}
	} else {	/* dirty line: can't invalidate line and refetch */
		/* Now the severity of the error becomes relevant.
		 * If it is a one bit error we can force the line
		 * out to memory through the CC --which corrects
		 * single-bit errors--then read it back and check
		 * if it is now correct.  Panic if not--probably
		 * a stuck bit.
		 */
		if (ecc_check_correctable(datap,edp,ecc_info_param) < 0) {
			/* multi-bit error: unrecoverable */
			eccfp[ECCF_PADDR] = edp->e_paddr;
			ecc_assign_msg(ECC_PANIC_MSG, index, ecc_md_sddfix_failed);
			return(1);
		}
		_c_hwbinv(CACH_SD, (__psunsigned_t)datap);
		/* now refetch the info and ensure that it is fixed */
		err_info.eidata_lo = *datap;
		err_info.eidata_hi = *(datap+1);
		if (ecc_check_correctable(datap,edp,ecc_info_param) != 0) {
			/* didn't fix it: panic */
			ecc_assign_msg(ECC_PANIC_MSG, index, ecc_sddfix_failed);
#ifdef DEBUG_ECC
			f_staglo = tags[TAGLO_IDX];
			_c_ilt(CACH_PD, p_caddr, tags);
			f_ptaglo = tags[TAGLO_IDX];
			f_loval = err_info.eidata_lo;
			f_hival = err_info.eidata_hi;
			f_p_caddr = p_caddr;
			f_s_caddr = s_caddr;
#endif
			return(1);
		} else {
			ecc_assign_msg(ECC_INFO_MSG, index, ecc_sddfix_good);
			return(0);
		}
	} /* else dirty line */
#endif /* IP19 */
} /* ecc_fixcdata */

#if defined(_MEM_PARITY_WAR) || IP20 || IP22
/* Carve the cache-error handler's dedicated stack and log area out of
 * the first free physical pages; returns the updated first-free pfn.
 */
pfn_t
allocate_ecc_info(pfn_t fpage)
{
	/*
	 * Allocate stack and log area for cache error exception handler
	 * in dedicated uncached pages.
	 */
#ifdef _MEM_PARITY_WAR
	bzero((void *)PHYS_TO_K1(ECCF_ADDR(0)),ECCF_SIZE);
	CACHE_ERR_STACK_BASE_P =
		PHYS_TO_K1((ctob(fpage)+CACHE_ERR_STACK_SIZE));
	fpage += btoc(CACHE_ERR_STACK_SIZE);
	CACHE_ERR_ECCINFO_P = PHYS_TO_K1(ctob(fpage));
	fpage += btoc((sizeof(ecc_info) +
		perr_mem_init(((caddr_t) CACHE_ERR_ECCINFO_P) +
			sizeof(ecc_info))));
	init_ecc_info();
#else /* _MEM_PARITY_WAR */
	fpage += btoc(perr_mem_init((caddr_t) (PHYS_TO_K1(ctob(fpage)))));
#endif /* _MEM_PARITY_WAR */
	return(fpage);
}
#endif /* _MEM_PARITY_WAR || IP20 || IP22 */

#ifndef IP19
/* One-time setup of the ecc_info log area and message-pointer tables */
static void
init_ecc_info(void)
{
#ifdef _MEM_PARITY_WAR
	msg_addrs[ECC_PANIC_MSG] = (volatile char **)&ecc_info.ecc_panic_msg[0];
	msg_addrs[ECC_INFO_MSG] = (volatile char **)&ecc_info.ecc_info_msg[0];
	msg_addrs[ECC_ERROR_MSG] = (volatile char **)&ecc_info.ecc_error_msg[0];
#endif /* _MEM_PARITY_WAR */
	/* doing a bzero will cause both ecc_handler and
	 * ecc_cleanup/ecc_panic to skip the 0th slot 1st
	 * time around the circular buffer.
who cares. */
	bzero((void *)&ecc_info, sizeof(ecc_info));
#ifndef _MEM_PARITY_WAR
	ecc_info.eframep = CACHE_ERR_EFRAME;
	ecc_info.eccframep = CACHE_ERR_ECCFRAME;
#endif /* _MEM_PARITY_WAR */
	ecc_info_initialized = 1;
#ifdef R4000PC
	r4000_config = get_r4k_config();
#endif /* R4000PC */
} /* init_ecc_info */
#endif /* !IP19 */

#ifdef IP19
/* Queue message-id 'msg' of the given type in slot 'index' of the
 * (uncached) ecc_info frame and flag that ecc_cleanup must run.
 */
static
real_ecc_assign_msg(
	int msg_type,
	int index,
	char msg,
	volatile ecc_info_t *ecc_info_param)
{
	switch(msg_type) {
	case ECC_PANIC_MSG:
		ecc_info_param->ecc_panic_msg[index] = msg;
		break;
	case ECC_INFO_MSG:
		ecc_info_param->ecc_info_msg[index] = msg;
		break;
	case ECC_ERROR_MSG:
		ecc_info_param->ecc_error_msg[index] = msg;
		break;
	default:
		;
	}
	MARK_FOR_CLEANUP;	/* msg queued ==> need ecc_cleanup */
	return(0);
} /* ecc_assign_msg */

/* Print (and optionally clear) the queued message(s) at 'index' */
static int
real_ecc_print_msg(
	int msg_type,	/* message-type to print; -1 for all msgs at 'index' */
	uint index,	/* message-array index to print */
	int clear_it,	/* if panic'ing don't clear msg--can use symmon */
	int disp_hdr,	/* if non-zero print message-type before msg */
	uint cpu,	/* cpuid of failing cpu */
	volatile ecc_info_t *ecc_info_param)
{
	char *ppc;
	char *nameptr, ppindex;
	pfunc pptr = (pm_use_qprintf ? (pfunc)qprintf : printf);
	int i;

	switch (msg_type) {
	case ECC_ALL_MSGS:
		/* recurse once per concrete message type */
		for (i = ECC_PANIC_MSG; i <= ECC_ERROR_MSG; i++)
			real_ecc_print_msg(i,index,clear_it,disp_hdr,cpu,ecc_info_param);
		return(0);
	case ECC_PANIC_MSG:
	case ECC_INFO_MSG:
	case ECC_ERROR_MSG:
		break;
	default:
		return(-1);
	}
	switch(msg_type) {
	case ECC_PANIC_MSG:
		ppindex = ecc_info_param->ecc_panic_msg[index];
		break;
	case ECC_INFO_MSG:
		ppindex = ecc_info_param->ecc_info_msg[index];
		break;
	case ECC_ERROR_MSG:
		ppindex = ecc_info_param->ecc_error_msg[index];
		break;
	default:
		ppindex = 0;
	}
	/* translate the stored message-id into its message string */
	switch(ppindex) {
	case ecc_overrun_msg: ppc = real_ecc_overrun_msg; break;
	case ecc_eb_not_i: ppc = real_ecc_eb_not_i; break;
	case ecc_incons_err: ppc = real_ecc_incons_err; break;
	case ecc_ew_err: ppc = real_ecc_ew_err; break;
	case ecc_kernel_err: ppc = real_ecc_kernel_err; break;
	case ecc_user_err: ppc = real_ecc_user_err; break;
	case ecc_inval_loc: ppc = real_ecc_inval_loc; break;
	case ecc_no_ptagerr: ppc = real_ecc_no_ptagerr; break;
	case ecc_no_stagerr: ppc = real_ecc_no_stagerr; break;
	case ecc_ptfix_failed: ppc = real_ecc_ptfix_failed; break;
	case ecc_stfix_failed: ppc = real_ecc_stfix_failed; break;
	case ecc_no_pdataerr: ppc = real_ecc_no_pdataerr; break;
	case ecc_no_sdataerr: ppc = real_ecc_no_sdataerr; break;
	case ecc_sinvalid_noerr: ppc = real_ecc_sinvalid_noerr; break;
	case ecc_sinvalid_err: ppc = real_ecc_sinvalid_err; break;
	case ecc_sdcfix_failed: ppc = real_ecc_sdcfix_failed; break;
	case ecc_sdcfix_good: ppc = real_ecc_sdcfix_good; break;
	case ecc_sddfix_failed: ppc = real_ecc_sddfix_failed; break;
	case ecc_sddfix_good: ppc = real_ecc_sddfix_good; break;
	case ecc_md_sddfix_failed: ppc = real_ecc_md_sddfix_failed; break;
	case ecc_p_data_err: ppc = real_ecc_p_data_err; break;
	case ecc_inval_eloc: ppc = real_ecc_inval_eloc; break;
	case ecc_bad_s_tag: ppc = real_ecc_bad_s_tag; break;
	case ecc_ft_hinv_m_sc: ppc = real_ecc_ft_hinv_m_sc; break;
	case ecc_scerr_too_early: ppc = real_ecc_scerr_too_early; break;
	case ecc_ei_notdirty: ppc = real_ecc_ei_notdirty; break;
	case ecc_mixed_psize: ppc = real_ecc_mixed_psize; break;
	case ecc_ei_norecover: ppc = real_ecc_ei_norecover; break;
	case ecc_possible_ei: ppc = real_ecc_possible_ei; break;
	default: ppc = NULL;
	}
	nameptr = (char *)msg_strs[msg_type];
	if (ppc) {
		if (maxcpus > 1)
			pptr("CPU %d: ",cpu);
		pptr(" %s %s\n",(disp_hdr?nameptr : " "),ppc);
		if (clear_it) {
			switch(msg_type) {
			case ECC_PANIC_MSG:
				ecc_info_param->ecc_panic_msg[index] = 0;
				break;
			case ECC_INFO_MSG:
				ecc_info_param->ecc_info_msg[index] = 0;
				break;
			case ECC_ERROR_MSG:
				ecc_info_param->ecc_error_msg[index] = 0;
				break;
			default:
				;
			}
		}
	}
	return(0);
} /* ecc_print_msg */

#else /* !IP19 */

/* Queue message string 'msg' of the given type in slot 'index' and
 * flag that ecc_cleanup must run.
 */
static
ecc_assign_msg(
	int msg_type,
	int index,
	char *msg)
{
	msg_addrs[msg_type][index] = msg;
	MARK_FOR_CLEANUP;	/* msg queued ==> need ecc_cleanup */
	return(0);
} /* ecc_assign_msg */

/* Print (and optionally clear) the queued message(s) at 'index' */
static int
ecc_print_msg(
	int msg_type,	/* message-type to print; -1 for all msgs at 'index' */
	uint index,	/* message-array index to print */
	int clear_it,	/* if panic'ing don't clear msg--can use symmon */
	int disp_hdr,	/* if non-zero print message-type before msg */
	uint cpu)	/* cpuid of failing cpu */
{
	char **ppc;
	char *nameptr;
	pfunc pptr = (pm_use_qprintf ?
(pfunc)qprintf : printf); int i; switch (msg_type) { case ECC_ALL_MSGS: for (i = ECC_PANIC_MSG; i <= ECC_ERROR_MSG; i++) ecc_print_msg(i,index,clear_it,disp_hdr,cpu); return(0); case ECC_PANIC_MSG: case ECC_INFO_MSG: case ECC_ERROR_MSG: break; default: return(-1); } ppc = (char **)msg_addrs[msg_type]; nameptr = (char *)msg_strs[msg_type]; if (ppc[index]) { #if MP if (maxcpus > 1) pptr("CPU %d: ",cpu); #endif pptr(" %s %s\n",(disp_hdr?nameptr : " "),ppc[index]); if (clear_it) ppc[index] = NULL; } return(0); } /* ecc_print_msg */ #endif /* !IP19 */ #define PTAG_PARITY_BIT 0x1 /* ptaglo parity bit is #0 */ #define PTAG_1ST_DATA_BIT 6 /* low 6 bits are undefined + parity */ #define PTAG_PTAGLO_BITS 24 #define PTAG_PSTATE_BITS 2 #define PTAG_DATA_BITS (PTAG_PTAGLO_BITS+PTAG_PTAG_PSTATE_BITS) /* ecc_bad_ptag(taglo): Determine if the ecc/parity in 'taglo' is * correct. Calculate the even parity for the 26-bit field (5 undefined * bits plus low bit == parity bit). Return 0 if parity is correct, else 1. 
*/
/* Computes even parity over ptaglo bits 6..31 and compares it against
 * the parity bit stored in bit 0.  Returns 0 when the stored parity
 * matches (tag good), 1 when it does not (tag bad).
 */
static
ecc_bad_ptag(uint taglo)
{
	uint bit = (0x1 << PTAG_1ST_DATA_BIT);
	int numsetbits = 0;
	uint pbit = 0;
	int i;

	for (i = PTAG_1ST_DATA_BIT; i < BITSPERWORD; bit <<= 1, i++) {
		if (taglo & bit)
			numsetbits++;
	}
	if (numsetbits & 0x1)	/* odd # of bits; set p to even it */
		pbit = 1;
	if ((taglo & 0x1) == pbit)
		return(0);	/* computed parity matches ptaglo's */
	else
		return(1);
} /* ecc_bad_ptag */

/* Bump the per-type error counter indexed by 'where'; if 'where' is out
 * of range, queue an error message instead and return 1.
 */
#if IP19
static
real_ecc_log_error(int where, int index, volatile ecc_info_t *ecc_info_param)
#else
volatile int inval_eloc = 0;
static
ecc_log_error(int where, int index)
#endif
{
	if (where < 0 || where >= ECC_ERR_TYPES) {
		ecc_assign_msg(ECC_ERROR_MSG, index, ecc_inval_eloc);
#if IP19
		/* avoid global references which generate cached accesses */
		ecc_info_param->ecc_inval_eloc_where = where;
#else
		inval_eloc = where;
#endif
		return(1);
	}
	ECC_INFO(ecc_err_cnts)[where]++;
	return(0);
} /* ecc_log_error */

/* First set of trees and structs are for computing the 8 checkbits
 * that accompany each set of double-words in memory and secondary
 * cache: i.e. the data trees */
#define ECC8B_DTREE7H	0xff280ff0
#define ECC8B_DTREE7L	0x88880928
#define ECC8B_DTREE6H	0xfa24000f
#define ECC8B_DTREE6L	0x4444ff24
#define ECC8B_DTREE5H	0x0b22ff00
#define ECC8B_DTREE5L	0x2222fa32
#define ECC8B_DTREE4H	0x0931f0ff
#define ECC8B_DTREE4L	0x11110b21
#define ECC8B_DTREE3H	0x84d08888
#define ECC8B_DTREE3L	0xff0f8c50
#define ECC8B_DTREE2H	0x4c9f4444
#define ECC8B_DTREE2L	0x00ff44d0
#define ECC8B_DTREE1H	0x24ff2222
#define ECC8B_DTREE1L	0xf000249f
#define ECC8B_DTREE0H	0x14501111
#define ECC8B_DTREE0L	0x0ff014ff

/* one 64-bit parity-tree mask, split into high/low 32-bit words */
struct d_emask {
	uint d_emaskhi;
	uint d_emasklo;
};

struct d_emask d_ptrees[] = {
	{ ECC8B_DTREE0H, ECC8B_DTREE0L },
	{ ECC8B_DTREE1H, ECC8B_DTREE1L },
	{ ECC8B_DTREE2H, ECC8B_DTREE2L },
	{ ECC8B_DTREE3H, ECC8B_DTREE3L },
	{ ECC8B_DTREE4H, ECC8B_DTREE4L },
	{ ECC8B_DTREE5H, ECC8B_DTREE5L },
	{ ECC8B_DTREE6H, ECC8B_DTREE6L },
	{ ECC8B_DTREE7H, ECC8B_DTREE7L },
};

/* Next, the data necessary for computing the 7 checkbits
 * for the 25-bit secondary cache tags: i.e. the tag trees.
 */
#define ECC7B_TTREE6	0x0a8f888
#define ECC7B_TTREE5	0x114ff04
#define ECC7B_TTREE4	0x2620f42
#define ECC7B_TTREE3	0x29184f0
#define ECC7B_TTREE2	0x10a40ff
#define ECC7B_TTREE1	0x245222f
#define ECC7B_TTREE0	0x1ff1111

struct t_emask {
	uint t_emask;
};

struct t_emask t_ptrees[] = {
	ECC7B_TTREE0,
	ECC7B_TTREE1,
	ECC7B_TTREE2,
	ECC7B_TTREE3,
	ECC7B_TTREE4,
	ECC7B_TTREE5,
	ECC7B_TTREE6,
};

/* 2ndary cache tags consist of 25 data bits monitored by 7 checkbits */
#define STAG_DBIT_SIZE	25
#define STAG_CBIT_SIZE	7
#define STAG_SIZE	(STAG_DBIT_SIZE+STAG_CBIT_SIZE)

/* S_taglo field format:
 * bitpositions-->  31..13  12..10  9..7  6..0
 * fields      -->  p_addr  cstate  vind  ecc
 * Internal format:
 *   31..25  24..22  21..19  18..0
 *   ecc     cstate  vind    p_addr
 * the following defines tell ecc_swap_s_tag() how to shift the fields to
 * create the internal format from the s_taglo format.
*/
/* sizes of the fields */
#define S_TAG_PADDR_BITS	19
#define S_TAG_CS_BITS		3
#define S_TAG_VIND_BITS		3
#define S_TAG_ECC_CBITS		7
/* bit positions of the fields in the s_taglo format */
#define S_TAG_ECC_BITPOS	0
#define S_TAG_VIND_BITPOS	(S_TAG_ECC_BITPOS+S_TAG_ECC_CBITS)	/* 7 */
#define S_TAG_CS_BITPOS		(S_TAG_VIND_BITPOS+S_TAG_VIND_BITS)	/* 10 */
#define S_TAG_PADDR_BITPOS	(S_TAG_CS_BITPOS+S_TAG_CS_BITS)		/* 13 */
/* bit positions of the fields in the internal format */
#define S_INT_PADDR_BITPOS	0
#define S_INT_VIND_BITPOS	(S_INT_PADDR_BITPOS+S_TAG_PADDR_BITS)	/* 19 */
#define S_INT_CS_BITPOS		(S_INT_VIND_BITPOS+S_TAG_VIND_BITS)	/* 22 */
#define S_INT_ECC_BITPOS	(S_INT_CS_BITPOS+S_TAG_CS_BITS)		/* 25 */
/* masks for the four fields in the 2ndary-cache-internal format:
 * < ecc, cstate, vindex, p_addr >
 */
#define S_INT_PADDR_MASK	0x0007ffff
#define S_INT_VIND_MASK		0x00380000
#define S_INT_CS_MASK		0x01c00000
#define S_INT_ECC_MASK		0xfe000000

/* Below macros enable easy swapping of each of the 4 2ndary tag fields.
 * Note that the mask used by the conversion macros in extracting the
 * bits of the field depends on the direction of the swap
 */
/* paddr: tag bit 13 <--> internal bit 0 */
#define SADDR_SWAP_ROLL	(S_TAG_PADDR_BITPOS-S_INT_PADDR_BITPOS)/* 13 */
/* the saddr conversion rolls opposite from the other 3 fields: TagTOInternal
 * rolls addr DOWN to bottom */
#define SADDR_TTOI(S_TAG)	((S_TAG & SADDRMASK) >> SADDR_SWAP_ROLL)
#define SADDR_ITOT(S_TAG)	((S_TAG & S_INT_PADDR_MASK) << SADDR_SWAP_ROLL)
/* cache state: tag bit 10 <--> internal bit 22 */
#define SSTATE_SWAP_ROLL	(S_INT_CS_BITPOS-S_TAG_CS_BITPOS)	/* 12 */
#define SSTATE_TTOI(S_TAG)	((S_TAG & SSTATEMASK) << SSTATE_SWAP_ROLL)
#define SSTATE_ITOT(S_TAG)	((S_TAG & S_INT_CS_MASK) >> SSTATE_SWAP_ROLL)
/* vindex: tag bit 7 <--> internal bit 19 */
#define SVIND_SWAP_ROLL		(S_INT_VIND_BITPOS-S_TAG_VIND_BITPOS)	/* 12 */
#define SVIND_TTOI(S_TAG)	((S_TAG & SVINDEXMASK) << SVIND_SWAP_ROLL)
#define SVIND_ITOT(S_TAG)	((S_TAG & S_INT_VIND_MASK) >> SVIND_SWAP_ROLL)
/* ecc: tag bit 0 <--> internal bit 25 */
#define SECC_SWAP_ROLL		(S_INT_ECC_BITPOS-S_TAG_ECC_BITPOS)	/* 25 */
#define SECC_TTOI(S_TAG)	((S_TAG & SECC_MASK) << SECC_SWAP_ROLL)
#define SECC_ITOT(S_TAG)	((S_TAG & S_INT_ECC_MASK) >> SECC_SWAP_ROLL)

/* ecc_swap_s_tag() converts between the field-ordering of the taglo
 * register (holding a 2ndary cache tag) and the internal format
 * actually used in the secondary caches.  The conversion may be
 * done in either direction.  The routine requires the ctag_swap_info
 * structure
 */
#define TAG_TO_INTERNAL	1
#define INTERNAL_TO_TAG	2

typedef struct tag_swap_info {
	uint ts_in_val;	/* value to be swapped */
	uint ts_out_32;	/* INTERNAL_TO_TAG sets 32-bit val (including cbits) */
	uint ts_out_25;	/* TAG_TO_INTERNAl sets 25-bit val (excluding cbits) */
	uint ts_cbits;	/* both directions set this field */
} tag_swap_info_t;

int ecc_swap_s_tag(uint, tag_swap_info_t *);

/* tag_dbpos is a lookup-table which translates the bit-positions of data
 * errors as indicated by syndromes to their counterparts in the taglo format.
 * Internally the low 19 bits contain the paddr; in taglo the paddr field
 * begins at 13.  The next 6 bits internally contain the vindex and state
 * fields; in the tag reg these are ordered the same but begin at bit 7.
 */
uint tag_dbpos[] = {
	/* paddr --> */ 13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
	/* vind & state--> */ 7, 8, 9, 10,11,12
};

/* computes relevant info about ECC errors, and returns it in an
 * error_info_t struct.  ecc_type is DATA_CBITS or TAG_CBITS and
 * determines whether calc_err_info will compute the 8-bit checkbit
 * and syndrome of two data-words, or the 7-bit info for a 25-bit
 * second-level tag.
*/
#ifdef IP19
real_calc_err_info(int ecc_type, error_info_t *e_infop,
	volatile ecc_info_t *ecc_info_param)
#else
calc_err_info(int ecc_type, error_info_t *e_infop)
#endif
{
	uint shi, slo;
	uint true_val = 0;
	register int pbithi, pbitlo, pbit;
	register int i;
	register int j;
	struct d_emask *dep;
	struct t_emask *tep;
	unchar cbits = 0;
	uint lo_in, hi_in;
	tag_swap_info_t swap_info;

	if (ecc_type == DATA_CBITS) {
		/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
		/* NOTE(review): the lo/hi inputs are swapped here,
		 * presumably to match the checkbit-tree bit ordering --
		 * confirm before changing.
		 */
		lo_in = e_infop->eidata_hi;
		hi_in = e_infop->eidata_lo;
		/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
#if IP19
		dep = &ecc_info_param->ecc_d_ptrees[0];
#else
		dep = &d_ptrees[0];
#endif
		/* fold each of the 8 parity trees over the doubleword */
		for (i = 0; i < 8; i++, dep++) {
			shi = hi_in & dep->d_emaskhi;
			slo = lo_in & dep->d_emasklo;
			pbithi = 0;
			pbitlo = 0;
			for (j = 0; j < 32; j++) {
				if (shi & (1 << j))
					pbithi++;
				if (slo & (1 << j))
					pbitlo++;
			}
			if ((pbithi + pbitlo) & 1)
				cbits |= 1 << i;
		}
		e_infop->cbits_out = cbits;
		/* syndrome == 0 means stored and computed checkbits agree */
		e_infop->syndrome = e_infop->cbits_in ^ cbits;
		e_infop->syn_info = data_eccsyns[(int)e_infop->syndrome];
		return(e_infop->syndrome);
	} else if (ecc_type == TAG_CBITS) {
		/* Internally the R4k stores the fields comprising
		 * secondary tags differently than the format it
		 * uses for s_ptaglo.  The cache format is:
		 * <ecc,cstate,vind,paddr>; the STaglo format is:
		 * <paddr,cstate,vind,ecc>.  The ECC is computed and
		 * checked with the fields arranged as they are
		 * internally; therefore we must swap them before
		 * computing.  TAG_TO_INTERNAL sets ts_out_25 to
		 * the 25-bit data value and ts_cbits to the 7-bit
		 * ecc field from the tag.
		 */
		swap_info.ts_in_val = e_infop->eis_taglo;
		ecc_swap_s_tag(TAG_TO_INTERNAL, &swap_info);
		true_val = swap_info.ts_out_25;
#ifdef ECC_DEBUG
		printf("after tag swap, value 0x%x, cbits 0x%x\n",true_val,
			swap_info.ts_cbits);
#endif
		/* if caller set high bit (never valid in 7-bit cbit field),
		 * use the cbits in the taglo as cbits_in for xor; else
		 * it holds the cbits for the bad tag */
		if (SET_CBITS_IN & e_infop->cbits_in)
			e_infop->cbits_in = (unchar)swap_info.ts_cbits;
#if IP19
		tep = &ecc_info_param->ecc_t_ptrees[0];
#else
		tep = &t_ptrees[0];
#endif
		/* fold each of the 7 parity trees over the 25-bit tag */
		for (i = 0; i < 7; i++, tep++) {
			shi = true_val & tep->t_emask;
			pbit = 0;
			for (j = 0; j < 25; j++) {
				if (shi & (1 << j))
					pbit++;
			}
			if (pbit & 1)
				cbits |= 1 << i;
		}
		e_infop->cbits_out = cbits;
		e_infop->syndrome = e_infop->cbits_in ^ cbits;
		e_infop->syn_info = tag_eccsyns[(int)e_infop->syndrome];
		return(e_infop->syndrome);
	} else
		return(0x80000000);
} /* calc_err_info */

/* the format of the taglo register when it contains a 2ndary tag is
 * <paddr,cstate,vindex,ecc>.  Internally the fields are ordered
 * <ecc,cstate,vindex,paddr>, and the checkbits are generated and
 * checked with the fields ordered in the internal format.
 * ecc_swap_s_tag() converts the fields into either format.
*/
ecc_swap_s_tag(
	uint which_way,
	tag_swap_info_t *swap_infop)
{
	uint swapped_val;
	uint in_val = swap_infop->ts_in_val;

	switch (which_way) {
	case TAG_TO_INTERNAL:
		/* swap, then set 25-bit value and cbits */
		swapped_val = SADDR_TTOI(in_val);
		swapped_val |= SSTATE_TTOI(in_val);
		swapped_val |= SVIND_TTOI(in_val);
		swap_infop->ts_out_25 = swapped_val;
		swap_infop->ts_out_32 = (swapped_val | SECC_TTOI(in_val));
		/* low 7 bits of TagLo reg are checkbits */
		swap_infop->ts_cbits = (in_val & SECC_MASK);
		return(0);
	case INTERNAL_TO_TAG:
		/* set entire 32-bits plus cbits */
		swapped_val = SADDR_ITOT(in_val);
		swapped_val |= SSTATE_ITOT(in_val);
		swapped_val |= SVIND_ITOT(in_val);
		swapped_val |= SECC_ITOT(in_val);
		swap_infop->ts_out_32 = swapped_val;
		/* high 7 bits of internal format are checkbits */
		swap_infop->ts_cbits = SECC_ITOT(in_val);
		return(0);
	default:
		printf("ecc_swap_s_tag: illegal direction (%d)\n",which_way);
		return(-1);
	}
} /* ecc_swap_s_tag */

/* given a single-bit error type (CB or DB) and the bit position of that
 * error in the R4K's internal format (i.e. as it is stored in the 2ndary
 * cache), xlate_bit returns the bitposition of its counterpart in the
 * taglo format.
 */
#if IP19
real_xlate_bit(enum error_type etype, uint bitpos,
	volatile ecc_info_t *ecc_info_param)
#else
xlate_bit(enum error_type etype, uint bitpos)
#endif
{
	/* ecc field is 6..0 in taglo; the syndrome differentiates between
	 * data and checkbit errors, (numbering them 0..24 and 0..6 resp.)
	 * so no translation is necessary for cbit errors.
	 */
	if (etype == CB) {
		ASSERT(bitpos < STAG_CBIT_SIZE);
		return(bitpos);
	} else {
		ASSERT(bitpos < STAG_DBIT_SIZE);
#if IP19
		return(ecc_info_param->ecc_tag_dbpos[bitpos]);
#else
		return(tag_dbpos[bitpos]);
#endif
	}
} /* xlate_bit */

#ifdef IP19
/* One-time setup of the uncached ecc_info frame: stashes uncached (K1)
 * aliases of the global ECC tables and sundry constants so the cache
 * error handler can run without generating cached references.
 */
void
ip19_init_ecc_info(
	__psunsigned_t	vceaddr )
{
	ecc_info_ptr.ecc_vcecolor = vceaddr;

	/* these fields are only necessary because the compiler tends to
	 * generate the constants needed and place them in a globally
	 * addressed location (either K0 or off of "gp"), so loading the
	 * constants involves cached accesses.  So we perform the
	 * conversions once and just load the (uncached) pointer from
	 * the ecc_info array which is accessed uncached too.
	 */
	ecc_info_ptr.everror_ext = EVERROR_EXT;
	/* Following global structures need to be reference uncached by
	 * ecc_handler and friends.
	 */
	ecc_info_ptr.ecc_tag_dbpos = (uint *)K0_TO_K1(&tag_dbpos);
	ecc_info_ptr.ecc_d_ptrees = (struct d_emask *)K0_TO_K1(&d_ptrees);
	ecc_info_ptr.ecc_t_ptrees = (struct t_emask *)K0_TO_K1(&t_ptrees);
	ecc_info_ptr.ecc_data_eccsyns = (eccdesc_t*)K0_TO_K1(&real_data_eccsyns);
	ecc_info_ptr.ecc_tag_eccsyns = (eccdesc_t*)K0_TO_K1(&real_tag_eccsyns);
	ecc_info_ptr.ecc_k0size_less1 = K0SIZE-1;
	ecc_info_ptr.ecc_physmem = physmem;
	ecc_info_ptr.ecc_picache_size = picache_size;
	ecc_info_ptr.ecc_pdcache_size = pdcache_size;
	ecc_info_ptr.ecc_attempt_recovery = 0;
#ifndef IP19_CACHEERRS_FATAL
	{
	extern int r4k_corrupt_scache_data;
	if (r4k_corrupt_scache_data)
		ecc_info_ptr.ecc_attempt_recovery = 1;
	}
#endif
	/* Following address should be cached for test to work properly */
	ecc_info_ptr.ecc_dummyline =
		((__psunsigned_t)(&dummy_cacheline[16]) & ~(SCACHE_LINESIZE-1));
	ecc_info_ptr.ecc_info_inited = 1;
}
#endif /* IP19 */

#define NUM_CE_BITS	8
#define SIDX_VAL(x)	(x & CACHERR_SIDX_MASK)
#define PIDX_VAL(x)	((x & CACHERR_PIDX_MASK) << 12)

#define CEBUFSIZ 180

/* if sindex == -1, print all frames from read ptr to write ptr;
 * else just the specified frame */
void
print_ecc_info(sindex,eindex) int sindex; int eindex; { ecc_info_t *eip = (ecc_info_t *)&ecc_info_ptr; err_desc_t *edp; /* ptr to set of variables to set this time */ __uint64_t eaddr; int i, loc; if (sindex == -1) { sindex = eip->ecc_r_index; eindex = eip->ecc_w_index; } if (sindex < 0) sindex = 0; if (eindex < 0) eindex = 0; if (sindex >= ECC_FRAMES) sindex = ECC_FRAMES-1; if (eindex >= ECC_FRAMES) eindex = ECC_FRAMES-1; if (eindex < sindex) eindex = sindex; if (sindex != eindex) qprintf("\necc_info for slots %d through %d\n",sindex,eindex); else qprintf("\necc_info for slot %d\n",sindex); #ifndef _MEM_PARITY_WAR qprintf(" efptr 0x%x eccfptr 0x%x, ", eip->eframep, eip->eccframep); #endif /* _MEM_PARITY_WAR */ qprintf(" w_ind %d r_ind %d clean %d c_cnt %d flags 0x%x\n", eip->ecc_w_index, eip->ecc_r_index, eip->needs_cleanup, eip->cleanup_cnt, eip->ecc_flags); qprintf(" err cnts: "); for (i = 0; i < ECC_ERR_TYPES; i++ ) qprintf("%s %d ",err_type_names[i], ecc_info_ptr.ecc_err_cnts[i]); qprintf("\n\n"); for (i = sindex; i <= eindex; i++) { edp = (err_desc_t *)&(ecc_info_ptr.desc[i]); if (!edp->e_cache_err) { qprintf("SLOT #%d empty\n",i); continue; } qprintf("SLOT #%d:\n",i); pm_use_qprintf = 1; #if IP19 real_ecc_print_msg(ECC_ALL_MSGS, i, 0, 1, edp->e_cpuid, &ecc_info_ptr); #else ecc_print_msg(ECC_ALL_MSGS, i, 0, 1, edp->e_cpuid); #endif pm_use_qprintf = 0; eaddr = edp->e_paddr; loc = edp->e_location; if (loc < 0 || loc > SYSAD) loc = BAD_LOC; qprintf(" %s (%d) %s (%d) error:\n", error_loc_names[loc], edp->e_location, (edp->e_tag_or_data == DATA_ERR ? "data" : "tag"), edp->e_tag_or_data); qprintf(" sr %R\n",edp->e_sr, #if R4000 && R10000 IS_R10000() ? r10k_sr_desc : #endif sr_desc); qprintf(" cache_err %R, epc 0x%x\n", edp->e_cache_err, cache_err_desc, edp->e_error_epc); qprintf(" S-taglo %R%sP-taglo %R\n", edp->e_s_taglo, #if R4000 && R10000 IS_R10000() ? r10k_s_taglo_desc : #endif /* R4000 && R10000 */ s_taglo_desc, (edp->e_p_taglo ? 
"\n " : " "), edp->e_p_taglo, p_taglo_desc); qprintf(" paddr %llx vaddr %x syn 0x%x user %d pid %d\n", edp->e_paddr, edp->e_vaddr, edp->e_syndrome, edp->e_user, (__psint_t)edp->e_pid); #ifdef _MEM_PARITY_WAR qprintf(" efptr 0x%x eccfptr 0x%x, ", (__psunsigned_t)edp->e_eframep, (__psunsigned_t)edp->e_eccframep); #endif /* _MEM_PARITY_WAR */ if (edp->e_prevbadecc) qprintf(" prevbadecc %x ",edp->e_prevbadecc); if (edp->e_2nd_syn) qprintf(" 2nd_syn %x\n",edp->e_2nd_syn); else qprintf("\n"); if (edp->e_tag_or_data == DATA_ERR) qprintf(" lo_val 0x%x hi_val 0x%x badecc %x syn 0x%x\n", edp->e_lo_badval, edp->e_hi_badval, edp->e_badecc, edp->e_syndrome); else if (edp->e_location==CACH_SI || edp->e_location==CACH_SD) /* secondary tag: print ecc, syndrome and staglo */ qprintf(" S_Tag %R badecc 0x%x, syn 0x%x, addr %llx\n", edp->e_s_taglo, #if R4000 && R10000 IS_R10000() ? r10k_s_taglo_desc : #endif /* R4000 && R10000 */ s_taglo_desc, edp->e_badecc, edp->e_syndrome, edp->e_paddr); else if (edp->e_location==CACH_PI || edp->e_location==CACH_PD) /* primary tag: print p_taglo */ qprintf(" PTagLo %R, Vaddr 0x%x\n", edp->e_p_taglo,p_taglo_desc,edp->e_vaddr); if (edp->e_location == CACH_PI || edp->e_location == CACH_PD) eaddr = edp->e_vaddr; pm_use_qprintf = 1; print_ecctype(edp->e_location, edp->e_tag_or_data, edp->e_syndrome, eaddr, 1, edp->e_cpuid); pm_use_qprintf = 0; } #if DEBUG_ECC if (f_s_caddr) { qprintf(" f_ vars:\n lov 0x%x hiv 0x%x pcad %x scad %x\n", f_loval, f_hival, f_p_caddr, f_s_caddr); qprintf(" P-lo %R%sS-lo %R\n", f_ptaglo,p_taglo_desc, (f_ptaglo ? "\n " : " "), f_staglo, #if R4000 && R10000 IS_R10000() ? r10k_s_taglo_desc : #endif /* R4000 && R10000 */ s_taglo_desc); qprintf(" cooked 0x%x, f_d_ecc 0x%x\n",f_cooked_ecc,f_d_ecc); qprintf(" P-lo1 %R%sS-lo1 %R\n", f_ptaglo1,p_taglo_desc, (f_ptaglo1 ? "\n " : " "), f_staglo1, #if R4000 && R10000 IS_R10000() ? 
r10k_s_taglo_desc : #endif /* R4000 && R10000 */ s_taglo_desc); } #endif /* DBEUG_ECC */ } /* print_ecc_info */ void idbg_ecc_info(void) { register int i; qprintf(" err cnts:\n "); for (i = 0; i < ECC_ERR_TYPES; i++ ) #if IP19 qprintf("%s %d ",err_type_names[i],ecc_info_ptr.ecc_err_cnts[i]); #else qprintf("%s %d ",err_type_names[i],ecc_info.ecc_err_cnts[i]); #endif qprintf("\n\n"); } static int print_ecctype( int loc, int ecc_type, uint syndrome, __uint64_t eaddr, int printerr, uint cpu) { eccdesc_t syn_info, *syntab_ptr; uint es_tsize; pfunc pptr = (pm_use_qprintf ? (pfunc)qprintf : printf); if (ecc_type == D_AND_T_ERR) /* ecc info will be on the tag error */ ecc_type = TAG_CBITS; if (loc < 0 || loc > SYSAD) loc = BAD_LOC; if (ecc_type == TAG_CBITS) { es_tsize = ECCSYN_TABSIZE(real_tag_eccsyns); #if IP19 /* It's safe to use the ecc_info_ptr since this routine is * invoked from ecc_cleanup so it's safe to perform the * 'gp' relative accesses the compiler generates in the * K0_TO_K1 macro expansion. Note that referencing the * tag_eccsyns array is uncached. */ syntab_ptr = ecc_info_ptr.ecc_tag_eccsyns; #else syntab_ptr = tag_eccsyns; #endif } else { es_tsize = ECCSYN_TABSIZE(real_data_eccsyns); #if IP19 /* It's safe to use the ecc_info_ptr since this routine is * invoked from ecc_cleanup so it's safe to perform the * 'gp' relative accesses the compiler generates in the * K0_TO_K1 macro expansion. Note that referencing the * data_eccsyns array is uncached. */ syntab_ptr = ecc_info_ptr.ecc_data_eccsyns; #else syntab_ptr = data_eccsyns; #endif } if (syndrome >= es_tsize) { if (printerr) { #if MP if (maxcpus > 1) pptr("CPU %d: ",cpu); #endif pptr("print_ecctype(): invalid %s syndrome (%d)\n", (ecc_type == TAG_CBITS ? 
"tag" : "data"),es_tsize); } return(-1); } syn_info = syntab_ptr[syndrome]; #ifdef ECC_DEBUG #if MP if (maxcpus > 1) pptr("CPU %d: ",cpu); #endif pptr("syndrome 0x%x, type 0x%x, value 0x%x\n",syndrome, syn_info.type, syn_info.value); #endif /* ECC_DEBUG */ #if MP if (maxcpus > 1) pptr("CPU %d: ",cpu); #endif pptr(" %s: ", error_loc_names[loc]); switch (syn_info.type) { case OK: #ifdef IP19 pptr("Syndrome at addr 0x%llx normal! Error in evicted line handled by CC\n",eaddr); #else pptr("??!?!Syndrome at addr 0x%llx normal!\n",eaddr); #endif return(-2); case UN: case B2: case B3: if (ecc_type == TAG_CBITS) pptr("%s TAG error in secondary cache at addr 0x%llx\n", etstrings[syn_info.type],eaddr); else pptr("%s DATA error in doubleword at addr 0x%llx\n", etstrings[syn_info.type],eaddr); return(0); case DB: case CB: if (ecc_type == TAG_CBITS) pptr("One-bit (%s%d) TAG err; 2nd cache: addr 0x%llx\n", etstrings[syn_info.type],syn_info.value,eaddr); else pptr("One-bit (%s%d) DATA err: dbl-word addr 0x%llx\n", etstrings[syn_info.type],syn_info.value,eaddr); return(0); default: if (printerr) pptr("Unknown eccdesc_t type (%d)\n",syn_info.type); return(-1); } } /* print_ecctype */ #endif /* R4000 */ #if (defined(R4000) && defined(_FORCE_ECC)) /* each double-word in memory has an 8-bit ECC checkbit value that * is computed and stored with it. 
*/
/* user-visible descriptor for one doubleword plus its ECC checkbits */
typedef struct ecc_data_word {
	uint hi_word;
	uint lo_word;
	u_char ecc_val;
} ecc_data_word_t;

/* targets for forced ECC errors (mirror of force_ecc_where in syssgi.h) */
#define IN_PD 0
#define IN_PI 1
#define IN_SD 2
#define IN_SI 3
#define IN_MEM 4

volatile int force_verbose = 0;		/* nonzero => chatty debug output */
volatile int missed_2nd = 0;		/* set if 2ndary hwbinv missed */
volatile uint v_orig_ecc, n_ecc, xor_ecc;	/* debug copies of ecc math */
volatile uint used_sr = -1;

extern void uncached(void);
extern void setecc(int);
extern void runcached(void);

/* 'force_ecc_where' enum and 'ecc_data_word_t' typedef in sys/syssgi.h */
/* Deliberately inject an ECC/parity error for testing the error handlers.
 * inwhat selects the target (primary/secondary cache, memory); k1addr is
 * the K1SEG address to corrupt; ecc_info_param (copied in from user space)
 * supplies the doubleword data and the checkbits to plant.
 * Returns 0 on success, EFAULT if the user buffer can't be copied.
 * NOTE: the interrupt-disable / uncached() / SR_CE sequences below are
 * order-critical; do not reorder statements in this function.
 */
int
_force_ecc(inwhat, k1addr, ecc_info_param)
int inwhat;	/* IN_{PD,PI,SD,SI,MEM, or IO3 (MEM via IO3)} */
__psunsigned_t k1addr;	/* force ecc error at this K1SEG address */
ecc_data_word_t *ecc_info_param;
{
	ecc_data_word_t new_ecc;
	__psunsigned_t k0addr;
	__psunsigned_t physaddr;
	volatile int *k1ptr;	/* NOTE(review): set but never read below */
	volatile int *k0ptr;
	volatile int k0oneoff;
	__psunsigned_t pmem = (physmem * NBPP);
	uint tags[NUM_TAGS];
	char *cptr;
	uint no_ints_sr, ce_no_ints_sr, oldsr, oneoffval;
	uint orig_ecc;
	uint lo_val, hi_val;

	k1addr &= ~(BYTESPERDBLWD-1);	/* rnd down to a dbl-word boundry */
	k0addr = K1_TO_K0(k1addr);
	physaddr = K1_TO_PHYS(k1addr);
	k1ptr = (volatile int *)k1addr;
	k0ptr = (volatile int *)k0addr;
	if (copyin((caddr_t)ecc_info_param,(caddr_t)&new_ecc,
	    sizeof(ecc_data_word_t))) {
		return EFAULT;
	}
	if (force_verbose)
		printf("What %d: k1 0x%x k0 0x%x hi 0x%x lo 0x%x cbits 0x%x\n",
			inwhat,k1addr,k0addr,new_ecc.hi_word,
			new_ecc.lo_word,(uint)new_ecc.ecc_val);
	switch(inwhat) {
	case IN_MEM:	/* force ecc error in memory via cache-munge */
	case IN_SD:
		if (inwhat == IN_SD) {
			cptr = error_loc_names[CACH_SD];
			if (force_verbose)
				printf(" Force %s ecc error (%d)\n",
					cptr,inwhat);
		} else {
			cptr = error_loc_names[SYSAD];
			if (force_verbose)
				printf(" Force %s ecc err (%d) via secondary cbit-munge\n",
					cptr,inwhat);
		}
		/* pick an address one scache-size away (used by the
		 * commented-out flush-by-conflict code below)
		 */
		if ((physaddr + private.p_scachesize) >= pmem)
			k0oneoff = (k0addr - private.p_scachesize);
		else
			k0oneoff = (k0addr + private.p_scachesize);
		oldsr = no_ints_sr = getsr();
		/* disable interrupts for entire time */
		no_ints_sr &= ~SR_IE;
		/* next sr will allow us to 'cook' the 2ndary ecc */
		ce_no_ints_sr = (no_ints_sr | SR_CE);
		setsr(no_ints_sr);	/* no ints while running uncached */
		uncached();	/* uncached instr stream: line won't be replaced */
		/* get valid cache line and write the specified dbl-word */
		*k0ptr = new_ecc.lo_word;
		*(k0ptr+1) = new_ecc.hi_word;
		/* force it into 2ndary to init correct ecc */
		_c_hwbinv(CACH_PD, k0addr);
		/* and read it back into primary */
		lo_val = *k0ptr;
		hi_val = *(k0ptr+1);
		/* now make it dirty again, with the same data so the ecc in
		 * the 2ndary is correct until we xor in the change; the
		 * pd_hwbinv will hit since it is again dirty
		 */
		*k0ptr = lo_val;
		_c_ilt_n_ecc(CACH_SD, k0addr,tags, &orig_ecc);
		/* with SR_CE bit set, the ecc reg contributes to the generated
		 * value.  Contrary to the current documentation (which says
		 * that the ecc register is xor'ed into the existing
		 * checkbits), the R4K appears to first do a one's complement
		 * on the ECC register; THEN it's xor'ed into the cbits.
		 * Therefore, for us to end up with the specified cbits we
		 * must xor the old and new, then NOT it.
		 * Nice documentation...
		 */
		v_orig_ecc = orig_ecc;
		n_ecc = (uint)new_ecc.ecc_val;
		xor_ecc = ~(orig_ecc ^ n_ecc);
		setecc((int)xor_ecc);
		_munge_decc(k0addr, ce_no_ints_sr);
#ifdef ECC_TEST_TWO_BAD
		/* corrupt a second word so we can test EW bit in ecc_handler */
		/* Assumes that we're corrupting 0x300 first, and second error
		 * is at 0x500 so you better make sure that's OK !
		 */
		ecc_info_ptr.ecc_err2_ptr = (k0ptr+128); /* add 4 cachelines */
		/* get valid cache line and write the specified dbl-word */
		*(k0ptr+128) = new_ecc.lo_word;
		*(k0ptr+129) = new_ecc.hi_word;
		/* force it into 2ndary to init correct ecc */
		_c_hwbinv(CACH_PD, (k0addr+128*sizeof(int)));
		/* and read it back into primary */
		lo_val = *(k0ptr+128);
		hi_val = *(k0ptr+129);
		/* now make it dirty again, with the same data so the ecc in
		 * the 2ndary is correct until we xor in the change; the
		 * pd_hwbinv will hit since it is again dirty
		 */
		*(k0ptr+128) = lo_val;
		_c_ilt_n_ecc(CACH_SD, (k0addr+128*sizeof(int)),tags,
			&orig_ecc);
		/* with SR_CE bit set, the ecc reg contributes to the generated
		 * value.  Contrary to the current documentation (which says
		 * that the ecc register is xor'ed into the existing
		 * checkbits), the R4K appears to first do a one's complement
		 * on the ECC register; THEN it's xor'ed into the cbits.
		 * Therefore, for us to end up with the specified cbits we
		 * must xor the old and new, then NOT it.
		 * Nice documentation...
		 */
		v_orig_ecc = orig_ecc;
		n_ecc = (uint)new_ecc.ecc_val;
		xor_ecc = ~(orig_ecc ^ n_ecc);
		setecc((int)xor_ecc);
		_munge_decc(k0addr+128*sizeof(int), ce_no_ints_sr);
#endif /* ECC_TEST_TWO_BAD */
		setsr(no_ints_sr);	/* clear CE bit before going cached */
		runcached();
		setsr(oldsr);	/* now enable interrupts */
		missed_2nd = 0;
		if (inwhat == IN_MEM) {
			/* now flush this line to memory by reading an address
			 * one 2nd cache-size above K0addr
			oneoffval = *(uint *)k0oneoff;
			 */
			/* flush the bad line out to memory.  Since the rmi
			 * fixes all one bit errors unconditionally on writes,
			 * this must be at least a 2-bit error
			 */
			/* prevent ecc error now; this way it'll get out there
			 * flawed and the next cached-read will get a SysAD ECC
			 */
			oldsr = getsr();
			setsr(ce_no_ints_sr);
			if (!_c_hwbinv(CACH_SD, k0addr))
				missed_2nd = 1; /* mustn't print with CE bit on */
			setsr(oldsr);
		}
		if (inwhat == IN_MEM && missed_2nd)
			printf("!!?force_ecc: addr 0x%x 2ndary hwbinv missed cache!\n",
				k0addr);
		if (inwhat == IN_SD) {
			/* reading into primary will check ecc */
			if (force_verbose)
				printf("IN_SD: here we go!\n");
#ifndef FORCE_CACHERR_ON_STORE
			/* Force cache error on load */
			lo_val = *k0ptr;
#else
			/* force cache error on store (should turn on EI)
			 *
			 * Two interesting case.  In one we write completely
			 * new data into one of the words of the doubleword.
			 * This will most likely cause us to report an MBE if
			 * the EI bit does not get set since the ECC will be
			 * computed on this value (in the PD) and comareed to
			 * the ECC in the secondary.
			 * The other case stores the same data (test program is
			 * generating error in the other word of the double
			 * word).
			 */
#if 0
			/* this test tends to generate FATAL MBE if EI not set */
			ecc_store_err(0x1234, k0addr);	/* write some new data */
#else
			/* this test replaces with same data, so looks like SBE */
			ecc_store_err(lo_val, k0addr);
#endif
#if 0
			/* for now we use more controlled environment of
			 * assembly language code.
			 */
			/* Force cache error on store (EI) */
			*k0ptr = lo_val;;
#endif /* 0 */
#endif /* force cache error on store */
		}
		if (force_verbose)
			printf("force_ecc %s exits\n",cptr);
		return 0;
	case IN_PD:
		cptr = error_loc_names[CACH_PD];
		if (force_verbose)
			printf(" Force PD cache ecc error (%d)\n",inwhat);
		/* get valid cache line and write the specified dbl-word */
		*k0ptr = new_ecc.lo_word;
		*(k0ptr+1) = new_ecc.hi_word;
		_c_ilt_n_ecc(CACH_PD, k0addr, tags, &orig_ecc);
		if (force_verbose)
			printf(" f_ecc IN_PD: addr 0x%x: taglo 0x%x, ecc 0x%x\n",
				k0addr,tags[TAGLO_IDX],orig_ecc);
		orig_ecc ^= 0x1;	/* toggle parity bit */
		if (force_verbose)
			printf("new ecc: 0x%x\n",orig_ecc);
		setecc(orig_ecc);
		/* set CE status bit--cachops will use contents of ecc register
		 * for data parity instead of computing the correct one.
		 */
		oldsr = no_ints_sr = getsr();
		/* disable interrupts for entire time */
		no_ints_sr &= ~SR_IMASK8;
		ce_no_ints_sr = (no_ints_sr | SR_CE);
		setsr(no_ints_sr);	/* no ints while running uncached */
		uncached();	/* uncached instr stream: line won't be replaced */
		/* get line again in case instr. forced it out */
		*k0ptr = new_ecc.lo_word;
		*(k0ptr+1) = new_ecc.hi_word;
		setsr(ce_no_ints_sr);
		/* storing the same value as above with SR_CE bit set, using
		 * the ECC register with the parity bit toggled forces
		 * incorrect data parity and causes an ecc exception.
		 */
		*k0ptr = new_ecc.lo_word;
		setsr(no_ints_sr);	/* clear CE bit */
		runcached();
		setsr(oldsr);	/* and enable interrupts */
		if (force_verbose)
			printf(" exiting force_ecc, case IN_PD (%d)\n",IN_PD);
		return 0;
	case IN_PI:
		/* not implemented: fall out to the trace printf below */
		cptr = error_loc_names[CACH_PI];
		break;
	case IN_SI:
		/* not implemented: fall out to the trace printf below */
		cptr = error_loc_names[CACH_SI];
		break;
	case 120:
		/* magic value: just run the cleanup path */
		ecc_cleanup();
		return 0;
	default:
		printf("Illegal inwhat (%d)\n",inwhat);
		return 0;
	} /* switch */
	if (force_verbose)
		printf(" force ecc in %s (%d)\n",cptr,inwhat);
	return 0;
}
#endif /* IP19 && _FORCE_ECC */
#endif /* !TFP && !BEAST */

#if EVEREST
#include
#define EFRAME_REG(efp,reg) (((k_machreg_t *)(efp))[reg])
#define REGVAL(efp,x) ((x)?EFRAME_REG((efp),(x)+EF_AT-1):0)

/* Decode the faulting instruction of a bus error from the exception frame.
 * On success returns 1 and fills in *epcp (faulting pc), *ldstp (1=load,
 * 0=store), *vaddrp (effective virtual address) and *paddrhip/*paddrlop
 * (physical address halves).  Returns 0 when the instruction can't be
 * classified; on TFP only *epcp is filled in since bus errors there are
 * imprecise.
 */
/* ARGSUSED */
int
find_buserror_info(eframe_t *ep, inst_t **epcp, int *ldstp, void **vaddrp,
	uint *paddrhip, uint *paddrlop)
{
	inst_t *epc;
#ifndef TFP
	union mips_instruction inst;
	void *vaddr;
	int ldst;
	pfn_t pfn;
	uint paddrlo, paddrhi;
#endif
	epc = (inst_t *)EFRAME_REG(ep,EF_EPC);
	/* if the fault was in a branch delay slot, step past the branch.
	 * NOTE(review): epc is an inst_t *, so "+= 4" advances four
	 * instructions (16 bytes), not 4 bytes -- looks like it should be
	 * epc += 1 (or a byte-address adjustment); confirm against other
	 * CAUSE_BD handling in the kernel before changing.
	 */
	if ((long)EFRAME_REG(ep,EF_CAUSE) & CAUSE_BD)
		epc +=4;
#if TFP
	/*
	 * Bus errors are imprecise on TFP, so the EPC is meaningless.
	 * Printing out information based on the EPC will only confuse the
	 * user.  Just return the EPC in the exception frame so the panic
	 * message matches the warning message.
	 */
	*epcp = epc;
	return 0;
#else /* ! TFP */
	if (IS_KUSEG((long)epc))
		inst.word = fuword(epc);
	else
		inst.word = *epc;
	/* effective address = base register + signed immediate */
	vaddr = (void *)((__psint_t)REGVAL(ep, inst.i_format.rs) +
		inst.i_format.simmediate);
	switch (inst.i_format.opcode) {
	/* Loads */
	case ld_op:
	case lwu_op:
	case lw_op:
	case lhu_op:
	case lh_op:
	case lbu_op:
	case lb_op:
		ldst = 1;
		break;
	/* Stores */
	case sd_op:
	case sw_op:
	case sh_op:
	case sb_op:
		ldst = 0;
		break;
	/* XXX What do we do about these?
	 */
	/* Cop1 instructions */
	case lwc1_op:
	case ldc1_op:
	case swc1_op:
	case sdc1_op:
	/* Unaligned load/stores */
	case ldl_op:
	case ldr_op:
	case lwl_op:
	case lwr_op:
	case sdl_op:
	case sdr_op:
	case swl_op:
	case swr_op:
	/* Load linked/store conditional */
	case lld_op:
	case scd_op:
	case ll_op:
	case sc_op:
	default:
		return 0;
	}
	if (IS_KUSEG(vaddr)) {
		vtop(vaddr, 1, &pfn, 1);
	} else
		pfn = kvtophyspnum((void *)vaddr);
	paddrhi = (pfn>>20);
	paddrlo = (pfn << 12) | ((long)vaddr & 0xfff);
	*epcp = epc;
	*ldstp = ldst;
	*vaddrp = vaddr;
	*paddrhip = paddrhi;
	*paddrlop = paddrlo;
	return 1;
#endif /* TFP */
}

#if ECC_RECOVER
static void *last_ecc_recoverable = 0;
/*
 * We introduce here the arbitrary concept of a "flurry" of recoverable
 * multibit errors.  We want to survive instances of isolated flurries
 * (e.g., lots of errors on a single page), but not continue to "recover"
 * from truly hard errors which cause endless bus errors which just happen
 * to appear to be "recoverable".  What we do is to timestamp the first
 * recoverable error of a flurry, allow some number of additional recoveries
 * in a short period of time, then refuse to "recover" more than some max
 * number occuring in that "short period".
 */
#define ECC_RECOVERABLE_FLURRY_MAX 32 /* s-cache lines per 4096 byte page */
static int time_ecc_recoverable_flurry = 0; /* time, in secs, of flurry */
static int count_ecc_recoverable_flurry = 0; /* count recoveries in flurry */
#endif /* ECC_RECOVER */

/*
 * See if we can recover from an ECC error:
 * IF the PC points to kernel "block zero" or "block copy" code AND
 * IF we were just crossing into a secondary cache line AND
 * IF we planned to update the entire secondary cache line with new data AND
 * IF we did not fault on this same location recently AND
 * IF the systune parameter "ecc_recover_enable" is nonzero, which specifies
 *    a time interval (in seconds) within which we will keep trying to
 *    recover a maximum of ECC_RECOVERABLE_FLURRY_MAX errors.
 * THEN we can recover.
 *
 * Returns:
 *	0 if cannot recover
 *	1 if can recover
 *
 * Side effect: sets global last_ecc_recoverable to faulting virtual addr.
 *
 * NOTE: This code counts on the bcopy/bzero routines to supply the
 * appropriate lables and to use register A3 to hold the upper bound
 * for destination addresses!
 */
/* ARGSUSED */
ecc_recoverable(eframe_t *ep, inst_t *epc, void *vaddr)
{
#if ECC_RECOVER
	extern char bzero_stores[];
	extern char bcopy_stores[];
	extern int ecc_recover_enable;
	long destination_limit;

	if (!ecc_recover_enable)
		return 0;
	/* only the known store labels inside bzero/bcopy are recoverable */
	if (((long)epc != (long)bzero_stores) &&
	    ((long)epc != (long)bcopy_stores))
		return 0;
	if (!SCACHE_ALIGNED((long)vaddr))
		return 0;
	/* The following code assumes certain details of the bcopy/bzero
	 * code in order to determine if we will be storing the entire
	 * cacheline.  If we get a multibit error on the first store into
	 * a cacheline AND if we will be storing the entire line THEN
	 * we can safely ignore the error.
	 */
	destination_limit = (long)EFRAME_REG(ep,EF_A3);
	if (destination_limit-(long)vaddr < SCACHE_LINESIZE)
		return 0;
	if (last_ecc_recoverable == vaddr)
		return 0;
	/* NOTE(review): duplicate of the ecc_recover_enable check at the
	 * top of this function; harmless but redundant.
	 */
	if (!ecc_recover_enable)
		return 0;
	last_ecc_recoverable = vaddr;
	if (time - time_ecc_recoverable_flurry > ecc_recover_enable) {
		/*
		 * It has been a sufficiently "long time" since the latest
		 * flurry of recoverable multibit errors, so reset the
		 * count/time.
		 */
		time_ecc_recoverable_flurry = time;
		count_ecc_recoverable_flurry = 1;
	} else {
		/*
		 * This is another recoverable error in a "short" period of
		 * time.  Allow a certain number of these in that time, then
		 * give up and stop trying to recover.
		 */
		if (++count_ecc_recoverable_flurry > ECC_RECOVERABLE_FLURRY_MAX)
			return 0;
	}
	return 1;
#else /* ECC_RECOVER */
	return 0;
#endif /* ECC_RECOVER */
}
#endif /* EVEREST */

#if !MCCHIP && !IP30 && !IP32
/* MC based systems do not currently have ECC */
#if !defined (SN)
/* SN has its own bus error processing */
/*
 * dobuserre - handle bus error exception
 */
int ecc_recover_count = 0;	/* count of multibit errors recovered */
static volatile cpumask_t buserr_panic_pending = {0};

/* 0: kernel; 1: kernel - no print; 2: user */
/* Handle a bus error exception.  flag selects the context: 0 = kernel
 * (panic unless ECC-recoverable), 1 = nofault (clear and resume), 2 = user
 * (panic after reporting).  Returns 1 when an error was recovered, 0 when
 * execution may resume (nofault), and does not return on panic.
 */
int
dobuserre(register eframe_t *ep, inst_t *epc, uint flag)
{
#if TFP
	unsigned ev_ile;
#endif
#ifdef EVEREST
#if IP19 || IP25
	cpu_cookie_t err_info;
#endif
#endif
	int s = splhi();	/* Prevent preemption from now on */
#ifdef EVEREST
#if IP19 || IP25
	/* pin ourselves to the cpu that took the error */
	if (curthreadp) {
		err_info = setmustrun(ep->ef_cpuid);
	}
	ASSERT(cpuid() == ep->ef_cpuid);
#endif
#endif
#if TFP
	ev_ile = EV_GET_LOCAL(EV_ILE);	/* Current ILE register */
#endif /* TFP */
	switch (flag) {
	case 0:
	default:
#ifdef EVEREST
		cmn_err(CE_WARN|CE_CPUID,
			"%s Bus Error, Kernel mode, eframe:0x%x EPC:0x%x",
			((ep->ef_cause & CAUSE_EXCMASK) == EXC_IBE) ?
			"Instruction" : "Data", ep, epc);
#endif
		/*
		 * If we're not already panicing, then start to panic.
		 * If we're already panicing on another cpu, then just
		 * silently spin here, waiting for an intercpu command.
		 * If we're already panicing on this cpu, then go ahead and
		 * double-panic.
		 */
		while (CPUMASK_IS_NONZERO(buserr_panic_pending) &&
		    (!CPUMASK_TSTM(buserr_panic_pending, private.p_cpumask)))
			;	/* sit and spin */
		{
		inst_t *epc;	/* shadows the parameter deliberately */
		int ldst;
		void *vaddr;
		uint paddrhi, paddrlo;

		CPUMASK_ATOMSET(buserr_panic_pending, cpumask());
#ifdef EVEREST
		dump_hwstate(1);
		if (find_buserror_info(ep,&epc,&ldst,&vaddr,&paddrhi,
			&paddrlo)) {
			cmn_err(CE_WARN,
				"BUSERR: %s instruction, virtual address 0x%x (addrhi=0x%x addrlo=0x%x)\n",
				ldst ? "LOAD" : "STORE",
				vaddr, paddrhi, paddrlo);
			printf("BUSERR: ");
			mc3_decode_addr(printf, paddrhi, paddrlo);
			if (ecc_recoverable(ep, epc, vaddr)) {
				cmn_err(CE_WARN,
				"ECC RECOVERED -- CONTINUE NORMAL OPERATION.\n");
				/*
				 * We can try to recover this error.
				 * Clear our recollection of the
				 * event, to avoid future confusion.
				 */
				everest_error_clear(0);
				ecc_recover_count++;
				CPUMASK_ATOMCLR(buserr_panic_pending,
					cpumask());
#ifdef EVEREST
#if IP19 || IP25
				if(curthreadp)
					restoremustrun(err_info);
#endif
#endif
				splx(s);
				return 1;	/* problem handled */
			}
		}
#endif
		cmn_err_tag(74,CE_PANIC,
			"Bus Error in Kernel mode, eframe:0x%x EPC:0x%x",
			ep, epc);
		}
	case 1:		/* nofault */
#if TFP
		EV_SET_REG(EV_CERTOIP, 0xffff);	/* Clear Bus Error */
		EV_SET_LOCAL(EV_ILE, ev_ile|EV_ERTOINT_MASK); /* re-enable BE */
		tfp_clear_gparity_error();
#endif
#ifdef EVEREST
#if IP19 || IP25
		if(curthreadp)
			restoremustrun(err_info);
#endif
#endif
		splx(s);
		return 0;
	case 2:
#ifdef EVEREST
#if IP19 || IP25
		/* a VME error that can be cleared is not fatal */
		if( uvme_errclr(ep) == 1) {
			if(curthreadp)
				restoremustrun(err_info);
			splx(s);
			return 0;
		}
#endif /* IP19 || IP25 */
		cmn_err(CE_WARN|CE_CPUID,
			"%s Bus Error, User mode, eframe:0x%x EPC:0x%x",
			((ep->ef_cause & CAUSE_EXCMASK) == EXC_IBE) ?
			"Instruction" : "Data", ep, epc);
#endif /* EVEREST */
		/*
		 * If we're not already panicing, then start to panic.
		 * If we're already panicing on another cpu, then just
		 * silently spin here, waiting for an intercpu command.
		 * If we're already panicing on this cpu, then go ahead and
		 * double-panic.
		 */
		while (CPUMASK_IS_NONZERO(buserr_panic_pending) &&
		    (!CPUMASK_TSTM(buserr_panic_pending, private.p_cpumask)))
			;	/* sit and spin */
		{
		inst_t *epc;	/* shadows the parameter deliberately */
		int ldst;
		void *vaddr;
		uint paddrhi, paddrlo;

		CPUMASK_ATOMSET(buserr_panic_pending, cpumask());
#ifdef EVEREST
		dump_hwstate(1);
		if (find_buserror_info(ep,&epc,&ldst,&vaddr,&paddrhi,
			&paddrlo)) {
			cmn_err(CE_WARN,
				"BUSERR: %s instruction: virtual address 0x%x (physical 0x%x%x)\n",
				ldst ? "LOAD" : "STORE",
				vaddr, paddrhi, paddrlo);
			printf("BUSERR: ");
			mc3_decode_addr(printf, paddrhi, paddrlo);
		}
#endif
		cmn_err_tag(75,CE_PANIC,
			"Bus Error in User mode, eframe:0x%x EPC:0x%x",
			ep, epc);
		}
	} /* switch */
	/* NOTREACHED */
}
#endif /* SN0 */
#endif /* !MCCHIP && !IP30 */

/* The R4000 has a built-in floating-point unit.  These 2 functions
 * are used by floating-point emulation (nofphw.s), which is not
 * included in R4000 kernels.  So these routines stub out the
 * unresolved externals.
 */
int
softfp_adderr()
{
	cmn_err(CE_PANIC, "softfp_adderr for R4000?");
	return 0;
}

int
softfp_insterr()
{
	cmn_err(CE_PANIC, "softfp_insterr for R4000?");
	return 0;
}

#ifdef _MEM_PARITY_WAR
/*
 * ecc_create_exception
 *
 * Create an exception frame from an ecc exception frame,
 * including changing the state of the system to common
 * exception state.  The new frame is allocated on the
 * appropriate kernel stack.  The code runs with SR_ERL set
 * in $sr, so it must avoid taking any exceptions.
 */
extern void ecc_map_uarea(void);

u_long *
ecc_create_exception(eframe_t *ep)
{
	eframe_t *nep;
	u_long *nsp;

	if (private.p_kstackflag == PDA_CURUSRSTK) {
		/* map the u-area */
		ecc_map_uarea();
		/* allocate the frame */
		nep = &curexceptionp->u_eframe;
		private.p_kstackflag = PDA_CURKERSTK;
		nsp = ((u_long *) (KERNELSTACK)) - 4;
	} else {
		/* was on kernel, idle, or interrupt stack */
		nep = ((eframe_t *) ep->ef_sp) - 1;
		nsp = (u_long *) nep;
	}
	*nep = *ep;		/* copy the ecc frame into the new frame */
	nsp[0] = (u_long) nep;
	nep->ef_sr &= ~SR_ERL;	/* turn off SR_ERL in frame */
	return(nsp);
}
#endif /* _MEM_PARITY_WAR */

#if defined (IP19)
/* Install the cache-error handler's stack pointer (top of a dedicated
 * stack carved out of page fpage) at the well-known CACHE_ERR_SP_PTR
 * location, and return the next free pfn after the stack.
 */
pfn_t
init_ecc_sp(fpage)
pfn_t fpage;
{
	__psunsigned_t *cache_sp_k1ptr;

	cache_sp_k1ptr = (__psunsigned_t *)(PHYS_TO_K1(CACHE_ERR_SP_PTR));
	*cache_sp_k1ptr = PHYS_TO_K1(ctob(fpage) + CACHE_ERR_STACK_SIZE -
		sizeof(void *));
	return (fpage + btoc(CACHE_ERR_STACK_SIZE));
}
#endif

/*
 * Interface to dump stuff in ioerror
 */
/* indexed by the error_mode argument to ioerror_dump() */
char *error_mode_string[] = {
	"probe",
	"kernel",
	"user",
	"reenable"
};

extern void
ioerror_dump(char *name, int error_code, int error_mode, ioerror_t *ioerror)
{
	/* Print a one-line summary of the I/O error followed by every
	 * field of *ioerror that is marked valid.  error_mode indexes
	 * error_mode_string[] (probe/kernel/user/reenable).
	 */
	printf("%s%s%s%s%s error in %s mode\n",
		name,
		(error_code & IOECODE_PIO) ? " PIO" : "",
		(error_code & IOECODE_DMA) ? " DMA" : "",
		(error_code & IOECODE_READ) ? " Read" : "",
		(error_code & IOECODE_WRITE) ? " Write" : "",
		error_mode_string[error_mode]);
/* print one ioerror field iff its valid bit is set; #f supplies the label */
#define PRFIELD(f) \
	if (IOERROR_FIELDVALID(ioerror,f)) \
		printf("\t%20s: 0x%X\n", #f, IOERROR_GETVALUE(ioerror,f));

	PRFIELD(errortype);	/* error type: extra info about error */
	PRFIELD(widgetnum);	/* Widget number that's in error */
	PRFIELD(widgetdev);	/* Device within widget in error */
	PRFIELD(srccpu);	/* CPU on srcnode generating error */
	PRFIELD(srcnode);	/* Node which caused the error */
	PRFIELD(errnode);	/* Node where error was noticed */
	PRFIELD(sysioaddr);	/* Sys specific IO address */
	PRFIELD(xtalkaddr);	/* Xtalk (48bit) addr of Error */
	PRFIELD(busspace);	/* Bus specific address space */
	PRFIELD(busaddr);	/* Bus specific address */
	PRFIELD(vaddr);		/* Virtual address of error */
	PRFIELD(memaddr);	/* Physical memory address */
	PRFIELD(epc);		/* pc when error reported */
	PRFIELD(ef);		/* eframe when error reported */

#undef PRFIELD

	printf("\n");
}

/*
 * machine dependent code for error handling.  Mark a page inaccessible and
 * later clean and put it back in VM circulation if possible.
 */
/* ARGSUSED */
void
error_mark_page(paddr_t paddr)
{
#if defined (SN0)
	extern void sn0_error_mark_page(paddr_t);
	sn0_error_mark_page(paddr);
#else
	/* only SN0 implements page retirement */
	cmn_err(CE_NOTE, "error_mark_page: not supported");
#endif
}

/* Attempt to return a previously-marked page to service; returns the
 * platform routine's result on SN0, 0 (not reclaimed) elsewhere.
 */
/* ARGSUSED */
int
error_reclaim_page(paddr_t paddr, int flag)
{
#if defined (SN0)
	extern int sn0_error_reclaim_page(paddr_t, int);
	return sn0_error_reclaim_page(paddr, flag);
#else
	cmn_err(CE_NOTE, "error_reclaim_page: not supported");
	return 0;
#endif
}