1
0
Files
irix-657m-src/stand/arcs/lib/libkl/fru/sn0_fru_node.c
2022-09-29 17:59:04 +03:00

1568 lines
42 KiB
C

/**************************************************************************
* *
* Copyright (C) 1992-1996, Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
**************************************************************************/
#include "sn0_fru_analysis.h"
#include <sys/reg.h>
#include <sys/SN/addrs.h>
#include <sys/SN/agent.h>
#include <sys/R10k.h>
#ifdef PCOUNT_WAR
#include <sys/SN/SN0/ip27log.h>
#endif /* PCOUNT_WAR */
#ifdef FRUTEST
#include <string.h>
extern kl_config_hdr_t g_node[];
/*
* HOW DO WE INTEGRATE THESE CONFIDENCE LEVELS INTO THE KLCONFIG
* STRUCTURES
*/
extern confidence_t g_io_conf;
extern confidence_t g_sn0net_conf;
extern confidence_t g_xbow_conf;
extern int g_ce_valid;
#endif /* #ifdef FRUTEST */
#ifdef _STANDALONE
extern nasid_t get_nasid(void);
#endif
extern confidence_t *kf_conf_tab[]; /* table of component confidence level
* pointers, indexed by the KF_*_CONF_INDEX
* constants.  An entry may be NULL: every
* user in this file tests the entry before
* dereferencing it.
*/
extern hubreg_t kf_reg_tab[]; /* table of error registers, indexed by the
* KF_*_INDEX register constants
*/
nasid_t current_nasid; /* nasid of the node
* that is being
* analyzed.  It is only read (never assigned) by the
* analyzers visible in this part of the file;
* presumably the caller sets it -- confirm.
*/
/*
 * kf_board_analyze
 *
 * Run kf_comp_analyze() on every component of `board`.
 *
 *   board          - board to analyze; must be non-NULL (KF_ASSERT).
 *   curr_analysis  - optional analysis context, may be NULL.  When it is
 *                    non-NULL the board's serial number is written into it
 *                    (board_serial_number_get) for the duration of the call
 *                    and the caller's original contents are put back before
 *                    returning, on EVERY exit path.
 *
 * Returns KF_SUCCESS when the board is flagged DUPLICATE_BOARD (nothing is
 * analyzed) or when all components were analyzed successfully; otherwise
 * the first non-success code returned by kf_comp_analyze().
 */
kf_result_t
kf_board_analyze(lboard_t *board,kf_analysis_t *curr_analysis)
{
    kf_result_t rv = KF_SUCCESS;
    int comp;
    kf_analysis_t save_curr_analysis;

    KF_DEBUG("\tkf_board_analyze:doing board analysis......\n");
    KF_ASSERT(board);
    if (curr_analysis) {
        save_curr_analysis = *curr_analysis;
        board_serial_number_get(board,curr_analysis->kfa_serial_number);
    }
    /* check to see if we have already analyzed this board
     * during a previous node analysis
     */
    if (board->brd_flags & DUPLICATE_BOARD) {
        KF_DEBUG("\tkf_board_analyze:finished board analysis -- duplicate board\n");
        goto done;
    }
    /*
     * analyze each of the components of this board, stopping at the
     * first component whose analysis fails
     */
    for (comp = 0; comp < KLCF_NUM_COMPS(board); comp++) {
        rv = kf_comp_analyze(board,KLCF_COMP(board,comp),curr_analysis);
        if (rv != KF_SUCCESS) {
            KF_DEBUG("\tkf_board_analyze:finished board analysis -- component analysis failed\n");
            goto done;
        }
    }
    KF_DEBUG("\tkf_board_analyze:finished board analysis\n");
done:
    /* The early exits used to return without undoing the serial-number
     * overwrite above; restore the caller's context on all paths.
     */
    if (curr_analysis)
        *curr_analysis = save_curr_analysis;
    return rv;
}
/*
 * kf_comp_analyze
 *
 * Hand one klconfig component of `board` to the analyzer matching its
 * type: kf_cpu_analyze, kf_hub_analyze or kf_router_analyze.  Unknown
 * component types are only reported through KF_DEBUG.
 *
 * When curr_analysis is non-NULL the board-level fields of the context are
 * filled in before the type-specific analyzer runs (and, for a CPU, a
 * SYSBUS guess is recorded if a sysbus confidence exists); the context is
 * restored to its entry contents before returning, whatever the result.
 *
 * Returns the result of the type-specific analyzer, or KF_SUCCESS for an
 * unknown component type.
 * ARGSUSED
 */
kf_result_t
kf_comp_analyze(lboard_t *board,
                klinfo_t *comp,
                kf_analysis_t *curr_analysis)
{
    kf_result_t status = KF_SUCCESS;
    kf_analysis_t entry_state;

    KF_DEBUG("\t\tkf_comp_analyze:doing component analysis......\n");
    if (curr_analysis)
        entry_state = *curr_analysis;
    KF_ASSERT(comp);

    switch (KLCF_COMP_TYPE(comp)) {
    case KLSTRUCT_CPU:
        if (curr_analysis) {
            confidence_t *sysbus_conf = kf_conf_tab[KF_SYSBUS_CONF_INDEX];

            curr_analysis->kfa_info[KF_IP27_LEVEL].kfi_type = KFTYPE_IP27;
            curr_analysis->kfa_info[KF_IP27_LEVEL].kfi_inst = BOARD_SLOT(board);
            if (sysbus_conf) {
                /* record the sysbus guess, then blank the sysbus
                 * level again so the cpu analysis starts clean
                 */
                curr_analysis->kfa_info[KF_SYSBUS_LEVEL].kfi_type = KFTYPE_SYSBUS;
                curr_analysis->kfa_info[KF_SYSBUS_LEVEL].kfi_inst = 0;
                curr_analysis->kfa_conf = *sysbus_conf;
                kf_guess_put(curr_analysis);
                curr_analysis->kfa_info[KF_SYSBUS_LEVEL].kfi_type = KF_UNKNOWN;
                curr_analysis->kfa_info[KF_SYSBUS_LEVEL].kfi_inst = KF_UNKNOWN;
                curr_analysis->kfa_conf = 0;
            }
        }
        status = kf_cpu_analyze(CPU_COMP_ERROR(board,comp),comp->virtid,curr_analysis);
        break;

    case KLSTRUCT_HUB:
        if (curr_analysis) {
            curr_analysis->kfa_info[KF_IP27_LEVEL].kfi_type = KFTYPE_IP27;
            curr_analysis->kfa_info[KF_IP27_LEVEL].kfi_inst = BOARD_SLOT(board);
        }
        status = kf_hub_analyze(curr_analysis);
        break;

    case KLSTRUCT_ROU:
        /* the router analyzer fills in the level type itself */
        if (curr_analysis)
            curr_analysis->kfa_info[KF_ROUTER_LEVEL].kfi_inst = BOARD_SLOT(board);
        status = kf_router_analyze(curr_analysis);
        break;

    default:
        KF_DEBUG("\t\t\tkf_comp_analyze:unknown component type %d\n",KLCF_COMP_TYPE(comp));
        break;
    }

    if (curr_analysis)
        *curr_analysis = entry_state;
    KF_DEBUG("\t\tkf_comp_analyze:finished component analysis\n");
    return status;
}
/*
 * kf_router_analyze
 *
 * Router stage of the analysis.  It works in one of two modes:
 *
 *   curr_analysis != NULL  - reporting: record a guess (kf_guess_put) for
 *                            the router and for every router link that has
 *                            a confidence entry, then restore the context.
 *   curr_analysis == NULL  - evaluation: inspect each port's status/error
 *                            register and feed the port-error and
 *                            illegal-port-direction bits to
 *                            KF_CONDITIONAL_UPDATE.
 *
 * Always returns KF_SUCCESS.
 */
kf_result_t
kf_router_analyze(kf_analysis_t *curr_analysis)
{
    int port;
    hubreg_t link_status;
    kf_analysis_t entry_state;
#define PORT_ERR_MASK 0x3ff000000 /* mask for the port error bits in status_error
* register of the router for a port
*/
#define ILL_PORT_MASK 0x200000000 /* illegal port direction error bit mask
* in the status error register for a port
*/

    KF_DEBUG("\t\t\tkf_router_analyze:doing router analysis..........\n");

    if (curr_analysis) {
        confidence_t *conf;

        entry_state = *curr_analysis;

        /* the router itself */
        curr_analysis->kfa_info[KF_ROUTER_LEVEL].kfi_type = KFTYPE_ROUTER;
        conf = kf_conf_tab[KF_ROUTER_CONF_INDEX];
        if (conf) {
            curr_analysis->kfa_conf = *conf;
            kf_guess_put(curr_analysis);
        }

        /* one guess per link that has a confidence entry */
        for (port = 0; port < MAX_ROUTER_PORTS; port++) {
            conf = kf_conf_tab[KF_ROUTER_LINK0_CONF_INDEX + port];
            if (!conf)
                continue;
            curr_analysis->kfa_info[KF_ROUTER_LINK_LEVEL].kfi_type = KFTYPE_ROUTER_LINK0;
            curr_analysis->kfa_info[KF_ROUTER_LINK_LEVEL].kfi_inst = port;
            curr_analysis->kfa_conf = *conf;
            kf_guess_put(curr_analysis);
        }

        *curr_analysis = entry_state;
    } else {
        /* go through each port's status register and update the
         * confidence levels
         */
        for (port = 0; port < MAX_ROUTER_PORTS; port++) {
            link_status = kf_reg_tab[KF_ROUTER_STS_ERR0_INDEX + port];
            KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_ROUTER_LINK0_CONF_INDEX + port],
                                  link_status & PORT_ERR_MASK,
                                  60);
            KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SOFTWARE_CONF_INDEX],
                                  link_status & ILL_PORT_MASK,
                                  70);
        }
    }

    KF_DEBUG("\t\t\tkf_router_analyze:finished router analysis\n");
    return KF_SUCCESS;
}
/*
 * kf_hub_analyze
 *
 * Hub stage of the analysis; afterwards runs the PI, MD, II and NI
 * sub-analyzers in that order.
 *
 *   curr_analysis != NULL - reporting: record guesses for the hub and the
 *       hub link from their confidence entries.  With PCOUNT_WAR, a hub
 *       confidence equal to FRU_FLAG_CONF is a marker (not a real
 *       confidence): it is reported as a KFTYPE_PCOUNT guess and, outside
 *       FRUTEST, logged to the PROM log; the ordinary hub guess is then
 *       suppressed.
 *   curr_analysis == NULL - evaluation: with PCOUNT_WAR (and not FRUTEST)
 *       read every ICRB "B" register KF_PCOUNT_READ_REPEAT times and, if
 *       every read of every entry showed exception code 11 or 6 (DEX/RDEX
 *       per the comment below), store the FRU_FLAG_CONF marker in the hub
 *       confidence.
 *
 * Returns the first non-success code from kf_pi_analyze, kf_md_analyze or
 * kf_ii_analyze, else the result of kf_ni_analyze.  On those early returns
 * the context is NOT restored here; kf_comp_analyze, the caller in this
 * file, restores its own saved copy regardless of the result.
 */
kf_result_t
kf_hub_analyze(kf_analysis_t *curr_analysis)
{
    kf_result_t rv = KF_SUCCESS;
    kf_analysis_t save_curr_analysis;

    KF_DEBUG("\t\t\tkf_hub_analyze:doing hub analysis..........\n");
    if (curr_analysis) {
        save_curr_analysis = *curr_analysis;
        curr_analysis->kfa_info[KF_HUB_LEVEL].kfi_type = KFTYPE_HUB;
        curr_analysis->kfa_info[KF_HUB_LEVEL].kfi_inst = 0;
#ifdef PCOUNT_WAR
/* exception field of an ICRB "B" register value: 5 bits starting at bit 25 */
#define KF_ICRB_EXC(_icrbb_val) ((_icrbb_val >> 25) & 0x1f)
        if (kf_conf_tab[KF_HUB_CONF_INDEX] &&
            (*kf_conf_tab[KF_HUB_CONF_INDEX] == FRU_FLAG_CONF)) {
            char slot_name[SLOTNUM_MAXLENGTH];
            kf_analysis_t new_analysis = *curr_analysis;

            new_analysis.kfa_info[KF_PCOUNT_LEVEL].kfi_type =
                KFTYPE_PCOUNT;
            new_analysis.kfa_info[KF_PCOUNT_LEVEL].kfi_inst = 0;
            new_analysis.kfa_conf =
                *kf_conf_tab[KF_HUB_CONF_INDEX];
            kf_guess_put(&new_analysis);
#if !defined(FRUTEST)
            get_slotname(new_analysis.kfa_info[KF_IP27_LEVEL].kfi_inst,slot_name);
            /* Write to the PROM log in case we can't write to
               the console. */
            ip27log_printf(IP27LOG_FATAL,
                           "/hw/module/%d/slot/%s/node failed with "
                           "the incident 439797 error signature",
                           new_analysis.kfa_info[KF_MODULE_LEVEL].kfi_inst,
                           slot_name);
#endif /* !FRUTEST */
        }
        /* We are using the flag confidence value to indicate the
           node got the pcount hang. */
        if ((kf_conf_tab[KF_HUB_CONF_INDEX]) &&
            (*kf_conf_tab[KF_HUB_CONF_INDEX] != FRU_FLAG_CONF)) {
#else /* !PCOUNT_WAR */
        if (kf_conf_tab[KF_HUB_CONF_INDEX]) {
#endif /* !PCOUNT_WAR */
            curr_analysis->kfa_conf = *kf_conf_tab[KF_HUB_CONF_INDEX];
            kf_guess_put(curr_analysis);
        }
        if (kf_conf_tab[KF_HUB_LINK_CONF_INDEX]) {
            /* the hub-link guess must not leave its level set for
             * the sub-analyzers below, so work on a saved copy
             */
            kf_analysis_t tmp_analysis = *curr_analysis;

            curr_analysis->kfa_info[KF_HUB_LINK_LEVEL].kfi_type = KFTYPE_HUB_LINK;
            curr_analysis->kfa_info[KF_HUB_LINK_LEVEL].kfi_inst = 0;
            curr_analysis->kfa_conf = *kf_conf_tab[KF_HUB_LINK_CONF_INDEX];
            kf_guess_put(curr_analysis);
            *curr_analysis = tmp_analysis;
        }
    }
#if defined(PCOUNT_WAR) && !defined(FRUTEST)
    else {
        __psunsigned_t hub_base;
        int i, crb, num_dex = 0;
        hubreg_t crb_entB;

        hub_base = (__psunsigned_t)REMOTE_HUB_ADDR(current_nasid, 0);
#define KF_PCOUNT_READ_REPEAT 2
        for (i = 0; i < KF_PCOUNT_READ_REPEAT; i++) {
            /* check multiple times just in case we caught the
               ICRBs right before an ejection */
            for (crb = 0; crb < IIO_NUM_CRBS; crb++) {
                crb_entB = HUB_REG_PTR_L(hub_base,
                                         IIO_ICRB_B(crb));
#ifdef PCOUNT_TEST
                printf("*****ICRB_B %d is: 0x%llx\n",crb,
                       crb_entB);
#endif
                /* If the entry is in DEX or RDEX mode,
                   increment. */
                if ((KF_ICRB_EXC(crb_entB) == 11) ||
                    (KF_ICRB_EXC(crb_entB) == 6))
                    num_dex++;
            }
        }
#ifdef PCOUNT_TEST
        printf("*****num_dex is %d\n",num_dex);
#endif
        /* The threshold is tied to the repeat count (it used to be a
         * literal 2) so that changing KF_PCOUNT_READ_REPEAT cannot
         * silently disable the detection.
         */
        if (num_dex == (KF_PCOUNT_READ_REPEAT * IIO_NUM_CRBS)) {
            /* If on every pass all ICRBs were in DEX or RDEX
               we have hit the problem. */
            KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_HUB_CONF_INDEX],
                                  1,
                                  FRU_FLAG_CONF);
        }
    }
#endif /* PCOUNT_WAR && !FRUTEST */
    if ((rv = kf_pi_analyze(curr_analysis)) != KF_SUCCESS)
        return rv;
    if ((rv = kf_md_analyze(curr_analysis)) != KF_SUCCESS)
        return rv;
    if ((rv = kf_ii_analyze(curr_analysis)) != KF_SUCCESS)
        return rv;
    rv = kf_ni_analyze(curr_analysis);
    if (curr_analysis)
        *curr_analysis = save_curr_analysis;
    KF_DEBUG("\t\t\tkf_hub_analyze:finished hub analysis\n");
    return rv;
}
/*
* analyze if there are pi errors
* in this function we look at the
* PI_ERR_INT_PEND
* PI_ERR_STATUS0
* PI_ERR_STATUS1
*/
/*
* some useful macros for pi analysis
*/
/*
* to get the bits corresponding to syscmd sysad & sysstate errors in
* err_int_pend
*
* SYSBUS_ERR_A / SYSBUS_ERR_B OR together the six per-CPU PI_ERR_* bits
* (SYSSTATE, SYSAD data/addr, SYSCMD data/addr, SYSSTATE_TAG) for side A
* and side B respectively.
* NOTE(review): neither macro is referenced in the part of the file seen
* here; they may be used by the rule tables or later code -- confirm
* before removing.
*/
#define SYSBUS_ERR_A (PI_ERR_SYSSTATE_A | \
PI_ERR_SYSAD_DATA_A | \
PI_ERR_SYSAD_ADDR_A | \
PI_ERR_SYSCMD_DATA_A | \
PI_ERR_SYSCMD_ADDR_A | \
PI_ERR_SYSSTATE_TAG_A)
#define SYSBUS_ERR_B (PI_ERR_SYSSTATE_B | \
PI_ERR_SYSAD_DATA_B | \
PI_ERR_SYSAD_ADDR_B | \
PI_ERR_SYSCMD_DATA_B | \
PI_ERR_SYSCMD_ADDR_B | \
PI_ERR_SYSSTATE_TAG_B)
/*
* kf_pi_analyze
*
* PI stage of the hub analysis.  Works in one of two modes:
*
* curr_analysis != NULL - reporting: record a PI-level guess
* (kf_guess_put) from the PI confidence entry, if there is one, restore
* the context and return.  No error register is looked at in this mode.
*
* curr_analysis == NULL - evaluation: run the rule table selected by
* KF_PI_RULE_INDEX, then apply the hand-coded rules below to the
* ERR_STATUS0/ERR_STATUS1 register copies for CPU A and CPU B taken from
* kf_reg_tab.  Each rule is a KF_CONDITIONAL_UPDATE(confidence pointer,
* condition, value) -- presumably "raise this confidence to value when
* condition holds"; the macro is defined outside this file, confirm there.
*
* Always returns KF_SUCCESS.
*
* Several macros are #defined inside the body.  Being preprocessor macros
* they stay visible for the rest of the file (kf_md_analyze relies on
* DIR_ERR_DIMM_SEL, for instance).
*/
kf_result_t
kf_pi_analyze(kf_analysis_t *curr_analysis)
{
/*ERR_INT_PEND
* <23> [A,60]
* <17> [HUB,70]
* <15>
* <13>
* <11>
* <9>
*
* <22> [B,60]
* <16> [HUB,70]
* <14>
* <12>
* <10>
* <8>
*
* <24> can't tell anything
*
* <4>
* <5> [HUB,70]
* [MEM,70]
* [MD,60]
*
* <6>
* <7> [MEM,70]
*
*ERR_STATUS0
*ERR_STATUS1
* VALID
*
* ERROR_TYPE
* RTERR | WTERR | [MEM,70]
*
* UPWERR | UPRERR
*
*
* RRB STATUS BIT SEMANTICS
* V - valid
* E - type of req
* T - target of an incoming intervention
* I - target of an incoming invalidate
* R - resp. data given to T5
* A - data ack recvd.
* H - gathering invalidates
* W - waiting for write to complete
* P - double , single or partial word read
*
* WRB STATUS BIT SEMANTICS
* V - valid
* T - target of an incoming intervention
* W - received a WBBAK
* P - double, single or Partial word write
*
* CRB_STATUS
*
* T
* RRB & I [MEM,70]
*
*
* W [MEM,70]
*
*/
kf_result_t rv = KF_SUCCESS;
hubreg_t pi_err_sts0_a,pi_err_sts0_b;
hubreg_t pi_err_sts1_a,pi_err_sts1_b;
extern int KF_PI_RULE_INDEX;
nasid_t nasid_a,nasid_b;
paddr_t addr_a , addr_b;
KF_DEBUG("\t\t\t\tkf_pi_analyze:doing pi analysis........\n");
/* reporting mode: emit the PI guess and leave without touching registers */
if (curr_analysis) {
kf_analysis_t save_curr_analysis;
save_curr_analysis = *curr_analysis;
curr_analysis->kfa_info[KF_PI_LEVEL].kfi_type = KFTYPE_PI;
curr_analysis->kfa_info[KF_PI_LEVEL].kfi_inst = 0;
if (kf_conf_tab[KF_PI_CONF_INDEX]) {
curr_analysis->kfa_conf = *kf_conf_tab[KF_PI_CONF_INDEX];
kf_guess_put(curr_analysis);
}
*curr_analysis = save_curr_analysis;
return KF_SUCCESS;
}
/* first do the rule table driven analysis */
kf_rule_tab_analyze(KF_PI_RULE_INDEX);
/* copy the pi error registers into local variables */
pi_err_sts0_a = kf_reg_tab[KF_PI_ERR_STS0_A_INDEX];
pi_err_sts0_b = kf_reg_tab[KF_PI_ERR_STS0_B_INDEX];
pi_err_sts1_a = kf_reg_tab[KF_PI_ERR_STS1_A_INDEX];
pi_err_sts1_b = kf_reg_tab[KF_PI_ERR_STS1_B_INDEX];
/* to check the validity of any of the err_status0 registers
* (MEM_ACC_ERR is not used in the part of the file seen here)
*/
#define MEM_ACC_ERR(_r1,_r2) ((_r1 & PI_ERR_ST0_VALID_MASK) || \
(_r2 & PI_ERR_ST0_VALID_MASK))
/*
* on page 9 of the hub programming manual it says that for a given
* cache line A[39:7] the Hspec address of the corr. directory entry
* is given as A[39:31],1,1,A[30:12],1,A[11:7],DWbit,000.
* ------
*
* on page 102 of the hub programming manual it says that the bank selects
* for the DIMMs are always taken from the incoming address[31:29]
*
* this means DIMM_SEL for a directory access can be either a 3 (M-mode)
* or 7 (N-mode) only
*
* IS THIS INTERPRETATION CORRECT ????
*/
#define DIR_ERR_DIMM_SEL(_r) ((_r >> 27) & 0x7)
#define ERR_STS0_TYPE_MASK 0x3
#define ERR_STS0_VALID(_r) (_r & PI_ERR_ST0_VALID_MASK)
#define ERR_STS0_ADDR(_r) (_r >> 22)
#define ERR_STS0_ADDR_DIMM(_r) ((ERR_STS0_ADDR(_r) & MEM_DIMM_MASK) >> MEM_DIMM_SHFT)
/* macros to get the error type from the err_status0 register */
/* does the err_status0 register indicate a write error? */
#define WERR(_r) ((_r & ERR_STS0_TYPE_MASK) == 0x0)
/* does the err_status0 register indicate an uncached partial read or write error? */
#define UPRW_ERR(_r) ((_r & ERR_STS0_TYPE_MASK) == 0x1)
/* does the err_status0 register indicate a directory error? */
#define DERR(_r) ((_r & ERR_STS0_TYPE_MASK) == 0x2)
/* does the err_status0 register indicate a timeout error? */
#define TO_ERR(_r) ((_r & ERR_STS0_TYPE_MASK) == 0x3)
/* CRB type bit in the err_status_1 register */
#define WRB(_r) (((_r & PI_ERR_ST1_WRBRRB_MASK) >> PI_ERR_ST1_WRBRRB_SHFT) == 1)
/* error address of each side and the nasid encoded in it; the nasid is
* compared with current_nasid below so that memory on this node is only
* blamed for addresses that belong to this node
*/
addr_a = ERR_STS0_ADDR(pi_err_sts0_a);
addr_b = ERR_STS0_ADDR(pi_err_sts0_b);
nasid_a = NASID_GET(addr_a);
nasid_b = NASID_GET(addr_b);
/*
*
* WERR --> [MEM,70]
* WERR & WRB ---> [SOFTWARE , 80]
*
* NOTE(review): as coded, the DIMM updates below also require WRB, not
* just WERR.
*/
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SOFTWARE_CONF_INDEX],
((ERR_STS0_VALID(pi_err_sts0_a) && WERR(pi_err_sts0_a) && WRB(pi_err_sts1_a)) ||
(ERR_STS0_VALID(pi_err_sts0_b) && WERR(pi_err_sts0_b) && WRB(pi_err_sts1_b))),
80);
/* update the appropriate DIMM's confidence */
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DIMM0_CONF_INDEX + ERR_STS0_ADDR_DIMM(pi_err_sts0_a)],
((nasid_a == current_nasid) &&
(ERR_STS0_VALID(pi_err_sts0_a) && WERR(pi_err_sts0_a) && WRB(pi_err_sts1_a))),
70);
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DIMM0_CONF_INDEX + ERR_STS0_ADDR_DIMM(pi_err_sts0_b)],
((nasid_b == current_nasid) &&
(ERR_STS0_VALID(pi_err_sts0_b) && WERR(pi_err_sts0_b) && WRB(pi_err_sts1_b))),
70);
/*
*
* UPRERR | UPWERR --> [MEM,70]
*/
/* update the appropriate DIMM's confidence */
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DIMM0_CONF_INDEX + ERR_STS0_ADDR_DIMM(pi_err_sts0_a)],
((nasid_a == current_nasid) &&
(ERR_STS0_VALID(pi_err_sts0_a) && UPRW_ERR(pi_err_sts0_a))),
70);
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DIMM0_CONF_INDEX + ERR_STS0_ADDR_DIMM(pi_err_sts0_b)],
((nasid_b == current_nasid) &&
(ERR_STS0_VALID(pi_err_sts0_b) && UPRW_ERR(pi_err_sts0_b))),
70);
/* directory error: update the appropriate DIMM's confidence */
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DIMM0_CONF_INDEX + ERR_STS0_ADDR_DIMM(pi_err_sts0_a)],
((nasid_a == current_nasid) &&
(ERR_STS0_VALID(pi_err_sts0_a) && DERR(pi_err_sts0_a))),
70);
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DIMM0_CONF_INDEX + ERR_STS0_ADDR_DIMM(pi_err_sts0_b)],
((nasid_b == current_nasid) &&
(ERR_STS0_VALID(pi_err_sts0_b) && DERR(pi_err_sts0_b))),
70);
/*
* TIMEOUT ERR --> [HUB,40]
*
* NOTE(review): this comment used to say [HUB,70] while the code passes
* 40; the comment now follows the code.  Confirm which value is intended.
*/
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_HUB_CONF_INDEX],
((ERR_STS0_VALID(pi_err_sts0_a) && TO_ERR(pi_err_sts0_a)) ||
(ERR_STS0_VALID(pi_err_sts0_b) && TO_ERR(pi_err_sts0_b))),
40);
/* CRB type bit in the err_status_1 register */
#define RRB(_r) (((_r & PI_ERR_ST1_WRBRRB_MASK) >> PI_ERR_ST1_WRBRRB_SHFT) == 0)
/* H bit of CRB status field in err_status_1 register
* (H and RRB_N_H are not used in the part of the file seen here)
*/
#define H_MASK 0x800000000000ull
#define H(_r) (_r & H_MASK)
#define RRB_N_H(_r1,_r2) (ERR_STS0_VALID(_r1) && (RRB(_r2)) && (H(_r2)))
/* T bit of CRB status field in err_status_1 register */
#define T_MASK 0x200000000000ull
#define T(_r) (_r & T_MASK)
/* I bit of CRB status field in err_status_1 register */
#define I_MASK 0x400000000000ull
#define I(_r) (_r & I_MASK)
#define RRB_N_I(_r) ((RRB(_r)) && (I(_r)))
#define T_OR_RRB_N_I(_r1,_r2) (ERR_STS0_VALID(_r1) && (T(_r2) || RRB_N_I(_r2)))
/*
* T | (RRB & I) ---> [MEM,70]
*/
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_MEM_CONF_INDEX],
(((nasid_a == current_nasid) && T_OR_RRB_N_I(pi_err_sts0_a,pi_err_sts1_a)) ||
((nasid_b == current_nasid) && T_OR_RRB_N_I(pi_err_sts0_b,pi_err_sts1_b))),
70);
/* W-bit of CRB status in the err_status_1 register */
#define W_MASK 0x1000000000000ull
#define W(_r1,_r2) (ERR_STS0_VALID(_r1) && (_r2 & W_MASK))
/*
* W ---> [MEM,70]
*/
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_MEM_CONF_INDEX],
(((nasid_a == current_nasid) && W(pi_err_sts0_a,pi_err_sts1_a)) ||
((nasid_b == current_nasid) && W(pi_err_sts0_b,pi_err_sts1_b))),
70);
KF_DEBUG("\t\t\t\tkf_pi_analyze:finished pi analysis\n");
return rv;
}
/*
* SYNDROME extracts bits <39:32> of an MD_MEM_ERROR value; BAD_ECC_FORCED is
* true when that field reads 0xff.  MEM_ERR_ECC_FORCED further down in this
* file tests the same bits with a mask and shift.
*/
#define SYNDROME(_r) ((_r >> 32) & 0xff) /* syndrome bits in MD_MEM_ERROR register */
#define BAD_ECC_FORCED(_r) (SYNDROME(_r) == 0xff) /* check if syndrome == 0xff in MEM_ERROR */
#if 0
/*
 * check_bad_ecc_forced
 *
 * Check if bad ecc (0xff) was written by MD due to other errors.
 *
 *   paddr - physical address whose ECC byte is examined; the byte is read
 *           through BDECC_ENTRY(paddr).  Under FRUTEST no memory is read:
 *           the value is 0xff when force_bad_ecc is set, 0 otherwise.
 *
 * Returns 1 if the ECC byte is 0xff, 0 otherwise.
 */
static int
check_bad_ecc_forced(paddr_t paddr)
{
    unsigned char ecc;
#ifdef FRUTEST
    extern int force_bad_ecc;

    if (force_bad_ecc)
        ecc = (uint)0xff;
    else
        ecc = 0;
#else
    /* get the ecc byte*/
    ecc = *((volatile unsigned char *)BDECC_ENTRY(paddr));
#endif
#ifdef FRU_DEBUG_RULES
    kf_print("++check_bad_ecc_forced : "
             "paddr = 0x%llx ECC addr=0x%llx ecc = 0x%x\n",
             paddr,BDECC_ENTRY(paddr),ecc);
#endif
    if (ecc == 0xff) {
#ifdef FRU_DEBUG_RULES
        kf_print("++check_bad_ecc_forced: Bad ECC external cause\n");
#endif
        return 1;
    }
#ifdef FRU_DEBUG_RULES
    /* The two literals are concatenated: the trailing space keeps the
     * message from reading "externalcause".
     */
    kf_print("++check_bad_ecc_forced: Not Bad ECC due to external "
             "cause : 0x%x\n",ecc);
#endif
    return 0;
}
/*
 * kf_check_mem_error
 *
 * Check if the memory error is due to a bad dimm and return the belief
 * that memory is the suspect.
 *
 *   valid  - non-zero when the memory error being examined is valid.
 *   offset - node offset of the failing address; combined with
 *            current_nasid through TO_NODE() to form the physical address.
 *
 * Returns 0 when the error is not valid, 90 when the ECC at the address
 * was not forced bad (check_bad_ecc_forced() == 0), otherwise 70.
 */
static confidence_t
kf_check_mem_error(int valid,paddr_t offset)
{
    paddr_t paddr;

    /* check if the error is valid */
    if (!valid)
        return 0;
    /* Form the physical address from the nasid & the node offset */
    paddr = TO_NODE(current_nasid,offset);
#ifdef FRU_DEBUG_RULES
    /* offset is a paddr_t, so it is printed with %llx like paddr; the
     * former %x did not match the argument's width.
     */
    kf_print("++kf_check_mem_error: paddr = 0x%llx nasid = 0x%x offset = 0x%llx\n",
             paddr,current_nasid,offset);
#endif
    /* If MD was not forced to write 0xff as ecc
     * then it is definitely a memory bank problem
     */
    if (!check_bad_ecc_forced(paddr))
        return 90; /* Suspect memory with a high prob 90% */
    return 70; /* Suspect memory with a prob 70% */
}
#endif
/* macros to check whether the cache err register indicates
* the primary instruction cache , primary data cache ,
* secondary cache or system interface error
*
* Each predicate compares the CE_TYPE_MASK field of the cache error
* register value _r with one CE_TYPE_* code, and is additionally gated by
* R10K_CACHE_ERR_VALID.
*/
/* In the FRUTEST build the gate is the test-harness variable g_ce_valid;
* otherwise the register value is always treated as valid.
*/
#ifdef FRUTEST
#define R10K_CACHE_ERR_VALID g_ce_valid
#else
#define R10K_CACHE_ERR_VALID 1
#endif /* FRUTEST */
/* primary instruction cache error ? */
#define R10K_ICACHE_ERR(_r) (R10K_CACHE_ERR_VALID && \
((_r & CE_TYPE_MASK) == CE_TYPE_I))
/* primary data cache error ? */
#define R10K_DCACHE_ERR(_r) (R10K_CACHE_ERR_VALID && \
((_r & CE_TYPE_MASK) == CE_TYPE_D))
/* secondary cache error ? */
#define R10K_SCACHE_ERR(_r) (R10K_CACHE_ERR_VALID && \
((_r & CE_TYPE_MASK) == CE_TYPE_S))
/* system interface error ? */
#define R10K_SYSINTF_ERR(_r) (R10K_CACHE_ERR_VALID && \
((_r & CE_TYPE_MASK) == CE_TYPE_SIE))
/*
 * kf_mem_error_get
 *
 * Fetch the saved MD_MEM_ERROR value for the node `nasid`.
 *
 * The node's board list (g_node[] under FRUTEST, KL_CONFIG_INFO()
 * otherwise) is searched for its IP27 board and the value is taken from
 * that board's hub error info.
 *
 * Returns 0 when the node has no IP27 board or the board has no hub error
 * info.
 */
hubreg_t
kf_mem_error_get(nasid_t nasid)
{
    lboard_t *ip27;            /* IP27 board of the node */
    klhub_err_t *hub_errors;   /* error info of the hub on that board */

#ifdef FRUTEST
    ip27 = find_lboard((lboard_t *)g_node[nasid].ch_board_info,KLTYPE_IP27);
#else
    ip27 = find_lboard((lboard_t *)KL_CONFIG_INFO(nasid),KLTYPE_IP27);
#endif /* #ifdef FRUTEST */

    if (ip27) {
        hub_errors = kf_hub_err_info_get(ip27);
        if (hub_errors)
            return KF_MD_MEM_ERR(hub_errors);
    }
    return 0;
}
/* Some useful macros to operate on the MD_MEM_ERROR
* register
*
* MEM_ERR_UCE(_r)        - bit 63 of the register value (0 or 1)
* MEM_ERR_ADDR_DIMM(_r)  - the MEM_DIMM_MASK field of the value, shifted
*                          down by MEM_DIMM_SHFT
* MEM_ERR_ECC_FORCED(_r) - true when bits <39:32> read 0xff (the same test
*                          as BAD_ECC_FORCED earlier in this file)
* MEM_ERR_ADDR(_r)       - the low 32 bits of the value with the bottom
*                          three bits cleared
* NOTE(review): SCACHE_LINE_MASK is not referenced in the part of the file
* seen here; kf_check_scache_way_error uses CACHE_SLINE_MASK instead.
*/
#define MEM_ERR_UCE_MASK 0x8000000000000000
#define MEM_ERR_UCE_SHFT 63
#define MEM_ERR_UCE(_r) ((_r & MEM_ERR_UCE_MASK) >> MEM_ERR_UCE_SHFT)
#define MEM_ERR_ECC_MASK 0xff00000000
#define MEM_ERR_ECC_SHFT 32
#define MEM_ERR_ADDR_DIMM(_r) ((_r & MEM_DIMM_MASK) >> MEM_DIMM_SHFT)
#define MEM_ERR_ECC_FORCED(_r) (((_r & MEM_ERR_ECC_MASK) >> MEM_ERR_ECC_SHFT) == 0xff)
#define MEM_ERR_ADDR(_r) (_r & 0xfffffff8)
#define SCACHE_LINE_MASK 0xffffff80
/*
 * kf_check_scache_way_error
 *
 * Return the belief that the scache is bad, judged from one way's tag.
 *
 *   cerr - cache error register value.
 *   tag  - tag of the way being examined; together with cerr it yields
 *          the physical address through SCACHE_ERROR_ADDR().
 *
 * The MD_MEM_ERROR value of the node that NASID_GET() derives from that
 * address is consulted:
 *
 *   90  cerr reports an scache error and either MD_MEM_ERROR has no
 *       uncorrectable error, or its address lies in a different
 *       secondary cache line;
 *    0  MD_MEM_ERROR refers to the same cache line and its syndrome is
 *       not the forced 0xff value (a memory problem, not the scache);
 *   -1  not able to decide.
 */
static int
kf_check_scache_way_error(uint cerr,__uint64_t tag)
{
    hubreg_t mem_err;       /* MD_MEM_ERROR register from the relevant node */
    nasid_t mem_err_nasid;  /* nasid of the relevant node */
    paddr_t paddr,mem_paddr;/* paddr = phy. addr. from the cache
                             * mem_paddr = phy. addr from the MD_MEM_ERROR
                             */

    /* Form the physical address from the cache tag & index */
    paddr = SCACHE_ERROR_ADDR(cerr,tag);
    /* Get the nasid corr. to the home node of this address */
    mem_err_nasid = NASID_GET(paddr);
    /* Get the mem error register on this node */
    mem_err = kf_mem_error_get(mem_err_nasid);

    /* If only the scache error is set and the
     * mem error is not set then it is definitely
     * the scache
     */
    if (!(MEM_ERR_UCE(mem_err))) {
        if (R10K_SCACHE_ERR(cerr))
            return 90; /* Suspect scache with 90% */
    }
    /* Form the physical address from the MD_MEM_ERROR
     * register
     */
    mem_paddr = TO_NODE(mem_err_nasid,MEM_ERR_ADDR(mem_err));
#ifdef FRU_DEBUG_RULES
    /* MEM_ERR_ADDR(mem_err) has the width of hubreg_t, which this same
     * call prints with %llx; the former %x did not match the argument.
     */
    kf_print("++kf_check_scache_way_error: "
             "MEM_ERR_ADDR(mem_err) = 0x%llx"
             " mem_paddr = 0x%llx\n"
             "*kf_check_scache_way_error: "
             "cache_addr = 0x%llx"
             " mem_err = 0x%llx mem_err_nasid = 0x%x\n",
             MEM_ERR_ADDR(mem_err),mem_paddr,paddr,
             mem_err,mem_err_nasid);
#endif
    /* check if there is already memory error
     * for this cache line
     */
    if ((mem_paddr & CACHE_SLINE_MASK) ==
        (paddr & CACHE_SLINE_MASK)) {
        /* Check that md has not written bad ecc
         * In this case do not suspect scache: it is a
         * memory problem.
         */
        if (!BAD_ECC_FORCED(mem_err))
            return 0;
    } else if (R10K_SCACHE_ERR(cerr))
        return 90; /* If there is an scache error
                    * and the MD_MEM_ERROR is set for
                    * a different phys. addr. suspect
                    * scache with 90% prob.
                    */
    return -1; /* Not able to decide */
}
/*
 * kf_check_scache_error
 *
 * Check if the scache or sysintf error is actually due to a bad memory
 * and return the belief that the scache is at fault.
 *
 *   cerr      - cache error register value.
 *   cache_err - saved cache error state; ce_tags[0] / ce_tags[1] are the
 *               tags of way 0 / way 1.
 *
 * Every way flagged in cerr (CE_D_WAY0, CE_D_WAY1) is judged by
 * kf_check_scache_way_error() and the larger result wins.
 *
 * Returns
 *   0        cerr is zero, no way is flagged, or the ways say "memory";
 *   0..90    the best belief of the flagged ways when one could decide;
 *   0        no way could decide and cerr is a system interface error;
 *   70       no way could decide otherwise (default suspicion).
 */
static int
kf_check_scache_error(uint cerr, cache_err_t cache_err) {
    int belief = 0;
    int way_checked = 0;    /* has way 0 supplied a belief yet? */

    /* Check if the scache error is valid.
     * NOTE(review): as written this only rejects cerr == 0; if errors
     * that are neither scache nor sysintf were meant to be rejected too,
     * the first && would have to be ||.  Left unchanged -- confirm
     * against the callers.
     */
    if (!cerr && !(R10K_SCACHE_ERR(cerr)) && !(R10K_SYSINTF_ERR(cerr))) {
#ifdef FRU_DEBUG_RULES
        kf_print("++cache error reg = 0x%x\n",cerr);
#endif
        return 0;
    }
    /* Check if there is a memory ecc error for an address
     * in this cache line
     */
    if (cerr & CE_D_WAY0) {
        belief = kf_check_scache_way_error(cerr,cache_err.ce_tags[0]);
        way_checked = 1;
    }
    if (cerr & CE_D_WAY1) {
        int temp_belief = kf_check_scache_way_error(cerr,
                                                    cache_err.ce_tags[1]);

        /* Merge with way 0 only if way 0 was actually examined.
         * Taking the maximum against the initial 0 used to turn an
         * undecided (-1) way 1 into "do not suspect scache", whereas
         * the same outcome on way 0 alone fell through to the default
         * below.
         */
        if (!way_checked || (temp_belief > belief))
            belief = temp_belief;
    }
    if (belief >= 0)
        return belief; /* If we are able to assign confidence then
                        * return that
                        */
    if (R10K_SYSINTF_ERR(cerr)) /* Dont suspect scache on system
                                 * interface error and we are
                                 * not able to conclude anything
                                 * concrete
                                 */
        return 0;
    return 70; /* Default Suspect scache with a prob 70% */
}
/*
 * kf_md_analyze
 *
 * MD stage of the hub analysis.  Works in one of two modes:
 *
 *   curr_analysis != NULL - reporting: record guesses (kf_guess_put) for
 *       the MD, the memory and each of the 8 DIMMs from their confidence
 *       entries, then restore the context.  An MD confidence equal to
 *       FRU_FLAG_CONF is a marker, not a confidence: it is reported as a
 *       KFTYPE_T5_WB_SURPRISE guess and the MD guess itself is recorded
 *       with confidence 0.
 *
 *   curr_analysis == NULL - evaluation: run the rule table selected by
 *       KF_MD_RULE_INDEX, then apply the rules coded below to the
 *       directory, memory and protocol error registers:
 *
 *         DIR_ERROR UCE/AE bits   -> DIMM picked by DIR_ERR_DIMM_SEL, 80
 *                                    SOFTWARE, 40
 *         MEM_ERROR UCE           -> DIMM picked by MEM_ERR_ADDR_DIMM,
 *                                    90 if the syndrome is not the forced
 *                                    0xff value, else 70
 *         PROTOCOL_ERROR valid    -> MD confidence := FRU_FLAG_CONF marker
 *
 * Always returns KF_SUCCESS.
 */
kf_result_t
kf_md_analyze(kf_analysis_t *curr_analysis)
{
    hubreg_t dir_err,mem_err,proto_err;
    extern int KF_MD_RULE_INDEX;
    int suspect_belief;

    KF_DEBUG("\t\t\t\tkf_md_analyze:doing md analysis........\n");

    if (curr_analysis) {
        kf_analysis_t entry_state;
        confidence_t *conf;
        int slot;

        /* The FRU_FLAG_CONF marker in the MD confidence means we hit
         * the t5 writeback surprise protocol bug: report that on a
         * private copy of the context.
         */
        conf = kf_conf_tab[KF_MD_CONF_INDEX];
        if (conf && (*conf == FRU_FLAG_CONF)) {
            kf_analysis_t surprise = *curr_analysis;

            surprise.kfa_info[KF_T5_WB_SURPRISE_LEVEL].kfi_type =
                KFTYPE_T5_WB_SURPRISE;
            surprise.kfa_info[KF_T5_WB_SURPRISE_LEVEL].kfi_inst = 0;
            surprise.kfa_conf = *conf;
            kf_guess_put(&surprise);
        }

        entry_state = *curr_analysis;

        /* MD guess; the marker must not be read as a faulty md */
        curr_analysis->kfa_info[KF_MD_LEVEL].kfi_type = KFTYPE_MD;
        curr_analysis->kfa_info[KF_MD_LEVEL].kfi_inst = 0;
        conf = kf_conf_tab[KF_MD_CONF_INDEX];
        if (conf) {
            if (*conf == FRU_FLAG_CONF)
                curr_analysis->kfa_conf = 0;
            else
                curr_analysis->kfa_conf = *conf;
            kf_guess_put(curr_analysis);
        }

        /* memory as a whole */
        curr_analysis->kfa_info[KF_MEM_LEVEL].kfi_type = KFTYPE_MEM;
        curr_analysis->kfa_info[KF_MEM_LEVEL].kfi_inst = 0;
        conf = kf_conf_tab[KF_MEM_CONF_INDEX];
        if (conf) {
            curr_analysis->kfa_conf = *conf;
            kf_guess_put(curr_analysis);
        }

        /* the individual DIMMs */
        for (slot = 0; slot < 8; slot++) {
            curr_analysis->kfa_info[KF_DIMM_LEVEL].kfi_type = KFTYPE_DIMM0;
            curr_analysis->kfa_info[KF_DIMM_LEVEL].kfi_inst = slot;
            conf = kf_conf_tab[KF_DIMM0_CONF_INDEX + slot];
            if (conf) {
                curr_analysis->kfa_conf = *conf;
                kf_guess_put(curr_analysis);
            }
        }

        *curr_analysis = entry_state;
        return KF_SUCCESS;
    }

    /* first do the rule table driven analysis */
    kf_rule_tab_analyze(KF_MD_RULE_INDEX);

    /* copy the error registers into local variables */
    dir_err = kf_reg_tab[KF_MD_DIR_ERR_INDEX];
    mem_err = kf_reg_tab[KF_MD_MEM_ERR_INDEX];
    proto_err = kf_reg_tab[KF_MD_PROTO_ERR_INDEX];

/* macros to check if there is an uncorrectable directory ecc
 * error or an access error
 */
#define DIR_ERR_UCE_AE_MASK 0xc000000000000000
#define DIR_ERR_UCE_AE_SHFT 62
#define DIR_ERR_UCE_AE(_r) ((_r & DIR_ERR_UCE_AE_MASK) >> DIR_ERR_UCE_AE_SHFT)
#define DIR_ERR_HSPEC_ADDR_MASK 0x3ffffff8
#define DIR_ERR_HSPEC_ADDR(_r) (_r & DIR_ERR_HSPEC_ADDR_MASK)
#define PROTO_ERR_VALID(_r) ((_r >> 63) & 0x1)

    /*
     * DIR_ERR[UCE_VALID | AE_VALID] ---> [MEM,80],[SOFTWARE,40]
     */
    KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DIMM0_CONF_INDEX +
                                      DIR_ERR_DIMM_SEL(dir_err)],
                          DIR_ERR_UCE_AE(dir_err),
                          80);
    KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SOFTWARE_CONF_INDEX],
                          DIR_ERR_UCE_AE(dir_err),
                          40);

    /* a forced (0xff) syndrome makes the DIMM a weaker suspect */
    suspect_belief = BAD_ECC_FORCED(mem_err) ? 70 : 90;
    KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DIMM0_CONF_INDEX +
                                      MEM_ERR_ADDR_DIMM(mem_err)],
                          MEM_ERR_UCE(mem_err),
                          suspect_belief);

    /* If we hit a protocol error this is due to the
     * T5 writeback surprise . Recognize this by storing
     * a special value in the MD confidence.
     */
    KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_MD_CONF_INDEX],
                          PROTO_ERR_VALID(proto_err),
                          FRU_FLAG_CONF);

    KF_DEBUG("\t\t\t\tkf_md_analyze:finished md analysis\n");
    return KF_SUCCESS;
}
/*
* analyze if there are io errors
*
* kf_ii_analyze works in one of two modes:
*
* curr_analysis != NULL - reporting: record an II-level guess
* (kf_guess_put) from the II confidence entry, restore the context and
* return.  An II confidence equal to FRU_FLAG_CONF is a marker rather than
* a confidence: it is reported as a KFTYPE_BTE_PUSH guess and the II guess
* itself is recorded with confidence 0.
*
* curr_analysis == NULL - evaluation: feed the "crazy" bit (bit 32) of the
* widget status register to the hub link confidence with value 80, then
* run kf_ii_crb_analyze() on each of the IIO_NUM_CRBS CRB entry A register
* copies.
*
* The BTE status analysis is compiled out (#if 0).
*
* Always returns KF_SUCCESS.
*/
kf_result_t
kf_ii_analyze(kf_analysis_t *curr_analysis)
{
/*
* BTE ERR STATUS
* [SRCMEM,60]
* [DESTMEM,60]
* [SOFTWARE,60]
*
*/
kf_result_t rv = KF_SUCCESS;
#if 0
confidence_t *mem_confidence;
confidence_t *dimm_confidence;
hubreg_t bte0_sts,bte1_sts;
hubreg_t bte0_src,bte1_src;
hubreg_t bte0_dst,bte1_dst;
#endif
hubreg_t widget_status;
int i;
KF_DEBUG("\t\t\t\tkf_ii_analyze:doing ii analysis..........\n");
if (curr_analysis) {
kf_analysis_t save_curr_analysis;
/* Check if a special confidence (FRU_FLAG_CONF) has been put in the
* II confidence.  If so, report it as a BTE push guess
* (KFTYPE_BTE_PUSH) on a private copy of the context.
* The code that stores the marker is not in the part of the file
* seen here.
*/
if (kf_conf_tab[KF_II_CONF_INDEX] &&
(*kf_conf_tab[KF_II_CONF_INDEX] == FRU_FLAG_CONF)) {
kf_analysis_t new_analysis = *curr_analysis;
new_analysis.kfa_info[KF_BTE_PUSH_LEVEL].kfi_type =
KFTYPE_BTE_PUSH;
new_analysis.kfa_info[KF_BTE_PUSH_LEVEL].kfi_inst = 0;
new_analysis.kfa_conf =
*kf_conf_tab[KF_II_CONF_INDEX];
kf_guess_put(&new_analysis);
}
save_curr_analysis = *curr_analysis;
curr_analysis->kfa_info[KF_II_LEVEL].kfi_type = KFTYPE_II;
curr_analysis->kfa_info[KF_II_LEVEL].kfi_inst = 0;
/* make sure the FRU_FLAG_CONF marker is not interpreted as a
* faulty ii: record confidence 0 in that case
*/
if (kf_conf_tab[KF_II_CONF_INDEX]) {
curr_analysis->kfa_conf =
(*kf_conf_tab[KF_II_CONF_INDEX] ==
FRU_FLAG_CONF) ? 0 :
*kf_conf_tab[KF_II_CONF_INDEX];
kf_guess_put(curr_analysis);
}
*curr_analysis = save_curr_analysis;
return KF_SUCCESS;
}
#if 0
/* copy the error register values into local variables */
bte0_sts = kf_reg_tab[KF_II_BTE0_STS_INDEX];
bte1_sts = kf_reg_tab[KF_II_BTE1_STS_INDEX];
bte0_src = kf_reg_tab[KF_II_BTE0_SRC_INDEX];
bte1_src = kf_reg_tab[KF_II_BTE1_SRC_INDEX];
bte0_dst = kf_reg_tab[KF_II_BTE0_DST_INDEX];
bte1_dst = kf_reg_tab[KF_II_BTE1_DST_INDEX];
/* to check if the bte status indicates an error */
#define BTE_ERR(_r) ((_r >> IBLS_ERROR_SHFT) & 0x1)
/*
* BTE ERROR ---> [SOFTWARE,60],
* [SRCMEM | DSTMEM , 60]?
*/
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SOFTWARE_CONF_INDEX],
BTE_ERR(bte0_sts) || BTE_ERR(bte1_sts) ,
60);
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_II_CONF_INDEX],
BTE_ERR(bte0_sts) || BTE_ERR(bte1_sts) ,
60);
/* M mode #defines to get the node id out of a physical address
* SHOULD EVENTUALLY TAKE CARE OF BOTH THE MODES .
* LOOK FOR #defs IN sys/SN HEADERS
*/
kf_mem_conf_get(bte0_src,&mem_confidence,&dimm_confidence);
if (mem_confidence) {
KF_CONDITIONAL_UPDATE(mem_confidence,
BTE_ERR(bte0_sts),
60);
}
if (dimm_confidence) {
KF_CONDITIONAL_UPDATE(dimm_confidence,
BTE_ERR(bte0_sts),
60);
}
kf_mem_conf_get(bte1_src,&mem_confidence,&dimm_confidence);
if (mem_confidence)
KF_CONDITIONAL_UPDATE(mem_confidence,
BTE_ERR(bte1_sts),
60);
if (dimm_confidence) {
KF_CONDITIONAL_UPDATE(dimm_confidence,
BTE_ERR(bte1_sts),
60);
}
kf_mem_conf_get(bte0_dst,&mem_confidence,&dimm_confidence);
if (mem_confidence)
KF_CONDITIONAL_UPDATE(mem_confidence,
BTE_ERR(bte0_sts),
60);
if (dimm_confidence) {
KF_CONDITIONAL_UPDATE(dimm_confidence,
BTE_ERR(bte0_sts),
60);
}
kf_mem_conf_get(bte1_dst,&mem_confidence,&dimm_confidence);
if (mem_confidence)
KF_CONDITIONAL_UPDATE(mem_confidence,
BTE_ERR(bte1_sts),
60);
if (dimm_confidence) {
KF_CONDITIONAL_UPDATE(dimm_confidence,
BTE_ERR(bte1_sts),
60);
}
#endif
#define CRAZY(_r) ((_r >> 32) & 0x1) /* crazy bit in widget status */
widget_status = kf_reg_tab[KF_II_WIDGET_STATUS_INDEX];
/* crazy bit set ---> [HUB LINK,80] */
KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_HUB_LINK_CONF_INDEX],
CRAZY(widget_status),
80);
/*
* IO CRB ENTRY A analysis
* (the result of kf_ii_crb_analyze is not checked)
*/
for (i = 0; i < IIO_NUM_CRBS; i++) {
KF_DEBUG("\t\t\t\tdoing ii crb %d analysis..........\n",i);
kf_ii_crb_analyze(kf_reg_tab[KF_II_CRB_ENT0_A_INDEX + i]);
KF_DEBUG("\t\t\t\tfinished ii crb analysis\n");
}
KF_DEBUG("\t\t\t\tkf_ii_analyze:finished ii analysis\n");
return rv;
}
/* States which capture the sequence of bits seen so far
* in the crb address.  The address is scanned from the least significant
* bit upwards by kf_bte_push_pattern_check(); in the patterns below the
* bits are listed in the order they are scanned.
*/
#define STATE00 0 /* sequence seen so far is 0* */
#define STATE01 1 /* sequence seen so far is 0*1+ */
#define STATE10 2 /* sequence seen so far is 0*1+0+ */
#define STATE11 3 /* sequence seen so far is 0*1+0+1+ */
/*
 * kf_bte_push_pattern_check
 *
 * Check if the CRB address corresponds to the pattern which occurs on a
 * BTE PUSH pattern bug: scanned from bit 0 upwards, the address must read
 * 0*1+0*, i.e. its set bits form exactly one contiguous run.
 *
 *   crb_addr - bits [39:7] of the actual crb error address, held in bits
 *              [32:0] of this argument.
 *
 * Returns 1 when the pattern matches, 0 when it does not.  An address of
 * zero, or one with bit 32 or any higher bit set, never matches.
 */
int
kf_bte_push_pattern_check(paddr_t crb_addr)
{
    /* next state, indexed by [current state][bit just scanned].  The
     * row order relies on STATE00..STATE11 being 0..3.
     */
    static const char transition[4][2] = {
        /* STATE00: 0*       */ { STATE00, STATE01 },
        /* STATE01: 0*1+     */ { STATE10, STATE01 },
        /* STATE10: 0*1+0+   */ { STATE10, STATE11 },
        /* STATE11: 0*1+0+1+ */ { STATE11, STATE11 }
    };
    int cur = STATE00;

    /* Make sure that bit 32 (and anything above it) is 0. */
    if (crb_addr >> 32)
        return(0);

    /* Scanning stops once no set bit is left, so the zeros above the
     * highest set bit are never fed to the state machine.
     */
    for (; crb_addr; crb_addr >>= 1)
        cur = transition[cur][crb_addr & 1];

    /* a second run of ones (STATE11) or no ones at all (STATE00) is not
     * the pattern
     */
    if (cur == STATE01 || cur == STATE10)
        return(1);
    return(0);
}
/*
 * analyze the io crb entryA registers
 *
 * crb_ent_a is the raw value of one II CRB entry A register.  The
 * error-valid bit and the error code field are decoded and the
 * confidence levels of the software, the MD and (when the error
 * address maps to one) the dimm are conditionally updated.  The
 * reserved bits are additionally checked for the BTE operation marker
 * so that the BTE PUSH address pattern can be flagged against the II.
 *
 * Always returns KF_SUCCESS.
 */
kf_result_t
kf_ii_crb_analyze(hubreg_t crb_ent_a)
{
	kf_result_t	rv = KF_SUCCESS;
	paddr_t		crb_addr;
	confidence_t	*mem_conf = 0,*dimm_conf = 0;

#define CRB_ADDR(_r)		(((_r) & 0xfffffffffc) << 1)/* Addr. of the req. in legonet fmt */
#define CRB_SIDN(_r)		(((_r) >> 45) & 0xf)	/* Source ID number of the req */
#define CRB_TNUM(_r)		(((_r) >> 40) & 0x1f)	/* Transaction number of the req . */
#define CRB_ERR_VALID(_r)	(((_r) >> 55) & 0x1)	/* An error was encountered with this
							 * CRB
							 */
#define CRB_ERRCODE(_r)		(((_r) >> 52) & 0x7)	/* 3 bit error code: values 0 .. 7 */
	/*
	 * The error code field is only 3 bits wide, so a directory
	 * error cannot be encoded as 8; the hub II register definitions
	 * assign it code 0 (IIO_ICRB_ECODE_DERR).  CRB_DERR is only
	 * meaningful together with CRB_ERR_VALID, since 0 is also the
	 * value of the field when no error has been recorded.
	 */
#define CRB_DERR(_r)		(CRB_ERRCODE(_r) == 0)	/* directory error */
#define CRB_PERR(_r)		(CRB_ERRCODE(_r) == 1)	/* poison error */
#define CRB_WERR(_r)		(CRB_ERRCODE(_r) == 2)	/* write error */
#define CRB_AERR(_r)		(CRB_ERRCODE(_r) == 3)	/* access error */
#define CRB_PWERR(_r)		(CRB_ERRCODE(_r) == 4)	/* partial write error */
#define CRB_PRERR(_r)		(CRB_ERRCODE(_r) == 5)	/* partial read error */
#define CRB_TO_ERR(_r)		(CRB_ERRCODE(_r) == 6)	/* timeout error */
#define CRB_XTERR(_r)		(CRB_ERRCODE(_r) == 7)	/* xtalk packet has hdr or sideband error bit set*/
#define CRB_LN_UCE_MASK		0x08000000000000
#define CRB_LN_UCE(_r)		((_r) & CRB_LN_UCE_MASK) /* uncorrectable error on sn0net data */
#define CRB_XT_ERR_MASK		0x02000000000000
#define CRB_XT_ERR(_r)		((_r) & CRB_XT_ERR_MASK) /* xtalk request has error bit set */

	/* WERR | AERR | PWERR | PRERR --> [SOFTWARE,70] */
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SOFTWARE_CONF_INDEX],
			      (CRB_ERR_VALID(crb_ent_a) &&
			       (CRB_WERR(crb_ent_a) || CRB_AERR(crb_ent_a) ||
				CRB_PWERR(crb_ent_a) || CRB_PRERR(crb_ent_a))),
			      70);
	/* PERR --> [SOFTWARE , 50] */
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SOFTWARE_CONF_INDEX],
			      (CRB_ERR_VALID(crb_ent_a) &&
			       CRB_PERR(crb_ent_a)),
			      50);
	/* DERR --> [MD , 60] */
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_MD_CONF_INDEX],
			      (CRB_ERR_VALID(crb_ent_a) &&
			       CRB_DERR(crb_ent_a)),
			      60);

	/* Get the crb error address in hub internal format */
	crb_addr = CRB_ADDR(crb_ent_a);
	/* Convert the addr into R10k format */
	crb_addr = TO_NODE(NASID_GET(crb_addr),
			   TO_NODE_ADDRSPACE(crb_addr));

	/*
	 * Look up the confidence pointers for the memory the error
	 * address falls in.  Only the dimm pointer is used below;
	 * mem_conf is fetched because the lookup takes both.
	 */
	kf_mem_conf_get(crb_addr,&mem_conf,&dimm_conf);

	/* DERR --> [DIMM , 60], when the address maps to a dimm */
	if (dimm_conf) {
		KF_CONDITIONAL_UPDATE(dimm_conf,
				      (CRB_ERR_VALID(crb_ent_a) &&
				       CRB_DERR(crb_ent_a)),
				      60);
	}

	/* Most significant 6 bits of CRB entry A are reserved. These
	 * are being used for special hacks like recognizing a BTE PUSH
	 * op etc.
	 */
#define CRB_RESERVED_MASK	0x3f	/* 6 reserved bits */
#define CRB_RESERVED_SHFT	58	/* bits 63-58 are reserved */
#define CRB_RESERVED(_r)	(((_r) >> CRB_RESERVED_SHFT) & CRB_RESERVED_MASK)
#define BTE_OP(_r)		(CRB_RESERVED(_r) == 0x1) /* bte operation */

	/*
	 * BTE operation whose address (with the low 7 bits dropped)
	 * matches the BTE PUSH pattern --> [II , FRU_FLAG_CONF]
	 */
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_II_CONF_INDEX],
			      BTE_OP(crb_ent_a) &&
			      kf_bte_push_pattern_check(crb_addr >> 0x7),
			      FRU_FLAG_CONF);
	return rv;
}
/*
 * analyze if there are ni errors
 *
 * Two modes, selected by curr_analysis:
 *
 *   curr_analysis != NULL : reporting pass.  The NI level of the
 *	analysis record is filled in and, if an NI confidence entry
 *	exists, its value is handed to kf_guess_put().  The caller's
 *	record is restored before returning.
 *
 *   curr_analysis == NULL : analysis pass.  The NI rule table is run
 *	and the vector status register is then checked by hand.
 *
 * Always returns KF_SUCCESS.
 */
kf_result_t
kf_ni_analyze(kf_analysis_t *curr_analysis)
{
	/* Rule summary for the NI:
	 *
	 * NI_VECTOR_STATUS
	 *	STATUS_VALID
	 *		TYPE
	 *			ADDR ERROR		[SOFTWARE,70]
	 *			COMMAND ERROR
	 *			PROT VIOLATION
	 *
	 *			UNKNOWN RESPONSE	[REMOTE-BOARD,60]
	 *
	 * PORT_ERROR
	 *	NETWORK ERROR BIT SET
	 *	INTERNAL ERROR BIT SET			[HUB,90]
	 *						[PI/MD/II,60]
	 *
	 * Only the [SOFTWARE,70] rule is coded explicitly in this
	 * function; the others are presumably covered by the rule
	 * table -- verify against the KF_NI_RULE_INDEX rules.
	 */
	extern int	KF_NI_RULE_INDEX;
	hubreg_t	vec_sts = kf_reg_tab[KF_NI_VECT_STS_INDEX];

	KF_DEBUG("\t\t\t\tkf_ni_analyze:doing ni analysis.........\n");

	if (curr_analysis) {
		/* snapshot so the caller's record can be put back */
		kf_analysis_t	saved_analysis = *curr_analysis;

		curr_analysis->kfa_info[KF_NI_LEVEL].kfi_type = KFTYPE_NI;
		curr_analysis->kfa_info[KF_NI_LEVEL].kfi_inst = 0;

		if (kf_conf_tab[KF_NI_CONF_INDEX]) {
			curr_analysis->kfa_conf =
				*kf_conf_tab[KF_NI_CONF_INDEX];
			kf_guess_put(curr_analysis);
		}

		*curr_analysis = saved_analysis;
		return KF_SUCCESS;
	}

	/* first do the table driven analysis */
	kf_rule_tab_analyze(KF_NI_RULE_INDEX);

	/* to check if the vector status is valid */
#define VEC_STAT_VALID(_r)	(_r & NVS_VALID)
#define VEC_STAT_ERR_MASK	0x7
	/* to check if the vector status register indicates an
	 *	address error
	 *	command error
	 *	protection violation
	 * i.e. the status is valid and the low 3 bits are 4, 5 or 6
	 */
#define ADDR_CMD_PROT_ERR(_r)	(VEC_STAT_VALID(_r) && \
	(((_r & VEC_STAT_ERR_MASK) != 7) && \
	((_r & VEC_STAT_ERR_MASK) >= 4)))

	/*
	 * ADDR | COMMAND | PROTECTION ERROR ---> [SOFTWARE,70]
	 */
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SOFTWARE_CONF_INDEX],
			      ADDR_CMD_PROT_ERR(vec_sts),
			      70);

	/* to check if the vector status register indicates an unknown
	 * response (defined here but not used in this function)
	 */
#define UNKNOWN_RESP(_r)	((_r & VEC_STAT_ERR_MASK) == 7)

	KF_DEBUG("\t\t\t\tkf_ni_analyze:finished ni analysis\n");
	return KF_SUCCESS;
}
/*
 * analyze if there are any cache errors
 *
 * cpu_err	 saved cpu error state; only read in the analysis pass
 * cpuid	 cpu id, reduced modulo 2 to a per-hub cpu number
 * curr_analysis NULL for the analysis pass, otherwise the analysis
 *		 record to report the collected confidences through
 *
 * Analysis pass: if the saved cache error is valid, the dumped cache
 * error register is decoded and the icache, dcache, scache, sysbus
 * and hub confidence entries are conditionally updated.
 *
 * Reporting pass: for each of the cpu, icache, dcache and scache
 * confidence entries that exists for this cpu, the record is filled
 * in and passed to kf_guess_put().  The caller's record is restored
 * before returning.
 *
 * Always returns KF_SUCCESS.
 */
kf_result_t
kf_cpu_analyze(klcpu_err_t *cpu_err,unsigned int cpuid,kf_analysis_t *curr_analysis)
{
	uint	cache_err;
	int	belief;

	/* convert the system virtual id to a cpuid per hub */
	cpuid = cpuid % 2;

	/* ---- reporting pass ---- */
	if (curr_analysis) {
		kf_analysis_t	saved_analysis = *curr_analysis;

		curr_analysis->kfa_info[KF_CPU_LEVEL].kfi_type = KFTYPE_CPU0;
		curr_analysis->kfa_info[KF_CPU_LEVEL].kfi_inst = cpuid;
		if (kf_conf_tab[KF_CPU0_CONF_INDEX + cpuid]) {
			curr_analysis->kfa_conf =
				*kf_conf_tab[KF_CPU0_CONF_INDEX+cpuid];
			kf_guess_put(curr_analysis);
		}

		/* primary instruction cache */
		if (kf_conf_tab[KF_IC0_CONF_INDEX + cpuid]) {
			curr_analysis->kfa_info[KF_IC_LEVEL].kfi_type = KFTYPE_IC0;
			curr_analysis->kfa_info[KF_IC_LEVEL].kfi_inst = cpuid;
			curr_analysis->kfa_conf =
				*kf_conf_tab[KF_IC0_CONF_INDEX+cpuid];
			kf_guess_put(curr_analysis);
		}

		/* primary data cache */
		if (kf_conf_tab[KF_DC0_CONF_INDEX + cpuid]) {
			curr_analysis->kfa_info[KF_DC_LEVEL].kfi_type = KFTYPE_DC0;
			curr_analysis->kfa_info[KF_DC_LEVEL].kfi_inst = cpuid;
			curr_analysis->kfa_conf =
				*kf_conf_tab[KF_DC0_CONF_INDEX+cpuid];
			kf_guess_put(curr_analysis);
		}

		/* secondary cache */
		if (kf_conf_tab[KF_SC0_CONF_INDEX + cpuid]) {
			curr_analysis->kfa_info[KF_SC_LEVEL].kfi_type = KFTYPE_SC0;
			curr_analysis->kfa_info[KF_SC_LEVEL].kfi_inst = cpuid;
			curr_analysis->kfa_conf =
				*kf_conf_tab[KF_SC0_CONF_INDEX+cpuid];
			kf_guess_put(curr_analysis);
		}

		/* hand the caller's record back untouched */
		*curr_analysis = saved_analysis;
		return KF_SUCCESS;
	}

	/* ---- analysis pass ---- */

	/* if the cache error saved is not valid
	 * there is no need to analyze that
	 */
	if (cpu_err->ce_valid != 1)
		return KF_SUCCESS;

	/* get the dumped cache error register */
	cache_err = cpu_err->ce_cache_err_dmp.ce_cache_err;

	KF_DEBUG("\t\t\tkf_cpu_analyze:doing cpu%d analysis........\n",
		 cpuid);

	/* icache error --> [IC , 90] */
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_IC0_CONF_INDEX + cpuid],
			      R10K_ICACHE_ERR(cache_err),
			      90);
	/* dcache error --> [DC , 90] */
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_DC0_CONF_INDEX + cpuid],
			      R10K_DCACHE_ERR(cache_err),
			      90);

	/* the scache check supplies both the condition and the value */
	belief = kf_check_scache_error(cache_err,
				       cpu_err->ce_cache_err_dmp);
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SC0_CONF_INDEX + cpuid],
			      belief,
			      belief);

	/* system interface error with SA, SC or SR set
	 *	--> [SYSBUS , 70] and [HUB , 70]
	 */
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_SYSBUS_CONF_INDEX],
			      R10K_SYSINTF_ERR(cache_err) &&
			      cache_err & (CE_SA | CE_SC | CE_SR) ,
			      70);
	KF_CONDITIONAL_UPDATE(kf_conf_tab[KF_HUB_CONF_INDEX],
			      R10K_SYSINTF_ERR(cache_err) &&
			      cache_err & (CE_SA | CE_SC | CE_SR) ,
			      70);

	KF_DEBUG("\t\t\tkf_cpu_analyze:finished cpu analysis\n");
	return KF_SUCCESS;
}