/*
 * Source: irix-657m-src/stand/arcs/ide/EVEREST/lib/evfru.c
 * (2022-09-29 17:59:04 +03:00, 914 lines, 26 KiB, C)
 */

#if NOT_USED
/**************************************************************************
* *
* Copyright (C) 1992, Silicon Graphics, Inc. *
* *
* These coded instructions, statements, and computer programs contain *
* unpublished proprietary information of Silicon Graphics, Inc., and *
* are protected by Federal copyright law. They may not be disclosed *
* to third parties or copied or duplicated in any form, in whole or *
* in part, without the prior written consent of Silicon Graphics, Inc. *
* *
**************************************************************************/
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/reg.h>
#include <sys/EVEREST/everest.h>
#include <sys/EVEREST/evconfig.h>
#include <sys/EVEREST/everror.h>
#include <sys/EVEREST/evintr.h>
#include <sys/EVEREST/io4.h>
#include <sys/EVEREST/fchip.h>
#include <sys/EVEREST/s1chip.h>
/*
 * Analyzing Everest error data to determine the failing FRU is not an easy
 * job, and it may take years of field experience to make it good enough.
 *
 * No matter how good a job we do, the result is still a guess!
 */
/* Exception frame given to everest_error_fru(); handed to Analyze_address(). */
static eframe_t *evfru_ep;
/*
 * Data structures used within evfru.c to keep track of error propagation
 * upstream/downstream
 */
/* Bitmask by slot (1 << slot): data errors of that slot are already explained. */
static int Ignore_data_error;
/* Bitmasks by slot, rebuilt by ebus_fru_check(): boards whose own Ebus
 * data / address were flagged as bad.
 */
static int my_data_err, my_addr_err;
/* Physical adapter number from which io4_fru_check() expects a matching
 * command / data error; the adapter checks clear them, 0 means none pending.
 */
static int Expect_adap_cmd_err, Expect_adap_data_err;
/* Per-slot flags. ibus_par_err[] is loaded by epc_fru_check().
 * NOTE(review): io4_ebus_timeout[] is read by fchip_fru_check() but no
 * writer is visible in this file - confirm whether ia_ebus_timeout[] was meant.
 */
static char io4_ebus_timeout[EV_MAX_SLOTS], ibus_par_err[EV_MAX_SLOTS];
/* Per-slot: io4_fru_check() sets this when the IA already reported a bad
 * command sent to an IOA, so adapters need not report it again.
 */
static char Ignore_ibus_cmd_err[EV_MAX_SLOTS];
/* Per-slot: same as above, for bad data put on the IBus by the IA. */
static char Ignore_ibus_data_err[EV_MAX_SLOTS];
/* Per-slot: set by io4_fru_check() when the IA reported an Ebus timeout. */
static char ia_ebus_timeout[EV_MAX_SLOTS];
/* Per-slot: adapter number taken from ebus_error2 when the IA reported an
 * Ebus address error; 0 means no such error pending.
 */
static char ia_addrerr[EV_MAX_SLOTS];
static char fruline[128]; /* tmp buf to format a line */
/* Set by fchip_fru_check(); reset by each F-chip based adapter check
 * (vmecc, io4hip, fcg) before it calls fchip_fru_check().
 */
static int Expect_fci_par_err, Ignore_fci_par_err;
/*
 * FRU reporting helpers.
 *
 * Each fru_msg_*() macro formats one line through ev_perr() at level
 * FRU_LVL.  Arguments are (slot[, adapter/leaf/cpu ...], reason string).
 *
 * None of the macros ends in a semicolon: the caller supplies it.  This
 * keeps every macro usable as the body of an unbraced if/else.
 */
#define FRU_LVL 1
#define fru_msg_s1(s,p,c,m) \
	ev_perr(FRU_LVL, "FRU: S1 Chnl %d, adap %d, IO4 slot %d. Reason: %s\n",c,p,s,m)
#define fru_msg_cpu(s,c,m) \
	ev_perr(FRU_LVL, "FRU: CPU %d in slot %d. Reason: %s\n",c,s,m)
#define fru_msg_mem(s,l,b,m) \
	ev_perr(FRU_LVL, "FRU: Simm in Bank %d leaf %d MC3 slot %d. Reason: %s\n", b,l,s,m)
#define fru_msg_md(s,m) \
	ev_perr(FRU_LVL, "FRU: MC3 in slot %d. Reason: %s\n",s,m)
#define fru_msg_fchip(s,p,m) \
	ev_perr(FRU_LVL, "FRU: Fchip adap %d, IO4 slot %d. Reason: %s\n", p, s, m)
#define fru_msg_vme(s,p,m) \
	ev_perr(FRU_LVL, "FRU: vmecc adap %d, IO4 slot %d. Reason: %s\n", p, s, m)
/* HIPPI adapters are reported with the vmecc message format. */
#define fru_msg_hip fru_msg_vme
#define fru_msg_fci(s,p,m) \
	ev_perr(FRU_LVL, "FRU: FCI in adap %d IO4 slot %d. Reason: %s\n", p, s, m)
#define fru_msg_epc(s,p,m) \
	ev_perr(FRU_LVL, "FRU: EPC adap %d IO4 slot %d. Reason: %s\n", p, s, m)
#define fru_msg_ibus(s,m) \
	ev_perr(FRU_LVL, "FRU: IBus in IO4 slot %d. Reason: %s\n",s, m)
#define fru_msg_io4(s,m) \
	ev_perr(FRU_LVL, "FRU: IO4 in slot %d. Reason: %s\n",s,m)
#define fru_msg_ebus(m) \
	ev_perr(FRU_LVL, "FRU: Ebus. Reason: %s\n", m)
#define fru_msg_sw(m) \
	ev_perr(FRU_LVL, "Software Error???. Reason: %s\n",m)
#define fru_msg_mapram(s,m) \
	ev_perr(FRU_LVL, "FRU: Mapram in IO4 slot %d. Reason: %s\n", s, m)
/* Reporters for unknown board / adapter types; not defined in this file. */
extern void bad_board_type(int, int);
extern void bad_ioa_type(int, int, int);
/* Forward declaration; the definition appears later in this file. */
extern void ebus_fru_check(void);
/*
 * CC_ERROR_TIMEOUT: the set of CC timeout bits tested by cpu_fru_check().
 * The R4000 variant additionally includes the MYINT timeout bit.
 * NOTE(review): if neither R4000 nor TFP is defined, CC_ERROR_TIMEOUT is
 * not defined by this file.
 */
#if R4000
#define CC_ERROR_TIMEOUT (CC_ERROR_MYREQ_TIMEOUT|CC_ERROR_MYRES_TIMEOUT| \
CC_ERROR_MYINT_TIMEOUT)
#endif
#if TFP
#define CC_ERROR_TIMEOUT (CC_ERROR_MYREQ_TIMEOUT|CC_ERROR_MYRES_TIMEOUT)
#endif
/*
* List of hardware which are considered as FRUs
*
* Each of the boards (IP19, IO4, MC3)
* One of 4 CPUs in IP19 board, Tagram, and Secondary Cache
* Any Leaf, Bank and SIMM in Memory Board.
* Mezz boards in IO4, Flat cable, VCAM, Any SCSI channels
*
* On-board ASICs in IO4 are not taken as FRUs. If there is some problem in
* them, entire IO4 board is considered as FRU.
*
* When software tries to analyze the error state, it tries to point to one
* or more of the above mentioned hardware to be broken.
*
*/
/*
 * FRU analysis logic..
 *
 * The aim of this FRU analyzer is to weed out the stray error messages (which
 * would have occurred due to propagation of errors), and identify the root
 * cause of a particular system crash. Once a set of error messages is
 * identified as the main cause, it finds the FRUs which could have
 * caused this error and flags them as broken.
 *
 * This is a fairly simplistic approach to the problem of analyzing the
 * errors, but one which I believe is most useful and implementable without
 * much complexity.
 *
 */
/*
* Some support routines needed
*/
/*
 * Analyze_address: judge whether the faulting address in the exception
 * frame is bad.
 *
 * ep - exception frame (currently unused).
 *
 * Currently a stub: it always reports a software error and returns 1, so
 * callers that test for a 0 return never take that branch.
 */
/* ARGSUSED */
int
Analyze_address(eframe_t *ep)
{
	fru_msg_sw("Bad Address used by cpu..");
	return 1;
}
/*
 * cpu_fru_check: look for faults local to the CPU board in 'slot'.
 *
 * For every enabled CPU on the board, the CC error bits (cc_ertoip) are
 * examined together with the board's A chip error register:
 *
 *  - (R4000 only) Scache multi-bit error or Tagram parity error: the CPU
 *    is the FRU; the slot is marked in Ignore_data_error and my_data_err.
 *  - CC out of sync: the CPU is the FRU.
 *  - CC saw an address (data) parity error while my_addr_err
 *    (my_data_err) is empty: treated as a local A->CC problem.
 *  - CC timeout: the CPU is blamed only if Analyze_address() returns 0.
 *  - CC's own address was flagged: the A chip bits for this CPU decide
 *    whether the CC->A path is blamed.
 *  - CC's own data was flagged, or the A chip saw CC->D parity for this
 *    CPU, while the slot is not yet in Ignore_data_error: the CC->D path
 *    is blamed and the slot is added to Ignore_data_error.
 */
void
cpu_fru_check(int slot)
{
	evbrdinfo_t *board = &(EVCFGINFO->ecfg_board[slot]);
	ulong slot_bit = (1 << slot);
	int a_err = EVERROR->ip[board->eb_cpu.eb_cpunum].a_error;
	int cpu;

	for (cpu = 0; cpu < EV_MAX_CPUS_BOARD; cpu++) {
		int vpid, cc_err;
		int addr_par_local, data_par_local;

		if (!board->eb_cpuarr[cpu].cpu_enable)
			continue;

		vpid = board->eb_cpuarr[cpu].cpu_vpid;
		cc_err = EVERROR->cpu[vpid].cc_ertoip;

#if R4000
		if (cc_err & (CC_ERROR_SCACHE_MBE | CC_ERROR_PARITY_TAGRAM)) {
			if (cc_err & CC_ERROR_SCACHE_MBE) {
				fru_msg_cpu(slot, cpu, "Bad Scache");
			} else {
				fru_msg_cpu(slot, cpu, "Bad Tagram in CC");
			}
			Ignore_data_error |= slot_bit;
			/* OR-ing is a no-op when the bit is already set */
			my_data_err |= slot_bit;
		}
#endif /* R4000 */

		if (cc_err & CC_ERROR_ASYNC) {
			fru_msg_cpu(slot, cpu, "Internal Bus Out of Sync");
		}

		/* Parity seen by CC but by no board on the Ebus: local fault */
		addr_par_local = (cc_err & CC_ERROR_PARITY_A) && !my_addr_err;
		data_par_local = (cc_err & CC_ERROR_PARITY_D) && !my_data_err;
		if (addr_par_local || data_par_local) {
			fru_msg_cpu(slot, cpu, "Broken A->CC Path");
		}

		if ((cc_err & CC_ERROR_TIMEOUT) &&
		    Analyze_address(evfru_ep) == 0) {
			fru_msg_cpu(slot, cpu, "Ebus Protocol Timeout");
		}

		if (cc_err & CC_ERROR_MY_ADDR) {
			/* Either the A chip saw bad parity on the address
			 * from CC, or ADDR_HERE timed out.
			 */
			if (a_err & (A_ERROR_CC2A_PARITY << cpu)) {
				fru_msg_cpu(slot, cpu, "Broken CC->A path");
			}
			if ((a_err & (A_ERROR_ADDR_HERE_TIMEOUT << cpu)) &&
			    Analyze_address(evfru_ep) == 0) {
				fru_msg_cpu(slot, cpu, "Broken CC->A path");
			}
		}

		/* Data error already attributed for this slot? */
		if (Ignore_data_error & slot_bit)
			continue;

		if ((cc_err & CC_ERROR_MY_DATA) ||
		    (a_err & (A_ERROR_CC2D_PARITY << cpu))) {
			/* Should be some broken path between CC and D chip */
			fru_msg_cpu(slot, cpu, "Broken CC->D data path");
			Ignore_data_error |= slot_bit;
		}
	}
}
void
mc3_fru_check(int slot)
{
int mbid = EVCFGINFO->ecfg_board[slot].eb_mem.eb_mc3num;
mc3error_t *me = &(EVERROR->mc3[mbid]);
int bank;
if((me->ebus_error & (MC3_EBUS_ERROR_SENDER_DATA)) &&
!((me->mem_error[0] & MC3_MEM_ERROR_MBE ) ||
(me->mem_error[1] & MC3_MEM_ERROR_MBE ))){
fru_msg_md(slot, "Bad MD chip");
return;
}
if (me->mem_error[0] & MC3_MEM_ERROR_MBE) {
bank = me->syndrome0[0] ? 0 :
me->syndrome1[0] ? 1 :
me->syndrome2[0] ? 2 : 3;
fru_msg_mem(slot, 0, bank, "Bad SIMMs causing Data error");
}
if (me->mem_error[1] & MC3_MEM_ERROR_MBE) {
bank = me->syndrome0[0] ? 0 :
me->syndrome1[0] ? 1 :
me->syndrome2[0] ? 2 : 3;
fru_msg_mem(slot, 1, bank, "Bad SIMMs causing Data error");
}
}
/*****************************************************************************/
/*
 * fchip_fru_check: analyze the F chip error bits of one adapter.
 *
 * ferror - F chip error register contents (already masked by the caller)
 * slot   - IO4 slot number
 * padap  - physical adapter number
 *
 * io4_fru_check and epc_fru_check should have been executed before
 * calling this routine, since their per-slot flags are consulted here.
 * Besides reporting, this routine sets Expect_fci_par_err and
 * Ignore_fci_par_err for the caller and clears the Expect_adap_* value
 * when it names this adapter.
 */
void
fchip_fru_check(ulong ferror, int slot, int padap)
{
	/* F to IBus command error */
	if (ferror & F2IBUS_CMND_ERR) {
		fru_msg_fchip(slot, padap, "Command On IBus had Bad parity");
		if (padap == Expect_adap_cmd_err) {
			Expect_adap_cmd_err = 0;
		}
	}

	if (ferror & FCI2F_PAR_ERR) {
		Expect_fci_par_err = 1;
	}

	/* F to IBus data error, not explained by bad parity from the FCI */
	if ((ferror & F2IBUS_DATA_ERR) && !Expect_fci_par_err) {
		fru_msg_fchip(slot, padap, "Data from F on IBus had Bad parity");
		if (padap == Expect_adap_data_err) {
			Expect_adap_data_err = 0;
		}
	}

	if ((ferror & FCHIP_TIMEOUT_ERR) &&
	    !(io4_ebus_timeout[slot] || ibus_par_err[slot])) {
		fru_msg_fchip(slot, padap, "Command timeout");
	}

	if ((ferror & FCHIP_ERROR_FR_IBUS_CMND) &&
	    !(Ignore_ibus_cmd_err[slot] || ibus_par_err[slot])) {
		fru_msg_fchip(slot, padap, "Broken Internal command path");
	}

	if ((ferror & IBUS2F_DATA_ERR) &&
	    !(Ignore_ibus_data_err[slot] || ibus_par_err[slot])) {
		fru_msg_fchip(slot, padap, "Broken Internal Data path");
		Ignore_fci_par_err = 1;
	}

	if (ferror & FCHIP_ERROR_FR_IBUS_PIOW_INTRNL) {
		fru_msg_fchip(slot, padap, "Internal PIO write error");
	}
}
/*
 * vme_dmaaddr:
 *	Find the IO address that maps to the Ebus physical address latched
 *	in the IO4 error registers.
 * Parameters:
 *	slot   - IO4 slot number (not used by the lookup itself)
 *	window - IO4 window number; selects the EVERROR->io4[] entry and
 *		 the small window used to read the F chip TLB
 *	padap  - physical adapter number of the F chip
 * Assumptions:
 *	ebus_error1 and ebus_error2 in IO4 error register are valid.
 *	The value got set due to a DMA request from Fchip.
 * Return:
 *	The IO address which maps to the physical address found in
 *	ebus_error1 and ebus_error2, or 0 if no valid F TLB entry matches.
 *
 */
#define EBUS_PG_MASK 0xFFFFFFF /* 40 - 12 = 28 bits */
/* Build an IO address from a TLB IO entry (x) and the page half (y). */
#define IO_ADDR(x,y) ((((x) << 1)|(y)) << 12)
int
vme_dmaaddr(int slot, int window, int padap)
{
	uint phys_addr;
	uint io_addr, i;
	evreg_t ftlb_addr;
	__psunsigned_t swin = SWIN_BASE(window, padap);

	/* phys_addr holds the physical page number as it would appear in
	 * the F TLB: low 8 bits of ebus_error2 above bits 12-31 of
	 * ebus_error1, i.e. the page number in bits 0-27.
	 */
	phys_addr = ((EVERROR->io4[window].ebus_error2 & 0xff) << 20) |
		    ((EVERROR->io4[window].ebus_error1 >> 12) & 0xfffff);

	/* Scan the 8 F TLB entries; registers are 8 bytes apart */
	for (i = 0; i < 8; i++) {
		/* Out of 21 addr bits in io_addr, upper two correspond to
		 * Mapram-id. Strip them while returning the io-address
		 * NOTE(review): no explicit masking is done; presumably the
		 * shifts in IO_ADDR push them out of a 32-bit uint - confirm.
		 */
		io_addr = EV_GET_REG(swin+FCHIP_TLB_IO0+(i*8));
		if (!(io_addr & 0x200000)) /* Not a valid entry */
			continue;

		/* Each Ebus TLB register holds two page numbers */
		ftlb_addr = EV_GET_REG(swin+FCHIP_TLB_EBUS0+(i*8));
		if ((ftlb_addr & EBUS_PG_MASK) == phys_addr)
			/* Lower IO page */
			return IO_ADDR(io_addr, 0);
		if (((ftlb_addr >> 32) & EBUS_PG_MASK) == phys_addr)
			/* Higher IO page */
			return IO_ADDR(io_addr, 1);
	}
	return 0;
}
void
vmecc_fru_check(int slot, int padap)
{
int vadap = EVCFGINFO->ecfg_board[slot].eb_ioarr[padap].ioa_virtid;
vmeccerror_t *vmecc = &EVERROR->vmecc[vadap];
ulong vmecc_error = vmecc->error;
ulong ferror = EVERROR->fvmecc[vadap].error & FCHIP_ERROR_MASK;
char board[32];
/* Check for any Fchip originated Errors. Otherwise, it should just
* set some flags to inform VMECC to ignore data errors
*/
Expect_fci_par_err = Ignore_fci_par_err = 0;
fchip_fru_check(ferror, slot, padap);
if (vmecc_error & VMECC_ERROR_VMEBUS_TO ||
vmecc_error & VMECC_ERROR_VMEBUS_PIOW) {
fru_msg_vme(slot, padap, "Broken VME Bus or Bad Board");
}
/* PIOR path, VME -> VMECC -> F */
if (vmecc_error & VMECC_ERROR_VMEBUS_PIOR){
vme_ioaddr(vmecc->addrvme, slot, padap, board);
sprintf(fruline,"PIO Read from VME Board %s failed", board);
fru_msg_vme(slot, padap, fruline);
if (Expect_adap_data_err == padap)
Expect_adap_data_err = 0;
if (Expect_fci_par_err)
Expect_fci_par_err = 0;
}
/* Ignore VMECC_ERROR_FCIDB_TO since it gets set in a bogus way */
if (vmecc_error & VMECC_ERROR_VMEBUS_SLVP ){
/* VME did not get a response for a DMA request... */
if (ia_addrerr[slot] == padap){
fru_msg_vme(slot,padap,"Bad DMA request from a VME controller");
}
else
fru_msg_fci(slot,padap,"Problem in Flat Cable");
}
if ((vmecc_error & (VMECC_ERROR_VMEBUS_SLVP|VMECC_ERROR_FCI_PIOPAR)) &&
!Ignore_fci_par_err)
fru_msg_vme(slot, padap, "Problem in Flat Cable..");
if (Expect_fci_par_err)
fru_msg_fci(slot, padap, "F Got bad parity data from FCI");
}
/*
 * io4hip_fru_check: analyze the errors of the HIPPI adapter 'padap' on the
 * IO4 in 'slot'.
 *
 * The HIPPI adapter is examined through the vmecc/fvmecc error entries
 * selected by the adapter's virtual id, in the same way as
 * vmecc_fru_check().  fchip_fru_check() runs first and sets the
 * Expect_fci_par_err / Ignore_fci_par_err flags used below.
 */
void
io4hip_fru_check(int slot, int padap)
{
	int vadap = EVCFGINFO->ecfg_board[slot].eb_ioarr[padap].ioa_virtid;
	vmeccerror_t *vmecc = &EVERROR->vmecc[vadap];
	ulong vmecc_error = vmecc->error;
	ulong ferror = EVERROR->fvmecc[vadap].error & FCHIP_ERROR_MASK;
	char board[32];

	/* Check for any Fchip originated Errors. Otherwise, it should just
	 * set some flags to inform VMECC to ignore data errors
	 */
	Expect_fci_par_err = Ignore_fci_par_err = 0;
	fchip_fru_check(ferror, slot, padap);

	if (vmecc_error & VMECC_ERROR_VMEBUS_TO ||
	    vmecc_error & VMECC_ERROR_VMEBUS_PIOW) {
		fru_msg_hip(slot, padap, "Broken HIPPI(vmecc) connection");
	}

	/* PIOR path, VME -> VMECC -> F */
	if (vmecc_error & VMECC_ERROR_VMEBUS_PIOR) {
		/* Call kept from the original; the board name it fills in
		 * is not part of the HIPPI message.
		 */
		vme_ioaddr(vmecc->addrvme, slot, padap, board);
		/* The format has no conversion, so no argument is passed */
		sprintf(fruline,"PIO Read from HIPPI(vmecc) failed");
		fru_msg_hip(slot, padap, fruline);
		if (Expect_adap_data_err == padap)
			Expect_adap_data_err = 0;
		if (Expect_fci_par_err)
			Expect_fci_par_err = 0;
	}

	/* Ignore VMECC_ERROR_FCIDB_TO since it gets set in a bogus way */

	if ((vmecc_error & (VMECC_ERROR_VMEBUS_SLVP|VMECC_ERROR_FCI_PIOPAR)) &&
	    !Ignore_fci_par_err)
		fru_msg_hip(slot, padap, "Bad parity data sent on FCI");

	if (Expect_fci_par_err) {
		fru_msg_fci(slot, padap, "F Got bad parity data on FCI");
		Expect_adap_data_err = 0;
	}
}
void
epc_fru_check(int slot, int padap)
{
/* There is no error protection on the PBUS side at all */
/* All errors reported by epc are fatal */
/* We will check errors as two different kinds, parity and timeout. */
/* Parity errors could be found by IA and/or EPC */
int vadap = EVCFGINFO->ecfg_board[slot].eb_ioarr[padap].ioa_virtid;
ulong epc_error = EVERROR->epc[vadap].ibus_error;
if (epc_error == 0)
return;
ibus_par_err[slot] = EPC_IBUS_PAR(epc_error);
/* Error in Command/Data from IA to IOA */
switch(EPC_IERR_IA(epc_error)){
case 0 : break; /* No error if Zero */
case EPC_IERR_IA_PIOW_DATA:
case EPC_IERR_IA_DMAR_DATA:
if (Ignore_ibus_data_err[slot] == 0){ /* IA saw no error !!*/
if(ibus_par_err[slot] == 0)
fru_msg_epc(slot,padap,"Broken EPC data path");
else
fru_msg_ibus(slot,"EPC detected Bad Parity on IBus data");
}
break;
case EPC_IERR_IA_UNXPCTD_DMA:
/* EPC received Unexpected DMA Read resp */
if ((epc_error & EPC_DMA_RDRSP_TOUT) == 0)
fru_msg_io4(slot,"EPC Received unexpected DMA response");
break;
case EPC_IERR_IA_BAD_CMND:
if (Ignore_ibus_cmd_err[slot] == 0){ /* IA saw no error !!! */
if (ibus_par_err[slot] == 0)
fru_msg_epc(slot,padap,"Broken EPC command path");
else
fru_msg_ibus(slot,"EPC detected Bad Parity on IBus cmd");
}
break;
default:
fru_msg_epc(slot,padap,"Unexpected Error value from EPC");
} /* switch */
/*
* Non participation Errors observed by EPC are being ignored now..
* Need to find a way to make use of it.
*/
/* Errors in data/cmd sent by EPC */
switch(EPC_IERR_EPC(epc_error)){
case 0: break; /* No error if Zero */
case EPC_IERR_EPC_PIOR_CMND:
case EPC_IERR_EPC_DMAR_CMND:
case EPC_IERR_EPC_DMAW_CMND:
case EPC_IERR_EPC_INTR_CMND:
case EPC_IERR_EPC_PIOR_CMNDX: /* Cmd from some device on Pbus */
case EPC_IERR_EPC_DMAR_CMNDX: /* DMA rd/wr req from Pport */
case EPC_IERR_EPC_DMAW_CMNDX:
/*
* XXXXXXXXXXXXXXXXXXXXXXXX
* Not comfortable with this yet.
* Is it possible that EPC has command error bits set but
* no parity error is seen on Ibus and by IA??
* Is it possible that EPC and IBus see errors and not IA ??
* Is it possible that IA sees it and not EPC or IBus ??
*/
fru_msg_epc(slot,padap,"Sent commands with bad parity");
if (Expect_adap_cmd_err == padap)
Expect_adap_cmd_err = 0;
break;
case EPC_IERR_EPC_PIOR_DATA:
case EPC_IERR_EPC_DMAW_DATA:
case EPC_IERR_EPC_PIOR_DATAX: /* response from some device on Pbus */
case EPC_IERR_EPC_DMAW_DATAX: /* DMA write data from Pport */
/*
* XXXXXXXXXXXXXXXXXXXXXXXXXXX
* Similar questions as above case ???
*/
fru_msg_epc(slot,padap,"Data with bad parity from EPC");
if (Expect_adap_data_err == padap)
Expect_adap_data_err = 0;
break;
default:
fru_msg_epc(slot,padap,"Invalid Error in EPC Ibus error register");
break;
} /* switch */
}
void
fcg_fru_check(int slot, int padap)
{
int vadap = EVCFGINFO->ecfg_board[slot].eb_ioarr[padap].ioa_virtid;
ulong ferror = EVERROR->ffcg[vadap].error & FCHIP_ERROR_MASK;
/* Check for any Fchip originated Errors. Otherwise, it should just
* set some flags to inform VMECC to ignore data errors
*/
Expect_fci_par_err = Ignore_fci_par_err = 0;
fchip_fru_check(ferror, slot, padap);
if (Expect_fci_par_err){
fru_msg_fci(slot, padap, "F Got bad parity data on FCI");
Expect_adap_data_err = 0;
}
}
/*
 * scsi_fru_check: analyze the IBus error register of the S1 (SCSI)
 * adapter 'padap' on the IO4 in 'slot'.
 *
 * This should be executed after io4_fru_check and epc_fru_check, since
 * the per-slot flags they maintain are consulted here.  Errors in what
 * the S1 sent clear the matching Expect_adap_* flag.
 *
 * Compiled to an empty routine under SABLE.
 */
void
scsi_fru_check(int slot, int padap)
{
#if SABLE
	/* SABLE does not support S1 chip emulation */
	return;
#else
	int virt_id = EVCFGINFO->ecfg_board[slot].eb_ioarr[padap].ioa_virtid;
	ulong ibus_err = EVERROR->s1[virt_id].ibus_error;
	int chnl_bits;
	int bad_data_in, bad_cmd_in;

	if (!ibus_err)
		return;

	/* Bit0-> Chnl 0, .. bit2-> chnl 2 */
	chnl_bits = (ibus_err >> 6) & 7;

	/* Bad parity received by S1 that the IA has not already owned up to */
	bad_data_in = (ibus_err & (S1_IERR_IN_DATA|S1_IERR_DMA_READ)) &&
		      !Ignore_ibus_data_err[slot];
	bad_cmd_in = (ibus_err & (S1_IERR_IN_CMD|S1_IERR_WRITE_REQ)) &&
		     !Ignore_ibus_cmd_err[slot];
	if (bad_data_in || bad_cmd_in) {
		fru_msg_ibus(slot,"S1 received cmd/data with Bad parity");
	}

	if (ibus_err & (S1_IERR_OUT_DATA)) {
		Expect_adap_data_err = 0;
		fru_msg_s1(slot,padap,chnl_bits,"Sent Bad Parity data on IBus");
	}

	if (ibus_err & S1_IERR_OUT_CMD) {
		Expect_adap_cmd_err = 0;
		fru_msg_s1(slot,padap,chnl_bits,"Sent Bad Parity command on IBus");
	}

	if (!ia_ebus_timeout[slot] && (ibus_err & S1_IERR_SURPRISE)) {
		fru_msg_io4(slot,"S1 received an unexpected DMA response");
	}

	if (ibus_err & S1_IERR_PIO_READ) {
		Expect_adap_data_err = 0;
		fru_msg_s1(slot,padap,chnl_bits,"Bad parity on PIO data");
	}
#endif /* !SABLE */
}
/*
 * adap_fru_check: run the type specific FRU check on every enabled
 * adapter of the IO4 board in 'slot'.
 *
 * Adapters 1 .. IO4_MAX_PADAPS-1 are visited in order; an adapter of an
 * unknown type is reported through bad_ioa_type().
 */
void
adap_fru_check(int slot)
{
	evbrdinfo_t *board = &(EVCFGINFO->ecfg_board[slot]);
	int adap;

	for (adap = 1; adap < IO4_MAX_PADAPS; adap++) {
		int kind;

		if (!board->eb_ioarr[adap].ioa_enable)
			continue;

		kind = board->eb_ioarr[adap].ioa_type;
		switch (kind) {
		case IO4_ADAP_NULL:
			/* Empty adapter position: nothing to analyze */
			break;

		case IO4_ADAP_EPC:
			epc_fru_check(slot, adap);
			break;

		case IO4_ADAP_SCSI:
		case IO4_ADAP_SCIP:
			scsi_fru_check(slot, adap);
			break;

		case IO4_ADAP_VMECC:
			vmecc_fru_check(slot, adap);
			break;

		case IO4_ADAP_HIPPI:
			io4hip_fru_check(slot, adap);
			break;

		case IO4_ADAP_FCG:
			fcg_fru_check(slot, adap);
			break;

		default:
			bad_ioa_type(kind, slot, adap);
			break;
		}
	}
}
/* IA IBus error bits meaning "bad data arrived from an IOA". */
#define BAD_DATA_FRIOA (IO4_IBUSERROR_DMAWDATA|IO4_IBUSERROR_PIORESPDATA)
/*
 * io4_fru_check: analyze the error state of the IO4 board in 'slot'.
 *
 * Works in three phases:
 *  1. Decode the IA's Ebus and IBus error registers, report faults that
 *     are clearly the IO4's (or software's) own, and set the per-slot
 *     Ignore_* flags and the Expect_adap_* values for the adapters.
 *  2. Call adap_fru_check() so every adapter on the board can report its
 *     own errors and clear the Expect_adap_* values it accounts for.
 *  3. Anything still pending afterwards is blamed on the IO4 itself.
 */
void
io4_fru_check(int slot)
{
int padap;
evbrdinfo_t *eb = &(EVCFGINFO->ecfg_board[slot]);
int window = eb->eb_io.eb_winnum;
ulong ia_ierror = EVERROR->io4[window].ibus_error;
ulong ia_eerror = EVERROR->io4[window].ebus_error;
ulong ia_eerror2 = EVERROR->io4[window].ebus_error2;
/* Reset this slot's flags.
 * NOTE(review): ia_ebus_timeout[slot] is set below but never cleared here.
 */
ia_addrerr[slot] = 0;
Ignore_ibus_cmd_err[slot] = 0;
Ignore_ibus_data_err[slot] = 0;
/* Adapter named by the IBus error register; when it names none (0),
 * IO4_MAX_PADAPS is used instead, which no adapter number in
 * adap_fru_check() can equal.
 */
padap = IO4_IBUSERROR_IOA(ia_ierror);
if (!padap)
padap = IO4_MAX_PADAPS;
if (ia_eerror & IO4_EBUSERROR_INVDIRTYCACHE)
fru_msg_ebus("IO4 Received Invalidate for dirty cache line");
if (ia_eerror & IO4_EBUSERROR_PIO)
fru_msg_sw("IO4 detected an illegal PIO");
if (ia_eerror & IO4_EBUSERROR_BADIOA)
fru_msg_sw("IO4 detected PIO to Non-existant IOA");
/* Bad data sent on the Ebus that did not originate from an IOA */
if (ia_eerror & IO4_EBUSERROR_MY_DATA_ERR){
if((ia_ierror & BAD_DATA_FRIOA) == 0)
fru_msg_io4(slot,"Bad IA/ID connection to Ebus");
}
if (ia_eerror & IO4_EBUSERROR_MY_ADDR_ERR){
/* Complicated Case...
* Could happen due to two reasons..
*
* 1- Broken software. IOA was programmed to
* do DMA from a bad address..OR mapram programmed incorrectly
* or 2nd level Map table got trashed...
*
* 2- Broken ASIC.. sent out a wrong DMA address
*
* I dont know how to handle first case... It may be possible
* to find out the IO address which caused DMA request to this
* Ebus address (assuming it came from FCG/VMECC via Fchip).
* But how to map the IO address to the driver ????
*
* Second case should become obvious once the adapter error is
* processed..
*
* For now just set a flag
*/
/* NOTE(review): this overwrites the padap taken from the IBus error
 * register, which the Expect_adap_* assignments below still use - confirm
 * this is intended.
 */
padap = (ia_eerror2 & IO4_EBUSERROR2_IOA_ID) >> 16;
ia_addrerr[slot] = padap;
}
if (ia_eerror & (IO4_EBUSERROR_TRANTIMEOUT|IO4_EBUSERROR_TIMEOUT)){
ia_ebus_timeout[slot] = 1;
if (ia_eerror & IO4_EBUSERROR_TRANTIMEOUT)
fru_msg_io4(slot,"IO4 timed out trying to access EBus");
if (ia_eerror & IO4_EBUSERROR_TIMEOUT)
fru_msg_ebus("IO4 got no response from some board(Memory??)");
}
if (ia_eerror & IO4_EBUSERROR_DATA_ERR)
/* Ignore all data errors down the line */
Ignore_data_error |= ( 1 << slot);
/* Processing IA Ibus error register contents */
if (ia_ierror & IO4_IBUSERROR_MAPRAM)
fru_msg_mapram(slot,"Parity Error in Data from Mapram");
/* Command sent from IA to IOA was flagged to have Bad parity */
if (ia_ierror & IO4_IBUS_CMDERR_TO_IOA){
/* Broken Ibus/IA. Replace IO4 */
fru_msg_io4(slot,"Broken IBus Connection to IA");
Ignore_ibus_cmd_err[slot] = 1;
}
if (ia_ierror & (IO4_IBUSERROR_IARESP|IO4_IBUSERROR_PIOWDATA)){
/* Bad data sent onto Ibus by IA.. Check if it came from Ebus */
/* NOTE(review): tests the whole Ignore_data_error mask, not just this
 * slot's bit - confirm.
 */
if (Ignore_data_error == 0){
fru_msg_io4(slot,"Broken IBus Connection to IA");
}
Ignore_ibus_data_err[slot] = 1;
}
/*
* Some Command or Data sent from IOA to IA was found to have bad
* parity. Check if the adapter also says so. If adapter thinks
* everything is fine, we have a broken IBus. Otherwise handle
* the error from adapter perspective.
*/
if (ia_ierror & (IO4_IBUSERROR_PIORESPDATA|IO4_IBUSERROR_DMAWDATA))
/* Ensure that the Adapter 'padap' did send bad data to IA */
Expect_adap_data_err = padap;
if (ia_ierror & IO4_IBUS_CMDERR_FR_IOA)
Expect_adap_cmd_err = padap;
/* Let every adapter on this board report and clear what it explains */
adap_fru_check(slot);
/* Some of the 'Expect*' variables set by IO4 should have been
* reset during adapter_fru processing. If they are not turned
* off, then the adapters did not see the errors which IO4 saw,
* indicating a broken path
*/
if (Expect_adap_data_err){
/* Adapter did not send down any bad parity data. So the
* problem should be in ID
*/
fru_msg_io4(slot,"ID chip got bad data, but NOT sent by padaps");
Expect_adap_data_err = 0;
}
if (Expect_adap_cmd_err){
/* Adapter did not get flagged by IBus that a command it sent
* was bad, but IA received bad command !!!!
*/
fru_msg_io4(slot,"IA chip got bad command, but NOT sent by padaps");
Expect_adap_cmd_err = 0;
}
if (ia_addrerr[slot]){
/* There was no related error bit set in any other adapter .
* eg.. DMA error/timeout. So this should be due to broken
* IA
*/
/* ia_addrerr[slot] is non-zero only when padap was taken from
 * ebus_error2 above, so padap names that adapter here.
 * NOTE(review): ioa_name() is not declared in the visible source.
 */
sprintf(fruline,"IA failed in Ebus read for data requested by %s",
ioa_name(eb->eb_ioarr[padap].ioa_type));
fru_msg_io4(slot,fruline);
ia_addrerr[slot] = 0;
}
}
/*****************************************************************************/
/*
* VMECC
* --------- |
* ---------| -- FCI
* ---------| |
* ---------| EPC S1 F
* | CC | | | |
* --------- --------------------- IBUS
* | |
* --------- --------- ---------
* | A + D |... | MA+MD |... | IA+ID |...
* --------- --------- ---------
* | | |
* ======================================================== EBUS
*
*/
/*
* The backplane check is done first. These error state are kept global thus
* to be used later on by other module's xxx_fru_check().
*/
void
ebus_fru_check(void)
{
int slot, pcpuid;
ulong mask, error, slotmask=0;
my_addr_err = my_data_err = 0;
/*
* only checks ADDR_ERR and DATA_ERR state on all cards.
*/
for (slot = 1, mask = 2; slot < EV_MAX_SLOTS; slot++, mask <<= 1) {
evbrdinfo_t *eb = &(EVCFGINFO->ecfg_board[slot]);
if (eb->eb_enabled == 0)
continue;
slotmask |= mask;
switch (eb->eb_type) {
case EVTYPE_IP19:
pcpuid = eb->eb_cpu.eb_cpunum;
error = EVERROR->ip[pcpuid].a_error;
if (error & A_ERROR_MY_ADDR_ERR)
my_addr_err |= mask;
if (error & (A_ERROR_CC2D_PARITY << pcpuid))
my_data_err |= mask;
for (pcpuid = 0; pcpuid < 4; pcpuid++) {
error = EVERROR->cpu[eb->eb_cpuarr[pcpuid].cpu_vpid].cc_ertoip;
if (error & CC_ERROR_MY_ADDR)
my_addr_err |= mask;
if (error & CC_ERROR_MY_DATA)
my_data_err |= mask;
}
break;
case EVTYPE_MC3:
error = EVERROR->mc3[eb->eb_mem.eb_mc3num].ebus_error;
if (error & MC3_EBUS_ERROR_SENDER_ADDR)
my_addr_err |= mask;
if (error & MC3_EBUS_ERROR_SENDER_DATA)
my_data_err |= mask;
break;
case EVTYPE_IO4:
error = EVERROR->io4[eb->eb_io.eb_winnum].ebus_error;
if (error & IO4_EBUSERROR_MY_ADDR_ERR)
my_addr_err |= mask;
if (error & IO4_EBUSERROR_MY_DATA_ERR)
my_data_err |= mask;
break;
default:
bad_board_type(eb->eb_type, slot);
}
}
}
/*
 * everest_error_fru: entry point of the FRU analysis.
 *
 * ep - exception frame of the failure; remembered in evfru_ep for the
 *	per-board checks.
 *
 * Prints a banner, collects the Ebus level address/data error state with
 * ebus_fru_check(), then runs the board specific check on every enabled
 * board in slots 1 .. EV_MAX_SLOTS-1.
 */
void
everest_error_fru(eframe_t *ep)
{
	int slot;

	ev_perr(0, "\nEVEREST FRU Analysis:\n");
	evfru_ep = ep;

	/* Check for Address/Data Errors seen on Ebus. */
	ebus_fru_check();

	/* Next check each board for error within the board. */
	for (slot = 1; slot < EV_MAX_SLOTS; slot++) {
		evbrdinfo_t *board = &(EVCFGINFO->ecfg_board[slot]);

		if (board->eb_enabled != 0) {
			switch (board->eb_type) {
			case EVTYPE_IO4:
				io4_fru_check(slot);
				break;
			case EVTYPE_MC3:
				mc3_fru_check(slot);
				break;
			case EVTYPE_IP19:
				cpu_fru_check(slot);
				break;
			default:
				bad_board_type(board->eb_type, slot);
				break;
			}
		}
	}
}
#endif /* NOT_USED */