direct-io.hg

changeset 11497:22e01a4864b0

[POWERPC][XEN] Machine Check Processing

This patch allows a processor to report on the cause of a machine
check and possibly even recover from it.

Signed-off-by: Jimi Xenidis <jimix@watson.ibm.com>
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
author Jimi Xenidis <jimix@watson.ibm.com>
date Thu Sep 07 01:48:42 2006 -0400 (2006-09-07)
parents a817acb39386
children 464acece0dad
files xen/arch/powerpc/exceptions.c xen/arch/powerpc/powerpc64/ppc970.c xen/include/asm-powerpc/powerpc64/ppc970-hid.h xen/include/asm-powerpc/processor.h
line diff
     1.1 --- a/xen/arch/powerpc/exceptions.c	Thu Sep 07 01:30:12 2006 -0400
     1.2 +++ b/xen/arch/powerpc/exceptions.c	Thu Sep 07 01:48:42 2006 -0400
     1.3 @@ -25,6 +25,7 @@
     1.4  #include <xen/serial.h>
     1.5  #include <xen/gdbstub.h>
     1.6  #include <asm/time.h>
     1.7 +#include <asm/processor.h>
     1.8  
     1.9  #undef DEBUG
    1.10  
    1.11 @@ -54,6 +55,8 @@ void do_dec(struct cpu_user_regs *regs)
    1.12  
    1.13  void program_exception(struct cpu_user_regs *regs, unsigned long cookie)
    1.14  {
    1.15 +    int recover = 0;
    1.16 +
    1.17  #ifdef CRASH_DEBUG
    1.18      __trap_to_gdb(regs, cookie);
    1.19  #else /* CRASH_DEBUG */
    1.20 @@ -62,6 +65,11 @@ void program_exception(struct cpu_user_r
    1.21      printk("hid4 0x%016lx\n", regs->hid4);
    1.22      printk("---[ backtrace ]---\n");
    1.23      show_backtrace(regs->gprs[1], regs->lr, regs->pc);
    1.24 -    panic("%s: 0x%lx\n", __func__, cookie);
    1.25 +
    1.26 +    if (cookie == 0x200)
    1.27 +        recover = cpu_machinecheck(regs);
    1.28 +
    1.29 +    if (!recover)
    1.30 +        panic("%s: 0x%lx\n", __func__, cookie);
    1.31  #endif /* CRASH_DEBUG */
    1.32  }
     2.1 --- a/xen/arch/powerpc/powerpc64/ppc970.c	Thu Sep 07 01:30:12 2006 -0400
     2.2 +++ b/xen/arch/powerpc/powerpc64/ppc970.c	Thu Sep 07 01:48:42 2006 -0400
     2.3 @@ -17,6 +17,7 @@
     2.4   *
     2.5   * Authors: Hollis Blanchard <hollisb@us.ibm.com>
     2.6   *          Jimi Xenidis <jimix@watson.ibm.com>
     2.7 + *          Amos Waterland  <apw@us.ibm.com>
     2.8   */
     2.9  
    2.10  #include <xen/config.h>
    2.11 @@ -30,6 +31,7 @@
    2.12  #include <asm/powerpc64/processor.h>
    2.13  #include <asm/powerpc64/ppc970-hid.h>
    2.14  
    2.15 +#undef DEBUG
    2.16  #undef SERIALIZE
    2.17  
    2.18  struct rma_settings {
    2.19 @@ -190,7 +192,7 @@ void cpu_initialize(int cpuid)
    2.20      hid5.bits.DCBZ32_ill = 0; /* make dzbz 32byte illeagal */
    2.21      mthid5(hid5.word);
    2.22  
    2.23 -#ifdef DUMP_HIDS
    2.24 +#ifdef DEBUG
    2.25      printk("hid0 0x%016lx\n"
    2.26             "hid1 0x%016lx\n"
    2.27             "hid4 0x%016lx\n"
    2.28 @@ -238,3 +240,73 @@ void load_cpu_sprs(struct vcpu *v)
    2.29  {
    2.30      mthid4(v->arch.cpu.hid4.word);
    2.31  }
    2.32 +
    2.33 +/* Decode SRR1 (saved in regs->msr) and DSISR and print the machine check
    2.34 + * cause.  A nonzero return tells the caller not to panic; recovery is not
    2.35 + * attempted yet, so this always returns 0. */
    2.36 +int cpu_machinecheck(struct cpu_user_regs *regs)
    2.37 +{
    2.38 +    int recover = (regs->msr & MCK_SRR1_RI) != 0;
    2.39 +    u32 dsisr = mfdsisr();
    2.40 +
    2.41 +    printk("MACHINE CHECK: %s Recoverable\n", recover ? "IS": "NOT");
    2.42 +    printk("SRR1: 0x%016lx\n", regs->msr);
    2.43 +    if (regs->msr & MCK_SRR1_INSN_FETCH_UNIT)
    2.44 +        printk("42: Exception caused by Instruction Fetch Unit (IFU) "
    2.45 +               "detection of a hardware uncorrectable error (UE).\n");
    2.46 +
    2.47 +    if (regs->msr & MCK_SRR1_LOAD_STORE)
    2.48 +        printk("43: Exception caused by load/store detection of error "
    2.49 +               "(see DSISR)\n");
    2.50 +
    2.51 +    switch (regs->msr & MCK_SRR1_CAUSE_MASK) {
    2.52 +    case MCK_SRR1_CAUSE_SLB_PAR:
    2.53 +        printk("0b01: Exception caused by an SLB parity error detected "
    2.54 +               "while translating an instruction fetch address.\n");
    2.55 +        break;
    2.56 +    case MCK_SRR1_CAUSE_TLB_PAR:
    2.57 +        printk("0b10: Exception caused by a TLB parity error detected "
    2.58 +               "while translating an instruction fetch address.\n");
    2.59 +        break;
    2.60 +    case MCK_SRR1_CAUSE_UE:
    2.61 +        printk("0b11: Exception caused by a hardware uncorrectable "
    2.62 +               "error (UE) detected while doing a reload of an "
    2.63 +               "instruction-fetch TLB tablewalk.\n");
    2.64 +        break;
    2.65 +    default:
    2.66 +        break;
    2.67 +    }
    2.68 +
    2.69 +    printk("\nDSISR: 0x%08x\n", dsisr);
    2.70 +    if (dsisr & MCK_DSISR_UE)
    2.71 +        printk("16: Exception caused by a UE deferred error "
    2.72 +               "(DAR is undefined).\n");
    2.73 +
    2.74 +    if (dsisr & MCK_DSISR_UE_TABLE_WALK)
    2.75 +        printk("17: Exception caused by a UE deferred error "
    2.76 +               "during a tablewalk (D-side).\n");
    2.77 +
    2.78 +    if (dsisr & MCK_DSISR_L1_DCACHE_PAR)
    2.79 +        printk("18: Exception was caused by a software recoverable "
    2.80 +               "parity error in the L1 D-cache.\n");
    2.81 +
    2.82 +    if (dsisr & MCK_DSISR_L1_DCACHE_TAG_PAR)
    2.83 +        printk("19: Exception was caused by a software recoverable "
    2.84 +               "parity error in the L1 D-cache tag.\n");
    2.85 +
    2.86 +    if (dsisr & MCK_DSISR_D_ERAT_PAR)
    2.87 +        printk("20: Exception was caused by a software recoverable parity "
    2.88 +               "error in the D-ERAT.\n");
    2.89 +
    2.90 +    if (dsisr & MCK_DSISR_TLB_PAR)
    2.91 +        printk("21: Exception was caused by a software recoverable parity "
    2.92 +               "error in the TLB.\n");
    2.93 +
    2.94 +    if (dsisr & MCK_DSISR_SLB_PAR)
    2.95 +        printk("23: Exception was caused by an SLB parity error (may not be "
    2.96 +               "recoverable). This condition could occur if the "
    2.97 +               "effective segment ID (ESID) fields of two or more SLB "
    2.98 +               "entries contain the same value.\n");
    2.99 +
   2.100 +    return 0; /* for now, do not attempt recovery */
   2.101 +}
     3.1 --- a/xen/include/asm-powerpc/powerpc64/ppc970-hid.h	Thu Sep 07 01:30:12 2006 -0400
     3.2 +++ b/xen/include/asm-powerpc/powerpc64/ppc970-hid.h	Thu Sep 07 01:48:42 2006 -0400
     3.3 @@ -141,4 +141,21 @@ union hid5 {
     3.4      ulong word;
     3.5  };
     3.6  
     3.7 +#define MCK_SRR1_INSN_FETCH_UNIT    0x0000000000200000 /* 42: IFU UE (MSB is bit 0) */
     3.8 +#define MCK_SRR1_LOAD_STORE         0x0000000000100000 /* 43: load/store, see DSISR */
     3.9 +#define MCK_SRR1_CAUSE_MASK         0x00000000000c0000 /* 44:45 cause field */
    3.10 +#define MCK_SRR1_CAUSE_NONE         0x0000000000000000 /* 0b00 */
    3.11 +#define MCK_SRR1_CAUSE_SLB_PAR      0x0000000000040000 /* 0b01: SLB parity, insn fetch */
    3.12 +#define MCK_SRR1_CAUSE_TLB_PAR      0x0000000000080000 /* 0b10: TLB parity, insn fetch */
    3.13 +#define MCK_SRR1_CAUSE_UE           0x00000000000c0000 /* 0b11: UE, insn tablewalk */
    3.14 +#define MCK_SRR1_RI                 MSR_RI /* set when reported recoverable */
    3.15 +
    3.16 +#define MCK_DSISR_UE                0x00008000 /* 16: UE deferred error */
    3.17 +#define MCK_DSISR_UE_TABLE_WALK     0x00004000 /* 17: UE during D-side tablewalk */
    3.18 +#define MCK_DSISR_L1_DCACHE_PAR     0x00002000 /* 18: L1 D-cache parity */
    3.19 +#define MCK_DSISR_L1_DCACHE_TAG_PAR 0x00001000 /* 19: L1 D-cache tag parity */
    3.20 +#define MCK_DSISR_D_ERAT_PAR        0x00000800 /* 20: D-ERAT parity */
    3.21 +#define MCK_DSISR_TLB_PAR           0x00000400 /* 21: TLB parity */
    3.22 +#define MCK_DSISR_SLB_PAR           0x00000100 /* 23: SLB parity */
    3.23 +
    3.24  #endif
     4.1 --- a/xen/include/asm-powerpc/processor.h	Thu Sep 07 01:30:12 2006 -0400
     4.2 +++ b/xen/include/asm-powerpc/processor.h	Thu Sep 07 01:48:42 2006 -0400
     4.3 @@ -37,6 +37,7 @@
     4.4  struct domain;
     4.5  struct vcpu;
     4.6  struct cpu_user_regs;
     4.7 +extern int cpu_machinecheck(struct cpu_user_regs *);
     4.8  extern void show_registers(struct cpu_user_regs *);
     4.9  extern void show_execution_state(struct cpu_user_regs *);
    4.10  extern void show_backtrace(ulong sp, ulong lr, ulong pc);