ia64/xen-unstable

changeset 19743:c23aeb37b17f

x86 mca: Support MCA recovery actions for latest Intel platforms

When a UC = 1, PCC = 0 MCE happens, more types of software-recoverable
errors are defined on the latest Intel platforms. To identify these new
errors, new bits (S and AR) are defined in the MCi_STATUS register, and
the MCACOD field is also needed to determine the detailed error type.
Combined with the OVER bit, different recovery policies are required to
contain these new errors.

An SRAO error is a software-recoverable MCA error for which no recovery
action is required, while an SRAR error is a software-recoverable MCA
error for which a recovery action is required.

Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jun 16 10:58:56 2009 +0100 (2009-06-16)
parents 376c0749586e
children 2cbedd22149a
files xen/arch/x86/cpu/mcheck/amd_nonfatal.c xen/arch/x86/cpu/mcheck/mce.c xen/arch/x86/cpu/mcheck/mce.h xen/arch/x86/cpu/mcheck/mce_intel.c xen/arch/x86/cpu/mcheck/non-fatal.c xen/arch/x86/cpu/mcheck/x86_mca.h
line diff
     1.1 --- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c	Tue Jun 16 10:55:36 2009 +0100
     1.2 +++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c	Tue Jun 16 10:58:56 2009 +0100
     1.3 @@ -86,7 +86,7 @@ void mce_amd_checkregs(void *info)
     1.4  	struct mca_summary bs;
     1.5  	unsigned int event_enabled;
     1.6  
     1.7 -	mctc = mcheck_mca_logout(MCA_POLLER, mca_allbanks, &bs);
     1.8 +	mctc = mcheck_mca_logout(MCA_POLLER, mca_allbanks, &bs, NULL);
     1.9  
    1.10  	event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
    1.11  
     2.1 --- a/xen/arch/x86/cpu/mcheck/mce.c	Tue Jun 16 10:55:36 2009 +0100
     2.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c	Tue Jun 16 10:58:56 2009 +0100
     2.3 @@ -82,14 +82,41 @@ void x86_mce_callback_register(x86_mce_c
     2.4  	mc_callback_bank_extended = cbfunc;
     2.5  }
     2.6  
     2.7 +/* Machine check recoverability judgement callback handler.
     2.8 + * It is used to judge whether a UC error is recoverable by software.
     2.9 + */
    2.10 +static mce_recoverable_t mc_recoverable_scan = NULL;
    2.11 +
    2.12 +void mce_recoverable_register(mce_recoverable_t cbfunc)
    2.13 +{
    2.14 +    mc_recoverable_scan = cbfunc;
    2.15 +}
    2.16 +
    2.17 +/* Callback handler for judging whether a machine check error bank
    2.18 + * needs to be cleared. According to Intel's latest MCA OS Recovery
    2.19 + * Writer's Guide, whether the MCA error bank needs to be cleared is
    2.20 + * decided by the mca_source and the MCi_STATUS bit values.
    2.21 + */
    2.22 +static mce_need_clearbank_t mc_need_clearbank_scan = NULL;
    2.23 +
    2.24 +void mce_need_clearbank_register(mce_need_clearbank_t cbfunc)
    2.25 +{
    2.26 +    mc_need_clearbank_scan = cbfunc;
    2.27 +}
    2.28 +
    2.29  /* Utility function to perform MCA bank telemetry readout and to push that
    2.30   * telemetry towards an interested dom0 for logging and diagnosis.
    2.31   * The caller - #MC handler or MCA poll function - must arrange that we
    2.32   * do not migrate cpus. */
    2.33  
    2.34  /* XXFM Could add overflow counting? */
    2.35 +
    2.36 +/* Add the out-parameter clear_bank for the machine check handler caller.
    2.37 + * For the latest Intel CPUs, whether to clear the error bank status needs
    2.38 + * to be judged by the callback function defined above.
    2.39 + */
    2.40  mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t bankmask,
    2.41 -    struct mca_summary *sp)
    2.42 +    struct mca_summary *sp, cpu_banks_t* clear_bank)
    2.43  {
    2.44  	struct vcpu *v = current;
    2.45  	struct domain *d;
    2.46 @@ -98,7 +125,7 @@ mctelem_cookie_t mcheck_mca_logout(enum 
    2.47  	struct mcinfo_common *mic;
    2.48  	struct mcinfo_global *mig;	/* on stack */
    2.49  	mctelem_cookie_t mctc = NULL;
    2.50 -	uint32_t uc = 0, pcc = 0;
    2.51 +	uint32_t uc = 0, pcc = 0, recover, need_clear = 1 ;
    2.52  	struct mc_info *mci = NULL;
    2.53  	mctelem_class_t which = MC_URGENT;	/* XXXgcc */
    2.54  	unsigned int cpu_nr;
    2.55 @@ -150,6 +177,11 @@ mctelem_cookie_t mcheck_mca_logout(enum 
    2.56  	    &mcg.mc_coreid, &mcg.mc_core_threadid,
    2.57  	    &mcg.mc_apicid, NULL, NULL, NULL);
    2.58  
    2.59 +	/* If no mc_recoverable_scan callback handler is registered,
    2.60 +	 * this error is treated as not recoverable.
    2.61 +	 */
    2.62 +	recover = (mc_recoverable_scan)? 1: 0;
    2.63 +
    2.64  	for (i = 0; i < 32 && i < nr_mce_banks; i++) {
    2.65  		struct mcinfo_bank mcb;		/* on stack */
    2.66  
    2.67 @@ -161,6 +193,13 @@ mctelem_cookie_t mcheck_mca_logout(enum 
    2.68  		if (!(status & MCi_STATUS_VAL))
    2.69  			continue;	/* this bank has no valid telemetry */
    2.70  
    2.71 +		/* For the latest Intel CPUs, the CMCI/MCE handler caller needs
    2.72 +		 * to decide whether to clear the bank based on MCi_STATUS bits
    2.73 +		 * such as OVER/UC/EN/PCC/S/AR.
    2.74 +		 */
    2.75 +		if ( mc_need_clearbank_scan )
    2.76 +			need_clear = mc_need_clearbank_scan(who, status);
    2.77 +
    2.78  		/* If this is the first bank with valid MCA DATA, then
    2.79  		 * try to reserve an entry from the urgent/nonurgent queue
    2.80  		 * depending on whethere we are called from an exception or
    2.81 @@ -187,6 +226,11 @@ mctelem_cookie_t mcheck_mca_logout(enum 
    2.82  		if ((status & MCi_STATUS_PCC) != 0)
    2.83  			pcc |= (1 << i);
    2.84  
    2.85 +		if (recover && uc)
    2.86 +			/* uc = 1 and recover = 1: we need not panic here,
    2.87 +			 * let the callback judge recoverability. */
    2.88 +			recover = mc_recoverable_scan(status);
    2.89 +
    2.90  		addr = misc = 0;
    2.91  
    2.92  		if (status & MCi_STATUS_ADDRV) {
    2.93 @@ -221,9 +265,13 @@ mctelem_cookie_t mcheck_mca_logout(enum 
    2.94  			cbret = mc_callback_bank_extended(mci, i, status);
    2.95  		}
    2.96  
    2.97 -		if (who != MCA_MCE_SCAN)
    2.98 +		/* By default, need_clear = 1 */
    2.99 +		if (who != MCA_MCE_SCAN && need_clear)
   2.100  			/* Clear status */
   2.101  			mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
   2.102 +		else if ( who == MCA_MCE_SCAN && need_clear)
   2.103 +			set_bit(i, clear_bank);
   2.104 +
   2.105  		wmb();
   2.106  	}
   2.107  
   2.108 @@ -245,6 +293,7 @@ mctelem_cookie_t mcheck_mca_logout(enum 
   2.109  		sp->eipv = (gstatus & MCG_STATUS_EIPV) != 0;
   2.110  		sp->uc = uc;
   2.111  		sp->pcc = pcc;
   2.112 +		sp->recoverable = recover;
   2.113  	}
   2.114  
   2.115  	return mci != NULL ? mctc : NULL;	/* may be NULL */
   2.116 @@ -296,7 +345,7 @@ void mcheck_cmn_handler(struct cpu_user_
   2.117  	 * for logging or dismiss the cookie that is returned, and must not
   2.118  	 * reference the cookie after that action.
   2.119  	 */
   2.120 -	mctc = mcheck_mca_logout(MCA_MCE_HANDLER, bankmask, &bs);
   2.121 +	mctc = mcheck_mca_logout(MCA_MCE_HANDLER, bankmask, &bs, NULL);
   2.122  	if (mctc != NULL)
   2.123  		mci = (struct mc_info *)mctelem_dataptr(mctc);
   2.124  
   2.125 @@ -606,7 +655,7 @@ static void __init mcheck_disable(char *
   2.126  
   2.127  static void __init mcheck_enable(char *str)
   2.128  {
   2.129 -	mce_disabled = -1;
   2.130 +	mce_disabled = 0;
   2.131  }
   2.132  
   2.133  custom_param("nomce", mcheck_disable);
     3.1 --- a/xen/arch/x86/cpu/mcheck/mce.h	Tue Jun 16 10:55:36 2009 +0100
     3.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h	Tue Jun 16 10:58:56 2009 +0100
     3.3 @@ -42,6 +42,10 @@ extern void x86_mce_vector_register(x86_
     3.4   * via x86_mce_vector_register. */
     3.5  extern void mcheck_cmn_handler(struct cpu_user_regs *, long, cpu_banks_t);
     3.6  
     3.7 +/* Register a handler for judging whether mce is recoverable. */
     3.8 +typedef int (*mce_recoverable_t)(u64 status);
     3.9 +extern void mce_recoverable_register(mce_recoverable_t);
    3.10 +
    3.11  /* Read an MSR, checking for an interposed value first */
    3.12  extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t,
    3.13      uint64_t *);
    3.14 @@ -86,6 +90,8 @@ struct mca_summary {
    3.15  	int		eipv;	/* meaningful on #MC */
    3.16  	uint32_t	uc;	/* bitmask of banks with UC */
    3.17  	uint32_t	pcc;	/* bitmask of banks with PCC */
    3.18 +	/* bitmask of banks with software error recovery ability */
    3.19 +	uint32_t	recoverable;
    3.20  };
    3.21  
    3.22  extern cpu_banks_t mca_allbanks;
    3.23 @@ -93,11 +99,12 @@ void set_poll_bankmask(struct cpuinfo_x8
    3.24  DECLARE_PER_CPU(cpu_banks_t, poll_bankmask);
    3.25  DECLARE_PER_CPU(cpu_banks_t, no_cmci_banks);
    3.26  extern int cmci_support;
    3.27 +extern int ser_support;
    3.28  extern int is_mc_panic;
    3.29  extern void mcheck_mca_clearbanks(cpu_banks_t);
    3.30  
    3.31  extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t,
    3.32 -    struct mca_summary *);
    3.33 +    struct mca_summary *, cpu_banks_t*);
    3.34  
    3.35  /* Register a callback to be made during bank telemetry logout.
    3.36   * This callback is only available to those machine check handlers
    3.37 @@ -112,6 +119,11 @@ extern mctelem_cookie_t mcheck_mca_logou
    3.38   * the current MCA bank number we are reading telemetry from, and the
    3.39   * MCi_STATUS value for that bank.
    3.40   */
    3.41 +
    3.42 +/* Register a handler for judging whether the bank needs to be cleared */
    3.43 +typedef int (*mce_need_clearbank_t)(enum mca_source who, u64 status);
    3.44 +extern void mce_need_clearbank_register(mce_need_clearbank_t);
    3.45 +
    3.46  typedef enum mca_extinfo (*x86_mce_callback_t)
    3.47      (struct mc_info *, uint16_t, uint64_t);
    3.48  extern void x86_mce_callback_register(x86_mce_callback_t);
     4.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Tue Jun 16 10:55:36 2009 +0100
     4.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Tue Jun 16 10:58:56 2009 +0100
     4.3 @@ -5,7 +5,9 @@
     4.4  #include <xen/kernel.h>
     4.5  #include <xen/delay.h>
     4.6  #include <xen/smp.h>
     4.7 +#include <xen/mm.h>
     4.8  #include <asm/processor.h> 
     4.9 +#include <public/sysctl.h>
    4.10  #include <asm/system.h>
    4.11  #include <asm/msr.h>
    4.12  #include "mce.h"
    4.13 @@ -14,6 +16,7 @@
    4.14  DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
    4.15  DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
    4.16  int cmci_support = 0;
    4.17 +int ser_support = 0;
    4.18  
    4.19  static int nr_intel_ext_msrs = 0;
    4.20  static int firstbank;
    4.21 @@ -37,9 +40,11 @@ static struct mce_softirq_barrier mce_tr
    4.22  static DEFINE_SPINLOCK(mce_logout_lock);
    4.23  
    4.24  static atomic_t severity_cpu = ATOMIC_INIT(-1);
    4.25 +static atomic_t found_error = ATOMIC_INIT(0);
    4.26  
    4.27  static void mce_barrier_enter(struct mce_softirq_barrier *);
    4.28  static void mce_barrier_exit(struct mce_softirq_barrier *);
    4.29 +static int mce_barrier_last(struct mce_softirq_barrier *);
    4.30  
    4.31  #ifdef CONFIG_X86_MCE_THERMAL
    4.32  static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
    4.33 @@ -261,6 +266,44 @@ static int fill_vmsr_data(int cpu, struc
    4.34      return 0;
    4.35  }
    4.36  
    4.37 +void intel_UCR_handler(struct mcinfo_bank *bank,
    4.38 +             struct mcinfo_global *global,
    4.39 +             struct mcinfo_extended *extension,
    4.40 +             struct mca_handle_result *result)
    4.41 +{
    4.42 +    struct domain *d;
    4.43 +    unsigned long mfn;
    4.44 +    uint32_t status;
    4.45 +
    4.46 +    printk(KERN_DEBUG "MCE: Enter EWB UCR recovery action\n");
    4.47 +    result->result = MCA_NEED_RESET;
    4.48 +    if (bank->mc_addr != 0) {
    4.49 +         mfn = bank->mc_addr >> PAGE_SHIFT;
    4.50 +         if (!offline_page(mfn, 1, &status)) {
    4.51 +              if (status & PG_OFFLINE_OFFLINED)
    4.52 +                  result->result = MCA_RECOVERED;
    4.53 +              else if (status & PG_OFFLINE_PENDING) {
    4.54 +                 /* This page has owner */
    4.55 +                  if (status & PG_OFFLINE_OWNED) {
    4.56 +                      result->result |= MCA_OWNER;
    4.57 +                      result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
    4.58 +                      printk(KERN_DEBUG "MCE: This error page is owned"
    4.59 +                                  " by DOM %d\n", result->owner);
    4.60 +                      if (result->owner != 0 && result->owner != DOMID_XEN) {
    4.61 +                          d = get_domain_by_id(result->owner);
    4.62 +                          domain_crash(d);
    4.63 +                          result->result = MCA_RECOVERED;
    4.64 +                      }
    4.65 +                  }
    4.66 +              }
    4.67 +         }
    4.68 +    }
    4.69 +}
    4.70 +
    4.71 +#define INTEL_MAX_RECOVERY 2
    4.72 +struct mca_error_handler intel_recovery_handler[INTEL_MAX_RECOVERY] =
    4.73 +            {{0x017A, intel_UCR_handler}, {0x00C0, intel_UCR_handler}};
    4.74 +
    4.75  /*
    4.76   * Called from mctelem_process_deferred. Return 1 if the telemetry
    4.77   * should be committed for dom0 consumption, 0 if it should be
    4.78 @@ -269,9 +312,11 @@ static int fill_vmsr_data(int cpu, struc
    4.79  static int mce_action(unsigned int cpu, mctelem_cookie_t mctc)
    4.80  {
    4.81      struct mc_info *local_mi;
    4.82 +    uint32_t i;
    4.83      struct mcinfo_common *mic = NULL;
    4.84      struct mcinfo_global *mc_global;
    4.85      struct mcinfo_bank *mc_bank;
    4.86 +    struct mca_handle_result mca_res;
    4.87  
    4.88      local_mi = (struct mc_info*)mctelem_dataptr(mctc);
    4.89      x86_mcinfo_lookup(mic, local_mi, MC_TYPE_GLOBAL);
    4.90 @@ -294,9 +339,45 @@ static int mce_action(unsigned int cpu, 
    4.91          if (fill_vmsr_data(cpu, mc_bank, mc_global->mc_gstatus) == -1)
    4.92               break;
    4.93  
    4.94 -       /* TODO: Add recovery actions here, such as page-offline, etc */
    4.95 +        /* TODO: Add recovery actions here, such as page-offline, etc */
    4.96 +        memset(&mca_res, 0x0f, sizeof(mca_res));
    4.97 +        for ( i = 0; i < INTEL_MAX_RECOVERY; i++ ) {
    4.98 +            if ( (mc_bank->mc_status & 0xffff) == 
    4.99 +                        intel_recovery_handler[i].mca_code ) {
   4.100 +                /* For SRAR, OVER = 1 should have caused reset
   4.101 +                 * For SRAO, OVER = 1 skip recovery action, continue execution
   4.102 +                 */
   4.103 +                if (!(mc_bank->mc_status & MCi_STATUS_OVER))
   4.104 +                    intel_recovery_handler[i].recovery_handler
   4.105 +                                (mc_bank, mc_global, NULL, &mca_res);
   4.106 +                else {
   4.107 +                   if (!(mc_global->mc_gstatus & MCG_STATUS_RIPV))
   4.108 +                       mca_res.result = MCA_NEED_RESET;
   4.109 +                   else
   4.110 +                       mca_res.result = MCA_NO_ACTION; 
   4.111 +                }
   4.112 +                if (mca_res.result & MCA_OWNER)
   4.113 +                    mc_bank->mc_domid = mca_res.owner;
   4.114 +                if (mca_res.result == MCA_NEED_RESET)
   4.115 +                    /* DOMID_XEN*/
   4.116 +                    mc_panic("MCE: Software recovery failed for the UCR "
   4.117 +                                "error\n");
   4.118 +                else if (mca_res.result == MCA_RECOVERED)
   4.119 +                    printk(KERN_DEBUG "MCE: The UCR error is successfully "
   4.120 +                                "recovered by software!\n");
   4.121 +                else if (mca_res.result == MCA_NO_ACTION)
   4.122 +                    printk(KERN_DEBUG "MCE: Overflowed SRAO error, no recovery "
   4.123 +                                "action taken, RIPV=1, continue execution.\n");
   4.124 +                break;
   4.125 +            }
   4.126 +        }
   4.127 +        /* For SRAR, a missing recovery action should already have caused
   4.128 +         * a reset in the MCA handler, so only SRAO can reach this point.
   4.129 +         */
   4.130 +        if ( i >= INTEL_MAX_RECOVERY )
   4.131 +            printk(KERN_DEBUG "MCE: No software recovery action found for "
   4.132 +                            "this SRAO error\n");
   4.133      }
   4.134 -
   4.135      return 1;
   4.136  }
   4.137  
   4.138 @@ -468,21 +549,35 @@ static void mce_barrier_exit(struct mce_
   4.139        }
   4.140  }
   4.141  
   4.142 +static int mce_barrier_last(struct mce_softirq_barrier *bar)
   4.143 +{
   4.144 +    int gen = atomic_read(&bar->ingen);
   4.145 +    if ( atomic_read(&bar->ingen) == gen &&
   4.146 +        atomic_read(&bar->val) == 1 ) {
   4.147 +        return 1;
   4.148 +    }
   4.149 +    return 0;
   4.150 +}
   4.151 +
   4.152 +#if 0
   4.153  static void mce_barrier(struct mce_softirq_barrier *bar)
   4.154  {
   4.155        mce_barrier_enter(bar);
   4.156        mce_barrier_exit(bar);
   4.157  }
   4.158 +#endif
   4.159  
   4.160  static void intel_machine_check(struct cpu_user_regs * regs, long error_code)
   4.161  {
   4.162      uint64_t gstatus;
   4.163      mctelem_cookie_t mctc = NULL;
   4.164      struct mca_summary bs;
   4.165 +    cpu_banks_t clear_bank; 
   4.166  
   4.167      mce_spin_lock(&mce_logout_lock);
   4.168  
   4.169 -    mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs);
   4.170 +    memset( &clear_bank, 0x0, sizeof(cpu_banks_t));
   4.171 +    mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, &clear_bank);
   4.172  
   4.173      if (bs.errcnt) {
   4.174          /*
   4.175 @@ -493,28 +588,47 @@ static void intel_machine_check(struct c
   4.176              if (mctc != NULL)
   4.177                  mctelem_defer(mctc);
   4.178              /*
   4.179 -             * For PCC=1, context is lost, so reboot now without clearing
   4.180 -             * the banks, and deal with the telemetry after reboot
   4.181 +             * If PCC = 1 or the error cannot be recovered, context is lost, so
   4.182 +             * reboot now without clearing the banks, and deal with the telemetry after reboot
   4.183               * (the MSRs are sticky)
   4.184               */
   4.185              if (bs.pcc)
   4.186                  mc_panic("State lost due to machine check exception.\n");
   4.187 +            if (!bs.ripv)
   4.188 +                mc_panic("RIPV = 0, cannot resume execution!\n");
   4.189 +            if (!bs.recoverable)
   4.190 +                mc_panic("Machine check exception software recovery failed.\n");
   4.191          } else {
   4.192              if (mctc != NULL)
   4.193                  mctelem_commit(mctc);
   4.194          }
   4.195 -        mcheck_mca_clearbanks(mca_allbanks);
   4.196 +        atomic_set(&found_error, 1);
   4.197 +
   4.198 +        printk(KERN_DEBUG "MCE: clear_bank map %lx\n", 
   4.199 +                *((unsigned long*)clear_bank));
   4.200 +        mcheck_mca_clearbanks(clear_bank);
   4.201 +
   4.202      } else {
   4.203          if (mctc != NULL)
   4.204              mctelem_dismiss(mctc);
   4.205      }
   4.206 -
   4.207      mce_spin_unlock(&mce_logout_lock);
   4.208  
   4.209      /*
   4.210       * Wait until everybody has processed the trap.
   4.211       */
   4.212 -    mce_barrier(&mce_trap_bar);
   4.213 +    mce_barrier_enter(&mce_trap_bar);
   4.214 +    /* According to the latest MCA OS Writer's Guide, if no error bank is
   4.215 +     * found on any CPU, something unexpected is happening and we cannot
   4.216 +     * do any recovery other than resetting the system.
   4.217 +     */
   4.218 +    if (atomic_read(&found_error) == 0)
   4.219 +        mc_panic("Unexpected condition for the MCE handler, need reset\n");
   4.220 +    if (mce_barrier_last(&mce_trap_bar)) {
   4.221 +        printk(KERN_DEBUG "Choose one CPU to clear the error-found flag\n");
   4.222 +        atomic_set(&found_error, 0);
   4.223 +    }
   4.224 +    mce_barrier_exit(&mce_trap_bar);
   4.225  
   4.226      /*
   4.227       * Clear MCIP if it wasn't already. There is a small
   4.228 @@ -532,6 +646,90 @@ static void intel_machine_check(struct c
   4.229      raise_softirq(MACHINE_CHECK_SOFTIRQ);
   4.230  }
   4.231  
   4.232 +/* According to the MCA OS Writer's Guide, the CMCI handler needs to clear a bank when
   4.233 + * 1) CE (UC = 0)
   4.234 + * 2) ser_support = 1, spurious error, OVER = 0, EN = 0, [UC = 1]
   4.235 + * 3) ser_support = 1, UCNA, OVER = 0, S = 0, AR = 0, PCC = 0, [UC = 1, EN = 1]
   4.236 + * The MCA handler needs to clear a bank when
   4.237 + * 1) ser_support = 1, spurious error, OVER = 0, EN = 0, UC = 1
   4.238 + * 2) ser_support = 1, SRAR, UC = 1, OVER = 0, S = 1, AR = 1, [EN = 1]
   4.239 + * 3) ser_support = 1, SRAO, UC = 1, S = 1, AR = 0, [EN = 1]
   4.240 + */
   4.241 +
   4.242 +static int intel_need_clearbank_scan(enum mca_source who, u64 status)
   4.243 +{
   4.244 +    if ( who == MCA_CMCI_HANDLER) {
   4.245 +        /* CMCI need clear bank */
   4.246 +        if ( !(status & MCi_STATUS_UC) )
   4.247 +            return 1;
   4.248 +        /* Spurious need clear bank */
   4.249 +        else if ( ser_support && !(status & MCi_STATUS_OVER)
   4.250 +                    && !(status & MCi_STATUS_EN) )
   4.251 +            return 1;
   4.252 +        /* UCNA OVER = 0 need clear bank */
   4.253 +        else if ( ser_support && !(status & MCi_STATUS_OVER) 
   4.254 +                    && !(status & MCi_STATUS_PCC) && !(status & MCi_STATUS_S) 
   4.255 +                    && !(status & MCi_STATUS_AR))
   4.256 +            return 1;
   4.257 +        /* Only Log, no clear */
   4.258 +        else return 0;
   4.259 +    }
   4.260 +    else if ( who == MCA_MCE_SCAN) {
   4.261 +        /* Spurious need clear bank */
   4.262 +        if ( ser_support && !(status & MCi_STATUS_OVER)
   4.263 +                    && (status & MCi_STATUS_UC) && !(status & MCi_STATUS_EN))
   4.264 +            return 1;
   4.265 +        /* SRAR OVER=0 clear bank. OVER = 1 have caused reset */
   4.266 +        else if ( ser_support && (status & MCi_STATUS_UC)
   4.267 +                    && (status & MCi_STATUS_S) && (status & MCi_STATUS_AR )
   4.268 +                    && (status & MCi_STATUS_OVER) )
   4.269 +            return 1;
   4.270 +        /* SRAO need clear bank */
   4.271 +        else if ( ser_support && !(status & MCi_STATUS_AR) 
   4.272 +                    && (status & MCi_STATUS_S) && (status & MCi_STATUS_UC))
   4.273 +            return 1; 
   4.274 +        else
   4.275 +            return 0;
   4.276 +    }
   4.277 +
   4.278 +    return 1;
   4.279 +}
   4.280 +
   4.281 +/* Execution can continue / the MCE is recoverable when
   4.282 + * 1) CE: UC = 0
   4.283 + * 2) Spurious: ser_support = 1, OVER = 0, EN = 0 [UC = 1]
   4.284 + * 3) SRAR: ser_support = 1, OVER = 0, PCC = 0, S = 1, AR = 1 [UC = 1, EN = 1]
   4.285 + * 4) SRAO: ser_support = 1, PCC = 0, S = 1, AR = 0, EN = 1 [UC = 1]
   4.286 + * 5) UCNA: ser_support = 1, OVER = 0, EN = 1, PCC = 0, S = 0, AR = 0 [UC = 1]
   4.287 + */
   4.288 +static int intel_recoverable_scan(u64 status)
   4.289 +{
   4.290 +
   4.291 +    if ( !(status & MCi_STATUS_UC ) )
   4.292 +        return 1;
   4.293 +    else if ( ser_support && !(status & MCi_STATUS_EN) 
   4.294 +                && !(status & MCi_STATUS_OVER) )
   4.295 +        return 1;
   4.296 +    /* SRAR error */
   4.297 +    else if ( ser_support && !(status & MCi_STATUS_OVER) 
   4.298 +                && !(status & MCi_STATUS_PCC) && (status & MCi_STATUS_S)
   4.299 +                && (status & MCi_STATUS_AR) ) {
   4.300 +        printk(KERN_DEBUG "MCE: No SRAR error defined currently.\n");
   4.301 +        return 0;
   4.302 +    }
   4.303 +    /* SRAO error */
   4.304 +    else if (ser_support && !(status & MCi_STATUS_PCC)
   4.305 +                && (status & MCi_STATUS_S) && !(status & MCi_STATUS_AR)
   4.306 +                && (status & MCi_STATUS_EN))
   4.307 +        return 1;
   4.308 +    /* UCNA error */
   4.309 +    else if (ser_support && !(status & MCi_STATUS_OVER)
   4.310 +                && (status & MCi_STATUS_EN) && !(status & MCi_STATUS_PCC)
   4.311 +                && !(status & MCi_STATUS_S) && !(status & MCi_STATUS_AR))
   4.312 +        return 1;
   4.313 +    return 0;
   4.314 +}
   4.315 +
   4.316  static DEFINE_SPINLOCK(cmci_discover_lock);
   4.317  
   4.318  /*
   4.319 @@ -586,7 +784,7 @@ static void cmci_discover(void)
   4.320       */
   4.321  
   4.322      mctc = mcheck_mca_logout(
   4.323 -        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs);
   4.324 +        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
   4.325  
   4.326      if (bs.errcnt && mctc != NULL) {
   4.327          if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
   4.328 @@ -700,7 +898,7 @@ fastcall void smp_cmci_interrupt(struct 
   4.329      irq_enter();
   4.330  
   4.331      mctc = mcheck_mca_logout(
   4.332 -        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs);
   4.333 +        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
   4.334  
   4.335      if (bs.errcnt && mctc != NULL) {
   4.336          if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
   4.337 @@ -738,6 +936,10 @@ static void mce_cap_init(struct cpuinfo_
   4.338      if ((l & MCG_CMCI_P) && cpu_has_apic)
   4.339          cmci_support = 1;
   4.340  
   4.341 +    /* Support Software Error Recovery */
   4.342 +    if (l & MCG_SER_P)
   4.343 +        ser_support = 1;
   4.344 +
   4.345      nr_mce_banks = l & MCG_CAP_COUNT;
   4.346      if (nr_mce_banks > MAX_NR_BANKS)
   4.347      {
   4.348 @@ -770,7 +972,7 @@ static void mce_init(void)
   4.349      /* log the machine checks left over from the previous reset.
   4.350       * This also clears all registers*/
   4.351  
   4.352 -    mctc = mcheck_mca_logout(MCA_RESET, mca_allbanks, &bs);
   4.353 +    mctc = mcheck_mca_logout(MCA_RESET, mca_allbanks, &bs, NULL);
   4.354  
   4.355      /* in the boot up stage, don't inject to DOM0, but print out */
   4.356      if (bs.errcnt && mctc != NULL) {
   4.357 @@ -810,6 +1012,8 @@ int intel_mcheck_init(struct cpuinfo_x86
   4.358      /* machine check is available */
   4.359      x86_mce_vector_register(intel_machine_check);
   4.360      x86_mce_callback_register(intel_get_extended_msrs);
   4.361 +    mce_recoverable_register(intel_recoverable_scan);
   4.362 +    mce_need_clearbank_register(intel_need_clearbank_scan);
   4.363  
   4.364      mce_init();
   4.365      mce_intel_feature_init(c);
     5.1 --- a/xen/arch/x86/cpu/mcheck/non-fatal.c	Tue Jun 16 10:55:36 2009 +0100
     5.2 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c	Tue Jun 16 10:58:56 2009 +0100
     5.3 @@ -39,7 +39,7 @@ static void mce_checkregs (void *info)
     5.4  	struct mca_summary bs;
     5.5  	static uint64_t dumpcount = 0;
     5.6  
     5.7 -	mctc = mcheck_mca_logout(MCA_POLLER, __get_cpu_var(poll_bankmask), &bs);
     5.8 +	mctc = mcheck_mca_logout(MCA_POLLER, __get_cpu_var(poll_bankmask), &bs, NULL);
     5.9  
    5.10  	if (bs.errcnt && mctc != NULL) {
    5.11  		adjust++;
     6.1 --- a/xen/arch/x86/cpu/mcheck/x86_mca.h	Tue Jun 16 10:55:36 2009 +0100
     6.2 +++ b/xen/arch/x86/cpu/mcheck/x86_mca.h	Tue Jun 16 10:58:56 2009 +0100
     6.3 @@ -30,6 +30,7 @@
     6.4  
     6.5  
     6.6  /* Bitfield of the MSR_IA32_MCG_CAP register */
     6.7 +#define MCG_SER_P               (1UL<<24)
     6.8  #define MCG_CAP_COUNT           0x00000000000000ffULL
     6.9  #define MCG_CTL_P               0x0000000000000100ULL
    6.10  #define MCG_EXT_P		(1UL<<9)
    6.11 @@ -50,6 +51,10 @@
    6.12  #define MCi_STATUS_MSEC         0x00000000ffff0000ULL
    6.13  /* Other information */
    6.14  #define MCi_STATUS_OTHER        0x01ffffff00000000ULL
    6.15 +/* Action Required flag */
    6.16 +#define MCi_STATUS_AR           0x0080000000000000ULL
    6.17 +/* Signaling flag */
    6.18 +#define MCi_STATUS_S            0x0100000000000000ULL
    6.19  /* processor context corrupt */
    6.20  #define MCi_STATUS_PCC          0x0200000000000000ULL
    6.21  /* MSR_K8_MCi_ADDR register valid */
    6.22 @@ -105,8 +110,8 @@ DECLARE_PER_CPU(cpu_banks_t, mce_banks_o
    6.23  #define MCA_OWNER (0x1 < 1)
    6.24  /* MCA error can't be recovered and need reset */
    6.25  #define MCA_NEED_RESET (0x1 < 2)
    6.26 -/* MCA error need further actions in softIRQ context for recovery */
    6.27 -#define MCA_MORE_ACTION (0x1 < 3)
    6.28 +/* MCA error did not have any action yet */
    6.29 +#define MCA_NO_ACTION (0x1 < 3)
    6.30  
    6.31  struct mca_handle_result
    6.32  {