ia64/xen-unstable

changeset 19503:1a7457bb1fdf

x86 mce: Small fix for polling/CMCI race conditions.

When CMCI happens very quickly, the polling and CMCI processing paths
might cross. For Intel CPUs which support CMCI, if an error bank has
CMCI capability, we disable polling on that bank.

Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Apr 06 13:46:11 2009 +0100 (2009-04-06)
parents 5a60eb7fad79
children 5966b71195b4
files xen/arch/x86/cpu/mcheck/mce.c xen/arch/x86/cpu/mcheck/mce.h xen/arch/x86/cpu/mcheck/mce_intel.c xen/arch/x86/cpu/mcheck/non-fatal.c
line diff
     1.1 --- a/xen/arch/x86/cpu/mcheck/mce.c	Thu Apr 02 14:17:19 2009 +0100
     1.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c	Mon Apr 06 13:46:11 2009 +0100
     1.3 @@ -577,6 +577,7 @@ void mcheck_init(struct cpuinfo_x86 *c)
     1.4  		break;
     1.5  	}
     1.6  
     1.7 +    set_poll_bankmask(c);
     1.8  	if (!inited)
     1.9  		printk(XENLOG_INFO "CPU%i: No machine check initialization\n",
    1.10  		    smp_processor_id());
    1.11 @@ -1230,7 +1231,19 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
    1.12  
    1.13  	return ret;
    1.14  }
    1.15 +void set_poll_bankmask(struct cpuinfo_x86 *c)
    1.16 +{
    1.17  
    1.18 +    if (cmci_support && !mce_disabled) {
    1.19 +        memcpy(&(__get_cpu_var(poll_bankmask)),
    1.20 +                &(__get_cpu_var(no_cmci_banks)), sizeof(cpu_banks_t));
    1.21 +    }
    1.22 +    else {
    1.23 +        memcpy(&(get_cpu_var(poll_bankmask)), &mca_allbanks, sizeof(cpu_banks_t));
    1.24 +        if (mce_firstbank(c))
    1.25 +            clear_bit(0, get_cpu_var(poll_bankmask));
    1.26 +    }
    1.27 +}
    1.28  void mc_panic(char *s)
    1.29  {
    1.30      console_start_sync();
     2.1 --- a/xen/arch/x86/cpu/mcheck/mce.h	Thu Apr 02 14:17:19 2009 +0100
     2.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h	Mon Apr 06 13:46:11 2009 +0100
     2.3 @@ -88,6 +88,10 @@ struct mca_summary {
     2.4  };
     2.5  
     2.6  extern cpu_banks_t mca_allbanks;
     2.7 +void set_poll_bankmask(struct cpuinfo_x86 *c);
     2.8 +DECLARE_PER_CPU(cpu_banks_t, poll_bankmask);
     2.9 +DECLARE_PER_CPU(cpu_banks_t, no_cmci_banks);
    2.10 +extern int cmci_support;
    2.11  
    2.12  extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t,
    2.13      struct mca_summary *);
     3.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c	Thu Apr 02 14:17:19 2009 +0100
     3.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c	Mon Apr 06 13:46:11 2009 +0100
     3.3 @@ -12,9 +12,10 @@
     3.4  #include "x86_mca.h"
     3.5  
     3.6  DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
     3.7 +DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
     3.8 +int cmci_support = 0;
     3.9  
    3.10  static int nr_intel_ext_msrs = 0;
    3.11 -static int cmci_support = 0;
    3.12  static int firstbank;
    3.13  
    3.14  #ifdef CONFIG_X86_MCE_THERMAL
    3.15 @@ -548,7 +549,6 @@ static void intel_machine_check(struct c
    3.16  }
    3.17  
    3.18  static DEFINE_SPINLOCK(cmci_discover_lock);
    3.19 -static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
    3.20  
    3.21  /*
    3.22   * Discover bank sharing using the algorithm recommended in the SDM.
     4.1 --- a/xen/arch/x86/cpu/mcheck/non-fatal.c	Thu Apr 02 14:17:19 2009 +0100
     4.2 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c	Mon Apr 06 13:46:11 2009 +0100
     4.3 @@ -22,7 +22,7 @@
     4.4  
     4.5  #include "mce.h"
     4.6  
     4.7 -static cpu_banks_t bankmask;
     4.8 +DEFINE_PER_CPU(cpu_banks_t, poll_bankmask);
     4.9  static struct timer mce_timer;
    4.10  
    4.11  #define MCE_PERIOD MILLISECS(8000)
    4.12 @@ -39,7 +39,7 @@ static void mce_checkregs (void *info)
    4.13  	struct mca_summary bs;
    4.14  	static uint64_t dumpcount = 0;
    4.15  
    4.16 -	mctc = mcheck_mca_logout(MCA_POLLER, bankmask, &bs);
    4.17 +	mctc = mcheck_mca_logout(MCA_POLLER, __get_cpu_var(poll_bankmask), &bs);
    4.18  
    4.19  	if (bs.errcnt && mctc != NULL) {
    4.20  		adjust++;
    4.21 @@ -94,10 +94,6 @@ static int __init init_nonfatal_mce_chec
    4.22  	if (!mce_available(c))
    4.23  		return -ENODEV;
    4.24  
    4.25 -	memcpy(&bankmask, &mca_allbanks, sizeof (cpu_banks_t));
    4.26 -	if (mce_firstbank(c) == 1)
    4.27 -		clear_bit(0, bankmask);
    4.28 -
    4.29  	/*
    4.30  	 * Check for non-fatal errors every MCE_RATE s
    4.31  	 */