ia64/xen-unstable
changeset 19503:1a7457bb1fdf
x86 mce: Small fix for polling/CMCI race conditions.
When CMCIs arrive in quick succession, the polling and CMCI processing
paths can race with each other. On Intel CPUs that support CMCI, polling
is therefore disabled on every error bank that has CMCI capability.
Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
When CMCIs arrive in quick succession, the polling and CMCI processing
paths can race with each other. On Intel CPUs that support CMCI, polling
is therefore disabled on every error bank that has CMCI capability.
Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Mon Apr 06 13:46:11 2009 +0100 (2009-04-06) |
parents | 5a60eb7fad79 |
children | 5966b71195b4 |
files | xen/arch/x86/cpu/mcheck/mce.c xen/arch/x86/cpu/mcheck/mce.h xen/arch/x86/cpu/mcheck/mce_intel.c xen/arch/x86/cpu/mcheck/non-fatal.c |
line diff
1.1 --- a/xen/arch/x86/cpu/mcheck/mce.c Thu Apr 02 14:17:19 2009 +0100
1.2 +++ b/xen/arch/x86/cpu/mcheck/mce.c Mon Apr 06 13:46:11 2009 +0100
1.3 @@ -577,6 +577,7 @@ void mcheck_init(struct cpuinfo_x86 *c)
1.4 break;
1.5 }
1.6
1.7 + set_poll_bankmask(c);
1.8 if (!inited)
1.9 printk(XENLOG_INFO "CPU%i: No machine check initialization\n",
1.10 smp_processor_id());
1.11 @@ -1230,7 +1231,19 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
1.12
1.13 return ret;
1.14 }
1.15 +void set_poll_bankmask(struct cpuinfo_x86 *c)
1.16 +{
1.17
1.18 + if (cmci_support && !mce_disabled) {
1.19 + memcpy(&(__get_cpu_var(poll_bankmask)),
1.20 + &(__get_cpu_var(no_cmci_banks)), sizeof(cpu_banks_t));
1.21 + }
1.22 + else {
1.23 + memcpy(&(get_cpu_var(poll_bankmask)), &mca_allbanks, sizeof(cpu_banks_t));
1.24 + if (mce_firstbank(c))
1.25 + clear_bit(0, get_cpu_var(poll_bankmask));
1.26 + }
1.27 +}
1.28 void mc_panic(char *s)
1.29 {
1.30 console_start_sync();
2.1 --- a/xen/arch/x86/cpu/mcheck/mce.h Thu Apr 02 14:17:19 2009 +0100
2.2 +++ b/xen/arch/x86/cpu/mcheck/mce.h Mon Apr 06 13:46:11 2009 +0100
2.3 @@ -88,6 +88,10 @@ struct mca_summary {
2.4 };
2.5
2.6 extern cpu_banks_t mca_allbanks;
2.7 +void set_poll_bankmask(struct cpuinfo_x86 *c);
2.8 +DECLARE_PER_CPU(cpu_banks_t, poll_bankmask);
2.9 +DECLARE_PER_CPU(cpu_banks_t, no_cmci_banks);
2.10 +extern int cmci_support;
2.11
2.12 extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t,
2.13 struct mca_summary *);
3.1 --- a/xen/arch/x86/cpu/mcheck/mce_intel.c Thu Apr 02 14:17:19 2009 +0100
3.2 +++ b/xen/arch/x86/cpu/mcheck/mce_intel.c Mon Apr 06 13:46:11 2009 +0100
3.3 @@ -12,9 +12,10 @@
3.4 #include "x86_mca.h"
3.5
3.6 DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
3.7 +DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
3.8 +int cmci_support = 0;
3.9
3.10 static int nr_intel_ext_msrs = 0;
3.11 -static int cmci_support = 0;
3.12 static int firstbank;
3.13
3.14 #ifdef CONFIG_X86_MCE_THERMAL
3.15 @@ -548,7 +549,6 @@ static void intel_machine_check(struct c
3.16 }
3.17
3.18 static DEFINE_SPINLOCK(cmci_discover_lock);
3.19 -static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
3.20
3.21 /*
3.22 * Discover bank sharing using the algorithm recommended in the SDM.
4.1 --- a/xen/arch/x86/cpu/mcheck/non-fatal.c Thu Apr 02 14:17:19 2009 +0100
4.2 +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c Mon Apr 06 13:46:11 2009 +0100
4.3 @@ -22,7 +22,7 @@
4.4
4.5 #include "mce.h"
4.6
4.7 -static cpu_banks_t bankmask;
4.8 +DEFINE_PER_CPU(cpu_banks_t, poll_bankmask);
4.9 static struct timer mce_timer;
4.10
4.11 #define MCE_PERIOD MILLISECS(8000)
4.12 @@ -39,7 +39,7 @@ static void mce_checkregs (void *info)
4.13 struct mca_summary bs;
4.14 static uint64_t dumpcount = 0;
4.15
4.16 - mctc = mcheck_mca_logout(MCA_POLLER, bankmask, &bs);
4.17 + mctc = mcheck_mca_logout(MCA_POLLER, __get_cpu_var(poll_bankmask), &bs);
4.18
4.19 if (bs.errcnt && mctc != NULL) {
4.20 adjust++;
4.21 @@ -94,10 +94,6 @@ static int __init init_nonfatal_mce_chec
4.22 if (!mce_available(c))
4.23 return -ENODEV;
4.24
4.25 - memcpy(&bankmask, &mca_allbanks, sizeof (cpu_banks_t));
4.26 - if (mce_firstbank(c) == 1)
4.27 - clear_bit(0, bankmask);
4.28 -
4.29 /*
4.30 * Check for non-fatal errors every MCE_RATE s
4.31 */