ia64/linux-2.6.18-xen.hg
changeset 902:75e5bfa7fbdc
x86: add MCA logging support in DOM0
When an MCE/CMCI error happens (or is found by polling), the related error
information is sent to DOM0 by Xen. This patch fetches the Xen-logged
information via hypercall and then converts the Xen-format log into the
Linux MCELOG format. This makes it possible to use the existing mcelog
tools written for native Linux.
With this patch, once MCE/CMCI error log information has been sent to DOM0,
running the mcelog tools in DOM0 gives the same detailed, decoded MCE
information as on native Linux.
Signed-Off-By: Liping Ke <liping.ke@intel.com>
Signed-Off-By: Yunhong Jiang <yunhong.jiang@intel.com>
Acked-By: Jan Beulich <jbeulich@novell.com>
When an MCE/CMCI error happens (or is found by polling), the related error
information is sent to DOM0 by Xen. This patch fetches the Xen-logged
information via hypercall and then converts the Xen-format log into the
Linux MCELOG format. This makes it possible to use the existing mcelog
tools written for native Linux.
With this patch, once MCE/CMCI error log information has been sent to DOM0,
running the mcelog tools in DOM0 gives the same detailed, decoded MCE
information as on native Linux.
Signed-Off-By: Liping Ke <liping.ke@intel.com>
Signed-Off-By: Yunhong Jiang <yunhong.jiang@intel.com>
Acked-By: Jan Beulich <jbeulich@novell.com>
author | Keir Fraser <keir.fraser@citrix.com> |
---|---|
date | Tue Jun 16 11:58:55 2009 +0100 (2009-06-16) |
parents | 9242c5b965c1 |
children | e4790de3234a |
files | arch/x86_64/Kconfig arch/x86_64/kernel/Makefile arch/x86_64/kernel/entry-xen.S arch/x86_64/kernel/mce.c arch/x86_64/kernel/mce_dom0.c include/asm-x86_64/mach-xen/asm/hypercall.h |
line diff
1.1 --- a/arch/x86_64/Kconfig Tue Jun 16 11:09:39 2009 +0100 1.2 +++ b/arch/x86_64/Kconfig Tue Jun 16 11:58:55 2009 +0100 1.3 @@ -471,8 +471,8 @@ config SWIOTLB 1.4 bool 1.5 1.6 config X86_MCE 1.7 - bool "Machine check support" if EMBEDDED 1.8 - depends on !X86_64_XEN 1.9 + bool "Machine check support" 1.10 + depends on (!XEN_UNPRIVILEGED_GUEST) 1.11 default y 1.12 help 1.13 Include a machine check error handler to report hardware errors. 1.14 @@ -482,7 +482,7 @@ config X86_MCE 1.15 1.16 config X86_MCE_INTEL 1.17 bool "Intel MCE features" 1.18 - depends on X86_MCE && X86_LOCAL_APIC 1.19 + depends on X86_MCE && X86_LOCAL_APIC && !X86_64_XEN 1.20 default y 1.21 help 1.22 Additional support for intel specific MCE features such as 1.23 @@ -490,12 +490,16 @@ config X86_MCE_INTEL 1.24 1.25 config X86_MCE_AMD 1.26 bool "AMD MCE features" 1.27 - depends on X86_MCE && X86_LOCAL_APIC 1.28 + depends on X86_MCE && X86_LOCAL_APIC && !X86_64_XEN 1.29 default y 1.30 help 1.31 Additional support for AMD specific MCE features such as 1.32 the DRAM Error Threshold. 1.33 1.34 +config X86_XEN_MCE 1.35 + def_bool y 1.36 + depends on X86_64_XEN && X86_MCE 1.37 + 1.38 config KEXEC 1.39 bool "kexec system call (EXPERIMENTAL)" 1.40 depends on EXPERIMENTAL && !XEN_UNPRIVILEGED_GUEST
2.1 --- a/arch/x86_64/kernel/Makefile Tue Jun 16 11:09:39 2009 +0100 2.2 +++ b/arch/x86_64/kernel/Makefile Tue Jun 16 11:58:55 2009 +0100 2.3 @@ -13,6 +13,7 @@ obj-y := process.o signal.o entry.o trap 2.4 obj-$(CONFIG_STACKTRACE) += stacktrace.o 2.5 obj-$(CONFIG_X86_MCE) += mce.o 2.6 obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 2.7 +obj-$(CONFIG_X86_XEN_MCE) += mce_dom0.o 2.8 obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o 2.9 obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ 2.10 obj-$(CONFIG_ACPI) += acpi/
3.1 --- a/arch/x86_64/kernel/entry-xen.S Tue Jun 16 11:09:39 2009 +0100 3.2 +++ b/arch/x86_64/kernel/entry-xen.S Tue Jun 16 11:58:55 2009 +0100 3.3 @@ -1258,13 +1258,8 @@ END(spurious_interrupt_bug) 3.4 3.5 #ifdef CONFIG_X86_MCE 3.6 /* runs on exception stack */ 3.7 -ENTRY(machine_check) 3.8 - INTR_FRAME 3.9 - pushq $0 3.10 - CFI_ADJUST_CFA_OFFSET 8 3.11 - paranoidentry do_machine_check 3.12 - jmp paranoid_exit1 3.13 - CFI_ENDPROC 3.14 +KPROBE_ENTRY(machine_check) 3.15 + zeroentry do_machine_check 3.16 END(machine_check) 3.17 #endif 3.18
4.1 --- a/arch/x86_64/kernel/mce.c Tue Jun 16 11:09:39 2009 +0100 4.2 +++ b/arch/x86_64/kernel/mce.c Tue Jun 16 11:58:55 2009 +0100 4.3 @@ -276,9 +276,16 @@ void do_machine_check(struct pt_regs * r 4.4 4.5 /* 4.6 * Periodic polling timer for "silent" machine check errors. 4.7 - */ 4.8 + * We will disable polling in DOM0 since all CMCI/Polling 4.9 + * mechanism will be done in XEN for Intel CPUs 4.10 +*/ 4.11 4.12 +#if defined (CONFIG_X86_XEN_MCE) 4.13 +static int check_interval = 0; /* disable polling */ 4.14 +#else 4.15 static int check_interval = 5 * 60; /* 5 minutes */ 4.16 +#endif 4.17 + 4.18 static void mcheck_timer(void *data); 4.19 static DECLARE_WORK(mcheck_work, mcheck_timer, NULL); 4.20 4.21 @@ -367,6 +374,7 @@ static void __cpuinit mce_cpu_quirks(str 4.22 4.23 static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) 4.24 { 4.25 +#ifndef CONFIG_X86_64_XEN 4.26 switch (c->x86_vendor) { 4.27 case X86_VENDOR_INTEL: 4.28 mce_intel_feature_init(c); 4.29 @@ -377,8 +385,8 @@ static void __cpuinit mce_cpu_features(s 4.30 default: 4.31 break; 4.32 } 4.33 +#endif 4.34 } 4.35 - 4.36 /* 4.37 * Called for each booted CPU to set up machine checks. 4.38 * Must be called with preempt off. 4.39 @@ -649,6 +657,7 @@ static struct notifier_block mce_cpu_not 4.40 }; 4.41 #endif 4.42 4.43 +extern void bind_virq_for_mce(void); 4.44 static __init int mce_init_device(void) 4.45 { 4.46 int err; 4.47 @@ -664,6 +673,13 @@ static __init int mce_init_device(void) 4.48 4.49 register_hotcpu_notifier(&mce_cpu_notifier); 4.50 misc_register(&mce_log_device); 4.51 + 4.52 + /*Register vIRQ handler for MCE LOG processing*/ 4.53 +#if defined(CONFIG_X86_XEN_MCE) 4.54 + printk(KERN_DEBUG "MCE: bind virq for DOM0 Logging\n"); 4.55 + bind_virq_for_mce(); 4.56 +#endif 4.57 + 4.58 return err; 4.59 } 4.60
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/arch/x86_64/kernel/mce_dom0.c Tue Jun 16 11:58:55 2009 +0100 5.3 @@ -0,0 +1,131 @@ 5.4 +#include <linux/init.h> 5.5 +#include <linux/types.h> 5.6 +#include <linux/kernel.h> 5.7 +#include <xen/interface/xen.h> 5.8 +#include <xen/evtchn.h> 5.9 +#include <xen/interface/vcpu.h> 5.10 +#include <asm/hypercall.h> 5.11 +#include <asm/mce.h> 5.12 + 5.13 +static int convert_log(struct mc_info *mi) 5.14 +{ 5.15 + struct mcinfo_common *mic = NULL; 5.16 + struct mcinfo_global *mc_global; 5.17 + struct mcinfo_bank *mc_bank; 5.18 + struct mce m; 5.19 + 5.20 + x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL); 5.21 + if (mic == NULL) 5.22 + { 5.23 + printk(KERN_ERR "DOM0_MCE_LOG: global data is NULL\n"); 5.24 + return -1; 5.25 + } 5.26 + 5.27 + mc_global = (struct mcinfo_global*)mic; 5.28 + m.mcgstatus = mc_global->mc_gstatus; 5.29 + m.cpu = mc_global->mc_coreid;/*for test*/ 5.30 + x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK); 5.31 + do 5.32 + { 5.33 + if (mic == NULL || mic->size == 0) 5.34 + break; 5.35 + if (mic->type == MC_TYPE_BANK) 5.36 + { 5.37 + mc_bank = (struct mcinfo_bank*)mic; 5.38 + m.misc = mc_bank->mc_misc; 5.39 + m.status = mc_bank->mc_status; 5.40 + m.addr = mc_bank->mc_addr; 5.41 + m.tsc = mc_bank->mc_tsc; 5.42 + m.res1 = mc_bank->mc_ctrl2; 5.43 + m.bank = mc_bank->mc_bank; 5.44 + printk(KERN_DEBUG "[CPU%d, BANK%d, addr %llx, state %llx]\n", 5.45 + m.bank, m.cpu, m.addr, m.status); 5.46 + /*log this record*/ 5.47 + mce_log(&m); 5.48 + } 5.49 + mic = x86_mcinfo_next(mic); 5.50 + }while (1); 5.51 + 5.52 + return 0; 5.53 +} 5.54 + 5.55 +static struct mc_info *g_mi; 5.56 + 5.57 +/*dom0 mce virq handler, logging physical mce error info*/ 5.58 + 5.59 +static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id, 5.60 + struct pt_regs *regs) 5.61 +{ 5.62 + xen_mc_t mc_op; 5.63 + int result = 0; 5.64 + 5.65 + printk(KERN_DEBUG "MCE_DOM0_LOG: enter dom0 mce vIRQ handler\n"); 5.66 + mc_op.cmd = XEN_MC_fetch; 5.67 + 
mc_op.interface_version = XEN_MCA_INTERFACE_VERSION; 5.68 + set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi); 5.69 +urgent: 5.70 + mc_op.u.mc_fetch.flags = XEN_MC_URGENT; 5.71 + result = HYPERVISOR_mca(&mc_op); 5.72 + if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA || 5.73 + mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) 5.74 + { 5.75 + printk(KERN_DEBUG "MCE_DOM0_LOG: No more urgent data\n"); 5.76 + goto nonurgent; 5.77 + } 5.78 + else 5.79 + { 5.80 + result = convert_log(g_mi); 5.81 + if (result) { 5.82 + printk(KERN_ERR "MCE_DOM0_LOG: Log conversion failed\n"); 5.83 + goto end; 5.84 + } 5.85 + /* After fetching the telem from DOM0, we need to dec the telem's 5.86 + * refcnt and release the entry. The telem is reserved and inc 5.87 + * refcnt when filling the telem. 5.88 + */ 5.89 + mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK; 5.90 + result = HYPERVISOR_mca(&mc_op); 5.91 + 5.92 + goto urgent; 5.93 + } 5.94 +nonurgent: 5.95 + mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT; 5.96 + result = HYPERVISOR_mca(&mc_op); 5.97 + if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA || 5.98 + mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED) 5.99 + { 5.100 + printk(KERN_DEBUG "MCE_DOM0_LOG: No more nonurgent data\n"); 5.101 + goto end; 5.102 + } 5.103 + else 5.104 + { 5.105 + result = convert_log(g_mi); 5.106 + if (result) { 5.107 + printk(KERN_ERR "MCE_DOM0_LOG: Log conversion failed\n"); 5.108 + goto end; 5.109 + } 5.110 + /* After fetching the telem from DOM0, we need to dec the telem's 5.111 + * refcnt and release the entry. The telem is reserved and inc 5.112 + * refcnt when filling the telem. 
5.113 + */ 5.114 + mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK; 5.115 + result = HYPERVISOR_mca(&mc_op); 5.116 + 5.117 + goto nonurgent; 5.118 + } 5.119 +end: 5.120 + return IRQ_HANDLED; 5.121 +} 5.122 + 5.123 +void bind_virq_for_mce(void) 5.124 +{ 5.125 + int ret; 5.126 + 5.127 + ret = bind_virq_to_irqhandler(VIRQ_MCA, 0, 5.128 + mce_dom0_interrupt, 0, "mce", NULL); 5.129 + 5.130 + g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL); 5.131 + if (ret < 0) 5.132 + printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n"); 5.133 +} 5.134 +
6.1 --- a/include/asm-x86_64/mach-xen/asm/hypercall.h Tue Jun 16 11:09:39 2009 +0100 6.2 +++ b/include/asm-x86_64/mach-xen/asm/hypercall.h Tue Jun 16 11:58:55 2009 +0100 6.3 @@ -39,6 +39,7 @@ 6.4 6.5 #include <linux/string.h> /* memcpy() */ 6.6 #include <linux/stringify.h> 6.7 +#include <xen/interface/arch-x86/xen-mca.h> 6.8 6.9 #ifndef __HYPERVISOR_H__ 6.10 # error "please don't include this file directly" 6.11 @@ -215,7 +216,13 @@ HYPERVISOR_platform_op( 6.12 platform_op->interface_version = XENPF_INTERFACE_VERSION; 6.13 return _hypercall1(int, platform_op, platform_op); 6.14 } 6.15 - 6.16 +static inline int __must_check 6.17 +HYPERVISOR_mca( 6.18 + struct xen_mc *mc_op) 6.19 +{ 6.20 + mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; 6.21 + return _hypercall1(int, mca, mc_op); 6.22 +} 6.23 static inline int __must_check 6.24 HYPERVISOR_set_debugreg( 6.25 unsigned int reg, unsigned long value)