From ec4927863164da59de51cf8e4f93996bcb30af43 Mon Sep 17 00:00:00 2001
From: Keith Coleman
Date: Mon, 9 May 2011 03:06:33 -0400
Subject: [PATCH] mce: Replace BUG() with a console warning in the MCE handler.

If the hardware reports corrected errors that we didn't see through
the status MSRs, complain on the console but don't BUG() the machine.

Signed-off-by: Tim Deegan
xen-unstable changeset: 21757:50cf787b70eb
xen-unstable date: Fri Jul 09 12:21:31 2010 +0100
---
 xen/arch/x86/cpu/mcheck/amd_nonfatal.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c
index d354b1f06..5431a3321 100644
--- a/xen/arch/x86/cpu/mcheck/amd_nonfatal.c
+++ b/xen/arch/x86/cpu/mcheck/amd_nonfatal.c
@@ -155,14 +155,19 @@ static void mce_amd_work_fn(void *data)
 
 		/* HW does not count *all* kinds of correctable errors.
 		 * Thus it is possible, that the polling routine finds an
-		 * correctable error even if the HW reports nothing.
-		 * However, the other way around is not possible (= BUG).
-		 */
+		 * correctable error even if the HW reports nothing. */
 		if (counter > 0) {
 			/* HW reported correctable errors,
 			 * the polling routine did not find...
 			 */
-			BUG_ON(adjust == 0);
+			if (adjust == 0) {
+				printk("CPU counter reports %"PRIu32
+				       " correctable hardware error%s that %s"
+				       " not reported by the status MSRs\n",
+				       counter,
+				       (counter == 1 ? "" : "s"),
+				       (counter == 1 ? "was" : "were"));
+			}
 			/* subtract 1 to not double count the error
 			 * from the polling service routine */
 			adjust += (counter - 1);
-- 
2.39.5