xenbits.xensource.com Git - xen.git/commitdiff
x86/mce: don't spam the console with "CPUx: Temperature z"
author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tue, 24 Jun 2014 08:18:29 +0000 (10:18 +0200)
committer: Jan Beulich <jbeulich@suse.com>
Tue, 24 Jun 2014 08:18:29 +0000 (10:18 +0200)
If the machine has been quite busy it ends up with these messages
printed on the hypervisor console:

(XEN) CPU3: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature above threshold
(XEN) CPU0: Running in modulated clock mode
(XEN) CPU1: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal

While the state changes are important, repeated reports of an unchanged
state are not needed. As such, add a latch mechanism to print the
information only if it has changed since the last update (as the
hardware doesn't properly suppress redundant notifications).

This was observed on Intel DQ67SW,
BIOS SWQ6710H.86A.0066.2012.1105.1504 11/05/2012

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Christoph Egger <chegger@amazon.de>
master commit: 323338f86fb6cd6f6dba4f59a84eed71b3552d21
master date: 2014-06-16 11:59:32 +0200

xen/arch/x86/cpu/mcheck/mce_intel.c

index 02e2db57d49e40ae8580014dac4163bb02dd0ab9..be14593b1caf1f5e28dddc94f6e088e2c9b90449 100644 (file)
@@ -47,11 +47,15 @@ static int __read_mostly nr_intel_ext_msrs;
 #define INTEL_SRAR_INSTR_FETCH 0x150
 
 #ifdef CONFIG_X86_MCE_THERMAL
+#define MCE_RING                0x1
+static DEFINE_PER_CPU(int, last_state);
+
 static void intel_thermal_interrupt(struct cpu_user_regs *regs)
 {
     uint64_t msr_content;
     unsigned int cpu = smp_processor_id();
     static DEFINE_PER_CPU(s_time_t, next);
+    int *this_last_state;
 
     ack_APIC_irq();
 
@@ -60,13 +64,17 @@ static void intel_thermal_interrupt(struct cpu_user_regs *regs)
 
     per_cpu(next, cpu) = NOW() + MILLISECS(5000);
     rdmsrl(MSR_IA32_THERM_STATUS, msr_content);
-    if (msr_content & 0x1) {
-        printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
-        printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-                cpu);
+    this_last_state = &per_cpu(last_state, cpu);
+    if ( *this_last_state == (msr_content & MCE_RING) )
+        return;
+    *this_last_state = msr_content & MCE_RING;
+    if ( msr_content & MCE_RING )
+    {
+        printk(KERN_EMERG "CPU%u: Temperature above threshold\n", cpu);
+        printk(KERN_EMERG "CPU%u: Running in modulated clock mode\n", cpu);
         add_taint(TAINT_MACHINE_CHECK);
     } else {
-        printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+        printk(KERN_INFO "CPU%u: Temperature/speed normal\n", cpu);
     }
 }
 
@@ -1329,6 +1337,7 @@ static int cpu_mcabank_alloc(unsigned int cpu)
     per_cpu(mce_clear_banks, cpu) = mb1;
     per_cpu(no_cmci_banks, cpu) = mb2;
     per_cpu(mce_banks_owned, cpu) = mb3;
+    per_cpu(last_state, cpu) = -1;
 
     return 0;
 out: