xenbits.xensource.com Git - xen.git/commitdiff
x86/mce: don't spam the console with "CPUx: Temperature z"
author Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Mon, 16 Jun 2014 09:59:32 +0000 (11:59 +0200)
committer Jan Beulich <jbeulich@suse.com>
Mon, 16 Jun 2014 09:59:32 +0000 (11:59 +0200)
If the machine has been quite busy it ends up with these messages
printed on the hypervisor console:

(XEN) CPU3: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal
(XEN) CPU0: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal
(XEN) CPU1: Temperature/speed normal
(XEN) CPU0: Temperature above threshold
(XEN) CPU0: Running in modulated clock mode
(XEN) CPU1: Temperature/speed normal
(XEN) CPU2: Temperature/speed normal
(XEN) CPU3: Temperature/speed normal

While state changes are important, repeated reports of an unchanged
state are not needed. As such, add a latch mechanism to only print
the information if it has changed since the last update (needed because
the hardware doesn't properly suppress redundant notifications).

This was observed on Intel DQ67SW,
BIOS SWQ6710H.86A.0066.2012.1105.1504 11/05/2012

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
Acked-by: Christoph Egger <chegger@amazon.de>
xen/arch/x86/cpu/mcheck/mce_intel.c

index ad06efca030aa9fb4bbae13cbb1736ff8186a2ea..bb4ce47ef9e070f5ceaa54cac56a493042f84f05 100644 (file)
@@ -49,11 +49,15 @@ static int __read_mostly nr_intel_ext_msrs;
 #define INTEL_SRAR_INSTR_FETCH 0x150
 
 #ifdef CONFIG_X86_MCE_THERMAL
+#define MCE_RING                0x1
+static DEFINE_PER_CPU(int, last_state);
+
 static void intel_thermal_interrupt(struct cpu_user_regs *regs)
 {
     uint64_t msr_content;
     unsigned int cpu = smp_processor_id();
     static DEFINE_PER_CPU(s_time_t, next);
+    int *this_last_state;
 
     ack_APIC_irq();
 
@@ -62,13 +66,17 @@ static void intel_thermal_interrupt(struct cpu_user_regs *regs)
 
     per_cpu(next, cpu) = NOW() + MILLISECS(5000);
     rdmsrl(MSR_IA32_THERM_STATUS, msr_content);
-    if (msr_content & 0x1) {
-        printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
-        printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
-                cpu);
+    this_last_state = &per_cpu(last_state, cpu);
+    if ( *this_last_state == (msr_content & MCE_RING) )
+        return;
+    *this_last_state = msr_content & MCE_RING;
+    if ( msr_content & MCE_RING )
+    {
+        printk(KERN_EMERG "CPU%u: Temperature above threshold\n", cpu);
+        printk(KERN_EMERG "CPU%u: Running in modulated clock mode\n", cpu);
         add_taint(TAINT_MACHINE_CHECK);
     } else {
-        printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+        printk(KERN_INFO "CPU%u: Temperature/speed normal\n", cpu);
     }
 }
 
@@ -802,6 +810,7 @@ static int cpu_mcabank_alloc(unsigned int cpu)
 
     per_cpu(no_cmci_banks, cpu) = cmci;
     per_cpu(mce_banks_owned, cpu) = owned;
+    per_cpu(last_state, cpu) = -1;
 
     return 0;
 out: