ia64/xen-unstable

view xen/arch/i386/nmi.c @ 722:7a9d47fea66c

bitkeeper revision 1.428 (3f677454_j81KDQLm_L7AscjYn2nYg)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/labyrinth/iap10/xeno-clone/xeno.bk
author iap10@labyrinth.cl.cam.ac.uk
date Tue Sep 16 20:36:36 2003 +0000 (2003-09-16)
parents c82a216f0773 d0cdb9994a2b
children 8870c27357dd
line source
1 /*
2 * linux/arch/i386/nmi.c
3 *
4 * NMI watchdog support on APIC systems
5 *
6 * Started by Ingo Molnar <mingo@redhat.com>
7 *
8 * Fixes:
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
11 * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
12 * Keir Fraser : Pentium 4 Hyperthreading support
13 */
15 #include <linux/config.h>
16 #include <linux/init.h>
17 #include <linux/lib.h>
18 #include <linux/mm.h>
19 #include <linux/irq.h>
20 #include <linux/delay.h>
21 #include <linux/interrupt.h>
22 #include <linux/time.h>
23 #include <linux/timex.h>
24 #include <linux/sched.h>
26 #include <asm/mc146818rtc.h>
27 #include <asm/smp.h>
28 #include <asm/msr.h>
29 #include <asm/mpspec.h>
31 unsigned int nmi_watchdog = NMI_NONE;
32 unsigned int watchdog_on = 0;
33 static unsigned int nmi_hz = HZ;
34 unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */
35 extern void show_registers(struct pt_regs *regs);
37 extern int logical_proc_id[];
39 #define K7_EVNTSEL_ENABLE (1 << 22)
40 #define K7_EVNTSEL_INT (1 << 20)
41 #define K7_EVNTSEL_OS (1 << 17)
42 #define K7_EVNTSEL_USR (1 << 16)
43 #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
44 #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
46 #define P6_EVNTSEL0_ENABLE (1 << 22)
47 #define P6_EVNTSEL_INT (1 << 20)
48 #define P6_EVNTSEL_OS (1 << 17)
49 #define P6_EVNTSEL_USR (1 << 16)
50 #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
51 #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
53 #define MSR_P4_MISC_ENABLE 0x1A0
54 #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
55 #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
56 #define MSR_P4_PERFCTR0 0x300
57 #define MSR_P4_CCCR0 0x360
58 #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
59 #define P4_ESCR_OS0 (1<<3)
60 #define P4_ESCR_USR0 (1<<2)
61 #define P4_ESCR_OS1 (1<<1)
62 #define P4_ESCR_USR1 (1<<0)
63 #define P4_CCCR_OVF_PMI0 (1<<26)
64 #define P4_CCCR_OVF_PMI1 (1<<27)
65 #define P4_CCCR_THRESHOLD(N) ((N)<<20)
66 #define P4_CCCR_COMPLEMENT (1<<19)
67 #define P4_CCCR_COMPARE (1<<18)
68 #define P4_CCCR_REQUIRED (3<<16)
69 #define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
70 #define P4_CCCR_ENABLE (1<<12)
71 /*
72 * Set up IQ_COUNTER{0,1} to behave like a clock, by having IQ_CCCR{0,1} filter
73 * CRU_ESCR0 (with any non-null event selector) through a complemented
74 * max threshold. [IA32-Vol3, Section 14.9.9]
75 */
76 #define MSR_P4_IQ_COUNTER0 0x30C
77 #define MSR_P4_IQ_COUNTER1 0x30D
78 #define MSR_P4_IQ_CCCR0 0x36C
79 #define MSR_P4_IQ_CCCR1 0x36D
80 #define MSR_P4_CRU_ESCR0 0x3B8 /* ESCR no. 4 */
81 #define P4_NMI_CRU_ESCR0 \
82 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS0|P4_ESCR_USR0| \
83 P4_ESCR_OS1|P4_ESCR_USR1)
84 #define P4_NMI_IQ_CCCR0 \
85 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
86 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
87 #define P4_NMI_IQ_CCCR1 \
88 (P4_CCCR_OVF_PMI1|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
89 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
91 int __init check_nmi_watchdog (void)
92 {
93 unsigned int prev_nmi_count[NR_CPUS];
94 int j, cpu;
96 if ( !nmi_watchdog )
97 return 0;
99 printk("Testing NMI watchdog --- ");
101 for ( j = 0; j < smp_num_cpus; j++ )
102 {
103 cpu = cpu_logical_map(j);
104 prev_nmi_count[cpu] = irq_stat[cpu].__nmi_count;
105 }
106 sti();
107 mdelay((10*1000)/nmi_hz); /* wait 10 ticks */
109 for ( j = 0; j < smp_num_cpus; j++ )
110 {
111 cpu = cpu_logical_map(j);
112 if ( nmi_count(cpu) - prev_nmi_count[cpu] <= 5 )
113 printk("CPU#%d stuck. ", cpu);
114 else
115 printk("CPU#%d okay. ", cpu);
116 }
118 printk("\n");
120 /* now that we know it works we can reduce NMI frequency to
121 something more reasonable; makes a difference in some configs */
122 if ( nmi_watchdog == NMI_LOCAL_APIC )
123 nmi_hz = 1;
125 return 0;
126 }
/* Power-management hook for the watchdog: no PM support here, so a no-op. */
static inline void nmi_pm_init(void) { }
/* With no PM re-init path, watchdog setup code is init-time only. */
#define __pminit __init
131 /*
132 * Activate the NMI watchdog via the local APIC.
133 * Original code written by Keith Owens.
134 */
136 static void __pminit clear_msr_range(unsigned int base, unsigned int n)
137 {
138 unsigned int i;
139 for ( i = 0; i < n; i++ )
140 wrmsr(base+i, 0, 0);
141 }
143 static void __pminit setup_k7_watchdog(void)
144 {
145 unsigned int evntsel;
147 nmi_perfctr_msr = MSR_K7_PERFCTR0;
149 clear_msr_range(MSR_K7_EVNTSEL0, 4);
150 clear_msr_range(MSR_K7_PERFCTR0, 4);
152 evntsel = K7_EVNTSEL_INT
153 | K7_EVNTSEL_OS
154 | K7_EVNTSEL_USR
155 | K7_NMI_EVENT;
157 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
158 Dprintk("setting K7_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
159 wrmsr(MSR_K7_PERFCTR0, -(cpu_khz/nmi_hz*1000), -1);
160 apic_write(APIC_LVTPC, APIC_DM_NMI);
161 evntsel |= K7_EVNTSEL_ENABLE;
162 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
163 }
165 static void __pminit setup_p6_watchdog(void)
166 {
167 unsigned int evntsel;
169 nmi_perfctr_msr = MSR_P6_PERFCTR0;
171 clear_msr_range(MSR_P6_EVNTSEL0, 2);
172 clear_msr_range(MSR_P6_PERFCTR0, 2);
174 evntsel = P6_EVNTSEL_INT
175 | P6_EVNTSEL_OS
176 | P6_EVNTSEL_USR
177 | P6_NMI_EVENT;
179 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
180 Dprintk("setting P6_PERFCTR0 to %08lx\n", -(cpu_khz/nmi_hz*1000));
181 wrmsr(MSR_P6_PERFCTR0, -(cpu_khz/nmi_hz*1000), 0);
182 apic_write(APIC_LVTPC, APIC_DM_NMI);
183 evntsel |= P6_EVNTSEL0_ENABLE;
184 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
185 }
187 static int __pminit setup_p4_watchdog(void)
188 {
189 unsigned int misc_enable, dummy;
191 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy);
192 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
193 return 0;
195 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
197 if ( logical_proc_id[smp_processor_id()] == 0 )
198 {
199 if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL))
200 clear_msr_range(0x3F1, 2);
201 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current
202 docs doesn't fully define it, so leave it alone for now. */
203 clear_msr_range(0x3A0, 31);
204 clear_msr_range(0x3C0, 6);
205 clear_msr_range(0x3C8, 6);
206 clear_msr_range(0x3E0, 2);
207 clear_msr_range(MSR_P4_CCCR0, 18);
208 clear_msr_range(MSR_P4_PERFCTR0, 18);
210 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0);
211 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0);
212 Dprintk("setting P4_IQ_COUNTER0 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
213 wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
214 apic_write(APIC_LVTPC, APIC_DM_NMI);
215 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
216 }
217 else if ( logical_proc_id[smp_processor_id()] == 1 )
218 {
219 wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1 & ~P4_CCCR_ENABLE, 0);
220 Dprintk("setting P4_IQ_COUNTER2 to 0x%08lx\n", -(cpu_khz/nmi_hz*1000));
221 wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
222 apic_write(APIC_LVTPC, APIC_DM_NMI);
223 wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
224 }
225 else
226 {
227 return 0;
228 }
230 return 1;
231 }
233 void __pminit setup_apic_nmi_watchdog(void)
234 {
235 if (!nmi_watchdog)
236 return;
238 switch (boot_cpu_data.x86_vendor) {
239 case X86_VENDOR_AMD:
240 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
241 return;
242 setup_k7_watchdog();
243 break;
244 case X86_VENDOR_INTEL:
245 switch (boot_cpu_data.x86) {
246 case 6:
247 setup_p6_watchdog();
248 break;
249 case 15:
250 if (!setup_p4_watchdog())
251 return;
252 break;
253 default:
254 return;
255 }
256 break;
257 default:
258 return;
259 }
260 nmi_pm_init();
261 }
/*
 * Per-CPU watchdog state: the apic_timer_irqs value observed at the last
 * watchdog NMI, and how many consecutive NMIs have seen no progress.
 */
static unsigned int
last_irq_sums [NR_CPUS],
alert_counter [NR_CPUS];
268 void touch_nmi_watchdog (void)
269 {
270 int i;
271 for (i = 0; i < smp_num_cpus; i++)
272 alert_counter[i] = 0;
273 }
/*
 * Per-NMI watchdog handler, called from the NMI path on every watchdog
 * NMI.  Checks that this CPU's local APIC timer interrupts are still
 * advancing (if not for ~5 seconds, declares a lockup), then re-arms
 * the performance counter that generates the next watchdog NMI.
 */
void nmi_watchdog_tick (struct pt_regs * regs)
{
    extern spinlock_t console_lock;
    extern void die(const char * str, struct pt_regs * regs, long err);
    extern void putchar_serial(unsigned char c);
    /* Progress indicator: APIC timer interrupts taken by this CPU. */
    int sum, cpu = smp_processor_id();
    sum = apic_timer_irqs[cpu];
    if ( (last_irq_sums[cpu] == sum) && watchdog_on )
    {
        /*
         * Ayiee, looks like this CPU is stuck ... wait a few IRQs (5 seconds)
         * before doing the oops ...
         */
        alert_counter[cpu]++;
        if (alert_counter[cpu] == 5*nmi_hz) {
            /* Force-release the console lock: the holder may be the wedged
               CPU, and die() must still be able to print. */
            console_lock = SPIN_LOCK_UNLOCKED;
            die("NMI Watchdog detected LOCKUP on CPU", regs, cpu);
        }
    }
    else
    {
        /* Timer interrupts advanced: clear the stuck-counter. */
        last_irq_sums[cpu] = sum;
        alert_counter[cpu] = 0;
    }
    if ( nmi_perfctr_msr )
    {
        if ( nmi_perfctr_msr == MSR_P4_IQ_COUNTER0 )
        {
            /*
             * P4: rewrite the CCCR and the LVTPC entry before reloading the
             * counter; each hyperthread re-arms only its own counter.
             * NOTE(review): rewriting LVTPC presumably un-masks the entry
             * after PMI delivery -- confirm against IA32 SDM Vol 3.
             */
            if ( logical_proc_id[cpu] == 0 )
            {
                wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0, 0);
                apic_write(APIC_LVTPC, APIC_DM_NMI);
                wrmsr(MSR_P4_IQ_COUNTER0, -(cpu_khz/nmi_hz*1000), -1);
            }
            else
            {
                wrmsr(MSR_P4_IQ_CCCR1, P4_NMI_IQ_CCCR1, 0);
                apic_write(APIC_LVTPC, APIC_DM_NMI);
                wrmsr(MSR_P4_IQ_COUNTER1, -(cpu_khz/nmi_hz*1000), -1);
            }
        }
        else
        {
            /* K7/P6: reload the counter so it overflows again after
               roughly 1/nmi_hz seconds. */
            wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
        }
    }
}