ia64/linux-2.6.18-xen.hg

view arch/alpha/kernel/err_titan.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * linux/arch/alpha/kernel/err_titan.c
3 *
4 * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation)
5 *
6 * Error handling code supporting TITAN systems
7 */
9 #include <linux/init.h>
10 #include <linux/pci.h>
11 #include <linux/sched.h>
13 #include <asm/io.h>
14 #include <asm/core_titan.h>
15 #include <asm/hwrpb.h>
16 #include <asm/smp.h>
17 #include <asm/err_common.h>
18 #include <asm/err_ev6.h>
20 #include "err_impl.h"
21 #include "proto.h"
24 static int
25 titan_parse_c_misc(u64 c_misc, int print)
26 {
27 #ifdef CONFIG_VERBOSE_MCHECK
28 char *src;
29 int nxs = 0;
30 #endif
31 int status = MCHK_DISPOSITION_REPORT;
33 #define TITAN__CCHIP_MISC__NXM (1UL << 28)
34 #define TITAN__CCHIP_MISC__NXS__S (29)
35 #define TITAN__CCHIP_MISC__NXS__M (0x7)
37 if (!(c_misc & TITAN__CCHIP_MISC__NXM))
38 return MCHK_DISPOSITION_UNKNOWN_ERROR;
40 #ifdef CONFIG_VERBOSE_MCHECK
41 if (!print)
42 return status;
44 nxs = EXTRACT(c_misc, TITAN__CCHIP_MISC__NXS);
45 switch(nxs) {
46 case 0: /* CPU 0 */
47 case 1: /* CPU 1 */
48 case 2: /* CPU 2 */
49 case 3: /* CPU 3 */
50 src = "CPU";
51 /* num is already the CPU number */
52 break;
53 case 4: /* Pchip 0 */
54 case 5: /* Pchip 1 */
55 src = "Pchip";
56 nxs -= 4;
57 break;
58 default:/* reserved */
59 src = "Unknown, NXS =";
60 /* leave num untouched */
61 break;
62 }
64 printk("%s Non-existent memory access from: %s %d\n",
65 err_print_prefix, src, nxs);
66 #endif /* CONFIG_VERBOSE_MCHECK */
68 return status;
69 }
71 static int
72 titan_parse_p_serror(int which, u64 serror, int print)
73 {
74 int status = MCHK_DISPOSITION_REPORT;
76 #ifdef CONFIG_VERBOSE_MCHECK
77 char *serror_src[] = {"GPCI", "APCI", "AGP HP", "AGP LP"};
78 char *serror_cmd[] = {"DMA Read", "DMA RMW", "SGTE Read", "Reserved"};
79 #endif /* CONFIG_VERBOSE_MCHECK */
81 #define TITAN__PCHIP_SERROR__LOST_UECC (1UL << 0)
82 #define TITAN__PCHIP_SERROR__UECC (1UL << 1)
83 #define TITAN__PCHIP_SERROR__CRE (1UL << 2)
84 #define TITAN__PCHIP_SERROR__NXIO (1UL << 3)
85 #define TITAN__PCHIP_SERROR__LOST_CRE (1UL << 4)
86 #define TITAN__PCHIP_SERROR__ECCMASK (TITAN__PCHIP_SERROR__UECC | \
87 TITAN__PCHIP_SERROR__CRE)
88 #define TITAN__PCHIP_SERROR__ERRMASK (TITAN__PCHIP_SERROR__LOST_UECC | \
89 TITAN__PCHIP_SERROR__UECC | \
90 TITAN__PCHIP_SERROR__CRE | \
91 TITAN__PCHIP_SERROR__NXIO | \
92 TITAN__PCHIP_SERROR__LOST_CRE)
93 #define TITAN__PCHIP_SERROR__SRC__S (52)
94 #define TITAN__PCHIP_SERROR__SRC__M (0x3)
95 #define TITAN__PCHIP_SERROR__CMD__S (54)
96 #define TITAN__PCHIP_SERROR__CMD__M (0x3)
97 #define TITAN__PCHIP_SERROR__SYN__S (56)
98 #define TITAN__PCHIP_SERROR__SYN__M (0xff)
99 #define TITAN__PCHIP_SERROR__ADDR__S (15)
100 #define TITAN__PCHIP_SERROR__ADDR__M (0xffffffffUL)
102 if (!(serror & TITAN__PCHIP_SERROR__ERRMASK))
103 return MCHK_DISPOSITION_UNKNOWN_ERROR;
105 #ifdef CONFIG_VERBOSE_MCHECK
106 if (!print)
107 return status;
109 printk("%s PChip %d SERROR: %016lx\n",
110 err_print_prefix, which, serror);
111 if (serror & TITAN__PCHIP_SERROR__ECCMASK) {
112 printk("%s %sorrectable ECC Error:\n"
113 " Source: %-6s Command: %-8s Syndrome: 0x%08x\n"
114 " Address: 0x%lx\n",
115 err_print_prefix,
116 (serror & TITAN__PCHIP_SERROR__UECC) ? "Unc" : "C",
117 serror_src[EXTRACT(serror, TITAN__PCHIP_SERROR__SRC)],
118 serror_cmd[EXTRACT(serror, TITAN__PCHIP_SERROR__CMD)],
119 (unsigned)EXTRACT(serror, TITAN__PCHIP_SERROR__SYN),
120 EXTRACT(serror, TITAN__PCHIP_SERROR__ADDR));
121 }
122 if (serror & TITAN__PCHIP_SERROR__NXIO)
123 printk("%s Non Existent I/O Error\n", err_print_prefix);
124 if (serror & TITAN__PCHIP_SERROR__LOST_UECC)
125 printk("%s Lost Uncorrectable ECC Error\n",
126 err_print_prefix);
127 if (serror & TITAN__PCHIP_SERROR__LOST_CRE)
128 printk("%s Lost Correctable ECC Error\n", err_print_prefix);
129 #endif /* CONFIG_VERBOSE_MCHECK */
131 return status;
132 }
134 static int
135 titan_parse_p_perror(int which, int port, u64 perror, int print)
136 {
137 int cmd;
138 unsigned long addr;
139 int status = MCHK_DISPOSITION_REPORT;
141 #ifdef CONFIG_VERBOSE_MCHECK
142 char *perror_cmd[] = { "Interrupt Acknowledge", "Special Cycle",
143 "I/O Read", "I/O Write",
144 "Reserved", "Reserved",
145 "Memory Read", "Memory Write",
146 "Reserved", "Reserved",
147 "Configuration Read", "Configuration Write",
148 "Memory Read Multiple", "Dual Address Cycle",
149 "Memory Read Line","Memory Write and Invalidate"
150 };
151 #endif /* CONFIG_VERBOSE_MCHECK */
153 #define TITAN__PCHIP_PERROR__LOST (1UL << 0)
154 #define TITAN__PCHIP_PERROR__SERR (1UL << 1)
155 #define TITAN__PCHIP_PERROR__PERR (1UL << 2)
156 #define TITAN__PCHIP_PERROR__DCRTO (1UL << 3)
157 #define TITAN__PCHIP_PERROR__SGE (1UL << 4)
158 #define TITAN__PCHIP_PERROR__APE (1UL << 5)
159 #define TITAN__PCHIP_PERROR__TA (1UL << 6)
160 #define TITAN__PCHIP_PERROR__DPE (1UL << 7)
161 #define TITAN__PCHIP_PERROR__NDS (1UL << 8)
162 #define TITAN__PCHIP_PERROR__IPTPR (1UL << 9)
163 #define TITAN__PCHIP_PERROR__IPTPW (1UL << 10)
164 #define TITAN__PCHIP_PERROR__ERRMASK (TITAN__PCHIP_PERROR__LOST | \
165 TITAN__PCHIP_PERROR__SERR | \
166 TITAN__PCHIP_PERROR__PERR | \
167 TITAN__PCHIP_PERROR__DCRTO | \
168 TITAN__PCHIP_PERROR__SGE | \
169 TITAN__PCHIP_PERROR__APE | \
170 TITAN__PCHIP_PERROR__TA | \
171 TITAN__PCHIP_PERROR__DPE | \
172 TITAN__PCHIP_PERROR__NDS | \
173 TITAN__PCHIP_PERROR__IPTPR | \
174 TITAN__PCHIP_PERROR__IPTPW)
175 #define TITAN__PCHIP_PERROR__DAC (1UL << 47)
176 #define TITAN__PCHIP_PERROR__MWIN (1UL << 48)
177 #define TITAN__PCHIP_PERROR__CMD__S (52)
178 #define TITAN__PCHIP_PERROR__CMD__M (0x0f)
179 #define TITAN__PCHIP_PERROR__ADDR__S (14)
180 #define TITAN__PCHIP_PERROR__ADDR__M (0x1fffffffful)
182 if (!(perror & TITAN__PCHIP_PERROR__ERRMASK))
183 return MCHK_DISPOSITION_UNKNOWN_ERROR;
185 cmd = EXTRACT(perror, TITAN__PCHIP_PERROR__CMD);
186 addr = EXTRACT(perror, TITAN__PCHIP_PERROR__ADDR) << 2;
188 /*
189 * Initializing the BIOS on a video card on a bus without
190 * a south bridge (subtractive decode agent) can result in
191 * master aborts as the BIOS probes the capabilities of the
192 * card. XFree86 does such initialization. If the error
193 * is a master abort (No DevSel as PCI Master) and the command
194 * is an I/O read or write below the address where we start
195 * assigning PCI I/O spaces (SRM uses 0x1000), then mark the
196 * error as dismissable so starting XFree86 doesn't result
197 * in a series of uncorrectable errors being reported. Also
198 * dismiss master aborts to VGA frame buffer space
199 * (0xA0000 - 0xC0000) and legacy BIOS space (0xC0000 - 0x100000)
200 * for the same reason.
201 *
202 * Also mark the error dismissible if it looks like the right
203 * error but only the Lost bit is set. Since the BIOS initialization
204 * can cause multiple master aborts and the error interrupt can
205 * be handled on a different CPU than the BIOS code is run on,
206 * it is possible for a second master abort to occur between the
207 * time the PALcode reads PERROR and the time it writes PERROR
208 * to acknowledge the error. If this timing happens, a second
209 * error will be signalled after the first, and if no additional
210 * errors occur, will look like a Lost error with no additional
211 * errors on the same transaction as the previous error.
212 */
213 if (((perror & TITAN__PCHIP_PERROR__NDS) ||
214 ((perror & TITAN__PCHIP_PERROR__ERRMASK) ==
215 TITAN__PCHIP_PERROR__LOST)) &&
216 ((((cmd & 0xE) == 2) && (addr < 0x1000)) ||
217 (((cmd & 0xE) == 6) && (addr >= 0xA0000) && (addr < 0x100000)))) {
218 status = MCHK_DISPOSITION_DISMISS;
219 }
221 #ifdef CONFIG_VERBOSE_MCHECK
222 if (!print)
223 return status;
225 printk("%s PChip %d %cPERROR: %016lx\n",
226 err_print_prefix, which,
227 port ? 'A' : 'G', perror);
228 if (perror & TITAN__PCHIP_PERROR__IPTPW)
229 printk("%s Invalid Peer-to-Peer Write\n", err_print_prefix);
230 if (perror & TITAN__PCHIP_PERROR__IPTPR)
231 printk("%s Invalid Peer-to-Peer Read\n", err_print_prefix);
232 if (perror & TITAN__PCHIP_PERROR__NDS)
233 printk("%s No DEVSEL as PCI Master [Master Abort]\n",
234 err_print_prefix);
235 if (perror & TITAN__PCHIP_PERROR__DPE)
236 printk("%s Data Parity Error\n", err_print_prefix);
237 if (perror & TITAN__PCHIP_PERROR__TA)
238 printk("%s Target Abort\n", err_print_prefix);
239 if (perror & TITAN__PCHIP_PERROR__APE)
240 printk("%s Address Parity Error\n", err_print_prefix);
241 if (perror & TITAN__PCHIP_PERROR__SGE)
242 printk("%s Scatter-Gather Error, Invalid PTE\n",
243 err_print_prefix);
244 if (perror & TITAN__PCHIP_PERROR__DCRTO)
245 printk("%s Delayed-Completion Retry Timeout\n",
246 err_print_prefix);
247 if (perror & TITAN__PCHIP_PERROR__PERR)
248 printk("%s PERR Asserted\n", err_print_prefix);
249 if (perror & TITAN__PCHIP_PERROR__SERR)
250 printk("%s SERR Asserted\n", err_print_prefix);
251 if (perror & TITAN__PCHIP_PERROR__LOST)
252 printk("%s Lost Error\n", err_print_prefix);
253 printk("%s Command: 0x%x - %s\n"
254 " Address: 0x%lx\n",
255 err_print_prefix,
256 cmd, perror_cmd[cmd],
257 addr);
258 if (perror & TITAN__PCHIP_PERROR__DAC)
259 printk("%s Dual Address Cycle\n", err_print_prefix);
260 if (perror & TITAN__PCHIP_PERROR__MWIN)
261 printk("%s Hit in Monster Window\n", err_print_prefix);
262 #endif /* CONFIG_VERBOSE_MCHECK */
264 return status;
265 }
267 static int
268 titan_parse_p_agperror(int which, u64 agperror, int print)
269 {
270 int status = MCHK_DISPOSITION_REPORT;
271 #ifdef CONFIG_VERBOSE_MCHECK
272 int cmd, len;
273 unsigned long addr;
275 char *agperror_cmd[] = { "Read (low-priority)", "Read (high-priority)",
276 "Write (low-priority)",
277 "Write (high-priority)",
278 "Reserved", "Reserved",
279 "Flush", "Fence"
280 };
281 #endif /* CONFIG_VERBOSE_MCHECK */
283 #define TITAN__PCHIP_AGPERROR__LOST (1UL << 0)
284 #define TITAN__PCHIP_AGPERROR__LPQFULL (1UL << 1)
285 #define TITAN__PCHIP_AGPERROR__HPQFULL (1UL << 2)
286 #define TITAN__PCHIP_AGPERROR__RESCMD (1UL << 3)
287 #define TITAN__PCHIP_AGPERROR__IPTE (1UL << 4)
288 #define TITAN__PCHIP_AGPERROR__PTP (1UL << 5)
289 #define TITAN__PCHIP_AGPERROR__NOWINDOW (1UL << 6)
290 #define TITAN__PCHIP_AGPERROR__ERRMASK (TITAN__PCHIP_AGPERROR__LOST | \
291 TITAN__PCHIP_AGPERROR__LPQFULL | \
292 TITAN__PCHIP_AGPERROR__HPQFULL | \
293 TITAN__PCHIP_AGPERROR__RESCMD | \
294 TITAN__PCHIP_AGPERROR__IPTE | \
295 TITAN__PCHIP_AGPERROR__PTP | \
296 TITAN__PCHIP_AGPERROR__NOWINDOW)
297 #define TITAN__PCHIP_AGPERROR__DAC (1UL << 48)
298 #define TITAN__PCHIP_AGPERROR__MWIN (1UL << 49)
299 #define TITAN__PCHIP_AGPERROR__FENCE (1UL << 59)
300 #define TITAN__PCHIP_AGPERROR__CMD__S (50)
301 #define TITAN__PCHIP_AGPERROR__CMD__M (0x07)
302 #define TITAN__PCHIP_AGPERROR__ADDR__S (15)
303 #define TITAN__PCHIP_AGPERROR__ADDR__M (0xffffffffUL)
304 #define TITAN__PCHIP_AGPERROR__LEN__S (53)
305 #define TITAN__PCHIP_AGPERROR__LEN__M (0x3f)
307 if (!(agperror & TITAN__PCHIP_AGPERROR__ERRMASK))
308 return MCHK_DISPOSITION_UNKNOWN_ERROR;
310 #ifdef CONFIG_VERBOSE_MCHECK
311 if (!print)
312 return status;
314 cmd = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__CMD);
315 addr = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__ADDR) << 3;
316 len = EXTRACT(agperror, TITAN__PCHIP_AGPERROR__LEN);
318 printk("%s PChip %d AGPERROR: %016lx\n", err_print_prefix,
319 which, agperror);
320 if (agperror & TITAN__PCHIP_AGPERROR__NOWINDOW)
321 printk("%s No Window\n", err_print_prefix);
322 if (agperror & TITAN__PCHIP_AGPERROR__PTP)
323 printk("%s Peer-to-Peer set\n", err_print_prefix);
324 if (agperror & TITAN__PCHIP_AGPERROR__IPTE)
325 printk("%s Invalid PTE\n", err_print_prefix);
326 if (agperror & TITAN__PCHIP_AGPERROR__RESCMD)
327 printk("%s Reserved Command\n", err_print_prefix);
328 if (agperror & TITAN__PCHIP_AGPERROR__HPQFULL)
329 printk("%s HP Transaction Received while Queue Full\n",
330 err_print_prefix);
331 if (agperror & TITAN__PCHIP_AGPERROR__LPQFULL)
332 printk("%s LP Transaction Received while Queue Full\n",
333 err_print_prefix);
334 if (agperror & TITAN__PCHIP_AGPERROR__LOST)
335 printk("%s Lost Error\n", err_print_prefix);
336 printk("%s Command: 0x%x - %s, %d Quadwords%s\n"
337 " Address: 0x%lx\n",
338 err_print_prefix, cmd, agperror_cmd[cmd], len,
339 (agperror & TITAN__PCHIP_AGPERROR__FENCE) ? ", FENCE" : "",
340 addr);
341 if (agperror & TITAN__PCHIP_AGPERROR__DAC)
342 printk("%s Dual Address Cycle\n", err_print_prefix);
343 if (agperror & TITAN__PCHIP_AGPERROR__MWIN)
344 printk("%s Hit in Monster Window\n", err_print_prefix);
345 #endif /* CONFIG_VERBOSE_MCHECK */
347 return status;
348 }
350 static int
351 titan_parse_p_chip(int which, u64 serror, u64 gperror,
352 u64 aperror, u64 agperror, int print)
353 {
354 int status = MCHK_DISPOSITION_UNKNOWN_ERROR;
355 status |= titan_parse_p_serror(which, serror, print);
356 status |= titan_parse_p_perror(which, 0, gperror, print);
357 status |= titan_parse_p_perror(which, 1, aperror, print);
358 status |= titan_parse_p_agperror(which, agperror, print);
359 return status;
360 }
362 int
363 titan_process_logout_frame(struct el_common *mchk_header, int print)
364 {
365 struct el_TITAN_sysdata_mcheck *tmchk =
366 (struct el_TITAN_sysdata_mcheck *)
367 ((unsigned long)mchk_header + mchk_header->sys_offset);
368 int status = MCHK_DISPOSITION_UNKNOWN_ERROR;
370 status |= titan_parse_c_misc(tmchk->c_misc, print);
371 status |= titan_parse_p_chip(0, tmchk->p0_serror, tmchk->p0_gperror,
372 tmchk->p0_aperror, tmchk->p0_agperror,
373 print);
374 status |= titan_parse_p_chip(1, tmchk->p1_serror, tmchk->p1_gperror,
375 tmchk->p1_aperror, tmchk->p1_agperror,
376 print);
378 return status;
379 }
381 void
382 titan_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs)
383 {
384 struct el_common *mchk_header = (struct el_common *)la_ptr;
385 struct el_TITAN_sysdata_mcheck *tmchk =
386 (struct el_TITAN_sysdata_mcheck *)
387 ((unsigned long)mchk_header + mchk_header->sys_offset);
388 u64 irqmask;
390 /*
391 * Mask of Titan interrupt sources which are reported as machine checks
392 *
393 * 63 - CChip Error
394 * 62 - PChip 0 H_Error
395 * 61 - PChip 1 H_Error
396 * 60 - PChip 0 C_Error
397 * 59 - PChip 1 C_Error
398 */
399 #define TITAN_MCHECK_INTERRUPT_MASK 0xF800000000000000UL
401 /*
402 * Sync the processor
403 */
404 mb();
405 draina();
407 /*
408 * Only handle system errors here
409 */
410 if ((vector != SCB_Q_SYSMCHK) && (vector != SCB_Q_SYSERR)) {
411 ev6_machine_check(vector, la_ptr, regs);
412 return;
413 }
415 /*
416 * It's a system error, handle it here
417 *
418 * The PALcode has already cleared the error, so just parse it
419 */
421 /*
422 * Parse the logout frame without printing first. If the only error(s)
423 * found are classified as "dismissable", then just dismiss them and
424 * don't print any message
425 */
426 if (titan_process_logout_frame(mchk_header, 0) !=
427 MCHK_DISPOSITION_DISMISS) {
428 char *saved_err_prefix = err_print_prefix;
429 err_print_prefix = KERN_CRIT;
431 /*
432 * Either a nondismissable error was detected or no
433 * recognized error was detected in the logout frame
434 * -- report the error in either case
435 */
436 printk("%s"
437 "*System %s Error (Vector 0x%x) reported on CPU %d:\n",
438 err_print_prefix,
439 (vector == SCB_Q_SYSERR)?"Correctable":"Uncorrectable",
440 (unsigned int)vector, (int)smp_processor_id());
442 #ifdef CONFIG_VERBOSE_MCHECK
443 titan_process_logout_frame(mchk_header, alpha_verbose_mcheck);
444 if (alpha_verbose_mcheck)
445 dik_show_regs(regs, NULL);
446 #endif /* CONFIG_VERBOSE_MCHECK */
448 err_print_prefix = saved_err_prefix;
450 /*
451 * Convert any pending interrupts which report as system
452 * machine checks to interrupts
453 */
454 irqmask = tmchk->c_dirx & TITAN_MCHECK_INTERRUPT_MASK;
455 titan_dispatch_irqs(irqmask, regs);
456 }
459 /*
460 * Release the logout frame
461 */
462 wrmces(0x7);
463 mb();
464 }
466 /*
467 * Subpacket Annotations
468 */
/*
 * Labels for the "Titan PChip 0 Extended Frame" subpacket.  The list is
 * NULL-terminated; entry order is significant (presumably label N
 * names the N-th item of the subpacket -- confirm against the
 * annotation code).
 */
static char *el_titan_pchip0_extended_annotation[] = {
	"Subpacket Header",
	"P0_SCTL",	"P0_SERREN",
	"P0_APCTL",	"P0_APERREN",	"P0_AGPERREN",	"P0_ASPRST",
	"P0_AWSBA0",	"P0_AWSBA1",	"P0_AWSBA2",	"P0_AWSBA3",
	"P0_AWSM0",	"P0_AWSM1",	"P0_AWSM2",	"P0_AWSM3",
	"P0_ATBA0",	"P0_ATBA1",	"P0_ATBA2",	"P0_ATBA3",
	"P0_GPCTL",	"P0_GPERREN",	"P0_GSPRST",
	"P0_GWSBA0",	"P0_GWSBA1",	"P0_GWSBA2",	"P0_GWSBA3",
	"P0_GWSM0",	"P0_GWSM1",	"P0_GWSM2",	"P0_GWSM3",
	"P0_GTBA0",	"P0_GTBA1",	"P0_GTBA2",	"P0_GTBA3",
	NULL
};
/*
 * Labels for the "Titan PChip 1 Extended Frame" subpacket.  The list is
 * NULL-terminated; entry order is significant (presumably label N
 * names the N-th item of the subpacket -- confirm against the
 * annotation code).
 */
static char *el_titan_pchip1_extended_annotation[] = {
	"Subpacket Header",
	"P1_SCTL",	"P1_SERREN",
	"P1_APCTL",	"P1_APERREN",	"P1_AGPERREN",	"P1_ASPRST",
	"P1_AWSBA0",	"P1_AWSBA1",	"P1_AWSBA2",	"P1_AWSBA3",
	"P1_AWSM0",	"P1_AWSM1",	"P1_AWSM2",	"P1_AWSM3",
	"P1_ATBA0",	"P1_ATBA1",	"P1_ATBA2",	"P1_ATBA3",
	"P1_GPCTL",	"P1_GPERREN",	"P1_GSPRST",
	"P1_GWSBA0",	"P1_GWSBA1",	"P1_GWSBA2",	"P1_GWSBA3",
	"P1_GWSM0",	"P1_GWSM1",	"P1_GWSM2",	"P1_GWSM3",
	"P1_GTBA0",	"P1_GTBA1",	"P1_GTBA2",	"P1_GTBA3",
	NULL
};
/*
 * Labels for the "Titan Memory Extended Frame" subpacket.  The list is
 * NULL-terminated and entry order is significant.
 *
 * The final label used to repeat "P1_SCTL"; the Pchip 0 entries are the
 * triple SCTL/GPCTL/APCTL, so the Pchip 1 triple must end with
 * "P1_APCTL" rather than naming P1_SCTL twice.
 */
static char *el_titan_memory_extended_annotation[] = {
	"Subpacket Header",
	"AAR0",		"AAR1",		"AAR2",		"AAR3",
	"P0_SCTL",	"P0_GPCTL",	"P0_APCTL",
	"P1_SCTL",	"P1_GPCTL",	"P1_APCTL",
	NULL
};
504 static struct el_subpacket_annotation el_titan_annotations[] = {
505 SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY,
506 EL_TYPE__REGATTA__TITAN_PCHIP0_EXTENDED,
507 1,
508 "Titan PChip 0 Extended Frame",
509 el_titan_pchip0_extended_annotation),
510 SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY,
511 EL_TYPE__REGATTA__TITAN_PCHIP1_EXTENDED,
512 1,
513 "Titan PChip 1 Extended Frame",
514 el_titan_pchip1_extended_annotation),
515 SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY,
516 EL_TYPE__REGATTA__TITAN_MEMORY_EXTENDED,
517 1,
518 "Titan Memory Extended Frame",
519 el_titan_memory_extended_annotation),
520 SUBPACKET_ANNOTATION(EL_CLASS__REGATTA_FAMILY,
521 EL_TYPE__TERMINATION__TERMINATION,
522 1,
523 "Termination Subpacket",
524 NULL)
525 };
527 static struct el_subpacket *
528 el_process_regatta_subpacket(struct el_subpacket *header)
529 {
530 int status;
532 if (header->class != EL_CLASS__REGATTA_FAMILY) {
533 printk("%s ** Unexpected header CLASS %d TYPE %d, aborting\n",
534 err_print_prefix,
535 header->class, header->type);
536 return NULL;
537 }
539 switch(header->type) {
540 case EL_TYPE__REGATTA__PROCESSOR_ERROR_FRAME:
541 case EL_TYPE__REGATTA__SYSTEM_ERROR_FRAME:
542 case EL_TYPE__REGATTA__ENVIRONMENTAL_FRAME:
543 case EL_TYPE__REGATTA__PROCESSOR_DBL_ERROR_HALT:
544 case EL_TYPE__REGATTA__SYSTEM_DBL_ERROR_HALT:
545 printk("%s ** Occurred on CPU %d:\n",
546 err_print_prefix,
547 (int)header->by_type.regatta_frame.cpuid);
548 status = privateer_process_logout_frame((struct el_common *)
549 header->by_type.regatta_frame.data_start, 1);
550 break;
551 default:
552 printk("%s ** REGATTA TYPE %d SUBPACKET\n",
553 err_print_prefix, header->type);
554 el_annotate_subpacket(header);
555 break;
556 }
559 return (struct el_subpacket *)((unsigned long)header + header->length);
560 }
562 static struct el_subpacket_handler titan_subpacket_handler =
563 SUBPACKET_HANDLER_INIT(EL_CLASS__REGATTA_FAMILY,
564 el_process_regatta_subpacket);
566 void
567 titan_register_error_handlers(void)
568 {
569 size_t i;
571 for (i = 0; i < ARRAY_SIZE (el_titan_annotations); i++)
572 cdl_register_subpacket_annotation(&el_titan_annotations[i]);
574 cdl_register_subpacket_handler(&titan_subpacket_handler);
576 ev6_register_error_handlers();
577 }
580 /*
581 * Privateer
582 */
584 static int
585 privateer_process_680_frame(struct el_common *mchk_header, int print)
586 {
587 int status = MCHK_DISPOSITION_UNKNOWN_ERROR;
588 #ifdef CONFIG_VERBOSE_MCHECK
589 struct el_PRIVATEER_envdata_mcheck *emchk =
590 (struct el_PRIVATEER_envdata_mcheck *)
591 ((unsigned long)mchk_header + mchk_header->sys_offset);
593 /* TODO - catagorize errors, for now, no error */
595 if (!print)
596 return status;
598 /* TODO - decode instead of just dumping... */
599 printk("%s Summary Flags: %016lx\n"
600 " CChip DIRx: %016lx\n"
601 " System Management IR: %016lx\n"
602 " CPU IR: %016lx\n"
603 " Power Supply IR: %016lx\n"
604 " LM78 Fault Status: %016lx\n"
605 " System Doors: %016lx\n"
606 " Temperature Warning: %016lx\n"
607 " Fan Control: %016lx\n"
608 " Fatal Power Down Code: %016lx\n",
609 err_print_prefix,
610 emchk->summary,
611 emchk->c_dirx,
612 emchk->smir,
613 emchk->cpuir,
614 emchk->psir,
615 emchk->fault,
616 emchk->sys_doors,
617 emchk->temp_warn,
618 emchk->fan_ctrl,
619 emchk->code);
620 #endif /* CONFIG_VERBOSE_MCHECK */
622 return status;
623 }
625 int
626 privateer_process_logout_frame(struct el_common *mchk_header, int print)
627 {
628 struct el_common_EV6_mcheck *ev6mchk =
629 (struct el_common_EV6_mcheck *)mchk_header;
630 int status = MCHK_DISPOSITION_UNKNOWN_ERROR;
632 /*
633 * Machine check codes
634 */
635 #define PRIVATEER_MCHK__CORR_ECC 0x86 /* 630 */
636 #define PRIVATEER_MCHK__DC_TAG_PERR 0x9E /* 630 */
637 #define PRIVATEER_MCHK__PAL_BUGCHECK 0x8E /* 670 */
638 #define PRIVATEER_MCHK__OS_BUGCHECK 0x90 /* 670 */
639 #define PRIVATEER_MCHK__PROC_HRD_ERR 0x98 /* 670 */
640 #define PRIVATEER_MCHK__ISTREAM_CMOV_PRX 0xA0 /* 670 */
641 #define PRIVATEER_MCHK__ISTREAM_CMOV_FLT 0xA2 /* 670 */
642 #define PRIVATEER_MCHK__SYS_HRD_ERR 0x202 /* 660 */
643 #define PRIVATEER_MCHK__SYS_CORR_ERR 0x204 /* 620 */
644 #define PRIVATEER_MCHK__SYS_ENVIRON 0x206 /* 680 */
646 switch(ev6mchk->MCHK_Code) {
647 /*
648 * Vector 630 - Processor, Correctable
649 */
650 case PRIVATEER_MCHK__CORR_ECC:
651 case PRIVATEER_MCHK__DC_TAG_PERR:
652 /*
653 * Fall through to vector 670 for processing...
654 */
655 /*
656 * Vector 670 - Processor, Uncorrectable
657 */
658 case PRIVATEER_MCHK__PAL_BUGCHECK:
659 case PRIVATEER_MCHK__OS_BUGCHECK:
660 case PRIVATEER_MCHK__PROC_HRD_ERR:
661 case PRIVATEER_MCHK__ISTREAM_CMOV_PRX:
662 case PRIVATEER_MCHK__ISTREAM_CMOV_FLT:
663 status |= ev6_process_logout_frame(mchk_header, print);
664 break;
666 /*
667 * Vector 620 - System, Correctable
668 */
669 case PRIVATEER_MCHK__SYS_CORR_ERR:
670 /*
671 * Fall through to vector 660 for processing...
672 */
673 /*
674 * Vector 660 - System, Uncorrectable
675 */
676 case PRIVATEER_MCHK__SYS_HRD_ERR:
677 status |= titan_process_logout_frame(mchk_header, print);
678 break;
680 /*
681 * Vector 680 - System, Environmental
682 */
683 case PRIVATEER_MCHK__SYS_ENVIRON: /* System, Environmental */
684 status |= privateer_process_680_frame(mchk_header, print);
685 break;
687 /*
688 * Unknown
689 */
690 default:
691 status |= MCHK_DISPOSITION_REPORT;
692 if (print) {
693 printk("%s** Unknown Error, frame follows\n",
694 err_print_prefix);
695 mchk_dump_logout_frame(mchk_header);
696 }
698 }
700 return status;
701 }
703 void
704 privateer_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs)
705 {
706 struct el_common *mchk_header = (struct el_common *)la_ptr;
707 struct el_TITAN_sysdata_mcheck *tmchk =
708 (struct el_TITAN_sysdata_mcheck *)
709 (la_ptr + mchk_header->sys_offset);
710 u64 irqmask;
711 char *saved_err_prefix = err_print_prefix;
713 #define PRIVATEER_680_INTERRUPT_MASK (0xE00UL)
714 #define PRIVATEER_HOTPLUG_INTERRUPT_MASK (0xE00UL)
716 /*
717 * Sync the processor.
718 */
719 mb();
720 draina();
722 /*
723 * Only handle system events here.
724 */
725 if (vector != SCB_Q_SYSEVENT)
726 return titan_machine_check(vector, la_ptr, regs);
728 /*
729 * Report the event - System Events should be reported even if no
730 * error is indicated since the event could indicate the return
731 * to normal status.
732 */
733 err_print_prefix = KERN_CRIT;
734 printk("%s*System Event (Vector 0x%x) reported on CPU %d:\n",
735 err_print_prefix,
736 (unsigned int)vector, (int)smp_processor_id());
737 privateer_process_680_frame(mchk_header, 1);
738 err_print_prefix = saved_err_prefix;
740 /*
741 * Convert any pending interrupts which report as 680 machine
742 * checks to interrupts.
743 */
744 irqmask = tmchk->c_dirx & PRIVATEER_680_INTERRUPT_MASK;
746 /*
747 * Dispatch the interrupt(s).
748 */
749 titan_dispatch_irqs(irqmask, regs);
751 /*
752 * Release the logout frame.
753 */
754 wrmces(0x7);
755 mb();
756 }