ia64/linux-2.6.18-xen.hg

view arch/alpha/kernel/err_ev6.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 /*
2 * linux/arch/alpha/kernel/err_ev6.c
3 *
4 * Copyright (C) 2000 Jeff Wiedemeier (Compaq Computer Corporation)
5 *
6 * Error handling code supporting Alpha systems
7 */
9 #include <linux/init.h>
10 #include <linux/pci.h>
11 #include <linux/sched.h>
13 #include <asm/io.h>
14 #include <asm/hwrpb.h>
15 #include <asm/smp.h>
16 #include <asm/err_common.h>
17 #include <asm/err_ev6.h>
19 #include "err_impl.h"
20 #include "proto.h"
22 static int
23 ev6_parse_ibox(u64 i_stat, int print)
24 {
25 int status = MCHK_DISPOSITION_REPORT;
27 #define EV6__I_STAT__PAR (1UL << 29)
28 #define EV6__I_STAT__ERRMASK (EV6__I_STAT__PAR)
30 if (!(i_stat & EV6__I_STAT__ERRMASK))
31 return MCHK_DISPOSITION_UNKNOWN_ERROR;
33 if (!print)
34 return status;
36 if (i_stat & EV6__I_STAT__PAR)
37 printk("%s Icache parity error\n", err_print_prefix);
39 return status;
40 }
42 static int
43 ev6_parse_mbox(u64 mm_stat, u64 d_stat, u64 c_stat, int print)
44 {
45 int status = MCHK_DISPOSITION_REPORT;
47 #define EV6__MM_STAT__DC_TAG_PERR (1UL << 10)
48 #define EV6__MM_STAT__ERRMASK (EV6__MM_STAT__DC_TAG_PERR)
49 #define EV6__D_STAT__TPERR_P0 (1UL << 0)
50 #define EV6__D_STAT__TPERR_P1 (1UL << 1)
51 #define EV6__D_STAT__ECC_ERR_ST (1UL << 2)
52 #define EV6__D_STAT__ECC_ERR_LD (1UL << 3)
53 #define EV6__D_STAT__SEO (1UL << 4)
54 #define EV6__D_STAT__ERRMASK (EV6__D_STAT__TPERR_P0 | \
55 EV6__D_STAT__TPERR_P1 | \
56 EV6__D_STAT__ECC_ERR_ST | \
57 EV6__D_STAT__ECC_ERR_LD | \
58 EV6__D_STAT__SEO)
60 if (!(d_stat & EV6__D_STAT__ERRMASK) &&
61 !(mm_stat & EV6__MM_STAT__ERRMASK))
62 return MCHK_DISPOSITION_UNKNOWN_ERROR;
64 if (!print)
65 return status;
67 if (mm_stat & EV6__MM_STAT__DC_TAG_PERR)
68 printk("%s Dcache tag parity error on probe\n",
69 err_print_prefix);
70 if (d_stat & EV6__D_STAT__TPERR_P0)
71 printk("%s Dcache tag parity error - pipe 0\n",
72 err_print_prefix);
73 if (d_stat & EV6__D_STAT__TPERR_P1)
74 printk("%s Dcache tag parity error - pipe 1\n",
75 err_print_prefix);
76 if (d_stat & EV6__D_STAT__ECC_ERR_ST)
77 printk("%s ECC error occurred on a store\n",
78 err_print_prefix);
79 if (d_stat & EV6__D_STAT__ECC_ERR_LD)
80 printk("%s ECC error occurred on a %s load\n",
81 err_print_prefix,
82 c_stat ? "" : "speculative ");
83 if (d_stat & EV6__D_STAT__SEO)
84 printk("%s Dcache second error\n", err_print_prefix);
86 return status;
87 }
89 static int
90 ev6_parse_cbox(u64 c_addr, u64 c1_syn, u64 c2_syn,
91 u64 c_stat, u64 c_sts, int print)
92 {
93 char *sourcename[] = { "UNKNOWN", "UNKNOWN", "UNKNOWN",
94 "MEMORY", "BCACHE", "DCACHE",
95 "BCACHE PROBE", "BCACHE PROBE" };
96 char *streamname[] = { "D", "I" };
97 char *bitsname[] = { "SINGLE", "DOUBLE" };
98 int status = MCHK_DISPOSITION_REPORT;
99 int source = -1, stream = -1, bits = -1;
101 #define EV6__C_STAT__BC_PERR (0x01)
102 #define EV6__C_STAT__DC_PERR (0x02)
103 #define EV6__C_STAT__DSTREAM_MEM_ERR (0x03)
104 #define EV6__C_STAT__DSTREAM_BC_ERR (0x04)
105 #define EV6__C_STAT__DSTREAM_DC_ERR (0x05)
106 #define EV6__C_STAT__PROBE_BC_ERR0 (0x06) /* both 6 and 7 indicate... */
107 #define EV6__C_STAT__PROBE_BC_ERR1 (0x07) /* ...probe bc error. */
108 #define EV6__C_STAT__ISTREAM_MEM_ERR (0x0B)
109 #define EV6__C_STAT__ISTREAM_BC_ERR (0x0C)
110 #define EV6__C_STAT__DSTREAM_MEM_DBL (0x13)
111 #define EV6__C_STAT__DSTREAM_BC_DBL (0x14)
112 #define EV6__C_STAT__ISTREAM_MEM_DBL (0x1B)
113 #define EV6__C_STAT__ISTREAM_BC_DBL (0x1C)
114 #define EV6__C_STAT__SOURCE_MEMORY (0x03)
115 #define EV6__C_STAT__SOURCE_BCACHE (0x04)
116 #define EV6__C_STAT__SOURCE__S (0)
117 #define EV6__C_STAT__SOURCE__M (0x07)
118 #define EV6__C_STAT__ISTREAM__S (3)
119 #define EV6__C_STAT__ISTREAM__M (0x01)
120 #define EV6__C_STAT__DOUBLE__S (4)
121 #define EV6__C_STAT__DOUBLE__M (0x01)
122 #define EV6__C_STAT__ERRMASK (0x1F)
123 #define EV6__C_STS__SHARED (1 << 0)
124 #define EV6__C_STS__DIRTY (1 << 1)
125 #define EV6__C_STS__VALID (1 << 2)
126 #define EV6__C_STS__PARITY (1 << 3)
128 if (!(c_stat & EV6__C_STAT__ERRMASK))
129 return MCHK_DISPOSITION_UNKNOWN_ERROR;
131 if (!print)
132 return status;
134 source = EXTRACT(c_stat, EV6__C_STAT__SOURCE);
135 stream = EXTRACT(c_stat, EV6__C_STAT__ISTREAM);
136 bits = EXTRACT(c_stat, EV6__C_STAT__DOUBLE);
138 if (c_stat & EV6__C_STAT__BC_PERR) {
139 printk("%s Bcache tag parity error\n", err_print_prefix);
140 source = -1;
141 }
143 if (c_stat & EV6__C_STAT__DC_PERR) {
144 printk("%s Dcache tag parity error\n", err_print_prefix);
145 source = -1;
146 }
148 if (c_stat == EV6__C_STAT__PROBE_BC_ERR0 ||
149 c_stat == EV6__C_STAT__PROBE_BC_ERR1) {
150 printk("%s Bcache single-bit error on a probe hit\n",
151 err_print_prefix);
152 source = -1;
153 }
155 if (source != -1)
156 printk("%s %s-STREAM %s-BIT ECC error from %s\n",
157 err_print_prefix,
158 streamname[stream], bitsname[bits], sourcename[source]);
160 printk("%s Address: 0x%016lx\n"
161 " Syndrome[upper.lower]: %02lx.%02lx\n",
162 err_print_prefix,
163 c_addr,
164 c2_syn, c1_syn);
166 if (source == EV6__C_STAT__SOURCE_MEMORY ||
167 source == EV6__C_STAT__SOURCE_BCACHE)
168 printk("%s Block status: %s%s%s%s\n",
169 err_print_prefix,
170 (c_sts & EV6__C_STS__SHARED) ? "SHARED " : "",
171 (c_sts & EV6__C_STS__DIRTY) ? "DIRTY " : "",
172 (c_sts & EV6__C_STS__VALID) ? "VALID " : "",
173 (c_sts & EV6__C_STS__PARITY) ? "PARITY " : "");
175 return status;
176 }
/*
 * Hook for registering EV6-specific error handlers.
 * Intentionally empty: there is nothing to register at present.
 */
void
ev6_register_error_handlers(void)
{
	/* No handlers to register for now. */
}
184 int
185 ev6_process_logout_frame(struct el_common *mchk_header, int print)
186 {
187 struct el_common_EV6_mcheck *ev6mchk =
188 (struct el_common_EV6_mcheck *)mchk_header;
189 int status = MCHK_DISPOSITION_UNKNOWN_ERROR;
191 status |= ev6_parse_ibox(ev6mchk->I_STAT, print);
192 status |= ev6_parse_mbox(ev6mchk->MM_STAT, ev6mchk->DC_STAT,
193 ev6mchk->C_STAT, print);
194 status |= ev6_parse_cbox(ev6mchk->C_ADDR, ev6mchk->DC1_SYNDROME,
195 ev6mchk->DC0_SYNDROME, ev6mchk->C_STAT,
196 ev6mchk->C_STS, print);
198 if (!print)
199 return status;
201 if (status != MCHK_DISPOSITION_DISMISS) {
202 char *saved_err_prefix = err_print_prefix;
204 /*
205 * Dump some additional information from the frame
206 */
207 printk("%s EXC_ADDR: 0x%016lx IER_CM: 0x%016lx"
208 " ISUM: 0x%016lx\n"
209 " PAL_BASE: 0x%016lx I_CTL: 0x%016lx"
210 " PCTX: 0x%016lx\n",
211 err_print_prefix,
212 ev6mchk->EXC_ADDR, ev6mchk->IER_CM, ev6mchk->ISUM,
213 ev6mchk->PAL_BASE, ev6mchk->I_CTL, ev6mchk->PCTX);
215 if (status == MCHK_DISPOSITION_UNKNOWN_ERROR) {
216 printk("%s UNKNOWN error, frame follows:\n",
217 err_print_prefix);
218 } else {
219 /* had decode -- downgrade print level for frame */
220 err_print_prefix = KERN_NOTICE;
221 }
223 mchk_dump_logout_frame(mchk_header);
225 err_print_prefix = saved_err_prefix;
226 }
228 return status;
229 }
231 void
232 ev6_machine_check(u64 vector, u64 la_ptr, struct pt_regs *regs)
233 {
234 struct el_common *mchk_header = (struct el_common *)la_ptr;
236 /*
237 * Sync the processor
238 */
239 mb();
240 draina();
242 /*
243 * Parse the logout frame without printing first. If the only error(s)
244 * found are have a disposition of "dismiss", then just dismiss them
245 * and don't print any message
246 */
247 if (ev6_process_logout_frame(mchk_header, 0) !=
248 MCHK_DISPOSITION_DISMISS) {
249 char *saved_err_prefix = err_print_prefix;
250 err_print_prefix = KERN_CRIT;
252 /*
253 * Either a nondismissable error was detected or no
254 * recognized error was detected in the logout frame
255 * -- report the error in either case
256 */
257 printk("%s*CPU %s Error (Vector 0x%x) reported on CPU %d:\n",
258 err_print_prefix,
259 (vector == SCB_Q_PROCERR)?"Correctable":"Uncorrectable",
260 (unsigned int)vector, (int)smp_processor_id());
262 ev6_process_logout_frame(mchk_header, 1);
263 dik_show_regs(regs, NULL);
265 err_print_prefix = saved_err_prefix;
266 }
268 /*
269 * Release the logout frame
270 */
271 wrmces(0x7);
272 mb();
273 }