ia64/linux-2.6.18-xen.hg

view init/initramfs.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well-behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 #include <linux/init.h>
2 #include <linux/fs.h>
3 #include <linux/slab.h>
4 #include <linux/types.h>
5 #include <linux/fcntl.h>
6 #include <linux/delay.h>
7 #include <linux/string.h>
8 #include <linux/syscalls.h>
10 static __initdata char *message;
11 static void __init error(char *x)
12 {
13 if (!message)
14 message = x;
15 }
17 static void __init *malloc(size_t size)
18 {
19 return kmalloc(size, GFP_KERNEL);
20 }
22 static void __init free(void *where)
23 {
24 kfree(where);
25 }
27 /* link hash */
29 #define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
31 static __initdata struct hash {
32 int ino, minor, major;
33 mode_t mode;
34 struct hash *next;
35 char name[N_ALIGN(PATH_MAX)];
36 } *head[32];
/*
 * Map a (major, minor, ino) triple to a bucket index in the range 0..31.
 * The sum is formed in int arithmetic first, then folded once by adding
 * its value shifted right by five bits.
 */
static inline int hash(int major, int minor, int ino)
{
	unsigned long key = ino + minor + (major << 3);

	return (key + (key >> 5)) & 31;
}
45 static char __init *find_link(int major, int minor, int ino,
46 mode_t mode, char *name)
47 {
48 struct hash **p, *q;
49 for (p = head + hash(major, minor, ino); *p; p = &(*p)->next) {
50 if ((*p)->ino != ino)
51 continue;
52 if ((*p)->minor != minor)
53 continue;
54 if ((*p)->major != major)
55 continue;
56 if (((*p)->mode ^ mode) & S_IFMT)
57 continue;
58 return (*p)->name;
59 }
60 q = (struct hash *)malloc(sizeof(struct hash));
61 if (!q)
62 panic("can't allocate link hash entry");
63 q->major = major;
64 q->minor = minor;
65 q->ino = ino;
66 q->mode = mode;
67 strcpy(q->name, name);
68 q->next = NULL;
69 *p = q;
70 return NULL;
71 }
73 static void __init free_hash(void)
74 {
75 struct hash **p, *q;
76 for (p = head; p < head + 32; p++) {
77 while (*p) {
78 q = *p;
79 *p = q->next;
80 free(q);
81 }
82 }
83 }
85 /* cpio header parsing */
87 static __initdata unsigned long ino, major, minor, nlink;
88 static __initdata mode_t mode;
89 static __initdata unsigned long body_len, name_len;
90 static __initdata uid_t uid;
91 static __initdata gid_t gid;
92 static __initdata unsigned rdev;
94 static void __init parse_header(char *s)
95 {
96 unsigned long parsed[12];
97 char buf[9];
98 int i;
100 buf[8] = '\0';
101 for (i = 0, s += 6; i < 12; i++, s += 8) {
102 memcpy(buf, s, 8);
103 parsed[i] = simple_strtoul(buf, NULL, 16);
104 }
105 ino = parsed[0];
106 mode = parsed[1];
107 uid = parsed[2];
108 gid = parsed[3];
109 nlink = parsed[4];
110 body_len = parsed[6];
111 major = parsed[7];
112 minor = parsed[8];
113 rdev = new_encode_dev(MKDEV(parsed[9], parsed[10]));
114 name_len = parsed[11];
115 }
117 /* FSM */
119 static __initdata enum state {
120 Start,
121 Collect,
122 GotHeader,
123 SkipIt,
124 GotName,
125 CopyFile,
126 GotSymlink,
127 Reset
128 } state, next_state;
130 static __initdata char *victim;
131 static __initdata unsigned count;
132 static __initdata loff_t this_header, next_header;
134 static __initdata int dry_run;
136 static inline void eat(unsigned n)
137 {
138 victim += n;
139 this_header += n;
140 count -= n;
141 }
143 static __initdata char *collected;
144 static __initdata int remains;
145 static __initdata char *collect;
147 static void __init read_into(char *buf, unsigned size, enum state next)
148 {
149 if (count >= size) {
150 collected = victim;
151 eat(size);
152 state = next;
153 } else {
154 collect = collected = buf;
155 remains = size;
156 next_state = next;
157 state = Collect;
158 }
159 }
161 static __initdata char *header_buf, *symlink_buf, *name_buf;
163 static int __init do_start(void)
164 {
165 read_into(header_buf, 110, GotHeader);
166 return 0;
167 }
169 static int __init do_collect(void)
170 {
171 unsigned n = remains;
172 if (count < n)
173 n = count;
174 memcpy(collect, victim, n);
175 eat(n);
176 collect += n;
177 if ((remains -= n) != 0)
178 return 1;
179 state = next_state;
180 return 0;
181 }
183 static int __init do_header(void)
184 {
185 if (memcmp(collected, "070701", 6)) {
186 error("no cpio magic");
187 return 1;
188 }
189 parse_header(collected);
190 next_header = this_header + N_ALIGN(name_len) + body_len;
191 next_header = (next_header + 3) & ~3;
192 if (dry_run) {
193 read_into(name_buf, N_ALIGN(name_len), GotName);
194 return 0;
195 }
196 state = SkipIt;
197 if (name_len <= 0 || name_len > PATH_MAX)
198 return 0;
199 if (S_ISLNK(mode)) {
200 if (body_len > PATH_MAX)
201 return 0;
202 collect = collected = symlink_buf;
203 remains = N_ALIGN(name_len) + body_len;
204 next_state = GotSymlink;
205 state = Collect;
206 return 0;
207 }
208 if (S_ISREG(mode) || !body_len)
209 read_into(name_buf, N_ALIGN(name_len), GotName);
210 return 0;
211 }
213 static int __init do_skip(void)
214 {
215 if (this_header + count < next_header) {
216 eat(count);
217 return 1;
218 } else {
219 eat(next_header - this_header);
220 state = next_state;
221 return 0;
222 }
223 }
225 static int __init do_reset(void)
226 {
227 while(count && *victim == '\0')
228 eat(1);
229 if (count && (this_header & 3))
230 error("broken padding");
231 return 1;
232 }
234 static int __init maybe_link(void)
235 {
236 if (nlink >= 2) {
237 char *old = find_link(major, minor, ino, mode, collected);
238 if (old)
239 return (sys_link(old, collected) < 0) ? -1 : 1;
240 }
241 return 0;
242 }
244 static void __init clean_path(char *path, mode_t mode)
245 {
246 struct stat st;
248 if (!sys_newlstat(path, &st) && (st.st_mode^mode) & S_IFMT) {
249 if (S_ISDIR(st.st_mode))
250 sys_rmdir(path);
251 else
252 sys_unlink(path);
253 }
254 }
256 static __initdata int wfd;
258 static int __init do_name(void)
259 {
260 state = SkipIt;
261 next_state = Reset;
262 if (strcmp(collected, "TRAILER!!!") == 0) {
263 free_hash();
264 return 0;
265 }
266 if (dry_run)
267 return 0;
268 clean_path(collected, mode);
269 if (S_ISREG(mode)) {
270 int ml = maybe_link();
271 if (ml >= 0) {
272 int openflags = O_WRONLY|O_CREAT;
273 if (ml != 1)
274 openflags |= O_TRUNC;
275 wfd = sys_open(collected, openflags, mode);
277 if (wfd >= 0) {
278 sys_fchown(wfd, uid, gid);
279 sys_fchmod(wfd, mode);
280 state = CopyFile;
281 }
282 }
283 } else if (S_ISDIR(mode)) {
284 sys_mkdir(collected, mode);
285 sys_chown(collected, uid, gid);
286 sys_chmod(collected, mode);
287 } else if (S_ISBLK(mode) || S_ISCHR(mode) ||
288 S_ISFIFO(mode) || S_ISSOCK(mode)) {
289 if (maybe_link() == 0) {
290 sys_mknod(collected, mode, rdev);
291 sys_chown(collected, uid, gid);
292 sys_chmod(collected, mode);
293 }
294 }
295 return 0;
296 }
298 static int __init do_copy(void)
299 {
300 if (count >= body_len) {
301 sys_write(wfd, victim, body_len);
302 sys_close(wfd);
303 eat(body_len);
304 state = SkipIt;
305 return 0;
306 } else {
307 sys_write(wfd, victim, count);
308 body_len -= count;
309 eat(count);
310 return 1;
311 }
312 }
314 static int __init do_symlink(void)
315 {
316 collected[N_ALIGN(name_len) + body_len] = '\0';
317 clean_path(collected, 0);
318 sys_symlink(collected + N_ALIGN(name_len), collected);
319 sys_lchown(collected, uid, gid);
320 state = SkipIt;
321 next_state = Reset;
322 return 0;
323 }
325 static __initdata int (*actions[])(void) = {
326 [Start] = do_start,
327 [Collect] = do_collect,
328 [GotHeader] = do_header,
329 [SkipIt] = do_skip,
330 [GotName] = do_name,
331 [CopyFile] = do_copy,
332 [GotSymlink] = do_symlink,
333 [Reset] = do_reset,
334 };
336 static int __init write_buffer(char *buf, unsigned len)
337 {
338 count = len;
339 victim = buf;
341 while (!actions[state]())
342 ;
343 return len - count;
344 }
346 static void __init flush_buffer(char *buf, unsigned len)
347 {
348 int written;
349 if (message)
350 return;
351 while ((written = write_buffer(buf, len)) < len && !message) {
352 char c = buf[written];
353 if (c == '0') {
354 buf += written;
355 len -= written;
356 state = Start;
357 } else if (c == 0) {
358 buf += written;
359 len -= written;
360 state = Reset;
361 } else
362 error("junk in compressed archive");
363 }
364 }
366 /*
367 * gzip declarations
368 */
370 #define OF(args) args
372 #ifndef memzero
373 #define memzero(s, n) memset ((s), 0, (n))
374 #endif
376 typedef unsigned char uch;
377 typedef unsigned short ush;
378 typedef unsigned long ulg;
380 #define WSIZE 0x8000 /* window size--must be a power of two, and */
381 /* at least 32K for zip's deflate method */
383 static uch *inbuf;
384 static uch *window;
386 static unsigned insize; /* valid bytes in inbuf */
387 static unsigned inptr; /* index of next byte to be processed in inbuf */
388 static unsigned outcnt; /* bytes in output buffer */
389 static long bytes_out;
391 #define get_byte() (inptr < insize ? inbuf[inptr++] : -1)
393 /* Diagnostic functions (stubbed out) */
394 #define Assert(cond,msg)
395 #define Trace(x)
396 #define Tracev(x)
397 #define Tracevv(x)
398 #define Tracec(c,x)
399 #define Tracecv(c,x)
401 #define STATIC static
402 #define INIT __init
404 static void __init flush_window(void);
405 static void __init error(char *m);
406 static void __init gzip_mark(void **);
407 static void __init gzip_release(void **);
409 #include "../lib/inflate.c"
411 static void __init gzip_mark(void **ptr)
412 {
413 }
415 static void __init gzip_release(void **ptr)
416 {
417 }
419 /* ===========================================================================
420 * Write the output window window[0..outcnt-1] and update crc and bytes_out.
421 * (Used for the decompressed data only.)
422 */
423 static void __init flush_window(void)
424 {
425 ulg c = crc; /* temporary variable */
426 unsigned n;
427 uch *in, ch;
429 flush_buffer(window, outcnt);
430 in = window;
431 for (n = 0; n < outcnt; n++) {
432 ch = *in++;
433 c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
434 }
435 crc = c;
436 bytes_out += (ulg)outcnt;
437 outcnt = 0;
438 }
440 static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only)
441 {
442 int written;
443 dry_run = check_only;
444 header_buf = malloc(110);
445 symlink_buf = malloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1);
446 name_buf = malloc(N_ALIGN(PATH_MAX));
447 window = malloc(WSIZE);
448 if (!window || !header_buf || !symlink_buf || !name_buf)
449 panic("can't allocate buffers");
450 state = Start;
451 this_header = 0;
452 message = NULL;
453 while (!message && len) {
454 loff_t saved_offset = this_header;
455 if (*buf == '0' && !(this_header & 3)) {
456 state = Start;
457 written = write_buffer(buf, len);
458 buf += written;
459 len -= written;
460 continue;
461 }
462 if (!*buf) {
463 buf++;
464 len--;
465 this_header++;
466 continue;
467 }
468 this_header = 0;
469 insize = len;
470 inbuf = buf;
471 inptr = 0;
472 outcnt = 0; /* bytes in output buffer */
473 bytes_out = 0;
474 crc = (ulg)0xffffffffL; /* shift register contents */
475 makecrc();
476 gunzip();
477 if (state != Reset)
478 error("junk in gzipped archive");
479 this_header = saved_offset + inptr;
480 buf += inptr;
481 len -= inptr;
482 }
483 free(window);
484 free(name_buf);
485 free(symlink_buf);
486 free(header_buf);
487 return message;
488 }
490 extern char __initramfs_start[], __initramfs_end[];
491 #ifdef CONFIG_BLK_DEV_INITRD
492 #include <linux/initrd.h>
493 #include <linux/kexec.h>
495 static void __init free_initrd(void)
496 {
497 #ifdef CONFIG_KEXEC
498 unsigned long crashk_start = (unsigned long)__va(crashk_res.start);
499 unsigned long crashk_end = (unsigned long)__va(crashk_res.end);
501 /*
502 * If the initrd region is overlapped with crashkernel reserved region,
503 * free only memory that is not part of crashkernel region.
504 */
505 if (initrd_start < crashk_end && initrd_end > crashk_start) {
506 /*
507 * Initialize initrd memory region since the kexec boot does
508 * not do.
509 */
510 memset((void *)initrd_start, 0, initrd_end - initrd_start);
511 if (initrd_start < crashk_start)
512 free_initrd_mem(initrd_start, crashk_start);
513 if (initrd_end > crashk_end)
514 free_initrd_mem(crashk_end, initrd_end);
515 } else
516 #endif
517 free_initrd_mem(initrd_start, initrd_end);
519 initrd_start = 0;
520 initrd_end = 0;
521 }
523 #endif
525 void __init populate_rootfs(void)
526 {
527 char *err = unpack_to_rootfs(__initramfs_start,
528 __initramfs_end - __initramfs_start, 0);
529 if (err)
530 panic(err);
531 #ifdef CONFIG_BLK_DEV_INITRD
532 if (initrd_start) {
533 #ifdef CONFIG_BLK_DEV_RAM
534 int fd;
535 printk(KERN_INFO "checking if image is initramfs...");
536 err = unpack_to_rootfs((char *)initrd_start,
537 initrd_end - initrd_start, 1);
538 if (!err) {
539 printk(" it is\n");
540 unpack_to_rootfs((char *)initrd_start,
541 initrd_end - initrd_start, 0);
542 free_initrd();
543 return;
544 }
545 printk("it isn't (%s); looks like an initrd\n", err);
546 fd = sys_open("/initrd.image", O_WRONLY|O_CREAT, 0700);
547 if (fd >= 0) {
548 sys_write(fd, (char *)initrd_start,
549 initrd_end - initrd_start);
550 sys_close(fd);
551 free_initrd();
552 }
553 #else
554 printk(KERN_INFO "Unpacking initramfs...");
555 err = unpack_to_rootfs((char *)initrd_start,
556 initrd_end - initrd_start, 0);
557 if (err)
558 panic(err);
559 printk(" done\n");
560 free_initrd();
561 #endif
562 }
563 #endif
564 }