ia64/linux-2.6.18-xen.hg

view init/do_mounts.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for) then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
1 #include <linux/module.h>
2 #include <linux/sched.h>
3 #include <linux/ctype.h>
4 #include <linux/fd.h>
5 #include <linux/tty.h>
6 #include <linux/suspend.h>
7 #include <linux/root_dev.h>
8 #include <linux/security.h>
9 #include <linux/delay.h>
10 #include <linux/mount.h>
12 #include <linux/nfs_fs.h>
13 #include <linux/nfs_fs_sb.h>
14 #include <linux/nfs_mount.h>
16 #include "do_mounts.h"
18 extern int get_filesystem_list(char * buf);
20 int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
22 int root_mountflags = MS_RDONLY | MS_SILENT;
23 char * __initdata root_device_name;
24 static char __initdata saved_root_name[64];
26 dev_t ROOT_DEV;
28 static int __init load_ramdisk(char *str)
29 {
30 rd_doload = simple_strtol(str,NULL,0) & 3;
31 return 1;
32 }
33 __setup("load_ramdisk=", load_ramdisk);
35 static int __init readonly(char *str)
36 {
37 if (*str)
38 return 0;
39 root_mountflags |= MS_RDONLY;
40 return 1;
41 }
43 static int __init readwrite(char *str)
44 {
45 if (*str)
46 return 0;
47 root_mountflags &= ~MS_RDONLY;
48 return 1;
49 }
51 __setup("ro", readonly);
52 __setup("rw", readwrite);
54 static dev_t try_name(char *name, int part)
55 {
56 char path[64];
57 char buf[32];
58 int range;
59 dev_t res;
60 char *s;
61 int len;
62 int fd;
63 unsigned int maj, min;
65 /* read device number from .../dev */
67 sprintf(path, "/sys/block/%s/dev", name);
68 fd = sys_open(path, 0, 0);
69 if (fd < 0)
70 goto fail;
71 len = sys_read(fd, buf, 32);
72 sys_close(fd);
73 if (len <= 0 || len == 32 || buf[len - 1] != '\n')
74 goto fail;
75 buf[len - 1] = '\0';
76 if (sscanf(buf, "%u:%u", &maj, &min) == 2) {
77 /*
78 * Try the %u:%u format -- see print_dev_t()
79 */
80 res = MKDEV(maj, min);
81 if (maj != MAJOR(res) || min != MINOR(res))
82 goto fail;
83 } else {
84 /*
85 * Nope. Try old-style "0321"
86 */
87 res = new_decode_dev(simple_strtoul(buf, &s, 16));
88 if (*s)
89 goto fail;
90 }
92 /* if it's there and we are not looking for a partition - that's it */
93 if (!part)
94 return res;
96 /* otherwise read range from .../range */
97 sprintf(path, "/sys/block/%s/range", name);
98 fd = sys_open(path, 0, 0);
99 if (fd < 0)
100 goto fail;
101 len = sys_read(fd, buf, 32);
102 sys_close(fd);
103 if (len <= 0 || len == 32 || buf[len - 1] != '\n')
104 goto fail;
105 buf[len - 1] = '\0';
106 range = simple_strtoul(buf, &s, 10);
107 if (*s)
108 goto fail;
110 /* if partition is within range - we got it */
111 if (part < range)
112 return res + part;
113 fail:
114 return 0;
115 }
117 /*
118 * Convert a name into device number. We accept the following variants:
119 *
120 * 1) device number in hexadecimal represents itself
121 * 2) /dev/nfs represents Root_NFS (0xff)
122 * 3) /dev/<disk_name> represents the device number of disk
123 * 4) /dev/<disk_name><decimal> represents the device number
124 * of partition - device number of disk plus the partition number
125 * 5) /dev/<disk_name>p<decimal> - same as the above, that form is
126 * used when disk name of partitioned disk ends on a digit.
127 *
128 * If name doesn't fall into the categories above, we return 0.
129 * Sysfs is used to check if something is a disk name - it has
130 * all known disks under bus/block/devices. If the disk name
131 * contains slashes, name of sysfs node has them replaced with
132 * bangs. try_name() does the actual checks, assuming that sysfs
133 * is mounted on rootfs /sys.
134 */
136 dev_t name_to_dev_t(char *name)
137 {
138 char s[32];
139 char *p;
140 dev_t res = 0;
141 int part;
143 #ifdef CONFIG_SYSFS
144 int mkdir_err = sys_mkdir("/sys", 0700);
145 if (sys_mount("sysfs", "/sys", "sysfs", 0, NULL) < 0)
146 goto out;
147 #endif
149 if (strncmp(name, "/dev/", 5) != 0) {
150 unsigned maj, min;
152 if (sscanf(name, "%u:%u", &maj, &min) == 2) {
153 res = MKDEV(maj, min);
154 if (maj != MAJOR(res) || min != MINOR(res))
155 goto fail;
156 } else {
157 res = new_decode_dev(simple_strtoul(name, &p, 16));
158 if (*p)
159 goto fail;
160 }
161 goto done;
162 }
163 name += 5;
164 res = Root_NFS;
165 if (strcmp(name, "nfs") == 0)
166 goto done;
167 res = Root_RAM0;
168 if (strcmp(name, "ram") == 0)
169 goto done;
171 if (strlen(name) > 31)
172 goto fail;
173 strcpy(s, name);
174 for (p = s; *p; p++)
175 if (*p == '/')
176 *p = '!';
177 res = try_name(s, 0);
178 if (res)
179 goto done;
181 while (p > s && isdigit(p[-1]))
182 p--;
183 if (p == s || !*p || *p == '0')
184 goto fail;
185 part = simple_strtoul(p, NULL, 10);
186 *p = '\0';
187 res = try_name(s, part);
188 if (res)
189 goto done;
191 if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p')
192 goto fail;
193 p[-1] = '\0';
194 res = try_name(s, part);
195 done:
196 #ifdef CONFIG_SYSFS
197 sys_umount("/sys", 0);
198 out:
199 if (!mkdir_err)
200 sys_rmdir("/sys");
201 #endif
202 return res;
203 fail:
204 res = 0;
205 goto done;
206 }
208 static int __init root_dev_setup(char *line)
209 {
210 strlcpy(saved_root_name, line, sizeof(saved_root_name));
211 return 1;
212 }
214 __setup("root=", root_dev_setup);
216 static char * __initdata root_mount_data;
217 static int __init root_data_setup(char *str)
218 {
219 root_mount_data = str;
220 return 1;
221 }
223 static char * __initdata root_fs_names;
224 static int __init fs_names_setup(char *str)
225 {
226 root_fs_names = str;
227 return 1;
228 }
230 static unsigned int __initdata root_delay;
231 static int __init root_delay_setup(char *str)
232 {
233 root_delay = simple_strtoul(str, NULL, 0);
234 return 1;
235 }
237 __setup("rootflags=", root_data_setup);
238 __setup("rootfstype=", fs_names_setup);
239 __setup("rootdelay=", root_delay_setup);
241 static void __init get_fs_names(char *page)
242 {
243 char *s = page;
245 if (root_fs_names) {
246 strcpy(page, root_fs_names);
247 while (*s++) {
248 if (s[-1] == ',')
249 s[-1] = '\0';
250 }
251 } else {
252 int len = get_filesystem_list(page);
253 char *p, *next;
255 page[len] = '\0';
256 for (p = page-1; p; p = next) {
257 next = strchr(++p, '\n');
258 if (*p++ != '\t')
259 continue;
260 while ((*s++ = *p++) != '\n')
261 ;
262 s[-1] = '\0';
263 }
264 }
265 *s = '\0';
266 }
268 static int __init do_mount_root(char *name, char *fs, int flags, void *data)
269 {
270 int err = sys_mount(name, "/root", fs, flags, data);
271 if (err)
272 return err;
274 sys_chdir("/root");
275 ROOT_DEV = current->fs->pwdmnt->mnt_sb->s_dev;
276 printk("VFS: Mounted root (%s filesystem)%s.\n",
277 current->fs->pwdmnt->mnt_sb->s_type->name,
278 current->fs->pwdmnt->mnt_sb->s_flags & MS_RDONLY ?
279 " readonly" : "");
280 return 0;
281 }
283 void __init mount_block_root(char *name, int flags)
284 {
285 char *fs_names = __getname();
286 char *p;
287 char b[BDEVNAME_SIZE];
289 get_fs_names(fs_names);
290 retry:
291 for (p = fs_names; *p; p += strlen(p)+1) {
292 int err = do_mount_root(name, p, flags, root_mount_data);
293 switch (err) {
294 case 0:
295 goto out;
296 case -EACCES:
297 flags |= MS_RDONLY;
298 goto retry;
299 case -EINVAL:
300 continue;
301 }
302 /*
303 * Allow the user to distinguish between failed sys_open
304 * and bad superblock on root device.
305 */
306 __bdevname(ROOT_DEV, b);
307 printk("VFS: Cannot open root device \"%s\" or %s\n",
308 root_device_name, b);
309 printk("Please append a correct \"root=\" boot option\n");
311 panic("VFS: Unable to mount root fs on %s", b);
312 }
314 printk("No filesystem could mount root, tried: ");
315 for (p = fs_names; *p; p += strlen(p)+1)
316 printk(" %s", p);
317 printk("\n");
318 panic("VFS: Unable to mount root fs on %s", __bdevname(ROOT_DEV, b));
319 out:
320 putname(fs_names);
321 }
#ifdef CONFIG_ROOT_NFS
/*
 * Try to mount the root filesystem over NFS.
 *
 * /dev/root is created for ROOT_DEV regardless of the outcome. Returns 1 if
 * nfs_root_data() supplied mount data and the mount succeeded, 0 otherwise.
 */
static int __init mount_nfs_root(void)
{
	void *data = nfs_root_data();

	create_dev("/dev/root", ROOT_DEV);

	if (!data)
		return 0;
	if (do_mount_root("/dev/root", "nfs", root_mountflags, data) != 0)
		return 0;
	return 1;
}
#endif
#if defined(CONFIG_BLK_DEV_RAM) || defined(CONFIG_BLK_DEV_FD)
/*
 * Ask the user to insert a disk and wait for a key press on the console.
 *
 * fmt, ...: printf-style description of the disk to insert; the formatted
 *           text is truncated to fit an 80-byte buffer.
 *
 * The disk in /dev/root is ejected if the device can be opened. The console
 * is switched out of canonical mode while waiting for the key press and
 * switched back afterwards.
 */
void __init change_floppy(char *fmt, ...)
{
	struct termios termios;
	char buf[80];
	char c;
	int fd;
	int have_termios;
	va_list args;

	va_start(args, fmt);
	/* bounded formatting: never write past the end of buf */
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	fd = sys_open("/dev/root", O_RDWR | O_NDELAY, 0);
	if (fd >= 0) {
		sys_ioctl(fd, FDEJECT, 0);
		sys_close(fd);
	}
	printk(KERN_NOTICE "VFS: Insert %s and press ENTER\n", buf);
	fd = sys_open("/dev/console", O_RDWR, 0);
	if (fd >= 0) {
		/*
		 * Only touch the terminal settings if they were actually
		 * read; otherwise termios is uninitialised stack data.
		 */
		have_termios = sys_ioctl(fd, TCGETS, (long)&termios) == 0;
		if (have_termios) {
			termios.c_lflag &= ~ICANON;
			sys_ioctl(fd, TCSETSF, (long)&termios);
		}
		sys_read(fd, &c, 1);
		if (have_termios) {
			termios.c_lflag |= ICANON;
			sys_ioctl(fd, TCSETSF, (long)&termios);
		}
		sys_close(fd);
	}
}
#endif
366 void __init mount_root(void)
367 {
368 #ifdef CONFIG_ROOT_NFS
369 if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
370 if (mount_nfs_root())
371 return;
373 printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
374 ROOT_DEV = Root_FD0;
375 }
376 #endif
377 #ifdef CONFIG_BLK_DEV_FD
378 if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
379 /* rd_doload is 2 for a dual initrd/ramload setup */
380 if (rd_doload==2) {
381 if (rd_load_disk(1)) {
382 ROOT_DEV = Root_RAM1;
383 root_device_name = NULL;
384 }
385 } else
386 change_floppy("root floppy");
387 }
388 #endif
389 create_dev("/dev/root", ROOT_DEV);
390 mount_block_root("/dev/root", root_mountflags);
391 }
393 /*
394 * Prepare the namespace - decide what/where to mount, load ramdisks, etc.
395 */
396 void __init prepare_namespace(void)
397 {
398 int is_floppy;
400 if (root_delay) {
401 printk(KERN_INFO "Waiting %dsec before mounting root device...\n",
402 root_delay);
403 ssleep(root_delay);
404 }
406 md_run_setup();
408 if (saved_root_name[0]) {
409 root_device_name = saved_root_name;
410 if (!strncmp(root_device_name, "mtd", 3)) {
411 mount_block_root(root_device_name, root_mountflags);
412 goto out;
413 }
414 ROOT_DEV = name_to_dev_t(root_device_name);
415 if (strncmp(root_device_name, "/dev/", 5) == 0)
416 root_device_name += 5;
417 }
419 is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;
421 if (initrd_load())
422 goto out;
424 if (is_floppy && rd_doload && rd_load_disk(0))
425 ROOT_DEV = Root_RAM0;
427 mount_root();
428 out:
429 sys_mount(".", "/", NULL, MS_MOVE, NULL);
430 sys_chroot(".");
431 security_sb_post_mountroot();
432 }