ia64/linux-2.6.18-xen.hg

view init/do_mounts_md.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently if the balloon driver is unable to increase the guest's
reservation it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However it is possible that ballooning has in fact failed due to
memory pressure in the host and therefore it is desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up and therefore there is temporary memory
pressure while things stabilise. You would not expect a well behaved
toolstack to ask a domain to balloon to more than its allocation nor
would you expect it to deliberately over-commit memory by setting
balloon targets which exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), then we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
2 #include <linux/raid/md.h>
4 #include "do_mounts.h"
6 /*
7 * When md (and any required personalities) are compiled into the kernel
8 * (not as a module), arrays can be assembled at boot time using AUTODETECT,
9 * where specially marked partitions are registered with md_autodetect_dev(),
10 * and with MD_BOOT, where devices to be collected are given on the boot line
11 * with md=.....
12 * The code for that is here.
13 */
15 static int __initdata raid_noautodetect, raid_autopart;
17 static struct {
18 int minor;
19 int partitioned;
20 int level;
21 int chunk;
22 char *device_names;
23 } md_setup_args[MAX_MD_DEVS] __initdata;
25 static int md_setup_ents __initdata;
27 extern int mdp_major;
28 /*
29 * Parse the command-line parameters given our kernel, but do not
30 * actually try to invoke the MD device now; that is handled by
31 * md_setup_drive after the low-level disk drivers have initialised.
32 *
33 * 27/11/1999: Fixed to work correctly with the 2.3 kernel (which
34 * assigns the task of parsing integer arguments to the
35 * invoked program now). Added ability to initialise all
36 * the MD devices (by specifying multiple "md=" lines)
37 * instead of just one. -- KTK
38 * 18May2000: Added support for persistent-superblock arrays:
39 * md=n,0,factor,fault,device-list uses RAID0 for device n
40 * md=n,-1,factor,fault,device-list uses LINEAR for device n
41 * md=n,device-list reads a RAID superblock from the devices
42 * elements in device-list are read by name_to_kdev_t so can be
43 * a hex number or something like /dev/hda1 /dev/sdb
44 * 2001-06-03: Dave Cinege <dcinege@psychosis.com>
45 * Shifted name_to_kdev_t() and related operations to md_set_drive()
46 * for later execution. Rewrote section to make devfs compatible.
47 */
48 static int __init md_setup(char *str)
49 {
50 int minor, level, factor, fault, partitioned = 0;
51 char *pername = "";
52 char *str1;
53 int ent;
55 if (*str == 'd') {
56 partitioned = 1;
57 str++;
58 }
59 if (get_option(&str, &minor) != 2) { /* MD Number */
60 printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
61 return 0;
62 }
63 str1 = str;
64 if (minor >= MAX_MD_DEVS) {
65 printk(KERN_WARNING "md: md=%d, Minor device number too high.\n", minor);
66 return 0;
67 }
68 for (ent=0 ; ent< md_setup_ents ; ent++)
69 if (md_setup_args[ent].minor == minor &&
70 md_setup_args[ent].partitioned == partitioned) {
71 printk(KERN_WARNING "md: md=%s%d, Specified more than once. "
72 "Replacing previous definition.\n", partitioned?"d":"", minor);
73 break;
74 }
75 if (ent >= MAX_MD_DEVS) {
76 printk(KERN_WARNING "md: md=%s%d - too many md initialisations\n", partitioned?"d":"", minor);
77 return 0;
78 }
79 if (ent >= md_setup_ents)
80 md_setup_ents++;
81 switch (get_option(&str, &level)) { /* RAID level */
82 case 2: /* could be 0 or -1.. */
83 if (level == 0 || level == LEVEL_LINEAR) {
84 if (get_option(&str, &factor) != 2 || /* Chunk Size */
85 get_option(&str, &fault) != 2) {
86 printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
87 return 0;
88 }
89 md_setup_args[ent].level = level;
90 md_setup_args[ent].chunk = 1 << (factor+12);
91 if (level == LEVEL_LINEAR)
92 pername = "linear";
93 else
94 pername = "raid0";
95 break;
96 }
97 /* FALL THROUGH */
98 case 1: /* the first device is numeric */
99 str = str1;
100 /* FALL THROUGH */
101 case 0:
102 md_setup_args[ent].level = LEVEL_NONE;
103 pername="super-block";
104 }
106 printk(KERN_INFO "md: Will configure md%d (%s) from %s, below.\n",
107 minor, pername, str);
108 md_setup_args[ent].device_names = str;
109 md_setup_args[ent].partitioned = partitioned;
110 md_setup_args[ent].minor = minor;
112 return 1;
113 }
115 #define MdpMinorShift 6
117 static void __init md_setup_drive(void)
118 {
119 int minor, i, ent, partitioned;
120 dev_t dev;
121 dev_t devices[MD_SB_DISKS+1];
123 for (ent = 0; ent < md_setup_ents ; ent++) {
124 int fd;
125 int err = 0;
126 char *devname;
127 mdu_disk_info_t dinfo;
128 char name[16];
130 minor = md_setup_args[ent].minor;
131 partitioned = md_setup_args[ent].partitioned;
132 devname = md_setup_args[ent].device_names;
134 sprintf(name, "/dev/md%s%d", partitioned?"_d":"", minor);
135 if (partitioned)
136 dev = MKDEV(mdp_major, minor << MdpMinorShift);
137 else
138 dev = MKDEV(MD_MAJOR, minor);
139 create_dev(name, dev);
140 for (i = 0; i < MD_SB_DISKS && devname != 0; i++) {
141 char *p;
142 char comp_name[64];
143 u32 rdev;
145 p = strchr(devname, ',');
146 if (p)
147 *p++ = 0;
149 dev = name_to_dev_t(devname);
150 if (strncmp(devname, "/dev/", 5) == 0)
151 devname += 5;
152 snprintf(comp_name, 63, "/dev/%s", devname);
153 rdev = bstat(comp_name);
154 if (rdev)
155 dev = new_decode_dev(rdev);
156 if (!dev) {
157 printk(KERN_WARNING "md: Unknown device name: %s\n", devname);
158 break;
159 }
161 devices[i] = dev;
163 devname = p;
164 }
165 devices[i] = 0;
167 if (!i)
168 continue;
170 printk(KERN_INFO "md: Loading md%s%d: %s\n",
171 partitioned ? "_d" : "", minor,
172 md_setup_args[ent].device_names);
174 fd = sys_open(name, 0, 0);
175 if (fd < 0) {
176 printk(KERN_ERR "md: open failed - cannot start "
177 "array %s\n", name);
178 continue;
179 }
180 if (sys_ioctl(fd, SET_ARRAY_INFO, 0) == -EBUSY) {
181 printk(KERN_WARNING
182 "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
183 minor);
184 sys_close(fd);
185 continue;
186 }
188 if (md_setup_args[ent].level != LEVEL_NONE) {
189 /* non-persistent */
190 mdu_array_info_t ainfo;
191 ainfo.level = md_setup_args[ent].level;
192 ainfo.size = 0;
193 ainfo.nr_disks =0;
194 ainfo.raid_disks =0;
195 while (devices[ainfo.raid_disks])
196 ainfo.raid_disks++;
197 ainfo.md_minor =minor;
198 ainfo.not_persistent = 1;
200 ainfo.state = (1 << MD_SB_CLEAN);
201 ainfo.layout = 0;
202 ainfo.chunk_size = md_setup_args[ent].chunk;
203 err = sys_ioctl(fd, SET_ARRAY_INFO, (long)&ainfo);
204 for (i = 0; !err && i <= MD_SB_DISKS; i++) {
205 dev = devices[i];
206 if (!dev)
207 break;
208 dinfo.number = i;
209 dinfo.raid_disk = i;
210 dinfo.state = (1<<MD_DISK_ACTIVE)|(1<<MD_DISK_SYNC);
211 dinfo.major = MAJOR(dev);
212 dinfo.minor = MINOR(dev);
213 err = sys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
214 }
215 } else {
216 /* persistent */
217 for (i = 0; i <= MD_SB_DISKS; i++) {
218 dev = devices[i];
219 if (!dev)
220 break;
221 dinfo.major = MAJOR(dev);
222 dinfo.minor = MINOR(dev);
223 sys_ioctl(fd, ADD_NEW_DISK, (long)&dinfo);
224 }
225 }
226 if (!err)
227 err = sys_ioctl(fd, RUN_ARRAY, 0);
228 if (err)
229 printk(KERN_WARNING "md: starting md%d failed\n", minor);
230 else {
231 /* reread the partition table.
232 * I (neilb) and not sure why this is needed, but I cannot
233 * boot a kernel with devfs compiled in from partitioned md
234 * array without it
235 */
236 sys_close(fd);
237 fd = sys_open(name, 0, 0);
238 sys_ioctl(fd, BLKRRPART, 0);
239 }
240 sys_close(fd);
241 }
242 }
244 static int __init raid_setup(char *str)
245 {
246 int len, pos;
248 len = strlen(str) + 1;
249 pos = 0;
251 while (pos < len) {
252 char *comma = strchr(str+pos, ',');
253 int wlen;
254 if (comma)
255 wlen = (comma-str)-pos;
256 else wlen = (len-1)-pos;
258 if (!strncmp(str, "noautodetect", wlen))
259 raid_noautodetect = 1;
260 if (strncmp(str, "partitionable", wlen)==0)
261 raid_autopart = 1;
262 if (strncmp(str, "part", wlen)==0)
263 raid_autopart = 1;
264 pos += wlen+1;
265 }
266 return 1;
267 }
269 __setup("raid=", raid_setup);
270 __setup("md=", md_setup);
272 void __init md_run_setup(void)
273 {
274 create_dev("/dev/md0", MKDEV(MD_MAJOR, 0));
275 if (raid_noautodetect)
276 printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=noautodetect)\n");
277 else {
278 int fd = sys_open("/dev/md0", 0, 0);
279 if (fd >= 0) {
280 sys_ioctl(fd, RAID_AUTORUN, raid_autopart);
281 sys_close(fd);
282 }
283 }
284 md_setup_drive();
285 }