ia64/linux-2.6.18-xen.hg

view drivers/mtd/mtdpart.c @ 897:329ea0ccb344

balloon: try harder to balloon up under memory pressure.

Currently, if the balloon driver is unable to increase the guest's
reservation, it assumes the failure was due to reaching its full
allocation, gives up on the ballooning operation, and records the limit
it reached as the "hard limit". The driver will not try again until
the target is set again (even to the same value).

However, it is possible that ballooning has in fact failed due to
memory pressure in the host, and it is therefore desirable to keep
attempting to reach the target in case memory becomes available. The
most likely scenario is that some guests are ballooning down while
others are ballooning up, creating temporary memory pressure while
things stabilise. You would not expect a well-behaved toolstack to ask
a domain to balloon to more than its allocation, nor would you expect
it to deliberately over-commit memory by setting balloon targets which
exceed the total host memory.

This patch drops the concept of a hard limit and causes the balloon
driver to retry increasing the reservation on a timer in the same
manner as when decreasing the reservation.

Also, if we only partially succeed in increasing the reservation
(i.e. receive fewer pages than we asked for), we may as well keep
those pages rather than returning them to Xen.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
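
The retry behaviour described above can be pictured with a minimal
sketch (purely illustrative; balloon_process, increase_reservation,
decrease_reservation, current_pages, target_pages and balloon_timer are
assumed names for this example, not necessarily those used by the
actual driver):

static void balloon_process(void *unused)
{
        if (current_pages < target_pages)
                increase_reservation(target_pages - current_pages);
        else if (current_pages > target_pages)
                decrease_reservation(current_pages - target_pages);

        /* No hard limit: if we are still short of the target (for
         * example because the host is under temporary memory
         * pressure), simply try again later. */
        if (current_pages != target_pages)
                mod_timer(&balloon_timer, jiffies + HZ);
}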
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jun 05 14:01:20 2009 +0100 (2009-06-05)
parents 831230e53067
children
line source
/*
 * Simple MTD partitioning layer
 *
 * (C) 2000 Nicolas Pitre <nico@cam.org>
 *
 * This code is GPL
 *
 * $Id: mtdpart.c,v 1.55 2005/11/07 11:14:20 gleixner Exp $
 *
 * 02-21-2002 Thomas Gleixner <gleixner@autronix.de>
 *            added support for read_oob, write_oob
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/kmod.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
#include <linux/mtd/compatmac.h>
/* Our partition linked list */
static LIST_HEAD(mtd_partitions);

/* Our partition node structure */
struct mtd_part {
        struct mtd_info mtd;
        struct mtd_info *master;
        u_int32_t offset;
        int index;
        struct list_head list;
        int registered;
};

/*
 * Given a pointer to the MTD object in the mtd_part structure, we can retrieve
 * the pointer to that structure with this macro.
 */
#define PART(x)  ((struct mtd_part *)(x))

/*
 * MTD methods which simply translate the effective address and pass through
 * to the _real_ device.
 */

static int part_read (struct mtd_info *mtd, loff_t from, size_t len,
                        size_t *retlen, u_char *buf)
{
        struct mtd_part *part = PART(mtd);
        int res;

        if (from >= mtd->size)
                len = 0;
        else if (from + len > mtd->size)
                len = mtd->size - from;
        res = part->master->read (part->master, from + part->offset,
                                  len, retlen, buf);
        if (unlikely(res)) {
                if (res == -EUCLEAN)
                        mtd->ecc_stats.corrected++;
                if (res == -EBADMSG)
                        mtd->ecc_stats.failed++;
        }
        return res;
}
static int part_point (struct mtd_info *mtd, loff_t from, size_t len,
                        size_t *retlen, u_char **buf)
{
        struct mtd_part *part = PART(mtd);
        if (from >= mtd->size)
                len = 0;
        else if (from + len > mtd->size)
                len = mtd->size - from;
        return part->master->point (part->master, from + part->offset,
                                    len, retlen, buf);
}

static void part_unpoint (struct mtd_info *mtd, u_char *addr, loff_t from, size_t len)
{
        struct mtd_part *part = PART(mtd);

        part->master->unpoint (part->master, addr, from + part->offset, len);
}

static int part_read_oob(struct mtd_info *mtd, loff_t from,
                         struct mtd_oob_ops *ops)
{
        struct mtd_part *part = PART(mtd);
        int res;

        if (from >= mtd->size)
                return -EINVAL;
        if (from + ops->len > mtd->size)
                return -EINVAL;

        res = part->master->read_oob(part->master, from + part->offset, ops);

        if (unlikely(res)) {
                if (res == -EUCLEAN)
                        mtd->ecc_stats.corrected++;
                if (res == -EBADMSG)
                        mtd->ecc_stats.failed++;
        }
        return res;
}

static int part_read_user_prot_reg (struct mtd_info *mtd, loff_t from, size_t len,
                        size_t *retlen, u_char *buf)
{
        struct mtd_part *part = PART(mtd);
        return part->master->read_user_prot_reg (part->master, from,
                                        len, retlen, buf);
}

static int part_get_user_prot_info (struct mtd_info *mtd,
                                    struct otp_info *buf, size_t len)
{
        struct mtd_part *part = PART(mtd);
        return part->master->get_user_prot_info (part->master, buf, len);
}

static int part_read_fact_prot_reg (struct mtd_info *mtd, loff_t from, size_t len,
                        size_t *retlen, u_char *buf)
{
        struct mtd_part *part = PART(mtd);
        return part->master->read_fact_prot_reg (part->master, from,
                                        len, retlen, buf);
}

static int part_get_fact_prot_info (struct mtd_info *mtd,
                                    struct otp_info *buf, size_t len)
{
        struct mtd_part *part = PART(mtd);
        return part->master->get_fact_prot_info (part->master, buf, len);
}
static int part_write (struct mtd_info *mtd, loff_t to, size_t len,
                        size_t *retlen, const u_char *buf)
{
        struct mtd_part *part = PART(mtd);
        if (!(mtd->flags & MTD_WRITEABLE))
                return -EROFS;
        if (to >= mtd->size)
                len = 0;
        else if (to + len > mtd->size)
                len = mtd->size - to;
        return part->master->write (part->master, to + part->offset,
                                    len, retlen, buf);
}

static int part_write_oob(struct mtd_info *mtd, loff_t to,
                          struct mtd_oob_ops *ops)
{
        struct mtd_part *part = PART(mtd);

        if (!(mtd->flags & MTD_WRITEABLE))
                return -EROFS;

        if (to >= mtd->size)
                return -EINVAL;
        if (to + ops->len > mtd->size)
                return -EINVAL;
        return part->master->write_oob(part->master, to + part->offset, ops);
}

static int part_write_user_prot_reg (struct mtd_info *mtd, loff_t from, size_t len,
                        size_t *retlen, u_char *buf)
{
        struct mtd_part *part = PART(mtd);
        return part->master->write_user_prot_reg (part->master, from,
                                        len, retlen, buf);
}

static int part_lock_user_prot_reg (struct mtd_info *mtd, loff_t from, size_t len)
{
        struct mtd_part *part = PART(mtd);
        return part->master->lock_user_prot_reg (part->master, from, len);
}

static int part_writev (struct mtd_info *mtd, const struct kvec *vecs,
                        unsigned long count, loff_t to, size_t *retlen)
{
        struct mtd_part *part = PART(mtd);
        if (!(mtd->flags & MTD_WRITEABLE))
                return -EROFS;
        return part->master->writev (part->master, vecs, count,
                                     to + part->offset, retlen);
}
static int part_erase (struct mtd_info *mtd, struct erase_info *instr)
{
        struct mtd_part *part = PART(mtd);
        int ret;
        if (!(mtd->flags & MTD_WRITEABLE))
                return -EROFS;
        if (instr->addr >= mtd->size)
                return -EINVAL;
        instr->addr += part->offset;
        ret = part->master->erase(part->master, instr);
        return ret;
}

void mtd_erase_callback(struct erase_info *instr)
{
        if (instr->mtd->erase == part_erase) {
                struct mtd_part *part = PART(instr->mtd);

                if (instr->fail_addr != 0xffffffff)
                        instr->fail_addr -= part->offset;
                instr->addr -= part->offset;
        }
        if (instr->callback)
                instr->callback(instr);
}
EXPORT_SYMBOL_GPL(mtd_erase_callback);

static int part_lock (struct mtd_info *mtd, loff_t ofs, size_t len)
{
        struct mtd_part *part = PART(mtd);
        if ((len + ofs) > mtd->size)
                return -EINVAL;
        return part->master->lock(part->master, ofs + part->offset, len);
}

static int part_unlock (struct mtd_info *mtd, loff_t ofs, size_t len)
{
        struct mtd_part *part = PART(mtd);
        if ((len + ofs) > mtd->size)
                return -EINVAL;
        return part->master->unlock(part->master, ofs + part->offset, len);
}

static void part_sync(struct mtd_info *mtd)
{
        struct mtd_part *part = PART(mtd);
        part->master->sync(part->master);
}

static int part_suspend(struct mtd_info *mtd)
{
        struct mtd_part *part = PART(mtd);
        return part->master->suspend(part->master);
}

static void part_resume(struct mtd_info *mtd)
{
        struct mtd_part *part = PART(mtd);
        part->master->resume(part->master);
}

static int part_block_isbad (struct mtd_info *mtd, loff_t ofs)
{
        struct mtd_part *part = PART(mtd);
        if (ofs >= mtd->size)
                return -EINVAL;
        ofs += part->offset;
        return part->master->block_isbad(part->master, ofs);
}

static int part_block_markbad (struct mtd_info *mtd, loff_t ofs)
{
        struct mtd_part *part = PART(mtd);
        int res;

        if (!(mtd->flags & MTD_WRITEABLE))
                return -EROFS;
        if (ofs >= mtd->size)
                return -EINVAL;
        ofs += part->offset;
        res = part->master->block_markbad(part->master, ofs);
        if (!res)
                mtd->ecc_stats.badblocks++;
        return res;
}
/*
 * This function unregisters and destroys all slave MTD objects which are
 * attached to the given master MTD object.
 */

int del_mtd_partitions(struct mtd_info *master)
{
        struct list_head *node;
        struct mtd_part *slave;

        for (node = mtd_partitions.next;
             node != &mtd_partitions;
             node = node->next) {
                slave = list_entry(node, struct mtd_part, list);
                if (slave->master == master) {
                        struct list_head *prev = node->prev;
                        __list_del(prev, node->next);
                        if(slave->registered)
                                del_mtd_device(&slave->mtd);
                        kfree(slave);
                        node = prev;
                }
        }

        return 0;
}
/*
 * This function, given a master MTD object and a partition table, creates
 * and registers slave MTD objects which are bound to the master according to
 * the partition definitions.
 * (Q: should we register the master MTD object as well?)
 */

int add_mtd_partitions(struct mtd_info *master,
                       const struct mtd_partition *parts,
                       int nbparts)
{
        struct mtd_part *slave;
        u_int32_t cur_offset = 0;
        int i;

        printk (KERN_NOTICE "Creating %d MTD partitions on \"%s\":\n", nbparts, master->name);

        for (i = 0; i < nbparts; i++) {

                /* allocate the partition structure */
                slave = kmalloc (sizeof(*slave), GFP_KERNEL);
                if (!slave) {
                        printk ("memory allocation error while creating partitions for \"%s\"\n",
                                master->name);
                        del_mtd_partitions(master);
                        return -ENOMEM;
                }
                memset(slave, 0, sizeof(*slave));
                list_add(&slave->list, &mtd_partitions);

                /* set up the MTD object for this partition */
                slave->mtd.type = master->type;
                slave->mtd.flags = master->flags & ~parts[i].mask_flags;
                slave->mtd.size = parts[i].size;
                slave->mtd.writesize = master->writesize;
                slave->mtd.oobsize = master->oobsize;
                slave->mtd.ecctype = master->ecctype;
                slave->mtd.eccsize = master->eccsize;

                slave->mtd.name = parts[i].name;
                slave->mtd.bank_size = master->bank_size;
                slave->mtd.owner = master->owner;

                slave->mtd.read = part_read;
                slave->mtd.write = part_write;

                if(master->point && master->unpoint){
                        slave->mtd.point = part_point;
                        slave->mtd.unpoint = part_unpoint;
                }

                if (master->read_oob)
                        slave->mtd.read_oob = part_read_oob;
                if (master->write_oob)
                        slave->mtd.write_oob = part_write_oob;
                if(master->read_user_prot_reg)
                        slave->mtd.read_user_prot_reg = part_read_user_prot_reg;
                if(master->read_fact_prot_reg)
                        slave->mtd.read_fact_prot_reg = part_read_fact_prot_reg;
                if(master->write_user_prot_reg)
                        slave->mtd.write_user_prot_reg = part_write_user_prot_reg;
                if(master->lock_user_prot_reg)
                        slave->mtd.lock_user_prot_reg = part_lock_user_prot_reg;
                if(master->get_user_prot_info)
                        slave->mtd.get_user_prot_info = part_get_user_prot_info;
                if(master->get_fact_prot_info)
                        slave->mtd.get_fact_prot_info = part_get_fact_prot_info;
                if (master->sync)
                        slave->mtd.sync = part_sync;
                if (!i && master->suspend && master->resume) {
                        slave->mtd.suspend = part_suspend;
                        slave->mtd.resume = part_resume;
                }
                if (master->writev)
                        slave->mtd.writev = part_writev;
                if (master->lock)
                        slave->mtd.lock = part_lock;
                if (master->unlock)
                        slave->mtd.unlock = part_unlock;
                if (master->block_isbad)
                        slave->mtd.block_isbad = part_block_isbad;
                if (master->block_markbad)
                        slave->mtd.block_markbad = part_block_markbad;
                slave->mtd.erase = part_erase;
                slave->master = master;
                slave->offset = parts[i].offset;
                slave->index = i;

                if (slave->offset == MTDPART_OFS_APPEND)
                        slave->offset = cur_offset;
                if (slave->offset == MTDPART_OFS_NXTBLK) {
                        slave->offset = cur_offset;
                        if ((cur_offset % master->erasesize) != 0) {
                                /* Round up to next erasesize */
                                slave->offset = ((cur_offset / master->erasesize) + 1) * master->erasesize;
                                printk(KERN_NOTICE "Moving partition %d: "
                                       "0x%08x -> 0x%08x\n", i,
                                       cur_offset, slave->offset);
                        }
                }
                if (slave->mtd.size == MTDPART_SIZ_FULL)
                        slave->mtd.size = master->size - slave->offset;
                cur_offset = slave->offset + slave->mtd.size;

                printk (KERN_NOTICE "0x%08x-0x%08x : \"%s\"\n", slave->offset,
                        slave->offset + slave->mtd.size, slave->mtd.name);

                /* let's do some sanity checks */
                if (slave->offset >= master->size) {
                        /* let's register it anyway to preserve ordering */
                        slave->offset = 0;
                        slave->mtd.size = 0;
                        printk ("mtd: partition \"%s\" is out of reach -- disabled\n",
                                parts[i].name);
                }
                if (slave->offset + slave->mtd.size > master->size) {
                        slave->mtd.size = master->size - slave->offset;
                        printk ("mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#x\n",
                                parts[i].name, master->name, slave->mtd.size);
                }
                if (master->numeraseregions>1) {
                        /* Deal with variable erase size stuff */
                        int i;
                        struct mtd_erase_region_info *regions = master->eraseregions;

                        /* Find the first erase region which is part of this partition. */
                        for (i=0; i < master->numeraseregions && slave->offset >= regions[i].offset; i++)
                                ;

                        for (i--; i < master->numeraseregions && slave->offset + slave->mtd.size > regions[i].offset; i++) {
                                if (slave->mtd.erasesize < regions[i].erasesize) {
                                        slave->mtd.erasesize = regions[i].erasesize;
                                }
                        }
                } else {
                        /* Single erase size */
                        slave->mtd.erasesize = master->erasesize;
                }

                if ((slave->mtd.flags & MTD_WRITEABLE) &&
                    (slave->offset % slave->mtd.erasesize)) {
                        /* Doesn't start on a boundary of major erase size */
                        /* FIXME: Let it be writable if it is on a boundary of _minor_ erase size though */
                        slave->mtd.flags &= ~MTD_WRITEABLE;
                        printk ("mtd: partition \"%s\" doesn't start on an erase block boundary -- force read-only\n",
                                parts[i].name);
                }
                if ((slave->mtd.flags & MTD_WRITEABLE) &&
                    (slave->mtd.size % slave->mtd.erasesize)) {
                        slave->mtd.flags &= ~MTD_WRITEABLE;
                        printk ("mtd: partition \"%s\" doesn't end on an erase block -- force read-only\n",
                                parts[i].name);
                }

                slave->mtd.ecclayout = master->ecclayout;
                if (master->block_isbad) {
                        uint32_t offs = 0;

                        while(offs < slave->mtd.size) {
                                if (master->block_isbad(master,
                                                        offs + slave->offset))
                                        slave->mtd.ecc_stats.badblocks++;
                                offs += slave->mtd.erasesize;
                        }
                }

                if(parts[i].mtdp)
                {       /* store the object pointer (caller may or may not register it) */
                        *parts[i].mtdp = &slave->mtd;
                        slave->registered = 0;
                }
                else
                {
                        /* register our partition */
                        add_mtd_device(&slave->mtd);
                        slave->registered = 1;
                }
        }

        return 0;
}
EXPORT_SYMBOL(add_mtd_partitions);
EXPORT_SYMBOL(del_mtd_partitions);
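
/*
 * Illustrative usage (editor's sketch, not part of the original file):
 * a board or map driver typically declares a static partition table and
 * hands it to add_mtd_partitions() once the master mtd_info has been
 * probed.  The names example_parts and example_board_init are made up
 * for the example.
 */
#if 0
static struct mtd_partition example_parts[] = {
        {
                .name           = "bootloader",
                .offset         = 0,
                .size           = 0x40000,
                .mask_flags     = MTD_WRITEABLE,        /* clear MTD_WRITEABLE: force read-only */
        }, {
                .name           = "kernel",
                .offset         = MTDPART_OFS_APPEND,   /* right after the previous partition */
                .size           = 0x200000,
        }, {
                .name           = "rootfs",
                .offset         = MTDPART_OFS_APPEND,
                .size           = MTDPART_SIZ_FULL,     /* use the rest of the device */
        },
};

static int __init example_board_init(struct mtd_info *master)
{
        return add_mtd_partitions(master, example_parts,
                                  ARRAY_SIZE(example_parts));
}
#endif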
static DEFINE_SPINLOCK(part_parser_lock);
static LIST_HEAD(part_parsers);

static struct mtd_part_parser *get_partition_parser(const char *name)
{
        struct list_head *this;
        void *ret = NULL;
        spin_lock(&part_parser_lock);

        list_for_each(this, &part_parsers) {
                struct mtd_part_parser *p = list_entry(this, struct mtd_part_parser, list);

                if (!strcmp(p->name, name) && try_module_get(p->owner)) {
                        ret = p;
                        break;
                }
        }
        spin_unlock(&part_parser_lock);

        return ret;
}

int register_mtd_parser(struct mtd_part_parser *p)
{
        spin_lock(&part_parser_lock);
        list_add(&p->list, &part_parsers);
        spin_unlock(&part_parser_lock);

        return 0;
}

int deregister_mtd_parser(struct mtd_part_parser *p)
{
        spin_lock(&part_parser_lock);
        list_del(&p->list);
        spin_unlock(&part_parser_lock);
        return 0;
}
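
/*
 * Illustrative sketch of a partition-table parser registering itself
 * (editor's addition; example_parse_fn and example_parser are made-up
 * names).  A real parse_fn fills *pparts with a kmalloc()ed table and
 * returns the number of partitions found.
 */
#if 0
static int example_parse_fn(struct mtd_info *master,
                            struct mtd_partition **pparts,
                            unsigned long origin)
{
        /* scan the flash, allocate and fill *pparts here ... */
        return 0;       /* number of partitions found */
}

static struct mtd_part_parser example_parser = {
        .owner          = THIS_MODULE,
        .parse_fn       = example_parse_fn,
        .name           = "example",
};

static int __init example_parser_init(void)
{
        return register_mtd_parser(&example_parser);
}
#endif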
int parse_mtd_partitions(struct mtd_info *master, const char **types,
                         struct mtd_partition **pparts, unsigned long origin)
{
        struct mtd_part_parser *parser;
        int ret = 0;

        for ( ; ret <= 0 && *types; types++) {
                parser = get_partition_parser(*types);
#ifdef CONFIG_KMOD
                if (!parser && !request_module("%s", *types))
                        parser = get_partition_parser(*types);
#endif
                if (!parser) {
                        printk(KERN_NOTICE "%s partition parsing not available\n",
                               *types);
                        continue;
                }
                ret = (*parser->parse_fn)(master, pparts, origin);
                if (ret > 0) {
                        printk(KERN_NOTICE "%d %s partitions found on MTD device %s\n",
                               ret, parser->name, master->name);
                }
                put_partition_parser(parser);
        }
        return ret;
}

EXPORT_SYMBOL_GPL(parse_mtd_partitions);
EXPORT_SYMBOL_GPL(register_mtd_parser);
EXPORT_SYMBOL_GPL(deregister_mtd_parser);
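
/*
 * Illustrative usage (editor's sketch): a flash driver that wants its
 * partition layout supplied by a parser tries a NULL-terminated list of
 * parser names in order and registers whatever comes back.  part_probes,
 * parsed_parts and example_probe are made-up names; "cmdlinepart" and
 * "RedBoot" are the names used by the corresponding in-tree parsers.
 */
#if 0
static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL };

static int example_probe(struct mtd_info *master)
{
        struct mtd_partition *parsed_parts;
        int nr;

        nr = parse_mtd_partitions(master, part_probes, &parsed_parts, 0);
        if (nr > 0)
                return add_mtd_partitions(master, parsed_parts, nr);
        return nr;      /* 0 or negative: no partitions found / parser error */
}
#endif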
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Nicolas Pitre <nico@cam.org>");
MODULE_DESCRIPTION("Generic support for partitioning of MTD devices");