ia64/xen-unstable

linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c @ 10178:79d74ce206bb

[BALLOON] Do not allow target to be set below two percent of maximum
memory size.

Signed-off-by: Keir Fraser <keir@xensource.com>
Author:   kaf24@firebug.cl.cam.ac.uk
Date:     Thu May 25 14:54:43 2006 +0100
Parents:  84dd753a9247
Children: 1dd2062668b2

/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <xen/xen_proc.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/interface/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <linux/list.h>

#include <xen/xenbus.h>

#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
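
/*
 * Illustrative reading of PAGES2KB: with 4 KiB pages, PAGE_SHIFT is 12,
 * so the macro shifts left by 2; PAGES2KB(256) == 1024 kB, i.e. 1 MiB.
 */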

static struct proc_dir_entry *balloon_pde;

static DECLARE_MUTEX(balloon_mutex);

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
DEFINE_SPINLOCK(balloon_lock);

/* We aim for 'current allocation' == 'target allocation'. */
static unsigned long current_pages;
static unsigned long target_pages;

/* VM /proc information for memory */
extern unsigned long totalram_pages;

/* We may hit the hard limit in Xen. If we do then we remember it. */
static unsigned long hard_limit;

/*
 * Drivers may alter the memory reservation independently, but they must
 * inform the balloon driver so that we can avoid hitting the hard limit.
 */
static unsigned long driver_pages;

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
static unsigned long balloon_low, balloon_high;

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
static DECLARE_WORK(balloon_worker, balloon_process, NULL);
static struct timer_list balloon_timer;

#define PAGE_TO_LIST(p) (&(p)->lru)
#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
#define UNLIST_PAGE(p)                          \
        do {                                    \
                list_del(PAGE_TO_LIST(p));      \
                PAGE_TO_LIST(p)->next = NULL;   \
                PAGE_TO_LIST(p)->prev = NULL;   \
        } while(0)
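
/*
 * Note (an inference from how the lru field is reused here): clearing the
 * next/prev pointers after list_del() means a page that has left the balloon
 * can never be mistaken for one still linked into ballooned_pages.
 */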

#define IPRINTK(fmt, args...) \
        printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
        printk(KERN_WARNING "xen_mem: " fmt, ##args)

/* balloon_append: add the given page to the balloon. */
static void balloon_append(struct page *page)
{
        /* Lowmem is re-populated first, so highmem pages go at list tail. */
        if (PageHighMem(page)) {
                list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
                balloon_high++;
        } else {
                list_add(PAGE_TO_LIST(page), &ballooned_pages);
                balloon_low++;
        }
}

/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(void)
{
        struct page *page;

        if (list_empty(&ballooned_pages))
                return NULL;

        page = LIST_TO_PAGE(ballooned_pages.next);
        UNLIST_PAGE(page);

        if (PageHighMem(page))
                balloon_high--;
        else
                balloon_low--;

        return page;
}

static struct page *balloon_first_page(void)
{
        if (list_empty(&ballooned_pages))
                return NULL;
        return LIST_TO_PAGE(ballooned_pages.next);
}

static struct page *balloon_next_page(struct page *page)
{
        struct list_head *next = PAGE_TO_LIST(page)->next;
        if (next == &ballooned_pages)
                return NULL;
        return LIST_TO_PAGE(next);
}

static void balloon_alarm(unsigned long unused)
{
        schedule_work(&balloon_worker);
}

static unsigned long current_target(void)
{
        unsigned long target = min(target_pages, hard_limit);
        if (target > (current_pages + balloon_low + balloon_high))
                target = current_pages + balloon_low + balloon_high;
        return target;
}
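
/*
 * Illustrative reading of current_target(): the effective target is the
 * requested target, capped first by any remembered Xen hard limit and then
 * by the total memory this domain has ever owned
 * (current_pages + balloon_low + balloon_high). For example, if target_pages
 * is 131072 but only 98304 pages exist in total, the target becomes 98304.
 */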

static int increase_reservation(unsigned long nr_pages)
{
        unsigned long *frame_list, pfn, i, flags;
        struct page *page;
        long rc;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid = DOMID_SELF
        };

        if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
                nr_pages = PAGE_SIZE / sizeof(unsigned long);
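
        /*
         * The batch is capped at one page's worth of frame numbers:
         * PAGE_SIZE / sizeof(unsigned long), e.g. 1024 entries on a 32-bit
         * build with 4 KiB pages, or 512 entries on a 64-bit build
         * (illustrative figures; the exact count depends on the arch).
         */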

        frame_list = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (frame_list == NULL)
                return -ENOMEM;

        balloon_lock(flags);

        page = balloon_first_page();
        for (i = 0; i < nr_pages; i++) {
                BUG_ON(page == NULL);
                frame_list[i] = page_to_pfn(page);
                page = balloon_next_page(page);
        }

        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
        rc = HYPERVISOR_memory_op(
                XENMEM_populate_physmap, &reservation);
        if (rc < nr_pages) {
                int ret;
                /* We hit the Xen hard limit: reprobe. */
                set_xen_guest_handle(reservation.extent_start, frame_list);
                reservation.nr_extents = rc;
                ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                           &reservation);
                BUG_ON(ret != rc);
                hard_limit = current_pages + rc - driver_pages;
                goto out;
        }

        for (i = 0; i < nr_pages; i++) {
                page = balloon_retrieve();
                BUG_ON(page == NULL);

                pfn = page_to_pfn(page);
                BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
                       phys_to_machine_mapping_valid(pfn));

                /* Update P->M and M->P tables. */
                set_phys_to_machine(pfn, frame_list[i]);
                xen_machphys_update(frame_list[i], pfn);

                /* Link back into the page tables if not highmem. */
                if (pfn < max_low_pfn) {
                        int ret;
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
                                pfn_pte_ma(frame_list[i], PAGE_KERNEL),
                                0);
                        BUG_ON(ret);
                }

                /* Relinquish the page back to the allocator. */
                ClearPageReserved(page);
                set_page_count(page, 1);
                __free_page(page);
        }

        current_pages += nr_pages;
        totalram_pages = current_pages;

 out:
        balloon_unlock(flags);

        free_page((unsigned long)frame_list);

        return 0;
}
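
/*
 * Because each pass handles at most one page of frame numbers, a large
 * target change is satisfied over several calls; balloon_process() below
 * keeps looping until the credit reaches zero or an allocation stalls.
 */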

static int decrease_reservation(unsigned long nr_pages)
{
        unsigned long *frame_list, pfn, i, flags;
        struct page *page;
        void *v;
        int need_sleep = 0;
        int ret;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid = DOMID_SELF
        };

        if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
                nr_pages = PAGE_SIZE / sizeof(unsigned long);

        frame_list = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (frame_list == NULL)
                return -ENOMEM;

        for (i = 0; i < nr_pages; i++) {
                if ((page = alloc_page(GFP_HIGHUSER)) == NULL) {
                        nr_pages = i;
                        need_sleep = 1;
                        break;
                }

                pfn = page_to_pfn(page);
                frame_list[i] = pfn_to_mfn(pfn);

                if (!PageHighMem(page)) {
                        v = phys_to_virt(pfn << PAGE_SHIFT);
                        scrub_pages(v, 1);
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)v, __pte_ma(0), 0);
                        BUG_ON(ret);
                }
#ifdef CONFIG_XEN_SCRUB_PAGES
                else {
                        v = kmap(page);
                        scrub_pages(v, 1);
                        kunmap(page);
                }
#endif
        }

        /* Ensure that ballooned highmem pages don't have kmaps. */
        kmap_flush_unused();
        flush_tlb_all();

        balloon_lock(flags);

        /* No more mappings: invalidate P2M and add to balloon. */
        for (i = 0; i < nr_pages; i++) {
                pfn = mfn_to_pfn(frame_list[i]);
                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
                balloon_append(pfn_to_page(pfn));
        }

        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
        BUG_ON(ret != nr_pages);

        current_pages -= nr_pages;
        totalram_pages = current_pages;

        balloon_unlock(flags);

        free_page((unsigned long)frame_list);

        return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
        int need_sleep = 0;
        long credit;

        down(&balloon_mutex);

        do {
                credit = current_target() - current_pages;
                if (credit > 0)
                        need_sleep = (increase_reservation(credit) != 0);
                if (credit < 0)
                        need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
                if (need_resched())
                        schedule();
#endif
        } while ((credit != 0) && !need_sleep);

        /* Schedule more work if there is some still to be done. */
        if (current_target() != current_pages)
                mod_timer(&balloon_timer, jiffies + HZ);

        up(&balloon_mutex);
}

/* Resets the Xen limit, sets new target, and kicks off processing. */
static void set_new_target(unsigned long target)
{
        unsigned long min_target;

        /* Do not allow target to reduce below 2% of maximum memory size. */
        min_target = max_pfn / 50;
        target = max(target, min_target);

        /* No need for lock. Not read-modify-write updates. */
        hard_limit = ~0UL;
        target_pages = target;
        schedule_work(&balloon_worker);
}
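
/*
 * Worked example (illustrative): on a domain with max_pfn = 262144
 * (1 GiB of 4 KiB pages), min_target = 262144 / 50 = 5242 pages, roughly
 * 20 MiB, which is the two-percent floor this changeset introduces.
 */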

static struct xenbus_watch target_watch =
{
        .node = "memory/target"
};

/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch,
                         const char **vec, unsigned int len)
{
        unsigned long long new_target;
        int err;

        err = xenbus_scanf(XBT_NULL, "memory", "target", "%llu", &new_target);
        if (err != 1) {
                /* This is ok (for domain0 at least) - so just return */
                return;
        }

        /*
         * The given memory/target value is in KiB, so it needs converting to
         * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
         */
        set_new_target(new_target >> (PAGE_SHIFT - 10));
}
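
/*
 * Illustrative conversion: with 4 KiB pages, a memory/target of 524288 KiB
 * (512 MiB) becomes 524288 >> (12 - 10) = 131072 pages.
 */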

static int balloon_init_watcher(struct notifier_block *notifier,
                                unsigned long event,
                                void *data)
{
        int err;

        err = register_xenbus_watch(&target_watch);
        if (err)
                printk(KERN_ERR "Failed to set balloon watcher\n");

        return NOTIFY_DONE;
}

static int balloon_write(struct file *file, const char __user *buffer,
                         unsigned long count, void *data)
{
        char memstring[64], *endchar;
        unsigned long long target_bytes;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (count <= 1)
                return -EBADMSG; /* runt */
        if (count > sizeof(memstring))
                return -EFBIG;   /* too long */

        if (copy_from_user(memstring, buffer, count))
                return -EFAULT;
        memstring[sizeof(memstring)-1] = '\0';

        target_bytes = memparse(memstring, &endchar);
        set_new_target(target_bytes >> PAGE_SHIFT);

        return count;
}
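
/*
 * Usage sketch: writing a size string to /proc/xen/balloon (the entry
 * created in balloon_init() below) sets a new target, e.g.
 *
 *     echo 512M > /proc/xen/balloon
 *
 * memparse() understands the usual K/M/G suffixes, so the value above is
 * 512 MiB expressed in bytes before the PAGE_SHIFT conversion.
 */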

static int balloon_read(char *page, char **start, off_t off,
                        int count, int *eof, void *data)
{
        int len;

        len = sprintf(
                page,
                "Current allocation: %8lu kB\n"
                "Requested target:   %8lu kB\n"
                "Low-mem balloon:    %8lu kB\n"
                "High-mem balloon:   %8lu kB\n"
                "Xen hard limit:     ",
                PAGES2KB(current_pages), PAGES2KB(target_pages),
                PAGES2KB(balloon_low), PAGES2KB(balloon_high));

        if (hard_limit != ~0UL) {
                len += sprintf(
                        page + len,
                        "%8lu kB (inc. %8lu kB driver headroom)\n",
                        PAGES2KB(hard_limit), PAGES2KB(driver_pages));
        } else {
                len += sprintf(
                        page + len,
                        "     ??? kB\n");
        }

        *eof = 1;
        return len;
}
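
/*
 * Illustrative /proc/xen/balloon output (numbers invented, no hard limit
 * recorded):
 *
 *     Current allocation:   524288 kB
 *     Requested target:     524288 kB
 *     Low-mem balloon:           0 kB
 *     High-mem balloon:          0 kB
 *     Xen hard limit:          ??? kB
 */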

static struct notifier_block xenstore_notifier;

static int __init balloon_init(void)
{
        unsigned long pfn;
        struct page *page;

        IPRINTK("Initialising balloon driver.\n");

        if (!is_running_on_xen())
                return -ENODEV;

        current_pages = min(xen_start_info->nr_pages, max_pfn);
        totalram_pages = current_pages;
        target_pages = current_pages;
        balloon_low = 0;
        balloon_high = 0;
        driver_pages = 0UL;
        hard_limit = ~0UL;

        init_timer(&balloon_timer);
        balloon_timer.data = 0;
        balloon_timer.function = balloon_alarm;

        if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
                WPRINTK("Unable to create /proc/xen/balloon.\n");
                return -1;
        }

        balloon_pde->read_proc = balloon_read;
        balloon_pde->write_proc = balloon_write;

        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
                page = pfn_to_page(pfn);
                if (!PageReserved(page))
                        balloon_append(page);
        }

        target_watch.callback = watch_target;
        xenstore_notifier.notifier_call = balloon_init_watcher;

        register_xenstore_notifier(&xenstore_notifier);

        return 0;
}

subsys_initcall(balloon_init);

void balloon_update_driver_allowance(long delta)
{
        unsigned long flags;

        balloon_lock(flags);
        driver_pages += delta;
        balloon_unlock(flags);
}
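
/*
 * A positive delta records pages a backend driver is holding outside the
 * normal reservation accounting; a negative delta returns them. Which
 * drivers call this is not visible from this file; the symbol is exported
 * via EXPORT_SYMBOL_GPL below.
 */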

static int dealloc_pte_fn(
        pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
        unsigned long mfn = pte_mfn(*pte);
        int ret;
        struct xen_memory_reservation reservation = {
                .nr_extents = 1,
                .extent_order = 0,
                .domid = DOMID_SELF
        };
        set_xen_guest_handle(reservation.extent_start, &mfn);
        set_pte_at(&init_mm, addr, pte, __pte_ma(0));
        set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
        BUG_ON(ret != 1);
        return 0;
}

struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
{
        unsigned long vstart, flags;
        unsigned int order = get_order(nr_pages * PAGE_SIZE);
        int ret;
        unsigned long i;
        struct page *page;

        vstart = __get_free_pages(GFP_KERNEL, order);
        if (vstart == 0)
                return NULL;

        scrub_pages(vstart, 1 << order);

        balloon_lock(flags);
        if (xen_feature(XENFEAT_auto_translated_physmap)) {
                unsigned long gmfn = __pa(vstart) >> PAGE_SHIFT;
                struct xen_memory_reservation reservation = {
                        .nr_extents = 1,
                        .extent_order = order,
                        .domid = DOMID_SELF
                };
                set_xen_guest_handle(reservation.extent_start, &gmfn);
                ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
                                           &reservation);
                BUG_ON(ret != 1);
        } else {
                ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order,
                                          dealloc_pte_fn, NULL);
                BUG_ON(ret);
        }
        current_pages -= 1UL << order;
        totalram_pages = current_pages;
        balloon_unlock(flags);

        schedule_work(&balloon_worker);

        flush_tlb_all();

        page = virt_to_page(vstart);

        for (i = 0; i < (1UL << order); i++)
                set_page_count(page + i, 1);

        return page;
}

void balloon_dealloc_empty_page_range(
        struct page *page, unsigned long nr_pages)
{
        unsigned long i, flags;
        unsigned int order = get_order(nr_pages * PAGE_SIZE);

        balloon_lock(flags);
        for (i = 0; i < (1UL << order); i++) {
                BUG_ON(page_count(page + i) != 1);
                balloon_append(page + i);
        }
        balloon_unlock(flags);

        schedule_work(&balloon_worker);
}

EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);

MODULE_LICENSE("Dual BSD/GPL");