ia64/linux-2.6.18-xen.hg

view drivers/xen/balloon/balloon.c @ 551:a741afb71700

balloon: Fix minimum_target() check to work when built as a module.
From: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu May 15 09:42:27 2008 +0100 (2008-05-15)
parents b61443b1bf76
children 163a3807cb1f
line source
1 /******************************************************************************
2 * balloon.c
3 *
4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
5 *
6 * Copyright (c) 2003, B Dragovic
7 * Copyright (c) 2003-2004, M Williamson, K Fraser
8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
35 #include <linux/kernel.h>
36 #include <linux/module.h>
37 #include <linux/sched.h>
38 #include <linux/errno.h>
39 #include <linux/mm.h>
40 #include <linux/mman.h>
41 #include <linux/smp_lock.h>
42 #include <linux/pagemap.h>
43 #include <linux/bootmem.h>
44 #include <linux/highmem.h>
45 #include <linux/vmalloc.h>
46 #include <linux/mutex.h>
47 #include <xen/xen_proc.h>
48 #include <asm/hypervisor.h>
49 #include <xen/balloon.h>
50 #include <xen/interface/memory.h>
51 #include <asm/maddr.h>
52 #include <asm/page.h>
53 #include <asm/pgalloc.h>
54 #include <asm/pgtable.h>
55 #include <asm/uaccess.h>
56 #include <asm/tlb.h>
57 #include <linux/highmem.h>
58 #include <linux/list.h>
59 #include <xen/xenbus.h>
60 #include "common.h"
62 #ifdef HAVE_XEN_PLATFORM_COMPAT_H
63 #include <xen/platform-compat.h>
64 #endif
66 #ifdef CONFIG_PROC_FS
67 static struct proc_dir_entry *balloon_pde;
68 #endif
/* Serialises balloon_process() workers; held for the whole rebalance pass. */
static DEFINE_MUTEX(balloon_mutex);

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
DEFINE_SPINLOCK(balloon_lock);

/* Accounting exported to /proc and sysfs; 'bs' is presumably an alias for
 * this defined in common.h — TODO confirm. */
struct balloon_stats balloon_stats;

/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];

/* VM /proc information for memory */
extern unsigned long totalram_pages;

#ifndef MODULE
extern unsigned long totalhigh_pages;
#define inc_totalhigh_pages() (totalhigh_pages++)
#define dec_totalhigh_pages() (totalhigh_pages--)
#else
/* totalhigh_pages is not exported to modules, so highmem accounting
 * becomes a no-op when built as a module. */
#define inc_totalhigh_pages() ((void)0)
#define dec_totalhigh_pages() ((void)0)
#endif

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
static DECLARE_WORK(balloon_worker, balloon_process, NULL);
/* One-second retry timer armed when a pass leaves the target unmet. */
static struct timer_list balloon_timer;

/* When ballooning out (allocating memory to return to Xen) we don't really
   want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
	(GFP_HIGHUSER|__GFP_NOWARN|__GFP_NORETRY|__GFP_NOMEMALLOC|__GFP_COLD)

/* Ballooned pages are chained through page->lru, which is unused while a
 * page is not on any allocator list. */
#define PAGE_TO_LIST(p) (&(p)->lru)
#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
#define UNLIST_PAGE(p)				\
	do {					\
		list_del(PAGE_TO_LIST(p));	\
		PAGE_TO_LIST(p)->next = NULL;	\
		PAGE_TO_LIST(p)->prev = NULL;	\
	} while(0)

#define IPRINTK(fmt, args...) \
	printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
	printk(KERN_WARNING "xen_mem: " fmt, ##args)
123 /* balloon_append: add the given page to the balloon. */
124 static void balloon_append(struct page *page)
125 {
126 /* Lowmem is re-populated first, so highmem pages go at list tail. */
127 if (PageHighMem(page)) {
128 list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
129 bs.balloon_high++;
130 dec_totalhigh_pages();
131 } else {
132 list_add(PAGE_TO_LIST(page), &ballooned_pages);
133 bs.balloon_low++;
134 }
135 }
137 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
138 static struct page *balloon_retrieve(void)
139 {
140 struct page *page;
142 if (list_empty(&ballooned_pages))
143 return NULL;
145 page = LIST_TO_PAGE(ballooned_pages.next);
146 UNLIST_PAGE(page);
148 if (PageHighMem(page)) {
149 bs.balloon_high--;
150 inc_totalhigh_pages();
151 }
152 else
153 bs.balloon_low--;
155 return page;
156 }
158 static struct page *balloon_first_page(void)
159 {
160 if (list_empty(&ballooned_pages))
161 return NULL;
162 return LIST_TO_PAGE(ballooned_pages.next);
163 }
165 static struct page *balloon_next_page(struct page *page)
166 {
167 struct list_head *next = PAGE_TO_LIST(page)->next;
168 if (next == &ballooned_pages)
169 return NULL;
170 return LIST_TO_PAGE(next);
171 }
/* Hand a page back to the allocator, using the cold path when available. */
static inline void balloon_free_page(struct page *page)
{
#ifndef MODULE
	if (put_page_testzero(page))
		free_cold_page(page);
#else
	/* free_cold_page() is not being exported. */
	__free_page(page);
#endif
}
/* balloon_timer callback: defer further rebalancing to process context. */
static void balloon_alarm(unsigned long unused)
{
	schedule_work(&balloon_worker);
}
189 static unsigned long current_target(void)
190 {
191 unsigned long target = min(bs.target_pages, bs.hard_limit);
192 if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
193 target = bs.current_pages + bs.balloon_low + bs.balloon_high;
194 return target;
195 }
/*
 * Lower bound on the balloon target: keep enough memory resident for the
 * kernel to keep functioning, scaled to the machine's total page count.
 */
static unsigned long minimum_target(void)
{
#ifndef CONFIG_XEN
/* max_pfn is not usable here when not built for Xen; fall back to
 * num_physpages, which works when built as a module too. */
#define max_pfn num_physpages
#endif
	unsigned long min_pages, curr_pages = current_target();

#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
	/* Simple continuous piecewise linear function:
	 *  max MiB -> min MiB	gradient
	 *       0	   0
	 *      16	  16
	 *      32	  24
	 *     128	  72	(1/2)
	 *     512	 168	(1/4)
	 *    2048	 360	(1/8)
	 *    8192	 552	(1/32)
	 *   32768	1320
	 *  131072	4392
	 */
	if (max_pfn < MB2PAGES(128))
		min_pages = MB2PAGES(8) + (max_pfn >> 1);
	else if (max_pfn < MB2PAGES(512))
		min_pages = MB2PAGES(40) + (max_pfn >> 2);
	else if (max_pfn < MB2PAGES(2048))
		min_pages = MB2PAGES(104) + (max_pfn >> 3);
	else
		min_pages = MB2PAGES(296) + (max_pfn >> 5);
#undef MB2PAGES

	/* Don't enforce growth */
	return min(min_pages, curr_pages);

#ifndef CONFIG_XEN
#undef max_pfn
#endif
}
234 static int increase_reservation(unsigned long nr_pages)
235 {
236 unsigned long pfn, i, flags;
237 struct page *page;
238 long rc;
239 struct xen_memory_reservation reservation = {
240 .address_bits = 0,
241 .extent_order = 0,
242 .domid = DOMID_SELF
243 };
245 if (nr_pages > ARRAY_SIZE(frame_list))
246 nr_pages = ARRAY_SIZE(frame_list);
248 balloon_lock(flags);
250 page = balloon_first_page();
251 for (i = 0; i < nr_pages; i++) {
252 BUG_ON(page == NULL);
253 frame_list[i] = page_to_pfn(page);;
254 page = balloon_next_page(page);
255 }
257 set_xen_guest_handle(reservation.extent_start, frame_list);
258 reservation.nr_extents = nr_pages;
259 rc = HYPERVISOR_memory_op(
260 XENMEM_populate_physmap, &reservation);
261 if (rc < nr_pages) {
262 if (rc > 0) {
263 int ret;
265 /* We hit the Xen hard limit: reprobe. */
266 reservation.nr_extents = rc;
267 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
268 &reservation);
269 BUG_ON(ret != rc);
270 }
271 if (rc >= 0)
272 bs.hard_limit = (bs.current_pages + rc -
273 bs.driver_pages);
274 goto out;
275 }
277 for (i = 0; i < nr_pages; i++) {
278 page = balloon_retrieve();
279 BUG_ON(page == NULL);
281 pfn = page_to_pfn(page);
282 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
283 phys_to_machine_mapping_valid(pfn));
285 set_phys_to_machine(pfn, frame_list[i]);
287 #ifdef CONFIG_XEN
288 /* Link back into the page tables if not highmem. */
289 if (pfn < max_low_pfn) {
290 int ret;
291 ret = HYPERVISOR_update_va_mapping(
292 (unsigned long)__va(pfn << PAGE_SHIFT),
293 pfn_pte_ma(frame_list[i], PAGE_KERNEL),
294 0);
295 BUG_ON(ret);
296 }
297 #endif
299 /* Relinquish the page back to the allocator. */
300 ClearPageReserved(page);
301 init_page_count(page);
302 balloon_free_page(page);
303 }
305 bs.current_pages += nr_pages;
306 totalram_pages = bs.current_pages;
308 out:
309 balloon_unlock(flags);
311 return 0;
312 }
/*
 * decrease_reservation: allocate up to @nr_pages pages from the kernel,
 * scrub them, unmap them, and return their machine frames to Xen.
 * Returns non-zero ("need_sleep") when allocation failed part-way, so the
 * caller backs off instead of hammering a memory-starved allocator.
 */
static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	void *v;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	/* Batch size is bounded by the single page backing frame_list. */
	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
			/* Memory pressure: balloon what we got, retry later. */
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		if (!PageHighMem(page)) {
			/* Scrub, then tear down the kernel linear mapping so
			 * the frame is unreachable before Xen reclaims it. */
			v = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(v, 1);
#ifdef CONFIG_XEN
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)v, __pte_ma(0), 0);
			BUG_ON(ret);
#endif
		}
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			/* Highmem needs a transient kmap for scrubbing. */
			v = kmap(page);
			scrub_pages(v, 1);
			kunmap(page);
		}
#endif
	}

#ifdef CONFIG_XEN
	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();
#endif

	balloon_lock(flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	bs.current_pages -= nr_pages;
	totalram_pages = bs.current_pages;

	balloon_unlock(flags);

	return need_sleep;
}
/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	mutex_lock(&balloon_mutex);

	do {
		/* credit > 0: grow toward target; credit < 0: shrink. */
		credit = current_target() - bs.current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		/* Large adjustments can loop for a while; yield politely
		 * on non-preemptible kernels. */
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != bs.current_pages)
		mod_timer(&balloon_timer, jiffies + HZ);

	mutex_unlock(&balloon_mutex);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
	/* No need for lock. Not read-modify-write updates. */
	bs.hard_limit = ~0UL;
	/* Never let the target drop below the safety floor. */
	bs.target_pages = max(target, minimum_target());
	schedule_work(&balloon_worker);
}
/* Xenbus watch on this domain's memory/target key. */
static struct xenbus_watch target_watch =
{
	.node = "memory/target"
};
/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch,
			 const char **vec, unsigned int len)
{
	unsigned long long new_target;
	int err;

	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
	if (err != 1) {
		/* This is ok (for domain0 at least) - so just return */
		return;
	}

	/* The given memory/target value is in KiB, so it needs converting to
	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
	 */
	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
}
452 static int balloon_init_watcher(struct notifier_block *notifier,
453 unsigned long event,
454 void *data)
455 {
456 int err;
458 err = register_xenbus_watch(&target_watch);
459 if (err)
460 printk(KERN_ERR "Failed to set balloon watcher\n");
462 return NOTIFY_DONE;
463 }
465 #ifdef CONFIG_PROC_FS
466 static int balloon_write(struct file *file, const char __user *buffer,
467 unsigned long count, void *data)
468 {
469 char memstring[64], *endchar;
470 unsigned long long target_bytes;
472 if (!capable(CAP_SYS_ADMIN))
473 return -EPERM;
475 if (count <= 1)
476 return -EBADMSG; /* runt */
477 if (count > sizeof(memstring))
478 return -EFBIG; /* too long */
480 if (copy_from_user(memstring, buffer, count))
481 return -EFAULT;
482 memstring[sizeof(memstring)-1] = '\0';
484 target_bytes = memparse(memstring, &endchar);
485 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
487 return count;
488 }
/* /proc/xen/balloon read handler: report the current balloon accounting. */
static int balloon_read(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	int len;

	len = sprintf(
		page,
		"Current allocation: %8lu kB\n"
		"Requested target: %8lu kB\n"
		"Low-mem balloon: %8lu kB\n"
		"High-mem balloon: %8lu kB\n"
		"Driver pages: %8lu kB\n"
		"Xen hard limit: ",
		PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages),
		PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
		PAGES2KB(bs.driver_pages));

	/* ~0UL means "no limit discovered yet" (see balloon_set_new_target). */
	if (bs.hard_limit != ~0UL)
		len += sprintf(page + len, "%8lu kB\n",
			       PAGES2KB(bs.hard_limit));
	else
		len += sprintf(page + len, " ??? kB\n");

	*eof = 1;
	return len;
}
516 #endif
/* Hooks balloon_init_watcher onto xenstore start-up (see balloon_init). */
static struct notifier_block xenstore_notifier;
/*
 * Driver initialisation: seed the accounting from the boot-time memory
 * reservation, create the /proc interface, register sysfs entries, prime
 * the balloon with any boot-time excess pages, and arrange for the
 * xenstore target watch once xenstore is available.
 */
static int __init balloon_init(void)
{
#if defined(CONFIG_X86) && defined(CONFIG_XEN)
	unsigned long pfn;
	struct page *page;
#endif

	if (!is_running_on_xen())
		return -ENODEV;

	IPRINTK("Initialising balloon driver.\n");

#ifdef CONFIG_XEN
	/* The domain may have been granted fewer pages than its pseudo-phys
	 * map covers; start from the smaller figure. */
	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
	totalram_pages   = bs.current_pages;
#else
	bs.current_pages = totalram_pages;
#endif
	bs.target_pages  = bs.current_pages;
	bs.balloon_low   = 0;
	bs.balloon_high  = 0;
	bs.driver_pages  = 0UL;
	bs.hard_limit    = ~0UL; /* no hard limit known yet */

	init_timer(&balloon_timer);
	balloon_timer.data = 0;
	balloon_timer.function = balloon_alarm;

#ifdef CONFIG_PROC_FS
	if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
		WPRINTK("Unable to create /proc/xen/balloon.\n");
		return -1;
	}

	balloon_pde->read_proc  = balloon_read;
	balloon_pde->write_proc = balloon_write;
#endif
	balloon_sysfs_init();

#if defined(CONFIG_X86) && defined(CONFIG_XEN)
	/* Initialise the balloon with excess memory space. */
	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
		page = pfn_to_page(pfn);
		if (!PageReserved(page))
			balloon_append(page);
	}
#endif

	target_watch.callback = watch_target;
	xenstore_notifier.notifier_call = balloon_init_watcher;

	register_xenstore_notifier(&xenstore_notifier);

	return 0;
}

subsys_initcall(balloon_init);
578 static void __exit balloon_exit(void)
579 {
580 /* XXX - release balloon here */
581 return;
582 }
584 module_exit(balloon_exit);
/*
 * Adjust the count of pages a (backend) driver is holding outside the
 * balloon; driver_pages feeds into the hard-limit calculation in
 * increase_reservation().  @delta may be negative to return allowance.
 */
void balloon_update_driver_allowance(long delta)
{
	unsigned long flags;

	balloon_lock(flags);
	bs.driver_pages += delta;
	balloon_unlock(flags);
}
#ifdef CONFIG_XEN
/*
 * Per-PTE callback for alloc_empty_pages_and_pagevec(): clear the kernel
 * mapping and P2M entry for one page, then return its machine frame to
 * Xen via XENMEM_decrease_reservation.
 */
static int dealloc_pte_fn(
	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
	unsigned long mfn = pte_mfn(*pte);
	int ret;
	struct xen_memory_reservation reservation = {
		.nr_extents   = 1,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	set_xen_guest_handle(reservation.extent_start, &mfn);
	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != 1);
	return 0;
}
#endif
/*
 * Allocate @nr_pages scrubbed pages whose machine frames have been handed
 * back to Xen ("empty" pages, e.g. for grant-mapping by backend drivers).
 * Returns a kmalloc'ed vector of page pointers, or NULL on failure.
 * Release with free_empty_pages_and_pagevec().
 */
struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
	unsigned long flags;
	void *v;
	struct page *page, **pagevec;
	int i, ret;

	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
	if (pagevec == NULL)
		return NULL;

	for (i = 0; i < nr_pages; i++) {
		page = pagevec[i] = alloc_page(GFP_KERNEL|__GFP_COLD);
		if (page == NULL)
			goto err;

		/* Scrub before the frame leaves this domain. */
		v = page_address(page);
		scrub_pages(v, 1);

		balloon_lock(flags);

		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			/* Auto-translated: release the frame directly. */
			unsigned long gmfn = page_to_pfn(page);
			struct xen_memory_reservation reservation = {
				.nr_extents   = 1,
				.extent_order = 0,
				.domid        = DOMID_SELF
			};
			set_xen_guest_handle(reservation.extent_start, &gmfn);
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						   &reservation);
			if (ret == 1)
				ret = 0; /* success */
		} else {
#ifdef CONFIG_XEN
			/* PV: unmap + release via dealloc_pte_fn. */
			ret = apply_to_page_range(&init_mm, (unsigned long)v,
						  PAGE_SIZE, dealloc_pte_fn,
						  NULL);
#else
			/* Cannot handle non-auto translate mode. */
			ret = 1;
#endif
		}

		if (ret != 0) {
			balloon_unlock(flags);
			balloon_free_page(page);
			goto err;
		}

		totalram_pages = --bs.current_pages;

		balloon_unlock(flags);
	}

 out:
	/* The worker may now reclaim frames to compensate for those we took. */
	schedule_work(&balloon_worker);
#ifdef CONFIG_XEN
	flush_tlb_all();
#endif
	return pagevec;

 err:
	/* Pages allocated so far already gave up their frames, so they go
	 * into the balloon rather than back to the allocator. */
	balloon_lock(flags);
	while (--i >= 0)
		balloon_append(pagevec[i]);
	balloon_unlock(flags);
	kfree(pagevec);
	pagevec = NULL;
	goto out;
}
/*
 * Release a vector from alloc_empty_pages_and_pagevec(): the pages rejoin
 * the balloon (their frames are still with Xen) and the worker is kicked
 * to rebalance.  NULL @pagevec is a no-op.
 */
void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
{
	unsigned long flags;
	int i;

	if (pagevec == NULL)
		return;

	balloon_lock(flags);
	for (i = 0; i < nr_pages; i++) {
		/* Caller must hold the only reference. */
		BUG_ON(page_count(pagevec[i]) != 1);
		balloon_append(pagevec[i]);
	}
	balloon_unlock(flags);

	kfree(pagevec);

	schedule_work(&balloon_worker);
}
/* Return a driver-held page to the balloon and kick the worker. */
void balloon_release_driver_page(struct page *page)
{
	unsigned long flags;

	balloon_lock(flags);
	balloon_append(page);
	bs.driver_pages--;
	balloon_unlock(flags);

	schedule_work(&balloon_worker);
}
/* Interface presumably used by Xen backend drivers — confirm callers. */
EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(balloon_release_driver_page);

MODULE_LICENSE("Dual BSD/GPL");