ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c @ 14729:768a4fe105e8

Fix hvm balloon driver; missed this hunk in last checkin.

Signed-off-by: Steven Hand <steven@xensource.com>
author Steven Hand <steven@xensource.com>
date Thu Apr 05 08:24:56 2007 +0100 (2007-04-05)
parents f4318c89291a
children 1a347b19142a
line source
1 /******************************************************************************
2 * balloon.c
3 *
4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
5 *
6 * Copyright (c) 2003, B Dragovic
7 * Copyright (c) 2003-2004, M Williamson, K Fraser
8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version 2
12 * as published by the Free Software Foundation; or, when distributed
13 * separately from the Linux kernel or incorporated into other
14 * software packages, subject to the following license:
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this source file (the "Software"), to deal in the Software without
18 * restriction, including without limitation the rights to use, copy, modify,
19 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
20 * and to permit persons to whom the Software is furnished to do so, subject to
21 * the following conditions:
22 *
23 * The above copyright notice and this permission notice shall be included in
24 * all copies or substantial portions of the Software.
25 *
26 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
29 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
30 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
31 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
32 * IN THE SOFTWARE.
33 */
35 #include <linux/kernel.h>
36 #include <linux/module.h>
37 #include <linux/sched.h>
38 #include <linux/errno.h>
39 #include <linux/mm.h>
40 #include <linux/mman.h>
41 #include <linux/smp_lock.h>
42 #include <linux/pagemap.h>
43 #include <linux/bootmem.h>
44 #include <linux/highmem.h>
45 #include <linux/vmalloc.h>
46 #include <linux/mutex.h>
47 #include <xen/xen_proc.h>
48 #include <asm/hypervisor.h>
49 #include <xen/balloon.h>
50 #include <xen/interface/memory.h>
51 #include <asm/maddr.h>
52 #include <asm/page.h>
53 #include <asm/pgalloc.h>
54 #include <asm/pgtable.h>
55 #include <asm/uaccess.h>
56 #include <asm/tlb.h>
57 #include <linux/highmem.h>
58 #include <linux/list.h>
59 #include <xen/xenbus.h>
60 #include "common.h"
/* /proc/xen/balloon entry; only present when procfs is configured. */
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *balloon_pde;
#endif

/* Serialises balloon_process() workers against each other. */
static DEFINE_MUTEX(balloon_mutex);

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
DEFINE_SPINLOCK(balloon_lock);

/* Driver counters; accessed below via the short name "bs" (presumably a
 * #define alias from xen/balloon.h — TODO confirm). */
struct balloon_stats balloon_stats;

/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];

/* VM /proc information for memory */
extern unsigned long totalram_pages;

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
static DECLARE_WORK(balloon_worker, balloon_process, NULL);
static struct timer_list balloon_timer;

/* When ballooning out (allocating memory to return to Xen) we don't really
   want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)

/* Ballooned pages are chained through their (otherwise unused) page->lru. */
#define PAGE_TO_LIST(p) (&(p)->lru)
#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
/* Unlink a page and poison its list pointers so stale links trap early. */
#define UNLIST_PAGE(p)				\
	do {					\
		list_del(PAGE_TO_LIST(p));	\
		PAGE_TO_LIST(p)->next = NULL;	\
		PAGE_TO_LIST(p)->prev = NULL;	\
	} while(0)

/* Prefixed info/warning printk helpers for this driver. */
#define IPRINTK(fmt, args...) \
	printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
	printk(KERN_WARNING "xen_mem: " fmt, ##args)
110 /* balloon_append: add the given page to the balloon. */
111 static void balloon_append(struct page *page)
112 {
113 /* Lowmem is re-populated first, so highmem pages go at list tail. */
114 if (PageHighMem(page)) {
115 list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
116 bs.balloon_high++;
117 } else {
118 list_add(PAGE_TO_LIST(page), &ballooned_pages);
119 bs.balloon_low++;
120 }
121 }
123 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
124 static struct page *balloon_retrieve(void)
125 {
126 struct page *page;
128 if (list_empty(&ballooned_pages))
129 return NULL;
131 page = LIST_TO_PAGE(ballooned_pages.next);
132 UNLIST_PAGE(page);
134 if (PageHighMem(page))
135 bs.balloon_high--;
136 else
137 bs.balloon_low--;
139 return page;
140 }
142 static struct page *balloon_first_page(void)
143 {
144 if (list_empty(&ballooned_pages))
145 return NULL;
146 return LIST_TO_PAGE(ballooned_pages.next);
147 }
149 static struct page *balloon_next_page(struct page *page)
150 {
151 struct list_head *next = PAGE_TO_LIST(page)->next;
152 if (next == &ballooned_pages)
153 return NULL;
154 return LIST_TO_PAGE(next);
155 }
/* Timer callback: re-queue the balloon worker in process context. */
static void balloon_alarm(unsigned long unused)
{
	schedule_work(&balloon_worker);
}
162 static unsigned long current_target(void)
163 {
164 unsigned long target = min(bs.target_pages, bs.hard_limit);
165 if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
166 target = bs.current_pages + bs.balloon_low + bs.balloon_high;
167 return target;
168 }
170 static int increase_reservation(unsigned long nr_pages)
171 {
172 unsigned long pfn, i, flags;
173 struct page *page;
174 long rc;
175 struct xen_memory_reservation reservation = {
176 .address_bits = 0,
177 .extent_order = 0,
178 .domid = DOMID_SELF
179 };
181 if (nr_pages > ARRAY_SIZE(frame_list))
182 nr_pages = ARRAY_SIZE(frame_list);
184 balloon_lock(flags);
186 page = balloon_first_page();
187 for (i = 0; i < nr_pages; i++) {
188 BUG_ON(page == NULL);
189 frame_list[i] = page_to_pfn(page);;
190 page = balloon_next_page(page);
191 }
193 set_xen_guest_handle(reservation.extent_start, frame_list);
194 reservation.nr_extents = nr_pages;
195 rc = HYPERVISOR_memory_op(
196 XENMEM_populate_physmap, &reservation);
197 if (rc < nr_pages) {
198 if (rc > 0) {
199 int ret;
201 /* We hit the Xen hard limit: reprobe. */
202 reservation.nr_extents = rc;
203 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
204 &reservation);
205 BUG_ON(ret != rc);
206 }
207 if (rc >= 0)
208 bs.hard_limit = (bs.current_pages + rc -
209 bs.driver_pages);
210 goto out;
211 }
213 for (i = 0; i < nr_pages; i++) {
214 page = balloon_retrieve();
215 BUG_ON(page == NULL);
217 pfn = page_to_pfn(page);
218 BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
219 phys_to_machine_mapping_valid(pfn));
221 set_phys_to_machine(pfn, frame_list[i]);
223 #ifdef CONFIG_XEN
224 /* Link back into the page tables if not highmem. */
225 if (pfn < max_low_pfn) {
226 int ret;
227 ret = HYPERVISOR_update_va_mapping(
228 (unsigned long)__va(pfn << PAGE_SHIFT),
229 pfn_pte_ma(frame_list[i], PAGE_KERNEL),
230 0);
231 BUG_ON(ret);
232 }
233 #endif
235 /* Relinquish the page back to the allocator. */
236 ClearPageReserved(page);
237 init_page_count(page);
238 __free_page(page);
239 }
241 bs.current_pages += nr_pages;
242 totalram_pages = bs.current_pages;
244 out:
245 balloon_unlock(flags);
247 return 0;
248 }
/*
 * Give up to nr_pages pages back to Xen: allocate them from the kernel,
 * scrub them, drop any kernel mappings, then release the backing machine
 * frames via XENMEM_decrease_reservation.
 *
 * Returns non-zero ("need sleep") if the allocator ran dry before the
 * batch completed, so the caller should back off and retry later.
 */
static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	void *v;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	/* Batch size is bounded by the page-sized frame_list. */
	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
			/* Allocator exhausted: balloon out what we have. */
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		if (!PageHighMem(page)) {
			v = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(v, 1);
#ifdef CONFIG_XEN
			/* Unhook the page from the kernel direct mapping. */
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)v, __pte_ma(0), 0);
			BUG_ON(ret);
#endif
		}
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			/* Highmem pages must be kmapped to be scrubbed. */
			v = kmap(page);
			scrub_pages(v, 1);
			kunmap(page);
		}
#endif
	}

#ifdef CONFIG_XEN
	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();
#endif

	balloon_lock(flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

#ifndef CONFIG_XEN
	/* XXX Temporary hack. */
	{
		extern void xen_invalidate_foreign_mappings(void);
		xen_invalidate_foreign_mappings();
	}
#endif

	/* Hand the collected MFNs back to the hypervisor. */
	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	bs.current_pages -= nr_pages;
	totalram_pages = bs.current_pages;

	balloon_unlock(flags);

	return need_sleep;
}
330 /*
331 * We avoid multiple worker processes conflicting via the balloon mutex.
332 * We may of course race updates of the target counts (which are protected
333 * by the balloon lock), or with changes to the Xen hard limit, but we will
334 * recover from these in time.
335 */
336 static void balloon_process(void *unused)
337 {
338 int need_sleep = 0;
339 long credit;
341 mutex_lock(&balloon_mutex);
343 do {
344 credit = current_target() - bs.current_pages;
345 if (credit > 0)
346 need_sleep = (increase_reservation(credit) != 0);
347 if (credit < 0)
348 need_sleep = (decrease_reservation(-credit) != 0);
350 #ifndef CONFIG_PREEMPT
351 if (need_resched())
352 schedule();
353 #endif
354 } while ((credit != 0) && !need_sleep);
356 /* Schedule more work if there is some still to be done. */
357 if (current_target() != bs.current_pages)
358 mod_timer(&balloon_timer, jiffies + HZ);
360 mutex_unlock(&balloon_mutex);
361 }
/* Resets the Xen limit, sets new target (in pages), and kicks off
 * processing via the workqueue. */
void balloon_set_new_target(unsigned long target)
{
	/* No need for lock. Not read-modify-write updates. */
	bs.hard_limit = ~0UL;
	bs.target_pages = target;
	schedule_work(&balloon_worker);
}
/* Xenstore watch on "memory/target"; callback is set in balloon_init(). */
static struct xenbus_watch target_watch =
{
	.node = "memory/target"
};
377 /* React to a change in the target key */
378 static void watch_target(struct xenbus_watch *watch,
379 const char **vec, unsigned int len)
380 {
381 unsigned long long new_target;
382 int err;
384 err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
385 if (err != 1) {
386 /* This is ok (for domain0 at least) - so just return */
387 return;
388 }
390 /* The given memory/target value is in KiB, so it needs converting to
391 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
392 */
393 balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
394 }
396 static int balloon_init_watcher(struct notifier_block *notifier,
397 unsigned long event,
398 void *data)
399 {
400 int err;
402 err = register_xenbus_watch(&target_watch);
403 if (err)
404 printk(KERN_ERR "Failed to set balloon watcher\n");
406 return NOTIFY_DONE;
407 }
409 #ifdef CONFIG_PROC_FS
410 static int balloon_write(struct file *file, const char __user *buffer,
411 unsigned long count, void *data)
412 {
413 char memstring[64], *endchar;
414 unsigned long long target_bytes;
416 if (!capable(CAP_SYS_ADMIN))
417 return -EPERM;
419 if (count <= 1)
420 return -EBADMSG; /* runt */
421 if (count > sizeof(memstring))
422 return -EFBIG; /* too long */
424 if (copy_from_user(memstring, buffer, count))
425 return -EFAULT;
426 memstring[sizeof(memstring)-1] = '\0';
428 target_bytes = memparse(memstring, &endchar);
429 balloon_set_new_target(target_bytes >> PAGE_SHIFT);
431 return count;
432 }
/*
 * /proc/xen/balloon read handler: report current/target allocation and
 * balloon statistics, all in kB.  Single-shot read (sets *eof).
 */
static int balloon_read(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	int len;

	len = sprintf(
		page,
		"Current allocation: %8lu kB\n"
		"Requested target: %8lu kB\n"
		"Low-mem balloon: %8lu kB\n"
		"High-mem balloon: %8lu kB\n"
		"Driver pages: %8lu kB\n"
		"Xen hard limit: ",
		PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages),
		PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
		PAGES2KB(bs.driver_pages));

	/* ~0UL means "no limit discovered yet" (see balloon_set_new_target). */
	if (bs.hard_limit != ~0UL)
		len += sprintf(page + len, "%8lu kB\n",
			       PAGES2KB(bs.hard_limit));
	else
		len += sprintf(page + len, " ??? kB\n");

	*eof = 1;
	return len;
}
#endif
462 static struct notifier_block xenstore_notifier;
464 static int __init balloon_init(void)
465 {
466 #if defined(CONFIG_X86) && defined(CONFIG_XEN)
467 unsigned long pfn;
468 struct page *page;
469 #endif
471 if (!is_running_on_xen())
472 return -ENODEV;
474 IPRINTK("Initialising balloon driver.\n");
476 #ifdef CONFIG_XEN
477 bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
478 totalram_pages = bs.current_pages;
479 #else
480 bs.current_pages = totalram_pages;
481 #endif
482 bs.target_pages = bs.current_pages;
483 bs.balloon_low = 0;
484 bs.balloon_high = 0;
485 bs.driver_pages = 0UL;
486 bs.hard_limit = ~0UL;
488 init_timer(&balloon_timer);
489 balloon_timer.data = 0;
490 balloon_timer.function = balloon_alarm;
492 #ifdef CONFIG_PROC_FS
493 if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
494 WPRINTK("Unable to create /proc/xen/balloon.\n");
495 return -1;
496 }
498 balloon_pde->read_proc = balloon_read;
499 balloon_pde->write_proc = balloon_write;
500 #endif
501 balloon_sysfs_init();
503 #if defined(CONFIG_X86) && defined(CONFIG_XEN)
504 /* Initialise the balloon with excess memory space. */
505 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
506 page = pfn_to_page(pfn);
507 if (!PageReserved(page))
508 balloon_append(page);
509 }
510 #endif
512 target_watch.callback = watch_target;
513 xenstore_notifier.notifier_call = balloon_init_watcher;
515 register_xenstore_notifier(&xenstore_notifier);
517 return 0;
518 }
520 subsys_initcall(balloon_init);
/* Module teardown; ballooned memory is deliberately not reclaimed yet. */
static void balloon_exit(void)
{
	/* XXX - release balloon here */
	return;
}

module_exit(balloon_exit);
/*
 * Adjust (by delta, which may be negative) the count of pages accounted
 * to backend drivers; driver_pages feeds into the hard-limit calculation
 * in increase_reservation().
 */
void balloon_update_driver_allowance(long delta)
{
	unsigned long flags;

	balloon_lock(flags);
	bs.driver_pages += delta;
	balloon_unlock(flags);
}
#ifdef CONFIG_XEN
/*
 * apply_to_page_range() callback: surrender the machine frame behind a
 * single kernel PTE to Xen.  Clears the PTE, invalidates the P2M entry,
 * then hands the MFN back via XENMEM_decrease_reservation.
 * Called with the balloon lock held (see alloc_empty_pages_and_pagevec).
 */
static int dealloc_pte_fn(
	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
	unsigned long mfn = pte_mfn(*pte);
	int ret;
	struct xen_memory_reservation reservation = {
		.nr_extents = 1,
		.extent_order = 0,
		.domid = DOMID_SELF
	};
	set_xen_guest_handle(reservation.extent_start, &mfn);
	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != 1);
	return 0;
}
#endif
/*
 * Allocate nr_pages kernel pages and give their backing machine frames
 * back to Xen, leaving empty pseudo-physical slots behind.  Returns a
 * kmalloc'ed vector of the pages (caller owns it; release with
 * free_empty_pages_and_pagevec()), or NULL on failure.
 */
struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
	unsigned long vaddr, flags;
	struct page *page, **pagevec;
	int i, ret;

	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
	if (pagevec == NULL)
		return NULL;

	for (i = 0; i < nr_pages; i++) {
		page = pagevec[i] = alloc_page(GFP_KERNEL);
		if (page == NULL)
			goto err;

		vaddr = (unsigned long)page_address(page);

		scrub_pages(vaddr, 1);

		balloon_lock(flags);

		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			unsigned long gmfn = page_to_pfn(page);
			struct xen_memory_reservation reservation = {
				.nr_extents = 1,
				.extent_order = 0,
				.domid = DOMID_SELF
			};
			set_xen_guest_handle(reservation.extent_start, &gmfn);
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						   &reservation);
			if (ret == 1)
				ret = 0; /* success */
			/* NOTE(review): a return of 0 (nothing released)
			 * also falls through as success here — verify the
			 * hypercall cannot legitimately return 0. */
		} else {
#ifdef CONFIG_XEN
			/* Release via the PTE walker, one page at a time. */
			ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
						  dealloc_pte_fn, NULL);
#else
			/* Cannot handle non-auto translate mode. */
			ret = 1;
#endif
		}

		if (ret != 0) {
			balloon_unlock(flags);
			__free_page(page);
			goto err;
		}

		totalram_pages = --bs.current_pages;

		balloon_unlock(flags);
	}

 out:
	/* Let the worker rebalance toward the target. */
	schedule_work(&balloon_worker);
#ifdef CONFIG_XEN
	flush_tlb_all();
#endif
	return pagevec;

 err:
	/* Pages [0, i) were already given to Xen: park them in the balloon. */
	balloon_lock(flags);
	while (--i >= 0)
		balloon_append(pagevec[i]);
	balloon_unlock(flags);
	kfree(pagevec);
	pagevec = NULL;
	goto out;
}
630 void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
631 {
632 unsigned long flags;
633 int i;
635 if (pagevec == NULL)
636 return;
638 balloon_lock(flags);
639 for (i = 0; i < nr_pages; i++) {
640 BUG_ON(page_count(pagevec[i]) != 1);
641 balloon_append(pagevec[i]);
642 }
643 balloon_unlock(flags);
645 kfree(pagevec);
647 schedule_work(&balloon_worker);
648 }
/* Return a driver-owned page to the balloon, decrementing driver_pages,
 * and kick the worker to rebalance. */
void balloon_release_driver_page(struct page *page)
{
	unsigned long flags;

	balloon_lock(flags);
	balloon_append(page);
	bs.driver_pages--;
	balloon_unlock(flags);

	schedule_work(&balloon_worker);
}
/* Exported for use by other (GPL-compatible) Xen driver modules. */
EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(balloon_release_driver_page);

MODULE_LICENSE("Dual BSD/GPL");