ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c @ 14100:e47738923a05

[LINUX] Purge include <linux/config.h>. It has been obsolete for some time now.

Signed-off-by: Ian Campbell <ian.campbell@xensource.com>
author Ian Campbell <ian.campbell@xensource.com>
date Fri Feb 23 16:56:45 2007 +0000 (2007-02-23)
parents b3c41b9f01a6
children 5138c48c7138
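
For context, the edit this changeset applies to each affected file is a one-line deletion of the obsolete header; in sketch form (illustrative, not the literal diff for this file):

    -#include <linux/config.h>
     #include <linux/kernel.h>
     #include <linux/module.h>

The kernel build injects the configuration automatically (autoconf.h on the compiler command line), so the explicit include had long been a no-op.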
/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <xen/xen_proc.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/interface/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <linux/list.h>
#include <xen/xenbus.h>
#include "common.h"
#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *balloon_pde;
#endif

static DEFINE_MUTEX(balloon_mutex);

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
DEFINE_SPINLOCK(balloon_lock);

struct balloon_stats balloon_stats;

/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];

/* VM /proc information for memory */
extern unsigned long totalram_pages;

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
static DECLARE_WORK(balloon_worker, balloon_process, NULL);
static struct timer_list balloon_timer;

/* When ballooning out (allocating memory to return to Xen) we don't really
   want the kernel to try too hard since that can trigger the oom killer. */
#define GFP_BALLOON \
	(GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
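
/*
 * GFP_HIGHUSER lets the allocations come from highmem; __GFP_NOWARN
 * suppresses page-allocation-failure warnings; __GFP_NORETRY makes the
 * allocator fail fast rather than retry (retrying is what can invoke
 * the OOM killer); __GFP_NOMEMALLOC keeps us out of the emergency
 * reserves.
 */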
#define PAGE_TO_LIST(p) (&(p)->lru)
#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
#define UNLIST_PAGE(p)				\
	do {					\
		list_del(PAGE_TO_LIST(p));	\
		PAGE_TO_LIST(p)->next = NULL;	\
		PAGE_TO_LIST(p)->prev = NULL;	\
	} while(0)
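
/*
 * list_del() leaves the lru pointers poisoned; UNLIST_PAGE resets them
 * to NULL so that a page which is not currently on the balloon list can
 * be recognised by a plain NULL check.
 */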
#define IPRINTK(fmt, args...) \
	printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
	printk(KERN_WARNING "xen_mem: " fmt, ##args)
/* balloon_append: add the given page to the balloon. */
static void balloon_append(struct page *page)
{
	/* Lowmem is re-populated first, so highmem pages go at list tail. */
	if (PageHighMem(page)) {
		list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
		bs.balloon_high++;
	} else {
		list_add(PAGE_TO_LIST(page), &ballooned_pages);
		bs.balloon_low++;
	}
}
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(void)
{
	struct page *page;

	if (list_empty(&ballooned_pages))
		return NULL;

	page = LIST_TO_PAGE(ballooned_pages.next);
	UNLIST_PAGE(page);

	if (PageHighMem(page))
		bs.balloon_high--;
	else
		bs.balloon_low--;

	return page;
}
static struct page *balloon_first_page(void)
{
	if (list_empty(&ballooned_pages))
		return NULL;
	return LIST_TO_PAGE(ballooned_pages.next);
}

static struct page *balloon_next_page(struct page *page)
{
	struct list_head *next = PAGE_TO_LIST(page)->next;
	if (next == &ballooned_pages)
		return NULL;
	return LIST_TO_PAGE(next);
}
static void balloon_alarm(unsigned long unused)
{
	schedule_work(&balloon_worker);
}
static unsigned long current_target(void)
{
	unsigned long target = min(bs.target_pages, bs.hard_limit);
	if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
		target = bs.current_pages + bs.balloon_low + bs.balloon_high;
	return target;
}
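
/*
 * The target is clamped twice: first to the hard limit Xen imposes on
 * us, then to the number of page frames this kernel can actually hold
 * (currently populated pages plus everything in the balloon), since we
 * cannot re-populate frames we have no struct page for.
 */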
static int increase_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	balloon_lock(flags);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page == NULL);
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(
		XENMEM_populate_physmap, &reservation);
	if (rc < nr_pages) {
		if (rc > 0) {
			int ret;

			/* We hit the Xen hard limit: reprobe. */
			reservation.nr_extents = rc;
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					&reservation);
			BUG_ON(ret != rc);
		}
		if (rc >= 0)
			bs.hard_limit = (bs.current_pages + rc -
					 bs.driver_pages);
		goto out;
	}
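
	/*
	 * From here on Xen populated all nr_pages extents: frame_list[i]
	 * now holds the machine frame backing the i-th ballooned pfn.
	 * (On partial success above, the rc frames that were granted are
	 * handed straight back and the implied hard limit is recorded so
	 * that current_target() stops asking for them.)
	 */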
	for (i = 0; i < nr_pages; i++) {
		page = balloon_retrieve();
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		set_phys_to_machine(pfn, frame_list[i]);

		/* Link back into the page tables if not highmem. */
		if (pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}

		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		init_page_count(page);
		__free_page(page);
	}

	bs.current_pages += nr_pages;
	totalram_pages = bs.current_pages;

 out:
	balloon_unlock(flags);

	return 0;
}
static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i, flags;
	struct page *page;
	void *v;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_BALLOON)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		if (!PageHighMem(page)) {
			v = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(v, 1);
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)v, __pte_ma(0), 0);
			BUG_ON(ret);
		}
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			v = kmap(page);
			scrub_pages(v, 1);
			kunmap(page);
		}
#endif
	}

	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();
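
	/*
	 * After the flushes no CPU can still hold a stale mapping of the
	 * frames collected above, so it is safe to hand them back to Xen
	 * (which may immediately give them to another domain).
	 */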
	balloon_lock(flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	bs.current_pages -= nr_pages;
	totalram_pages = bs.current_pages;

	balloon_unlock(flags);

	return need_sleep;
}
/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	mutex_lock(&balloon_mutex);

	do {
		credit = current_target() - bs.current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != bs.current_pages)
		mod_timer(&balloon_timer, jiffies + HZ);

	mutex_unlock(&balloon_mutex);
}
/* Resets the Xen limit, sets new target, and kicks off processing. */
void balloon_set_new_target(unsigned long target)
{
	/* No need for lock. Not read-modify-write updates. */
	bs.hard_limit = ~0UL;
	bs.target_pages = target;
	schedule_work(&balloon_worker);
}
static struct xenbus_watch target_watch =
{
	.node = "memory/target"
};

/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch,
			 const char **vec, unsigned int len)
{
	unsigned long long new_target;
	int err;

	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
	if (err != 1) {
		/* This is ok (for domain0 at least) - so just return */
		return;
	}

	/* The given memory/target value is in KiB, so it needs converting to
	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
	 */
	balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
}
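
/*
 * Worked example of the conversion above, assuming 4 KiB pages
 * (PAGE_SHIFT == 12): a memory/target of 1048576 KiB (1 GiB) becomes
 * 1048576 >> (12 - 10) = 262144 pages, and 262144 * 4 KiB = 1 GiB.
 */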
static int balloon_init_watcher(struct notifier_block *notifier,
				unsigned long event,
				void *data)
{
	int err;

	err = register_xenbus_watch(&target_watch);
	if (err)
		printk(KERN_ERR "Failed to set balloon watcher\n");

	return NOTIFY_DONE;
}
#ifdef CONFIG_PROC_FS
static int balloon_write(struct file *file, const char __user *buffer,
			 unsigned long count, void *data)
{
	char memstring[64], *endchar;
	unsigned long long target_bytes;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (count <= 1)
		return -EBADMSG; /* runt */
	if (count > sizeof(memstring))
		return -EFBIG;   /* too long */

	if (copy_from_user(memstring, buffer, count))
		return -EFAULT;
	memstring[sizeof(memstring)-1] = '\0';

	target_bytes = memparse(memstring, &endchar);
	balloon_set_new_target(target_bytes >> PAGE_SHIFT);

	return count;
}
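
/*
 * memparse() understands K/M/G suffixes, so from a root shell e.g.
 *
 *	echo 512M > /proc/xen/balloon
 *
 * parses to 536870912 bytes and (with 4 KiB pages) sets a target of
 * 131072 pages.
 */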
static int balloon_read(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	int len;

	len = sprintf(
		page,
		"Current allocation: %8lu kB\n"
		"Requested target:   %8lu kB\n"
		"Low-mem balloon:    %8lu kB\n"
		"High-mem balloon:   %8lu kB\n"
		"Driver pages:       %8lu kB\n"
		"Xen hard limit:     ",
		PAGES2KB(bs.current_pages), PAGES2KB(bs.target_pages),
		PAGES2KB(bs.balloon_low), PAGES2KB(bs.balloon_high),
		PAGES2KB(bs.driver_pages));

	if (bs.hard_limit != ~0UL)
		len += sprintf(page + len, "%8lu kB\n",
			       PAGES2KB(bs.hard_limit));
	else
		len += sprintf(page + len, "     ??? kB\n");

	*eof = 1;
	return len;
}
#endif
static struct notifier_block xenstore_notifier;

static int __init balloon_init(void)
{
#ifdef CONFIG_X86
	unsigned long pfn;
	struct page *page;
#endif

	if (!is_running_on_xen())
		return -ENODEV;

	IPRINTK("Initialising balloon driver.\n");

	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
	totalram_pages = bs.current_pages;
	bs.target_pages = bs.current_pages;
	bs.balloon_low = 0;
	bs.balloon_high = 0;
	bs.driver_pages = 0UL;
	bs.hard_limit = ~0UL;

	init_timer(&balloon_timer);
	balloon_timer.data = 0;
	balloon_timer.function = balloon_alarm;

#ifdef CONFIG_PROC_FS
	if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
		WPRINTK("Unable to create /proc/xen/balloon.\n");
		return -1;
	}

	balloon_pde->read_proc = balloon_read;
	balloon_pde->write_proc = balloon_write;
#endif
	balloon_sysfs_init();

#ifdef CONFIG_X86
	/* Initialise the balloon with excess memory space. */
	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
		page = pfn_to_page(pfn);
		if (!PageReserved(page))
			balloon_append(page);
	}
#endif
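
	/*
	 * pfns between xen_start_info->nr_pages and max_pfn have a struct
	 * page but no machine frame behind them yet; seeding the balloon
	 * with them lets the domain grow above its boot allocation later.
	 */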
	target_watch.callback = watch_target;
	xenstore_notifier.notifier_call = balloon_init_watcher;

	register_xenstore_notifier(&xenstore_notifier);

	return 0;
}

subsys_initcall(balloon_init);
void balloon_update_driver_allowance(long delta)
{
	unsigned long flags;

	balloon_lock(flags);
	bs.driver_pages += delta;
	balloon_unlock(flags);
}
static int dealloc_pte_fn(
	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
	unsigned long mfn = pte_mfn(*pte);
	int ret;
	struct xen_memory_reservation reservation = {
		.nr_extents = 1,
		.extent_order = 0,
		.domid = DOMID_SELF
	};
	set_xen_guest_handle(reservation.extent_start, &mfn);
	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != 1);
	return 0;
}
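
/*
 * dealloc_pte_fn() is the per-pte callback used with apply_to_page_range()
 * below: it clears the kernel mapping, invalidates the P2M entry and then
 * surrenders the single backing machine frame to Xen.
 */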
struct page **alloc_empty_pages_and_pagevec(int nr_pages)
{
	unsigned long vaddr, flags;
	struct page *page, **pagevec;
	int i, ret;

	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
	if (pagevec == NULL)
		return NULL;

	for (i = 0; i < nr_pages; i++) {
		page = pagevec[i] = alloc_page(GFP_KERNEL);
		if (page == NULL)
			goto err;

		vaddr = (unsigned long)page_address(page);

		scrub_pages(vaddr, 1);

		balloon_lock(flags);

		if (xen_feature(XENFEAT_auto_translated_physmap)) {
			unsigned long gmfn = page_to_pfn(page);
			struct xen_memory_reservation reservation = {
				.nr_extents = 1,
				.extent_order = 0,
				.domid = DOMID_SELF
			};
			set_xen_guest_handle(reservation.extent_start, &gmfn);
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
						   &reservation);
			if (ret == 1)
				ret = 0; /* success */
		} else {
			ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
						  dealloc_pte_fn, NULL);
		}

		if (ret != 0) {
			balloon_unlock(flags);
			__free_page(page);
			goto err;
		}

		totalram_pages = --bs.current_pages;

		balloon_unlock(flags);
	}

 out:
	schedule_work(&balloon_worker);
	flush_tlb_all();
	return pagevec;

 err:
	balloon_lock(flags);
	while (--i >= 0)
		balloon_append(pagevec[i]);
	balloon_unlock(flags);
	kfree(pagevec);
	pagevec = NULL;
	goto out;
}
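
/*
 * Illustrative use of the pagevec API (hypothetical caller; the real
 * consumers are backend drivers such as netback):
 *
 *	struct page **pages = alloc_empty_pages_and_pagevec(16);
 *	if (pages != NULL) {
 *		... map foreign frames into the empty pages ...
 *		free_empty_pages_and_pagevec(pages, 16);
 *	}
 *
 * The pages come back with no machine frame behind them, ready to
 * receive grant mappings of another domain's memory.
 */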
void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
{
	unsigned long flags;
	int i;

	if (pagevec == NULL)
		return;

	balloon_lock(flags);
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page_count(pagevec[i]) != 1);
		balloon_append(pagevec[i]);
	}
	balloon_unlock(flags);

	kfree(pagevec);

	schedule_work(&balloon_worker);
}
void balloon_release_driver_page(struct page *page)
{
	unsigned long flags;

	balloon_lock(flags);
	balloon_append(page);
	bs.driver_pages--;
	balloon_unlock(flags);

	schedule_work(&balloon_worker);
}
EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
EXPORT_SYMBOL_GPL(alloc_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(free_empty_pages_and_pagevec);
EXPORT_SYMBOL_GPL(balloon_release_driver_page);

MODULE_LICENSE("Dual BSD/GPL");