linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c @ changeset 10472:fc1c6dfd1807

[LINUX] Transparent virtualization fixes.
Signed-off-by: Tristan Gingold <tristan.gingold@bull.net>
Author: kaf24@firebug.cl.cam.ac.uk
Date:   Wed Jun 21 16:54:09 2006 +0100
/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <xen/xen_proc.h>
#include <asm/hypervisor.h>
#include <xen/balloon.h>
#include <xen/interface/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <linux/list.h>

#include <xen/xenbus.h>
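/* Convert a page count to KiB: pages -> bytes is PAGE_SHIFT; KiB drops 10 bits. */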
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))

#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *balloon_pde;
#endif
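/* Serializes worker runs: only one balloon_process() executes at a time. */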
static DECLARE_MUTEX(balloon_mutex);

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
DEFINE_SPINLOCK(balloon_lock);

/* We aim for 'current allocation' == 'target allocation'. */
static unsigned long current_pages;
static unsigned long target_pages;

/* VM /proc information for memory */
extern unsigned long totalram_pages;

/* We may hit the hard limit in Xen. If we do then we remember it. */
static unsigned long hard_limit;

/*
 * Drivers may alter the memory reservation independently, but they must
 * inform the balloon driver so that we can avoid hitting the hard limit.
 */
static unsigned long driver_pages;

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
static unsigned long balloon_low, balloon_high;

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
static DECLARE_WORK(balloon_worker, balloon_process, NULL);
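/* Retry timer: rearmed by balloon_process() when the target is not yet met. */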
static struct timer_list balloon_timer;
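/*
 * Ballooned pages are chained through page->lru. UNLIST_PAGE also clears
 * the link pointers so stale list membership is easy to spot.
 */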
#define PAGE_TO_LIST(p) (&(p)->lru)
#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
#define UNLIST_PAGE(p)				\
	do {					\
		list_del(PAGE_TO_LIST(p));	\
		PAGE_TO_LIST(p)->next = NULL;	\
		PAGE_TO_LIST(p)->prev = NULL;	\
	} while (0)

#define IPRINTK(fmt, args...) \
	printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
	printk(KERN_WARNING "xen_mem: " fmt, ##args)

/* balloon_append: add the given page to the balloon. */
static void balloon_append(struct page *page)
{
	/* Lowmem is re-populated first, so highmem pages go at list tail. */
	if (PageHighMem(page)) {
		list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
		balloon_high++;
	} else {
		list_add(PAGE_TO_LIST(page), &ballooned_pages);
		balloon_low++;
	}
}

/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(void)
{
	struct page *page;

	if (list_empty(&ballooned_pages))
		return NULL;

	page = LIST_TO_PAGE(ballooned_pages.next);
	UNLIST_PAGE(page);

	if (PageHighMem(page))
		balloon_high--;
	else
		balloon_low--;

	return page;
}

static struct page *balloon_first_page(void)
{
	if (list_empty(&ballooned_pages))
		return NULL;
	return LIST_TO_PAGE(ballooned_pages.next);
}

static struct page *balloon_next_page(struct page *page)
{
	struct list_head *next = PAGE_TO_LIST(page)->next;
	if (next == &ballooned_pages)
		return NULL;
	return LIST_TO_PAGE(next);
}

static void balloon_alarm(unsigned long unused)
{
	schedule_work(&balloon_worker);
}

static unsigned long current_target(void)
{
	unsigned long target = min(target_pages, hard_limit);
	if (target > (current_pages + balloon_low + balloon_high))
		target = current_pages + balloon_low + balloon_high;
	return target;
}
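/*
 * Ask Xen to populate up to nr_pages ballooned frames and hand the
 * resulting pages back to the kernel allocator. Called with balloon_mutex
 * held by balloon_process(); takes balloon_lock internally.
 */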
static int increase_reservation(unsigned long nr_pages)
{
	unsigned long *frame_list, pfn, i, flags;
	struct page *page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
		nr_pages = PAGE_SIZE / sizeof(unsigned long);

	frame_list = (unsigned long *)__get_free_page(GFP_KERNEL);
	if (frame_list == NULL)
		return -ENOMEM;

	balloon_lock(flags);

	page = balloon_first_page();
	for (i = 0; i < nr_pages; i++) {
		BUG_ON(page == NULL);
		frame_list[i] = page_to_pfn(page);
		page = balloon_next_page(page);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(
		XENMEM_populate_physmap, &reservation);
	if (rc < nr_pages) {
		int ret;
		/* We hit the Xen hard limit: reprobe. */
		set_xen_guest_handle(reservation.extent_start, frame_list);
		reservation.nr_extents = rc;
		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		BUG_ON(ret != rc);
		hard_limit = current_pages + rc - driver_pages;
		goto out;
	}

	for (i = 0; i < nr_pages; i++) {
		page = balloon_retrieve();
		BUG_ON(page == NULL);

		pfn = page_to_pfn(page);
		BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
		       phys_to_machine_mapping_valid(pfn));

		/* Update P->M and M->P tables. */
		set_phys_to_machine(pfn, frame_list[i]);
		xen_machphys_update(frame_list[i], pfn);

		/* Link back into the page tables if not highmem. */
		if (pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)__va(pfn << PAGE_SHIFT),
				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
				0);
			BUG_ON(ret);
		}

		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		set_page_count(page, 1);
		__free_page(page);
	}

	current_pages += nr_pages;
	totalram_pages = current_pages;

 out:
	balloon_unlock(flags);

	free_page((unsigned long)frame_list);

	return 0;
}
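/*
 * Allocate up to nr_pages pages, scrub them, and return their machine
 * frames to Xen. Returns non-zero if the caller should back off and retry
 * later (the page allocator ran dry before the target was met).
 */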
static int decrease_reservation(unsigned long nr_pages)
{
	unsigned long *frame_list, pfn, i, flags;
	struct page *page;
	void *v;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
		nr_pages = PAGE_SIZE / sizeof(unsigned long);

	frame_list = (unsigned long *)__get_free_page(GFP_KERNEL);
	if (frame_list == NULL)
		return -ENOMEM;

	for (i = 0; i < nr_pages; i++) {
		if ((page = alloc_page(GFP_HIGHUSER)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = page_to_pfn(page);
		frame_list[i] = pfn_to_mfn(pfn);

		if (!PageHighMem(page)) {
			v = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(v, 1);
			ret = HYPERVISOR_update_va_mapping(
				(unsigned long)v, __pte_ma(0), 0);
			BUG_ON(ret);
		}
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			v = kmap(page);
			scrub_pages(v, 1);
			kunmap(page);
		}
#endif
	}

	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();

	balloon_lock(flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = mfn_to_pfn(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(pfn_to_page(pfn));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != nr_pages);

	current_pages -= nr_pages;
	totalram_pages = current_pages;

	balloon_unlock(flags);

	free_page((unsigned long)frame_list);

	return need_sleep;
}
/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	down(&balloon_mutex);

	do {
		credit = current_target() - current_pages;
		if (credit > 0)
			need_sleep = (increase_reservation(credit) != 0);
		if (credit < 0)
			need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
		if (need_resched())
			schedule();
#endif
	} while ((credit != 0) && !need_sleep);

	/* Schedule more work if there is some still to be done. */
	if (current_target() != current_pages)
		mod_timer(&balloon_timer, jiffies + HZ);

	up(&balloon_mutex);
}
/* Resets the Xen hard limit, sets a new target, and kicks off processing. */
static void set_new_target(unsigned long target)
{
	/* No need for lock. These are not read-modify-write updates. */
	hard_limit = ~0UL;
	target_pages = target;
	schedule_work(&balloon_worker);
}
static struct xenbus_watch target_watch =
{
	.node = "memory/target"
};

/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch,
			 const char **vec, unsigned int len)
{
	unsigned long long new_target;
	int err;

	err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
	if (err != 1) {
		/* This is ok (for domain0 at least) - so just return */
		return;
	}

	/* The given memory/target value is in KiB, so it needs converting to
	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
	 */
	set_new_target(new_target >> (PAGE_SHIFT - 10));
}
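/*
 * Called from the xenstore notifier chain once the store is available:
 * register the watch on the memory/target key.
 */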
static int balloon_init_watcher(struct notifier_block *notifier,
				unsigned long event,
				void *data)
{
	int err;

	err = register_xenbus_watch(&target_watch);
	if (err)
		printk(KERN_ERR "Failed to set balloon watcher\n");

	return NOTIFY_DONE;
}

#ifdef CONFIG_PROC_FS
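/*
 * Write handler for /proc/xen/balloon. The size is parsed with memparse(),
 * so the usual suffixes work: e.g. "echo 512M > /proc/xen/balloon" requests
 * a 512 MiB target.
 */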
static int balloon_write(struct file *file, const char __user *buffer,
			 unsigned long count, void *data)
{
	char memstring[64], *endchar;
	unsigned long long target_bytes;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (count <= 1)
		return -EBADMSG; /* runt */
	if (count >= sizeof(memstring))
		return -EFBIG;   /* too long */

	if (copy_from_user(memstring, buffer, count))
		return -EFAULT;
	memstring[count] = '\0';
	target_bytes = memparse(memstring, &endchar);
	set_new_target(target_bytes >> PAGE_SHIFT);

	return count;
}
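/* Read handler for /proc/xen/balloon: report allocation, target and balloon sizes. */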
static int balloon_read(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	int len;

	len = sprintf(
		page,
		"Current allocation: %8lu kB\n"
		"Requested target:   %8lu kB\n"
		"Low-mem balloon:    %8lu kB\n"
		"High-mem balloon:   %8lu kB\n"
		"Xen hard limit:     ",
		PAGES2KB(current_pages), PAGES2KB(target_pages),
		PAGES2KB(balloon_low), PAGES2KB(balloon_high));

	if (hard_limit != ~0UL) {
		len += sprintf(
			page + len,
			"%8lu kB (inc. %8lu kB driver headroom)\n",
			PAGES2KB(hard_limit), PAGES2KB(driver_pages));
	} else {
		len += sprintf(
			page + len,
			"     ??? kB\n");
	}

	*eof = 1;
	return len;
}
#endif
static struct notifier_block xenstore_notifier;

static int __init balloon_init(void)
{
	unsigned long pfn;
	struct page *page;

	if (!is_running_on_xen())
		return -ENODEV;

	IPRINTK("Initialising balloon driver.\n");

	current_pages = min(xen_start_info->nr_pages, max_pfn);
	totalram_pages = current_pages;
	target_pages = current_pages;
	balloon_low = 0;
	balloon_high = 0;
	driver_pages = 0UL;
	hard_limit = ~0UL;

	init_timer(&balloon_timer);
	balloon_timer.data = 0;
	balloon_timer.function = balloon_alarm;

#ifdef CONFIG_PROC_FS
	if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
		WPRINTK("Unable to create /proc/xen/balloon.\n");
		return -1;
	}

	balloon_pde->read_proc = balloon_read;
	balloon_pde->write_proc = balloon_write;
#endif

	/* Initialise the balloon with excess memory space. */
	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
		page = pfn_to_page(pfn);
		if (!PageReserved(page))
			balloon_append(page);
	}

	target_watch.callback = watch_target;
	xenstore_notifier.notifier_call = balloon_init_watcher;

	register_xenstore_notifier(&xenstore_notifier);

	return 0;
}

subsys_initcall(balloon_init);
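/*
 * Drivers that alter the memory reservation on their own report the delta
 * here, so the hard-limit accounting in increase_reservation() stays
 * correct.
 */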
void balloon_update_driver_allowance(long delta)
{
	unsigned long flags;

	balloon_lock(flags);
	driver_pages += delta;
	balloon_unlock(flags);
}
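/*
 * Per-PTE callback for apply_to_page_range(): unmap the page, invalidate
 * its P2M entry and return the backing machine frame to Xen.
 */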
static int dealloc_pte_fn(
	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
	unsigned long mfn = pte_mfn(*pte);
	int ret;
	struct xen_memory_reservation reservation = {
		.nr_extents = 1,
		.extent_order = 0,
		.domid = DOMID_SELF
	};
	set_xen_guest_handle(reservation.extent_start, &mfn);
	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	BUG_ON(ret != 1);
	return 0;
}
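/*
 * Allocate a physically contiguous range, cede its backing machine frames
 * to Xen, and return the emptied 'struct page' range to the caller. The
 * worker is kicked afterwards so the allocation is rebalanced against the
 * target.
 */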
struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
{
	unsigned long vstart, flags;
	unsigned int order = get_order(nr_pages * PAGE_SIZE);
	int ret;
	unsigned long i;
	struct page *page;

	vstart = __get_free_pages(GFP_KERNEL, order);
	if (vstart == 0)
		return NULL;

	scrub_pages(vstart, 1 << order);

	balloon_lock(flags);
	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		unsigned long gmfn = __pa(vstart) >> PAGE_SHIFT;
		struct xen_memory_reservation reservation = {
			.nr_extents = 1,
			.extent_order = order,
			.domid = DOMID_SELF
		};
		set_xen_guest_handle(reservation.extent_start, &gmfn);
		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
					   &reservation);
		BUG_ON(ret != 1);
	} else {
		ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order,
					  dealloc_pte_fn, NULL);
		BUG_ON(ret);
	}
	current_pages -= 1UL << order;
	totalram_pages = current_pages;
	balloon_unlock(flags);

	schedule_work(&balloon_worker);

	flush_tlb_all();

	page = virt_to_page(vstart);

	for (i = 0; i < (1UL << order); i++)
		set_page_count(page + i, 1);

	return page;
}
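/*
 * Return an empty page range from balloon_alloc_empty_page_range() to the
 * balloon; the worker then repopulates the pages as the target allows.
 */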
void balloon_dealloc_empty_page_range(
	struct page *page, unsigned long nr_pages)
{
	unsigned long i, flags;
	unsigned int order = get_order(nr_pages * PAGE_SIZE);

	balloon_lock(flags);
	for (i = 0; i < (1UL << order); i++) {
		BUG_ON(page_count(page + i) != 1);
		balloon_append(page + i);
	}
	balloon_unlock(flags);

	schedule_work(&balloon_worker);
}

EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);

MODULE_LICENSE("Dual BSD/GPL");