ia64/xen-unstable

linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c @ 7004:a498dab0beef

Match change to the memory/target value, as it is now in KiB, not bytes.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author:   emellor@ewan
date:     Wed Sep 21 15:12:32 2005 +0100 (2005-09-21)
parents:  3233e7ecfa9f
children: 55fc0ecc19c3
/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
#include <asm-xen/xen_proc.h>
#include <asm/hypervisor.h>
#include <asm-xen/balloon.h>
#include <asm-xen/xen-public/memory.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <linux/list.h>

#include <asm-xen/xenbus.h>
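
/* Convert a page count to KiB: PAGE_SHIFT turns pages into bytes, the
 * extra -10 turns bytes into KiB. With 4 KiB pages this is a shift by 2. */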
#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))

static struct proc_dir_entry *balloon_pde;

static DECLARE_MUTEX(balloon_mutex);

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;

/* We aim for 'current allocation' == 'target allocation'. */
static unsigned long current_pages;
static unsigned long target_pages;

/* We may hit the hard limit in Xen. If we do then we remember it. */
static unsigned long hard_limit;

/*
 * Drivers may alter the memory reservation independently, but they must
 * inform the balloon driver so that we can avoid hitting the hard limit.
 */
static unsigned long driver_pages;

/* List of ballooned pages, threaded through the mem_map array. */
static LIST_HEAD(ballooned_pages);
static unsigned long balloon_low, balloon_high;

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);
static DECLARE_WORK(balloon_worker, balloon_process, NULL);
static struct timer_list balloon_timer;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
/* Use the private and mapping fields of struct page as a list. */
#define PAGE_TO_LIST(p) ((struct list_head *)&p->private)
#define LIST_TO_PAGE(l) \
        (list_entry(((unsigned long *)l), struct page, private))
#define UNLIST_PAGE(p) \
        do { \
                list_del(PAGE_TO_LIST(p)); \
                p->mapping = NULL; \
                p->private = 0; \
        } while(0)
#else
/* There's a dedicated list field in struct page we can use. */
#define PAGE_TO_LIST(p) ( &p->list )
#define LIST_TO_PAGE(l) ( list_entry(l, struct page, list) )
#define UNLIST_PAGE(p) ( list_del(&p->list) )
#define pte_offset_kernel pte_offset
#define pud_t pgd_t
#define pud_offset(d, va) d
#define pud_none(d) 0
#define pud_bad(d) 0
#define subsys_initcall(_fn) __initcall(_fn)
#define pfn_to_page(_pfn) (mem_map + (_pfn))
#endif

#define IPRINTK(fmt, args...) \
        printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
        printk(KERN_WARNING "xen_mem: " fmt, ##args)

/* balloon_append: add the given page to the balloon. */
static void balloon_append(struct page *page)
{
        /* Lowmem is re-populated first, so highmem pages go at list tail. */
        if (PageHighMem(page)) {
                list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
                balloon_high++;
        } else {
                list_add(PAGE_TO_LIST(page), &ballooned_pages);
                balloon_low++;
        }
}

/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static struct page *balloon_retrieve(void)
{
        struct page *page;

        if (list_empty(&ballooned_pages))
                return NULL;

        page = LIST_TO_PAGE(ballooned_pages.next);
        UNLIST_PAGE(page);

        if (PageHighMem(page))
                balloon_high--;
        else
                balloon_low--;

        return page;
}
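
/* Timer callback: runs in interrupt context, so just reschedule the
 * worker, which does the actual ballooning in process context. */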
static void balloon_alarm(unsigned long unused)
{
        schedule_work(&balloon_worker);
}
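
/* The target we can act on right now: the requested target, clamped to the
 * Xen hard limit and to the most the balloon could possibly repopulate. */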
static unsigned long current_target(void)
{
        unsigned long target = min(target_pages, hard_limit);
        if (target > (current_pages + balloon_low + balloon_high))
                target = current_pages + balloon_low + balloon_high;
        return target;
}
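
/* Deflate the balloon by up to nr_pages: ask Xen for that many machine
 * frames, wire each one back into the P2M/M2P tables (and into the kernel
 * page tables for lowmem), then release the pages to the page allocator.
 * A short allocation from Xen means we hit the domain's hard limit. */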
static int increase_reservation(unsigned long nr_pages)
{
        unsigned long *mfn_list, pfn, i, flags;
        struct page   *page;
        long           rc;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };

        if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
                nr_pages = PAGE_SIZE / sizeof(unsigned long);

        mfn_list = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (mfn_list == NULL)
                return -ENOMEM;

        balloon_lock(flags);

        reservation.extent_start = mfn_list;
        reservation.nr_extents   = nr_pages;
        rc = HYPERVISOR_memory_op(
                XENMEM_increase_reservation, &reservation);
        if (rc < nr_pages) {
                /* We hit the Xen hard limit: reprobe. */
                reservation.extent_start = mfn_list;
                reservation.nr_extents   = rc;
                BUG_ON(HYPERVISOR_memory_op(
                        XENMEM_decrease_reservation,
                        &reservation) != rc);
                hard_limit = current_pages + rc - driver_pages;
                goto out;
        }

        for (i = 0; i < nr_pages; i++) {
                page = balloon_retrieve();
                BUG_ON(page == NULL);

                pfn = page - mem_map;
                BUG_ON(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY);

                /* Update P->M and M->P tables. */
                phys_to_machine_mapping[pfn] = mfn_list[i];
                xen_machphys_update(mfn_list[i], pfn);

                /* Link back into the page tables if not highmem. */
                if (pfn < max_low_pfn)
                        BUG_ON(HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
                                pfn_pte_ma(mfn_list[i], PAGE_KERNEL),
                                0));

                /* Relinquish the page back to the allocator. */
                ClearPageReserved(page);
                set_page_count(page, 1);
                __free_page(page);
        }

        current_pages += nr_pages;

 out:
        balloon_unlock(flags);

        free_page((unsigned long)mfn_list);

        return 0;
}
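
/* Inflate the balloon by up to nr_pages: allocate pages from the kernel,
 * scrub their contents (so no domain data leaks back to Xen), drop any
 * kernel mappings, invalidate their P2M entries, and finally hand the
 * backing machine frames back to Xen. Returns nonzero if the allocator
 * ran dry and the caller should retry after sleeping. */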
static int decrease_reservation(unsigned long nr_pages)
{
        unsigned long *mfn_list, pfn, i, flags;
        struct page   *page;
        void          *v;
        int            need_sleep = 0;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };

        if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
                nr_pages = PAGE_SIZE / sizeof(unsigned long);

        mfn_list = (unsigned long *)__get_free_page(GFP_KERNEL);
        if (mfn_list == NULL)
                return -ENOMEM;

        for (i = 0; i < nr_pages; i++) {
                if ((page = alloc_page(GFP_HIGHUSER)) == NULL) {
                        nr_pages = i;
                        need_sleep = 1;
                        break;
                }

                pfn = page - mem_map;
                mfn_list[i] = phys_to_machine_mapping[pfn];

                if (!PageHighMem(page)) {
                        v = phys_to_virt(pfn << PAGE_SHIFT);
                        scrub_pages(v, 1);
                        BUG_ON(HYPERVISOR_update_va_mapping(
                                (unsigned long)v, __pte_ma(0), 0));
                }
#ifdef CONFIG_XEN_SCRUB_PAGES
                else {
                        v = kmap(page);
                        scrub_pages(v, 1);
                        kunmap(page);
                }
#endif
        }

        /* Ensure that ballooned highmem pages don't have kmaps. */
        kmap_flush_unused();
        flush_tlb_all();

        balloon_lock(flags);

        /* No more mappings: invalidate P2M and add to balloon. */
        for (i = 0; i < nr_pages; i++) {
                pfn = mfn_to_pfn(mfn_list[i]);
                phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
                balloon_append(pfn_to_page(pfn));
        }

        reservation.extent_start = mfn_list;
        reservation.nr_extents   = nr_pages;
        BUG_ON(HYPERVISOR_memory_op(
                XENMEM_decrease_reservation, &reservation) != nr_pages);

        current_pages -= nr_pages;

        balloon_unlock(flags);

        free_page((unsigned long)mfn_list);

        return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void balloon_process(void *unused)
{
        int need_sleep = 0;
        long credit;

        down(&balloon_mutex);

        do {
                credit = current_target() - current_pages;
                if (credit > 0)
                        need_sleep = (increase_reservation(credit) != 0);
                if (credit < 0)
                        need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
                if (need_resched())
                        schedule();
#endif
        } while ((credit != 0) && !need_sleep);

        /* Schedule more work if there is some still to be done. */
        if (current_target() != current_pages)
                mod_timer(&balloon_timer, jiffies + HZ);

        up(&balloon_mutex);
}

/* Resets the Xen limit, sets new target, and kicks off processing. */
static void set_new_target(unsigned long target)
{
        /* No need for lock. Not read-modify-write updates. */
        hard_limit   = ~0UL;
        target_pages = target;
        schedule_work(&balloon_worker);
}

static struct xenbus_watch target_watch =
{
        .node = "memory/target"
};

/* React to a change in the target key */
static void watch_target(struct xenbus_watch *watch, const char *node)
{
        unsigned long long new_target;
        int err;

        err = xenbus_scanf("memory", "target", "%llu", &new_target);
        if (err != 1) {
                printk(KERN_ERR "Unable to read memory/target\n");
                return;
        }

        /* The given memory/target value is in KiB, so it needs converting to
           pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
        */
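        /* For example, with 4 KiB pages (PAGE_SHIFT == 12) a target of
           262144 KiB (256 MiB) becomes 262144 >> 2 == 65536 pages. */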
        set_new_target(new_target >> (PAGE_SHIFT - 10));
}

/* Set up our watcher.
   NB: Assumes xenbus_lock is held!
*/
int balloon_init_watcher(struct notifier_block *notifier,
                         unsigned long event,
                         void *data)
{
        int err;

        BUG_ON(down_trylock(&xenbus_lock) == 0);

        err = register_xenbus_watch(&target_watch);
        if (err)
                printk(KERN_ERR "Failed to set balloon watcher\n");

        return NOTIFY_DONE;
}
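
/* Handle a write to /proc/xen/balloon. The string is parsed by memparse(),
 * so plain byte counts and K/M/G suffixes are accepted; the result is
 * converted from bytes to pages and set as the new target, e.g.:
 *     echo 512M > /proc/xen/balloon */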
static int balloon_write(struct file *file, const char __user *buffer,
                         unsigned long count, void *data)
{
        char memstring[64], *endchar;
        unsigned long long target_bytes;

        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;

        if (count <= 1)
                return -EBADMSG; /* runt */
        if (count > sizeof(memstring))
                return -EFBIG;   /* too long */

        if (copy_from_user(memstring, buffer, count))
                return -EFAULT;
        memstring[sizeof(memstring)-1] = '\0';

        target_bytes = memparse(memstring, &endchar);
        set_new_target(target_bytes >> PAGE_SHIFT);

        return count;
}
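
/* Report status via /proc/xen/balloon: current and target allocation, the
 * low/high-memory balloon sizes, and the Xen hard limit (with the driver
 * headroom) if one has been discovered. */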
static int balloon_read(char *page, char **start, off_t off,
                        int count, int *eof, void *data)
{
        int len;

        len = sprintf(
                page,
                "Current allocation: %8lu kB\n"
                "Requested target:   %8lu kB\n"
                "Low-mem balloon:    %8lu kB\n"
                "High-mem balloon:   %8lu kB\n"
                "Xen hard limit:     ",
                PAGES2KB(current_pages), PAGES2KB(target_pages),
                PAGES2KB(balloon_low), PAGES2KB(balloon_high));

        if (hard_limit != ~0UL) {
                len += sprintf(
                        page + len,
                        "%8lu kB (inc. %8lu kB driver headroom)\n",
                        PAGES2KB(hard_limit), PAGES2KB(driver_pages));
        } else {
                len += sprintf(
                        page + len,
                        "     ??? kB\n");
        }

        *eof = 1;
        return len;
}

static struct notifier_block xenstore_notifier;

static int __init balloon_init(void)
{
        unsigned long pfn;
        struct page *page;

        IPRINTK("Initialising balloon driver.\n");

        current_pages = min(xen_start_info->nr_pages, max_pfn);
        target_pages  = current_pages;
        balloon_low   = 0;
        balloon_high  = 0;
        driver_pages  = 0UL;
        hard_limit    = ~0UL;

        init_timer(&balloon_timer);
        balloon_timer.data = 0;
        balloon_timer.function = balloon_alarm;

        if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
                WPRINTK("Unable to create /proc/xen/balloon.\n");
                return -1;
        }

        balloon_pde->read_proc  = balloon_read;
        balloon_pde->write_proc = balloon_write;

        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
                page = &mem_map[pfn];
                if (!PageReserved(page))
                        balloon_append(page);
        }

        target_watch.callback = watch_target;
        xenstore_notifier.notifier_call = balloon_init_watcher;

        register_xenstore_notifier(&xenstore_notifier);

        return 0;
}

subsys_initcall(balloon_init);
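
/* Let other drivers (such as the network backend) declare pages they have
 * taken from or returned to the reservation, so the hard-limit reprobe in
 * increase_reservation() can account for them. */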
void balloon_update_driver_allowance(long delta)
{
        unsigned long flags;

        balloon_lock(flags);
        driver_pages += delta;
        balloon_unlock(flags);
}
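
/* Per-PTE callback for generic_page_range(): clear the PTE, invalidate the
 * corresponding P2M entry, and hand the single backing machine frame
 * straight back to Xen. */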
static int dealloc_pte_fn(
        pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
{
        unsigned long mfn = pte_mfn(*pte);
        struct xen_memory_reservation reservation = {
                .extent_start = &mfn,
                .nr_extents   = 1,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        set_pte(pte, __pte_ma(0));
        phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] =
                INVALID_P2M_ENTRY;
        BUG_ON(HYPERVISOR_memory_op(
                XENMEM_decrease_reservation, &reservation) != 1);
        return 0;
}
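
/* Allocate a contiguous virtual range and strip the machine frames out
 * from under it, leaving empty pseudophysical space (typically so that a
 * driver can map foreign, i.e. other-domain, pages there). The worker is
 * kicked so the balloon can recover the surrendered frames elsewhere. */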
struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
{
        unsigned long vstart, flags;
        unsigned int  order = get_order(nr_pages * PAGE_SIZE);

        vstart = __get_free_pages(GFP_KERNEL, order);
        if (vstart == 0)
                return NULL;

        scrub_pages(vstart, 1 << order);

        BUG_ON(generic_page_range(
                &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL));

        balloon_lock(flags);
        current_pages -= 1UL << order;
        balloon_unlock(flags);

        schedule_work(&balloon_worker);

        flush_tlb_all();

        return virt_to_page(vstart);
}
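
/* Give back a range obtained from balloon_alloc_empty_page_range(): the
 * pages go onto the balloon lists and the worker repopulates them with
 * fresh machine frames as the target allows. */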
void balloon_dealloc_empty_page_range(
        struct page *page, unsigned long nr_pages)
{
        unsigned long i, flags;
        unsigned int  order = get_order(nr_pages * PAGE_SIZE);

        balloon_lock(flags);
        for (i = 0; i < (1UL << order); i++)
                balloon_append(page + i);
        balloon_unlock(flags);

        schedule_work(&balloon_worker);
}

EXPORT_SYMBOL(balloon_update_driver_allowance);
EXPORT_SYMBOL(balloon_alloc_empty_page_range);
EXPORT_SYMBOL(balloon_dealloc_empty_page_range);

/*
 * Local variables:
 *  c-file-style: "linux"
 *  indent-tabs-mode: t
 *  c-indent-level: 8
 *  c-basic-offset: 8
 *  tab-width: 8
 * End:
 */