ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/balloon/balloon.c @ 6552:a9873d384da4

Merge.
author adsharma@los-vmm.sc.intel.com
date Thu Aug 25 12:24:48 2005 -0700 (2005-08-25)
parents 112d44270733 fa0754a9f64f
children dfaf788ab18c
line source
1 /******************************************************************************
2 * balloon.c
3 *
4 * Xen balloon driver - enables returning/claiming memory to/from Xen.
5 *
6 * Copyright (c) 2003, B Dragovic
7 * Copyright (c) 2003-2004, M Williamson, K Fraser
8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
9 *
10 * This file may be distributed separately from the Linux kernel, or
11 * incorporated into other software packages, subject to the following license:
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this source file (the "Software"), to deal in the Software without
15 * restriction, including without limitation the rights to use, copy, modify,
16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
17 * and to permit persons to whom the Software is furnished to do so, subject to
18 * the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 * IN THE SOFTWARE.
30 */
32 #include <linux/config.h>
33 #include <linux/kernel.h>
34 #include <linux/module.h>
35 #include <linux/sched.h>
36 #include <linux/errno.h>
37 #include <linux/mm.h>
38 #include <linux/mman.h>
39 #include <linux/smp_lock.h>
40 #include <linux/pagemap.h>
41 #include <linux/bootmem.h>
42 #include <linux/highmem.h>
43 #include <linux/vmalloc.h>
44 #include <asm-xen/xen_proc.h>
45 #include <asm-xen/hypervisor.h>
46 #include <asm-xen/balloon.h>
47 #include <asm/pgalloc.h>
48 #include <asm/pgtable.h>
49 #include <asm/uaccess.h>
50 #include <asm/tlb.h>
51 #include <linux/list.h>
53 #include<asm-xen/xenbus.h>
/* Convert a count of pages to KiB (PAGE_SHIFT - 10 == log2(page size / 1K)). */
55 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
/* /proc/xen/balloon entry; read/write handlers are attached in balloon_init(). */
57 static struct proc_dir_entry *balloon_pde;
/* Serialises balloon_process() passes against each other. */
59 static DECLARE_MUTEX(balloon_mutex);
/* Guards reservation adjustments; not static - shared with other Xen drivers. */
60 spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
62 /* We aim for 'current allocation' == 'target allocation'. */
63 static unsigned long current_pages;
64 static unsigned long target_pages;
66 /* We may hit the hard limit in Xen. If we do then we remember it. */
67 static unsigned long hard_limit;
69 /*
70 * Drivers may alter the memory reservation independently, but they must
71 * inform the balloon driver so that we can avoid hitting the hard limit.
72 */
73 static unsigned long driver_pages;
75 /* List of ballooned pages, threaded through the mem_map array. */
76 static LIST_HEAD(ballooned_pages);
/* Counts of ballooned pages held in low memory and high memory respectively. */
77 static unsigned long balloon_low, balloon_high;
79 /* Main work function, always executed in process context. */
80 static void balloon_process(void *unused);
81 static DECLARE_WORK(balloon_worker, balloon_process, NULL);
/* Retry timer: re-schedules balloon_worker when a pass left work undone. */
82 static struct timer_list balloon_timer;
84 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
85 /* Use the private and mapping fields of struct page as a list. */
86 #define PAGE_TO_LIST(p) ((struct list_head *)&p->private)
87 #define LIST_TO_PAGE(l) \
88 (list_entry(((unsigned long *)l), struct page, private))
/* Unlink a ballooned page and clear the struct page fields borrowed above. */
89 #define UNLIST_PAGE(p) \
90 do { \
91 list_del(PAGE_TO_LIST(p)); \
92 p->mapping = NULL; \
93 p->private = 0; \
94 } while(0)
95 #else
96 /* There's a dedicated list field in struct page we can use. */
97 #define PAGE_TO_LIST(p) ( &p->list )
98 #define LIST_TO_PAGE(l) ( list_entry(l, struct page, list) )
99 #define UNLIST_PAGE(p) ( list_del(&p->list) )
/* 2.4 compatibility: emulate 2.6 pagetable helpers and the initcall macro. */
100 #define pte_offset_kernel pte_offset
101 #define pud_t pgd_t
102 #define pud_offset(d, va) d
103 #define pud_none(d) 0
104 #define pud_bad(d) 0
105 #define subsys_initcall(_fn) __initcall(_fn)
106 #define pfn_to_page(_pfn) (mem_map + (_pfn))
107 #endif
/* Tagged printk helpers for info/warning messages from this driver. */
109 #define IPRINTK(fmt, args...) \
110 printk(KERN_INFO "xen_mem: " fmt, ##args)
111 #define WPRINTK(fmt, args...) \
112 printk(KERN_WARNING "xen_mem: " fmt, ##args)
114 /* balloon_append: add the given page to the balloon. */
115 static void balloon_append(struct page *page)
116 {
117 /* Lowmem is re-populated first, so highmem pages go at list tail. */
118 if (PageHighMem(page)) {
119 list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
120 balloon_high++;
121 } else {
122 list_add(PAGE_TO_LIST(page), &ballooned_pages);
123 balloon_low++;
124 }
125 }
127 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
128 static struct page *balloon_retrieve(void)
129 {
130 struct page *page;
132 if (list_empty(&ballooned_pages))
133 return NULL;
135 page = LIST_TO_PAGE(ballooned_pages.next);
136 UNLIST_PAGE(page);
138 if (PageHighMem(page))
139 balloon_high--;
140 else
141 balloon_low--;
143 return page;
144 }
/* Timer callback: defer another balloon pass to process context. */
146 static void balloon_alarm(unsigned long unused)
147 {
148 schedule_work(&balloon_worker);
149 }
151 static unsigned long current_target(void)
152 {
153 unsigned long target = min(target_pages, hard_limit);
154 if (target > (current_pages + balloon_low + balloon_high))
155 target = current_pages + balloon_low + balloon_high;
156 return target;
157 }
159 /*
160 * We avoid multiple worker processes conflicting via the balloon mutex.
161 * We may of course race updates of the target counts (which are protected
162 * by the balloon lock), or with changes to the Xen hard limit, but we will
163 * recover from these in time.
164 */
165 static void balloon_process(void *unused)
166 {
167 unsigned long *mfn_list, pfn, i, flags;
168 struct page *page;
169 long credit, debt, rc;
170 void *v;
/* Only one worker pass at a time. */
172 down(&balloon_mutex);
174 retry:
175 mfn_list = NULL;
/* credit > 0: below target, so reclaim machine frames from Xen. */
177 if ((credit = current_target() - current_pages) > 0) {
178 mfn_list = vmalloc(credit * sizeof(*mfn_list));
179 if (mfn_list == NULL)
180 goto out;
182 balloon_lock(flags);
183 rc = HYPERVISOR_dom_mem_op(
184 MEMOP_increase_reservation, mfn_list, credit, 0);
185 balloon_unlock(flags);
186 if (rc < credit) {
187 /* We hit the Xen hard limit: reprobe. */
/* Hand back the partial grant, record the discovered limit, and retry. */
188 BUG_ON(HYPERVISOR_dom_mem_op(
189 MEMOP_decrease_reservation,
190 mfn_list, rc, 0) != rc);
191 hard_limit = current_pages + rc - driver_pages;
192 vfree(mfn_list);
193 goto retry;
194 }
/* Wire each granted machine frame to a previously-ballooned pfn. */
196 for (i = 0; i < credit; i++) {
197 page = balloon_retrieve();
198 BUG_ON(page == NULL);
200 pfn = page - mem_map;
/* A ballooned pfn must have had its P2M entry invalidated earlier. */
201 if (phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
202 BUG();
204 /* Update P->M and M->P tables. */
205 phys_to_machine_mapping[pfn] = mfn_list[i];
206 xen_machphys_update(mfn_list[i], pfn);
208 /* Link back into the page tables if not highmem. */
209 if (pfn < max_low_pfn)
210 BUG_ON(HYPERVISOR_update_va_mapping(
211 (unsigned long)__va(pfn << PAGE_SHIFT),
212 pfn_pte_ma(mfn_list[i], PAGE_KERNEL),
213 0));
215 /* Relinquish the page back to the allocator. */
216 ClearPageReserved(page);
217 set_page_count(page, 1);
218 __free_page(page);
219 }
221 current_pages += credit;
/* credit < 0: above target, so surrender pages to Xen. */
222 } else if (credit < 0) {
223 debt = -credit;
225 mfn_list = vmalloc(debt * sizeof(*mfn_list));
226 if (mfn_list == NULL)
227 goto out;
229 for (i = 0; i < debt; i++) {
/* Allocator exhausted: surrender only what we managed to grab. */
230 if ((page = alloc_page(GFP_HIGHUSER)) == NULL) {
231 debt = i;
232 break;
233 }
235 pfn = page - mem_map;
236 mfn_list[i] = phys_to_machine_mapping[pfn];
/* Scrub page contents and unmap lowmem VAs before the frame leaves us. */
238 if (!PageHighMem(page)) {
239 v = phys_to_virt(pfn << PAGE_SHIFT);
240 scrub_pages(v, 1);
241 BUG_ON(HYPERVISOR_update_va_mapping(
242 (unsigned long)v, __pte_ma(0), 0));
243 }
244 #ifdef CONFIG_XEN_SCRUB_PAGES
245 else {
246 v = kmap(page);
247 scrub_pages(v, 1);
248 kunmap(page);
249 }
250 #endif
251 }
253 /* Ensure that ballooned highmem pages don't have kmaps. */
254 kmap_flush_unused();
255 flush_tlb_all();
257 /* No more mappings: invalidate P2M and add to balloon. */
258 for (i = 0; i < debt; i++) {
259 pfn = mfn_to_pfn(mfn_list[i]);
260 phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
261 balloon_append(pfn_to_page(pfn));
262 }
264 BUG_ON(HYPERVISOR_dom_mem_op(
265 MEMOP_decrease_reservation,mfn_list, debt, 0) != debt);
267 current_pages -= debt;
268 }
270 out:
271 if (mfn_list != NULL)
272 vfree(mfn_list);
274 /* Schedule more work if there is some still to be done. */
275 if (current_target() != current_pages)
276 mod_timer(&balloon_timer, jiffies + HZ);
278 up(&balloon_mutex);
279 }
281 /* Resets the Xen limit, sets new target, and kicks off processing. */
/* @target is in pages.  Clearing hard_limit forces the worker to re-probe. */
282 static void set_new_target(unsigned long target)
283 {
284 /* No need for lock. Not read-modify-write updates. */
285 hard_limit = ~0UL;
286 target_pages = target;
287 schedule_work(&balloon_worker);
288 }
/* Watch on the xenstore node the toolstack writes the memory target to. */
290 static struct xenbus_watch target_watch =
291 {
292 .node = "memory/target"
293 };
295 /* React to a change in the target key */
296 static void watch_target(struct xenbus_watch *watch, const char *node)
297 {
298 unsigned long new_target;
299 int err;
301 err = xenbus_scanf("memory", "target", "%lu", &new_target);
302 if (err != 1) {
/* Node absent or malformed: keep the current target. */
303 printk(KERN_ERR "Unable to read memory/target\n");
304 return;
305 }
/*
 * NOTE(review): >> PAGE_SHIFT treats the xenstore value as bytes;
 * confirm the units the toolstack writes (later trees use KiB here).
 */
307 set_new_target(new_target >> PAGE_SHIFT);
309 }
311 /* Setup our watcher
312 NB: Assumes xenbus_lock is held!
313 */
/* Notifier callback run once xenstore is up; registers target_watch. */
314 int balloon_init_watcher(struct notifier_block *notifier,
315 unsigned long event,
316 void *data)
317 {
318 int err;
/* Assert the caller really does hold xenbus_lock. */
320 BUG_ON(down_trylock(&xenbus_lock) == 0);
322 err = register_xenbus_watch(&target_watch);
323 if (err)
324 printk(KERN_ERR "Failed to set balloon watcher\n");
/* Failure is only logged; the notifier chain always continues. */
326 return NOTIFY_DONE;
328 }
330 static int balloon_write(struct file *file, const char __user *buffer,
331 unsigned long count, void *data)
332 {
333 char memstring[64], *endchar;
334 unsigned long long target_bytes;
336 if (!capable(CAP_SYS_ADMIN))
337 return -EPERM;
339 if (count <= 1)
340 return -EBADMSG; /* runt */
341 if (count > sizeof(memstring))
342 return -EFBIG; /* too long */
344 if (copy_from_user(memstring, buffer, count))
345 return -EFAULT;
346 memstring[sizeof(memstring)-1] = '\0';
348 target_bytes = memparse(memstring, &endchar);
349 set_new_target(target_bytes >> PAGE_SHIFT);
351 return count;
352 }
/*
 * balloon_read: /proc/xen/balloon read handler.  Reports the current,
 * target and ballooned page counts (in kB) plus the Xen hard limit.
 */
354 static int balloon_read(char *page, char **start, off_t off,
355 int count, int *eof, void *data)
356 {
357 int len;
359 len = sprintf(
360 page,
361 "Current allocation: %8lu kB\n"
362 "Requested target: %8lu kB\n"
363 "Low-mem balloon: %8lu kB\n"
364 "High-mem balloon: %8lu kB\n"
365 "Xen hard limit: ",
366 PAGES2KB(current_pages), PAGES2KB(target_pages),
367 PAGES2KB(balloon_low), PAGES2KB(balloon_high));
/* hard_limit == ~0UL means the limit has never been hit, so it is unknown. */
369 if (hard_limit != ~0UL) {
370 len += sprintf(
371 page + len,
372 "%8lu kB (inc. %8lu kB driver headroom)\n",
373 PAGES2KB(hard_limit), PAGES2KB(driver_pages));
374 } else {
375 len += sprintf(
376 page + len,
377 " ??? kB\n");
378 }
/* Whole report fits in one page; signal EOF to the procfs core. */
380 *eof = 1;
381 return len;
382 }
384 static struct notifier_block xenstore_notifier;
/*
 * balloon_init: driver initialisation.  Records the boot-time
 * reservation, creates /proc/xen/balloon, seeds the balloon with any
 * memory beyond the initial allocation, and defers watch registration
 * until xenstore is available via the xenstore notifier.
 */
386 static int __init balloon_init(void)
387 {
388 unsigned long pfn;
389 struct page *page;
391 IPRINTK("Initialising balloon driver.\n");
/* Start with exactly what Xen gave us (capped by the kernel's max_pfn). */
393 current_pages = min(xen_start_info.nr_pages, max_pfn);
394 target_pages = current_pages;
395 balloon_low = 0;
396 balloon_high = 0;
397 driver_pages = 0UL;
398 hard_limit = ~0UL;
/* The timer only re-kicks the worker; it is armed in balloon_process(). */
400 init_timer(&balloon_timer);
401 balloon_timer.data = 0;
402 balloon_timer.function = balloon_alarm;
404 if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
405 WPRINTK("Unable to create /proc/xen/balloon.\n");
406 return -1;
407 }
409 balloon_pde->read_proc = balloon_read;
410 balloon_pde->write_proc = balloon_write;
412 /* Initialise the balloon with excess memory space. */
413 for (pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++) {
414 page = &mem_map[pfn];
415 if (!PageReserved(page))
416 balloon_append(page);
417 }
/* Register the xenstore watch once xenstore itself has come up. */
419 target_watch.callback = watch_target;
420 xenstore_notifier.notifier_call = balloon_init_watcher;
422 register_xenstore_notifier(&xenstore_notifier);
424 return 0;
425 }
427 subsys_initcall(balloon_init);
/*
 * balloon_update_driver_allowance: drivers that independently grow or
 * shrink the domain's reservation report it here (delta in pages,
 * positive = pages taken from Xen) so hard-limit accounting in
 * balloon_process() stays accurate.
 */
429 void balloon_update_driver_allowance(long delta)
430 {
431 unsigned long flags;
432 balloon_lock(flags);
433 driver_pages += delta; /* non-atomic update */
434 balloon_unlock(flags);
435 }
/*
 * dealloc_pte_fn: per-PTE callback for generic_page_range().  Zaps one
 * kernel PTE, invalidates the matching P2M entry, and returns the
 * machine frame to Xen.  pte_page and data are unused.  Always returns 0
 * so the walk continues.
 */
437 static int dealloc_pte_fn(
438 pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
439 {
440 unsigned long mfn = pte_mfn(*pte);
441 set_pte(pte, __pte_ma(0));
442 phys_to_machine_mapping[__pa(addr) >> PAGE_SHIFT] =
443 INVALID_P2M_ENTRY;
444 BUG_ON(HYPERVISOR_dom_mem_op(
445 MEMOP_decrease_reservation, &mfn, 1, 0) != 1);
446 return 0;
447 }
/*
 * balloon_alloc_empty_page_range: allocate a contiguous range of
 * 2^get_order(nr_pages * PAGE_SIZE) pages whose machine frames are then
 * handed back to Xen, leaving the virtual range unmapped.  Returns the
 * first struct page of the range, or NULL if the allocation fails.
 * Note the request is rounded up to a power-of-two order.
 */
449 struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
450 {
451 unsigned long vstart, flags;
452 unsigned int order = get_order(nr_pages * PAGE_SIZE);
454 vstart = __get_free_pages(GFP_KERNEL, order);
455 if (vstart == 0)
456 return NULL;
/* Scrub contents before the frames leave this domain. */
458 scrub_pages(vstart, 1 << order);
460 balloon_lock(flags);
/* Zap every PTE in the range and return each frame to Xen. */
461 BUG_ON(generic_page_range(
462 &init_mm, vstart, PAGE_SIZE << order, dealloc_pte_fn, NULL));
463 current_pages -= 1UL << order;
464 balloon_unlock(flags);
/* Let the worker try to win pages back up to the target. */
466 schedule_work(&balloon_worker);
468 flush_tlb_all();
470 return virt_to_page(vstart);
471 }
473 void balloon_dealloc_empty_page_range(
474 struct page *page, unsigned long nr_pages)
475 {
476 unsigned long i, flags;
477 unsigned int order = get_order(nr_pages * PAGE_SIZE);
479 balloon_lock(flags);
480 for (i = 0; i < (1UL << order); i++)
481 balloon_append(page + i);
482 balloon_unlock(flags);
484 schedule_work(&balloon_worker);
485 }
/* Interfaces exported for other Xen drivers that adjust the reservation. */
487 EXPORT_SYMBOL(balloon_update_driver_allowance);
488 EXPORT_SYMBOL(balloon_alloc_empty_page_range);
489 EXPORT_SYMBOL(balloon_dealloc_empty_page_range);
491 /*
492 * Local variables:
493 * c-file-style: "linux"
494 * indent-tabs-mode: t
495 * c-indent-level: 8
496 * c-basic-offset: 8
497 * tab-width: 8
498 * End:
499 */