direct-io.hg

view linux-2.6.8.1-xen-sparse/drivers/xen/balloon/balloon.c @ 2693:2584528df9e1

bitkeeper revision 1.1159.123.2 (4177d169N58TtQXn_XJO4xNBKbMQUw)

Merge freefall.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into freefall.cl.cam.ac.uk:/local/scratch/kaf24/xeno
author kaf24@freefall.cl.cam.ac.uk
date Thu Oct 21 15:10:33 2004 +0000 (2004-10-21)
parents 081dd58e4d58 fa98ee4c6a0c
children a9128b3b9f45 d0beb68a7ae0
line source
/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <asm-xen/xen_proc.h>

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>

#include <asm-xen/hypervisor.h>
#include <asm-xen/ctrl_if.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>

/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
#define USER_INFLATE_BALLOON  1 /* return mem to hypervisor */
#define USER_DEFLATE_BALLOON  2 /* claim mem from hypervisor */
typedef struct user_balloon_op {
    unsigned int  op;
    unsigned long size;
} user_balloon_op_t;
/* END OF USER DEFINES */
static struct proc_dir_entry *balloon_pde;

unsigned long credit;
static unsigned long current_pages, most_seen_pages;

/*
 * Dead entry written into balloon-owned entries in the PMT.
 * It is deliberately different to INVALID_P2M_ENTRY.
 */
#define DEAD 0xdead1234
static inline pte_t *get_ptep(unsigned long addr)
{
    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
    pgd = pgd_offset_k(addr);

    if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();

    pmd = pmd_offset(pgd, addr);
    if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();

    ptep = pte_offset_kernel(pmd, addr);

    return ptep;
}
/* Main function for relinquishing memory. */
static unsigned long inflate_balloon(unsigned long num_pages)
{
    unsigned long *parray;
    unsigned long *currp;
    unsigned long curraddr;
    unsigned long ret = 0;
    unsigned long i, j;

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
        return -EFAULT;
    }

    currp = parray;

    for ( i = 0; i < num_pages; i++, currp++ )
    {
        struct page *page = alloc_page(GFP_HIGHUSER);

        /* If allocation fails then free all reserved pages. */
        if ( page == NULL )
        {
            printk(KERN_ERR "Unable to inflate balloon by %ld, only"
                   " %ld pages free.\n", num_pages, i);
            currp = parray;
            for ( j = 0; j < i; j++, currp++ )
                __free_page(mem_map + *currp);
            ret = -EFAULT;
            goto cleanup;
        }

        /* Only compute the pfn once we know the allocation succeeded. */
        *currp = page - mem_map;
    }

    for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
    {
        unsigned long mfn = phys_to_machine_mapping[*currp];
        curraddr = (unsigned long)page_address(mem_map + *currp);
        /* Blow away page contents for security, and also p.t. ref if any. */
        if ( curraddr != 0 )
        {
            scrub_pages(curraddr, 1);
            queue_l1_entry_update(get_ptep(curraddr), 0);
        }
#ifdef CONFIG_XEN_SCRUB_PAGES
        else
        {
            void *p = kmap(&mem_map[*currp]);
            scrub_pages(p, 1);
            kunmap(&mem_map[*currp]);
        }
#endif
        phys_to_machine_mapping[*currp] = DEAD;
        *currp = mfn;
    }

    /* Flush updates through and flush the TLB. */
    xen_tlb_flush();

    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                parray, num_pages, 0);
    if ( unlikely(ret != num_pages) )
    {
        printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
        goto cleanup;
    }

    credit += num_pages;
    ret = num_pages;

 cleanup:
    vfree(parray);

    return ret;
}
/*
 * Install new mem pages obtained by deflate_balloon. This function walks
 * the phys->machine mapping table looking for DEAD entries and populates
 * them.
 */
static unsigned long process_returned_pages(unsigned long *parray,
                                            unsigned long num)
{
    /*
     * Currently this function is rather simplistic, as it is assumed
     * that the domain reclaims only the number of pages it previously
     * released. This is to change soon, and the code to extend page
     * tables etc. will be incorporated here.
     */
    unsigned long tot_pages = most_seen_pages;
    unsigned long *curr = parray;
    unsigned long num_installed;
    unsigned long i;

    num_installed = 0;
    for ( i = 0; (i < tot_pages) && (num_installed < num); i++ )
    {
        if ( phys_to_machine_mapping[i] == DEAD )
        {
            phys_to_machine_mapping[i] = *curr;
            queue_machphys_update(*curr, i);
            if ( i < max_low_pfn )
                queue_l1_entry_update(
                    get_ptep((unsigned long)__va(i << PAGE_SHIFT)),
                    ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));

            __free_page(mem_map + i);

            curr++;
            num_installed++;
        }
    }

    return num_installed;
}
unsigned long deflate_balloon(unsigned long num_pages)
{
    unsigned long ret;
    unsigned long *parray;

    if ( num_pages > credit )
    {
        printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n",
               num_pages, credit);
        return -EAGAIN;
    }

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
        return 0;
    }

    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
                                parray, num_pages, 0);
    if ( unlikely(ret != num_pages) )
    {
        printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
               ret);
        goto cleanup;
    }

    if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
    {
        printk(KERN_WARNING
               "deflate_balloon: restored only %lx of %lx pages.\n",
               ret, num_pages);
        goto cleanup;
    }

    ret = num_pages;
    credit -= num_pages;

 cleanup:
    vfree(parray);

    return ret;
}
/* With 4kB pages, 1MB = 2^8 pages, so a page count >> 8 gives MB. */
#define PAGE_TO_MB_SHIFT 8
/*
 * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c.
 * The loops do go through all of low memory (ZONE_NORMAL). The
 * old pages have _PAGE_PRESENT set and so get skipped.
 * If low memory is not full, the new pages are used to fill it, going
 * from cur_low_pfn to low_pfn. High memory is not direct-mapped, so
 * no extension is needed for new high memory.
 */

static void pagetable_extend(int cur_low_pfn, int newpages)
{
    unsigned long vaddr, end;
    pgd_t *kpgd, *pgd, *pgd_base;
    int i, j, k;
    pmd_t *kpmd, *pmd;
    pte_t *kpte, *pte, *pte_base;
    int low_pfn = min(cur_low_pfn + newpages, (int)max_low_pfn);

    /*
     * This can be zero as well - no problem, in that case we exit
     * the loops anyway due to the PTRS_PER_* conditions.
     */
    end = (unsigned long)__va(low_pfn * PAGE_SIZE);

    pgd_base = init_mm.pgd;
    i = pgd_index(PAGE_OFFSET);
    pgd = pgd_base + i;

    for (; i < PTRS_PER_PGD; pgd++, i++) {
        vaddr = i*PGDIR_SIZE;
        if (end && (vaddr >= end))
            break;
        pmd = (pmd_t *)pgd;
        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
            if (end && (vaddr >= end))
                break;

            /* Filled in for us already? */
            if ( pmd_val(*pmd) & _PAGE_PRESENT )
                continue;

            pte_base = pte = (pte_t *)__get_free_page(GFP_KERNEL);

            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
                if (end && (vaddr >= end))
                    break;
                *pte = mk_pte(virt_to_page(vaddr), PAGE_KERNEL);
            }
            kpgd = pgd_offset_k((unsigned long)pte_base);
            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
            kpte = pte_offset_kernel(kpmd, (unsigned long)pte_base);
            queue_l1_entry_update(kpte,
                                  (*(unsigned long *)kpte) & ~_PAGE_RW);
            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
            XEN_flush_page_update_queue();
        }
    }
}
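
To put the loop bounds in perspective, assuming non-PAE x86 (where PTRS_PER_PTE is 1024): each fresh PTE page from __get_free_page() maps 1024 x 4kB = 4MB of low memory, so growing ZONE_NORMAL by 256MB consumes 256/4 = 64 new PTE pages, each one write-protected for xen via queue_l1_entry_update() and then hooked in with a single set_pmd().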
/*
 * claim_new_pages() asks xen to increase this domain's memory reservation
 * and return a list of the new pages of memory. These new pages are
 * added to the free list of the memory manager.
 *
 * Available RAM does not normally change while Linux runs. To make this work,
 * the Linux mem= boot-time command-line param must say how big memory could
 * possibly grow. Then setup_arch() in arch/xen/kernel/setup.c
 * sets max_pfn, max_low_pfn and the zones according to
 * this max memory size. The page tables themselves can only be
 * extended after xen has assigned new pages to this domain.
 */

static unsigned long
claim_new_pages(unsigned long num_pages)
{
    unsigned long new_page_cnt, pfn;
    unsigned long *parray, *curr;

    if ( most_seen_pages + num_pages > max_pfn )
        num_pages = max_pfn - most_seen_pages;
    if ( num_pages == 0 ) return -EINVAL;

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
        return 0;
    }

    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
                                         parray, num_pages, 0);
    if ( new_page_cnt != num_pages )
    {
        printk(KERN_WARNING
               "claim_new_pages: xen granted only %lu of %lu requested pages\n",
               new_page_cnt, num_pages);

        /*
         * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
         * usually can dribble out a few pages and then hangs.
         */
        if ( new_page_cnt < 1000 )
        {
            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                  parray, new_page_cnt, 0);
            vfree(parray); /* don't leak the pfn array on the error path */
            return -EFAULT;
        }
    }
    memcpy(phys_to_machine_mapping + most_seen_pages, parray,
           new_page_cnt * sizeof(unsigned long));

    pagetable_extend(most_seen_pages, new_page_cnt);

    for ( pfn = most_seen_pages, curr = parray;
          pfn < most_seen_pages + new_page_cnt;
          pfn++, curr++ )
    {
        struct page *page = mem_map + pfn;

#ifndef CONFIG_HIGHMEM
        if ( pfn >= max_low_pfn )
        {
            printk(KERN_WARNING "Warning only %ldMB will be used.\n",
                   pfn >> PAGE_TO_MB_SHIFT);
            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
            break;
        }
#endif
        queue_machphys_update(*curr, pfn);
        if ( pfn < max_low_pfn )
            queue_l1_entry_update(
                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));

        XEN_flush_page_update_queue();

        /* This next bit mimics arch/xen/mm/init.c:one_highpage_init(). */
        ClearPageReserved(page);
        if ( pfn >= max_low_pfn )
            set_bit(PG_highmem, &page->flags);
        set_page_count(page, 1);
        __free_page(page);
    }

    vfree(parray);

    return new_page_cnt;
}
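
As the comment above claim_new_pages() notes, growth beyond the boot-time allocation only works if the kernel was booted with a mem= parameter covering the largest size the domain may ever reach, and (per the in-code warning) the domain's limit was raised on the control side with setdomainmaxmem. A hypothetical kernel command line for a domain that boots with 128MB but may grow to 512MB:

    root=/dev/sda1 ro mem=512M

With 4kB pages, setup_arch() then sizes max_pfn for 131072 pages, of which only 32768 are populated at boot; balloon_init() below initialises the spare phys_to_machine_mapping slots to ~0, and claim_new_pages() fills them in as xen grants pages.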
static int balloon_try_target(int target)
{
    int change, reclaim;

    if ( target < current_pages )
    {
        change = inflate_balloon(current_pages - target);
        if ( change <= 0 )
            return change;

        current_pages -= change;
        printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
               change >> PAGE_TO_MB_SHIFT, current_pages >> PAGE_TO_MB_SHIFT);
    }
    else if ( target > current_pages )
    {
        reclaim = min((unsigned long)target, most_seen_pages) - current_pages;

        if ( reclaim )
        {
            change = deflate_balloon(reclaim);
            if ( change <= 0 )
                return change;
            current_pages += change;
            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
                   change >> PAGE_TO_MB_SHIFT, current_pages >> PAGE_TO_MB_SHIFT);
        }

        if ( most_seen_pages < target )
        {
            int growth = claim_new_pages(target - most_seen_pages);
            if ( growth <= 0 )
                return growth;
            most_seen_pages += growth;
            current_pages += growth;
            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
                   growth >> PAGE_TO_MB_SHIFT, current_pages >> PAGE_TO_MB_SHIFT);
        }
    }

    return 1;
}
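
A concrete walk through balloon_try_target(), with 4kB pages: say current_pages is 32768 (128MB), most_seen_pages is 65536 (256MB) and the requested target is 131072 (512MB). Since target exceeds current_pages, reclaim = min(131072, 65536) - 32768 = 32768, so deflate_balloon() first restores the previously ballooned-out 128MB; most_seen_pages is still below target, so claim_new_pages(65536) then asks xen for the remaining 256MB and extends the page tables. A shrinking target instead takes the single inflate_balloon() path.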
static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
    switch ( msg->subtype )
    {
    case CMSG_MEM_REQUEST_SET:
        if ( msg->length != sizeof(mem_request_t) )
            goto parse_error;
        {
            mem_request_t *req = (mem_request_t *)&msg->msg[0];
            req->status = balloon_try_target(req->target);
        }
        break;
    default:
        goto parse_error;
    }

    ctrl_if_send_response(msg);
    return;

 parse_error:
    msg->length = 0;
    ctrl_if_send_response(msg);
}
static int balloon_write(struct file *file, const char *buffer,
                         size_t count, loff_t *offp)
{
    char memstring[64], *endchar;
    int len, i;
    unsigned long target;
    unsigned long long targetbytes;

    /* Only admin can play with the balloon :) */
    if ( !capable(CAP_SYS_ADMIN) )
        return -EPERM;

    /* Leave room for the NUL terminator in memstring. */
    if ( count >= sizeof(memstring) )
        return -EFBIG;

    len = strnlen_user(buffer, count);
    if ( len == 0 ) return -EBADMSG;
    if ( len == 1 ) return 1; /* input starts with a NUL char */
    if ( strncpy_from_user(memstring, buffer, len) < 0 )
        return -EFAULT;

    endchar = memstring;
    for ( i = 0; i < len; ++i, ++endchar )
        if ( (memstring[i] < '0') || (memstring[i] > '9') )
            break;
    if ( i == 0 )
        return -EBADMSG;

    targetbytes = memparse(memstring, &endchar);
    target = targetbytes >> PAGE_SHIFT;

    i = balloon_try_target(target);

    if ( i <= 0 ) return i;

    *offp += len;
    return len;
}
static int balloon_read(struct file *filp, char *buffer,
                        size_t count, loff_t *offp)
{
    static char priv_buf[32];
    char *priv_bufp = priv_buf;
    int len;
    len = sprintf(priv_buf, "%lu\n", current_pages << PAGE_SHIFT);

    len -= *offp;
    priv_bufp += *offp;
    if ( len < 0 ) len = 0;
    if ( len > count ) len = count;

    if ( copy_to_user(buffer, priv_bufp, len) )
        return -EFAULT;

    *offp += len;
    return len;
}

static struct file_operations balloon_fops = {
    .read  = balloon_read,
    .write = balloon_write
};
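
The two proc handlers above are the whole user-space interface: balloon_write() takes a memparse()-style size string ("32768k", "128M", ...) and balloon_read() reports the current reservation in bytes. A minimal user-space sketch, assuming the entry created by create_xen_proc_entry() shows up as /proc/xen/memory_target (the exact path depends on where the Xen proc root lives) - illustration only, not part of the driver:

/* set_target.c - hypothetical helper: write a new target, read it back. */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    const char *path = "/proc/xen/memory_target"; /* assumed location */
    char buf[32];
    int fd, n;

    if ( argc > 1 )
    {
        /* Needs CAP_SYS_ADMIN; the string is parsed kernel-side by memparse(). */
        fd = open(path, O_WRONLY);
        if ( fd < 0 ) { perror("open for write"); return 1; }
        if ( write(fd, argv[1], strlen(argv[1]) + 1) < 0 )
            perror("write");
        close(fd);
    }

    fd = open(path, O_RDONLY);
    if ( fd < 0 ) { perror("open for read"); return 1; }
    n = read(fd, buf, sizeof(buf) - 1);
    if ( n > 0 )
    {
        buf[n] = '\0';
        printf("current reservation (bytes): %s", buf); /* buf ends in \n */
    }
    close(fd);
    return 0;
}

From a shell, echo 128M > /proc/xen/memory_target and cat /proc/xen/memory_target exercise the same two handlers, under the same path assumption.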
static int __init balloon_init(void)
{
    printk(KERN_ALERT "Starting Xen Balloon driver\n");

    most_seen_pages = current_pages = min(xen_start_info.nr_pages, max_pfn);
    if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
    {
        printk(KERN_ALERT "Unable to create balloon driver proc entry!");
        return -1;
    }

    balloon_pde->owner     = THIS_MODULE;
    balloon_pde->nlink     = 1;
    balloon_pde->proc_fops = &balloon_fops;

    (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx,
                                    CALLBACK_IN_BLOCKING_CONTEXT);

    /*
     * Make a new phys map if mem= says xen can give us memory to grow.
     */
    if ( max_pfn > xen_start_info.nr_pages )
    {
        extern unsigned long *phys_to_machine_mapping;
        unsigned long *newmap;
        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
        if ( newmap == NULL )
        {
            printk(KERN_ERR "balloon_init: Unable to vmalloc new phys map\n");
            return -ENOMEM;
        }
        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
        memcpy(newmap, phys_to_machine_mapping,
               xen_start_info.nr_pages * sizeof(unsigned long));
        phys_to_machine_mapping = newmap;
    }

    return 0;
}
static void __exit balloon_cleanup(void)
{
    if ( balloon_pde != NULL )
    {
        remove_xen_proc_entry("memory_target");
        balloon_pde = NULL;
    }
}

module_init(balloon_init);
module_exit(balloon_cleanup);