ia64/xen-unstable

view linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c @ 3726:a00d7a994a59

bitkeeper revision 1.1159.212.131 (4208dea93kRwptTEZzh2Dm2aH9dAKA)

commit:   merge
author:   iap10@freefall.cl.cam.ac.uk
date:     Tue Feb 08 15:45:45 2005 +0000 (2005-02-08)
parents:  ff4e7a241335
children: (none)

/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <asm/xen_proc.h>

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>

#include <asm/hypervisor.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>

/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
#define USER_INFLATE_BALLOON  1 /* return mem to hypervisor */
#define USER_DEFLATE_BALLOON  2 /* claim mem from hypervisor */
typedef struct user_balloon_op {
    unsigned int  op;
    unsigned long size;
} user_balloon_op_t;
/* END OF USER DEFINES */
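
/*
 * The defines above are intended to be copied into user-space tools. A
 * minimal sketch of how such a tool might fill in user_balloon_op_t is
 * given here for illustration only: this revision of the driver does not
 * wire the structure up to any ioctl or device node (the /proc
 * memory_target interface further down is the interface actually
 * implemented), so the write()-based transport below is an assumption.
 *
 *     #include <unistd.h>
 *
 *     static int balloon_request(int fd, unsigned int op, unsigned long pages)
 *     {
 *         user_balloon_op_t bop;
 *         bop.op   = op;     // USER_INFLATE_BALLOON or USER_DEFLATE_BALLOON
 *         bop.size = pages;  // number of pages to move
 *         return (write(fd, &bop, sizeof(bop)) == sizeof(bop)) ? 0 : -1;
 *     }
 *
 *     // e.g. balloon_request(fd, USER_INFLATE_BALLOON, 1024);  // give back 4MB
 */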

static struct proc_dir_entry *balloon_pde;
unsigned long credit;
static unsigned long current_pages, most_seen_pages;

/*
 * Dead entry written into balloon-owned entries in the PMT.
 * It is deliberately different to INVALID_P2M_ENTRY.
 */
#define DEAD 0xdead1234

static inline pte_t *get_ptep(unsigned long addr)
{
    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
    pgd = pgd_offset_k(addr);

    if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();

    pmd = pmd_offset(pgd, addr);
    if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();

    ptep = pte_offset(pmd, addr);

    return ptep;
}

/* Main function for relinquishing memory. */
static unsigned long inflate_balloon(unsigned long num_pages)
{
    unsigned long *parray;
    unsigned long *currp;
    unsigned long curraddr;
    unsigned long ret = 0;
    unsigned long i, j;

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
        return -EFAULT;
    }

    currp = parray;

    for ( i = 0; i < num_pages; i++, currp++ )
    {
        struct page *page = alloc_page(GFP_HIGHUSER);
        unsigned long pfn = page - mem_map;

        /* If allocation fails then free all reserved pages. */
        if ( page == NULL )
        {
            printk(KERN_ERR "Unable to inflate balloon by %lu, only"
                   " %lu pages free.\n", num_pages, i);
            currp = parray;
            for ( j = 0; j < i; j++, currp++ )
                __free_page(mem_map + *currp);
            ret = -EFAULT;
            goto cleanup;
        }

        *currp = pfn;
    }

    for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
    {
        unsigned long mfn = phys_to_machine_mapping[*currp];
        curraddr = (unsigned long)page_address(mem_map + *currp);
        /* Blow away page contents for security, and also p.t. ref if any. */
        if ( curraddr != 0 )
        {
            scrub_pages(curraddr, 1);
            queue_l1_entry_update(get_ptep(curraddr), 0);
        }
#ifdef CONFIG_XEN_SCRUB_PAGES
        else
        {
            void *p = kmap(&mem_map[*currp]);
            scrub_pages(p, 1);
            kunmap(&mem_map[*currp]);
        }
#endif
        phys_to_machine_mapping[*currp] = DEAD;
        *currp = mfn;
    }

    /* Flush updates through and flush the TLB. */
    xen_tlb_flush();

    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                parray, num_pages, 0);
    if ( unlikely(ret != num_pages) )
    {
        printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
        goto cleanup;
    }

    credit += num_pages;
    ret = num_pages;

 cleanup:
    vfree(parray);

    return ret;
}

/*
 * Install new pages of memory obtained by deflate_balloon(). The function
 * walks the phys->machine mapping table looking for DEAD entries and
 * populates them.
 */
static unsigned long process_returned_pages(unsigned long *parray,
                                            unsigned long num)
{
    /* Currently this function is rather simplistic, as it assumes that
     * the domain reclaims only the number of pages it previously
     * released. This is to change soon, and the code to extend page
     * tables etc. will be incorporated here.
     */
    unsigned long tot_pages = most_seen_pages;
    unsigned long *curr = parray;
    unsigned long num_installed;
    unsigned long i;

    num_installed = 0;
    for ( i = 0; (i < tot_pages) && (num_installed < num); i++ )
    {
        if ( phys_to_machine_mapping[i] == DEAD )
        {
            phys_to_machine_mapping[i] = *curr;
            queue_machphys_update(*curr, i);
            if ( i < max_low_pfn )
                queue_l1_entry_update(
                    get_ptep((unsigned long)__va(i << PAGE_SHIFT)),
                    ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));

            __free_page(mem_map + i);

            curr++;
            num_installed++;
        }
    }

    return num_installed;
}

unsigned long deflate_balloon(unsigned long num_pages)
{
    unsigned long ret;
    unsigned long *parray;

    if ( num_pages > credit )
    {
        printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n",
               num_pages, credit);
        return -EAGAIN;
    }

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
        return 0;
    }

    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
                                parray, num_pages, 0);
    if ( unlikely(ret != num_pages) )
    {
        printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
               ret);
        goto cleanup;
    }

    if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
    {
        printk(KERN_WARNING
               "deflate_balloon: restored only %lx of %lx pages.\n",
               ret, num_pages);
        goto cleanup;
    }

    ret = num_pages;
    credit -= num_pages;

 cleanup:
    vfree(parray);

    return ret;
}

#define PAGE_TO_MB_SHIFT 8 /* with 4KB pages, (count >> 8) converts pages to MB */

/*
 * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c.
 * The loops do go through all of low memory (ZONE_NORMAL); the old pages
 * have _PAGE_PRESENT set and so get skipped. If low memory is not full,
 * the new pages are used to fill it, going from cur_low_pfn to low_pfn.
 * High memory is not direct-mapped, so no extension is needed for new
 * high memory.
 */
static void pagetable_extend(int cur_low_pfn, int newpages)
{
    unsigned long vaddr, end;
    pgd_t *kpgd, *pgd, *pgd_base;
    int i, j, k;
    pmd_t *kpmd, *pmd;
    pte_t *kpte, *pte, *pte_base;
    int low_pfn = min(cur_low_pfn + newpages, (int)max_low_pfn);

    /*
     * This can be zero as well - no problem, in that case we exit
     * the loops anyway due to the PTRS_PER_* conditions.
     */
    end = (unsigned long)__va(low_pfn * PAGE_SIZE);

    pgd_base = init_mm.pgd;
    i = __pgd_offset(PAGE_OFFSET);
    pgd = pgd_base + i;

    for (; i < PTRS_PER_PGD; pgd++, i++) {
        vaddr = i*PGDIR_SIZE;
        if (end && (vaddr >= end))
            break;
        pmd = (pmd_t *)pgd;
        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
            if (end && (vaddr >= end))
                break;

            /* Filled in for us already? */
            if ( pmd_val(*pmd) & _PAGE_PRESENT )
                continue;

            pte_base = pte = (pte_t *)__get_free_page(GFP_KERNEL);

            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
                if (end && (vaddr >= end))
                    break;
                *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
            }
            /* Xen requires page-table pages to be mapped read-only, so
             * drop _PAGE_RW on the new PTE page before hooking it in. */
            kpgd = pgd_offset_k((unsigned long)pte_base);
            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
            kpte = pte_offset(kpmd, (unsigned long)pte_base);
            queue_l1_entry_update(kpte,
                                  (*(unsigned long *)kpte) & ~_PAGE_RW);
            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
            XEN_flush_page_update_queue();
        }
    }
}

/*
 * claim_new_pages() asks xen to increase this domain's memory reservation
 * and returns a list of the new pages of memory. These new pages are
 * added to the free list of the memory manager.
 *
 * Available RAM does not normally change while Linux runs. To make this
 * work, the Linux mem= boot-time command-line parameter must say how big
 * memory could possibly grow; e.g. booting with mem=512M allows the domain
 * to be grown to 512MB later. setup_arch() in arch/xen/kernel/setup.c then
 * sets max_pfn, max_low_pfn and the zones according to this maximum memory
 * size. The page tables themselves can only be extended after xen has
 * assigned new pages to this domain.
 */
static unsigned long
claim_new_pages(unsigned long num_pages)
{
    unsigned long new_page_cnt, pfn;
    unsigned long *parray, *curr;

    if ( most_seen_pages + num_pages > max_pfn )
        num_pages = max_pfn - most_seen_pages;
    if ( num_pages == 0 ) return 0;

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
        return 0;
    }

    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
                                         parray, num_pages, 0);
    if ( new_page_cnt != num_pages )
    {
        printk(KERN_WARNING
            "claim_new_pages: xen granted only %lu of %lu requested pages\n",
            new_page_cnt, num_pages);

        /*
         * Avoid xen lockup when the user forgot to setdomainmaxmem. Xen
         * usually can dribble out a few pages and then hangs.
         */
        if ( new_page_cnt < 1000 )
        {
            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                  parray, new_page_cnt, 0);
            vfree(parray); /* don't leak the scratch array on failure */
            return -EFAULT;
        }
    }
    memcpy(phys_to_machine_mapping + most_seen_pages, parray,
           new_page_cnt * sizeof(unsigned long));

    pagetable_extend(most_seen_pages, new_page_cnt);

    for ( pfn = most_seen_pages, curr = parray;
          pfn < most_seen_pages + new_page_cnt;
          pfn++, curr++ )
    {
        struct page *page = mem_map + pfn;

#ifndef CONFIG_HIGHMEM
        if ( pfn >= max_low_pfn )
        {
            printk(KERN_WARNING "Warning only %ldMB will be used.\n",
                   pfn >> PAGE_TO_MB_SHIFT);
            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
            break;
        }
#endif
        queue_machphys_update(*curr, pfn);
        if ( pfn < max_low_pfn )
            queue_l1_entry_update(
                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));

        XEN_flush_page_update_queue();

        /* This next bit mimics arch/xen/mm/init.c:one_highpage_init(). */
        ClearPageReserved(page);
        if ( pfn >= max_low_pfn )
            set_bit(PG_highmem, &page->flags);
        set_page_count(page, 1);
        __free_page(page);
    }

    vfree(parray);

    return new_page_cnt;
}

static int balloon_write(struct file *file, const char *buffer,
                         u_long count, void *data)
{
    char memstring[64], *endchar;
    int len, i;
    unsigned long target;
    unsigned long long targetbytes;

    /* Only admin can play with the balloon :) */
    if ( !capable(CAP_SYS_ADMIN) )
        return -EPERM;

    if ( count > sizeof(memstring) )
        return -EFBIG;

    len = strnlen_user(buffer, count);
    if ( len == 0 ) return -EBADMSG;
    if ( len == 1 ) return 1; /* input starts with a NUL char */
    if ( strncpy_from_user(memstring, buffer, len) < 0 )
        return -EFAULT;

    endchar = memstring;
    for ( i = 0; i < len; ++i, ++endchar )
        if ( (memstring[i] < '0') || (memstring[i] > '9') )
            break;
    if ( i == 0 )
        return -EBADMSG;

    targetbytes = memparse(memstring, &endchar);
    target = targetbytes >> PAGE_SHIFT;

    if ( target < current_pages )
    {
        int change = inflate_balloon(current_pages - target);
        if ( change <= 0 )
            return change;

        current_pages -= change;
        printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
               change >> PAGE_TO_MB_SHIFT, current_pages >> PAGE_TO_MB_SHIFT);
    }
    else if ( target > current_pages )
    {
        int change, reclaim = min(target, most_seen_pages) - current_pages;

        if ( reclaim )
        {
            change = deflate_balloon(reclaim);
            if ( change <= 0 )
                return change;
            current_pages += change;
            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
                   change >> PAGE_TO_MB_SHIFT,
                   current_pages >> PAGE_TO_MB_SHIFT);
        }

        if ( most_seen_pages < target )
        {
            int growth = claim_new_pages(target - most_seen_pages);
            if ( growth <= 0 )
                return growth;
            most_seen_pages += growth;
            current_pages += growth;
            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
                   growth >> PAGE_TO_MB_SHIFT,
                   current_pages >> PAGE_TO_MB_SHIFT);
        }
    }

    return len;
}

static int balloon_read(char *page, char **start, off_t off,
                        int count, int *eof, void *data)
{
    int len;
    len = sprintf(page, "%lu\n", current_pages << PAGE_SHIFT);

    if ( len <= off + count ) *eof = 1;
    *start = page + off;
    len -= off;
    if ( len > count ) len = count;
    if ( len < 0 ) len = 0;
    return len;
}
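
/*
 * A minimal user-space sketch of driving the two handlers above, assuming
 * that create_xen_proc_entry("memory_target", ...) in init_module() places
 * the node at /proc/xen/memory_target (the exact path is an assumption of
 * this sketch). balloon_write() parses the new target with memparse(), so
 * plain byte counts and K/M/G suffixes both work; balloon_read() reports
 * the current allocation in bytes.
 *
 *     #include <fcntl.h>
 *     #include <stdio.h>
 *     #include <unistd.h>
 *
 *     int main(void)
 *     {
 *         char buf[64];
 *         ssize_t n;
 *         int fd = open("/proc/xen/memory_target", O_RDWR); // assumed path
 *         if ( fd < 0 ) return 1;
 *         write(fd, "128M\n", 5);   // request a 128MB target
 *         lseek(fd, 0, SEEK_SET);   // rewind before reading back
 *         n = read(fd, buf, sizeof(buf) - 1);
 *         if ( n > 0 ) { buf[n] = '\0'; printf("now: %s", buf); } // bytes
 *         close(fd);
 *         return 0;
 *     }
 */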

static int __init init_module(void)
{
    printk(KERN_ALERT "Starting Xen Balloon driver\n");

    most_seen_pages = current_pages = min(xen_start_info.nr_pages, max_pfn);
    if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
    {
        printk(KERN_ALERT "Unable to create balloon driver proc entry!");
        return -1;
    }

    balloon_pde->write_proc = balloon_write;
    balloon_pde->read_proc = balloon_read;

    /*
     * Make a new phys map if mem= says xen can give us memory to grow.
     */
    if ( max_pfn > xen_start_info.nr_pages )
    {
        extern unsigned long *phys_to_machine_mapping;
        unsigned long *newmap;
        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
        if ( newmap == NULL ) /* fail gracefully rather than oops in memset */
        {
            remove_xen_proc_entry("memory_target");
            return -ENOMEM;
        }
        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
        memcpy(newmap, phys_to_machine_mapping,
               xen_start_info.nr_pages * sizeof(unsigned long));
        phys_to_machine_mapping = newmap;
    }

    return 0;
}

static void __exit cleanup_module(void)
{
    if ( balloon_pde != NULL )
    {
        /* Must match the name passed to create_xen_proc_entry() above. */
        remove_xen_proc_entry("memory_target");
        balloon_pde = NULL;
    }
}

module_init(init_module);
module_exit(cleanup_module);