ia64/xen-unstable
linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c @ 4895:24dfd18ea63e
bitkeeper revision 1.1159.258.120 (42848bfe8kMyWWcBA64rq7h7l7AyoA)
Shadow code bug fix (found by Ian) that was breaking refcounts, and subsequently
causing migration problems.
author:   mafetter@fleming.research
date:     Fri May 13 11:14:06 2005 +0000 (2005-05-13)
parents:  ff4e7a241335
children: (none)
/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <asm/xen_proc.h>

#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>

#include <asm/hypervisor.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
#include <asm/tlb.h>
/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
#define USER_INFLATE_BALLOON  1 /* return mem to hypervisor */
#define USER_DEFLATE_BALLOON  2 /* claim mem from hypervisor */
typedef struct user_balloon_op {
    unsigned int  op;
    unsigned long size;
} user_balloon_op_t;
/* END OF USER DEFINES */
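
/*
 * A minimal user-space sketch (illustrative only): the interface this file
 * actually implements is the "memory_target" proc node created in
 * init_module() below, driven by writing a size string.  The path
 * /proc/xen/memory_target is an assumption based on
 * create_xen_proc_entry("memory_target"); check where xen_proc mounts it.
 */
#if 0 /* user-space example, not part of the driver build */
#include <stdio.h>
#include <string.h>

/* Ask the balloon driver for a new memory target, e.g. "128M".  The
 * terminating NUL is written too, since balloon_write() sizes the
 * input with strnlen_user(). */
int set_memory_target(const char *size)
{
    FILE *f = fopen("/proc/xen/memory_target", "w");
    int ok;
    if ( f == NULL )
        return -1;
    ok = fwrite(size, strlen(size) + 1, 1, f) == 1;
    return ((fclose(f) == 0) && ok) ? 0 : -1;
}
#endif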
static struct proc_dir_entry *balloon_pde;
unsigned long credit;
static unsigned long current_pages, most_seen_pages;

/*
 * Dead entry written into balloon-owned entries in the PMT.
 * It is deliberately different to INVALID_P2M_ENTRY.
 */
#define DEAD 0xdead1234
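
/*
 * Walk the kernel page tables and return a pointer to the PTE that maps
 * 'addr'.  On non-PAE x86 the pmd level is folded into the pgd.  Callers
 * only pass direct-mapped addresses, so a missing level is a BUG().
 */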
static inline pte_t *get_ptep(unsigned long addr)
{
    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
    pgd = pgd_offset_k(addr);

    if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();

    pmd = pmd_offset(pgd, addr);
    if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();

    ptep = pte_offset(pmd, addr);

    return ptep;
}
/* Main function for relinquishing memory. */
static unsigned long inflate_balloon(unsigned long num_pages)
{
    unsigned long *parray;
    unsigned long *currp;
    unsigned long curraddr;
    unsigned long ret = 0;
    unsigned long i, j;

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
        return -EFAULT;
    }

    currp = parray;

    for ( i = 0; i < num_pages; i++, currp++ )
    {
        struct page *page = alloc_page(GFP_HIGHUSER);

        /* If allocation fails then free all reserved pages. */
        if ( page == NULL )
        {
            printk(KERN_ERR "Unable to inflate balloon by %ld, only"
                   " %ld pages free.\n", num_pages, i);
            currp = parray;
            for ( j = 0; j < i; j++, currp++ )
                __free_page(mem_map + *currp);
            ret = -EFAULT;
            goto cleanup;
        }

        *currp = page - mem_map; /* pfn of the newly allocated page */
    }

    for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
    {
        unsigned long mfn = phys_to_machine_mapping[*currp];
        curraddr = (unsigned long)page_address(mem_map + *currp);
        /* Blow away page contents for security, and also p.t. ref if any. */
        if ( curraddr != 0 )
        {
            scrub_pages(curraddr, 1);
            queue_l1_entry_update(get_ptep(curraddr), 0);
        }
#ifdef CONFIG_XEN_SCRUB_PAGES
        else
        {
            void *p = kmap(&mem_map[*currp]);
            scrub_pages(p, 1);
            kunmap(&mem_map[*currp]);
        }
#endif
        phys_to_machine_mapping[*currp] = DEAD;
        *currp = mfn;
    }

    /* Flush updates through and flush the TLB. */
    xen_tlb_flush();

    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                parray, num_pages, 0);
    if ( unlikely(ret != num_pages) )
    {
        printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
        goto cleanup;
    }

    credit += num_pages;
    ret = num_pages;

 cleanup:
    vfree(parray);

    return ret;
}
/*
 * Install new mem pages obtained by deflate_balloon. The function walks
 * the phys->machine mapping table looking for DEAD entries and populates
 * them.
 */
static unsigned long process_returned_pages(unsigned long * parray,
                                            unsigned long num)
{
    /* Currently this function is rather simplistic, as it assumes
     * that the domain reclaims only the number of pages previously
     * released. This is to change soon: the code to extend page
     * tables etc. will be incorporated here.
     */
    unsigned long tot_pages = most_seen_pages;
    unsigned long * curr = parray;
    unsigned long num_installed;
    unsigned long i;

    num_installed = 0;
    for ( i = 0; (i < tot_pages) && (num_installed < num); i++ )
    {
        if ( phys_to_machine_mapping[i] == DEAD )
        {
            phys_to_machine_mapping[i] = *curr;
            queue_machphys_update(*curr, i);
            if ( i < max_low_pfn )
                queue_l1_entry_update(
                    get_ptep((unsigned long)__va(i << PAGE_SHIFT)),
                    ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));

            __free_page(mem_map + i);

            curr++;
            num_installed++;
        }
    }

    return num_installed;
}
unsigned long deflate_balloon(unsigned long num_pages)
{
    unsigned long ret;
    unsigned long * parray;

    if ( num_pages > credit )
    {
        printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n",
               num_pages, credit);
        return -EAGAIN;
    }

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
        return 0;
    }

    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
                                parray, num_pages, 0);
    if ( unlikely(ret != num_pages) )
    {
        printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
               ret);
        goto cleanup;
    }

    if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
    {
        printk(KERN_WARNING
               "deflate_balloon: restored only %lx of %lx pages.\n",
               ret, num_pages);
        goto cleanup;
    }

    ret = num_pages;
    credit -= num_pages;

 cleanup:
    vfree(parray);

    return ret;
}
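
/* With 4KB pages (PAGE_SHIFT == 12), 1MB is 2^8 pages, so shifting a
   page count right by 8 converts it to megabytes. */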
#define PAGE_TO_MB_SHIFT 8
/*
 * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c.
 * The loops do go through all of low memory (ZONE_NORMAL); the
 * old pages have _PAGE_PRESENT set and so get skipped.
 * If low memory is not full, the new pages are used to fill it, going
 * from cur_low_pfn to low_pfn. High memory is not direct mapped, so
 * no extension is needed for new high memory.
 */
static void pagetable_extend(int cur_low_pfn, int newpages)
{
    unsigned long vaddr, end;
    pgd_t *kpgd, *pgd, *pgd_base;
    int i, j, k;
    pmd_t *kpmd, *pmd;
    pte_t *kpte, *pte, *pte_base;
    int low_pfn = min(cur_low_pfn+newpages, (int)max_low_pfn);

    /*
     * This can be zero as well - no problem, in that case we exit
     * the loops anyway due to the PTRS_PER_* conditions.
     */
    end = (unsigned long)__va(low_pfn*PAGE_SIZE);

    pgd_base = init_mm.pgd;
    i = __pgd_offset(PAGE_OFFSET);
    pgd = pgd_base + i;

    for (; i < PTRS_PER_PGD; pgd++, i++) {
        vaddr = i*PGDIR_SIZE;
        if (end && (vaddr >= end))
            break;
        pmd = (pmd_t *)pgd;
        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
            if (end && (vaddr >= end))
                break;

            /* Filled in for us already? */
            if ( pmd_val(*pmd) & _PAGE_PRESENT )
                continue;

            pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
            if ( pte_base == NULL ) BUG(); /* no memory for a new page table */

            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
                if (end && (vaddr >= end))
                    break;
                *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
            }
            kpgd = pgd_offset_k((unsigned long)pte_base);
            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
            kpte = pte_offset(kpmd, (unsigned long)pte_base);
            queue_l1_entry_update(kpte,
                                  (*(unsigned long *)kpte)&~_PAGE_RW);
            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
            XEN_flush_page_update_queue();
        }
    }
}
/*
 * claim_new_pages() asks xen to increase this domain's memory reservation
 * and return a list of the new pages of memory. These new pages are
 * added to the free list of the memory manager.
 *
 * Available RAM does not normally change while Linux runs. To make this work,
 * the linux mem= boot-time command line param must say how big memory could
 * possibly grow. Then setup_arch() in arch/xen/kernel/setup.c
 * sets max_pfn, max_low_pfn and the zones according to
 * this max memory size. The page tables themselves can only be
 * extended after xen has assigned new pages to this domain.
 */
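
/*
 * Illustrative example (values assumed): to let a domain that boots with
 * 128MB grow to 512MB, boot its kernel with "mem=512M" so that max_pfn
 * covers the larger size, raise the domain's limit in xen (see the
 * setdomainmaxmem reminder below), then write the new target to the
 * memory_target proc node.
 */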
static unsigned long
claim_new_pages(unsigned long num_pages)
{
    unsigned long new_page_cnt, pfn;
    unsigned long * parray, *curr;

    if ( most_seen_pages + num_pages > max_pfn )
        num_pages = max_pfn - most_seen_pages;
    if ( num_pages == 0 ) return 0;

    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
    if ( parray == NULL )
    {
        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
        return 0;
    }

    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
                                         parray, num_pages, 0);
    if ( new_page_cnt != num_pages )
    {
        printk(KERN_WARNING
            "claim_new_pages: xen granted only %lu of %lu requested pages\n",
            new_page_cnt, num_pages);

        /*
         * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
         * usually can dribble out a few pages and then hangs.
         */
        if ( new_page_cnt < 1000 )
        {
            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                  parray, new_page_cnt, 0);
            vfree(parray);
            return -EFAULT;
        }
    }
    memcpy(phys_to_machine_mapping + most_seen_pages, parray,
           new_page_cnt * sizeof(unsigned long));

    pagetable_extend(most_seen_pages, new_page_cnt);

    for ( pfn = most_seen_pages, curr = parray;
          pfn < most_seen_pages + new_page_cnt;
          pfn++, curr++ )
    {
        struct page *page = mem_map + pfn;

#ifndef CONFIG_HIGHMEM
        if ( pfn >= max_low_pfn )
        {
            printk(KERN_WARNING "Warning: only %ldMB will be used.\n",
                   pfn >> PAGE_TO_MB_SHIFT);
            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
            break;
        }
#endif
        queue_machphys_update(*curr, pfn);
        if ( pfn < max_low_pfn )
            queue_l1_entry_update(
                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));

        XEN_flush_page_update_queue();

        /* This next bit mimics arch/xen/mm/init.c:one_highpage_init(). */
        ClearPageReserved(page);
        if ( pfn >= max_low_pfn )
            set_bit(PG_highmem, &page->flags);
        set_page_count(page, 1);
        __free_page(page);
    }

    vfree(parray);

    return new_page_cnt;
}
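
/*
 * Example of driving the proc interface defined below from a shell
 * (the full path is an assumption based on create_xen_proc_entry()):
 *
 *   echo 96M > /proc/xen/memory_target    # shrink the domain to 96MB
 *   cat /proc/xen/memory_target           # current size, in bytes
 *
 * The written string is parsed with memparse(), so K/M/G suffixes work.
 */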
static int balloon_write(struct file *file, const char *buffer,
                         u_long count, void *data)
{
    char memstring[64], *endchar;
    int len, i;
    unsigned long target;
    unsigned long long targetbytes;

    /* Only admin can play with the balloon :) */
    if ( !capable(CAP_SYS_ADMIN) )
        return -EPERM;

    if ( count > sizeof(memstring) )
        return -EFBIG;

    len = strnlen_user(buffer, count);
    if ( len == 0 ) return -EBADMSG;
    if ( len == 1 ) return 1; /* input starts with a NUL char */
    if ( strncpy_from_user(memstring, buffer, len) < 0 )
        return -EFAULT;

    endchar = memstring;
    for ( i = 0; i < len; ++i, ++endchar )
        if ( (memstring[i] < '0') || (memstring[i] > '9') )
            break;
    if ( i == 0 )
        return -EBADMSG;

    targetbytes = memparse(memstring, &endchar);
    target = targetbytes >> PAGE_SHIFT;

    if ( target < current_pages )
    {
        int change = inflate_balloon(current_pages - target);
        if ( change <= 0 )
            return change;

        current_pages -= change;
        printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
               change >> PAGE_TO_MB_SHIFT, current_pages >> PAGE_TO_MB_SHIFT);
    }
    else if ( target > current_pages )
    {
        int change, reclaim = min(target, most_seen_pages) - current_pages;

        if ( reclaim )
        {
            change = deflate_balloon(reclaim);
            if ( change <= 0 )
                return change;
            current_pages += change;
            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
                   change >> PAGE_TO_MB_SHIFT, current_pages >> PAGE_TO_MB_SHIFT);
        }

        if ( most_seen_pages < target )
        {
            int growth = claim_new_pages(target - most_seen_pages);
            if ( growth <= 0 )
                return growth;
            most_seen_pages += growth;
            current_pages += growth;
            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
                   growth >> PAGE_TO_MB_SHIFT, current_pages >> PAGE_TO_MB_SHIFT);
        }
    }

    return len;
}
static int balloon_read(char *page, char **start, off_t off,
                        int count, int *eof, void *data)
{
    int len;
    len = sprintf(page, "%lu\n", current_pages << PAGE_SHIFT);

    if ( len <= off + count ) *eof = 1;
    *start = page + off;
    len -= off;
    if ( len > count ) len = count;
    if ( len < 0 ) len = 0;
    return len;
}
static int __init init_module(void)
{
    printk(KERN_ALERT "Starting Xen Balloon driver\n");

    most_seen_pages = current_pages = min(xen_start_info.nr_pages, max_pfn);
    if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
    {
        printk(KERN_ALERT "Unable to create balloon driver proc entry!");
        return -1;
    }

    balloon_pde->write_proc = balloon_write;
    balloon_pde->read_proc = balloon_read;

    /*
     * Make a new phys map if mem= says xen can give us memory to grow.
     */
    if ( max_pfn > xen_start_info.nr_pages )
    {
        extern unsigned long *phys_to_machine_mapping;
        unsigned long *newmap;
        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
        if ( newmap == NULL )
        {
            remove_xen_proc_entry("memory_target");
            balloon_pde = NULL;
            return -ENOMEM;
        }
        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
        memcpy(newmap, phys_to_machine_mapping,
               xen_start_info.nr_pages * sizeof(unsigned long));
        phys_to_machine_mapping = newmap;
    }

    return 0;
}
static void __exit cleanup_module(void)
{
    if ( balloon_pde != NULL )
    {
        /* Must match the name passed to create_xen_proc_entry() above. */
        remove_xen_proc_entry("memory_target");
        balloon_pde = NULL;
    }
}

module_init(init_module);
module_exit(cleanup_module);