bitkeeper revision 1.1159.187.42 (41acab13co1dnyoD3jJfv5m4ox_FFg)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Tue, 30 Nov 2004 17:17:07 +0000 (17:17 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Tue, 30 Nov 2004 17:17:07 +0000 (17:17 +0000)
Major balloon driver cleanups. This has required some moderately large
changes to start-of-day code in Linux 2.4 and 2.6.

linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c
linux-2.4.28-xen-sparse/arch/xen/mm/init.c
linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c
linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c
linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c
linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h

diff --git a/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c b/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c
index 72454469d0a2d5e84e19dce58e3ac899caec44ce..3b27174ba66797150fe036443fa0e4b6c154c1b6 100644
@@ -275,7 +275,8 @@ void __init setup_arch(char **cmdline_p)
      * arch/xen/drivers/balloon/balloon.c
      */
     mem_param = parse_mem_cmdline(cmdline_p);
-    if (!mem_param) mem_param = xen_start_info.nr_pages;
+    if (mem_param < xen_start_info.nr_pages)
+        mem_param = xen_start_info.nr_pages;
 
 #define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
@@ -303,6 +304,7 @@ void __init setup_arch(char **cmdline_p)
             printk(KERN_WARNING "Use a PAE enabled kernel.\n");
         else
             printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
+        max_pfn = lmax_low_pfn;
 #else /* !CONFIG_HIGHMEM */
 #ifndef CONFIG_X86_PAE
         if (max_pfn > MAX_NONPAE_PFN) {
@@ -350,8 +352,6 @@ void __init setup_arch(char **cmdline_p)
      */
     max_low_pfn = lmax_low_pfn;
 
-
-
 #ifdef CONFIG_BLK_DEV_INITRD
     if ( xen_start_info.mod_start != 0 )
     {
@@ -375,6 +375,20 @@ void __init setup_arch(char **cmdline_p)
 
     paging_init();
 
+    /* Make sure we have a large enough P->M table. */
+    if ( max_pfn > xen_start_info.nr_pages )
+    {
+        phys_to_machine_mapping = alloc_bootmem_low_pages(
+            max_pfn * sizeof(unsigned long));
+        memset(phys_to_machine_mapping, ~0, max_pfn * sizeof(unsigned long));
+        memcpy(phys_to_machine_mapping,
+               (unsigned long *)xen_start_info.mfn_list,
+               xen_start_info.nr_pages * sizeof(unsigned long));
+        free_bootmem(__pa(xen_start_info.mfn_list), 
+                     PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+                                     sizeof(unsigned long))));
+    }
+
     pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
     for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
     {  
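
The hunk above is the core start-of-day change for 2.4: when "mem=" asks for more
memory than Xen initially granted (max_pfn > nr_pages), the kernel allocates a
phys-to-machine table sized for the eventual maximum, marks the not-yet-populated
tail invalid, and copies in the translations Xen provided at boot. The following
standalone sketch restates that pattern; the wrapper name extend_p2m_table() is
invented for illustration, the helpers are the ones used in the hunk, and
INVALID_P2M_ENTRY is assumed to be the all-ones value that memset(~0) produces.

    /* Sketch only: grow the P->M table to cover max_pfn entries. */
    static unsigned long *extend_p2m_table(unsigned long *old_p2m,
                                           unsigned long nr_pages, /* frames granted by Xen   */
                                           unsigned long max_pfn)  /* frames we may ever hold */
    {
        unsigned long *p2m;

        if (max_pfn <= nr_pages)
            return old_p2m;                          /* nothing to grow */

        p2m = alloc_bootmem_low_pages(max_pfn * sizeof(unsigned long));

        /* Slots beyond the initial allocation have no machine frame yet. */
        memset(p2m, ~0, max_pfn * sizeof(unsigned long));

        /* Preserve the pfn -> mfn translations handed over at boot. */
        memcpy(p2m, old_p2m, nr_pages * sizeof(unsigned long));

        /* The old table lives in bootmem supplied by Xen; give it back. */
        free_bootmem(__pa(old_p2m),
                     PFN_PHYS(PFN_UP(nr_pages * sizeof(unsigned long))));

        return p2m;
    }
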
diff --git a/linux-2.4.28-xen-sparse/arch/xen/mm/init.c b/linux-2.4.28-xen-sparse/arch/xen/mm/init.c
index 6a694022bf2d82aada5f28127116ed394488cc73..dd622aebda18aabb7f0ef0b602f12999e8b020d7 100644
@@ -213,23 +213,16 @@ static void __init fixrange_init (unsigned long start,
 
 static void __init pagetable_init (void)
 {
-    unsigned long vaddr, end;
+    unsigned long vaddr, end, ram_end;
     pgd_t *kpgd, *pgd, *pgd_base;
     int i, j, k;
     pmd_t *kpmd, *pmd;
     pte_t *kpte, *pte, *pte_base;
 
-    /* create tables only for boot_pfn frames.  max_low_pfn may be sized for
-     * pages yet to be allocated from the hypervisor, or it may be set
-     * to override the xen_start_info amount of memory
-     */
-    int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn);
-
-    /*
-     * This can be zero as well - no problem, in that case we exit
-     * the loops anyway due to the PTRS_PER_* conditions.
-     */
-    end = (unsigned long)__va(boot_pfn *PAGE_SIZE);
+    end     = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
+    ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE);
+    if ( ram_end > end )
+        ram_end = end;
 
     pgd_base = init_mm.pgd;
     i = __pgd_offset(PAGE_OFFSET);
@@ -237,12 +230,12 @@ static void __init pagetable_init (void)
 
     for (; i < PTRS_PER_PGD; pgd++, i++) {
         vaddr = i*PGDIR_SIZE;
-        if (end && (vaddr >= end))
+        if (vaddr >= end)
             break;
         pmd = (pmd_t *)pgd;
         for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
             vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
-            if (end && (vaddr >= end))
+            if (vaddr >= end)
                 break;
 
             /* Filled in for us already? */
@@ -250,10 +243,11 @@ static void __init pagetable_init (void)
                 continue;
 
             pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+            clear_page(pte_base);
 
             for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
                 vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
-                if (end && (vaddr >= end))
+                if (vaddr >= ram_end)
                     break;
                 *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
             }
@@ -329,28 +323,14 @@ static inline int page_is_ram (unsigned long pagenr)
     return 1;
 }
 
-static inline int page_kills_ppro(unsigned long pagenr)
-{
-    return 0;
-}
-
 #ifdef CONFIG_HIGHMEM
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+void __init one_highpage_init(struct page *page, int free_page)
 {
-    if (!page_is_ram(pfn)) {
-        SetPageReserved(page);
-        return;
-    }
-       
-    if (bad_ppro && page_kills_ppro(pfn)) {
-        SetPageReserved(page);
-        return;
-    }
-       
     ClearPageReserved(page);
     set_bit(PG_highmem, &page->flags);
     atomic_set(&page->count, 1);
-    __free_page(page);
+    if ( free_page )
+        __free_page(page);
     totalhigh_pages++;
 }
 #endif /* CONFIG_HIGHMEM */
@@ -392,8 +372,9 @@ static int __init free_pages_init(void)
             reservedpages++;
     }
 #ifdef CONFIG_HIGHMEM
-    for (pfn = xen_start_info.nr_pages-1; pfn >= highstart_pfn; pfn--)
-        one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
+    for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
+        one_highpage_init((struct page *) (mem_map + pfn), pfn,
+                          (pfn < xen_start_info.nr_pages));
     totalram_pages += totalhigh_pages;
 #endif
     return reservedpages;
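
Two limits now drive pagetable_init() above: page-table pages are allocated for all of
potential low memory (end, derived from max_low_pfn), while present PTEs are written
only for frames Xen actually supplied (ram_end, derived from nr_pages). A page the
balloon driver reclaims later therefore needs just one l1 update to become mapped; the
page tables never have to grow at run time, which is what lets the old
pagetable_extend() in balloon.c disappear. The same split shows up in free_pages_init():
every highmem struct page up to highend_pfn is initialised, but only those backed by a
real frame (pfn < nr_pages) are released to the allocator. A condensed, commented
sketch of the loop bounds, not new code:

    /* Sketch: the two limits used while building the 2.4 direct map. */
    unsigned long end, ram_end;

    end     = (unsigned long)__va(max_low_pfn * PAGE_SIZE);             /* table coverage    */
    ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE); /* populated entries */
    if (ram_end > end)
        ram_end = end;                 /* never populate past the tables themselves */

    /* The pgd/pmd loops run while vaddr < end, so pte pages exist for the  */
    /* whole direct map; the pte loop stops at ram_end, so only real frames */
    /* get present entries and the rest stay clear for the balloon driver.  */
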
diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c
index 8eb0add9d67c5c3a656ef1156adec4b49d56ae82..049e4aa0f6157a495a0d9407b44915abbfca94c9 100644
@@ -52,6 +52,9 @@
 #include "setup_arch_pre.h"
 #include <bios_ebda.h>
 
+/* Allows setting of maximum possible memory size  */
+static unsigned long xen_override_max_pfn;
+
 int disable_pse __initdata = 0;
 
 /*
@@ -718,8 +721,13 @@ static void __init parse_cmdline_early (char ** cmdline_p)
                                unsigned long long mem_size;
  
                                mem_size = memparse(from+4, &from);
+#if 0
                                limit_regions(mem_size);
                                userdef=1;
+#else
+                               xen_override_max_pfn =
+                                       (unsigned long)(mem_size>>PAGE_SHIFT);
+#endif
                        }
                }
 
@@ -857,6 +865,7 @@ static void __init parse_cmdline_early (char ** cmdline_p)
        }
 }
 
+#if 0 /* !XEN */
 /*
  * Callback for efi_memory_walk.
  */
@@ -873,7 +882,6 @@ efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
        return 0;
 }
 
-
 /*
  * Find the highest page frame number we have available
  */
@@ -900,6 +908,15 @@ void __init find_max_pfn(void)
                        max_pfn = end;
        }
 }
+#else
+/* We don't use the fake e820 because we need to respond to user override. */
+void __init find_max_pfn(void)
+{
+       if ( xen_override_max_pfn < xen_start_info.nr_pages )
+               xen_override_max_pfn = xen_start_info.nr_pages;
+       max_pfn = xen_override_max_pfn;
+}
+#endif /* XEN */
 
 /*
  * Determine low and high memory ranges:
@@ -1414,6 +1431,21 @@ void __init setup_arch(char **cmdline_p)
 #endif
        paging_init();
 
+       /* Make sure we have a large enough P->M table. */
+       if (max_pfn > xen_start_info.nr_pages) {
+               phys_to_machine_mapping = alloc_bootmem_low_pages(
+                       max_pfn * sizeof(unsigned long));
+               memset(phys_to_machine_mapping, ~0,
+                       max_pfn * sizeof(unsigned long));
+               memcpy(phys_to_machine_mapping,
+                       (unsigned long *)xen_start_info.mfn_list,
+                       xen_start_info.nr_pages * sizeof(unsigned long));
+               free_bootmem(
+                       __pa(xen_start_info.mfn_list), 
+                       PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
+                       sizeof(unsigned long))));
+       }
+
        pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
        for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
        {       
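
On 2.6 the same idea is wired through the command line: under Xen, "mem=" no longer
clips the (fake) e820 map but records an upper bound in xen_override_max_pfn, and
find_max_pfn() takes the larger of that bound and the initial allocation from Xen.
max_pfn therefore sizes mem_map, the zones and the P->M table for the largest the
domain may ever become. Condensed from the hunks above, with comments added:

    /* Sketch: how max_pfn is chosen under Xen on 2.6 (names as in the patch). */
    static unsigned long xen_override_max_pfn;   /* set from "mem=", else 0 */

    void __init find_max_pfn(void)
    {
        /* Never size the domain below what Xen has already granted. */
        if (xen_override_max_pfn < xen_start_info.nr_pages)
            xen_override_max_pfn = xen_start_info.nr_pages;

        /* Everything downstream (zones, mem_map, P->M table) uses this. */
        max_pfn = xen_override_max_pfn;
    }
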
diff --git a/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c b/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c
index fbf48798d34ed530b97d056bf48c7c0f8b3442bd..e2408ea3f717e2e15f3941440a4b9beedb7f8a53 100644
@@ -77,6 +77,12 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 {
        if (pmd_none(*pmd)) {
                pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+               /* XEN: Make the new p.t. read-only. */
+               pgd_t *kpgd = pgd_offset_k((unsigned long)page_table);
+               pmd_t *kpmd = pmd_offset(kpgd, (unsigned long)page_table);
+               pte_t *kpte = pte_offset_kernel(kpmd, (unsigned long)page_table);
+               xen_l1_entry_update(
+                       kpte, (*(unsigned long *)kpte)&~_PAGE_RW);
                set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
                if (page_table != pte_offset_kernel(pmd, 0))
                        BUG();  
@@ -141,25 +147,6 @@ void __init protect_page(pgd_t *pgd, void *page, int mode)
                                        pte_val_ma(*pte) | _PAGE_RW);
 }
 
-void __init protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode)
-{
-       pmd_t *pmd;
-       pte_t *pte;
-       int pgd_idx, pmd_idx;
-
-       protect_page(dpgd, spgd, mode);
-
-       for (pgd_idx = 0; pgd_idx < PTRS_PER_PGD_NO_HV; spgd++, pgd_idx++) {
-               pmd = pmd_offset(spgd, 0);
-               if (pmd_none(*pmd))
-                       continue;
-               for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
-                       pte = pte_offset_kernel(pmd, 0);
-                       protect_page(dpgd, pte, mode);
-               }
-       }
-}
-
 static inline int is_kernel_text(unsigned long addr)
 {
        if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end)
@@ -180,6 +167,10 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
        pte_t *pte;
        int pgd_idx, pmd_idx, pte_ofs;
 
+       unsigned long max_ram_pfn = xen_start_info.nr_pages;
+       if (max_ram_pfn > max_low_pfn)
+               max_ram_pfn = max_low_pfn;
+
        pgd_idx = pgd_index(PAGE_OFFSET);
        pgd = pgd_base + pgd_idx;
        pfn = 0;
@@ -207,7 +198,10 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
                                pte = one_page_table_init(pmd);
 
                                pte += pte_ofs;
-                               for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
+                               /* XEN: Only map initial RAM allocation. */
+                               for (; pte_ofs < PTRS_PER_PTE && pfn < max_ram_pfn; pte++, pfn++, pte_ofs++) {
+                                               if (pte_present(*pte))
+                                                       continue;
                                                if (is_kernel_text(address))
                                                        set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
                                                else
@@ -311,7 +305,8 @@ void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
                ClearPageReserved(page);
                set_bit(PG_highmem, &page->flags);
                set_page_count(page, 1);
-               __free_page(page);
+               if (pfn < xen_start_info.nr_pages)
+                       __free_page(page);
                totalhigh_pages++;
        } else
                SetPageReserved(page);
@@ -347,7 +342,8 @@ extern void __init remap_numa_kva(void);
 static void __init pagetable_init (void)
 {
        unsigned long vaddr;
-       pgd_t *pgd_base = swapper_pg_dir;
+       pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
+       pgd_t *new_pgd = swapper_pg_dir;
 
 #ifdef CONFIG_X86_PAE
        int i;
@@ -368,7 +364,22 @@ static void __init pagetable_init (void)
                __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
        }
 
-       kernel_physical_mapping_init(pgd_base);
+       /*
+        * Switch to proper mm_init page directory. Initialise from the current
+        * page directory, write-protect the new page directory, then switch to
+        * it. We clean up by write-enabling and then freeing the old page dir.
+        */
+       memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
+       protect_page(new_pgd, new_pgd, PROT_ON);
+       queue_pgd_pin(__pa(new_pgd));
+       load_cr3(new_pgd);
+       queue_pgd_unpin(__pa(old_pgd));
+       __flush_tlb_all(); /* implicit flush */
+       protect_page(new_pgd, old_pgd, PROT_OFF);
+       flush_page_update_queue();
+       free_bootmem(__pa(old_pgd), PAGE_SIZE);
+
+       kernel_physical_mapping_init(new_pgd);
        remap_numa_kva();
 
        /*
@@ -376,9 +387,9 @@ static void __init pagetable_init (void)
         * created - mappings will be set by set_fixmap():
         */
        vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
-       page_table_range_init(vaddr, 0, pgd_base);
+       page_table_range_init(vaddr, 0, new_pgd);
 
-       permanent_kmaps_init(pgd_base);
+       permanent_kmaps_init(new_pgd);
 
 #ifdef CONFIG_X86_PAE
        /*
@@ -388,7 +399,7 @@ static void __init pagetable_init (void)
         * All user-space mappings are explicitly cleared after
         * SMP startup.
         */
-       pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+       new_pgd[0] = new_pgd[USER_PTRS_PER_PGD];
 #endif
 }
 
@@ -545,8 +556,6 @@ out:
  */
 void __init paging_init(void)
 {
-       pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
-       pgd_t *new_pgd = swapper_pg_dir;
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
        int i;
 #endif
@@ -559,25 +568,6 @@ void __init paging_init(void)
 
        pagetable_init();
 
-       /*
-        * Write-protect both page tables within both page tables.
-        * That's three ops, as the old p.t. is already protected
-        * within the old p.t. Then pin the new table, switch tables,
-        * and unprotect the old table.
-        */
-       protect_pagetable(new_pgd, old_pgd, PROT_ON);
-       protect_pagetable(new_pgd, new_pgd, PROT_ON);
-       protect_pagetable(old_pgd, new_pgd, PROT_ON);
-       queue_pgd_pin(__pa(new_pgd));
-       load_cr3(new_pgd);
-       queue_pgd_unpin(__pa(old_pgd));
-       __flush_tlb_all(); /* implicit flush */
-       protect_pagetable(new_pgd, old_pgd, PROT_OFF);
-       flush_page_update_queue();
-
-       /* Completely detached from old tables, so free them. */
-       free_bootmem(__pa(old_pgd), xen_start_info.nr_pt_frames << PAGE_SHIFT);
-
 #ifdef CONFIG_X86_PAE
        /*
         * We will bail out later - printk doesn't work right now so
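
The page-directory hand-over that used to be spread across paging_init() (removed in
the hunks just above) now sits entirely inside pagetable_init(), and the three-way
protect_pagetable() dance is gone, presumably because the pte pages inherited from the
Xen-built boot tables are already read-only and any pte page allocated afterwards is
made read-only inside one_page_table_init() before it is hooked in, so only
swapper_pg_dir itself still needs protecting before the pin. A commented restatement
of the sequence, using the same calls as the hunk; the ordering is the point, not the
helpers:

    /* Sketch: taking over swapper_pg_dir from the Xen-provided boot tables. */
    pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;   /* built by the domain builder */
    pgd_t *new_pgd = swapper_pg_dir;

    memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV * sizeof(pgd_t)); /* share existing ptes */
    protect_page(new_pgd, new_pgd, PROT_ON);  /* pgd must be read-only before Xen pins it */
    queue_pgd_pin(__pa(new_pgd));             /* Xen validates and pins the new directory */
    load_cr3(new_pgd);                        /* switch to it                             */
    queue_pgd_unpin(__pa(old_pgd));           /* the old directory is no longer in use    */
    __flush_tlb_all();                        /* implicit flush                           */
    protect_page(new_pgd, old_pgd, PROT_OFF); /* re-enable writes to the old pgd page ... */
    flush_page_update_queue();
    free_bootmem(__pa(old_pgd), PAGE_SIZE);   /* ... so bootmem can hand it out again     */
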
diff --git a/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c
index 566a9578dd5d871b88720f5974708036576b8193..94658f1e54b37a6bf92ce4fb39d5d87c2e546b78 100644
@@ -4,6 +4,7 @@
  * Xen balloon driver - enables returning/claiming memory to/from Xen.
  *
  * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
  * 
  * This file may be distributed separately from the Linux kernel, or
  * incorporated into other software packages, subject to the following license:
 #include <asm/tlb.h>
 #include <linux/list.h>
 
-/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
-#define USER_INFLATE_BALLOON  1   /* return mem to hypervisor */
-#define USER_DEFLATE_BALLOON  2   /* claim mem from hypervisor */
-typedef struct user_balloon_op {
-    unsigned int  op;
-    unsigned long size;
-} user_balloon_op_t;
-/* END OF USER DEFINE */
-
 static struct proc_dir_entry *balloon_pde;
 
 unsigned long credit;
-static unsigned long current_pages, most_seen_pages;
+static unsigned long current_pages;
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 /* Use the private and mapping fields of struct page as a list. */
@@ -78,71 +70,66 @@ static unsigned long current_pages, most_seen_pages;
 #define pte_offset_kernel pte_offset
 #endif
 
+#define IPRINTK(fmt, args...) \
+    printk(KERN_INFO "xen_mem: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+    printk(KERN_WARNING "xen_mem: " fmt, ##args)
+
 /* List of ballooned pages, threaded through the mem_map array. */
 LIST_HEAD(ballooned_pages);
 
-/** add_ballooned_page - remember we've ballooned a pfn */
-void add_ballooned_page(unsigned long pfn)
+/* balloon_append: add the given page to the balloon. */
+void balloon_append(struct page *page)
 {
-    struct page *p = mem_map + pfn;
-
-    list_add(PAGE_TO_LIST(p), &ballooned_pages);
+    list_add(PAGE_TO_LIST(page), &ballooned_pages);
 }
 
-/* rem_ballooned_page - recall a ballooned page and remove from list. */
-struct page *rem_ballooned_page(void)
+/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
+struct page *balloon_retrieve(void)
 {
-    if(!list_empty(&ballooned_pages))
-    {
-        struct page *ret;
-
-        ret = LIST_TO_PAGE(ballooned_pages.next);
-       UNLIST_PAGE(ret);
+    struct page *page;
 
-        return ret;
-    }
-    else
+    if ( list_empty(&ballooned_pages) )
         return NULL;
+
+    page = LIST_TO_PAGE(ballooned_pages.next);
+    UNLIST_PAGE(page);
+    return page;
 }
 
 static inline pte_t *get_ptep(unsigned long addr)
 {
-    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
-    pgd = pgd_offset_k(addr);
+    pgd_t *pgd;
+    pmd_t *pmd;
 
+    pgd = pgd_offset_k(addr);
     if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
 
     pmd = pmd_offset(pgd, addr);
     if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
 
-    ptep = pte_offset_kernel(pmd, addr);
-
-    return ptep;
+    return pte_offset_kernel(pmd, addr);
 }
 
 /* Main function for relinquishing memory. */
 static unsigned long inflate_balloon(unsigned long num_pages)
-
 {
-    unsigned long *parray;
-    unsigned long *currp;
-    unsigned long curraddr;
-    unsigned long ret = 0;
-    unsigned long i, j;
+    unsigned long *parray, *currp, curraddr, ret = 0, i, j, mfn, pfn;
+    struct page *page;
 
     parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
     if ( parray == NULL )
     {
-        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
-        return -EFAULT;
+        WPRINTK("inflate_balloon: Unable to vmalloc parray\n");
+        return -ENOMEM;
     }
 
     currp = parray;
 
     for ( i = 0; i < num_pages; i++, currp++ )
     {
-        struct page *page = alloc_page(GFP_HIGHUSER);
-        unsigned long pfn = page - mem_map;
+        page = alloc_page(GFP_HIGHUSER);
+        pfn  = page - mem_map;
 
         /* If allocation fails then free all reserved pages. */
         if ( page == NULL )
@@ -160,10 +147,9 @@ static unsigned long inflate_balloon(unsigned long num_pages)
         *currp = pfn;
     }
 
-
     for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
     {
-        unsigned long mfn = phys_to_machine_mapping[*currp];
+        mfn      = phys_to_machine_mapping[*currp];
         curraddr = (unsigned long)page_address(mem_map + *currp);
         /* Blow away page contents for security, and also p.t. ref if any. */
         if ( curraddr != 0 )
@@ -180,7 +166,7 @@ static unsigned long inflate_balloon(unsigned long num_pages)
         }
 #endif
 
-        add_ballooned_page(*currp);
+        balloon_append(&mem_map[*currp]);
 
         phys_to_machine_mapping[*currp] = INVALID_P2M_ENTRY;
         *currp = mfn;
@@ -206,62 +192,45 @@ static unsigned long inflate_balloon(unsigned long num_pages)
     return ret;
 }
 
-/*
- * Install new mem pages obtained by deflate_balloon. function walks 
- * phys->machine mapping table looking for DEAD entries and populates
- * them.
- */
-static unsigned long process_returned_pages(unsigned long * parray, 
-                                       unsigned long num)
+/* Install a set of new pages (@mfn_list, @nr_mfns) into the memory map. */
+static unsigned long process_returned_pages(
+    unsigned long *mfn_list, unsigned long nr_mfns)
 {
-    /* currently, this function is rather simplistic as 
-     * it is assumed that domain reclaims only number of 
-     * pages previously released. this is to change soon
-     * and the code to extend page tables etc. will be 
-     * incorporated here.
-     */
-     
-    unsigned long * curr = parray;
-    unsigned long num_installed;
-
+    unsigned long pfn, i;
     struct page *page;
 
-    num_installed = 0;
-    while ( (page = rem_ballooned_page()) != NULL )
+    for ( i = 0; i < nr_mfns; i++ )
     {
-        unsigned long pfn;
-
-        if ( num_installed == num )
+        if ( (page = balloon_retrieve()) != NULL )
             break;
 
         pfn = page - mem_map;
-
-        if(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
-        {
-            printk("BUG: Tried to unballoon existing page!");
+        if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
             BUG();
-        }
 
-        phys_to_machine_mapping[pfn] = *curr;
-        queue_machphys_update(*curr, pfn);
-        if (pfn<max_low_pfn)
+        /* Update P->M and M->P tables. */
+        phys_to_machine_mapping[pfn] = mfn_list[i];
+        queue_machphys_update(mfn_list[i], pfn);
+
+        /* Link back into the page tables if it's not a highmem page. */
+        if ( pfn < max_low_pfn )
             queue_l1_entry_update(
                 get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
-                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
-        
-        __free_page(mem_map + pfn);
+                (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
 
-        curr++;
-        num_installed++;
+        /* Finally, relinquish the memory back to the system allocator. */
+        ClearPageReserved(page);
+        set_page_count(page, 1);
+        __free_page(page);
     }
 
-    return num_installed;
+    return i;
 }
 
 unsigned long deflate_balloon(unsigned long num_pages)
 {
     unsigned long ret;
-    unsigned long * parray;
+    unsigned long *parray;
 
     if ( num_pages > credit )
     {
@@ -305,205 +274,25 @@ unsigned long deflate_balloon(unsigned long num_pages)
 
 #define PAGE_TO_MB_SHIFT 8
 
-/*
- * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c 
- * The loops do go through all of low memory (ZONE_NORMAL).  The
- * old pages have _PAGE_PRESENT set and so get skipped.
- * If low memory is not full, the new pages are used to fill it, going
- * from cur_low_pfn to low_pfn.   high memory is not direct mapped so
- * no extension is needed for new high memory.
- */
-
-static void pagetable_extend (int cur_low_pfn, int newpages)
-{
-    unsigned long vaddr, end;
-    pgd_t *kpgd, *pgd, *pgd_base;
-    int i, j, k;
-    pmd_t *kpmd, *pmd;
-    pte_t *kpte, *pte, *pte_base;
-    int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn);
-
-    /*
-     * This can be zero as well - no problem, in that case we exit
-     * the loops anyway due to the PTRS_PER_* conditions.
-     */
-    end = (unsigned long)__va(low_pfn*PAGE_SIZE);
-
-    pgd_base = init_mm.pgd;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-    i = pgd_index(PAGE_OFFSET);
-#else
-    i = __pgd_offset(PAGE_OFFSET);
-#endif
-    pgd = pgd_base + i;
-
-    for (; i < PTRS_PER_PGD; pgd++, i++) {
-        vaddr = i*PGDIR_SIZE;
-        if (end && (vaddr >= end))
-            break;
-        pmd = (pmd_t *)pgd;
-        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
-            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
-            if (end && (vaddr >= end))
-                break;
-
-            /* Filled in for us already? */
-            if ( pmd_val(*pmd) & _PAGE_PRESENT )
-                continue;
-
-            pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
-
-            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
-                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
-                if (end && (vaddr >= end))
-                    break;
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
-                *pte = mk_pte(virt_to_page(vaddr), PAGE_KERNEL);
-#else
-               *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
-#endif
-            }
-            kpgd = pgd_offset_k((unsigned long)pte_base);
-            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
-            kpte = pte_offset_kernel(kpmd, (unsigned long)pte_base);
-            queue_l1_entry_update(kpte,
-                                  (*(unsigned long *)kpte)&~_PAGE_RW);
-            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
-            XEN_flush_page_update_queue();
-        }
-    }
-}
-
-/*
- * claim_new_pages() asks xen to increase this domain's memory  reservation
- * and return a list of the new pages of memory.  This new pages are
- * added to the free list of the memory manager.
- *
- * Available RAM does not normally change while Linux runs.  To make this work,
- * the linux mem= boottime command line param must say how big memory could
- * possibly grow.  Then setup_arch() in arch/xen/kernel/setup.c
- * sets max_pfn, max_low_pfn and the zones according to
- * this max memory size.   The page tables themselves can only be
- * extended after xen has assigned new pages to this domain.
- */
-
-static unsigned long
-claim_new_pages(unsigned long num_pages)
-{
-    unsigned long new_page_cnt, pfn;
-    unsigned long * parray, *curr;
-
-    if (most_seen_pages+num_pages> max_pfn)
-        num_pages = max_pfn-most_seen_pages;
-    if (num_pages==0) return -EINVAL;
-
-    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
-    if ( parray == NULL )
-    {
-        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
-        return 0;
-    }
-
-    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
-                                parray, num_pages, 0);
-    if ( new_page_cnt != num_pages )
-    {
-        printk(KERN_WARNING
-            "claim_new_pages: xen granted only %lu of %lu requested pages\n",
-            new_page_cnt, num_pages);
-
-        /* 
-         * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
-         * usually can dribble out a few pages and then hangs.
-         */
-        if ( new_page_cnt < 1000 )
-        {
-            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
-            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
-                                parray, new_page_cnt, 0);
-            return -EFAULT;
-        }
-    }
-    memcpy(phys_to_machine_mapping+most_seen_pages, parray,
-           new_page_cnt * sizeof(unsigned long));
-
-    pagetable_extend(most_seen_pages,new_page_cnt);
-
-    for ( pfn = most_seen_pages, curr = parray;
-          pfn < most_seen_pages+new_page_cnt;
-          pfn++, curr++ )
-    {
-        struct page *page = mem_map + pfn;
-
-#ifndef CONFIG_HIGHMEM
-        if ( pfn>=max_low_pfn )
-        {
-            printk(KERN_WARNING "Warning only %ldMB will be used.\n",
-               pfn>>PAGE_TO_MB_SHIFT);
-            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
-            break;
-        }
-#endif
-        queue_machphys_update(*curr, pfn);
-        if ( pfn < max_low_pfn )
-            queue_l1_entry_update(
-                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
-                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
-        
-        XEN_flush_page_update_queue();
-        
-        /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
-        ClearPageReserved(page);
-        if ( pfn >= max_low_pfn )
-            set_bit(PG_highmem, &page->flags);
-        set_page_count(page, 1);
-        __free_page(page);
-    }
-
-    vfree(parray);
-
-    return new_page_cnt;
-}
-
-
 static int balloon_try_target(int target)
 {
     int change, reclaim;
 
     if ( target < current_pages )
     {
-        int change = inflate_balloon(current_pages-target);
-        if ( change <= 0 )
+        if ( (change = inflate_balloon(current_pages-target)) <= 0 )
             return change;
-
         current_pages -= change;
         printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
             change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
     }
-    else if ( target > current_pages )
+    else if ( (reclaim = target - current_pages) > 0 )
     {
-        reclaim = min((unsigned long)target,most_seen_pages) - current_pages;
-
-        if ( reclaim )
-        {
-            change = deflate_balloon( reclaim );
-            if ( change <= 0 )
-                return change;
-            current_pages += change;
-            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
-                change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
-        }
-
-        if ( most_seen_pages < target )
-        {
-            int growth = claim_new_pages(target-most_seen_pages);
-            if ( growth <= 0 )
-                return growth;
-            most_seen_pages += growth;
-            current_pages += growth;
-            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
-                growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
-        }
+        if ( (change = deflate_balloon(reclaim)) <= 0 )
+            return change;
+        current_pages += change;
+        printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
+               change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
     }
 
     return 1;
@@ -640,12 +429,15 @@ static int balloon_read(char *page, char **start, off_t off,
 
 static int __init balloon_init(void)
 {
-    printk(KERN_ALERT "Starting Xen Balloon driver\n");
+    unsigned long pfn;
+    struct page *page;
+
+    IPRINTK("Initialising balloon driver.\n");
 
-    most_seen_pages = current_pages = min(xen_start_info.nr_pages,max_pfn);
+    current_pages = min(xen_start_info.nr_pages, max_pfn);
     if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
     {
-        printk(KERN_ALERT "Unable to create balloon driver proc entry!");
+        WPRINTK("Unable to create balloon driver proc entry!");
         return -1;
     }
 
@@ -661,18 +453,12 @@ static int __init balloon_init(void)
     (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx,
                                     CALLBACK_IN_BLOCKING_CONTEXT);
 
-    /* 
-     * make_module a new phys map if mem= says xen can give us memory  to grow
-     */
-    if ( max_pfn > xen_start_info.nr_pages )
+    /* Initialise the balloon with excess memory space. */
+    for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
     {
-        extern unsigned long *phys_to_machine_mapping;
-        unsigned long *newmap;
-        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
-        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
-        memcpy(newmap, phys_to_machine_mapping,
-               xen_start_info.nr_pages * sizeof(unsigned long));
-        phys_to_machine_mapping = newmap;
+        page = &mem_map[pfn];
+        if ( !PageReserved(page) )
+            balloon_append(page);
     }
 
     return 0;
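
The driver now tracks ballooned-out memory explicitly: every struct page with no
machine frame behind it is threaded onto ballooned_pages through otherwise-unused
fields of struct page, balloon_append() parks a page there and balloon_retrieve()
pulls one back when Xen returns a frame. At init time every pfn from nr_pages up to
max_pfn starts out in the balloon, which is what replaces the old most_seen_pages /
claim_new_pages() / pagetable_extend() machinery. A hedged usage sketch follows; pfn
and mfn are placeholders for values the real code obtains elsewhere, and the helpers
are the ones defined in the patch:

    /* Sketch: the two halves of the balloon, using the helpers above. */
    struct page *page;
    unsigned long pfn = 0, mfn = 0;           /* placeholders for the sketch */

    /* Inflating: the machine frame behind pfn has just been handed back to Xen. */
    balloon_append(&mem_map[pfn]);
    phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;

    /* Deflating: Xen has just granted a fresh machine frame, mfn. */
    if ((page = balloon_retrieve()) != NULL) {
        pfn = page - mem_map;
        phys_to_machine_mapping[pfn] = mfn;   /* rebuild P->M ...            */
        queue_machphys_update(mfn, pfn);      /* ... and M->P                */
        /* Lowmem pages also get a queue_l1_entry_update() to re-link the    */
        /* direct map; see process_returned_pages() above.                   */
        ClearPageReserved(page);
        set_page_count(page, 1);
        __free_page(page);                    /* back to the page allocator  */
    }
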
diff --git a/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h
index 235c4fc5a6d0c0eb9640812b6ef96804a3bf340d..85f0f1162cbb3e234260ba051bee725a015b8b49 100644
@@ -59,7 +59,6 @@ void do_hypervisor_callback(struct pt_regs *regs);
 #define PROT_ON  1
 #define PROT_OFF 0
 void /* __init */ protect_page(pgd_t *dpgd, void *page, int mode);
-void /* __init */ protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode);
 
 /* arch/xen/i386/kernel/head.S */
 void lgdt_finish(void);