ia64/xen-unstable

changeset 3221:1a3005f838bc

bitkeeper revision 1.1159.183.35 (41acab40rFEtjpNAJkBrPgWMpUHO5w)

Merge scramble.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-2.0-testing.bk
into scramble.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
author kaf24@scramble.cl.cam.ac.uk
date Tue Nov 30 17:17:52 2004 +0000 (2004-11-30)
parents d7e53f4de825 6f0846972a4c
children 89652683e374
files linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c linux-2.4.28-xen-sparse/arch/xen/mm/init.c linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h xen/arch/x86/x86_32/entry.S
line diff
     1.1 --- a/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c	Tue Nov 30 09:01:47 2004 +0000
     1.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c	Tue Nov 30 17:17:52 2004 +0000
     1.3 @@ -275,7 +275,8 @@ void __init setup_arch(char **cmdline_p)
     1.4       * arch/xen/drivers/balloon/balloon.c
     1.5       */
     1.6      mem_param = parse_mem_cmdline(cmdline_p);
     1.7 -    if (!mem_param) mem_param = xen_start_info.nr_pages;
     1.8 +    if (mem_param < xen_start_info.nr_pages)
     1.9 +        mem_param = xen_start_info.nr_pages;
    1.10  
    1.11  #define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
    1.12  #define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
    1.13 @@ -303,6 +304,7 @@ void __init setup_arch(char **cmdline_p)
    1.14              printk(KERN_WARNING "Use a PAE enabled kernel.\n");
    1.15          else
    1.16              printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
    1.17 +        max_pfn = lmax_low_pfn;
    1.18  #else /* !CONFIG_HIGHMEM */
    1.19  #ifndef CONFIG_X86_PAE
    1.20          if (max_pfn > MAX_NONPAE_PFN) {
    1.21 @@ -350,8 +352,6 @@ void __init setup_arch(char **cmdline_p)
    1.22       */
    1.23      max_low_pfn = lmax_low_pfn;
    1.24  
    1.25 -
    1.26 -
    1.27  #ifdef CONFIG_BLK_DEV_INITRD
    1.28      if ( xen_start_info.mod_start != 0 )
    1.29      {
    1.30 @@ -375,6 +375,20 @@ void __init setup_arch(char **cmdline_p)
    1.31  
    1.32      paging_init();
    1.33  
    1.34 +    /* Make sure we have a large enough P->M table. */
    1.35 +    if ( max_pfn > xen_start_info.nr_pages )
    1.36 +    {
    1.37 +        phys_to_machine_mapping = alloc_bootmem_low_pages(
    1.38 +            max_pfn * sizeof(unsigned long));
    1.39 +        memset(phys_to_machine_mapping, ~0, max_pfn * sizeof(unsigned long));
    1.40 +        memcpy(phys_to_machine_mapping,
    1.41 +               (unsigned long *)xen_start_info.mfn_list,
    1.42 +               xen_start_info.nr_pages * sizeof(unsigned long));
    1.43 +        free_bootmem(__pa(xen_start_info.mfn_list), 
    1.44 +                     PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
    1.45 +                                     sizeof(unsigned long))));
    1.46 +    }
    1.47 +
    1.48      pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
    1.49      for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
    1.50      {	
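
The two setup.c hunks above change the meaning of "mem=": it now sets a
ceiling on how large the domain may grow rather than a plain override, and
the phys-to-machine (P->M) table is reallocated to cover all of max_pfn,
with entries beyond the hypervisor's initial allocation poisoned until the
balloon driver populates them. A minimal sketch of that grow-and-poison
pattern (not part of the changeset; illustrative helper name, kernel
context assumed; ~0 matches the INVALID_P2M_ENTRY memset in the hunk):

    /* Sketch only -- mirrors the P->M reallocation added above. */
    static unsigned long *grow_p2m_table(unsigned long *old_p2m,
                                         unsigned long populated_pfns,
                                         unsigned long total_pfns)
    {
        unsigned long *p2m =
            alloc_bootmem_low_pages(total_pfns * sizeof(unsigned long));
        /* Poison first: pfns with no machine frame must read as invalid. */
        memset(p2m, ~0, total_pfns * sizeof(unsigned long));
        /* Then copy in the mappings the hypervisor actually handed over. */
        memcpy(p2m, old_p2m, populated_pfns * sizeof(unsigned long));
        return p2m;
    }
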
     2.1 --- a/linux-2.4.28-xen-sparse/arch/xen/mm/init.c	Tue Nov 30 09:01:47 2004 +0000
     2.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/mm/init.c	Tue Nov 30 17:17:52 2004 +0000
     2.3 @@ -213,23 +213,16 @@ static void __init fixrange_init (unsign
     2.4  
     2.5  static void __init pagetable_init (void)
     2.6  {
     2.7 -    unsigned long vaddr, end;
     2.8 +    unsigned long vaddr, end, ram_end;
     2.9      pgd_t *kpgd, *pgd, *pgd_base;
    2.10      int i, j, k;
    2.11      pmd_t *kpmd, *pmd;
    2.12      pte_t *kpte, *pte, *pte_base;
    2.13  
    2.14 -    /* create tables only for boot_pfn frames.  max_low_pfn may be sized for
    2.15 -     * pages yet to be allocated from the hypervisor, or it may be set
    2.16 -     * to override the xen_start_info amount of memory
    2.17 -     */
    2.18 -    int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn);
    2.19 -
    2.20 -    /*
    2.21 -     * This can be zero as well - no problem, in that case we exit
    2.22 -     * the loops anyway due to the PTRS_PER_* conditions.
    2.23 -     */
    2.24 -    end = (unsigned long)__va(boot_pfn *PAGE_SIZE);
    2.25 +    end     = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
    2.26 +    ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE);
    2.27 +    if ( ram_end > end )
    2.28 +        ram_end = end;
    2.29  
    2.30      pgd_base = init_mm.pgd;
    2.31      i = __pgd_offset(PAGE_OFFSET);
    2.32 @@ -237,12 +230,12 @@ static void __init pagetable_init (void)
    2.33  
    2.34      for (; i < PTRS_PER_PGD; pgd++, i++) {
    2.35          vaddr = i*PGDIR_SIZE;
    2.36 -        if (end && (vaddr >= end))
    2.37 +        if (vaddr >= end)
    2.38              break;
    2.39          pmd = (pmd_t *)pgd;
    2.40          for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
    2.41              vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
    2.42 -            if (end && (vaddr >= end))
    2.43 +            if (vaddr >= end)
    2.44                  break;
    2.45  
    2.46              /* Filled in for us already? */
    2.47 @@ -250,10 +243,11 @@ static void __init pagetable_init (void)
    2.48                  continue;
    2.49  
    2.50              pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
    2.51 +            clear_page(pte_base);
    2.52  
    2.53              for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
    2.54                  vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
    2.55 -                if (end && (vaddr >= end))
    2.56 +                if (vaddr >= ram_end)
    2.57                      break;
    2.58                  *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
    2.59              }
    2.60 @@ -329,28 +323,14 @@ static inline int page_is_ram (unsigned 
    2.61      return 1;
    2.62  }
    2.63  
    2.64 -static inline int page_kills_ppro(unsigned long pagenr)
    2.65 -{
    2.66 -    return 0;
    2.67 -}
    2.68 -
    2.69  #ifdef CONFIG_HIGHMEM
    2.70 -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
    2.71 +void __init one_highpage_init(struct page *page, int free_page)
    2.72  {
    2.73 -    if (!page_is_ram(pfn)) {
    2.74 -        SetPageReserved(page);
    2.75 -        return;
    2.76 -    }
    2.77 -	
    2.78 -    if (bad_ppro && page_kills_ppro(pfn)) {
    2.79 -        SetPageReserved(page);
    2.80 -        return;
    2.81 -    }
    2.82 -	
    2.83      ClearPageReserved(page);
    2.84      set_bit(PG_highmem, &page->flags);
    2.85      atomic_set(&page->count, 1);
    2.86 -    __free_page(page);
    2.87 +    if ( free_page )
    2.88 +        __free_page(page);
    2.89      totalhigh_pages++;
    2.90  }
    2.91  #endif /* CONFIG_HIGHMEM */
    2.92 @@ -392,8 +372,9 @@ static int __init free_pages_init(void)
    2.93              reservedpages++;
    2.94      }
    2.95  #ifdef CONFIG_HIGHMEM
    2.96 -    for (pfn = xen_start_info.nr_pages-1; pfn >= highstart_pfn; pfn--)
    2.97 -        one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
    2.98 +    for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
    2.99 +        one_highpage_init((struct page *) (mem_map + pfn),
   2.100 +                          (pfn < xen_start_info.nr_pages));
   2.101      totalram_pages += totalhigh_pages;
   2.102  #endif
   2.103      return reservedpages;
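
The pagetable_init() rework above separates two limits that the old
boot_pfn computation conflated: "end" is how far page-table structures
must reach (the mem= ceiling, max_low_pfn), while "ram_end" is how far
real mappings may go (pages the hypervisor actually granted). A condensed
sketch of the relationship, assuming the same globals as the hunk:

    /* Sketch only: structures cover the whole potential direct map;
     * present ptes stop at the RAM the domain currently owns. */
    end     = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
    ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE);
    if ( ram_end > end )        /* never map past the mem= ceiling */
        ram_end = end;
    /* The outer loops allocate pgd/pmd/pte pages while vaddr < end;
     * the pte loop writes present entries only while vaddr < ram_end. */
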
     3.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c	Tue Nov 30 09:01:47 2004 +0000
     3.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c	Tue Nov 30 17:17:52 2004 +0000
     3.3 @@ -52,6 +52,9 @@
     3.4  #include "setup_arch_pre.h"
     3.5  #include <bios_ebda.h>
     3.6  
     3.7 +/* Allows setting of maximum possible memory size  */
     3.8 +static unsigned long xen_override_max_pfn;
     3.9 +
    3.10  int disable_pse __initdata = 0;
    3.11  
    3.12  /*
    3.13 @@ -718,8 +721,13 @@ static void __init parse_cmdline_early (
    3.14  				unsigned long long mem_size;
    3.15   
    3.16  				mem_size = memparse(from+4, &from);
    3.17 +#if 0
    3.18  				limit_regions(mem_size);
    3.19  				userdef=1;
    3.20 +#else
    3.21 +				xen_override_max_pfn =
    3.22 +					(unsigned long)(mem_size>>PAGE_SHIFT);
    3.23 +#endif
    3.24  			}
    3.25  		}
    3.26  
    3.27 @@ -857,6 +865,7 @@ static void __init parse_cmdline_early (
    3.28  	}
    3.29  }
    3.30  
    3.31 +#if 0 /* !XEN */
    3.32  /*
    3.33   * Callback for efi_memory_walk.
    3.34   */
    3.35 @@ -873,7 +882,6 @@ efi_find_max_pfn(unsigned long start, un
    3.36  	return 0;
    3.37  }
    3.38  
    3.39 -
    3.40  /*
    3.41   * Find the highest page frame number we have available
    3.42   */
    3.43 @@ -900,6 +908,15 @@ void __init find_max_pfn(void)
    3.44  			max_pfn = end;
    3.45  	}
    3.46  }
    3.47 +#else
    3.48 +/* We don't use the fake e820 because we need to respond to user override. */
    3.49 +void __init find_max_pfn(void)
    3.50 +{
    3.51 +	if ( xen_override_max_pfn < xen_start_info.nr_pages )
    3.52 +		xen_override_max_pfn = xen_start_info.nr_pages;
    3.53 +	max_pfn = xen_override_max_pfn;
    3.54 +}
    3.55 +#endif /* XEN */
    3.56  
    3.57  /*
    3.58   * Determine low and high memory ranges:
    3.59 @@ -1414,6 +1431,21 @@ void __init setup_arch(char **cmdline_p)
    3.60  #endif
    3.61  	paging_init();
    3.62  
    3.63 +	/* Make sure we have a large enough P->M table. */
    3.64 +	if (max_pfn > xen_start_info.nr_pages) {
    3.65 +		phys_to_machine_mapping = alloc_bootmem_low_pages(
    3.66 +			max_pfn * sizeof(unsigned long));
    3.67 +		memset(phys_to_machine_mapping, ~0,
    3.68 +			max_pfn * sizeof(unsigned long));
    3.69 +		memcpy(phys_to_machine_mapping,
    3.70 +			(unsigned long *)xen_start_info.mfn_list,
    3.71 +			xen_start_info.nr_pages * sizeof(unsigned long));
    3.72 +		free_bootmem(
    3.73 +			__pa(xen_start_info.mfn_list), 
    3.74 +			PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
    3.75 +			sizeof(unsigned long))));
    3.76 +	}
    3.77 +
    3.78  	pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
    3.79  	for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
    3.80  	{	
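
With the native find_max_pfn() compiled out above, max_pfn in the 2.6
tree is derived purely from the boot allocation and any mem= override.
A worked example of the arithmetic (values illustrative, PAGE_SHIFT = 12):

    /* Sketch only: booting with 128MB granted and "mem=512M" on the
     * command line. memparse() handles the K/M/G suffix. */
    unsigned long long mem_size = 512ULL << 20;       /* memparse("512M") */
    xen_override_max_pfn = (unsigned long)(mem_size >> PAGE_SHIFT);
                                                      /* 131072 pfns */
    /* xen_start_info.nr_pages == 32768, which is smaller, so: */
    max_pfn = xen_override_max_pfn;   /* P->M table sized for 131072 pfns */
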
     4.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c	Tue Nov 30 09:01:47 2004 +0000
     4.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c	Tue Nov 30 17:17:52 2004 +0000
     4.3 @@ -77,6 +77,12 @@ static pte_t * __init one_page_table_ini
     4.4  {
     4.5  	if (pmd_none(*pmd)) {
     4.6  		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
     4.7 +		/* XEN: Make the new p.t. read-only. */
     4.8 +		pgd_t *kpgd = pgd_offset_k((unsigned long)page_table);
     4.9 +		pmd_t *kpmd = pmd_offset(kpgd, (unsigned long)page_table);
    4.10 +		pte_t *kpte = pte_offset_kernel(kpmd, (unsigned long)page_table);
    4.11 +		xen_l1_entry_update(
    4.12 +			kpte, (*(unsigned long *)kpte)&~_PAGE_RW);
    4.13  		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
    4.14  		if (page_table != pte_offset_kernel(pmd, 0))
    4.15  			BUG();	
    4.16 @@ -141,25 +147,6 @@ void __init protect_page(pgd_t *pgd, voi
    4.17  					pte_val_ma(*pte) | _PAGE_RW);
    4.18  }
    4.19  
    4.20 -void __init protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode)
    4.21 -{
    4.22 -	pmd_t *pmd;
    4.23 -	pte_t *pte;
    4.24 -	int pgd_idx, pmd_idx;
    4.25 -
    4.26 -	protect_page(dpgd, spgd, mode);
    4.27 -
    4.28 -	for (pgd_idx = 0; pgd_idx < PTRS_PER_PGD_NO_HV; spgd++, pgd_idx++) {
    4.29 -		pmd = pmd_offset(spgd, 0);
    4.30 -		if (pmd_none(*pmd))
    4.31 -			continue;
    4.32 -		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
    4.33 -			pte = pte_offset_kernel(pmd, 0);
    4.34 -			protect_page(dpgd, pte, mode);
    4.35 -		}
    4.36 -	}
    4.37 -}
    4.38 -
    4.39  static inline int is_kernel_text(unsigned long addr)
    4.40  {
    4.41  	if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end)
    4.42 @@ -180,6 +167,10 @@ static void __init kernel_physical_mappi
    4.43  	pte_t *pte;
    4.44  	int pgd_idx, pmd_idx, pte_ofs;
    4.45  
    4.46 +	unsigned long max_ram_pfn = xen_start_info.nr_pages;
    4.47 +	if (max_ram_pfn > max_low_pfn)
    4.48 +		max_ram_pfn = max_low_pfn;
    4.49 +
    4.50  	pgd_idx = pgd_index(PAGE_OFFSET);
    4.51  	pgd = pgd_base + pgd_idx;
    4.52  	pfn = 0;
    4.53 @@ -207,7 +198,10 @@ static void __init kernel_physical_mappi
    4.54  				pte = one_page_table_init(pmd);
    4.55  
    4.56  				pte += pte_ofs;
    4.57 -				for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
    4.58 +				/* XEN: Only map initial RAM allocation. */
    4.59 +				for (; pte_ofs < PTRS_PER_PTE && pfn < max_ram_pfn; pte++, pfn++, pte_ofs++) {
    4.60 +						if (pte_present(*pte))
    4.61 +							continue;
    4.62  						if (is_kernel_text(address))
    4.63  							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
    4.64  						else
    4.65 @@ -311,7 +305,8 @@ void __init one_highpage_init(struct pag
    4.66  		ClearPageReserved(page);
    4.67  		set_bit(PG_highmem, &page->flags);
    4.68  		set_page_count(page, 1);
    4.69 -		__free_page(page);
    4.70 +		if (pfn < xen_start_info.nr_pages)
    4.71 +			__free_page(page);
    4.72  		totalhigh_pages++;
    4.73  	} else
    4.74  		SetPageReserved(page);
    4.75 @@ -347,7 +342,8 @@ extern void __init remap_numa_kva(void);
    4.76  static void __init pagetable_init (void)
    4.77  {
    4.78  	unsigned long vaddr;
    4.79 -	pgd_t *pgd_base = swapper_pg_dir;
    4.80 +	pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
    4.81 +	pgd_t *new_pgd = swapper_pg_dir;
    4.82  
    4.83  #ifdef CONFIG_X86_PAE
    4.84  	int i;
    4.85 @@ -368,7 +364,22 @@ static void __init pagetable_init (void)
    4.86  		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
    4.87  	}
    4.88  
    4.89 -	kernel_physical_mapping_init(pgd_base);
    4.90 +	/*
    4.91 +	 * Switch to proper mm_init page directory. Initialise from the current
    4.92 +	 * page directory, write-protect the new page directory, then switch to
    4.93 +	 * it. We clean up by write-enabling and then freeing the old page dir.
    4.94 +	 */
    4.95 +	memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
    4.96 +	protect_page(new_pgd, new_pgd, PROT_ON);
    4.97 +	queue_pgd_pin(__pa(new_pgd));
    4.98 +	load_cr3(new_pgd);
    4.99 +	queue_pgd_unpin(__pa(old_pgd));
   4.100 +	__flush_tlb_all(); /* implicit flush */
   4.101 +	protect_page(new_pgd, old_pgd, PROT_OFF);
   4.102 +	flush_page_update_queue();
   4.103 +	free_bootmem(__pa(old_pgd), PAGE_SIZE);
   4.104 +
   4.105 +	kernel_physical_mapping_init(new_pgd);
   4.106  	remap_numa_kva();
   4.107  
   4.108  	/*
   4.109 @@ -376,9 +387,9 @@ static void __init pagetable_init (void)
   4.110  	 * created - mappings will be set by set_fixmap():
   4.111  	 */
   4.112  	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
   4.113 -	page_table_range_init(vaddr, 0, pgd_base);
   4.114 +	page_table_range_init(vaddr, 0, new_pgd);
   4.115  
   4.116 -	permanent_kmaps_init(pgd_base);
   4.117 +	permanent_kmaps_init(new_pgd);
   4.118  
   4.119  #ifdef CONFIG_X86_PAE
   4.120  	/*
   4.121 @@ -388,7 +399,7 @@ static void __init pagetable_init (void)
   4.122  	 * All user-space mappings are explicitly cleared after
   4.123  	 * SMP startup.
   4.124  	 */
   4.125 -	pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
   4.126 +	new_pgd[0] = new_pgd[USER_PTRS_PER_PGD];
   4.127  #endif
   4.128  }
   4.129  
   4.130 @@ -545,8 +556,6 @@ out:
   4.131   */
   4.132  void __init paging_init(void)
   4.133  {
   4.134 -	pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
   4.135 -	pgd_t *new_pgd = swapper_pg_dir;
   4.136  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
   4.137  	int i;
   4.138  #endif
   4.139 @@ -559,25 +568,6 @@ void __init paging_init(void)
   4.140  
   4.141  	pagetable_init();
   4.142  
   4.143 -	/*
   4.144 -	 * Write-protect both page tables within both page tables.
   4.145 -	 * That's three ops, as the old p.t. is already protected
   4.146 -	 * within the old p.t. Then pin the new table, switch tables,
   4.147 -	 * and unprotect the old table.
   4.148 -	 */
   4.149 -	protect_pagetable(new_pgd, old_pgd, PROT_ON);
   4.150 -	protect_pagetable(new_pgd, new_pgd, PROT_ON);
   4.151 -	protect_pagetable(old_pgd, new_pgd, PROT_ON);
   4.152 -	queue_pgd_pin(__pa(new_pgd));
   4.153 -	load_cr3(new_pgd);
   4.154 -	queue_pgd_unpin(__pa(old_pgd));
   4.155 -	__flush_tlb_all(); /* implicit flush */
   4.156 -	protect_pagetable(new_pgd, old_pgd, PROT_OFF);
   4.157 -	flush_page_update_queue();
   4.158 -
   4.159 -	/* Completely detached from old tables, so free them. */
   4.160 -	free_bootmem(__pa(old_pgd), xen_start_info.nr_pt_frames << PAGE_SHIFT);
   4.161 -
   4.162  #ifdef CONFIG_X86_PAE
   4.163  	/*
   4.164  	 * We will bail out later - printk doesn't work right now so
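
The paging_init() simplification works because one_page_table_init() now
write-protects every new pte page at allocation time, so by switch-over
only the pgd itself still needs protecting and protect_pagetable() can go.
For reference, the sequence added to pagetable_init() above, annotated
(the calls are the patch's; the comments are explanatory only):

    memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV * sizeof(pgd_t));
    protect_page(new_pgd, new_pgd, PROT_ON);  /* Xen pins only R/O tables  */
    queue_pgd_pin(__pa(new_pgd));             /* validate as a page dir    */
    load_cr3(new_pgd);                        /* run on the new tables     */
    queue_pgd_unpin(__pa(old_pgd));           /* old dir no longer in use  */
    __flush_tlb_all();                        /* implicit queue flush      */
    protect_page(new_pgd, old_pgd, PROT_OFF); /* old dir writable again... */
    flush_page_update_queue();
    free_bootmem(__pa(old_pgd), PAGE_SIZE);   /* ...so its page is freed   */
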
     5.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c	Tue Nov 30 09:01:47 2004 +0000
     5.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c	Tue Nov 30 17:17:52 2004 +0000
     5.3 @@ -4,6 +4,7 @@
     5.4   * Xen balloon driver - enables returning/claiming memory to/from Xen.
     5.5   *
     5.6   * Copyright (c) 2003, B Dragovic
     5.7 + * Copyright (c) 2003-2004, M Williamson, K Fraser
     5.8   * 
     5.9   * This file may be distributed separately from the Linux kernel, or
    5.10   * incorporated into other software packages, subject to the following license:
    5.11 @@ -48,19 +49,10 @@
    5.12  #include <asm/tlb.h>
    5.13  #include <linux/list.h>
    5.14  
    5.15 -/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
    5.16 -#define USER_INFLATE_BALLOON  1   /* return mem to hypervisor */
    5.17 -#define USER_DEFLATE_BALLOON  2   /* claim mem from hypervisor */
    5.18 -typedef struct user_balloon_op {
    5.19 -    unsigned int  op;
    5.20 -    unsigned long size;
    5.21 -} user_balloon_op_t;
    5.22 -/* END OF USER DEFINE */
    5.23 -
    5.24  static struct proc_dir_entry *balloon_pde;
    5.25  
    5.26  unsigned long credit;
    5.27 -static unsigned long current_pages, most_seen_pages;
    5.28 +static unsigned long current_pages;
    5.29  
    5.30  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    5.31  /* Use the private and mapping fields of struct page as a list. */
    5.32 @@ -78,71 +70,66 @@ static unsigned long current_pages, most
    5.33  #define pte_offset_kernel pte_offset
    5.34  #endif
    5.35  
    5.36 +#define IPRINTK(fmt, args...) \
    5.37 +    printk(KERN_INFO "xen_mem: " fmt, ##args)
    5.38 +#define WPRINTK(fmt, args...) \
    5.39 +    printk(KERN_WARNING "xen_mem: " fmt, ##args)
    5.40 +
    5.41  /* List of ballooned pages, threaded through the mem_map array. */
    5.42  LIST_HEAD(ballooned_pages);
    5.43  
    5.44 -/** add_ballooned_page - remember we've ballooned a pfn */
    5.45 -void add_ballooned_page(unsigned long pfn)
    5.46 +/* balloon_append: add the given page to the balloon. */
    5.47 +void balloon_append(struct page *page)
    5.48  {
    5.49 -    struct page *p = mem_map + pfn;
    5.50 -
    5.51 -    list_add(PAGE_TO_LIST(p), &ballooned_pages);
    5.52 +    list_add(PAGE_TO_LIST(page), &ballooned_pages);
    5.53  }
    5.54  
    5.55 -/* rem_ballooned_page - recall a ballooned page and remove from list. */
    5.56 -struct page *rem_ballooned_page(void)
    5.57 +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
    5.58 +struct page *balloon_retrieve(void)
    5.59  {
    5.60 -    if(!list_empty(&ballooned_pages))
    5.61 -    {
    5.62 -        struct page *ret;
    5.63 +    struct page *page;
    5.64  
    5.65 -        ret = LIST_TO_PAGE(ballooned_pages.next);
    5.66 -	UNLIST_PAGE(ret);
    5.67 +    if ( list_empty(&ballooned_pages) )
    5.68 +        return NULL;
    5.69  
    5.70 -        return ret;
    5.71 -    }
    5.72 -    else
    5.73 -        return NULL;
    5.74 +    page = LIST_TO_PAGE(ballooned_pages.next);
    5.75 +    UNLIST_PAGE(page);
    5.76 +    return page;
    5.77  }
    5.78  
    5.79  static inline pte_t *get_ptep(unsigned long addr)
    5.80  {
    5.81 -    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
    5.82 +    pgd_t *pgd;
    5.83 +    pmd_t *pmd;
    5.84 +
    5.85      pgd = pgd_offset_k(addr);
    5.86 -
    5.87      if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
    5.88  
    5.89      pmd = pmd_offset(pgd, addr);
    5.90      if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
    5.91  
    5.92 -    ptep = pte_offset_kernel(pmd, addr);
    5.93 -
    5.94 -    return ptep;
    5.95 +    return pte_offset_kernel(pmd, addr);
    5.96  }
    5.97  
    5.98  /* Main function for relinquishing memory. */
    5.99  static unsigned long inflate_balloon(unsigned long num_pages)
   5.100 -
   5.101  {
   5.102 -    unsigned long *parray;
   5.103 -    unsigned long *currp;
   5.104 -    unsigned long curraddr;
   5.105 -    unsigned long ret = 0;
   5.106 -    unsigned long i, j;
   5.107 +    unsigned long *parray, *currp, curraddr, ret = 0, i, j, mfn, pfn;
   5.108 +    struct page *page;
   5.109  
   5.110      parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
   5.111      if ( parray == NULL )
   5.112      {
   5.113 -        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
   5.114 -        return -EFAULT;
   5.115 +        WPRINTK("inflate_balloon: Unable to vmalloc parray\n");
   5.116 +        return -ENOMEM;
   5.117      }
   5.118  
   5.119      currp = parray;
   5.120  
   5.121      for ( i = 0; i < num_pages; i++, currp++ )
   5.122      {
   5.123 -        struct page *page = alloc_page(GFP_HIGHUSER);
   5.124 -        unsigned long pfn = page - mem_map;
   5.125 +        page = alloc_page(GFP_HIGHUSER);
   5.126 +        pfn  = page - mem_map;
   5.127  
   5.128          /* If allocation fails then free all reserved pages. */
   5.129          if ( page == NULL )
   5.130 @@ -160,10 +147,9 @@ static unsigned long inflate_balloon(uns
   5.131          *currp = pfn;
   5.132      }
   5.133  
   5.134 -
   5.135      for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
   5.136      {
   5.137 -        unsigned long mfn = phys_to_machine_mapping[*currp];
   5.138 +        mfn      = phys_to_machine_mapping[*currp];
   5.139          curraddr = (unsigned long)page_address(mem_map + *currp);
   5.140          /* Blow away page contents for security, and also p.t. ref if any. */
   5.141          if ( curraddr != 0 )
   5.142 @@ -180,7 +166,7 @@ static unsigned long inflate_balloon(uns
   5.143          }
   5.144  #endif
   5.145  
   5.146 -        add_ballooned_page(*currp);
   5.147 +        balloon_append(&mem_map[*currp]);
   5.148  
   5.149          phys_to_machine_mapping[*currp] = INVALID_P2M_ENTRY;
   5.150          *currp = mfn;
   5.151 @@ -206,62 +192,45 @@ static unsigned long inflate_balloon(uns
   5.152      return ret;
   5.153  }
   5.154  
   5.155 -/*
   5.156 - * Install new mem pages obtained by deflate_balloon. function walks 
   5.157 - * phys->machine mapping table looking for DEAD entries and populates
   5.158 - * them.
   5.159 - */
   5.160 -static unsigned long process_returned_pages(unsigned long * parray, 
   5.161 -                                       unsigned long num)
   5.162 +/* Install a set of new pages (@mfn_list, @nr_mfns) into the memory map. */
   5.163 +static unsigned long process_returned_pages(
   5.164 +    unsigned long *mfn_list, unsigned long nr_mfns)
   5.165  {
   5.166 -    /* currently, this function is rather simplistic as 
   5.167 -     * it is assumed that domain reclaims only number of 
   5.168 -     * pages previously released. this is to change soon
   5.169 -     * and the code to extend page tables etc. will be 
   5.170 -     * incorporated here.
   5.171 -     */
   5.172 -     
   5.173 -    unsigned long * curr = parray;
   5.174 -    unsigned long num_installed;
   5.175 -
   5.176 +    unsigned long pfn, i;
   5.177      struct page *page;
   5.178  
   5.179 -    num_installed = 0;
   5.180 -    while ( (page = rem_ballooned_page()) != NULL )
   5.181 +    for ( i = 0; i < nr_mfns; i++ )
   5.182      {
   5.183 -        unsigned long pfn;
   5.184 -
   5.185 -        if ( num_installed == num )
    5.186 +        if ( (page = balloon_retrieve()) == NULL )
   5.187              break;
   5.188  
   5.189          pfn = page - mem_map;
   5.190 -
   5.191 -        if(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
   5.192 -        {
   5.193 -            printk("BUG: Tried to unballoon existing page!");
   5.194 +        if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
   5.195              BUG();
   5.196 -        }
   5.197  
   5.198 -        phys_to_machine_mapping[pfn] = *curr;
   5.199 -        queue_machphys_update(*curr, pfn);
   5.200 -        if (pfn<max_low_pfn)
   5.201 +        /* Update P->M and M->P tables. */
   5.202 +        phys_to_machine_mapping[pfn] = mfn_list[i];
   5.203 +        queue_machphys_update(mfn_list[i], pfn);
   5.204 +
   5.205 +        /* Link back into the page tables if it's not a highmem page. */
   5.206 +        if ( pfn < max_low_pfn )
   5.207              queue_l1_entry_update(
   5.208                  get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
   5.209 -                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
   5.210 -        
   5.211 -        __free_page(mem_map + pfn);
   5.212 +                (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
   5.213  
   5.214 -        curr++;
   5.215 -        num_installed++;
   5.216 +        /* Finally, relinquish the memory back to the system allocator. */
   5.217 +        ClearPageReserved(page);
   5.218 +        set_page_count(page, 1);
   5.219 +        __free_page(page);
   5.220      }
   5.221  
   5.222 -    return num_installed;
   5.223 +    return i;
   5.224  }
   5.225  
   5.226  unsigned long deflate_balloon(unsigned long num_pages)
   5.227  {
   5.228      unsigned long ret;
   5.229 -    unsigned long * parray;
   5.230 +    unsigned long *parray;
   5.231  
   5.232      if ( num_pages > credit )
   5.233      {
   5.234 @@ -305,205 +274,25 @@ unsigned long deflate_balloon(unsigned l
   5.235  
   5.236  #define PAGE_TO_MB_SHIFT 8
   5.237  
   5.238 -/*
   5.239 - * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c 
   5.240 - * The loops do go through all of low memory (ZONE_NORMAL).  The
   5.241 - * old pages have _PAGE_PRESENT set and so get skipped.
   5.242 - * If low memory is not full, the new pages are used to fill it, going
   5.243 - * from cur_low_pfn to low_pfn.   high memory is not direct mapped so
   5.244 - * no extension is needed for new high memory.
   5.245 - */
   5.246 -
   5.247 -static void pagetable_extend (int cur_low_pfn, int newpages)
   5.248 -{
   5.249 -    unsigned long vaddr, end;
   5.250 -    pgd_t *kpgd, *pgd, *pgd_base;
   5.251 -    int i, j, k;
   5.252 -    pmd_t *kpmd, *pmd;
   5.253 -    pte_t *kpte, *pte, *pte_base;
   5.254 -    int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn);
   5.255 -
   5.256 -    /*
   5.257 -     * This can be zero as well - no problem, in that case we exit
   5.258 -     * the loops anyway due to the PTRS_PER_* conditions.
   5.259 -     */
   5.260 -    end = (unsigned long)__va(low_pfn*PAGE_SIZE);
   5.261 -
   5.262 -    pgd_base = init_mm.pgd;
   5.263 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   5.264 -    i = pgd_index(PAGE_OFFSET);
   5.265 -#else
   5.266 -    i = __pgd_offset(PAGE_OFFSET);
   5.267 -#endif
   5.268 -    pgd = pgd_base + i;
   5.269 -
   5.270 -    for (; i < PTRS_PER_PGD; pgd++, i++) {
   5.271 -        vaddr = i*PGDIR_SIZE;
   5.272 -        if (end && (vaddr >= end))
   5.273 -            break;
   5.274 -        pmd = (pmd_t *)pgd;
   5.275 -        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
   5.276 -            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
   5.277 -            if (end && (vaddr >= end))
   5.278 -                break;
   5.279 -
   5.280 -            /* Filled in for us already? */
   5.281 -            if ( pmd_val(*pmd) & _PAGE_PRESENT )
   5.282 -                continue;
   5.283 -
   5.284 -            pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
   5.285 -
   5.286 -            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
   5.287 -                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
   5.288 -                if (end && (vaddr >= end))
   5.289 -                    break;
   5.290 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   5.291 -                *pte = mk_pte(virt_to_page(vaddr), PAGE_KERNEL);
   5.292 -#else
   5.293 -		*pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
   5.294 -#endif
   5.295 -            }
   5.296 -            kpgd = pgd_offset_k((unsigned long)pte_base);
   5.297 -            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
   5.298 -            kpte = pte_offset_kernel(kpmd, (unsigned long)pte_base);
   5.299 -            queue_l1_entry_update(kpte,
   5.300 -                                  (*(unsigned long *)kpte)&~_PAGE_RW);
   5.301 -            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
   5.302 -            XEN_flush_page_update_queue();
   5.303 -        }
   5.304 -    }
   5.305 -}
   5.306 -
   5.307 -/*
   5.308 - * claim_new_pages() asks xen to increase this domain's memory  reservation
   5.309 - * and return a list of the new pages of memory.  This new pages are
   5.310 - * added to the free list of the memory manager.
   5.311 - *
   5.312 - * Available RAM does not normally change while Linux runs.  To make this work,
   5.313 - * the linux mem= boottime command line param must say how big memory could
   5.314 - * possibly grow.  Then setup_arch() in arch/xen/kernel/setup.c
   5.315 - * sets max_pfn, max_low_pfn and the zones according to
   5.316 - * this max memory size.   The page tables themselves can only be
   5.317 - * extended after xen has assigned new pages to this domain.
   5.318 - */
   5.319 -
   5.320 -static unsigned long
   5.321 -claim_new_pages(unsigned long num_pages)
   5.322 -{
   5.323 -    unsigned long new_page_cnt, pfn;
   5.324 -    unsigned long * parray, *curr;
   5.325 -
   5.326 -    if (most_seen_pages+num_pages> max_pfn)
   5.327 -        num_pages = max_pfn-most_seen_pages;
   5.328 -    if (num_pages==0) return -EINVAL;
   5.329 -
   5.330 -    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
   5.331 -    if ( parray == NULL )
   5.332 -    {
   5.333 -        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
   5.334 -        return 0;
   5.335 -    }
   5.336 -
   5.337 -    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
   5.338 -                                parray, num_pages, 0);
   5.339 -    if ( new_page_cnt != num_pages )
   5.340 -    {
   5.341 -        printk(KERN_WARNING
   5.342 -            "claim_new_pages: xen granted only %lu of %lu requested pages\n",
   5.343 -            new_page_cnt, num_pages);
   5.344 -
   5.345 -        /* 
   5.346 -         * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
   5.347 -         * usually can dribble out a few pages and then hangs.
   5.348 -         */
   5.349 -        if ( new_page_cnt < 1000 )
   5.350 -        {
   5.351 -            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
   5.352 -            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
   5.353 -                                parray, new_page_cnt, 0);
   5.354 -            return -EFAULT;
   5.355 -        }
   5.356 -    }
   5.357 -    memcpy(phys_to_machine_mapping+most_seen_pages, parray,
   5.358 -           new_page_cnt * sizeof(unsigned long));
   5.359 -
   5.360 -    pagetable_extend(most_seen_pages,new_page_cnt);
   5.361 -
   5.362 -    for ( pfn = most_seen_pages, curr = parray;
   5.363 -          pfn < most_seen_pages+new_page_cnt;
   5.364 -          pfn++, curr++ )
   5.365 -    {
   5.366 -        struct page *page = mem_map + pfn;
   5.367 -
   5.368 -#ifndef CONFIG_HIGHMEM
   5.369 -        if ( pfn>=max_low_pfn )
   5.370 -        {
   5.371 -            printk(KERN_WARNING "Warning only %ldMB will be used.\n",
   5.372 -               pfn>>PAGE_TO_MB_SHIFT);
   5.373 -            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
   5.374 -            break;
   5.375 -        }
   5.376 -#endif
   5.377 -        queue_machphys_update(*curr, pfn);
   5.378 -        if ( pfn < max_low_pfn )
   5.379 -            queue_l1_entry_update(
   5.380 -                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
   5.381 -                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
   5.382 -        
   5.383 -        XEN_flush_page_update_queue();
   5.384 -        
   5.385 -        /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
   5.386 -        ClearPageReserved(page);
   5.387 -        if ( pfn >= max_low_pfn )
   5.388 -            set_bit(PG_highmem, &page->flags);
   5.389 -        set_page_count(page, 1);
   5.390 -        __free_page(page);
   5.391 -    }
   5.392 -
   5.393 -    vfree(parray);
   5.394 -
   5.395 -    return new_page_cnt;
   5.396 -}
   5.397 -
   5.398 -
   5.399  static int balloon_try_target(int target)
   5.400  {
   5.401      int change, reclaim;
   5.402  
   5.403      if ( target < current_pages )
   5.404      {
   5.405 -        int change = inflate_balloon(current_pages-target);
   5.406 -        if ( change <= 0 )
   5.407 +        if ( (change = inflate_balloon(current_pages-target)) <= 0 )
   5.408              return change;
   5.409 -
   5.410          current_pages -= change;
   5.411          printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
   5.412              change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   5.413      }
   5.414 -    else if ( target > current_pages )
   5.415 +    else if ( (reclaim = target - current_pages) > 0 )
   5.416      {
   5.417 -        reclaim = min((unsigned long)target,most_seen_pages) - current_pages;
   5.418 -
   5.419 -        if ( reclaim )
   5.420 -        {
   5.421 -            change = deflate_balloon( reclaim );
   5.422 -            if ( change <= 0 )
   5.423 -                return change;
   5.424 -            current_pages += change;
   5.425 -            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
   5.426 -                change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   5.427 -        }
   5.428 -
   5.429 -        if ( most_seen_pages < target )
   5.430 -        {
   5.431 -            int growth = claim_new_pages(target-most_seen_pages);
   5.432 -            if ( growth <= 0 )
   5.433 -                return growth;
   5.434 -            most_seen_pages += growth;
   5.435 -            current_pages += growth;
   5.436 -            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
   5.437 -                growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   5.438 -        }
   5.439 +        if ( (change = deflate_balloon(reclaim)) <= 0 )
   5.440 +            return change;
   5.441 +        current_pages += change;
   5.442 +        printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
   5.443 +               change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   5.444      }
   5.445  
   5.446      return 1;
   5.447 @@ -640,12 +429,15 @@ static int balloon_read(char *page, char
   5.448  
   5.449  static int __init balloon_init(void)
   5.450  {
   5.451 -    printk(KERN_ALERT "Starting Xen Balloon driver\n");
   5.452 +    unsigned long pfn;
   5.453 +    struct page *page;
   5.454  
   5.455 -    most_seen_pages = current_pages = min(xen_start_info.nr_pages,max_pfn);
   5.456 +    IPRINTK("Initialising balloon driver.\n");
   5.457 +
   5.458 +    current_pages = min(xen_start_info.nr_pages, max_pfn);
   5.459      if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
   5.460      {
   5.461 -        printk(KERN_ALERT "Unable to create balloon driver proc entry!");
   5.462 +        WPRINTK("Unable to create balloon driver proc entry!");
   5.463          return -1;
   5.464      }
   5.465  
   5.466 @@ -661,18 +453,12 @@ static int __init balloon_init(void)
   5.467      (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx,
   5.468                                      CALLBACK_IN_BLOCKING_CONTEXT);
   5.469  
   5.470 -    /* 
   5.471 -     * make_module a new phys map if mem= says xen can give us memory  to grow
   5.472 -     */
   5.473 -    if ( max_pfn > xen_start_info.nr_pages )
   5.474 +    /* Initialise the balloon with excess memory space. */
   5.475 +    for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
   5.476      {
   5.477 -        extern unsigned long *phys_to_machine_mapping;
   5.478 -        unsigned long *newmap;
   5.479 -        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
   5.480 -        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
   5.481 -        memcpy(newmap, phys_to_machine_mapping,
   5.482 -               xen_start_info.nr_pages * sizeof(unsigned long));
   5.483 -        phys_to_machine_mapping = newmap;
   5.484 +        page = &mem_map[pfn];
   5.485 +        if ( !PageReserved(page) )
   5.486 +            balloon_append(page);
   5.487      }
   5.488  
   5.489      return 0;
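
After this rewrite the balloon driver's state is just current_pages plus
the ballooned_pages list threaded through struct page itself, seeded at
init with every pfn between nr_pages and max_pfn. A sketch of how a
target request flows through the helpers above (hypothetical wrapper
name, error paths elided; the real driver uses balloon_try_target()):

    /* Sketch only: drive the domain toward a target size, in pages. */
    static void set_new_target(unsigned long target)
    {
        if ( target < current_pages )
            /* Surplus pages go back to Xen and join ballooned_pages. */
            current_pages -= inflate_balloon(current_pages - target);
        else if ( target > current_pages )
            /* Pages come back from Xen into pfns popped off the list. */
            current_pages += deflate_balloon(target - current_pages);
    }
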
     6.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h	Tue Nov 30 09:01:47 2004 +0000
     6.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h	Tue Nov 30 17:17:52 2004 +0000
     6.3 @@ -59,7 +59,6 @@ void do_hypervisor_callback(struct pt_re
     6.4  #define PROT_ON  1
     6.5  #define PROT_OFF 0
     6.6  void /* __init */ protect_page(pgd_t *dpgd, void *page, int mode);
     6.7 -void /* __init */ protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode);
     6.8  
     6.9  /* arch/xen/i386/kernel/head.S */
    6.10  void lgdt_finish(void);
     7.1 --- a/xen/arch/x86/x86_32/entry.S	Tue Nov 30 09:01:47 2004 +0000
     7.2 +++ b/xen/arch/x86/x86_32/entry.S	Tue Nov 30 17:17:52 2004 +0000
     7.3 @@ -341,6 +341,7 @@ process_guest_exception_and_events:
     7.4          leal DOMAIN_trap_bounce(%ebx),%edx
     7.5          testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx)
     7.6          jz   test_all_events
     7.7 +        cli  # create_bounce_frame needs CLI for pre-exceptions to work
     7.8          call create_bounce_frame
     7.9          jmp  test_all_events
    7.10