ia64/xen-unstable

changeset 3251:a169836882cb

bitkeeper revision 1.1159.170.59 (41b4c2fdJ2gj_BWy27Vj3ptayZp_yg)

sync w/ head.
author cl349@arcadians.cl.cam.ac.uk
date Mon Dec 06 20:37:17 2004 +0000 (2004-12-06)
parents 63bb7af323c9 0e6cf5ea9f5c
children d455acded006
files .rootkeys BitKeeper/etc/ignore Makefile linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c linux-2.4.28-xen-sparse/arch/xen/mm/init.c linux-2.4.28-xen-sparse/mkbuildtree linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xen0_defconfig linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xenU_defconfig linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/cpu/common.c linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/pci-dma.c linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/init.c linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/Makefile linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/ctrl_if.c linux-2.6.10-rc2-xen-sparse/drivers/char/mem.c linux-2.6.10-rc2-xen-sparse/drivers/xen/Makefile linux-2.6.10-rc2-xen-sparse/drivers/xen/balloon/balloon.c linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/interface.c linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/netback.c linux-2.6.10-rc2-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6.10-rc2-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/io.h linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgalloc.h linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6.10-rc2-xen-sparse/include/asm-xen/hypervisor.h linux-2.6.9-xen-sparse/arch/xen/kernel/devmem.c linux-2.6.9-xen-sparse/include/asm-xen/balloon.h tools/libxc/Makefile tools/libxc/xc.h tools/libxc/xc_misc.c tools/libxutil/Makefile tools/misc/Makefile tools/misc/xenperf.c xen/arch/x86/memory.c xen/arch/x86/x86_32/entry.S xen/common/dom0_ops.c xen/common/perfc.c xen/include/public/dom0_ops.h xen/include/public/xen.h
     1.1 --- a/.rootkeys	Mon Dec 06 20:03:12 2004 +0000
     1.2 +++ b/.rootkeys	Mon Dec 06 20:37:17 2004 +0000
     1.3 @@ -253,6 +253,8 @@ 412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6
     1.4  410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.10-rc2-xen-sparse/mm/page_alloc.c
     1.5  41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.10-rc2-xen-sparse/net/core/skbuff.c
     1.6  4149ec79wMpIHdvbntxqVGLRZZjPxw linux-2.6.10-rc2-xen-sparse/net/ipv4/raw.c
     1.7 +41ab6fa06JdF7jxUsuDcjN3UhuIAxg linux-2.6.9-xen-sparse/arch/xen/kernel/devmem.c
     1.8 +41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.9-xen-sparse/include/asm-xen/balloon.h
     1.9  413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile
    1.10  413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree
    1.11  413cb1e5kY_Zil7-b0kI6hvCIxBEYg netbsd-2.0-xen-sparse/nbconfig-xen
    1.12 @@ -378,6 +380,7 @@ 3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/mis
    1.13  3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README
    1.14  405eedf6_nnNhFQ1I85lhCkLK6jFGA tools/misc/xencons
    1.15  40c9c4697z76HDfkCLdMhmaEwzFoNQ tools/misc/xend
    1.16 +41adc641dV-0cDLSyzMs5BT8nL7v3Q tools/misc/xenperf.c
    1.17  4107986eMWVdBoz4tXYoOscpN_BCYg tools/misc/xensv
    1.18  4056f5155QYZdsk-1fLdjsZPFTnlhg tools/misc/xensymoops
    1.19  40cf2937dqM1jWW87O5OoOYND8leuA tools/misc/xm
     2.1 --- a/BitKeeper/etc/ignore	Mon Dec 06 20:03:12 2004 +0000
     2.2 +++ b/BitKeeper/etc/ignore	Mon Dec 06 20:37:17 2004 +0000
     2.3 @@ -58,7 +58,7 @@ tools/balloon/balloon
     2.4  tools/check/.*
     2.5  tools/libxc/xen/*
     2.6  tools/misc/miniterm/miniterm
     2.7 -tools/misc/xen_cpuperf
     2.8 +tools/misc/xenperf
     2.9  tools/vnet/gc
    2.10  tools/vnet/gc*/*
    2.11  tools/vnet/vnet-module/.tmp_versions/*
     3.1 --- a/Makefile	Mon Dec 06 20:03:12 2004 +0000
     3.2 +++ b/Makefile	Mon Dec 06 20:37:17 2004 +0000
     3.3 @@ -19,10 +19,30 @@ export INSTALL_DIR
     3.4  include buildconfigs/Rules.mk
     3.5  
     3.6  .PHONY:	all dist install xen tools kernels docs world clean mkpatches mrproper
     3.7 -.PHONY:	kbuild kdelete kclean
     3.8 +.PHONY:	kbuild kdelete kclean install-tools install-xen install-docs
     3.9 +.PHONY: install-kernels
    3.10  
    3.11  all: dist
    3.12  
    3.13 +# install everything into the standard system directories
    3.14 +# NB: install explicitly does not check that everything is up to date!
    3.15 +install: install-tools install-xen install-kernels install-docs
    3.16 +
    3.17 +install-xen:
    3.18 +	$(MAKE) -C xen install
    3.19 +
    3.20 +install-tools:
    3.21 +	$(MAKE) -C tools install
    3.22 +
    3.23 +install-kernels:
    3.24 +	$(shell cp -a $(INSTALL_DIR)/boot/* /boot/)
    3.25 +	$(shell cp -a $(INSTALL_DIR)/lib/modules/* /lib/modules/)
    3.26 +	$(shell cp -dR $(INSTALL_DIR)/boot/*$(LINUX_VER)* $(prefix)/boot/)
    3.27 +	$(shell cp -dR $(INSTALL_DIR)/lib/modules/* $(prefix)/lib/modules/)
    3.28 +
    3.29 +install-docs:
    3.30 +	sh ./docs/check_pkgs && $(MAKE) -C docs install || true
    3.31 +
    3.32  # build and install everything into local dist directory
    3.33  dist: xen tools kernels docs
    3.34  	install -m0644 ./COPYING $(DIST_DIR)
    3.35 @@ -31,17 +51,6 @@ dist: xen tools kernels docs
    3.36  	mkdir -p $(DIST_DIR)/check
    3.37  	install -m0755 tools/check/chk tools/check/check_* $(DIST_DIR)/check
    3.38  
    3.39 -# install everything into the standard system directories
    3.40 -# NB: install explicitly does not check that everything is up to date!
    3.41 -install: 
    3.42 -	$(MAKE) -C xen install
    3.43 -	$(MAKE) -C tools install
    3.44 -	$(shell cp -a $(INSTALL_DIR)/boot/* /boot/)
    3.45 -	$(shell cp -a $(INSTALL_DIR)/lib/modules/* /lib/modules/)
    3.46 -	sh ./docs/check_pkgs && $(MAKE) -C docs install || true
    3.47 -	$(shell cp -dR $(INSTALL_DIR)/boot/*$(LINUX_VER)* $(prefix)/boot/)
    3.48 -	$(shell cp -dR $(INSTALL_DIR)/lib/modules/* $(prefix)/lib/modules/)
    3.49 -
    3.50  xen:
    3.51  	$(MAKE) prefix=$(INSTALL_DIR) dist=yes -C xen install
    3.52  
     4.1 --- a/linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile	Mon Dec 06 20:03:12 2004 +0000
     4.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile	Mon Dec 06 20:37:17 2004 +0000
     4.3 @@ -1,3 +1,4 @@
     4.4  O_TARGET := drv.o
     4.5 +export-objs := balloon.o
     4.6  obj-y := balloon.o
     4.7  include $(TOPDIR)/Rules.make
     5.1 --- a/linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile	Mon Dec 06 20:03:12 2004 +0000
     5.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile	Mon Dec 06 20:37:17 2004 +0000
     5.3 @@ -6,7 +6,7 @@ all: kernel.o head.o init_task.o
     5.4  
     5.5  O_TARGET := kernel.o
     5.6  
     5.7 -export-objs     := i386_ksyms.o gnttab.o skbuff.o
     5.8 +export-objs     := i386_ksyms.o gnttab.o skbuff.o ctrl_if.o
     5.9  
    5.10  obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o  \
    5.11  		ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \
     6.1 --- a/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c	Mon Dec 06 20:03:12 2004 +0000
     6.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c	Mon Dec 06 20:37:17 2004 +0000
     6.3 @@ -275,7 +275,8 @@ void __init setup_arch(char **cmdline_p)
     6.4       * arch/xen/drivers/balloon/balloon.c
     6.5       */
     6.6      mem_param = parse_mem_cmdline(cmdline_p);
     6.7 -    if (!mem_param) mem_param = xen_start_info.nr_pages;
     6.8 +    if (mem_param < xen_start_info.nr_pages)
     6.9 +        mem_param = xen_start_info.nr_pages;
    6.10  
    6.11  #define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
    6.12  #define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
    6.13 @@ -303,6 +304,7 @@ void __init setup_arch(char **cmdline_p)
    6.14              printk(KERN_WARNING "Use a PAE enabled kernel.\n");
    6.15          else
    6.16              printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
    6.17 +        max_pfn = lmax_low_pfn;
    6.18  #else /* !CONFIG_HIGHMEM */
    6.19  #ifndef CONFIG_X86_PAE
    6.20          if (max_pfn > MAX_NONPAE_PFN) {
    6.21 @@ -350,8 +352,6 @@ void __init setup_arch(char **cmdline_p)
    6.22       */
    6.23      max_low_pfn = lmax_low_pfn;
    6.24  
    6.25 -
    6.26 -
    6.27  #ifdef CONFIG_BLK_DEV_INITRD
    6.28      if ( xen_start_info.mod_start != 0 )
    6.29      {
    6.30 @@ -375,6 +375,20 @@ void __init setup_arch(char **cmdline_p)
    6.31  
    6.32      paging_init();
    6.33  
    6.34 +    /* Make sure we have a large enough P->M table. */
    6.35 +    if ( max_pfn > xen_start_info.nr_pages )
    6.36 +    {
    6.37 +        phys_to_machine_mapping = alloc_bootmem_low_pages(
    6.38 +            max_pfn * sizeof(unsigned long));
    6.39 +        memset(phys_to_machine_mapping, ~0, max_pfn * sizeof(unsigned long));
    6.40 +        memcpy(phys_to_machine_mapping,
    6.41 +               (unsigned long *)xen_start_info.mfn_list,
    6.42 +               xen_start_info.nr_pages * sizeof(unsigned long));
    6.43 +        free_bootmem(__pa(xen_start_info.mfn_list), 
    6.44 +                     PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
    6.45 +                                     sizeof(unsigned long))));
    6.46 +    }
    6.47 +
    6.48      pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
    6.49      for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
    6.50      {	
     7.1 --- a/linux-2.4.28-xen-sparse/arch/xen/mm/init.c	Mon Dec 06 20:03:12 2004 +0000
     7.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/mm/init.c	Mon Dec 06 20:37:17 2004 +0000
     7.3 @@ -213,23 +213,16 @@ static void __init fixrange_init (unsign
     7.4  
     7.5  static void __init pagetable_init (void)
     7.6  {
     7.7 -    unsigned long vaddr, end;
     7.8 +    unsigned long vaddr, end, ram_end;
     7.9      pgd_t *kpgd, *pgd, *pgd_base;
    7.10      int i, j, k;
    7.11      pmd_t *kpmd, *pmd;
    7.12      pte_t *kpte, *pte, *pte_base;
    7.13  
    7.14 -    /* create tables only for boot_pfn frames.  max_low_pfn may be sized for
    7.15 -     * pages yet to be allocated from the hypervisor, or it may be set
    7.16 -     * to override the xen_start_info amount of memory
    7.17 -     */
    7.18 -    int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn);
    7.19 -
    7.20 -    /*
    7.21 -     * This can be zero as well - no problem, in that case we exit
    7.22 -     * the loops anyway due to the PTRS_PER_* conditions.
    7.23 -     */
    7.24 -    end = (unsigned long)__va(boot_pfn *PAGE_SIZE);
    7.25 +    end     = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
    7.26 +    ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE);
    7.27 +    if ( ram_end > end )
    7.28 +        ram_end = end;
    7.29  
    7.30      pgd_base = init_mm.pgd;
    7.31      i = __pgd_offset(PAGE_OFFSET);
    7.32 @@ -237,12 +230,12 @@ static void __init pagetable_init (void)
    7.33  
    7.34      for (; i < PTRS_PER_PGD; pgd++, i++) {
    7.35          vaddr = i*PGDIR_SIZE;
    7.36 -        if (end && (vaddr >= end))
    7.37 +        if (vaddr >= end)
    7.38              break;
    7.39          pmd = (pmd_t *)pgd;
    7.40          for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
    7.41              vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
    7.42 -            if (end && (vaddr >= end))
    7.43 +            if (vaddr >= end)
    7.44                  break;
    7.45  
    7.46              /* Filled in for us already? */
    7.47 @@ -250,10 +243,11 @@ static void __init pagetable_init (void)
    7.48                  continue;
    7.49  
    7.50              pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
    7.51 +            clear_page(pte_base);
    7.52  
    7.53              for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
    7.54                  vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
    7.55 -                if (end && (vaddr >= end))
    7.56 +                if (vaddr >= ram_end)
    7.57                      break;
    7.58                  *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
    7.59              }
    7.60 @@ -329,28 +323,14 @@ static inline int page_is_ram (unsigned 
    7.61      return 1;
    7.62  }
    7.63  
    7.64 -static inline int page_kills_ppro(unsigned long pagenr)
    7.65 -{
    7.66 -    return 0;
    7.67 -}
    7.68 -
    7.69  #ifdef CONFIG_HIGHMEM
    7.70 -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
    7.71 +void __init one_highpage_init(struct page *page, int free_page)
    7.72  {
    7.73 -    if (!page_is_ram(pfn)) {
    7.74 -        SetPageReserved(page);
    7.75 -        return;
    7.76 -    }
    7.77 -	
    7.78 -    if (bad_ppro && page_kills_ppro(pfn)) {
    7.79 -        SetPageReserved(page);
    7.80 -        return;
    7.81 -    }
    7.82 -	
    7.83      ClearPageReserved(page);
    7.84      set_bit(PG_highmem, &page->flags);
    7.85      atomic_set(&page->count, 1);
    7.86 -    __free_page(page);
    7.87 +    if ( free_page )
    7.88 +        __free_page(page);
    7.89      totalhigh_pages++;
    7.90  }
    7.91  #endif /* CONFIG_HIGHMEM */
    7.92 @@ -392,8 +372,9 @@ static int __init free_pages_init(void)
    7.93              reservedpages++;
    7.94      }
    7.95  #ifdef CONFIG_HIGHMEM
    7.96 -    for (pfn = xen_start_info.nr_pages-1; pfn >= highstart_pfn; pfn--)
    7.97 -        one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
    7.98 +    for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
    7.99 +        one_highpage_init((struct page *) (mem_map + pfn), pfn,
   7.100 +                          (pfn < xen_start_info.nr_pages));
   7.101      totalram_pages += totalhigh_pages;
   7.102  #endif
   7.103      return reservedpages;
     8.1 --- a/linux-2.4.28-xen-sparse/mkbuildtree	Mon Dec 06 20:03:12 2004 +0000
     8.2 +++ b/linux-2.4.28-xen-sparse/mkbuildtree	Mon Dec 06 20:37:17 2004 +0000
     8.3 @@ -204,6 +204,7 @@ ln -sf ../asm-i386/unaligned.h
     8.4  ln -sf ../asm-i386/unistd.h 
     8.5  ln -sf ../asm-i386/user.h 
     8.6  ln -sf ../asm-i386/vm86.h 
     8.7 +ln -sf ../../${LINUX_26}/include/asm-xen/balloon.h
     8.8  ln -sf ../../${LINUX_26}/include/asm-xen/ctrl_if.h
     8.9  ln -sf ../../${LINUX_26}/include/asm-xen/evtchn.h
    8.10  ln -sf ../../${LINUX_26}/include/asm-xen/gnttab.h
     9.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xen0_defconfig	Mon Dec 06 20:03:12 2004 +0000
     9.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xen0_defconfig	Mon Dec 06 20:37:17 2004 +0000
     9.3 @@ -1,7 +1,7 @@
     9.4  #
     9.5  # Automatically generated make config: don't edit
     9.6  # Linux kernel version: 2.6.10-rc2-xen0
     9.7 -# Fri Nov 19 20:16:38 2004
     9.8 +# Wed Dec  1 09:22:49 2004
     9.9  #
    9.10  CONFIG_XEN=y
    9.11  CONFIG_ARCH_XEN=y
    9.12 @@ -152,10 +152,10 @@ CONFIG_DEBUG_KERNEL=y
    9.13  CONFIG_EARLY_PRINTK=y
    9.14  # CONFIG_DEBUG_STACKOVERFLOW is not set
    9.15  # CONFIG_DEBUG_STACK_USAGE is not set
    9.16 -# CONFIG_DEBUG_SLAB is not set
    9.17 +CONFIG_DEBUG_SLAB=y
    9.18  CONFIG_MAGIC_SYSRQ=y
    9.19  # CONFIG_DEBUG_SPINLOCK is not set
    9.20 -# CONFIG_DEBUG_PAGEALLOC is not set
    9.21 +CONFIG_DEBUG_PAGEALLOC=y
    9.22  # CONFIG_DEBUG_INFO is not set
    9.23  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
    9.24  # CONFIG_FRAME_POINTER is not set
    10.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xenU_defconfig	Mon Dec 06 20:03:12 2004 +0000
    10.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xenU_defconfig	Mon Dec 06 20:37:17 2004 +0000
    10.3 @@ -1,7 +1,7 @@
    10.4  #
    10.5  # Automatically generated make config: don't edit
    10.6  # Linux kernel version: 2.6.10-rc2-xenU
    10.7 -# Fri Nov 19 20:16:52 2004
    10.8 +# Wed Dec  1 09:22:09 2004
    10.9  #
   10.10  CONFIG_XEN=y
   10.11  CONFIG_ARCH_XEN=y
   10.12 @@ -47,6 +47,7 @@ CONFIG_KOBJECT_UEVENT=y
   10.13  # CONFIG_IKCONFIG is not set
   10.14  # CONFIG_EMBEDDED is not set
   10.15  CONFIG_KALLSYMS=y
   10.16 +# CONFIG_KALLSYMS_ALL is not set
   10.17  # CONFIG_KALLSYMS_EXTRA_PASS is not set
   10.18  CONFIG_FUTEX=y
   10.19  CONFIG_EPOLL=y
   10.20 @@ -118,8 +119,15 @@ CONFIG_HAVE_DEC_LOCK=y
   10.21  #
   10.22  # Kernel hacking
   10.23  #
   10.24 -# CONFIG_DEBUG_KERNEL is not set
   10.25 +CONFIG_DEBUG_KERNEL=y
   10.26  CONFIG_EARLY_PRINTK=y
   10.27 +# CONFIG_DEBUG_STACKOVERFLOW is not set
   10.28 +# CONFIG_DEBUG_STACK_USAGE is not set
   10.29 +CONFIG_DEBUG_SLAB=y
   10.30 +# CONFIG_MAGIC_SYSRQ is not set
   10.31 +# CONFIG_DEBUG_SPINLOCK is not set
   10.32 +CONFIG_DEBUG_PAGEALLOC=y
   10.33 +# CONFIG_DEBUG_INFO is not set
   10.34  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   10.35  # CONFIG_FRAME_POINTER is not set
   10.36  # CONFIG_4KSTACKS is not set
   10.37 @@ -145,6 +153,7 @@ CONFIG_BINFMT_ELF=y
   10.38  CONFIG_STANDALONE=y
   10.39  CONFIG_PREVENT_FIRMWARE_BUILD=y
   10.40  # CONFIG_FW_LOADER is not set
   10.41 +# CONFIG_DEBUG_DRIVER is not set
   10.42  
   10.43  #
   10.44  # Block devices
    11.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/cpu/common.c	Mon Dec 06 20:03:12 2004 +0000
    11.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/cpu/common.c	Mon Dec 06 20:37:17 2004 +0000
    11.3 @@ -513,7 +513,7 @@ void __init cpu_gdt_init(struct Xgt_desc
    11.4  	     va < gdt_descr->address + gdt_descr->size;
    11.5  	     va += PAGE_SIZE, f++) {
    11.6  		frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
    11.7 -		protect_page(swapper_pg_dir, (void *)va, PROT_ON);
    11.8 +		make_page_readonly((void *)va);
    11.9  	}
   11.10  	flush_page_update_queue();
   11.11  	if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
    12.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Mon Dec 06 20:03:12 2004 +0000
    12.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Mon Dec 06 20:37:17 2004 +0000
    12.3 @@ -13,6 +13,7 @@
    12.4  #include <linux/pci.h>
    12.5  #include <linux/version.h>
    12.6  #include <asm/io.h>
    12.7 +#include <asm-xen/balloon.h>
    12.8  
    12.9  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
   12.10  #define pte_offset_kernel pte_offset
   12.11 @@ -37,9 +38,12 @@ xen_contig_memory(unsigned long vstart, 
   12.12  	pgd_t         *pgd; 
   12.13  	pmd_t         *pmd;
   12.14  	pte_t         *pte;
   12.15 -	unsigned long  pfn, i;
   12.16 +	unsigned long  pfn, i, flags;
   12.17  
   12.18  	scrub_pages(vstart, 1 << order);
   12.19 +
   12.20 +        balloon_lock(flags);
   12.21 +
   12.22  	/* 1. Zap current PTEs, giving away the underlying pages. */
   12.23  	for (i = 0; i < (1<<order); i++) {
   12.24  		pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
   12.25 @@ -70,6 +74,8 @@ xen_contig_memory(unsigned long vstart, 
   12.26  	}
   12.27  	/* Flush updates through and flush the TLB. */
   12.28  	xen_tlb_flush();
   12.29 +
   12.30 +        balloon_unlock(flags);
   12.31  }
   12.32  
   12.33  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    13.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/setup.c	Mon Dec 06 20:03:12 2004 +0000
    13.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/setup.c	Mon Dec 06 20:37:17 2004 +0000
    13.3 @@ -52,6 +52,9 @@
    13.4  #include "setup_arch_pre.h"
    13.5  #include <bios_ebda.h>
    13.6  
    13.7 +/* Allows setting of maximum possible memory size  */
    13.8 +static unsigned long xen_override_max_pfn;
    13.9 +
   13.10  int disable_pse __initdata = 0;
   13.11  
   13.12  /*
   13.13 @@ -718,8 +721,13 @@ static void __init parse_cmdline_early (
   13.14  				unsigned long long mem_size;
   13.15   
   13.16  				mem_size = memparse(from+4, &from);
   13.17 +#if 0
   13.18  				limit_regions(mem_size);
   13.19  				userdef=1;
   13.20 +#else
   13.21 +				xen_override_max_pfn =
   13.22 +					(unsigned long)(mem_size>>PAGE_SHIFT);
   13.23 +#endif
   13.24  			}
   13.25  		}
   13.26  
   13.27 @@ -857,6 +865,7 @@ static void __init parse_cmdline_early (
   13.28  	}
   13.29  }
   13.30  
   13.31 +#if 0 /* !XEN */
   13.32  /*
   13.33   * Callback for efi_memory_walk.
   13.34   */
   13.35 @@ -873,7 +882,6 @@ efi_find_max_pfn(unsigned long start, un
   13.36  	return 0;
   13.37  }
   13.38  
   13.39 -
   13.40  /*
   13.41   * Find the highest page frame number we have available
   13.42   */
   13.43 @@ -900,6 +908,15 @@ void __init find_max_pfn(void)
   13.44  			max_pfn = end;
   13.45  	}
   13.46  }
   13.47 +#else
   13.48 +/* We don't use the fake e820 because we need to respond to user override. */
   13.49 +void __init find_max_pfn(void)
   13.50 +{
   13.51 +	if ( xen_override_max_pfn < xen_start_info.nr_pages )
   13.52 +		xen_override_max_pfn = xen_start_info.nr_pages;
   13.53 +	max_pfn = xen_override_max_pfn;
   13.54 +}
   13.55 +#endif /* XEN */
   13.56  
   13.57  /*
   13.58   * Determine low and high memory ranges:
   13.59 @@ -1414,6 +1431,21 @@ void __init setup_arch(char **cmdline_p)
   13.60  #endif
   13.61  	paging_init();
   13.62  
   13.63 +	/* Make sure we have a large enough P->M table. */
   13.64 +	if (max_pfn > xen_start_info.nr_pages) {
   13.65 +		phys_to_machine_mapping = alloc_bootmem_low_pages(
   13.66 +			max_pfn * sizeof(unsigned long));
   13.67 +		memset(phys_to_machine_mapping, ~0,
   13.68 +			max_pfn * sizeof(unsigned long));
   13.69 +		memcpy(phys_to_machine_mapping,
   13.70 +			(unsigned long *)xen_start_info.mfn_list,
   13.71 +			xen_start_info.nr_pages * sizeof(unsigned long));
   13.72 +		free_bootmem(
   13.73 +			__pa(xen_start_info.mfn_list), 
   13.74 +			PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
   13.75 +			sizeof(unsigned long))));
   13.76 +	}
   13.77 +
   13.78  	pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
   13.79  	for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
   13.80  	{	
    14.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/hypervisor.c	Mon Dec 06 20:03:12 2004 +0000
    14.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/hypervisor.c	Mon Dec 06 20:37:17 2004 +0000
    14.3 @@ -35,6 +35,7 @@
    14.4  #include <asm/pgtable.h>
    14.5  #include <asm-xen/hypervisor.h>
    14.6  #include <asm-xen/multicall.h>
    14.7 +#include <asm-xen/balloon.h>
    14.8  
    14.9  /*
   14.10   * This suffices to protect us if we ever move to SMP domains.
   14.11 @@ -352,7 +353,6 @@ unsigned long allocate_empty_lowmem_regi
   14.12      unsigned long *pfn_array;
   14.13      unsigned long  vstart;
   14.14      unsigned long  i;
   14.15 -    int            ret;
   14.16      unsigned int   order = get_order(pages*PAGE_SIZE);
   14.17  
   14.18      vstart = __get_free_pages(GFP_KERNEL, order);
   14.19 @@ -378,57 +378,11 @@ unsigned long allocate_empty_lowmem_regi
   14.20      /* Flush updates through and flush the TLB. */
   14.21      xen_tlb_flush();
   14.22  
   14.23 -    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
   14.24 -                                pfn_array, 1<<order, 0);
   14.25 -    if ( unlikely(ret != (1<<order)) )
   14.26 -    {
   14.27 -        printk(KERN_WARNING "Unable to reduce memory reservation (%d)\n", ret);
   14.28 -        BUG();
   14.29 -    }
   14.30 +    balloon_put_pages(pfn_array, 1 << order);
   14.31  
   14.32      vfree(pfn_array);
   14.33  
   14.34      return vstart;
   14.35  }
   14.36  
   14.37 -void deallocate_lowmem_region(unsigned long vstart, unsigned long pages)
   14.38 -{
   14.39 -    pgd_t         *pgd; 
   14.40 -    pmd_t         *pmd;
   14.41 -    pte_t         *pte;
   14.42 -    unsigned long *pfn_array;
   14.43 -    unsigned long  i;
   14.44 -    int            ret;
   14.45 -    unsigned int   order = get_order(pages*PAGE_SIZE);
   14.46 -
   14.47 -    pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
   14.48 -    if ( pfn_array == NULL )
   14.49 -        BUG();
   14.50 -
   14.51 -    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
   14.52 -                                pfn_array, 1<<order, 0);
   14.53 -    if ( unlikely(ret != (1<<order)) )
   14.54 -    {
   14.55 -        printk(KERN_WARNING "Unable to increase memory reservation (%d)\n",
   14.56 -               ret);
   14.57 -        BUG();
   14.58 -    }
   14.59 -
   14.60 -    for ( i = 0; i < (1<<order); i++ )
   14.61 -    {
   14.62 -        pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
   14.63 -        pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE)));
   14.64 -        pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
   14.65 -        queue_l1_entry_update(pte, (pfn_array[i]<<PAGE_SHIFT)|__PAGE_KERNEL);
   14.66 -        queue_machphys_update(pfn_array[i], __pa(vstart)>>PAGE_SHIFT);
   14.67 -        phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = pfn_array[i];
   14.68 -    }
   14.69 -
   14.70 -    flush_page_update_queue();
   14.71 -
   14.72 -    vfree(pfn_array);
   14.73 -
   14.74 -    free_pages(vstart, order);
   14.75 -}
   14.76 -
   14.77  #endif /* CONFIG_XEN_PHYSDEV_ACCESS */
    15.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/init.c	Mon Dec 06 20:03:12 2004 +0000
    15.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/init.c	Mon Dec 06 20:37:17 2004 +0000
    15.3 @@ -77,6 +77,7 @@ static pte_t * __init one_page_table_ini
    15.4  {
    15.5  	if (pmd_none(*pmd)) {
    15.6  		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
    15.7 +		make_page_readonly(page_table);
    15.8  		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
    15.9  		if (page_table != pte_offset_kernel(pmd, 0))
   15.10  			BUG();	
   15.11 @@ -125,41 +126,6 @@ static void __init page_table_range_init
   15.12  	}
   15.13  }
   15.14  
   15.15 -void __init protect_page(pgd_t *pgd, void *page, int mode)
   15.16 -{
   15.17 -	pmd_t *pmd;
   15.18 -	pte_t *pte;
   15.19 -	unsigned long addr;
   15.20 -
   15.21 -	addr = (unsigned long)page;
   15.22 -	pgd += pgd_index(addr);
   15.23 -	pmd = pmd_offset(pgd, addr);
   15.24 -	pte = pte_offset_kernel(pmd, addr);
   15.25 -	if (!pte_present(*pte))
   15.26 -		return;
   15.27 -	queue_l1_entry_update(pte, mode ? pte_val_ma(*pte) & ~_PAGE_RW :
   15.28 -					pte_val_ma(*pte) | _PAGE_RW);
   15.29 -}
   15.30 -
   15.31 -void __init protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode)
   15.32 -{
   15.33 -	pmd_t *pmd;
   15.34 -	pte_t *pte;
   15.35 -	int pgd_idx, pmd_idx;
   15.36 -
   15.37 -	protect_page(dpgd, spgd, mode);
   15.38 -
   15.39 -	for (pgd_idx = 0; pgd_idx < PTRS_PER_PGD_NO_HV; spgd++, pgd_idx++) {
   15.40 -		pmd = pmd_offset(spgd, 0);
   15.41 -		if (pmd_none(*pmd))
   15.42 -			continue;
   15.43 -		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
   15.44 -			pte = pte_offset_kernel(pmd, 0);
   15.45 -			protect_page(dpgd, pte, mode);
   15.46 -		}
   15.47 -	}
   15.48 -}
   15.49 -
   15.50  static inline int is_kernel_text(unsigned long addr)
   15.51  {
   15.52  	if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end)
   15.53 @@ -180,6 +146,10 @@ static void __init kernel_physical_mappi
   15.54  	pte_t *pte;
   15.55  	int pgd_idx, pmd_idx, pte_ofs;
   15.56  
   15.57 +	unsigned long max_ram_pfn = xen_start_info.nr_pages;
   15.58 +	if (max_ram_pfn > max_low_pfn)
   15.59 +		max_ram_pfn = max_low_pfn;
   15.60 +
   15.61  	pgd_idx = pgd_index(PAGE_OFFSET);
   15.62  	pgd = pgd_base + pgd_idx;
   15.63  	pfn = 0;
   15.64 @@ -207,7 +177,10 @@ static void __init kernel_physical_mappi
   15.65  				pte = one_page_table_init(pmd);
   15.66  
   15.67  				pte += pte_ofs;
   15.68 -				for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
   15.69 +				/* XEN: Only map initial RAM allocation. */
   15.70 +				for (; pte_ofs < PTRS_PER_PTE && pfn < max_ram_pfn; pte++, pfn++, pte_ofs++) {
   15.71 +						if (pte_present(*pte))
   15.72 +							continue;
   15.73  						if (is_kernel_text(address))
   15.74  							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
   15.75  						else
   15.76 @@ -311,7 +284,8 @@ void __init one_highpage_init(struct pag
   15.77  		ClearPageReserved(page);
   15.78  		set_bit(PG_highmem, &page->flags);
   15.79  		set_page_count(page, 1);
   15.80 -		__free_page(page);
   15.81 +		if (pfn < xen_start_info.nr_pages)
   15.82 +			__free_page(page);
   15.83  		totalhigh_pages++;
   15.84  	} else
   15.85  		SetPageReserved(page);
   15.86 @@ -347,7 +321,8 @@ extern void __init remap_numa_kva(void);
   15.87  static void __init pagetable_init (void)
   15.88  {
   15.89  	unsigned long vaddr;
   15.90 -	pgd_t *pgd_base = swapper_pg_dir;
   15.91 +	pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
   15.92 +	pgd_t *new_pgd = swapper_pg_dir;
   15.93  
   15.94  #ifdef CONFIG_X86_PAE
   15.95  	int i;
   15.96 @@ -368,7 +343,22 @@ static void __init pagetable_init (void)
   15.97  		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
   15.98  	}
   15.99  
  15.100 -	kernel_physical_mapping_init(pgd_base);
  15.101 +	/*
  15.102 +	 * Switch to proper mm_init page directory. Initialise from the current
  15.103 +	 * page directory, write-protect the new page directory, then switch to
  15.104 +	 * it. We clean up by write-enabling and then freeing the old page dir.
  15.105 +	 */
  15.106 +	memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
  15.107 +	make_page_readonly(new_pgd);
  15.108 +	queue_pgd_pin(__pa(new_pgd));
  15.109 +	load_cr3(new_pgd);
  15.110 +	queue_pgd_unpin(__pa(old_pgd));
  15.111 +	__flush_tlb_all(); /* implicit flush */
  15.112 +	make_page_writable(old_pgd);
  15.113 +	flush_page_update_queue();
  15.114 +	free_bootmem(__pa(old_pgd), PAGE_SIZE);
  15.115 +
  15.116 +	kernel_physical_mapping_init(new_pgd);
  15.117  	remap_numa_kva();
  15.118  
  15.119  	/*
  15.120 @@ -376,9 +366,9 @@ static void __init pagetable_init (void)
  15.121  	 * created - mappings will be set by set_fixmap():
  15.122  	 */
  15.123  	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
  15.124 -	page_table_range_init(vaddr, 0, pgd_base);
  15.125 +	page_table_range_init(vaddr, 0, new_pgd);
  15.126  
  15.127 -	permanent_kmaps_init(pgd_base);
  15.128 +	permanent_kmaps_init(new_pgd);
  15.129  
  15.130  #ifdef CONFIG_X86_PAE
  15.131  	/*
  15.132 @@ -388,7 +378,7 @@ static void __init pagetable_init (void)
  15.133  	 * All user-space mappings are explicitly cleared after
  15.134  	 * SMP startup.
  15.135  	 */
  15.136 -	pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
  15.137 +	new_pgd[0] = new_pgd[USER_PTRS_PER_PGD];
  15.138  #endif
  15.139  }
  15.140  
  15.141 @@ -545,8 +535,6 @@ out:
  15.142   */
  15.143  void __init paging_init(void)
  15.144  {
  15.145 -	pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
  15.146 -	pgd_t *new_pgd = swapper_pg_dir;
  15.147  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
  15.148  	int i;
  15.149  #endif
  15.150 @@ -559,25 +547,6 @@ void __init paging_init(void)
  15.151  
  15.152  	pagetable_init();
  15.153  
  15.154 -	/*
  15.155 -	 * Write-protect both page tables within both page tables.
  15.156 -	 * That's three ops, as the old p.t. is already protected
  15.157 -	 * within the old p.t. Then pin the new table, switch tables,
  15.158 -	 * and unprotect the old table.
  15.159 -	 */
  15.160 -	protect_pagetable(new_pgd, old_pgd, PROT_ON);
  15.161 -	protect_pagetable(new_pgd, new_pgd, PROT_ON);
  15.162 -	protect_pagetable(old_pgd, new_pgd, PROT_ON);
  15.163 -	queue_pgd_pin(__pa(new_pgd));
  15.164 -	load_cr3(new_pgd);
  15.165 -	queue_pgd_unpin(__pa(old_pgd));
  15.166 -	__flush_tlb_all(); /* implicit flush */
  15.167 -	protect_pagetable(new_pgd, old_pgd, PROT_OFF);
  15.168 -	flush_page_update_queue();
  15.169 -
  15.170 -	/* Completely detached from old tables, so free them. */
  15.171 -	free_bootmem(__pa(old_pgd), xen_start_info.nr_pt_frames << PAGE_SHIFT);
  15.172 -
  15.173  #ifdef CONFIG_X86_PAE
  15.174  	/*
  15.175  	 * We will bail out later - printk doesn't work right now so
    16.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/ioremap.c	Mon Dec 06 20:03:12 2004 +0000
    16.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/ioremap.c	Mon Dec 06 20:37:17 2004 +0000
    16.3 @@ -11,16 +11,18 @@
    16.4  #include <linux/vmalloc.h>
    16.5  #include <linux/init.h>
    16.6  #include <linux/slab.h>
    16.7 +#include <linux/module.h>
    16.8  #include <asm/io.h>
    16.9  #include <asm/fixmap.h>
   16.10  #include <asm/cacheflush.h>
   16.11  #include <asm/tlbflush.h>
   16.12  #include <asm/pgtable.h>
   16.13 +#include <asm/pgalloc.h>
   16.14  
   16.15  #ifndef CONFIG_XEN_PHYSDEV_ACCESS
   16.16  
   16.17  void * __ioremap(unsigned long phys_addr, unsigned long size,
   16.18 -		 unsigned long flags)
   16.19 +		unsigned long flags)
   16.20  {
   16.21  	return NULL;
   16.22  }
   16.23 @@ -59,86 +61,6 @@ static inline int is_local_lowmem(unsign
   16.24  	return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
   16.25  }
   16.26  
   16.27 -static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
   16.28 -	unsigned long phys_addr, unsigned long flags)
   16.29 -{
   16.30 -	unsigned long end;
   16.31 -	unsigned long pfn;
   16.32 -
   16.33 -	address &= ~PMD_MASK;
   16.34 -	end = address + size;
   16.35 -	if (end > PMD_SIZE)
   16.36 -		end = PMD_SIZE;
   16.37 -	if (address >= end)
   16.38 -		BUG();
   16.39 -	pfn = phys_addr >> PAGE_SHIFT;
   16.40 -	do {
   16.41 -		if (!pte_none(*pte)) {
   16.42 -			printk("remap_area_pte: page already exists\n");
   16.43 -			BUG();
   16.44 -		}
   16.45 -		set_pte(pte, pfn_pte_ma(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | 
   16.46 -					_PAGE_DIRTY | _PAGE_ACCESSED | flags)));
   16.47 -		address += PAGE_SIZE;
   16.48 -		pfn++;
   16.49 -		pte++;
   16.50 -	} while (address && (address < end));
   16.51 -}
   16.52 -
   16.53 -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
   16.54 -	unsigned long phys_addr, unsigned long flags)
   16.55 -{
   16.56 -	unsigned long end;
   16.57 -
   16.58 -	address &= ~PGDIR_MASK;
   16.59 -	end = address + size;
   16.60 -	if (end > PGDIR_SIZE)
   16.61 -		end = PGDIR_SIZE;
   16.62 -	phys_addr -= address;
   16.63 -	if (address >= end)
   16.64 -		BUG();
   16.65 -	do {
   16.66 -		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
   16.67 -		if (!pte)
   16.68 -			return -ENOMEM;
   16.69 -		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
   16.70 -		address = (address + PMD_SIZE) & PMD_MASK;
   16.71 -		pmd++;
   16.72 -	} while (address && (address < end));
   16.73 -	return 0;
   16.74 -}
   16.75 -
   16.76 -static int remap_area_pages(unsigned long address, unsigned long phys_addr,
   16.77 -				 unsigned long size, unsigned long flags)
   16.78 -{
   16.79 -	int error;
   16.80 -	pgd_t * dir;
   16.81 -	unsigned long end = address + size;
   16.82 -
   16.83 -	phys_addr -= address;
   16.84 -	dir = pgd_offset(&init_mm, address);
   16.85 -	flush_cache_all();
   16.86 -	if (address >= end)
   16.87 -		BUG();
   16.88 -	spin_lock(&init_mm.page_table_lock);
   16.89 -	do {
   16.90 -		pmd_t *pmd;
   16.91 -		pmd = pmd_alloc(&init_mm, dir, address);
   16.92 -		error = -ENOMEM;
   16.93 -		if (!pmd)
   16.94 -			break;
   16.95 -		if (remap_area_pmd(pmd, address, end - address,
   16.96 -					 phys_addr + address, flags))
   16.97 -			break;
   16.98 -		error = 0;
   16.99 -		address = (address + PGDIR_SIZE) & PGDIR_MASK;
  16.100 -		dir++;
  16.101 -	} while (address && (address < end));
  16.102 -	spin_unlock(&init_mm.page_table_lock);
  16.103 -	flush_tlb_all();
  16.104 -	return error;
  16.105 -}
  16.106 -
  16.107  /*
  16.108   * Generic mapping function (not visible outside):
  16.109   */
  16.110 @@ -201,7 +123,7 @@ void __iomem * __ioremap(unsigned long p
  16.111  		return NULL;
  16.112  	area->phys_addr = phys_addr;
  16.113  	addr = (void __iomem *) area->addr;
  16.114 -	if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
  16.115 +	if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr, size, __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | flags), DOMID_IO)) {
  16.116  		vunmap((void __force *) addr);
  16.117  		return NULL;
  16.118  	}
  16.119 @@ -406,7 +328,9 @@ static inline int direct_remap_area_pmd(
  16.120  	if (address >= end)
  16.121  		BUG();
  16.122  	do {
  16.123 -		pte_t *pte = pte_alloc_map(mm, pmd, address);
  16.124 +		pte_t *pte = (mm == &init_mm) ? 
  16.125 +			pte_alloc_kernel(mm, pmd, address) :
  16.126 +			pte_alloc_map(mm, pmd, address);
  16.127  		if (!pte)
  16.128  			return -ENOMEM;
  16.129  		direct_remap_area_pte(pte, address, end - address, v);
  16.130 @@ -426,7 +350,6 @@ int __direct_remap_area_pages(struct mm_
  16.131  	unsigned long end = address + size;
  16.132  
  16.133  	dir = pgd_offset(mm, address);
  16.134 -	flush_cache_all();
  16.135  	if (address >= end)
  16.136  		BUG();
  16.137  	spin_lock(&mm->page_table_lock);
  16.138 @@ -440,7 +363,6 @@ int __direct_remap_area_pages(struct mm_
  16.139  
  16.140  	} while (address && (address < end));
  16.141  	spin_unlock(&mm->page_table_lock);
  16.142 -	flush_tlb_all();
  16.143  	return 0;
  16.144  }
  16.145  
  16.146 @@ -464,16 +386,18 @@ int direct_remap_area_pages(struct mm_st
  16.147  
  16.148  	start_address = address;
  16.149  
  16.150 -	for(i = 0; i < size; i += PAGE_SIZE) {
  16.151 +	flush_cache_all();
  16.152 +
  16.153 +	for (i = 0; i < size; i += PAGE_SIZE) {
  16.154  		if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
  16.155  			/* Fill in the PTE pointers. */
  16.156  			__direct_remap_area_pages(mm,
  16.157  						  start_address, 
  16.158  						  address-start_address, 
  16.159  						  w);
  16.160 -	    
  16.161 +	
  16.162  			if (HYPERVISOR_mmu_update(u, v - u, NULL) < 0)
  16.163 -				return -EFAULT;	    
  16.164 +				return -EFAULT;
  16.165  			v = w;
  16.166  			start_address = address;
  16.167  		}
  16.168 @@ -494,10 +418,14 @@ int direct_remap_area_pages(struct mm_st
  16.169  		__direct_remap_area_pages(mm,
  16.170  					  start_address, 
  16.171  					  address-start_address, 
  16.172 -					  w);	 
  16.173 +					  w);
  16.174  		if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0))
  16.175 -			return -EFAULT;	    
  16.176 +			return -EFAULT;	
  16.177  	}
  16.178 -    
  16.179 +
  16.180 +	flush_tlb_all();
  16.181 +
  16.182  	return 0;
  16.183  }
  16.184 +
  16.185 +EXPORT_SYMBOL(direct_remap_area_pages);
    17.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/Makefile	Mon Dec 06 20:03:12 2004 +0000
    17.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/Makefile	Mon Dec 06 20:37:17 2004 +0000
    17.3 @@ -11,4 +11,4 @@ CPPFLAGS_vmlinux.lds += -U$(XENARCH)
    17.4  
    17.5  extra-y += vmlinux.lds
    17.6  
    17.7 -obj-y	:= ctrl_if.o evtchn.o fixup.o reboot.o xen_proc.o gnttab.o skbuff.o
    17.8 +obj-y	:= ctrl_if.o evtchn.o fixup.o reboot.o xen_proc.o gnttab.o skbuff.o devmem.o
    18.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/ctrl_if.c	Mon Dec 06 20:03:12 2004 +0000
    18.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/ctrl_if.c	Mon Dec 06 20:37:17 2004 +0000
    18.3 @@ -35,6 +35,7 @@
    18.4  #include <linux/errno.h>
    18.5  #include <linux/irq.h>
    18.6  #include <linux/interrupt.h>
    18.7 +#include <linux/module.h>
    18.8  #include <asm-xen/ctrl_if.h>
    18.9  #include <asm-xen/evtchn.h>
   18.10  
   18.11 @@ -539,3 +540,10 @@ void ctrl_if_discard_responses(void)
   18.12      ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod;
   18.13  }
   18.14  
   18.15 +EXPORT_SYMBOL(ctrl_if_send_message_noblock);
   18.16 +EXPORT_SYMBOL(ctrl_if_send_message_block);
   18.17 +EXPORT_SYMBOL(ctrl_if_send_message_and_get_response);
   18.18 +EXPORT_SYMBOL(ctrl_if_enqueue_space_callback);
   18.19 +EXPORT_SYMBOL(ctrl_if_send_response);
   18.20 +EXPORT_SYMBOL(ctrl_if_register_receiver);
   18.21 +EXPORT_SYMBOL(ctrl_if_unregister_receiver);
    19.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/char/mem.c	Mon Dec 06 20:03:12 2004 +0000
    19.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/char/mem.c	Mon Dec 06 20:37:17 2004 +0000
    19.3 @@ -42,12 +42,7 @@ extern void tapechar_init(void);
    19.4   */
    19.5  static inline int uncached_access(struct file *file, unsigned long addr)
    19.6  {
    19.7 -#ifdef CONFIG_XEN
    19.8 -	if (file->f_flags & O_SYNC)
    19.9 -		return 1;
   19.10 -	/* Xen sets correct MTRR type on non-RAM for us. */
   19.11 -	return 0;
   19.12 -#elif defined(__i386__)
   19.13 +#if defined(__i386__)
   19.14  	/*
   19.15  	 * On the PPro and successors, the MTRRs are used to set
   19.16  	 * memory types for physical addresses outside main memory,
   19.17 @@ -148,7 +143,7 @@ static ssize_t do_write_mem(void *p, uns
   19.18  	return written;
   19.19  }
   19.20  
   19.21 -
   19.22 +#ifndef ARCH_HAS_DEV_MEM
   19.23  /*
   19.24   * This funcion reads the *physical* memory. The f_pos points directly to the 
   19.25   * memory location. 
   19.26 @@ -194,8 +189,9 @@ static ssize_t write_mem(struct file * f
   19.27  		return -EFAULT;
   19.28  	return do_write_mem(__va(p), p, buf, count, ppos);
   19.29  }
   19.30 +#endif
   19.31  
   19.32 -static int mmap_mem(struct file * file, struct vm_area_struct * vma)
   19.33 +static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
   19.34  {
   19.35  #ifdef pgprot_noncached
   19.36  	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
   19.37 @@ -206,22 +202,18 @@ static int mmap_mem(struct file * file, 
   19.38  		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
   19.39  #endif
   19.40  
   19.41 -#if defined(CONFIG_XEN)
   19.42 -	if (io_remap_page_range(vma,
   19.43 -				vma->vm_start,
   19.44 -				vma->vm_pgoff << PAGE_SHIFT,
   19.45 -				vma->vm_end-vma->vm_start,
   19.46 -				vma->vm_page_prot))
   19.47 +	/* Don't try to swap out physical pages.. */
   19.48 +	vma->vm_flags |= VM_RESERVED;
   19.49 +
   19.50 +	/*
   19.51 +	 * Don't dump addresses that are not real memory to a core file.
   19.52 +	 */
   19.53 +	if (uncached)
   19.54 +		vma->vm_flags |= VM_IO;
   19.55 +
   19.56 +	if (remap_page_range(vma, vma->vm_start, offset, vma->vm_end-vma->vm_start,
   19.57 +			     vma->vm_page_prot))
   19.58  		return -EAGAIN;
   19.59 -#else
   19.60 -	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
   19.61 -	if (remap_pfn_range(vma,
   19.62 -			    vma->vm_start,
   19.63 -			    vma->vm_pgoff,
   19.64 -			    vma->vm_end-vma->vm_start,
   19.65 -			    vma->vm_page_prot))
   19.66 -		return -EAGAIN;
   19.67 -#endif
   19.68  	return 0;
   19.69  }
   19.70  
   19.71 @@ -581,7 +573,7 @@ static int open_port(struct inode * inod
   19.72  	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
   19.73  }
   19.74  
   19.75 -#define mmap_kmem	mmap_mem
   19.76 +#define mmap_mem	mmap_kmem
   19.77  #define zero_lseek	null_lseek
   19.78  #define full_lseek      null_lseek
   19.79  #define write_zero	write_null
   19.80 @@ -589,6 +581,7 @@ static int open_port(struct inode * inod
   19.81  #define open_mem	open_port
   19.82  #define open_kmem	open_mem
   19.83  
   19.84 +#ifndef ARCH_HAS_DEV_MEM
   19.85  static struct file_operations mem_fops = {
   19.86  	.llseek		= memory_lseek,
   19.87  	.read		= read_mem,
   19.88 @@ -596,6 +589,9 @@ static struct file_operations mem_fops =
   19.89  	.mmap		= mmap_mem,
   19.90  	.open		= open_mem,
   19.91  };
   19.92 +#else
   19.93 +extern struct file_operations mem_fops;
   19.94 +#endif
   19.95  
   19.96  static struct file_operations kmem_fops = {
   19.97  	.llseek		= memory_lseek,
    20.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/Makefile	Mon Dec 06 20:03:12 2004 +0000
    20.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/Makefile	Mon Dec 06 20:37:17 2004 +0000
    20.3 @@ -2,9 +2,9 @@
    20.4  
    20.5  obj-y	+= console/
    20.6  obj-y	+= evtchn/
    20.7 -obj-y	+= privcmd/
    20.8 -obj-y   += balloon/
    20.9 +obj-y	+= balloon/
   20.10  
   20.11 +obj-$(CONFIG_XEN_PRIVILEGED_GUEST)	+= privcmd/
   20.12  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
   20.13  obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
   20.14  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= blkfront/
    21.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/balloon/balloon.c	Mon Dec 06 20:03:12 2004 +0000
    21.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/balloon/balloon.c	Mon Dec 06 20:37:17 2004 +0000
    21.3 @@ -4,6 +4,7 @@
    21.4   * Xen balloon driver - enables returning/claiming memory to/from Xen.
    21.5   *
    21.6   * Copyright (c) 2003, B Dragovic
    21.7 + * Copyright (c) 2003-2004, M Williamson, K Fraser
    21.8   * 
    21.9   * This file may be distributed separately from the Linux kernel, or
   21.10   * incorporated into other software packages, subject to the following license:
   21.11 @@ -28,8 +29,8 @@
   21.12   */
   21.13  
   21.14  #include <linux/config.h>
   21.15 +#include <linux/kernel.h>
   21.16  #include <linux/module.h>
   21.17 -#include <linux/kernel.h>
   21.18  #include <linux/sched.h>
   21.19  #include <linux/errno.h>
   21.20  #include <linux/mm.h>
   21.21 @@ -42,25 +43,39 @@
   21.22  #include <asm-xen/xen_proc.h>
   21.23  #include <asm-xen/hypervisor.h>
   21.24  #include <asm-xen/ctrl_if.h>
   21.25 +#include <asm-xen/balloon.h>
   21.26  #include <asm/pgalloc.h>
   21.27  #include <asm/pgtable.h>
   21.28  #include <asm/uaccess.h>
   21.29  #include <asm/tlb.h>
   21.30  #include <linux/list.h>
   21.31  
   21.32 -/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
   21.33 -#define USER_INFLATE_BALLOON  1   /* return mem to hypervisor */
   21.34 -#define USER_DEFLATE_BALLOON  2   /* claim mem from hypervisor */
   21.35 -typedef struct user_balloon_op {
   21.36 -    unsigned int  op;
   21.37 -    unsigned long size;
   21.38 -} user_balloon_op_t;
   21.39 -/* END OF USER DEFINE */
   21.40 -
   21.41  static struct proc_dir_entry *balloon_pde;
   21.42  
   21.43 -unsigned long credit;
   21.44 -static unsigned long current_pages, most_seen_pages;
   21.45 +static DECLARE_MUTEX(balloon_mutex);
   21.46 +spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
   21.47 +
   21.48 +/* We aim for 'current allocation' == 'target allocation'. */
   21.49 +static unsigned long current_pages;
   21.50 +static unsigned long target_pages;
   21.51 +
   21.52 +/* We may hit the hard limit in Xen. If we do then we remember it. */
   21.53 +static unsigned long hard_limit;
   21.54 +
   21.55 +/*
   21.56 + * Drivers may alter the memory reservation independently, but they must
   21.57 + * inform the balloon driver so that we can avoid hitting the hard limit.
   21.58 + */
   21.59 +static unsigned long driver_pages;
   21.60 +
   21.61 +/* List of ballooned pages, threaded through the mem_map array. */
   21.62 +static LIST_HEAD(ballooned_pages);
   21.63 +static unsigned long balloon_low, balloon_high;
   21.64 +
   21.65 +/* Main work function, always executed in process context. */
   21.66 +static void balloon_process(void *unused);
   21.67 +static DECLARE_WORK(balloon_worker, balloon_process, NULL);
   21.68 +static struct timer_list balloon_timer;
   21.69  
   21.70  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   21.71  /* Use the private and mapping fields of struct page as a list. */
   21.72 @@ -76,452 +91,223 @@ static unsigned long current_pages, most
   21.73  #define LIST_TO_PAGE(l) ( list_entry(l, struct page, list) )
   21.74  #define UNLIST_PAGE(p)  ( list_del(&p->list) )
   21.75  #define pte_offset_kernel pte_offset
   21.76 +#define subsys_initcall(_fn) __initcall(_fn)
   21.77  #endif
   21.78  
   21.79 -/* List of ballooned pages, threaded through the mem_map array. */
   21.80 -LIST_HEAD(ballooned_pages);
   21.81 +#define IPRINTK(fmt, args...) \
   21.82 +    printk(KERN_INFO "xen_mem: " fmt, ##args)
   21.83 +#define WPRINTK(fmt, args...) \
   21.84 +    printk(KERN_WARNING "xen_mem: " fmt, ##args)
   21.85  
   21.86 -/** add_ballooned_page - remember we've ballooned a pfn */
   21.87 -void add_ballooned_page(unsigned long pfn)
   21.88 +/* balloon_append: add the given page to the balloon. */
   21.89 +static void balloon_append(struct page *page)
   21.90  {
   21.91 -    struct page *p = mem_map + pfn;
   21.92 -
   21.93 -    list_add(PAGE_TO_LIST(p), &ballooned_pages);
   21.94 +    /* Low memory is re-populated first, so highmem pages go at list tail. */
   21.95 +    if ( PageHighMem(page) )
   21.96 +    {
   21.97 +        list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
   21.98 +        balloon_high++;
   21.99 +    }
  21.100 +    else
  21.101 +    {
  21.102 +        list_add(PAGE_TO_LIST(page), &ballooned_pages);
  21.103 +        balloon_low++;
  21.104 +    }
  21.105  }
  21.106  
  21.107 -/* rem_ballooned_page - recall a ballooned page and remove from list. */
  21.108 -struct page *rem_ballooned_page(void)
  21.109 +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
  21.110 +static struct page *balloon_retrieve(void)
  21.111  {
  21.112 -    if(!list_empty(&ballooned_pages))
  21.113 -    {
  21.114 -        struct page *ret;
  21.115 +    struct page *page;
  21.116 +
  21.117 +    if ( list_empty(&ballooned_pages) )
  21.118 +        return NULL;
  21.119  
  21.120 -        ret = LIST_TO_PAGE(ballooned_pages.next);
  21.121 -	UNLIST_PAGE(ret);
  21.122 +    page = LIST_TO_PAGE(ballooned_pages.next);
  21.123 +    UNLIST_PAGE(page);
  21.124  
  21.125 -        return ret;
  21.126 -    }
  21.127 +    if ( PageHighMem(page) )
  21.128 +        balloon_high--;
  21.129      else
  21.130 -        return NULL;
  21.131 +        balloon_low--;
  21.132 +
  21.133 +    return page;
  21.134  }
  21.135  
  21.136  static inline pte_t *get_ptep(unsigned long addr)
  21.137  {
  21.138 -    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
  21.139 +    pgd_t *pgd;
  21.140 +    pmd_t *pmd;
  21.141 +
  21.142      pgd = pgd_offset_k(addr);
  21.143 -
  21.144      if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
  21.145  
  21.146      pmd = pmd_offset(pgd, addr);
  21.147      if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
  21.148  
  21.149 -    ptep = pte_offset_kernel(pmd, addr);
  21.150 -
  21.151 -    return ptep;
  21.152 +    return pte_offset_kernel(pmd, addr);
  21.153  }
  21.154  
  21.155 -/* Main function for relinquishing memory. */
  21.156 -static unsigned long inflate_balloon(unsigned long num_pages)
  21.157 -
  21.158 +static void balloon_alarm(unsigned long unused)
  21.159  {
  21.160 -    unsigned long *parray;
  21.161 -    unsigned long *currp;
  21.162 -    unsigned long curraddr;
  21.163 -    unsigned long ret = 0;
  21.164 -    unsigned long i, j;
  21.165 -
  21.166 -    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
  21.167 -    if ( parray == NULL )
  21.168 -    {
  21.169 -        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
  21.170 -        return -EFAULT;
  21.171 -    }
  21.172 -
  21.173 -    currp = parray;
  21.174 -
  21.175 -    for ( i = 0; i < num_pages; i++, currp++ )
  21.176 -    {
  21.177 -        struct page *page = alloc_page(GFP_HIGHUSER);
  21.178 -        unsigned long pfn = page - mem_map;
  21.179 -
  21.180 -        /* If allocation fails then free all reserved pages. */
  21.181 -        if ( page == NULL )
  21.182 -        {
  21.183 -            printk(KERN_ERR "Unable to inflate balloon by %ld, only"
  21.184 -                   " %ld pages free.", num_pages, i);
  21.185 -            currp = parray;
  21.186 -            for ( j = 0; j < i; j++, currp++ )
  21.187 -                __free_page((struct page *) (mem_map + *currp));
  21.188 -
  21.189 -            ret = -EFAULT;
  21.190 -            goto cleanup;
  21.191 -        }
  21.192 -
  21.193 -        *currp = pfn;
  21.194 -    }
  21.195 -
  21.196 +    schedule_work(&balloon_worker);
  21.197 +}
  21.198  
  21.199 -    for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
  21.200 -    {
  21.201 -        unsigned long mfn = phys_to_machine_mapping[*currp];
  21.202 -        curraddr = (unsigned long)page_address(mem_map + *currp);
  21.203 -        /* Blow away page contents for security, and also p.t. ref if any. */
  21.204 -        if ( curraddr != 0 )
  21.205 -        {
  21.206 -            scrub_pages(curraddr, 1);
  21.207 -            queue_l1_entry_update(get_ptep(curraddr), 0);
  21.208 -        }
  21.209 -#ifdef CONFIG_XEN_SCRUB_PAGES
  21.210 -        else
  21.211 -        {
  21.212 -            void *p = kmap(&mem_map[*currp]);
  21.213 -            scrub_pages(p, 1);
  21.214 -            kunmap(&mem_map[*currp]);
  21.215 -        }
  21.216 -#endif
  21.217 -
  21.218 -        add_ballooned_page(*currp);
  21.219 -
  21.220 -        phys_to_machine_mapping[*currp] = INVALID_P2M_ENTRY;
  21.221 -        *currp = mfn;
  21.222 -    }
  21.223 -
  21.224 -    /* Flush updates through and flush the TLB. */
  21.225 -    xen_tlb_flush();
  21.226 -
  21.227 -    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
  21.228 -                                parray, num_pages, 0);
  21.229 -    if ( unlikely(ret != num_pages) )
  21.230 -    {
  21.231 -        printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
  21.232 -        goto cleanup;
  21.233 -    }
  21.234 -
  21.235 -    credit += num_pages;
  21.236 -    ret = num_pages;
  21.237 -
  21.238 - cleanup:
  21.239 -    vfree(parray);
  21.240 -
  21.241 -    return ret;
  21.242 +static unsigned long current_target(void)
  21.243 +{
  21.244 +    unsigned long target = min(target_pages, hard_limit);
  21.245 +    if ( target > (current_pages + balloon_low + balloon_high) )
  21.246 +        target = current_pages + balloon_low + balloon_high;
  21.247 +    return target;
  21.248  }
  21.249  
  21.250  /*
  21.251 - * Install new mem pages obtained by deflate_balloon. function walks 
  21.252 - * phys->machine mapping table looking for DEAD entries and populates
  21.253 - * them.
  21.254 + * We avoid multiple worker processes conflicting via the balloon mutex.
  21.255 + * We may of course race updates of the target counts (which are protected
  21.256 + * by the balloon lock), or with changes to the Xen hard limit, but we will
  21.257 + * recover from these in time.
  21.258   */
  21.259 -static unsigned long process_returned_pages(unsigned long * parray, 
  21.260 -                                       unsigned long num)
  21.261 +static void balloon_process(void *unused)
  21.262  {
  21.263 -    /* currently, this function is rather simplistic as 
  21.264 -     * it is assumed that domain reclaims only number of 
  21.265 -     * pages previously released. this is to change soon
  21.266 -     * and the code to extend page tables etc. will be 
  21.267 -     * incorporated here.
  21.268 -     */
  21.269 -     
  21.270 -    unsigned long * curr = parray;
  21.271 -    unsigned long num_installed;
  21.272 +    unsigned long *mfn_list, pfn, i, flags;
  21.273 +    struct page   *page;
  21.274 +    long           credit, debt, rc;
  21.275 +    void          *v;
  21.276 +
  21.277 +    down(&balloon_mutex);
  21.278 +
  21.279 + retry:
  21.280 +    mfn_list = NULL;
  21.281  
  21.282 -    struct page *page;
  21.283 -
  21.284 -    num_installed = 0;
  21.285 -    while ( (page = rem_ballooned_page()) != NULL )
  21.286 +    if ( (credit = current_target() - current_pages) > 0 )
  21.287      {
  21.288 -        unsigned long pfn;
  21.289 +        mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list));
  21.290 +        if ( mfn_list == NULL )
  21.291 +            goto out;
  21.292  
  21.293 -        if ( num_installed == num )
  21.294 -            break;
  21.295 -
  21.296 -        pfn = page - mem_map;
  21.297 -
  21.298 -        if(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
  21.299 +        balloon_lock(flags);
  21.300 +        rc = HYPERVISOR_dom_mem_op(
  21.301 +            MEMOP_increase_reservation, mfn_list, credit, 0);
  21.302 +        balloon_unlock(flags);
  21.303 +        if ( rc < credit )
  21.304          {
  21.305 -            printk("BUG: Tried to unballoon existing page!");
  21.306 -            BUG();
  21.307 +            /* We hit the Xen hard limit: reprobe. */
  21.308 +            if ( HYPERVISOR_dom_mem_op(
  21.309 +                MEMOP_decrease_reservation, mfn_list, rc, 0) != rc )
  21.310 +                BUG();
  21.311 +            hard_limit = current_pages + rc - driver_pages;
  21.312 +            vfree(mfn_list);
  21.313 +            goto retry;
  21.314          }
  21.315  
  21.316 -        phys_to_machine_mapping[pfn] = *curr;
  21.317 -        queue_machphys_update(*curr, pfn);
  21.318 -        if (pfn<max_low_pfn)
  21.319 -            queue_l1_entry_update(
  21.320 -                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
  21.321 -                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
  21.322 -        
  21.323 -        __free_page(mem_map + pfn);
  21.324 +        for ( i = 0; i < credit; i++ )
  21.325 +        {
  21.326 +            if ( (page = balloon_retrieve()) == NULL )
  21.327 +                BUG();
  21.328 +
  21.329 +            pfn = page - mem_map;
  21.330 +            if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
  21.331 +                BUG();
  21.332  
  21.333 -        curr++;
  21.334 -        num_installed++;
  21.335 +            /* Update P->M and M->P tables. */
  21.336 +            phys_to_machine_mapping[pfn] = mfn_list[i];
  21.337 +            queue_machphys_update(mfn_list[i], pfn);
  21.338 +            
  21.339 +            /* Link back into the page tables if it's not a highmem page. */
  21.340 +            if ( pfn < max_low_pfn )
  21.341 +                queue_l1_entry_update(
  21.342 +                    get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
  21.343 +                    (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
  21.344 +            
  21.345 +            /* Finally, relinquish the memory back to the system allocator. */
  21.346 +            ClearPageReserved(page);
  21.347 +            set_page_count(page, 1);
  21.348 +            __free_page(page);
  21.349 +        }
  21.350 +
  21.351 +        current_pages += credit;
  21.352      }
  21.353 +    else if ( credit < 0 )
  21.354 +    {
  21.355 +        debt = -credit;
  21.356 +
  21.357 +        mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list));
  21.358 +        if ( mfn_list == NULL )
  21.359 +            goto out;
  21.360  
  21.361 -    return num_installed;
  21.362 -}
  21.363 +        for ( i = 0; i < debt; i++ )
  21.364 +        {
  21.365 +            if ( (page = alloc_page(GFP_HIGHUSER)) == NULL )
  21.366 +            {
  21.367 +                debt = i;
  21.368 +                break;
  21.369 +            }
  21.370 +
  21.371 +            pfn = page - mem_map;
  21.372 +            mfn_list[i] = phys_to_machine_mapping[pfn];
  21.373 +            phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
  21.374  
  21.375 -unsigned long deflate_balloon(unsigned long num_pages)
  21.376 -{
  21.377 -    unsigned long ret;
  21.378 -    unsigned long * parray;
  21.379 +            if ( !PageHighMem(page) )
  21.380 +            {
  21.381 +                v = phys_to_virt((page - mem_map) << PAGE_SHIFT);
  21.382 +                scrub_pages(v, 1);
  21.383 +                queue_l1_entry_update(get_ptep((unsigned long)v), 0);
  21.384 +            }
  21.385 +#ifdef CONFIG_XEN_SCRUB_PAGES
  21.386 +            else
  21.387 +            {
  21.388 +                v = kmap(page);
  21.389 +                scrub_pages(v, 1);
  21.390 +                kunmap(page);
  21.391 +            }
  21.392 +#endif            
  21.393  
  21.394 -    if ( num_pages > credit )
  21.395 -    {
  21.396 -        printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n",
  21.397 -               num_pages, credit);
  21.398 -        return -EAGAIN;
  21.399 +            balloon_append(page);
  21.400 +        }
  21.401 +
  21.402 +        /* Flush updates through and flush the TLB. */
  21.403 +        xen_tlb_flush();
  21.404 +
  21.405 +        if ( HYPERVISOR_dom_mem_op(
  21.406 +            MEMOP_decrease_reservation, mfn_list, debt, 0) != debt )
  21.407 +            BUG();
  21.408 +
  21.409 +        current_pages -= debt;
  21.410      }
  21.411  
  21.412 -    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
  21.413 -    if ( parray == NULL )
  21.414 -    {
  21.415 -        printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
  21.416 -        return 0;
  21.417 -    }
  21.418 -
  21.419 -    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
  21.420 -                                parray, num_pages, 0);
  21.421 -    if ( unlikely(ret != num_pages) )
  21.422 -    {
  21.423 -        printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
  21.424 -               ret);
  21.425 -        goto cleanup;
  21.426 -    }
  21.427 + out:
  21.428 +    if ( mfn_list != NULL )
  21.429 +        vfree(mfn_list);
  21.430  
  21.431 -    if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
  21.432 -    {
  21.433 -        printk(KERN_WARNING
  21.434 -               "deflate_balloon: restored only %lx of %lx pages.\n",
  21.435 -           ret, num_pages);
  21.436 -        goto cleanup;
  21.437 -    }
  21.438 +    /* Schedule more work if there is some still to be done. */
  21.439 +    if ( current_target() != current_pages )
  21.440 +        mod_timer(&balloon_timer, jiffies + HZ);
  21.441  
  21.442 -    ret = num_pages;
  21.443 -    credit -= num_pages;
  21.444 -
  21.445 - cleanup:
  21.446 -    vfree(parray);
  21.447 -
  21.448 -    return ret;
  21.449 +    up(&balloon_mutex);
  21.450  }
  21.451  
  21.452 -#define PAGE_TO_MB_SHIFT 8
  21.453 -
  21.454 -/*
  21.455 - * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c 
  21.456 - * The loops do go through all of low memory (ZONE_NORMAL).  The
  21.457 - * old pages have _PAGE_PRESENT set and so get skipped.
  21.458 - * If low memory is not full, the new pages are used to fill it, going
  21.459 - * from cur_low_pfn to low_pfn.   high memory is not direct mapped so
  21.460 - * no extension is needed for new high memory.
  21.461 - */
  21.462 -
  21.463 -static void pagetable_extend (int cur_low_pfn, int newpages)
  21.464 -{
  21.465 -    unsigned long vaddr, end;
  21.466 -    pgd_t *kpgd, *pgd, *pgd_base;
  21.467 -    int i, j, k;
  21.468 -    pmd_t *kpmd, *pmd;
  21.469 -    pte_t *kpte, *pte, *pte_base;
  21.470 -    int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn);
  21.471 -
  21.472 -    /*
  21.473 -     * This can be zero as well - no problem, in that case we exit
  21.474 -     * the loops anyway due to the PTRS_PER_* conditions.
  21.475 -     */
  21.476 -    end = (unsigned long)__va(low_pfn*PAGE_SIZE);
  21.477 -
  21.478 -    pgd_base = init_mm.pgd;
  21.479 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  21.480 -    i = pgd_index(PAGE_OFFSET);
  21.481 -#else
  21.482 -    i = __pgd_offset(PAGE_OFFSET);
  21.483 -#endif
  21.484 -    pgd = pgd_base + i;
  21.485 -
  21.486 -    for (; i < PTRS_PER_PGD; pgd++, i++) {
  21.487 -        vaddr = i*PGDIR_SIZE;
  21.488 -        if (end && (vaddr >= end))
  21.489 -            break;
  21.490 -        pmd = (pmd_t *)pgd;
  21.491 -        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
  21.492 -            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
  21.493 -            if (end && (vaddr >= end))
  21.494 -                break;
  21.495 -
  21.496 -            /* Filled in for us already? */
  21.497 -            if ( pmd_val(*pmd) & _PAGE_PRESENT )
  21.498 -                continue;
  21.499 -
  21.500 -            pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
  21.501 -
  21.502 -            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
  21.503 -                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
  21.504 -                if (end && (vaddr >= end))
  21.505 -                    break;
  21.506 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  21.507 -                *pte = mk_pte(virt_to_page(vaddr), PAGE_KERNEL);
  21.508 -#else
  21.509 -		*pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
  21.510 -#endif
  21.511 -            }
  21.512 -            kpgd = pgd_offset_k((unsigned long)pte_base);
  21.513 -            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
  21.514 -            kpte = pte_offset_kernel(kpmd, (unsigned long)pte_base);
  21.515 -            queue_l1_entry_update(kpte,
  21.516 -                                  (*(unsigned long *)kpte)&~_PAGE_RW);
  21.517 -            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
  21.518 -            XEN_flush_page_update_queue();
  21.519 -        }
  21.520 -    }
  21.521 -}
  21.522 -
  21.523 -/*
  21.524 - * claim_new_pages() asks xen to increase this domain's memory  reservation
  21.525 - * and return a list of the new pages of memory.  This new pages are
  21.526 - * added to the free list of the memory manager.
  21.527 - *
  21.528 - * Available RAM does not normally change while Linux runs.  To make this work,
  21.529 - * the linux mem= boottime command line param must say how big memory could
  21.530 - * possibly grow.  Then setup_arch() in arch/xen/kernel/setup.c
  21.531 - * sets max_pfn, max_low_pfn and the zones according to
  21.532 - * this max memory size.   The page tables themselves can only be
  21.533 - * extended after xen has assigned new pages to this domain.
  21.534 - */
  21.535 -
  21.536 -static unsigned long
  21.537 -claim_new_pages(unsigned long num_pages)
  21.538 +/* Resets the Xen limit, sets new target, and kicks off processing. */
  21.539 +static void set_new_target(unsigned long target)
  21.540  {
  21.541 -    unsigned long new_page_cnt, pfn;
  21.542 -    unsigned long * parray, *curr;
  21.543 -
  21.544 -    if (most_seen_pages+num_pages> max_pfn)
  21.545 -        num_pages = max_pfn-most_seen_pages;
  21.546 -    if (num_pages==0) return -EINVAL;
  21.547 -
  21.548 -    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
  21.549 -    if ( parray == NULL )
  21.550 -    {
  21.551 -        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
  21.552 -        return 0;
  21.553 -    }
  21.554 -
  21.555 -    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
  21.556 -                                parray, num_pages, 0);
  21.557 -    if ( new_page_cnt != num_pages )
  21.558 -    {
  21.559 -        printk(KERN_WARNING
  21.560 -            "claim_new_pages: xen granted only %lu of %lu requested pages\n",
  21.561 -            new_page_cnt, num_pages);
  21.562 -
  21.563 -        /* 
  21.564 -         * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
  21.565 -         * usually can dribble out a few pages and then hangs.
  21.566 -         */
  21.567 -        if ( new_page_cnt < 1000 )
  21.568 -        {
  21.569 -            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
  21.570 -            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
  21.571 -                                parray, new_page_cnt, 0);
  21.572 -            return -EFAULT;
  21.573 -        }
  21.574 -    }
  21.575 -    memcpy(phys_to_machine_mapping+most_seen_pages, parray,
  21.576 -           new_page_cnt * sizeof(unsigned long));
  21.577 -
  21.578 -    pagetable_extend(most_seen_pages,new_page_cnt);
  21.579 -
  21.580 -    for ( pfn = most_seen_pages, curr = parray;
  21.581 -          pfn < most_seen_pages+new_page_cnt;
  21.582 -          pfn++, curr++ )
  21.583 -    {
  21.584 -        struct page *page = mem_map + pfn;
  21.585 -
  21.586 -#ifndef CONFIG_HIGHMEM
  21.587 -        if ( pfn>=max_low_pfn )
  21.588 -        {
  21.589 -            printk(KERN_WARNING "Warning only %ldMB will be used.\n",
  21.590 -               pfn>>PAGE_TO_MB_SHIFT);
  21.591 -            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
  21.592 -            break;
  21.593 -        }
  21.594 -#endif
  21.595 -        queue_machphys_update(*curr, pfn);
  21.596 -        if ( pfn < max_low_pfn )
  21.597 -            queue_l1_entry_update(
  21.598 -                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
  21.599 -                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
  21.600 -        
  21.601 -        XEN_flush_page_update_queue();
  21.602 -        
  21.603 -        /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
  21.604 -        ClearPageReserved(page);
  21.605 -        if ( pfn >= max_low_pfn )
  21.606 -            set_bit(PG_highmem, &page->flags);
  21.607 -        set_page_count(page, 1);
  21.608 -        __free_page(page);
  21.609 -    }
  21.610 -
  21.611 -    vfree(parray);
  21.612 -
  21.613 -    return new_page_cnt;
  21.614 +    /* No need for lock. Not read-modify-write updates. */
  21.615 +    hard_limit   = ~0UL;
  21.616 +    target_pages = target;
  21.617 +    schedule_work(&balloon_worker);
  21.618  }
  21.619  
  21.620 -
  21.621 -static int balloon_try_target(int target)
  21.622 -{
  21.623 -    int change, reclaim;
  21.624 -
  21.625 -    if ( target < current_pages )
  21.626 -    {
  21.627 -        int change = inflate_balloon(current_pages-target);
  21.628 -        if ( change <= 0 )
  21.629 -            return change;
  21.630 -
  21.631 -        current_pages -= change;
  21.632 -        printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
  21.633 -            change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
  21.634 -    }
  21.635 -    else if ( target > current_pages )
  21.636 -    {
  21.637 -        reclaim = min((unsigned long)target,most_seen_pages) - current_pages;
  21.638 -
  21.639 -        if ( reclaim )
  21.640 -        {
  21.641 -            change = deflate_balloon( reclaim );
  21.642 -            if ( change <= 0 )
  21.643 -                return change;
  21.644 -            current_pages += change;
  21.645 -            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
  21.646 -                change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
  21.647 -        }
  21.648 -
  21.649 -        if ( most_seen_pages < target )
  21.650 -        {
  21.651 -            int growth = claim_new_pages(target-most_seen_pages);
  21.652 -            if ( growth <= 0 )
  21.653 -                return growth;
  21.654 -            most_seen_pages += growth;
  21.655 -            current_pages += growth;
  21.656 -            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
  21.657 -                growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
  21.658 -        }
  21.659 -    }
  21.660 -
  21.661 -    return 1;
  21.662 -}
  21.663 -
  21.664 -
  21.665  static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
  21.666  {
  21.667      switch ( msg->subtype )
  21.668      {
  21.669      case CMSG_MEM_REQUEST_SET:
  21.670 +    {
  21.671 +        mem_request_t *req = (mem_request_t *)&msg->msg[0];
  21.672          if ( msg->length != sizeof(mem_request_t) )
  21.673              goto parse_error;
  21.674 -        {
  21.675 -            mem_request_t *req = (mem_request_t *)&msg->msg[0];
  21.676 -            req->status = balloon_try_target(req->target);
  21.677 -        }
  21.678 -        break;        
  21.679 +        set_new_target(req->target);
  21.680 +        req->status = 0;
  21.681 +    }
  21.682 +    break;        
  21.683      default:
  21.684          goto parse_error;
  21.685      }
  21.686 @@ -534,158 +320,122 @@ static void balloon_ctrlif_rx(ctrl_msg_t
  21.687      ctrl_if_send_response(msg);
  21.688  }
  21.689  
  21.690 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  21.691 -typedef size_t count_t;
  21.692 -#else
  21.693 -typedef u_long count_t;
  21.694 -#endif
  21.695 -
  21.696 -static int do_balloon_write(const char *buffer, count_t count)
  21.697 +static int balloon_write(struct file *file, const char __user *buffer,
  21.698 +                         unsigned long count, void *data)
  21.699  {
  21.700      char memstring[64], *endchar;
  21.701 -    int len, i;
  21.702 -    unsigned long target;
  21.703 -    unsigned long long targetbytes;
  21.704 +    unsigned long long target_bytes;
  21.705  
  21.706 -    /* Only admin can play with the balloon :) */
  21.707      if ( !capable(CAP_SYS_ADMIN) )
  21.708          return -EPERM;
  21.709  
  21.710 +    if ( count <= 1 )
  21.711 +        return -EBADMSG; /* runt */
  21.712      if ( count > sizeof(memstring) )
  21.713 -        return -EFBIG;
  21.714 -
  21.715 -    len = strnlen_user(buffer, count);
  21.716 -    if ( len == 0 ) return -EBADMSG;
  21.717 -    if ( len == 1 ) return 1; /* input starts with a NUL char */
  21.718 -    if ( strncpy_from_user(memstring, buffer, len) < 0 )
  21.719 -        return -EFAULT;
  21.720 +        return -EFBIG;   /* too long */
  21.721  
  21.722 -    endchar = memstring;
  21.723 -    for ( i = 0; i < len; ++i, ++endchar )
  21.724 -        if ( (memstring[i] < '0') || (memstring[i] > '9') )
  21.725 -            break;
  21.726 -    if ( i == 0 )
  21.727 -        return -EBADMSG;
  21.728 +    if ( copy_from_user(memstring, buffer, count) )
  21.729 +        return -EFAULT;
  21.730 +    memstring[sizeof(memstring)-1] = '\0';
  21.731  
  21.732 -    targetbytes = memparse(memstring,&endchar);
  21.733 -    target = targetbytes >> PAGE_SHIFT;
  21.734 +    target_bytes = memparse(memstring, &endchar);
  21.735 +    set_new_target(target_bytes >> PAGE_SHIFT);
  21.736  
  21.737 -    i = balloon_try_target(target);
  21.738 -
  21.739 -    if ( i <= 0 ) return i;
  21.740 -
  21.741 -    return len;
  21.742 +    return count;
  21.743  }
  21.744  
  21.745 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  21.746 -static int balloon_write(struct file *file, const char *buffer,
  21.747 -                         size_t count, loff_t *offp)
  21.748 +static int balloon_read(char *page, char **start, off_t off,
  21.749 +                        int count, int *eof, void *data)
  21.750  {
  21.751 -    int len = do_balloon_write(buffer, count);
  21.752 -    
  21.753 -    if ( len <= 0 ) return len;
  21.754 +    int len;
  21.755 +
  21.756 +#define K(_p) ((_p)<<(PAGE_SHIFT-10))
  21.757 +    len = sprintf(
  21.758 +        page,
  21.759 +        "Current allocation: %8lu kB\n"
  21.760 +        "Requested target:   %8lu kB\n"
  21.761 +        "Low-mem balloon:    %8lu kB\n"
  21.762 +        "High-mem balloon:   %8lu kB\n"
  21.763 +        "Xen hard limit:     ",
  21.764 +        K(current_pages), K(target_pages), K(balloon_low), K(balloon_high));
  21.765  
  21.766 -    *offp += len;
  21.767 +    if ( hard_limit != ~0UL )
  21.768 +        len += sprintf(
  21.769 +            page + len, 
  21.770 +            "%8lu kB (inc. %8lu kB driver headroom)\n",
  21.771 +            K(hard_limit), K(driver_pages));
  21.772 +    else
  21.773 +        len += sprintf(
  21.774 +            page + len,
  21.775 +            "     ??? kB\n");
  21.776 +
  21.777 +    *eof = 1;
  21.778      return len;
  21.779  }
  21.780  
  21.781 -static int balloon_read(struct file *filp, char *buffer,
  21.782 -                        size_t count, loff_t *offp)
  21.783 -{
  21.784 -    static char priv_buf[32];
  21.785 -    char *priv_bufp = priv_buf;
  21.786 -    int len;
  21.787 -    len = sprintf(priv_buf,"%lu\n",current_pages<<PAGE_SHIFT);
  21.788 -
  21.789 -    len -= *offp;
  21.790 -    priv_bufp += *offp;
  21.791 -    if (len>count) len = count;
  21.792 -    if (len<0) len = 0;
  21.793 -
  21.794 -    if ( copy_to_user(buffer, priv_bufp, len) != 0 )
  21.795 -        return -EFAULT;
  21.796 -
  21.797 -    *offp += len;
  21.798 -    return len;
  21.799 -}
  21.800 -
  21.801 -static struct file_operations balloon_fops = {
  21.802 -    .read  = balloon_read,
  21.803 -    .write = balloon_write
  21.804 -};
  21.805 -
  21.806 -#else
  21.807 -
  21.808 -static int balloon_write(struct file *file, const char *buffer,
  21.809 -                         u_long count, void *data)
  21.810 -{
  21.811 -    return do_balloon_write(buffer, count);
  21.812 -}
  21.813 -
  21.814 -static int balloon_read(char *page, char **start, off_t off,
  21.815 -			int count, int *eof, void *data)
  21.816 -{
  21.817 -  int len;
  21.818 -  len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT);
  21.819 -  
  21.820 -  if (len <= off+count) *eof = 1;
  21.821 -  *start = page + off;
  21.822 -  len -= off;
  21.823 -  if (len>count) len = count;
  21.824 -  if (len<0) len = 0;
  21.825 -  return len;
  21.826 -}
  21.827 -
  21.828 -#endif
  21.829 -
  21.830  static int __init balloon_init(void)
  21.831  {
  21.832 -    printk(KERN_ALERT "Starting Xen Balloon driver\n");
  21.833 +    unsigned long pfn;
  21.834 +    struct page *page;
  21.835 +
  21.836 +    IPRINTK("Initialising balloon driver.\n");
  21.837  
  21.838 -    most_seen_pages = current_pages = min(xen_start_info.nr_pages,max_pfn);
  21.839 -    if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
  21.840 +    current_pages = min(xen_start_info.nr_pages, max_pfn);
  21.841 +    target_pages  = current_pages;
  21.842 +    balloon_low   = 0;
  21.843 +    balloon_high  = 0;
  21.844 +    driver_pages  = 0UL;
  21.845 +    hard_limit    = ~0UL;
  21.846 +
  21.847 +    init_timer(&balloon_timer);
  21.848 +    balloon_timer.data = 0;
  21.849 +    balloon_timer.function = balloon_alarm;
  21.850 +    
  21.851 +    if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL )
  21.852      {
  21.853 -        printk(KERN_ALERT "Unable to create balloon driver proc entry!");
  21.854 +        WPRINTK("Unable to create /proc/xen/balloon.\n");
  21.855          return -1;
  21.856      }
  21.857  
  21.858 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  21.859 -    balloon_pde->owner     = THIS_MODULE;
  21.860 -    balloon_pde->nlink     = 1;
  21.861 -    balloon_pde->proc_fops = &balloon_fops;
  21.862 -#else
  21.863 +    balloon_pde->read_proc  = balloon_read;
  21.864      balloon_pde->write_proc = balloon_write;
  21.865 -    balloon_pde->read_proc  = balloon_read;
  21.866 -#endif
  21.867  
  21.868 -    (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx,
  21.869 -                                    CALLBACK_IN_BLOCKING_CONTEXT);
  21.870 +    (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx, 0);
  21.871  
  21.872 -    /* 
  21.873 -     * make_module a new phys map if mem= says xen can give us memory  to grow
  21.874 -     */
  21.875 -    if ( max_pfn > xen_start_info.nr_pages )
  21.876 +    /* Initialise the balloon with excess memory space. */
  21.877 +    for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
  21.878      {
  21.879 -        extern unsigned long *phys_to_machine_mapping;
  21.880 -        unsigned long *newmap;
  21.881 -        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
  21.882 -        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
  21.883 -        memcpy(newmap, phys_to_machine_mapping,
  21.884 -               xen_start_info.nr_pages * sizeof(unsigned long));
  21.885 -        phys_to_machine_mapping = newmap;
  21.886 +        page = &mem_map[pfn];
  21.887 +        if ( !PageReserved(page) )
  21.888 +            balloon_append(page);
  21.889      }
  21.890  
  21.891      return 0;
  21.892  }
  21.893  
  21.894 -static void __exit balloon_cleanup(void)
  21.895 +subsys_initcall(balloon_init);
  21.896 +
  21.897 +void balloon_update_driver_allowance(long delta)
  21.898  {
  21.899 -    if ( balloon_pde != NULL )
  21.900 -    {
  21.901 -        remove_xen_proc_entry("memory_target");
  21.902 -        balloon_pde = NULL;
  21.903 -    }
  21.904 +    unsigned long flags;
  21.905 +    balloon_lock(flags);
  21.906 +    driver_pages += delta; /* non-atomic update */
  21.907 +    balloon_unlock(flags);
  21.908  }
  21.909  
  21.910 -module_init(balloon_init);
  21.911 -module_exit(balloon_cleanup);
  21.912 +void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns)
  21.913 +{
  21.914 +    unsigned long flags;
  21.915 +
  21.916 +    balloon_lock(flags);
  21.917 +    if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
  21.918 +                               mfn_list, nr_mfns, 0) != nr_mfns )
  21.919 +        BUG();
  21.920 +    current_pages -= nr_mfns; /* non-atomic update */
  21.921 +    balloon_unlock(flags);
  21.922 +
  21.923 +    schedule_work(&balloon_worker);
  21.924 +}
  21.925 +
  21.926 +EXPORT_SYMBOL(balloon_update_driver_allowance);
  21.927 +EXPORT_SYMBOL(balloon_put_pages);
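The rewritten balloon driver above exposes a single /proc/xen/balloon node: balloon_write() parses the new target with memparse() and only calls set_new_target(), the actual inflation or deflation happens asynchronously in balloon_process(), and balloon_read() reports the current state. A minimal userspace sketch of driving that interface, assuming only the proc path created by balloon_init() and the CAP_SYS_ADMIN check in balloon_write(); it is an illustration, not part of this changeset:

/* Set a new balloon target ("K"/"M"/"G" suffixes are handled by memparse())
 * and print the report produced by balloon_read(). */
#include <stdio.h>

int main(void)
{
    char   report[512];
    size_t n;
    FILE  *f;

    if ( (f = fopen("/proc/xen/balloon", "w")) == NULL )
        return 1;
    fputs("128M\n", f);          /* request a 128MB reservation target */
    fclose(f);

    if ( (f = fopen("/proc/xen/balloon", "r")) == NULL )
        return 1;
    n = fread(report, 1, sizeof(report) - 1, f);
    report[n] = '\0';
    fputs(report, stdout);       /* current/target allocation, hard limit */
    fclose(f);

    return 0;
}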
    22.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/interface.c	Mon Dec 06 20:03:12 2004 +0000
    22.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/interface.c	Mon Dec 06 20:37:17 2004 +0000
    22.3 @@ -35,8 +35,8 @@ static void __netif_disconnect_complete(
    22.4  
    22.5      /*
    22.6       * These can't be done in netif_disconnect() because at that point there
    22.7 -     * may be outstanding requests at the disc whose asynchronous responses
    22.8 -     * must still be notified to the remote driver.
    22.9 +     * may be outstanding requests in the network stack whose asynchronous
   22.10 +     * responses must still be notified to the remote driver.
   22.11       */
   22.12      unbind_evtchn_from_irq(netif->evtchn);
   22.13      vfree(netif->tx); /* Frees netif->rx as well. */
   22.14 @@ -84,7 +84,7 @@ void netif_create(netif_be_create_t *cre
   22.15      unsigned int       handle = create->netif_handle;
   22.16      struct net_device *dev;
   22.17      netif_t          **pnetif, *netif;
   22.18 -    char               name[IFNAMSIZ] = {};
   22.19 +    char               name[IFNAMSIZ];
   22.20  
   22.21      snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
   22.22      dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
   22.23 @@ -116,7 +116,7 @@ void netif_create(netif_be_create_t *cre
   22.24          {
   22.25              DPRINTK("Could not create netif: already exists\n");
   22.26              create->status = NETIF_BE_STATUS_INTERFACE_EXISTS;
   22.27 -            kfree(dev);
   22.28 +            free_netdev(dev);
   22.29              return;
   22.30          }
   22.31          pnetif = &(*pnetif)->hash_next;
   22.32 @@ -137,7 +137,7 @@ void netif_create(netif_be_create_t *cre
   22.33          DPRINTK("Could not register new net device %s: err=%d\n",
   22.34                  dev->name, err);
   22.35          create->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
   22.36 -        kfree(dev);
   22.37 +        free_netdev(dev);
   22.38          return;
   22.39      }
   22.40  
   22.41 @@ -176,7 +176,7 @@ void netif_destroy(netif_be_destroy_t *d
   22.42   destroy:
   22.43      *pnetif = netif->hash_next;
   22.44      unregister_netdev(netif->dev);
   22.45 -    kfree(netif->dev);
   22.46 +    free_netdev(netif->dev);
   22.47      destroy->status = NETIF_BE_STATUS_OKAY;
   22.48  }
   22.49  
    23.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/netback.c	Mon Dec 06 20:03:12 2004 +0000
    23.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/netback.c	Mon Dec 06 20:37:17 2004 +0000
    23.3 @@ -11,6 +11,7 @@
    23.4   */
    23.5  
    23.6  #include "common.h"
    23.7 +#include <asm-xen/balloon.h>
    23.8  
    23.9  static void netif_page_release(struct page *page);
   23.10  static void netif_skb_release(struct sk_buff *skb);
   23.11 @@ -29,6 +30,8 @@ static DECLARE_TASKLET(net_tx_tasklet, n
   23.12  static void net_rx_action(unsigned long unused);
   23.13  static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
   23.14  
   23.15 +static struct timer_list net_timer;
   23.16 +
   23.17  static struct sk_buff_head rx_queue;
   23.18  static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2];
   23.19  static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3];
   23.20 @@ -69,27 +72,20 @@ static unsigned long mfn_list[MAX_MFN_AL
   23.21  static unsigned int alloc_index = 0;
   23.22  static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
   23.23  
   23.24 -static void __refresh_mfn_list(void)
   23.25 +static unsigned long alloc_mfn(void)
   23.26  {
   23.27 -    int ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
   23.28 -                                    mfn_list, MAX_MFN_ALLOC, 0);
   23.29 -    if ( unlikely(ret != MAX_MFN_ALLOC) )
   23.30 -        BUG();
   23.31 -    alloc_index = MAX_MFN_ALLOC;
   23.32 -}
   23.33 -
   23.34 -static unsigned long get_new_mfn(void)
   23.35 -{
   23.36 -    unsigned long mfn, flags;
   23.37 +    unsigned long mfn = 0, flags;
   23.38      spin_lock_irqsave(&mfn_lock, flags);
   23.39 -    if ( alloc_index == 0 )
   23.40 -        __refresh_mfn_list();
   23.41 -    mfn = mfn_list[--alloc_index];
   23.42 +    if ( unlikely(alloc_index == 0) )
   23.43 +        alloc_index = HYPERVISOR_dom_mem_op(
   23.44 +            MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0);
   23.45 +    if ( alloc_index != 0 )
   23.46 +        mfn = mfn_list[--alloc_index];
   23.47      spin_unlock_irqrestore(&mfn_lock, flags);
   23.48      return mfn;
   23.49  }
   23.50  
   23.51 -static void dealloc_mfn(unsigned long mfn)
   23.52 +static void free_mfn(unsigned long mfn)
   23.53  {
   23.54      unsigned long flags;
   23.55      spin_lock_irqsave(&mfn_lock, flags);
   23.56 @@ -210,8 +206,16 @@ static void net_rx_action(unsigned long 
   23.57          netif   = (netif_t *)skb->dev->priv;
   23.58          vdata   = (unsigned long)skb->data;
   23.59          mdata   = virt_to_machine(vdata);
   23.60 -        new_mfn = get_new_mfn();
   23.61 -        
   23.62 +
   23.63 +        /* Memory squeeze? Back off for an arbitrary while. */
   23.64 +        if ( (new_mfn = alloc_mfn()) == 0 )
   23.65 +        {
   23.66 +            if ( net_ratelimit() )
   23.67 +                printk(KERN_WARNING "Memory squeeze in netback driver.\n");
   23.68 +            mod_timer(&net_timer, jiffies + HZ);
   23.69 +            break;
   23.70 +        }
   23.71 +
   23.72          /*
   23.73           * Set the new P2M table entry before reassigning the old data page.
   23.74           * Heed the comment in pgtable-2level.h:pte_page(). :-)
   23.75 @@ -280,7 +284,7 @@ static void net_rx_action(unsigned long 
   23.76          if ( unlikely(mcl[1].args[5] != 0) )
   23.77          {
   23.78              DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
   23.79 -            dealloc_mfn(mdata >> PAGE_SHIFT);
   23.80 +            free_mfn(mdata >> PAGE_SHIFT);
   23.81              status = NETIF_RSP_ERROR;
   23.82          }
   23.83  
   23.84 @@ -307,7 +311,7 @@ static void net_rx_action(unsigned long 
   23.85      }
   23.86  
   23.87      /* More work to do? */
   23.88 -    if ( !skb_queue_empty(&rx_queue) )
   23.89 +    if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
   23.90          tasklet_schedule(&net_rx_tasklet);
   23.91  #if 0
   23.92      else
   23.93 @@ -315,6 +319,11 @@ static void net_rx_action(unsigned long 
   23.94  #endif
   23.95  }
   23.96  
   23.97 +static void net_alarm(unsigned long unused)
   23.98 +{
   23.99 +    tasklet_schedule(&net_rx_tasklet);
  23.100 +}
  23.101 +
  23.102  struct net_device_stats *netif_be_get_stats(struct net_device *dev)
  23.103  {
  23.104      netif_t *netif = dev->priv;
  23.105 @@ -781,9 +790,16 @@ static int __init netback_init(void)
  23.106  
  23.107      printk("Initialising Xen netif backend\n");
  23.108  
  23.109 +    /* We can increase reservation by this much in net_rx_action(). */
  23.110 +    balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
  23.111 +
  23.112      skb_queue_head_init(&rx_queue);
  23.113      skb_queue_head_init(&tx_queue);
  23.114  
  23.115 +    init_timer(&net_timer);
  23.116 +    net_timer.data = 0;
  23.117 +    net_timer.function = net_alarm;
  23.118 +    
  23.119      netif_interface_init();
  23.120  
  23.121      if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
    24.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/netfront/netfront.c	Mon Dec 06 20:03:12 2004 +0000
    24.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/netfront/netfront.c	Mon Dec 06 20:37:17 2004 +0000
    24.3 @@ -45,6 +45,7 @@
    24.4  #include <asm-xen/evtchn.h>
    24.5  #include <asm-xen/ctrl_if.h>
    24.6  #include <asm-xen/xen-public/io/netif.h>
    24.7 +#include <asm-xen/balloon.h>
    24.8  #include <asm/page.h>
    24.9  
   24.10  #include <net/arp.h>
   24.11 @@ -409,6 +410,9 @@ static void network_alloc_rx_buffers(str
   24.12      rx_mcl[i].args[3] = 0;
   24.13      rx_mcl[i].args[4] = DOMID_SELF;
   24.14  
   24.15 +    /* Tell the balloon driver what is going on. */
   24.16 +    balloon_update_driver_allowance(i);
   24.17 +
   24.18      /* Zap PTEs and give away pages in one big multicall. */
   24.19      (void)HYPERVISOR_multicall(rx_mcl, i+1);
   24.20  
   24.21 @@ -557,14 +561,15 @@ static int netif_poll(struct net_device 
   24.22          /*
   24.23           * An error here is very odd. Usually indicates a backend bug,
   24.24           * low-memory condition, or that we didn't have reservation headroom.
   24.25 -         * Whatever - print an error and queue the id again straight away.
   24.26           */
   24.27          if ( unlikely(rx->status <= 0) )
   24.28          {
   24.29 -	    printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
   24.30 +            if ( net_ratelimit() )
   24.31 +                printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n");
   24.32              np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
   24.33              wmb();
   24.34              np->rx->req_prod++;
   24.35 +            work_done--;
   24.36              continue;
   24.37          }
   24.38  
   24.39 @@ -595,6 +600,9 @@ static int netif_poll(struct net_device 
   24.40          __skb_queue_tail(&rxq, skb);
   24.41      }
   24.42  
   24.43 +    /* Some pages are no longer absent... */
   24.44 +    balloon_update_driver_allowance(-work_done);
   24.45 +
   24.46      /* Do all the remapping work, and M->P updates, in one big hypercall. */
   24.47      if ( likely((mcl - rx_mcl) != 0) )
   24.48      {
    25.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Dec 06 20:03:12 2004 +0000
    25.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Dec 06 20:37:17 2004 +0000
    25.3 @@ -7,7 +7,6 @@
    25.4   */
    25.5  
    25.6  #include <linux/config.h>
    25.7 -#include <linux/module.h>
    25.8  #include <linux/kernel.h>
    25.9  #include <linux/sched.h>
   25.10  #include <linux/slab.h>
   25.11 @@ -213,23 +212,9 @@ static int __init privcmd_init(void)
   25.12  
   25.13      privcmd_intf = create_xen_proc_entry("privcmd", 0400);
   25.14      if ( privcmd_intf != NULL )
   25.15 -    {
   25.16 -        privcmd_intf->owner      = THIS_MODULE;
   25.17 -        privcmd_intf->nlink      = 1;
   25.18 -        privcmd_intf->proc_fops  = &privcmd_file_ops;
   25.19 -    }
   25.20 +        privcmd_intf->proc_fops = &privcmd_file_ops;
   25.21  
   25.22      return 0;
   25.23  }
   25.24  
   25.25 -
   25.26 -static void __exit privcmd_cleanup(void)
   25.27 -{
   25.28 -    if ( privcmd_intf == NULL ) return;
   25.29 -    remove_xen_proc_entry("privcmd");
   25.30 -    privcmd_intf = NULL;
   25.31 -}
   25.32 -
   25.33 -
   25.34 -module_init(privcmd_init);
   25.35 -module_exit(privcmd_cleanup);
   25.36 +__initcall(privcmd_init);
    26.1 --- a/linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/io.h	Mon Dec 06 20:03:12 2004 +0000
    26.2 +++ b/linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/io.h	Mon Dec 06 20:37:17 2004 +0000
    26.3 @@ -449,4 +449,7 @@ BUILDIO(b,b,char)
    26.4  BUILDIO(w,w,short)
    26.5  BUILDIO(l,,int)
    26.6  
    26.7 +/* We will be supplying our own /dev/mem implementation */
    26.8 +#define ARCH_HAS_DEV_MEM
    26.9 +
   26.10  #endif
    28.1 --- a/linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Mon Dec 06 20:03:12 2004 +0000
    28.2 +++ b/linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Mon Dec 06 20:37:17 2004 +0000
    28.3 @@ -424,14 +424,13 @@ extern pte_t *lookup_address(unsigned lo
    28.4  #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
    28.5  	do {								  \
    28.6  		if (__dirty) {						  \
    28.7 -			if (likely(vma->vm_mm == current->mm)) {	  \
    28.8 -			    xen_flush_page_update_queue();		  \
    28.9 -			    HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, \
   28.10 -							 entry, UVMF_INVLPG); \
   28.11 -			} else {					  \
   28.12 -			    xen_l1_entry_update((__ptep), (__entry).pte_low); \
   28.13 -			    flush_tlb_page(__vma, __address);	          \
   28.14 -			}						  \
   28.15 +		        if ( likely((__vma)->vm_mm == current->mm) ) {    \
   28.16 +			    xen_flush_page_update_queue();                \
   28.17 +			    HYPERVISOR_update_va_mapping((__address)>>PAGE_SHIFT, (__entry), UVMF_INVLPG); \
   28.18 +			} else {                                          \
   28.19 +                            xen_l1_entry_update((__ptep), (__entry).pte_low); \
   28.20 +			    flush_tlb_page((__vma), (__address));         \
   28.21 +			}                                                 \
   28.22  		}							  \
   28.23  	} while (0)
   28.24  
   28.25 @@ -461,14 +460,14 @@ void make_page_writable(void *va);
   28.26  void make_pages_readonly(void *va, unsigned int nr);
   28.27  void make_pages_writable(void *va, unsigned int nr);
   28.28  
   28.29 -static inline unsigned long arbitrary_virt_to_machine(void *va)
   28.30 -{
   28.31 -	pgd_t *pgd = pgd_offset_k((unsigned long)va);
   28.32 -	pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
   28.33 -	pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
   28.34 -	unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK;
   28.35 -	return pa | ((unsigned long)va & (PAGE_SIZE-1));
   28.36 -}
   28.37 +#define arbitrary_virt_to_machine(__va)					\
   28.38 +({									\
   28.39 +	pgd_t *__pgd = pgd_offset_k((unsigned long)(__va));		\
   28.40 +	pmd_t *__pmd = pmd_offset(__pgd, (unsigned long)(__va));	\
   28.41 +	pte_t *__pte = pte_offset_kernel(__pmd, (unsigned long)(__va));	\
   28.42 +	unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK;	\
   28.43 +	__pa | ((unsigned long)(__va) & (PAGE_SIZE-1));			\
   28.44 +})
   28.45  
   28.46  #endif /* !__ASSEMBLY__ */
   28.47  
   28.48 @@ -476,6 +475,17 @@ static inline unsigned long arbitrary_vi
   28.49  #define kern_addr_valid(addr)	(1)
   28.50  #endif /* !CONFIG_DISCONTIGMEM */
   28.51  
   28.52 +int direct_remap_area_pages(struct mm_struct *mm,
   28.53 +                            unsigned long address, 
   28.54 +                            unsigned long machine_addr,
   28.55 +                            unsigned long size, 
   28.56 +                            pgprot_t prot,
   28.57 +                            domid_t  domid);
   28.58 +int __direct_remap_area_pages(struct mm_struct *mm,
   28.59 +			      unsigned long address, 
   28.60 +			      unsigned long size, 
   28.61 +			      mmu_update_t *v);
   28.62 +
   28.63  #define io_remap_page_range(vma,from,phys,size,prot) \
   28.64  	direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
   28.65  
    29.1 --- a/linux-2.6.10-rc2-xen-sparse/include/asm-xen/hypervisor.h	Mon Dec 06 20:03:12 2004 +0000
    29.2 +++ b/linux-2.6.10-rc2-xen-sparse/include/asm-xen/hypervisor.h	Mon Dec 06 20:37:17 2004 +0000
    29.3 @@ -54,13 +54,6 @@ void xen_cpu_idle (void);
    29.4  /* arch/xen/i386/kernel/hypervisor.c */
    29.5  void do_hypervisor_callback(struct pt_regs *regs);
    29.6  
    29.7 -/* arch/xen/i386/mm/init.c */
    29.8 -/* NOTE: caller must call flush_page_update_queue() */
    29.9 -#define PROT_ON  1
   29.10 -#define PROT_OFF 0
   29.11 -void /* __init */ protect_page(pgd_t *dpgd, void *page, int mode);
   29.12 -void /* __init */ protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode);
   29.13 -
   29.14  /* arch/xen/i386/kernel/head.S */
   29.15  void lgdt_finish(void);
   29.16  
   29.17 @@ -109,8 +102,6 @@ void MULTICALL_flush_page_update_queue(v
   29.18  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
   29.19  /* Allocate a contiguous empty region of low memory. Return virtual start. */
   29.20  unsigned long allocate_empty_lowmem_region(unsigned long pages);
   29.21 -/* Deallocate a contiguous region of low memory. Return it to the allocator. */
   29.22 -void deallocate_lowmem_region(unsigned long vstart, unsigned long pages);
   29.23  #endif
   29.24  
   29.25  /*
    30.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    30.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/devmem.c	Mon Dec 06 20:37:17 2004 +0000
    30.3 @@ -0,0 +1,158 @@
    30.4 +/*
    30.5 + *  Originally from linux/drivers/char/mem.c
    30.6 + *
    30.7 + *  Copyright (C) 1991, 1992  Linus Torvalds
    30.8 + *
    30.9 + *  Added devfs support. 
   30.10 + *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
   30.11 + *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
   30.12 + */
   30.13 +
   30.14 +#include <linux/config.h>
   30.15 +#include <linux/mm.h>
   30.16 +#include <linux/miscdevice.h>
   30.17 +#include <linux/slab.h>
   30.18 +#include <linux/vmalloc.h>
   30.19 +#include <linux/mman.h>
   30.20 +#include <linux/random.h>
   30.21 +#include <linux/init.h>
   30.22 +#include <linux/raw.h>
   30.23 +#include <linux/tty.h>
   30.24 +#include <linux/capability.h>
   30.25 +#include <linux/smp_lock.h>
   30.26 +#include <linux/devfs_fs_kernel.h>
   30.27 +#include <linux/ptrace.h>
   30.28 +#include <linux/device.h>
   30.29 +#include <asm/pgalloc.h>
   30.30 +#include <asm/uaccess.h>
   30.31 +#include <asm/io.h>
   30.32 +
   30.33 +static inline int uncached_access(struct file *file, unsigned long addr)
   30.34 +{
   30.35 +        if (file->f_flags & O_SYNC)
   30.36 +                return 1;
   30.37 +        /* Xen sets correct MTRR type on non-RAM for us. */
   30.38 +        return 0;
   30.39 +}
   30.40 +
   30.41 +/*
   30.42 + * This function reads the *physical* memory. The f_pos points directly to the
   30.43 + * memory location. 
   30.44 + */
   30.45 +static ssize_t read_mem(struct file * file, char __user * buf,
   30.46 +			size_t count, loff_t *ppos)
   30.47 +{
   30.48 +	unsigned long i, p = *ppos;
   30.49 +	ssize_t read = 0;
   30.50 +	void *v;
   30.51 +
   30.52 +	if ((v = ioremap(p, count)) == NULL) {
   30.53 +		/*
   30.54 +		 * Some programs (e.g., dmidecode) groove off into weird RAM
   30.55 +		 * areas where no tables can possibly exist (because Xen will
   30.56 +		 * have stomped on them!). These programs get rather upset if
   30.57 +                 * we let them know that Xen failed their access, so we fake
   30.58 +                 * out a read of all zeroes. :-)
   30.59 +		 */
   30.60 +		for (i = 0; i < count; i++)
   30.61 +			if (put_user(0, buf+i))
   30.62 +				return -EFAULT;
   30.63 +		return count;
   30.64 +	}
   30.65 +	if (copy_to_user(buf, v, count))
   30.66 +		return -EFAULT;
   30.67 +	iounmap(v);
   30.68 +
   30.69 +	read += count;
   30.70 +	*ppos += read;
   30.71 +
   30.72 +	return read;
   30.73 +}
   30.74 +
   30.75 +static ssize_t write_mem(struct file * file, const char __user * buf, 
   30.76 +			 size_t count, loff_t *ppos)
   30.77 +{
   30.78 +	unsigned long p = *ppos;
   30.79 +	ssize_t written = 0;
   30.80 +	void *v;
   30.81 +
   30.82 +	if ((v = ioremap(p, count)) == NULL)
   30.83 +		return -EFAULT;
   30.84 +	if (copy_from_user(v, buf, count))
   30.85 +		return -EFAULT;
   30.86 +	iounmap(v);
   30.87 +
   30.88 +	written += count;
   30.89 +	*ppos += written;
   30.90 +
   30.91 +	return written;
   30.92 +}
   30.93 +
   30.94 +static int mmap_mem(struct file * file, struct vm_area_struct * vma)
   30.95 +{
   30.96 +	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
   30.97 +	int uncached;
   30.98 +
   30.99 +	uncached = uncached_access(file, offset);
  30.100 +	if (uncached)
  30.101 +		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  30.102 +
  30.103 +	/* Don't try to swap out physical pages.. */
  30.104 +	vma->vm_flags |= VM_RESERVED;
  30.105 +
  30.106 +	/*
  30.107 +	 * Don't dump addresses that are not real memory to a core file.
  30.108 +	 */
  30.109 +	if (uncached)
  30.110 +		vma->vm_flags |= VM_IO;
  30.111 +
  30.112 +	if (io_remap_page_range(vma, vma->vm_start, offset, 
  30.113 +				vma->vm_end-vma->vm_start, vma->vm_page_prot))
  30.114 +		return -EAGAIN;
  30.115 +
  30.116 +	return 0;
  30.117 +}
  30.118 +
  30.119 +/*
  30.120 + * The memory devices use the full 32/64 bits of the offset, and so we cannot
  30.121 + * check against negative addresses: they are ok. The return value is weird,
  30.122 + * though, in that case (0).
  30.123 + *
  30.124 + * also note that seeking relative to the "end of file" isn't supported:
  30.125 + * it has no meaning, so it returns -EINVAL.
  30.126 + */
  30.127 +static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
  30.128 +{
  30.129 +	loff_t ret;
  30.130 +
  30.131 +	down(&file->f_dentry->d_inode->i_sem);
  30.132 +	switch (orig) {
  30.133 +		case 0:
  30.134 +			file->f_pos = offset;
  30.135 +			ret = file->f_pos;
  30.136 +			force_successful_syscall_return();
  30.137 +			break;
  30.138 +		case 1:
  30.139 +			file->f_pos += offset;
  30.140 +			ret = file->f_pos;
  30.141 +			force_successful_syscall_return();
  30.142 +			break;
  30.143 +		default:
  30.144 +			ret = -EINVAL;
  30.145 +	}
  30.146 +	up(&file->f_dentry->d_inode->i_sem);
  30.147 +	return ret;
  30.148 +}
  30.149 +
  30.150 +static int open_mem(struct inode * inode, struct file * filp)
  30.151 +{
  30.152 +	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
  30.153 +}
  30.154 +
  30.155 +struct file_operations mem_fops = {
  30.156 +	.llseek		= memory_lseek,
  30.157 +	.read		= read_mem,
  30.158 +	.write		= write_mem,
  30.159 +	.mmap		= mmap_mem,
  30.160 +	.open		= open_mem,
  30.161 +};
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/balloon.h	Mon Dec 06 20:37:17 2004 +0000
    31.3 @@ -0,0 +1,51 @@
    31.4 +/******************************************************************************
    31.5 + * balloon.h
    31.6 + *
    31.7 + * Xen balloon driver - enables returning/claiming memory to/from Xen.
    31.8 + *
    31.9 + * Copyright (c) 2003, B Dragovic
   31.10 + * Copyright (c) 2003-2004, M Williamson, K Fraser
   31.11 + * 
   31.12 + * This file may be distributed separately from the Linux kernel, or
   31.13 + * incorporated into other software packages, subject to the following license:
   31.14 + * 
   31.15 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   31.16 + * of this source file (the "Software"), to deal in the Software without
   31.17 + * restriction, including without limitation the rights to use, copy, modify,
   31.18 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   31.19 + * and to permit persons to whom the Software is furnished to do so, subject to
   31.20 + * the following conditions:
   31.21 + * 
   31.22 + * The above copyright notice and this permission notice shall be included in
   31.23 + * all copies or substantial portions of the Software.
   31.24 + * 
   31.25 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   31.26 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   31.27 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   31.28 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   31.29 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   31.30 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   31.31 + * IN THE SOFTWARE.
   31.32 + */
   31.33 +
   31.34 +#ifndef __ASM_BALLOON_H__
   31.35 +#define __ASM_BALLOON_H__
   31.36 +
   31.37 +/*
   31.38 + * Inform the balloon driver that it should allow some slop for device-driver
   31.39 + * memory activities.
   31.40 + */
   31.41 +extern void balloon_update_driver_allowance(long delta);
   31.42 +
   31.43 +/* Give up unmapped pages to the balloon driver. */
   31.44 +extern void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns);
   31.45 +
   31.46 +/*
   31.47 + * Prevent the balloon driver from changing the memory reservation during
   31.48 + * a driver critical region.
   31.49 + */
   31.50 +extern spinlock_t balloon_lock;
   31.51 +#define balloon_lock(__flags)   spin_lock_irqsave(&balloon_lock, __flags)
   31.52 +#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
   31.53 +
   31.54 +#endif /* __ASM_BALLOON_H__ */
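This header is what the netback and netfront changes earlier in the changeset include: a driver credits the balloon with the headroom it may consume via balloon_update_driver_allowance(), and hands unmapped machine pages back with balloon_put_pages(), which itself takes balloon_lock around the hypercall. A hypothetical kernel-side sketch of that calling pattern follows; the driver, its NR_DRIVER_PAGES headroom and mfn array are assumptions made only for illustration:

/* Hypothetical driver glue showing the intended use of <asm-xen/balloon.h>.
 * NR_DRIVER_PAGES and driver_mfns are illustrative, not part of this patch. */
#include <asm-xen/balloon.h>

#define NR_DRIVER_PAGES 256            /* assumed per-driver headroom */

static unsigned long driver_mfns[NR_DRIVER_PAGES];

static void example_driver_start(void)
{
    /* Allow the reservation to grow by this much on our behalf
     * (netback does the same with NETIF_RX_RING_SIZE). */
    balloon_update_driver_allowance(NR_DRIVER_PAGES);
}

static void example_driver_stop(void)
{
    /* Return the machine pages we unmapped and drop the allowance. */
    balloon_put_pages(driver_mfns, NR_DRIVER_PAGES);
    balloon_update_driver_allowance(-NR_DRIVER_PAGES);
}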
    32.1 --- a/tools/libxc/Makefile	Mon Dec 06 20:03:12 2004 +0000
    32.2 +++ b/tools/libxc/Makefile	Mon Dec 06 20:37:17 2004 +0000
    32.3 @@ -1,5 +1,5 @@
    32.4  
    32.5 -MAJOR    = 1.3
    32.6 +MAJOR    = 2.0
    32.7  MINOR    = 0
    32.8  SONAME   = libxc.so.$(MAJOR)
    32.9  
    33.1 --- a/tools/libxc/xc.h	Mon Dec 06 20:03:12 2004 +0000
    33.2 +++ b/tools/libxc/xc.h	Mon Dec 06 20:37:17 2004 +0000
    33.3 @@ -178,14 +178,19 @@ int xc_domain_setinitialmem(int xc_handl
    33.4                              unsigned int initial_memkb);
    33.5  
    33.6  int xc_domain_setmaxmem(int xc_handle,
    33.7 -                            u32 domid, 
    33.8 -                            unsigned int max_memkb);
    33.9 +                        u32 domid, 
   33.10 +                        unsigned int max_memkb);
   33.11  
   33.12  int xc_domain_setvmassist(int xc_handle,
   33.13                            u32 domid, 
   33.14                            unsigned int cmd,
   33.15                            unsigned int type);
   33.16  
   33.17 +typedef dom0_perfc_desc_t xc_perfc_desc_t;
   33.18 +/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
   33.19 +int xc_perfc_control(int xc_handle,
   33.20 +                     u32 op,
   33.21 +                     xc_perfc_desc_t *desc);
   33.22  
   33.23  void *xc_map_foreign_range(int xc_handle, u32 dom,
   33.24                              int size, int prot,
    34.1 --- a/tools/libxc/xc_misc.c	Mon Dec 06 20:03:12 2004 +0000
    34.2 +++ b/tools/libxc/xc_misc.c	Mon Dec 06 20:37:17 2004 +0000
    34.3 @@ -74,10 +74,26 @@ int xc_sched_id(int xc_handle,
    34.4      op.cmd = DOM0_SCHED_ID;
    34.5      op.interface_version = DOM0_INTERFACE_VERSION;
    34.6      
    34.7 -    if((ret = do_dom0_op(xc_handle, &op))) return ret;
    34.8 +    if ( (ret = do_dom0_op(xc_handle, &op)) != 0 )
    34.9 +        return ret;
   34.10      
   34.11      *sched_id = op.u.sched_id.sched_id;
   34.12      
   34.13      return 0;
   34.14  }
   34.15  
   34.16 +int xc_perfc_control(int xc_handle,
   34.17 +                     u32 op,
   34.18 +                     xc_perfc_desc_t *desc)
   34.19 +{
   34.20 +    int rc;
   34.21 +    dom0_op_t dop;
   34.22 +
   34.23 +    dop.cmd = DOM0_PERFCCONTROL;
   34.24 +    dop.u.perfccontrol.op   = op;
   34.25 +    dop.u.perfccontrol.desc = desc;
   34.26 +
   34.27 +    rc = do_dom0_op(xc_handle, &dop);
   34.28 +
   34.29 +    return (rc == 0) ? dop.u.perfccontrol.nr_counters : rc;
   34.30 +}
    35.1 --- a/tools/libxutil/Makefile	Mon Dec 06 20:03:12 2004 +0000
    35.2 +++ b/tools/libxutil/Makefile	Mon Dec 06 20:37:17 2004 +0000
    35.3 @@ -30,7 +30,7 @@ CFLAGS   += -fno-strict-aliasing
    35.4  CFLAGS   += -Wp,-MD,.$(@F).d
    35.5  DEPS     = .*.d
    35.6  
    35.7 -MAJOR    := 1.3
    35.8 +MAJOR    := 2.0
    35.9  MINOR    := 0
   35.10  LIB_NAME := libxutil
   35.11  LIB      := $(LIB_NAME).so 
    36.1 --- a/tools/misc/Makefile	Mon Dec 06 20:03:12 2004 +0000
    36.2 +++ b/tools/misc/Makefile	Mon Dec 06 20:37:17 2004 +0000
    36.3 @@ -3,22 +3,18 @@ XEN_ROOT=../..
    36.4  include $(XEN_ROOT)/tools/Make.defs
    36.5  
    36.6  CC         = gcc
    36.7 -CFLAGS     = -Wall -O3 
    36.8 +CFLAGS     = -Wall -Werror -O3 
    36.9  
   36.10  INCLUDES += -I $(XEN_XC)
   36.11  INCLUDES += -I $(XEN_LIBXC)
   36.12 -INCLUDES += -I $(XEN_LIBXUTIL)
   36.13 -
   36.14 -CFLAGS += $(INCLUDES)
   36.15 +CFLAGS   += $(INCLUDES)
   36.16  
   36.17  HDRS     = $(wildcard *.h)
   36.18 -SRCS     = $(wildcard *.c)
   36.19 -OBJS     = $(patsubst %.c,%.o,$(SRCS))
   36.20  
   36.21 -TARGETS  = 
   36.22 +TARGETS  = xenperf
   36.23  
   36.24  INSTALL_BIN  = $(TARGETS) xencons
   36.25 -INSTALL_SBIN = netfix xm xend xensv
   36.26 +INSTALL_SBIN = netfix xm xend xensv xenperf
   36.27  
   36.28  all: $(TARGETS)
   36.29  	$(MAKE) -C miniterm
   36.30 @@ -32,7 +28,7 @@ install: all
   36.31  
   36.32  clean:
   36.33  	$(RM) *.o $(TARGETS) *~
   36.34 -	$(MAKE) -C miniterm clean	
   36.35 +	$(MAKE) -C miniterm clean
   36.36  
   36.37  %: %.c $(HDRS) Makefile
   36.38 -	$(CC) $(CFLAGS) -o $@ $<
   36.39 +	$(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc -L$(XEN_LIBXUTIL) -lxutil
    37.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    37.2 +++ b/tools/misc/xenperf.c	Mon Dec 06 20:37:17 2004 +0000
    37.3 @@ -0,0 +1,104 @@
    37.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    37.5 + ****************************************************************************
    37.6 + * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge
    37.7 + ****************************************************************************
    37.8 + *
    37.9 + *        File: xenperf.c
   37.10 + *      Author: Rolf Neugebauer (rolf.neugebauer@intel.com)
   37.11 + *        Date: Nov 2004
   37.12 + * 
   37.13 + * Description: 
   37.14 + */
   37.15 +
   37.16 +
   37.17 +#include <xc.h>
   37.18 +#include <stdio.h>
   37.19 +#include <stdlib.h>
   37.20 +#include <sys/mman.h>
   37.21 +#include <errno.h>
   37.22 +#include <string.h>
   37.23 +
   37.24 +int main(int argc, char *argv[])
   37.25 +{
   37.26 +    int              i, j, xc_handle, num;
   37.27 +    xc_perfc_desc_t *pcd;
   37.28 +    unsigned int     sum, reset = 0;
   37.29 +
   37.30 +    if ( argc > 1 )
   37.31 +    {
   37.32 +        char *p = argv[1];
   37.33 +        if ( (*p++ == '-')  && (*p == 'r') )
   37.34 +            reset = 1;
   37.35 +        else
   37.36 +        {
   37.37 +            printf("%s: [-r]\n", argv[0]);
   37.38 +            printf("no args: print xen performance counters\n");
   37.39 +            printf("    -r : reset xen performance counters\n");
   37.40 +            return 0;
   37.41 +        }
   37.42 +    }   
   37.43 +
   37.44 +    if ( (xc_handle = xc_interface_open()) == -1 )
   37.45 +    {
   37.46 +        fprintf(stderr, "Error opening xc interface: %d (%s)\n",
   37.47 +                errno, strerror(errno));
   37.48 +        return 1;
   37.49 +    }
   37.50 +    
   37.51 +    if ( reset )
   37.52 +    {
   37.53 +        if ( xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_RESET,
   37.54 +                              NULL) < 0 )
   37.55 +        {
   37.56 +            fprintf(stderr, "Error resetting performance counters: %d (%s)\n",
   37.57 +                    errno, strerror(errno));
   37.58 +            return 1;
   37.59 +        }
   37.60 +
   37.61 +        return 0;
   37.62 +    }
   37.63 +
   37.64 +
   37.65 +    if ( (num = xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_QUERY,
   37.66 +                                 NULL)) < 0 )
   37.67 +    {
   37.68 +        fprintf(stderr, "Error getting number of perf counters: %d (%s)\n",
   37.69 +                errno, strerror(errno));
   37.70 +        return 1;
   37.71 +    }
   37.72 +
   37.73 +    pcd = malloc(sizeof(*pcd) * num);
   37.74 +
   37.75 +    if ( mlock(pcd, sizeof(*pcd) * num) != 0 )
   37.76 +    {
   37.77 +        fprintf(stderr, "Could not mlock descriptor buffer: %d (%s)\n",
   37.78 +                errno, strerror(errno));
   37.79 +        exit(-1);
   37.80 +    }
   37.81 +
   37.82 +    if ( xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_QUERY, pcd) <= 0 )
   37.83 +    {
   37.84 +        fprintf(stderr, "Error getting perf counter description: %d (%s)\n",
   37.85 +                errno, strerror(errno));
   37.86 +        return 1;
   37.87 +    }
   37.88 +
   37.89 +    munlock(pcd, sizeof(*pcd) * num);
   37.90 +
   37.91 +    for ( i = 0; i < num; i++ )
   37.92 +    {
   37.93 +        printf ("%-35s ", pcd[i].name);
   37.94 +        
   37.95 +        sum = 0;
   37.96 +        for ( j = 0; j < pcd[i].nr_vals; j++ )
   37.97 +            sum += pcd[i].vals[j];
   37.98 +        printf ("T=%10u ", (unsigned int)sum);
   37.99 +
  37.100 +        for ( j = 0; j < pcd[i].nr_vals; j++ )
  37.101 +            printf(" %10u", (unsigned int)pcd[i].vals[j]);
  37.102 +
  37.103 +        printf("\n");
  37.104 +    }
  37.105 +
  37.106 +    return 0;
  37.107 +}
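
The tool above depends on two libxc entry points, xc_interface_open() and xc_perfc_control(); the corresponding libxc additions are part of the same changeset but their hunks are not shown in this section. The following is only a rough sketch of what the xc_perfc_control() wrapper could look like, following libxc's usual pattern of filling in a dom0_op_t and issuing it through the privcmd interface; the do_dom0_op() helper and the xc_perfc_desc_t typedef are assumptions made for the sketch, not taken from this diff.

/* Hypothetical sketch of the libxc wrapper -- not the real libxc hunk. */
#include "xc_private.h"                     /* assumed to supply do_dom0_op() */

typedef dom0_perfc_desc_t xc_perfc_desc_t;  /* assumed typedef exported by xc.h */

int xc_perfc_control(int xc_handle, u32 op, xc_perfc_desc_t *desc)
{
    int       rc;
    dom0_op_t dop;

    dop.cmd                 = DOM0_PERFCCONTROL;
    dop.interface_version   = DOM0_INTERFACE_VERSION;
    dop.u.perfccontrol.op   = op;
    dop.u.perfccontrol.desc = desc;

    rc = do_dom0_op(xc_handle, &dop);       /* issue the dom0_op hypercall */

    /* On success the hypervisor reports the total number of counters. */
    return (rc < 0) ? rc : dop.u.perfccontrol.nr_counters;
}

With a wrapper of that shape, "xenperf" prints every counter (name, total, then the per-value breakdown) and "xenperf -r" resets them, matching the usage text printed above.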
    38.1 --- a/xen/arch/x86/memory.c	Mon Dec 06 20:03:12 2004 +0000
    38.2 +++ b/xen/arch/x86/memory.c	Mon Dec 06 20:37:17 2004 +0000
    38.3 @@ -1299,9 +1299,6 @@ int do_mmu_update(
    38.4      u32 type_info;
    38.5      domid_t domid;
    38.6  
    38.7 -    perfc_incrc(calls_to_mmu_update); 
    38.8 -    perfc_addc(num_page_updates, count);
    38.9 -
   38.10      cleanup_writable_pagetable(d, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE);
   38.11  
   38.12      /*
   38.13 @@ -1331,6 +1328,9 @@ int do_mmu_update(
   38.14          }
   38.15      }
   38.16  
   38.17 +    perfc_incrc(calls_to_mmu_update); 
   38.18 +    perfc_addc(num_page_updates, count);
   38.19 +
   38.20      if ( unlikely(!array_access_ok(VERIFY_READ, ureqs, count, sizeof(req))) )
   38.21      {
   38.22          rc = -EFAULT;
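
The two calls moved in this hunk are Xen's software performance counters; the counter list lives in xen/include/xen/perfc_defn.h and the macros in xen/include/xen/perfc.h, neither of which this changeset touches. The sketch below shows the shape those macros are assumed to have, which is also why the move is low-risk: they only bump per-CPU atomics (and compile away entirely when PERF_COUNTERS is not defined), so relocating them past do_mmu_update()'s preamble presumably just avoids counting invocations that fail the earlier checks.

/* Assumed shape of the counter macros (sketch, not the real perfc.h). */
#ifdef PERF_COUNTERS
#define perfc_incrc(x)    atomic_inc(&perfcounters.x[smp_processor_id()])
#define perfc_addc(x,y)   atomic_add((y), &perfcounters.x[smp_processor_id()])
#else
#define perfc_incrc(x)    ((void)0)
#define perfc_addc(x,y)   ((void)0)
#endif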
    39.1 --- a/xen/arch/x86/x86_32/entry.S	Mon Dec 06 20:03:12 2004 +0000
    39.2 +++ b/xen/arch/x86/x86_32/entry.S	Mon Dec 06 20:37:17 2004 +0000
    39.3 @@ -341,6 +341,7 @@ process_guest_exception_and_events:
    39.4          leal DOMAIN_trap_bounce(%ebx),%edx
    39.5          testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx)
    39.6          jz   test_all_events
    39.7 +        cli  # create_bounce_frame needs CLI for pre-exceptions to work
    39.8          call create_bounce_frame
    39.9          jmp  test_all_events
   39.10  
    40.1 --- a/xen/common/dom0_ops.c	Mon Dec 06 20:03:12 2004 +0000
    40.2 +++ b/xen/common/dom0_ops.c	Mon Dec 06 20:37:17 2004 +0000
    40.3 @@ -651,6 +651,16 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    40.4      }
    40.5      break;
    40.6  
    40.7 +#ifdef PERF_COUNTERS
    40.8 +    case DOM0_PERFCCONTROL:
    40.9 +    {
   40.10 +        extern int perfc_control(dom0_perfccontrol_t *);
   40.11 +        ret = perfc_control(&op->u.perfccontrol);
   40.12 +        copy_to_user(u_dom0_op, op, sizeof(*op));
   40.13 +    }
   40.14 +    break;
   40.15 +#endif
   40.16 +
   40.17      default:
   40.18          ret = arch_do_dom0_op(op,u_dom0_op);
   40.19  
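
The new case keeps perfc_control() private to xen/common/perfc.c and declares it with an in-function extern. An equally workable arrangement would be to export a guarded prototype from the perfc header, so that the declaration and the PERF_COUNTERS conditionality live in one place; a sketch of that alternative (not something this changeset does), assuming xen/include/xen/perfc.h is the natural home:

/* Sketch only: possible prototype placement in xen/include/xen/perfc.h. */
#include <public/dom0_ops.h>

#ifdef PERF_COUNTERS
int perfc_control(dom0_perfccontrol_t *pc);
#endif

Either way, when Xen is built without PERF_COUNTERS the whole case disappears and a DOM0_PERFCCONTROL request simply falls through to arch_do_dom0_op() like any other unrecognised command.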
    41.1 --- a/xen/common/perfc.c	Mon Dec 06 20:03:12 2004 +0000
    41.2 +++ b/xen/common/perfc.c	Mon Dec 06 20:37:17 2004 +0000
    41.3 @@ -4,6 +4,8 @@
    41.4  #include <xen/time.h>
    41.5  #include <xen/perfc.h>
    41.6  #include <xen/keyhandler.h> 
    41.7 +#include <public/dom0_ops.h>
    41.8 +#include <asm/uaccess.h>
    41.9  
   41.10  #undef  PERFCOUNTER
   41.11  #undef  PERFCOUNTER_CPU
   41.12 @@ -79,8 +81,9 @@ void perfc_reset(unsigned char key)
   41.13      s_time_t now = NOW();
   41.14      atomic_t *counters = (atomic_t *)&perfcounters;
   41.15  
   41.16 -    printk("Xen performance counters RESET (now = 0x%08X:%08X)\n",
   41.17 -           (u32)(now>>32), (u32)now);
   41.18 +    if ( key != '\0' )
   41.19 +        printk("Xen performance counters RESET (now = 0x%08X:%08X)\n",
   41.20 +               (u32)(now>>32), (u32)now);
   41.21  
   41.22      /* leave STATUS counters alone -- don't reset */
   41.23  
   41.24 @@ -109,3 +112,107 @@ void perfc_reset(unsigned char key)
   41.25      }
   41.26  }
   41.27  
   41.28 +static dom0_perfc_desc_t perfc_d[NR_PERFCTRS];
   41.29 +static int               perfc_init = 0;
   41.30 +static int perfc_copy_info(dom0_perfc_desc_t *desc)
   41.31 +{
   41.32 +    unsigned int i, j;
   41.33 +    atomic_t *counters = (atomic_t *)&perfcounters;
   41.34 +
   41.35 +    if ( desc == NULL )
   41.36 +        return 0;
   41.37 +
   41.38 +    /* We only copy the name and array-size information once. */
   41.39 +    if ( !perfc_init ) 
   41.40 +    {
   41.41 +        for ( i = 0; i < NR_PERFCTRS; i++ )
   41.42 +        {
   41.43 +            strncpy(perfc_d[i].name, perfc_info[i].name,
   41.44 +                    sizeof(perfc_d[i].name));
   41.45 +            perfc_d[i].name[sizeof(perfc_d[i].name)-1] = '\0';
   41.46 +
   41.47 +            switch ( perfc_info[i].type )
   41.48 +            {
   41.49 +            case TYPE_SINGLE:
   41.50 +            case TYPE_S_SINGLE:
   41.51 +                perfc_d[i].nr_vals = 1;
   41.52 +                break;
   41.53 +            case TYPE_CPU:
   41.54 +            case TYPE_S_CPU:
   41.55 +                perfc_d[i].nr_vals = smp_num_cpus;
   41.56 +                break;
   41.57 +            case TYPE_ARRAY:
   41.58 +            case TYPE_S_ARRAY:
   41.59 +                perfc_d[i].nr_vals = perfc_info[i].nr_elements;
   41.60 +                break;
   41.61 +            }
   41.62 +
   41.63 +            if ( perfc_d[i].nr_vals > ARRAY_SIZE(perfc_d[i].vals) )
   41.64 +                perfc_d[i].nr_vals = ARRAY_SIZE(perfc_d[i].vals);
   41.65 +        }
   41.66 +
   41.67 +        perfc_init = 1;
   41.68 +    }
   41.69 +
   41.70 +    /* We gather the counts together every time. */
   41.71 +    for ( i = 0; i < NR_PERFCTRS; i++ )
   41.72 +    {
   41.73 +        switch ( perfc_info[i].type )
   41.74 +        {
   41.75 +        case TYPE_SINGLE:
   41.76 +        case TYPE_S_SINGLE:
   41.77 +            perfc_d[i].vals[0] = atomic_read(&counters[0]);
   41.78 +            counters += 1;
   41.79 +            break;
   41.80 +        case TYPE_CPU:
   41.81 +        case TYPE_S_CPU:
   41.82 +            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
   41.83 +                perfc_d[i].vals[j] = atomic_read(&counters[j]);
   41.84 +            counters += NR_CPUS;
   41.85 +            break;
   41.86 +        case TYPE_ARRAY:
   41.87 +        case TYPE_S_ARRAY:
   41.88 +            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
   41.89 +                perfc_d[i].vals[j] = atomic_read(&counters[j]);
   41.90 +            counters += perfc_info[i].nr_elements;
   41.91 +            break;
   41.92 +        }
   41.93 +    }
   41.94 +
   41.95 +    return (copy_to_user(desc, perfc_d, NR_PERFCTRS * sizeof(*desc)) ?
   41.96 +            -EFAULT : 0);
   41.97 +}
   41.98 +
   41.99 +/* Dom0 control of perf counters */
  41.100 +int perfc_control(dom0_perfccontrol_t *pc)
  41.101 +{
  41.102 +    static spinlock_t lock = SPIN_LOCK_UNLOCKED;
  41.103 +    u32 op = pc->op;
  41.104 +    int rc;
  41.105 +
  41.106 +    pc->nr_counters = NR_PERFCTRS;
  41.107 +
  41.108 +    spin_lock(&lock);
  41.109 +
  41.110 +    switch ( op )
  41.111 +    {
  41.112 +    case DOM0_PERFCCONTROL_OP_RESET:
  41.113 +        perfc_copy_info(pc->desc);
  41.114 +        perfc_reset(0);
  41.115 +        rc = 0;
  41.116 +        break;
  41.117 +
  41.118 +    case DOM0_PERFCCONTROL_OP_QUERY:
  41.119 +        perfc_copy_info(pc->desc);
  41.120 +        rc = 0;
  41.121 +        break;
  41.122 +
  41.123 +    default:
  41.124 +        rc = -EINVAL;
  41.125 +        break;
  41.126 +    }
  41.127 +
  41.128 +    spin_unlock(&lock);
  41.129 +
  41.130 +    return rc;
  41.131 +}
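
perfc_copy_info() relies on the same layout trick as perfc_reset() above: the generated perfcounters structure is walked as one flat atomic_t array, advancing by 1, NR_CPUS or nr_elements per entry, so the walk order must match the declaration order in perfc_defn.h exactly. A small worked example of that assumption follows; the counter names are illustrative, not real entries.

/* Illustrative layout only.  Given hypothetical declarations
 *     PERFCOUNTER    ( foo, "foo events" )      -- single value
 *     PERFCOUNTER_CPU( bar, "bar events" )      -- one value per CPU
 * the generated structure is effectively
 *     struct { atomic_t foo[1]; atomic_t bar[NR_CPUS]; } perfcounters;
 * so the flat walk finds foo at counters[0] and bar at counters[1..NR_CPUS].
 * The walk must advance by NR_CPUS (the compiled-in maximum) even though only
 * smp_num_cpus values are copied out, and nr_vals is capped at the 64-slot
 * vals[] array of dom0_perfc_desc_t, so larger counter arrays are truncated
 * in the report. */

One design note: perfc_d[] is a single static buffer of NR_PERFCTRS descriptors shared by all callers, which is safe only because perfc_control() serialises access with its local spinlock.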
    42.1 --- a/xen/include/public/dom0_ops.h	Mon Dec 06 20:03:12 2004 +0000
    42.2 +++ b/xen/include/public/dom0_ops.h	Mon Dec 06 20:37:17 2004 +0000
    42.3 @@ -386,6 +386,25 @@ typedef struct {
    42.4      u32      __pad1;
    42.5  } PACKED dom0_read_memtype_t; /* 32 bytes */
    42.6  
    42.7 +/* Interface for controlling Xen software performance counters. */
    42.8 +#define DOM0_PERFCCONTROL        34
    42.9 +/* Sub-operations: */
   42.10 +#define DOM0_PERFCCONTROL_OP_RESET 1   /* Reset all counters to zero. */
   42.11 +#define DOM0_PERFCCONTROL_OP_QUERY 2   /* Get perfctr information. */
   42.12 +typedef struct {
   42.13 +    u8      name[80];               /*  0: name of perf counter */
   42.14 +    u32     nr_vals;                /* 80: number of values for this counter */
   42.15 +    u32     vals[64];               /* 84: array of values */
   42.16 +} PACKED dom0_perfc_desc_t; /* 340 bytes */
   42.17 +typedef struct {
   42.18 +    /* IN variables. */
   42.19 +    u32            op;                /*  0: DOM0_PERFCCONTROL_OP_??? */
   42.20 +    /* OUT variables. */
   42.21 +    u32            nr_counters;       /*  4: number of counters */
   42.22 +    dom0_perfc_desc_t *desc;          /*  8: counter information (or NULL) */
   42.23 +    MEMORY_PADDING;
   42.24 +} PACKED dom0_perfccontrol_t; /* 16 bytes */
   42.25 +
   42.26  typedef struct {
   42.27      u32 cmd;                          /* 0 */
   42.28      u32 interface_version;            /* 4 */ /* DOM0_INTERFACE_VERSION */
   42.29 @@ -419,6 +438,7 @@ typedef struct {
   42.30          dom0_add_memtype_t       add_memtype;
   42.31          dom0_del_memtype_t       del_memtype;
   42.32          dom0_read_memtype_t      read_memtype;
   42.33 +        dom0_perfccontrol_t      perfccontrol;
   42.34      } PACKED u;
   42.35  } PACKED dom0_op_t; /* 80 bytes */
   42.36  
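
A quick size check of the new wire structures, assuming the usual x86_32 ABI and that MEMORY_PADDING (as elsewhere in these headers) pads the preceding pointer field out to 64 bits:

/* dom0_perfc_desc_t:    80 (name) + 4 (nr_vals) + 64*4 (vals)       = 340 bytes
 * dom0_perfccontrol_t:   4 (op) + 4 (nr_counters)
 *                      + 4 (desc pointer on x86_32) + 4 (padding)   =  16 bytes */

For DOM0_PERFCCONTROL_OP_QUERY the caller supplies either NULL in desc (to learn nr_counters) or room for nr_counters descriptors of 340 bytes each, which is exactly the two-pass scheme xenperf.c uses above.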
    43.1 --- a/xen/include/public/xen.h	Mon Dec 06 20:03:12 2004 +0000
    43.2 +++ b/xen/include/public/xen.h	Mon Dec 06 20:37:17 2004 +0000
    43.3 @@ -9,8 +9,10 @@
    43.4  #ifndef __XEN_PUBLIC_XEN_H__
    43.5  #define __XEN_PUBLIC_XEN_H__
    43.6  
    43.7 +#ifndef PACKED
    43.8  /* GCC-specific way to pack structure definitions (no implicit padding). */
    43.9  #define PACKED __attribute__ ((packed))
   43.10 +#endif
   43.11  
   43.12  #if defined(__i386__)
   43.13  #include "arch-x86_32.h"
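
Guarding the PACKED definition means a consumer of the public headers can now supply its own definition before including them instead of being forced into GCC's __attribute__((packed)); the likely beneficiaries are tools or OS ports built with compilers where that attribute is unavailable or unwanted. A hypothetical consumer-side example (whether a replacement definition preserves the intended structure layout is the consumer's responsibility):

/* Hypothetical example -- not part of this changeset. */
#define PACKED                      /* consumer-chosen packing, defined first */
#include <public/xen.h>             /* the #ifndef guard leaves it untouched */
#include <public/dom0_ops.h>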