direct-io.hg

changeset 3264:fd0d4d8e6193

bitkeeper revision 1.1159.1.475 (41b9efabAfHQClS6PJMUaGIogh-MMw)

sync w/ head.
author cl349@arcadians.cl.cam.ac.uk
date Fri Dec 10 18:49:15 2004 +0000 (2004-12-10)
parents 3b6cceecc500 bf681d2cb7c8
children 510e8b6f4fae
files .rootkeys BitKeeper/etc/ignore Makefile linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c linux-2.4.28-xen-sparse/arch/xen/mm/init.c linux-2.4.28-xen-sparse/mkbuildtree linux-2.6.9-xen-sparse/arch/xen/configs/xen0_defconfig linux-2.6.9-xen-sparse/arch/xen/configs/xenU_defconfig linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c linux-2.6.9-xen-sparse/arch/xen/i386/kernel/pci-dma.c linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6.9-xen-sparse/arch/xen/i386/mm/hypervisor.c linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c linux-2.6.9-xen-sparse/arch/xen/i386/mm/ioremap.c linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile linux-2.6.9-xen-sparse/arch/xen/kernel/ctrl_if.c linux-2.6.9-xen-sparse/arch/xen/kernel/devmem.c linux-2.6.9-xen-sparse/arch/xen/kernel/reboot.c linux-2.6.9-xen-sparse/drivers/char/mem.c linux-2.6.9-xen-sparse/drivers/xen/Makefile linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c linux-2.6.9-xen-sparse/drivers/xen/netback/interface.c linux-2.6.9-xen-sparse/drivers/xen/netback/netback.c linux-2.6.9-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6.9-xen-sparse/drivers/xen/privcmd/privcmd.c linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/io.h linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgalloc.h linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6.9-xen-sparse/include/asm-xen/balloon.h linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h tools/libxc/Makefile tools/libxc/xc.h tools/libxc/xc_misc.c tools/libxutil/Makefile tools/misc/Makefile tools/misc/xenperf.c tools/python/xen/xend/XendClient.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/encode.py tools/python/xen/xend/server/SrvDaemon.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xend/server/domain.py tools/python/xen/xend/server/messages.py tools/python/xen/xend/sxp.py tools/python/xen/xm/main.py tools/python/xen/xm/sysrq.py 
xen/arch/x86/memory.c xen/arch/x86/x86_32/entry.S xen/common/dom0_ops.c xen/common/perfc.c xen/include/public/dom0_ops.h xen/include/public/io/domain_controller.h xen/include/public/xen.h
line diff
     1.1 --- a/.rootkeys	Mon Nov 29 17:11:03 2004 +0000
     1.2 +++ b/.rootkeys	Fri Dec 10 18:49:15 2004 +0000
     1.3 @@ -173,6 +173,7 @@ 4107adf1WcCgkhsdLTRGX52cOG1vJg linux-2.6
     1.4  4107adf1s5u6249DNPUViX1YNagbUQ linux-2.6.9-xen-sparse/arch/xen/i386/pci/irq.c
     1.5  40f56239zOksGg_H4XD4ye6iZNtoZA linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile
     1.6  40f56239bvOjuuuViZ0XMlNiREFC0A linux-2.6.9-xen-sparse/arch/xen/kernel/ctrl_if.c
     1.7 +41ab6fa06JdF7jxUsuDcjN3UhuIAxg linux-2.6.9-xen-sparse/arch/xen/kernel/devmem.c
     1.8  40f56238xFQe9T7M_U_FItM-bZIpLw linux-2.6.9-xen-sparse/arch/xen/kernel/evtchn.c
     1.9  4110f478aeQWllIN7J4kouAHiAqrPw linux-2.6.9-xen-sparse/arch/xen/kernel/fixup.c
    1.10  412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.9-xen-sparse/arch/xen/kernel/gnttab.c
    1.11 @@ -248,6 +249,7 @@ 40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6
    1.12  40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/tlbflush.h
    1.13  41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/vga.h
    1.14  40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/xor.h
    1.15 +41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.9-xen-sparse/include/asm-xen/balloon.h
    1.16  40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.9-xen-sparse/include/asm-xen/ctrl_if.h
    1.17  40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.9-xen-sparse/include/asm-xen/evtchn.h
    1.18  419b4e9367PjTEvdjwavWN12BeBBXg linux-2.6.9-xen-sparse/include/asm-xen/foreign_page.h
    1.19 @@ -391,6 +393,7 @@ 3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/mis
    1.20  3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README
    1.21  405eedf6_nnNhFQ1I85lhCkLK6jFGA tools/misc/xencons
    1.22  40c9c4697z76HDfkCLdMhmaEwzFoNQ tools/misc/xend
    1.23 +41adc641dV-0cDLSyzMs5BT8nL7v3Q tools/misc/xenperf.c
    1.24  4107986eMWVdBoz4tXYoOscpN_BCYg tools/misc/xensv
    1.25  4056f5155QYZdsk-1fLdjsZPFTnlhg tools/misc/xensymoops
    1.26  40cf2937dqM1jWW87O5OoOYND8leuA tools/misc/xm
    1.27 @@ -530,6 +533,7 @@ 40cf2937isyS250zyd0Q2GuEDoNXfQ tools/pyt
    1.28  411b2c1ehdEGO_CwG0tvn85Q-Tfh5g tools/python/xen/xm/migrate.py
    1.29  40cf2937PSslwBliN1g7ofDy2H_RhA tools/python/xen/xm/opts.py
    1.30  40cf2937Z8WCNOnO2FcWdubvEAF9QQ tools/python/xen/xm/shutdown.py
    1.31 +41b88ba6_C4---jeA895Efg9YFZgKA tools/python/xen/xm/sysrq.py
    1.32  40fcefb2K1xqVVT4D-p7nL2GzS4scg tools/sv/Main.rpy
    1.33  40ffbcb66Dj5F-1kCK9BcgSqCWkt1w tools/sv/Makefile
    1.34  4120b0e5L_nW-u0MWRfIdXg4ng4OjA tools/sv/images/destroy.png
     2.1 --- a/BitKeeper/etc/ignore	Mon Nov 29 17:11:03 2004 +0000
     2.2 +++ b/BitKeeper/etc/ignore	Fri Dec 10 18:49:15 2004 +0000
     2.3 @@ -63,6 +63,7 @@ tools/check/.*
     2.4  tools/libxc/xen/*
     2.5  tools/misc/miniterm/miniterm
     2.6  tools/misc/xen_cpuperf
     2.7 +tools/misc/xenperf
     2.8  tools/vnet/gc
     2.9  tools/vnet/gc*/*
    2.10  tools/vnet/vnet-module/*.ko
     3.1 --- a/Makefile	Mon Nov 29 17:11:03 2004 +0000
     3.2 +++ b/Makefile	Fri Dec 10 18:49:15 2004 +0000
     3.3 @@ -19,10 +19,30 @@ export INSTALL_DIR
     3.4  include buildconfigs/Rules.mk
     3.5  
     3.6  .PHONY:	all dist install xen tools kernels docs world clean mkpatches mrproper
     3.7 -.PHONY:	kbuild kdelete kclean
     3.8 +.PHONY:	kbuild kdelete kclean install-tools install-xen install-docs
     3.9 +.PHONY: install-kernels
    3.10  
    3.11  all: dist
    3.12  
    3.13 +# install everything into the standard system directories
    3.14 +# NB: install explicitly does not check that everything is up to date!
    3.15 +install: install-tools install-xen install-kernels install-docs
    3.16 +
    3.17 +install-xen:
    3.18 +	$(MAKE) -C xen install
    3.19 +
    3.20 +install-tools:
    3.21 +	$(MAKE) -C tools install
    3.22 +
    3.23 +install-kernels:
    3.24 +	$(shell cp -a $(INSTALL_DIR)/boot/* /boot/)
    3.25 +	$(shell cp -a $(INSTALL_DIR)/lib/modules/* /lib/modules/)
    3.26 +	$(shell cp -dR $(INSTALL_DIR)/boot/*$(LINUX_VER)* $(prefix)/boot/)
    3.27 +	$(shell cp -dR $(INSTALL_DIR)/lib/modules/* $(prefix)/lib/modules/)
    3.28 +
    3.29 +install-docs:
    3.30 +	sh ./docs/check_pkgs && $(MAKE) -C docs install || true
    3.31 +
    3.32  # build and install everything into local dist directory
    3.33  dist: xen tools kernels docs
    3.34  	install -m0644 ./COPYING $(DIST_DIR)
    3.35 @@ -31,17 +51,6 @@ dist: xen tools kernels docs
    3.36  	mkdir -p $(DIST_DIR)/check
    3.37  	install -m0755 tools/check/chk tools/check/check_* $(DIST_DIR)/check
    3.38  
    3.39 -# install everything into the standard system directories
    3.40 -# NB: install explicitly does not check that everything is up to date!
    3.41 -install: 
    3.42 -	$(MAKE) -C xen install
    3.43 -	$(MAKE) -C tools install
    3.44 -	$(shell cp -a $(INSTALL_DIR)/boot/* /boot/)
    3.45 -	$(shell cp -a $(INSTALL_DIR)/lib/modules/* /lib/modules/)
    3.46 -	sh ./docs/check_pkgs && $(MAKE) -C docs install || true
    3.47 -	$(shell cp -dR $(INSTALL_DIR)/boot/*$(LINUX_VER)* $(prefix)/boot/)
    3.48 -	$(shell cp -dR $(INSTALL_DIR)/lib/modules/* $(prefix)/lib/modules/)
    3.49 -
    3.50  xen:
    3.51  	$(MAKE) prefix=$(INSTALL_DIR) dist=yes -C xen install
    3.52  
     4.1 --- a/linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile	Mon Nov 29 17:11:03 2004 +0000
     4.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile	Fri Dec 10 18:49:15 2004 +0000
     4.3 @@ -1,3 +1,4 @@
     4.4  O_TARGET := drv.o
     4.5 +export-objs := balloon.o
     4.6  obj-y := balloon.o
     4.7  include $(TOPDIR)/Rules.make
     5.1 --- a/linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile	Mon Nov 29 17:11:03 2004 +0000
     5.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile	Fri Dec 10 18:49:15 2004 +0000
     5.3 @@ -6,7 +6,7 @@ all: kernel.o head.o init_task.o
     5.4  
     5.5  O_TARGET := kernel.o
     5.6  
     5.7 -export-objs     := i386_ksyms.o gnttab.o skbuff.o
     5.8 +export-objs     := i386_ksyms.o gnttab.o skbuff.o ctrl_if.o
     5.9  
    5.10  obj-y	:= process.o semaphore.o signal.o entry.o traps.o irq.o  \
    5.11  		ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \
     6.1 --- a/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c	Mon Nov 29 17:11:03 2004 +0000
     6.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c	Fri Dec 10 18:49:15 2004 +0000
     6.3 @@ -275,7 +275,8 @@ void __init setup_arch(char **cmdline_p)
     6.4       * arch/xen/drivers/balloon/balloon.c
     6.5       */
     6.6      mem_param = parse_mem_cmdline(cmdline_p);
     6.7 -    if (!mem_param) mem_param = xen_start_info.nr_pages;
     6.8 +    if (mem_param < xen_start_info.nr_pages)
     6.9 +        mem_param = xen_start_info.nr_pages;
    6.10  
    6.11  #define PFN_UP(x)	(((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
    6.12  #define PFN_DOWN(x)	((x) >> PAGE_SHIFT)
    6.13 @@ -303,6 +304,7 @@ void __init setup_arch(char **cmdline_p)
    6.14              printk(KERN_WARNING "Use a PAE enabled kernel.\n");
    6.15          else
    6.16              printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
    6.17 +        max_pfn = lmax_low_pfn;
    6.18  #else /* !CONFIG_HIGHMEM */
    6.19  #ifndef CONFIG_X86_PAE
    6.20          if (max_pfn > MAX_NONPAE_PFN) {
    6.21 @@ -350,8 +352,6 @@ void __init setup_arch(char **cmdline_p)
    6.22       */
    6.23      max_low_pfn = lmax_low_pfn;
    6.24  
    6.25 -
    6.26 -
    6.27  #ifdef CONFIG_BLK_DEV_INITRD
    6.28      if ( xen_start_info.mod_start != 0 )
    6.29      {
    6.30 @@ -375,6 +375,20 @@ void __init setup_arch(char **cmdline_p)
    6.31  
    6.32      paging_init();
    6.33  
    6.34 +    /* Make sure we have a large enough P->M table. */
    6.35 +    if ( max_pfn > xen_start_info.nr_pages )
    6.36 +    {
    6.37 +        phys_to_machine_mapping = alloc_bootmem_low_pages(
    6.38 +            max_pfn * sizeof(unsigned long));
    6.39 +        memset(phys_to_machine_mapping, ~0, max_pfn * sizeof(unsigned long));
    6.40 +        memcpy(phys_to_machine_mapping,
    6.41 +               (unsigned long *)xen_start_info.mfn_list,
    6.42 +               xen_start_info.nr_pages * sizeof(unsigned long));
    6.43 +        free_bootmem(__pa(xen_start_info.mfn_list), 
    6.44 +                     PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
    6.45 +                                     sizeof(unsigned long))));
    6.46 +    }
    6.47 +
    6.48      pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
    6.49      for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
    6.50      {	
     7.1 --- a/linux-2.4.28-xen-sparse/arch/xen/mm/init.c	Mon Nov 29 17:11:03 2004 +0000
     7.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/mm/init.c	Fri Dec 10 18:49:15 2004 +0000
     7.3 @@ -213,23 +213,16 @@ static void __init fixrange_init (unsign
     7.4  
     7.5  static void __init pagetable_init (void)
     7.6  {
     7.7 -    unsigned long vaddr, end;
     7.8 +    unsigned long vaddr, end, ram_end;
     7.9      pgd_t *kpgd, *pgd, *pgd_base;
    7.10      int i, j, k;
    7.11      pmd_t *kpmd, *pmd;
    7.12      pte_t *kpte, *pte, *pte_base;
    7.13  
    7.14 -    /* create tables only for boot_pfn frames.  max_low_pfn may be sized for
    7.15 -     * pages yet to be allocated from the hypervisor, or it may be set
    7.16 -     * to override the xen_start_info amount of memory
    7.17 -     */
    7.18 -    int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn);
    7.19 -
    7.20 -    /*
    7.21 -     * This can be zero as well - no problem, in that case we exit
    7.22 -     * the loops anyway due to the PTRS_PER_* conditions.
    7.23 -     */
    7.24 -    end = (unsigned long)__va(boot_pfn *PAGE_SIZE);
    7.25 +    end     = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
    7.26 +    ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE);
    7.27 +    if ( ram_end > end )
    7.28 +        ram_end = end;
    7.29  
    7.30      pgd_base = init_mm.pgd;
    7.31      i = __pgd_offset(PAGE_OFFSET);
    7.32 @@ -237,12 +230,12 @@ static void __init pagetable_init (void)
    7.33  
    7.34      for (; i < PTRS_PER_PGD; pgd++, i++) {
    7.35          vaddr = i*PGDIR_SIZE;
    7.36 -        if (end && (vaddr >= end))
    7.37 +        if (vaddr >= end)
    7.38              break;
    7.39          pmd = (pmd_t *)pgd;
    7.40          for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
    7.41              vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
    7.42 -            if (end && (vaddr >= end))
    7.43 +            if (vaddr >= end)
    7.44                  break;
    7.45  
    7.46              /* Filled in for us already? */
    7.47 @@ -250,10 +243,11 @@ static void __init pagetable_init (void)
    7.48                  continue;
    7.49  
    7.50              pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
    7.51 +            clear_page(pte_base);
    7.52  
    7.53              for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
    7.54                  vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
    7.55 -                if (end && (vaddr >= end))
    7.56 +                if (vaddr >= ram_end)
    7.57                      break;
    7.58                  *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
    7.59              }
    7.60 @@ -329,28 +323,14 @@ static inline int page_is_ram (unsigned 
    7.61      return 1;
    7.62  }
    7.63  
    7.64 -static inline int page_kills_ppro(unsigned long pagenr)
    7.65 -{
    7.66 -    return 0;
    7.67 -}
    7.68 -
    7.69  #ifdef CONFIG_HIGHMEM
    7.70 -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
    7.71 +void __init one_highpage_init(struct page *page, int free_page)
    7.72  {
    7.73 -    if (!page_is_ram(pfn)) {
    7.74 -        SetPageReserved(page);
    7.75 -        return;
    7.76 -    }
    7.77 -	
    7.78 -    if (bad_ppro && page_kills_ppro(pfn)) {
    7.79 -        SetPageReserved(page);
    7.80 -        return;
    7.81 -    }
    7.82 -	
    7.83      ClearPageReserved(page);
    7.84      set_bit(PG_highmem, &page->flags);
    7.85      atomic_set(&page->count, 1);
    7.86 -    __free_page(page);
    7.87 +    if ( free_page )
    7.88 +        __free_page(page);
    7.89      totalhigh_pages++;
    7.90  }
    7.91  #endif /* CONFIG_HIGHMEM */
    7.92 @@ -392,8 +372,9 @@ static int __init free_pages_init(void)
    7.93              reservedpages++;
    7.94      }
    7.95  #ifdef CONFIG_HIGHMEM
    7.96 -    for (pfn = xen_start_info.nr_pages-1; pfn >= highstart_pfn; pfn--)
    7.97 -        one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro);
    7.98 +    for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--)
    7.99 +        one_highpage_init((struct page *) (mem_map + pfn), pfn,
   7.100 +                          (pfn < xen_start_info.nr_pages));
   7.101      totalram_pages += totalhigh_pages;
   7.102  #endif
   7.103      return reservedpages;
     8.1 --- a/linux-2.4.28-xen-sparse/mkbuildtree	Mon Nov 29 17:11:03 2004 +0000
     8.2 +++ b/linux-2.4.28-xen-sparse/mkbuildtree	Fri Dec 10 18:49:15 2004 +0000
     8.3 @@ -204,6 +204,7 @@ ln -sf ../asm-i386/unaligned.h
     8.4  ln -sf ../asm-i386/unistd.h 
     8.5  ln -sf ../asm-i386/user.h 
     8.6  ln -sf ../asm-i386/vm86.h 
     8.7 +ln -sf ../../${LINUX_26}/include/asm-xen/balloon.h
     8.8  ln -sf ../../${LINUX_26}/include/asm-xen/ctrl_if.h
     8.9  ln -sf ../../${LINUX_26}/include/asm-xen/evtchn.h
    8.10  ln -sf ../../${LINUX_26}/include/asm-xen/gnttab.h
     9.1 --- a/linux-2.6.9-xen-sparse/arch/xen/configs/xen0_defconfig	Mon Nov 29 17:11:03 2004 +0000
     9.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/configs/xen0_defconfig	Fri Dec 10 18:49:15 2004 +0000
     9.3 @@ -1,7 +1,7 @@
     9.4  #
     9.5  # Automatically generated make config: don't edit
     9.6  # Linux kernel version: 2.6.9-xen0
     9.7 -# Sun Nov 14 16:55:05 2004
     9.8 +# Wed Dec  1 09:22:49 2004
     9.9  #
    9.10  CONFIG_XEN=y
    9.11  CONFIG_ARCH_XEN=y
    9.12 @@ -153,10 +153,10 @@ CONFIG_DEBUG_KERNEL=y
    9.13  CONFIG_EARLY_PRINTK=y
    9.14  # CONFIG_DEBUG_STACKOVERFLOW is not set
    9.15  # CONFIG_DEBUG_STACK_USAGE is not set
    9.16 -# CONFIG_DEBUG_SLAB is not set
    9.17 +CONFIG_DEBUG_SLAB=y
    9.18  CONFIG_MAGIC_SYSRQ=y
    9.19  # CONFIG_DEBUG_SPINLOCK is not set
    9.20 -# CONFIG_DEBUG_PAGEALLOC is not set
    9.21 +CONFIG_DEBUG_PAGEALLOC=y
    9.22  # CONFIG_DEBUG_INFO is not set
    9.23  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
    9.24  # CONFIG_FRAME_POINTER is not set
    10.1 --- a/linux-2.6.9-xen-sparse/arch/xen/configs/xenU_defconfig	Mon Nov 29 17:11:03 2004 +0000
    10.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/configs/xenU_defconfig	Fri Dec 10 18:49:15 2004 +0000
    10.3 @@ -1,7 +1,7 @@
    10.4  #
    10.5  # Automatically generated make config: don't edit
    10.6 -# Linux kernel version: 2.6.9-xen0
    10.7 -# Sun Oct 31 21:32:23 2004
    10.8 +# Linux kernel version: 2.6.9-xenU
    10.9 +# Wed Dec  1 09:22:09 2004
   10.10  #
   10.11  CONFIG_XEN=y
   10.12  CONFIG_ARCH_XEN=y
   10.13 @@ -47,6 +47,7 @@ CONFIG_HOTPLUG=y
   10.14  # CONFIG_IKCONFIG is not set
   10.15  # CONFIG_EMBEDDED is not set
   10.16  CONFIG_KALLSYMS=y
   10.17 +# CONFIG_KALLSYMS_ALL is not set
   10.18  # CONFIG_KALLSYMS_EXTRA_PASS is not set
   10.19  CONFIG_FUTEX=y
   10.20  CONFIG_EPOLL=y
   10.21 @@ -116,8 +117,15 @@ CONFIG_HAVE_DEC_LOCK=y
   10.22  #
   10.23  # Kernel hacking
   10.24  #
   10.25 -# CONFIG_DEBUG_KERNEL is not set
   10.26 +CONFIG_DEBUG_KERNEL=y
   10.27  CONFIG_EARLY_PRINTK=y
   10.28 +# CONFIG_DEBUG_STACKOVERFLOW is not set
   10.29 +# CONFIG_DEBUG_STACK_USAGE is not set
   10.30 +CONFIG_DEBUG_SLAB=y
   10.31 +# CONFIG_MAGIC_SYSRQ is not set
   10.32 +# CONFIG_DEBUG_SPINLOCK is not set
   10.33 +CONFIG_DEBUG_PAGEALLOC=y
   10.34 +# CONFIG_DEBUG_INFO is not set
   10.35  # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   10.36  # CONFIG_FRAME_POINTER is not set
   10.37  # CONFIG_4KSTACKS is not set
   10.38 @@ -142,6 +150,7 @@ CONFIG_BINFMT_ELF=y
   10.39  CONFIG_STANDALONE=y
   10.40  CONFIG_PREVENT_FIRMWARE_BUILD=y
   10.41  # CONFIG_FW_LOADER is not set
   10.42 +# CONFIG_DEBUG_DRIVER is not set
   10.43  
   10.44  #
   10.45  # Block devices
    11.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c	Mon Nov 29 17:11:03 2004 +0000
    11.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/cpu/common.c	Fri Dec 10 18:49:15 2004 +0000
    11.3 @@ -513,7 +513,7 @@ void __init cpu_gdt_init(struct Xgt_desc
    11.4  	     va < gdt_descr->address + gdt_descr->size;
    11.5  	     va += PAGE_SIZE, f++) {
    11.6  		frames[f] = virt_to_machine(va) >> PAGE_SHIFT;
    11.7 -		protect_page(swapper_pg_dir, (void *)va, PROT_ON);
    11.8 +		make_page_readonly((void *)va);
    11.9  	}
   11.10  	flush_page_update_queue();
   11.11  	if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
    12.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Mon Nov 29 17:11:03 2004 +0000
    12.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Fri Dec 10 18:49:15 2004 +0000
    12.3 @@ -13,6 +13,7 @@
    12.4  #include <linux/pci.h>
    12.5  #include <linux/version.h>
    12.6  #include <asm/io.h>
    12.7 +#include <asm-xen/balloon.h>
    12.8  
    12.9  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
   12.10  #define pte_offset_kernel pte_offset
   12.11 @@ -37,9 +38,12 @@ xen_contig_memory(unsigned long vstart, 
   12.12  	pgd_t         *pgd; 
   12.13  	pmd_t         *pmd;
   12.14  	pte_t         *pte;
   12.15 -	unsigned long  pfn, i;
   12.16 +	unsigned long  pfn, i, flags;
   12.17  
   12.18  	scrub_pages(vstart, 1 << order);
   12.19 +
   12.20 +        balloon_lock(flags);
   12.21 +
   12.22  	/* 1. Zap current PTEs, giving away the underlying pages. */
   12.23  	for (i = 0; i < (1<<order); i++) {
   12.24  		pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
   12.25 @@ -70,6 +74,8 @@ xen_contig_memory(unsigned long vstart, 
   12.26  	}
   12.27  	/* Flush updates through and flush the TLB. */
   12.28  	xen_tlb_flush();
   12.29 +
   12.30 +        balloon_unlock(flags);
   12.31  }
   12.32  
   12.33  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    13.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c	Mon Nov 29 17:11:03 2004 +0000
    13.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/kernel/setup.c	Fri Dec 10 18:49:15 2004 +0000
    13.3 @@ -53,6 +53,9 @@
    13.4  #include "setup_arch_pre.h"
    13.5  #include <bios_ebda.h>
    13.6  
    13.7 +/* Allows setting of maximum possible memory size  */
    13.8 +static unsigned long xen_override_max_pfn;
    13.9 +
   13.10  int disable_pse __initdata = 0;
   13.11  
   13.12  /*
   13.13 @@ -719,8 +722,13 @@ static void __init parse_cmdline_early (
   13.14  				unsigned long long mem_size;
   13.15   
   13.16  				mem_size = memparse(from+4, &from);
   13.17 +#if 0
   13.18  				limit_regions(mem_size);
   13.19  				userdef=1;
   13.20 +#else
   13.21 +				xen_override_max_pfn =
   13.22 +					(unsigned long)(mem_size>>PAGE_SHIFT);
   13.23 +#endif
   13.24  			}
   13.25  		}
   13.26  
   13.27 @@ -858,6 +866,7 @@ static void __init parse_cmdline_early (
   13.28  	}
   13.29  }
   13.30  
   13.31 +#if 0 /* !XEN */
   13.32  /*
   13.33   * Callback for efi_memory_walk.
   13.34   */
   13.35 @@ -874,7 +883,6 @@ efi_find_max_pfn(unsigned long start, un
   13.36  	return 0;
   13.37  }
   13.38  
   13.39 -
   13.40  /*
   13.41   * Find the highest page frame number we have available
   13.42   */
   13.43 @@ -901,6 +909,15 @@ void __init find_max_pfn(void)
   13.44  			max_pfn = end;
   13.45  	}
   13.46  }
   13.47 +#else
   13.48 +/* We don't use the fake e820 because we need to respond to user override. */
   13.49 +void __init find_max_pfn(void)
   13.50 +{
   13.51 +	if ( xen_override_max_pfn < xen_start_info.nr_pages )
   13.52 +		xen_override_max_pfn = xen_start_info.nr_pages;
   13.53 +	max_pfn = xen_override_max_pfn;
   13.54 +}
   13.55 +#endif /* XEN */
   13.56  
   13.57  /*
   13.58   * Determine low and high memory ranges:
   13.59 @@ -1415,6 +1432,21 @@ void __init setup_arch(char **cmdline_p)
   13.60  #endif
   13.61  	paging_init();
   13.62  
   13.63 +	/* Make sure we have a large enough P->M table. */
   13.64 +	if (max_pfn > xen_start_info.nr_pages) {
   13.65 +		phys_to_machine_mapping = alloc_bootmem_low_pages(
   13.66 +			max_pfn * sizeof(unsigned long));
   13.67 +		memset(phys_to_machine_mapping, ~0,
   13.68 +			max_pfn * sizeof(unsigned long));
   13.69 +		memcpy(phys_to_machine_mapping,
   13.70 +			(unsigned long *)xen_start_info.mfn_list,
   13.71 +			xen_start_info.nr_pages * sizeof(unsigned long));
   13.72 +		free_bootmem(
   13.73 +			__pa(xen_start_info.mfn_list), 
   13.74 +			PFN_PHYS(PFN_UP(xen_start_info.nr_pages *
   13.75 +			sizeof(unsigned long))));
   13.76 +	}
   13.77 +
   13.78  	pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE);
   13.79  	for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ )
   13.80  	{	
    14.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/mm/hypervisor.c	Mon Nov 29 17:11:03 2004 +0000
    14.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/mm/hypervisor.c	Fri Dec 10 18:49:15 2004 +0000
    14.3 @@ -35,6 +35,7 @@
    14.4  #include <asm/pgtable.h>
    14.5  #include <asm-xen/hypervisor.h>
    14.6  #include <asm-xen/multicall.h>
    14.7 +#include <asm-xen/balloon.h>
    14.8  #include <linux/percpu.h>
    14.9  
   14.10  /*
   14.11 @@ -429,7 +430,6 @@ unsigned long allocate_empty_lowmem_regi
   14.12      unsigned long *pfn_array;
   14.13      unsigned long  vstart;
   14.14      unsigned long  i;
   14.15 -    int            ret;
   14.16      unsigned int   order = get_order(pages*PAGE_SIZE);
   14.17  
   14.18      vstart = __get_free_pages(GFP_KERNEL, order);
   14.19 @@ -455,57 +455,11 @@ unsigned long allocate_empty_lowmem_regi
   14.20      /* Flush updates through and flush the TLB. */
   14.21      xen_tlb_flush();
   14.22  
   14.23 -    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
   14.24 -                                pfn_array, 1<<order, 0);
   14.25 -    if ( unlikely(ret != (1<<order)) )
   14.26 -    {
   14.27 -        printk(KERN_WARNING "Unable to reduce memory reservation (%d)\n", ret);
   14.28 -        BUG();
   14.29 -    }
   14.30 +    balloon_put_pages(pfn_array, 1 << order);
   14.31  
   14.32      vfree(pfn_array);
   14.33  
   14.34      return vstart;
   14.35  }
   14.36  
   14.37 -void deallocate_lowmem_region(unsigned long vstart, unsigned long pages)
   14.38 -{
   14.39 -    pgd_t         *pgd; 
   14.40 -    pmd_t         *pmd;
   14.41 -    pte_t         *pte;
   14.42 -    unsigned long *pfn_array;
   14.43 -    unsigned long  i;
   14.44 -    int            ret;
   14.45 -    unsigned int   order = get_order(pages*PAGE_SIZE);
   14.46 -
   14.47 -    pfn_array = vmalloc((1<<order) * sizeof(*pfn_array));
   14.48 -    if ( pfn_array == NULL )
   14.49 -        BUG();
   14.50 -
   14.51 -    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
   14.52 -                                pfn_array, 1<<order, 0);
   14.53 -    if ( unlikely(ret != (1<<order)) )
   14.54 -    {
   14.55 -        printk(KERN_WARNING "Unable to increase memory reservation (%d)\n",
   14.56 -               ret);
   14.57 -        BUG();
   14.58 -    }
   14.59 -
   14.60 -    for ( i = 0; i < (1<<order); i++ )
   14.61 -    {
   14.62 -        pgd = pgd_offset_k(   (vstart + (i*PAGE_SIZE)));
   14.63 -        pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE)));
   14.64 -        pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE)));
   14.65 -        queue_l1_entry_update(pte, (pfn_array[i]<<PAGE_SHIFT)|__PAGE_KERNEL);
   14.66 -        queue_machphys_update(pfn_array[i], __pa(vstart)>>PAGE_SHIFT);
   14.67 -        phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = pfn_array[i];
   14.68 -    }
   14.69 -
   14.70 -    flush_page_update_queue();
   14.71 -
   14.72 -    vfree(pfn_array);
   14.73 -
   14.74 -    free_pages(vstart, order);
   14.75 -}
   14.76 -
   14.77  #endif /* CONFIG_XEN_PHYSDEV_ACCESS */
    15.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c	Mon Nov 29 17:11:03 2004 +0000
    15.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/mm/init.c	Fri Dec 10 18:49:15 2004 +0000
    15.3 @@ -77,6 +77,7 @@ static pte_t * __init one_page_table_ini
    15.4  {
    15.5  	if (pmd_none(*pmd)) {
    15.6  		pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
    15.7 +		make_page_readonly(page_table);
    15.8  		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
    15.9  		if (page_table != pte_offset_kernel(pmd, 0))
   15.10  			BUG();	
   15.11 @@ -125,41 +126,6 @@ static void __init page_table_range_init
   15.12  	}
   15.13  }
   15.14  
   15.15 -void __init protect_page(pgd_t *pgd, void *page, int mode)
   15.16 -{
   15.17 -	pmd_t *pmd;
   15.18 -	pte_t *pte;
   15.19 -	unsigned long addr;
   15.20 -
   15.21 -	addr = (unsigned long)page;
   15.22 -	pgd += pgd_index(addr);
   15.23 -	pmd = pmd_offset(pgd, addr);
   15.24 -	pte = pte_offset_kernel(pmd, addr);
   15.25 -	if (!pte_present(*pte))
   15.26 -		return;
   15.27 -	queue_l1_entry_update(pte, mode ? pte_val_ma(*pte) & ~_PAGE_RW :
   15.28 -					pte_val_ma(*pte) | _PAGE_RW);
   15.29 -}
   15.30 -
   15.31 -void __init protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode)
   15.32 -{
   15.33 -	pmd_t *pmd;
   15.34 -	pte_t *pte;
   15.35 -	int pgd_idx, pmd_idx;
   15.36 -
   15.37 -	protect_page(dpgd, spgd, mode);
   15.38 -
   15.39 -	for (pgd_idx = 0; pgd_idx < PTRS_PER_PGD_NO_HV; spgd++, pgd_idx++) {
   15.40 -		pmd = pmd_offset(spgd, 0);
   15.41 -		if (pmd_none(*pmd))
   15.42 -			continue;
   15.43 -		for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) {
   15.44 -			pte = pte_offset_kernel(pmd, 0);
   15.45 -			protect_page(dpgd, pte, mode);
   15.46 -		}
   15.47 -	}
   15.48 -}
   15.49 -
   15.50  static inline int is_kernel_text(unsigned long addr)
   15.51  {
   15.52  	if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end)
   15.53 @@ -180,6 +146,10 @@ static void __init kernel_physical_mappi
   15.54  	pte_t *pte;
   15.55  	int pgd_idx, pmd_idx, pte_ofs;
   15.56  
   15.57 +	unsigned long max_ram_pfn = xen_start_info.nr_pages;
   15.58 +	if (max_ram_pfn > max_low_pfn)
   15.59 +		max_ram_pfn = max_low_pfn;
   15.60 +
   15.61  	pgd_idx = pgd_index(PAGE_OFFSET);
   15.62  	pgd = pgd_base + pgd_idx;
   15.63  	pfn = 0;
   15.64 @@ -207,7 +177,10 @@ static void __init kernel_physical_mappi
   15.65  				pte = one_page_table_init(pmd);
   15.66  
   15.67  				pte += pte_ofs;
   15.68 -				for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
   15.69 +				/* XEN: Only map initial RAM allocation. */
   15.70 +				for (; pte_ofs < PTRS_PER_PTE && pfn < max_ram_pfn; pte++, pfn++, pte_ofs++) {
   15.71 +						if (pte_present(*pte))
   15.72 +							continue;
   15.73  						if (is_kernel_text(address))
   15.74  							set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
   15.75  						else
   15.76 @@ -311,7 +284,8 @@ void __init one_highpage_init(struct pag
   15.77  		ClearPageReserved(page);
   15.78  		set_bit(PG_highmem, &page->flags);
   15.79  		set_page_count(page, 1);
   15.80 -		__free_page(page);
   15.81 +		if (pfn < xen_start_info.nr_pages)
   15.82 +			__free_page(page);
   15.83  		totalhigh_pages++;
   15.84  	} else
   15.85  		SetPageReserved(page);
   15.86 @@ -347,7 +321,8 @@ extern void __init remap_numa_kva(void);
   15.87  static void __init pagetable_init (void)
   15.88  {
   15.89  	unsigned long vaddr;
   15.90 -	pgd_t *pgd_base = swapper_pg_dir;
   15.91 +	pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
   15.92 +	pgd_t *new_pgd = swapper_pg_dir;
   15.93  
   15.94  #ifdef CONFIG_X86_PAE
   15.95  	int i;
   15.96 @@ -368,7 +343,22 @@ static void __init pagetable_init (void)
   15.97  		__PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
   15.98  	}
   15.99  
  15.100 -	kernel_physical_mapping_init(pgd_base);
  15.101 +	/*
  15.102 +	 * Switch to proper mm_init page directory. Initialise from the current
  15.103 +	 * page directory, write-protect the new page directory, then switch to
  15.104 +	 * it. We clean up by write-enabling and then freeing the old page dir.
  15.105 +	 */
  15.106 +	memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t));
  15.107 +	make_page_readonly(new_pgd);
  15.108 +	queue_pgd_pin(__pa(new_pgd));
  15.109 +	load_cr3(new_pgd);
  15.110 +	queue_pgd_unpin(__pa(old_pgd));
  15.111 +	__flush_tlb_all(); /* implicit flush */
  15.112 +	make_page_writable(old_pgd);
  15.113 +	flush_page_update_queue();
  15.114 +	free_bootmem(__pa(old_pgd), PAGE_SIZE);
  15.115 +
  15.116 +	kernel_physical_mapping_init(new_pgd);
  15.117  	remap_numa_kva();
  15.118  
  15.119  	/*
  15.120 @@ -376,9 +366,9 @@ static void __init pagetable_init (void)
  15.121  	 * created - mappings will be set by set_fixmap():
  15.122  	 */
  15.123  	vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
  15.124 -	page_table_range_init(vaddr, 0, pgd_base);
  15.125 +	page_table_range_init(vaddr, 0, new_pgd);
  15.126  
  15.127 -	permanent_kmaps_init(pgd_base);
  15.128 +	permanent_kmaps_init(new_pgd);
  15.129  
  15.130  #ifdef CONFIG_X86_PAE
  15.131  	/*
  15.132 @@ -388,7 +378,7 @@ static void __init pagetable_init (void)
  15.133  	 * All user-space mappings are explicitly cleared after
  15.134  	 * SMP startup.
  15.135  	 */
  15.136 -	pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
  15.137 +	new_pgd[0] = new_pgd[USER_PTRS_PER_PGD];
  15.138  #endif
  15.139  }
  15.140  
  15.141 @@ -545,8 +535,6 @@ out:
  15.142   */
  15.143  void __init paging_init(void)
  15.144  {
  15.145 -	pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base;
  15.146 -	pgd_t *new_pgd = swapper_pg_dir;
  15.147  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
  15.148  	int i;
  15.149  #endif
  15.150 @@ -559,25 +547,6 @@ void __init paging_init(void)
  15.151  
  15.152  	pagetable_init();
  15.153  
  15.154 -	/*
  15.155 -	 * Write-protect both page tables within both page tables.
  15.156 -	 * That's three ops, as the old p.t. is already protected
  15.157 -	 * within the old p.t. Then pin the new table, switch tables,
  15.158 -	 * and unprotect the old table.
  15.159 -	 */
  15.160 -	protect_pagetable(new_pgd, old_pgd, PROT_ON);
  15.161 -	protect_pagetable(new_pgd, new_pgd, PROT_ON);
  15.162 -	protect_pagetable(old_pgd, new_pgd, PROT_ON);
  15.163 -	queue_pgd_pin(__pa(new_pgd));
  15.164 -	load_cr3(new_pgd);
  15.165 -	queue_pgd_unpin(__pa(old_pgd));
  15.166 -	__flush_tlb_all(); /* implicit flush */
  15.167 -	protect_pagetable(new_pgd, old_pgd, PROT_OFF);
  15.168 -	flush_page_update_queue();
  15.169 -
  15.170 -	/* Completely detached from old tables, so free them. */
  15.171 -	free_bootmem(__pa(old_pgd), xen_start_info.nr_pt_frames << PAGE_SHIFT);
  15.172 -
  15.173  #ifdef CONFIG_X86_PAE
  15.174  	/*
  15.175  	 * We will bail out later - printk doesn't work right now so
    16.1 --- a/linux-2.6.9-xen-sparse/arch/xen/i386/mm/ioremap.c	Mon Nov 29 17:11:03 2004 +0000
    16.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/i386/mm/ioremap.c	Fri Dec 10 18:49:15 2004 +0000
    16.3 @@ -11,28 +11,39 @@
    16.4  #include <linux/vmalloc.h>
    16.5  #include <linux/init.h>
    16.6  #include <linux/slab.h>
    16.7 +#include <linux/module.h>
    16.8  #include <asm/io.h>
    16.9  #include <asm/fixmap.h>
   16.10  #include <asm/cacheflush.h>
   16.11  #include <asm/tlbflush.h>
   16.12  #include <asm/pgtable.h>
   16.13 +#include <asm/pgalloc.h>
   16.14  
   16.15  #ifndef CONFIG_XEN_PHYSDEV_ACCESS
   16.16  
   16.17 -void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
   16.18 -{ return NULL; }
   16.19 +void * __ioremap(unsigned long phys_addr, unsigned long size,
   16.20 +		unsigned long flags)
   16.21 +{
   16.22 +	return NULL;
   16.23 +}
   16.24  
   16.25  void *ioremap_nocache (unsigned long phys_addr, unsigned long size)
   16.26 -{ return NULL; }
   16.27 +{
   16.28 +	return NULL;
   16.29 +}
   16.30  
   16.31  void iounmap(volatile void __iomem *addr)
   16.32 -{ }
   16.33 +{
   16.34 +}
   16.35  
   16.36  void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
   16.37 -{ return NULL; }
   16.38 +{
   16.39 +	return NULL;
   16.40 +}
   16.41  
   16.42  void __init bt_iounmap(void *addr, unsigned long size)
   16.43 -{ }
   16.44 +{
   16.45 +}
   16.46  
   16.47  #else
   16.48  
   16.49 @@ -50,86 +61,6 @@ static inline int is_local_lowmem(unsign
   16.50  	return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn));
   16.51  }
   16.52  
   16.53 -static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
   16.54 -	unsigned long phys_addr, unsigned long flags)
   16.55 -{
   16.56 -	unsigned long end;
   16.57 -	unsigned long pfn;
   16.58 -
   16.59 -	address &= ~PMD_MASK;
   16.60 -	end = address + size;
   16.61 -	if (end > PMD_SIZE)
   16.62 -		end = PMD_SIZE;
   16.63 -	if (address >= end)
   16.64 -		BUG();
   16.65 -	pfn = phys_addr >> PAGE_SHIFT;
   16.66 -	do {
   16.67 -		if (!pte_none(*pte)) {
   16.68 -			printk("remap_area_pte: page already exists\n");
   16.69 -			BUG();
   16.70 -		}
   16.71 -		set_pte(pte, pfn_pte_ma(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | 
   16.72 -					_PAGE_DIRTY | _PAGE_ACCESSED | flags)));
   16.73 -		address += PAGE_SIZE;
   16.74 -		pfn++;
   16.75 -		pte++;
   16.76 -	} while (address && (address < end));
   16.77 -}
   16.78 -
   16.79 -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
   16.80 -	unsigned long phys_addr, unsigned long flags)
   16.81 -{
   16.82 -	unsigned long end;
   16.83 -
   16.84 -	address &= ~PGDIR_MASK;
   16.85 -	end = address + size;
   16.86 -	if (end > PGDIR_SIZE)
   16.87 -		end = PGDIR_SIZE;
   16.88 -	phys_addr -= address;
   16.89 -	if (address >= end)
   16.90 -		BUG();
   16.91 -	do {
   16.92 -		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
   16.93 -		if (!pte)
   16.94 -			return -ENOMEM;
   16.95 -		remap_area_pte(pte, address, end - address, address + phys_addr, flags);
   16.96 -		address = (address + PMD_SIZE) & PMD_MASK;
   16.97 -		pmd++;
   16.98 -	} while (address && (address < end));
   16.99 -	return 0;
  16.100 -}
  16.101 -
  16.102 -static int remap_area_pages(unsigned long address, unsigned long phys_addr,
  16.103 -				 unsigned long size, unsigned long flags)
  16.104 -{
  16.105 -	int error;
  16.106 -	pgd_t * dir;
  16.107 -	unsigned long end = address + size;
  16.108 -
  16.109 -	phys_addr -= address;
  16.110 -	dir = pgd_offset(&init_mm, address);
  16.111 -	flush_cache_all();
  16.112 -	if (address >= end)
  16.113 -		BUG();
  16.114 -	spin_lock(&init_mm.page_table_lock);
  16.115 -	do {
  16.116 -		pmd_t *pmd;
  16.117 -		pmd = pmd_alloc(&init_mm, dir, address);
  16.118 -		error = -ENOMEM;
  16.119 -		if (!pmd)
  16.120 -			break;
  16.121 -		if (remap_area_pmd(pmd, address, end - address,
  16.122 -					 phys_addr + address, flags))
  16.123 -			break;
  16.124 -		error = 0;
  16.125 -		address = (address + PGDIR_SIZE) & PGDIR_MASK;
  16.126 -		dir++;
  16.127 -	} while (address && (address < end));
  16.128 -	spin_unlock(&init_mm.page_table_lock);
  16.129 -	flush_tlb_all();
  16.130 -	return error;
  16.131 -}
  16.132 -
  16.133  /*
  16.134   * Generic mapping function (not visible outside):
  16.135   */
  16.136 @@ -192,7 +123,7 @@ void __iomem * __ioremap(unsigned long p
  16.137  		return NULL;
  16.138  	area->phys_addr = phys_addr;
  16.139  	addr = (void __iomem *) area->addr;
  16.140 -	if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
  16.141 +	if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr, size, __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | flags), DOMID_IO)) {
  16.142  		vunmap((void __force *) addr);
  16.143  		return NULL;
  16.144  	}
  16.145 @@ -360,138 +291,147 @@ void __init bt_iounmap(void *addr, unsig
  16.146  #define direct_mk_pte_phys(physpage, pgprot) \
  16.147    __direct_mk_pte((physpage) >> PAGE_SHIFT, pgprot)
  16.148  
  16.149 -static inline void direct_remap_area_pte(pte_t *pte, 
  16.150 -                                        unsigned long address, 
  16.151 -                                        unsigned long size,
  16.152 -					mmu_update_t **v)
  16.153 +static inline void direct_remap_area_pte(
  16.154 +	pte_t *pte, 
  16.155 +	unsigned long address, 
  16.156 +	unsigned long size,
  16.157 +	mmu_update_t **v)
  16.158  {
  16.159 -    unsigned long end;
  16.160 +	unsigned long end;
  16.161  
  16.162 -    address &= ~PMD_MASK;
  16.163 -    end = address + size;
  16.164 -    if (end > PMD_SIZE)
  16.165 -        end = PMD_SIZE;
  16.166 -    if (address >= end)
  16.167 -        BUG();
  16.168 +	address &= ~PMD_MASK;
  16.169 +	end = address + size;
  16.170 +	if (end > PMD_SIZE)
  16.171 +		end = PMD_SIZE;
  16.172 +	if (address >= end)
  16.173 +		BUG();
  16.174  
  16.175 -    do {
  16.176 -        (*v)->ptr = virt_to_machine(pte);
  16.177 -        (*v)++;
  16.178 -        address += PAGE_SIZE;
  16.179 -        pte++;
  16.180 -    } while (address && (address < end));
  16.181 +	do {
  16.182 +		(*v)->ptr = virt_to_machine(pte);
  16.183 +		(*v)++;
  16.184 +		address += PAGE_SIZE;
  16.185 +		pte++;
  16.186 +	} while (address && (address < end));
  16.187  }
  16.188  
  16.189 -static inline int direct_remap_area_pmd(struct mm_struct *mm,
  16.190 -                                        pmd_t *pmd, 
  16.191 -                                        unsigned long address, 
  16.192 -                                        unsigned long size,
  16.193 -					mmu_update_t **v)
  16.194 +static inline int direct_remap_area_pmd(
  16.195 +	struct mm_struct *mm,
  16.196 +	pmd_t *pmd, 
  16.197 +	unsigned long address, 
  16.198 +	unsigned long size,
  16.199 +	mmu_update_t **v)
  16.200  {
  16.201 -    unsigned long end;
  16.202 +	unsigned long end;
  16.203  
  16.204 -    address &= ~PGDIR_MASK;
  16.205 -    end = address + size;
  16.206 -    if (end > PGDIR_SIZE)
  16.207 -        end = PGDIR_SIZE;
  16.208 -    if (address >= end)
  16.209 -        BUG();
  16.210 -    do {
  16.211 -        pte_t *pte = pte_alloc_map(mm, pmd, address);
  16.212 -        if (!pte)
  16.213 -            return -ENOMEM;
  16.214 -        direct_remap_area_pte(pte, address, end - address, v);
  16.215 -	pte_unmap(pte);
  16.216 -        address = (address + PMD_SIZE) & PMD_MASK;
  16.217 -        pmd++;
  16.218 -    } while (address && (address < end));
  16.219 -    return 0;
  16.220 +	address &= ~PGDIR_MASK;
  16.221 +	end = address + size;
  16.222 +	if (end > PGDIR_SIZE)
  16.223 +		end = PGDIR_SIZE;
  16.224 +	if (address >= end)
  16.225 +		BUG();
  16.226 +	do {
  16.227 +		pte_t *pte = (mm == &init_mm) ? 
  16.228 +			pte_alloc_kernel(mm, pmd, address) :
  16.229 +			pte_alloc_map(mm, pmd, address);
  16.230 +		if (!pte)
  16.231 +			return -ENOMEM;
  16.232 +		direct_remap_area_pte(pte, address, end - address, v);
  16.233 +		pte_unmap(pte);
  16.234 +		address = (address + PMD_SIZE) & PMD_MASK;
  16.235 +		pmd++;
  16.236 +	} while (address && (address < end));
  16.237 +	return 0;
  16.238  }
  16.239   
  16.240 -int __direct_remap_area_pages(struct mm_struct *mm,
  16.241 -			      unsigned long address, 
  16.242 -			      unsigned long size, 
  16.243 -			      mmu_update_t *v)
  16.244 +int __direct_remap_area_pages(
  16.245 +	struct mm_struct *mm,
  16.246 +	unsigned long address, 
  16.247 +	unsigned long size, 
  16.248 +	mmu_update_t *v)
  16.249  {
  16.250 -    pgd_t * dir;
  16.251 -    unsigned long end = address + size;
  16.252 +	pgd_t * dir;
  16.253 +	unsigned long end = address + size;
  16.254  
  16.255 -    dir = pgd_offset(mm, address);
  16.256 -    flush_cache_all();
  16.257 -    if (address >= end)
  16.258 -        BUG();
  16.259 -    spin_lock(&mm->page_table_lock);
  16.260 -    do {
  16.261 -        pmd_t *pmd = pmd_alloc(mm, dir, address);
  16.262 -        if (!pmd)
  16.263 -	    return -ENOMEM;
  16.264 -        direct_remap_area_pmd(mm, pmd, address, end - address, &v);
  16.265 -        address = (address + PGDIR_SIZE) & PGDIR_MASK;
  16.266 -        dir++;
  16.267 +	dir = pgd_offset(mm, address);
  16.268 +	if (address >= end)
  16.269 +		BUG();
  16.270 +	spin_lock(&mm->page_table_lock);
  16.271 +	do {
  16.272 +		pmd_t *pmd = pmd_alloc(mm, dir, address);
  16.273 +		if (!pmd)
  16.274 +			return -ENOMEM;
  16.275 +		direct_remap_area_pmd(mm, pmd, address, end - address, &v);
  16.276 +		address = (address + PGDIR_SIZE) & PGDIR_MASK;
  16.277 +		dir++;
  16.278  
  16.279 -    } while (address && (address < end));
  16.280 -    spin_unlock(&mm->page_table_lock);
  16.281 -    flush_tlb_all();
  16.282 -    return 0;
  16.283 +	} while (address && (address < end));
  16.284 +	spin_unlock(&mm->page_table_lock);
  16.285 +	return 0;
  16.286  }
  16.287  
  16.288  
  16.289 -int direct_remap_area_pages(struct mm_struct *mm,
  16.290 -                            unsigned long address, 
  16.291 -                            unsigned long machine_addr,
  16.292 -                            unsigned long size, 
  16.293 -                            pgprot_t prot,
  16.294 -                            domid_t  domid)
  16.295 +int direct_remap_area_pages(
  16.296 +	struct mm_struct *mm,
  16.297 +	unsigned long address, 
  16.298 +	unsigned long machine_addr,
  16.299 +	unsigned long size, 
  16.300 +	pgprot_t prot,
  16.301 +	domid_t  domid)
  16.302  {
  16.303 -    int i;
  16.304 -    unsigned long start_address;
  16.305 +	int i;
  16.306 +	unsigned long start_address;
  16.307  #define MAX_DIRECTMAP_MMU_QUEUE 130
  16.308 -    mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
  16.309 +	mmu_update_t u[MAX_DIRECTMAP_MMU_QUEUE], *w, *v;
  16.310  
  16.311 -    u[0].ptr  = MMU_EXTENDED_COMMAND;
  16.312 -    u[0].val  = MMUEXT_SET_FOREIGNDOM;
  16.313 -    u[0].val |= (unsigned long)domid << 16;
  16.314 -    v = w = &u[1];
  16.315 +	u[0].ptr  = MMU_EXTENDED_COMMAND;
  16.316 +	u[0].val  = MMUEXT_SET_FOREIGNDOM;
  16.317 +	u[0].val |= (unsigned long)domid << 16;
  16.318 +	v = w = &u[1];
  16.319 +
  16.320 +	start_address = address;
  16.321 +
  16.322 +	flush_cache_all();
  16.323  
  16.324 -    start_address = address;
  16.325 +	for (i = 0; i < size; i += PAGE_SIZE) {
  16.326 +		if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) {
  16.327 +			/* Fill in the PTE pointers. */
  16.328 +			__direct_remap_area_pages(
  16.329 +				mm,
  16.330 +				start_address, 
  16.331 +				address-start_address, 
  16.332 +				w);
  16.333 +	
  16.334 +			if (HYPERVISOR_mmu_update(u, v - u, NULL) < 0)
  16.335 +				return -EFAULT;
  16.336 +			v = w;
  16.337 +			start_address = address;
  16.338 +		}
  16.339  
  16.340 -    for( i = 0; i < size; i += PAGE_SIZE )
  16.341 -    {
  16.342 -	if ( (v - u) == MAX_DIRECTMAP_MMU_QUEUE )
  16.343 -	{
  16.344 -	    /* Fill in the PTE pointers. */
  16.345 -	    __direct_remap_area_pages( mm,
  16.346 -				       start_address, 
  16.347 -				       address-start_address, 
  16.348 -				       w);
  16.349 -	    
  16.350 -	    if ( HYPERVISOR_mmu_update(u, v - u, NULL) < 0 )
  16.351 -		return -EFAULT;	    
  16.352 -	    v = w;
  16.353 -	    start_address = address;
  16.354 +		/*
  16.355 +		 * Fill in the machine address: PTE ptr is done later by
  16.356 +		 * __direct_remap_area_pages(). 
  16.357 +		 */
  16.358 +		v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
  16.359 +
  16.360 +		machine_addr += PAGE_SIZE;
  16.361 +		address += PAGE_SIZE; 
  16.362 +		v++;
  16.363  	}
  16.364  
  16.365 -	/*
  16.366 -         * Fill in the machine address: PTE ptr is done later by
  16.367 -         * __direct_remap_area_pages(). 
  16.368 -         */
  16.369 -        v->val = (machine_addr & PAGE_MASK) | pgprot_val(prot);
  16.370 -
  16.371 -        machine_addr += PAGE_SIZE;
  16.372 -        address += PAGE_SIZE; 
  16.373 -        v++;
  16.374 -    }
  16.375 +	if (v != w) {
  16.376 +		/* get the ptep's filled in */
  16.377 +		__direct_remap_area_pages(
  16.378 +			mm,
  16.379 +			start_address, 
  16.380 +			address-start_address, 
  16.381 +			w);
  16.382 +		if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0))
  16.383 +			return -EFAULT;	
  16.384 +	}
  16.385  
  16.386 -    if ( v != w )
  16.387 -    {
  16.388 -	/* get the ptep's filled in */
  16.389 -	__direct_remap_area_pages(mm,
  16.390 -                                  start_address, 
  16.391 -                                  address-start_address, 
  16.392 -                                  w);	 
  16.393 -	if ( unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0) )
  16.394 -	    return -EFAULT;	    
  16.395 -    }
  16.396 -    
  16.397 -    return 0;
  16.398 +	flush_tlb_all();
  16.399 +
  16.400 +	return 0;
  16.401  }
  16.402 +
  16.403 +EXPORT_SYMBOL(direct_remap_area_pages);
    17.1 --- a/linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile	Mon Nov 29 17:11:03 2004 +0000
    17.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/Makefile	Fri Dec 10 18:49:15 2004 +0000
    17.3 @@ -10,4 +10,4 @@ XENARCH	:= $(subst ",,$(CONFIG_XENARCH))
    17.4  extra-y += vmlinux.lds
    17.5  
    17.6  obj-y	:= ctrl_if.o evtchn.o fixup.o reboot.o xen_proc.o \
    17.7 -           gnttab.o skbuff.o smp.o
    17.8 +           gnttab.o skbuff.o devmem.o smp.o
    18.1 --- a/linux-2.6.9-xen-sparse/arch/xen/kernel/ctrl_if.c	Mon Nov 29 17:11:03 2004 +0000
    18.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/ctrl_if.c	Fri Dec 10 18:49:15 2004 +0000
    18.3 @@ -35,6 +35,7 @@
    18.4  #include <linux/errno.h>
    18.5  #include <linux/irq.h>
    18.6  #include <linux/interrupt.h>
    18.7 +#include <linux/module.h>
    18.8  #include <asm-xen/ctrl_if.h>
    18.9  #include <asm-xen/evtchn.h>
   18.10  
   18.11 @@ -541,3 +542,10 @@ void ctrl_if_discard_responses(void)
   18.12      ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod;
   18.13  }
   18.14  
   18.15 +EXPORT_SYMBOL(ctrl_if_send_message_noblock);
   18.16 +EXPORT_SYMBOL(ctrl_if_send_message_block);
   18.17 +EXPORT_SYMBOL(ctrl_if_send_message_and_get_response);
   18.18 +EXPORT_SYMBOL(ctrl_if_enqueue_space_callback);
   18.19 +EXPORT_SYMBOL(ctrl_if_send_response);
   18.20 +EXPORT_SYMBOL(ctrl_if_register_receiver);
   18.21 +EXPORT_SYMBOL(ctrl_if_unregister_receiver);
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/devmem.c	Fri Dec 10 18:49:15 2004 +0000
    19.3 @@ -0,0 +1,158 @@
    19.4 +/*
    19.5 + *  Originally from linux/drivers/char/mem.c
    19.6 + *
    19.7 + *  Copyright (C) 1991, 1992  Linus Torvalds
    19.8 + *
    19.9 + *  Added devfs support. 
   19.10 + *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
   19.11 + *  Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
   19.12 + */
   19.13 +
   19.14 +#include <linux/config.h>
   19.15 +#include <linux/mm.h>
   19.16 +#include <linux/miscdevice.h>
   19.17 +#include <linux/slab.h>
   19.18 +#include <linux/vmalloc.h>
   19.19 +#include <linux/mman.h>
   19.20 +#include <linux/random.h>
   19.21 +#include <linux/init.h>
   19.22 +#include <linux/raw.h>
   19.23 +#include <linux/tty.h>
   19.24 +#include <linux/capability.h>
   19.25 +#include <linux/smp_lock.h>
   19.26 +#include <linux/devfs_fs_kernel.h>
   19.27 +#include <linux/ptrace.h>
   19.28 +#include <linux/device.h>
   19.29 +#include <asm/pgalloc.h>
   19.30 +#include <asm/uaccess.h>
   19.31 +#include <asm/io.h>
   19.32 +/* Returns 1 (map uncached) for O_SYNC opens, else 0; addr is not consulted. */
   19.33 +static inline int uncached_access(struct file *file, unsigned long addr)
   19.34 +{
   19.35 +        if (file->f_flags & O_SYNC)
   19.36 +                return 1;
   19.37 +        /* Xen sets correct MTRR type on non-RAM for us. */
   19.38 +        return 0;
   19.39 +}
   19.40 +
   19.41 +/*
   19.42 + * This function reads the *physical* memory. *ppos is the physical address,
   19.43 + * which is ioremap()ed for the copy. Returns count, or -EFAULT if buf faults.
   19.44 + */
   19.45 +static ssize_t read_mem(struct file * file, char __user * buf,
   19.46 +			size_t count, loff_t *ppos)
   19.47 +{
   19.48 +	unsigned long i, p = *ppos;
   19.49 +	ssize_t read = 0;
   19.50 +	void *v;
   19.51 +
   19.52 +	if ((v = ioremap(p, count)) == NULL) {
   19.53 +		/*
   19.54 +		 * Some programs (e.g., dmidecode) groove off into weird RAM
   19.55 +		 * areas where no table can possibly exist (because Xen will
   19.56 +		 * have stomped on them!). These programs get rather upset if
   19.57 +		 * we let them know that Xen failed their access, so we fake
   19.58 +		 * out a read of all zeroes. :-)
   19.59 +		 */
   19.60 +		for (i = 0; i < count; i++)
   19.61 +			if (put_user(0, buf+i))
   19.62 +				return -EFAULT;
   19.63 +		return count;
   19.64 +	}
   19.65 +	i = copy_to_user(buf, v, count);
   19.66 +	iounmap(v);	/* unmap even when the copy faulted, so v never leaks */
   19.67 +	if (i)
   19.68 +		return -EFAULT;
   19.69 +
   19.70 +	read += count;
   19.71 +	*ppos += read;
   19.72 +	return read;
   19.73 +}
   19.74 +/* Write count bytes from buf to *physical* memory at *ppos; -EFAULT on failure. */
   19.75 +static ssize_t write_mem(struct file * file, const char __user * buf, 
   19.76 +			 size_t count, loff_t *ppos)
   19.77 +{
   19.78 +	unsigned long left, p = *ppos;
   19.79 +	ssize_t written = 0;
   19.80 +	void *v;
   19.81 +
   19.82 +	if ((v = ioremap(p, count)) == NULL)
   19.83 +		return -EFAULT;
   19.84 +	/* Data flows user -> mapping, so this must be copy_from_user(). */
   19.85 +	left = copy_from_user(v, buf, count);
   19.86 +	iounmap(v);	/* unmap even when the copy faulted, so v never leaks */
   19.87 +	if (left)
   19.88 +		return -EFAULT;
   19.89 +	written += count;
   19.90 +	*ppos += written;
   19.91 +	return written;
   19.92 +}
   19.93 +/* mmap handler: remap the range at byte offset vm_pgoff<<PAGE_SHIFT into vma. */
   19.94 +static int mmap_mem(struct file * file, struct vm_area_struct * vma)
   19.95 +{
   19.96 +	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
   19.97 +	int uncached;
   19.98 +
   19.99 +	uncached = uncached_access(file, offset);
  19.100 +	if (uncached)
  19.101 +		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  19.102 +
  19.103 +	/* Don't try to swap out physical pages. */
  19.104 +	vma->vm_flags |= VM_RESERVED;
  19.105 +
  19.106 +	/*
  19.107 +	 * Don't dump addresses that are not real memory to a core file.
  19.108 +	 */
  19.109 +	if (uncached)
  19.110 +		vma->vm_flags |= VM_IO;
  19.111 +
  19.112 +	if (io_remap_page_range(vma, vma->vm_start, offset, 
  19.113 +				vma->vm_end-vma->vm_start, vma->vm_page_prot))
  19.114 +		return -EAGAIN;	/* remap failed */
  19.115 +
  19.116 +	return 0;
  19.117 +}
  19.118 +
  19.119 +/*
  19.120 + * The memory devices use the full 32/64 bits of the offset, and so we cannot
  19.121 + * check against negative addresses: they are ok. The return value is weird,
  19.122 + * though, in that case (0).
  19.123 + *
  19.124 + * Also note that seeking relative to the "end of file" isn't supported:
  19.125 + * it has no meaning, so it returns -EINVAL.
  19.126 + */
  19.127 +static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
  19.128 +{
  19.129 +	loff_t ret;
  19.130 +
  19.131 +	down(&file->f_dentry->d_inode->i_sem);	/* f_pos is updated under the inode semaphore */
  19.132 +	switch (orig) {
  19.133 +		case 0:	/* absolute: f_pos = offset */
  19.134 +			file->f_pos = offset;
  19.135 +			ret = file->f_pos;
  19.136 +			force_successful_syscall_return();
  19.137 +			break;
  19.138 +		case 1:	/* relative to the current f_pos */
  19.139 +			file->f_pos += offset;
  19.140 +			ret = file->f_pos;
  19.141 +			force_successful_syscall_return();
  19.142 +			break;
  19.143 +		default:	/* any other origin, including end-of-file, is rejected */
  19.144 +			ret = -EINVAL;
  19.145 +	}
  19.146 +	up(&file->f_dentry->d_inode->i_sem);
  19.147 +	return ret;
  19.148 +}
  19.149 +/* Open is permitted only with CAP_SYS_RAWIO; otherwise it fails with -EPERM. */
  19.150 +static int open_mem(struct inode * inode, struct file * filp)
  19.151 +{
  19.152 +	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
  19.153 +}
  19.154 +/* Non-static: drivers/char/mem.c declares mem_fops extern under ARCH_HAS_DEV_MEM. */
  19.155 +struct file_operations mem_fops = {
  19.156 +	.llseek		= memory_lseek,
  19.157 +	.read		= read_mem,
  19.158 +	.write		= write_mem,
  19.159 +	.mmap		= mmap_mem,
  19.160 +	.open		= open_mem,
  19.161 +};
    20.1 --- a/linux-2.6.9-xen-sparse/arch/xen/kernel/reboot.c	Mon Nov 29 17:11:03 2004 +0000
    20.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/reboot.c	Fri Dec 10 18:49:15 2004 +0000
    20.3 @@ -8,6 +8,7 @@ static int errno;
    20.4  #include <linux/unistd.h>
    20.5  #include <linux/module.h>
    20.6  #include <linux/reboot.h>
    20.7 +#include <linux/sysrq.h>
    20.8  #include <asm/irq.h>
    20.9  #include <asm/mmu_context.h>
   20.10  #include <asm-xen/ctrl_if.h>
   20.11 @@ -49,10 +50,9 @@ EXPORT_SYMBOL(machine_power_off);
   20.12   * Stop/pickle callback handling.
   20.13   */
   20.14  
   20.15 -//#include <asm/suspend.h>
   20.16 -
   20.17  /* Ignore multiple shutdown requests. */
   20.18  static int shutting_down = -1;
   20.19 +static int pending_sysrq = -1;
   20.20  
   20.21  static void __do_suspend(void)
   20.22  {
   20.23 @@ -214,9 +214,18 @@ static void __shutdown_handler(void *unu
   20.24      }
   20.25  }
   20.26  
   20.27 +static void __sysrq_handler(void *unused)
   20.28 +{
   20.29 +#ifdef CONFIG_MAGIC_SYSRQ
   20.30 +    handle_sysrq(pending_sysrq, NULL, NULL); /* key stashed by shutdown_handler() */
   20.31 +#endif
   20.32 +    pending_sysrq = -1; /* -1 lets shutdown_handler() accept the next sysrq */
   20.33 +}
   20.34 +
   20.35  static void shutdown_handler(ctrl_msg_t *msg, unsigned long id)
   20.36  {
   20.37      static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
   20.38 +    static DECLARE_WORK(sysrq_work, __sysrq_handler, NULL);
   20.39  
   20.40      if ( (shutting_down == -1) &&
   20.41           ((msg->subtype == CMSG_SHUTDOWN_POWEROFF) ||
   20.42 @@ -226,6 +235,12 @@ static void shutdown_handler(ctrl_msg_t 
   20.43          shutting_down = msg->subtype;
   20.44          schedule_work(&shutdown_work);
   20.45      }
   20.46 +    else if ( (pending_sysrq == -1) && 
   20.47 +              (msg->subtype == CMSG_SHUTDOWN_SYSRQ) )
   20.48 +    {
   20.49 +        pending_sysrq = msg->msg[0];
   20.50 +        schedule_work(&sysrq_work);
   20.51 +    }
   20.52      else
   20.53      {
   20.54          printk("Ignore spurious shutdown request\n");
    21.1 --- a/linux-2.6.9-xen-sparse/drivers/char/mem.c	Mon Nov 29 17:11:03 2004 +0000
    21.2 +++ b/linux-2.6.9-xen-sparse/drivers/char/mem.c	Fri Dec 10 18:49:15 2004 +0000
    21.3 @@ -26,7 +26,6 @@
    21.4  
    21.5  #include <asm/uaccess.h>
    21.6  #include <asm/io.h>
    21.7 -#include <asm/pgalloc.h>
    21.8  
    21.9  #ifdef CONFIG_IA64
   21.10  # include <linux/efi.h>
   21.11 @@ -43,12 +42,7 @@ extern void tapechar_init(void);
   21.12   */
   21.13  static inline int uncached_access(struct file *file, unsigned long addr)
   21.14  {
   21.15 -#ifdef CONFIG_XEN
   21.16 -        if (file->f_flags & O_SYNC)
   21.17 -                return 1;
   21.18 -        /* Xen sets correct MTRR type on non-RAM for us. */
   21.19 -        return 0;
   21.20 -#elif defined(__i386__)
   21.21 +#if defined(__i386__)
   21.22  	/*
   21.23  	 * On the PPro and successors, the MTRRs are used to set
   21.24  	 * memory types for physical addresses outside main memory,
   21.25 @@ -149,7 +143,7 @@ static ssize_t do_write_mem(void *p, uns
   21.26  	return written;
   21.27  }
   21.28  
   21.29 -
   21.30 +#ifndef ARCH_HAS_DEV_MEM
   21.31  /*
   21.32   * This funcion reads the *physical* memory. The f_pos points directly to the 
   21.33   * memory location. 
   21.34 @@ -195,8 +189,9 @@ static ssize_t write_mem(struct file * f
   21.35  		return -EFAULT;
   21.36  	return do_write_mem(__va(p), p, buf, count, ppos);
   21.37  }
   21.38 +#endif
   21.39  
   21.40 -static int mmap_mem(struct file * file, struct vm_area_struct * vma)
   21.41 +static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
   21.42  {
   21.43  	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
   21.44  	int uncached;
   21.45 @@ -216,15 +211,9 @@ static int mmap_mem(struct file * file, 
   21.46  	if (uncached)
   21.47  		vma->vm_flags |= VM_IO;
   21.48  
   21.49 -#if defined(CONFIG_XEN)
   21.50 -	if (io_remap_page_range(vma, vma->vm_start, offset, 
   21.51 -				vma->vm_end-vma->vm_start, vma->vm_page_prot))
   21.52 -		return -EAGAIN;
   21.53 -#else
   21.54  	if (remap_page_range(vma, vma->vm_start, offset, vma->vm_end-vma->vm_start,
   21.55  			     vma->vm_page_prot))
   21.56  		return -EAGAIN;
   21.57 -#endif
   21.58  	return 0;
   21.59  }
   21.60  
   21.61 @@ -584,7 +573,7 @@ static int open_port(struct inode * inod
   21.62  	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
   21.63  }
   21.64  
   21.65 -#define mmap_kmem	mmap_mem
   21.66 +#define mmap_mem	mmap_kmem
   21.67  #define zero_lseek	null_lseek
   21.68  #define full_lseek      null_lseek
   21.69  #define write_zero	write_null
   21.70 @@ -592,6 +581,7 @@ static int open_port(struct inode * inod
   21.71  #define open_mem	open_port
   21.72  #define open_kmem	open_mem
   21.73  
   21.74 +#ifndef ARCH_HAS_DEV_MEM
   21.75  static struct file_operations mem_fops = {
   21.76  	.llseek		= memory_lseek,
   21.77  	.read		= read_mem,
   21.78 @@ -599,6 +589,9 @@ static struct file_operations mem_fops =
   21.79  	.mmap		= mmap_mem,
   21.80  	.open		= open_mem,
   21.81  };
   21.82 +#else
   21.83 +extern struct file_operations mem_fops;
   21.84 +#endif
   21.85  
   21.86  static struct file_operations kmem_fops = {
   21.87  	.llseek		= memory_lseek,
    22.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/Makefile	Mon Nov 29 17:11:03 2004 +0000
    22.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/Makefile	Fri Dec 10 18:49:15 2004 +0000
    22.3 @@ -2,9 +2,9 @@
    22.4  
    22.5  obj-y	+= console/
    22.6  obj-y	+= evtchn/
    22.7 -obj-y	+= privcmd/
    22.8 -obj-y   += balloon/
    22.9 +obj-y	+= balloon/
   22.10  
   22.11 +obj-$(CONFIG_XEN_PRIVILEGED_GUEST)	+= privcmd/
   22.12  obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
   22.13  obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
   22.14  obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= blkfront/
    23.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c	Mon Nov 29 17:11:03 2004 +0000
    23.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/balloon/balloon.c	Fri Dec 10 18:49:15 2004 +0000
    23.3 @@ -4,6 +4,7 @@
    23.4   * Xen balloon driver - enables returning/claiming memory to/from Xen.
    23.5   *
    23.6   * Copyright (c) 2003, B Dragovic
    23.7 + * Copyright (c) 2003-2004, M Williamson, K Fraser
    23.8   * 
    23.9   * This file may be distributed separately from the Linux kernel, or
   23.10   * incorporated into other software packages, subject to the following license:
   23.11 @@ -28,8 +29,8 @@
   23.12   */
   23.13  
   23.14  #include <linux/config.h>
   23.15 +#include <linux/kernel.h>
   23.16  #include <linux/module.h>
   23.17 -#include <linux/kernel.h>
   23.18  #include <linux/sched.h>
   23.19  #include <linux/errno.h>
   23.20  #include <linux/mm.h>
   23.21 @@ -42,25 +43,39 @@
   23.22  #include <asm-xen/xen_proc.h>
   23.23  #include <asm-xen/hypervisor.h>
   23.24  #include <asm-xen/ctrl_if.h>
   23.25 +#include <asm-xen/balloon.h>
   23.26  #include <asm/pgalloc.h>
   23.27  #include <asm/pgtable.h>
   23.28  #include <asm/uaccess.h>
   23.29  #include <asm/tlb.h>
   23.30  #include <linux/list.h>
   23.31  
   23.32 -/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */
   23.33 -#define USER_INFLATE_BALLOON  1   /* return mem to hypervisor */
   23.34 -#define USER_DEFLATE_BALLOON  2   /* claim mem from hypervisor */
   23.35 -typedef struct user_balloon_op {
   23.36 -    unsigned int  op;
   23.37 -    unsigned long size;
   23.38 -} user_balloon_op_t;
   23.39 -/* END OF USER DEFINE */
   23.40 -
   23.41  static struct proc_dir_entry *balloon_pde;
   23.42  
   23.43 -unsigned long credit;
   23.44 -static unsigned long current_pages, most_seen_pages;
   23.45 +static DECLARE_MUTEX(balloon_mutex);
   23.46 +spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED;
   23.47 +
   23.48 +/* We aim for 'current allocation' == 'target allocation'. */
   23.49 +static unsigned long current_pages;
   23.50 +static unsigned long target_pages;
   23.51 +
   23.52 +/* We may hit the hard limit in Xen. If we do then we remember it. */
   23.53 +static unsigned long hard_limit;
   23.54 +
   23.55 +/*
   23.56 + * Drivers may alter the memory reservation independently, but they must
   23.57 + * inform the balloon driver so that we can avoid hitting the hard limit.
   23.58 + */
   23.59 +static unsigned long driver_pages;
   23.60 +
   23.61 +/* List of ballooned pages, threaded through the mem_map array. */
   23.62 +static LIST_HEAD(ballooned_pages);
   23.63 +static unsigned long balloon_low, balloon_high;
   23.64 +
   23.65 +/* Main work function, always executed in process context. */
   23.66 +static void balloon_process(void *unused);
   23.67 +static DECLARE_WORK(balloon_worker, balloon_process, NULL);
   23.68 +static struct timer_list balloon_timer;
   23.69  
   23.70  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   23.71  /* Use the private and mapping fields of struct page as a list. */
   23.72 @@ -76,452 +91,223 @@ static unsigned long current_pages, most
   23.73  #define LIST_TO_PAGE(l) ( list_entry(l, struct page, list) )
   23.74  #define UNLIST_PAGE(p)  ( list_del(&p->list) )
   23.75  #define pte_offset_kernel pte_offset
   23.76 +#define subsys_initcall(_fn) __initcall(_fn)
   23.77  #endif
   23.78  
   23.79 -/* List of ballooned pages, threaded through the mem_map array. */
   23.80 -LIST_HEAD(ballooned_pages);
   23.81 +#define IPRINTK(fmt, args...) \
   23.82 +    printk(KERN_INFO "xen_mem: " fmt, ##args)
   23.83 +#define WPRINTK(fmt, args...) \
   23.84 +    printk(KERN_WARNING "xen_mem: " fmt, ##args)
   23.85  
   23.86 -/** add_ballooned_page - remember we've ballooned a pfn */
   23.87 -void add_ballooned_page(unsigned long pfn)
   23.88 +/* balloon_append: add the given page to the balloon. */
   23.89 +static void balloon_append(struct page *page)
   23.90  {
   23.91 -    struct page *p = mem_map + pfn;
   23.92 -
   23.93 -    list_add(PAGE_TO_LIST(p), &ballooned_pages);
   23.94 +    /* Low memory is re-populated first, so highmem pages go at list tail. */
   23.95 +    if ( PageHighMem(page) )
   23.96 +    {
   23.97 +        list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
   23.98 +        balloon_high++;
   23.99 +    }
  23.100 +    else
  23.101 +    {
  23.102 +        list_add(PAGE_TO_LIST(page), &ballooned_pages);
  23.103 +        balloon_low++;
  23.104 +    }
  23.105  }
  23.106  
  23.107 -/* rem_ballooned_page - recall a ballooned page and remove from list. */
  23.108 -struct page *rem_ballooned_page(void)
  23.109 +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
  23.110 +static struct page *balloon_retrieve(void)
  23.111  {
  23.112 -    if(!list_empty(&ballooned_pages))
  23.113 -    {
  23.114 -        struct page *ret;
  23.115 +    struct page *page;
  23.116 +
  23.117 +    if ( list_empty(&ballooned_pages) )
  23.118 +        return NULL;
  23.119  
  23.120 -        ret = LIST_TO_PAGE(ballooned_pages.next);
  23.121 -	UNLIST_PAGE(ret);
  23.122 +    page = LIST_TO_PAGE(ballooned_pages.next);
  23.123 +    UNLIST_PAGE(page);
  23.124  
  23.125 -        return ret;
  23.126 -    }
  23.127 +    if ( PageHighMem(page) )
  23.128 +        balloon_high--;
  23.129      else
  23.130 -        return NULL;
  23.131 +        balloon_low--;
  23.132 +
  23.133 +    return page;
  23.134  }
  23.135  
  23.136  static inline pte_t *get_ptep(unsigned long addr)
  23.137  {
  23.138 -    pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
  23.139 +    pgd_t *pgd;
  23.140 +    pmd_t *pmd;
  23.141 +
  23.142      pgd = pgd_offset_k(addr);
  23.143 -
  23.144      if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG();
  23.145  
  23.146      pmd = pmd_offset(pgd, addr);
  23.147      if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG();
  23.148  
  23.149 -    ptep = pte_offset_kernel(pmd, addr);
  23.150 -
  23.151 -    return ptep;
  23.152 +    return pte_offset_kernel(pmd, addr);
  23.153  }
  23.154  
  23.155 -/* Main function for relinquishing memory. */
  23.156 -static unsigned long inflate_balloon(unsigned long num_pages)
  23.157 -
  23.158 +static void balloon_alarm(unsigned long unused)
  23.159  {
  23.160 -    unsigned long *parray;
  23.161 -    unsigned long *currp;
  23.162 -    unsigned long curraddr;
  23.163 -    unsigned long ret = 0;
  23.164 -    unsigned long i, j;
  23.165 -
  23.166 -    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
  23.167 -    if ( parray == NULL )
  23.168 -    {
  23.169 -        printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n");
  23.170 -        return -EFAULT;
  23.171 -    }
  23.172 -
  23.173 -    currp = parray;
  23.174 -
  23.175 -    for ( i = 0; i < num_pages; i++, currp++ )
  23.176 -    {
  23.177 -        struct page *page = alloc_page(GFP_HIGHUSER);
  23.178 -        unsigned long pfn = page - mem_map;
  23.179 -
  23.180 -        /* If allocation fails then free all reserved pages. */
  23.181 -        if ( page == NULL )
  23.182 -        {
  23.183 -            printk(KERN_ERR "Unable to inflate balloon by %ld, only"
  23.184 -                   " %ld pages free.", num_pages, i);
  23.185 -            currp = parray;
  23.186 -            for ( j = 0; j < i; j++, currp++ )
  23.187 -                __free_page((struct page *) (mem_map + *currp));
  23.188 -
  23.189 -            ret = -EFAULT;
  23.190 -            goto cleanup;
  23.191 -        }
  23.192 -
  23.193 -        *currp = pfn;
  23.194 -    }
  23.195 -
  23.196 +    schedule_work(&balloon_worker);
  23.197 +}
  23.198  
  23.199 -    for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
  23.200 -    {
  23.201 -        unsigned long mfn = phys_to_machine_mapping[*currp];
  23.202 -        curraddr = (unsigned long)page_address(mem_map + *currp);
  23.203 -        /* Blow away page contents for security, and also p.t. ref if any. */
  23.204 -        if ( curraddr != 0 )
  23.205 -        {
  23.206 -            scrub_pages(curraddr, 1);
  23.207 -            queue_l1_entry_update(get_ptep(curraddr), 0);
  23.208 -        }
  23.209 -#ifdef CONFIG_XEN_SCRUB_PAGES
  23.210 -        else
  23.211 -        {
  23.212 -            void *p = kmap(&mem_map[*currp]);
  23.213 -            scrub_pages(p, 1);
  23.214 -            kunmap(&mem_map[*currp]);
  23.215 -        }
  23.216 -#endif
  23.217 -
  23.218 -        add_ballooned_page(*currp);
  23.219 -
  23.220 -        phys_to_machine_mapping[*currp] = INVALID_P2M_ENTRY;
  23.221 -        *currp = mfn;
  23.222 -    }
  23.223 -
  23.224 -    /* Flush updates through and flush the TLB. */
  23.225 -    xen_tlb_flush();
  23.226 -
  23.227 -    ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
  23.228 -                                parray, num_pages, 0);
  23.229 -    if ( unlikely(ret != num_pages) )
  23.230 -    {
  23.231 -        printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret);
  23.232 -        goto cleanup;
  23.233 -    }
  23.234 -
  23.235 -    credit += num_pages;
  23.236 -    ret = num_pages;
  23.237 -
  23.238 - cleanup:
  23.239 -    vfree(parray);
  23.240 -
  23.241 -    return ret;
  23.242 +static unsigned long current_target(void)
  23.243 +{
  23.244 +    unsigned long target = min(target_pages, hard_limit);
  23.245 +    if ( target > (current_pages + balloon_low + balloon_high) )
  23.246 +        target = current_pages + balloon_low + balloon_high;
  23.247 +    return target;
  23.248  }
  23.249  
  23.250  /*
  23.251 - * Install new mem pages obtained by deflate_balloon. function walks 
  23.252 - * phys->machine mapping table looking for DEAD entries and populates
  23.253 - * them.
  23.254 + * We avoid multiple worker processes conflicting via the balloon mutex.
  23.255 + * We may of course race updates of the target counts (which are protected
  23.256 + * by the balloon lock), or with changes to the Xen hard limit, but we will
  23.257 + * recover from these in time.
  23.258   */
  23.259 -static unsigned long process_returned_pages(unsigned long * parray, 
  23.260 -                                       unsigned long num)
  23.261 +static void balloon_process(void *unused)
  23.262  {
  23.263 -    /* currently, this function is rather simplistic as 
  23.264 -     * it is assumed that domain reclaims only number of 
  23.265 -     * pages previously released. this is to change soon
  23.266 -     * and the code to extend page tables etc. will be 
  23.267 -     * incorporated here.
  23.268 -     */
  23.269 -     
  23.270 -    unsigned long * curr = parray;
  23.271 -    unsigned long num_installed;
  23.272 +    unsigned long *mfn_list, pfn, i, flags;
  23.273 +    struct page   *page;
  23.274 +    long           credit, debt, rc;
  23.275 +    void          *v;
  23.276 +
  23.277 +    down(&balloon_mutex);
  23.278 +
  23.279 + retry:
  23.280 +    mfn_list = NULL;
  23.281  
  23.282 -    struct page *page;
  23.283 -
  23.284 -    num_installed = 0;
  23.285 -    while ( (page = rem_ballooned_page()) != NULL )
  23.286 +    if ( (credit = current_target() - current_pages) > 0 )
  23.287      {
  23.288 -        unsigned long pfn;
  23.289 +        mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list));
  23.290 +        if ( mfn_list == NULL )
  23.291 +            goto out;
  23.292  
  23.293 -        if ( num_installed == num )
  23.294 -            break;
  23.295 -
  23.296 -        pfn = page - mem_map;
  23.297 -
  23.298 -        if(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY)
  23.299 +        balloon_lock(flags);
  23.300 +        rc = HYPERVISOR_dom_mem_op(
  23.301 +            MEMOP_increase_reservation, mfn_list, credit, 0);
  23.302 +        balloon_unlock(flags);
  23.303 +        if ( rc < credit )
  23.304          {
  23.305 -            printk("BUG: Tried to unballoon existing page!");
  23.306 -            BUG();
  23.307 +            /* We hit the Xen hard limit: reprobe. */
  23.308 +            if ( HYPERVISOR_dom_mem_op(
  23.309 +                MEMOP_decrease_reservation, mfn_list, rc, 0) != rc )
  23.310 +                BUG();
  23.311 +            hard_limit = current_pages + rc - driver_pages;
  23.312 +            vfree(mfn_list);
  23.313 +            goto retry;
  23.314          }
  23.315  
  23.316 -        phys_to_machine_mapping[pfn] = *curr;
  23.317 -        queue_machphys_update(*curr, pfn);
  23.318 -        if (pfn<max_low_pfn)
  23.319 -            queue_l1_entry_update(
  23.320 -                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
  23.321 -                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
  23.322 -        
  23.323 -        __free_page(mem_map + pfn);
  23.324 +        for ( i = 0; i < credit; i++ )
  23.325 +        {
  23.326 +            if ( (page = balloon_retrieve()) == NULL )
  23.327 +                BUG();
  23.328 +
  23.329 +            pfn = page - mem_map;
  23.330 +            if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY )
  23.331 +                BUG();
  23.332  
  23.333 -        curr++;
  23.334 -        num_installed++;
  23.335 +            /* Update P->M and M->P tables. */
  23.336 +            phys_to_machine_mapping[pfn] = mfn_list[i];
  23.337 +            queue_machphys_update(mfn_list[i], pfn);
  23.338 +            
  23.339 +            /* Link back into the page tables if it's not a highmem page. */
  23.340 +            if ( pfn < max_low_pfn )
  23.341 +                queue_l1_entry_update(
  23.342 +                    get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
  23.343 +                    (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
  23.344 +            
  23.345 +            /* Finally, relinquish the memory back to the system allocator. */
  23.346 +            ClearPageReserved(page);
  23.347 +            set_page_count(page, 1);
  23.348 +            __free_page(page);
  23.349 +        }
  23.350 +
  23.351 +        current_pages += credit;
  23.352      }
  23.353 +    else if ( credit < 0 )
  23.354 +    {
  23.355 +        debt = -credit;
  23.356 +
  23.357 +        mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list));
  23.358 +        if ( mfn_list == NULL )
  23.359 +            goto out;
  23.360  
  23.361 -    return num_installed;
  23.362 -}
  23.363 +        for ( i = 0; i < debt; i++ )
  23.364 +        {
  23.365 +            if ( (page = alloc_page(GFP_HIGHUSER)) == NULL )
  23.366 +            {
  23.367 +                debt = i;
  23.368 +                break;
  23.369 +            }
  23.370 +
  23.371 +            pfn = page - mem_map;
  23.372 +            mfn_list[i] = phys_to_machine_mapping[pfn];
  23.373 +            phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
  23.374  
  23.375 -unsigned long deflate_balloon(unsigned long num_pages)
  23.376 -{
  23.377 -    unsigned long ret;
  23.378 -    unsigned long * parray;
  23.379 +            if ( !PageHighMem(page) )
  23.380 +            {
  23.381 +                v = phys_to_virt((page - mem_map) << PAGE_SHIFT);
  23.382 +                scrub_pages(v, 1);
  23.383 +                queue_l1_entry_update(get_ptep((unsigned long)v), 0);
  23.384 +            }
  23.385 +#ifdef CONFIG_XEN_SCRUB_PAGES
  23.386 +            else
  23.387 +            {
  23.388 +                v = kmap(page);
  23.389 +                scrub_pages(v, 1);
  23.390 +                kunmap(page);
  23.391 +            }
  23.392 +#endif            
  23.393  
  23.394 -    if ( num_pages > credit )
  23.395 -    {
  23.396 -        printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n",
  23.397 -               num_pages, credit);
  23.398 -        return -EAGAIN;
  23.399 +            balloon_append(page);
  23.400 +        }
  23.401 +
  23.402 +        /* Flush updates through and flush the TLB. */
  23.403 +        xen_tlb_flush();
  23.404 +
  23.405 +        if ( HYPERVISOR_dom_mem_op(
  23.406 +            MEMOP_decrease_reservation, mfn_list, debt, 0) != debt )
  23.407 +            BUG();
  23.408 +
  23.409 +        current_pages -= debt;
  23.410      }
  23.411  
  23.412 -    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
  23.413 -    if ( parray == NULL )
  23.414 -    {
  23.415 -        printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n");
  23.416 -        return 0;
  23.417 -    }
  23.418 -
  23.419 -    ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
  23.420 -                                parray, num_pages, 0);
  23.421 -    if ( unlikely(ret != num_pages) )
  23.422 -    {
  23.423 -        printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
  23.424 -               ret);
  23.425 -        goto cleanup;
  23.426 -    }
  23.427 + out:
  23.428 +    if ( mfn_list != NULL )
  23.429 +        vfree(mfn_list);
  23.430  
  23.431 -    if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
  23.432 -    {
  23.433 -        printk(KERN_WARNING
  23.434 -               "deflate_balloon: restored only %lx of %lx pages.\n",
  23.435 -           ret, num_pages);
  23.436 -        goto cleanup;
  23.437 -    }
  23.438 +    /* Schedule more work if there is some still to be done. */
  23.439 +    if ( current_target() != current_pages )
  23.440 +        mod_timer(&balloon_timer, jiffies + HZ);
  23.441  
  23.442 -    ret = num_pages;
  23.443 -    credit -= num_pages;
  23.444 -
  23.445 - cleanup:
  23.446 -    vfree(parray);
  23.447 -
  23.448 -    return ret;
  23.449 +    up(&balloon_mutex);
  23.450  }
  23.451  
  23.452 -#define PAGE_TO_MB_SHIFT 8
  23.453 -
  23.454 -/*
  23.455 - * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c 
  23.456 - * The loops do go through all of low memory (ZONE_NORMAL).  The
  23.457 - * old pages have _PAGE_PRESENT set and so get skipped.
  23.458 - * If low memory is not full, the new pages are used to fill it, going
  23.459 - * from cur_low_pfn to low_pfn.   high memory is not direct mapped so
  23.460 - * no extension is needed for new high memory.
  23.461 - */
  23.462 -
  23.463 -static void pagetable_extend (int cur_low_pfn, int newpages)
  23.464 -{
  23.465 -    unsigned long vaddr, end;
  23.466 -    pgd_t *kpgd, *pgd, *pgd_base;
  23.467 -    int i, j, k;
  23.468 -    pmd_t *kpmd, *pmd;
  23.469 -    pte_t *kpte, *pte, *pte_base;
  23.470 -    int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn);
  23.471 -
  23.472 -    /*
  23.473 -     * This can be zero as well - no problem, in that case we exit
  23.474 -     * the loops anyway due to the PTRS_PER_* conditions.
  23.475 -     */
  23.476 -    end = (unsigned long)__va(low_pfn*PAGE_SIZE);
  23.477 -
  23.478 -    pgd_base = init_mm.pgd;
  23.479 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  23.480 -    i = pgd_index(PAGE_OFFSET);
  23.481 -#else
  23.482 -    i = __pgd_offset(PAGE_OFFSET);
  23.483 -#endif
  23.484 -    pgd = pgd_base + i;
  23.485 -
  23.486 -    for (; i < PTRS_PER_PGD; pgd++, i++) {
  23.487 -        vaddr = i*PGDIR_SIZE;
  23.488 -        if (end && (vaddr >= end))
  23.489 -            break;
  23.490 -        pmd = (pmd_t *)pgd;
  23.491 -        for (j = 0; j < PTRS_PER_PMD; pmd++, j++) {
  23.492 -            vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
  23.493 -            if (end && (vaddr >= end))
  23.494 -                break;
  23.495 -
  23.496 -            /* Filled in for us already? */
  23.497 -            if ( pmd_val(*pmd) & _PAGE_PRESENT )
  23.498 -                continue;
  23.499 -
  23.500 -            pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL);
  23.501 -
  23.502 -            for (k = 0; k < PTRS_PER_PTE; pte++, k++) {
  23.503 -                vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
  23.504 -                if (end && (vaddr >= end))
  23.505 -                    break;
  23.506 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  23.507 -                *pte = mk_pte(virt_to_page(vaddr), PAGE_KERNEL);
  23.508 -#else
  23.509 -		*pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
  23.510 -#endif
  23.511 -            }
  23.512 -            kpgd = pgd_offset_k((unsigned long)pte_base);
  23.513 -            kpmd = pmd_offset(kpgd, (unsigned long)pte_base);
  23.514 -            kpte = pte_offset_kernel(kpmd, (unsigned long)pte_base);
  23.515 -            queue_l1_entry_update(kpte,
  23.516 -                                  (*(unsigned long *)kpte)&~_PAGE_RW);
  23.517 -            set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base)));
  23.518 -            XEN_flush_page_update_queue();
  23.519 -        }
  23.520 -    }
  23.521 -}
  23.522 -
  23.523 -/*
  23.524 - * claim_new_pages() asks xen to increase this domain's memory  reservation
  23.525 - * and return a list of the new pages of memory.  This new pages are
  23.526 - * added to the free list of the memory manager.
  23.527 - *
  23.528 - * Available RAM does not normally change while Linux runs.  To make this work,
  23.529 - * the linux mem= boottime command line param must say how big memory could
  23.530 - * possibly grow.  Then setup_arch() in arch/xen/kernel/setup.c
  23.531 - * sets max_pfn, max_low_pfn and the zones according to
  23.532 - * this max memory size.   The page tables themselves can only be
  23.533 - * extended after xen has assigned new pages to this domain.
  23.534 - */
  23.535 -
  23.536 -static unsigned long
  23.537 -claim_new_pages(unsigned long num_pages)
  23.538 +/* Resets the Xen limit, sets new target, and kicks off processing. */
  23.539 +static void set_new_target(unsigned long target)
  23.540  {
  23.541 -    unsigned long new_page_cnt, pfn;
  23.542 -    unsigned long * parray, *curr;
  23.543 -
  23.544 -    if (most_seen_pages+num_pages> max_pfn)
  23.545 -        num_pages = max_pfn-most_seen_pages;
  23.546 -    if (num_pages==0) return -EINVAL;
  23.547 -
  23.548 -    parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long));
  23.549 -    if ( parray == NULL )
  23.550 -    {
  23.551 -        printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n");
  23.552 -        return 0;
  23.553 -    }
  23.554 -
  23.555 -    new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
  23.556 -                                parray, num_pages, 0);
  23.557 -    if ( new_page_cnt != num_pages )
  23.558 -    {
  23.559 -        printk(KERN_WARNING
  23.560 -            "claim_new_pages: xen granted only %lu of %lu requested pages\n",
  23.561 -            new_page_cnt, num_pages);
  23.562 -
  23.563 -        /* 
  23.564 -         * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
  23.565 -         * usually can dribble out a few pages and then hangs.
  23.566 -         */
  23.567 -        if ( new_page_cnt < 1000 )
  23.568 -        {
  23.569 -            printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
  23.570 -            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
  23.571 -                                parray, new_page_cnt, 0);
  23.572 -            return -EFAULT;
  23.573 -        }
  23.574 -    }
  23.575 -    memcpy(phys_to_machine_mapping+most_seen_pages, parray,
  23.576 -           new_page_cnt * sizeof(unsigned long));
  23.577 -
  23.578 -    pagetable_extend(most_seen_pages,new_page_cnt);
  23.579 -
  23.580 -    for ( pfn = most_seen_pages, curr = parray;
  23.581 -          pfn < most_seen_pages+new_page_cnt;
  23.582 -          pfn++, curr++ )
  23.583 -    {
  23.584 -        struct page *page = mem_map + pfn;
  23.585 -
  23.586 -#ifndef CONFIG_HIGHMEM
  23.587 -        if ( pfn>=max_low_pfn )
  23.588 -        {
  23.589 -            printk(KERN_WARNING "Warning only %ldMB will be used.\n",
  23.590 -               pfn>>PAGE_TO_MB_SHIFT);
  23.591 -            printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
  23.592 -            break;
  23.593 -        }
  23.594 -#endif
  23.595 -        queue_machphys_update(*curr, pfn);
  23.596 -        if ( pfn < max_low_pfn )
  23.597 -            queue_l1_entry_update(
  23.598 -                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
  23.599 -                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
  23.600 -        
  23.601 -        XEN_flush_page_update_queue();
  23.602 -        
  23.603 -        /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
  23.604 -        ClearPageReserved(page);
  23.605 -        if ( pfn >= max_low_pfn )
  23.606 -            set_bit(PG_highmem, &page->flags);
  23.607 -        set_page_count(page, 1);
  23.608 -        __free_page(page);
  23.609 -    }
  23.610 -
  23.611 -    vfree(parray);
  23.612 -
  23.613 -    return new_page_cnt;
  23.614 +    /* No need for lock. Not read-modify-write updates. */
  23.615 +    hard_limit   = ~0UL;
  23.616 +    target_pages = target;
  23.617 +    schedule_work(&balloon_worker);
  23.618  }
  23.619  
  23.620 -
  23.621 -static int balloon_try_target(int target)
  23.622 -{
  23.623 -    int change, reclaim;
  23.624 -
  23.625 -    if ( target < current_pages )
  23.626 -    {
  23.627 -        int change = inflate_balloon(current_pages-target);
  23.628 -        if ( change <= 0 )
  23.629 -            return change;
  23.630 -
  23.631 -        current_pages -= change;
  23.632 -        printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
  23.633 -            change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
  23.634 -    }
  23.635 -    else if ( target > current_pages )
  23.636 -    {
  23.637 -        reclaim = min((unsigned long)target,most_seen_pages) - current_pages;
  23.638 -
  23.639 -        if ( reclaim )
  23.640 -        {
  23.641 -            change = deflate_balloon( reclaim );
  23.642 -            if ( change <= 0 )
  23.643 -                return change;
  23.644 -            current_pages += change;
  23.645 -            printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
  23.646 -                change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
  23.647 -        }
  23.648 -
  23.649 -        if ( most_seen_pages < target )
  23.650 -        {
  23.651 -            int growth = claim_new_pages(target-most_seen_pages);
  23.652 -            if ( growth <= 0 )
  23.653 -                return growth;
  23.654 -            most_seen_pages += growth;
  23.655 -            current_pages += growth;
  23.656 -            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
  23.657 -                growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
  23.658 -        }
  23.659 -    }
  23.660 -
  23.661 -    return 1;
  23.662 -}
  23.663 -
  23.664 -
  23.665  static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
  23.666  {
  23.667      switch ( msg->subtype )
  23.668      {
  23.669      case CMSG_MEM_REQUEST_SET:
  23.670 +    {
  23.671 +        mem_request_t *req = (mem_request_t *)&msg->msg[0];
  23.672          if ( msg->length != sizeof(mem_request_t) )
  23.673              goto parse_error;
  23.674 -        {
  23.675 -            mem_request_t *req = (mem_request_t *)&msg->msg[0];
  23.676 -            req->status = balloon_try_target(req->target);
  23.677 -        }
  23.678 -        break;        
  23.679 +        set_new_target(req->target);
  23.680 +        req->status = 0;
  23.681 +    }
  23.682 +    break;        
  23.683      default:
  23.684          goto parse_error;
  23.685      }
  23.686 @@ -534,158 +320,122 @@ static void balloon_ctrlif_rx(ctrl_msg_t
  23.687      ctrl_if_send_response(msg);
  23.688  }
  23.689  
  23.690 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  23.691 -typedef size_t count_t;
  23.692 -#else
  23.693 -typedef u_long count_t;
  23.694 -#endif
  23.695 -
  23.696 -static int do_balloon_write(const char *buffer, count_t count)
  23.697 +static int balloon_write(struct file *file, const char __user *buffer,
  23.698 +                         unsigned long count, void *data)
  23.699  {
  23.700      char memstring[64], *endchar;
  23.701 -    int len, i;
  23.702 -    unsigned long target;
  23.703 -    unsigned long long targetbytes;
  23.704 +    unsigned long long target_bytes;
  23.705  
  23.706 -    /* Only admin can play with the balloon :) */
  23.707      if ( !capable(CAP_SYS_ADMIN) )
  23.708          return -EPERM;
  23.709  
  23.710 +    if ( count <= 1 )
  23.711 +        return -EBADMSG; /* runt */
  23.712      if ( count > sizeof(memstring) )
  23.713 -        return -EFBIG;
  23.714 -
  23.715 -    len = strnlen_user(buffer, count);
  23.716 -    if ( len == 0 ) return -EBADMSG;
  23.717 -    if ( len == 1 ) return 1; /* input starts with a NUL char */
  23.718 -    if ( strncpy_from_user(memstring, buffer, len) < 0 )
  23.719 -        return -EFAULT;
  23.720 +        return -EFBIG;   /* too long */
  23.721  
  23.722 -    endchar = memstring;
  23.723 -    for ( i = 0; i < len; ++i, ++endchar )
  23.724 -        if ( (memstring[i] < '0') || (memstring[i] > '9') )
  23.725 -            break;
  23.726 -    if ( i == 0 )
  23.727 -        return -EBADMSG;
  23.728 +    if ( copy_from_user(memstring, buffer, count) )
  23.729 +        return -EFAULT;
  23.730 +    memstring[sizeof(memstring)-1] = '\0';
  23.731  
  23.732 -    targetbytes = memparse(memstring,&endchar);
  23.733 -    target = targetbytes >> PAGE_SHIFT;
  23.734 +    target_bytes = memparse(memstring, &endchar);
  23.735 +    set_new_target(target_bytes >> PAGE_SHIFT);
  23.736  
  23.737 -    i = balloon_try_target(target);
  23.738 -
  23.739 -    if ( i <= 0 ) return i;
  23.740 -
  23.741 -    return len;
  23.742 +    return count;
  23.743  }
  23.744  
  23.745 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  23.746 -static int balloon_write(struct file *file, const char *buffer,
  23.747 -                         size_t count, loff_t *offp)
  23.748 +static int balloon_read(char *page, char **start, off_t off,
  23.749 +                        int count, int *eof, void *data)
  23.750  {
  23.751 -    int len = do_balloon_write(buffer, count);
  23.752 -    
  23.753 -    if ( len <= 0 ) return len;
  23.754 +    int len;
  23.755 +
  23.756 +#define K(_p) ((_p)<<(PAGE_SHIFT-10))
  23.757 +    len = sprintf(
  23.758 +        page,
  23.759 +        "Current allocation: %8lu kB\n"
  23.760 +        "Requested target:   %8lu kB\n"
  23.761 +        "Low-mem balloon:    %8lu kB\n"
  23.762 +        "High-mem balloon:   %8lu kB\n"
  23.763 +        "Xen hard limit:     ",
  23.764 +        K(current_pages), K(target_pages), K(balloon_low), K(balloon_high));
  23.765  
  23.766 -    *offp += len;
  23.767 +    if ( hard_limit != ~0UL )
  23.768 +        len += sprintf(
  23.769 +            page + len, 
  23.770 +            "%8lu kB (inc. %8lu kB driver headroom)\n",
  23.771 +            K(hard_limit), K(driver_pages));
  23.772 +    else
  23.773 +        len += sprintf(
  23.774 +            page + len,
  23.775 +            "     ??? kB\n");
  23.776 +
  23.777 +    *eof = 1;
  23.778      return len;
  23.779  }
  23.780  
  23.781 -static int balloon_read(struct file *filp, char *buffer,
  23.782 -                        size_t count, loff_t *offp)
  23.783 -{
  23.784 -    static char priv_buf[32];
  23.785 -    char *priv_bufp = priv_buf;
  23.786 -    int len;
  23.787 -    len = sprintf(priv_buf,"%lu\n",current_pages<<PAGE_SHIFT);
  23.788 -
  23.789 -    len -= *offp;
  23.790 -    priv_bufp += *offp;
  23.791 -    if (len>count) len = count;
  23.792 -    if (len<0) len = 0;
  23.793 -
  23.794 -    if ( copy_to_user(buffer, priv_bufp, len) != 0 )
  23.795 -        return -EFAULT;
  23.796 -
  23.797 -    *offp += len;
  23.798 -    return len;
  23.799 -}
  23.800 -
  23.801 -static struct file_operations balloon_fops = {
  23.802 -    .read  = balloon_read,
  23.803 -    .write = balloon_write
  23.804 -};
  23.805 -
  23.806 -#else
  23.807 -
  23.808 -static int balloon_write(struct file *file, const char *buffer,
  23.809 -                         u_long count, void *data)
  23.810 -{
  23.811 -    return do_balloon_write(buffer, count);
  23.812 -}
  23.813 -
  23.814 -static int balloon_read(char *page, char **start, off_t off,
  23.815 -			int count, int *eof, void *data)
  23.816 -{
  23.817 -  int len;
  23.818 -  len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT);
  23.819 -  
  23.820 -  if (len <= off+count) *eof = 1;
  23.821 -  *start = page + off;
  23.822 -  len -= off;
  23.823 -  if (len>count) len = count;
  23.824 -  if (len<0) len = 0;
  23.825 -  return len;
  23.826 -}
  23.827 -
  23.828 -#endif
  23.829 -
  23.830  static int __init balloon_init(void)
  23.831  {
  23.832 -    printk(KERN_ALERT "Starting Xen Balloon driver\n");
  23.833 +    unsigned long pfn;
  23.834 +    struct page *page;
  23.835 +
  23.836 +    IPRINTK("Initialising balloon driver.\n");
  23.837  
  23.838 -    most_seen_pages = current_pages = min(xen_start_info.nr_pages,max_pfn);
  23.839 -    if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL )
  23.840 +    current_pages = min(xen_start_info.nr_pages, max_pfn);
  23.841 +    target_pages  = current_pages;
  23.842 +    balloon_low   = 0;
  23.843 +    balloon_high  = 0;
  23.844 +    driver_pages  = 0UL;
  23.845 +    hard_limit    = ~0UL;
  23.846 +
  23.847 +    init_timer(&balloon_timer);
  23.848 +    balloon_timer.data = 0;
  23.849 +    balloon_timer.function = balloon_alarm;
  23.850 +    
  23.851 +    if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL )
  23.852      {
  23.853 -        printk(KERN_ALERT "Unable to create balloon driver proc entry!");
  23.854 +        WPRINTK("Unable to create /proc/xen/balloon.\n");
  23.855          return -1;
  23.856      }
  23.857  
  23.858 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
  23.859 -    balloon_pde->owner     = THIS_MODULE;
  23.860 -    balloon_pde->nlink     = 1;
  23.861 -    balloon_pde->proc_fops = &balloon_fops;
  23.862 -#else
  23.863 +    balloon_pde->read_proc  = balloon_read;
  23.864      balloon_pde->write_proc = balloon_write;
  23.865 -    balloon_pde->read_proc  = balloon_read;
  23.866 -#endif
  23.867  
  23.868 -    (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx,
  23.869 -                                    CALLBACK_IN_BLOCKING_CONTEXT);
  23.870 +    (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx, 0);
  23.871  
  23.872 -    /* 
  23.873 -     * make_module a new phys map if mem= says xen can give us memory  to grow
  23.874 -     */
  23.875 -    if ( max_pfn > xen_start_info.nr_pages )
  23.876 +    /* Initialise the balloon with excess memory space. */
  23.877 +    for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ )
  23.878      {
  23.879 -        extern unsigned long *phys_to_machine_mapping;
  23.880 -        unsigned long *newmap;
  23.881 -        newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long));
  23.882 -        memset(newmap, ~0, max_pfn * sizeof(unsigned long));
  23.883 -        memcpy(newmap, phys_to_machine_mapping,
  23.884 -               xen_start_info.nr_pages * sizeof(unsigned long));
  23.885 -        phys_to_machine_mapping = newmap;
  23.886 +        page = &mem_map[pfn];
  23.887 +        if ( !PageReserved(page) )
  23.888 +            balloon_append(page);
  23.889      }
  23.890  
  23.891      return 0;
  23.892  }
  23.893  
  23.894 -static void __exit balloon_cleanup(void)
  23.895 +subsys_initcall(balloon_init);
  23.896 +
  23.897 +void balloon_update_driver_allowance(long delta)
  23.898  {
  23.899 -    if ( balloon_pde != NULL )
  23.900 -    {
  23.901 -        remove_xen_proc_entry("memory_target");
  23.902 -        balloon_pde = NULL;
  23.903 -    }
  23.904 +    unsigned long flags;
  23.905 +    balloon_lock(flags);
  23.906 +    driver_pages += delta; /* non-atomic update */
  23.907 +    balloon_unlock(flags);
  23.908  }
  23.909  
  23.910 -module_init(balloon_init);
  23.911 -module_exit(balloon_cleanup);
  23.912 +void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns)
  23.913 +{
  23.914 +    unsigned long flags;
  23.915 +
  23.916 +    balloon_lock(flags);
  23.917 +    if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
  23.918 +                               mfn_list, nr_mfns, 0) != nr_mfns )
  23.919 +        BUG();
  23.920 +    current_pages -= nr_mfns; /* non-atomic update */
  23.921 +    balloon_unlock(flags);
  23.922 +
  23.923 +    schedule_work(&balloon_worker);
  23.924 +}
  23.925 +
  23.926 +EXPORT_SYMBOL(balloon_update_driver_allowance);
  23.927 +EXPORT_SYMBOL(balloon_put_pages);
    24.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/netback/interface.c	Mon Nov 29 17:11:03 2004 +0000
    24.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/netback/interface.c	Fri Dec 10 18:49:15 2004 +0000
    24.3 @@ -35,8 +35,8 @@ static void __netif_disconnect_complete(
    24.4  
    24.5      /*
    24.6       * These can't be done in netif_disconnect() because at that point there
    24.7 -     * may be outstanding requests at the disc whose asynchronous responses
    24.8 -     * must still be notified to the remote driver.
    24.9 +     * may be outstanding requests in the network stack whose asynchronous
   24.10 +     * responses must still be notified to the remote driver.
   24.11       */
   24.12      unbind_evtchn_from_irq(netif->evtchn);
   24.13      vfree(netif->tx); /* Frees netif->rx as well. */
   24.14 @@ -84,7 +84,7 @@ void netif_create(netif_be_create_t *cre
   24.15      unsigned int       handle = create->netif_handle;
   24.16      struct net_device *dev;
   24.17      netif_t          **pnetif, *netif;
   24.18 -    char               name[IFNAMSIZ] = {};
   24.19 +    char               name[IFNAMSIZ];
   24.20  
   24.21      snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
   24.22      dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
   24.23 @@ -116,7 +116,7 @@ void netif_create(netif_be_create_t *cre
   24.24          {
   24.25              DPRINTK("Could not create netif: already exists\n");
   24.26              create->status = NETIF_BE_STATUS_INTERFACE_EXISTS;
   24.27 -            kfree(dev);
   24.28 +            free_netdev(dev);
   24.29              return;
   24.30          }
   24.31          pnetif = &(*pnetif)->hash_next;
   24.32 @@ -137,7 +137,7 @@ void netif_create(netif_be_create_t *cre
   24.33          DPRINTK("Could not register new net device %s: err=%d\n",
   24.34                  dev->name, err);
   24.35          create->status = NETIF_BE_STATUS_OUT_OF_MEMORY;
   24.36 -        kfree(dev);
   24.37 +        free_netdev(dev);
   24.38          return;
   24.39      }
   24.40  
   24.41 @@ -176,7 +176,7 @@ void netif_destroy(netif_be_destroy_t *d
   24.42   destroy:
   24.43      *pnetif = netif->hash_next;
   24.44      unregister_netdev(netif->dev);
   24.45 -    kfree(netif->dev);
   24.46 +    free_netdev(netif->dev);
   24.47      destroy->status = NETIF_BE_STATUS_OKAY;
   24.48  }
   24.49  
    25.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/netback/netback.c	Mon Nov 29 17:11:03 2004 +0000
    25.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/netback/netback.c	Fri Dec 10 18:49:15 2004 +0000
    25.3 @@ -11,6 +11,7 @@
    25.4   */
    25.5  
    25.6  #include "common.h"
    25.7 +#include <asm-xen/balloon.h>
    25.8  #include <asm-xen/evtchn.h>
    25.9  
   25.10  static void netif_page_release(struct page *page);
   25.11 @@ -30,6 +31,8 @@ static DECLARE_TASKLET(net_tx_tasklet, n
   25.12  static void net_rx_action(unsigned long unused);
   25.13  static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
   25.14  
   25.15 +static struct timer_list net_timer;
   25.16 +
   25.17  static struct sk_buff_head rx_queue;
   25.18  static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2];
   25.19  static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3];
   25.20 @@ -70,27 +73,20 @@ static unsigned long mfn_list[MAX_MFN_AL
   25.21  static unsigned int alloc_index = 0;
   25.22  static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
   25.23  
   25.24 -static void __refresh_mfn_list(void)
   25.25 +static unsigned long alloc_mfn(void)
   25.26  {
   25.27 -    int ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation,
   25.28 -                                    mfn_list, MAX_MFN_ALLOC, 0);
   25.29 -    if ( unlikely(ret != MAX_MFN_ALLOC) )
   25.30 -        BUG();
   25.31 -    alloc_index = MAX_MFN_ALLOC;
   25.32 -}
   25.33 -
   25.34 -static unsigned long get_new_mfn(void)
   25.35 -{
   25.36 -    unsigned long mfn, flags;
   25.37 +    unsigned long mfn = 0, flags;
   25.38      spin_lock_irqsave(&mfn_lock, flags);
   25.39 -    if ( alloc_index == 0 )
   25.40 -        __refresh_mfn_list();
   25.41 -    mfn = mfn_list[--alloc_index];
   25.42 +    if ( unlikely(alloc_index == 0) )
   25.43 +        alloc_index = HYPERVISOR_dom_mem_op(
   25.44 +            MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0);
   25.45 +    if ( alloc_index != 0 )
   25.46 +        mfn = mfn_list[--alloc_index];
   25.47      spin_unlock_irqrestore(&mfn_lock, flags);
   25.48      return mfn;
   25.49  }
   25.50  
   25.51 -static void dealloc_mfn(unsigned long mfn)
   25.52 +static void free_mfn(unsigned long mfn)
   25.53  {
   25.54      unsigned long flags;
   25.55      spin_lock_irqsave(&mfn_lock, flags);
   25.56 @@ -211,8 +207,16 @@ static void net_rx_action(unsigned long 
   25.57          netif   = (netif_t *)skb->dev->priv;
   25.58          vdata   = (unsigned long)skb->data;
   25.59          mdata   = virt_to_machine(vdata);
   25.60 -        new_mfn = get_new_mfn();
   25.61 -        
   25.62 +
   25.63 +        /* Memory squeeze? Back off for an arbitrary while. */
   25.64 +        if ( (new_mfn = alloc_mfn()) == 0 )
   25.65 +        {
   25.66 +            if ( net_ratelimit() )
   25.67 +                printk(KERN_WARNING "Memory squeeze in netback driver.\n");
   25.68 +            mod_timer(&net_timer, jiffies + HZ);
   25.69 +            break;
   25.70 +        }
   25.71 +
   25.72          /*
   25.73           * Set the new P2M table entry before reassigning the old data page.
   25.74           * Heed the comment in pgtable-2level.h:pte_page(). :-)
   25.75 @@ -281,7 +285,7 @@ static void net_rx_action(unsigned long 
   25.76          if ( unlikely(mcl[1].args[5] != 0) )
   25.77          {
   25.78              DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
   25.79 -            dealloc_mfn(mdata >> PAGE_SHIFT);
   25.80 +            free_mfn(mdata >> PAGE_SHIFT);
   25.81              status = NETIF_RSP_ERROR;
   25.82          }
   25.83  
   25.84 @@ -308,7 +312,7 @@ static void net_rx_action(unsigned long 
   25.85      }
   25.86  
   25.87      /* More work to do? */
   25.88 -    if ( !skb_queue_empty(&rx_queue) )
   25.89 +    if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
   25.90          tasklet_schedule(&net_rx_tasklet);
   25.91  #if 0
   25.92      else
   25.93 @@ -316,6 +320,11 @@ static void net_rx_action(unsigned long 
   25.94  #endif
   25.95  }
   25.96  
   25.97 +static void net_alarm(unsigned long unused)
   25.98 +{
   25.99 +    tasklet_schedule(&net_rx_tasklet);
  25.100 +}
  25.101 +
  25.102  struct net_device_stats *netif_be_get_stats(struct net_device *dev)
  25.103  {
  25.104      netif_t *netif = dev->priv;
  25.105 @@ -782,9 +791,16 @@ static int __init netback_init(void)
  25.106  
  25.107      printk("Initialising Xen netif backend\n");
  25.108  
  25.109 +    /* We can increase reservation by this much in net_rx_action(). */
  25.110 +    balloon_update_driver_allowance(NETIF_RX_RING_SIZE);
  25.111 +
  25.112      skb_queue_head_init(&rx_queue);
  25.113      skb_queue_head_init(&tx_queue);
  25.114  
  25.115 +    init_timer(&net_timer);
  25.116 +    net_timer.data = 0;
  25.117 +    net_timer.function = net_alarm;
  25.118 +    
  25.119      netif_interface_init();
  25.120  
  25.121      if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
    26.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/netfront/netfront.c	Mon Nov 29 17:11:03 2004 +0000
    26.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/netfront/netfront.c	Fri Dec 10 18:49:15 2004 +0000
    26.3 @@ -45,6 +45,7 @@
    26.4  #include <asm-xen/evtchn.h>
    26.5  #include <asm-xen/ctrl_if.h>
    26.6  #include <asm-xen/xen-public/io/netif.h>
    26.7 +#include <asm-xen/balloon.h>
    26.8  #include <asm/page.h>
    26.9  
   26.10  #include <net/arp.h>
   26.11 @@ -409,6 +410,9 @@ static void network_alloc_rx_buffers(str
   26.12      rx_mcl[i].args[3] = 0;
   26.13      rx_mcl[i].args[4] = DOMID_SELF;
   26.14  
   26.15 +    /* Tell the balloon driver what is going on. */
   26.16 +    balloon_update_driver_allowance(i);
   26.17 +
   26.18      /* Zap PTEs and give away pages in one big multicall. */
   26.19      (void)HYPERVISOR_multicall(rx_mcl, i+1);
   26.20  
   26.21 @@ -557,14 +561,15 @@ static int netif_poll(struct net_device 
   26.22          /*
   26.23           * An error here is very odd. Usually indicates a backend bug,
   26.24           * low-memory condition, or that we didn't have reservation headroom.
   26.25 -         * Whatever - print an error and queue the id again straight away.
   26.26           */
   26.27          if ( unlikely(rx->status <= 0) )
   26.28          {
   26.29 -	    printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
   26.30 +            if ( net_ratelimit() )
   26.31 +                printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n");
   26.32              np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
   26.33              wmb();
   26.34              np->rx->req_prod++;
   26.35 +            work_done--;
   26.36              continue;
   26.37          }
   26.38  
   26.39 @@ -595,6 +600,9 @@ static int netif_poll(struct net_device 
   26.40          __skb_queue_tail(&rxq, skb);
   26.41      }
   26.42  
   26.43 +    /* Some pages are no longer absent... */
   26.44 +    balloon_update_driver_allowance(-work_done);
   26.45 +
   26.46      /* Do all the remapping work, and M->P updates, in one big hypercall. */
   26.47      if ( likely((mcl - rx_mcl) != 0) )
   26.48      {
    27.1 --- a/linux-2.6.9-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Nov 29 17:11:03 2004 +0000
    27.2 +++ b/linux-2.6.9-xen-sparse/drivers/xen/privcmd/privcmd.c	Fri Dec 10 18:49:15 2004 +0000
    27.3 @@ -7,7 +7,6 @@
    27.4   */
    27.5  
    27.6  #include <linux/config.h>
    27.7 -#include <linux/module.h>
    27.8  #include <linux/kernel.h>
    27.9  #include <linux/sched.h>
   27.10  #include <linux/slab.h>
   27.11 @@ -213,23 +212,9 @@ static int __init privcmd_init(void)
   27.12  
   27.13      privcmd_intf = create_xen_proc_entry("privcmd", 0400);
   27.14      if ( privcmd_intf != NULL )
   27.15 -    {
   27.16 -        privcmd_intf->owner      = THIS_MODULE;
   27.17 -        privcmd_intf->nlink      = 1;
   27.18 -        privcmd_intf->proc_fops  = &privcmd_file_ops;
   27.19 -    }
   27.20 +        privcmd_intf->proc_fops = &privcmd_file_ops;
   27.21  
   27.22      return 0;
   27.23  }
   27.24  
   27.25 -
   27.26 -static void __exit privcmd_cleanup(void)
   27.27 -{
   27.28 -    if ( privcmd_intf == NULL ) return;
   27.29 -    remove_xen_proc_entry("privcmd");
   27.30 -    privcmd_intf = NULL;
   27.31 -}
   27.32 -
   27.33 -
   27.34 -module_init(privcmd_init);
   27.35 -module_exit(privcmd_cleanup);
   27.36 +__initcall(privcmd_init);
    28.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/io.h	Mon Nov 29 17:11:03 2004 +0000
    28.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/io.h	Fri Dec 10 18:49:15 2004 +0000
    28.3 @@ -444,4 +444,7 @@ BUILDIO(b,b,char)
    28.4  BUILDIO(w,w,short)
    28.5  BUILDIO(l,,int)
    28.6  
    28.7 +/* We will be supplying our own /dev/mem implementation */
    28.8 +#define ARCH_HAS_DEV_MEM
    28.9 +
   28.10  #endif
    29.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgalloc.h	Mon Nov 29 17:11:03 2004 +0000
    29.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgalloc.h	Fri Dec 10 18:49:15 2004 +0000
    29.3 @@ -53,15 +53,4 @@ extern void pte_free(struct page *pte);
    29.4  
    29.5  #define check_pgt_cache()	do { } while (0)
    29.6  
    29.7 -int direct_remap_area_pages(struct mm_struct *mm,
    29.8 -                            unsigned long address, 
    29.9 -                            unsigned long machine_addr,
   29.10 -                            unsigned long size, 
   29.11 -                            pgprot_t prot,
   29.12 -                            domid_t  domid);
   29.13 -int __direct_remap_area_pages(struct mm_struct *mm,
   29.14 -			      unsigned long address, 
   29.15 -			      unsigned long size, 
   29.16 -			      mmu_update_t *v);
   29.17 -
   29.18  #endif /* _I386_PGALLOC_H */
    30.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Mon Nov 29 17:11:03 2004 +0000
    30.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Fri Dec 10 18:49:15 2004 +0000
    30.3 @@ -421,31 +421,19 @@ extern pte_t *lookup_address(unsigned lo
    30.4  #define update_mmu_cache(vma,address,pte) do { } while (0)
    30.5  #define  __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
    30.6  
    30.7 -#if 0
    30.8  #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
    30.9  	do {								  \
   30.10  		if (__dirty) {						  \
   30.11 -			queue_l1_entry_update((__ptep), (__entry).pte_low); \
   30.12 -			flush_tlb_page(__vma, __address);                 \
   30.13 -			xen_flush_page_update_queue();                    \
   30.14 -		}							  \
   30.15 -	} while (0)
   30.16 -#else
   30.17 -#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
   30.18 -	do {								  \
   30.19 -		if (__dirty) {						  \
   30.20 -		        if ( likely(vma->vm_mm == current->mm) ) {        \
   30.21 +		        if ( likely((__vma)->vm_mm == current->mm) ) {    \
   30.22  			    xen_flush_page_update_queue();                \
   30.23 -			    HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, UVMF_INVLPG); \
   30.24 +			    HYPERVISOR_update_va_mapping((__address)>>PAGE_SHIFT, (__entry), UVMF_INVLPG); \
   30.25  			} else {                                          \
   30.26                              xen_l1_entry_update((__ptep), (__entry).pte_low); \
   30.27 -			    flush_tlb_page(__vma, __address);             \
   30.28 +			    flush_tlb_page((__vma), (__address));         \
   30.29  			}                                                 \
   30.30  		}							  \
   30.31  	} while (0)
   30.32  
   30.33 -#endif
   30.34 -
   30.35  #define __HAVE_ARCH_PTEP_ESTABLISH
   30.36  #define ptep_establish(__vma, __address, __ptep, __entry)		\
   30.37  do {				  					\
   30.38 @@ -472,14 +460,14 @@ void make_page_writable(void *va);
   30.39  void make_pages_readonly(void *va, unsigned int nr);
   30.40  void make_pages_writable(void *va, unsigned int nr);
   30.41  
   30.42 -static inline unsigned long arbitrary_virt_to_machine(void *va)
   30.43 -{
   30.44 -	pgd_t *pgd = pgd_offset_k((unsigned long)va);
   30.45 -	pmd_t *pmd = pmd_offset(pgd, (unsigned long)va);
   30.46 -	pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va);
   30.47 -	unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK;
   30.48 -	return pa | ((unsigned long)va & (PAGE_SIZE-1));
   30.49 -}
   30.50 +#define arbitrary_virt_to_machine(__va)					\
   30.51 +({									\
   30.52 +	pgd_t *__pgd = pgd_offset_k((unsigned long)(__va));		\
   30.53 +	pmd_t *__pmd = pmd_offset(__pgd, (unsigned long)(__va));	\
   30.54 +	pte_t *__pte = pte_offset_kernel(__pmd, (unsigned long)(__va));	\
   30.55 +	unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK;	\
   30.56 +	__pa | ((unsigned long)(__va) & (PAGE_SIZE-1));			\
   30.57 +})
   30.58  
   30.59  #endif /* !__ASSEMBLY__ */
   30.60  
   30.61 @@ -487,6 +475,17 @@ static inline unsigned long arbitrary_vi
   30.62  #define kern_addr_valid(addr)	(1)
   30.63  #endif /* !CONFIG_DISCONTIGMEM */
   30.64  
   30.65 +int direct_remap_area_pages(struct mm_struct *mm,
   30.66 +                            unsigned long address, 
   30.67 +                            unsigned long machine_addr,
   30.68 +                            unsigned long size, 
   30.69 +                            pgprot_t prot,
   30.70 +                            domid_t  domid);
   30.71 +int __direct_remap_area_pages(struct mm_struct *mm,
   30.72 +			      unsigned long address, 
   30.73 +			      unsigned long size, 
   30.74 +			      mmu_update_t *v);
   30.75 +
   30.76  #define io_remap_page_range(vma,from,phys,size,prot)                     \
   30.77          direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO)
   30.78  
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/balloon.h	Fri Dec 10 18:49:15 2004 +0000
    31.3 @@ -0,0 +1,51 @@
    31.4 +/******************************************************************************
    31.5 + * balloon.h
    31.6 + *
    31.7 + * Xen balloon driver - enables returning/claiming memory to/from Xen.
    31.8 + *
    31.9 + * Copyright (c) 2003, B Dragovic
   31.10 + * Copyright (c) 2003-2004, M Williamson, K Fraser
   31.11 + * 
   31.12 + * This file may be distributed separately from the Linux kernel, or
   31.13 + * incorporated into other software packages, subject to the following license:
   31.14 + * 
   31.15 + * Permission is hereby granted, free of charge, to any person obtaining a copy
   31.16 + * of this source file (the "Software"), to deal in the Software without
   31.17 + * restriction, including without limitation the rights to use, copy, modify,
   31.18 + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
   31.19 + * and to permit persons to whom the Software is furnished to do so, subject to
   31.20 + * the following conditions:
   31.21 + * 
   31.22 + * The above copyright notice and this permission notice shall be included in
   31.23 + * all copies or substantial portions of the Software.
   31.24 + * 
   31.25 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   31.26 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   31.27 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
   31.28 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   31.29 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   31.30 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   31.31 + * IN THE SOFTWARE.
   31.32 + */
   31.33 +
   31.34 +#ifndef __ASM_BALLOON_H__
   31.35 +#define __ASM_BALLOON_H__
   31.36 +
   31.37 +/*
   31.38 + * Inform the balloon driver that it should allow some slop for device-driver
   31.39 + * memory activities.
   31.40 + */
   31.41 +extern void balloon_update_driver_allowance(long delta);
   31.42 +
   31.43 +/* Give up unmapped pages to the balloon driver. */
   31.44 +extern void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns);
   31.45 +
   31.46 +/*
   31.47 + * Prevent the balloon driver from changing the memory reservation during
   31.48 + * a driver critical region.
   31.49 + */
   31.50 +extern spinlock_t balloon_lock;
   31.51 +#define balloon_lock(__flags)   spin_lock_irqsave(&balloon_lock, __flags)
   31.52 +#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
   31.53 +
   31.54 +#endif /* __ASM_BALLOON_H__ */
    32.1 --- a/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h	Mon Nov 29 17:11:03 2004 +0000
    32.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/hypervisor.h	Fri Dec 10 18:49:15 2004 +0000
    32.3 @@ -58,13 +58,6 @@ void xen_cpu_idle (void);
    32.4  /* arch/xen/i386/kernel/hypervisor.c */
    32.5  void do_hypervisor_callback(struct pt_regs *regs);
    32.6  
    32.7 -/* arch/xen/i386/mm/init.c */
    32.8 -/* NOTE: caller must call flush_page_update_queue() */
    32.9 -#define PROT_ON  1
   32.10 -#define PROT_OFF 0
   32.11 -void /* __init */ protect_page(pgd_t *dpgd, void *page, int mode);
   32.12 -void /* __init */ protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode);
   32.13 -
   32.14  /* arch/xen/i386/kernel/head.S */
   32.15  void lgdt_finish(void);
   32.16  
   32.17 @@ -110,8 +103,6 @@ void MULTICALL_flush_page_update_queue(v
   32.18  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
   32.19  /* Allocate a contiguous empty region of low memory. Return virtual start. */
   32.20  unsigned long allocate_empty_lowmem_region(unsigned long pages);
   32.21 -/* Deallocate a contiguous region of low memory. Return it to the allocator. */
   32.22 -void deallocate_lowmem_region(unsigned long vstart, unsigned long pages);
   32.23  #endif
   32.24  
   32.25  /*
    33.1 --- a/tools/libxc/Makefile	Mon Nov 29 17:11:03 2004 +0000
    33.2 +++ b/tools/libxc/Makefile	Fri Dec 10 18:49:15 2004 +0000
    33.3 @@ -1,5 +1,5 @@
    33.4  
    33.5 -MAJOR    = 1.3
    33.6 +MAJOR    = 2.0
    33.7  MINOR    = 0
    33.8  SONAME   = libxc.so.$(MAJOR)
    33.9  
    34.1 --- a/tools/libxc/xc.h	Mon Nov 29 17:11:03 2004 +0000
    34.2 +++ b/tools/libxc/xc.h	Fri Dec 10 18:49:15 2004 +0000
    34.3 @@ -178,14 +178,19 @@ int xc_domain_setinitialmem(int xc_handl
    34.4                              unsigned int initial_memkb);
    34.5  
    34.6  int xc_domain_setmaxmem(int xc_handle,
    34.7 -                            u32 domid, 
    34.8 -                            unsigned int max_memkb);
    34.9 +                        u32 domid, 
   34.10 +                        unsigned int max_memkb);
   34.11  
   34.12  int xc_domain_setvmassist(int xc_handle,
   34.13                            u32 domid, 
   34.14                            unsigned int cmd,
   34.15                            unsigned int type);
   34.16  
   34.17 +typedef dom0_perfc_desc_t xc_perfc_desc_t;
   34.18 +/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */
   34.19 +int xc_perfc_control(int xc_handle,
   34.20 +                     u32 op,
   34.21 +                     xc_perfc_desc_t *desc);
   34.22  
   34.23  void *xc_map_foreign_range(int xc_handle, u32 dom,
   34.24                              int size, int prot,
    35.1 --- a/tools/libxc/xc_misc.c	Mon Nov 29 17:11:03 2004 +0000
    35.2 +++ b/tools/libxc/xc_misc.c	Fri Dec 10 18:49:15 2004 +0000
    35.3 @@ -74,10 +74,26 @@ int xc_sched_id(int xc_handle,
    35.4      op.cmd = DOM0_SCHED_ID;
    35.5      op.interface_version = DOM0_INTERFACE_VERSION;
    35.6      
    35.7 -    if((ret = do_dom0_op(xc_handle, &op))) return ret;
    35.8 +    if ( (ret = do_dom0_op(xc_handle, &op)) != 0 )
    35.9 +        return ret;
   35.10      
   35.11      *sched_id = op.u.sched_id.sched_id;
   35.12      
   35.13      return 0;
   35.14  }
   35.15  
   35.16 +int xc_perfc_control(int xc_handle,
   35.17 +                     u32 op,
   35.18 +                     xc_perfc_desc_t *desc)
   35.19 +{
   35.20 +    int rc;
   35.21 +    dom0_op_t dop;
   35.22 +
   35.23 +    dop.cmd = DOM0_PERFCCONTROL;
   35.24 +    dop.u.perfccontrol.op   = op;
   35.25 +    dop.u.perfccontrol.desc = desc;
   35.26 +
   35.27 +    rc = do_dom0_op(xc_handle, &dop);
   35.28 +
   35.29 +    return (rc == 0) ? dop.u.perfccontrol.nr_counters : rc;
   35.30 +}
    36.1 --- a/tools/libxutil/Makefile	Mon Nov 29 17:11:03 2004 +0000
    36.2 +++ b/tools/libxutil/Makefile	Fri Dec 10 18:49:15 2004 +0000
    36.3 @@ -30,7 +30,7 @@ CFLAGS   += -fno-strict-aliasing
    36.4  CFLAGS   += -Wp,-MD,.$(@F).d
    36.5  DEPS     = .*.d
    36.6  
    36.7 -MAJOR    := 1.3
    36.8 +MAJOR    := 2.0
    36.9  MINOR    := 0
   36.10  LIB_NAME := libxutil
   36.11  LIB      := $(LIB_NAME).so 
    37.1 --- a/tools/misc/Makefile	Mon Nov 29 17:11:03 2004 +0000
    37.2 +++ b/tools/misc/Makefile	Fri Dec 10 18:49:15 2004 +0000
    37.3 @@ -3,22 +3,18 @@ XEN_ROOT=../..
    37.4  include $(XEN_ROOT)/tools/Make.defs
    37.5  
    37.6  CC         = gcc
    37.7 -CFLAGS     = -Wall -O3 
    37.8 +CFLAGS     = -Wall -Werror -O3 
    37.9  
   37.10  INCLUDES += -I $(XEN_XC)
   37.11  INCLUDES += -I $(XEN_LIBXC)
   37.12 -INCLUDES += -I $(XEN_LIBXUTIL)
   37.13 -
   37.14 -CFLAGS += $(INCLUDES)
   37.15 +CFLAGS   += $(INCLUDES)
   37.16  
   37.17  HDRS     = $(wildcard *.h)
   37.18 -SRCS     = $(wildcard *.c)
   37.19 -OBJS     = $(patsubst %.c,%.o,$(SRCS))
   37.20  
   37.21 -TARGETS  = 
   37.22 +TARGETS  = xenperf
   37.23  
   37.24  INSTALL_BIN  = $(TARGETS) xencons
   37.25 -INSTALL_SBIN = netfix xm xend xensv
   37.26 +INSTALL_SBIN = netfix xm xend xensv xenperf
   37.27  
   37.28  all: $(TARGETS)
   37.29  	$(MAKE) -C miniterm
   37.30 @@ -32,7 +28,7 @@ install: all
   37.31  
   37.32  clean:
   37.33  	$(RM) *.o $(TARGETS) *~
   37.34 -	$(MAKE) -C miniterm clean	
   37.35 +	$(MAKE) -C miniterm clean
   37.36  
   37.37  %: %.c $(HDRS) Makefile
   37.38 -	$(CC) $(CFLAGS) -o $@ $<
   37.39 +	$(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc -L$(XEN_LIBXUTIL) -lxutil
    38.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    38.2 +++ b/tools/misc/xenperf.c	Fri Dec 10 18:49:15 2004 +0000
    38.3 @@ -0,0 +1,104 @@
    38.4 +/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*-
    38.5 + ****************************************************************************
    38.6 + * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge
    38.7 + ****************************************************************************
    38.8 + *
    38.9 + *        File: xenperf.c
   38.10 + *      Author: Rolf Neugebauer (rolf.neugebauer@intel.com)
   38.11 + *        Date: Nov 2004
   38.12 + * 
   38.13 + * Description: 
   38.14 + */
   38.15 +
   38.16 +
   38.17 +#include <xc.h>
   38.18 +#include <stdio.h>
   38.19 +#include <stdlib.h>
   38.20 +#include <sys/mman.h>
   38.21 +#include <errno.h>
   38.22 +#include <string.h>
   38.23 +
   38.24 +int main(int argc, char *argv[])
   38.25 +{
   38.26 +    int              i, j, xc_handle;
   38.27 +    xc_perfc_desc_t *pcd;
   38.28 +    unsigned int     num, sum, reset = 0;
   38.29 +
   38.30 +    if ( argc > 1 )
   38.31 +    {
   38.32 +        char *p = argv[1];
   38.33 +        if ( (*p++ == '-')  && (*p == 'r') )
   38.34 +            reset = 1;
   38.35 +        else
   38.36 +        {
   38.37 +            printf("%s: [-r]\n", argv[0]);
   38.38 +            printf("no args: print xen performance counters\n");
   38.39 +            printf("    -r : reset xen performance counters\n");
   38.40 +            return 0;
   38.41 +        }
   38.42 +    }   
   38.43 +
   38.44 +    if ( (xc_handle = xc_interface_open()) == -1 )
   38.45 +    {
   38.46 +        fprintf(stderr, "Error opening xc interface: %d (%s)\n",
   38.47 +                errno, strerror(errno));
   38.48 +        return 1;
   38.49 +    }
   38.50 +    
   38.51 +    if ( reset )
   38.52 +    {
   38.53 +        if ( xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_RESET,
   38.54 +                              NULL) < 0 )
   38.55 +        {
   38.56 +            fprintf(stderr, "Error reseting performance counters: %d (%s)\n",
   38.57 +                    errno, strerror(errno));
   38.58 +            return 1;
   38.59 +        }
   38.60 +
   38.61 +        return 0;
   38.62 +    }
   38.63 +
   38.64 +
   38.65 +    if ( (num = xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_QUERY,
   38.66 +                                 NULL)) < 0 )
   38.67 +    {
   38.68 +        fprintf(stderr, "Error getting number of perf counters: %d (%s)\n",
   38.69 +                errno, strerror(errno));
   38.70 +        return 1;
   38.71 +    }
   38.72 +
   38.73 +    pcd = malloc(sizeof(*pcd) * num);
   38.74 +
   38.75 +    if ( mlock(pcd, sizeof(*pcd) * num) != 0 )
   38.76 +    {
   38.77 +        fprintf(stderr, "Could not mlock descriptor buffer: %d (%s)\n",
   38.78 +                errno, strerror(errno));
   38.79 +        exit(-1);
   38.80 +    }
   38.81 +
   38.82 +    if ( xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_QUERY, pcd) <= 0 )
   38.83 +    {
   38.84 +        fprintf(stderr, "Error getting perf counter description: %d (%s)\n",
   38.85 +                errno, strerror(errno));
   38.86 +        return 1;
   38.87 +    }
   38.88 +
   38.89 +    munlock(pcd, sizeof(*pcd) * num);
   38.90 +
   38.91 +    for ( i = 0; i < num; i++ )
   38.92 +    {
   38.93 +        printf ("%-35s ", pcd[i].name);
   38.94 +        
   38.95 +        sum = 0;
   38.96 +        for ( j = 0; j < pcd[i].nr_vals; j++ )
   38.97 +            sum += pcd[i].vals[j];
   38.98 +        printf ("T=%10u ", (unsigned int)sum);
   38.99 +
  38.100 +        for ( j = 0; j < pcd[i].nr_vals; j++ )
  38.101 +            printf(" %10u", (unsigned int)pcd[i].vals[j]);
  38.102 +
  38.103 +        printf("\n");
  38.104 +    }
  38.105 +
  38.106 +    return 0;
  38.107 +}
    39.1 --- a/tools/python/xen/xend/XendClient.py	Mon Nov 29 17:11:03 2004 +0000
    39.2 +++ b/tools/python/xen/xend/XendClient.py	Fri Dec 10 18:49:15 2004 +0000
    39.3 @@ -228,10 +228,11 @@ class Xend:
    39.4          return self.xendPost(self.domainurl(id),
    39.5                               {'op'      : 'pause' })
    39.6  
    39.7 -    def xend_domain_shutdown(self, id, reason):
    39.8 +    def xend_domain_shutdown(self, id, reason, key=0):
    39.9          return self.xendPost(self.domainurl(id),
   39.10                               {'op'      : 'shutdown',
   39.11 -                              'reason'  : reason })
   39.12 +                              'reason'  : reason,
   39.13 +                              'key'     : key })
   39.14  
   39.15      def xend_domain_destroy(self, id, reason):
   39.16          return self.xendPost(self.domainurl(id),
    40.1 --- a/tools/python/xen/xend/XendDomain.py	Mon Nov 29 17:11:03 2004 +0000
    40.2 +++ b/tools/python/xen/xend/XendDomain.py	Fri Dec 10 18:49:15 2004 +0000
    40.3 @@ -455,7 +455,7 @@ class XendDomain:
    40.4          except Exception, ex:
    40.5              raise XendError(str(ex))
    40.6      
    40.7 -    def domain_shutdown(self, id, reason='poweroff'):
    40.8 +    def domain_shutdown(self, id, reason='poweroff', key=0):
    40.9          """Shutdown domain (nicely).
   40.10           - poweroff: restart according to exit code and restart mode
   40.11           - reboot:   restart on exit
   40.12 @@ -474,7 +474,7 @@ class XendDomain:
   40.13          eserver.inject('xend.domain.shutdown', [dominfo.name, dominfo.id, reason])
   40.14          if reason == 'halt':
   40.15              reason = 'poweroff'
   40.16 -        val = xend.domain_shutdown(dominfo.id, reason)
   40.17 +        val = xend.domain_shutdown(dominfo.id, reason, key)
   40.18          self.refresh_schedule()
   40.19          return val
   40.20  
    41.1 --- a/tools/python/xen/xend/encode.py	Mon Nov 29 17:11:03 2004 +0000
    41.2 +++ b/tools/python/xen/xend/encode.py	Fri Dec 10 18:49:15 2004 +0000
    41.3 @@ -14,6 +14,8 @@ import httplib
    41.4  import random
    41.5  import md5
    41.6  
    41.7 +from xen.util.ip import _readline, _readlines
    41.8 +
    41.9  # Extract from HTML4 spec.
   41.10  ## The following example illustrates "multipart/form-data"
   41.11  ## encoding. Suppose we have the following form:
   41.12 @@ -122,7 +124,7 @@ def encode_multipart(d):
   41.13              out.write('"\r\n')
   41.14              out.write('Content-Type: application/octet-stream\r\n')
   41.15              out.write('\r\n')
   41.16 -            for l in v.readlines():
   41.17 +            for l in _readlines(v):
   41.18                 out.write(l)  
   41.19          else:
   41.20              out.write('Content-Disposition: form-data; name="')
    42.1 --- a/tools/python/xen/xend/server/SrvDaemon.py	Mon Nov 29 17:11:03 2004 +0000
    42.2 +++ b/tools/python/xen/xend/server/SrvDaemon.py	Fri Dec 10 18:49:15 2004 +0000
    42.3 @@ -711,14 +711,14 @@ class Daemon:
    42.4              raise XendError('Invalid console id')
    42.5          console.disconnect()
    42.6  
    42.7 -    def domain_shutdown(self, dom, reason):
    42.8 +    def domain_shutdown(self, dom, reason, key=0):
    42.9          """Shutdown a domain.
   42.10          """
   42.11          dom = int(dom)
   42.12          ctrl = self.domainCF.getController(dom)
   42.13          if not ctrl:
   42.14              raise XendError('No domain controller: %s' % dom)
   42.15 -        ctrl.shutdown(reason)
   42.16 +        ctrl.shutdown(reason, key)
   42.17          return 0
   42.18  
   42.19      def domain_mem_target_set(self, dom, target):
    43.1 --- a/tools/python/xen/xend/server/SrvDomain.py	Mon Nov 29 17:11:03 2004 +0000
    43.2 +++ b/tools/python/xen/xend/server/SrvDomain.py	Fri Dec 10 18:49:15 2004 +0000
    43.3 @@ -47,7 +47,8 @@ class SrvDomain(SrvDir):
    43.4      def op_shutdown(self, op, req):
    43.5          fn = FormFn(self.xd.domain_shutdown,
    43.6                      [['dom', 'str'],
    43.7 -                     ['reason', 'str']])
    43.8 +                     ['reason', 'str'],
    43.9 +                     ['key', 'int']])
   43.10          val = fn(req.args, {'dom': self.dom.id})
   43.11          req.setResponseCode(http.ACCEPTED)
   43.12          req.setHeader("Location", "%s/.." % req.prePathURL())
    44.1 --- a/tools/python/xen/xend/server/domain.py	Mon Nov 29 17:11:03 2004 +0000
    44.2 +++ b/tools/python/xen/xend/server/domain.py	Fri Dec 10 18:49:15 2004 +0000
    44.3 @@ -28,7 +28,8 @@ class DomainController(controller.Contro
    44.4      """
    44.5      reasons = {'poweroff' : 'shutdown_poweroff_t',
    44.6                 'reboot'   : 'shutdown_reboot_t',
    44.7 -               'suspend'  : 'shutdown_suspend_t' }
    44.8 +               'suspend'  : 'shutdown_suspend_t',
    44.9 +               'sysrq'    : 'shutdown_sysrq_t' }
   44.10  
   44.11      def __init__(self, factory, dom):
   44.12          controller.Controller.__init__(self, factory, dom)
   44.13 @@ -36,16 +37,19 @@ class DomainController(controller.Contro
   44.14          self.addMethod(CMSG_MEM_REQUEST, 0, None)
   44.15          self.registerChannel()
   44.16  
   44.17 -    def shutdown(self, reason):
   44.18 +    def shutdown(self, reason, key=0):
   44.19          """Shutdown a domain.
   44.20  
   44.21          reason shutdown reason
   44.22 +        key    sysrq key (only if reason is 'sysrq')
   44.23          """
   44.24          msgtype = self.reasons.get(reason)
   44.25          if not msgtype:
   44.26              raise XendError('invalid reason:' + reason)
   44.27 -        msg = packMsg(msgtype, {})
   44.28 -        self.writeRequest(msg)
   44.29 +        extra = {}
   44.30 +        # Only a sysrq request carries a payload: the key to send.
   44.31 +        if reason == 'sysrq': extra['key'] = key
   44.32 +        self.writeRequest(packMsg(msgtype, extra))
   44.33  
   44.34      def mem_target_set(self, target):
   44.35          """Set domain memory target in pages.
    45.1 --- a/tools/python/xen/xend/server/messages.py	Mon Nov 29 17:11:03 2004 +0000
    45.2 +++ b/tools/python/xen/xend/server/messages.py	Fri Dec 10 18:49:15 2004 +0000
    45.3 @@ -197,10 +197,12 @@ CMSG_SHUTDOWN = 6
    45.4  CMSG_SHUTDOWN_POWEROFF  = 0
    45.5  CMSG_SHUTDOWN_REBOOT    = 1
    45.6  CMSG_SHUTDOWN_SUSPEND   = 2
    45.7 +CMSG_SHUTDOWN_SYSRQ     = 3
    45.8  
    45.9  STOPCODE_shutdown       = 0
   45.10  STOPCODE_reboot         = 1
   45.11  STOPCODE_suspend        = 2
   45.12 +STOPCODE_sysrq          = 3
   45.13  
   45.14  shutdown_formats = {
   45.15      'shutdown_poweroff_t':
   45.16 @@ -211,6 +213,9 @@ shutdown_formats = {
   45.17  
   45.18      'shutdown_suspend_t':
   45.19      (CMSG_SHUTDOWN, CMSG_SHUTDOWN_SUSPEND),
   45.20 +    
   45.21 +    'shutdown_sysrq_t':
   45.22 +    (CMSG_SHUTDOWN, CMSG_SHUTDOWN_SYSRQ)
   45.23      }
   45.24  
   45.25  msg_formats.update(shutdown_formats)
    46.1 --- a/tools/python/xen/xend/sxp.py	Mon Nov 29 17:11:03 2004 +0000
    46.2 +++ b/tools/python/xen/xend/sxp.py	Fri Dec 10 18:49:15 2004 +0000
    46.3 @@ -17,6 +17,7 @@ import types
    46.4  import errno
    46.5  import string
    46.6  from StringIO import StringIO
    46.7 +from xen.util.ip import _readline, _readlines
    46.8  
    46.9  __all__ = [
   46.10      "mime_type", 
   46.11 @@ -713,7 +714,7 @@ def parse(io):
   46.12      """
   46.13      pin = Parser()
   46.14      while 1:
   46.15 -        buf = io.readline()
   46.16 +        buf = _readline(io)
   46.17          pin.input(buf)
   46.18          if len(buf) == 0:
   46.19              break
    47.1 --- a/tools/python/xen/xm/main.py	Mon Nov 29 17:11:03 2004 +0000
    47.2 +++ b/tools/python/xen/xm/main.py	Fri Dec 10 18:49:15 2004 +0000
    47.3 @@ -11,7 +11,7 @@ from xen.xend import PrettyPrint
    47.4  from xen.xend import sxp
    47.5  from xen.xend.XendClient import XendError, server
    47.6  from xen.xend.XendClient import main as xend_client_main
    47.7 -from xen.xm import create, destroy, migrate, shutdown
    47.8 +from xen.xm import create, destroy, migrate, shutdown, sysrq
    47.9  from xen.xm.opts import *
   47.10  
   47.11  class Group:
   47.12 @@ -401,6 +401,19 @@ class ProgShutdown(Prog):
   47.13  
   47.14  xm.prog(ProgShutdown)
   47.15  
   47.16 +class ProgSysrq(Prog):
   47.17 +    group = 'domain'
   47.18 +    name = "sysrq"
   47.19 +    info = """Send a sysrq to a domain."""
   47.20 +    # Both entry points below delegate to the xen.xm.sysrq module.
   47.21 +    def help(self, args):
   47.22 +        sysrq.main([args[0], '-h'])  # '-h' makes sysrq.main print its usage and return
   47.23 +    
   47.24 +    def main(self, args):
   47.25 +        sysrq.main(args)
   47.26 +
   47.27 +xm.prog(ProgSysrq)
   47.28 +
   47.29  class ProgPause(Prog):
   47.30      group = 'domain'
   47.31      name = "pause"
    48.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    48.2 +++ b/tools/python/xen/xm/sysrq.py	Fri Dec 10 18:49:15 2004 +0000
    48.3 @@ -0,0 +1,39 @@
    48.4 +# (C) Matthew Bloch <matthew@bytemark.co.uk> 2004
    48.5 +
    48.6 +"""Send a Linux sysrq key to a domain.
    48.7 +"""
    48.8 +import string
    48.9 +import sys
   48.10 +import time
   48.11 +
   48.12 +from xen.xend.XendClient import server
   48.13 +from xen.xm.opts import *
   48.14 +
   48.15 +DOM0_NAME = 'Domain-0'
   48.16 +DOM0_ID = '0'
   48.17 +
   48.18 +gopts = Opts(use="""[DOM] [letter]
   48.19 +
   48.20 +Sends a Linux sysrq to a domain.
   48.21 +""")
   48.22 +
   48.23 +gopts.opt('help', short='h',
   48.24 +         fn=set_true, default=0,
   48.25 +         use="Print this help.")
   48.26 +
   48.27 +def sysrq(dom, req):
   48.28 +    """Send a 'sysrq' shutdown request carrying key code req for domain dom."""
   48.29 +    server.xend_domain_shutdown(dom, 'sysrq', req)
   48.30 +
   48.31 +def main(argv):
   48.32 +    """Command entry point: expects a domain and a sysrq letter."""
   48.33 +    opts = gopts
   48.34 +    args = opts.parse(argv)
   48.35 +    if opts.vals.help:
   48.36 +        opts.usage()
   48.37 +        return
   48.38 +    # No options for the moment; both positional arguments are required.
   48.39 +    if len(args) < 1: opts.err('Missing domain')
   48.40 +    # An empty string has no first character to take the key code from.
   48.41 +    if len(args) < 2 or not args[1]: opts.err('Missing sysrq character')
   48.42 +    sysrq(args[0], ord(args[1][0]))
    49.1 --- a/xen/arch/x86/memory.c	Mon Nov 29 17:11:03 2004 +0000
    49.2 +++ b/xen/arch/x86/memory.c	Fri Dec 10 18:49:15 2004 +0000
    49.3 @@ -1302,9 +1302,6 @@ int do_mmu_update(
    49.4      u32 type_info;
    49.5      domid_t domid;
    49.6  
    49.7 -    perfc_incrc(calls_to_mmu_update); 
    49.8 -    perfc_addc(num_page_updates, count);
    49.9 -
   49.10      LOCK_BIGLOCK(d);
   49.11  
   49.12      cleanup_writable_pagetable(d, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE);
   49.13 @@ -1336,6 +1333,9 @@ int do_mmu_update(
   49.14          }
   49.15      }
   49.16  
   49.17 +    perfc_incrc(calls_to_mmu_update); 
   49.18 +    perfc_addc(num_page_updates, count);
   49.19 +
   49.20      if ( unlikely(!array_access_ok(VERIFY_READ, ureqs, count, sizeof(req))) )
   49.21      {
   49.22          rc = -EFAULT;
    50.1 --- a/xen/arch/x86/x86_32/entry.S	Mon Nov 29 17:11:03 2004 +0000
    50.2 +++ b/xen/arch/x86/x86_32/entry.S	Fri Dec 10 18:49:15 2004 +0000
    50.3 @@ -341,6 +341,7 @@ process_guest_exception_and_events:
    50.4          leal EDOMAIN_trap_bounce(%ebx),%edx
    50.5          testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx)
    50.6          jz   test_all_events
    50.7 +        cli  # create_bounce_frame needs CLI for pre-exceptions to work
    50.8          call create_bounce_frame
    50.9          jmp  test_all_events
   50.10  
    51.1 --- a/xen/common/dom0_ops.c	Mon Nov 29 17:11:03 2004 +0000
    51.2 +++ b/xen/common/dom0_ops.c	Fri Dec 10 18:49:15 2004 +0000
    51.3 @@ -666,6 +666,16 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    51.4      }
    51.5      break;
    51.6  
    51.7 +#ifdef PERF_COUNTERS
    51.8 +    case DOM0_PERFCCONTROL:
    51.9 +    {
   51.10 +        extern int perfc_control(dom0_perfccontrol_t *);
   51.11 +        ret = perfc_control(&op->u.perfccontrol);
   51.12 +        copy_to_user(u_dom0_op, op, sizeof(*op));
   51.13 +    }
   51.14 +    break;
   51.15 +#endif
   51.16 +
   51.17      default:
   51.18          ret = arch_do_dom0_op(op,u_dom0_op);
   51.19  
    52.1 --- a/xen/common/perfc.c	Mon Nov 29 17:11:03 2004 +0000
    52.2 +++ b/xen/common/perfc.c	Fri Dec 10 18:49:15 2004 +0000
    52.3 @@ -4,6 +4,8 @@
    52.4  #include <xen/time.h>
    52.5  #include <xen/perfc.h>
    52.6  #include <xen/keyhandler.h> 
    52.7 +#include <public/dom0_ops.h>
    52.8 +#include <asm/uaccess.h>
    52.9  
   52.10  #undef  PERFCOUNTER
   52.11  #undef  PERFCOUNTER_CPU
   52.12 @@ -79,8 +81,9 @@ void perfc_reset(unsigned char key)
   52.13      s_time_t now = NOW();
   52.14      atomic_t *counters = (atomic_t *)&perfcounters;
   52.15  
   52.16 -    printk("Xen performance counters RESET (now = 0x%08X:%08X)\n",
   52.17 -           (u32)(now>>32), (u32)now);
   52.18 +    if ( key != '\0' )
   52.19 +        printk("Xen performance counters RESET (now = 0x%08X:%08X)\n",
   52.20 +               (u32)(now>>32), (u32)now);
   52.21  
   52.22      /* leave STATUS counters alone -- don't reset */
   52.23  
   52.24 @@ -109,3 +112,107 @@ void perfc_reset(unsigned char key)
   52.25      }
   52.26  }
   52.27  
   52.28 +static dom0_perfc_desc_t perfc_d[NR_PERFCTRS]; /* snapshot staged here before copy-out */
   52.29 +static int               perfc_init = 0;        /* nonzero once names/sizes are filled in */
   52.30 +static int perfc_copy_info(dom0_perfc_desc_t *desc)
   52.31 +{
   52.32 +    unsigned int i, j;
   52.33 +    atomic_t *counters = (atomic_t *)&perfcounters;
   52.34 +    /* Snapshot all counters into perfc_d[] and copy them to desc (no-op if NULL). */
   52.35 +    if ( desc == NULL )
   52.36 +        return 0;
   52.37 +
   52.38 +    /* We only copy the name and array-size information once. */
   52.39 +    if ( !perfc_init ) 
   52.40 +    {
   52.41 +        for ( i = 0; i < NR_PERFCTRS; i++ )
   52.42 +        {
   52.43 +            strncpy(perfc_d[i].name, perfc_info[i].name,
   52.44 +                    sizeof(perfc_d[i].name));
   52.45 +            perfc_d[i].name[sizeof(perfc_d[i].name)-1] = '\0'; /* strncpy may not terminate */
   52.46 +
   52.47 +            switch ( perfc_info[i].type )
   52.48 +            {
   52.49 +            case TYPE_SINGLE:
   52.50 +            case TYPE_S_SINGLE:
   52.51 +                perfc_d[i].nr_vals = 1;
   52.52 +                break;
   52.53 +            case TYPE_CPU:
   52.54 +            case TYPE_S_CPU:
   52.55 +                perfc_d[i].nr_vals = smp_num_cpus;
   52.56 +                break;
   52.57 +            case TYPE_ARRAY:
   52.58 +            case TYPE_S_ARRAY:
   52.59 +                perfc_d[i].nr_vals = perfc_info[i].nr_elements;
   52.60 +                break;
   52.61 +            }
   52.62 +            /* Clamp so the gather loops below never write past vals[]. */
   52.63 +            if ( perfc_d[i].nr_vals > ARRAY_SIZE(perfc_d[i].vals) )
   52.64 +                perfc_d[i].nr_vals = ARRAY_SIZE(perfc_d[i].vals);
   52.65 +        }
   52.66 +
   52.67 +        perfc_init = 1;
   52.68 +    }
   52.69 +
   52.70 +    /* We gather the counts together every time. */
   52.71 +    for ( i = 0; i < NR_PERFCTRS; i++ )
   52.72 +    {
   52.73 +        switch ( perfc_info[i].type )
   52.74 +        {
   52.75 +        case TYPE_SINGLE:
   52.76 +        case TYPE_S_SINGLE:
   52.77 +            perfc_d[i].vals[0] = atomic_read(&counters[0]);
   52.78 +            counters += 1;
   52.79 +            break;
   52.80 +        case TYPE_CPU:
   52.81 +        case TYPE_S_CPU:
   52.82 +            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
   52.83 +                perfc_d[i].vals[j] = atomic_read(&counters[j]);
   52.84 +            counters += NR_CPUS; /* advance by NR_CPUS even if fewer values were read */
   52.85 +            break;
   52.86 +        case TYPE_ARRAY:
   52.87 +        case TYPE_S_ARRAY:
   52.88 +            for ( j = 0; j < perfc_d[i].nr_vals; j++ )
   52.89 +                perfc_d[i].vals[j] = atomic_read(&counters[j]);
   52.90 +            counters += perfc_info[i].nr_elements; /* skip the full array, clamped or not */
   52.91 +            break;
   52.92 +        }
   52.93 +    }
   52.94 +    /* Copy the whole table to the caller's buffer; -EFAULT if the copy fails. */
   52.95 +    return (copy_to_user(desc, perfc_d, NR_PERFCTRS * sizeof(*desc)) ?
   52.96 +            -EFAULT : 0);
   52.97 +}
   52.98 +
   52.99 +/* Dom0 control of perf counters: returns 0, -EFAULT (copy-out) or -EINVAL (bad op). */
  52.100 +int perfc_control(dom0_perfccontrol_t *pc)
  52.101 +{
  52.102 +    static spinlock_t lock = SPIN_LOCK_UNLOCKED; /* serialises use of perfc_d[] */
  52.103 +    u32 op = pc->op;
  52.104 +    int rc;
  52.105 +
  52.106 +    pc->nr_counters = NR_PERFCTRS;
  52.107 +
  52.108 +    spin_lock(&lock);
  52.109 +
  52.110 +    switch ( op )
  52.111 +    {
  52.112 +    case DOM0_PERFCCONTROL_OP_RESET:
  52.113 +        /* Hand back the final values first; key 0 keeps the reset silent. */
  52.114 +        rc = perfc_copy_info(pc->desc);
  52.115 +        perfc_reset(0);
  52.116 +        break;
  52.117 +
  52.118 +    case DOM0_PERFCCONTROL_OP_QUERY:
  52.119 +        /* Propagate a failed copy-out instead of reporting success. */
  52.120 +        rc = perfc_copy_info(pc->desc);
  52.121 +        break;
  52.122 +
  52.123 +    default:
  52.124 +        rc = -EINVAL;
  52.125 +        break;
  52.126 +    }
  52.127 +
  52.128 +    spin_unlock(&lock);
  52.129 +
  52.130 +    return rc;
  52.131 +}
    53.1 --- a/xen/include/public/dom0_ops.h	Mon Nov 29 17:11:03 2004 +0000
    53.2 +++ b/xen/include/public/dom0_ops.h	Fri Dec 10 18:49:15 2004 +0000
    53.3 @@ -386,6 +386,25 @@ typedef struct {
    53.4      u32      __pad1;
    53.5  } PACKED dom0_read_memtype_t; /* 32 bytes */
    53.6  
    53.7 +/* Interface for controlling Xen software performance counters. */
    53.8 +#define DOM0_PERFCCONTROL        34
    53.9 +/* Sub-operations: */
   53.10 +#define DOM0_PERFCCONTROL_OP_RESET 1   /* Report counters, then reset them. */
   53.11 +#define DOM0_PERFCCONTROL_OP_QUERY 2   /* Get perfctr information. */
   53.12 +typedef struct {
   53.13 +    u8      name[80];               /*  0: NUL-terminated name of perf counter */
   53.14 +    u32     nr_vals;                /* 80: number of values for this counter */
   53.15 +    u32     vals[64];               /* 84: values; first nr_vals entries valid */
   53.16 +} PACKED dom0_perfc_desc_t; /* 340 bytes */
   53.17 +typedef struct {
   53.18 +    /* IN variables. */
   53.19 +    u32            op;                /*  0: DOM0_PERFCCONTROL_OP_??? */
   53.20 +    /* OUT variables. */
   53.21 +    u32            nr_counters;       /*  4: number of counters */
   53.22 +    dom0_perfc_desc_t *desc;          /*  8: buffer for nr_counters descs (or NULL) */
   53.23 +    MEMORY_PADDING;
   53.24 +} PACKED dom0_perfccontrol_t; /* 16 bytes */
   53.25 +
   53.26  typedef struct {
   53.27      u32 cmd;                          /* 0 */
   53.28      u32 interface_version;            /* 4 */ /* DOM0_INTERFACE_VERSION */
   53.29 @@ -419,6 +438,7 @@ typedef struct {
   53.30          dom0_add_memtype_t       add_memtype;
   53.31          dom0_del_memtype_t       del_memtype;
   53.32          dom0_read_memtype_t      read_memtype;
   53.33 +        dom0_perfccontrol_t      perfccontrol;
   53.34      } PACKED u;
   53.35  } PACKED dom0_op_t; /* 80 bytes */
   53.36  
    54.1 --- a/xen/include/public/io/domain_controller.h	Mon Nov 29 17:11:03 2004 +0000
    54.2 +++ b/xen/include/public/io/domain_controller.h	Fri Dec 10 18:49:15 2004 +0000
    54.3 @@ -555,6 +555,7 @@ typedef struct {
    54.4  #define CMSG_SHUTDOWN_REBOOT    1   /* Clean shutdown (SHUTDOWN_reboot).     */
    54.5  #define CMSG_SHUTDOWN_SUSPEND   2   /* Create suspend info, then             */
    54.6                                      /* SHUTDOWN_suspend.                     */
    54.7 +#define CMSG_SHUTDOWN_SYSRQ     3
    54.8  
    54.9  
   54.10  /******************************************************************************
    55.1 --- a/xen/include/public/xen.h	Mon Nov 29 17:11:03 2004 +0000
    55.2 +++ b/xen/include/public/xen.h	Fri Dec 10 18:49:15 2004 +0000
    55.3 @@ -9,8 +9,10 @@
    55.4  #ifndef __XEN_PUBLIC_XEN_H__
    55.5  #define __XEN_PUBLIC_XEN_H__
    55.6  
    55.7 +#ifndef PACKED
    55.8  /* GCC-specific way to pack structure definitions (no implicit padding). */
    55.9  #define PACKED __attribute__ ((packed))
   55.10 +#endif
   55.11  
   55.12  #if defined(__i386__)
   55.13  #include "arch-x86_32.h"