ia64/xen-unstable
changeset 3251:a169836882cb
bitkeeper revision 1.1159.170.59 (41b4c2fdJ2gj_BWy27Vj3ptayZp_yg)
sync w/ head.
sync w/ head.
line diff
1.1 --- a/.rootkeys Mon Dec 06 20:03:12 2004 +0000 1.2 +++ b/.rootkeys Mon Dec 06 20:37:17 2004 +0000 1.3 @@ -253,6 +253,8 @@ 412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6 1.4 410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.10-rc2-xen-sparse/mm/page_alloc.c 1.5 41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.10-rc2-xen-sparse/net/core/skbuff.c 1.6 4149ec79wMpIHdvbntxqVGLRZZjPxw linux-2.6.10-rc2-xen-sparse/net/ipv4/raw.c 1.7 +41ab6fa06JdF7jxUsuDcjN3UhuIAxg linux-2.6.9-xen-sparse/arch/xen/kernel/devmem.c 1.8 +41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.9-xen-sparse/include/asm-xen/balloon.h 1.9 413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile 1.10 413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree 1.11 413cb1e5kY_Zil7-b0kI6hvCIxBEYg netbsd-2.0-xen-sparse/nbconfig-xen 1.12 @@ -378,6 +380,7 @@ 3f5ef5a2ir1kVAthS14Dc5QIRCEFWg tools/mis 1.13 3f5ef5a2dTZP0nnsFoeq2jRf3mWDDg tools/misc/xen-clone.README 1.14 405eedf6_nnNhFQ1I85lhCkLK6jFGA tools/misc/xencons 1.15 40c9c4697z76HDfkCLdMhmaEwzFoNQ tools/misc/xend 1.16 +41adc641dV-0cDLSyzMs5BT8nL7v3Q tools/misc/xenperf.c 1.17 4107986eMWVdBoz4tXYoOscpN_BCYg tools/misc/xensv 1.18 4056f5155QYZdsk-1fLdjsZPFTnlhg tools/misc/xensymoops 1.19 40cf2937dqM1jWW87O5OoOYND8leuA tools/misc/xm
2.1 --- a/BitKeeper/etc/ignore Mon Dec 06 20:03:12 2004 +0000 2.2 +++ b/BitKeeper/etc/ignore Mon Dec 06 20:37:17 2004 +0000 2.3 @@ -58,7 +58,7 @@ tools/balloon/balloon 2.4 tools/check/.* 2.5 tools/libxc/xen/* 2.6 tools/misc/miniterm/miniterm 2.7 -tools/misc/xen_cpuperf 2.8 +tools/misc/xenperf 2.9 tools/vnet/gc 2.10 tools/vnet/gc*/* 2.11 tools/vnet/vnet-module/.tmp_versions/*
3.1 --- a/Makefile Mon Dec 06 20:03:12 2004 +0000 3.2 +++ b/Makefile Mon Dec 06 20:37:17 2004 +0000 3.3 @@ -19,10 +19,30 @@ export INSTALL_DIR 3.4 include buildconfigs/Rules.mk 3.5 3.6 .PHONY: all dist install xen tools kernels docs world clean mkpatches mrproper 3.7 -.PHONY: kbuild kdelete kclean 3.8 +.PHONY: kbuild kdelete kclean install-tools install-xen install-docs 3.9 +.PHONY: install-kernels 3.10 3.11 all: dist 3.12 3.13 +# install everything into the standard system directories 3.14 +# NB: install explicitly does not check that everything is up to date! 3.15 +install: install-tools install-xen install-kernels install-docs 3.16 + 3.17 +install-xen: 3.18 + $(MAKE) -C xen install 3.19 + 3.20 +install-tools: 3.21 + $(MAKE) -C tools install 3.22 + 3.23 +install-kernels: 3.24 + $(shell cp -a $(INSTALL_DIR)/boot/* /boot/) 3.25 + $(shell cp -a $(INSTALL_DIR)/lib/modules/* /lib/modules/) 3.26 + $(shell cp -dR $(INSTALL_DIR)/boot/*$(LINUX_VER)* $(prefix)/boot/) 3.27 + $(shell cp -dR $(INSTALL_DIR)/lib/modules/* $(prefix)/lib/modules/) 3.28 + 3.29 +install-docs: 3.30 + sh ./docs/check_pkgs && $(MAKE) -C docs install || true 3.31 + 3.32 # build and install everything into local dist directory 3.33 dist: xen tools kernels docs 3.34 install -m0644 ./COPYING $(DIST_DIR) 3.35 @@ -31,17 +51,6 @@ dist: xen tools kernels docs 3.36 mkdir -p $(DIST_DIR)/check 3.37 install -m0755 tools/check/chk tools/check/check_* $(DIST_DIR)/check 3.38 3.39 -# install everything into the standard system directories 3.40 -# NB: install explicitly does not check that everything is up to date! 
3.41 -install: 3.42 - $(MAKE) -C xen install 3.43 - $(MAKE) -C tools install 3.44 - $(shell cp -a $(INSTALL_DIR)/boot/* /boot/) 3.45 - $(shell cp -a $(INSTALL_DIR)/lib/modules/* /lib/modules/) 3.46 - sh ./docs/check_pkgs && $(MAKE) -C docs install || true 3.47 - $(shell cp -dR $(INSTALL_DIR)/boot/*$(LINUX_VER)* $(prefix)/boot/) 3.48 - $(shell cp -dR $(INSTALL_DIR)/lib/modules/* $(prefix)/lib/modules/) 3.49 - 3.50 xen: 3.51 $(MAKE) prefix=$(INSTALL_DIR) dist=yes -C xen install 3.52
4.1 --- a/linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile Mon Dec 06 20:03:12 2004 +0000 4.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/drivers/balloon/Makefile Mon Dec 06 20:37:17 2004 +0000 4.3 @@ -1,3 +1,4 @@ 4.4 O_TARGET := drv.o 4.5 +export-objs := balloon.o 4.6 obj-y := balloon.o 4.7 include $(TOPDIR)/Rules.make
5.1 --- a/linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile Mon Dec 06 20:03:12 2004 +0000 5.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/Makefile Mon Dec 06 20:37:17 2004 +0000 5.3 @@ -6,7 +6,7 @@ all: kernel.o head.o init_task.o 5.4 5.5 O_TARGET := kernel.o 5.6 5.7 -export-objs := i386_ksyms.o gnttab.o skbuff.o 5.8 +export-objs := i386_ksyms.o gnttab.o skbuff.o ctrl_if.o 5.9 5.10 obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ 5.11 ptrace.o ioport.o ldt.o setup.o time.o sys_i386.o \
6.1 --- a/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c Mon Dec 06 20:03:12 2004 +0000 6.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/kernel/setup.c Mon Dec 06 20:37:17 2004 +0000 6.3 @@ -275,7 +275,8 @@ void __init setup_arch(char **cmdline_p) 6.4 * arch/xen/drivers/balloon/balloon.c 6.5 */ 6.6 mem_param = parse_mem_cmdline(cmdline_p); 6.7 - if (!mem_param) mem_param = xen_start_info.nr_pages; 6.8 + if (mem_param < xen_start_info.nr_pages) 6.9 + mem_param = xen_start_info.nr_pages; 6.10 6.11 #define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) 6.12 #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) 6.13 @@ -303,6 +304,7 @@ void __init setup_arch(char **cmdline_p) 6.14 printk(KERN_WARNING "Use a PAE enabled kernel.\n"); 6.15 else 6.16 printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); 6.17 + max_pfn = lmax_low_pfn; 6.18 #else /* !CONFIG_HIGHMEM */ 6.19 #ifndef CONFIG_X86_PAE 6.20 if (max_pfn > MAX_NONPAE_PFN) { 6.21 @@ -350,8 +352,6 @@ void __init setup_arch(char **cmdline_p) 6.22 */ 6.23 max_low_pfn = lmax_low_pfn; 6.24 6.25 - 6.26 - 6.27 #ifdef CONFIG_BLK_DEV_INITRD 6.28 if ( xen_start_info.mod_start != 0 ) 6.29 { 6.30 @@ -375,6 +375,20 @@ void __init setup_arch(char **cmdline_p) 6.31 6.32 paging_init(); 6.33 6.34 + /* Make sure we have a large enough P->M table. */ 6.35 + if ( max_pfn > xen_start_info.nr_pages ) 6.36 + { 6.37 + phys_to_machine_mapping = alloc_bootmem_low_pages( 6.38 + max_pfn * sizeof(unsigned long)); 6.39 + memset(phys_to_machine_mapping, ~0, max_pfn * sizeof(unsigned long)); 6.40 + memcpy(phys_to_machine_mapping, 6.41 + (unsigned long *)xen_start_info.mfn_list, 6.42 + xen_start_info.nr_pages * sizeof(unsigned long)); 6.43 + free_bootmem(__pa(xen_start_info.mfn_list), 6.44 + PFN_PHYS(PFN_UP(xen_start_info.nr_pages * 6.45 + sizeof(unsigned long)))); 6.46 + } 6.47 + 6.48 pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); 6.49 for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) 6.50 {
7.1 --- a/linux-2.4.28-xen-sparse/arch/xen/mm/init.c Mon Dec 06 20:03:12 2004 +0000 7.2 +++ b/linux-2.4.28-xen-sparse/arch/xen/mm/init.c Mon Dec 06 20:37:17 2004 +0000 7.3 @@ -213,23 +213,16 @@ static void __init fixrange_init (unsign 7.4 7.5 static void __init pagetable_init (void) 7.6 { 7.7 - unsigned long vaddr, end; 7.8 + unsigned long vaddr, end, ram_end; 7.9 pgd_t *kpgd, *pgd, *pgd_base; 7.10 int i, j, k; 7.11 pmd_t *kpmd, *pmd; 7.12 pte_t *kpte, *pte, *pte_base; 7.13 7.14 - /* create tables only for boot_pfn frames. max_low_pfn may be sized for 7.15 - * pages yet to be allocated from the hypervisor, or it may be set 7.16 - * to override the xen_start_info amount of memory 7.17 - */ 7.18 - int boot_pfn = min(xen_start_info.nr_pages,max_low_pfn); 7.19 - 7.20 - /* 7.21 - * This can be zero as well - no problem, in that case we exit 7.22 - * the loops anyway due to the PTRS_PER_* conditions. 7.23 - */ 7.24 - end = (unsigned long)__va(boot_pfn *PAGE_SIZE); 7.25 + end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); 7.26 + ram_end = (unsigned long)__va(xen_start_info.nr_pages * PAGE_SIZE); 7.27 + if ( ram_end > end ) 7.28 + ram_end = end; 7.29 7.30 pgd_base = init_mm.pgd; 7.31 i = __pgd_offset(PAGE_OFFSET); 7.32 @@ -237,12 +230,12 @@ static void __init pagetable_init (void) 7.33 7.34 for (; i < PTRS_PER_PGD; pgd++, i++) { 7.35 vaddr = i*PGDIR_SIZE; 7.36 - if (end && (vaddr >= end)) 7.37 + if (vaddr >= end) 7.38 break; 7.39 pmd = (pmd_t *)pgd; 7.40 for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { 7.41 vaddr = i*PGDIR_SIZE + j*PMD_SIZE; 7.42 - if (end && (vaddr >= end)) 7.43 + if (vaddr >= end) 7.44 break; 7.45 7.46 /* Filled in for us already? 
*/ 7.47 @@ -250,10 +243,11 @@ static void __init pagetable_init (void) 7.48 continue; 7.49 7.50 pte_base = pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 7.51 + clear_page(pte_base); 7.52 7.53 for (k = 0; k < PTRS_PER_PTE; pte++, k++) { 7.54 vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; 7.55 - if (end && (vaddr >= end)) 7.56 + if (vaddr >= ram_end) 7.57 break; 7.58 *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); 7.59 } 7.60 @@ -329,28 +323,14 @@ static inline int page_is_ram (unsigned 7.61 return 1; 7.62 } 7.63 7.64 -static inline int page_kills_ppro(unsigned long pagenr) 7.65 -{ 7.66 - return 0; 7.67 -} 7.68 - 7.69 #ifdef CONFIG_HIGHMEM 7.70 -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) 7.71 +void __init one_highpage_init(struct page *page, int free_page) 7.72 { 7.73 - if (!page_is_ram(pfn)) { 7.74 - SetPageReserved(page); 7.75 - return; 7.76 - } 7.77 - 7.78 - if (bad_ppro && page_kills_ppro(pfn)) { 7.79 - SetPageReserved(page); 7.80 - return; 7.81 - } 7.82 - 7.83 ClearPageReserved(page); 7.84 set_bit(PG_highmem, &page->flags); 7.85 atomic_set(&page->count, 1); 7.86 - __free_page(page); 7.87 + if ( free_page ) 7.88 + __free_page(page); 7.89 totalhigh_pages++; 7.90 } 7.91 #endif /* CONFIG_HIGHMEM */ 7.92 @@ -392,8 +372,9 @@ static int __init free_pages_init(void) 7.93 reservedpages++; 7.94 } 7.95 #ifdef CONFIG_HIGHMEM 7.96 - for (pfn = xen_start_info.nr_pages-1; pfn >= highstart_pfn; pfn--) 7.97 - one_highpage_init((struct page *) (mem_map + pfn), pfn, bad_ppro); 7.98 + for (pfn = highend_pfn-1; pfn >= highstart_pfn; pfn--) 7.99 + one_highpage_init((struct page *) (mem_map + pfn), pfn, 7.100 + (pfn < xen_start_info.nr_pages)); 7.101 totalram_pages += totalhigh_pages; 7.102 #endif 7.103 return reservedpages;
8.1 --- a/linux-2.4.28-xen-sparse/mkbuildtree Mon Dec 06 20:03:12 2004 +0000 8.2 +++ b/linux-2.4.28-xen-sparse/mkbuildtree Mon Dec 06 20:37:17 2004 +0000 8.3 @@ -204,6 +204,7 @@ ln -sf ../asm-i386/unaligned.h 8.4 ln -sf ../asm-i386/unistd.h 8.5 ln -sf ../asm-i386/user.h 8.6 ln -sf ../asm-i386/vm86.h 8.7 +ln -sf ../../${LINUX_26}/include/asm-xen/balloon.h 8.8 ln -sf ../../${LINUX_26}/include/asm-xen/ctrl_if.h 8.9 ln -sf ../../${LINUX_26}/include/asm-xen/evtchn.h 8.10 ln -sf ../../${LINUX_26}/include/asm-xen/gnttab.h
9.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xen0_defconfig Mon Dec 06 20:03:12 2004 +0000 9.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xen0_defconfig Mon Dec 06 20:37:17 2004 +0000 9.3 @@ -1,7 +1,7 @@ 9.4 # 9.5 # Automatically generated make config: don't edit 9.6 # Linux kernel version: 2.6.10-rc2-xen0 9.7 -# Fri Nov 19 20:16:38 2004 9.8 +# Wed Dec 1 09:22:49 2004 9.9 # 9.10 CONFIG_XEN=y 9.11 CONFIG_ARCH_XEN=y 9.12 @@ -152,10 +152,10 @@ CONFIG_DEBUG_KERNEL=y 9.13 CONFIG_EARLY_PRINTK=y 9.14 # CONFIG_DEBUG_STACKOVERFLOW is not set 9.15 # CONFIG_DEBUG_STACK_USAGE is not set 9.16 -# CONFIG_DEBUG_SLAB is not set 9.17 +CONFIG_DEBUG_SLAB=y 9.18 CONFIG_MAGIC_SYSRQ=y 9.19 # CONFIG_DEBUG_SPINLOCK is not set 9.20 -# CONFIG_DEBUG_PAGEALLOC is not set 9.21 +CONFIG_DEBUG_PAGEALLOC=y 9.22 # CONFIG_DEBUG_INFO is not set 9.23 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set 9.24 # CONFIG_FRAME_POINTER is not set
10.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xenU_defconfig Mon Dec 06 20:03:12 2004 +0000 10.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/configs/xenU_defconfig Mon Dec 06 20:37:17 2004 +0000 10.3 @@ -1,7 +1,7 @@ 10.4 # 10.5 # Automatically generated make config: don't edit 10.6 # Linux kernel version: 2.6.10-rc2-xenU 10.7 -# Fri Nov 19 20:16:52 2004 10.8 +# Wed Dec 1 09:22:09 2004 10.9 # 10.10 CONFIG_XEN=y 10.11 CONFIG_ARCH_XEN=y 10.12 @@ -47,6 +47,7 @@ CONFIG_KOBJECT_UEVENT=y 10.13 # CONFIG_IKCONFIG is not set 10.14 # CONFIG_EMBEDDED is not set 10.15 CONFIG_KALLSYMS=y 10.16 +# CONFIG_KALLSYMS_ALL is not set 10.17 # CONFIG_KALLSYMS_EXTRA_PASS is not set 10.18 CONFIG_FUTEX=y 10.19 CONFIG_EPOLL=y 10.20 @@ -118,8 +119,15 @@ CONFIG_HAVE_DEC_LOCK=y 10.21 # 10.22 # Kernel hacking 10.23 # 10.24 -# CONFIG_DEBUG_KERNEL is not set 10.25 +CONFIG_DEBUG_KERNEL=y 10.26 CONFIG_EARLY_PRINTK=y 10.27 +# CONFIG_DEBUG_STACKOVERFLOW is not set 10.28 +# CONFIG_DEBUG_STACK_USAGE is not set 10.29 +CONFIG_DEBUG_SLAB=y 10.30 +# CONFIG_MAGIC_SYSRQ is not set 10.31 +# CONFIG_DEBUG_SPINLOCK is not set 10.32 +CONFIG_DEBUG_PAGEALLOC=y 10.33 +# CONFIG_DEBUG_INFO is not set 10.34 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set 10.35 # CONFIG_FRAME_POINTER is not set 10.36 # CONFIG_4KSTACKS is not set 10.37 @@ -145,6 +153,7 @@ CONFIG_BINFMT_ELF=y 10.38 CONFIG_STANDALONE=y 10.39 CONFIG_PREVENT_FIRMWARE_BUILD=y 10.40 # CONFIG_FW_LOADER is not set 10.41 +# CONFIG_DEBUG_DRIVER is not set 10.42 10.43 # 10.44 # Block devices
11.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/cpu/common.c Mon Dec 06 20:03:12 2004 +0000 11.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/cpu/common.c Mon Dec 06 20:37:17 2004 +0000 11.3 @@ -513,7 +513,7 @@ void __init cpu_gdt_init(struct Xgt_desc 11.4 va < gdt_descr->address + gdt_descr->size; 11.5 va += PAGE_SIZE, f++) { 11.6 frames[f] = virt_to_machine(va) >> PAGE_SHIFT; 11.7 - protect_page(swapper_pg_dir, (void *)va, PROT_ON); 11.8 + make_page_readonly((void *)va); 11.9 } 11.10 flush_page_update_queue(); 11.11 if (HYPERVISOR_set_gdt(frames, gdt_descr->size / 8))
12.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/pci-dma.c Mon Dec 06 20:03:12 2004 +0000 12.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/pci-dma.c Mon Dec 06 20:37:17 2004 +0000 12.3 @@ -13,6 +13,7 @@ 12.4 #include <linux/pci.h> 12.5 #include <linux/version.h> 12.6 #include <asm/io.h> 12.7 +#include <asm-xen/balloon.h> 12.8 12.9 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) 12.10 #define pte_offset_kernel pte_offset 12.11 @@ -37,9 +38,12 @@ xen_contig_memory(unsigned long vstart, 12.12 pgd_t *pgd; 12.13 pmd_t *pmd; 12.14 pte_t *pte; 12.15 - unsigned long pfn, i; 12.16 + unsigned long pfn, i, flags; 12.17 12.18 scrub_pages(vstart, 1 << order); 12.19 + 12.20 + balloon_lock(flags); 12.21 + 12.22 /* 1. Zap current PTEs, giving away the underlying pages. */ 12.23 for (i = 0; i < (1<<order); i++) { 12.24 pgd = pgd_offset_k( (vstart + (i*PAGE_SIZE))); 12.25 @@ -70,6 +74,8 @@ xen_contig_memory(unsigned long vstart, 12.26 } 12.27 /* Flush updates through and flush the TLB. */ 12.28 xen_tlb_flush(); 12.29 + 12.30 + balloon_unlock(flags); 12.31 } 12.32 12.33 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
13.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/setup.c Mon Dec 06 20:03:12 2004 +0000 13.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/kernel/setup.c Mon Dec 06 20:37:17 2004 +0000 13.3 @@ -52,6 +52,9 @@ 13.4 #include "setup_arch_pre.h" 13.5 #include <bios_ebda.h> 13.6 13.7 +/* Allows setting of maximum possible memory size */ 13.8 +static unsigned long xen_override_max_pfn; 13.9 + 13.10 int disable_pse __initdata = 0; 13.11 13.12 /* 13.13 @@ -718,8 +721,13 @@ static void __init parse_cmdline_early ( 13.14 unsigned long long mem_size; 13.15 13.16 mem_size = memparse(from+4, &from); 13.17 +#if 0 13.18 limit_regions(mem_size); 13.19 userdef=1; 13.20 +#else 13.21 + xen_override_max_pfn = 13.22 + (unsigned long)(mem_size>>PAGE_SHIFT); 13.23 +#endif 13.24 } 13.25 } 13.26 13.27 @@ -857,6 +865,7 @@ static void __init parse_cmdline_early ( 13.28 } 13.29 } 13.30 13.31 +#if 0 /* !XEN */ 13.32 /* 13.33 * Callback for efi_memory_walk. 13.34 */ 13.35 @@ -873,7 +882,6 @@ efi_find_max_pfn(unsigned long start, un 13.36 return 0; 13.37 } 13.38 13.39 - 13.40 /* 13.41 * Find the highest page frame number we have available 13.42 */ 13.43 @@ -900,6 +908,15 @@ void __init find_max_pfn(void) 13.44 max_pfn = end; 13.45 } 13.46 } 13.47 +#else 13.48 +/* We don't use the fake e820 because we need to respond to user override. */ 13.49 +void __init find_max_pfn(void) 13.50 +{ 13.51 + if ( xen_override_max_pfn < xen_start_info.nr_pages ) 13.52 + xen_override_max_pfn = xen_start_info.nr_pages; 13.53 + max_pfn = xen_override_max_pfn; 13.54 +} 13.55 +#endif /* XEN */ 13.56 13.57 /* 13.58 * Determine low and high memory ranges: 13.59 @@ -1414,6 +1431,21 @@ void __init setup_arch(char **cmdline_p) 13.60 #endif 13.61 paging_init(); 13.62 13.63 + /* Make sure we have a large enough P->M table. 
*/ 13.64 + if (max_pfn > xen_start_info.nr_pages) { 13.65 + phys_to_machine_mapping = alloc_bootmem_low_pages( 13.66 + max_pfn * sizeof(unsigned long)); 13.67 + memset(phys_to_machine_mapping, ~0, 13.68 + max_pfn * sizeof(unsigned long)); 13.69 + memcpy(phys_to_machine_mapping, 13.70 + (unsigned long *)xen_start_info.mfn_list, 13.71 + xen_start_info.nr_pages * sizeof(unsigned long)); 13.72 + free_bootmem( 13.73 + __pa(xen_start_info.mfn_list), 13.74 + PFN_PHYS(PFN_UP(xen_start_info.nr_pages * 13.75 + sizeof(unsigned long)))); 13.76 + } 13.77 + 13.78 pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); 13.79 for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) 13.80 {
14.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/hypervisor.c Mon Dec 06 20:03:12 2004 +0000 14.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/hypervisor.c Mon Dec 06 20:37:17 2004 +0000 14.3 @@ -35,6 +35,7 @@ 14.4 #include <asm/pgtable.h> 14.5 #include <asm-xen/hypervisor.h> 14.6 #include <asm-xen/multicall.h> 14.7 +#include <asm-xen/balloon.h> 14.8 14.9 /* 14.10 * This suffices to protect us if we ever move to SMP domains. 14.11 @@ -352,7 +353,6 @@ unsigned long allocate_empty_lowmem_regi 14.12 unsigned long *pfn_array; 14.13 unsigned long vstart; 14.14 unsigned long i; 14.15 - int ret; 14.16 unsigned int order = get_order(pages*PAGE_SIZE); 14.17 14.18 vstart = __get_free_pages(GFP_KERNEL, order); 14.19 @@ -378,57 +378,11 @@ unsigned long allocate_empty_lowmem_regi 14.20 /* Flush updates through and flush the TLB. */ 14.21 xen_tlb_flush(); 14.22 14.23 - ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 14.24 - pfn_array, 1<<order, 0); 14.25 - if ( unlikely(ret != (1<<order)) ) 14.26 - { 14.27 - printk(KERN_WARNING "Unable to reduce memory reservation (%d)\n", ret); 14.28 - BUG(); 14.29 - } 14.30 + balloon_put_pages(pfn_array, 1 << order); 14.31 14.32 vfree(pfn_array); 14.33 14.34 return vstart; 14.35 } 14.36 14.37 -void deallocate_lowmem_region(unsigned long vstart, unsigned long pages) 14.38 -{ 14.39 - pgd_t *pgd; 14.40 - pmd_t *pmd; 14.41 - pte_t *pte; 14.42 - unsigned long *pfn_array; 14.43 - unsigned long i; 14.44 - int ret; 14.45 - unsigned int order = get_order(pages*PAGE_SIZE); 14.46 - 14.47 - pfn_array = vmalloc((1<<order) * sizeof(*pfn_array)); 14.48 - if ( pfn_array == NULL ) 14.49 - BUG(); 14.50 - 14.51 - ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 14.52 - pfn_array, 1<<order, 0); 14.53 - if ( unlikely(ret != (1<<order)) ) 14.54 - { 14.55 - printk(KERN_WARNING "Unable to increase memory reservation (%d)\n", 14.56 - ret); 14.57 - BUG(); 14.58 - } 14.59 - 14.60 - for ( i = 0; i < (1<<order); i++ ) 14.61 - { 14.62 - pgd = 
pgd_offset_k( (vstart + (i*PAGE_SIZE))); 14.63 - pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE))); 14.64 - pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); 14.65 - queue_l1_entry_update(pte, (pfn_array[i]<<PAGE_SHIFT)|__PAGE_KERNEL); 14.66 - queue_machphys_update(pfn_array[i], __pa(vstart)>>PAGE_SHIFT); 14.67 - phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = pfn_array[i]; 14.68 - } 14.69 - 14.70 - flush_page_update_queue(); 14.71 - 14.72 - vfree(pfn_array); 14.73 - 14.74 - free_pages(vstart, order); 14.75 -} 14.76 - 14.77 #endif /* CONFIG_XEN_PHYSDEV_ACCESS */
15.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/init.c Mon Dec 06 20:03:12 2004 +0000 15.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/init.c Mon Dec 06 20:37:17 2004 +0000 15.3 @@ -77,6 +77,7 @@ static pte_t * __init one_page_table_ini 15.4 { 15.5 if (pmd_none(*pmd)) { 15.6 pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); 15.7 + make_page_readonly(page_table); 15.8 set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); 15.9 if (page_table != pte_offset_kernel(pmd, 0)) 15.10 BUG(); 15.11 @@ -125,41 +126,6 @@ static void __init page_table_range_init 15.12 } 15.13 } 15.14 15.15 -void __init protect_page(pgd_t *pgd, void *page, int mode) 15.16 -{ 15.17 - pmd_t *pmd; 15.18 - pte_t *pte; 15.19 - unsigned long addr; 15.20 - 15.21 - addr = (unsigned long)page; 15.22 - pgd += pgd_index(addr); 15.23 - pmd = pmd_offset(pgd, addr); 15.24 - pte = pte_offset_kernel(pmd, addr); 15.25 - if (!pte_present(*pte)) 15.26 - return; 15.27 - queue_l1_entry_update(pte, mode ? pte_val_ma(*pte) & ~_PAGE_RW : 15.28 - pte_val_ma(*pte) | _PAGE_RW); 15.29 -} 15.30 - 15.31 -void __init protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode) 15.32 -{ 15.33 - pmd_t *pmd; 15.34 - pte_t *pte; 15.35 - int pgd_idx, pmd_idx; 15.36 - 15.37 - protect_page(dpgd, spgd, mode); 15.38 - 15.39 - for (pgd_idx = 0; pgd_idx < PTRS_PER_PGD_NO_HV; spgd++, pgd_idx++) { 15.40 - pmd = pmd_offset(spgd, 0); 15.41 - if (pmd_none(*pmd)) 15.42 - continue; 15.43 - for (pmd_idx = 0; pmd_idx < PTRS_PER_PMD; pmd++, pmd_idx++) { 15.44 - pte = pte_offset_kernel(pmd, 0); 15.45 - protect_page(dpgd, pte, mode); 15.46 - } 15.47 - } 15.48 -} 15.49 - 15.50 static inline int is_kernel_text(unsigned long addr) 15.51 { 15.52 if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end) 15.53 @@ -180,6 +146,10 @@ static void __init kernel_physical_mappi 15.54 pte_t *pte; 15.55 int pgd_idx, pmd_idx, pte_ofs; 15.56 15.57 + unsigned long max_ram_pfn = xen_start_info.nr_pages; 15.58 + if (max_ram_pfn > 
max_low_pfn) 15.59 + max_ram_pfn = max_low_pfn; 15.60 + 15.61 pgd_idx = pgd_index(PAGE_OFFSET); 15.62 pgd = pgd_base + pgd_idx; 15.63 pfn = 0; 15.64 @@ -207,7 +177,10 @@ static void __init kernel_physical_mappi 15.65 pte = one_page_table_init(pmd); 15.66 15.67 pte += pte_ofs; 15.68 - for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { 15.69 + /* XEN: Only map initial RAM allocation. */ 15.70 + for (; pte_ofs < PTRS_PER_PTE && pfn < max_ram_pfn; pte++, pfn++, pte_ofs++) { 15.71 + if (pte_present(*pte)) 15.72 + continue; 15.73 if (is_kernel_text(address)) 15.74 set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); 15.75 else 15.76 @@ -311,7 +284,8 @@ void __init one_highpage_init(struct pag 15.77 ClearPageReserved(page); 15.78 set_bit(PG_highmem, &page->flags); 15.79 set_page_count(page, 1); 15.80 - __free_page(page); 15.81 + if (pfn < xen_start_info.nr_pages) 15.82 + __free_page(page); 15.83 totalhigh_pages++; 15.84 } else 15.85 SetPageReserved(page); 15.86 @@ -347,7 +321,8 @@ extern void __init remap_numa_kva(void); 15.87 static void __init pagetable_init (void) 15.88 { 15.89 unsigned long vaddr; 15.90 - pgd_t *pgd_base = swapper_pg_dir; 15.91 + pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base; 15.92 + pgd_t *new_pgd = swapper_pg_dir; 15.93 15.94 #ifdef CONFIG_X86_PAE 15.95 int i; 15.96 @@ -368,7 +343,22 @@ static void __init pagetable_init (void) 15.97 __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL; 15.98 } 15.99 15.100 - kernel_physical_mapping_init(pgd_base); 15.101 + /* 15.102 + * Switch to proper mm_init page directory. Initialise from the current 15.103 + * page directory, write-protect the new page directory, then switch to 15.104 + * it. We clean up by write-enabling and then freeing the old page dir. 
15.105 + */ 15.106 + memcpy(new_pgd, old_pgd, PTRS_PER_PGD_NO_HV*sizeof(pgd_t)); 15.107 + make_page_readonly(new_pgd); 15.108 + queue_pgd_pin(__pa(new_pgd)); 15.109 + load_cr3(new_pgd); 15.110 + queue_pgd_unpin(__pa(old_pgd)); 15.111 + __flush_tlb_all(); /* implicit flush */ 15.112 + make_page_writable(old_pgd); 15.113 + flush_page_update_queue(); 15.114 + free_bootmem(__pa(old_pgd), PAGE_SIZE); 15.115 + 15.116 + kernel_physical_mapping_init(new_pgd); 15.117 remap_numa_kva(); 15.118 15.119 /* 15.120 @@ -376,9 +366,9 @@ static void __init pagetable_init (void) 15.121 * created - mappings will be set by set_fixmap(): 15.122 */ 15.123 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; 15.124 - page_table_range_init(vaddr, 0, pgd_base); 15.125 + page_table_range_init(vaddr, 0, new_pgd); 15.126 15.127 - permanent_kmaps_init(pgd_base); 15.128 + permanent_kmaps_init(new_pgd); 15.129 15.130 #ifdef CONFIG_X86_PAE 15.131 /* 15.132 @@ -388,7 +378,7 @@ static void __init pagetable_init (void) 15.133 * All user-space mappings are explicitly cleared after 15.134 * SMP startup. 15.135 */ 15.136 - pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; 15.137 + new_pgd[0] = new_pgd[USER_PTRS_PER_PGD]; 15.138 #endif 15.139 } 15.140 15.141 @@ -545,8 +535,6 @@ out: 15.142 */ 15.143 void __init paging_init(void) 15.144 { 15.145 - pgd_t *old_pgd = (pgd_t *)xen_start_info.pt_base; 15.146 - pgd_t *new_pgd = swapper_pg_dir; 15.147 #ifdef CONFIG_XEN_PHYSDEV_ACCESS 15.148 int i; 15.149 #endif 15.150 @@ -559,25 +547,6 @@ void __init paging_init(void) 15.151 15.152 pagetable_init(); 15.153 15.154 - /* 15.155 - * Write-protect both page tables within both page tables. 15.156 - * That's three ops, as the old p.t. is already protected 15.157 - * within the old p.t. Then pin the new table, switch tables, 15.158 - * and unprotect the old table. 
15.159 - */ 15.160 - protect_pagetable(new_pgd, old_pgd, PROT_ON); 15.161 - protect_pagetable(new_pgd, new_pgd, PROT_ON); 15.162 - protect_pagetable(old_pgd, new_pgd, PROT_ON); 15.163 - queue_pgd_pin(__pa(new_pgd)); 15.164 - load_cr3(new_pgd); 15.165 - queue_pgd_unpin(__pa(old_pgd)); 15.166 - __flush_tlb_all(); /* implicit flush */ 15.167 - protect_pagetable(new_pgd, old_pgd, PROT_OFF); 15.168 - flush_page_update_queue(); 15.169 - 15.170 - /* Completely detached from old tables, so free them. */ 15.171 - free_bootmem(__pa(old_pgd), xen_start_info.nr_pt_frames << PAGE_SHIFT); 15.172 - 15.173 #ifdef CONFIG_X86_PAE 15.174 /* 15.175 * We will bail out later - printk doesn't work right now so
16.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/ioremap.c Mon Dec 06 20:03:12 2004 +0000 16.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/i386/mm/ioremap.c Mon Dec 06 20:37:17 2004 +0000 16.3 @@ -11,16 +11,18 @@ 16.4 #include <linux/vmalloc.h> 16.5 #include <linux/init.h> 16.6 #include <linux/slab.h> 16.7 +#include <linux/module.h> 16.8 #include <asm/io.h> 16.9 #include <asm/fixmap.h> 16.10 #include <asm/cacheflush.h> 16.11 #include <asm/tlbflush.h> 16.12 #include <asm/pgtable.h> 16.13 +#include <asm/pgalloc.h> 16.14 16.15 #ifndef CONFIG_XEN_PHYSDEV_ACCESS 16.16 16.17 void * __ioremap(unsigned long phys_addr, unsigned long size, 16.18 - unsigned long flags) 16.19 + unsigned long flags) 16.20 { 16.21 return NULL; 16.22 } 16.23 @@ -59,86 +61,6 @@ static inline int is_local_lowmem(unsign 16.24 return ((pfn < max_low_pfn) && (pfn_to_mfn(pfn) == mfn)); 16.25 } 16.26 16.27 -static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, 16.28 - unsigned long phys_addr, unsigned long flags) 16.29 -{ 16.30 - unsigned long end; 16.31 - unsigned long pfn; 16.32 - 16.33 - address &= ~PMD_MASK; 16.34 - end = address + size; 16.35 - if (end > PMD_SIZE) 16.36 - end = PMD_SIZE; 16.37 - if (address >= end) 16.38 - BUG(); 16.39 - pfn = phys_addr >> PAGE_SHIFT; 16.40 - do { 16.41 - if (!pte_none(*pte)) { 16.42 - printk("remap_area_pte: page already exists\n"); 16.43 - BUG(); 16.44 - } 16.45 - set_pte(pte, pfn_pte_ma(pfn, __pgprot(_PAGE_PRESENT | _PAGE_RW | 16.46 - _PAGE_DIRTY | _PAGE_ACCESSED | flags))); 16.47 - address += PAGE_SIZE; 16.48 - pfn++; 16.49 - pte++; 16.50 - } while (address && (address < end)); 16.51 -} 16.52 - 16.53 -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, 16.54 - unsigned long phys_addr, unsigned long flags) 16.55 -{ 16.56 - unsigned long end; 16.57 - 16.58 - address &= ~PGDIR_MASK; 16.59 - end = address + size; 16.60 - if (end > PGDIR_SIZE) 16.61 - end = PGDIR_SIZE; 16.62 - phys_addr 
-= address; 16.63 - if (address >= end) 16.64 - BUG(); 16.65 - do { 16.66 - pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); 16.67 - if (!pte) 16.68 - return -ENOMEM; 16.69 - remap_area_pte(pte, address, end - address, address + phys_addr, flags); 16.70 - address = (address + PMD_SIZE) & PMD_MASK; 16.71 - pmd++; 16.72 - } while (address && (address < end)); 16.73 - return 0; 16.74 -} 16.75 - 16.76 -static int remap_area_pages(unsigned long address, unsigned long phys_addr, 16.77 - unsigned long size, unsigned long flags) 16.78 -{ 16.79 - int error; 16.80 - pgd_t * dir; 16.81 - unsigned long end = address + size; 16.82 - 16.83 - phys_addr -= address; 16.84 - dir = pgd_offset(&init_mm, address); 16.85 - flush_cache_all(); 16.86 - if (address >= end) 16.87 - BUG(); 16.88 - spin_lock(&init_mm.page_table_lock); 16.89 - do { 16.90 - pmd_t *pmd; 16.91 - pmd = pmd_alloc(&init_mm, dir, address); 16.92 - error = -ENOMEM; 16.93 - if (!pmd) 16.94 - break; 16.95 - if (remap_area_pmd(pmd, address, end - address, 16.96 - phys_addr + address, flags)) 16.97 - break; 16.98 - error = 0; 16.99 - address = (address + PGDIR_SIZE) & PGDIR_MASK; 16.100 - dir++; 16.101 - } while (address && (address < end)); 16.102 - spin_unlock(&init_mm.page_table_lock); 16.103 - flush_tlb_all(); 16.104 - return error; 16.105 -} 16.106 - 16.107 /* 16.108 * Generic mapping function (not visible outside): 16.109 */ 16.110 @@ -201,7 +123,7 @@ void __iomem * __ioremap(unsigned long p 16.111 return NULL; 16.112 area->phys_addr = phys_addr; 16.113 addr = (void __iomem *) area->addr; 16.114 - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { 16.115 + if (direct_remap_area_pages(&init_mm, (unsigned long) addr, phys_addr, size, __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | flags), DOMID_IO)) { 16.116 vunmap((void __force *) addr); 16.117 return NULL; 16.118 } 16.119 @@ -406,7 +328,9 @@ static inline int direct_remap_area_pmd( 16.120 if (address >= end) 16.121 BUG(); 
16.122 do { 16.123 - pte_t *pte = pte_alloc_map(mm, pmd, address); 16.124 + pte_t *pte = (mm == &init_mm) ? 16.125 + pte_alloc_kernel(mm, pmd, address) : 16.126 + pte_alloc_map(mm, pmd, address); 16.127 if (!pte) 16.128 return -ENOMEM; 16.129 direct_remap_area_pte(pte, address, end - address, v); 16.130 @@ -426,7 +350,6 @@ int __direct_remap_area_pages(struct mm_ 16.131 unsigned long end = address + size; 16.132 16.133 dir = pgd_offset(mm, address); 16.134 - flush_cache_all(); 16.135 if (address >= end) 16.136 BUG(); 16.137 spin_lock(&mm->page_table_lock); 16.138 @@ -440,7 +363,6 @@ int __direct_remap_area_pages(struct mm_ 16.139 16.140 } while (address && (address < end)); 16.141 spin_unlock(&mm->page_table_lock); 16.142 - flush_tlb_all(); 16.143 return 0; 16.144 } 16.145 16.146 @@ -464,16 +386,18 @@ int direct_remap_area_pages(struct mm_st 16.147 16.148 start_address = address; 16.149 16.150 - for(i = 0; i < size; i += PAGE_SIZE) { 16.151 + flush_cache_all(); 16.152 + 16.153 + for (i = 0; i < size; i += PAGE_SIZE) { 16.154 if ((v - u) == MAX_DIRECTMAP_MMU_QUEUE) { 16.155 /* Fill in the PTE pointers. */ 16.156 __direct_remap_area_pages(mm, 16.157 start_address, 16.158 address-start_address, 16.159 w); 16.160 - 16.161 + 16.162 if (HYPERVISOR_mmu_update(u, v - u, NULL) < 0) 16.163 - return -EFAULT; 16.164 + return -EFAULT; 16.165 v = w; 16.166 start_address = address; 16.167 } 16.168 @@ -494,10 +418,14 @@ int direct_remap_area_pages(struct mm_st 16.169 __direct_remap_area_pages(mm, 16.170 start_address, 16.171 address-start_address, 16.172 - w); 16.173 + w); 16.174 if (unlikely(HYPERVISOR_mmu_update(u, v - u, NULL) < 0)) 16.175 - return -EFAULT; 16.176 + return -EFAULT; 16.177 } 16.178 - 16.179 + 16.180 + flush_tlb_all(); 16.181 + 16.182 return 0; 16.183 } 16.184 + 16.185 +EXPORT_SYMBOL(direct_remap_area_pages);
17.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/Makefile Mon Dec 06 20:03:12 2004 +0000 17.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/Makefile Mon Dec 06 20:37:17 2004 +0000 17.3 @@ -11,4 +11,4 @@ CPPFLAGS_vmlinux.lds += -U$(XENARCH) 17.4 17.5 extra-y += vmlinux.lds 17.6 17.7 -obj-y := ctrl_if.o evtchn.o fixup.o reboot.o xen_proc.o gnttab.o skbuff.o 17.8 +obj-y := ctrl_if.o evtchn.o fixup.o reboot.o xen_proc.o gnttab.o skbuff.o devmem.o
18.1 --- a/linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/ctrl_if.c Mon Dec 06 20:03:12 2004 +0000 18.2 +++ b/linux-2.6.10-rc2-xen-sparse/arch/xen/kernel/ctrl_if.c Mon Dec 06 20:37:17 2004 +0000 18.3 @@ -35,6 +35,7 @@ 18.4 #include <linux/errno.h> 18.5 #include <linux/irq.h> 18.6 #include <linux/interrupt.h> 18.7 +#include <linux/module.h> 18.8 #include <asm-xen/ctrl_if.h> 18.9 #include <asm-xen/evtchn.h> 18.10 18.11 @@ -539,3 +540,10 @@ void ctrl_if_discard_responses(void) 18.12 ctrl_if_tx_resp_cons = get_ctrl_if()->tx_resp_prod; 18.13 } 18.14 18.15 +EXPORT_SYMBOL(ctrl_if_send_message_noblock); 18.16 +EXPORT_SYMBOL(ctrl_if_send_message_block); 18.17 +EXPORT_SYMBOL(ctrl_if_send_message_and_get_response); 18.18 +EXPORT_SYMBOL(ctrl_if_enqueue_space_callback); 18.19 +EXPORT_SYMBOL(ctrl_if_send_response); 18.20 +EXPORT_SYMBOL(ctrl_if_register_receiver); 18.21 +EXPORT_SYMBOL(ctrl_if_unregister_receiver);
19.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/char/mem.c Mon Dec 06 20:03:12 2004 +0000 19.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/char/mem.c Mon Dec 06 20:37:17 2004 +0000 19.3 @@ -42,12 +42,7 @@ extern void tapechar_init(void); 19.4 */ 19.5 static inline int uncached_access(struct file *file, unsigned long addr) 19.6 { 19.7 -#ifdef CONFIG_XEN 19.8 - if (file->f_flags & O_SYNC) 19.9 - return 1; 19.10 - /* Xen sets correct MTRR type on non-RAM for us. */ 19.11 - return 0; 19.12 -#elif defined(__i386__) 19.13 +#if defined(__i386__) 19.14 /* 19.15 * On the PPro and successors, the MTRRs are used to set 19.16 * memory types for physical addresses outside main memory, 19.17 @@ -148,7 +143,7 @@ static ssize_t do_write_mem(void *p, uns 19.18 return written; 19.19 } 19.20 19.21 - 19.22 +#ifndef ARCH_HAS_DEV_MEM 19.23 /* 19.24 * This funcion reads the *physical* memory. The f_pos points directly to the 19.25 * memory location. 19.26 @@ -194,8 +189,9 @@ static ssize_t write_mem(struct file * f 19.27 return -EFAULT; 19.28 return do_write_mem(__va(p), p, buf, count, ppos); 19.29 } 19.30 +#endif 19.31 19.32 -static int mmap_mem(struct file * file, struct vm_area_struct * vma) 19.33 +static int mmap_kmem(struct file * file, struct vm_area_struct * vma) 19.34 { 19.35 #ifdef pgprot_noncached 19.36 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; 19.37 @@ -206,22 +202,18 @@ static int mmap_mem(struct file * file, 19.38 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 19.39 #endif 19.40 19.41 -#if defined(CONFIG_XEN) 19.42 - if (io_remap_page_range(vma, 19.43 - vma->vm_start, 19.44 - vma->vm_pgoff << PAGE_SHIFT, 19.45 - vma->vm_end-vma->vm_start, 19.46 - vma->vm_page_prot)) 19.47 + /* Don't try to swap out physical pages.. */ 19.48 + vma->vm_flags |= VM_RESERVED; 19.49 + 19.50 + /* 19.51 + * Don't dump addresses that are not real memory to a core file. 
19.52 + */ 19.53 + if (uncached) 19.54 + vma->vm_flags |= VM_IO; 19.55 + 19.56 + if (remap_page_range(vma, vma->vm_start, offset, vma->vm_end-vma->vm_start, 19.57 + vma->vm_page_prot)) 19.58 return -EAGAIN; 19.59 -#else 19.60 - /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ 19.61 - if (remap_pfn_range(vma, 19.62 - vma->vm_start, 19.63 - vma->vm_pgoff, 19.64 - vma->vm_end-vma->vm_start, 19.65 - vma->vm_page_prot)) 19.66 - return -EAGAIN; 19.67 -#endif 19.68 return 0; 19.69 } 19.70 19.71 @@ -581,7 +573,7 @@ static int open_port(struct inode * inod 19.72 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 19.73 } 19.74 19.75 -#define mmap_kmem mmap_mem 19.76 +#define mmap_mem mmap_kmem 19.77 #define zero_lseek null_lseek 19.78 #define full_lseek null_lseek 19.79 #define write_zero write_null 19.80 @@ -589,6 +581,7 @@ static int open_port(struct inode * inod 19.81 #define open_mem open_port 19.82 #define open_kmem open_mem 19.83 19.84 +#ifndef ARCH_HAS_DEV_MEM 19.85 static struct file_operations mem_fops = { 19.86 .llseek = memory_lseek, 19.87 .read = read_mem, 19.88 @@ -596,6 +589,9 @@ static struct file_operations mem_fops = 19.89 .mmap = mmap_mem, 19.90 .open = open_mem, 19.91 }; 19.92 +#else 19.93 +extern struct file_operations mem_fops; 19.94 +#endif 19.95 19.96 static struct file_operations kmem_fops = { 19.97 .llseek = memory_lseek,
20.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/Makefile Mon Dec 06 20:03:12 2004 +0000 20.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/Makefile Mon Dec 06 20:37:17 2004 +0000 20.3 @@ -2,9 +2,9 @@ 20.4 20.5 obj-y += console/ 20.6 obj-y += evtchn/ 20.7 -obj-y += privcmd/ 20.8 -obj-y += balloon/ 20.9 +obj-y += balloon/ 20.10 20.11 +obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += privcmd/ 20.12 obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ 20.13 obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ 20.14 obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/
21.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/balloon/balloon.c Mon Dec 06 20:03:12 2004 +0000 21.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/balloon/balloon.c Mon Dec 06 20:37:17 2004 +0000 21.3 @@ -4,6 +4,7 @@ 21.4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 21.5 * 21.6 * Copyright (c) 2003, B Dragovic 21.7 + * Copyright (c) 2003-2004, M Williamson, K Fraser 21.8 * 21.9 * This file may be distributed separately from the Linux kernel, or 21.10 * incorporated into other software packages, subject to the following license: 21.11 @@ -28,8 +29,8 @@ 21.12 */ 21.13 21.14 #include <linux/config.h> 21.15 +#include <linux/kernel.h> 21.16 #include <linux/module.h> 21.17 -#include <linux/kernel.h> 21.18 #include <linux/sched.h> 21.19 #include <linux/errno.h> 21.20 #include <linux/mm.h> 21.21 @@ -42,25 +43,39 @@ 21.22 #include <asm-xen/xen_proc.h> 21.23 #include <asm-xen/hypervisor.h> 21.24 #include <asm-xen/ctrl_if.h> 21.25 +#include <asm-xen/balloon.h> 21.26 #include <asm/pgalloc.h> 21.27 #include <asm/pgtable.h> 21.28 #include <asm/uaccess.h> 21.29 #include <asm/tlb.h> 21.30 #include <linux/list.h> 21.31 21.32 -/* USER DEFINES -- THESE SHOULD BE COPIED TO USER-SPACE TOOLS */ 21.33 -#define USER_INFLATE_BALLOON 1 /* return mem to hypervisor */ 21.34 -#define USER_DEFLATE_BALLOON 2 /* claim mem from hypervisor */ 21.35 -typedef struct user_balloon_op { 21.36 - unsigned int op; 21.37 - unsigned long size; 21.38 -} user_balloon_op_t; 21.39 -/* END OF USER DEFINE */ 21.40 - 21.41 static struct proc_dir_entry *balloon_pde; 21.42 21.43 -unsigned long credit; 21.44 -static unsigned long current_pages, most_seen_pages; 21.45 +static DECLARE_MUTEX(balloon_mutex); 21.46 +spinlock_t balloon_lock = SPIN_LOCK_UNLOCKED; 21.47 + 21.48 +/* We aim for 'current allocation' == 'target allocation'. */ 21.49 +static unsigned long current_pages; 21.50 +static unsigned long target_pages; 21.51 + 21.52 +/* We may hit the hard limit in Xen. 
If we do then we remember it. */ 21.53 +static unsigned long hard_limit; 21.54 + 21.55 +/* 21.56 + * Drivers may alter the memory reservation independently, but they must 21.57 + * inform the balloon driver so that we can avoid hitting the hard limit. 21.58 + */ 21.59 +static unsigned long driver_pages; 21.60 + 21.61 +/* List of ballooned pages, threaded through the mem_map array. */ 21.62 +static LIST_HEAD(ballooned_pages); 21.63 +static unsigned long balloon_low, balloon_high; 21.64 + 21.65 +/* Main work function, always executed in process context. */ 21.66 +static void balloon_process(void *unused); 21.67 +static DECLARE_WORK(balloon_worker, balloon_process, NULL); 21.68 +static struct timer_list balloon_timer; 21.69 21.70 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 21.71 /* Use the private and mapping fields of struct page as a list. */ 21.72 @@ -76,452 +91,223 @@ static unsigned long current_pages, most 21.73 #define LIST_TO_PAGE(l) ( list_entry(l, struct page, list) ) 21.74 #define UNLIST_PAGE(p) ( list_del(&p->list) ) 21.75 #define pte_offset_kernel pte_offset 21.76 +#define subsys_initcall(_fn) __initcall(_fn) 21.77 #endif 21.78 21.79 -/* List of ballooned pages, threaded through the mem_map array. */ 21.80 -LIST_HEAD(ballooned_pages); 21.81 +#define IPRINTK(fmt, args...) \ 21.82 + printk(KERN_INFO "xen_mem: " fmt, ##args) 21.83 +#define WPRINTK(fmt, args...) \ 21.84 + printk(KERN_WARNING "xen_mem: " fmt, ##args) 21.85 21.86 -/** add_ballooned_page - remember we've ballooned a pfn */ 21.87 -void add_ballooned_page(unsigned long pfn) 21.88 +/* balloon_append: add the given page to the balloon. */ 21.89 +static void balloon_append(struct page *page) 21.90 { 21.91 - struct page *p = mem_map + pfn; 21.92 - 21.93 - list_add(PAGE_TO_LIST(p), &ballooned_pages); 21.94 + /* Low memory is re-populated first, so highmem pages go at list tail. 
*/ 21.95 + if ( PageHighMem(page) ) 21.96 + { 21.97 + list_add_tail(PAGE_TO_LIST(page), &ballooned_pages); 21.98 + balloon_high++; 21.99 + } 21.100 + else 21.101 + { 21.102 + list_add(PAGE_TO_LIST(page), &ballooned_pages); 21.103 + balloon_low++; 21.104 + } 21.105 } 21.106 21.107 -/* rem_ballooned_page - recall a ballooned page and remove from list. */ 21.108 -struct page *rem_ballooned_page(void) 21.109 +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 21.110 +static struct page *balloon_retrieve(void) 21.111 { 21.112 - if(!list_empty(&ballooned_pages)) 21.113 - { 21.114 - struct page *ret; 21.115 + struct page *page; 21.116 + 21.117 + if ( list_empty(&ballooned_pages) ) 21.118 + return NULL; 21.119 21.120 - ret = LIST_TO_PAGE(ballooned_pages.next); 21.121 - UNLIST_PAGE(ret); 21.122 + page = LIST_TO_PAGE(ballooned_pages.next); 21.123 + UNLIST_PAGE(page); 21.124 21.125 - return ret; 21.126 - } 21.127 + if ( PageHighMem(page) ) 21.128 + balloon_high--; 21.129 else 21.130 - return NULL; 21.131 + balloon_low--; 21.132 + 21.133 + return page; 21.134 } 21.135 21.136 static inline pte_t *get_ptep(unsigned long addr) 21.137 { 21.138 - pgd_t *pgd; pmd_t *pmd; pte_t *ptep; 21.139 + pgd_t *pgd; 21.140 + pmd_t *pmd; 21.141 + 21.142 pgd = pgd_offset_k(addr); 21.143 - 21.144 if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); 21.145 21.146 pmd = pmd_offset(pgd, addr); 21.147 if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); 21.148 21.149 - ptep = pte_offset_kernel(pmd, addr); 21.150 - 21.151 - return ptep; 21.152 + return pte_offset_kernel(pmd, addr); 21.153 } 21.154 21.155 -/* Main function for relinquishing memory. 
*/ 21.156 -static unsigned long inflate_balloon(unsigned long num_pages) 21.157 - 21.158 +static void balloon_alarm(unsigned long unused) 21.159 { 21.160 - unsigned long *parray; 21.161 - unsigned long *currp; 21.162 - unsigned long curraddr; 21.163 - unsigned long ret = 0; 21.164 - unsigned long i, j; 21.165 - 21.166 - parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long)); 21.167 - if ( parray == NULL ) 21.168 - { 21.169 - printk(KERN_ERR "inflate_balloon: Unable to vmalloc parray\n"); 21.170 - return -EFAULT; 21.171 - } 21.172 - 21.173 - currp = parray; 21.174 - 21.175 - for ( i = 0; i < num_pages; i++, currp++ ) 21.176 - { 21.177 - struct page *page = alloc_page(GFP_HIGHUSER); 21.178 - unsigned long pfn = page - mem_map; 21.179 - 21.180 - /* If allocation fails then free all reserved pages. */ 21.181 - if ( page == NULL ) 21.182 - { 21.183 - printk(KERN_ERR "Unable to inflate balloon by %ld, only" 21.184 - " %ld pages free.", num_pages, i); 21.185 - currp = parray; 21.186 - for ( j = 0; j < i; j++, currp++ ) 21.187 - __free_page((struct page *) (mem_map + *currp)); 21.188 - 21.189 - ret = -EFAULT; 21.190 - goto cleanup; 21.191 - } 21.192 - 21.193 - *currp = pfn; 21.194 - } 21.195 - 21.196 + schedule_work(&balloon_worker); 21.197 +} 21.198 21.199 - for ( i = 0, currp = parray; i < num_pages; i++, currp++ ) 21.200 - { 21.201 - unsigned long mfn = phys_to_machine_mapping[*currp]; 21.202 - curraddr = (unsigned long)page_address(mem_map + *currp); 21.203 - /* Blow away page contents for security, and also p.t. ref if any. 
*/ 21.204 - if ( curraddr != 0 ) 21.205 - { 21.206 - scrub_pages(curraddr, 1); 21.207 - queue_l1_entry_update(get_ptep(curraddr), 0); 21.208 - } 21.209 -#ifdef CONFIG_XEN_SCRUB_PAGES 21.210 - else 21.211 - { 21.212 - void *p = kmap(&mem_map[*currp]); 21.213 - scrub_pages(p, 1); 21.214 - kunmap(&mem_map[*currp]); 21.215 - } 21.216 -#endif 21.217 - 21.218 - add_ballooned_page(*currp); 21.219 - 21.220 - phys_to_machine_mapping[*currp] = INVALID_P2M_ENTRY; 21.221 - *currp = mfn; 21.222 - } 21.223 - 21.224 - /* Flush updates through and flush the TLB. */ 21.225 - xen_tlb_flush(); 21.226 - 21.227 - ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 21.228 - parray, num_pages, 0); 21.229 - if ( unlikely(ret != num_pages) ) 21.230 - { 21.231 - printk(KERN_ERR "Unable to inflate balloon, error %lx\n", ret); 21.232 - goto cleanup; 21.233 - } 21.234 - 21.235 - credit += num_pages; 21.236 - ret = num_pages; 21.237 - 21.238 - cleanup: 21.239 - vfree(parray); 21.240 - 21.241 - return ret; 21.242 +static unsigned long current_target(void) 21.243 +{ 21.244 + unsigned long target = min(target_pages, hard_limit); 21.245 + if ( target > (current_pages + balloon_low + balloon_high) ) 21.246 + target = current_pages + balloon_low + balloon_high; 21.247 + return target; 21.248 } 21.249 21.250 /* 21.251 - * Install new mem pages obtained by deflate_balloon. function walks 21.252 - * phys->machine mapping table looking for DEAD entries and populates 21.253 - * them. 21.254 + * We avoid multiple worker processes conflicting via the balloon mutex. 21.255 + * We may of course race updates of the target counts (which are protected 21.256 + * by the balloon lock), or with changes to the Xen hard limit, but we will 21.257 + * recover from these in time. 
21.258 */ 21.259 -static unsigned long process_returned_pages(unsigned long * parray, 21.260 - unsigned long num) 21.261 +static void balloon_process(void *unused) 21.262 { 21.263 - /* currently, this function is rather simplistic as 21.264 - * it is assumed that domain reclaims only number of 21.265 - * pages previously released. this is to change soon 21.266 - * and the code to extend page tables etc. will be 21.267 - * incorporated here. 21.268 - */ 21.269 - 21.270 - unsigned long * curr = parray; 21.271 - unsigned long num_installed; 21.272 + unsigned long *mfn_list, pfn, i, flags; 21.273 + struct page *page; 21.274 + long credit, debt, rc; 21.275 + void *v; 21.276 + 21.277 + down(&balloon_mutex); 21.278 + 21.279 + retry: 21.280 + mfn_list = NULL; 21.281 21.282 - struct page *page; 21.283 - 21.284 - num_installed = 0; 21.285 - while ( (page = rem_ballooned_page()) != NULL ) 21.286 + if ( (credit = current_target() - current_pages) > 0 ) 21.287 { 21.288 - unsigned long pfn; 21.289 + mfn_list = (unsigned long *)vmalloc(credit * sizeof(*mfn_list)); 21.290 + if ( mfn_list == NULL ) 21.291 + goto out; 21.292 21.293 - if ( num_installed == num ) 21.294 - break; 21.295 - 21.296 - pfn = page - mem_map; 21.297 - 21.298 - if(phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY) 21.299 + balloon_lock(flags); 21.300 + rc = HYPERVISOR_dom_mem_op( 21.301 + MEMOP_increase_reservation, mfn_list, credit, 0); 21.302 + balloon_unlock(flags); 21.303 + if ( rc < credit ) 21.304 { 21.305 - printk("BUG: Tried to unballoon existing page!"); 21.306 - BUG(); 21.307 + /* We hit the Xen hard limit: reprobe. 
*/ 21.308 + if ( HYPERVISOR_dom_mem_op( 21.309 + MEMOP_decrease_reservation, mfn_list, rc, 0) != rc ) 21.310 + BUG(); 21.311 + hard_limit = current_pages + rc - driver_pages; 21.312 + vfree(mfn_list); 21.313 + goto retry; 21.314 } 21.315 21.316 - phys_to_machine_mapping[pfn] = *curr; 21.317 - queue_machphys_update(*curr, pfn); 21.318 - if (pfn<max_low_pfn) 21.319 - queue_l1_entry_update( 21.320 - get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)), 21.321 - ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); 21.322 - 21.323 - __free_page(mem_map + pfn); 21.324 + for ( i = 0; i < credit; i++ ) 21.325 + { 21.326 + if ( (page = balloon_retrieve()) == NULL ) 21.327 + BUG(); 21.328 + 21.329 + pfn = page - mem_map; 21.330 + if ( phys_to_machine_mapping[pfn] != INVALID_P2M_ENTRY ) 21.331 + BUG(); 21.332 21.333 - curr++; 21.334 - num_installed++; 21.335 + /* Update P->M and M->P tables. */ 21.336 + phys_to_machine_mapping[pfn] = mfn_list[i]; 21.337 + queue_machphys_update(mfn_list[i], pfn); 21.338 + 21.339 + /* Link back into the page tables if it's not a highmem page. */ 21.340 + if ( pfn < max_low_pfn ) 21.341 + queue_l1_entry_update( 21.342 + get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)), 21.343 + (mfn_list[i] << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); 21.344 + 21.345 + /* Finally, relinquish the memory back to the system allocator. 
*/ 21.346 + ClearPageReserved(page); 21.347 + set_page_count(page, 1); 21.348 + __free_page(page); 21.349 + } 21.350 + 21.351 + current_pages += credit; 21.352 } 21.353 + else if ( credit < 0 ) 21.354 + { 21.355 + debt = -credit; 21.356 + 21.357 + mfn_list = (unsigned long *)vmalloc(debt * sizeof(*mfn_list)); 21.358 + if ( mfn_list == NULL ) 21.359 + goto out; 21.360 21.361 - return num_installed; 21.362 -} 21.363 + for ( i = 0; i < debt; i++ ) 21.364 + { 21.365 + if ( (page = alloc_page(GFP_HIGHUSER)) == NULL ) 21.366 + { 21.367 + debt = i; 21.368 + break; 21.369 + } 21.370 + 21.371 + pfn = page - mem_map; 21.372 + mfn_list[i] = phys_to_machine_mapping[pfn]; 21.373 + phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY; 21.374 21.375 -unsigned long deflate_balloon(unsigned long num_pages) 21.376 -{ 21.377 - unsigned long ret; 21.378 - unsigned long * parray; 21.379 + if ( !PageHighMem(page) ) 21.380 + { 21.381 + v = phys_to_virt((page - mem_map) << PAGE_SHIFT); 21.382 + scrub_pages(v, 1); 21.383 + queue_l1_entry_update(get_ptep((unsigned long)v), 0); 21.384 + } 21.385 +#ifdef CONFIG_XEN_SCRUB_PAGES 21.386 + else 21.387 + { 21.388 + v = kmap(page); 21.389 + scrub_pages(v, 1); 21.390 + kunmap(page); 21.391 + } 21.392 +#endif 21.393 21.394 - if ( num_pages > credit ) 21.395 - { 21.396 - printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n", 21.397 - num_pages, credit); 21.398 - return -EAGAIN; 21.399 + balloon_append(page); 21.400 + } 21.401 + 21.402 + /* Flush updates through and flush the TLB. 
*/ 21.403 + xen_tlb_flush(); 21.404 + 21.405 + if ( HYPERVISOR_dom_mem_op( 21.406 + MEMOP_decrease_reservation, mfn_list, debt, 0) != debt ) 21.407 + BUG(); 21.408 + 21.409 + current_pages -= debt; 21.410 } 21.411 21.412 - parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long)); 21.413 - if ( parray == NULL ) 21.414 - { 21.415 - printk(KERN_ERR "deflate_balloon: Unable to vmalloc parray\n"); 21.416 - return 0; 21.417 - } 21.418 - 21.419 - ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 21.420 - parray, num_pages, 0); 21.421 - if ( unlikely(ret != num_pages) ) 21.422 - { 21.423 - printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n", 21.424 - ret); 21.425 - goto cleanup; 21.426 - } 21.427 + out: 21.428 + if ( mfn_list != NULL ) 21.429 + vfree(mfn_list); 21.430 21.431 - if ( (ret = process_returned_pages(parray, num_pages)) < num_pages ) 21.432 - { 21.433 - printk(KERN_WARNING 21.434 - "deflate_balloon: restored only %lx of %lx pages.\n", 21.435 - ret, num_pages); 21.436 - goto cleanup; 21.437 - } 21.438 + /* Schedule more work if there is some still to be done. */ 21.439 + if ( current_target() != current_pages ) 21.440 + mod_timer(&balloon_timer, jiffies + HZ); 21.441 21.442 - ret = num_pages; 21.443 - credit -= num_pages; 21.444 - 21.445 - cleanup: 21.446 - vfree(parray); 21.447 - 21.448 - return ret; 21.449 + up(&balloon_mutex); 21.450 } 21.451 21.452 -#define PAGE_TO_MB_SHIFT 8 21.453 - 21.454 -/* 21.455 - * pagetable_extend() mimics pagetable_init() from arch/xen/mm/init.c 21.456 - * The loops do go through all of low memory (ZONE_NORMAL). The 21.457 - * old pages have _PAGE_PRESENT set and so get skipped. 21.458 - * If low memory is not full, the new pages are used to fill it, going 21.459 - * from cur_low_pfn to low_pfn. high memory is not direct mapped so 21.460 - * no extension is needed for new high memory. 
21.461 - */ 21.462 - 21.463 -static void pagetable_extend (int cur_low_pfn, int newpages) 21.464 -{ 21.465 - unsigned long vaddr, end; 21.466 - pgd_t *kpgd, *pgd, *pgd_base; 21.467 - int i, j, k; 21.468 - pmd_t *kpmd, *pmd; 21.469 - pte_t *kpte, *pte, *pte_base; 21.470 - int low_pfn = min(cur_low_pfn+newpages,(int)max_low_pfn); 21.471 - 21.472 - /* 21.473 - * This can be zero as well - no problem, in that case we exit 21.474 - * the loops anyway due to the PTRS_PER_* conditions. 21.475 - */ 21.476 - end = (unsigned long)__va(low_pfn*PAGE_SIZE); 21.477 - 21.478 - pgd_base = init_mm.pgd; 21.479 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 21.480 - i = pgd_index(PAGE_OFFSET); 21.481 -#else 21.482 - i = __pgd_offset(PAGE_OFFSET); 21.483 -#endif 21.484 - pgd = pgd_base + i; 21.485 - 21.486 - for (; i < PTRS_PER_PGD; pgd++, i++) { 21.487 - vaddr = i*PGDIR_SIZE; 21.488 - if (end && (vaddr >= end)) 21.489 - break; 21.490 - pmd = (pmd_t *)pgd; 21.491 - for (j = 0; j < PTRS_PER_PMD; pmd++, j++) { 21.492 - vaddr = i*PGDIR_SIZE + j*PMD_SIZE; 21.493 - if (end && (vaddr >= end)) 21.494 - break; 21.495 - 21.496 - /* Filled in for us already? 
*/ 21.497 - if ( pmd_val(*pmd) & _PAGE_PRESENT ) 21.498 - continue; 21.499 - 21.500 - pte_base = pte = (pte_t *) __get_free_page(GFP_KERNEL); 21.501 - 21.502 - for (k = 0; k < PTRS_PER_PTE; pte++, k++) { 21.503 - vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; 21.504 - if (end && (vaddr >= end)) 21.505 - break; 21.506 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 21.507 - *pte = mk_pte(virt_to_page(vaddr), PAGE_KERNEL); 21.508 -#else 21.509 - *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); 21.510 -#endif 21.511 - } 21.512 - kpgd = pgd_offset_k((unsigned long)pte_base); 21.513 - kpmd = pmd_offset(kpgd, (unsigned long)pte_base); 21.514 - kpte = pte_offset_kernel(kpmd, (unsigned long)pte_base); 21.515 - queue_l1_entry_update(kpte, 21.516 - (*(unsigned long *)kpte)&~_PAGE_RW); 21.517 - set_pmd(pmd, __pmd(_KERNPG_TABLE + __pa(pte_base))); 21.518 - XEN_flush_page_update_queue(); 21.519 - } 21.520 - } 21.521 -} 21.522 - 21.523 -/* 21.524 - * claim_new_pages() asks xen to increase this domain's memory reservation 21.525 - * and return a list of the new pages of memory. This new pages are 21.526 - * added to the free list of the memory manager. 21.527 - * 21.528 - * Available RAM does not normally change while Linux runs. To make this work, 21.529 - * the linux mem= boottime command line param must say how big memory could 21.530 - * possibly grow. Then setup_arch() in arch/xen/kernel/setup.c 21.531 - * sets max_pfn, max_low_pfn and the zones according to 21.532 - * this max memory size. The page tables themselves can only be 21.533 - * extended after xen has assigned new pages to this domain. 21.534 - */ 21.535 - 21.536 -static unsigned long 21.537 -claim_new_pages(unsigned long num_pages) 21.538 +/* Resets the Xen limit, sets new target, and kicks off processing. 
*/ 21.539 +static void set_new_target(unsigned long target) 21.540 { 21.541 - unsigned long new_page_cnt, pfn; 21.542 - unsigned long * parray, *curr; 21.543 - 21.544 - if (most_seen_pages+num_pages> max_pfn) 21.545 - num_pages = max_pfn-most_seen_pages; 21.546 - if (num_pages==0) return -EINVAL; 21.547 - 21.548 - parray = (unsigned long *)vmalloc(num_pages * sizeof(unsigned long)); 21.549 - if ( parray == NULL ) 21.550 - { 21.551 - printk(KERN_ERR "claim_new_pages: Unable to vmalloc parray\n"); 21.552 - return 0; 21.553 - } 21.554 - 21.555 - new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 21.556 - parray, num_pages, 0); 21.557 - if ( new_page_cnt != num_pages ) 21.558 - { 21.559 - printk(KERN_WARNING 21.560 - "claim_new_pages: xen granted only %lu of %lu requested pages\n", 21.561 - new_page_cnt, num_pages); 21.562 - 21.563 - /* 21.564 - * Avoid xen lockup when user forgot to setdomainmaxmem. Xen 21.565 - * usually can dribble out a few pages and then hangs. 21.566 - */ 21.567 - if ( new_page_cnt < 1000 ) 21.568 - { 21.569 - printk(KERN_WARNING "Remember to use setdomainmaxmem\n"); 21.570 - HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 21.571 - parray, new_page_cnt, 0); 21.572 - return -EFAULT; 21.573 - } 21.574 - } 21.575 - memcpy(phys_to_machine_mapping+most_seen_pages, parray, 21.576 - new_page_cnt * sizeof(unsigned long)); 21.577 - 21.578 - pagetable_extend(most_seen_pages,new_page_cnt); 21.579 - 21.580 - for ( pfn = most_seen_pages, curr = parray; 21.581 - pfn < most_seen_pages+new_page_cnt; 21.582 - pfn++, curr++ ) 21.583 - { 21.584 - struct page *page = mem_map + pfn; 21.585 - 21.586 -#ifndef CONFIG_HIGHMEM 21.587 - if ( pfn>=max_low_pfn ) 21.588 - { 21.589 - printk(KERN_WARNING "Warning only %ldMB will be used.\n", 21.590 - pfn>>PAGE_TO_MB_SHIFT); 21.591 - printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); 21.592 - break; 21.593 - } 21.594 -#endif 21.595 - queue_machphys_update(*curr, pfn); 21.596 - if ( pfn < max_low_pfn ) 21.597 
- queue_l1_entry_update( 21.598 - get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)), 21.599 - ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL)); 21.600 - 21.601 - XEN_flush_page_update_queue(); 21.602 - 21.603 - /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */ 21.604 - ClearPageReserved(page); 21.605 - if ( pfn >= max_low_pfn ) 21.606 - set_bit(PG_highmem, &page->flags); 21.607 - set_page_count(page, 1); 21.608 - __free_page(page); 21.609 - } 21.610 - 21.611 - vfree(parray); 21.612 - 21.613 - return new_page_cnt; 21.614 + /* No need for lock. Not read-modify-write updates. */ 21.615 + hard_limit = ~0UL; 21.616 + target_pages = target; 21.617 + schedule_work(&balloon_worker); 21.618 } 21.619 21.620 - 21.621 -static int balloon_try_target(int target) 21.622 -{ 21.623 - int change, reclaim; 21.624 - 21.625 - if ( target < current_pages ) 21.626 - { 21.627 - int change = inflate_balloon(current_pages-target); 21.628 - if ( change <= 0 ) 21.629 - return change; 21.630 - 21.631 - current_pages -= change; 21.632 - printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n", 21.633 - change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); 21.634 - } 21.635 - else if ( target > current_pages ) 21.636 - { 21.637 - reclaim = min((unsigned long)target,most_seen_pages) - current_pages; 21.638 - 21.639 - if ( reclaim ) 21.640 - { 21.641 - change = deflate_balloon( reclaim ); 21.642 - if ( change <= 0 ) 21.643 - return change; 21.644 - current_pages += change; 21.645 - printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n", 21.646 - change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); 21.647 - } 21.648 - 21.649 - if ( most_seen_pages < target ) 21.650 - { 21.651 - int growth = claim_new_pages(target-most_seen_pages); 21.652 - if ( growth <= 0 ) 21.653 - return growth; 21.654 - most_seen_pages += growth; 21.655 - current_pages += growth; 21.656 - printk(KERN_INFO "Granted %dMB new mem. 
Dom now has %luMB\n", 21.657 - growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT); 21.658 - } 21.659 - } 21.660 - 21.661 - return 1; 21.662 -} 21.663 - 21.664 - 21.665 static void balloon_ctrlif_rx(ctrl_msg_t *msg, unsigned long id) 21.666 { 21.667 switch ( msg->subtype ) 21.668 { 21.669 case CMSG_MEM_REQUEST_SET: 21.670 + { 21.671 + mem_request_t *req = (mem_request_t *)&msg->msg[0]; 21.672 if ( msg->length != sizeof(mem_request_t) ) 21.673 goto parse_error; 21.674 - { 21.675 - mem_request_t *req = (mem_request_t *)&msg->msg[0]; 21.676 - req->status = balloon_try_target(req->target); 21.677 - } 21.678 - break; 21.679 + set_new_target(req->target); 21.680 + req->status = 0; 21.681 + } 21.682 + break; 21.683 default: 21.684 goto parse_error; 21.685 } 21.686 @@ -534,158 +320,122 @@ static void balloon_ctrlif_rx(ctrl_msg_t 21.687 ctrl_if_send_response(msg); 21.688 } 21.689 21.690 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 21.691 -typedef size_t count_t; 21.692 -#else 21.693 -typedef u_long count_t; 21.694 -#endif 21.695 - 21.696 -static int do_balloon_write(const char *buffer, count_t count) 21.697 +static int balloon_write(struct file *file, const char __user *buffer, 21.698 + unsigned long count, void *data) 21.699 { 21.700 char memstring[64], *endchar; 21.701 - int len, i; 21.702 - unsigned long target; 21.703 - unsigned long long targetbytes; 21.704 + unsigned long long target_bytes; 21.705 21.706 - /* Only admin can play with the balloon :) */ 21.707 if ( !capable(CAP_SYS_ADMIN) ) 21.708 return -EPERM; 21.709 21.710 + if ( count <= 1 ) 21.711 + return -EBADMSG; /* runt */ 21.712 if ( count > sizeof(memstring) ) 21.713 - return -EFBIG; 21.714 - 21.715 - len = strnlen_user(buffer, count); 21.716 - if ( len == 0 ) return -EBADMSG; 21.717 - if ( len == 1 ) return 1; /* input starts with a NUL char */ 21.718 - if ( strncpy_from_user(memstring, buffer, len) < 0 ) 21.719 - return -EFAULT; 21.720 + return -EFBIG; /* too long */ 21.721 21.722 - endchar = 
memstring; 21.723 - for ( i = 0; i < len; ++i, ++endchar ) 21.724 - if ( (memstring[i] < '0') || (memstring[i] > '9') ) 21.725 - break; 21.726 - if ( i == 0 ) 21.727 - return -EBADMSG; 21.728 + if ( copy_from_user(memstring, buffer, count) ) 21.729 + return -EFAULT; 21.730 + memstring[sizeof(memstring)-1] = '\0'; 21.731 21.732 - targetbytes = memparse(memstring,&endchar); 21.733 - target = targetbytes >> PAGE_SHIFT; 21.734 + target_bytes = memparse(memstring, &endchar); 21.735 + set_new_target(target_bytes >> PAGE_SHIFT); 21.736 21.737 - i = balloon_try_target(target); 21.738 - 21.739 - if ( i <= 0 ) return i; 21.740 - 21.741 - return len; 21.742 + return count; 21.743 } 21.744 21.745 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 21.746 -static int balloon_write(struct file *file, const char *buffer, 21.747 - size_t count, loff_t *offp) 21.748 +static int balloon_read(char *page, char **start, off_t off, 21.749 + int count, int *eof, void *data) 21.750 { 21.751 - int len = do_balloon_write(buffer, count); 21.752 - 21.753 - if ( len <= 0 ) return len; 21.754 + int len; 21.755 + 21.756 +#define K(_p) ((_p)<<(PAGE_SHIFT-10)) 21.757 + len = sprintf( 21.758 + page, 21.759 + "Current allocation: %8lu kB\n" 21.760 + "Requested target: %8lu kB\n" 21.761 + "Low-mem balloon: %8lu kB\n" 21.762 + "High-mem balloon: %8lu kB\n" 21.763 + "Xen hard limit: ", 21.764 + K(current_pages), K(target_pages), K(balloon_low), K(balloon_high)); 21.765 21.766 - *offp += len; 21.767 + if ( hard_limit != ~0UL ) 21.768 + len += sprintf( 21.769 + page + len, 21.770 + "%8lu kB (inc. %8lu kB driver headroom)\n", 21.771 + K(hard_limit), K(driver_pages)); 21.772 + else 21.773 + len += sprintf( 21.774 + page + len, 21.775 + " ??? 
kB\n"); 21.776 + 21.777 + *eof = 1; 21.778 return len; 21.779 } 21.780 21.781 -static int balloon_read(struct file *filp, char *buffer, 21.782 - size_t count, loff_t *offp) 21.783 -{ 21.784 - static char priv_buf[32]; 21.785 - char *priv_bufp = priv_buf; 21.786 - int len; 21.787 - len = sprintf(priv_buf,"%lu\n",current_pages<<PAGE_SHIFT); 21.788 - 21.789 - len -= *offp; 21.790 - priv_bufp += *offp; 21.791 - if (len>count) len = count; 21.792 - if (len<0) len = 0; 21.793 - 21.794 - if ( copy_to_user(buffer, priv_bufp, len) != 0 ) 21.795 - return -EFAULT; 21.796 - 21.797 - *offp += len; 21.798 - return len; 21.799 -} 21.800 - 21.801 -static struct file_operations balloon_fops = { 21.802 - .read = balloon_read, 21.803 - .write = balloon_write 21.804 -}; 21.805 - 21.806 -#else 21.807 - 21.808 -static int balloon_write(struct file *file, const char *buffer, 21.809 - u_long count, void *data) 21.810 -{ 21.811 - return do_balloon_write(buffer, count); 21.812 -} 21.813 - 21.814 -static int balloon_read(char *page, char **start, off_t off, 21.815 - int count, int *eof, void *data) 21.816 -{ 21.817 - int len; 21.818 - len = sprintf(page,"%lu\n",current_pages<<PAGE_SHIFT); 21.819 - 21.820 - if (len <= off+count) *eof = 1; 21.821 - *start = page + off; 21.822 - len -= off; 21.823 - if (len>count) len = count; 21.824 - if (len<0) len = 0; 21.825 - return len; 21.826 -} 21.827 - 21.828 -#endif 21.829 - 21.830 static int __init balloon_init(void) 21.831 { 21.832 - printk(KERN_ALERT "Starting Xen Balloon driver\n"); 21.833 + unsigned long pfn; 21.834 + struct page *page; 21.835 + 21.836 + IPRINTK("Initialising balloon driver.\n"); 21.837 21.838 - most_seen_pages = current_pages = min(xen_start_info.nr_pages,max_pfn); 21.839 - if ( (balloon_pde = create_xen_proc_entry("memory_target", 0644)) == NULL ) 21.840 + current_pages = min(xen_start_info.nr_pages, max_pfn); 21.841 + target_pages = current_pages; 21.842 + balloon_low = 0; 21.843 + balloon_high = 0; 21.844 + driver_pages = 
0UL; 21.845 + hard_limit = ~0UL; 21.846 + 21.847 + init_timer(&balloon_timer); 21.848 + balloon_timer.data = 0; 21.849 + balloon_timer.function = balloon_alarm; 21.850 + 21.851 + if ( (balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL ) 21.852 { 21.853 - printk(KERN_ALERT "Unable to create balloon driver proc entry!"); 21.854 + WPRINTK("Unable to create /proc/xen/balloon.\n"); 21.855 return -1; 21.856 } 21.857 21.858 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) 21.859 - balloon_pde->owner = THIS_MODULE; 21.860 - balloon_pde->nlink = 1; 21.861 - balloon_pde->proc_fops = &balloon_fops; 21.862 -#else 21.863 + balloon_pde->read_proc = balloon_read; 21.864 balloon_pde->write_proc = balloon_write; 21.865 - balloon_pde->read_proc = balloon_read; 21.866 -#endif 21.867 21.868 - (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx, 21.869 - CALLBACK_IN_BLOCKING_CONTEXT); 21.870 + (void)ctrl_if_register_receiver(CMSG_MEM_REQUEST, balloon_ctrlif_rx, 0); 21.871 21.872 - /* 21.873 - * make_module a new phys map if mem= says xen can give us memory to grow 21.874 - */ 21.875 - if ( max_pfn > xen_start_info.nr_pages ) 21.876 + /* Initialise the balloon with excess memory space. 
*/ 21.877 + for ( pfn = xen_start_info.nr_pages; pfn < max_pfn; pfn++ ) 21.878 { 21.879 - extern unsigned long *phys_to_machine_mapping; 21.880 - unsigned long *newmap; 21.881 - newmap = (unsigned long *)vmalloc(max_pfn * sizeof(unsigned long)); 21.882 - memset(newmap, ~0, max_pfn * sizeof(unsigned long)); 21.883 - memcpy(newmap, phys_to_machine_mapping, 21.884 - xen_start_info.nr_pages * sizeof(unsigned long)); 21.885 - phys_to_machine_mapping = newmap; 21.886 + page = &mem_map[pfn]; 21.887 + if ( !PageReserved(page) ) 21.888 + balloon_append(page); 21.889 } 21.890 21.891 return 0; 21.892 } 21.893 21.894 -static void __exit balloon_cleanup(void) 21.895 +subsys_initcall(balloon_init); 21.896 + 21.897 +void balloon_update_driver_allowance(long delta) 21.898 { 21.899 - if ( balloon_pde != NULL ) 21.900 - { 21.901 - remove_xen_proc_entry("memory_target"); 21.902 - balloon_pde = NULL; 21.903 - } 21.904 + unsigned long flags; 21.905 + balloon_lock(flags); 21.906 + driver_pages += delta; /* non-atomic update */ 21.907 + balloon_unlock(flags); 21.908 } 21.909 21.910 -module_init(balloon_init); 21.911 -module_exit(balloon_cleanup); 21.912 +void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns) 21.913 +{ 21.914 + unsigned long flags; 21.915 + 21.916 + balloon_lock(flags); 21.917 + if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 21.918 + mfn_list, nr_mfns, 0) != nr_mfns ) 21.919 + BUG(); 21.920 + current_pages -= nr_mfns; /* non-atomic update */ 21.921 + balloon_unlock(flags); 21.922 + 21.923 + schedule_work(&balloon_worker); 21.924 +} 21.925 + 21.926 +EXPORT_SYMBOL(balloon_update_driver_allowance); 21.927 +EXPORT_SYMBOL(balloon_put_pages);
22.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/interface.c Mon Dec 06 20:03:12 2004 +0000 22.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/interface.c Mon Dec 06 20:37:17 2004 +0000 22.3 @@ -35,8 +35,8 @@ static void __netif_disconnect_complete( 22.4 22.5 /* 22.6 * These can't be done in netif_disconnect() because at that point there 22.7 - * may be outstanding requests at the disc whose asynchronous responses 22.8 - * must still be notified to the remote driver. 22.9 + * may be outstanding requests in the network stack whose asynchronous 22.10 + * responses must still be notified to the remote driver. 22.11 */ 22.12 unbind_evtchn_from_irq(netif->evtchn); 22.13 vfree(netif->tx); /* Frees netif->rx as well. */ 22.14 @@ -84,7 +84,7 @@ void netif_create(netif_be_create_t *cre 22.15 unsigned int handle = create->netif_handle; 22.16 struct net_device *dev; 22.17 netif_t **pnetif, *netif; 22.18 - char name[IFNAMSIZ] = {}; 22.19 + char name[IFNAMSIZ]; 22.20 22.21 snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle); 22.22 dev = alloc_netdev(sizeof(netif_t), name, ether_setup); 22.23 @@ -116,7 +116,7 @@ void netif_create(netif_be_create_t *cre 22.24 { 22.25 DPRINTK("Could not create netif: already exists\n"); 22.26 create->status = NETIF_BE_STATUS_INTERFACE_EXISTS; 22.27 - kfree(dev); 22.28 + free_netdev(dev); 22.29 return; 22.30 } 22.31 pnetif = &(*pnetif)->hash_next; 22.32 @@ -137,7 +137,7 @@ void netif_create(netif_be_create_t *cre 22.33 DPRINTK("Could not register new net device %s: err=%d\n", 22.34 dev->name, err); 22.35 create->status = NETIF_BE_STATUS_OUT_OF_MEMORY; 22.36 - kfree(dev); 22.37 + free_netdev(dev); 22.38 return; 22.39 } 22.40 22.41 @@ -176,7 +176,7 @@ void netif_destroy(netif_be_destroy_t *d 22.42 destroy: 22.43 *pnetif = netif->hash_next; 22.44 unregister_netdev(netif->dev); 22.45 - kfree(netif->dev); 22.46 + free_netdev(netif->dev); 22.47 destroy->status = NETIF_BE_STATUS_OKAY; 22.48 } 22.49
23.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/netback.c Mon Dec 06 20:03:12 2004 +0000 23.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/netback/netback.c Mon Dec 06 20:37:17 2004 +0000 23.3 @@ -11,6 +11,7 @@ 23.4 */ 23.5 23.6 #include "common.h" 23.7 +#include <asm-xen/balloon.h> 23.8 23.9 static void netif_page_release(struct page *page); 23.10 static void netif_skb_release(struct sk_buff *skb); 23.11 @@ -29,6 +30,8 @@ static DECLARE_TASKLET(net_tx_tasklet, n 23.12 static void net_rx_action(unsigned long unused); 23.13 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0); 23.14 23.15 +static struct timer_list net_timer; 23.16 + 23.17 static struct sk_buff_head rx_queue; 23.18 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2]; 23.19 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE*3]; 23.20 @@ -69,27 +72,20 @@ static unsigned long mfn_list[MAX_MFN_AL 23.21 static unsigned int alloc_index = 0; 23.22 static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED; 23.23 23.24 -static void __refresh_mfn_list(void) 23.25 +static unsigned long alloc_mfn(void) 23.26 { 23.27 - int ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 23.28 - mfn_list, MAX_MFN_ALLOC, 0); 23.29 - if ( unlikely(ret != MAX_MFN_ALLOC) ) 23.30 - BUG(); 23.31 - alloc_index = MAX_MFN_ALLOC; 23.32 -} 23.33 - 23.34 -static unsigned long get_new_mfn(void) 23.35 -{ 23.36 - unsigned long mfn, flags; 23.37 + unsigned long mfn = 0, flags; 23.38 spin_lock_irqsave(&mfn_lock, flags); 23.39 - if ( alloc_index == 0 ) 23.40 - __refresh_mfn_list(); 23.41 - mfn = mfn_list[--alloc_index]; 23.42 + if ( unlikely(alloc_index == 0) ) 23.43 + alloc_index = HYPERVISOR_dom_mem_op( 23.44 + MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0); 23.45 + if ( alloc_index != 0 ) 23.46 + mfn = mfn_list[--alloc_index]; 23.47 spin_unlock_irqrestore(&mfn_lock, flags); 23.48 return mfn; 23.49 } 23.50 23.51 -static void dealloc_mfn(unsigned long mfn) 23.52 +static void free_mfn(unsigned long mfn) 23.53 { 23.54 unsigned 
long flags; 23.55 spin_lock_irqsave(&mfn_lock, flags); 23.56 @@ -210,8 +206,16 @@ static void net_rx_action(unsigned long 23.57 netif = (netif_t *)skb->dev->priv; 23.58 vdata = (unsigned long)skb->data; 23.59 mdata = virt_to_machine(vdata); 23.60 - new_mfn = get_new_mfn(); 23.61 - 23.62 + 23.63 + /* Memory squeeze? Back off for an arbitrary while. */ 23.64 + if ( (new_mfn = alloc_mfn()) == 0 ) 23.65 + { 23.66 + if ( net_ratelimit() ) 23.67 + printk(KERN_WARNING "Memory squeeze in netback driver.\n"); 23.68 + mod_timer(&net_timer, jiffies + HZ); 23.69 + break; 23.70 + } 23.71 + 23.72 /* 23.73 * Set the new P2M table entry before reassigning the old data page. 23.74 * Heed the comment in pgtable-2level.h:pte_page(). :-) 23.75 @@ -280,7 +284,7 @@ static void net_rx_action(unsigned long 23.76 if ( unlikely(mcl[1].args[5] != 0) ) 23.77 { 23.78 DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid); 23.79 - dealloc_mfn(mdata >> PAGE_SHIFT); 23.80 + free_mfn(mdata >> PAGE_SHIFT); 23.81 status = NETIF_RSP_ERROR; 23.82 } 23.83 23.84 @@ -307,7 +311,7 @@ static void net_rx_action(unsigned long 23.85 } 23.86 23.87 /* More work to do? */ 23.88 - if ( !skb_queue_empty(&rx_queue) ) 23.89 + if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) ) 23.90 tasklet_schedule(&net_rx_tasklet); 23.91 #if 0 23.92 else 23.93 @@ -315,6 +319,11 @@ static void net_rx_action(unsigned long 23.94 #endif 23.95 } 23.96 23.97 +static void net_alarm(unsigned long unused) 23.98 +{ 23.99 + tasklet_schedule(&net_rx_tasklet); 23.100 +} 23.101 + 23.102 struct net_device_stats *netif_be_get_stats(struct net_device *dev) 23.103 { 23.104 netif_t *netif = dev->priv; 23.105 @@ -781,9 +790,16 @@ static int __init netback_init(void) 23.106 23.107 printk("Initialising Xen netif backend\n"); 23.108 23.109 + /* We can increase reservation by this much in net_rx_action(). 
*/ 23.110 + balloon_update_driver_allowance(NETIF_RX_RING_SIZE); 23.111 + 23.112 skb_queue_head_init(&rx_queue); 23.113 skb_queue_head_init(&tx_queue); 23.114 23.115 + init_timer(&net_timer); 23.116 + net_timer.data = 0; 23.117 + net_timer.function = net_alarm; 23.118 + 23.119 netif_interface_init(); 23.120 23.121 if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
24.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/netfront/netfront.c Mon Dec 06 20:03:12 2004 +0000 24.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/netfront/netfront.c Mon Dec 06 20:37:17 2004 +0000 24.3 @@ -45,6 +45,7 @@ 24.4 #include <asm-xen/evtchn.h> 24.5 #include <asm-xen/ctrl_if.h> 24.6 #include <asm-xen/xen-public/io/netif.h> 24.7 +#include <asm-xen/balloon.h> 24.8 #include <asm/page.h> 24.9 24.10 #include <net/arp.h> 24.11 @@ -409,6 +410,9 @@ static void network_alloc_rx_buffers(str 24.12 rx_mcl[i].args[3] = 0; 24.13 rx_mcl[i].args[4] = DOMID_SELF; 24.14 24.15 + /* Tell the balloon driver what is going on. */ 24.16 + balloon_update_driver_allowance(i); 24.17 + 24.18 /* Zap PTEs and give away pages in one big multicall. */ 24.19 (void)HYPERVISOR_multicall(rx_mcl, i+1); 24.20 24.21 @@ -557,14 +561,15 @@ static int netif_poll(struct net_device 24.22 /* 24.23 * An error here is very odd. Usually indicates a backend bug, 24.24 * low-memory condition, or that we didn't have reservation headroom. 24.25 - * Whatever - print an error and queue the id again straight away. 24.26 */ 24.27 if ( unlikely(rx->status <= 0) ) 24.28 { 24.29 - printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status); 24.30 + if ( net_ratelimit() ) 24.31 + printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n"); 24.32 np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id; 24.33 wmb(); 24.34 np->rx->req_prod++; 24.35 + work_done--; 24.36 continue; 24.37 } 24.38 24.39 @@ -595,6 +600,9 @@ static int netif_poll(struct net_device 24.40 __skb_queue_tail(&rxq, skb); 24.41 } 24.42 24.43 + /* Some pages are no longer absent... */ 24.44 + balloon_update_driver_allowance(-work_done); 24.45 + 24.46 /* Do all the remapping work, and M->P updates, in one big hypercall. */ 24.47 if ( likely((mcl - rx_mcl) != 0) ) 24.48 {
25.1 --- a/linux-2.6.10-rc2-xen-sparse/drivers/xen/privcmd/privcmd.c Mon Dec 06 20:03:12 2004 +0000 25.2 +++ b/linux-2.6.10-rc2-xen-sparse/drivers/xen/privcmd/privcmd.c Mon Dec 06 20:37:17 2004 +0000 25.3 @@ -7,7 +7,6 @@ 25.4 */ 25.5 25.6 #include <linux/config.h> 25.7 -#include <linux/module.h> 25.8 #include <linux/kernel.h> 25.9 #include <linux/sched.h> 25.10 #include <linux/slab.h> 25.11 @@ -213,23 +212,9 @@ static int __init privcmd_init(void) 25.12 25.13 privcmd_intf = create_xen_proc_entry("privcmd", 0400); 25.14 if ( privcmd_intf != NULL ) 25.15 - { 25.16 - privcmd_intf->owner = THIS_MODULE; 25.17 - privcmd_intf->nlink = 1; 25.18 - privcmd_intf->proc_fops = &privcmd_file_ops; 25.19 - } 25.20 + privcmd_intf->proc_fops = &privcmd_file_ops; 25.21 25.22 return 0; 25.23 } 25.24 25.25 - 25.26 -static void __exit privcmd_cleanup(void) 25.27 -{ 25.28 - if ( privcmd_intf == NULL ) return; 25.29 - remove_xen_proc_entry("privcmd"); 25.30 - privcmd_intf = NULL; 25.31 -} 25.32 - 25.33 - 25.34 -module_init(privcmd_init); 25.35 -module_exit(privcmd_cleanup); 25.36 +__initcall(privcmd_init);
26.1 --- a/linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/io.h Mon Dec 06 20:03:12 2004 +0000 26.2 +++ b/linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/io.h Mon Dec 06 20:37:17 2004 +0000 26.3 @@ -449,4 +449,7 @@ BUILDIO(b,b,char) 26.4 BUILDIO(w,w,short) 26.5 BUILDIO(l,,int) 26.6 26.7 +/* We will be supplying our own /dev/mem implementation */ 26.8 +#define ARCH_HAS_DEV_MEM 26.9 + 26.10 #endif
28.1 --- a/linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgtable.h Mon Dec 06 20:03:12 2004 +0000 28.2 +++ b/linux-2.6.10-rc2-xen-sparse/include/asm-xen/asm-i386/pgtable.h Mon Dec 06 20:37:17 2004 +0000 28.3 @@ -424,14 +424,13 @@ extern pte_t *lookup_address(unsigned lo 28.4 #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \ 28.5 do { \ 28.6 if (__dirty) { \ 28.7 - if (likely(vma->vm_mm == current->mm)) { \ 28.8 - xen_flush_page_update_queue(); \ 28.9 - HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, \ 28.10 - entry, UVMF_INVLPG); \ 28.11 - } else { \ 28.12 - xen_l1_entry_update((__ptep), (__entry).pte_low); \ 28.13 - flush_tlb_page(__vma, __address); \ 28.14 - } \ 28.15 + if ( likely((__vma)->vm_mm == current->mm) ) { \ 28.16 + xen_flush_page_update_queue(); \ 28.17 + HYPERVISOR_update_va_mapping((__address)>>PAGE_SHIFT, (__entry), UVMF_INVLPG); \ 28.18 + } else { \ 28.19 + xen_l1_entry_update((__ptep), (__entry).pte_low); \ 28.20 + flush_tlb_page((__vma), (__address)); \ 28.21 + } \ 28.22 } \ 28.23 } while (0) 28.24 28.25 @@ -461,14 +460,14 @@ void make_page_writable(void *va); 28.26 void make_pages_readonly(void *va, unsigned int nr); 28.27 void make_pages_writable(void *va, unsigned int nr); 28.28 28.29 -static inline unsigned long arbitrary_virt_to_machine(void *va) 28.30 -{ 28.31 - pgd_t *pgd = pgd_offset_k((unsigned long)va); 28.32 - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); 28.33 - pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va); 28.34 - unsigned long pa = (*(unsigned long *)pte) & PAGE_MASK; 28.35 - return pa | ((unsigned long)va & (PAGE_SIZE-1)); 28.36 -} 28.37 +#define arbitrary_virt_to_machine(__va) \ 28.38 +({ \ 28.39 + pgd_t *__pgd = pgd_offset_k((unsigned long)(__va)); \ 28.40 + pmd_t *__pmd = pmd_offset(__pgd, (unsigned long)(__va)); \ 28.41 + pte_t *__pte = pte_offset_kernel(__pmd, (unsigned long)(__va)); \ 28.42 + unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK; \ 28.43 + __pa | ((unsigned 
long)(__va) & (PAGE_SIZE-1)); \ 28.44 +}) 28.45 28.46 #endif /* !__ASSEMBLY__ */ 28.47 28.48 @@ -476,6 +475,17 @@ static inline unsigned long arbitrary_vi 28.49 #define kern_addr_valid(addr) (1) 28.50 #endif /* !CONFIG_DISCONTIGMEM */ 28.51 28.52 +int direct_remap_area_pages(struct mm_struct *mm, 28.53 + unsigned long address, 28.54 + unsigned long machine_addr, 28.55 + unsigned long size, 28.56 + pgprot_t prot, 28.57 + domid_t domid); 28.58 +int __direct_remap_area_pages(struct mm_struct *mm, 28.59 + unsigned long address, 28.60 + unsigned long size, 28.61 + mmu_update_t *v); 28.62 + 28.63 #define io_remap_page_range(vma,from,phys,size,prot) \ 28.64 direct_remap_area_pages(vma->vm_mm,from,phys,size,prot,DOMID_IO) 28.65
29.1 --- a/linux-2.6.10-rc2-xen-sparse/include/asm-xen/hypervisor.h Mon Dec 06 20:03:12 2004 +0000 29.2 +++ b/linux-2.6.10-rc2-xen-sparse/include/asm-xen/hypervisor.h Mon Dec 06 20:37:17 2004 +0000 29.3 @@ -54,13 +54,6 @@ void xen_cpu_idle (void); 29.4 /* arch/xen/i386/kernel/hypervisor.c */ 29.5 void do_hypervisor_callback(struct pt_regs *regs); 29.6 29.7 -/* arch/xen/i386/mm/init.c */ 29.8 -/* NOTE: caller must call flush_page_update_queue() */ 29.9 -#define PROT_ON 1 29.10 -#define PROT_OFF 0 29.11 -void /* __init */ protect_page(pgd_t *dpgd, void *page, int mode); 29.12 -void /* __init */ protect_pagetable(pgd_t *dpgd, pgd_t *spgd, int mode); 29.13 - 29.14 /* arch/xen/i386/kernel/head.S */ 29.15 void lgdt_finish(void); 29.16 29.17 @@ -109,8 +102,6 @@ void MULTICALL_flush_page_update_queue(v 29.18 #ifdef CONFIG_XEN_PHYSDEV_ACCESS 29.19 /* Allocate a contiguous empty region of low memory. Return virtual start. */ 29.20 unsigned long allocate_empty_lowmem_region(unsigned long pages); 29.21 -/* Deallocate a contiguous region of low memory. Return it to the allocator. */ 29.22 -void deallocate_lowmem_region(unsigned long vstart, unsigned long pages); 29.23 #endif 29.24 29.25 /*
30.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 30.2 +++ b/linux-2.6.9-xen-sparse/arch/xen/kernel/devmem.c Mon Dec 06 20:37:17 2004 +0000 30.3 @@ -0,0 +1,158 @@ 30.4 +/* 30.5 + * Originally from linux/drivers/char/mem.c 30.6 + * 30.7 + * Copyright (C) 1991, 1992 Linus Torvalds 30.8 + * 30.9 + * Added devfs support. 30.10 + * Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu> 30.11 + * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com> 30.12 + */ 30.13 + 30.14 +#include <linux/config.h> 30.15 +#include <linux/mm.h> 30.16 +#include <linux/miscdevice.h> 30.17 +#include <linux/slab.h> 30.18 +#include <linux/vmalloc.h> 30.19 +#include <linux/mman.h> 30.20 +#include <linux/random.h> 30.21 +#include <linux/init.h> 30.22 +#include <linux/raw.h> 30.23 +#include <linux/tty.h> 30.24 +#include <linux/capability.h> 30.25 +#include <linux/smp_lock.h> 30.26 +#include <linux/devfs_fs_kernel.h> 30.27 +#include <linux/ptrace.h> 30.28 +#include <linux/device.h> 30.29 +#include <asm/pgalloc.h> 30.30 +#include <asm/uaccess.h> 30.31 +#include <asm/io.h> 30.32 + 30.33 +static inline int uncached_access(struct file *file, unsigned long addr) 30.34 +{ 30.35 + if (file->f_flags & O_SYNC) 30.36 + return 1; 30.37 + /* Xen sets correct MTRR type on non-RAM for us. */ 30.38 + return 0; 30.39 +} 30.40 + 30.41 +/* 30.42 + * This funcion reads the *physical* memory. The f_pos points directly to the 30.43 + * memory location. 30.44 + */ 30.45 +static ssize_t read_mem(struct file * file, char __user * buf, 30.46 + size_t count, loff_t *ppos) 30.47 +{ 30.48 + unsigned long i, p = *ppos; 30.49 + ssize_t read = 0; 30.50 + void *v; 30.51 + 30.52 + if ((v = ioremap(p, count)) == NULL) { 30.53 + /* 30.54 + * Some programs (e.g., dmidecode) groove off into weird RAM 30.55 + * areas where no table scan possibly exist (because Xen will 30.56 + * have stomped on them!). 
These programs get rather upset if 30.57 + * we let them know that Xen failed their access, so we fake 30.58 + * out a read of all zeroes. :-) 30.59 + */ 30.60 + for (i = 0; i < count; i++) 30.61 + if (put_user(0, buf+i)) 30.62 + return -EFAULT; 30.63 + return count; 30.64 + } 30.65 + if (copy_to_user(buf, v, count)) 30.66 + return -EFAULT; 30.67 + iounmap(v); 30.68 + 30.69 + read += count; 30.70 + *ppos += read; 30.71 + 30.72 + return read; 30.73 +} 30.74 + 30.75 +static ssize_t write_mem(struct file * file, const char __user * buf, 30.76 + size_t count, loff_t *ppos) 30.77 +{ 30.78 + unsigned long p = *ppos; 30.79 + ssize_t written = 0; 30.80 + void *v; 30.81 + 30.82 + if ((v = ioremap(p, count)) == NULL) 30.83 + return -EFAULT; 30.84 + if (copy_to_user(v, buf, count)) 30.85 + return -EFAULT; 30.86 + iounmap(v); 30.87 + 30.88 + written += count; 30.89 + *ppos += written; 30.90 + 30.91 + return written; 30.92 +} 30.93 + 30.94 +static int mmap_mem(struct file * file, struct vm_area_struct * vma) 30.95 +{ 30.96 + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; 30.97 + int uncached; 30.98 + 30.99 + uncached = uncached_access(file, offset); 30.100 + if (uncached) 30.101 + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 30.102 + 30.103 + /* Don't try to swap out physical pages.. */ 30.104 + vma->vm_flags |= VM_RESERVED; 30.105 + 30.106 + /* 30.107 + * Don't dump addresses that are not real memory to a core file. 30.108 + */ 30.109 + if (uncached) 30.110 + vma->vm_flags |= VM_IO; 30.111 + 30.112 + if (io_remap_page_range(vma, vma->vm_start, offset, 30.113 + vma->vm_end-vma->vm_start, vma->vm_page_prot)) 30.114 + return -EAGAIN; 30.115 + 30.116 + return 0; 30.117 +} 30.118 + 30.119 +/* 30.120 + * The memory devices use the full 32/64 bits of the offset, and so we cannot 30.121 + * check against negative addresses: they are ok. The return value is weird, 30.122 + * though, in that case (0). 
30.123 + * 30.124 + * also note that seeking relative to the "end of file" isn't supported: 30.125 + * it has no meaning, so it returns -EINVAL. 30.126 + */ 30.127 +static loff_t memory_lseek(struct file * file, loff_t offset, int orig) 30.128 +{ 30.129 + loff_t ret; 30.130 + 30.131 + down(&file->f_dentry->d_inode->i_sem); 30.132 + switch (orig) { 30.133 + case 0: 30.134 + file->f_pos = offset; 30.135 + ret = file->f_pos; 30.136 + force_successful_syscall_return(); 30.137 + break; 30.138 + case 1: 30.139 + file->f_pos += offset; 30.140 + ret = file->f_pos; 30.141 + force_successful_syscall_return(); 30.142 + break; 30.143 + default: 30.144 + ret = -EINVAL; 30.145 + } 30.146 + up(&file->f_dentry->d_inode->i_sem); 30.147 + return ret; 30.148 +} 30.149 + 30.150 +static int open_mem(struct inode * inode, struct file * filp) 30.151 +{ 30.152 + return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 30.153 +} 30.154 + 30.155 +struct file_operations mem_fops = { 30.156 + .llseek = memory_lseek, 30.157 + .read = read_mem, 30.158 + .write = write_mem, 30.159 + .mmap = mmap_mem, 30.160 + .open = open_mem, 30.161 +};
31.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 31.2 +++ b/linux-2.6.9-xen-sparse/include/asm-xen/balloon.h Mon Dec 06 20:37:17 2004 +0000 31.3 @@ -0,0 +1,51 @@ 31.4 +/****************************************************************************** 31.5 + * balloon.h 31.6 + * 31.7 + * Xen balloon driver - enables returning/claiming memory to/from Xen. 31.8 + * 31.9 + * Copyright (c) 2003, B Dragovic 31.10 + * Copyright (c) 2003-2004, M Williamson, K Fraser 31.11 + * 31.12 + * This file may be distributed separately from the Linux kernel, or 31.13 + * incorporated into other software packages, subject to the following license: 31.14 + * 31.15 + * Permission is hereby granted, free of charge, to any person obtaining a copy 31.16 + * of this source file (the "Software"), to deal in the Software without 31.17 + * restriction, including without limitation the rights to use, copy, modify, 31.18 + * merge, publish, distribute, sublicense, and/or sell copies of the Software, 31.19 + * and to permit persons to whom the Software is furnished to do so, subject to 31.20 + * the following conditions: 31.21 + * 31.22 + * The above copyright notice and this permission notice shall be included in 31.23 + * all copies or substantial portions of the Software. 31.24 + * 31.25 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 31.26 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31.27 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 31.28 + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 31.29 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 31.30 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 31.31 + * IN THE SOFTWARE. 
31.32 + */ 31.33 + 31.34 +#ifndef __ASM_BALLOON_H__ 31.35 +#define __ASM_BALLOON_H__ 31.36 + 31.37 +/* 31.38 + * Inform the balloon driver that it should allow some slop for device-driver 31.39 + * memory activities. 31.40 + */ 31.41 +extern void balloon_update_driver_allowance(long delta); 31.42 + 31.43 +/* Give up unmapped pages to the balloon driver. */ 31.44 +extern void balloon_put_pages(unsigned long *mfn_list, unsigned long nr_mfns); 31.45 + 31.46 +/* 31.47 + * Prevent the balloon driver from changing the memory reservation during 31.48 + * a driver critical region. 31.49 + */ 31.50 +extern spinlock_t balloon_lock; 31.51 +#define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags) 31.52 +#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags) 31.53 + 31.54 +#endif /* __ASM_BALLOON_H__ */
32.1 --- a/tools/libxc/Makefile Mon Dec 06 20:03:12 2004 +0000 32.2 +++ b/tools/libxc/Makefile Mon Dec 06 20:37:17 2004 +0000 32.3 @@ -1,5 +1,5 @@ 32.4 32.5 -MAJOR = 1.3 32.6 +MAJOR = 2.0 32.7 MINOR = 0 32.8 SONAME = libxc.so.$(MAJOR) 32.9
33.1 --- a/tools/libxc/xc.h Mon Dec 06 20:03:12 2004 +0000 33.2 +++ b/tools/libxc/xc.h Mon Dec 06 20:37:17 2004 +0000 33.3 @@ -178,14 +178,19 @@ int xc_domain_setinitialmem(int xc_handl 33.4 unsigned int initial_memkb); 33.5 33.6 int xc_domain_setmaxmem(int xc_handle, 33.7 - u32 domid, 33.8 - unsigned int max_memkb); 33.9 + u32 domid, 33.10 + unsigned int max_memkb); 33.11 33.12 int xc_domain_setvmassist(int xc_handle, 33.13 u32 domid, 33.14 unsigned int cmd, 33.15 unsigned int type); 33.16 33.17 +typedef dom0_perfc_desc_t xc_perfc_desc_t; 33.18 +/* IMPORTANT: The caller is responsible for mlock()'ing the @desc array. */ 33.19 +int xc_perfc_control(int xc_handle, 33.20 + u32 op, 33.21 + xc_perfc_desc_t *desc); 33.22 33.23 void *xc_map_foreign_range(int xc_handle, u32 dom, 33.24 int size, int prot,
34.1 --- a/tools/libxc/xc_misc.c Mon Dec 06 20:03:12 2004 +0000 34.2 +++ b/tools/libxc/xc_misc.c Mon Dec 06 20:37:17 2004 +0000 34.3 @@ -74,10 +74,26 @@ int xc_sched_id(int xc_handle, 34.4 op.cmd = DOM0_SCHED_ID; 34.5 op.interface_version = DOM0_INTERFACE_VERSION; 34.6 34.7 - if((ret = do_dom0_op(xc_handle, &op))) return ret; 34.8 + if ( (ret = do_dom0_op(xc_handle, &op)) != 0 ) 34.9 + return ret; 34.10 34.11 *sched_id = op.u.sched_id.sched_id; 34.12 34.13 return 0; 34.14 } 34.15 34.16 +int xc_perfc_control(int xc_handle, 34.17 + u32 op, 34.18 + xc_perfc_desc_t *desc) 34.19 +{ 34.20 + int rc; 34.21 + dom0_op_t dop; 34.22 + 34.23 + dop.cmd = DOM0_PERFCCONTROL; 34.24 + dop.u.perfccontrol.op = op; 34.25 + dop.u.perfccontrol.desc = desc; 34.26 + 34.27 + rc = do_dom0_op(xc_handle, &dop); 34.28 + 34.29 + return (rc == 0) ? dop.u.perfccontrol.nr_counters : rc; 34.30 +}
35.1 --- a/tools/libxutil/Makefile Mon Dec 06 20:03:12 2004 +0000 35.2 +++ b/tools/libxutil/Makefile Mon Dec 06 20:37:17 2004 +0000 35.3 @@ -30,7 +30,7 @@ CFLAGS += -fno-strict-aliasing 35.4 CFLAGS += -Wp,-MD,.$(@F).d 35.5 DEPS = .*.d 35.6 35.7 -MAJOR := 1.3 35.8 +MAJOR := 2.0 35.9 MINOR := 0 35.10 LIB_NAME := libxutil 35.11 LIB := $(LIB_NAME).so
36.1 --- a/tools/misc/Makefile Mon Dec 06 20:03:12 2004 +0000 36.2 +++ b/tools/misc/Makefile Mon Dec 06 20:37:17 2004 +0000 36.3 @@ -3,22 +3,18 @@ XEN_ROOT=../.. 36.4 include $(XEN_ROOT)/tools/Make.defs 36.5 36.6 CC = gcc 36.7 -CFLAGS = -Wall -O3 36.8 +CFLAGS = -Wall -Werror -O3 36.9 36.10 INCLUDES += -I $(XEN_XC) 36.11 INCLUDES += -I $(XEN_LIBXC) 36.12 -INCLUDES += -I $(XEN_LIBXUTIL) 36.13 - 36.14 -CFLAGS += $(INCLUDES) 36.15 +CFLAGS += $(INCLUDES) 36.16 36.17 HDRS = $(wildcard *.h) 36.18 -SRCS = $(wildcard *.c) 36.19 -OBJS = $(patsubst %.c,%.o,$(SRCS)) 36.20 36.21 -TARGETS = 36.22 +TARGETS = xenperf 36.23 36.24 INSTALL_BIN = $(TARGETS) xencons 36.25 -INSTALL_SBIN = netfix xm xend xensv 36.26 +INSTALL_SBIN = netfix xm xend xensv xenperf 36.27 36.28 all: $(TARGETS) 36.29 $(MAKE) -C miniterm 36.30 @@ -32,7 +28,7 @@ install: all 36.31 36.32 clean: 36.33 $(RM) *.o $(TARGETS) *~ 36.34 - $(MAKE) -C miniterm clean 36.35 + $(MAKE) -C miniterm clean 36.36 36.37 %: %.c $(HDRS) Makefile 36.38 - $(CC) $(CFLAGS) -o $@ $< 36.39 + $(CC) $(CFLAGS) -o $@ $< -L$(XEN_LIBXC) -lxc -L$(XEN_LIBXUTIL) -lxutil
37.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 37.2 +++ b/tools/misc/xenperf.c Mon Dec 06 20:37:17 2004 +0000 37.3 @@ -0,0 +1,104 @@ 37.4 +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- 37.5 + **************************************************************************** 37.6 + * (C) 2004 - Rolf Neugebauer - Intel Research Cambridge 37.7 + **************************************************************************** 37.8 + * 37.9 + * File: xenperf.c 37.10 + * Author: Rolf Neugebauer (rolf.neugebauer@intel.com) 37.11 + * Date: Nov 2004 37.12 + * 37.13 + * Description: 37.14 + */ 37.15 + 37.16 + 37.17 +#include <xc.h> 37.18 +#include <stdio.h> 37.19 +#include <stdlib.h> 37.20 +#include <sys/mman.h> 37.21 +#include <errno.h> 37.22 +#include <string.h> 37.23 + 37.24 +int main(int argc, char *argv[]) 37.25 +{ 37.26 + int i, j, xc_handle; 37.27 + xc_perfc_desc_t *pcd; 37.28 + unsigned int num, sum, reset = 0; 37.29 + 37.30 + if ( argc > 1 ) 37.31 + { 37.32 + char *p = argv[1]; 37.33 + if ( (*p++ == '-') && (*p == 'r') ) 37.34 + reset = 1; 37.35 + else 37.36 + { 37.37 + printf("%s: [-r]\n", argv[0]); 37.38 + printf("no args: print xen performance counters\n"); 37.39 + printf(" -r : reset xen performance counters\n"); 37.40 + return 0; 37.41 + } 37.42 + } 37.43 + 37.44 + if ( (xc_handle = xc_interface_open()) == -1 ) 37.45 + { 37.46 + fprintf(stderr, "Error opening xc interface: %d (%s)\n", 37.47 + errno, strerror(errno)); 37.48 + return 1; 37.49 + } 37.50 + 37.51 + if ( reset ) 37.52 + { 37.53 + if ( xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_RESET, 37.54 + NULL) < 0 ) 37.55 + { 37.56 + fprintf(stderr, "Error reseting performance counters: %d (%s)\n", 37.57 + errno, strerror(errno)); 37.58 + return 1; 37.59 + } 37.60 + 37.61 + return 0; 37.62 + } 37.63 + 37.64 + 37.65 + if ( (num = xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_QUERY, 37.66 + NULL)) < 0 ) 37.67 + { 37.68 + fprintf(stderr, "Error getting number of perf counters: %d (%s)\n", 37.69 + errno, 
strerror(errno)); 37.70 + return 1; 37.71 + } 37.72 + 37.73 + pcd = malloc(sizeof(*pcd) * num); 37.74 + 37.75 + if ( mlock(pcd, sizeof(*pcd) * num) != 0 ) 37.76 + { 37.77 + fprintf(stderr, "Could not mlock descriptor buffer: %d (%s)\n", 37.78 + errno, strerror(errno)); 37.79 + exit(-1); 37.80 + } 37.81 + 37.82 + if ( xc_perfc_control(xc_handle, DOM0_PERFCCONTROL_OP_QUERY, pcd) <= 0 ) 37.83 + { 37.84 + fprintf(stderr, "Error getting perf counter description: %d (%s)\n", 37.85 + errno, strerror(errno)); 37.86 + return 1; 37.87 + } 37.88 + 37.89 + munlock(pcd, sizeof(*pcd) * num); 37.90 + 37.91 + for ( i = 0; i < num; i++ ) 37.92 + { 37.93 + printf ("%-35s ", pcd[i].name); 37.94 + 37.95 + sum = 0; 37.96 + for ( j = 0; j < pcd[i].nr_vals; j++ ) 37.97 + sum += pcd[i].vals[j]; 37.98 + printf ("T=%10u ", (unsigned int)sum); 37.99 + 37.100 + for ( j = 0; j < pcd[i].nr_vals; j++ ) 37.101 + printf(" %10u", (unsigned int)pcd[i].vals[j]); 37.102 + 37.103 + printf("\n"); 37.104 + } 37.105 + 37.106 + return 0; 37.107 +}
38.1 --- a/xen/arch/x86/memory.c Mon Dec 06 20:03:12 2004 +0000 38.2 +++ b/xen/arch/x86/memory.c Mon Dec 06 20:37:17 2004 +0000 38.3 @@ -1299,9 +1299,6 @@ int do_mmu_update( 38.4 u32 type_info; 38.5 domid_t domid; 38.6 38.7 - perfc_incrc(calls_to_mmu_update); 38.8 - perfc_addc(num_page_updates, count); 38.9 - 38.10 cleanup_writable_pagetable(d, PTWR_CLEANUP_ACTIVE | PTWR_CLEANUP_INACTIVE); 38.11 38.12 /* 38.13 @@ -1331,6 +1328,9 @@ int do_mmu_update( 38.14 } 38.15 } 38.16 38.17 + perfc_incrc(calls_to_mmu_update); 38.18 + perfc_addc(num_page_updates, count); 38.19 + 38.20 if ( unlikely(!array_access_ok(VERIFY_READ, ureqs, count, sizeof(req))) ) 38.21 { 38.22 rc = -EFAULT;
39.1 --- a/xen/arch/x86/x86_32/entry.S Mon Dec 06 20:03:12 2004 +0000 39.2 +++ b/xen/arch/x86/x86_32/entry.S Mon Dec 06 20:37:17 2004 +0000 39.3 @@ -341,6 +341,7 @@ process_guest_exception_and_events: 39.4 leal DOMAIN_trap_bounce(%ebx),%edx 39.5 testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%edx) 39.6 jz test_all_events 39.7 + cli # create_bounce_frame needs CLI for pre-exceptions to work 39.8 call create_bounce_frame 39.9 jmp test_all_events 39.10
40.1 --- a/xen/common/dom0_ops.c Mon Dec 06 20:03:12 2004 +0000 40.2 +++ b/xen/common/dom0_ops.c Mon Dec 06 20:37:17 2004 +0000 40.3 @@ -651,6 +651,16 @@ long do_dom0_op(dom0_op_t *u_dom0_op) 40.4 } 40.5 break; 40.6 40.7 +#ifdef PERF_COUNTERS 40.8 + case DOM0_PERFCCONTROL: 40.9 + { 40.10 + extern int perfc_control(dom0_perfccontrol_t *); 40.11 + ret = perfc_control(&op->u.perfccontrol); 40.12 + copy_to_user(u_dom0_op, op, sizeof(*op)); 40.13 + } 40.14 + break; 40.15 +#endif 40.16 + 40.17 default: 40.18 ret = arch_do_dom0_op(op,u_dom0_op); 40.19
41.1 --- a/xen/common/perfc.c Mon Dec 06 20:03:12 2004 +0000 41.2 +++ b/xen/common/perfc.c Mon Dec 06 20:37:17 2004 +0000 41.3 @@ -4,6 +4,8 @@ 41.4 #include <xen/time.h> 41.5 #include <xen/perfc.h> 41.6 #include <xen/keyhandler.h> 41.7 +#include <public/dom0_ops.h> 41.8 +#include <asm/uaccess.h> 41.9 41.10 #undef PERFCOUNTER 41.11 #undef PERFCOUNTER_CPU 41.12 @@ -79,8 +81,9 @@ void perfc_reset(unsigned char key) 41.13 s_time_t now = NOW(); 41.14 atomic_t *counters = (atomic_t *)&perfcounters; 41.15 41.16 - printk("Xen performance counters RESET (now = 0x%08X:%08X)\n", 41.17 - (u32)(now>>32), (u32)now); 41.18 + if ( key != '\0' ) 41.19 + printk("Xen performance counters RESET (now = 0x%08X:%08X)\n", 41.20 + (u32)(now>>32), (u32)now); 41.21 41.22 /* leave STATUS counters alone -- don't reset */ 41.23 41.24 @@ -109,3 +112,107 @@ void perfc_reset(unsigned char key) 41.25 } 41.26 } 41.27 41.28 +static dom0_perfc_desc_t perfc_d[NR_PERFCTRS]; 41.29 +static int perfc_init = 0; 41.30 +static int perfc_copy_info(dom0_perfc_desc_t *desc) 41.31 +{ 41.32 + unsigned int i, j; 41.33 + atomic_t *counters = (atomic_t *)&perfcounters; 41.34 + 41.35 + if ( desc == NULL ) 41.36 + return 0; 41.37 + 41.38 + /* We only copy the name and array-size information once. 
*/ 41.39 + if ( !perfc_init ) 41.40 + { 41.41 + for ( i = 0; i < NR_PERFCTRS; i++ ) 41.42 + { 41.43 + strncpy(perfc_d[i].name, perfc_info[i].name, 41.44 + sizeof(perfc_d[i].name)); 41.45 + perfc_d[i].name[sizeof(perfc_d[i].name)-1] = '\0'; 41.46 + 41.47 + switch ( perfc_info[i].type ) 41.48 + { 41.49 + case TYPE_SINGLE: 41.50 + case TYPE_S_SINGLE: 41.51 + perfc_d[i].nr_vals = 1; 41.52 + break; 41.53 + case TYPE_CPU: 41.54 + case TYPE_S_CPU: 41.55 + perfc_d[i].nr_vals = smp_num_cpus; 41.56 + break; 41.57 + case TYPE_ARRAY: 41.58 + case TYPE_S_ARRAY: 41.59 + perfc_d[i].nr_vals = perfc_info[i].nr_elements; 41.60 + break; 41.61 + } 41.62 + 41.63 + if ( perfc_d[i].nr_vals > ARRAY_SIZE(perfc_d[i].vals) ) 41.64 + perfc_d[i].nr_vals = ARRAY_SIZE(perfc_d[i].vals); 41.65 + } 41.66 + 41.67 + perfc_init = 1; 41.68 + } 41.69 + 41.70 + /* We gather the counts together every time. */ 41.71 + for ( i = 0; i < NR_PERFCTRS; i++ ) 41.72 + { 41.73 + switch ( perfc_info[i].type ) 41.74 + { 41.75 + case TYPE_SINGLE: 41.76 + case TYPE_S_SINGLE: 41.77 + perfc_d[i].vals[0] = atomic_read(&counters[0]); 41.78 + counters += 1; 41.79 + break; 41.80 + case TYPE_CPU: 41.81 + case TYPE_S_CPU: 41.82 + for ( j = 0; j < perfc_d[i].nr_vals; j++ ) 41.83 + perfc_d[i].vals[j] = atomic_read(&counters[j]); 41.84 + counters += NR_CPUS; 41.85 + break; 41.86 + case TYPE_ARRAY: 41.87 + case TYPE_S_ARRAY: 41.88 + for ( j = 0; j < perfc_d[i].nr_vals; j++ ) 41.89 + perfc_d[i].vals[j] = atomic_read(&counters[j]); 41.90 + counters += perfc_info[i].nr_elements; 41.91 + break; 41.92 + } 41.93 + } 41.94 + 41.95 + return (copy_to_user(desc, perfc_d, NR_PERFCTRS * sizeof(*desc)) ? 
41.96 + -EFAULT : 0); 41.97 +} 41.98 + 41.99 +/* Dom0 control of perf counters */ 41.100 +int perfc_control(dom0_perfccontrol_t *pc) 41.101 +{ 41.102 + static spinlock_t lock = SPIN_LOCK_UNLOCKED; 41.103 + u32 op = pc->op; 41.104 + int rc; 41.105 + 41.106 + pc->nr_counters = NR_PERFCTRS; 41.107 + 41.108 + spin_lock(&lock); 41.109 + 41.110 + switch ( op ) 41.111 + { 41.112 + case DOM0_PERFCCONTROL_OP_RESET: 41.113 + perfc_copy_info(pc->desc); 41.114 + perfc_reset(0); 41.115 + rc = 0; 41.116 + break; 41.117 + 41.118 + case DOM0_PERFCCONTROL_OP_QUERY: 41.119 + perfc_copy_info(pc->desc); 41.120 + rc = 0; 41.121 + break; 41.122 + 41.123 + default: 41.124 + rc = -EINVAL; 41.125 + break; 41.126 + } 41.127 + 41.128 + spin_unlock(&lock); 41.129 + 41.130 + return rc; 41.131 +}
42.1 --- a/xen/include/public/dom0_ops.h Mon Dec 06 20:03:12 2004 +0000 42.2 +++ b/xen/include/public/dom0_ops.h Mon Dec 06 20:37:17 2004 +0000 42.3 @@ -386,6 +386,25 @@ typedef struct { 42.4 u32 __pad1; 42.5 } PACKED dom0_read_memtype_t; /* 32 bytes */ 42.6 42.7 +/* Interface for controlling Xen software performance counters. */ 42.8 +#define DOM0_PERFCCONTROL 34 42.9 +/* Sub-operations: */ 42.10 +#define DOM0_PERFCCONTROL_OP_RESET 1 /* Reset all counters to zero. */ 42.11 +#define DOM0_PERFCCONTROL_OP_QUERY 2 /* Get perfctr information. */ 42.12 +typedef struct { 42.13 + u8 name[80]; /* 0: name of perf counter */ 42.14 + u32 nr_vals; /* 80: number of values for this counter */ 42.15 + u32 vals[64]; /* 84: array of values */ 42.16 +} PACKED dom0_perfc_desc_t; /* 340 bytes */ 42.17 +typedef struct { 42.18 + /* IN variables. */ 42.19 + u32 op; /* 0: DOM0_PERFCCONTROL_OP_??? */ 42.20 + /* OUT variables. */ 42.21 + u32 nr_counters; /* 4: number of counters */ 42.22 + dom0_perfc_desc_t *desc; /* 8: counter information (or NULL) */ 42.23 + MEMORY_PADDING; 42.24 +} PACKED dom0_perfccontrol_t; /* 16 bytes */ 42.25 + 42.26 typedef struct { 42.27 u32 cmd; /* 0 */ 42.28 u32 interface_version; /* 4 */ /* DOM0_INTERFACE_VERSION */ 42.29 @@ -419,6 +438,7 @@ typedef struct { 42.30 dom0_add_memtype_t add_memtype; 42.31 dom0_del_memtype_t del_memtype; 42.32 dom0_read_memtype_t read_memtype; 42.33 + dom0_perfccontrol_t perfccontrol; 42.34 } PACKED u; 42.35 } PACKED dom0_op_t; /* 80 bytes */ 42.36
43.1 --- a/xen/include/public/xen.h Mon Dec 06 20:03:12 2004 +0000 43.2 +++ b/xen/include/public/xen.h Mon Dec 06 20:37:17 2004 +0000 43.3 @@ -9,8 +9,10 @@ 43.4 #ifndef __XEN_PUBLIC_XEN_H__ 43.5 #define __XEN_PUBLIC_XEN_H__ 43.6 43.7 +#ifndef PACKED 43.8 /* GCC-specific way to pack structure definitions (no implicit padding). */ 43.9 #define PACKED __attribute__ ((packed)) 43.10 +#endif 43.11 43.12 #if defined(__i386__) 43.13 #include "arch-x86_32.h"