ia64/xen-unstable

changeset 5730:22541999d8bb

Upgrade the Linux sparse tree from 2.6.11 to 2.6.12

Signed-off-by: Vincent Hanquez <vincent@xensource.com>
author vh249@kneesaa.uk.xensource.com
date Mon Jul 11 09:35:19 2005 -0500 (2005-07-11)
parents 02282ff7cc8c
children 56a63f9f378f
files buildconfigs/mk.linux-2.6-xen0 buildconfigs/mk.linux-2.6-xenU linux-2.6-xen-sparse/arch/xen/Kconfig linux-2.6-xen-sparse/arch/xen/Kconfig.debug linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32 linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64 linux-2.6-xen-sparse/arch/xen/i386/Kconfig linux-2.6-xen-sparse/arch/xen/i386/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c linux-2.6-xen-sparse/arch/xen/i386/mm/init.c linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c linux-2.6-xen-sparse/drivers/Makefile linux-2.6-xen-sparse/drivers/char/mem.c linux-2.6-xen-sparse/drivers/char/tty_io.c linux-2.6-xen-sparse/include/asm-generic/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h 
linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h linux-2.6-xen-sparse/include/linux/gfp.h linux-2.6-xen-sparse/include/linux/mm.h linux-2.6-xen-sparse/include/linux/skbuff.h linux-2.6-xen-sparse/mm/highmem.c linux-2.6-xen-sparse/mm/memory.c linux-2.6-xen-sparse/mm/mmap.c linux-2.6-xen-sparse/mm/page_alloc.c linux-2.6-xen-sparse/net/core/dev.c linux-2.6-xen-sparse/net/core/skbuff.c patches/linux-2.6.12/i386-cpu-hotplug-updated-for-mm.patch patches/linux-2.6.12/net-csum.patch patches/linux-2.6.12/rcu-nohz.patch patches/linux-2.6.12/smp-alts.patch patches/linux-2.6.12/x86_64-linux.patch
     1.1 --- a/buildconfigs/mk.linux-2.6-xen0	Mon Jul 11 09:29:56 2005 -0500
     1.2 +++ b/buildconfigs/mk.linux-2.6-xen0	Mon Jul 11 09:35:19 2005 -0500
     1.3 @@ -2,7 +2,7 @@
     1.4  OS           = linux
     1.5  
     1.6  LINUX_SERIES = 2.6
     1.7 -LINUX_VER    = 2.6.11
     1.8 +LINUX_VER    = 2.6.12
     1.9  
    1.10  EXTRAVERSION = xen0
    1.11  
     2.1 --- a/buildconfigs/mk.linux-2.6-xenU	Mon Jul 11 09:29:56 2005 -0500
     2.2 +++ b/buildconfigs/mk.linux-2.6-xenU	Mon Jul 11 09:35:19 2005 -0500
     2.3 @@ -2,7 +2,7 @@
     2.4  OS           = linux
     2.5  
     2.6  LINUX_SERIES = 2.6
     2.7 -LINUX_VER    = 2.6.11
     2.8 +LINUX_VER    = 2.6.12
     2.9  
    2.10  EXTRAVERSION = xenU
    2.11  
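
(With this era's build system, bumping LINUX_VER means the top-level make now fetches a pristine linux-2.6.12 tree and overlays linux-2.6-xen-sparse onto it, producing the linux-2.6.12-xen0 and linux-2.6.12-xenU kernels; the sparse-tree and patch updates below adapt that overlay to the new base.)
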
     3.1 --- a/linux-2.6-xen-sparse/arch/xen/Kconfig	Mon Jul 11 09:29:56 2005 -0500
     3.2 +++ b/linux-2.6-xen-sparse/arch/xen/Kconfig	Mon Jul 11 09:35:19 2005 -0500
     3.3 @@ -194,3 +194,5 @@ source "security/Kconfig"
     3.4  source "crypto/Kconfig"
     3.5  
     3.6  source "lib/Kconfig"
     3.7 +
     3.8 +source "arch/xen/Kconfig.debug"
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/linux-2.6-xen-sparse/arch/xen/Kconfig.debug	Mon Jul 11 09:35:19 2005 -0500
     4.3 @@ -0,0 +1,129 @@
     4.4 +menu "Kernel hacking"
     4.5 +
     4.6 +source "lib/Kconfig.debug"
     4.7 +
     4.8 +# X86
     4.9 +config EARLY_PRINTK
    4.10 +	bool "Early printk" if EMBEDDED && DEBUG_KERNEL
    4.11 +	default y
    4.12 +	depends on X86
    4.13 +	help
    4.14 +	  Write kernel log output directly into the VGA buffer or to a serial
    4.15 +	  port.
    4.16 +
    4.17 +	  This is useful for kernel debugging when your machine crashes very
    4.18 +	  early before the console code is initialized. For normal operation
    4.19 +	  it is not recommended because it looks ugly and doesn't cooperate
    4.20 +	  with klogd/syslogd or the X server. You should normally N here,
    4.21 +	  unless you want to debug such a crash.
    4.22 +
    4.23 +config DEBUG_STACKOVERFLOW
    4.24 +	bool "Check for stack overflows"
    4.25 +	depends on DEBUG_KERNEL && X86
    4.26 +
    4.27 +config KPROBES
    4.28 +	bool "Kprobes"
    4.29 +	depends on DEBUG_KERNEL && X86
    4.30 +	help
    4.31 +	  Kprobes allows you to trap at almost any kernel address and
    4.32 +	  execute a callback function.  register_kprobe() establishes
    4.33 +	  a probepoint and specifies the callback.  Kprobes is useful
    4.34 +	  for kernel debugging, non-intrusive instrumentation and testing.
    4.35 +	  If in doubt, say "N".
    4.36 +
    4.37 +config DEBUG_STACK_USAGE
    4.38 +	bool "Stack utilization instrumentation"
    4.39 +	depends on DEBUG_KERNEL && X86
    4.40 +	help
    4.41 +	  Enables the display of the minimum amount of free stack which each
    4.42 +	  task has ever had available in the sysrq-T and sysrq-P debug output.
    4.43 +
    4.44 +	  This option will slow down process creation somewhat.
    4.45 +
    4.46 +comment "Page alloc debug is incompatible with Software Suspend on i386"
    4.47 +	depends on DEBUG_KERNEL && SOFTWARE_SUSPEND && X86
    4.48 +
    4.49 +config DEBUG_PAGEALLOC
    4.50 +	bool "Page alloc debugging"
    4.51 +	depends on DEBUG_KERNEL && !SOFTWARE_SUSPEND && X86
    4.52 +	help
    4.53 +	  Unmap pages from the kernel linear mapping after free_pages().
    4.54 +	  This results in a large slowdown, but helps to find certain types
    4.55 +	  of memory corruptions.
    4.56 +
    4.57 +config 4KSTACKS
    4.58 +	bool "Use 4Kb for kernel stacks instead of 8Kb"
    4.59 +	depends on DEBUG_KERNEL && X86
    4.60 +	help
    4.61 +	  If you say Y here the kernel will use a 4Kb stacksize for the
    4.62 +	  kernel stack attached to each process/thread. This facilitates
    4.63 +	  running more threads on a system and also reduces the pressure
    4.64 +	  on the VM subsystem for higher order allocations. This option
    4.65 +	  will also use IRQ stacks to compensate for the reduced stackspace.
    4.66 +
    4.67 +config X86_FIND_SMP_CONFIG
    4.68 +	bool
    4.69 +	depends on X86_LOCAL_APIC || X86_VOYAGER && X86
    4.70 +	default y
    4.71 +
    4.72 +config X86_MPPARSE
    4.73 +	bool
    4.74 +	depends on X86_LOCAL_APIC && !X86_VISWS && X86
    4.75 +	default y
    4.76 +
    4.77 +# X86_64
    4.78 +
    4.79 +# !SMP for now because the context switch early causes GPF in segment reloading
    4.80 +# and the GS base checking does the wrong thing then, causing a hang.
    4.81 +config CHECKING
    4.82 +	bool "Additional run-time checks"
    4.83 +	depends on DEBUG_KERNEL && !SMP && X86_64
    4.84 +	help
    4.85 +	  Enables some internal consistency checks for kernel debugging.
    4.86 +	  You should normally say N.
    4.87 +
    4.88 +config INIT_DEBUG
    4.89 +	bool "Debug __init statements"
    4.90 +	depends on DEBUG_KERNEL && X86_64
    4.91 +	help
    4.92 +	  Fill __init and __initdata at the end of boot. This helps debugging
    4.93 +	  illegal uses of __init and __initdata after initialization.
    4.94 +
    4.95 +config IOMMU_DEBUG
    4.96 +       depends on GART_IOMMU && DEBUG_KERNEL && X86_64
    4.97 +       bool "Enable IOMMU debugging"
    4.98 +       help
    4.99 +         Force the IOMMU to on even when you have less than 4GB of
   4.100 +	 memory and add debugging code. On overflow always panic. And
   4.101 +	 allow to enable IOMMU leak tracing. Can be disabled at boot
   4.102 +	 time with iommu=noforce. This will also enable scatter gather
   4.103 +	 list merging.  Currently not recommended for production
   4.104 +	 code. When you use it make sure you have a big enough
   4.105 +	 IOMMU/AGP aperture.  Most of the options enabled by this can
   4.106 +	 be set more finegrained using the iommu= command line
   4.107 +	 options. See Documentation/x86_64/boot-options.txt for more
   4.108 +	 details.
   4.109 +
   4.110 +config IOMMU_LEAK
   4.111 +       bool "IOMMU leak tracing"
   4.112 +       depends on DEBUG_KERNEL && X86_64
   4.113 +       depends on IOMMU_DEBUG
   4.114 +       help
   4.115 +         Add a simple leak tracer to the IOMMU code. This is useful when you
   4.116 +	 are debugging a buggy device driver that leaks IOMMU mappings.
   4.117 +
   4.118 +#config X86_REMOTE_DEBUG
   4.119 +#       bool "kgdb debugging stub"
   4.120 +
   4.121 +# X86 & X86_64
   4.122 +config KPROBES
   4.123 +	bool "Kprobes"
   4.124 +	depends on DEBUG_KERNEL
   4.125 +	help
   4.126 +	  Kprobes allows you to trap at almost any kernel address and
   4.127 +	  execute a callback function.  register_kprobe() establishes
   4.128 +	  a probepoint and specifies the callback.  Kprobes is useful
   4.129 +	  for kernel debugging, non-intrusive instrumentation and testing.
   4.130 +	  If in doubt, say "N".
   4.131 +
   4.132 +endmenu
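
The new Kconfig.debug collects the compile-time debug switches for the Xen i386/x86_64 trees. As a sketch of how such a switch is consumed, the fragment below shows a CONFIG_DEBUG_STACKOVERFLOW-style check as it might appear inside do_IRQ(); this is an illustrative reconstruction of the common 2.6-era pattern, not a verbatim copy of this tree's irq.c:

    #ifdef CONFIG_DEBUG_STACKOVERFLOW
    	/* Warn when less than ~1KB of the kernel stack remains free.
    	 * THREAD_SIZE is the per-task stack size (4KB or 8KB, per the
    	 * 4KSTACKS option above). */
    	{
    		long esp;
    		__asm__ __volatile__("andl %%esp,%0" :
    				     "=r" (esp) : "0" (THREAD_SIZE - 1));
    		if (unlikely(esp < (long)(sizeof(struct thread_info) + 1024)))
    			printk("do_IRQ: stack overflow: %ld\n",
    			       esp - sizeof(struct thread_info));
    	}
    #endif
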
     5.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Mon Jul 11 09:29:56 2005 -0500
     5.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_32	Mon Jul 11 09:35:19 2005 -0500
     5.3 @@ -1,7 +1,7 @@
     5.4  #
     5.5  # Automatically generated make config: don't edit
     5.6 -# Linux kernel version: 2.6.11.12-xen0
     5.7 -# Wed Jul  6 18:26:29 2005
     5.8 +# Linux kernel version: 2.6.12-xen0
     5.9 +# Sat Jul  9 09:19:47 2005
    5.10  #
    5.11  CONFIG_XEN=y
    5.12  CONFIG_ARCH_XEN=y
    5.13 @@ -34,6 +34,7 @@ CONFIG_EXPERIMENTAL=y
    5.14  CONFIG_BROKEN=y
    5.15  CONFIG_BROKEN_ON_SMP=y
    5.16  CONFIG_LOCK_KERNEL=y
    5.17 +CONFIG_INIT_ENV_ARG_LIMIT=32
    5.18  
    5.19  #
    5.20  # General setup
    5.21 @@ -45,7 +46,6 @@ CONFIG_SYSVIPC=y
    5.22  # CONFIG_BSD_PROCESS_ACCT is not set
    5.23  CONFIG_SYSCTL=y
    5.24  # CONFIG_AUDIT is not set
    5.25 -CONFIG_LOG_BUF_SHIFT=14
    5.26  CONFIG_HOTPLUG=y
    5.27  CONFIG_KOBJECT_UEVENT=y
    5.28  # CONFIG_IKCONFIG is not set
    5.29 @@ -53,15 +53,18 @@ CONFIG_KOBJECT_UEVENT=y
    5.30  CONFIG_KALLSYMS=y
    5.31  # CONFIG_KALLSYMS_ALL is not set
    5.32  # CONFIG_KALLSYMS_EXTRA_PASS is not set
    5.33 +CONFIG_PRINTK=y
    5.34 +CONFIG_BUG=y
    5.35 +CONFIG_BASE_FULL=y
    5.36  CONFIG_FUTEX=y
    5.37  CONFIG_EPOLL=y
    5.38 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
    5.39  CONFIG_SHMEM=y
    5.40  CONFIG_CC_ALIGN_FUNCTIONS=0
    5.41  CONFIG_CC_ALIGN_LABELS=0
    5.42  CONFIG_CC_ALIGN_LOOPS=0
    5.43  CONFIG_CC_ALIGN_JUMPS=0
    5.44  # CONFIG_TINY_SHMEM is not set
    5.45 +CONFIG_BASE_SMALL=0
    5.46  
    5.47  #
    5.48  # Loadable module support
    5.49 @@ -101,6 +104,7 @@ CONFIG_MPENTIUM4=y
    5.50  # CONFIG_MWINCHIPC6 is not set
    5.51  # CONFIG_MWINCHIP2 is not set
    5.52  # CONFIG_MWINCHIP3D is not set
    5.53 +# CONFIG_MGEODEGX1 is not set
    5.54  # CONFIG_MCYRIXIII is not set
    5.55  # CONFIG_MVIAC3_2 is not set
    5.56  # CONFIG_X86_GENERIC is not set
    5.57 @@ -121,6 +125,7 @@ CONFIG_X86_USE_PPRO_CHECKSUM=y
    5.58  # CONFIG_SMP is not set
    5.59  CONFIG_PREEMPT=y
    5.60  CONFIG_PREEMPT_BKL=y
    5.61 +# CONFIG_X86_REBOOTFIXUPS is not set
    5.62  CONFIG_MICROCODE=y
    5.63  CONFIG_X86_CPUID=y
    5.64  
    5.65 @@ -154,6 +159,8 @@ CONFIG_PCI_MMCONFIG=y
    5.66  # CONFIG_PCI_MSI is not set
    5.67  CONFIG_PCI_LEGACY_PROC=y
    5.68  # CONFIG_PCI_NAMES is not set
    5.69 +# CONFIG_PCI_DEBUG is not set
    5.70 +CONFIG_ISA_DMA_API=y
    5.71  CONFIG_ISA=y
    5.72  # CONFIG_EISA is not set
    5.73  # CONFIG_MCA is not set
    5.74 @@ -165,11 +172,6 @@ CONFIG_ISA=y
    5.75  # CONFIG_PCCARD is not set
    5.76  
    5.77  #
    5.78 -# PC-card bridges
    5.79 -#
    5.80 -CONFIG_PCMCIA_PROBE=y
    5.81 -
    5.82 -#
    5.83  # PCI Hotplug Support
    5.84  #
    5.85  # CONFIG_HOTPLUG_PCI is not set
    5.86 @@ -177,8 +179,10 @@ CONFIG_PCMCIA_PROBE=y
    5.87  #
    5.88  # Kernel hacking
    5.89  #
    5.90 +# CONFIG_PRINTK_TIME is not set
    5.91  CONFIG_DEBUG_KERNEL=y
    5.92  CONFIG_MAGIC_SYSRQ=y
    5.93 +CONFIG_LOG_BUF_SHIFT=14
    5.94  # CONFIG_SCHEDSTATS is not set
    5.95  # CONFIG_DEBUG_SLAB is not set
    5.96  # CONFIG_DEBUG_PREEMPT is not set
    5.97 @@ -202,6 +206,7 @@ CONFIG_GENERIC_HARDIRQS=y
    5.98  CONFIG_GENERIC_IRQ_PROBE=y
    5.99  CONFIG_X86_BIOS_REBOOT=y
   5.100  CONFIG_PC=y
   5.101 +CONFIG_SECCOMP=y
   5.102  
   5.103  #
   5.104  # Executable file formats
   5.105 @@ -358,7 +363,7 @@ CONFIG_BLK_DEV_SD=y
   5.106  #
   5.107  # SCSI Transport Attributes
   5.108  #
   5.109 -# CONFIG_SCSI_SPI_ATTRS is not set
   5.110 +CONFIG_SCSI_SPI_ATTRS=y
   5.111  # CONFIG_SCSI_FC_ATTRS is not set
   5.112  # CONFIG_SCSI_ISCSI_ATTRS is not set
   5.113  
   5.114 @@ -435,6 +440,7 @@ CONFIG_SCSI_QLA2XXX=y
   5.115  # CONFIG_SCSI_QLA2300 is not set
   5.116  # CONFIG_SCSI_QLA2322 is not set
   5.117  # CONFIG_SCSI_QLA6312 is not set
   5.118 +# CONFIG_SCSI_LPFC is not set
   5.119  # CONFIG_SCSI_SEAGATE is not set
   5.120  # CONFIG_SCSI_SYM53C416 is not set
   5.121  # CONFIG_SCSI_DC395x is not set
   5.122 @@ -468,6 +474,7 @@ CONFIG_BLK_DEV_DM=y
   5.123  CONFIG_DM_SNAPSHOT=y
   5.124  CONFIG_DM_MIRROR=y
   5.125  # CONFIG_DM_ZERO is not set
   5.126 +# CONFIG_DM_MULTIPATH is not set
   5.127  
   5.128  #
   5.129  # Fusion MPT device support
   5.130 @@ -496,7 +503,6 @@ CONFIG_NET=y
   5.131  #
   5.132  CONFIG_PACKET=y
   5.133  # CONFIG_PACKET_MMAP is not set
   5.134 -# CONFIG_NETLINK_DEV is not set
   5.135  CONFIG_UNIX=y
   5.136  # CONFIG_NET_KEY is not set
   5.137  CONFIG_INET=y
   5.138 @@ -676,7 +682,6 @@ CONFIG_PCNET32=y
   5.139  # CONFIG_DGRS is not set
   5.140  # CONFIG_EEPRO100 is not set
   5.141  CONFIG_E100=y
   5.142 -# CONFIG_E100_NAPI is not set
   5.143  # CONFIG_FEALNX is not set
   5.144  # CONFIG_NATSEMI is not set
   5.145  CONFIG_NE2K_PCI=y
   5.146 @@ -709,6 +714,7 @@ CONFIG_E1000=y
   5.147  # CONFIG_SK98LIN is not set
   5.148  # CONFIG_VIA_VELOCITY is not set
   5.149  CONFIG_TIGON3=y
   5.150 +# CONFIG_BNX2 is not set
   5.151  
   5.152  #
   5.153  # Ethernet (10000 Mbit)
   5.154 @@ -766,19 +772,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
   5.155  # CONFIG_INPUT_EVBUG is not set
   5.156  
   5.157  #
   5.158 -# Input I/O drivers
   5.159 -#
   5.160 -# CONFIG_GAMEPORT is not set
   5.161 -CONFIG_SOUND_GAMEPORT=y
   5.162 -CONFIG_SERIO=y
   5.163 -CONFIG_SERIO_I8042=y
   5.164 -CONFIG_SERIO_SERPORT=y
   5.165 -# CONFIG_SERIO_CT82C710 is not set
   5.166 -# CONFIG_SERIO_PCIPS2 is not set
   5.167 -CONFIG_SERIO_LIBPS2=y
   5.168 -# CONFIG_SERIO_RAW is not set
   5.169 -
   5.170 -#
   5.171  # Input Device Drivers
   5.172  #
   5.173  CONFIG_INPUT_KEYBOARD=y
   5.174 @@ -799,6 +792,18 @@ CONFIG_MOUSE_PS2=y
   5.175  # CONFIG_INPUT_MISC is not set
   5.176  
   5.177  #
   5.178 +# Hardware I/O ports
   5.179 +#
   5.180 +CONFIG_SERIO=y
   5.181 +CONFIG_SERIO_I8042=y
   5.182 +CONFIG_SERIO_SERPORT=y
   5.183 +# CONFIG_SERIO_CT82C710 is not set
   5.184 +# CONFIG_SERIO_PCIPS2 is not set
   5.185 +CONFIG_SERIO_LIBPS2=y
   5.186 +# CONFIG_SERIO_RAW is not set
   5.187 +# CONFIG_GAMEPORT is not set
   5.188 +
   5.189 +#
   5.190  # Character devices
   5.191  #
   5.192  CONFIG_VT=y
   5.193 @@ -814,6 +819,7 @@ CONFIG_HW_CONSOLE=y
   5.194  #
   5.195  # Non-8250 serial port support
   5.196  #
   5.197 +# CONFIG_SERIAL_JSM is not set
   5.198  CONFIG_UNIX98_PTYS=y
   5.199  CONFIG_LEGACY_PTYS=y
   5.200  CONFIG_LEGACY_PTY_COUNT=256
   5.201 @@ -846,7 +852,6 @@ CONFIG_AGP_ATI=m
   5.202  CONFIG_AGP_AMD=m
   5.203  CONFIG_AGP_AMD64=m
   5.204  CONFIG_AGP_INTEL=m
   5.205 -CONFIG_AGP_INTEL_MCH=m
   5.206  CONFIG_AGP_NVIDIA=m
   5.207  CONFIG_AGP_SIS=m
   5.208  CONFIG_AGP_SWORKS=m
   5.209 @@ -868,6 +873,11 @@ CONFIG_DRM_SIS=m
   5.210  # CONFIG_HANGCHECK_TIMER is not set
   5.211  
   5.212  #
   5.213 +# TPM devices
   5.214 +#
   5.215 +# CONFIG_TCG_TPM is not set
   5.216 +
   5.217 +#
   5.218  # I2C support
   5.219  #
   5.220  # CONFIG_I2C is not set
   5.221 @@ -913,6 +923,8 @@ CONFIG_DUMMY_CONSOLE=y
   5.222  #
   5.223  # USB support
   5.224  #
   5.225 +CONFIG_USB_ARCH_HAS_HCD=y
   5.226 +CONFIG_USB_ARCH_HAS_OHCI=y
   5.227  CONFIG_USB=y
   5.228  # CONFIG_USB_DEBUG is not set
   5.229  
   5.230 @@ -923,14 +935,14 @@ CONFIG_USB=y
   5.231  # CONFIG_USB_BANDWIDTH is not set
   5.232  # CONFIG_USB_DYNAMIC_MINORS is not set
   5.233  # CONFIG_USB_OTG is not set
   5.234 -CONFIG_USB_ARCH_HAS_HCD=y
   5.235 -CONFIG_USB_ARCH_HAS_OHCI=y
   5.236  
   5.237  #
   5.238  # USB Host Controller Drivers
   5.239  #
   5.240  # CONFIG_USB_EHCI_HCD is not set
   5.241  CONFIG_USB_OHCI_HCD=y
   5.242 +# CONFIG_USB_OHCI_BIG_ENDIAN is not set
   5.243 +CONFIG_USB_OHCI_LITTLE_ENDIAN=y
   5.244  CONFIG_USB_UHCI_HCD=y
   5.245  # CONFIG_USB_SL811_HCD is not set
   5.246  
   5.247 @@ -967,7 +979,6 @@ CONFIG_USB_HIDINPUT=y
   5.248  #
   5.249  # CONFIG_USB_MDC800 is not set
   5.250  # CONFIG_USB_MICROTEK is not set
   5.251 -# CONFIG_USB_HPUSBSCSI is not set
   5.252  
   5.253  #
   5.254  # USB Multimedia devices
   5.255 @@ -986,6 +997,7 @@ CONFIG_USB_HIDINPUT=y
   5.256  # CONFIG_USB_PEGASUS is not set
   5.257  # CONFIG_USB_RTL8150 is not set
   5.258  # CONFIG_USB_USBNET is not set
   5.259 +CONFIG_USB_MON=y
   5.260  
   5.261  #
   5.262  # USB port drivers
   5.263 @@ -1232,6 +1244,7 @@ CONFIG_CRYPTO_SHA1=m
   5.264  # CONFIG_CRYPTO_SHA256 is not set
   5.265  # CONFIG_CRYPTO_SHA512 is not set
   5.266  # CONFIG_CRYPTO_WP512 is not set
   5.267 +# CONFIG_CRYPTO_TGR192 is not set
   5.268  CONFIG_CRYPTO_DES=m
   5.269  # CONFIG_CRYPTO_BLOWFISH is not set
   5.270  # CONFIG_CRYPTO_TWOFISH is not set
     6.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64	Mon Jul 11 09:29:56 2005 -0500
     6.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xen0_defconfig_x86_64	Mon Jul 11 09:35:19 2005 -0500
     6.3 @@ -1,7 +1,7 @@
     6.4  #
     6.5  # Automatically generated make config: don't edit
     6.6 -# Linux kernel version: 2.6.11.1-xen0
     6.7 -# Tue May 10 11:07:02 2005
     6.8 +# Linux kernel version: 2.6.12-xen0
     6.9 +# Wed Jun 29 10:01:20 2005
    6.10  #
    6.11  CONFIG_XEN=y
    6.12  CONFIG_ARCH_XEN=y
    6.13 @@ -33,6 +33,7 @@ CONFIG_EXPERIMENTAL=y
    6.14  # CONFIG_CLEAN_COMPILE is not set
    6.15  CONFIG_BROKEN=y
    6.16  CONFIG_BROKEN_ON_SMP=y
    6.17 +CONFIG_INIT_ENV_ARG_LIMIT=32
    6.18  
    6.19  #
    6.20  # General setup
    6.21 @@ -44,22 +45,24 @@ CONFIG_SYSVIPC=y
    6.22  # CONFIG_BSD_PROCESS_ACCT is not set
    6.23  CONFIG_SYSCTL=y
    6.24  # CONFIG_AUDIT is not set
    6.25 -CONFIG_LOG_BUF_SHIFT=14
    6.26  # CONFIG_HOTPLUG is not set
    6.27  CONFIG_KOBJECT_UEVENT=y
    6.28  # CONFIG_IKCONFIG is not set
    6.29  # CONFIG_EMBEDDED is not set
    6.30  CONFIG_KALLSYMS=y
    6.31  # CONFIG_KALLSYMS_EXTRA_PASS is not set
    6.32 +CONFIG_PRINTK=y
    6.33 +CONFIG_BUG=y
    6.34 +CONFIG_BASE_FULL=y
    6.35  CONFIG_FUTEX=y
    6.36  CONFIG_EPOLL=y
    6.37 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
    6.38  CONFIG_SHMEM=y
    6.39  CONFIG_CC_ALIGN_FUNCTIONS=0
    6.40  CONFIG_CC_ALIGN_LABELS=0
    6.41  CONFIG_CC_ALIGN_LOOPS=0
    6.42  CONFIG_CC_ALIGN_JUMPS=0
    6.43  # CONFIG_TINY_SHMEM is not set
    6.44 +CONFIG_BASE_SMALL=0
    6.45  
    6.46  #
    6.47  # Loadable module support
    6.48 @@ -74,6 +77,7 @@ CONFIG_KMOD=y
    6.49  CONFIG_XENARCH="x86_64"
    6.50  CONFIG_X86=y
    6.51  CONFIG_MMU=y
    6.52 +CONFIG_UID16=y
    6.53  CONFIG_GENERIC_ISA_DMA=y
    6.54  CONFIG_GENERIC_IOMAP=y
    6.55  CONFIG_X86_CMPXCHG=y
    6.56 @@ -93,15 +97,17 @@ CONFIG_X86_IO_APIC=y
    6.57  CONFIG_PCI=y
    6.58  CONFIG_PCI_DIRECT=y
    6.59  # CONFIG_PCI_MMCONFIG is not set
    6.60 -CONFIG_EARLY_PRINTK=y
    6.61 +CONFIG_ISA_DMA_API=y
    6.62  CONFIG_GENERIC_HARDIRQS=y
    6.63  CONFIG_GENERIC_IRQ_PROBE=y
    6.64 +CONFIG_SECCOMP=y
    6.65  
    6.66  #
    6.67  # X86_64 processor configuration
    6.68  #
    6.69  CONFIG_X86_64=y
    6.70  CONFIG_64BIT=y
    6.71 +CONFIG_EARLY_PRINTK=y
    6.72  
    6.73  #
    6.74  # Processor type and features
    6.75 @@ -135,6 +141,9 @@ CONFIG_DUMMY_IOMMU=y
    6.76  #
    6.77  CONFIG_IA32_EMULATION=y
    6.78  # CONFIG_IA32_AOUT is not set
    6.79 +CONFIG_COMPAT=y
    6.80 +CONFIG_SYSVIPC_COMPAT=y
    6.81 +
    6.82  #
    6.83  # Executable file formats
    6.84  #
    6.85 @@ -285,7 +294,7 @@ CONFIG_BLK_DEV_SD=y
    6.86  #
    6.87  # SCSI Transport Attributes
    6.88  #
    6.89 -# CONFIG_SCSI_SPI_ATTRS is not set
    6.90 +CONFIG_SCSI_SPI_ATTRS=y
    6.91  # CONFIG_SCSI_FC_ATTRS is not set
    6.92  # CONFIG_SCSI_ISCSI_ATTRS is not set
    6.93  
    6.94 @@ -352,6 +361,7 @@ CONFIG_SCSI_QLA2XXX=y
    6.95  # CONFIG_SCSI_QLA2300 is not set
    6.96  # CONFIG_SCSI_QLA2322 is not set
    6.97  # CONFIG_SCSI_QLA6312 is not set
    6.98 +# CONFIG_SCSI_LPFC is not set
    6.99  # CONFIG_SCSI_DC395x is not set
   6.100  # CONFIG_SCSI_DC390T is not set
   6.101  # CONFIG_SCSI_DEBUG is not set
   6.102 @@ -388,7 +398,6 @@ CONFIG_NET=y
   6.103  #
   6.104  CONFIG_PACKET=y
   6.105  # CONFIG_PACKET_MMAP is not set
   6.106 -# CONFIG_NETLINK_DEV is not set
   6.107  CONFIG_UNIX=y
   6.108  # CONFIG_NET_KEY is not set
   6.109  CONFIG_INET=y
   6.110 @@ -553,7 +562,6 @@ CONFIG_PCNET32=y
   6.111  # CONFIG_DGRS is not set
   6.112  # CONFIG_EEPRO100 is not set
   6.113  CONFIG_E100=y
   6.114 -# CONFIG_E100_NAPI is not set
   6.115  # CONFIG_FEALNX is not set
   6.116  # CONFIG_NATSEMI is not set
   6.117  CONFIG_NE2K_PCI=y
   6.118 @@ -584,6 +592,7 @@ CONFIG_E1000=y
   6.119  # CONFIG_SK98LIN is not set
   6.120  # CONFIG_VIA_VELOCITY is not set
   6.121  CONFIG_TIGON3=y
   6.122 +# CONFIG_BNX2 is not set
   6.123  
   6.124  #
   6.125  # Ethernet (10000 Mbit)
   6.126 @@ -641,19 +650,6 @@ CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
   6.127  # CONFIG_INPUT_EVBUG is not set
   6.128  
   6.129  #
   6.130 -# Input I/O drivers
   6.131 -#
   6.132 -# CONFIG_GAMEPORT is not set
   6.133 -CONFIG_SOUND_GAMEPORT=y
   6.134 -CONFIG_SERIO=y
   6.135 -CONFIG_SERIO_I8042=y
   6.136 -CONFIG_SERIO_SERPORT=y
   6.137 -# CONFIG_SERIO_CT82C710 is not set
   6.138 -# CONFIG_SERIO_PCIPS2 is not set
   6.139 -CONFIG_SERIO_LIBPS2=y
   6.140 -# CONFIG_SERIO_RAW is not set
   6.141 -
   6.142 -#
   6.143  # Input Device Drivers
   6.144  #
   6.145  CONFIG_INPUT_KEYBOARD=y
   6.146 @@ -671,6 +667,18 @@ CONFIG_MOUSE_PS2=y
   6.147  # CONFIG_INPUT_MISC is not set
   6.148  
   6.149  #
   6.150 +# Hardware I/O ports
   6.151 +#
   6.152 +CONFIG_SERIO=y
   6.153 +CONFIG_SERIO_I8042=y
   6.154 +CONFIG_SERIO_SERPORT=y
   6.155 +# CONFIG_SERIO_CT82C710 is not set
   6.156 +# CONFIG_SERIO_PCIPS2 is not set
   6.157 +CONFIG_SERIO_LIBPS2=y
   6.158 +# CONFIG_SERIO_RAW is not set
   6.159 +# CONFIG_GAMEPORT is not set
   6.160 +
   6.161 +#
   6.162  # Character devices
   6.163  #
   6.164  CONFIG_VT=y
   6.165 @@ -686,6 +694,7 @@ CONFIG_HW_CONSOLE=y
   6.166  #
   6.167  # Non-8250 serial port support
   6.168  #
   6.169 +# CONFIG_SERIAL_JSM is not set
   6.170  CONFIG_UNIX98_PTYS=y
   6.171  CONFIG_LEGACY_PTYS=y
   6.172  CONFIG_LEGACY_PTY_COUNT=256
   6.173 @@ -702,7 +711,6 @@ CONFIG_LEGACY_PTY_COUNT=256
   6.174  # CONFIG_HW_RANDOM is not set
   6.175  # CONFIG_NVRAM is not set
   6.176  CONFIG_RTC=y
   6.177 -# CONFIG_GEN_RTC is not set
   6.178  # CONFIG_DTLK is not set
   6.179  # CONFIG_R3964 is not set
   6.180  # CONFIG_APPLICOM is not set
   6.181 @@ -713,7 +721,7 @@ CONFIG_RTC=y
   6.182  # CONFIG_FTAPE is not set
   6.183  CONFIG_AGP=m
   6.184  CONFIG_AGP_AMD64=m
   6.185 -CONFIG_AGP_INTEL_MCH=m
   6.186 +# CONFIG_AGP_INTEL is not set
   6.187  CONFIG_DRM=m
   6.188  CONFIG_DRM_TDFX=m
   6.189  # CONFIG_DRM_GAMMA is not set
   6.190 @@ -727,6 +735,11 @@ CONFIG_DRM_SIS=m
   6.191  # CONFIG_HANGCHECK_TIMER is not set
   6.192  
   6.193  #
   6.194 +# TPM devices
   6.195 +#
   6.196 +# CONFIG_TCG_TPM is not set
   6.197 +
   6.198 +#
   6.199  # I2C support
   6.200  #
   6.201  # CONFIG_I2C is not set
   6.202 @@ -771,13 +784,9 @@ CONFIG_DUMMY_CONSOLE=y
   6.203  #
   6.204  # USB support
   6.205  #
   6.206 -# CONFIG_USB is not set
   6.207  CONFIG_USB_ARCH_HAS_HCD=y
   6.208  CONFIG_USB_ARCH_HAS_OHCI=y
   6.209 -
   6.210 -#
   6.211 -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
   6.212 -#
   6.213 +# CONFIG_USB is not set
   6.214  
   6.215  #
   6.216  # USB Gadget Support
   6.217 @@ -994,6 +1003,7 @@ CONFIG_CRYPTO_SHA1=m
   6.218  # CONFIG_CRYPTO_SHA256 is not set
   6.219  # CONFIG_CRYPTO_SHA512 is not set
   6.220  # CONFIG_CRYPTO_WP512 is not set
   6.221 +# CONFIG_CRYPTO_TGR192 is not set
   6.222  CONFIG_CRYPTO_DES=m
   6.223  # CONFIG_CRYPTO_BLOWFISH is not set
   6.224  # CONFIG_CRYPTO_TWOFISH is not set
   6.225 @@ -1019,5 +1029,14 @@ CONFIG_CRYPTO_CRC32C=m
   6.226  #
   6.227  # CONFIG_CRC_CCITT is not set
   6.228  CONFIG_CRC32=y
   6.229 -CONFIG_LIBCRC32C=y
   6.230 +CONFIG_LIBCRC32C=m
   6.231  CONFIG_ZLIB_INFLATE=y
   6.232 +
   6.233 +#
   6.234 +# Kernel hacking
   6.235 +#
   6.236 +# CONFIG_PRINTK_TIME is not set
   6.237 +# CONFIG_DEBUG_KERNEL is not set
   6.238 +CONFIG_LOG_BUF_SHIFT=14
   6.239 +CONFIG_X86_FIND_SMP_CONFIG=y
   6.240 +CONFIG_X86_MPPARSE=y
     7.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Mon Jul 11 09:29:56 2005 -0500
     7.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_32	Mon Jul 11 09:35:19 2005 -0500
     7.3 @@ -1,7 +1,7 @@
     7.4  #
     7.5  # Automatically generated make config: don't edit
     7.6 -# Linux kernel version: 2.6.11.12-xenU
     7.7 -# Wed Jul  6 22:40:19 2005
     7.8 +# Linux kernel version: 2.6.12-xenU
     7.9 +# Sun Jul 10 17:32:04 2005
    7.10  #
    7.11  CONFIG_XEN=y
    7.12  CONFIG_ARCH_XEN=y
    7.13 @@ -29,6 +29,7 @@ CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
    7.14  CONFIG_EXPERIMENTAL=y
    7.15  CONFIG_CLEAN_COMPILE=y
    7.16  CONFIG_LOCK_KERNEL=y
    7.17 +CONFIG_INIT_ENV_ARG_LIMIT=32
    7.18  
    7.19  #
    7.20  # General setup
    7.21 @@ -40,23 +41,26 @@ CONFIG_SYSVIPC=y
    7.22  # CONFIG_BSD_PROCESS_ACCT is not set
    7.23  CONFIG_SYSCTL=y
    7.24  # CONFIG_AUDIT is not set
    7.25 -CONFIG_LOG_BUF_SHIFT=14
    7.26  CONFIG_HOTPLUG=y
    7.27  CONFIG_KOBJECT_UEVENT=y
    7.28  # CONFIG_IKCONFIG is not set
    7.29 +# CONFIG_CPUSETS is not set
    7.30  # CONFIG_EMBEDDED is not set
    7.31  CONFIG_KALLSYMS=y
    7.32  # CONFIG_KALLSYMS_ALL is not set
    7.33  # CONFIG_KALLSYMS_EXTRA_PASS is not set
    7.34 +CONFIG_PRINTK=y
    7.35 +CONFIG_BUG=y
    7.36 +CONFIG_BASE_FULL=y
    7.37  CONFIG_FUTEX=y
    7.38  CONFIG_EPOLL=y
    7.39 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
    7.40  CONFIG_SHMEM=y
    7.41  CONFIG_CC_ALIGN_FUNCTIONS=0
    7.42  CONFIG_CC_ALIGN_LABELS=0
    7.43  CONFIG_CC_ALIGN_LOOPS=0
    7.44  CONFIG_CC_ALIGN_JUMPS=0
    7.45  # CONFIG_TINY_SHMEM is not set
    7.46 +CONFIG_BASE_SMALL=0
    7.47  
    7.48  #
    7.49  # Loadable module support
    7.50 @@ -97,6 +101,7 @@ CONFIG_MPENTIUM4=y
    7.51  # CONFIG_MWINCHIPC6 is not set
    7.52  # CONFIG_MWINCHIP2 is not set
    7.53  # CONFIG_MWINCHIP3D is not set
    7.54 +# CONFIG_MGEODEGX1 is not set
    7.55  # CONFIG_MCYRIXIII is not set
    7.56  # CONFIG_MVIAC3_2 is not set
    7.57  # CONFIG_X86_GENERIC is not set
    7.58 @@ -120,6 +125,7 @@ CONFIG_NR_CPUS=8
    7.59  # CONFIG_SCHED_SMT is not set
    7.60  CONFIG_PREEMPT=y
    7.61  CONFIG_PREEMPT_BKL=y
    7.62 +# CONFIG_X86_REBOOTFIXUPS is not set
    7.63  CONFIG_X86_CPUID=y
    7.64  
    7.65  #
    7.66 @@ -132,35 +138,14 @@ CONFIG_HIGHMEM=y
    7.67  CONFIG_HAVE_DEC_LOCK=y
    7.68  # CONFIG_REGPARM is not set
    7.69  CONFIG_HOTPLUG_CPU=y
    7.70 -
    7.71 -#
    7.72 -# Kernel hacking
    7.73 -#
    7.74 -CONFIG_DEBUG_KERNEL=y
    7.75 -CONFIG_MAGIC_SYSRQ=y
    7.76 -# CONFIG_SCHEDSTATS is not set
    7.77 -# CONFIG_DEBUG_SLAB is not set
    7.78 -# CONFIG_DEBUG_PREEMPT is not set
    7.79 -# CONFIG_DEBUG_SPINLOCK is not set
    7.80 -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
    7.81 -# CONFIG_DEBUG_KOBJECT is not set
    7.82 -# CONFIG_DEBUG_HIGHMEM is not set
    7.83 -CONFIG_DEBUG_BUGVERBOSE=y
    7.84 -# CONFIG_DEBUG_INFO is not set
    7.85 -# CONFIG_DEBUG_FS is not set
    7.86 -# CONFIG_FRAME_POINTER is not set
    7.87 -CONFIG_EARLY_PRINTK=y
    7.88 -# CONFIG_DEBUG_STACKOVERFLOW is not set
    7.89 -# CONFIG_KPROBES is not set
    7.90 -# CONFIG_DEBUG_STACK_USAGE is not set
    7.91 -# CONFIG_DEBUG_PAGEALLOC is not set
    7.92 -# CONFIG_4KSTACKS is not set
    7.93  CONFIG_GENERIC_HARDIRQS=y
    7.94  CONFIG_GENERIC_IRQ_PROBE=y
    7.95  CONFIG_X86_SMP=y
    7.96  CONFIG_X86_BIOS_REBOOT=y
    7.97  CONFIG_X86_TRAMPOLINE=y
    7.98  CONFIG_PC=y
    7.99 +CONFIG_SECCOMP=y
   7.100 +CONFIG_EARLY_PRINTK=y
   7.101  
   7.102  #
   7.103  # Executable file formats
   7.104 @@ -256,7 +241,6 @@ CONFIG_NET=y
   7.105  #
   7.106  CONFIG_PACKET=y
   7.107  # CONFIG_PACKET_MMAP is not set
   7.108 -# CONFIG_NETLINK_DEV is not set
   7.109  CONFIG_UNIX=y
   7.110  # CONFIG_NET_KEY is not set
   7.111  CONFIG_INET=y
   7.112 @@ -523,6 +507,7 @@ CONFIG_CRYPTO_MD5=m
   7.113  # CONFIG_CRYPTO_SHA256 is not set
   7.114  # CONFIG_CRYPTO_SHA512 is not set
   7.115  # CONFIG_CRYPTO_WP512 is not set
   7.116 +# CONFIG_CRYPTO_TGR192 is not set
   7.117  # CONFIG_CRYPTO_DES is not set
   7.118  # CONFIG_CRYPTO_BLOWFISH is not set
   7.119  # CONFIG_CRYPTO_TWOFISH is not set
   7.120 @@ -551,3 +536,27 @@ CONFIG_CRYPTO_CRC32C=m
   7.121  # CONFIG_CRC32 is not set
   7.122  CONFIG_LIBCRC32C=m
   7.123  CONFIG_ZLIB_INFLATE=y
   7.124 +
   7.125 +#
   7.126 +# Kernel hacking
   7.127 +#
   7.128 +# CONFIG_PRINTK_TIME is not set
   7.129 +CONFIG_DEBUG_KERNEL=y
   7.130 +CONFIG_MAGIC_SYSRQ=y
   7.131 +CONFIG_LOG_BUF_SHIFT=14
   7.132 +# CONFIG_SCHEDSTATS is not set
   7.133 +# CONFIG_DEBUG_SLAB is not set
   7.134 +# CONFIG_DEBUG_PREEMPT is not set
   7.135 +# CONFIG_DEBUG_SPINLOCK is not set
   7.136 +# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
   7.137 +# CONFIG_DEBUG_KOBJECT is not set
   7.138 +# CONFIG_DEBUG_HIGHMEM is not set
   7.139 +CONFIG_DEBUG_BUGVERBOSE=y
   7.140 +# CONFIG_DEBUG_INFO is not set
   7.141 +# CONFIG_DEBUG_FS is not set
   7.142 +# CONFIG_FRAME_POINTER is not set
   7.143 +# CONFIG_DEBUG_STACKOVERFLOW is not set
   7.144 +# CONFIG_KPROBES is not set
   7.145 +# CONFIG_DEBUG_STACK_USAGE is not set
   7.146 +# CONFIG_DEBUG_PAGEALLOC is not set
   7.147 +# CONFIG_4KSTACKS is not set
     8.1 --- a/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Mon Jul 11 09:29:56 2005 -0500
     8.2 +++ b/linux-2.6-xen-sparse/arch/xen/configs/xenU_defconfig_x86_64	Mon Jul 11 09:35:19 2005 -0500
     8.3 @@ -1,7 +1,7 @@
     8.4  #
     8.5  # Automatically generated make config: don't edit
     8.6 -# Linux kernel version: 2.6.11.10-xenU
     8.7 -# Mon May 23 15:07:58 2005
     8.8 +# Linux kernel version: 2.6.12-xenU
     8.9 +# Thu Jul  7 11:43:14 2005
    8.10  #
    8.11  CONFIG_XEN=y
    8.12  CONFIG_ARCH_XEN=y
    8.13 @@ -29,6 +29,7 @@ CONFIG_HAVE_ARCH_DEV_ALLOC_SKB=y
    8.14  CONFIG_EXPERIMENTAL=y
    8.15  CONFIG_CLEAN_COMPILE=y
    8.16  CONFIG_BROKEN_ON_SMP=y
    8.17 +CONFIG_INIT_ENV_ARG_LIMIT=32
    8.18  
    8.19  #
    8.20  # General setup
    8.21 @@ -42,22 +43,24 @@ CONFIG_BSD_PROCESS_ACCT=y
    8.22  CONFIG_SYSCTL=y
    8.23  CONFIG_AUDIT=y
    8.24  CONFIG_AUDITSYSCALL=y
    8.25 -CONFIG_LOG_BUF_SHIFT=14
    8.26  CONFIG_HOTPLUG=y
    8.27  CONFIG_KOBJECT_UEVENT=y
    8.28  # CONFIG_IKCONFIG is not set
    8.29  # CONFIG_EMBEDDED is not set
    8.30  CONFIG_KALLSYMS=y
    8.31  CONFIG_KALLSYMS_EXTRA_PASS=y
    8.32 +CONFIG_PRINTK=y
    8.33 +CONFIG_BUG=y
    8.34 +CONFIG_BASE_FULL=y
    8.35  CONFIG_FUTEX=y
    8.36  CONFIG_EPOLL=y
    8.37 -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
    8.38  CONFIG_SHMEM=y
    8.39  CONFIG_CC_ALIGN_FUNCTIONS=0
    8.40  CONFIG_CC_ALIGN_LABELS=0
    8.41  CONFIG_CC_ALIGN_LOOPS=0
    8.42  CONFIG_CC_ALIGN_JUMPS=0
    8.43  # CONFIG_TINY_SHMEM is not set
    8.44 +CONFIG_BASE_SMALL=0
    8.45  
    8.46  #
    8.47  # Loadable module support
    8.48 @@ -72,6 +75,7 @@ CONFIG_KMOD=y
    8.49  CONFIG_XENARCH="x86_64"
    8.50  CONFIG_X86=y
    8.51  CONFIG_MMU=y
    8.52 +CONFIG_UID16=y
    8.53  CONFIG_GENERIC_ISA_DMA=y
    8.54  CONFIG_GENERIC_IOMAP=y
    8.55  CONFIG_X86_CMPXCHG=y
    8.56 @@ -89,15 +93,17 @@ CONFIG_X86_CPUID=y
    8.57  # CONFIG_X86_LOCAL_APIC is not set
    8.58  # CONFIG_X86_IO_APIC is not set
    8.59  # CONFIG_PCI is not set
    8.60 -CONFIG_EARLY_PRINTK=y
    8.61 +CONFIG_ISA_DMA_API=y
    8.62  CONFIG_GENERIC_HARDIRQS=y
    8.63  CONFIG_GENERIC_IRQ_PROBE=y
    8.64 +CONFIG_SECCOMP=y
    8.65  
    8.66  #
    8.67  # X86_64 processor configuration
    8.68  #
    8.69  CONFIG_X86_64=y
    8.70  CONFIG_64BIT=y
    8.71 +CONFIG_EARLY_PRINTK=y
    8.72  
    8.73  #
    8.74  # Processor type and features
    8.75 @@ -130,6 +136,9 @@ CONFIG_DUMMY_IOMMU=y
    8.76  #
    8.77  CONFIG_IA32_EMULATION=y
    8.78  # CONFIG_IA32_AOUT is not set
    8.79 +CONFIG_COMPAT=y
    8.80 +CONFIG_SYSVIPC_COMPAT=y
    8.81 +
    8.82  #
    8.83  # Executable file formats
    8.84  #
    8.85 @@ -226,6 +235,7 @@ CONFIG_DM_CRYPT=m
    8.86  CONFIG_DM_SNAPSHOT=m
    8.87  CONFIG_DM_MIRROR=m
    8.88  CONFIG_DM_ZERO=m
    8.89 +# CONFIG_DM_MULTIPATH is not set
    8.90  
    8.91  #
    8.92  # Networking support
    8.93 @@ -237,7 +247,6 @@ CONFIG_NET=y
    8.94  #
    8.95  CONFIG_PACKET=y
    8.96  CONFIG_PACKET_MMAP=y
    8.97 -CONFIG_NETLINK_DEV=y
    8.98  CONFIG_UNIX=y
    8.99  CONFIG_NET_KEY=m
   8.100  CONFIG_INET=y
   8.101 @@ -246,6 +255,7 @@ CONFIG_IP_ADVANCED_ROUTER=y
   8.102  CONFIG_IP_MULTIPLE_TABLES=y
   8.103  CONFIG_IP_ROUTE_FWMARK=y
   8.104  CONFIG_IP_ROUTE_MULTIPATH=y
   8.105 +# CONFIG_IP_ROUTE_MULTIPATH_CACHED is not set
   8.106  CONFIG_IP_ROUTE_VERBOSE=y
   8.107  # CONFIG_IP_PNP is not set
   8.108  CONFIG_NET_IPIP=m
   8.109 @@ -373,7 +383,7 @@ CONFIG_IP_NF_ARPFILTER=m
   8.110  CONFIG_IP_NF_ARP_MANGLE=m
   8.111  
   8.112  #
   8.113 -# IPv6: Netfilter Configuration
   8.114 +# IPv6: Netfilter Configuration (EXPERIMENTAL)
   8.115  #
   8.116  # CONFIG_IP6_NF_QUEUE is not set
   8.117  CONFIG_IP6_NF_IPTABLES=m
   8.118 @@ -480,6 +490,7 @@ CONFIG_NET_SCH_INGRESS=m
   8.119  CONFIG_NET_QOS=y
   8.120  CONFIG_NET_ESTIMATOR=y
   8.121  CONFIG_NET_CLS=y
   8.122 +# CONFIG_NET_CLS_BASIC is not set
   8.123  CONFIG_NET_CLS_TCINDEX=m
   8.124  CONFIG_NET_CLS_ROUTE4=m
   8.125  CONFIG_NET_CLS_ROUTE=y
   8.126 @@ -490,6 +501,7 @@ CONFIG_NET_CLS_IND=y
   8.127  # CONFIG_CLS_U32_MARK is not set
   8.128  CONFIG_NET_CLS_RSVP=m
   8.129  CONFIG_NET_CLS_RSVP6=m
   8.130 +# CONFIG_NET_EMATCH is not set
   8.131  # CONFIG_NET_CLS_ACT is not set
   8.132  CONFIG_NET_CLS_POLICE=y
   8.133  
   8.134 @@ -554,6 +566,11 @@ CONFIG_ACT200L_DONGLE=m
   8.135  #
   8.136  # FIR device drivers
   8.137  #
   8.138 +# CONFIG_NSC_FIR is not set
   8.139 +# CONFIG_WINBOND_FIR is not set
   8.140 +# CONFIG_SMC_IRCC_FIR is not set
   8.141 +# CONFIG_ALI_FIR is not set
   8.142 +# CONFIG_VIA_FIR is not set
   8.143  CONFIG_BT=m
   8.144  CONFIG_BT_L2CAP=m
   8.145  CONFIG_BT_SCO=m
   8.146 @@ -577,7 +594,6 @@ CONFIG_DUMMY=m
   8.147  CONFIG_BONDING=m
   8.148  CONFIG_EQUALIZER=m
   8.149  CONFIG_TUN=m
   8.150 -CONFIG_ETHERTAP=m
   8.151  
   8.152  #
   8.153  # Ethernet (10 or 100Mbit)
   8.154 @@ -853,7 +869,7 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=
   8.155  CONFIG_SECURITY_SELINUX_DISABLE=y
   8.156  CONFIG_SECURITY_SELINUX_DEVELOP=y
   8.157  CONFIG_SECURITY_SELINUX_AVC_STATS=y
   8.158 -# CONFIG_SECURITY_SELINUX_MLS is not set
   8.159 +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
   8.160  
   8.161  #
   8.162  # Cryptographic options
   8.163 @@ -867,6 +883,7 @@ CONFIG_CRYPTO_SHA1=y
   8.164  CONFIG_CRYPTO_SHA256=m
   8.165  CONFIG_CRYPTO_SHA512=m
   8.166  CONFIG_CRYPTO_WP512=m
   8.167 +# CONFIG_CRYPTO_TGR192 is not set
   8.168  CONFIG_CRYPTO_DES=m
   8.169  CONFIG_CRYPTO_BLOWFISH=m
   8.170  CONFIG_CRYPTO_TWOFISH=m
   8.171 @@ -895,3 +912,10 @@ CONFIG_CRC32=y
   8.172  CONFIG_LIBCRC32C=m
   8.173  CONFIG_ZLIB_INFLATE=y
   8.174  CONFIG_ZLIB_DEFLATE=m
   8.175 +
   8.176 +#
   8.177 +# Kernel hacking
   8.178 +#
   8.179 +# CONFIG_PRINTK_TIME is not set
   8.180 +# CONFIG_DEBUG_KERNEL is not set
   8.181 +CONFIG_LOG_BUF_SHIFT=14
     9.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/Kconfig	Mon Jul 11 09:29:56 2005 -0500
     9.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/Kconfig	Mon Jul 11 09:35:19 2005 -0500
     9.3 @@ -74,6 +74,7 @@ config M386
     9.4  	  - "Winchip-C6" for original IDT Winchip.
     9.5  	  - "Winchip-2" for IDT Winchip 2.
     9.6  	  - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
     9.7 +	  - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
     9.8  	  - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
     9.9  	  - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
    9.10  
    9.11 @@ -201,6 +202,11 @@ config MWINCHIP3D
    9.12  	  stores for this CPU, which can increase performance of some
    9.13  	  operations.
    9.14  
    9.15 +config MGEODEGX1
    9.16 +	bool "GeodeGX1"
    9.17 +	help
    9.18 +	  Select this for a Geode GX1 (Cyrix MediaGX) chip.
    9.19 +
    9.20  config MCYRIXIII
    9.21  	bool "CyrixIII/VIA-C3"
    9.22  	help
    9.23 @@ -249,7 +255,7 @@ config X86_L1_CACHE_SHIFT
    9.24  	int
    9.25  	default "7" if MPENTIUM4 || X86_GENERIC
    9.26  	default "4" if X86_ELAN || M486 || M386
    9.27 -	default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2
    9.28 +	default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
    9.29  	default "6" if MK7 || MK8 || MPENTIUMM
    9.30  
    9.31  config RWSEM_GENERIC_SPINLOCK
    9.32 @@ -268,7 +274,7 @@ config GENERIC_CALIBRATE_DELAY
    9.33  
    9.34  config X86_PPRO_FENCE
    9.35  	bool
    9.36 -	depends on M686 || M586MMX || M586TSC || M586 || M486 || M386
    9.37 +	depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
    9.38  	default y
    9.39  
    9.40  config X86_F00F_BUG
    9.41 @@ -298,7 +304,7 @@ config X86_POPAD_OK
    9.42  
    9.43  config X86_ALIGNMENT_16
    9.44  	bool
    9.45 -	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2
    9.46 +	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
    9.47  	default y
    9.48  
    9.49  config X86_GOOD_APIC
    9.50 @@ -434,7 +440,7 @@ config PREEMPT_BKL
    9.51  
    9.52  #config X86_TSC
    9.53  #	 bool
    9.54 -#	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ
    9.55 +# 	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ
    9.56  #	 default y
    9.57  
    9.58  #config X86_MCE
    9.59 @@ -474,6 +480,24 @@ config PREEMPT_BKL
    9.60  #	   Enabling this feature will cause a message to be printed when the P4
    9.61  #	   enters thermal throttling.
    9.62  
    9.63 +config X86_REBOOTFIXUPS
    9.64 +	bool "Enable X86 board specific fixups for reboot"
    9.65 +	depends on X86
    9.66 +	default n
    9.67 +	---help---
    9.68 +	  This enables chipset and/or board specific fixups to be done
    9.69 +	  in order to get reboot to work correctly. This is only needed on
    9.70 +	  some combinations of hardware and BIOS. The symptom, for which
    9.71 +	  this config is intended, is when reboot ends with a stalled/hung
    9.72 +	  system.
    9.73 +
    9.74 +	  Currently, the only fixup is for the Geode GX1/CS5530A/TROM2.1.
    9.75 +	  combination.
    9.76 +
    9.77 +	  Say Y if you want to enable the fixup. Currently, it's safe to
    9.78 +	  enable this option even if you don't need it.
    9.79 +	  Say N otherwise.
    9.80 +
    9.81  config MICROCODE
    9.82  	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
    9.83  	 depends on XEN_PRIVILEGED_GUEST
    9.84 @@ -599,6 +623,16 @@ config HAVE_ARCH_BOOTMEM_NODE
    9.85  	depends on NUMA
    9.86  	default y
    9.87  
    9.88 +config HAVE_MEMORY_PRESENT
    9.89 +	bool
    9.90 +	depends on DISCONTIGMEM
    9.91 +	default y
    9.92 +
    9.93 +config NEED_NODE_MEMMAP_SIZE
    9.94 +	bool
    9.95 +	depends on DISCONTIGMEM
    9.96 +	default y
    9.97 +
    9.98  #config HIGHPTE
    9.99  #	bool "Allocate 3rd-level pagetables from highmem"
   9.100  #	depends on HIGHMEM4G || HIGHMEM64G
   9.101 @@ -682,14 +716,19 @@ config REGPARM
   9.102  
   9.103  config X86_LOCAL_APIC
   9.104  	bool
   9.105 -	depends on !SMP && X86_UP_APIC
   9.106 +	depends on XEN_PRIVILEGED_GUEST && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER))
   9.107  	default y
   9.108  
   9.109  config X86_IO_APIC
   9.110  	bool
   9.111 -	depends on !SMP && X86_UP_IOAPIC
   9.112 +	depends on XEN_PRIVILEGED_GUEST && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)))
   9.113  	default y
   9.114  
   9.115 +config X86_VISWS_APIC
   9.116 +	bool
   9.117 +	depends on X86_VISWS
   9.118 +  	default y
   9.119 +
   9.120  config HOTPLUG_CPU
   9.121  	bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
   9.122  	depends on SMP && HOTPLUG && EXPERIMENTAL
   9.123 @@ -704,20 +743,10 @@ if XEN_PHYSDEV_ACCESS
   9.124  
   9.125  menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
   9.126  
   9.127 -config X86_VISWS_APIC
   9.128 -	bool
   9.129 -	depends on X86_VISWS
   9.130 -	default y
   9.131 -
   9.132 -config X86_LOCAL_APIC
   9.133 -	bool
   9.134 -	depends on (X86_VISWS || SMP) && !X86_VOYAGER
   9.135 -	default y
   9.136 -
   9.137  config X86_UP_APIC
   9.138 -	bool "Local APIC support on uniprocessors" if !SMP
   9.139 -	depends on !(X86_VISWS || X86_VOYAGER)
   9.140 -	---help---
   9.141 +	bool "Local APIC support on uniprocessors"
   9.142 +	depends on !SMP && !(X86_VISWS || X86_VOYAGER)
   9.143 +	help
   9.144  	  A local APIC (Advanced Programmable Interrupt Controller) is an
   9.145  	  integrated interrupt controller in the CPU. If you have a single-CPU
   9.146  	  system which has a processor with a local APIC, you can say Y here to
   9.147 @@ -727,28 +756,18 @@ config X86_UP_APIC
   9.148  	  performance counters), and the NMI watchdog which detects hard
   9.149  	  lockups.
   9.150  
   9.151 -	  If you have a system with several CPUs, you do not need to say Y
   9.152 -	  here: the local APIC will be used automatically.
   9.153 -
   9.154  config X86_UP_IOAPIC
   9.155  	bool "IO-APIC support on uniprocessors"
   9.156 -	depends on !SMP && X86_UP_APIC
   9.157 +	depends on X86_UP_APIC
   9.158  	help
   9.159  	  An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an
   9.160  	  SMP-capable replacement for PC-style interrupt controllers. Most
   9.161 -	  SMP systems and a small number of uniprocessor systems have one.
   9.162 +	  SMP systems and many recent uniprocessor systems have one.
   9.163 +
   9.164  	  If you have a single-CPU system with an IO-APIC, you can say Y here
   9.165  	  to use it. If you say Y here even though your machine doesn't have
   9.166  	  an IO-APIC, then the kernel will still run with no slowdown at all.
   9.167  
   9.168 -	  If you have a system with several CPUs, you do not need to say Y
   9.169 -	  here: the IO-APIC will be used automatically.
   9.170 -
   9.171 -config X86_IO_APIC
   9.172 -	bool
   9.173 -	depends on SMP && !(X86_VISWS || X86_VOYAGER)
   9.174 -	default y
   9.175 -
   9.176  config PCI
   9.177  	bool "PCI support" if !X86_VISWS
   9.178  	depends on !X86_VOYAGER
   9.179 @@ -809,7 +828,7 @@ config PCI_DIRECT
   9.180  
   9.181  config PCI_MMCONFIG
   9.182  	bool
   9.183 -	depends on PCI && (PCI_GOMMCONFIG || (PCI_GOANY && ACPI))
   9.184 +	depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
   9.185  	select ACPI_BOOT
   9.186  	default y
   9.187  
   9.188 @@ -817,6 +836,10 @@ source "drivers/pci/pcie/Kconfig"
   9.189  
   9.190  source "drivers/pci/Kconfig"
   9.191  
   9.192 +config ISA_DMA_API
   9.193 +	bool
   9.194 +	default y
   9.195 +
   9.196  config ISA
   9.197  	bool "ISA support"
   9.198  	depends on !(X86_VOYAGER || X86_VISWS)
   9.199 @@ -846,18 +869,14 @@ config EISA
   9.200  source "drivers/eisa/Kconfig"
   9.201  
   9.202  config MCA
   9.203 -	bool "MCA support"
   9.204 -	depends on !(X86_VISWS || X86_VOYAGER)
   9.205 +	bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
   9.206 +	default y if X86_VOYAGER
   9.207  	help
   9.208  	  MicroChannel Architecture is found in some IBM PS/2 machines and
   9.209  	  laptops.  It is a bus system similar to PCI or ISA. See
   9.210  	  <file:Documentation/mca.txt> (and especially the web page given
   9.211  	  there) before attempting to build an MCA bus kernel.
   9.212  
   9.213 -config MCA
   9.214 -	depends on X86_VOYAGER
   9.215 -	default y if X86_VOYAGER
   9.216 -
   9.217  source "drivers/mca/Kconfig"
   9.218  
   9.219  config SCx200
   9.220 @@ -880,8 +899,6 @@ endmenu
   9.221  
   9.222  endif
   9.223  
   9.224 -source "arch/i386/Kconfig.debug"
   9.225 -
   9.226  #
   9.227  # Use the generic interrupt handling code in kernel/irq/:
   9.228  #
   9.229 @@ -918,4 +935,21 @@ config PC
   9.230  	depends on X86 && !EMBEDDED
   9.231  	default y
   9.232  
   9.233 +config SECCOMP
   9.234 +	bool "Enable seccomp to safely compute untrusted bytecode"
   9.235 +	depends on PROC_FS
   9.236 +	default y
   9.237 +	help
   9.238 +	  This kernel feature is useful for number crunching applications
   9.239 +	  that may need to compute untrusted bytecode during their
   9.240 +	  execution. By using pipes or other transports made available to
   9.241 +	  the process as file descriptors supporting the read/write
   9.242 +	  syscalls, it's possible to isolate those applications in
   9.243 +	  their own address space using seccomp. Once seccomp is
   9.244 +	  enabled via /proc/<pid>/seccomp, it cannot be disabled
   9.245 +	  and the task is only allowed to execute a few safe syscalls
   9.246 +	  defined by each seccomp mode.
   9.247 +
   9.248 +	  If unsure, say Y. Only embedded should say N here.
   9.249 +
   9.250  endmenu
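
The new SECCOMP help above notes that the mode is enabled via /proc/<pid>/seccomp (the prctl(PR_SET_SECCOMP) interface came later). A minimal userspace sketch of that interface -- illustrative only, not part of this changeset:

    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
    	int fd = open("/proc/self/seccomp", O_WRONLY);

    	if (fd < 0 || write(fd, "1", 1) != 1)
    		return 1;	/* kernel lacks CONFIG_SECCOMP */

    	/* Strict mode is now on: only read(), write(), exit() and
    	 * sigreturn() are permitted; any other syscall is fatal. */
    	write(1, "sandboxed\n", 10);
    	_exit(0);
    }
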
    10.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/Makefile	Mon Jul 11 09:29:56 2005 -0500
    10.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/Makefile	Mon Jul 11 09:35:19 2005 -0500
    10.3 @@ -14,6 +14,8 @@
    10.4  # 19990713  Artur Skawina <skawina@geocities.com>
    10.5  #           Added '-march' and '-mpreferred-stack-boundary' support
    10.6  #
    10.7 +# 20050320  Kianusch Sayah Karadji <kianusch@sk-tech.net>
    10.8 +#           Added support for GEODE CPU
    10.9  
   10.10  XENARCH	:= $(subst ",,$(CONFIG_XENARCH))
   10.11  
   10.12 @@ -56,6 +58,9 @@ cflags-$(CONFIG_MVIAC3_2)	+= $(call cc-o
   10.13  # AMD Elan support
   10.14  cflags-$(CONFIG_X86_ELAN)	+= -march=i486
   10.15  
   10.16 +# Geode GX1 support
   10.17 +cflags-$(CONFIG_MGEODEGX1)		+= $(call cc-option,-march=pentium-mmx,-march=i486)
   10.18 +
   10.19  # -mregparm=3 works ok on gcc-3.0 and later
   10.20  #
   10.21  GCC_VERSION			:= $(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-version.sh $(CC))
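
(For reference, cc-option here is the standard kbuild probe: it expands to its first argument when $(CC) accepts that flag and to the second otherwise, so compilers that do not understand -march=pentium-mmx fall back to a plain -march=i486 build for the Geode GX1.)
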
    11.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Mon Jul 11 09:29:56 2005 -0500
    11.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/Makefile	Mon Jul 11 09:35:19 2005 -0500
    11.3 @@ -33,6 +33,7 @@ obj-$(CONFIG_X86_MPPARSE)	+= mpparse.o
    11.4  obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o
    11.5  c-obj-$(CONFIG_X86_LOCAL_APIC)	+= nmi.o
    11.6  obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
    11.7 +c-obj-$(CONFIG_X86_REBOOTFIXUPS)+= reboot_fixups.o
    11.8  c-obj-$(CONFIG_X86_NUMAQ)	+= numaq.o
    11.9  c-obj-$(CONFIG_X86_SUMMIT_NUMA)	+= summit.o
   11.10  c-obj-$(CONFIG_MODULES)		+= module.o
   11.11 @@ -53,11 +54,11 @@ c-obj-$(CONFIG_SCx200)		+= scx200.o
   11.12  # Note: kbuild does not track this dependency due to usage of .incbin
   11.13  $(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
   11.14  targets += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
   11.15 -targets += vsyscall.lds
   11.16 +targets += vsyscall-note.o vsyscall.lds
   11.17  
   11.18  # The DSO images are built using a special linker script.
   11.19  quiet_cmd_syscall = SYSCALL $@
   11.20 -      cmd_syscall = $(CC) -nostdlib -m32 $(SYSCFLAGS_$(@F)) \
   11.21 +      cmd_syscall = $(CC) -m elf_i386 -nostdlib $(SYSCFLAGS_$(@F)) \
   11.22  		          -Wl,-T,$(filter-out FORCE,$^) -o $@
   11.23  
   11.24  export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH)
   11.25 @@ -67,7 +68,8 @@ SYSCFLAGS_vsyscall-sysenter.so	= $(vsysc
   11.26  SYSCFLAGS_vsyscall-int80.so	= $(vsyscall-flags)
   11.27  
   11.28  $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
   11.29 -$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
   11.30 +$(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
   11.31 +		      $(obj)/vsyscall-%.o FORCE
   11.32  	$(call if_changed,syscall)
   11.33  
   11.34  # We also create a special relocatable object that should mirror the symbol
   11.35 @@ -78,17 +80,20 @@ extra-y += vsyscall-syms.o
   11.36  $(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
   11.37  
   11.38  SYSCFLAGS_vsyscall-syms.o = -r
   11.39 -$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE
   11.40 +$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
   11.41 +			$(obj)/vsyscall-sysenter.o FORCE
   11.42  	$(call if_changed,syscall)
   11.43  
   11.44  c-link	:= init_task.o
   11.45 -s-link	:= vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o
   11.46 +s-link	:= vsyscall-int80.o vsyscall-sysenter.o vsyscall-sigreturn.o vsyscall.lds.o syscall_table.o
   11.47  
   11.48  $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-obj-m) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
   11.49  	@ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $@
   11.50  
   11.51  $(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
   11.52  
   11.53 +$(obj)/entry.o: $(src)/entry.S $(src)/syscall_table.S
   11.54 +
   11.55  obj-y	+= $(c-obj-y) $(s-obj-y)
   11.56  obj-m	+= $(c-obj-m)
   11.57  
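
(Context for the syscall_table.o and entry.o rules added above: 2.6.12 split the i386 system-call table out of entry.S into arch/i386/kernel/syscall_table.S, which the sparse tree now links in; the matching removal of sys_call_table from the Xen entry.S appears in the final hunk of this changeset.)
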
    12.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c	Mon Jul 11 09:29:56 2005 -0500
    12.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/acpi/boot.c	Mon Jul 11 09:35:19 2005 -0500
    12.3 @@ -604,6 +604,12 @@ static int __init acpi_parse_fadt(unsign
    12.4  	acpi_fadt.sci_int = fadt->sci_int;
    12.5  #endif
    12.6  
    12.7 +#ifdef CONFIG_ACPI_BUS
    12.8 +	/* initialize rev and apic_phys_dest_mode for x86_64 genapic */
    12.9 +	acpi_fadt.revision = fadt->revision;
   12.10 +	acpi_fadt.force_apic_physical_destination_mode = fadt->force_apic_physical_destination_mode;
   12.11 +#endif
   12.12 +
   12.13  #ifdef CONFIG_X86_PM_TIMER
   12.14  	/* detect the location of the ACPI PM Timer */
   12.15  	if (fadt->revision >= FADT2_REVISION_ID) {
    13.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c	Mon Jul 11 09:29:56 2005 -0500
    13.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/common.c	Mon Jul 11 09:35:19 2005 -0500
    13.3 @@ -22,6 +22,9 @@
    13.4  DEFINE_PER_CPU(struct desc_struct, cpu_gdt_table[GDT_ENTRIES]);
    13.5  EXPORT_PER_CPU_SYMBOL(cpu_gdt_table);
    13.6  
    13.7 +DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
    13.8 +EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
    13.9 +
   13.10  static int cachesize_override __initdata = -1;
   13.11  static int disable_x86_fxsr __initdata = 0;
   13.12  static int disable_x86_serial_nr __initdata = 1;
   13.13 @@ -202,7 +205,7 @@ static inline int flag_is_changeable_p(u
   13.14  
   13.15  
   13.16  /* Probe for the CPUID instruction */
   13.17 -int __init have_cpuid_p(void)
   13.18 +static int __init have_cpuid_p(void)
   13.19  {
   13.20  	return flag_is_changeable_p(X86_EFLAGS_ID);
   13.21  }
   13.22 @@ -210,7 +213,7 @@ int __init have_cpuid_p(void)
   13.23  /* Do minimum CPU detection early.
   13.24     Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
   13.25     The others are not touched to avoid unwanted side effects. */
   13.26 -void __init early_cpu_detect(void)
   13.27 +static void __init early_cpu_detect(void)
   13.28  {
   13.29  	struct cpuinfo_x86 *c = &boot_cpu_data;
   13.30  
   13.31 @@ -243,6 +246,10 @@ void __init early_cpu_detect(void)
   13.32  	}
   13.33  
   13.34  	early_intel_workaround(c);
   13.35 +
   13.36 +#ifdef CONFIG_X86_HT
   13.37 +	phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
   13.38 +#endif
   13.39  }
   13.40  
   13.41  void __init generic_identify(struct cpuinfo_x86 * c)
   13.42 @@ -431,25 +438,15 @@ void __init identify_cpu(struct cpuinfo_
   13.43  	mcheck_init(c);
   13.44  #endif
   13.45  }
   13.46 -/*
   13.47 - *	Perform early boot up checks for a valid TSC. See arch/i386/kernel/time.c
   13.48 - */
   13.49 - 
   13.50 -void __init dodgy_tsc(void)
   13.51 -{
   13.52 -	if (( boot_cpu_data.x86_vendor == X86_VENDOR_CYRIX ) ||
   13.53 -	    ( boot_cpu_data.x86_vendor == X86_VENDOR_NSC   ))
   13.54 -		cpu_devs[X86_VENDOR_CYRIX]->c_init(&boot_cpu_data);
   13.55 -}
   13.56  
   13.57  #ifdef CONFIG_X86_HT
   13.58  void __init detect_ht(struct cpuinfo_x86 *c)
   13.59  {
   13.60  	u32 	eax, ebx, ecx, edx;
   13.61 -	int 	index_lsb, index_msb, tmp;
   13.62 +	int 	index_msb, tmp;
   13.63  	int 	cpu = smp_processor_id();
   13.64  
   13.65 -	if (!cpu_has(c, X86_FEATURE_HT))
   13.66 +	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
   13.67  		return;
   13.68  
   13.69  	cpuid(1, &eax, &ebx, &ecx, &edx);
   13.70 @@ -458,7 +455,6 @@ void __init detect_ht(struct cpuinfo_x86
   13.71  	if (smp_num_siblings == 1) {
   13.72  		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
   13.73  	} else if (smp_num_siblings > 1 ) {
   13.74 -		index_lsb = 0;
   13.75  		index_msb = 31;
   13.76  
   13.77  		if (smp_num_siblings > NR_CPUS) {
   13.78 @@ -467,21 +463,34 @@ void __init detect_ht(struct cpuinfo_x86
   13.79  			return;
   13.80  		}
   13.81  		tmp = smp_num_siblings;
   13.82 -		while ((tmp & 1) == 0) {
   13.83 -			tmp >>=1 ;
   13.84 -			index_lsb++;
   13.85 -		}
   13.86 -		tmp = smp_num_siblings;
   13.87  		while ((tmp & 0x80000000 ) == 0) {
   13.88  			tmp <<=1 ;
   13.89  			index_msb--;
   13.90  		}
   13.91 -		if (index_lsb != index_msb )
   13.92 +		if (smp_num_siblings & (smp_num_siblings - 1))
   13.93  			index_msb++;
   13.94  		phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
   13.95  
   13.96  		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
   13.97  		       phys_proc_id[cpu]);
   13.98 +
   13.99 +		smp_num_siblings = smp_num_siblings / c->x86_num_cores;
  13.100 +
  13.101 +		tmp = smp_num_siblings;
  13.102 +		index_msb = 31;
  13.103 +		while ((tmp & 0x80000000) == 0) {
  13.104 +			tmp <<=1 ;
  13.105 +			index_msb--;
  13.106 +		}
  13.107 +
  13.108 +		if (smp_num_siblings & (smp_num_siblings - 1))
  13.109 +			index_msb++;
  13.110 +
  13.111 +		cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
  13.112 +
  13.113 +		if (c->x86_num_cores > 1)
  13.114 +			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
  13.115 +			       cpu_core_id[cpu]);
  13.116  	}
  13.117  }
  13.118  #endif
  13.119 @@ -528,7 +537,6 @@ extern int transmeta_init_cpu(void);
  13.120  extern int rise_init_cpu(void);
  13.121  extern int nexgen_init_cpu(void);
  13.122  extern int umc_init_cpu(void);
  13.123 -void early_cpu_detect(void);
  13.124  
  13.125  void __init early_cpu_init(void)
  13.126  {
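
The detect_ht() rework above derives the width of the APIC-ID field as ceil(log2(smp_num_siblings)), first for the package ID and then again for the core ID. A standalone sketch of that computation, using a hypothetical id_bits() helper rather than anything from the tree:

/*
 * Find how many low APIC-ID bits are needed to encode `count'
 * distinct IDs, i.e. ceil(log2(count)).  The (count & (count - 1))
 * test is non-zero exactly when count is not a power of two, in
 * which case one extra bit is required.  count must be >= 1.
 */
#include <stdio.h>

static int id_bits(unsigned int count)
{
	unsigned int tmp = count;
	int index_msb = 31;

	while ((tmp & 0x80000000u) == 0) {
		tmp <<= 1;
		index_msb--;
	}
	if (count & (count - 1))
		index_msb++;
	return index_msb;
}

int main(void)
{
	printf("%d %d %d\n", id_bits(2), id_bits(4), id_bits(6)); /* 1 2 3 */
	return 0;
}
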
    14.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c	Mon Jul 11 09:29:56 2005 -0500
    14.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c	Mon Jul 11 09:35:19 2005 -0500
    14.3 @@ -31,7 +31,7 @@ struct mtrr_ops *mtrr_if = &generic_mtrr
    14.4  unsigned int num_var_ranges;
    14.5  unsigned int *usage_table;
    14.6  
    14.7 -void __init set_num_var_ranges(void)
    14.8 +static void __init set_num_var_ranges(void)
    14.9  {
   14.10  	dom0_op_t op;
   14.11  
    15.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S	Mon Jul 11 09:29:56 2005 -0500
    15.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S	Mon Jul 11 09:35:19 2005 -0500
    15.3 @@ -752,296 +752,6 @@ ENTRY(fixup_4gb_segment)
    15.4  	pushl $do_fixup_4gb_segment
    15.5  	jmp error_code
    15.6  
    15.7 -.data
    15.8 -ENTRY(sys_call_table)
    15.9 -	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
   15.10 -	.long sys_exit
   15.11 -	.long sys_fork
   15.12 -	.long sys_read
   15.13 -	.long sys_write
   15.14 -	.long sys_open		/* 5 */
   15.15 -	.long sys_close
   15.16 -	.long sys_waitpid
   15.17 -	.long sys_creat
   15.18 -	.long sys_link
   15.19 -	.long sys_unlink	/* 10 */
   15.20 -	.long sys_execve
   15.21 -	.long sys_chdir
   15.22 -	.long sys_time
   15.23 -	.long sys_mknod
   15.24 -	.long sys_chmod		/* 15 */
   15.25 -	.long sys_lchown16
   15.26 -	.long sys_ni_syscall	/* old break syscall holder */
   15.27 -	.long sys_stat
   15.28 -	.long sys_lseek
   15.29 -	.long sys_getpid	/* 20 */
   15.30 -	.long sys_mount
   15.31 -	.long sys_oldumount
   15.32 -	.long sys_setuid16
   15.33 -	.long sys_getuid16
   15.34 -	.long sys_stime		/* 25 */
   15.35 -	.long sys_ptrace
   15.36 -	.long sys_alarm
   15.37 -	.long sys_fstat
   15.38 -	.long sys_pause
   15.39 -	.long sys_utime		/* 30 */
   15.40 -	.long sys_ni_syscall	/* old stty syscall holder */
   15.41 -	.long sys_ni_syscall	/* old gtty syscall holder */
   15.42 -	.long sys_access
   15.43 -	.long sys_nice
   15.44 -	.long sys_ni_syscall	/* 35 - old ftime syscall holder */
   15.45 -	.long sys_sync
   15.46 -	.long sys_kill
   15.47 -	.long sys_rename
   15.48 -	.long sys_mkdir
   15.49 -	.long sys_rmdir		/* 40 */
   15.50 -	.long sys_dup
   15.51 -	.long sys_pipe
   15.52 -	.long sys_times
   15.53 -	.long sys_ni_syscall	/* old prof syscall holder */
   15.54 -	.long sys_brk		/* 45 */
   15.55 -	.long sys_setgid16
   15.56 -	.long sys_getgid16
   15.57 -	.long sys_signal
   15.58 -	.long sys_geteuid16
   15.59 -	.long sys_getegid16	/* 50 */
   15.60 -	.long sys_acct
   15.61 -	.long sys_umount	/* recycled never used phys() */
   15.62 -	.long sys_ni_syscall	/* old lock syscall holder */
   15.63 -	.long sys_ioctl
   15.64 -	.long sys_fcntl		/* 55 */
   15.65 -	.long sys_ni_syscall	/* old mpx syscall holder */
   15.66 -	.long sys_setpgid
   15.67 -	.long sys_ni_syscall	/* old ulimit syscall holder */
   15.68 -	.long sys_olduname
   15.69 -	.long sys_umask		/* 60 */
   15.70 -	.long sys_chroot
   15.71 -	.long sys_ustat
   15.72 -	.long sys_dup2
   15.73 -	.long sys_getppid
   15.74 -	.long sys_getpgrp	/* 65 */
   15.75 -	.long sys_setsid
   15.76 -	.long sys_sigaction
   15.77 -	.long sys_sgetmask
   15.78 -	.long sys_ssetmask
   15.79 -	.long sys_setreuid16	/* 70 */
   15.80 -	.long sys_setregid16
   15.81 -	.long sys_sigsuspend
   15.82 -	.long sys_sigpending
   15.83 -	.long sys_sethostname
   15.84 -	.long sys_setrlimit	/* 75 */
   15.85 -	.long sys_old_getrlimit
   15.86 -	.long sys_getrusage
   15.87 -	.long sys_gettimeofday
   15.88 -	.long sys_settimeofday
   15.89 -	.long sys_getgroups16	/* 80 */
   15.90 -	.long sys_setgroups16
   15.91 -	.long old_select
   15.92 -	.long sys_symlink
   15.93 -	.long sys_lstat
   15.94 -	.long sys_readlink	/* 85 */
   15.95 -	.long sys_uselib
   15.96 -	.long sys_swapon
   15.97 -	.long sys_reboot
   15.98 -	.long old_readdir
   15.99 -	.long old_mmap		/* 90 */
  15.100 -	.long sys_munmap
  15.101 -	.long sys_truncate
  15.102 -	.long sys_ftruncate
  15.103 -	.long sys_fchmod
  15.104 -	.long sys_fchown16	/* 95 */
  15.105 -	.long sys_getpriority
  15.106 -	.long sys_setpriority
  15.107 -	.long sys_ni_syscall	/* old profil syscall holder */
  15.108 -	.long sys_statfs
  15.109 -	.long sys_fstatfs	/* 100 */
  15.110 -	.long sys_ioperm
  15.111 -	.long sys_socketcall
  15.112 -	.long sys_syslog
  15.113 -	.long sys_setitimer
  15.114 -	.long sys_getitimer	/* 105 */
  15.115 -	.long sys_newstat
  15.116 -	.long sys_newlstat
  15.117 -	.long sys_newfstat
  15.118 -	.long sys_uname
  15.119 -	.long sys_iopl		/* 110 */
  15.120 -	.long sys_vhangup
  15.121 -	.long sys_ni_syscall	/* old "idle" system call */
  15.122 -	.long sys_vm86old
  15.123 -	.long sys_wait4
  15.124 -	.long sys_swapoff	/* 115 */
  15.125 -	.long sys_sysinfo
  15.126 -	.long sys_ipc
  15.127 -	.long sys_fsync
  15.128 -	.long sys_sigreturn
  15.129 -	.long sys_clone		/* 120 */
  15.130 -	.long sys_setdomainname
  15.131 -	.long sys_newuname
  15.132 -	.long sys_modify_ldt
  15.133 -	.long sys_adjtimex
  15.134 -	.long sys_mprotect	/* 125 */
  15.135 -	.long sys_sigprocmask
  15.136 -	.long sys_ni_syscall	/* old "create_module" */ 
  15.137 -	.long sys_init_module
  15.138 -	.long sys_delete_module
  15.139 -	.long sys_ni_syscall	/* 130:	old "get_kernel_syms" */
  15.140 -	.long sys_quotactl
  15.141 -	.long sys_getpgid
  15.142 -	.long sys_fchdir
  15.143 -	.long sys_bdflush
  15.144 -	.long sys_sysfs		/* 135 */
  15.145 -	.long sys_personality
  15.146 -	.long sys_ni_syscall	/* reserved for afs_syscall */
  15.147 -	.long sys_setfsuid16
  15.148 -	.long sys_setfsgid16
  15.149 -	.long sys_llseek	/* 140 */
  15.150 -	.long sys_getdents
  15.151 -	.long sys_select
  15.152 -	.long sys_flock
  15.153 -	.long sys_msync
  15.154 -	.long sys_readv		/* 145 */
  15.155 -	.long sys_writev
  15.156 -	.long sys_getsid
  15.157 -	.long sys_fdatasync
  15.158 -	.long sys_sysctl
  15.159 -	.long sys_mlock		/* 150 */
  15.160 -	.long sys_munlock
  15.161 -	.long sys_mlockall
  15.162 -	.long sys_munlockall
  15.163 -	.long sys_sched_setparam
  15.164 -	.long sys_sched_getparam   /* 155 */
  15.165 -	.long sys_sched_setscheduler
  15.166 -	.long sys_sched_getscheduler
  15.167 -	.long sys_sched_yield
  15.168 -	.long sys_sched_get_priority_max
  15.169 -	.long sys_sched_get_priority_min  /* 160 */
  15.170 -	.long sys_sched_rr_get_interval
  15.171 -	.long sys_nanosleep
  15.172 -	.long sys_mremap
  15.173 -	.long sys_setresuid16
  15.174 -	.long sys_getresuid16	/* 165 */
  15.175 -	.long sys_vm86
  15.176 -	.long sys_ni_syscall	/* Old sys_query_module */
  15.177 -	.long sys_poll
  15.178 -	.long sys_nfsservctl
  15.179 -	.long sys_setresgid16	/* 170 */
  15.180 -	.long sys_getresgid16
  15.181 -	.long sys_prctl
  15.182 -	.long sys_rt_sigreturn
  15.183 -	.long sys_rt_sigaction
  15.184 -	.long sys_rt_sigprocmask	/* 175 */
  15.185 -	.long sys_rt_sigpending
  15.186 -	.long sys_rt_sigtimedwait
  15.187 -	.long sys_rt_sigqueueinfo
  15.188 -	.long sys_rt_sigsuspend
  15.189 -	.long sys_pread64	/* 180 */
  15.190 -	.long sys_pwrite64
  15.191 -	.long sys_chown16
  15.192 -	.long sys_getcwd
  15.193 -	.long sys_capget
  15.194 -	.long sys_capset	/* 185 */
  15.195 -	.long sys_sigaltstack
  15.196 -	.long sys_sendfile
  15.197 -	.long sys_ni_syscall	/* reserved for streams1 */
  15.198 -	.long sys_ni_syscall	/* reserved for streams2 */
  15.199 -	.long sys_vfork		/* 190 */
  15.200 -	.long sys_getrlimit
  15.201 -	.long sys_mmap2
  15.202 -	.long sys_truncate64
  15.203 -	.long sys_ftruncate64
  15.204 -	.long sys_stat64	/* 195 */
  15.205 -	.long sys_lstat64
  15.206 -	.long sys_fstat64
  15.207 -	.long sys_lchown
  15.208 -	.long sys_getuid
  15.209 -	.long sys_getgid	/* 200 */
  15.210 -	.long sys_geteuid
  15.211 -	.long sys_getegid
  15.212 -	.long sys_setreuid
  15.213 -	.long sys_setregid
  15.214 -	.long sys_getgroups	/* 205 */
  15.215 -	.long sys_setgroups
  15.216 -	.long sys_fchown
  15.217 -	.long sys_setresuid
  15.218 -	.long sys_getresuid
  15.219 -	.long sys_setresgid	/* 210 */
  15.220 -	.long sys_getresgid
  15.221 -	.long sys_chown
  15.222 -	.long sys_setuid
  15.223 -	.long sys_setgid
  15.224 -	.long sys_setfsuid	/* 215 */
  15.225 -	.long sys_setfsgid
  15.226 -	.long sys_pivot_root
  15.227 -	.long sys_mincore
  15.228 -	.long sys_madvise
  15.229 -	.long sys_getdents64	/* 220 */
  15.230 -	.long sys_fcntl64
  15.231 -	.long sys_ni_syscall	/* reserved for TUX */
  15.232 -	.long sys_ni_syscall
  15.233 -	.long sys_gettid
  15.234 -	.long sys_readahead	/* 225 */
  15.235 -	.long sys_setxattr
  15.236 -	.long sys_lsetxattr
  15.237 -	.long sys_fsetxattr
  15.238 -	.long sys_getxattr
  15.239 -	.long sys_lgetxattr	/* 230 */
  15.240 -	.long sys_fgetxattr
  15.241 -	.long sys_listxattr
  15.242 -	.long sys_llistxattr
  15.243 -	.long sys_flistxattr
  15.244 -	.long sys_removexattr	/* 235 */
  15.245 -	.long sys_lremovexattr
  15.246 -	.long sys_fremovexattr
  15.247 -	.long sys_tkill
  15.248 -	.long sys_sendfile64
  15.249 -	.long sys_futex		/* 240 */
  15.250 -	.long sys_sched_setaffinity
  15.251 -	.long sys_sched_getaffinity
  15.252 -	.long sys_set_thread_area
  15.253 -	.long sys_get_thread_area
  15.254 -	.long sys_io_setup	/* 245 */
  15.255 -	.long sys_io_destroy
  15.256 -	.long sys_io_getevents
  15.257 -	.long sys_io_submit
  15.258 -	.long sys_io_cancel
  15.259 -	.long sys_fadvise64	/* 250 */
  15.260 -	.long sys_ni_syscall
  15.261 -	.long sys_exit_group
  15.262 -	.long sys_lookup_dcookie
  15.263 -	.long sys_epoll_create
  15.264 -	.long sys_epoll_ctl	/* 255 */
  15.265 -	.long sys_epoll_wait
  15.266 - 	.long sys_remap_file_pages
  15.267 - 	.long sys_set_tid_address
  15.268 - 	.long sys_timer_create
  15.269 - 	.long sys_timer_settime		/* 260 */
  15.270 - 	.long sys_timer_gettime
  15.271 - 	.long sys_timer_getoverrun
  15.272 - 	.long sys_timer_delete
  15.273 - 	.long sys_clock_settime
  15.274 - 	.long sys_clock_gettime		/* 265 */
  15.275 - 	.long sys_clock_getres
  15.276 - 	.long sys_clock_nanosleep
  15.277 -	.long sys_statfs64
  15.278 -	.long sys_fstatfs64	
  15.279 -	.long sys_tgkill	/* 270 */
  15.280 -	.long sys_utimes
  15.281 - 	.long sys_fadvise64_64
  15.282 -	.long sys_ni_syscall	/* sys_vserver */
  15.283 -	.long sys_mbind
  15.284 -	.long sys_get_mempolicy
  15.285 -	.long sys_set_mempolicy
  15.286 -	.long sys_mq_open
  15.287 -	.long sys_mq_unlink
  15.288 -	.long sys_mq_timedsend
  15.289 -	.long sys_mq_timedreceive	/* 280 */
  15.290 -	.long sys_mq_notify
  15.291 -	.long sys_mq_getsetattr
  15.292 -	.long sys_ni_syscall		/* reserved for kexec */
  15.293 -	.long sys_waitid
  15.294 -	.long sys_ni_syscall		/* 285 */ /* available */
  15.295 -	.long sys_add_key
  15.296 -	.long sys_request_key
  15.297 -	.long sys_keyctl
  15.298 +#include "syscall_table.S"
  15.299  
  15.300  syscall_table_size=(.-sys_call_table)
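
With the table split out into syscall_table.S, entry.S and the sysenter path stay in step automatically, and syscall_table_size is still measured at the include site. The dispatch itself is unchanged: the syscall number indexes a flat array of handlers. An illustrative C analogue (toy table, not the real assembly path):

#include <stdio.h>

typedef long (*syscall_fn_t)(void);

static long sys_getanswer(void) { return 42; }

static syscall_fn_t sys_call_table[] = { sys_getanswer };
#define NR_SYSCALLS (sizeof(sys_call_table) / sizeof(sys_call_table[0]))

static long dispatch(unsigned int nr)
{
	if (nr >= NR_SYSCALLS)
		return -38;	/* -ENOSYS on i386 */
	return sys_call_table[nr]();
}

int main(void)
{
	printf("%ld %ld\n", dispatch(0), dispatch(7));	/* 42 -38 */
	return 0;
}
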
    16.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c	Mon Jul 11 09:29:56 2005 -0500
    16.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c	Mon Jul 11 09:35:19 2005 -0500
    16.3 @@ -99,6 +99,11 @@ EXPORT_SYMBOL(__get_user_1);
    16.4  EXPORT_SYMBOL(__get_user_2);
    16.5  EXPORT_SYMBOL(__get_user_4);
    16.6  
    16.7 +EXPORT_SYMBOL(__put_user_1);
    16.8 +EXPORT_SYMBOL(__put_user_2);
    16.9 +EXPORT_SYMBOL(__put_user_4);
   16.10 +EXPORT_SYMBOL(__put_user_8);
   16.11 +
   16.12  EXPORT_SYMBOL(strpbrk);
   16.13  EXPORT_SYMBOL(strstr);
   16.14  
   16.15 @@ -114,7 +119,6 @@ EXPORT_SYMBOL(dma_alloc_coherent);
   16.16  EXPORT_SYMBOL(dma_free_coherent);
   16.17  
   16.18  #ifdef CONFIG_PCI
   16.19 -EXPORT_SYMBOL(pcibios_penalize_isa_irq);
   16.20  EXPORT_SYMBOL(pci_mem_start);
   16.21  #endif
   16.22  
   16.23 @@ -146,7 +150,6 @@ EXPORT_SYMBOL(smp_call_function);
   16.24  
   16.25  /* TLB flushing */
   16.26  EXPORT_SYMBOL(flush_tlb_page);
   16.27 -EXPORT_SYMBOL_GPL(flush_tlb_all);
   16.28  #endif
   16.29  
   16.30  #ifdef CONFIG_X86_IO_APIC
   16.31 @@ -168,10 +171,6 @@ EXPORT_SYMBOL(rtc_lock);
   16.32  EXPORT_SYMBOL_GPL(set_nmi_callback);
   16.33  EXPORT_SYMBOL_GPL(unset_nmi_callback);
   16.34  
   16.35 -#undef memcmp
   16.36 -extern int memcmp(const void *,const void *,__kernel_size_t);
   16.37 -EXPORT_SYMBOL(memcmp);
   16.38 -
   16.39  EXPORT_SYMBOL(register_die_notifier);
   16.40  #ifdef CONFIG_HAVE_DEC_LOCK
   16.41  EXPORT_SYMBOL(_atomic_dec_and_lock);
    17.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c	Mon Jul 11 09:29:56 2005 -0500
    17.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/io_apic.c	Mon Jul 11 09:35:19 2005 -0500
    17.3 @@ -231,7 +231,7 @@ static void unmask_IO_APIC_irq (unsigned
    17.4  	spin_unlock_irqrestore(&ioapic_lock, flags);
    17.5  }
    17.6  
    17.7 -void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
    17.8 +static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
    17.9  {
   17.10  	struct IO_APIC_route_entry entry;
   17.11  	unsigned long flags;
   17.12 @@ -310,7 +310,7 @@ cpumask_t __cacheline_aligned pending_ir
   17.13  static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH;
   17.14  static int physical_balance = 0;
   17.15  
   17.16 -struct irq_cpu_info {
   17.17 +static struct irq_cpu_info {
   17.18  	unsigned long * last_irq;
   17.19  	unsigned long * irq_delta;
   17.20  	unsigned long irq;
   17.21 @@ -321,7 +321,7 @@ struct irq_cpu_info {
   17.22  #define IRQ_DELTA(cpu,irq) 	(irq_cpu_data[cpu].irq_delta[irq])
   17.23  
   17.24  #define IDLE_ENOUGH(cpu,now) \
   17.25 -		(idle_cpu(cpu) && ((now) - irq_stat[(cpu)].idle_timestamp > 1))
   17.26 +	(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
   17.27  
   17.28  #define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
   17.29  
   17.30 @@ -332,7 +332,7 @@ struct irq_cpu_info {
   17.31  #define BALANCED_IRQ_MORE_DELTA		(HZ/10)
   17.32  #define BALANCED_IRQ_LESS_DELTA		(HZ)
   17.33  
   17.34 -long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
   17.35 +static long balanced_irq_interval = MAX_BALANCED_IRQ_INTERVAL;
   17.36  
   17.37  static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
   17.38  			unsigned long now, int direction)
   17.39 @@ -733,8 +733,8 @@ void fastcall send_IPI_self(int vector)
   17.40   */
   17.41  
   17.42  #define MAX_PIRQS 8
   17.43 -int pirq_entries [MAX_PIRQS];
   17.44 -int pirqs_enabled;
   17.45 +static int pirq_entries [MAX_PIRQS];
   17.46 +static int pirqs_enabled;
   17.47  int skip_ioapic_setup;
   17.48  
   17.49  static int __init ioapic_setup(char *str)
   17.50 @@ -1231,7 +1231,7 @@ static inline void ioapic_register_intr(
   17.51  #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
   17.52  #endif
   17.53  
   17.54 -void __init setup_IO_APIC_irqs(void)
   17.55 +static void __init setup_IO_APIC_irqs(void)
   17.56  {
   17.57  	struct IO_APIC_route_entry entry;
   17.58  	int apic, pin, idx, irq, first_notcon = 1, vector;
   17.59 @@ -1311,7 +1311,7 @@ void __init setup_IO_APIC_irqs(void)
   17.60   * Set up the 8259A-master output pin:
   17.61   */
   17.62  #ifndef CONFIG_XEN
   17.63 -void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
   17.64 +static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
   17.65  {
   17.66  	struct IO_APIC_route_entry entry;
   17.67  	unsigned long flags;
   17.68 @@ -2234,7 +2234,6 @@ static inline void check_timer(void)
   17.69  				disable_8259A_irq(0);
   17.70  				setup_nmi();
   17.71  				enable_8259A_irq(0);
   17.72 -				check_nmi_watchdog();
   17.73  			}
   17.74  			return;
   17.75  		}
   17.76 @@ -2257,7 +2256,6 @@ static inline void check_timer(void)
   17.77  				add_pin_to_irq(0, 0, pin2);
   17.78  			if (nmi_watchdog == NMI_IO_APIC) {
   17.79  				setup_nmi();
   17.80 -				check_nmi_watchdog();
   17.81  			}
   17.82  			return;
   17.83  		}
   17.84 @@ -2363,7 +2361,7 @@ struct sysfs_ioapic_data {
   17.85  };
   17.86  static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
   17.87  
   17.88 -static int ioapic_suspend(struct sys_device *dev, u32 state)
   17.89 +static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
   17.90  {
   17.91  	struct IO_APIC_route_entry *entry;
   17.92  	struct sysfs_ioapic_data *data;
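
Two interface updates run through this file: irq_stat becomes a per-CPU variable (hence the IDLE_ENOUGH change), and the sysdev suspend hook now takes an opaque pm_message_t instead of a raw u32. A minimal sketch of the new suspend signature, with a hypothetical no-op hook:

#include <linux/sysdev.h>
#include <linux/pm.h>

/* Hypothetical sysdev hook: only the signature matters here. */
static int sample_suspend(struct sys_device *dev, pm_message_t state)
{
	return 0;	/* nothing to save for this device */
}
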
    18.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c	Mon Jul 11 09:29:56 2005 -0500
    18.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/irq.c	Mon Jul 11 09:35:19 2005 -0500
    18.3 @@ -19,6 +19,9 @@
    18.4  #include <linux/cpu.h>
    18.5  #include <linux/delay.h>
    18.6  
    18.7 +DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
    18.8 +EXPORT_PER_CPU_SYMBOL(irq_stat);
    18.9 +
   18.10  #ifndef CONFIG_X86_LOCAL_APIC
   18.11  /*
   18.12   * 'what should we do if we get a hw irq event on an illegal vector'.
   18.13 @@ -244,7 +247,7 @@ skip:
   18.14  #ifdef CONFIG_X86_LOCAL_APIC
   18.15  		seq_printf(p, "LOC: ");
   18.16  		for_each_cpu(j)
   18.17 -			seq_printf(p, "%10u ", irq_stat[j].apic_timer_irqs);
   18.18 +			seq_printf(p, "%10u ", per_cpu(irq_stat, j).apic_timer_irqs);
   18.19  		seq_putc(p, '\n');
   18.20  #endif
   18.21  		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
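
The irq_stat conversion follows the standard per-CPU pattern: a global array indexed by CPU number becomes a DEFINE_PER_CPU variable, remote accesses switch to per_cpu(var, cpu) and local ones to __get_cpu_var(var). A sketch of the pattern with a hypothetical counter, assuming the 2.6.12 percpu API:

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, sample_count);

static void bump_local(void)
{
	__get_cpu_var(sample_count)++;		/* this CPU's copy */
}

static unsigned long read_cpu(int cpu)
{
	return per_cpu(sample_count, cpu);	/* a given CPU's copy */
}
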
    19.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c	Mon Jul 11 09:29:56 2005 -0500
    19.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/mpparse.c	Mon Jul 11 09:35:19 2005 -0500
    19.3 @@ -49,7 +49,7 @@ int mp_bus_id_to_node [MAX_MP_BUSSES];
    19.4  int mp_bus_id_to_local [MAX_MP_BUSSES];
    19.5  int quad_local_to_mp_bus_id [NR_CPUS/4][4];
    19.6  int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
    19.7 -int mp_current_pci_id;
    19.8 +static int mp_current_pci_id;
    19.9  
   19.10  /* I/O APIC entries */
   19.11  struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
   19.12 @@ -120,7 +120,7 @@ static int MP_valid_apicid(int apicid, i
   19.13  #endif
   19.14  
   19.15  #ifndef CONFIG_XEN
   19.16 -void __init MP_processor_info (struct mpc_config_processor *m)
   19.17 +static void __init MP_processor_info (struct mpc_config_processor *m)
   19.18  {
   19.19   	int ver, apicid;
   19.20  	physid_mask_t tmp;
   19.21 @@ -871,7 +871,7 @@ void __init mp_register_lapic (
   19.22  #define MP_ISA_BUS		0
   19.23  #define MP_MAX_IOAPIC_PIN	127
   19.24  
   19.25 -struct mp_ioapic_routing {
   19.26 +static struct mp_ioapic_routing {
   19.27  	int			apic_id;
   19.28  	int			gsi_base;
   19.29  	int			gsi_end;
   19.30 @@ -989,6 +989,7 @@ void __init mp_override_legacy_irq (
   19.31  	return;
   19.32  }
   19.33  
   19.34 +int es7000_plat;
   19.35  
   19.36  void __init mp_config_acpi_legacy_irqs (void)
   19.37  {
   19.38 @@ -1003,9 +1004,9 @@ void __init mp_config_acpi_legacy_irqs (
   19.39  	Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
   19.40  
   19.41  	/*
   19.42 -	 * ES7000 has no legacy identity mappings
   19.43 +	 * Older generations of ES7000 have no legacy identity mappings
   19.44  	 */
   19.45 -	if (es7000_plat)
   19.46 +	if (es7000_plat == 1)
   19.47  		return;
   19.48  
   19.49  	/* 
    20.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Mon Jul 11 09:29:56 2005 -0500
    20.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Mon Jul 11 09:35:19 2005 -0500
    20.3 @@ -25,7 +25,7 @@ struct dma_coherent_mem {
    20.4  };
    20.5  
    20.6  void *dma_alloc_coherent(struct device *dev, size_t size,
    20.7 -			   dma_addr_t *dma_handle, int gfp)
    20.8 +			   dma_addr_t *dma_handle, unsigned int __nocast gfp)
    20.9  {
   20.10  	void *ret;
   20.11  	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
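
The __nocast annotation on the gfp argument lets sparse flag call sites that pass the wrong flag type while leaving the generated code untouched. A typical call site, sketched for a hypothetical driver with a valid struct device:

#include <linux/dma-mapping.h>
#include <linux/gfp.h>
#include <linux/errno.h>

static void *ring;
static dma_addr_t ring_dma;

static int alloc_ring(struct device *dev)
{
	/* one page of coherent DMA memory; GFP_KERNEL may sleep */
	ring = dma_alloc_coherent(dev, 4096, &ring_dma, GFP_KERNEL);
	return ring ? 0 : -ENOMEM;
}
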
    21.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c	Mon Jul 11 09:29:56 2005 -0500
    21.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/process.c	Mon Jul 11 09:35:19 2005 -0500
    21.3 @@ -37,6 +37,7 @@
    21.4  #include <linux/module.h>
    21.5  #include <linux/kallsyms.h>
    21.6  #include <linux/ptrace.h>
    21.7 +#include <linux/random.h>
    21.8  
    21.9  #include <asm/uaccess.h>
   21.10  #include <asm/pgtable.h>
   21.11 @@ -60,7 +61,7 @@
   21.12  
   21.13  asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
   21.14  
   21.15 -int hlt_counter;
   21.16 +static int hlt_counter;
   21.17  
   21.18  unsigned long boot_option_idle_override = 0;
   21.19  EXPORT_SYMBOL(boot_option_idle_override);
   21.20 @@ -77,7 +78,7 @@ unsigned long thread_saved_pc(struct tas
   21.21   * Powermanagement idle function, if any..
   21.22   */
   21.23  void (*pm_idle)(void);
   21.24 -static cpumask_t cpu_idle_map;
   21.25 +static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
   21.26  
   21.27  void disable_hlt(void)
   21.28  {
   21.29 @@ -150,8 +151,8 @@ void cpu_idle (void)
   21.30  	while (1) {
   21.31  		while (!need_resched()) {
   21.32  
   21.33 -			if (cpu_isset(cpu, cpu_idle_map))
   21.34 -				cpu_clear(cpu, cpu_idle_map);
   21.35 +			if (__get_cpu_var(cpu_idle_state))
   21.36 +				__get_cpu_var(cpu_idle_state) = 0;
   21.37  			rmb();
   21.38  
   21.39  			if (cpu_is_offline(cpu)) {
   21.40 @@ -162,7 +163,7 @@ void cpu_idle (void)
   21.41  				play_dead();
   21.42           }
   21.43  
   21.44 -			irq_stat[cpu].idle_timestamp = jiffies;
   21.45 +			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
   21.46  			xen_idle();
   21.47  		}
   21.48  		schedule();
   21.49 @@ -171,16 +172,28 @@ void cpu_idle (void)
   21.50  
   21.51  void cpu_idle_wait(void)
   21.52  {
   21.53 -	int cpu;
   21.54 +	unsigned int cpu, this_cpu = get_cpu();
   21.55  	cpumask_t map;
   21.56  
   21.57 -	for_each_online_cpu(cpu)
   21.58 -		cpu_set(cpu, cpu_idle_map);
   21.59 +	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
   21.60 +	put_cpu();
   21.61 +
   21.62 +	cpus_clear(map);
   21.63 +	for_each_online_cpu(cpu) {
   21.64 +		per_cpu(cpu_idle_state, cpu) = 1;
   21.65 +		cpu_set(cpu, map);
   21.66 +	}
   21.67 +
   21.68 +	__get_cpu_var(cpu_idle_state) = 0;
   21.69  
   21.70  	wmb();
   21.71  	do {
   21.72  		ssleep(1);
   21.73 -		cpus_and(map, cpu_idle_map, cpu_online_map);
   21.74 +		for_each_online_cpu(cpu) {
   21.75 +			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
   21.76 +				cpu_clear(cpu, map);
   21.77 +		}
   21.78 +		cpus_and(map, map, cpu_online_map);
   21.79  	} while (!cpus_empty(map));
   21.80  }
   21.81  EXPORT_SYMBOL_GPL(cpu_idle_wait);
   21.82 @@ -314,6 +327,17 @@ int copy_thread(int nr, unsigned long cl
   21.83  	int err;
   21.84  
   21.85  	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
   21.86 +	/*
    21.87 +	 * The -8 below reserves 8 bytes on top of the ring0 stack.
    21.88 +	 * This is necessary to guarantee that the entire "struct pt_regs"
    21.89 +	 * is accessible even if the CPU hasn't stored the SS/ESP registers
    21.90 +	 * on the stack (an interrupt gate does not save these registers
    21.91 +	 * when switching to the same privilege ring).
    21.92 +	 * Therefore beware: accessing the xss/esp fields of the
    21.93 +	 * "struct pt_regs" is possible, but they may contain
    21.94 +	 * completely wrong values.

   21.95 +	 */
   21.96 +	childregs = (struct pt_regs *) ((unsigned long) childregs - 8);
   21.97  	*childregs = *regs;
   21.98  	childregs->eax = 0;
   21.99  	childregs->esp = esp;
  21.100 @@ -434,12 +458,6 @@ int dump_task_regs(struct task_struct *t
  21.101  	return 1;
  21.102  }
  21.103  
  21.104 -/*
  21.105 - * This special macro can be used to load a debugging register
  21.106 - */
  21.107 -#define loaddebug(thread,register) \
  21.108 -		HYPERVISOR_set_debugreg((register),	\
  21.109 -			(thread->debugreg[register]))
  21.110  
  21.111  /*
   21.112   *	switch_to(x,y) should switch tasks from x to y.
  21.113 @@ -767,3 +785,9 @@ asmlinkage int sys_get_thread_area(struc
  21.114  	return 0;
  21.115  }
  21.116  
  21.117 +unsigned long arch_align_stack(unsigned long sp)
  21.118 +{
  21.119 +	if (randomize_va_space)
  21.120 +		sp -= get_random_int() % 8192;
  21.121 +	return sp & ~0xf;
  21.122 +}
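
The cpu_idle_wait() rewrite replaces the shared cpumask with a per-CPU flag handshake: the caller raises a flag on every online CPU, and each pass through the idle loop clears the local flag, so a cleared flag proves that CPU has been through the idle loop since the request. A condensed restatement of the protocol (names hypothetical; the real code also pins the caller to one CPU and re-checks cpu_online_map):

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <asm/system.h>

static DEFINE_PER_CPU(unsigned int, idle_ack);

static void wait_for_idlers(void)
{
	cpumask_t map;
	unsigned int cpu;

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(idle_ack, cpu) = 1;	/* request an ack */
		cpu_set(cpu, map);
	}
	wmb();					/* flags visible before polling */

	do {
		ssleep(1);
		for_each_online_cpu(cpu)
			if (cpu_isset(cpu, map) && !per_cpu(idle_ack, cpu))
				cpu_clear(cpu, map);	/* idle loop acked */
	} while (!cpus_empty(map));
}
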
    22.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c	Mon Jul 11 09:29:56 2005 -0500
    22.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/setup.c	Mon Jul 11 09:35:19 2005 -0500
    22.3 @@ -40,6 +40,7 @@
    22.4  #include <linux/efi.h>
    22.5  #include <linux/init.h>
    22.6  #include <linux/edd.h>
    22.7 +#include <linux/nodemask.h>
    22.8  #include <linux/kernel.h>
    22.9  #include <linux/percpu.h>
   22.10  #include <linux/notifier.h>
   22.11 @@ -82,7 +83,6 @@ struct cpuinfo_x86 new_cpu_data __initda
   22.12  struct cpuinfo_x86 boot_cpu_data = { 0, 0, 0, 0, -1, 0, 1, 0, -1 };
   22.13  
   22.14  unsigned long mmu_cr4_features;
   22.15 -EXPORT_SYMBOL_GPL(mmu_cr4_features);
   22.16  
   22.17  #ifdef	CONFIG_ACPI_INTERPRETER
   22.18  	int acpi_disabled = 0;
   22.19 @@ -125,8 +125,6 @@ struct edid_info edid_info;
   22.20  struct ist_info ist_info;
   22.21  struct e820map e820;
   22.22  
   22.23 -unsigned char aux_device_present;
   22.24 -
   22.25  extern void early_cpu_init(void);
   22.26  extern void dmi_scan_machine(void);
   22.27  extern void generic_apic_probe(char *);
   22.28 @@ -457,10 +455,10 @@ struct change_member {
   22.29  	struct e820entry *pbios; /* pointer to original bios entry */
   22.30  	unsigned long long addr; /* address for this change point */
   22.31  };
   22.32 -struct change_member change_point_list[2*E820MAX] __initdata;
   22.33 -struct change_member *change_point[2*E820MAX] __initdata;
   22.34 -struct e820entry *overlap_list[E820MAX] __initdata;
   22.35 -struct e820entry new_bios[E820MAX] __initdata;
   22.36 +static struct change_member change_point_list[2*E820MAX] __initdata;
   22.37 +static struct change_member *change_point[2*E820MAX] __initdata;
   22.38 +static struct e820entry *overlap_list[E820MAX] __initdata;
   22.39 +static struct e820entry new_bios[E820MAX] __initdata;
   22.40  
   22.41  static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
   22.42  {
   22.43 @@ -1000,8 +998,6 @@ unsigned long __init find_max_low_pfn(vo
   22.44  	return max_low_pfn;
   22.45  }
   22.46  
   22.47 -#ifndef CONFIG_DISCONTIGMEM
   22.48 -
   22.49  /*
   22.50   * Free all available memory for boot time allocation.  Used
   22.51   * as a callback function by efi_memory_walk()
   22.52 @@ -1075,15 +1071,16 @@ static void __init reserve_ebda_region(v
   22.53  		reserve_bootmem(addr, PAGE_SIZE);	
   22.54  }
   22.55  
   22.56 +#ifndef CONFIG_DISCONTIGMEM
   22.57 +void __init setup_bootmem_allocator(void);
   22.58  static unsigned long __init setup_memory(void)
   22.59  {
   22.60 -	unsigned long bootmap_size, start_pfn, max_low_pfn;
   22.61  
   22.62  	/*
   22.63  	 * partially used pages are not usable - thus
   22.64  	 * we are rounding upwards:
   22.65  	 */
   22.66 -	start_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
    22.67 +	min_low_pfn = PFN_UP(__pa(xen_start_info.pt_base)) + xen_start_info.nr_pt_frames;
   22.68  
   22.69  	find_max_pfn();
   22.70  
   22.71 @@ -1099,10 +1096,43 @@ static unsigned long __init setup_memory
   22.72  #endif
   22.73  	printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
   22.74  			pages_to_mb(max_low_pfn));
   22.75 +
   22.76 +	setup_bootmem_allocator();
   22.77 +
   22.78 +	return max_low_pfn;
   22.79 +}
   22.80 +
   22.81 +void __init zone_sizes_init(void)
   22.82 +{
   22.83 +	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
   22.84 +	unsigned int max_dma, low;
   22.85 +
   22.86 +	max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
   22.87 +	low = max_low_pfn;
   22.88 +
   22.89 +	if (low < max_dma)
   22.90 +		zones_size[ZONE_DMA] = low;
   22.91 +	else {
   22.92 +		zones_size[ZONE_DMA] = max_dma;
   22.93 +		zones_size[ZONE_NORMAL] = low - max_dma;
   22.94 +#ifdef CONFIG_HIGHMEM
   22.95 +		zones_size[ZONE_HIGHMEM] = highend_pfn - low;
   22.96 +#endif
   22.97 +	}
   22.98 +	free_area_init(zones_size);
   22.99 +}
  22.100 +#else
  22.101 +extern unsigned long setup_memory(void);
  22.102 +extern void zone_sizes_init(void);
  22.103 +#endif /* !CONFIG_DISCONTIGMEM */
  22.104 +
  22.105 +void __init setup_bootmem_allocator(void)
  22.106 +{
  22.107 +	unsigned long bootmap_size;
  22.108  	/*
  22.109  	 * Initialize the boot-time allocator (with low memory only):
  22.110  	 */
  22.111 -	bootmap_size = init_bootmem(start_pfn, max_low_pfn);
  22.112 +	bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
  22.113  
  22.114  	register_bootmem_low_pages(max_low_pfn);
  22.115  
  22.116 @@ -1112,7 +1142,7 @@ static unsigned long __init setup_memory
  22.117  	 * the (very unlikely) case of us accidentally initializing the
  22.118  	 * bootmem allocator with an invalid RAM area.
  22.119  	 */
  22.120 -	reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
  22.121 +	reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(min_low_pfn) +
  22.122  			 bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
  22.123  
  22.124  	/* reserve EBDA region, it's a 4K region */
  22.125 @@ -1159,12 +1189,25 @@ static unsigned long __init setup_memory
  22.126  #endif
  22.127  
  22.128  	phys_to_machine_mapping = (unsigned int *)xen_start_info.mfn_list;
  22.129 -
  22.130 -	return max_low_pfn;
  22.131  }
  22.132 -#else
  22.133 -extern unsigned long setup_memory(void);
  22.134 -#endif /* !CONFIG_DISCONTIGMEM */
  22.135 +
  22.136 +/*
  22.137 + * The node 0 pgdat is initialized before all of these because
  22.138 + * it's needed for bootmem.  node>0 pgdats have their virtual
  22.139 + * space allocated before the pagetables are in place to access
  22.140 + * them, so they can't be cleared then.
  22.141 + *
  22.142 + * This should all compile down to nothing when NUMA is off.
  22.143 + */
  22.144 +void __init remapped_pgdat_init(void)
  22.145 +{
  22.146 +	int nid;
  22.147 +
  22.148 +	for_each_online_node(nid) {
  22.149 +		if (nid != 0)
  22.150 +			memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
  22.151 +	}
  22.152 +}
  22.153  
  22.154  /*
  22.155   * Request address space for all standard RAM and ROM resources
  22.156 @@ -1443,7 +1486,6 @@ void __init setup_arch(char **cmdline_p)
  22.157  		machine_submodel_id = SYS_DESC_TABLE.table[1];
  22.158  		BIOS_revision = SYS_DESC_TABLE.table[2];
  22.159  	}
  22.160 -	aux_device_present = AUX_DEVICE_INFO;
  22.161  	bootloader_type = LOADER_TYPE;
  22.162  
  22.163  #ifdef CONFIG_XEN_PHYSDEV_ACCESS
  22.164 @@ -1503,6 +1545,8 @@ void __init setup_arch(char **cmdline_p)
  22.165  	smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
  22.166  #endif
  22.167  	paging_init();
  22.168 +	remapped_pgdat_init();
  22.169 +	zone_sizes_init();
  22.170  
  22.171  #ifdef CONFIG_X86_FIND_SMP_CONFIG
  22.172  	/*
  22.173 @@ -1586,11 +1630,13 @@ void __init setup_arch(char **cmdline_p)
  22.174  	}
  22.175  #endif
  22.176  
  22.177 +#ifdef CONFIG_ACPI_BOOT
  22.178  	/*
  22.179  	 * Parse the ACPI tables for possible boot-time SMP configuration.
  22.180  	 */
  22.181  	acpi_boot_table_init();
  22.182  	acpi_boot_init();
  22.183 +#endif
  22.184  
  22.185  #ifdef CONFIG_X86_LOCAL_APIC
  22.186  	if (smp_found_config)
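
zone_sizes_init() splits lowmem at MAX_DMA_ADDRESS: everything below the DMA limit lands in ZONE_DMA, the rest of lowmem in ZONE_NORMAL (and highmem, if configured, above that). A worked standalone example with illustrative sizes, assuming 4 KB pages and a 16 MB DMA limit:

#include <stdio.h>

int main(void)
{
	unsigned long max_dma = (16UL << 20) >> 12;	/* 16 MB in pages: 4096 */
	unsigned long low = (896UL << 20) >> 12;	/* 896 MB lowmem: 229376 */
	unsigned long dma, normal;

	if (low < max_dma) {
		dma = low;
		normal = 0;
	} else {
		dma = max_dma;
		normal = low - max_dma;
	}
	printf("ZONE_DMA=%lu ZONE_NORMAL=%lu\n", dma, normal);	/* 4096 225280 */
	return 0;
}
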
    23.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c	Mon Jul 11 09:29:56 2005 -0500
    23.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/signal.c	Mon Jul 11 09:35:19 2005 -0500
    23.3 @@ -93,7 +93,7 @@ sys_sigaction(int sig, const struct old_
    23.4  
    23.5  	if (act) {
    23.6  		old_sigset_t mask;
    23.7 -		if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
    23.8 +		if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
    23.9  		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
   23.10  		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
   23.11  			return -EFAULT;
   23.12 @@ -105,7 +105,7 @@ sys_sigaction(int sig, const struct old_
   23.13  	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
   23.14  
   23.15  	if (!ret && oact) {
   23.16 -		if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
   23.17 +		if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
   23.18  		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
   23.19  		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
   23.20  			return -EFAULT;
   23.21 @@ -187,7 +187,7 @@ restore_sigcontext(struct pt_regs *regs,
   23.22  		struct _fpstate __user * buf;
   23.23  		err |= __get_user(buf, &sc->fpstate);
   23.24  		if (buf) {
   23.25 -			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
   23.26 +			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
   23.27  				goto badframe;
   23.28  			err |= restore_i387(buf);
   23.29  		} else {
   23.30 @@ -213,7 +213,7 @@ asmlinkage int sys_sigreturn(unsigned lo
   23.31  	sigset_t set;
   23.32  	int eax;
   23.33  
   23.34 -	if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
   23.35 +	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
   23.36  		goto badframe;
   23.37  	if (__get_user(set.sig[0], &frame->sc.oldmask)
   23.38  	    || (_NSIG_WORDS > 1
   23.39 @@ -243,7 +243,7 @@ asmlinkage int sys_rt_sigreturn(unsigned
   23.40  	sigset_t set;
   23.41  	int eax;
   23.42  
   23.43 -	if (verify_area(VERIFY_READ, frame, sizeof(*frame)))
   23.44 +	if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
   23.45  		goto badframe;
   23.46  	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
   23.47  		goto badframe;
   23.48 @@ -557,6 +557,16 @@ handle_signal(unsigned long sig, siginfo
   23.49  		}
   23.50  	}
   23.51  
   23.52 +	/*
   23.53 +	 * If TF is set due to a debugger (PT_DTRACE), clear the TF flag so
   23.54 +	 * that register information in the sigcontext is correct.
   23.55 +	 */
   23.56 +	if (unlikely(regs->eflags & TF_MASK)
   23.57 +	    && likely(current->ptrace & PT_DTRACE)) {
   23.58 +		current->ptrace &= ~PT_DTRACE;
   23.59 +		regs->eflags &= ~TF_MASK;
   23.60 +	}
   23.61 +
   23.62  	/* Set up the stack frame */
   23.63  	if (ka->sa.sa_flags & SA_SIGINFO)
   23.64  		setup_rt_frame(sig, ka, info, oldset, regs);
   23.65 @@ -608,8 +618,7 @@ int fastcall do_signal(struct pt_regs *r
   23.66  		 * inside the kernel.
   23.67  		 */
   23.68  		if (unlikely(current->thread.debugreg[7])) {
   23.69 -			HYPERVISOR_set_debugreg(7,
   23.70 -						current->thread.debugreg[7]);
   23.71 +			loaddebug(&current->thread, 7);
   23.72  		}
   23.73  
   23.74  		/* Whee!  Actually deliver the signal.  */
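
The verify_area() to access_ok() conversions flip each test because the predicates have opposite polarity: verify_area() returned 0 on success, while access_ok() returns non-zero on success. Both guards below are equivalent; the helper name is hypothetical:

#include <asm/uaccess.h>
#include <linux/errno.h>

static int check_user_buf(const void __user *ptr, unsigned long len)
{
#ifdef OLD_STYLE
	if (verify_area(VERIFY_READ, ptr, len))		/* 0 on success */
		return -EFAULT;
#else
	if (!access_ok(VERIFY_READ, ptr, len))		/* non-zero on success */
		return -EFAULT;
#endif
	return 0;
}
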
    24.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c	Mon Jul 11 09:29:56 2005 -0500
    24.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smp.c	Mon Jul 11 09:35:19 2005 -0500
    24.3 @@ -197,7 +197,7 @@ void send_IPI_mask_bitmask(cpumask_t mas
    24.4  	local_irq_restore(flags);
    24.5  }
    24.6  
    24.7 -inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
    24.8 +void send_IPI_mask_sequence(cpumask_t mask, int vector)
    24.9  {
   24.10  
   24.11  	send_IPI_mask_bitmask(mask, vector);
    25.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c	Mon Jul 11 09:29:56 2005 -0500
    25.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/smpboot.c	Mon Jul 11 09:35:19 2005 -0500
    25.3 @@ -69,6 +69,8 @@ static int __initdata smp_b_stepping;
    25.4  int smp_num_siblings = 1;
    25.5  int phys_proc_id[NR_CPUS]; /* Package ID of each logical CPU */
    25.6  EXPORT_SYMBOL(phys_proc_id);
    25.7 +int cpu_core_id[NR_CPUS]; /* Core ID of each logical CPU */
    25.8 +EXPORT_SYMBOL(cpu_core_id);
    25.9  
   25.10  /* bitmap of online cpus */
   25.11  cpumask_t cpu_online_map;
   25.12 @@ -84,9 +86,6 @@ u8 x86_cpu_to_apicid[NR_CPUS] =
   25.13  			{ [0 ... NR_CPUS-1] = 0xff };
   25.14  EXPORT_SYMBOL(x86_cpu_to_apicid);
   25.15  
   25.16 -/* Set when the idlers are all forked */
   25.17 -int smp_threads_ready;
   25.18 -
   25.19  #if 0
   25.20  /*
   25.21   * Trampoline 80x86 program as an array.
   25.22 @@ -122,6 +121,8 @@ static unsigned long __init setup_trampo
   25.23  }
   25.24  #endif
   25.25  
   25.26 +static void map_cpu_to_logical_apicid(void);
   25.27 +
   25.28  /*
   25.29   * We are called very early to get the low memory for the
   25.30   * SMP bootup trampoline page.
   25.31 @@ -352,7 +353,7 @@ extern void calibrate_delay(void);
   25.32  
   25.33  static atomic_t init_deasserted;
   25.34  
   25.35 -void __init smp_callin(void)
   25.36 +static void __init smp_callin(void)
   25.37  {
   25.38  	int cpuid, phys_id;
   25.39  	unsigned long timeout;
   25.40 @@ -449,7 +450,7 @@ void __init smp_callin(void)
   25.41  #endif
   25.42  }
   25.43  
   25.44 -int cpucount;
   25.45 +static int cpucount;
   25.46  
   25.47  
   25.48  static irqreturn_t ldebug_interrupt(
   25.49 @@ -567,7 +568,7 @@ static inline void unmap_cpu_to_node(int
   25.50  
   25.51  u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
   25.52  
   25.53 -void map_cpu_to_logical_apicid(void)
   25.54 +static void map_cpu_to_logical_apicid(void)
   25.55  {
   25.56  	int cpu = smp_processor_id();
   25.57  	int apicid = smp_processor_id();
   25.58 @@ -576,7 +577,7 @@ void map_cpu_to_logical_apicid(void)
   25.59  	map_cpu_to_node(cpu, apicid_to_node(apicid));
   25.60  }
   25.61  
   25.62 -void unmap_cpu_to_logical_apicid(int cpu)
   25.63 +static void unmap_cpu_to_logical_apicid(int cpu)
   25.64  {
   25.65  	cpu_2_logical_apicid[cpu] = BAD_APICID;
   25.66  	unmap_cpu_to_node(cpu);
   25.67 @@ -861,6 +862,9 @@ static int __init do_boot_cpu(int apicid
   25.68  	if (cpu_gdt_descr[0].size > PAGE_SIZE)
   25.69  		BUG();
   25.70  	cpu_gdt_descr[cpu].size = cpu_gdt_descr[0].size;
   25.71 +	printk("GDT: copying %d bytes from %lx to %lx\n",
   25.72 +		cpu_gdt_descr[0].size, cpu_gdt_descr[0].address,
   25.73 +		cpu_gdt_descr[cpu].address); 
   25.74  	memcpy((void *)cpu_gdt_descr[cpu].address,
   25.75  	       (void *)cpu_gdt_descr[0].address, cpu_gdt_descr[0].size);
   25.76  
   25.77 @@ -916,6 +920,7 @@ static int __init do_boot_cpu(int apicid
   25.78  	ctxt.ctrlreg[3] = (unsigned long)virt_to_machine(swapper_pg_dir);
   25.79  
   25.80  	boot_error = HYPERVISOR_boot_vcpu(cpu, &ctxt);
   25.81 +	printk("boot error: %ld\n", boot_error);
   25.82  
   25.83  	if (!boot_error) {
   25.84  		/*
   25.85 @@ -1016,9 +1021,6 @@ static int __init do_boot_cpu(int apicid
   25.86  	return boot_error;
   25.87  }
   25.88  
   25.89 -cycles_t cacheflush_time;
   25.90 -unsigned long cache_decay_ticks;
   25.91 -
   25.92  static void smp_tune_scheduling (void)
   25.93  {
   25.94  	unsigned long cachesize;       /* kB   */
   25.95 @@ -1039,7 +1041,6 @@ static void smp_tune_scheduling (void)
   25.96  		 * this basically disables processor-affinity
   25.97  		 * scheduling on SMP without a TSC.
   25.98  		 */
   25.99 -		cacheflush_time = 0;
  25.100  		return;
  25.101  	} else {
  25.102  		cachesize = boot_cpu_data.x86_cache_size;
  25.103 @@ -1047,17 +1048,7 @@ static void smp_tune_scheduling (void)
  25.104  			cachesize = 16; /* Pentiums, 2x8kB cache */
  25.105  			bandwidth = 100;
  25.106  		}
  25.107 -
  25.108 -		cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
  25.109  	}
  25.110 -
  25.111 -	cache_decay_ticks = (long)cacheflush_time/cpu_khz + 1;
  25.112 -
  25.113 -	printk("per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
  25.114 -		(long)cacheflush_time/(cpu_khz/1000),
  25.115 -		((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
  25.116 -	printk("task migration cache decay timeout: %ld msecs.\n",
  25.117 -		cache_decay_ticks);
  25.118  }
  25.119  
  25.120  /*
  25.121 @@ -1071,6 +1062,8 @@ static int boot_cpu_logical_apicid;
  25.122  void *xquad_portio;
  25.123  
  25.124  cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
  25.125 +cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
  25.126 +EXPORT_SYMBOL(cpu_core_map);
  25.127  
  25.128  static void __init smp_boot_cpus(unsigned int max_cpus)
  25.129  {
  25.130 @@ -1102,6 +1095,9 @@ static void __init smp_boot_cpus(unsigne
  25.131  	cpus_clear(cpu_sibling_map[0]);
  25.132  	cpu_set(0, cpu_sibling_map[0]);
  25.133  
  25.134 +	cpus_clear(cpu_core_map[0]);
  25.135 +	cpu_set(0, cpu_core_map[0]);
  25.136 +
  25.137  #ifdef CONFIG_X86_IO_APIC
  25.138  	/*
  25.139  	 * If we couldn't find an SMP configuration at boot time,
  25.140 @@ -1119,6 +1115,8 @@ static void __init smp_boot_cpus(unsigne
  25.141  					   " Using dummy APIC emulation.\n");
  25.142  #endif
  25.143  		map_cpu_to_logical_apicid();
  25.144 +		cpu_set(0, cpu_sibling_map[0]);
  25.145 +		cpu_set(0, cpu_core_map[0]);
  25.146  		return;
  25.147  	}
  25.148  #endif
  25.149 @@ -1144,6 +1142,10 @@ static void __init smp_boot_cpus(unsigne
  25.150  		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
  25.151  		smpboot_clear_io_apic_irqs();
  25.152  		phys_cpu_present_map = physid_mask_of_physid(0);
  25.153 +		cpu_set(0, cpu_sibling_map[0]);
  25.154 +		cpu_set(0, cpu_core_map[0]);
  25.155 +		cpu_set(0, cpu_sibling_map[0]);
  25.156 +		cpu_set(0, cpu_core_map[0]);
  25.157  		return;
  25.158  	}
  25.159  
  25.160 @@ -1246,10 +1248,13 @@ static void __init smp_boot_cpus(unsigne
  25.161  	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
  25.162  	 * efficiently.
  25.163  	 */
  25.164 -	for (cpu = 0; cpu < NR_CPUS; cpu++)
  25.165 +	for (cpu = 0; cpu < NR_CPUS; cpu++) {
  25.166  		cpus_clear(cpu_sibling_map[cpu]);
  25.167 +		cpus_clear(cpu_core_map[cpu]);
  25.168 +	}
  25.169  
  25.170  	for (cpu = 0; cpu < NR_CPUS; cpu++) {
  25.171 +		struct cpuinfo_x86 *c = cpu_data + cpu;
  25.172  		int siblings = 0;
  25.173  		int i;
  25.174  		if (!cpu_isset(cpu, cpu_callout_map))
  25.175 @@ -1259,7 +1264,7 @@ static void __init smp_boot_cpus(unsigne
  25.176  			for (i = 0; i < NR_CPUS; i++) {
  25.177  				if (!cpu_isset(i, cpu_callout_map))
  25.178  					continue;
  25.179 -				if (phys_proc_id[cpu] == phys_proc_id[i]) {
  25.180 +				if (cpu_core_id[cpu] == cpu_core_id[i]) {
  25.181  					siblings++;
  25.182  					cpu_set(i, cpu_sibling_map[cpu]);
  25.183  				}
  25.184 @@ -1269,15 +1274,23 @@ static void __init smp_boot_cpus(unsigne
  25.185  			cpu_set(cpu, cpu_sibling_map[cpu]);
  25.186  		}
  25.187  
  25.188 -		if (siblings != smp_num_siblings)
  25.189 +		if (siblings != smp_num_siblings) {
  25.190  			printk(KERN_WARNING "WARNING: %d siblings found for CPU%d, should be %d\n", siblings, cpu, smp_num_siblings);
  25.191 +			smp_num_siblings = siblings;
  25.192 +		}
  25.193 +		if (c->x86_num_cores > 1) {
  25.194 +			for (i = 0; i < NR_CPUS; i++) {
  25.195 +				if (!cpu_isset(i, cpu_callout_map))
  25.196 +					continue;
  25.197 +				if (phys_proc_id[cpu] == phys_proc_id[i]) {
  25.198 +					cpu_set(i, cpu_core_map[cpu]);
  25.199 +				}
  25.200 +			}
  25.201 +		} else {
  25.202 +			cpu_core_map[cpu] = cpu_sibling_map[cpu];
  25.203 +		}
  25.204  	}
  25.205  
  25.206 -#if 0
  25.207 -	if (nmi_watchdog == NMI_LOCAL_APIC)
  25.208 -		check_nmi_watchdog();
  25.209 -#endif
  25.210 -
  25.211  	smpboot_setup_io_apic();
  25.212  
  25.213  #if 0
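
The map construction above distinguishes two relations: logical CPUs sharing a core ID are HT siblings (cpu_sibling_map), while CPUs sharing a physical package ID share cpu_core_map. Reduced to predicates, assuming the arrays detect_ht() fills in:

extern int phys_proc_id[];	/* package ID of each logical CPU */
extern int cpu_core_id[];	/* core ID of each logical CPU */

static int hyperthread_siblings(int a, int b)
{
	return cpu_core_id[a] == cpu_core_id[b];	/* -> cpu_sibling_map */
}

static int same_package(int a, int b)
{
	return phys_proc_id[a] == phys_proc_id[b];	/* -> cpu_core_map */
}
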
    26.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Mon Jul 11 09:29:56 2005 -0500
    26.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/time.c	Mon Jul 11 09:35:19 2005 -0500
    26.3 @@ -190,6 +190,35 @@ static void __get_time_values_from_xen(v
    26.4   ({ rmb(); (shadow_time_version == HYPERVISOR_shared_info->time_version2); })
    26.5  
    26.6  /*
    26.7 + * This is a special lock that is owned by the CPU and holds the index
    26.8 + * register we are working with.  It is required for NMI access to the
    26.9 + * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
   26.10 + */
   26.11 +volatile unsigned long cmos_lock = 0;
   26.12 +EXPORT_SYMBOL(cmos_lock);
   26.13 +
   26.14 +/* Routines for accessing the CMOS RAM/RTC. */
   26.15 +unsigned char rtc_cmos_read(unsigned char addr)
   26.16 +{
   26.17 +	unsigned char val;
   26.18 +	lock_cmos_prefix(addr);
   26.19 +	outb_p(addr, RTC_PORT(0));
   26.20 +	val = inb_p(RTC_PORT(1));
   26.21 +	lock_cmos_suffix(addr);
   26.22 +	return val;
   26.23 +}
   26.24 +EXPORT_SYMBOL(rtc_cmos_read);
   26.25 +
   26.26 +void rtc_cmos_write(unsigned char val, unsigned char addr)
   26.27 +{
   26.28 +	lock_cmos_prefix(addr);
   26.29 +	outb_p(addr, RTC_PORT(0));
   26.30 +	outb_p(val, RTC_PORT(1));
   26.31 +	lock_cmos_suffix(addr);
   26.32 +}
   26.33 +EXPORT_SYMBOL(rtc_cmos_write);
   26.34 +
   26.35 +/*
   26.36   * This version of gettimeofday has microsecond resolution
   26.37   * and better than microsecond precision on fast x86 machines with TSC.
   26.38   */
   26.39 @@ -349,16 +378,23 @@ static int set_rtc_mmss(unsigned long no
   26.40  {
   26.41  	int retval;
   26.42  
   26.43 +	WARN_ON(irqs_disabled());
   26.44 +
   26.45  	/* gets recalled with irq locally disabled */
   26.46 -	spin_lock(&rtc_lock);
   26.47 +	spin_lock_irq(&rtc_lock);
   26.48  	if (efi_enabled)
   26.49  		retval = efi_set_rtc_mmss(nowtime);
   26.50  	else
   26.51  		retval = mach_set_rtc_mmss(nowtime);
   26.52 -	spin_unlock(&rtc_lock);
   26.53 +	spin_unlock_irq(&rtc_lock);
   26.54  
   26.55  	return retval;
   26.56  }
   26.57 +#else
   26.58 +static int set_rtc_mmss(unsigned long nowtime)
   26.59 +{
   26.60 +	return 0;
   26.61 +}
   26.62  #endif
   26.63  
   26.64  /* monotonic_clock(): returns # of nanoseconds passed since time_init()
   26.65 @@ -503,29 +539,6 @@ static inline void do_timer_interrupt(in
   26.66  
   26.67  		last_update_to_xen = xtime.tv_sec;
   26.68  	}
   26.69 -
   26.70 -	/*
   26.71 -	 * If we have an externally synchronized Linux clock, then update
   26.72 -	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
   26.73 -	 * called as close as possible to 500 ms before the new second starts.
   26.74 -	 */
   26.75 -	if ((time_status & STA_UNSYNC) == 0 &&
   26.76 -	    xtime.tv_sec > last_rtc_update + 660 &&
   26.77 -	    (xtime.tv_nsec / 1000)
   26.78 -			>= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
   26.79 -	    (xtime.tv_nsec / 1000)
   26.80 -			<= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2) {
   26.81 -		/* horrible...FIXME */
   26.82 -		if (efi_enabled) {
   26.83 -	 		if (efi_set_rtc_mmss(xtime.tv_sec) == 0)
   26.84 -				last_rtc_update = xtime.tv_sec;
   26.85 -			else
   26.86 -				last_rtc_update = xtime.tv_sec - 600;
   26.87 -		} else if (set_rtc_mmss(xtime.tv_sec) == 0)
   26.88 -			last_rtc_update = xtime.tv_sec;
   26.89 -		else
   26.90 -			last_rtc_update = xtime.tv_sec - 600; /* do it again in 60 s */
   26.91 -	}
   26.92  #endif
   26.93  }
   26.94  
   26.95 @@ -565,10 +578,59 @@ unsigned long get_cmos_time(void)
   26.96  
   26.97  	return retval;
   26.98  }
   26.99 +static void sync_cmos_clock(unsigned long dummy);
  26.100 +
  26.101 +static struct timer_list sync_cmos_timer =
  26.102 +                                      TIMER_INITIALIZER(sync_cmos_clock, 0, 0);
  26.103 +
  26.104 +static void sync_cmos_clock(unsigned long dummy)
  26.105 +{
  26.106 +	struct timeval now, next;
  26.107 +	int fail = 1;
  26.108 +
  26.109 +	/*
  26.110 +	 * If we have an externally synchronized Linux clock, then update
  26.111 +	 * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
  26.112 +	 * called as close as possible to 500 ms before the new second starts.
  26.113 +	 * This code is run on a timer.  If the clock is set, that timer
  26.114 +	 * may not expire at the correct time.  Thus, we adjust...
  26.115 +	 */
  26.116 +	if ((time_status & STA_UNSYNC) != 0)
  26.117 +		/*
  26.118 +		 * Not synced, exit, do not restart a timer (if one is
  26.119 +		 * running, let it run out).
  26.120 +		 */
  26.121 +		return;
  26.122 +
  26.123 +	do_gettimeofday(&now);
  26.124 +	if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
  26.125 +	    now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
  26.126 +		fail = set_rtc_mmss(now.tv_sec);
  26.127 +
  26.128 +	next.tv_usec = USEC_AFTER - now.tv_usec;
  26.129 +	if (next.tv_usec <= 0)
  26.130 +		next.tv_usec += USEC_PER_SEC;
  26.131 +
  26.132 +	if (!fail)
  26.133 +		next.tv_sec = 659;
  26.134 +	else
  26.135 +		next.tv_sec = 0;
  26.136 +
  26.137 +	if (next.tv_usec >= USEC_PER_SEC) {
  26.138 +		next.tv_sec++;
  26.139 +		next.tv_usec -= USEC_PER_SEC;
  26.140 +	}
  26.141 +	mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
  26.142 +}
  26.143 +
  26.144 +void notify_arch_cmos_timer(void)
  26.145 +{
  26.146 +	mod_timer(&sync_cmos_timer, jiffies + 1);
  26.147 +}
  26.148  
  26.149  static long clock_cmos_diff, sleep_start;
  26.150  
  26.151 -static int timer_suspend(struct sys_device *dev, u32 state)
  26.152 +static int timer_suspend(struct sys_device *dev, pm_message_t state)
  26.153  {
  26.154  	/*
  26.155  	 * Estimate time zone so that set_time can update the clock
  26.156 @@ -626,14 +688,14 @@ device_initcall(time_init_device);
  26.157  #ifdef CONFIG_HPET_TIMER
  26.158  extern void (*late_time_init)(void);
  26.159  /* Duplicate of time_init() below, with hpet_enable part added */
  26.160 -void __init hpet_time_init(void)
  26.161 +static void __init hpet_time_init(void)
  26.162  {
  26.163  	xtime.tv_sec = get_cmos_time();
  26.164  	xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
  26.165  	set_normalized_timespec(&wall_to_monotonic,
  26.166  		-xtime.tv_sec, -xtime.tv_nsec);
  26.167  
  26.168 -	if (hpet_enable() >= 0) {
  26.169 +	if ((hpet_enable() >= 0) && hpet_use_timer) {
  26.170  		printk("Using HPET for base-timer\n");
  26.171  	}
  26.172  
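
sync_cmos_clock() replaces the old inline ~11-minute CMOS update with a self-rearming timer aimed at roughly 500 ms before a second boundary: on success it rearms ~659 seconds out, on failure it retries within the next second. A standalone sketch of the rearm arithmetic, with illustrative values:

#include <stdio.h>

#define USEC_PER_SEC	1000000L
#define USEC_AFTER	500000L		/* illustrative target offset */

int main(void)
{
	long now_usec = 731204;			/* offset into current second */
	long next_sec = 659;			/* assume the RTC write worked */
	long next_usec = USEC_AFTER - now_usec;

	if (next_usec <= 0)
		next_usec += USEC_PER_SEC;
	if (next_usec >= USEC_PER_SEC) {
		next_sec++;
		next_usec -= USEC_PER_SEC;
	}
	printf("rearm in %lds + %ldus\n", next_sec, next_usec);
	return 0;
}
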
    27.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c	Mon Jul 11 09:29:56 2005 -0500
    27.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c	Mon Jul 11 09:35:19 2005 -0500
    27.3 @@ -342,8 +342,7 @@ void die(const char * str, struct pt_reg
    27.4  
    27.5  	if (panic_on_oops) {
    27.6  		printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
    27.7 -		set_current_state(TASK_UNINTERRUPTIBLE);
    27.8 -		schedule_timeout(5 * HZ);
    27.9 +		ssleep(5);
   27.10  		panic("Fatal exception");
   27.11  	}
   27.12  	do_exit(SIGSEGV);
   27.13 @@ -450,6 +449,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", inv
   27.14  DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
   27.15  DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
   27.16  DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
   27.17 +DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0)
   27.18  #ifdef CONFIG_X86_MCE
   27.19  DO_ERROR(18, SIGBUS, "machine check", machine_check)
   27.20  #endif
   27.21 @@ -636,16 +636,15 @@ void unset_nmi_callback(void)
   27.22  }
   27.23  
   27.24  #ifdef CONFIG_KPROBES
   27.25 -fastcall int do_int3(struct pt_regs *regs, long error_code)
   27.26 +fastcall void do_int3(struct pt_regs *regs, long error_code)
   27.27  {
   27.28  	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
   27.29  			== NOTIFY_STOP)
   27.30 -		return 1;
   27.31 +		return;
   27.32  	/* This is an interrupt gate, because kprobes wants interrupts
   27.33  	disabled.  Normal trap handlers don't. */
   27.34  	restore_interrupts(regs);
   27.35  	do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
   27.36 -	return 0;
   27.37  }
   27.38  #endif
   27.39  
   27.40 @@ -702,8 +701,6 @@ fastcall void do_debug(struct pt_regs * 
   27.41  	/*
   27.42  	 * Single-stepping through TF: make sure we ignore any events in
   27.43  	 * kernel space (but re-enable TF when returning to user mode).
   27.44 -	 * And if the event was due to a debugger (PT_DTRACE), clear the
   27.45 -	 * TF flag so that register information is correct.
   27.46  	 */
   27.47  	if (condition & DR_STEP) {
   27.48  		/*
   27.49 @@ -713,11 +710,6 @@ fastcall void do_debug(struct pt_regs * 
   27.50  		 */
   27.51  		if ((regs->xcs & 2) == 0)
   27.52  			goto clear_TF_reenable;
   27.53 -
   27.54 -		if (likely(tsk->ptrace & PT_DTRACE)) {
   27.55 -			tsk->ptrace &= ~PT_DTRACE;
   27.56 -			regs->eflags &= ~TF_MASK;
   27.57 -		}
   27.58  	}
   27.59  
   27.60  	/* Ok, finally something we can handle */
   27.61 @@ -807,7 +799,7 @@ fastcall void do_coprocessor_error(struc
   27.62  	math_error((void __user *)regs->eip);
   27.63  }
   27.64  
   27.65 -void simd_math_error(void __user *eip)
   27.66 +static void simd_math_error(void __user *eip)
   27.67  {
   27.68  	struct task_struct * task;
   27.69  	siginfo_t info;
   27.70 @@ -879,6 +871,51 @@ fastcall void do_simd_coprocessor_error(
   27.71  	}
   27.72  }
   27.73  
   27.74 +fastcall void setup_x86_bogus_stack(unsigned char * stk)
   27.75 +{
   27.76 +	unsigned long *switch16_ptr, *switch32_ptr;
   27.77 +	struct pt_regs *regs;
   27.78 +	unsigned long stack_top, stack_bot;
   27.79 +	unsigned short iret_frame16_off;
   27.80 +	int cpu = smp_processor_id();
   27.81 +	/* reserve the space on 32bit stack for the magic switch16 pointer */
   27.82 +	memmove(stk, stk + 8, sizeof(struct pt_regs));
   27.83 +	switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
   27.84 +	regs = (struct pt_regs *)stk;
   27.85 +	/* now the switch32 on 16bit stack */
   27.86 +	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
   27.87 +	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
   27.88 +	switch32_ptr = (unsigned long *)(stack_top - 8);
   27.89 +	iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
   27.90 +	/* copy iret frame on 16bit stack */
   27.91 +	memcpy((void *)(stack_bot + iret_frame16_off), &regs->eip, 20);
   27.92 +	/* fill in the switch pointers */
   27.93 +	switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
   27.94 +	switch16_ptr[1] = __ESPFIX_SS;
   27.95 +	switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
   27.96 +		8 - CPU_16BIT_STACK_SIZE;
   27.97 +	switch32_ptr[1] = __KERNEL_DS;
   27.98 +}
   27.99 +
  27.100 +fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
  27.101 +{
  27.102 +	unsigned long *switch32_ptr;
  27.103 +	unsigned char *stack16, *stack32;
  27.104 +	unsigned long stack_top, stack_bot;
  27.105 +	int len;
  27.106 +	int cpu = smp_processor_id();
  27.107 +	stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
  27.108 +	stack_top = stack_bot +	CPU_16BIT_STACK_SIZE;
  27.109 +	switch32_ptr = (unsigned long *)(stack_top - 8);
  27.110 +	/* copy the data from 16bit stack to 32bit stack */
  27.111 +	len = CPU_16BIT_STACK_SIZE - 8 - sp;
  27.112 +	stack16 = (unsigned char *)(stack_bot + sp);
  27.113 +	stack32 = (unsigned char *)
  27.114 +		(switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
  27.115 +	memcpy(stack32, stack16, len);
  27.116 +	return stack32;
  27.117 +}
  27.118 +
  27.119  /*
  27.120   *  'math_state_restore()' saves the current math information in the
  27.121   * old math state array, and gets the new ones from the current task
  27.122 @@ -980,3 +1017,10 @@ void smp_trap_init(trap_info_t *trap_ctx
  27.123  		trap_ctxt[t->vector].address = t->address;
  27.124  	}
  27.125  }
  27.126 +
  27.127 +static int __init kstack_setup(char *s)
  27.128 +{
  27.129 +	kstack_depth_to_print = simple_strtoul(s, NULL, 0);
  27.130 +	return 0;
  27.131 +}
  27.132 +__setup("kstack=", kstack_setup);
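
The two espfix helpers added above shuffle a 20-byte iret frame (five 32-bit words: eip, cs, eflags, esp, ss) around an 8-byte switch-pointer pair kept at the top of a per-CPU 16-bit stack. A minimal userspace sketch of that offset arithmetic, assuming CPU_16BIT_STACK_SIZE is 4096 (the real value comes from the kernel headers):

```c
#include <assert.h>
#include <stdio.h>

/* Assumed constant: the kernel defines CPU_16BIT_STACK_SIZE elsewhere. */
#define CPU_16BIT_STACK_SIZE 4096
#define IRET_FRAME_BYTES     20   /* eip, cs, eflags, esp, ss = 5 * 4 bytes */

int main(void)
{
	/* setup_x86_bogus_stack(): the iret frame lands just below the
	 * 8-byte switch32 pointer pair at the top of the 16-bit stack. */
	unsigned short iret_frame16_off =
		CPU_16BIT_STACK_SIZE - 8 - IRET_FRAME_BYTES;

	/* fixup_x86_bogus_stack(): the copy-back length is recovered from
	 * nothing but the truncated 16-bit stack pointer. */
	unsigned short sp = iret_frame16_off;    /* esp right after the copy */
	int len = CPU_16BIT_STACK_SIZE - 8 - sp;

	assert(len == IRET_FRAME_BYTES);
	printf("iret frame at offset %u, %d bytes copied back\n",
	       (unsigned)iret_frame16_off, len);
	return 0;
}
```

The fixup side can recover the copy length from the 16-bit esp alone, which is why the two constants above must agree.
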
    28.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c	Mon Jul 11 09:29:56 2005 -0500
    28.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/highmem.c	Mon Jul 11 09:35:19 2005 -0500
    28.3 @@ -77,7 +77,7 @@ void kunmap_atomic(void *kvaddr, enum km
    28.4  	 * force other mappings to Oops if they'll try to access
    28.5  	 * this pte without first remap it
    28.6  	 */
    28.7 -	pte_clear(kmap_pte-idx);
    28.8 +	pte_clear(&init_mm, vaddr, kmap_pte-idx);
    28.9  	__flush_tlb_one(vaddr);
   28.10  #endif
   28.11  
    29.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c	Mon Jul 11 09:29:56 2005 -0500
    29.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c	Mon Jul 11 09:35:19 2005 -0500
    29.3 @@ -248,13 +248,10 @@ static inline int page_is_ram(unsigned l
    29.4  pte_t *kmap_pte;
    29.5  pgprot_t kmap_prot;
    29.6  
    29.7 -EXPORT_SYMBOL(kmap_prot);
    29.8 -EXPORT_SYMBOL(kmap_pte);
    29.9 -
   29.10  #define kmap_get_fixmap_pte(vaddr)					\
   29.11  	pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr))
   29.12  
   29.13 -void __init kmap_init(void)
   29.14 +static void __init kmap_init(void)
   29.15  {
   29.16  	unsigned long kmap_vstart;
   29.17  
   29.18 @@ -265,7 +262,7 @@ void __init kmap_init(void)
   29.19  	kmap_prot = PAGE_KERNEL;
   29.20  }
   29.21  
   29.22 -void __init permanent_kmaps_init(pgd_t *pgd_base)
   29.23 +static void __init permanent_kmaps_init(pgd_t *pgd_base)
   29.24  {
   29.25  	pgd_t *pgd;
   29.26  	pud_t *pud;
   29.27 @@ -297,7 +294,7 @@ void __init one_highpage_init(struct pag
   29.28  }
   29.29  
   29.30  #ifndef CONFIG_DISCONTIGMEM
   29.31 -void __init set_highmem_pages_init(int bad_ppro) 
   29.32 +static void __init set_highmem_pages_init(int bad_ppro)
   29.33  {
   29.34  	int pfn;
   29.35  	for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
   29.36 @@ -426,38 +423,6 @@ void zap_low_mappings (void)
   29.37  	flush_tlb_all();
   29.38  }
   29.39  
   29.40 -#ifndef CONFIG_DISCONTIGMEM
   29.41 -void __init zone_sizes_init(void)
   29.42 -{
   29.43 -	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
   29.44 -	unsigned int /*max_dma,*/ high, low;
   29.45 -	
   29.46 -	/*
   29.47 -	 * XEN: Our notion of "DMA memory" is fake when running over Xen.
   29.48 -	 * We simply put all RAM in the DMA zone so that those drivers which
   29.49 -	 * needlessly specify GFP_DMA do not get starved of RAM unnecessarily.
   29.50 -	 * Those drivers that *do* require lowmem are screwed anyway when
   29.51 -	 * running over Xen!
   29.52 -	 */
   29.53 -	/*max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;*/
   29.54 -	low = max_low_pfn;
   29.55 -	high = highend_pfn;
   29.56 -	
   29.57 -	/*if (low < max_dma)*/
   29.58 -		zones_size[ZONE_DMA] = low;
   29.59 -	/*else*/ {
   29.60 -		/*zones_size[ZONE_DMA] = max_dma;*/
   29.61 -		/*zones_size[ZONE_NORMAL] = low - max_dma;*/
   29.62 -#ifdef CONFIG_HIGHMEM
   29.63 -		zones_size[ZONE_HIGHMEM] = high - low;
   29.64 -#endif
   29.65 -	}
   29.66 -	free_area_init(zones_size);	
   29.67 -}
   29.68 -#else
   29.69 -extern void zone_sizes_init(void);
   29.70 -#endif /* !CONFIG_DISCONTIGMEM */
   29.71 -
   29.72  static int disable_nx __initdata = 0;
   29.73  u64 __supported_pte_mask = ~_PAGE_NX;
   29.74  
   29.75 @@ -560,7 +525,6 @@ void __init paging_init(void)
   29.76  	__flush_tlb_all();
   29.77  
   29.78  	kmap_init();
   29.79 -	zone_sizes_init();
   29.80  
   29.81  	/* Switch to the real shared_info page, and clear the dummy page. */
   29.82  	set_fixmap(FIX_SHARED_INFO, xen_start_info.shared_info);
   29.83 @@ -586,7 +550,7 @@ void __init paging_init(void)
   29.84   * but fortunately the switch to using exceptions got rid of all that.
   29.85   */
   29.86  
   29.87 -void __init test_wp_bit(void)
   29.88 +static void __init test_wp_bit(void)
   29.89  {
   29.90  	printk("Checking if this processor honours the WP bit even in supervisor mode... ");
   29.91  
   29.92 @@ -605,20 +569,17 @@ void __init test_wp_bit(void)
   29.93  	}
   29.94  }
   29.95  
   29.96 -#ifndef CONFIG_DISCONTIGMEM
   29.97  static void __init set_max_mapnr_init(void)
   29.98  {
   29.99  #ifdef CONFIG_HIGHMEM
  29.100 -	max_mapnr = num_physpages = highend_pfn;
  29.101 +	num_physpages = highend_pfn;
  29.102  #else
  29.103 -	max_mapnr = num_physpages = max_low_pfn;
  29.104 +	num_physpages = max_low_pfn;
  29.105 +#endif
  29.106 +#ifndef CONFIG_DISCONTIGMEM
  29.107 +	max_mapnr = num_physpages;
  29.108  #endif
  29.109  }
  29.110 -#define __free_all_bootmem() free_all_bootmem()
  29.111 -#else
  29.112 -#define __free_all_bootmem() free_all_bootmem_node(NODE_DATA(0))
  29.113 -extern void set_max_mapnr_init(void);
  29.114 -#endif /* !CONFIG_DISCONTIGMEM */
  29.115  
  29.116  static struct kcore_list kcore_mem, kcore_vmalloc; 
  29.117  
  29.118 @@ -650,16 +611,16 @@ void __init mem_init(void)
  29.119  	set_max_mapnr_init();
  29.120  
  29.121  #ifdef CONFIG_HIGHMEM
  29.122 -	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE);
  29.123 +	high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
  29.124  #else
  29.125 -	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
  29.126 +	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
  29.127  #endif
  29.128  	printk("vmalloc area: %lx-%lx, maxmem %lx\n",
  29.129  	       VMALLOC_START,VMALLOC_END,MAXMEM);
  29.130  	BUG_ON(VMALLOC_START > VMALLOC_END);
  29.131  	
  29.132  	/* this will put all low memory onto the freelists */
  29.133 -	totalram_pages += __free_all_bootmem();
  29.134 +	totalram_pages += free_all_bootmem();
  29.135  	/* XEN: init and count low-mem pages outside initial allocation. */
  29.136  	for (pfn = xen_start_info.nr_pages; pfn < max_low_pfn; pfn++) {
  29.137  		ClearPageReserved(&mem_map[pfn]);
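
The mem_init() hunk computes high_memory as __va() of the last valid byte plus one, rather than __va() of the one-past-the-end byte count. The apparent motivation is that the raw byte count can sit exactly at the top of a 32-bit address space and wrap to zero in unsigned arithmetic, while the last-byte address always fits; a sketch of that wrap (the 4 GiB boundary value is a hypothetical illustration):

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096u

int main(void)
{
	/* Hypothetical 32-bit box whose low memory ends exactly at 4 GiB. */
	uint32_t max_low_pfn = 0x100000;              /* 4 GiB / 4 KiB      */

	uint32_t naive = max_low_pfn * PAGE_SIZE;     /* wraps to 0         */
	uint32_t last  = max_low_pfn * PAGE_SIZE - 1; /* 0xffffffff, valid  */

	printf("one-past-the-end count: 0x%08x (wrapped)\n", (unsigned)naive);
	printf("last valid byte:        0x%08x (__va this, then add 1)\n",
	       (unsigned)last);
	return 0;
}
```
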
    30.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Mon Jul 11 09:29:56 2005 -0500
    30.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Mon Jul 11 09:35:19 2005 -0500
    30.3 @@ -342,7 +342,7 @@ void pgd_free(pgd_t *pgd)
    30.4  	if (PTRS_PER_PMD > 1)
    30.5  		for (i = 0; i < USER_PTRS_PER_PGD; ++i)
    30.6  			kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1));
    30.7 -	/* in the non-PAE case, clear_page_range() clears user pgd entries */
    30.8 +	/* in the non-PAE case, free_pgtables() clears user pgd entries */
    30.9  	kmem_cache_free(pgd_cache, pgd);
   30.10  }
   30.11  
    31.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c	Mon Jul 11 09:29:56 2005 -0500
    31.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/pci/irq.c	Mon Jul 11 09:35:19 2005 -0500
    31.3 @@ -500,6 +500,9 @@ static __init int intel_router_probe(str
    31.4  		case PCI_DEVICE_ID_INTEL_ICH6_1:
    31.5  		case PCI_DEVICE_ID_INTEL_ICH7_0:
    31.6  		case PCI_DEVICE_ID_INTEL_ICH7_1:
    31.7 +		case PCI_DEVICE_ID_INTEL_ICH7_30:
    31.8 +		case PCI_DEVICE_ID_INTEL_ICH7_31:
    31.9 +		case PCI_DEVICE_ID_INTEL_ESB2_0:
   31.10  			r->name = "PIIX/ICH";
   31.11  			r->get = pirq_piix_get;
   31.12  			r->set = pirq_piix_set;
   31.13 @@ -1031,66 +1034,60 @@ void pcibios_penalize_isa_irq(int irq)
   31.14  static int pirq_enable_irq(struct pci_dev *dev)
   31.15  {
   31.16  	u8 pin;
   31.17 -	extern int via_interrupt_line_quirk;
   31.18  	struct pci_dev *temp_dev;
   31.19  
   31.20  	pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
   31.21  	if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
   31.22 -		char *msg;
   31.23 -		msg = "";
   31.24 +		char *msg = "";
   31.25 +
   31.26 +		pin--;		/* interrupt pins are numbered starting from 1 */
   31.27 +
   31.28  		if (io_apic_assign_pci_irqs) {
   31.29  			int irq;
   31.30  
   31.31 -			if (pin) {
   31.32 -				pin--;		/* interrupt pins are numbered starting from 1 */
   31.33 -				irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
   31.34 -				/*
   31.35 -				 * Busses behind bridges are typically not listed in the MP-table.
   31.36 -				 * In this case we have to look up the IRQ based on the parent bus,
   31.37 -				 * parent slot, and pin number. The SMP code detects such bridged
   31.38 -				 * busses itself so we should get into this branch reliably.
   31.39 -				 */
   31.40 -				temp_dev = dev;
   31.41 -				while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
   31.42 -					struct pci_dev * bridge = dev->bus->self;
   31.43 +			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
   31.44 +			/*
   31.45 +			 * Busses behind bridges are typically not listed in the MP-table.
   31.46 +			 * In this case we have to look up the IRQ based on the parent bus,
   31.47 +			 * parent slot, and pin number. The SMP code detects such bridged
   31.48 +			 * busses itself so we should get into this branch reliably.
   31.49 +			 */
   31.50 +			temp_dev = dev;
   31.51 +			while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
   31.52 +				struct pci_dev * bridge = dev->bus->self;
   31.53  
   31.54 -					pin = (pin + PCI_SLOT(dev->devfn)) % 4;
   31.55 -					irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 
   31.56 -							PCI_SLOT(bridge->devfn), pin);
   31.57 -					if (irq >= 0)
   31.58 -						printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
   31.59 -							pci_name(bridge), 'A' + pin, irq);
   31.60 -					dev = bridge;
   31.61 -				}
   31.62 -				dev = temp_dev;
   31.63 -				if (irq >= 0) {
   31.64 +				pin = (pin + PCI_SLOT(dev->devfn)) % 4;
   31.65 +				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, 
   31.66 +						PCI_SLOT(bridge->devfn), pin);
   31.67 +				if (irq >= 0)
   31.68 +					printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
   31.69 +						pci_name(bridge), 'A' + pin, irq);
   31.70 +				dev = bridge;
   31.71 +			}
   31.72 +			dev = temp_dev;
   31.73 +			if (irq >= 0) {
   31.74  #ifdef CONFIG_PCI_MSI
   31.75 -					if (!platform_legacy_irq(irq))
   31.76 -						irq = IO_APIC_VECTOR(irq);
   31.77 +				if (!platform_legacy_irq(irq))
   31.78 +					irq = IO_APIC_VECTOR(irq);
   31.79  #endif
   31.80 -					printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
   31.81 -						pci_name(dev), 'A' + pin, irq);
   31.82 -					dev->irq = irq;
   31.83 -					return 0;
   31.84 -				} else
   31.85 -					msg = " Probably buggy MP table.";
   31.86 -			}
   31.87 +				printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
   31.88 +					pci_name(dev), 'A' + pin, irq);
   31.89 +				dev->irq = irq;
   31.90 +				return 0;
   31.91 +			} else
   31.92 +				msg = " Probably buggy MP table.";
   31.93  		} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
   31.94  			msg = "";
   31.95  		else
   31.96  			msg = " Please try using pci=biosirq.";
   31.97 -			
   31.98 +
   31.99  		/* With IDE legacy devices the IRQ lookup failure is not a problem.. */
  31.100  		if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
  31.101  			return 0;
  31.102 -			
  31.103 +
  31.104  		printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
  31.105 -		       'A' + pin - 1, pci_name(dev), msg);
  31.106 +		       'A' + pin, pci_name(dev), msg);
  31.107  	}
  31.108 -	/* VIA bridges use interrupt line for apic/pci steering across
  31.109 -	   the V-Link */
  31.110 -	else if (via_interrupt_line_quirk)
  31.111 -		pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq & 15);
  31.112  	return 0;
  31.113  }
  31.114  
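
With "pin--" hoisted out of the io_apic_assign_pci_irqs branch, pins are 0-based for the whole function, which is why the final printk now prints 'A' + pin instead of 'A' + pin - 1. The bridge walk rotates the pin by the child's slot on each hop; a small sketch of that swizzle (device and slot numbers are made up for illustration):

```c
#include <stdio.h>

#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)

int main(void)
{
	/* Hypothetical device: slot 5 behind a PCI-PCI bridge, using INTC. */
	unsigned int pin = 2;               /* INTC, already 0-based        */
	unsigned int devfn = 5 << 3;        /* slot 5, function 0           */

	/* One hop of the bridge walk: rotate the pin by the child's slot
	 * number across the four INTA..INTD lines. */
	pin = (pin + PCI_SLOT(devfn)) % 4;
	printf("upstream of the bridge the device appears on INT%c\n",
	       'A' + pin);
	return 0;
}
```
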
    32.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig	Mon Jul 11 09:29:56 2005 -0500
    32.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/Kconfig	Mon Jul 11 09:35:19 2005 -0500
    32.3 @@ -66,23 +66,6 @@ config EARLY_PRINTK
    32.4  	  with klogd/syslogd or the X server. You should normally N here,
    32.5  	  unless you want to debug such a crash.
    32.6  
    32.7 -config HPET_TIMER
    32.8 -	bool
    32.9 -	default n
   32.10 -	help
   32.11 -	  Use the IA-PC HPET (High Precision Event Timer) to manage
   32.12 -	  time in preference to the PIT and RTC, if a HPET is
   32.13 -	  present.  The HPET provides a stable time base on SMP
   32.14 -	  systems, unlike the RTC, but it is more expensive to access,
   32.15 -	  as it is off-chip.  You can find the HPET spec at
   32.16 -	  <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
   32.17 -
   32.18 -	  If unsure, say Y.
   32.19 -
   32.20 -config HPET_EMULATE_RTC
   32.21 -	bool "Provide RTC interrupt"
   32.22 -	depends on HPET_TIMER && RTC=y
   32.23 -
   32.24  config GENERIC_ISA_DMA
   32.25  	bool
   32.26  	default y
   32.27 @@ -255,7 +238,7 @@ config PREEMPT
   32.28  config SCHED_SMT
   32.29  	bool "SMT (Hyperthreading) scheduler support"
   32.30  	depends on SMP
   32.31 -	default off
   32.32 +	default n
   32.33  	help
   32.34  	  SMT scheduler support improves the CPU scheduler's decision making
   32.35  	  when dealing with Intel Pentium 4 chips with HyperThreading at a
   32.36 @@ -312,6 +295,23 @@ config NR_CPUS
   32.37  	  This is purely to save memory - each supported CPU requires
   32.38  	  memory in the static kernel configuration.
   32.39  
   32.40 +config HPET_TIMER
   32.41 +	bool
   32.42 +	default n
   32.43 +	help
   32.44 +	  Use the IA-PC HPET (High Precision Event Timer) to manage
   32.45 +	  time in preference to the PIT and RTC, if a HPET is
   32.46 +	  present.  The HPET provides a stable time base on SMP
   32.47 +	  systems, unlike the RTC, but it is more expensive to access,
   32.48 +	  as it is off-chip.  You can find the HPET spec at
   32.49 +	  <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>.
   32.50 +
   32.51 +	  If unsure, say Y.
   32.52 +
   32.53 +config HPET_EMULATE_RTC
   32.54 +	bool "Provide RTC interrupt"
   32.55 +	depends on HPET_TIMER && RTC=y
   32.56 +
   32.57  config GART_IOMMU
   32.58  	bool "IOMMU support"
   32.59  	depends on PCI
   32.60 @@ -346,6 +346,24 @@ config X86_MCE
   32.61  	   machine check error logs. See
   32.62  	   ftp://ftp.x86-64.org/pub/linux/tools/mcelog
   32.63  
   32.64 +config SECCOMP
   32.65 +	bool "Enable seccomp to safely compute untrusted bytecode"
   32.66 +	depends on PROC_FS
   32.67 +	default y
   32.68 +	help
   32.69 +	  This kernel feature is useful for number crunching applications
   32.70 +	  that may need to compute untrusted bytecode during their
   32.71 +	  execution. By using pipes or other transports made available to
   32.72 +	  the process as file descriptors supporting the read/write
   32.73 +	  syscalls, it's possible to isolate those applications in
   32.74 +	  their own address space using seccomp. Once seccomp is
   32.75 +	  enabled via /proc/<pid>/seccomp, it cannot be disabled
   32.76 +	  and the task is only allowed to execute a few safe syscalls
   32.77 +	  defined by each seccomp mode.
   32.78 +
   32.79 +	  If unsure, say Y. Only embedded should say N here.
   32.80 +
   32.81 +
   32.82  endmenu
   32.83  
   32.84  #
   32.85 @@ -359,6 +377,11 @@ config GENERIC_IRQ_PROBE
   32.86  	bool
   32.87  	default y
   32.88  
   32.89 +# we have no ISA slots, but we do have ISA-style DMA.
   32.90 +config ISA_DMA_API
   32.91 +	bool
   32.92 +	default y
   32.93 +
   32.94  menu "Power management options"
   32.95  
   32.96  source kernel/power/Kconfig
   32.97 @@ -380,7 +403,7 @@ config PCI_DIRECT
   32.98  
   32.99  config PCI_MMCONFIG
  32.100  	bool "Support mmconfig PCI config space access"
  32.101 -	depends on PCI
  32.102 +	depends on PCI && ACPI
  32.103  	select ACPI_BOOT
  32.104  
  32.105  config UNORDERED_IO
  32.106 @@ -393,6 +416,8 @@ config UNORDERED_IO
  32.107  	 from i386. Requires that the driver writer used memory barriers
  32.108  	 properly.
  32.109  
  32.110 +#source "drivers/pci/pcie/Kconfig"
  32.111 +
  32.112  #source "drivers/pci/Kconfig"
  32.113  
  32.114  #source "drivers/pcmcia/Kconfig"
  32.115 @@ -444,12 +469,8 @@ endmenu
  32.116  
  32.117  #source "arch/x86_64/oprofile/Kconfig"
  32.118  
  32.119 -#source "arch/x86_64/Kconfig.debug"
  32.120 -
  32.121  # source "security/Kconfig"
  32.122  
  32.123  # source "crypto/Kconfig"
  32.124  
  32.125 -# source "lib/Kconfig"
  32.126 -
  32.127  endmenu
    33.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile	Mon Jul 11 09:29:56 2005 -0500
    33.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/Makefile	Mon Jul 11 09:35:19 2005 -0500
    33.3 @@ -48,10 +48,14 @@ s-link	:= vsyscall-syscall.o vsyscall-sy
    33.4  $(patsubst %.o,$(obj)/%.c,$(c-obj-y) $(c-link)) $(patsubst %.o,$(obj)/%.S,$(s-obj-y) $(s-link)):
    33.5  	@ln -fsn $(srctree)/arch/x86_64/ia32/$(notdir $@) $@
    33.6  
    33.7 -$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S
    33.8 +$(obj)/vsyscall-int80.S: $(obj)/vsyscall-sigreturn.S ../../i386/kernel/vsyscall-note.S
    33.9  $(obj)/vsyscall-sysenter.S: $(obj)/vsyscall-sigreturn.S
   33.10  $(obj)/vsyscall-syscall.S: $(obj)/vsyscall-sigreturn.S
   33.11  
   33.12 +../../i386/kernel/vsyscall-note.S:
   33.13 +	@ln -fsn $(srctree)/arch/i386/kernel/$(notdir $@) $(srctree)/arch/xen/i386/kernel/$(notdir $@)
   33.14 +	make -C arch/xen/i386/kernel vsyscall-note.S
   33.15 +
   33.16  obj-y	+= $(c-obj-y) $(s-obj-y)
   33.17  
   33.18  clean-files += $(patsubst %.o,%.c,$(c-obj-y) $(c-obj-) $(c-link))
    34.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S	Mon Jul 11 09:29:56 2005 -0500
    34.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/ia32entry.S	Mon Jul 11 09:35:19 2005 -0500
    34.3 @@ -96,7 +96,7 @@ 1:	movl	(%rbp),%r9d
    34.4   	.quad 1b,ia32_badarg
    34.5   	.previous	
    34.6  	GET_THREAD_INFO(%r10)
    34.7 -	testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
    34.8 +	testl  $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
    34.9  	jnz  sysenter_tracesys
   34.10  sysenter_do_call:	
   34.11  	cmpl	$(IA32_NR_syscalls),%eax
   34.12 @@ -184,7 +184,7 @@ 1:	movl	(%r8),%r9d
   34.13  	.quad 1b,ia32_badarg
   34.14  	.previous	
   34.15  	GET_THREAD_INFO(%r10)
   34.16 -	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
   34.17 +	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
   34.18  	jnz   cstar_tracesys
   34.19  cstar_do_call:	
   34.20  	cmpl $IA32_NR_syscalls,%eax
   34.21 @@ -263,7 +263,7 @@ ENTRY(ia32_syscall)
   34.22  	   this could be a problem. */
   34.23  	SAVE_ARGS 0,0,1
   34.24  	GET_THREAD_INFO(%r10)
   34.25 -	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%r10)
   34.26 +	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
   34.27  	jnz ia32_tracesys
   34.28  ia32_do_syscall:	
   34.29  	cmpl $(IA32_NR_syscalls),%eax
   34.30 @@ -617,7 +617,7 @@ ia32_sys_call_table:
   34.31  	.quad compat_sys_mq_notify
   34.32  	.quad compat_sys_mq_getsetattr
   34.33  	.quad quiet_ni_syscall		/* reserved for kexec */
   34.34 -	.quad sys32_waitid
   34.35 +	.quad compat_sys_waitid
   34.36  	.quad quiet_ni_syscall		/* sys_altroot */
   34.37  	.quad sys_add_key
   34.38  	.quad sys_request_key
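
All three ia32 entry points (sysenter, syscall, int 0x80) now fold _TIF_SECCOMP into the mask that already diverts traced and audited tasks to the slow path, so a seccomp-restricted task can never reach the fast dispatch. A C rendering of that testl/jnz, with illustrative flag values (the real ones live in asm/thread_info.h):

```c
#include <stdio.h>

/* Illustrative flag values; the real ones live in asm/thread_info.h. */
#define _TIF_SYSCALL_TRACE (1u << 0)
#define _TIF_SYSCALL_AUDIT (1u << 1)
#define _TIF_SECCOMP       (1u << 2)

#define SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP)

/* C equivalent of "testl $(...),threadinfo_flags(%r10); jnz tracesys". */
static const char *entry_path(unsigned int ti_flags)
{
	return (ti_flags & SYSCALL_WORK) ? "tracesys (slow path)"
					 : "direct dispatch";
}

int main(void)
{
	printf("no flags: %s\n", entry_path(0));
	printf("seccomp:  %s\n", entry_path(_TIF_SECCOMP));
	return 0;
}
```
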
    35.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c	Mon Jul 11 09:29:56 2005 -0500
    35.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/ia32/syscall32.c	Mon Jul 11 09:35:19 2005 -0500
    35.3 @@ -9,6 +9,7 @@
    35.4  #include <linux/gfp.h>
    35.5  #include <linux/init.h>
    35.6  #include <linux/stringify.h>
    35.7 +#include <linux/security.h>
    35.8  #include <asm/proto.h>
    35.9  #include <asm/tlbflush.h>
   35.10  #include <asm/ia32_unistd.h>
   35.11 @@ -50,51 +51,57 @@ extern int sysctl_vsyscall32;
   35.12  
   35.13  char *syscall32_page; 
   35.14  
   35.15 -/*
   35.16 - * Map the 32bit vsyscall page on demand.
   35.17 - *
   35.18 - * RED-PEN: This knows too much about high level VM.
   35.19 - *
   35.20 - * Alternative would be to generate a vma with appropriate backing options
   35.21 - * and let it be handled by generic VM.
   35.22 - */
   35.23 -int __map_syscall32(struct mm_struct *mm, unsigned long address)
   35.24 -{ 
   35.25 -	pgd_t *pgd;
   35.26 -	pud_t *pud;
   35.27 -	pte_t *pte;
   35.28 -	pmd_t *pmd;
   35.29 -	int err = -ENOMEM;
   35.30 -
   35.31 -	spin_lock(&mm->page_table_lock); 
   35.32 - 	pgd = pgd_offset(mm, address);
   35.33 - 	pud = pud_alloc(mm, pgd, address);
   35.34 - 	if (pud) {
   35.35 - 		pmd = pmd_alloc(mm, pud, address);
   35.36 - 		if (pmd && (pte = pte_alloc_map(mm, pmd, address)) != NULL) {
   35.37 - 			if (pte_none(*pte)) {
   35.38 - 				set_pte(pte,
   35.39 - 					mk_pte(virt_to_page(syscall32_page),
   35.40 - 					       PAGE_KERNEL_VSYSCALL32));
   35.41 - 			}
   35.42 - 			/* Flush only the local CPU. Other CPUs taking a fault
   35.43 - 			   will just end up here again
   35.44 -			   This probably not needed and just paranoia. */
   35.45 - 			__flush_tlb_one(address);
   35.46 - 			err = 0;
   35.47 -		}
   35.48 -	}
   35.49 -	spin_unlock(&mm->page_table_lock);
   35.50 -	return err;
   35.51 +static struct page *
   35.52 +syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
   35.53 +{
   35.54 +	struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page);
   35.55 +	get_page(p);
   35.56 +	return p;
   35.57  }
   35.58  
   35.59 -int map_syscall32(struct mm_struct *mm, unsigned long address)
   35.60 +/* Prevent VMA merging */
   35.61 +static void syscall32_vma_close(struct vm_area_struct *vma)
   35.62  {
   35.63 -	int err;
   35.64 -	down_read(&mm->mmap_sem);
   35.65 -	err = __map_syscall32(mm, address);
   35.66 -	up_read(&mm->mmap_sem);
   35.67 -	return err;
   35.68 +}
   35.69 +
   35.70 +static struct vm_operations_struct syscall32_vm_ops = {
   35.71 +	.close = syscall32_vma_close,
   35.72 +	.nopage = syscall32_nopage,
   35.73 +};
   35.74 +
   35.75 +struct linux_binprm;
   35.76 +
   35.77 +/* Setup a VMA at program startup for the vsyscall page */
   35.78 +int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
   35.79 +{
   35.80 +	int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
   35.81 +	struct vm_area_struct *vma;
   35.82 +	struct mm_struct *mm = current->mm;
   35.83 +
   35.84 +	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
   35.85 +	if (!vma)
   35.86 +		return -ENOMEM;
   35.87 +	if (security_vm_enough_memory(npages)) {
   35.88 +		kmem_cache_free(vm_area_cachep, vma);
   35.89 +		return -ENOMEM;
   35.90 +	}
   35.91 +
   35.92 +	memset(vma, 0, sizeof(struct vm_area_struct));
   35.93 +	/* Could randomize here */
   35.94 +	vma->vm_start = VSYSCALL32_BASE;
   35.95 +	vma->vm_end = VSYSCALL32_END;
   35.96 +	/* MAYWRITE to allow gdb to COW and set breakpoints */
    35.97 +	vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
   35.98 +	vma->vm_flags |= mm->def_flags;
   35.99 +	vma->vm_page_prot = protection_map[vma->vm_flags & 7];
  35.100 +	vma->vm_ops = &syscall32_vm_ops;
  35.101 +	vma->vm_mm = mm;
  35.102 +
  35.103 +	down_write(&mm->mmap_sem);
  35.104 +	insert_vm_struct(mm, vma);
  35.105 +	mm->total_vm += npages;
  35.106 +	up_write(&mm->mmap_sem);
  35.107 +	return 0;
  35.108  }
  35.109  
  35.110  static int __init init_syscall32(void)
  35.111 @@ -102,7 +109,6 @@ static int __init init_syscall32(void)
  35.112  	syscall32_page = (void *)get_zeroed_page(GFP_KERNEL); 
  35.113  	if (!syscall32_page) 
  35.114  		panic("Cannot allocate syscall32 page"); 
  35.115 -	SetPageReserved(virt_to_page(syscall32_page));
  35.116  
  35.117  #ifdef USE_INT80
  35.118  	/*
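
The rewrite above drops the hand-rolled page-table walk in favour of a real VMA whose .nopage callback hands back the shared vsyscall page on every fault, with a non-NULL .close to keep the VMA from being merged away. A self-contained userspace analogue of that ops-table pattern (all types here are stand-ins, not kernel definitions):

```c
#include <stdio.h>

/* All types below are userspace stand-ins, not kernel definitions. */
struct page { const char *what; };

struct vma;
struct vm_ops {
	void         (*close)(struct vma *vma); /* non-NULL blocks merging */
	struct page *(*nopage)(struct vma *vma, unsigned long addr);
};

struct vma {
	unsigned long start, end;
	const struct vm_ops *ops;
};

static struct page vsyscall_page = { "shared 32-bit vsyscall page" };

static void vsyscall_close(struct vma *vma) { (void)vma; }

static struct page *vsyscall_nopage(struct vma *vma, unsigned long addr)
{
	(void)vma; (void)addr;
	return &vsyscall_page;      /* every fault resolves to the one page */
}

static const struct vm_ops vsyscall_ops = {
	.close  = vsyscall_close,
	.nopage = vsyscall_nopage,
};

int main(void)
{
	struct vma vma = { 0xffffe000ul, 0xfffff000ul, &vsyscall_ops };

	/* What the fault handler would do on first touch of the region. */
	struct page *p = vma.ops->nopage(&vma, vma.start);
	printf("fault at %#lx resolved to: %s\n", vma.start, p->what);
	return 0;
}
```
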
    36.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Mon Jul 11 09:29:56 2005 -0500
    36.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/Makefile	Mon Jul 11 09:35:19 2005 -0500
    36.3 @@ -38,6 +38,7 @@ c-obj-$(CONFIG_X86_IO_APIC)	+= genapic.o
    36.4  c-obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o pci-dma.o
    36.5  #obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
    36.6  obj-$(CONFIG_KPROBES)		+= kprobes.o
    36.7 +obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
    36.8  
    36.9  c-obj-$(CONFIG_MODULES)		+= module.o
   36.10  
    37.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c	Mon Jul 11 09:29:56 2005 -0500
    37.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/asm-offsets.c	Mon Jul 11 09:35:19 2005 -0500
    37.3 @@ -63,8 +63,8 @@ int main(void)
    37.4  	       offsetof (struct rt_sigframe32, uc.uc_mcontext));
    37.5  	BLANK();
    37.6  #endif
    37.7 -	DEFINE(SIZEOF_PBE, sizeof(struct pbe));
    37.8  	DEFINE(pbe_address, offsetof(struct pbe, address));
    37.9  	DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
   37.10 +	DEFINE(pbe_next, offsetof(struct pbe, next));
   37.11  	return 0;
   37.12  }
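
The asm-offsets.c change trades the SIZEOF_PBE constant for a pbe_next field offset, so the swsusp assembly can follow the pagedir list by pointer instead of by stride. The mechanism deserves a sketch: C code computes offsetof() values and the build exports them as assembler constants. The real file emits them through inline asm markers; printing them is an equivalent userspace rendering, and this struct pbe layout is a stand-in:

```c
#include <stddef.h>
#include <stdio.h>

/* Stand-in layout; the real struct pbe is the kernel's suspend pagedir entry. */
struct pbe {
	unsigned long address;
	unsigned long orig_address;
	struct pbe *next;
};

#define DEFINE(sym, val) printf("#define %-16s %zu\n", #sym, (size_t)(val))

int main(void)
{
	/* The compiler, not the programmer, computes the offsets that the
	 * suspend/resume assembly will use to walk the pbe list. */
	DEFINE(pbe_address,      offsetof(struct pbe, address));
	DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
	DEFINE(pbe_next,         offsetof(struct pbe, next));
	return 0;
}
```
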
    38.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c	Mon Jul 11 09:29:56 2005 -0500
    38.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/e820.c	Mon Jul 11 09:35:19 2005 -0500
    38.3 @@ -2,6 +2,12 @@
    38.4   * Handle the memory map.
    38.5   * The functions here do the job until bootmem takes over.
    38.6   * $Id: e820.c,v 1.4 2002/09/19 19:25:32 ak Exp $
    38.7 + *
    38.8 + *  Getting sanitize_e820_map() in sync with i386 version by applying change:
    38.9 + *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
   38.10 + *     Alex Achenbach <xela@slit.de>, December 2002.
   38.11 + *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
   38.12 + *
   38.13   */
   38.14  #include <linux/config.h>
   38.15  #include <linux/kernel.h>
   38.16 @@ -279,7 +285,7 @@ static int __init sanitize_e820_map(stru
   38.17  	int chgidx, still_changing;
   38.18  	int overlap_entries;
   38.19  	int new_bios_entry;
   38.20 -	int old_nr, new_nr;
   38.21 +	int old_nr, new_nr, chg_nr;
   38.22  	int i;
   38.23  
   38.24  	/*
   38.25 @@ -333,20 +339,24 @@ static int __init sanitize_e820_map(stru
   38.26  	for (i=0; i < 2*old_nr; i++)
   38.27  		change_point[i] = &change_point_list[i];
   38.28  
   38.29 -	/* record all known change-points (starting and ending addresses) */
   38.30 +	/* record all known change-points (starting and ending addresses),
   38.31 +	   omitting those that are for empty memory regions */
   38.32  	chgidx = 0;
   38.33  	for (i=0; i < old_nr; i++)	{
   38.34 -		change_point[chgidx]->addr = biosmap[i].addr;
   38.35 -		change_point[chgidx++]->pbios = &biosmap[i];
   38.36 -		change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
   38.37 -		change_point[chgidx++]->pbios = &biosmap[i];
   38.38 +		if (biosmap[i].size != 0) {
   38.39 +			change_point[chgidx]->addr = biosmap[i].addr;
   38.40 +			change_point[chgidx++]->pbios = &biosmap[i];
   38.41 +			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
   38.42 +			change_point[chgidx++]->pbios = &biosmap[i];
   38.43 +		}
   38.44  	}
   38.45 +	chg_nr = chgidx;
   38.46  
   38.47  	/* sort change-point list by memory addresses (low -> high) */
   38.48  	still_changing = 1;
   38.49  	while (still_changing)	{
   38.50  		still_changing = 0;
   38.51 -		for (i=1; i < 2*old_nr; i++)  {
   38.52 +		for (i=1; i < chg_nr; i++)  {
   38.53  			/* if <current_addr> > <last_addr>, swap */
   38.54  			/* or, if current=<start_addr> & last=<end_addr>, swap */
   38.55  			if ((change_point[i]->addr < change_point[i-1]->addr) ||
   38.56 @@ -369,7 +379,7 @@ static int __init sanitize_e820_map(stru
   38.57  	last_type = 0;		 /* start with undefined memory type */
   38.58  	last_addr = 0;		 /* start with 0 as last starting address */
   38.59  	/* loop through change-points, determining affect on the new bios map */
   38.60 -	for (chgidx=0; chgidx < 2*old_nr; chgidx++)
   38.61 +	for (chgidx=0; chgidx < chg_nr; chgidx++)
   38.62  	{
   38.63  		/* keep track of all overlapping bios entries */
   38.64  		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
   38.65 @@ -512,22 +522,67 @@ extern unsigned long xen_override_max_pf
   38.66  
   38.67  void __init parse_memopt(char *p, char **from) 
   38.68  { 
   38.69 -	/*
   38.70 -	 * mem=XXX[kKmM] limits kernel memory to XXX+1MB
   38.71 -	 *
   38.72 -	 * It would be more logical to count from 0 instead of from
   38.73 -	 * HIGH_MEMORY, but we keep that for now for i386 compatibility. 
   38.74 -	 *	
   38.75 -	 * No support for custom mapping like i386.  The reason is
   38.76 -	 * that we need to read the e820 map anyways to handle the
   38.77 -	 * ACPI mappings in the direct map.  Also on x86-64 there
   38.78 -	 * should be always a good e820 map. This is only an upper
   38.79 -	 * limit, you cannot force usage of memory not in e820.
   38.80 -	 *
   38.81 -	 * -AK
   38.82 -			 */
   38.83 -	end_user_pfn = memparse(p, from) + HIGH_MEMORY;
   38.84 +	end_user_pfn = memparse(p, from);
   38.85  	end_user_pfn >>= PAGE_SHIFT;	
   38.86          xen_override_max_pfn = (unsigned long) end_user_pfn;
   38.87  } 
   38.88  
   38.89 +unsigned long pci_mem_start = 0xaeedbabe;
   38.90 +
   38.91 +/*
   38.92 + * Search for the biggest gap in the low 32 bits of the e820
   38.93 + * memory space.  We pass this space to PCI to assign MMIO resources
    38.94 + * for hotplug or unconfigured devices.
    38.95 + * Hopefully the BIOS left enough space.
   38.96 + */
   38.97 +__init void e820_setup_gap(void)
   38.98 +{
   38.99 +	unsigned long gapstart, gapsize;
  38.100 +	unsigned long last;
  38.101 +	int i;
  38.102 +	int found = 0;
  38.103 +
  38.104 +	last = 0x100000000ull;
  38.105 +	gapstart = 0x10000000;
  38.106 +	gapsize = 0x400000;
  38.107 +	i = e820.nr_map;
  38.108 +	while (--i >= 0) {
  38.109 +		unsigned long long start = e820.map[i].addr;
  38.110 +		unsigned long long end = start + e820.map[i].size;
  38.111 +
  38.112 +		/*
  38.113 +		 * Since "last" is at most 4GB, we know we'll
  38.114 +		 * fit in 32 bits if this condition is true
  38.115 +		 */
  38.116 +		if (last > end) {
  38.117 +			unsigned long gap = last - end;
  38.118 +
  38.119 +			if (gap > gapsize) {
  38.120 +				gapsize = gap;
  38.121 +				gapstart = end;
  38.122 +				found = 1;
  38.123 +			}
  38.124 +		}
  38.125 +		if (start < last)
  38.126 +			last = start;
  38.127 +	}
  38.128 +
  38.129 +	if (!found) {
  38.130 +		gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
  38.131 +		printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit address range\n"
  38.132 +		       KERN_ERR "PCI: Unassigned devices with 32bit resource registers may break!\n");
  38.133 +	}
  38.134 +
  38.135 +	/*
  38.136 +	 * Start allocating dynamic PCI memory a bit into the gap,
  38.137 +	 * aligned up to the nearest megabyte.
  38.138 +	 *
  38.139 +	 * Question: should we try to pad it up a bit (do something
  38.140 +	 * like " + (gapsize >> 3)" in there too?). We now have the
  38.141 +	 * technology.
  38.142 +	 */
  38.143 +	pci_mem_start = (gapstart + 0xfffff) & ~0xfffff;
  38.144 +
  38.145 +	printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
  38.146 +		pci_mem_start, gapstart, gapsize);
  38.147 +}
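
e820_setup_gap() scans the map from high addresses down, remembering the largest hole below 4 GiB and then rounding the chosen window start up to a megabyte. A runnable sketch of the same scan over a hypothetical three-entry map (entries listed highest-first to mirror the backwards walk):

```c
#include <stdio.h>

/* Hypothetical e820-style map, listed highest-first to mirror the
 * backwards walk in e820_setup_gap(). */
struct region { unsigned long long addr, size; };

int main(void)
{
	const struct region map[] = {
		{ 0xfee00000ull, 0x00100000ull },  /* APIC space            */
		{ 0x00100000ull, 0xbfe00000ull },  /* RAM up to ~3 GiB      */
		{ 0x00000000ull, 0x000a0000ull },  /* low RAM               */
	};
	unsigned long long last = 0x100000000ull;  /* scan down from 4 GiB */
	unsigned long long gapstart = 0x10000000, gapsize = 0x400000;
	int n = (int)(sizeof(map) / sizeof(map[0]));

	for (int i = 0; i < n; i++) {
		unsigned long long start = map[i].addr;
		unsigned long long end = start + map[i].size;

		if (last > end && last - end > gapsize) {
			gapsize = last - end;
			gapstart = end;
		}
		if (start < last)
			last = start;
	}
	/* Round the window start up to a megabyte, as the hunk does. */
	gapstart = (gapstart + 0xfffff) & ~0xfffffull;
	printf("PCI window at %#llx (gap of %#llx bytes)\n", gapstart, gapsize);
	return 0;
}
```

On this made-up map the window lands at 0xc0000000, just above the end of RAM.
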
    39.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c	Mon Jul 11 09:29:56 2005 -0500
    39.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/early_printk.c	Mon Jul 11 09:35:19 2005 -0500
    39.3 @@ -62,7 +62,7 @@ static struct console early_vga_console 
    39.4  #ifndef CONFIG_XEN
    39.5  /* Serial functions loosely based on a similar package from Klaus P. Gerlicher */ 
    39.6  
    39.7 -int early_serial_base = 0x3f8;  /* ttyS0 */ 
    39.8 +static int early_serial_base = 0x3f8;  /* ttyS0 */
    39.9  
   39.10  #define XMTRDY          0x20
   39.11  
    40.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S	Mon Jul 11 09:29:56 2005 -0500
    40.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S	Mon Jul 11 09:35:19 2005 -0500
    40.3 @@ -54,10 +54,7 @@
    40.4  	
    40.5  	.code64
    40.6  
    40.7 -#ifdef CONFIG_PREEMPT
    40.8 -#define preempt_stop XEN_BLOCK_EVENTS(%rsi)
    40.9 -#else
   40.10 -#define preempt_stop
   40.11 +#ifndef CONFIG_PREEMPT
   40.12  #define retint_kernel retint_restore_args
   40.13  #endif	
   40.14  
   40.15 @@ -157,7 +154,7 @@ ENTRY(ret_from_fork)
   40.16  	CFI_DEFAULT_STACK
   40.17  	call schedule_tail
   40.18  	GET_THREAD_INFO(%rcx)
   40.19 -	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
   40.20 +	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
   40.21  	jnz rff_trace
   40.22  rff_action:	
   40.23  	RESTORE_REST
   40.24 @@ -207,7 +204,7 @@ ENTRY(system_call)
   40.25  	movq  %rax,ORIG_RAX-ARGOFFSET(%rsp) 
   40.26          XEN_UNBLOCK_EVENTS(%r11)        
   40.27  	GET_THREAD_INFO(%rcx)
   40.28 -	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
   40.29 +	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
   40.30  	jnz tracesys
   40.31  	cmpq $__NR_syscall_max,%rax
   40.32  	ja badsys
   40.33 @@ -220,7 +217,7 @@ ENTRY(system_call)
   40.34   */		
   40.35  	.globl ret_from_sys_call
   40.36  ret_from_sys_call:
   40.37 -	movl $_TIF_WORK_MASK,%edi
   40.38 +	movl $_TIF_ALLWORK_MASK,%edi
   40.39  	/* edi:	flagmask */
   40.40  sysret_check:		
   40.41  	GET_THREAD_INFO(%rcx)
   40.42 @@ -316,6 +313,7 @@ int_careful:
   40.43  	pushq %rdi
   40.44  	call schedule
   40.45  	popq %rdi
   40.46 +	cli
   40.47  	jmp int_with_check
   40.48  
   40.49  	/* handle signals and tracing -- both require a full stack frame */
   40.50 @@ -330,9 +328,8 @@ int_very_careful:
   40.51  	leaq 8(%rsp),%rdi	# &ptregs -> arg1	
   40.52  	call syscall_trace_leave
   40.53  	popq %rdi
   40.54 -	btr  $TIF_SYSCALL_TRACE,%edi
   40.55 -	btr  $TIF_SYSCALL_AUDIT,%edi
   40.56 -	btr  $TIF_SINGLESTEP,%edi
   40.57 +	andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
   40.58 +	cli
   40.59  	jmp int_restore_rest
   40.60  	
   40.61  int_signal:
   40.62 @@ -344,6 +341,7 @@ int_signal:
   40.63  1:	movl $_TIF_NEED_RESCHED,%edi	
   40.64  int_restore_rest:
   40.65  	RESTORE_REST
   40.66 +	cli
   40.67  	jmp int_with_check
   40.68  	CFI_ENDPROC
   40.69  		
   40.70 @@ -511,7 +509,7 @@ retint_signal:
   40.71  	RESTORE_REST
   40.72          XEN_BLOCK_EVENTS(%rsi)		
   40.73  	movl $_TIF_NEED_RESCHED,%edi
   40.74 -	GET_THREAD_INFO(%rcx)	
   40.75 +	GET_THREAD_INFO(%rcx)
   40.76  	jmp retint_check
   40.77  
   40.78  #ifdef CONFIG_PREEMPT
   40.79 @@ -524,15 +522,8 @@ retint_kernel:
   40.80  	bt  $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
   40.81  	jnc  retint_restore_args
   40.82  	bt   $9,EFLAGS-ARGOFFSET(%rsp)	/* interrupts off? */
   40.83 -	jc   retint_restore_args
   40.84 -	movl $PREEMPT_ACTIVE,threadinfo_preempt_count(%rcx)
   40.85 -/*	sti */
   40.86 -	XEN_UNBLOCK_EVENTS(%rsi)
   40.87 -	call schedule
   40.88 -	XEN_BLOCK_EVENTS(%rsi)
   40.89 -/*	cli */
   40.90 -	GET_THREAD_INFO(%rcx)
   40.91 -	movl $0,threadinfo_preempt_count(%rcx) 
   40.92 +	jnc  retint_restore_args
   40.93 +	call preempt_schedule_irq
   40.94  	jmp retint_kernel       /* check again */
   40.95  #endif	
   40.96  	CFI_ENDPROC
   40.97 @@ -610,6 +601,7 @@ 1:	movq %rsp,%rdi
   40.98  	movq ORIG_RAX(%rsp),%rsi
   40.99  	movq $-1,ORIG_RAX(%rsp)
  40.100  	call \sym
  40.101 +	cli
  40.102  	.endm
  40.103  	
  40.104  /*
  40.105 @@ -934,8 +926,6 @@ ENTRY(debug)
  40.106  	pushq $0
  40.107  	CFI_ADJUST_CFA_OFFSET 8		
  40.108  	paranoidentry do_debug
  40.109 -	/* switch back to process stack to restore the state ptrace touched */
  40.110 -	movq %rax,%rsp	
  40.111  	jmp paranoid_exit
  40.112  	CFI_ENDPROC
  40.113  
  40.114 @@ -946,38 +936,62 @@ ENTRY(nmi)
  40.115  	pushq $-1
  40.116  	CFI_ADJUST_CFA_OFFSET 8		
  40.117  	paranoidentry do_nmi
  40.118 +	/*
  40.119 + 	 * "Paranoid" exit path from exception stack.
  40.120 +  	 * Paranoid because this is used by NMIs and cannot take
  40.121 +	 * any kernel state for granted.
  40.122 +	 * We don't do kernel preemption checks here, because only
  40.123 +	 * NMI should be common and it does not enable IRQs and
  40.124 +	 * cannot get reschedule ticks.
  40.125 +	 */
  40.126  	/* ebx:	no swapgs flag */
  40.127  #endif        
  40.128  paranoid_exit:
  40.129  	testl %ebx,%ebx				/* swapgs needed? */
  40.130  	jnz paranoid_restore
  40.131  paranoid_swapgs:	
  40.132 -/*	cli
  40.133 -	swapgs */
  40.134 +/*	swapgs */
  40.135  paranoid_restore:	
  40.136  	RESTORE_ALL 8
  40.137  /*	iretq */
  40.138  paranoid_userspace:	
  40.139 -/*	cli */
  40.140  	GET_THREAD_INFO(%rcx)
  40.141 -	movl threadinfo_flags(%rcx),%edx
  40.142 -	testl $_TIF_NEED_RESCHED,%edx
  40.143 -	jnz paranoid_resched
  40.144 -	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
  40.145 -	jnz paranoid_signal
  40.146 -	jmp paranoid_swapgs
  40.147 -paranoid_resched:		
  40.148 +#	movl threadinfo_flags(%rcx),%edx
  40.149 +#	testl $_TIF_NEED_RESCHED,%edx
  40.150 +#	jnz paranoid_resched
  40.151 +#	testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
  40.152 +#	jnz paranoid_signal
  40.153 +#	jmp paranoid_swapgs
  40.154 +#paranoid_resched:		
  40.155 +#/*	sti */
  40.156 +#	call schedule
  40.157 +#	jmp paranoid_exit
  40.158 +#paranoid_signal:		
  40.159 +	movl threadinfo_flags(%rcx),%ebx
  40.160 +	andl $_TIF_WORK_MASK,%ebx
  40.161 +	jz paranoid_swapgs
  40.162 +	movq %rsp,%rdi			/* &pt_regs */
  40.163 +	call sync_regs
  40.164 +	movq %rax,%rsp			/* switch stack for scheduling */
  40.165 +	testl $_TIF_NEED_RESCHED,%ebx
  40.166 +	jnz paranoid_schedule
  40.167 +	movl %ebx,%edx			/* arg3: thread flags */
  40.168  /*	sti */
  40.169 +#	xorl %esi,%esi /* oldset */
  40.170 +#	movq %rsp,%rdi /* &pt_regs */
  40.171 +	xorl %esi,%esi 			/* arg2: oldset */
  40.172 +	movq %rsp,%rdi 			/* arg1: &pt_regs */
  40.173 +	call do_notify_resume
  40.174 +#	jmp paranoid_exit
  40.175 +	cli
  40.176 +	jmp paranoid_userspace
  40.177 +paranoid_schedule:
  40.178 +	sti
  40.179  	call schedule
  40.180 -	jmp paranoid_exit
  40.181 -paranoid_signal:		
  40.182 -/*	sti */
  40.183 -	xorl %esi,%esi /* oldset */
  40.184 -	movq %rsp,%rdi /* &pt_regs */
  40.185 -	call do_notify_resume
  40.186 -	jmp paranoid_exit
  40.187 +	cli
  40.188 +	jmp paranoid_userspace
  40.189  	CFI_ENDPROC
  40.190 -	
  40.191 +
  40.192  ENTRY(int3)
  40.193  	zeroentry do_int3	
  40.194  
  40.195 @@ -1000,7 +1014,6 @@ ENTRY(reserved)
  40.196  ENTRY(double_fault)
  40.197  	CFI_STARTPROC
  40.198  	paranoidentry do_double_fault
  40.199 -	movq %rax,%rsp
  40.200  	jmp paranoid_exit
  40.201  	CFI_ENDPROC
  40.202  
  40.203 @@ -1014,7 +1027,6 @@ ENTRY(segment_not_present)
  40.204  ENTRY(stack_segment)
  40.205  	CFI_STARTPROC
  40.206  	paranoidentry do_stack_segment
  40.207 -	movq %rax,%rsp
  40.208  	jmp paranoid_exit
  40.209  	CFI_ENDPROC
  40.210  
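
The reworked paranoid exit path replaces the open-coded resched/signal branches with a single loop: sync_regs() relocates pt_regs onto the process stack, then the code keeps servicing work flags, rescheduling first, until none remain. A C model of that control flow (flag values are illustrative; sync_regs is omitted since it only moves the register frame):

```c
#include <stdio.h>

/* Illustrative work flags; _TIF_WORK_MASK covers both. */
#define _TIF_SIGPENDING   (1u << 0)
#define _TIF_NEED_RESCHED (1u << 1)
#define _TIF_WORK_MASK    (_TIF_SIGPENDING | _TIF_NEED_RESCHED)

static unsigned int thread_flags = _TIF_NEED_RESCHED | _TIF_SIGPENDING;

static void schedule(void)         { thread_flags &= ~_TIF_NEED_RESCHED; }
static void do_notify_resume(void) { thread_flags &= ~_TIF_SIGPENDING; }

int main(void)
{
	unsigned int work;

	/* Loop until no work is left, re-reading the flags on each pass
	 * with interrupts (conceptually) disabled; rescheduling wins over
	 * signal delivery, as in the new asm. */
	while ((work = thread_flags & _TIF_WORK_MASK) != 0) {
		if (work & _TIF_NEED_RESCHED)
			schedule();          /* sti; call schedule; cli   */
		else
			do_notify_resume();  /* deliver pending signals   */
		printf("pass done, flags now %#x\n", thread_flags);
	}
	printf("no work left: restore registers, return to user mode\n");
	return 0;
}
```
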
    41.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S	Mon Jul 11 09:29:56 2005 -0500
    41.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head.S	Mon Jul 11 09:35:19 2005 -0500
    41.3 @@ -39,6 +39,8 @@
    41.4  
    41.5  	.text
    41.6  	.code64
    41.7 +	.globl startup_64
    41.8 +startup_64:
    41.9  ENTRY(_start)
   41.10          cld                
   41.11  	movq init_rsp(%rip),%rsp
   41.12 @@ -136,9 +138,8 @@ ENTRY(cpu_gdt_table)
   41.13  	.quad	0x00affa000000ffff	/* __USER_CS */
   41.14  	.quad	0x00cffa000000ffff	/* __KERNEL32_CS */        
   41.15  	.quad	0,0			/* TSS */
   41.16 -	.quad	0			/* LDT */
   41.17 +	.quad	0,0			/* LDT */
   41.18  	.quad   0,0,0			/* three TLS descriptors */ 
   41.19 -	.quad	0			/* unused now */
   41.20  
   41.21  gdt_end:	
   41.22  	/* asm/segment.h:GDT_ENTRIES must match this */	
   41.23 @@ -184,12 +185,6 @@ gdt:
   41.24  	.endr
   41.25  #endif
   41.26  
   41.27 -ENTRY(gdt_table32)
   41.28 -	.quad	0x0000000000000000	/* This one is magic */
   41.29 -	.quad	0x0000000000000000	/* unused */
   41.30 -	.quad	0x00af9a000000ffff	/* __KERNEL_CS */
   41.31 -gdt32_end:	
   41.32 -	
   41.33  /* We need valid kernel segments for data and code in long mode too
   41.34   * IRET will check the segment types  kkeil 2000/10/28
   41.35   * Also sysret mandates a special GDT layout 
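
The GDT hunk widens the LDT slot from one .quad to two because long-mode system descriptors (LDT and TSS) are 16 bytes rather than 8; the previously "unused" quad is absorbed into it. A quick structural check of that 16-byte layout, per the AMD64 architecture manuals:

```c
#include <stdio.h>
#include <stdint.h>

/* 16-byte long-mode LDT/TSS descriptor; code and data descriptors stay
 * 8 bytes, hence the extra .quad in the GDT above. */
struct ldt_desc64 {
	uint16_t limit0;
	uint16_t base0;
	uint8_t  base1;
	uint8_t  type_attrs;
	uint8_t  limit1_flags;
	uint8_t  base2;
	uint32_t base3;        /* bits 63:32 of the base: the second quad */
	uint32_t reserved;
} __attribute__((packed));

int main(void)
{
	printf("long-mode system descriptor: %zu bytes\n",
	       sizeof(struct ldt_desc64));
	return 0;
}
```
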
    42.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c	Mon Jul 11 09:29:56 2005 -0500
    42.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/head64.c	Mon Jul 11 09:35:19 2005 -0500
    42.3 @@ -36,8 +36,6 @@ static void __init clear_bss(void)
    42.4  }
    42.5  #endif
    42.6  
    42.7 -extern char x86_boot_params[2048];
    42.8 -
    42.9  #define NEW_CL_POINTER		0x228	/* Relative to real mode data */
   42.10  #define OLD_CL_MAGIC_ADDR	0x90020
   42.11  #define OLD_CL_MAGIC            0xA33F
   42.12 @@ -52,7 +50,7 @@ static void __init copy_bootdata(char *r
   42.13  	int new_data;
   42.14  	char * command_line;
   42.15  
   42.16 -	memcpy(x86_boot_params, real_mode_data, 2048); 
   42.17 +	memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
   42.18  	new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
   42.19  	if (!new_data) {
   42.20  		if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
   42.21 @@ -105,9 +103,6 @@ void __init x86_64_start_kernel(char * r
   42.22  #ifdef CONFIG_SMP
   42.23  	cpu_set(0, cpu_online_map);
   42.24  #endif
   42.25 -	/* default console: */
   42.26 -	if (!strstr(saved_command_line, "console="))
   42.27 -		strcat(saved_command_line, " console=tty0"); 
   42.28  #if 0
   42.29  	s = strstr(saved_command_line, "earlyprintk=");
   42.30  	if (s != NULL)
    43.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c	Mon Jul 11 09:29:56 2005 -0500
    43.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/io_apic.c	Mon Jul 11 09:35:19 2005 -0500
    43.3 @@ -37,11 +37,14 @@
    43.4  #include <asm/desc.h>
    43.5  #include <asm/proto.h>
    43.6  #include <asm/mach_apic.h>
    43.7 +#include <asm/acpi.h>
    43.8  
    43.9  #define __apicdebuginit  __init
   43.10  
   43.11  int sis_apic_bug; /* not actually supported, dummy for compile */
   43.12  
   43.13 +static int no_timer_check;
   43.14 +
   43.15  static DEFINE_SPINLOCK(ioapic_lock);
   43.16  
   43.17  /*
   43.18 @@ -192,7 +195,7 @@ static void unmask_IO_APIC_irq (unsigned
   43.19  	spin_unlock_irqrestore(&ioapic_lock, flags);
   43.20  }
   43.21  
   43.22 -void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
   43.23 +static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
   43.24  {
   43.25  	struct IO_APIC_route_entry entry;
   43.26  	unsigned long flags;
   43.27 @@ -232,8 +235,8 @@ static void clear_IO_APIC (void)
   43.28   */
   43.29  
   43.30  #define MAX_PIRQS 8
   43.31 -int pirq_entries [MAX_PIRQS];
   43.32 -int pirqs_enabled;
   43.33 +static int pirq_entries [MAX_PIRQS];
   43.34 +static int pirqs_enabled;
   43.35  int skip_ioapic_setup;
   43.36  int ioapic_force;
   43.37  
   43.38 @@ -758,7 +761,7 @@ static inline void ioapic_register_intr(
   43.39  #define ioapic_register_intr(_irq,_vector,_trigger) ((void)0)
   43.40  #endif /* !CONFIG_XEN */
   43.41  
   43.42 -void __init setup_IO_APIC_irqs(void)
   43.43 +static void __init setup_IO_APIC_irqs(void)
   43.44  {
   43.45  	struct IO_APIC_route_entry entry;
   43.46  	int apic, pin, idx, irq, first_notcon = 1, vector;
   43.47 @@ -828,7 +831,7 @@ void __init setup_IO_APIC_irqs(void)
   43.48   * Set up the 8259A-master output pin as broadcast to all
   43.49   * CPUs.
   43.50   */
   43.51 -void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
   43.52 +static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
   43.53  {
   43.54  	struct IO_APIC_route_entry entry;
   43.55  	unsigned long flags;
   43.56 @@ -1000,6 +1003,8 @@ void __apicdebuginit print_IO_APIC(void)
   43.57  	return;
   43.58  }
   43.59  
   43.60 +#if 0
   43.61 +
   43.62  static __apicdebuginit void print_APIC_bitfield (int base)
   43.63  {
   43.64  	unsigned int v;
   43.65 @@ -1141,6 +1146,8 @@ void __apicdebuginit print_PIC(void)
   43.66  	v = inb(0x4d1) << 8 | inb(0x4d0);
   43.67  	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
   43.68  }
   43.69 +#endif  /*  0  */
   43.70 +
   43.71  #else
   43.72  void __init print_IO_APIC(void) { }
   43.73  #endif /* !CONFIG_XEN */
   43.74 @@ -1661,13 +1668,12 @@ static inline void check_timer(void)
   43.75  		 * Ok, does IRQ0 through the IOAPIC work?
   43.76  		 */
   43.77  		unmask_IO_APIC_irq(0);
   43.78 -		if (timer_irq_works()) {
   43.79 +		if (!no_timer_check && timer_irq_works()) {
   43.80  			nmi_watchdog_default();
   43.81  			if (nmi_watchdog == NMI_IO_APIC) {
   43.82  				disable_8259A_irq(0);
   43.83  				setup_nmi();
   43.84  				enable_8259A_irq(0);
   43.85 -				check_nmi_watchdog();
   43.86  			}
   43.87  			return;
   43.88  		}
   43.89 @@ -1687,7 +1693,6 @@ static inline void check_timer(void)
   43.90  			nmi_watchdog_default();
   43.91  			if (nmi_watchdog == NMI_IO_APIC) {
   43.92  				setup_nmi();
   43.93 -				check_nmi_watchdog();
   43.94  			}
   43.95  			return;
   43.96  		}
   43.97 @@ -1736,6 +1741,13 @@ static inline void check_timer(void)
   43.98  #define check_timer() ((void)0)
   43.99  #endif /* !CONFIG_XEN */
  43.100  
  43.101 +static int __init notimercheck(char *s)
  43.102 +{
  43.103 +	no_timer_check = 1;
  43.104 +	return 1;
  43.105 +}
  43.106 +__setup("no_timer_check", notimercheck);
  43.107 +
  43.108  /*
  43.109   *
  43.110   * IRQ's that are handled by the PIC in the MPS IOAPIC case.
  43.111 @@ -1777,7 +1789,7 @@ struct sysfs_ioapic_data {
  43.112  };
  43.113  static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
  43.114  
  43.115 -static int ioapic_suspend(struct sys_device *dev, u32 state)
  43.116 +static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
  43.117  {
  43.118  	struct IO_APIC_route_entry *entry;
  43.119  	struct sysfs_ioapic_data *data;
  43.120 @@ -1871,78 +1883,6 @@ device_initcall(ioapic_init_sysfs);
  43.121  
  43.122  #define IO_APIC_MAX_ID		0xFE
  43.123  
  43.124 -int __init io_apic_get_unique_id (int ioapic, int apic_id)
  43.125 -{
  43.126 -#ifndef CONFIG_XEN
  43.127 -	union IO_APIC_reg_00 reg_00;
  43.128 -	static physid_mask_t apic_id_map;
  43.129 -	unsigned long flags;
  43.130 -	int i = 0;
  43.131 -
  43.132 -	/*
  43.133 -	 * The P4 platform supports up to 256 APIC IDs on two separate APIC 
  43.134 -	 * buses (one for LAPICs, one for IOAPICs), where predecessors only 
  43.135 -	 * supports up to 16 on one shared APIC bus.
  43.136 -	 * 
  43.137 -	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
  43.138 -	 *      advantage of new APIC bus architecture.
  43.139 -	 */
  43.140 -
  43.141 -	if (physids_empty(apic_id_map))
  43.142 -		apic_id_map = phys_cpu_present_map;
  43.143 -
  43.144 -	spin_lock_irqsave(&ioapic_lock, flags);
  43.145 -	reg_00.raw = io_apic_read(ioapic, 0);
  43.146 -	spin_unlock_irqrestore(&ioapic_lock, flags);
  43.147 -
  43.148 -	if (apic_id >= IO_APIC_MAX_ID) {
  43.149 -		apic_printk(APIC_QUIET, KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
  43.150 -			"%d\n", ioapic, apic_id, reg_00.bits.ID);
  43.151 -		apic_id = reg_00.bits.ID;
  43.152 -	}
  43.153 -
  43.154 -	/*
  43.155 -	 * Every APIC in a system must have a unique ID or we get lots of nice 
  43.156 -	 * 'stuck on smp_invalidate_needed IPI wait' messages.
  43.157 -	 */
  43.158 -	if (physid_isset(apic_id, apic_id_map)) {
  43.159 -
  43.160 -		for (i = 0; i < IO_APIC_MAX_ID; i++) {
  43.161 -			if (!physid_isset(i, apic_id_map))
  43.162 -				break;
  43.163 -		}
  43.164 -
  43.165 -		if (i == IO_APIC_MAX_ID)
  43.166 -			panic("Max apic_id exceeded!\n");
  43.167 -
  43.168 -		apic_printk(APIC_VERBOSE, KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
  43.169 -			"trying %d\n", ioapic, apic_id, i);
  43.170 -
  43.171 -		apic_id = i;
  43.172 -	} 
  43.173 -
  43.174 -	physid_set(apic_id, apic_id_map);
  43.175 -
  43.176 -	if (reg_00.bits.ID != apic_id) {
  43.177 -		reg_00.bits.ID = apic_id;
  43.178 -
  43.179 -		spin_lock_irqsave(&ioapic_lock, flags);
  43.180 -		io_apic_write(ioapic, 0, reg_00.raw);
  43.181 -		reg_00.raw = io_apic_read(ioapic, 0);
  43.182 -		spin_unlock_irqrestore(&ioapic_lock, flags);
  43.183 -
  43.184 -		/* Sanity check */
  43.185 -		if (reg_00.bits.ID != apic_id)
  43.186 -			panic("IOAPIC[%d]: Unable change apic_id!\n", ioapic);
  43.187 -	}
  43.188 -
  43.189 -	apic_printk(APIC_VERBOSE,KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
  43.190 -#endif /* !CONFIG_XEN */
  43.191 -
  43.192 -	return apic_id;
  43.193 -}
  43.194 -
  43.195 -
  43.196  int __init io_apic_get_version (int ioapic)
  43.197  {
  43.198  	union IO_APIC_reg_01	reg_01;
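
check_timer() now consults a no_timer_check flag, settable from the command line via __setup(), before probing whether IRQ0 actually fires through the IO-APIC. A compact model of that boot-parameter gate (the prefix-match parser is a simplification of the kernel's real __setup machinery):

```c
#include <stdio.h>
#include <string.h>

static int no_timer_check;

/* Simplified stand-in for __setup("no_timer_check", notimercheck): the
 * real machinery matches registered prefixes against the command line. */
static void parse_early_options(const char *cmdline)
{
	if (strstr(cmdline, "no_timer_check"))
		no_timer_check = 1;
}

static int timer_irq_works(void) { return 0; } /* pretend the probe fails */

int main(void)
{
	parse_early_options("root=/dev/sda1 no_timer_check");

	/* Mirrors "if (!no_timer_check && timer_irq_works())": with the
	 * flag set, the probe is never even attempted. */
	if (!no_timer_check && timer_irq_works())
		printf("IRQ0 verified through the IO-APIC\n");
	else
		printf("probe skipped or failed (no_timer_check=%d)\n",
		       no_timer_check);
	return 0;
}
```
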
    44.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c	Mon Jul 11 09:29:56 2005 -0500
    44.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ioport.c	Mon Jul 11 09:35:19 2005 -0500
    44.3 @@ -25,8 +25,7 @@
    44.4   *
    44.5   */
    44.6  
    44.7 -// asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
    44.8 -asmlinkage long sys_iopl(unsigned int new_io_pl)
    44.9 +asmlinkage long sys_iopl(unsigned int new_io_pl, struct pt_regs *regs)
   44.10  {
   44.11          unsigned int old_io_pl = current->thread.io_pl;
   44.12          physdev_op_t op;
   44.13 @@ -59,5 +58,5 @@ asmlinkage long sys_iopl(unsigned int ne
   44.14   */
   44.15  asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
   44.16  {
   44.17 -  return turn_on ? sys_iopl(3) : 0;
   44.18 +  return turn_on ? sys_iopl(3, NULL) : 0;
   44.19  }
    45.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c	Mon Jul 11 09:29:56 2005 -0500
    45.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/mpparse.c	Mon Jul 11 09:35:19 2005 -0500
    45.3 @@ -30,6 +30,7 @@
    45.4  #include <asm/pgalloc.h>
    45.5  #include <asm/io_apic.h>
    45.6  #include <asm/proto.h>
    45.7 +#include <asm/acpi.h>
    45.8  
    45.9  /* Have we found an MP table */
   45.10  int smp_found_config;
   45.11 @@ -46,7 +47,7 @@ unsigned char mp_bus_id_to_type [MAX_MP_
   45.12  int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
   45.13  cpumask_t pci_bus_to_cpumask [256] = { [0 ... 255] = CPU_MASK_ALL };
   45.14  
   45.15 -int mp_current_pci_id = 0;
   45.16 +static int mp_current_pci_id = 0;
   45.17  /* I/O APIC entries */
   45.18  struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
   45.19  
   45.20 @@ -108,6 +109,7 @@ static int __init mpf_checksum(unsigned 
   45.21  static void __init MP_processor_info (struct mpc_config_processor *m)
   45.22  {
   45.23  	int ver;
   45.24 +	static int found_bsp=0;
   45.25  
   45.26  	if (!(m->mpc_cpuflag & CPU_ENABLED))
   45.27  		return;
   45.28 @@ -127,11 +129,6 @@ static void __init MP_processor_info (st
   45.29  			" Processor ignored.\n", NR_CPUS);
   45.30  		return;
   45.31  	}
   45.32 -	if (num_processors >= maxcpus) {
   45.33 -		printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
   45.34 -			" Processor ignored.\n", maxcpus);
   45.35 -		return;
   45.36 -	}
   45.37  
   45.38  	num_processors++;
   45.39  
   45.40 @@ -151,7 +148,19 @@ static void __init MP_processor_info (st
   45.41  		ver = 0x10;
   45.42  	}
   45.43  	apic_version[m->mpc_apicid] = ver;
   45.44 -	bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
   45.45 + 	if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
   45.46 + 		/*
   45.47 + 		 * bios_cpu_apicid is required to have processors listed
   45.48 + 		 * in same order as logical cpu numbers. Hence the first
   45.49 + 		 * entry is BSP, and so on.
   45.50 + 		 */
   45.51 + 		bios_cpu_apicid[0] = m->mpc_apicid;
   45.52 + 		x86_cpu_to_apicid[0] = m->mpc_apicid;
   45.53 + 		found_bsp = 1;
   45.54 + 	} else {
   45.55 + 		bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
   45.56 + 		x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
   45.57 + 	}
   45.58  }
   45.59  #else
   45.60  void __init MP_processor_info (struct mpc_config_processor *m)
   45.61 @@ -714,7 +723,7 @@ void __init mp_register_lapic (
   45.62  #define MP_ISA_BUS		0
   45.63  #define MP_MAX_IOAPIC_PIN	127
   45.64  
   45.65 -struct mp_ioapic_routing {
   45.66 +static struct mp_ioapic_routing {
   45.67  	int			apic_id;
   45.68  	int			gsi_start;
   45.69  	int			gsi_end;
   45.70 @@ -764,7 +773,7 @@ void __init mp_register_ioapic (
   45.71  	mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
   45.72  	mp_ioapics[idx].mpc_apicaddr = address;
   45.73  
   45.74 -	mp_ioapics[idx].mpc_apicid = io_apic_get_unique_id(idx, id);
   45.75 +	mp_ioapics[idx].mpc_apicid = id;
   45.76  	mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
   45.77  	
   45.78  	/* 
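
MP_processor_info() now records the boot processor in slot 0 of bios_cpu_apicid/x86_cpu_to_apicid regardless of where the MP table lists it, offsetting every later entry by found_bsp. A sketch of that indexing over a made-up table where the BSP is listed second (CPU_BOOTPROCESSOR is bit 1 of mpc_cpuflag):

```c
#include <stdio.h>

#define CPU_BOOTPROCESSOR 0x02   /* bit 1 of mpc_cpuflag */

struct mp_cpu { int apicid; int cpuflag; };

int main(void)
{
	/* Made-up MP table in which the BSP is listed second. */
	const struct mp_cpu table[] = {
		{ 4, 0 }, { 0, CPU_BOOTPROCESSOR }, { 6, 0 }, { 2, 0 },
	};
	int bios_cpu_apicid[4];
	int num_processors = 0, found_bsp = 0;

	for (int i = 0; i < 4; i++) {
		num_processors++;
		if (table[i].cpuflag & CPU_BOOTPROCESSOR) {
			bios_cpu_apicid[0] = table[i].apicid; /* BSP = cpu 0 */
			found_bsp = 1;
		} else {
			/* APs fill the later slots; subtracting found_bsp
			 * keeps the numbering dense once slot 0 is taken. */
			bios_cpu_apicid[num_processors - found_bsp] =
				table[i].apicid;
		}
	}
	for (int i = 0; i < 4; i++)
		printf("logical cpu %d -> apicid %d\n", i, bios_cpu_apicid[i]);
	return 0;
}
```
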
    46.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c	Mon Jul 11 09:29:56 2005 -0500
    46.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/process.c	Mon Jul 11 09:35:19 2005 -0500
    46.3 @@ -37,6 +37,7 @@
    46.4  #include <linux/irq.h>
    46.5  #include <linux/ptrace.h>
    46.6  #include <linux/utsname.h>
    46.7 +#include <linux/random.h>
    46.8  
    46.9  #include <asm/uaccess.h>
   46.10  #include <asm/pgtable.h>
   46.11 @@ -59,7 +60,7 @@ asmlinkage extern void ret_from_fork(voi
   46.12  
   46.13  unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
   46.14  
   46.15 -atomic_t hlt_counter = ATOMIC_INIT(0);
   46.16 +static atomic_t hlt_counter = ATOMIC_INIT(0);
   46.17  
   46.18  unsigned long boot_option_idle_override = 0;
   46.19  EXPORT_SYMBOL(boot_option_idle_override);
   46.20 @@ -68,7 +69,7 @@ EXPORT_SYMBOL(boot_option_idle_override)
   46.21   * Powermanagement idle function, if any..
   46.22   */
   46.23  void (*pm_idle)(void);
   46.24 -static cpumask_t cpu_idle_map;
   46.25 +static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
   46.26  
   46.27  void disable_hlt(void)
   46.28  {
   46.29 @@ -140,8 +141,8 @@ void cpu_idle (void)
   46.30  	/* endless idle loop with no priority at all */
   46.31  	while (1) {
   46.32  		while (!need_resched()) {
   46.33 -			if (cpu_isset(cpu, cpu_idle_map))
   46.34 -				cpu_clear(cpu, cpu_idle_map);
   46.35 +			if (__get_cpu_var(cpu_idle_state))
   46.36 +				__get_cpu_var(cpu_idle_state) = 0;
   46.37  			rmb();
   46.38  			
   46.39  			if (cpu_is_offline(cpu))
   46.40 @@ -150,22 +151,35 @@ void cpu_idle (void)
   46.41                          __IRQ_STAT(cpu,idle_timestamp) = jiffies;
   46.42  			xen_idle();
   46.43  		}
   46.44 +
   46.45  		schedule();
   46.46  	}
   46.47  }
   46.48  
   46.49  void cpu_idle_wait(void)
   46.50  {
   46.51 -	int cpu;
   46.52 +	unsigned int cpu, this_cpu = get_cpu();
   46.53  	cpumask_t map;
   46.54  
   46.55 -	for_each_online_cpu(cpu)
   46.56 -		cpu_set(cpu, cpu_idle_map);
   46.57 +	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
   46.58 +	put_cpu();
   46.59 +
   46.60 + 	cpus_clear(map);
   46.61 +	for_each_online_cpu(cpu) {
   46.62 +		per_cpu(cpu_idle_state, cpu) = 1;
   46.63 +		cpu_set(cpu, map);
   46.64 +	}
   46.65 +
   46.66 +	__get_cpu_var(cpu_idle_state) = 0;
   46.67  
   46.68  	wmb();
   46.69  	do {
   46.70  		ssleep(1);
   46.71 -		cpus_and(map, cpu_idle_map, cpu_online_map);
   46.72 +		for_each_online_cpu(cpu) {
   46.73 +			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
   46.74 +				cpu_clear(cpu, map);
   46.75 +		}
   46.76 +		cpus_and(map, map, cpu_online_map);
   46.77  	} while (!cpus_empty(map));
   46.78  }
   46.79  EXPORT_SYMBOL_GPL(cpu_idle_wait);
   46.80 @@ -199,11 +213,11 @@ void __show_regs(struct pt_regs * regs)
   46.81  	printk("R13: %016lx R14: %016lx R15: %016lx\n",
   46.82  	       regs->r13, regs->r14, regs->r15); 
   46.83  
   46.84 -	asm("movl %%ds,%0" : "=r" (ds)); 
   46.85 -	asm("movl %%cs,%0" : "=r" (cs)); 
   46.86 -	asm("movl %%es,%0" : "=r" (es)); 
   46.87 -	asm("movl %%fs,%0" : "=r" (fsindex));
   46.88 -	asm("movl %%gs,%0" : "=r" (gsindex));
   46.89 +	asm("mov %%ds,%0" : "=r" (ds)); 
   46.90 +	asm("mov %%cs,%0" : "=r" (cs)); 
   46.91 +	asm("mov %%es,%0" : "=r" (es)); 
   46.92 +	asm("mov %%fs,%0" : "=r" (fsindex));
   46.93 +	asm("mov %%gs,%0" : "=r" (gsindex));
   46.94  
   46.95  	rdmsrl(MSR_FS_BASE, fs);
   46.96  	rdmsrl(MSR_GS_BASE, gs); 
   46.97 @@ -343,10 +357,10 @@ int copy_thread(int nr, unsigned long cl
   46.98  	p->thread.fs = me->thread.fs;
   46.99  	p->thread.gs = me->thread.gs;
  46.100  
  46.101 -	asm("movl %%gs,%0" : "=m" (p->thread.gsindex));
  46.102 -	asm("movl %%fs,%0" : "=m" (p->thread.fsindex));
  46.103 -	asm("movl %%es,%0" : "=m" (p->thread.es));
  46.104 -	asm("movl %%ds,%0" : "=m" (p->thread.ds));
  46.105 +	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
  46.106 +	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
  46.107 +	asm("mov %%es,%0" : "=m" (p->thread.es));
  46.108 +	asm("mov %%ds,%0" : "=m" (p->thread.ds));
  46.109  
  46.110  	if (unlikely(me->thread.io_bitmap_ptr != NULL)) { 
  46.111  		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
  46.112 @@ -651,7 +665,7 @@ long do_arch_prctl(struct task_struct *t
  46.113  			set_32bit_tls(task, FS_TLS, addr);
  46.114  			if (doit) { 
  46.115  				load_TLS(&task->thread, cpu); 
  46.116 -				asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
  46.117 +				asm volatile("mov %0,%%fs" :: "r" (FS_TLS_SEL));
  46.118  			}
  46.119  			task->thread.fsindex = FS_TLS_SEL;
  46.120  			task->thread.fs = 0;
  46.121 @@ -661,7 +675,7 @@ long do_arch_prctl(struct task_struct *t
  46.122  			if (doit) {
  46.123  				/* set the selector to 0 to not confuse
  46.124  				   __switch_to */
  46.125 -		asm volatile("movl %0,%%fs" :: "r" (0));
  46.126 +		asm volatile("mov %0,%%fs" :: "r" (0));
  46.127                                  ret = HYPERVISOR_set_segment_base(SEGBASE_FS, addr);
  46.128  
  46.129  			}
  46.130 @@ -723,3 +737,10 @@ int dump_task_regs(struct task_struct *t
  46.131          boot_option_idle_override = 1;
  46.132  	return 1;
  46.133  }
  46.134 +
  46.135 +unsigned long arch_align_stack(unsigned long sp)
  46.136 +{
  46.137 +	if (randomize_va_space)
  46.138 +		sp -= get_random_int() % 8192;
  46.139 +	return sp & ~0xf;
  46.140 +}
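
The new arch_align_stack() above arrives with 2.6.12's userspace stack randomisation: it subtracts up to 8 KB of random offset from the initial stack pointer, then rounds down to a 16-byte boundary as the x86-64 ABI expects. A user-space sketch of the arithmetic (not part of the changeset; rand() stands in for get_random_int(), and the flag for randomize_va_space):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t align_stack_sketch(uint64_t sp, int randomize)
{
	if (randomize)
		sp -= (uint64_t)(rand() % 8192);	/* up to 8 KB of jitter */
	return sp & ~(uint64_t)0xf;			/* 16-byte alignment */
}

int main(void)
{
	srand(42);
	printf("%#llx\n",
	       (unsigned long long)align_stack_sketch(0x7ffffffff000ULL, 1));
	return 0;
}
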
    47.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c	Mon Jul 11 09:29:56 2005 -0500
    47.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c	Mon Jul 11 09:35:19 2005 -0500
    47.3 @@ -86,7 +86,6 @@ unsigned long xen_override_max_pfn;
    47.4  struct cpuinfo_x86 boot_cpu_data;
    47.5  
    47.6  unsigned long mmu_cr4_features;
    47.7 -EXPORT_SYMBOL_GPL(mmu_cr4_features);
    47.8  
    47.9  int acpi_disabled;
   47.10  EXPORT_SYMBOL(acpi_disabled);
   47.11 @@ -98,9 +97,6 @@ int __initdata acpi_force = 0;
   47.12  
   47.13  int acpi_numa __initdata;
   47.14  
   47.15 -/* For PCI or other memory-mapped resources */
   47.16 -unsigned long pci_mem_start = 0x10000000;
   47.17 -
   47.18  /* Boot loader ID as an integer, for the benefit of proc_dointvec */
   47.19  int bootloader_type;
   47.20  
   47.21 @@ -124,8 +120,6 @@ struct sys_desc_table_struct {
   47.22  struct edid_info edid_info;
   47.23  struct e820map e820;
   47.24  
   47.25 -unsigned char aux_device_present;
   47.26 -
   47.27  extern int root_mountflags;
   47.28  extern char _text, _etext, _edata, _end;
   47.29  
   47.30 @@ -377,6 +371,10 @@ static __init void parse_cmdline_early (
   47.31  		else if (!memcmp(from, "acpi=strict", 11)) {
   47.32  			acpi_strict = 1;
   47.33  		}
   47.34 +#ifdef CONFIG_X86_IO_APIC
   47.35 +		else if (!memcmp(from, "acpi_skip_timer_override", 24))
   47.36 +			acpi_skip_timer_override = 1;
   47.37 +#endif
   47.38  #endif
   47.39  
   47.40  #if 0
   47.41 @@ -603,7 +601,6 @@ static void __init print_memory_map(char
   47.42  
   47.43  void __init setup_arch(char **cmdline_p)
   47.44  {
   47.45 -	unsigned long low_mem_size;
   47.46  	int i, j;
   47.47  	physdev_op_t op;
   47.48  
   47.49 @@ -618,7 +615,6 @@ void __init setup_arch(char **cmdline_p)
   47.50   	screen_info = SCREEN_INFO;
   47.51  #endif
   47.52  	edid_info = EDID_INFO;
   47.53 -	aux_device_present = AUX_DEVICE_INFO;
   47.54  	saved_video_mode = SAVED_VIDEO_MODE;
   47.55  	bootloader_type = LOADER_TYPE;
   47.56  
   47.57 @@ -821,13 +817,7 @@ void __init setup_arch(char **cmdline_p)
   47.58  		request_resource(&ioport_resource, &standard_io_resources[i]);
   47.59  	}
   47.60  
   47.61 -	/* Will likely break when you have unassigned resources with more
   47.62 -	   than 4GB memory and bridges that don't support more than 4GB. 
   47.63 -	   Doing it properly would require to use pci_alloc_consistent
   47.64 -	   in this case. */
   47.65 -	low_mem_size = ((end_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff;
   47.66 -	if (low_mem_size > pci_mem_start)
   47.67 -		pci_mem_start = low_mem_size;
   47.68 +	e820_setup_gap();
   47.69  
   47.70  #ifdef CONFIG_GART_IOMMU
   47.71         iommu_hole_init();
   47.72 @@ -867,7 +857,7 @@ static int __init get_model_name(struct 
   47.73  {
   47.74  	unsigned int *v;
   47.75  
   47.76 -	if (c->x86_cpuid_level < 0x80000004)
   47.77 +	if (c->extended_cpuid_level < 0x80000004)
   47.78  		return 0;
   47.79  
   47.80  	v = (unsigned int *) c->x86_model_id;
   47.81 @@ -883,7 +873,7 @@ static void __init display_cacheinfo(str
   47.82  {
   47.83  	unsigned int n, dummy, eax, ebx, ecx, edx;
   47.84  
   47.85 -	n = c->x86_cpuid_level;
   47.86 +	n = c->extended_cpuid_level;
   47.87  
   47.88  	if (n >= 0x80000005) {
   47.89  		cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
   47.90 @@ -913,14 +903,50 @@ static void __init display_cacheinfo(str
   47.91  	}
   47.92  }
   47.93  
   47.94 +/*
   47.95 + * On an AMD dual-core setup, the lower bits of the APIC ID distinguish the cores.
   47.96 + * Assumes number of cores is a power of two.
   47.97 + */
   47.98 +static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
   47.99 +{
  47.100 +#ifdef CONFIG_SMP
  47.101 +	int cpu = smp_processor_id();
  47.102 +	int node = 0;
  47.103 +	unsigned bits;
  47.104 +	if (c->x86_num_cores == 1)
  47.105 +		return;
  47.106 +
  47.107 +	bits = 0;
  47.108 +	while ((1 << bits) < c->x86_num_cores)
  47.109 +		bits++;
  47.110 +
  47.111 +	/* Low order bits define the core id (index of core in socket) */
  47.112 +	cpu_core_id[cpu] = phys_proc_id[cpu] & ((1 << bits)-1);
  47.113 +	/* Convert the APIC ID into the socket ID */
  47.114 +	phys_proc_id[cpu] >>= bits;
  47.115 +
  47.116 +#ifdef CONFIG_NUMA
   47.117 +	/* When an ACPI SRAT table is available, use the mappings from SRAT
   47.118 +	   instead. */
  47.119 +	if (acpi_numa <= 0) {
  47.120 +		node = phys_proc_id[cpu];
  47.121 +		if (!node_online(node))
  47.122 +			node = first_node(node_online_map);
  47.123 +		cpu_to_node[cpu] = node;
  47.124 +	} else {
  47.125 +		node = cpu_to_node[cpu];
  47.126 +	}
  47.127 +#endif
  47.128 +
  47.129 +	printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
  47.130 +			cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
  47.131 +#endif
  47.132 +}
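
amd_detect_cmp() above recovers the topology purely from the APIC ID: with 2^bits cores per package, the low bits select the core and the remaining bits select the socket. A standalone sketch of that decomposition (not part of the changeset; the IDs are made up):

#include <stdio.h>

static void split_apicid(unsigned apicid, unsigned num_cores)
{
	unsigned bits = 0;

	while ((1u << bits) < num_cores)	/* bits = ceil(log2(num_cores)) */
		bits++;
	printf("apicid %u -> core %u, socket %u\n",
	       apicid, apicid & ((1u << bits) - 1), apicid >> bits);
}

int main(void)
{
	split_apicid(5, 2);	/* dual core: core 1 of socket 2 */
	split_apicid(4, 2);	/* dual core: core 0 of socket 2 */
	return 0;
}
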
  47.133  
  47.134  static int __init init_amd(struct cpuinfo_x86 *c)
  47.135  {
  47.136  	int r;
  47.137  	int level;
  47.138 -#ifdef CONFIG_NUMA
  47.139 -	int cpu;
  47.140 -#endif
  47.141  
  47.142  	/* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
  47.143  	   3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
  47.144 @@ -943,26 +969,12 @@ static int __init init_amd(struct cpuinf
  47.145  	} 
  47.146  	display_cacheinfo(c);
  47.147  
  47.148 -	if (c->x86_cpuid_level >= 0x80000008) {
  47.149 +	if (c->extended_cpuid_level >= 0x80000008) {
  47.150  		c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
  47.151  		if (c->x86_num_cores & (c->x86_num_cores - 1))
  47.152  			c->x86_num_cores = 1;
  47.153  
  47.154 -#ifdef CONFIG_NUMA
  47.155 -		/* On a dual core setup the lower bits of apic id
  47.156 -		   distingush the cores. Fix up the CPU<->node mappings
  47.157 -		   here based on that.
  47.158 -		   Assumes number of cores is a power of two.
  47.159 -		   When using SRAT use mapping from SRAT. */
  47.160 -		cpu = c->x86_apicid;
  47.161 -		if (acpi_numa <= 0 && c->x86_num_cores > 1) {
  47.162 -			cpu_to_node[cpu] = cpu >> hweight32(c->x86_num_cores - 1);
  47.163 -			if (!node_online(cpu_to_node[cpu]))
  47.164 -				cpu_to_node[cpu] = first_node(node_online_map);
  47.165 -		}
  47.166 -		printk(KERN_INFO "CPU %d(%d) -> Node %d\n",
  47.167 -				cpu, c->x86_num_cores, cpu_to_node[cpu]);
  47.168 -#endif
  47.169 +		amd_detect_cmp(c);
  47.170  	}
  47.171  
  47.172  	return r;
  47.173 @@ -972,10 +984,10 @@ static void __init detect_ht(struct cpui
  47.174  {
  47.175  #ifdef CONFIG_SMP
  47.176  	u32 	eax, ebx, ecx, edx;
  47.177 -	int 	index_lsb, index_msb, tmp;
  47.178 +	int 	index_msb, tmp;
  47.179  	int 	cpu = smp_processor_id();
  47.180  	
  47.181 -	if (!cpu_has(c, X86_FEATURE_HT))
  47.182 +	if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
  47.183  		return;
  47.184  
  47.185  	cpuid(1, &eax, &ebx, &ecx, &edx);
  47.186 @@ -984,7 +996,6 @@ static void __init detect_ht(struct cpui
  47.187  	if (smp_num_siblings == 1) {
  47.188  		printk(KERN_INFO  "CPU: Hyper-Threading is disabled\n");
  47.189  	} else if (smp_num_siblings > 1) {
  47.190 -		index_lsb = 0;
  47.191  		index_msb = 31;
  47.192  		/*
  47.193  		 * At this point we only support two siblings per
  47.194 @@ -996,45 +1007,65 @@ static void __init detect_ht(struct cpui
  47.195  			return;
  47.196  		}
  47.197  		tmp = smp_num_siblings;
  47.198 -		while ((tmp & 1) == 0) {
  47.199 -			tmp >>=1 ;
  47.200 -			index_lsb++;
  47.201 -		}
  47.202 -		tmp = smp_num_siblings;
  47.203  		while ((tmp & 0x80000000 ) == 0) {
  47.204  			tmp <<=1 ;
  47.205  			index_msb--;
  47.206  		}
  47.207 -		if (index_lsb != index_msb )
  47.208 +		if (smp_num_siblings & (smp_num_siblings - 1))
  47.209  			index_msb++;
  47.210  		phys_proc_id[cpu] = phys_pkg_id(index_msb);
  47.211  		
  47.212  		printk(KERN_INFO  "CPU: Physical Processor ID: %d\n",
  47.213  		       phys_proc_id[cpu]);
  47.214 +
  47.215 +		smp_num_siblings = smp_num_siblings / c->x86_num_cores;
  47.216 +
  47.217 +		tmp = smp_num_siblings;
  47.218 +		index_msb = 31;
  47.219 +		while ((tmp & 0x80000000) == 0) {
  47.220 +			tmp <<=1 ;
  47.221 +			index_msb--;
  47.222 +		}
  47.223 +		if (smp_num_siblings & (smp_num_siblings - 1))
  47.224 +			index_msb++;
  47.225 +
  47.226 +		cpu_core_id[cpu] = phys_pkg_id(index_msb);
  47.227 +
  47.228 +		if (c->x86_num_cores > 1)
  47.229 +			printk(KERN_INFO  "CPU: Processor Core ID: %d\n",
  47.230 +			       cpu_core_id[cpu]);
  47.231  	}
  47.232  #endif
  47.233  }
  47.234  
  47.235 -static void __init sched_cmp_hack(struct cpuinfo_x86 *c)
  47.236 +/*
  47.237 + * find out the number of processor cores on the die
  47.238 + */
  47.239 +static int __init intel_num_cpu_cores(struct cpuinfo_x86 *c)
  47.240  {
  47.241 -#ifdef CONFIG_SMP
  47.242 -	/* AMD dual core looks like HT but isn't really. Hide it from the
  47.243 -	   scheduler. This works around problems with the domain scheduler.
  47.244 -	   Also probably gives slightly better scheduling and disables
  47.245 -	   SMT nice which is harmful on dual core.
  47.246 -	   TBD tune the domain scheduler for dual core. */
  47.247 -	if (c->x86_vendor == X86_VENDOR_AMD && cpu_has(c, X86_FEATURE_CMP_LEGACY))
  47.248 -		smp_num_siblings = 1;
  47.249 -#endif
  47.250 +	unsigned int eax;
  47.251 +
  47.252 +	if (c->cpuid_level < 4)
  47.253 +		return 1;
  47.254 +
  47.255 +	__asm__("cpuid"
  47.256 +		: "=a" (eax)
  47.257 +		: "0" (4), "c" (0)
  47.258 +		: "bx", "dx");
  47.259 +
  47.260 +	if (eax & 0x1f)
  47.261 +		return ((eax >> 26) + 1);
  47.262 +	else
  47.263 +		return 1;
  47.264  }
  47.265 -	
  47.266 +
  47.267  static void __init init_intel(struct cpuinfo_x86 *c)
  47.268  {
  47.269  	/* Cache sizes */
  47.270  	unsigned n;
  47.271  
  47.272  	init_intel_cacheinfo(c);
  47.273 -	n = c->x86_cpuid_level;
  47.274 +	n = c->extended_cpuid_level;
  47.275  	if (n >= 0x80000008) {
  47.276  		unsigned eax = cpuid_eax(0x80000008);
  47.277  		c->x86_virt_bits = (eax >> 8) & 0xff;
  47.278 @@ -1043,6 +1074,9 @@ static void __init init_intel(struct cpu
  47.279  
  47.280  	if (c->x86 == 15)
  47.281  		c->x86_cache_alignment = c->x86_clflush_size * 2;
  47.282 +	if (c->x86 >= 15)
  47.283 +		set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
   47.284 +	c->x86_num_cores = intel_num_cpu_cores(c);
  47.285  }
  47.286  
  47.287  void __init get_cpu_vendor(struct cpuinfo_x86 *c)
  47.288 @@ -1079,8 +1113,7 @@ void __init early_identify_cpu(struct cp
  47.289  	c->x86_clflush_size = 64;
  47.290  	c->x86_cache_alignment = c->x86_clflush_size;
  47.291  	c->x86_num_cores = 1;
  47.292 -	c->x86_apicid = c == &boot_cpu_data ? 0 : c - cpu_data;
  47.293 -	c->x86_cpuid_level = 0;
  47.294 +	c->extended_cpuid_level = 0;
  47.295  	memset(&c->x86_capability, 0, sizeof c->x86_capability);
  47.296  
  47.297  	/* Get vendor name */
  47.298 @@ -1108,11 +1141,14 @@ void __init early_identify_cpu(struct cp
  47.299  		} 
  47.300  		if (c->x86_capability[0] & (1<<19)) 
  47.301  			c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
  47.302 -		c->x86_apicid = misc >> 24;
  47.303  	} else {
  47.304  		/* Have CPUID level 0 only - unheard of */
  47.305  		c->x86 = 4;
  47.306  	}
  47.307 +
  47.308 +#ifdef CONFIG_SMP
  47.309 +	phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff;
  47.310 +#endif
  47.311  }
  47.312  
  47.313  /*
  47.314 @@ -1127,11 +1163,11 @@ void __init identify_cpu(struct cpuinfo_
  47.315  
  47.316  	/* AMD-defined flags: level 0x80000001 */
  47.317  	xlvl = cpuid_eax(0x80000000);
  47.318 -	c->x86_cpuid_level = xlvl;
  47.319 +	c->extended_cpuid_level = xlvl;
  47.320  	if ((xlvl & 0xffff0000) == 0x80000000) {
  47.321  		if (xlvl >= 0x80000001) {
  47.322  			c->x86_capability[1] = cpuid_edx(0x80000001);
  47.323 -			c->x86_capability[5] = cpuid_ecx(0x80000001);
  47.324 +			c->x86_capability[6] = cpuid_ecx(0x80000001);
  47.325  		}
  47.326  		if (xlvl >= 0x80000004)
  47.327  			get_model_name(c); /* Default name */
  47.328 @@ -1172,7 +1208,6 @@ void __init identify_cpu(struct cpuinfo_
  47.329  
  47.330  	select_idle_routine(c);
  47.331  	detect_ht(c); 
  47.332 -	sched_cmp_hack(c);
  47.333  
  47.334  	/*
  47.335  	 * On SMP, boot_cpu_data holds the common feature set between
  47.336 @@ -1231,7 +1266,7 @@ static int show_cpuinfo(struct seq_file 
  47.337  	        "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", NULL,
  47.338  
  47.339  		/* AMD-defined */
  47.340 -		"pni", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.341 +		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.342  		NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
  47.343  		NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL,
  47.344  		NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow",
  47.345 @@ -1243,7 +1278,8 @@ static int show_cpuinfo(struct seq_file 
  47.346  		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.347  
  47.348  		/* Other (Linux-defined) */
  47.349 -		"cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL,
  47.350 +		"cxmmx", NULL, "cyrix_arr", "centaur_mcr", NULL,
  47.351 +		"constant_tsc", NULL, NULL,
  47.352  		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.353  		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.354  		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.355 @@ -1254,17 +1290,25 @@ static int show_cpuinfo(struct seq_file 
  47.356  		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.357  		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.358  
  47.359 +		/* VIA/Cyrix/Centaur-defined */
  47.360 +		NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
  47.361 +		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.362 +		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.363 +		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.364 +
  47.365  		/* AMD-defined (#2) */
  47.366  		"lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL,
  47.367  		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.368  		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.369 -		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
  47.370 +		NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  47.371  	};
  47.372  	static char *x86_power_flags[] = { 
  47.373  		"ts",	/* temperature sensor */
  47.374  		"fid",  /* frequency id control */
  47.375  		"vid",  /* voltage id control */
  47.376  		"ttp",  /* thermal trip */
  47.377 +		"tm",
  47.378 +		"stc"
  47.379  	};
  47.380  
  47.381  
  47.382 @@ -1299,8 +1343,14 @@ static int show_cpuinfo(struct seq_file 
  47.383  		seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
  47.384  	
  47.385  #ifdef CONFIG_SMP
  47.386 -	seq_printf(m, "physical id\t: %d\n", phys_proc_id[c - cpu_data]);
  47.387 -	seq_printf(m, "siblings\t: %d\n", c->x86_num_cores * smp_num_siblings);
  47.388 +	if (smp_num_siblings * c->x86_num_cores > 1) {
  47.389 +		int cpu = c - cpu_data;
  47.390 +		seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
  47.391 +		seq_printf(m, "siblings\t: %d\n",
  47.392 +				c->x86_num_cores * smp_num_siblings);
  47.393 +		seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
  47.394 +		seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
  47.395 +	}
  47.396  #endif	
  47.397  
  47.398  	seq_printf(m,
  47.399 @@ -1342,12 +1392,8 @@ static int show_cpuinfo(struct seq_file 
  47.400  					seq_printf(m, " [%d]", i);
  47.401  			}
  47.402  	}
  47.403 -	seq_printf(m, "\n");
  47.404  
  47.405 -	if (c->x86_num_cores > 1)
  47.406 -		seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
  47.407 -
  47.408 -	seq_printf(m, "\n\n"); 
  47.409 +	seq_printf(m, "\n\n");
  47.410  
  47.411  	return 0;
  47.412  }
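
With the VIA/Cyrix/Centaur block inserted above, the AMD-defined ECX flags of CPUID 0x80000001 move to capability word 6; bit 0 is "lahf_lm" (LAHF/SAHF usable in 64-bit mode) and bit 1 is "cmp_legacy". A user-space sketch of decoding that register into the /proc/cpuinfo flag names (not part of the changeset):

#include <stdio.h>

static const char *amd_ecx_flags[32] = {
	[0] = "lahf_lm", [1] = "cmp_legacy",
};

static void print_flags(unsigned ecx)
{
	for (int i = 0; i < 32; i++)
		if ((ecx & (1u << i)) && amd_ecx_flags[i])
			printf(" %s", amd_ecx_flags[i]);
	printf("\n");
}

int main(void)
{
	print_flags(0x3);	/* both bits set: " lahf_lm cmp_legacy" */
	return 0;
}
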
    48.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c	Mon Jul 11 09:29:56 2005 -0500
    48.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup64.c	Mon Jul 11 09:35:19 2005 -0500
    48.3 @@ -16,6 +16,7 @@
    48.4  #include <linux/string.h>
    48.5  #include <linux/bootmem.h>
    48.6  #include <linux/bitops.h>
    48.7 +#include <asm/bootsetup.h>
    48.8  #include <asm/pda.h>
    48.9  #include <asm/pgtable.h>
   48.10  #include <asm/processor.h>
   48.11 @@ -32,7 +33,7 @@
   48.12  
   48.13  #include <asm-xen/hypervisor.h>
   48.14  
   48.15 -char x86_boot_params[2048] __initdata = {0,};
   48.16 +char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
   48.17  
   48.18  cpumask_t cpu_initialized __initdata = CPU_MASK_NONE;
   48.19  
    49.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c	Mon Jul 11 09:29:56 2005 -0500
    49.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/signal.c	Mon Jul 11 09:35:19 2005 -0500
    49.3 @@ -28,6 +28,7 @@
    49.4  #include <asm/uaccess.h>
    49.5  #include <asm/i387.h>
    49.6  #include <asm/proto.h>
    49.7 +#include <asm/ia32_unistd.h>
    49.8  
    49.9  /* #define DEBUG_SIG 1 */
   49.10  
   49.11 @@ -83,7 +84,7 @@ sys_sigaltstack(const stack_t __user *us
   49.12  
   49.13  struct rt_sigframe
   49.14  {
   49.15 -	char *pretcode;
   49.16 +	char __user *pretcode;
   49.17  	struct ucontext uc;
   49.18  	struct siginfo info;
   49.19  };
   49.20 @@ -121,7 +122,7 @@ restore_sigcontext(struct pt_regs *regs,
   49.21  		err |= __get_user(buf, &sc->fpstate);
   49.22  
   49.23  		if (buf) {
   49.24 -			if (verify_area(VERIFY_READ, buf, sizeof(*buf)))
   49.25 +			if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
   49.26  				goto badframe;
   49.27  			err |= restore_i387(buf);
   49.28  		} else {
   49.29 @@ -147,7 +148,7 @@ asmlinkage long sys_rt_sigreturn(struct 
   49.30  	unsigned long eax;
   49.31  
   49.32  	frame = (struct rt_sigframe __user *)(regs->rsp - 8);
   49.33 -	if (verify_area(VERIFY_READ, frame, sizeof(*frame))) { 
   49.34 +	if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) {
   49.35  		goto badframe;
   49.36  	} 
   49.37  	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) { 
   49.38 @@ -160,9 +161,8 @@ asmlinkage long sys_rt_sigreturn(struct 
   49.39  	recalc_sigpending();
   49.40  	spin_unlock_irq(&current->sighand->siglock);
   49.41  	
   49.42 -	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax)) {
   49.43 +	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &eax))
   49.44  		goto badframe;
   49.45 -	} 
   49.46  
   49.47  #ifdef DEBUG_SIG
   49.48  	printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax);
   49.49 @@ -186,7 +186,6 @@ static inline int
   49.50  setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me)
   49.51  {
   49.52  	int err = 0;
   49.53 -	unsigned long eflags;
   49.54  
   49.55  	err |= __put_user(0, &sc->gs);
   49.56  	err |= __put_user(0, &sc->fs);
   49.57 @@ -210,11 +209,7 @@ setup_sigcontext(struct sigcontext __use
   49.58  	err |= __put_user(me->thread.trap_no, &sc->trapno);
   49.59  	err |= __put_user(me->thread.error_code, &sc->err);
   49.60  	err |= __put_user(regs->rip, &sc->rip);
   49.61 -	eflags = regs->eflags;
   49.62 -	if (current->ptrace & PT_PTRACED) {
   49.63 -		eflags &= ~TF_MASK;
   49.64 -	}
   49.65 -	err |= __put_user(eflags, &sc->eflags);
   49.66 +	err |= __put_user(regs->eflags, &sc->eflags);
   49.67  	err |= __put_user(mask, &sc->oldmask);
   49.68  	err |= __put_user(me->thread.cr2, &sc->cr2);
   49.69  
   49.70 @@ -253,28 +248,25 @@ static void setup_rt_frame(int sig, stru
   49.71  
   49.72  	if (used_math()) {
   49.73  		fp = get_stack(ka, regs, sizeof(struct _fpstate)); 
   49.74 -		frame = (void __user *)round_down((unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
   49.75 +		frame = (void __user *)round_down(
   49.76 +			(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
   49.77  
   49.78 -		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) { 
   49.79 -		goto give_sigsegv;
   49.80 -		}
   49.81 +		if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate)))
   49.82 +			goto give_sigsegv;
   49.83  
   49.84  		if (save_i387(fp) < 0) 
   49.85  			err |= -1; 
   49.86 -	} else {
   49.87 +	} else
   49.88  		frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
   49.89 -	}
   49.90  
   49.91 -	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) {
   49.92 +	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
   49.93  		goto give_sigsegv;
   49.94 -	}
   49.95  
   49.96  	if (ka->sa.sa_flags & SA_SIGINFO) { 
   49.97  		err |= copy_siginfo_to_user(&frame->info, info);
   49.98 -		if (err) { 
   49.99 +		if (err)
  49.100  			goto give_sigsegv;
  49.101  	}
  49.102 -	}
  49.103  		
  49.104  	/* Create the ucontext.  */
  49.105  	err |= __put_user(0, &frame->uc.uc_flags);
  49.106 @@ -288,9 +280,8 @@ static void setup_rt_frame(int sig, stru
  49.107  	if (sizeof(*set) == 16) { 
  49.108  		__put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]);
  49.109  		__put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); 
  49.110 -	} else { 		
  49.111 -	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
  49.112 -	}
  49.113 +	} else
  49.114 +		err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
  49.115  
  49.116  	/* Set up to return from userspace.  If provided, use a stub
  49.117  	   already in userspace.  */
  49.118 @@ -302,9 +293,8 @@ static void setup_rt_frame(int sig, stru
  49.119  		goto give_sigsegv; 
  49.120  	}
  49.121  
  49.122 -	if (err) { 
  49.123 +	if (err)
  49.124  		goto give_sigsegv;
  49.125 -	} 
  49.126  
  49.127  #ifdef DEBUG_SIG
  49.128  	printk("%d old rip %lx old rsp %lx old rax %lx\n", current->pid,regs->rip,regs->rsp,regs->rax);
  49.129 @@ -329,14 +319,9 @@ static void setup_rt_frame(int sig, stru
  49.130  	regs->rsp = (unsigned long)frame;
  49.131  
  49.132  	set_fs(USER_DS);
  49.133 -	if (regs->eflags & TF_MASK) {
  49.134 -		if ((current->ptrace & (PT_PTRACED | PT_DTRACE)) == (PT_PTRACED | PT_DTRACE)) {
  49.135 -			ptrace_notify(SIGTRAP);
  49.136 -		} else {
  49.137 -			regs->eflags &= ~TF_MASK;
  49.138 -		}
  49.139 -	}
  49.140 -
  49.141 +	regs->eflags &= ~TF_MASK;
  49.142 +	if (test_thread_flag(TIF_SINGLESTEP))
  49.143 +		ptrace_notify(SIGTRAP);
  49.144  #ifdef DEBUG_SIG
  49.145  	printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n",
  49.146  		current->comm, current->pid, frame, regs->rip, frame->pretcode);
  49.147 @@ -357,7 +342,8 @@ handle_signal(unsigned long sig, siginfo
  49.148  		sigset_t *oldset, struct pt_regs *regs)
  49.149  {
  49.150  #ifdef DEBUG_SIG
  49.151 -	printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n", current->pid, sig, 
  49.152 +	printk("handle_signal pid:%d sig:%lu rip:%lx rsp:%lx regs=%p\n",
  49.153 +		current->pid, sig,
  49.154  		regs->rip, regs->rsp, regs);
  49.155  #endif
  49.156  
  49.157 @@ -375,10 +361,23 @@ handle_signal(unsigned long sig, siginfo
  49.158  					regs->rax = -EINTR;
  49.159  					break;
  49.160  				}
  49.161 -			/* fallthrough */
  49.162 +				/* fallthrough */
  49.163  			case -ERESTARTNOINTR:
  49.164  				regs->rax = regs->orig_rax;
  49.165  				regs->rip -= 2;
  49.166 +				break;
  49.167 +		}
  49.168 +	}
  49.169 +
  49.170 +	/*
  49.171 +	 * If TF is set due to a debugger (PT_DTRACE), clear the TF
  49.172 +	 * flag so that register information in the sigcontext is
  49.173 +	 * correct.
  49.174 +	 */
  49.175 +	if (unlikely(regs->eflags & TF_MASK)) {
  49.176 +		if (likely(current->ptrace & PT_DTRACE)) {
  49.177 +			current->ptrace &= ~PT_DTRACE;
  49.178 +			regs->eflags &= ~TF_MASK;
  49.179  		}
  49.180  	}
  49.181  
  49.182 @@ -418,9 +417,8 @@ int do_signal(struct pt_regs *regs, sigs
  49.183  	 * kernel mode. Just return without doing anything
  49.184  	 * if so.
  49.185  	 */
  49.186 -	if ((regs->cs & 2) != 2) {
  49.187 +	if ((regs->cs & 2) != 2)
  49.188  		return 1;
  49.189 -	} 	
  49.190  
  49.191  	if (try_to_freeze(0))
  49.192  		goto no_signal;
  49.193 @@ -457,7 +455,9 @@ int do_signal(struct pt_regs *regs, sigs
  49.194  			regs->rip -= 2;
  49.195  		}
  49.196  		if (regs->rax == (unsigned long)-ERESTART_RESTARTBLOCK) {
  49.197 -			regs->rax = __NR_restart_syscall;
  49.198 +			regs->rax = test_thread_flag(TIF_IA32) ?
  49.199 +					__NR_ia32_restart_syscall :
  49.200 +					__NR_restart_syscall;
  49.201  			regs->rip -= 2;
  49.202  		}
  49.203  	}
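
The signal.c hunks above replace the obsolete verify_area() with access_ok(), inverting each test: verify_area() returned 0 on success and -EFAULT on failure, while access_ok() returns non-zero on success. A tiny sketch of the equivalence (not part of the changeset; a stub predicate stands in for the real user-range check):

#include <stdio.h>

static int range_ok(const void *p, unsigned long n)
{
	return p != 0;		/* stub for the real user-range check */
}

/* old interface: 0 on success, -EFAULT (-14) on failure */
static int verify_area_sketch(const void *p, unsigned long n)
{
	return range_ok(p, n) ? 0 : -14;
}

int main(void)
{
	int x;
	/* the two failure tests agree: both print 0 here */
	printf("%d %d\n",
	       verify_area_sketch(&x, sizeof(x)) != 0,
	       !range_ok(&x, sizeof(x)));
	return 0;
}
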
    50.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c	Mon Jul 11 09:29:56 2005 -0500
    50.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smp.c	Mon Jul 11 09:35:19 2005 -0500
    50.3 @@ -25,7 +25,9 @@
    50.4  #include <asm/pgalloc.h>
    50.5  #include <asm/tlbflush.h>
    50.6  #include <asm/mach_apic.h>
    50.7 +#include <asm/mmu_context.h>
    50.8  #include <asm/proto.h>
    50.9 +#include <asm/apicdef.h>
   50.10  
   50.11  /*
   50.12   *	Smarter SMP flushing macros. 
   50.13 @@ -41,7 +43,7 @@ static cpumask_t flush_cpumask;
   50.14  static struct mm_struct * flush_mm;
   50.15  static unsigned long flush_va;
   50.16  static DEFINE_SPINLOCK(tlbstate_lock);
   50.17 -#define FLUSH_ALL	0xffffffff
   50.18 +#define FLUSH_ALL	-1ULL
   50.19  
   50.20  /*
   50.21   * We cannot call mmdrop() because we are in interrupt context, 
   50.22 @@ -52,7 +54,7 @@ static inline void leave_mm (unsigned lo
   50.23  	if (read_pda(mmu_state) == TLBSTATE_OK)
   50.24  		BUG();
   50.25  	clear_bit(cpu, &read_pda(active_mm)->cpu_vm_mask);
   50.26 -	__flush_tlb();
   50.27 +	load_cr3(swapper_pg_dir);
   50.28  }
   50.29  
   50.30  /*
   50.31 @@ -372,7 +374,10 @@ void smp_send_stop(void)
   50.32  	__smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
   50.33  	if (!nolock)
   50.34  		spin_unlock(&call_lock);
   50.35 -	smp_stop_cpu();
   50.36 +
   50.37 +	local_irq_disable();
   50.38 +	disable_local_APIC();
   50.39 +	local_irq_enable();
   50.40  }
   50.41  
   50.42  /*
   50.43 @@ -409,3 +414,27 @@ asmlinkage void smp_call_function_interr
   50.44  		atomic_inc(&call_data->finished);
   50.45  	}
   50.46  }
   50.47 +
   50.48 +int safe_smp_processor_id(void)
   50.49 +{
   50.50 +	int apicid, i;
   50.51 +
   50.52 +	if (disable_apic)
   50.53 +		return 0;
   50.54 +
   50.55 +	apicid = hard_smp_processor_id();
   50.56 +	if (x86_cpu_to_apicid[apicid] == apicid)
   50.57 +		return apicid;
   50.58 +
   50.59 +	for (i = 0; i < NR_CPUS; ++i) {
   50.60 +		if (x86_cpu_to_apicid[i] == apicid)
   50.61 +			return i;
   50.62 +	}
   50.63 +
   50.64 +	/* No entries in x86_cpu_to_apicid?  Either no MPS|ACPI,
   50.65 +	 * or called too early.  Either way, we must be CPU 0. */
    50.66 +	if (x86_cpu_to_apicid[0] == BAD_APICID)
   50.67 +		return 0;
   50.68 +
   50.69 +	return 0; /* Should not happen */
   50.70 +}
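
safe_smp_processor_id() above maps a hardware APIC ID back to a logical CPU number by scanning x86_cpu_to_apicid[], falling back to 0; it has to work even in contexts (such as crash paths) where smp_processor_id() cannot be trusted. A user-space sketch of the reverse lookup (not part of the changeset; the table values are made up, and a bounds check is added for the identity fast path):

#include <stdio.h>

#define NR_CPUS    4
#define BAD_APICID 0xffu

static unsigned x86_cpu_to_apicid[NR_CPUS] = { 0, 6, 7, BAD_APICID };

static int lookup_cpu(unsigned apicid)
{
	if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
		return apicid;		/* common case: identity mapping */
	for (int i = 0; i < NR_CPUS; i++)
		if (x86_cpu_to_apicid[i] == apicid)
			return i;
	return 0;			/* table empty or called too early */
}

int main(void)
{
	printf("%d %d\n", lookup_cpu(7), lookup_cpu(6));	/* prints: 2 1 */
	return 0;
}
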
    51.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c	Mon Jul 11 09:29:56 2005 -0500
    51.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/smpboot.c	Mon Jul 11 09:35:19 2005 -0500
    51.3 @@ -12,8 +12,7 @@
    51.4   *	Pentium Pro and Pentium-II/Xeon MP machines.
    51.5   *	Original development of Linux SMP code supported by Caldera.
    51.6   *
    51.7 - *	This code is released under the GNU General Public License version 2 or
    51.8 - *	later.
    51.9 + *	This code is released under the GNU General Public License version 2
   51.10   *
   51.11   *	Fixes
   51.12   *		Felix Koop	:	NR_CPUS used properly
   51.13 @@ -31,9 +30,13 @@
   51.14   *		Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
   51.15   *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs
   51.16   *	Andi Kleen		:	Changed for SMP boot into long mode.
   51.17 - *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process. 
   51.18 + *		Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
   51.19 + *      Andi Kleen              :       Converted to new state machine.
   51.20 + *					Various cleanups.
   51.21 + *					Probably mostly hotplug CPU ready now.
   51.22   */
   51.23  
   51.24 +
   51.25  #include <linux/config.h>
   51.26  #include <linux/init.h>
   51.27  
   51.28 @@ -53,19 +56,35 @@
   51.29  #include <asm/kdebug.h>
   51.30  #include <asm/tlbflush.h>
   51.31  #include <asm/proto.h>
   51.32 +#include <asm/nmi.h>
   51.33 +
   51.34 +/* Change this for real CPU hotplug. Note that other files need to be
   51.35 +   fixed first too. */
   51.36 +#define __cpuinit __init
   51.37 +#define __cpuinitdata __initdata
   51.38  
   51.39  /* Number of siblings per CPU package */
   51.40  int smp_num_siblings = 1;
   51.41  /* Package ID of each logical CPU */
   51.42  u8 phys_proc_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
   51.43 +u8 cpu_core_id[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
   51.44  EXPORT_SYMBOL(phys_proc_id);
   51.45 +EXPORT_SYMBOL(cpu_core_id);
   51.46  
   51.47  /* Bitmask of currently online CPUs */
   51.48  cpumask_t cpu_online_map;
   51.49  
   51.50 +EXPORT_SYMBOL(cpu_online_map);
   51.51 +
   51.52 +/*
   51.53 + * Private maps to synchronize booting between AP and BP.
   51.54 + * Probably not needed anymore, but it makes for easier debugging. -AK
   51.55 + */
   51.56  cpumask_t cpu_callin_map;
   51.57  cpumask_t cpu_callout_map;
   51.58 -static cpumask_t smp_commenced_mask;
   51.59 +
   51.60 +cpumask_t cpu_possible_map;
   51.61 +EXPORT_SYMBOL(cpu_possible_map);
   51.62  
   51.63  /* Per CPU bogomips and other parameters */
   51.64  struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
   51.65 @@ -74,13 +93,15 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __c
   51.66  int smp_threads_ready;
   51.67  
   51.68  cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned;
   51.69 +cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned;
   51.70 +EXPORT_SYMBOL(cpu_core_map);
   51.71  
   51.72  /*
   51.73   * Trampoline 80x86 program as an array.
   51.74   */
   51.75  
   51.76 -extern unsigned char trampoline_data [];
   51.77 -extern unsigned char trampoline_end  [];
   51.78 +extern unsigned char trampoline_data[];
   51.79 +extern unsigned char trampoline_end[];
   51.80  
   51.81  /*
   51.82   * Currently trivial. Write the real->protected mode
   51.83 @@ -88,11 +109,9 @@ extern unsigned char trampoline_end  [];
   51.84   * has made sure it's suitably aligned.
   51.85   */
   51.86  
   51.87 -static unsigned long __init setup_trampoline(void)
   51.88 +static unsigned long __cpuinit setup_trampoline(void)
   51.89  {
   51.90  	void *tramp = __va(SMP_TRAMPOLINE_BASE); 
   51.91 -	extern volatile __u32 tramp_gdt_ptr; 
   51.92 -	tramp_gdt_ptr = __pa_symbol(&cpu_gdt_table); 
   51.93  	memcpy(tramp, trampoline_data, trampoline_end - trampoline_data);
   51.94  	return virt_to_phys(tramp);
   51.95  }
   51.96 @@ -102,154 +121,224 @@ static unsigned long __init setup_trampo
   51.97   * a given CPU
   51.98   */
   51.99  
  51.100 -static void __init smp_store_cpu_info(int id)
  51.101 +static void __cpuinit smp_store_cpu_info(int id)
  51.102  {
  51.103  	struct cpuinfo_x86 *c = cpu_data + id;
  51.104  
  51.105  	*c = boot_cpu_data;
  51.106  	identify_cpu(c);
  51.107 +	print_cpu_info(c);
  51.108  }
  51.109  
  51.110  /*
  51.111 - * TSC synchronization.
  51.112 + * New Funky TSC sync algorithm borrowed from IA64.
   51.113 + * Its main advantage is that it doesn't fully reset the TSCs and
   51.114 + * in general looks more robust, and it works better than my earlier
   51.115 + * attempts. I believe it was written by David Mosberger. Some minor
  51.116 + * adjustments for x86-64 by me -AK
  51.117   *
  51.118 - * We first check whether all CPUs have their TSC's synchronized,
  51.119 - * then we print a warning if not, and always resync.
  51.120 + * Original comment reproduced below.
  51.121 + *
  51.122 + * Synchronize TSC of the current (slave) CPU with the TSC of the
  51.123 + * MASTER CPU (normally the time-keeper CPU).  We use a closed loop to
  51.124 + * eliminate the possibility of unaccounted-for errors (such as
  51.125 + * getting a machine check in the middle of a calibration step).  The
  51.126 + * basic idea is for the slave to ask the master what itc value it has
  51.127 + * and to read its own itc before and after the master responds.  Each
  51.128 + * iteration gives us three timestamps:
  51.129 + *
  51.130 + *	slave		master
  51.131 + *
  51.132 + *	t0 ---\
  51.133 + *             ---\
  51.134 + *		   --->
  51.135 + *			tm
  51.136 + *		   /---
  51.137 + *	       /---
  51.138 + *	t1 <---
  51.139 + *
  51.140 + *
  51.141 + * The goal is to adjust the slave's TSC such that tm falls exactly
  51.142 + * half-way between t0 and t1.  If we achieve this, the clocks are
  51.143 + * synchronized provided the interconnect between the slave and the
  51.144 + * master is symmetric.  Even if the interconnect were asymmetric, we
  51.145 + * would still know that the synchronization error is smaller than the
  51.146 + * roundtrip latency (t0 - t1).
  51.147 + *
  51.148 + * When the interconnect is quiet and symmetric, this lets us
  51.149 + * synchronize the TSC to within one or two cycles.  However, we can
  51.150 + * only *guarantee* that the synchronization is accurate to within a
  51.151 + * round-trip time, which is typically in the range of several hundred
  51.152 + * cycles (e.g., ~500 cycles).  In practice, this means that the TSCs
  51.153 + * are usually almost perfectly synchronized, but we shouldn't assume
  51.154 + * that the accuracy is much better than half a micro second or so.
  51.155 + *
  51.156 + * [there are other errors like the latency of RDTSC and of the
   51.157 + * WRMSR. These can also amount to hundreds of cycles. So it's
  51.158 + * probably worse. It claims 153 cycles error on a dual Opteron,
  51.159 + * but I suspect the numbers are actually somewhat worse -AK]
  51.160   */
  51.161  
  51.162 -static atomic_t tsc_start_flag = ATOMIC_INIT(0);
  51.163 -static atomic_t tsc_count_start = ATOMIC_INIT(0);
  51.164 -static atomic_t tsc_count_stop = ATOMIC_INIT(0);
  51.165 -static unsigned long long tsc_values[NR_CPUS];
  51.166 -
  51.167 -#define NR_LOOPS 5
  51.168 -
  51.169 -extern unsigned int fast_gettimeoffset_quotient;
  51.170 -
  51.171 -static void __init synchronize_tsc_bp (void)
  51.172 -{
  51.173 -	int i;
  51.174 -	unsigned long long t0;
  51.175 -	unsigned long long sum, avg;
  51.176 -	long long delta;
  51.177 -	long one_usec;
  51.178 -	int buggy = 0;
  51.179 -
  51.180 -	printk(KERN_INFO "checking TSC synchronization across %u CPUs: ",num_booting_cpus());
  51.181 -
  51.182 -	one_usec = cpu_khz; 
  51.183 -
  51.184 -	atomic_set(&tsc_start_flag, 1);
  51.185 -	wmb();
  51.186 +#define MASTER	0
  51.187 +#define SLAVE	(SMP_CACHE_BYTES/8)
  51.188  
  51.189 -	/*
  51.190 -	 * We loop a few times to get a primed instruction cache,
  51.191 -	 * then the last pass is more or less synchronized and
  51.192 -	 * the BP and APs set their cycle counters to zero all at
  51.193 -	 * once. This reduces the chance of having random offsets
  51.194 -	 * between the processors, and guarantees that the maximum
  51.195 -	 * delay between the cycle counters is never bigger than
  51.196 -	 * the latency of information-passing (cachelines) between
  51.197 -	 * two CPUs.
  51.198 -	 */
  51.199 -	for (i = 0; i < NR_LOOPS; i++) {
  51.200 -		/*
  51.201 -		 * all APs synchronize but they loop on '== num_cpus'
  51.202 -		 */
  51.203 -		while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) mb();
  51.204 -		atomic_set(&tsc_count_stop, 0);
  51.205 -		wmb();
  51.206 -		/*
  51.207 -		 * this lets the APs save their current TSC:
  51.208 -		 */
  51.209 -		atomic_inc(&tsc_count_start);
   51.210 +/* Intentionally don't use cpu_relax() during TSC synchronization,
   51.211 +   because we don't want to go into funky power-save modes or cause
   51.212 +   hypervisors to schedule us away.  Going to sleep would likely affect
   51.213 +   latency, and low latency is the primary objective here. -AK */
  51.214 +#define no_cpu_relax() barrier()
  51.215  
  51.216 -		sync_core();
  51.217 -		rdtscll(tsc_values[smp_processor_id()]);
  51.218 -		/*
  51.219 -		 * We clear the TSC in the last loop:
  51.220 -		 */
  51.221 -		if (i == NR_LOOPS-1)
  51.222 -			write_tsc(0, 0);
  51.223 +static __cpuinitdata DEFINE_SPINLOCK(tsc_sync_lock);
  51.224 +static volatile __cpuinitdata unsigned long go[SLAVE + 1];
  51.225 +static int notscsync __cpuinitdata;
  51.226  
  51.227 -		/*
  51.228 -		 * Wait for all APs to leave the synchronization point:
  51.229 -		 */
  51.230 -		while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) mb();
  51.231 -		atomic_set(&tsc_count_start, 0);
  51.232 -		wmb();
  51.233 -		atomic_inc(&tsc_count_stop);
  51.234 +#undef DEBUG_TSC_SYNC
  51.235 +
  51.236 +#define NUM_ROUNDS	64	/* magic value */
  51.237 +#define NUM_ITERS	5	/* likewise */
  51.238 +
  51.239 +/* Callback on boot CPU */
  51.240 +static __cpuinit void sync_master(void *arg)
  51.241 +{
  51.242 +	unsigned long flags, i;
  51.243 +
  51.244 +	if (smp_processor_id() != boot_cpu_id)
  51.245 +		return;
  51.246 +
  51.247 +	go[MASTER] = 0;
  51.248 +
  51.249 +	local_irq_save(flags);
  51.250 +	{
  51.251 +		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
  51.252 +			while (!go[MASTER])
  51.253 +				no_cpu_relax();
  51.254 +			go[MASTER] = 0;
  51.255 +			rdtscll(go[SLAVE]);
  51.256 +		}
  51.257 +	}
  51.258 +	local_irq_restore(flags);
  51.259 +}
  51.260 +
  51.261 +/*
  51.262 + * Return the number of cycles by which our tsc differs from the tsc
  51.263 + * on the master (time-keeper) CPU.  A positive number indicates our
  51.264 + * tsc is ahead of the master, negative that it is behind.
  51.265 + */
  51.266 +static inline long
  51.267 +get_delta(long *rt, long *master)
  51.268 +{
  51.269 +	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
  51.270 +	unsigned long tcenter, t0, t1, tm;
  51.271 +	int i;
  51.272 +
  51.273 +	for (i = 0; i < NUM_ITERS; ++i) {
  51.274 +		rdtscll(t0);
  51.275 +		go[MASTER] = 1;
  51.276 +		while (!(tm = go[SLAVE]))
  51.277 +			no_cpu_relax();
  51.278 +		go[SLAVE] = 0;
  51.279 +		rdtscll(t1);
  51.280 +
  51.281 +		if (t1 - t0 < best_t1 - best_t0)
  51.282 +			best_t0 = t0, best_t1 = t1, best_tm = tm;
  51.283  	}
  51.284  
  51.285 -	sum = 0;
  51.286 -	for (i = 0; i < NR_CPUS; i++) {
  51.287 -		if (cpu_isset(i, cpu_callout_map)) {
  51.288 -		t0 = tsc_values[i];
  51.289 -		sum += t0;
  51.290 -	}
  51.291 -	}
  51.292 -	avg = sum / num_booting_cpus();
  51.293 -
  51.294 -	sum = 0;
  51.295 -	for (i = 0; i < NR_CPUS; i++) {
  51.296 -		if (!cpu_isset(i, cpu_callout_map))
  51.297 -			continue;
  51.298 +	*rt = best_t1 - best_t0;
  51.299 +	*master = best_tm - best_t0;
  51.300  
  51.301 -		delta = tsc_values[i] - avg;
  51.302 -		if (delta < 0)
  51.303 -			delta = -delta;
  51.304 -		/*
  51.305 -		 * We report bigger than 2 microseconds clock differences.
  51.306 -		 */
  51.307 -		if (delta > 2*one_usec) {
  51.308 -			long realdelta;
  51.309 -			if (!buggy) {
  51.310 -				buggy = 1;
  51.311 -				printk("\n");
  51.312 -			}
  51.313 -			realdelta = delta / one_usec;
  51.314 -			if (tsc_values[i] < avg)
  51.315 -				realdelta = -realdelta;
  51.316 -
  51.317 -			printk("BIOS BUG: CPU#%d improperly initialized, has %ld usecs TSC skew! FIXED.\n",
  51.318 -				i, realdelta);
  51.319 -		}
  51.320 -
  51.321 -		sum += delta;
  51.322 -	}
  51.323 -	if (!buggy)
  51.324 -		printk("passed.\n");
  51.325 +	/* average best_t0 and best_t1 without overflow: */
  51.326 +	tcenter = (best_t0/2 + best_t1/2);
  51.327 +	if (best_t0 % 2 + best_t1 % 2 == 2)
  51.328 +		++tcenter;
  51.329 +	return tcenter - best_tm;
  51.330  }
  51.331  
  51.332 -static void __init synchronize_tsc_ap (void)
  51.333 +static __cpuinit void sync_tsc(void)
  51.334  {
  51.335 -	int i;
  51.336 -
  51.337 -	/*
  51.338 -	 * Not every cpu is online at the time
  51.339 -	 * this gets called, so we first wait for the BP to
  51.340 -	 * finish SMP initialization:
  51.341 -	 */
  51.342 -	while (!atomic_read(&tsc_start_flag)) mb();
  51.343 +	int i, done = 0;
  51.344 +	long delta, adj, adjust_latency = 0;
  51.345 +	unsigned long flags, rt, master_time_stamp, bound;
  51.346 +#if DEBUG_TSC_SYNC
  51.347 +	static struct syncdebug {
  51.348 +		long rt;	/* roundtrip time */
  51.349 +		long master;	/* master's timestamp */
  51.350 +		long diff;	/* difference between midpoint and master's timestamp */
  51.351 +		long lat;	/* estimate of tsc adjustment latency */
  51.352 +	} t[NUM_ROUNDS] __cpuinitdata;
  51.353 +#endif
  51.354  
  51.355 -	for (i = 0; i < NR_LOOPS; i++) {
  51.356 -		atomic_inc(&tsc_count_start);
  51.357 -		while (atomic_read(&tsc_count_start) != num_booting_cpus()) mb();
  51.358 -
  51.359 -		sync_core();
  51.360 -		rdtscll(tsc_values[smp_processor_id()]);
  51.361 -		if (i == NR_LOOPS-1)
  51.362 -			write_tsc(0, 0);
  51.363 +	go[MASTER] = 1;
  51.364  
  51.365 -		atomic_inc(&tsc_count_stop);
  51.366 -		while (atomic_read(&tsc_count_stop) != num_booting_cpus()) mb();
  51.367 +	smp_call_function(sync_master, NULL, 1, 0);
  51.368 +
  51.369 +	while (go[MASTER])	/* wait for master to be ready */
  51.370 +		no_cpu_relax();
  51.371 +
  51.372 +	spin_lock_irqsave(&tsc_sync_lock, flags);
  51.373 +	{
  51.374 +		for (i = 0; i < NUM_ROUNDS; ++i) {
  51.375 +			delta = get_delta(&rt, &master_time_stamp);
  51.376 +			if (delta == 0) {
  51.377 +				done = 1;	/* let's lock on to this... */
  51.378 +				bound = rt;
  51.379 +			}
  51.380 +
  51.381 +			if (!done) {
  51.382 +				unsigned long t;
  51.383 +				if (i > 0) {
  51.384 +					adjust_latency += -delta;
  51.385 +					adj = -delta + adjust_latency/4;
  51.386 +				} else
  51.387 +					adj = -delta;
  51.388 +
  51.389 +				rdtscll(t);
  51.390 +				wrmsrl(MSR_IA32_TSC, t + adj);
  51.391 +			}
  51.392 +#if DEBUG_TSC_SYNC
  51.393 +			t[i].rt = rt;
  51.394 +			t[i].master = master_time_stamp;
  51.395 +			t[i].diff = delta;
  51.396 +			t[i].lat = adjust_latency/4;
  51.397 +#endif
  51.398 +		}
  51.399  	}
  51.400 +	spin_unlock_irqrestore(&tsc_sync_lock, flags);
  51.401 +
  51.402 +#if DEBUG_TSC_SYNC
  51.403 +	for (i = 0; i < NUM_ROUNDS; ++i)
  51.404 +		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
  51.405 +		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
  51.406 +#endif
  51.407 +
  51.408 +	printk(KERN_INFO
  51.409 +	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
  51.410 +	       "maxerr %lu cycles)\n",
  51.411 +	       smp_processor_id(), boot_cpu_id, delta, rt);
  51.412  }
  51.413 -#undef NR_LOOPS
  51.414  
  51.415 -static atomic_t init_deasserted;
  51.416 +static void __cpuinit tsc_sync_wait(void)
  51.417 +{
  51.418 +	if (notscsync || !cpu_has_tsc)
  51.419 +		return;
  51.420 +	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
  51.421 +			boot_cpu_id);
  51.422 +	sync_tsc();
  51.423 +}
  51.424  
  51.425 -void __init smp_callin(void)
  51.426 +static __init int notscsync_setup(char *s)
  51.427 +{
  51.428 +	notscsync = 1;
  51.429 +	return 0;
  51.430 +}
  51.431 +__setup("notscsync", notscsync_setup);
  51.432 +
  51.433 +static atomic_t init_deasserted __cpuinitdata;
  51.434 +
  51.435 +/*
  51.436 + * Report back to the Boot Processor.
  51.437 + * Running on AP.
  51.438 + */
  51.439 +void __cpuinit smp_callin(void)
  51.440  {
  51.441  	int cpuid, phys_id;
  51.442  	unsigned long timeout;
  51.443 @@ -260,7 +349,8 @@ void __init smp_callin(void)
  51.444  	 * our local APIC.  We have to wait for the IPI or we'll
  51.445  	 * lock up on an APIC access.
  51.446  	 */
  51.447 -	while (!atomic_read(&init_deasserted));
  51.448 +	while (!atomic_read(&init_deasserted))
  51.449 +		cpu_relax();
  51.450  
  51.451  	/*
  51.452  	 * (This works even if the APIC is not enabled.)
  51.453 @@ -291,7 +381,7 @@ void __init smp_callin(void)
  51.454  		 */
  51.455  		if (cpu_isset(cpuid, cpu_callout_map))
  51.456  			break;
  51.457 -		rep_nop();
  51.458 +		cpu_relax();
  51.459  	}
  51.460  
  51.461  	if (!time_before(jiffies, timeout)) {
  51.462 @@ -309,8 +399,6 @@ void __init smp_callin(void)
  51.463  	Dprintk("CALLIN, before setup_local_APIC().\n");
  51.464  	setup_local_APIC();
  51.465  
  51.466 -	local_irq_enable();
  51.467 -
  51.468  	/*
  51.469  	 * Get our bogomips.
  51.470  	 */
  51.471 @@ -324,26 +412,16 @@ void __init smp_callin(void)
  51.472  	 */
  51.473   	smp_store_cpu_info(cpuid);
  51.474  
  51.475 -	local_irq_disable();
  51.476 -
  51.477  	/*
  51.478  	 * Allow the master to continue.
  51.479  	 */
  51.480  	cpu_set(cpuid, cpu_callin_map);
  51.481 -
  51.482 -	/*
  51.483 -	 *      Synchronize the TSC with the BP
  51.484 -	 */
  51.485 -	if (cpu_has_tsc)
  51.486 -		synchronize_tsc_ap();
  51.487  }
  51.488  
  51.489 -int cpucount;
  51.490 -
  51.491  /*
  51.492 - * Activate a secondary processor.
   51.493 + * Setup code on the secondary processor (after coming out of the trampoline)
  51.494   */
  51.495 -void __init start_secondary(void)
  51.496 +void __cpuinit start_secondary(void)
  51.497  {
  51.498  	/*
  51.499  	 * Dont put anything before smp_callin(), SMP
  51.500 @@ -356,14 +434,10 @@ void __init start_secondary(void)
  51.501  	/* otherwise gcc will move up the smp_processor_id before the cpu_init */
  51.502  	barrier();
  51.503  
  51.504 -	Dprintk("cpu %d: waiting for commence\n", smp_processor_id()); 
  51.505 -	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
  51.506 -		rep_nop();
  51.507 -
  51.508  	Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); 	
  51.509  	setup_secondary_APIC_clock();
  51.510  
  51.511 -	Dprintk("cpu %d: enabling apic timer\n", smp_processor_id()); 
  51.512 +	Dprintk("cpu %d: enabling apic timer\n", smp_processor_id());
  51.513  
  51.514  	if (nmi_watchdog == NMI_IO_APIC) {
  51.515  		disable_8259A_irq(0);
  51.516 @@ -371,27 +445,27 @@ void __init start_secondary(void)
  51.517  		enable_8259A_irq(0);
  51.518  	}
  51.519  
  51.520 -
  51.521 -	enable_APIC_timer(); 
  51.522 +	enable_APIC_timer();
  51.523  
  51.524  	/*
  51.525 -	 * low-memory mappings have been cleared, flush them from
  51.526 -	 * the local TLBs too.
  51.527 +	 * Allow the master to continue.
  51.528  	 */
  51.529 -	local_flush_tlb();
  51.530 +	cpu_set(smp_processor_id(), cpu_online_map);
  51.531 +	mb();
  51.532  
  51.533 -	Dprintk("cpu %d eSetting cpu_online_map\n", smp_processor_id()); 
  51.534 -	cpu_set(smp_processor_id(), cpu_online_map);
  51.535 -	wmb();
  51.536 -	
   51.537 +	/* Wait for the TSC sync so nothing is scheduled before it finishes.
   51.538 +	   We still process interrupts, which could see an inconsistent
   51.539 +	   time in that window, unfortunately. */
  51.540 +	tsc_sync_wait();
  51.541 +
  51.542  	cpu_idle();
  51.543  }
  51.544  
  51.545 -extern volatile unsigned long init_rsp; 
  51.546 +extern volatile unsigned long init_rsp;
  51.547  extern void (*initial_code)(void);
  51.548  
  51.549  #if APIC_DEBUG
  51.550 -static inline void inquire_remote_apic(int apicid)
  51.551 +static void inquire_remote_apic(int apicid)
  51.552  {
  51.553  	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
  51.554  	char *names[] = { "ID", "VERSION", "SPIV" };
  51.555 @@ -428,7 +502,10 @@ static inline void inquire_remote_apic(i
  51.556  }
  51.557  #endif
  51.558  
  51.559 -static int __init wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
  51.560 +/*
  51.561 + * Kick the secondary to wake up.
  51.562 + */
  51.563 +static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip)
  51.564  {
  51.565  	unsigned long send_status = 0, accept_status = 0;
  51.566  	int maxlvt, timeout, num_starts, j;
  51.567 @@ -551,33 +628,35 @@ static int __init wakeup_secondary_via_I
  51.568  	return (send_status | accept_status);
  51.569  }
  51.570  
  51.571 -static void __init do_boot_cpu (int apicid)
  51.572 +/*
  51.573 + * Boot one CPU.
  51.574 + */
  51.575 +static int __cpuinit do_boot_cpu(int cpu, int apicid)
  51.576  {
  51.577  	struct task_struct *idle;
  51.578  	unsigned long boot_error;
  51.579 -	int timeout, cpu;
  51.580 +	int timeout;
  51.581  	unsigned long start_rip;
  51.582 -
  51.583 -	cpu = ++cpucount;
  51.584  	/*
  51.585  	 * We can't use kernel_thread since we must avoid to
  51.586  	 * reschedule the child.
  51.587  	 */
  51.588  	idle = fork_idle(cpu);
  51.589 -	if (IS_ERR(idle))
  51.590 -		panic("failed fork for CPU %d", cpu);
  51.591 -	x86_cpu_to_apicid[cpu] = apicid;
  51.592 +	if (IS_ERR(idle)) {
  51.593 +		printk("failed fork for CPU %d\n", cpu);
  51.594 +		return PTR_ERR(idle);
  51.595 +	}
  51.596  
  51.597  	cpu_pda[cpu].pcurrent = idle;
  51.598  
  51.599  	start_rip = setup_trampoline();
  51.600  
  51.601 -	init_rsp = idle->thread.rsp; 
  51.602 +	init_rsp = idle->thread.rsp;
  51.603  	per_cpu(init_tss,cpu).rsp0 = init_rsp;
  51.604  	initial_code = start_secondary;
  51.605  	clear_ti_thread_flag(idle->thread_info, TIF_FORK);
  51.606  
  51.607 -	printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid, 
  51.608 +	printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
  51.609  	       start_rip, init_rsp);
  51.610  
  51.611  	/*
  51.612 @@ -614,7 +693,7 @@ static void __init do_boot_cpu (int apic
  51.613  	/*
  51.614  	 * Starting actual IPI sequence...
  51.615  	 */
  51.616 -	boot_error = wakeup_secondary_via_INIT(apicid, start_rip); 
  51.617 +	boot_error = wakeup_secondary_via_INIT(apicid, start_rip);
  51.618  
  51.619  	if (!boot_error) {
  51.620  		/*
  51.621 @@ -635,8 +714,6 @@ static void __init do_boot_cpu (int apic
  51.622  
  51.623  		if (cpu_isset(cpu, cpu_callin_map)) {
  51.624  			/* number CPUs logically, starting from 1 (BSP is 0) */
  51.625 -			Dprintk("OK.\n");
  51.626 -			print_cpu_info(&cpu_data[cpu]);
  51.627  			Dprintk("CPU has booted.\n");
  51.628  		} else {
  51.629  			boot_error = 1;
  51.630 @@ -655,76 +732,131 @@ static void __init do_boot_cpu (int apic
  51.631  	if (boot_error) {
  51.632  		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
  51.633  		clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */
  51.634 -		cpucount--;
  51.635 +		cpu_clear(cpu, cpu_present_map);
  51.636 +		cpu_clear(cpu, cpu_possible_map);
  51.637  		x86_cpu_to_apicid[cpu] = BAD_APICID;
  51.638  		x86_cpu_to_log_apicid[cpu] = BAD_APICID;
  51.639 +		return -EIO;
  51.640  	}
  51.641 +
  51.642 +	return 0;
  51.643  }
  51.644  
  51.645  cycles_t cacheflush_time;
  51.646  unsigned long cache_decay_ticks;
  51.647  
  51.648 -static void smp_tune_scheduling (void)
  51.649 +/*
  51.650 + * Construct cpu_sibling_map[], so that we can tell the sibling CPU
  51.651 + * on SMT systems efficiently.
  51.652 + */
  51.653 +static __cpuinit void detect_siblings(void)
  51.654  {
  51.655 -	int cachesize;       /* kB   */
  51.656 -	unsigned long bandwidth = 1000; /* MB/s */
  51.657 -	/*
  51.658 -	 * Rough estimation for SMP scheduling, this is the number of
  51.659 -	 * cycles it takes for a fully memory-limited process to flush
  51.660 -	 * the SMP-local cache.
  51.661 -	 *
  51.662 -	 * (For a P5 this pretty much means we will choose another idle
  51.663 -	 *  CPU almost always at wakeup time (this is due to the small
  51.664 -	 *  L1 cache), on PIIs it's around 50-100 usecs, depending on
  51.665 -	 *  the cache size)
  51.666 -	 */
  51.667 +	int cpu;
  51.668  
  51.669 -	if (!cpu_khz) {
  51.670 -		/*
  51.671 -		 * this basically disables processor-affinity
  51.672 -		 * scheduling on SMP without a TSC.
  51.673 -		 */
  51.674 -		cacheflush_time = 0;
  51.675 -		return;
  51.676 -	} else {
  51.677 -		cachesize = boot_cpu_data.x86_cache_size;
  51.678 -		if (cachesize == -1) {
  51.679 -			cachesize = 16; /* Pentiums, 2x8kB cache */
  51.680 -			bandwidth = 100;
  51.681 +	for (cpu = 0; cpu < NR_CPUS; cpu++) {
  51.682 +		cpus_clear(cpu_sibling_map[cpu]);
  51.683 +		cpus_clear(cpu_core_map[cpu]);
  51.684 +	}
  51.685 +
  51.686 +	for_each_online_cpu (cpu) {
  51.687 +		struct cpuinfo_x86 *c = cpu_data + cpu;
  51.688 +		int siblings = 0;
  51.689 +		int i;
  51.690 +		if (smp_num_siblings > 1) {
  51.691 +			for_each_online_cpu (i) {
  51.692 +				if (cpu_core_id[cpu] == cpu_core_id[i]) {
  51.693 +					siblings++;
  51.694 +					cpu_set(i, cpu_sibling_map[cpu]);
  51.695 +				}
  51.696 +			}
  51.697 +		} else {
  51.698 +			siblings++;
  51.699 +			cpu_set(cpu, cpu_sibling_map[cpu]);
  51.700  		}
  51.701  
  51.702 -		cacheflush_time = (cpu_khz>>10) * (cachesize<<10) / bandwidth;
  51.703 +		if (siblings != smp_num_siblings) {
  51.704 +			printk(KERN_WARNING
  51.705 +	       "WARNING: %d siblings found for CPU%d, should be %d\n",
  51.706 +			       siblings, cpu, smp_num_siblings);
  51.707 +			smp_num_siblings = siblings;
  51.708 +		}
  51.709 +		if (c->x86_num_cores > 1) {
  51.710 +			for_each_online_cpu(i) {
  51.711 +				if (phys_proc_id[cpu] == phys_proc_id[i])
  51.712 +					cpu_set(i, cpu_core_map[cpu]);
  51.713 +			}
  51.714 +		} else
  51.715 +			cpu_core_map[cpu] = cpu_sibling_map[cpu];
  51.716  	}
  51.717 -
  51.718 -	cache_decay_ticks = (long)cacheflush_time/cpu_khz * HZ / 1000;
  51.719 -
  51.720 -	printk(KERN_INFO "per-CPU timeslice cutoff: %ld.%02ld usecs.\n",
  51.721 -		(long)cacheflush_time/(cpu_khz/1000),
  51.722 -		((long)cacheflush_time*100/(cpu_khz/1000)) % 100);
  51.723 -	printk(KERN_INFO "task migration cache decay timeout: %ld msecs.\n",
  51.724 -		(cache_decay_ticks + 1) * 1000 / HZ);
  51.725  }
  51.726  
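
Editor's note: the new detect_siblings() replaces the open-coded sibling scan that the old smp_boot_cpus() carried at its tail (removed further down): HT siblings are now matched on cpu_core_id[] rather than phys_proc_id[], and a separate cpu_core_map[] is derived from phys_proc_id[]. A minimal userspace sketch of the same pairing logic, with an invented 4-CPU topology (the kernel reads the real ids from CPUID):

	#include <stdio.h>

	/* Hypothetical topology: 4 online CPUs, two HT siblings per core,
	 * both cores in one physical package. The id arrays are invented
	 * for illustration only. */
	int main(void)
	{
		int cpu_core_id[4]  = { 0, 0, 1, 1 };	/* siblings share a core id */
		int phys_proc_id[4] = { 0, 0, 0, 0 };	/* cores share a package id */
		unsigned sibling_map[4] = { 0 }, core_map[4] = { 0 };
		int cpu, i;

		for (cpu = 0; cpu < 4; cpu++)
			for (i = 0; i < 4; i++) {
				if (cpu_core_id[cpu] == cpu_core_id[i])
					sibling_map[cpu] |= 1u << i;
				if (phys_proc_id[cpu] == phys_proc_id[i])
					core_map[cpu] |= 1u << i;
			}

		for (cpu = 0; cpu < 4; cpu++)
			printf("cpu%d: siblings=0x%x cores=0x%x\n",
			       cpu, sibling_map[cpu], core_map[cpu]);
		return 0;	/* cpu0: siblings=0x3 cores=0xf, etc. */
	}
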
  51.727  /*
  51.728 - * Cycle through the processors sending APIC IPIs to boot each.
  51.729 + * Cleanup possible dangling ends...
  51.730   */
  51.731 -
  51.732 -static void __init smp_boot_cpus(unsigned int max_cpus)
  51.733 +static __cpuinit void smp_cleanup_boot(void)
  51.734  {
  51.735 -	unsigned apicid, cpu, bit, kicked;
  51.736 -
  51.737 -	nmi_watchdog_default();
  51.738 +	/*
  51.739 +	 * Paranoid:  Set warm reset code and vector here back
  51.740 +	 * to default values.
  51.741 +	 */
  51.742 +	CMOS_WRITE(0, 0xf);
  51.743  
  51.744  	/*
  51.745 -	 * Setup boot CPU information
  51.746 +	 * Reset trampoline flag
  51.747  	 */
  51.748 -	smp_store_cpu_info(0); /* Final full version of the data */
  51.749 -	printk(KERN_INFO "CPU%d: ", 0);
  51.750 -	print_cpu_info(&cpu_data[0]);
  51.751 +	*((volatile int *) phys_to_virt(0x467)) = 0;
  51.752  
  51.753 -	current_thread_info()->cpu = 0;
  51.754 -	smp_tune_scheduling();
  51.755 +#ifndef CONFIG_HOTPLUG_CPU
  51.756 +	/*
  51.757 +	 * Free pages reserved for SMP bootup.
  51.758 +	 * When you add hotplug CPU support later, remove this.
  51.759 +	 * Note: there is more work to be done for later CPU bootup.
  51.760 +	 */
  51.761  
  51.762 +	free_page((unsigned long) __va(PAGE_SIZE));
  51.763 +	free_page((unsigned long) __va(SMP_TRAMPOLINE_BASE));
  51.764 +#endif
  51.765 +}
  51.766 +
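
Editor's note: smp_cleanup_boot() resets the warm-reset machinery that do_boot_cpu() used to point APs at the trampoline. CMOS register 0xf is the BIOS shutdown status byte, and physical 0x467 (real-mode 0040:0067) is the warm-reset vector, a far pointer stored offset-low / segment-high. A sketch of how such a pointer packs into that dword, with an invented trampoline address:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint16_t seg = 0x9000, off = 0x0000;	/* example trampoline */
		uint32_t vector = ((uint32_t)seg << 16) | off;

		printf("dword at 0x467 = 0x%08x (=> %04x:%04x)\n",
		       vector, seg, off);
		return 0;
	}
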
  51.767 +/*
  51.768 + * Fall back to non SMP mode after errors.
  51.769 + *
  51.770 + * RED-PEN audit/test this more. I bet there is more state messed up here.
  51.771 + */
  51.772 +static __cpuinit void disable_smp(void)
  51.773 +{
  51.774 +	cpu_present_map = cpumask_of_cpu(0);
  51.775 +	cpu_possible_map = cpumask_of_cpu(0);
  51.776 +	if (smp_found_config)
  51.777 +		phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id);
  51.778 +	else
  51.779 +		phys_cpu_present_map = physid_mask_of_physid(0);
  51.780 +	cpu_set(0, cpu_sibling_map[0]);
  51.781 +	cpu_set(0, cpu_core_map[0]);
  51.782 +}
  51.783 +
  51.784 +/*
  51.785 + * Handle user cpus=... parameter.
  51.786 + */
  51.787 +static __cpuinit void enforce_max_cpus(unsigned max_cpus)
  51.788 +{
  51.789 +	int i, k;
  51.790 +	k = 0;
  51.791 +	for (i = 0; i < NR_CPUS; i++) {
  51.792 +		if (!cpu_possible(i))
  51.793 +			continue;
  51.794 +		if (++k > max_cpus) {
  51.795 +			cpu_clear(i, cpu_possible_map);
  51.796 +			cpu_clear(i, cpu_present_map);
  51.797 +		}
  51.798 +	}
  51.799 +}
  51.800 +
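
Editor's note: enforce_max_cpus() honours the maxcpus= boot parameter by keeping the first max_cpus possible CPUs and clearing the rest from both cpu_possible_map and cpu_present_map. A userspace sketch of the same walk over a plain bitmask (mask value invented):

	#include <stdio.h>

	int main(void)
	{
		unsigned long possible = 0x2f;	/* CPUs 0,1,2,3,5 possible */
		unsigned max_cpus = 2, k = 0;
		int i;

		for (i = 0; i < 8 * (int)sizeof(possible); i++) {
			if (!(possible & (1UL << i)))
				continue;
			if (++k > max_cpus)
				possible &= ~(1UL << i);	/* trim the excess */
		}
		printf("possible = 0x%lx\n", possible);	/* 0x3: CPUs 0 and 1 */
		return 0;
	}
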
  51.801 +/*
  51.802 + * Various sanity checks.
  51.803 + */
  51.804 +static int __cpuinit smp_sanity_check(unsigned max_cpus)
  51.805 +{
  51.806  	if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) {
  51.807  		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
  51.808  		       hard_smp_processor_id());
  51.809 @@ -737,13 +869,11 @@ static void __init smp_boot_cpus(unsigne
  51.810  	 */
  51.811  	if (!smp_found_config) {
  51.812  		printk(KERN_NOTICE "SMP motherboard not detected.\n");
  51.813 -		io_apic_irqs = 0;
  51.814 -		cpu_online_map = cpumask_of_cpu(0);
  51.815 -		phys_cpu_present_map = physid_mask_of_physid(0);
  51.816 +		disable_smp();
  51.817  		if (APIC_init_uniprocessor())
  51.818  			printk(KERN_NOTICE "Local APIC not detected."
  51.819  					   " Using dummy APIC emulation.\n");
  51.820 -		goto smp_done;
  51.821 +		return -1;
  51.822  	}
  51.823  
  51.824  	/*
  51.825 @@ -763,196 +893,143 @@ static void __init smp_boot_cpus(unsigne
  51.826  		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
  51.827  			boot_cpu_id);
  51.828  		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
  51.829 -		io_apic_irqs = 0;
  51.830 -		cpu_online_map = cpumask_of_cpu(0);
  51.831 -		phys_cpu_present_map = physid_mask_of_physid(0);
  51.832 -		disable_apic = 1;
  51.833 -		goto smp_done;
  51.834 +		nr_ioapics = 0;
  51.835 +		return -1;
  51.836  	}
  51.837  
  51.838 -	verify_local_APIC();
  51.839 -
  51.840  	/*
  51.841  	 * If SMP should be disabled, then really disable it!
  51.842  	 */
  51.843  	if (!max_cpus) {
  51.844 -		smp_found_config = 0;
  51.845  		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
  51.846 -		io_apic_irqs = 0;
  51.847 -		cpu_online_map = cpumask_of_cpu(0);
  51.848 -		phys_cpu_present_map = physid_mask_of_physid(0);
  51.849 -		disable_apic = 1;
  51.850 -		goto smp_done;
  51.851 +		nr_ioapics = 0;
  51.852 +		return -1;
  51.853  	}
  51.854  
  51.855 +	return 0;
  51.856 +}
  51.857 +
  51.858 +/*
  51.859 + * Prepare for SMP bootup.  The MP table or ACPI has been read
  51.860 + * earlier.  Just do some sanity checking here and enable APIC mode.
  51.861 + */
  51.862 +void __cpuinit smp_prepare_cpus(unsigned int max_cpus)
  51.863 +{
  51.864 +	int i;
  51.865 +
  51.866 +	nmi_watchdog_default();
  51.867 +	current_cpu_data = boot_cpu_data;
  51.868 +	current_thread_info()->cpu = 0;  /* needed? */
  51.869 +
  51.870 +	enforce_max_cpus(max_cpus);
  51.871 +
  51.872 +	/*
  51.873 +	 * Fill in cpu_present_mask
  51.874 +	 */
  51.875 +	for (i = 0; i < NR_CPUS; i++) {
  51.876 +		int apicid = cpu_present_to_apicid(i);
  51.877 +		if (physid_isset(apicid, phys_cpu_present_map)) {
  51.878 +			cpu_set(i, cpu_present_map);
  51.879 +			/* possible map would be different if we supported real
  51.880 +			   CPU hotplug. */
  51.881 +			cpu_set(i, cpu_possible_map);
  51.882 +		}
  51.883 +	}
  51.884 +
  51.885 +	if (smp_sanity_check(max_cpus) < 0) {
  51.886 +		printk(KERN_INFO "SMP disabled\n");
  51.887 +		disable_smp();
  51.888 +		return;
  51.889 +	}
  51.890 +
  51.891 +
  51.892 +	/*
  51.893 +	 * Switch from PIC to APIC mode.
  51.894 +	 */
  51.895  	connect_bsp_APIC();
  51.896  	setup_local_APIC();
  51.897  
  51.898 -	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id)
  51.899 -		BUG();
  51.900 -
  51.901 -	x86_cpu_to_apicid[0] = boot_cpu_id;
  51.902 -
  51.903 -	/*
  51.904 -	 * Now scan the CPU present map and fire up the other CPUs.
  51.905 -	 */
  51.906 -	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));
  51.907 -
  51.908 -	kicked = 1;
  51.909 -	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
  51.910 -		apicid = cpu_present_to_apicid(bit);
  51.911 -		/*
  51.912 -		 * Don't even attempt to start the boot CPU!
  51.913 -		 */
  51.914 -		if (apicid == boot_cpu_id || (apicid == BAD_APICID))
  51.915 -			continue;
  51.916 -
  51.917 -		if (!physid_isset(apicid, phys_cpu_present_map))
  51.918 -			continue;
  51.919 -		if ((max_cpus >= 0) && (max_cpus <= cpucount+1))
  51.920 -			continue;
  51.921 -
  51.922 -		do_boot_cpu(apicid);
  51.923 -		++kicked;
  51.924 +	if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) {
  51.925 +		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
  51.926 +		      GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id);
  51.927 +		/* Or can we switch back to PIC here? */
  51.928  	}
  51.929  
  51.930  	/*
  51.931 -	 * Cleanup possible dangling ends...
  51.932 -	 */
  51.933 -	{
  51.934 -		/*
  51.935 -		 * Install writable page 0 entry to set BIOS data area.
  51.936 -		 */
  51.937 -		local_flush_tlb();
  51.938 -
  51.939 -		/*
  51.940 -		 * Paranoid:  Set warm reset code and vector here back
  51.941 -		 * to default values.
  51.942 -		 */
  51.943 -		CMOS_WRITE(0, 0xf);
  51.944 -
  51.945 -		*((volatile int *) phys_to_virt(0x467)) = 0;
  51.946 -	}
  51.947 -
  51.948 -	/*
  51.949 -	 * Allow the user to impress friends.
  51.950 -	 */
  51.951 -
  51.952 -	Dprintk("Before bogomips.\n");
  51.953 -	if (!cpucount) {
  51.954 -		printk(KERN_INFO "Only one processor found.\n");
  51.955 -	} else {
  51.956 -		unsigned long bogosum = 0;
  51.957 -		for (cpu = 0; cpu < NR_CPUS; cpu++)
  51.958 -			if (cpu_isset(cpu, cpu_callout_map))
  51.959 -				bogosum += cpu_data[cpu].loops_per_jiffy;
  51.960 -		printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
  51.961 -			cpucount+1,
  51.962 -			bogosum/(500000/HZ),
  51.963 -			(bogosum/(5000/HZ))%100);
  51.964 -		Dprintk("Before bogocount - setting activated=1.\n");
  51.965 -	}
  51.966 -
  51.967 -	/*
  51.968 -	 * Construct cpu_sibling_map[], so that we can tell the
  51.969 -	 * sibling CPU efficiently.
  51.970 -	 */
  51.971 -	for (cpu = 0; cpu < NR_CPUS; cpu++)
  51.972 -		cpus_clear(cpu_sibling_map[cpu]);
  51.973 -
  51.974 -	for (cpu = 0; cpu < NR_CPUS; cpu++) {
  51.975 -		int siblings = 0;
  51.976 -		int i;
  51.977 -		if (!cpu_isset(cpu, cpu_callout_map))
  51.978 -			continue;
  51.979 -
  51.980 -		if (smp_num_siblings > 1) {
  51.981 -			for (i = 0; i < NR_CPUS; i++) {
  51.982 -				if (!cpu_isset(i, cpu_callout_map))
  51.983 -					continue;
  51.984 -				if (phys_proc_id[cpu] == phys_proc_id[i]) {
  51.985 -					siblings++;
  51.986 -					cpu_set(i, cpu_sibling_map[cpu]);
  51.987 -				}
  51.988 -			}
  51.989 -		} else { 
  51.990 -			siblings++;
  51.991 -			cpu_set(cpu, cpu_sibling_map[cpu]);
  51.992 -		}
  51.993 -
  51.994 -		if (siblings != smp_num_siblings) {
  51.995 -			printk(KERN_WARNING 
  51.996 -	       "WARNING: %d siblings found for CPU%d, should be %d\n", 
  51.997 -			       siblings, cpu, smp_num_siblings);
  51.998 -			smp_num_siblings = siblings;
  51.999 -		}       
 51.1000 -	}
 51.1001 -
 51.1002 -	Dprintk("Boot done.\n");
 51.1003 -
 51.1004 -	/*
 51.1005 -	 * Here we can be sure that there is an IO-APIC in the system. Let's
 51.1006 -	 * go and set it up:
 51.1007 +	 * Now start the IO-APICs
 51.1008  	 */
 51.1009  	if (!skip_ioapic_setup && nr_ioapics)
 51.1010  		setup_IO_APIC();
 51.1011  	else
 51.1012  		nr_ioapics = 0;
 51.1013  
 51.1014 -	setup_boot_APIC_clock();
 51.1015 -
 51.1016  	/*
 51.1017 -	 * Synchronize the TSC with the AP
 51.1018 +	 * Set up local APIC timer on boot CPU.
 51.1019  	 */
 51.1020 -	if (cpu_has_tsc && cpucount)
 51.1021 -		synchronize_tsc_bp();
 51.1022  
 51.1023 - smp_done:
 51.1024 -	time_init_smp();
 51.1025 -}
 51.1026 -
 51.1027 -/* These are wrappers to interface to the new boot process.  Someone
 51.1028 -   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 51.1029 -void __init smp_prepare_cpus(unsigned int max_cpus)
 51.1030 -{
 51.1031 -	smp_boot_cpus(max_cpus);
 51.1032 +	setup_boot_APIC_clock();
 51.1033  }
 51.1034  
 51.1035 -void __devinit smp_prepare_boot_cpu(void)
 51.1036 +/*
 51.1037 + * Early setup to make printk work.
 51.1038 + */
 51.1039 +void __init smp_prepare_boot_cpu(void)
 51.1040  {
 51.1041 -	cpu_set(smp_processor_id(), cpu_online_map);
 51.1042 -	cpu_set(smp_processor_id(), cpu_callout_map);
 51.1043 +	int me = smp_processor_id();
 51.1044 +	cpu_set(me, cpu_online_map);
 51.1045 +	cpu_set(me, cpu_callout_map);
 51.1046  }
 51.1047  
 51.1048 -int __devinit __cpu_up(unsigned int cpu)
 51.1049 +/*
 51.1050 + * Entry point to boot a CPU.
 51.1051 + *
 51.1052 + * This is all __cpuinit, not __devinit for now because we don't support
 51.1053 + * CPU hotplug (yet).
 51.1054 + */
 51.1055 +int __cpuinit __cpu_up(unsigned int cpu)
 51.1056  {
 51.1057 -	/* This only works at boot for x86.  See "rewrite" above. */
 51.1058 -	if (cpu_isset(cpu, smp_commenced_mask)) {
 51.1059 -		local_irq_enable();
 51.1060 -		return -ENOSYS;
 51.1061 +	int err;
 51.1062 +	int apicid = cpu_present_to_apicid(cpu);
 51.1063 +
 51.1064 +	WARN_ON(irqs_disabled());
 51.1065 +
 51.1066 +	Dprintk("++++++++++++++++++++=_---CPU UP  %u\n", cpu);
 51.1067 +
 51.1068 +	if (apicid == BAD_APICID || apicid == boot_cpu_id ||
 51.1069 +	    !physid_isset(apicid, phys_cpu_present_map)) {
 51.1070 +		printk("__cpu_up: bad cpu %d\n", cpu);
 51.1071 +		return -EINVAL;
 51.1072  	}
 51.1073  
 51.1074 -	/* In case one didn't come up */
 51.1075 -	if (!cpu_isset(cpu, cpu_callin_map)) {
 51.1076 -		local_irq_enable();
 51.1077 -		return -EIO;
 51.1078 +	/* Boot it! */
 51.1079 +	err = do_boot_cpu(cpu, apicid);
 51.1080 +	if (err < 0) {
 51.1081 +		Dprintk("do_boot_cpu failed %d\n", err);
 51.1082 +		return err;
 51.1083  	}
 51.1084 -	local_irq_enable();
 51.1085  
 51.1086  	/* Unleash the CPU! */
 51.1087  	Dprintk("waiting for cpu %d\n", cpu);
 51.1088  
 51.1089 -	cpu_set(cpu, smp_commenced_mask);
 51.1090  	while (!cpu_isset(cpu, cpu_online_map))
 51.1091 -		mb();
 51.1092 +		cpu_relax();
 51.1093  	return 0;
 51.1094  }
 51.1095  
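
Editor's note: the wait loop in __cpu_up() now spins with cpu_relax() instead of mb(). The barrier only forced memory ordering; cpu_relax() also issues the x86 pause hint, which eases pressure on a hyperthreaded sibling while polling. A userspace sketch of the idiom (roughly what cpu_relax() expands to on x86):

	#include <stdio.h>

	/* The "pause" hint plus a compiler barrier, so the polled flag is
	 * re-read on every iteration. */
	static inline void relax(void)
	{
		__asm__ __volatile__("pause" ::: "memory");
	}

	volatile int online;	/* stands in for the CPU's cpu_online_map bit */

	int main(void)
	{
		online = 1;		/* pretend the AP already came up */
		while (!online)
			relax();	/* busy-wait, as in __cpu_up() */
		printf("cpu online\n");
		return 0;
	}
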
 51.1096 -void __init smp_cpus_done(unsigned int max_cpus)
 51.1097 +/*
 51.1098 + * Finish the SMP boot.
 51.1099 + */
 51.1100 +void __cpuinit smp_cpus_done(unsigned int max_cpus)
 51.1101  {
 51.1102 +	zap_low_mappings();
 51.1103 +	smp_cleanup_boot();
 51.1104 +
 51.1105  #ifdef CONFIG_X86_IO_APIC
 51.1106  	setup_ioapic_dest();
 51.1107  #endif
 51.1108 -	zap_low_mappings();
 51.1109 +
 51.1110 +	detect_siblings();
 51.1111 +	time_init_gtod();
 51.1112 +
 51.1113 +	check_nmi_watchdog();
 51.1114  }
 51.1115 -
    52.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c	Mon Jul 11 09:29:56 2005 -0500
    52.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c	Mon Jul 11 09:35:19 2005 -0500
    52.3 @@ -28,6 +28,7 @@
    52.4  #include <linux/interrupt.h>
    52.5  #include <linux/module.h>
    52.6  #include <linux/moduleparam.h>
    52.7 +#include <linux/nmi.h>
    52.8  
    52.9  #include <asm/system.h>
   52.10  #include <asm/uaccess.h>
   52.11 @@ -119,95 +120,106 @@ int printk_address(unsigned long address
   52.12  } 
   52.13  #endif
   52.14  
   52.15 -unsigned long *in_exception_stack(int cpu, unsigned long stack) 
   52.16 -{ 
   52.17 -	int k;
   52.18 +static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
   52.19 +					unsigned *usedp, const char **idp)
   52.20 +{
   52.21 +	static const char ids[N_EXCEPTION_STACKS][8] = {
   52.22 +		[DEBUG_STACK - 1] = "#DB",
   52.23 +		[NMI_STACK - 1] = "NMI",
   52.24 +		[DOUBLEFAULT_STACK - 1] = "#DF",
   52.25 +		[STACKFAULT_STACK - 1] = "#SS",
   52.26 +		[MCE_STACK - 1] = "#MC",
   52.27 +	};
   52.28 +	unsigned k;
   52.29 +
   52.30  	for (k = 0; k < N_EXCEPTION_STACKS; k++) {
   52.31 -		struct tss_struct *tss = &per_cpu(init_tss, cpu);
   52.32 -		unsigned long end = tss->ist[k] + EXCEPTION_STKSZ;
   52.33 +		unsigned long end;
   52.34  
   52.35 -		if (stack >= tss->ist[k]  && stack <= end)
   52.36 +		end = per_cpu(init_tss, cpu).ist[k];
   52.37 +		if (stack >= end)
   52.38 +			continue;
   52.39 +		if (stack >= end - EXCEPTION_STKSZ) {
   52.40 +			if (*usedp & (1U << k))
   52.41 +				break;
   52.42 +			*usedp |= 1U << k;
   52.43 +			*idp = ids[k];
   52.44  			return (unsigned long *)end;
   52.45 +		}
   52.46  	}
   52.47  	return NULL;
   52.48 -} 
   52.49 +}
   52.50  
   52.51  /*
    52.52   * x86-64 can have up to three kernel stacks: 
   52.53   * process stack
   52.54   * interrupt stack
   52.55 - * severe exception (double fault, nmi, stack fault) hardware stack
   52.56 - * Check and process them in order.
   52.57 + * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
   52.58   */
   52.59  
   52.60  void show_trace(unsigned long *stack)
   52.61  {
   52.62  	unsigned long addr;
   52.63 -	unsigned long *irqstack, *irqstack_end, *estack_end;
   52.64 -	const int cpu = safe_smp_processor_id();
   52.65 +	const unsigned cpu = safe_smp_processor_id();
   52.66 +	unsigned long *irqstack_end = (unsigned long *)cpu_pda[cpu].irqstackptr;
   52.67  	int i;
   52.68 +	unsigned used = 0;
   52.69  
   52.70  	printk("\nCall Trace:");
   52.71 -	i = 0; 
   52.72 -	
   52.73 -	estack_end = in_exception_stack(cpu, (unsigned long)stack); 
   52.74 -	if (estack_end) { 
   52.75 -		while (stack < estack_end) { 
   52.76 -			addr = *stack++; 
   52.77 -			if (__kernel_text_address(addr)) {
   52.78 -				i += printk_address(addr);
   52.79 -				i += printk(" "); 
   52.80 -				if (i > 50) {
   52.81 -					printk("\n"); 
   52.82 -					i = 0;
   52.83 -				}
   52.84 +
   52.85 +#define HANDLE_STACK(cond) \
   52.86 +	do while (cond) { \
   52.87 +		addr = *stack++; \
   52.88 +		if (kernel_text_address(addr)) { \
   52.89 +			/* \
   52.90 +			 * If the address is either in the text segment of the \
   52.91 +			 * kernel, or in the region which contains vmalloc'ed \
   52.92 +			 * memory, it *may* be the address of a calling \
   52.93 +			 * routine; if so, print it so that someone tracing \
   52.94 +			 * down the cause of the crash will be able to figure \
   52.95 +			 * out the call path that was taken. \
   52.96 +			 */ \
   52.97 +			i += printk_address(addr); \
   52.98 +			if (i > 50) { \
   52.99 +				printk("\n       "); \
  52.100 +				i = 0; \
  52.101 +			} \
  52.102 +			else \
  52.103 +				i += printk(" "); \
  52.104 +		} \
  52.105 +	} while (0)
  52.106 +
  52.107 +	for(i = 0; ; ) {
  52.108 +		const char *id;
  52.109 +		unsigned long *estack_end;
  52.110 +		estack_end = in_exception_stack(cpu, (unsigned long)stack,
  52.111 +						&used, &id);
  52.112 +
  52.113 +		if (estack_end) {
  52.114 +			i += printk(" <%s> ", id);
  52.115 +			HANDLE_STACK (stack < estack_end);
  52.116 +			i += printk(" <EOE> ");
  52.117 +			stack = (unsigned long *) estack_end[-2];
  52.118 +			continue;
  52.119 +		}
  52.120 +		if (irqstack_end) {
  52.121 +			unsigned long *irqstack;
  52.122 +			irqstack = irqstack_end -
  52.123 +				(IRQSTACKSIZE - 64) / sizeof(*irqstack);
  52.124 +
  52.125 +			if (stack >= irqstack && stack < irqstack_end) {
  52.126 +				i += printk(" <IRQ> ");
  52.127 +				HANDLE_STACK (stack < irqstack_end);
  52.128 +				stack = (unsigned long *) (irqstack_end[-1]);
  52.129 +				irqstack_end = NULL;
  52.130 +				i += printk(" <EOI> ");
  52.131 +				continue;
  52.132  			}
  52.133  		}
  52.134 -		i += printk(" <EOE> "); 
  52.135 -		i += 7;
  52.136 -		stack = (unsigned long *) estack_end[-2]; 
  52.137 -	}  
  52.138 -
  52.139 -	irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr);
  52.140 -	irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE + 64);
  52.141 +		break;
  52.142 +	}
  52.143  
  52.144 -	if (stack >= irqstack && stack < irqstack_end) {
  52.145 -		printk("<IRQ> ");  
  52.146 -		while (stack < irqstack_end) {
  52.147 -			addr = *stack++;
  52.148 -			/*
  52.149 -			 * If the address is either in the text segment of the
  52.150 -			 * kernel, or in the region which contains vmalloc'ed
  52.151 -			 * memory, it *may* be the address of a calling
  52.152 -			 * routine; if so, print it so that someone tracing
  52.153 -			 * down the cause of the crash will be able to figure
  52.154 -			 * out the call path that was taken.
  52.155 -			 */
  52.156 -			 if (__kernel_text_address(addr)) {
  52.157 -				 i += printk_address(addr);
  52.158 -				 i += printk(" "); 
  52.159 -				 if (i > 50) { 
  52.160 -					printk("\n       ");
  52.161 -					 i = 0;
  52.162 -				 } 
  52.163 -			}
  52.164 -		} 
  52.165 -		stack = (unsigned long *) (irqstack_end[-1]);
  52.166 -		printk(" <EOI> ");
  52.167 -		i += 7;
  52.168 -	} 
  52.169 -
  52.170 -	while (((long) stack & (THREAD_SIZE-1)) != 0) {
  52.171 -		addr = *stack++;
  52.172 -		if (__kernel_text_address(addr)) {
  52.173 -			i += printk_address(addr);
  52.174 -			i += printk(" "); 
  52.175 -			if (i > 50) { 
  52.176 -				printk("\n       ");
  52.177 -					 i = 0;
  52.178 -			} 
  52.179 -		}
  52.180 -	}
  52.181 +	HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
  52.182 +#undef HANDLE_STACK
  52.183  	printk("\n");
  52.184  }
  52.185  
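
Editor's note: the rewritten show_trace() folds the three near-identical address-printing loops (exception stack, IRQ stack, process stack) into the single HANDLE_STACK macro; only the bounds condition differs per region. A userspace sketch of the pattern, where is_text() stands in for kernel_text_address() and the sample stack values are invented:

	#include <stdio.h>

	static int is_text(unsigned long addr) { return addr >= 0x1000; }

	/* One walker, parameterized only by its termination condition,
	 * mirroring the "do while (cond) { ... } while (0)" construct above. */
	#define WALK(cond)						\
		do while (cond) {					\
			unsigned long addr = *sp++;			\
			if (is_text(addr))				\
				printf(" [<%016lx>]", addr);		\
		} while (0)

	int main(void)
	{
		unsigned long stack[] = { 0x10, 0x4000, 0x20, 0x5000, 0 };
		unsigned long *sp = stack, *end = stack + 5;

		WALK(sp < end);		/* prints only the text-like entries */
		printf("\n");
		return 0;
	}
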
  52.186 @@ -243,6 +255,7 @@ void show_stack(struct task_struct *tsk,
  52.187  		if (i && ((i % 4) == 0))
  52.188  			printk("\n       ");
  52.189  		printk("%016lx ", *stack++);
  52.190 +		touch_nmi_watchdog();
  52.191  	}
  52.192  	show_trace((unsigned long *)rsp);
  52.193  }
  52.194 @@ -319,10 +332,12 @@ void handle_BUG(struct pt_regs *regs)
  52.195  	printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", f.filename, f.line);
  52.196  } 
  52.197  
  52.198 +#ifdef CONFIG_BUG
  52.199  void out_of_line_bug(void)
  52.200  { 
  52.201  	BUG(); 
  52.202  } 
  52.203 +#endif
  52.204  
  52.205  static DEFINE_SPINLOCK(die_lock);
  52.206  static int die_owner = -1;
  52.207 @@ -348,7 +363,6 @@ void oops_end(void)
  52.208  	die_owner = -1;
  52.209  	bust_spinlocks(0); 
  52.210  	spin_unlock(&die_lock); 
  52.211 -	local_irq_enable();	/* make sure back scroll still works */
  52.212  	if (panic_on_oops)
  52.213  		panic("Oops"); 
  52.214  } 
  52.215 @@ -489,24 +503,8 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", inv
  52.216  DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
  52.217  DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
  52.218  DO_ERROR(18, SIGSEGV, "reserved", reserved)
  52.219 -
  52.220 -#define DO_ERROR_STACK(trapnr, signr, str, name) \
  52.221 -asmlinkage void *do_##name(struct pt_regs * regs, long error_code) \
  52.222 -{ \
  52.223 -	struct pt_regs *pr = ((struct pt_regs *)(current->thread.rsp0))-1; \
  52.224 -	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
  52.225 -							== NOTIFY_STOP) \
  52.226 -		return regs; \
  52.227 -	if (regs->cs & 3) { \
  52.228 -		memcpy(pr, regs, sizeof(struct pt_regs)); \
  52.229 -		regs = pr; \
  52.230 -	} \
  52.231 -	do_trap(trapnr, signr, str, regs, error_code, NULL); \
  52.232 -	return regs;		\
  52.233 -}
  52.234 -
  52.235 -DO_ERROR_STACK(12, SIGBUS,  "stack segment", stack_segment)
  52.236 -DO_ERROR_STACK( 8, SIGSEGV, "double fault", double_fault)
  52.237 +DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
  52.238 +DO_ERROR( 8, SIGSEGV, "double fault", double_fault)
  52.239  
  52.240  asmlinkage void do_general_protection(struct pt_regs * regs, long error_code)
  52.241  {
  52.242 @@ -585,6 +583,8 @@ static void unknown_nmi_error(unsigned c
  52.243  	printk("Do you have a strange power saving mode enabled?\n");
  52.244  }
  52.245  
  52.246 +/* Runs on IST stack. This code must keep interrupts off all the time.
  52.247 +   Nested NMIs are prevented by the CPU. */
  52.248  asmlinkage void default_do_nmi(struct pt_regs *regs)
  52.249  {
  52.250  	unsigned char reason = 0;
  52.251 @@ -619,15 +619,6 @@ asmlinkage void default_do_nmi(struct pt
  52.252  		mem_parity_error(reason, regs);
  52.253  	if (reason & 0x40)
  52.254  		io_check_error(reason, regs);
  52.255 -
  52.256 -	/*
  52.257 -	 * Reassert NMI in case it became active meanwhile
  52.258 -	 * as it's edge-triggered.
  52.259 -	 */
  52.260 -	outb(0x8f, 0x70);
  52.261 -	inb(0x71);		/* dummy */
  52.262 -	outb(0x0f, 0x70);
  52.263 -	inb(0x71);		/* dummy */
  52.264  }
  52.265  
  52.266  asmlinkage void do_int3(struct pt_regs * regs, long error_code)
  52.267 @@ -639,20 +630,34 @@ asmlinkage void do_int3(struct pt_regs *
  52.268  	return;
  52.269  }
  52.270  
  52.271 +/* Helps a handler running on an IST stack switch back to the process
  52.272 +   stack for scheduling or signal handling. The actual stack switch is
  52.273 +   done in entry.S */
  52.274 +asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs)
  52.275 +{
  52.276 +	struct pt_regs *regs = eregs;
  52.277 +	/* Already synced */
  52.278 +	if (eregs == (struct pt_regs *)eregs->rsp)
  52.279 +		;
  52.280 +	/* Exception from user space */
  52.281 +	else if (eregs->cs & 3)
  52.282 +		regs = ((struct pt_regs *)current->thread.rsp0) - 1;
  52.283 +	/* Exception from kernel and interrupts are enabled. Move to
  52.284 + 	   kernel process stack. */
  52.285 +	else if (eregs->eflags & X86_EFLAGS_IF)
  52.286 +		regs = (struct pt_regs *)(eregs->rsp -= sizeof(struct pt_regs));
  52.287 +	if (eregs != regs)
  52.288 +		*regs = *eregs;
  52.289 +	return regs;
  52.290 +}
  52.291 +
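
Editor's note: sync_regs() is the helper that lets an IST-stack handler such as do_debug() continue on an ordinary stack, replacing the per-trap DO_ERROR_STACK copies removed further down. It distinguishes three cases before falling back to staying put. A plain enumeration of that branch logic, with the kernel conditions noted in comments:

	#include <stdio.h>

	/* already_synced <=> eregs == (struct pt_regs *)eregs->rsp,
	 * from_user      <=> eregs->cs & 3,
	 * irqs_on        <=> eregs->eflags & X86_EFLAGS_IF. */
	static const char *where(int already_synced, int from_user, int irqs_on)
	{
		if (already_synced)
			return "keep regs in place";
		if (from_user)
			return "copy to top of process stack";
		if (irqs_on)
			return "push below rsp on kernel stack";
		return "stay on the IST stack";
	}

	int main(void)
	{
		printf("%s\n%s\n%s\n%s\n",
		       where(1, 0, 0), where(0, 1, 0),
		       where(0, 0, 1), where(0, 0, 0));
		return 0;
	}
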
  52.292  /* runs on IST stack. */
  52.293 -asmlinkage void *do_debug(struct pt_regs * regs, unsigned long error_code)
  52.294 +asmlinkage void do_debug(struct pt_regs * regs, unsigned long error_code)
  52.295  {
  52.296 -	struct pt_regs *pr;
  52.297  	unsigned long condition;
  52.298  	struct task_struct *tsk = current;
  52.299  	siginfo_t info;
  52.300  
  52.301 -	pr = (struct pt_regs *)(current->thread.rsp0)-1;
  52.302 -	if (regs->cs & 3) {
  52.303 -		memcpy(pr, regs, sizeof(struct pt_regs));
  52.304 -		regs = pr;
  52.305 -	}	
  52.306 -
  52.307  #ifdef CONFIG_CHECKING
  52.308         { 
  52.309  	       /* RED-PEN interaction with debugger - could destroy gs */
  52.310 @@ -669,9 +674,9 @@ asmlinkage void *do_debug(struct pt_regs
  52.311  	asm("movq %%db6,%0" : "=r" (condition));
  52.312  
  52.313  	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
  52.314 -						SIGTRAP) == NOTIFY_STOP) {
  52.315 -		return regs;
  52.316 -	}
  52.317 +						SIGTRAP) == NOTIFY_STOP)
  52.318 +		return;
  52.319 +
  52.320  	conditional_sti(regs);
  52.321  
  52.322  	/* Mask out spurious debug traps due to lazy DR7 setting */
  52.323 @@ -684,9 +689,7 @@ asmlinkage void *do_debug(struct pt_regs
  52.324  	tsk->thread.debugreg6 = condition;
  52.325  
  52.326  	/* Mask out spurious TF errors due to lazy TF clearing */
  52.327 -	if ((condition & DR_STEP) &&
  52.328 -	    (notify_die(DIE_DEBUGSTEP, "debugstep", regs, condition,
  52.329 -			1, SIGTRAP) != NOTIFY_STOP)) {
  52.330 +	if (condition & DR_STEP) {
  52.331  		/*
  52.332  		 * The TF error should be masked out only if the current
  52.333  		 * process is not traced and if the TRAP flag has been set
  52.334 @@ -698,8 +701,14 @@ asmlinkage void *do_debug(struct pt_regs
  52.335  		 */
  52.336                  if ((regs->cs & 3) == 0)
  52.337                         goto clear_TF_reenable;
  52.338 -		if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE)
  52.339 -			goto clear_TF;
  52.340 +		/*
  52.341 +		 * Was the TF flag set by a debugger? If so, clear it now,
  52.342 +		 * so that register information is correct.
  52.343 +		 */
  52.344 +		if (tsk->ptrace & PT_DTRACE) {
  52.345 +			regs->eflags &= ~TF_MASK;
  52.346 +			tsk->ptrace &= ~PT_DTRACE;
  52.347 +		}
  52.348  	}
  52.349  
  52.350  	/* Ok, finally something we can handle */
  52.351 @@ -715,18 +724,11 @@ asmlinkage void *do_debug(struct pt_regs
  52.352  	force_sig_info(SIGTRAP, &info, tsk);	
  52.353  clear_dr7:
  52.354  	asm volatile("movq %0,%%db7"::"r"(0UL));
  52.355 -	notify_die(DIE_DEBUG, "debug", regs, condition, 1, SIGTRAP);
  52.356 -	return regs;
  52.357 +	return;
  52.358  
  52.359  clear_TF_reenable:
  52.360  	set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
  52.361 -
  52.362 -clear_TF:
  52.363 -	/* RED-PEN could cause spurious errors */
  52.364 -	if (notify_die(DIE_DEBUG, "debug2", regs, condition, 1, SIGTRAP) 
  52.365 -								!= NOTIFY_STOP)
  52.366  	regs->eflags &= ~TF_MASK;
  52.367 -	return regs;	
  52.368  }
  52.369  
  52.370  static int kernel_math_error(struct pt_regs *regs, char *str)
  52.371 @@ -738,14 +740,8 @@ static int kernel_math_error(struct pt_r
  52.372  		return 1;
  52.373  	}
  52.374  	notify_die(DIE_GPF, str, regs, 0, 16, SIGFPE);
  52.375 -#if 0
  52.376 -	/* This should be a die, but warn only for now */
  52.377 +	/* Illegal floating point operation in the kernel */
  52.378  	die(str, regs, 0);
  52.379 -#else
  52.380 -	printk(KERN_DEBUG "%s: %s at ", current->comm, str);
  52.381 -	printk_address(regs->rip);
  52.382 -	printk("\n");
  52.383 -#endif
  52.384  	return 0;
  52.385  }
  52.386  
  52.387 @@ -829,7 +825,7 @@ asmlinkage void do_simd_coprocessor_erro
  52.388  
  52.389  	conditional_sti(regs);
  52.390  	if ((regs->cs & 3) == 0 &&
  52.391 -        	kernel_math_error(regs, "simd math error"))
  52.392 +        	kernel_math_error(regs, "kernel simd math error"))
  52.393  		return;
  52.394  
  52.395  	/*
    53.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c	Mon Jul 11 09:29:56 2005 -0500
    53.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/vsyscall.c	Mon Jul 11 09:35:19 2005 -0500
    53.3 @@ -9,30 +9,14 @@
    53.4   *  a different vsyscall implementation for Linux/IA32 and for the name.
    53.5   *
    53.6   *  vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
    53.7 - *  at virtual address -10Mbyte+1024bytes etc... There are at max 8192
    53.8 + *  at virtual address -10Mbyte+1024bytes etc. There are at most 4
    53.9   *  vsyscalls. One vsyscall can reserve more than 1 slot to avoid
   53.10 - *  jumping out of line if necessary.
   53.11 - *
   53.12 - *  Note: the concept clashes with user mode linux. If you use UML just
   53.13 - *  set the kernel.vsyscall sysctl to 0.
   53.14 - */
   53.15 -
   53.16 -/*
   53.17 - * TODO 2001-03-20:
   53.18 + *  jumping out of line if necessary. We cannot add more with this
   53.19 + *  mechanism because older kernels won't return -ENOSYS.
   53.20 + *  If we want more than four, we need a vDSO.
   53.21   *
   53.22 - * 1) make page fault handler detect faults on page1-page-last of the vsyscall
   53.23 - *    virtual space, and make it increase %rip and write -ENOSYS in %rax (so
   53.24 - *    we'll be able to upgrade to a new glibc without upgrading kernel after
   53.25 - *    we add more vsyscalls.
   53.26 - * 2) Possibly we need a fixmap table for the vsyscalls too if we want
   53.27 - *    to avoid SIGSEGV and we want to return -EFAULT from the vsyscalls as well.
   53.28 - *    Can we segfault inside a "syscall"? We can fix this anytime and those fixes
   53.29 - *    won't be visible for userspace. Not fixing this is a noop for correct programs,
   53.30 - *    broken programs will segfault and there's no security risk until we choose to
   53.31 - *    fix it.
   53.32 - *
   53.33 - * These are not urgent things that we need to address only before shipping the first
   53.34 - * production binary kernels.
   53.35 + *  Note: the concept clashes with user mode linux. If you use UML and
   53.36 + *  want per guest time just set the kernel.vsyscall64 sysctl to 0.
   53.37   */
   53.38  
   53.39  #include <linux/time.h>
   53.40 @@ -41,6 +25,7 @@
   53.41  #include <linux/timer.h>
   53.42  #include <linux/seqlock.h>
   53.43  #include <linux/jiffies.h>
   53.44 +#include <linux/sysctl.h>
   53.45  
   53.46  #include <asm/vsyscall.h>
   53.47  #include <asm/pgtable.h>
   53.48 @@ -62,8 +47,7 @@ static force_inline void timeval_normali
   53.49  	time_t __sec;
   53.50  
   53.51  	__sec = tv->tv_usec / 1000000;
   53.52 -	if (__sec)
   53.53 -	{
   53.54 +	if (__sec) {
   53.55  		tv->tv_usec %= 1000000;
   53.56  		tv->tv_sec += __sec;
   53.57  	}
   53.58 @@ -81,13 +65,14 @@ static force_inline void do_vgettimeofda
   53.59  		usec = (__xtime.tv_nsec / 1000) +
   53.60  			(__jiffies - __wall_jiffies) * (1000000 / HZ);
   53.61  
   53.62 -		if (__vxtime.mode == VXTIME_TSC) {
   53.63 +		if (__vxtime.mode != VXTIME_HPET) {
   53.64  			sync_core();
   53.65  			rdtscll(t);
   53.66 -			if (t < __vxtime.last_tsc) t = __vxtime.last_tsc;
   53.67 +			if (t < __vxtime.last_tsc)
   53.68 +				t = __vxtime.last_tsc;
   53.69  			usec += ((t - __vxtime.last_tsc) *
   53.70  				 __vxtime.tsc_quot) >> 32;
   53.71 -			/* See comment in x86_64 do_gettimeofday. */ 
   53.72 +			/* See comment in x86_64 do_gettimeofday. */
   53.73  		} else {
   53.74  			usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
   53.75  				  __vxtime.last) * __vxtime.quot) >> 32;
   53.76 @@ -101,14 +86,13 @@ static force_inline void do_vgettimeofda
   53.77  /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */
   53.78  static force_inline void do_get_tz(struct timezone * tz)
   53.79  {
   53.80 -		*tz = __sys_tz;
   53.81 +	*tz = __sys_tz;
   53.82  }
   53.83  
   53.84 -
   53.85  static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
   53.86  {
   53.87  	int ret;
   53.88 -	asm volatile("syscall" 
   53.89 +	asm volatile("vsysc2: syscall"
   53.90  		: "=a" (ret)
   53.91  		: "0" (__NR_gettimeofday),"D" (tv),"S" (tz) : __syscall_clobber );
   53.92  	return ret;
   53.93 @@ -117,7 +101,7 @@ static force_inline int gettimeofday(str
   53.94  static force_inline long time_syscall(long *t)
   53.95  {
   53.96  	long secs;
   53.97 -	asm volatile("syscall" 
   53.98 +	asm volatile("vsysc1: syscall"
   53.99  		: "=a" (secs)
  53.100  		: "0" (__NR_time),"D" (t) : __syscall_clobber);
  53.101  	return secs;
  53.102 @@ -126,7 +110,7 @@ static force_inline long time_syscall(lo
  53.103  static int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
  53.104  {
  53.105  	if (unlikely(!__sysctl_vsyscall))
  53.106 -	return gettimeofday(tv,tz); 
  53.107 +		return gettimeofday(tv,tz);
  53.108  	if (tv)
  53.109  		do_vgettimeofday(tv);
  53.110  	if (tz)
  53.111 @@ -153,9 +137,71 @@ static long __vsyscall(2) venosys_0(void
  53.112  static long __vsyscall(3) venosys_1(void)
  53.113  {
  53.114  	return -ENOSYS;
  53.115 +}
  53.116  
  53.117 +#ifdef CONFIG_SYSCTL
  53.118 +
  53.119 +#define SYSCALL 0x050f
  53.120 +#define NOP2    0x9090
  53.121 +
  53.122 +/*
  53.123 + * NOP out syscall in vsyscall page when not needed.
  53.124 + */
  53.125 +static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
  53.126 +                        void __user *buffer, size_t *lenp, loff_t *ppos)
  53.127 +{
  53.128 +	extern u16 vsysc1, vsysc2;
  53.129 +	u16 *map1, *map2;
  53.130 +	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
  53.131 +	if (!write)
  53.132 +		return ret;
  53.133 +	/* gcc has some trouble with __va(__pa()), so just do it this
  53.134 +	   way. */
  53.135 +	map1 = ioremap(__pa_symbol(&vsysc1), 2);
  53.136 +	if (!map1)
  53.137 +		return -ENOMEM;
  53.138 +	map2 = ioremap(__pa_symbol(&vsysc2), 2);
  53.139 +	if (!map2) {
  53.140 +		ret = -ENOMEM;
  53.141 +		goto out;
  53.142 +	}
  53.143 +	if (!sysctl_vsyscall) {
  53.144 +		*map1 = SYSCALL;
  53.145 +		*map2 = SYSCALL;
  53.146 +	} else {
  53.147 +		*map1 = NOP2;
  53.148 +		*map2 = NOP2;
  53.149 +	}
  53.150 +	iounmap(map2);
  53.151 +out:
  53.152 +	iounmap(map1);
  53.153 +	return ret;
  53.154  }
  53.155  
  53.156 +static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
  53.157 +				void __user *oldval, size_t __user *oldlenp,
  53.158 +				void __user *newval, size_t newlen,
  53.159 +				void **context)
  53.160 +{
  53.161 +	return -ENOSYS;
  53.162 +}
  53.163 +
  53.164 +static ctl_table kernel_table2[] = {
  53.165 +	{ .ctl_name = 99, .procname = "vsyscall64",
  53.166 +	  .data = &sysctl_vsyscall, .maxlen = sizeof(int), .mode = 0644,
  53.167 +	  .strategy = vsyscall_sysctl_nostrat,
  53.168 +	  .proc_handler = vsyscall_sysctl_change },
  53.169 +	{ 0, }
  53.170 +};
  53.171 +
  53.172 +static ctl_table kernel_root_table2[] = {
  53.173 +	{ .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
  53.174 +	  .child = kernel_table2 },
  53.175 +	{ 0 },
  53.176 +};
  53.177 +
  53.178 +#endif
  53.179 +
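
Editor's note: vsyscall_sysctl_change() patches the live vsyscall page, overwriting the two labelled syscall instructions (vsysc1/vsysc2 above) with NOPs when the fast path is enabled and restoring them when it is not. The SYSCALL and NOP2 constants make sense once byte order is considered; a quick host-side check (assumes a little-endian machine, which x86-64 is):

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>

	/* "syscall" encodes as the bytes 0x0f 0x05 and "nop" as 0x90;
	 * read as 16-bit values they come out byte-swapped. */
	int main(void)
	{
		unsigned char syscall_insn[2] = { 0x0f, 0x05 };
		unsigned char two_nops[2] = { 0x90, 0x90 };
		uint16_t a, b;

		memcpy(&a, syscall_insn, 2);
		memcpy(&b, two_nops, 2);
		printf("SYSCALL=0x%04x NOP2=0x%04x\n", a, b);	/* 0x050f 0x9090 */
		return 0;
	}
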
  53.180  static void __init map_vsyscall(void)
  53.181  {
  53.182  	extern char __vsyscall_0;
  53.183 @@ -176,14 +222,15 @@ static void __init map_vsyscall_user(voi
  53.184  
  53.185  static int __init vsyscall_init(void)
  53.186  {
  53.187 -        BUG_ON(((unsigned long) &vgettimeofday != 
  53.188 -		      VSYSCALL_ADDR(__NR_vgettimeofday)));
  53.189 +	BUG_ON(((unsigned long) &vgettimeofday !=
  53.190 +			VSYSCALL_ADDR(__NR_vgettimeofday)));
  53.191  	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
  53.192  	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
  53.193  	map_vsyscall();
   53.194          map_vsyscall_user();    /* establish translation for user address space */
  53.195 -	sysctl_vsyscall = 0;    /* TBD */
  53.196 -
  53.197 +#ifdef CONFIG_SYSCTL
  53.198 +	register_sysctl_table(kernel_root_table2, 0);
  53.199 +#endif
  53.200  	return 0;
  53.201  }
  53.202  
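
Editor's note: for reference, the TSC path in the do_vgettimeofday() hunk above converts elapsed cycles to microseconds with a 32.32 fixed-point multiply: tsc_quot holds (usec per cycle) << 32, so a 64-bit multiply followed by a 32-bit right shift does the scaling without division. A worked example with an invented 2 GHz clock:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t cpu_khz = 2000000;			/* 2 GHz, invented */
		uint64_t tsc_quot = (1000ULL << 32) / cpu_khz;	/* usec/cycle << 32 */
		uint64_t elapsed = 3000000;			/* 3e6 cycles */
		uint64_t usec = (elapsed * tsc_quot) >> 32;

		printf("%llu usec\n", (unsigned long long)usec);	/* ~1500 */
		return 0;
	}
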
    54.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c	Mon Jul 11 09:29:56 2005 -0500
    54.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/x8664_ksyms.c	Mon Jul 11 09:35:19 2005 -0500
    54.3 @@ -105,7 +105,6 @@ EXPORT_SYMBOL(pci_free_consistent);
    54.4  #endif
    54.5  
    54.6  #ifdef CONFIG_PCI
    54.7 -EXPORT_SYMBOL(pcibios_penalize_isa_irq);
    54.8  EXPORT_SYMBOL(pci_mem_start);
    54.9  #endif
   54.10  
   54.11 @@ -142,35 +141,23 @@ EXPORT_SYMBOL(rtc_lock);
   54.12  #undef memmove
   54.13  #undef memchr
   54.14  #undef strlen
   54.15 -#undef strcpy
   54.16  #undef strncmp
   54.17  #undef strncpy
   54.18  #undef strchr	
   54.19 -#undef strcmp 
   54.20 -#undef strcpy 
   54.21 -#undef strcat
   54.22 -#undef memcmp
   54.23  
   54.24  extern void * memset(void *,int,__kernel_size_t);
   54.25  extern size_t strlen(const char *);
   54.26  extern void * memmove(void * dest,const void *src,size_t count);
   54.27 -extern char * strcpy(char * dest,const char *src);
   54.28 -extern int strcmp(const char * cs,const char * ct);
   54.29  extern void *memchr(const void *s, int c, size_t n);
   54.30  extern void * memcpy(void *,const void *,__kernel_size_t);
   54.31  extern void * __memcpy(void *,const void *,__kernel_size_t);
   54.32 -extern char * strcat(char *, const char *);
   54.33 -extern int memcmp(const void * cs,const void * ct,size_t count);
   54.34  
   54.35  EXPORT_SYMBOL(memset);
   54.36  EXPORT_SYMBOL(strlen);
   54.37  EXPORT_SYMBOL(memmove);
   54.38 -EXPORT_SYMBOL(strcpy);
   54.39  EXPORT_SYMBOL(strncmp);
   54.40  EXPORT_SYMBOL(strncpy);
   54.41  EXPORT_SYMBOL(strchr);
   54.42 -EXPORT_SYMBOL(strcmp);
   54.43 -EXPORT_SYMBOL(strcat);
   54.44  EXPORT_SYMBOL(strncat);
   54.45  EXPORT_SYMBOL(memchr);
   54.46  EXPORT_SYMBOL(strrchr);
   54.47 @@ -178,7 +165,6 @@ EXPORT_SYMBOL(strnlen);
   54.48  EXPORT_SYMBOL(memscan);
   54.49  EXPORT_SYMBOL(memcpy);
   54.50  EXPORT_SYMBOL(__memcpy);
   54.51 -EXPORT_SYMBOL(memcmp);
   54.52  
   54.53  #ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
   54.54  /* prototypes are wrong, these are assembly with custom calling functions */
   54.55 @@ -209,8 +195,9 @@ EXPORT_SYMBOL(smp_num_siblings);
   54.56  extern void do_softirq_thunk(void);
   54.57  EXPORT_SYMBOL(do_softirq_thunk);
   54.58  
   54.59 -void out_of_line_bug(void);
   54.60 +#ifdef CONFIG_BUG
   54.61  EXPORT_SYMBOL(out_of_line_bug);
   54.62 +#endif
   54.63  
   54.64  EXPORT_SYMBOL(init_level4_pgt);
   54.65  
   54.66 @@ -219,7 +206,6 @@ EXPORT_SYMBOL(__supported_pte_mask);
   54.67  
   54.68  #ifdef CONFIG_SMP
   54.69  EXPORT_SYMBOL(flush_tlb_page);
   54.70 -EXPORT_SYMBOL_GPL(flush_tlb_all);
   54.71  #endif
   54.72  
   54.73  EXPORT_SYMBOL(cpu_khz);
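
Editor's note: the export-list pruning above (strcpy, strcmp, strcat, memcmp, flush_tlb_all) is only visible to modules; built-in code links directly, but a module still referencing a dropped symbol is refused at load time. A hypothetical out-of-tree module showing the dependency (standard obj-m kbuild assumed; whether strcpy actually resolves through the export table depends on gcc emitting an out-of-line call rather than inlining the copy):

	#include <linux/module.h>
	#include <linux/kernel.h>
	#include <linux/string.h>
	#include <linux/init.h>

	static char greeting[16];

	static int __init export_demo_init(void)
	{
		strcpy(greeting, "hello");	/* must resolve via the export table */
		printk(KERN_INFO "export_demo: %s\n", greeting);
		return 0;
	}

	static void __exit export_demo_exit(void)
	{
	}

	module_init(export_demo_init);
	module_exit(export_demo_exit);
	MODULE_LICENSE("GPL");
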
    55.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c	Mon Jul 11 09:29:56 2005 -0500
    55.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/fault.c	Mon Jul 11 09:35:19 2005 -0500
    55.3 @@ -65,21 +65,19 @@ void bust_spinlocks(int yes)
    55.4  static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
    55.5  				unsigned long error_code)
    55.6  { 
    55.7 -	unsigned char *instr = (unsigned char *)(regs->rip);
    55.8 +	unsigned char *instr;
    55.9  	int scan_more = 1;
   55.10  	int prefetch = 0; 
   55.11 -	unsigned char *max_instr = instr + 15;
   55.12 +	unsigned char *max_instr;
   55.13  
   55.14  	/* If it was a exec fault ignore */
   55.15  	if (error_code & (1<<4))
   55.16  		return 0;
   55.17  	
   55.18 -	/* Code segments in LDT could have a non zero base. Don't check
   55.19 -	   when that's possible */
   55.20 -	if (regs->cs & (1<<2))
   55.21 -		return 0;
   55.22 +	instr = (unsigned char *)convert_rip_to_linear(current, regs);
   55.23 +	max_instr = instr + 15;
   55.24  
   55.25 -	if ((regs->cs & 3) != 0 && regs->rip >= TASK_SIZE)
   55.26 +	if ((regs->cs & 3) != 0 && instr >= (unsigned char *)TASK_SIZE)
   55.27  		return 0;
   55.28  
   55.29  	while (scan_more && instr < max_instr) { 
   55.30 @@ -238,6 +236,8 @@ static noinline void pgtable_bad(unsigne
   55.31  
   55.32  /*
   55.33   * Handle a fault on the vmalloc or module mapping area
   55.34 + *
   55.35 + * This assumes no large pages in there.
   55.36   */
   55.37  static int vmalloc_fault(unsigned long address)
   55.38  {
   55.39 @@ -276,7 +276,10 @@ static int vmalloc_fault(unsigned long a
   55.40  	if (!pte_present(*pte_ref))
   55.41  		return -1;
   55.42  	pte = pte_offset_kernel(pmd, address);
   55.43 -	if (!pte_present(*pte) || pte_page(*pte) != pte_page(*pte_ref))
   55.44 +	/* Don't use pte_page here, because the mappings can point
   55.45 +	   outside mem_map, and the NUMA hash lookup cannot handle
   55.46 +	   that. */
   55.47 +	if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
   55.48  		BUG();
   55.49  	__flush_tlb_all();
   55.50  	return 0;
   55.51 @@ -361,7 +364,9 @@ asmlinkage void do_page_fault(struct pt_
   55.52  	 * protection error (error_code & 1) == 0.
   55.53  	 */
   55.54  	if (unlikely(address >= TASK_SIZE)) {
   55.55 -		if (!(error_code & 5)) {
   55.56 +		if (!(error_code & 5) &&
   55.57 +		      ((address >= VMALLOC_START && address < VMALLOC_END) ||
   55.58 +		       (address >= MODULES_VADDR && address < MODULES_END))) {
   55.59  			if (vmalloc_fault(address) < 0)
   55.60  				goto bad_area_nosemaphore;
   55.61  			return;
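
Editor's note: do_page_fault() now calls vmalloc_fault() only for addresses that actually fall in the lazily-synced regions, instead of for every non-present kernel-address fault. A userspace sketch of the guard, with invented stand-ins for the real VMALLOC_START/END and MODULES_VADDR/END bounds:

	#include <stdio.h>

	static unsigned long VMALLOC_START = 0x1000, VMALLOC_END = 0x2000;
	static unsigned long MODULES_VADDR = 0x3000, MODULES_END  = 0x4000;

	/* Only these two regions are populated lazily from init_mm, so
	 * only they can legitimately take this fixup path. */
	static int lazy_mapped(unsigned long addr)
	{
		return (addr >= VMALLOC_START && addr < VMALLOC_END) ||
		       (addr >= MODULES_VADDR && addr < MODULES_END);
	}

	int main(void)
	{
		printf("%d %d %d\n", lazy_mapped(0x1800),
		       lazy_mapped(0x2800), lazy_mapped(0x3800));	/* 1 0 1 */
		return 0;
	}
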
   55.62 @@ -471,17 +476,6 @@ bad_area:
   55.63  	up_read(&mm->mmap_sem);
   55.64  
   55.65  bad_area_nosemaphore:
   55.66 -
   55.67 -#ifdef CONFIG_IA32_EMULATION
   55.68 -	/* 32bit vsyscall. map on demand. */
   55.69 -	if (test_thread_flag(TIF_IA32) &&
   55.70 -	    address >= VSYSCALL32_BASE && address < VSYSCALL32_END) {
   55.71 -		if (map_syscall32(mm, address) < 0)
   55.72 -			goto out_of_memory2;
   55.73 -		return;
   55.74 -	}
   55.75 -#endif
   55.76 -
   55.77  	/* User mode accesses just cause a SIGSEGV */
   55.78  	if (error_code & 4) {
   55.79  		if (is_prefetch(regs, address, error_code))
   55.80 @@ -563,7 +557,6 @@ no_context:
   55.81   */
   55.82  out_of_memory:
   55.83  	up_read(&mm->mmap_sem);
   55.84 -out_of_memory2:
   55.85  	if (current->pid == 1) { 
   55.86  		yield();
   55.87  		goto again;
    56.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c	Mon Jul 11 09:29:56 2005 -0500
    56.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c	Mon Jul 11 09:35:19 2005 -0500
    56.3 @@ -919,9 +919,9 @@ static __init int x8664_sysctl_init(void
    56.4  __initcall(x8664_sysctl_init);
    56.5  #endif
    56.6  
    56.7 -/* Pseudo VMAs to allow ptrace access for the vsyscall pages.  x86-64 has two
    56.8 -   different ones: one for 32bit and one for 64bit. Use the appropiate
    56.9 -   for the target task. */
    56.10 +/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
    56.11 +   covers the 64bit vsyscall page now. 32bit has a real VMA now and does
    56.12 +   not need special handling anymore. */
   56.13  
   56.14  static struct vm_area_struct gate_vma = {
   56.15  	.vm_start = VSYSCALL_START,
   56.16 @@ -929,22 +929,11 @@ static struct vm_area_struct gate_vma = 
   56.17  	.vm_page_prot = PAGE_READONLY
   56.18  };
   56.19  
   56.20 -static struct vm_area_struct gate32_vma = {
   56.21 -	.vm_start = VSYSCALL32_BASE,
   56.22 -	.vm_end = VSYSCALL32_END,
   56.23 -	.vm_page_prot = PAGE_READONLY
   56.24 -};
   56.25 -
   56.26  struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
   56.27  {
   56.28  #ifdef CONFIG_IA32_EMULATION
   56.29 -	if (test_tsk_thread_flag(tsk, TIF_IA32)) {
   56.30 -		/* lookup code assumes the pages are present. set them up
   56.31 -		   now */
   56.32 -		if (__map_syscall32(tsk->mm, VSYSCALL32_BASE) < 0)
   56.33 -			return NULL;
   56.34 -		return &gate32_vma;
   56.35 -	}
   56.36 +	if (test_tsk_thread_flag(tsk, TIF_IA32))
   56.37 +		return NULL;
   56.38  #endif
   56.39  	return &gate_vma;
   56.40  }
   56.41 @@ -952,6 +941,8 @@ struct vm_area_struct *get_gate_vma(stru
   56.42  int in_gate_area(struct task_struct *task, unsigned long addr)
   56.43  {
   56.44  	struct vm_area_struct *vma = get_gate_vma(task);
   56.45 +	if (!vma)
   56.46 +		return 0;
   56.47  	return (addr >= vma->vm_start) && (addr < vma->vm_end);
   56.48  }
   56.49  
   56.50 @@ -961,6 +952,5 @@ int in_gate_area(struct task_struct *tas
   56.51   */
   56.52  int in_gate_area_no_task(unsigned long addr)
   56.53  {
   56.54 -	return (((addr >= VSYSCALL_START) && (addr < VSYSCALL_END)) ||
   56.55 -		((addr >= VSYSCALL32_BASE) && (addr < VSYSCALL32_END)));
   56.56 +	return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
   56.57  }
    57.1 --- a/linux-2.6-xen-sparse/drivers/Makefile	Mon Jul 11 09:29:56 2005 -0500
    57.2 +++ b/linux-2.6-xen-sparse/drivers/Makefile	Mon Jul 11 09:35:19 2005 -0500
    57.3 @@ -48,8 +48,8 @@ obj-$(CONFIG_PARIDE) 		+= block/paride/
    57.4  obj-$(CONFIG_TC)		+= tc/
    57.5  obj-$(CONFIG_USB)		+= usb/
    57.6  obj-$(CONFIG_USB_GADGET)	+= usb/gadget/
    57.7 +obj-$(CONFIG_GAMEPORT)		+= input/gameport/
    57.8  obj-$(CONFIG_INPUT)		+= input/
    57.9 -obj-$(CONFIG_GAMEPORT)		+= input/gameport/
   57.10  obj-$(CONFIG_I2O)		+= message/
   57.11  obj-$(CONFIG_I2C)		+= i2c/
   57.12  obj-$(CONFIG_W1)		+= w1/
   57.13 @@ -62,5 +62,6 @@ obj-$(CONFIG_EISA)		+= eisa/
   57.14  obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
   57.15  obj-$(CONFIG_MMC)		+= mmc/
   57.16  obj-$(CONFIG_INFINIBAND)	+= infiniband/
   57.17 +obj-$(CONFIG_BLK_DEV_SGIIOC4)	+= sn/
   57.18  obj-y				+= firmware/
   57.19  obj-$(CONFIG_CRYPTO)		+= crypto/
    58.1 --- a/linux-2.6-xen-sparse/drivers/char/mem.c	Mon Jul 11 09:29:56 2005 -0500
    58.2 +++ b/linux-2.6-xen-sparse/drivers/char/mem.c	Mon Jul 11 09:35:19 2005 -0500
    58.3 @@ -23,6 +23,7 @@
    58.4  #include <linux/devfs_fs_kernel.h>
    58.5  #include <linux/ptrace.h>
    58.6  #include <linux/device.h>
    58.7 +#include <linux/backing-dev.h>
    58.8  
    58.9  #include <asm/uaccess.h>
   58.10  #include <asm/io.h>
   58.11 @@ -76,14 +77,6 @@ static inline int uncached_access(struct
   58.12  	 * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
   58.13  	 */
   58.14  	return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
   58.15 -#elif defined(CONFIG_PPC64)
   58.16 -	/* On PPC64, we always do non-cacheable access to the IO hole and
   58.17 -	 * cacheable elsewhere. Cache paradox can checkstop the CPU and
   58.18 -	 * the high_memory heuristic below is wrong on machines with memory
   58.19 -	 * above the IO hole... Ah, and of course, XFree86 doesn't pass
   58.20 -	 * O_SYNC when mapping us to tap IO space. Surprised ?
   58.21 -	 */
   58.22 -	return !page_is_ram(addr >> PAGE_SHIFT);
   58.23  #else
   58.24  	/*
   58.25  	 * Accessing memory above the top the kernel knows about or through a file pointer
   58.26 @@ -111,38 +104,6 @@ static inline int valid_phys_addr_range(
   58.27  }
   58.28  #endif
   58.29  
   58.30 -static ssize_t do_write_mem(void *p, unsigned long realp,
   58.31 -			    const char __user * buf, size_t count, loff_t *ppos)
   58.32 -{
   58.33 -	ssize_t written;
   58.34 -	unsigned long copied;
   58.35 -
   58.36 -	written = 0;
   58.37 -#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
   58.38 -	/* we don't have page 0 mapped on sparc and m68k.. */
   58.39 -	if (realp < PAGE_SIZE) {
   58.40 -		unsigned long sz = PAGE_SIZE-realp;
   58.41 -		if (sz > count) sz = count; 
   58.42 -		/* Hmm. Do something? */
   58.43 -		buf+=sz;
   58.44 -		p+=sz;
   58.45 -		count-=sz;
   58.46 -		written+=sz;
   58.47 -	}
   58.48 -#endif
   58.49 -	copied = copy_from_user(p, buf, count);
   58.50 -	if (copied) {
   58.51 -		ssize_t ret = written + (count - copied);
   58.52 -
   58.53 -		if (ret)
   58.54 -			return ret;
   58.55 -		return -EFAULT;
   58.56 -	}
   58.57 -	written += count;
   58.58 -	*ppos += written;
   58.59 -	return written;
   58.60 -}
   58.61 -
   58.62  #ifndef ARCH_HAS_DEV_MEM
   58.63  /*
    58.64   * This function reads the *physical* memory. The f_pos points directly to the 
   58.65 @@ -152,15 +113,16 @@ static ssize_t read_mem(struct file * fi
   58.66  			size_t count, loff_t *ppos)
   58.67  {
   58.68  	unsigned long p = *ppos;
   58.69 -	ssize_t read;
   58.70 +	ssize_t read, sz;
   58.71 +	char *ptr;
   58.72  
   58.73  	if (!valid_phys_addr_range(p, &count))
   58.74  		return -EFAULT;
   58.75  	read = 0;
   58.76 -#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
   58.77 +#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
   58.78  	/* we don't have page 0 mapped on sparc and m68k.. */
   58.79  	if (p < PAGE_SIZE) {
   58.80 -		unsigned long sz = PAGE_SIZE-p;
   58.81 +		sz = PAGE_SIZE - p;
   58.82  		if (sz > count) 
   58.83  			sz = count; 
   58.84  		if (sz > 0) {
   58.85 @@ -173,9 +135,33 @@ static ssize_t read_mem(struct file * fi
   58.86  		}
   58.87  	}
   58.88  #endif
   58.89 -	if (copy_to_user(buf, __va(p), count))
   58.90 -		return -EFAULT;
   58.91 -	read += count;
   58.92 +
   58.93 +	while (count > 0) {
   58.94 +		/*
   58.95 +		 * Handle first page in case it's not aligned
   58.96 +		 */
   58.97 +		if (-p & (PAGE_SIZE - 1))
   58.98 +			sz = -p & (PAGE_SIZE - 1);
   58.99 +		else
  58.100 +			sz = PAGE_SIZE;
  58.101 +
  58.102 +		sz = min_t(unsigned long, sz, count);
  58.103 +
  58.104 +		/*
  58.105 +		 * On ia64 if a page has been mapped somewhere as
  58.106 +		 * uncached, then it must also be accessed uncached
  58.107 +		 * by the kernel or data corruption may occur
  58.108 +		 */
  58.109 +		ptr = xlate_dev_mem_ptr(p);
  58.110 +
  58.111 +		if (copy_to_user(buf, ptr, sz))
  58.112 +			return -EFAULT;
  58.113 +		buf += sz;
  58.114 +		p += sz;
  58.115 +		count -= sz;
  58.116 +		read += sz;
  58.117 +	}
  58.118 +
  58.119  	*ppos += read;
  58.120  	return read;
  58.121  }
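
Editor's note: the rewritten read_mem()/write_mem()/read_kmem() loops copy at most one page per iteration so each chunk can go through xlate_dev_mem_ptr(). The chunk size comes from -p & (PAGE_SIZE - 1): for unaligned p this is exactly the distance to the next page boundary (in two's complement the low bits of -p equal PAGE_SIZE - (p mod PAGE_SIZE)), and 0 for aligned p, hence the full-page fallback. A quick userspace check, with a page size of 4096 assumed:

	#include <assert.h>
	#include <stdio.h>

	int main(void)
	{
		const unsigned long PGSZ = 4096;
		unsigned long p;

		for (p = 0; p < 3 * PGSZ; p += 123) {
			unsigned long sz = -p & (PGSZ - 1);
			if (sz == 0)
				sz = PGSZ;		/* aligned: take a whole page */
			assert((p + sz) % PGSZ == 0);	/* always ends on a boundary */
		}
		printf("ok\n");
		return 0;
	}
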
  58.122 @@ -184,16 +170,76 @@ static ssize_t write_mem(struct file * f
  58.123  			 size_t count, loff_t *ppos)
  58.124  {
  58.125  	unsigned long p = *ppos;
  58.126 +	ssize_t written, sz;
  58.127 +	unsigned long copied;
  58.128 +	void *ptr;
  58.129  
  58.130  	if (!valid_phys_addr_range(p, &count))
  58.131  		return -EFAULT;
  58.132 -	return do_write_mem(__va(p), p, buf, count, ppos);
  58.133 +
  58.134 +	written = 0;
  58.135 +
  58.136 +#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
  58.137 +	/* we don't have page 0 mapped on sparc and m68k.. */
  58.138 +	if (p < PAGE_SIZE) {
  58.139 +		unsigned long sz = PAGE_SIZE - p;
  58.140 +		if (sz > count)
  58.141 +			sz = count;
  58.142 +		/* Hmm. Do something? */
  58.143 +		buf += sz;
  58.144 +		p += sz;
  58.145 +		count -= sz;
  58.146 +		written += sz;
  58.147 +	}
  58.148 +#endif
  58.149 +
  58.150 +	while (count > 0) {
  58.151 +		/*
  58.152 +		 * Handle first page in case it's not aligned
  58.153 +		 */
  58.154 +		if (-p & (PAGE_SIZE - 1))
  58.155 +			sz = -p & (PAGE_SIZE - 1);
  58.156 +		else
  58.157 +			sz = PAGE_SIZE;
  58.158 +
  58.159 +		sz = min_t(unsigned long, sz, count);
  58.160 +
  58.161 +		/*
  58.162 +		 * On ia64 if a page has been mapped somewhere as
  58.163 +		 * uncached, then it must also be accessed uncached
  58.164 +		 * by the kernel or data corruption may occur
  58.165 +		 */
  58.166 +		ptr = xlate_dev_mem_ptr(p);
  58.167 +
  58.168 +		copied = copy_from_user(ptr, buf, sz);
  58.169 +		if (copied) {
  58.170 +			ssize_t ret;
  58.171 +
  58.172 +			ret = written + (sz - copied);
  58.173 +			if (ret)
  58.174 +				return ret;
  58.175 +			return -EFAULT;
  58.176 +		}
  58.177 +		buf += sz;
  58.178 +		p += sz;
  58.179 +		count -= sz;
  58.180 +		written += sz;
  58.181 +	}
  58.182 +
  58.183 +	*ppos += written;
  58.184 +	return written;
  58.185  }
  58.186  #endif
  58.187  
  58.188  static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
  58.189  {
  58.190 -#ifdef pgprot_noncached
  58.191 +#if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
  58.192 +	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
  58.193 +
  58.194 +	vma->vm_page_prot = phys_mem_access_prot(file, offset,
  58.195 +						 vma->vm_end - vma->vm_start,
  58.196 +						 vma->vm_page_prot);
  58.197 +#elif defined(pgprot_noncached)
  58.198  	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
  58.199  	int uncached;
  58.200  
  58.201 @@ -212,6 +258,25 @@ static int mmap_kmem(struct file * file,
  58.202  	return 0;
  58.203  }
  58.204  
  58.205 +#if 0
  58.206 +static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
  58.207 +{
  58.208 +        unsigned long long val;
  58.209 +	/*
  58.210 +	 * RED-PEN: on some architectures there is more mapped memory
  58.211 +	 * than available in mem_map which pfn_valid checks
  58.212 +	 * for. Perhaps should add a new macro here.
  58.213 +	 *
  58.214 +	 * RED-PEN: vmalloc is not supported right now.
  58.215 +	 */
  58.216 +	if (!pfn_valid(vma->vm_pgoff))
  58.217 +		return -EIO;
  58.218 +	val = (u64)vma->vm_pgoff << PAGE_SHIFT;
  58.219 +	vma->vm_pgoff = __pa(val) >> PAGE_SHIFT;
  58.220 +	return mmap_mem(file, vma);
  58.221 +}
  58.222 +#endif
  58.223 +
  58.224  extern long vread(char *buf, char *addr, unsigned long count);
  58.225  extern long vwrite(char *buf, char *addr, unsigned long count);
  58.226  
  58.227 @@ -222,33 +287,55 @@ static ssize_t read_kmem(struct file *fi
  58.228  			 size_t count, loff_t *ppos)
  58.229  {
  58.230  	unsigned long p = *ppos;
  58.231 -	ssize_t read = 0;
  58.232 -	ssize_t virtr = 0;
  58.233 +	ssize_t low_count, read, sz;
  58.234  	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
  58.235 -		
  58.236 +
  58.237 +	read = 0;
  58.238  	if (p < (unsigned long) high_memory) {
  58.239 -		read = count;
  58.240 +		low_count = count;
  58.241  		if (count > (unsigned long) high_memory - p)
  58.242 -			read = (unsigned long) high_memory - p;
  58.243 +			low_count = (unsigned long) high_memory - p;
  58.244  
  58.245 -#if defined(__sparc__) || (defined(__mc68000__) && defined(CONFIG_MMU))
  58.246 +#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
  58.247  		/* we don't have page 0 mapped on sparc and m68k.. */
  58.248 -		if (p < PAGE_SIZE && read > 0) {
  58.249 +		if (p < PAGE_SIZE && low_count > 0) {
  58.250  			size_t tmp = PAGE_SIZE - p;
  58.251 -			if (tmp > read) tmp = read;
  58.252 +			if (tmp > low_count) tmp = low_count;
  58.253  			if (clear_user(buf, tmp))
  58.254  				return -EFAULT;
  58.255  			buf += tmp;
  58.256  			p += tmp;
  58.257 -			read -= tmp;
  58.258 +			read += tmp;
  58.259 +			low_count -= tmp;
  58.260  			count -= tmp;
  58.261  		}
  58.262  #endif
  58.263 -		if (copy_to_user(buf, (char *)p, read))
  58.264 -			return -EFAULT;
  58.265 -		p += read;
  58.266 -		buf += read;
  58.267 -		count -= read;
  58.268 +		while (low_count > 0) {
  58.269 +			/*
  58.270 +			 * Handle first page in case it's not aligned
  58.271 +			 */
  58.272 +			if (-p & (PAGE_SIZE - 1))
  58.273 +				sz = -p & (PAGE_SIZE - 1);
  58.274 +			else
  58.275 +				sz = PAGE_SIZE;
  58.276 +
  58.277 +			sz = min_t(unsigned long, sz, low_count);
  58.278 +
  58.279 +			/*
  58.280 +			 * On ia64 if a page has been mapped somewhere as
  58.281 +			 * uncached, then it must also be accessed uncached
  58.282 +			 * by the kernel or data corruption may occur
  58.283 +			 */
  58.284 +			kbuf = xlate_dev_kmem_ptr((char *)p);
  58.285 +
  58.286 +			if (copy_to_user(buf, kbuf, sz))
  58.287 +				return -EFAULT;
  58.288 +			buf += sz;
  58.289 +			p += sz;
  58.290 +			read += sz;
  58.291 +			low_count -= sz;
  58.292 +			count -= sz;
  58.293 +		}
  58.294  	}
  58.295  
  58.296  	if (count > 0) {
  58.297 @@ -269,15 +356,79 @@ static ssize_t read_kmem(struct file *fi
  58.298  			}
  58.299  			count -= len;
  58.300  			buf += len;
  58.301 -			virtr += len;
  58.302 +			read += len;
  58.303  			p += len;
  58.304  		}
  58.305  		free_page((unsigned long)kbuf);
  58.306  	}
  58.307   	*ppos = p;
  58.308 - 	return virtr + read;
  58.309 + 	return read;
  58.310  }
  58.311  
  58.312 +
  58.313 +static inline ssize_t
  58.314 +do_write_kmem(void *p, unsigned long realp, const char __user * buf,
  58.315 +	      size_t count, loff_t *ppos)
  58.316 +{
  58.317 +	ssize_t written, sz;
  58.318 +	unsigned long copied;
  58.319 +
  58.320 +	written = 0;
  58.321 +#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
  58.322 +	/* we don't have page 0 mapped on sparc and m68k.. */
  58.323 +	if (realp < PAGE_SIZE) {
  58.324 +		unsigned long sz = PAGE_SIZE - realp;
  58.325 +		if (sz > count)
  58.326 +			sz = count;
  58.327 +		/* Hmm. Do something? */
  58.328 +		buf += sz;
  58.329 +		p += sz;
  58.330 +		realp += sz;
  58.331 +		count -= sz;
  58.332 +		written += sz;
  58.333 +	}
  58.334 +#endif
  58.335 +
  58.336 +	while (count > 0) {
  58.337 +		char *ptr;
  58.338 +		/*
  58.339 +		 * Handle first page in case it's not aligned
  58.340 +		 */
  58.341 +		if (-realp & (PAGE_SIZE - 1))
  58.342 +			sz = -realp & (PAGE_SIZE - 1);
  58.343 +		else
  58.344 +			sz = PAGE_SIZE;
  58.345 +
  58.346 +		sz = min_t(unsigned long, sz, count);
  58.347 +
  58.348 +		/*
  58.349 +		 * On ia64 if a page has been mapped somewhere as
  58.350 +		 * uncached, then it must also be accessed uncached
  58.351 +		 * by the kernel or data corruption may occur
  58.352 +		 */
  58.353 +		ptr = xlate_dev_kmem_ptr(p);
  58.354 +
  58.355 +		copied = copy_from_user(ptr, buf, sz);
  58.356 +		if (copied) {
  58.357 +			ssize_t ret;
  58.358 +
  58.359 +			ret = written + (sz - copied);
  58.360 +			if (ret)
  58.361 +				return ret;
  58.362 +			return -EFAULT;
  58.363 +		}
  58.364 +		buf += sz;
  58.365 +		p += sz;
  58.366 +		realp += sz;
  58.367 +		count -= sz;
  58.368 +		written += sz;
  58.369 +	}
  58.370 +
  58.371 +	*ppos += written;
  58.372 +	return written;
  58.373 +}
  58.374 +
  58.375 +
  58.376  /*
  58.377   * This function writes to the *virtual* memory as seen by the kernel.
  58.378   */
  58.379 @@ -296,7 +447,7 @@ static ssize_t write_kmem(struct file * 
  58.380  		if (count > (unsigned long) high_memory - p)
  58.381  			wrote = (unsigned long) high_memory - p;
  58.382  
  58.383 -		written = do_write_mem((void*)p, p, buf, wrote, ppos);
  58.384 +		written = do_write_kmem((void*)p, p, buf, wrote, ppos);
  58.385  		if (written != wrote)
  58.386  			return written;
  58.387  		wrote = written;
  58.388 @@ -344,7 +495,7 @@ static ssize_t read_port(struct file * f
  58.389  	unsigned long i = *ppos;
  58.390  	char __user *tmp = buf;
  58.391  
  58.392 -	if (verify_area(VERIFY_WRITE,buf,count))
  58.393 +	if (!access_ok(VERIFY_WRITE, buf, count))
  58.394  		return -EFAULT; 
  58.395  	while (count-- > 0 && i < 65536) {
  58.396  		if (__put_user(inb(i),tmp) < 0) 
  58.397 @@ -362,7 +513,7 @@ static ssize_t write_port(struct file * 
  58.398  	unsigned long i = *ppos;
  58.399  	const char __user * tmp = buf;
  58.400  
  58.401 -	if (verify_area(VERIFY_READ,buf,count))
  58.402 +	if (!access_ok(VERIFY_READ,buf,count))
  58.403  		return -EFAULT;
  58.404  	while (count-- > 0 && i < 65536) {
  58.405  		char c;
  58.406 @@ -568,7 +719,6 @@ static int open_port(struct inode * inod
  58.407  	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
  58.408  }
  58.409  
  58.410 -#define mmap_mem	mmap_kmem
  58.411  #define zero_lseek	null_lseek
  58.412  #define full_lseek      null_lseek
  58.413  #define write_zero	write_null
  58.414 @@ -581,7 +731,7 @@ static struct file_operations mem_fops =
  58.415  	.llseek		= memory_lseek,
  58.416  	.read		= read_mem,
  58.417  	.write		= write_mem,
  58.418 -	.mmap		= mmap_mem,
  58.419 +	.mmap		= mmap_kmem,
  58.420  	.open		= open_mem,
  58.421  };
  58.422  #else
  58.423 @@ -618,6 +768,10 @@ static struct file_operations zero_fops 
  58.424  	.mmap		= mmap_zero,
  58.425  };
  58.426  
  58.427 +static struct backing_dev_info zero_bdi = {
  58.428 +	.capabilities	= BDI_CAP_MAP_COPY,
  58.429 +};
  58.430 +
  58.431  static struct file_operations full_fops = {
  58.432  	.llseek		= full_lseek,
  58.433  	.read		= read_full,
  58.434 @@ -664,6 +818,7 @@ static int memory_open(struct inode * in
  58.435  			break;
  58.436  #endif
  58.437  		case 5:
  58.438 +			filp->f_mapping->backing_dev_info = &zero_bdi;
  58.439  			filp->f_op = &zero_fops;
  58.440  			break;
  58.441  		case 7:
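
The rewritten read_kmem() above copies low memory a page at a time so that every chunk can be passed through xlate_dev_kmem_ptr() before copy_to_user(); on ia64 that translation is what keeps uncached mappings coherent. The first-chunk arithmetic, -p & (PAGE_SIZE - 1), gives the distance from p to the next page boundary, and 0 when p is already aligned (hence the PAGE_SIZE fallback). A standalone sketch of the chunking, assuming 4 KiB pages in place of the kernel's PAGE_SIZE:

	#include <stdio.h>

	#define PAGE_SIZE 4096UL	/* assumption: stands in for the kernel macro */

	int main(void)
	{
		unsigned long p = 0x1234;	/* hypothetical unaligned start */
		unsigned long count = 10000;	/* bytes left to copy */

		while (count > 0) {
			/* bytes up to the next page boundary; 0 means aligned */
			unsigned long sz = -p & (PAGE_SIZE - 1);
			if (sz == 0)
				sz = PAGE_SIZE;
			if (sz > count)
				sz = count;
			printf("copy %5lu bytes at %#lx\n", sz, p);
			p += sz;
			count -= sz;
		}
		return 0;
	}
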
    59.1 --- a/linux-2.6-xen-sparse/drivers/char/tty_io.c	Mon Jul 11 09:29:56 2005 -0500
    59.2 +++ b/linux-2.6-xen-sparse/drivers/char/tty_io.c	Mon Jul 11 09:35:19 2005 -0500
    59.3 @@ -187,7 +187,7 @@ char *tty_name(struct tty_struct *tty, c
    59.4  
    59.5  EXPORT_SYMBOL(tty_name);
    59.6  
    59.7 -inline int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
    59.8 +int tty_paranoia_check(struct tty_struct *tty, struct inode *inode,
    59.9  			      const char *routine)
   59.10  {
   59.11  #ifdef TTY_PARANOIA_CHECK
   59.12 @@ -1791,7 +1791,6 @@ retry_open:
   59.13  	}
   59.14  #ifdef CONFIG_VT
   59.15  	if (console_use_vt && (device == MKDEV(TTY_MAJOR,0))) {
   59.16 -		extern int fg_console;
   59.17  		extern struct tty_driver *console_driver;
   59.18  		driver = console_driver;
   59.19  		index = fg_console;
   59.20 @@ -2018,11 +2017,10 @@ static int tiocswinsz(struct tty_struct 
   59.21  		return 0;
   59.22  #ifdef CONFIG_VT
   59.23  	if (tty->driver->type == TTY_DRIVER_TYPE_CONSOLE) {
   59.24 -		unsigned int currcons = tty->index;
   59.25  		int rc;
   59.26  
   59.27  		acquire_console_sem();
   59.28 -		rc = vc_resize(currcons, tmp_ws.ws_col, tmp_ws.ws_row);
   59.29 +		rc = vc_resize(tty->driver_data, tmp_ws.ws_col, tmp_ws.ws_row);
   59.30  		release_console_sem();
   59.31  		if (rc)
   59.32  			return -ENXIO;
   59.33 @@ -2634,6 +2632,7 @@ static void initialize_tty_struct(struct
   59.34  	tty->magic = TTY_MAGIC;
   59.35  	tty_ldisc_assign(tty, tty_ldisc_get(N_TTY));
   59.36  	tty->pgrp = -1;
   59.37 +	tty->overrun_time = jiffies;
   59.38  	tty->flip.char_buf_ptr = tty->flip.char_buf;
   59.39  	tty->flip.flag_buf_ptr = tty->flip.flag_buf;
   59.40  	INIT_WORK(&tty->flip.work, flush_to_ldisc, tty);
    60.1 --- a/linux-2.6-xen-sparse/include/asm-generic/pgtable.h	Mon Jul 11 09:29:56 2005 -0500
    60.2 +++ b/linux-2.6-xen-sparse/include/asm-generic/pgtable.h	Mon Jul 11 09:35:19 2005 -0500
    60.3 @@ -16,7 +16,7 @@
    60.4  #ifndef __HAVE_ARCH_SET_PTE_ATOMIC
    60.5  #define ptep_establish(__vma, __address, __ptep, __entry)		\
    60.6  do {				  					\
    60.7 -	set_pte(__ptep, __entry);					\
    60.8 +	set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);	\
    60.9  	flush_tlb_page(__vma, __address);				\
   60.10  } while (0)
   60.11  #else /* __HAVE_ARCH_SET_PTE_ATOMIC */
   60.12 @@ -37,7 +37,7 @@ do {				  					\
   60.13   */
   60.14  #define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
   60.15  do {				  					  \
   60.16 -	set_pte(__ptep, __entry);					  \
   60.17 +	set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry);	  \
   60.18  	flush_tlb_page(__vma, __address);				  \
   60.19  } while (0)
   60.20  #endif
   60.21 @@ -53,20 +53,24 @@ do {									\
   60.22  #endif
   60.23  
   60.24  #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
   60.25 -static inline int ptep_test_and_clear_young(pte_t *ptep)
   60.26 -{
   60.27 -	pte_t pte = *ptep;
   60.28 -	if (!pte_young(pte))
   60.29 -		return 0;
   60.30 -	set_pte(ptep, pte_mkold(pte));
   60.31 -	return 1;
   60.32 -}
   60.33 +#define ptep_test_and_clear_young(__vma, __address, __ptep)		\
   60.34 +({									\
   60.35 +	pte_t __pte = *(__ptep);					\
   60.36 +	int r = 1;							\
   60.37 +	if (!pte_young(__pte))						\
   60.38 +		r = 0;							\
   60.39 +	else								\
   60.40 +		set_pte_at((__vma)->vm_mm, (__address),			\
   60.41 +			   (__ptep), pte_mkold(__pte));			\
   60.42 +	r;								\
   60.43 +})
   60.44  #endif
   60.45  
   60.46  #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
   60.47  #define ptep_clear_flush_young(__vma, __address, __ptep)		\
   60.48  ({									\
   60.49 -	int __young = ptep_test_and_clear_young(__ptep);		\
   60.50 +	int __young;							\
   60.51 +	__young = ptep_test_and_clear_young(__vma, __address, __ptep);	\
   60.52  	if (__young)							\
   60.53  		flush_tlb_page(__vma, __address);			\
   60.54  	__young;							\
   60.55 @@ -74,20 +78,24 @@ static inline int ptep_test_and_clear_yo
   60.56  #endif
   60.57  
   60.58  #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
   60.59 -static inline int ptep_test_and_clear_dirty(pte_t *ptep)
   60.60 -{
   60.61 -	pte_t pte = *ptep;
   60.62 -	if (!pte_dirty(pte))
   60.63 -		return 0;
   60.64 -	set_pte(ptep, pte_mkclean(pte));
   60.65 -	return 1;
   60.66 -}
   60.67 +#define ptep_test_and_clear_dirty(__vma, __address, __ptep)		\
   60.68 +({									\
   60.69 +	pte_t __pte = *__ptep;						\
   60.70 +	int r = 1;							\
   60.71 +	if (!pte_dirty(__pte))						\
   60.72 +		r = 0;							\
   60.73 +	else								\
   60.74 +		set_pte_at((__vma)->vm_mm, (__address), (__ptep),	\
   60.75 +			   pte_mkclean(__pte));				\
   60.76 +	r;								\
   60.77 +})
   60.78  #endif
   60.79  
   60.80  #ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
   60.81  #define ptep_clear_flush_dirty(__vma, __address, __ptep)		\
   60.82  ({									\
   60.83 -	int __dirty = ptep_test_and_clear_dirty(__ptep);		\
   60.84 +	int __dirty;							\
   60.85 +	__dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep);	\
   60.86  	if (__dirty)							\
   60.87  		flush_tlb_page(__vma, __address);			\
   60.88  	__dirty;							\
   60.89 @@ -95,36 +103,29 @@ static inline int ptep_test_and_clear_di
   60.90  #endif
   60.91  
   60.92  #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
   60.93 -static inline pte_t ptep_get_and_clear(pte_t *ptep)
   60.94 -{
   60.95 -	pte_t pte = *ptep;
   60.96 -	pte_clear(ptep);
   60.97 -	return pte;
   60.98 -}
   60.99 +#define ptep_get_and_clear(__mm, __address, __ptep)			\
  60.100 +({									\
  60.101 +	pte_t __pte = *(__ptep);					\
  60.102 +	pte_clear((__mm), (__address), (__ptep));			\
  60.103 +	__pte;								\
  60.104 +})
  60.105  #endif
  60.106  
  60.107  #ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
  60.108  #define ptep_clear_flush(__vma, __address, __ptep)			\
  60.109  ({									\
  60.110 -	pte_t __pte = ptep_get_and_clear(__ptep);			\
  60.111 +	pte_t __pte;							\
  60.112 +	__pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep);	\
  60.113  	flush_tlb_page(__vma, __address);				\
  60.114  	__pte;								\
  60.115  })
  60.116  #endif
  60.117  
  60.118  #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
  60.119 -static inline void ptep_set_wrprotect(pte_t *ptep)
  60.120 +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
  60.121  {
  60.122  	pte_t old_pte = *ptep;
  60.123 -	set_pte(ptep, pte_wrprotect(old_pte));
  60.124 -}
  60.125 -#endif
  60.126 -
  60.127 -#ifndef __HAVE_ARCH_PTEP_MKDIRTY
  60.128 -static inline void ptep_mkdirty(pte_t *ptep)
  60.129 -{
  60.130 -	pte_t old_pte = *ptep;
  60.131 -	set_pte(ptep, pte_mkdirty(old_pte));
  60.132 +	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
  60.133  }
  60.134  #endif
  60.135  
  60.136 @@ -144,4 +145,77 @@ static inline void ptep_mkdirty(pte_t *p
  60.137  #define pgd_offset_gate(mm, addr)	pgd_offset(mm, addr)
  60.138  #endif
  60.139  
  60.140 +#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
  60.141 +#define lazy_mmu_prot_update(pte)	do { } while (0)
  60.142 +#endif
  60.143 +
  60.144 +/*
  60.145 + * When walking page tables, get the address of the next boundary,
  60.146 + * or the end address of the range if that comes earlier.  Although no
  60.147 + * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
  60.148 + */
  60.149 +
  60.150 +#define pgd_addr_end(addr, end)						\
  60.151 +({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
  60.152 +	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
  60.153 +})
  60.154 +
  60.155 +#ifndef pud_addr_end
  60.156 +#define pud_addr_end(addr, end)						\
  60.157 +({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
  60.158 +	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
  60.159 +})
  60.160 +#endif
  60.161 +
  60.162 +#ifndef pmd_addr_end
  60.163 +#define pmd_addr_end(addr, end)						\
  60.164 +({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
  60.165 +	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
  60.166 +})
  60.167 +#endif
  60.168 +
  60.169 +#ifndef __ASSEMBLY__
  60.170 +/*
  60.171 + * When walking page tables, we usually want to skip any p?d_none entries;
  60.172 + * and any p?d_bad entries - reporting the error before resetting to none.
  60.173 + * Do the tests inline, but report and clear the bad entry in mm/memory.c.
  60.174 + */
  60.175 +void pgd_clear_bad(pgd_t *);
  60.176 +void pud_clear_bad(pud_t *);
  60.177 +void pmd_clear_bad(pmd_t *);
  60.178 +
  60.179 +static inline int pgd_none_or_clear_bad(pgd_t *pgd)
  60.180 +{
  60.181 +	if (pgd_none(*pgd))
  60.182 +		return 1;
  60.183 +	if (unlikely(pgd_bad(*pgd))) {
  60.184 +		pgd_clear_bad(pgd);
  60.185 +		return 1;
  60.186 +	}
  60.187 +	return 0;
  60.188 +}
  60.189 +
  60.190 +static inline int pud_none_or_clear_bad(pud_t *pud)
  60.191 +{
  60.192 +	if (pud_none(*pud))
  60.193 +		return 1;
  60.194 +	if (unlikely(pud_bad(*pud))) {
  60.195 +		pud_clear_bad(pud);
  60.196 +		return 1;
  60.197 +	}
  60.198 +	return 0;
  60.199 +}
  60.200 +
  60.201 +static inline int pmd_none_or_clear_bad(pmd_t *pmd)
  60.202 +{
  60.203 +	if (pmd_none(*pmd))
  60.204 +		return 1;
  60.205 +	if (unlikely(pmd_bad(*pmd))) {
  60.206 +		pmd_clear_bad(pmd);
  60.207 +		return 1;
  60.208 +	}
  60.209 +	return 0;
  60.210 +}
  60.211 +#endif /* !__ASSEMBLY__ */
  60.212 +
  60.213  #endif /* _ASM_GENERIC_PGTABLE_H */
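
The boundary macros above read oddly because they must survive overflow: if (addr + PGDIR_SIZE) rounds up past the top of the address space, __boundary wraps to 0, and the comparison __boundary - 1 < (end) - 1 then sees ULONG_MAX on the left and correctly falls back to end. A userspace illustration of the same trick, with a toy 1 MiB span standing in for PGDIR_SIZE:

	#include <stdio.h>

	#define SPAN 0x100000UL			/* assumption: toy PGDIR_SIZE */
	#define MASK (~(SPAN - 1))

	static unsigned long addr_end(unsigned long addr, unsigned long end)
	{
		unsigned long boundary = (addr + SPAN) & MASK;
		/* wrap-safe min: a boundary of 0 becomes ULONG_MAX after -1 */
		return (boundary - 1 < end - 1) ? boundary : end;
	}

	int main(void)
	{
		printf("%#lx\n", addr_end(0x123456UL, 0x400000UL)); /* 0x200000 */
		printf("%#lx\n", addr_end(~0UL - 0x1000, ~0UL));    /* wraps: end */
		return 0;
	}
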
    61.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h	Mon Jul 11 09:29:56 2005 -0500
    61.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/desc.h	Mon Jul 11 09:35:19 2005 -0500
    61.3 @@ -4,6 +4,8 @@
    61.4  #include <asm/ldt.h>
    61.5  #include <asm/segment.h>
    61.6  
    61.7 +#define CPU_16BIT_STACK_SIZE 1024
    61.8 +
    61.9  #ifndef __ASSEMBLY__
   61.10  
   61.11  #include <linux/preempt.h>
   61.12 @@ -13,6 +15,8 @@
   61.13  
   61.14  extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
   61.15  
   61.16 +DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
   61.17 +
   61.18  struct Xgt_desc_struct {
   61.19  	unsigned short size;
   61.20  	unsigned long address __attribute__((packed));
    62.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h	Mon Jul 11 09:29:56 2005 -0500
    62.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h	Mon Jul 11 09:35:19 2005 -0500
    62.3 @@ -11,7 +11,7 @@
    62.4  #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
    62.5  
    62.6  void *dma_alloc_coherent(struct device *dev, size_t size,
    62.7 -			   dma_addr_t *dma_handle, int flag);
    62.8 +			   dma_addr_t *dma_handle, unsigned int __nocast flag);
    62.9  
   62.10  void dma_free_coherent(struct device *dev, size_t size,
   62.11  			 void *vaddr, dma_addr_t dma_handle);
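
__nocast is a sparse annotation (the precursor of the later gfp_t) that lets sparse flag gfp arguments whose type changed silently on the way in; generated code and call sites are untouched. A typical caller, shown as a hypothetical kernel-style fragment rather than a runnable program (dev and size are assumed to exist):

	dma_addr_t handle;
	void *buf;

	/* GFP_KERNEL may sleep; use GFP_ATOMIC in interrupt context */
	buf = dma_alloc_coherent(dev, size, &handle, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;
	/* ... device uses 'handle', CPU accesses go through 'buf' ... */
	dma_free_coherent(dev, size, buf, handle);
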
    63.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h	Mon Jul 11 09:29:56 2005 -0500
    63.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/highmem.h	Mon Jul 11 09:35:19 2005 -0500
    63.3 @@ -33,8 +33,6 @@ extern pte_t *kmap_pte;
    63.4  extern pgprot_t kmap_prot;
    63.5  extern pte_t *pkmap_page_table;
    63.6  
    63.7 -extern void kmap_init(void);
    63.8 -
    63.9  /*
   63.10   * Right now we initialize only a single pte table. It can be extended
   63.11   * easily, subsequent pte tables have to be allocated in one physical
    64.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h	Mon Jul 11 09:29:56 2005 -0500
    64.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/io.h	Mon Jul 11 09:35:19 2005 -0500
    64.3 @@ -50,6 +50,17 @@
    64.4  #include <linux/vmalloc.h>
    64.5  #include <asm/fixmap.h>
    64.6  
    64.7 +/*
    64.8 + * Convert a physical pointer to a virtual kernel pointer for /dev/mem
    64.9 + * access
   64.10 + */
   64.11 +#define xlate_dev_mem_ptr(p)	__va(p)
   64.12 +
   64.13 +/*
   64.14 + * Convert a virtual cached pointer to an uncached pointer
   64.15 + */
   64.16 +#define xlate_dev_kmem_ptr(p)	p
   64.17 +
   64.18  /**
   64.19   *	virt_to_phys	-	map virtual addresses to physical
   64.20   *	@address: address to remap
    65.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h	Mon Jul 11 09:29:56 2005 -0500
    65.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mmu_context.h	Mon Jul 11 09:35:19 2005 -0500
    65.3 @@ -34,10 +34,10 @@ static inline void __prepare_arch_switch
    65.4  	 * are always kernel segments while inside the kernel. Must
    65.5  	 * happen before reload of cr3/ldt (i.e., not in __switch_to).
    65.6  	 */
    65.7 -	__asm__ __volatile__ ( "movl %%fs,%0 ; movl %%gs,%1"
    65.8 +	__asm__ __volatile__ ( "mov %%fs,%0 ; mov %%gs,%1"
    65.9  		: "=m" (*(int *)&current->thread.fs),
   65.10  		  "=m" (*(int *)&current->thread.gs));
   65.11 -	__asm__ __volatile__ ( "movl %0,%%fs ; movl %0,%%gs"
   65.12 +	__asm__ __volatile__ ( "mov %0,%%fs ; mov %0,%%gs"
   65.13  		: : "r" (0) );
   65.14  }
   65.15  
   65.16 @@ -100,7 +100,7 @@ static inline void switch_mm(struct mm_s
   65.17  }
   65.18  
   65.19  #define deactivate_mm(tsk, mm) \
   65.20 -	asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
   65.21 +	asm("mov %0,%%fs ; mov %0,%%gs": :"r" (0))
   65.22  
   65.23  #define activate_mm(prev, next) \
   65.24  	switch_mm((prev),(next),NULL)
    66.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h	Mon Jul 11 09:29:56 2005 -0500
    66.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgalloc.h	Mon Jul 11 09:35:19 2005 -0500
    66.3 @@ -2,7 +2,6 @@
    66.4  #define _I386_PGALLOC_H
    66.5  
    66.6  #include <linux/config.h>
    66.7 -#include <asm/processor.h>
    66.8  #include <asm/fixmap.h>
    66.9  #include <linux/threads.h>
   66.10  #include <linux/mm.h>		/* for struct page */
    67.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h	Mon Jul 11 09:29:56 2005 -0500
    67.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h	Mon Jul 11 09:35:19 2005 -0500
    67.3 @@ -14,6 +14,7 @@
    67.4   * hook is made available.
    67.5   */
    67.6  #define set_pte(pteptr, pteval) (*(pteptr) = pteval)
    67.7 +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
    67.8  #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
    67.9  
   67.10  #ifndef CONFIG_XEN_SHADOW_MODE
   67.11 @@ -22,7 +23,7 @@
   67.12  #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
   67.13  #endif
   67.14  
   67.15 -#define ptep_get_and_clear(xp)	__pte_ma(xchg(&(xp)->pte_low, 0))
   67.16 +#define ptep_get_and_clear(mm,addr,xp)	__pte_ma(xchg(&(xp)->pte_low, 0))
   67.17  #define pte_same(a, b)		((a).pte_low == (b).pte_low)
   67.18  /*
   67.19   * We detect special mappings in one of two ways:
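
ptep_get_and_clear() keeps its xchg() even with the new mm/addr arguments: the old pte_low must be fetched and zeroed in a single atomic step, since Accessed/Dirty bits may be set concurrently. The same read-and-clear pattern in miniature, assuming GCC's __atomic builtins in place of the kernel's xchg():

	#include <stdio.h>

	int main(void)
	{
		unsigned long pte_low = 0x1067;	/* hypothetical present+dirty pte */

		/* atomically fetch the old value and store 0 */
		unsigned long old = __atomic_exchange_n(&pte_low, 0UL,
							__ATOMIC_SEQ_CST);

		printf("old %#lx, now %#lx\n", old, pte_low);
		return 0;
	}
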
    68.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Mon Jul 11 09:29:56 2005 -0500
    68.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Mon Jul 11 09:35:19 2005 -0500
    68.3 @@ -61,7 +61,7 @@ void paging_init(void);
    68.4  #define PGDIR_MASK	(~(PGDIR_SIZE-1))
    68.5  
    68.6  #define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
    68.7 -#define FIRST_USER_PGD_NR	0
    68.8 +#define FIRST_USER_ADDRESS	0
    68.9  
   68.10  #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
   68.11  #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
   68.12 @@ -194,15 +194,15 @@ extern unsigned long long __PAGE_KERNEL,
   68.13  /*
   68.14   * Define this if things work differently on an i386 and an i486:
   68.15   * it will (on an i486) warn about kernel memory accesses that are
   68.16 - * done without a 'verify_area(VERIFY_WRITE,..)'
   68.17 + * done without a 'access_ok(VERIFY_WRITE,..)'
   68.18   */
   68.19 -#undef TEST_VERIFY_AREA
   68.20 +#undef TEST_ACCESS_OK
   68.21  
   68.22  /* The boot page tables (all created as a single array) */
   68.23  extern unsigned long pg0[];
   68.24  
   68.25  #define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
   68.26 -#define pte_clear(xp)	do { set_pte(xp, __pte(0)); } while (0)
   68.27 +#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
   68.28  
   68.29  #define pmd_none(x)	(!pmd_val(x))
   68.30  /* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
   68.31 @@ -246,32 +246,26 @@ static inline pte_t pte_mkwrite(pte_t pt
   68.32  # include <asm/pgtable-2level.h>
   68.33  #endif
   68.34  
   68.35 -static inline int ptep_test_and_clear_dirty(pte_t *ptep)
   68.36 +static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
   68.37  {
   68.38  	if (!pte_dirty(*ptep))
   68.39  		return 0;
   68.40  	return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
   68.41  }
   68.42  
   68.43 -static inline int ptep_test_and_clear_young(pte_t *ptep)
   68.44 +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
   68.45  {
   68.46  	if (!pte_young(*ptep))
   68.47  		return 0;
   68.48  	return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte_low);
   68.49  }
   68.50  
   68.51 -static inline void ptep_set_wrprotect(pte_t *ptep)
   68.52 +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
   68.53  {
   68.54  	if (pte_write(*ptep))
   68.55  		clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
   68.56  }
   68.57  
   68.58 -static inline void ptep_mkdirty(pte_t *ptep)
   68.59 -{
   68.60 -	if (!pte_dirty(*ptep))
   68.61 -		set_bit(_PAGE_BIT_DIRTY, &ptep->pte_low);
   68.62 -}
   68.63 -
   68.64  /*
   68.65   * Macro to mark a page protection value as "uncacheable".  On processors which do not support
   68.66   * it, this is a no-op.
   68.67 @@ -483,11 +477,14 @@ direct_remap_area_pages(vma->vm_mm,from,
   68.68  #define io_remap_pfn_range(vma,from,pfn,size,prot) \
   68.69  direct_remap_area_pages(vma->vm_mm,from,pfn<<PAGE_SHIFT,size,prot,DOMID_IO)
   68.70  
   68.71 +#define MK_IOSPACE_PFN(space, pfn)	(pfn)
   68.72 +#define GET_IOSPACE(pfn)		0
   68.73 +#define GET_PFN(pfn)			(pfn)
   68.74 +
   68.75  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
   68.76  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
   68.77  #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
   68.78  #define __HAVE_ARCH_PTEP_SET_WRPROTECT
   68.79 -#define __HAVE_ARCH_PTEP_MKDIRTY
   68.80  #define __HAVE_ARCH_PTE_SAME
   68.81  #include <asm-generic/pgtable.h>
   68.82  
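
The net effect in this file is purely an interface change: every ptep_* helper gains the vma or mm plus the virtual address, mirroring the generic versions, while the bit-twiddling bodies stay the same (and ptep_mkdirty disappears outright). Out-of-tree callers have to follow suit; schematically, as a hypothetical kernel fragment:

	/* 2.6.11 and earlier */
	young = ptep_test_and_clear_young(ptep);
	pte   = ptep_get_and_clear(ptep);

	/* 2.6.12: pass the vma or mm and the virtual address as well */
	young = ptep_test_and_clear_young(vma, address, ptep);
	pte   = ptep_get_and_clear(vma->vm_mm, address, ptep);
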
    69.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h	Mon Jul 11 09:29:56 2005 -0500
    69.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/processor.h	Mon Jul 11 09:35:19 2005 -0500
    69.3 @@ -99,12 +99,12 @@ extern struct cpuinfo_x86 cpu_data[];
    69.4  #endif
    69.5  
    69.6  extern	int phys_proc_id[NR_CPUS];
    69.7 +extern	int cpu_core_id[NR_CPUS];
    69.8  extern char ignore_fpu_irq;
    69.9  
   69.10  extern void identify_cpu(struct cpuinfo_x86 *);
   69.11  extern void print_cpu_info(struct cpuinfo_x86 *);
   69.12  extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
   69.13 -extern void dodgy_tsc(void);
   69.14  
   69.15  #ifdef CONFIG_X86_HT
   69.16  extern void detect_ht(struct cpuinfo_x86 *c);
   69.17 @@ -138,7 +138,7 @@ static inline void detect_ht(struct cpui
   69.18   * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
   69.19   * resulting in stale register contents being returned.
   69.20   */
   69.21 -static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
   69.22 +static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
   69.23  {
   69.24  	__asm__("cpuid"
   69.25  		: "=a" (*eax),
   69.26 @@ -148,6 +148,18 @@ static inline void cpuid(int op, int *ea
   69.27  		: "0" (op), "c"(0));
   69.28  }
   69.29  
   69.30 +/* Some CPUID calls want 'count' to be placed in ecx */
   69.31 +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
   69.32 +	       	int *edx)
   69.33 +{
   69.34 +	__asm__("cpuid"
   69.35 +		: "=a" (*eax),
   69.36 +		  "=b" (*ebx),
   69.37 +		  "=c" (*ecx),
   69.38 +		  "=d" (*edx)
   69.39 +		: "0" (op), "c" (count));
   69.40 +}
   69.41 +
   69.42  /*
   69.43   * CPUID functions returning a single datum
   69.44   */
   69.45 @@ -501,6 +513,13 @@ static inline void load_esp0(struct tss_
   69.46  	regs->esp = new_esp;					\
   69.47  } while (0)
   69.48  
   69.49 +/*
   69.50 + * This special macro can be used to load a debugging register
   69.51 + */
   69.52 +#define loaddebug(thread,register) \
   69.53 +	HYPERVISOR_set_debugreg((register),     \
   69.54 +			((thread)->debugreg[register]))
   69.55 +
   69.56  /* Forward declaration, a strange C thing */
   69.57  struct task_struct;
   69.58  struct mm_struct;
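
cpuid_count() exists because some CPUID leaves select a sub-leaf through %ecx, leaf 4 (deterministic cache parameters, which init_intel_cacheinfo() walks) being the prime example, while the plain cpuid() above deliberately clears %ecx. A hypothetical fragment enumerating cache levels:

	int eax, ebx, ecx, edx;
	int index;

	for (index = 0; ; index++) {
		cpuid_count(4, index, &eax, &ebx, &ecx, &edx);
		if ((eax & 0x1f) == 0)	/* cache type 0: no more caches */
			break;
		/* eax bits 7:5 hold the cache level, ebx/ecx its geometry */
	}
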
    70.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h	Mon Jul 11 09:29:56 2005 -0500
    70.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/segment.h	Mon Jul 11 09:35:19 2005 -0500
    70.3 @@ -38,7 +38,7 @@
    70.4   *  24 - APM BIOS support
    70.5   *  25 - APM BIOS support 
    70.6   *
    70.7 - *  26 - unused
    70.8 + *  26 - ESPFIX small SS
    70.9   *  27 - unused
   70.10   *  28 - unused
   70.11   *  29 - unused
   70.12 @@ -71,6 +71,9 @@
   70.13  #define GDT_ENTRY_PNPBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 6)
   70.14  #define GDT_ENTRY_APMBIOS_BASE		(GDT_ENTRY_KERNEL_BASE + 11)
   70.15  
   70.16 +#define GDT_ENTRY_ESPFIX_SS		(GDT_ENTRY_KERNEL_BASE + 14)
   70.17 +#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS * 8)
   70.18 +
   70.19  #define GDT_ENTRY_DOUBLEFAULT_TSS	31
   70.20  
   70.21  /*
    71.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h	Mon Jul 11 09:29:56 2005 -0500
    71.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/setup.h	Mon Jul 11 09:35:19 2005 -0500
    71.3 @@ -16,7 +16,7 @@
    71.4  #define MAXMEM_PFN	PFN_DOWN(MAXMEM)
    71.5  #define MAX_NONPAE_PFN	(1 << 20)
    71.6  
    71.7 -#define PARAM_SIZE 2048
    71.8 +#define PARAM_SIZE 4096
    71.9  #define COMMAND_LINE_SIZE 256
   71.10  
   71.11  #define OLD_CL_MAGIC_ADDR	0x90020
    72.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h	Mon Jul 11 09:29:56 2005 -0500
    72.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/system.h	Mon Jul 11 09:35:19 2005 -0500
    72.3 @@ -84,7 +84,7 @@ static inline unsigned long _get_base(ch
    72.4  #define loadsegment(seg,value)			\
    72.5  	asm volatile("\n"			\
    72.6  		"1:\t"				\
    72.7 -		"movl %0,%%" #seg "\n"		\
    72.8 +		"mov %0,%%" #seg "\n"		\
    72.9  		"2:\n"				\
   72.10  		".section .fixup,\"ax\"\n"	\
   72.11  		"3:\t"				\
   72.12 @@ -96,13 +96,13 @@ static inline unsigned long _get_base(ch
   72.13  		".align 4\n\t"			\
   72.14  		".long 1b,3b\n"			\
   72.15  		".previous"			\
   72.16 -		: :"m" (*(unsigned int *)&(value)))
   72.17 +		: :"m" (value))
   72.18  
   72.19  /*
   72.20   * Save a segment register away
   72.21   */
   72.22  #define savesegment(seg, value) \
   72.23 -	asm volatile("movl %%" #seg ",%0":"=m" (*(int *)&(value)))
   72.24 +	asm volatile("mov %%" #seg ",%0":"=m" (value))
   72.25  
   72.26  /*
   72.27   * Clear and set 'TS' bit respectively
   72.28 @@ -574,4 +574,6 @@ void enable_hlt(void);
   72.29  extern int es7000_plat;
   72.30  void cpu_idle_wait(void);
   72.31  
   72.32 +extern unsigned long arch_align_stack(unsigned long sp);
   72.33 +
   72.34  #endif
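
The movl to mov change is the same fix as in mmu_context.h: a segment-register move is architecturally a 16-bit operation, and newer binutils warn about or reject the explicit 32-bit mnemonic; dropping the (unsigned int *) cast likewise lets the "m" constraint take the operand's real size. Hypothetical use, in the style of context-switch code:

	savesegment(gs, prev->thread.gs);	/* stash the outgoing selector */
	loadsegment(gs, next->thread.gs);	/* install the incoming one */
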
    73.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h	Mon Jul 11 09:29:56 2005 -0500
    73.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/bootsetup.h	Mon Jul 11 09:35:19 2005 -0500
    73.3 @@ -2,7 +2,8 @@
    73.4  #ifndef _X86_64_BOOTSETUP_H
    73.5  #define _X86_64_BOOTSETUP_H 1
    73.6  
    73.7 -extern char x86_boot_params[2048];
    73.8 +#define BOOT_PARAM_SIZE		4096
    73.9 +extern char x86_boot_params[BOOT_PARAM_SIZE];
   73.10  
   73.11  /*
   73.12   * This is set up by the setup-routine at boot-time
    74.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h	Mon Jul 11 09:29:56 2005 -0500
    74.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/io.h	Mon Jul 11 09:35:19 2005 -0500
    74.3 @@ -36,11 +36,7 @@
    74.4    *  - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
    74.5    */
    74.6  
    74.7 -#ifdef SLOW_IO_BY_JUMPING
    74.8 -#define __SLOW_DOWN_IO "\njmp 1f\n1:\tjmp 1f\n1:"
    74.9 -#else
   74.10  #define __SLOW_DOWN_IO "\noutb %%al,$0x80"
   74.11 -#endif
   74.12  
   74.13  #ifdef REALLY_SLOW_IO
   74.14  #define __FULL_SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO __SLOW_DOWN_IO
   74.15 @@ -360,6 +356,17 @@ out:
   74.16  extern int iommu_bio_merge;
   74.17  #define BIO_VMERGE_BOUNDARY iommu_bio_merge
   74.18  
   74.19 +/*
   74.20 + * Convert a physical pointer to a virtual kernel pointer for /dev/mem
   74.21 + * access
   74.22 + */
   74.23 +#define xlate_dev_mem_ptr(p)	__va(p)
   74.24 +
   74.25 +/*
   74.26 + * Convert a virtual cached pointer to an uncached pointer
   74.27 + */
   74.28 +#define xlate_dev_kmem_ptr(p)	p
   74.29 +
   74.30  #endif /* __KERNEL__ */
   74.31  
   74.32  #endif
    75.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h	Mon Jul 11 09:29:56 2005 -0500
    75.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/page.h	Mon Jul 11 09:35:19 2005 -0500
    75.3 @@ -145,10 +145,6 @@ static inline pgd_t __pgd(unsigned long 
    75.4  
    75.5  #define __pgprot(x)	((pgprot_t) { (x) } )
    75.6  
    75.7 -extern unsigned long vm_stack_flags, vm_stack_flags32;
    75.8 -extern unsigned long vm_data_default_flags, vm_data_default_flags32;
    75.9 -extern unsigned long vm_force_exec32;
   75.10 -
   75.11  #define __START_KERNEL		0xffffffff80100000UL
   75.12  #define __START_KERNEL_map	0xffffffff80000000UL
   75.13  #define __PAGE_OFFSET           0xffff880000000000UL	
    76.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h	Mon Jul 11 09:29:56 2005 -0500
    76.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h	Mon Jul 11 09:35:19 2005 -0500
    76.3 @@ -1,7 +1,6 @@
    76.4  #ifndef _X86_64_PGALLOC_H
    76.5  #define _X86_64_PGALLOC_H
    76.6  
    76.7 -#include <asm/processor.h>
    76.8  #include <asm/fixmap.h>
    76.9  #include <asm/pda.h>
   76.10  #include <linux/threads.h>
   76.11 @@ -163,6 +162,8 @@ extern __inline__ void pte_free_kernel(p
   76.12  extern void pte_free(struct page *pte);
   76.13  
   76.14  //#define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) 
   76.15 +//#define __pmd_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
   76.16 +//#define __pud_free_tlb(tlb,x)   tlb_remove_page((tlb),virt_to_page(x))
   76.17  
   76.18  #define __pte_free_tlb(tlb,x)   pte_free((x))
   76.19  #define __pmd_free_tlb(tlb,x)   pmd_free((x))
    77.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h	Mon Jul 11 09:29:56 2005 -0500
    77.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgtable.h	Mon Jul 11 09:35:19 2005 -0500
    77.3 @@ -132,7 +132,7 @@ extern inline void pgd_clear (pgd_t * pg
    77.4   * each domain will have separate page tables, with their own versions of
    77.5   * accessed & dirty state.
    77.6   */
    77.7 -static inline pte_t ptep_get_and_clear(pte_t *xp)
    77.8 +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp)
    77.9  {
   77.10          pte_t pte = *xp;
   77.11          if (pte.pte)
   77.12 @@ -150,7 +150,7 @@ static inline pte_t ptep_get_and_clear(p
   77.13  #define PGDIR_MASK      (~(PGDIR_SIZE-1))
   77.14  
   77.15  #define USER_PTRS_PER_PGD       (TASK_SIZE/PGDIR_SIZE)
   77.16 -#define FIRST_USER_PGD_NR       0
   77.17 +#define FIRST_USER_ADDRESS	0
   77.18  
   77.19  #ifndef __ASSEMBLY__
   77.20  #define MAXMEM           0x3fffffffffffUL
   77.21 @@ -262,10 +262,11 @@ static inline unsigned long pud_bad(pud_
   77.22         val &= ~(_PAGE_USER | _PAGE_DIRTY); 
   77.23         return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);      
   77.24  } 
   77.25 +#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
   77.26  
   77.27  #define pte_none(x)	(!(x).pte)
   77.28  #define pte_present(x)	((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
   77.29 -#define pte_clear(xp)	do { set_pte(xp, __pte(0)); } while (0)
   77.30 +#define pte_clear(mm,addr,xp)	do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
   77.31  
   77.32  #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
   77.33  
   77.34 @@ -339,7 +340,9 @@ extern inline pte_t pte_mkdirty(pte_t pt
   77.35  extern inline pte_t pte_mkyoung(pte_t pte)	{ __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
   77.36  extern inline pte_t pte_mkwrite(pte_t pte)	{ __pte_val(pte) |= _PAGE_RW; return pte; }
   77.37  
   77.38 -static inline int ptep_test_and_clear_dirty(pte_t *ptep)
   77.39 +struct vm_area_struct;
   77.40 +
   77.41 +static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
   77.42  {
   77.43  	pte_t pte = *ptep;
   77.44  	int ret = pte_dirty(pte);
   77.45 @@ -348,7 +351,7 @@ static inline int ptep_test_and_clear_di
   77.46  	return ret;
   77.47  }
   77.48  
   77.49 -static inline int ptep_test_and_clear_young(pte_t *ptep)
   77.50 +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
   77.51  {
   77.52  	pte_t pte = *ptep;
   77.53  	int ret = pte_young(pte);
   77.54 @@ -357,18 +360,12 @@ static inline int ptep_test_and_clear_yo
   77.55  	return ret;
   77.56  }
   77.57  
   77.58 -static inline void ptep_set_wrprotect(pte_t *ptep)
   77.59 +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
   77.60  {
   77.61  	pte_t pte = *ptep;
   77.62  	if (pte_write(pte))
   77.63  		set_pte(ptep, pte_wrprotect(pte));
   77.64  }
   77.65 -static inline void ptep_mkdirty(pte_t *ptep)
   77.66 -{
   77.67 -	pte_t pte = *ptep;
   77.68 -	if (!pte_dirty(pte))
   77.69 -		xen_l1_entry_update(ptep, pte_mkdirty(pte).pte);
   77.70 -}
   77.71  
   77.72  /*
   77.73   * Macro to mark a page protection value as "uncacheable".
   77.74 @@ -517,6 +514,13 @@ int __direct_remap_area_pages(struct mm_
   77.75  #define io_remap_page_range(vma, vaddr, paddr, size, prot)		\
   77.76  		remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
   77.77  
   77.78 +#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
   77.79 +		remap_pfn_range(vma, vaddr, pfn, size, prot)
   77.80 +
   77.81 +#define MK_IOSPACE_PFN(space, pfn)	(pfn)
   77.82 +#define GET_IOSPACE(pfn)		0
   77.83 +#define GET_PFN(pfn)			(pfn)
   77.84 +
   77.85  #define HAVE_ARCH_UNMAPPED_AREA
   77.86  
   77.87  #define pgtable_cache_init()   do { } while (0)
   77.88 @@ -534,7 +538,6 @@ int __direct_remap_area_pages(struct mm_
   77.89  #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
   77.90  #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
   77.91  #define __HAVE_ARCH_PTEP_SET_WRPROTECT
   77.92 -#define __HAVE_ARCH_PTEP_MKDIRTY
   77.93  #define __HAVE_ARCH_PTE_SAME
   77.94  #include <asm-generic/pgtable.h>
   77.95  
    78.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h	Mon Jul 11 09:29:56 2005 -0500
    78.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/processor.h	Mon Jul 11 09:35:19 2005 -0500
    78.3 @@ -62,9 +62,8 @@ struct cpuinfo_x86 {
    78.4  	int	x86_tlbsize;	/* number of 4K pages in DTLB/ITLB combined(in pages)*/
    78.5          __u8    x86_virt_bits, x86_phys_bits;
    78.6  	__u8	x86_num_cores;
    78.7 -	__u8	x86_apicid;
    78.8          __u32   x86_power; 	
    78.9 -	__u32   x86_cpuid_level;	/* Max CPUID function supported */
   78.10 +	__u32   extended_cpuid_level;	/* Max extended CPUID function supported */
   78.11  	unsigned long loops_per_jiffy;
   78.12  } ____cacheline_aligned;
   78.13  
   78.14 @@ -92,7 +91,6 @@ extern char ignore_irq13;
   78.15  extern void identify_cpu(struct cpuinfo_x86 *);
   78.16  extern void print_cpu_info(struct cpuinfo_x86 *);
   78.17  extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
   78.18 -extern void dodgy_tsc(void);
   78.19  
   78.20  /*
   78.21   * EFLAGS bits
   78.22 @@ -169,9 +167,9 @@ static inline void set_in_cr4 (unsigned 
   78.23  
   78.24  
   78.25  /*
   78.26 - * User space process size. 47bits.
   78.27 + * User space process size. 47bits minus one guard page.
   78.28   */
   78.29 -#define TASK_SIZE	(0x800000000000UL)
   78.30 +#define TASK_SIZE	(0x800000000000UL - 4096)
   78.31  
   78.32  /* This decides where the kernel will search for a free chunk of vm
   78.33   * space during mmap's.
    79.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h	Mon Jul 11 09:29:56 2005 -0500
    79.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/ptrace.h	Mon Jul 11 09:35:19 2005 -0500
    79.3 @@ -91,6 +91,11 @@ extern unsigned long profile_pc(struct p
    79.4  
    79.5  void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
    79.6  
    79.7 +struct task_struct;
    79.8 +
    79.9 +extern unsigned long
   79.10 +convert_rip_to_linear(struct task_struct *child, struct pt_regs *regs);
   79.11 +
   79.12  enum {
   79.13          EF_CF   = 0x00000001,
   79.14          EF_PF   = 0x00000004,
    80.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h	Mon Jul 11 09:29:56 2005 -0500
    80.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/segment.h	Mon Jul 11 09:35:19 2005 -0500
    80.3 @@ -24,10 +24,9 @@
    80.4  
    80.5  #define GDT_ENTRY_TLS 1
    80.6  #define GDT_ENTRY_TSS 8	/* needs two entries */
    80.7 -#define GDT_ENTRY_LDT 10
    80.8 -#define GDT_ENTRY_TLS_MIN 11
    80.9 -#define GDT_ENTRY_TLS_MAX 13
   80.10 -/* 14 free */
   80.11 +#define GDT_ENTRY_LDT 10 /* needs two entries */
   80.12 +#define GDT_ENTRY_TLS_MIN 12
   80.13 +#define GDT_ENTRY_TLS_MAX 14
   80.14  #define GDT_ENTRY_KERNELCS16 15
   80.15  
   80.16  #define GDT_ENTRY_TLS_ENTRIES 3
    81.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h	Mon Jul 11 09:29:56 2005 -0500
    81.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/smp.h	Mon Jul 11 09:35:19 2005 -0500
    81.3 @@ -31,12 +31,16 @@ extern int disable_apic;
    81.4  
    81.5  struct pt_regs;
    81.6  
    81.7 +extern cpumask_t cpu_present_mask;
    81.8 +extern cpumask_t cpu_possible_map;
    81.9 +extern cpumask_t cpu_online_map;
   81.10 +extern cpumask_t cpu_callout_map;
   81.11 +
   81.12  /*
   81.13   * Private routines/data
   81.14   */
   81.15   
   81.16  extern void smp_alloc_memory(void);
   81.17 -extern cpumask_t cpu_online_map;
   81.18  extern volatile unsigned long smp_invalidate_needed;
   81.19  extern int pic_mode;
   81.20  extern int smp_num_siblings;
   81.21 @@ -44,11 +48,12 @@ extern void smp_flush_tlb(void);
   81.22  extern void smp_message_irq(int cpl, void *dev_id, struct pt_regs *regs);
   81.23  extern void smp_send_reschedule(int cpu);
   81.24  extern void smp_invalidate_rcv(void);		/* Process an NMI */
   81.25 -extern void (*mtrr_hook) (void);
   81.26  extern void zap_low_mappings(void);
   81.27  void smp_stop_cpu(void);
   81.28  extern cpumask_t cpu_sibling_map[NR_CPUS];
   81.29 +extern cpumask_t cpu_core_map[NR_CPUS];
   81.30  extern u8 phys_proc_id[NR_CPUS];
   81.31 +extern u8 cpu_core_id[NR_CPUS];
   81.32  
   81.33  #define SMP_TRAMPOLINE_BASE 0x6000
   81.34  
   81.35 @@ -58,10 +63,6 @@ extern u8 phys_proc_id[NR_CPUS];
   81.36   * compresses data structures.
   81.37   */
   81.38  
   81.39 -extern cpumask_t cpu_callout_map;
   81.40 -extern cpumask_t cpu_callin_map;
   81.41 -#define cpu_possible_map cpu_callout_map
   81.42 -
   81.43  static inline int num_booting_cpus(void)
   81.44  {
   81.45  	return cpus_weight(cpu_callout_map);
   81.46 @@ -77,7 +78,7 @@ extern __inline int hard_smp_processor_i
   81.47  }
   81.48  #endif
   81.49  
   81.50 -#define safe_smp_processor_id() (disable_apic ? 0 : x86_apicid_to_cpu(hard_smp_processor_id()))
   81.51 +extern int safe_smp_processor_id(void);
   81.52  
   81.53  #endif /* !ASSEMBLY */
   81.54  
   81.55 @@ -99,22 +100,6 @@ static inline unsigned int cpu_mask_to_a
   81.56  	return cpus_addr(cpumask)[0];
   81.57  }
   81.58  
   81.59 -static inline int x86_apicid_to_cpu(u8 apicid)
   81.60 -{
   81.61 -	int i;
   81.62 -
   81.63 -	for (i = 0; i < NR_CPUS; ++i)
   81.64 -		if (x86_cpu_to_apicid[i] == apicid)
   81.65 -			return i;
   81.66 -
   81.67 -	/* No entries in x86_cpu_to_apicid?  Either no MPS|ACPI,
   81.68 -	 * or called too early.  Either way, we must be CPU 0. */
   81.69 -      	if (x86_cpu_to_apicid[0] == BAD_APICID)
   81.70 -		return 0;
   81.71 -
   81.72 -	return -1;
   81.73 -}
   81.74 -
   81.75  static inline int cpu_present_to_apicid(int mps_cpu)
   81.76  {
   81.77  	if (mps_cpu < NR_CPUS)
    82.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h	Mon Jul 11 09:29:56 2005 -0500
    82.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/system.h	Mon Jul 11 09:35:19 2005 -0500
    82.3 @@ -400,4 +400,6 @@ void enable_hlt(void);
    82.4  #define HAVE_EAT_KEY
    82.5  void eat_key(void);
    82.6  
    82.7 +extern unsigned long arch_align_stack(unsigned long sp);
    82.8 +
    82.9  #endif
    83.1 --- a/linux-2.6-xen-sparse/include/linux/gfp.h	Mon Jul 11 09:29:56 2005 -0500
    83.2 +++ b/linux-2.6-xen-sparse/include/linux/gfp.h	Mon Jul 11 09:35:19 2005 -0500
    83.3 @@ -26,26 +26,28 @@ struct vm_area_struct;
    83.4   *
    83.5   * __GFP_NORETRY: The VM implementation must not retry indefinitely.
    83.6   */
    83.7 -#define __GFP_WAIT	0x10	/* Can wait and reschedule? */
    83.8 -#define __GFP_HIGH	0x20	/* Should access emergency pools? */
    83.9 -#define __GFP_IO	0x40	/* Can start physical IO? */
   83.10 -#define __GFP_FS	0x80	/* Can call down to low-level FS? */
   83.11 -#define __GFP_COLD	0x100	/* Cache-cold page required */
   83.12 -#define __GFP_NOWARN	0x200	/* Suppress page allocation failure warning */
   83.13 -#define __GFP_REPEAT	0x400	/* Retry the allocation.  Might fail */
   83.14 -#define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
   83.15 -#define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
   83.16 -#define __GFP_NO_GROW	0x2000	/* Slab internal usage */
   83.17 -#define __GFP_COMP	0x4000	/* Add compound page metadata */
   83.18 -#define __GFP_ZERO	0x8000	/* Return zeroed page on success */
   83.19 +#define __GFP_WAIT	0x10u	/* Can wait and reschedule? */
   83.20 +#define __GFP_HIGH	0x20u	/* Should access emergency pools? */
   83.21 +#define __GFP_IO	0x40u	/* Can start physical IO? */
   83.22 +#define __GFP_FS	0x80u	/* Can call down to low-level FS? */
   83.23 +#define __GFP_COLD	0x100u	/* Cache-cold page required */
   83.24 +#define __GFP_NOWARN	0x200u	/* Suppress page allocation failure warning */
   83.25 +#define __GFP_REPEAT	0x400u	/* Retry the allocation.  Might fail */
   83.26 +#define __GFP_NOFAIL	0x800u	/* Retry for ever.  Cannot fail */
   83.27 +#define __GFP_NORETRY	0x1000u	/* Do not retry.  Might fail */
   83.28 +#define __GFP_NO_GROW	0x2000u	/* Slab internal usage */
   83.29 +#define __GFP_COMP	0x4000u	/* Add compound page metadata */
   83.30 +#define __GFP_ZERO	0x8000u	/* Return zeroed page on success */
   83.31 +#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
   83.32  
   83.33 -#define __GFP_BITS_SHIFT 16	/* Room for 16 __GFP_FOO bits */
   83.34 +#define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
   83.35  #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
   83.36  
   83.37  /* if you forget to add the bitmask here kernel will crash, period */
   83.38  #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
   83.39  			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
   83.40 -			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
   83.41 +			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
   83.42 +			__GFP_NOMEMALLOC)
   83.43  
   83.44  #define GFP_ATOMIC	(__GFP_HIGH)
   83.45  #define GFP_NOIO	(__GFP_WAIT)
   83.46 @@ -86,7 +88,7 @@ struct vm_area_struct;
   83.47  extern struct page *
   83.48  FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
   83.49  
   83.50 -static inline struct page *alloc_pages_node(int nid, unsigned int gfp_mask,
   83.51 +static inline struct page *alloc_pages_node(int nid, unsigned int __nocast gfp_mask,
   83.52  						unsigned int order)
   83.53  {
   83.54  	if (unlikely(order >= MAX_ORDER))
   83.55 @@ -97,17 +99,17 @@ static inline struct page *alloc_pages_n
   83.56  }
   83.57  
   83.58  #ifdef CONFIG_NUMA
   83.59 -extern struct page *alloc_pages_current(unsigned gfp_mask, unsigned order);
   83.60 +extern struct page *alloc_pages_current(unsigned int __nocast gfp_mask, unsigned order);
   83.61  
   83.62  static inline struct page *
   83.63 -alloc_pages(unsigned int gfp_mask, unsigned int order)
   83.64 +alloc_pages(unsigned int __nocast gfp_mask, unsigned int order)
   83.65  {
   83.66  	if (unlikely(order >= MAX_ORDER))
   83.67  		return NULL;
   83.68  
   83.69  	return alloc_pages_current(gfp_mask, order);
   83.70  }
   83.71 -extern struct page *alloc_page_vma(unsigned gfp_mask,
   83.72 +extern struct page *alloc_page_vma(unsigned __nocast gfp_mask,
   83.73  			struct vm_area_struct *vma, unsigned long addr);
   83.74  #else
   83.75  #define alloc_pages(gfp_mask, order) \
   83.76 @@ -116,8 +118,8 @@ extern struct page *alloc_page_vma(unsig
   83.77  #endif
   83.78  #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
   83.79  
   83.80 -extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
   83.81 -extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
   83.82 +extern unsigned long FASTCALL(__get_free_pages(unsigned int __nocast gfp_mask, unsigned int order));
   83.83 +extern unsigned long FASTCALL(get_zeroed_page(unsigned int __nocast gfp_mask));
   83.84  
   83.85  #define __get_free_page(gfp_mask) \
   83.86  		__get_free_pages((gfp_mask),0)
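
Two independent tweaks land here: every flag gains a u suffix so that combinations and complements (~__GFP_WAIT and friends) are computed in unsigned arithmetic, and the flag space widens from 16 to 20 bits so that __GFP_NOMEMALLOC (bit 16) fits inside __GFP_BITS_MASK. A standalone check of the new mask arithmetic:

	#include <stdio.h>

	#define __GFP_BITS_SHIFT 20
	#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
	#define __GFP_NOMEMALLOC 0x10000u

	int main(void)
	{
		printf("mask %#x\n", __GFP_BITS_MASK);	/* 0xfffff */
		/* bit 16 would have been masked off by the old 16-bit mask */
		printf("fits %d\n", (__GFP_NOMEMALLOC & __GFP_BITS_MASK) != 0);
		return 0;
	}
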
    84.1 --- a/linux-2.6-xen-sparse/include/linux/mm.h	Mon Jul 11 09:29:56 2005 -0500
    84.2 +++ b/linux-2.6-xen-sparse/include/linux/mm.h	Mon Jul 11 09:35:19 2005 -0500
    84.3 @@ -37,10 +37,6 @@ extern int sysctl_legacy_va_layout;
    84.4  #include <asm/processor.h>
    84.5  #include <asm/atomic.h>
    84.6  
    84.7 -#ifndef MM_VM_SIZE
    84.8 -#define MM_VM_SIZE(mm)	((TASK_SIZE + PGDIR_SIZE - 1) & PGDIR_MASK)
    84.9 -#endif
   84.10 -
   84.11  #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
   84.12  
   84.13  /*
   84.14 @@ -164,7 +160,8 @@ extern unsigned int kobjsize(const void 
   84.15  #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
   84.16  #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
   84.17  #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
   84.18 -#define VM_FOREIGN      0x01000000      /* Has pages belonging to another VM */
   84.19 +#define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
   84.20 +#define VM_FOREIGN	0x02000000	/* Has pages belonging to another VM */
   84.21  
   84.22  #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
   84.23  #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
   84.24 @@ -582,17 +579,19 @@ struct zap_details {
   84.25  	pgoff_t	first_index;			/* Lowest page->index to unmap */
   84.26  	pgoff_t last_index;			/* Highest page->index to unmap */
   84.27  	spinlock_t *i_mmap_lock;		/* For unmap_mapping_range: */
   84.28 -	unsigned long break_addr;		/* Where unmap_vmas stopped */
   84.29  	unsigned long truncate_count;		/* Compare vm_truncate_count */
   84.30  };
   84.31  
   84.32 -void zap_page_range(struct vm_area_struct *vma, unsigned long address,
   84.33 +unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
   84.34  		unsigned long size, struct zap_details *);
   84.35 -int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
   84.36 +unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
   84.37  		struct vm_area_struct *start_vma, unsigned long start_addr,
   84.38  		unsigned long end_addr, unsigned long *nr_accounted,
   84.39  		struct zap_details *);
   84.40 -void clear_page_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end);
   84.41 +void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
   84.42 +		unsigned long end, unsigned long floor, unsigned long ceiling);
   84.43 +void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
   84.44 +		unsigned long floor, unsigned long ceiling);
   84.45  int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
   84.46  			struct vm_area_struct *vma);
   84.47  int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
   84.48 @@ -639,9 +638,9 @@ extern unsigned long do_mremap(unsigned 
   84.49   * These functions are passed a count `nr_to_scan' and a gfpmask.  They should
   84.50   * scan `nr_to_scan' objects, attempting to free them.
   84.51   *
   84.52 - * The callback must the number of objects which remain in the cache.
   84.53 + * The callback must return the number of objects which remain in the cache.
   84.54   *
   84.55 - * The callback will be passes nr_to_scan == 0 when the VM is querying the
   84.56 + * The callback will be passed nr_to_scan == 0 when the VM is querying the
   84.57   * cache size, so a fastpath for that case is appropriate.
   84.58   */
   84.59  typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
   84.60 @@ -728,6 +727,7 @@ extern void __vma_link_rb(struct mm_stru
   84.61  extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
   84.62  	unsigned long addr, unsigned long len, pgoff_t pgoff);
   84.63  extern void exit_mmap(struct mm_struct *);
   84.64 +extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
   84.65  
   84.66  extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
   84.67  
   84.68 @@ -843,7 +843,7 @@ static inline void vm_stat_unaccount(str
   84.69  }
   84.70  
   84.71  /* update per process rss and vm hiwater data */
   84.72 -extern void update_mem_hiwater(void);
   84.73 +extern void update_mem_hiwater(struct task_struct *tsk);
   84.74  
   84.75  #ifndef CONFIG_DEBUG_PAGEALLOC
   84.76  static inline void
   84.77 @@ -861,5 +861,8 @@ int in_gate_area_no_task(unsigned long a
   84.78  #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
   84.79  #endif	/* __HAVE_ARCH_GATE_AREA */
   84.80  
   84.81 +/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */
   84.82 +#define OOM_DISABLE -17
   84.83 +
   84.84  #endif /* __KERNEL__ */
   84.85  #endif /* _LINUX_MM_H */
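
OOM_DISABLE names the magic oom_adj value: a task whose /proc/<pid>/oom_adj reads -17 is skipped by the oom-killer entirely. From userspace that is a plain procfs write (lowering the value may require privilege); a small hypothetical helper, assuming a kernel that exposes oom_adj:

	#include <stdio.h>

	#define OOM_DISABLE (-17)

	/* shield the calling process from the oom-killer; 0 on success */
	static int oom_disable_self(void)
	{
		FILE *f = fopen("/proc/self/oom_adj", "w");

		if (f == NULL)
			return -1;
		fprintf(f, "%d\n", OOM_DISABLE);
		return fclose(f);
	}

	int main(void)
	{
		return oom_disable_self() ? 1 : 0;
	}
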
    85.1 --- a/linux-2.6-xen-sparse/include/linux/skbuff.h	Mon Jul 11 09:29:56 2005 -0500
    85.2 +++ b/linux-2.6-xen-sparse/include/linux/skbuff.h	Mon Jul 11 09:35:19 2005 -0500
    85.3 @@ -83,12 +83,6 @@
    85.4   *	Any questions? No questions, good. 		--ANK
    85.5   */
    85.6  
    85.7 -#ifdef __i386__
    85.8 -#define NET_CALLER(arg) (*(((void **)&arg) - 1))
    85.9 -#else
   85.10 -#define NET_CALLER(arg) __builtin_return_address(0)
   85.11 -#endif
   85.12 -
   85.13  struct net_device;
   85.14  
   85.15  #ifdef CONFIG_NETFILTER
   85.16 @@ -146,6 +140,20 @@ struct skb_shared_info {
   85.17  	skb_frag_t	frags[MAX_SKB_FRAGS];
   85.18  };
   85.19  
   85.20 +/* We divide dataref into two halves.  The higher 16 bits hold references
   85.21 + * to the payload part of skb->data.  The lower 16 bits hold references to
   85.22 + * the entire skb->data.  It is up to the users of the skb to agree on
   85.23 + * where the payload starts.
   85.24 + *
   85.25 + * All users must obey the rule that the skb->data reference count must be
   85.26 + * greater than or equal to the payload reference count.
   85.27 + *
   85.28 + * Holding a reference to the payload part means that the user does not
   85.29 + * care about modifications to the header part of skb->data.
   85.30 + */
   85.31 +#define SKB_DATAREF_SHIFT 16
   85.32 +#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
   85.33 +
   85.34  /** 
   85.35   *	struct sk_buff - socket buffer
   85.36   *	@next: Next buffer in list
   85.37 @@ -159,14 +167,16 @@ struct skb_shared_info {
   85.38   *	@h: Transport layer header
   85.39   *	@nh: Network layer header
   85.40   *	@mac: Link layer header
   85.41 - *	@dst: FIXME: Describe this field
   85.42 + *	@dst: destination entry
   85.43 + *	@sp: the security path, used for xfrm
   85.44   *	@cb: Control buffer. Free for use by every layer. Put private vars here
   85.45   *	@len: Length of actual data
   85.46   *	@data_len: Data length
   85.47   *	@mac_len: Length of link layer header
   85.48   *	@csum: Checksum
   85.49 - *	@__unused: Dead field, may be reused
   85.50 + *	@local_df: allow local fragmentation
   85.51   *	@cloned: Head may be cloned (check refcnt to be sure)
   85.52 + *	@nohdr: Payload reference only, must not modify header
   85.53   *	@proto_csum_valid: Protocol csum validated since arriving at localhost
   85.54   *	@proto_csum_blank: Protocol csum must be added before leaving localhost
   85.55   *	@pkt_type: Packet class
   85.56 @@ -189,6 +199,8 @@ struct skb_shared_info {
   85.57   *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
   85.58   *      @private: Data which is private to the HIPPI implementation
   85.59   *	@tc_index: Traffic control index
   85.60 + *	@tc_verd: traffic control verdict
   85.61 + *	@tc_classid: traffic control classid
   85.62   */
   85.63  
   85.64  struct sk_buff {
   85.65 @@ -241,6 +253,7 @@ struct sk_buff {
   85.66  				csum;
   85.67  	unsigned char		local_df,
   85.68  				cloned:1,
   85.69 +				nohdr:1,
   85.70  				proto_csum_valid:1,
   85.71  				proto_csum_blank:1,
   85.72  				pkt_type,
   85.73 @@ -374,7 +387,42 @@ static inline void kfree_skb(struct sk_b
   85.74   */
   85.75  static inline int skb_cloned(const struct sk_buff *skb)
   85.76  {
   85.77 -	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
   85.78 +	return skb->cloned &&
   85.79 +	       (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
   85.80 +}
   85.81 +
   85.82 +/**
   85.83 + *	skb_header_cloned - is the header a clone
   85.84 + *	@skb: buffer to check
   85.85 + *
   85.86 + *	Returns true if modifying the header part of the buffer requires
   85.87 + *	the data to be copied.
   85.88 + */
   85.89 +static inline int skb_header_cloned(const struct sk_buff *skb)
   85.90 +{
   85.91 +	int dataref;
   85.92 +
   85.93 +	if (!skb->cloned)
   85.94 +		return 0;
   85.95 +
   85.96 +	dataref = atomic_read(&skb_shinfo(skb)->dataref);
   85.97 +	dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT);
   85.98 +	return dataref != 1;
   85.99 +}
  85.100 +
  85.101 +/**
  85.102 + *	skb_header_release - release reference to header
  85.103 + *	@skb: buffer to operate on
  85.104 + *
  85.105 + *	Drop a reference to the header part of the buffer.  This is done
  85.106 + *	by acquiring a payload reference.  You must not read from the header
  85.107 + *	part of skb->data after this.
  85.108 + */
  85.109 +static inline void skb_header_release(struct sk_buff *skb)
  85.110 +{
  85.111 +	BUG_ON(skb->nohdr);
  85.112 +	skb->nohdr = 1;
  85.113 +	atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref);
  85.114  }
  85.115  
  85.116  /**
  85.117 @@ -925,6 +973,7 @@ static inline void __skb_queue_purge(str
  85.118  		kfree_skb(skb);
  85.119  }
  85.120  
  85.121 +#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB
  85.122  /**
  85.123   *	__dev_alloc_skb - allocate an skbuff for sending
  85.124   *	@length: length to allocate
  85.125 @@ -937,7 +986,6 @@ static inline void __skb_queue_purge(str
  85.126   *
  85.127   *	%NULL is returned in there is no free memory.
  85.128   */
  85.129 -#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB
  85.130  static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
  85.131  					      int gfp_mask)
  85.132  {
  85.133 @@ -1058,6 +1106,42 @@ static inline int skb_linearize(struct s
  85.134  	return __skb_linearize(skb, gfp);
  85.135  }
  85.136  
  85.137 +/**
  85.138 + *	skb_postpull_rcsum - update checksum for received skb after pull
  85.139 + *	@skb: buffer to update
  85.140 + *	@start: start of data before pull
  85.141 + *	@len: length of data pulled
  85.142 + *
  85.143 + *	After doing a pull on a received packet, you need to call this to
  85.144 + *	update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE
  85.145 + *	so that it can be recomputed from scratch.
  85.146 + */
  85.147 +
  85.148 +static inline void skb_postpull_rcsum(struct sk_buff *skb,
  85.149 +					 const void *start, int len)
  85.150 +{
  85.151 +	if (skb->ip_summed == CHECKSUM_HW)
  85.152 +		skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0));
  85.153 +}
  85.154 +
  85.155 +/**
  85.156 + *	pskb_trim_rcsum - trim received skb and update checksum
  85.157 + *	@skb: buffer to trim
  85.158 + *	@len: new length
  85.159 + *
  85.160 + *	This is exactly the same as pskb_trim except that it ensures the
  85.161 + *	checksum of received packets is still valid after the operation.
  85.162 + */
  85.163 +
  85.164 +static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len)
  85.165 +{
  85.166 +	if (len >= skb->len)
  85.167 +		return 0;
  85.168 +	if (skb->ip_summed == CHECKSUM_HW)
  85.169 +		skb->ip_summed = CHECKSUM_NONE;
  85.170 +	return __pskb_trim(skb, len);
  85.171 +}
  85.172 +
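Both helpers lean on the linearity of the 1's-complement checksum: the partial sum over the bytes that were pulled (or trimmed) can simply be subtracted from the running skb->csum. A small user-space model - partial() and sub() below are stand-ins for the kernel's csum_partial() and csum_sub(), and only even lengths are used so the 16-bit pairing stays aligned:

#include <stdio.h>
#include <stdint.h>

/* folded 16-bit 1's-complement sum over an even number of bytes */
static unsigned int partial(const uint8_t *p, int len)
{
	uint32_t sum = 0;

	for (int i = 0; i < len; i += 2)
		sum += ((uint32_t)p[i] << 8) | p[i + 1];
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

/* 1's-complement subtraction: add the complement of the subtrahend */
static unsigned int sub(unsigned int a, unsigned int b)
{
	uint32_t sum = a + (~b & 0xffff);

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum % 0xffff;	/* identify 0xffff with 0 */
}

int main(void)
{
	uint8_t pkt[8] = { 0x45, 0x00, 0x12, 0x34, 0xde, 0xad, 0xbe, 0xef };
	int pull = 4;		/* bytes removed from the front */

	unsigned int whole = partial(pkt, 8);
	unsigned int after = sub(whole, partial(pkt, pull));

	/* matches a fresh sum over what is left: the csum stays valid */
	printf("%04x %04x\n", after, partial(pkt + pull, 4) % 0xffff);
	return 0;
}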
  85.173  static inline void *kmap_skb_frag(const skb_frag_t *frag)
  85.174  {
  85.175  #ifdef CONFIG_HIGHMEM
  85.176 @@ -1098,6 +1182,8 @@ extern unsigned int    skb_checksum(cons
  85.177  				    int len, unsigned int csum);
  85.178  extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
  85.179  				     void *to, int len);
  85.180 +extern int	       skb_store_bits(const struct sk_buff *skb, int offset,
  85.181 +				      void *from, int len);
  85.182  extern unsigned int    skb_copy_and_csum_bits(const struct sk_buff *skb,
  85.183  					      int offset, u8 *to, int len,
  85.184  					      unsigned int csum);
  85.185 @@ -1122,22 +1208,6 @@ static inline void *skb_header_pointer(c
  85.186  extern void skb_init(void);
  85.187  extern void skb_add_mtu(int mtu);
  85.188  
  85.189 -struct skb_iter {
  85.190 -	/* Iteration functions set these */
  85.191 -	unsigned char *data;
  85.192 -	unsigned int len;
  85.193 -
  85.194 -	/* Private to iteration */
  85.195 -	unsigned int nextfrag;
  85.196 -	struct sk_buff *fraglist;
  85.197 -};
  85.198 -
  85.199 -/* Keep iterating until skb_iter_next returns false. */
  85.200 -extern void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i);
  85.201 -extern int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i);
  85.202 -/* Call this if aborting loop before !skb_iter_next */
  85.203 -extern void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i);
  85.204 -
  85.205  #ifdef CONFIG_NETFILTER
  85.206  static inline void nf_conntrack_put(struct nf_conntrack *nfct)
  85.207  {
    86.1 --- a/linux-2.6-xen-sparse/mm/highmem.c	Mon Jul 11 09:29:56 2005 -0500
    86.2 +++ b/linux-2.6-xen-sparse/mm/highmem.c	Mon Jul 11 09:35:19 2005 -0500
    86.3 @@ -30,9 +30,9 @@
    86.4  
    86.5  static mempool_t *page_pool, *isa_page_pool;
    86.6  
    86.7 -static void *page_pool_alloc(int gfp_mask, void *data)
    86.8 +static void *page_pool_alloc(unsigned int __nocast gfp_mask, void *data)
    86.9  {
   86.10 -	int gfp = gfp_mask | (int) (long) data;
   86.11 +	unsigned int gfp = gfp_mask | (unsigned int) (long) data;
   86.12  
   86.13  	return alloc_page(gfp);
   86.14  }
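The __nocast marker is a sparse annotation only - plain gcc sees nothing. It keeps the not-yet-typed gfp flags from being silently mixed with ordinary integers, along the lines of the include/linux/compiler.h pattern:

#ifdef __CHECKER__
# define __nocast	__attribute__((nocast))
#else
# define __nocast
#endif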
   86.15 @@ -90,7 +90,8 @@ static void flush_all_zero_pkmaps(void)
   86.16  		 * So no dangers, even with speculative execution.
   86.17  		 */
   86.18  		page = pte_page(pkmap_page_table[i]);
   86.19 -		pte_clear(&pkmap_page_table[i]);
   86.20 +		pte_clear(&init_mm, (unsigned long)page_address(page),
   86.21 +			  &pkmap_page_table[i]);
   86.22  
   86.23  		set_page_address(page, NULL);
   86.24  	}
   86.25 @@ -138,7 +139,8 @@ start:
   86.26  		}
   86.27  	}
   86.28  	vaddr = PKMAP_ADDR(last_pkmap_nr);
   86.29 -	set_pte(&(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
   86.30 +	set_pte_at(&init_mm, vaddr,
   86.31 +		   &(pkmap_page_table[last_pkmap_nr]), mk_pte(page, kmap_prot));
   86.32  
   86.33  	pkmap_count[last_pkmap_nr] = 1;
   86.34  	set_page_address(page, (void *)vaddr);
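These pkmap changes follow the tree-wide 2.6.12 accessor change: set_pte(ptep, pte) and pte_clear(ptep) grow (mm, address) parameters so a paravirtualized port such as this Xen tree can see which address space and virtual address a pte update targets. A native architecture simply discards the extra arguments; sketched after the asm-i386 pattern:

#define set_pte_at(mm, addr, ptep, pteval)	set_pte(ptep, pteval)
#define pte_clear(mm, addr, ptep) \
	do { set_pte_at(mm, addr, ptep, __pte(0)); } while (0)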
   86.35 @@ -332,6 +334,7 @@ static void bounce_end_io(struct bio *bi
   86.36  			continue;
   86.37  
   86.38  		mempool_free(bvec->bv_page, pool);	
   86.39 +		dec_page_state(nr_bounce);
   86.40  	}
   86.41  
   86.42  	bio_endio(bio_orig, bio_orig->bi_size, err);
   86.43 @@ -412,6 +415,7 @@ static void __blk_queue_bounce(request_q
   86.44  		to->bv_page = mempool_alloc(pool, q->bounce_gfp);
   86.45  		to->bv_len = from->bv_len;
   86.46  		to->bv_offset = from->bv_offset;
   86.47 +		inc_page_state(nr_bounce);
   86.48  
   86.49  		if (rw == WRITE) {
   86.50  			char *vto, *vfrom;
    87.1 --- a/linux-2.6-xen-sparse/mm/memory.c	Mon Jul 11 09:29:56 2005 -0500
    87.2 +++ b/linux-2.6-xen-sparse/mm/memory.c	Mon Jul 11 09:35:19 2005 -0500
    87.3 @@ -46,7 +46,6 @@
    87.4  #include <linux/highmem.h>
    87.5  #include <linux/pagemap.h>
    87.6  #include <linux/rmap.h>
    87.7 -#include <linux/acct.h>
    87.8  #include <linux/module.h>
    87.9  #include <linux/init.h>
   87.10  
   87.11 @@ -84,116 +83,205 @@ EXPORT_SYMBOL(high_memory);
   87.12  EXPORT_SYMBOL(vmalloc_earlyreserve);
   87.13  
   87.14  /*
   87.15 + * If a p?d_bad entry is found while walking page tables, report
   87.16 + * the error before resetting the entry to p?d_none.  Usually (but
   87.17 + * very seldom) called out from the p?d_none_or_clear_bad macros.
   87.18 + */
   87.19 +
   87.20 +void pgd_clear_bad(pgd_t *pgd)
   87.21 +{
   87.22 +	pgd_ERROR(*pgd);
   87.23 +	pgd_clear(pgd);
   87.24 +}
   87.25 +
   87.26 +void pud_clear_bad(pud_t *pud)
   87.27 +{
   87.28 +	pud_ERROR(*pud);
   87.29 +	pud_clear(pud);
   87.30 +}
   87.31 +
   87.32 +void pmd_clear_bad(pmd_t *pmd)
   87.33 +{
   87.34 +	pmd_ERROR(*pmd);
   87.35 +	pmd_clear(pmd);
   87.36 +}
   87.37 +
   87.38 +/*
   87.39   * Note: this doesn't free the actual pages themselves. That
   87.40   * has been handled earlier when unmapping all the memory regions.
   87.41   */
   87.42 -static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end)
   87.43 +static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd)
   87.44  {
   87.45 -	struct page *page;
   87.46 -
   87.47 -	if (pmd_none(*pmd))
   87.48 -		return;
   87.49 -	if (unlikely(pmd_bad(*pmd))) {
   87.50 -		pmd_ERROR(*pmd);
   87.51 -		pmd_clear(pmd);
   87.52 -		return;
   87.53 -	}
   87.54 -	if (!((start | end) & ~PMD_MASK)) {
   87.55 -		/* Only clear full, aligned ranges */
   87.56 -		page = pmd_page(*pmd);
   87.57 -		pmd_clear(pmd);
   87.58 -		dec_page_state(nr_page_table_pages);
   87.59 -		tlb->mm->nr_ptes--;
   87.60 -		pte_free_tlb(tlb, page);
   87.61 -	}
   87.62 +	struct page *page = pmd_page(*pmd);
   87.63 +	pmd_clear(pmd);
   87.64 +	pte_free_tlb(tlb, page);
   87.65 +	dec_page_state(nr_page_table_pages);
   87.66 +	tlb->mm->nr_ptes--;
   87.67  }
   87.68  
   87.69 -static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end)
   87.70 +static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
   87.71 +				unsigned long addr, unsigned long end,
   87.72 +				unsigned long floor, unsigned long ceiling)
   87.73  {
   87.74 -	unsigned long addr = start, next;
   87.75 -	pmd_t *pmd, *__pmd;
   87.76 -
   87.77 -	if (pud_none(*pud))
   87.78 -		return;
   87.79 -	if (unlikely(pud_bad(*pud))) {
   87.80 -		pud_ERROR(*pud);
   87.81 -		pud_clear(pud);
   87.82 -		return;
   87.83 -	}
   87.84 +	pmd_t *pmd;
   87.85 +	unsigned long next;
   87.86 +	unsigned long start;
   87.87  
   87.88 -	pmd = __pmd = pmd_offset(pud, start);
   87.89 +	start = addr;
   87.90 +	pmd = pmd_offset(pud, addr);
   87.91  	do {
   87.92 -		next = (addr + PMD_SIZE) & PMD_MASK;
   87.93 -		if (next > end || next <= addr)
   87.94 -			next = end;
   87.95 -		
   87.96 -		clear_pmd_range(tlb, pmd, addr, next);
   87.97 -		pmd++;
   87.98 -		addr = next;
   87.99 -	} while (addr && (addr < end));
  87.100 +		next = pmd_addr_end(addr, end);
  87.101 +		if (pmd_none_or_clear_bad(pmd))
  87.102 +			continue;
  87.103 +		free_pte_range(tlb, pmd);
  87.104 +	} while (pmd++, addr = next, addr != end);
  87.105  
  87.106 -	if (!((start | end) & ~PUD_MASK)) {
  87.107 -		/* Only clear full, aligned ranges */
  87.108 -		pud_clear(pud);
  87.109 -		pmd_free_tlb(tlb, __pmd);
  87.110 +	start &= PUD_MASK;
  87.111 +	if (start < floor)
  87.112 +		return;
  87.113 +	if (ceiling) {
  87.114 +		ceiling &= PUD_MASK;
  87.115 +		if (!ceiling)
  87.116 +			return;
  87.117 +	}
  87.118 +	if (end - 1 > ceiling - 1)
  87.119 +		return;
  87.120 +
  87.121 +	pmd = pmd_offset(pud, start);
  87.122 +	pud_clear(pud);
  87.123 +	pmd_free_tlb(tlb, pmd);
  87.124 +}
  87.125 +
  87.126 +static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  87.127 +				unsigned long addr, unsigned long end,
  87.128 +				unsigned long floor, unsigned long ceiling)
  87.129 +{
  87.130 +	pud_t *pud;
  87.131 +	unsigned long next;
  87.132 +	unsigned long start;
  87.133 +
  87.134 +	start = addr;
  87.135 +	pud = pud_offset(pgd, addr);
  87.136 +	do {
  87.137 +		next = pud_addr_end(addr, end);
  87.138 +		if (pud_none_or_clear_bad(pud))
  87.139 +			continue;
  87.140 +		free_pmd_range(tlb, pud, addr, next, floor, ceiling);
  87.141 +	} while (pud++, addr = next, addr != end);
  87.142 +
  87.143 +	start &= PGDIR_MASK;
  87.144 +	if (start < floor)
  87.145 +		return;
  87.146 +	if (ceiling) {
  87.147 +		ceiling &= PGDIR_MASK;
  87.148 +		if (!ceiling)
  87.149 +			return;
  87.150 +	}
  87.151 +	if (end - 1 > ceiling - 1)
  87.152 +		return;
  87.153 +
  87.154 +	pud = pud_offset(pgd, start);
  87.155 +	pgd_clear(pgd);
  87.156 +	pud_free_tlb(tlb, pud);
  87.157 +}
  87.158 +
  87.159 +/*
  87.160 + * This function frees user-level page tables of a process.
  87.161 + *
  87.162 + * Must be called with pagetable lock held.
  87.163 + */
  87.164 +void free_pgd_range(struct mmu_gather **tlb,
  87.165 +			unsigned long addr, unsigned long end,
  87.166 +			unsigned long floor, unsigned long ceiling)
  87.167 +{
  87.168 +	pgd_t *pgd;
  87.169 +	unsigned long next;
  87.170 +	unsigned long start;
  87.171 +
  87.172 +	/*
  87.173 +	 * The next few lines have given us lots of grief...
  87.174 +	 *
  87.175 +	 * Why are we testing PMD* at this top level?  Because often
  87.176 +	 * there will be no work to do at all, and we'd prefer not to
  87.177 +	 * go all the way down to the bottom just to discover that.
  87.178 +	 *
  87.179 +	 * Why all these "- 1"s?  Because 0 represents both the bottom
  87.180 +	 * of the address space and the top of it (using -1 for the
  87.181 +	 * top wouldn't help much: the masks would do the wrong thing).
  87.182 +	 * The rule is that addr 0 and floor 0 refer to the bottom of
  87.183 + *	the address space, but end 0 and ceiling 0 refer to the top.
  87.184 +	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
  87.185 +	 * that end 0 case should be mythical).
  87.186 +	 *
  87.187 +	 * Wherever addr is brought up or ceiling brought down, we must
  87.188 +	 * be careful to reject "the opposite 0" before it confuses the
  87.189 +	 * subsequent tests.  But what about where end is brought down
  87.190 + *	by PMD_SIZE below?  No, end can't go down to 0 there.
  87.191 +	 *
  87.192 + *	Whereas we round start (addr) and ceiling down (by different
  87.193 + *	masks at different levels) in order to test whether a table
  87.194 + *	now has no other vmas using it, and so can be freed, we don't
  87.195 +	 * bother to round floor or end up - the tests don't need that.
  87.196 +	 */
  87.197 +
  87.198 +	addr &= PMD_MASK;
  87.199 +	if (addr < floor) {
  87.200 +		addr += PMD_SIZE;
  87.201 +		if (!addr)
  87.202 +			return;
  87.203 +	}
  87.204 +	if (ceiling) {
  87.205 +		ceiling &= PMD_MASK;
  87.206 +		if (!ceiling)
  87.207 +			return;
  87.208 +	}
  87.209 +	if (end - 1 > ceiling - 1)
  87.210 +		end -= PMD_SIZE;
  87.211 +	if (addr > end - 1)
  87.212 +		return;
  87.213 +
  87.214 +	start = addr;
  87.215 +	pgd = pgd_offset((*tlb)->mm, addr);
  87.216 +	do {
  87.217 +		next = pgd_addr_end(addr, end);
  87.218 +		if (pgd_none_or_clear_bad(pgd))
  87.219 +			continue;
  87.220 +		free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
  87.221 +	} while (pgd++, addr = next, addr != end);
  87.222 +
  87.223 +	if (!tlb_is_full_mm(*tlb))
  87.224 +		flush_tlb_pgtables((*tlb)->mm, start, end);
  87.225 +}
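Every converted walker here uses the same p?d_addr_end() idiom: advance to the next table boundary but clamp to end, comparing "x - 1" values so that an end of 0 (meaning the top of the address space) wraps and compares correctly. A user-space model of the pmd case (the 21-bit shift is illustrative, not any particular arch):

#include <stdio.h>

#define PMD_SHIFT	21		/* illustrative: 2 MB pmd coverage */
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE - 1))

/* same shape as the 2.6.12 pmd_addr_end() macro */
static unsigned long pmd_addr_end(unsigned long addr, unsigned long end)
{
	unsigned long boundary = (addr + PMD_SIZE) & PMD_MASK;

	return boundary - 1 < end - 1 ? boundary : end;
}

int main(void)
{
	unsigned long addr = 0x1ff000, end = 0x500000;

	/* walk the range one pmd at a time, as the loops above do */
	do {
		unsigned long next = pmd_addr_end(addr, end);
		printf("pmd span %#lx..%#lx\n", addr, next);
		addr = next;
	} while (addr != end);
	return 0;
}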
  87.226 +
  87.227 +void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
  87.228 +		unsigned long floor, unsigned long ceiling)
  87.229 +{
  87.230 +	while (vma) {
  87.231 +		struct vm_area_struct *next = vma->vm_next;
  87.232 +		unsigned long addr = vma->vm_start;
  87.233 +
  87.234 +		if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
  87.235 +			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
  87.236 +				floor, next? next->vm_start: ceiling);
  87.237 +		} else {
  87.238 +			/*
  87.239 +			 * Optimization: gather nearby vmas into one call down
  87.240 +			 */
  87.241 +			while (next && next->vm_start <= vma->vm_end + PMD_SIZE
  87.242 +			  && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
  87.243 +							HPAGE_SIZE)) {
  87.244 +				vma = next;
  87.245 +				next = vma->vm_next;
  87.246 +			}
  87.247 +			free_pgd_range(tlb, addr, vma->vm_end,
  87.248 +				floor, next? next->vm_start: ceiling);
  87.249 +		}
  87.250 +		vma = next;
  87.251  	}
  87.252  }
  87.253  
  87.254 -
  87.255 -static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end)
  87.256 -{
  87.257 -	unsigned long addr = start, next;
  87.258 -	pud_t *pud, *__pud;
  87.259 -
  87.260 -	if (pgd_none(*pgd))
  87.261 -		return;
  87.262 -	if (unlikely(pgd_bad(*pgd))) {
  87.263 -		pgd_ERROR(*pgd);
  87.264 -		pgd_clear(pgd);
  87.265 -		return;
  87.266 -	}
  87.267 -
  87.268 -	pud = __pud = pud_offset(pgd, start);
  87.269 -	do {
  87.270 -		next = (addr + PUD_SIZE) & PUD_MASK;
  87.271 -		if (next > end || next <= addr)
  87.272 -			next = end;
  87.273 -		
  87.274 -		clear_pud_range(tlb, pud, addr, next);
  87.275 -		pud++;
  87.276 -		addr = next;
  87.277 -	} while (addr && (addr < end));
  87.278 -
  87.279 -	if (!((start | end) & ~PGDIR_MASK)) {
  87.280 -		/* Only clear full, aligned ranges */
  87.281 -		pgd_clear(pgd);
  87.282 -		pud_free_tlb(tlb, __pud);
  87.283 -	}
  87.284 -}
  87.285 -
  87.286 -/*
  87.287 - * This function clears user-level page tables of a process.
  87.288 - *
  87.289 - * Must be called with pagetable lock held.
  87.290 - */
  87.291 -void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  87.292 -{
  87.293 -	unsigned long addr = start, next;
  87.294 -	pgd_t * pgd = pgd_offset(tlb->mm, start);
  87.295 -	unsigned long i;
  87.296 -
  87.297 -	for (i = pgd_index(start); i <= pgd_index(end-1); i++) {
  87.298 -		next = (addr + PGDIR_SIZE) & PGDIR_MASK;
  87.299 -		if (next > end || next <= addr)
  87.300 -			next = end;
  87.301 -		
  87.302 -		clear_pgd_range(tlb, pgd, addr, next);
  87.303 -		pgd++;
  87.304 -		addr = next;
  87.305 -	}
  87.306 -}
  87.307 -
  87.308 -pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
  87.309 +pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
  87.310 +				unsigned long address)
  87.311  {
  87.312  	if (!pmd_present(*pmd)) {
  87.313  		struct page *new;
  87.314 @@ -254,20 +342,7 @@ out:
  87.315   */
  87.316  
  87.317  static inline void
  87.318 -copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte)
  87.319 -{
  87.320 -	if (pte_file(pte))
  87.321 -		return;
  87.322 -	swap_duplicate(pte_to_swp_entry(pte));
  87.323 -	if (list_empty(&dst_mm->mmlist)) {
  87.324 -		spin_lock(&mmlist_lock);
  87.325 -		list_add(&dst_mm->mmlist, &src_mm->mmlist);
  87.326 -		spin_unlock(&mmlist_lock);
  87.327 -	}
  87.328 -}
  87.329 -
  87.330 -static inline void
  87.331 -copy_one_pte(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
  87.332 +copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
  87.333  		pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
  87.334  		unsigned long addr)
  87.335  {
  87.336 @@ -275,12 +350,21 @@ copy_one_pte(struct mm_struct *dst_mm,  
  87.337  	struct page *page;
  87.338  	unsigned long pfn;
  87.339  
  87.340 -	/* pte contains position in swap, so copy. */
  87.341 -	if (!pte_present(pte)) {
  87.342 -		copy_swap_pte(dst_mm, src_mm, pte);
  87.343 -		set_pte(dst_pte, pte);
  87.344 +	/* pte contains position in swap or file, so copy. */
  87.345 +	if (unlikely(!pte_present(pte))) {
  87.346 +		if (!pte_file(pte)) {
  87.347 +			swap_duplicate(pte_to_swp_entry(pte));
  87.348 +			/* make sure dst_mm is on swapoff's mmlist. */
  87.349 +			if (unlikely(list_empty(&dst_mm->mmlist))) {
  87.350 +				spin_lock(&mmlist_lock);
  87.351 +				list_add(&dst_mm->mmlist, &src_mm->mmlist);
  87.352 +				spin_unlock(&mmlist_lock);
  87.353 +			}
  87.354 +		}
  87.355 +		set_pte_at(dst_mm, addr, dst_pte, pte);
  87.356  		return;
  87.357  	}
  87.358 +
  87.359  	pfn = pte_pfn(pte);
  87.360  	/* the pte points outside of valid memory, the
  87.361  	 * mapping is assumed to be good, meaningful
  87.362 @@ -292,7 +376,7 @@ copy_one_pte(struct mm_struct *dst_mm,  
  87.363  		page = pfn_to_page(pfn);
  87.364  
  87.365  	if (!page || PageReserved(page)) {
  87.366 -		set_pte(dst_pte, pte);
  87.367 +		set_pte_at(dst_mm, addr, dst_pte, pte);
  87.368  		return;
  87.369  	}
  87.370  
  87.371 @@ -301,7 +385,7 @@ copy_one_pte(struct mm_struct *dst_mm,  
  87.372  	 * in the parent and the child
  87.373  	 */
  87.374  	if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
  87.375 -		ptep_set_wrprotect(src_pte);
  87.376 +		ptep_set_wrprotect(src_mm, addr, src_pte);
  87.377  		pte = *src_pte;
  87.378  	}
  87.379  
  87.380 @@ -313,172 +397,137 @@ copy_one_pte(struct mm_struct *dst_mm,  
  87.381  		pte = pte_mkclean(pte);
  87.382  	pte = pte_mkold(pte);
  87.383  	get_page(page);
  87.384 -	dst_mm->rss++;
  87.385 +	inc_mm_counter(dst_mm, rss);
  87.386  	if (PageAnon(page))
  87.387 -		dst_mm->anon_rss++;
  87.388 -	set_pte(dst_pte, pte);
  87.389 +		inc_mm_counter(dst_mm, anon_rss);
  87.390 +	set_pte_at(dst_mm, addr, dst_pte, pte);
  87.391  	page_dup_rmap(page);
  87.392  }
  87.393  
  87.394 -static int copy_pte_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
  87.395 +static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
  87.396  		pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
  87.397  		unsigned long addr, unsigned long end)
  87.398  {
  87.399  	pte_t *src_pte, *dst_pte;
  87.400 -	pte_t *s, *d;
  87.401  	unsigned long vm_flags = vma->vm_flags;
  87.402 +	int progress;
  87.403  
  87.404 -	d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
  87.405 +again:
  87.406 +	dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
  87.407  	if (!dst_pte)
  87.408  		return -ENOMEM;
  87.409 +	src_pte = pte_offset_map_nested(src_pmd, addr);
  87.410  
  87.411 +	progress = 0;
  87.412  	spin_lock(&src_mm->page_table_lock);
  87.413 -	s = src_pte = pte_offset_map_nested(src_pmd, addr);
  87.414 -	for (; addr < end; addr += PAGE_SIZE, s++, d++) {
  87.415 -		if (pte_none(*s))
  87.416 +	do {
  87.417 +		/*
  87.418 +		 * We are holding two locks at this point - either of them
  87.419 +		 * could generate latencies in another task on another CPU.
  87.420 +		 */
  87.421 +		if (progress >= 32 && (need_resched() ||
  87.422 +		    need_lockbreak(&src_mm->page_table_lock) ||
  87.423 +		    need_lockbreak(&dst_mm->page_table_lock)))
  87.424 +			break;
  87.425 +		if (pte_none(*src_pte)) {
  87.426 +			progress++;
  87.427  			continue;
  87.428 -		copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr);
  87.429 -	}
  87.430 -	pte_unmap_nested(src_pte);
  87.431 -	pte_unmap(dst_pte);
  87.432 +		}
  87.433 +		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
  87.434 +		progress += 8;
  87.435 +	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
  87.436  	spin_unlock(&src_mm->page_table_lock);
  87.437 +
  87.438 +	pte_unmap_nested(src_pte - 1);
  87.439 +	pte_unmap(dst_pte - 1);
  87.440  	cond_resched_lock(&dst_mm->page_table_lock);
  87.441 +	if (addr != end)
  87.442 +		goto again;
  87.443  	return 0;
  87.444  }
  87.445  
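copy_pte_range() is now restartable: roughly every 32 entries it checks for contention on either page_table_lock or a pending reschedule, backs out of the inner loop, and re-enters at the address where it stopped. The shape of that pattern, with stand-ins for the kernel's locking and scheduling primitives:

#include <stdio.h>

static int should_break(int progress)
{
	return progress >= 32;	/* kernel: need_resched()/need_lockbreak() */
}

static void copy_range(const int *src, int *dst, int n)
{
	int i = 0;
again:
	/* kernel: retake both page_table_locks here */
	for (int progress = 0; i < n; i++, progress++) {
		if (should_break(progress))
			break;			/* back out mid-range */
		dst[i] = src[i];
	}
	/* kernel: drop locks, cond_resched_lock() */
	if (i < n)
		goto again;			/* resume where we stopped */
}

int main(void)
{
	int src[100], dst[100];

	for (int i = 0; i < 100; i++)
		src[i] = i;
	copy_range(src, dst, 100);
	printf("%d %d\n", dst[0], dst[99]);
	return 0;
}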
  87.446 -static int copy_pmd_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
  87.447 +static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
  87.448  		pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma,
  87.449  		unsigned long addr, unsigned long end)
  87.450  {
  87.451  	pmd_t *src_pmd, *dst_pmd;
  87.452 -	int err = 0;
  87.453  	unsigned long next;
  87.454  
  87.455 -	src_pmd = pmd_offset(src_pud, addr);
  87.456  	dst_pmd = pmd_alloc(dst_mm, dst_pud, addr);
  87.457  	if (!dst_pmd)
  87.458  		return -ENOMEM;
  87.459 -
  87.460 -	for (; addr < end; addr = next, src_pmd++, dst_pmd++) {
  87.461 -		next = (addr + PMD_SIZE) & PMD_MASK;
  87.462 -		if (next > end || next <= addr)
  87.463 -			next = end;
  87.464 -		if (pmd_none(*src_pmd))
  87.465 +	src_pmd = pmd_offset(src_pud, addr);
  87.466 +	do {
  87.467 +		next = pmd_addr_end(addr, end);
  87.468 +		if (pmd_none_or_clear_bad(src_pmd))
  87.469  			continue;
  87.470 -		if (pmd_bad(*src_pmd)) {
  87.471 -			pmd_ERROR(*src_pmd);
  87.472 -			pmd_clear(src_pmd);
  87.473 -			continue;
  87.474 -		}
  87.475 -		err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
  87.476 -							vma, addr, next);
  87.477 -		if (err)
  87.478 -			break;
  87.479 -	}
  87.480 -	return err;
  87.481 +		if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
  87.482 +						vma, addr, next))
  87.483 +			return -ENOMEM;
  87.484 +	} while (dst_pmd++, src_pmd++, addr = next, addr != end);
  87.485 +	return 0;
  87.486  }
  87.487  
  87.488 -static int copy_pud_range(struct mm_struct *dst_mm,  struct mm_struct *src_mm,
  87.489 +static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
  87.490  		pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma,
  87.491  		unsigned long addr, unsigned long end)
  87.492  {
  87.493  	pud_t *src_pud, *dst_pud;
  87.494 -	int err = 0;
  87.495  	unsigned long next;
  87.496  
  87.497 -	src_pud = pud_offset(src_pgd, addr);
  87.498  	dst_pud = pud_alloc(dst_mm, dst_pgd, addr);
  87.499  	if (!dst_pud)
  87.500  		return -ENOMEM;
  87.501 -
  87.502 -	for (; addr < end; addr = next, src_pud++, dst_pud++) {
  87.503 -		next = (addr + PUD_SIZE) & PUD_MASK;
  87.504 -		if (next > end || next <= addr)
  87.505 -			next = end;
  87.506 -		if (pud_none(*src_pud))
  87.507 +	src_pud = pud_offset(src_pgd, addr);
  87.508 +	do {
  87.509 +		next = pud_addr_end(addr, end);
  87.510 +		if (pud_none_or_clear_bad(src_pud))
  87.511  			continue;
  87.512 -		if (pud_bad(*src_pud)) {
  87.513 -			pud_ERROR(*src_pud);
  87.514 -			pud_clear(src_pud);
  87.515 -			continue;
  87.516 -		}
  87.517 -		err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
  87.518 -							vma, addr, next);
  87.519 -		if (err)
  87.520 -			break;
  87.521 -	}
  87.522 -	return err;
  87.523 +		if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud,
  87.524 +						vma, addr, next))
  87.525 +			return -ENOMEM;
  87.526 +	} while (dst_pud++, src_pud++, addr = next, addr != end);
  87.527 +	return 0;
  87.528  }
  87.529  
  87.530 -int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
  87.531 +int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
  87.532  		struct vm_area_struct *vma)
  87.533  {
  87.534  	pgd_t *src_pgd, *dst_pgd;
  87.535 -	unsigned long addr, start, end, next;
  87.536 -	int err = 0;
  87.537 +	unsigned long next;
  87.538 +	unsigned long addr = vma->vm_start;
  87.539 +	unsigned long end = vma->vm_end;
  87.540  
  87.541  	if (is_vm_hugetlb_page(vma))
  87.542 -		return copy_hugetlb_page_range(dst, src, vma);
  87.543 -
  87.544 -	start = vma->vm_start;
  87.545 -	src_pgd = pgd_offset(src, start);
  87.546 -	dst_pgd = pgd_offset(dst, start);
  87.547 +		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
  87.548  
  87.549 -	end = vma->vm_end;
  87.550 -	addr = start;
  87.551 -	while (addr && (addr < end-1)) {
  87.552 -		next = (addr + PGDIR_SIZE) & PGDIR_MASK;
  87.553 -		if (next > end || next <= addr)
  87.554 -			next = end;
  87.555 -		if (pgd_none(*src_pgd))
  87.556 -			goto next_pgd;
  87.557 -		if (pgd_bad(*src_pgd)) {
  87.558 -			pgd_ERROR(*src_pgd);
  87.559 -			pgd_clear(src_pgd);
  87.560 -			goto next_pgd;
  87.561 -		}
  87.562 -		err = copy_pud_range(dst, src, dst_pgd, src_pgd,
  87.563 -							vma, addr, next);
  87.564 -		if (err)
  87.565 -			break;
  87.566 -
  87.567 -next_pgd:
  87.568 -		src_pgd++;
  87.569 -		dst_pgd++;
  87.570 -		addr = next;
  87.571 -	}
  87.572 -
  87.573 -	return err;
  87.574 +	dst_pgd = pgd_offset(dst_mm, addr);
  87.575 +	src_pgd = pgd_offset(src_mm, addr);
  87.576 +	do {
  87.577 +		next = pgd_addr_end(addr, end);
  87.578 +		if (pgd_none_or_clear_bad(src_pgd))
  87.579 +			continue;
  87.580 +		if (copy_pud_range(dst_mm, src_mm, dst_pgd, src_pgd,
  87.581 +						vma, addr, next))
  87.582 +			return -ENOMEM;
  87.583 +	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
  87.584 +	return 0;
  87.585  }
  87.586  
  87.587 -static void zap_pte_range(struct mmu_gather *tlb,
  87.588 -		pmd_t *pmd, unsigned long address,
  87.589 -		unsigned long size, struct zap_details *details)
  87.590 +static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
  87.591 +				unsigned long addr, unsigned long end,
  87.592 +				struct zap_details *details)
  87.593  {
  87.594 -	unsigned long offset;
  87.595 -	pte_t *ptep;
  87.596 +	pte_t *pte;
  87.597  
  87.598 -	if (pmd_none(*pmd))
  87.599 -		return;
  87.600 -	if (unlikely(pmd_bad(*pmd))) {
  87.601 -		pmd_ERROR(*pmd);
  87.602 -		pmd_clear(pmd);
  87.603 -		return;
  87.604 -	}
  87.605 -	ptep = pte_offset_map(pmd, address);
  87.606 -	offset = address & ~PMD_MASK;
  87.607 -	if (offset + size > PMD_SIZE)
  87.608 -		size = PMD_SIZE - offset;
  87.609 -	size &= PAGE_MASK;
  87.610 -	if (details && !details->check_mapping && !details->nonlinear_vma)
  87.611 -		details = NULL;
  87.612 -	for (offset=0; offset < size; ptep++, offset += PAGE_SIZE) {
  87.613 -		pte_t pte = *ptep;
  87.614 -		if (pte_none(pte))
  87.615 +	pte = pte_offset_map(pmd, addr);
  87.616 +	do {
  87.617 +		pte_t ptent = *pte;
  87.618 +		if (pte_none(ptent))
  87.619  			continue;
  87.620 -		if (pte_present(pte)) {
  87.621 +		if (pte_present(ptent)) {
  87.622  			struct page *page = NULL;
  87.623 -			unsigned long pfn = pte_pfn(pte);
  87.624 +			unsigned long pfn = pte_pfn(ptent);
  87.625  			if (pfn_valid(pfn)) {
  87.626  				page = pfn_to_page(pfn);
  87.627  				if (PageReserved(page))
  87.628 @@ -502,19 +551,20 @@ static void zap_pte_range(struct mmu_gat
  87.629  				     page->index > details->last_index))
  87.630  					continue;
  87.631  			}
  87.632 -			pte = ptep_get_and_clear(ptep);
  87.633 -			tlb_remove_tlb_entry(tlb, ptep, address+offset);
  87.634 +			ptent = ptep_get_and_clear(tlb->mm, addr, pte);
  87.635 +			tlb_remove_tlb_entry(tlb, pte, addr);
  87.636  			if (unlikely(!page))
  87.637  				continue;
  87.638  			if (unlikely(details) && details->nonlinear_vma
  87.639  			    && linear_page_index(details->nonlinear_vma,
  87.640 -					address+offset) != page->index)
  87.641 -				set_pte(ptep, pgoff_to_pte(page->index));
  87.642 -			if (pte_dirty(pte))
  87.643 +						addr) != page->index)
  87.644 +				set_pte_at(tlb->mm, addr, pte,
  87.645 +					   pgoff_to_pte(page->index));
  87.646 +			if (pte_dirty(ptent))
  87.647  				set_page_dirty(page);
  87.648  			if (PageAnon(page))
  87.649 -				tlb->mm->anon_rss--;
  87.650 -			else if (pte_young(pte))
  87.651 +				dec_mm_counter(tlb->mm, anon_rss);
  87.652 +			else if (pte_young(ptent))
  87.653  				mark_page_accessed(page);
  87.654  			tlb->freed++;
  87.655  			page_remove_rmap(page);
  87.656 @@ -527,78 +577,64 @@ static void zap_pte_range(struct mmu_gat
  87.657  		 */
  87.658  		if (unlikely(details))
  87.659  			continue;
  87.660 -		if (!pte_file(pte))
  87.661 -			free_swap_and_cache(pte_to_swp_entry(pte));
  87.662 -		pte_clear(ptep);
  87.663 -	}
  87.664 -	pte_unmap(ptep-1);
  87.665 -}
  87.666 -
  87.667 -static void zap_pmd_range(struct mmu_gather *tlb,
  87.668 -		pud_t *pud, unsigned long address,
  87.669 -		unsigned long size, struct zap_details *details)
  87.670 -{
  87.671 -	pmd_t * pmd;
  87.672 -	unsigned long end;
  87.673 -
  87.674 -	if (pud_none(*pud))
  87.675 -		return;
  87.676 -	if (unlikely(pud_bad(*pud))) {
  87.677 -		pud_ERROR(*pud);
  87.678 -		pud_clear(pud);
  87.679 -		return;
  87.680 -	}
  87.681 -	pmd = pmd_offset(pud, address);
  87.682 -	end = address + size;
  87.683 -	if (end > ((address + PUD_SIZE) & PUD_MASK))
  87.684 -		end = ((address + PUD_SIZE) & PUD_MASK);
  87.685 -	do {
  87.686 -		zap_pte_range(tlb, pmd, address, end - address, details);
  87.687 -		address = (address + PMD_SIZE) & PMD_MASK; 
  87.688 -		pmd++;
  87.689 -	} while (address && (address < end));
  87.690 +		if (!pte_file(ptent))
  87.691 +			free_swap_and_cache(pte_to_swp_entry(ptent));
  87.692 +		pte_clear(tlb->mm, addr, pte);
  87.693 +	} while (pte++, addr += PAGE_SIZE, addr != end);
  87.694 +	pte_unmap(pte - 1);
  87.695  }
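Note the pte_unmap(pte - 1): the do/while form advances the cursor before testing, so on exit pte points one slot past the last entry processed. The same off-by-one appears in every walker converted here; in miniature:

#include <stdio.h>

int main(void)
{
	int table[4] = { 1, 2, 3, 4 }, *p = table, n = 4;

	do {
		/* work on *p */
	} while (p++, --n);

	/* p is now one past the last element processed */
	printf("last entry handled: %d\n", *(p - 1));
	return 0;
}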
  87.696  
  87.697 -static void zap_pud_range(struct mmu_gather *tlb,
  87.698 -		pgd_t * pgd, unsigned long address,
  87.699 -		unsigned long end, struct zap_details *details)
  87.700 +static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
  87.701 +				unsigned long addr, unsigned long end,
  87.702 +				struct zap_details *details)
  87.703  {
  87.704 -	pud_t * pud;
  87.705 +	pmd_t *pmd;
  87.706 +	unsigned long next;
  87.707  
  87.708 -	if (pgd_none(*pgd))
  87.709 -		return;
  87.710 -	if (unlikely(pgd_bad(*pgd))) {
  87.711 -		pgd_ERROR(*pgd);
  87.712 -		pgd_clear(pgd);
  87.713 -		return;
  87.714 -	}
  87.715 -	pud = pud_offset(pgd, address);
  87.716 +	pmd = pmd_offset(pud, addr);
  87.717  	do {
  87.718 -		zap_pmd_range(tlb, pud, address, end - address, details);
  87.719 -		address = (address + PUD_SIZE) & PUD_MASK; 
  87.720 -		pud++;
  87.721 -	} while (address && (address < end));
  87.722 +		next = pmd_addr_end(addr, end);
  87.723 +		if (pmd_none_or_clear_bad(pmd))
  87.724 +			continue;
  87.725 +		zap_pte_range(tlb, pmd, addr, next, details);
  87.726 +	} while (pmd++, addr = next, addr != end);
  87.727  }
  87.728  
  87.729 -static void unmap_page_range(struct mmu_gather *tlb,
  87.730 -		struct vm_area_struct *vma, unsigned long address,
  87.731 -		unsigned long end, struct zap_details *details)
  87.732 +static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
  87.733 +				unsigned long addr, unsigned long end,
  87.734 +				struct zap_details *details)
  87.735  {
  87.736 +	pud_t *pud;
  87.737  	unsigned long next;
  87.738 -	pgd_t *pgd;
  87.739 -	int i;
  87.740  
  87.741 -	BUG_ON(address >= end);
  87.742 -	pgd = pgd_offset(vma->vm_mm, address);
  87.743 +	pud = pud_offset(pgd, addr);
  87.744 +	do {
  87.745 +		next = pud_addr_end(addr, end);
  87.746 +		if (pud_none_or_clear_bad(pud))
  87.747 +			continue;
  87.748 +		zap_pmd_range(tlb, pud, addr, next, details);
  87.749 +	} while (pud++, addr = next, addr != end);
  87.750 +}
  87.751 +
  87.752 +static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
  87.753 +				unsigned long addr, unsigned long end,
  87.754 +				struct zap_details *details)
  87.755 +{
  87.756 +	pgd_t *pgd;
  87.757 +	unsigned long next;
  87.758 +
  87.759 +	if (details && !details->check_mapping && !details->nonlinear_vma)
  87.760 +		details = NULL;
  87.761 +
  87.762 +	BUG_ON(addr >= end);
  87.763  	tlb_start_vma(tlb, vma);
  87.764 -	for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
  87.765 -		next = (address + PGDIR_SIZE) & PGDIR_MASK;
  87.766 -		if (next <= address || next > end)
  87.767 -			next = end;
  87.768 -		zap_pud_range(tlb, pgd, address, next, details);
  87.769 -		address = next;
  87.770 -		pgd++;
  87.771 -	}
  87.772 +	pgd = pgd_offset(vma->vm_mm, addr);
  87.773 +	do {
  87.774 +		next = pgd_addr_end(addr, end);
  87.775 +		if (pgd_none_or_clear_bad(pgd))
  87.776 +			continue;
  87.777 +		zap_pud_range(tlb, pgd, addr, next, details);
  87.778 +	} while (pgd++, addr = next, addr != end);
  87.779  	tlb_end_vma(tlb, vma);
  87.780  }
  87.781  
  87.782 @@ -619,7 +655,7 @@ static void unmap_page_range(struct mmu_
  87.783   * @nr_accounted: Place number of unmapped pages in vm-accountable vma's here
  87.784   * @details: details of nonlinear truncation or shared cache invalidation
  87.785   *
  87.786 - * Returns the number of vma's which were covered by the unmapping.
  87.787 + * Returns the end address of the unmapping (restart addr if interrupted).
  87.788   *
  87.789   * Unmap all pages in the vma list.  Called under page_table_lock.
  87.790   *
  87.791 @@ -636,7 +672,7 @@ static void unmap_page_range(struct mmu_
  87.792   * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
  87.793   * drops the lock and schedules.
  87.794   */
  87.795 -int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
  87.796 +unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
  87.797  		struct vm_area_struct *vma, unsigned long start_addr,
  87.798  		unsigned long end_addr, unsigned long *nr_accounted,
  87.799  		struct zap_details *details)
  87.800 @@ -644,12 +680,11 @@ int unmap_vmas(struct mmu_gather **tlbp,
  87.801  	unsigned long zap_bytes = ZAP_BLOCK_SIZE;
  87.802  	unsigned long tlb_start = 0;	/* For tlb_finish_mmu */
  87.803  	int tlb_start_valid = 0;
  87.804 -	int ret = 0;
  87.805 +	unsigned long start = start_addr;
  87.806  	spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
  87.807  	int fullmm = tlb_is_full_mm(*tlbp);
  87.808  
  87.809  	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
  87.810 -		unsigned long start;
  87.811  		unsigned long end;
  87.812  
  87.813  		start = max(vma->vm_start, start_addr);
  87.814 @@ -662,7 +697,6 @@ int unmap_vmas(struct mmu_gather **tlbp,
  87.815  		if (vma->vm_flags & VM_ACCOUNT)
  87.816  			*nr_accounted += (end - start) >> PAGE_SHIFT;
  87.817  
  87.818 -		ret++;
  87.819  		while (start != end) {
  87.820  			unsigned long block;
  87.821  
  87.822 @@ -693,7 +727,6 @@ int unmap_vmas(struct mmu_gather **tlbp,
  87.823  				if (i_mmap_lock) {
  87.824  					/* must reset count of rss freed */
  87.825  					*tlbp = tlb_gather_mmu(mm, fullmm);
  87.826 -					details->break_addr = start;
  87.827  					goto out;
  87.828  				}
  87.829  				spin_unlock(&mm->page_table_lock);
  87.830 @@ -707,7 +740,7 @@ int unmap_vmas(struct mmu_gather **tlbp,
  87.831  		}
  87.832  	}
  87.833  out:
  87.834 -	return ret;
  87.835 +	return start;	/* which is now the end (or restart) address */
  87.836  }
  87.837  
  87.838  /**
  87.839 @@ -717,7 +750,7 @@ out:
  87.840   * @size: number of bytes to zap
  87.841   * @details: details of nonlinear truncation or shared cache invalidation
  87.842   */
  87.843 -void zap_page_range(struct vm_area_struct *vma, unsigned long address,
  87.844 +unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
  87.845  		unsigned long size, struct zap_details *details)
  87.846  {
  87.847  	struct mm_struct *mm = vma->vm_mm;
  87.848 @@ -727,16 +760,16 @@ void zap_page_range(struct vm_area_struc
  87.849  
  87.850  	if (is_vm_hugetlb_page(vma)) {
  87.851  		zap_hugepage_range(vma, address, size);
  87.852 -		return;
  87.853 +		return end;
  87.854  	}
  87.855  
  87.856  	lru_add_drain();
  87.857  	spin_lock(&mm->page_table_lock);
  87.858  	tlb = tlb_gather_mmu(mm, 0);
  87.859 -	unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
  87.860 +	end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
  87.861  	tlb_finish_mmu(tlb, address, end);
  87.862 -	acct_update_integrals();
  87.863  	spin_unlock(&mm->page_table_lock);
  87.864 +	return end;
  87.865  }
  87.866  
  87.867  /*
  87.868 @@ -1005,111 +1038,78 @@ out:
  87.869  
  87.870  EXPORT_SYMBOL(get_user_pages);
  87.871  
  87.872 -static void zeromap_pte_range(pte_t * pte, unsigned long address,
  87.873 -                                     unsigned long size, pgprot_t prot)
  87.874 +static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
  87.875 +			unsigned long addr, unsigned long end, pgprot_t prot)
  87.876  {
  87.877 -	unsigned long end;
  87.878 -
  87.879 -	address &= ~PMD_MASK;
  87.880 -	end = address + size;
  87.881 -	if (end > PMD_SIZE)
  87.882 -		end = PMD_SIZE;
  87.883 -	do {
  87.884 -		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
  87.885 -		BUG_ON(!pte_none(*pte));
  87.886 -		set_pte(pte, zero_pte);
  87.887 -		address += PAGE_SIZE;
  87.888 -		pte++;
  87.889 -	} while (address && (address < end));
  87.890 -}
  87.891 +	pte_t *pte;
  87.892  
  87.893 -static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd,
  87.894 -		unsigned long address, unsigned long size, pgprot_t prot)
  87.895 -{
  87.896 -	unsigned long base, end;
  87.897 -
  87.898 -	base = address & PUD_MASK;
  87.899 -	address &= ~PUD_MASK;
  87.900 -	end = address + size;
  87.901 -	if (end > PUD_SIZE)
  87.902 -		end = PUD_SIZE;
  87.903 +	pte = pte_alloc_map(mm, pmd, addr);
  87.904 +	if (!pte)
  87.905 +		return -ENOMEM;
  87.906  	do {
  87.907 -		pte_t * pte = pte_alloc_map(mm, pmd, base + address);
  87.908 -		if (!pte)
  87.909 -			return -ENOMEM;
  87.910 -		zeromap_pte_range(pte, base + address, end - address, prot);
  87.911 -		pte_unmap(pte);
  87.912 -		address = (address + PMD_SIZE) & PMD_MASK;
  87.913 -		pmd++;
  87.914 -	} while (address && (address < end));
  87.915 +		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
  87.916 +		BUG_ON(!pte_none(*pte));
  87.917 +		set_pte_at(mm, addr, pte, zero_pte);
  87.918 +	} while (pte++, addr += PAGE_SIZE, addr != end);
  87.919 +	pte_unmap(pte - 1);
  87.920  	return 0;
  87.921  }
  87.922  
  87.923 -static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud,
  87.924 -				    unsigned long address,
  87.925 -                                    unsigned long size, pgprot_t prot)
  87.926 +static inline int zeromap_pmd_range(struct mm_struct *mm, pud_t *pud,
  87.927 +			unsigned long addr, unsigned long end, pgprot_t prot)
  87.928  {
  87.929 -	unsigned long base, end;
  87.930 -	int error = 0;
  87.931 +	pmd_t *pmd;
  87.932 +	unsigned long next;
  87.933  
  87.934 -	base = address & PGDIR_MASK;
  87.935 -	address &= ~PGDIR_MASK;
  87.936 -	end = address + size;
  87.937 -	if (end > PGDIR_SIZE)
  87.938 -		end = PGDIR_SIZE;
  87.939 +	pmd = pmd_alloc(mm, pud, addr);
  87.940 +	if (!pmd)
  87.941 +		return -ENOMEM;
  87.942  	do {
  87.943 -		pmd_t * pmd = pmd_alloc(mm, pud, base + address);
  87.944 -		error = -ENOMEM;
  87.945 -		if (!pmd)
  87.946 -			break;
  87.947 -		error = zeromap_pmd_range(mm, pmd, base + address,
  87.948 -					  end - address, prot);
  87.949 -		if (error)
  87.950 -			break;
  87.951 -		address = (address + PUD_SIZE) & PUD_MASK;
  87.952 -		pud++;
  87.953 -	} while (address && (address < end));
  87.954 +		next = pmd_addr_end(addr, end);
  87.955 +		if (zeromap_pte_range(mm, pmd, addr, next, prot))
  87.956 +			return -ENOMEM;
  87.957 +	} while (pmd++, addr = next, addr != end);
  87.958  	return 0;
  87.959  }
  87.960  
  87.961 -int zeromap_page_range(struct vm_area_struct *vma, unsigned long address,
  87.962 -					unsigned long size, pgprot_t prot)
  87.963 +static inline int zeromap_pud_range(struct mm_struct *mm, pgd_t *pgd,
  87.964 +			unsigned long addr, unsigned long end, pgprot_t prot)
  87.965  {
  87.966 -	int i;
  87.967 -	int error = 0;
  87.968 -	pgd_t * pgd;
  87.969 -	unsigned long beg = address;
  87.970 -	unsigned long end = address + size;
  87.971 +	pud_t *pud;
  87.972  	unsigned long next;
  87.973 -	struct mm_struct *mm = vma->vm_mm;
  87.974  
  87.975 -	pgd = pgd_offset(mm, address);
  87.976 -	flush_cache_range(vma, beg, end);
  87.977 -	BUG_ON(address >= end);
  87.978 -	BUG_ON(end > vma->vm_end);
  87.979 +	pud = pud_alloc(mm, pgd, addr);
  87.980 +	if (!pud)
  87.981 +		return -ENOMEM;
  87.982 +	do {
  87.983 +		next = pud_addr_end(addr, end);
  87.984 +		if (zeromap_pmd_range(mm, pud, addr, next, prot))
  87.985 +			return -ENOMEM;
  87.986 +	} while (pud++, addr = next, addr != end);
  87.987 +	return 0;
  87.988 +}
  87.989  
  87.990 +int zeromap_page_range(struct vm_area_struct *vma,
  87.991 +			unsigned long addr, unsigned long size, pgprot_t prot)
  87.992 +{
  87.993 +	pgd_t *pgd;
  87.994 +	unsigned long next;
  87.995 +	unsigned long end = addr + size;
  87.996 +	struct mm_struct *mm = vma->vm_mm;
  87.997 +	int err;
  87.998 +
  87.999 +	BUG_ON(addr >= end);
 87.1000 +	pgd = pgd_offset(mm, addr);
 87.1001 +	flush_cache_range(vma, addr, end);
 87.1002  	spin_lock(&mm->page_table_lock);
 87.1003 -	for (i = pgd_index(address); i <= pgd_index(end-1); i++) {
 87.1004 -		pud_t *pud = pud_alloc(mm, pgd, address);
 87.1005 -		error = -ENOMEM;
 87.1006 -		if (!pud)
 87.1007 +	do {
 87.1008 +		next = pgd_addr_end(addr, end);
 87.1009 +		err = zeromap_pud_range(mm, pgd, addr, next, prot);
 87.1010 +		if (err)
 87.1011  			break;
 87.1012 -		next = (address + PGDIR_SIZE) & PGDIR_MASK;
 87.1013 -		if (next <= beg || next > end)
 87.1014 -			next = end;
 87.1015 -		error = zeromap_pud_range(mm, pud, address,
 87.1016 -						next - address, prot);
 87.1017 -		if (error)
 87.1018 -			break;
 87.1019 -		address = next;
 87.1020 -		pgd++;
 87.1021 -	}
 87.1022 -	/*
 87.1023 -	 * Why flush? zeromap_pte_range has a BUG_ON for !pte_none()
 87.1024 -	 */
 87.1025 -	flush_tlb_range(vma, beg, end);
 87.1026 +	} while (pgd++, addr = next, addr != end);
 87.1027  	spin_unlock(&mm->page_table_lock);
 87.1028 -	return error;
 87.1029 +	return err;
 87.1030  }
 87.1031  
 87.1032  /*
 87.1033 @@ -1117,95 +1117,74 @@ int zeromap_page_range(struct vm_area_st
 87.1034  * mappings are removed.  Any references to nonexistent pages result
 87.1035  * in null mappings (currently treated as "copy-on-access").
 87.1036   */
 87.1037 -static inline void
 87.1038 -remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
 87.1039 -		unsigned long pfn, pgprot_t prot)
 87.1040 +static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 87.1041 +			unsigned long addr, unsigned long end,
 87.1042 +			unsigned long pfn, pgprot_t prot)
 87.1043  {
 87.1044 -	unsigned long end;
 87.1045 +	pte_t *pte;
 87.1046  
 87.1047 -	address &= ~PMD_MASK;
 87.1048 -	end = address + size;
 87.1049 -	if (end > PMD_SIZE)
 87.1050 -		end = PMD_SIZE;
 87.1051 +	pte = pte_alloc_map(mm, pmd, addr);
 87.1052 +	if (!pte)
 87.1053 +		return -ENOMEM;
 87.1054  	do {
 87.1055  		BUG_ON(!pte_none(*pte));
 87.1056  		if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
 87.1057 - 			set_pte(pte, pfn_pte(pfn, prot));
 87.1058 -		address += PAGE_SIZE;
 87.1059 +			set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
 87.1060  		pfn++;
 87.1061 -		pte++;
 87.1062 -	} while (address && (address < end));
 87.1063 -}
 87.1064 -
 87.1065 -static inline int
 87.1066 -remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address,
 87.1067 -		unsigned long size, unsigned long pfn, pgprot_t prot)
 87.1068 -{
 87.1069 -	unsigned long base, end;
 87.1070 -
 87.1071 -	base = address & PUD_MASK;
 87.1072 -	address &= ~PUD_MASK;
 87.1073 -	end = address + size;
 87.1074 -	if (end > PUD_SIZE)
 87.1075 -		end = PUD_SIZE;
 87.1076 -	pfn -= (address >> PAGE_SHIFT);
 87.1077 -	do {
 87.1078 -		pte_t * pte = pte_alloc_map(mm, pmd, base + address);
 87.1079 -		if (!pte)
 87.1080 -			return -ENOMEM;
 87.1081 -		remap_pte_range(pte, base + address, end - address,
 87.1082 -				(address >> PAGE_SHIFT) + pfn, prot);
 87.1083 -		pte_unmap(pte);
 87.1084 -		address = (address + PMD_SIZE) & PMD_MASK;
 87.1085 -		pmd++;
 87.1086 -	} while (address && (address < end));
 87.1087 +	} while (pte++, addr += PAGE_SIZE, addr != end);
 87.1088 +	pte_unmap(pte - 1);
 87.1089  	return 0;
 87.1090  }
 87.1091  
 87.1092 -static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud,
 87.1093 -				  unsigned long address, unsigned long size,
 87.1094 -				  unsigned long pfn, pgprot_t prot)
 87.1095 +static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud,
 87.1096 +			unsigned long addr, unsigned long end,
 87.1097 +			unsigned long pfn, pgprot_t prot)
 87.1098  {
 87.1099 -	unsigned long base, end;
 87.1100 -	int error;
 87.1101 +	pmd_t *pmd;
 87.1102 +	unsigned long next;
 87.1103  
 87.1104 -	base = address & PGDIR_MASK;
 87.1105 -	address &= ~PGDIR_MASK;
 87.1106 -	end = address + size;
 87.1107 -	if (end > PGDIR_SIZE)
 87.1108 -		end = PGDIR_SIZE;
 87.1109 -	pfn -= address >> PAGE_SHIFT;
 87.1110 +	pfn -= addr >> PAGE_SHIFT;
 87.1111 +	pmd = pmd_alloc(mm, pud, addr);
 87.1112 +	if (!pmd)
 87.1113 +		return -ENOMEM;
 87.1114  	do {
 87.1115 -		pmd_t *pmd = pmd_alloc(mm, pud, base+address);
 87.1116 -		error = -ENOMEM;
 87.1117 -		if (!pmd)
 87.1118 -			break;
 87.1119 -		error = remap_pmd_range(mm, pmd, base + address, end - address,
 87.1120 -				(address >> PAGE_SHIFT) + pfn, prot);
 87.1121 -		if (error)
 87.1122 -			break;
 87.1123 -		address = (address + PUD_SIZE) & PUD_MASK;
 87.1124 -		pud++;
 87.1125 -	} while (address && (address < end));
 87.1126 -	return error;
 87.1127 +		next = pmd_addr_end(addr, end);
 87.1128 +		if (remap_pte_range(mm, pmd, addr, next,
 87.1129 +				pfn + (addr >> PAGE_SHIFT), prot))
 87.1130 +			return -ENOMEM;
 87.1131 +	} while (pmd++, addr = next, addr != end);
 87.1132 +	return 0;
 87.1133 +}
 87.1134 +
 87.1135 +static inline int remap_pud_range(struct mm_struct *mm, pgd_t *pgd,
 87.1136 +			unsigned long addr, unsigned long end,
 87.1137 +			unsigned long pfn, pgprot_t prot)
 87.1138 +{
 87.1139 +	pud_t *pud;
 87.1140 +	unsigned long next;
 87.1141 +
 87.1142 +	pfn -= addr >> PAGE_SHIFT;
 87.1143 +	pud = pud_alloc(mm, pgd, addr);
 87.1144 +	if (!pud)
 87.1145 +		return -ENOMEM;
 87.1146 +	do {
 87.1147 +		next = pud_addr_end(addr, end);
 87.1148 +		if (remap_pmd_range(mm, pud, addr, next,
 87.1149 +				pfn + (addr >> PAGE_SHIFT), prot))
 87.1150 +			return -ENOMEM;
 87.1151 +	} while (pud++, addr = next, addr != end);
 87.1152 +	return 0;
 87.1153  }
 87.1154  
 87.1155  /*  Note: this is only safe if the mm semaphore is held when called. */
 87.1156 -int remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 87.1157 +int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 87.1158  		    unsigned long pfn, unsigned long size, pgprot_t prot)
 87.1159  {
 87.1160 -	int error = 0;
 87.1161  	pgd_t *pgd;
 87.1162 -	unsigned long beg = from;
 87.1163 -	unsigned long end = from + size;
 87.1164  	unsigned long next;
 87.1165 +	unsigned long end = addr + size;
 87.1166  	struct mm_struct *mm = vma->vm_mm;
 87.1167 -	int i;
 87.1168 -
 87.1169 -	pfn -= from >> PAGE_SHIFT;
 87.1170 -	pgd = pgd_offset(mm, from);
 87.1171 -	flush_cache_range(vma, beg, end);
 87.1172 -	BUG_ON(from >= end);
 87.1173 +	int err;
 87.1174  
 87.1175  	/*
 87.1176  	 * Physically remapped pages are special. Tell the
 87.1177 @@ -1217,31 +1196,21 @@ int remap_pfn_range(struct vm_area_struc
 87.1178  	 */
 87.1179  	vma->vm_flags |= VM_IO | VM_RESERVED;
 87.1180  
 87.1181 +	BUG_ON(addr >= end);
 87.1182 +	pfn -= addr >> PAGE_SHIFT;
 87.1183 +	pgd = pgd_offset(mm, addr);
 87.1184 +	flush_cache_range(vma, addr, end);
 87.1185  	spin_lock(&mm->page_table_lock);
 87.1186 -	for (i = pgd_index(beg); i <= pgd_index(end-1); i++) {
 87.1187 -		pud_t *pud = pud_alloc(mm, pgd, from);
 87.1188 -		error = -ENOMEM;
 87.1189 -		if (!pud)
 87.1190 +	do {
 87.1191 +		next = pgd_addr_end(addr, end);
 87.1192 +		err = remap_pud_range(mm, pgd, addr, next,
 87.1193 +				pfn + (addr >> PAGE_SHIFT), prot);
 87.1194 +		if (err)
 87.1195  			break;
 87.1196 -		next = (from + PGDIR_SIZE) & PGDIR_MASK;
 87.1197 -		if (next > end || next <= from)
 87.1198 -			next = end;
 87.1199 -		error = remap_pud_range(mm, pud, from, end - from,
 87.1200 -					pfn + (from >> PAGE_SHIFT), prot);
 87.1201 -		if (error)
 87.1202 -			break;
 87.1203 -		from = next;
 87.1204 -		pgd++;
 87.1205 -	}
 87.1206 -	/*
 87.1207 -	 * Why flush? remap_pte_range has a BUG_ON for !pte_none()
 87.1208 -	 */
 87.1209 -	flush_tlb_range(vma, beg, end);
 87.1210 +	} while (pgd++, addr = next, addr != end);
 87.1211  	spin_unlock(&mm->page_table_lock);
 87.1212 -
 87.1213 -	return error;
 87.1214 +	return err;
 87.1215  }
 87.1216 -
 87.1217  EXPORT_SYMBOL(remap_pfn_range);
 87.1218  
 87.1219  /*
 87.1220 @@ -1265,11 +1234,11 @@ static inline void break_cow(struct vm_a
 87.1221  {
 87.1222  	pte_t entry;
 87.1223  
 87.1224 -	flush_cache_page(vma, address);
 87.1225  	entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
 87.1226  			      vma);
 87.1227  	ptep_establish(vma, address, page_table, entry);
 87.1228  	update_mmu_cache(vma, address, entry);
 87.1229 +	lazy_mmu_prot_update(entry);
 87.1230  }
 87.1231  
 87.1232  /*
 87.1233 @@ -1317,11 +1286,12 @@ static int do_wp_page(struct mm_struct *
 87.1234  		int reuse = can_share_swap_page(old_page);
 87.1235  		unlock_page(old_page);
 87.1236  		if (reuse) {
 87.1237 -			flush_cache_page(vma, address);
 87.1238 +			flush_cache_page(vma, address, pfn);
 87.1239  			entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
 87.1240  					      vma);
 87.1241  			ptep_set_access_flags(vma, address, page_table, entry, 1);
 87.1242  			update_mmu_cache(vma, address, entry);
 87.1243 +			lazy_mmu_prot_update(entry);
 87.1244  			pte_unmap(page_table);
 87.1245  			spin_unlock(&mm->page_table_lock);
 87.1246  			return VM_FAULT_MINOR;
 87.1247 @@ -1355,13 +1325,12 @@ static int do_wp_page(struct mm_struct *
 87.1248  	page_table = pte_offset_map(pmd, address);
 87.1249  	if (likely(pte_same(*page_table, pte))) {
 87.1250  		if (PageAnon(old_page))
 87.1251 -			mm->anon_rss--;
 87.1252 -		if (PageReserved(old_page)) {
 87.1253 -			++mm->rss;
 87.1254 -			acct_update_integrals();
 87.1255 -			update_mem_hiwater();
 87.1256 -		} else
 87.1257 +			dec_mm_counter(mm, anon_rss);
 87.1258 +		if (PageReserved(old_page))
 87.1259 +			inc_mm_counter(mm, rss);
 87.1260 +		else
 87.1261  			page_remove_rmap(old_page);
 87.1262 +		flush_cache_page(vma, address, pfn);
 87.1263  		break_cow(vma, new_page, address, page_table);
 87.1264  		lru_cache_add_active(new_page);
 87.1265  		page_add_anon_rmap(new_page, vma, address);
 87.1266 @@ -1405,7 +1374,7 @@ no_new_page:
 87.1267   * i_mmap_lock.
 87.1268   *
 87.1269   * In order to make forward progress despite repeatedly restarting some
 87.1270 - * large vma, note the break_addr set by unmap_vmas when it breaks out:
 87.1271 + * large vma, note the restart_addr from unmap_vmas when it breaks out:
 87.1272   * and restart from that address when we reach that vma again.  It might
 87.1273   * have been split or merged, shrunk or extended, but never shifted: so
 87.1274   * restart_addr remains valid so long as it remains in the vma's range.
 87.1275 @@ -1443,8 +1412,8 @@ again:
 87.1276  		}
 87.1277  	}
 87.1278  
 87.1279 -	details->break_addr = end_addr;
 87.1280 -	zap_page_range(vma, start_addr, end_addr - start_addr, details);
 87.1281 +	restart_addr = zap_page_range(vma, start_addr,
 87.1282 +					end_addr - start_addr, details);
 87.1283  
 87.1284  	/*
 87.1285  	 * We cannot rely on the break test in unmap_vmas:
 87.1286 @@ -1455,14 +1424,14 @@ again:
 87.1287  	need_break = need_resched() ||
 87.1288  			need_lockbreak(details->i_mmap_lock);
 87.1289  
 87.1290 -	if (details->break_addr >= end_addr) {
 87.1291 +	if (restart_addr >= end_addr) {
 87.1292  		/* We have now completed this vma: mark it so */
 87.1293  		vma->vm_truncate_count = details->truncate_count;
 87.1294  		if (!need_break)
 87.1295  			return 0;
 87.1296  	} else {
 87.1297  		/* Note restart_addr in vma's truncate_count field */
 87.1298 -		vma->vm_truncate_count = details->break_addr;
 87.1299 +		vma->vm_truncate_count = restart_addr;
 87.1300  		if (!need_break)
 87.1301  			goto again;
 87.1302  	}
 87.1303 @@ -1750,12 +1719,13 @@ static int do_swap_page(struct mm_struct
 87.1304  	spin_lock(&mm->page_table_lock);
 87.1305  	page_table = pte_offset_map(pmd, address);
 87.1306  	if (unlikely(!pte_same(*page_table, orig_pte))) {
 87.1307 -		pte_unmap(page_table);
 87.1308 -		spin_unlock(&mm->page_table_lock);
 87.1309 -		unlock_page(page);
 87.1310 -		page_cache_release(page);
 87.1311  		ret = VM_FAULT_MINOR;
 87.1312 -		goto out;
 87.1313 +		goto out_nomap;
 87.1314 +	}
 87.1315 +
 87.1316 +	if (unlikely(!PageUptodate(page))) {
 87.1317 +		ret = VM_FAULT_SIGBUS;
 87.1318 +		goto out_nomap;
 87.1319  	}
 87.1320  
 87.1321  	/* The page isn't present yet, go ahead with the fault. */
 87.1322 @@ -1764,10 +1734,7 @@ static int do_swap_page(struct mm_struct
 87.1323  	if (vm_swap_full())
 87.1324  		remove_exclusive_swap_page(page);
 87.1325  
 87.1326 -	mm->rss++;
 87.1327 -	acct_update_integrals();
 87.1328 -	update_mem_hiwater();
 87.1329 -
 87.1330 +	inc_mm_counter(mm, rss);
 87.1331  	pte = mk_pte(page, vma->vm_page_prot);
 87.1332  	if (write_access && can_share_swap_page(page)) {
 87.1333  		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 87.1334 @@ -1776,7 +1743,7 @@ static int do_swap_page(struct mm_struct
 87.1335  	unlock_page(page);
 87.1336  
 87.1337  	flush_icache_page(vma, page);
 87.1338 -	set_pte(page_table, pte);
 87.1339 +	set_pte_at(mm, address, page_table, pte);
 87.1340  	page_add_anon_rmap(page, vma, address);
 87.1341  
 87.1342  	if (write_access) {
 87.1343 @@ -1788,10 +1755,17 @@ static int do_swap_page(struct mm_struct
 87.1344  
 87.1345  	/* No need to invalidate - it was non-present before */
 87.1346  	update_mmu_cache(vma, address, pte);
 87.1347 +	lazy_mmu_prot_update(pte);
 87.1348  	pte_unmap(page_table);
 87.1349  	spin_unlock(&mm->page_table_lock);
 87.1350  out:
 87.1351  	return ret;
 87.1352 +out_nomap:
 87.1353 +	pte_unmap(page_table);
 87.1354 +	spin_unlock(&mm->page_table_lock);
 87.1355 +	unlock_page(page);
 87.1356 +	page_cache_release(page);
 87.1357 +	goto out;
 87.1358  }
 87.1359  
 87.1360  /*
 87.1361 @@ -1831,9 +1805,7 @@ do_anonymous_page(struct mm_struct *mm, 
 87.1362  			spin_unlock(&mm->page_table_lock);
 87.1363  			goto out;
 87.1364  		}
 87.1365 -		mm->rss++;
 87.1366 -		acct_update_integrals();
 87.1367 -		update_mem_hiwater();
 87.1368 +		inc_mm_counter(mm, rss);
 87.1369  		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
 87.1370  							 vma->vm_page_prot)),
 87.1371  				      vma);
 87.1372 @@ -1842,11 +1814,12 @@ do_anonymous_page(struct mm_struct *mm, 
 87.1373  		page_add_anon_rmap(page, vma, addr);
 87.1374  	}
 87.1375  
 87.1376 -	ptep_establish_new(vma, addr, page_table, entry);
 87.1377 +	set_pte_at(mm, addr, page_table, entry);
 87.1378  	pte_unmap(page_table);
 87.1379  
 87.1380  	/* No need to invalidate - it was non-present before */
 87.1381  	update_mmu_cache(vma, addr, entry);
 87.1382 +	lazy_mmu_prot_update(entry);
 87.1383  	spin_unlock(&mm->page_table_lock);
 87.1384  out:
 87.1385  	return VM_FAULT_MINOR;
 87.1386 @@ -1949,15 +1922,13 @@ retry:
 87.1387  	/* Only go through if we didn't race with anybody else... */
 87.1388  	if (pte_none(*page_table)) {
 87.1389  		if (!PageReserved(new_page))
 87.1390 -			++mm->rss;
 87.1391 -		acct_update_integrals();
 87.1392 -		update_mem_hiwater();
 87.1393 +			inc_mm_counter(mm, rss);
 87.1394  
 87.1395  		flush_icache_page(vma, new_page);
 87.1396  		entry = mk_pte(new_page, vma->vm_page_prot);
 87.1397  		if (write_access)
 87.1398  			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 87.1399 -		ptep_establish_new(vma, address, page_table, entry);
 87.1400 +		set_pte_at(mm, address, page_table, entry);
 87.1401  		if (anon) {
 87.1402  			lru_cache_add_active(new_page);
 87.1403  			page_add_anon_rmap(new_page, vma, address);
 87.1404 @@ -1974,6 +1945,7 @@ retry:
 87.1405  
 87.1406  	/* no need to invalidate: a not-present page shouldn't be cached */
 87.1407  	update_mmu_cache(vma, address, entry);
 87.1408 +	lazy_mmu_prot_update(entry);
 87.1409  	spin_unlock(&mm->page_table_lock);
 87.1410  out:
 87.1411  	return ret;
 87.1412 @@ -2001,7 +1973,7 @@ static int do_file_page(struct mm_struct
 87.1413  	 */
 87.1414  	if (!vma->vm_ops || !vma->vm_ops->populate || 
 87.1415  			(write_access && !(vma->vm_flags & VM_SHARED))) {
 87.1416 -		pte_clear(pte);
 87.1417 +		pte_clear(mm, address, pte);
 87.1418  		return do_no_page(mm, vma, address, write_access, pte, pmd);
 87.1419  	}
 87.1420  
 87.1421 @@ -2068,6 +2040,7 @@ static inline int handle_pte_fault(struc
 87.1422  	entry = pte_mkyoung(entry);
 87.1423  	ptep_set_access_flags(vma, address, pte, entry, write_access);
 87.1424  	update_mmu_cache(vma, address, entry);
 87.1425 +	lazy_mmu_prot_update(entry);
 87.1426  	pte_unmap(pte);
 87.1427  	spin_unlock(&mm->page_table_lock);
 87.1428  	return VM_FAULT_MINOR;
 87.1429 @@ -2117,15 +2090,12 @@ int handle_mm_fault(struct mm_struct *mm
 87.1430  	return VM_FAULT_OOM;
 87.1431  }
 87.1432  
 87.1433 -#ifndef __ARCH_HAS_4LEVEL_HACK
 87.1434 +#ifndef __PAGETABLE_PUD_FOLDED
 87.1435  /*
 87.1436   * Allocate page upper directory.
 87.1437   *
 87.1438   * We've already handled the fast-path in-line, and we own the
 87.1439   * page table lock.
 87.1440 - *
 87.1441 - * On a two-level or three-level page table, this ends up actually being
 87.1442 - * entirely optimized away.
 87.1443   */
 87.1444  pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
 87.1445  {
 87.1446 @@ -2149,15 +2119,14 @@ pud_t fastcall *__pud_alloc(struct mm_st
 87.1447   out:
 87.1448  	return pud_offset(pgd, address);
 87.1449  }
 87.1450 +#endif /* __PAGETABLE_PUD_FOLDED */
 87.1451  
 87.1452 +#ifndef __PAGETABLE_PMD_FOLDED
 87.1453  /*
 87.1454   * Allocate page middle directory.
 87.1455   *
 87.1456   * We've already handled the fast-path in-line, and we own the
 87.1457   * page table lock.
 87.1458 - *
 87.1459 - * On a two-level page table, this ends up actually being entirely
 87.1460 - * optimized away.
 87.1461   */
 87.1462  pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 87.1463  {
 87.1464 @@ -2173,38 +2142,24 @@ pmd_t fastcall *__pmd_alloc(struct mm_st
 87.1465  	 * Because we dropped the lock, we should re-check the
 87.1466  	 * entry, as somebody else could have populated it..
 87.1467  	 */
 87.1468 +#ifndef __ARCH_HAS_4LEVEL_HACK
 87.1469  	if (pud_present(*pud)) {
 87.1470  		pmd_free(new);
 87.1471  		goto out;
 87.1472  	}
 87.1473  	pud_populate(mm, pud, new);
 87.1474 - out:
 87.1475 -	return pmd_offset(pud, address);
 87.1476 -}
 87.1477  #else
 87.1478 -pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 87.1479 -{
 87.1480 -	pmd_t *new;
 87.1481 -
 87.1482 -	spin_unlock(&mm->page_table_lock);
 87.1483 -	new = pmd_alloc_one(mm, address);
 87.1484 -	spin_lock(&mm->page_table_lock);
 87.1485 -	if (!new)
 87.1486 -		return NULL;
 87.1487 -
 87.1488 -	/*
 87.1489 -	 * Because we dropped the lock, we should re-check the
 87.1490 -	 * entry, as somebody else could have populated it..
 87.1491 -	 */
 87.1492  	if (pgd_present(*pud)) {
 87.1493  		pmd_free(new);
 87.1494  		goto out;
 87.1495  	}
 87.1496  	pgd_populate(mm, pud, new);
 87.1497 -out:
 87.1498 +#endif /* __ARCH_HAS_4LEVEL_HACK */
 87.1499 +
 87.1500 + out:
 87.1501  	return pmd_offset(pud, address);
 87.1502  }
 87.1503 -#endif
 87.1504 +#endif /* __PAGETABLE_PMD_FOLDED */
 87.1505  
 87.1506  int make_pages_present(unsigned long addr, unsigned long end)
 87.1507  {
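The __ARCH_HAS_4LEVEL_HACK conditionals around the allocators are replaced by the folded-level markers: an architecture with fewer than four paging levels includes asm-generic/pgtable-nopud.h or pgtable-nopmd.h, which define __PAGETABLE_PUD_FOLDED / __PAGETABLE_PMD_FOLDED and collapse the missing level into the one above, so __pud_alloc()/__pmd_alloc() are compiled out entirely. A simplified model of the folding trick, with types reduced to bare wrappers:

#include <stdio.h>

/* On a folded configuration the pud_t is just a wrapper around the pgd
 * entry, so "allocating" a pud never touches memory. */
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { pgd_t pgd; } pud_t;    /* pgtable-nopud.h does this */

#define __PAGETABLE_PUD_FOLDED

/* pud_offset() returns the pgd entry itself, reinterpreted: the fast
 * path in pud_alloc() can short-circuit to this in-line. */
static pud_t *pud_offset(pgd_t *pgd, unsigned long address)
{
        (void)address;
        return (pud_t *)pgd;
}

int main(void)
{
        pgd_t pgd = { 0xabcd };
        pud_t *pud = pud_offset(&pgd, 0);

        printf("folded pud entry = %#lx (same storage as the pgd)\n",
               pud->pgd.pgd);
        printf("pgd at %p, pud at %p\n", (void *)&pgd, (void *)pud);
        return 0;
}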
 87.1508 @@ -2271,13 +2226,13 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 87.1509   * update_mem_hiwater
 87.1510   *	- update per process rss and vm high water data
 87.1511   */
 87.1512 -void update_mem_hiwater(void)
 87.1513 +void update_mem_hiwater(struct task_struct *tsk)
 87.1514  {
 87.1515 -	struct task_struct *tsk = current;
 87.1516 +	if (tsk->mm) {
 87.1517 +		unsigned long rss = get_mm_counter(tsk->mm, rss);
 87.1518  
 87.1519 -	if (tsk->mm) {
 87.1520 -		if (tsk->mm->hiwater_rss < tsk->mm->rss)
 87.1521 -			tsk->mm->hiwater_rss = tsk->mm->rss;
 87.1522 +		if (tsk->mm->hiwater_rss < rss)
 87.1523 +			tsk->mm->hiwater_rss = rss;
 87.1524  		if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
 87.1525  			tsk->mm->hiwater_vm = tsk->mm->total_vm;
 87.1526  	}
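update_mem_hiwater() now takes the task explicitly instead of assuming current, and reads rss once through get_mm_counter() so the comparison and the store see the same snapshot. The logic itself is a pair of running maxima; a small model:

#include <stdio.h>

struct mm_struct {
        unsigned long rss, total_vm;            /* current usage, pages */
        unsigned long hiwater_rss, hiwater_vm;  /* recorded peaks */
};

/* Record the peak of each counter; usage may drop afterwards but the
 * high-water marks only ever grow. */
static void update_mem_hiwater(struct mm_struct *mm)
{
        unsigned long rss = mm->rss;    /* single snapshot */

        if (mm->hiwater_rss < rss)
                mm->hiwater_rss = rss;
        if (mm->hiwater_vm < mm->total_vm)
                mm->hiwater_vm = mm->total_vm;
}

int main(void)
{
        struct mm_struct mm = { .rss = 300, .total_vm = 1000 };

        update_mem_hiwater(&mm);
        mm.rss = 120;                   /* usage dropped... */
        update_mem_hiwater(&mm);
        printf("hiwater_rss = %lu (peak kept)\n", mm.hiwater_rss);
        return 0;
}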
    88.1 --- a/linux-2.6-xen-sparse/mm/mmap.c	Mon Jul 11 09:29:56 2005 -0500
    88.2 +++ b/linux-2.6-xen-sparse/mm/mmap.c	Mon Jul 11 09:35:19 2005 -0500
    88.3 @@ -21,7 +21,6 @@
    88.4  #include <linux/hugetlb.h>
    88.5  #include <linux/profile.h>
    88.6  #include <linux/module.h>
    88.7 -#include <linux/acct.h>
    88.8  #include <linux/mount.h>
    88.9  #include <linux/mempolicy.h>
   88.10  #include <linux/rmap.h>
   88.11 @@ -30,6 +29,10 @@
   88.12  #include <asm/cacheflush.h>
   88.13  #include <asm/tlb.h>
   88.14  
   88.15 +static void unmap_region(struct mm_struct *mm,
   88.16 +		struct vm_area_struct *vma, struct vm_area_struct *prev,
   88.17 +		unsigned long start, unsigned long end);
   88.18 +
   88.19  /*
   88.20   * WARNING: the debugging will use recursive algorithms so never enable this
   88.21   * unless you know what you are doing.
   88.22 @@ -873,7 +876,7 @@ unsigned long do_mmap_pgoff(struct file 
   88.23  	int error;
   88.24  	struct rb_node ** rb_link, * rb_parent;
   88.25  	int accountable = 1;
   88.26 -	unsigned long charged = 0;
   88.27 +	unsigned long charged = 0, reqprot = prot;
   88.28  
   88.29  	if (file) {
   88.30  		if (is_file_hugepages(file))
   88.31 @@ -897,16 +900,16 @@ unsigned long do_mmap_pgoff(struct file 
   88.32  			prot |= PROT_EXEC;
   88.33  
   88.34  	if (!len)
   88.35 -		return addr;
   88.36 +		return -EINVAL;
   88.37  
   88.38  	/* Careful about overflows.. */
   88.39  	len = PAGE_ALIGN(len);
   88.40  	if (!len || len > TASK_SIZE)
   88.41 -		return -EINVAL;
   88.42 +		return -ENOMEM;
   88.43  
   88.44  	/* offset overflow? */
   88.45  	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
   88.46 -		return -EINVAL;
   88.47 +		return -EOVERFLOW;
   88.48  
   88.49  	/* Too many mappings? */
   88.50  	if (mm->map_count > sysctl_max_map_count)
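Two of the reclassified error returns guard real overflows: PAGE_ALIGN() of a length near ULONG_MAX wraps to zero (caught by the !len retest, now -ENOMEM), and a file offset whose end page index wraps below its start is reported as -EOVERFLOW rather than a generic -EINVAL. Both wraps are easy to reproduce in userspace, assuming 4 KiB pages:

#include <stdio.h>
#include <limits.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long len = ULONG_MAX - 100;    /* absurd mmap length */
        unsigned long pgoff = ULONG_MAX - 10;   /* huge offset, in pages */

        /* PAGE_ALIGN wraps: adding PAGE_SIZE-1 overflows, leaving 0. */
        printf("PAGE_ALIGN(%#lx) = %#lx -> caught by the !len test\n",
               len, PAGE_ALIGN(len));

        /* Offset wrap: the end-of-mapping page index comes out *smaller*
         * than the start index, which is exactly what the kernel tests. */
        len = 1UL << 40;
        if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
                printf("pgoff %#lx + %#lx pages wraps -> -EOVERFLOW\n",
                       pgoff, len >> PAGE_SHIFT);
        return 0;
}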
   88.51 @@ -934,9 +937,10 @@ unsigned long do_mmap_pgoff(struct file 
   88.52  	/* mlock MCL_FUTURE? */
   88.53  	if (vm_flags & VM_LOCKED) {
   88.54  		unsigned long locked, lock_limit;
   88.55 -		locked = mm->locked_vm << PAGE_SHIFT;
   88.56 +		locked = len >> PAGE_SHIFT;
   88.57 +		locked += mm->locked_vm;
   88.58  		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
   88.59 -		locked += len;
   88.60 +		lock_limit >>= PAGE_SHIFT;
   88.61  		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
   88.62  			return -EAGAIN;
   88.63  	}
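The RLIMIT_MEMLOCK test (here and again in do_brk() below) is redone in page units: the old form shifted mm->locked_vm up into bytes, where a large value can overflow and slip past the limit, so the new form shifts the limit down instead. A sketch of the corrected comparison with illustrative numbers:

#include <stdio.h>

#define PAGE_SHIFT 12

/* Corrected MCL_FUTURE check: convert everything to pages, then compare.
 * (The old code shifted locked_vm *up* into bytes, which can wrap.) */
static int mlock_would_exceed(unsigned long locked_vm_pages,
                              unsigned long len_bytes,
                              unsigned long rlim_cur_bytes)
{
        unsigned long locked = len_bytes >> PAGE_SHIFT;
        unsigned long lock_limit = rlim_cur_bytes >> PAGE_SHIFT;

        locked += locked_vm_pages;
        return locked > lock_limit;     /* -EAGAIN unless CAP_IPC_LOCK */
}

int main(void)
{
        /* 64 KiB limit, 48 KiB (12 pages) locked, 32 KiB requested: over. */
        printf("%s\n", mlock_would_exceed(12, 32 << 10, 64 << 10)
                       ? "-EAGAIN" : "ok");
        return 0;
}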
   88.64 @@ -991,7 +995,7 @@ unsigned long do_mmap_pgoff(struct file 
   88.65  		}
   88.66  	}
   88.67  
   88.68 -	error = security_file_mmap(file, prot, flags);
   88.69 +	error = security_file_mmap(file, reqprot, prot, flags);
   88.70  	if (error)
   88.71  		return error;
   88.72  		
   88.73 @@ -1006,8 +1010,7 @@ munmap_back:
   88.74  	}
   88.75  
   88.76  	/* Check against address space limit. */
   88.77 -	if ((mm->total_vm << PAGE_SHIFT) + len
   88.78 -	    > current->signal->rlim[RLIMIT_AS].rlim_cur)
   88.79 +	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
   88.80  		return -ENOMEM;
   88.81  
   88.82  	if (accountable && (!(flags & MAP_NORESERVE) ||
   88.83 @@ -1121,8 +1124,6 @@ out:
   88.84  					pgoff, flags & MAP_NONBLOCK);
   88.85  		down_write(&mm->mmap_sem);
   88.86  	}
   88.87 -	acct_update_integrals();
   88.88 -	update_mem_hiwater();
   88.89  	return addr;
   88.90  
   88.91  unmap_and_free_vma:
   88.92 @@ -1132,7 +1133,8 @@ unmap_and_free_vma:
   88.93  	fput(file);
   88.94  
   88.95  	/* Undo any partial mapping done by a device driver. */
   88.96 -	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
   88.97 +	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
   88.98 +	charged = 0;
   88.99  free_vma:
  88.100  	kmem_cache_free(vm_area_cachep, vma);
  88.101  unacct_error:
  88.102 @@ -1221,19 +1223,14 @@ arch_get_unmapped_area_topdown(struct fi
  88.103  			  const unsigned long len, const unsigned long pgoff,
  88.104  			  const unsigned long flags)
  88.105  {
  88.106 -	struct vm_area_struct *vma, *prev_vma;
  88.107 +	struct vm_area_struct *vma;
  88.108  	struct mm_struct *mm = current->mm;
  88.109 -	unsigned long base = mm->mmap_base, addr = addr0;
  88.110 -	int first_time = 1;
  88.111 +	unsigned long addr = addr0;
  88.112  
  88.113  	/* requested length too big for entire address space */
  88.114  	if (len > TASK_SIZE)
  88.115  		return -ENOMEM;
  88.116  
  88.117 -	/* dont allow allocations above current base */
  88.118 -	if (mm->free_area_cache > base)
  88.119 -		mm->free_area_cache = base;
  88.120 -
  88.121  	/* requesting a specific address */
  88.122  	if (addr) {
  88.123  		addr = PAGE_ALIGN(addr);
  88.124 @@ -1243,48 +1240,34 @@ arch_get_unmapped_area_topdown(struct fi
  88.125  			return addr;
  88.126  	}
  88.127  
  88.128 -try_again:
  88.129 +	/* either no address requested or can't fit in requested address hole */
  88.130 +	addr = mm->free_area_cache;
  88.131 +
  88.132  	/* make sure it can fit in the remaining address space */
  88.133 -	if (mm->free_area_cache < len)
  88.134 -		goto fail;
  88.135 +	if (addr > len) {
  88.136 +		vma = find_vma(mm, addr-len);
  88.137 +		if (!vma || addr <= vma->vm_start)
  88.138 +			/* remember the address as a hint for next time */
  88.139 +			return (mm->free_area_cache = addr-len);
  88.140 +	}
  88.141  
  88.142 -	/* either no address requested or cant fit in requested address hole */
  88.143 -	addr = (mm->free_area_cache - len) & PAGE_MASK;
  88.144 +	addr = mm->mmap_base-len;
  88.145 +
  88.146  	do {
  88.147  		/*
  88.148  		 * Lookup failure means no vma is above this address,
  88.149 -		 * i.e. return with success:
  88.150 +		 * else if new region fits below vma->vm_start,
  88.151 +		 * return with success:
  88.152  		 */
  88.153 - 	 	if (!(vma = find_vma_prev(mm, addr, &prev_vma)))
  88.154 -			return addr;
  88.155 -
  88.156 -		/*
  88.157 -		 * new region fits between prev_vma->vm_end and
  88.158 -		 * vma->vm_start, use it:
  88.159 -		 */
  88.160 -		if (addr+len <= vma->vm_start &&
  88.161 -				(!prev_vma || (addr >= prev_vma->vm_end)))
  88.162 +		vma = find_vma(mm, addr);
  88.163 +		if (!vma || addr+len <= vma->vm_start)
  88.164  			/* remember the address as a hint for next time */
  88.165  			return (mm->free_area_cache = addr);
  88.166 -		else
  88.167 -			/* pull free_area_cache down to the first hole */
  88.168 -			if (mm->free_area_cache == vma->vm_end)
  88.169 -				mm->free_area_cache = vma->vm_start;
  88.170  
  88.171  		/* try just below the current vma->vm_start */
  88.172  		addr = vma->vm_start-len;
  88.173 -	} while (len <= vma->vm_start);
  88.174 +	} while (len < vma->vm_start);
  88.175  
  88.176 -fail:
  88.177 -	/*
  88.178 -	 * if hint left us with no space for the requested
  88.179 -	 * mapping then try again:
  88.180 -	 */
  88.181 -	if (first_time) {
  88.182 -		mm->free_area_cache = base;
  88.183 -		first_time = 0;
  88.184 -		goto try_again;
  88.185 -	}
  88.186  	/*
  88.187  	 * A failed mmap() very likely causes application failure,
  88.188  	 * so fall back to the bottom-up function here. This scenario
  88.189 @@ -1296,7 +1279,7 @@ fail:
  88.190  	/*
  88.191  	 * Restore the topdown base:
  88.192  	 */
  88.193 -	mm->free_area_cache = base;
  88.194 +	mm->free_area_cache = mm->mmap_base;
  88.195  
  88.196  	return addr;
  88.197  }
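The rewritten topdown walk drops the prev_vma bookkeeping and the first_time retry: start at free_area_cache (or mmap_base - len), ask find_vma() whether [addr, addr+len) collides, and either return the hole or slide down to just below the blocking vma. A userspace model over a sorted mapping list, with hypothetical sample addresses:

#include <stdio.h>

struct vma { unsigned long start, end; };       /* [start, end) */

/* find_vma(): first mapping whose end is above addr, or NULL. */
static struct vma *find_vma(struct vma *v, int n, unsigned long addr)
{
        for (int i = 0; i < n; i++)
                if (addr < v[i].end)
                        return &v[i];
        return NULL;
}

/* Model of the 2.6.12 topdown loop: walk downward from the base. */
static unsigned long topdown(struct vma *v, int n,
                             unsigned long base, unsigned long len)
{
        unsigned long addr = base - len;
        struct vma *vma;

        do {
                vma = find_vma(v, n, addr);
                if (!vma || addr + len <= vma->start)
                        return addr;            /* hole found */
                addr = vma->start - len;        /* slide below the blocker */
        } while (len < vma->start);
        return 0;       /* the kernel then falls back to bottom-up */
}

int main(void)
{
        /* Hypothetical mappings below a 0x40000000 base. */
        struct vma v[] = { { 0x10000000, 0x10004000 },
                           { 0x3fff0000, 0x40000000 } };

        printf("got %#lx\n", topdown(v, 2, 0x40000000, 0x8000));
        return 0;
}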
  88.198 @@ -1309,43 +1292,50 @@ void arch_unmap_area_topdown(struct vm_a
  88.199  	 */
  88.200  	if (area->vm_end > area->vm_mm->free_area_cache)
  88.201  		area->vm_mm->free_area_cache = area->vm_end;
  88.202 +
  88.203 +	/* don't allow allocations above current base */
  88.204 +	if (area->vm_mm->free_area_cache > area->vm_mm->mmap_base)
  88.205 +		area->vm_mm->free_area_cache = area->vm_mm->mmap_base;
  88.206  }
  88.207  
  88.208  unsigned long
  88.209  get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
  88.210  		unsigned long pgoff, unsigned long flags)
  88.211  {
  88.212 -	if (flags & MAP_FIXED) {
  88.213 -		unsigned long ret;
  88.214 +	unsigned long ret;
  88.215  
  88.216 -		if (addr > TASK_SIZE - len)
  88.217 -			return -ENOMEM;
  88.218 -		if (addr & ~PAGE_MASK)
  88.219 -			return -EINVAL;
  88.220 -		if (file && is_file_hugepages(file))  {
  88.221 -			/*
  88.222 -			 * Check if the given range is hugepage aligned, and
  88.223 -			 * can be made suitable for hugepages.
  88.224 -			 */
  88.225 -			ret = prepare_hugepage_range(addr, len);
  88.226 -		} else {
  88.227 -			/*
  88.228 -			 * Ensure that a normal request is not falling in a
  88.229 -			 * reserved hugepage range.  For some archs like IA-64,
  88.230 -			 * there is a separate region for hugepages.
  88.231 -			 */
  88.232 -			ret = is_hugepage_only_range(addr, len);
  88.233 -		}
  88.234 -		if (ret)
  88.235 -			return -EINVAL;
  88.236 -		return addr;
  88.237 +	if (!(flags & MAP_FIXED)) {
  88.238 +		unsigned long (*get_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
  88.239 +
  88.240 +		get_area = current->mm->get_unmapped_area;
  88.241 +		if (file && file->f_op && file->f_op->get_unmapped_area)
  88.242 +			get_area = file->f_op->get_unmapped_area;
  88.243 +		addr = get_area(file, addr, len, pgoff, flags);
  88.244 +		if (IS_ERR_VALUE(addr))
  88.245 +			return addr;
  88.246  	}
  88.247  
  88.248 -	if (file && file->f_op && file->f_op->get_unmapped_area)
  88.249 -		return file->f_op->get_unmapped_area(file, addr, len,
  88.250 -						pgoff, flags);
  88.251 -
  88.252 -	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
  88.253 +	if (addr > TASK_SIZE - len)
  88.254 +		return -ENOMEM;
  88.255 +	if (addr & ~PAGE_MASK)
  88.256 +		return -EINVAL;
  88.257 +	if (file && is_file_hugepages(file))  {
  88.258 +		/*
  88.259 +		 * Check if the given range is hugepage aligned, and
  88.260 +		 * can be made suitable for hugepages.
  88.261 +		 */
  88.262 +		ret = prepare_hugepage_range(addr, len);
  88.263 +	} else {
  88.264 +		/*
  88.265 +		 * Ensure that a normal request is not falling in a
  88.266 +		 * reserved hugepage range.  For some archs like IA-64,
  88.267 +		 * there is a separate region for hugepages.
  88.268 +		 */
  88.269 +		ret = is_hugepage_only_range(current->mm, addr, len);
  88.270 +	}
  88.271 +	if (ret)
  88.272 +		return -EINVAL;
  88.273 +	return addr;
  88.274  }
  88.275  
  88.276  EXPORT_SYMBOL(get_unmapped_area);
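get_unmapped_area() is restructured so fixed and non-fixed requests share one validation tail: a non-MAP_FIXED request is first routed through a single function pointer — the file's ->get_unmapped_area hook if present, otherwise the mm's default allocator — and the resulting address then passes the same TASK_SIZE, alignment and hugepage checks a MAP_FIXED address does. A compact model of the pointer selection (stand-in types; the returned addresses are made up):

#include <stdio.h>

typedef unsigned long (*get_area_t)(unsigned long addr, unsigned long len);

static unsigned long mm_default(unsigned long addr, unsigned long len)
{
        (void)addr; (void)len;
        return 0x60000000UL;            /* pretend bottom-up result */
}

static unsigned long file_specific(unsigned long addr, unsigned long len)
{
        (void)addr; (void)len;
        return 0x7f000000UL;            /* e.g. a hugetlbfs placement */
}

struct file { get_area_t get_unmapped_area; };  /* stand-in for f_op */

int main(void)
{
        struct file shm = { file_specific };
        struct file plain = { NULL };
        struct file *files[] = { &shm, &plain };

        /* Same selection as the rewritten kernel function: prefer the
         * file's hook, fall back to the mm default. */
        for (int i = 0; i < 2; i++) {
                get_area_t get_area = mm_default;
                if (files[i]->get_unmapped_area)
                        get_area = files[i]->get_unmapped_area;
                printf("addr = %#lx\n", get_area(0, 4096));
        }
        return 0;
}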
  88.277 @@ -1434,7 +1424,7 @@ static int acct_stack_growth(struct vm_a
  88.278  	struct rlimit *rlim = current->signal->rlim;
  88.279  
  88.280  	/* address space limit tests */
  88.281 -	if (mm->total_vm + grow > rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT)
  88.282 +	if (!may_expand_vm(mm, grow))
  88.283  		return -ENOMEM;
  88.284  
  88.285  	/* Stack limit test */
  88.286 @@ -1463,8 +1453,6 @@ static int acct_stack_growth(struct vm_a
  88.287  	if (vma->vm_flags & VM_LOCKED)
  88.288  		mm->locked_vm += grow;
  88.289  	__vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
  88.290 -	acct_update_integrals();
  88.291 -	update_mem_hiwater();
  88.292  	return 0;
  88.293  }
  88.294  
  88.295 @@ -1592,66 +1580,6 @@ find_extend_vma(struct mm_struct * mm, u
  88.296  }
  88.297  #endif
  88.298  
  88.299 -/*
  88.300 - * Try to free as many page directory entries as we can,
  88.301 - * without having to work very hard at actually scanning
  88.302 - * the page tables themselves.
  88.303 - *
  88.304 - * Right now we try to free page tables if we have a nice
  88.305 - * PGDIR-aligned area that got free'd up. We could be more
  88.306 - * granular if we want to, but this is fast and simple,
  88.307 - * and covers the bad cases.
  88.308 - *
  88.309 - * "prev", if it exists, points to a vma before the one
  88.310 - * we just free'd - but there's no telling how much before.
  88.311 - */
  88.312 -static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
  88.313 -	unsigned long start, unsigned long end)
  88.314 -{
  88.315 -	unsigned long first = start & PGDIR_MASK;
  88.316 -	unsigned long last = end + PGDIR_SIZE - 1;
  88.317 -	struct mm_struct *mm = tlb->mm;
  88.318 -
  88.319 -	if (last > MM_VM_SIZE(mm) || last < end)
  88.320 -		last = MM_VM_SIZE(mm);
  88.321 -
  88.322 -	if (!prev) {
  88.323 -		prev = mm->mmap;
  88.324 -		if (!prev)
  88.325 -			goto no_mmaps;
  88.326 -		if (prev->vm_end > start) {
  88.327 -			if (last > prev->vm_start)
  88.328 -				last = prev->vm_start;
  88.329 -			goto no_mmaps;
  88.330 -		}
  88.331 -	}
  88.332 -	for (;;) {
  88.333 -		struct vm_area_struct *next = prev->vm_next;
  88.334 -
  88.335 -		if (next) {
  88.336 -			if (next->vm_start < start) {
  88.337 -				prev = next;
  88.338 -				continue;
  88.339 -			}
  88.340 -			if (last > next->vm_start)
  88.341 -				last = next->vm_start;
  88.342 -		}
  88.343 -		if (prev->vm_end > first)
  88.344 -			first = prev->vm_end;
  88.345 -		break;
  88.346 -	}
  88.347 -no_mmaps:
  88.348 -	if (last < first)	/* for arches with discontiguous pgd indices */
  88.349 -		return;
  88.350 -	if (first < FIRST_USER_PGD_NR * PGDIR_SIZE)
  88.351 -		first = FIRST_USER_PGD_NR * PGDIR_SIZE;
  88.352 -	/* No point trying to free anything if we're in the same pte page */
  88.353 -	if ((first & PMD_MASK) < (last & PMD_MASK)) {
  88.354 -		clear_page_range(tlb, first, last);
  88.355 -		flush_tlb_pgtables(mm, first, last);
  88.356 -	}
  88.357 -}
  88.358 -
  88.359  /* Normal function to fix up a mapping
  88.360   * This function is the default for when an area has no specific
  88.361   * function.  This may be used as part of a more specific routine.
  88.362 @@ -1677,14 +1605,13 @@ static void unmap_vma(struct mm_struct *
  88.363   * Ok - we have the memory areas we should free on the 'free' list,
  88.364   * so release them, and do the vma updates.
  88.365   */
  88.366 -static void unmap_vma_list(struct mm_struct *mm,
  88.367 -	struct vm_area_struct *mpnt)
  88.368 +static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
  88.369  {
  88.370  	do {
  88.371 -		struct vm_area_struct *next = mpnt->vm_next;
  88.372 -		unmap_vma(mm, mpnt);
  88.373 -		mpnt = next;
  88.374 -	} while (mpnt != NULL);
  88.375 +		struct vm_area_struct *next = vma->vm_next;
  88.376 +		unmap_vma(mm, vma);
  88.377 +		vma = next;
  88.378 +	} while (vma);
  88.379  	validate_mm(mm);
  88.380  }
  88.381  
  88.382 @@ -1694,24 +1621,22 @@ static void unmap_vma_list(struct mm_str
  88.383   * Called with the page table lock held.
  88.384   */
  88.385  static void unmap_region(struct mm_struct *mm,
  88.386 -	struct vm_area_struct *vma,
  88.387 -	struct vm_area_struct *prev,
  88.388 -	unsigned long start,
  88.389 -	unsigned long end)
  88.390 +		struct vm_area_struct *vma, struct vm_area_struct *prev,
  88.391 +		unsigned long start, unsigned long end)
  88.392  {
  88.393 +	struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
  88.394  	struct mmu_gather *tlb;
  88.395  	unsigned long nr_accounted = 0;
  88.396  
  88.397  	lru_add_drain();
  88.398 +	spin_lock(&mm->page_table_lock);
  88.399  	tlb = tlb_gather_mmu(mm, 0);
  88.400  	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
  88.401  	vm_unacct_memory(nr_accounted);
  88.402 -
  88.403 -	if (is_hugepage_only_range(start, end - start))
  88.404 -		hugetlb_free_pgtables(tlb, prev, start, end);
  88.405 -	else
  88.406 -		free_pgtables(tlb, prev, start, end);
  88.407 +	free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
  88.408 +				 next? next->vm_start: 0);
  88.409  	tlb_finish_mmu(tlb, start, end);
  88.410 +	spin_unlock(&mm->page_table_lock);
  88.411  }
  88.412  
  88.413  /*
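unmap_region() now takes mm->page_table_lock itself (do_munmap below no longer wraps the call) and hands the new range-based free_pgtables() an exact floor and ceiling — the end of the previous vma (or FIRST_USER_ADDRESS) and the start of the next (or 0, the top-of-space marker) — replacing the old PGDIR-granular guesswork and its hugepage special case. A small model of the bound computation:

#include <stdio.h>

struct vma { unsigned long start, end; };

#define FIRST_USER_ADDRESS 0UL  /* 0 on most ports */

/* Bounds handed to free_pgtables(): page tables may only be freed
 * strictly between the neighbouring mappings.  A ceiling of 0 is the
 * "no upper neighbour" marker (treated as top-of-space internally). */
static void bounds(struct vma *prev, struct vma *next,
                   unsigned long *floor, unsigned long *ceiling)
{
        *floor = prev ? prev->end : FIRST_USER_ADDRESS;
        *ceiling = next ? next->start : 0;
}

int main(void)
{
        struct vma prev = { 0x08048000, 0x08050000 };
        struct vma next = { 0x40000000, 0x40010000 };
        unsigned long floor, ceiling;

        bounds(&prev, &next, &floor, &ceiling);
        printf("free page tables in [%#lx, %#lx)\n", floor, ceiling);
        bounds(NULL, NULL, &floor, &ceiling);
        printf("whole space: floor %#lx, ceiling 0 = top\n", floor);
        return 0;
}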
  88.414 @@ -1797,7 +1722,7 @@ int split_vma(struct mm_struct * mm, str
  88.415  int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
  88.416  {
  88.417  	unsigned long end;
  88.418 -	struct vm_area_struct *mpnt, *prev, *last;
  88.419 +	struct vm_area_struct *vma, *prev, *last;
  88.420  
  88.421  	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
  88.422  		return -EINVAL;
  88.423 @@ -1806,14 +1731,14 @@ int do_munmap(struct mm_struct *mm, unsi
  88.424  		return -EINVAL;
  88.425  
  88.426  	/* Find the first overlapping VMA */
  88.427 -	mpnt = find_vma_prev(mm, start, &prev);
  88.428 -	if (!mpnt)
  88.429 +	vma = find_vma_prev(mm, start, &prev);
  88.430 +	if (!vma)
  88.431  		return 0;
  88.432 -	/* we have  start < mpnt->vm_end  */
  88.433 +	/* we have  start < vma->vm_end  */
  88.434  
  88.435  	/* if it doesn't overlap, we have nothing.. */
  88.436  	end = start + len;
  88.437 -	if (mpnt->vm_start >= end)
  88.438 +	if (vma->vm_start >= end)
  88.439  		return 0;
  88.440  
  88.441  	/*
  88.442 @@ -1823,11 +1748,11 @@ int do_munmap(struct mm_struct *mm, unsi
  88.443  	 * unmapped vm_area_struct will remain in use: so lower split_vma
  88.444  	 * places tmp vma above, and higher split_vma places tmp vma below.
  88.445  	 */
  88.446 -	if (start > mpnt->vm_start) {
  88.447 -		int error = split_vma(mm, mpnt, start, 0);
  88.448 +	if (start > vma->vm_start) {
  88.449 +		int error = split_vma(mm, vma, start, 0);
  88.450  		if (error)
  88.451  			return error;
  88.452 -		prev = mpnt;
  88.453 +		prev = vma;
  88.454  	}
  88.455  
  88.456  	/* Does it split the last one? */
  88.457 @@ -1837,18 +1762,16 @@ int do_munmap(struct mm_struct *mm, unsi
  88.458  		if (error)
  88.459  			return error;
  88.460  	}
  88.461 -	mpnt = prev? prev->vm_next: mm->mmap;
  88.462 +	vma = prev? prev->vm_next: mm->mmap;
  88.463  
  88.464  	/*
  88.465  	 * Remove the vma's, and unmap the actual pages
  88.466  	 */
  88.467 -	detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
  88.468 -	spin_lock(&mm->page_table_lock);
  88.469 -	unmap_region(mm, mpnt, prev, start, end);
  88.470 -	spin_unlock(&mm->page_table_lock);
  88.471 +	detach_vmas_to_be_unmapped(mm, vma, prev, end);
  88.472 +	unmap_region(mm, vma, prev, start, end);
  88.473  
  88.474  	/* Fix up all other VM information */
  88.475 -	unmap_vma_list(mm, mpnt);
  88.476 +	unmap_vma_list(mm, vma);
  88.477  
  88.478  	return 0;
  88.479  }
  88.480 @@ -1903,9 +1826,10 @@ unsigned long do_brk(unsigned long addr,
  88.481  	 */
  88.482  	if (mm->def_flags & VM_LOCKED) {
  88.483  		unsigned long locked, lock_limit;
  88.484 -		locked = mm->locked_vm << PAGE_SHIFT;
  88.485 +		locked = len >> PAGE_SHIFT;
  88.486 +		locked += mm->locked_vm;
  88.487  		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
  88.488 -		locked += len;
  88.489 +		lock_limit >>= PAGE_SHIFT;
  88.490  		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
  88.491  			return -EAGAIN;
  88.492  	}
  88.493 @@ -1928,8 +1852,7 @@ unsigned long do_brk(unsigned long addr,
  88.494  	}
  88.495  
  88.496  	/* Check against address space limits *after* clearing old maps... */
  88.497 -	if ((mm->total_vm << PAGE_SHIFT) + len
  88.498 -	    > current->signal->rlim[RLIMIT_AS].rlim_cur)
  88.499 +	if (!may_expand_vm(mm, len >> PAGE_SHIFT))
  88.500  		return -ENOMEM;
  88.501  
  88.502  	if (mm->map_count > sysctl_max_map_count)
  88.503 @@ -1968,8 +1891,6 @@ out:
  88.504  		mm->locked_vm += len >> PAGE_SHIFT;
  88.505  		make_pages_present(addr, addr + len);
  88.506  	}
  88.507 -	acct_update_integrals();
  88.508 -	update_mem_hiwater();
  88.509  	return addr;
  88.510  }
  88.511  
  88.512 @@ -1979,8 +1900,9 @@ EXPORT_SYMBOL(do_brk);
  88.513  void exit_mmap(struct mm_struct *mm)
  88.514  {
  88.515  	struct mmu_gather *tlb;
  88.516 -	struct vm_area_struct *vma;
  88.517 +	struct vm_area_struct *vma = mm->mmap;
  88.518  	unsigned long nr_accounted = 0;
  88.519 +	unsigned long end;
  88.520  
  88.521  #ifdef arch_exit_mmap
  88.522  	arch_exit_mmap(mm);
  88.523 @@ -1990,21 +1912,17 @@ void exit_mmap(struct mm_struct *mm)
  88.524  
  88.525  	spin_lock(&mm->page_table_lock);
  88.526  
  88.527 -	tlb = tlb_gather_mmu(mm, 1);
  88.528  	flush_cache_mm(mm);
  88.529 -	/* Use ~0UL here to ensure all VMAs in the mm are unmapped */
  88.530 -	mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
  88.531 -					~0UL, &nr_accounted, NULL);
  88.532 +	tlb = tlb_gather_mmu(mm, 1);
  88.533 +	/* Use -1 here to ensure all VMAs in the mm are unmapped */
  88.534 +	end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
  88.535  	vm_unacct_memory(nr_accounted);
  88.536 -	BUG_ON(mm->map_count);	/* This is just debugging */
  88.537 -	clear_page_range(tlb, FIRST_USER_PGD_NR * PGDIR_SIZE, MM_VM_SIZE(mm));
  88.538 -	
  88.539 -	tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
  88.540 +	free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
  88.541 +	tlb_finish_mmu(tlb, 0, end);
  88.542  
  88.543 -	vma = mm->mmap;
  88.544  	mm->mmap = mm->mmap_cache = NULL;
  88.545  	mm->mm_rb = RB_ROOT;
  88.546 -	mm->rss = 0;
  88.547 +	set_mm_counter(mm, rss, 0);
  88.548  	mm->total_vm = 0;
  88.549  	mm->locked_vm = 0;
  88.550  
  88.551 @@ -2019,6 +1937,8 @@ void exit_mmap(struct mm_struct *mm)
  88.552  		remove_vm_struct(vma);
  88.553  		vma = next;
  88.554  	}
  88.555 +
  88.556 +	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
  88.557  }
  88.558  
  88.559  /* Insert vm structure into process list sorted by address
  88.560 @@ -2106,3 +2026,19 @@ struct vm_area_struct *copy_vma(struct v
  88.561  	}
  88.562  	return new_vma;
  88.563  }
  88.564 +
  88.565 +/*
  88.566 + * Return true if the calling process may expand its vm space by the passed
  88.567 + * number of pages
  88.568 + */
  88.569 +int may_expand_vm(struct mm_struct *mm, unsigned long npages)
  88.570 +{
  88.571 +	unsigned long cur = mm->total_vm;	/* pages */
  88.572 +	unsigned long lim;
  88.573 +
  88.574 +	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
  88.575 +
  88.576 +	if (cur + npages > lim)
  88.577 +		return 0;
  88.578 +	return 1;
  88.579 +}
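may_expand_vm() centralizes the RLIMIT_AS check that do_mmap_pgoff(), acct_stack_growth() and do_brk() previously open-coded in bytes; the helper works in pages throughout. A userspace model with an illustrative limit:

#include <stdio.h>

#define PAGE_SHIFT 12

/* Model of may_expand_vm(): all quantities in pages. */
static int may_expand_vm(unsigned long total_vm, unsigned long npages,
                         unsigned long rlimit_as_bytes)
{
        unsigned long lim = rlimit_as_bytes >> PAGE_SHIFT;

        return total_vm + npages <= lim;
}

int main(void)
{
        unsigned long rlim = 1UL << 30;         /* 1 GiB address-space cap */
        unsigned long total_vm = 260000;        /* pages already mapped */

        /* A further 64 MiB request (16384 pages) would exceed the cap. */
        printf("%s\n", may_expand_vm(total_vm, 16384, rlim)
                       ? "ok" : "-ENOMEM");
        return 0;
}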
    89.1 --- a/linux-2.6-xen-sparse/mm/page_alloc.c	Mon Jul 11 09:29:56 2005 -0500
    89.2 +++ b/linux-2.6-xen-sparse/mm/page_alloc.c	Mon Jul 11 09:35:19 2005 -0500
    89.3 @@ -31,19 +31,26 @@
    89.4  #include <linux/topology.h>
    89.5  #include <linux/sysctl.h>
    89.6  #include <linux/cpu.h>
    89.7 +#include <linux/cpuset.h>
    89.8  #include <linux/nodemask.h>
    89.9  #include <linux/vmalloc.h>
   89.10  
   89.11  #include <asm/tlbflush.h>
   89.12  #include "internal.h"
   89.13  
   89.14 -/* MCD - HACK: Find somewhere to initialize this EARLY, or make this initializer cleaner */
   89.15 +/*
   89.16 + * MCD - HACK: Find somewhere to initialize this EARLY, or make this
   89.17 + * initializer cleaner
   89.18 + */
   89.19  nodemask_t node_online_map = { { [0] = 1UL } };
   89.20 +EXPORT_SYMBOL(node_online_map);
   89.21  nodemask_t node_possible_map = NODE_MASK_ALL;
   89.22 +EXPORT_SYMBOL(node_possible_map);
   89.23  struct pglist_data *pgdat_list;
   89.24  unsigned long totalram_pages;
   89.25  unsigned long totalhigh_pages;
   89.26  long nr_swap_pages;
   89.27 +
   89.28  /*
   89.29   * results with 256, 32 in the lowmem_reserve sysctl:
   89.30   *	1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
   89.31 @@ -191,6 +198,37 @@ static inline void rmv_page_order(struct
   89.32  }
   89.33  
   89.34  /*
   89.35 + * Locate the struct page for both the matching buddy in our
   89.36 + * pair (buddy1) and the combined O(n+1) page they form (page).
   89.37 + *
   89.38 + * 1) Any buddy B1 will have an order O twin B2 which satisfies
   89.39 + * the following equation:
   89.40 + *     B2 = B1 ^ (1 << O)
   89.41 + * For example, if the starting buddy (buddy2) is #8 its order
   89.42 + * 1 buddy is #10:
   89.43 + *     B2 = 8 ^ (1 << 1) = 8 ^ 2 = 10
   89.44 + *
   89.45 + * 2) Any buddy B will have an order O+1 parent P which
   89.46 + * satisfies the following equation:
   89.47 + *     P = B & ~(1 << O)
   89.48 + *
   89.49 + * Assumption: *_mem_map is contiguous at least up to MAX_ORDER
   89.50 + */
   89.51 +static inline struct page *
   89.52 +__page_find_buddy(struct page *page, unsigned long page_idx, unsigned int order)
   89.53 +{
   89.54 +	unsigned long buddy_idx = page_idx ^ (1 << order);
   89.55 +
   89.56 +	return page + (buddy_idx - page_idx);
   89.57 +}
   89.58 +
   89.59 +static inline unsigned long
   89.60 +__find_combined_index(unsigned long page_idx, unsigned int order)
   89.61 +{
   89.62 +	return (page_idx & ~(1 << order));
   89.63 +}
   89.64 +
   89.65 +/*
   89.66   * This function checks whether a page is free && is the buddy
   89.67   * we can do coalesce a page and its buddy if
   89.68   * (a) the buddy is free &&
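The buddy computations become pure index arithmetic on the page's pfn rather than pointer arithmetic against zone->zone_mem_map; the pfn-based form only relies on mem_map being contiguous up to MAX_ORDER, per the comment above. The XOR/AND identities the comment quotes can be checked directly:

#include <stdio.h>

/* Index of the order-O buddy: flip bit O (B2 = B1 ^ (1 << O)). */
static unsigned long buddy_idx(unsigned long page_idx, unsigned int order)
{
        return page_idx ^ (1UL << order);
}

/* Index of the combined order-(O+1) block: clear bit O (P = B & ~(1 << O)). */
static unsigned long combined_idx(unsigned long page_idx, unsigned int order)
{
        return page_idx & ~(1UL << order);
}

int main(void)
{
        /* The example from the comment: page #8 at order 1 pairs with #10,
         * and both coalesce into the order-2 block starting at #8. */
        printf("buddy of 8 at order 1: %lu\n", buddy_idx(8, 1));
        printf("combined block index : %lu\n", combined_idx(10, 1));

        /* The pairing is symmetric: the buddy's buddy is the page itself. */
        printf("buddy(buddy(8)) = %lu\n", buddy_idx(buddy_idx(8, 1), 1));
        return 0;
}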
   89.69 @@ -233,50 +271,49 @@ static inline int page_is_buddy(struct p
   89.70   * -- wli
   89.71   */
   89.72  
   89.73 -static inline void __free_pages_bulk (struct page *page, struct page *base,
   89.74 +static inline void __free_pages_bulk (struct page *page,
   89.75  		struct zone *zone, unsigned int order)
   89.76  {
   89.77  	unsigned long page_idx;
   89.78 -	struct page *coalesced;
   89.79  	int order_size = 1 << order;
   89.80  
   89.81  	if (unlikely(order))
   89.82  		destroy_compound_page(page, order);
   89.83  
   89.84 -	page_idx = page - base;
   89.85 +	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
   89.86  
   89.87  	BUG_ON(page_idx & (order_size - 1));
   89.88  	BUG_ON(bad_range(zone, page));
   89.89  
   89.90  	zone->free_pages += order_size;
   89.91  	while (order < MAX_ORDER-1) {
   89.92 +		unsigned long combined_idx;
   89.93  		struct free_area *area;
   89.94  		struct page *buddy;
   89.95 -		int buddy_idx;
   89.96  
   89.97 -		buddy_idx = (page_idx ^ (1 << order));
   89.98 -		buddy = base + buddy_idx;
   89.99 +		combined_idx = __find_combined_index(page_idx, order);
  89.100 +		buddy = __page_find_buddy(page, page_idx, order);
  89.101 +
  89.102  		if (bad_range(zone, buddy))
  89.103  			break;
  89.104  		if (!page_is_buddy(buddy, order))
  89.105 -			break;
  89.106 -		/* Move the buddy up one level. */
  89.107 +			break;		/* Move the buddy up one level. */
  89.108  		list_del(&buddy->lru);
  89.109  		area = zone->free_area + order;
  89.110  		area->nr_free--;
  89.111  		rmv_page_order(buddy);
  89.112 -		page_idx &= buddy_idx;
  89.113 +		page = page + (combined_idx - page_idx);
  89.114 +		page_idx = combined_idx;
  89.115  		order++;
  89.116  	}
  89.117 -	coalesced = base + page_idx;
  89.118 -	set_page_order(coalesced, order);
  89.119 -	list_add(&coalesced->lru, &zone->free_area[order].free_list);
  89.120 +	set_page_order(page, order);
  89.121 +	list_add(&page->lru, &zone->free_area[order].free_list);
  89.122  	zone->free_area[order].nr_free++;
  89.123  }
  89.124  
  89.125  static inline void free_pages_check(const char *function, struct page *page)
  89.126  {
  89.127 -	if (	page_mapped(page) ||
  89.128 +	if (	page_mapcount(page) ||
  89.129  		page->mapping != NULL ||
  89.130  		page_count(page) != 0 ||
  89.131  		(page->flags & (
  89.132 @@ -309,10 +346,9 @@ free_pages_bulk(struct zone *zone, int c
  89.133  		struct list_head *list, unsigned int order)
  89.134  {
  89.135  	unsigned long flags;
  89.136 -	struct page *base, *page = NULL;
  89.137 +	struct page *page = NULL;
  89.138  	int ret = 0;
  89.139  
  89.140 -	base = zone->zone_mem_map;
  89.141  	spin_lock_irqsave(&zone->lock, flags);
  89.142  	zone->all_unreclaimable = 0;
  89.143  	zone->pages_scanned = 0;
  89.144 @@ -320,7 +356,7 @@ free_pages_bulk(struct zone *zone, int c
  89.145  		page = list_entry(list->prev, struct page, lru);
  89.146  		/* have to delete it as __free_pages_bulk list manipulates */
  89.147  		list_del(&page->lru);
  89.148 -		__free_pages_bulk(page, base, zone, order);
  89.149 +		__free_pages_bulk(page, zone, order);
  89.150  		ret++;
  89.151  	}
  89.152  	spin_unlock_irqrestore(&zone->lock, flags);
  89.153 @@ -405,7 +441,7 @@ void set_page_refs(struct page *page, in
  89.154   */
  89.155  static void prep_new_page(struct page *page, int order)
  89.156  {
  89.157 -	if (page->mapping || page_mapped(page) ||
  89.158 +	if (page->mapping || page_mapcount(page) ||
  89.159  	    (page->flags & (
  89.160  			1 << PG_private	|
  89.161  			1 << PG_locked	|
  89.162 @@ -601,7 +637,7 @@ void fastcall free_cold_page(struct page
  89.163  	free_hot_cold_page(page, 1);
  89.164  }
  89.165  
  89.166 -static inline void prep_zero_page(struct page *page, int order, int gfp_flags)
  89.167 +static inline void prep_zero_page(struct page *page, int order, unsigned int __nocast gfp_flags)
  89.168  {
  89.169  	int i;
  89.170  
  89.171 @@ -616,7 +652,7 @@ static inline void prep_zero_page(struct
  89.172   * or two.
  89.173   */
  89.174  static struct page *
  89.175 -buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
  89.176 +buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
  89.177  {
  89.178  	unsigned long flags;
  89.179  	struct page *page = NULL;
  89.180 @@ -694,7 +730,7 @@ int zone_watermark_ok(struct zone *z, in
  89.181   * This is the 'heart' of the zoned buddy allocator.
  89.182   */
  89.183  struct page * fastcall
  89.184 -__alloc_pages(unsigned int gfp_mask, unsigned int order,
  89.185 +__alloc_pages(unsigned int __nocast gfp_mask, unsigned int order,
  89.186  		struct zonelist *zonelist)
  89.187  {
  89.188