direct-io.hg

changeset:   14467:dcec453681bc

[POWERPC][XEN] Merge with xen-unstable.hg.
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>

author:      Hollis Blanchard <hollisb@us.ibm.com>
date:        Thu Mar 08 14:39:52 2007 -0600 (2007-03-08)
parents:     8f0b5295bb1b 38513d22d234
children:    59305500d95d
files:       linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c
             linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c
             linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
             linux-2.6-xen-sparse/mm/Kconfig
             tools/ptsname/Makefile
             tools/ptsname/ptsname.c
             tools/ptsname/setup.py
             xen/arch/x86/mm/shadow/page-guest32.h
line diff
     1.1 --- a/Config.mk	Mon Mar 05 12:49:12 2007 -0600
     1.2 +++ b/Config.mk	Thu Mar 08 14:39:52 2007 -0600
     1.3 @@ -73,9 +73,10 @@ ACM_SECURITY ?= n
     1.4  ACM_DEFAULT_SECURITY_POLICY ?= ACM_NULL_POLICY
     1.5  
     1.6  # Optional components
     1.7 -XENSTAT_XENTOP ?= y
     1.8 -VTPM_TOOLS ?= n
     1.9 +XENSTAT_XENTOP     ?= y
    1.10 +VTPM_TOOLS         ?= n
    1.11  LIBXENAPI_BINDINGS ?= n
    1.12 -XENFB_TOOLS ?= n
    1.13 +XENFB_TOOLS        ?= n
    1.14 +PYTHON_TOOLS       ?= y
    1.15  
    1.16  -include $(XEN_ROOT)/.config
     2.1 --- a/config/StdGNU.mk	Mon Mar 05 12:49:12 2007 -0600
     2.2 +++ b/config/StdGNU.mk	Thu Mar 08 14:39:52 2007 -0600
     2.3 @@ -12,9 +12,9 @@ OBJDUMP    = $(CROSS_COMPILE)objdump
     2.4  MSGFMT     = msgfmt
     2.5  
     2.6  INSTALL      = install
     2.7 -INSTALL_DIR  = $(INSTALL) -d -m0755
     2.8 -INSTALL_DATA = $(INSTALL) -m0644
     2.9 -INSTALL_PROG = $(INSTALL) -m0755
    2.10 +INSTALL_DIR  = $(INSTALL) -d -m0755 -p
    2.11 +INSTALL_DATA = $(INSTALL) -m0644 -p
    2.12 +INSTALL_PROG = $(INSTALL) -m0755 -p
    2.13  
    2.14  LIB64DIR = lib64
    2.15  
     3.1 --- a/config/SunOS.mk	Mon Mar 05 12:49:12 2007 -0600
     3.2 +++ b/config/SunOS.mk	Thu Mar 08 14:39:52 2007 -0600
     3.3 @@ -14,9 +14,9 @@ MSGFMT     = gmsgfmt
     3.4  SHELL      = bash
     3.5  
     3.6  INSTALL      = ginstall
     3.7 -INSTALL_DIR  = $(INSTALL) -d -m0755
     3.8 -INSTALL_DATA = $(INSTALL) -m0644
     3.9 -INSTALL_PROG = $(INSTALL) -m0755
    3.10 +INSTALL_DIR  = $(INSTALL) -d -m0755 -p
    3.11 +INSTALL_DATA = $(INSTALL) -m0644 -p
    3.12 +INSTALL_PROG = $(INSTALL) -m0755 -p
    3.13  
    3.14  LIB64DIR = lib/amd64
    3.15  
     4.1 --- a/docs/misc/dump-core-format.txt	Mon Mar 05 12:49:12 2007 -0600
     4.2 +++ b/docs/misc/dump-core-format.txt	Thu Mar 08 14:39:52 2007 -0600
     4.3 @@ -26,11 +26,12 @@ For xen related structure, please see th
     4.4  Elf header
     4.5  ----------
     4.6  The elf header members are set as follows
     4.7 +        e_ident[EI_CLASS] = ELFCLASS64 = 2
     4.8          e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
     4.9          e_type = ET_CORE = 4
    4.10 -e_ident[EI_CLASS], e_ident[EI_DATA] and e_flags are set according
    4.11 -to an architecture which a file is created. Other members are set as usual.
    4.12 -
    4.13 +ELFCLASS64 is always used independent of architecture.
    4.14 +e_ident[EI_DATA] and e_flags are set according to the dumping system's
    4.15 +architecture. Other members are set as usual.
    4.16  
    4.17  Sections
    4.18  --------
    4.19 @@ -221,5 +222,10 @@ format_version descriptor
    4.20  
    4.21  Format version history
    4.22  ----------------------
    4.23 -The currently only (major, minor) = (0, 1) is used.
    4.24 +Currently only (major, minor) = (0, 1) is used.
    4.25  [When the format is changed, it would be described here.]
    4.26 +
    4.27 +(0, 1) update
    4.28 +- EI_CLASS member of elf header was changed to ELFCLASS64 independent of
    4.29 +  architecture. This is mainly for x86_32pae.
    4.30 +  The format version isn't bumped because analysis tools can distinguish it.
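
The format note above shifts a burden onto analysis tools: the ELF class can no
longer be inferred from the guest architecture and must be read from the file
itself, while EI_DATA and e_flags still follow the dumping system. A minimal
userspace sketch of that check (the dump file path and the program itself are
illustrative, not part of this changeset):

/* Sketch: verify a xen dump-core file uses the ELFCLASS64 layout
 * described above. "guest.dump-core" is an example path. */
#include <elf.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv)
{
	unsigned char ident[EI_NIDENT];
	FILE *f = fopen(argc > 1 ? argv[1] : "guest.dump-core", "rb");

	if (!f || fread(ident, 1, EI_NIDENT, f) != EI_NIDENT) {
		perror("read");
		return EXIT_FAILURE;
	}
	fclose(f);

	if (memcmp(ident, ELFMAG, SELFMAG) != 0) {
		fprintf(stderr, "not an ELF file\n");
		return EXIT_FAILURE;
	}
	/* Per the (0, 1) update: always ELFCLASS64, even for x86_32pae guests. */
	printf("EI_CLASS = %u (%s)\n", ident[EI_CLASS],
	       ident[EI_CLASS] == ELFCLASS64 ? "ELFCLASS64" : "other");
	/* EI_DATA still reflects the dumping system's byte order. */
	printf("EI_DATA  = %u\n", ident[EI_DATA]);
	return EXIT_SUCCESS;
}

Build with any C compiler; on a dump taken from an x86_32pae guest the first
line should still report ELFCLASS64.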
     5.1 --- a/linux-2.6-xen-sparse/arch/i386/Kconfig	Mon Mar 05 12:49:12 2007 -0600
     5.2 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig	Thu Mar 08 14:39:52 2007 -0600
     5.3 @@ -255,7 +255,6 @@ config NR_CPUS
     5.4  config SCHED_SMT
     5.5  	bool "SMT (Hyperthreading) scheduler support"
     5.6  	depends on X86_HT
     5.7 -	depends on !X86_XEN
     5.8  	help
     5.9  	  SMT scheduler support improves the CPU scheduler's decision making
    5.10  	  when dealing with Intel Pentium 4 chips with HyperThreading at a
    5.11 @@ -313,11 +312,6 @@ config X86_VISWS_APIC
    5.12  	depends on X86_VISWS
    5.13  	default y
    5.14  
    5.15 -config X86_TSC
    5.16 -	bool
    5.17 -	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ && !X86_XEN
    5.18 -	default y
    5.19 -
    5.20  config X86_MCE
    5.21  	bool "Machine Check Exception"
    5.22  	depends on !(X86_VOYAGER || X86_XEN)
     6.1 --- a/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu	Mon Mar 05 12:49:12 2007 -0600
     6.2 +++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu	Thu Mar 08 14:39:52 2007 -0600
     6.3 @@ -311,5 +311,5 @@ config X86_OOSTORE
     6.4  
     6.5  config X86_TSC
     6.6  	bool
     6.7 -	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
     6.8 +	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ && !X86_XEN
     6.9  	default y
     7.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c	Mon Mar 05 12:49:12 2007 -0600
     7.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.3 @@ -1,389 +0,0 @@
     7.4 -#include <linux/module.h>
     7.5 -#include <linux/spinlock.h>
     7.6 -#include <linux/list.h>
     7.7 -#include <asm/alternative.h>
     7.8 -#include <asm/sections.h>
     7.9 -
    7.10 -static int no_replacement    = 0;
    7.11 -static int smp_alt_once      = 0;
    7.12 -static int debug_alternative = 0;
    7.13 -
    7.14 -static int __init noreplacement_setup(char *s)
    7.15 -{
    7.16 -	no_replacement = 1;
    7.17 -	return 1;
    7.18 -}
    7.19 -static int __init bootonly(char *str)
    7.20 -{
    7.21 -	smp_alt_once = 1;
    7.22 -	return 1;
    7.23 -}
    7.24 -static int __init debug_alt(char *str)
    7.25 -{
    7.26 -	debug_alternative = 1;
    7.27 -	return 1;
    7.28 -}
    7.29 -
    7.30 -__setup("noreplacement", noreplacement_setup);
    7.31 -__setup("smp-alt-boot", bootonly);
    7.32 -__setup("debug-alternative", debug_alt);
    7.33 -
    7.34 -#define DPRINTK(fmt, args...) if (debug_alternative) \
    7.35 -	printk(KERN_DEBUG fmt, args)
    7.36 -
    7.37 -#ifdef GENERIC_NOP1
    7.38 -/* Use inline assembly to define this because the nops are defined
    7.39 -   as inline assembly strings in the include files and we cannot
    7.40 -   get them easily into strings. */
    7.41 -asm("\t.data\nintelnops: "
    7.42 -	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
    7.43 -	GENERIC_NOP7 GENERIC_NOP8);
    7.44 -extern unsigned char intelnops[];
    7.45 -static unsigned char *intel_nops[ASM_NOP_MAX+1] = {
    7.46 -	NULL,
    7.47 -	intelnops,
    7.48 -	intelnops + 1,
    7.49 -	intelnops + 1 + 2,
    7.50 -	intelnops + 1 + 2 + 3,
    7.51 -	intelnops + 1 + 2 + 3 + 4,
    7.52 -	intelnops + 1 + 2 + 3 + 4 + 5,
    7.53 -	intelnops + 1 + 2 + 3 + 4 + 5 + 6,
    7.54 -	intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
    7.55 -};
    7.56 -#endif
    7.57 -
    7.58 -#ifdef K8_NOP1
    7.59 -asm("\t.data\nk8nops: "
    7.60 -	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
    7.61 -	K8_NOP7 K8_NOP8);
    7.62 -extern unsigned char k8nops[];
    7.63 -static unsigned char *k8_nops[ASM_NOP_MAX+1] = {
    7.64 -	NULL,
    7.65 -	k8nops,
    7.66 -	k8nops + 1,
    7.67 -	k8nops + 1 + 2,
    7.68 -	k8nops + 1 + 2 + 3,
    7.69 -	k8nops + 1 + 2 + 3 + 4,
    7.70 -	k8nops + 1 + 2 + 3 + 4 + 5,
    7.71 -	k8nops + 1 + 2 + 3 + 4 + 5 + 6,
    7.72 -	k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
    7.73 -};
    7.74 -#endif
    7.75 -
    7.76 -#ifdef K7_NOP1
    7.77 -asm("\t.data\nk7nops: "
    7.78 -	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
    7.79 -	K7_NOP7 K7_NOP8);
    7.80 -extern unsigned char k7nops[];
    7.81 -static unsigned char *k7_nops[ASM_NOP_MAX+1] = {
    7.82 -	NULL,
    7.83 -	k7nops,
    7.84 -	k7nops + 1,
    7.85 -	k7nops + 1 + 2,
    7.86 -	k7nops + 1 + 2 + 3,
    7.87 -	k7nops + 1 + 2 + 3 + 4,
    7.88 -	k7nops + 1 + 2 + 3 + 4 + 5,
    7.89 -	k7nops + 1 + 2 + 3 + 4 + 5 + 6,
    7.90 -	k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7,
    7.91 -};
    7.92 -#endif
    7.93 -
    7.94 -#ifdef CONFIG_X86_64
    7.95 -
    7.96 -extern char __vsyscall_0;
    7.97 -static inline unsigned char** find_nop_table(void)
    7.98 -{
    7.99 -	return k8_nops;
   7.100 -}
   7.101 -
   7.102 -#else /* CONFIG_X86_64 */
   7.103 -
   7.104 -static struct nop {
   7.105 -	int cpuid;
   7.106 -	unsigned char **noptable;
   7.107 -} noptypes[] = {
   7.108 -	{ X86_FEATURE_K8, k8_nops },
   7.109 -	{ X86_FEATURE_K7, k7_nops },
   7.110 -	{ -1, NULL }
   7.111 -};
   7.112 -
   7.113 -static unsigned char** find_nop_table(void)
   7.114 -{
   7.115 -	unsigned char **noptable = intel_nops;
   7.116 -	int i;
   7.117 -
   7.118 -	for (i = 0; noptypes[i].cpuid >= 0; i++) {
   7.119 -		if (boot_cpu_has(noptypes[i].cpuid)) {
   7.120 -			noptable = noptypes[i].noptable;
   7.121 -			break;
   7.122 -		}
   7.123 -	}
   7.124 -	return noptable;
   7.125 -}
   7.126 -
   7.127 -#endif /* CONFIG_X86_64 */
   7.128 -
   7.129 -extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
   7.130 -extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
   7.131 -extern u8 *__smp_locks[], *__smp_locks_end[];
   7.132 -
   7.133 -extern u8 __smp_alt_begin[], __smp_alt_end[];
   7.134 -
   7.135 -/* Replace instructions with better alternatives for this CPU type.
   7.136 -   This runs before SMP is initialized to avoid SMP problems with
   7.137 -   self modifying code. This implies that assymetric systems where
   7.138 -   APs have less capabilities than the boot processor are not handled.
   7.139 -   Tough. Make sure you disable such features by hand. */
   7.140 -
   7.141 -void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
   7.142 -{
   7.143 -	unsigned char **noptable = find_nop_table();
   7.144 -	struct alt_instr *a;
   7.145 -	u8 *instr;
   7.146 -	int diff, i, k;
   7.147 -
   7.148 -	DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
   7.149 -	for (a = start; a < end; a++) {
   7.150 -		BUG_ON(a->replacementlen > a->instrlen);
   7.151 -		if (!boot_cpu_has(a->cpuid))
   7.152 -			continue;
   7.153 -		instr = a->instr;
   7.154 -#ifdef CONFIG_X86_64
   7.155 -		/* vsyscall code is not mapped yet. resolve it manually. */
   7.156 -		if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
   7.157 -			instr -= VSYSCALL_START - (unsigned long)&__vsyscall_0;
   7.158 -			DPRINTK("%s: vsyscall fixup: %p => %p\n",
   7.159 -				__FUNCTION__, a->instr, instr);
   7.160 -		}
   7.161 -#endif
   7.162 -		memcpy(instr, a->replacement, a->replacementlen);
   7.163 -		diff = a->instrlen - a->replacementlen;
   7.164 -		/* Pad the rest with nops */
   7.165 -		for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
   7.166 -			k = diff;
   7.167 -			if (k > ASM_NOP_MAX)
   7.168 -				k = ASM_NOP_MAX;
   7.169 -			memcpy(a->instr + i, noptable[k], k);
   7.170 -		}
   7.171 -	}
   7.172 -}
   7.173 -
   7.174 -#ifdef CONFIG_SMP
   7.175 -
   7.176 -static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
   7.177 -{
   7.178 -	struct alt_instr *a;
   7.179 -
   7.180 -	DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
   7.181 -	for (a = start; a < end; a++) {
   7.182 -		memcpy(a->replacement + a->replacementlen,
   7.183 -		       a->instr,
   7.184 -		       a->instrlen);
   7.185 -	}
   7.186 -}
   7.187 -
   7.188 -static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
   7.189 -{
   7.190 -	struct alt_instr *a;
   7.191 -
   7.192 -	for (a = start; a < end; a++) {
   7.193 -		memcpy(a->instr,
   7.194 -		       a->replacement + a->replacementlen,
   7.195 -		       a->instrlen);
   7.196 -	}
   7.197 -}
   7.198 -
   7.199 -static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
   7.200 -{
   7.201 -	u8 **ptr;
   7.202 -
   7.203 -	for (ptr = start; ptr < end; ptr++) {
   7.204 -		if (*ptr < text)
   7.205 -			continue;
   7.206 -		if (*ptr > text_end)
   7.207 -			continue;
   7.208 -		**ptr = 0xf0; /* lock prefix */
   7.209 -	};
   7.210 -}
   7.211 -
   7.212 -static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
   7.213 -{
   7.214 -	unsigned char **noptable = find_nop_table();
   7.215 -	u8 **ptr;
   7.216 -
   7.217 -	for (ptr = start; ptr < end; ptr++) {
   7.218 -		if (*ptr < text)
   7.219 -			continue;
   7.220 -		if (*ptr > text_end)
   7.221 -			continue;
   7.222 -		**ptr = noptable[1][0];
   7.223 -	};
   7.224 -}
   7.225 -
   7.226 -struct smp_alt_module {
   7.227 -	/* what is this ??? */
   7.228 -	struct module	*mod;
   7.229 -	char		*name;
   7.230 -
   7.231 -	/* ptrs to lock prefixes */
   7.232 -	u8		**locks;
   7.233 -	u8		**locks_end;
   7.234 -
   7.235 -	/* .text segment, needed to avoid patching init code ;) */
   7.236 -	u8		*text;
   7.237 -	u8		*text_end;
   7.238 -
   7.239 -	struct list_head next;
   7.240 -};
   7.241 -static LIST_HEAD(smp_alt_modules);
   7.242 -static DEFINE_SPINLOCK(smp_alt);
   7.243 -
   7.244 -void alternatives_smp_module_add(struct module *mod, char *name,
   7.245 -				 void *locks, void *locks_end,
   7.246 -				 void *text,  void *text_end)
   7.247 -{
   7.248 -	struct smp_alt_module *smp;
   7.249 -	unsigned long flags;
   7.250 -
   7.251 -	if (no_replacement)
   7.252 -		return;
   7.253 -
   7.254 -	if (smp_alt_once) {
   7.255 -		if (boot_cpu_has(X86_FEATURE_UP))
   7.256 -			alternatives_smp_unlock(locks, locks_end,
   7.257 -						text, text_end);
   7.258 -		return;
   7.259 -	}
   7.260 -
   7.261 -	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
   7.262 -	if (NULL == smp)
   7.263 -		return; /* we'll run the (safe but slow) SMP code then ... */
   7.264 -
   7.265 -	smp->mod	= mod;
   7.266 -	smp->name	= name;
   7.267 -	smp->locks	= locks;
   7.268 -	smp->locks_end	= locks_end;
   7.269 -	smp->text	= text;
   7.270 -	smp->text_end	= text_end;
   7.271 -	DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
   7.272 -		__FUNCTION__, smp->locks, smp->locks_end,
   7.273 -		smp->text, smp->text_end, smp->name);
   7.274 -
   7.275 -	spin_lock_irqsave(&smp_alt, flags);
   7.276 -	list_add_tail(&smp->next, &smp_alt_modules);
   7.277 -	if (boot_cpu_has(X86_FEATURE_UP))
   7.278 -		alternatives_smp_unlock(smp->locks, smp->locks_end,
   7.279 -					smp->text, smp->text_end);
   7.280 -	spin_unlock_irqrestore(&smp_alt, flags);
   7.281 -}
   7.282 -
   7.283 -void alternatives_smp_module_del(struct module *mod)
   7.284 -{
   7.285 -	struct smp_alt_module *item;
   7.286 -	unsigned long flags;
   7.287 -
   7.288 -	if (no_replacement || smp_alt_once)
   7.289 -		return;
   7.290 -
   7.291 -	spin_lock_irqsave(&smp_alt, flags);
   7.292 -	list_for_each_entry(item, &smp_alt_modules, next) {
   7.293 -		if (mod != item->mod)
   7.294 -			continue;
   7.295 -		list_del(&item->next);
   7.296 -		spin_unlock_irqrestore(&smp_alt, flags);
   7.297 -		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
   7.298 -		kfree(item);
   7.299 -		return;
   7.300 -	}
   7.301 -	spin_unlock_irqrestore(&smp_alt, flags);
   7.302 -}
   7.303 -
   7.304 -void alternatives_smp_switch(int smp)
   7.305 -{
   7.306 -	struct smp_alt_module *mod;
   7.307 -	unsigned long flags;
   7.308 -
   7.309 -#ifdef CONFIG_LOCKDEP
   7.310 -	/*
   7.311 -	 * A not yet fixed binutils section handling bug prevents
   7.312 -	 * alternatives-replacement from working reliably, so turn
   7.313 -	 * it off:
   7.314 -	 */
   7.315 -	printk("lockdep: not fixing up alternatives.\n");
   7.316 -	return;
   7.317 -#endif
   7.318 -
   7.319 -	if (no_replacement || smp_alt_once)
   7.320 -		return;
   7.321 -	BUG_ON(!smp && (num_online_cpus() > 1));
   7.322 -
   7.323 -	spin_lock_irqsave(&smp_alt, flags);
   7.324 -	if (smp) {
   7.325 -		printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
   7.326 -		clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
   7.327 -		clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
   7.328 -		alternatives_smp_apply(__smp_alt_instructions,
   7.329 -				       __smp_alt_instructions_end);
   7.330 -		list_for_each_entry(mod, &smp_alt_modules, next)
   7.331 -			alternatives_smp_lock(mod->locks, mod->locks_end,
   7.332 -					      mod->text, mod->text_end);
   7.333 -	} else {
   7.334 -		printk(KERN_INFO "SMP alternatives: switching to UP code\n");
   7.335 -		set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
   7.336 -		set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
   7.337 -		apply_alternatives(__smp_alt_instructions,
   7.338 -				   __smp_alt_instructions_end);
   7.339 -		list_for_each_entry(mod, &smp_alt_modules, next)
   7.340 -			alternatives_smp_unlock(mod->locks, mod->locks_end,
   7.341 -						mod->text, mod->text_end);
   7.342 -	}
   7.343 -	spin_unlock_irqrestore(&smp_alt, flags);
   7.344 -}
   7.345 -
   7.346 -#endif
   7.347 -
   7.348 -void __init alternative_instructions(void)
   7.349 -{
   7.350 -	if (no_replacement) {
   7.351 -		printk(KERN_INFO "(SMP-)alternatives turned off\n");
   7.352 -		free_init_pages("SMP alternatives",
   7.353 -				(unsigned long)__smp_alt_begin,
   7.354 -				(unsigned long)__smp_alt_end);
   7.355 -		return;
   7.356 -	}
   7.357 -	apply_alternatives(__alt_instructions, __alt_instructions_end);
   7.358 -
   7.359 -	/* switch to patch-once-at-boottime-only mode and free the
   7.360 -	 * tables in case we know the number of CPUs will never ever
   7.361 -	 * change */
   7.362 -#ifdef CONFIG_HOTPLUG_CPU
   7.363 -	if (num_possible_cpus() < 2)
   7.364 -		smp_alt_once = 1;
   7.365 -#else
   7.366 -	smp_alt_once = 1;
   7.367 -#endif
   7.368 -
   7.369 -#ifdef CONFIG_SMP
   7.370 -	if (smp_alt_once) {
   7.371 -		if (1 == num_possible_cpus()) {
   7.372 -			printk(KERN_INFO "SMP alternatives: switching to UP code\n");
   7.373 -			set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
   7.374 -			set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
   7.375 -			apply_alternatives(__smp_alt_instructions,
   7.376 -					   __smp_alt_instructions_end);
   7.377 -			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
   7.378 -						_text, _etext);
   7.379 -		}
   7.380 -		free_init_pages("SMP alternatives",
   7.381 -				(unsigned long)__smp_alt_begin,
   7.382 -				(unsigned long)__smp_alt_end);
   7.383 -	} else {
   7.384 -		alternatives_smp_save(__smp_alt_instructions,
   7.385 -				      __smp_alt_instructions_end);
   7.386 -		alternatives_smp_module_add(NULL, "core kernel",
   7.387 -					    __smp_locks, __smp_locks_end,
   7.388 -					    _text, _etext);
   7.389 -		alternatives_smp_switch(0);
   7.390 -	}
   7.391 -#endif
   7.392 -}
     8.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c	Mon Mar 05 12:49:12 2007 -0600
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,774 +0,0 @@
     8.4 -/*
     8.5 - *      Routines to indentify caches on Intel CPU.
     8.6 - *
     8.7 - *      Changes:
     8.8 - *      Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
     8.9 - *		Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
    8.10 - *	Andi Kleen		: CPUID4 emulation on AMD.
    8.11 - */
    8.12 -
    8.13 -#include <linux/init.h>
    8.14 -#include <linux/slab.h>
    8.15 -#include <linux/device.h>
    8.16 -#include <linux/compiler.h>
    8.17 -#include <linux/cpu.h>
    8.18 -#include <linux/sched.h>
    8.19 -
    8.20 -#include <asm/processor.h>
    8.21 -#include <asm/smp.h>
    8.22 -
    8.23 -#define LVL_1_INST	1
    8.24 -#define LVL_1_DATA	2
    8.25 -#define LVL_2		3
    8.26 -#define LVL_3		4
    8.27 -#define LVL_TRACE	5
    8.28 -
    8.29 -struct _cache_table
    8.30 -{
    8.31 -	unsigned char descriptor;
    8.32 -	char cache_type;
    8.33 -	short size;
    8.34 -};
    8.35 -
    8.36 -/* all the cache descriptor types we care about (no TLB or trace cache entries) */
    8.37 -static struct _cache_table cache_table[] __cpuinitdata =
    8.38 -{
    8.39 -	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
    8.40 -	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
    8.41 -	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
    8.42 -	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
    8.43 -	{ 0x22, LVL_3,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    8.44 -	{ 0x23, LVL_3,      1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    8.45 -	{ 0x25, LVL_3,      2048 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    8.46 -	{ 0x29, LVL_3,      4096 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    8.47 -	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
    8.48 -	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
    8.49 -	{ 0x39, LVL_2,      128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    8.50 -	{ 0x3a, LVL_2,      192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
    8.51 -	{ 0x3b, LVL_2,      128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
    8.52 -	{ 0x3c, LVL_2,      256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    8.53 -	{ 0x3d, LVL_2,      384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
    8.54 -	{ 0x3e, LVL_2,      512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    8.55 -	{ 0x41, LVL_2,      128 },	/* 4-way set assoc, 32 byte line size */
    8.56 -	{ 0x42, LVL_2,      256 },	/* 4-way set assoc, 32 byte line size */
    8.57 -	{ 0x43, LVL_2,      512 },	/* 4-way set assoc, 32 byte line size */
    8.58 -	{ 0x44, LVL_2,      1024 },	/* 4-way set assoc, 32 byte line size */
    8.59 -	{ 0x45, LVL_2,      2048 },	/* 4-way set assoc, 32 byte line size */
    8.60 -	{ 0x46, LVL_3,      4096 },	/* 4-way set assoc, 64 byte line size */
    8.61 -	{ 0x47, LVL_3,      8192 },	/* 8-way set assoc, 64 byte line size */
    8.62 -	{ 0x49, LVL_3,      4096 },	/* 16-way set assoc, 64 byte line size */
    8.63 -	{ 0x4a, LVL_3,      6144 },	/* 12-way set assoc, 64 byte line size */
    8.64 -	{ 0x4b, LVL_3,      8192 },	/* 16-way set assoc, 64 byte line size */
    8.65 -	{ 0x4c, LVL_3,     12288 },	/* 12-way set assoc, 64 byte line size */
    8.66 -	{ 0x4d, LVL_3,     16384 },	/* 16-way set assoc, 64 byte line size */
    8.67 -	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    8.68 -	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    8.69 -	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    8.70 -	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
    8.71 -	{ 0x70, LVL_TRACE,  12 },	/* 8-way set assoc */
    8.72 -	{ 0x71, LVL_TRACE,  16 },	/* 8-way set assoc */
    8.73 -	{ 0x72, LVL_TRACE,  32 },	/* 8-way set assoc */
    8.74 -	{ 0x73, LVL_TRACE,  64 },	/* 8-way set assoc */
    8.75 -	{ 0x78, LVL_2,    1024 },	/* 4-way set assoc, 64 byte line size */
    8.76 -	{ 0x79, LVL_2,     128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    8.77 -	{ 0x7a, LVL_2,     256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    8.78 -	{ 0x7b, LVL_2,     512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    8.79 -	{ 0x7c, LVL_2,    1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
    8.80 -	{ 0x7d, LVL_2,    2048 },	/* 8-way set assoc, 64 byte line size */
    8.81 -	{ 0x7f, LVL_2,     512 },	/* 2-way set assoc, 64 byte line size */
    8.82 -	{ 0x82, LVL_2,     256 },	/* 8-way set assoc, 32 byte line size */
    8.83 -	{ 0x83, LVL_2,     512 },	/* 8-way set assoc, 32 byte line size */
    8.84 -	{ 0x84, LVL_2,    1024 },	/* 8-way set assoc, 32 byte line size */
    8.85 -	{ 0x85, LVL_2,    2048 },	/* 8-way set assoc, 32 byte line size */
    8.86 -	{ 0x86, LVL_2,     512 },	/* 4-way set assoc, 64 byte line size */
    8.87 -	{ 0x87, LVL_2,    1024 },	/* 8-way set assoc, 64 byte line size */
    8.88 -	{ 0x00, 0, 0}
    8.89 -};
    8.90 -
    8.91 -
    8.92 -enum _cache_type
    8.93 -{
    8.94 -	CACHE_TYPE_NULL	= 0,
    8.95 -	CACHE_TYPE_DATA = 1,
    8.96 -	CACHE_TYPE_INST = 2,
    8.97 -	CACHE_TYPE_UNIFIED = 3
    8.98 -};
    8.99 -
   8.100 -union _cpuid4_leaf_eax {
   8.101 -	struct {
   8.102 -		enum _cache_type	type:5;
   8.103 -		unsigned int		level:3;
   8.104 -		unsigned int		is_self_initializing:1;
   8.105 -		unsigned int		is_fully_associative:1;
   8.106 -		unsigned int		reserved:4;
   8.107 -		unsigned int		num_threads_sharing:12;
   8.108 -		unsigned int		num_cores_on_die:6;
   8.109 -	} split;
   8.110 -	u32 full;
   8.111 -};
   8.112 -
   8.113 -union _cpuid4_leaf_ebx {
   8.114 -	struct {
   8.115 -		unsigned int		coherency_line_size:12;
   8.116 -		unsigned int		physical_line_partition:10;
   8.117 -		unsigned int		ways_of_associativity:10;
   8.118 -	} split;
   8.119 -	u32 full;
   8.120 -};
   8.121 -
   8.122 -union _cpuid4_leaf_ecx {
   8.123 -	struct {
   8.124 -		unsigned int		number_of_sets:32;
   8.125 -	} split;
   8.126 -	u32 full;
   8.127 -};
   8.128 -
   8.129 -struct _cpuid4_info {
   8.130 -	union _cpuid4_leaf_eax eax;
   8.131 -	union _cpuid4_leaf_ebx ebx;
   8.132 -	union _cpuid4_leaf_ecx ecx;
   8.133 -	unsigned long size;
   8.134 -	cpumask_t shared_cpu_map;
   8.135 -};
   8.136 -
   8.137 -unsigned short			num_cache_leaves;
   8.138 -
   8.139 -/* AMD doesn't have CPUID4. Emulate it here to report the same
   8.140 -   information to the user.  This makes some assumptions about the machine:
   8.141 -   No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
   8.142 -
   8.143 -   In theory the TLBs could be reported as fake type (they are in "dummy").
   8.144 -   Maybe later */
   8.145 -union l1_cache {
   8.146 -	struct {
   8.147 -		unsigned line_size : 8;
   8.148 -		unsigned lines_per_tag : 8;
   8.149 -		unsigned assoc : 8;
   8.150 -		unsigned size_in_kb : 8;
   8.151 -	};
   8.152 -	unsigned val;
   8.153 -};
   8.154 -
   8.155 -union l2_cache {
   8.156 -	struct {
   8.157 -		unsigned line_size : 8;
   8.158 -		unsigned lines_per_tag : 4;
   8.159 -		unsigned assoc : 4;
   8.160 -		unsigned size_in_kb : 16;
   8.161 -	};
   8.162 -	unsigned val;
   8.163 -};
   8.164 -
   8.165 -static const unsigned short assocs[] = {
   8.166 -	[1] = 1, [2] = 2, [4] = 4, [6] = 8,
   8.167 -	[8] = 16,
   8.168 -	[0xf] = 0xffff // ??
   8.169 -	};
   8.170 -static const unsigned char levels[] = { 1, 1, 2 };
   8.171 -static const unsigned char types[] = { 1, 2, 3 };
   8.172 -
   8.173 -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
   8.174 -		       union _cpuid4_leaf_ebx *ebx,
   8.175 -		       union _cpuid4_leaf_ecx *ecx)
   8.176 -{
   8.177 -	unsigned dummy;
   8.178 -	unsigned line_size, lines_per_tag, assoc, size_in_kb;
   8.179 -	union l1_cache l1i, l1d;
   8.180 -	union l2_cache l2;
   8.181 -
   8.182 -	eax->full = 0;
   8.183 -	ebx->full = 0;
   8.184 -	ecx->full = 0;
   8.185 -
   8.186 -	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
   8.187 -	cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
   8.188 -
   8.189 -	if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
   8.190 -		return;
   8.191 -
   8.192 -	eax->split.is_self_initializing = 1;
   8.193 -	eax->split.type = types[leaf];
   8.194 -	eax->split.level = levels[leaf];
   8.195 -	eax->split.num_threads_sharing = 0;
   8.196 -	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
   8.197 -
   8.198 -	if (leaf <= 1) {
   8.199 -		union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
   8.200 -		assoc = l1->assoc;
   8.201 -		line_size = l1->line_size;
   8.202 -		lines_per_tag = l1->lines_per_tag;
   8.203 -		size_in_kb = l1->size_in_kb;
   8.204 -	} else {
   8.205 -		assoc = l2.assoc;
   8.206 -		line_size = l2.line_size;
   8.207 -		lines_per_tag = l2.lines_per_tag;
   8.208 -		/* cpu_data has errata corrections for K7 applied */
   8.209 -		size_in_kb = current_cpu_data.x86_cache_size;
   8.210 -	}
   8.211 -
   8.212 -	if (assoc == 0xf)
   8.213 -		eax->split.is_fully_associative = 1;
   8.214 -	ebx->split.coherency_line_size = line_size - 1;
   8.215 -	ebx->split.ways_of_associativity = assocs[assoc] - 1;
   8.216 -	ebx->split.physical_line_partition = lines_per_tag - 1;
   8.217 -	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
   8.218 -		(ebx->split.ways_of_associativity + 1) - 1;
   8.219 -}
   8.220 -
   8.221 -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
   8.222 -{
   8.223 -	union _cpuid4_leaf_eax 	eax;
   8.224 -	union _cpuid4_leaf_ebx 	ebx;
   8.225 -	union _cpuid4_leaf_ecx 	ecx;
   8.226 -	unsigned		edx;
   8.227 -
   8.228 -	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
   8.229 -		amd_cpuid4(index, &eax, &ebx, &ecx);
   8.230 -	else
   8.231 -		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full,  &edx);
   8.232 -	if (eax.split.type == CACHE_TYPE_NULL)
   8.233 -		return -EIO; /* better error ? */
   8.234 -
   8.235 -	this_leaf->eax = eax;
   8.236 -	this_leaf->ebx = ebx;
   8.237 -	this_leaf->ecx = ecx;
   8.238 -	this_leaf->size = (ecx.split.number_of_sets + 1) *
   8.239 -		(ebx.split.coherency_line_size + 1) *
   8.240 -		(ebx.split.physical_line_partition + 1) *
   8.241 -		(ebx.split.ways_of_associativity + 1);
   8.242 -	return 0;
   8.243 -}
   8.244 -
   8.245 -/* will only be called once; __init is safe here */
   8.246 -static int __init find_num_cache_leaves(void)
   8.247 -{
   8.248 -	unsigned int		eax, ebx, ecx, edx;
   8.249 -	union _cpuid4_leaf_eax	cache_eax;
   8.250 -	int 			i = -1;
   8.251 -
   8.252 -	do {
   8.253 -		++i;
   8.254 -		/* Do cpuid(4) loop to find out num_cache_leaves */
   8.255 -		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
   8.256 -		cache_eax.full = eax;
   8.257 -	} while (cache_eax.split.type != CACHE_TYPE_NULL);
   8.258 -	return i;
   8.259 -}
   8.260 -
   8.261 -unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
   8.262 -{
   8.263 -	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */
   8.264 -	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
   8.265 -	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
   8.266 -	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
   8.267 -#ifdef CONFIG_X86_HT
   8.268 -	unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data);
   8.269 -#endif
   8.270 -
   8.271 -	if (c->cpuid_level > 3) {
   8.272 -		static int is_initialized;
   8.273 -
   8.274 -		if (is_initialized == 0) {
   8.275 -			/* Init num_cache_leaves from boot CPU */
   8.276 -			num_cache_leaves = find_num_cache_leaves();
   8.277 -			is_initialized++;
   8.278 -		}
   8.279 -
   8.280 -		/*
   8.281 -		 * Whenever possible use cpuid(4), deterministic cache
   8.282 -		 * parameters cpuid leaf to find the cache details
   8.283 -		 */
   8.284 -		for (i = 0; i < num_cache_leaves; i++) {
   8.285 -			struct _cpuid4_info this_leaf;
   8.286 -
   8.287 -			int retval;
   8.288 -
   8.289 -			retval = cpuid4_cache_lookup(i, &this_leaf);
   8.290 -			if (retval >= 0) {
   8.291 -				switch(this_leaf.eax.split.level) {
   8.292 -				    case 1:
   8.293 -					if (this_leaf.eax.split.type ==
   8.294 -							CACHE_TYPE_DATA)
   8.295 -						new_l1d = this_leaf.size/1024;
   8.296 -					else if (this_leaf.eax.split.type ==
   8.297 -							CACHE_TYPE_INST)
   8.298 -						new_l1i = this_leaf.size/1024;
   8.299 -					break;
   8.300 -				    case 2:
   8.301 -					new_l2 = this_leaf.size/1024;
   8.302 -					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
   8.303 -					index_msb = get_count_order(num_threads_sharing);
   8.304 -					l2_id = c->apicid >> index_msb;
   8.305 -					break;
   8.306 -				    case 3:
   8.307 -					new_l3 = this_leaf.size/1024;
   8.308 -					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
   8.309 -					index_msb = get_count_order(num_threads_sharing);
   8.310 -					l3_id = c->apicid >> index_msb;
   8.311 -					break;
   8.312 -				    default:
   8.313 -					break;
   8.314 -				}
   8.315 -			}
   8.316 -		}
   8.317 -	}
   8.318 -	/*
   8.319 -	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
   8.320 -	 * trace cache
   8.321 -	 */
   8.322 -	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
   8.323 -		/* supports eax=2  call */
   8.324 -		int i, j, n;
   8.325 -		int regs[4];
   8.326 -		unsigned char *dp = (unsigned char *)regs;
   8.327 -		int only_trace = 0;
   8.328 -
   8.329 -		if (num_cache_leaves != 0 && c->x86 == 15)
   8.330 -			only_trace = 1;
   8.331 -
   8.332 -		/* Number of times to iterate */
   8.333 -		n = cpuid_eax(2) & 0xFF;
   8.334 -
   8.335 -		for ( i = 0 ; i < n ; i++ ) {
   8.336 -			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
   8.337 -
   8.338 -			/* If bit 31 is set, this is an unknown format */
   8.339 -			for ( j = 0 ; j < 3 ; j++ ) {
   8.340 -				if ( regs[j] < 0 ) regs[j] = 0;
   8.341 -			}
   8.342 -
   8.343 -			/* Byte 0 is level count, not a descriptor */
   8.344 -			for ( j = 1 ; j < 16 ; j++ ) {
   8.345 -				unsigned char des = dp[j];
   8.346 -				unsigned char k = 0;
   8.347 -
   8.348 -				/* look up this descriptor in the table */
   8.349 -				while (cache_table[k].descriptor != 0)
   8.350 -				{
   8.351 -					if (cache_table[k].descriptor == des) {
   8.352 -						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
   8.353 -							break;
   8.354 -						switch (cache_table[k].cache_type) {
   8.355 -						case LVL_1_INST:
   8.356 -							l1i += cache_table[k].size;
   8.357 -							break;
   8.358 -						case LVL_1_DATA:
   8.359 -							l1d += cache_table[k].size;
   8.360 -							break;
   8.361 -						case LVL_2:
   8.362 -							l2 += cache_table[k].size;
   8.363 -							break;
   8.364 -						case LVL_3:
   8.365 -							l3 += cache_table[k].size;
   8.366 -							break;
   8.367 -						case LVL_TRACE:
   8.368 -							trace += cache_table[k].size;
   8.369 -							break;
   8.370 -						}
   8.371 -
   8.372 -						break;
   8.373 -					}
   8.374 -
   8.375 -					k++;
   8.376 -				}
   8.377 -			}
   8.378 -		}
   8.379 -	}
   8.380 -
   8.381 -	if (new_l1d)
   8.382 -		l1d = new_l1d;
   8.383 -
   8.384 -	if (new_l1i)
   8.385 -		l1i = new_l1i;
   8.386 -
   8.387 -	if (new_l2) {
   8.388 -		l2 = new_l2;
   8.389 -#ifdef CONFIG_X86_HT
   8.390 -		cpu_llc_id[cpu] = l2_id;
   8.391 -#endif
   8.392 -	}
   8.393 -
   8.394 -	if (new_l3) {
   8.395 -		l3 = new_l3;
   8.396 -#ifdef CONFIG_X86_HT
   8.397 -		cpu_llc_id[cpu] = l3_id;
   8.398 -#endif
   8.399 -	}
   8.400 -
   8.401 -	if (trace)
   8.402 -		printk (KERN_INFO "CPU: Trace cache: %dK uops", trace);
   8.403 -	else if ( l1i )
   8.404 -		printk (KERN_INFO "CPU: L1 I cache: %dK", l1i);
   8.405 -
   8.406 -	if (l1d)
   8.407 -		printk(", L1 D cache: %dK\n", l1d);
   8.408 -	else
   8.409 -		printk("\n");
   8.410 -
   8.411 -	if (l2)
   8.412 -		printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
   8.413 -
   8.414 -	if (l3)
   8.415 -		printk(KERN_INFO "CPU: L3 cache: %dK\n", l3);
   8.416 -
   8.417 -	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
   8.418 -
   8.419 -	return l2;
   8.420 -}
   8.421 -
   8.422 -/* pointer to _cpuid4_info array (for each cache leaf) */
   8.423 -static struct _cpuid4_info *cpuid4_info[NR_CPUS];
   8.424 -#define CPUID4_INFO_IDX(x,y)    (&((cpuid4_info[x])[y]))
   8.425 -
   8.426 -#ifdef CONFIG_SMP
   8.427 -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
   8.428 -{
   8.429 -	struct _cpuid4_info	*this_leaf, *sibling_leaf;
   8.430 -	unsigned long num_threads_sharing;
   8.431 -	int index_msb, i;
   8.432 -	struct cpuinfo_x86 *c = cpu_data;
   8.433 -
   8.434 -	this_leaf = CPUID4_INFO_IDX(cpu, index);
   8.435 -	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
   8.436 -
   8.437 -	if (num_threads_sharing == 1)
   8.438 -		cpu_set(cpu, this_leaf->shared_cpu_map);
   8.439 -	else {
   8.440 -		index_msb = get_count_order(num_threads_sharing);
   8.441 -
   8.442 -		for_each_online_cpu(i) {
   8.443 -			if (c[i].apicid >> index_msb ==
   8.444 -			    c[cpu].apicid >> index_msb) {
   8.445 -				cpu_set(i, this_leaf->shared_cpu_map);
   8.446 -				if (i != cpu && cpuid4_info[i])  {
   8.447 -					sibling_leaf = CPUID4_INFO_IDX(i, index);
   8.448 -					cpu_set(cpu, sibling_leaf->shared_cpu_map);
   8.449 -				}
   8.450 -			}
   8.451 -		}
   8.452 -	}
   8.453 -}
   8.454 -static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
   8.455 -{
   8.456 -	struct _cpuid4_info	*this_leaf, *sibling_leaf;
   8.457 -	int sibling;
   8.458 -
   8.459 -	this_leaf = CPUID4_INFO_IDX(cpu, index);
   8.460 -	for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) {
   8.461 -		sibling_leaf = CPUID4_INFO_IDX(sibling, index);	
   8.462 -		cpu_clear(cpu, sibling_leaf->shared_cpu_map);
   8.463 -	}
   8.464 -}
   8.465 -#else
   8.466 -static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {}
   8.467 -static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {}
   8.468 -#endif
   8.469 -
   8.470 -static void free_cache_attributes(unsigned int cpu)
   8.471 -{
   8.472 -	kfree(cpuid4_info[cpu]);
   8.473 -	cpuid4_info[cpu] = NULL;
   8.474 -}
   8.475 -
   8.476 -static int __cpuinit detect_cache_attributes(unsigned int cpu)
   8.477 -{
   8.478 -	struct _cpuid4_info	*this_leaf;
   8.479 -	unsigned long 		j;
   8.480 -	int 			retval;
   8.481 -	cpumask_t		oldmask;
   8.482 -
   8.483 -	if (num_cache_leaves == 0)
   8.484 -		return -ENOENT;
   8.485 -
   8.486 -	cpuid4_info[cpu] = kmalloc(
   8.487 -	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
   8.488 -	if (unlikely(cpuid4_info[cpu] == NULL))
   8.489 -		return -ENOMEM;
   8.490 -	memset(cpuid4_info[cpu], 0,
   8.491 -	    sizeof(struct _cpuid4_info) * num_cache_leaves);
   8.492 -
   8.493 -	oldmask = current->cpus_allowed;
   8.494 -	retval = set_cpus_allowed(current, cpumask_of_cpu(cpu));
   8.495 -	if (retval)
   8.496 -		goto out;
   8.497 -
   8.498 -	/* Do cpuid and store the results */
   8.499 -	retval = 0;
   8.500 -	for (j = 0; j < num_cache_leaves; j++) {
   8.501 -		this_leaf = CPUID4_INFO_IDX(cpu, j);
   8.502 -		retval = cpuid4_cache_lookup(j, this_leaf);
   8.503 -		if (unlikely(retval < 0))
   8.504 -			break;
   8.505 -		cache_shared_cpu_map_setup(cpu, j);
   8.506 -	}
   8.507 -	set_cpus_allowed(current, oldmask);
   8.508 -
   8.509 -out:
   8.510 -	if (retval)
   8.511 -		free_cache_attributes(cpu);
   8.512 -	return retval;
   8.513 -}
   8.514 -
   8.515 -#ifdef CONFIG_SYSFS
   8.516 -
   8.517 -#include <linux/kobject.h>
   8.518 -#include <linux/sysfs.h>
   8.519 -
   8.520 -extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
   8.521 -
   8.522 -/* pointer to kobject for cpuX/cache */
   8.523 -static struct kobject * cache_kobject[NR_CPUS];
   8.524 -
   8.525 -struct _index_kobject {
   8.526 -	struct kobject kobj;
   8.527 -	unsigned int cpu;
   8.528 -	unsigned short index;
   8.529 -};
   8.530 -
   8.531 -/* pointer to array of kobjects for cpuX/cache/indexY */
   8.532 -static struct _index_kobject *index_kobject[NR_CPUS];
   8.533 -#define INDEX_KOBJECT_PTR(x,y)    (&((index_kobject[x])[y]))
   8.534 -
   8.535 -#define show_one_plus(file_name, object, val)				\
   8.536 -static ssize_t show_##file_name						\
   8.537 -			(struct _cpuid4_info *this_leaf, char *buf)	\
   8.538 -{									\
   8.539 -	return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \
   8.540 -}
   8.541 -
   8.542 -show_one_plus(level, eax.split.level, 0);
   8.543 -show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
   8.544 -show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
   8.545 -show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
   8.546 -show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
   8.547 -
   8.548 -static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
   8.549 -{
   8.550 -	return sprintf (buf, "%luK\n", this_leaf->size / 1024);
   8.551 -}
   8.552 -
   8.553 -static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf)
   8.554 -{
   8.555 -	char mask_str[NR_CPUS];
   8.556 -	cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map);
   8.557 -	return sprintf(buf, "%s\n", mask_str);
   8.558 -}
   8.559 -
   8.560 -static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
   8.561 -	switch(this_leaf->eax.split.type) {
   8.562 -	    case CACHE_TYPE_DATA:
   8.563 -		return sprintf(buf, "Data\n");
   8.564 -		break;
   8.565 -	    case CACHE_TYPE_INST:
   8.566 -		return sprintf(buf, "Instruction\n");
   8.567 -		break;
   8.568 -	    case CACHE_TYPE_UNIFIED:
   8.569 -		return sprintf(buf, "Unified\n");
   8.570 -		break;
   8.571 -	    default:
   8.572 -		return sprintf(buf, "Unknown\n");
   8.573 -		break;
   8.574 -	}
   8.575 -}
   8.576 -
   8.577 -struct _cache_attr {
   8.578 -	struct attribute attr;
   8.579 -	ssize_t (*show)(struct _cpuid4_info *, char *);
   8.580 -	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
   8.581 -};
   8.582 -
   8.583 -#define define_one_ro(_name) \
   8.584 -static struct _cache_attr _name = \
   8.585 -	__ATTR(_name, 0444, show_##_name, NULL)
   8.586 -
   8.587 -define_one_ro(level);
   8.588 -define_one_ro(type);
   8.589 -define_one_ro(coherency_line_size);
   8.590 -define_one_ro(physical_line_partition);
   8.591 -define_one_ro(ways_of_associativity);
   8.592 -define_one_ro(number_of_sets);
   8.593 -define_one_ro(size);
   8.594 -define_one_ro(shared_cpu_map);
   8.595 -
   8.596 -static struct attribute * default_attrs[] = {
   8.597 -	&type.attr,
   8.598 -	&level.attr,
   8.599 -	&coherency_line_size.attr,
   8.600 -	&physical_line_partition.attr,
   8.601 -	&ways_of_associativity.attr,
   8.602 -	&number_of_sets.attr,
   8.603 -	&size.attr,
   8.604 -	&shared_cpu_map.attr,
   8.605 -	NULL
   8.606 -};
   8.607 -
   8.608 -#define to_object(k) container_of(k, struct _index_kobject, kobj)
   8.609 -#define to_attr(a) container_of(a, struct _cache_attr, attr)
   8.610 -
   8.611 -static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
   8.612 -{
   8.613 -	struct _cache_attr *fattr = to_attr(attr);
   8.614 -	struct _index_kobject *this_leaf = to_object(kobj);
   8.615 -	ssize_t ret;
   8.616 -
   8.617 -	ret = fattr->show ?
   8.618 -		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
   8.619 -			buf) :
   8.620 -	       	0;
   8.621 -	return ret;
   8.622 -}
   8.623 -
   8.624 -static ssize_t store(struct kobject * kobj, struct attribute * attr,
   8.625 -		     const char * buf, size_t count)
   8.626 -{
   8.627 -	return 0;
   8.628 -}
   8.629 -
   8.630 -static struct sysfs_ops sysfs_ops = {
   8.631 -	.show   = show,
   8.632 -	.store  = store,
   8.633 -};
   8.634 -
   8.635 -static struct kobj_type ktype_cache = {
   8.636 -	.sysfs_ops	= &sysfs_ops,
   8.637 -	.default_attrs	= default_attrs,
   8.638 -};
   8.639 -
   8.640 -static struct kobj_type ktype_percpu_entry = {
   8.641 -	.sysfs_ops	= &sysfs_ops,
   8.642 -};
   8.643 -
   8.644 -static void cpuid4_cache_sysfs_exit(unsigned int cpu)
   8.645 -{
   8.646 -	kfree(cache_kobject[cpu]);
   8.647 -	kfree(index_kobject[cpu]);
   8.648 -	cache_kobject[cpu] = NULL;
   8.649 -	index_kobject[cpu] = NULL;
   8.650 -	free_cache_attributes(cpu);
   8.651 -}
   8.652 -
   8.653 -static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
   8.654 -{
   8.655 -
   8.656 -	if (num_cache_leaves == 0)
   8.657 -		return -ENOENT;
   8.658 -
   8.659 -	detect_cache_attributes(cpu);
   8.660 -	if (cpuid4_info[cpu] == NULL)
   8.661 -		return -ENOENT;
   8.662 -
   8.663 -	/* Allocate all required memory */
   8.664 -	cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL);
   8.665 -	if (unlikely(cache_kobject[cpu] == NULL))
   8.666 -		goto err_out;
   8.667 -	memset(cache_kobject[cpu], 0, sizeof(struct kobject));
   8.668 -
   8.669 -	index_kobject[cpu] = kmalloc(
   8.670 -	    sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL);
   8.671 -	if (unlikely(index_kobject[cpu] == NULL))
   8.672 -		goto err_out;
   8.673 -	memset(index_kobject[cpu], 0,
   8.674 -	    sizeof(struct _index_kobject) * num_cache_leaves);
   8.675 -
   8.676 -	return 0;
   8.677 -
   8.678 -err_out:
   8.679 -	cpuid4_cache_sysfs_exit(cpu);
   8.680 -	return -ENOMEM;
   8.681 -}
   8.682 -
   8.683 -/* Add/Remove cache interface for CPU device */
   8.684 -static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
   8.685 -{
   8.686 -	unsigned int cpu = sys_dev->id;
   8.687 -	unsigned long i, j;
   8.688 -	struct _index_kobject *this_object;
   8.689 -	int retval = 0;
   8.690 -
   8.691 -	retval = cpuid4_cache_sysfs_init(cpu);
   8.692 -	if (unlikely(retval < 0))
   8.693 -		return retval;
   8.694 -
   8.695 -	cache_kobject[cpu]->parent = &sys_dev->kobj;
   8.696 -	kobject_set_name(cache_kobject[cpu], "%s", "cache");
   8.697 -	cache_kobject[cpu]->ktype = &ktype_percpu_entry;
   8.698 -	retval = kobject_register(cache_kobject[cpu]);
   8.699 -
   8.700 -	for (i = 0; i < num_cache_leaves; i++) {
   8.701 -		this_object = INDEX_KOBJECT_PTR(cpu,i);
   8.702 -		this_object->cpu = cpu;
   8.703 -		this_object->index = i;
   8.704 -		this_object->kobj.parent = cache_kobject[cpu];
   8.705 -		kobject_set_name(&(this_object->kobj), "index%1lu", i);
   8.706 -		this_object->kobj.ktype = &ktype_cache;
   8.707 -		retval = kobject_register(&(this_object->kobj));
   8.708 -		if (unlikely(retval)) {
   8.709 -			for (j = 0; j < i; j++) {
   8.710 -				kobject_unregister(
   8.711 -					&(INDEX_KOBJECT_PTR(cpu,j)->kobj));
   8.712 -			}
   8.713 -			kobject_unregister(cache_kobject[cpu]);
   8.714 -			cpuid4_cache_sysfs_exit(cpu);
   8.715 -			break;
   8.716 -		}
   8.717 -	}
   8.718 -	return retval;
   8.719 -}
   8.720 -
   8.721 -static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
   8.722 -{
   8.723 -	unsigned int cpu = sys_dev->id;
   8.724 -	unsigned long i;
   8.725 -
   8.726 -	for (i = 0; i < num_cache_leaves; i++) {
   8.727 -		cache_remove_shared_cpu_map(cpu, i);
   8.728 -		kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
   8.729 -	}
   8.730 -	kobject_unregister(cache_kobject[cpu]);
   8.731 -	cpuid4_cache_sysfs_exit(cpu);
   8.732 -	return;
   8.733 -}
   8.734 -
   8.735 -static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
   8.736 -					unsigned long action, void *hcpu)
   8.737 -{
   8.738 -	unsigned int cpu = (unsigned long)hcpu;
   8.739 -	struct sys_device *sys_dev;
   8.740 -
   8.741 -	sys_dev = get_cpu_sysdev(cpu);
   8.742 -	switch (action) {
   8.743 -	case CPU_ONLINE:
   8.744 -		cache_add_dev(sys_dev);
   8.745 -		break;
   8.746 -	case CPU_DEAD:
   8.747 -		cache_remove_dev(sys_dev);
   8.748 -		break;
   8.749 -	}
   8.750 -	return NOTIFY_OK;
   8.751 -}
   8.752 -
   8.753 -static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier =
   8.754 -{
   8.755 -    .notifier_call = cacheinfo_cpu_callback,
   8.756 -};
   8.757 -
   8.758 -static int __cpuinit cache_sysfs_init(void)
   8.759 -{
   8.760 -	int i;
   8.761 -
   8.762 -	if (num_cache_leaves == 0)
   8.763 -		return 0;
   8.764 -
   8.765 -	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
   8.766 -
   8.767 -	for_each_online_cpu(i) {
   8.768 -		cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE,
   8.769 -			(void *)(long)i);
   8.770 -	}
   8.771 -
   8.772 -	return 0;
   8.773 -}
   8.774 -
   8.775 -device_initcall(cache_sysfs_init);
   8.776 -
   8.777 -#endif
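
For reference while reading the removal above: cpuid4_cache_lookup() and
find_num_cache_leaves() amount to walking the cpuid leaf 4 subleaves until the
type field in EAX reads CACHE_TYPE_NULL, then deriving each cache's size as
(ways * partitions * line size * sets). A self-contained userspace sketch of
that walk, assuming GCC/Clang's <cpuid.h>; it probes the host CPU directly, so
the amd_cpuid4() emulation path above does not apply:

/* Sketch: enumerate deterministic cache parameters via cpuid(4),
 * mirroring the loop in the removed find_num_cache_leaves() and the
 * size computation in cpuid4_cache_lookup(). */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	int i;

	for (i = 0; ; i++) {
		if (!__get_cpuid_count(4, i, &eax, &ebx, &ecx, &edx))
			break;                       /* leaf 4 unsupported */
		unsigned int type = eax & 0x1f;      /* _cpuid4_leaf_eax.split.type */
		if (type == 0)                       /* CACHE_TYPE_NULL: done */
			break;

		unsigned int level      = (eax >> 5) & 0x7;
		unsigned int line_size  = (ebx & 0xfff) + 1;        /* coherency_line_size + 1 */
		unsigned int partitions = ((ebx >> 12) & 0x3ff) + 1; /* physical_line_partition + 1 */
		unsigned int ways       = ((ebx >> 22) & 0x3ff) + 1; /* ways_of_associativity + 1 */
		unsigned int sets       = ecx + 1;                   /* number_of_sets + 1 */

		/* Same size formula the removed code used. */
		unsigned long size = (unsigned long)ways * partitions * line_size * sets;
		printf("index%d: L%u %s, %luK\n", i, level,
		       type == 1 ? "Data" : type == 2 ? "Instruction" : "Unified",
		       size / 1024);
	}
	return 0;
}

On CPUs without leaf 4 (or under a hypervisor that masks it) the loop exits
immediately, matching the num_cache_leaves == 0 fallback to the cpuid(2)
descriptor table in the removed code.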
     9.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S	Mon Mar 05 12:49:12 2007 -0600
     9.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S	Thu Mar 08 14:39:52 2007 -0600
     9.3 @@ -368,6 +368,7 @@ sysexit_scrit:	/**** START OF SYSEXIT CR
     9.4  	movl ESI(%esp), %esi
     9.5  	sysexit
     9.6  14:	__DISABLE_INTERRUPTS
     9.7 +	TRACE_IRQS_OFF
     9.8  sysexit_ecrit:	/**** END OF SYSEXIT CRITICAL REGION ****/
     9.9  	push %esp
    9.10  	call evtchn_do_upcall
    9.11 @@ -427,11 +428,13 @@ restore_nocheck:
    9.12  restore_nocheck:
    9.13  	movl EFLAGS(%esp), %eax
    9.14  	testl $(VM_MASK|NMI_MASK), %eax
    9.15 +	CFI_REMEMBER_STATE
    9.16  	jnz hypervisor_iret
    9.17  	shr $9, %eax			# EAX[0] == IRET_EFLAGS.IF
    9.18  	GET_VCPU_INFO
    9.19  	andb evtchn_upcall_mask(%esi),%al
    9.20  	andb $1,%al			# EAX[0] == IRET_EFLAGS.IF & event_mask
    9.21 +	CFI_REMEMBER_STATE
    9.22  	jnz restore_all_enable_events	#        != 0 => enable event delivery
    9.23  #endif
    9.24  	TRACE_IRQS_IRET
    9.25 @@ -455,8 +458,8 @@ iret_exc:
    9.26  	.long 1b,iret_exc
    9.27  .previous
    9.28  
    9.29 +	CFI_RESTORE_STATE
    9.30  #ifndef CONFIG_XEN
    9.31 -	CFI_RESTORE_STATE
    9.32  ldt_ss:
    9.33  	larl OLDSS(%esp), %eax
    9.34  	jnz restore_nocheck
    9.35 @@ -485,14 +488,36 @@ 1:	iret
    9.36  	.align 4
    9.37  	.long 1b,iret_exc
    9.38  .previous
    9.39 -	CFI_ENDPROC
    9.40  #else
    9.41 +        ALIGN
    9.42 +restore_all_enable_events:
    9.43 +	TRACE_IRQS_ON
    9.44 +	__ENABLE_INTERRUPTS
    9.45 +scrit:	/**** START OF CRITICAL REGION ****/
    9.46 +	__TEST_PENDING
    9.47 +	jnz  14f			# process more events if necessary...
    9.48 +	RESTORE_REGS
    9.49 +	addl $4, %esp
    9.50 +	CFI_ADJUST_CFA_OFFSET -4
    9.51 +1:	iret
    9.52 +.section __ex_table,"a"
    9.53 +	.align 4
    9.54 +	.long 1b,iret_exc
    9.55 +.previous
    9.56 +14:	__DISABLE_INTERRUPTS
    9.57 +	TRACE_IRQS_OFF
    9.58 +	jmp  11f
    9.59 +ecrit:  /**** END OF CRITICAL REGION ****/
    9.60 +
    9.61 +	CFI_RESTORE_STATE
    9.62  hypervisor_iret:
    9.63  	andl $~NMI_MASK, EFLAGS(%esp)
    9.64  	RESTORE_REGS
    9.65  	addl $4, %esp
    9.66 +	CFI_ADJUST_CFA_OFFSET -4
    9.67  	jmp  hypercall_page + (__HYPERVISOR_iret * 32)
    9.68  #endif
    9.69 +	CFI_ENDPROC
    9.70  
    9.71  	# perform work that needs to be done immediately before resumption
    9.72  	ALIGN
    9.73 @@ -736,7 +761,9 @@ error_code:
    9.74  # critical region we know that the entire frame is present and correct
    9.75  # so we can simply throw away the new one.
    9.76  ENTRY(hypervisor_callback)
    9.77 +	RING0_INT_FRAME
    9.78  	pushl %eax
    9.79 +	CFI_ADJUST_CFA_OFFSET 4
    9.80  	SAVE_ALL
    9.81  	movl EIP(%esp),%eax
    9.82  	cmpl $scrit,%eax
    9.83 @@ -749,26 +776,13 @@ ENTRY(hypervisor_callback)
    9.84  	ja   11f
    9.85  	addl $OLDESP,%esp		# Remove eflags...ebx from stack frame.
    9.86  11:	push %esp
    9.87 +	CFI_ADJUST_CFA_OFFSET 4
    9.88  	call evtchn_do_upcall
    9.89  	add  $4,%esp
    9.90 +	CFI_ADJUST_CFA_OFFSET -4
    9.91  	jmp  ret_from_intr
    9.92 +	CFI_ENDPROC
    9.93  
    9.94 -        ALIGN
    9.95 -restore_all_enable_events:
    9.96 -	__ENABLE_INTERRUPTS
    9.97 -scrit:	/**** START OF CRITICAL REGION ****/
    9.98 -	__TEST_PENDING
    9.99 -	jnz  14f			# process more events if necessary...
   9.100 -	RESTORE_REGS
   9.101 -	addl $4, %esp
   9.102 -1:	iret
   9.103 -.section __ex_table,"a"
   9.104 -	.align 4
   9.105 -	.long 1b,iret_exc
   9.106 -.previous
   9.107 -14:	__DISABLE_INTERRUPTS
   9.108 -	jmp  11b
   9.109 -ecrit:  /**** END OF CRITICAL REGION ****/
   9.110  # [How we do the fixup]. We want to merge the current stack frame with the
   9.111  # just-interrupted frame. How we do this depends on where in the critical
   9.112  # region the interrupted handler was executing, and so how many saved
   9.113 @@ -835,6 +849,7 @@ 4:	mov 16(%esp),%gs
   9.114  	addl $16,%esp		# EAX != 0 => Category 2 (Bad IRET)
   9.115  	jmp iret_exc
   9.116  5:	addl $16,%esp		# EAX == 0 => Category 1 (Bad segment)
   9.117 +	RING0_INT_FRAME
   9.118  	pushl $0
   9.119  	SAVE_ALL
   9.120  	jmp ret_from_exception
   9.121 @@ -860,6 +875,7 @@ 9:	xorl %eax,%eax;		\
   9.122  	.long 4b,9b;		\
   9.123  .previous
   9.124  #endif
   9.125 +	CFI_ENDPROC
   9.126  
   9.127  ENTRY(coprocessor_error)
   9.128  	RING0_INT_FRAME
   9.129 @@ -1187,8 +1203,11 @@ ENDPROC(arch_unwind_init_running)
   9.130  #endif
   9.131  
   9.132  ENTRY(fixup_4gb_segment)
   9.133 +	RING0_EC_FRAME
   9.134  	pushl $do_fixup_4gb_segment
   9.135 +	CFI_ADJUST_CFA_OFFSET 4
   9.136  	jmp error_code
   9.137 +	CFI_ENDPROC
   9.138  
   9.139  .section .rodata,"a"
   9.140  .align 4
    10.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S	Mon Mar 05 12:49:12 2007 -0600
    10.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S	Thu Mar 08 14:39:52 2007 -0600
    10.3 @@ -9,11 +9,10 @@
    10.4  #include <asm/cache.h>
    10.5  #include <asm/thread_info.h>
    10.6  #include <asm/asm-offsets.h>
    10.7 +#include <asm/dwarf2.h>
    10.8  #include <xen/interface/xen.h>
    10.9  #include <xen/interface/elfnote.h>
   10.10  
   10.11 -#define _PAGE_PRESENT 0x1
   10.12 -
   10.13  /*
   10.14   * References to members of the new_cpu_data structure.
   10.15   */
   10.16 @@ -65,14 +64,13 @@ ENTRY(startup_32)
   10.17  
   10.18  	pushl %eax		# fake return address
   10.19  	jmp start_kernel
   10.20 -L6:
   10.21 -	jmp L6			# main should never return here, but
   10.22 -				# just in case, we know what happens.
   10.23  
   10.24  #define HYPERCALL_PAGE_OFFSET 0x1000
   10.25  .org HYPERCALL_PAGE_OFFSET
   10.26  ENTRY(hypercall_page)
   10.27 +	CFI_STARTPROC
   10.28  .skip 0x1000
   10.29 +	CFI_ENDPROC
   10.30  
   10.31  /*
   10.32   * Real beginning of normal "text" segment
    11.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Mon Mar 05 12:49:12 2007 -0600
    11.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c	Thu Mar 08 14:39:52 2007 -0600
    11.3 @@ -1847,6 +1847,9 @@ static __init int add_pcspkr(void)
    11.4  	struct platform_device *pd;
    11.5  	int ret;
    11.6  
    11.7 +	if (!is_initial_xendomain())
    11.8 +		return 0;
    11.9 +
   11.10  	pd = platform_device_alloc("pcspkr", -1);
   11.11  	if (!pd)
   11.12  		return -ENOMEM;
    12.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Mon Mar 05 12:49:12 2007 -0600
    12.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c	Thu Mar 08 14:39:52 2007 -0600
    12.3 @@ -1022,16 +1022,21 @@ void halt(void)
    12.4  }
    12.5  EXPORT_SYMBOL(halt);
    12.6  
    12.7 -/* No locking required. We are only CPU running, and interrupts are off. */
    12.8 +/* No locking required. Interrupts are disabled on all CPUs. */
    12.9  void time_resume(void)
   12.10  {
   12.11 +	unsigned int cpu;
   12.12 +
   12.13  	init_cpu_khz();
   12.14  
   12.15 -	get_time_values_from_xen(0);
   12.16 +	for_each_online_cpu(cpu) {
   12.17 +		get_time_values_from_xen(cpu);
   12.18 +		per_cpu(processed_system_time, cpu) =
   12.19 +			per_cpu(shadow_time, 0).system_timestamp;
   12.20 +		init_missing_ticks_accounting(cpu);
   12.21 +	}
   12.22  
   12.23  	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
   12.24 -	per_cpu(processed_system_time, 0) = processed_system_time;
   12.25 -	init_missing_ticks_accounting(0);
   12.26  
   12.27  	update_wallclock();
   12.28  }
    13.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Mon Mar 05 12:49:12 2007 -0600
    13.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c	Thu Mar 08 14:39:52 2007 -0600
    13.3 @@ -374,8 +374,6 @@ void iounmap(volatile void __iomem *addr
    13.4  }
    13.5  EXPORT_SYMBOL(iounmap);
    13.6  
    13.7 -#ifdef __i386__
    13.8 -
    13.9  void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
   13.10  {
   13.11  	unsigned long offset, last_addr;
   13.12 @@ -443,5 +441,3 @@ void __init bt_iounmap(void *addr, unsig
   13.13  		--nrpages;
   13.14  	}
   13.15  }
   13.16 -
   13.17 -#endif /* __i386__ */
    14.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Mon Mar 05 12:49:12 2007 -0600
    14.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Thu Mar 08 14:39:52 2007 -0600
    14.3 @@ -573,64 +573,67 @@ void make_pages_writable(void *va, unsig
    14.4  	}
    14.5  }
    14.6  
    14.7 -static inline int pgd_walk_set_prot(struct page *page, pgprot_t flags)
    14.8 +static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
    14.9  {
   14.10  	unsigned long pfn = page_to_pfn(page);
   14.11 -
   14.12 -	if (PageHighMem(page))
   14.13 -		return pgprot_val(flags) & _PAGE_RW
   14.14 -		       ? test_and_clear_bit(PG_pinned, &page->flags)
   14.15 -		       : !test_and_set_bit(PG_pinned, &page->flags);
   14.16 +	int rc;
   14.17  
   14.18 -	BUG_ON(HYPERVISOR_update_va_mapping(
   14.19 -		(unsigned long)__va(pfn << PAGE_SHIFT),
   14.20 -		pfn_pte(pfn, flags), 0));
   14.21 -
   14.22 -	return 0;
   14.23 +	if (PageHighMem(page)) {
   14.24 +		if (pgprot_val(flags) & _PAGE_RW)
   14.25 +			clear_bit(PG_pinned, &page->flags);
   14.26 +		else
   14.27 +			set_bit(PG_pinned, &page->flags);
   14.28 +	} else {
   14.29 +		rc = HYPERVISOR_update_va_mapping(
   14.30 +			(unsigned long)__va(pfn << PAGE_SHIFT),
   14.31 +			pfn_pte(pfn, flags), 0);
   14.32 +		if (rc)
   14.33 +			BUG();
   14.34 +	}
   14.35  }
   14.36  
   14.37 -static int pgd_walk(pgd_t *pgd_base, pgprot_t flags)
   14.38 +static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
   14.39  {
   14.40  	pgd_t *pgd = pgd_base;
   14.41  	pud_t *pud;
   14.42  	pmd_t *pmd;
   14.43 -	int    g, u, m, flush;
   14.44 +	int    g, u, m, rc;
   14.45  
   14.46  	if (xen_feature(XENFEAT_auto_translated_physmap))
    14.47 -		return 0;
    14.47 +		return;
   14.48  
   14.49 -	for (g = 0, flush = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
   14.50 +	for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
   14.51  		if (pgd_none(*pgd))
   14.52  			continue;
   14.53  		pud = pud_offset(pgd, 0);
   14.54  		if (PTRS_PER_PUD > 1) /* not folded */
   14.55 -			flush |= pgd_walk_set_prot(virt_to_page(pud),flags);
   14.56 +			pgd_walk_set_prot(virt_to_page(pud),flags);
   14.57  		for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
   14.58  			if (pud_none(*pud))
   14.59  				continue;
   14.60  			pmd = pmd_offset(pud, 0);
   14.61  			if (PTRS_PER_PMD > 1) /* not folded */
   14.62 -				flush |= pgd_walk_set_prot(virt_to_page(pmd),flags);
   14.63 +				pgd_walk_set_prot(virt_to_page(pmd),flags);
   14.64  			for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
   14.65  				if (pmd_none(*pmd))
   14.66  					continue;
   14.67 -				flush |= pgd_walk_set_prot(pmd_page(*pmd),flags);
   14.68 +				pgd_walk_set_prot(pmd_page(*pmd),flags);
   14.69  			}
   14.70  		}
   14.71  	}
   14.72  
   14.73 -	BUG_ON(HYPERVISOR_update_va_mapping(
   14.74 +	rc = HYPERVISOR_update_va_mapping(
   14.75  		(unsigned long)pgd_base,
   14.76  		pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
   14.77 -		UVMF_TLB_FLUSH));
   14.78 -
   14.79 -	return flush;
   14.80 +		UVMF_TLB_FLUSH);
   14.81 +	if (rc)
   14.82 +		BUG();
   14.83  }
   14.84  
   14.85  static void __pgd_pin(pgd_t *pgd)
   14.86  {
   14.87 -	if (pgd_walk(pgd, PAGE_KERNEL_RO))
   14.88 -		kmap_flush_unused();
   14.89 +	pgd_walk(pgd, PAGE_KERNEL_RO);
   14.90 +	kmap_flush_unused();
   14.91  	xen_pgd_pin(__pa(pgd));
   14.92  	set_bit(PG_pinned, &virt_to_page(pgd)->flags);
   14.93  }
   14.94 @@ -638,8 +641,7 @@ static void __pgd_pin(pgd_t *pgd)
   14.95  static void __pgd_unpin(pgd_t *pgd)
   14.96  {
   14.97  	xen_pgd_unpin(__pa(pgd));
   14.98 -	if (pgd_walk(pgd, PAGE_KERNEL))
   14.99 -		kmap_flush_unused();
  14.100 +	pgd_walk(pgd, PAGE_KERNEL);
  14.101  	clear_bit(PG_pinned, &virt_to_page(pgd)->flags);
  14.102  }
  14.103  
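
Two changes run through the pgtable-xen.c hunks above: pgd_walk() no
longer returns a flush hint (kmap_flush_unused() is now called
unconditionally in __pgd_pin/__pgd_unpin), and BUG_ON(hypercall)
becomes an explicit return-code check, presumably so the side-effecting
hypercall can never be compiled away together with the assertion.
Sketch of the latter idiom:

	/* sketch: keep the side-effecting call outside the assertion */
	int rc = HYPERVISOR_update_va_mapping(va, pte, UVMF_TLB_FLUSH);
	if (rc)
		BUG();	/* this mapping update must never fail */
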
    15.1 --- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig	Mon Mar 05 12:49:12 2007 -0600
    15.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig	Thu Mar 08 14:39:52 2007 -0600
    15.3 @@ -450,7 +450,7 @@ config CALGARY_IOMMU
    15.4  	bool "IBM Calgary IOMMU support"
    15.5  	default y
    15.6  	select SWIOTLB
    15.7 -	depends on PCI && EXPERIMENTAL
    15.8 +	depends on PCI && !X86_64_XEN && EXPERIMENTAL
    15.9  	help
   15.10  	  Support for hardware IOMMUs in IBM's xSeries x366 and x460
   15.11  	  systems. Needed to run systems with more than 3GB of memory
    16.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile	Mon Mar 05 12:49:12 2007 -0600
    16.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile	Thu Mar 08 14:39:52 2007 -0600
    16.3 @@ -61,9 +61,7 @@ ifdef CONFIG_XEN
    16.4  time-y				+= ../../i386/kernel/time-xen.o
    16.5  pci-dma-y			+= ../../i386/kernel/pci-dma-xen.o
    16.6  microcode-$(subst m,y,$(CONFIG_MICROCODE))  := ../../i386/kernel/microcode-xen.o
    16.7 -intel_cacheinfo-y		:= ../../i386/kernel/cpu/intel_cacheinfo-xen.o
    16.8  quirks-y			:= ../../i386/kernel/quirks-xen.o
    16.9 -alternative-y			:= ../../i386/kernel/alternative-xen.o
   16.10  
   16.11  n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o
   16.12  
    17.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S	Mon Mar 05 12:49:12 2007 -0600
    17.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S	Thu Mar 08 14:39:52 2007 -0600
    17.3 @@ -116,19 +116,21 @@ NMI_MASK = 0x80000000
    17.4  	CFI_ADJUST_CFA_OFFSET	-(6*8)
    17.5  	.endm
    17.6  
    17.7 -	.macro	CFI_DEFAULT_STACK start=1
    17.8 +	.macro	CFI_DEFAULT_STACK start=1,adj=0
    17.9  	.if \start
   17.10  	CFI_STARTPROC	simple
   17.11 -	CFI_DEF_CFA	rsp,SS+8
   17.12 +	CFI_DEF_CFA	rsp,SS+8-(\adj*ARGOFFSET)
   17.13  	.else
   17.14 -	CFI_DEF_CFA_OFFSET SS+8
   17.15 +	CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET)
   17.16  	.endif
   17.17 +	.if \adj == 0
   17.18  	CFI_REL_OFFSET	r15,R15
   17.19  	CFI_REL_OFFSET	r14,R14
   17.20  	CFI_REL_OFFSET	r13,R13
   17.21  	CFI_REL_OFFSET	r12,R12
   17.22  	CFI_REL_OFFSET	rbp,RBP
   17.23  	CFI_REL_OFFSET	rbx,RBX
   17.24 +	.endif
   17.25  	CFI_REL_OFFSET	r11,R11
   17.26  	CFI_REL_OFFSET	r10,R10
   17.27  	CFI_REL_OFFSET	r9,R9
   17.28 @@ -363,8 +365,8 @@ ENTRY(int_ret_from_sys_call)
   17.29  	CFI_REL_OFFSET	r9,R9-ARGOFFSET
   17.30  	CFI_REL_OFFSET	r10,R10-ARGOFFSET
   17.31  	CFI_REL_OFFSET	r11,R11-ARGOFFSET
   17.32 +        XEN_BLOCK_EVENTS(%rsi)
   17.33  	TRACE_IRQS_OFF
   17.34 -        XEN_BLOCK_EVENTS(%rsi)
   17.35  	testb $3,CS-ARGOFFSET(%rsp)
   17.36          jnz 1f
   17.37          /* Need to set the proper %ss (not NULL) for ring 3 iretq */
   17.38 @@ -534,7 +536,7 @@ END(stub_rt_sigreturn)
   17.39   */ 
   17.40  
   17.41  retint_check:
   17.42 -	CFI_DEFAULT_STACK
   17.43 +	CFI_DEFAULT_STACK adj=1
   17.44  	movl threadinfo_flags(%rcx),%edx
   17.45  	andl %edi,%edx
   17.46  	CFI_REMEMBER_STATE
   17.47 @@ -838,6 +840,7 @@ ENTRY(error_entry)
   17.48  	CFI_REL_OFFSET	r15,R15
   17.49  #if 0        
   17.50  	cmpl $__KERNEL_CS,CS(%rsp)
   17.51 +	CFI_REMEMBER_STATE
   17.52  	je  error_kernelspace
   17.53  #endif        
   17.54  error_call_handler:
   17.55 @@ -864,7 +867,7 @@ error_exit:
   17.56  	TRACE_IRQS_IRETQ
   17.57  	jmp   retint_restore_args
   17.58  
   17.59 -error_kernelspace:
   17.60 +#if 0
   17.61           /*
   17.62           * We need to re-write the logic here because we don't do iretq to 
   17.63           * to return to user mode. It's still possible that we get trap/fault
   17.64 @@ -872,7 +875,8 @@ error_kernelspace:
   17.65           * for example).
   17.66           *
   17.67           */           
   17.68 -#if 0
   17.69 +	CFI_RESTORE_STATE
   17.70 +error_kernelspace:
   17.71  	incl %ebx
   17.72         /* There are two places in the kernel that can potentially fault with
   17.73            usergs. Handle them here. The exception handlers after
   17.74 @@ -888,11 +892,13 @@ error_kernelspace:
   17.75  	cmpq $gs_change,RIP(%rsp)
   17.76          je   error_swapgs
   17.77  	jmp  error_sti
   17.78 -#endif        
   17.79 +#endif
   17.80 +	CFI_ENDPROC
   17.81  END(error_entry)
   17.82  	
   17.83  ENTRY(hypervisor_callback)
   17.84  	zeroentry do_hypervisor_callback
   17.85 +END(hypervisor_callback)
   17.86          
   17.87  /*
   17.88   * Copied from arch/xen/i386/kernel/entry.S
   17.89 @@ -909,48 +915,66 @@ ENTRY(hypervisor_callback)
   17.90  # existing activation in its critical region -- if so, we pop the current
   17.91  # activation and restart the handler using the previous one.
    17.92  ENTRY(do_hypervisor_callback)   # do_hypervisor_callback(struct pt_regs *)
   17.93 +	CFI_STARTPROC
    17.94  # Since we don't modify %rdi, evtchn_do_upcall(struct pt_regs *) will
   17.95  # see the correct pointer to the pt_regs
   17.96  	movq %rdi, %rsp            # we don't return, adjust the stack frame
   17.97 -11:	movq %gs:pda_irqstackptr,%rax
   17.98 -	incl %gs:pda_irqcount
   17.99 -	cmovzq %rax,%rsp
  17.100 -	pushq %rdi
  17.101 +	CFI_ENDPROC
  17.102 +	CFI_DEFAULT_STACK
  17.103 +11:	incl %gs:pda_irqcount
  17.104 +	movq %rsp,%rbp
  17.105 +	CFI_DEF_CFA_REGISTER rbp
  17.106 +	cmovzq %gs:pda_irqstackptr,%rsp
  17.107 +	pushq %rbp			# backlink for old unwinder
  17.108  	call evtchn_do_upcall
  17.109  	popq %rsp
  17.110 +	CFI_DEF_CFA_REGISTER rsp
  17.111  	decl %gs:pda_irqcount
  17.112  	jmp  error_exit
  17.113 +	CFI_ENDPROC
  17.114 +END(do_hypervisor_callback)
  17.115  
  17.116  #ifdef CONFIG_X86_LOCAL_APIC
  17.117  KPROBE_ENTRY(nmi)
  17.118  	zeroentry do_nmi_callback
  17.119  ENTRY(do_nmi_callback)
  17.120 +	CFI_STARTPROC
  17.121          addq $8, %rsp
  17.122 +	CFI_ENDPROC
  17.123 +	CFI_DEFAULT_STACK
  17.124          call do_nmi
  17.125          orl  $NMI_MASK,EFLAGS(%rsp)
  17.126          RESTORE_REST
  17.127          XEN_BLOCK_EVENTS(%rsi)
  17.128 +	TRACE_IRQS_OFF
  17.129          GET_THREAD_INFO(%rcx)
  17.130          jmp  retint_restore_args
  17.131 +	CFI_ENDPROC
  17.132  	.previous .text
  17.133 +END(nmi)
  17.134  #endif
  17.135  
  17.136          ALIGN
  17.137  restore_all_enable_events:  
  17.138 +	CFI_DEFAULT_STACK adj=1
  17.139 +	TRACE_IRQS_ON
  17.140  	XEN_UNBLOCK_EVENTS(%rsi)        # %rsi is already set up...
  17.141  
  17.142  scrit:	/**** START OF CRITICAL REGION ****/
  17.143  	XEN_TEST_PENDING(%rsi)
  17.144 +	CFI_REMEMBER_STATE
  17.145  	jnz  14f			# process more events if necessary...
  17.146  	XEN_PUT_VCPU_INFO(%rsi)
  17.147          RESTORE_ARGS 0,8,0
  17.148          HYPERVISOR_IRET 0
  17.149          
  17.150 +	CFI_RESTORE_STATE
  17.151  14:	XEN_LOCKED_BLOCK_EVENTS(%rsi)
  17.152  	XEN_PUT_VCPU_INFO(%rsi)
  17.153  	SAVE_REST
  17.154          movq %rsp,%rdi                  # set the argument again
  17.155  	jmp  11b
  17.156 +	CFI_ENDPROC
  17.157  ecrit:  /**** END OF CRITICAL REGION ****/
  17.158  # At this point, unlike on x86-32, we don't do the fixup to simplify the 
  17.159  # code and the stack frame is more complex on x86-64.
  17.160 @@ -970,8 +994,12 @@ ecrit:  /**** END OF CRITICAL REGION ***
  17.161  # We distinguish between categories by comparing each saved segment register
   17.162  # with its current contents: any discrepancy means we are in category 1.
  17.163  ENTRY(failsafe_callback)
  17.164 +	_frame (RIP-0x30)
  17.165 +	CFI_REL_OFFSET rcx, 0
  17.166 +	CFI_REL_OFFSET r11, 8
  17.167  	movw %ds,%cx
  17.168  	cmpw %cx,0x10(%rsp)
  17.169 +	CFI_REMEMBER_STATE
  17.170  	jne 1f
  17.171  	movw %es,%cx
  17.172  	cmpw %cx,0x18(%rsp)
  17.173 @@ -984,17 +1012,26 @@ ENTRY(failsafe_callback)
  17.174  	jne 1f
  17.175  	/* All segments match their saved values => Category 2 (Bad IRET). */
  17.176  	movq (%rsp),%rcx
  17.177 +	CFI_RESTORE rcx
  17.178  	movq 8(%rsp),%r11
  17.179 +	CFI_RESTORE r11
  17.180  	addq $0x30,%rsp
  17.181 +	CFI_ADJUST_CFA_OFFSET -0x30
  17.182  	movq $11,%rdi	/* SIGSEGV */
  17.183  	jmp do_exit			
  17.184 +	CFI_RESTORE_STATE
  17.185  1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
  17.186  	movq (%rsp),%rcx
  17.187 +	CFI_RESTORE rcx
  17.188  	movq 8(%rsp),%r11
  17.189 +	CFI_RESTORE r11
  17.190  	addq $0x30,%rsp
  17.191 +	CFI_ADJUST_CFA_OFFSET -0x30
  17.192  	pushq $0
  17.193 +	CFI_ADJUST_CFA_OFFSET 8
  17.194  	SAVE_ALL
  17.195  	jmp error_exit
  17.196 +	CFI_ENDPROC
  17.197  #if 0	      
  17.198          .section __ex_table,"a"
  17.199          .align 8
  17.200 @@ -1117,12 +1154,12 @@ END(device_not_available)
  17.201  
  17.202  	/* runs on exception stack */
  17.203  KPROBE_ENTRY(debug)
  17.204 - 	INTR_FRAME
  17.205 -/*	pushq $0
  17.206 +/* 	INTR_FRAME
  17.207 +	pushq $0
  17.208  	CFI_ADJUST_CFA_OFFSET 8	*/
  17.209  	zeroentry do_debug
  17.210 -/*	paranoid_exit */
  17.211 -	CFI_ENDPROC
  17.212 +/*	paranoidexit
  17.213 +	CFI_ENDPROC */
  17.214  END(debug)
  17.215  	.previous .text
  17.216  
  17.217 @@ -1144,12 +1181,12 @@ END(nmi)
  17.218  #endif        
  17.219  
  17.220  KPROBE_ENTRY(int3)
  17.221 - 	INTR_FRAME
  17.222 -/* 	pushq $0
  17.223 +/* 	INTR_FRAME
  17.224 + 	pushq $0
  17.225   	CFI_ADJUST_CFA_OFFSET 8 */
  17.226   	zeroentry do_int3
  17.227 -/* 	jmp paranoid_exit1 */
  17.228 - 	CFI_ENDPROC
  17.229 +/* 	jmp paranoid_exit1
  17.230 + 	CFI_ENDPROC */
  17.231  END(int3)
  17.232  	.previous .text
  17.233  
  17.234 @@ -1193,9 +1230,11 @@ END(segment_not_present)
  17.235  
  17.236  	/* runs on exception stack */
  17.237  ENTRY(stack_segment)
  17.238 -	XCPT_FRAME
  17.239 +/*	XCPT_FRAME
  17.240 +	paranoidentry do_stack_segment */
  17.241  	errorentry do_stack_segment
  17.242 -	CFI_ENDPROC
  17.243 +/*	jmp paranoid_exit1
  17.244 +	CFI_ENDPROC */
  17.245  END(stack_segment)
  17.246  
  17.247  KPROBE_ENTRY(general_protection)
    18.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S	Mon Mar 05 12:49:12 2007 -0600
    18.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S	Thu Mar 08 14:39:52 2007 -0600
    18.3 @@ -22,11 +22,9 @@
    18.4  #include <asm/page.h>
    18.5  #include <asm/msr.h>
    18.6  #include <asm/cache.h>
    18.7 -
    18.8 +#include <asm/dwarf2.h>
    18.9  #include <xen/interface/elfnote.h>
   18.10  
   18.11 -#define _PAGE_PRESENT 0x1
   18.12 -
   18.13  	.section .bootstrap.text, "ax", @progbits
   18.14  	.code64
   18.15  #define VIRT_ENTRY_OFFSET 0x0
   18.16 @@ -42,6 +40,7 @@ ENTRY(_start)
   18.17  	/* rsi is pointer to startup info structure.
   18.18  	   pass it to C */
   18.19  	movq %rsi,%rdi
   18.20 +	pushq $0		# fake return address
   18.21  	jmp x86_64_start_kernel
   18.22  
   18.23  ENTRY(stext)
   18.24 @@ -82,7 +81,25 @@ NEXT_PAGE(level2_kernel_pgt)
   18.25  	.fill	512,8,0
   18.26  
   18.27  NEXT_PAGE(hypercall_page)
   18.28 -	.fill	512,8,0
   18.29 +	CFI_STARTPROC
   18.30 +	.rept 0x1000 / 0x20
   18.31 +	.skip 1 /* push %rcx */
   18.32 +	CFI_ADJUST_CFA_OFFSET	8
   18.33 +	CFI_REL_OFFSET	rcx,0
   18.34 +	.skip 2 /* push %r11 */
   18.35 +	CFI_ADJUST_CFA_OFFSET	8
    18.36 +	CFI_REL_OFFSET	r11,0
   18.37 +	.skip 5 /* mov $#,%eax */
   18.38 +	.skip 2 /* syscall */
   18.39 +	.skip 2 /* pop %r11 */
   18.40 +	CFI_ADJUST_CFA_OFFSET -8
   18.41 +	CFI_RESTORE r11
   18.42 +	.skip 1 /* pop %rcx */
   18.43 +	CFI_ADJUST_CFA_OFFSET -8
   18.44 +	CFI_RESTORE rcx
   18.45 +	.align 0x20,0 /* ret */
   18.46 +	.endr
   18.47 +	CFI_ENDPROC
   18.48  
   18.49  #undef NEXT_PAGE
   18.50  
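
The hypercall page above is now described as 0x1000/0x20 = 128 stubs of
32 bytes each (push %rcx / push %r11, the syscall, then the pops and
ret), with matching unwind annotations. Assuming the usual Xen layout
in which hypercall N occupies slot N, a caller-side sketch; the
hypercall_stub() helper is illustrative, not part of the patch:

	/* sketch: each hypercall stub sits in a fixed 32-byte slot */
	extern char hypercall_page[];
	#define hypercall_stub(nr) ((void *)(hypercall_page + (nr) * 0x20))
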
    19.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c	Mon Mar 05 12:49:12 2007 -0600
    19.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c	Thu Mar 08 14:39:52 2007 -0600
    19.3 @@ -47,11 +47,11 @@ EXPORT_SYMBOL(init_task);
    19.4   * on exact cacheline boundaries, to eliminate cacheline ping-pong.
    19.5   */ 
    19.6  DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
    19.7 -#endif
    19.8  
    19.9  /* Copies of the original ist values from the tss are only accessed during
   19.10   * debugging, no special alignment required.
   19.11   */
   19.12  DEFINE_PER_CPU(struct orig_ist, orig_ist);
   19.13 +#endif
   19.14  
   19.15  #define ALIGN_TO_4K __attribute__((section(".data.init_task")))
    20.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Mon Mar 05 12:49:12 2007 -0600
    20.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c	Thu Mar 08 14:39:52 2007 -0600
    20.3 @@ -676,7 +676,8 @@ void __init setup_arch(char **cmdline_p)
    20.4  
    20.5  	init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT));
    20.6  
    20.7 -	/* dmi_scan_machine(); */
    20.8 +	if (is_initial_xendomain())
    20.9 +		dmi_scan_machine();
   20.10  
   20.11  #ifdef CONFIG_ACPI_NUMA
   20.12  	/*
   20.13 @@ -691,6 +692,14 @@ void __init setup_arch(char **cmdline_p)
   20.14  	contig_initmem_init(0, end_pfn);
   20.15  #endif
   20.16  
   20.17 +#ifdef CONFIG_XEN
   20.18 +	/*
   20.19 +	 * Reserve kernel, physmap, start info, initial page tables, and
   20.20 +	 * direct mapping.
   20.21 +	 */
   20.22 +	reserve_bootmem_generic(__pa_symbol(&_text),
   20.23 +	                        (table_end << PAGE_SHIFT) - __pa_symbol(&_text));
   20.24 +#else
   20.25  	/* Reserve direct mapping */
   20.26  	reserve_bootmem_generic(table_start << PAGE_SHIFT, 
   20.27  				(table_end - table_start) << PAGE_SHIFT);
   20.28 @@ -699,12 +708,6 @@ void __init setup_arch(char **cmdline_p)
   20.29  	reserve_bootmem_generic(__pa_symbol(&_text),
   20.30  				__pa_symbol(&_end) - __pa_symbol(&_text));
   20.31  
   20.32 -#ifdef CONFIG_XEN
   20.33 -	/* reserve physmap, start info and initial page tables */
   20.34 -	reserve_bootmem_generic(__pa_symbol(&_end),
   20.35 -				(table_start << PAGE_SHIFT) -
   20.36 -				__pa_symbol(&_end));
   20.37 -#else
   20.38  	/*
   20.39  	 * reserve physical page 0 - it's a special BIOS page on many boxes,
   20.40  	 * enabling clean reboots, SMP operation, laptop functions.
   20.41 @@ -1625,13 +1628,6 @@ struct seq_operations cpuinfo_op = {
   20.42  	.show =	show_cpuinfo,
   20.43  };
   20.44  
   20.45 -static int __init run_dmi_scan(void)
   20.46 -{
   20.47 -	dmi_scan_machine();
   20.48 -	return 0;
   20.49 -}
   20.50 -core_initcall(run_dmi_scan);
   20.51 -
   20.52  #if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
   20.53  #include <linux/platform_device.h>
   20.54  static __init int add_pcspkr(void)
   20.55 @@ -1639,6 +1635,9 @@ static __init int add_pcspkr(void)
   20.56  	struct platform_device *pd;
   20.57  	int ret;
   20.58  
   20.59 +	if (!is_initial_xendomain())
   20.60 +		return 0;
   20.61 +
   20.62  	pd = platform_device_alloc("pcspkr", -1);
   20.63  	if (!pd)
   20.64  		return -ENOMEM;
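
Besides gating dmi_scan_machine() and add_pcspkr() on the initial
domain (DMI tables and the speaker port are only visible to dom0), the
setup-xen.c hunks collapse the early reservations: under Xen the kernel
image, phys-to-machine map, start info and initial page tables sit
contiguously from _text up to table_end, so one reserve_bootmem call
covers them all. Sketch of the reserved span, assuming table_end counts
page frames as in the hunk:

	/* sketch: a single contiguous early reservation under CONFIG_XEN */
	unsigned long start = __pa_symbol(&_text);
	unsigned long end   = table_end << PAGE_SHIFT;	/* frames to bytes */

	reserve_bootmem_generic(start, end - start);
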
    21.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c	Mon Mar 05 12:49:12 2007 -0600
    21.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.3 @@ -1,59 +0,0 @@
    21.4 -/* Exports for assembly files.
    21.5 -   All C exports should go in the respective C files. */
    21.6 -
    21.7 -#include <linux/module.h>
    21.8 -#include <linux/smp.h>
    21.9 -
   21.10 -#include <asm/semaphore.h>
   21.11 -#include <asm/processor.h>
   21.12 -#include <asm/uaccess.h>
   21.13 -#include <asm/pgtable.h>
   21.14 -
   21.15 -EXPORT_SYMBOL(kernel_thread);
   21.16 -
   21.17 -EXPORT_SYMBOL(__down_failed);
   21.18 -EXPORT_SYMBOL(__down_failed_interruptible);
   21.19 -EXPORT_SYMBOL(__down_failed_trylock);
   21.20 -EXPORT_SYMBOL(__up_wakeup);
   21.21 -
   21.22 -EXPORT_SYMBOL(__get_user_1);
   21.23 -EXPORT_SYMBOL(__get_user_2);
   21.24 -EXPORT_SYMBOL(__get_user_4);
   21.25 -EXPORT_SYMBOL(__get_user_8);
   21.26 -EXPORT_SYMBOL(__put_user_1);
   21.27 -EXPORT_SYMBOL(__put_user_2);
   21.28 -EXPORT_SYMBOL(__put_user_4);
   21.29 -EXPORT_SYMBOL(__put_user_8);
   21.30 -
   21.31 -EXPORT_SYMBOL(copy_user_generic);
   21.32 -EXPORT_SYMBOL(copy_from_user);
   21.33 -EXPORT_SYMBOL(copy_to_user);
   21.34 -
   21.35 -EXPORT_SYMBOL(copy_page);
   21.36 -EXPORT_SYMBOL(clear_page);
   21.37 -
   21.38 -#ifdef CONFIG_SMP
   21.39 -extern void FASTCALL( __write_lock_failed(rwlock_t *rw));
   21.40 -extern void FASTCALL( __read_lock_failed(rwlock_t *rw));
   21.41 -EXPORT_SYMBOL(__write_lock_failed);
   21.42 -EXPORT_SYMBOL(__read_lock_failed);
   21.43 -#endif
   21.44 -
   21.45 -/* Export string functions. We normally rely on gcc builtin for most of these,
   21.46 -   but gcc sometimes decides not to inline them. */    
   21.47 -#undef memcpy
   21.48 -#undef memset
   21.49 -#undef memmove
   21.50 -
   21.51 -extern void * memset(void *,int,__kernel_size_t);
   21.52 -extern void * memcpy(void *,const void *,__kernel_size_t);
   21.53 -extern void * __memcpy(void *,const void *,__kernel_size_t);
   21.54 -
   21.55 -EXPORT_SYMBOL(memset);
   21.56 -EXPORT_SYMBOL(memcpy);
   21.57 -EXPORT_SYMBOL(__memcpy);
   21.58 -
   21.59 -EXPORT_SYMBOL(empty_zero_page);
   21.60 -
   21.61 -EXPORT_SYMBOL(load_gs_index);
   21.62 -
    22.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Mon Mar 05 12:49:12 2007 -0600
    22.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c	Thu Mar 08 14:39:52 2007 -0600
    22.3 @@ -102,14 +102,11 @@ static void __meminit early_make_page_re
    22.4  		BUG();
    22.5  }
    22.6  
    22.7 -void make_page_readonly(void *va, unsigned int feature)
    22.8 +static void __make_page_readonly(void *va)
    22.9  {
   22.10  	pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
   22.11  	unsigned long addr = (unsigned long) va;
   22.12  
   22.13 -	if (xen_feature(feature))
   22.14 -		return;
   22.15 -
   22.16  	pgd = pgd_offset_k(addr);
   22.17  	pud = pud_offset(pgd, addr);
   22.18  	pmd = pmd_offset(pud, addr);
   22.19 @@ -120,17 +117,14 @@ void make_page_readonly(void *va, unsign
   22.20  		xen_l1_entry_update(ptep, pte); /* fallback */
   22.21  
   22.22  	if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
   22.23 -		make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT), feature);
   22.24 +		__make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT));
   22.25  }
   22.26  
   22.27 -void make_page_writable(void *va, unsigned int feature)
   22.28 +static void __make_page_writable(void *va)
   22.29  {
   22.30  	pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep;
   22.31  	unsigned long addr = (unsigned long) va;
   22.32  
   22.33 -	if (xen_feature(feature))
   22.34 -		return;
   22.35 -
   22.36  	pgd = pgd_offset_k(addr);
   22.37  	pud = pud_offset(pgd, addr);
   22.38  	pmd = pmd_offset(pud, addr);
   22.39 @@ -141,7 +135,19 @@ void make_page_writable(void *va, unsign
   22.40  		xen_l1_entry_update(ptep, pte); /* fallback */
   22.41  
   22.42  	if ((addr >= VMALLOC_START) && (addr < VMALLOC_END))
   22.43 -		make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT), feature);
   22.44 +		__make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT));
   22.45 +}
   22.46 +
   22.47 +void make_page_readonly(void *va, unsigned int feature)
   22.48 +{
   22.49 +	if (!xen_feature(feature))
   22.50 +		__make_page_readonly(va);
   22.51 +}
   22.52 +
   22.53 +void make_page_writable(void *va, unsigned int feature)
   22.54 +{
   22.55 +	if (!xen_feature(feature))
   22.56 +		__make_page_writable(va);
   22.57  }
   22.58  
   22.59  void make_pages_readonly(void *va, unsigned nr, unsigned int feature)
   22.60 @@ -150,7 +156,7 @@ void make_pages_readonly(void *va, unsig
   22.61  		return;
   22.62  
   22.63  	while (nr-- != 0) {
   22.64 -		make_page_readonly(va, feature);
   22.65 +		__make_page_readonly(va);
   22.66  		va = (void*)((unsigned long)va + PAGE_SIZE);
   22.67  	}
   22.68  }
   22.69 @@ -161,7 +167,7 @@ void make_pages_writable(void *va, unsig
   22.70  		return;
   22.71  
   22.72  	while (nr-- != 0) {
   22.73 -		make_page_writable(va, feature);
   22.74 +		__make_page_writable(va);
   22.75  		va = (void*)((unsigned long)va + PAGE_SIZE);
   22.76  	}
   22.77  }
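
make_page_readonly() and make_page_writable() keep their signatures,
but the xen_feature() test is hoisted into these thin wrappers; the new
__make_page_*() workers (shared with the batch variants) skip the
repeated check while still recursing into the VMALLOC alias. Call sites
are unchanged, as in set_pte_phys_ma() below:

	/* sketch: unchanged call site; the feature test now runs once in
	 * the wrapper instead of on every (possibly recursive) call */
	make_page_readonly(pmd, XENFEAT_writable_page_tables);
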
   22.78 @@ -208,7 +214,11 @@ static __init void *spp_getpage(void)
   22.79  	void *ptr;
   22.80  	if (after_bootmem)
   22.81  		ptr = (void *) get_zeroed_page(GFP_ATOMIC); 
   22.82 -	else
   22.83 +	else if (start_pfn < table_end) {
   22.84 +		ptr = __va(start_pfn << PAGE_SHIFT);
   22.85 +		start_pfn++;
   22.86 +		memset(ptr, 0, PAGE_SIZE);
   22.87 +	} else
   22.88  		ptr = alloc_bootmem_pages(PAGE_SIZE);
   22.89  	if (!ptr || ((unsigned long)ptr & ~PAGE_MASK))
   22.90  		panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":"");
   22.91 @@ -299,36 +309,26 @@ static __init void set_pte_phys_ma(unsig
   22.92  
   22.93  		pmd = (pmd_t *) spp_getpage(); 
   22.94  		make_page_readonly(pmd, XENFEAT_writable_page_tables);
   22.95 -
   22.96  		set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
   22.97 -
   22.98  		if (pmd != pmd_offset(pud, 0)) {
   22.99  			printk("PAGETABLE BUG #01! %p <-> %p\n", pmd, pmd_offset(pud,0));
  22.100  			return;
  22.101  		}
  22.102  	}
  22.103  	pmd = pmd_offset(pud, vaddr);
  22.104 -
  22.105  	if (pmd_none(*pmd)) {
  22.106  		pte = (pte_t *) spp_getpage();
  22.107  		make_page_readonly(pte, XENFEAT_writable_page_tables);
  22.108 -
  22.109  		set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
  22.110  		if (pte != pte_offset_kernel(pmd, 0)) {
  22.111  			printk("PAGETABLE BUG #02!\n");
  22.112  			return;
  22.113  		}
  22.114  	}
  22.115 -
  22.116  	new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
  22.117 -	pte = pte_offset_kernel(pmd, vaddr);
  22.118  
  22.119 -	/* 
  22.120 -	 * Note that the pte page is already RO, thus we want to use
  22.121 -	 * xen_l1_entry_update(), not set_pte().
  22.122 -	 */
  22.123 -	xen_l1_entry_update(pte, 
  22.124 -			    pfn_pte_ma(phys >> PAGE_SHIFT, prot));
  22.125 +	pte = pte_offset_kernel(pmd, vaddr);
  22.126 +	set_pte(pte, new_pte);
  22.127  
  22.128  	/*
  22.129  	 * It's enough to flush this one mapping.
  22.130 @@ -404,6 +404,7 @@ static inline void __set_pte(pte_t *dst,
  22.131  
  22.132  static inline int make_readonly(unsigned long paddr)
  22.133  {
  22.134 +	extern char __vsyscall_0;
  22.135  	int readonly = 0;
  22.136  
  22.137  	/* Make new page tables read-only. */
  22.138 @@ -420,25 +421,45 @@ static inline int make_readonly(unsigned
  22.139  	/*
  22.140  	 * No need for writable mapping of kernel image. This also ensures that
  22.141  	 * page and descriptor tables embedded inside don't have writable
  22.142 -	 * mappings. 
  22.143 +	 * mappings. Exclude the vsyscall area here, allowing alternative
  22.144 +	 * instruction patching to work.
  22.145  	 */
  22.146 -	if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)))
  22.147 +	if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end))
  22.148 +	    && !(paddr >= __pa_symbol(&__vsyscall_0)
  22.149 +	         && paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE))
  22.150  		readonly = 1;
  22.151  
  22.152  	return readonly;
  22.153  }
  22.154  
  22.155 +#ifndef CONFIG_XEN
  22.156  /* Must run before zap_low_mappings */
  22.157  __init void *early_ioremap(unsigned long addr, unsigned long size)
  22.158  {
  22.159 -	return ioremap(addr, size);
  22.160 +	unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
  22.161 +
  22.162 +	/* actually usually some more */
  22.163 +	if (size >= LARGE_PAGE_SIZE) {
  22.164 +		printk("SMBIOS area too long %lu\n", size);
  22.165 +		return NULL;
  22.166 +	}
  22.167 +	set_pmd(temp_mappings[0].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
  22.168 +	map += LARGE_PAGE_SIZE;
  22.169 +	set_pmd(temp_mappings[1].pmd,  __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
  22.170 +	__flush_tlb();
  22.171 +	return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
  22.172  }
  22.173  
  22.174  /* To avoid virtual aliases later */
  22.175  __init void early_iounmap(void *addr, unsigned long size)
  22.176  {
  22.177 -	iounmap(addr);
  22.178 +	if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
  22.179 +		printk("early_iounmap: bad address %p\n", addr);
  22.180 +	set_pmd(temp_mappings[0].pmd, __pmd(0));
  22.181 +	set_pmd(temp_mappings[1].pmd, __pmd(0));
  22.182 +	__flush_tlb();
  22.183  }
  22.184 +#endif
  22.185  
  22.186  static void __meminit
  22.187  phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
  22.188 @@ -638,7 +659,7 @@ static void __init extend_init_mapping(u
  22.189  
  22.190  static void __init find_early_table_space(unsigned long end)
  22.191  {
  22.192 -	unsigned long puds, pmds, ptes, tables; 
  22.193 +	unsigned long puds, pmds, ptes, tables;
  22.194  
  22.195  	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
  22.196  	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
  22.197 @@ -658,12 +679,70 @@ static void __init find_early_table_spac
  22.198  		(table_start << PAGE_SHIFT) + tables);
  22.199  }
  22.200  
  22.201 +static void xen_finish_init_mapping(void)
  22.202 +{
  22.203 +	unsigned long i, start, end;
  22.204 +
  22.205 +	/* Re-vector virtual addresses pointing into the initial
  22.206 +	   mapping to the just-established permanent ones. */
  22.207 +	xen_start_info = __va(__pa(xen_start_info));
  22.208 +	xen_start_info->pt_base = (unsigned long)
  22.209 +		__va(__pa(xen_start_info->pt_base));
  22.210 +	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  22.211 +		phys_to_machine_mapping =
  22.212 +			__va(__pa(xen_start_info->mfn_list));
  22.213 +		xen_start_info->mfn_list = (unsigned long)
  22.214 +			phys_to_machine_mapping;
  22.215 +	}
  22.216 +	if (xen_start_info->mod_start)
  22.217 +		xen_start_info->mod_start = (unsigned long)
  22.218 +			__va(__pa(xen_start_info->mod_start));
  22.219 +
  22.220 +	/* Destroy the Xen-created mappings beyond the kernel image as
  22.221 +	 * well as the temporary mappings created above. Prevents
  22.222 +	 * overlap with modules area (if init mapping is very big).
  22.223 +	 */
  22.224 +	start = PAGE_ALIGN((unsigned long)_end);
  22.225 +	end   = __START_KERNEL_map + (table_end << PAGE_SHIFT);
  22.226 +	for (; start < end; start += PAGE_SIZE)
  22.227 +		WARN_ON(HYPERVISOR_update_va_mapping(
  22.228 +			start, __pte_ma(0), 0));
  22.229 +
  22.230 +	/* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */
  22.231 +	table_end = ~0UL;
  22.232 +
  22.233 +	/*
  22.234 +	 * Prefetch pte's for the bt_ioremap() area. It gets used before the
  22.235 +	 * boot-time allocator is online, so allocate-on-demand would fail.
  22.236 +	 */
  22.237 +	for (i = FIX_BTMAP_END; i <= FIX_BTMAP_BEGIN; i++)
  22.238 +		__set_fixmap(i, 0, __pgprot(0));
  22.239 +
  22.240 +	/* Switch to the real shared_info page, and clear the dummy page. */
  22.241 +	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
  22.242 +	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
  22.243 +	memset(empty_zero_page, 0, sizeof(empty_zero_page));
  22.244 +
  22.245 +	/* Set up mapping of lowest 1MB of physical memory. */
  22.246 +	for (i = 0; i < NR_FIX_ISAMAPS; i++)
  22.247 +		if (is_initial_xendomain())
  22.248 +			set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
  22.249 +		else
  22.250 +			__set_fixmap(FIX_ISAMAP_BEGIN - i,
  22.251 +				     virt_to_mfn(empty_zero_page)
  22.252 +				     << PAGE_SHIFT,
  22.253 +				     PAGE_KERNEL_RO);
  22.254 +
  22.255 +	/* Disable the 'start_pfn' allocator. */
  22.256 +	table_end = start_pfn;
  22.257 +}
  22.258 +
  22.259  /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
  22.260     This runs before bootmem is initialized and gets pages directly from the 
  22.261     physical memory. To access them they are temporarily mapped. */
  22.262  void __meminit init_memory_mapping(unsigned long start, unsigned long end)
  22.263  { 
  22.264 -	unsigned long next; 
  22.265 +	unsigned long next;
  22.266  
  22.267  	Dprintk("init_memory_mapping\n");
  22.268  
  22.269 @@ -702,31 +781,7 @@ void __meminit init_memory_mapping(unsig
  22.270  
  22.271  	if (!after_bootmem) {
  22.272  		BUG_ON(start_pfn != table_end);
  22.273 -
  22.274 -		/* Re-vector virtual addresses pointing into the initial
  22.275 -		   mapping to the just-established permanent ones. */
  22.276 -		xen_start_info = __va(__pa(xen_start_info));
  22.277 -		xen_start_info->pt_base = (unsigned long)
  22.278 -			__va(__pa(xen_start_info->pt_base));
  22.279 -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
  22.280 -			phys_to_machine_mapping =
  22.281 -				__va(__pa(xen_start_info->mfn_list));
  22.282 -			xen_start_info->mfn_list = (unsigned long)
  22.283 -				phys_to_machine_mapping;
  22.284 -		}
  22.285 -		if (xen_start_info->mod_start)
  22.286 -			xen_start_info->mod_start = (unsigned long)
  22.287 -				__va(__pa(xen_start_info->mod_start));
  22.288 -
  22.289 -		/* Destroy the Xen-created mappings beyond the kernel image as
  22.290 -		 * well as the temporary mappings created above. Prevents
  22.291 -		 * overlap with modules area (if init mapping is very big).
  22.292 -		 */
  22.293 -		start = PAGE_ALIGN((unsigned long)_end);
  22.294 -		end   = __START_KERNEL_map + (table_end << PAGE_SHIFT);
  22.295 -		for (; start < end; start += PAGE_SIZE)
  22.296 -			WARN_ON(HYPERVISOR_update_va_mapping(
  22.297 -				start, __pte_ma(0), 0));
  22.298 +		xen_finish_init_mapping();
  22.299  	}
  22.300  
  22.301  	__flush_tlb_all();
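
xen_finish_init_mapping() above briefly re-opens the early page
allocator to populate the fixmap ptes: table_end is the high-water mark
that spp_getpage() now consults before falling back to bootmem (see the
spp_getpage() hunk earlier in this file). Condensed sketch of that
window:

	/* sketch: table_end gates the early 'start_pfn' allocator used
	 * by spp_getpage(); ~0UL opens the window, start_pfn closes it */
	table_end = ~0UL;		/* allocate-on-demand from start_pfn */
	for (i = FIX_BTMAP_END; i <= FIX_BTMAP_BEGIN; i++)
		__set_fixmap(i, 0, __pgprot(0));  /* prefetch bt_ioremap ptes */
	table_end = start_pfn;		/* close the early allocator */
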
  22.302 @@ -805,7 +860,6 @@ size_zones(unsigned long *z, unsigned lo
  22.303  void __init paging_init(void)
  22.304  {
  22.305  	unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
  22.306 -	int i;
  22.307  
  22.308  	memory_present(0, 0, end_pfn);
  22.309  	sparse_init();
  22.310 @@ -813,22 +867,7 @@ void __init paging_init(void)
  22.311  	free_area_init_node(0, NODE_DATA(0), zones,
  22.312  			    __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
  22.313  
  22.314 -	/* Switch to the real shared_info page, and clear the
  22.315 -	 * dummy page. */
  22.316 -	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
  22.317 -	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
  22.318 -	memset(empty_zero_page, 0, sizeof(empty_zero_page));
  22.319 -
  22.320  	init_mm.context.pinned = 1;
  22.321 -
  22.322 -	/* Setup mapping of lower 1st MB */
  22.323 -	for (i = 0; i < NR_FIX_ISAMAPS; i++)
  22.324 -		if (is_initial_xendomain())
  22.325 -			set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
  22.326 -		else
  22.327 -			__set_fixmap(FIX_ISAMAP_BEGIN - i,
  22.328 -				     virt_to_mfn(empty_zero_page) << PAGE_SHIFT,
  22.329 -				     PAGE_KERNEL_RO);
  22.330  }
  22.331  #endif
  22.332  
  22.333 @@ -1028,11 +1067,6 @@ void __init mem_init(void)
  22.334  
  22.335  void free_init_pages(char *what, unsigned long begin, unsigned long end)
  22.336  {
  22.337 -#ifdef __DO_LATER__
  22.338 -	/*
  22.339 -	 * Some pages can be pinned, but some are not. Unpinning such pages 
  22.340 -	 * triggers BUG(). 
  22.341 -	 */
  22.342  	unsigned long addr;
  22.343  
  22.344  	if (begin >= end)
  22.345 @@ -1044,25 +1078,27 @@ void free_init_pages(char *what, unsigne
  22.346  		init_page_count(virt_to_page(addr));
  22.347  		memset((void *)(addr & ~(PAGE_SIZE-1)),
  22.348  		       POISON_FREE_INITMEM, PAGE_SIZE); 
  22.349 -		make_page_writable(
  22.350 -			__va(__pa(addr)), XENFEAT_writable_page_tables);
  22.351 -		/*
  22.352 -		 * Make pages from __PAGE_OFFSET address as well
  22.353 -		 */
  22.354 -		make_page_writable(
  22.355 -			(void *)addr, XENFEAT_writable_page_tables);
  22.356 +		if (addr >= __START_KERNEL_map) {
  22.357 +			/* make_readonly() reports all kernel addresses. */
  22.358 +			__make_page_writable(__va(__pa(addr)));
  22.359 +			if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
  22.360 +				pgd_t *pgd = pgd_offset_k(addr);
  22.361 +				pud_t *pud = pud_offset(pgd, addr);
  22.362 +				pmd_t *pmd = pmd_offset(pud, addr);
  22.363 +				pte_t *pte = pte_offset_kernel(pmd, addr);
  22.364 +
  22.365 +				xen_l1_entry_update(pte, __pte(0)); /* fallback */
  22.366 +			}
  22.367 +		}
  22.368  		free_page(addr);
  22.369  		totalram_pages++;
  22.370  	}
  22.371 -#endif
  22.372  }
  22.373  
  22.374  void free_initmem(void)
  22.375  {
  22.376 -#ifdef __DO_LATER__
  22.377  	memset(__initdata_begin, POISON_FREE_INITDATA,
  22.378  		__initdata_end - __initdata_begin);
  22.379 -#endif
  22.380  	free_init_pages("unused kernel memory",
  22.381  			(unsigned long)(&__init_begin),
  22.382  			(unsigned long)(&__init_end));
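
free_init_pages() loses its __DO_LATER__ guard, so init pages really
are freed now. Pages in the high kernel mapping are first made writable
again and their alias pte cleared; when the direct hypercall fails (the
pte page itself may be read-only), the code walks to the pte and uses
xen_l1_entry_update(). Condensed sketch of that fallback:

	/* sketch: clear the __START_KERNEL_map alias, falling back to an
	 * explicit l1 update if the hypercall refuses the fast path */
	if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
		pgd_t *pgd = pgd_offset_k(addr);
		pud_t *pud = pud_offset(pgd, addr);
		pmd_t *pmd = pmd_offset(pud, addr);

		xen_l1_entry_update(pte_offset_kernel(pmd, addr), __pte(0));
	}
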
  22.383 @@ -1125,7 +1161,7 @@ int kern_addr_valid(unsigned long addr)
  22.384  	if (pgd_none(*pgd))
  22.385  		return 0;
  22.386  
  22.387 -	pud = pud_offset_k(pgd, addr);
  22.388 +	pud = pud_offset(pgd, addr);
  22.389  	if (pud_none(*pud))
  22.390  		return 0; 
  22.391  
    23.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Mon Mar 05 12:49:12 2007 -0600
    23.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c	Thu Mar 08 14:39:52 2007 -0600
    23.3 @@ -24,10 +24,13 @@ static inline void mm_walk_set_prot(void
    23.4  {
    23.5  	struct page *page = virt_to_page(pt);
    23.6  	unsigned long pfn = page_to_pfn(page);
    23.7 +	int rc;
    23.8  
    23.9 -	BUG_ON(HYPERVISOR_update_va_mapping(
   23.10 -		       (unsigned long)__va(pfn << PAGE_SHIFT),
   23.11 -		       pfn_pte(pfn, flags), 0));
   23.12 +	rc = HYPERVISOR_update_va_mapping(
   23.13 +		(unsigned long)__va(pfn << PAGE_SHIFT),
   23.14 +		pfn_pte(pfn, flags), 0);
   23.15 +	if (rc)
   23.16 +		BUG();
   23.17  }
   23.18  
   23.19  static void mm_walk(struct mm_struct *mm, pgprot_t flags)
    24.1 --- a/linux-2.6-xen-sparse/drivers/char/mem.c	Mon Mar 05 12:49:12 2007 -0600
    24.2 +++ b/linux-2.6-xen-sparse/drivers/char/mem.c	Thu Mar 08 14:39:52 2007 -0600
    24.3 @@ -787,7 +787,7 @@ static const struct file_operations mem_
    24.4  	.open		= open_mem,
    24.5  };
    24.6  #else
    24.7 -extern struct file_operations mem_fops;
    24.8 +extern const struct file_operations mem_fops;
    24.9  #endif
   24.10  
   24.11  static const struct file_operations kmem_fops = {
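
Several file_operations tables in this changeset gain const (mem_fops
here, blktap_fops and evtchn_fops below): the tables are never written
after initialisation, so const lets them live in read-only data and
matches the const-ified VFS declarations. The shape, with hypothetical
example_* names:

	/* sketch: a const fops table can be placed in .rodata */
	static const struct file_operations example_fops = {
		.owner = THIS_MODULE,
		.read  = example_read,
		.open  = example_open,
	};
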
    25.1 --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Mon Mar 05 12:49:12 2007 -0600
    25.2 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c	Thu Mar 08 14:39:52 2007 -0600
    25.3 @@ -113,14 +113,13 @@ void __exit tpmif_exit(void);
    25.4  
    25.5  
    25.6  static inline int
    25.7 -tx_buffer_copy(struct tx_buffer *txb, const u8 * src, int len,
    25.8 +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len,
    25.9                 int isuserbuffer)
   25.10  {
   25.11  	int copied = len;
   25.12  
   25.13 -	if (len > txb->size) {
   25.14 +	if (len > txb->size)
   25.15  		copied = txb->size;
   25.16 -	}
   25.17  	if (isuserbuffer) {
   25.18  		if (copy_from_user(txb->data, src, copied))
   25.19  			return -EFAULT;
   25.20 @@ -133,18 +132,20 @@ tx_buffer_copy(struct tx_buffer *txb, co
   25.21  
   25.22  static inline struct tx_buffer *tx_buffer_alloc(void)
   25.23  {
   25.24 -	struct tx_buffer *txb = kzalloc(sizeof (struct tx_buffer),
   25.25 -					GFP_KERNEL);
   25.26 +	struct tx_buffer *txb;
   25.27 +
   25.28 +	txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL);
   25.29 +	if (!txb)
   25.30 +		return NULL;
   25.31  
   25.32 -	if (txb) {
   25.33 -		txb->len = 0;
   25.34 -		txb->size = PAGE_SIZE;
   25.35 -		txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
   25.36 -		if (txb->data == NULL) {
   25.37 -			kfree(txb);
   25.38 -			txb = NULL;
   25.39 -		}
   25.40 +	txb->len = 0;
   25.41 +	txb->size = PAGE_SIZE;
   25.42 +	txb->data = (unsigned char *)__get_free_page(GFP_KERNEL);
   25.43 +	if (txb->data == NULL) {
   25.44 +		kfree(txb);
   25.45 +		txb = NULL;
   25.46  	}
   25.47 +
   25.48  	return txb;
   25.49  }
   25.50  
   25.51 @@ -160,37 +161,41 @@ static inline void tx_buffer_free(struct
   25.52  /**************************************************************
   25.53   Utility function for the tpm_private structure
   25.54  **************************************************************/
   25.55 -static inline void tpm_private_init(struct tpm_private *tp)
   25.56 +static void tpm_private_init(struct tpm_private *tp)
   25.57  {
   25.58  	spin_lock_init(&tp->tx_lock);
   25.59  	init_waitqueue_head(&tp->wait_q);
   25.60  	atomic_set(&tp->refcnt, 1);
   25.61  }
   25.62  
   25.63 -static inline void tpm_private_put(void)
   25.64 +static void tpm_private_put(void)
   25.65  {
   25.66 -	if ( atomic_dec_and_test(&my_priv->refcnt)) {
   25.67 -		tpmif_free_tx_buffers(my_priv);
   25.68 -		kfree(my_priv);
   25.69 -		my_priv = NULL;
   25.70 -	}
   25.71 +	if (!atomic_dec_and_test(&my_priv->refcnt))
   25.72 +		return;
   25.73 +
   25.74 +	tpmif_free_tx_buffers(my_priv);
   25.75 +	kfree(my_priv);
   25.76 +	my_priv = NULL;
   25.77  }
   25.78  
   25.79  static struct tpm_private *tpm_private_get(void)
   25.80  {
   25.81  	int err;
   25.82 -	if (!my_priv) {
   25.83 -		my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
   25.84 -		if (my_priv) {
   25.85 -			tpm_private_init(my_priv);
   25.86 -			err = tpmif_allocate_tx_buffers(my_priv);
   25.87 -			if (err < 0) {
   25.88 -				tpm_private_put();
   25.89 -			}
   25.90 -		}
   25.91 -	} else {
   25.92 +
   25.93 +	if (my_priv) {
   25.94  		atomic_inc(&my_priv->refcnt);
   25.95 +		return my_priv;
   25.96  	}
   25.97 +
   25.98 +	my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL);
   25.99 +	if (!my_priv)
  25.100 +		return NULL;
  25.101 +
  25.102 +	tpm_private_init(my_priv);
  25.103 +	err = tpmif_allocate_tx_buffers(my_priv);
  25.104 +	if (err < 0)
  25.105 +		tpm_private_put();
  25.106 +
  25.107  	return my_priv;
  25.108  }
  25.109  
  25.110 @@ -379,10 +384,8 @@ static int tpmfront_probe(struct xenbus_
  25.111  		return -ENOMEM;
  25.112  
  25.113  	tp->chip = init_vtpm(&dev->dev, &tvd, tp);
  25.114 -
  25.115 -	if (IS_ERR(tp->chip)) {
  25.116 +	if (IS_ERR(tp->chip))
  25.117  		return PTR_ERR(tp->chip);
  25.118 -	}
  25.119  
  25.120  	err = xenbus_scanf(XBT_NIL, dev->nodename,
  25.121  	                   "handle", "%i", &handle);
  25.122 @@ -401,6 +404,7 @@ static int tpmfront_probe(struct xenbus_
  25.123  		tpm_private_put();
  25.124  		return err;
  25.125  	}
  25.126 +
  25.127  	return 0;
  25.128  }
  25.129  
  25.130 @@ -417,32 +421,36 @@ static int tpmfront_suspend(struct xenbu
  25.131  {
  25.132  	struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
  25.133  	u32 ctr;
  25.134 -	/* lock, so no app can send */
  25.135 +
  25.136 +	/* Take the lock, preventing any application from sending. */
  25.137  	mutex_lock(&suspend_lock);
  25.138  	tp->is_suspended = 1;
  25.139  
  25.140 -	for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 300; ctr++) {
  25.141 +	for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) {
  25.142  		if ((ctr % 10) == 0)
  25.143  			printk("TPM-FE [INFO]: Waiting for outstanding "
  25.144  			       "request.\n");
  25.145 -		/*
  25.146 -		 * Wait for a request to be responded to.
  25.147 -		 */
  25.148 +		/* Wait for a request to be responded to. */
  25.149  		interruptible_sleep_on_timeout(&tp->wait_q, 100);
  25.150  	}
  25.151 -	xenbus_switch_state(dev, XenbusStateClosing);
  25.152 -
  25.153 -	if (atomic_read(&tp->tx_busy)) {
  25.154 -		/*
  25.155 -		 * A temporary work-around.
  25.156 -		 */
  25.157 -		printk("TPM-FE [WARNING]: Resetting busy flag.");
  25.158 -		atomic_set(&tp->tx_busy, 0);
  25.159 -	}
  25.160  
  25.161  	return 0;
  25.162  }
  25.163  
  25.164 +static int tpmfront_suspend_finish(struct tpm_private *tp)
  25.165 +{
  25.166 +	tp->is_suspended = 0;
  25.167 +	/* Allow applications to send again. */
  25.168 +	mutex_unlock(&suspend_lock);
  25.169 +	return 0;
  25.170 +}
  25.171 +
  25.172 +static int tpmfront_suspend_cancel(struct xenbus_device *dev)
  25.173 +{
  25.174 +	struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
  25.175 +	return tpmfront_suspend_finish(tp);
  25.176 +}
  25.177 +
  25.178  static int tpmfront_resume(struct xenbus_device *dev)
  25.179  {
  25.180  	struct tpm_private *tp = tpm_private_from_dev(&dev->dev);
  25.181 @@ -484,6 +492,7 @@ static struct xenbus_driver tpmfront = {
  25.182  	.resume = tpmfront_resume,
  25.183  	.otherend_changed = backend_changed,
  25.184  	.suspend = tpmfront_suspend,
  25.185 +	.suspend_cancel = tpmfront_suspend_cancel,
  25.186  };
  25.187  
  25.188  static void __init init_tpm_xenbus(void)
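
tpmfront gains a suspend_cancel handler: tpmfront_suspend() now only
quiesces (it no longer switches to XenbusStateClosing or forcibly
clears tx_busy), and the shared tpmfront_suspend_finish() path reopens
the driver either on reconnect (resume) or when the save is aborted
(suspend_cancel). Assuming a frontend using the same hook, the
lifecycle wiring looks like this; the example_* names are hypothetical:

	/* sketch: frontend suspend/resume wiring with the new hook */
	static struct xenbus_driver example_front = {
		.suspend        = example_suspend,         /* quiesce, take lock */
		.suspend_cancel = example_suspend_cancel,  /* aborted: just unlock */
		.resume         = example_resume,          /* reconnect, unlock */
	};
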
  25.189 @@ -514,9 +523,8 @@ static void tpmif_free_tx_buffers(struct
  25.190  {
  25.191  	unsigned int i;
  25.192  
  25.193 -	for (i = 0; i < TPMIF_TX_RING_SIZE; i++) {
  25.194 +	for (i = 0; i < TPMIF_TX_RING_SIZE; i++)
  25.195  		tx_buffer_free(tp->tx_buffers[i]);
  25.196 -	}
  25.197  }
  25.198  
  25.199  static void tpmif_rx_action(unsigned long priv)
  25.200 @@ -536,9 +544,8 @@ static void tpmif_rx_action(unsigned lon
  25.201  	received = tx->size;
  25.202  
  25.203  	buffer = kmalloc(received, GFP_ATOMIC);
  25.204 -	if (NULL == buffer) {
  25.205 +	if (!buffer)
  25.206  		goto exit;
  25.207 -	}
  25.208  
  25.209  	for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) {
  25.210  		struct tx_buffer *txb = tp->tx_buffers[i];
  25.211 @@ -547,9 +554,8 @@ static void tpmif_rx_action(unsigned lon
  25.212  
  25.213  		tx = &tp->tx->ring[i].req;
  25.214  		tocopy = tx->size;
  25.215 -		if (tocopy > PAGE_SIZE) {
  25.216 +		if (tocopy > PAGE_SIZE)
  25.217  			tocopy = PAGE_SIZE;
  25.218 -		}
  25.219  
  25.220  		memcpy(&buffer[offset], txb->data, tocopy);
  25.221  
  25.222 @@ -607,12 +613,13 @@ static int tpm_xmit(struct tpm_private *
  25.223  		struct tx_buffer *txb = tp->tx_buffers[i];
  25.224  		int copied;
  25.225  
  25.226 -		if (NULL == txb) {
  25.227 +		if (!txb) {
   25.228  			DPRINTK("txb (i=%d) is NULL. buffers initialized?\n"
  25.229  				"Not transmitting anything!\n", i);
  25.230  			spin_unlock_irq(&tp->tx_lock);
  25.231  			return -EFAULT;
  25.232  		}
  25.233 +
  25.234  		copied = tx_buffer_copy(txb, &buf[offset], count,
  25.235  		                        isuserbuffer);
  25.236  		if (copied < 0) {
  25.237 @@ -624,25 +631,26 @@ static int tpm_xmit(struct tpm_private *
  25.238  		offset += copied;
  25.239  
  25.240  		tx = &tp->tx->ring[i].req;
  25.241 -
  25.242  		tx->addr = virt_to_machine(txb->data);
  25.243  		tx->size = txb->len;
  25.244  
  25.245 -		DPRINTK("First 4 characters sent by TPM-FE are 0x%02x 0x%02x 0x%02x 0x%02x\n",
  25.246 +		DPRINTK("First 4 characters sent by TPM-FE are "
  25.247 +			"0x%02x 0x%02x 0x%02x 0x%02x\n",
  25.248  		        txb->data[0],txb->data[1],txb->data[2],txb->data[3]);
  25.249  
  25.250 -		/* get the granttable reference for this page */
  25.251 +		/* Get the granttable reference for this page. */
  25.252  		tx->ref = gnttab_claim_grant_reference(&gref_head);
  25.253 -
  25.254 -		if (-ENOSPC == tx->ref) {
  25.255 +		if (tx->ref == -ENOSPC) {
  25.256  			spin_unlock_irq(&tp->tx_lock);
  25.257 -			DPRINTK(" Grant table claim reference failed in func:%s line:%d file:%s\n", __FUNCTION__, __LINE__, __FILE__);
  25.258 +			DPRINTK("Grant table claim reference failed in "
  25.259 +				"func:%s line:%d file:%s\n",
  25.260 +				__FUNCTION__, __LINE__, __FILE__);
  25.261  			return -ENOSPC;
  25.262  		}
  25.263 -		gnttab_grant_foreign_access_ref( tx->ref,
  25.264 -		                                 tp->backend_id,
  25.265 -		                                 virt_to_mfn(txb->data),
  25.266 -		                                 0 /*RW*/);
  25.267 +		gnttab_grant_foreign_access_ref(tx->ref,
  25.268 +						tp->backend_id,
  25.269 +						virt_to_mfn(txb->data),
  25.270 +						0 /*RW*/);
  25.271  		wmb();
  25.272  	}
  25.273  
  25.274 @@ -660,15 +668,10 @@ static int tpm_xmit(struct tpm_private *
  25.275  
  25.276  static void tpmif_notify_upperlayer(struct tpm_private *tp)
  25.277  {
  25.278 -	/*
  25.279 -	 * Notify upper layer about the state of the connection
  25.280 -	 * to the BE.
  25.281 -	 */
  25.282 -	if (tp->is_connected) {
  25.283 -		vtpm_vd_status(tp->chip, TPM_VD_STATUS_CONNECTED);
  25.284 -	} else {
  25.285 -		vtpm_vd_status(tp->chip, TPM_VD_STATUS_DISCONNECTED);
  25.286 -	}
  25.287 +	/* Notify upper layer about the state of the connection to the BE. */
  25.288 +	vtpm_vd_status(tp->chip, (tp->is_connected
  25.289 +				  ? TPM_VD_STATUS_CONNECTED
  25.290 +				  : TPM_VD_STATUS_DISCONNECTED));
  25.291  }
  25.292  
  25.293  
  25.294 @@ -679,20 +682,16 @@ static void tpmif_set_connected_state(st
  25.295  	 * should disconnect - assumption is that we will resume
  25.296  	 * The mutex keeps apps from sending.
  25.297  	 */
  25.298 -	if (is_connected == 0 && tp->is_suspended == 1) {
  25.299 +	if (is_connected == 0 && tp->is_suspended == 1)
  25.300  		return;
  25.301 -	}
  25.302  
  25.303  	/*
  25.304  	 * Unlock the mutex if we are connected again
  25.305  	 * after being suspended - now resuming.
  25.306  	 * This also removes the suspend state.
  25.307  	 */
  25.308 -	if (is_connected == 1 && tp->is_suspended == 1) {
  25.309 -		tp->is_suspended = 0;
  25.310 -		/* unlock, so apps can resume sending */
  25.311 -		mutex_unlock(&suspend_lock);
  25.312 -	}
  25.313 +	if (is_connected == 1 && tp->is_suspended == 1)
  25.314 +		tpmfront_suspend_finish(tp);
  25.315  
  25.316  	if (is_connected != tp->is_connected) {
  25.317  		tp->is_connected = is_connected;
  25.318 @@ -710,33 +709,24 @@ static void tpmif_set_connected_state(st
  25.319  
  25.320  static int __init tpmif_init(void)
  25.321  {
  25.322 -	long rc = 0;
  25.323  	struct tpm_private *tp;
  25.324  
  25.325  	if (is_initial_xendomain())
  25.326  		return -EPERM;
  25.327  
  25.328  	tp = tpm_private_get();
  25.329 -	if (!tp) {
  25.330 -		rc = -ENOMEM;
  25.331 -		goto failexit;
  25.332 -	}
  25.333 +	if (!tp)
  25.334 +		return -ENOMEM;
  25.335  
  25.336  	IPRINTK("Initialising the vTPM driver.\n");
  25.337 -	if ( gnttab_alloc_grant_references ( TPMIF_TX_RING_SIZE,
  25.338 -	                                     &gref_head ) < 0) {
  25.339 -		rc = -EFAULT;
  25.340 -		goto gnttab_alloc_failed;
  25.341 +	if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE,
  25.342 +					  &gref_head) < 0) {
  25.343 +		tpm_private_put();
  25.344 +		return -EFAULT;
  25.345  	}
  25.346  
  25.347  	init_tpm_xenbus();
  25.348  	return 0;
  25.349 -
  25.350 -gnttab_alloc_failed:
  25.351 -	tpm_private_put();
  25.352 -failexit:
  25.353 -
  25.354 -	return (int)rc;
  25.355  }
  25.356  
  25.357  
    26.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Mon Mar 05 12:49:12 2007 -0600
    26.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c	Thu Mar 08 14:39:52 2007 -0600
    26.3 @@ -335,7 +335,7 @@ static int blktap_ioctl(struct inode *in
    26.4                          unsigned int cmd, unsigned long arg);
    26.5  static unsigned int blktap_poll(struct file *file, poll_table *wait);
    26.6  
    26.7 -static struct file_operations blktap_fops = {
    26.8 +static const struct file_operations blktap_fops = {
    26.9  	.owner   = THIS_MODULE,
   26.10  	.poll    = blktap_poll,
   26.11  	.ioctl   = blktap_ioctl,
    27.1 --- a/linux-2.6-xen-sparse/drivers/xen/char/mem.c	Mon Mar 05 12:49:12 2007 -0600
    27.2 +++ b/linux-2.6-xen-sparse/drivers/xen/char/mem.c	Thu Mar 08 14:39:52 2007 -0600
    27.3 @@ -194,7 +194,7 @@ static int open_mem(struct inode * inode
    27.4  	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
    27.5  }
    27.6  
    27.7 -struct file_operations mem_fops = {
    27.8 +const struct file_operations mem_fops = {
    27.9  	.llseek		= memory_lseek,
   27.10  	.read		= read_mem,
   27.11  	.write		= write_mem,
    28.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c	Mon Mar 05 12:49:12 2007 -0600
    28.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c	Thu Mar 08 14:39:52 2007 -0600
    28.3 @@ -59,27 +59,11 @@ EXPORT_SYMBOL(machine_restart);
    28.4  EXPORT_SYMBOL(machine_halt);
    28.5  EXPORT_SYMBOL(machine_power_off);
    28.6  
    28.7 -/* Ensure we run on the idle task page tables so that we will
    28.8 -   switch page tables before running user space. This is needed
    28.9 -   on architectures with separate kernel and user page tables
   28.10 -   because the user page table pointer is not saved/restored. */
   28.11 -static void switch_idle_mm(void)
   28.12 -{
   28.13 -	struct mm_struct *mm = current->active_mm;
   28.14 -
   28.15 -	if (mm == &init_mm)
   28.16 -		return;
   28.17 -
   28.18 -	atomic_inc(&init_mm.mm_count);
   28.19 -	switch_mm(mm, &init_mm, current);
   28.20 -	current->active_mm = &init_mm;
   28.21 -	mmdrop(mm);
   28.22 -}
   28.23 -
   28.24  static void pre_suspend(void)
   28.25  {
   28.26  	HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
   28.27 -	clear_fixmap(FIX_SHARED_INFO);
   28.28 +	HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
   28.29 +				     __pte_ma(0), 0);
   28.30  
   28.31  	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);
   28.32  	xen_start_info->console.domU.mfn =
   28.33 @@ -89,6 +73,7 @@ static void pre_suspend(void)
   28.34  static void post_suspend(int suspend_cancelled)
   28.35  {
   28.36  	int i, j, k, fpp;
   28.37 +	unsigned long shinfo_mfn;
   28.38  	extern unsigned long max_pfn;
   28.39  	extern unsigned long *pfn_to_mfn_frame_list_list;
   28.40  	extern unsigned long *pfn_to_mfn_frame_list[];
   28.41 @@ -99,11 +84,14 @@ static void post_suspend(int suspend_can
   28.42  		xen_start_info->console.domU.mfn =
   28.43  			pfn_to_mfn(xen_start_info->console.domU.mfn);
   28.44  	} else {
   28.45 +#ifdef CONFIG_SMP
   28.46  		cpu_initialized_map = cpumask_of_cpu(0);
   28.47 +#endif
   28.48  	}
   28.49 -	
   28.50 -	set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
   28.51  
   28.52 +	shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
   28.53 +	HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO),
   28.54 +				     pfn_pte_ma(shinfo_mfn, PAGE_KERNEL), 0);
   28.55  	HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
   28.56  
   28.57  	memset(empty_zero_page, 0, PAGE_SIZE);
   28.58 @@ -172,10 +160,25 @@ static int take_machine_down(void *p_fas
   28.59  
   28.60  	post_suspend(suspend_cancelled);
   28.61  	gnttab_resume();
   28.62 -	if (!suspend_cancelled)
   28.63 +	if (!suspend_cancelled) {
   28.64  		irq_resume();
   28.65 +#ifdef __x86_64__
   28.66 +		/*
   28.67 +		 * Older versions of Xen do not save/restore the user %cr3.
   28.68 +		 * We do it here just in case, but there's no need if we are
   28.69 +		 * in fast-suspend mode as that implies a new enough Xen.
   28.70 +		 */
   28.71 +		if (!fast_suspend) {
   28.72 +			struct mmuext_op op;
   28.73 +			op.cmd = MMUEXT_NEW_USER_BASEPTR;
   28.74 +			op.arg1.mfn = pfn_to_mfn(__pa(__user_pgd(
   28.75 +				current->active_mm->pgd)) >> PAGE_SHIFT);
   28.76 +			if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
   28.77 +				BUG();
   28.78 +		}
   28.79 +#endif
   28.80 +	}
   28.81  	time_resume();
   28.82 -	switch_idle_mm();
   28.83  	local_irq_enable();
   28.84  
   28.85  	if (fast_suspend && !suspend_cancelled) {
   28.86 @@ -210,6 +213,10 @@ int __xen_suspend(int fast_suspend)
   28.87  	}
   28.88  #endif
   28.89  
   28.90 +	/* If we are definitely UP then 'slow mode' is actually faster. */
   28.91 +	if (num_possible_cpus() == 1)
   28.92 +		fast_suspend = 0;
   28.93 +
   28.94  	if (fast_suspend) {
   28.95  		xenbus_suspend();
   28.96  		err = stop_machine_run(take_machine_down, &fast_suspend, 0);
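
Two notes on the suspend path above: __xen_suspend() now forces the
slow path on uniprocessor systems (stop_machine has nothing to park
there), and take_machine_down() restores the user-mode page-table base
by hand after an uncancelled suspend, since x86_64 pv kernels keep
split kernel/user page tables and older hypervisors did not preserve
the user %cr3 across save/restore. The hypercall as issued in the hunk:

	/* sketch: repoint the user pagetable base after restore (x86_64) */
	struct mmuext_op op;

	op.cmd = MMUEXT_NEW_USER_BASEPTR;
	op.arg1.mfn = pfn_to_mfn(__pa(__user_pgd(current->active_mm->pgd))
				 >> PAGE_SHIFT);
	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
		BUG();
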
    29.1 --- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c	Mon Mar 05 12:49:12 2007 -0600
    29.2 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c	Thu Mar 08 14:39:52 2007 -0600
    29.3 @@ -406,7 +406,7 @@ static int evtchn_release(struct inode *
    29.4  	return 0;
    29.5  }
    29.6  
    29.7 -static struct file_operations evtchn_fops = {
    29.8 +static const struct file_operations evtchn_fops = {
    29.9  	.owner   = THIS_MODULE,
   29.10  	.read    = evtchn_read,
   29.11  	.write   = evtchn_write,
    30.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Mon Mar 05 12:49:12 2007 -0600
    30.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Thu Mar 08 14:39:52 2007 -0600
    30.3 @@ -99,9 +99,21 @@ typedef struct netif_st {
    30.4  	struct net_device *dev;
    30.5  	struct net_device_stats stats;
    30.6  
    30.7 +	unsigned int carrier;
    30.8 +
    30.9  	wait_queue_head_t waiting_to_free;
   30.10  } netif_t;
   30.11  
   30.12 +/*
   30.13 + * Implement our own carrier flag: the network stack's version causes delays
   30.14 + * when the carrier is re-enabled (in particular, dev_activate() may not
   30.15 + * immediately be called, which can cause packet loss; also the etherbridge
   30.16 + * can be rather lazy in activating its port).
   30.17 + */
   30.18 +#define netback_carrier_on(netif)	((netif)->carrier = 1)
   30.19 +#define netback_carrier_off(netif)	((netif)->carrier = 0)
   30.20 +#define netback_carrier_ok(netif)	((netif)->carrier)
   30.21 +
   30.22  #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
   30.23  #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
   30.24  
   30.25 @@ -120,7 +132,8 @@ int netif_map(netif_t *netif, unsigned l
   30.26  
   30.27  void netif_xenbus_init(void);
   30.28  
   30.29 -#define netif_schedulable(dev) (netif_running(dev) && netif_carrier_ok(dev))
   30.30 +#define netif_schedulable(netif)				\
   30.31 +	(netif_running((netif)->dev) && netback_carrier_ok(netif))
   30.32  
   30.33  void netif_schedule_work(netif_t *netif);
   30.34  void netif_deschedule_work(netif_t *netif);
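
The comment above is the heart of this series of netback hunks: the backend keeps its own carrier bit so state changes take effect immediately, and netif_schedulable() now takes the netif_t so it can combine netif_running() on the device with the private flag. The device-level carrier is only dropped when queued packets must actually be discarded, as in netif_disconnect() below. The shape of a call site (this mirrors tx_queue_callback() in netback.c):

    /* Only push transmit work at a backend that is running and whose
     * private carrier flag says the frontend is connected. */
    static void example_kick(netif_t *netif)
    {
            if (netif_schedulable(netif))
                    netif_wake_queue(netif->dev);
    }
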
    31.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Mon Mar 05 12:49:12 2007 -0600
    31.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Thu Mar 08 14:39:52 2007 -0600
    31.3 @@ -66,16 +66,19 @@ static void __netif_down(netif_t *netif)
    31.4  static int net_open(struct net_device *dev)
    31.5  {
    31.6  	netif_t *netif = netdev_priv(dev);
    31.7 -	if (netif_carrier_ok(dev))
    31.8 +	if (netback_carrier_ok(netif)) {
    31.9  		__netif_up(netif);
   31.10 +		netif_start_queue(dev);
   31.11 +	}
   31.12  	return 0;
   31.13  }
   31.14  
   31.15  static int net_close(struct net_device *dev)
   31.16  {
   31.17  	netif_t *netif = netdev_priv(dev);
   31.18 -	if (netif_carrier_ok(dev))
   31.19 +	if (netback_carrier_ok(netif))
   31.20  		__netif_down(netif);
   31.21 +	netif_stop_queue(dev);
   31.22  	return 0;
   31.23  }
   31.24  
   31.25 @@ -138,8 +141,6 @@ netif_t *netif_alloc(domid_t domid, unsi
   31.26  		return ERR_PTR(-ENOMEM);
   31.27  	}
   31.28  
   31.29 -	netif_carrier_off(dev);
   31.30 -
   31.31  	netif = netdev_priv(dev);
   31.32  	memset(netif, 0, sizeof(*netif));
   31.33  	netif->domid  = domid;
   31.34 @@ -148,6 +149,8 @@ netif_t *netif_alloc(domid_t domid, unsi
   31.35  	init_waitqueue_head(&netif->waiting_to_free);
   31.36  	netif->dev = dev;
   31.37  
   31.38 +	netback_carrier_off(netif);
   31.39 +
   31.40  	netif->credit_bytes = netif->remaining_credit = ~0UL;
   31.41  	netif->credit_usec  = 0UL;
   31.42  	init_timer(&netif->credit_timeout);
   31.43 @@ -285,7 +288,7 @@ int netif_map(netif_t *netif, unsigned l
   31.44  	netif_get(netif);
   31.45  
   31.46  	rtnl_lock();
   31.47 -	netif_carrier_on(netif->dev);
   31.48 +	netback_carrier_on(netif);
   31.49  	if (netif_running(netif->dev))
   31.50  		__netif_up(netif);
   31.51  	rtnl_unlock();
   31.52 @@ -302,9 +305,10 @@ err_rx:
   31.53  
   31.54  void netif_disconnect(netif_t *netif)
   31.55  {
   31.56 -	if (netif_carrier_ok(netif->dev)) {
   31.57 +	if (netback_carrier_ok(netif)) {
   31.58  		rtnl_lock();
   31.59 -		netif_carrier_off(netif->dev);
   31.60 +		netback_carrier_off(netif);
   31.61 +		netif_carrier_off(netif->dev); /* discard queued packets */
   31.62  		if (netif_running(netif->dev))
   31.63  			__netif_down(netif);
   31.64  		rtnl_unlock();
    32.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Mon Mar 05 12:49:12 2007 -0600
    32.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Thu Mar 08 14:39:52 2007 -0600
    32.3 @@ -38,7 +38,10 @@
    32.4  #include <xen/balloon.h>
    32.5  #include <xen/interface/memory.h>
    32.6  
    32.7 -/*#define NETBE_DEBUG_INTERRUPT*/
    32.8 +/*#define NETBE_DEBUG_INTERRUPT*/
    32.9 +
   32.10 +/* extra field used in struct page */
   32.11 +#define netif_page_index(pg) (*(long *)&(pg)->mapping)
   32.12  
   32.13  struct netbk_rx_meta {
   32.14  	skb_frag_t frag;
   32.15 @@ -231,7 +234,7 @@ static inline int netbk_queue_full(netif
   32.16  static void tx_queue_callback(unsigned long data)
   32.17  {
   32.18  	netif_t *netif = (netif_t *)data;
   32.19 -	if (netif_schedulable(netif->dev))
   32.20 +	if (netif_schedulable(netif))
   32.21  		netif_wake_queue(netif->dev);
   32.22  }
   32.23  
   32.24 @@ -242,7 +245,7 @@ int netif_be_start_xmit(struct sk_buff *
   32.25  	BUG_ON(skb->dev != dev);
   32.26  
   32.27  	/* Drop the packet if the target domain has no receive buffers. */
   32.28 -	if (unlikely(!netif_schedulable(dev) || netbk_queue_full(netif)))
   32.29 +	if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
   32.30  		goto drop;
   32.31  
   32.32  	/*
   32.33 @@ -352,7 +355,7 @@ static u16 netbk_gop_frag(netif_t *netif
   32.34  		copy_gop->flags = GNTCOPY_dest_gref;
   32.35  		if (PageForeign(page)) {
   32.36  			struct pending_tx_info *src_pend =
   32.37 -				&pending_tx_info[page->index];
   32.38 +				&pending_tx_info[netif_page_index(page)];
   32.39  			copy_gop->source.domid = src_pend->netif->domid;
   32.40  			copy_gop->source.u.ref = src_pend->req.gref;
   32.41  			copy_gop->flags |= GNTCOPY_source_gref;
   32.42 @@ -681,7 +684,7 @@ static void net_rx_action(unsigned long 
   32.43  		}
   32.44  
   32.45  		if (netif_queue_stopped(netif->dev) &&
   32.46 -		    netif_schedulable(netif->dev) &&
   32.47 +		    netif_schedulable(netif) &&
   32.48  		    !netbk_queue_full(netif))
   32.49  			netif_wake_queue(netif->dev);
   32.50  
   32.51 @@ -739,7 +742,7 @@ static void add_to_net_schedule_list_tai
   32.52  
   32.53  	spin_lock_irq(&net_schedule_list_lock);
   32.54  	if (!__on_net_schedule_list(netif) &&
   32.55 -	    likely(netif_schedulable(netif->dev))) {
   32.56 +	    likely(netif_schedulable(netif))) {
   32.57  		list_add_tail(&netif->list, &net_schedule_list);
   32.58  		netif_get(netif);
   32.59  	}
   32.60 @@ -1327,7 +1330,7 @@ static void netif_page_release(struct pa
   32.61  	/* Ready for next use. */
   32.62  	init_page_count(page);
   32.63  
   32.64 -	netif_idx_release(page->index);
   32.65 +	netif_idx_release(netif_page_index(page));
   32.66  }
   32.67  
   32.68  irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
   32.69 @@ -1337,7 +1340,7 @@ irqreturn_t netif_be_int(int irq, void *
   32.70  	add_to_net_schedule_list_tail(netif);
   32.71  	maybe_schedule_tx_action();
   32.72  
   32.73 -	if (netif_schedulable(netif->dev) && !netbk_queue_full(netif))
   32.74 +	if (netif_schedulable(netif) && !netbk_queue_full(netif))
   32.75  		netif_wake_queue(netif->dev);
   32.76  
   32.77  	return IRQ_HANDLED;
   32.78 @@ -1457,7 +1460,7 @@ static int __init netback_init(void)
   32.79  	for (i = 0; i < MAX_PENDING_REQS; i++) {
   32.80  		page = mmap_pages[i];
   32.81  		SetPageForeign(page, netif_page_release);
   32.82 -		page->index = i;
   32.83 +		netif_page_index(page) = i;
   32.84  	}
   32.85  
   32.86  	pending_cons = 0;
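
netif_page_index() leans on the page-flags.h hunk later in this changeset: the PageForeign destructor moves from page->mapping into page->index, so netback is free to alias page->mapping for its pending-request index (previously kept in page->index itself). A hedged sketch of the round trip, assuming pages prepared as in netback_init():

    #include <linux/mm.h>

    #define netif_page_index(pg) (*(long *)&(pg)->mapping)

    /* Tag the page with its request index at setup time; the release
     * handler reads the same field back. Illustrative only. */
    static void example_tag_page(struct page *pg, long idx)
    {
            netif_page_index(pg) = idx;
            BUG_ON(netif_page_index(pg) != idx);
    }
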
    33.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Mon Mar 05 12:49:12 2007 -0600
    33.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Thu Mar 08 14:39:52 2007 -0600
    33.3 @@ -338,9 +338,7 @@ static void connect(struct backend_info 
    33.4  
    33.5  	xenbus_switch_state(dev, XenbusStateConnected);
    33.6  
    33.7 -	/* May not get a kick from the frontend, so start the tx_queue now. */
    33.8 -	if (!netbk_can_queue(be->netif->dev))
    33.9 -		netif_wake_queue(be->netif->dev);
   33.10 +	netif_wake_queue(be->netif->dev);
   33.11  }
   33.12  
   33.13  
    34.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Mon Mar 05 12:49:12 2007 -0600
    34.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Thu Mar 08 14:39:52 2007 -0600
    34.3 @@ -154,6 +154,7 @@ struct netfront_info {
    34.4  
    34.5  	unsigned int irq;
    34.6  	unsigned int copying_receiver;
    34.7 +	unsigned int carrier;
    34.8  
    34.9  	/* Receive-ring batched refills. */
   34.10  #define RX_MIN_TARGET 8
   34.11 @@ -193,6 +194,15 @@ struct netfront_rx_info {
   34.12  };
   34.13  
   34.14  /*
   34.15 + * Implement our own carrier flag: the network stack's version causes delays
   34.16 + * when the carrier is re-enabled (in particular, dev_activate() may not
   34.17 + * immediately be called, which can cause packet loss).
   34.18 + */
   34.19 +#define netfront_carrier_on(netif)	((netif)->carrier = 1)
   34.20 +#define netfront_carrier_off(netif)	((netif)->carrier = 0)
   34.21 +#define netfront_carrier_ok(netif)	((netif)->carrier)
   34.22 +
   34.23 +/*
   34.24   * Access macros for acquiring freeing slots in tx_skbs[].
   34.25   */
   34.26  
   34.27 @@ -590,26 +600,6 @@ static int send_fake_arp(struct net_devi
   34.28  	return dev_queue_xmit(skb);
   34.29  }
   34.30  
   34.31 -static int network_open(struct net_device *dev)
   34.32 -{
   34.33 -	struct netfront_info *np = netdev_priv(dev);
   34.34 -
   34.35 -	memset(&np->stats, 0, sizeof(np->stats));
   34.36 -
   34.37 -	spin_lock(&np->rx_lock);
   34.38 -	if (netif_carrier_ok(dev)) {
   34.39 -		network_alloc_rx_buffers(dev);
   34.40 -		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
   34.41 -		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
   34.42 -			netif_rx_schedule(dev);
   34.43 -	}
   34.44 -	spin_unlock(&np->rx_lock);
   34.45 -
   34.46 -	netif_start_queue(dev);
   34.47 -
   34.48 -	return 0;
   34.49 -}
   34.50 -
   34.51  static inline int netfront_tx_slot_available(struct netfront_info *np)
   34.52  {
   34.53  	return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
   34.54 @@ -626,6 +616,26 @@ static inline void network_maybe_wake_tx
   34.55  		netif_wake_queue(dev);
   34.56  }
   34.57  
   34.58 +static int network_open(struct net_device *dev)
   34.59 +{
   34.60 +	struct netfront_info *np = netdev_priv(dev);
   34.61 +
   34.62 +	memset(&np->stats, 0, sizeof(np->stats));
   34.63 +
   34.64 +	spin_lock(&np->rx_lock);
   34.65 +	if (netfront_carrier_ok(np)) {
   34.66 +		network_alloc_rx_buffers(dev);
   34.67 +		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
   34.68 +		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
   34.69 +			netif_rx_schedule(dev);
   34.70 +	}
   34.71 +	spin_unlock(&np->rx_lock);
   34.72 +
   34.73 +	network_maybe_wake_tx(dev);
   34.74 +
   34.75 +	return 0;
   34.76 +}
   34.77 +
   34.78  static void network_tx_buf_gc(struct net_device *dev)
   34.79  {
   34.80  	RING_IDX cons, prod;
   34.81 @@ -633,7 +643,7 @@ static void network_tx_buf_gc(struct net
   34.82  	struct netfront_info *np = netdev_priv(dev);
   34.83  	struct sk_buff *skb;
   34.84  
   34.85 -	BUG_ON(!netif_carrier_ok(dev));
   34.86 +	BUG_ON(!netfront_carrier_ok(np));
   34.87  
   34.88  	do {
   34.89  		prod = np->tx.sring->rsp_prod;
   34.90 @@ -703,7 +713,7 @@ static void network_alloc_rx_buffers(str
   34.91  	int nr_flips;
   34.92  	netif_rx_request_t *req;
   34.93  
   34.94 -	if (unlikely(!netif_carrier_ok(dev)))
   34.95 +	if (unlikely(!netfront_carrier_ok(np)))
   34.96  		return;
   34.97  
   34.98  	/*
   34.99 @@ -934,7 +944,7 @@ static int network_start_xmit(struct sk_
  34.100  
  34.101  	spin_lock_irq(&np->tx_lock);
  34.102  
  34.103 -	if (unlikely(!netif_carrier_ok(dev) ||
  34.104 +	if (unlikely(!netfront_carrier_ok(np) ||
  34.105  		     (frags > 1 && !xennet_can_sg(dev)) ||
  34.106  		     netif_needs_gso(dev, skb))) {
  34.107  		spin_unlock_irq(&np->tx_lock);
  34.108 @@ -1024,7 +1034,7 @@ static irqreturn_t netif_int(int irq, vo
  34.109  
  34.110  	spin_lock_irqsave(&np->tx_lock, flags);
  34.111  
  34.112 -	if (likely(netif_carrier_ok(dev))) {
  34.113 +	if (likely(netfront_carrier_ok(np))) {
  34.114  		network_tx_buf_gc(dev);
  34.115  		/* Under tx_lock: protects access to rx shared-ring indexes. */
  34.116  		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
  34.117 @@ -1299,7 +1309,7 @@ static int netif_poll(struct net_device 
  34.118  
  34.119  	spin_lock(&np->rx_lock);
  34.120  
  34.121 -	if (unlikely(!netif_carrier_ok(dev))) {
  34.122 +	if (unlikely(!netfront_carrier_ok(np))) {
  34.123  		spin_unlock(&np->rx_lock);
  34.124  		return 0;
  34.125  	}
  34.126 @@ -1317,7 +1327,7 @@ static int netif_poll(struct net_device 
  34.127  	work_done = 0;
  34.128  	while ((i != rp) && (work_done < budget)) {
  34.129  		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
  34.130 -		memset(extras, 0, sizeof(extras));
  34.131 +		memset(extras, 0, sizeof(rinfo.extras));
  34.132  
  34.133  		err = xennet_get_responses(np, &rinfo, rp, &tmpq,
  34.134  					   &pages_flipped);
  34.135 @@ -1744,7 +1754,7 @@ static int network_connect(struct net_de
  34.136  	 * domain a kick because we've probably just requeued some
  34.137  	 * packets.
  34.138  	 */
  34.139 -	netif_carrier_on(dev);
  34.140 +	netfront_carrier_on(np);
  34.141  	notify_remote_via_irq(np->irq);
  34.142  	network_tx_buf_gc(dev);
  34.143  	network_alloc_rx_buffers(dev);
  34.144 @@ -1989,7 +1999,7 @@ static struct net_device * __devinit cre
  34.145  
  34.146  	np->netdev = netdev;
  34.147  
  34.148 -	netif_carrier_off(netdev);
  34.149 +	netfront_carrier_off(np);
  34.150  
  34.151  	return netdev;
  34.152  
  34.153 @@ -2023,7 +2033,7 @@ static void netif_disconnect_backend(str
  34.154  	/* Stop old i/f to prevent errors whilst we rebuild the state. */
  34.155  	spin_lock_irq(&info->tx_lock);
  34.156  	spin_lock(&info->rx_lock);
  34.157 -	netif_carrier_off(info->netdev);
  34.158 +	netfront_carrier_off(info);
  34.159  	spin_unlock(&info->rx_lock);
  34.160  	spin_unlock_irq(&info->tx_lock);
  34.161  
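
One hunk here is independent of the carrier work: in netif_poll(), `extras` is a pointer into rinfo, so the old memset(extras, 0, sizeof(extras)) cleared only sizeof-pointer bytes (4 or 8) and left the rest of the extras array stale; measuring through the struct member clears the whole thing. A self-contained illustration with hypothetical types:

    #include <string.h>

    struct rx_info {
            int extras[4];
    };

    static void clear_extras(struct rx_info *rinfo)
    {
            int *extras = rinfo->extras;

            /* Correct: sizeof over the member is the array size. */
            memset(extras, 0, sizeof(rinfo->extras));

            /* The old form, sizeof(extras), is the size of the pointer,
             * so it would zero only the first 4 or 8 bytes. */
    }
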
    35.1 --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c	Mon Mar 05 12:49:12 2007 -0600
    35.2 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c	Thu Mar 08 14:39:52 2007 -0600
    35.3 @@ -239,17 +239,12 @@ static void free_root_bus_devs(struct pc
    35.4  {
    35.5  	struct pci_dev *dev;
    35.6  
    35.7 -	down_write(&pci_bus_sem);
    35.8  	while (!list_empty(&bus->devices)) {
    35.9 -		dev = container_of(bus->devices.next, struct pci_dev, bus_list);
   35.10 -		up_write(&pci_bus_sem);
   35.11 -
   35.12 +		dev = container_of(bus->devices.next, struct pci_dev,
   35.13 +				   bus_list);
   35.14  		dev_dbg(&dev->dev, "removing device\n");
   35.15  		pci_remove_bus_device(dev);
   35.16 -
   35.17 -		down_write(&pci_bus_sem);
   35.18  	}
   35.19 -	up_write(&pci_bus_sem);
   35.20  }
   35.21  
   35.22  void pcifront_free_roots(struct pcifront_device *pdev)
    36.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Mon Mar 05 12:49:12 2007 -0600
    36.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c	Thu Mar 08 14:39:52 2007 -0600
    36.3 @@ -248,7 +248,7 @@ static int privcmd_enforce_singleshot_ma
    36.4  }
    36.5  #endif
    36.6  
    36.7 -static struct file_operations privcmd_file_ops = {
    36.8 +static const struct file_operations privcmd_file_ops = {
    36.9  	.ioctl = privcmd_ioctl,
   36.10  	.mmap  = privcmd_mmap,
   36.11  };
    37.1 --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c	Mon Mar 05 12:49:12 2007 -0600
    37.2 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c	Thu Mar 08 14:39:52 2007 -0600
    37.3 @@ -629,7 +629,7 @@ static unsigned int vtpm_op_poll(struct 
    37.4  	return flags;
    37.5  }
    37.6  
    37.7 -static struct file_operations vtpm_ops = {
    37.8 +static const struct file_operations vtpm_ops = {
    37.9  	.owner = THIS_MODULE,
   37.10  	.llseek = no_llseek,
   37.11  	.open = vtpm_op_open,
    38.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Mon Mar 05 12:49:12 2007 -0600
    38.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c	Thu Mar 08 14:39:52 2007 -0600
    38.3 @@ -173,17 +173,22 @@ static ssize_t xenbus_dev_write(struct f
    38.4  	void *reply;
    38.5  	char *path, *token;
    38.6  	struct watch_adapter *watch, *tmp_watch;
    38.7 -	int err;
    38.8 +	int err, rc = len;
    38.9  
   38.10 -	if ((len + u->len) > sizeof(u->u.buffer))
   38.11 -		return -EINVAL;
   38.12 +	if ((len + u->len) > sizeof(u->u.buffer)) {
   38.13 +		rc = -EINVAL;
   38.14 +		goto out;
   38.15 +	}
   38.16  
   38.17 -	if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0)
   38.18 -		return -EFAULT;
   38.19 +	if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) {
   38.20 +		rc = -EFAULT;
   38.21 +		goto out;
   38.22 +	}
   38.23  
   38.24  	u->len += len;
   38.25 -	if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
   38.26 -		return len;
   38.27 +	if ((u->len < sizeof(u->u.msg)) ||
   38.28 +	    (u->len < (sizeof(u->u.msg) + u->u.msg.len)))
   38.29 +		return rc;
   38.30  
   38.31  	msg_type = u->u.msg.type;
   38.32  
   38.33 @@ -201,14 +206,17 @@ static ssize_t xenbus_dev_write(struct f
   38.34  	case XS_SET_PERMS:
   38.35  		if (msg_type == XS_TRANSACTION_START) {
   38.36  			trans = kmalloc(sizeof(*trans), GFP_KERNEL);
   38.37 -			if (!trans)
   38.38 -				return -ENOMEM;
   38.39 +			if (!trans) {
   38.40 +				rc = -ENOMEM;
   38.41 +				goto out;
   38.42 +			}
   38.43  		}
   38.44  
   38.45  		reply = xenbus_dev_request_and_reply(&u->u.msg);
   38.46  		if (IS_ERR(reply)) {
   38.47  			kfree(trans);
   38.48 -			return PTR_ERR(reply);
   38.49 +			rc = PTR_ERR(reply);
   38.50 +			goto out;
   38.51  		}
   38.52  
   38.53  		if (msg_type == XS_TRANSACTION_START) {
   38.54 @@ -231,8 +239,10 @@ static ssize_t xenbus_dev_write(struct f
   38.55  	case XS_UNWATCH:
   38.56  		path = u->u.buffer + sizeof(u->u.msg);
   38.57  		token = memchr(path, 0, u->u.msg.len);
   38.58 -		if (token == NULL)
   38.59 -			return -EILSEQ;
   38.60 +		if (token == NULL) {
   38.61 +			rc = -EILSEQ;
   38.62 +			goto out;
   38.63 +		}
   38.64  		token++;
   38.65  
   38.66  		if (msg_type == XS_WATCH) {
   38.67 @@ -251,7 +261,8 @@ static ssize_t xenbus_dev_write(struct f
   38.68  			err = register_xenbus_watch(&watch->watch);
   38.69  			if (err) {
   38.70  				free_watch_adapter(watch);
   38.71 -				return err;
   38.72 +				rc = err;
   38.73 +				goto out;
   38.74  			}
   38.75  			
   38.76  			list_add(&watch->list, &u->watches);
   38.77 @@ -265,7 +276,6 @@ static ssize_t xenbus_dev_write(struct f
   38.78                                                   &u->watches, list) {
   38.79  				if (!strcmp(watch->token, token) &&
   38.80  				    !strcmp(watch->watch.node, path))
   38.81 -					break;
   38.82  				{
   38.83  					unregister_xenbus_watch(&watch->watch);
   38.84  					list_del(&watch->list);
   38.85 @@ -278,11 +288,13 @@ static ssize_t xenbus_dev_write(struct f
   38.86  		break;
   38.87  
   38.88  	default:
   38.89 -		return -EINVAL;
   38.90 +		rc = -EINVAL;
   38.91 +		break;
   38.92  	}
   38.93  
   38.94 + out:
   38.95  	u->len = 0;
   38.96 -	return len;
   38.97 +	return rc;
   38.98  }
   38.99  
  38.100  static int xenbus_dev_open(struct inode *inode, struct file *filp)
  38.101 @@ -342,7 +354,7 @@ static unsigned int xenbus_dev_poll(stru
  38.102  	return 0;
  38.103  }
  38.104  
  38.105 -static struct file_operations xenbus_dev_file_ops = {
  38.106 +static const struct file_operations xenbus_dev_file_ops = {
  38.107  	.read = xenbus_dev_read,
  38.108  	.write = xenbus_dev_write,
  38.109  	.open = xenbus_dev_open,
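
The xenbus_dev_write() rework does two things: it refuses to trust u->u.msg.len until a full message header has accumulated (the new `u->len < sizeof(u->u.msg)` check), and it funnels every failure through one `out:` label so a partially buffered message is always discarded instead of poisoning the next write(). A minimal userspace sketch of the exit-label pattern, all names hypothetical:

    #include <errno.h>
    #include <string.h>
    #include <sys/types.h>

    struct msg_buffer {
            char   data[1024];
            size_t len;
    };

    /* Stand-in for the header-complete test in xenbus_dev_write(). */
    static int message_complete(const struct msg_buffer *u)
    {
            return u->len >= 4;
    }

    static ssize_t example_write(struct msg_buffer *u, const char *src,
                                 size_t len)
    {
            ssize_t rc = len;

            if (len > sizeof(u->data) - u->len) {
                    rc = -EINVAL;
                    goto out;               /* error: drop buffered state */
            }
            memcpy(u->data + u->len, src, len);
            u->len += len;
            if (!message_complete(u))
                    return rc;              /* keep accumulating */
            /* ... process the complete message, setting rc on error ... */
     out:
            u->len = 0;                     /* consumed or failed: reset */
            return rc;
    }
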
    39.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Mon Mar 05 12:49:12 2007 -0600
    39.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h	Thu Mar 08 14:39:52 2007 -0600
    39.3 @@ -20,6 +20,14 @@
    39.4  #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT)
    39.5  
    39.6  #ifdef __KERNEL__
    39.7 +
    39.8 +/*
    39.9 + * Need to repeat this here in order to not include pgtable.h (which in turn
   39.10 + * depends on definitions made here), but to be able to use the symbolic
   39.11 + * name below. The preprocessor will warn if the two definitions aren't identical.
   39.12 + */
   39.13 +#define _PAGE_PRESENT	0x001
   39.14 +
   39.15  #ifndef __ASSEMBLY__
   39.16  
   39.17  #include <linux/string.h>
   39.18 @@ -29,13 +37,6 @@
   39.19  #include <xen/interface/xen.h>
   39.20  #include <xen/features.h>
   39.21  
   39.22 -/*
   39.23 - * Need to repeat this here in order to not include pgtable.h (which in turn
   39.24 - * depends on definitions made here), but to be able to use the symbolic
   39.25 - * below. The preprocessor will warn if the two definitions aren't identical.
   39.26 - */
   39.27 -#define _PAGE_PRESENT	0x001
   39.28 -
   39.29  #define arch_free_page(_page,_order)		\
   39.30  ({	int foreign = PageForeign(_page);	\
   39.31  	if (foreign)				\
   39.32 @@ -225,8 +226,6 @@ extern int page_is_ram(unsigned long pag
   39.33  	((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \
   39.34  		 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
   39.35  
   39.36 -#define __HAVE_ARCH_GATE_AREA 1
   39.37 -
   39.38  #include <asm-generic/memory_model.h>
   39.39  #include <asm-generic/page.h>
   39.40  
    40.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h	Mon Mar 05 12:49:12 2007 -0600
    40.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h	Thu Mar 08 14:39:52 2007 -0600
    40.3 @@ -137,7 +137,7 @@ extern struct task_struct * FASTCALL(__s
    40.4  })
    40.5  
    40.6  #define write_cr4(x) \
    40.7 -	__asm__ __volatile__("movl %0,%%cr4": :"r" (x));
    40.8 +	__asm__ __volatile__("movl %0,%%cr4": :"r" (x))
    40.9  
   40.10  /*
   40.11   * Clear and set 'TS' bit respectively
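
The write_cr4() change only removes a stray trailing semicolon, but the bug class is worth noting: a function-like macro that ends in `;` expands to two statements, so `if (...) write_cr4(x); else ...` fails to compile because the `else` binds to nothing. A tiny illustration with a hypothetical stand-in:

    static void do_thing(unsigned long x) { (void)x; }

    #define GOOD_MACRO(x) do_thing(x)
    #define BAD_MACRO(x)  do_thing(x);  /* expands to stmt + empty stmt */

    static void example(int cond, unsigned long v)
    {
            if (cond)
                    GOOD_MACRO(v);          /* fine */
            else
                    do_thing(v);
            /* Swap in BAD_MACRO above and the expansion becomes
             * `do_thing(v);;`, orphaning the `else`: a syntax error. */
    }
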
    41.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h	Mon Mar 05 12:49:12 2007 -0600
    41.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h	Thu Mar 08 14:39:52 2007 -0600
    41.3 @@ -53,6 +53,11 @@ enum fixed_addresses {
    41.4  #define NR_FIX_ISAMAPS	256
    41.5  	FIX_ISAMAP_END,
    41.6  	FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
    41.7 +	__end_of_permanent_fixed_addresses,
    41.8 +	/* temporary boot-time mappings, used before ioremap() is functional */
    41.9 +#define NR_FIX_BTMAPS	16
   41.10 +	FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
   41.11 +	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1,
   41.12  	__end_of_fixed_addresses
   41.13  };
   41.14  
    42.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h	Mon Mar 05 12:49:12 2007 -0600
    42.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h	Thu Mar 08 14:39:52 2007 -0600
    42.3 @@ -150,8 +150,10 @@ static inline void __iomem * ioremap (un
    42.4  	return __ioremap(offset, size, 0);
    42.5  }
    42.6  
    42.7 -extern void *early_ioremap(unsigned long addr, unsigned long size);
    42.8 -extern void early_iounmap(void *addr, unsigned long size);
    42.9 +extern void *bt_ioremap(unsigned long addr, unsigned long size);
   42.10 +extern void bt_iounmap(void *addr, unsigned long size);
   42.11 +#define early_ioremap bt_ioremap
   42.12 +#define early_iounmap bt_iounmap
   42.13  
   42.14  /*
   42.15   * This one maps high address device memory and turns off caching for that area.
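
On x86_64 the early_ioremap()/early_iounmap() names are now aliases for bt_ioremap()/bt_iounmap(), presumably backed by the FIX_BTMAP slots added to fixmap.h above ("temporary boot-time mappings, used before ioremap() is functional"). A hypothetical early-boot consumer:

    /* Sketch: map a firmware table while ioremap() is not yet usable,
     * read it, then drop the temporary mapping. */
    static void example_probe_table(unsigned long pa, unsigned long size)
    {
            void *va = bt_ioremap(pa, size);

            if (va != NULL) {
                    /* ... parse the table at va ... */
                    bt_iounmap(va, size);
            }
    }
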
    43.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Mon Mar 05 12:49:12 2007 -0600
    43.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h	Thu Mar 08 14:39:52 2007 -0600
    43.3 @@ -403,19 +403,6 @@ static inline int pmd_large(pmd_t pte) {
    43.4  /* to find an entry in a page-table-directory. */
    43.5  #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
    43.6  #define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
    43.7 -static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address)
    43.8 -{ 
    43.9 -	return pud + pud_index(address);
   43.10 -} 
   43.11 -
   43.12 -/* Find correct pud via the hidden fourth level page level: */
   43.13 -
   43.14 -/* This accesses the reference page table of the boot cpu. 
   43.15 -   Other CPUs get synced lazily via the page fault handler. */
   43.16 -static inline pud_t *pud_offset_k(pgd_t *pgd, unsigned long address)
   43.17 -{
   43.18 -	return pud_offset(pgd_offset_k(address), address);
   43.19 -}
   43.20  
   43.21  /* PMD  - Level 2 access */
   43.22  #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
    44.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h	Mon Mar 05 12:49:12 2007 -0600
    44.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h	Thu Mar 08 14:39:52 2007 -0600
    44.3 @@ -246,11 +246,13 @@ DECLARE_PER_CPU(struct tss_struct,init_t
    44.4  
    44.5  
    44.6  extern struct cpuinfo_x86 boot_cpu_data;
    44.7 +#ifndef CONFIG_X86_NO_TSS
    44.8  /* Save the original ist values for checking stack pointers during debugging */
    44.9  struct orig_ist {
   44.10  	unsigned long ist[7];
   44.11  };
   44.12  DECLARE_PER_CPU(struct orig_ist, orig_ist);
   44.13 +#endif
   44.14  
   44.15  #ifdef CONFIG_X86_VSMP
   44.16  #define ARCH_MIN_TASKALIGN	(1 << INTERNODE_CACHE_SHIFT)
    45.1 --- a/linux-2.6-xen-sparse/include/linux/page-flags.h	Mon Mar 05 12:49:12 2007 -0600
    45.2 +++ b/linux-2.6-xen-sparse/include/linux/page-flags.h	Thu Mar 08 14:39:52 2007 -0600
    45.3 @@ -252,14 +252,14 @@
    45.4  #define PageForeign(page)	test_bit(PG_foreign, &(page)->flags)
    45.5  #define SetPageForeign(page, dtor) do {		\
    45.6  	set_bit(PG_foreign, &(page)->flags);	\
    45.7 -	(page)->mapping = (void *)dtor;		\
    45.8 +	(page)->index = (long)(dtor);		\
    45.9  } while (0)
   45.10  #define ClearPageForeign(page) do {		\
   45.11  	clear_bit(PG_foreign, &(page)->flags);	\
   45.12 -	(page)->mapping = NULL;			\
   45.13 +	(page)->index = 0;			\
   45.14  } while (0)
   45.15  #define PageForeignDestructor(page)		\
   45.16 -	( (void (*) (struct page *)) (page)->mapping )(page)
   45.17 +	( (void (*) (struct page *)) (page)->index )(page)
   45.18  
   45.19  struct page;	/* forward declaration */
   45.20  
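
This is the other half of the netback trick above: the destructor callback for foreign pages moves from page->mapping to page->index, which both frees the mapping field for other users and avoids the pte_lock_deinit() conflict that the (deleted) mm/Kconfig comment below describes. A sketch of the release path, assuming an arch_free_page() hook as in the sparse tree:

    #include <linux/mm.h>

    /* Foreign pages are handed back to their owner via the destructor
     * now stored in page->index; everything else frees normally. */
    static void example_free_page(struct page *page)
    {
            if (PageForeign(page)) {
                    PageForeignDestructor(page);
                    return;
            }
            /* ... normal free path ... */
    }
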
    46.1 --- a/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h	Mon Mar 05 12:49:12 2007 -0600
    46.2 +++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h	Thu Mar 08 14:39:52 2007 -0600
    46.3 @@ -4,7 +4,7 @@
    46.4  #include <linux/kernel.h>
    46.5  #include <linux/cpumask.h>
    46.6  
    46.7 -#if defined(CONFIG_X86)
    46.8 +#if defined(CONFIG_X86) && defined(CONFIG_SMP)
    46.9  extern cpumask_t cpu_initialized_map;
   46.10  #define cpu_set_initialized(cpu) cpu_set(cpu, cpu_initialized_map)
   46.11  #else
    47.1 --- a/linux-2.6-xen-sparse/mm/Kconfig	Mon Mar 05 12:49:12 2007 -0600
    47.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    47.3 @@ -1,157 +0,0 @@
    47.4 -config SELECT_MEMORY_MODEL
    47.5 -	def_bool y
    47.6 -	depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL
    47.7 -
    47.8 -choice
    47.9 -	prompt "Memory model"
   47.10 -	depends on SELECT_MEMORY_MODEL
   47.11 -	default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
   47.12 -	default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
   47.13 -	default FLATMEM_MANUAL
   47.14 -
   47.15 -config FLATMEM_MANUAL
   47.16 -	bool "Flat Memory"
   47.17 -	depends on !(ARCH_DISCONTIGMEM_ENABLE || ARCH_SPARSEMEM_ENABLE) || ARCH_FLATMEM_ENABLE
   47.18 -	help
   47.19 -	  This option allows you to change some of the ways that
   47.20 -	  Linux manages its memory internally.  Most users will
   47.21 -	  only have one option here: FLATMEM.  This is normal
   47.22 -	  and a correct option.
   47.23 -
   47.24 -	  Some users of more advanced features like NUMA and
   47.25 -	  memory hotplug may have different options here.
   47.26 -	  DISCONTIGMEM is a more mature, better tested system,
   47.27 -	  but is incompatible with memory hotplug and may suffer
   47.28 -	  decreased performance over SPARSEMEM.  If unsure between
   47.29 -	  "Sparse Memory" and "Discontiguous Memory", choose
   47.30 -	  "Discontiguous Memory".
   47.31 -
   47.32 -	  If unsure, choose this option (Flat Memory) over any other.
   47.33 -
   47.34 -config DISCONTIGMEM_MANUAL
   47.35 -	bool "Discontiguous Memory"
   47.36 -	depends on ARCH_DISCONTIGMEM_ENABLE
   47.37 -	help
   47.38 -	  This option provides enhanced support for discontiguous
   47.39 -	  memory systems, over FLATMEM.  These systems have holes
   47.40 -	  in their physical address spaces, and this option provides
   47.41 -	  more efficient handling of these holes.  However, the vast
   47.42 -	  majority of hardware has quite flat address spaces, and
   47.43 -	  can have degraded performance from extra overhead that
   47.44 -	  this option imposes.
   47.45 -
   47.46 -	  Many NUMA configurations will have this as the only option.
   47.47 -
   47.48 -	  If unsure, choose "Flat Memory" over this option.
   47.49 -
   47.50 -config SPARSEMEM_MANUAL
   47.51 -	bool "Sparse Memory"
   47.52 -	depends on ARCH_SPARSEMEM_ENABLE
   47.53 -	help
   47.54 -	  This will be the only option for some systems, including
   47.55 -	  memory hotplug systems.  This is normal.
   47.56 -
   47.57 -	  For many other systems, this will be an alternative to
   47.58 -	  "Discontiguous Memory".  This option provides some potential
   47.59 -	  performance benefits, along with decreased code complexity,
   47.60 -	  but it is newer, and more experimental.
   47.61 -
   47.62 -	  If unsure, choose "Discontiguous Memory" or "Flat Memory"
   47.63 -	  over this option.
   47.64 -
   47.65 -endchoice
   47.66 -
   47.67 -config DISCONTIGMEM
   47.68 -	def_bool y
   47.69 -	depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
   47.70 -
   47.71 -config SPARSEMEM
   47.72 -	def_bool y
   47.73 -	depends on SPARSEMEM_MANUAL
   47.74 -
   47.75 -config FLATMEM
   47.76 -	def_bool y
   47.77 -	depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
   47.78 -
   47.79 -config FLAT_NODE_MEM_MAP
   47.80 -	def_bool y
   47.81 -	depends on !SPARSEMEM
   47.82 -
   47.83 -#
   47.84 -# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
   47.85 -# to represent different areas of memory.  This variable allows
   47.86 -# those dependencies to exist individually.
   47.87 -#
   47.88 -config NEED_MULTIPLE_NODES
   47.89 -	def_bool y
   47.90 -	depends on DISCONTIGMEM || NUMA
   47.91 -
   47.92 -config HAVE_MEMORY_PRESENT
   47.93 -	def_bool y
   47.94 -	depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
   47.95 -
   47.96 -#
   47.97 -# SPARSEMEM_EXTREME (which is the default) does some bootmem
   47.98 -# allocations when memory_present() is called.  If this can not
   47.99 -# be done on your architecture, select this option.  However,
  47.100 -# statically allocating the mem_section[] array can potentially
  47.101 -# consume vast quantities of .bss, so be careful.
  47.102 -#
  47.103 -# This option will also potentially produce smaller runtime code
  47.104 -# with gcc 3.4 and later.
  47.105 -#
  47.106 -config SPARSEMEM_STATIC
  47.107 -	def_bool n
  47.108 -
  47.109 -#
  47.110 -# Architecture platforms which require a two level mem_section in SPARSEMEM
  47.111 -# must select this option. This is usually for architecture platforms with
  47.112 -# an extremely sparse physical address space.
  47.113 -#
  47.114 -config SPARSEMEM_EXTREME
  47.115 -	def_bool y
  47.116 -	depends on SPARSEMEM && !SPARSEMEM_STATIC
  47.117 -
  47.118 -# eventually, we can have this option just 'select SPARSEMEM'
  47.119 -config MEMORY_HOTPLUG
  47.120 -	bool "Allow for memory hot-add"
  47.121 -	depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
  47.122 -	depends on (IA64 || X86 || PPC64)
  47.123 -
  47.124 -comment "Memory hotplug is currently incompatible with Software Suspend"
  47.125 -	depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
  47.126 -
  47.127 -# Heavily threaded applications may benefit from splitting the mm-wide
  47.128 -# page_table_lock, so that faults on different parts of the user address
  47.129 -# space can be handled with less contention: split it at this NR_CPUS.
  47.130 -# Default to 4 for wider testing, though 8 might be more appropriate.
  47.131 -# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
  47.132 -# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
  47.133 -# XEN on x86 architecture uses the mapping field on pagetable pages to store a
  47.134 -# pointer to the destructor. This conflicts with pte_lock_deinit().
  47.135 -#
  47.136 -config SPLIT_PTLOCK_CPUS
  47.137 -	int
  47.138 -	default "4096" if ARM && !CPU_CACHE_VIPT
  47.139 -	default "4096" if PARISC && !PA20
  47.140 -	default "4096" if X86_XEN || X86_64_XEN
  47.141 -	default "4"
  47.142 -
  47.143 -#
  47.144 -# support for page migration
  47.145 -#
  47.146 -config MIGRATION
  47.147 -	bool "Page migration"
  47.148 -	def_bool y
  47.149 -	depends on NUMA
  47.150 -	help
  47.151 -	  Allows the migration of the physical location of pages of processes
  47.152 -	  while the virtual addresses are not changed. This is useful for
  47.153 -	  example on NUMA systems to put pages nearer to the processors accessing
  47.154 -	  the page.
  47.155 -
  47.156 -config RESOURCES_64BIT
  47.157 -	bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL)
  47.158 -	default 64BIT
  47.159 -	help
  47.160 -	  This option allows memory and IO resources to be 64 bit.
    48.1 --- a/patches/linux-2.6.18/blktap-aio-16_03_06.patch	Mon Mar 05 12:49:12 2007 -0600
    48.2 +++ b/patches/linux-2.6.18/blktap-aio-16_03_06.patch	Thu Mar 08 14:39:52 2007 -0600
    48.3 @@ -106,7 +106,7 @@ diff -pruN ../orig-linux-2.6.18/fs/aio.c
    48.4  +	return pollflags;
    48.5  +}
    48.6  +
    48.7 -+static struct file_operations aioq_fops = {
    48.8 ++static const struct file_operations aioq_fops = {
    48.9  +	.release	= aio_queue_fd_close,
   48.10  +	.poll		= aio_queue_fd_poll
   48.11  +};
   48.12 @@ -201,7 +201,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
   48.13   		   int maxevents, long timeout);
   48.14   static int eventpollfs_delete_dentry(struct dentry *dentry);
   48.15  -static struct inode *ep_eventpoll_inode(void);
   48.16 -+static struct inode *ep_eventpoll_inode(struct file_operations *fops);
   48.17 ++static struct inode *ep_eventpoll_inode(const struct file_operations *fops);
   48.18   static int eventpollfs_get_sb(struct file_system_type *fs_type,
   48.19   			      int flags, const char *dev_name,
   48.20   			      void *data, struct vfsmount *mnt);
   48.21 @@ -221,7 +221,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
   48.22  -static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
   48.23  -		    struct eventpoll *ep)
   48.24  +int ep_getfd(int *efd, struct inode **einode, struct file **efile,
   48.25 -+		    struct eventpoll *ep, struct file_operations *fops)
   48.26 ++		    struct eventpoll *ep, const struct file_operations *fops)
   48.27   {
   48.28   	struct qstr this;
   48.29   	char name[32];
   48.30 @@ -248,7 +248,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event
   48.31   
   48.32   
   48.33  -static struct inode *ep_eventpoll_inode(void)
   48.34 -+static struct inode *ep_eventpoll_inode(struct file_operations *fops)
   48.35 ++static struct inode *ep_eventpoll_inode(const struct file_operations *fops)
   48.36   {
   48.37   	int error = -ENOMEM;
   48.38   	struct inode *inode = new_inode(eventpoll_mnt->mnt_sb);
   48.39 @@ -288,7 +288,7 @@ diff -pruN ../orig-linux-2.6.18/include/
   48.40  + */
   48.41  +struct eventpoll;
   48.42  +int ep_getfd(int *efd, struct inode **einode, struct file **efile,
   48.43 -+             struct eventpoll *ep, struct file_operations *fops);
   48.44 ++             struct eventpoll *ep, const struct file_operations *fops);
   48.45   #else
   48.46   
   48.47   static inline void eventpoll_init_file(struct file *file) {}
    49.1 --- a/tools/Makefile	Mon Mar 05 12:49:12 2007 -0600
    49.2 +++ b/tools/Makefile	Thu Mar 08 14:39:52 2007 -0600
    49.3 @@ -24,9 +24,8 @@ SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen
    49.4  
    49.5  # These don't cross-compile
    49.6  ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
    49.7 -SUBDIRS-y += python
    49.8 -SUBDIRS-y += pygrub
    49.9 -SUBDIRS-y += ptsname
   49.10 +SUBDIRS-$(PYTHON_TOOLS) += python
   49.11 +SUBDIRS-$(PYTHON_TOOLS) += pygrub
   49.12  endif
   49.13  
   49.14  .PHONY: all
   49.15 @@ -42,8 +41,8 @@ install: check
   49.16  		$(MAKE) -C $$subdir $@; \
   49.17  	done
   49.18  	$(MAKE) ioemuinstall
   49.19 -	$(INSTALL_DIR) -p $(DESTDIR)/var/xen/dump
   49.20 -	$(INSTALL_DIR) -p $(DESTDIR)/var/log/xen
   49.21 +	$(INSTALL_DIR) $(DESTDIR)/var/xen/dump
   49.22 +	$(INSTALL_DIR) $(DESTDIR)/var/log/xen
   49.23  
   49.24  .PHONY: clean
   49.25  clean: check_clean
    50.1 --- a/tools/blktap/lib/Makefile	Mon Mar 05 12:49:12 2007 -0600
    50.2 +++ b/tools/blktap/lib/Makefile	Thu Mar 08 14:39:52 2007 -0600
    50.3 @@ -40,8 +40,8 @@ build: libblktap.a
    50.4  libblktap: libblktap.a
    50.5  
    50.6  install: all
    50.7 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
    50.8 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/include
    50.9 +	$(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
   50.10 +	$(INSTALL_DIR) $(DESTDIR)/usr/include
   50.11  	$(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR)
   50.12  	ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libblktap.so.$(MAJOR)
   50.13  	ln -sf libblktap.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libblktap.so
    51.1 --- a/tools/console/Makefile	Mon Mar 05 12:49:12 2007 -0600
    51.2 +++ b/tools/console/Makefile	Thu Mar 08 14:39:52 2007 -0600
    51.3 @@ -30,7 +30,7 @@ xenconsole: $(patsubst %.c,%.o,$(wildcar
    51.4  
    51.5  .PHONY: install
    51.6  install: $(BIN)
    51.7 -	$(INSTALL_DIR) -p $(DESTDIR)/$(DAEMON_INSTALL_DIR)
    51.8 +	$(INSTALL_DIR) $(DESTDIR)/$(DAEMON_INSTALL_DIR)
    51.9  	$(INSTALL_PROG) xenconsoled $(DESTDIR)/$(DAEMON_INSTALL_DIR)
   51.10 -	$(INSTALL_DIR) -p $(DESTDIR)/$(CLIENT_INSTALL_DIR)
   51.11 +	$(INSTALL_DIR) $(DESTDIR)/$(CLIENT_INSTALL_DIR)
   51.12  	$(INSTALL_PROG) xenconsole $(DESTDIR)/$(CLIENT_INSTALL_DIR)
    52.1 --- a/tools/firmware/rombios/rombios.c	Mon Mar 05 12:49:12 2007 -0600
    52.2 +++ b/tools/firmware/rombios/rombios.c	Thu Mar 08 14:39:52 2007 -0600
    52.3 @@ -890,7 +890,7 @@ static void           int14_function();
    52.4  static void           int15_function();
    52.5  static void           int16_function();
    52.6  static void           int17_function();
    52.7 -static void           int19_function();
    52.8 +static void           int18_function();
    52.9  static void           int1a_function();
   52.10  static void           int70_function();
   52.11  static void           int74_function();
   52.12 @@ -1837,6 +1837,38 @@ keyboard_panic(status)
   52.13  }
   52.14  
   52.15  //--------------------------------------------------------------------------
   52.16 +// machine_reset
   52.17 +//--------------------------------------------------------------------------
   52.18 +  void
   52.19 +machine_reset()
   52.20 +{
   52.21 +  /* Frob the keyboard reset line to reset the processor */
   52.22 +  outb(0x64, 0x60); /* Map the flags register at data port (0x60) */
   52.23 +  outb(0x60, 0x14); /* Set the flags to system|disable */
   52.24 +  outb(0x64, 0xfe); /* Pulse output 0 (system reset) low */
   52.25 +  BX_PANIC("Couldn't reset the machine\n");
   52.26 +}
   52.27 +
   52.28 +//--------------------------------------------------------------------------
   52.29 +// clobber_entry_point
   52.30 +//    Because PV drivers in HVM guests detach some of the emulated devices, 
   52.31 +//    it is not safe to do a soft reboot by just dropping to real mode and
   52.32 +//    jumping at ffff:0000. -- the boot drives might have disappeared!
   52.33 +//    This rather foul function overwrites(!) the BIOS entry point 
   52.34 +//    to point at machine-reset, which will cause the Xen tools to
   52.35 +//    rebuild the whole machine from scratch.
   52.36 +//--------------------------------------------------------------------------
   52.37 +  void 
   52.38 +clobber_entry_point() 
   52.39 +{
   52.40 +    /* The instruction at the entry point is one byte (0xea) for the
   52.41 +     * jump opcode, then two bytes of address, then two of segment. 
   52.42 +     * Overwrite the address bytes.*/
   52.43 +    write_word(0xffff, 0x0001, machine_reset); 
   52.44 +}
   52.45 +
   52.46 +
   52.47 +//--------------------------------------------------------------------------
   52.48  // shutdown_status_panic
   52.49  //   called when the shutdown status is not implemented, displays the status
   52.50  //--------------------------------------------------------------------------
   52.51 @@ -7626,7 +7658,7 @@ int17_function(regs, ds, iret_addr)
   52.52  }
   52.53  
   52.54  void
   52.55 -int19_function(seq_nr)
   52.56 +int18_function(seq_nr)
   52.57  Bit16u seq_nr;
   52.58  {
   52.59    Bit16u ebda_seg=read_word(0x0040,0x000E);
   52.60 @@ -7702,8 +7734,8 @@ ASM_START
   52.61      push cx
   52.62      push dx
   52.63  
   52.64 -    mov  dl, _int19_function.bootdrv + 2[bp]
   52.65 -    mov  ax, _int19_function.bootseg + 2[bp]
   52.66 +    mov  dl, _int18_function.bootdrv + 2[bp]
   52.67 +    mov  ax, _int18_function.bootseg + 2[bp]
   52.68      mov  es, ax         ;; segment
   52.69      mov  bx, #0x0000    ;; offset
   52.70      mov  ah, #0x02      ;; function 2, read diskette sector
   52.71 @@ -7714,7 +7746,7 @@ ASM_START
   52.72      int  #0x13          ;; read sector
   52.73      jnc  int19_load_done
   52.74      mov  ax, #0x0001
   52.75 -    mov  _int19_function.status + 2[bp], ax
   52.76 +    mov  _int18_function.status + 2[bp], ax
   52.77  
   52.78  int19_load_done:
   52.79      pop  dx
   52.80 @@ -7789,13 +7821,13 @@ ASM_START
   52.81      ;; Build an iret stack frame that will take us to the boot vector.
   52.82      ;; iret pops ip, then cs, then flags, so push them in the opposite order.
   52.83      pushf
   52.84 -    mov  ax, _int19_function.bootseg + 0[bp] 
   52.85 +    mov  ax, _int18_function.bootseg + 0[bp] 
   52.86      push ax
   52.87 -    mov  ax, _int19_function.bootip + 0[bp] 
   52.88 +    mov  ax, _int18_function.bootip + 0[bp] 
   52.89      push ax
   52.90      ;; Set the magic number in ax and the boot drive in dl.
   52.91      mov  ax, #0xaa55
   52.92 -    mov  dl, _int19_function.bootdrv + 0[bp]
   52.93 +    mov  dl, _int18_function.bootdrv + 0[bp]
   52.94      ;; Zero some of the other registers.
   52.95      xor  bx, bx
   52.96      mov  ds, bx
   52.97 @@ -8272,6 +8304,8 @@ int18_handler: ;; Boot Failure recovery:
   52.98    mov  ss, ax
   52.99  
  52.100    ;; Get the boot sequence number out of the IPL memory
  52.101 +  ;; The first time we do this it will have been set to -1 so 
  52.102 +  ;; we will start from device 0.
  52.103    mov  bx, #IPL_SEG 
  52.104    mov  ds, bx                     ;; Set segment
  52.105    mov  bx, IPL_SEQUENCE_OFFSET    ;; BX is now the sequence number
  52.106 @@ -8279,43 +8313,33 @@ int18_handler: ;; Boot Failure recovery:
  52.107    mov  IPL_SEQUENCE_OFFSET, bx    ;; Write it back
  52.108    mov  ds, ax                     ;; and reset the segment to zero. 
  52.109  
  52.110 -  ;; Carry on in the INT 19h handler, using the new sequence number
  52.111 +  ;; Call the C code for the next boot device
  52.112    push bx
  52.113 -
  52.114 -  jmp  int19_next_boot
  52.115 +  call _int18_function
  52.116 +
  52.117 +  ;; Boot failed: invoke the boot recovery function...
  52.118 +  int  #0x18
  52.119  
  52.120  ;----------
  52.121  ;- INT19h -
  52.122  ;----------
  52.123  int19_relocated: ;; Boot function, relocated
  52.124 -
  52.125 -  ;; int19 was beginning to be really complex, so now it
  52.126 -  ;; just calls a C function that does the work
  52.127 -
  52.128 -  push bp
  52.129 -  mov  bp, sp
  52.130 -  
  52.131 -  ;; Reset SS and SP
  52.132 +  ;;
  52.133 +  ;; *** Warning: INT 19h resets the whole machine *** 
  52.134 +  ;;
  52.135 +  ;; Because PV drivers in HVM guests detach some of the emulated devices, 
  52.136 +  ;; it is not safe to do a soft reboot by just dropping to real mode and
  52.137 +  ;; invoking INT 19h -- the boot drives might have disappeared!
  52.138 +  ;; If the user asks for a soft reboot, the only thing we can do is 
  52.139 +  ;; reset the whole machine.  When it comes back up, the normal BIOS 
  52.140 +  ;; boot sequence will start, which is more or less the required behaviour.
  52.141 +  ;; 
  52.142 +  ;; Reset SP and SS
  52.143    mov  ax, #0xfffe
  52.144    mov  sp, ax
  52.145    xor  ax, ax
  52.146    mov  ss, ax
  52.147 -
  52.148 -  ;; Start from the first boot device (0, in AX)
  52.149 -  mov  bx, #IPL_SEG 
  52.150 -  mov  ds, bx                     ;; Set segment to write to the IPL memory
  52.151 -  mov  IPL_SEQUENCE_OFFSET, ax    ;; Save the sequence number 
  52.152 -  mov  ds, ax                     ;; and reset the segment.
  52.153 -
  52.154 -  push ax
  52.155 -
  52.156 -int19_next_boot:
  52.157 -
  52.158 -  ;; Call the C code for the next boot device
  52.159 -  call _int19_function
  52.160 -
  52.161 -  ;; Boot failed: invoke the boot recovery function
  52.162 -  int  #0x18
  52.163 +  call _machine_reset
  52.164  
  52.165  ;----------
  52.166  ;- INT1Ch -
  52.167 @@ -9609,6 +9633,8 @@ normal_post:
  52.168  
  52.169    call _log_bios_start
  52.170  
  52.171 +  call _clobber_entry_point
  52.172 +
  52.173    ;; set all interrupts to default handler
  52.174    mov  bx, #0x0000    ;; offset index
  52.175    mov  cx, #0x0100    ;; counter (256 interrupts)
  52.176 @@ -9857,8 +9883,10 @@ post_default_ints:
  52.177    call _tcpa_calling_int19h          /* specs: 8.2.3 step 1 */
  52.178    call _tcpa_add_event_separators    /* specs: 8.2.3 step 2 */
  52.179  #endif
  52.180 -  int  #0x19
  52.181 -  //JMP_EP(0x0064) ; INT 19h location
  52.182 +
  52.183 +  ;; Start the boot sequence.   See the comments in int19_relocated 
  52.184 +  ;; for why we use INT 18h instead of INT 19h here.
  52.185 +  int  #0x18
  52.186  
  52.187  #if BX_TCGBIOS
  52.188    call _tcpa_returned_int19h         /* specs: 8.2.3 step 3/7 */
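
The BIOS changes fit together as follows: POST installs the trap by calling _clobber_entry_point, which patches the offset bytes of the far jump at ffff:0000 so that a soft reboot lands in machine_reset(); machine_reset() then asks the keyboard controller to pulse the CPU reset line, forcing the Xen tools to rebuild the machine from scratch. A hedged C model of that KBC reset (outb here is a caller-supplied stand-in for the BIOS port write):

    #include <stdint.h>

    /* Command 0xFE on KBC command port 0x64 pulses output line 0,
     * which is wired to system reset on PC-compatible hardware. */
    static void kbc_reset(void (*outb)(uint16_t port, uint8_t val))
    {
            outb(0x64, 0xfe);
            for (;;)
                    ;       /* not reached once the reset fires */
    }
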
    53.1 --- a/tools/guest-headers/Makefile	Mon Mar 05 12:49:12 2007 -0600
    53.2 +++ b/tools/guest-headers/Makefile	Thu Mar 08 14:39:52 2007 -0600
    53.3 @@ -13,7 +13,7 @@ check:
    53.4  
    53.5  install-Linux:
    53.6  	mkdir -p $(DESTDIR)/usr/include/xen/linux
    53.7 -	install -m0644 $(linuxsparsetree)/include/xen/public/*.h $(DESTDIR)/usr/include/xen/linux
    53.8 +	$(INSTALL_DATA) $(linuxsparsetree)/include/xen/public/*.h $(DESTDIR)/usr/include/xen/linux
    53.9  
   53.10  install-SunOS:
   53.11  
    54.1 --- a/tools/ioemu/Makefile	Mon Mar 05 12:49:12 2007 -0600
    54.2 +++ b/tools/ioemu/Makefile	Thu Mar 08 14:39:52 2007 -0600
    54.3 @@ -65,10 +65,10 @@ common  de-ch  es     fo  fr-ca  hu     
    54.4  
    54.5  install-doc: $(DOCS)
    54.6  	mkdir -p "$(DESTDIR)$(docdir)"
    54.7 -	$(INSTALL) -m 644 qemu-doc.html  qemu-tech.html "$(DESTDIR)$(docdir)"
    54.8 +	$(INSTALL_DATA) qemu-doc.html  qemu-tech.html "$(DESTDIR)$(docdir)"
    54.9  ifndef CONFIG_WIN32
   54.10  	mkdir -p "$(DESTDIR)$(mandir)/man1"
   54.11 -	$(INSTALL) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
   54.12 +	$(INSTALL_DATA) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
   54.13  endif
   54.14  
   54.15  install: all $(if $(BUILD_DOCS),install-doc)
   54.16 @@ -77,12 +77,12 @@ install: all $(if $(BUILD_DOCS),install-
   54.17  #	mkdir -p "$(DESTDIR)$(datadir)"
   54.18  #	for x in bios.bin vgabios.bin vgabios-cirrus.bin ppc_rom.bin \
   54.19  #			video.x openbios-sparc32 linux_boot.bin; do \
   54.20 -#		$(INSTALL) -m 644 $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; \
   54.21 +#		$(INSTALL_DATA) $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; \
   54.22  #	done
   54.23  ifndef CONFIG_WIN32
   54.24  	mkdir -p "$(DESTDIR)$(datadir)/keymaps"
   54.25  	for x in $(KEYMAPS); do \
   54.26 -		$(INSTALL) -m 644 $(SRC_PATH)/keymaps/$$x "$(DESTDIR)$(datadir)/keymaps"; \
   54.27 +		$(INSTALL_DATA) $(SRC_PATH)/keymaps/$$x "$(DESTDIR)$(datadir)/keymaps"; \
   54.28  	done
   54.29  endif
   54.30  	for d in $(TARGET_DIRS); do \
    55.1 --- a/tools/ioemu/hw/ide.c	Mon Mar 05 12:49:12 2007 -0600
    55.2 +++ b/tools/ioemu/hw/ide.c	Thu Mar 08 14:39:52 2007 -0600
    55.3 @@ -2602,6 +2602,120 @@ void pci_cmd646_ide_init(PCIBus *bus, Bl
    55.4  #endif /* DMA_MULTI_THREAD */
    55.5  }
    55.6  
    55.7 +static void pci_ide_save(QEMUFile* f, void *opaque)
    55.8 +{
    55.9 +    PCIIDEState *d = opaque;
   55.10 +    int i;
   55.11 +
   55.12 +    for(i = 0; i < 2; i++) {
   55.13 +        BMDMAState *bm = &d->bmdma[i];
   55.14 +        qemu_put_8s(f, &bm->cmd);
   55.15 +        qemu_put_8s(f, &bm->status);
   55.16 +        qemu_put_be32s(f, &bm->addr);
   55.17 +        /* XXX: if a transfer is pending, we do not save it yet */
   55.18 +    }
   55.19 +
   55.20 +    /* per IDE interface data */
   55.21 +    for(i = 0; i < 2; i++) {
   55.22 +        IDEState *s = &d->ide_if[i * 2];
   55.23 +        uint8_t drive1_selected;
   55.24 +        qemu_put_8s(f, &s->cmd);
   55.25 +        drive1_selected = (s->cur_drive != s);
   55.26 +        qemu_put_8s(f, &drive1_selected);
   55.27 +    }
   55.28 +
   55.29 +    /* per IDE drive data */
   55.30 +    for(i = 0; i < 4; i++) {
   55.31 +        IDEState *s = &d->ide_if[i];
   55.32 +        qemu_put_be32s(f, &s->mult_sectors);
   55.33 +        qemu_put_be32s(f, &s->identify_set);
   55.34 +        if (s->identify_set) {
   55.35 +            qemu_put_buffer(f, (const uint8_t *)s->identify_data, 512);
   55.36 +        }
   55.37 +        qemu_put_8s(f, &s->write_cache);
   55.38 +        qemu_put_8s(f, &s->feature);
   55.39 +        qemu_put_8s(f, &s->error);
   55.40 +        qemu_put_be32s(f, &s->nsector);
   55.41 +        qemu_put_8s(f, &s->sector);
   55.42 +        qemu_put_8s(f, &s->lcyl);
   55.43 +        qemu_put_8s(f, &s->hcyl);
   55.44 +        qemu_put_8s(f, &s->hob_feature);
   55.45 +        qemu_put_8s(f, &s->hob_nsector);
   55.46 +        qemu_put_8s(f, &s->hob_sector);
   55.47 +        qemu_put_8s(f, &s->hob_lcyl);
   55.48 +        qemu_put_8s(f, &s->hob_hcyl);
   55.49 +        qemu_put_8s(f, &s->select);
   55.50 +        qemu_put_8s(f, &s->status);
   55.51 +        qemu_put_8s(f, &s->lba48);
   55.52 +
   55.53 +        qemu_put_8s(f, &s->sense_key);
   55.54 +        qemu_put_8s(f, &s->asc);
   55.55 +        /* XXX: if a transfer is pending, we do not save it yet */
   55.56 +    }
   55.57 +}
   55.58 +
   55.59 +static int pci_ide_load(QEMUFile* f, void *opaque, int version_id)
   55.60 +{
   55.61 +    PCIIDEState *d = opaque;
   55.62 +    int ret, i;
   55.63 +
   55.64 +    if (version_id != 1)
   55.65 +        return -EINVAL;
   55.66 +
   55.67 +    for(i = 0; i < 2; i++) {
   55.68 +        BMDMAState *bm = &d->bmdma[i];
   55.69 +        qemu_get_8s(f, &bm->cmd);
   55.70 +        qemu_get_8s(f, &bm->status);
   55.71 +        qemu_get_be32s(f, &bm->addr);
   55.72 +        /* XXX: if a transfer is pending, we do not save it yet */
   55.73 +    }
   55.74 +
   55.75 +    /* per IDE interface data */
   55.76 +    for(i = 0; i < 2; i++) {
   55.77 +        IDEState *s = &d->ide_if[i * 2];
   55.78 +        uint8_t drive1_selected;
   55.79 +        qemu_get_8s(f, &s->cmd);
   55.80 +        qemu_get_8s(f, &drive1_selected);
   55.81 +        s->cur_drive = &d->ide_if[i * 2 + (drive1_selected != 0)];
   55.82 +    }
   55.83 +
   55.84 +    /* per IDE drive data */
   55.85 +    for(i = 0; i < 4; i++) {
   55.86 +        IDEState *s = &d->ide_if[i];
   55.87 +        qemu_get_be32s(f, &s->mult_sectors);
   55.88 +        qemu_get_be32s(f, &s->identify_set);
   55.89 +        if (s->identify_set) {
   55.90 +            qemu_get_buffer(f, (uint8_t *)s->identify_data, 512);
   55.91 +        }
   55.92 +        qemu_get_8s(f, &s->write_cache);
   55.93 +        qemu_get_8s(f, &s->feature);
   55.94 +        qemu_get_8s(f, &s->error);
   55.95 +        qemu_get_be32s(f, &s->nsector);
   55.96 +        qemu_get_8s(f, &s->sector);
   55.97 +        qemu_get_8s(f, &s->lcyl);
   55.98 +        qemu_get_8s(f, &s->hcyl);
   55.99 +        qemu_get_8s(f, &s->hob_feature);
  55.100 +        qemu_get_8s(f, &s->hob_nsector);
  55.101 +        qemu_get_8s(f, &s->hob_sector);
  55.102 +        qemu_get_8s(f, &s->hob_lcyl);
  55.103 +        qemu_get_8s(f, &s->hob_hcyl);
  55.104 +        qemu_get_8s(f, &s->select);
  55.105 +        qemu_get_8s(f, &s->status);
  55.106 +        qemu_get_8s(f, &s->lba48);
  55.107 +
  55.108 +        qemu_get_8s(f, &s->sense_key);
  55.109 +        qemu_get_8s(f, &s->asc);
  55.110 +        /* XXX: if a transfer is pending, we do not save it yet */
  55.111 +        if (s->status & (DRQ_STAT|BUSY_STAT)) {
  55.112 +            /* Tell the guest that its transfer has gone away */
  55.113 +            ide_abort_command(s);
  55.114 +            ide_set_irq(s);
  55.115 +        }
  55.116 +    }
  55.117 +    return 0;
  55.118 +}
  55.119 +
  55.120 +
  55.121  /* hd_table must contain 4 block drivers */
  55.122  /* NOTE: for the PIIX3, the IRQs and IOports are hardcoded */
  55.123  void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn)
  55.124 @@ -2643,6 +2757,7 @@ void pci_piix3_ide_init(PCIBus *bus, Blo
  55.125      buffered_pio_init();
  55.126  
  55.127      register_savevm("ide_pci", 0, 1, generic_pci_save, generic_pci_load, d);
  55.128 +    register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d);
  55.129  
  55.130  #ifdef DMA_MULTI_THREAD    
  55.131      dma_create_thread();
    56.1 --- a/tools/ioemu/target-i386-dm/qemu-ifup	Mon Mar 05 12:49:12 2007 -0600
    56.2 +++ b/tools/ioemu/target-i386-dm/qemu-ifup	Thu Mar 08 14:39:52 2007 -0600
    56.3 @@ -3,8 +3,7 @@
    56.4  #. /etc/rc.d/init.d/functions
    56.5  #ulimit -c unlimited
    56.6  
    56.7 -echo -c 'config qemu network with xen bridge for '
    56.8 -echo $*
    56.9 +echo 'config qemu network with xen bridge for ' $*
   56.10  
   56.11  ifconfig $1 0.0.0.0 up
   56.12  brctl addif $2 $1
    57.1 --- a/tools/ioemu/vl.c	Mon Mar 05 12:49:12 2007 -0600
    57.2 +++ b/tools/ioemu/vl.c	Thu Mar 08 14:39:52 2007 -0600
    57.3 @@ -3250,6 +3250,14 @@ static int net_tap_init(VLANState *vlan,
    57.4          pid = fork();
    57.5          if (pid >= 0) {
    57.6              if (pid == 0) {
    57.7 +                int open_max = sysconf(_SC_OPEN_MAX), i;
    57.8 +                for (i = 0; i < open_max; i++)
    57.9 +                    if (i != STDIN_FILENO &&
   57.10 +                        i != STDOUT_FILENO &&
   57.11 +                        i != STDERR_FILENO &&
   57.12 +                        i != fd)
   57.13 +                        close(i);
   57.14 +
   57.15                  parg = args;
   57.16                  *parg++ = (char *)setup_script;
   57.17                  *parg++ = ifname;
    58.1 --- a/tools/ioemu/vnc.c	Mon Mar 05 12:49:12 2007 -0600
    58.2 +++ b/tools/ioemu/vnc.c	Thu Mar 08 14:39:52 2007 -0600
    58.3 @@ -1445,7 +1445,7 @@ int vnc_display_init(DisplayState *ds, i
    58.4  
    58.5  int vnc_start_viewer(int port)
    58.6  {
    58.7 -    int pid;
    58.8 +    int pid, i, open_max;
    58.9      char s[16];
   58.10  
   58.11      sprintf(s, ":%d", port);
   58.12 @@ -1456,6 +1456,12 @@ int vnc_start_viewer(int port)
   58.13  	exit(1);
   58.14  
   58.15      case 0:	/* child */
   58.16 +	open_max = sysconf(_SC_OPEN_MAX);
   58.17 +	for (i = 0; i < open_max; i++)
   58.18 +	    if (i != STDIN_FILENO &&
   58.19 +		i != STDOUT_FILENO &&
   58.20 +		i != STDERR_FILENO)
   58.21 +		close(i);
   58.22  	execlp("vncviewer", "vncviewer", s, NULL);
   58.23  	fprintf(stderr, "vncviewer execlp failed\n");
   58.24  	exit(1);
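
The same hygiene loop now appears in both fork() children, the tap setup script in vl.c and the vncviewer here: close every inherited descriptor except stdio (and, in the tap case, the tap fd itself) before exec, so the child cannot keep the emulator's disk images, sockets, and other fds alive. A reusable form, name hypothetical:

    #include <unistd.h>

    /* Pass the one fd the child must keep, or -1 to keep stdio only. */
    static void close_inherited_fds(int keep_fd)
    {
            long open_max = sysconf(_SC_OPEN_MAX);
            int i;

            for (i = 0; i < open_max; i++)
                    if (i != STDIN_FILENO && i != STDOUT_FILENO &&
                        i != STDERR_FILENO && i != keep_fd)
                            close(i);
    }
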
    59.1 --- a/tools/libxc/xc_core.c	Mon Mar 05 12:49:12 2007 -0600
    59.2 +++ b/tools/libxc/xc_core.c	Thu Mar 08 14:39:52 2007 -0600
    59.3 @@ -153,7 +153,7 @@ struct xc_core_section_headers {
    59.4      uint16_t    num;
    59.5      uint16_t    num_max;
    59.6  
    59.7 -    Elf_Shdr   *shdrs;
    59.8 +    Elf64_Shdr  *shdrs;
    59.9  };
   59.10  #define SHDR_INIT       16
   59.11  #define SHDR_INC        4
   59.12 @@ -184,14 +184,14 @@ xc_core_shdr_free(struct xc_core_section
   59.13      free(sheaders);
   59.14  }
   59.15  
   59.16 -Elf_Shdr*
   59.17 +Elf64_Shdr*
   59.18  xc_core_shdr_get(struct xc_core_section_headers *sheaders)
   59.19  {
   59.20 -    Elf_Shdr *shdr;
   59.21 +    Elf64_Shdr *shdr;
   59.22  
   59.23      if ( sheaders->num == sheaders->num_max )
   59.24      {
   59.25 -        Elf_Shdr *shdrs;
   59.26 +        Elf64_Shdr *shdrs;
   59.27          if ( sheaders->num_max + SHDR_INC < sheaders->num_max )
   59.28          {
   59.29              errno = E2BIG;
   59.30 @@ -212,7 +212,7 @@ xc_core_shdr_get(struct xc_core_section_
   59.31  }
   59.32  
   59.33  int
   59.34 -xc_core_shdr_set(Elf_Shdr *shdr,
   59.35 +xc_core_shdr_set(Elf64_Shdr *shdr,
   59.36                   struct xc_core_strtab *strtab,
   59.37                   const char *name, uint32_t type,
   59.38                   uint64_t offset, uint64_t size,
   59.39 @@ -317,15 +317,15 @@ xc_domain_dumpcore_via_callback(int xc_h
   59.40  
   59.41      uint64_t *pfn_array = NULL;
   59.42  
   59.43 -    Elf_Ehdr ehdr;
   59.44 -    unsigned long filesz;
   59.45 -    unsigned long offset;
   59.46 -    unsigned long fixup;
   59.47 +    Elf64_Ehdr ehdr;
   59.48 +    uint64_t filesz;
   59.49 +    uint64_t offset;
   59.50 +    uint64_t fixup;
   59.51  
   59.52      struct xc_core_strtab *strtab = NULL;
   59.53      uint16_t strtab_idx;
   59.54      struct xc_core_section_headers *sheaders = NULL;
   59.55 -    Elf_Shdr *shdr;
   59.56 +    Elf64_Shdr *shdr;
   59.57  
   59.58      /* elf notes */
   59.59      struct elfnote elfnote;
   59.60 @@ -460,7 +460,7 @@ xc_domain_dumpcore_via_callback(int xc_h
   59.61      ehdr.e_ident[EI_MAG1] = ELFMAG1;
   59.62      ehdr.e_ident[EI_MAG2] = ELFMAG2;
   59.63      ehdr.e_ident[EI_MAG3] = ELFMAG3;
   59.64 -    ehdr.e_ident[EI_CLASS] = ELFCLASS;
   59.65 +    ehdr.e_ident[EI_CLASS] = ELFCLASS64;
   59.66      ehdr.e_ident[EI_DATA] = ELF_ARCH_DATA;
   59.67      ehdr.e_ident[EI_VERSION] = EV_CURRENT;
   59.68      ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV;
   59.69 @@ -474,9 +474,9 @@ xc_domain_dumpcore_via_callback(int xc_h
   59.70      ehdr.e_shoff = sizeof(ehdr);
   59.71      ehdr.e_flags = ELF_CORE_EFLAGS;
   59.72      ehdr.e_ehsize = sizeof(ehdr);
   59.73 -    ehdr.e_phentsize = sizeof(Elf_Phdr);
   59.74 +    ehdr.e_phentsize = sizeof(Elf64_Phdr);
   59.75      ehdr.e_phnum = 0;
   59.76 -    ehdr.e_shentsize = sizeof(Elf_Shdr);
   59.77 +    ehdr.e_shentsize = sizeof(Elf64_Shdr);
    59.78      /* ehdr.e_shnum and ehdr.e_shstrndx aren't known here yet; filled in later. */
   59.79  
   59.80      /* create section header */
    60.1 --- a/tools/libxc/xc_core.h	Mon Mar 05 12:49:12 2007 -0600
    60.2 +++ b/tools/libxc/xc_core.h	Thu Mar 08 14:39:52 2007 -0600
    60.3 @@ -116,10 +116,10 @@ struct xen_dumpcore_p2m {
    60.4  struct xc_core_strtab;
    60.5  struct xc_core_section_headers;
    60.6  
    60.7 -Elf_Shdr*
    60.8 +Elf64_Shdr*
    60.9  xc_core_shdr_get(struct xc_core_section_headers *sheaders);
   60.10  int
   60.11 -xc_core_shdr_set(Elf_Shdr *shdr,
   60.12 +xc_core_shdr_set(Elf64_Shdr *shdr,
   60.13                   struct xc_core_strtab *strtab,
   60.14                   const char *name, uint32_t type,
   60.15                   uint64_t offset, uint64_t size,
    61.1 --- a/tools/libxc/xc_core_ia64.c	Mon Mar 05 12:49:12 2007 -0600
    61.2 +++ b/tools/libxc/xc_core_ia64.c	Thu Mar 08 14:39:52 2007 -0600
    61.3 @@ -266,10 +266,10 @@ int
    61.4  xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 
    61.5                                struct xc_core_section_headers *sheaders,
    61.6                                struct xc_core_strtab *strtab,
    61.7 -                              unsigned long *filesz, unsigned long offset)
    61.8 +                              uint64_t *filesz, uint64_t offset)
    61.9  {
   61.10      int sts = -1;
   61.11 -    Elf_Shdr *shdr;
   61.12 +    Elf64_Shdr *shdr;
   61.13  
   61.14      /* mmapped priv regs */
   61.15      shdr = xc_core_shdr_get(sheaders);
    62.1 --- a/tools/libxc/xc_core_ia64.h	Mon Mar 05 12:49:12 2007 -0600
    62.2 +++ b/tools/libxc/xc_core_ia64.h	Thu Mar 08 14:39:52 2007 -0600
    62.3 @@ -42,7 +42,7 @@ int
    62.4  xc_core_arch_context_get_shdr(struct xc_core_arch_context* arch_ctxt, 
    62.5                                struct xc_core_section_headers *sheaders,
    62.6                                struct xc_core_strtab *strtab,
    62.7 -                              unsigned long *filesz, unsigned long offset);
    62.8 +                              uint64_t *filesz, uint64_t offset);
    62.9  int
   62.10  xc_core_arch_context_dump(struct xc_core_arch_context* arch_ctxt,
   62.11                            void* args, dumpcore_rtn_t dump_rtn);
    63.1 --- a/tools/libxc/xc_core_x86.h	Mon Mar 05 12:49:12 2007 -0600
    63.2 +++ b/tools/libxc/xc_core_x86.h	Thu Mar 08 14:39:52 2007 -0600
    63.3 @@ -45,7 +45,7 @@ static inline int
    63.4  xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 
    63.5                                struct xc_core_section_headers *sheaders,
    63.6                                struct xc_core_strtab *strtab,
    63.7 -                              unsigned long *filesz, unsigned long offset)
    63.8 +                              uint64_t *filesz, uint64_t offset)
    63.9  {
   63.10      *filesz = 0;
   63.11      return 0;
    64.1 --- a/tools/libxc/xc_dom_core.c	Mon Mar 05 12:49:12 2007 -0600
    64.2 +++ b/tools/libxc/xc_dom_core.c	Thu Mar 08 14:39:52 2007 -0600
    64.3 @@ -721,9 +721,6 @@ int xc_dom_build_image(struct xc_dom_ima
    64.4      }
    64.5      page_size = XC_DOM_PAGE_SIZE(dom);
    64.6  
    64.7 -    /* 4MB align virtual base address */
    64.8 -    dom->parms.virt_base &= ~(((uint64_t)1<<22)-1);
    64.9 -
   64.10      /* load kernel */
   64.11      if ( xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel",
   64.12                                dom->kernel_seg.vstart,
    65.1 --- a/tools/libxc/xc_linux_restore.c	Mon Mar 05 12:49:12 2007 -0600
    65.2 +++ b/tools/libxc/xc_linux_restore.c	Thu Mar 08 14:39:52 2007 -0600
    65.3 @@ -19,7 +19,7 @@ static unsigned long max_mfn;
    65.4  /* virtual starting address of the hypervisor */
    65.5  static unsigned long hvirt_start;
    65.6  
    65.7 -/* #levels of page tables used by the currrent guest */
    65.8 +/* #levels of page tables used by the current guest */
    65.9  static unsigned int pt_levels;
   65.10  
   65.11  /* total number of pages used by the current guest */
   65.12 @@ -857,6 +857,28 @@ int xc_linux_restore(int xc_handle, int 
   65.13  
   65.14          ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]);
   65.15  
   65.16 +        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
   65.17 +        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
   65.18 +        {
   65.19 +            pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]);
   65.20 +
   65.21 +            if (pfn >= max_pfn) {
   65.22 +                ERROR("User PT base is bad: pfn=%lu max_pfn=%lu type=%08lx",
   65.23 +                      pfn, max_pfn, pfn_type[pfn]);
   65.24 +                goto out;
   65.25 +            }
   65.26 +
   65.27 +            if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
   65.28 +                 ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) {
   65.29 +                ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
   65.30 +                      pfn, max_pfn, pfn_type[pfn],
   65.31 +                      (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
   65.32 +                goto out;
   65.33 +            }
   65.34 +
   65.35 +            ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]);
   65.36 +        }
   65.37 +
   65.38          domctl.cmd = XEN_DOMCTL_setvcpucontext;
   65.39          domctl.domain = (domid_t)dom;
   65.40          domctl.u.vcpucontext.vcpu = i;
    66.1 --- a/tools/libxc/xc_linux_save.c	Mon Mar 05 12:49:12 2007 -0600
    66.2 +++ b/tools/libxc/xc_linux_save.c	Thu Mar 08 14:39:52 2007 -0600
    66.3 @@ -34,7 +34,7 @@ static unsigned long max_mfn;
    66.4  /* virtual starting address of the hypervisor */
    66.5  static unsigned long hvirt_start;
    66.6  
    66.7 -/* #levels of page tables used by the currrent guest */
    66.8 +/* #levels of page tables used by the current guest */
    66.9  static unsigned int pt_levels;
   66.10  
   66.11  /* total number of pages used by the current guest */
   66.12 @@ -491,7 +491,7 @@ static int canonicalize_pagetable(unsign
   66.13      ** reserved hypervisor mappings. This depends on the current
   66.14      ** page table type as well as the number of paging levels.
   66.15      */
   66.16 -    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8);
   66.17 +    xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8);
   66.18  
   66.19      if (pt_levels == 2 && type == XEN_DOMCTL_PFINFO_L2TAB)
   66.20          xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT);
   66.21 @@ -1279,6 +1279,18 @@ int xc_linux_save(int xc_handle, int io_
   66.22          ctxt.ctrlreg[3] = 
   66.23              xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3])));
   66.24  
   66.25 +        /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
   66.26 +        if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
   66.27 +        {
   66.28 +            if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) {
   66.29 +                ERROR("PT base is not in range of pseudophys map");
   66.30 +                goto out;
   66.31 +            }
   66.32 +            /* Least-significant bit means 'valid PFN'. */
   66.33 +            ctxt.ctrlreg[1] = 1 |
   66.34 +                xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1])));
   66.35 +        }
   66.36 +
   66.37          if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) {
   66.38              ERROR("Error when writing to state file (1) (errno %d)", errno);
   66.39              goto out;
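Read together with the restore hunk above: on save the user-mode pagetable pointer is rewritten from machine to pseudo-physical frame numbers and bit 0 is set as a 'user PT present' tag, so a zero ctrlreg[1] on the wire always means 'none'; on restore the field is only validated and translated back when non-zero. Schematically (cr1 stands for the saved register value; the macros are those used in both hunks):

    /* Save: MFN-space pointer -> tagged PFN-space value on the wire. */
    ctxt.ctrlreg[1] = 1 | xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(cr1)));

    /* Restore: non-zero means present; translate back through the p2m. */
    if ( (pt_levels == 4) && ctxt.ctrlreg[1] )
        ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[xen_cr3_to_pfn(ctxt.ctrlreg[1])]);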
    67.1 --- a/tools/libxc/xc_ptrace_core.c	Mon Mar 05 12:49:12 2007 -0600
    67.2 +++ b/tools/libxc/xc_ptrace_core.c	Thu Mar 08 14:39:52 2007 -0600
    67.3 @@ -192,7 +192,7 @@ pread_exact(int fd, void* buffer, size_t
    67.4  struct elf_core
    67.5  {
    67.6      int         domfd;
    67.7 -    Elf_Ehdr    ehdr;
    67.8 +    Elf64_Ehdr  ehdr;
    67.9  
   67.10      char*       shdr;
   67.11      
   67.12 @@ -242,6 +242,8 @@ elf_core_init(struct elf_core* ecore, in
   67.13      /* check elf header */
   67.14      if (!IS_ELF(ecore->ehdr) || ecore->ehdr.e_type != ET_CORE)
   67.15          goto out;
   67.16 +    if (ecore->ehdr.e_ident[EI_CLASS] != ELFCLASS64)
   67.17 +        goto out;
   67.18      /* check elf header more: EI_DATA, EI_VERSION, e_machine... */
   67.19  
   67.20      /* read section headers */
   67.21 @@ -294,7 +296,7 @@ elf_core_search_note(struct elf_core* ec
   67.22  }
   67.23  
   67.24  static int
   67.25 -elf_core_alloc_read_sec(struct elf_core* ecore, const Elf_Shdr* shdr,
   67.26 +elf_core_alloc_read_sec(struct elf_core* ecore, const Elf64_Shdr* shdr,
   67.27                          char** buf)
   67.28  {
   67.29      int ret;
   67.30 @@ -309,19 +311,19 @@ elf_core_alloc_read_sec(struct elf_core*
   67.31      return ret;
   67.32  }
   67.33  
   67.34 -static Elf_Shdr*
   67.35 +static Elf64_Shdr*
   67.36  elf_core_shdr_by_index(struct elf_core* ecore, uint16_t index)
   67.37  {
   67.38      if (index >= ecore->ehdr.e_shnum)
   67.39          return NULL;
   67.40 -    return (Elf_Shdr*)(ecore->shdr + ecore->ehdr.e_shentsize * index);
   67.41 +    return (Elf64_Shdr*)(ecore->shdr + ecore->ehdr.e_shentsize * index);
   67.42  }
   67.43  
   67.44  static int
   67.45  elf_core_alloc_read_sec_by_index(struct elf_core* ecore, uint16_t index,
   67.46                                   char** buf, uint64_t* size)
   67.47  {
   67.48 -    Elf_Shdr* shdr = elf_core_shdr_by_index(ecore, index);
   67.49 +    Elf64_Shdr* shdr = elf_core_shdr_by_index(ecore, index);
   67.50      if (shdr == NULL)
   67.51          return -1;
   67.52      if (size != NULL)
   67.53 @@ -329,14 +331,14 @@ elf_core_alloc_read_sec_by_index(struct 
   67.54      return elf_core_alloc_read_sec(ecore, shdr, buf);
   67.55  }
   67.56  
   67.57 -static Elf_Shdr*
   67.58 +static Elf64_Shdr*
   67.59  elf_core_shdr_by_name(struct elf_core* ecore, const char* name)
   67.60  {
   67.61      const char* s;
   67.62      for (s = ecore->shdr;
   67.63           s < ecore->shdr + ecore->ehdr.e_shentsize * ecore->ehdr.e_shnum;
   67.64           s += ecore->ehdr.e_shentsize) {
   67.65 -        Elf_Shdr* shdr = (Elf_Shdr*)s;
   67.66 +        Elf64_Shdr* shdr = (Elf64_Shdr*)s;
   67.67  
   67.68          if (strncmp(ecore->shstrtab + shdr->sh_name, name, strlen(name)) == 0)
   67.69              return shdr;
   67.70 @@ -348,7 +350,7 @@ elf_core_shdr_by_name(struct elf_core* e
   67.71  static int
   67.72  elf_core_read_sec_by_name(struct elf_core* ecore, const char* name, char* buf)
   67.73  {
   67.74 -    Elf_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
   67.75 +    Elf64_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
   67.76      return pread_exact(ecore->domfd, buf, shdr->sh_size, shdr->sh_offset);
   67.77      
   67.78  }
   67.79 @@ -357,7 +359,7 @@ static int
   67.80  elf_core_alloc_read_sec_by_name(struct elf_core* ecore, const char* name,
   67.81                                  char** buf, uint64_t* size)
   67.82  {
   67.83 -    Elf_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
   67.84 +    Elf64_Shdr* shdr = elf_core_shdr_by_name(ecore, name);
   67.85      if (shdr == NULL)
   67.86          return -1;
   67.87      if (size != NULL)
   67.88 @@ -508,8 +510,8 @@ xc_waitdomain_core_elf(
   67.89      struct xen_dumpcore_elfnote_xen_version *xen_version;
   67.90      struct xen_dumpcore_elfnote_format_version *format_version;
   67.91  
   67.92 -    Elf_Shdr* table_shdr;
   67.93 -    Elf_Shdr* pages_shdr;
   67.94 +    Elf64_Shdr* table_shdr;
   67.95 +    Elf64_Shdr* pages_shdr;
   67.96  
   67.97      if (elf_core_init(&ecore, domfd) < 0)
   67.98          goto out;
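With the writer side now emitting ELFCLASS64 unconditionally, the reader rejects anything else up front. Condensed, the header checks a dump-core consumer performs (IS_ELF is the magic-number test used above; EI_DATA and e_machine validation remain a TODO in this file):

    /* Sketch: minimal validation of a xen dump-core ELF header. */
    static int core_header_ok(const Elf64_Ehdr *ehdr)
    {
        return IS_ELF(*ehdr)                           /* \177ELF magic */
            && ehdr->e_type == ET_CORE                 /* core file */
            && ehdr->e_ident[EI_CLASS] == ELFCLASS64;  /* always 64-bit */
    }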
    68.1 --- a/tools/libxen/Makefile	Mon Mar 05 12:49:12 2007 -0600
    68.2 +++ b/tools/libxen/Makefile	Thu Mar 08 14:39:52 2007 -0600
    68.3 @@ -57,8 +57,8 @@ test/test_hvm_bindings: test/test_hvm_bi
    68.4  
    68.5  .PHONY: install
    68.6  install: all
    68.7 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/include/xen/api
    68.8 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
    68.9 +	$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/api
   68.10 +	$(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
   68.11  	$(INSTALL_PROG) libxenapi.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
   68.12  	ln -sf libxenapi.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenapi.so.$(MAJOR)
   68.13  	ln -sf libxenapi.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenapi.so
    69.1 --- a/tools/ptsname/Makefile	Mon Mar 05 12:49:12 2007 -0600
    69.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    69.3 @@ -1,22 +0,0 @@
    69.4 -
    69.5 -XEN_ROOT = ../..
    69.6 -include $(XEN_ROOT)/tools/Rules.mk
    69.7 -
    69.8 -.PHONY: all
    69.9 -all: build
   69.10 -.PHONY: build
   69.11 -build:
   69.12 -	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build
   69.13 -
   69.14 -.PHONY: install
   69.15 -ifndef XEN_PYTHON_NATIVE_INSTALL
   69.16 -install: all
   69.17 -	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix=""
   69.18 -else
   69.19 -install: all
   69.20 -	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)"
   69.21 -endif
   69.22 -
   69.23 -.PHONY: clean
   69.24 -clean:
   69.25 -	rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out
    70.1 --- a/tools/ptsname/ptsname.c	Mon Mar 05 12:49:12 2007 -0600
    70.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    70.3 @@ -1,44 +0,0 @@
    70.4 -/******************************************************************************
    70.5 - * ptsname.c
    70.6 - * 
    70.7 - * A python extension to expose the POSIX ptsname() function.
    70.8 - * 
    70.9 - * Copyright (C) 2007 XenSource Ltd
   70.10 - */
   70.11 -
   70.12 -#include <Python.h>
   70.13 -#include <stdlib.h>
   70.14 -
   70.15 -/* Needed for Python versions earlier than 2.3. */
   70.16 -#ifndef PyMODINIT_FUNC
   70.17 -#define PyMODINIT_FUNC DL_EXPORT(void)
   70.18 -#endif
   70.19 -
   70.20 -static PyObject *do_ptsname(PyObject *self, PyObject *args)
   70.21 -{
   70.22 -    int fd;
   70.23 -    char *path;
   70.24 -
   70.25 -    if (!PyArg_ParseTuple(args, "i", &fd))
   70.26 -        return NULL;
   70.27 -
   70.28 -    path = ptsname(fd);
   70.29 -
   70.30 -    if (!path)
   70.31 -    {
   70.32 -        PyErr_SetFromErrno(PyExc_IOError);
   70.33 -        return NULL;
   70.34 -    } 
   70.35 -
   70.36 -    return PyString_FromString(path);
   70.37 -}
   70.38 -
   70.39 -static PyMethodDef ptsname_methods[] = { 
   70.40 -    { "ptsname", do_ptsname, METH_VARARGS }, 
   70.41 -    { NULL }
   70.42 -};
   70.43 -
   70.44 -PyMODINIT_FUNC initptsname(void)
   70.45 -{
   70.46 -    Py_InitModule("ptsname", ptsname_methods);
   70.47 -}
    71.1 --- a/tools/ptsname/setup.py	Mon Mar 05 12:49:12 2007 -0600
    71.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    71.3 @@ -1,11 +0,0 @@
    71.4 -from distutils.core import setup, Extension
    71.5 -
    71.6 -extra_compile_args  = [ "-fno-strict-aliasing", "-Werror" ]
    71.7 -
    71.8 -setup(name         = 'ptsname',
    71.9 -      version      = '1.0',
   71.10 -      description  = 'POSIX ptsname() function',
   71.11 -      author       = 'Tim Deegan',
   71.12 -      author_email = 'Tim.Deegan@xensource.com',
   71.13 -      license      = 'GPL',
   71.14 -      ext_modules  = [ Extension("ptsname", [ "ptsname.c" ]) ])
    72.1 --- a/tools/pygrub/Makefile	Mon Mar 05 12:49:12 2007 -0600
    72.2 +++ b/tools/pygrub/Makefile	Thu Mar 08 14:39:52 2007 -0600
    72.3 @@ -10,13 +10,14 @@ build:
    72.4  
    72.5  .PHONY: install
    72.6  ifndef XEN_PYTHON_NATIVE_INSTALL
    72.7 +install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()")
    72.8  install: all
    72.9 -	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix=""
   72.10 -	$(INSTALL_DIR) -p $(DESTDIR)/var/run/xend/boot
   72.11 +	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --install-lib="$(DESTDIR)$(LIBPATH)/python"
   72.12 +	$(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot
   72.13  else
   72.14  install: all
   72.15  	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)"
   72.16 -	$(INSTALL_DIR) -p $(DESTDIR)/var/run/xend/boot
   72.17 +	$(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot
   72.18  endif
   72.19  
   72.20  .PHONY: clean
    73.1 --- a/tools/python/Makefile	Mon Mar 05 12:49:12 2007 -0600
    73.2 +++ b/tools/python/Makefile	Thu Mar 08 14:39:52 2007 -0600
    73.3 @@ -18,8 +18,9 @@ build:
    73.4  
    73.5  .PHONY: install
    73.6  ifndef XEN_PYTHON_NATIVE_INSTALL
    73.7 +install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print auxbin.libpath()")
    73.8  install: install-messages
    73.9 -	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --force
   73.10 +	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --force --install-lib="$(DESTDIR)$(LIBPATH)/python"
   73.11  else
   73.12  install: install-messages
   73.13  	CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" --force
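The `install: LIBPATH=$(shell ...)` lines added here, in the pygrub Makefile above and in the security Makefile below are GNU make target-specific variables: the shell probe (which presumably asks auxbin.libpath() whether the platform installs under lib or lib64) is only expanded when the install goal actually runs, and the resulting LIBPATH is then visible in that recipe's --install-lib path.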
    74.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    74.2 +++ b/tools/python/ptsname/ptsname.c	Thu Mar 08 14:39:52 2007 -0600
    74.3 @@ -0,0 +1,44 @@
    74.4 +/******************************************************************************
    74.5 + * ptsname.c
    74.6 + * 
    74.7 + * A python extension to expose the POSIX ptsname() function.
    74.8 + * 
    74.9 + * Copyright (C) 2007 XenSource Ltd
   74.10 + */
   74.11 +
   74.12 +#include <Python.h>
   74.13 +#include <stdlib.h>
   74.14 +
   74.15 +/* Needed for Python versions earlier than 2.3. */
   74.16 +#ifndef PyMODINIT_FUNC
   74.17 +#define PyMODINIT_FUNC DL_EXPORT(void)
   74.18 +#endif
   74.19 +
   74.20 +static PyObject *do_ptsname(PyObject *self, PyObject *args)
   74.21 +{
   74.22 +    int fd;
   74.23 +    char *path;
   74.24 +
   74.25 +    if (!PyArg_ParseTuple(args, "i", &fd))
   74.26 +        return NULL;
   74.27 +
   74.28 +    path = ptsname(fd);
   74.29 +
   74.30 +    if (!path)
   74.31 +    {
   74.32 +        PyErr_SetFromErrno(PyExc_IOError);
   74.33 +        return NULL;
   74.34 +    } 
   74.35 +
   74.36 +    return PyString_FromString(path);
   74.37 +}
   74.38 +
   74.39 +static PyMethodDef ptsname_methods[] = { 
   74.40 +    { "ptsname", do_ptsname, METH_VARARGS }, 
   74.41 +    { NULL }
   74.42 +};
   74.43 +
   74.44 +PyMODINIT_FUNC initptsname(void)
   74.45 +{
   74.46 +    Py_InitModule("ptsname", ptsname_methods);
   74.47 +}
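Two notes on this otherwise verbatim move: ptsname(3) returns a pointer into static storage, so the PyString_FromString() copy is what makes the result safe to keep across calls, and the rebuilt module is imported as xen.lowlevel.ptsname, as the XendBootloader.py hunk below shows.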
    75.1 --- a/tools/python/setup.py	Mon Mar 05 12:49:12 2007 -0600
    75.2 +++ b/tools/python/setup.py	Thu Mar 08 14:39:52 2007 -0600
    75.3 @@ -44,7 +44,14 @@ acm = Extension("acm",
    75.4                 libraries          = libraries,
    75.5                 sources            = [ "xen/lowlevel/acm/acm.c" ])
    75.6  
    75.7 -modules = [ xc, xs, acm ]
    75.8 +ptsname = Extension("ptsname",
    75.9 +               extra_compile_args = extra_compile_args,
   75.10 +               include_dirs       = include_dirs + [ "ptsname" ],
   75.11 +               library_dirs       = library_dirs,
   75.12 +               libraries          = libraries,
   75.13 +               sources            = [ "ptsname/ptsname.c" ])
   75.14 +
   75.15 +modules = [ xc, xs, acm, ptsname ]
   75.16  if os.uname()[0] == 'SunOS':
   75.17      modules.append(scf)
   75.18  
    76.1 --- a/tools/python/xen/xend/XendBootloader.py	Mon Mar 05 12:49:12 2007 -0600
    76.2 +++ b/tools/python/xen/xend/XendBootloader.py	Thu Mar 08 14:39:52 2007 -0600
    76.3 @@ -21,7 +21,8 @@ from xen.util import mkdir
    76.4  from XendLogging import log
    76.5  from XendError import VmError
    76.6  
    76.7 -import pty, ptsname, termios, fcntl
    76.8 +import pty, termios, fcntl
    76.9 +from xen.lowlevel import ptsname
   76.10  
   76.11  def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '',
   76.12                 ramdisk = '', kernel_args = ''):
    77.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Mon Mar 05 12:49:12 2007 -0600
    77.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Thu Mar 08 14:39:52 2007 -0600
    77.3 @@ -781,7 +781,6 @@ class XendDomainInfo:
    77.4              'name':               self.info['name_label'],
    77.5              'console/limit':      str(xoptions.get_console_limit() * 1024),
    77.6              'memory/target':      str(self.info['memory_static_min'] * 1024),
    77.7 -            'control/platform-feature-multiprocessor-suspend': str(1)
    77.8              }
    77.9  
   77.10          def f(n, v):
   77.11 @@ -796,6 +795,9 @@ class XendDomainInfo:
   77.12          f('store/port',       self.store_port)
   77.13          f('store/ring-ref',   self.store_mfn)
   77.14  
   77.15 +        if arch.type == "x86":
   77.16 +            f('control/platform-feature-multiprocessor-suspend', True)
   77.17 +
   77.18          # elfnotes
   77.19          for n, v in self.info.get_notes().iteritems():
   77.20              n = n.lower().replace('_', '-')
   77.21 @@ -1503,7 +1505,7 @@ class XendDomainInfo:
   77.22              self.info['start_time'] = time.time()
   77.23  
   77.24              self._stateSet(DOM_STATE_RUNNING)
   77.25 -        except RuntimeError, exn:
   77.26 +        except (RuntimeError, VmError), exn:
   77.27              log.exception("XendDomainInfo.initDomain: exception occurred")
   77.28              self.image.cleanupBootloading()
   77.29              raise VmError(str(exn))
   77.30 @@ -2090,26 +2092,26 @@ class XendDomainInfo:
   77.31          return self.info.get('tools_version', {})
   77.32      
   77.33      def get_on_shutdown(self):
   77.34 -        after_shutdown = self.info.get('action_after_shutdown')
   77.35 +        after_shutdown = self.info.get('actions_after_shutdown')
   77.36          if not after_shutdown or after_shutdown not in XEN_API_ON_NORMAL_EXIT:
   77.37              return XEN_API_ON_NORMAL_EXIT[-1]
   77.38          return after_shutdown
   77.39  
   77.40      def get_on_reboot(self):
   77.41 -        after_reboot = self.info.get('action_after_reboot')
   77.42 +        after_reboot = self.info.get('actions_after_reboot')
   77.43          if not after_reboot or after_reboot not in XEN_API_ON_NORMAL_EXIT:
   77.44              return XEN_API_ON_NORMAL_EXIT[-1]
   77.45          return after_reboot
   77.46  
   77.47      def get_on_suspend(self):
   77.48          # TODO: not supported        
   77.49 -        after_suspend = self.info.get('action_after_suspend') 
   77.50 +        after_suspend = self.info.get('actions_after_suspend') 
   77.51          if not after_suspend or after_suspend not in XEN_API_ON_NORMAL_EXIT:
   77.52              return XEN_API_ON_NORMAL_EXIT[-1]
   77.53          return after_suspend        
   77.54  
   77.55      def get_on_crash(self):
   77.56 -        after_crash = self.info.get('action_after_crash')
   77.57 +        after_crash = self.info.get('actions_after_crash')
   77.58          if not after_crash or after_crash not in XEN_API_ON_CRASH_BEHAVIOUR:
   77.59              return XEN_API_ON_CRASH_BEHAVIOUR[0]
   77.60          return after_crash
    78.1 --- a/tools/python/xen/xend/XendNode.py	Mon Mar 05 12:49:12 2007 -0600
    78.2 +++ b/tools/python/xen/xend/XendNode.py	Thu Mar 08 14:39:52 2007 -0600
    78.3 @@ -22,7 +22,7 @@ import xen.lowlevel.xc
    78.4  
    78.5  from xen.util import Brctl
    78.6  
    78.7 -from xen.xend import uuid
    78.8 +from xen.xend import uuid, arch
    78.9  from xen.xend.XendError import *
   78.10  from xen.xend.XendOptions import instance as xendoptions
   78.11  from xen.xend.XendQCoWStorageRepo import XendQCoWStorageRepo
   78.12 @@ -97,17 +97,38 @@ class XendNode:
   78.13          for u in self.cpus.keys():
   78.14              log.error(self.cpus[u])
   78.15              number = self.cpus[u]['number']
   78.16 +            # We can run off the end of the cpuinfo list if domain0 does not
   78.17 +            # have #vcpus == #pcpus. In that case we just replicate one that's
   78.18 +            # in the hash table.
   78.19 +            if not cpuinfo.has_key(number):
   78.20 +                number = cpuinfo.keys()[0]
   78.21              log.error(number)
   78.22              log.error(cpuinfo)
   78.23 -            self.cpus[u].update(
   78.24 -                { 'host'     : self.uuid,
   78.25 -                  'features' : cpu_features,
   78.26 -                  'speed'    : int(float(cpuinfo[number]['cpu MHz'])),
   78.27 -                  'vendor'   : cpuinfo[number]['vendor_id'],
   78.28 -                  'modelname': cpuinfo[number]['model name'],
   78.29 -                  'stepping' : cpuinfo[number]['stepping'],
   78.30 -                  'flags'    : cpuinfo[number]['flags'],
   78.31 -                })
   78.32 +            if arch.type == "x86":
   78.33 +                self.cpus[u].update(
   78.34 +                    { 'host'     : self.uuid,
   78.35 +                      'features' : cpu_features,
   78.36 +                      'speed'    : int(float(cpuinfo[number]['cpu MHz'])),
   78.37 +                      'vendor'   : cpuinfo[number]['vendor_id'],
   78.38 +                      'modelname': cpuinfo[number]['model name'],
   78.39 +                      'stepping' : cpuinfo[number]['stepping'],
   78.40 +                      'flags'    : cpuinfo[number]['flags'],
   78.41 +                    })
   78.42 +            elif arch.type == "ia64":
   78.43 +                self.cpus[u].update(
   78.44 +                    { 'host'     : self.uuid,
   78.45 +                      'features' : cpu_features,
   78.46 +                      'speed'    : int(float(cpuinfo[number]['cpu MHz'])),
   78.47 +                      'vendor'   : cpuinfo[number]['vendor'],
   78.48 +                      'modelname': cpuinfo[number]['family'],
   78.49 +                      'stepping' : cpuinfo[number]['model'],
   78.50 +                      'flags'    : cpuinfo[number]['features'],
   78.51 +                    })
   78.52 +            else:
   78.53 +                self.cpus[u].update(
   78.54 +                    { 'host'     : self.uuid,
   78.55 +                      'features' : cpu_features,
   78.56 +                    })
   78.57  
   78.58          self.pifs = {}
   78.59          self.pif_metrics = {}
    79.1 --- a/tools/security/Makefile	Mon Mar 05 12:49:12 2007 -0600
    79.2 +++ b/tools/security/Makefile	Thu Mar 08 14:39:52 2007 -0600
    79.3 @@ -54,26 +54,29 @@ ifeq ($(ACM_SECURITY),y)
    79.4  all: build
    79.5  
    79.6  .PHONY: install
    79.7 +ifndef XEN_PYTHON_NATIVE_INSTALL
    79.8 +install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()")
    79.9 +endif
   79.10  install: all $(ACM_CONFIG_FILE)
   79.11 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
   79.12 -	$(INSTALL_PROG) -p $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin
   79.13 -	$(INSTALL_PROG) -p $(ACM_EZPOLICY) $(DESTDIR)/usr/sbin
   79.14 -	$(INSTALL_DIR) -p $(DESTDIR)$(ACM_CONFIG_DIR)
   79.15 -	$(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)
   79.16 -	$(INSTALL_DATA) -p policies/$(ACM_SCHEMA) $(DESTDIR)$(ACM_POLICY_DIR)
   79.17 -	$(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)/example
   79.18 +	$(INSTALL_DIR) $(DESTDIR)/usr/sbin
   79.19 +	$(INSTALL_PROG) $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin
   79.20 +	$(INSTALL_PROG) $(ACM_EZPOLICY) $(DESTDIR)/usr/sbin
   79.21 +	$(INSTALL_DIR) $(DESTDIR)$(ACM_CONFIG_DIR)
   79.22 +	$(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)
   79.23 +	$(INSTALL_DATA) policies/$(ACM_SCHEMA) $(DESTDIR)$(ACM_POLICY_DIR)
   79.24 +	$(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)/example
   79.25  	for i in $(ACM_EXAMPLES); do \
   79.26 -		$(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
   79.27 -		$(INSTALL_DATA) -p policies/example/$$i/client_v1-$(ACM_POLICY_SUFFIX) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
   79.28 +		$(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
   79.29 +		$(INSTALL_DATA) policies/example/$$i/client_v1-$(ACM_POLICY_SUFFIX) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \
   79.30  	done
   79.31 -	$(INSTALL_DIR) -p $(DESTDIR)$(ACM_SCRIPT_DIR)
   79.32 -	$(INSTALL_PROG) -p $(ACM_SCRIPTS) $(DESTDIR)$(ACM_SCRIPT_DIR)
   79.33 -	$(INSTALL_DIR) -p $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
   79.34 -	$(INSTALL_DATA) -p $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
   79.35 -	$(INSTALL_DIR) -p $(DESTDIR)$(ACM_SECGEN_CGIDIR)
   79.36 -	$(INSTALL_PROG) -p $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
   79.37 +	$(INSTALL_DIR) $(DESTDIR)$(ACM_SCRIPT_DIR)
   79.38 +	$(INSTALL_PROG) $(ACM_SCRIPTS) $(DESTDIR)$(ACM_SCRIPT_DIR)
   79.39 +	$(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
   79.40 +	$(INSTALL_DATA) $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR)
   79.41 +	$(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
   79.42 +	$(INSTALL_PROG) $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR)
   79.43  ifndef XEN_PYTHON_NATIVE_INSTALL
   79.44 -	python python/setup.py install --home="$(DESTDIR)/usr"
   79.45 +	python python/setup.py install --home="$(DESTDIR)/usr" --install-lib="$(DESTDIR)$(LIBPATH)/python"
   79.46  else
   79.47  	python python/setup.py install --root="$(DESTDIR)"
   79.48  endif
    80.1 --- a/tools/vnet/libxutil/Makefile	Mon Mar 05 12:49:12 2007 -0600
    80.2 +++ b/tools/vnet/libxutil/Makefile	Thu Mar 08 14:39:52 2007 -0600
    80.3 @@ -71,7 +71,7 @@ check-for-zlib:
    80.4  
    80.5  .PHONY: install
    80.6  install: build
    80.7 -	[ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
    80.8 +	[ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
    80.9  	$(INSTALL_PROG) libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
   80.10  	$(INSTALL_DATA) libxutil.a $(DESTDIR)/usr/$(LIBDIR)
   80.11  	ln -sf libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so.$(MAJOR)
    81.1 --- a/tools/xenfb/Makefile	Mon Mar 05 12:49:12 2007 -0600
    81.2 +++ b/tools/xenfb/Makefile	Thu Mar 08 14:39:52 2007 -0600
    81.3 @@ -1,13 +1,10 @@
    81.4  XEN_ROOT=../..
    81.5  include $(XEN_ROOT)/tools/Rules.mk
    81.6  
    81.7 -CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE) -I$(XEN_ROOT)/linux-2.6-xen-sparse/include -I$(XEN_ROOT)/tools/ioemu
    81.8 +CFLAGS  += -I$(XEN_LIBXC) -I$(XEN_XENSTORE)
    81.9 +CFLAGS  += -I$(XEN_ROOT)/linux-2.6-xen-sparse/include -I$(XEN_ROOT)/tools/ioemu
   81.10  LDFLAGS += -L$(XEN_LIBXC) -L$(XEN_XENSTORE)
   81.11  
   81.12 -INSTALL         = install
   81.13 -INSTALL_PROG    = $(INSTALL) -m0755
   81.14 -INSTALL_DIR     = $(INSTALL) -d -m0755
   81.15 -
   81.16  .PHONY: all
   81.17  all: build
   81.18  
   81.19 @@ -16,7 +13,7 @@ build:
   81.20  	$(MAKE) vncfb sdlfb
   81.21  
   81.22  install: all
   81.23 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)/xen/bin
   81.24 +	$(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)/xen/bin
   81.25  	$(INSTALL_PROG) vncfb $(DESTDIR)/usr/$(LIBDIR)/xen/bin/xen-vncfb
   81.26  	$(INSTALL_PROG) sdlfb $(DESTDIR)/usr/$(LIBDIR)/xen/bin/xen-sdlfb
   81.27  
    82.1 --- a/tools/xenfb/xenfb.c	Mon Mar 05 12:49:12 2007 -0600
    82.2 +++ b/tools/xenfb/xenfb.c	Thu Mar 08 14:39:52 2007 -0600
    82.3 @@ -245,11 +245,10 @@ static int xenfb_wait_for_state(struct x
    82.4  	unsigned state, dummy;
    82.5  	char **vec;
    82.6  
    82.7 +	awaited |= 1 << XenbusStateUnknown;
    82.8 +
    82.9  	for (;;) {
   82.10  		state = xenfb_read_state(xsh, dir);
   82.11 -		if (state < 0)
   82.12 -			return -1;
   82.13 -
   82.14  		if ((1 << state) & awaited)
   82.15  			return state;
   82.16  
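xenfb_read_state() returns an unsigned XenbusState, so the deleted `state < 0` test could never fire; a missing or unparseable state node instead reads back as XenbusStateUnknown (0), which is now force-added to the awaited mask so the wait always terminates. The idiom in isolation (a sketch; the real loop blocks on a xenstore watch between reads):

    unsigned awaited = (1 << XenbusStateConnected)
                     | (1 << XenbusStateClosed);
    awaited |= 1 << XenbusStateUnknown;    /* node vanished: stop waiting */

    for (;;) {
        unsigned state = xenfb_read_state(xsh, dir);
        if ((1 << state) & awaited)
            return state;                  /* caller checks which state hit */
        /* ... wait for the next xenstore event before re-reading ... */
    }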
    83.1 --- a/tools/xenstore/Makefile	Mon Mar 05 12:49:12 2007 -0600
    83.2 +++ b/tools/xenstore/Makefile	Thu Mar 08 14:39:52 2007 -0600
    83.3 @@ -168,16 +168,16 @@ tarball: clean
    83.4  
    83.5  .PHONY: install
    83.6  install: all
    83.7 -	$(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored
    83.8 -	$(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored
    83.9 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/bin
   83.10 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/sbin
   83.11 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/include
   83.12 +	$(INSTALL_DIR) $(DESTDIR)/var/run/xenstored
   83.13 +	$(INSTALL_DIR) $(DESTDIR)/var/lib/xenstored
   83.14 +	$(INSTALL_DIR) $(DESTDIR)/usr/bin
   83.15 +	$(INSTALL_DIR) $(DESTDIR)/usr/sbin
   83.16 +	$(INSTALL_DIR) $(DESTDIR)/usr/include
   83.17  	$(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin
   83.18  	$(INSTALL_PROG) $(CLIENTS) $(DESTDIR)/usr/bin
   83.19  	$(INSTALL_PROG) xenstore-control $(DESTDIR)/usr/bin
   83.20  	$(INSTALL_PROG) xenstore-ls $(DESTDIR)/usr/bin
   83.21 -	$(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)
   83.22 +	$(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)
   83.23  	$(INSTALL_PROG) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)
   83.24  	ln -sf libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenstore.so.$(MAJOR)
   83.25  	ln -sf libxenstore.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenstore.so
    84.1 --- a/xen/Rules.mk	Mon Mar 05 12:49:12 2007 -0600
    84.2 +++ b/xen/Rules.mk	Thu Mar 08 14:39:52 2007 -0600
    84.3 @@ -41,8 +41,8 @@ HDRS += $(wildcard $(BASEDIR)/include/as
    84.4  include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
    84.5  
    84.6  # Do not depend on auto-generated header files.
    84.7 -HDRS := $(subst $(BASEDIR)/include/asm-$(TARGET_ARCH)/asm-offsets.h,,$(HDRS))
    84.8 -HDRS := $(subst $(BASEDIR)/include/xen/compile.h,,$(HDRS))
    84.9 +AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS))
   84.10 +HDRS  := $(filter-out %/asm-offsets.h,$(AHDRS))
   84.11  
   84.12  # Note that link order matters!
   84.13  ALL_OBJS-y               += $(BASEDIR)/common/built_in.o
   84.14 @@ -110,12 +110,12 @@ clean:: $(addprefix _clean_, $(subdir-al
   84.15  %.o: %.c $(HDRS) Makefile
   84.16  	$(CC) $(CFLAGS) -c $< -o $@
   84.17  
   84.18 -%.o: %.S $(HDRS) Makefile
   84.19 +%.o: %.S $(AHDRS) Makefile
   84.20  	$(CC) $(AFLAGS) -c $< -o $@
   84.21  
   84.22  %.i: %.c $(HDRS) Makefile
   84.23  	$(CPP) $(CFLAGS) $< -o $@
   84.24  
   84.25  # -std=gnu{89,99} gets confused by # as an end-of-line comment marker
   84.26 -%.s: %.S $(HDRS) Makefile
   84.27 +%.s: %.S $(AHDRS) Makefile
   84.28  	$(CPP) $(AFLAGS) $< -o $@
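The net effect is two dependency lists: AHDRS, everything except the perpetually regenerated compile.h, for assembly sources, which genuinely include the generated asm-offsets.h and must rebuild when it changes; and HDRS, which additionally drops asm-offsets.h, for C sources, since asm-offsets.h is itself produced from a C compile and a dependency on it there would be circular.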
    85.1 --- a/xen/arch/x86/domain.c	Mon Mar 05 12:49:12 2007 -0600
    85.2 +++ b/xen/arch/x86/domain.c	Thu Mar 08 14:39:52 2007 -0600
    85.3 @@ -641,6 +641,31 @@ int arch_set_info_guest(
    85.4              }
    85.5  
    85.6              v->arch.guest_table = pagetable_from_pfn(cr3_pfn);
    85.7 +
    85.8 +#ifdef __x86_64__
    85.9 +            if ( c.nat->ctrlreg[1] )
   85.10 +            {
   85.11 +                cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1]));
   85.12 +
   85.13 +                if ( !mfn_valid(cr3_pfn) ||
   85.14 +                     (paging_mode_refcounts(d)
   85.15 +                      ? !get_page(mfn_to_page(cr3_pfn), d)
   85.16 +                      : !get_page_and_type(mfn_to_page(cr3_pfn), d,
   85.17 +                                           PGT_base_page_table)) )
   85.18 +                {
   85.19 +                    cr3_pfn = pagetable_get_pfn(v->arch.guest_table);
   85.20 +                    v->arch.guest_table = pagetable_null();
   85.21 +                    if ( paging_mode_refcounts(d) )
   85.22 +                        put_page(mfn_to_page(cr3_pfn));
   85.23 +                    else
   85.24 +                        put_page_and_type(mfn_to_page(cr3_pfn));
   85.25 +                    destroy_gdt(v);
   85.26 +                    return -EINVAL;
   85.27 +                }
   85.28 +
   85.29 +                v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn);
   85.30 +            }
   85.31 +#endif
   85.32          }
   85.33  #ifdef CONFIG_COMPAT
   85.34          else
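A subtlety in the failure path above: by the time the user-mode table is looked up, a reference is already held on the kernel table just installed in v->arch.guest_table, so on error cr3_pfn is re-pointed at that kernel table and the put_page()/put_page_and_type() releases that reference (the user table's get_page never succeeded) before the GDT is destroyed and -EINVAL returned.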
    86.1 --- a/xen/arch/x86/domain_build.c	Mon Mar 05 12:49:12 2007 -0600
    86.2 +++ b/xen/arch/x86/domain_build.c	Thu Mar 08 14:39:52 2007 -0600
    86.3 @@ -374,9 +374,6 @@ int construct_dom0(struct domain *d,
    86.4      if ( parms.f_required[0] /* Huh? -- kraxel */ )
    86.5              panic("Domain 0 requires an unsupported hypervisor feature.\n");
    86.6  
    86.7 -    /* Align load address to 4MB boundary. */
    86.8 -    v_start = parms.virt_base & ~((1UL<<22)-1);
    86.9 -
   86.10      /*
   86.11       * Why do we need this? The number of page-table frames depends on the 
   86.12       * size of the bootstrap address space. But the size of the address space 
   86.13 @@ -384,6 +381,7 @@ int construct_dom0(struct domain *d,
   86.14       * read-only). We have a pair of simultaneous equations in two unknowns, 
   86.15       * which we solve by exhaustive search.
   86.16       */
   86.17 +    v_start          = parms.virt_base;
   86.18      vkern_start      = parms.virt_kstart;
   86.19      vkern_end        = parms.virt_kend;
   86.20      vinitrd_start    = round_pgup(vkern_end);
    87.1 --- a/xen/arch/x86/domctl.c	Mon Mar 05 12:49:12 2007 -0600
    87.2 +++ b/xen/arch/x86/domctl.c	Thu Mar 08 14:39:52 2007 -0600
    87.3 @@ -470,8 +470,15 @@ void arch_get_info_guest(struct vcpu *v,
    87.4          c(user_regs.eflags |= v->arch.iopl << 12);
    87.5  
    87.6          if ( !IS_COMPAT(v->domain) )
    87.7 +        {
    87.8              c.nat->ctrlreg[3] = xen_pfn_to_cr3(
    87.9                  pagetable_get_pfn(v->arch.guest_table));
   87.10 +#ifdef __x86_64__
   87.11 +            if ( !pagetable_is_null(v->arch.guest_table_user) )
   87.12 +                c.nat->ctrlreg[1] = xen_pfn_to_cr3(
   87.13 +                    pagetable_get_pfn(v->arch.guest_table_user));
   87.14 +#endif
   87.15 +        }
   87.16  #ifdef CONFIG_COMPAT
   87.17          else
   87.18          {
    88.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Mar 05 12:49:12 2007 -0600
    88.2 +++ b/xen/arch/x86/hvm/hvm.c	Thu Mar 08 14:39:52 2007 -0600
    88.3 @@ -161,7 +161,8 @@ int hvm_domain_initialise(struct domain 
    88.4      spin_lock_init(&d->arch.hvm_domain.buffered_io_lock);
    88.5      spin_lock_init(&d->arch.hvm_domain.irq_lock);
    88.6  
    88.7 -    rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external);
    88.8 +    /* paging support will be determined inside paging.c */
    88.9 +    rc = paging_enable(d, PG_refcounts|PG_translate|PG_external);
   88.10      if ( rc != 0 )
   88.11          return rc;
   88.12  
    89.1 --- a/xen/arch/x86/hvm/svm/emulate.c	Mon Mar 05 12:49:12 2007 -0600
    89.2 +++ b/xen/arch/x86/hvm/svm/emulate.c	Thu Mar 08 14:39:52 2007 -0600
    89.3 @@ -24,9 +24,11 @@
    89.4  #include <asm/msr.h>
    89.5  #include <asm/hvm/hvm.h>
    89.6  #include <asm/hvm/support.h>
    89.7 +#include <asm/hvm/svm/svm.h>
    89.8  #include <asm/hvm/svm/vmcb.h>
    89.9  #include <asm/hvm/svm/emulate.h>
   89.10  
   89.11 +
   89.12  extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
   89.13          int inst_len);
   89.14  
   89.15 @@ -133,13 +135,15 @@ static inline unsigned long DECODE_GPR_V
   89.16  #define sib operand [1]
   89.17  
   89.18  
   89.19 -unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb, 
   89.20 -        struct cpu_user_regs *regs, const u8 prefix, int inst_len,
   89.21 -        const u8 *operand, u8 *size)
   89.22 +unsigned long get_effective_addr_modrm64(struct cpu_user_regs *regs, 
   89.23 +                                         const u8 prefix, int inst_len,
   89.24 +                                         const u8 *operand, u8 *size)
   89.25  {
   89.26      unsigned long effective_addr = (unsigned long) -1;
   89.27      u8 length, modrm_mod, modrm_rm;
   89.28      u32 disp = 0;
   89.29 +    struct vcpu *v = current;
   89.30 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   89.31  
   89.32      HVM_DBG_LOG(DBG_LEVEL_1, "get_effective_addr_modrm64(): prefix = %x, "
   89.33              "length = %d, operand[0,1] = %x %x.\n", prefix, *size, operand [0],
   89.34 @@ -198,7 +202,7 @@ unsigned long get_effective_addr_modrm64
   89.35  
   89.36  #if __x86_64__
   89.37          /* 64-bit mode */
   89.38 -        if (vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA))
   89.39 +        if (vmcb->cs.attr.fields.l && svm_long_mode_enabled(v))
   89.40              return vmcb->rip + inst_len + *size + disp;
   89.41  #endif
   89.42          return disp;
   89.43 @@ -310,7 +314,7 @@ unsigned int decode_src_reg(u8 prefix, u
   89.44  }
   89.45  
   89.46  
   89.47 -unsigned long svm_rip2pointer(struct vmcb_struct *vmcb)
   89.48 +unsigned long svm_rip2pointer(struct vcpu *v)
   89.49  {
   89.50      /*
   89.51       * The following is subtle. Intuitively this code would be something like:
   89.52 @@ -322,8 +326,9 @@ unsigned long svm_rip2pointer(struct vmc
    89.53       * %cs is updated, but fortunately, base contains the valid base address
   89.54       * no matter what kind of addressing is used.
   89.55       */
   89.56 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   89.57      unsigned long p = vmcb->cs.base + vmcb->rip;
   89.58 -    if (!(vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA))
   89.59 +    if (!(vmcb->cs.attr.fields.l && svm_long_mode_enabled(v)))
   89.60          return (u32)p; /* mask to 32 bits */
   89.61      /* NB. Should mask to 16 bits if in real mode or 16-bit protected mode. */
   89.62      return p;
   89.63 @@ -410,10 +415,11 @@ static const u8 *opc_bytes[INSTR_MAX_COU
   89.64   * The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer
   89.65   * to enough bytes to satisfy the instruction including prefix bytes.
   89.66   */
   89.67 -int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
   89.68 +int __get_instruction_length_from_list(struct vcpu *v,
   89.69          enum instruction_index *list, unsigned int list_count, 
   89.70          u8 *guest_eip_buf, enum instruction_index *match)
   89.71  {
   89.72 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   89.73      unsigned int inst_len = 0;
   89.74      unsigned int i;
   89.75      unsigned int j;
   89.76 @@ -429,7 +435,7 @@ int __get_instruction_length_from_list(s
   89.77      }
   89.78      else
   89.79      {
   89.80 -        inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), MAX_INST_LEN);
   89.81 +        inst_copy_from_guest(buffer, svm_rip2pointer(v), MAX_INST_LEN);
   89.82          buf = buffer;
   89.83      }
   89.84  
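The common thread in this file's hunks: the emulation helpers now take a struct vcpu * and fetch the VMCB themselves via v->arch.hvm_svm.vmcb, which also lets them consult per-vcpu shadow state (such as the shadow EFER behind svm_long_mode_enabled()) instead of raw VMCB fields. Call sites in svm.c below shrink accordingly; schematically, inside a VMEXIT handler:

    /* Sketch of the new calling convention (fragment, not full handler). */
    struct vcpu *v = current;
    inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
    addr = svm_rip2pointer(v);    /* VMCB derived internally */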
    90.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Mar 05 12:49:12 2007 -0600
    90.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Thu Mar 08 14:39:52 2007 -0600
    90.3 @@ -49,6 +49,7 @@
    90.4  #include <public/sched.h>
    90.5  #include <asm/hvm/vpt.h>
    90.6  #include <asm/hvm/trace.h>
    90.7 +#include <asm/hap.h>
    90.8  
    90.9  #define SVM_EXTRA_DEBUG
   90.10  
   90.11 @@ -76,6 +77,10 @@ static void *root_vmcb[NR_CPUS] __read_m
   90.12  /* physical address of above for host VMSAVE/VMLOAD */
   90.13  u64 root_vmcb_pa[NR_CPUS] __read_mostly;
   90.14  
   90.15 +/* hardware assisted paging bits */
   90.16 +extern int opt_hap_enabled;
   90.17 +extern int hap_capable_system;
   90.18 +
   90.19  static inline void svm_inject_exception(struct vcpu *v, int trap, 
   90.20                                          int ev, int error_code)
   90.21  {
   90.22 @@ -148,31 +153,6 @@ static void svm_store_cpu_guest_regs(
   90.23      }
   90.24  }
   90.25  
   90.26 -static int svm_paging_enabled(struct vcpu *v)
   90.27 -{
   90.28 -    unsigned long cr0;
   90.29 -
   90.30 -    cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
   90.31 -
   90.32 -    return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG);
   90.33 -}
   90.34 -
   90.35 -static int svm_pae_enabled(struct vcpu *v)
   90.36 -{
   90.37 -    unsigned long cr4;
   90.38 -
   90.39 -    if(!svm_paging_enabled(v))
   90.40 -        return 0;
   90.41 -
   90.42 -    cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
   90.43 -
   90.44 -    return (cr4 & X86_CR4_PAE);
   90.45 -}
   90.46 -
   90.47 -static int svm_long_mode_enabled(struct vcpu *v)
   90.48 -{
   90.49 -    return test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
   90.50 -}
   90.51  
   90.52  static inline int long_mode_do_msr_read(struct cpu_user_regs *regs)
   90.53  {
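Of the three predicates deleted here, svm_paging_enabled() is still called throughout the file, and new svm_long_mode_enabled()/svm_lme_is_set()/svm_cr4_pae_is_set() uses appear below, so they have moved into asm/hvm/svm/svm.h (newly included by emulate.c above), rebased onto the shadow EFER. Plausible bodies, inferred from how npt_set_cr0() below maintains EFER_LMA in cpu_shadow_efer (a sketch, not the verbatim header):

    /* Sketch: the actual header contents may differ. */
    static inline int svm_long_mode_enabled(struct vcpu *v)
    {
        return v->arch.hvm_svm.cpu_shadow_efer & EFER_LMA;
    }

    static inline int svm_lme_is_set(struct vcpu *v)
    {
        return v->arch.hvm_svm.cpu_shadow_efer & EFER_LME;
    }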
   90.54 @@ -183,8 +163,7 @@ static inline int long_mode_do_msr_read(
   90.55      switch ((u32)regs->ecx)
   90.56      {
   90.57      case MSR_EFER:
   90.58 -        msr_content = vmcb->efer;
   90.59 -        msr_content &= ~EFER_SVME;
   90.60 +        msr_content = v->arch.hvm_svm.cpu_shadow_efer;
   90.61          break;
   90.62  
   90.63  #ifdef __x86_64__
   90.64 @@ -255,30 +234,54 @@ static inline int long_mode_do_msr_write
   90.65              goto gp_fault;
   90.66          }
   90.67  
   90.68 +        /* 
   90.69 +         * update the VMCB's EFER with the intended value along with
   90.70 +         * that crucial EFER.SVME bit =)
   90.71 +         */
   90.72 +        vmcb->efer = msr_content | EFER_SVME;
   90.73 +
   90.74  #ifdef __x86_64__
   90.75 -        /* LME: 0 -> 1 */
   90.76 -        if ( msr_content & EFER_LME &&
   90.77 -             !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
   90.78 +
   90.79 +        /*
   90.80 +         * Check for EFER.LME transitions from 0->1 or 1->0.  Do the
   90.81 +         * sanity checks and then make sure that both EFER.LME and
   90.82 +         * EFER.LMA are cleared. (EFER.LME can't be set in the vmcb
   90.83 +         * until the guest also sets CR0.PG, since even if the guest has
   90.84 +         * paging "disabled", the vmcb's CR0 always has PG set.)
   90.85 +         */
   90.86 +        if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) )
   90.87          {
   90.88 +            /* EFER.LME transition from 0 to 1 */
   90.89 +            
   90.90              if ( svm_paging_enabled(v) ||
   90.91 -                 !test_bit(SVM_CPU_STATE_PAE_ENABLED,
   90.92 -                           &v->arch.hvm_svm.cpu_state) )
   90.93 +                 !svm_cr4_pae_is_set(v) )
   90.94              {
   90.95                  gdprintk(XENLOG_WARNING, "Trying to set LME bit when "
   90.96                           "in paging mode or PAE bit is not set\n");
   90.97                  goto gp_fault;
   90.98              }
   90.99 -            set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state);
  90.100 +
  90.101 +            vmcb->efer &= ~(EFER_LME | EFER_LMA);
  90.102          }
  90.103 -
  90.104 -        /* We have already recorded that we want LME, so it will be set 
  90.105 -         * next time CR0 gets updated. So we clear that bit and continue.
  90.106 -         */
  90.107 -        if ((msr_content ^ vmcb->efer) & EFER_LME)
  90.108 -            msr_content &= ~EFER_LME;  
  90.109 -        /* No update for LME/LMA since it have no effect */
  90.110 -#endif
  90.111 -        vmcb->efer = msr_content | EFER_SVME;
  90.112 +        else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) )
  90.113 +        {
   90.114 +            /* EFER.LME transition from 1 to 0 */
  90.115 +            
  90.116 +            if ( svm_paging_enabled(v) )
  90.117 +            {
  90.118 +                gdprintk(XENLOG_WARNING, 
  90.119 +                         "Trying to clear EFER.LME while paging enabled\n");
  90.120 +                goto gp_fault;
  90.121 +            }
  90.122 +
  90.123 +            vmcb->efer &= ~(EFER_LME | EFER_LMA);
  90.124 +        }
  90.125 +
  90.126 +#endif /* __x86_64__ */
  90.127 +
  90.128 +        /* update the guest EFER's shadow with the intended value */
  90.129 +        v->arch.hvm_svm.cpu_shadow_efer = msr_content;
  90.130 +
  90.131          break;
  90.132  
  90.133  #ifdef __x86_64__
  90.134 @@ -468,22 +471,25 @@ int svm_vmcb_restore(struct vcpu *v, str
  90.135              c->cr4);
  90.136  #endif
  90.137  
  90.138 -    if (!svm_paging_enabled(v)) {
  90.139 +    if ( !svm_paging_enabled(v) ) 
  90.140 +    {
  90.141          printk("%s: paging not enabled.", __func__);
  90.142          goto skip_cr3;
  90.143      }
  90.144  
  90.145 -    if (c->cr3 == v->arch.hvm_svm.cpu_cr3) {
  90.146 +    if ( c->cr3 == v->arch.hvm_svm.cpu_cr3 ) 
  90.147 +    {
  90.148          /*
  90.149           * This is simple TLB flush, implying the guest has
  90.150           * removed some translation or changed page attributes.
  90.151           * We simply invalidate the shadow.
  90.152           */
  90.153          mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
  90.154 -        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
  90.155 +        if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) 
  90.156              goto bad_cr3;
  90.157 -        }
  90.158 -    } else {
  90.159 +    } 
  90.160 +    else 
  90.161 +    {
  90.162          /*
  90.163           * If different, make a shadow. Check if the PDBR is valid
  90.164           * first.
  90.165 @@ -491,9 +497,9 @@ int svm_vmcb_restore(struct vcpu *v, str
  90.166          HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3);
  90.167          /* current!=vcpu as not called by arch_vmx_do_launch */
  90.168          mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
  90.169 -        if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
  90.170 +        if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) 
  90.171              goto bad_cr3;
  90.172 -        }
  90.173 +
  90.174          old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
  90.175          v->arch.guest_table = pagetable_from_pfn(mfn);
  90.176          if (old_base_mfn)
  90.177 @@ -631,7 +637,7 @@ static int svm_guest_x86_mode(struct vcp
  90.178  {
  90.179      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
  90.180  
  90.181 -    if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l )
  90.182 +    if ( svm_long_mode_enabled(v) && vmcb->cs.attr.fields.l )
  90.183          return 8;
  90.184  
  90.185      if ( svm_realmode(v) )
  90.186 @@ -681,7 +687,7 @@ static unsigned long svm_get_segment_bas
  90.187      int long_mode = 0;
  90.188  
  90.189  #ifdef __x86_64__
  90.190 -    long_mode = vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA);
  90.191 +    long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v);
  90.192  #endif
  90.193      switch ( seg )
  90.194      {
  90.195 @@ -905,6 +911,10 @@ static void arch_svm_do_launch(struct vc
  90.196  {
  90.197      svm_do_launch(v);
  90.198  
  90.199 +    if ( paging_mode_hap(v->domain) ) {
  90.200 +        v->arch.hvm_svm.vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
  90.201 +    }
  90.202 +
  90.203      if ( v->vcpu_id != 0 )
  90.204      {
  90.205          cpu_user_regs_t *regs = &current->arch.guest_context.user_regs;
  90.206 @@ -1011,6 +1021,21 @@ static struct hvm_function_table svm_fun
  90.207      .event_injection_faulted = svm_event_injection_faulted
  90.208  };
  90.209  
  90.210 +void svm_npt_detect(void)
  90.211 +{
  90.212 +    u32 eax, ebx, ecx, edx;
  90.213 +
  90.214 +    /* check CPUID for nested paging support */
  90.215 +    cpuid(0x8000000A, &eax, &ebx, &ecx, &edx);
  90.216 +    if ( edx & 0x01 ) { /* nested paging */
  90.217 +        hap_capable_system = 1;
  90.218 +    }
  90.219 +    else if ( opt_hap_enabled ) {
  90.220 +        printk(" nested paging is not supported by this CPU.\n");
   90.221 +        hap_capable_system = 0; /* no nested paging; disable the flag. */
  90.222 +    }
  90.223 +}
  90.224 +
  90.225  int start_svm(void)
  90.226  {
  90.227      u32 eax, ecx, edx;
  90.228 @@ -1041,6 +1066,8 @@ int start_svm(void)
  90.229      wrmsr(MSR_EFER, eax, edx);
  90.230      printk("AMD SVM Extension is enabled for cpu %d.\n", cpu );
  90.231  
  90.232 +    svm_npt_detect();
  90.233 +
  90.234      /* Initialize the HSA for this core */
  90.235      phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
  90.236      phys_hsa_lo = (u32) phys_hsa;
  90.237 @@ -1077,6 +1104,18 @@ void arch_svm_do_resume(struct vcpu *v)
  90.238      }
  90.239  }
  90.240  
  90.241 +static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
  90.242 +{
  90.243 +    if (mmio_space(gpa)) {
  90.244 +        handle_mmio(gpa);
  90.245 +        return 1;
  90.246 +    }
  90.247 +
   90.248 +    /* We should not reach here; otherwise, the P2M table is not correct. */
  90.249 +    return 0;
  90.250 +}
  90.251 +
  90.252 +
  90.253  static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs) 
  90.254  {
  90.255      HVM_DBG_LOG(DBG_LEVEL_VMMU, 
  90.256 @@ -1114,7 +1153,7 @@ static void svm_do_general_protection_fa
  90.257          printk("Huh? We got a GP Fault with an invalid IDTR!\n");
  90.258          svm_dump_vmcb(__func__, vmcb);
  90.259          svm_dump_regs(__func__, regs);
  90.260 -        svm_dump_inst(svm_rip2pointer(vmcb));
  90.261 +        svm_dump_inst(svm_rip2pointer(v));
  90.262          domain_crash(v->domain);
  90.263          return;
  90.264      }
  90.265 @@ -1209,7 +1248,7 @@ static void svm_vmexit_do_cpuid(struct v
  90.266      HVMTRACE_3D(CPUID, v, input,
  90.267                  ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
  90.268  
  90.269 -    inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL);
  90.270 +    inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
  90.271      ASSERT(inst_len > 0);
  90.272      __update_guest_eip(vmcb, inst_len);
  90.273  }
  90.274 @@ -1312,15 +1351,16 @@ static void svm_dr_access(struct vcpu *v
  90.275  }
  90.276  
  90.277  
  90.278 -static void svm_get_prefix_info(
  90.279 -    struct vmcb_struct *vmcb, 
  90.280 -    unsigned int dir, svm_segment_register_t **seg, unsigned int *asize)
  90.281 +static void svm_get_prefix_info(struct vcpu *v, unsigned int dir, 
  90.282 +                                svm_segment_register_t **seg, 
  90.283 +                                unsigned int *asize)
  90.284  {
  90.285 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
  90.286      unsigned char inst[MAX_INST_LEN];
  90.287      int i;
  90.288  
  90.289      memset(inst, 0, MAX_INST_LEN);
  90.290 -    if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst)) 
  90.291 +    if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst)) 
  90.292          != MAX_INST_LEN) 
  90.293      {
  90.294          gdprintk(XENLOG_ERR, "get guest instruction failed\n");
  90.295 @@ -1400,7 +1440,7 @@ static inline int svm_get_io_address(
  90.296  
  90.297  #ifdef __x86_64__
  90.298      /* If we're in long mode, we shouldn't check the segment presence & limit */
  90.299 -    long_mode = vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA;
  90.300 +    long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v);
  90.301  #endif
  90.302  
  90.303      /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit. 
  90.304 @@ -1419,7 +1459,7 @@ static inline int svm_get_io_address(
  90.305          isize --;
  90.306  
  90.307      if (isize > 1) 
  90.308 -        svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize);
  90.309 +        svm_get_prefix_info(v, info.fields.type, &seg, &asize);
  90.310  
  90.311      if (info.fields.type == IOREQ_WRITE)
  90.312      {
  90.313 @@ -1702,6 +1742,52 @@ static void svm_io_instruction(struct vc
  90.314      }
  90.315  }
  90.316  
  90.317 +static int npt_set_cr0(unsigned long value) 
  90.318 +{
  90.319 +    struct vcpu *v = current;
  90.320 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
  90.321 +  
  90.322 +    ASSERT(vmcb);
  90.323 +
   90.324 +    /* ET is reserved and should always be 1. */
  90.325 +    value |= X86_CR0_ET;
  90.326 +
  90.327 +    /* Check whether the guest is about to turn on long mode. 
  90.328 +     * If it is, set EFER.LME and EFER.LMA.  Update the shadow EFER.LMA
  90.329 +     * bit too, so svm_long_mode_enabled() will work.
  90.330 +     */
  90.331 +    if ( (value & X86_CR0_PG) && svm_lme_is_set(v) &&
  90.332 +         (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) )
  90.333 +    {
  90.334 +        v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
  90.335 +        vmcb->efer |= EFER_LMA | EFER_LME;
  90.336 +    }
  90.337 +
   90.338 +    /* Whenever CR0.PG is cleared under long mode, LMA is cleared
   90.339 +     * immediately. We emulate this behaviour for svm_long_mode_enabled().
   90.340 +     */
  90.341 +    if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE )
  90.342 +    {
  90.343 +        if ( svm_long_mode_enabled(v) )
  90.344 +        {
  90.345 +            v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
  90.346 +        }
  90.347 +    }
  90.348 +    
  90.349 +    vmcb->cr0 = value | X86_CR0_WP;
  90.350 +    v->arch.hvm_svm.cpu_shadow_cr0 = value;
  90.351 +
  90.352 +    /* TS cleared? Then initialise FPU now. */
  90.353 +    if ( !(value & X86_CR0_TS) ) {
  90.354 +        setup_fpu(v);
  90.355 +        vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM;
  90.356 +    }
  90.357 +    
  90.358 +    paging_update_paging_modes(v);
  90.359 +    
  90.360 +    return 1;
  90.361 +}
  90.362 +
  90.363  static int svm_set_cr0(unsigned long value)
  90.364  {
  90.365      struct vcpu *v = current;
  90.366 @@ -1727,7 +1813,8 @@ static int svm_set_cr0(unsigned long val
  90.367  
  90.368      HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value);
  90.369  
  90.370 -    if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) 
  90.371 +    if ( ((value & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG))
  90.372 +         && !paging_enabled ) 
  90.373      {
  90.374          /* The guest CR3 must be pointing to the guest physical. */
  90.375          mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT);
  90.376 @@ -1740,18 +1827,16 @@ static int svm_set_cr0(unsigned long val
  90.377          }
  90.378  
  90.379  #if defined(__x86_64__)
  90.380 -        if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state) 
  90.381 -            && !test_bit(SVM_CPU_STATE_PAE_ENABLED, 
  90.382 -                         &v->arch.hvm_svm.cpu_state))
  90.383 +        if ( svm_lme_is_set(v) && !svm_cr4_pae_is_set(v) )
  90.384          {
  90.385              HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n");
  90.386              svm_inject_exception(v, TRAP_gp_fault, 1, 0);
  90.387          }
  90.388  
  90.389 -        if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state))
  90.390 +        if ( svm_lme_is_set(v) )
  90.391          {
  90.392              HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n");
  90.393 -            set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
  90.394 +            v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
  90.395              vmcb->efer |= EFER_LMA | EFER_LME;
  90.396          }
  90.397  #endif  /* __x86_64__ */
  90.398 @@ -1790,8 +1875,8 @@ static int svm_set_cr0(unsigned long val
  90.399      {
  90.400          if ( svm_long_mode_enabled(v) )
  90.401          {
  90.402 -            vmcb->efer &= ~EFER_LMA;
  90.403 -            clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state);
  90.404 +            vmcb->efer &= ~(EFER_LME | EFER_LMA);
  90.405 +            v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA;
  90.406          }
  90.407          /* we should take care of this kind of situation */
  90.408          paging_update_paging_modes(v);
  90.409 @@ -1800,6 +1885,85 @@ static int svm_set_cr0(unsigned long val
  90.410      return 1;
  90.411  }
  90.412  
   90.413 +/*
   90.414 + * Nested paging functions.
   90.415 + */
  90.416 +
  90.417 +static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
  90.418 +{  
  90.419 +    unsigned long value;
  90.420 +    struct vcpu *v = current;
  90.421 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
  90.422 +    struct vlapic *vlapic = vcpu_vlapic(v);
  90.423 +
  90.424 +    ASSERT(vmcb);
  90.425 +
  90.426 +    value = get_reg(gpreg, regs, vmcb);
  90.427 +
  90.428 +    switch (cr) {
  90.429 +    case 0:
  90.430 +        return npt_set_cr0(value);
  90.431 +
  90.432 +    case 3:
  90.433 +        vmcb->cr3 = value;
  90.434 +        v->arch.hvm_svm.cpu_cr3 = value;
  90.435 +        break;
  90.436 +
  90.437 +    case 4: /* CR4 */
  90.438 +        vmcb->cr4 = value;
  90.439 +        v->arch.hvm_svm.cpu_shadow_cr4 = value;
  90.440 +        paging_update_paging_modes(v);
  90.441 +        break;
  90.442 +
  90.443 +    case 8:
  90.444 +        vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4));
  90.445 +        vmcb->vintr.fields.tpr = value & 0x0F;
  90.446 +        break;
  90.447 +
  90.448 +    default:
  90.449 +        gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
  90.450 +        domain_crash(v->domain);
  90.451 +        return 0;
  90.452 +    }
  90.453 +    
  90.454 +    return 1;
  90.455 +}
  90.456 +
  90.457 +static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
  90.458 +{
  90.459 +    unsigned long value = 0;
  90.460 +    struct vcpu *v = current;
  90.461 +    struct vmcb_struct *vmcb;
  90.462 +    struct vlapic *vlapic = vcpu_vlapic(v);
  90.463 +
  90.464 +    vmcb = v->arch.hvm_svm.vmcb;
  90.465 +    ASSERT(vmcb);
  90.466 +
  90.467 +    switch(cr) {
  90.468 +    case 0:
  90.469 +        value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr0;
  90.470 +        break;
  90.471 +    case 2:
  90.472 +        value = vmcb->cr2;
  90.473 +        break;
  90.474 +    case 3:
  90.475 +        value = (unsigned long) v->arch.hvm_svm.cpu_cr3;
  90.476 +        break;
  90.477 +    case 4:
  90.478 +        value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4;
   90.479 +        break;
  90.480 +    case 8:
  90.481 +        value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI);
  90.482 +        value = (value & 0xF0) >> 4;
  90.483 +        break;
  90.484 +    default:
  90.485 +        domain_crash(v->domain);
  90.486 +        return;
  90.487 +    }
  90.488 +    
  90.489 +    set_reg(gp, value, regs, vmcb);
  90.490 +}
  90.491 +
  90.492  /*
  90.493   * Read from control registers. CR0 and CR4 are read from the shadow.
  90.494   */
  90.495 @@ -1851,12 +2015,6 @@ static void mov_from_cr(int cr, int gp, 
  90.496  }
  90.497  
  90.498  
  90.499 -static inline int svm_pgbit_test(struct vcpu *v)
  90.500 -{
  90.501 -    return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
  90.502 -}
  90.503 -
  90.504 -
  90.505  /*
  90.506   * Write to control registers
  90.507   */
  90.508 @@ -1933,7 +2091,6 @@ static int mov_to_cr(int gpreg, int cr, 
  90.509          old_cr = v->arch.hvm_svm.cpu_shadow_cr4;
  90.510          if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) )
  90.511          {
  90.512 -            set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
  90.513              if ( svm_pgbit_test(v) )
  90.514              {
  90.515                  /* The guest is a 32-bit PAE guest. */
  90.516 @@ -1962,15 +2119,13 @@ static int mov_to_cr(int gpreg, int cr, 
  90.517                              v->arch.hvm_svm.cpu_cr3, mfn);
  90.518  #endif
  90.519              }
  90.520 -        }
  90.521 -        else if (value & X86_CR4_PAE) {
  90.522 -            set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
  90.523 -        } else {
  90.524 -            if (test_bit(SVM_CPU_STATE_LMA_ENABLED,
  90.525 -                         &v->arch.hvm_svm.cpu_state)) {
  90.526 +        } 
  90.527 +        else if ( !(value & X86_CR4_PAE) )
  90.528 +        {
  90.529 +            if ( svm_long_mode_enabled(v) )
  90.530 +            {
  90.531                  svm_inject_exception(v, TRAP_gp_fault, 1, 0);
  90.532              }
  90.533 -            clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
  90.534          }
  90.535  
  90.536          v->arch.hvm_svm.cpu_shadow_cr4 = value;
  90.537 @@ -2024,7 +2179,7 @@ static int svm_cr_access(struct vcpu *v,
  90.538  
  90.539      ASSERT(vmcb);
  90.540  
  90.541 -    inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer));
  90.542 +    inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
  90.543  
  90.544      /* get index to first actual instruction byte - as we will need to know 
  90.545         where the prefix lives later on */
  90.546 @@ -2033,12 +2188,12 @@ static int svm_cr_access(struct vcpu *v,
  90.547      if ( type == TYPE_MOV_TO_CR )
  90.548      {
  90.549          inst_len = __get_instruction_length_from_list(
  90.550 -            vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match);
  90.551 +            v, list_a, ARR_SIZE(list_a), &buffer[index], &match);
  90.552      }
  90.553      else /* type == TYPE_MOV_FROM_CR */
  90.554      {
  90.555          inst_len = __get_instruction_length_from_list(
  90.556 -            vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match);
  90.557 +            v, list_b, ARR_SIZE(list_b), &buffer[index], &match);
  90.558      }
  90.559  
  90.560      ASSERT(inst_len > 0);
  90.561 @@ -2055,12 +2210,18 @@ static int svm_cr_access(struct vcpu *v,
  90.562      {
  90.563      case INSTR_MOV2CR:
  90.564          gpreg = decode_src_reg(prefix, buffer[index+2]);
  90.565 -        result = mov_to_cr(gpreg, cr, regs);
  90.566 +        if ( paging_mode_hap(v->domain) )
  90.567 +            result = npt_mov_to_cr(gpreg, cr, regs);
  90.568 +        else
  90.569 +            result = mov_to_cr(gpreg, cr, regs);
  90.570          break;
  90.571  
  90.572      case INSTR_MOVCR2:
  90.573          gpreg = decode_src_reg(prefix, buffer[index+2]);
  90.574 -        mov_from_cr(cr, gpreg, regs);
  90.575 +        if ( paging_mode_hap(v->domain) )
  90.576 +            npt_mov_from_cr(cr, gpreg, regs);
  90.577 +        else
  90.578 +            mov_from_cr(cr, gpreg, regs);
  90.579          break;
  90.580  
  90.581      case INSTR_CLTS:
  90.582 @@ -2073,7 +2234,7 @@ static int svm_cr_access(struct vcpu *v,
  90.583  
  90.584      case INSTR_LMSW:
  90.585          if (svm_dbg_on)
  90.586 -            svm_dump_inst(svm_rip2pointer(vmcb));
  90.587 +            svm_dump_inst(svm_rip2pointer(v));
  90.588          
  90.589          gpreg = decode_src_reg(prefix, buffer[index+2]);
  90.590          value = get_reg(gpreg, regs, vmcb) & 0xF;
  90.591 @@ -2087,12 +2248,15 @@ static int svm_cr_access(struct vcpu *v,
  90.592          if (svm_dbg_on)
  90.593              printk("CR0-LMSW CR0 - New value=%lx\n", value);
  90.594  
  90.595 -        result = svm_set_cr0(value);
  90.596 +        if ( paging_mode_hap(v->domain) )
  90.597 +            result = npt_set_cr0(value);
  90.598 +        else
  90.599 +            result = svm_set_cr0(value);
  90.600          break;
  90.601  
  90.602      case INSTR_SMSW:
  90.603          if (svm_dbg_on)
  90.604 -            svm_dump_inst(svm_rip2pointer(vmcb));
  90.605 +            svm_dump_inst(svm_rip2pointer(v));
  90.606          value = v->arch.hvm_svm.cpu_shadow_cr0;
  90.607          gpreg = decode_src_reg(prefix, buffer[index+2]);
  90.608          set_reg(gpreg, value, regs, vmcb);
  90.609 @@ -2168,7 +2332,7 @@ static inline void svm_do_msr_access(
  90.610          HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
  90.611                      ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
  90.612  
  90.613 -        inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL);
  90.614 +        inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
  90.615      }
  90.616      else
  90.617      {
  90.618 @@ -2200,7 +2364,7 @@ static inline void svm_do_msr_access(
  90.619              break;
  90.620          }
  90.621  
  90.622 -        inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL);
  90.623 +        inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
  90.624      }
  90.625  
  90.626      __update_guest_eip(vmcb, inst_len);
  90.627 @@ -2223,8 +2387,9 @@ static inline void svm_vmexit_do_hlt(str
  90.628  }
  90.629  
  90.630  
  90.631 -static void svm_vmexit_do_invd(struct vmcb_struct *vmcb)
  90.632 +static void svm_vmexit_do_invd(struct vcpu *v)
  90.633  {
  90.634 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
  90.635      int  inst_len;
  90.636      
  90.637      /* Invalidate the cache - we can't really do that safely - maybe we should 
  90.638 @@ -2237,7 +2402,7 @@ static void svm_vmexit_do_invd(struct vm
  90.639       */
  90.640      printk("INVD instruction intercepted - ignored\n");
  90.641      
  90.642 -    inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL);
  90.643 +    inst_len = __get_instruction_length(v, INSTR_INVD, NULL);
  90.644      __update_guest_eip(vmcb, inst_len);
  90.645  }    
  90.646          
  90.647 @@ -2289,7 +2454,7 @@ void svm_handle_invlpg(const short invlp
  90.648       * Unknown how many bytes the invlpg instruction will take.  Use the
  90.649       * maximum instruction length here
  90.650       */
  90.651 -    if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length)
  90.652 +    if (inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length)
  90.653      {
  90.654          gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
  90.655          domain_crash(v->domain);
  90.656 @@ -2298,7 +2463,7 @@ void svm_handle_invlpg(const short invlp
  90.657  
  90.658      if (invlpga)
  90.659      {
  90.660 -        inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode);
  90.661 +        inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
  90.662          ASSERT(inst_len > 0);
  90.663          __update_guest_eip(vmcb, inst_len);
  90.664  
  90.665 @@ -2312,7 +2477,7 @@ void svm_handle_invlpg(const short invlp
  90.666      {
  90.667          /* What about multiple prefix codes? */
  90.668          prefix = (is_prefix(opcode[0])?opcode[0]:0);
  90.669 -        inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode);
  90.670 +        inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
  90.671          ASSERT(inst_len > 0);
  90.672  
  90.673          inst_len--;
  90.674 @@ -2323,7 +2488,7 @@ void svm_handle_invlpg(const short invlp
  90.675           * displacement to get effective address and length in bytes.  Assume
  90.676           * the system in either 32- or 64-bit mode.
  90.677           */
  90.678 -        g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, inst_len,
  90.679 +        g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
  90.680                                               &opcode[inst_len], &length);
  90.681  
  90.682          inst_len += length;
  90.683 @@ -2369,7 +2534,11 @@ static int svm_do_vmmcall_reset_to_realm
  90.684  
  90.685      vmcb->cr4 = SVM_CR4_HOST_MASK;
  90.686      v->arch.hvm_svm.cpu_shadow_cr4 = 0;
  90.687 -    clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state);
  90.688 +
  90.689 +    if ( paging_mode_hap(v->domain) ) {
  90.690 +        vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
  90.691 +        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
  90.692 +    }
  90.693  
  90.694      /* This will jump to ROMBIOS */
  90.695      vmcb->rip = 0xFFF0;
  90.696 @@ -2445,7 +2614,7 @@ static int svm_do_vmmcall(struct vcpu *v
  90.697      ASSERT(vmcb);
  90.698      ASSERT(regs);
  90.699  
  90.700 -    inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL);
  90.701 +    inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
  90.702      ASSERT(inst_len > 0);
  90.703  
  90.704      HVMTRACE_1D(VMMCALL, v, regs->eax);
  90.705 @@ -2855,7 +3024,7 @@ asmlinkage void svm_vmexit_handler(struc
  90.706  
  90.707              svm_dump_vmcb(__func__, vmcb);
  90.708              svm_dump_regs(__func__, regs);
  90.709 -            svm_dump_inst(svm_rip2pointer(vmcb));
  90.710 +            svm_dump_inst(svm_rip2pointer(v));
  90.711          }
  90.712  
  90.713  #if defined(__i386__)
  90.714 @@ -2957,7 +3126,7 @@ asmlinkage void svm_vmexit_handler(struc
  90.715          /* Debug info to hopefully help debug WHY the guest double-faulted. */
  90.716          svm_dump_vmcb(__func__, vmcb);
  90.717          svm_dump_regs(__func__, regs);
  90.718 -        svm_dump_inst(svm_rip2pointer(vmcb));
  90.719 +        svm_dump_inst(svm_rip2pointer(v));
  90.720          svm_inject_exception(v, TRAP_double_fault, 1, 0);
  90.721          break;
  90.722  
  90.723 @@ -2967,7 +3136,7 @@ asmlinkage void svm_vmexit_handler(struc
  90.724          break;
  90.725  
  90.726      case VMEXIT_INVD:
  90.727 -        svm_vmexit_do_invd(vmcb);
  90.728 +        svm_vmexit_do_invd(v);
  90.729          break;
  90.730  
  90.731      case VMEXIT_GDTR_WRITE:
  90.732 @@ -3054,6 +3223,15 @@ asmlinkage void svm_vmexit_handler(struc
  90.733          hvm_triple_fault();
  90.734          break;
  90.735  
  90.736 +    case VMEXIT_NPF:
  90.737 +    {
  90.738 +        regs->error_code = vmcb->exitinfo1;
  90.739 +        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) ) {
  90.740 +            domain_crash(v->domain);
  90.741 +        }
  90.742 +        break;
  90.743 +    }
  90.744 +
  90.745      default:
  90.746      exit_and_crash:
  90.747          gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
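
The svm_npt_detect() hunk above gates hardware-assisted paging on CPUID leaf
0x8000000A: on AMD-V processors, EDX bit 0 of that leaf advertises nested
paging. A minimal stand-alone sketch of the same probe (user-space C using
GCC's <cpuid.h>; the hap_capable_system/opt_hap_enabled globals from the
patch are intentionally omitted):

    #include <stdio.h>
    #include <cpuid.h>  /* GCC wrapper around the CPUID instruction */

    /* Return 1 if the CPU advertises AMD nested paging (NP), else 0. */
    static int cpu_has_nested_paging(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 0x8000000A: SVM revision and feature identification. */
        if ( !__get_cpuid(0x8000000A, &eax, &ebx, &ecx, &edx) )
            return 0;   /* extended leaf not implemented */
        return edx & 1; /* EDX[0] = NP (nested paging) */
    }

    int main(void)
    {
        printf("nested paging: %s\n",
               cpu_has_nested_paging() ? "supported" : "not supported");
        return 0;
    }
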
    91.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Mon Mar 05 12:49:12 2007 -0600
    91.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Thu Mar 08 14:39:52 2007 -0600
    91.3 @@ -201,6 +201,13 @@ static int construct_vmcb(struct vcpu *v
    91.4  
    91.5      arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP;
    91.6  
    91.7 +    if ( paging_mode_hap(v->domain) ) {
    91.8 +        vmcb->cr0 = arch_svm->cpu_shadow_cr0;
    91.9 +        vmcb->np_enable = 1; /* enable nested paging */
   91.10 +        vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
   91.11 +        vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG;
   91.12 +    }
   91.13 +
   91.14      return 0;
   91.15  }
   91.16  
   91.17 @@ -310,7 +317,8 @@ void svm_dump_vmcb(const char *from, str
   91.18      printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n", 
   91.19             (unsigned long long) vmcb->kerngsbase,
   91.20             (unsigned long long) vmcb->g_pat);
   91.21 -    
   91.22 +    printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3);
   91.23 +
   91.24      /* print out all the selectors */
   91.25      svm_dump_sel("CS", &vmcb->cs);
   91.26      svm_dump_sel("DS", &vmcb->ds);
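
The guest PAT value programmed in construct_vmcb() above, 0x0007040600070406,
is the architectural power-on default: each byte selects the memory type for
one PAT entry (0 = UC, 1 = WC, 4 = WT, 5 = WP, 6 = WB, 7 = UC-). A small
decoder for such an MSR value (plain C; the helper is an illustrative sketch,
not part of the changeset):

    #include <stdio.h>
    #include <stdint.h>

    /* Memory-type encodings used by the IA32_PAT / guest PAT MSR. */
    static const char *pat_type(uint8_t t)
    {
        switch ( t & 7 ) {
        case 0: return "UC";   /* uncacheable */
        case 1: return "WC";   /* write-combining */
        case 4: return "WT";   /* write-through */
        case 5: return "WP";   /* write-protected */
        case 6: return "WB";   /* write-back */
        case 7: return "UC-";  /* uncached, overridable by MTRRs */
        default: return "reserved";
        }
    }

    int main(void)
    {
        uint64_t pat = 0x0007040600070406ULL; /* value used in construct_vmcb() */
        for ( int i = 0; i < 8; i++ )
            printf("PAT%d = %s\n", i, pat_type(pat >> (8 * i)));
        return 0;
    }
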
    92.1 --- a/xen/arch/x86/mm.c	Mon Mar 05 12:49:12 2007 -0600
    92.2 +++ b/xen/arch/x86/mm.c	Thu Mar 08 14:39:52 2007 -0600
    92.3 @@ -424,7 +424,10 @@ void invalidate_shadow_ldt(struct vcpu *
    92.4      }
    92.5  
    92.6      /* Dispose of the (now possibly invalid) mappings from the TLB.  */
    92.7 -    queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
    92.8 +    if ( v == current )
    92.9 +        queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
   92.10 +    else
   92.11 +        flush_tlb_mask(v->domain->domain_dirty_cpumask);
   92.12  }
   92.13  
   92.14  
    93.1 --- a/xen/arch/x86/mm/Makefile	Mon Mar 05 12:49:12 2007 -0600
    93.2 +++ b/xen/arch/x86/mm/Makefile	Thu Mar 08 14:39:52 2007 -0600
    93.3 @@ -1,4 +1,5 @@
    93.4  subdir-y += shadow
    93.5 +subdir-y += hap
    93.6  
    93.7  obj-y += paging.o
    93.8  obj-y += p2m.o
    94.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    94.2 +++ b/xen/arch/x86/mm/hap/Makefile	Thu Mar 08 14:39:52 2007 -0600
    94.3 @@ -0,0 +1,2 @@
    94.4 +obj-y += hap.o
    94.5 +obj-y += support.o
    95.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    95.2 +++ b/xen/arch/x86/mm/hap/hap.c	Thu Mar 08 14:39:52 2007 -0600
    95.3 @@ -0,0 +1,708 @@
    95.4 +/******************************************************************************
    95.5 + * arch/x86/mm/hap/hap.c
    95.6 + *
    95.7 + * hardware assisted paging
    95.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
    95.9 + * Parts of this code are Copyright (c) 2007 by XenSource Inc.
   95.10 + *
   95.11 + * This program is free software; you can redistribute it and/or modify
   95.12 + * it under the terms of the GNU General Public License as published by
   95.13 + * the Free Software Foundation; either version 2 of the License, or
   95.14 + * (at your option) any later version.
   95.15 + *
   95.16 + * This program is distributed in the hope that it will be useful,
   95.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   95.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   95.19 + * GNU General Public License for more details.
   95.20 + *
   95.21 + * You should have received a copy of the GNU General Public License
   95.22 + * along with this program; if not, write to the Free Software
   95.23 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   95.24 + */
   95.25 +
   95.26 +#include <xen/config.h>
   95.27 +#include <xen/types.h>
   95.28 +#include <xen/mm.h>
   95.29 +#include <xen/trace.h>
   95.30 +#include <xen/sched.h>
   95.31 +#include <xen/perfc.h>
   95.32 +#include <xen/irq.h>
   95.33 +#include <xen/domain_page.h>
   95.34 +#include <xen/guest_access.h>
   95.35 +#include <xen/keyhandler.h>
   95.36 +#include <asm/event.h>
   95.37 +#include <asm/page.h>
   95.38 +#include <asm/current.h>
   95.39 +#include <asm/flushtlb.h>
   95.40 +#include <asm/shared.h>
   95.41 +#include <asm/hap.h>
   95.42 +#include <asm/paging.h>
   95.43 +#include <asm/domain.h>
   95.44 +
   95.45 +#include "private.h"
   95.46 +
   95.47 +/* Override macros from asm/page.h to make them work with mfn_t */
   95.48 +#undef mfn_to_page
   95.49 +#define mfn_to_page(_m) (frame_table + mfn_x(_m))
   95.50 +#undef mfn_valid
   95.51 +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
   95.52 +#undef page_to_mfn
   95.53 +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
   95.54 +
   95.55 +/************************************************/
   95.56 +/*             HAP SUPPORT FUNCTIONS            */
   95.57 +/************************************************/
   95.58 +mfn_t hap_alloc(struct domain *d, unsigned long backpointer)
   95.59 +{
   95.60 +    struct page_info *sp = NULL;
   95.61 +    void *p;
   95.62 +
   95.63 +    ASSERT(hap_locked_by_me(d));
   95.64 +
   95.65 +    sp = list_entry(d->arch.paging.hap.freelists.next, struct page_info, list);
   95.66 +    list_del(&sp->list);
   95.67 +    d->arch.paging.hap.free_pages -= 1;
   95.68 +
   95.69 +    /* Now safe to clear the page for reuse */
   95.70 +    p = hap_map_domain_page(page_to_mfn(sp));
   95.71 +    ASSERT(p != NULL);
   95.72 +    clear_page(p);
   95.73 +    hap_unmap_domain_page(p);
   95.74 +
   95.75 +    return page_to_mfn(sp);
   95.76 +}
   95.77 +
   95.78 +void hap_free(struct domain *d, mfn_t smfn)
   95.79 +{
   95.80 +    struct page_info *sp = mfn_to_page(smfn); 
   95.81 +
   95.82 +    ASSERT(hap_locked_by_me(d));
   95.83 +
   95.84 +    d->arch.paging.hap.free_pages += 1;
   95.85 +    list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
   95.86 +}
   95.87 +
   95.88 +static int hap_alloc_p2m_pages(struct domain *d)
   95.89 +{
   95.90 +    struct page_info *pg;
   95.91 +
   95.92 +    ASSERT(hap_locked_by_me(d));
   95.93 +
   95.94 +    pg = mfn_to_page(hap_alloc(d, 0));
   95.95 +    d->arch.paging.hap.p2m_pages += 1;
   95.96 +    d->arch.paging.hap.total_pages -= 1;
   95.97 +    
   95.98 +    page_set_owner(pg, d);
   95.99 +    pg->count_info = 1;
  95.100 +    list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist);
  95.101 +
  95.102 +    return 1;
  95.103 +}
  95.104 +
  95.105 +struct page_info * hap_alloc_p2m_page(struct domain *d)
  95.106 +{
  95.107 +    struct list_head *entry;
  95.108 +    struct page_info *pg;
  95.109 +    mfn_t mfn;
  95.110 +    void *p;
  95.111 +
  95.112 +    hap_lock(d);
  95.113 +    
  95.114 +    if ( list_empty(&d->arch.paging.hap.p2m_freelist) && 
  95.115 +         !hap_alloc_p2m_pages(d) ) {
  95.116 +        hap_unlock(d);
  95.117 +        return NULL;
  95.118 +    }
  95.119 +    entry = d->arch.paging.hap.p2m_freelist.next;
  95.120 +    list_del(entry);
  95.121 +    
  95.122 +    hap_unlock(d);
  95.123 +
  95.124 +    pg = list_entry(entry, struct page_info, list);
  95.125 +    mfn = page_to_mfn(pg);
  95.126 +    p = hap_map_domain_page(mfn);
  95.127 +    clear_page(p);
  95.128 +    hap_unmap_domain_page(p);
  95.129 +
  95.130 +    return pg;
  95.131 +}
  95.132 +
  95.133 +void hap_free_p2m_page(struct domain *d, struct page_info *pg)
  95.134 +{
  95.135 +    ASSERT(page_get_owner(pg) == d);
  95.136 +    /* Should have just the one ref we gave it in alloc_p2m_page() */
  95.137 +    if ( (pg->count_info & PGC_count_mask) != 1 ) {
  95.138 +        HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
  95.139 +                  pg->count_info, pg->u.inuse.type_info);
  95.140 +    }
  95.141 +    /* Free should not decrement domain's total allocation, since 
  95.142 +     * these pages were allocated without an owner. */
  95.143 +    page_set_owner(pg, NULL); 
  95.144 +    free_domheap_pages(pg, 0);
  95.145 +    d->arch.paging.hap.p2m_pages--;
  95.146 +}
  95.147 +
  95.148 +/* Return the size of the pool, rounded up to the nearest MB */
  95.149 +static unsigned int
  95.150 +hap_get_allocation(struct domain *d)
  95.151 +{
  95.152 +    unsigned int pg = d->arch.paging.hap.total_pages;
  95.153 +
  95.154 +    HERE_I_AM;
  95.155 +    return ((pg >> (20 - PAGE_SHIFT))
  95.156 +            + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0));
  95.157 +}
  95.158 +
  95.159 +/* Set the pool of pages to the required number of pages.
  95.160 + * Returns 0 for success, non-zero for failure. */
   95.161 +static int
  95.162 +hap_set_allocation(struct domain *d, unsigned int pages, int *preempted)
  95.163 +{
  95.164 +    struct page_info *sp;
  95.165 +
  95.166 +    ASSERT(hap_locked_by_me(d));
  95.167 +
  95.168 +    while ( d->arch.paging.hap.total_pages != pages ) {
  95.169 +        if ( d->arch.paging.hap.total_pages < pages ) {
  95.170 +            /* Need to allocate more memory from domheap */
  95.171 +            sp = alloc_domheap_pages(NULL, 0, 0);
  95.172 +            if ( sp == NULL ) {
  95.173 +                HAP_PRINTK("failed to allocate hap pages.\n");
  95.174 +                return -ENOMEM;
  95.175 +            }
  95.176 +            d->arch.paging.hap.free_pages += 1;
  95.177 +            d->arch.paging.hap.total_pages += 1;
  95.178 +            list_add_tail(&sp->list, &d->arch.paging.hap.freelists);
  95.179 +        }
  95.180 +        else if ( d->arch.paging.hap.total_pages > pages ) {
  95.181 +            /* Need to return memory to domheap */
  95.182 +            ASSERT(!list_empty(&d->arch.paging.hap.freelists));
  95.183 +            sp = list_entry(d->arch.paging.hap.freelists.next,
  95.184 +                            struct page_info, list);
  95.185 +            list_del(&sp->list);
  95.186 +            d->arch.paging.hap.free_pages -= 1;
  95.187 +            d->arch.paging.hap.total_pages -= 1;
  95.188 +            free_domheap_pages(sp, 0);
  95.189 +        }
  95.190 +        
  95.191 +        /* Check to see if we need to yield and try again */
  95.192 +        if ( preempted && hypercall_preempt_check() ) {
  95.193 +            *preempted = 1;
  95.194 +            return 0;
  95.195 +        }
  95.196 +    }
  95.197 +
  95.198 +    return 0;
  95.199 +}
  95.200 +
  95.201 +#if CONFIG_PAGING_LEVELS == 4
  95.202 +void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn)
  95.203 +{
  95.204 +    struct domain *d = v->domain;
  95.205 +    l4_pgentry_t *sl4e;
  95.206 +
  95.207 +    sl4e = hap_map_domain_page(sl4mfn);
  95.208 +    ASSERT(sl4e != NULL);
  95.209 +
  95.210 +    /* Copy the common Xen mappings from the idle domain */
  95.211 +    memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
  95.212 +           &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
  95.213 +           ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
  95.214 +
  95.215 +    /* Install the per-domain mappings for this domain */
  95.216 +    sl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
  95.217 +        l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))),
  95.218 +                     __PAGE_HYPERVISOR);
  95.219 +
  95.220 +    sl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
  95.221 +        l4e_from_pfn(mfn_x(gl4mfn), __PAGE_HYPERVISOR);
  95.222 +
  95.223 +    /* install domain-specific P2M table */
  95.224 +    sl4e[l4_table_offset(RO_MPT_VIRT_START)] =
  95.225 +        l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
  95.226 +                     __PAGE_HYPERVISOR);
  95.227 +
  95.228 +    hap_unmap_domain_page(sl4e);
  95.229 +}
  95.230 +#endif /* CONFIG_PAGING_LEVELS == 4 */
  95.231 +
  95.232 +#if CONFIG_PAGING_LEVELS == 3
  95.233 +void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn)
  95.234 +{
  95.235 +    struct domain *d = v->domain;
  95.236 +    l2_pgentry_t *sl2e;
  95.237 +
  95.238 +    int i;
  95.239 +
  95.240 +    sl2e = hap_map_domain_page(sl2hmfn);
  95.241 +    ASSERT(sl2e != NULL);
  95.242 +    
  95.243 +    /* Copy the common Xen mappings from the idle domain */
  95.244 +    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
  95.245 +           &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
  95.246 +           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
  95.247 +
  95.248 +    /* Install the per-domain mappings for this domain */
  95.249 +    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  95.250 +        sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
  95.251 +            l2e_from_pfn(
  95.252 +                         mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
  95.253 +                         __PAGE_HYPERVISOR);
  95.254 +    
  95.255 +    for ( i = 0; i < HAP_L3_PAGETABLE_ENTRIES; i++ )
  95.256 +        sl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
  95.257 +            l2e_empty();
  95.258 +
  95.259 +    if ( paging_mode_translate(d) )
  95.260 +    {
  95.261 +        /* Install the domain-specific p2m table */
  95.262 +        l3_pgentry_t *p2m;
  95.263 +        ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0);
  95.264 +        p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table));
  95.265 +        for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ )
  95.266 +        {
  95.267 +            sl2e[l2_table_offset(RO_MPT_VIRT_START) + i] =
  95.268 +                (l3e_get_flags(p2m[i]) & _PAGE_PRESENT)
  95.269 +                ? l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))),
  95.270 +                                      __PAGE_HYPERVISOR)
  95.271 +                : l2e_empty();
  95.272 +        }
  95.273 +        hap_unmap_domain_page(p2m);
  95.274 +    }
  95.275 +
  95.276 +    hap_unmap_domain_page(sl2e);
  95.277 +}
  95.278 +#endif
  95.279 +
  95.280 +#if CONFIG_PAGING_LEVELS == 2
  95.281 +void hap_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn)
  95.282 +{
  95.283 +    struct domain *d = v->domain;
  95.284 +    l2_pgentry_t *sl2e;
  95.285 +    int i;
  95.286 +
  95.287 +    sl2e = hap_map_domain_page(sl2mfn);
  95.288 +    ASSERT(sl2e != NULL);
  95.289 +    
  95.290 +    /* Copy the common Xen mappings from the idle domain */
  95.291 +    memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT],
  95.292 +           &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT],
  95.293 +           L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
  95.294 +
  95.295 +    /* Install the per-domain mappings for this domain */
  95.296 +    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
  95.297 +        sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
  95.298 +            l2e_from_pfn(
  95.299 +                mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)),
  95.300 +                __PAGE_HYPERVISOR);
  95.301 +
  95.302 +
  95.303 +    sl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
  95.304 +        l2e_from_pfn(mfn_x(gl2mfn), __PAGE_HYPERVISOR);
  95.305 +
  95.306 +    /* install domain-specific P2M table */
  95.307 +    sl2e[l2_table_offset(RO_MPT_VIRT_START)] =
  95.308 +        l2e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)),
  95.309 +                            __PAGE_HYPERVISOR);
  95.310 +
  95.311 +    hap_unmap_domain_page(sl2e);
  95.312 +}
  95.313 +#endif
  95.314 +
  95.315 +mfn_t hap_make_monitor_table(struct vcpu *v)
  95.316 +{
  95.317 +    struct domain *d = v->domain;
  95.318 +
  95.319 +    ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0);
  95.320 +
  95.321 +#if CONFIG_PAGING_LEVELS == 4
  95.322 +    {
  95.323 +        mfn_t m4mfn;
  95.324 +        m4mfn = hap_alloc(d, 0);
  95.325 +        hap_install_xen_entries_in_l4(v, m4mfn, m4mfn);
  95.326 +        return m4mfn;
  95.327 +    }
  95.328 +#elif CONFIG_PAGING_LEVELS == 3
  95.329 +    {
  95.330 +        mfn_t m3mfn, m2mfn; 
  95.331 +        l3_pgentry_t *l3e;
  95.332 +        l2_pgentry_t *l2e;
  95.333 +        int i;
  95.334 +
  95.335 +        m3mfn = hap_alloc(d, 0);
  95.336 +
  95.337 +        /* Install a monitor l2 table in slot 3 of the l3 table.
  95.338 +         * This is used for all Xen entries, including linear maps
  95.339 +         */
  95.340 +        m2mfn = hap_alloc(d, 0);
  95.341 +        l3e = hap_map_domain_page(m3mfn);
  95.342 +        l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT);
  95.343 +        hap_install_xen_entries_in_l2h(v, m2mfn);
  95.344 +        /* Install the monitor's own linear map */
  95.345 +        l2e = hap_map_domain_page(m2mfn);
  95.346 +        for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ )
  95.347 +            l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] =
  95.348 +                (l3e_get_flags(l3e[i]) & _PAGE_PRESENT) 
  95.349 +                ? l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR) 
  95.350 +                : l2e_empty();
  95.351 +        hap_unmap_domain_page(l2e);
  95.352 +        hap_unmap_domain_page(l3e);
  95.353 +
  95.354 +        HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn));
  95.355 +        return m3mfn;
  95.356 +    }
  95.357 +#else
  95.358 +    {
  95.359 +        mfn_t m2mfn;
  95.360 +        
  95.361 +        m2mfn = hap_alloc(d, 0);
  95.362 +        hap_install_xen_entries_in_l2(v, m2mfn, m2mfn);
  95.363 +    
  95.364 +        return m2mfn;
  95.365 +    }
  95.366 +#endif
  95.367 +}
  95.368 +
  95.369 +void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn)
  95.370 +{
  95.371 +    struct domain *d = v->domain;
  95.372 +
  95.373 +#if CONFIG_PAGING_LEVELS == 4
  95.374 +    /* Need to destroy the l3 monitor page in slot 0 too */
  95.375 +    {
  95.376 +        mfn_t m3mfn;
  95.377 +        l4_pgentry_t *l4e = hap_map_domain_page(mmfn);
  95.378 +        ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT);
  95.379 +        m3mfn = _mfn(l4e_get_pfn(l4e[0]));
  95.380 +        hap_free(d, m3mfn);
  95.381 +        hap_unmap_domain_page(l4e);
  95.382 +    }
  95.383 +#elif CONFIG_PAGING_LEVELS == 3
  95.384 +    /* Need to destroy the l2 monitor page in slot 4 too */
  95.385 +    {
  95.386 +        l3_pgentry_t *l3e = hap_map_domain_page(mmfn);
  95.387 +        ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT);
  95.388 +        hap_free(d, _mfn(l3e_get_pfn(l3e[3])));
  95.389 +        hap_unmap_domain_page(l3e);
  95.390 +    }
  95.391 +#endif
  95.392 +
  95.393 +    /* Put the memory back in the pool */
  95.394 +    hap_free(d, mmfn);
  95.395 +}
  95.396 +
  95.397 +/************************************************/
  95.398 +/*          HAP DOMAIN LEVEL FUNCTIONS          */
  95.399 +/************************************************/
  95.400 +void hap_domain_init(struct domain *d)
  95.401 +{
  95.402 +    hap_lock_init(d);
  95.403 +    INIT_LIST_HEAD(&d->arch.paging.hap.freelists);
  95.404 +    INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist);
  95.405 +}
  95.406 +
  95.407 +/* return 0 for success, -errno for failure */
  95.408 +int hap_enable(struct domain *d, u32 mode)
  95.409 +{
  95.410 +    unsigned int old_pages;
  95.411 +    int rv = 0;
  95.412 +
  95.413 +    HERE_I_AM;
  95.414 +
  95.415 +    domain_pause(d);
  95.416 +    /* error check */
   95.417 +    if ( d == current->domain ) {
  95.418 +        rv = -EINVAL;
  95.419 +        goto out;
  95.420 +    }
  95.421 +
  95.422 +    old_pages = d->arch.paging.hap.total_pages;
  95.423 +    if ( old_pages == 0 ) {
   95.424 +        int r;
  95.425 +        hap_lock(d);
  95.426 +        r = hap_set_allocation(d, 256, NULL);
  95.427 +        hap_unlock(d);
  95.428 +        if ( r != 0 ) {
  95.429 +            hap_set_allocation(d, 0, NULL);
  95.430 +            rv = -ENOMEM;
  95.431 +            goto out;
  95.432 +        }
  95.433 +    }
  95.434 +
   95.435 +    /* allocate the P2M table */
  95.436 +    if ( mode & PG_translate ) {
  95.437 +        rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
  95.438 +        if ( rv != 0 )
  95.439 +            goto out;
  95.440 +    }
  95.441 +
  95.442 +    d->arch.paging.mode = mode | PG_SH_enable;
  95.443 +
  95.444 + out:
  95.445 +    domain_unpause(d);
  95.446 +    return rv;
  95.447 +}
  95.448 +
  95.449 +void hap_final_teardown(struct domain *d)
  95.450 +{
  95.451 +    HERE_I_AM;
  95.452 +
  95.453 +    if ( d->arch.paging.hap.total_pages != 0 )
  95.454 +        hap_teardown(d);
  95.455 +
  95.456 +    p2m_teardown(d);
  95.457 +}
  95.458 +
  95.459 +void hap_teardown(struct domain *d)
  95.460 +{
  95.461 +    struct vcpu *v;
  95.462 +    mfn_t mfn;
  95.463 +    HERE_I_AM;
  95.464 +
  95.465 +    ASSERT(test_bit(_DOMF_dying, &d->domain_flags));
  95.466 +    ASSERT(d != current->domain);
  95.467 +
  95.468 +    if ( !hap_locked_by_me(d) )
  95.469 +        hap_lock(d); /* Keep various asserts happy */
  95.470 +
  95.471 +    if ( paging_mode_enabled(d) ) {
  95.472 +        /* release the monitor table held by each vcpu */
  95.473 +        for_each_vcpu(d, v) {
  95.474 +            if ( v->arch.paging.mode && paging_mode_external(d) ) {
  95.475 +                mfn = pagetable_get_mfn(v->arch.monitor_table);
  95.476 +                if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) )
  95.477 +                    hap_destroy_monitor_table(v, mfn);
  95.478 +                v->arch.monitor_table = pagetable_null();
  95.479 +            }
  95.480 +        }
  95.481 +    }
  95.482 +
  95.483 +    if ( d->arch.paging.hap.total_pages != 0 ) {
  95.484 +        HAP_PRINTK("teardown of domain %u starts."
  95.485 +                      "  pages total = %u, free = %u, p2m=%u\n",
  95.486 +                      d->domain_id,
  95.487 +                      d->arch.paging.hap.total_pages,
  95.488 +                      d->arch.paging.hap.free_pages,
  95.489 +                      d->arch.paging.hap.p2m_pages);
  95.490 +        hap_set_allocation(d, 0, NULL);
  95.491 +        HAP_PRINTK("teardown done."
  95.492 +                      "  pages total = %u, free = %u, p2m=%u\n",
  95.493 +                      d->arch.paging.hap.total_pages,
  95.494 +                      d->arch.paging.hap.free_pages,
  95.495 +                      d->arch.paging.hap.p2m_pages);
  95.496 +        ASSERT(d->arch.paging.hap.total_pages == 0);
  95.497 +    }
  95.498 +    
  95.499 +    d->arch.paging.mode &= ~PG_log_dirty;
  95.500 +
  95.501 +    hap_unlock(d);
  95.502 +}
  95.503 +
  95.504 +int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
  95.505 +               XEN_GUEST_HANDLE(void) u_domctl)
  95.506 +{
  95.507 +    int rc, preempted = 0;
  95.508 +
  95.509 +    HERE_I_AM;
  95.510 +
  95.511 +    if ( unlikely(d == current->domain) ) {
  95.512 +        gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n");
  95.513 +        return -EINVAL;
  95.514 +    }
  95.515 +    
  95.516 +    switch ( sc->op ) {
  95.517 +    case XEN_DOMCTL_SHADOW_OP_OFF:
  95.518 +    case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
  95.519 +    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
  95.520 +    case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
  95.521 +    case XEN_DOMCTL_SHADOW_OP_CLEAN:
  95.522 +    case XEN_DOMCTL_SHADOW_OP_PEEK:
  95.523 +    case XEN_DOMCTL_SHADOW_OP_ENABLE:
  95.524 +        HAP_ERROR("Bad hap domctl op %u\n", sc->op);
  95.525 +        domain_crash(d);
  95.526 +        return -EINVAL;
  95.527 +    case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
  95.528 +        hap_lock(d);
  95.529 +        rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted);
  95.530 +        hap_unlock(d);
  95.531 +        if ( preempted )
  95.532 +            /* Not finished.  Set up to re-run the call. */
  95.533 +            rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h", 
  95.534 +                                               u_domctl);
  95.535 +        else
  95.536 +            /* Finished.  Return the new allocation */
  95.537 +            sc->mb = hap_get_allocation(d);
  95.538 +        return rc;
  95.539 +    case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
  95.540 +        sc->mb = hap_get_allocation(d);
  95.541 +        return 0;
  95.542 +    default:
  95.543 +        HAP_ERROR("Bad hap domctl op %u\n", sc->op);
  95.544 +        return -EINVAL;
  95.545 +    }
  95.546 +}
  95.547 +
  95.548 +void hap_vcpu_init(struct vcpu *v)
  95.549 +{
  95.550 +    v->arch.paging.mode = &hap_paging_real_mode;
  95.551 +}
  95.552 +/************************************************/
  95.553 +/*          HAP PAGING MODE FUNCTIONS           */
  95.554 +/************************************************/
   95.555 +/* In theory, HAP should not intercept guest page faults. This function can
   95.556 + * be recycled to handle host/nested page faults if needed.
   95.557 + */
  95.558 +int hap_page_fault(struct vcpu *v, unsigned long va, 
  95.559 +                   struct cpu_user_regs *regs)
  95.560 +{
  95.561 +    HERE_I_AM;
  95.562 +    domain_crash(v->domain);
  95.563 +    return 0;
  95.564 +}
  95.565 +
   95.566 +/* Called when the guest issues an invlpg request.
   95.567 + * Return 1 if a page invalidation must be issued on the CPU; return 0
   95.568 + * if there is no need to do so.
   95.569 + */
  95.570 +int hap_invlpg(struct vcpu *v, unsigned long va)
  95.571 +{
  95.572 +    HERE_I_AM;
  95.573 +    return 0;
  95.574 +}
  95.575 +
  95.576 +void hap_update_cr3(struct vcpu *v, int do_locking)
  95.577 +{
  95.578 +    struct domain *d = v->domain;
  95.579 +    mfn_t gmfn;
  95.580 +
  95.581 +    HERE_I_AM;
  95.582 +    /* Don't do anything on an uninitialised vcpu */
  95.583 +    if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) {
  95.584 +        ASSERT(v->arch.cr3 == 0);
  95.585 +        return;
  95.586 +    }
  95.587 +
  95.588 +    if ( do_locking )
  95.589 +        hap_lock(v->domain);
  95.590 +    
  95.591 +    ASSERT(hap_locked_by_me(v->domain));
  95.592 +    ASSERT(v->arch.paging.mode);
  95.593 +    
  95.594 +    gmfn = pagetable_get_mfn(v->arch.guest_table);
  95.595 +
  95.596 +    make_cr3(v, pagetable_get_pfn(v->arch.monitor_table));
  95.597 +    
  95.598 +    hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.monitor_table));
  95.599 +
  95.600 +    HAP_PRINTK("d=%u v=%u guest_table=%05lx, monitor_table = %05lx\n", 
  95.601 +               d->domain_id, v->vcpu_id, 
  95.602 +               (unsigned long)pagetable_get_pfn(v->arch.guest_table),
  95.603 +               (unsigned long)pagetable_get_pfn(v->arch.monitor_table));
  95.604 +
  95.605 +    flush_tlb_mask(d->domain_dirty_cpumask);
  95.606 +
  95.607 +    if ( do_locking )
  95.608 +        hap_unlock(v->domain);
  95.609 +}
  95.610 +
  95.611 +void hap_update_paging_modes(struct vcpu *v)
  95.612 +{
  95.613 +    struct domain *d;
  95.614 +
  95.615 +    HERE_I_AM;
  95.616 +
  95.617 +    d = v->domain;
  95.618 +    hap_lock(d);
  95.619 +
   95.620 +    /* Update the guest paging mode. We rely on hvm functions to detect the
   95.621 +     * guest's paging mode, so make sure the shadow registers (CR0, CR4, EFER)
   95.622 +     * reflect the guest's state correctly.
   95.623 +     */
  95.624 +    if ( hvm_paging_enabled(v) ) {
  95.625 +        if ( hvm_long_mode_enabled(v) )
  95.626 +            v->arch.paging.mode = &hap_paging_long_mode;
  95.627 +        else if ( hvm_pae_enabled(v) )
  95.628 +            v->arch.paging.mode = &hap_paging_pae_mode;
  95.629 +        else
  95.630 +            v->arch.paging.mode = &hap_paging_protected_mode;
  95.631 +    }
  95.632 +    else {
  95.633 +        v->arch.paging.mode = &hap_paging_real_mode;
  95.634 +    }
  95.635 +
  95.636 +    v->arch.paging.translate_enabled = !!hvm_paging_enabled(v);    
  95.637 +
  95.638 +    /* use p2m map */
  95.639 +    v->arch.guest_table =
  95.640 +        pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table));
  95.641 +
  95.642 +    if ( pagetable_is_null(v->arch.monitor_table) ) {
  95.643 +        mfn_t mmfn = hap_make_monitor_table(v);
  95.644 +        v->arch.monitor_table = pagetable_from_mfn(mmfn);
  95.645 +        make_cr3(v, mfn_x(mmfn));
  95.646 +    }
  95.647 +
  95.648 +    flush_tlb_mask(d->domain_dirty_cpumask);
  95.649 +    hap_unlock(d);
  95.650 +}
  95.651 +
  95.652 +void 
  95.653 +hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
  95.654 +                    l1_pgentry_t new, unsigned int level)
  95.655 +{
  95.656 +    hap_lock(v->domain);
  95.657 +    safe_write_pte(p, new);
  95.658 +    hap_unlock(v->domain);
  95.659 +}
  95.660 +
  95.661 +/* Entry points into this mode of the hap code. */
  95.662 +struct paging_mode hap_paging_real_mode = {
  95.663 +    .page_fault             = hap_page_fault, 
  95.664 +    .invlpg                 = hap_invlpg,
  95.665 +    .gva_to_gfn             = hap_gva_to_gfn_real_mode,
  95.666 +    .update_cr3             = hap_update_cr3,
  95.667 +    .update_paging_modes    = hap_update_paging_modes,
  95.668 +    .write_p2m_entry        = hap_write_p2m_entry,
  95.669 +    .guest_levels           = 1
  95.670 +};
  95.671 +
  95.672 +struct paging_mode hap_paging_protected_mode = {
  95.673 +    .page_fault             = hap_page_fault, 
  95.674 +    .invlpg                 = hap_invlpg,
  95.675 +    .gva_to_gfn             = hap_gva_to_gfn_protected_mode,
  95.676 +    .update_cr3             = hap_update_cr3,
  95.677 +    .update_paging_modes    = hap_update_paging_modes,
  95.678 +    .write_p2m_entry        = hap_write_p2m_entry,
  95.679 +    .guest_levels           = 2
  95.680 +};
  95.681 +
  95.682 +struct paging_mode hap_paging_pae_mode = {
  95.683 +    .page_fault             = hap_page_fault, 
  95.684 +    .invlpg                 = hap_invlpg,
  95.685 +    .gva_to_gfn             = hap_gva_to_gfn_pae_mode,
  95.686 +    .update_cr3             = hap_update_cr3,
  95.687 +    .update_paging_modes    = hap_update_paging_modes,
  95.688 +    .write_p2m_entry        = hap_write_p2m_entry,
  95.689 +    .guest_levels           = 3
  95.690 +};
  95.691 +
  95.692 +struct paging_mode hap_paging_long_mode = {
  95.693 +    .page_fault             = hap_page_fault, 
  95.694 +    .invlpg                 = hap_invlpg,
  95.695 +    .gva_to_gfn             = hap_gva_to_gfn_long_mode,
  95.696 +    .update_cr3             = hap_update_cr3,
  95.697 +    .update_paging_modes    = hap_update_paging_modes,
  95.698 +    .write_p2m_entry        = hap_write_p2m_entry,
  95.699 +    .guest_levels           = 4
  95.700 +};
  95.701 +
  95.702 +/*
  95.703 + * Local variables:
  95.704 + * mode: C
  95.705 + * c-set-style: "BSD"
  95.706 + * c-basic-offset: 4
  95.707 + * indent-tabs-mode: nil
  95.708 + * End:
  95.709 + */
  95.710 +
  95.711 +
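
hap_get_allocation() above converts the pool's page count to megabytes,
rounded up: with 4K pages, 20 - PAGE_SHIFT = 8, so 256 pages make one MB and
any remainder adds one. A stand-alone check of that arithmetic (plain C
sketch, with PAGE_SHIFT assumed to be 12 as on x86):

    #include <stdio.h>

    #define PAGE_SHIFT 12  /* 4K pages */

    /* Same rounding as hap_get_allocation(): pages -> MB, rounded up. */
    static unsigned int pages_to_mb(unsigned int pg)
    {
        return (pg >> (20 - PAGE_SHIFT))
               + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0);
    }

    int main(void)
    {
        /* 256 pages = exactly 1MB; 257 pages round up to 2MB. */
        printf("%u %u %u\n", pages_to_mb(0), pages_to_mb(256), pages_to_mb(257));
        return 0;   /* prints: 0 1 2 */
    }
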
    96.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    96.2 +++ b/xen/arch/x86/mm/hap/private.h	Thu Mar 08 14:39:52 2007 -0600
    96.3 @@ -0,0 +1,112 @@
    96.4 +/*
    96.5 + * arch/x86/mm/hap/private.h
    96.6 + *
    96.7 + * Copyright (c) 2007, AMD Corporation (Wei Huang)
    96.8 + *
    96.9 + * This program is free software; you can redistribute it and/or modify it
   96.10 + * under the terms and conditions of the GNU General Public License,
   96.11 + * version 2, as published by the Free Software Foundation.
   96.12 + *
   96.13 + * This program is distributed in the hope it will be useful, but WITHOUT
   96.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   96.15 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   96.16 + * more details.
   96.17 + *
   96.18 + * You should have received a copy of the GNU General Public License along with
   96.19 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   96.20 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   96.21 + *
   96.22 + */
   96.23 +#ifndef __HAP_PRIVATE_H__
   96.24 +#define __HAP_PRIVATE_H__
   96.25 +
   96.26 +#include <asm/flushtlb.h>
   96.27 +#include <asm/hvm/support.h>
   96.28 +
   96.29 +/********************************************/
   96.30 +/*          GUEST TRANSLATION FUNCS         */
   96.31 +/********************************************/
   96.32 +unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva);
   96.33 +unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva);
   96.34 +unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva);
   96.35 +unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva);
   96.36 +/********************************************/
   96.37 +/*            MISC DEFINITIONS              */
   96.38 +/********************************************/
   96.39 +
   96.40 +/* PT_SHIFT describes the amount by which a virtual address is shifted right 
   96.41 + * to right justify the portion to be used for indexing into a page 
   96.42 + * table, given the guest memory model (i.e. number of levels) and the level 
   96.43 + * of the page table being accessed. The idea is from Virtual Iron's code.
   96.44 + */
   96.45 +static const int PT_SHIFT[][5] =
   96.46 +  {   /*     ------  level ------           nr_levels  */
   96.47 +    /*         1     2     3     4                   */
   96.48 +    {    0,    0,    0,    0,    0},   /* 0 not used */
   96.49 +    {    0,    0,    0,    0,    0},   /* 1 not used */
   96.50 +    {    0,   12,   22,    0,    0},   /* 2  */
   96.51 +    {    0,   12,   21,   30,    0},   /* 3  */
   96.52 +    {    0,   12,   21,   30,   39}    /* 4  */
   96.53 +  };
   96.54 +
   96.55 +/* PT_ENTRIES describes the number of entries in a page table, given the 
   96.56 + * memory model (i.e. number of levels) and the level of the page table 
   96.57 + * being considered. This idea from Virtual Iron's shadow code*/
   96.58 +static const int PT_ENTRIES[][5] =
   96.59 +  {   /*     ------  level ------           nr_levels  */
   96.60 +    /*         1     2     3     4                   */
   96.61 +    {    0,    0,    0,    0,    0},   /* 0 not used */
   96.62 +    {    0,    0,    0,    0,    0},   /* 1 not used */
   96.63 +    {    0, 1024, 1024,    0,    0},   /* 2  */
   96.64 +    {    0,  512,  512,    4,    0},   /* 3  */
   96.65 +    {    0,  512,  512,  512,  512}    /* 4  */
   96.66 +  };
   96.67 +
   96.68 +/********************************************/
   96.69 +/*       PAGING DEFINITION FOR GUEST        */
   96.70 +/********************************************/
   96.71 +#define PHYSICAL_PAGE_4K_SIZE (1UL << 12)
   96.72 +#define PHYSICAL_PAGE_2M_SIZE (1UL << 21)
   96.73 +#define PHYSICAL_PAGE_4M_SIZE (1UL << 22)
   96.74 +#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) )
   96.75 +#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) )
   96.76 +#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) )
   96.77 +
   96.78 +/* long mode physical address mask */
   96.79 +#define PHYSICAL_ADDR_BITS_LM    52
   96.80 +#define PHYSICAL_ADDR_MASK_LM    ((1UL << PHYSICAL_ADDR_BITS_LM)-1)
   96.81 +#define PHYSICAL_ADDR_2M_MASK_LM (PHYSICAL_PAGE_2M_MASK & PHYSICAL_ADDR_MASK_LM)
   96.82 +#define PHYSICAL_ADDR_4K_MASK_LM (PHYSICAL_PAGE_4K_MASK & PHYSICAL_ADDR_MASK_LM)
   96.83 +
   96.84 +#define PAGE_NX_BIT      (1ULL << 63)
   96.85 +/************************************************/
   96.86 +/*        PAGETABLE RELATED VARIABLES           */
   96.87 +/************************************************/
   96.88 +#if CONFIG_PAGING_LEVELS == 2
   96.89 +#define HAP_L1_PAGETABLE_ENTRIES    1024
   96.90 +#define HAP_L2_PAGETABLE_ENTRIES    1024
   96.91 +#define HAP_L1_PAGETABLE_SHIFT        12
   96.92 +#define HAP_L2_PAGETABLE_SHIFT        22
   96.93 +#endif
   96.94 +
   96.95 +#if CONFIG_PAGING_LEVELS == 3
   96.96 +#define HAP_L1_PAGETABLE_ENTRIES     512
   96.97 +#define HAP_L2_PAGETABLE_ENTRIES     512
   96.98 +#define HAP_L3_PAGETABLE_ENTRIES       4
   96.99 +#define HAP_L1_PAGETABLE_SHIFT        12
  96.100 +#define HAP_L2_PAGETABLE_SHIFT        21
  96.101 +#define HAP_L3_PAGETABLE_SHIFT        30
  96.102 +#endif
  96.103 +
  96.104 +#if CONFIG_PAGING_LEVELS == 4
  96.105 +#define HAP_L1_PAGETABLE_ENTRIES     512
  96.106 +#define HAP_L2_PAGETABLE_ENTRIES     512
  96.107 +#define HAP_L3_PAGETABLE_ENTRIES     512
  96.108 +#define HAP_L4_PAGETABLE_ENTRIES     512
  96.109 +#define HAP_L1_PAGETABLE_SHIFT        12
  96.110 +#define HAP_L2_PAGETABLE_SHIFT        21
  96.111 +#define HAP_L3_PAGETABLE_SHIFT        30
  96.112 +#define HAP_L4_PAGETABLE_SHIFT        39
  96.113 +#endif
  96.114 +
   96.115 +#endif /* __HAP_PRIVATE_H__ */
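
The PT_SHIFT/PT_ENTRIES tables above drive the guest page-table walker in
support.c: for an nr_levels-level guest, the index into the level-lev table is
(va >> PT_SHIFT[nr_levels][lev]) & (PT_ENTRIES[nr_levels][lev] - 1). A worked
sketch of that indexing (plain C; the tables are copied from the header,
pt_index() itself is illustrative):

    #include <stdio.h>

    /* Copies of the tables from private.h (rows 0 and 1 are unused). */
    static const int PT_SHIFT[][5] = {
        {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
        {0, 12, 22, 0, 0}, {0, 12, 21, 30, 0}, {0, 12, 21, 30, 39}
    };
    static const int PT_ENTRIES[][5] = {
        {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
        {0, 1024, 1024, 0, 0}, {0, 512, 512, 4, 0}, {0, 512, 512, 512, 512}
    };

    /* Index into the level-'lev' table of an 'nr_levels'-level guest. */
    static int pt_index(unsigned long va, int nr_levels, int lev)
    {
        return (va >> PT_SHIFT[nr_levels][lev])
               & (PT_ENTRIES[nr_levels][lev] - 1);
    }

    int main(void)
    {
        unsigned long va = 0xb1234567UL;
        /* 2-level guest: the L2 index is va[31:22] = 0x2c4 = 708. */
        printf("2-level: l2=%d l1=%d\n", pt_index(va, 2, 2), pt_index(va, 2, 1));
        /* 3-level (PAE) guest: the L3 index uses only va[31:30]. */
        printf("3-level: l3=%d l2=%d l1=%d\n",
               pt_index(va, 3, 3), pt_index(va, 3, 2), pt_index(va, 3, 1));
        return 0;
    }
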
    97.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    97.2 +++ b/xen/arch/x86/mm/hap/support.c	Thu Mar 08 14:39:52 2007 -0600
    97.3 @@ -0,0 +1,334 @@
    97.4 +/*
    97.5 + * arch/x86/mm/hap/support.c
    97.6 + * 
    97.7 + * guest page table walker
    97.8 + * Copyright (c) 2007, AMD Corporation (Wei Huang)
    97.9 + *
   97.10 + * This program is free software; you can redistribute it and/or modify it
   97.11 + * under the terms and conditions of the GNU General Public License,
   97.12 + * version 2, as published by the Free Software Foundation.
   97.13 + *
   97.14 + * This program is distributed in the hope it will be useful, but WITHOUT
   97.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   97.16 + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   97.17 + * more details.
   97.18 + *
   97.19 + * You should have received a copy of the GNU General Public License along with
   97.20 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
   97.21 + * Place - Suite 330, Boston, MA 02111-1307 USA.
   97.22 + *
   97.23 + */
   97.24 +
   97.25 +#include <xen/config.h>
   97.26 +#include <xen/types.h>
   97.27 +#include <xen/mm.h>
   97.28 +#include <xen/domain_page.h>
   97.29 +#include <asm/page.h>
   97.30 +#include <xen/event.h>
   97.31 +#include <xen/sched.h>
   97.32 +#include <asm/hvm/svm/vmcb.h>
   97.33 +#include <asm/domain.h>
   97.34 +#include <asm/shadow.h>
   97.35 +#include <asm/hap.h>
   97.36 +
   97.37 +#include "private.h"
   97.38 +#include "../page-guest32.h"
   97.39 +
   97.40 +/*******************************************/
   97.41 +/*      Platform Specific Functions        */
   97.42 +/*******************************************/
   97.43 +
    97.44 +/* Translate a guest virtual address to a guest physical address.
    97.45 + * Specifically for a real-mode guest.
    97.46 + */
   97.47 +unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva)
   97.48 +{
   97.49 +    HERE_I_AM;
   97.50 +    return ((paddr_t)gva >> PAGE_SHIFT);
   97.51 +}
   97.52 +
    97.53 +/* Translate a guest virtual address to a guest physical address.
    97.54 + * Specifically for a protected-mode (two-level) guest.
    97.55 + */
   97.56 +unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva)
   97.57 +{
   97.58 +    unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
   97.59 +    int mode = 2; /* two-level guest */
   97.60 +    int lev, index;
   97.61 +    paddr_t gpa = 0;
   97.62 +    unsigned long gpfn, mfn;
   97.63 +    int success = 1;
   97.64 +    l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */
   97.65 +    l1_pgentry_32_t *l1e;
   97.66 +
   97.67 +    HERE_I_AM;
   97.68 +
   97.69 +    gpfn = (gcr3 >> PAGE_SHIFT);
   97.70 +    for ( lev = mode; lev >= 1; lev-- ) {
   97.71 +        mfn = get_mfn_from_gpfn( gpfn );
   97.72 +        if ( mfn == INVALID_MFN ) {
    97.73 +            HAP_PRINTK("bad gpfn=0x%lx from gva=0x%lx at level %d\n", gpfn,
    97.74 +                       gva, lev);
   97.75 +            success = 0;
   97.76 +            break;
   97.77 +        }
   97.78 +        index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
   97.79 +
   97.80 +        if ( lev == 2 ) {
   97.81 +            l2e = map_domain_page( mfn );
    97.82 +            HAP_PRINTK("L2 page table entry is 0x%x at index %d\n",
    97.83 +                       l2e[index].l2, index);
   97.84 +            if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) {
   97.85 +                HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
   97.86 +                success = 0;
   97.87 +            }
   97.88 +
   97.89 +            if ( l2e_get_flags_32(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
   97.90 +                HAP_PRINTK("guest page table is PSE\n");
    97.91 +                if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /* bits 20:13: PSE frame above 4GB */
   97.92 +                    printk("guest physical memory size is too large!\n");
   97.93 +                    domain_crash(v->domain);
   97.94 +                }
   97.95 +                gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) + 
   97.96 +                    (gva & ~PHYSICAL_PAGE_4M_MASK);
   97.97 +                unmap_domain_page(l2e);
   97.98 +                break; /* last level page table, return from here */
   97.99 +            }
  97.100 +            else {
  97.101 +                gpfn = l2e_get_pfn( l2e[index] );
  97.102 +            }
  97.103 +            unmap_domain_page(l2e);
  97.104 +        }
  97.105 +
  97.106 +        if ( lev == 1 ) {
  97.107 +            l1e = map_domain_page( mfn );
   97.108 +            HAP_PRINTK("L1 page table entry is 0x%x at index %d\n",
   97.109 +                       l1e[index].l1, index);
  97.110 +            if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) {
  97.111 +                HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
  97.112 +                success = 0;
  97.113 +            }
  97.114 +            gpfn = l1e_get_pfn( l1e[index] );
  97.115 +            gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) + 
  97.116 +                (gva & ~PHYSICAL_PAGE_4K_MASK);	    
  97.117 +            unmap_domain_page(l1e);
  97.118 +        }
  97.119 +
  97.120 +        if ( !success ) /* error happened, jump out */
  97.121 +            break;
  97.122 +    }
  97.123 +
  97.124 +    HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
  97.125 +
  97.126 +    if ( !success ) /* error happened */
  97.127 +        return INVALID_GFN;
  97.128 +    else
  97.129 +        return ((paddr_t)gpa >> PAGE_SHIFT);
  97.130 +}
  97.131 +
  97.132 +
  97.133 +
   97.134 +/* Translate a guest virtual address to a guest physical address.
   97.135 + * Specifically for a PAE-mode guest.
   97.136 + */
  97.137 +unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva)
  97.138 +{
  97.139 +#if CONFIG_PAGING_LEVELS >= 3
  97.140 +    unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
  97.141 +    int mode = 3; /* three-level guest */
  97.142 +    int lev, index;
  97.143 +    paddr_t gpa = 0;
  97.144 +    unsigned long gpfn, mfn;
  97.145 +    int success = 1;
  97.146 +    l1_pgentry_t *l1e;
  97.147 +    l2_pgentry_t *l2e;
  97.148 +    l3_pgentry_t *l3e;
  97.149 +    
  97.150 +    HERE_I_AM;
  97.151 +
  97.152 +    gpfn = (gcr3 >> PAGE_SHIFT);
  97.153 +    for ( lev = mode; lev >= 1; lev-- ) {
  97.154 +        mfn = get_mfn_from_gpfn( gpfn );
  97.155 +        if ( mfn == INVALID_MFN ) {
   97.156 +            HAP_PRINTK("bad gpfn=0x%lx from gva=0x%lx at level %d\n", gpfn,
   97.157 +                       gva, lev);
  97.158 +            success = 0;
  97.159 +            break;
  97.160 +        }
  97.161 +        index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
  97.162 +
  97.163 +        if ( lev == 3 ) {
  97.164 +            l3e = map_domain_page( mfn );
   97.165 +            index += ((gcr3 >> 5) & 127) * 4; /* CR3 selects a 32-byte PDPT within the page */
  97.166 +            if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
  97.167 +                HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
  97.168 +                success = 0;
  97.169 +            }
  97.170 +            gpfn = l3e_get_pfn( l3e[index] );
  97.171 +            unmap_domain_page(l3e);
  97.172 +        }
  97.173 +
  97.174 +        if ( lev == 2 ) {
  97.175 +            l2e = map_domain_page( mfn );
  97.176 +            if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
  97.177 +                HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
  97.178 +                success = 0;
  97.179 +            }
  97.180 +
  97.181 +            if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
  97.182 +                HAP_PRINTK("guest page table is PSE\n");
  97.183 +                gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) + 
  97.184 +                    (gva & ~PHYSICAL_PAGE_2M_MASK);
  97.185 +                unmap_domain_page(l2e);
  97.186 +                break; /* last level page table, jump out from here */
  97.187 +            }
  97.188 +            else { 
  97.189 +                gpfn = l2e_get_pfn(l2e[index]);
  97.190 +            }
  97.191 +            unmap_domain_page(l2e);
  97.192 +        }
  97.193 +
  97.194 +        if ( lev == 1 ) {
  97.195 +            l1e = map_domain_page( mfn );
  97.196 +            if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
  97.197 +                HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
  97.198 +                success = 0;
  97.199 +            }
  97.200 +            gpfn = l1e_get_pfn( l1e[index] );
  97.201 +            gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) + 
  97.202 +                (gva & ~PHYSICAL_PAGE_4K_MASK);
  97.203 +            unmap_domain_page(l1e);
  97.204 +        }
  97.205 +
  97.206 +        if ( success != 1 ) /* error happened, jump out */
  97.207 +            break;
  97.208 +    }
  97.209 +
   97.210 +    gpa &= ~PAGE_NX_BIT; /* mask off the NX bit inherited from the guest PTE */
  97.211 +    HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
  97.212 +
  97.213 +    if ( !success )
  97.214 +        return INVALID_GFN;
  97.215 +    else
  97.216 +        return ((paddr_t)gpa >> PAGE_SHIFT);
  97.217 +#else
  97.218 +    HERE_I_AM;
  97.219 +    printk("guest paging level (3) is greater than host paging level!\n");
  97.220 +    domain_crash(v->domain);
  97.221 +    return INVALID_GFN;
  97.222 +#endif
  97.223 +}
  97.224 +
  97.225 +
  97.226 +
   97.227 +/* Translate a guest virtual address to a guest physical address.
   97.228 + * Specifically for a long-mode guest.
   97.229 + */
  97.230 +unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva)
  97.231 +{
  97.232 +#if CONFIG_PAGING_LEVELS == 4
  97.233 +    unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3);
  97.234 +    int mode = 4; /* four-level guest */
  97.235 +    int lev, index;
  97.236 +    paddr_t gpa = 0;
  97.237 +    unsigned long gpfn, mfn;
  97.238 +    int success = 1;
  97.239 +    l4_pgentry_t *l4e;
  97.240 +    l3_pgentry_t *l3e;
  97.241 +    l2_pgentry_t *l2e;
  97.242 +    l1_pgentry_t *l1e;
  97.243 +
  97.244 +    HERE_I_AM;
  97.245 +
  97.246 +    gpfn = (gcr3 >> PAGE_SHIFT);
  97.247 +    for ( lev = mode; lev >= 1; lev-- ) {
  97.248 +        mfn = get_mfn_from_gpfn( gpfn );
  97.249 +        if ( mfn == INVALID_MFN ) {
   97.250 +            HAP_PRINTK("bad gpfn=0x%lx from gva=0x%lx at level %d\n", gpfn,
   97.251 +                       gva, lev);
  97.252 +            success = 0;
  97.253 +            break;
  97.254 +        }
  97.255 +        index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
  97.256 +
  97.257 +        if ( lev == 4 ) {
  97.258 +            l4e = map_domain_page( mfn );
  97.259 +            if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) {
  97.260 +                HAP_PRINTK("Level 4 entry not present at index = %d\n", index);
  97.261 +                success = 0;
  97.262 +            }
  97.263 +            gpfn = l4e_get_pfn( l4e[index] );
  97.264 +            unmap_domain_page(l4e);
  97.265 +        }
  97.266 +
  97.267 +        if ( lev == 3 ) {
  97.268 +            l3e = map_domain_page( mfn );
  97.269 +            if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) {
  97.270 +                HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
  97.271 +                success = 0;
  97.272 +            }
  97.273 +            gpfn = l3e_get_pfn( l3e[index] );
  97.274 +            unmap_domain_page(l3e);
  97.275 +        }
  97.276 +
  97.277 +        if ( lev == 2 ) {
  97.278 +            l2e = map_domain_page( mfn );
  97.279 +            if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) {
  97.280 +                HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
  97.281 +                success = 0;
  97.282 +            }
  97.283 +
  97.284 +            if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */
  97.285 +                HAP_PRINTK("guest page table is PSE\n");
  97.286 +                gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_ADDR_2M_MASK_LM) 
  97.287 +                    + (gva & ~PHYSICAL_PAGE_2M_MASK);
  97.288 +                unmap_domain_page(l2e);
  97.289 +                break; /* last level page table, jump out from here */
  97.290 +            }
  97.291 +            else { 
  97.292 +                gpfn = l2e_get_pfn(l2e[index]);
  97.293 +            }
  97.294 +            unmap_domain_page(l2e);
  97.295 +        }
  97.296 +
  97.297 +        if ( lev == 1 ) {
  97.298 +            l1e = map_domain_page( mfn );
  97.299 +            if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) {
  97.300 +                HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
  97.301 +                success = 0;
  97.302 +            }
  97.303 +            gpfn = l1e_get_pfn( l1e[index] );
  97.304 +            gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_ADDR_4K_MASK_LM) + 
  97.305 +                (gva & ~PHYSICAL_PAGE_4K_MASK);
  97.306 +            unmap_domain_page(l1e);
  97.307 +        }
  97.308 +
  97.309 +        if ( success != 1 ) /* error happened, jump out */
  97.310 +            break;
  97.311 +    }
  97.312 +
   97.313 +    gpa &= ~PAGE_NX_BIT; /* mask off the NX bit inherited from the guest PTE */
  97.314 +    HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
  97.315 +
  97.316 +    if ( !success )
  97.317 +        return INVALID_GFN;
  97.318 +    else
  97.319 +        return ((paddr_t)gpa >> PAGE_SHIFT);
  97.320 +#else
  97.321 +    HERE_I_AM;
  97.322 +    printk("guest paging level (4) is greater than host paging level!\n");
  97.323 +    domain_crash(v->domain);
  97.324 +    return INVALID_GFN;
  97.325 +#endif
  97.326 +}
  97.327 +
  97.328 +/*
  97.329 + * Local variables:
  97.330 + * mode: C
  97.331 + * c-set-style: "BSD"
  97.332 + * c-basic-offset: 4
  97.333 + * tab-width: 4
  97.334 + * indent-tabs-mode: nil
  97.335 + * End:
  97.336 + */
  97.337 +
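
Two details of these walkers are worth spelling out. In the PAE walker, CR3
is only 32-byte aligned, so the extra term ((gcr3 >> 5) & 127) * 4 selects
which of the 128 possible 32-byte PDPTs within the mapped page holds the
guest's four L3 entries. And whenever a PSE mapping ends the walk early, the
gpa is assembled from the large-page frame plus the untranslated low bits of
the gva. A worked sketch of the 4K composition, using hypothetical values:

    /* Illustrative only: compose a gpa from an L1 entry as the walkers do.
     * With pte = 0x12345067 (frame 0x12345, flags 0x067) and
     * gva = 0xdeadbeef, gpa becomes 0x12345eef. */
    unsigned long pte = 0x12345067UL;
    unsigned long gva = 0xdeadbeefUL;
    unsigned long gpa = (pte & PHYSICAL_PAGE_4K_MASK) |
                        (gva & ~PHYSICAL_PAGE_4K_MASK);
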
    98.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    98.2 +++ b/xen/arch/x86/mm/page-guest32.h	Thu Mar 08 14:39:52 2007 -0600
    98.3 @@ -0,0 +1,100 @@
    98.4 +
    98.5 +#ifndef __X86_PAGE_GUEST_H__
    98.6 +#define __X86_PAGE_GUEST_H__
    98.7 +
    98.8 +#ifndef __ASSEMBLY__
    98.9 +# include <asm/types.h>
   98.10 +#endif
   98.11 +
   98.12 +#define PAGETABLE_ORDER_32         10
   98.13 +#define L1_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
   98.14 +#define L2_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
   98.15 +#define ROOT_PAGETABLE_ENTRIES_32  L2_PAGETABLE_ENTRIES_32
   98.16 +
   98.17 +
   98.18 +#define L1_PAGETABLE_SHIFT_32 12
   98.19 +#define L2_PAGETABLE_SHIFT_32 22
   98.20 +
   98.21 +/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
   98.22 +
   98.23 +#ifndef __ASSEMBLY__
   98.24 +
   98.25 +typedef u32 intpte_32_t;
   98.26 +
   98.27 +typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
   98.28 +typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
   98.29 +typedef l2_pgentry_t root_pgentry_32_t;
   98.30 +#endif
   98.31 +
   98.32 +#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
   98.33 +#define put_pte_flags_32(x) ((intpte_32_t)(x))
   98.34 +
   98.35 +/* Get pte access flags (unsigned int). */
   98.36 +#define l1e_get_flags_32(x)           (get_pte_flags_32((x).l1))
   98.37 +#define l2e_get_flags_32(x)           (get_pte_flags_32((x).l2))
   98.38 +
   98.39 +#define l1e_get_paddr_32(x)           \
   98.40 +    ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
   98.41 +#define l2e_get_paddr_32(x)           \
   98.42 +    ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
   98.43 +
   98.44 +/* Construct an empty pte. */
   98.45 +#define l1e_empty_32()                ((l1_pgentry_32_t) { 0 })
   98.46 +#define l2e_empty_32()                ((l2_pgentry_32_t) { 0 })
   98.47 +
   98.48 +/* Construct a pte from a pfn and access flags. */
   98.49 +#define l1e_from_pfn_32(pfn, flags)   \
   98.50 +    ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
   98.51 +#define l2e_from_pfn_32(pfn, flags)   \
   98.52 +    ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
   98.53 +
   98.54 +/* Construct a pte from a physical address and access flags. */
   98.55 +#ifndef __ASSEMBLY__
   98.56 +static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
   98.57 +{
   98.58 +    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
   98.59 +    return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
   98.60 +}
   98.61 +static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
   98.62 +{
   98.63 +    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
   98.64 +    return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
   98.65 +}
   98.66 +#endif /* !__ASSEMBLY__ */
   98.67 +
   98.68 +
   98.69 +/* Construct a pte from a page pointer and access flags. */
   98.70 +#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
   98.71 +#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
   98.72 +
   98.73 +/* Add extra flags to an existing pte. */
   98.74 +#define l1e_add_flags_32(x, flags)    ((x).l1 |= put_pte_flags_32(flags))
   98.75 +#define l2e_add_flags_32(x, flags)    ((x).l2 |= put_pte_flags_32(flags))
   98.76 +
   98.77 +/* Remove flags from an existing pte. */
   98.78 +#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
   98.79 +#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
   98.80 +
   98.81 +/* Check if a pte's page mapping or significant access flags have changed. */
   98.82 +#define l1e_has_changed_32(x,y,flags) \
   98.83 +    ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
   98.84 +#define l2e_has_changed_32(x,y,flags) \
   98.85 +    ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
   98.86 +
   98.87 +/* Given a virtual address, get an entry offset into a page table. */
   98.88 +#define l1_table_offset_32(a)         \
   98.89 +    (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
   98.90 +#define l2_table_offset_32(a)         \
   98.91 +    (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
   98.92 +
   98.93 +#endif /* __X86_PAGE_GUEST_H__ */
   98.94 +
   98.95 +/*
   98.96 + * Local variables:
   98.97 + * mode: C
   98.98 + * c-set-style: "BSD"
   98.99 + * c-basic-offset: 4
  98.100 + * tab-width: 4
  98.101 + * indent-tabs-mode: nil
  98.102 + * End:
  98.103 + */
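
A short usage sketch of these 32-bit guest accessors (the values are
hypothetical; _PAGE_PRESENT and _PAGE_RW come from asm/page.h):

    /* Build an L1 entry for pfn 0x1000 with present+writable flags, then
     * find the L1 slot that maps a given 32-bit virtual address. */
    l1_pgentry_32_t e = l1e_from_pfn_32(0x1000, _PAGE_PRESENT | _PAGE_RW);
    unsigned int slot = l1_table_offset_32(0x00402000);    /* == 2 */
    ASSERT(l1e_get_flags_32(e) & _PAGE_PRESENT);
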
    99.1 --- a/xen/arch/x86/mm/paging.c	Mon Mar 05 12:49:12 2007 -0600
    99.2 +++ b/xen/arch/x86/mm/paging.c	Thu Mar 08 14:39:52 2007 -0600
    99.3 @@ -24,10 +24,12 @@
    99.4  #include <asm/paging.h>
    99.5  #include <asm/shadow.h>
    99.6  #include <asm/p2m.h>
    99.7 +#include <asm/hap.h>
    99.8  
    99.9  /* Xen command-line option to enable hardware-assisted paging */
   99.10  int opt_hap_enabled = 0; 
   99.11  boolean_param("hap", opt_hap_enabled);
   99.12 +int hap_capable_system = 0;
   99.13  
   99.14  /* Printouts */
   99.15  #define PAGING_PRINTK(_f, _a...)                                     \
   99.16 @@ -46,12 +48,18 @@ void paging_domain_init(struct domain *d
   99.17  {
   99.18      p2m_init(d);
   99.19      shadow_domain_init(d);
   99.20 +
   99.21 +    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
   99.22 +        hap_domain_init(d);
   99.23  }
   99.24  
   99.25  /* vcpu paging struct initialization goes here */
   99.26  void paging_vcpu_init(struct vcpu *v)
   99.27  {
   99.28 -    shadow_vcpu_init(v);
   99.29 +    if ( opt_hap_enabled && hap_capable_system && is_hvm_vcpu(v) )
   99.30 +        hap_vcpu_init(v);
   99.31 +    else
   99.32 +        shadow_vcpu_init(v);
   99.33  }
   99.34  
   99.35  
   99.36 @@ -59,32 +67,38 @@ int paging_domctl(struct domain *d, xen_
   99.37                    XEN_GUEST_HANDLE(void) u_domctl)
   99.38  {
   99.39      /* Here, dispatch domctl to the appropriate paging code */
   99.40 -    return shadow_domctl(d, sc, u_domctl);
   99.41 +    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
   99.42 +        return hap_domctl(d, sc, u_domctl);
   99.43 +    else
   99.44 +        return shadow_domctl(d, sc, u_domctl);
   99.45  }
   99.46  
   99.47  /* Call when destroying a domain */
   99.48  void paging_teardown(struct domain *d)
   99.49  {
   99.50 -    shadow_teardown(d);
   99.51 -    /* Call other modes' teardown code here */    
   99.52 +    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
   99.53 +        hap_teardown(d);
   99.54 +    else
   99.55 +        shadow_teardown(d);
   99.56  }
   99.57  
   99.58  /* Call once all of the references to the domain have gone away */
   99.59  void paging_final_teardown(struct domain *d)
   99.60  {
   99.61 -    shadow_teardown(d);
   99.62 -    /* Call other modes' final teardown code here */
   99.63 +    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
   99.64 +        hap_final_teardown(d);
   99.65 +    else
   99.66 +        shadow_final_teardown(d);
   99.67  }
   99.68  
   99.69  /* Enable an arbitrary paging-assistance mode.  Call once at domain
   99.70   * creation. */
   99.71  int paging_enable(struct domain *d, u32 mode)
   99.72  {
   99.73 -    if ( mode & PG_SH_enable ) 
   99.74 -        return shadow_enable(d, mode);
   99.75 +    if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
   99.76 +        return hap_enable(d, mode | PG_HAP_enable);
   99.77      else
   99.78 -        /* No other modes supported yet */
   99.79 -        return -EINVAL; 
   99.80 +        return shadow_enable(d, mode | PG_SH_enable);
   99.81  }
   99.82  
   99.83  /* Print paging-assistance info to the console */
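
Every entry point above now repeats the same three-part test:
opt_hap_enabled && hap_capable_system && is_hvm_domain(d). A sketch of how
it could be factored (a hypothetical helper, not part of this patch):

    /* Hypothetical: one predicate for "this domain is managed by HAP". */
    #define paging_uses_hap(_d) \
        (opt_hap_enabled && hap_capable_system && is_hvm_domain(_d))

Keeping it open-coded, as the patch does, makes each dispatch site
self-explanatory at the cost of some repetition.
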
   100.1 --- a/xen/arch/x86/mm/shadow/common.c	Mon Mar 05 12:49:12 2007 -0600
   100.2 +++ b/xen/arch/x86/mm/shadow/common.c	Thu Mar 08 14:39:52 2007 -0600
   100.3 @@ -2912,7 +2912,16 @@ void sh_mark_dirty(struct domain *d, mfn
   100.4       * can be called from __hvm_copy during emulation).
   100.5       * If the lock isn't held, take it for the duration of the call. */
   100.6      do_locking = !shadow_locked_by_me(d);
   100.7 -    if ( do_locking ) shadow_lock(d);
   100.8 +    if ( do_locking ) 
   100.9 +    { 
  100.10 +        shadow_lock(d);
  100.11 +        /* Check the mode again with the lock held */ 
  100.12 +        if ( unlikely(!shadow_mode_log_dirty(d)) )
  100.13 +        {
  100.14 +            shadow_unlock(d);
  100.15 +            return;
  100.16 +        }
  100.17 +    }
  100.18  
  100.19      ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
  100.20  
  100.21 @@ -2968,10 +2977,18 @@ int shadow_domctl(struct domain *d,
  100.22  
  100.23      if ( unlikely(d == current->domain) )
  100.24      {
  100.25 -        gdprintk(XENLOG_INFO, "Don't try to do a shadow op on yourself!\n");
  100.26 +        gdprintk(XENLOG_INFO, "Dom %u tried to do a shadow op on itself.\n",
  100.27 +                 d->domain_id);
  100.28          return -EINVAL;
  100.29      }
  100.30  
  100.31 +    if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) )
  100.32 +    {
  100.33 +        gdprintk(XENLOG_INFO, "Ignoring shadow op on dying domain %u\n",
  100.34 +                 d->domain_id);
  100.35 +        return 0;
  100.36 +    }
  100.37 +
  100.38      switch ( sc->op )
  100.39      {
  100.40      case XEN_DOMCTL_SHADOW_OP_OFF:
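
The sh_mark_dirty() change above is the usual check/lock/re-check pattern:
an unlocked mode test is only a fast-path hint, so it must be repeated once
the lock is held. A paraphrase of the patched entry sequence (assuming the
existing shadow_lock/shadow_unlock/shadow_mode_log_dirty interfaces):

    static void mark_dirty_safely(struct domain *d)
    {
        if ( !shadow_mode_log_dirty(d) )        /* unlocked fast-path test */
            return;
        shadow_lock(d);
        if ( unlikely(!shadow_mode_log_dirty(d)) ) /* re-check under lock */
        {
            shadow_unlock(d);
            return;
        }
        /* ... log-dirty state is stable until shadow_unlock(d) ... */
        shadow_unlock(d);
    }
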
   101.1 --- a/xen/arch/x86/mm/shadow/page-guest32.h	Mon Mar 05 12:49:12 2007 -0600
   101.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
   101.3 @@ -1,100 +0,0 @@
   101.4 -
   101.5 -#ifndef __X86_PAGE_GUEST_H__
   101.6 -#define __X86_PAGE_GUEST_H__
   101.7 -
   101.8 -#ifndef __ASSEMBLY__
   101.9 -# include <asm/types.h>
  101.10 -#endif
  101.11 -
  101.12 -#define PAGETABLE_ORDER_32         10
  101.13 -#define L1_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
  101.14 -#define L2_PAGETABLE_ENTRIES_32    (1<<PAGETABLE_ORDER_32)
  101.15 -#define ROOT_PAGETABLE_ENTRIES_32  L2_PAGETABLE_ENTRIES_32
  101.16 -
  101.17 -
  101.18 -#define L1_PAGETABLE_SHIFT_32 12
  101.19 -#define L2_PAGETABLE_SHIFT_32 22
  101.20 -
  101.21 -/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */
  101.22 -
  101.23 -#ifndef __ASSEMBLY__
  101.24 -
  101.25 -typedef u32 intpte_32_t;
  101.26 -
  101.27 -typedef struct { intpte_32_t l1; } l1_pgentry_32_t;
  101.28 -typedef struct { intpte_32_t l2; } l2_pgentry_32_t;
  101.29 -typedef l2_pgentry_t root_pgentry_32_t;
  101.30 -#endif
  101.31 -
  101.32 -#define get_pte_flags_32(x) ((u32)(x) & 0xFFF)
  101.33 -#define put_pte_flags_32(x) ((intpte_32_t)(x))
  101.34 -
  101.35 -/* Get pte access flags (unsigned int). */
  101.36 -#define l1e_get_flags_32(x)           (get_pte_flags_32((x).l1))
  101.37 -#define l2e_get_flags_32(x)           (get_pte_flags_32((x).l2))
  101.38 -
  101.39 -#define l1e_get_paddr_32(x)           \
  101.40 -    ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK))))
  101.41 -#define l2e_get_paddr_32(x)           \
  101.42 -    ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK))))
  101.43 -
  101.44 -/* Construct an empty pte. */
  101.45 -#define l1e_empty_32()                ((l1_pgentry_32_t) { 0 })
  101.46 -#define l2e_empty_32()                ((l2_pgentry_32_t) { 0 })
  101.47 -
  101.48 -/* Construct a pte from a pfn and access flags. */
  101.49 -#define l1e_from_pfn_32(pfn, flags)   \
  101.50 -    ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
  101.51 -#define l2e_from_pfn_32(pfn, flags)   \
  101.52 -    ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) })
  101.53 -
  101.54 -/* Construct a pte from a physical address and access flags. */
  101.55 -#ifndef __ASSEMBLY__
  101.56 -static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags)
  101.57 -{
  101.58 -    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
  101.59 -    return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) };
  101.60 -}
  101.61 -static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags)
  101.62 -{
  101.63 -    ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0);
  101.64 -    return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) };
  101.65 -}
  101.66 -#endif /* !__ASSEMBLY__ */
  101.67 -
  101.68 -
  101.69 -/* Construct a pte from a page pointer and access flags. */
  101.70 -#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags)))
  101.71 -#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags)))
  101.72 -
  101.73 -/* Add extra flags to an existing pte. */
  101.74 -#define l1e_add_flags_32(x, flags)    ((x).l1 |= put_pte_flags_32(flags))
  101.75 -#define l2e_add_flags_32(x, flags)    ((x).l2 |= put_pte_flags_32(flags))
  101.76 -
  101.77 -/* Remove flags from an existing pte. */
  101.78 -#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags))
  101.79 -#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags))
  101.80 -
  101.81 -/* Check if a pte's page mapping or significant access flags have changed. */
  101.82 -#define l1e_has_changed_32(x,y,flags) \
  101.83 -    ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
  101.84 -#define l2e_has_changed_32(x,y,flags) \
  101.85 -    ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) )
  101.86 -
  101.87 -/* Given a virtual address, get an entry offset into a page table. */
  101.88 -#define l1_table_offset_32(a)         \
  101.89 -    (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1))
  101.90 -#define l2_table_offset_32(a)         \
  101.91 -    (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1))
  101.92 -
  101.93 -#endif /* __X86_PAGE_GUEST_H__ */
  101.94 -
  101.95 -/*
  101.96 - * Local variables:
  101.97 - * mode: C
  101.98 - * c-set-style: "BSD"
  101.99 - * c-basic-offset: 4
 101.100 - * tab-width: 4
 101.101 - * indent-tabs-mode: nil
 101.102 - * End:
 101.103 - */
   102.1 --- a/xen/arch/x86/mm/shadow/private.h	Mon Mar 05 12:49:12 2007 -0600
   102.2 +++ b/xen/arch/x86/mm/shadow/private.h	Thu Mar 08 14:39:52 2007 -0600
   102.3 @@ -539,7 +539,7 @@ static inline int sh_get_ref(struct vcpu
   102.4  
   102.5      /* We remember the first shadow entry that points to each shadow. */
   102.6      if ( entry_pa != 0 
   102.7 -         && sh_type_is_pinnable(v, sp->type) 
   102.8 +         && !sh_type_is_pinnable(v, sp->type) 
   102.9           && sp->up == 0 ) 
  102.10          sp->up = entry_pa;
  102.11      
  102.12 @@ -559,7 +559,7 @@ static inline void sh_put_ref(struct vcp
  102.13  
  102.14      /* If this is the entry in the up-pointer, remove it */
  102.15      if ( entry_pa != 0 
  102.16 -         && sh_type_is_pinnable(v, sp->type) 
  102.17 +         && !sh_type_is_pinnable(v, sp->type) 
  102.18           && sp->up == entry_pa ) 
  102.19          sp->up = 0;
  102.20  
   103.1 --- a/xen/arch/x86/mm/shadow/types.h	Mon Mar 05 12:49:12 2007 -0600
   103.2 +++ b/xen/arch/x86/mm/shadow/types.h	Thu Mar 08 14:39:52 2007 -0600
   103.3 @@ -235,7 +235,7 @@ static inline shadow_l4e_t shadow_l4e_fr
   103.4  
   103.5  #if GUEST_PAGING_LEVELS == 2
   103.6  
   103.7 -#include "page-guest32.h"
   103.8 +#include "../page-guest32.h"
   103.9  
  103.10  #define GUEST_L1_PAGETABLE_ENTRIES     1024
  103.11  #define GUEST_L2_PAGETABLE_ENTRIES     1024
   104.1 --- a/xen/common/event_channel.c	Mon Mar 05 12:49:12 2007 -0600
   104.2 +++ b/xen/common/event_channel.c	Thu Mar 08 14:39:52 2007 -0600
   104.3 @@ -560,6 +560,9 @@ void send_guest_global_virq(struct domai
   104.4  
   104.5      ASSERT(virq_is_global(virq));
   104.6  
   104.7 +    if ( unlikely(d == NULL) )
   104.8 +        return;
   104.9 +
  104.10      v = d->vcpu[0];
  104.11      if ( unlikely(v == NULL) )
  104.12          return;
   105.1 --- a/xen/common/page_alloc.c	Mon Mar 05 12:49:12 2007 -0600
   105.2 +++ b/xen/common/page_alloc.c	Thu Mar 08 14:39:52 2007 -0600
   105.3 @@ -49,7 +49,7 @@ string_param("badpage", opt_badpage);
   105.4   * Bit width of the DMA heap.
   105.5   */
   105.6  static unsigned int  dma_bitsize = CONFIG_DMA_BITSIZE;
   105.7 -static unsigned long max_dma_mfn = (1UL << (CONFIG_DMA_BITSIZE - PAGE_SHIFT)) - 1;
   105.8 +static unsigned long max_dma_mfn = (1UL<<(CONFIG_DMA_BITSIZE-PAGE_SHIFT))-1;
   105.9  static void parse_dma_bits(char *s)
  105.10  {
  105.11      unsigned int v = simple_strtol(s, NULL, 0);
  105.12 @@ -339,11 +339,13 @@ static void init_heap_block(heap_by_zone
  105.13  
  105.14  /* Allocate 2^@order contiguous pages. */
  105.15  static struct page_info *alloc_heap_pages(
  105.16 -    unsigned int zone_lo, unsigned zone_hi,
  105.17 +    unsigned int zone_lo, unsigned int zone_hi,
  105.18      unsigned int cpu, unsigned int order)
  105.19  {
  105.20 -    unsigned int i, j, node = cpu_to_node(cpu), num_nodes = num_online_nodes();
  105.21 -    unsigned int zone, request = (1UL << order);
  105.22 +    unsigned int i, j, zone;
  105.23 +    unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes();
  105.24 +    unsigned long request = 1UL << order;
  105.25 +    cpumask_t extra_cpus_mask, mask;
  105.26      struct page_info *pg;
  105.27  
  105.28      ASSERT(node >= 0);
  105.29 @@ -356,25 +358,24 @@ static struct page_info *alloc_heap_page
  105.30  
  105.31      spin_lock(&heap_lock);
  105.32  
  105.33 -    /* start with requested node, but exhaust all node memory
  105.34 -     * in requested zone before failing, only calc new node
  105.35 -     * value if we fail to find memory in target node, this avoids
  105.36 -     * needless computation on fast-path */
  105.37 +    /*
  105.38 +     * Start with requested node, but exhaust all node memory in requested 
  105.39 +     * zone before failing, only calc new node value if we fail to find memory 
  105.40 +     * in target node, this avoids needless computation on fast-path.
  105.41 +     */
  105.42      for ( i = 0; i < num_nodes; i++ )
  105.43      {
  105.44 -        for ( zone = zone_hi; zone >= zone_lo; --zone )
  105.45 -        {
  105.46 -            /* check if target node can support the allocation */
  105.47 -            if ( avail[node] && (avail[node][zone] >= request) )
  105.48 -            {
  105.49 -                /* Find smallest order which can satisfy the request. */
  105.50 -                for ( j = order; j <= MAX_ORDER; j++ )
  105.51 -                {
  105.52 -                    if ( !list_empty(&heap(node, zone, j)) )
  105.53 -                        goto found;
  105.54 -                }
  105.55 -            }
  105.56 -        }
  105.57 +        zone = zone_hi;
  105.58 +        do {
  105.59 +            /* Check if target node can support the allocation. */
  105.60 +            if ( !avail[node] || (avail[node][zone] < request) )
  105.61 +                continue;
  105.62 +
  105.63 +            /* Find smallest order which can satisfy the request. */
  105.64 +            for ( j = order; j <= MAX_ORDER; j++ )
  105.65 +                if ( !list_empty(&heap(node, zone, j)) )
  105.66 +                    goto found;
  105.67 +        } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
  105.68  
  105.69          /* Pick next node, wrapping around if needed. */
  105.70          if ( ++node == num_nodes )
  105.71 @@ -403,6 +404,29 @@ static struct page_info *alloc_heap_page
  105.72  
  105.73      spin_unlock(&heap_lock);
  105.74  
  105.75 +    cpus_clear(mask);
  105.76 +
  105.77 +    for ( i = 0; i < (1 << order); i++ )
  105.78 +    {
  105.79 +        /* Reference count must continuously be zero for free pages. */
  105.80 +        BUG_ON(pg[i].count_info != 0);
  105.81 +
  105.82 +        /* Add in any extra CPUs that need flushing because of this page. */
  105.83 +        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
  105.84 +        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
  105.85 +        cpus_or(mask, mask, extra_cpus_mask);
  105.86 +
  105.87 +        /* Initialise fields which have other uses for free pages. */
  105.88 +        pg[i].u.inuse.type_info = 0;
  105.89 +        page_set_owner(&pg[i], NULL);
  105.90 +    }
  105.91 +
  105.92 +    if ( unlikely(!cpus_empty(mask)) )
  105.93 +    {
  105.94 +        perfc_incrc(need_flush_tlb_flush);
  105.95 +        flush_tlb_mask(mask);
  105.96 +    }
  105.97 +
  105.98      return pg;
  105.99  }
 105.100  
 105.101 @@ -411,13 +435,28 @@ static void free_heap_pages(
 105.102      unsigned int zone, struct page_info *pg, unsigned int order)
 105.103  {
 105.104      unsigned long mask;
 105.105 -    unsigned int node = phys_to_nid(page_to_maddr(pg));
 105.106 +    unsigned int i, node = phys_to_nid(page_to_maddr(pg));
 105.107 +    struct domain *d;
 105.108  
 105.109      ASSERT(zone < NR_ZONES);
 105.110      ASSERT(order <= MAX_ORDER);
 105.111      ASSERT(node >= 0);
 105.112      ASSERT(node < num_online_nodes());
 105.113  
 105.114 +    for ( i = 0; i < (1 << order); i++ )
 105.115 +    {
 105.116 +        BUG_ON(pg[i].count_info != 0);
 105.117 +        if ( (d = page_get_owner(&pg[i])) != NULL )
 105.118 +        {
 105.119 +            pg[i].tlbflush_timestamp = tlbflush_current_time();
 105.120 +            pg[i].u.free.cpumask     = d->domain_dirty_cpumask;
 105.121 +        }
 105.122 +        else
 105.123 +        {
 105.124 +            cpus_clear(pg[i].u.free.cpumask);
 105.125 +        }
 105.126 +    }
 105.127 +
 105.128      spin_lock(&heap_lock);
 105.129  
 105.130      map_free(page_to_mfn(pg), 1 << order);
 105.131 @@ -426,7 +465,7 @@ static void free_heap_pages(
 105.132      /* Merge chunks as far as possible. */
 105.133      while ( order < MAX_ORDER )
 105.134      {
 105.135 -        mask = 1 << order;
 105.136 +        mask = 1UL << order;
 105.137  
 105.138          if ( (page_to_mfn(pg) & mask) )
 105.139          {
 105.140 @@ -554,7 +593,7 @@ void end_boot_allocator(void)
 105.141  /*
 105.142   * Scrub all unallocated pages in all heap zones. This function is more
 105.143   * convoluted than appears necessary because we do not want to continuously
 105.144 - * hold the lock or disable interrupts while scrubbing very large memory areas.
 105.145 + * hold the lock while scrubbing very large memory areas.
 105.146   */
 105.147  void scrub_heap_pages(void)
 105.148  {
 105.149 @@ -575,7 +614,7 @@ void scrub_heap_pages(void)
 105.150          if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
 105.151              printk(".");
 105.152  
 105.153 -        spin_lock_irq(&heap_lock);
 105.154 +        spin_lock(&heap_lock);
 105.155  
 105.156          /* Re-check page status with lock held. */
 105.157          if ( !allocated_in_map(mfn) )
 105.158 @@ -595,7 +634,7 @@ void scrub_heap_pages(void)
 105.159              }
 105.160          }
 105.161  
 105.162 -        spin_unlock_irq(&heap_lock);
 105.163 +        spin_unlock(&heap_lock);
 105.164      }
 105.165  
 105.166      printk("done.\n");
 105.167 @@ -609,8 +648,6 @@ void scrub_heap_pages(void)
 105.168  
 105.169  void init_xenheap_pages(paddr_t ps, paddr_t pe)
 105.170  {
 105.171 -    unsigned long flags;
 105.172 -
 105.173      ps = round_pgup(ps);
 105.174      pe = round_pgdown(pe);
 105.175      if ( pe <= ps )
 105.176 @@ -625,34 +662,22 @@ void init_xenheap_pages(paddr_t ps, padd
 105.177      if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) )
 105.178          pe -= PAGE_SIZE;
 105.179  
 105.180 -    local_irq_save(flags);
 105.181      init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
 105.182 -    local_irq_restore(flags);
 105.183  }
 105.184  
 105.185  
 105.186  void *alloc_xenheap_pages(unsigned int order)
 105.187  {
 105.188 -    unsigned long flags;
 105.189      struct page_info *pg;
 105.190 -    int i;
 105.191  
 105.192 -    local_irq_save(flags);
 105.193 +    ASSERT(!in_irq());
 105.194 +
 105.195      pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order);
 105.196 -    local_irq_restore(flags);
 105.197 -
 105.198      if ( unlikely(pg == NULL) )
 105.199          goto no_memory;
 105.200  
 105.201      memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
 105.202  
 105.203 -    for ( i = 0; i < (1 << order); i++ )
 105.204 -    {
 105.205 -        pg[i].count_info        = 0;
 105.206 -        pg[i].u.inuse._domain   = 0;
 105.207 -        pg[i].u.inuse.type_info = 0;
 105.208 -    }
 105.209 -
 105.210      return page_to_virt(pg);
 105.211  
 105.212   no_memory:
 105.213 @@ -663,16 +688,14 @@ void *alloc_xenheap_pages(unsigned int o
 105.214  
 105.215  void free_xenheap_pages(void *v, unsigned int order)
 105.216  {
 105.217 -    unsigned long flags;
 105.218 +    ASSERT(!in_irq());
 105.219  
 105.220      if ( v == NULL )
 105.221          return;
 105.222  
 105.223 -    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));    
 105.224 +    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
 105.225  
 105.226 -    local_irq_save(flags);
 105.227      free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
 105.228 -    local_irq_restore(flags);
 105.229  }
 105.230  
 105.231  
 105.232 @@ -762,8 +785,6 @@ struct page_info *__alloc_domheap_pages(
 105.233      unsigned int memflags)
 105.234  {
 105.235      struct page_info *pg = NULL;
 105.236 -    cpumask_t mask;
 105.237 -    unsigned long i;
 105.238      unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
 105.239  
 105.240      ASSERT(!in_irq());
 105.241 @@ -792,38 +813,10 @@ struct page_info *__alloc_domheap_pages(
 105.242              return NULL;
 105.243      }
 105.244  
 105.245 -    if ( pg == NULL )
 105.246 -        if ( (pg = alloc_heap_pages(MEMZONE_XEN + 1,
 105.247 -                                    zone_hi,
 105.248 -                                    cpu, order)) == NULL )
 105.249 -            return NULL;
 105.250 -
 105.251 -    mask = pg->u.free.cpumask;
 105.252 -    tlbflush_filter(mask, pg->tlbflush_timestamp);
 105.253 -
 105.254 -    pg->count_info        = 0;
 105.255 -    pg->u.inuse._domain   = 0;
 105.256 -    pg->u.inuse.type_info = 0;
 105.257 -
 105.258 -    for ( i = 1; i < (1 << order); i++ )
 105.259 -    {
 105.260 -        /* Add in any extra CPUs that need flushing because of this page. */
 105.261 -        cpumask_t extra_cpus_mask;
 105.262 -        cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
 105.263 -        tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
 105.264 -        cpus_or(mask, mask, extra_cpus_mask);
 105.265 -
 105.266 -        pg[i].count_info        = 0;
 105.267 -        pg[i].u.inuse._domain   = 0;
 105.268 -        pg[i].u.inuse.type_info = 0;
 105.269 -        page_set_owner(&pg[i], NULL);
 105.270 -    }
 105.271 -
 105.272 -    if ( unlikely(!cpus_empty(mask)) )
 105.273 -    {
 105.274 -        perfc_incrc(need_flush_tlb_flush);
 105.275 -        flush_tlb_mask(mask);
 105.276 -    }
 105.277 +    if ( (pg == NULL) &&
 105.278 +         ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
 105.279 +                                 cpu, order)) == NULL) )
 105.280 +         return NULL;
 105.281  
 105.282      if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
 105.283      {
 105.284 @@ -867,10 +860,7 @@ void free_domheap_pages(struct page_info
 105.285  
 105.286          for ( i = 0; i < (1 << order); i++ )
 105.287          {
 105.288 -            shadow_drop_references(d, &pg[i]);
 105.289 -            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
 105.290 -            pg[i].tlbflush_timestamp  = tlbflush_current_time();
 105.291 -            pg[i].u.free.cpumask      = d->domain_dirty_cpumask;
 105.292 +            BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
 105.293              list_del(&pg[i].list);
 105.294          }
 105.295  
 105.296 @@ -892,6 +882,7 @@ void free_domheap_pages(struct page_info
 105.297               */
 105.298              for ( i = 0; i < (1 << order); i++ )
 105.299              {
 105.300 +                page_set_owner(&pg[i], NULL);
 105.301                  spin_lock(&page_scrub_lock);
 105.302                  list_add(&pg[i].list, &page_scrub_list);
 105.303                  scrub_pages++;
 105.304 @@ -902,8 +893,6 @@ void free_domheap_pages(struct page_info
 105.305      else
 105.306      {
 105.307          /* Freeing anonymous domain-heap pages. */
 105.308 -        for ( i = 0; i < (1 << order); i++ )
 105.309 -            cpus_clear(pg[i].u.free.cpumask);
 105.310          free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
 105.311          drop_dom_ref = 0;
 105.312      }
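
One hunk above deserves a note: in alloc_heap_pages() the zone index is
unsigned, so the old "for ( zone = zone_hi; zone >= zone_lo; --zone )" could
never terminate when zone_lo is 0, because decrementing past zero wraps to
UINT_MAX and the condition stays true. The replacement tests before
decrementing. A minimal demonstration of the idiom:

    /* Visit zone_hi, zone_hi-1, ..., zone_lo, even when zone_lo == 0.
     * The comparison runs before the post-decrement, so the loop exits
     * after visiting zone_lo instead of wrapping around. */
    static void visit_zones(unsigned int zone_hi, unsigned int zone_lo)
    {
        unsigned int zone = zone_hi;
        do {
            /* ... examine 'zone' ... */
        } while ( zone-- > zone_lo );
    }
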
   106.1 --- a/xen/common/xmalloc.c	Mon Mar 05 12:49:12 2007 -0600
   106.2 +++ b/xen/common/xmalloc.c	Thu Mar 08 14:39:52 2007 -0600
   106.3 @@ -33,6 +33,8 @@
   106.4  #include <xen/timer.h>
   106.5  #include <xen/cache.h>
   106.6  #include <xen/prefetch.h>
   106.7 +#include <xen/irq.h>
   106.8 +#include <xen/smp.h>
   106.9  
  106.10  /*
  106.11   * XMALLOC_DEBUG:
  106.12 @@ -175,6 +177,8 @@ void *_xmalloc(size_t size, size_t align
  106.13      struct xmalloc_hdr *i;
  106.14      unsigned long flags;
  106.15  
  106.16 +    ASSERT(!in_irq());
  106.17 +
  106.18      /* We currently always return cacheline aligned. */
  106.19      BUG_ON(align > SMP_CACHE_BYTES);
  106.20  
  106.21 @@ -213,6 +217,8 @@ void xfree(void *p)
  106.22      unsigned long flags;
  106.23      struct xmalloc_hdr *i, *tmp, *hdr;
  106.24  
  106.25 +    ASSERT(!in_irq());
  106.26 +
  106.27      if ( p == NULL )
  106.28          return;
  106.29  
   107.1 --- a/xen/drivers/acpi/numa.c	Mon Mar 05 12:49:12 2007 -0600
   107.2 +++ b/xen/drivers/acpi/numa.c	Thu Mar 08 14:39:52 2007 -0600
   107.3 @@ -22,10 +22,6 @@
   107.4   * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   107.5   *
   107.6   */
   107.7 -#if 0
   107.8 -#include <linux/module.h>
   107.9 -#include <linux/kernel.h>
  107.10 -#endif
  107.11  #include <xen/config.h>
  107.12  #include <xen/init.h>
  107.13  #include <xen/types.h>
  107.14 @@ -34,7 +30,6 @@
  107.15  #include <xen/numa.h>
  107.16  #include <acpi/acpi_bus.h>
  107.17  #include <acpi/acmacros.h>
  107.18 -#include <asm/page.h> /* __va() */
  107.19  
  107.20  #define ACPI_NUMA	0x80000000
  107.21  #define _COMPONENT	ACPI_NUMA
  107.22 @@ -106,7 +101,7 @@ static int __init acpi_parse_slit(unsign
  107.23  	if (!phys_addr || !size)
  107.24  		return -EINVAL;
  107.25  
  107.26 -	slit = (struct acpi_table_slit *)__va(phys_addr);
  107.27 +	slit = (struct acpi_table_slit *)__acpi_map_table(phys_addr, size);
  107.28  
  107.29  	/* downcast just for %llu vs %lu for i386/ia64  */
  107.30  	localities = (u32) slit->localities;
  107.31 @@ -159,7 +154,7 @@ static int __init acpi_parse_srat(unsign
  107.32  	if (!phys_addr || !size)
  107.33  		return -EINVAL;
  107.34  
  107.35 -	srat = (struct acpi_table_srat *)__va(phys_addr);
  107.36 +	srat = (struct acpi_table_srat *)__acpi_map_table(phys_addr, size);
  107.37  
  107.38  	return 0;
  107.39  }
   108.1 --- a/xen/drivers/char/console.c	Mon Mar 05 12:49:12 2007 -0600
   108.2 +++ b/xen/drivers/char/console.c	Thu Mar 08 14:39:52 2007 -0600
   108.3 @@ -399,6 +399,8 @@ static void __putstr(const char *str)
   108.4          vga_putchar(c);
   108.5          putchar_console_ring(c);
   108.6      }
   108.7 +
   108.8 +    send_guest_global_virq(dom0, VIRQ_CON_RING);
   108.9  }
  108.10  
  108.11  static int printk_prefix_check(char *p, char **pp)
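
(__putstr() can execute early in boot, before dom0 exists; that is
presumably why send_guest_global_virq() in xen/common/event_channel.c,
above, now tolerates a NULL domain pointer.)
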
   109.1 --- a/xen/include/acm/acm_hooks.h	Mon Mar 05 12:49:12 2007 -0600
   109.2 +++ b/xen/include/acm/acm_hooks.h	Thu Mar 08 14:39:52 2007 -0600
   109.3 @@ -247,12 +247,12 @@ static inline int acm_pre_domctl(struct 
   109.4              if (*ssid == NULL) {
   109.5                  printk("%s: Warning. Destroying domain without ssid pointer.\n", 
   109.6                         __func__);
   109.7 -                domain_rcu_lock(d);
   109.8 +                rcu_unlock_domain(d);
   109.9                  return -EACCES;
  109.10              }
  109.11              d->ssid = NULL; /* make sure it's not used any more */
  109.12               /* no policy-specific hook */
  109.13 -            domain_rcu_lock(d);
  109.14 +            rcu_unlock_domain(d);
  109.15              ret = 0;
  109.16          }
  109.17          break;
   110.1 --- a/xen/include/asm-x86/domain.h	Mon Mar 05 12:49:12 2007 -0600
   110.2 +++ b/xen/include/asm-x86/domain.h	Thu Mar 08 14:39:52 2007 -0600
   110.3 @@ -104,6 +104,21 @@ struct shadow_vcpu {
   110.4  };
   110.5  
   110.6  /************************************************/
    110.7 +/*            hardware-assisted paging          */
   110.8 +/************************************************/
   110.9 +struct hap_domain {
  110.10 +    spinlock_t        lock;
  110.11 +    int               locker;
  110.12 +    const char       *locker_function;
  110.13 +    
  110.14 +    struct list_head  freelists;
  110.15 +    struct list_head  p2m_freelist;
  110.16 +    unsigned int      total_pages;  /* number of pages allocated */
  110.17 +    unsigned int      free_pages;   /* number of pages on freelists */
   110.18 +    unsigned int      p2m_pages;    /* number of pages allocated to p2m */
  110.19 +};
  110.20 +
  110.21 +/************************************************/
  110.22  /*       p2m handling                           */
  110.23  /************************************************/
  110.24  
  110.25 @@ -135,6 +150,7 @@ struct paging_domain {
  110.26      struct shadow_domain shadow;
  110.27  
  110.28      /* Other paging assistance code will have structs here */
  110.29 +    struct hap_domain    hap;
  110.30  };
  110.31  
  110.32  struct paging_vcpu {
   111.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
   111.2 +++ b/xen/include/asm-x86/hap.h	Thu Mar 08 14:39:52 2007 -0600
   111.3 @@ -0,0 +1,122 @@
   111.4 +/******************************************************************************
   111.5 + * include/asm-x86/hap.h
   111.6 + *
   111.7 + * hardware-assisted paging
   111.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
   111.9 + * 
  111.10 + * Parts of this code are Copyright (c) 2006 by XenSource Inc.
  111.11 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
  111.12 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
  111.13 + *
  111.14 + * This program is free software; you can redistribute it and/or modify
  111.15 + * it under the terms of the GNU General Public License as published by
  111.16 + * the Free Software Foundation; either version 2 of the License, or
  111.17 + * (at your option) any later version.
  111.18 + *
  111.19 + * This program is distributed in the hope that it will be useful,
  111.20 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
  111.21 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  111.22 + * GNU General Public License for more details.
  111.23 + *
  111.24 + * You should have received a copy of the GNU General Public License
  111.25 + * along with this program; if not, write to the Free Software
  111.26 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  111.27 + */
  111.28 +
  111.29 +#ifndef _XEN_HAP_H
  111.30 +#define _XEN_HAP_H
  111.31 +
  111.32 +#define HERE_I_AM                                                     \
  111.33 +    debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__)
  111.34 +#define HAP_PRINTK(_f, _a...)                                         \
  111.35 +    debugtrace_printk("hap: %s(): " _f, __func__, ##_a)
  111.36 +#define HAP_ERROR(_f, _a...)                                          \
  111.37 +    printk("hap error: %s(): " _f, __func__, ##_a)
  111.38 +
  111.39 +/************************************************/
  111.40 +/*          hap domain page mapping             */
  111.41 +/************************************************/
  111.42 +static inline void *
  111.43 +hap_map_domain_page(mfn_t mfn)
  111.44 +{
  111.45 +    return map_domain_page(mfn_x(mfn));
  111.46 +}
  111.47 +
  111.48 +static inline void
  111.49 +hap_unmap_domain_page(void *p)
  111.50 +{
  111.51 +    unmap_domain_page(p);
  111.52 +}
  111.53 +
  111.54 +static inline void *
  111.55 +hap_map_domain_page_global(mfn_t mfn)
  111.56 +{
  111.57 +    return map_domain_page_global(mfn_x(mfn));
  111.58 +}
  111.59 +
  111.60 +static inline void 
  111.61 +hap_unmap_domain_page_global(void *p) 
  111.62 +{
  111.63 +    unmap_domain_page_global(p);
  111.64 +}
  111.65 +
  111.66 +/************************************************/
  111.67 +/*           locking for hap code               */
  111.68 +/************************************************/
  111.69 +#define hap_lock_init(_d)                                   \
  111.70 +    do {                                                    \
  111.71 +        spin_lock_init(&(_d)->arch.paging.hap.lock);        \
  111.72 +        (_d)->arch.paging.hap.locker = -1;                  \
  111.73 +        (_d)->arch.paging.hap.locker_function = "nobody";   \
  111.74 +    } while (0)
  111.75 +
  111.76 +#define hap_locked_by_me(_d)                     \
  111.77 +    (current->processor == (_d)->arch.paging.hap.locker)
  111.78 +
  111.79 +#define hap_lock(_d)                                                       \
  111.80 +    do {                                                                   \
  111.81 +        if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\
  111.82 +        {                                                                  \
  111.83 +            printk("Error: hap lock held by %s\n",                         \
  111.84 +                   (_d)->arch.paging.hap.locker_function);                 \
  111.85 +            BUG();                                                         \
  111.86 +        }                                                                  \
  111.87 +        spin_lock(&(_d)->arch.paging.hap.lock);                            \
  111.88 +        ASSERT((_d)->arch.paging.hap.locker == -1);                        \
  111.89 +        (_d)->arch.paging.hap.locker = current->processor;                 \
  111.90 +        (_d)->arch.paging.hap.locker_function = __func__;                  \
  111.91 +    } while (0)
  111.92 +
  111.93 +#define hap_unlock(_d)                                              \
  111.94 +    do {                                                            \
  111.95 +        ASSERT((_d)->arch.paging.hap.locker == current->processor); \
  111.96 +        (_d)->arch.paging.hap.locker = -1;                          \
  111.97 +        (_d)->arch.paging.hap.locker_function = "nobody";           \
  111.98 +        spin_unlock(&(_d)->arch.paging.hap.lock);                   \
  111.99 +    } while (0)
 111.100 +
 111.101 +/************************************************/
 111.102 +/*        hap domain level functions            */
 111.103 +/************************************************/
 111.104 +void  hap_domain_init(struct domain *d);
 111.105 +int   hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
 111.106 +                 XEN_GUEST_HANDLE(void) u_domctl);
 111.107 +int   hap_enable(struct domain *d, u32 mode);
 111.108 +void  hap_final_teardown(struct domain *d);
 111.109 +void  hap_teardown(struct domain *d);
 111.110 +void  hap_vcpu_init(struct vcpu *v);
 111.111 +
 111.112 +extern struct paging_mode hap_paging_real_mode;
 111.113 +extern struct paging_mode hap_paging_protected_mode;
 111.114 +extern struct paging_mode hap_paging_pae_mode;
 111.115 +extern struct paging_mode hap_paging_long_mode;
 111.116 +#endif /* XEN_HAP_H */
 111.117 +
 111.118 +/*
 111.119 + * Local variables:
 111.120 + * mode: C
 111.121 + * c-set-style: "BSD"
 111.122 + * c-basic-offset: 4
 111.123 + * indent-tabs-mode: nil
 111.124 + * End:
 111.125 + */
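
Besides the spinlock itself, hap_lock()/hap_unlock() record the owning CPU
and function name, so a recursive acquisition is caught with an immediate
BUG() instead of a silent deadlock. Typical usage (a sketch; hap_lock_init
is assumed to have run from hap_domain_init):

    /* Sketch of a HAP entry point following the locking discipline above. */
    void hap_do_something(struct domain *d)
    {
        hap_lock(d);                /* BUG()s if this CPU already holds it */
        ASSERT(hap_locked_by_me(d));
        /* ... modify d->arch.paging.hap state ... */
        hap_unlock(d);
    }
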
   112.1 --- a/xen/include/asm-x86/hvm/svm/emulate.h	Mon Mar 05 12:49:12 2007 -0600
   112.2 +++ b/xen/include/asm-x86/hvm/svm/emulate.h	Thu Mar 08 14:39:52 2007 -0600
   112.3 @@ -76,7 +76,7 @@ enum instruction_index {
   112.4  };
   112.5  
   112.6  
   112.7 -extern unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb, 
   112.8 +extern unsigned long get_effective_addr_modrm64(
   112.9          struct cpu_user_regs *regs, const u8 prefix, int inst_len,
  112.10          const u8 *operand, u8 *size);
  112.11  extern unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, 
  112.12 @@ -85,17 +85,17 @@ extern unsigned long get_effective_addr_
  112.13  extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb);
  112.14  extern unsigned int decode_dest_reg(u8 prefix, u8 modrm);
  112.15  extern unsigned int decode_src_reg(u8 prefix, u8 modrm);
  112.16 -extern unsigned long svm_rip2pointer(struct vmcb_struct *vmcb);
  112.17 -extern int __get_instruction_length_from_list(struct vmcb_struct *vmcb,
  112.18 +extern unsigned long svm_rip2pointer(struct vcpu *v);
  112.19 +extern int __get_instruction_length_from_list(struct vcpu *v,
  112.20          enum instruction_index *list, unsigned int list_count, 
  112.21          u8 *guest_eip_buf, enum instruction_index *match);
  112.22  
  112.23  
  112.24 -static inline int __get_instruction_length(struct vmcb_struct *vmcb, 
  112.25 +static inline int __get_instruction_length(struct vcpu *v, 
  112.26          enum instruction_index instr, u8 *guest_eip_buf)
  112.27  {
  112.28      return __get_instruction_length_from_list(
  112.29 -        vmcb, &instr, 1, guest_eip_buf, NULL);
  112.30 +        v, &instr, 1, guest_eip_buf, NULL);
  112.31  }
  112.32  
  112.33  
   113.1 --- a/xen/include/asm-x86/hvm/svm/svm.h	Mon Mar 05 12:49:12 2007 -0600
   113.2 +++ b/xen/include/asm-x86/hvm/svm/svm.h	Thu Mar 08 14:39:52 2007 -0600
   113.3 @@ -34,6 +34,41 @@ extern void arch_svm_do_resume(struct vc
   113.4  
   113.5  extern u64 root_vmcb_pa[NR_CPUS];
   113.6  
   113.7 +static inline int svm_long_mode_enabled(struct vcpu *v)
   113.8 +{
   113.9 +    u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer;
  113.10 +    return guest_efer & EFER_LMA;
  113.11 +}
  113.12 +
  113.13 +static inline int svm_lme_is_set(struct vcpu *v)
  113.14 +{
  113.15 +    u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer;
  113.16 +    return guest_efer & EFER_LME;
  113.17 +}
  113.18 +
  113.19 +static inline int svm_cr4_pae_is_set(struct vcpu *v)
  113.20 +{
  113.21 +    unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
  113.22 +    return guest_cr4 & X86_CR4_PAE;
  113.23 +}
  113.24 +
  113.25 +static inline int svm_paging_enabled(struct vcpu *v)
  113.26 +{
  113.27 +    unsigned long guest_cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
  113.28 +    return (guest_cr0 & X86_CR0_PE) && (guest_cr0 & X86_CR0_PG);
  113.29 +}
  113.30 +
  113.31 +static inline int svm_pae_enabled(struct vcpu *v)
  113.32 +{
  113.33 +    unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
  113.34 +    return svm_paging_enabled(v) && (guest_cr4 & X86_CR4_PAE);
  113.35 +}
  113.36 +
  113.37 +static inline int svm_pgbit_test(struct vcpu *v)
  113.38 +{
  113.39 +    return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG;
  113.40 +}
  113.41 +
  113.42  #define SVM_REG_EAX (0) 
  113.43  #define SVM_REG_ECX (1) 
  113.44  #define SVM_REG_EDX (2) 
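
These predicates read the shadowed control registers directly rather than a
cached bit-field. A hedged sketch of how they might combine in a CR0-write
intercept (the handler is illustrative; the real intercept lives elsewhere):

    /* Hedged sketch: validating a guest's attempt to enable paging
     * while EFER.LME is set, using the predicates above. */
    static int example_cr0_write(struct vcpu *v, unsigned long new_cr0)
    {
        if ( (new_cr0 & X86_CR0_PG) && svm_lme_is_set(v) )
        {
            /* Entering long mode requires CR4.PAE to be set first. */
            if ( !svm_cr4_pae_is_set(v) )
                return 0;   /* real code would inject #GP here */
            v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA;
        }
        return 1;
    }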
   114.1 --- a/xen/include/asm-x86/hvm/svm/vmcb.h	Mon Mar 05 12:49:12 2007 -0600
   114.2 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h	Thu Mar 08 14:39:52 2007 -0600
   114.3 @@ -303,14 +303,6 @@ enum VMEXIT_EXITCODE
   114.4      VMEXIT_INVALID          =  -1
   114.5  };
   114.6  
   114.7 -enum {
   114.8 -    SVM_CPU_STATE_PG_ENABLED=0,
   114.9 -    SVM_CPU_STATE_PAE_ENABLED,
  114.10 -    SVM_CPU_STATE_LME_ENABLED,      
  114.11 -    SVM_CPU_STATE_LMA_ENABLED,
  114.12 -    SVM_CPU_STATE_ASSIST_ENABLED,
  114.13 -};  
  114.14 -
  114.15  /* Definitions of segment state are borrowed by the generic HVM code. */
  114.16  typedef segment_attributes_t svm_segment_attributes_t;
  114.17  typedef segment_register_t svm_segment_register_t;
  114.18 @@ -457,12 +449,12 @@ struct arch_svm_struct {
  114.19      int                 saved_irq_vector;
  114.20      u32                 launch_core;
  114.21      
  114.22 -    unsigned long       flags;      /* VMCB flags */
  114.23 -    unsigned long       cpu_shadow_cr0; /* Guest value for CR0 */
  114.24 -    unsigned long       cpu_shadow_cr4; /* Guest value for CR4 */
  114.25 +    unsigned long       flags;            /* VMCB flags */
  114.26 +    unsigned long       cpu_shadow_cr0;   /* Guest value for CR0 */
  114.27 +    unsigned long       cpu_shadow_cr4;   /* Guest value for CR4 */
  114.28 +    unsigned long       cpu_shadow_efer;  /* Guest value for EFER */
  114.29      unsigned long       cpu_cr2;
  114.30      unsigned long       cpu_cr3;
  114.31 -    unsigned long       cpu_state;
  114.32  };
  114.33  
  114.34  struct vmcb_struct *alloc_vmcb(void);
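
The removed SVM_CPU_STATE_* bit-field is subsumed by the new cpu_shadow_efer
field together with the predicates added in svm.h; roughly:

    /* Illustrative before/after for this change:
     *   before:  test_bit(SVM_CPU_STATE_LMA_ENABLED,
     *                     &v->arch.hvm_svm.cpu_state)
     *   after:   v->arch.hvm_svm.cpu_shadow_efer & EFER_LMA
     *   or, via the new helper:  svm_long_mode_enabled(v)
     */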
   115.1 --- a/xen/include/public/arch-x86/xen.h	Mon Mar 05 12:49:12 2007 -0600
   115.2 +++ b/xen/include/public/arch-x86/xen.h	Thu Mar 08 14:39:52 2007 -0600
   115.3 @@ -132,6 +132,7 @@ struct vcpu_guest_context {
   115.4      unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
   115.5      unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
   115.6      unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
   115.7 +    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
   115.8      unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
   115.9      unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
  115.10  #ifdef __i386__
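
A hedged sketch of a 64-bit PV domain builder filling in both pagetable
registers; kernel_pt_mfn/user_pt_mfn and the <<12 MFN-to-CR3 encoding are
illustrative assumptions, not part of this header:

    /* Hedged sketch: both pagetable bases for an x86/64 PV vcpu. */
    struct vcpu_guest_context ctxt;
    memset(&ctxt, 0, sizeof(ctxt));
    ctxt.ctrlreg[3] = kernel_pt_mfn << 12;  /* kernel pagetable (CR3)  */
    ctxt.ctrlreg[1] = user_pt_mfn << 12;    /* user pagetable (x86/64) */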
   116.1 --- a/xen/include/public/xen.h	Mon Mar 05 12:49:12 2007 -0600
   116.2 +++ b/xen/include/public/xen.h	Thu Mar 08 14:39:52 2007 -0600
   116.3 @@ -131,6 +131,7 @@
   116.4  #define VIRQ_TBUF       4  /* G. (DOM0) Trace buffer has records available.  */
   116.5  #define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
   116.6  #define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
   116.7 +#define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */
   116.8  
   116.9  /* Architecture-specific VIRQ definitions. */
  116.10  #define VIRQ_ARCH_0    16
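
A hedged sketch of how a dom0 console daemon might bind the new VIRQ through
the standard event-channel interface (error handling omitted; the surrounding
daemon and the console_port variable are illustrative):

    /* Hedged sketch: dom0 binds VIRQ_CON_RING to learn when bytes
     * arrive on Xen's console ring. */
    struct evtchn_bind_virq bind = { .virq = VIRQ_CON_RING, .vcpu = 0 };

    if ( HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind) == 0 )
        console_port = bind.port;  /* wait for events on this port */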
  116.11 @@ -473,26 +474,24 @@ typedef struct shared_info shared_info_t
  116.12  #endif
  116.13  
  116.14  /*
  116.15 - * Start-of-day memory layout for the initial domain (DOM0):
  116.16 + * Start-of-day memory layout:
  116.17  *  1. The domain is started within a contiguous virtual-memory region.
  116.18 - *  2. The contiguous region begins and ends on an aligned 4MB boundary.
  116.19 - *  3. The region start corresponds to the load address of the OS image.
  116.20 - *     If the load address is not 4MB aligned then the address is rounded down.
  116.21 - *  4. This the order of bootstrap elements in the initial virtual region:
  116.22 + *  2. The contiguous region ends on an aligned 4MB boundary.
  116.23 + *  3. This is the order of bootstrap elements in the initial virtual region:
  116.24   *      a. relocated kernel image
  116.25   *      b. initial ram disk              [mod_start, mod_len]
  116.26   *      c. list of allocated page frames [mfn_list, nr_pages]
  116.27   *      d. start_info_t structure        [register ESI (x86)]
  116.28   *      e. bootstrap page tables         [pt_base, CR3 (x86)]
  116.29   *      f. bootstrap stack               [register ESP (x86)]
  116.30 - *  5. Bootstrap elements are packed together, but each is 4kB-aligned.
  116.31 - *  6. The initial ram disk may be omitted.
  116.32 - *  7. The list of page frames forms a contiguous 'pseudo-physical' memory
  116.33 + *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
  116.34 + *  5. The initial ram disk may be omitted.
  116.35 + *  6. The list of page frames forms a contiguous 'pseudo-physical' memory
  116.36   *     layout for the domain. In particular, the bootstrap virtual-memory
  116.37   *     region is a 1:1 mapping to the first section of the pseudo-physical map.
  116.38 - *  8. All bootstrap elements are mapped read-writable for the guest OS. The
  116.39 + *  7. All bootstrap elements are mapped read-writable for the guest OS. The
  116.40   *     only exception is the bootstrap page table, which is mapped read-only.
  116.41 - *  9. There is guaranteed to be at least 512kB padding after the final
  116.42 + *  8. There is guaranteed to be at least 512kB padding after the final
  116.43   *     bootstrap element. If necessary, the bootstrap virtual region is
  116.44   *     extended by an extra 4MB to ensure this.
  116.45   */
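
A hedged sketch of a guest kernel inspecting this layout at start of day,
using fields from start_info_t (the printk wrapper is illustrative):

    /* Hedged sketch: dumping the bootstrap layout described above.
     * 'si' is the start_info_t pointer handed over at boot (ESI on x86). */
    static void dump_boot_layout(start_info_t *si)
    {
        printk("initrd:     %lx..%lx\n",
               si->mod_start, si->mod_start + si->mod_len);
        printk("mfn list:   %lx (%lu pages)\n", si->mfn_list, si->nr_pages);
        printk("pagetables: %lx (%lu frames)\n", si->pt_base, si->nr_pt_frames);
    }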