ia64/xen-unstable
changeset 14500:dcec453681bc
[POWERPC][XEN] Merge with xen-unstable.hg.
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
author   | Hollis Blanchard <hollisb@us.ibm.com>
date     | Thu Mar 08 14:39:52 2007 -0600 (2007-03-08)
parents  | 8f0b5295bb1b 38513d22d234
children | 59305500d95d
files    | linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c
         | linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c
         | linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c
         | linux-2.6-xen-sparse/mm/Kconfig
         | tools/ptsname/Makefile
         | tools/ptsname/ptsname.c
         | tools/ptsname/setup.py
         | xen/arch/x86/mm/shadow/page-guest32.h
line diff
--- a/Config.mk Mon Mar 05 12:49:12 2007 -0600
+++ b/Config.mk Thu Mar 08 14:39:52 2007 -0600
@@ -73,9 +73,10 @@ ACM_SECURITY ?= n
 ACM_DEFAULT_SECURITY_POLICY ?= ACM_NULL_POLICY
 
 # Optional components
-XENSTAT_XENTOP ?= y
-VTPM_TOOLS ?= n
+XENSTAT_XENTOP ?= y
+VTPM_TOOLS ?= n
 LIBXENAPI_BINDINGS ?= n
-XENFB_TOOLS ?= n
+XENFB_TOOLS ?= n
+PYTHON_TOOLS ?= y
 
 -include $(XEN_ROOT)/.config
--- a/config/StdGNU.mk Mon Mar 05 12:49:12 2007 -0600
+++ b/config/StdGNU.mk Thu Mar 08 14:39:52 2007 -0600
@@ -12,9 +12,9 @@ OBJDUMP = $(CROSS_COMPILE)objdump
 MSGFMT = msgfmt
 
 INSTALL = install
-INSTALL_DIR = $(INSTALL) -d -m0755
-INSTALL_DATA = $(INSTALL) -m0644
-INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755 -p
+INSTALL_DATA = $(INSTALL) -m0644 -p
+INSTALL_PROG = $(INSTALL) -m0755 -p
 
 LIB64DIR = lib64
--- a/config/SunOS.mk Mon Mar 05 12:49:12 2007 -0600
+++ b/config/SunOS.mk Thu Mar 08 14:39:52 2007 -0600
@@ -14,9 +14,9 @@ MSGFMT = gmsgfmt
 SHELL = bash
 
 INSTALL = ginstall
-INSTALL_DIR = $(INSTALL) -d -m0755
-INSTALL_DATA = $(INSTALL) -m0644
-INSTALL_PROG = $(INSTALL) -m0755
+INSTALL_DIR = $(INSTALL) -d -m0755 -p
+INSTALL_DATA = $(INSTALL) -m0644 -p
+INSTALL_PROG = $(INSTALL) -m0755 -p
 
 LIB64DIR = lib/amd64
--- a/docs/misc/dump-core-format.txt Mon Mar 05 12:49:12 2007 -0600
+++ b/docs/misc/dump-core-format.txt Thu Mar 08 14:39:52 2007 -0600
@@ -26,11 +26,12 @@ For xen related structure, please see th
 Elf header
 ----------
 The elf header members are set as follows
+e_ident[EI_CLASS] = ELFCLASS64 = 2
 e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
 e_type = ET_CORE = 4
-e_ident[EI_CLASS], e_ident[EI_DATA] and e_flags are set according
-to an architecture which a file is created. Other members are set as usual.
-
+ELFCLASS64 is always used independent of architecture.
+e_ident[EI_DATA] and e_flags are set according to the dumping system's
+architecture. Other members are set as usual.
 
 Sections
 --------
@@ -221,5 +222,10 @@ format_version descriptor
 
 Format version history
 ----------------------
-The currently only (major, minor) = (0, 1) is used.
+Currently only (major, minor) = (0, 1) is used.
 [When the format is changed, it would be described here.]
+
+(0, 1) update
+- EI_CLASS member of elf header was changed to ELFCLASS64 independent of
+  architecture. This is mainly for x86_32pae.
+  The format version isn't bumped because analysis tools can distinguish it.
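The header rules above are easy to mirror in code. A minimal sketch, assuming only the standard <elf.h> definitions (this program is illustrative and not part of the changeset):

/* Fill an ELF core header the way the dump-core format specifies:
 * EI_CLASS is pinned to ELFCLASS64 regardless of the dumping
 * architecture, while EI_DATA (byte order) stays architecture-dependent. */
#include <elf.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	Elf64_Ehdr eh;

	memset(&eh, 0, sizeof(eh));
	memcpy(eh.e_ident, ELFMAG, SELFMAG);
	eh.e_ident[EI_CLASS] = ELFCLASS64;     /* = 2, even for x86_32pae dumps */
	eh.e_ident[EI_OSABI] = ELFOSABI_SYSV;  /* = 0 */
	eh.e_ident[EI_DATA]  = ELFDATA2LSB;    /* per the dumping system */
	eh.e_type            = ET_CORE;        /* = 4 */

	printf("EI_CLASS=%d e_type=%d\n", eh.e_ident[EI_CLASS], eh.e_type);
	return 0;
}

Fixing EI_CLASS to ELFCLASS64 gives analysis tools a single layout to parse; per the note above, only EI_DATA and e_flags still vary with the dumping system.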
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig Thu Mar 08 14:39:52 2007 -0600
@@ -255,7 +255,6 @@ config NR_CPUS
 config SCHED_SMT
 	bool "SMT (Hyperthreading) scheduler support"
 	depends on X86_HT
-	depends on !X86_XEN
 	help
 	  SMT scheduler support improves the CPU scheduler's decision making
 	  when dealing with Intel Pentium 4 chips with HyperThreading at a
@@ -313,11 +312,6 @@ config X86_VISWS_APIC
 	depends on X86_VISWS
 	default y
 
-config X86_TSC
-	bool
-	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ && !X86_XEN
-	default y
-
 config X86_MCE
 	bool "Machine Check Exception"
 	depends on !(X86_VOYAGER || X86_XEN)
--- a/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/Kconfig.cpu Thu Mar 08 14:39:52 2007 -0600
@@ -311,5 +311,5 @@ config X86_OOSTORE
 
 config X86_TSC
 	bool
-	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ
+	depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX) && !X86_NUMAQ && !X86_XEN
 	default y
7.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/alternative-xen.c Mon Mar 05 12:49:12 2007 -0600 7.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 7.3 @@ -1,389 +0,0 @@ 7.4 -#include <linux/module.h> 7.5 -#include <linux/spinlock.h> 7.6 -#include <linux/list.h> 7.7 -#include <asm/alternative.h> 7.8 -#include <asm/sections.h> 7.9 - 7.10 -static int no_replacement = 0; 7.11 -static int smp_alt_once = 0; 7.12 -static int debug_alternative = 0; 7.13 - 7.14 -static int __init noreplacement_setup(char *s) 7.15 -{ 7.16 - no_replacement = 1; 7.17 - return 1; 7.18 -} 7.19 -static int __init bootonly(char *str) 7.20 -{ 7.21 - smp_alt_once = 1; 7.22 - return 1; 7.23 -} 7.24 -static int __init debug_alt(char *str) 7.25 -{ 7.26 - debug_alternative = 1; 7.27 - return 1; 7.28 -} 7.29 - 7.30 -__setup("noreplacement", noreplacement_setup); 7.31 -__setup("smp-alt-boot", bootonly); 7.32 -__setup("debug-alternative", debug_alt); 7.33 - 7.34 -#define DPRINTK(fmt, args...) if (debug_alternative) \ 7.35 - printk(KERN_DEBUG fmt, args) 7.36 - 7.37 -#ifdef GENERIC_NOP1 7.38 -/* Use inline assembly to define this because the nops are defined 7.39 - as inline assembly strings in the include files and we cannot 7.40 - get them easily into strings. */ 7.41 -asm("\t.data\nintelnops: " 7.42 - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 7.43 - GENERIC_NOP7 GENERIC_NOP8); 7.44 -extern unsigned char intelnops[]; 7.45 -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { 7.46 - NULL, 7.47 - intelnops, 7.48 - intelnops + 1, 7.49 - intelnops + 1 + 2, 7.50 - intelnops + 1 + 2 + 3, 7.51 - intelnops + 1 + 2 + 3 + 4, 7.52 - intelnops + 1 + 2 + 3 + 4 + 5, 7.53 - intelnops + 1 + 2 + 3 + 4 + 5 + 6, 7.54 - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 7.55 -}; 7.56 -#endif 7.57 - 7.58 -#ifdef K8_NOP1 7.59 -asm("\t.data\nk8nops: " 7.60 - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 7.61 - K8_NOP7 K8_NOP8); 7.62 -extern unsigned char k8nops[]; 7.63 -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { 7.64 - NULL, 7.65 - k8nops, 7.66 - k8nops + 1, 7.67 - k8nops + 1 + 2, 7.68 - k8nops + 1 + 2 + 3, 7.69 - k8nops + 1 + 2 + 3 + 4, 7.70 - k8nops + 1 + 2 + 3 + 4 + 5, 7.71 - k8nops + 1 + 2 + 3 + 4 + 5 + 6, 7.72 - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 7.73 -}; 7.74 -#endif 7.75 - 7.76 -#ifdef K7_NOP1 7.77 -asm("\t.data\nk7nops: " 7.78 - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 7.79 - K7_NOP7 K7_NOP8); 7.80 -extern unsigned char k7nops[]; 7.81 -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { 7.82 - NULL, 7.83 - k7nops, 7.84 - k7nops + 1, 7.85 - k7nops + 1 + 2, 7.86 - k7nops + 1 + 2 + 3, 7.87 - k7nops + 1 + 2 + 3 + 4, 7.88 - k7nops + 1 + 2 + 3 + 4 + 5, 7.89 - k7nops + 1 + 2 + 3 + 4 + 5 + 6, 7.90 - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, 7.91 -}; 7.92 -#endif 7.93 - 7.94 -#ifdef CONFIG_X86_64 7.95 - 7.96 -extern char __vsyscall_0; 7.97 -static inline unsigned char** find_nop_table(void) 7.98 -{ 7.99 - return k8_nops; 7.100 -} 7.101 - 7.102 -#else /* CONFIG_X86_64 */ 7.103 - 7.104 -static struct nop { 7.105 - int cpuid; 7.106 - unsigned char **noptable; 7.107 -} noptypes[] = { 7.108 - { X86_FEATURE_K8, k8_nops }, 7.109 - { X86_FEATURE_K7, k7_nops }, 7.110 - { -1, NULL } 7.111 -}; 7.112 - 7.113 -static unsigned char** find_nop_table(void) 7.114 -{ 7.115 - unsigned char **noptable = intel_nops; 7.116 - int i; 7.117 - 7.118 - for (i = 0; noptypes[i].cpuid >= 0; i++) { 7.119 - if (boot_cpu_has(noptypes[i].cpuid)) { 7.120 - noptable = noptypes[i].noptable; 7.121 - break; 7.122 - } 7.123 - } 7.124 - return noptable; 7.125 -} 
7.126 - 7.127 -#endif /* CONFIG_X86_64 */ 7.128 - 7.129 -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 7.130 -extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; 7.131 -extern u8 *__smp_locks[], *__smp_locks_end[]; 7.132 - 7.133 -extern u8 __smp_alt_begin[], __smp_alt_end[]; 7.134 - 7.135 -/* Replace instructions with better alternatives for this CPU type. 7.136 - This runs before SMP is initialized to avoid SMP problems with 7.137 - self modifying code. This implies that assymetric systems where 7.138 - APs have less capabilities than the boot processor are not handled. 7.139 - Tough. Make sure you disable such features by hand. */ 7.140 - 7.141 -void apply_alternatives(struct alt_instr *start, struct alt_instr *end) 7.142 -{ 7.143 - unsigned char **noptable = find_nop_table(); 7.144 - struct alt_instr *a; 7.145 - u8 *instr; 7.146 - int diff, i, k; 7.147 - 7.148 - DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); 7.149 - for (a = start; a < end; a++) { 7.150 - BUG_ON(a->replacementlen > a->instrlen); 7.151 - if (!boot_cpu_has(a->cpuid)) 7.152 - continue; 7.153 - instr = a->instr; 7.154 -#ifdef CONFIG_X86_64 7.155 - /* vsyscall code is not mapped yet. resolve it manually. */ 7.156 - if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) { 7.157 - instr -= VSYSCALL_START - (unsigned long)&__vsyscall_0; 7.158 - DPRINTK("%s: vsyscall fixup: %p => %p\n", 7.159 - __FUNCTION__, a->instr, instr); 7.160 - } 7.161 -#endif 7.162 - memcpy(instr, a->replacement, a->replacementlen); 7.163 - diff = a->instrlen - a->replacementlen; 7.164 - /* Pad the rest with nops */ 7.165 - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { 7.166 - k = diff; 7.167 - if (k > ASM_NOP_MAX) 7.168 - k = ASM_NOP_MAX; 7.169 - memcpy(a->instr + i, noptable[k], k); 7.170 - } 7.171 - } 7.172 -} 7.173 - 7.174 -#ifdef CONFIG_SMP 7.175 - 7.176 -static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end) 7.177 -{ 7.178 - struct alt_instr *a; 7.179 - 7.180 - DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); 7.181 - for (a = start; a < end; a++) { 7.182 - memcpy(a->replacement + a->replacementlen, 7.183 - a->instr, 7.184 - a->instrlen); 7.185 - } 7.186 -} 7.187 - 7.188 -static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) 7.189 -{ 7.190 - struct alt_instr *a; 7.191 - 7.192 - for (a = start; a < end; a++) { 7.193 - memcpy(a->instr, 7.194 - a->replacement + a->replacementlen, 7.195 - a->instrlen); 7.196 - } 7.197 -} 7.198 - 7.199 -static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) 7.200 -{ 7.201 - u8 **ptr; 7.202 - 7.203 - for (ptr = start; ptr < end; ptr++) { 7.204 - if (*ptr < text) 7.205 - continue; 7.206 - if (*ptr > text_end) 7.207 - continue; 7.208 - **ptr = 0xf0; /* lock prefix */ 7.209 - }; 7.210 -} 7.211 - 7.212 -static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) 7.213 -{ 7.214 - unsigned char **noptable = find_nop_table(); 7.215 - u8 **ptr; 7.216 - 7.217 - for (ptr = start; ptr < end; ptr++) { 7.218 - if (*ptr < text) 7.219 - continue; 7.220 - if (*ptr > text_end) 7.221 - continue; 7.222 - **ptr = noptable[1][0]; 7.223 - }; 7.224 -} 7.225 - 7.226 -struct smp_alt_module { 7.227 - /* what is this ??? 
*/ 7.228 - struct module *mod; 7.229 - char *name; 7.230 - 7.231 - /* ptrs to lock prefixes */ 7.232 - u8 **locks; 7.233 - u8 **locks_end; 7.234 - 7.235 - /* .text segment, needed to avoid patching init code ;) */ 7.236 - u8 *text; 7.237 - u8 *text_end; 7.238 - 7.239 - struct list_head next; 7.240 -}; 7.241 -static LIST_HEAD(smp_alt_modules); 7.242 -static DEFINE_SPINLOCK(smp_alt); 7.243 - 7.244 -void alternatives_smp_module_add(struct module *mod, char *name, 7.245 - void *locks, void *locks_end, 7.246 - void *text, void *text_end) 7.247 -{ 7.248 - struct smp_alt_module *smp; 7.249 - unsigned long flags; 7.250 - 7.251 - if (no_replacement) 7.252 - return; 7.253 - 7.254 - if (smp_alt_once) { 7.255 - if (boot_cpu_has(X86_FEATURE_UP)) 7.256 - alternatives_smp_unlock(locks, locks_end, 7.257 - text, text_end); 7.258 - return; 7.259 - } 7.260 - 7.261 - smp = kzalloc(sizeof(*smp), GFP_KERNEL); 7.262 - if (NULL == smp) 7.263 - return; /* we'll run the (safe but slow) SMP code then ... */ 7.264 - 7.265 - smp->mod = mod; 7.266 - smp->name = name; 7.267 - smp->locks = locks; 7.268 - smp->locks_end = locks_end; 7.269 - smp->text = text; 7.270 - smp->text_end = text_end; 7.271 - DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", 7.272 - __FUNCTION__, smp->locks, smp->locks_end, 7.273 - smp->text, smp->text_end, smp->name); 7.274 - 7.275 - spin_lock_irqsave(&smp_alt, flags); 7.276 - list_add_tail(&smp->next, &smp_alt_modules); 7.277 - if (boot_cpu_has(X86_FEATURE_UP)) 7.278 - alternatives_smp_unlock(smp->locks, smp->locks_end, 7.279 - smp->text, smp->text_end); 7.280 - spin_unlock_irqrestore(&smp_alt, flags); 7.281 -} 7.282 - 7.283 -void alternatives_smp_module_del(struct module *mod) 7.284 -{ 7.285 - struct smp_alt_module *item; 7.286 - unsigned long flags; 7.287 - 7.288 - if (no_replacement || smp_alt_once) 7.289 - return; 7.290 - 7.291 - spin_lock_irqsave(&smp_alt, flags); 7.292 - list_for_each_entry(item, &smp_alt_modules, next) { 7.293 - if (mod != item->mod) 7.294 - continue; 7.295 - list_del(&item->next); 7.296 - spin_unlock_irqrestore(&smp_alt, flags); 7.297 - DPRINTK("%s: %s\n", __FUNCTION__, item->name); 7.298 - kfree(item); 7.299 - return; 7.300 - } 7.301 - spin_unlock_irqrestore(&smp_alt, flags); 7.302 -} 7.303 - 7.304 -void alternatives_smp_switch(int smp) 7.305 -{ 7.306 - struct smp_alt_module *mod; 7.307 - unsigned long flags; 7.308 - 7.309 -#ifdef CONFIG_LOCKDEP 7.310 - /* 7.311 - * A not yet fixed binutils section handling bug prevents 7.312 - * alternatives-replacement from working reliably, so turn 7.313 - * it off: 7.314 - */ 7.315 - printk("lockdep: not fixing up alternatives.\n"); 7.316 - return; 7.317 -#endif 7.318 - 7.319 - if (no_replacement || smp_alt_once) 7.320 - return; 7.321 - BUG_ON(!smp && (num_online_cpus() > 1)); 7.322 - 7.323 - spin_lock_irqsave(&smp_alt, flags); 7.324 - if (smp) { 7.325 - printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); 7.326 - clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); 7.327 - clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); 7.328 - alternatives_smp_apply(__smp_alt_instructions, 7.329 - __smp_alt_instructions_end); 7.330 - list_for_each_entry(mod, &smp_alt_modules, next) 7.331 - alternatives_smp_lock(mod->locks, mod->locks_end, 7.332 - mod->text, mod->text_end); 7.333 - } else { 7.334 - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); 7.335 - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); 7.336 - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); 7.337 - 
apply_alternatives(__smp_alt_instructions, 7.338 - __smp_alt_instructions_end); 7.339 - list_for_each_entry(mod, &smp_alt_modules, next) 7.340 - alternatives_smp_unlock(mod->locks, mod->locks_end, 7.341 - mod->text, mod->text_end); 7.342 - } 7.343 - spin_unlock_irqrestore(&smp_alt, flags); 7.344 -} 7.345 - 7.346 -#endif 7.347 - 7.348 -void __init alternative_instructions(void) 7.349 -{ 7.350 - if (no_replacement) { 7.351 - printk(KERN_INFO "(SMP-)alternatives turned off\n"); 7.352 - free_init_pages("SMP alternatives", 7.353 - (unsigned long)__smp_alt_begin, 7.354 - (unsigned long)__smp_alt_end); 7.355 - return; 7.356 - } 7.357 - apply_alternatives(__alt_instructions, __alt_instructions_end); 7.358 - 7.359 - /* switch to patch-once-at-boottime-only mode and free the 7.360 - * tables in case we know the number of CPUs will never ever 7.361 - * change */ 7.362 -#ifdef CONFIG_HOTPLUG_CPU 7.363 - if (num_possible_cpus() < 2) 7.364 - smp_alt_once = 1; 7.365 -#else 7.366 - smp_alt_once = 1; 7.367 -#endif 7.368 - 7.369 -#ifdef CONFIG_SMP 7.370 - if (smp_alt_once) { 7.371 - if (1 == num_possible_cpus()) { 7.372 - printk(KERN_INFO "SMP alternatives: switching to UP code\n"); 7.373 - set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); 7.374 - set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); 7.375 - apply_alternatives(__smp_alt_instructions, 7.376 - __smp_alt_instructions_end); 7.377 - alternatives_smp_unlock(__smp_locks, __smp_locks_end, 7.378 - _text, _etext); 7.379 - } 7.380 - free_init_pages("SMP alternatives", 7.381 - (unsigned long)__smp_alt_begin, 7.382 - (unsigned long)__smp_alt_end); 7.383 - } else { 7.384 - alternatives_smp_save(__smp_alt_instructions, 7.385 - __smp_alt_instructions_end); 7.386 - alternatives_smp_module_add(NULL, "core kernel", 7.387 - __smp_locks, __smp_locks_end, 7.388 - _text, _etext); 7.389 - alternatives_smp_switch(0); 7.390 - } 7.391 -#endif 7.392 -}
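The file deleted above is the Xen copy of the i386 alternatives code, which this changeset drops in favor of the native file (see the x86_64 Makefile change later in the diff). Its central trick is the loop in apply_alternatives() that overwrites an instruction site with a shorter replacement and pads the remainder with nops. A standalone toy version of just that padding step, with made-up buffer sizes and a single 0x90 nop standing in for the per-CPU noptable:

#include <stdio.h>
#include <string.h>

#define ASM_NOP_MAX 8

int main(void)
{
	unsigned char instr[16] = { 0 };
	const unsigned char repl[] = { 0x0f, 0x1f, 0x00 };  /* hypothetical */
	int instrlen = 10, replacementlen = sizeof(repl);
	int diff, i, k;

	/* Copy the replacement, then pad up to instrlen with nops, at most
	 * ASM_NOP_MAX bytes per step -- the same shape as apply_alternatives(). */
	memcpy(instr, repl, replacementlen);
	diff = instrlen - replacementlen;
	for (i = replacementlen; diff > 0; diff -= k, i += k) {
		k = diff > ASM_NOP_MAX ? ASM_NOP_MAX : diff;
		memset(instr + i, 0x90, k);  /* real code copies noptable[k] */
	}

	for (i = 0; i < instrlen; i++)
		printf("%02x ", instr[i]);
	printf("\n");
	return 0;
}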
8.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/cpu/intel_cacheinfo-xen.c Mon Mar 05 12:49:12 2007 -0600 8.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 8.3 @@ -1,774 +0,0 @@ 8.4 -/* 8.5 - * Routines to indentify caches on Intel CPU. 8.6 - * 8.7 - * Changes: 8.8 - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 8.9 - * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 8.10 - * Andi Kleen : CPUID4 emulation on AMD. 8.11 - */ 8.12 - 8.13 -#include <linux/init.h> 8.14 -#include <linux/slab.h> 8.15 -#include <linux/device.h> 8.16 -#include <linux/compiler.h> 8.17 -#include <linux/cpu.h> 8.18 -#include <linux/sched.h> 8.19 - 8.20 -#include <asm/processor.h> 8.21 -#include <asm/smp.h> 8.22 - 8.23 -#define LVL_1_INST 1 8.24 -#define LVL_1_DATA 2 8.25 -#define LVL_2 3 8.26 -#define LVL_3 4 8.27 -#define LVL_TRACE 5 8.28 - 8.29 -struct _cache_table 8.30 -{ 8.31 - unsigned char descriptor; 8.32 - char cache_type; 8.33 - short size; 8.34 -}; 8.35 - 8.36 -/* all the cache descriptor types we care about (no TLB or trace cache entries) */ 8.37 -static struct _cache_table cache_table[] __cpuinitdata = 8.38 -{ 8.39 - { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ 8.40 - { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ 8.41 - { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ 8.42 - { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ 8.43 - { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 8.44 - { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 8.45 - { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 8.46 - { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 8.47 - { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ 8.48 - { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ 8.49 - { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 8.50 - { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ 8.51 - { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ 8.52 - { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 8.53 - { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ 8.54 - { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 8.55 - { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ 8.56 - { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ 8.57 - { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ 8.58 - { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ 8.59 - { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ 8.60 - { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ 8.61 - { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ 8.62 - { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ 8.63 - { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ 8.64 - { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ 8.65 - { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ 8.66 - { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ 8.67 - { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 8.68 - { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 8.69 - { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, 
sectored cache, 64 byte line size */ 8.70 - { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ 8.71 - { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */ 8.72 - { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ 8.73 - { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ 8.74 - { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ 8.75 - { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ 8.76 - { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 8.77 - { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 8.78 - { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 8.79 - { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ 8.80 - { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ 8.81 - { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ 8.82 - { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ 8.83 - { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ 8.84 - { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ 8.85 - { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ 8.86 - { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ 8.87 - { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ 8.88 - { 0x00, 0, 0} 8.89 -}; 8.90 - 8.91 - 8.92 -enum _cache_type 8.93 -{ 8.94 - CACHE_TYPE_NULL = 0, 8.95 - CACHE_TYPE_DATA = 1, 8.96 - CACHE_TYPE_INST = 2, 8.97 - CACHE_TYPE_UNIFIED = 3 8.98 -}; 8.99 - 8.100 -union _cpuid4_leaf_eax { 8.101 - struct { 8.102 - enum _cache_type type:5; 8.103 - unsigned int level:3; 8.104 - unsigned int is_self_initializing:1; 8.105 - unsigned int is_fully_associative:1; 8.106 - unsigned int reserved:4; 8.107 - unsigned int num_threads_sharing:12; 8.108 - unsigned int num_cores_on_die:6; 8.109 - } split; 8.110 - u32 full; 8.111 -}; 8.112 - 8.113 -union _cpuid4_leaf_ebx { 8.114 - struct { 8.115 - unsigned int coherency_line_size:12; 8.116 - unsigned int physical_line_partition:10; 8.117 - unsigned int ways_of_associativity:10; 8.118 - } split; 8.119 - u32 full; 8.120 -}; 8.121 - 8.122 -union _cpuid4_leaf_ecx { 8.123 - struct { 8.124 - unsigned int number_of_sets:32; 8.125 - } split; 8.126 - u32 full; 8.127 -}; 8.128 - 8.129 -struct _cpuid4_info { 8.130 - union _cpuid4_leaf_eax eax; 8.131 - union _cpuid4_leaf_ebx ebx; 8.132 - union _cpuid4_leaf_ecx ecx; 8.133 - unsigned long size; 8.134 - cpumask_t shared_cpu_map; 8.135 -}; 8.136 - 8.137 -unsigned short num_cache_leaves; 8.138 - 8.139 -/* AMD doesn't have CPUID4. Emulate it here to report the same 8.140 - information to the user. This makes some assumptions about the machine: 8.141 - No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs. 8.142 - 8.143 - In theory the TLBs could be reported as fake type (they are in "dummy"). 8.144 - Maybe later */ 8.145 -union l1_cache { 8.146 - struct { 8.147 - unsigned line_size : 8; 8.148 - unsigned lines_per_tag : 8; 8.149 - unsigned assoc : 8; 8.150 - unsigned size_in_kb : 8; 8.151 - }; 8.152 - unsigned val; 8.153 -}; 8.154 - 8.155 -union l2_cache { 8.156 - struct { 8.157 - unsigned line_size : 8; 8.158 - unsigned lines_per_tag : 4; 8.159 - unsigned assoc : 4; 8.160 - unsigned size_in_kb : 16; 8.161 - }; 8.162 - unsigned val; 8.163 -}; 8.164 - 8.165 -static const unsigned short assocs[] = { 8.166 - [1] = 1, [2] = 2, [4] = 4, [6] = 8, 8.167 - [8] = 16, 8.168 - [0xf] = 0xffff // ?? 
8.169 - }; 8.170 -static const unsigned char levels[] = { 1, 1, 2 }; 8.171 -static const unsigned char types[] = { 1, 2, 3 }; 8.172 - 8.173 -static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 8.174 - union _cpuid4_leaf_ebx *ebx, 8.175 - union _cpuid4_leaf_ecx *ecx) 8.176 -{ 8.177 - unsigned dummy; 8.178 - unsigned line_size, lines_per_tag, assoc, size_in_kb; 8.179 - union l1_cache l1i, l1d; 8.180 - union l2_cache l2; 8.181 - 8.182 - eax->full = 0; 8.183 - ebx->full = 0; 8.184 - ecx->full = 0; 8.185 - 8.186 - cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); 8.187 - cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy); 8.188 - 8.189 - if (leaf > 2 || !l1d.val || !l1i.val || !l2.val) 8.190 - return; 8.191 - 8.192 - eax->split.is_self_initializing = 1; 8.193 - eax->split.type = types[leaf]; 8.194 - eax->split.level = levels[leaf]; 8.195 - eax->split.num_threads_sharing = 0; 8.196 - eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; 8.197 - 8.198 - if (leaf <= 1) { 8.199 - union l1_cache *l1 = leaf == 0 ? &l1d : &l1i; 8.200 - assoc = l1->assoc; 8.201 - line_size = l1->line_size; 8.202 - lines_per_tag = l1->lines_per_tag; 8.203 - size_in_kb = l1->size_in_kb; 8.204 - } else { 8.205 - assoc = l2.assoc; 8.206 - line_size = l2.line_size; 8.207 - lines_per_tag = l2.lines_per_tag; 8.208 - /* cpu_data has errata corrections for K7 applied */ 8.209 - size_in_kb = current_cpu_data.x86_cache_size; 8.210 - } 8.211 - 8.212 - if (assoc == 0xf) 8.213 - eax->split.is_fully_associative = 1; 8.214 - ebx->split.coherency_line_size = line_size - 1; 8.215 - ebx->split.ways_of_associativity = assocs[assoc] - 1; 8.216 - ebx->split.physical_line_partition = lines_per_tag - 1; 8.217 - ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / 8.218 - (ebx->split.ways_of_associativity + 1) - 1; 8.219 -} 8.220 - 8.221 -static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 8.222 -{ 8.223 - union _cpuid4_leaf_eax eax; 8.224 - union _cpuid4_leaf_ebx ebx; 8.225 - union _cpuid4_leaf_ecx ecx; 8.226 - unsigned edx; 8.227 - 8.228 - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 8.229 - amd_cpuid4(index, &eax, &ebx, &ecx); 8.230 - else 8.231 - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 8.232 - if (eax.split.type == CACHE_TYPE_NULL) 8.233 - return -EIO; /* better error ? 
*/ 8.234 - 8.235 - this_leaf->eax = eax; 8.236 - this_leaf->ebx = ebx; 8.237 - this_leaf->ecx = ecx; 8.238 - this_leaf->size = (ecx.split.number_of_sets + 1) * 8.239 - (ebx.split.coherency_line_size + 1) * 8.240 - (ebx.split.physical_line_partition + 1) * 8.241 - (ebx.split.ways_of_associativity + 1); 8.242 - return 0; 8.243 -} 8.244 - 8.245 -/* will only be called once; __init is safe here */ 8.246 -static int __init find_num_cache_leaves(void) 8.247 -{ 8.248 - unsigned int eax, ebx, ecx, edx; 8.249 - union _cpuid4_leaf_eax cache_eax; 8.250 - int i = -1; 8.251 - 8.252 - do { 8.253 - ++i; 8.254 - /* Do cpuid(4) loop to find out num_cache_leaves */ 8.255 - cpuid_count(4, i, &eax, &ebx, &ecx, &edx); 8.256 - cache_eax.full = eax; 8.257 - } while (cache_eax.split.type != CACHE_TYPE_NULL); 8.258 - return i; 8.259 -} 8.260 - 8.261 -unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) 8.262 -{ 8.263 - unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ 8.264 - unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ 8.265 - unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ 8.266 - unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; 8.267 -#ifdef CONFIG_X86_HT 8.268 - unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); 8.269 -#endif 8.270 - 8.271 - if (c->cpuid_level > 3) { 8.272 - static int is_initialized; 8.273 - 8.274 - if (is_initialized == 0) { 8.275 - /* Init num_cache_leaves from boot CPU */ 8.276 - num_cache_leaves = find_num_cache_leaves(); 8.277 - is_initialized++; 8.278 - } 8.279 - 8.280 - /* 8.281 - * Whenever possible use cpuid(4), deterministic cache 8.282 - * parameters cpuid leaf to find the cache details 8.283 - */ 8.284 - for (i = 0; i < num_cache_leaves; i++) { 8.285 - struct _cpuid4_info this_leaf; 8.286 - 8.287 - int retval; 8.288 - 8.289 - retval = cpuid4_cache_lookup(i, &this_leaf); 8.290 - if (retval >= 0) { 8.291 - switch(this_leaf.eax.split.level) { 8.292 - case 1: 8.293 - if (this_leaf.eax.split.type == 8.294 - CACHE_TYPE_DATA) 8.295 - new_l1d = this_leaf.size/1024; 8.296 - else if (this_leaf.eax.split.type == 8.297 - CACHE_TYPE_INST) 8.298 - new_l1i = this_leaf.size/1024; 8.299 - break; 8.300 - case 2: 8.301 - new_l2 = this_leaf.size/1024; 8.302 - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 8.303 - index_msb = get_count_order(num_threads_sharing); 8.304 - l2_id = c->apicid >> index_msb; 8.305 - break; 8.306 - case 3: 8.307 - new_l3 = this_leaf.size/1024; 8.308 - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 8.309 - index_msb = get_count_order(num_threads_sharing); 8.310 - l3_id = c->apicid >> index_msb; 8.311 - break; 8.312 - default: 8.313 - break; 8.314 - } 8.315 - } 8.316 - } 8.317 - } 8.318 - /* 8.319 - * Don't use cpuid2 if cpuid4 is supported. 
For P4, we use cpuid2 for 8.320 - * trace cache 8.321 - */ 8.322 - if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { 8.323 - /* supports eax=2 call */ 8.324 - int i, j, n; 8.325 - int regs[4]; 8.326 - unsigned char *dp = (unsigned char *)regs; 8.327 - int only_trace = 0; 8.328 - 8.329 - if (num_cache_leaves != 0 && c->x86 == 15) 8.330 - only_trace = 1; 8.331 - 8.332 - /* Number of times to iterate */ 8.333 - n = cpuid_eax(2) & 0xFF; 8.334 - 8.335 - for ( i = 0 ; i < n ; i++ ) { 8.336 - cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); 8.337 - 8.338 - /* If bit 31 is set, this is an unknown format */ 8.339 - for ( j = 0 ; j < 3 ; j++ ) { 8.340 - if ( regs[j] < 0 ) regs[j] = 0; 8.341 - } 8.342 - 8.343 - /* Byte 0 is level count, not a descriptor */ 8.344 - for ( j = 1 ; j < 16 ; j++ ) { 8.345 - unsigned char des = dp[j]; 8.346 - unsigned char k = 0; 8.347 - 8.348 - /* look up this descriptor in the table */ 8.349 - while (cache_table[k].descriptor != 0) 8.350 - { 8.351 - if (cache_table[k].descriptor == des) { 8.352 - if (only_trace && cache_table[k].cache_type != LVL_TRACE) 8.353 - break; 8.354 - switch (cache_table[k].cache_type) { 8.355 - case LVL_1_INST: 8.356 - l1i += cache_table[k].size; 8.357 - break; 8.358 - case LVL_1_DATA: 8.359 - l1d += cache_table[k].size; 8.360 - break; 8.361 - case LVL_2: 8.362 - l2 += cache_table[k].size; 8.363 - break; 8.364 - case LVL_3: 8.365 - l3 += cache_table[k].size; 8.366 - break; 8.367 - case LVL_TRACE: 8.368 - trace += cache_table[k].size; 8.369 - break; 8.370 - } 8.371 - 8.372 - break; 8.373 - } 8.374 - 8.375 - k++; 8.376 - } 8.377 - } 8.378 - } 8.379 - } 8.380 - 8.381 - if (new_l1d) 8.382 - l1d = new_l1d; 8.383 - 8.384 - if (new_l1i) 8.385 - l1i = new_l1i; 8.386 - 8.387 - if (new_l2) { 8.388 - l2 = new_l2; 8.389 -#ifdef CONFIG_X86_HT 8.390 - cpu_llc_id[cpu] = l2_id; 8.391 -#endif 8.392 - } 8.393 - 8.394 - if (new_l3) { 8.395 - l3 = new_l3; 8.396 -#ifdef CONFIG_X86_HT 8.397 - cpu_llc_id[cpu] = l3_id; 8.398 -#endif 8.399 - } 8.400 - 8.401 - if (trace) 8.402 - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); 8.403 - else if ( l1i ) 8.404 - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); 8.405 - 8.406 - if (l1d) 8.407 - printk(", L1 D cache: %dK\n", l1d); 8.408 - else 8.409 - printk("\n"); 8.410 - 8.411 - if (l2) 8.412 - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); 8.413 - 8.414 - if (l3) 8.415 - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); 8.416 - 8.417 - c->x86_cache_size = l3 ? l3 : (l2 ?
l2 : (l1i+l1d)); 8.418 - 8.419 - return l2; 8.420 -} 8.421 - 8.422 -/* pointer to _cpuid4_info array (for each cache leaf) */ 8.423 -static struct _cpuid4_info *cpuid4_info[NR_CPUS]; 8.424 -#define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) 8.425 - 8.426 -#ifdef CONFIG_SMP 8.427 -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 8.428 -{ 8.429 - struct _cpuid4_info *this_leaf, *sibling_leaf; 8.430 - unsigned long num_threads_sharing; 8.431 - int index_msb, i; 8.432 - struct cpuinfo_x86 *c = cpu_data; 8.433 - 8.434 - this_leaf = CPUID4_INFO_IDX(cpu, index); 8.435 - num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 8.436 - 8.437 - if (num_threads_sharing == 1) 8.438 - cpu_set(cpu, this_leaf->shared_cpu_map); 8.439 - else { 8.440 - index_msb = get_count_order(num_threads_sharing); 8.441 - 8.442 - for_each_online_cpu(i) { 8.443 - if (c[i].apicid >> index_msb == 8.444 - c[cpu].apicid >> index_msb) { 8.445 - cpu_set(i, this_leaf->shared_cpu_map); 8.446 - if (i != cpu && cpuid4_info[i]) { 8.447 - sibling_leaf = CPUID4_INFO_IDX(i, index); 8.448 - cpu_set(cpu, sibling_leaf->shared_cpu_map); 8.449 - } 8.450 - } 8.451 - } 8.452 - } 8.453 -} 8.454 -static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) 8.455 -{ 8.456 - struct _cpuid4_info *this_leaf, *sibling_leaf; 8.457 - int sibling; 8.458 - 8.459 - this_leaf = CPUID4_INFO_IDX(cpu, index); 8.460 - for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { 8.461 - sibling_leaf = CPUID4_INFO_IDX(sibling, index); 8.462 - cpu_clear(cpu, sibling_leaf->shared_cpu_map); 8.463 - } 8.464 -} 8.465 -#else 8.466 -static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} 8.467 -static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} 8.468 -#endif 8.469 - 8.470 -static void free_cache_attributes(unsigned int cpu) 8.471 -{ 8.472 - kfree(cpuid4_info[cpu]); 8.473 - cpuid4_info[cpu] = NULL; 8.474 -} 8.475 - 8.476 -static int __cpuinit detect_cache_attributes(unsigned int cpu) 8.477 -{ 8.478 - struct _cpuid4_info *this_leaf; 8.479 - unsigned long j; 8.480 - int retval; 8.481 - cpumask_t oldmask; 8.482 - 8.483 - if (num_cache_leaves == 0) 8.484 - return -ENOENT; 8.485 - 8.486 - cpuid4_info[cpu] = kmalloc( 8.487 - sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); 8.488 - if (unlikely(cpuid4_info[cpu] == NULL)) 8.489 - return -ENOMEM; 8.490 - memset(cpuid4_info[cpu], 0, 8.491 - sizeof(struct _cpuid4_info) * num_cache_leaves); 8.492 - 8.493 - oldmask = current->cpus_allowed; 8.494 - retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); 8.495 - if (retval) 8.496 - goto out; 8.497 - 8.498 - /* Do cpuid and store the results */ 8.499 - retval = 0; 8.500 - for (j = 0; j < num_cache_leaves; j++) { 8.501 - this_leaf = CPUID4_INFO_IDX(cpu, j); 8.502 - retval = cpuid4_cache_lookup(j, this_leaf); 8.503 - if (unlikely(retval < 0)) 8.504 - break; 8.505 - cache_shared_cpu_map_setup(cpu, j); 8.506 - } 8.507 - set_cpus_allowed(current, oldmask); 8.508 - 8.509 -out: 8.510 - if (retval) 8.511 - free_cache_attributes(cpu); 8.512 - return retval; 8.513 -} 8.514 - 8.515 -#ifdef CONFIG_SYSFS 8.516 - 8.517 -#include <linux/kobject.h> 8.518 -#include <linux/sysfs.h> 8.519 - 8.520 -extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ 8.521 - 8.522 -/* pointer to kobject for cpuX/cache */ 8.523 -static struct kobject * cache_kobject[NR_CPUS]; 8.524 - 8.525 -struct _index_kobject { 8.526 - struct kobject kobj; 8.527 - unsigned int cpu; 8.528 - 
unsigned short index; 8.529 -}; 8.530 - 8.531 -/* pointer to array of kobjects for cpuX/cache/indexY */ 8.532 -static struct _index_kobject *index_kobject[NR_CPUS]; 8.533 -#define INDEX_KOBJECT_PTR(x,y) (&((index_kobject[x])[y])) 8.534 - 8.535 -#define show_one_plus(file_name, object, val) \ 8.536 -static ssize_t show_##file_name \ 8.537 - (struct _cpuid4_info *this_leaf, char *buf) \ 8.538 -{ \ 8.539 - return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ 8.540 -} 8.541 - 8.542 -show_one_plus(level, eax.split.level, 0); 8.543 -show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1); 8.544 -show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1); 8.545 -show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1); 8.546 -show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); 8.547 - 8.548 -static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) 8.549 -{ 8.550 - return sprintf (buf, "%luK\n", this_leaf->size / 1024); 8.551 -} 8.552 - 8.553 -static ssize_t show_shared_cpu_map(struct _cpuid4_info *this_leaf, char *buf) 8.554 -{ 8.555 - char mask_str[NR_CPUS]; 8.556 - cpumask_scnprintf(mask_str, NR_CPUS, this_leaf->shared_cpu_map); 8.557 - return sprintf(buf, "%s\n", mask_str); 8.558 -} 8.559 - 8.560 -static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { 8.561 - switch(this_leaf->eax.split.type) { 8.562 - case CACHE_TYPE_DATA: 8.563 - return sprintf(buf, "Data\n"); 8.564 - break; 8.565 - case CACHE_TYPE_INST: 8.566 - return sprintf(buf, "Instruction\n"); 8.567 - break; 8.568 - case CACHE_TYPE_UNIFIED: 8.569 - return sprintf(buf, "Unified\n"); 8.570 - break; 8.571 - default: 8.572 - return sprintf(buf, "Unknown\n"); 8.573 - break; 8.574 - } 8.575 -} 8.576 - 8.577 -struct _cache_attr { 8.578 - struct attribute attr; 8.579 - ssize_t (*show)(struct _cpuid4_info *, char *); 8.580 - ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); 8.581 -}; 8.582 - 8.583 -#define define_one_ro(_name) \ 8.584 -static struct _cache_attr _name = \ 8.585 - __ATTR(_name, 0444, show_##_name, NULL) 8.586 - 8.587 -define_one_ro(level); 8.588 -define_one_ro(type); 8.589 -define_one_ro(coherency_line_size); 8.590 -define_one_ro(physical_line_partition); 8.591 -define_one_ro(ways_of_associativity); 8.592 -define_one_ro(number_of_sets); 8.593 -define_one_ro(size); 8.594 -define_one_ro(shared_cpu_map); 8.595 - 8.596 -static struct attribute * default_attrs[] = { 8.597 - &type.attr, 8.598 - &level.attr, 8.599 - &coherency_line_size.attr, 8.600 - &physical_line_partition.attr, 8.601 - &ways_of_associativity.attr, 8.602 - &number_of_sets.attr, 8.603 - &size.attr, 8.604 - &shared_cpu_map.attr, 8.605 - NULL 8.606 -}; 8.607 - 8.608 -#define to_object(k) container_of(k, struct _index_kobject, kobj) 8.609 -#define to_attr(a) container_of(a, struct _cache_attr, attr) 8.610 - 8.611 -static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) 8.612 -{ 8.613 - struct _cache_attr *fattr = to_attr(attr); 8.614 - struct _index_kobject *this_leaf = to_object(kobj); 8.615 - ssize_t ret; 8.616 - 8.617 - ret = fattr->show ? 
8.618 - fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 8.619 - buf) : 8.620 - 0; 8.621 - return ret; 8.622 -} 8.623 - 8.624 -static ssize_t store(struct kobject * kobj, struct attribute * attr, 8.625 - const char * buf, size_t count) 8.626 -{ 8.627 - return 0; 8.628 -} 8.629 - 8.630 -static struct sysfs_ops sysfs_ops = { 8.631 - .show = show, 8.632 - .store = store, 8.633 -}; 8.634 - 8.635 -static struct kobj_type ktype_cache = { 8.636 - .sysfs_ops = &sysfs_ops, 8.637 - .default_attrs = default_attrs, 8.638 -}; 8.639 - 8.640 -static struct kobj_type ktype_percpu_entry = { 8.641 - .sysfs_ops = &sysfs_ops, 8.642 -}; 8.643 - 8.644 -static void cpuid4_cache_sysfs_exit(unsigned int cpu) 8.645 -{ 8.646 - kfree(cache_kobject[cpu]); 8.647 - kfree(index_kobject[cpu]); 8.648 - cache_kobject[cpu] = NULL; 8.649 - index_kobject[cpu] = NULL; 8.650 - free_cache_attributes(cpu); 8.651 -} 8.652 - 8.653 -static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) 8.654 -{ 8.655 - 8.656 - if (num_cache_leaves == 0) 8.657 - return -ENOENT; 8.658 - 8.659 - detect_cache_attributes(cpu); 8.660 - if (cpuid4_info[cpu] == NULL) 8.661 - return -ENOENT; 8.662 - 8.663 - /* Allocate all required memory */ 8.664 - cache_kobject[cpu] = kmalloc(sizeof(struct kobject), GFP_KERNEL); 8.665 - if (unlikely(cache_kobject[cpu] == NULL)) 8.666 - goto err_out; 8.667 - memset(cache_kobject[cpu], 0, sizeof(struct kobject)); 8.668 - 8.669 - index_kobject[cpu] = kmalloc( 8.670 - sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); 8.671 - if (unlikely(index_kobject[cpu] == NULL)) 8.672 - goto err_out; 8.673 - memset(index_kobject[cpu], 0, 8.674 - sizeof(struct _index_kobject) * num_cache_leaves); 8.675 - 8.676 - return 0; 8.677 - 8.678 -err_out: 8.679 - cpuid4_cache_sysfs_exit(cpu); 8.680 - return -ENOMEM; 8.681 -} 8.682 - 8.683 -/* Add/Remove cache interface for CPU device */ 8.684 -static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 8.685 -{ 8.686 - unsigned int cpu = sys_dev->id; 8.687 - unsigned long i, j; 8.688 - struct _index_kobject *this_object; 8.689 - int retval = 0; 8.690 - 8.691 - retval = cpuid4_cache_sysfs_init(cpu); 8.692 - if (unlikely(retval < 0)) 8.693 - return retval; 8.694 - 8.695 - cache_kobject[cpu]->parent = &sys_dev->kobj; 8.696 - kobject_set_name(cache_kobject[cpu], "%s", "cache"); 8.697 - cache_kobject[cpu]->ktype = &ktype_percpu_entry; 8.698 - retval = kobject_register(cache_kobject[cpu]); 8.699 - 8.700 - for (i = 0; i < num_cache_leaves; i++) { 8.701 - this_object = INDEX_KOBJECT_PTR(cpu,i); 8.702 - this_object->cpu = cpu; 8.703 - this_object->index = i; 8.704 - this_object->kobj.parent = cache_kobject[cpu]; 8.705 - kobject_set_name(&(this_object->kobj), "index%1lu", i); 8.706 - this_object->kobj.ktype = &ktype_cache; 8.707 - retval = kobject_register(&(this_object->kobj)); 8.708 - if (unlikely(retval)) { 8.709 - for (j = 0; j < i; j++) { 8.710 - kobject_unregister( 8.711 - &(INDEX_KOBJECT_PTR(cpu,j)->kobj)); 8.712 - } 8.713 - kobject_unregister(cache_kobject[cpu]); 8.714 - cpuid4_cache_sysfs_exit(cpu); 8.715 - break; 8.716 - } 8.717 - } 8.718 - return retval; 8.719 -} 8.720 - 8.721 -static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) 8.722 -{ 8.723 - unsigned int cpu = sys_dev->id; 8.724 - unsigned long i; 8.725 - 8.726 - for (i = 0; i < num_cache_leaves; i++) { 8.727 - cache_remove_shared_cpu_map(cpu, i); 8.728 - kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 8.729 - } 8.730 - kobject_unregister(cache_kobject[cpu]); 8.731 - 
cpuid4_cache_sysfs_exit(cpu); 8.732 - return; 8.733 -} 8.734 - 8.735 -static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, 8.736 - unsigned long action, void *hcpu) 8.737 -{ 8.738 - unsigned int cpu = (unsigned long)hcpu; 8.739 - struct sys_device *sys_dev; 8.740 - 8.741 - sys_dev = get_cpu_sysdev(cpu); 8.742 - switch (action) { 8.743 - case CPU_ONLINE: 8.744 - cache_add_dev(sys_dev); 8.745 - break; 8.746 - case CPU_DEAD: 8.747 - cache_remove_dev(sys_dev); 8.748 - break; 8.749 - } 8.750 - return NOTIFY_OK; 8.751 -} 8.752 - 8.753 -static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = 8.754 -{ 8.755 - .notifier_call = cacheinfo_cpu_callback, 8.756 -}; 8.757 - 8.758 -static int __cpuinit cache_sysfs_init(void) 8.759 -{ 8.760 - int i; 8.761 - 8.762 - if (num_cache_leaves == 0) 8.763 - return 0; 8.764 - 8.765 - register_hotcpu_notifier(&cacheinfo_cpu_notifier); 8.766 - 8.767 - for_each_online_cpu(i) { 8.768 - cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, 8.769 - (void *)(long)i); 8.770 - } 8.771 - 8.772 - return 0; 8.773 -} 8.774 - 8.775 -device_initcall(cache_sysfs_init); 8.776 - 8.777 -#endif
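intel_cacheinfo-xen.c likewise drops out in favor of the native file. The one piece of arithmetic worth keeping in mind from its cpuid4_cache_lookup() is how the CPUID leaf-4 fields, each stored as the actual value minus one, multiply out to a total cache size. A minimal sketch with invented register values:

#include <stdio.h>

int main(void)
{
	/* Hypothetical raw CPUID(4) fields (each encodes actual value - 1). */
	unsigned long sets = 4095, line_size = 63, partitions = 0, ways = 15;

	unsigned long size = (sets + 1) * (line_size + 1) *
			     (partitions + 1) * (ways + 1);

	printf("cache size = %lu KB\n", size / 1024);  /* 4096 KB */
	return 0;
}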
9.1 --- a/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Mon Mar 05 12:49:12 2007 -0600 9.2 +++ b/linux-2.6-xen-sparse/arch/i386/kernel/entry-xen.S Thu Mar 08 14:39:52 2007 -0600 9.3 @@ -368,6 +368,7 @@ sysexit_scrit: /**** START OF SYSEXIT CR 9.4 movl ESI(%esp), %esi 9.5 sysexit 9.6 14: __DISABLE_INTERRUPTS 9.7 + TRACE_IRQS_OFF 9.8 sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ 9.9 push %esp 9.10 call evtchn_do_upcall 9.11 @@ -427,11 +428,13 @@ restore_nocheck: 9.12 restore_nocheck: 9.13 movl EFLAGS(%esp), %eax 9.14 testl $(VM_MASK|NMI_MASK), %eax 9.15 + CFI_REMEMBER_STATE 9.16 jnz hypervisor_iret 9.17 shr $9, %eax # EAX[0] == IRET_EFLAGS.IF 9.18 GET_VCPU_INFO 9.19 andb evtchn_upcall_mask(%esi),%al 9.20 andb $1,%al # EAX[0] == IRET_EFLAGS.IF & event_mask 9.21 + CFI_REMEMBER_STATE 9.22 jnz restore_all_enable_events # != 0 => enable event delivery 9.23 #endif 9.24 TRACE_IRQS_IRET 9.25 @@ -455,8 +458,8 @@ iret_exc: 9.26 .long 1b,iret_exc 9.27 .previous 9.28 9.29 + CFI_RESTORE_STATE 9.30 #ifndef CONFIG_XEN 9.31 - CFI_RESTORE_STATE 9.32 ldt_ss: 9.33 larl OLDSS(%esp), %eax 9.34 jnz restore_nocheck 9.35 @@ -485,14 +488,36 @@ 1: iret 9.36 .align 4 9.37 .long 1b,iret_exc 9.38 .previous 9.39 - CFI_ENDPROC 9.40 #else 9.41 + ALIGN 9.42 +restore_all_enable_events: 9.43 + TRACE_IRQS_ON 9.44 + __ENABLE_INTERRUPTS 9.45 +scrit: /**** START OF CRITICAL REGION ****/ 9.46 + __TEST_PENDING 9.47 + jnz 14f # process more events if necessary... 9.48 + RESTORE_REGS 9.49 + addl $4, %esp 9.50 + CFI_ADJUST_CFA_OFFSET -4 9.51 +1: iret 9.52 +.section __ex_table,"a" 9.53 + .align 4 9.54 + .long 1b,iret_exc 9.55 +.previous 9.56 +14: __DISABLE_INTERRUPTS 9.57 + TRACE_IRQS_OFF 9.58 + jmp 11f 9.59 +ecrit: /**** END OF CRITICAL REGION ****/ 9.60 + 9.61 + CFI_RESTORE_STATE 9.62 hypervisor_iret: 9.63 andl $~NMI_MASK, EFLAGS(%esp) 9.64 RESTORE_REGS 9.65 addl $4, %esp 9.66 + CFI_ADJUST_CFA_OFFSET -4 9.67 jmp hypercall_page + (__HYPERVISOR_iret * 32) 9.68 #endif 9.69 + CFI_ENDPROC 9.70 9.71 # perform work that needs to be done immediately before resumption 9.72 ALIGN 9.73 @@ -736,7 +761,9 @@ error_code: 9.74 # critical region we know that the entire frame is present and correct 9.75 # so we can simply throw away the new one. 9.76 ENTRY(hypervisor_callback) 9.77 + RING0_INT_FRAME 9.78 pushl %eax 9.79 + CFI_ADJUST_CFA_OFFSET 4 9.80 SAVE_ALL 9.81 movl EIP(%esp),%eax 9.82 cmpl $scrit,%eax 9.83 @@ -749,26 +776,13 @@ ENTRY(hypervisor_callback) 9.84 ja 11f 9.85 addl $OLDESP,%esp # Remove eflags...ebx from stack frame. 9.86 11: push %esp 9.87 + CFI_ADJUST_CFA_OFFSET 4 9.88 call evtchn_do_upcall 9.89 add $4,%esp 9.90 + CFI_ADJUST_CFA_OFFSET -4 9.91 jmp ret_from_intr 9.92 + CFI_ENDPROC 9.93 9.94 - ALIGN 9.95 -restore_all_enable_events: 9.96 - __ENABLE_INTERRUPTS 9.97 -scrit: /**** START OF CRITICAL REGION ****/ 9.98 - __TEST_PENDING 9.99 - jnz 14f # process more events if necessary... 9.100 - RESTORE_REGS 9.101 - addl $4, %esp 9.102 -1: iret 9.103 -.section __ex_table,"a" 9.104 - .align 4 9.105 - .long 1b,iret_exc 9.106 -.previous 9.107 -14: __DISABLE_INTERRUPTS 9.108 - jmp 11b 9.109 -ecrit: /**** END OF CRITICAL REGION ****/ 9.110 # [How we do the fixup]. We want to merge the current stack frame with the 9.111 # just-interrupted frame. 
How we do this depends on where in the critical 9.112 # region the interrupted handler was executing, and so how many saved 9.113 @@ -835,6 +849,7 @@ 4: mov 16(%esp),%gs 9.114 addl $16,%esp # EAX != 0 => Category 2 (Bad IRET) 9.115 jmp iret_exc 9.116 5: addl $16,%esp # EAX == 0 => Category 1 (Bad segment) 9.117 + RING0_INT_FRAME 9.118 pushl $0 9.119 SAVE_ALL 9.120 jmp ret_from_exception 9.121 @@ -860,6 +875,7 @@ 9: xorl %eax,%eax; \ 9.122 .long 4b,9b; \ 9.123 .previous 9.124 #endif 9.125 + CFI_ENDPROC 9.126 9.127 ENTRY(coprocessor_error) 9.128 RING0_INT_FRAME 9.129 @@ -1187,8 +1203,11 @@ ENDPROC(arch_unwind_init_running) 9.130 #endif 9.131 9.132 ENTRY(fixup_4gb_segment) 9.133 + RING0_EC_FRAME 9.134 pushl $do_fixup_4gb_segment 9.135 + CFI_ADJUST_CFA_OFFSET 4 9.136 jmp error_code 9.137 + CFI_ENDPROC 9.138 9.139 .section .rodata,"a" 9.140 .align 4
--- a/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/head-xen.S Thu Mar 08 14:39:52 2007 -0600
@@ -9,11 +9,10 @@
 #include <asm/cache.h>
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
+#include <asm/dwarf2.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/elfnote.h>
 
-#define _PAGE_PRESENT 0x1
-
 /*
  * References to members of the new_cpu_data structure.
  */
@@ -65,14 +64,13 @@ ENTRY(startup_32)
 
 	pushl %eax # fake return address
 	jmp start_kernel
-L6:
-	jmp L6 # main should never return here, but
-	       # just in case, we know what happens.
 
 #define HYPERCALL_PAGE_OFFSET 0x1000
 .org HYPERCALL_PAGE_OFFSET
 ENTRY(hypercall_page)
+	CFI_STARTPROC
 .skip 0x1000
+	CFI_ENDPROC
 
 /*
  * Real beginning of normal "text" segment
--- a/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/setup-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -1847,6 +1847,9 @@ static __init int add_pcspkr(void)
 	struct platform_device *pd;
 	int ret;
 
+	if (!is_initial_xendomain())
+		return 0;
+
 	pd = platform_device_alloc("pcspkr", -1);
 	if (!pd)
 		return -ENOMEM;
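The new early return makes speaker registration a dom0-only action, presumably because an unprivileged domU has no PC speaker to drive. The resulting control flow as a self-contained sketch (the predicate is a stub here, not the kernel API):

#include <stdio.h>

static int is_initial_xendomain(void) { return 0; }  /* pretend we are a domU */

static int add_pcspkr(void)
{
	if (!is_initial_xendomain())
		return 0;  /* silently skip: nothing to register */
	puts("registering pcspkr platform device");
	return 0;
}

int main(void) { return add_pcspkr(); }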
--- a/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/time-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -1022,16 +1022,21 @@ void halt(void)
 }
 EXPORT_SYMBOL(halt);
 
-/* No locking required. We are only CPU running, and interrupts are off. */
+/* No locking required. Interrupts are disabled on all CPUs. */
 void time_resume(void)
 {
+	unsigned int cpu;
+
 	init_cpu_khz();
 
-	get_time_values_from_xen(0);
+	for_each_online_cpu(cpu) {
+		get_time_values_from_xen(cpu);
+		per_cpu(processed_system_time, cpu) =
+			per_cpu(shadow_time, 0).system_timestamp;
+		init_missing_ticks_accounting(cpu);
+	}
 
 	processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
-	per_cpu(processed_system_time, 0) = processed_system_time;
-	init_missing_ticks_accounting(0);
 
 	update_wallclock();
 }
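The time_resume() rework refreshes every online CPU instead of only CPU 0, seeding each CPU's processed_system_time from CPU 0's freshly fetched shadow timestamp. The shape of that loop as a standalone sketch (the arrays and helpers below are stand-ins for the kernel's per-CPU machinery, not real APIs):

#include <stdio.h>

#define NR_CPUS 4

static int  cpu_online[NR_CPUS] = { 1, 1, 0, 1 };   /* hypothetical topology */
static long shadow_timestamp[NR_CPUS];
static long processed_system_time[NR_CPUS];

static void get_time_values_from_xen(int cpu) { shadow_timestamp[cpu] = 1000 + cpu; }
static void init_missing_ticks_accounting(int cpu) { (void)cpu; }

int main(void)
{
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!cpu_online[cpu])
			continue;
		get_time_values_from_xen(cpu);
		/* Note: the patch seeds every CPU from CPU 0's timestamp. */
		processed_system_time[cpu] = shadow_timestamp[0];
		init_missing_ticks_accounting(cpu);
	}
	printf("cpu1 processed_system_time = %ld\n", processed_system_time[1]);
	return 0;
}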
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -374,8 +374,6 @@ void iounmap(volatile void __iomem *addr
 }
 EXPORT_SYMBOL(iounmap);
 
-#ifdef __i386__
-
 void __init *bt_ioremap(unsigned long phys_addr, unsigned long size)
 {
 	unsigned long offset, last_addr;
@@ -443,5 +441,3 @@ void __init bt_iounmap(void *addr, unsig
 		--nrpages;
 	}
 }
-
-#endif /* __i386__ */
--- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c Thu Mar 08 14:39:52 2007 -0600
@@ -573,64 +573,67 @@ void make_pages_writable(void *va, unsig
 	}
 }
 
-static inline int pgd_walk_set_prot(struct page *page, pgprot_t flags)
+static inline void pgd_walk_set_prot(struct page *page, pgprot_t flags)
 {
 	unsigned long pfn = page_to_pfn(page);
-
-	if (PageHighMem(page))
-		return pgprot_val(flags) & _PAGE_RW
-			? test_and_clear_bit(PG_pinned, &page->flags)
-			: !test_and_set_bit(PG_pinned, &page->flags);
+	int rc;
 
-	BUG_ON(HYPERVISOR_update_va_mapping(
-		(unsigned long)__va(pfn << PAGE_SHIFT),
-		pfn_pte(pfn, flags), 0));
-
-	return 0;
+	if (PageHighMem(page)) {
+		if (pgprot_val(flags) & _PAGE_RW)
+			clear_bit(PG_pinned, &page->flags);
+		else
+			set_bit(PG_pinned, &page->flags);
+	} else {
+		rc = HYPERVISOR_update_va_mapping(
+			(unsigned long)__va(pfn << PAGE_SHIFT),
+			pfn_pte(pfn, flags), 0);
+		if (rc)
+			BUG();
+	}
 }
 
-static int pgd_walk(pgd_t *pgd_base, pgprot_t flags)
+static void pgd_walk(pgd_t *pgd_base, pgprot_t flags)
 {
 	pgd_t *pgd = pgd_base;
 	pud_t *pud;
 	pmd_t *pmd;
-	int g, u, m, flush;
+	int g, u, m, rc;
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return 0;
 
-	for (g = 0, flush = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
+	for (g = 0; g < USER_PTRS_PER_PGD; g++, pgd++) {
 		if (pgd_none(*pgd))
 			continue;
 		pud = pud_offset(pgd, 0);
 		if (PTRS_PER_PUD > 1) /* not folded */
-			flush |= pgd_walk_set_prot(virt_to_page(pud),flags);
+			pgd_walk_set_prot(virt_to_page(pud),flags);
 		for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
 			if (pud_none(*pud))
 				continue;
 			pmd = pmd_offset(pud, 0);
 			if (PTRS_PER_PMD > 1) /* not folded */
-				flush |= pgd_walk_set_prot(virt_to_page(pmd),flags);
+				pgd_walk_set_prot(virt_to_page(pmd),flags);
 			for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
 				if (pmd_none(*pmd))
 					continue;
-				flush |= pgd_walk_set_prot(pmd_page(*pmd),flags);
+				pgd_walk_set_prot(pmd_page(*pmd),flags);
 			}
 		}
 	}
 
-	BUG_ON(HYPERVISOR_update_va_mapping(
+	rc = HYPERVISOR_update_va_mapping(
 		(unsigned long)pgd_base,
 		pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags),
-		UVMF_TLB_FLUSH));
-
-	return flush;
+		UVMF_TLB_FLUSH);
+	if (rc)
+		BUG();
 }
 
 static void __pgd_pin(pgd_t *pgd)
 {
-	if (pgd_walk(pgd, PAGE_KERNEL_RO))
-		kmap_flush_unused();
+	pgd_walk(pgd, PAGE_KERNEL_RO);
+	kmap_flush_unused();
 	xen_pgd_pin(__pa(pgd));
 	set_bit(PG_pinned, &virt_to_page(pgd)->flags);
 }
@@ -638,8 +641,7 @@ static void __pgd_pin(pgd_t *pgd)
 static void __pgd_unpin(pgd_t *pgd)
 {
 	xen_pgd_unpin(__pa(pgd));
-	if (pgd_walk(pgd, PAGE_KERNEL))
-		kmap_flush_unused();
+	pgd_walk(pgd, PAGE_KERNEL);
 	clear_bit(PG_pinned, &virt_to_page(pgd)->flags);
 }
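A recurring change in this hunk: BUG_ON(HYPERVISOR_update_va_mapping(...)) becomes an explicit rc = ...; if (rc) BUG();. One plausible reading (an assumption, not stated in the changeset) is that it keeps the required hypercall visibly outside the assertion macro and makes the failure path explicit. The same style in a user-space sketch, with assert() standing in for BUG_ON() and a stub for the hypercall:

#include <stdio.h>

static int update_va_mapping_stub(void) { return 0; }  /* 0 == success */

int main(void)
{
	/* Old shape: the assertion wraps a call that must always run:
	 *     assert(update_va_mapping_stub() == 0);
	 * New shape: call first, then check, so the side effect never
	 * depends on how the assertion macro is configured. */
	int rc = update_va_mapping_stub();
	if (rc) {
		fprintf(stderr, "update_va_mapping failed: %d\n", rc);
		return 1;
	}
	puts("mapping updated");
	return 0;
}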
--- a/linux-2.6-xen-sparse/arch/x86_64/Kconfig Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/Kconfig Thu Mar 08 14:39:52 2007 -0600
@@ -450,7 +450,7 @@ config CALGARY_IOMMU
 	bool "IBM Calgary IOMMU support"
 	default y
 	select SWIOTLB
-	depends on PCI && EXPERIMENTAL
+	depends on PCI && !X86_64_XEN && EXPERIMENTAL
 	help
 	  Support for hardware IOMMUs in IBM's xSeries x366 and x460
 	  systems. Needed to run systems with more than 3GB of memory
--- a/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile Mon Mar 05 12:49:12 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/Makefile Thu Mar 08 14:39:52 2007 -0600
@@ -61,9 +61,7 @@ ifdef CONFIG_XEN
 time-y += ../../i386/kernel/time-xen.o
 pci-dma-y += ../../i386/kernel/pci-dma-xen.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o
-intel_cacheinfo-y := ../../i386/kernel/cpu/intel_cacheinfo-xen.o
 quirks-y := ../../i386/kernel/quirks-xen.o
-alternative-y := ../../i386/kernel/alternative-xen.o
 
 n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o
17.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Mon Mar 05 12:49:12 2007 -0600 17.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/entry-xen.S Thu Mar 08 14:39:52 2007 -0600 17.3 @@ -116,19 +116,21 @@ NMI_MASK = 0x80000000 17.4 CFI_ADJUST_CFA_OFFSET -(6*8) 17.5 .endm 17.6 17.7 - .macro CFI_DEFAULT_STACK start=1 17.8 + .macro CFI_DEFAULT_STACK start=1,adj=0 17.9 .if \start 17.10 CFI_STARTPROC simple 17.11 - CFI_DEF_CFA rsp,SS+8 17.12 + CFI_DEF_CFA rsp,SS+8-(\adj*ARGOFFSET) 17.13 .else 17.14 - CFI_DEF_CFA_OFFSET SS+8 17.15 + CFI_DEF_CFA_OFFSET SS+8-(\adj*ARGOFFSET) 17.16 .endif 17.17 + .if \adj == 0 17.18 CFI_REL_OFFSET r15,R15 17.19 CFI_REL_OFFSET r14,R14 17.20 CFI_REL_OFFSET r13,R13 17.21 CFI_REL_OFFSET r12,R12 17.22 CFI_REL_OFFSET rbp,RBP 17.23 CFI_REL_OFFSET rbx,RBX 17.24 + .endif 17.25 CFI_REL_OFFSET r11,R11 17.26 CFI_REL_OFFSET r10,R10 17.27 CFI_REL_OFFSET r9,R9 17.28 @@ -363,8 +365,8 @@ ENTRY(int_ret_from_sys_call) 17.29 CFI_REL_OFFSET r9,R9-ARGOFFSET 17.30 CFI_REL_OFFSET r10,R10-ARGOFFSET 17.31 CFI_REL_OFFSET r11,R11-ARGOFFSET 17.32 + XEN_BLOCK_EVENTS(%rsi) 17.33 TRACE_IRQS_OFF 17.34 - XEN_BLOCK_EVENTS(%rsi) 17.35 testb $3,CS-ARGOFFSET(%rsp) 17.36 jnz 1f 17.37 /* Need to set the proper %ss (not NULL) for ring 3 iretq */ 17.38 @@ -534,7 +536,7 @@ END(stub_rt_sigreturn) 17.39 */ 17.40 17.41 retint_check: 17.42 - CFI_DEFAULT_STACK 17.43 + CFI_DEFAULT_STACK adj=1 17.44 movl threadinfo_flags(%rcx),%edx 17.45 andl %edi,%edx 17.46 CFI_REMEMBER_STATE 17.47 @@ -838,6 +840,7 @@ ENTRY(error_entry) 17.48 CFI_REL_OFFSET r15,R15 17.49 #if 0 17.50 cmpl $__KERNEL_CS,CS(%rsp) 17.51 + CFI_REMEMBER_STATE 17.52 je error_kernelspace 17.53 #endif 17.54 error_call_handler: 17.55 @@ -864,7 +867,7 @@ error_exit: 17.56 TRACE_IRQS_IRETQ 17.57 jmp retint_restore_args 17.58 17.59 -error_kernelspace: 17.60 +#if 0 17.61 /* 17.62 * We need to re-write the logic here because we don't do iretq to 17.63 * to return to user mode. It's still possible that we get trap/fault 17.64 @@ -872,7 +875,8 @@ error_kernelspace: 17.65 * for example). 17.66 * 17.67 */ 17.68 -#if 0 17.69 + CFI_RESTORE_STATE 17.70 +error_kernelspace: 17.71 incl %ebx 17.72 /* There are two places in the kernel that can potentially fault with 17.73 usergs. Handle them here. The exception handlers after 17.74 @@ -888,11 +892,13 @@ error_kernelspace: 17.75 cmpq $gs_change,RIP(%rsp) 17.76 je error_swapgs 17.77 jmp error_sti 17.78 -#endif 17.79 +#endif 17.80 + CFI_ENDPROC 17.81 END(error_entry) 17.82 17.83 ENTRY(hypervisor_callback) 17.84 zeroentry do_hypervisor_callback 17.85 +END(hypervisor_callback) 17.86 17.87 /* 17.88 * Copied from arch/xen/i386/kernel/entry.S 17.89 @@ -909,48 +915,66 @@ ENTRY(hypervisor_callback) 17.90 # existing activation in its critical region -- if so, we pop the current 17.91 # activation and restart the handler using the previous one. 
17.92 ENTRY(do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) 17.93 + CFI_STARTPROC 17.94 # Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will 17.95 # see the correct pointer to the pt_regs 17.96 movq %rdi, %rsp # we don't return, adjust the stack frame 17.97 -11: movq %gs:pda_irqstackptr,%rax 17.98 - incl %gs:pda_irqcount 17.99 - cmovzq %rax,%rsp 17.100 - pushq %rdi 17.101 + CFI_ENDPROC 17.102 + CFI_DEFAULT_STACK 17.103 +11: incl %gs:pda_irqcount 17.104 + movq %rsp,%rbp 17.105 + CFI_DEF_CFA_REGISTER rbp 17.106 + cmovzq %gs:pda_irqstackptr,%rsp 17.107 + pushq %rbp # backlink for old unwinder 17.108 call evtchn_do_upcall 17.109 popq %rsp 17.110 + CFI_DEF_CFA_REGISTER rsp 17.111 decl %gs:pda_irqcount 17.112 jmp error_exit 17.113 + CFI_ENDPROC 17.114 +END(do_hypervisor_callback) 17.115 17.116 #ifdef CONFIG_X86_LOCAL_APIC 17.117 KPROBE_ENTRY(nmi) 17.118 zeroentry do_nmi_callback 17.119 ENTRY(do_nmi_callback) 17.120 + CFI_STARTPROC 17.121 addq $8, %rsp 17.122 + CFI_ENDPROC 17.123 + CFI_DEFAULT_STACK 17.124 call do_nmi 17.125 orl $NMI_MASK,EFLAGS(%rsp) 17.126 RESTORE_REST 17.127 XEN_BLOCK_EVENTS(%rsi) 17.128 + TRACE_IRQS_OFF 17.129 GET_THREAD_INFO(%rcx) 17.130 jmp retint_restore_args 17.131 + CFI_ENDPROC 17.132 .previous .text 17.133 +END(nmi) 17.134 #endif 17.135 17.136 ALIGN 17.137 restore_all_enable_events: 17.138 + CFI_DEFAULT_STACK adj=1 17.139 + TRACE_IRQS_ON 17.140 XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up... 17.141 17.142 scrit: /**** START OF CRITICAL REGION ****/ 17.143 XEN_TEST_PENDING(%rsi) 17.144 + CFI_REMEMBER_STATE 17.145 jnz 14f # process more events if necessary... 17.146 XEN_PUT_VCPU_INFO(%rsi) 17.147 RESTORE_ARGS 0,8,0 17.148 HYPERVISOR_IRET 0 17.149 17.150 + CFI_RESTORE_STATE 17.151 14: XEN_LOCKED_BLOCK_EVENTS(%rsi) 17.152 XEN_PUT_VCPU_INFO(%rsi) 17.153 SAVE_REST 17.154 movq %rsp,%rdi # set the argument again 17.155 jmp 11b 17.156 + CFI_ENDPROC 17.157 ecrit: /**** END OF CRITICAL REGION ****/ 17.158 # At this point, unlike on x86-32, we don't do the fixup to simplify the 17.159 # code and the stack frame is more complex on x86-64. 17.160 @@ -970,8 +994,12 @@ ecrit: /**** END OF CRITICAL REGION *** 17.161 # We distinguish between categories by comparing each saved segment register 17.162 # with its current contents: any discrepancy means we in category 1. 17.163 ENTRY(failsafe_callback) 17.164 + _frame (RIP-0x30) 17.165 + CFI_REL_OFFSET rcx, 0 17.166 + CFI_REL_OFFSET r11, 8 17.167 movw %ds,%cx 17.168 cmpw %cx,0x10(%rsp) 17.169 + CFI_REMEMBER_STATE 17.170 jne 1f 17.171 movw %es,%cx 17.172 cmpw %cx,0x18(%rsp) 17.173 @@ -984,17 +1012,26 @@ ENTRY(failsafe_callback) 17.174 jne 1f 17.175 /* All segments match their saved values => Category 2 (Bad IRET). */ 17.176 movq (%rsp),%rcx 17.177 + CFI_RESTORE rcx 17.178 movq 8(%rsp),%r11 17.179 + CFI_RESTORE r11 17.180 addq $0x30,%rsp 17.181 + CFI_ADJUST_CFA_OFFSET -0x30 17.182 movq $11,%rdi /* SIGSEGV */ 17.183 jmp do_exit 17.184 + CFI_RESTORE_STATE 17.185 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. 
*/ 17.186 movq (%rsp),%rcx 17.187 + CFI_RESTORE rcx 17.188 movq 8(%rsp),%r11 17.189 + CFI_RESTORE r11 17.190 addq $0x30,%rsp 17.191 + CFI_ADJUST_CFA_OFFSET -0x30 17.192 pushq $0 17.193 + CFI_ADJUST_CFA_OFFSET 8 17.194 SAVE_ALL 17.195 jmp error_exit 17.196 + CFI_ENDPROC 17.197 #if 0 17.198 .section __ex_table,"a" 17.199 .align 8 17.200 @@ -1117,12 +1154,12 @@ END(device_not_available) 17.201 17.202 /* runs on exception stack */ 17.203 KPROBE_ENTRY(debug) 17.204 - INTR_FRAME 17.205 -/* pushq $0 17.206 +/* INTR_FRAME 17.207 + pushq $0 17.208 CFI_ADJUST_CFA_OFFSET 8 */ 17.209 zeroentry do_debug 17.210 -/* paranoid_exit */ 17.211 - CFI_ENDPROC 17.212 +/* paranoidexit 17.213 + CFI_ENDPROC */ 17.214 END(debug) 17.215 .previous .text 17.216 17.217 @@ -1144,12 +1181,12 @@ END(nmi) 17.218 #endif 17.219 17.220 KPROBE_ENTRY(int3) 17.221 - INTR_FRAME 17.222 -/* pushq $0 17.223 +/* INTR_FRAME 17.224 + pushq $0 17.225 CFI_ADJUST_CFA_OFFSET 8 */ 17.226 zeroentry do_int3 17.227 -/* jmp paranoid_exit1 */ 17.228 - CFI_ENDPROC 17.229 +/* jmp paranoid_exit1 17.230 + CFI_ENDPROC */ 17.231 END(int3) 17.232 .previous .text 17.233 17.234 @@ -1193,9 +1230,11 @@ END(segment_not_present) 17.235 17.236 /* runs on exception stack */ 17.237 ENTRY(stack_segment) 17.238 - XCPT_FRAME 17.239 +/* XCPT_FRAME 17.240 + paranoidentry do_stack_segment */ 17.241 errorentry do_stack_segment 17.242 - CFI_ENDPROC 17.243 +/* jmp paranoid_exit1 17.244 + CFI_ENDPROC */ 17.245 END(stack_segment) 17.246 17.247 KPROBE_ENTRY(general_protection)
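Note on the do_hypervisor_callback rework above: event upcalls now run on the per-CPU IRQ stack with full unwind annotations. The switch is subtle — incl %gs:pda_irqcount sets ZF only when the count reaches zero (the PDA's irqcount idles at -1), so cmovzq installs the IRQ stack pointer only on the outermost, non-nested upcall, and the pushed %rbp backlink lets the old frame-pointer unwinder walk across the switch. A minimal C rendering of that accounting (illustrative only: pda_irqcount/pda_irqstackptr mirror the PDA fields, and current_sp()/switch_sp() are hypothetical helpers standing in for the raw %rsp moves):

    static long pda_irqcount = -1;   /* idles at -1, as pda_init() sets it */

    static void upcall_on_irq_stack(struct pt_regs *regs)
    {
            void *saved_sp = current_sp();          /* movq %rsp,%rbp      */

            if (++pda_irqcount == 0)                /* incl sets ZF ...    */
                    switch_sp(pda_irqstackptr);     /* ... cmovzq switches */
            evtchn_do_upcall(regs);
            switch_sp(saved_sp);                    /* popq %rsp           */
            pda_irqcount--;
    }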
18.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S Mon Mar 05 12:49:12 2007 -0600 18.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/head-xen.S Thu Mar 08 14:39:52 2007 -0600 18.3 @@ -22,11 +22,9 @@ 18.4 #include <asm/page.h> 18.5 #include <asm/msr.h> 18.6 #include <asm/cache.h> 18.7 - 18.8 +#include <asm/dwarf2.h> 18.9 #include <xen/interface/elfnote.h> 18.10 18.11 -#define _PAGE_PRESENT 0x1 18.12 - 18.13 .section .bootstrap.text, "ax", @progbits 18.14 .code64 18.15 #define VIRT_ENTRY_OFFSET 0x0 18.16 @@ -42,6 +40,7 @@ ENTRY(_start) 18.17 /* rsi is pointer to startup info structure. 18.18 pass it to C */ 18.19 movq %rsi,%rdi 18.20 + pushq $0 # fake return address 18.21 jmp x86_64_start_kernel 18.22 18.23 ENTRY(stext) 18.24 @@ -82,7 +81,25 @@ NEXT_PAGE(level2_kernel_pgt) 18.25 .fill 512,8,0 18.26 18.27 NEXT_PAGE(hypercall_page) 18.28 - .fill 512,8,0 18.29 + CFI_STARTPROC 18.30 + .rept 0x1000 / 0x20 18.31 + .skip 1 /* push %rcx */ 18.32 + CFI_ADJUST_CFA_OFFSET 8 18.33 + CFI_REL_OFFSET rcx,0 18.34 + .skip 2 /* push %r11 */ 18.35 + CFI_ADJUST_CFA_OFFSET 8 18.36 + CFI_REL_OFFSET r11,0 18.37 + .skip 5 /* mov $#,%eax */ 18.38 + .skip 2 /* syscall */ 18.39 + .skip 2 /* pop %r11 */ 18.40 + CFI_ADJUST_CFA_OFFSET -8 18.41 + CFI_RESTORE r11 18.42 + .skip 1 /* pop %rcx */ 18.43 + CFI_ADJUST_CFA_OFFSET -8 18.44 + CFI_RESTORE rcx 18.45 + .align 0x20,0 /* ret */ 18.46 + .endr 18.47 + CFI_ENDPROC 18.48 18.49 #undef NEXT_PAGE 18.50
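The hypercall page itself is populated by the hypervisor at boot; the .skip directives above only reserve bytes matching the known stub layout so that the CFI annotations line up with real instructions. A sketch of what each 32-byte slot holds and how a guest uses it (assuming the standard Xen/x86-64 hypercall-page ABI; HYPERCALL_STUB is an illustrative macro, not a symbol from this tree):

    /*
     * Each slot, once filled in by Xen, is equivalent to:
     *
     *     push %rcx ; push %r11 ; mov $nr,%eax ; syscall ; pop %r11 ; pop %rcx ; ret
     *
     * which is exactly the instruction sequence the .skip/CFI pairs above
     * annotate.  Issuing hypercall `nr` is then an indirect call:
     */
    extern char hypercall_page[];
    #define HYPERCALL_STUB(nr)  ((void *)(hypercall_page + (nr) * 0x20))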
19.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c Mon Mar 05 12:49:12 2007 -0600 19.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/init_task.c Thu Mar 08 14:39:52 2007 -0600 19.3 @@ -47,11 +47,11 @@ EXPORT_SYMBOL(init_task); 19.4 * on exact cacheline boundaries, to eliminate cacheline ping-pong. 19.5 */ 19.6 DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; 19.7 -#endif 19.8 19.9 /* Copies of the original ist values from the tss are only accessed during 19.10 * debugging, no special alignment required. 19.11 */ 19.12 DEFINE_PER_CPU(struct orig_ist, orig_ist); 19.13 +#endif 19.14 19.15 #define ALIGN_TO_4K __attribute__((section(".data.init_task")))
20.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Mon Mar 05 12:49:12 2007 -0600 20.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/kernel/setup-xen.c Thu Mar 08 14:39:52 2007 -0600 20.3 @@ -676,7 +676,8 @@ void __init setup_arch(char **cmdline_p) 20.4 20.5 init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); 20.6 20.7 - /* dmi_scan_machine(); */ 20.8 + if (is_initial_xendomain()) 20.9 + dmi_scan_machine(); 20.10 20.11 #ifdef CONFIG_ACPI_NUMA 20.12 /* 20.13 @@ -691,6 +692,14 @@ void __init setup_arch(char **cmdline_p) 20.14 contig_initmem_init(0, end_pfn); 20.15 #endif 20.16 20.17 +#ifdef CONFIG_XEN 20.18 + /* 20.19 + * Reserve kernel, physmap, start info, initial page tables, and 20.20 + * direct mapping. 20.21 + */ 20.22 + reserve_bootmem_generic(__pa_symbol(&_text), 20.23 + (table_end << PAGE_SHIFT) - __pa_symbol(&_text)); 20.24 +#else 20.25 /* Reserve direct mapping */ 20.26 reserve_bootmem_generic(table_start << PAGE_SHIFT, 20.27 (table_end - table_start) << PAGE_SHIFT); 20.28 @@ -699,12 +708,6 @@ void __init setup_arch(char **cmdline_p) 20.29 reserve_bootmem_generic(__pa_symbol(&_text), 20.30 __pa_symbol(&_end) - __pa_symbol(&_text)); 20.31 20.32 -#ifdef CONFIG_XEN 20.33 - /* reserve physmap, start info and initial page tables */ 20.34 - reserve_bootmem_generic(__pa_symbol(&_end), 20.35 - (table_start << PAGE_SHIFT) - 20.36 - __pa_symbol(&_end)); 20.37 -#else 20.38 /* 20.39 * reserve physical page 0 - it's a special BIOS page on many boxes, 20.40 * enabling clean reboots, SMP operation, laptop functions. 20.41 @@ -1625,13 +1628,6 @@ struct seq_operations cpuinfo_op = { 20.42 .show = show_cpuinfo, 20.43 }; 20.44 20.45 -static int __init run_dmi_scan(void) 20.46 -{ 20.47 - dmi_scan_machine(); 20.48 - return 0; 20.49 -} 20.50 -core_initcall(run_dmi_scan); 20.51 - 20.52 #if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE) 20.53 #include <linux/platform_device.h> 20.54 static __init int add_pcspkr(void) 20.55 @@ -1639,6 +1635,9 @@ static __init int add_pcspkr(void) 20.56 struct platform_device *pd; 20.57 int ret; 20.58 20.59 + if (!is_initial_xendomain()) 20.60 + return 0; 20.61 + 20.62 pd = platform_device_alloc("pcspkr", -1); 20.63 if (!pd) 20.64 return -ENOMEM;
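Two hunks above apply the same rule: probing that touches real firmware or legacy hardware (the DMI tables, the PC-speaker port) must only run in the initial domain, since unprivileged guests see neither. The DMI scan also moves from the late run_dmi_scan() initcall back into setup_arch(), where native x86-64 performs it. The recurring shape, as a sketch:

    /* Sketch of the dom0-gating pattern used in this file. */
    static int __init add_platform_device(void)
    {
            if (!is_initial_xendomain())
                    return 0;       /* domU: no physical device to find */
            /* ... probe exactly as on native hardware ... */
            return 0;
    }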
21.1 --- a/linux-2.6-xen-sparse/arch/x86_64/kernel/x8664_ksyms-xen.c Mon Mar 05 12:49:12 2007 -0600 21.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 21.3 @@ -1,59 +0,0 @@ 21.4 -/* Exports for assembly files. 21.5 - All C exports should go in the respective C files. */ 21.6 - 21.7 -#include <linux/module.h> 21.8 -#include <linux/smp.h> 21.9 - 21.10 -#include <asm/semaphore.h> 21.11 -#include <asm/processor.h> 21.12 -#include <asm/uaccess.h> 21.13 -#include <asm/pgtable.h> 21.14 - 21.15 -EXPORT_SYMBOL(kernel_thread); 21.16 - 21.17 -EXPORT_SYMBOL(__down_failed); 21.18 -EXPORT_SYMBOL(__down_failed_interruptible); 21.19 -EXPORT_SYMBOL(__down_failed_trylock); 21.20 -EXPORT_SYMBOL(__up_wakeup); 21.21 - 21.22 -EXPORT_SYMBOL(__get_user_1); 21.23 -EXPORT_SYMBOL(__get_user_2); 21.24 -EXPORT_SYMBOL(__get_user_4); 21.25 -EXPORT_SYMBOL(__get_user_8); 21.26 -EXPORT_SYMBOL(__put_user_1); 21.27 -EXPORT_SYMBOL(__put_user_2); 21.28 -EXPORT_SYMBOL(__put_user_4); 21.29 -EXPORT_SYMBOL(__put_user_8); 21.30 - 21.31 -EXPORT_SYMBOL(copy_user_generic); 21.32 -EXPORT_SYMBOL(copy_from_user); 21.33 -EXPORT_SYMBOL(copy_to_user); 21.34 - 21.35 -EXPORT_SYMBOL(copy_page); 21.36 -EXPORT_SYMBOL(clear_page); 21.37 - 21.38 -#ifdef CONFIG_SMP 21.39 -extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); 21.40 -extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); 21.41 -EXPORT_SYMBOL(__write_lock_failed); 21.42 -EXPORT_SYMBOL(__read_lock_failed); 21.43 -#endif 21.44 - 21.45 -/* Export string functions. We normally rely on gcc builtin for most of these, 21.46 - but gcc sometimes decides not to inline them. */ 21.47 -#undef memcpy 21.48 -#undef memset 21.49 -#undef memmove 21.50 - 21.51 -extern void * memset(void *,int,__kernel_size_t); 21.52 -extern void * memcpy(void *,const void *,__kernel_size_t); 21.53 -extern void * __memcpy(void *,const void *,__kernel_size_t); 21.54 - 21.55 -EXPORT_SYMBOL(memset); 21.56 -EXPORT_SYMBOL(memcpy); 21.57 -EXPORT_SYMBOL(__memcpy); 21.58 - 21.59 -EXPORT_SYMBOL(empty_zero_page); 21.60 - 21.61 -EXPORT_SYMBOL(load_gs_index); 21.62 -
22.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Mon Mar 05 12:49:12 2007 -0600 22.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/init-xen.c Thu Mar 08 14:39:52 2007 -0600 22.3 @@ -102,14 +102,11 @@ static void __meminit early_make_page_re 22.4 BUG(); 22.5 } 22.6 22.7 -void make_page_readonly(void *va, unsigned int feature) 22.8 +static void __make_page_readonly(void *va) 22.9 { 22.10 pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep; 22.11 unsigned long addr = (unsigned long) va; 22.12 22.13 - if (xen_feature(feature)) 22.14 - return; 22.15 - 22.16 pgd = pgd_offset_k(addr); 22.17 pud = pud_offset(pgd, addr); 22.18 pmd = pmd_offset(pud, addr); 22.19 @@ -120,17 +117,14 @@ void make_page_readonly(void *va, unsign 22.20 xen_l1_entry_update(ptep, pte); /* fallback */ 22.21 22.22 if ((addr >= VMALLOC_START) && (addr < VMALLOC_END)) 22.23 - make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT), feature); 22.24 + __make_page_readonly(__va(pte_pfn(pte) << PAGE_SHIFT)); 22.25 } 22.26 22.27 -void make_page_writable(void *va, unsigned int feature) 22.28 +static void __make_page_writable(void *va) 22.29 { 22.30 pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t pte, *ptep; 22.31 unsigned long addr = (unsigned long) va; 22.32 22.33 - if (xen_feature(feature)) 22.34 - return; 22.35 - 22.36 pgd = pgd_offset_k(addr); 22.37 pud = pud_offset(pgd, addr); 22.38 pmd = pmd_offset(pud, addr); 22.39 @@ -141,7 +135,19 @@ void make_page_writable(void *va, unsign 22.40 xen_l1_entry_update(ptep, pte); /* fallback */ 22.41 22.42 if ((addr >= VMALLOC_START) && (addr < VMALLOC_END)) 22.43 - make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT), feature); 22.44 + __make_page_writable(__va(pte_pfn(pte) << PAGE_SHIFT)); 22.45 +} 22.46 + 22.47 +void make_page_readonly(void *va, unsigned int feature) 22.48 +{ 22.49 + if (!xen_feature(feature)) 22.50 + __make_page_readonly(va); 22.51 +} 22.52 + 22.53 +void make_page_writable(void *va, unsigned int feature) 22.54 +{ 22.55 + if (!xen_feature(feature)) 22.56 + __make_page_writable(va); 22.57 } 22.58 22.59 void make_pages_readonly(void *va, unsigned nr, unsigned int feature) 22.60 @@ -150,7 +156,7 @@ void make_pages_readonly(void *va, unsig 22.61 return; 22.62 22.63 while (nr-- != 0) { 22.64 - make_page_readonly(va, feature); 22.65 + __make_page_readonly(va); 22.66 va = (void*)((unsigned long)va + PAGE_SIZE); 22.67 } 22.68 } 22.69 @@ -161,7 +167,7 @@ void make_pages_writable(void *va, unsig 22.70 return; 22.71 22.72 while (nr-- != 0) { 22.73 - make_page_writable(va, feature); 22.74 + __make_page_writable(va); 22.75 va = (void*)((unsigned long)va + PAGE_SIZE); 22.76 } 22.77 } 22.78 @@ -208,7 +214,11 @@ static __init void *spp_getpage(void) 22.79 void *ptr; 22.80 if (after_bootmem) 22.81 ptr = (void *) get_zeroed_page(GFP_ATOMIC); 22.82 - else 22.83 + else if (start_pfn < table_end) { 22.84 + ptr = __va(start_pfn << PAGE_SHIFT); 22.85 + start_pfn++; 22.86 + memset(ptr, 0, PAGE_SIZE); 22.87 + } else 22.88 ptr = alloc_bootmem_pages(PAGE_SIZE); 22.89 if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) 22.90 panic("set_pte_phys: cannot allocate page data %s\n", after_bootmem?"after bootmem":""); 22.91 @@ -299,36 +309,26 @@ static __init void set_pte_phys_ma(unsig 22.92 22.93 pmd = (pmd_t *) spp_getpage(); 22.94 make_page_readonly(pmd, XENFEAT_writable_page_tables); 22.95 - 22.96 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER)); 22.97 - 22.98 if (pmd != pmd_offset(pud, 0)) { 22.99 printk("PAGETABLE BUG #01! 
%p <-> %p\n", pmd, pmd_offset(pud,0)); 22.100 return; 22.101 } 22.102 } 22.103 pmd = pmd_offset(pud, vaddr); 22.104 - 22.105 if (pmd_none(*pmd)) { 22.106 pte = (pte_t *) spp_getpage(); 22.107 make_page_readonly(pte, XENFEAT_writable_page_tables); 22.108 - 22.109 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER)); 22.110 if (pte != pte_offset_kernel(pmd, 0)) { 22.111 printk("PAGETABLE BUG #02!\n"); 22.112 return; 22.113 } 22.114 } 22.115 - 22.116 new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot); 22.117 - pte = pte_offset_kernel(pmd, vaddr); 22.118 22.119 - /* 22.120 - * Note that the pte page is already RO, thus we want to use 22.121 - * xen_l1_entry_update(), not set_pte(). 22.122 - */ 22.123 - xen_l1_entry_update(pte, 22.124 - pfn_pte_ma(phys >> PAGE_SHIFT, prot)); 22.125 + pte = pte_offset_kernel(pmd, vaddr); 22.126 + set_pte(pte, new_pte); 22.127 22.128 /* 22.129 * It's enough to flush this one mapping. 22.130 @@ -404,6 +404,7 @@ static inline void __set_pte(pte_t *dst, 22.131 22.132 static inline int make_readonly(unsigned long paddr) 22.133 { 22.134 + extern char __vsyscall_0; 22.135 int readonly = 0; 22.136 22.137 /* Make new page tables read-only. */ 22.138 @@ -420,25 +421,45 @@ static inline int make_readonly(unsigned 22.139 /* 22.140 * No need for writable mapping of kernel image. This also ensures that 22.141 * page and descriptor tables embedded inside don't have writable 22.142 - * mappings. 22.143 + * mappings. Exclude the vsyscall area here, allowing alternative 22.144 + * instruction patching to work. 22.145 */ 22.146 - if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end))) 22.147 + if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end)) 22.148 + && !(paddr >= __pa_symbol(&__vsyscall_0) 22.149 + && paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE)) 22.150 readonly = 1; 22.151 22.152 return readonly; 22.153 } 22.154 22.155 +#ifndef CONFIG_XEN 22.156 /* Must run before zap_low_mappings */ 22.157 __init void *early_ioremap(unsigned long addr, unsigned long size) 22.158 { 22.159 - return ioremap(addr, size); 22.160 + unsigned long map = round_down(addr, LARGE_PAGE_SIZE); 22.161 + 22.162 + /* actually usually some more */ 22.163 + if (size >= LARGE_PAGE_SIZE) { 22.164 + printk("SMBIOS area too long %lu\n", size); 22.165 + return NULL; 22.166 + } 22.167 + set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); 22.168 + map += LARGE_PAGE_SIZE; 22.169 + set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); 22.170 + __flush_tlb(); 22.171 + return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); 22.172 } 22.173 22.174 /* To avoid virtual aliases later */ 22.175 __init void early_iounmap(void *addr, unsigned long size) 22.176 { 22.177 - iounmap(addr); 22.178 + if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) 22.179 + printk("early_iounmap: bad address %p\n", addr); 22.180 + set_pmd(temp_mappings[0].pmd, __pmd(0)); 22.181 + set_pmd(temp_mappings[1].pmd, __pmd(0)); 22.182 + __flush_tlb(); 22.183 } 22.184 +#endif 22.185 22.186 static void __meminit 22.187 phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) 22.188 @@ -638,7 +659,7 @@ static void __init extend_init_mapping(u 22.189 22.190 static void __init find_early_table_space(unsigned long end) 22.191 { 22.192 - unsigned long puds, pmds, ptes, tables; 22.193 + unsigned long puds, pmds, ptes, tables; 22.194 22.195 puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; 22.196 pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; 22.197 @@ 
-658,12 +679,70 @@ static void __init find_early_table_spac 22.198 (table_start << PAGE_SHIFT) + tables); 22.199 } 22.200 22.201 +static void xen_finish_init_mapping(void) 22.202 +{ 22.203 + unsigned long i, start, end; 22.204 + 22.205 + /* Re-vector virtual addresses pointing into the initial 22.206 + mapping to the just-established permanent ones. */ 22.207 + xen_start_info = __va(__pa(xen_start_info)); 22.208 + xen_start_info->pt_base = (unsigned long) 22.209 + __va(__pa(xen_start_info->pt_base)); 22.210 + if (!xen_feature(XENFEAT_auto_translated_physmap)) { 22.211 + phys_to_machine_mapping = 22.212 + __va(__pa(xen_start_info->mfn_list)); 22.213 + xen_start_info->mfn_list = (unsigned long) 22.214 + phys_to_machine_mapping; 22.215 + } 22.216 + if (xen_start_info->mod_start) 22.217 + xen_start_info->mod_start = (unsigned long) 22.218 + __va(__pa(xen_start_info->mod_start)); 22.219 + 22.220 + /* Destroy the Xen-created mappings beyond the kernel image as 22.221 + * well as the temporary mappings created above. Prevents 22.222 + * overlap with modules area (if init mapping is very big). 22.223 + */ 22.224 + start = PAGE_ALIGN((unsigned long)_end); 22.225 + end = __START_KERNEL_map + (table_end << PAGE_SHIFT); 22.226 + for (; start < end; start += PAGE_SIZE) 22.227 + WARN_ON(HYPERVISOR_update_va_mapping( 22.228 + start, __pte_ma(0), 0)); 22.229 + 22.230 + /* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */ 22.231 + table_end = ~0UL; 22.232 + 22.233 + /* 22.234 + * Prefetch pte's for the bt_ioremap() area. It gets used before the 22.235 + * boot-time allocator is online, so allocate-on-demand would fail. 22.236 + */ 22.237 + for (i = FIX_BTMAP_END; i <= FIX_BTMAP_BEGIN; i++) 22.238 + __set_fixmap(i, 0, __pgprot(0)); 22.239 + 22.240 + /* Switch to the real shared_info page, and clear the dummy page. */ 22.241 + set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); 22.242 + HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); 22.243 + memset(empty_zero_page, 0, sizeof(empty_zero_page)); 22.244 + 22.245 + /* Set up mapping of lowest 1MB of physical memory. */ 22.246 + for (i = 0; i < NR_FIX_ISAMAPS; i++) 22.247 + if (is_initial_xendomain()) 22.248 + set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); 22.249 + else 22.250 + __set_fixmap(FIX_ISAMAP_BEGIN - i, 22.251 + virt_to_mfn(empty_zero_page) 22.252 + << PAGE_SHIFT, 22.253 + PAGE_KERNEL_RO); 22.254 + 22.255 + /* Disable the 'start_pfn' allocator. */ 22.256 + table_end = start_pfn; 22.257 +} 22.258 + 22.259 /* Setup the direct mapping of the physical memory at PAGE_OFFSET. 22.260 This runs before bootmem is initialized and gets pages directly from the 22.261 physical memory. To access them they are temporarily mapped. */ 22.262 void __meminit init_memory_mapping(unsigned long start, unsigned long end) 22.263 { 22.264 - unsigned long next; 22.265 + unsigned long next; 22.266 22.267 Dprintk("init_memory_mapping\n"); 22.268 22.269 @@ -702,31 +781,7 @@ void __meminit init_memory_mapping(unsig 22.270 22.271 if (!after_bootmem) { 22.272 BUG_ON(start_pfn != table_end); 22.273 - 22.274 - /* Re-vector virtual addresses pointing into the initial 22.275 - mapping to the just-established permanent ones. 
*/ 22.276 - xen_start_info = __va(__pa(xen_start_info)); 22.277 - xen_start_info->pt_base = (unsigned long) 22.278 - __va(__pa(xen_start_info->pt_base)); 22.279 - if (!xen_feature(XENFEAT_auto_translated_physmap)) { 22.280 - phys_to_machine_mapping = 22.281 - __va(__pa(xen_start_info->mfn_list)); 22.282 - xen_start_info->mfn_list = (unsigned long) 22.283 - phys_to_machine_mapping; 22.284 - } 22.285 - if (xen_start_info->mod_start) 22.286 - xen_start_info->mod_start = (unsigned long) 22.287 - __va(__pa(xen_start_info->mod_start)); 22.288 - 22.289 - /* Destroy the Xen-created mappings beyond the kernel image as 22.290 - * well as the temporary mappings created above. Prevents 22.291 - * overlap with modules area (if init mapping is very big). 22.292 - */ 22.293 - start = PAGE_ALIGN((unsigned long)_end); 22.294 - end = __START_KERNEL_map + (table_end << PAGE_SHIFT); 22.295 - for (; start < end; start += PAGE_SIZE) 22.296 - WARN_ON(HYPERVISOR_update_va_mapping( 22.297 - start, __pte_ma(0), 0)); 22.298 + xen_finish_init_mapping(); 22.299 } 22.300 22.301 __flush_tlb_all(); 22.302 @@ -805,7 +860,6 @@ size_zones(unsigned long *z, unsigned lo 22.303 void __init paging_init(void) 22.304 { 22.305 unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; 22.306 - int i; 22.307 22.308 memory_present(0, 0, end_pfn); 22.309 sparse_init(); 22.310 @@ -813,22 +867,7 @@ void __init paging_init(void) 22.311 free_area_init_node(0, NODE_DATA(0), zones, 22.312 __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); 22.313 22.314 - /* Switch to the real shared_info page, and clear the 22.315 - * dummy page. */ 22.316 - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); 22.317 - HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); 22.318 - memset(empty_zero_page, 0, sizeof(empty_zero_page)); 22.319 - 22.320 init_mm.context.pinned = 1; 22.321 - 22.322 - /* Setup mapping of lower 1st MB */ 22.323 - for (i = 0; i < NR_FIX_ISAMAPS; i++) 22.324 - if (is_initial_xendomain()) 22.325 - set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE); 22.326 - else 22.327 - __set_fixmap(FIX_ISAMAP_BEGIN - i, 22.328 - virt_to_mfn(empty_zero_page) << PAGE_SHIFT, 22.329 - PAGE_KERNEL_RO); 22.330 } 22.331 #endif 22.332 22.333 @@ -1028,11 +1067,6 @@ void __init mem_init(void) 22.334 22.335 void free_init_pages(char *what, unsigned long begin, unsigned long end) 22.336 { 22.337 -#ifdef __DO_LATER__ 22.338 - /* 22.339 - * Some pages can be pinned, but some are not. Unpinning such pages 22.340 - * triggers BUG(). 22.341 - */ 22.342 unsigned long addr; 22.343 22.344 if (begin >= end) 22.345 @@ -1044,25 +1078,27 @@ void free_init_pages(char *what, unsigne 22.346 init_page_count(virt_to_page(addr)); 22.347 memset((void *)(addr & ~(PAGE_SIZE-1)), 22.348 POISON_FREE_INITMEM, PAGE_SIZE); 22.349 - make_page_writable( 22.350 - __va(__pa(addr)), XENFEAT_writable_page_tables); 22.351 - /* 22.352 - * Make pages from __PAGE_OFFSET address as well 22.353 - */ 22.354 - make_page_writable( 22.355 - (void *)addr, XENFEAT_writable_page_tables); 22.356 + if (addr >= __START_KERNEL_map) { 22.357 + /* make_readonly() reports all kernel addresses. 
*/ 22.358 + __make_page_writable(__va(__pa(addr))); 22.359 + if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) { 22.360 + pgd_t *pgd = pgd_offset_k(addr); 22.361 + pud_t *pud = pud_offset(pgd, addr); 22.362 + pmd_t *pmd = pmd_offset(pud, addr); 22.363 + pte_t *pte = pte_offset_kernel(pmd, addr); 22.364 + 22.365 + xen_l1_entry_update(pte, __pte(0)); /* fallback */ 22.366 + } 22.367 + } 22.368 free_page(addr); 22.369 totalram_pages++; 22.370 } 22.371 -#endif 22.372 } 22.373 22.374 void free_initmem(void) 22.375 { 22.376 -#ifdef __DO_LATER__ 22.377 memset(__initdata_begin, POISON_FREE_INITDATA, 22.378 __initdata_end - __initdata_begin); 22.379 -#endif 22.380 free_init_pages("unused kernel memory", 22.381 (unsigned long)(&__init_begin), 22.382 (unsigned long)(&__init_end)); 22.383 @@ -1125,7 +1161,7 @@ int kern_addr_valid(unsigned long addr) 22.384 if (pgd_none(*pgd)) 22.385 return 0; 22.386 22.387 - pud = pud_offset_k(pgd, addr); 22.388 + pud = pud_offset(pgd, addr); 22.389 if (pud_none(*pud)) 22.390 return 0; 22.391
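The make_page_readonly()/make_page_writable() split above follows one pattern: the exported wrappers test the Xen feature flag once, and the new double-underscore workers do the actual PTE rewriting, so the page-range loops and the recursive VMALLOC case no longer re-test an invariant. Consolidated from the hunks above (a restatement of this patch's code, not a new API):

    static void __make_page_readonly(void *va);     /* rewrites the PTE */

    void make_page_readonly(void *va, unsigned int feature)
    {
            if (!xen_feature(feature))              /* checked once */
                    __make_page_readonly(va);
    }

    void make_pages_readonly(void *va, unsigned nr, unsigned int feature)
    {
            if (xen_feature(feature))
                    return;                         /* one check per range */
            while (nr-- != 0) {
                    __make_page_readonly(va);
                    va = (void *)((unsigned long)va + PAGE_SIZE);
            }
    }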
23.1 --- a/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Mon Mar 05 12:49:12 2007 -0600 23.2 +++ b/linux-2.6-xen-sparse/arch/x86_64/mm/pageattr-xen.c Thu Mar 08 14:39:52 2007 -0600 23.3 @@ -24,10 +24,13 @@ static inline void mm_walk_set_prot(void 23.4 { 23.5 struct page *page = virt_to_page(pt); 23.6 unsigned long pfn = page_to_pfn(page); 23.7 + int rc; 23.8 23.9 - BUG_ON(HYPERVISOR_update_va_mapping( 23.10 - (unsigned long)__va(pfn << PAGE_SHIFT), 23.11 - pfn_pte(pfn, flags), 0)); 23.12 + rc = HYPERVISOR_update_va_mapping( 23.13 + (unsigned long)__va(pfn << PAGE_SHIFT), 23.14 + pfn_pte(pfn, flags), 0); 23.15 + if (rc) 23.16 + BUG(); 23.17 } 23.18 23.19 static void mm_walk(struct mm_struct *mm, pgprot_t flags)
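The rewrite above looks cosmetic but is not: BUG_ON() can be compiled away (CONFIG_BUG=n), and anything inside its parentheses disappears with it. Keeping the hypercall outside the assertion guarantees the mapping update always executes:

    /* Fragile: the side effect lives inside the assertion. */
    BUG_ON(HYPERVISOR_update_va_mapping(va, pte, 0));

    /* Robust: the call always runs; only the check is conditional. */
    rc = HYPERVISOR_update_va_mapping(va, pte, 0);
    if (rc)
            BUG();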
24.1 --- a/linux-2.6-xen-sparse/drivers/char/mem.c Mon Mar 05 12:49:12 2007 -0600 24.2 +++ b/linux-2.6-xen-sparse/drivers/char/mem.c Thu Mar 08 14:39:52 2007 -0600 24.3 @@ -787,7 +787,7 @@ static const struct file_operations mem_ 24.4 .open = open_mem, 24.5 }; 24.6 #else 24.7 -extern struct file_operations mem_fops; 24.8 +extern const struct file_operations mem_fops; 24.9 #endif 24.10 24.11 static const struct file_operations kmem_fops = {
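This one-word change supports the constification applied across this changeset (blktap, evtchn, privcmd, tpmback, xenbus_dev, and xen/char/mem below): operations tables that are never modified after initialisation become const so they can be placed in read-only data. The extern declaration must carry the same qualifier as the definition it refers to:

    extern const struct file_operations mem_fops;   /* declaration          */

    const struct file_operations mem_fops = {       /* definition (sketch)  */
            .llseek = memory_lseek,
            .read   = read_mem,
            .write  = write_mem,
    };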
25.1 --- a/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Mon Mar 05 12:49:12 2007 -0600 25.2 +++ b/linux-2.6-xen-sparse/drivers/char/tpm/tpm_xen.c Thu Mar 08 14:39:52 2007 -0600 25.3 @@ -113,14 +113,13 @@ void __exit tpmif_exit(void); 25.4 25.5 25.6 static inline int 25.7 -tx_buffer_copy(struct tx_buffer *txb, const u8 * src, int len, 25.8 +tx_buffer_copy(struct tx_buffer *txb, const u8 *src, int len, 25.9 int isuserbuffer) 25.10 { 25.11 int copied = len; 25.12 25.13 - if (len > txb->size) { 25.14 + if (len > txb->size) 25.15 copied = txb->size; 25.16 - } 25.17 if (isuserbuffer) { 25.18 if (copy_from_user(txb->data, src, copied)) 25.19 return -EFAULT; 25.20 @@ -133,18 +132,20 @@ tx_buffer_copy(struct tx_buffer *txb, co 25.21 25.22 static inline struct tx_buffer *tx_buffer_alloc(void) 25.23 { 25.24 - struct tx_buffer *txb = kzalloc(sizeof (struct tx_buffer), 25.25 - GFP_KERNEL); 25.26 + struct tx_buffer *txb; 25.27 + 25.28 + txb = kzalloc(sizeof(struct tx_buffer), GFP_KERNEL); 25.29 + if (!txb) 25.30 + return NULL; 25.31 25.32 - if (txb) { 25.33 - txb->len = 0; 25.34 - txb->size = PAGE_SIZE; 25.35 - txb->data = (unsigned char *)__get_free_page(GFP_KERNEL); 25.36 - if (txb->data == NULL) { 25.37 - kfree(txb); 25.38 - txb = NULL; 25.39 - } 25.40 + txb->len = 0; 25.41 + txb->size = PAGE_SIZE; 25.42 + txb->data = (unsigned char *)__get_free_page(GFP_KERNEL); 25.43 + if (txb->data == NULL) { 25.44 + kfree(txb); 25.45 + txb = NULL; 25.46 } 25.47 + 25.48 return txb; 25.49 } 25.50 25.51 @@ -160,37 +161,41 @@ static inline void tx_buffer_free(struct 25.52 /************************************************************** 25.53 Utility function for the tpm_private structure 25.54 **************************************************************/ 25.55 -static inline void tpm_private_init(struct tpm_private *tp) 25.56 +static void tpm_private_init(struct tpm_private *tp) 25.57 { 25.58 spin_lock_init(&tp->tx_lock); 25.59 init_waitqueue_head(&tp->wait_q); 25.60 atomic_set(&tp->refcnt, 1); 25.61 } 25.62 25.63 -static inline void tpm_private_put(void) 25.64 +static void tpm_private_put(void) 25.65 { 25.66 - if ( atomic_dec_and_test(&my_priv->refcnt)) { 25.67 - tpmif_free_tx_buffers(my_priv); 25.68 - kfree(my_priv); 25.69 - my_priv = NULL; 25.70 - } 25.71 + if (!atomic_dec_and_test(&my_priv->refcnt)) 25.72 + return; 25.73 + 25.74 + tpmif_free_tx_buffers(my_priv); 25.75 + kfree(my_priv); 25.76 + my_priv = NULL; 25.77 } 25.78 25.79 static struct tpm_private *tpm_private_get(void) 25.80 { 25.81 int err; 25.82 - if (!my_priv) { 25.83 - my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL); 25.84 - if (my_priv) { 25.85 - tpm_private_init(my_priv); 25.86 - err = tpmif_allocate_tx_buffers(my_priv); 25.87 - if (err < 0) { 25.88 - tpm_private_put(); 25.89 - } 25.90 - } 25.91 - } else { 25.92 + 25.93 + if (my_priv) { 25.94 atomic_inc(&my_priv->refcnt); 25.95 + return my_priv; 25.96 } 25.97 + 25.98 + my_priv = kzalloc(sizeof(struct tpm_private), GFP_KERNEL); 25.99 + if (!my_priv) 25.100 + return NULL; 25.101 + 25.102 + tpm_private_init(my_priv); 25.103 + err = tpmif_allocate_tx_buffers(my_priv); 25.104 + if (err < 0) 25.105 + tpm_private_put(); 25.106 + 25.107 return my_priv; 25.108 } 25.109 25.110 @@ -379,10 +384,8 @@ static int tpmfront_probe(struct xenbus_ 25.111 return -ENOMEM; 25.112 25.113 tp->chip = init_vtpm(&dev->dev, &tvd, tp); 25.114 - 25.115 - if (IS_ERR(tp->chip)) { 25.116 + if (IS_ERR(tp->chip)) 25.117 return PTR_ERR(tp->chip); 25.118 - } 25.119 25.120 err = xenbus_scanf(XBT_NIL, dev->nodename, 25.121 
"handle", "%i", &handle); 25.122 @@ -401,6 +404,7 @@ static int tpmfront_probe(struct xenbus_ 25.123 tpm_private_put(); 25.124 return err; 25.125 } 25.126 + 25.127 return 0; 25.128 } 25.129 25.130 @@ -417,32 +421,36 @@ static int tpmfront_suspend(struct xenbu 25.131 { 25.132 struct tpm_private *tp = tpm_private_from_dev(&dev->dev); 25.133 u32 ctr; 25.134 - /* lock, so no app can send */ 25.135 + 25.136 + /* Take the lock, preventing any application from sending. */ 25.137 mutex_lock(&suspend_lock); 25.138 tp->is_suspended = 1; 25.139 25.140 - for (ctr = 0; atomic_read(&tp->tx_busy) && ctr <= 300; ctr++) { 25.141 + for (ctr = 0; atomic_read(&tp->tx_busy); ctr++) { 25.142 if ((ctr % 10) == 0) 25.143 printk("TPM-FE [INFO]: Waiting for outstanding " 25.144 "request.\n"); 25.145 - /* 25.146 - * Wait for a request to be responded to. 25.147 - */ 25.148 + /* Wait for a request to be responded to. */ 25.149 interruptible_sleep_on_timeout(&tp->wait_q, 100); 25.150 } 25.151 - xenbus_switch_state(dev, XenbusStateClosing); 25.152 - 25.153 - if (atomic_read(&tp->tx_busy)) { 25.154 - /* 25.155 - * A temporary work-around. 25.156 - */ 25.157 - printk("TPM-FE [WARNING]: Resetting busy flag."); 25.158 - atomic_set(&tp->tx_busy, 0); 25.159 - } 25.160 25.161 return 0; 25.162 } 25.163 25.164 +static int tpmfront_suspend_finish(struct tpm_private *tp) 25.165 +{ 25.166 + tp->is_suspended = 0; 25.167 + /* Allow applications to send again. */ 25.168 + mutex_unlock(&suspend_lock); 25.169 + return 0; 25.170 +} 25.171 + 25.172 +static int tpmfront_suspend_cancel(struct xenbus_device *dev) 25.173 +{ 25.174 + struct tpm_private *tp = tpm_private_from_dev(&dev->dev); 25.175 + return tpmfront_suspend_finish(tp); 25.176 +} 25.177 + 25.178 static int tpmfront_resume(struct xenbus_device *dev) 25.179 { 25.180 struct tpm_private *tp = tpm_private_from_dev(&dev->dev); 25.181 @@ -484,6 +492,7 @@ static struct xenbus_driver tpmfront = { 25.182 .resume = tpmfront_resume, 25.183 .otherend_changed = backend_changed, 25.184 .suspend = tpmfront_suspend, 25.185 + .suspend_cancel = tpmfront_suspend_cancel, 25.186 }; 25.187 25.188 static void __init init_tpm_xenbus(void) 25.189 @@ -514,9 +523,8 @@ static void tpmif_free_tx_buffers(struct 25.190 { 25.191 unsigned int i; 25.192 25.193 - for (i = 0; i < TPMIF_TX_RING_SIZE; i++) { 25.194 + for (i = 0; i < TPMIF_TX_RING_SIZE; i++) 25.195 tx_buffer_free(tp->tx_buffers[i]); 25.196 - } 25.197 } 25.198 25.199 static void tpmif_rx_action(unsigned long priv) 25.200 @@ -536,9 +544,8 @@ static void tpmif_rx_action(unsigned lon 25.201 received = tx->size; 25.202 25.203 buffer = kmalloc(received, GFP_ATOMIC); 25.204 - if (NULL == buffer) { 25.205 + if (!buffer) 25.206 goto exit; 25.207 - } 25.208 25.209 for (i = 0; i < TPMIF_TX_RING_SIZE && offset < received; i++) { 25.210 struct tx_buffer *txb = tp->tx_buffers[i]; 25.211 @@ -547,9 +554,8 @@ static void tpmif_rx_action(unsigned lon 25.212 25.213 tx = &tp->tx->ring[i].req; 25.214 tocopy = tx->size; 25.215 - if (tocopy > PAGE_SIZE) { 25.216 + if (tocopy > PAGE_SIZE) 25.217 tocopy = PAGE_SIZE; 25.218 - } 25.219 25.220 memcpy(&buffer[offset], txb->data, tocopy); 25.221 25.222 @@ -607,12 +613,13 @@ static int tpm_xmit(struct tpm_private * 25.223 struct tx_buffer *txb = tp->tx_buffers[i]; 25.224 int copied; 25.225 25.226 - if (NULL == txb) { 25.227 + if (!txb) { 25.228 DPRINTK("txb (i=%d) is NULL. 
buffers initilized?\n" 25.229 "Not transmitting anything!\n", i); 25.230 spin_unlock_irq(&tp->tx_lock); 25.231 return -EFAULT; 25.232 } 25.233 + 25.234 copied = tx_buffer_copy(txb, &buf[offset], count, 25.235 isuserbuffer); 25.236 if (copied < 0) { 25.237 @@ -624,25 +631,26 @@ static int tpm_xmit(struct tpm_private * 25.238 offset += copied; 25.239 25.240 tx = &tp->tx->ring[i].req; 25.241 - 25.242 tx->addr = virt_to_machine(txb->data); 25.243 tx->size = txb->len; 25.244 25.245 - DPRINTK("First 4 characters sent by TPM-FE are 0x%02x 0x%02x 0x%02x 0x%02x\n", 25.246 + DPRINTK("First 4 characters sent by TPM-FE are " 25.247 + "0x%02x 0x%02x 0x%02x 0x%02x\n", 25.248 txb->data[0],txb->data[1],txb->data[2],txb->data[3]); 25.249 25.250 - /* get the granttable reference for this page */ 25.251 + /* Get the granttable reference for this page. */ 25.252 tx->ref = gnttab_claim_grant_reference(&gref_head); 25.253 - 25.254 - if (-ENOSPC == tx->ref) { 25.255 + if (tx->ref == -ENOSPC) { 25.256 spin_unlock_irq(&tp->tx_lock); 25.257 - DPRINTK(" Grant table claim reference failed in func:%s line:%d file:%s\n", __FUNCTION__, __LINE__, __FILE__); 25.258 + DPRINTK("Grant table claim reference failed in " 25.259 + "func:%s line:%d file:%s\n", 25.260 + __FUNCTION__, __LINE__, __FILE__); 25.261 return -ENOSPC; 25.262 } 25.263 - gnttab_grant_foreign_access_ref( tx->ref, 25.264 - tp->backend_id, 25.265 - virt_to_mfn(txb->data), 25.266 - 0 /*RW*/); 25.267 + gnttab_grant_foreign_access_ref(tx->ref, 25.268 + tp->backend_id, 25.269 + virt_to_mfn(txb->data), 25.270 + 0 /*RW*/); 25.271 wmb(); 25.272 } 25.273 25.274 @@ -660,15 +668,10 @@ static int tpm_xmit(struct tpm_private * 25.275 25.276 static void tpmif_notify_upperlayer(struct tpm_private *tp) 25.277 { 25.278 - /* 25.279 - * Notify upper layer about the state of the connection 25.280 - * to the BE. 25.281 - */ 25.282 - if (tp->is_connected) { 25.283 - vtpm_vd_status(tp->chip, TPM_VD_STATUS_CONNECTED); 25.284 - } else { 25.285 - vtpm_vd_status(tp->chip, TPM_VD_STATUS_DISCONNECTED); 25.286 - } 25.287 + /* Notify upper layer about the state of the connection to the BE. */ 25.288 + vtpm_vd_status(tp->chip, (tp->is_connected 25.289 + ? TPM_VD_STATUS_CONNECTED 25.290 + : TPM_VD_STATUS_DISCONNECTED)); 25.291 } 25.292 25.293 25.294 @@ -679,20 +682,16 @@ static void tpmif_set_connected_state(st 25.295 * should disconnect - assumption is that we will resume 25.296 * The mutex keeps apps from sending. 25.297 */ 25.298 - if (is_connected == 0 && tp->is_suspended == 1) { 25.299 + if (is_connected == 0 && tp->is_suspended == 1) 25.300 return; 25.301 - } 25.302 25.303 /* 25.304 * Unlock the mutex if we are connected again 25.305 * after being suspended - now resuming. 25.306 * This also removes the suspend state. 
25.307 */ 25.308 - if (is_connected == 1 && tp->is_suspended == 1) { 25.309 - tp->is_suspended = 0; 25.310 - /* unlock, so apps can resume sending */ 25.311 - mutex_unlock(&suspend_lock); 25.312 - } 25.313 + if (is_connected == 1 && tp->is_suspended == 1) 25.314 + tpmfront_suspend_finish(tp); 25.315 25.316 if (is_connected != tp->is_connected) { 25.317 tp->is_connected = is_connected; 25.318 @@ -710,33 +709,24 @@ static void tpmif_set_connected_state(st 25.319 25.320 static int __init tpmif_init(void) 25.321 { 25.322 - long rc = 0; 25.323 struct tpm_private *tp; 25.324 25.325 if (is_initial_xendomain()) 25.326 return -EPERM; 25.327 25.328 tp = tpm_private_get(); 25.329 - if (!tp) { 25.330 - rc = -ENOMEM; 25.331 - goto failexit; 25.332 - } 25.333 + if (!tp) 25.334 + return -ENOMEM; 25.335 25.336 IPRINTK("Initialising the vTPM driver.\n"); 25.337 - if ( gnttab_alloc_grant_references ( TPMIF_TX_RING_SIZE, 25.338 - &gref_head ) < 0) { 25.339 - rc = -EFAULT; 25.340 - goto gnttab_alloc_failed; 25.341 + if (gnttab_alloc_grant_references(TPMIF_TX_RING_SIZE, 25.342 + &gref_head) < 0) { 25.343 + tpm_private_put(); 25.344 + return -EFAULT; 25.345 } 25.346 25.347 init_tpm_xenbus(); 25.348 return 0; 25.349 - 25.350 -gnttab_alloc_failed: 25.351 - tpm_private_put(); 25.352 -failexit: 25.353 - 25.354 - return (int)rc; 25.355 } 25.356 25.357
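Beyond the style cleanups, the tpm_xen.c changes consolidate a lazily allocated, refcounted singleton and add a suspend_cancel handler, so an aborted suspend releases the suspend lock instead of force-clearing tx_busy as before. The singleton shape, condensed from the hunks above (a sketch of this patch's logic, not a new interface):

    static struct tpm_private *my_priv;

    static struct tpm_private *tpm_private_get(void)
    {
            if (my_priv) {
                    atomic_inc(&my_priv->refcnt);
                    return my_priv;
            }
            my_priv = kzalloc(sizeof(*my_priv), GFP_KERNEL);
            if (!my_priv)
                    return NULL;
            tpm_private_init(my_priv);              /* refcnt starts at 1   */
            if (tpmif_allocate_tx_buffers(my_priv) < 0)
                    tpm_private_put();              /* frees, resets my_priv */
            return my_priv;                         /* NULL after failed put */
    }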
26.1 --- a/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Mon Mar 05 12:49:12 2007 -0600 26.2 +++ b/linux-2.6-xen-sparse/drivers/xen/blktap/blktap.c Thu Mar 08 14:39:52 2007 -0600 26.3 @@ -335,7 +335,7 @@ static int blktap_ioctl(struct inode *in 26.4 unsigned int cmd, unsigned long arg); 26.5 static unsigned int blktap_poll(struct file *file, poll_table *wait); 26.6 26.7 -static struct file_operations blktap_fops = { 26.8 +static const struct file_operations blktap_fops = { 26.9 .owner = THIS_MODULE, 26.10 .poll = blktap_poll, 26.11 .ioctl = blktap_ioctl,
27.1 --- a/linux-2.6-xen-sparse/drivers/xen/char/mem.c Mon Mar 05 12:49:12 2007 -0600 27.2 +++ b/linux-2.6-xen-sparse/drivers/xen/char/mem.c Thu Mar 08 14:39:52 2007 -0600 27.3 @@ -194,7 +194,7 @@ static int open_mem(struct inode * inode 27.4 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 27.5 } 27.6 27.7 -struct file_operations mem_fops = { 27.8 +const struct file_operations mem_fops = { 27.9 .llseek = memory_lseek, 27.10 .read = read_mem, 27.11 .write = write_mem,
28.1 --- a/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c Mon Mar 05 12:49:12 2007 -0600 28.2 +++ b/linux-2.6-xen-sparse/drivers/xen/core/machine_reboot.c Thu Mar 08 14:39:52 2007 -0600 28.3 @@ -59,27 +59,11 @@ EXPORT_SYMBOL(machine_restart); 28.4 EXPORT_SYMBOL(machine_halt); 28.5 EXPORT_SYMBOL(machine_power_off); 28.6 28.7 -/* Ensure we run on the idle task page tables so that we will 28.8 - switch page tables before running user space. This is needed 28.9 - on architectures with separate kernel and user page tables 28.10 - because the user page table pointer is not saved/restored. */ 28.11 -static void switch_idle_mm(void) 28.12 -{ 28.13 - struct mm_struct *mm = current->active_mm; 28.14 - 28.15 - if (mm == &init_mm) 28.16 - return; 28.17 - 28.18 - atomic_inc(&init_mm.mm_count); 28.19 - switch_mm(mm, &init_mm, current); 28.20 - current->active_mm = &init_mm; 28.21 - mmdrop(mm); 28.22 -} 28.23 - 28.24 static void pre_suspend(void) 28.25 { 28.26 HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; 28.27 - clear_fixmap(FIX_SHARED_INFO); 28.28 + HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO), 28.29 + __pte_ma(0), 0); 28.30 28.31 xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); 28.32 xen_start_info->console.domU.mfn = 28.33 @@ -89,6 +73,7 @@ static void pre_suspend(void) 28.34 static void post_suspend(int suspend_cancelled) 28.35 { 28.36 int i, j, k, fpp; 28.37 + unsigned long shinfo_mfn; 28.38 extern unsigned long max_pfn; 28.39 extern unsigned long *pfn_to_mfn_frame_list_list; 28.40 extern unsigned long *pfn_to_mfn_frame_list[]; 28.41 @@ -99,11 +84,14 @@ static void post_suspend(int suspend_can 28.42 xen_start_info->console.domU.mfn = 28.43 pfn_to_mfn(xen_start_info->console.domU.mfn); 28.44 } else { 28.45 +#ifdef CONFIG_SMP 28.46 cpu_initialized_map = cpumask_of_cpu(0); 28.47 +#endif 28.48 } 28.49 - 28.50 - set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info); 28.51 28.52 + shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT; 28.53 + HYPERVISOR_update_va_mapping(fix_to_virt(FIX_SHARED_INFO), 28.54 + pfn_pte_ma(shinfo_mfn, PAGE_KERNEL), 0); 28.55 HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO); 28.56 28.57 memset(empty_zero_page, 0, PAGE_SIZE); 28.58 @@ -172,10 +160,25 @@ static int take_machine_down(void *p_fas 28.59 28.60 post_suspend(suspend_cancelled); 28.61 gnttab_resume(); 28.62 - if (!suspend_cancelled) 28.63 + if (!suspend_cancelled) { 28.64 irq_resume(); 28.65 +#ifdef __x86_64__ 28.66 + /* 28.67 + * Older versions of Xen do not save/restore the user %cr3. 28.68 + * We do it here just in case, but there's no need if we are 28.69 + * in fast-suspend mode as that implies a new enough Xen. 28.70 + */ 28.71 + if (!fast_suspend) { 28.72 + struct mmuext_op op; 28.73 + op.cmd = MMUEXT_NEW_USER_BASEPTR; 28.74 + op.arg1.mfn = pfn_to_mfn(__pa(__user_pgd( 28.75 + current->active_mm->pgd)) >> PAGE_SHIFT); 28.76 + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) 28.77 + BUG(); 28.78 + } 28.79 +#endif 28.80 + } 28.81 time_resume(); 28.82 - switch_idle_mm(); 28.83 local_irq_enable(); 28.84 28.85 if (fast_suspend && !suspend_cancelled) { 28.86 @@ -210,6 +213,10 @@ int __xen_suspend(int fast_suspend) 28.87 } 28.88 #endif 28.89 28.90 + /* If we are definitely UP then 'slow mode' is actually faster. */ 28.91 + if (num_possible_cpus() == 1) 28.92 + fast_suspend = 0; 28.93 + 28.94 if (fast_suspend) { 28.95 xenbus_suspend(); 28.96 err = stop_machine_run(take_machine_down, &fast_suspend, 0);
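Removing switch_idle_mm() is safe only because resume now restores the user half of the address space explicitly: on x86-64 Xen guests the kernel and user pagetables have separate base pointers, and as the patch's own comment notes, older hypervisors did not preserve the user one across save/restore. The operation, as used in the hunk above (x86-64 only; __user_pgd() maps a kernel pgd to its paired user pgd in this tree):

    struct mmuext_op op;
    op.cmd      = MMUEXT_NEW_USER_BASEPTR;
    op.arg1.mfn = pfn_to_mfn(__pa(__user_pgd(current->active_mm->pgd))
                             >> PAGE_SHIFT);
    if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
            BUG();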
29.1 --- a/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Mon Mar 05 12:49:12 2007 -0600 29.2 +++ b/linux-2.6-xen-sparse/drivers/xen/evtchn/evtchn.c Thu Mar 08 14:39:52 2007 -0600 29.3 @@ -406,7 +406,7 @@ static int evtchn_release(struct inode * 29.4 return 0; 29.5 } 29.6 29.7 -static struct file_operations evtchn_fops = { 29.8 +static const struct file_operations evtchn_fops = { 29.9 .owner = THIS_MODULE, 29.10 .read = evtchn_read, 29.11 .write = evtchn_write,
30.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h Mon Mar 05 12:49:12 2007 -0600 30.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h Thu Mar 08 14:39:52 2007 -0600 30.3 @@ -99,9 +99,21 @@ typedef struct netif_st { 30.4 struct net_device *dev; 30.5 struct net_device_stats stats; 30.6 30.7 + unsigned int carrier; 30.8 + 30.9 wait_queue_head_t waiting_to_free; 30.10 } netif_t; 30.11 30.12 +/* 30.13 + * Implement our own carrier flag: the network stack's version causes delays 30.14 + * when the carrier is re-enabled (in particular, dev_activate() may not 30.15 + * immediately be called, which can cause packet loss; also the etherbridge 30.16 + * can be rather lazy in activating its port). 30.17 + */ 30.18 +#define netback_carrier_on(netif) ((netif)->carrier = 1) 30.19 +#define netback_carrier_off(netif) ((netif)->carrier = 0) 30.20 +#define netback_carrier_ok(netif) ((netif)->carrier) 30.21 + 30.22 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) 30.23 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) 30.24 30.25 @@ -120,7 +132,8 @@ int netif_map(netif_t *netif, unsigned l 30.26 30.27 void netif_xenbus_init(void); 30.28 30.29 -#define netif_schedulable(dev) (netif_running(dev) && netif_carrier_ok(dev)) 30.30 +#define netif_schedulable(netif) \ 30.31 + (netif_running((netif)->dev) && netback_carrier_ok(netif)) 30.32 30.33 void netif_schedule_work(netif_t *netif); 30.34 void netif_deschedule_work(netif_t *netif);
31.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Mon Mar 05 12:49:12 2007 -0600 31.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c Thu Mar 08 14:39:52 2007 -0600 31.3 @@ -66,16 +66,19 @@ static void __netif_down(netif_t *netif) 31.4 static int net_open(struct net_device *dev) 31.5 { 31.6 netif_t *netif = netdev_priv(dev); 31.7 - if (netif_carrier_ok(dev)) 31.8 + if (netback_carrier_ok(netif)) { 31.9 __netif_up(netif); 31.10 + netif_start_queue(dev); 31.11 + } 31.12 return 0; 31.13 } 31.14 31.15 static int net_close(struct net_device *dev) 31.16 { 31.17 netif_t *netif = netdev_priv(dev); 31.18 - if (netif_carrier_ok(dev)) 31.19 + if (netback_carrier_ok(netif)) 31.20 __netif_down(netif); 31.21 + netif_stop_queue(dev); 31.22 return 0; 31.23 } 31.24 31.25 @@ -138,8 +141,6 @@ netif_t *netif_alloc(domid_t domid, unsi 31.26 return ERR_PTR(-ENOMEM); 31.27 } 31.28 31.29 - netif_carrier_off(dev); 31.30 - 31.31 netif = netdev_priv(dev); 31.32 memset(netif, 0, sizeof(*netif)); 31.33 netif->domid = domid; 31.34 @@ -148,6 +149,8 @@ netif_t *netif_alloc(domid_t domid, unsi 31.35 init_waitqueue_head(&netif->waiting_to_free); 31.36 netif->dev = dev; 31.37 31.38 + netback_carrier_off(netif); 31.39 + 31.40 netif->credit_bytes = netif->remaining_credit = ~0UL; 31.41 netif->credit_usec = 0UL; 31.42 init_timer(&netif->credit_timeout); 31.43 @@ -285,7 +288,7 @@ int netif_map(netif_t *netif, unsigned l 31.44 netif_get(netif); 31.45 31.46 rtnl_lock(); 31.47 - netif_carrier_on(netif->dev); 31.48 + netback_carrier_on(netif); 31.49 if (netif_running(netif->dev)) 31.50 __netif_up(netif); 31.51 rtnl_unlock(); 31.52 @@ -302,9 +305,10 @@ err_rx: 31.53 31.54 void netif_disconnect(netif_t *netif) 31.55 { 31.56 - if (netif_carrier_ok(netif->dev)) { 31.57 + if (netback_carrier_ok(netif)) { 31.58 rtnl_lock(); 31.59 - netif_carrier_off(netif->dev); 31.60 + netback_carrier_off(netif); 31.61 + netif_carrier_off(netif->dev); /* discard queued packets */ 31.62 if (netif_running(netif->dev)) 31.63 __netif_down(netif); 31.64 rtnl_unlock();
32.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Mon Mar 05 12:49:12 2007 -0600 32.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c Thu Mar 08 14:39:52 2007 -0600 32.3 @@ -38,7 +38,10 @@ 32.4 #include <xen/balloon.h> 32.5 #include <xen/interface/memory.h> 32.6 32.7 -/*#define NETBE_DEBUG_INTERRUPT*/ 32.8 +/*define NETBE_DEBUG_INTERRUPT*/ 32.9 + 32.10 +/* extra field used in struct page */ 32.11 +#define netif_page_index(pg) (*(long *)&(pg)->mapping) 32.12 32.13 struct netbk_rx_meta { 32.14 skb_frag_t frag; 32.15 @@ -231,7 +234,7 @@ static inline int netbk_queue_full(netif 32.16 static void tx_queue_callback(unsigned long data) 32.17 { 32.18 netif_t *netif = (netif_t *)data; 32.19 - if (netif_schedulable(netif->dev)) 32.20 + if (netif_schedulable(netif)) 32.21 netif_wake_queue(netif->dev); 32.22 } 32.23 32.24 @@ -242,7 +245,7 @@ int netif_be_start_xmit(struct sk_buff * 32.25 BUG_ON(skb->dev != dev); 32.26 32.27 /* Drop the packet if the target domain has no receive buffers. */ 32.28 - if (unlikely(!netif_schedulable(dev) || netbk_queue_full(netif))) 32.29 + if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif))) 32.30 goto drop; 32.31 32.32 /* 32.33 @@ -352,7 +355,7 @@ static u16 netbk_gop_frag(netif_t *netif 32.34 copy_gop->flags = GNTCOPY_dest_gref; 32.35 if (PageForeign(page)) { 32.36 struct pending_tx_info *src_pend = 32.37 - &pending_tx_info[page->index]; 32.38 + &pending_tx_info[netif_page_index(page)]; 32.39 copy_gop->source.domid = src_pend->netif->domid; 32.40 copy_gop->source.u.ref = src_pend->req.gref; 32.41 copy_gop->flags |= GNTCOPY_source_gref; 32.42 @@ -681,7 +684,7 @@ static void net_rx_action(unsigned long 32.43 } 32.44 32.45 if (netif_queue_stopped(netif->dev) && 32.46 - netif_schedulable(netif->dev) && 32.47 + netif_schedulable(netif) && 32.48 !netbk_queue_full(netif)) 32.49 netif_wake_queue(netif->dev); 32.50 32.51 @@ -739,7 +742,7 @@ static void add_to_net_schedule_list_tai 32.52 32.53 spin_lock_irq(&net_schedule_list_lock); 32.54 if (!__on_net_schedule_list(netif) && 32.55 - likely(netif_schedulable(netif->dev))) { 32.56 + likely(netif_schedulable(netif))) { 32.57 list_add_tail(&netif->list, &net_schedule_list); 32.58 netif_get(netif); 32.59 } 32.60 @@ -1327,7 +1330,7 @@ static void netif_page_release(struct pa 32.61 /* Ready for next use. */ 32.62 init_page_count(page); 32.63 32.64 - netif_idx_release(page->index); 32.65 + netif_idx_release(netif_page_index(page)); 32.66 } 32.67 32.68 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs) 32.69 @@ -1337,7 +1340,7 @@ irqreturn_t netif_be_int(int irq, void * 32.70 add_to_net_schedule_list_tail(netif); 32.71 maybe_schedule_tx_action(); 32.72 32.73 - if (netif_schedulable(netif->dev) && !netbk_queue_full(netif)) 32.74 + if (netif_schedulable(netif) && !netbk_queue_full(netif)) 32.75 netif_wake_queue(netif->dev); 32.76 32.77 return IRQ_HANDLED; 32.78 @@ -1457,7 +1460,7 @@ static int __init netback_init(void) 32.79 for (i = 0; i < MAX_PENDING_REQS; i++) { 32.80 page = mmap_pages[i]; 32.81 SetPageForeign(page, netif_page_release); 32.82 - page->index = i; 32.83 + netif_page_index(page) = i; 32.84 } 32.85 32.86 pending_cons = 0;
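The new netif_page_index() accessor moves the backend's per-page bookkeeping from page->index into page->mapping. For these driver-private mmap pages the ->mapping field is otherwise unused, so it can carry the index of the owning pending_tx_info slot; the type-punning through a long is deliberate. Condensed from the hunks above:

    /* extra field used in struct page */
    #define netif_page_index(pg)    (*(long *)&(pg)->mapping)

    /* at init time */
    SetPageForeign(page, netif_page_release);
    netif_page_index(page) = i;

    /* when the foreign page is released */
    netif_idx_release(netif_page_index(page));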
33.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Mon Mar 05 12:49:12 2007 -0600 33.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c Thu Mar 08 14:39:52 2007 -0600 33.3 @@ -338,9 +338,7 @@ static void connect(struct backend_info 33.4 33.5 xenbus_switch_state(dev, XenbusStateConnected); 33.6 33.7 - /* May not get a kick from the frontend, so start the tx_queue now. */ 33.8 - if (!netbk_can_queue(be->netif->dev)) 33.9 - netif_wake_queue(be->netif->dev); 33.10 + netif_wake_queue(be->netif->dev); 33.11 } 33.12 33.13
34.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Mon Mar 05 12:49:12 2007 -0600 34.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c Thu Mar 08 14:39:52 2007 -0600 34.3 @@ -154,6 +154,7 @@ struct netfront_info { 34.4 34.5 unsigned int irq; 34.6 unsigned int copying_receiver; 34.7 + unsigned int carrier; 34.8 34.9 /* Receive-ring batched refills. */ 34.10 #define RX_MIN_TARGET 8 34.11 @@ -193,6 +194,15 @@ struct netfront_rx_info { 34.12 }; 34.13 34.14 /* 34.15 + * Implement our own carrier flag: the network stack's version causes delays 34.16 + * when the carrier is re-enabled (in particular, dev_activate() may not 34.17 + * immediately be called, which can cause packet loss). 34.18 + */ 34.19 +#define netfront_carrier_on(netif) ((netif)->carrier = 1) 34.20 +#define netfront_carrier_off(netif) ((netif)->carrier = 0) 34.21 +#define netfront_carrier_ok(netif) ((netif)->carrier) 34.22 + 34.23 +/* 34.24 * Access macros for acquiring freeing slots in tx_skbs[]. 34.25 */ 34.26 34.27 @@ -590,26 +600,6 @@ static int send_fake_arp(struct net_devi 34.28 return dev_queue_xmit(skb); 34.29 } 34.30 34.31 -static int network_open(struct net_device *dev) 34.32 -{ 34.33 - struct netfront_info *np = netdev_priv(dev); 34.34 - 34.35 - memset(&np->stats, 0, sizeof(np->stats)); 34.36 - 34.37 - spin_lock(&np->rx_lock); 34.38 - if (netif_carrier_ok(dev)) { 34.39 - network_alloc_rx_buffers(dev); 34.40 - np->rx.sring->rsp_event = np->rx.rsp_cons + 1; 34.41 - if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) 34.42 - netif_rx_schedule(dev); 34.43 - } 34.44 - spin_unlock(&np->rx_lock); 34.45 - 34.46 - netif_start_queue(dev); 34.47 - 34.48 - return 0; 34.49 -} 34.50 - 34.51 static inline int netfront_tx_slot_available(struct netfront_info *np) 34.52 { 34.53 return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < 34.54 @@ -626,6 +616,26 @@ static inline void network_maybe_wake_tx 34.55 netif_wake_queue(dev); 34.56 } 34.57 34.58 +static int network_open(struct net_device *dev) 34.59 +{ 34.60 + struct netfront_info *np = netdev_priv(dev); 34.61 + 34.62 + memset(&np->stats, 0, sizeof(np->stats)); 34.63 + 34.64 + spin_lock(&np->rx_lock); 34.65 + if (netfront_carrier_ok(np)) { 34.66 + network_alloc_rx_buffers(dev); 34.67 + np->rx.sring->rsp_event = np->rx.rsp_cons + 1; 34.68 + if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) 34.69 + netif_rx_schedule(dev); 34.70 + } 34.71 + spin_unlock(&np->rx_lock); 34.72 + 34.73 + network_maybe_wake_tx(dev); 34.74 + 34.75 + return 0; 34.76 +} 34.77 + 34.78 static void network_tx_buf_gc(struct net_device *dev) 34.79 { 34.80 RING_IDX cons, prod; 34.81 @@ -633,7 +643,7 @@ static void network_tx_buf_gc(struct net 34.82 struct netfront_info *np = netdev_priv(dev); 34.83 struct sk_buff *skb; 34.84 34.85 - BUG_ON(!netif_carrier_ok(dev)); 34.86 + BUG_ON(!netfront_carrier_ok(np)); 34.87 34.88 do { 34.89 prod = np->tx.sring->rsp_prod; 34.90 @@ -703,7 +713,7 @@ static void network_alloc_rx_buffers(str 34.91 int nr_flips; 34.92 netif_rx_request_t *req; 34.93 34.94 - if (unlikely(!netif_carrier_ok(dev))) 34.95 + if (unlikely(!netfront_carrier_ok(np))) 34.96 return; 34.97 34.98 /* 34.99 @@ -934,7 +944,7 @@ static int network_start_xmit(struct sk_ 34.100 34.101 spin_lock_irq(&np->tx_lock); 34.102 34.103 - if (unlikely(!netif_carrier_ok(dev) || 34.104 + if (unlikely(!netfront_carrier_ok(np) || 34.105 (frags > 1 && !xennet_can_sg(dev)) || 34.106 netif_needs_gso(dev, skb))) { 34.107 spin_unlock_irq(&np->tx_lock); 34.108 @@ -1024,7 +1034,7 @@ static irqreturn_t netif_int(int irq, vo 34.109 34.110 
spin_lock_irqsave(&np->tx_lock, flags); 34.111 34.112 - if (likely(netif_carrier_ok(dev))) { 34.113 + if (likely(netfront_carrier_ok(np))) { 34.114 network_tx_buf_gc(dev); 34.115 /* Under tx_lock: protects access to rx shared-ring indexes. */ 34.116 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) 34.117 @@ -1299,7 +1309,7 @@ static int netif_poll(struct net_device 34.118 34.119 spin_lock(&np->rx_lock); 34.120 34.121 - if (unlikely(!netif_carrier_ok(dev))) { 34.122 + if (unlikely(!netfront_carrier_ok(np))) { 34.123 spin_unlock(&np->rx_lock); 34.124 return 0; 34.125 } 34.126 @@ -1317,7 +1327,7 @@ static int netif_poll(struct net_device 34.127 work_done = 0; 34.128 while ((i != rp) && (work_done < budget)) { 34.129 memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); 34.130 - memset(extras, 0, sizeof(extras)); 34.131 + memset(extras, 0, sizeof(rinfo.extras)); 34.132 34.133 err = xennet_get_responses(np, &rinfo, rp, &tmpq, 34.134 &pages_flipped); 34.135 @@ -1744,7 +1754,7 @@ static int network_connect(struct net_de 34.136 * domain a kick because we've probably just requeued some 34.137 * packets. 34.138 */ 34.139 - netif_carrier_on(dev); 34.140 + netfront_carrier_on(np); 34.141 notify_remote_via_irq(np->irq); 34.142 network_tx_buf_gc(dev); 34.143 network_alloc_rx_buffers(dev); 34.144 @@ -1989,7 +1999,7 @@ static struct net_device * __devinit cre 34.145 34.146 np->netdev = netdev; 34.147 34.148 - netif_carrier_off(netdev); 34.149 + netfront_carrier_off(np); 34.150 34.151 return netdev; 34.152 34.153 @@ -2023,7 +2033,7 @@ static void netif_disconnect_backend(str 34.154 /* Stop old i/f to prevent errors whilst we rebuild the state. */ 34.155 spin_lock_irq(&info->tx_lock); 34.156 spin_lock(&info->rx_lock); 34.157 - netif_carrier_off(info->netdev); 34.158 + netfront_carrier_off(info); 34.159 spin_unlock(&info->rx_lock); 34.160 spin_unlock_irq(&info->tx_lock); 34.161
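netfront gains the same private carrier flag as netback, and also picks up a subtle memset fix in netif_poll: `extras` there aliases an array inside rinfo, so sizeof(extras) is the size of a pointer, not of the array it points at. The pitfall in isolation (assuming the local is declared as in that function):

    struct netfront_rx_info rinfo;
    struct netif_extra_info *extras = rinfo.extras;

    memset(extras, 0, sizeof(extras));          /* wrong: clears 8 bytes   */
    memset(extras, 0, sizeof(rinfo.extras));    /* right: clears the array */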
35.1 --- a/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c Mon Mar 05 12:49:12 2007 -0600 35.2 +++ b/linux-2.6-xen-sparse/drivers/xen/pcifront/pci_op.c Thu Mar 08 14:39:52 2007 -0600 35.3 @@ -239,17 +239,12 @@ static void free_root_bus_devs(struct pc 35.4 { 35.5 struct pci_dev *dev; 35.6 35.7 - down_write(&pci_bus_sem); 35.8 while (!list_empty(&bus->devices)) { 35.9 - dev = container_of(bus->devices.next, struct pci_dev, bus_list); 35.10 - up_write(&pci_bus_sem); 35.11 - 35.12 + dev = container_of(bus->devices.next, struct pci_dev, 35.13 + bus_list); 35.14 dev_dbg(&dev->dev, "removing device\n"); 35.15 pci_remove_bus_device(dev); 35.16 - 35.17 - down_write(&pci_bus_sem); 35.18 } 35.19 - up_write(&pci_bus_sem); 35.20 } 35.21 35.22 void pcifront_free_roots(struct pcifront_device *pdev)
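The lock juggling removed above appears to have worked around pci_remove_bus_device() taking pci_bus_sem itself while unlinking the device (an assumption based on the callee's locking, which this simplification relies on); repeatedly dropping and re-taking the semaphore around each removal added nothing. The resulting loop is the plain form:

    while (!list_empty(&bus->devices)) {
            dev = container_of(bus->devices.next, struct pci_dev,
                               bus_list);
            pci_remove_bus_device(dev);     /* handles its own locking */
    }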
36.1 --- a/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Mon Mar 05 12:49:12 2007 -0600 36.2 +++ b/linux-2.6-xen-sparse/drivers/xen/privcmd/privcmd.c Thu Mar 08 14:39:52 2007 -0600 36.3 @@ -248,7 +248,7 @@ static int privcmd_enforce_singleshot_ma 36.4 } 36.5 #endif 36.6 36.7 -static struct file_operations privcmd_file_ops = { 36.8 +static const struct file_operations privcmd_file_ops = { 36.9 .ioctl = privcmd_ioctl, 36.10 .mmap = privcmd_mmap, 36.11 };
37.1 --- a/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Mon Mar 05 12:49:12 2007 -0600 37.2 +++ b/linux-2.6-xen-sparse/drivers/xen/tpmback/tpmback.c Thu Mar 08 14:39:52 2007 -0600 37.3 @@ -629,7 +629,7 @@ static unsigned int vtpm_op_poll(struct 37.4 return flags; 37.5 } 37.6 37.7 -static struct file_operations vtpm_ops = { 37.8 +static const struct file_operations vtpm_ops = { 37.9 .owner = THIS_MODULE, 37.10 .llseek = no_llseek, 37.11 .open = vtpm_op_open,
38.1 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Mon Mar 05 12:49:12 2007 -0600 38.2 +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_dev.c Thu Mar 08 14:39:52 2007 -0600 38.3 @@ -173,17 +173,22 @@ static ssize_t xenbus_dev_write(struct f 38.4 void *reply; 38.5 char *path, *token; 38.6 struct watch_adapter *watch, *tmp_watch; 38.7 - int err; 38.8 + int err, rc = len; 38.9 38.10 - if ((len + u->len) > sizeof(u->u.buffer)) 38.11 - return -EINVAL; 38.12 + if ((len + u->len) > sizeof(u->u.buffer)) { 38.13 + rc = -EINVAL; 38.14 + goto out; 38.15 + } 38.16 38.17 - if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) 38.18 - return -EFAULT; 38.19 + if (copy_from_user(u->u.buffer + u->len, ubuf, len) != 0) { 38.20 + rc = -EFAULT; 38.21 + goto out; 38.22 + } 38.23 38.24 u->len += len; 38.25 - if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) 38.26 - return len; 38.27 + if ((u->len < sizeof(u->u.msg)) || 38.28 + (u->len < (sizeof(u->u.msg) + u->u.msg.len))) 38.29 + return rc; 38.30 38.31 msg_type = u->u.msg.type; 38.32 38.33 @@ -201,14 +206,17 @@ static ssize_t xenbus_dev_write(struct f 38.34 case XS_SET_PERMS: 38.35 if (msg_type == XS_TRANSACTION_START) { 38.36 trans = kmalloc(sizeof(*trans), GFP_KERNEL); 38.37 - if (!trans) 38.38 - return -ENOMEM; 38.39 + if (!trans) { 38.40 + rc = -ENOMEM; 38.41 + goto out; 38.42 + } 38.43 } 38.44 38.45 reply = xenbus_dev_request_and_reply(&u->u.msg); 38.46 if (IS_ERR(reply)) { 38.47 kfree(trans); 38.48 - return PTR_ERR(reply); 38.49 + rc = PTR_ERR(reply); 38.50 + goto out; 38.51 } 38.52 38.53 if (msg_type == XS_TRANSACTION_START) { 38.54 @@ -231,8 +239,10 @@ static ssize_t xenbus_dev_write(struct f 38.55 case XS_UNWATCH: 38.56 path = u->u.buffer + sizeof(u->u.msg); 38.57 token = memchr(path, 0, u->u.msg.len); 38.58 - if (token == NULL) 38.59 - return -EILSEQ; 38.60 + if (token == NULL) { 38.61 + rc = -EILSEQ; 38.62 + goto out; 38.63 + } 38.64 token++; 38.65 38.66 if (msg_type == XS_WATCH) { 38.67 @@ -251,7 +261,8 @@ static ssize_t xenbus_dev_write(struct f 38.68 err = register_xenbus_watch(&watch->watch); 38.69 if (err) { 38.70 free_watch_adapter(watch); 38.71 - return err; 38.72 + rc = err; 38.73 + goto out; 38.74 } 38.75 38.76 list_add(&watch->list, &u->watches); 38.77 @@ -265,7 +276,6 @@ static ssize_t xenbus_dev_write(struct f 38.78 &u->watches, list) { 38.79 if (!strcmp(watch->token, token) && 38.80 !strcmp(watch->watch.node, path)) 38.81 - break; 38.82 { 38.83 unregister_xenbus_watch(&watch->watch); 38.84 list_del(&watch->list); 38.85 @@ -278,11 +288,13 @@ static ssize_t xenbus_dev_write(struct f 38.86 break; 38.87 38.88 default: 38.89 - return -EINVAL; 38.90 + rc = -EINVAL; 38.91 + break; 38.92 } 38.93 38.94 + out: 38.95 u->len = 0; 38.96 - return len; 38.97 + return rc; 38.98 } 38.99 38.100 static int xenbus_dev_open(struct inode *inode, struct file *filp) 38.101 @@ -342,7 +354,7 @@ static unsigned int xenbus_dev_poll(stru 38.102 return 0; 38.103 } 38.104 38.105 -static struct file_operations xenbus_dev_file_ops = { 38.106 +static const struct file_operations xenbus_dev_file_ops = { 38.107 .read = xenbus_dev_read, 38.108 .write = xenbus_dev_write, 38.109 .open = xenbus_dev_open,
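The xenbus_dev_write() rework above funnels every exit through a single out: label, so u->len is reset whether the message was dispatched or rejected; previously an error return left partially-copied bytes in the buffer to corrupt the next write. The added u->len < sizeof(u->u.msg) test also avoids trusting msg.len before the whole header has arrived. The shape of that pattern, reduced to a self-contained sketch (names are illustrative):

#include <errno.h>
#include <string.h>

struct msg_hdr { unsigned int type; unsigned int len; };
struct conn { char buffer[1024]; size_t len; };

static long buffered_write(struct conn *u, const char *buf, size_t len)
{
	long rc = (long)len;		/* success: bytes consumed */

	if (len + u->len > sizeof(u->buffer)) {
		rc = -EINVAL;
		goto out;
	}
	memcpy(u->buffer + u->len, buf, len);
	u->len += len;

	if (u->len < sizeof(struct msg_hdr))
		return rc;		/* header incomplete: keep accumulating */

	/* ... parse and dispatch the complete message; failures set rc
	 * and jump to out ... */
out:
	u->len = 0;			/* never leave stale partial data */
	return rc;
}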
39.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Mon Mar 05 12:49:12 2007 -0600 39.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/page.h Thu Mar 08 14:39:52 2007 -0600 39.3 @@ -20,6 +20,14 @@ 39.4 #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) 39.5 39.6 #ifdef __KERNEL__ 39.7 + 39.8 +/* 39.9 + * Need to repeat this here in order to not include pgtable.h (which in turn 39.10 + * depends on definitions made here), but to be able to use the symbolic 39.11 + * below. The preprocessor will warn if the two definitions aren't identical. 39.12 + */ 39.13 +#define _PAGE_PRESENT 0x001 39.14 + 39.15 #ifndef __ASSEMBLY__ 39.16 39.17 #include <linux/string.h> 39.18 @@ -29,13 +37,6 @@ 39.19 #include <xen/interface/xen.h> 39.20 #include <xen/features.h> 39.21 39.22 -/* 39.23 - * Need to repeat this here in order to not include pgtable.h (which in turn 39.24 - * depends on definitions made here), but to be able to use the symbolic 39.25 - * below. The preprocessor will warn if the two definitions aren't identical. 39.26 - */ 39.27 -#define _PAGE_PRESENT 0x001 39.28 - 39.29 #define arch_free_page(_page,_order) \ 39.30 ({ int foreign = PageForeign(_page); \ 39.31 if (foreign) \ 39.32 @@ -225,8 +226,6 @@ extern int page_is_ram(unsigned long pag 39.33 ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0 ) | \ 39.34 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) 39.35 39.36 -#define __HAVE_ARCH_GATE_AREA 1 39.37 - 39.38 #include <asm-generic/memory_model.h> 39.39 #include <asm-generic/page.h> 39.40
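Moving the duplicated _PAGE_PRESENT definition above the __ASSEMBLY__ guard makes it visible to assembly sources too. The comment's safety net is a C preprocessor rule: an object-like macro may be redefined only with an identical replacement list, so any drift from the pgtable.h copy is diagnosed automatically:

#define _PAGE_PRESENT 0x001
#define _PAGE_PRESENT 0x001	/* identical redefinition: accepted silently */
/*
 * #define _PAGE_PRESENT 0x002	differing redefinition: the compiler
 *				emits a "macro redefined" warning
 */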
40.1 --- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h Mon Mar 05 12:49:12 2007 -0600 40.2 +++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/system.h Thu Mar 08 14:39:52 2007 -0600 40.3 @@ -137,7 +137,7 @@ extern struct task_struct * FASTCALL(__s 40.4 }) 40.5 40.6 #define write_cr4(x) \ 40.7 - __asm__ __volatile__("movl %0,%%cr4": :"r" (x)); 40.8 + __asm__ __volatile__("movl %0,%%cr4": :"r" (x)) 40.9 40.10 /* 40.11 * Clear and set 'TS' bit respectively
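The write_cr4() fix above drops a stray semicolon from the macro body. With it, the macro expands to two statements, which silently breaks brace-less if/else users; a self-contained illustration:

#include <stdio.h>

static void do_write(unsigned long x)
{
	printf("%lx\n", x);
}

#define write_cr4_bad(x)  do_write(x);	/* trailing ';' in the body */
#define write_cr4_good(x) do_write(x)

int main(void)
{
	unsigned long v = 0x20;

	if (v)
		write_cr4_good(v);	/* expands to one statement: fine */
	else
		do_write(0);

	/*
	 * Substituting write_cr4_bad(v); above would expand to
	 * "do_write(v);;": the extra empty statement terminates the
	 * 'if', leaving the 'else' unmatched, and compilation fails.
	 */
	return 0;
}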
41.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h Mon Mar 05 12:49:12 2007 -0600 41.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/fixmap.h Thu Mar 08 14:39:52 2007 -0600 41.3 @@ -53,6 +53,11 @@ enum fixed_addresses { 41.4 #define NR_FIX_ISAMAPS 256 41.5 FIX_ISAMAP_END, 41.6 FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1, 41.7 + __end_of_permanent_fixed_addresses, 41.8 + /* temporary boot-time mappings, used before ioremap() is functional */ 41.9 +#define NR_FIX_BTMAPS 16 41.10 + FIX_BTMAP_END = __end_of_permanent_fixed_addresses, 41.11 + FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS - 1, 41.12 __end_of_fixed_addresses 41.13 }; 41.14
42.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h Mon Mar 05 12:49:12 2007 -0600 42.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/io.h Thu Mar 08 14:39:52 2007 -0600 42.3 @@ -150,8 +150,10 @@ static inline void __iomem * ioremap (un 42.4 return __ioremap(offset, size, 0); 42.5 } 42.6 42.7 -extern void *early_ioremap(unsigned long addr, unsigned long size); 42.8 -extern void early_iounmap(void *addr, unsigned long size); 42.9 +extern void *bt_ioremap(unsigned long addr, unsigned long size); 42.10 +extern void bt_iounmap(void *addr, unsigned long size); 42.11 +#define early_ioremap bt_ioremap 42.12 +#define early_iounmap bt_iounmap 42.13 42.14 /* 42.15 * This one maps high address device memory and turns off caching for that area.
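Together with the fixmap hunk above, this provides boot-time temporary mappings under the upstream early_ioremap()/early_iounmap() names by aliasing them to the Xen tree's bt_ioremap()/bt_iounmap(). Usage is symmetric; a kernel-context sketch with placeholder arguments:

/* Sketch, kernel context, before ioremap() is functional. */
static void __init probe_firmware_table(unsigned long phys,
					unsigned long len)
{
	void *va = early_ioremap(phys, len);	/* i.e. bt_ioremap() */

	if (va) {
		/* ... read the table through va ... */
		early_iounmap(va, len);
	}
}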
43.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Mon Mar 05 12:49:12 2007 -0600 43.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/pgtable.h Thu Mar 08 14:39:52 2007 -0600 43.3 @@ -403,19 +403,6 @@ static inline int pmd_large(pmd_t pte) { 43.4 /* to find an entry in a page-table-directory. */ 43.5 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) 43.6 #define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address)) 43.7 -static inline pud_t *__pud_offset_k(pud_t *pud, unsigned long address) 43.8 -{ 43.9 - return pud + pud_index(address); 43.10 -} 43.11 - 43.12 -/* Find correct pud via the hidden fourth level page level: */ 43.13 - 43.14 -/* This accesses the reference page table of the boot cpu. 43.15 - Other CPUs get synced lazily via the page fault handler. */ 43.16 -static inline pud_t *pud_offset_k(pgd_t *pgd, unsigned long address) 43.17 -{ 43.18 - return pud_offset(pgd_offset_k(address), address); 43.19 -} 43.20 43.21 /* PMD - Level 2 access */ 43.22 #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
44.1 --- a/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h Mon Mar 05 12:49:12 2007 -0600 44.2 +++ b/linux-2.6-xen-sparse/include/asm-x86_64/mach-xen/asm/processor.h Thu Mar 08 14:39:52 2007 -0600 44.3 @@ -246,11 +246,13 @@ DECLARE_PER_CPU(struct tss_struct,init_t 44.4 44.5 44.6 extern struct cpuinfo_x86 boot_cpu_data; 44.7 +#ifndef CONFIG_X86_NO_TSS 44.8 /* Save the original ist values for checking stack pointers during debugging */ 44.9 struct orig_ist { 44.10 unsigned long ist[7]; 44.11 }; 44.12 DECLARE_PER_CPU(struct orig_ist, orig_ist); 44.13 +#endif 44.14 44.15 #ifdef CONFIG_X86_VSMP 44.16 #define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
45.1 --- a/linux-2.6-xen-sparse/include/linux/page-flags.h Mon Mar 05 12:49:12 2007 -0600 45.2 +++ b/linux-2.6-xen-sparse/include/linux/page-flags.h Thu Mar 08 14:39:52 2007 -0600 45.3 @@ -252,14 +252,14 @@ 45.4 #define PageForeign(page) test_bit(PG_foreign, &(page)->flags) 45.5 #define SetPageForeign(page, dtor) do { \ 45.6 set_bit(PG_foreign, &(page)->flags); \ 45.7 - (page)->mapping = (void *)dtor; \ 45.8 + (page)->index = (long)(dtor); \ 45.9 } while (0) 45.10 #define ClearPageForeign(page) do { \ 45.11 clear_bit(PG_foreign, &(page)->flags); \ 45.12 - (page)->mapping = NULL; \ 45.13 + (page)->index = 0; \ 45.14 } while (0) 45.15 #define PageForeignDestructor(page) \ 45.16 - ( (void (*) (struct page *)) (page)->mapping )(page) 45.17 + ( (void (*) (struct page *)) (page)->index )(page) 45.18 45.19 struct page; /* forward declaration */ 45.20
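This hunk moves the foreign-page destructor pointer from page->mapping to page->index. The motivation appears in the mm/Kconfig comment deleted later in this changeset: with split page-table locks, pte_lock_deinit() writes to page->mapping and would clobber the stored destructor. The lifecycle, sketched in kernel context with an illustrative destructor name:

/* Sketch, kernel context. */
static void my_page_release(struct page *page)
{
	ClearPageForeign(page);
	/* ... revoke the grant / free the underlying frame ... */
}

static void grant_page_out(struct page *page)
{
	SetPageForeign(page, my_page_release);	/* stashed in page->index */
	/* ... share the page with the other domain ... */
}

static void free_one_page(struct page *page)
{
	if (PageForeign(page)) {
		PageForeignDestructor(page);	/* calls my_page_release() */
		return;
	}
	/* ... normal allocator path ... */
}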
46.1 --- a/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h Mon Mar 05 12:49:12 2007 -0600 46.2 +++ b/linux-2.6-xen-sparse/include/xen/cpu_hotplug.h Thu Mar 08 14:39:52 2007 -0600 46.3 @@ -4,7 +4,7 @@ 46.4 #include <linux/kernel.h> 46.5 #include <linux/cpumask.h> 46.6 46.7 -#if defined(CONFIG_X86) 46.8 +#if defined(CONFIG_X86) && defined(CONFIG_SMP) 46.9 extern cpumask_t cpu_initialized_map; 46.10 #define cpu_set_initialized(cpu) cpu_set(cpu, cpu_initialized_map) 46.11 #else
47.1 --- a/linux-2.6-xen-sparse/mm/Kconfig Mon Mar 05 12:49:12 2007 -0600 47.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 47.3 @@ -1,157 +0,0 @@ 47.4 -config SELECT_MEMORY_MODEL 47.5 - def_bool y 47.6 - depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL 47.7 - 47.8 -choice 47.9 - prompt "Memory model" 47.10 - depends on SELECT_MEMORY_MODEL 47.11 - default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT 47.12 - default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT 47.13 - default FLATMEM_MANUAL 47.14 - 47.15 -config FLATMEM_MANUAL 47.16 - bool "Flat Memory" 47.17 - depends on !(ARCH_DISCONTIGMEM_ENABLE || ARCH_SPARSEMEM_ENABLE) || ARCH_FLATMEM_ENABLE 47.18 - help 47.19 - This option allows you to change some of the ways that 47.20 - Linux manages its memory internally. Most users will 47.21 - only have one option here: FLATMEM. This is normal 47.22 - and a correct option. 47.23 - 47.24 - Some users of more advanced features like NUMA and 47.25 - memory hotplug may have different options here. 47.26 - DISCONTIGMEM is an more mature, better tested system, 47.27 - but is incompatible with memory hotplug and may suffer 47.28 - decreased performance over SPARSEMEM. If unsure between 47.29 - "Sparse Memory" and "Discontiguous Memory", choose 47.30 - "Discontiguous Memory". 47.31 - 47.32 - If unsure, choose this option (Flat Memory) over any other. 47.33 - 47.34 -config DISCONTIGMEM_MANUAL 47.35 - bool "Discontiguous Memory" 47.36 - depends on ARCH_DISCONTIGMEM_ENABLE 47.37 - help 47.38 - This option provides enhanced support for discontiguous 47.39 - memory systems, over FLATMEM. These systems have holes 47.40 - in their physical address spaces, and this option provides 47.41 - more efficient handling of these holes. However, the vast 47.42 - majority of hardware has quite flat address spaces, and 47.43 - can have degraded performance from extra overhead that 47.44 - this option imposes. 47.45 - 47.46 - Many NUMA configurations will have this as the only option. 47.47 - 47.48 - If unsure, choose "Flat Memory" over this option. 47.49 - 47.50 -config SPARSEMEM_MANUAL 47.51 - bool "Sparse Memory" 47.52 - depends on ARCH_SPARSEMEM_ENABLE 47.53 - help 47.54 - This will be the only option for some systems, including 47.55 - memory hotplug systems. This is normal. 47.56 - 47.57 - For many other systems, this will be an alternative to 47.58 - "Discontiguous Memory". This option provides some potential 47.59 - performance benefits, along with decreased code complexity, 47.60 - but it is newer, and more experimental. 47.61 - 47.62 - If unsure, choose "Discontiguous Memory" or "Flat Memory" 47.63 - over this option. 47.64 - 47.65 -endchoice 47.66 - 47.67 -config DISCONTIGMEM 47.68 - def_bool y 47.69 - depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL 47.70 - 47.71 -config SPARSEMEM 47.72 - def_bool y 47.73 - depends on SPARSEMEM_MANUAL 47.74 - 47.75 -config FLATMEM 47.76 - def_bool y 47.77 - depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL 47.78 - 47.79 -config FLAT_NODE_MEM_MAP 47.80 - def_bool y 47.81 - depends on !SPARSEMEM 47.82 - 47.83 -# 47.84 -# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's 47.85 -# to represent different areas of memory. This variable allows 47.86 -# those dependencies to exist individually. 
47.87 -# 47.88 -config NEED_MULTIPLE_NODES 47.89 - def_bool y 47.90 - depends on DISCONTIGMEM || NUMA 47.91 - 47.92 -config HAVE_MEMORY_PRESENT 47.93 - def_bool y 47.94 - depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM 47.95 - 47.96 -# 47.97 -# SPARSEMEM_EXTREME (which is the default) does some bootmem 47.98 -# allocations when memory_present() is called. If this can not 47.99 -# be done on your architecture, select this option. However, 47.100 -# statically allocating the mem_section[] array can potentially 47.101 -# consume vast quantities of .bss, so be careful. 47.102 -# 47.103 -# This option will also potentially produce smaller runtime code 47.104 -# with gcc 3.4 and later. 47.105 -# 47.106 -config SPARSEMEM_STATIC 47.107 - def_bool n 47.108 - 47.109 -# 47.110 -# Architectecture platforms which require a two level mem_section in SPARSEMEM 47.111 -# must select this option. This is usually for architecture platforms with 47.112 -# an extremely sparse physical address space. 47.113 -# 47.114 -config SPARSEMEM_EXTREME 47.115 - def_bool y 47.116 - depends on SPARSEMEM && !SPARSEMEM_STATIC 47.117 - 47.118 -# eventually, we can have this option just 'select SPARSEMEM' 47.119 -config MEMORY_HOTPLUG 47.120 - bool "Allow for memory hot-add" 47.121 - depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG 47.122 - depends on (IA64 || X86 || PPC64) 47.123 - 47.124 -comment "Memory hotplug is currently incompatible with Software Suspend" 47.125 - depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND 47.126 - 47.127 -# Heavily threaded applications may benefit from splitting the mm-wide 47.128 -# page_table_lock, so that faults on different parts of the user address 47.129 -# space can be handled with less contention: split it at this NR_CPUS. 47.130 -# Default to 4 for wider testing, though 8 might be more appropriate. 47.131 -# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock. 47.132 -# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes. 47.133 -# XEN on x86 architecture uses the mapping field on pagetable pages to store a 47.134 -# pointer to the destructor. This conflicts with pte_lock_deinit(). 47.135 -# 47.136 -config SPLIT_PTLOCK_CPUS 47.137 - int 47.138 - default "4096" if ARM && !CPU_CACHE_VIPT 47.139 - default "4096" if PARISC && !PA20 47.140 - default "4096" if X86_XEN || X86_64_XEN 47.141 - default "4" 47.142 - 47.143 -# 47.144 -# support for page migration 47.145 -# 47.146 -config MIGRATION 47.147 - bool "Page migration" 47.148 - def_bool y 47.149 - depends on NUMA 47.150 - help 47.151 - Allows the migration of the physical location of pages of processes 47.152 - while the virtual addresses are not changed. This is useful for 47.153 - example on NUMA systems to put pages nearer to the processors accessing 47.154 - the page. 47.155 - 47.156 -config RESOURCES_64BIT 47.157 - bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL) 47.158 - default 64BIT 47.159 - help 47.160 - This option allows memory and IO resources to be 64 bit.
48.1 --- a/patches/linux-2.6.18/blktap-aio-16_03_06.patch Mon Mar 05 12:49:12 2007 -0600 48.2 +++ b/patches/linux-2.6.18/blktap-aio-16_03_06.patch Thu Mar 08 14:39:52 2007 -0600 48.3 @@ -106,7 +106,7 @@ diff -pruN ../orig-linux-2.6.18/fs/aio.c 48.4 + return pollflags; 48.5 +} 48.6 + 48.7 -+static struct file_operations aioq_fops = { 48.8 ++static const struct file_operations aioq_fops = { 48.9 + .release = aio_queue_fd_close, 48.10 + .poll = aio_queue_fd_poll 48.11 +}; 48.12 @@ -201,7 +201,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event 48.13 int maxevents, long timeout); 48.14 static int eventpollfs_delete_dentry(struct dentry *dentry); 48.15 -static struct inode *ep_eventpoll_inode(void); 48.16 -+static struct inode *ep_eventpoll_inode(struct file_operations *fops); 48.17 ++static struct inode *ep_eventpoll_inode(const struct file_operations *fops); 48.18 static int eventpollfs_get_sb(struct file_system_type *fs_type, 48.19 int flags, const char *dev_name, 48.20 void *data, struct vfsmount *mnt); 48.21 @@ -221,7 +221,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event 48.22 -static int ep_getfd(int *efd, struct inode **einode, struct file **efile, 48.23 - struct eventpoll *ep) 48.24 +int ep_getfd(int *efd, struct inode **einode, struct file **efile, 48.25 -+ struct eventpoll *ep, struct file_operations *fops) 48.26 ++ struct eventpoll *ep, const struct file_operations *fops) 48.27 { 48.28 struct qstr this; 48.29 char name[32]; 48.30 @@ -248,7 +248,7 @@ diff -pruN ../orig-linux-2.6.18/fs/event 48.31 48.32 48.33 -static struct inode *ep_eventpoll_inode(void) 48.34 -+static struct inode *ep_eventpoll_inode(struct file_operations *fops) 48.35 ++static struct inode *ep_eventpoll_inode(const struct file_operations *fops) 48.36 { 48.37 int error = -ENOMEM; 48.38 struct inode *inode = new_inode(eventpoll_mnt->mnt_sb); 48.39 @@ -288,7 +288,7 @@ diff -pruN ../orig-linux-2.6.18/include/ 48.40 + */ 48.41 +struct eventpoll; 48.42 +int ep_getfd(int *efd, struct inode **einode, struct file **efile, 48.43 -+ struct eventpoll *ep, struct file_operations *fops); 48.44 ++ struct eventpoll *ep, const struct file_operations *fops); 48.45 #else 48.46 48.47 static inline void eventpoll_init_file(struct file *file) {}
49.1 --- a/tools/Makefile Mon Mar 05 12:49:12 2007 -0600 49.2 +++ b/tools/Makefile Thu Mar 08 14:39:52 2007 -0600 49.3 @@ -24,9 +24,8 @@ SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen 49.4 49.5 # These don't cross-compile 49.6 ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH)) 49.7 -SUBDIRS-y += python 49.8 -SUBDIRS-y += pygrub 49.9 -SUBDIRS-y += ptsname 49.10 +SUBDIRS-$(PYTHON_TOOLS) += python 49.11 +SUBDIRS-$(PYTHON_TOOLS) += pygrub 49.12 endif 49.13 49.14 .PHONY: all 49.15 @@ -42,8 +41,8 @@ install: check 49.16 $(MAKE) -C $$subdir $@; \ 49.17 done 49.18 $(MAKE) ioemuinstall 49.19 - $(INSTALL_DIR) -p $(DESTDIR)/var/xen/dump 49.20 - $(INSTALL_DIR) -p $(DESTDIR)/var/log/xen 49.21 + $(INSTALL_DIR) $(DESTDIR)/var/xen/dump 49.22 + $(INSTALL_DIR) $(DESTDIR)/var/log/xen 49.23 49.24 .PHONY: clean 49.25 clean: check_clean
50.1 --- a/tools/blktap/lib/Makefile Mon Mar 05 12:49:12 2007 -0600 50.2 +++ b/tools/blktap/lib/Makefile Thu Mar 08 14:39:52 2007 -0600 50.3 @@ -40,8 +40,8 @@ build: libblktap.a 50.4 libblktap: libblktap.a 50.5 50.6 install: all 50.7 - $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) 50.8 - $(INSTALL_DIR) -p $(DESTDIR)/usr/include 50.9 + $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR) 50.10 + $(INSTALL_DIR) $(DESTDIR)/usr/include 50.11 $(INSTALL_PROG) $(LIB) $(DESTDIR)/usr/$(LIBDIR) 50.12 ln -sf libblktap.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libblktap.so.$(MAJOR) 50.13 ln -sf libblktap.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libblktap.so
51.1 --- a/tools/console/Makefile Mon Mar 05 12:49:12 2007 -0600 51.2 +++ b/tools/console/Makefile Thu Mar 08 14:39:52 2007 -0600 51.3 @@ -30,7 +30,7 @@ xenconsole: $(patsubst %.c,%.o,$(wildcar 51.4 51.5 .PHONY: install 51.6 install: $(BIN) 51.7 - $(INSTALL_DIR) -p $(DESTDIR)/$(DAEMON_INSTALL_DIR) 51.8 + $(INSTALL_DIR) $(DESTDIR)/$(DAEMON_INSTALL_DIR) 51.9 $(INSTALL_PROG) xenconsoled $(DESTDIR)/$(DAEMON_INSTALL_DIR) 51.10 - $(INSTALL_DIR) -p $(DESTDIR)/$(CLIENT_INSTALL_DIR) 51.11 + $(INSTALL_DIR) $(DESTDIR)/$(CLIENT_INSTALL_DIR) 51.12 $(INSTALL_PROG) xenconsole $(DESTDIR)/$(CLIENT_INSTALL_DIR)
52.1 --- a/tools/firmware/rombios/rombios.c Mon Mar 05 12:49:12 2007 -0600 52.2 +++ b/tools/firmware/rombios/rombios.c Thu Mar 08 14:39:52 2007 -0600 52.3 @@ -890,7 +890,7 @@ static void int14_function(); 52.4 static void int15_function(); 52.5 static void int16_function(); 52.6 static void int17_function(); 52.7 -static void int19_function(); 52.8 +static void int18_function(); 52.9 static void int1a_function(); 52.10 static void int70_function(); 52.11 static void int74_function(); 52.12 @@ -1837,6 +1837,38 @@ keyboard_panic(status) 52.13 } 52.14 52.15 //-------------------------------------------------------------------------- 52.16 +// machine_reset 52.17 +//-------------------------------------------------------------------------- 52.18 + void 52.19 +machine_reset() 52.20 +{ 52.21 + /* Frob the keyboard reset line to reset the processor */ 52.22 + outb(0x64, 0x60); /* Map the flags register at data port (0x60) */ 52.23 + outb(0x60, 0x14); /* Set the flags to system|disable */ 52.24 + outb(0x64, 0xfe); /* Pulse output 0 (system reset) low */ 52.25 + BX_PANIC("Couldn't reset the machine\n"); 52.26 +} 52.27 + 52.28 +//-------------------------------------------------------------------------- 52.29 +// clobber_entry_point 52.30 +// Because PV drivers in HVM guests detach some of the emulated devices, 52.31 +// it is not safe to do a soft reboot by just dropping to real mode and 52.32 +// jumping at ffff:0000. -- the boot drives might have disappeared! 52.33 +// This rather foul function overwrites(!) the BIOS entry point 52.34 +// to point at machine-reset, which will cause the Xen tools to 52.35 +// rebuild the whole machine from scratch. 52.36 +//-------------------------------------------------------------------------- 52.37 + void 52.38 +clobber_entry_point() 52.39 +{ 52.40 + /* The instruction at the entry point is one byte (0xea) for the 52.41 + * jump opcode, then two bytes of address, then two of segment. 52.42 + * Overwrite the address bytes.*/ 52.43 + write_word(0xffff, 0x0001, machine_reset); 52.44 +} 52.45 + 52.46 + 52.47 +//-------------------------------------------------------------------------- 52.48 // shutdown_status_panic 52.49 // called when the shutdown statsu is not implemented, displays the status 52.50 //-------------------------------------------------------------------------- 52.51 @@ -7626,7 +7658,7 @@ int17_function(regs, ds, iret_addr) 52.52 } 52.53 52.54 void 52.55 -int19_function(seq_nr) 52.56 +int18_function(seq_nr) 52.57 Bit16u seq_nr; 52.58 { 52.59 Bit16u ebda_seg=read_word(0x0040,0x000E); 52.60 @@ -7702,8 +7734,8 @@ ASM_START 52.61 push cx 52.62 push dx 52.63 52.64 - mov dl, _int19_function.bootdrv + 2[bp] 52.65 - mov ax, _int19_function.bootseg + 2[bp] 52.66 + mov dl, _int18_function.bootdrv + 2[bp] 52.67 + mov ax, _int18_function.bootseg + 2[bp] 52.68 mov es, ax ;; segment 52.69 mov bx, #0x0000 ;; offset 52.70 mov ah, #0x02 ;; function 2, read diskette sector 52.71 @@ -7714,7 +7746,7 @@ ASM_START 52.72 int #0x13 ;; read sector 52.73 jnc int19_load_done 52.74 mov ax, #0x0001 52.75 - mov _int19_function.status + 2[bp], ax 52.76 + mov _int18_function.status + 2[bp], ax 52.77 52.78 int19_load_done: 52.79 pop dx 52.80 @@ -7789,13 +7821,13 @@ ASM_START 52.81 ;; Build an iret stack frame that will take us to the boot vector. 52.82 ;; iret pops ip, then cs, then flags, so push them in the opposite order. 
52.83 pushf 52.84 - mov ax, _int19_function.bootseg + 0[bp] 52.85 + mov ax, _int18_function.bootseg + 0[bp] 52.86 push ax 52.87 - mov ax, _int19_function.bootip + 0[bp] 52.88 + mov ax, _int18_function.bootip + 0[bp] 52.89 push ax 52.90 ;; Set the magic number in ax and the boot drive in dl. 52.91 mov ax, #0xaa55 52.92 - mov dl, _int19_function.bootdrv + 0[bp] 52.93 + mov dl, _int18_function.bootdrv + 0[bp] 52.94 ;; Zero some of the other registers. 52.95 xor bx, bx 52.96 mov ds, bx 52.97 @@ -8272,6 +8304,8 @@ int18_handler: ;; Boot Failure recovery: 52.98 mov ss, ax 52.99 52.100 ;; Get the boot sequence number out of the IPL memory 52.101 + ;; The first time we do this it will have been set to -1 so 52.102 + ;; we will start from device 0. 52.103 mov bx, #IPL_SEG 52.104 mov ds, bx ;; Set segment 52.105 mov bx, IPL_SEQUENCE_OFFSET ;; BX is now the sequence number 52.106 @@ -8279,43 +8313,33 @@ int18_handler: ;; Boot Failure recovery: 52.107 mov IPL_SEQUENCE_OFFSET, bx ;; Write it back 52.108 mov ds, ax ;; and reset the segment to zero. 52.109 52.110 - ;; Carry on in the INT 19h handler, using the new sequence number 52.111 + ;; Call the C code for the next boot device 52.112 push bx 52.113 - 52.114 - jmp int19_next_boot 52.115 + call _int18_function 52.116 + 52.117 + ;; Boot failed: invoke the boot recovery function... 52.118 + int #0x18 52.119 52.120 ;---------- 52.121 ;- INT19h - 52.122 ;---------- 52.123 int19_relocated: ;; Boot function, relocated 52.124 - 52.125 - ;; int19 was beginning to be really complex, so now it 52.126 - ;; just calls a C function that does the work 52.127 - 52.128 - push bp 52.129 - mov bp, sp 52.130 - 52.131 - ;; Reset SS and SP 52.132 + ;; 52.133 + ;; *** Warning: INT 19h resets the whole machine *** 52.134 + ;; 52.135 + ;; Because PV drivers in HVM guests detach some of the emulated devices, 52.136 + ;; it is not safe to do a soft reboot by just dropping to real mode and 52.137 + ;; invoking INT 19h -- the boot drives might have disappeared! 52.138 + ;; If the user asks for a soft reboot, the only thing we can do is 52.139 + ;; reset the whole machine. When it comes back up, the normal BIOS 52.140 + ;; boot sequence will start, which is more or less the required behaviour. 52.141 + ;; 52.142 + ;; Reset SP and SS 52.143 mov ax, #0xfffe 52.144 mov sp, ax 52.145 xor ax, ax 52.146 mov ss, ax 52.147 - 52.148 - ;; Start from the first boot device (0, in AX) 52.149 - mov bx, #IPL_SEG 52.150 - mov ds, bx ;; Set segment to write to the IPL memory 52.151 - mov IPL_SEQUENCE_OFFSET, ax ;; Save the sequence number 52.152 - mov ds, ax ;; and reset the segment. 52.153 - 52.154 - push ax 52.155 - 52.156 -int19_next_boot: 52.157 - 52.158 - ;; Call the C code for the next boot device 52.159 - call _int19_function 52.160 - 52.161 - ;; Boot failed: invoke the boot recovery function 52.162 - int #0x18 52.163 + call _machine_reset 52.164 52.165 ;---------- 52.166 ;- INT1Ch - 52.167 @@ -9609,6 +9633,8 @@ normal_post: 52.168 52.169 call _log_bios_start 52.170 52.171 + call _clobber_entry_point 52.172 + 52.173 ;; set all interrupts to default handler 52.174 mov bx, #0x0000 ;; offset index 52.175 mov cx, #0x0100 ;; counter (256 interrupts) 52.176 @@ -9857,8 +9883,10 @@ post_default_ints: 52.177 call _tcpa_calling_int19h /* specs: 8.2.3 step 1 */ 52.178 call _tcpa_add_event_separators /* specs: 8.2.3 step 2 */ 52.179 #endif 52.180 - int #0x19 52.181 - //JMP_EP(0x0064) ; INT 19h location 52.182 + 52.183 + ;; Start the boot sequence. 
See the comments in int19_relocated 52.184 + ;; for why we use INT 18h instead of INT 19h here. 52.185 + int #0x18 52.186 52.187 #if BX_TCGBIOS 52.188 call _tcpa_returned_int19h /* specs: 8.2.3 step 3/7 */
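To summarise the rombios rework above: INT 19h can no longer safely re-walk the boot devices, because PV drivers in an HVM guest may have detached the emulated disks, so it now hard-resets the whole machine; the boot-device sequencing moves to the INT 18h path; and clobber_entry_point() rewrites the jump target at ffff:0000 so that even a raw real-mode jump to the BIOS entry lands in machine_reset(). The reset itself is the classic 8042 keyboard-controller pulse; in Linux-style C (note that the rombios outb helper above takes the port first, the reverse of the usual outb(value, port) order):

/* Sketch, kernel/firmware context: pulse 8042 output line 0 low,
 * which is wired to CPU reset on PC-compatible hardware. */
static void kbc_hard_reset(void)
{
	outb(0xfe, 0x64);	/* 0x64 = command port, 0xfe = pulse reset */
	for (;;)
		;		/* the pulse should have reset us by now */
}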
53.1 --- a/tools/guest-headers/Makefile Mon Mar 05 12:49:12 2007 -0600 53.2 +++ b/tools/guest-headers/Makefile Thu Mar 08 14:39:52 2007 -0600 53.3 @@ -13,7 +13,7 @@ check: 53.4 53.5 install-Linux: 53.6 mkdir -p $(DESTDIR)/usr/include/xen/linux 53.7 - install -m0644 $(linuxsparsetree)/include/xen/public/*.h $(DESTDIR)/usr/include/xen/linux 53.8 + $(INSTALL_DATA) $(linuxsparsetree)/include/xen/public/*.h $(DESTDIR)/usr/include/xen/linux 53.9 53.10 install-SunOS: 53.11
54.1 --- a/tools/ioemu/Makefile Mon Mar 05 12:49:12 2007 -0600 54.2 +++ b/tools/ioemu/Makefile Thu Mar 08 14:39:52 2007 -0600 54.3 @@ -65,10 +65,10 @@ common de-ch es fo fr-ca hu 54.4 54.5 install-doc: $(DOCS) 54.6 mkdir -p "$(DESTDIR)$(docdir)" 54.7 - $(INSTALL) -m 644 qemu-doc.html qemu-tech.html "$(DESTDIR)$(docdir)" 54.8 + $(INSTALL_DATA) qemu-doc.html qemu-tech.html "$(DESTDIR)$(docdir)" 54.9 ifndef CONFIG_WIN32 54.10 mkdir -p "$(DESTDIR)$(mandir)/man1" 54.11 - $(INSTALL) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1" 54.12 + $(INSTALL_DATA) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1" 54.13 endif 54.14 54.15 install: all $(if $(BUILD_DOCS),install-doc) 54.16 @@ -77,12 +77,12 @@ install: all $(if $(BUILD_DOCS),install- 54.17 # mkdir -p "$(DESTDIR)$(datadir)" 54.18 # for x in bios.bin vgabios.bin vgabios-cirrus.bin ppc_rom.bin \ 54.19 # video.x openbios-sparc32 linux_boot.bin; do \ 54.20 -# $(INSTALL) -m 644 $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; \ 54.21 +# $(INSTALL_DATA) $(SRC_PATH)/pc-bios/$$x "$(DESTDIR)$(datadir)"; \ 54.22 # done 54.23 ifndef CONFIG_WIN32 54.24 mkdir -p "$(DESTDIR)$(datadir)/keymaps" 54.25 for x in $(KEYMAPS); do \ 54.26 - $(INSTALL) -m 644 $(SRC_PATH)/keymaps/$$x "$(DESTDIR)$(datadir)/keymaps"; \ 54.27 + $(INSTALL_DATA) $(SRC_PATH)/keymaps/$$x "$(DESTDIR)$(datadir)/keymaps"; \ 54.28 done 54.29 endif 54.30 for d in $(TARGET_DIRS); do \
55.1 --- a/tools/ioemu/hw/ide.c Mon Mar 05 12:49:12 2007 -0600 55.2 +++ b/tools/ioemu/hw/ide.c Thu Mar 08 14:39:52 2007 -0600 55.3 @@ -2602,6 +2602,120 @@ void pci_cmd646_ide_init(PCIBus *bus, Bl 55.4 #endif /* DMA_MULTI_THREAD */ 55.5 } 55.6 55.7 +static void pci_ide_save(QEMUFile* f, void *opaque) 55.8 +{ 55.9 + PCIIDEState *d = opaque; 55.10 + int i; 55.11 + 55.12 + for(i = 0; i < 2; i++) { 55.13 + BMDMAState *bm = &d->bmdma[i]; 55.14 + qemu_put_8s(f, &bm->cmd); 55.15 + qemu_put_8s(f, &bm->status); 55.16 + qemu_put_be32s(f, &bm->addr); 55.17 + /* XXX: if a transfer is pending, we do not save it yet */ 55.18 + } 55.19 + 55.20 + /* per IDE interface data */ 55.21 + for(i = 0; i < 2; i++) { 55.22 + IDEState *s = &d->ide_if[i * 2]; 55.23 + uint8_t drive1_selected; 55.24 + qemu_put_8s(f, &s->cmd); 55.25 + drive1_selected = (s->cur_drive != s); 55.26 + qemu_put_8s(f, &drive1_selected); 55.27 + } 55.28 + 55.29 + /* per IDE drive data */ 55.30 + for(i = 0; i < 4; i++) { 55.31 + IDEState *s = &d->ide_if[i]; 55.32 + qemu_put_be32s(f, &s->mult_sectors); 55.33 + qemu_put_be32s(f, &s->identify_set); 55.34 + if (s->identify_set) { 55.35 + qemu_put_buffer(f, (const uint8_t *)s->identify_data, 512); 55.36 + } 55.37 + qemu_put_8s(f, &s->write_cache); 55.38 + qemu_put_8s(f, &s->feature); 55.39 + qemu_put_8s(f, &s->error); 55.40 + qemu_put_be32s(f, &s->nsector); 55.41 + qemu_put_8s(f, &s->sector); 55.42 + qemu_put_8s(f, &s->lcyl); 55.43 + qemu_put_8s(f, &s->hcyl); 55.44 + qemu_put_8s(f, &s->hob_feature); 55.45 + qemu_put_8s(f, &s->hob_nsector); 55.46 + qemu_put_8s(f, &s->hob_sector); 55.47 + qemu_put_8s(f, &s->hob_lcyl); 55.48 + qemu_put_8s(f, &s->hob_hcyl); 55.49 + qemu_put_8s(f, &s->select); 55.50 + qemu_put_8s(f, &s->status); 55.51 + qemu_put_8s(f, &s->lba48); 55.52 + 55.53 + qemu_put_8s(f, &s->sense_key); 55.54 + qemu_put_8s(f, &s->asc); 55.55 + /* XXX: if a transfer is pending, we do not save it yet */ 55.56 + } 55.57 +} 55.58 + 55.59 +static int pci_ide_load(QEMUFile* f, void *opaque, int version_id) 55.60 +{ 55.61 + PCIIDEState *d = opaque; 55.62 + int ret, i; 55.63 + 55.64 + if (version_id != 1) 55.65 + return -EINVAL; 55.66 + 55.67 + for(i = 0; i < 2; i++) { 55.68 + BMDMAState *bm = &d->bmdma[i]; 55.69 + qemu_get_8s(f, &bm->cmd); 55.70 + qemu_get_8s(f, &bm->status); 55.71 + qemu_get_be32s(f, &bm->addr); 55.72 + /* XXX: if a transfer is pending, we do not save it yet */ 55.73 + } 55.74 + 55.75 + /* per IDE interface data */ 55.76 + for(i = 0; i < 2; i++) { 55.77 + IDEState *s = &d->ide_if[i * 2]; 55.78 + uint8_t drive1_selected; 55.79 + qemu_get_8s(f, &s->cmd); 55.80 + qemu_get_8s(f, &drive1_selected); 55.81 + s->cur_drive = &d->ide_if[i * 2 + (drive1_selected != 0)]; 55.82 + } 55.83 + 55.84 + /* per IDE drive data */ 55.85 + for(i = 0; i < 4; i++) { 55.86 + IDEState *s = &d->ide_if[i]; 55.87 + qemu_get_be32s(f, &s->mult_sectors); 55.88 + qemu_get_be32s(f, &s->identify_set); 55.89 + if (s->identify_set) { 55.90 + qemu_get_buffer(f, (uint8_t *)s->identify_data, 512); 55.91 + } 55.92 + qemu_get_8s(f, &s->write_cache); 55.93 + qemu_get_8s(f, &s->feature); 55.94 + qemu_get_8s(f, &s->error); 55.95 + qemu_get_be32s(f, &s->nsector); 55.96 + qemu_get_8s(f, &s->sector); 55.97 + qemu_get_8s(f, &s->lcyl); 55.98 + qemu_get_8s(f, &s->hcyl); 55.99 + qemu_get_8s(f, &s->hob_feature); 55.100 + qemu_get_8s(f, &s->hob_nsector); 55.101 + qemu_get_8s(f, &s->hob_sector); 55.102 + qemu_get_8s(f, &s->hob_lcyl); 55.103 + qemu_get_8s(f, &s->hob_hcyl); 55.104 + qemu_get_8s(f, &s->select); 55.105 + qemu_get_8s(f, 
&s->status); 55.106 + qemu_get_8s(f, &s->lba48); 55.107 + 55.108 + qemu_get_8s(f, &s->sense_key); 55.109 + qemu_get_8s(f, &s->asc); 55.110 + /* XXX: if a transfer is pending, we do not save it yet */ 55.111 + if (s->status & (DRQ_STAT|BUSY_STAT)) { 55.112 + /* Tell the guest that its transfer has gone away */ 55.113 + ide_abort_command(s); 55.114 + ide_set_irq(s); 55.115 + } 55.116 + } 55.117 + return 0; 55.118 +} 55.119 + 55.120 + 55.121 /* hd_table must contain 4 block drivers */ 55.122 /* NOTE: for the PIIX3, the IRQs and IOports are hardcoded */ 55.123 void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn) 55.124 @@ -2643,6 +2757,7 @@ void pci_piix3_ide_init(PCIBus *bus, Blo 55.125 buffered_pio_init(); 55.126 55.127 register_savevm("ide_pci", 0, 1, generic_pci_save, generic_pci_load, d); 55.128 + register_savevm("ide", 0, 1, pci_ide_save, pci_ide_load, d); 55.129 55.130 #ifdef DMA_MULTI_THREAD 55.131 dma_create_thread();
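The new pci_ide_save()/pci_ide_load() pair above adds IDE controller and drive state to qemu's save/restore. Two points of the pattern are worth drawing out: the load side must consume fields in exactly the order the save side wrote them, and state that is not saved (an in-flight transfer, detected via DRQ_STAT/BUSY_STAT) is aborted on load so the guest simply retries the command. A minimal sketch of such a symmetric pair, in qemu context (QEMUFile and the qemu_{put,get}_* helpers come from vl.h; 'struct dev' is illustrative):

struct dev {
	uint8_t  cmd;
	uint32_t count;
};

static void dev_save(QEMUFile *f, void *opaque)
{
	struct dev *d = opaque;

	qemu_put_8s(f, &d->cmd);
	qemu_put_be32s(f, &d->count);
}

static int dev_load(QEMUFile *f, void *opaque, int version_id)
{
	struct dev *d = opaque;

	if (version_id != 1)
		return -EINVAL;		/* refuse unknown layouts */
	qemu_get_8s(f, &d->cmd);	/* exactly mirrors dev_save() */
	qemu_get_be32s(f, &d->count);
	return 0;
}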
56.1 --- a/tools/ioemu/target-i386-dm/qemu-ifup Mon Mar 05 12:49:12 2007 -0600 56.2 +++ b/tools/ioemu/target-i386-dm/qemu-ifup Thu Mar 08 14:39:52 2007 -0600 56.3 @@ -3,8 +3,7 @@ 56.4 #. /etc/rc.d/init.d/functions 56.5 #ulimit -c unlimited 56.6 56.7 -echo -c 'config qemu network with xen bridge for ' 56.8 -echo $* 56.9 +echo 'config qemu network with xen bridge for ' $* 56.10 56.11 ifconfig $1 0.0.0.0 up 56.12 brctl addif $2 $1
57.1 --- a/tools/ioemu/vl.c Mon Mar 05 12:49:12 2007 -0600 57.2 +++ b/tools/ioemu/vl.c Thu Mar 08 14:39:52 2007 -0600 57.3 @@ -3250,6 +3250,14 @@ static int net_tap_init(VLANState *vlan, 57.4 pid = fork(); 57.5 if (pid >= 0) { 57.6 if (pid == 0) { 57.7 + int open_max = sysconf(_SC_OPEN_MAX), i; 57.8 + for (i = 0; i < open_max; i++) 57.9 + if (i != STDIN_FILENO && 57.10 + i != STDOUT_FILENO && 57.11 + i != STDERR_FILENO && 57.12 + i != fd) 57.13 + close(i); 57.14 + 57.15 parg = args; 57.16 *parg++ = (char *)setup_script; 57.17 *parg++ = ifname;
58.1 --- a/tools/ioemu/vnc.c Mon Mar 05 12:49:12 2007 -0600 58.2 +++ b/tools/ioemu/vnc.c Thu Mar 08 14:39:52 2007 -0600 58.3 @@ -1445,7 +1445,7 @@ int vnc_display_init(DisplayState *ds, i 58.4 58.5 int vnc_start_viewer(int port) 58.6 { 58.7 - int pid; 58.8 + int pid, i, open_max; 58.9 char s[16]; 58.10 58.11 sprintf(s, ":%d", port); 58.12 @@ -1456,6 +1456,12 @@ int vnc_start_viewer(int port) 58.13 exit(1); 58.14 58.15 case 0: /* child */ 58.16 + open_max = sysconf(_SC_OPEN_MAX); 58.17 + for (i = 0; i < open_max; i++) 58.18 + if (i != STDIN_FILENO && 58.19 + i != STDOUT_FILENO && 58.20 + i != STDERR_FILENO) 58.21 + close(i); 58.22 execlp("vncviewer", "vncviewer", s, NULL); 58.23 fprintf(stderr, "vncviewer execlp failed\n"); 58.24 exit(1);
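Both child-process fixes above (the ifup script spawn in vl.c and the VNC viewer spawn in vnc.c) close every inherited descriptor except the standard streams, plus the tap fd in the ifup case, before exec'ing the helper. Descriptors leaked into a long-lived helper would otherwise hold tap devices and sockets open. The idiom, factored into a helper for clarity (the patch open-codes it):

#include <unistd.h>

/* Sketch: close all inherited fds except stdio and 'keep';
 * pass keep = -1 to keep only stdio. */
static void close_inherited_fds(int keep)
{
	int open_max = sysconf(_SC_OPEN_MAX);
	int i;

	for (i = 0; i < open_max; i++)
		if (i != STDIN_FILENO && i != STDOUT_FILENO &&
		    i != STDERR_FILENO && i != keep)
			close(i);
}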
59.1 --- a/tools/libxc/xc_core.c Mon Mar 05 12:49:12 2007 -0600 59.2 +++ b/tools/libxc/xc_core.c Thu Mar 08 14:39:52 2007 -0600 59.3 @@ -153,7 +153,7 @@ struct xc_core_section_headers { 59.4 uint16_t num; 59.5 uint16_t num_max; 59.6 59.7 - Elf_Shdr *shdrs; 59.8 + Elf64_Shdr *shdrs; 59.9 }; 59.10 #define SHDR_INIT 16 59.11 #define SHDR_INC 4 59.12 @@ -184,14 +184,14 @@ xc_core_shdr_free(struct xc_core_section 59.13 free(sheaders); 59.14 } 59.15 59.16 -Elf_Shdr* 59.17 +Elf64_Shdr* 59.18 xc_core_shdr_get(struct xc_core_section_headers *sheaders) 59.19 { 59.20 - Elf_Shdr *shdr; 59.21 + Elf64_Shdr *shdr; 59.22 59.23 if ( sheaders->num == sheaders->num_max ) 59.24 { 59.25 - Elf_Shdr *shdrs; 59.26 + Elf64_Shdr *shdrs; 59.27 if ( sheaders->num_max + SHDR_INC < sheaders->num_max ) 59.28 { 59.29 errno = E2BIG; 59.30 @@ -212,7 +212,7 @@ xc_core_shdr_get(struct xc_core_section_ 59.31 } 59.32 59.33 int 59.34 -xc_core_shdr_set(Elf_Shdr *shdr, 59.35 +xc_core_shdr_set(Elf64_Shdr *shdr, 59.36 struct xc_core_strtab *strtab, 59.37 const char *name, uint32_t type, 59.38 uint64_t offset, uint64_t size, 59.39 @@ -317,15 +317,15 @@ xc_domain_dumpcore_via_callback(int xc_h 59.40 59.41 uint64_t *pfn_array = NULL; 59.42 59.43 - Elf_Ehdr ehdr; 59.44 - unsigned long filesz; 59.45 - unsigned long offset; 59.46 - unsigned long fixup; 59.47 + Elf64_Ehdr ehdr; 59.48 + uint64_t filesz; 59.49 + uint64_t offset; 59.50 + uint64_t fixup; 59.51 59.52 struct xc_core_strtab *strtab = NULL; 59.53 uint16_t strtab_idx; 59.54 struct xc_core_section_headers *sheaders = NULL; 59.55 - Elf_Shdr *shdr; 59.56 + Elf64_Shdr *shdr; 59.57 59.58 /* elf notes */ 59.59 struct elfnote elfnote; 59.60 @@ -460,7 +460,7 @@ xc_domain_dumpcore_via_callback(int xc_h 59.61 ehdr.e_ident[EI_MAG1] = ELFMAG1; 59.62 ehdr.e_ident[EI_MAG2] = ELFMAG2; 59.63 ehdr.e_ident[EI_MAG3] = ELFMAG3; 59.64 - ehdr.e_ident[EI_CLASS] = ELFCLASS; 59.65 + ehdr.e_ident[EI_CLASS] = ELFCLASS64; 59.66 ehdr.e_ident[EI_DATA] = ELF_ARCH_DATA; 59.67 ehdr.e_ident[EI_VERSION] = EV_CURRENT; 59.68 ehdr.e_ident[EI_OSABI] = ELFOSABI_SYSV; 59.69 @@ -474,9 +474,9 @@ xc_domain_dumpcore_via_callback(int xc_h 59.70 ehdr.e_shoff = sizeof(ehdr); 59.71 ehdr.e_flags = ELF_CORE_EFLAGS; 59.72 ehdr.e_ehsize = sizeof(ehdr); 59.73 - ehdr.e_phentsize = sizeof(Elf_Phdr); 59.74 + ehdr.e_phentsize = sizeof(Elf64_Phdr); 59.75 ehdr.e_phnum = 0; 59.76 - ehdr.e_shentsize = sizeof(Elf_Shdr); 59.77 + ehdr.e_shentsize = sizeof(Elf64_Shdr); 59.78 /* ehdr.e_shnum and ehdr.e_shstrndx aren't known here yet. fill it later*/ 59.79 59.80 /* create section header */
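With this change xc_core always emits ELF64 structures and 64-bit filesz/offset values, so one on-disk format covers every guest; in particular x86_32pae guests have physical addresses that do not fit the 32-bit offset fields of the Elf32 headers. The difference is easy to inspect:

#include <elf.h>
#include <stdio.h>

int main(void)
{
	/* Elf64_Shdr carries 64-bit sh_offset/sh_size, so section data
	 * beyond 4GB stays representable; Elf32_Shdr cannot express it. */
	printf("Elf32_Shdr: %zu bytes, sh_offset: %zu bytes\n",
	       sizeof(Elf32_Shdr), sizeof(((Elf32_Shdr *)0)->sh_offset));
	printf("Elf64_Shdr: %zu bytes, sh_offset: %zu bytes\n",
	       sizeof(Elf64_Shdr), sizeof(((Elf64_Shdr *)0)->sh_offset));
	return 0;
}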
60.1 --- a/tools/libxc/xc_core.h Mon Mar 05 12:49:12 2007 -0600 60.2 +++ b/tools/libxc/xc_core.h Thu Mar 08 14:39:52 2007 -0600 60.3 @@ -116,10 +116,10 @@ struct xen_dumpcore_p2m { 60.4 struct xc_core_strtab; 60.5 struct xc_core_section_headers; 60.6 60.7 -Elf_Shdr* 60.8 +Elf64_Shdr* 60.9 xc_core_shdr_get(struct xc_core_section_headers *sheaders); 60.10 int 60.11 -xc_core_shdr_set(Elf_Shdr *shdr, 60.12 +xc_core_shdr_set(Elf64_Shdr *shdr, 60.13 struct xc_core_strtab *strtab, 60.14 const char *name, uint32_t type, 60.15 uint64_t offset, uint64_t size,
61.1 --- a/tools/libxc/xc_core_ia64.c Mon Mar 05 12:49:12 2007 -0600 61.2 +++ b/tools/libxc/xc_core_ia64.c Thu Mar 08 14:39:52 2007 -0600 61.3 @@ -266,10 +266,10 @@ int 61.4 xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 61.5 struct xc_core_section_headers *sheaders, 61.6 struct xc_core_strtab *strtab, 61.7 - unsigned long *filesz, unsigned long offset) 61.8 + uint64_t *filesz, uint64_t offset) 61.9 { 61.10 int sts = -1; 61.11 - Elf_Shdr *shdr; 61.12 + Elf64_Shdr *shdr; 61.13 61.14 /* mmapped priv regs */ 61.15 shdr = xc_core_shdr_get(sheaders);
62.1 --- a/tools/libxc/xc_core_ia64.h Mon Mar 05 12:49:12 2007 -0600 62.2 +++ b/tools/libxc/xc_core_ia64.h Thu Mar 08 14:39:52 2007 -0600 62.3 @@ -42,7 +42,7 @@ int 62.4 xc_core_arch_context_get_shdr(struct xc_core_arch_context* arch_ctxt, 62.5 struct xc_core_section_headers *sheaders, 62.6 struct xc_core_strtab *strtab, 62.7 - unsigned long *filesz, unsigned long offset); 62.8 + uint64_t *filesz, uint64_t offset); 62.9 int 62.10 xc_core_arch_context_dump(struct xc_core_arch_context* arch_ctxt, 62.11 void* args, dumpcore_rtn_t dump_rtn);
63.1 --- a/tools/libxc/xc_core_x86.h Mon Mar 05 12:49:12 2007 -0600 63.2 +++ b/tools/libxc/xc_core_x86.h Thu Mar 08 14:39:52 2007 -0600 63.3 @@ -45,7 +45,7 @@ static inline int 63.4 xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt, 63.5 struct xc_core_section_headers *sheaders, 63.6 struct xc_core_strtab *strtab, 63.7 - unsigned long *filesz, unsigned long offset) 63.8 + uint64_t *filesz, uint64_t offset) 63.9 { 63.10 *filesz = 0; 63.11 return 0;
64.1 --- a/tools/libxc/xc_dom_core.c Mon Mar 05 12:49:12 2007 -0600 64.2 +++ b/tools/libxc/xc_dom_core.c Thu Mar 08 14:39:52 2007 -0600 64.3 @@ -721,9 +721,6 @@ int xc_dom_build_image(struct xc_dom_ima 64.4 } 64.5 page_size = XC_DOM_PAGE_SIZE(dom); 64.6 64.7 - /* 4MB align virtual base address */ 64.8 - dom->parms.virt_base &= ~(((uint64_t)1<<22)-1); 64.9 - 64.10 /* load kernel */ 64.11 if ( xc_dom_alloc_segment(dom, &dom->kernel_seg, "kernel", 64.12 dom->kernel_seg.vstart,
65.1 --- a/tools/libxc/xc_linux_restore.c Mon Mar 05 12:49:12 2007 -0600 65.2 +++ b/tools/libxc/xc_linux_restore.c Thu Mar 08 14:39:52 2007 -0600 65.3 @@ -19,7 +19,7 @@ static unsigned long max_mfn; 65.4 /* virtual starting address of the hypervisor */ 65.5 static unsigned long hvirt_start; 65.6 65.7 -/* #levels of page tables used by the currrent guest */ 65.8 +/* #levels of page tables used by the current guest */ 65.9 static unsigned int pt_levels; 65.10 65.11 /* total number of pages used by the current guest */ 65.12 @@ -857,6 +857,28 @@ int xc_linux_restore(int xc_handle, int 65.13 65.14 ctxt.ctrlreg[3] = xen_pfn_to_cr3(p2m[pfn]); 65.15 65.16 + /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ 65.17 + if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) 65.18 + { 65.19 + pfn = xen_cr3_to_pfn(ctxt.ctrlreg[1]); 65.20 + 65.21 + if (pfn >= max_pfn) { 65.22 + ERROR("User PT base is bad: pfn=%lu max_pfn=%lu type=%08lx", 65.23 + pfn, max_pfn, pfn_type[pfn]); 65.24 + goto out; 65.25 + } 65.26 + 65.27 + if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != 65.28 + ((unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) { 65.29 + ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", 65.30 + pfn, max_pfn, pfn_type[pfn], 65.31 + (unsigned long)pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); 65.32 + goto out; 65.33 + } 65.34 + 65.35 + ctxt.ctrlreg[1] = xen_pfn_to_cr3(p2m[pfn]); 65.36 + } 65.37 + 65.38 domctl.cmd = XEN_DOMCTL_setvcpucontext; 65.39 domctl.domain = (domid_t)dom; 65.40 domctl.u.vcpucontext.vcpu = i;
66.1 --- a/tools/libxc/xc_linux_save.c Mon Mar 05 12:49:12 2007 -0600 66.2 +++ b/tools/libxc/xc_linux_save.c Thu Mar 08 14:39:52 2007 -0600 66.3 @@ -34,7 +34,7 @@ static unsigned long max_mfn; 66.4 /* virtual starting address of the hypervisor */ 66.5 static unsigned long hvirt_start; 66.6 66.7 -/* #levels of page tables used by the currrent guest */ 66.8 +/* #levels of page tables used by the current guest */ 66.9 static unsigned int pt_levels; 66.10 66.11 /* total number of pages used by the current guest */ 66.12 @@ -491,7 +491,7 @@ static int canonicalize_pagetable(unsign 66.13 ** reserved hypervisor mappings. This depends on the current 66.14 ** page table type as well as the number of paging levels. 66.15 */ 66.16 - xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2)? 4 : 8); 66.17 + xen_start = xen_end = pte_last = PAGE_SIZE / ((pt_levels == 2) ? 4 : 8); 66.18 66.19 if (pt_levels == 2 && type == XEN_DOMCTL_PFINFO_L2TAB) 66.20 xen_start = (hvirt_start >> L2_PAGETABLE_SHIFT); 66.21 @@ -1279,6 +1279,18 @@ int xc_linux_save(int xc_handle, int io_ 66.22 ctxt.ctrlreg[3] = 66.23 xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[3]))); 66.24 66.25 + /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ 66.26 + if ( (pt_levels == 4) && ctxt.ctrlreg[1] ) 66.27 + { 66.28 + if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xen_cr3_to_pfn(ctxt.ctrlreg[1])) ) { 66.29 + ERROR("PT base is not in range of pseudophys map"); 66.30 + goto out; 66.31 + } 66.32 + /* Least-significant bit means 'valid PFN'. */ 66.33 + ctxt.ctrlreg[1] = 1 | 66.34 + xen_pfn_to_cr3(mfn_to_pfn(xen_cr3_to_pfn(ctxt.ctrlreg[1]))); 66.35 + } 66.36 + 66.37 if (!write_exact(io_fd, &ctxt, sizeof(ctxt))) { 66.38 ERROR("Error when writing to state file (1) (errno %d)", errno); 66.39 goto out;
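On 64-bit paravirtualised guests each vcpu has both a kernel and a user top-level pagetable, and the save/restore pair above smuggles the user one through the otherwise-unused CR1 slot of the vcpu context. Since a genuine pagetable base is page-aligned, bit 0 is free to act as the 'user PT present' flag the save-side comment mentions; a sketch of the convention:

#include <stdint.h>

/* Sketch: bit 0 marks a valid stashed value; a real pagetable base is
 * page-aligned, so the bit is otherwise always clear. */
static uint64_t pack_user_pt(uint64_t cr3_value)
{
	return cr3_value ? (cr3_value | 1) : 0;
}

static int user_pt_present(uint64_t packed)
{
	return packed & 1;
}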
67.1 --- a/tools/libxc/xc_ptrace_core.c Mon Mar 05 12:49:12 2007 -0600 67.2 +++ b/tools/libxc/xc_ptrace_core.c Thu Mar 08 14:39:52 2007 -0600 67.3 @@ -192,7 +192,7 @@ pread_exact(int fd, void* buffer, size_t 67.4 struct elf_core 67.5 { 67.6 int domfd; 67.7 - Elf_Ehdr ehdr; 67.8 + Elf64_Ehdr ehdr; 67.9 67.10 char* shdr; 67.11 67.12 @@ -242,6 +242,8 @@ elf_core_init(struct elf_core* ecore, in 67.13 /* check elf header */ 67.14 if (!IS_ELF(ecore->ehdr) || ecore->ehdr.e_type != ET_CORE) 67.15 goto out; 67.16 + if (ecore->ehdr.e_ident[EI_CLASS] != ELFCLASS64) 67.17 + goto out; 67.18 /* check elf header more: EI_DATA, EI_VERSION, e_machine... */ 67.19 67.20 /* read section headers */ 67.21 @@ -294,7 +296,7 @@ elf_core_search_note(struct elf_core* ec 67.22 } 67.23 67.24 static int 67.25 -elf_core_alloc_read_sec(struct elf_core* ecore, const Elf_Shdr* shdr, 67.26 +elf_core_alloc_read_sec(struct elf_core* ecore, const Elf64_Shdr* shdr, 67.27 char** buf) 67.28 { 67.29 int ret; 67.30 @@ -309,19 +311,19 @@ elf_core_alloc_read_sec(struct elf_core* 67.31 return ret; 67.32 } 67.33 67.34 -static Elf_Shdr* 67.35 +static Elf64_Shdr* 67.36 elf_core_shdr_by_index(struct elf_core* ecore, uint16_t index) 67.37 { 67.38 if (index >= ecore->ehdr.e_shnum) 67.39 return NULL; 67.40 - return (Elf_Shdr*)(ecore->shdr + ecore->ehdr.e_shentsize * index); 67.41 + return (Elf64_Shdr*)(ecore->shdr + ecore->ehdr.e_shentsize * index); 67.42 } 67.43 67.44 static int 67.45 elf_core_alloc_read_sec_by_index(struct elf_core* ecore, uint16_t index, 67.46 char** buf, uint64_t* size) 67.47 { 67.48 - Elf_Shdr* shdr = elf_core_shdr_by_index(ecore, index); 67.49 + Elf64_Shdr* shdr = elf_core_shdr_by_index(ecore, index); 67.50 if (shdr == NULL) 67.51 return -1; 67.52 if (size != NULL) 67.53 @@ -329,14 +331,14 @@ elf_core_alloc_read_sec_by_index(struct 67.54 return elf_core_alloc_read_sec(ecore, shdr, buf); 67.55 } 67.56 67.57 -static Elf_Shdr* 67.58 +static Elf64_Shdr* 67.59 elf_core_shdr_by_name(struct elf_core* ecore, const char* name) 67.60 { 67.61 const char* s; 67.62 for (s = ecore->shdr; 67.63 s < ecore->shdr + ecore->ehdr.e_shentsize * ecore->ehdr.e_shnum; 67.64 s += ecore->ehdr.e_shentsize) { 67.65 - Elf_Shdr* shdr = (Elf_Shdr*)s; 67.66 + Elf64_Shdr* shdr = (Elf64_Shdr*)s; 67.67 67.68 if (strncmp(ecore->shstrtab + shdr->sh_name, name, strlen(name)) == 0) 67.69 return shdr; 67.70 @@ -348,7 +350,7 @@ elf_core_shdr_by_name(struct elf_core* e 67.71 static int 67.72 elf_core_read_sec_by_name(struct elf_core* ecore, const char* name, char* buf) 67.73 { 67.74 - Elf_Shdr* shdr = elf_core_shdr_by_name(ecore, name); 67.75 + Elf64_Shdr* shdr = elf_core_shdr_by_name(ecore, name); 67.76 return pread_exact(ecore->domfd, buf, shdr->sh_size, shdr->sh_offset); 67.77 67.78 } 67.79 @@ -357,7 +359,7 @@ static int 67.80 elf_core_alloc_read_sec_by_name(struct elf_core* ecore, const char* name, 67.81 char** buf, uint64_t* size) 67.82 { 67.83 - Elf_Shdr* shdr = elf_core_shdr_by_name(ecore, name); 67.84 + Elf64_Shdr* shdr = elf_core_shdr_by_name(ecore, name); 67.85 if (shdr == NULL) 67.86 return -1; 67.87 if (size != NULL) 67.88 @@ -508,8 +510,8 @@ xc_waitdomain_core_elf( 67.89 struct xen_dumpcore_elfnote_xen_version *xen_version; 67.90 struct xen_dumpcore_elfnote_format_version *format_version; 67.91 67.92 - Elf_Shdr* table_shdr; 67.93 - Elf_Shdr* pages_shdr; 67.94 + Elf64_Shdr* table_shdr; 67.95 + Elf64_Shdr* pages_shdr; 67.96 67.97 if (elf_core_init(&ecore, domfd) < 0) 67.98 goto out;
68.1 --- a/tools/libxen/Makefile Mon Mar 05 12:49:12 2007 -0600 68.2 +++ b/tools/libxen/Makefile Thu Mar 08 14:39:52 2007 -0600 68.3 @@ -57,8 +57,8 @@ test/test_hvm_bindings: test/test_hvm_bi 68.4 68.5 .PHONY: install 68.6 install: all 68.7 - $(INSTALL_DIR) -p $(DESTDIR)/usr/include/xen/api 68.8 - $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) 68.9 + $(INSTALL_DIR) $(DESTDIR)/usr/include/xen/api 68.10 + $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR) 68.11 $(INSTALL_PROG) libxenapi.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR) 68.12 ln -sf libxenapi.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenapi.so.$(MAJOR) 68.13 ln -sf libxenapi.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenapi.so
69.1 --- a/tools/ptsname/Makefile Mon Mar 05 12:49:12 2007 -0600 69.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 69.3 @@ -1,22 +0,0 @@ 69.4 - 69.5 -XEN_ROOT = ../.. 69.6 -include $(XEN_ROOT)/tools/Rules.mk 69.7 - 69.8 -.PHONY: all 69.9 -all: build 69.10 -.PHONY: build 69.11 -build: 69.12 - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py build 69.13 - 69.14 -.PHONY: install 69.15 -ifndef XEN_PYTHON_NATIVE_INSTALL 69.16 -install: all 69.17 - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" 69.18 -else 69.19 -install: all 69.20 - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" 69.21 -endif 69.22 - 69.23 -.PHONY: clean 69.24 -clean: 69.25 - rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out
70.1 --- a/tools/ptsname/ptsname.c Mon Mar 05 12:49:12 2007 -0600 70.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 70.3 @@ -1,44 +0,0 @@ 70.4 -/****************************************************************************** 70.5 - * ptsname.c 70.6 - * 70.7 - * A python extension to expose the POSIX ptsname() function. 70.8 - * 70.9 - * Copyright (C) 2007 XenSource Ltd 70.10 - */ 70.11 - 70.12 -#include <Python.h> 70.13 -#include <stdlib.h> 70.14 - 70.15 -/* Needed for Python versions earlier than 2.3. */ 70.16 -#ifndef PyMODINIT_FUNC 70.17 -#define PyMODINIT_FUNC DL_EXPORT(void) 70.18 -#endif 70.19 - 70.20 -static PyObject *do_ptsname(PyObject *self, PyObject *args) 70.21 -{ 70.22 - int fd; 70.23 - char *path; 70.24 - 70.25 - if (!PyArg_ParseTuple(args, "i", &fd)) 70.26 - return NULL; 70.27 - 70.28 - path = ptsname(fd); 70.29 - 70.30 - if (!path) 70.31 - { 70.32 - PyErr_SetFromErrno(PyExc_IOError); 70.33 - return NULL; 70.34 - } 70.35 - 70.36 - return PyString_FromString(path); 70.37 -} 70.38 - 70.39 -static PyMethodDef ptsname_methods[] = { 70.40 - { "ptsname", do_ptsname, METH_VARARGS }, 70.41 - { NULL } 70.42 -}; 70.43 - 70.44 -PyMODINIT_FUNC initptsname(void) 70.45 -{ 70.46 - Py_InitModule("ptsname", ptsname_methods); 70.47 -}
71.1 --- a/tools/ptsname/setup.py Mon Mar 05 12:49:12 2007 -0600 71.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 71.3 @@ -1,11 +0,0 @@ 71.4 -from distutils.core import setup, Extension 71.5 - 71.6 -extra_compile_args = [ "-fno-strict-aliasing", "-Werror" ] 71.7 - 71.8 -setup(name = 'ptsname', 71.9 - version = '1.0', 71.10 - description = 'POSIX ptsname() function', 71.11 - author = 'Tim Deegan', 71.12 - author_email = 'Tim.Deegan@xensource.com', 71.13 - license = 'GPL', 71.14 - ext_modules = [ Extension("ptsname", [ "ptsname.c" ]) ])
72.1 --- a/tools/pygrub/Makefile Mon Mar 05 12:49:12 2007 -0600 72.2 +++ b/tools/pygrub/Makefile Thu Mar 08 14:39:52 2007 -0600 72.3 @@ -10,13 +10,14 @@ build: 72.4 72.5 .PHONY: install 72.6 ifndef XEN_PYTHON_NATIVE_INSTALL 72.7 +install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()") 72.8 install: all 72.9 - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" 72.10 - $(INSTALL_DIR) -p $(DESTDIR)/var/run/xend/boot 72.11 + CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --install-lib="$(DESTDIR)$(LIBPATH)/python" 72.12 + $(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot 72.13 else 72.14 install: all 72.15 CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" 72.16 - $(INSTALL_DIR) -p $(DESTDIR)/var/run/xend/boot 72.17 + $(INSTALL_DIR) $(DESTDIR)/var/run/xend/boot 72.18 endif 72.19 72.20 .PHONY: clean
73.1 --- a/tools/python/Makefile Mon Mar 05 12:49:12 2007 -0600 73.2 +++ b/tools/python/Makefile Thu Mar 08 14:39:52 2007 -0600 73.3 @@ -18,8 +18,9 @@ build: 73.4 73.5 .PHONY: install 73.6 ifndef XEN_PYTHON_NATIVE_INSTALL 73.7 +install: LIBPATH=$(shell PYTHONPATH=xen/util python -c "import auxbin; print auxbin.libpath()") 73.8 install: install-messages 73.9 - CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --force 73.10 + CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --home="$(DESTDIR)/usr" --prefix="" --force --install-lib="$(DESTDIR)$(LIBPATH)/python" 73.11 else 73.12 install: install-messages 73.13 CC="$(CC)" CFLAGS="$(CFLAGS)" python setup.py install --root="$(DESTDIR)" --force
74.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 74.2 +++ b/tools/python/ptsname/ptsname.c Thu Mar 08 14:39:52 2007 -0600 74.3 @@ -0,0 +1,44 @@ 74.4 +/****************************************************************************** 74.5 + * ptsname.c 74.6 + * 74.7 + * A python extension to expose the POSIX ptsname() function. 74.8 + * 74.9 + * Copyright (C) 2007 XenSource Ltd 74.10 + */ 74.11 + 74.12 +#include <Python.h> 74.13 +#include <stdlib.h> 74.14 + 74.15 +/* Needed for Python versions earlier than 2.3. */ 74.16 +#ifndef PyMODINIT_FUNC 74.17 +#define PyMODINIT_FUNC DL_EXPORT(void) 74.18 +#endif 74.19 + 74.20 +static PyObject *do_ptsname(PyObject *self, PyObject *args) 74.21 +{ 74.22 + int fd; 74.23 + char *path; 74.24 + 74.25 + if (!PyArg_ParseTuple(args, "i", &fd)) 74.26 + return NULL; 74.27 + 74.28 + path = ptsname(fd); 74.29 + 74.30 + if (!path) 74.31 + { 74.32 + PyErr_SetFromErrno(PyExc_IOError); 74.33 + return NULL; 74.34 + } 74.35 + 74.36 + return PyString_FromString(path); 74.37 +} 74.38 + 74.39 +static PyMethodDef ptsname_methods[] = { 74.40 + { "ptsname", do_ptsname, METH_VARARGS }, 74.41 + { NULL } 74.42 +}; 74.43 + 74.44 +PyMODINIT_FUNC initptsname(void) 74.45 +{ 74.46 + Py_InitModule("ptsname", ptsname_methods); 74.47 +}
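The ptsname extension is moved (unchanged) from tools/ptsname into the main python package; it wraps POSIX ptsname(3), which xend uses to find the slave side of the pty it allocates for bootloader I/O. For reference, the underlying C calls behave like this self-contained example:

#define _XOPEN_SOURCE 600
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int fd = posix_openpt(O_RDWR | O_NOCTTY);

	if (fd < 0 || grantpt(fd) < 0 || unlockpt(fd) < 0) {
		perror("pty setup");
		return 1;
	}
	/* ptsname() is the call the Python module exposes. */
	printf("slave pty: %s\n", ptsname(fd));
	return 0;
}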
75.1 --- a/tools/python/setup.py Mon Mar 05 12:49:12 2007 -0600 75.2 +++ b/tools/python/setup.py Thu Mar 08 14:39:52 2007 -0600 75.3 @@ -44,7 +44,14 @@ acm = Extension("acm", 75.4 libraries = libraries, 75.5 sources = [ "xen/lowlevel/acm/acm.c" ]) 75.6 75.7 -modules = [ xc, xs, acm ] 75.8 +ptsname = Extension("ptsname", 75.9 + extra_compile_args = extra_compile_args, 75.10 + include_dirs = include_dirs + [ "ptsname" ], 75.11 + library_dirs = library_dirs, 75.12 + libraries = libraries, 75.13 + sources = [ "ptsname/ptsname.c" ]) 75.14 + 75.15 +modules = [ xc, xs, acm, ptsname ] 75.16 if os.uname()[0] == 'SunOS': 75.17 modules.append(scf) 75.18
76.1 --- a/tools/python/xen/xend/XendBootloader.py Mon Mar 05 12:49:12 2007 -0600 76.2 +++ b/tools/python/xen/xend/XendBootloader.py Thu Mar 08 14:39:52 2007 -0600 76.3 @@ -21,7 +21,8 @@ from xen.util import mkdir 76.4 from XendLogging import log 76.5 from XendError import VmError 76.6 76.7 -import pty, ptsname, termios, fcntl 76.8 +import pty, termios, fcntl 76.9 +from xen.lowlevel import ptsname 76.10 76.11 def bootloader(blexec, disk, dom, quiet = False, blargs = '', kernel = '', 76.12 ramdisk = '', kernel_args = ''):
77.1 --- a/tools/python/xen/xend/XendDomainInfo.py Mon Mar 05 12:49:12 2007 -0600 77.2 +++ b/tools/python/xen/xend/XendDomainInfo.py Thu Mar 08 14:39:52 2007 -0600 77.3 @@ -781,7 +781,6 @@ class XendDomainInfo: 77.4 'name': self.info['name_label'], 77.5 'console/limit': str(xoptions.get_console_limit() * 1024), 77.6 'memory/target': str(self.info['memory_static_min'] * 1024), 77.7 - 'control/platform-feature-multiprocessor-suspend': str(1) 77.8 } 77.9 77.10 def f(n, v): 77.11 @@ -796,6 +795,9 @@ class XendDomainInfo: 77.12 f('store/port', self.store_port) 77.13 f('store/ring-ref', self.store_mfn) 77.14 77.15 + if arch.type == "x86": 77.16 + f('control/platform-feature-multiprocessor-suspend', True) 77.17 + 77.18 # elfnotes 77.19 for n, v in self.info.get_notes().iteritems(): 77.20 n = n.lower().replace('_', '-') 77.21 @@ -1503,7 +1505,7 @@ class XendDomainInfo: 77.22 self.info['start_time'] = time.time() 77.23 77.24 self._stateSet(DOM_STATE_RUNNING) 77.25 - except RuntimeError, exn: 77.26 + except (RuntimeError, VmError), exn: 77.27 log.exception("XendDomainInfo.initDomain: exception occurred") 77.28 self.image.cleanupBootloading() 77.29 raise VmError(str(exn)) 77.30 @@ -2090,26 +2092,26 @@ class XendDomainInfo: 77.31 return self.info.get('tools_version', {}) 77.32 77.33 def get_on_shutdown(self): 77.34 - after_shutdown = self.info.get('action_after_shutdown') 77.35 + after_shutdown = self.info.get('actions_after_shutdown') 77.36 if not after_shutdown or after_shutdown not in XEN_API_ON_NORMAL_EXIT: 77.37 return XEN_API_ON_NORMAL_EXIT[-1] 77.38 return after_shutdown 77.39 77.40 def get_on_reboot(self): 77.41 - after_reboot = self.info.get('action_after_reboot') 77.42 + after_reboot = self.info.get('actions_after_reboot') 77.43 if not after_reboot or after_reboot not in XEN_API_ON_NORMAL_EXIT: 77.44 return XEN_API_ON_NORMAL_EXIT[-1] 77.45 return after_reboot 77.46 77.47 def get_on_suspend(self): 77.48 # TODO: not supported 77.49 - after_suspend = self.info.get('action_after_suspend') 77.50 + after_suspend = self.info.get('actions_after_suspend') 77.51 if not after_suspend or after_suspend not in XEN_API_ON_NORMAL_EXIT: 77.52 return XEN_API_ON_NORMAL_EXIT[-1] 77.53 return after_suspend 77.54 77.55 def get_on_crash(self): 77.56 - after_crash = self.info.get('action_after_crash') 77.57 + after_crash = self.info.get('actions_after_crash') 77.58 if not after_crash or after_crash not in XEN_API_ON_CRASH_BEHAVIOUR: 77.59 return XEN_API_ON_CRASH_BEHAVIOUR[0] 77.60 return after_crash
78.1 --- a/tools/python/xen/xend/XendNode.py Mon Mar 05 12:49:12 2007 -0600 78.2 +++ b/tools/python/xen/xend/XendNode.py Thu Mar 08 14:39:52 2007 -0600 78.3 @@ -22,7 +22,7 @@ import xen.lowlevel.xc 78.4 78.5 from xen.util import Brctl 78.6 78.7 -from xen.xend import uuid 78.8 +from xen.xend import uuid, arch 78.9 from xen.xend.XendError import * 78.10 from xen.xend.XendOptions import instance as xendoptions 78.11 from xen.xend.XendQCoWStorageRepo import XendQCoWStorageRepo 78.12 @@ -97,17 +97,38 @@ class XendNode: 78.13 for u in self.cpus.keys(): 78.14 log.error(self.cpus[u]) 78.15 number = self.cpus[u]['number'] 78.16 + # We can run off the end of the cpuinfo list if domain0 does not 78.17 + # have #vcpus == #pcpus. In that case we just replicate one that's 78.18 + # in the hash table. 78.19 + if not cpuinfo.has_key(number): 78.20 + number = cpuinfo.keys()[0] 78.21 log.error(number) 78.22 log.error(cpuinfo) 78.23 - self.cpus[u].update( 78.24 - { 'host' : self.uuid, 78.25 - 'features' : cpu_features, 78.26 - 'speed' : int(float(cpuinfo[number]['cpu MHz'])), 78.27 - 'vendor' : cpuinfo[number]['vendor_id'], 78.28 - 'modelname': cpuinfo[number]['model name'], 78.29 - 'stepping' : cpuinfo[number]['stepping'], 78.30 - 'flags' : cpuinfo[number]['flags'], 78.31 - }) 78.32 + if arch.type == "x86": 78.33 + self.cpus[u].update( 78.34 + { 'host' : self.uuid, 78.35 + 'features' : cpu_features, 78.36 + 'speed' : int(float(cpuinfo[number]['cpu MHz'])), 78.37 + 'vendor' : cpuinfo[number]['vendor_id'], 78.38 + 'modelname': cpuinfo[number]['model name'], 78.39 + 'stepping' : cpuinfo[number]['stepping'], 78.40 + 'flags' : cpuinfo[number]['flags'], 78.41 + }) 78.42 + elif arch.type == "ia64": 78.43 + self.cpus[u].update( 78.44 + { 'host' : self.uuid, 78.45 + 'features' : cpu_features, 78.46 + 'speed' : int(float(cpuinfo[number]['cpu MHz'])), 78.47 + 'vendor' : cpuinfo[number]['vendor'], 78.48 + 'modelname': cpuinfo[number]['family'], 78.49 + 'stepping' : cpuinfo[number]['model'], 78.50 + 'flags' : cpuinfo[number]['features'], 78.51 + }) 78.52 + else: 78.53 + self.cpus[u].update( 78.54 + { 'host' : self.uuid, 78.55 + 'features' : cpu_features, 78.56 + }) 78.57 78.58 self.pifs = {} 78.59 self.pif_metrics = {}
79.1 --- a/tools/security/Makefile Mon Mar 05 12:49:12 2007 -0600 79.2 +++ b/tools/security/Makefile Thu Mar 08 14:39:52 2007 -0600 79.3 @@ -54,26 +54,29 @@ ifeq ($(ACM_SECURITY),y) 79.4 all: build 79.5 79.6 .PHONY: install 79.7 +ifndef XEN_PYTHON_NATIVE_INSTALL 79.8 +install: LIBPATH=$(shell PYTHONPATH=../python/xen/util python -c "import auxbin; print auxbin.libpath()") 79.9 +endif 79.10 install: all $(ACM_CONFIG_FILE) 79.11 - $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin 79.12 - $(INSTALL_PROG) -p $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin 79.13 - $(INSTALL_PROG) -p $(ACM_EZPOLICY) $(DESTDIR)/usr/sbin 79.14 - $(INSTALL_DIR) -p $(DESTDIR)$(ACM_CONFIG_DIR) 79.15 - $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR) 79.16 - $(INSTALL_DATA) -p policies/$(ACM_SCHEMA) $(DESTDIR)$(ACM_POLICY_DIR) 79.17 - $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)/example 79.18 + $(INSTALL_DIR) $(DESTDIR)/usr/sbin 79.19 + $(INSTALL_PROG) $(ACM_INST_TOOLS) $(DESTDIR)/usr/sbin 79.20 + $(INSTALL_PROG) $(ACM_EZPOLICY) $(DESTDIR)/usr/sbin 79.21 + $(INSTALL_DIR) $(DESTDIR)$(ACM_CONFIG_DIR) 79.22 + $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR) 79.23 + $(INSTALL_DATA) policies/$(ACM_SCHEMA) $(DESTDIR)$(ACM_POLICY_DIR) 79.24 + $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)/example 79.25 for i in $(ACM_EXAMPLES); do \ 79.26 - $(INSTALL_DIR) -p $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \ 79.27 - $(INSTALL_DATA) -p policies/example/$$i/client_v1-$(ACM_POLICY_SUFFIX) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \ 79.28 + $(INSTALL_DIR) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \ 79.29 + $(INSTALL_DATA) policies/example/$$i/client_v1-$(ACM_POLICY_SUFFIX) $(DESTDIR)$(ACM_POLICY_DIR)/example/$$i; \ 79.30 done 79.31 - $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SCRIPT_DIR) 79.32 - $(INSTALL_PROG) -p $(ACM_SCRIPTS) $(DESTDIR)$(ACM_SCRIPT_DIR) 79.33 - $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SECGEN_HTMLDIR) 79.34 - $(INSTALL_DATA) -p $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR) 79.35 - $(INSTALL_DIR) -p $(DESTDIR)$(ACM_SECGEN_CGIDIR) 79.36 - $(INSTALL_PROG) -p $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR) 79.37 + $(INSTALL_DIR) $(DESTDIR)$(ACM_SCRIPT_DIR) 79.38 + $(INSTALL_PROG) $(ACM_SCRIPTS) $(DESTDIR)$(ACM_SCRIPT_DIR) 79.39 + $(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_HTMLDIR) 79.40 + $(INSTALL_DATA) $(ACM_INST_HTML) $(DESTDIR)$(ACM_SECGEN_HTMLDIR) 79.41 + $(INSTALL_DIR) $(DESTDIR)$(ACM_SECGEN_CGIDIR) 79.42 + $(INSTALL_PROG) $(ACM_INST_CGI) $(DESTDIR)$(ACM_SECGEN_CGIDIR) 79.43 ifndef XEN_PYTHON_NATIVE_INSTALL 79.44 - python python/setup.py install --home="$(DESTDIR)/usr" 79.45 + python python/setup.py install --home="$(DESTDIR)/usr" --install-lib="$(DESTDIR)$(LIBPATH)/python" 79.46 else 79.47 python python/setup.py install --root="$(DESTDIR)" 79.48 endif
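A note on the new install rule above: LIBPATH is a target-specific make variable, so the $(shell ...) auxbin lookup runs only when the install target is actually built, and only in the non-native layout, where the private Python modules must land under the Xen library path (via --install-lib) rather than in Python's default site-packages.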
80.1 --- a/tools/vnet/libxutil/Makefile Mon Mar 05 12:49:12 2007 -0600 80.2 +++ b/tools/vnet/libxutil/Makefile Thu Mar 08 14:39:52 2007 -0600 80.3 @@ -71,7 +71,7 @@ check-for-zlib: 80.4 80.5 .PHONY: install 80.6 install: build 80.7 - [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) 80.8 + [ -d $(DESTDIR)/usr/$(LIBDIR) ] || $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR) 80.9 $(INSTALL_PROG) libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR) 80.10 $(INSTALL_DATA) libxutil.a $(DESTDIR)/usr/$(LIBDIR) 80.11 ln -sf libxutil.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxutil.so.$(MAJOR)
81.1 --- a/tools/xenfb/Makefile Mon Mar 05 12:49:12 2007 -0600 81.2 +++ b/tools/xenfb/Makefile Thu Mar 08 14:39:52 2007 -0600 81.3 @@ -1,13 +1,10 @@ 81.4 XEN_ROOT=../.. 81.5 include $(XEN_ROOT)/tools/Rules.mk 81.6 81.7 -CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE) -I$(XEN_ROOT)/linux-2.6-xen-sparse/include -I$(XEN_ROOT)/tools/ioemu 81.8 +CFLAGS += -I$(XEN_LIBXC) -I$(XEN_XENSTORE) 81.9 +CFLAGS += -I$(XEN_ROOT)/linux-2.6-xen-sparse/include -I$(XEN_ROOT)/tools/ioemu 81.10 LDFLAGS += -L$(XEN_LIBXC) -L$(XEN_XENSTORE) 81.11 81.12 -INSTALL = install 81.13 -INSTALL_PROG = $(INSTALL) -m0755 81.14 -INSTALL_DIR = $(INSTALL) -d -m0755 81.15 - 81.16 .PHONY: all 81.17 all: build 81.18 81.19 @@ -16,7 +13,7 @@ build: 81.20 $(MAKE) vncfb sdlfb 81.21 81.22 install: all 81.23 - $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR)/xen/bin 81.24 + $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR)/xen/bin 81.25 $(INSTALL_PROG) vncfb $(DESTDIR)/usr/$(LIBDIR)/xen/bin/xen-vncfb 81.26 $(INSTALL_PROG) sdlfb $(DESTDIR)/usr/$(LIBDIR)/xen/bin/xen-sdlfb 81.27
82.1 --- a/tools/xenfb/xenfb.c Mon Mar 05 12:49:12 2007 -0600 82.2 +++ b/tools/xenfb/xenfb.c Thu Mar 08 14:39:52 2007 -0600 82.3 @@ -245,11 +245,10 @@ static int xenfb_wait_for_state(struct x 82.4 unsigned state, dummy; 82.5 char **vec; 82.6 82.7 + awaited |= 1 << XenbusStateUnknown; 82.8 + 82.9 for (;;) { 82.10 state = xenfb_read_state(xsh, dir); 82.11 - if (state < 0) 82.12 - return -1; 82.13 - 82.14 if ((1 << state) & awaited) 82.15 return state; 82.16
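The removed error check was dead code: state is declared unsigned in the surrounding context, so state < 0 could never be true. Folding XenbusStateUnknown (value 0) into the awaited mask achieves what that check intended, presuming xenfb_read_state reports a missing or unreadable state node as XenbusStateUnknown: the wait loop now returns instead of spinning forever on a vanished backend.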
83.1 --- a/tools/xenstore/Makefile Mon Mar 05 12:49:12 2007 -0600 83.2 +++ b/tools/xenstore/Makefile Thu Mar 08 14:39:52 2007 -0600 83.3 @@ -168,16 +168,16 @@ tarball: clean 83.4 83.5 .PHONY: install 83.6 install: all 83.7 - $(INSTALL_DIR) -p $(DESTDIR)/var/run/xenstored 83.8 - $(INSTALL_DIR) -p $(DESTDIR)/var/lib/xenstored 83.9 - $(INSTALL_DIR) -p $(DESTDIR)/usr/bin 83.10 - $(INSTALL_DIR) -p $(DESTDIR)/usr/sbin 83.11 - $(INSTALL_DIR) -p $(DESTDIR)/usr/include 83.12 + $(INSTALL_DIR) $(DESTDIR)/var/run/xenstored 83.13 + $(INSTALL_DIR) $(DESTDIR)/var/lib/xenstored 83.14 + $(INSTALL_DIR) $(DESTDIR)/usr/bin 83.15 + $(INSTALL_DIR) $(DESTDIR)/usr/sbin 83.16 + $(INSTALL_DIR) $(DESTDIR)/usr/include 83.17 $(INSTALL_PROG) xenstored $(DESTDIR)/usr/sbin 83.18 $(INSTALL_PROG) $(CLIENTS) $(DESTDIR)/usr/bin 83.19 $(INSTALL_PROG) xenstore-control $(DESTDIR)/usr/bin 83.20 $(INSTALL_PROG) xenstore-ls $(DESTDIR)/usr/bin 83.21 - $(INSTALL_DIR) -p $(DESTDIR)/usr/$(LIBDIR) 83.22 + $(INSTALL_DIR) $(DESTDIR)/usr/$(LIBDIR) 83.23 $(INSTALL_PROG) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR) 83.24 ln -sf libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)/usr/$(LIBDIR)/libxenstore.so.$(MAJOR) 83.25 ln -sf libxenstore.so.$(MAJOR) $(DESTDIR)/usr/$(LIBDIR)/libxenstore.so
84.1 --- a/xen/Rules.mk Mon Mar 05 12:49:12 2007 -0600 84.2 +++ b/xen/Rules.mk Thu Mar 08 14:39:52 2007 -0600 84.3 @@ -41,8 +41,8 @@ HDRS += $(wildcard $(BASEDIR)/include/as 84.4 include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk 84.5 84.6 # Do not depend on auto-generated header files. 84.7 -HDRS := $(subst $(BASEDIR)/include/asm-$(TARGET_ARCH)/asm-offsets.h,,$(HDRS)) 84.8 -HDRS := $(subst $(BASEDIR)/include/xen/compile.h,,$(HDRS)) 84.9 +AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS)) 84.10 +HDRS := $(filter-out %/asm-offsets.h,$(AHDRS)) 84.11 84.12 # Note that link order matters! 84.13 ALL_OBJS-y += $(BASEDIR)/common/built_in.o 84.14 @@ -110,12 +110,12 @@ clean:: $(addprefix _clean_, $(subdir-al 84.15 %.o: %.c $(HDRS) Makefile 84.16 $(CC) $(CFLAGS) -c $< -o $@ 84.17 84.18 -%.o: %.S $(HDRS) Makefile 84.19 +%.o: %.S $(AHDRS) Makefile 84.20 $(CC) $(AFLAGS) -c $< -o $@ 84.21 84.22 %.i: %.c $(HDRS) Makefile 84.23 $(CPP) $(CFLAGS) $< -o $@ 84.24 84.25 # -std=gnu{89,99} gets confused by # as an end-of-line comment marker 84.26 -%.s: %.S $(HDRS) Makefile 84.27 +%.s: %.S $(AHDRS) Makefile 84.28 $(CPP) $(AFLAGS) $< -o $@
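The ordering of the two filter-out lines is what matters here: AHDRS drops only compile.h and is used below for the .S rules, so assembly objects still rebuild when the generated asm-offsets.h (which assembly sources include) changes, while HDRS additionally drops asm-offsets.h for the .c rules, since C sources never include it and depending on it would only cause needless rebuilds against a generated file.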
85.1 --- a/xen/arch/x86/domain.c Mon Mar 05 12:49:12 2007 -0600 85.2 +++ b/xen/arch/x86/domain.c Thu Mar 08 14:39:52 2007 -0600 85.3 @@ -641,6 +641,31 @@ int arch_set_info_guest( 85.4 } 85.5 85.6 v->arch.guest_table = pagetable_from_pfn(cr3_pfn); 85.7 + 85.8 +#ifdef __x86_64__ 85.9 + if ( c.nat->ctrlreg[1] ) 85.10 + { 85.11 + cr3_pfn = gmfn_to_mfn(d, xen_cr3_to_pfn(c.nat->ctrlreg[1])); 85.12 + 85.13 + if ( !mfn_valid(cr3_pfn) || 85.14 + (paging_mode_refcounts(d) 85.15 + ? !get_page(mfn_to_page(cr3_pfn), d) 85.16 + : !get_page_and_type(mfn_to_page(cr3_pfn), d, 85.17 + PGT_base_page_table)) ) 85.18 + { 85.19 + cr3_pfn = pagetable_get_pfn(v->arch.guest_table); 85.20 + v->arch.guest_table = pagetable_null(); 85.21 + if ( paging_mode_refcounts(d) ) 85.22 + put_page(mfn_to_page(cr3_pfn)); 85.23 + else 85.24 + put_page_and_type(mfn_to_page(cr3_pfn)); 85.25 + destroy_gdt(v); 85.26 + return -EINVAL; 85.27 + } 85.28 + 85.29 + v->arch.guest_table_user = pagetable_from_pfn(cr3_pfn); 85.30 + } 85.31 +#endif 85.32 } 85.33 #ifdef CONFIG_COMPAT 85.34 else
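Background for this hunk: x86-64 PV guests run kernel and user space on separate page tables, and the vcpu context's ctrlreg[1] slot now carries the user-mode base so it survives save/restore. The restore path mirrors the kernel-table logic just above it, taking either a plain or a typed page reference depending on paging_mode_refcounts(), and on failure it unwinds by dropping the kernel table's reference and destroying the GDT. The matching save side is in the domctl.c hunk below.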
86.1 --- a/xen/arch/x86/domain_build.c Mon Mar 05 12:49:12 2007 -0600 86.2 +++ b/xen/arch/x86/domain_build.c Thu Mar 08 14:39:52 2007 -0600 86.3 @@ -374,9 +374,6 @@ int construct_dom0(struct domain *d, 86.4 if ( parms.f_required[0] /* Huh? -- kraxel */ ) 86.5 panic("Domain 0 requires an unsupported hypervisor feature.\n"); 86.6 86.7 - /* Align load address to 4MB boundary. */ 86.8 - v_start = parms.virt_base & ~((1UL<<22)-1); 86.9 - 86.10 /* 86.11 * Why do we need this? The number of page-table frames depends on the 86.12 * size of the bootstrap address space. But the size of the address space 86.13 @@ -384,6 +381,7 @@ int construct_dom0(struct domain *d, 86.14 * read-only). We have a pair of simultaneous equations in two unknowns, 86.15 * which we solve by exhaustive search. 86.16 */ 86.17 + v_start = parms.virt_base; 86.18 vkern_start = parms.virt_kstart; 86.19 vkern_end = parms.virt_kend; 86.20 vinitrd_start = round_pgup(vkern_end);
87.1 --- a/xen/arch/x86/domctl.c Mon Mar 05 12:49:12 2007 -0600 87.2 +++ b/xen/arch/x86/domctl.c Thu Mar 08 14:39:52 2007 -0600 87.3 @@ -470,8 +470,15 @@ void arch_get_info_guest(struct vcpu *v, 87.4 c(user_regs.eflags |= v->arch.iopl << 12); 87.5 87.6 if ( !IS_COMPAT(v->domain) ) 87.7 + { 87.8 c.nat->ctrlreg[3] = xen_pfn_to_cr3( 87.9 pagetable_get_pfn(v->arch.guest_table)); 87.10 +#ifdef __x86_64__ 87.11 + if ( !pagetable_is_null(v->arch.guest_table_user) ) 87.12 + c.nat->ctrlreg[1] = xen_pfn_to_cr3( 87.13 + pagetable_get_pfn(v->arch.guest_table_user)); 87.14 +#endif 87.15 + } 87.16 #ifdef CONFIG_COMPAT 87.17 else 87.18 {
88.1 --- a/xen/arch/x86/hvm/hvm.c Mon Mar 05 12:49:12 2007 -0600 88.2 +++ b/xen/arch/x86/hvm/hvm.c Thu Mar 08 14:39:52 2007 -0600 88.3 @@ -161,7 +161,8 @@ int hvm_domain_initialise(struct domain 88.4 spin_lock_init(&d->arch.hvm_domain.buffered_io_lock); 88.5 spin_lock_init(&d->arch.hvm_domain.irq_lock); 88.6 88.7 - rc = paging_enable(d, PG_SH_enable|PG_refcounts|PG_translate|PG_external); 88.8 + /* paging support will be determined inside paging.c */ 88.9 + rc = paging_enable(d, PG_refcounts|PG_translate|PG_external); 88.10 if ( rc != 0 ) 88.11 return rc; 88.12
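With PG_SH_enable no longer forced at this call site, choosing between shadow paging and hardware-assisted paging becomes paging_enable()'s job. paging.c is not part of this changeset, so the following is only an assumed sketch of that dispatch, pieced together from the opt_hap_enabled and hap_capable_system flags and the hap_enable() entry point that appear in later hunks:

    /* Assumed shape of the dispatch; not the actual paging.c code. */
    int paging_enable(struct domain *d, u32 mode)
    {
        if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) )
            return hap_enable(d, mode);  /* hap_enable() ORs in PG_SH_enable itself */
        else
            return shadow_enable(d, mode | PG_SH_enable);
    }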
89.1 --- a/xen/arch/x86/hvm/svm/emulate.c Mon Mar 05 12:49:12 2007 -0600 89.2 +++ b/xen/arch/x86/hvm/svm/emulate.c Thu Mar 08 14:39:52 2007 -0600 89.3 @@ -24,9 +24,11 @@ 89.4 #include <asm/msr.h> 89.5 #include <asm/hvm/hvm.h> 89.6 #include <asm/hvm/support.h> 89.7 +#include <asm/hvm/svm/svm.h> 89.8 #include <asm/hvm/svm/vmcb.h> 89.9 #include <asm/hvm/svm/emulate.h> 89.10 89.11 + 89.12 extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, 89.13 int inst_len); 89.14 89.15 @@ -133,13 +135,15 @@ static inline unsigned long DECODE_GPR_V 89.16 #define sib operand [1] 89.17 89.18 89.19 -unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb, 89.20 - struct cpu_user_regs *regs, const u8 prefix, int inst_len, 89.21 - const u8 *operand, u8 *size) 89.22 +unsigned long get_effective_addr_modrm64(struct cpu_user_regs *regs, 89.23 + const u8 prefix, int inst_len, 89.24 + const u8 *operand, u8 *size) 89.25 { 89.26 unsigned long effective_addr = (unsigned long) -1; 89.27 u8 length, modrm_mod, modrm_rm; 89.28 u32 disp = 0; 89.29 + struct vcpu *v = current; 89.30 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 89.31 89.32 HVM_DBG_LOG(DBG_LEVEL_1, "get_effective_addr_modrm64(): prefix = %x, " 89.33 "length = %d, operand[0,1] = %x %x.\n", prefix, *size, operand [0], 89.34 @@ -198,7 +202,7 @@ unsigned long get_effective_addr_modrm64 89.35 89.36 #if __x86_64__ 89.37 /* 64-bit mode */ 89.38 - if (vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA)) 89.39 + if (vmcb->cs.attr.fields.l && svm_long_mode_enabled(v)) 89.40 return vmcb->rip + inst_len + *size + disp; 89.41 #endif 89.42 return disp; 89.43 @@ -310,7 +314,7 @@ unsigned int decode_src_reg(u8 prefix, u 89.44 } 89.45 89.46 89.47 -unsigned long svm_rip2pointer(struct vmcb_struct *vmcb) 89.48 +unsigned long svm_rip2pointer(struct vcpu *v) 89.49 { 89.50 /* 89.51 * The following is subtle. Intuitively this code would be something like: 89.52 @@ -322,8 +326,9 @@ unsigned long svm_rip2pointer(struct vmc 89.53 * %cs is update, but fortunately, base contain the valid base address 89.54 * no matter what kind of addressing is used. 89.55 */ 89.56 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 89.57 unsigned long p = vmcb->cs.base + vmcb->rip; 89.58 - if (!(vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA)) 89.59 + if (!(vmcb->cs.attr.fields.l && svm_long_mode_enabled(v))) 89.60 return (u32)p; /* mask to 32 bits */ 89.61 /* NB. Should mask to 16 bits if in real mode or 16-bit protected mode. */ 89.62 return p; 89.63 @@ -410,10 +415,11 @@ static const u8 *opc_bytes[INSTR_MAX_COU 89.64 * The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer 89.65 * to enough bytes to satisfy the instruction including prefix bytes. 89.66 */ 89.67 -int __get_instruction_length_from_list(struct vmcb_struct *vmcb, 89.68 +int __get_instruction_length_from_list(struct vcpu *v, 89.69 enum instruction_index *list, unsigned int list_count, 89.70 u8 *guest_eip_buf, enum instruction_index *match) 89.71 { 89.72 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 89.73 unsigned int inst_len = 0; 89.74 unsigned int i; 89.75 unsigned int j; 89.76 @@ -429,7 +435,7 @@ int __get_instruction_length_from_list(s 89.77 } 89.78 else 89.79 { 89.80 - inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), MAX_INST_LEN); 89.81 + inst_copy_from_guest(buffer, svm_rip2pointer(v), MAX_INST_LEN); 89.82 buf = buffer; 89.83 } 89.84
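The point of threading struct vcpu through these helpers: the svm.c hunk below introduces a shadow EFER (cpu_shadow_efer), because the real vmcb->efer always carries EFER_SVME and no longer tracks the guest's view of LMA, so long-mode tests must consult the shadow instead. The predicates used here presumably come from the newly included asm/hvm/svm/svm.h, which is not shown in this changeset; inferred from the call sites, they would be roughly:

    /* Assumed definitions, inferred from usage; not taken from this changeset. */
    static inline int svm_long_mode_enabled(struct vcpu *v)
    {
        return v->arch.hvm_svm.cpu_shadow_efer & EFER_LMA;
    }

    static inline int svm_lme_is_set(struct vcpu *v)
    {
        return v->arch.hvm_svm.cpu_shadow_efer & EFER_LME;
    }

    static inline int svm_cr4_pae_is_set(struct vcpu *v)
    {
        return v->arch.hvm_svm.cpu_shadow_cr4 & X86_CR4_PAE;
    }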
90.1 --- a/xen/arch/x86/hvm/svm/svm.c Mon Mar 05 12:49:12 2007 -0600 90.2 +++ b/xen/arch/x86/hvm/svm/svm.c Thu Mar 08 14:39:52 2007 -0600 90.3 @@ -49,6 +49,7 @@ 90.4 #include <public/sched.h> 90.5 #include <asm/hvm/vpt.h> 90.6 #include <asm/hvm/trace.h> 90.7 +#include <asm/hap.h> 90.8 90.9 #define SVM_EXTRA_DEBUG 90.10 90.11 @@ -76,6 +77,10 @@ static void *root_vmcb[NR_CPUS] __read_m 90.12 /* physical address of above for host VMSAVE/VMLOAD */ 90.13 u64 root_vmcb_pa[NR_CPUS] __read_mostly; 90.14 90.15 +/* hardware assisted paging bits */ 90.16 +extern int opt_hap_enabled; 90.17 +extern int hap_capable_system; 90.18 + 90.19 static inline void svm_inject_exception(struct vcpu *v, int trap, 90.20 int ev, int error_code) 90.21 { 90.22 @@ -148,31 +153,6 @@ static void svm_store_cpu_guest_regs( 90.23 } 90.24 } 90.25 90.26 -static int svm_paging_enabled(struct vcpu *v) 90.27 -{ 90.28 - unsigned long cr0; 90.29 - 90.30 - cr0 = v->arch.hvm_svm.cpu_shadow_cr0; 90.31 - 90.32 - return (cr0 & X86_CR0_PE) && (cr0 & X86_CR0_PG); 90.33 -} 90.34 - 90.35 -static int svm_pae_enabled(struct vcpu *v) 90.36 -{ 90.37 - unsigned long cr4; 90.38 - 90.39 - if(!svm_paging_enabled(v)) 90.40 - return 0; 90.41 - 90.42 - cr4 = v->arch.hvm_svm.cpu_shadow_cr4; 90.43 - 90.44 - return (cr4 & X86_CR4_PAE); 90.45 -} 90.46 - 90.47 -static int svm_long_mode_enabled(struct vcpu *v) 90.48 -{ 90.49 - return test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); 90.50 -} 90.51 90.52 static inline int long_mode_do_msr_read(struct cpu_user_regs *regs) 90.53 { 90.54 @@ -183,8 +163,7 @@ static inline int long_mode_do_msr_read( 90.55 switch ((u32)regs->ecx) 90.56 { 90.57 case MSR_EFER: 90.58 - msr_content = vmcb->efer; 90.59 - msr_content &= ~EFER_SVME; 90.60 + msr_content = v->arch.hvm_svm.cpu_shadow_efer; 90.61 break; 90.62 90.63 #ifdef __x86_64__ 90.64 @@ -255,30 +234,54 @@ static inline int long_mode_do_msr_write 90.65 goto gp_fault; 90.66 } 90.67 90.68 + /* 90.69 + * update the VMCB's EFER with the intended value along with 90.70 + * that crucial EFER.SVME bit =) 90.71 + */ 90.72 + vmcb->efer = msr_content | EFER_SVME; 90.73 + 90.74 #ifdef __x86_64__ 90.75 - /* LME: 0 -> 1 */ 90.76 - if ( msr_content & EFER_LME && 90.77 - !test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)) 90.78 + 90.79 + /* 90.80 + * Check for EFER.LME transitions from 0->1 or 1->0. Do the 90.81 + * sanity checks and then make sure that both EFER.LME and 90.82 + * EFER.LMA are cleared. (EFER.LME can't be set in the vmcb 90.83 + * until the guest also sets CR0.PG, since even if the guest has 90.84 + * paging "disabled", the vmcb's CR0 always has PG set.) 90.85 + */ 90.86 + if ( (msr_content & EFER_LME) && !svm_lme_is_set(v) ) 90.87 { 90.88 + /* EFER.LME transition from 0 to 1 */ 90.89 + 90.90 if ( svm_paging_enabled(v) || 90.91 - !test_bit(SVM_CPU_STATE_PAE_ENABLED, 90.92 - &v->arch.hvm_svm.cpu_state) ) 90.93 + !svm_cr4_pae_is_set(v) ) 90.94 { 90.95 gdprintk(XENLOG_WARNING, "Trying to set LME bit when " 90.96 "in paging mode or PAE bit is not set\n"); 90.97 goto gp_fault; 90.98 } 90.99 - set_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state); 90.100 + 90.101 + vmcb->efer &= ~(EFER_LME | EFER_LMA); 90.102 } 90.103 - 90.104 - /* We have already recorded that we want LME, so it will be set 90.105 - * next time CR0 gets updated. So we clear that bit and continue. 
90.106 - */ 90.107 - if ((msr_content ^ vmcb->efer) & EFER_LME) 90.108 - msr_content &= ~EFER_LME; 90.109 - /* No update for LME/LMA since it have no effect */ 90.110 -#endif 90.111 - vmcb->efer = msr_content | EFER_SVME; 90.112 + else if ( !(msr_content & EFER_LME) && svm_lme_is_set(v) ) 90.113 + { 90.114 + /* EFER.LME transistion from 1 to 0 */ 90.115 + 90.116 + if ( svm_paging_enabled(v) ) 90.117 + { 90.118 + gdprintk(XENLOG_WARNING, 90.119 + "Trying to clear EFER.LME while paging enabled\n"); 90.120 + goto gp_fault; 90.121 + } 90.122 + 90.123 + vmcb->efer &= ~(EFER_LME | EFER_LMA); 90.124 + } 90.125 + 90.126 +#endif /* __x86_64__ */ 90.127 + 90.128 + /* update the guest EFER's shadow with the intended value */ 90.129 + v->arch.hvm_svm.cpu_shadow_efer = msr_content; 90.130 + 90.131 break; 90.132 90.133 #ifdef __x86_64__ 90.134 @@ -468,22 +471,25 @@ int svm_vmcb_restore(struct vcpu *v, str 90.135 c->cr4); 90.136 #endif 90.137 90.138 - if (!svm_paging_enabled(v)) { 90.139 + if ( !svm_paging_enabled(v) ) 90.140 + { 90.141 printk("%s: paging not enabled.", __func__); 90.142 goto skip_cr3; 90.143 } 90.144 90.145 - if (c->cr3 == v->arch.hvm_svm.cpu_cr3) { 90.146 + if ( c->cr3 == v->arch.hvm_svm.cpu_cr3 ) 90.147 + { 90.148 /* 90.149 * This is simple TLB flush, implying the guest has 90.150 * removed some translation or changed page attributes. 90.151 * We simply invalidate the shadow. 90.152 */ 90.153 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); 90.154 - if (mfn != pagetable_get_pfn(v->arch.guest_table)) { 90.155 + if ( mfn != pagetable_get_pfn(v->arch.guest_table) ) 90.156 goto bad_cr3; 90.157 - } 90.158 - } else { 90.159 + } 90.160 + else 90.161 + { 90.162 /* 90.163 * If different, make a shadow. Check if the PDBR is valid 90.164 * first. 90.165 @@ -491,9 +497,9 @@ int svm_vmcb_restore(struct vcpu *v, str 90.166 HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64"", c->cr3); 90.167 /* current!=vcpu as not called by arch_vmx_do_launch */ 90.168 mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT); 90.169 - if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) { 90.170 + if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) ) 90.171 goto bad_cr3; 90.172 - } 90.173 + 90.174 old_base_mfn = pagetable_get_pfn(v->arch.guest_table); 90.175 v->arch.guest_table = pagetable_from_pfn(mfn); 90.176 if (old_base_mfn) 90.177 @@ -631,7 +637,7 @@ static int svm_guest_x86_mode(struct vcp 90.178 { 90.179 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 90.180 90.181 - if ( (vmcb->efer & EFER_LMA) && vmcb->cs.attr.fields.l ) 90.182 + if ( svm_long_mode_enabled(v) && vmcb->cs.attr.fields.l ) 90.183 return 8; 90.184 90.185 if ( svm_realmode(v) ) 90.186 @@ -681,7 +687,7 @@ static unsigned long svm_get_segment_bas 90.187 int long_mode = 0; 90.188 90.189 #ifdef __x86_64__ 90.190 - long_mode = vmcb->cs.attr.fields.l && (vmcb->efer & EFER_LMA); 90.191 + long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v); 90.192 #endif 90.193 switch ( seg ) 90.194 { 90.195 @@ -905,6 +911,10 @@ static void arch_svm_do_launch(struct vc 90.196 { 90.197 svm_do_launch(v); 90.198 90.199 + if ( paging_mode_hap(v->domain) ) { 90.200 + v->arch.hvm_svm.vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table); 90.201 + } 90.202 + 90.203 if ( v->vcpu_id != 0 ) 90.204 { 90.205 cpu_user_regs_t *regs = ¤t->arch.guest_context.user_regs; 90.206 @@ -1011,6 +1021,21 @@ static struct hvm_function_table svm_fun 90.207 .event_injection_faulted = svm_event_injection_faulted 90.208 }; 90.209 90.210 +void 
svm_npt_detect(void) 90.211 +{ 90.212 + u32 eax, ebx, ecx, edx; 90.213 + 90.214 + /* check CPUID for nested paging support */ 90.215 + cpuid(0x8000000A, &eax, &ebx, &ecx, &edx); 90.216 + if ( edx & 0x01 ) { /* nested paging */ 90.217 + hap_capable_system = 1; 90.218 + } 90.219 + else if ( opt_hap_enabled ) { 90.220 + printk(" nested paging is not supported by this CPU.\n"); 90.221 + hap_capable_system = 0; /* no nested paging, we disable flag. */ 90.222 + } 90.223 +} 90.224 + 90.225 int start_svm(void) 90.226 { 90.227 u32 eax, ecx, edx; 90.228 @@ -1041,6 +1066,8 @@ int start_svm(void) 90.229 wrmsr(MSR_EFER, eax, edx); 90.230 printk("AMD SVM Extension is enabled for cpu %d.\n", cpu ); 90.231 90.232 + svm_npt_detect(); 90.233 + 90.234 /* Initialize the HSA for this core */ 90.235 phys_hsa = (u64) virt_to_maddr(hsa[cpu]); 90.236 phys_hsa_lo = (u32) phys_hsa; 90.237 @@ -1077,6 +1104,18 @@ void arch_svm_do_resume(struct vcpu *v) 90.238 } 90.239 } 90.240 90.241 +static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs) 90.242 +{ 90.243 + if (mmio_space(gpa)) { 90.244 + handle_mmio(gpa); 90.245 + return 1; 90.246 + } 90.247 + 90.248 + /* We should not reach here. Otherwise, P2M table is not correct.*/ 90.249 + return 0; 90.250 +} 90.251 + 90.252 + 90.253 static int svm_do_page_fault(unsigned long va, struct cpu_user_regs *regs) 90.254 { 90.255 HVM_DBG_LOG(DBG_LEVEL_VMMU, 90.256 @@ -1114,7 +1153,7 @@ static void svm_do_general_protection_fa 90.257 printk("Huh? We got a GP Fault with an invalid IDTR!\n"); 90.258 svm_dump_vmcb(__func__, vmcb); 90.259 svm_dump_regs(__func__, regs); 90.260 - svm_dump_inst(svm_rip2pointer(vmcb)); 90.261 + svm_dump_inst(svm_rip2pointer(v)); 90.262 domain_crash(v->domain); 90.263 return; 90.264 } 90.265 @@ -1209,7 +1248,7 @@ static void svm_vmexit_do_cpuid(struct v 90.266 HVMTRACE_3D(CPUID, v, input, 90.267 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx); 90.268 90.269 - inst_len = __get_instruction_length(vmcb, INSTR_CPUID, NULL); 90.270 + inst_len = __get_instruction_length(v, INSTR_CPUID, NULL); 90.271 ASSERT(inst_len > 0); 90.272 __update_guest_eip(vmcb, inst_len); 90.273 } 90.274 @@ -1312,15 +1351,16 @@ static void svm_dr_access(struct vcpu *v 90.275 } 90.276 90.277 90.278 -static void svm_get_prefix_info( 90.279 - struct vmcb_struct *vmcb, 90.280 - unsigned int dir, svm_segment_register_t **seg, unsigned int *asize) 90.281 +static void svm_get_prefix_info(struct vcpu *v, unsigned int dir, 90.282 + svm_segment_register_t **seg, 90.283 + unsigned int *asize) 90.284 { 90.285 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 90.286 unsigned char inst[MAX_INST_LEN]; 90.287 int i; 90.288 90.289 memset(inst, 0, MAX_INST_LEN); 90.290 - if (inst_copy_from_guest(inst, svm_rip2pointer(vmcb), sizeof(inst)) 90.291 + if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst)) 90.292 != MAX_INST_LEN) 90.293 { 90.294 gdprintk(XENLOG_ERR, "get guest instruction failed\n"); 90.295 @@ -1400,7 +1440,7 @@ static inline int svm_get_io_address( 90.296 90.297 #ifdef __x86_64__ 90.298 /* If we're in long mode, we shouldn't check the segment presence & limit */ 90.299 - long_mode = vmcb->cs.attr.fields.l && vmcb->efer & EFER_LMA; 90.300 + long_mode = vmcb->cs.attr.fields.l && svm_long_mode_enabled(v); 90.301 #endif 90.302 90.303 /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit. 
90.304 @@ -1419,7 +1459,7 @@ static inline int svm_get_io_address( 90.305 isize --; 90.306 90.307 if (isize > 1) 90.308 - svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize); 90.309 + svm_get_prefix_info(v, info.fields.type, &seg, &asize); 90.310 90.311 if (info.fields.type == IOREQ_WRITE) 90.312 { 90.313 @@ -1702,6 +1742,52 @@ static void svm_io_instruction(struct vc 90.314 } 90.315 } 90.316 90.317 +static int npt_set_cr0(unsigned long value) 90.318 +{ 90.319 + struct vcpu *v = current; 90.320 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 90.321 + 90.322 + ASSERT(vmcb); 90.323 + 90.324 + /* ET is reserved and should always be 1 */ 90.325 + value |= X86_CR0_ET; 90.326 + 90.327 + /* Check whether the guest is about to turn on long mode. 90.328 + * If it is, set EFER.LME and EFER.LMA. Update the shadow EFER.LMA 90.329 + * bit too, so svm_long_mode_enabled() will work. 90.330 + */ 90.331 + if ( (value & X86_CR0_PG) && svm_lme_is_set(v) && 90.332 + (vmcb->cr4 & X86_CR4_PAE) && (vmcb->cr0 & X86_CR0_PE) ) 90.333 + { 90.334 + v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA; 90.335 + vmcb->efer |= EFER_LMA | EFER_LME; 90.336 + } 90.337 + 90.338 + /* Whenever CR0.PG is cleared under long mode, LMA will be cleared 90.339 + * immediately. We emulate this process for svm_long_mode_enabled(). 90.340 + */ 90.341 + if ( (value & (X86_CR0_PE | X86_CR0_PG)) == X86_CR0_PE ) 90.342 + { 90.343 + if ( svm_long_mode_enabled(v) ) 90.344 + { 90.345 + v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA; 90.346 + } 90.347 + } 90.348 + 90.349 + vmcb->cr0 = value | X86_CR0_WP; 90.350 + v->arch.hvm_svm.cpu_shadow_cr0 = value; 90.351 + 90.352 + /* TS cleared? Then initialise FPU now. */ 90.353 + if ( !(value & X86_CR0_TS) ) { 90.354 + setup_fpu(v); 90.355 + vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_NM; 90.356 + } 90.357 + 90.358 + paging_update_paging_modes(v); 90.359 + 90.360 + return 1; 90.361 +} 90.362 + 90.363 static int svm_set_cr0(unsigned long value) 90.364 { 90.365 struct vcpu *v = current; 90.366 @@ -1727,7 +1813,8 @@ static int svm_set_cr0(unsigned long val 90.367 90.368 HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx\n", value); 90.369 90.370 - if ((value & X86_CR0_PE) && (value & X86_CR0_PG) && !paging_enabled) 90.371 + if ( ((value & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG)) 90.372 + && !paging_enabled ) 90.373 { 90.374 /* The guest CR3 must be pointing to the guest physical. 
*/ 90.375 mfn = get_mfn_from_gpfn(v->arch.hvm_svm.cpu_cr3 >> PAGE_SHIFT); 90.376 @@ -1740,18 +1827,16 @@ static int svm_set_cr0(unsigned long val 90.377 } 90.378 90.379 #if defined(__x86_64__) 90.380 - if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state) 90.381 - && !test_bit(SVM_CPU_STATE_PAE_ENABLED, 90.382 - &v->arch.hvm_svm.cpu_state)) 90.383 + if ( svm_lme_is_set(v) && !svm_cr4_pae_is_set(v) ) 90.384 { 90.385 HVM_DBG_LOG(DBG_LEVEL_1, "Enable paging before PAE enable\n"); 90.386 svm_inject_exception(v, TRAP_gp_fault, 1, 0); 90.387 } 90.388 90.389 - if (test_bit(SVM_CPU_STATE_LME_ENABLED, &v->arch.hvm_svm.cpu_state)) 90.390 + if ( svm_lme_is_set(v) ) 90.391 { 90.392 HVM_DBG_LOG(DBG_LEVEL_1, "Enable the Long mode\n"); 90.393 - set_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); 90.394 + v->arch.hvm_svm.cpu_shadow_efer |= EFER_LMA; 90.395 vmcb->efer |= EFER_LMA | EFER_LME; 90.396 } 90.397 #endif /* __x86_64__ */ 90.398 @@ -1790,8 +1875,8 @@ static int svm_set_cr0(unsigned long val 90.399 { 90.400 if ( svm_long_mode_enabled(v) ) 90.401 { 90.402 - vmcb->efer &= ~EFER_LMA; 90.403 - clear_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state); 90.404 + vmcb->efer &= ~(EFER_LME | EFER_LMA); 90.405 + v->arch.hvm_svm.cpu_shadow_efer &= ~EFER_LMA; 90.406 } 90.407 /* we should take care of this kind of situation */ 90.408 paging_update_paging_modes(v); 90.409 @@ -1800,6 +1885,85 @@ static int svm_set_cr0(unsigned long val 90.410 return 1; 90.411 } 90.412 90.413 +// 90.414 +// nested paging functions 90.415 +// 90.416 + 90.417 +static int npt_mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs) 90.418 +{ 90.419 + unsigned long value; 90.420 + struct vcpu *v = current; 90.421 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 90.422 + struct vlapic *vlapic = vcpu_vlapic(v); 90.423 + 90.424 + ASSERT(vmcb); 90.425 + 90.426 + value = get_reg(gpreg, regs, vmcb); 90.427 + 90.428 + switch (cr) { 90.429 + case 0: 90.430 + return npt_set_cr0(value); 90.431 + 90.432 + case 3: 90.433 + vmcb->cr3 = value; 90.434 + v->arch.hvm_svm.cpu_cr3 = value; 90.435 + break; 90.436 + 90.437 + case 4: /* CR4 */ 90.438 + vmcb->cr4 = value; 90.439 + v->arch.hvm_svm.cpu_shadow_cr4 = value; 90.440 + paging_update_paging_modes(v); 90.441 + break; 90.442 + 90.443 + case 8: 90.444 + vlapic_set_reg(vlapic, APIC_TASKPRI, ((value & 0x0F) << 4)); 90.445 + vmcb->vintr.fields.tpr = value & 0x0F; 90.446 + break; 90.447 + 90.448 + default: 90.449 + gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr); 90.450 + domain_crash(v->domain); 90.451 + return 0; 90.452 + } 90.453 + 90.454 + return 1; 90.455 +} 90.456 + 90.457 +static void npt_mov_from_cr(int cr, int gp, struct cpu_user_regs *regs) 90.458 +{ 90.459 + unsigned long value = 0; 90.460 + struct vcpu *v = current; 90.461 + struct vmcb_struct *vmcb; 90.462 + struct vlapic *vlapic = vcpu_vlapic(v); 90.463 + 90.464 + vmcb = v->arch.hvm_svm.vmcb; 90.465 + ASSERT(vmcb); 90.466 + 90.467 + switch(cr) { 90.468 + case 0: 90.469 + value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr0; 90.470 + break; 90.471 + case 2: 90.472 + value = vmcb->cr2; 90.473 + break; 90.474 + case 3: 90.475 + value = (unsigned long) v->arch.hvm_svm.cpu_cr3; 90.476 + break; 90.477 + case 4: 90.478 + value = (unsigned long) v->arch.hvm_svm.cpu_shadow_cr4; 90.479 + break; 90.480 + case 8: 90.481 + value = (unsigned long)vlapic_get_reg(vlapic, APIC_TASKPRI); 90.482 + value = (value & 0xF0) >> 4; 90.483 + break; 90.484 + default: 90.485 + domain_crash(v->domain); 90.486 + return; 90.487 + } 90.488 
+ 90.489 + set_reg(gp, value, regs, vmcb); 90.490 +} 90.491 + 90.492 /* 90.493 * Read from control registers. CR0 and CR4 are read from the shadow. 90.494 */ 90.495 @@ -1851,12 +2015,6 @@ static void mov_from_cr(int cr, int gp, 90.496 } 90.497 90.498 90.499 -static inline int svm_pgbit_test(struct vcpu *v) 90.500 -{ 90.501 - return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG; 90.502 -} 90.503 - 90.504 - 90.505 /* 90.506 * Write to control registers 90.507 */ 90.508 @@ -1933,7 +2091,6 @@ static int mov_to_cr(int gpreg, int cr, 90.509 old_cr = v->arch.hvm_svm.cpu_shadow_cr4; 90.510 if ( value & X86_CR4_PAE && !(old_cr & X86_CR4_PAE) ) 90.511 { 90.512 - set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); 90.513 if ( svm_pgbit_test(v) ) 90.514 { 90.515 /* The guest is a 32-bit PAE guest. */ 90.516 @@ -1962,15 +2119,13 @@ static int mov_to_cr(int gpreg, int cr, 90.517 v->arch.hvm_svm.cpu_cr3, mfn); 90.518 #endif 90.519 } 90.520 - } 90.521 - else if (value & X86_CR4_PAE) { 90.522 - set_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); 90.523 - } else { 90.524 - if (test_bit(SVM_CPU_STATE_LMA_ENABLED, 90.525 - &v->arch.hvm_svm.cpu_state)) { 90.526 + } 90.527 + else if ( !(value & X86_CR4_PAE) ) 90.528 + { 90.529 + if ( svm_long_mode_enabled(v) ) 90.530 + { 90.531 svm_inject_exception(v, TRAP_gp_fault, 1, 0); 90.532 } 90.533 - clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); 90.534 } 90.535 90.536 v->arch.hvm_svm.cpu_shadow_cr4 = value; 90.537 @@ -2024,7 +2179,7 @@ static int svm_cr_access(struct vcpu *v, 90.538 90.539 ASSERT(vmcb); 90.540 90.541 - inst_copy_from_guest(buffer, svm_rip2pointer(vmcb), sizeof(buffer)); 90.542 + inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer)); 90.543 90.544 /* get index to first actual instruction byte - as we will need to know 90.545 where the prefix lives later on */ 90.546 @@ -2033,12 +2188,12 @@ static int svm_cr_access(struct vcpu *v, 90.547 if ( type == TYPE_MOV_TO_CR ) 90.548 { 90.549 inst_len = __get_instruction_length_from_list( 90.550 - vmcb, list_a, ARR_SIZE(list_a), &buffer[index], &match); 90.551 + v, list_a, ARR_SIZE(list_a), &buffer[index], &match); 90.552 } 90.553 else /* type == TYPE_MOV_FROM_CR */ 90.554 { 90.555 inst_len = __get_instruction_length_from_list( 90.556 - vmcb, list_b, ARR_SIZE(list_b), &buffer[index], &match); 90.557 + v, list_b, ARR_SIZE(list_b), &buffer[index], &match); 90.558 } 90.559 90.560 ASSERT(inst_len > 0); 90.561 @@ -2055,12 +2210,18 @@ static int svm_cr_access(struct vcpu *v, 90.562 { 90.563 case INSTR_MOV2CR: 90.564 gpreg = decode_src_reg(prefix, buffer[index+2]); 90.565 - result = mov_to_cr(gpreg, cr, regs); 90.566 + if ( paging_mode_hap(v->domain) ) 90.567 + result = npt_mov_to_cr(gpreg, cr, regs); 90.568 + else 90.569 + result = mov_to_cr(gpreg, cr, regs); 90.570 break; 90.571 90.572 case INSTR_MOVCR2: 90.573 gpreg = decode_src_reg(prefix, buffer[index+2]); 90.574 - mov_from_cr(cr, gpreg, regs); 90.575 + if ( paging_mode_hap(v->domain) ) 90.576 + npt_mov_from_cr(cr, gpreg, regs); 90.577 + else 90.578 + mov_from_cr(cr, gpreg, regs); 90.579 break; 90.580 90.581 case INSTR_CLTS: 90.582 @@ -2073,7 +2234,7 @@ static int svm_cr_access(struct vcpu *v, 90.583 90.584 case INSTR_LMSW: 90.585 if (svm_dbg_on) 90.586 - svm_dump_inst(svm_rip2pointer(vmcb)); 90.587 + svm_dump_inst(svm_rip2pointer(v)); 90.588 90.589 gpreg = decode_src_reg(prefix, buffer[index+2]); 90.590 value = get_reg(gpreg, regs, vmcb) & 0xF; 90.591 @@ -2087,12 +2248,15 @@ static int svm_cr_access(struct vcpu *v, 
90.592 if (svm_dbg_on) 90.593 printk("CR0-LMSW CR0 - New value=%lx\n", value); 90.594 90.595 - result = svm_set_cr0(value); 90.596 + if ( paging_mode_hap(v->domain) ) 90.597 + result = npt_set_cr0(value); 90.598 + else 90.599 + result = svm_set_cr0(value); 90.600 break; 90.601 90.602 case INSTR_SMSW: 90.603 if (svm_dbg_on) 90.604 - svm_dump_inst(svm_rip2pointer(vmcb)); 90.605 + svm_dump_inst(svm_rip2pointer(v)); 90.606 value = v->arch.hvm_svm.cpu_shadow_cr0; 90.607 gpreg = decode_src_reg(prefix, buffer[index+2]); 90.608 set_reg(gpreg, value, regs, vmcb); 90.609 @@ -2168,7 +2332,7 @@ static inline void svm_do_msr_access( 90.610 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx", 90.611 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx); 90.612 90.613 - inst_len = __get_instruction_length(vmcb, INSTR_RDMSR, NULL); 90.614 + inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL); 90.615 } 90.616 else 90.617 { 90.618 @@ -2200,7 +2364,7 @@ static inline void svm_do_msr_access( 90.619 break; 90.620 } 90.621 90.622 - inst_len = __get_instruction_length(vmcb, INSTR_WRMSR, NULL); 90.623 + inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL); 90.624 } 90.625 90.626 __update_guest_eip(vmcb, inst_len); 90.627 @@ -2223,8 +2387,9 @@ static inline void svm_vmexit_do_hlt(str 90.628 } 90.629 90.630 90.631 -static void svm_vmexit_do_invd(struct vmcb_struct *vmcb) 90.632 +static void svm_vmexit_do_invd(struct vcpu *v) 90.633 { 90.634 + struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb; 90.635 int inst_len; 90.636 90.637 /* Invalidate the cache - we can't really do that safely - maybe we should 90.638 @@ -2237,7 +2402,7 @@ static void svm_vmexit_do_invd(struct vm 90.639 */ 90.640 printk("INVD instruction intercepted - ignored\n"); 90.641 90.642 - inst_len = __get_instruction_length(vmcb, INSTR_INVD, NULL); 90.643 + inst_len = __get_instruction_length(v, INSTR_INVD, NULL); 90.644 __update_guest_eip(vmcb, inst_len); 90.645 } 90.646 90.647 @@ -2289,7 +2454,7 @@ void svm_handle_invlpg(const short invlp 90.648 * Unknown how many bytes the invlpg instruction will take. Use the 90.649 * maximum instruction length here 90.650 */ 90.651 - if (inst_copy_from_guest(opcode, svm_rip2pointer(vmcb), length) < length) 90.652 + if (inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length) 90.653 { 90.654 gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length); 90.655 domain_crash(v->domain); 90.656 @@ -2298,7 +2463,7 @@ void svm_handle_invlpg(const short invlp 90.657 90.658 if (invlpga) 90.659 { 90.660 - inst_len = __get_instruction_length(vmcb, INSTR_INVLPGA, opcode); 90.661 + inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode); 90.662 ASSERT(inst_len > 0); 90.663 __update_guest_eip(vmcb, inst_len); 90.664 90.665 @@ -2312,7 +2477,7 @@ void svm_handle_invlpg(const short invlp 90.666 { 90.667 /* What about multiple prefix codes? */ 90.668 prefix = (is_prefix(opcode[0])?opcode[0]:0); 90.669 - inst_len = __get_instruction_length(vmcb, INSTR_INVLPG, opcode); 90.670 + inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode); 90.671 ASSERT(inst_len > 0); 90.672 90.673 inst_len--; 90.674 @@ -2323,7 +2488,7 @@ void svm_handle_invlpg(const short invlp 90.675 * displacement to get effective address and length in bytes. Assume 90.676 * the system in either 32- or 64-bit mode. 
90.677 */ 90.678 - g_vaddr = get_effective_addr_modrm64(vmcb, regs, prefix, inst_len, 90.679 + g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len, 90.680 &opcode[inst_len], &length); 90.681 90.682 inst_len += length; 90.683 @@ -2369,7 +2534,11 @@ static int svm_do_vmmcall_reset_to_realm 90.684 90.685 vmcb->cr4 = SVM_CR4_HOST_MASK; 90.686 v->arch.hvm_svm.cpu_shadow_cr4 = 0; 90.687 - clear_bit(SVM_CPU_STATE_PAE_ENABLED, &v->arch.hvm_svm.cpu_state); 90.688 + 90.689 + if ( paging_mode_hap(v->domain) ) { 90.690 + vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0; 90.691 + vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4; 90.692 + } 90.693 90.694 /* This will jump to ROMBIOS */ 90.695 vmcb->rip = 0xFFF0; 90.696 @@ -2445,7 +2614,7 @@ static int svm_do_vmmcall(struct vcpu *v 90.697 ASSERT(vmcb); 90.698 ASSERT(regs); 90.699 90.700 - inst_len = __get_instruction_length(vmcb, INSTR_VMCALL, NULL); 90.701 + inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL); 90.702 ASSERT(inst_len > 0); 90.703 90.704 HVMTRACE_1D(VMMCALL, v, regs->eax); 90.705 @@ -2855,7 +3024,7 @@ asmlinkage void svm_vmexit_handler(struc 90.706 90.707 svm_dump_vmcb(__func__, vmcb); 90.708 svm_dump_regs(__func__, regs); 90.709 - svm_dump_inst(svm_rip2pointer(vmcb)); 90.710 + svm_dump_inst(svm_rip2pointer(v)); 90.711 } 90.712 90.713 #if defined(__i386__) 90.714 @@ -2957,7 +3126,7 @@ asmlinkage void svm_vmexit_handler(struc 90.715 /* Debug info to hopefully help debug WHY the guest double-faulted. */ 90.716 svm_dump_vmcb(__func__, vmcb); 90.717 svm_dump_regs(__func__, regs); 90.718 - svm_dump_inst(svm_rip2pointer(vmcb)); 90.719 + svm_dump_inst(svm_rip2pointer(v)); 90.720 svm_inject_exception(v, TRAP_double_fault, 1, 0); 90.721 break; 90.722 90.723 @@ -2967,7 +3136,7 @@ asmlinkage void svm_vmexit_handler(struc 90.724 break; 90.725 90.726 case VMEXIT_INVD: 90.727 - svm_vmexit_do_invd(vmcb); 90.728 + svm_vmexit_do_invd(v); 90.729 break; 90.730 90.731 case VMEXIT_GDTR_WRITE: 90.732 @@ -3054,6 +3223,15 @@ asmlinkage void svm_vmexit_handler(struc 90.733 hvm_triple_fault(); 90.734 break; 90.735 90.736 + case VMEXIT_NPF: 90.737 + { 90.738 + regs->error_code = vmcb->exitinfo1; 90.739 + if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) ) { 90.740 + domain_crash(v->domain); 90.741 + } 90.742 + break; 90.743 + } 90.744 + 90.745 default: 90.746 exit_and_crash: 90.747 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
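Taken together, the nested-paging pieces of this file work as follows: svm_npt_detect() probes CPUID leaf 0x8000000A bit 0 at start_svm() time; when a domain runs with paging_mode_hap(), CR0/CR3/CR4 accesses go through the npt_* variants, which write the guest's values straight into the VMCB (no shadowing, with CR0.WP forced on) and let paging_update_paging_modes() recompute state; and the only paging-related exit left is VMEXIT_NPF, whose handler forwards MMIO frames to handle_mmio() and treats any other nested fault as a corrupt p2m table, crashing the domain.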
91.1 --- a/xen/arch/x86/hvm/svm/vmcb.c Mon Mar 05 12:49:12 2007 -0600 91.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c Thu Mar 08 14:39:52 2007 -0600 91.3 @@ -201,6 +201,13 @@ static int construct_vmcb(struct vcpu *v 91.4 91.5 arch_svm->vmcb->exception_intercepts = MONITOR_DEFAULT_EXCEPTION_BITMAP; 91.6 91.7 + if ( paging_mode_hap(v->domain) ) { 91.8 + vmcb->cr0 = arch_svm->cpu_shadow_cr0; 91.9 + vmcb->np_enable = 1; /* enable nested paging */ 91.10 + vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */ 91.11 + vmcb->exception_intercepts &= ~EXCEPTION_BITMAP_PG; 91.12 + } 91.13 + 91.14 return 0; 91.15 } 91.16 91.17 @@ -310,7 +317,8 @@ void svm_dump_vmcb(const char *from, str 91.18 printk("KernGSBase = 0x%016llx PAT = 0x%016llx \n", 91.19 (unsigned long long) vmcb->kerngsbase, 91.20 (unsigned long long) vmcb->g_pat); 91.21 - 91.22 + printk("H_CR3 = 0x%016llx\n", (unsigned long long)vmcb->h_cr3); 91.23 + 91.24 /* print out all the selectors */ 91.25 svm_dump_sel("CS", &vmcb->cs); 91.26 svm_dump_sel("DS", &vmcb->ds);
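Two details worth noting in the HAP branch above: 0x0007040600070406 is the architectural power-on default of the PAT MSR (PA0=WB, PA1=WT, PA2=UC-, PA3=UC, repeated for PA4-PA7), which is what a freshly booted guest expects to see; and masking EXCEPTION_BITMAP_PG out of the intercepts stops the hypervisor from intercepting guest page faults at all, since with nested paging the guest handles its own #PF and only nested (host-level) faults cause an exit.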
92.1 --- a/xen/arch/x86/mm.c Mon Mar 05 12:49:12 2007 -0600 92.2 +++ b/xen/arch/x86/mm.c Thu Mar 08 14:39:52 2007 -0600 92.3 @@ -424,7 +424,10 @@ void invalidate_shadow_ldt(struct vcpu * 92.4 } 92.5 92.6 /* Dispose of the (now possibly invalid) mappings from the TLB. */ 92.7 - queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT); 92.8 + if ( v == current ) 92.9 + queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT); 92.10 + else 92.11 + flush_tlb_mask(v->domain->domain_dirty_cpumask); 92.12 } 92.13 92.14
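The distinction matters because deferred ops are processed on the current vcpu's way back out of the hypercall, so they can only ever clean up on behalf of the running vcpu; when the LDT being invalidated belongs to some other vcpu, the stale mappings may be cached on other processors, and the shootdown has to go out immediately over the domain's dirty-cpu mask.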
93.1 --- a/xen/arch/x86/mm/Makefile Mon Mar 05 12:49:12 2007 -0600 93.2 +++ b/xen/arch/x86/mm/Makefile Thu Mar 08 14:39:52 2007 -0600 93.3 @@ -1,4 +1,5 @@ 93.4 subdir-y += shadow 93.5 +subdir-y += hap 93.6 93.7 obj-y += paging.o 93.8 obj-y += p2m.o
94.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 94.2 +++ b/xen/arch/x86/mm/hap/Makefile Thu Mar 08 14:39:52 2007 -0600 94.3 @@ -0,0 +1,2 @@ 94.4 +obj-y += hap.o 94.5 +obj-y += support.o
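The new hap.c that follows (split across several hunks) is the core of the hardware-assisted paging support: a per-domain page pool (hap_alloc, hap_free, hap_set_allocation) modelled on the shadow allocator, p2m page alloc/free callbacks handed to p2m_alloc_table(), monitor-table construction and teardown for 2-, 3- and 4-level hosts, the domain lifecycle entry points (hap_enable, hap_teardown, hap_domctl), and the paging-mode hooks hap_update_cr3, hap_update_paging_modes and hap_write_p2m_entry. HERE_I_AM, HAP_PRINTK and HAP_ERROR are presumably debug macros from the private.h it includes, which is not part of this hunk.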
95.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 95.2 +++ b/xen/arch/x86/mm/hap/hap.c Thu Mar 08 14:39:52 2007 -0600 95.3 @@ -0,0 +1,708 @@ 95.4 +/****************************************************************************** 95.5 + * arch/x86/mm/hap/hap.c 95.6 + * 95.7 + * hardware assisted paging 95.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 95.9 + * Parts of this code are Copyright (c) 2007 by XenSource Inc. 95.10 + * 95.11 + * This program is free software; you can redistribute it and/or modify 95.12 + * it under the terms of the GNU General Public License as published by 95.13 + * the Free Software Foundation; either version 2 of the License, or 95.14 + * (at your option) any later version. 95.15 + * 95.16 + * This program is distributed in the hope that it will be useful, 95.17 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 95.18 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 95.19 + * GNU General Public License for more details. 95.20 + * 95.21 + * You should have received a copy of the GNU General Public License 95.22 + * along with this program; if not, write to the Free Software 95.23 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 95.24 + */ 95.25 + 95.26 +#include <xen/config.h> 95.27 +#include <xen/types.h> 95.28 +#include <xen/mm.h> 95.29 +#include <xen/trace.h> 95.30 +#include <xen/sched.h> 95.31 +#include <xen/perfc.h> 95.32 +#include <xen/irq.h> 95.33 +#include <xen/domain_page.h> 95.34 +#include <xen/guest_access.h> 95.35 +#include <xen/keyhandler.h> 95.36 +#include <asm/event.h> 95.37 +#include <asm/page.h> 95.38 +#include <asm/current.h> 95.39 +#include <asm/flushtlb.h> 95.40 +#include <asm/shared.h> 95.41 +#include <asm/hap.h> 95.42 +#include <asm/paging.h> 95.43 +#include <asm/domain.h> 95.44 + 95.45 +#include "private.h" 95.46 + 95.47 +/* Override macros from asm/page.h to make them work with mfn_t */ 95.48 +#undef mfn_to_page 95.49 +#define mfn_to_page(_m) (frame_table + mfn_x(_m)) 95.50 +#undef mfn_valid 95.51 +#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page) 95.52 +#undef page_to_mfn 95.53 +#define page_to_mfn(_pg) (_mfn((_pg) - frame_table)) 95.54 + 95.55 +/************************************************/ 95.56 +/* HAP SUPPORT FUNCTIONS */ 95.57 +/************************************************/ 95.58 +mfn_t hap_alloc(struct domain *d, unsigned long backpointer) 95.59 +{ 95.60 + struct page_info *sp = NULL; 95.61 + void *p; 95.62 + 95.63 + ASSERT(hap_locked_by_me(d)); 95.64 + 95.65 + sp = list_entry(d->arch.paging.hap.freelists.next, struct page_info, list); 95.66 + list_del(&sp->list); 95.67 + d->arch.paging.hap.free_pages -= 1; 95.68 + 95.69 + /* Now safe to clear the page for reuse */ 95.70 + p = hap_map_domain_page(page_to_mfn(sp)); 95.71 + ASSERT(p != NULL); 95.72 + clear_page(p); 95.73 + hap_unmap_domain_page(p); 95.74 + 95.75 + return page_to_mfn(sp); 95.76 +} 95.77 + 95.78 +void hap_free(struct domain *d, mfn_t smfn) 95.79 +{ 95.80 + struct page_info *sp = mfn_to_page(smfn); 95.81 + 95.82 + ASSERT(hap_locked_by_me(d)); 95.83 + 95.84 + d->arch.paging.hap.free_pages += 1; 95.85 + list_add_tail(&sp->list, &d->arch.paging.hap.freelists); 95.86 +} 95.87 + 95.88 +static int hap_alloc_p2m_pages(struct domain *d) 95.89 +{ 95.90 + struct page_info *pg; 95.91 + 95.92 + ASSERT(hap_locked_by_me(d)); 95.93 + 95.94 + pg = mfn_to_page(hap_alloc(d, 0)); 95.95 + d->arch.paging.hap.p2m_pages += 1; 95.96 + d->arch.paging.hap.total_pages -= 1; 95.97 + 95.98 + page_set_owner(pg, d); 95.99 + 
pg->count_info = 1; 95.100 + list_add_tail(&pg->list, &d->arch.paging.hap.p2m_freelist); 95.101 + 95.102 + return 1; 95.103 +} 95.104 + 95.105 +struct page_info * hap_alloc_p2m_page(struct domain *d) 95.106 +{ 95.107 + struct list_head *entry; 95.108 + struct page_info *pg; 95.109 + mfn_t mfn; 95.110 + void *p; 95.111 + 95.112 + hap_lock(d); 95.113 + 95.114 + if ( list_empty(&d->arch.paging.hap.p2m_freelist) && 95.115 + !hap_alloc_p2m_pages(d) ) { 95.116 + hap_unlock(d); 95.117 + return NULL; 95.118 + } 95.119 + entry = d->arch.paging.hap.p2m_freelist.next; 95.120 + list_del(entry); 95.121 + 95.122 + hap_unlock(d); 95.123 + 95.124 + pg = list_entry(entry, struct page_info, list); 95.125 + mfn = page_to_mfn(pg); 95.126 + p = hap_map_domain_page(mfn); 95.127 + clear_page(p); 95.128 + hap_unmap_domain_page(p); 95.129 + 95.130 + return pg; 95.131 +} 95.132 + 95.133 +void hap_free_p2m_page(struct domain *d, struct page_info *pg) 95.134 +{ 95.135 + ASSERT(page_get_owner(pg) == d); 95.136 + /* Should have just the one ref we gave it in alloc_p2m_page() */ 95.137 + if ( (pg->count_info & PGC_count_mask) != 1 ) { 95.138 + HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n", 95.139 + pg->count_info, pg->u.inuse.type_info); 95.140 + } 95.141 + /* Free should not decrement domain's total allocation, since 95.142 + * these pages were allocated without an owner. */ 95.143 + page_set_owner(pg, NULL); 95.144 + free_domheap_pages(pg, 0); 95.145 + d->arch.paging.hap.p2m_pages--; 95.146 +} 95.147 + 95.148 +/* Return the size of the pool, rounded up to the nearest MB */ 95.149 +static unsigned int 95.150 +hap_get_allocation(struct domain *d) 95.151 +{ 95.152 + unsigned int pg = d->arch.paging.hap.total_pages; 95.153 + 95.154 + HERE_I_AM; 95.155 + return ((pg >> (20 - PAGE_SHIFT)) 95.156 + + ((pg & ((1 << (20 - PAGE_SHIFT)) - 1)) ? 1 : 0)); 95.157 +} 95.158 + 95.159 +/* Set the pool of pages to the required number of pages. 95.160 + * Returns 0 for success, non-zero for failure. 
*/ 95.161 +static unsigned int 95.162 +hap_set_allocation(struct domain *d, unsigned int pages, int *preempted) 95.163 +{ 95.164 + struct page_info *sp; 95.165 + 95.166 + ASSERT(hap_locked_by_me(d)); 95.167 + 95.168 + while ( d->arch.paging.hap.total_pages != pages ) { 95.169 + if ( d->arch.paging.hap.total_pages < pages ) { 95.170 + /* Need to allocate more memory from domheap */ 95.171 + sp = alloc_domheap_pages(NULL, 0, 0); 95.172 + if ( sp == NULL ) { 95.173 + HAP_PRINTK("failed to allocate hap pages.\n"); 95.174 + return -ENOMEM; 95.175 + } 95.176 + d->arch.paging.hap.free_pages += 1; 95.177 + d->arch.paging.hap.total_pages += 1; 95.178 + list_add_tail(&sp->list, &d->arch.paging.hap.freelists); 95.179 + } 95.180 + else if ( d->arch.paging.hap.total_pages > pages ) { 95.181 + /* Need to return memory to domheap */ 95.182 + ASSERT(!list_empty(&d->arch.paging.hap.freelists)); 95.183 + sp = list_entry(d->arch.paging.hap.freelists.next, 95.184 + struct page_info, list); 95.185 + list_del(&sp->list); 95.186 + d->arch.paging.hap.free_pages -= 1; 95.187 + d->arch.paging.hap.total_pages -= 1; 95.188 + free_domheap_pages(sp, 0); 95.189 + } 95.190 + 95.191 + /* Check to see if we need to yield and try again */ 95.192 + if ( preempted && hypercall_preempt_check() ) { 95.193 + *preempted = 1; 95.194 + return 0; 95.195 + } 95.196 + } 95.197 + 95.198 + return 0; 95.199 +} 95.200 + 95.201 +#if CONFIG_PAGING_LEVELS == 4 95.202 +void hap_install_xen_entries_in_l4(struct vcpu *v, mfn_t gl4mfn, mfn_t sl4mfn) 95.203 +{ 95.204 + struct domain *d = v->domain; 95.205 + l4_pgentry_t *sl4e; 95.206 + 95.207 + sl4e = hap_map_domain_page(sl4mfn); 95.208 + ASSERT(sl4e != NULL); 95.209 + 95.210 + /* Copy the common Xen mappings from the idle domain */ 95.211 + memcpy(&sl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT], 95.212 + &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT], 95.213 + ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t)); 95.214 + 95.215 + /* Install the per-domain mappings for this domain */ 95.216 + sl4e[l4_table_offset(PERDOMAIN_VIRT_START)] = 95.217 + l4e_from_pfn(mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_l3))), 95.218 + __PAGE_HYPERVISOR); 95.219 + 95.220 + sl4e[l4_table_offset(LINEAR_PT_VIRT_START)] = 95.221 + l4e_from_pfn(mfn_x(gl4mfn), __PAGE_HYPERVISOR); 95.222 + 95.223 + /* install domain-specific P2M table */ 95.224 + sl4e[l4_table_offset(RO_MPT_VIRT_START)] = 95.225 + l4e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)), 95.226 + __PAGE_HYPERVISOR); 95.227 + 95.228 + hap_unmap_domain_page(sl4e); 95.229 +} 95.230 +#endif /* CONFIG_PAGING_LEVELS == 4 */ 95.231 + 95.232 +#if CONFIG_PAGING_LEVELS == 3 95.233 +void hap_install_xen_entries_in_l2h(struct vcpu *v, mfn_t sl2hmfn) 95.234 +{ 95.235 + struct domain *d = v->domain; 95.236 + l2_pgentry_t *sl2e; 95.237 + 95.238 + int i; 95.239 + 95.240 + sl2e = hap_map_domain_page(sl2hmfn); 95.241 + ASSERT(sl2e != NULL); 95.242 + 95.243 + /* Copy the common Xen mappings from the idle domain */ 95.244 + memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)], 95.245 + &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT], 95.246 + L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); 95.247 + 95.248 + /* Install the per-domain mappings for this domain */ 95.249 + for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) 95.250 + sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = 95.251 + l2e_from_pfn( 95.252 + mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)), 95.253 + __PAGE_HYPERVISOR); 95.254 + 95.255 + for ( i = 0; i < HAP_L3_PAGETABLE_ENTRIES; i++ ) 95.256 + 
sl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] = 95.257 + l2e_empty(); 95.258 + 95.259 + if ( paging_mode_translate(d) ) 95.260 + { 95.261 + /* Install the domain-specific p2m table */ 95.262 + l3_pgentry_t *p2m; 95.263 + ASSERT(pagetable_get_pfn(d->arch.phys_table) != 0); 95.264 + p2m = hap_map_domain_page(pagetable_get_mfn(d->arch.phys_table)); 95.265 + for ( i = 0; i < MACHPHYS_MBYTES>>1; i++ ) 95.266 + { 95.267 + sl2e[l2_table_offset(RO_MPT_VIRT_START) + i] = 95.268 + (l3e_get_flags(p2m[i]) & _PAGE_PRESENT) 95.269 + ? l2e_from_pfn(mfn_x(_mfn(l3e_get_pfn(p2m[i]))), 95.270 + __PAGE_HYPERVISOR) 95.271 + : l2e_empty(); 95.272 + } 95.273 + hap_unmap_domain_page(p2m); 95.274 + } 95.275 + 95.276 + hap_unmap_domain_page(sl2e); 95.277 +} 95.278 +#endif 95.279 + 95.280 +#if CONFIG_PAGING_LEVELS == 2 95.281 +void hap_install_xen_entries_in_l2(struct vcpu *v, mfn_t gl2mfn, mfn_t sl2mfn) 95.282 +{ 95.283 + struct domain *d = v->domain; 95.284 + l2_pgentry_t *sl2e; 95.285 + int i; 95.286 + 95.287 + sl2e = hap_map_domain_page(sl2mfn); 95.288 + ASSERT(sl2e != NULL); 95.289 + 95.290 + /* Copy the common Xen mappings from the idle domain */ 95.291 + memcpy(&sl2e[L2_PAGETABLE_FIRST_XEN_SLOT], 95.292 + &idle_pg_table[L2_PAGETABLE_FIRST_XEN_SLOT], 95.293 + L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t)); 95.294 + 95.295 + /* Install the per-domain mappings for this domain */ 95.296 + for ( i = 0; i < PDPT_L2_ENTRIES; i++ ) 95.297 + sl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i] = 95.298 + l2e_from_pfn( 95.299 + mfn_x(page_to_mfn(virt_to_page(d->arch.mm_perdomain_pt) + i)), 95.300 + __PAGE_HYPERVISOR); 95.301 + 95.302 + 95.303 + sl2e[l2_table_offset(LINEAR_PT_VIRT_START)] = 95.304 + l2e_from_pfn(mfn_x(gl2mfn), __PAGE_HYPERVISOR); 95.305 + 95.306 + /* install domain-specific P2M table */ 95.307 + sl2e[l2_table_offset(RO_MPT_VIRT_START)] = 95.308 + l2e_from_pfn(mfn_x(pagetable_get_mfn(d->arch.phys_table)), 95.309 + __PAGE_HYPERVISOR); 95.310 + 95.311 + hap_unmap_domain_page(sl2e); 95.312 +} 95.313 +#endif 95.314 + 95.315 +mfn_t hap_make_monitor_table(struct vcpu *v) 95.316 +{ 95.317 + struct domain *d = v->domain; 95.318 + 95.319 + ASSERT(pagetable_get_pfn(v->arch.monitor_table) == 0); 95.320 + 95.321 +#if CONFIG_PAGING_LEVELS == 4 95.322 + { 95.323 + mfn_t m4mfn; 95.324 + m4mfn = hap_alloc(d, 0); 95.325 + hap_install_xen_entries_in_l4(v, m4mfn, m4mfn); 95.326 + return m4mfn; 95.327 + } 95.328 +#elif CONFIG_PAGING_LEVELS == 3 95.329 + { 95.330 + mfn_t m3mfn, m2mfn; 95.331 + l3_pgentry_t *l3e; 95.332 + l2_pgentry_t *l2e; 95.333 + int i; 95.334 + 95.335 + m3mfn = hap_alloc(d, 0); 95.336 + 95.337 + /* Install a monitor l2 table in slot 3 of the l3 table. 95.338 + * This is used for all Xen entries, including linear maps 95.339 + */ 95.340 + m2mfn = hap_alloc(d, 0); 95.341 + l3e = hap_map_domain_page(m3mfn); 95.342 + l3e[3] = l3e_from_pfn(mfn_x(m2mfn), _PAGE_PRESENT); 95.343 + hap_install_xen_entries_in_l2h(v, m2mfn); 95.344 + /* Install the monitor's own linear map */ 95.345 + l2e = hap_map_domain_page(m2mfn); 95.346 + for ( i = 0; i < L3_PAGETABLE_ENTRIES; i++ ) 95.347 + l2e[l2_table_offset(LINEAR_PT_VIRT_START) + i] = 95.348 + (l3e_get_flags(l3e[i]) & _PAGE_PRESENT) 95.349 + ? 
l2e_from_pfn(l3e_get_pfn(l3e[i]), __PAGE_HYPERVISOR) 95.350 + : l2e_empty(); 95.351 + hap_unmap_domain_page(l2e); 95.352 + hap_unmap_domain_page(l3e); 95.353 + 95.354 + HAP_PRINTK("new monitor table: %#lx\n", mfn_x(m3mfn)); 95.355 + return m3mfn; 95.356 + } 95.357 +#else 95.358 + { 95.359 + mfn_t m2mfn; 95.360 + 95.361 + m2mfn = hap_alloc(d, 0); 95.362 + hap_install_xen_entries_in_l2(v, m2mfn, m2mfn); 95.363 + 95.364 + return m2mfn; 95.365 + } 95.366 +#endif 95.367 +} 95.368 + 95.369 +void hap_destroy_monitor_table(struct vcpu* v, mfn_t mmfn) 95.370 +{ 95.371 + struct domain *d = v->domain; 95.372 + 95.373 +#if CONFIG_PAGING_LEVELS == 4 95.374 + /* Need to destroy the l3 monitor page in slot 0 too */ 95.375 + { 95.376 + mfn_t m3mfn; 95.377 + l4_pgentry_t *l4e = hap_map_domain_page(mmfn); 95.378 + ASSERT(l4e_get_flags(l4e[0]) & _PAGE_PRESENT); 95.379 + m3mfn = _mfn(l4e_get_pfn(l4e[0])); 95.380 + hap_free(d, m3mfn); 95.381 + hap_unmap_domain_page(l4e); 95.382 + } 95.383 +#elif CONFIG_PAGING_LEVELS == 3 95.384 + /* Need to destroy the l2 monitor page in slot 4 too */ 95.385 + { 95.386 + l3_pgentry_t *l3e = hap_map_domain_page(mmfn); 95.387 + ASSERT(l3e_get_flags(l3e[3]) & _PAGE_PRESENT); 95.388 + hap_free(d, _mfn(l3e_get_pfn(l3e[3]))); 95.389 + hap_unmap_domain_page(l3e); 95.390 + } 95.391 +#endif 95.392 + 95.393 + /* Put the memory back in the pool */ 95.394 + hap_free(d, mmfn); 95.395 +} 95.396 + 95.397 +/************************************************/ 95.398 +/* HAP DOMAIN LEVEL FUNCTIONS */ 95.399 +/************************************************/ 95.400 +void hap_domain_init(struct domain *d) 95.401 +{ 95.402 + hap_lock_init(d); 95.403 + INIT_LIST_HEAD(&d->arch.paging.hap.freelists); 95.404 + INIT_LIST_HEAD(&d->arch.paging.hap.p2m_freelist); 95.405 +} 95.406 + 95.407 +/* return 0 for success, -errno for failure */ 95.408 +int hap_enable(struct domain *d, u32 mode) 95.409 +{ 95.410 + unsigned int old_pages; 95.411 + int rv = 0; 95.412 + 95.413 + HERE_I_AM; 95.414 + 95.415 + domain_pause(d); 95.416 + /* error check */ 95.417 + if ( (d == current->domain) ) { 95.418 + rv = -EINVAL; 95.419 + goto out; 95.420 + } 95.421 + 95.422 + old_pages = d->arch.paging.hap.total_pages; 95.423 + if ( old_pages == 0 ) { 95.424 + unsigned int r; 95.425 + hap_lock(d); 95.426 + r = hap_set_allocation(d, 256, NULL); 95.427 + hap_unlock(d); 95.428 + if ( r != 0 ) { 95.429 + hap_set_allocation(d, 0, NULL); 95.430 + rv = -ENOMEM; 95.431 + goto out; 95.432 + } 95.433 + } 95.434 + 95.435 + /* allocate P2m table */ 95.436 + if ( mode & PG_translate ) { 95.437 + rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page); 95.438 + if ( rv != 0 ) 95.439 + goto out; 95.440 + } 95.441 + 95.442 + d->arch.paging.mode = mode | PG_SH_enable; 95.443 + 95.444 + out: 95.445 + domain_unpause(d); 95.446 + return rv; 95.447 +} 95.448 + 95.449 +void hap_final_teardown(struct domain *d) 95.450 +{ 95.451 + HERE_I_AM; 95.452 + 95.453 + if ( d->arch.paging.hap.total_pages != 0 ) 95.454 + hap_teardown(d); 95.455 + 95.456 + p2m_teardown(d); 95.457 +} 95.458 + 95.459 +void hap_teardown(struct domain *d) 95.460 +{ 95.461 + struct vcpu *v; 95.462 + mfn_t mfn; 95.463 + HERE_I_AM; 95.464 + 95.465 + ASSERT(test_bit(_DOMF_dying, &d->domain_flags)); 95.466 + ASSERT(d != current->domain); 95.467 + 95.468 + if ( !hap_locked_by_me(d) ) 95.469 + hap_lock(d); /* Keep various asserts happy */ 95.470 + 95.471 + if ( paging_mode_enabled(d) ) { 95.472 + /* release the monitor table held by each vcpu */ 95.473 + for_each_vcpu(d, v) { 95.474 + if ( 
v->arch.paging.mode && paging_mode_external(d) ) { 95.475 + mfn = pagetable_get_mfn(v->arch.monitor_table); 95.476 + if ( mfn_valid(mfn) && (mfn_x(mfn) != 0) ) 95.477 + hap_destroy_monitor_table(v, mfn); 95.478 + v->arch.monitor_table = pagetable_null(); 95.479 + } 95.480 + } 95.481 + } 95.482 + 95.483 + if ( d->arch.paging.hap.total_pages != 0 ) { 95.484 + HAP_PRINTK("teardown of domain %u starts." 95.485 + " pages total = %u, free = %u, p2m=%u\n", 95.486 + d->domain_id, 95.487 + d->arch.paging.hap.total_pages, 95.488 + d->arch.paging.hap.free_pages, 95.489 + d->arch.paging.hap.p2m_pages); 95.490 + hap_set_allocation(d, 0, NULL); 95.491 + HAP_PRINTK("teardown done." 95.492 + " pages total = %u, free = %u, p2m=%u\n", 95.493 + d->arch.paging.hap.total_pages, 95.494 + d->arch.paging.hap.free_pages, 95.495 + d->arch.paging.hap.p2m_pages); 95.496 + ASSERT(d->arch.paging.hap.total_pages == 0); 95.497 + } 95.498 + 95.499 + d->arch.paging.mode &= ~PG_log_dirty; 95.500 + 95.501 + hap_unlock(d); 95.502 +} 95.503 + 95.504 +int hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, 95.505 + XEN_GUEST_HANDLE(void) u_domctl) 95.506 +{ 95.507 + int rc, preempted = 0; 95.508 + 95.509 + HERE_I_AM; 95.510 + 95.511 + if ( unlikely(d == current->domain) ) { 95.512 + gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n"); 95.513 + return -EINVAL; 95.514 + } 95.515 + 95.516 + switch ( sc->op ) { 95.517 + case XEN_DOMCTL_SHADOW_OP_OFF: 95.518 + case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST: 95.519 + case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY: 95.520 + case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE: 95.521 + case XEN_DOMCTL_SHADOW_OP_CLEAN: 95.522 + case XEN_DOMCTL_SHADOW_OP_PEEK: 95.523 + case XEN_DOMCTL_SHADOW_OP_ENABLE: 95.524 + HAP_ERROR("Bad hap domctl op %u\n", sc->op); 95.525 + domain_crash(d); 95.526 + return -EINVAL; 95.527 + case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION: 95.528 + hap_lock(d); 95.529 + rc = hap_set_allocation(d, sc->mb << (20 - PAGE_SHIFT), &preempted); 95.530 + hap_unlock(d); 95.531 + if ( preempted ) 95.532 + /* Not finished. Set up to re-run the call. */ 95.533 + rc = hypercall_create_continuation(__HYPERVISOR_domctl, "h", 95.534 + u_domctl); 95.535 + else 95.536 + /* Finished. Return the new allocation */ 95.537 + sc->mb = hap_get_allocation(d); 95.538 + return rc; 95.539 + case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION: 95.540 + sc->mb = hap_get_allocation(d); 95.541 + return 0; 95.542 + default: 95.543 + HAP_ERROR("Bad hap domctl op %u\n", sc->op); 95.544 + return -EINVAL; 95.545 + } 95.546 +} 95.547 + 95.548 +void hap_vcpu_init(struct vcpu *v) 95.549 +{ 95.550 + v->arch.paging.mode = &hap_paging_real_mode; 95.551 +} 95.552 +/************************************************/ 95.553 +/* HAP PAGING MODE FUNCTIONS */ 95.554 +/************************************************/ 95.555 +/* In theory, hap should not intercept guest page faults. This function can 95.556 + * be recycled to handle host/nested page faults, if needed. 95.557 + */ 95.558 +int hap_page_fault(struct vcpu *v, unsigned long va, 95.559 + struct cpu_user_regs *regs) 95.560 +{ 95.561 + HERE_I_AM; 95.562 + domain_crash(v->domain); 95.563 + return 0; 95.564 +} 95.565 + 95.566 +/* Called when the guest issues an invlpg request. 95.567 + * Return 1 if a page invalidation needs to be issued on the CPU; return 0 95.568 + * if it does not.
95.569 + */ 95.570 +int hap_invlpg(struct vcpu *v, unsigned long va) 95.571 +{ 95.572 + HERE_I_AM; 95.573 + return 0; 95.574 +} 95.575 + 95.576 +void hap_update_cr3(struct vcpu *v, int do_locking) 95.577 +{ 95.578 + struct domain *d = v->domain; 95.579 + mfn_t gmfn; 95.580 + 95.581 + HERE_I_AM; 95.582 + /* Don't do anything on an uninitialised vcpu */ 95.583 + if ( !is_hvm_domain(d) && !test_bit(_VCPUF_initialised, &v->vcpu_flags) ) { 95.584 + ASSERT(v->arch.cr3 == 0); 95.585 + return; 95.586 + } 95.587 + 95.588 + if ( do_locking ) 95.589 + hap_lock(v->domain); 95.590 + 95.591 + ASSERT(hap_locked_by_me(v->domain)); 95.592 + ASSERT(v->arch.paging.mode); 95.593 + 95.594 + gmfn = pagetable_get_mfn(v->arch.guest_table); 95.595 + 95.596 + make_cr3(v, pagetable_get_pfn(v->arch.monitor_table)); 95.597 + 95.598 + hvm_update_guest_cr3(v, pagetable_get_paddr(v->arch.monitor_table)); 95.599 + 95.600 + HAP_PRINTK("d=%u v=%u guest_table=%05lx, monitor_table = %05lx\n", 95.601 + d->domain_id, v->vcpu_id, 95.602 + (unsigned long)pagetable_get_pfn(v->arch.guest_table), 95.603 + (unsigned long)pagetable_get_pfn(v->arch.monitor_table)); 95.604 + 95.605 + flush_tlb_mask(d->domain_dirty_cpumask); 95.606 + 95.607 + if ( do_locking ) 95.608 + hap_unlock(v->domain); 95.609 +} 95.610 + 95.611 +void hap_update_paging_modes(struct vcpu *v) 95.612 +{ 95.613 + struct domain *d; 95.614 + 95.615 + HERE_I_AM; 95.616 + 95.617 + d = v->domain; 95.618 + hap_lock(d); 95.619 + 95.620 + /* update guest paging mode. Note that we rely on hvm functions to detect 95.621 + * guest's paging mode. So, make sure the shadow registers (CR0, CR4, EFER) 95.622 + * reflect guest's status correctly. 95.623 + */ 95.624 + if ( hvm_paging_enabled(v) ) { 95.625 + if ( hvm_long_mode_enabled(v) ) 95.626 + v->arch.paging.mode = &hap_paging_long_mode; 95.627 + else if ( hvm_pae_enabled(v) ) 95.628 + v->arch.paging.mode = &hap_paging_pae_mode; 95.629 + else 95.630 + v->arch.paging.mode = &hap_paging_protected_mode; 95.631 + } 95.632 + else { 95.633 + v->arch.paging.mode = &hap_paging_real_mode; 95.634 + } 95.635 + 95.636 + v->arch.paging.translate_enabled = !!hvm_paging_enabled(v); 95.637 + 95.638 + /* use p2m map */ 95.639 + v->arch.guest_table = 95.640 + pagetable_from_pfn(pagetable_get_pfn(d->arch.phys_table)); 95.641 + 95.642 + if ( pagetable_is_null(v->arch.monitor_table) ) { 95.643 + mfn_t mmfn = hap_make_monitor_table(v); 95.644 + v->arch.monitor_table = pagetable_from_mfn(mmfn); 95.645 + make_cr3(v, mfn_x(mmfn)); 95.646 + } 95.647 + 95.648 + flush_tlb_mask(d->domain_dirty_cpumask); 95.649 + hap_unlock(d); 95.650 +} 95.651 + 95.652 +void 95.653 +hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p, 95.654 + l1_pgentry_t new, unsigned int level) 95.655 +{ 95.656 + hap_lock(v->domain); 95.657 + safe_write_pte(p, new); 95.658 + hap_unlock(v->domain); 95.659 +} 95.660 + 95.661 +/* Entry points into this mode of the hap code. 
*/ 95.662 +struct paging_mode hap_paging_real_mode = { 95.663 + .page_fault = hap_page_fault, 95.664 + .invlpg = hap_invlpg, 95.665 + .gva_to_gfn = hap_gva_to_gfn_real_mode, 95.666 + .update_cr3 = hap_update_cr3, 95.667 + .update_paging_modes = hap_update_paging_modes, 95.668 + .write_p2m_entry = hap_write_p2m_entry, 95.669 + .guest_levels = 1 95.670 +}; 95.671 + 95.672 +struct paging_mode hap_paging_protected_mode = { 95.673 + .page_fault = hap_page_fault, 95.674 + .invlpg = hap_invlpg, 95.675 + .gva_to_gfn = hap_gva_to_gfn_protected_mode, 95.676 + .update_cr3 = hap_update_cr3, 95.677 + .update_paging_modes = hap_update_paging_modes, 95.678 + .write_p2m_entry = hap_write_p2m_entry, 95.679 + .guest_levels = 2 95.680 +}; 95.681 + 95.682 +struct paging_mode hap_paging_pae_mode = { 95.683 + .page_fault = hap_page_fault, 95.684 + .invlpg = hap_invlpg, 95.685 + .gva_to_gfn = hap_gva_to_gfn_pae_mode, 95.686 + .update_cr3 = hap_update_cr3, 95.687 + .update_paging_modes = hap_update_paging_modes, 95.688 + .write_p2m_entry = hap_write_p2m_entry, 95.689 + .guest_levels = 3 95.690 +}; 95.691 + 95.692 +struct paging_mode hap_paging_long_mode = { 95.693 + .page_fault = hap_page_fault, 95.694 + .invlpg = hap_invlpg, 95.695 + .gva_to_gfn = hap_gva_to_gfn_long_mode, 95.696 + .update_cr3 = hap_update_cr3, 95.697 + .update_paging_modes = hap_update_paging_modes, 95.698 + .write_p2m_entry = hap_write_p2m_entry, 95.699 + .guest_levels = 4 95.700 +}; 95.701 + 95.702 +/* 95.703 + * Local variables: 95.704 + * mode: C 95.705 + * c-set-style: "BSD" 95.706 + * c-basic-offset: 4 95.707 + * indent-tabs-mode: nil 95.708 + * End: 95.709 + */ 95.710 + 95.711 +
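[Note on the mode tables above: the four hap_paging_*_mode structures give HAP the same pluggable-mode shape as the shadow code. hap_update_paging_modes() points v->arch.paging.mode at whichever table matches the guest's CR0/CR4/EFER state, and generic paging code then calls through the function pointers without knowing which mode is active. A minimal sketch of that dispatch, illustrative only (the real call sites live in the generic paging layer):

    /* Sketch: generic code never names a mode; it calls through the
     * per-vcpu paging_mode table selected by hap_update_paging_modes(). */
    static inline unsigned long
    sketch_gva_to_gfn(struct vcpu *v, unsigned long gva)
    {
        return v->arch.paging.mode->gva_to_gfn(v, gva);
    }
]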
96.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 96.2 +++ b/xen/arch/x86/mm/hap/private.h Thu Mar 08 14:39:52 2007 -0600 96.3 @@ -0,0 +1,112 @@ 96.4 +/* 96.5 + * arch/x86/mm/hap/private.h 96.6 + * 96.7 + * Copyright (c) 2007, AMD Corporation (Wei Huang) 96.8 + * 96.9 + * This program is free software; you can redistribute it and/or modify it 96.10 + * under the terms and conditions of the GNU General Public License, 96.11 + * version 2, as published by the Free Software Foundation. 96.12 + * 96.13 + * This program is distributed in the hope it will be useful, but WITHOUT 96.14 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 96.15 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 96.16 + * more details. 96.17 + * 96.18 + * You should have received a copy of the GNU General Public License along with 96.19 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 96.20 + * Place - Suite 330, Boston, MA 02111-1307 USA. 96.21 + * 96.22 + */ 96.23 +#ifndef __HAP_PRIVATE_H__ 96.24 +#define __HAP_PRIVATE_H__ 96.25 + 96.26 +#include <asm/flushtlb.h> 96.27 +#include <asm/hvm/support.h> 96.28 + 96.29 +/********************************************/ 96.30 +/* GUEST TRANSLATION FUNCS */ 96.31 +/********************************************/ 96.32 +unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva); 96.33 +unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva); 96.34 +unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva); 96.35 +unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva); 96.36 +/********************************************/ 96.37 +/* MISC DEFINITIONS */ 96.38 +/********************************************/ 96.39 + 96.40 +/* PT_SHIFT describes the amount by which a virtual address is shifted right 96.41 + * to right justify the portion to be used for indexing into a page 96.42 + * table, given the guest memory model (i.e. number of levels) and the level 96.43 + * of the page table being accessed. The idea is from Virtual Iron's code. 96.44 + */ 96.45 +static const int PT_SHIFT[][5] = 96.46 + { /* ------ level ------ nr_levels */ 96.47 + /* 1 2 3 4 */ 96.48 + { 0, 0, 0, 0, 0}, /* 0 not used */ 96.49 + { 0, 0, 0, 0, 0}, /* 1 not used */ 96.50 + { 0, 12, 22, 0, 0}, /* 2 */ 96.51 + { 0, 12, 21, 30, 0}, /* 3 */ 96.52 + { 0, 12, 21, 30, 39} /* 4 */ 96.53 + }; 96.54 + 96.55 +/* PT_ENTRIES describes the number of entries in a page table, given the 96.56 + * memory model (i.e. number of levels) and the level of the page table 96.57 + * being considered. 
This idea is from Virtual Iron's shadow code. */ 96.58 +static const int PT_ENTRIES[][5] = 96.59 + { /* ------ level ------ nr_levels */ 96.60 + /* 1 2 3 4 */ 96.61 + { 0, 0, 0, 0, 0}, /* 0 not used */ 96.62 + { 0, 0, 0, 0, 0}, /* 1 not used */ 96.63 + { 0, 1024, 1024, 0, 0}, /* 2 */ 96.64 + { 0, 512, 512, 4, 0}, /* 3 */ 96.65 + { 0, 512, 512, 512, 512} /* 4 */ 96.66 + }; 96.67 + 96.68 +/********************************************/ 96.69 +/* PAGING DEFINITION FOR GUEST */ 96.70 +/********************************************/ 96.71 +#define PHYSICAL_PAGE_4K_SIZE (1UL << 12) 96.72 +#define PHYSICAL_PAGE_2M_SIZE (1UL << 21) 96.73 +#define PHYSICAL_PAGE_4M_SIZE (1UL << 22) 96.74 +#define PHYSICAL_PAGE_4K_MASK ( ~(PHYSICAL_PAGE_4K_SIZE - 1) ) 96.75 +#define PHYSICAL_PAGE_2M_MASK ( ~(PHYSICAL_PAGE_2M_SIZE - 1) ) 96.76 +#define PHYSICAL_PAGE_4M_MASK ( ~(PHYSICAL_PAGE_4M_SIZE - 1) ) 96.77 + 96.78 +/* long mode physical address mask */ 96.79 +#define PHYSICAL_ADDR_BITS_LM 52 96.80 +#define PHYSICAL_ADDR_MASK_LM ((1UL << PHYSICAL_ADDR_BITS_LM)-1) 96.81 +#define PHYSICAL_ADDR_2M_MASK_LM (PHYSICAL_PAGE_2M_MASK & PHYSICAL_ADDR_MASK_LM) 96.82 +#define PHYSICAL_ADDR_4K_MASK_LM (PHYSICAL_PAGE_4K_MASK & PHYSICAL_ADDR_MASK_LM) 96.83 + 96.84 +#define PAGE_NX_BIT (1ULL << 63) 96.85 +/************************************************/ 96.86 +/* PAGETABLE RELATED VARIABLES */ 96.87 +/************************************************/ 96.88 +#if CONFIG_PAGING_LEVELS == 2 96.89 +#define HAP_L1_PAGETABLE_ENTRIES 1024 96.90 +#define HAP_L2_PAGETABLE_ENTRIES 1024 96.91 +#define HAP_L1_PAGETABLE_SHIFT 12 96.92 +#define HAP_L2_PAGETABLE_SHIFT 22 96.93 +#endif 96.94 + 96.95 +#if CONFIG_PAGING_LEVELS == 3 96.96 +#define HAP_L1_PAGETABLE_ENTRIES 512 96.97 +#define HAP_L2_PAGETABLE_ENTRIES 512 96.98 +#define HAP_L3_PAGETABLE_ENTRIES 4 96.99 +#define HAP_L1_PAGETABLE_SHIFT 12 96.100 +#define HAP_L2_PAGETABLE_SHIFT 21 96.101 +#define HAP_L3_PAGETABLE_SHIFT 30 96.102 +#endif 96.103 + 96.104 +#if CONFIG_PAGING_LEVELS == 4 96.105 +#define HAP_L1_PAGETABLE_ENTRIES 512 96.106 +#define HAP_L2_PAGETABLE_ENTRIES 512 96.107 +#define HAP_L3_PAGETABLE_ENTRIES 512 96.108 +#define HAP_L4_PAGETABLE_ENTRIES 512 96.109 +#define HAP_L1_PAGETABLE_SHIFT 12 96.110 +#define HAP_L2_PAGETABLE_SHIFT 21 96.111 +#define HAP_L3_PAGETABLE_SHIFT 30 96.112 +#define HAP_L4_PAGETABLE_SHIFT 39 96.113 +#endif 96.114 + 96.115 +#endif /* __HAP_PRIVATE_H__ */
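[Note on PT_SHIFT/PT_ENTRIES: they encode, per guest memory model, how a virtual address is cut into page-table indices by the walkers in hap/support.c below, via index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev] - 1). A worked example for a 2-level (32-bit non-PAE) guest, illustrative only:

    /* mode == 2: PT_SHIFT[2][2] = 22, PT_SHIFT[2][1] = 12,
     * PT_ENTRIES[2][1] = PT_ENTRIES[2][2] = 1024. */
    unsigned long gva = 0xbfc01234UL;
    int l2_index = (gva >> 22) & (1024 - 1);   /* = 767 */
    int l1_index = (gva >> 12) & (1024 - 1);   /* = 1   */
]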
97.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 97.2 +++ b/xen/arch/x86/mm/hap/support.c Thu Mar 08 14:39:52 2007 -0600 97.3 @@ -0,0 +1,334 @@ 97.4 +/* 97.5 + * arch/x86/mm/hap/support.c 97.6 + * 97.7 + * guest page table walker 97.8 + * Copyright (c) 2007, AMD Corporation (Wei Huang) 97.9 + * 97.10 + * This program is free software; you can redistribute it and/or modify it 97.11 + * under the terms and conditions of the GNU General Public License, 97.12 + * version 2, as published by the Free Software Foundation. 97.13 + * 97.14 + * This program is distributed in the hope it will be useful, but WITHOUT 97.15 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 97.16 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 97.17 + * more details. 97.18 + * 97.19 + * You should have received a copy of the GNU General Public License along with 97.20 + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple 97.21 + * Place - Suite 330, Boston, MA 02111-1307 USA. 97.22 + * 97.23 + */ 97.24 + 97.25 +#include <xen/config.h> 97.26 +#include <xen/types.h> 97.27 +#include <xen/mm.h> 97.28 +#include <xen/domain_page.h> 97.29 +#include <asm/page.h> 97.30 +#include <xen/event.h> 97.31 +#include <xen/sched.h> 97.32 +#include <asm/hvm/svm/vmcb.h> 97.33 +#include <asm/domain.h> 97.34 +#include <asm/shadow.h> 97.35 +#include <asm/hap.h> 97.36 + 97.37 +#include "private.h" 97.38 +#include "../page-guest32.h" 97.39 + 97.40 +/*******************************************/ 97.41 +/* Platform Specific Functions */ 97.42 +/*******************************************/ 97.43 + 97.44 +/* Translate guest virtual address to guest physical address. Specifically 97.45 + * for real mode guest. 97.46 + */ 97.47 +unsigned long hap_gva_to_gfn_real_mode(struct vcpu *v, unsigned long gva) 97.48 +{ 97.49 + HERE_I_AM; 97.50 + return ((paddr_t)gva >> PAGE_SHIFT); 97.51 +} 97.52 + 97.53 +/* Translate guest virtual address to guest physical address. Specifically 97.54 + * for protected guest. 
97.55 + */ 97.56 +unsigned long hap_gva_to_gfn_protected_mode(struct vcpu *v, unsigned long gva) 97.57 +{ 97.58 + unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3); 97.59 + int mode = 2; /* two-level guest */ 97.60 + int lev, index; 97.61 + paddr_t gpa = 0; 97.62 + unsigned long gpfn, mfn; 97.63 + int success = 1; 97.64 + l2_pgentry_32_t *l2e; /* guest page entry size is 32-bit */ 97.65 + l1_pgentry_32_t *l1e; 97.66 + 97.67 + HERE_I_AM; 97.68 + 97.69 + gpfn = (gcr3 >> PAGE_SHIFT); 97.70 + for ( lev = mode; lev >= 1; lev-- ) { 97.71 + mfn = get_mfn_from_gpfn( gpfn ); 97.72 + if ( mfn == INVALID_MFN ) { 97.73 + HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, 97.74 + lev); 97.75 + success = 0; 97.76 + break; 97.77 + } 97.78 + index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1); 97.79 + 97.80 + if ( lev == 2 ) { 97.81 + l2e = map_domain_page( mfn ); 97.82 + HAP_PRINTK("l2 page table entry is %x at index = %d\n", 97.83 + l2e[index].l2, index); 97.84 + if ( !(l2e_get_flags_32(l2e[index]) & _PAGE_PRESENT) ) { 97.85 + HAP_PRINTK("Level 2 entry not present at index = %d\n", index); 97.86 + success = 0; 97.87 + } 97.88 + 97.89 + if ( l2e_get_flags_32(l2e[index]) & _PAGE_PSE ) { /* handle PSE */ 97.90 + HAP_PRINTK("guest page table is PSE\n"); 97.91 + if ( l2e_get_intpte(l2e[index]) & 0x001FE000UL ) { /*[13:20] */ 97.92 + printk("guest physical memory size is too large!\n"); 97.93 + domain_crash(v->domain); 97.94 + } 97.95 + gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_4M_MASK) + 97.96 + (gva & ~PHYSICAL_PAGE_4M_MASK); 97.97 + unmap_domain_page(l2e); 97.98 + break; /* last level page table, return from here */ 97.99 + } 97.100 + else { 97.101 + gpfn = l2e_get_pfn( l2e[index] ); 97.102 + } 97.103 + unmap_domain_page(l2e); 97.104 + } 97.105 + 97.106 + if ( lev == 1 ) { 97.107 + l1e = map_domain_page( mfn ); 97.108 + HAP_PRINTK("l1 page table entry is %x at index = %d\n", 97.109 + l1e[index].l1, index); 97.110 + if ( !(l1e_get_flags_32(l1e[index]) & _PAGE_PRESENT) ) { 97.111 + HAP_PRINTK("Level 1 entry not present at index = %d\n", index); 97.112 + success = 0; 97.113 + } 97.114 + gpfn = l1e_get_pfn( l1e[index] ); 97.115 + gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) + 97.116 + (gva & ~PHYSICAL_PAGE_4K_MASK); 97.117 + unmap_domain_page(l1e); 97.118 + } 97.119 + 97.120 + if ( !success ) /* error happened, jump out */ 97.121 + break; 97.122 + } 97.123 + 97.124 + HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa); 97.125 + 97.126 + if ( !success ) /* error happened */ 97.127 + return INVALID_GFN; 97.128 + else 97.129 + return ((paddr_t)gpa >> PAGE_SHIFT); 97.130 +} 97.131 + 97.132 + 97.133 + 97.134 +/* Translate guest virtual address to guest physical address. Specifically 97.135 + * for PAE mode guest. 
97.136 + */ 97.137 +unsigned long hap_gva_to_gfn_pae_mode(struct vcpu *v, unsigned long gva) 97.138 +{ 97.139 +#if CONFIG_PAGING_LEVELS >= 3 97.140 + unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3); 97.141 + int mode = 3; /* three-level guest */ 97.142 + int lev, index; 97.143 + paddr_t gpa = 0; 97.144 + unsigned long gpfn, mfn; 97.145 + int success = 1; 97.146 + l1_pgentry_t *l1e; 97.147 + l2_pgentry_t *l2e; 97.148 + l3_pgentry_t *l3e; 97.149 + 97.150 + HERE_I_AM; 97.151 + 97.152 + gpfn = (gcr3 >> PAGE_SHIFT); 97.153 + for ( lev = mode; lev >= 1; lev-- ) { 97.154 + mfn = get_mfn_from_gpfn( gpfn ); 97.155 + if ( mfn == INVALID_MFN ) { 97.156 + HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, 97.157 + lev); 97.158 + success = 0; 97.159 + break; 97.160 + } 97.161 + index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1); 97.162 + 97.163 + if ( lev == 3 ) { 97.164 + l3e = map_domain_page( mfn ); 97.165 + index += ( ((gcr3 >> 5 ) & 127 ) * 4 ); 97.166 + if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) { 97.167 + HAP_PRINTK("Level 3 entry not present at index = %d\n", index); 97.168 + success = 0; 97.169 + } 97.170 + gpfn = l3e_get_pfn( l3e[index] ); 97.171 + unmap_domain_page(l3e); 97.172 + } 97.173 + 97.174 + if ( lev == 2 ) { 97.175 + l2e = map_domain_page( mfn ); 97.176 + if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) { 97.177 + HAP_PRINTK("Level 2 entry not present at index = %d\n", index); 97.178 + success = 0; 97.179 + } 97.180 + 97.181 + if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */ 97.182 + HAP_PRINTK("guest page table is PSE\n"); 97.183 + gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_PAGE_2M_MASK) + 97.184 + (gva & ~PHYSICAL_PAGE_2M_MASK); 97.185 + unmap_domain_page(l2e); 97.186 + break; /* last level page table, jump out from here */ 97.187 + } 97.188 + else { 97.189 + gpfn = l2e_get_pfn(l2e[index]); 97.190 + } 97.191 + unmap_domain_page(l2e); 97.192 + } 97.193 + 97.194 + if ( lev == 1 ) { 97.195 + l1e = map_domain_page( mfn ); 97.196 + if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) { 97.197 + HAP_PRINTK("Level 1 entry not present at index = %d\n", index); 97.198 + success = 0; 97.199 + } 97.200 + gpfn = l1e_get_pfn( l1e[index] ); 97.201 + gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_PAGE_4K_MASK) + 97.202 + (gva & ~PHYSICAL_PAGE_4K_MASK); 97.203 + unmap_domain_page(l1e); 97.204 + } 97.205 + 97.206 + if ( success != 1 ) /* error happened, jump out */ 97.207 + break; 97.208 + } 97.209 + 97.210 + gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */ 97.211 + HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa); 97.212 + 97.213 + if ( !success ) 97.214 + return INVALID_GFN; 97.215 + else 97.216 + return ((paddr_t)gpa >> PAGE_SHIFT); 97.217 +#else 97.218 + HERE_I_AM; 97.219 + printk("guest paging level (3) is greater than host paging level!\n"); 97.220 + domain_crash(v->domain); 97.221 + return INVALID_GFN; 97.222 +#endif 97.223 +} 97.224 + 97.225 + 97.226 + 97.227 +/* Translate guest virtual address to guest physical address. Specifically 97.228 + * for long mode guest. 
97.229 + */ 97.230 +unsigned long hap_gva_to_gfn_long_mode(struct vcpu *v, unsigned long gva) 97.231 +{ 97.232 +#if CONFIG_PAGING_LEVELS == 4 97.233 + unsigned long gcr3 = hvm_get_guest_ctrl_reg(v, 3); 97.234 + int mode = 4; /* four-level guest */ 97.235 + int lev, index; 97.236 + paddr_t gpa = 0; 97.237 + unsigned long gpfn, mfn; 97.238 + int success = 1; 97.239 + l4_pgentry_t *l4e; 97.240 + l3_pgentry_t *l3e; 97.241 + l2_pgentry_t *l2e; 97.242 + l1_pgentry_t *l1e; 97.243 + 97.244 + HERE_I_AM; 97.245 + 97.246 + gpfn = (gcr3 >> PAGE_SHIFT); 97.247 + for ( lev = mode; lev >= 1; lev-- ) { 97.248 + mfn = get_mfn_from_gpfn( gpfn ); 97.249 + if ( mfn == INVALID_MFN ) { 97.250 + HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva, 97.251 + lev); 97.252 + success = 0; 97.253 + break; 97.254 + } 97.255 + index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1); 97.256 + 97.257 + if ( lev == 4 ) { 97.258 + l4e = map_domain_page( mfn ); 97.259 + if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) ) { 97.260 + HAP_PRINTK("Level 4 entry not present at index = %d\n", index); 97.261 + success = 0; 97.262 + } 97.263 + gpfn = l4e_get_pfn( l4e[index] ); 97.264 + unmap_domain_page(l4e); 97.265 + } 97.266 + 97.267 + if ( lev == 3 ) { 97.268 + l3e = map_domain_page( mfn ); 97.269 + if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) ) { 97.270 + HAP_PRINTK("Level 3 entry not present at index = %d\n", index); 97.271 + success = 0; 97.272 + } 97.273 + gpfn = l3e_get_pfn( l3e[index] ); 97.274 + unmap_domain_page(l3e); 97.275 + } 97.276 + 97.277 + if ( lev == 2 ) { 97.278 + l2e = map_domain_page( mfn ); 97.279 + if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) ) { 97.280 + HAP_PRINTK("Level 2 entry not present at index = %d\n", index); 97.281 + success = 0; 97.282 + } 97.283 + 97.284 + if ( l2e_get_flags(l2e[index]) & _PAGE_PSE ) { /* handle PSE */ 97.285 + HAP_PRINTK("guest page table is PSE\n"); 97.286 + gpa = (l2e_get_intpte(l2e[index]) & PHYSICAL_ADDR_2M_MASK_LM) 97.287 + + (gva & ~PHYSICAL_PAGE_2M_MASK); 97.288 + unmap_domain_page(l2e); 97.289 + break; /* last level page table, jump out from here */ 97.290 + } 97.291 + else { 97.292 + gpfn = l2e_get_pfn(l2e[index]); 97.293 + } 97.294 + unmap_domain_page(l2e); 97.295 + } 97.296 + 97.297 + if ( lev == 1 ) { 97.298 + l1e = map_domain_page( mfn ); 97.299 + if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) ) { 97.300 + HAP_PRINTK("Level 1 entry not present at index = %d\n", index); 97.301 + success = 0; 97.302 + } 97.303 + gpfn = l1e_get_pfn( l1e[index] ); 97.304 + gpa = (l1e_get_intpte(l1e[index]) & PHYSICAL_ADDR_4K_MASK_LM) + 97.305 + (gva & ~PHYSICAL_PAGE_4K_MASK); 97.306 + unmap_domain_page(l1e); 97.307 + } 97.308 + 97.309 + if ( success != 1 ) /* error happened, jump out */ 97.310 + break; 97.311 + } 97.312 + 97.313 + gpa &= ~PAGE_NX_BIT; /* clear NX bit of guest physical address */ 97.314 + HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa); 97.315 + 97.316 + if ( !success ) 97.317 + return INVALID_GFN; 97.318 + else 97.319 + return ((paddr_t)gpa >> PAGE_SHIFT); 97.320 +#else 97.321 + HERE_I_AM; 97.322 + printk("guest paging level (4) is greater than host paging level!\n"); 97.323 + domain_crash(v->domain); 97.324 + return INVALID_GFN; 97.325 +#endif 97.326 +} 97.327 + 97.328 +/* 97.329 + * Local variables: 97.330 + * mode: C 97.331 + * c-set-style: "BSD" 97.332 + * c-basic-offset: 4 97.333 + * tab-width: 4 97.334 + * indent-tabs-mode: nil 97.335 + * End: 97.336 + */ 97.337 +
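[Note on the walkers above: all three share the same shape — loop from the top level down, mapping one guest page-table page per level, and stop early at a PSE L2 entry, where the guest physical address is the superpage base from the PDE plus the untranslated low bits of the virtual address. A worked instance of the 2MB PSE arithmetic, with invented values for illustration:

    /* pde stands in for l2e_get_intpte(l2e[index]) with _PAGE_PSE set. */
    unsigned long long pde = 0xabc000e3ULL;   /* base 0xabc00000 + flags */
    unsigned long gva      = 0x40123456UL;
    unsigned long long gpa =
        (pde & PHYSICAL_PAGE_2M_MASK) + (gva & ~PHYSICAL_PAGE_2M_MASK);
    /* => 0xabc00000 + 0x123456 = 0xabd23456 */
]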
98.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 98.2 +++ b/xen/arch/x86/mm/page-guest32.h Thu Mar 08 14:39:52 2007 -0600 98.3 @@ -0,0 +1,100 @@ 98.4 + 98.5 +#ifndef __X86_PAGE_GUEST_H__ 98.6 +#define __X86_PAGE_GUEST_H__ 98.7 + 98.8 +#ifndef __ASSEMBLY__ 98.9 +# include <asm/types.h> 98.10 +#endif 98.11 + 98.12 +#define PAGETABLE_ORDER_32 10 98.13 +#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32) 98.14 +#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32) 98.15 +#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32 98.16 + 98.17 + 98.18 +#define L1_PAGETABLE_SHIFT_32 12 98.19 +#define L2_PAGETABLE_SHIFT_32 22 98.20 + 98.21 +/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */ 98.22 + 98.23 +#ifndef __ASSEMBLY__ 98.24 + 98.25 +typedef u32 intpte_32_t; 98.26 + 98.27 +typedef struct { intpte_32_t l1; } l1_pgentry_32_t; 98.28 +typedef struct { intpte_32_t l2; } l2_pgentry_32_t; 98.29 +typedef l2_pgentry_t root_pgentry_32_t; 98.30 +#endif 98.31 + 98.32 +#define get_pte_flags_32(x) ((u32)(x) & 0xFFF) 98.33 +#define put_pte_flags_32(x) ((intpte_32_t)(x)) 98.34 + 98.35 +/* Get pte access flags (unsigned int). */ 98.36 +#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1)) 98.37 +#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2)) 98.38 + 98.39 +#define l1e_get_paddr_32(x) \ 98.40 + ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK)))) 98.41 +#define l2e_get_paddr_32(x) \ 98.42 + ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK)))) 98.43 + 98.44 +/* Construct an empty pte. */ 98.45 +#define l1e_empty_32() ((l1_pgentry_32_t) { 0 }) 98.46 +#define l2e_empty_32() ((l2_pgentry_32_t) { 0 }) 98.47 + 98.48 +/* Construct a pte from a pfn and access flags. */ 98.49 +#define l1e_from_pfn_32(pfn, flags) \ 98.50 + ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) }) 98.51 +#define l2e_from_pfn_32(pfn, flags) \ 98.52 + ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) }) 98.53 + 98.54 +/* Construct a pte from a physical address and access flags. */ 98.55 +#ifndef __ASSEMBLY__ 98.56 +static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags) 98.57 +{ 98.58 + ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0); 98.59 + return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) }; 98.60 +} 98.61 +static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags) 98.62 +{ 98.63 + ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0); 98.64 + return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) }; 98.65 +} 98.66 +#endif /* !__ASSEMBLY__ */ 98.67 + 98.68 + 98.69 +/* Construct a pte from a page pointer and access flags. */ 98.70 +#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags))) 98.71 +#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags))) 98.72 + 98.73 +/* Add extra flags to an existing pte. */ 98.74 +#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags)) 98.75 +#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags)) 98.76 + 98.77 +/* Remove flags from an existing pte. */ 98.78 +#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags)) 98.79 +#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags)) 98.80 + 98.81 +/* Check if a pte's page mapping or significant access flags have changed. 
*/ 98.82 +#define l1e_has_changed_32(x,y,flags) \ 98.83 + ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) ) 98.84 +#define l2e_has_changed_32(x,y,flags) \ 98.85 + ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) ) 98.86 + 98.87 +/* Given a virtual address, get an entry offset into a page table. */ 98.88 +#define l1_table_offset_32(a) \ 98.89 + (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1)) 98.90 +#define l2_table_offset_32(a) \ 98.91 + (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1)) 98.92 + 98.93 +#endif /* __X86_PAGE_GUEST_H__ */ 98.94 + 98.95 +/* 98.96 + * Local variables: 98.97 + * mode: C 98.98 + * c-set-style: "BSD" 98.99 + * c-basic-offset: 4 98.100 + * tab-width: 4 98.101 + * indent-tabs-mode: nil 98.102 + * End: 98.103 + */
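[Note on the _32 accessors above: they mirror the native pte helpers but operate on 32-bit guest entries, so 64-bit code can walk a 2-level guest's tables. A round-trip usage sketch, illustrative only (assumes PAGE_SHIFT == 12 and the usual _PAGE_* flag definitions):

    l1_pgentry_32_t pte = l1e_from_pfn_32(0x1234, _PAGE_PRESENT | _PAGE_RW);
    ASSERT(l1e_get_flags_32(pte) & _PAGE_PRESENT);
    ASSERT(l1e_get_paddr_32(pte) == ((paddr_t)0x1234 << 12));
]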
99.1 --- a/xen/arch/x86/mm/paging.c Mon Mar 05 12:49:12 2007 -0600 99.2 +++ b/xen/arch/x86/mm/paging.c Thu Mar 08 14:39:52 2007 -0600 99.3 @@ -24,10 +24,12 @@ 99.4 #include <asm/paging.h> 99.5 #include <asm/shadow.h> 99.6 #include <asm/p2m.h> 99.7 +#include <asm/hap.h> 99.8 99.9 /* Xen command-line option to enable hardware-assisted paging */ 99.10 int opt_hap_enabled = 0; 99.11 boolean_param("hap", opt_hap_enabled); 99.12 +int hap_capable_system = 0; 99.13 99.14 /* Printouts */ 99.15 #define PAGING_PRINTK(_f, _a...) \ 99.16 @@ -46,12 +48,18 @@ void paging_domain_init(struct domain *d 99.17 { 99.18 p2m_init(d); 99.19 shadow_domain_init(d); 99.20 + 99.21 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 99.22 + hap_domain_init(d); 99.23 } 99.24 99.25 /* vcpu paging struct initialization goes here */ 99.26 void paging_vcpu_init(struct vcpu *v) 99.27 { 99.28 - shadow_vcpu_init(v); 99.29 + if ( opt_hap_enabled && hap_capable_system && is_hvm_vcpu(v) ) 99.30 + hap_vcpu_init(v); 99.31 + else 99.32 + shadow_vcpu_init(v); 99.33 } 99.34 99.35 99.36 @@ -59,32 +67,38 @@ int paging_domctl(struct domain *d, xen_ 99.37 XEN_GUEST_HANDLE(void) u_domctl) 99.38 { 99.39 /* Here, dispatch domctl to the appropriate paging code */ 99.40 - return shadow_domctl(d, sc, u_domctl); 99.41 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 99.42 + return hap_domctl(d, sc, u_domctl); 99.43 + else 99.44 + return shadow_domctl(d, sc, u_domctl); 99.45 } 99.46 99.47 /* Call when destroying a domain */ 99.48 void paging_teardown(struct domain *d) 99.49 { 99.50 - shadow_teardown(d); 99.51 - /* Call other modes' teardown code here */ 99.52 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 99.53 + hap_teardown(d); 99.54 + else 99.55 + shadow_teardown(d); 99.56 } 99.57 99.58 /* Call once all of the references to the domain have gone away */ 99.59 void paging_final_teardown(struct domain *d) 99.60 { 99.61 - shadow_teardown(d); 99.62 - /* Call other modes' final teardown code here */ 99.63 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 99.64 + hap_final_teardown(d); 99.65 + else 99.66 + shadow_final_teardown(d); 99.67 } 99.68 99.69 /* Enable an arbitrary paging-assistance mode. Call once at domain 99.70 * creation. */ 99.71 int paging_enable(struct domain *d, u32 mode) 99.72 { 99.73 - if ( mode & PG_SH_enable ) 99.74 - return shadow_enable(d, mode); 99.75 + if ( opt_hap_enabled && hap_capable_system && is_hvm_domain(d) ) 99.76 + return hap_enable(d, mode | PG_HAP_enable); 99.77 else 99.78 - /* No other modes supported yet */ 99.79 - return -EINVAL; 99.80 + return shadow_enable(d, mode | PG_SH_enable); 99.81 } 99.82 99.83 /* Print paging-assistance info to the console */
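[Note on the dispatch changes above: every site in paging.c now repeats the same three-part predicate, opt_hap_enabled && hap_capable_system && is_hvm_domain(d). A hypothetical consolidation (not part of this changeset) would keep the sites from drifting apart:

    /* Hypothetical helper: one definition of "this domain uses HAP",
     * usable by all of the dispatch sites above. */
    #define hap_enabled(_d) \
        (opt_hap_enabled && hap_capable_system && is_hvm_domain(_d))
]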
100.1 --- a/xen/arch/x86/mm/shadow/common.c Mon Mar 05 12:49:12 2007 -0600 100.2 +++ b/xen/arch/x86/mm/shadow/common.c Thu Mar 08 14:39:52 2007 -0600 100.3 @@ -2912,7 +2912,16 @@ void sh_mark_dirty(struct domain *d, mfn 100.4 * can be called from __hvm_copy during emulation). 100.5 * If the lock isn't held, take it for the duration of the call. */ 100.6 do_locking = !shadow_locked_by_me(d); 100.7 - if ( do_locking ) shadow_lock(d); 100.8 + if ( do_locking ) 100.9 + { 100.10 + shadow_lock(d); 100.11 + /* Check the mode again with the lock held */ 100.12 + if ( unlikely(!shadow_mode_log_dirty(d)) ) 100.13 + { 100.14 + shadow_unlock(d); 100.15 + return; 100.16 + } 100.17 + } 100.18 100.19 ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL); 100.20 100.21 @@ -2968,10 +2977,18 @@ int shadow_domctl(struct domain *d, 100.22 100.23 if ( unlikely(d == current->domain) ) 100.24 { 100.25 - gdprintk(XENLOG_INFO, "Don't try to do a shadow op on yourself!\n"); 100.26 + gdprintk(XENLOG_INFO, "Dom %u tried to do a shadow op on itself.\n", 100.27 + d->domain_id); 100.28 return -EINVAL; 100.29 } 100.30 100.31 + if ( unlikely(test_bit(_DOMF_dying, &d->domain_flags)) ) 100.32 + { 100.33 + gdprintk(XENLOG_INFO, "Ignoring shadow op on dying domain %u\n", 100.34 + d->domain_id); 100.35 + return 0; 100.36 + } 100.37 + 100.38 switch ( sc->op ) 100.39 { 100.40 case XEN_DOMCTL_SHADOW_OP_OFF:
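[Note on the sh_mark_dirty() change above: it is a classic test/lock/re-test. The unlocked mode check stays as the fast path, but once the lock is taken the mode must be confirmed again, because log-dirty could have been switched off in the window before the lock was acquired. The generic shape, as a standalone sketch with a hypothetical flag standing in for the shadow state:

    static int flag;                    /* may be cleared concurrently */
    static spinlock_t flag_lock;

    static void sketch_mark(void)
    {
        if ( !flag )                    /* cheap unlocked check */
            return;
        spin_lock(&flag_lock);
        if ( !flag )                    /* re-check: may have changed */
        {
            spin_unlock(&flag_lock);
            return;
        }
        /* ... state protected by flag_lock is now safe to touch ... */
        spin_unlock(&flag_lock);
    }
]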
101.1 --- a/xen/arch/x86/mm/shadow/page-guest32.h Mon Mar 05 12:49:12 2007 -0600 101.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 101.3 @@ -1,100 +0,0 @@ 101.4 - 101.5 -#ifndef __X86_PAGE_GUEST_H__ 101.6 -#define __X86_PAGE_GUEST_H__ 101.7 - 101.8 -#ifndef __ASSEMBLY__ 101.9 -# include <asm/types.h> 101.10 -#endif 101.11 - 101.12 -#define PAGETABLE_ORDER_32 10 101.13 -#define L1_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32) 101.14 -#define L2_PAGETABLE_ENTRIES_32 (1<<PAGETABLE_ORDER_32) 101.15 -#define ROOT_PAGETABLE_ENTRIES_32 L2_PAGETABLE_ENTRIES_32 101.16 - 101.17 - 101.18 -#define L1_PAGETABLE_SHIFT_32 12 101.19 -#define L2_PAGETABLE_SHIFT_32 22 101.20 - 101.21 -/* Extract flags into 12-bit integer, or turn 12-bit flags into a pte mask. */ 101.22 - 101.23 -#ifndef __ASSEMBLY__ 101.24 - 101.25 -typedef u32 intpte_32_t; 101.26 - 101.27 -typedef struct { intpte_32_t l1; } l1_pgentry_32_t; 101.28 -typedef struct { intpte_32_t l2; } l2_pgentry_32_t; 101.29 -typedef l2_pgentry_t root_pgentry_32_t; 101.30 -#endif 101.31 - 101.32 -#define get_pte_flags_32(x) ((u32)(x) & 0xFFF) 101.33 -#define put_pte_flags_32(x) ((intpte_32_t)(x)) 101.34 - 101.35 -/* Get pte access flags (unsigned int). */ 101.36 -#define l1e_get_flags_32(x) (get_pte_flags_32((x).l1)) 101.37 -#define l2e_get_flags_32(x) (get_pte_flags_32((x).l2)) 101.38 - 101.39 -#define l1e_get_paddr_32(x) \ 101.40 - ((paddr_t)(((x).l1 & (PADDR_MASK&PAGE_MASK)))) 101.41 -#define l2e_get_paddr_32(x) \ 101.42 - ((paddr_t)(((x).l2 & (PADDR_MASK&PAGE_MASK)))) 101.43 - 101.44 -/* Construct an empty pte. */ 101.45 -#define l1e_empty_32() ((l1_pgentry_32_t) { 0 }) 101.46 -#define l2e_empty_32() ((l2_pgentry_32_t) { 0 }) 101.47 - 101.48 -/* Construct a pte from a pfn and access flags. */ 101.49 -#define l1e_from_pfn_32(pfn, flags) \ 101.50 - ((l1_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) }) 101.51 -#define l2e_from_pfn_32(pfn, flags) \ 101.52 - ((l2_pgentry_32_t) { ((intpte_32_t)(pfn) << PAGE_SHIFT) | put_pte_flags_32(flags) }) 101.53 - 101.54 -/* Construct a pte from a physical address and access flags. */ 101.55 -#ifndef __ASSEMBLY__ 101.56 -static inline l1_pgentry_32_t l1e_from_paddr_32(paddr_t pa, unsigned int flags) 101.57 -{ 101.58 - ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0); 101.59 - return (l1_pgentry_32_t) { pa | put_pte_flags_32(flags) }; 101.60 -} 101.61 -static inline l2_pgentry_32_t l2e_from_paddr_32(paddr_t pa, unsigned int flags) 101.62 -{ 101.63 - ASSERT((pa & ~(PADDR_MASK & PAGE_MASK)) == 0); 101.64 - return (l2_pgentry_32_t) { pa | put_pte_flags_32(flags) }; 101.65 -} 101.66 -#endif /* !__ASSEMBLY__ */ 101.67 - 101.68 - 101.69 -/* Construct a pte from a page pointer and access flags. */ 101.70 -#define l1e_from_page_32(page, flags) (l1e_from_pfn_32(page_to_mfn(page),(flags))) 101.71 -#define l2e_from_page_32(page, flags) (l2e_from_pfn_32(page_to_mfn(page),(flags))) 101.72 - 101.73 -/* Add extra flags to an existing pte. */ 101.74 -#define l1e_add_flags_32(x, flags) ((x).l1 |= put_pte_flags_32(flags)) 101.75 -#define l2e_add_flags_32(x, flags) ((x).l2 |= put_pte_flags_32(flags)) 101.76 - 101.77 -/* Remove flags from an existing pte. */ 101.78 -#define l1e_remove_flags_32(x, flags) ((x).l1 &= ~put_pte_flags_32(flags)) 101.79 -#define l2e_remove_flags_32(x, flags) ((x).l2 &= ~put_pte_flags_32(flags)) 101.80 - 101.81 -/* Check if a pte's page mapping or significant access flags have changed. 
*/ 101.82 -#define l1e_has_changed_32(x,y,flags) \ 101.83 - ( !!(((x).l1 ^ (y).l1) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) ) 101.84 -#define l2e_has_changed_32(x,y,flags) \ 101.85 - ( !!(((x).l2 ^ (y).l2) & ((PADDR_MASK&PAGE_MASK)|put_pte_flags_32(flags))) ) 101.86 - 101.87 -/* Given a virtual address, get an entry offset into a page table. */ 101.88 -#define l1_table_offset_32(a) \ 101.89 - (((a) >> L1_PAGETABLE_SHIFT_32) & (L1_PAGETABLE_ENTRIES_32 - 1)) 101.90 -#define l2_table_offset_32(a) \ 101.91 - (((a) >> L2_PAGETABLE_SHIFT_32) & (L2_PAGETABLE_ENTRIES_32 - 1)) 101.92 - 101.93 -#endif /* __X86_PAGE_GUEST_H__ */ 101.94 - 101.95 -/* 101.96 - * Local variables: 101.97 - * mode: C 101.98 - * c-set-style: "BSD" 101.99 - * c-basic-offset: 4 101.100 - * tab-width: 4 101.101 - * indent-tabs-mode: nil 101.102 - * End: 101.103 - */
102.1 --- a/xen/arch/x86/mm/shadow/private.h Mon Mar 05 12:49:12 2007 -0600 102.2 +++ b/xen/arch/x86/mm/shadow/private.h Thu Mar 08 14:39:52 2007 -0600 102.3 @@ -539,7 +539,7 @@ static inline int sh_get_ref(struct vcpu 102.4 102.5 /* We remember the first shadow entry that points to each shadow. */ 102.6 if ( entry_pa != 0 102.7 - && sh_type_is_pinnable(v, sp->type) 102.8 + && !sh_type_is_pinnable(v, sp->type) 102.9 && sp->up == 0 ) 102.10 sp->up = entry_pa; 102.11 102.12 @@ -559,7 +559,7 @@ static inline void sh_put_ref(struct vcp 102.13 102.14 /* If this is the entry in the up-pointer, remove it */ 102.15 if ( entry_pa != 0 102.16 - && sh_type_is_pinnable(v, sp->type) 102.17 + && !sh_type_is_pinnable(v, sp->type) 102.18 && sp->up == entry_pa ) 102.19 sp->up = 0; 102.20
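[Note on the condition flip above: pinnable shadows (top-level guest pagetables) are kept alive by pins and can be referenced from more than one place, so a single up-pointer is only meaningful for the non-pinnable types, which have exactly one parent entry. In sketch form, the corrected bookkeeping reads:

    /* sh_get_ref(): remember the unique parent entry, but only for
     * shadow types that can never be pinned. */
    if ( entry_pa != 0 && !sh_type_is_pinnable(v, sp->type) && sp->up == 0 )
        sp->up = entry_pa;

    /* sh_put_ref(): clear it again when that entry drops its reference. */
    if ( entry_pa != 0 && !sh_type_is_pinnable(v, sp->type)
         && sp->up == entry_pa )
        sp->up = 0;
]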
103.1 --- a/xen/arch/x86/mm/shadow/types.h Mon Mar 05 12:49:12 2007 -0600 103.2 +++ b/xen/arch/x86/mm/shadow/types.h Thu Mar 08 14:39:52 2007 -0600 103.3 @@ -235,7 +235,7 @@ static inline shadow_l4e_t shadow_l4e_fr 103.4 103.5 #if GUEST_PAGING_LEVELS == 2 103.6 103.7 -#include "page-guest32.h" 103.8 +#include "../page-guest32.h" 103.9 103.10 #define GUEST_L1_PAGETABLE_ENTRIES 1024 103.11 #define GUEST_L2_PAGETABLE_ENTRIES 1024
104.1 --- a/xen/common/event_channel.c Mon Mar 05 12:49:12 2007 -0600 104.2 +++ b/xen/common/event_channel.c Thu Mar 08 14:39:52 2007 -0600 104.3 @@ -560,6 +560,9 @@ void send_guest_global_virq(struct domai 104.4 104.5 ASSERT(virq_is_global(virq)); 104.6 104.7 + if ( unlikely(d == NULL) ) 104.8 + return; 104.9 + 104.10 v = d->vcpu[0]; 104.11 if ( unlikely(v == NULL) ) 104.12 return;
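[Note on the NULL guard above: send_guest_global_virq() can now be reached before dom0 has been constructed — later in this changeset __putstr() raises VIRQ_CON_RING on every console write, and early-boot output happens while the dom0 pointer is still NULL, so the virq must be dropped silently rather than dereferenced. A sketch of the motivating call path, illustrative only:

    /* Early boot: console output before dom0 is built. */
    static void sketch_putstr(const char *s)
    {
        /* ... emit characters to serial / VGA / console ring ... */
        send_guest_global_virq(dom0, VIRQ_CON_RING);  /* dom0 may be NULL */
    }
]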
105.1 --- a/xen/common/page_alloc.c Mon Mar 05 12:49:12 2007 -0600 105.2 +++ b/xen/common/page_alloc.c Thu Mar 08 14:39:52 2007 -0600 105.3 @@ -49,7 +49,7 @@ string_param("badpage", opt_badpage); 105.4 * Bit width of the DMA heap. 105.5 */ 105.6 static unsigned int dma_bitsize = CONFIG_DMA_BITSIZE; 105.7 -static unsigned long max_dma_mfn = (1UL << (CONFIG_DMA_BITSIZE - PAGE_SHIFT)) - 1; 105.8 +static unsigned long max_dma_mfn = (1UL<<(CONFIG_DMA_BITSIZE-PAGE_SHIFT))-1; 105.9 static void parse_dma_bits(char *s) 105.10 { 105.11 unsigned int v = simple_strtol(s, NULL, 0); 105.12 @@ -339,11 +339,13 @@ static void init_heap_block(heap_by_zone 105.13 105.14 /* Allocate 2^@order contiguous pages. */ 105.15 static struct page_info *alloc_heap_pages( 105.16 - unsigned int zone_lo, unsigned zone_hi, 105.17 + unsigned int zone_lo, unsigned int zone_hi, 105.18 unsigned int cpu, unsigned int order) 105.19 { 105.20 - unsigned int i, j, node = cpu_to_node(cpu), num_nodes = num_online_nodes(); 105.21 - unsigned int zone, request = (1UL << order); 105.22 + unsigned int i, j, zone; 105.23 + unsigned int node = cpu_to_node(cpu), num_nodes = num_online_nodes(); 105.24 + unsigned long request = 1UL << order; 105.25 + cpumask_t extra_cpus_mask, mask; 105.26 struct page_info *pg; 105.27 105.28 ASSERT(node >= 0); 105.29 @@ -356,25 +358,24 @@ static struct page_info *alloc_heap_page 105.30 105.31 spin_lock(&heap_lock); 105.32 105.33 - /* start with requested node, but exhaust all node memory 105.34 - * in requested zone before failing, only calc new node 105.35 - * value if we fail to find memory in target node, this avoids 105.36 - * needless computation on fast-path */ 105.37 + /* 105.38 + * Start with requested node, but exhaust all node memory in requested 105.39 + * zone before failing, only calc new node value if we fail to find memory 105.40 + * in target node, this avoids needless computation on fast-path. 105.41 + */ 105.42 for ( i = 0; i < num_nodes; i++ ) 105.43 { 105.44 - for ( zone = zone_hi; zone >= zone_lo; --zone ) 105.45 - { 105.46 - /* check if target node can support the allocation */ 105.47 - if ( avail[node] && (avail[node][zone] >= request) ) 105.48 - { 105.49 - /* Find smallest order which can satisfy the request. */ 105.50 - for ( j = order; j <= MAX_ORDER; j++ ) 105.51 - { 105.52 - if ( !list_empty(&heap(node, zone, j)) ) 105.53 - goto found; 105.54 - } 105.55 - } 105.56 - } 105.57 + zone = zone_hi; 105.58 + do { 105.59 + /* Check if target node can support the allocation. */ 105.60 + if ( !avail[node] || (avail[node][zone] < request) ) 105.61 + continue; 105.62 + 105.63 + /* Find smallest order which can satisfy the request. */ 105.64 + for ( j = order; j <= MAX_ORDER; j++ ) 105.65 + if ( !list_empty(&heap(node, zone, j)) ) 105.66 + goto found; 105.67 + } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */ 105.68 105.69 /* Pick next node, wrapping around if needed. */ 105.70 if ( ++node == num_nodes ) 105.71 @@ -403,6 +404,29 @@ static struct page_info *alloc_heap_page 105.72 105.73 spin_unlock(&heap_lock); 105.74 105.75 + cpus_clear(mask); 105.76 + 105.77 + for ( i = 0; i < (1 << order); i++ ) 105.78 + { 105.79 + /* Reference count must continuously be zero for free pages. */ 105.80 + BUG_ON(pg[i].count_info != 0); 105.81 + 105.82 + /* Add in any extra CPUs that need flushing because of this page. 
*/ 105.83 + cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask); 105.84 + tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp); 105.85 + cpus_or(mask, mask, extra_cpus_mask); 105.86 + 105.87 + /* Initialise fields which have other uses for free pages. */ 105.88 + pg[i].u.inuse.type_info = 0; 105.89 + page_set_owner(&pg[i], NULL); 105.90 + } 105.91 + 105.92 + if ( unlikely(!cpus_empty(mask)) ) 105.93 + { 105.94 + perfc_incrc(need_flush_tlb_flush); 105.95 + flush_tlb_mask(mask); 105.96 + } 105.97 + 105.98 return pg; 105.99 } 105.100 105.101 @@ -411,13 +435,28 @@ static void free_heap_pages( 105.102 unsigned int zone, struct page_info *pg, unsigned int order) 105.103 { 105.104 unsigned long mask; 105.105 - unsigned int node = phys_to_nid(page_to_maddr(pg)); 105.106 + unsigned int i, node = phys_to_nid(page_to_maddr(pg)); 105.107 + struct domain *d; 105.108 105.109 ASSERT(zone < NR_ZONES); 105.110 ASSERT(order <= MAX_ORDER); 105.111 ASSERT(node >= 0); 105.112 ASSERT(node < num_online_nodes()); 105.113 105.114 + for ( i = 0; i < (1 << order); i++ ) 105.115 + { 105.116 + BUG_ON(pg[i].count_info != 0); 105.117 + if ( (d = page_get_owner(&pg[i])) != NULL ) 105.118 + { 105.119 + pg[i].tlbflush_timestamp = tlbflush_current_time(); 105.120 + pg[i].u.free.cpumask = d->domain_dirty_cpumask; 105.121 + } 105.122 + else 105.123 + { 105.124 + cpus_clear(pg[i].u.free.cpumask); 105.125 + } 105.126 + } 105.127 + 105.128 spin_lock(&heap_lock); 105.129 105.130 map_free(page_to_mfn(pg), 1 << order); 105.131 @@ -426,7 +465,7 @@ static void free_heap_pages( 105.132 /* Merge chunks as far as possible. */ 105.133 while ( order < MAX_ORDER ) 105.134 { 105.135 - mask = 1 << order; 105.136 + mask = 1UL << order; 105.137 105.138 if ( (page_to_mfn(pg) & mask) ) 105.139 { 105.140 @@ -554,7 +593,7 @@ void end_boot_allocator(void) 105.141 /* 105.142 * Scrub all unallocated pages in all heap zones. This function is more 105.143 * convoluted than appears necessary because we do not want to continuously 105.144 - * hold the lock or disable interrupts while scrubbing very large memory areas. 105.145 + * hold the lock while scrubbing very large memory areas. 105.146 */ 105.147 void scrub_heap_pages(void) 105.148 { 105.149 @@ -575,7 +614,7 @@ void scrub_heap_pages(void) 105.150 if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 ) 105.151 printk("."); 105.152 105.153 - spin_lock_irq(&heap_lock); 105.154 + spin_lock(&heap_lock); 105.155 105.156 /* Re-check page status with lock held. 
*/ 105.157 if ( !allocated_in_map(mfn) ) 105.158 @@ -595,7 +634,7 @@ void scrub_heap_pages(void) 105.159 } 105.160 } 105.161 105.162 - spin_unlock_irq(&heap_lock); 105.163 + spin_unlock(&heap_lock); 105.164 } 105.165 105.166 printk("done.\n"); 105.167 @@ -609,8 +648,6 @@ void scrub_heap_pages(void) 105.168 105.169 void init_xenheap_pages(paddr_t ps, paddr_t pe) 105.170 { 105.171 - unsigned long flags; 105.172 - 105.173 ps = round_pgup(ps); 105.174 pe = round_pgdown(pe); 105.175 if ( pe <= ps ) 105.176 @@ -625,34 +662,22 @@ void init_xenheap_pages(paddr_t ps, padd 105.177 if ( !IS_XEN_HEAP_FRAME(maddr_to_page(pe)) ) 105.178 pe -= PAGE_SIZE; 105.179 105.180 - local_irq_save(flags); 105.181 init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT); 105.182 - local_irq_restore(flags); 105.183 } 105.184 105.185 105.186 void *alloc_xenheap_pages(unsigned int order) 105.187 { 105.188 - unsigned long flags; 105.189 struct page_info *pg; 105.190 - int i; 105.191 105.192 - local_irq_save(flags); 105.193 + ASSERT(!in_irq()); 105.194 + 105.195 pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN, smp_processor_id(), order); 105.196 - local_irq_restore(flags); 105.197 - 105.198 if ( unlikely(pg == NULL) ) 105.199 goto no_memory; 105.200 105.201 memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT)); 105.202 105.203 - for ( i = 0; i < (1 << order); i++ ) 105.204 - { 105.205 - pg[i].count_info = 0; 105.206 - pg[i].u.inuse._domain = 0; 105.207 - pg[i].u.inuse.type_info = 0; 105.208 - } 105.209 - 105.210 return page_to_virt(pg); 105.211 105.212 no_memory: 105.213 @@ -663,16 +688,14 @@ void *alloc_xenheap_pages(unsigned int o 105.214 105.215 void free_xenheap_pages(void *v, unsigned int order) 105.216 { 105.217 - unsigned long flags; 105.218 + ASSERT(!in_irq()); 105.219 105.220 if ( v == NULL ) 105.221 return; 105.222 105.223 - memguard_guard_range(v, 1 << (order + PAGE_SHIFT)); 105.224 + memguard_guard_range(v, 1 << (order + PAGE_SHIFT)); 105.225 105.226 - local_irq_save(flags); 105.227 free_heap_pages(MEMZONE_XEN, virt_to_page(v), order); 105.228 - local_irq_restore(flags); 105.229 } 105.230 105.231 105.232 @@ -762,8 +785,6 @@ struct page_info *__alloc_domheap_pages( 105.233 unsigned int memflags) 105.234 { 105.235 struct page_info *pg = NULL; 105.236 - cpumask_t mask; 105.237 - unsigned long i; 105.238 unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1; 105.239 105.240 ASSERT(!in_irq()); 105.241 @@ -792,38 +813,10 @@ struct page_info *__alloc_domheap_pages( 105.242 return NULL; 105.243 } 105.244 105.245 - if ( pg == NULL ) 105.246 - if ( (pg = alloc_heap_pages(MEMZONE_XEN + 1, 105.247 - zone_hi, 105.248 - cpu, order)) == NULL ) 105.249 - return NULL; 105.250 - 105.251 - mask = pg->u.free.cpumask; 105.252 - tlbflush_filter(mask, pg->tlbflush_timestamp); 105.253 - 105.254 - pg->count_info = 0; 105.255 - pg->u.inuse._domain = 0; 105.256 - pg->u.inuse.type_info = 0; 105.257 - 105.258 - for ( i = 1; i < (1 << order); i++ ) 105.259 - { 105.260 - /* Add in any extra CPUs that need flushing because of this page. 
*/ 105.261 - cpumask_t extra_cpus_mask; 105.262 - cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask); 105.263 - tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp); 105.264 - cpus_or(mask, mask, extra_cpus_mask); 105.265 - 105.266 - pg[i].count_info = 0; 105.267 - pg[i].u.inuse._domain = 0; 105.268 - pg[i].u.inuse.type_info = 0; 105.269 - page_set_owner(&pg[i], NULL); 105.270 - } 105.271 - 105.272 - if ( unlikely(!cpus_empty(mask)) ) 105.273 - { 105.274 - perfc_incrc(need_flush_tlb_flush); 105.275 - flush_tlb_mask(mask); 105.276 - } 105.277 + if ( (pg == NULL) && 105.278 + ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi, 105.279 + cpu, order)) == NULL) ) 105.280 + return NULL; 105.281 105.282 if ( (d != NULL) && assign_pages(d, pg, order, memflags) ) 105.283 { 105.284 @@ -867,10 +860,7 @@ void free_domheap_pages(struct page_info 105.285 105.286 for ( i = 0; i < (1 << order); i++ ) 105.287 { 105.288 - shadow_drop_references(d, &pg[i]); 105.289 - ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0); 105.290 - pg[i].tlbflush_timestamp = tlbflush_current_time(); 105.291 - pg[i].u.free.cpumask = d->domain_dirty_cpumask; 105.292 + BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0); 105.293 list_del(&pg[i].list); 105.294 } 105.295 105.296 @@ -892,6 +882,7 @@ void free_domheap_pages(struct page_info 105.297 */ 105.298 for ( i = 0; i < (1 << order); i++ ) 105.299 { 105.300 + page_set_owner(&pg[i], NULL); 105.301 spin_lock(&page_scrub_lock); 105.302 list_add(&pg[i].list, &page_scrub_list); 105.303 scrub_pages++; 105.304 @@ -902,8 +893,6 @@ void free_domheap_pages(struct page_info 105.305 else 105.306 { 105.307 /* Freeing anonymous domain-heap pages. */ 105.308 - for ( i = 0; i < (1 << order); i++ ) 105.309 - cpus_clear(pg[i].u.free.cpumask); 105.310 free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order); 105.311 drop_dom_ref = 0; 105.312 }
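[Note on the rewritten zone loop in alloc_heap_pages() above: it sidesteps a classic unsigned-underflow bug. With unsigned int zone, the obvious for ( zone = zone_hi; zone >= zone_lo; --zone ) never terminates when zone_lo == 0, because zone wraps to UINT_MAX instead of going negative. Testing before the decrement fixes it:

    unsigned int zone, zone_lo = 0, zone_hi = 3;

    /* BROKEN for zone_lo == 0: zone >= 0 is always true for unsigned. */
    /* for ( zone = zone_hi; zone >= zone_lo; --zone ) { ... } */

    /* Safe: the body runs for zone_hi down to zone_lo inclusive, then
     * the post-decrement comparison fails and the loop exits. */
    zone = zone_hi;
    do {
        /* ... try to satisfy the allocation from 'zone' ... */
    } while ( zone-- > zone_lo );
]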
106.1 --- a/xen/common/xmalloc.c Mon Mar 05 12:49:12 2007 -0600 106.2 +++ b/xen/common/xmalloc.c Thu Mar 08 14:39:52 2007 -0600 106.3 @@ -33,6 +33,8 @@ 106.4 #include <xen/timer.h> 106.5 #include <xen/cache.h> 106.6 #include <xen/prefetch.h> 106.7 +#include <xen/irq.h> 106.8 +#include <xen/smp.h> 106.9 106.10 /* 106.11 * XMALLOC_DEBUG: 106.12 @@ -175,6 +177,8 @@ void *_xmalloc(size_t size, size_t align 106.13 struct xmalloc_hdr *i; 106.14 unsigned long flags; 106.15 106.16 + ASSERT(!in_irq()); 106.17 + 106.18 /* We currently always return cacheline aligned. */ 106.19 BUG_ON(align > SMP_CACHE_BYTES); 106.20 106.21 @@ -213,6 +217,8 @@ void xfree(void *p) 106.22 unsigned long flags; 106.23 struct xmalloc_hdr *i, *tmp, *hdr; 106.24 106.25 + ASSERT(!in_irq()); 106.26 + 106.27 if ( p == NULL ) 106.28 return; 106.29
107.1 --- a/xen/drivers/acpi/numa.c Mon Mar 05 12:49:12 2007 -0600 107.2 +++ b/xen/drivers/acpi/numa.c Thu Mar 08 14:39:52 2007 -0600 107.3 @@ -22,10 +22,6 @@ 107.4 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 107.5 * 107.6 */ 107.7 -#if 0 107.8 -#include <linux/module.h> 107.9 -#include <linux/kernel.h> 107.10 -#endif 107.11 #include <xen/config.h> 107.12 #include <xen/init.h> 107.13 #include <xen/types.h> 107.14 @@ -34,7 +30,6 @@ 107.15 #include <xen/numa.h> 107.16 #include <acpi/acpi_bus.h> 107.17 #include <acpi/acmacros.h> 107.18 -#include <asm/page.h> /* __va() */ 107.19 107.20 #define ACPI_NUMA 0x80000000 107.21 #define _COMPONENT ACPI_NUMA 107.22 @@ -106,7 +101,7 @@ static int __init acpi_parse_slit(unsign 107.23 if (!phys_addr || !size) 107.24 return -EINVAL; 107.25 107.26 - slit = (struct acpi_table_slit *)__va(phys_addr); 107.27 + slit = (struct acpi_table_slit *)__acpi_map_table(phys_addr, size); 107.28 107.29 /* downcast just for %llu vs %lu for i386/ia64 */ 107.30 localities = (u32) slit->localities; 107.31 @@ -159,7 +154,7 @@ static int __init acpi_parse_srat(unsign 107.32 if (!phys_addr || !size) 107.33 return -EINVAL; 107.34 107.35 - srat = (struct acpi_table_srat *)__va(phys_addr); 107.36 + srat = (struct acpi_table_srat *)__acpi_map_table(phys_addr, size); 107.37 107.38 return 0; 107.39 }
108.1 --- a/xen/drivers/char/console.c Mon Mar 05 12:49:12 2007 -0600 108.2 +++ b/xen/drivers/char/console.c Thu Mar 08 14:39:52 2007 -0600 108.3 @@ -399,6 +399,8 @@ static void __putstr(const char *str) 108.4 vga_putchar(c); 108.5 putchar_console_ring(c); 108.6 } 108.7 + 108.8 + send_guest_global_virq(dom0, VIRQ_CON_RING); 108.9 } 108.10 108.11 static int printk_prefix_check(char *p, char **pp)
109.1 --- a/xen/include/acm/acm_hooks.h Mon Mar 05 12:49:12 2007 -0600 109.2 +++ b/xen/include/acm/acm_hooks.h Thu Mar 08 14:39:52 2007 -0600 109.3 @@ -247,12 +247,12 @@ static inline int acm_pre_domctl(struct 109.4 if (*ssid == NULL) { 109.5 printk("%s: Warning. Destroying domain without ssid pointer.\n", 109.6 __func__); 109.7 - domain_rcu_lock(d); 109.8 + rcu_unlock_domain(d); 109.9 return -EACCES; 109.10 } 109.11 d->ssid = NULL; /* make sure it's not used any more */ 109.12 /* no policy-specific hook */ 109.13 - domain_rcu_lock(d); 109.14 + rcu_unlock_domain(d); 109.15 ret = 0; 109.16 } 109.17 break;
110.1 --- a/xen/include/asm-x86/domain.h Mon Mar 05 12:49:12 2007 -0600 110.2 +++ b/xen/include/asm-x86/domain.h Thu Mar 08 14:39:52 2007 -0600 110.3 @@ -104,6 +104,21 @@ struct shadow_vcpu { 110.4 }; 110.5 110.6 /************************************************/ 110.7 +/* hardware assisted paging */ 110.8 +/************************************************/ 110.9 +struct hap_domain { 110.10 + spinlock_t lock; 110.11 + int locker; 110.12 + const char *locker_function; 110.13 + 110.14 + struct list_head freelists; 110.15 + struct list_head p2m_freelist; 110.16 + unsigned int total_pages; /* number of pages allocated */ 110.17 + unsigned int free_pages; /* number of pages on freelists */ 110.18 + unsigned int p2m_pages; /* number of pages allocated to p2m */ 110.19 +}; 110.20 + 110.21 +/************************************************/ 110.22 +/* p2m handling */ 110.23 +/************************************************/ 110.24 110.25 @@ -135,6 +150,7 @@ struct paging_domain { 110.26 struct shadow_domain shadow; 110.27 110.28 /* Other paging assistance code will have structs here */ 110.29 + struct hap_domain hap; 110.30 110.31 struct paging_vcpu {
111.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 111.2 +++ b/xen/include/asm-x86/hap.h Thu Mar 08 14:39:52 2007 -0600 111.3 @@ -0,0 +1,122 @@ 111.4 +/****************************************************************************** 111.5 + * include/asm-x86/hap.h 111.6 + * 111.7 + * hardware-assisted paging 111.8 + * Copyright (c) 2007 Advanced Micro Devices (Wei Huang) 111.9 + * 111.10 + * Parts of this code are Copyright (c) 2006 by XenSource Inc. 111.11 + * Parts of this code are Copyright (c) 2006 by Michael A Fetterman 111.12 + * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al. 111.13 + * 111.14 + * This program is free software; you can redistribute it and/or modify 111.15 + * it under the terms of the GNU General Public License as published by 111.16 + * the Free Software Foundation; either version 2 of the License, or 111.17 + * (at your option) any later version. 111.18 + * 111.19 + * This program is distributed in the hope that it will be useful, 111.20 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 111.21 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 111.22 + * GNU General Public License for more details. 111.23 + * 111.24 + * You should have received a copy of the GNU General Public License 111.25 + * along with this program; if not, write to the Free Software 111.26 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 111.27 + */ 111.28 + 111.29 +#ifndef _XEN_HAP_H 111.30 +#define _XEN_HAP_H 111.31 + 111.32 +#define HERE_I_AM \ 111.33 +    debugtrace_printk("HERE I AM: %s %s %d\n", __func__, __FILE__, __LINE__) 111.34 +#define HAP_PRINTK(_f, _a...) \ 111.35 +    debugtrace_printk("hap: %s(): " _f, __func__, ##_a) 111.36 +#define HAP_ERROR(_f, _a...) \ 111.37 +    printk("hap error: %s(): " _f, __func__, ##_a) 111.38 + 111.39 +/************************************************/ 111.40 +/* hap domain page mapping */ 111.41 +/************************************************/ 111.42 +static inline void * 111.43 +hap_map_domain_page(mfn_t mfn) 111.44 +{ 111.45 +    return map_domain_page(mfn_x(mfn)); 111.46 +} 111.47 + 111.48 +static inline void 111.49 +hap_unmap_domain_page(void *p) 111.50 +{ 111.51 +    unmap_domain_page(p); 111.52 +} 111.53 + 111.54 +static inline void * 111.55 +hap_map_domain_page_global(mfn_t mfn) 111.56 +{ 111.57 +    return map_domain_page_global(mfn_x(mfn)); 111.58 +} 111.59 + 111.60 +static inline void 111.61 +hap_unmap_domain_page_global(void *p) 111.62 +{ 111.63 +    unmap_domain_page_global(p); 111.64 +} 111.65 + 111.66 +/************************************************/ 111.67 +/* locking for hap code */ 111.68 +/************************************************/ 111.69 +#define hap_lock_init(_d) \ 111.70 +    do { \ 111.71 +        spin_lock_init(&(_d)->arch.paging.hap.lock); \ 111.72 +        (_d)->arch.paging.hap.locker = -1; \ 111.73 +        (_d)->arch.paging.hap.locker_function = "nobody"; \ 111.74 +    } while (0) 111.75 + 111.76 +#define hap_locked_by_me(_d) \ 111.77 +    (current->processor == (_d)->arch.paging.hap.locker) 111.78 + 111.79 +#define hap_lock(_d) \ 111.80 +    do { \ 111.81 +        if ( unlikely((_d)->arch.paging.hap.locker == current->processor) )\ 111.82 +        { \ 111.83 +            printk("Error: hap lock held by %s\n", \ 111.84 +                   (_d)->arch.paging.hap.locker_function); \ 111.85 +            BUG(); \ 111.86 +        } \ 111.87 +        spin_lock(&(_d)->arch.paging.hap.lock); \ 111.88 +        ASSERT((_d)->arch.paging.hap.locker == -1); \ 111.89 +        (_d)->arch.paging.hap.locker = current->processor; \ 111.90 +        (_d)->arch.paging.hap.locker_function = __func__; \ 111.91 +    } while (0) 111.92 + 111.93 +#define hap_unlock(_d) \ 111.94 +    do { \ 111.95 +        ASSERT((_d)->arch.paging.hap.locker == current->processor); \ 111.96 +        (_d)->arch.paging.hap.locker = -1; \ 111.97 +        (_d)->arch.paging.hap.locker_function = "nobody"; \ 111.98 +        spin_unlock(&(_d)->arch.paging.hap.lock); \ 111.99 +    } while (0) 111.100 + 111.101 +/************************************************/ 111.102 +/* hap domain level functions */ 111.103 +/************************************************/ 111.104 +void  hap_domain_init(struct domain *d); 111.105 +int   hap_domctl(struct domain *d, xen_domctl_shadow_op_t *sc, 111.106 +                 XEN_GUEST_HANDLE(void) u_domctl); 111.107 +int   hap_enable(struct domain *d, u32 mode); 111.108 +void  hap_final_teardown(struct domain *d); 111.109 +void  hap_teardown(struct domain *d); 111.110 +void  hap_vcpu_init(struct vcpu *v); 111.111 + 111.112 +extern struct paging_mode hap_paging_real_mode; 111.113 +extern struct paging_mode hap_paging_protected_mode; 111.114 +extern struct paging_mode hap_paging_pae_mode; 111.115 +extern struct paging_mode hap_paging_long_mode; 111.116 +#endif /* _XEN_HAP_H */ 111.117 + 111.118 +/* 111.119 + * Local variables: 111.120 + * mode: C 111.121 + * c-set-style: "BSD" 111.122 + * c-basic-offset: 4 111.123 + * indent-tabs-mode: nil 111.124 + * End: 111.125 + */
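The hap_lock()/hap_unlock() macros above record the owning CPU and function name, so a recursive acquisition BUG()s out naming the culprit instead of deadlocking silently. A typical caller would bracket any freelist manipulation like this (usage sketch only; hap_free_page is a hypothetical helper):

    /* Sketch of the intended locking discipline -- not in this patch. */
    static void hap_free_page(struct domain *d, struct page_info *pg)
    {
        hap_lock(d);
        list_add_tail(&pg->list, &d->arch.paging.hap.freelists);
        d->arch.paging.hap.free_pages++;
        hap_unlock(d);
    }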
112.1 --- a/xen/include/asm-x86/hvm/svm/emulate.h Mon Mar 05 12:49:12 2007 -0600 112.2 +++ b/xen/include/asm-x86/hvm/svm/emulate.h Thu Mar 08 14:39:52 2007 -0600 112.3 @@ -76,7 +76,7 @@ enum instruction_index { 112.4 }; 112.5 112.6 112.7 -extern unsigned long get_effective_addr_modrm64(struct vmcb_struct *vmcb, 112.8 +extern unsigned long get_effective_addr_modrm64( 112.9 struct cpu_user_regs *regs, const u8 prefix, int inst_len, 112.10 const u8 *operand, u8 *size); 112.11 extern unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, 112.12 @@ -85,17 +85,17 @@ extern unsigned long get_effective_addr_ 112.13 extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb); 112.14 extern unsigned int decode_dest_reg(u8 prefix, u8 modrm); 112.15 extern unsigned int decode_src_reg(u8 prefix, u8 modrm); 112.16 -extern unsigned long svm_rip2pointer(struct vmcb_struct *vmcb); 112.17 -extern int __get_instruction_length_from_list(struct vmcb_struct *vmcb, 112.18 +extern unsigned long svm_rip2pointer(struct vcpu *v); 112.19 +extern int __get_instruction_length_from_list(struct vcpu *v, 112.20 enum instruction_index *list, unsigned int list_count, 112.21 u8 *guest_eip_buf, enum instruction_index *match); 112.22 112.23 112.24 -static inline int __get_instruction_length(struct vmcb_struct *vmcb, 112.25 +static inline int __get_instruction_length(struct vcpu *v, 112.26 enum instruction_index instr, u8 *guest_eip_buf) 112.27 { 112.28 return __get_instruction_length_from_list( 112.29 - vmcb, &instr, 1, guest_eip_buf, NULL); 112.30 + v, &instr, 1, guest_eip_buf, NULL); 112.31 } 112.32 112.33
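The decode helpers now take the vcpu rather than a raw VMCB pointer, so they can consult vcpu state (such as the shadowed EFER added in svm.h below) that lives outside the VMCB. A call site changes roughly along these lines (hypothetical sketch; INSTR_INVD is assumed to be one of the enum instruction_index values declared in this header):

    static void svm_vmexit_do_invd(struct vcpu *v)
    {
        struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
        int inst_len;

        /* The helper now digs the VMCB out of the vcpu itself. */
        inst_len = __get_instruction_length(v, INSTR_INVD, NULL);
        vmcb->rip += inst_len;  /* step the guest past the instruction */
    }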
113.1 --- a/xen/include/asm-x86/hvm/svm/svm.h Mon Mar 05 12:49:12 2007 -0600 113.2 +++ b/xen/include/asm-x86/hvm/svm/svm.h Thu Mar 08 14:39:52 2007 -0600 113.3 @@ -34,6 +34,41 @@ extern void arch_svm_do_resume(struct vc 113.4 113.5 extern u64 root_vmcb_pa[NR_CPUS]; 113.6 113.7 +static inline int svm_long_mode_enabled(struct vcpu *v) 113.8 +{ 113.9 + u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer; 113.10 + return guest_efer & EFER_LMA; 113.11 +} 113.12 + 113.13 +static inline int svm_lme_is_set(struct vcpu *v) 113.14 +{ 113.15 + u64 guest_efer = v->arch.hvm_svm.cpu_shadow_efer; 113.16 + return guest_efer & EFER_LME; 113.17 +} 113.18 + 113.19 +static inline int svm_cr4_pae_is_set(struct vcpu *v) 113.20 +{ 113.21 + unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4; 113.22 + return guest_cr4 & X86_CR4_PAE; 113.23 +} 113.24 + 113.25 +static inline int svm_paging_enabled(struct vcpu *v) 113.26 +{ 113.27 + unsigned long guest_cr0 = v->arch.hvm_svm.cpu_shadow_cr0; 113.28 + return (guest_cr0 & X86_CR0_PE) && (guest_cr0 & X86_CR0_PG); 113.29 +} 113.30 + 113.31 +static inline int svm_pae_enabled(struct vcpu *v) 113.32 +{ 113.33 + unsigned long guest_cr4 = v->arch.hvm_svm.cpu_shadow_cr4; 113.34 + return svm_paging_enabled(v) && (guest_cr4 & X86_CR4_PAE); 113.35 +} 113.36 + 113.37 +static inline int svm_pgbit_test(struct vcpu *v) 113.38 +{ 113.39 + return v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PG; 113.40 +} 113.41 + 113.42 #define SVM_REG_EAX (0) 113.43 #define SVM_REG_ECX (1) 113.44 #define SVM_REG_EDX (2)
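These predicates read the guest's mode straight from the shadowed CR0/CR4/EFER values, replacing the synthetic SVM_CPU_STATE_* bits removed from vmcb.h below. A paging-mode switch might use them roughly as follows (hypothetical helper, sketch only):

    static int svm_guest_paging_levels(struct vcpu *v)
    {
        if ( !svm_paging_enabled(v) )
            return 0;   /* real mode, or protected mode without paging */
        if ( svm_long_mode_enabled(v) )
            return 4;   /* 4-level long-mode pagetables */
        if ( svm_cr4_pae_is_set(v) )
            return 3;   /* PAE */
        return 2;       /* classic 2-level */
    }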
114.1 --- a/xen/include/asm-x86/hvm/svm/vmcb.h Mon Mar 05 12:49:12 2007 -0600 114.2 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h Thu Mar 08 14:39:52 2007 -0600 114.3 @@ -303,14 +303,6 @@ enum VMEXIT_EXITCODE 114.4 VMEXIT_INVALID = -1 114.5 }; 114.6 114.7 -enum { 114.8 - SVM_CPU_STATE_PG_ENABLED=0, 114.9 - SVM_CPU_STATE_PAE_ENABLED, 114.10 - SVM_CPU_STATE_LME_ENABLED, 114.11 - SVM_CPU_STATE_LMA_ENABLED, 114.12 - SVM_CPU_STATE_ASSIST_ENABLED, 114.13 -}; 114.14 - 114.15 /* Definitions of segment state are borrowed by the generic HVM code. */ 114.16 typedef segment_attributes_t svm_segment_attributes_t; 114.17 typedef segment_register_t svm_segment_register_t; 114.18 @@ -457,12 +449,12 @@ struct arch_svm_struct { 114.19 int saved_irq_vector; 114.20 u32 launch_core; 114.21 114.22 - unsigned long flags; /* VMCB flags */ 114.23 - unsigned long cpu_shadow_cr0; /* Guest value for CR0 */ 114.24 - unsigned long cpu_shadow_cr4; /* Guest value for CR4 */ 114.25 + unsigned long flags; /* VMCB flags */ 114.26 + unsigned long cpu_shadow_cr0; /* Guest value for CR0 */ 114.27 + unsigned long cpu_shadow_cr4; /* Guest value for CR4 */ 114.28 + unsigned long cpu_shadow_efer; /* Guest value for EFER */ 114.29 unsigned long cpu_cr2; 114.30 unsigned long cpu_cr3; 114.31 - unsigned long cpu_state; 114.32 }; 114.33 114.34 struct vmcb_struct *alloc_vmcb(void);
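With the SVM_CPU_STATE_* bitmap gone, call sites test architectural state directly in the shadowed registers. Roughly (representative before/after sketch, not literal lines from this changeset; do_long_mode_work is a placeholder):

    /* Before: synthetic flag in the removed cpu_state word. */
    if ( test_bit(SVM_CPU_STATE_LMA_ENABLED, &v->arch.hvm_svm.cpu_state) )
        do_long_mode_work(v);

    /* After: the architectural EFER.LMA bit in the new shadow field
     * (or the svm_long_mode_enabled() wrapper from svm.h). */
    if ( v->arch.hvm_svm.cpu_shadow_efer & EFER_LMA )
        do_long_mode_work(v);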
115.1 --- a/xen/include/public/arch-x86/xen.h Mon Mar 05 12:49:12 2007 -0600 115.2 +++ b/xen/include/public/arch-x86/xen.h Thu Mar 08 14:39:52 2007 -0600 115.3 @@ -132,6 +132,7 @@ struct vcpu_guest_context { 115.4 unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ 115.5 unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ 115.6 unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ 115.7 + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ 115.8 unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ 115.9 unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ 115.10 #ifdef __i386__
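The new comment documents the convention for 64-bit PV guests, which maintain separate kernel and user pagetables. A domain builder would fill in both slots roughly as below (sketch only; the mfn parameters are hypothetical and the xen_pfn_to_cr3() encoding helper is assumed from these public headers):

    static void fill_pagetable_regs(struct vcpu_guest_context *ctxt,
                                    unsigned long kernel_pt_mfn,
                                    unsigned long user_pt_mfn)
    {
        ctxt->ctrlreg[3] = xen_pfn_to_cr3(kernel_pt_mfn); /* kernel PT */
        ctxt->ctrlreg[1] = xen_pfn_to_cr3(user_pt_mfn);   /* user PT, x86/64 */
    }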
116.1 --- a/xen/include/public/xen.h Mon Mar 05 12:49:12 2007 -0600 116.2 +++ b/xen/include/public/xen.h Thu Mar 08 14:39:52 2007 -0600 116.3 @@ -131,6 +131,7 @@ 116.4 #define VIRQ_TBUF       4  /* G. (DOM0) Trace buffer has records available.  */ 116.5 #define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */ 116.6 #define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */ 116.7 +#define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */ 116.8 116.9 /* Architecture-specific VIRQ definitions. */ 116.10 #define VIRQ_ARCH_0    16 116.11 @@ -473,26 +474,24 @@ typedef struct shared_info shared_info_t 116.12 #endif 116.13 116.14 /* 116.15 - * Start-of-day memory layout for the initial domain (DOM0): 116.16 + * Start-of-day memory layout: 116.17  *  1. The domain is started within contiguous virtual-memory region. 116.18 - *  2. The contiguous region begins and ends on an aligned 4MB boundary. 116.19 - *  3. The region start corresponds to the load address of the OS image. 116.20 - *     If the load address is not 4MB aligned then the address is rounded down. 116.21 - *  4. This the order of bootstrap elements in the initial virtual region: 116.22 + *  2. The contiguous region ends on an aligned 4MB boundary. 116.23 + *  3. This is the order of bootstrap elements in the initial virtual region: 116.24  *      a. relocated kernel image 116.25  *      b. initial ram disk              [mod_start, mod_len] 116.26  *      c. list of allocated page frames [mfn_list, nr_pages] 116.27  *      d. start_info_t structure        [register ESI (x86)] 116.28  *      e. bootstrap page tables         [pt_base, CR3 (x86)] 116.29  *      f. bootstrap stack               [register ESP (x86)] 116.30 - *  5. Bootstrap elements are packed together, but each is 4kB-aligned. 116.31 - *  6. The initial ram disk may be omitted. 116.32 - *  7. The list of page frames forms a contiguous 'pseudo-physical' memory 116.33 + *  4. Bootstrap elements are packed together, but each is 4kB-aligned. 116.34 + *  5. The initial ram disk may be omitted. 116.35 + *  6. The list of page frames forms a contiguous 'pseudo-physical' memory 116.36  *     layout for the domain. In particular, the bootstrap virtual-memory 116.37  *     region is a 1:1 mapping to the first section of the pseudo-physical map. 116.38 - *  8. All bootstrap elements are mapped read-writable for the guest OS. The 116.39 + *  7. All bootstrap elements are mapped read-writable for the guest OS. The 116.40  *     only exception is the bootstrap page table, which is mapped read-only. 116.41 - *  9. There is guaranteed to be at least 512kB padding after the final 116.42 + *  8. There is guaranteed to be at least 512kB padding after the final 116.43  *     bootstrap element. If necessary, the bootstrap virtual region is 116.44  *     extended by an extra 4MB to ensure this. */
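VIRQ_CON_RING lets a privileged domain be notified when Xen appends bytes to its console ring rather than having to poll. A guest-side consumer would bind it through the normal event-channel path, roughly as below (hypothetical sketch; assumes the EVTCHNOP_bind_virq interface from public/event_channel.h):

    /* Sketch of a dom0 consumer binding the new VIRQ. */
    static int bind_con_ring_virq(void)
    {
        struct evtchn_bind_virq bind = { .virq = VIRQ_CON_RING, .vcpu = 0 };

        if ( HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind) != 0 )
            return -1;

        return bind.port;  /* drain the console ring when this port fires */
    }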