ia64/xen-unstable

changeset 15470:d146700adf71

merge with xen-unstable.hg
author Alex Williamson <alex.williamson@hp.com>
date Mon Jul 02 12:19:26 2007 -0600 (2007-07-02)
parents 443ce7edad0e 182446677b6b
children 813e945ef5b6
files tools/libxc/xenctrl.h
line diff
     1.1 --- a/buildconfigs/mk.linux-2.6-xen	Mon Jul 02 10:31:03 2007 -0600
     1.2 +++ b/buildconfigs/mk.linux-2.6-xen	Mon Jul 02 12:19:26 2007 -0600
     1.3 @@ -28,6 +28,11 @@ LINUX_VER3  := $(LINUX_SERIES).$(word 3,
     1.4  
     1.5  include buildconfigs/src.$(XEN_LINUX_SOURCE)
     1.6  
     1.7 +# Default to allowing interface mismatch
     1.8 +ifndef XEN_LINUX_ALLOW_INTERFACE_MISMATCH
     1.9 +XEN_LINUX_ALLOW_INTERFACE_MISMATCH := y
    1.10 +endif
    1.11 +
    1.12  # The real action starts here!
    1.13  .PHONY: build
    1.14  build: $(LINUX_DIR)/include/linux/autoconf.h
     2.1 --- a/tools/Makefile	Mon Jul 02 10:31:03 2007 -0600
     2.2 +++ b/tools/Makefile	Mon Jul 02 12:19:26 2007 -0600
     2.3 @@ -43,6 +43,7 @@ install: check
     2.4  	$(MAKE) ioemuinstall
     2.5  	$(INSTALL_DIR) $(DESTDIR)/var/xen/dump
     2.6  	$(INSTALL_DIR) $(DESTDIR)/var/log/xen
     2.7 +	$(INSTALL_DIR) $(DESTDIR)/var/lib/xen
     2.8  
     2.9  .PHONY: clean
    2.10  clean: check_clean
     3.1 --- a/tools/console/daemon/io.c	Mon Jul 02 10:31:03 2007 -0600
     3.2 +++ b/tools/console/daemon/io.c	Mon Jul 02 12:19:26 2007 -0600
     3.3 @@ -764,27 +764,31 @@ void handle_io(void)
     3.4  		/* XXX I wish we didn't have to busy wait for hypervisor logs
     3.5  		 * but there's no obvious way to get event channel notifications
     3.6  		 * for new HV log data as we can with guests */
     3.7 -		ret = select(max_fd + 1, &readfds, &writefds, 0, log_hv_fd != -1 ? &timeout : NULL);
     3.8 +		ret = select(max_fd + 1, &readfds, &writefds, 0,
     3.9 +			     log_hv_fd != -1 ? &timeout : NULL);
    3.10  
    3.11 +		if (log_reload) {
    3.12 +			handle_log_reload();
    3.13 +			log_reload = 0;
    3.14 +		}
    3.15 +
    3.16 +		/* Abort if select failed, except for EINTR cases
    3.17 +		   which indicate a possible log reload */
    3.18  		if (ret == -1) {
    3.19 -			if (errno == EINTR) {
    3.20 -				if (log_reload) {
    3.21 -					handle_log_reload();
    3.22 -					log_reload = 0;
    3.23 -				}
    3.24 +			if (errno == EINTR)
    3.25  				continue;
    3.26 -			}
    3.27  			dolog(LOG_ERR, "Failure in select: %d (%s)",
    3.28  			      errno, strerror(errno));
    3.29  			break;
    3.30  		}
    3.31  
    3.32 -		/* Check for timeout */
    3.33 -		if (ret == 0) {
    3.34 -			if (log_hv_fd != -1)
    3.35 -				handle_hv_logs();
    3.36 +		/* Always process HV logs even if not a timeout */
    3.37 +		if (log_hv_fd != -1)
    3.38 +			handle_hv_logs();
    3.39 +
    3.40 +		/* Must not check returned FDSET if it was a timeout */
    3.41 +		if (ret == 0)
    3.42  			continue;
    3.43 -		}
    3.44  
    3.45  		if (FD_ISSET(xs_fileno(xs), &readfds))
    3.46  			handle_xs();
    3.47 @@ -806,10 +810,14 @@ void handle_io(void)
    3.48  		}
    3.49  	}
    3.50  
    3.51 -	if (log_hv_fd != -1)
    3.52 +	if (log_hv_fd != -1) {
    3.53  		close(log_hv_fd);
    3.54 -	if (xc_handle != -1)
    3.55 +		log_hv_fd = -1;
    3.56 +	}
    3.57 +	if (xc_handle != -1) {
    3.58  		xc_interface_close(xc_handle);
    3.59 +		xc_handle = -1;
    3.60 +	}
    3.61  }
    3.62  
    3.63  /*
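
The reworked handle_io() loop above follows a common shape for signal-aware select() servers: run the reload check on every wakeup (EINTR included), service sources that must be polled unconditionally, and only consult the fd sets when select() actually reported ready descriptors. A minimal standalone sketch of that shape, with hypothetical names rather than the daemon's real code:

    #include <errno.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/select.h>

    static volatile sig_atomic_t reload;      /* set by a SIGHUP handler */

    static void event_loop(int fd)
    {
        for (;;) {
            fd_set rfds;
            struct timeval tv = { 1, 0 };     /* 1s poll interval */
            int ret;

            FD_ZERO(&rfds);
            FD_SET(fd, &rfds);
            ret = select(fd + 1, &rfds, NULL, NULL, &tv);

            if (reload) {                     /* check on every wakeup */
                reload = 0;
                /* ... reopen log files ... */
            }
            if (ret == -1) {
                if (errno == EINTR)
                    continue;                 /* interrupted; just retry */
                fprintf(stderr, "select: %s\n", strerror(errno));
                break;
            }
            /* ... service always-polled sources here ... */
            if (ret == 0)
                continue;                     /* timeout: rfds not valid */
            if (FD_ISSET(fd, &rfds)) {
                /* ... read from fd ... */
            }
        }
    }
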
     4.1 --- a/tools/examples/init.d/xendomains	Mon Jul 02 10:31:03 2007 -0600
     4.2 +++ b/tools/examples/init.d/xendomains	Mon Jul 02 12:19:26 2007 -0600
     4.3 @@ -221,11 +221,12 @@ start()
     4.4      if [ "$XENDOMAINS_RESTORE" = "true" ] &&
     4.5         contains_something "$XENDOMAINS_SAVE"
     4.6      then
     4.7 -        mkdir -p $(dirname "$LOCKFILE")
     4.8 +	XENDOMAINS_SAVED=`/bin/ls $XENDOMAINS_SAVE/* | grep -v 'lost+found'`
     4.9 +	mkdir -p $(dirname "$LOCKFILE")
    4.10  	touch $LOCKFILE
    4.11  	echo -n "Restoring Xen domains:"
    4.12  	saved_domains=`ls $XENDOMAINS_SAVE`
    4.13 -	for dom in $XENDOMAINS_SAVE/*; do
    4.14 +	for dom in $XENDOMAINS_SAVED; do
    4.15  	    echo -n " ${dom##*/}"
    4.16  	    xm restore $dom
    4.17  	    if [ $? -ne 0 ]; then
    4.18 @@ -259,6 +260,7 @@ start()
    4.19  	    if [ $? -eq 0 ] || is_running $dom; then
    4.20  		echo -n "(skip)"
    4.21  	    else
    4.22 +		echo "(booting)"
    4.23  		xm create --quiet --defconfig $dom
    4.24  		if [ $? -ne 0 ]; then
    4.25  		    rc_failed $?
     5.1 --- a/tools/examples/network-bridge	Mon Jul 02 10:31:03 2007 -0600
     5.2 +++ b/tools/examples/network-bridge	Mon Jul 02 12:19:26 2007 -0600
     5.3 @@ -172,11 +172,23 @@ show_status () {
     5.4      echo '============================================================'
     5.5  }
     5.6  
     5.7 +is_network_root () {
     5.8 +    local rootfs=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $3; }}' /etc/mtab)
     5.9 +    local rootopts=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $4; }}' /etc/mtab)
    5.10 +
    5.11 +    [[ "$rootfs" =~ "^nfs" ]] || [[ "$rootopts" =~ "_netdev" ]] && return 0 || return 1
    5.12 +}
    5.13 +
    5.14  op_start () {
    5.15      if [ "${bridge}" = "null" ] ; then
    5.16  	return
    5.17      fi
    5.18  
    5.19 +    if is_network_root ; then
    5.20 +        [ -x /usr/bin/logger ] && /usr/bin/logger "network-bridge: bridging not supported on network root; not starting"
    5.21 +        return
    5.22 +    fi
    5.23 +
    5.24      if link_exists "$pdev"; then
    5.25          # The device is already up.
    5.26          return
     6.1 --- a/tools/firmware/vmxassist/head.S	Mon Jul 02 10:31:03 2007 -0600
     6.2 +++ b/tools/firmware/vmxassist/head.S	Mon Jul 02 12:19:26 2007 -0600
     6.3 @@ -25,81 +25,13 @@
     6.4   * switch happens to the environment below. The magic indicates
     6.5   * that this is a valid context.
     6.6   */
     6.7 -#ifdef TEST
     6.8 -	.byte 0x55, 0xaa
     6.9 -	.byte 0x80
    6.10 -	.code16
    6.11 -	jmp	_start16
    6.12 -#else
    6.13  	jmp	_start
    6.14 -#endif
    6.15  
    6.16  	.align	8
    6.17  	.long	VMXASSIST_MAGIC
    6.18  	.long	newctx			/* new context */
    6.19  	.long	oldctx			/* old context */
    6.20  
    6.21 -#ifdef TEST
    6.22 -/*
    6.23 - * We are running in 16-bit. Get into the protected mode as soon as
    6.24 - * possible. We use our own (minimal) GDT to get started.
    6.25 - *
    6.26 - * ROM is a misnomer as this code isn't really rommable (although it
    6.27 - * only requires a few changes) but it does live in a BIOS ROM segment.
    6.28 - * This code allows me to debug vmxassists under (a modified version of)
    6.29 - * Bochs and load it as a "optromimage1".
    6.30 - */
    6.31 -	.code16
    6.32 -	.globl	_start16
    6.33 -_start16:
    6.34 -        cli
    6.35 -
    6.36 -        /* load our own global descriptor table */
    6.37 -        data32 addr32 lgdt %cs:(rom_gdtr - TEXTADDR)
    6.38 -
    6.39 -        /* go to protected mode */
    6.40 -        movl    %cr0, %eax
    6.41 -        orl     $(CR0_PE), %eax
    6.42 -        movl    %eax, %cr0
    6.43 -        data32  ljmp $0x08, $1f
    6.44 -
    6.45 -        .align  32
    6.46 -        .globl  rom_gdt
    6.47 -rom_gdt:
    6.48 -        .word   0, 0            /* 0x00: reserved */
    6.49 -        .byte   0, 0, 0, 0
    6.50 -
    6.51 -        .word   0xFFFF, 0       /* 0x08: CS 32-bit */
    6.52 -        .byte   0, 0x9A, 0xCF, 0
    6.53 -
    6.54 -        .word   0xFFFF, 0       /* 0x10: CS 32-bit */
    6.55 -        .byte   0, 0x92, 0xCF, 0
    6.56 -rom_gdt_end:
    6.57 -
    6.58 -        .align  4
    6.59 -        .globl  rom_gdtr
    6.60 -rom_gdtr:
    6.61 -        .word   rom_gdt_end - rom_gdt - 1
    6.62 -        .long   rom_gdt
    6.63 -
    6.64 -        .code32
    6.65 -1:
    6.66 -        /* welcome to the 32-bit world */
    6.67 -        movw    $0x10, %ax
    6.68 -        movw    %ax, %ds
    6.69 -        movw    %ax, %es
    6.70 -        movw    %ax, %ss
    6.71 -        movw    %ax, %fs
    6.72 -        movw    %ax, %gs
    6.73 -
    6.74 -        /* enable Bochs debug facilities */
    6.75 -        movw    $0x8A00, %dx
    6.76 -        movw    $0x8A00, %ax
    6.77 -        outw    %ax, (%dx)
    6.78 -
    6.79 -	jmp	_start
    6.80 -#endif /* TEST */
    6.81 -
    6.82  /*
     6.83  * This is the real start. Control was transferred to this point
    6.84   * with CR0_PE set and executing in some 32-bit segment. We call
    6.85 @@ -111,9 +43,6 @@ 1:
    6.86  	cli
    6.87  
    6.88  	/* save register parameters to C land */
    6.89 -#ifdef TEST
    6.90 -	xorl	%edx, %edx
    6.91 -#endif
    6.92  
    6.93  	/* clear bss */
    6.94  	cld
    6.95 @@ -145,11 +74,6 @@ 1:
    6.96  halt:
    6.97  	push	$halt_msg
    6.98  	call	printf
    6.99 -#ifdef TEST
   6.100 -        movw    $0x8A00, %dx
   6.101 -        movw    $0x8AE0, %ax
   6.102 -        outw    %ax, (%dx)
   6.103 -#endif
   6.104  	cli
   6.105  	jmp	.
   6.106  
     7.1 --- a/tools/firmware/vmxassist/machine.h	Mon Jul 02 10:31:03 2007 -0600
     7.2 +++ b/tools/firmware/vmxassist/machine.h	Mon Jul 02 12:19:26 2007 -0600
     7.3 @@ -56,13 +56,6 @@
     7.4  #define	LPGSIZE		(1 << LOG_PDSIZE)	/* large page size */
     7.5  #define	LPGMASK		(~(LPGSIZE - 1))	/* large page mask */
     7.6  
     7.7 -#ifdef TEST
     7.8 -#define	PTE_P		(1 << 0)	/* Present */
     7.9 -#define	PTE_RW		(1 << 1)	/* Read/Write */
    7.10 -#define	PTE_US		(1 << 2)	/* User/Supervisor */
    7.11 -#define	PTE_PS		(1 << 7)	/* Page Size */
    7.12 -#endif
    7.13 -
     7.14  /* Programmable Interrupt Controller (PIC) defines */
    7.15  #define	PIC_MASTER	0x20
    7.16  #define	PIC_SLAVE	0xA0
    7.17 @@ -195,14 +188,6 @@ set_cr4(unsigned value)
    7.18  	__asm__ __volatile__("movl %0, %%cr4" : /* no outputs */ : "r"(value));
    7.19  }
    7.20  
    7.21 -#ifdef TEST
    7.22 -static inline void
    7.23 -breakpoint(void)
    7.24 -{
    7.25 -	outw(0x8A00, 0x8AE0);
    7.26 -}
    7.27 -#endif /* TEST */
    7.28 -
    7.29  #endif /* __ASSEMBLY__ */
    7.30  
    7.31  #endif /* __MACHINE_H__ */
     8.1 --- a/tools/firmware/vmxassist/setup.c	Mon Jul 02 10:31:03 2007 -0600
     8.2 +++ b/tools/firmware/vmxassist/setup.c	Mon Jul 02 12:19:26 2007 -0600
     8.3 @@ -47,19 +47,6 @@ unsigned long long idt[NR_TRAPS] __attri
     8.4  
     8.5  struct dtr idtr = { sizeof(idt)-1, (unsigned long) &idt };
     8.6  
     8.7 -#ifdef TEST
     8.8 -unsigned pgd[NR_PGD] __attribute__ ((aligned(PGSIZE))) = { 0 };
     8.9 -
    8.10 -struct e820entry e820map[] = {
    8.11 -	{ 0x0000000000000000ULL, 0x000000000009F800ULL, E820_RAM },
    8.12 -	{ 0x000000000009F800ULL, 0x0000000000000800ULL, E820_RESERVED },
    8.13 -	{ 0x00000000000C0000ULL, 0x0000000000040000ULL, E820_RESERVED },
    8.14 -	{ 0x0000000000100000ULL, 0x0000000000000000ULL, E820_RAM },
    8.15 -	{ 0x0000000000000000ULL, 0x0000000000003000ULL, E820_NVS },
    8.16 -	{ 0x0000000000003000ULL, 0x000000000000A000ULL, E820_ACPI },
    8.17 -};
    8.18 -#endif /* TEST */
    8.19 -
    8.20  struct vmx_assist_context oldctx;
    8.21  struct vmx_assist_context newctx;
    8.22  
    8.23 @@ -84,39 +71,12 @@ banner(void)
    8.24  		    (((get_cmos(0x31) << 8) | get_cmos(0x30)) + 0x400) << 10;
    8.25  	memory_size += 0x400 << 10; /* + 1MB */
    8.26  
    8.27 -#ifdef TEST
    8.28 -	/* Create an SMAP for our debug environment */
    8.29 -	e820map[4].size = memory_size - e820map[4].addr - PGSIZE;
    8.30 -	e820map[5].addr = memory_size - PGSIZE;
    8.31 -	e820map[6].addr = memory_size;
    8.32 -	e820map[7].addr += memory_size;
    8.33 -
    8.34 -	*HVM_E820_NR = sizeof(e820map)/sizeof(e820map[0]);
    8.35 -	memcpy(HVM_E820, e820map, sizeof(e820map));
    8.36 -#endif
    8.37 -
    8.38  	printf("Memory size %ld MB\n", memory_size >> 20);
    8.39  	printf("E820 map:\n");
    8.40  	print_e820_map(HVM_E820, *HVM_E820_NR);
    8.41  	printf("\n");
    8.42  }
    8.43  
    8.44 -#ifdef TEST
    8.45 -void
    8.46 -setup_paging(void)
    8.47 -{
    8.48 -	unsigned long i;
    8.49 -
    8.50 -	if (((unsigned)pgd & ~PGMASK) != 0)
    8.51 -		panic("PGD not page aligned");
    8.52 -	set_cr4(get_cr4() | CR4_PSE);
    8.53 -	for (i = 0; i < NR_PGD; i++)
    8.54 -		pgd[i] = (i * LPGSIZE)| PTE_PS | PTE_US | PTE_RW | PTE_P;
    8.55 -	set_cr3((unsigned) pgd);
    8.56 -	set_cr0(get_cr0() | (CR0_PE|CR0_PG));
    8.57 -}
    8.58 -#endif /* TEST */
    8.59 -
    8.60  void
    8.61  setup_gdt(void)
    8.62  {
    8.63 @@ -211,11 +171,7 @@ enter_real_mode(struct regs *regs)
    8.64  		regs->ves = regs->vds = regs->vfs = regs->vgs = 0xF000;
    8.65  		if (booting_cpu == 0) {
    8.66  			regs->cs = 0xF000; /* ROM BIOS POST entry point */
    8.67 -#ifdef TEST
    8.68 -			regs->eip = 0xFFE0;
    8.69 -#else
    8.70  			regs->eip = 0xFFF0;
    8.71 -#endif
    8.72  		} else {
    8.73  			regs->cs = booting_vector << 8; /* AP entry point */
    8.74  			regs->eip = 0;
    8.75 @@ -242,9 +198,10 @@ enter_real_mode(struct regs *regs)
    8.76  	}
    8.77  
    8.78  	/* go from protected to real mode */
    8.79 -	regs->eflags |= EFLAGS_VM;
    8.80  	set_mode(regs, VM86_PROTECTED_TO_REAL);
    8.81  	emulate(regs);
    8.82 +	if (mode != VM86_REAL)
    8.83 +		panic("failed to emulate between clear PE and long jump.\n");
    8.84  }
    8.85  
    8.86  /*
    8.87 @@ -269,13 +226,8 @@ setup_ctx(void)
    8.88  	 * more natural to enable CR0.PE to cause a world switch to
    8.89  	 * protected mode rather than disabling it.
    8.90  	 */
    8.91 -#ifdef TEST
    8.92 -	c->cr0 = (get_cr0() | CR0_NE | CR0_PG) & ~CR0_PE;
    8.93 -	c->cr3 = (unsigned long) pgd;
    8.94 -#else
    8.95  	c->cr0 = (get_cr0() | CR0_NE) & ~CR0_PE;
    8.96  	c->cr3 = 0;
    8.97 -#endif
    8.98  	c->cr4 = get_cr4();
    8.99  
   8.100  	c->idtr_limit = sizeof(idt)-1;
   8.101 @@ -369,16 +321,10 @@ main(void)
   8.102  	if (booting_cpu == 0)
   8.103  		banner();
   8.104  
   8.105 -#ifdef TEST
   8.106 -	setup_paging();
   8.107 -#endif
   8.108 -
   8.109  	setup_gdt();
   8.110  	setup_idt();
   8.111  
   8.112 -#ifndef	TEST
   8.113  	set_cr4(get_cr4() | CR4_VME);
   8.114 -#endif
   8.115  
   8.116  	setup_ctx();
   8.117  
     9.1 --- a/tools/firmware/vmxassist/vm86.c	Mon Jul 02 10:31:03 2007 -0600
     9.2 +++ b/tools/firmware/vmxassist/vm86.c	Mon Jul 02 12:19:26 2007 -0600
     9.3 @@ -561,11 +561,7 @@ lmsw(struct regs *regs, unsigned prefix,
     9.4  	unsigned cr0 = (oldctx.cr0 & 0xFFFFFFF0) | ax;
     9.5  
     9.6  	TRACE((regs, regs->eip - eip, "lmsw 0x%x", ax));
     9.7 -#ifndef TEST
     9.8  	oldctx.cr0 = cr0 | CR0_PE | CR0_NE;
     9.9 -#else
    9.10 -	oldctx.cr0 = cr0 | CR0_PE | CR0_NE | CR0_PG;
    9.11 -#endif
    9.12  	if (cr0 & CR0_PE)
    9.13  		set_mode(regs, VM86_REAL_TO_PROTECTED);
    9.14  
    9.15 @@ -584,8 +580,13 @@ movr(struct regs *regs, unsigned prefix,
    9.16  	unsigned addr = operand(prefix, regs, modrm);
    9.17  	unsigned val, r = (modrm >> 3) & 7;
    9.18  
    9.19 -	if ((modrm & 0xC0) == 0xC0) /* no registers */
    9.20 -		return 0;
    9.21 +	if ((modrm & 0xC0) == 0xC0) {
    9.22 +		/*
    9.23 +		 * Emulate all guest instructions in protected to real mode.
    9.24 +		 */
    9.25 +		if (mode != VM86_PROTECTED_TO_REAL)
    9.26 +			return 0;
    9.27 +	}
    9.28  
    9.29  	switch (opc) {
    9.30  	case 0x88: /* addr32 mov r8, r/m8 */
    9.31 @@ -656,13 +657,8 @@ movcr(struct regs *regs, unsigned prefix
    9.32  		TRACE((regs, regs->eip - eip, "movl %%cr%d, %%eax", cr));
    9.33  		switch (cr) {
    9.34  		case 0:
    9.35 -#ifndef TEST
    9.36  			setreg32(regs, modrm,
    9.37  				oldctx.cr0 & ~(CR0_PE | CR0_NE));
    9.38 -#else
    9.39 -			setreg32(regs, modrm,
    9.40 -				oldctx.cr0 & ~(CR0_PE | CR0_NE | CR0_PG));
    9.41 -#endif
    9.42  			break;
    9.43  		case 2:
    9.44  			setreg32(regs, modrm, get_cr2());
    9.45 @@ -680,9 +676,6 @@ movcr(struct regs *regs, unsigned prefix
    9.46  		switch (cr) {
    9.47  		case 0:
    9.48  			oldctx.cr0 = getreg32(regs, modrm) | (CR0_PE | CR0_NE);
    9.49 -#ifdef TEST
    9.50 -			oldctx.cr0 |= CR0_PG;
    9.51 -#endif
    9.52  			if (getreg32(regs, modrm) & CR0_PE)
    9.53  				set_mode(regs, VM86_REAL_TO_PROTECTED);
    9.54  			else
    9.55 @@ -818,8 +811,13 @@ mov_to_seg(struct regs *regs, unsigned p
    9.56  {
    9.57  	unsigned modrm = fetch8(regs);
    9.58  
    9.59 -	/* Only need to emulate segment loads in real->protected mode. */
    9.60 -	if (mode != VM86_REAL_TO_PROTECTED)
    9.61 +	/*
    9.62 +	 * Emulate segment loads in:
    9.63 +	 * 1) real->protected mode.
    9.64 +	 * 2) protected->real mode.
    9.65 +	 */
    9.66 +	if ((mode != VM86_REAL_TO_PROTECTED) &&
    9.67 +	    (mode != VM86_PROTECTED_TO_REAL))
    9.68  		return 0;
    9.69  
    9.70  	/* Register source only. */
    9.71 @@ -829,6 +827,8 @@ mov_to_seg(struct regs *regs, unsigned p
    9.72  	switch ((modrm & 0x38) >> 3) {
    9.73  	case 0: /* es */
    9.74  		regs->ves = getreg16(regs, modrm);
    9.75 +		if (mode == VM86_PROTECTED_TO_REAL)
    9.76 +			return 1;
    9.77  		saved_rm_regs.ves = 0;
    9.78  		oldctx.es_sel = regs->ves;
    9.79  		return 1;
    9.80 @@ -837,21 +837,29 @@ mov_to_seg(struct regs *regs, unsigned p
    9.81  
    9.82  	case 2: /* ss */
    9.83  		regs->uss = getreg16(regs, modrm);
    9.84 +		if (mode == VM86_PROTECTED_TO_REAL)
    9.85 +			return 1;
    9.86  		saved_rm_regs.uss = 0;
    9.87  		oldctx.ss_sel = regs->uss;
    9.88  		return 1;
    9.89  	case 3: /* ds */
    9.90  		regs->vds = getreg16(regs, modrm);
    9.91 +		if (mode == VM86_PROTECTED_TO_REAL)
    9.92 +			return 1;
    9.93  		saved_rm_regs.vds = 0;
    9.94  		oldctx.ds_sel = regs->vds;
    9.95  		return 1;
    9.96  	case 4: /* fs */
    9.97  		regs->vfs = getreg16(regs, modrm);
    9.98 +		if (mode == VM86_PROTECTED_TO_REAL)
    9.99 +			return 1;
   9.100  		saved_rm_regs.vfs = 0;
   9.101  		oldctx.fs_sel = regs->vfs;
   9.102  		return 1;
   9.103  	case 5: /* gs */
   9.104  		regs->vgs = getreg16(regs, modrm);
   9.105 +		if (mode == VM86_PROTECTED_TO_REAL)
   9.106 +			return 1;
   9.107  		saved_rm_regs.vgs = 0;
   9.108  		oldctx.gs_sel = regs->vgs;
   9.109  		return 1;
   9.110 @@ -1067,7 +1075,8 @@ set_mode(struct regs *regs, enum vm86_mo
   9.111  	}
   9.112  
   9.113  	mode = newmode;
   9.114 -	TRACE((regs, 0, states[mode]));
   9.115 +	if (mode != VM86_PROTECTED)
   9.116 +		TRACE((regs, 0, states[mode]));
   9.117  }
   9.118  
   9.119  static void
   9.120 @@ -1086,7 +1095,7 @@ jmpl(struct regs *regs, int prefix)
   9.121  
   9.122  	if (mode == VM86_REAL_TO_PROTECTED)		/* jump to protected mode */
   9.123  		set_mode(regs, VM86_PROTECTED);
   9.124 -	else if (mode == VM86_PROTECTED_TO_REAL)/* jump to real mode */
   9.125 +	else if (mode == VM86_PROTECTED_TO_REAL)	/* jump to real mode */
   9.126  		set_mode(regs, VM86_REAL);
   9.127  	else
   9.128  		panic("jmpl");
   9.129 @@ -1281,6 +1290,12 @@ opcode(struct regs *regs)
   9.130  	unsigned opc, modrm, disp;
   9.131  	unsigned prefix = 0;
   9.132  
   9.133 +	if (mode == VM86_PROTECTED_TO_REAL &&
   9.134 +		oldctx.cs_arbytes.fields.default_ops_size) {
   9.135 +		prefix |= DATA32;
   9.136 +		prefix |= ADDR32;
   9.137 +	}
   9.138 +
   9.139  	for (;;) {
   9.140  		switch ((opc = fetch8(regs))) {
   9.141  		case 0x07: /* pop %es */
   9.142 @@ -1391,17 +1406,29 @@ opcode(struct regs *regs)
   9.143  			continue;
   9.144  
   9.145  		case 0x66:
   9.146 -			TRACE((regs, regs->eip - eip, "data32"));
   9.147 -			prefix |= DATA32;
   9.148 +			if (mode == VM86_PROTECTED_TO_REAL &&
   9.149 +				oldctx.cs_arbytes.fields.default_ops_size) {
   9.150 +				TRACE((regs, regs->eip - eip, "data16"));
   9.151 +				prefix &= ~DATA32;
   9.152 +			} else {
   9.153 +				TRACE((regs, regs->eip - eip, "data32"));
   9.154 +				prefix |= DATA32;
   9.155 +			}
   9.156  			continue;
   9.157  
   9.158  		case 0x67:
   9.159 -			TRACE((regs, regs->eip - eip, "addr32"));
   9.160 -			prefix |= ADDR32;
   9.161 +			if (mode == VM86_PROTECTED_TO_REAL &&
   9.162 +				oldctx.cs_arbytes.fields.default_ops_size) {
   9.163 +				TRACE((regs, regs->eip - eip, "addr16"));
   9.164 +				prefix &= ~ADDR32;
   9.165 +			} else {
   9.166 +				TRACE((regs, regs->eip - eip, "addr32"));
   9.167 +				prefix |= ADDR32;
   9.168 +			}
   9.169  			continue;
   9.170  
   9.171 -		case 0x88: /* addr32 mov r8, r/m8 */
   9.172 -		case 0x8A: /* addr32 mov r/m8, r8 */
   9.173 +		case 0x88: /* mov r8, r/m8 */
   9.174 +		case 0x8A: /* mov r/m8, r8 */
   9.175  			if (mode != VM86_REAL && mode != VM86_REAL_TO_PROTECTED)
   9.176  				goto invalid;
   9.177  			if ((prefix & ADDR32) == 0)
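
The new 0x66/0x67 handling above relies on the standard x86 prefix rule: these bytes select the non-default operand/address size, so their meaning flips with the code segment's default (CS.D, read here via cs_arbytes.fields.default_ops_size). A hypothetical one-line helper stating the rule:

    #include <stdbool.h>

    /* 0x66 (and likewise 0x67) toggles away from the segment default:
     * 32-bit default + prefix => 16-bit, 16-bit default + prefix => 32-bit. */
    static bool op_size_is_32bit(bool cs_default_32, bool saw_prefix)
    {
        return cs_default_32 ^ saw_prefix;
    }
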
    10.1 --- a/tools/ioemu/target-i386-dm/exec-dm.c	Mon Jul 02 10:31:03 2007 -0600
    10.2 +++ b/tools/ioemu/target-i386-dm/exec-dm.c	Mon Jul 02 12:19:26 2007 -0600
    10.3 @@ -448,7 +448,7 @@ static void memcpy_words(void *dst, void
    10.4  {
    10.5      asm (
    10.6          "   movl %%edx,%%ecx \n"
    10.7 -#ifdef __x86_64
    10.8 +#ifdef __x86_64__
    10.9          "   shrl $3,%%ecx    \n"
   10.10          "   andl $7,%%edx    \n"
   10.11          "   rep  movsq       \n"
    11.1 --- a/tools/libxc/xc_domain_restore.c	Mon Jul 02 10:31:03 2007 -0600
    11.2 +++ b/tools/libxc/xc_domain_restore.c	Mon Jul 02 12:19:26 2007 -0600
    11.3 @@ -465,7 +465,7 @@ int xc_domain_restore(int xc_handle, int
    11.4          if ( j == 0 )
    11.5              break;  /* our work here is done */
    11.6  
    11.7 -        if ( j > MAX_BATCH_SIZE )
    11.8 +        if ( (j > MAX_BATCH_SIZE) || (j < 0) )
    11.9          {
   11.10              ERROR("Max batch size exceeded. Giving up.");
   11.11              goto out;
   11.12 @@ -903,13 +903,14 @@ int xc_domain_restore(int xc_handle, int
   11.13  
    11.14      /* Get the list of PFNs that are not in the pseudo-phys map */
   11.15      {
   11.16 -        unsigned int count;
   11.17 +        unsigned int count = 0;
   11.18          unsigned long *pfntab;
   11.19          int nr_frees, rc;
   11.20  
   11.21 -        if ( !read_exact(io_fd, &count, sizeof(count)) )
   11.22 +        if ( !read_exact(io_fd, &count, sizeof(count)) ||
   11.23 +             (count > (1U << 28)) ) /* up to 1TB of address space */
   11.24          {
   11.25 -            ERROR("Error when reading pfn count");
   11.26 +            ERROR("Error when reading pfn count (= %u)", count);
   11.27              goto out;
   11.28          }
   11.29  
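
Both hunks above harden the restore path against corrupt or hostile input: a batch count and a pfn count read from the image are now range-checked before they drive loops or allocations (2^28 pfns of 4kB each corresponds to the 1TB bound in the comment). The general pattern, as a hypothetical helper:

    #include <stdint.h>
    #include <unistd.h>

    /* Validate any count read from an untrusted stream against an
     * explicit upper bound before using it as a loop or allocation size. */
    static int read_checked_count(int fd, uint32_t *count, uint32_t max)
    {
        if (read(fd, count, sizeof(*count)) != sizeof(*count))
            return -1;            /* short or failed read */
        if (*count > max)
            return -1;            /* implausible value: reject early */
        return 0;
    }
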
    12.1 --- a/tools/libxc/xc_misc.c	Mon Jul 02 10:31:03 2007 -0600
    12.2 +++ b/tools/libxc/xc_misc.c	Mon Jul 02 12:19:26 2007 -0600
    12.3 @@ -101,14 +101,38 @@ int xc_perfc_control(int xc_handle,
    12.4  
    12.5      rc = do_sysctl(xc_handle, &sysctl);
    12.6  
    12.7 -    if (nbr_desc)
    12.8 +    if ( nbr_desc )
    12.9          *nbr_desc = sysctl.u.perfc_op.nr_counters;
   12.10 -    if (nbr_val)
   12.11 +    if ( nbr_val )
   12.12          *nbr_val = sysctl.u.perfc_op.nr_vals;
   12.13  
   12.14      return rc;
   12.15  }
   12.16  
   12.17 +int xc_getcpuinfo(int xc_handle, int max_cpus,
   12.18 +                  xc_cpuinfo_t *info, int *nr_cpus)
   12.19 +{
   12.20 +    int rc;
   12.21 +    DECLARE_SYSCTL;
   12.22 +
   12.23 +    sysctl.cmd = XEN_SYSCTL_getcpuinfo;
   12.24 +    sysctl.u.getcpuinfo.max_cpus = max_cpus; 
   12.25 +    set_xen_guest_handle(sysctl.u.getcpuinfo.info, info); 
   12.26 +
   12.27 +    if ( (rc = lock_pages(info, max_cpus*sizeof(*info))) != 0 )
   12.28 +        return rc;
   12.29 +
   12.30 +    rc = do_sysctl(xc_handle, &sysctl);
   12.31 +
   12.32 +    unlock_pages(info, max_cpus*sizeof(*info));
   12.33 +
   12.34 +    if ( nr_cpus )
   12.35 +        *nr_cpus = sysctl.u.getcpuinfo.nr_cpus; 
   12.36 +
   12.37 +    return rc;
   12.38 +}
   12.39 +
   12.40 +
   12.41  int xc_hvm_set_pci_intx_level(
   12.42      int xc_handle, domid_t dom,
   12.43      uint8_t domain, uint8_t bus, uint8_t device, uint8_t intx,
    13.1 --- a/tools/libxc/xenctrl.h	Mon Jul 02 10:31:03 2007 -0600
    13.2 +++ b/tools/libxc/xenctrl.h	Mon Jul 02 12:19:26 2007 -0600
    13.3 @@ -491,6 +491,10 @@ int xc_physinfo(int xc_handle,
    13.4  int xc_sched_id(int xc_handle,
    13.5                  int *sched_id);
    13.6  
    13.7 +typedef xen_sysctl_cpuinfo_t xc_cpuinfo_t;
    13.8 +int xc_getcpuinfo(int xc_handle, int max_cpus,
    13.9 +                  xc_cpuinfo_t *info, int *nr_cpus); 
   13.10 +
   13.11  int xc_domain_setmaxmem(int xc_handle,
   13.12                          uint32_t domid,
   13.13                          unsigned int max_memkb);
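
A hypothetical caller of the new xc_getcpuinfo() interface; the idletime field and its nanosecond units are assumed from xen_sysctl_cpuinfo_t, and error handling is elided:

    #include <inttypes.h>
    #include <stdio.h>
    #include <xenctrl.h>

    void show_cpu_idle(int xc_handle)
    {
        xc_cpuinfo_t info[16];
        int i, nr_cpus = 0;

        if (xc_getcpuinfo(xc_handle, 16, info, &nr_cpus) != 0)
            return;
        for (i = 0; i < nr_cpus; i++)
            printf("cpu%d: idle %" PRIu64 " ns\n",
                   i, (uint64_t)info[i].idletime);
    }
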
    14.1 --- a/tools/misc/xen-python-path	Mon Jul 02 10:31:03 2007 -0600
    14.2 +++ b/tools/misc/xen-python-path	Mon Jul 02 12:19:26 2007 -0600
    14.3 @@ -28,8 +28,13 @@ import os
    14.4  import os.path
    14.5  import sys
    14.6  
    14.7 -for p in ['python%s' % sys.version[:3], 'python']:
    14.8 -    for l in ['/usr/lib64', '/usr/lib']:
    14.9 +usr   = os.path.dirname(os.path.dirname(sys.argv[0]))
   14.10 +list  = [ os.path.join(usr,'lib64') ]
   14.11 +list += [ os.path.join(usr,'lib') ]
   14.12 +list += ['/usr/lib64', '/usr/lib']
   14.13 +
   14.14 +for l in list:
   14.15 +    for p in ['python%s' % sys.version[:3], 'python']:
   14.16          for k in ['', 'site-packages/']:
   14.17              d = os.path.join(l, p, k)
   14.18              if os.path.exists(os.path.join(d, AUXBIN)):
    15.1 --- a/tools/python/xen/util/auxbin.py	Mon Jul 02 10:31:03 2007 -0600
    15.2 +++ b/tools/python/xen/util/auxbin.py	Mon Jul 02 12:19:26 2007 -0600
    15.3 @@ -27,6 +27,7 @@ LIB_64_ARCHS = [ 'x86_64', 's390x', 'spa
    15.4  
    15.5  import os
    15.6  import os.path
    15.7 +import sys
    15.8  
    15.9  
   15.10  def execute(exe, args = None):
   15.11 @@ -47,6 +48,14 @@ def path():
   15.12  
   15.13  def libpath():
   15.14      machine = os.uname()[4]
   15.15 +    if sys.argv[0] != '-c':
   15.16 +        prefix = os.path.dirname(os.path.dirname(sys.argv[0]))
   15.17 +        path = os.path.join(prefix, os.path.basename(LIB_64))
   15.18 +        if machine in LIB_64_ARCHS and os.path.exists(path):
   15.19 +            return path
   15.20 +        path = os.path.join(prefix, os.path.basename(LIB_32))
   15.21 +        if os.path.exists(path):
   15.22 +            return path
   15.23      if machine in LIB_64_ARCHS and os.path.exists(LIB_64):
   15.24          return LIB_64
   15.25      else:
    16.1 --- a/tools/python/xen/xend/XendCheckpoint.py	Mon Jul 02 10:31:03 2007 -0600
    16.2 +++ b/tools/python/xen/xend/XendCheckpoint.py	Mon Jul 02 12:19:26 2007 -0600
    16.3 @@ -148,6 +148,8 @@ def save(fd, dominfo, network, live, dst
    16.4          except:
    16.5              log.exception("Failed to reset the migrating domain's name")
    16.6  
    16.7 +        raise exn
    16.8 +
    16.9  
   16.10  def restore(xd, fd, dominfo = None, paused = False):
   16.11      signature = read_exact(fd, len(SIGNATURE),
    17.1 --- a/tools/python/xen/xend/XendConfig.py	Mon Jul 02 10:31:03 2007 -0600
    17.2 +++ b/tools/python/xen/xend/XendConfig.py	Mon Jul 02 12:19:26 2007 -0600
    17.3 @@ -28,6 +28,7 @@ from xen.xend.XendDevices import XendDev
    17.4  from xen.xend.PrettyPrint import prettyprintstring
    17.5  from xen.xend.XendConstants import DOM_STATE_HALTED
    17.6  from xen.xend.server.netif import randomMAC
    17.7 +from xen.util.blkif import blkdev_name_to_number
    17.8  
    17.9  log = logging.getLogger("xend.XendConfig")
   17.10  log.setLevel(logging.WARN)
   17.11 @@ -934,6 +935,62 @@ class XendConfig(dict):
   17.12  
   17.13          return sxpr    
   17.14      
   17.15 +    def _blkdev_name_to_number(self, dev):
   17.16 +        if 'ioemu:' in dev:
   17.17 +            _, dev = dev.split(':', 1)
   17.18 +        try:
   17.19 +            dev, _ = dev.split(':', 1)
   17.20 +        except ValueError:
   17.21 +            pass
   17.22 +        
   17.23 +        try:
   17.24 +            devid = int(dev)
   17.25 +        except ValueError:
   17.26 +            # devid is not a number but a string containing either device
   17.27 +            # name (e.g. xvda) or device_type/device_id (e.g. vbd/51728)
   17.28 +            dev2 = type(dev) is str and dev.split('/')[-1] or None
   17.29 +            if dev2 == None:
   17.30 +                log.debug("Could not check the device %s", dev)
   17.31 +                return None
   17.32 +            try:
   17.33 +                devid = int(dev2)
   17.34 +            except ValueError:
   17.35 +                devid = blkdev_name_to_number(dev2)
   17.36 +                if devid == None:
   17.37 +                    log.debug("The device %s is not device name", dev2)
   17.38 +                    return None
   17.39 +        return devid
   17.40 +    
   17.41 +    def device_duplicate_check(self, dev_type, dev_info, defined_config):
   17.42 +        defined_devices_sxpr = self.all_devices_sxpr(target = defined_config)
   17.43 +        
   17.44 +        if dev_type == 'vbd':
   17.45 +            dev_uname = dev_info.get('uname')
   17.46 +            blkdev_name = dev_info.get('dev')
   17.47 +            devid = self._blkdev_name_to_number(blkdev_name)
   17.48 +            if devid == None:
   17.49 +                return
   17.50 +            
   17.51 +            for o_dev_type, o_dev_info in defined_devices_sxpr:
   17.52 +                if dev_type == o_dev_type:
   17.53 +                    if dev_uname == sxp.child_value(o_dev_info, 'uname'):
   17.54 +                        raise XendConfigError('The uname "%s" is already defined' %
   17.55 +                                              dev_uname)
   17.56 +                    o_blkdev_name = sxp.child_value(o_dev_info, 'dev')
   17.57 +                    o_devid = self._blkdev_name_to_number(o_blkdev_name)
   17.58 +                    if o_devid != None and devid == o_devid:
   17.59 +                        raise XendConfigError('The device "%s" is already defined' %
   17.60 +                                              blkdev_name)
   17.61 +                    
   17.62 +        elif dev_type == 'vif':
   17.63 +            dev_mac = dev_info.get('mac')
   17.64 +            
   17.65 +            for o_dev_type, o_dev_info in defined_devices_sxpr:
   17.66 +                if dev_type == o_dev_type:
   17.67 +                    if dev_mac == sxp.child_value(o_dev_info, 'mac'):
   17.68 +                        raise XendConfigError('The mac "%s" is already defined' %
   17.69 +                                              dev_mac)
   17.70 +    
   17.71      def device_add(self, dev_type, cfg_sxp = None, cfg_xenapi = None,
   17.72                     target = None):
   17.73          """Add a device configuration in SXP format or XenAPI struct format.
   17.74 @@ -998,6 +1055,8 @@ class XendConfig(dict):
   17.75                  if not dev_info.get('mac'):
   17.76                      dev_info['mac'] = randomMAC()
   17.77  
   17.78 +            self.device_duplicate_check(dev_type, dev_info, target)
   17.79 +
   17.80              # create uuid if it doesn't exist
   17.81              dev_uuid = dev_info.get('uuid', None)
   17.82              if not dev_uuid:
   17.83 @@ -1275,15 +1334,19 @@ class XendConfig(dict):
   17.84          return False
   17.85  
   17.86  
   17.87 -    def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None):
   17.88 +    def device_sxpr(self, dev_uuid = None, dev_type = None, dev_info = None, target = None):
   17.89          """Get Device SXPR by either giving the device UUID or (type, config).
   17.90  
   17.91          @rtype: list of lists
   17.92          @return: device config sxpr
   17.93          """
   17.94          sxpr = []
   17.95 -        if dev_uuid != None and dev_uuid in self['devices']:
   17.96 -            dev_type, dev_info = self['devices'][dev_uuid]
   17.97 +
   17.98 +        if target == None:
   17.99 +            target = self
  17.100 +
  17.101 +        if dev_uuid != None and dev_uuid in target['devices']:
  17.102 +            dev_type, dev_info = target['devices'][dev_uuid]
  17.103  
  17.104          if dev_type == None or dev_info == None:
  17.105              raise XendConfigError("Required either UUID or device type and "
  17.106 @@ -1300,8 +1363,12 @@ class XendConfig(dict):
  17.107  
  17.108          return sxpr
  17.109  
  17.110 -    def ordered_device_refs(self):
  17.111 +    def ordered_device_refs(self, target = None):
  17.112          result = []
  17.113 +
  17.114 +        if target == None:
  17.115 +            target = self
  17.116 +
  17.117          # vkbd devices *must* be before vfb devices, otherwise
  17.118          # there is a race condition when setting up devices
  17.119          # where the daemon spawned for the vfb may write stuff
  17.120 @@ -1309,27 +1376,30 @@ class XendConfig(dict):
  17.121          # setup permissions on the vkbd backend path. This race
  17.122          # results in domain creation failing with 'device already
  17.123          # connected' messages
  17.124 -        result.extend([u for u in self['devices'].keys() if self['devices'][u][0] == 'vkbd'])
  17.125 +        result.extend([u for u in target['devices'].keys() if target['devices'][u][0] == 'vkbd'])
  17.126  
  17.127 -        result.extend(self['console_refs'] +
  17.128 -                      self['vbd_refs'] +
  17.129 -                      self['vif_refs'] +
  17.130 -                      self['vtpm_refs'])
  17.131 +        result.extend(target.get('console_refs', []) +
  17.132 +                      target.get('vbd_refs', []) +
  17.133 +                      target.get('vif_refs', []) +
  17.134 +                      target.get('vtpm_refs', []))
  17.135  
  17.136 -        result.extend([u for u in self['devices'].keys() if u not in result])
  17.137 +        result.extend([u for u in target['devices'].keys() if u not in result])
  17.138          return result
  17.139  
  17.140 -    def all_devices_sxpr(self):
  17.141 +    def all_devices_sxpr(self, target = None):
  17.142          """Returns the SXPR for all devices in the current configuration."""
  17.143          sxprs = []
  17.144          pci_devs = []
  17.145  
  17.146 -        if 'devices' not in self:
  17.147 +        if target == None:
  17.148 +            target = self
  17.149 +
  17.150 +        if 'devices' not in target:
  17.151              return sxprs
  17.152          
  17.153 -        ordered_refs = self.ordered_device_refs()
  17.154 +        ordered_refs = self.ordered_device_refs(target = target)
  17.155          for dev_uuid in ordered_refs:
  17.156 -            dev_type, dev_info = self['devices'][dev_uuid]
  17.157 +            dev_type, dev_info = target['devices'][dev_uuid]
  17.158              if dev_type == 'pci': # special case for pci devices
  17.159                  sxpr = [['uuid', dev_info['uuid']]]
  17.160                  for pci_dev_info in dev_info['devs']:
  17.161 @@ -1340,7 +1410,8 @@ class XendConfig(dict):
  17.162                  sxprs.append((dev_type, sxpr))
  17.163              else:
  17.164                  sxpr = self.device_sxpr(dev_type = dev_type,
  17.165 -                                        dev_info = dev_info)
  17.166 +                                        dev_info = dev_info,
  17.167 +                                        target   = target)
  17.168                  sxprs.append((dev_type, sxpr))
  17.169  
  17.170          return sxprs
    18.1 --- a/tools/python/xen/xend/XendDomain.py	Mon Jul 02 10:31:03 2007 -0600
    18.2 +++ b/tools/python/xen/xend/XendDomain.py	Mon Jul 02 12:19:26 2007 -0600
    18.3 @@ -1262,8 +1262,10 @@ class XendDomain:
    18.4              try:
    18.5                  XendCheckpoint.save(fd, dominfo, False, False, dst,
    18.6                                      checkpoint=checkpoint)
    18.7 -            finally:
    18.8 +            except Exception, e:
    18.9                  os.close(fd)
   18.10 +                raise e
   18.11 +            os.close(fd)
   18.12          except OSError, ex:
   18.13              raise XendError("can't write guest state file %s: %s" %
   18.14                              (dst, ex[1]))
    19.1 --- a/tools/python/xen/xend/server/irqif.py	Mon Jul 02 10:31:03 2007 -0600
    19.2 +++ b/tools/python/xen/xend/server/irqif.py	Mon Jul 02 12:19:26 2007 -0600
    19.3 @@ -61,7 +61,7 @@ class IRQController(DevController):
    19.4         
    19.5          pirq = get_param('irq')
    19.6  
    19.7 -        rc = xc.domain_irq_permission(dom          = self.getDomid(),
    19.8 +        rc = xc.domain_irq_permission(domid        = self.getDomid(),
    19.9                                        pirq         = pirq,
   19.10                                        allow_access = True)
   19.11  
    20.1 --- a/tools/python/xen/xend/server/pciif.py	Mon Jul 02 10:31:03 2007 -0600
    20.2 +++ b/tools/python/xen/xend/server/pciif.py	Mon Jul 02 12:19:26 2007 -0600
    20.3 @@ -185,3 +185,6 @@ class PciController(DevController):
    20.4  
    20.5      def waitForBackend(self,devid):
    20.6          return (0, "ok - no hotplug")
    20.7 +
    20.8 +    def migrate(self, config, network, dst, step, domName):
    20.9 +        raise XendError('Migration not permitted with assigned PCI device.')
    21.1 --- a/tools/python/xen/xm/main.py	Mon Jul 02 10:31:03 2007 -0600
    21.2 +++ b/tools/python/xen/xm/main.py	Mon Jul 02 12:19:26 2007 -0600
    21.3 @@ -2168,9 +2168,7 @@ def xm_network_attach(args):
    21.4          server.xend.domain.device_create(dom, vif)
    21.5  
    21.6  
    21.7 -def detach(args, command, deviceClass):
    21.8 -    arg_check(args, command, 2, 3)
    21.9 -
   21.10 +def detach(args, deviceClass):
   21.11      dom = args[0]
   21.12      dev = args[1]
   21.13      try:
   21.14 @@ -2204,16 +2202,17 @@ def xm_block_detach(args):
   21.15              raise OptionError("Cannot find device '%s' in domain '%s'"
   21.16                                % (dev,dom))
   21.17      else:
   21.18 +        arg_check(args, 'block-detach', 2, 3)
   21.19          try:
   21.20 -            detach(args, 'block-detach', 'vbd')
   21.21 +            detach(args, 'vbd')
   21.22              return
   21.23          except:
   21.24              pass
   21.25 -        detach(args, 'block-detach', 'tap')
   21.26 +        detach(args, 'tap')
   21.27  
   21.28  def xm_network_detach(args):
   21.29      if serverType == SERVER_XEN_API:
   21.30 -        arg_check(args, "xm_block_detach", 2, 3)
   21.31 +        arg_check(args, "xm_network_detach", 2, 3)
   21.32          dom = args[0]
   21.33          devid = args[1]
   21.34          vif_refs = server.xenapi.VM.get_VIFs(get_single_vm(dom))
   21.35 @@ -2227,7 +2226,8 @@ def xm_network_detach(args):
   21.36          else:
   21.37              print "Cannot find device '%s' in domain '%s'" % (devid,dom)
   21.38      else:
   21.39 -        detach(args, 'network-detach', 'vif')
   21.40 +        arg_check(args, 'network-detach', 2, 3)
   21.41 +        detach(args, 'vif')
   21.42  
   21.43  
   21.44  def xm_vnet_list(args):
    22.1 --- a/xen/acm/acm_core.c	Mon Jul 02 10:31:03 2007 -0600
    22.2 +++ b/xen/acm/acm_core.c	Mon Jul 02 12:19:26 2007 -0600
    22.3 @@ -89,7 +89,7 @@ static void __init set_dom0_ssidref(cons
    22.4          if (hi < ACM_MAX_NUM_TYPES && hi >= 1)
    22.5              dom0_ste_ssidref = hi;
    22.6          for (i = 0; i < sizeof(polname); i++) {
    22.7 -            polname[i] = c[7+i];
    22.8 +            polname[i] = c[5+i];
    22.9              if (polname[i] == '\0' || polname[i] == '\t' ||
   22.10                  polname[i] == '\n' || polname[i] == ' '  ||
   22.11                  polname[i] == ':') {
    23.1 --- a/xen/arch/x86/Makefile	Mon Jul 02 10:31:03 2007 -0600
    23.2 +++ b/xen/arch/x86/Makefile	Mon Jul 02 12:19:26 2007 -0600
    23.3 @@ -10,6 +10,7 @@ subdir-$(x86_64) += x86_64
    23.4  
    23.5  obj-y += apic.o
    23.6  obj-y += bitops.o
    23.7 +obj-y += clear_page.o
    23.8  obj-y += compat.o
    23.9  obj-y += delay.o
   23.10  obj-y += dmi_scan.o
    24.1 --- a/xen/arch/x86/boot/edd.S	Mon Jul 02 10:31:03 2007 -0600
    24.2 +++ b/xen/arch/x86/boot/edd.S	Mon Jul 02 12:19:26 2007 -0600
    24.3 @@ -24,7 +24,7 @@
    24.4  /* Maximum number of EDD information structures at boot_edd_info. */
    24.5  #define EDD_INFO_MAX            6
    24.6  
    24.7 -/* Maximum number of MBR signatures at boot_edd_signature. */
    24.8 +/* Maximum number of MBR signatures at boot_mbr_signature. */
    24.9  #define EDD_MBR_SIG_MAX         16
   24.10  
   24.11  /* Size of components of EDD information structure. */
   24.12 @@ -40,10 +40,8 @@ get_edd:
   24.13  # Read the first sector of each BIOS disk device and store the 4-byte signature
   24.14  edd_mbr_sig_start:
   24.15          movb    $0x80, %dl                      # from device 80
   24.16 -        movw    $bootsym(boot_edd_signature),%bx # store buffer ptr in bx
   24.17 +        movw    $bootsym(boot_mbr_signature),%bx # store buffer ptr in bx
   24.18  edd_mbr_sig_read:
   24.19 -        movl    $0xFFFFFFFF, %eax
   24.20 -        movl    %eax, (%bx)                     # assume failure
   24.21          pushw   %bx
   24.22          movb    $0x02, %ah                      # 0x02 Read Sectors
   24.23          movb    $1, %al                         # read 1 sector
   24.24 @@ -64,11 +62,12 @@ edd_mbr_sig_read:
   24.25          cmpb    $0, %ah                         # some BIOSes do not set CF
   24.26          jne     edd_mbr_sig_done                # on failure, we're done.
   24.27          movl    bootsym(boot_edd_info)+EDD_MBR_SIG_OFFSET,%eax
   24.28 -        movl    %eax, (%bx)                     # store signature from MBR
   24.29 -        incb    bootsym(boot_edd_signature_nr)  # note that we stored something
   24.30 +        movb    %dl, (%bx)                      # store BIOS drive number
   24.31 +        movl    %eax, 4(%bx)                    # store signature from MBR
   24.32 +        incb    bootsym(boot_mbr_signature_nr)  # note that we stored something
   24.33          incb    %dl                             # increment to next device
   24.34 -        addw    $4, %bx                         # increment sig buffer ptr
   24.35 -        cmpb    $EDD_MBR_SIG_MAX,bootsym(boot_edd_signature_nr)
   24.36 +        addw    $8, %bx                         # increment sig buffer ptr
   24.37 +        cmpb    $EDD_MBR_SIG_MAX,bootsym(boot_mbr_signature_nr)
   24.38          jb      edd_mbr_sig_read
   24.39  edd_mbr_sig_done:
   24.40  
   24.41 @@ -150,12 +149,13 @@ edd_done:
   24.42  opt_edd:
   24.43          .byte   0                               # edd=on/off/skipmbr
   24.44  
   24.45 -.globl  boot_edd_info_nr, boot_edd_signature_nr
   24.46 +.globl  boot_edd_info, boot_edd_info_nr
   24.47 +.globl  boot_mbr_signature, boot_mbr_signature_nr
   24.48  boot_edd_info_nr:
   24.49          .byte   0
   24.50 -boot_edd_signature_nr:
   24.51 +boot_mbr_signature_nr:
   24.52          .byte   0
   24.53 -boot_edd_signature:
   24.54 -        .fill   EDD_MBR_SIG_MAX*4,1,0
   24.55 +boot_mbr_signature:
   24.56 +        .fill   EDD_MBR_SIG_MAX*8,1,0
   24.57  boot_edd_info:
   24.58          .fill   512,1,0                         # big enough for a disc sector
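
The signature buffer above grows from 4 to 8 bytes per entry so each record carries the BIOS drive number alongside the 4-byte signature taken from the MBR. A hypothetical C view of one entry, inferred from the byte and dword stores in edd_mbr_sig_read:

    #include <stdint.h>

    struct mbr_signature {
        uint8_t  device;       /* BIOS drive number (0x80, 0x81, ...) */
        uint8_t  pad[3];       /* unused filler */
        uint32_t signature;    /* 4-byte signature copied from the MBR */
    } __attribute__((packed)); /* 8 bytes, matching the addw $8, %bx stride */
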
    25.1 --- a/xen/arch/x86/boot/x86_32.S	Mon Jul 02 10:31:03 2007 -0600
    25.2 +++ b/xen/arch/x86/boot/x86_32.S	Mon Jul 02 12:19:26 2007 -0600
    25.3 @@ -36,15 +36,29 @@ 1:      mov     %eax,(%edi)
    25.4  
    25.5  /* This is the default interrupt handler. */
    25.6  int_msg:
    25.7 -        .asciz "Unknown interrupt\n"
    25.8 +        .asciz "Unknown interrupt (cr2=%08x)\n"
    25.9 +hex_msg:
   25.10 +        .asciz "  %08x"
   25.11          ALIGN
   25.12  ignore_int:
   25.13 +        pusha
   25.14          cld
   25.15          mov     $(__HYPERVISOR_DS),%eax
   25.16          mov     %eax,%ds
   25.17          mov     %eax,%es
   25.18 +        mov     %cr2,%eax
   25.19 +        push    %eax
   25.20          pushl   $int_msg
   25.21          call    printk
   25.22 +        add     $8,%esp
   25.23 +        mov     %esp,%ebp
   25.24 +0:      pushl   (%ebp)
   25.25 +        add     $4,%ebp
   25.26 +        pushl   $hex_msg
   25.27 +        call    printk
   25.28 +        add     $8,%esp
   25.29 +        test    $0xffc,%ebp
   25.30 +        jnz     0b
   25.31  1:      jmp     1b
   25.32  
   25.33  ENTRY(stack_start)
   25.34 @@ -66,11 +80,6 @@ gdt_descr:
   25.35          .word   LAST_RESERVED_GDT_BYTE
   25.36          .long   gdt_table - FIRST_RESERVED_GDT_BYTE
   25.37  
   25.38 -        .word   0
   25.39 -nopaging_gdt_descr:
   25.40 -        .word   LAST_RESERVED_GDT_BYTE
   25.41 -        .long   sym_phys(gdt_table) - FIRST_RESERVED_GDT_BYTE
   25.42 -
   25.43          .align PAGE_SIZE, 0
   25.44  /* NB. Rings != 0 get access up to MACH2PHYS_VIRT_END. This allows access to */
   25.45  /*     the machine->physical mapping table. Ring 0 can access all memory.    */
    26.1 --- a/xen/arch/x86/boot/x86_64.S	Mon Jul 02 10:31:03 2007 -0600
    26.2 +++ b/xen/arch/x86/boot/x86_64.S	Mon Jul 02 12:19:26 2007 -0600
    26.3 @@ -56,12 +56,23 @@ 1:      movq    %rax,(%rdi)
    26.4  
    26.5  /* This is the default interrupt handler. */
    26.6  int_msg:
    26.7 -        .asciz "Unknown interrupt\n"
    26.8 +        .asciz "Unknown interrupt (cr2=%016lx)\n"
    26.9 +hex_msg:
   26.10 +        .asciz "    %016lx"
   26.11  ignore_int:
   26.12 -        cld
   26.13 +        SAVE_ALL
   26.14 +        movq    %cr2,%rsi
   26.15          leaq    int_msg(%rip),%rdi
   26.16          xorl    %eax,%eax
   26.17          call    printk
   26.18 +        movq    %rsp,%rbp
   26.19 +0:      movq    (%rbp),%rsi
   26.20 +        addq    $8,%rbp
   26.21 +        leaq    hex_msg(%rip),%rdi
   26.22 +        xorl    %eax,%eax
   26.23 +        call    printk
   26.24 +        testq   $0xff8,%rbp
   26.25 +        jnz     0b
   26.26  1:      jmp     1b
   26.27  
   26.28  
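
Both ignore_int handlers (the x86_32 version above and this x86_64 one) now print %cr2 and then walk the saved frame upward, dumping one word per iteration until the pointer crosses the next 4kB boundary, i.e. the top of the exception stack. A hypothetical C rendering of the 64-bit loop:

    /* Mirrors `0: movq (%rbp),%rsi; addq $8,%rbp; ...; testq $0xff8,%rbp;
     * jnz 0b`: stop once %rbp becomes 4kB-aligned. Uses Xen's printk. */
    extern void printk(const char *fmt, ...);

    static void dump_to_stack_top(unsigned long *rbp)
    {
        do {
            printk("    %016lx", *rbp);
            rbp++;
        } while ((unsigned long)rbp & 0xff8);
    }
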
    27.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    27.2 +++ b/xen/arch/x86/clear_page.S	Mon Jul 02 12:19:26 2007 -0600
    27.3 @@ -0,0 +1,26 @@
    27.4 +#include <xen/config.h>
    27.5 +#include <asm/page.h>
    27.6 +
    27.7 +#ifdef __i386__
    27.8 +#define ptr_reg %edx
    27.9 +#else
   27.10 +#define ptr_reg %rdi
   27.11 +#endif
   27.12 +
   27.13 +ENTRY(clear_page_sse2)
   27.14 +#ifdef __i386__
   27.15 +        mov     4(%esp), ptr_reg
   27.16 +#endif
   27.17 +        mov     $PAGE_SIZE/16, %ecx
   27.18 +        xor     %eax,%eax
   27.19 +
   27.20 +0:      dec     %ecx
   27.21 +        movnti  %eax, (ptr_reg)
   27.22 +        movnti  %eax, 4(ptr_reg)
   27.23 +        movnti  %eax, 8(ptr_reg)
   27.24 +        movnti  %eax, 12(ptr_reg)
   27.25 +        lea     16(ptr_reg), ptr_reg
   27.26 +        jnz     0b
   27.27 +
   27.28 +        sfence
   27.29 +        ret
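
The new clear_page_sse2 zeroes a page with movnti non-temporal stores, so clearing a page does not evict a page's worth of useful cache lines; the trailing sfence orders the weakly-ordered stores before the routine returns. A rough C equivalent using SSE2 intrinsics (a sketch, not the routine Xen actually builds):

    #include <emmintrin.h>

    #define PAGE_SIZE 4096

    static void clear_page_nt(void *page)
    {
        int *p = page;
        unsigned int i;

        for (i = 0; i < PAGE_SIZE / sizeof(int); i++)
            _mm_stream_si32(&p[i], 0);   /* movnti: bypass the cache */
        _mm_sfence();                    /* make the NT stores visible */
    }
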
    28.1 --- a/xen/arch/x86/domain.c	Mon Jul 02 10:31:03 2007 -0600
    28.2 +++ b/xen/arch/x86/domain.c	Mon Jul 02 12:19:26 2007 -0600
    28.3 @@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v, 
    28.4          pg = alloc_domheap_page(NULL);
    28.5          if ( !pg )
    28.6              return -ENOMEM;
    28.7 -        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
    28.8 +        d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
    28.9 +        clear_page(d->arch.mm_arg_xlat_l3);
   28.10      }
   28.11  
   28.12      l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
   28.13 @@ -444,7 +445,8 @@ int arch_domain_create(struct domain *d)
   28.14  
   28.15      if ( (pg = alloc_domheap_page(NULL)) == NULL )
   28.16          goto fail;
   28.17 -    d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
   28.18 +    d->arch.mm_perdomain_l2 = page_to_virt(pg);
   28.19 +    clear_page(d->arch.mm_perdomain_l2);
   28.20      for ( i = 0; i < (1 << pdpt_order); i++ )
   28.21          d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
   28.22              l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
   28.23 @@ -452,7 +454,8 @@ int arch_domain_create(struct domain *d)
   28.24  
   28.25      if ( (pg = alloc_domheap_page(NULL)) == NULL )
   28.26          goto fail;
   28.27 -    d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
   28.28 +    d->arch.mm_perdomain_l3 = page_to_virt(pg);
   28.29 +    clear_page(d->arch.mm_perdomain_l3);
   28.30      d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
   28.31          l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
   28.32                              __PAGE_HYPERVISOR);
    29.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Jul 02 10:31:03 2007 -0600
    29.2 +++ b/xen/arch/x86/hvm/hvm.c	Mon Jul 02 12:19:26 2007 -0600
    29.3 @@ -242,6 +242,11 @@ void hvm_domain_relinquish_resources(str
    29.4  {
    29.5      hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
    29.6      hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
    29.7 +
    29.8 +    pit_deinit(d);
    29.9 +    rtc_deinit(d);
   29.10 +    pmtimer_deinit(d);
   29.11 +    hpet_deinit(d);
   29.12  }
   29.13  
   29.14  void hvm_domain_destroy(struct domain *d)
   29.15 @@ -421,22 +426,11 @@ int hvm_vcpu_initialise(struct vcpu *v)
   29.16  
   29.17  void hvm_vcpu_destroy(struct vcpu *v)
   29.18  {
   29.19 -    struct domain *d = v->domain;
   29.20 -
   29.21      vlapic_destroy(v);
   29.22      hvm_funcs.vcpu_destroy(v);
   29.23  
   29.24      /* Event channel is already freed by evtchn_destroy(). */
   29.25      /*free_xen_event_channel(v, v->arch.hvm_vcpu.xen_port);*/
   29.26 -
   29.27 -    if ( v->vcpu_id == 0 )
   29.28 -    {
   29.29 -        /* NB. All these really belong in hvm_domain_destroy(). */
   29.30 -        pit_deinit(d);
   29.31 -        rtc_deinit(d);
   29.32 -        pmtimer_deinit(d);
   29.33 -        hpet_deinit(d);
   29.34 -    }
   29.35  }
   29.36  
   29.37  
    30.1 --- a/xen/arch/x86/hvm/io.c	Mon Jul 02 10:31:03 2007 -0600
    30.2 +++ b/xen/arch/x86/hvm/io.c	Mon Jul 02 12:19:26 2007 -0600
    30.3 @@ -858,6 +858,7 @@ void hvm_io_assist(void)
    30.4      }
    30.5  
    30.6      /* Copy register changes back into current guest state. */
    30.7 +    regs->eflags &= ~X86_EFLAGS_RF;
    30.8      hvm_load_cpu_guest_regs(v, regs);
    30.9      memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
   30.10  
    31.1 --- a/xen/arch/x86/hvm/platform.c	Mon Jul 02 10:31:03 2007 -0600
    31.2 +++ b/xen/arch/x86/hvm/platform.c	Mon Jul 02 12:19:26 2007 -0600
    31.3 @@ -1065,6 +1065,7 @@ void handle_mmio(unsigned long gpa)
    31.4      }
    31.5  
    31.6      regs->eip += inst_len; /* advance %eip */
    31.7 +    regs->eflags &= ~X86_EFLAGS_RF;
    31.8  
    31.9      switch ( mmio_op->instr ) {
   31.10      case INSTR_MOV:
   31.11 @@ -1122,6 +1123,7 @@ void handle_mmio(unsigned long gpa)
   31.12              /* IO read --> memory write */
   31.13              if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
   31.14              regs->eip -= inst_len; /* do not advance %eip */
   31.15 +            regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
   31.16              hvm_inject_exception(TRAP_page_fault, errcode, addr);
   31.17              return;
   31.18          }
   31.19 @@ -1150,6 +1152,7 @@ void handle_mmio(unsigned long gpa)
   31.20                          /* Failed on the page-spanning copy.  Inject PF into
   31.21                           * the guest for the address where we failed */
   31.22                          regs->eip -= inst_len; /* do not advance %eip */
   31.23 +                        regs->eflags |= X86_EFLAGS_RF; /* RF was set by #PF */
   31.24                          /* Must set CR2 at the failing address */ 
   31.25                          addr += size - rv;
   31.26                          gdprintk(XENLOG_DEBUG, "Pagefault on non-io side of a "
    32.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Jul 02 10:31:03 2007 -0600
    32.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Mon Jul 02 12:19:26 2007 -0600
    32.3 @@ -391,7 +391,7 @@ int svm_vmcb_restore(struct vcpu *v, str
    32.4      }
    32.5  
    32.6   skip_cr3:
    32.7 -    vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
    32.8 +    vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK;
    32.9      v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
   32.10      
   32.11      vmcb->idtr.limit = c->idtr_limit;
   32.12 @@ -448,7 +448,8 @@ int svm_vmcb_restore(struct vcpu *v, str
   32.13      /* update VMCB for nested paging restore */
   32.14      if ( paging_mode_hap(v->domain) ) {
   32.15          vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
   32.16 -        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
   32.17 +        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
   32.18 +                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
   32.19          vmcb->cr3 = c->cr3;
   32.20          vmcb->np_enable = 1;
   32.21          vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
   32.22 @@ -805,8 +806,10 @@ static void svm_ctxt_switch_from(struct 
   32.23          : : "a" (__pa(root_vmcb[cpu])) );
   32.24  
   32.25  #ifdef __x86_64__
   32.26 -    /* Resume use of IST2 for NMIs now that the host TR is reinstated. */
   32.27 -    idt_tables[cpu][TRAP_nmi].a |= 2UL << 32;
   32.28 +    /* Resume use of ISTs now that the host TR is reinstated. */
   32.29 +    idt_tables[cpu][TRAP_double_fault].a  |= 1UL << 32; /* IST1 */
   32.30 +    idt_tables[cpu][TRAP_nmi].a           |= 2UL << 32; /* IST2 */
   32.31 +    idt_tables[cpu][TRAP_machine_check].a |= 3UL << 32; /* IST3 */
   32.32  #endif
   32.33  }
   32.34  
   32.35 @@ -826,10 +829,12 @@ static void svm_ctxt_switch_to(struct vc
   32.36      set_segment_register(ss, 0);
   32.37  
   32.38      /*
   32.39 -     * Cannot use IST2 for NMIs while we are running with the guest TR. But
   32.40 -     * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET.
   32.41 +     * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
   32.42 +     * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
   32.43       */
   32.44 -    idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32);
   32.45 +    idt_tables[cpu][TRAP_double_fault].a  &= ~(3UL << 32);
   32.46 +    idt_tables[cpu][TRAP_nmi].a           &= ~(3UL << 32);
   32.47 +    idt_tables[cpu][TRAP_machine_check].a &= ~(3UL << 32);
   32.48  #endif
   32.49  
   32.50      svm_restore_dr(v);
   32.51 @@ -1823,9 +1828,19 @@ static int mov_to_cr(int gpreg, int cr, 
   32.52          break;
   32.53  
   32.54      case 4: /* CR4 */
   32.55 +        if ( value & HVM_CR4_GUEST_RESERVED_BITS )
   32.56 +        {
   32.57 +            HVM_DBG_LOG(DBG_LEVEL_1,
   32.58 +                        "Guest attempts to set reserved bit in CR4: %lx",
   32.59 +                        value);
   32.60 +            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
   32.61 +            break;
   32.62 +        }
   32.63 +
   32.64          if ( paging_mode_hap(v->domain) )
   32.65          {
   32.66 -            vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value;
   32.67 +            v->arch.hvm_svm.cpu_shadow_cr4 = value;
   32.68 +            vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
   32.69              paging_update_paging_modes(v);
   32.70              /* signal paging update to ASID handler */
   32.71              svm_asid_g_update_paging (v);
   32.72 @@ -1875,7 +1890,7 @@ static int mov_to_cr(int gpreg, int cr, 
   32.73          }
   32.74  
   32.75          v->arch.hvm_svm.cpu_shadow_cr4 = value;
   32.76 -        vmcb->cr4 = value | SVM_CR4_HOST_MASK;
   32.77 +        vmcb->cr4 = value | HVM_CR4_HOST_MASK;
   32.78    
   32.79          /*
   32.80           * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
   32.81 @@ -2071,9 +2086,11 @@ static inline void svm_do_msr_access(
   32.82          case MSR_IA32_TIME_STAMP_COUNTER:
   32.83              msr_content = hvm_get_guest_time(v);
   32.84              break;
   32.85 +
   32.86          case MSR_IA32_APICBASE:
   32.87              msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
   32.88              break;
   32.89 +
   32.90          case MSR_EFER:
   32.91              msr_content = v->arch.hvm_svm.cpu_shadow_efer;
   32.92              break;
   32.93 @@ -2095,6 +2112,10 @@ static inline void svm_do_msr_access(
   32.94              msr_content = 0;
   32.95              break;
   32.96  
   32.97 +        case MSR_K8_VM_HSAVE_PA:
   32.98 +            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
   32.99 +            break;
  32.100 +
  32.101          default:
  32.102              if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
  32.103                   rdmsr_safe(ecx, eax, edx) == 0 )
  32.104 @@ -2128,9 +2149,15 @@ static inline void svm_do_msr_access(
  32.105              hvm_set_guest_time(v, msr_content);
  32.106              pt_reset(v);
  32.107              break;
  32.108 +
  32.109          case MSR_IA32_APICBASE:
  32.110              vlapic_msr_set(vcpu_vlapic(v), msr_content);
  32.111              break;
  32.112 +
  32.113 +        case MSR_K8_VM_HSAVE_PA:
  32.114 +            svm_inject_exception(v, TRAP_gp_fault, 1, 0);
  32.115 +            break;
  32.116 +
  32.117          default:
  32.118              if ( !long_mode_do_msr_write(regs) )
  32.119                  wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
  32.120 @@ -2265,12 +2292,13 @@ static int svm_reset_to_realmode(struct 
  32.121      vmcb->cr2 = 0;
  32.122      vmcb->efer = EFER_SVME;
  32.123  
  32.124 -    vmcb->cr4 = SVM_CR4_HOST_MASK;
  32.125 +    vmcb->cr4 = HVM_CR4_HOST_MASK;
  32.126      v->arch.hvm_svm.cpu_shadow_cr4 = 0;
  32.127  
  32.128      if ( paging_mode_hap(v->domain) ) {
  32.129          vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
  32.130 -        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
  32.131 +        vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
  32.132 +                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
  32.133      }
  32.134  
  32.135      /* This will jump to ROMBIOS */
  32.136 @@ -2411,6 +2439,12 @@ asmlinkage void svm_vmexit_handler(struc
  32.137          break;
  32.138      }
  32.139  
  32.140 +    case VMEXIT_EXCEPTION_MC:
  32.141 +        HVMTRACE_0D(MCE, v);
  32.142 +        svm_store_cpu_guest_regs(v, regs, NULL);
  32.143 +        do_machine_check(regs);
  32.144 +        break;
  32.145 +
  32.146      case VMEXIT_VINTR:
  32.147          vmcb->vintr.fields.irq = 0;
  32.148          vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
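
[Editor's note] Taken together, the svm.c hunks above converge on one CR4 rule: the guest-visible value lives in cpu_shadow_cr4, the value actually loaded into the VMCB additionally carries the host-owned bits, and under nested paging PAE is left to the guest; a reserved-bit write is rejected with #GP(0) before any of this. A minimal sketch of that rule, with stand-in mask values (the real masks come from the hvm.h hunk later in this changeset and are filtered through mmu_cr4_features); effective_cr4() is an illustrative name, not a Xen function:

    #include <stdint.h>

    #define X86_CR4_PAE        (1UL << 5)
    #define X86_CR4_MCE        (1UL << 6)
    #define HVM_CR4_HOST_MASK  (X86_CR4_PAE | X86_CR4_MCE)  /* illustrative */

    /* guest_cr4 is what the guest wrote (and reads back via the shadow);
     * the return value is what gets programmed into vmcb->cr4. */
    static unsigned long effective_cr4(unsigned long guest_cr4, int nested_paging)
    {
        if ( nested_paging )
            /* The guest owns PAE; the host's other bits stay forced on. */
            return guest_cr4 | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
        /* Shadow paging: every host-owned bit, PAE included, stays set. */
        return guest_cr4 | HVM_CR4_HOST_MASK;
    }
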
    33.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Mon Jul 02 10:31:03 2007 -0600
    33.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Mon Jul 02 12:19:26 2007 -0600
    33.3 @@ -224,7 +224,7 @@ static int construct_vmcb(struct vcpu *v
    33.4      /* Guest CR4. */
    33.5      arch_svm->cpu_shadow_cr4 =
    33.6          read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
    33.7 -    vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK;
    33.8 +    vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK;
    33.9  
   33.10      paging_update_paging_modes(v);
   33.11      vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3; 
   33.12 @@ -235,7 +235,9 @@ static int construct_vmcb(struct vcpu *v
   33.13          vmcb->np_enable = 1; /* enable nested paging */
   33.14          vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
   33.15          vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
   33.16 -        vmcb->cr4 = arch_svm->cpu_shadow_cr4 = 0;
   33.17 +        vmcb->cr4 = arch_svm->cpu_shadow_cr4 =
   33.18 +                    (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
   33.19 +        vmcb->exception_intercepts = HVM_TRAP_MASK;
   33.20  
   33.21          /* No point in intercepting CR0/3/4 reads, because the hardware 
   33.22           * will return the guest versions anyway. */
   33.23 @@ -249,7 +251,7 @@ static int construct_vmcb(struct vcpu *v
   33.24      }
   33.25      else
   33.26      {
   33.27 -        vmcb->exception_intercepts = 1U << TRAP_page_fault;
   33.28 +        vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_page_fault);
   33.29      }
   33.30  
   33.31      return 0;
    34.1 --- a/xen/arch/x86/hvm/vmx/vmcs.c	Mon Jul 02 10:31:03 2007 -0600
    34.2 +++ b/xen/arch/x86/hvm/vmx/vmcs.c	Mon Jul 02 12:19:26 2007 -0600
    34.3 @@ -43,6 +43,7 @@ u32 vmx_cpu_based_exec_control __read_mo
    34.4  u32 vmx_secondary_exec_control __read_mostly;
    34.5  u32 vmx_vmexit_control __read_mostly;
    34.6  u32 vmx_vmentry_control __read_mostly;
    34.7 +bool_t cpu_has_vmx_ins_outs_instr_info __read_mostly;
    34.8  
    34.9  static u32 vmcs_revision_id __read_mostly;
   34.10  
   34.11 @@ -133,6 +134,7 @@ void vmx_init_vmcs_config(void)
   34.12          vmx_secondary_exec_control = _vmx_secondary_exec_control;
   34.13          vmx_vmexit_control         = _vmx_vmexit_control;
   34.14          vmx_vmentry_control        = _vmx_vmentry_control;
   34.15 +        cpu_has_vmx_ins_outs_instr_info = !!(vmx_msr_high & (1U<<22));
   34.16      }
   34.17      else
   34.18      {
   34.19 @@ -142,6 +144,7 @@ void vmx_init_vmcs_config(void)
   34.20          BUG_ON(vmx_secondary_exec_control != _vmx_secondary_exec_control);
   34.21          BUG_ON(vmx_vmexit_control != _vmx_vmexit_control);
   34.22          BUG_ON(vmx_vmentry_control != _vmx_vmentry_control);
   34.23 +        BUG_ON(cpu_has_vmx_ins_outs_instr_info != !!(vmx_msr_high & (1U<<22)));
   34.24      }
   34.25  
   34.26      /* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
   34.27 @@ -421,7 +424,7 @@ static void construct_vmcs(struct vcpu *
   34.28      __vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
   34.29  #endif
   34.30  
   34.31 -    __vmwrite(EXCEPTION_BITMAP, 1U << TRAP_page_fault);
   34.32 +    __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
   34.33  
   34.34      /* Guest CR0. */
   34.35      cr0 = read_cr0();
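
[Editor's note] vmx_init_vmcs_config() caches bit 22 of the high dword of IA32_VMX_BASIC (bit 54 of the MSR, the "INS/OUTS instruction information" capability) once on the boot CPU; every secondary CPU re-derives the bit and BUG()s on a mismatch, catching asymmetric VMX capabilities. The boot-sets / secondaries-verify pattern, sketched (cache_vmx_caps is an illustrative name; assert() stands in for BUG_ON):

    #include <assert.h>
    #include <stdint.h>

    static int caps_cached;
    static int has_ins_outs_info;

    void cache_vmx_caps(uint32_t vmx_msr_high)   /* called once per CPU */
    {
        int bit = !!(vmx_msr_high & (1U << 22)); /* MSR bit 54 */
        if ( !caps_cached ) {
            has_ins_outs_info = bit;             /* boot CPU records it */
            caps_cached = 1;
        } else {
            assert(has_ins_outs_info == bit);    /* secondaries must agree */
        }
    }
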
    35.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Jul 02 10:31:03 2007 -0600
    35.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Jul 02 12:19:26 2007 -0600
    35.3 @@ -560,6 +560,9 @@ int vmx_vmcs_restore(struct vcpu *v, str
    35.4      __vmwrite(GUEST_RSP, c->rsp);
    35.5      __vmwrite(GUEST_RFLAGS, c->rflags);
    35.6  
    35.7 +    v->arch.hvm_vmx.cpu_cr0 = (c->cr0 | X86_CR0_PE | X86_CR0_PG 
    35.8 +                               | X86_CR0_NE | X86_CR0_WP | X86_CR0_ET);
    35.9 +    __vmwrite(GUEST_CR0, v->arch.hvm_vmx.cpu_cr0);
   35.10      v->arch.hvm_vmx.cpu_shadow_cr0 = c->cr0;
   35.11      __vmwrite(CR0_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr0);
   35.12  
   35.13 @@ -577,33 +580,17 @@ int vmx_vmcs_restore(struct vcpu *v, str
   35.14          goto skip_cr3;
   35.15      }
   35.16  
   35.17 -    if (c->cr3 == v->arch.hvm_vmx.cpu_cr3) {
   35.18 -        /*
   35.19 -         * This is simple TLB flush, implying the guest has
   35.20 -         * removed some translation or changed page attributes.
   35.21 -         * We simply invalidate the shadow.
   35.22 -         */
   35.23 -        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
   35.24 -        if (mfn != pagetable_get_pfn(v->arch.guest_table)) {
   35.25 -            goto bad_cr3;
   35.26 -        }
   35.27 -    } else {
   35.28 -        /*
   35.29 -         * If different, make a shadow. Check if the PDBR is valid
   35.30 -         * first.
   35.31 -         */
   35.32 -        HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
   35.33 -        /* current!=vcpu as not called by arch_vmx_do_launch */
   35.34 -        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
   35.35 -        if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
   35.36 -            goto bad_cr3;
   35.37 -        }
   35.38 -        old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   35.39 -        v->arch.guest_table = pagetable_from_pfn(mfn);
   35.40 -        if (old_base_mfn)
   35.41 -             put_page(mfn_to_page(old_base_mfn));
   35.42 -        v->arch.hvm_vmx.cpu_cr3 = c->cr3;
   35.43 +    HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 c->cr3 = %"PRIx64, c->cr3);
   35.44 +    /* current!=vcpu as not called by arch_vmx_do_launch */
   35.45 +    mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
   35.46 +    if( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain)) {
   35.47 +        goto bad_cr3;
   35.48      }
   35.49 +    old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
   35.50 +    v->arch.guest_table = pagetable_from_pfn(mfn);
   35.51 +    if (old_base_mfn)
   35.52 +        put_page(mfn_to_page(old_base_mfn));
   35.53 +    v->arch.hvm_vmx.cpu_cr3 = c->cr3;
   35.54  
   35.55   skip_cr3:
   35.56  #if defined(__x86_64__)
   35.57 @@ -615,7 +602,7 @@ int vmx_vmcs_restore(struct vcpu *v, str
   35.58      }
   35.59  #endif
   35.60  
   35.61 -    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
   35.62 +    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
   35.63      v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
   35.64      __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
   35.65  
   35.66 @@ -1315,16 +1302,20 @@ static int __get_instruction_length(void
   35.67  
   35.68  static void inline __update_guest_eip(unsigned long inst_len)
   35.69  {
   35.70 -    unsigned long current_eip, intr_shadow;
   35.71 -
   35.72 -    current_eip = __vmread(GUEST_RIP);
   35.73 -    __vmwrite(GUEST_RIP, current_eip + inst_len);
   35.74 -
   35.75 -    intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
   35.76 -    if ( intr_shadow & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
   35.77 +    unsigned long x;
   35.78 +
   35.79 +    x = __vmread(GUEST_RIP);
   35.80 +    __vmwrite(GUEST_RIP, x + inst_len);
   35.81 +
   35.82 +    x = __vmread(GUEST_RFLAGS);
   35.83 +    if ( x & X86_EFLAGS_RF )
   35.84 +        __vmwrite(GUEST_RFLAGS, x & ~X86_EFLAGS_RF);
   35.85 +
   35.86 +    x = __vmread(GUEST_INTERRUPTIBILITY_INFO);
   35.87 +    if ( x & (VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS) )
   35.88      {
   35.89 -        intr_shadow &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
   35.90 -        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
   35.91 +        x &= ~(VMX_INTR_SHADOW_STI | VMX_INTR_SHADOW_MOV_SS);
   35.92 +        __vmwrite(GUEST_INTERRUPTIBILITY_INFO, x);
   35.93      }
   35.94  }
   35.95  
   35.96 @@ -1475,16 +1466,34 @@ static void vmx_do_invlpg(unsigned long 
   35.97      paging_invlpg(v, va);
   35.98  }
   35.99  
  35.100 -/*
  35.101 - * get segment for string pio according to guest instruction
  35.102 - */
  35.103 -static void vmx_str_pio_get_segment(int long_mode, unsigned long eip,
  35.104 -                                   int inst_len, enum x86_segment *seg)
  35.105 +/* Get segment for OUTS according to guest instruction. */
  35.106 +static enum x86_segment vmx_outs_get_segment(
  35.107 +    int long_mode, unsigned long eip, int inst_len)
  35.108  {
  35.109      unsigned char inst[MAX_INST_LEN];
  35.110 +    enum x86_segment seg = x86_seg_ds;
  35.111      int i;
  35.112      extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
  35.113  
  35.114 +    if ( likely(cpu_has_vmx_ins_outs_instr_info) )
  35.115 +    {
  35.116 +        unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO);
  35.117 +
  35.118 +        /* Get segment register according to bits 17:15. */
  35.119 +        switch ( (instr_info >> 15) & 7 )
  35.120 +        {
  35.121 +        case 0: seg = x86_seg_es; break;
  35.122 +        case 1: seg = x86_seg_cs; break;
  35.123 +        case 2: seg = x86_seg_ss; break;
  35.124 +        case 3: seg = x86_seg_ds; break;
  35.125 +        case 4: seg = x86_seg_fs; break;
  35.126 +        case 5: seg = x86_seg_gs; break;
  35.127 +        default: BUG();
  35.128 +        }
  35.129 +
  35.130 +        goto out;
  35.131 +    }
  35.132 +
  35.133      if ( !long_mode )
  35.134          eip += __vmread(GUEST_CS_BASE);
  35.135  
  35.136 @@ -1493,7 +1502,7 @@ static void vmx_str_pio_get_segment(int 
  35.137      {
  35.138          gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
  35.139          domain_crash(current->domain);
  35.140 -        return;
  35.141 +        goto out;
  35.142      }
  35.143  
  35.144      for ( i = 0; i < inst_len; i++ )
  35.145 @@ -1510,25 +1519,28 @@ static void vmx_str_pio_get_segment(int 
  35.146  #endif
  35.147              continue;
  35.148          case 0x2e: /* CS */
  35.149 -            *seg = x86_seg_cs;
  35.150 +            seg = x86_seg_cs;
  35.151              continue;
  35.152          case 0x36: /* SS */
  35.153 -            *seg = x86_seg_ss;
  35.154 +            seg = x86_seg_ss;
  35.155              continue;
  35.156          case 0x26: /* ES */
  35.157 -            *seg = x86_seg_es;
  35.158 +            seg = x86_seg_es;
  35.159              continue;
  35.160          case 0x64: /* FS */
  35.161 -            *seg = x86_seg_fs;
  35.162 +            seg = x86_seg_fs;
  35.163              continue;
  35.164          case 0x65: /* GS */
  35.165 -            *seg = x86_seg_gs;
  35.166 +            seg = x86_seg_gs;
  35.167              continue;
  35.168          case 0x3e: /* DS */
  35.169 -            *seg = x86_seg_ds;
  35.170 +            seg = x86_seg_ds;
  35.171              continue;
  35.172          }
  35.173      }
  35.174 +
  35.175 + out:
  35.176 +    return seg;
  35.177  }
  35.178  
  35.179  static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
  35.180 @@ -1541,7 +1553,7 @@ static int vmx_str_pio_check_descriptor(
  35.181      *base = 0;
  35.182      *limit = 0;
  35.183      if ( seg != x86_seg_es )
  35.184 -        vmx_str_pio_get_segment(long_mode, eip, inst_len, &seg);
  35.185 +        seg = vmx_outs_get_segment(long_mode, eip, inst_len);
  35.186  
  35.187      switch ( seg )
  35.188      {
  35.189 @@ -1587,7 +1599,7 @@ static int vmx_str_pio_check_descriptor(
  35.190      }
  35.191      *ar_bytes = __vmread(ar_field);
  35.192  
  35.193 -    return !(*ar_bytes & 0x10000);
  35.194 +    return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE);
  35.195  }
  35.196  
  35.197  
  35.198 @@ -1896,7 +1908,7 @@ static void vmx_world_save(struct vcpu *
  35.199      c->eip += __get_instruction_length(); /* Safe: MOV Cn, LMSW, CLTS */
  35.200  
  35.201      c->esp = __vmread(GUEST_RSP);
  35.202 -    c->eflags = __vmread(GUEST_RFLAGS);
  35.203 +    c->eflags = __vmread(GUEST_RFLAGS) & ~X86_EFLAGS_RF;
  35.204  
  35.205      c->cr0 = v->arch.hvm_vmx.cpu_shadow_cr0;
  35.206      c->cr3 = v->arch.hvm_vmx.cpu_cr3;
  35.207 @@ -1997,7 +2009,7 @@ static int vmx_world_restore(struct vcpu
  35.208      else
  35.209          HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
  35.210  
  35.211 -    __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
  35.212 +    __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
  35.213      v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
  35.214      __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
  35.215  
  35.216 @@ -2272,7 +2284,6 @@ static int vmx_set_cr0(unsigned long val
  35.217                      "Enabling CR0.PE at %%eip 0x%lx", eip);
  35.218          if ( vmx_assist(v, VMX_ASSIST_RESTORE) )
  35.219          {
  35.220 -            eip = __vmread(GUEST_RIP);
  35.221              HVM_DBG_LOG(DBG_LEVEL_1,
  35.222                          "Restoring to %%eip 0x%lx", eip);
  35.223              return 0; /* do not update eip! */
  35.224 @@ -2397,6 +2408,15 @@ static int mov_to_cr(int gp, int cr, str
  35.225      case 4: /* CR4 */
  35.226          old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
  35.227  
  35.228 +        if ( value & HVM_CR4_GUEST_RESERVED_BITS )
  35.229 +        {
  35.230 +            HVM_DBG_LOG(DBG_LEVEL_1,
  35.231 +                        "Guest attempts to set reserved bit in CR4: %lx",
  35.232 +                        value);
  35.233 +            vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
  35.234 +            break;
  35.235 +        }
  35.236 +
  35.237          if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
  35.238          {
  35.239              if ( vmx_pgbit_test(v) )
  35.240 @@ -2437,7 +2457,7 @@ static int mov_to_cr(int gp, int cr, str
  35.241              }
  35.242          }
  35.243  
  35.244 -        __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
  35.245 +        __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
  35.246          v->arch.hvm_vmx.cpu_shadow_cr4 = value;
  35.247          __vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
  35.248  
  35.249 @@ -2581,7 +2601,7 @@ static inline int vmx_do_msr_read(struct
  35.250      case MSR_IA32_APICBASE:
  35.251          msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
  35.252          break;
  35.253 -    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
  35.254 +    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
  35.255          goto gp_fault;
  35.256      default:
  35.257          if ( long_mode_do_msr_read(regs) )
  35.258 @@ -2707,7 +2727,7 @@ static inline int vmx_do_msr_write(struc
  35.259      case MSR_IA32_APICBASE:
  35.260          vlapic_msr_set(vcpu_vlapic(v), msr_content);
  35.261          break;
  35.262 -    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_CR4_FIXED1:
  35.263 +    case MSR_IA32_VMX_BASIC...MSR_IA32_VMX_PROCBASED_CTLS2:
  35.264          goto gp_fault;
  35.265      default:
  35.266          if ( !long_mode_do_msr_write(regs) )
  35.267 @@ -2823,7 +2843,8 @@ static void vmx_reflect_exception(struct
  35.268      }
  35.269  }
  35.270  
  35.271 -static void vmx_failed_vmentry(unsigned int exit_reason)
  35.272 +static void vmx_failed_vmentry(unsigned int exit_reason,
  35.273 +                               struct cpu_user_regs *regs)
  35.274  {
  35.275      unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
  35.276      unsigned long exit_qualification;
  35.277 @@ -2840,6 +2861,9 @@ static void vmx_failed_vmentry(unsigned 
  35.278          break;
  35.279      case EXIT_REASON_MACHINE_CHECK:
  35.280          printk("caused by machine check.\n");
  35.281 +        HVMTRACE_0D(MCE, current);
  35.282 +        vmx_store_cpu_guest_regs(current, regs, NULL);
  35.283 +        do_machine_check(regs);
  35.284          break;
  35.285      default:
  35.286          printk("reason not known yet!");
  35.287 @@ -2869,7 +2893,7 @@ asmlinkage void vmx_vmexit_handler(struc
  35.288          local_irq_enable();
  35.289  
  35.290      if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
  35.291 -        return vmx_failed_vmentry(exit_reason);
  35.292 +        return vmx_failed_vmentry(exit_reason, regs);
  35.293  
  35.294      switch ( exit_reason )
  35.295      {
  35.296 @@ -2920,12 +2944,20 @@ asmlinkage void vmx_vmexit_handler(struc
  35.297              vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
  35.298              break;
  35.299          case TRAP_nmi:
  35.300 -            HVMTRACE_0D(NMI, v);
  35.301              if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
  35.302 +            {
  35.303 +                HVMTRACE_0D(NMI, v);
  35.304 +                vmx_store_cpu_guest_regs(v, regs, NULL);
  35.305                  do_nmi(regs); /* Real NMI, vector 2: normal processing. */
  35.306 +            }
  35.307              else
  35.308                  vmx_reflect_exception(v);
  35.309              break;
  35.310 +        case TRAP_machine_check:
  35.311 +            HVMTRACE_0D(MCE, v);
  35.312 +            vmx_store_cpu_guest_regs(v, regs, NULL);
  35.313 +            do_machine_check(regs);
  35.314 +            break;
  35.315          default:
  35.316              goto exit_and_crash;
  35.317          }
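
[Editor's note] vmx_outs_get_segment() above prefers the VM-exit instruction-information field over re-fetching the instruction and scanning its prefixes: bits 17:15 encode the segment as ES, CS, SS, DS, FS, GS, with 6 and 7 reserved. A standalone sketch of the decode; note the local enum is ordered to match the hardware encoding, which the real x86_seg_* enum need not be, and decode_outs_segment is an illustrative name:

    #include <assert.h>

    enum seg { SEG_ES, SEG_CS, SEG_SS, SEG_DS, SEG_FS, SEG_GS };

    static enum seg decode_outs_segment(unsigned int instr_info)
    {
        unsigned int field = (instr_info >> 15) & 7;  /* bits 17:15 */
        assert(field <= SEG_GS);     /* 6 and 7 are reserved encodings */
        return (enum seg)field;      /* enum order matches the encoding */
    }
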
    36.1 --- a/xen/arch/x86/io_apic.c	Mon Jul 02 10:31:03 2007 -0600
    36.2 +++ b/xen/arch/x86/io_apic.c	Mon Jul 02 12:19:26 2007 -0600
    36.3 @@ -371,7 +371,7 @@ static int pin_2_irq(int idx, int apic, 
    36.4   * so mask in all cases should simply be TARGET_CPUS
    36.5   */
    36.6  #ifdef CONFIG_SMP
    36.7 -void __init setup_ioapic_dest(void)
    36.8 +void /*__init*/ setup_ioapic_dest(void)
    36.9  {
   36.10      int pin, ioapic, irq, irq_entry;
   36.11  
   36.12 @@ -849,7 +849,7 @@ static inline void UNEXPECTED_IO_APIC(vo
   36.13  {
   36.14  }
   36.15  
   36.16 -void __init __print_IO_APIC(void)
   36.17 +void /*__init*/ __print_IO_APIC(void)
   36.18  {
   36.19      int apic, i;
   36.20      union IO_APIC_reg_00 reg_00;
    37.1 --- a/xen/arch/x86/mm.c	Mon Jul 02 10:31:03 2007 -0600
    37.2 +++ b/xen/arch/x86/mm.c	Mon Jul 02 12:19:26 2007 -0600
    37.3 @@ -3240,6 +3240,7 @@ static int ptwr_emulated_update(
    37.4      struct ptwr_emulate_ctxt *ptwr_ctxt)
    37.5  {
    37.6      unsigned long mfn;
    37.7 +    unsigned long unaligned_addr = addr;
    37.8      struct page_info *page;
    37.9      l1_pgentry_t pte, ol1e, nl1e, *pl1e;
   37.10      struct vcpu *v = current;
   37.11 @@ -3294,7 +3295,7 @@ static int ptwr_emulated_update(
   37.12      if ( unlikely(!get_page_from_l1e(nl1e, d)) )
   37.13      {
   37.14          if ( (CONFIG_PAGING_LEVELS >= 3) && is_pv_32bit_domain(d) &&
   37.15 -             (bytes == 4) && (addr & 4) && !do_cmpxchg &&
   37.16 +             (bytes == 4) && (unaligned_addr & 4) && !do_cmpxchg &&
   37.17               (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
   37.18          {
   37.19              /*
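
[Editor's note] The ptwr fix snapshots addr on entry because, presumably, the function aligns addr down later (the masking itself is outside this hunk) while the PAE special case must test bit 2 of the address the guest actually wrote. A hedged fragment of the "snapshot before you clobber" shape; only the snapshot and the final test are taken from the hunk, the rest is hypothetical:

    unsigned long unaligned_addr = addr;  /* keep the caller's low bits */
    addr &= ~7UL;                         /* presumed later alignment */
    /* ... */
    if ( (bytes == 4) && (unaligned_addr & 4) /* && ... */ )
        ;  /* PAE case keyed off the original, unaligned address */
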
    38.1 --- a/xen/arch/x86/platform_hypercall.c	Mon Jul 02 10:31:03 2007 -0600
    38.2 +++ b/xen/arch/x86/platform_hypercall.c	Mon Jul 02 12:19:26 2007 -0600
    38.3 @@ -20,12 +20,20 @@
    38.4  #include <xen/guest_access.h>
    38.5  #include <asm/current.h>
    38.6  #include <public/platform.h>
    38.7 +#include <asm/edd.h>
    38.8  #include <asm/mtrr.h>
    38.9  #include "cpu/mtrr/mtrr.h"
   38.10  
   38.11 +extern uint16_t boot_edid_caps;
   38.12 +extern uint8_t boot_edid_info[];
   38.13 +
   38.14  #ifndef COMPAT
   38.15  typedef long ret_t;
   38.16  DEFINE_SPINLOCK(xenpf_lock);
   38.17 +# undef copy_from_compat
   38.18 +# define copy_from_compat copy_from_guest
   38.19 +# undef copy_to_compat
   38.20 +# define copy_to_compat copy_to_guest
   38.21  #else
   38.22  extern spinlock_t xenpf_lock;
   38.23  #endif
   38.24 @@ -151,6 +159,94 @@ ret_t do_platform_op(XEN_GUEST_HANDLE(xe
   38.25      }
   38.26      break;
   38.27  
   38.28 +    case XENPF_firmware_info:
   38.29 +        switch ( op->u.firmware_info.type )
   38.30 +        {
   38.31 +        case XEN_FW_DISK_INFO: {
   38.32 +            const struct edd_info *info;
   38.33 +            u16 length;
   38.34 +
   38.35 +            ret = -ESRCH;
   38.36 +            if ( op->u.firmware_info.index >= bootsym(boot_edd_info_nr) )
   38.37 +                break;
   38.38 +
   38.39 +            info = bootsym(boot_edd_info) + op->u.firmware_info.index;
   38.40 +
   38.41 +            /* Transfer the EDD info block. */
   38.42 +            ret = -EFAULT;
   38.43 +            if ( copy_from_compat(&length, op->u.firmware_info.u.
   38.44 +                                  disk_info.edd_params, 1) )
   38.45 +                break;
   38.46 +            if ( length > info->edd_device_params.length )
   38.47 +                length = info->edd_device_params.length;
   38.48 +            if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
   38.49 +                                (u8 *)&info->edd_device_params,
   38.50 +                                length) )
   38.51 +                break;
   38.52 +            if ( copy_to_compat(op->u.firmware_info.u.disk_info.edd_params,
   38.53 +                                &length, 1) )
   38.54 +                break;
   38.55 +
   38.56 +            /* Transfer miscellaneous other information values. */
   38.57 +#define C(x) op->u.firmware_info.u.disk_info.x = info->x
   38.58 +            C(device);
   38.59 +            C(version);
   38.60 +            C(interface_support);
   38.61 +            C(legacy_max_cylinder);
   38.62 +            C(legacy_max_head);
   38.63 +            C(legacy_sectors_per_track);
   38.64 +#undef C
   38.65 +
   38.66 +            ret = (copy_field_to_guest(u_xenpf_op, op,
   38.67 +                                      u.firmware_info.u.disk_info)
   38.68 +                   ? -EFAULT : 0);
   38.69 +            break;
   38.70 +        }
   38.71 +        case XEN_FW_DISK_MBR_SIGNATURE: {
   38.72 +            const struct mbr_signature *sig;
   38.73 +
   38.74 +            ret = -ESRCH;
   38.75 +            if ( op->u.firmware_info.index >= bootsym(boot_mbr_signature_nr) )
   38.76 +                break;
   38.77 +
   38.78 +            sig = bootsym(boot_mbr_signature) + op->u.firmware_info.index;
   38.79 +
   38.80 +            op->u.firmware_info.u.disk_mbr_signature.device = sig->device;
   38.81 +            op->u.firmware_info.u.disk_mbr_signature.mbr_signature =
   38.82 +                sig->signature;
   38.83 +
   38.84 +            ret = (copy_field_to_guest(u_xenpf_op, op,
   38.85 +                                      u.firmware_info.u.disk_mbr_signature)
   38.86 +                   ? -EFAULT : 0);
   38.87 +            break;
   38.88 +        }
   38.89 +        case XEN_FW_VBEDDC_INFO:
   38.90 +            ret = -ESRCH;
   38.91 +            if ( op->u.firmware_info.index != 0 )
   38.92 +                break;
   38.93 +            if ( *(u32 *)bootsym(boot_edid_info) == 0x13131313 )
   38.94 +                break;
   38.95 +
   38.96 +            op->u.firmware_info.u.vbeddc_info.capabilities =
   38.97 +                bootsym(boot_edid_caps);
   38.98 +            op->u.firmware_info.u.vbeddc_info.edid_transfer_time =
   38.99 +                bootsym(boot_edid_caps) >> 8;
  38.100 +
  38.101 +            ret = 0;
  38.102 +            if ( copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
  38.103 +                                     u.vbeddc_info.capabilities) ||
  38.104 +                 copy_field_to_guest(u_xenpf_op, op, u.firmware_info.
  38.105 +                                     u.vbeddc_info.edid_transfer_time) ||
  38.106 +                 copy_to_compat(op->u.firmware_info.u.vbeddc_info.edid,
  38.107 +                                bootsym(boot_edid_info), 128) )
  38.108 +                ret = -EFAULT;
  38.109 +            break;
  38.110 +        default:
  38.111 +            ret = -EINVAL;
  38.112 +            break;
  38.113 +        }
  38.114 +        break;
  38.115 +
  38.116      default:
  38.117          ret = -ENOSYS;
  38.118          break;
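
[Editor's note] The XEN_FW_DISK_INFO handler uses a classic bounded-copy handshake: read the guest's declared buffer length, clamp it to the real EDD parameter block size, copy that much, then write the transferred length back. A minimal user-space sketch of the same handshake; transfer_block is an illustrative name and memcpy stands in for the copy_{from,to}_compat guest-copy helpers:

    #include <stdint.h>
    #include <string.h>

    static uint16_t transfer_block(void *dst, uint16_t dst_len,
                                   const void *src, uint16_t src_len)
    {
        uint16_t len = (dst_len < src_len) ? dst_len : src_len;  /* clamp */
        memcpy(dst, src, len);
        return len;  /* caller stores this back where dst_len came from */
    }
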
    39.1 --- a/xen/arch/x86/setup.c	Mon Jul 02 10:31:03 2007 -0600
    39.2 +++ b/xen/arch/x86/setup.c	Mon Jul 02 12:19:26 2007 -0600
    39.3 @@ -405,7 +405,7 @@ void init_done(void)
    39.4  void __init __start_xen(unsigned long mbi_p)
    39.5  {
    39.6      char *memmap_type = NULL;
    39.7 -    char __cmdline[] = "", *cmdline = __cmdline;
    39.8 +    char __cmdline[] = "", *cmdline = __cmdline, *kextra;
    39.9      unsigned long _initrd_start = 0, _initrd_len = 0;
   39.10      unsigned int initrdidx = 1;
   39.11      char *_policy_start = NULL;
   39.12 @@ -426,6 +426,17 @@ void __init __start_xen(unsigned long mb
   39.13      /* Parse the command-line options. */
   39.14      if ( (mbi->flags & MBI_CMDLINE) && (mbi->cmdline != 0) )
   39.15          cmdline = __va(mbi->cmdline);
   39.16 +    if ( (kextra = strstr(cmdline, " -- ")) != NULL )
   39.17 +    {
   39.18 +        /*
   39.19 +         * Options after ' -- ' separator belong to dom0.
   39.20 +         *  1. Orphan dom0's options from Xen's command line.
   39.21 +         *  2. Skip all but final leading space from dom0's options.
   39.22 +         */
   39.23 +        *kextra = '\0';
   39.24 +        kextra += 3;
   39.25 +        while ( kextra[1] == ' ' ) kextra++;
   39.26 +    }
   39.27      cmdline_parse(cmdline);
   39.28  
   39.29      parse_video_info();
   39.30 @@ -494,7 +505,7 @@ void __init __start_xen(unsigned long mb
   39.31  
   39.32      printk("Disc information:\n");
   39.33      printk(" Found %d MBR signatures\n",
   39.34 -           bootsym(boot_edd_signature_nr));
   39.35 +           bootsym(boot_mbr_signature_nr));
   39.36      printk(" Found %d EDD information structures\n",
   39.37             bootsym(boot_edd_info_nr));
   39.38  
   39.39 @@ -1009,18 +1020,27 @@ void __init __start_xen(unsigned long mb
   39.40  
   39.41      /* Grab the DOM0 command line. */
   39.42      cmdline = (char *)(mod[0].string ? __va(mod[0].string) : NULL);
   39.43 -    if ( cmdline != NULL )
   39.44 +    if ( (cmdline != NULL) || (kextra != NULL) )
   39.45      {
   39.46          static char dom0_cmdline[MAX_GUEST_CMDLINE];
   39.47  
   39.48 -        /* Skip past the image name and copy to a local buffer. */
   39.49 -        while ( *cmdline == ' ' ) cmdline++;
   39.50 -        if ( (cmdline = strchr(cmdline, ' ')) != NULL )
   39.51 +        dom0_cmdline[0] = '\0';
   39.52 +
   39.53 +        if ( cmdline != NULL )
   39.54          {
   39.55 +            /* Skip past the image name and copy to a local buffer. */
   39.56              while ( *cmdline == ' ' ) cmdline++;
   39.57 -            safe_strcpy(dom0_cmdline, cmdline);
   39.58 +            if ( (cmdline = strchr(cmdline, ' ')) != NULL )
   39.59 +            {
   39.60 +                while ( *cmdline == ' ' ) cmdline++;
   39.61 +                safe_strcpy(dom0_cmdline, cmdline);
   39.62 +            }
   39.63          }
   39.64  
   39.65 +        if ( kextra != NULL )
   39.66 +            /* kextra always includes exactly one leading space. */
   39.67 +            safe_strcat(dom0_cmdline, kextra);
   39.68 +
   39.69          /* Append any extra parameters. */
   39.70          if ( skip_ioapic_setup && !strstr(dom0_cmdline, "noapic") )
   39.71              safe_strcat(dom0_cmdline, " noapic");
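
[Editor's note] __start_xen() now splits the boot command line at a literal " -- ": what precedes the separator is parsed by Xen, what follows is appended to dom0's command line. The splitting logic, extracted into a standalone sketch (split_dom0_args is an illustrative name); as the patch notes, the returned pointer keeps exactly one leading space, which the later safe_strcat relies on:

    #include <stddef.h>
    #include <string.h>

    static char *split_dom0_args(char *cmdline)
    {
        char *kextra = strstr(cmdline, " -- ");
        if ( kextra == NULL )
            return NULL;          /* no dom0 options present */
        *kextra = '\0';           /* orphan dom0's options from Xen's */
        kextra += 3;              /* now at the space before dom0's args */
        while ( kextra[1] == ' ' )
            kextra++;             /* skip all but one leading space */
        return kextra;
    }

    /* e.g. on "dom0_mem=512M --  console=tty0" this returns " console=tty0"
     * and leaves cmdline as "dom0_mem=512M". */
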
    40.1 --- a/xen/arch/x86/traps.c	Mon Jul 02 10:31:03 2007 -0600
    40.2 +++ b/xen/arch/x86/traps.c	Mon Jul 02 12:19:26 2007 -0600
    40.3 @@ -86,6 +86,7 @@ asmlinkage void _name(void);            
    40.4  asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
    40.5  
    40.6  asmlinkage void nmi(void);
    40.7 +asmlinkage void machine_check(void);
    40.8  DECLARE_TRAP_HANDLER(divide_error);
    40.9  DECLARE_TRAP_HANDLER(debug);
   40.10  DECLARE_TRAP_HANDLER(int3);
   40.11 @@ -103,7 +104,6 @@ DECLARE_TRAP_HANDLER(coprocessor_error);
   40.12  DECLARE_TRAP_HANDLER(simd_coprocessor_error);
   40.13  DECLARE_TRAP_HANDLER(alignment_check);
   40.14  DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
   40.15 -DECLARE_TRAP_HANDLER(machine_check);
   40.16  
   40.17  long do_set_debugreg(int reg, unsigned long value);
   40.18  unsigned long do_get_debugreg(int reg);
   40.19 @@ -631,6 +631,7 @@ static int emulate_forced_invalid_op(str
   40.20      regs->ecx = c;
   40.21      regs->edx = d;
   40.22      regs->eip = eip;
   40.23 +    regs->eflags &= ~X86_EFLAGS_RF;
   40.24  
   40.25      return EXCRET_fault_fixed;
   40.26  }
   40.27 @@ -730,10 +731,11 @@ asmlinkage int do_int3(struct cpu_user_r
   40.28      return do_guest_trap(TRAP_int3, regs, 0);
   40.29  }
   40.30  
   40.31 -asmlinkage int do_machine_check(struct cpu_user_regs *regs)
   40.32 +asmlinkage void do_machine_check(struct cpu_user_regs *regs)
   40.33  {
   40.34 -    fatal_trap(TRAP_machine_check, regs);
   40.35 -    return 0;
   40.36 +    extern fastcall void (*machine_check_vector)(
   40.37 +        struct cpu_user_regs *, long error_code);
   40.38 +    machine_check_vector(regs, regs->error_code);
   40.39  }
   40.40  
   40.41  void propagate_page_fault(unsigned long addr, u16 error_code)
   40.42 @@ -1787,6 +1789,7 @@ static int emulate_privileged_op(struct 
   40.43  
   40.44   done:
   40.45      regs->eip = eip;
   40.46 +    regs->eflags &= ~X86_EFLAGS_RF;
   40.47      return EXCRET_fault_fixed;
   40.48  
   40.49   fail:
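
[Editor's note] do_machine_check() no longer hard-codes fatal_trap(); it dispatches through a function pointer so MCE support code can install a real handler. The indirection, sketched with stub types (the real pointer is declared fastcall and owned by the cpu/mcheck code; the struct here is a minimal stand-in):

    struct cpu_user_regs { long error_code; /* ... */ };

    typedef void (*mc_handler_t)(struct cpu_user_regs *, long error_code);

    static void unexpected_machine_check(struct cpu_user_regs *regs, long err)
    {
        /* default handler; MCE init code replaces the pointer below */
    }

    mc_handler_t machine_check_vector = unexpected_machine_check;

    void do_machine_check(struct cpu_user_regs *regs)
    {
        machine_check_vector(regs, regs->error_code);
    }
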
    41.1 --- a/xen/arch/x86/x86_32/entry.S	Mon Jul 02 10:31:03 2007 -0600
    41.2 +++ b/xen/arch/x86/x86_32/entry.S	Mon Jul 02 12:19:26 2007 -0600
    41.3 @@ -72,48 +72,36 @@
    41.4          andl $~3,reg;            \
    41.5          movl (reg),reg;
    41.6  
    41.7 -
    41.8          ALIGN
    41.9  restore_all_guest:
   41.10          ASSERT_INTERRUPTS_DISABLED
   41.11          testl $X86_EFLAGS_VM,UREGS_eflags(%esp)
   41.12 -        jnz  restore_all_vm86
   41.13 +        popl  %ebx
   41.14 +        popl  %ecx
   41.15 +        popl  %edx
   41.16 +        popl  %esi
   41.17 +        popl  %edi
   41.18 +        popl  %ebp
   41.19 +        popl  %eax
   41.20 +        leal  4(%esp),%esp
   41.21 +        jnz   .Lrestore_iret_guest
   41.22  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
   41.23 -        testl $2,UREGS_cs(%esp)
   41.24 -        jnz   1f
   41.25 +        testb $2,UREGS_cs-UREGS_eip(%esp)
   41.26 +        jnz   .Lrestore_sregs_guest
   41.27          call  restore_ring0_guest
   41.28 -        jmp   restore_all_vm86
   41.29 -1:
   41.30 +        jmp   .Lrestore_iret_guest
   41.31  #endif
   41.32 -.Lft1:  mov  UREGS_ds(%esp),%ds
   41.33 -.Lft2:  mov  UREGS_es(%esp),%es
   41.34 -.Lft3:  mov  UREGS_fs(%esp),%fs
   41.35 -.Lft4:  mov  UREGS_gs(%esp),%gs
   41.36 -restore_all_vm86:
   41.37 -        popl %ebx
   41.38 -        popl %ecx
   41.39 -        popl %edx
   41.40 -        popl %esi
   41.41 -        popl %edi
   41.42 -        popl %ebp
   41.43 -        popl %eax
   41.44 -        addl $4,%esp
   41.45 +.Lrestore_sregs_guest:
   41.46 +.Lft1:  mov  UREGS_ds-UREGS_eip(%esp),%ds
   41.47 +.Lft2:  mov  UREGS_es-UREGS_eip(%esp),%es
   41.48 +.Lft3:  mov  UREGS_fs-UREGS_eip(%esp),%fs
   41.49 +.Lft4:  mov  UREGS_gs-UREGS_eip(%esp),%gs
   41.50 +.Lrestore_iret_guest:
   41.51  .Lft5:  iret
   41.52  .section .fixup,"ax"
   41.53 -.Lfx5:  subl  $28,%esp
   41.54 -        pushl 28(%esp)                 # error_code/entry_vector
   41.55 -        movl  %eax,UREGS_eax+4(%esp)
   41.56 -        movl  %ebp,UREGS_ebp+4(%esp)
   41.57 -        movl  %edi,UREGS_edi+4(%esp)
   41.58 -        movl  %esi,UREGS_esi+4(%esp)
   41.59 -        movl  %edx,UREGS_edx+4(%esp)
   41.60 -        movl  %ecx,UREGS_ecx+4(%esp)
   41.61 -        movl  %ebx,UREGS_ebx+4(%esp)
   41.62 -.Lfx1:  SET_XEN_SEGMENTS(a)
   41.63 -        movl  %eax,%fs
   41.64 -        movl  %eax,%gs
   41.65 -        sti
   41.66 -        popl  %esi
   41.67 +.Lfx1:  sti
   41.68 +        SAVE_ALL_GPRS
   41.69 +        mov   UREGS_error_code(%esp),%esi
   41.70          pushfl                         # EFLAGS
   41.71          movl  $__HYPERVISOR_CS,%eax
   41.72          pushl %eax                     # CS
   41.73 @@ -147,7 +135,7 @@ 1:      call  create_bounce_frame
   41.74          .long .Lft2,.Lfx1
   41.75          .long .Lft3,.Lfx1
   41.76          .long .Lft4,.Lfx1
   41.77 -        .long .Lft5,.Lfx5
   41.78 +        .long .Lft5,.Lfx1
   41.79  .previous
   41.80  .section __ex_table,"a"
   41.81          .long .Ldf1,failsafe_callback
   41.82 @@ -169,8 +157,8 @@ restore_all_xen:
   41.83  ENTRY(hypercall)
   41.84          subl $4,%esp
   41.85          FIXUP_RING0_GUEST_STACK
   41.86 -        SAVE_ALL(b)
   41.87 -        sti
   41.88 +        SAVE_ALL(1f,1f)
   41.89 +1:      sti
   41.90          GET_CURRENT(%ebx)
   41.91          cmpl  $NR_hypercalls,%eax
   41.92          jae   bad_hypercall
   41.93 @@ -420,9 +408,14 @@ ENTRY(divide_error)
   41.94          ALIGN
   41.95  handle_exception:
   41.96          FIXUP_RING0_GUEST_STACK
   41.97 -        SAVE_ALL_NOSEGREGS(a)
   41.98 -        SET_XEN_SEGMENTS(a)
   41.99 -        testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
  41.100 +        SAVE_ALL(1f,2f)
  41.101 +        .text 1
  41.102 +        /* Exception within Xen: make sure we have valid %ds,%es. */
  41.103 +1:      mov   %ecx,%ds
  41.104 +        mov   %ecx,%es
  41.105 +        jmp   2f
  41.106 +        .previous
  41.107 +2:      testb $X86_EFLAGS_IF>>8,UREGS_eflags+1(%esp)
  41.108          jz    exception_with_ints_disabled
  41.109          sti                             # re-enable interrupts
  41.110  1:      xorl  %eax,%eax
  41.111 @@ -533,71 +526,81 @@ ENTRY(page_fault)
  41.112          movw  $TRAP_page_fault,2(%esp)
  41.113          jmp   handle_exception
  41.114  
  41.115 -ENTRY(machine_check)
  41.116 -        pushl $TRAP_machine_check<<16
  41.117 -        jmp   handle_exception
  41.118 -
  41.119  ENTRY(spurious_interrupt_bug)
  41.120          pushl $TRAP_spurious_int<<16
  41.121          jmp   handle_exception
  41.122  
  41.123  ENTRY(early_page_fault)
  41.124 -        SAVE_ALL_NOSEGREGS(a)
  41.125 -        movl  %esp,%edx
  41.126 -        pushl %edx
  41.127 +        SAVE_ALL(1f,1f)
  41.128 +1:      movl  %esp,%eax
  41.129 +        pushl %eax
  41.130          call  do_early_page_fault
  41.131          addl  $4,%esp
  41.132          jmp   restore_all_xen
  41.133  
  41.134 -ENTRY(nmi)
  41.135 +handle_nmi_mce:
  41.136  #ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
  41.137 -        # NMI entry protocol is incompatible with guest kernel in ring 0.
  41.138 +        # NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
  41.139 +        addl  $4,%esp
  41.140          iret
  41.141  #else
  41.142          # Save state but do not trash the segment registers!
  41.143 -        # We may otherwise be unable to reload them or copy them to ring 1. 
  41.144 -        pushl %eax
  41.145 -        SAVE_ALL_NOSEGREGS(a)
  41.146 -
  41.147 -        # We can only process the NMI if:
  41.148 -        #  A. We are the outermost Xen activation (in which case we have
  41.149 -        #     the selectors safely saved on our stack)
  41.150 -        #  B. DS and ES contain sane Xen values.
  41.151 -        # In all other cases we bail without touching DS-GS, as we have
  41.152 -        # interrupted an enclosing Xen activation in tricky prologue or
  41.153 -        # epilogue code.
  41.154 -        movl  UREGS_eflags(%esp),%eax
  41.155 -        movb  UREGS_cs(%esp),%al
  41.156 -        testl $(3|X86_EFLAGS_VM),%eax
  41.157 -        jnz   continue_nmi
  41.158 -        movl  %ds,%eax
  41.159 -        cmpw  $(__HYPERVISOR_DS),%ax
  41.160 -        jne   defer_nmi
  41.161 -        movl  %es,%eax
  41.162 -        cmpw  $(__HYPERVISOR_DS),%ax
  41.163 -        jne   defer_nmi
  41.164 -
  41.165 -continue_nmi:
  41.166 -        SET_XEN_SEGMENTS(d)
  41.167 +        SAVE_ALL(.Lnmi_mce_xen,.Lnmi_mce_common)
  41.168 +.Lnmi_mce_common:
  41.169 +        xorl  %eax,%eax
  41.170 +        movw  UREGS_entry_vector(%esp),%ax
  41.171          movl  %esp,%edx
  41.172          pushl %edx
  41.173 -        call  do_nmi
  41.174 +        call  *exception_table(,%eax,4)
  41.175          addl  $4,%esp
  41.176 +        /* 
  41.177 +         * NB. We may return to Xen context with polluted %ds/%es. But in such
  41.178 +         * cases we have put guest DS/ES on the guest stack frame, which will
  41.179 +         * be detected by SAVE_ALL(), or we have rolled back restore_guest.
  41.180 +         */
  41.181          jmp   ret_from_intr
  41.182 -
  41.183 -defer_nmi:
  41.184 -        movl  $FIXMAP_apic_base,%eax
  41.185 -        # apic_wait_icr_idle()
  41.186 -1:      movl  %ss:APIC_ICR(%eax),%ebx
  41.187 -        testl $APIC_ICR_BUSY,%ebx
  41.188 -        jnz   1b
  41.189 -        # __send_IPI_shortcut(APIC_DEST_SELF, TRAP_deferred_nmi)
  41.190 -        movl  $(APIC_DM_FIXED | APIC_DEST_SELF | APIC_DEST_PHYSICAL | \
  41.191 -                TRAP_deferred_nmi),%ss:APIC_ICR(%eax)
  41.192 -        jmp   restore_all_xen
  41.193 +.Lnmi_mce_xen:
  41.194 +        /* Check the outer (guest) context for %ds/%es state validity. */
  41.195 +        GET_GUEST_REGS(%ebx)
  41.196 +        testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx)
  41.197 +        mov   %ds,%eax
  41.198 +        mov   %es,%edx
  41.199 +        jnz   .Lnmi_mce_vm86
  41.200 +        /* We may have interrupted Xen while messing with %ds/%es... */
  41.201 +        cmpw  %ax,%cx
  41.202 +        mov   %ecx,%ds             /* Ensure %ds is valid */
  41.203 +        cmove UREGS_ds(%ebx),%eax  /* Grab guest DS if it wasn't in %ds */
  41.204 +        cmpw  %dx,%cx
  41.205 +        movl  %eax,UREGS_ds(%ebx)  /* Ensure guest frame contains guest DS */
  41.206 +        cmove UREGS_es(%ebx),%edx  /* Grab guest ES if it wasn't in %es */
  41.207 +        mov   %ecx,%es             /* Ensure %es is valid */
  41.208 +        movl  $.Lrestore_sregs_guest,%ecx
  41.209 +        movl  %edx,UREGS_es(%ebx)  /* Ensure guest frame contains guest ES */
  41.210 +        cmpl  %ecx,UREGS_eip(%esp)
  41.211 +        jbe   .Lnmi_mce_common
  41.212 +        cmpl  $.Lrestore_iret_guest,UREGS_eip(%esp)
  41.213 +        ja    .Lnmi_mce_common
  41.214 +        /* Roll outer context restore_guest back to restoring %ds/%es. */
  41.215 +        movl  %ecx,UREGS_eip(%esp)
  41.216 +        jmp   .Lnmi_mce_common
  41.217 +.Lnmi_mce_vm86:
  41.218 +        /* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */
  41.219 +        mov   %ecx,%ds
  41.220 +        mov   %ecx,%es
  41.221 +        jmp   .Lnmi_mce_common
  41.222  #endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */
  41.223  
  41.224 +ENTRY(nmi)
  41.225 +        pushl $TRAP_nmi<<16
  41.226 +        jmp   handle_nmi_mce
  41.227 +
  41.228 +ENTRY(machine_check)
  41.229 +        pushl $TRAP_machine_check<<16
  41.230 +        jmp   handle_nmi_mce
  41.231 +
  41.232  ENTRY(setup_vm86_frame)
  41.233 +        mov %ecx,%ds
  41.234 +        mov %ecx,%es
  41.235          # Copies the entire stack frame forwards by 16 bytes.
  41.236          .macro copy_vm86_words count=18
  41.237          .if \count
  41.238 @@ -615,7 +618,7 @@ ENTRY(setup_vm86_frame)
  41.239  ENTRY(exception_table)
  41.240          .long do_divide_error
  41.241          .long do_debug
  41.242 -        .long 0 # nmi
  41.243 +        .long do_nmi
  41.244          .long do_int3
  41.245          .long do_overflow
  41.246          .long do_bounds
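
[Editor's note] The shared handle_nmi_mce path replaces the dedicated do_nmi call with an indirect call through exception_table, indexed by the entry vector each stub pushes, which is why the nmi slot changes from 0 to do_nmi. The dispatch at .Lnmi_mce_common, rendered in C (handler signatures simplified; the real table is the .long list at the end of entry.S):

    struct cpu_user_regs;   /* only used via pointer here */

    typedef void (*trap_handler_t)(struct cpu_user_regs *regs);
    extern trap_handler_t exception_table[];

    static void dispatch_trap(struct cpu_user_regs *regs, unsigned int vector)
    {
        exception_table[vector](regs);  /* call *exception_table(,%eax,4) */
    }
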
    42.1 --- a/xen/arch/x86/x86_32/supervisor_mode_kernel.S	Mon Jul 02 10:31:03 2007 -0600
    42.2 +++ b/xen/arch/x86/x86_32/supervisor_mode_kernel.S	Mon Jul 02 12:19:26 2007 -0600
    42.3 @@ -20,40 +20,45 @@
    42.4  #include <asm/asm_defns.h>
    42.5  #include <public/xen.h>
    42.6  
    42.7 +#define guestreg(field) ((field)-UREGS_eip+36)
    42.8 +
    42.9          # Upon entry the stack should be the Xen stack and contain:
   42.10 -        #   %ss, %esp, EFLAGS, %cs|1, %eip, ERROR, SAVE_ALL, RETURN
   42.11 +        #   %ss, %esp, EFLAGS, %cs|1, %eip, RETURN
   42.12          # On exit the stack should be %ss:%esp (i.e. the guest stack)
   42.13          # and contain:
   42.14 -        #   EFLAGS, %cs, %eip, ERROR, SAVE_ALL, RETURN
   42.15 +        #   EFLAGS, %cs, %eip, RETURN
   42.16          ALIGN
   42.17  ENTRY(restore_ring0_guest)
   42.18 +        pusha
   42.19 +
   42.20          # Point %gs:%esi to guest stack.
   42.21 -RRG0:   movw UREGS_ss+4(%esp),%gs
   42.22 -        movl UREGS_esp+4(%esp),%esi
   42.23 +RRG0:   movw guestreg(UREGS_ss)(%esp),%gs
   42.24 +        movl guestreg(UREGS_esp)(%esp),%esi
   42.25  
   42.26 -        # Copy EFLAGS...EBX, RETURN from Xen stack to guest stack.
   42.27 -        movl $(UREGS_kernel_sizeof>>2)+1,%ecx
   42.28 +        # Copy EFLAGS, %cs, %eip, RETURN, PUSHA from Xen stack to guest stack.
   42.29 +        movl $12,%ecx /* 12 32-bit values */
   42.30  
   42.31  1:      subl $4,%esi
   42.32          movl -4(%esp,%ecx,4),%eax
   42.33  RRG1:   movl %eax,%gs:(%esi)
   42.34          loop 1b
   42.35  
   42.36 -RRG2:   andl $~3,%gs:UREGS_cs+4(%esi)
   42.37 +RRG2:   andl $~3,%gs:guestreg(UREGS_cs)(%esi)
   42.38  
   42.39          movl %gs,%eax
   42.40  
   42.41          # We need to do this because these registers are not present
   42.42          # on the guest stack so they cannot be restored by the code in
   42.43          # restore_all_guest.
   42.44 -RRG3:   mov  UREGS_ds+4(%esp),%ds
   42.45 -RRG4:   mov  UREGS_es+4(%esp),%es
   42.46 -RRG5:   mov  UREGS_fs+4(%esp),%fs
   42.47 -RRG6:   mov  UREGS_gs+4(%esp),%gs
   42.48 +RRG3:   mov  guestreg(UREGS_ds)(%esp),%ds
   42.49 +RRG4:   mov  guestreg(UREGS_es)(%esp),%es
   42.50 +RRG5:   mov  guestreg(UREGS_fs)(%esp),%fs
   42.51 +RRG6:   mov  guestreg(UREGS_gs)(%esp),%gs
   42.52  
   42.53  RRG7:   movl %eax,%ss
   42.54          movl %esi,%esp
   42.55  
   42.56 +        popa
   42.57          ret
   42.58  .section __ex_table,"a"
   42.59          .long RRG0,domain_crash_synchronous
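
[Editor's note] restore_ring0_guest now pushes all GPRs (pusha) and copies 12 dwords, EFLAGS, %cs, %eip, RETURN plus the eight pusha words, from the Xen stack to the guest stack before switching %ss:%esp; the guestreg() macro simply rebases the UREGS_* offsets past that pusha frame. The copy loop, rendered in C (copy_frame_to_guest_stack is an illustrative name):

    #include <stdint.h>

    /* Walks the 12 words top-down, as the `loop 1b` sequence above does. */
    static void copy_frame_to_guest_stack(uint32_t *guest_sp,
                                          const uint32_t *xen_sp)
    {
        for ( int i = 12; i > 0; i-- )
            *--guest_sp = xen_sp[i - 1];
    }
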
    43.1 --- a/xen/arch/x86/x86_32/traps.c	Mon Jul 02 10:31:03 2007 -0600
    43.2 +++ b/xen/arch/x86/x86_32/traps.c	Mon Jul 02 12:19:26 2007 -0600
    43.3 @@ -232,15 +232,6 @@ unsigned long do_iret(void)
    43.4      return 0;
    43.5  }
    43.6  
    43.7 -#include <asm/asm_defns.h>
    43.8 -BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
    43.9 -fastcall void smp_deferred_nmi(struct cpu_user_regs *regs)
   43.10 -{
   43.11 -    asmlinkage void do_nmi(struct cpu_user_regs *);
   43.12 -    ack_APIC_irq();
   43.13 -    do_nmi(regs);
   43.14 -}
   43.15 -
   43.16  void __init percpu_traps_init(void)
   43.17  {
   43.18      struct tss_struct *tss = &doublefault_tss;
   43.19 @@ -252,8 +243,6 @@ void __init percpu_traps_init(void)
   43.20      /* The hypercall entry vector is only accessible from ring 1. */
   43.21      _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
   43.22  
   43.23 -    set_intr_gate(TRAP_deferred_nmi, &deferred_nmi);
   43.24 -
   43.25      /*
   43.26       * Make a separate task for double faults. This will get us debug output if
   43.27       * we blow the kernel stack.
    44.1 --- a/xen/arch/x86/x86_64/Makefile	Mon Jul 02 10:31:03 2007 -0600
    44.2 +++ b/xen/arch/x86/x86_64/Makefile	Mon Jul 02 12:19:26 2007 -0600
    44.3 @@ -1,12 +1,12 @@
    44.4  subdir-y += compat
    44.5  
    44.6  obj-y += entry.o
    44.7 -obj-y += compat_kexec.o
    44.8  obj-y += gpr_switch.o
    44.9  obj-y += mm.o
   44.10  obj-y += traps.o
   44.11  
   44.12  obj-$(CONFIG_COMPAT) += compat.o
   44.13 +obj-$(CONFIG_COMPAT) += compat_kexec.o
   44.14  obj-$(CONFIG_COMPAT) += domain.o
   44.15  obj-$(CONFIG_COMPAT) += physdev.o
   44.16  obj-$(CONFIG_COMPAT) += platform_hypercall.o
    45.1 --- a/xen/arch/x86/x86_64/compat/entry.S	Mon Jul 02 10:31:03 2007 -0600
    45.2 +++ b/xen/arch/x86/x86_64/compat/entry.S	Mon Jul 02 12:19:26 2007 -0600
    45.3 @@ -143,12 +143,12 @@ compat_restore_all_guest:
    45.4  .Lft0:  iretq
    45.5  
    45.6  .section .fixup,"ax"
    45.7 -.Lfx0:  popq  -15*8-8(%rsp)            # error_code/entry_vector
    45.8 -        SAVE_ALL                       # 15*8 bytes pushed
    45.9 -        movq  -8(%rsp),%rsi            # error_code/entry_vector
   45.10 -        sti                            # after stack abuse (-1024(%rsp))
   45.11 +.Lfx0:  sti
   45.12 +        SAVE_ALL
   45.13 +        movq  UREGS_error_code(%rsp),%rsi
   45.14 +        movq  %rsp,%rax
   45.15 +        andq  $~0xf,%rsp
   45.16          pushq $__HYPERVISOR_DS         # SS
   45.17 -        leaq  8(%rsp),%rax
   45.18          pushq %rax                     # RSP
   45.19          pushfq                         # RFLAGS
   45.20          pushq $__HYPERVISOR_CS         # CS
    46.1 --- a/xen/arch/x86/x86_64/entry.S	Mon Jul 02 10:31:03 2007 -0600
    46.2 +++ b/xen/arch/x86/x86_64/entry.S	Mon Jul 02 12:19:26 2007 -0600
    46.3 @@ -57,23 +57,23 @@ 1:      sysretl
    46.4  /* No special register assumptions. */
    46.5  iret_exit_to_guest:
    46.6          addq  $8,%rsp
    46.7 -.Lft1:  iretq
    46.8 +.Lft0:  iretq
    46.9  
   46.10  .section .fixup,"ax"
   46.11 -.Lfx1:  popq  -15*8-8(%rsp)            # error_code/entry_vector
   46.12 -        SAVE_ALL                       # 15*8 bytes pushed
   46.13 -        movq  -8(%rsp),%rsi            # error_code/entry_vector
   46.14 -        sti                            # after stack abuse (-1024(%rsp))
   46.15 +.Lfx0:  sti
   46.16 +        SAVE_ALL
   46.17 +        movq  UREGS_error_code(%rsp),%rsi
   46.18 +        movq  %rsp,%rax
   46.19 +        andq  $~0xf,%rsp
   46.20          pushq $__HYPERVISOR_DS         # SS
   46.21 -        leaq  8(%rsp),%rax
   46.22          pushq %rax                     # RSP
   46.23 -        pushf                          # RFLAGS
   46.24 +        pushfq                         # RFLAGS
   46.25          pushq $__HYPERVISOR_CS         # CS
   46.26 -        leaq  .Ldf1(%rip),%rax
   46.27 +        leaq  .Ldf0(%rip),%rax
   46.28          pushq %rax                     # RIP
   46.29          pushq %rsi                     # error_code/entry_vector
   46.30          jmp   handle_exception
   46.31 -.Ldf1:  GET_CURRENT(%rbx)
   46.32 +.Ldf0:  GET_CURRENT(%rbx)
   46.33          jmp   test_all_events
   46.34  failsafe_callback:
   46.35          GET_CURRENT(%rbx)
   46.36 @@ -88,10 +88,10 @@ 1:      call  create_bounce_frame
   46.37          jmp   test_all_events
   46.38  .previous
   46.39  .section __pre_ex_table,"a"
   46.40 -        .quad .Lft1,.Lfx1
   46.41 +        .quad .Lft0,.Lfx0
   46.42  .previous
   46.43  .section __ex_table,"a"
   46.44 -        .quad .Ldf1,failsafe_callback
   46.45 +        .quad .Ldf0,failsafe_callback
   46.46  .previous
   46.47  
   46.48          ALIGN
   46.49 @@ -505,11 +505,6 @@ ENTRY(page_fault)
   46.50          movl  $TRAP_page_fault,4(%rsp)
   46.51          jmp   handle_exception
   46.52  
   46.53 -ENTRY(machine_check)
   46.54 -        pushq $0
   46.55 -        movl  $TRAP_machine_check,4(%rsp)
   46.56 -        jmp   handle_exception
   46.57 -
   46.58  ENTRY(spurious_interrupt_bug)
   46.59          pushq $0
   46.60          movl  $TRAP_spurious_int,4(%rsp)
   46.61 @@ -527,31 +522,38 @@ ENTRY(early_page_fault)
   46.62          call  do_early_page_fault
   46.63          jmp   restore_all_xen
   46.64  
   46.65 -ENTRY(nmi)
   46.66 -        pushq $0
   46.67 +handle_ist_exception:
   46.68          SAVE_ALL
   46.69          testb $3,UREGS_cs(%rsp)
   46.70 -        jz    nmi_in_hypervisor_mode
   46.71 +        jz    1f
   46.72          /* Interrupted guest context. Copy the context to stack bottom. */
   46.73 -        GET_GUEST_REGS(%rbx)
   46.74 +        GET_GUEST_REGS(%rdi)
   46.75 +        movq  %rsp,%rsi
   46.76          movl  $UREGS_kernel_sizeof/8,%ecx
   46.77 -1:      popq  %rax
   46.78 -        movq  %rax,(%rbx)
   46.79 -        addq  $8,%rbx
   46.80 -        loop  1b
   46.81 -        subq  $UREGS_kernel_sizeof,%rbx
   46.82 -        movq  %rbx,%rsp
   46.83 -nmi_in_hypervisor_mode:
   46.84 -        movq  %rsp,%rdi
   46.85 -        call  do_nmi
   46.86 +        movq  %rdi,%rsp
   46.87 +        rep   movsq
   46.88 +1:      movq  %rsp,%rdi
   46.89 +        movl  UREGS_entry_vector(%rsp),%eax
   46.90 +        leaq  exception_table(%rip),%rdx
   46.91 +        callq *(%rdx,%rax,8)
   46.92          jmp   ret_from_intr
   46.93  
   46.94 +ENTRY(nmi)
   46.95 +        pushq $0
   46.96 +        movl  $TRAP_nmi,4(%rsp)
   46.97 +        jmp   handle_ist_exception
   46.98 +
   46.99 +ENTRY(machine_check)
  46.100 +        pushq $0
  46.101 +        movl  $TRAP_machine_check,4(%rsp)
  46.102 +        jmp   handle_ist_exception
  46.103 +
  46.104  .data
  46.105  
  46.106  ENTRY(exception_table)
  46.107          .quad do_divide_error
  46.108          .quad do_debug
  46.109 -        .quad 0 # nmi
  46.110 +        .quad do_nmi
  46.111          .quad do_int3
  46.112          .quad do_overflow
  46.113          .quad do_bounds
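
[Editor's note] handle_ist_exception is the 64-bit counterpart of the 32-bit change: when an IST-delivered NMI/#MC interrupted guest context, the register frame saved on the IST stack is first copied down to the normal guest-register area at the stack bottom (the rep movsq), and only then dispatched through exception_table. Roughly, in C (relocate_ist_frame is an illustrative name; frame_size stands for UREGS_kernel_sizeof):

    #include <stddef.h>
    #include <string.h>

    static void relocate_ist_frame(void *guest_regs /* GET_GUEST_REGS */,
                                   const void *ist_regs, size_t frame_size)
    {
        memcpy(guest_regs, ist_regs, frame_size);
        /* %rsp is then switched to guest_regs before the indirect call. */
    }
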
    47.1 --- a/xen/arch/x86/x86_64/mm.c	Mon Jul 02 10:31:03 2007 -0600
    47.2 +++ b/xen/arch/x86/x86_64/mm.c	Mon Jul 02 12:19:26 2007 -0600
    47.3 @@ -106,7 +106,8 @@ void __init paging_init(void)
    47.4      /* Create user-accessible L2 directory to map the MPT for guests. */
    47.5      if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
    47.6          goto nomem;
    47.7 -    l3_ro_mpt = clear_page(page_to_virt(l2_pg));
    47.8 +    l3_ro_mpt = page_to_virt(l2_pg);
    47.9 +    clear_page(l3_ro_mpt);
   47.10      l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
   47.11                l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
   47.12  
   47.13 @@ -132,7 +133,8 @@ void __init paging_init(void)
   47.14              if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
   47.15                  goto nomem;
   47.16              va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
   47.17 -            l2_ro_mpt = clear_page(page_to_virt(l2_pg));
   47.18 +            l2_ro_mpt = page_to_virt(l2_pg);
   47.19 +            clear_page(l2_ro_mpt);
   47.20              l3e_write(&l3_ro_mpt[l3_table_offset(va)],
   47.21                        l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
   47.22              l2_ro_mpt += l2_table_offset(va);
   47.23 @@ -152,7 +154,8 @@ void __init paging_init(void)
   47.24          l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
   47.25          if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
   47.26              goto nomem;
   47.27 -        compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
   47.28 +        compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
   47.29 +        clear_page(l2_ro_mpt);
   47.30          l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
   47.31                    l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
   47.32          l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
    48.1 --- a/xen/arch/x86/x86_64/traps.c	Mon Jul 02 10:31:03 2007 -0600
    48.2 +++ b/xen/arch/x86/x86_64/traps.c	Mon Jul 02 12:19:26 2007 -0600
    48.3 @@ -294,8 +294,9 @@ void __init percpu_traps_init(void)
    48.4      {
    48.5          /* Specify dedicated interrupt stacks for NMIs and double faults. */
    48.6          set_intr_gate(TRAP_double_fault, &double_fault);
    48.7 -        idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
    48.8 -        idt_table[TRAP_nmi].a          |= 2UL << 32; /* IST2 */
    48.9 +        idt_table[TRAP_double_fault].a  |= 1UL << 32; /* IST1 */
   48.10 +        idt_table[TRAP_nmi].a           |= 2UL << 32; /* IST2 */
   48.11 +        idt_table[TRAP_machine_check].a |= 3UL << 32; /* IST3 */
   48.12  
   48.13          /*
   48.14           * The 32-on-64 hypercall entry vector is only accessible from ring 1.
   48.15 @@ -310,7 +311,10 @@ void __init percpu_traps_init(void)
   48.16      stack_bottom = (char *)get_stack_bottom();
   48.17      stack        = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
   48.18  
   48.19 -    /* Double-fault handler has its own per-CPU 2kB stack. */
   48.20 +    /* Machine Check handler has its own per-CPU 1kB stack. */
   48.21 +    init_tss[cpu].ist[2] = (unsigned long)&stack[1024];
   48.22 +
   48.23 +    /* Double-fault handler has its own per-CPU 1kB stack. */
   48.24      init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
   48.25  
   48.26      /* NMI handler has its own per-CPU 1kB stack. */
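
[Editor's note] On x86-64 the IST index occupies bits 34:32 of the low quadword of an IDT gate, hence the `|= nUL << 32` writes above; the matching `&= ~(3UL << 32)` clears in the SVM context-switch hunk suffice because only IST1 through IST3 are in use. A hedged helper expressing the same encoding (set_ist is an illustrative name, and the two-quadword struct is an assumption about the gate layout):

    #include <stdint.h>

    struct idt_entry { uint64_t a, b; };  /* low/high quadwords */

    /* ist = 1..7 selects a stack from the TSS; 0 means "no IST". */
    static void set_ist(struct idt_entry *gate, unsigned int ist)
    {
        gate->a = (gate->a & ~(7UL << 32)) | ((uint64_t)ist << 32);
    }
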
    49.1 --- a/xen/arch/x86/x86_emulate.c	Mon Jul 02 10:31:03 2007 -0600
    49.2 +++ b/xen/arch/x86/x86_emulate.c	Mon Jul 02 12:19:26 2007 -0600
    49.3 @@ -1630,6 +1630,7 @@ x86_emulate(
    49.4      }
    49.5  
    49.6      /* Commit shadow register state. */
    49.7 +    _regs.eflags &= ~EF_RF;
    49.8      *ctxt->regs = _regs;
    49.9  
   49.10   done:
    50.1 --- a/xen/common/sysctl.c	Mon Jul 02 10:31:03 2007 -0600
    50.2 +++ b/xen/common/sysctl.c	Mon Jul 02 12:19:26 2007 -0600
    50.3 @@ -136,6 +136,39 @@ long do_sysctl(XEN_GUEST_HANDLE(xen_sysc
    50.4      }
    50.5      break;
    50.6  
    50.7 +    case XEN_SYSCTL_getcpuinfo:
    50.8 +    {
    50.9 +        uint32_t i, nr_cpus;
   50.10 +        struct xen_sysctl_cpuinfo cpuinfo;
   50.11 +        struct vcpu *v;
   50.12 +
   50.13 +        nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
   50.14 +
   50.15 +        for ( i = 0; i < nr_cpus; i++ )
   50.16 +        {
   50.17 +            /* Assume no holes in idle-vcpu map. */
   50.18 +            if ( (v = idle_vcpu[i]) == NULL )
   50.19 +                break;
   50.20 +
   50.21 +            cpuinfo.idletime = v->runstate.time[RUNSTATE_running];
   50.22 +            if ( v->is_running )
   50.23 +                cpuinfo.idletime += NOW() - v->runstate.state_entry_time;
   50.24 +
   50.25 +            if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
   50.26 +            {
   50.27 +                ret = -EFAULT;
   50.28 +                break;
   50.29 +            }
   50.30 +        }
   50.31 +
   50.32 +        op->u.getcpuinfo.nr_cpus = i;
   50.33 +        ret = 0;
   50.34 +
   50.35 +        if ( copy_to_guest(u_sysctl, op, 1) )
   50.36 +            ret = -EFAULT;
   50.37 +    }
   50.38 +    break;
   50.39 +
   50.40      default:
   50.41          ret = arch_do_sysctl(op, u_sysctl);
   50.42          break;
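
The idle-time arithmetic above deserves a note: runstate.time[] only advances
on a runstate change, so when the idle vCPU is running at the instant of the
query, the still-open interval must be added by hand:

    idletime = time[RUNSTATE_running]
             + (is_running ? NOW() - state_entry_time : 0);
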
    51.1 --- a/xen/include/asm-x86/edd.h	Mon Jul 02 10:31:03 2007 -0600
    51.2 +++ b/xen/include/asm-x86/edd.h	Mon Jul 02 12:19:26 2007 -0600
    51.3 @@ -32,12 +32,22 @@ struct edd_info {
    51.4      u16 legacy_max_cylinder;     /* %cl[7:6]:%ch: maximum cylinder number */
    51.5      u8 legacy_max_head;          /* %dh: maximum head number */
    51.6      u8 legacy_sectors_per_track; /* %cl[5:0]: maximum sector number */
    51.7 -    /* Int13, Fn41: Get Device Parameters */
    51.8 -    u8 edd_device_params[74];    /* as filled into %ds:%si */
    51.9 +    /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
   51.10 +    struct {
   51.11 +        u16 length;
   51.12 +        u8 data[72];
   51.13 +    } edd_device_params;
   51.14  } __attribute__ ((packed));
   51.15  
   51.16 -extern u32 boot_edd_signature[];
   51.17 -extern u8 boot_edd_signature_nr;
   51.18 +struct mbr_signature {
   51.19 +    u8 device;
   51.20 +    u8 pad[3];
   51.21 +    u32 signature;
   51.22 +} __attribute__ ((packed));
   51.23 +
   51.24 +/* These all reside in the boot trampoline. Access via bootsym(). */
   51.25 +extern struct mbr_signature boot_mbr_signature[];
   51.26 +extern u8 boot_mbr_signature_nr;
   51.27  extern struct edd_info boot_edd_info[];
   51.28  extern u8 boot_edd_info_nr;
   51.29  
    52.1 --- a/xen/include/asm-x86/hvm/hvm.h	Mon Jul 02 10:31:03 2007 -0600
    52.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Mon Jul 02 12:19:26 2007 -0600
    52.3 @@ -302,4 +302,18 @@ static inline int hvm_event_injection_fa
    52.4      return hvm_funcs.event_injection_faulted(v);
    52.5  }
    52.6  
    52.7 +/* These bits in CR4 are owned by the host. */
    52.8 +#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
    52.9 +    (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
   52.10 +
   52.11 +/* These bits in CR4 cannot be set by the guest. */
   52.12 +#define HVM_CR4_GUEST_RESERVED_BITS \
   52.13 +    ~(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | \
   52.14 +      X86_CR4_DE  | X86_CR4_PSE | X86_CR4_PAE | \
   52.15 +      X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | \
   52.16 +      X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT)
   52.17 +
   52.18 +/* These exceptions must always be intercepted. */
   52.19 +#define HVM_TRAP_MASK (1U << TRAP_machine_check)
   52.20 +
   52.21  #endif /* __ASM_X86_HVM_HVM_H__ */
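
A hedged sketch of how the new mask is meant to be consumed by common HVM
code (the function name and #GP plumbing are illustrative, not part of this
hunk): a guest write to CR4 is rejected when it touches reserved bits, while
the HVM_CR4_HOST_MASK bits stay under hypervisor control regardless of the
guest's value.

    /* Illustrative only: validate a guest CR4 write against the new mask. */
    static int cr4_write_valid(unsigned long value)
    {
        if ( value & HVM_CR4_GUEST_RESERVED_BITS )
            return 0;    /* caller injects #GP(0) into the guest */
        return 1;
    }
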
    53.1 --- a/xen/include/asm-x86/hvm/svm/emulate.h	Mon Jul 02 10:31:03 2007 -0600
    53.2 +++ b/xen/include/asm-x86/hvm/svm/emulate.h	Mon Jul 02 12:19:26 2007 -0600
    53.3 @@ -138,6 +138,7 @@ static void inline __update_guest_eip(
    53.4  {
    53.5      ASSERT(inst_len > 0);
    53.6      vmcb->rip += inst_len;
    53.7 +    vmcb->rflags &= ~X86_EFLAGS_RF;
    53.8  }
    53.9  
   53.10  #endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */
    54.1 --- a/xen/include/asm-x86/hvm/svm/vmcb.h	Mon Jul 02 10:31:03 2007 -0600
    54.2 +++ b/xen/include/asm-x86/hvm/svm/vmcb.h	Mon Jul 02 12:19:26 2007 -0600
    54.3 @@ -464,14 +464,6 @@ void svm_destroy_vmcb(struct vcpu *v);
    54.4  
    54.5  void setup_vmcb_dump(void);
    54.6  
    54.7 -/* These bits in the CR4 are owned by the host */
    54.8 -#if CONFIG_PAGING_LEVELS >= 3
    54.9 -#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
   54.10 -#else
   54.11 -#define SVM_CR4_HOST_MASK 0
   54.12 -#endif
   54.13 -
   54.14 -
   54.15  #endif /* ASM_X86_HVM_SVM_VMCS_H__ */
   54.16  
   54.17  /*
    55.1 --- a/xen/include/asm-x86/hvm/trace.h	Mon Jul 02 10:31:03 2007 -0600
    55.2 +++ b/xen/include/asm-x86/hvm/trace.h	Mon Jul 02 12:19:26 2007 -0600
    55.3 @@ -21,6 +21,7 @@
    55.4  #define DO_TRC_HVM_CPUID       1
    55.5  #define DO_TRC_HVM_INTR        1
    55.6  #define DO_TRC_HVM_NMI         1
    55.7 +#define DO_TRC_HVM_MCE         1
    55.8  #define DO_TRC_HVM_SMI         1
    55.9  #define DO_TRC_HVM_VMMCALL     1
   55.10  #define DO_TRC_HVM_HLT         1
    56.1 --- a/xen/include/asm-x86/hvm/vmx/vmcs.h	Mon Jul 02 10:31:03 2007 -0600
    56.2 +++ b/xen/include/asm-x86/hvm/vmx/vmcs.h	Mon Jul 02 12:19:26 2007 -0600
    56.3 @@ -131,6 +131,8 @@ extern u32 vmx_vmentry_control;
    56.4  #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
    56.5  extern u32 vmx_secondary_exec_control;
    56.6  
    56.7 +extern bool_t cpu_has_vmx_ins_outs_instr_info;
    56.8 +
    56.9  #define cpu_has_vmx_virtualize_apic_accesses \
   56.10      (vmx_secondary_exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
   56.11  #define cpu_has_vmx_tpr_shadow \
    57.1 --- a/xen/include/asm-x86/hvm/vmx/vmx.h	Mon Jul 02 10:31:03 2007 -0600
    57.2 +++ b/xen/include/asm-x86/hvm/vmx/vmx.h	Mon Jul 02 12:19:26 2007 -0600
    57.3 @@ -143,13 +143,6 @@ void vmx_vlapic_msr_changed(struct vcpu 
    57.4  #define X86_SEG_AR_GRANULARITY  (1u << 15) /* 15, granularity */
    57.5  #define X86_SEG_AR_SEG_UNUSABLE (1u << 16) /* 16, segment unusable */
    57.6  
    57.7 -/* These bits in the CR4 are owned by the host */
    57.8 -#if CONFIG_PAGING_LEVELS >= 3
    57.9 -#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
   57.10 -#else
   57.11 -#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
   57.12 -#endif
   57.13 -
   57.14  #define VMCALL_OPCODE   ".byte 0x0f,0x01,0xc1\n"
   57.15  #define VMCLEAR_OPCODE  ".byte 0x66,0x0f,0xc7\n"        /* reg/opcode: /6 */
   57.16  #define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
    58.1 --- a/xen/include/asm-x86/page.h	Mon Jul 02 10:31:03 2007 -0600
    58.2 +++ b/xen/include/asm-x86/page.h	Mon Jul 02 12:19:26 2007 -0600
    58.3 @@ -192,8 +192,9 @@ static inline l4_pgentry_t l4e_from_padd
    58.4  #define pgentry_ptr_to_slot(_p)    \
    58.5      (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))
    58.6  
    58.7 +#ifndef __ASSEMBLY__
    58.8 +
    58.9  /* Page-table type. */
   58.10 -#ifndef __ASSEMBLY__
   58.11  #if CONFIG_PAGING_LEVELS == 2
   58.12  /* x86_32 default */
   58.13  typedef struct { u32 pfn; } pagetable_t;
   58.14 @@ -214,9 +215,11 @@ typedef struct { u64 pfn; } pagetable_t;
   58.15  #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
   58.16  #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
   58.17  #define pagetable_null()        pagetable_from_pfn(0)
   58.18 -#endif
   58.19  
   58.20 -#define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
   58.21 +void clear_page_sse2(void *);
   58.22 +#define clear_page(_p)      (cpu_has_xmm2 ?                             \
   58.23 +                             clear_page_sse2((void *)(_p)) :            \
   58.24 +                             (void)memset((void *)(_p), 0, PAGE_SIZE))
   58.25  #define copy_page(_t,_f)    memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
   58.26  
   58.27  #define mfn_valid(mfn)      ((mfn) < max_page)
   58.28 @@ -245,6 +248,8 @@ typedef struct { u64 pfn; } pagetable_t;
   58.29  #define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
   58.30  #define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
   58.31  
   58.32 +#endif /* !defined(__ASSEMBLY__) */
   58.33 +
   58.34  /* High table entries are reserved by the hypervisor. */
   58.35  #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
   58.36  #define DOMAIN_ENTRIES_PER_L2_PAGETABLE     \
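
clear_page() now dispatches to clear_page_sse2() when SSE2 is available. The
real routine lives elsewhere in the tree; the following C sketch only
illustrates the technique it is assumed to use, namely non-temporal stores
that avoid filling the cache with a page of zeroes, fenced at the end:

    /* Sketch of an SSE2 non-temporal page clear (assumed implementation). */
    void clear_page_sse2(void *page)
    {
        unsigned long *p = page;
        unsigned int i;

        for ( i = 0; i < PAGE_SIZE / sizeof(*p); i++ )
            asm volatile ( "movnti %1,%0" : "=m" (p[i]) : "r" (0UL) );

        asm volatile ( "sfence" ::: "memory" );  /* order the NT stores */
    }
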
    59.1 --- a/xen/include/asm-x86/processor.h	Mon Jul 02 10:31:03 2007 -0600
    59.2 +++ b/xen/include/asm-x86/processor.h	Mon Jul 02 12:19:26 2007 -0600
    59.3 @@ -104,7 +104,6 @@
    59.4  #define TRAP_alignment_check  17
    59.5  #define TRAP_machine_check    18
    59.6  #define TRAP_simd_error       19
    59.7 -#define TRAP_deferred_nmi     31
    59.8  
    59.9  /* Set for entry via SYSCALL. Informs return code to use SYSRETQ not IRETQ. */
   59.10  /* NB. Same as VGCF_in_syscall. No bits in common with any other TRAP_ defn. */
   59.11 @@ -567,7 +566,8 @@ void compat_show_guest_stack(struct cpu_
   59.12  extern void mtrr_ap_init(void);
   59.13  extern void mtrr_bp_init(void);
   59.14  
   59.15 -extern void mcheck_init(struct cpuinfo_x86 *c);
   59.16 +void mcheck_init(struct cpuinfo_x86 *c);
   59.17 +asmlinkage void do_machine_check(struct cpu_user_regs *regs);
   59.18  
   59.19  int cpuid_hypervisor_leaves(
   59.20      uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
    60.1 --- a/xen/include/asm-x86/x86_32/asm_defns.h	Mon Jul 02 10:31:03 2007 -0600
    60.2 +++ b/xen/include/asm-x86/x86_32/asm_defns.h	Mon Jul 02 12:19:26 2007 -0600
    60.3 @@ -26,7 +26,7 @@ 1:      addl  $4,%esp;
    60.4  #define ASSERT_INTERRUPTS_ENABLED  ASSERT_INTERRUPT_STATUS(nz)
    60.5  #define ASSERT_INTERRUPTS_DISABLED ASSERT_INTERRUPT_STATUS(z)
    60.6  
    60.7 -#define __SAVE_ALL_PRE                                  \
    60.8 +#define SAVE_ALL_GPRS                                   \
    60.9          cld;                                            \
   60.10          pushl %eax;                                     \
   60.11          pushl %ebp;                                     \
   60.12 @@ -35,32 +35,48 @@ 1:      addl  $4,%esp;
   60.13          pushl %esi;                                     \
   60.14          pushl %edx;                                     \
   60.15          pushl %ecx;                                     \
   60.16 -        pushl %ebx;                                     \
   60.17 -        testl $(X86_EFLAGS_VM),UREGS_eflags(%esp);      \
   60.18 -        jz 2f;                                          \
   60.19 -        call setup_vm86_frame;                          \
   60.20 -        jmp 3f;                                         \
   60.21 -        2:testb $3,UREGS_cs(%esp);                      \
   60.22 -        jz 1f;                                          \
   60.23 -        mov %ds,UREGS_ds(%esp);                         \
   60.24 -        mov %es,UREGS_es(%esp);                         \
   60.25 -        mov %fs,UREGS_fs(%esp);                         \
   60.26 -        mov %gs,UREGS_gs(%esp);                         \
   60.27 -        3:
   60.28 +        pushl %ebx
   60.29  
   60.30 -#define SAVE_ALL_NOSEGREGS(_reg)                \
   60.31 -        __SAVE_ALL_PRE                          \
   60.32 -        1:
   60.33 -
   60.34 -#define SET_XEN_SEGMENTS(_reg)                          \
   60.35 -        movl $(__HYPERVISOR_DS),%e ## _reg ## x;        \
   60.36 -        mov %e ## _reg ## x,%ds;                        \
   60.37 -        mov %e ## _reg ## x,%es;
   60.38 -
   60.39 -#define SAVE_ALL(_reg)                          \
   60.40 -        __SAVE_ALL_PRE                          \
   60.41 -        SET_XEN_SEGMENTS(_reg)                  \
   60.42 -        1:
   60.43 +/*
   60.44 + * Saves all register state into an exception/interrupt stack frame.
   60.45 + * Returns to the caller at <xen_lbl> if the interrupted context is within
   60.46 + * Xen; at <vm86_lbl> if the interrupted context is vm86; or falls through
   60.47 + * if the interrupted context is an ordinary guest protected-mode context.
   60.48 + * In all cases %ecx contains __HYPERVISOR_DS. %ds/%es are guaranteed to
   60.49 + * contain __HYPERVISOR_DS unless control passes to <xen_lbl>, in which case
   60.50 + * the caller is responsible for validity of %ds/%es.
   60.51 + */
   60.52 +#define SAVE_ALL(xen_lbl, vm86_lbl)                     \
   60.53 +        SAVE_ALL_GPRS;                                  \
   60.54 +        testl $(X86_EFLAGS_VM),UREGS_eflags(%esp);      \
   60.55 +        mov   %ds,%edi;                                 \
   60.56 +        mov   %es,%esi;                                 \
   60.57 +        mov   $(__HYPERVISOR_DS),%ecx;                  \
   60.58 +        jnz   86f;                                      \
   60.59 +        .text 1;                                        \
   60.60 +        86:   call setup_vm86_frame;                    \
   60.61 +        jmp   vm86_lbl;                                 \
   60.62 +        .previous;                                      \
   60.63 +        testb $3,UREGS_cs(%esp);                        \
   60.64 +        jz    xen_lbl;                                  \
   60.65 +        /*                                              \
   60.66 +         * We are the outermost Xen context, but our    \
   60.67 +         * life is complicated by NMIs and MCEs. These  \
   60.68 +         * could occur in our critical section and      \
   60.69 +         * pollute %ds and %es. We have to detect that  \
   60.70 +         * this has occurred and avoid saving Xen DS/ES \
   60.71 +         * values to the guest stack frame.             \
   60.72 +         */                                             \
   60.73 +        cmpw  %cx,%di;                                  \
   60.74 +        mov   %ecx,%ds;                                 \
   60.75 +        mov   %fs,UREGS_fs(%esp);                       \
   60.76 +        cmove UREGS_ds(%esp),%edi;                      \
   60.77 +        cmpw  %cx,%si;                                  \
   60.78 +        mov   %edi,UREGS_ds(%esp);                      \
   60.79 +        cmove UREGS_es(%esp),%esi;                      \
   60.80 +        mov   %ecx,%es;                                 \
   60.81 +        mov   %gs,UREGS_gs(%esp);                       \
   60.82 +        mov   %esi,UREGS_es(%esp)
   60.83  
   60.84  #ifdef PERF_COUNTERS
   60.85  #define PERFC_INCR(_name,_idx,_cur)                     \
   60.86 @@ -97,8 +113,8 @@ asmlinkage void x(void);                
   60.87      STR(x) ":\n\t"                              \
   60.88      "pushl $"#v"<<16\n\t"                       \
   60.89      STR(FIXUP_RING0_GUEST_STACK)                \
   60.90 -    STR(SAVE_ALL(a))                            \
   60.91 -    "movl %esp,%eax\n\t"                        \
   60.92 +    STR(SAVE_ALL(1f,1f)) "\n\t"                 \
   60.93 +    "1:movl %esp,%eax\n\t"                      \
   60.94      "pushl %eax\n\t"                            \
   60.95      "call "STR(smp_##x)"\n\t"                   \
   60.96      "addl $4,%esp\n\t"                          \
   60.97 @@ -109,8 +125,8 @@ asmlinkage void x(void);                
   60.98      "\n" __ALIGN_STR"\n"                        \
   60.99      "common_interrupt:\n\t"                     \
  60.100      STR(FIXUP_RING0_GUEST_STACK)                \
  60.101 -    STR(SAVE_ALL(a))                            \
  60.102 -    "movl %esp,%eax\n\t"                        \
  60.103 +    STR(SAVE_ALL(1f,1f)) "\n\t"                 \
  60.104 +    "1:movl %esp,%eax\n\t"                      \
  60.105      "pushl %eax\n\t"                            \
  60.106      "call " STR(do_IRQ) "\n\t"                  \
  60.107      "addl $4,%esp\n\t"                          \
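
The cmove sequence in SAVE_ALL is dense; a pseudo-C rendering of one segment
register may help (this is a reading of the assembly above, with read_ds(),
write_ds() and the frame layout as illustrative names; %es is handled
identically):

    uint16_t sel = read_ds();         /* live selector sampled at entry    */
    if ( sel == __HYPERVISOR_DS )     /* a nested NMI/#MC already reloaded */
        sel = frame->ds;              /* it: keep what that handler saved  */
    frame->ds = sel;                  /* never leak Xen's DS to the frame  */
    write_ds(__HYPERVISOR_DS);        /* now safe to touch Xen data        */
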
    61.1 --- a/xen/include/public/platform.h	Mon Jul 02 10:31:03 2007 -0600
    61.2 +++ b/xen/include/public/platform.h	Mon Jul 02 12:19:26 2007 -0600
    61.3 @@ -114,6 +114,45 @@ struct xenpf_platform_quirk {
    61.4  typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
    61.5  DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
    61.6  
    61.7 +#define XENPF_firmware_info       50
    61.8 +#define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
    61.9 +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
   61.10 +#define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
   61.11 +struct xenpf_firmware_info {
   61.12 +    /* IN variables. */
   61.13 +    uint32_t type;
   61.14 +    uint32_t index;
   61.15 +    /* OUT variables. */
   61.16 +    union {
   61.17 +        struct {
   61.18 +            /* Int13, Fn48: Check Extensions Present. */
   61.19 +            uint8_t device;                   /* %dl: bios device number */
   61.20 +            uint8_t version;                  /* %ah: major version      */
   61.21 +            uint16_t interface_support;       /* %cx: support bitmap     */
   61.22 +            /* Int13, Fn08: Legacy Get Device Parameters. */
   61.23 +            uint16_t legacy_max_cylinder;     /* %cl[7:6]:%ch: max cyl # */
   61.24 +            uint8_t legacy_max_head;          /* %dh: max head #         */
   61.25 +            uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector #  */
   61.26 +            /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */
   61.27 +            /* NB. First uint16_t of buffer must be set to buffer size.      */
   61.28 +            XEN_GUEST_HANDLE(void) edd_params;
   61.29 +        } disk_info; /* XEN_FW_DISK_INFO */
   61.30 +        struct {
   61.31 +            uint8_t device;                   /* bios device number  */
   61.32 +            uint32_t mbr_signature;           /* offset 0x1b8 in mbr */
   61.33 +        } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
   61.34 +        struct {
   61.35 +            /* Int10, AX=4F15: Get EDID info. */
   61.36 +            uint8_t capabilities;
   61.37 +            uint8_t edid_transfer_time;
   61.38 +            /* must refer to 128-byte buffer */
   61.39 +            XEN_GUEST_HANDLE(uint8_t) edid;
   61.40 +        } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
   61.41 +    } u;
   61.42 +};
   61.43 +typedef struct xenpf_firmware_info xenpf_firmware_info_t;
   61.44 +DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
   61.45 +
   61.46  struct xen_platform_op {
   61.47      uint32_t cmd;
   61.48      uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
   61.49 @@ -124,6 +163,7 @@ struct xen_platform_op {
   61.50          struct xenpf_read_memtype      read_memtype;
   61.51          struct xenpf_microcode_update  microcode;
   61.52          struct xenpf_platform_quirk    platform_quirk;
   61.53 +        struct xenpf_firmware_info     firmware_info;
   61.54          uint8_t                        pad[128];
   61.55      } u;
   61.56  };
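
A hedged caller sketch for the new sub-op, fetching a disk's MBR signature
(the hypercall wrapper name is illustrative; real callers go through their
platform's hypercall path):

    struct xen_platform_op op = {
        .cmd = XENPF_firmware_info,
        .interface_version = XENPF_INTERFACE_VERSION,
    };

    op.u.firmware_info.type  = XEN_FW_DISK_MBR_SIGNATURE;
    op.u.firmware_info.index = 0;                /* first BIOS disk */

    if ( platform_op_hypercall(&op) == 0 )       /* illustrative wrapper */
        printk("dev %02x signature %08x\n",
               op.u.firmware_info.u.disk_mbr_signature.device,
               op.u.firmware_info.u.disk_mbr_signature.mbr_signature);
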
    62.1 --- a/xen/include/public/sysctl.h	Mon Jul 02 10:31:03 2007 -0600
    62.2 +++ b/xen/include/public/sysctl.h	Mon Jul 02 12:19:26 2007 -0600
    62.3 @@ -140,9 +140,7 @@ struct xen_sysctl_getdomaininfolist {
    62.4  typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
    62.5  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
    62.6  
    62.7 -/*
    62.8 - * Inject debug keys into Xen.
    62.9 - */
   62.10 +/* Inject debug keys into Xen. */
   62.11  #define XEN_SYSCTL_debug_keys        7
   62.12  struct xen_sysctl_debug_keys {
   62.13      /* IN variables. */
   62.14 @@ -152,6 +150,23 @@ struct xen_sysctl_debug_keys {
   62.15  typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
   62.16  DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
   62.17  
   62.18 +/* Get physical CPU information. */
   62.19 +#define XEN_SYSCTL_getcpuinfo        8
   62.20 +struct xen_sysctl_cpuinfo {
   62.21 +    uint64_t idletime;
   62.22 +};
   62.23 +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
   62.24 +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); 
   62.25 +struct xen_sysctl_getcpuinfo {
   62.26 +    /* IN variables. */
   62.27 +    uint32_t max_cpus;
   62.28 +    XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
   62.29 +    /* OUT variables. */
   62.30 +    uint32_t nr_cpus;
   62.31 +}; 
   62.32 +typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
   62.33 +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); 
   62.34 +
   62.35  struct xen_sysctl {
   62.36      uint32_t cmd;
   62.37      uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
   62.38 @@ -163,6 +178,7 @@ struct xen_sysctl {
   62.39          struct xen_sysctl_perfc_op          perfc_op;
   62.40          struct xen_sysctl_getdomaininfolist getdomaininfolist;
   62.41          struct xen_sysctl_debug_keys        debug_keys;
   62.42 +        struct xen_sysctl_getcpuinfo        getcpuinfo;
   62.43          uint8_t                             pad[128];
   62.44      } u;
   62.45  };
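
A hedged caller sketch for XEN_SYSCTL_getcpuinfo (the hypercall wrapper and
use_idle_time() are illustrative names): the caller supplies a buffer and a
bound, and the hypervisor reports how many entries it actually filled:

    #define QUERIED_CPUS 8                       /* caller-chosen bound */
    xen_sysctl_cpuinfo_t info[QUERIED_CPUS];
    struct xen_sysctl op = {
        .cmd = XEN_SYSCTL_getcpuinfo,
        .interface_version = XEN_SYSCTL_INTERFACE_VERSION,
    };
    uint32_t i;

    op.u.getcpuinfo.max_cpus = QUERIED_CPUS;
    set_xen_guest_handle(op.u.getcpuinfo.info, info);

    if ( sysctl_hypercall(&op) == 0 )            /* illustrative wrapper */
        for ( i = 0; i < op.u.getcpuinfo.nr_cpus; i++ )
            use_idle_time(i, info[i].idletime);  /* ns of cumulative idle */
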
    63.1 --- a/xen/include/public/trace.h	Mon Jul 02 10:31:03 2007 -0600
    63.2 +++ b/xen/include/public/trace.h	Mon Jul 02 12:19:26 2007 -0600
    63.3 @@ -88,6 +88,7 @@
    63.4  #define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
    63.5  #define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
    63.6  #define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
    63.7 +#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
    63.8  
    63.9  /* This structure represents a single trace buffer record. */
   63.10  struct t_rec {