ia64/xen-unstable

changeset 8577:4b8919585039

Merged.
author emellor@leeni.uk.xensource.com
date Thu Jan 12 13:20:04 2006 +0100 (2006-01-12)
parents 642b26779c4e 821368442403
children b21261034a25
     1.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S	Thu Jan 12 13:13:34 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/entry.S	Thu Jan 12 13:20:04 2006 +0100
     1.3 @@ -76,7 +76,9 @@ IF_MASK		= 0x00000200
     1.4  DF_MASK		= 0x00000400 
     1.5  NT_MASK		= 0x00004000
     1.6  VM_MASK		= 0x00020000
     1.7 -
     1.8 +/* Pseudo-eflags. */
     1.9 +NMI_MASK	= 0x80000000
    1.10 +	
    1.11  /* Offsets into shared_info_t. */
    1.12  #define evtchn_upcall_pending		/* 0 */
    1.13  #define evtchn_upcall_mask		1
    1.14 @@ -305,8 +307,8 @@ restore_all:
    1.15  	je ldt_ss			# returning to user-space with LDT SS
    1.16  #endif /* XEN */
    1.17  restore_nocheck:
    1.18 -	testl $VM_MASK, EFLAGS(%esp)
    1.19 -	jnz resume_vm86
    1.20 +	testl $(VM_MASK|NMI_MASK), EFLAGS(%esp)
    1.21 +	jnz hypervisor_iret
    1.22  	movb EVENT_MASK(%esp), %al
    1.23  	notb %al			# %al == ~saved_mask
    1.24  	XEN_GET_VCPU_INFO(%esi)
    1.25 @@ -328,11 +330,11 @@ iret_exc:
    1.26  	.long 1b,iret_exc
    1.27  .previous
    1.28  
    1.29 -resume_vm86:
    1.30 -	XEN_UNBLOCK_EVENTS(%esi)
    1.31 +hypervisor_iret:
    1.32 +	andl $~NMI_MASK, EFLAGS(%esp)
    1.33  	RESTORE_REGS
    1.34  	movl %eax,(%esp)
    1.35 -	movl $__HYPERVISOR_switch_vm86,%eax
    1.36 +	movl $__HYPERVISOR_iret,%eax
    1.37  	int $0x82
    1.38  	ud2
    1.39  
    1.40 @@ -691,6 +693,15 @@ debug_stack_correct:
    1.41  	call do_debug
    1.42  	jmp ret_from_exception
    1.43  
    1.44 +ENTRY(nmi)
    1.45 +	pushl %eax
    1.46 +	SAVE_ALL
    1.47 +	xorl %edx,%edx		# zero error code
    1.48 +	movl %esp,%eax		# pt_regs pointer
    1.49 +	call do_nmi
    1.50 +	orl  $NMI_MASK, EFLAGS(%esp)
    1.51 +	jmp restore_all
    1.52 +
    1.53  #if 0 /* XEN */
    1.54  /*
    1.55   * NMI is doubly nasty. It can happen _while_ we're handling
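
Note: a minimal C-level sketch (not part of this changeset; names are
illustrative) of the NMI_MASK pseudo-eflag protocol added above. Bit 31 of
EFLAGS is reserved-zero on real CPUs, so the guest borrows it in the saved
frame to route the return path through the new iret hypercall:

    /* In the NMI entry path (ENTRY(nmi) above): */
    regs->eflags |= NMI_MASK;           /* mark frame as an NMI return */

    /* In restore_all/restore_nocheck: */
    if (regs->eflags & (VM_MASK | NMI_MASK)) {
        regs->eflags &= ~NMI_MASK;      /* strip the pseudo-flag */
        HYPERVISOR_iret();              /* Xen restores the frame and clears
                                           the vcpu's NMI-masked state */
    }
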
     2.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c	Thu Jan 12 13:13:34 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/kernel/traps.c	Thu Jan 12 13:20:04 2006 +0100
     2.3 @@ -506,18 +506,11 @@ static void mem_parity_error(unsigned ch
     2.4  
     2.5  static void io_check_error(unsigned char reason, struct pt_regs * regs)
     2.6  {
     2.7 -	unsigned long i;
     2.8 -
     2.9  	printk("NMI: IOCK error (debug interrupt?)\n");
    2.10  	show_registers(regs);
    2.11  
    2.12  	/* Re-enable the IOCK line, wait for a few seconds */
    2.13 -	reason = (reason & 0xf) | 8;
    2.14 -	outb(reason, 0x61);
    2.15 -	i = 2000;
    2.16 -	while (--i) udelay(1000);
    2.17 -	reason &= ~8;
    2.18 -	outb(reason, 0x61);
    2.19 +	clear_io_check_error(reason);
    2.20  }
    2.21  
    2.22  static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
     3.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S	Thu Jan 12 13:13:34 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/entry.S	Thu Jan 12 13:20:04 2006 +0100
     3.3 @@ -121,19 +121,19 @@
     3.4  	.endm
     3.5  
     3.6          /*
     3.7 -         * Must be consistent with the definition in arch_x86_64.h:    
     3.8 -         *     struct switch_to_user {
     3.9 +         * Must be consistent with the definition in arch-x86_64.h:    
    3.10 +         *     struct iret_context {
    3.11           *        u64 rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
    3.12           *     };
    3.13           * #define VGCF_IN_SYSCALL (1<<8) 
    3.14           */
    3.15 -        .macro SWITCH_TO_USER flag
    3.16 +        .macro HYPERVISOR_IRET flag
    3.17          subq $8*4,%rsp                   # reuse rip, cs, rflags, rsp, ss in the stack
    3.18          movq %rax,(%rsp)
    3.19          movq %r11,1*8(%rsp)
    3.20          movq %rcx,2*8(%rsp)              # we saved %rcx upon exceptions
    3.21          movq $\flag,3*8(%rsp)
    3.22 -        movq $__HYPERVISOR_switch_to_user,%rax
    3.23 +        movq $__HYPERVISOR_iret,%rax
    3.24          syscall
    3.25          .endm
    3.26  
    3.27 @@ -225,7 +225,7 @@ sysret_check:
    3.28  	jnz  sysret_careful 
    3.29          XEN_UNBLOCK_EVENTS(%rsi)                
    3.30  	RESTORE_ARGS 0,8,0
    3.31 -        SWITCH_TO_USER VGCF_IN_SYSCALL
    3.32 +        HYPERVISOR_IRET VGCF_IN_SYSCALL
    3.33  
    3.34  	/* Handle reschedules */
    3.35  	/* edx:	work, edi: workmask */	
    3.36 @@ -478,7 +478,7 @@ kernel_mode:
    3.37          orb   $3,1*8(%rsp)
    3.38  	iretq
    3.39  user_mode:
    3.40 -	SWITCH_TO_USER 0                        
    3.41 +	HYPERVISOR_IRET 0
    3.42  	
    3.43  	/* edi: workmask, edx: work */	
    3.44  retint_careful:
    3.45 @@ -719,6 +719,18 @@ 11:	movb $0, EVENT_MASK(%rsp)
    3.46  	call evtchn_do_upcall
    3.47          jmp  error_exit
    3.48  
    3.49 +#ifdef CONFIG_X86_LOCAL_APIC
    3.50 +ENTRY(nmi)
    3.51 +	zeroentry do_nmi_callback
    3.52 +ENTRY(do_nmi_callback)
    3.53 +        addq $8, %rsp
    3.54 +        call do_nmi
    3.55 +        RESTORE_REST
    3.56 +        XEN_BLOCK_EVENTS(%rsi)
    3.57 +        GET_THREAD_INFO(%rcx)
    3.58 +        jmp  retint_restore_args
    3.59 +#endif
    3.60 +
    3.61          ALIGN
    3.62  restore_all_enable_events:  
    3.63  	XEN_UNBLOCK_EVENTS(%rsi)        # %rsi is already set up...
    3.64 @@ -733,7 +745,7 @@ scrit:	/**** START OF CRITICAL REGION **
    3.65          orb   $3,1*8(%rsp)
    3.66          iretq
    3.67  crit_user_mode:
    3.68 -        SWITCH_TO_USER 0
    3.69 +        HYPERVISOR_IRET 0
    3.70          
    3.71  14:	XEN_LOCKED_BLOCK_EVENTS(%rsi)
    3.72  	XEN_PUT_VCPU_INFO(%rsi)
     4.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c	Thu Jan 12 13:13:34 2006 +0100
     4.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/setup.c	Thu Jan 12 13:20:04 2006 +0100
     4.3 @@ -62,6 +62,7 @@
     4.4  #include <asm-xen/xen-public/physdev.h>
     4.5  #include "setup_arch_pre.h"
     4.6  #include <asm/hypervisor.h>
     4.7 +#include <asm-xen/xen-public/nmi.h>
     4.8  #define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
     4.9  #define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
    4.10  #define end_pfn_map end_pfn
     5.1 --- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c	Thu Jan 12 13:13:34 2006 +0100
     5.2 +++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/traps.c	Thu Jan 12 13:20:04 2006 +0100
     5.3 @@ -559,9 +559,11 @@ static void mem_parity_error(unsigned ch
     5.4  	printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
     5.5  	printk("You probably have a hardware problem with your RAM chips\n");
     5.6  
     5.7 +#if 0 /* XEN */
     5.8  	/* Clear and disable the memory parity error line. */
     5.9  	reason = (reason & 0xf) | 4;
    5.10  	outb(reason, 0x61);
    5.11 +#endif /* XEN */
    5.12  }
    5.13  
    5.14  static void io_check_error(unsigned char reason, struct pt_regs * regs)
    5.15 @@ -569,12 +571,14 @@ static void io_check_error(unsigned char
    5.16  	printk("NMI: IOCK error (debug interrupt?)\n");
    5.17  	show_registers(regs);
    5.18  
    5.19 +#if 0 /* XEN */
    5.20  	/* Re-enable the IOCK line, wait for a few seconds */
    5.21  	reason = (reason & 0xf) | 8;
    5.22  	outb(reason, 0x61);
    5.23  	mdelay(2000);
    5.24  	reason &= ~8;
    5.25  	outb(reason, 0x61);
    5.26 +#endif /* XEN */
    5.27  }
    5.28  
    5.29  static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
     6.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Thu Jan 12 13:13:34 2006 +0100
     6.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/hypercall.h	Thu Jan 12 13:20:04 2006 +0100
     6.3 @@ -32,6 +32,7 @@
     6.4  
     6.5  #include <asm-xen/xen-public/xen.h>
     6.6  #include <asm-xen/xen-public/sched.h>
     6.7 +#include <asm-xen/xen-public/nmi.h>
     6.8  
     6.9  #define _hypercall0(type, name)			\
    6.10  ({						\
    6.11 @@ -300,6 +301,14 @@ HYPERVISOR_suspend(
    6.12  			   SHUTDOWN_suspend, srec);
    6.13  }
    6.14  
    6.15 +static inline int
    6.16 +HYPERVISOR_nmi_op(
    6.17 +	unsigned long op,
    6.18 +	unsigned long arg)
    6.19 +{
    6.20 +	return _hypercall2(int, nmi_op, op, arg);
    6.21 +}
    6.22 +
    6.23  #endif /* __HYPERCALL_H__ */
    6.24  
    6.25  /*
     7.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     7.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/mach_traps.h	Thu Jan 12 13:20:04 2006 +0100
     7.3 @@ -0,0 +1,33 @@
     7.4 +/*
     7.5 + *  include/asm-xen/asm-i386/mach-xen/mach_traps.h
     7.6 + *
     7.7 + *  Machine specific NMI handling for Xen
     7.8 + */
     7.9 +#ifndef _MACH_TRAPS_H
    7.10 +#define _MACH_TRAPS_H
    7.11 +
    7.12 +#include <linux/bitops.h>
    7.13 +#include <asm-xen/xen-public/nmi.h>
    7.14 +
    7.15 +static inline void clear_mem_error(unsigned char reason) {}
    7.16 +static inline void clear_io_check_error(unsigned char reason) {}
    7.17 +
    7.18 +static inline unsigned char get_nmi_reason(void)
    7.19 +{
    7.20 +	shared_info_t *s = HYPERVISOR_shared_info;
    7.21 +	unsigned char reason = 0;
    7.22 +
    7.23 +	/* construct a value which looks like it came from
    7.24 +	 * port 0x61.
    7.25 +	 */
    7.26 +	if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
    7.27 +		reason |= 0x40;
    7.28 +	if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
    7.29 +		reason |= 0x80;
    7.30 +
    7.31 +        return reason;
    7.32 +}
    7.33 +
    7.34 +static inline void reassert_nmi(void) {}
    7.35 +
    7.36 +#endif /* !_MACH_TRAPS_H */
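
Note: a sketch (not part of the changeset) of how the synthesized reason byte
is consumed, assuming the generic i386 NMI dispatch is unchanged; bit 7 of
port 0x61 reports a memory parity error and bit 6 an I/O check error, which
is why get_nmi_reason() above sets exactly those bits:

    unsigned char reason = get_nmi_reason();

    if (reason & 0x80)        /* bit 7: memory parity error */
        mem_parity_error(reason, regs);
    if (reason & 0x40)        /* bit 6: I/O check (IOCHK) error */
        io_check_error(reason, regs);
    if (!(reason & 0xc0))     /* neither bit: unknown source */
        unknown_nmi_error(reason, regs);
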
     8.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h	Thu Jan 12 13:13:34 2006 +0100
     8.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h	Thu Jan 12 13:20:04 2006 +0100
     8.3 @@ -29,6 +29,7 @@ void __init machine_specific_modify_cpu_
     8.4  
     8.5  extern void hypervisor_callback(void);
     8.6  extern void failsafe_callback(void);
     8.7 +extern void nmi(void);
     8.8  
     8.9  static void __init machine_specific_arch_setup(void)
    8.10  {
    8.11 @@ -36,5 +37,7 @@ static void __init machine_specific_arch
    8.12  	    __KERNEL_CS, (unsigned long)hypervisor_callback,
    8.13  	    __KERNEL_CS, (unsigned long)failsafe_callback);
    8.14  
    8.15 +	HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
    8.16 +
    8.17  	machine_specific_modify_cpu_capabilities(&boot_cpu_data);
    8.18  }
     9.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h	Thu Jan 12 13:13:34 2006 +0100
     9.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/hypercall.h	Thu Jan 12 13:20:04 2006 +0100
     9.3 @@ -287,9 +287,9 @@ HYPERVISOR_vcpu_op(
     9.4  }
     9.5  
     9.6  static inline int
     9.7 -HYPERVISOR_switch_to_user(void)
     9.8 +HYPERVISOR_iret(void)
     9.9  {
    9.10 -	return _hypercall0(int, switch_to_user);
    9.11 +	return _hypercall0(int, iret);
    9.12  }
    9.13  
    9.14  static inline int
    9.15 @@ -307,6 +307,14 @@ HYPERVISOR_suspend(
    9.16  			   SHUTDOWN_suspend, srec);
    9.17  }
    9.18  
    9.19 +static inline int
    9.20 +HYPERVISOR_nmi_op(
    9.21 +	unsigned long op,
    9.22 +	unsigned long arg)
    9.23 +{
    9.24 +	return _hypercall2(int, nmi_op, op, arg);
    9.25 +}
    9.26 +
    9.27  #endif /* __HYPERCALL_H__ */
    9.28  
    9.29  /*
    10.1 --- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h	Thu Jan 12 13:13:34 2006 +0100
    10.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mach-xen/setup_arch_post.h	Thu Jan 12 13:20:04 2006 +0100
    10.3 @@ -35,6 +35,7 @@ void __init machine_specific_modify_cpu_
    10.4  
    10.5  extern void hypervisor_callback(void);
    10.6  extern void failsafe_callback(void);
    10.7 +extern void nmi(void);
    10.8  
    10.9  static void __init machine_specific_arch_setup(void)
   10.10  {
   10.11 @@ -43,5 +44,9 @@ static void __init machine_specific_arch
   10.12                  (unsigned long) failsafe_callback,
   10.13                  (unsigned long) system_call);
   10.14  
   10.15 +#ifdef CONFIG_X86_LOCAL_APIC
   10.16 +	HYPERVISOR_nmi_op(XENNMI_register_callback, (unsigned long)&nmi);
   10.17 +#endif
   10.18 +
   10.19  	machine_specific_modify_cpu_capabilities(&boot_cpu_data);
   10.20  }
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/nmi.h	Thu Jan 12 13:20:04 2006 +0100
    11.3 @@ -0,0 +1,75 @@
    11.4 +/*
    11.5 + *  linux/include/asm-i386/nmi.h
    11.6 + */
    11.7 +#ifndef ASM_NMI_H
    11.8 +#define ASM_NMI_H
    11.9 +
   11.10 +#include <linux/pm.h>
   11.11 +
   11.12 +#include <asm-xen/xen-public/nmi.h>
   11.13 +
   11.14 +struct pt_regs;
   11.15 + 
   11.16 +typedef int (*nmi_callback_t)(struct pt_regs * regs, int cpu);
   11.17 + 
   11.18 +/** 
   11.19 + * set_nmi_callback
   11.20 + *
   11.21 + * Set a handler for an NMI. Only one handler may be
   11.22 + * set. Return 1 if the NMI was handled.
   11.23 + */
   11.24 +void set_nmi_callback(nmi_callback_t callback);
   11.25 + 
   11.26 +/** 
   11.27 + * unset_nmi_callback
   11.28 + *
   11.29 + * Remove the handler previously set.
   11.30 + */
   11.31 +void unset_nmi_callback(void);
   11.32 + 
   11.33 +#ifdef CONFIG_PM
   11.34 + 
   11.35 +/** Replace the PM callback routine for NMI. */
   11.36 +struct pm_dev * set_nmi_pm_callback(pm_callback callback);
   11.37 +
   11.38 +/** Unset the PM callback routine back to the default. */
   11.39 +void unset_nmi_pm_callback(struct pm_dev * dev);
   11.40 +
   11.41 +#else
   11.42 +
   11.43 +static inline struct pm_dev * set_nmi_pm_callback(pm_callback callback)
   11.44 +{
   11.45 +	return 0;
   11.46 +} 
   11.47 + 
   11.48 +static inline void unset_nmi_pm_callback(struct pm_dev * dev)
   11.49 +{
   11.50 +}
   11.51 +
   11.52 +#endif /* CONFIG_PM */
   11.53 + 
   11.54 +extern void default_do_nmi(struct pt_regs *);
   11.55 +extern void die_nmi(char *str, struct pt_regs *regs);
   11.56 +
   11.57 +static inline unsigned char get_nmi_reason(void)
   11.58 +{
   11.59 +        shared_info_t *s = HYPERVISOR_shared_info;
   11.60 +        unsigned char reason = 0;
   11.61 +
   11.62 +        /* construct a value which looks like it came from
   11.63 +         * port 0x61.
   11.64 +         */
   11.65 +        if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason))
   11.66 +                reason |= 0x40;
   11.67 +        if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason))
   11.68 +                reason |= 0x80;
   11.69 +
   11.70 +        return reason;
   11.71 +}
   11.72 +
   11.73 +extern int panic_on_timeout;
   11.74 +extern int unknown_nmi_panic;
   11.75 +
   11.76 +extern int check_nmi_watchdog(void);
   11.77 + 
   11.78 +#endif /* ASM_NMI_H */
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/patches/linux-2.6.12/i386-mach-io-check-nmi.patch	Thu Jan 12 13:20:04 2006 +0100
    12.3 @@ -0,0 +1,43 @@
    12.4 +--- ref-linux-2.6.12/arch/i386/kernel/traps.c	2005-12-19 09:23:44.000000000 +0000
    12.5 ++++ linux-2.6.12-xen0/arch/i386/kernel/traps.c	2006-01-05 15:51:52.000000000 +0000
    12.6 +@@ -521,18 +521,11 @@
    12.7 + 
    12.8 + static void io_check_error(unsigned char reason, struct pt_regs * regs)
    12.9 + {
   12.10 +-	unsigned long i;
   12.11 +-
   12.12 + 	printk("NMI: IOCK error (debug interrupt?)\n");
   12.13 + 	show_registers(regs);
   12.14 + 
   12.15 + 	/* Re-enable the IOCK line, wait for a few seconds */
   12.16 +-	reason = (reason & 0xf) | 8;
   12.17 +-	outb(reason, 0x61);
   12.18 +-	i = 2000;
   12.19 +-	while (--i) udelay(1000);
   12.20 +-	reason &= ~8;
   12.21 +-	outb(reason, 0x61);
   12.22 ++	clear_io_check_error(reason);
   12.23 + }
   12.24 + 
   12.25 + static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
   12.26 +--- ref-linux-2.6.12/include/asm-i386/mach-default/mach_traps.h	2005-06-17 20:48:29.000000000 +0100
   12.27 ++++ linux-2.6.12-xen0/include/asm-i386/mach-default/mach_traps.h	2006-01-05 15:52:33.000000000 +0000
   12.28 +@@ -15,6 +15,18 @@
   12.29 + 	outb(reason, 0x61);
   12.30 + }
   12.31 + 
   12.32 ++static inline void clear_io_check_error(unsigned char reason)
   12.33 ++{
   12.34 ++	unsigned long i;
   12.35 ++
   12.36 ++	reason = (reason & 0xf) | 8;
   12.37 ++	outb(reason, 0x61);
   12.38 ++	i = 2000;
   12.39 ++	while (--i) udelay(1000);
   12.40 ++	reason &= ~8;
   12.41 ++	outb(reason, 0x61);
   12.42 ++}
   12.43 ++
   12.44 + static inline unsigned char get_nmi_reason(void)
   12.45 + {
   12.46 + 	return inb(0x61);
    13.1 --- a/xen/arch/x86/domain.c	Thu Jan 12 13:13:34 2006 +0100
    13.2 +++ b/xen/arch/x86/domain.c	Thu Jan 12 13:20:04 2006 +0100
    13.3 @@ -288,9 +288,7 @@ int arch_do_createdomain(struct vcpu *v)
    13.4  
    13.5  #if defined(__i386__)
    13.6  
    13.7 -    d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
    13.8 -        (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
    13.9 -    spin_lock_init(&d->arch.mapcache.lock);
   13.10 +    mapcache_init(d);
   13.11  
   13.12  #else /* __x86_64__ */
   13.13  
   13.14 @@ -482,14 +480,6 @@ void new_thread(struct vcpu *d,
   13.15  
   13.16  #ifdef __x86_64__
   13.17  
   13.18 -void toggle_guest_mode(struct vcpu *v)
   13.19 -{
   13.20 -    v->arch.flags ^= TF_kernel_mode;
   13.21 -    __asm__ __volatile__ ( "swapgs" );
   13.22 -    update_pagetables(v);
   13.23 -    write_ptbase(v);
   13.24 -}
   13.25 -
   13.26  #define loadsegment(seg,value) ({               \
   13.27      int __r = 1;                                \
   13.28      __asm__ __volatile__ (                      \
   13.29 @@ -659,35 +649,6 @@ static void save_segments(struct vcpu *v
   13.30      percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
   13.31  }
   13.32  
   13.33 -long do_switch_to_user(void)
   13.34 -{
   13.35 -    struct cpu_user_regs  *regs = guest_cpu_user_regs();
   13.36 -    struct switch_to_user  stu;
   13.37 -    struct vcpu    *v = current;
   13.38 -
   13.39 -    if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
   13.40 -         unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
   13.41 -        return -EFAULT;
   13.42 -
   13.43 -    toggle_guest_mode(v);
   13.44 -
   13.45 -    regs->rip    = stu.rip;
   13.46 -    regs->cs     = stu.cs | 3; /* force guest privilege */
   13.47 -    regs->rflags = (stu.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
   13.48 -    regs->rsp    = stu.rsp;
   13.49 -    regs->ss     = stu.ss | 3; /* force guest privilege */
   13.50 -
   13.51 -    if ( !(stu.flags & VGCF_IN_SYSCALL) )
   13.52 -    {
   13.53 -        regs->entry_vector = 0;
   13.54 -        regs->r11 = stu.r11;
   13.55 -        regs->rcx = stu.rcx;
   13.56 -    }
   13.57 -
   13.58 -    /* Saved %rax gets written back to regs->rax in entry.S. */
   13.59 -    return stu.rax;
   13.60 -}
   13.61 -
   13.62  #define switch_kernel_stack(_n,_c) ((void)0)
   13.63  
   13.64  #elif defined(__i386__)
    14.1 --- a/xen/arch/x86/mm.c	Thu Jan 12 13:13:34 2006 +0100
    14.2 +++ b/xen/arch/x86/mm.c	Thu Jan 12 13:20:04 2006 +0100
    14.3 @@ -297,7 +297,6 @@ int map_ldt_shadow_page(unsigned int off
    14.4  
    14.5  #if defined(__x86_64__)
    14.6      /* If in user mode, switch to kernel mode just to read LDT mapping. */
    14.7 -    extern void toggle_guest_mode(struct vcpu *);
    14.8      int user_mode = !(v->arch.flags & TF_kernel_mode);
    14.9  #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
   14.10  #elif defined(__i386__)
   14.11 @@ -2971,7 +2970,6 @@ void ptwr_flush(struct domain *d, const 
   14.12  
   14.13  #ifdef CONFIG_X86_64
   14.14      struct vcpu *v = current;
   14.15 -    extern void toggle_guest_mode(struct vcpu *);
   14.16      int user_mode = !(v->arch.flags & TF_kernel_mode);
   14.17  #endif
   14.18  
    15.1 --- a/xen/arch/x86/traps.c	Thu Jan 12 13:13:34 2006 +0100
    15.2 +++ b/xen/arch/x86/traps.c	Thu Jan 12 13:20:04 2006 +0100
    15.3 @@ -596,7 +596,6 @@ static inline int guest_io_okay(
    15.4      u16 x;
    15.5  #if defined(__x86_64__)
    15.6      /* If in user mode, switch to kernel mode just to read I/O bitmap. */
    15.7 -    extern void toggle_guest_mode(struct vcpu *);
    15.8      int user_mode = !(v->arch.flags & TF_kernel_mode);
    15.9  #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
   15.10  #elif defined(__i386__)
   15.11 @@ -1080,26 +1079,23 @@ asmlinkage int do_general_protection(str
   15.12      return 0;
   15.13  }
   15.14  
   15.15 +static void nmi_softirq(void)
   15.16 +{
   15.17 +    /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
   15.18 +    evtchn_notify(dom0->vcpu[0]);
   15.19 +}
   15.20  
   15.21 -/* Defer dom0 notification to softirq context (unsafe in NMI context). */
   15.22 -static unsigned long nmi_dom0_softirq_reason;
   15.23 -#define NMI_DOM0_PARITY_ERR 0
   15.24 -#define NMI_DOM0_IO_ERR     1
   15.25 -#define NMI_DOM0_UNKNOWN    2
   15.26 +static void nmi_dom0_report(unsigned int reason_idx)
   15.27 +{
   15.28 +    struct domain *d;
   15.29  
   15.30 -static void nmi_dom0_softirq(void)
   15.31 -{
   15.32 -    if ( dom0 == NULL )
   15.33 +    if ( (d = dom0) == NULL )
   15.34          return;
   15.35  
   15.36 -    if ( test_and_clear_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason) )
   15.37 -        send_guest_virq(dom0->vcpu[0], VIRQ_PARITY_ERR);
   15.38 +    set_bit(reason_idx, &d->shared_info->arch.nmi_reason);
   15.39  
   15.40 -    if ( test_and_clear_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason) )
   15.41 -        send_guest_virq(dom0->vcpu[0], VIRQ_IO_ERR);
   15.42 -
   15.43 -    if ( test_and_clear_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason) )
   15.44 -        send_guest_virq(dom0->vcpu[0], VIRQ_NMI);
   15.45 +    if ( test_and_set_bit(_VCPUF_nmi_pending, &d->vcpu[0]->vcpu_flags) )
   15.46 +        raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
   15.47  }
   15.48  
   15.49  asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
   15.50 @@ -1107,8 +1103,7 @@ asmlinkage void mem_parity_error(struct 
   15.51      switch ( opt_nmi[0] )
   15.52      {
   15.53      case 'd': /* 'dom0' */
   15.54 -        set_bit(NMI_DOM0_PARITY_ERR, &nmi_dom0_softirq_reason);
   15.55 -        raise_softirq(NMI_DOM0_SOFTIRQ);
   15.56 +        nmi_dom0_report(_XEN_NMIREASON_parity_error);
   15.57      case 'i': /* 'ignore' */
   15.58          break;
   15.59      default:  /* 'fatal' */
   15.60 @@ -1127,8 +1122,7 @@ asmlinkage void io_check_error(struct cp
   15.61      switch ( opt_nmi[0] )
   15.62      {
   15.63      case 'd': /* 'dom0' */
   15.64 -        set_bit(NMI_DOM0_IO_ERR, &nmi_dom0_softirq_reason);
   15.65 -        raise_softirq(NMI_DOM0_SOFTIRQ);
   15.66 +        nmi_dom0_report(_XEN_NMIREASON_io_error);
   15.67      case 'i': /* 'ignore' */
   15.68          break;
   15.69      default:  /* 'fatal' */
   15.70 @@ -1147,8 +1141,7 @@ static void unknown_nmi_error(unsigned c
   15.71      switch ( opt_nmi[0] )
   15.72      {
   15.73      case 'd': /* 'dom0' */
   15.74 -        set_bit(NMI_DOM0_UNKNOWN, &nmi_dom0_softirq_reason);
   15.75 -        raise_softirq(NMI_DOM0_SOFTIRQ);
   15.76 +        nmi_dom0_report(_XEN_NMIREASON_unknown);
   15.77      case 'i': /* 'ignore' */
   15.78          break;
   15.79      default:  /* 'fatal' */
   15.80 @@ -1347,7 +1340,7 @@ void __init trap_init(void)
   15.81  
   15.82      cpu_init();
   15.83  
   15.84 -    open_softirq(NMI_DOM0_SOFTIRQ, nmi_dom0_softirq);
   15.85 +    open_softirq(NMI_SOFTIRQ, nmi_softirq);
   15.86  }
   15.87  
   15.88  
    16.1 --- a/xen/arch/x86/x86_32/asm-offsets.c	Thu Jan 12 13:13:34 2006 +0100
    16.2 +++ b/xen/arch/x86/x86_32/asm-offsets.c	Thu Jan 12 13:20:04 2006 +0100
    16.3 @@ -65,6 +65,10 @@ void __dummy__(void)
    16.4             arch.guest_context.kernel_ss);
    16.5      OFFSET(VCPU_kernel_sp, struct vcpu,
    16.6             arch.guest_context.kernel_sp);
    16.7 +    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
    16.8 +    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
    16.9 +    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
   16.10 +    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
   16.11      BLANK();
   16.12  
   16.13      OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
    17.1 --- a/xen/arch/x86/x86_32/domain_page.c	Thu Jan 12 13:13:34 2006 +0100
    17.2 +++ b/xen/arch/x86/x86_32/domain_page.c	Thu Jan 12 13:20:04 2006 +0100
    17.3 @@ -20,33 +20,16 @@
    17.4  #include <asm/flushtlb.h>
    17.5  #include <asm/hardirq.h>
    17.6  
    17.7 -#define MAPCACHE_ORDER    10
    17.8 -#define MAPCACHE_ENTRIES  (1 << MAPCACHE_ORDER)
    17.9 -
   17.10 -/* Use a spare PTE bit to mark entries ready for recycling. */
   17.11 -#define READY_FOR_TLB_FLUSH (1<<10)
   17.12 -
   17.13 -static void flush_all_ready_maps(void)
   17.14 -{
   17.15 -    struct mapcache *cache = &current->domain->arch.mapcache;
   17.16 -    unsigned int i;
   17.17 -
   17.18 -    for ( i = 0; i < MAPCACHE_ENTRIES; i++ )
   17.19 -        if ( (l1e_get_flags(cache->l1tab[i]) & READY_FOR_TLB_FLUSH) )
   17.20 -            cache->l1tab[i] = l1e_empty();
   17.21 -}
   17.22 -
   17.23 -void *map_domain_pages(unsigned long pfn, unsigned int order)
   17.24 +void *map_domain_page(unsigned long pfn)
   17.25  {
   17.26      unsigned long va;
   17.27 -    unsigned int idx, i, flags, vcpu = current->vcpu_id;
   17.28 +    unsigned int idx, i, vcpu = current->vcpu_id;
   17.29      struct domain *d;
   17.30      struct mapcache *cache;
   17.31 -#ifndef NDEBUG
   17.32 -    unsigned int flush_count = 0;
   17.33 -#endif
   17.34 +    struct vcpu_maphash_entry *hashent;
   17.35  
   17.36      ASSERT(!in_irq());
   17.37 +
   17.38      perfc_incrc(map_domain_page_count);
   17.39  
   17.40      /* If we are the idle domain, ensure that we run on our own page tables. */
   17.41 @@ -56,6 +39,18 @@ void *map_domain_pages(unsigned long pfn
   17.42  
   17.43      cache = &d->arch.mapcache;
   17.44  
   17.45 +    hashent = &cache->vcpu_maphash[vcpu].hash[MAPHASH_HASHFN(pfn)];
   17.46 +#if 0
   17.47 +    if ( hashent->pfn == pfn )
   17.48 +    {
   17.49 +        idx = hashent->idx;
   17.50 +        hashent->refcnt++;
   17.51 +        ASSERT(hashent->refcnt != 0);
   17.52 +        ASSERT(l1e_get_pfn(cache->l1tab[idx]) == pfn);
   17.53 +        goto out;
   17.54 +    }
   17.55 +#endif
   17.56 +
   17.57      spin_lock(&cache->lock);
   17.58  
   17.59      /* Has some other CPU caused a wrap? We must flush if so. */
   17.60 @@ -70,45 +65,97 @@ void *map_domain_pages(unsigned long pfn
   17.61          }
   17.62      }
   17.63  
   17.64 -    do {
   17.65 -        idx = cache->cursor = (cache->cursor + 1) & (MAPCACHE_ENTRIES - 1);
   17.66 -        if ( unlikely(idx == 0) )
   17.67 +    idx = find_next_zero_bit(cache->inuse, MAPCACHE_ENTRIES, cache->cursor);
   17.68 +    if ( unlikely(idx >= MAPCACHE_ENTRIES) )
   17.69 +    {
   17.70 +        /* /First/, clean the garbage map and update the inuse list. */
   17.71 +        for ( i = 0; i < ARRAY_SIZE(cache->garbage); i++ )
   17.72          {
   17.73 -            ASSERT(flush_count++ == 0);
   17.74 -            flush_all_ready_maps();
   17.75 -            perfc_incrc(domain_page_tlb_flush);
   17.76 -            local_flush_tlb();
   17.77 -            cache->shadow_epoch[vcpu] = ++cache->epoch;
   17.78 -            cache->tlbflush_timestamp = tlbflush_current_time();
   17.79 +            unsigned long x = xchg(&cache->garbage[i], 0);
   17.80 +            cache->inuse[i] &= ~x;
   17.81          }
   17.82  
   17.83 -        flags = 0;
   17.84 -        for ( i = 0; i < (1U << order); i++ )
   17.85 -            flags |= l1e_get_flags(cache->l1tab[idx+i]);
   17.86 +        /* /Second/, flush TLBs. */
   17.87 +        perfc_incrc(domain_page_tlb_flush);
   17.88 +        local_flush_tlb();
   17.89 +        cache->shadow_epoch[vcpu] = ++cache->epoch;
   17.90 +        cache->tlbflush_timestamp = tlbflush_current_time();
   17.91 +
   17.92 +        idx = find_first_zero_bit(cache->inuse, MAPCACHE_ENTRIES);
   17.93 +        ASSERT(idx < MAPCACHE_ENTRIES);
   17.94      }
   17.95 -    while ( flags & _PAGE_PRESENT );
   17.96  
   17.97 -    for ( i = 0; i < (1U << order); i++ )
   17.98 -        cache->l1tab[idx+i] = l1e_from_pfn(pfn+i, __PAGE_HYPERVISOR);
   17.99 +    set_bit(idx, cache->inuse);
  17.100 +    cache->cursor = idx + 1;
  17.101  
  17.102      spin_unlock(&cache->lock);
  17.103  
  17.104 +    cache->l1tab[idx] = l1e_from_pfn(pfn, __PAGE_HYPERVISOR);
  17.105 +
  17.106 +/*out:*/
  17.107      va = MAPCACHE_VIRT_START + (idx << PAGE_SHIFT);
  17.108      return (void *)va;
  17.109  }
  17.110  
  17.111 -void unmap_domain_pages(void *va, unsigned int order)
  17.112 +void unmap_domain_page(void *va)
  17.113  {
  17.114 -    unsigned int idx, i;
  17.115 +    unsigned int idx;
  17.116      struct mapcache *cache = &current->domain->arch.mapcache;
  17.117 +    unsigned long pfn;
  17.118 +    struct vcpu_maphash_entry *hashent;
  17.119 +
  17.120 +    ASSERT(!in_irq());
  17.121  
  17.122      ASSERT((void *)MAPCACHE_VIRT_START <= va);
  17.123      ASSERT(va < (void *)MAPCACHE_VIRT_END);
  17.124  
  17.125      idx = ((unsigned long)va - MAPCACHE_VIRT_START) >> PAGE_SHIFT;
  17.126 +    pfn = l1e_get_pfn(cache->l1tab[idx]);
  17.127 +    hashent = &cache->vcpu_maphash[current->vcpu_id].hash[MAPHASH_HASHFN(pfn)];
  17.128  
  17.129 -    for ( i = 0; i < (1U << order); i++ )
  17.130 -        l1e_add_flags(cache->l1tab[idx+i], READY_FOR_TLB_FLUSH);
  17.131 +    if ( hashent->idx == idx )
  17.132 +    {
  17.133 +        ASSERT(hashent->pfn == pfn);
  17.134 +        ASSERT(hashent->refcnt != 0);
  17.135 +        hashent->refcnt--;
  17.136 +    }
  17.137 +    else if ( hashent->refcnt == 0 )
  17.138 +    {
  17.139 +        if ( hashent->idx != MAPHASHENT_NOTINUSE )
  17.140 +        {
  17.141 +            /* /First/, zap the PTE. */
  17.142 +            ASSERT(l1e_get_pfn(cache->l1tab[hashent->idx]) == hashent->pfn);
  17.143 +            cache->l1tab[hashent->idx] = l1e_empty();
  17.144 +            /* /Second/, mark as garbage. */
  17.145 +            set_bit(hashent->idx, cache->garbage);
  17.146 +        }
  17.147 +
  17.148 +        /* Add newly-freed mapping to the maphash. */
  17.149 +        hashent->pfn = pfn;
  17.150 +        hashent->idx = idx;
  17.151 +    }
  17.152 +    else
  17.153 +    {
  17.154 +        /* /First/, zap the PTE. */
  17.155 +        cache->l1tab[idx] = l1e_empty();
  17.156 +        /* /Second/, mark as garbage. */
  17.157 +        set_bit(idx, cache->garbage);
  17.158 +    }
  17.159 +}
  17.160 +
  17.161 +void mapcache_init(struct domain *d)
  17.162 +{
  17.163 +    unsigned int i, j;
  17.164 +
  17.165 +    d->arch.mapcache.l1tab = d->arch.mm_perdomain_pt +
  17.166 +        (GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
  17.167 +    spin_lock_init(&d->arch.mapcache.lock);
  17.168 +
  17.169 +    /* Mark all maphash entries as not in use. */
  17.170 +    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
  17.171 +        for ( j = 0; j < MAPHASH_ENTRIES; j++ )
  17.172 +            d->arch.mapcache.vcpu_maphash[i].hash[j].idx =
  17.173 +                MAPHASHENT_NOTINUSE;
  17.174  }
  17.175  
  17.176  #define GLOBALMAP_BITS (IOREMAP_MBYTES << (20 - PAGE_SHIFT))
  17.177 @@ -128,15 +175,10 @@ void *map_domain_page_global(unsigned lo
  17.178  
  17.179      spin_lock(&globalmap_lock);
  17.180  
  17.181 -    for ( ; ; )
  17.182 +    idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
  17.183 +    va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
  17.184 +    if ( unlikely(va >= FIXADDR_START) )
  17.185      {
  17.186 -        idx = find_next_zero_bit(inuse, GLOBALMAP_BITS, inuse_cursor);
  17.187 -        va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
  17.188 -
  17.189 -        /* End of round? If not then we're done in this loop. */
  17.190 -        if ( va < FIXADDR_START )
  17.191 -            break;
  17.192 -
  17.193          /* /First/, clean the garbage map and update the inuse list. */
  17.194          for ( i = 0; i < ARRAY_SIZE(garbage); i++ )
  17.195          {
  17.196 @@ -147,7 +189,9 @@ void *map_domain_page_global(unsigned lo
  17.197          /* /Second/, flush all TLBs to get rid of stale garbage mappings. */
  17.198          flush_tlb_all();
  17.199  
  17.200 -        inuse_cursor = 0;
  17.201 +        idx = find_first_zero_bit(inuse, GLOBALMAP_BITS);
  17.202 +        va = IOREMAP_VIRT_START + (idx << PAGE_SHIFT);
  17.203 +        ASSERT(va < FIXADDR_START);
  17.204      }
  17.205  
  17.206      set_bit(idx, inuse);
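
Note: the new map/unmap protocol above is lock-free on the unmap side; a
condensed sketch (not part of the changeset) of the reclaim steps shared by
map_domain_page() and map_domain_page_global():

    /* Unmapper (no lock taken): */
    cache->l1tab[idx] = l1e_empty();    /* 1. zap the PTE */
    set_bit(idx, cache->garbage);       /* 2. publish the slot as garbage */

    /* Mapper, when the free-slot search wraps (under the cache lock): */
    for (i = 0; i < ARRAY_SIZE(cache->garbage); i++) {
        unsigned long x = xchg(&cache->garbage[i], 0);  /* steal garbage bits */
        cache->inuse[i] &= ~x;                          /* free those slots */
    }
    local_flush_tlb();                  /* stale mappings die before slot reuse */
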
    18.1 --- a/xen/arch/x86/x86_32/entry.S	Thu Jan 12 13:13:34 2006 +0100
    18.2 +++ b/xen/arch/x86/x86_32/entry.S	Thu Jan 12 13:20:04 2006 +0100
    18.3 @@ -326,7 +326,9 @@ test_all_events:
    18.4          shl  $IRQSTAT_shift,%eax
    18.5          test %ecx,irq_stat(%eax,1)
    18.6          jnz  process_softirqs
    18.7 -/*test_guest_events:*/
    18.8 +        btr  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
    18.9 +        jc   process_nmi
   18.10 +test_guest_events:
   18.11          movl VCPU_vcpu_info(%ebx),%eax
   18.12          testb $0xFF,VCPUINFO_upcall_mask(%eax)
   18.13          jnz  restore_all_guest
   18.14 @@ -348,7 +350,24 @@ process_softirqs:
   18.15          sti       
   18.16          call do_softirq
   18.17          jmp  test_all_events
   18.18 -                
   18.19 +	
   18.20 +	ALIGN
   18.21 +process_nmi:
   18.22 +        movl VCPU_nmi_addr(%ebx),%eax
   18.23 +        test %eax,%eax
   18.24 +        jz   test_all_events
   18.25 +        bts  $_VCPUF_nmi_masked,VCPU_flags(%ebx)
   18.26 +        jc   1f
   18.27 +        sti
   18.28 +        leal VCPU_trap_bounce(%ebx),%edx
   18.29 +        movl %eax,TRAPBOUNCE_eip(%edx)
   18.30 +        movw $FLAT_KERNEL_CS,TRAPBOUNCE_cs(%edx)
   18.31 +        movw $TBF_INTERRUPT,TRAPBOUNCE_flags(%edx)
   18.32 +        call create_bounce_frame
   18.33 +        jmp  test_all_events
   18.34 +1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%ebx)
   18.35 +        jmp  test_guest_events
   18.36 +
   18.37  /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS (RING-1) STACK:            */
   18.38  /*   {EIP, CS, EFLAGS, [ESP, SS]}                                        */
   18.39  /* %edx == trap_bounce, %ebx == struct vcpu                       */
   18.40 @@ -620,9 +639,7 @@ ENTRY(nmi)
   18.41          jne   defer_nmi
   18.42  
   18.43  continue_nmi:
   18.44 -        movl  $(__HYPERVISOR_DS),%edx
   18.45 -        movl  %edx,%ds
   18.46 -        movl  %edx,%es
   18.47 +        SET_XEN_SEGMENTS(d)
   18.48          movl  %esp,%edx
   18.49          pushl %edx
   18.50          call  do_nmi
   18.51 @@ -660,42 +677,6 @@ do_arch_sched_op:
   18.52          movl %eax,UREGS_eax(%ecx)
   18.53          jmp  do_sched_op
   18.54  
   18.55 -do_switch_vm86:
   18.56 -        # Reset the stack pointer
   18.57 -        GET_GUEST_REGS(%ecx)
   18.58 -        movl %ecx,%esp
   18.59 -
   18.60 -        # GS:ESI == Ring-1 stack activation
   18.61 -        movl UREGS_esp(%esp),%esi
   18.62 -VFLT1:  mov  UREGS_ss(%esp),%gs
   18.63 -
   18.64 -        # ES:EDI == Ring-0 stack activation
   18.65 -        leal UREGS_eip(%esp),%edi
   18.66 -
   18.67 -        # Restore the hypercall-number-clobbered EAX on our stack frame
   18.68 -VFLT2:  movl %gs:(%esi),%eax
   18.69 -        movl %eax,UREGS_eax(%esp)
   18.70 -        addl $4,%esi
   18.71 -        	
   18.72 -      	# Copy the VM86 activation from the ring-1 stack to the ring-0 stack
   18.73 -        movl $(UREGS_user_sizeof-UREGS_eip)/4,%ecx
   18.74 -VFLT3:  movl %gs:(%esi),%eax
   18.75 -        stosl
   18.76 -        addl $4,%esi
   18.77 -        loop VFLT3
   18.78 -
   18.79 -        # Fix up EFLAGS: IOPL=0, IF=1, VM=1
   18.80 -        andl $~X86_EFLAGS_IOPL,UREGS_eflags(%esp)
   18.81 -        orl  $X86_EFLAGS_IF|X86_EFLAGS_VM,UREGS_eflags(%esp)
   18.82 -        
   18.83 -        jmp test_all_events
   18.84 -
   18.85 -.section __ex_table,"a"
   18.86 -        .long VFLT1,domain_crash_synchronous
   18.87 -        .long VFLT2,domain_crash_synchronous
   18.88 -        .long VFLT3,domain_crash_synchronous
   18.89 -.previous
   18.90 -
   18.91  .data
   18.92  
   18.93  ENTRY(exception_table)
   18.94 @@ -744,11 +725,12 @@ ENTRY(hypercall_table)
   18.95          .long do_grant_table_op     /* 20 */
   18.96          .long do_vm_assist
   18.97          .long do_update_va_mapping_otherdomain
   18.98 -        .long do_switch_vm86
   18.99 +        .long do_iret
  18.100          .long do_vcpu_op
  18.101          .long do_ni_hypercall       /* 25 */
  18.102          .long do_mmuext_op
  18.103 -        .long do_acm_op             /* 27 */
  18.104 +        .long do_acm_op
  18.105 +        .long do_nmi_op
  18.106          .rept NR_hypercalls-((.-hypercall_table)/4)
  18.107          .long do_ni_hypercall
  18.108          .endr
  18.109 @@ -777,11 +759,12 @@ ENTRY(hypercall_args_table)
  18.110          .byte 3 /* do_grant_table_op    */  /* 20 */
  18.111          .byte 2 /* do_vm_assist         */
  18.112          .byte 5 /* do_update_va_mapping_otherdomain */
  18.113 -        .byte 0 /* do_switch_vm86       */
  18.114 +        .byte 0 /* do_iret              */
  18.115          .byte 3 /* do_vcpu_op           */
  18.116          .byte 0 /* do_ni_hypercall      */  /* 25 */
  18.117          .byte 4 /* do_mmuext_op         */
  18.118          .byte 1 /* do_acm_op            */
  18.119 +        .byte 2 /* do_nmi_op            */
  18.120          .rept NR_hypercalls-(.-hypercall_args_table)
  18.121          .byte 0 /* do_ni_hypercall      */
  18.122          .endr
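
Note: pseudo-C (illustrative only, not part of the changeset) for the
process_nmi logic added above; bounce_to_guest() is a stand-in for the
TRAPBOUNCE/create_bounce_frame sequence. An NMI that arrives while the guest
is still in its handler is re-marked pending and redelivered on a later exit:

    if (test_and_clear_bit(_VCPUF_nmi_pending, &v->vcpu_flags)) {
        if (v->nmi_addr == 0)
            goto test_all_events;       /* no callback registered */
        if (test_and_set_bit(_VCPUF_nmi_masked, &v->vcpu_flags)) {
            set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);  /* handler busy */
            goto test_guest_events;
        }
        /* Deliver to the callback as an interrupt frame in the guest kernel. */
        bounce_to_guest(v, v->nmi_addr, FLAT_KERNEL_CS, TBF_INTERRUPT);
    }
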
    19.1 --- a/xen/arch/x86/x86_32/traps.c	Thu Jan 12 13:13:34 2006 +0100
    19.2 +++ b/xen/arch/x86/x86_32/traps.c	Thu Jan 12 13:20:04 2006 +0100
    19.3 @@ -157,6 +157,64 @@ asmlinkage void do_double_fault(void)
    19.4          __asm__ __volatile__ ( "hlt" );
    19.5  }
    19.6  
    19.7 +static inline void pop_from_guest_stack(
    19.8 +    void *dst, struct cpu_user_regs *regs, unsigned int bytes)
    19.9 +{
   19.10 +    if ( unlikely(__copy_from_user(dst, (void __user *)regs->esp, bytes)) )
   19.11 +        domain_crash_synchronous();
   19.12 +    regs->esp += bytes;
   19.13 +}
   19.14 +
   19.15 +asmlinkage unsigned long do_iret(void)
   19.16 +{
   19.17 +    struct cpu_user_regs *regs = guest_cpu_user_regs();
   19.18 +    u32 eflags;
   19.19 +
   19.20 +    /* Check worst-case stack frame for overlap with Xen protected area. */
   19.21 +    if ( unlikely(!access_ok(regs->esp, 40)) )
   19.22 +        domain_crash_synchronous();
   19.23 +
   19.24 +    /* Pop and restore EAX (clobbered by hypercall). */
   19.25 +    pop_from_guest_stack(&regs->eax, regs, 4);
   19.26 +
   19.27 +    /* Pop and restore CS and EIP. */
   19.28 +    pop_from_guest_stack(&regs->eip, regs, 8);
   19.29 +
   19.30 +    /*
   19.31 +     * Pop, fix up and restore EFLAGS. We fix up in a local staging area
   19.32 +     * to avoid firing the BUG_ON(IOPL) check in arch_getdomaininfo_ctxt.
   19.33 +     */
   19.34 +    pop_from_guest_stack(&eflags, regs, 4);
   19.35 +    regs->eflags = (eflags & ~X86_EFLAGS_IOPL) | X86_EFLAGS_IF;
   19.36 +
   19.37 +    if ( VM86_MODE(regs) )
   19.38 +    {
   19.39 +        /* Return to VM86 mode: pop and restore ESP,SS,ES,DS,FS and GS. */
   19.40 +        pop_from_guest_stack(&regs->esp, regs, 24);
   19.41 +    }
   19.42 +    else if ( unlikely(RING_0(regs)) )
   19.43 +    {
   19.44 +        domain_crash_synchronous();
   19.45 +    }
   19.46 +    else if ( !RING_1(regs) )
   19.47 +    {
   19.48 +        /* Return to ring 2/3: pop and restore ESP and SS. */
   19.49 +        pop_from_guest_stack(&regs->esp, regs, 8);
   19.50 +    }
   19.51 +
   19.52 +    /* No longer in NMI context. */
   19.53 +    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
   19.54 +
   19.55 +    /* Restore upcall mask from saved value. */
   19.56 +    current->vcpu_info->evtchn_upcall_mask = regs->saved_upcall_mask;
   19.57 +
   19.58 +    /*
   19.59 +     * The hypercall exit path will overwrite EAX with this return
   19.60 +     * value.
   19.61 +     */
   19.62 +    return regs->eax;
   19.63 +}
   19.64 +
   19.65  BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
   19.66  asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs)
   19.67  {
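
Note: the access_ok(regs->esp, 40) check above covers the worst-case frame
that do_iret() can pop; a hypothetical layout (field names are illustrative,
not from the source):

    struct iret_frame32 {
        uint32_t eax;               /* clobbered by the hypercall; restored first */
        uint32_t eip, cs;           /* return address */
        uint32_t eflags;            /* IOPL cleared, IF forced on by Xen */
        uint32_t esp, ss;           /* popped only when returning to ring 2/3 */
        uint32_t es, ds, fs, gs;    /* popped only for a VM86-mode return */
    };                              /* 4 + 8 + 4 + 24 = 40 bytes worst case */
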
    20.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Thu Jan 12 13:13:34 2006 +0100
    20.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Thu Jan 12 13:20:04 2006 +0100
    20.3 @@ -65,6 +65,10 @@ void __dummy__(void)
    20.4             arch.guest_context.syscall_callback_eip);
    20.5      OFFSET(VCPU_kernel_sp, struct vcpu,
    20.6             arch.guest_context.kernel_sp);
    20.7 +    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
    20.8 +    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
    20.9 +    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
   20.10 +    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
   20.11      BLANK();
   20.12  
   20.13      OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
    21.1 --- a/xen/arch/x86/x86_64/entry.S	Thu Jan 12 13:13:34 2006 +0100
    21.2 +++ b/xen/arch/x86/x86_64/entry.S	Thu Jan 12 13:20:04 2006 +0100
    21.3 @@ -171,7 +171,9 @@ test_all_events:
    21.4          leaq  irq_stat(%rip),%rcx
    21.5          testl $~0,(%rcx,%rax,1)
    21.6          jnz   process_softirqs
    21.7 -/*test_guest_events:*/
    21.8 +        btr   $_VCPUF_nmi_pending,VCPU_flags(%rbx)
    21.9 +        jc    process_nmi
   21.10 +test_guest_events:
   21.11          movq  VCPU_vcpu_info(%rbx),%rax
   21.12          testb $0xFF,VCPUINFO_upcall_mask(%rax)
   21.13          jnz   restore_all_guest
   21.14 @@ -322,6 +324,23 @@ process_softirqs:
   21.15          call do_softirq
   21.16          jmp  test_all_events
   21.17  
   21.18 +	ALIGN
   21.19 +/* %rbx: struct vcpu */
   21.20 +process_nmi:
   21.21 +        movq VCPU_nmi_addr(%rbx),%rax
   21.22 +        test %rax,%rax
   21.23 +        jz   test_all_events
   21.24 +        bts  $_VCPUF_nmi_masked,VCPU_flags(%rbx)
   21.25 +        jc   1f
   21.26 +        sti
   21.27 +        leaq VCPU_trap_bounce(%rbx),%rdx
   21.28 +        movq %rax,TRAPBOUNCE_eip(%rdx)
   21.29 +        movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
   21.30 +        call create_bounce_frame
   21.31 +        jmp  test_all_events
   21.32 +1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%rbx)
   21.33 +        jmp  test_guest_events
   21.34 +	
   21.35  /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK:                     */
   21.36  /*   { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS }   */
   21.37  /* %rdx: trap_bounce, %rbx: struct vcpu                           */
   21.38 @@ -339,6 +358,9 @@ create_bounce_frame:
   21.39  1:      /* In kernel context already: push new frame at existing %rsp. */
   21.40          movq  UREGS_rsp+8(%rsp),%rsi
   21.41          andb  $0xfc,UREGS_cs+8(%rsp)    # Indicate kernel context to guest.
   21.42 +	testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
   21.43 +	jz    2f
   21.44 +	orb   $0x01,UREGS_cs+8(%rsp)
   21.45  2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
   21.46          movq  $HYPERVISOR_VIRT_START,%rax
   21.47          cmpq  %rax,%rsi
   21.48 @@ -569,7 +591,7 @@ ENTRY(nmi)
   21.49          SAVE_ALL
   21.50          movq  %rsp,%rdi
   21.51          call  do_nmi
   21.52 -	jmp   restore_all_xen
   21.53 +        jmp   ret_from_intr
   21.54  
   21.55  do_arch_sched_op:
   21.56          # Ensure we return success even if we return via schedule_tail()
   21.57 @@ -626,11 +648,12 @@ ENTRY(hypercall_table)
   21.58          .quad do_grant_table_op     /* 20 */
   21.59          .quad do_vm_assist
   21.60          .quad do_update_va_mapping_otherdomain
   21.61 -        .quad do_switch_to_user
   21.62 +        .quad do_iret
   21.63          .quad do_vcpu_op
   21.64          .quad do_set_segment_base   /* 25 */
   21.65          .quad do_mmuext_op
   21.66          .quad do_acm_op
   21.67 +        .quad do_nmi_op
   21.68          .rept NR_hypercalls-((.-hypercall_table)/4)
   21.69          .quad do_ni_hypercall
   21.70          .endr
   21.71 @@ -659,11 +682,12 @@ ENTRY(hypercall_args_table)
   21.72          .byte 3 /* do_grant_table_op    */  /* 20 */
   21.73          .byte 2 /* do_vm_assist         */
   21.74          .byte 4 /* do_update_va_mapping_otherdomain */
   21.75 -        .byte 0 /* do_switch_to_user    */
   21.76 +        .byte 0 /* do_iret              */
   21.77          .byte 3 /* do_vcpu_op           */
   21.78          .byte 2 /* do_set_segment_base  */  /* 25 */
   21.79          .byte 4 /* do_mmuext_op         */
   21.80          .byte 1 /* do_acm_op            */
   21.81 +        .byte 2 /* do_nmi_op            */
   21.82          .rept NR_hypercalls-(.-hypercall_args_table)
   21.83          .byte 0 /* do_ni_hypercall      */
   21.84          .endr
    22.1 --- a/xen/arch/x86/x86_64/traps.c	Thu Jan 12 13:13:34 2006 +0100
    22.2 +++ b/xen/arch/x86/x86_64/traps.c	Thu Jan 12 13:20:04 2006 +0100
    22.3 @@ -12,6 +12,7 @@
    22.4  #include <asm/current.h>
    22.5  #include <asm/flushtlb.h>
    22.6  #include <asm/msr.h>
    22.7 +#include <asm/shadow.h>
    22.8  #include <asm/vmx.h>
    22.9  
   22.10  void show_registers(struct cpu_user_regs *regs)
   22.11 @@ -113,6 +114,52 @@ asmlinkage void do_double_fault(struct c
   22.12          __asm__ __volatile__ ( "hlt" );
   22.13  }
   22.14  
   22.15 +void toggle_guest_mode(struct vcpu *v)
   22.16 +{
   22.17 +    v->arch.flags ^= TF_kernel_mode;
   22.18 +    __asm__ __volatile__ ( "swapgs" );
   22.19 +    update_pagetables(v);
   22.20 +    write_ptbase(v);
   22.21 +}
   22.22 +
   22.23 +long do_iret(void)
   22.24 +{
   22.25 +    struct cpu_user_regs *regs = guest_cpu_user_regs();
   22.26 +    struct iret_context iret_saved;
   22.27 +    struct vcpu *v = current;
   22.28 +
   22.29 +    if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
   22.30 +                                 sizeof(iret_saved))) )
   22.31 +        domain_crash_synchronous();
   22.32 +
   22.33 +    /* Returning to user mode? */
   22.34 +    if ( (iret_saved.cs & 3) == 3 )
   22.35 +    {
   22.36 +        if ( unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
   22.37 +            return -EFAULT;
   22.38 +        toggle_guest_mode(v);
   22.39 +    }
   22.40 +
   22.41 +    regs->rip    = iret_saved.rip;
   22.42 +    regs->cs     = iret_saved.cs | 3; /* force guest privilege */
   22.43 +    regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
   22.44 +    regs->rsp    = iret_saved.rsp;
   22.45 +    regs->ss     = iret_saved.ss | 3; /* force guest privilege */
   22.46 +
   22.47 +    if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
   22.48 +    {
   22.49 +        regs->entry_vector = 0;
   22.50 +        regs->r11 = iret_saved.r11;
   22.51 +        regs->rcx = iret_saved.rcx;
   22.52 +    }
   22.53 +
   22.54 +    /* No longer in NMI context. */
   22.55 +    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
   22.56 +
   22.57 +    /* Saved %rax gets written back to regs->rax in entry.S. */
   22.58 +    return iret_saved.rax;
   22.59 +}
   22.60 +
   22.61  asmlinkage void syscall_enter(void);
   22.62  void __init percpu_traps_init(void)
   22.63  {
    23.1 --- a/xen/common/dom0_ops.c	Thu Jan 12 13:13:34 2006 +0100
    23.2 +++ b/xen/common/dom0_ops.c	Thu Jan 12 13:20:04 2006 +0100
    23.3 @@ -323,7 +323,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
    23.4          new_affinity = v->cpu_affinity;
    23.5          memcpy(cpus_addr(new_affinity),
    23.6                 &op->u.setvcpuaffinity.cpumap,
    23.7 -               min((int)BITS_TO_LONGS(NR_CPUS),
    23.8 +               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
    23.9                     (int)sizeof(op->u.setvcpuaffinity.cpumap)));
   23.10  
   23.11          ret = vcpu_set_affinity(v, &new_affinity);
   23.12 @@ -501,7 +501,7 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
   23.13          op->u.getvcpuinfo.cpumap   = 0;
   23.14          memcpy(&op->u.getvcpuinfo.cpumap,
   23.15                 cpus_addr(v->cpu_affinity),
   23.16 -               min((int)BITS_TO_LONGS(NR_CPUS),
   23.17 +               min((int)(BITS_TO_LONGS(NR_CPUS) * sizeof(long)),
   23.18                     (int)sizeof(op->u.getvcpuinfo.cpumap)));
   23.19          ret = 0;
   23.20  
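
Note: the dom0_ops.c change fixes a units bug: BITS_TO_LONGS(NR_CPUS) counts
longs, not bytes, so the old min() could truncate the copy. A worked example,
assuming NR_CPUS = 32 on a 32-bit build with a 64-bit public cpumap field:

    /* Old: min((int)BITS_TO_LONGS(32), (int)sizeof(cpumap))
     *    = min(1, 8) = 1 byte copied  -- only CPUs 0-7 survive.
     * New: min((int)(BITS_TO_LONGS(32) * sizeof(long)), (int)sizeof(cpumap))
     *    = min(4, 8) = 4 bytes copied -- the full 32-CPU mask.
     */
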
    24.1 --- a/xen/common/kernel.c	Thu Jan 12 13:13:34 2006 +0100
    24.2 +++ b/xen/common/kernel.c	Thu Jan 12 13:20:04 2006 +0100
    24.3 @@ -11,6 +11,7 @@
    24.4  #include <xen/compile.h>
    24.5  #include <xen/sched.h>
    24.6  #include <asm/current.h>
    24.7 +#include <public/nmi.h>
    24.8  #include <public/version.h>
    24.9  
   24.10  void cmdline_parse(char *cmdline)
   24.11 @@ -148,6 +149,43 @@ long do_xen_version(int cmd, void *arg)
   24.12      return -ENOSYS;
   24.13  }
   24.14  
   24.15 +long do_nmi_op(unsigned int cmd, void *arg)
   24.16 +{
   24.17 +    struct vcpu *v = current;
   24.18 +    struct domain *d = current->domain;
   24.19 +    long rc = 0;
   24.20 +
   24.21 +    switch ( cmd )
   24.22 +    {
   24.23 +    case XENNMI_register_callback:
   24.24 +        if ( (d->domain_id != 0) || (v->vcpu_id != 0) )
   24.25 +        { 
   24.26 +           rc = -EINVAL;
   24.27 +        }
   24.28 +        else
   24.29 +        {
   24.30 +            v->nmi_addr = (unsigned long)arg;
   24.31 +#ifdef CONFIG_X86
   24.32 +            /*
   24.33 +             * If no handler was registered we can 'lose the NMI edge'.
   24.34 +             * Re-assert it now.
   24.35 +             */
   24.36 +            if ( d->shared_info->arch.nmi_reason != 0 )
   24.37 +                set_bit(_VCPUF_nmi_pending, &v->vcpu_flags);
   24.38 +#endif
   24.39 +        }
   24.40 +        break;
   24.41 +    case XENNMI_unregister_callback:
   24.42 +        v->nmi_addr = 0;
   24.43 +        break;
   24.44 +    default:
   24.45 +        rc = -ENOSYS;
   24.46 +        break;
   24.47 +    }
   24.48 +
   24.49 +    return rc;
   24.50 +}
   24.51 +
   24.52  long do_vm_assist(unsigned int cmd, unsigned int type)
   24.53  {
   24.54      return vm_assist(current->domain, cmd, type);
    25.1 --- a/xen/common/schedule.c	Thu Jan 12 13:13:34 2006 +0100
    25.2 +++ b/xen/common/schedule.c	Thu Jan 12 13:20:04 2006 +0100
    25.3 @@ -207,7 +207,10 @@ void vcpu_wake(struct vcpu *v)
    25.4  
    25.5  int vcpu_set_affinity(struct vcpu *v, cpumask_t *affinity)
    25.6  {
    25.7 -    if ( cpus_empty(*affinity) )
    25.8 +    cpumask_t online_affinity;
    25.9 +
   25.10 +    cpus_and(online_affinity, *affinity, cpu_online_map);
   25.11 +    if ( cpus_empty(online_affinity) )
   25.12          return -EINVAL;
   25.13  
   25.14      return SCHED_OP(set_affinity, v, affinity);
    26.1 --- a/xen/include/asm-x86/domain.h	Thu Jan 12 13:13:34 2006 +0100
    26.2 +++ b/xen/include/asm-x86/domain.h	Thu Jan 12 13:20:04 2006 +0100
    26.3 @@ -13,14 +13,44 @@ struct trap_bounce {
    26.4      unsigned long  eip;
    26.5  };
    26.6  
    26.7 +#define MAPHASH_ENTRIES 8
    26.8 +#define MAPHASH_HASHFN(pfn) ((pfn) & (MAPHASH_ENTRIES-1))
    26.9 +#define MAPHASHENT_NOTINUSE ((u16)~0U)
   26.10 +struct vcpu_maphash {
   26.11 +    struct vcpu_maphash_entry {
   26.12 +        unsigned long pfn;
   26.13 +        uint16_t      idx;
   26.14 +        uint16_t      refcnt;
   26.15 +    } hash[MAPHASH_ENTRIES];
   26.16 +} __cacheline_aligned;
   26.17 +
   26.18 +#define MAPCACHE_ORDER   10
   26.19 +#define MAPCACHE_ENTRIES (1 << MAPCACHE_ORDER)
   26.20  struct mapcache {
   26.21 +    /* The PTEs that provide the mappings, and a cursor into the array. */
   26.22      l1_pgentry_t *l1tab;
   26.23      unsigned int cursor;
   26.24 +
   26.25 +    /* Protects map_domain_page(). */
   26.26 +    spinlock_t lock;
   26.27 +
   26.28 +    /* Garbage mappings are flushed from TLBs in batches called 'epochs'. */
   26.29      unsigned int epoch, shadow_epoch[MAX_VIRT_CPUS];
   26.30      u32 tlbflush_timestamp;
   26.31 -    spinlock_t lock;
   26.32 +
   26.33 +    /* Which mappings are in use, and which are garbage to reap next epoch? */
   26.34 +    unsigned long inuse[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
   26.35 +    unsigned long garbage[BITS_TO_LONGS(MAPCACHE_ENTRIES)];
   26.36 +
   26.37 +    /* Lock-free per-VCPU hash of recently-used mappings. */
   26.38 +    struct vcpu_maphash vcpu_maphash[MAX_VIRT_CPUS];
   26.39  };
   26.40  
   26.41 +extern void mapcache_init(struct domain *);
   26.42 +
   26.43 +/* x86/64: toggle guest between kernel and user modes. */
   26.44 +extern void toggle_guest_mode(struct vcpu *);
   26.45 +
   26.46  struct arch_domain
   26.47  {
   26.48      l1_pgentry_t *mm_perdomain_pt;
    27.1 --- a/xen/include/asm-x86/nmi.h	Thu Jan 12 13:13:34 2006 +0100
    27.2 +++ b/xen/include/asm-x86/nmi.h	Thu Jan 12 13:20:04 2006 +0100
    27.3 @@ -2,6 +2,8 @@
    27.4  #ifndef ASM_NMI_H
    27.5  #define ASM_NMI_H
    27.6  
    27.7 +#include <public/nmi.h>
    27.8 +
    27.9  struct cpu_user_regs;
   27.10   
   27.11  typedef int (*nmi_callback_t)(struct cpu_user_regs *regs, int cpu);
    28.1 --- a/xen/include/asm-x86/processor.h	Thu Jan 12 13:13:34 2006 +0100
    28.2 +++ b/xen/include/asm-x86/processor.h	Thu Jan 12 13:20:04 2006 +0100
    28.3 @@ -123,6 +123,7 @@
    28.4  #define TBF_EXCEPTION_ERRCODE  2
    28.5  #define TBF_INTERRUPT          8
    28.6  #define TBF_FAILSAFE          16
    28.7 +#define TBF_SLOW_IRET         32
    28.8  
    28.9  /* 'arch_vcpu' flags values */
   28.10  #define _TF_kernel_mode        0
    29.1 --- a/xen/include/public/arch-x86_32.h	Thu Jan 12 13:13:34 2006 +0100
    29.2 +++ b/xen/include/public/arch-x86_32.h	Thu Jan 12 13:20:04 2006 +0100
    29.3 @@ -135,6 +135,7 @@ typedef struct arch_shared_info {
    29.4      unsigned long max_pfn;                  /* max pfn that appears in table */
    29.5      /* Frame containing list of mfns containing list of mfns containing p2m. */
    29.6      unsigned long pfn_to_mfn_frame_list_list; 
    29.7 +    unsigned long nmi_reason;
    29.8  } arch_shared_info_t;
    29.9  
   29.10  typedef struct {
    30.1 --- a/xen/include/public/arch-x86_64.h	Thu Jan 12 13:13:34 2006 +0100
    30.2 +++ b/xen/include/public/arch-x86_64.h	Thu Jan 12 13:20:04 2006 +0100
    30.3 @@ -88,11 +88,20 @@
    30.4  #define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
    30.5  
    30.6  /*
    30.7 - * int HYPERVISOR_switch_to_user(void)
    30.8 + * int HYPERVISOR_iret(void)
    30.9   * All arguments are on the kernel stack, in the following format.
   30.10   * Never returns if successful. Current kernel context is lost.
   30.11 + * The saved CS is mapped as follows:
   30.12 + *   RING0 -> RING3 kernel mode.
   30.13 + *   RING1 -> RING3 kernel mode.
   30.14 + *   RING2 -> RING3 kernel mode.
   30.15 + *   RING3 -> RING3 user mode.
    30.16 + * However RING0 indicates that the guest kernel should return to itself
   30.17 + * directly with
   30.18 + *      orb   $3,1*8(%rsp)
   30.19 + *      iretq
   30.20   * If flags contains VGCF_IN_SYSCALL:
   30.21 - *   Restore RAX, RIP, RFLAGS, RSP. 
   30.22 + *   Restore RAX, RIP, RFLAGS, RSP.
   30.23   *   Discard R11, RCX, CS, SS.
   30.24   * Otherwise:
   30.25   *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
   30.26 @@ -100,10 +109,19 @@
   30.27   */
   30.28  /* Guest exited in SYSCALL context? Return to guest with SYSRET? */
   30.29  #define VGCF_IN_SYSCALL (1<<8)
   30.30 +struct iret_context {
   30.31 +    /* Top of stack (%rsp at point of hypercall). */
   30.32 +    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
   30.33 +    /* Bottom of iret stack frame. */
   30.34 +};
   30.35 +/*
    30.36 + * For compatibility with HYPERVISOR_switch_to_user, which is the old
    30.37 + * name for HYPERVISOR_iret.
   30.38 + */
   30.39  struct switch_to_user {
   30.40      /* Top of stack (%rsp at point of hypercall). */
   30.41      uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
   30.42 -    /* Bottom of switch_to_user stack frame. */
   30.43 +    /* Bottom of iret stack frame. */
   30.44  };
   30.45  
   30.46  /*
   30.47 @@ -202,6 +220,7 @@ typedef struct arch_shared_info {
   30.48      unsigned long max_pfn;                  /* max pfn that appears in table */
   30.49      /* Frame containing list of mfns containing list of mfns containing p2m. */
   30.50      unsigned long pfn_to_mfn_frame_list_list; 
   30.51 +    unsigned long nmi_reason;
   30.52  } arch_shared_info_t;
   30.53  
   30.54  typedef struct {
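[Editor's note] Putting the above together: a 64-bit guest kernel's exit path leaves a struct iret_context frame at the top of its stack and then enters the hypervisor. A hedged sketch in GNU C inline assembly; the use of `syscall' as the x86/64 hypercall transfer instruction is an assumption about this tree's ABI.

    /* Hedged sketch: invoke HYPERVISOR_iret with the iret_context frame
     * already at %rsp. Never returns on success. */
    static inline void guest_iret(void)
    {
        asm volatile (
            "movq %0, %%rax\n\t"
            "syscall\n\t"
            "ud2"                       /* only reached on failure */
            : : "i" (__HYPERVISOR_iret)
            : "rax", "rcx", "r11", "memory" );
    }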
    31.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    31.2 +++ b/xen/include/public/nmi.h	Thu Jan 12 13:20:04 2006 +0100
    31.3 @@ -0,0 +1,54 @@
    31.4 +/******************************************************************************
    31.5 + * nmi.h
    31.6 + * 
    31.7 + * NMI callback registration and reason codes.
    31.8 + * 
    31.9 + * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
   31.10 + */
   31.11 +
   31.12 +#ifndef __XEN_PUBLIC_NMI_H__
   31.13 +#define __XEN_PUBLIC_NMI_H__
   31.14 +
   31.15 +/*
   31.16 + * NMI reason codes:
   31.17 + * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
   31.18 + */
   31.19 + /* I/O-check error reported via ISA port 0x61, bit 6. */
   31.20 +#define _XEN_NMIREASON_io_error     0
   31.21 +#define XEN_NMIREASON_io_error      (1UL << _XEN_NMIREASON_io_error)
   31.22 + /* Parity error reported via ISA port 0x61, bit 7. */
   31.23 +#define _XEN_NMIREASON_parity_error 1
   31.24 +#define XEN_NMIREASON_parity_error  (1UL << _XEN_NMIREASON_parity_error)
   31.25 + /* Unknown hardware-generated NMI. */
   31.26 +#define _XEN_NMIREASON_unknown      2
   31.27 +#define XEN_NMIREASON_unknown       (1UL << _XEN_NMIREASON_unknown)
   31.28 +
   31.29 +/*
   31.30 + * long nmi_op(unsigned int cmd, void *arg)
   31.31 + * NB. All ops return zero on success, else a negative error code.
   31.32 + */
   31.33 +
   31.34 +/*
   31.35 + * Register NMI callback for this (calling) VCPU. Currently this only makes
    31.36 + * sense for domain 0, vcpu 0. All other callers receive EINVAL.
   31.37 + * arg == address of callback function.
   31.38 + */
   31.39 +#define XENNMI_register_callback   0
   31.40 +
   31.41 +/*
   31.42 + * Deregister NMI callback for this (calling) VCPU.
   31.43 + * arg == NULL.
   31.44 + */
   31.45 +#define XENNMI_unregister_callback 1
   31.46 +
   31.47 +#endif /* __XEN_PUBLIC_NMI_H__ */
   31.48 +
   31.49 +/*
   31.50 + * Local variables:
   31.51 + * mode: C
   31.52 + * c-set-style: "BSD"
   31.53 + * c-basic-offset: 4
   31.54 + * tab-width: 4
   31.55 + * indent-tabs-mode: nil
   31.56 + * End:
   31.57 + */
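[Editor's note] Taken together with the reason codes above, a dom0 guest registers its callback once at boot and decodes arch_shared_info.nmi_reason inside that callback. A hedged sketch: HYPERVISOR_nmi_op() is assumed to be the usual guest-side wrapper for __HYPERVISOR_nmi_op, and nmi_entry is an assumed low-level assembly stub.

    /* Hedged sketch of guest-side registration and reason decoding. */
    extern void nmi_entry(void);        /* assumed assembly entry stub */

    static int __init setup_xen_nmi(void)
    {
        return HYPERVISOR_nmi_op(XENNMI_register_callback, nmi_entry);
    }

    static void decode_nmi_reason(unsigned long reason)
    {
        if ( reason & XEN_NMIREASON_io_error )
            printk("NMI: I/O check error\n");
        if ( reason & XEN_NMIREASON_parity_error )
            printk("NMI: memory parity error\n");
        if ( reason & XEN_NMIREASON_unknown )
            printk("NMI: unknown reason\n");
    }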
    32.1 --- a/xen/include/public/xen.h	Thu Jan 12 13:13:34 2006 +0100
    32.2 +++ b/xen/include/public/xen.h	Thu Jan 12 13:20:04 2006 +0100
    32.3 @@ -53,12 +53,14 @@
    32.4  #define __HYPERVISOR_grant_table_op       20
    32.5  #define __HYPERVISOR_vm_assist            21
    32.6  #define __HYPERVISOR_update_va_mapping_otherdomain 22
    32.7 -#define __HYPERVISOR_switch_vm86          23 /* x86/32 only */
    32.8 -#define __HYPERVISOR_switch_to_user       23 /* x86/64 only */
    32.9 +#define __HYPERVISOR_iret                 23 /* x86 only */
   32.10 +#define __HYPERVISOR_switch_vm86          23 /* x86/32 only (obsolete name) */
   32.11 +#define __HYPERVISOR_switch_to_user       23 /* x86/64 only (obsolete name) */
   32.12  #define __HYPERVISOR_vcpu_op              24
   32.13  #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
   32.14  #define __HYPERVISOR_mmuext_op            26
   32.15  #define __HYPERVISOR_acm_op               27
   32.16 +#define __HYPERVISOR_nmi_op               28
   32.17  
   32.18  /* 
   32.19   * VIRTUAL INTERRUPTS
   32.20 @@ -69,10 +71,7 @@
   32.21  #define VIRQ_DEBUG      1  /* Request guest to dump debug info.           */
   32.22  #define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
   32.23  #define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
   32.24 -#define VIRQ_PARITY_ERR 4  /* (DOM0) NMI parity error (port 0x61, bit 7). */
   32.25 -#define VIRQ_IO_ERR     5  /* (DOM0) NMI I/O error    (port 0x61, bit 6). */
   32.26  #define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
   32.27 -#define VIRQ_NMI        7  /* (DOM0) Unknown NMI (not from ISA port 0x61).*/
   32.28  #define NR_VIRQS        8
   32.29  
   32.30  /*
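[Editor's note] Since the obsolete names stay defined as aliases for vector 23, existing guest code keeps compiling unchanged. A hedged illustration; the _hypercall0() helper macro from the guest's hypercall headers is assumed here.

    /* Hedged sketch: old and new spellings expand to the same vector,
     * so either call reaches HYPERVISOR_iret. */
    static int compat_iret(void)
    {
        return _hypercall0(int, iret);  /* == _hypercall0(int, switch_to_user) */
    }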
    33.1 --- a/xen/include/xen/domain_page.h	Thu Jan 12 13:13:34 2006 +0100
    33.2 +++ b/xen/include/xen/domain_page.h	Thu Jan 12 13:20:04 2006 +0100
    33.3 @@ -10,24 +10,19 @@
    33.4  #include <xen/config.h>
    33.5  #include <xen/mm.h>
    33.6  
    33.7 -#define map_domain_page(pfn)   map_domain_pages(pfn,0)
    33.8 -#define unmap_domain_page(va)  unmap_domain_pages(va,0)
    33.9 -
   33.10  #ifdef CONFIG_DOMAIN_PAGE
   33.11  
   33.12  /*
   33.13 - * Maps a given range of page frames, returning the mapped virtual address. The
   33.14 - * pages are now accessible within the current VCPU until a corresponding
   33.15 - * call to unmap_domain_page().
   33.16 + * Map a given page frame, returning the mapped virtual address. The page is
   33.17 + * then accessible within the current VCPU until a corresponding unmap call.
   33.18   */
   33.19 -extern void *map_domain_pages(unsigned long pfn, unsigned int order);
   33.20 +extern void *map_domain_page(unsigned long pfn);
   33.21  
   33.22  /*
   33.23 - * Pass a VA within the first page of a range previously mapped in the context
   33.24 - * of the currently-executing VCPU via a call to map_domain_pages(). Those
   33.25 - * pages will then be removed from the mapping lists.
   33.26 + * Pass a VA within a page previously mapped in the context of the
    33.27 + * currently-executing VCPU via a call to map_domain_page().
   33.28   */
   33.29 -extern void unmap_domain_pages(void *va, unsigned int order);
   33.30 +extern void unmap_domain_page(void *va);
   33.31  
   33.32  /*
   33.33   * Similar to the above calls, except the mapping is accessible in all
   33.34 @@ -97,8 +92,8 @@ domain_mmap_cache_destroy(struct domain_
   33.35  
   33.36  #else /* !CONFIG_DOMAIN_PAGE */
   33.37  
   33.38 -#define map_domain_pages(pfn,order)         phys_to_virt((pfn)<<PAGE_SHIFT)
   33.39 -#define unmap_domain_pages(va,order)        ((void)((void)(va),(void)(order)))
   33.40 +#define map_domain_page(pfn)                phys_to_virt((pfn)<<PAGE_SHIFT)
   33.41 +#define unmap_domain_page(va)               ((void)(va))
   33.42  
   33.43  #define map_domain_page_global(pfn)         phys_to_virt((pfn)<<PAGE_SHIFT)
   33.44  #define unmap_domain_page_global(va)        ((void)(va))
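[Editor's note] With the order argument gone, a transient mapping is now one map paired with one unmap. A minimal usage sketch; the scrub_frame() wrapper and its caller-supplied pfn are hypothetical.

    /* Hedged sketch: map one frame, zero it, release the mapping. */
    static void scrub_frame(unsigned long pfn)
    {
        void *va = map_domain_page(pfn);   /* valid on this VCPU only */
        memset(va, 0, PAGE_SIZE);
        unmap_domain_page(va);
    }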
    34.1 --- a/xen/include/xen/sched.h	Thu Jan 12 13:13:34 2006 +0100
    34.2 +++ b/xen/include/xen/sched.h	Thu Jan 12 13:20:04 2006 +0100
    34.3 @@ -81,6 +81,8 @@ struct vcpu
    34.4      /* Bitmask of CPUs on which this VCPU may run. */
    34.5      cpumask_t        cpu_affinity;
    34.6  
    34.7 +    unsigned long    nmi_addr;      /* NMI callback address. */
    34.8 +
    34.9      /* Bitmask of CPUs which are holding onto this VCPU's state. */
   34.10      cpumask_t        vcpu_dirty_cpumask;
   34.11  
   34.12 @@ -361,6 +363,12 @@ extern struct domain *domain_list;
   34.13   /* VCPU is not-runnable */
   34.14  #define _VCPUF_down            5
   34.15  #define VCPUF_down             (1UL<<_VCPUF_down)
   34.16 + /* NMI callback pending for this VCPU? */
   34.17 +#define _VCPUF_nmi_pending     8
   34.18 +#define VCPUF_nmi_pending      (1UL<<_VCPUF_nmi_pending)
   34.19 + /* Avoid NMI reentry by allowing NMIs to be masked for short periods. */
   34.20 +#define _VCPUF_nmi_masked      9
   34.21 +#define VCPUF_nmi_masked       (1UL<<_VCPUF_nmi_masked)
   34.22  
   34.23  /*
   34.24   * Per-domain flags (domain_flags).
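[Editor's note] The two new flags form a simple pending/masked protocol: nmi_pending latches an undelivered NMI, and nmi_masked suppresses re-entry until the guest returns through HYPERVISOR_iret. A hedged sketch of the delivery side; the function name is hypothetical, the flags word is assumed to be v->vcpu_flags, and delivery is assumed to run on the VCPU's own CPU.

    /* Hedged sketch: latch an NMI for v and defer the bounce to softirq
     * context. A second NMI arriving before delivery coalesces. */
    static void queue_guest_nmi(struct vcpu *v)
    {
        if ( !test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
            raise_softirq(NMI_SOFTIRQ);    /* see softirq.h below */
    }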
    35.1 --- a/xen/include/xen/softirq.h	Thu Jan 12 13:13:34 2006 +0100
    35.2 +++ b/xen/include/xen/softirq.h	Thu Jan 12 13:20:04 2006 +0100
    35.3 @@ -6,7 +6,7 @@
    35.4  #define SCHEDULE_SOFTIRQ                  1
    35.5  #define NEW_TLBFLUSH_CLOCK_PERIOD_SOFTIRQ 2
    35.6  #define KEYPRESS_SOFTIRQ                  3
    35.7 -#define NMI_DOM0_SOFTIRQ                  4
    35.8 +#define NMI_SOFTIRQ                       4
    35.9  #define PAGE_SCRUB_SOFTIRQ                5
   35.10  #define DOMAIN_SHUTDOWN_FINALISE_SOFTIRQ  6
   35.11  #define NR_SOFTIRQS                       7
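[Editor's note] Finally, the renamed NMI_SOFTIRQ needs a handler wired up at boot. A hedged sketch using the standard open_softirq() registration; the handler body and init function name are hypothetical.

    /* Hedged sketch: register a handler for the renamed softirq. */
    static void nmi_softirq_action(void)
    {
        /* Bounce the pending NMI to the registered VCPU callback here. */
    }

    void __init init_nmi_delivery(void)
    {
        open_softirq(NMI_SOFTIRQ, nmi_softirq_action);
    }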