ia64/xen-unstable

changeset 8561:06ab200a9e23

Pass NMIs to DOM0 via a dedicated callback, Xen x86_64 support.

Handle NMI interrupts and dispatch to dom0 on x86_64.

Rename the switch_to_user hypercall to iret. Extend the semantics to
include returns to guest/kernel if CS indicates ring 1. Retain the old
semantics of returning to guest/user if CS indicates ring 3.

Plumb in nmi_op hypercall to generic code.

Signed-off-by: Ian Campbell <Ian.Campbell@XenSource.com>
author Ian.Campbell@xensource.com
date Wed Jan 11 15:52:33 2006 +0000 (2006-01-11)
parents fe4d06b15a36
children 278e536ade72
files xen/arch/x86/domain.c xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/traps.c xen/include/asm-x86/processor.h xen/include/public/arch-x86_64.h xen/include/public/xen.h
line diff
     1.1 --- a/xen/arch/x86/domain.c	Wed Jan 11 15:52:12 2006 +0000
     1.2 +++ b/xen/arch/x86/domain.c	Wed Jan 11 15:52:33 2006 +0000
     1.3 @@ -659,35 +659,6 @@ static void save_segments(struct vcpu *v
     1.4      percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
     1.5  }
     1.6  
     1.7 -long do_switch_to_user(void)
     1.8 -{
     1.9 -    struct cpu_user_regs  *regs = guest_cpu_user_regs();
    1.10 -    struct switch_to_user  stu;
    1.11 -    struct vcpu    *v = current;
    1.12 -
    1.13 -    if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
    1.14 -         unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
    1.15 -        return -EFAULT;
    1.16 -
    1.17 -    toggle_guest_mode(v);
    1.18 -
    1.19 -    regs->rip    = stu.rip;
    1.20 -    regs->cs     = stu.cs | 3; /* force guest privilege */
    1.21 -    regs->rflags = (stu.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
    1.22 -    regs->rsp    = stu.rsp;
    1.23 -    regs->ss     = stu.ss | 3; /* force guest privilege */
    1.24 -
    1.25 -    if ( !(stu.flags & VGCF_IN_SYSCALL) )
    1.26 -    {
    1.27 -        regs->entry_vector = 0;
    1.28 -        regs->r11 = stu.r11;
    1.29 -        regs->rcx = stu.rcx;
    1.30 -    }
    1.31 -
    1.32 -    /* Saved %rax gets written back to regs->rax in entry.S. */
    1.33 -    return stu.rax;
    1.34 -}
    1.35 -
    1.36  #define switch_kernel_stack(_n,_c) ((void)0)
    1.37  
    1.38  #elif defined(__i386__)
     2.1 --- a/xen/arch/x86/x86_64/asm-offsets.c	Wed Jan 11 15:52:12 2006 +0000
     2.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c	Wed Jan 11 15:52:33 2006 +0000
     2.3 @@ -65,6 +65,10 @@ void __dummy__(void)
     2.4             arch.guest_context.syscall_callback_eip);
     2.5      OFFSET(VCPU_kernel_sp, struct vcpu,
     2.6             arch.guest_context.kernel_sp);
     2.7 +    OFFSET(VCPU_flags, struct vcpu, vcpu_flags);
     2.8 +    OFFSET(VCPU_nmi_addr, struct vcpu, nmi_addr);
     2.9 +    DEFINE(_VCPUF_nmi_pending, _VCPUF_nmi_pending);
    2.10 +    DEFINE(_VCPUF_nmi_masked, _VCPUF_nmi_masked);
    2.11      BLANK();
    2.12  
    2.13      OFFSET(VCPUINFO_upcall_pending, vcpu_info_t, evtchn_upcall_pending);
     3.1 --- a/xen/arch/x86/x86_64/entry.S	Wed Jan 11 15:52:12 2006 +0000
     3.2 +++ b/xen/arch/x86/x86_64/entry.S	Wed Jan 11 15:52:33 2006 +0000
     3.3 @@ -171,7 +171,9 @@ test_all_events:
     3.4          leaq  irq_stat(%rip),%rcx
     3.5          testl $~0,(%rcx,%rax,1)
     3.6          jnz   process_softirqs
     3.7 -/*test_guest_events:*/
     3.8 +        btr   $_VCPUF_nmi_pending,VCPU_flags(%rbx)
     3.9 +        jc    process_nmi
    3.10 +test_guest_events:
    3.11          movq  VCPU_vcpu_info(%rbx),%rax
    3.12          testb $0xFF,VCPUINFO_upcall_mask(%rax)
    3.13          jnz   restore_all_guest
    3.14 @@ -322,6 +324,23 @@ process_softirqs:
    3.15          call do_softirq
    3.16          jmp  test_all_events
    3.17  
    3.18 +	ALIGN
    3.19 +/* %rbx: struct vcpu */
    3.20 +process_nmi:
    3.21 +        movq VCPU_nmi_addr(%rbx),%rax
    3.22 +        test %rax,%rax
    3.23 +        jz   test_all_events
    3.24 +        bts  $_VCPUF_nmi_masked,VCPU_flags(%rbx)
    3.25 +        jc   1f
    3.26 +        sti
    3.27 +        leaq VCPU_trap_bounce(%rbx),%rdx
    3.28 +        movq %rax,TRAPBOUNCE_eip(%rdx)
    3.29 +        movw $(TBF_INTERRUPT|TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
    3.30 +        call create_bounce_frame
    3.31 +        jmp  test_all_events
    3.32 +1:      bts  $_VCPUF_nmi_pending,VCPU_flags(%rbx)
    3.33 +        jmp  test_guest_events
    3.34 +	
    3.35  /* CREATE A BASIC EXCEPTION FRAME ON GUEST OS STACK:                     */
    3.36  /*   { RCX, R11, [DS-GS,] [CR2,] [ERRCODE,] RIP, CS, RFLAGS, RSP, SS }   */
    3.37  /* %rdx: trap_bounce, %rbx: struct vcpu                           */
    3.38 @@ -339,6 +358,9 @@ create_bounce_frame:
    3.39  1:      /* In kernel context already: push new frame at existing %rsp. */
    3.40          movq  UREGS_rsp+8(%rsp),%rsi
    3.41          andb  $0xfc,UREGS_cs+8(%rsp)    # Indicate kernel context to guest.
    3.42 +	testw $(TBF_SLOW_IRET),TRAPBOUNCE_flags(%rdx)
    3.43 +	jz    2f
    3.44 +	orb   $0x01,UREGS_cs+8(%rsp)
    3.45  2:      andq  $~0xf,%rsi                # Stack frames are 16-byte aligned.
    3.46          movq  $HYPERVISOR_VIRT_START,%rax
    3.47          cmpq  %rax,%rsi
    3.48 @@ -569,7 +591,7 @@ ENTRY(nmi)
    3.49          SAVE_ALL
    3.50          movq  %rsp,%rdi
    3.51          call  do_nmi
    3.52 -	jmp   restore_all_xen
    3.53 +        jmp   ret_from_intr
    3.54  
    3.55  do_arch_sched_op:
    3.56          # Ensure we return success even if we return via schedule_tail()
    3.57 @@ -626,11 +648,12 @@ ENTRY(hypercall_table)
    3.58          .quad do_grant_table_op     /* 20 */
    3.59          .quad do_vm_assist
    3.60          .quad do_update_va_mapping_otherdomain
    3.61 -        .quad do_switch_to_user
    3.62 +        .quad do_iret
    3.63          .quad do_vcpu_op
    3.64          .quad do_set_segment_base   /* 25 */
    3.65          .quad do_mmuext_op
    3.66          .quad do_acm_op
    3.67 +        .quad do_nmi_op
    3.68          .rept NR_hypercalls-((.-hypercall_table)/4)
    3.69          .quad do_ni_hypercall
    3.70          .endr
    3.71 @@ -659,11 +682,12 @@ ENTRY(hypercall_args_table)
    3.72          .byte 3 /* do_grant_table_op    */  /* 20 */
    3.73          .byte 2 /* do_vm_assist         */
    3.74          .byte 4 /* do_update_va_mapping_otherdomain */
    3.75 -        .byte 0 /* do_switch_to_user    */
    3.76 +        .byte 0 /* do_iret              */
    3.77          .byte 3 /* do_vcpu_op           */
    3.78          .byte 2 /* do_set_segment_base  */  /* 25 */
    3.79          .byte 4 /* do_mmuext_op         */
    3.80          .byte 1 /* do_acm_op            */
    3.81 +        .byte 2 /* do_nmi_op            */
    3.82          .rept NR_hypercalls-(.-hypercall_args_table)
    3.83          .byte 0 /* do_ni_hypercall      */
    3.84          .endr
     4.1 --- a/xen/arch/x86/x86_64/traps.c	Wed Jan 11 15:52:12 2006 +0000
     4.2 +++ b/xen/arch/x86/x86_64/traps.c	Wed Jan 11 15:52:33 2006 +0000
     4.3 @@ -12,6 +12,7 @@
     4.4  #include <asm/current.h>
     4.5  #include <asm/flushtlb.h>
     4.6  #include <asm/msr.h>
     4.7 +#include <asm/shadow.h>
     4.8  #include <asm/vmx.h>
     4.9  
    4.10  void show_registers(struct cpu_user_regs *regs)
    4.11 @@ -113,6 +114,42 @@ asmlinkage void do_double_fault(struct c
    4.12          __asm__ __volatile__ ( "hlt" );
    4.13  }
    4.14  
    4.15 +extern void toggle_guest_mode(struct vcpu *);
    4.16 +
    4.17 +long do_iret(void)
    4.18 +{
    4.19 +    struct cpu_user_regs  *regs = guest_cpu_user_regs();
    4.20 +    struct iret_context iret_saved;
    4.21 +    struct vcpu    *v = current;
    4.22 +
    4.23 +    if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp, sizeof(iret_saved))) ||
    4.24 +         unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
    4.25 +        return -EFAULT;
    4.26 +
    4.27 +    /* returning to user mode */
    4.28 +    if ((iret_saved.cs & 0x03) == 3)
    4.29 +        toggle_guest_mode(v);
    4.30 +
    4.31 +    regs->rip    = iret_saved.rip;
    4.32 +    regs->cs     = iret_saved.cs | 3; /* force guest privilege */
    4.33 +    regs->rflags = (iret_saved.rflags & ~(EF_IOPL|EF_VM)) | EF_IE;
    4.34 +    regs->rsp    = iret_saved.rsp;
    4.35 +    regs->ss     = iret_saved.ss | 3; /* force guest privilege */
    4.36 +
    4.37 +    if ( !(iret_saved.flags & VGCF_IN_SYSCALL) )
    4.38 +    {
    4.39 +        regs->entry_vector = 0;
    4.40 +        regs->r11 = iret_saved.r11;
    4.41 +        regs->rcx = iret_saved.rcx;
    4.42 +    }
    4.43 +
    4.44 +    /* No longer in NMI context */
    4.45 +    clear_bit(_VCPUF_nmi_masked, &current->vcpu_flags);
    4.46 +
    4.47 +    /* Saved %rax gets written back to regs->rax in entry.S. */
    4.48 +    return iret_saved.rax;
    4.49 +}
    4.50 +
    4.51  asmlinkage void syscall_enter(void);
    4.52  void __init percpu_traps_init(void)
    4.53  {
     5.1 --- a/xen/include/asm-x86/processor.h	Wed Jan 11 15:52:12 2006 +0000
     5.2 +++ b/xen/include/asm-x86/processor.h	Wed Jan 11 15:52:33 2006 +0000
     5.3 @@ -123,6 +123,7 @@
     5.4  #define TBF_EXCEPTION_ERRCODE  2
     5.5  #define TBF_INTERRUPT          8
     5.6  #define TBF_FAILSAFE          16
     5.7 +#define TBF_SLOW_IRET         32
     5.8  
     5.9  /* 'arch_vcpu' flags values */
    5.10  #define _TF_kernel_mode        0
     6.1 --- a/xen/include/public/arch-x86_64.h	Wed Jan 11 15:52:12 2006 +0000
     6.2 +++ b/xen/include/public/arch-x86_64.h	Wed Jan 11 15:52:33 2006 +0000
     6.3 @@ -88,11 +88,20 @@
     6.4  #define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
     6.5  
     6.6  /*
     6.7 - * int HYPERVISOR_switch_to_user(void)
     6.8 + * int HYPERVISOR_iret(void)
     6.9   * All arguments are on the kernel stack, in the following format.
    6.10   * Never returns if successful. Current kernel context is lost.
    6.11 + * The saved CS is mapped as follows:
    6.12 + *   RING0 -> RING3 kernel mode.
    6.13 + *   RING1 -> RING3 kernel mode.
    6.14 + *   RING2 -> RING3 kernel mode.
    6.15 + *   RING3 -> RING3 user mode.
    6.16 + * However RING0 indicates that the guest kernel should return to itself
    6.17 + * directly with
    6.18 + *      orb   $3,1*8(%rsp)
    6.19 + *      iretq
    6.20   * If flags contains VGCF_IN_SYSCALL:
    6.21 - *   Restore RAX, RIP, RFLAGS, RSP. 
    6.22 + *   Restore RAX, RIP, RFLAGS, RSP.
    6.23   *   Discard R11, RCX, CS, SS.
    6.24   * Otherwise:
    6.25   *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
    6.26 @@ -100,10 +109,17 @@
    6.27   */
    6.28  /* Guest exited in SYSCALL context? Return to guest with SYSRET? */
    6.29  #define VGCF_IN_SYSCALL (1<<8)
    6.30 +struct iret_context {
    6.31 +    /* Top of stack (%rsp at point of hypercall). */
    6.32 +    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
    6.33 +    /* Bottom of iret stack frame. */
    6.34 +};
    6.35 +/* For compatibility with HYPERVISOR_switch_to_user which is the old
    6.36 + * name for HYPERVISOR_iret */
    6.37  struct switch_to_user {
    6.38      /* Top of stack (%rsp at point of hypercall). */
    6.39      uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
    6.40 -    /* Bottom of switch_to_user stack frame. */
    6.41 +    /* Bottom of iret stack frame. */
    6.42  };
    6.43  
    6.44  /*
     7.1 --- a/xen/include/public/xen.h	Wed Jan 11 15:52:12 2006 +0000
     7.2 +++ b/xen/include/public/xen.h	Wed Jan 11 15:52:33 2006 +0000
     7.3 @@ -53,9 +53,9 @@
     7.4  #define __HYPERVISOR_grant_table_op       20
     7.5  #define __HYPERVISOR_vm_assist            21
     7.6  #define __HYPERVISOR_update_va_mapping_otherdomain 22
     7.7 -#define __HYPERVISOR_iret                 23 /* x86/32 only */
     7.8 +#define __HYPERVISOR_iret                 23 /* x86 only */
     7.9  #define __HYPERVISOR_switch_vm86          23 /* x86/32 only (obsolete name) */
    7.10 -#define __HYPERVISOR_switch_to_user       23 /* x86/64 only */
    7.11 +#define __HYPERVISOR_switch_to_user       23 /* x86/64 only (obsolete name) */
    7.12  #define __HYPERVISOR_vcpu_op              24
    7.13  #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
    7.14  #define __HYPERVISOR_mmuext_op            26