ia64/xen-unstable

changeset 15104:f4390e34ad12

x86/hvm: hypercall adjustments

- share more code between 32- and 64-bit variants
- properly handle continuations for 32-bit guests on a 64-bit hypervisor
- properly handle preemption (this must *not* rely on regs->eip, as
  other code may overwrite the value there by calling
  hvm_store_cpu_guest_regs()); the sketch below outlines the new flow
- deny hypercall access when called from a guest in vm86 mode, which
  requires that ???_guest_x86_mode() make real and vm86 modes
  distinguishable (condensed in a sketch after the diff)
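
A minimal, self-contained sketch of the new preemption protocol (plain C,
not Xen code; the scaffolding is simplified and the names
create_continuation/dummy_hypercall/do_hypercall are hypothetical
stand-ins): the dispatcher clears a per-CPU flag before invoking the
handler, the continuation logic sets it, and the dispatcher tests it
afterwards instead of planting a dummy value in regs->eip.

    #include <stdio.h>

    /* stand-in for Xen's DEFINE_PER_CPU(char, hc_preempted) */
    static char hc_preempted;

    /* stand-in for hypercall_create_continuation(): besides rewinding
     * the guest IP and re-packing arguments (omitted here), it now
     * records the preemption in the per-CPU flag */
    static void create_continuation(void)
    {
        hc_preempted = 1;
    }

    /* a hypothetical handler that may preempt itself */
    static long dummy_hypercall(int preempt)
    {
        if ( preempt )
            create_continuation();
        return 0;
    }

    /* stand-in for the tail of the patched hvm_do_hypercall() */
    static const char *do_hypercall(int preempt)
    {
        hc_preempted = 0;   /* cleared before invoking the handler */
        dummy_hypercall(preempt);
        return hc_preempted ? "HVM_HCALL_preempted" : "HVM_HCALL_completed";
    }

    int main(void)
    {
        printf("%s\n", do_hypercall(0));  /* prints HVM_HCALL_completed */
        printf("%s\n", do_hypercall(1));  /* prints HVM_HCALL_preempted */
        return 0;
    }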

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kfraser@localhost.localdomain
date Tue May 15 10:28:28 2007 +0100 (2007-05-15)
parents eb027b704dc5
children 9e9c09c75110
files xen/arch/x86/domain.c xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/platform.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmx.c xen/include/asm-x86/hypercall.h
line diff
--- a/xen/arch/x86/domain.c	Tue May 15 10:13:11 2007 +0100
+++ b/xen/arch/x86/domain.c	Tue May 15 10:28:28 2007 +0100
@@ -38,6 +38,7 @@
 #include <asm/mpspec.h>
 #include <asm/ldt.h>
 #include <asm/paging.h>
+#include <asm/hypercall.h>
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/support.h>
 #include <asm/msr.h>
@@ -1231,6 +1232,8 @@ void sync_vcpu_execstate(struct vcpu *v)
     __arg;                                                                  \
 })
 
+DEFINE_PER_CPU(char, hc_preempted);
+
 unsigned long hypercall_create_continuation(
     unsigned int op, const char *format, ...)
 {
@@ -1262,7 +1265,9 @@ unsigned long hypercall_create_continuat
         regs->eip -= 2;  /* re-execute 'syscall' / 'int 0x82' */
 
 #ifdef __x86_64__
-        if ( !is_pv_32on64_domain(current->domain) )
+        if ( !is_hvm_vcpu(current) ?
+             !is_pv_32on64_vcpu(current) :
+             (hvm_guest_x86_mode(current) == 8) )
         {
             for ( i = 0; *p != '\0'; i++ )
             {
@@ -1298,6 +1303,8 @@ unsigned long hypercall_create_continuat
                 }
             }
         }
+
+        this_cpu(hc_preempted) = 1;
     }
 
     va_end(args);
--- a/xen/arch/x86/hvm/hvm.c	Tue May 15 10:13:11 2007 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Tue May 15 10:28:28 2007 +0100
@@ -663,7 +663,7 @@ typedef unsigned long hvm_hypercall_t(
 
 #if defined(__i386__)
 
-static hvm_hypercall_t *hvm_hypercall_table[NR_hypercalls] = {
+static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
     HYPERCALL(memory_op),
     HYPERCALL(multicall),
     HYPERCALL(xen_version),
@@ -672,21 +672,6 @@ static hvm_hypercall_t *hvm_hypercall_ta
     HYPERCALL(hvm_op)
 };
 
-static void __hvm_do_hypercall(struct cpu_user_regs *pregs)
-{
-    if ( (pregs->eax >= NR_hypercalls) || !hvm_hypercall_table[pregs->eax] )
-    {
-        if ( pregs->eax != __HYPERVISOR_grant_table_op )
-            gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %d.\n",
-                     current->domain->domain_id, current->vcpu_id, pregs->eax);
-        pregs->eax = -ENOSYS;
-        return;
-    }
-
-    pregs->eax = hvm_hypercall_table[pregs->eax](
-        pregs->ebx, pregs->ecx, pregs->edx, pregs->esi, pregs->edi);
-}
-
 #else /* defined(__x86_64__) */
 
 static long do_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg)
@@ -746,49 +731,38 @@ static hvm_hypercall_t *hvm_hypercall32_
     HYPERCALL(hvm_op)
 };
 
-static void __hvm_do_hypercall(struct cpu_user_regs *pregs)
-{
-    pregs->rax = (uint32_t)pregs->eax; /* mask in case compat32 caller */
-    if ( (pregs->rax >= NR_hypercalls) || !hvm_hypercall64_table[pregs->rax] )
-    {
-        if ( pregs->rax != __HYPERVISOR_grant_table_op )
-            gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %ld.\n",
-                     current->domain->domain_id, current->vcpu_id, pregs->rax);
-        pregs->rax = -ENOSYS;
-        return;
-    }
-
-    if ( current->arch.paging.mode->guest_levels == 4 )
-    {
-        pregs->rax = hvm_hypercall64_table[pregs->rax](pregs->rdi,
-                                                       pregs->rsi,
-                                                       pregs->rdx,
-                                                       pregs->r10,
-                                                       pregs->r8);
-    }
-    else
-    {
-        pregs->eax = hvm_hypercall32_table[pregs->eax]((uint32_t)pregs->ebx,
-                                                       (uint32_t)pregs->ecx,
-                                                       (uint32_t)pregs->edx,
-                                                       (uint32_t)pregs->esi,
-                                                       (uint32_t)pregs->edi);
-    }
-}
-
 #endif /* defined(__x86_64__) */
 
 int hvm_do_hypercall(struct cpu_user_regs *regs)
 {
-    int flush, preempted;
-    unsigned long old_eip;
-
-    hvm_store_cpu_guest_regs(current, regs, NULL);
+    int flush, mode = hvm_guest_x86_mode(current);
+    uint32_t eax = regs->eax;
 
-    if ( unlikely(ring_3(regs)) )
+    switch ( mode )
     {
-        regs->eax = -EPERM;
-        return 0;
+#ifdef __x86_64__
+    case 8:
+#endif
+    case 4:
+    case 2:
+        hvm_store_cpu_guest_regs(current, regs, NULL);
+        if ( unlikely(ring_3(regs)) )
+        {
+    default:
+            regs->eax = -EPERM;
+            return HVM_HCALL_completed;
+        }
+    case 0:
+        break;
+    }
+
+    if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
+    {
+        if ( eax != __HYPERVISOR_grant_table_op )
+            gdprintk(XENLOG_WARNING, "HVM vcpu %d:%d bad hypercall %u.\n",
+                     current->domain->domain_id, current->vcpu_id, eax);
+        regs->eax = -ENOSYS;
+        return HVM_HCALL_completed;
     }
 
     /*
@@ -796,20 +770,29 @@ int hvm_do_hypercall(struct cpu_user_reg
      * For now we also need to flush when pages are added, as qemu-dm is not
      * yet capable of faulting pages into an existing valid mapcache bucket.
      */
-    flush = ((uint32_t)regs->eax == __HYPERVISOR_memory_op);
-
-    /* Check for preemption: RIP will be modified from this dummy value. */
-    old_eip = regs->eip;
-    regs->eip = 0xF0F0F0FF;
+    flush = (eax == __HYPERVISOR_memory_op);
+    this_cpu(hc_preempted) = 0;
 
-    __hvm_do_hypercall(regs);
+#ifdef __x86_64__
+    if ( mode == 8 )
+    {
+        regs->rax = hvm_hypercall64_table[eax](regs->rdi,
+                                               regs->rsi,
+                                               regs->rdx,
+                                               regs->r10,
+                                               regs->r8);
+    }
+    else
+#endif
+    {
+        regs->eax = hvm_hypercall32_table[eax]((uint32_t)regs->ebx,
+                                               (uint32_t)regs->ecx,
+                                               (uint32_t)regs->edx,
+                                               (uint32_t)regs->esi,
+                                               (uint32_t)regs->edi);
+    }
 
-    preempted = (regs->eip != 0xF0F0F0FF);
-    regs->eip = old_eip;
-
-    hvm_load_cpu_guest_regs(current, regs);
-
-    return (preempted ? HVM_HCALL_preempted :
+    return (this_cpu(hc_preempted) ? HVM_HCALL_preempted :
             flush ? HVM_HCALL_invalidate : HVM_HCALL_completed);
 }
 
--- a/xen/arch/x86/hvm/platform.c	Tue May 15 10:13:11 2007 +0100
+++ b/xen/arch/x86/hvm/platform.c	Tue May 15 10:28:28 2007 +0100
@@ -1037,6 +1037,9 @@ void handle_mmio(unsigned long gpa)
     df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
 
     address_bytes = hvm_guest_x86_mode(v);
+    if (address_bytes < 2)
+        /* real or vm86 modes */
+        address_bytes = 2;
     inst_addr = hvm_get_segment_base(v, x86_seg_cs) + regs->eip;
     inst_len = hvm_instruction_length(inst_addr, address_bytes);
     if ( inst_len <= 0 )
--- a/xen/arch/x86/hvm/svm/svm.c	Tue May 15 10:13:11 2007 +0100
+++ b/xen/arch/x86/hvm/svm/svm.c	Tue May 15 10:28:28 2007 +0100
@@ -554,14 +554,6 @@ static inline void svm_restore_dr(struct
         __restore_debug_registers(v);
 }
 
-static int svm_realmode(struct vcpu *v)
-{
-    unsigned long cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
-    unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
-
-    return (eflags & X86_EFLAGS_VM) || !(cr0 & X86_CR0_PE);
-}
-
 static int svm_interrupts_enabled(struct vcpu *v)
 {
     unsigned long eflags = v->arch.hvm_svm.vmcb->rflags;
@@ -572,13 +564,13 @@ static int svm_guest_x86_mode(struct vcp
 {
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
-    if ( svm_long_mode_enabled(v) && vmcb->cs.attr.fields.l )
+    if ( unlikely(!(v->arch.hvm_svm.cpu_shadow_cr0 & X86_CR0_PE)) )
+        return 0;
+    if ( unlikely(vmcb->rflags & X86_EFLAGS_VM) )
+        return 1;
+    if ( svm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
         return 8;
-
-    if ( svm_realmode(v) )
-        return 2;
-
-    return (vmcb->cs.attr.fields.db ? 4 : 2);
+    return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
 }
 
 static void svm_update_host_cr3(struct vcpu *v)
@@ -1950,7 +1942,9 @@ static int svm_cr_access(struct vcpu *v,
     case INSTR_SMSW:
         value = v->arch.hvm_svm.cpu_shadow_cr0 & 0xFFFF;
         modrm = buffer[index+2];
-        addr_size = svm_guest_x86_mode( v );
+        addr_size = svm_guest_x86_mode(v);
+        if ( addr_size < 2 )
+            addr_size = 2;
         if ( likely((modrm & 0xC0) >> 6 == 3) )
         {
             gpreg = decode_src_reg(prefix, modrm);
--- a/xen/arch/x86/hvm/vmx/vmx.c	Tue May 15 10:13:11 2007 +0100
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue May 15 10:28:28 2007 +0100
@@ -994,31 +994,20 @@ static void vmx_init_hypercall_page(stru
     *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
 }
 
-static int vmx_realmode(struct vcpu *v)
-{
-    unsigned long rflags;
-
-    ASSERT(v == current);
-
-    rflags = __vmread(GUEST_RFLAGS);
-    return rflags & X86_EFLAGS_VM;
-}
-
 static int vmx_guest_x86_mode(struct vcpu *v)
 {
-    unsigned long cs_ar_bytes;
+    unsigned int cs_ar_bytes;
 
     ASSERT(v == current);
 
+    if ( unlikely(!(v->arch.hvm_vmx.cpu_shadow_cr0 & X86_CR0_PE)) )
+        return 0;
+    if ( unlikely(__vmread(GUEST_RFLAGS) & X86_EFLAGS_VM) )
+        return 1;
     cs_ar_bytes = __vmread(GUEST_CS_AR_BYTES);
-
-    if ( vmx_long_mode_enabled(v) && (cs_ar_bytes & (1u<<13)) )
+    if ( vmx_long_mode_enabled(v) && likely(cs_ar_bytes & (1u<<13)) )
         return 8;
-
-    if ( vmx_realmode(v) )
-        return 2;
-
-    return ((cs_ar_bytes & (1u<<14)) ? 4 : 2);
+    return (likely(cs_ar_bytes & (1u<<14)) ? 4 : 2);
 }
 
 static int vmx_pae_enabled(struct vcpu *v)
--- a/xen/include/asm-x86/hypercall.h	Tue May 15 10:13:11 2007 +0100
+++ b/xen/include/asm-x86/hypercall.h	Tue May 15 10:28:28 2007 +0100
@@ -15,6 +15,15 @@
  */
 #define MMU_UPDATE_PREEMPTED          (~(~0U>>1))
 
+/*
+ * This gets set to a non-zero value whenever hypercall_create_continuation()
+ * is used (outside of multicall context; in multicall context the second call
+ * from do_multicall() itself will have this effect). Internal callers of
+ * hypercall handlers interested in this condition must clear the flag prior
+ * to invoking the respective handler(s).
+ */
+DECLARE_PER_CPU(char, hc_preempted);
+
 extern long
 do_event_channel_op_compat(
     XEN_GUEST_HANDLE(evtchn_op_t) uop);
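
For reference, the mode classification shared by the reworked
svm_guest_x86_mode()/vmx_guest_x86_mode() can be condensed into a small
standalone sketch (plain C; the guest_state flags are simplified,
hypothetical stand-ins for the real VMCB/VMCS fields):

    #include <stdio.h>

    struct guest_state {
        int cr0_pe;     /* CR0.PE: protected mode enabled */
        int eflags_vm;  /* EFLAGS.VM: virtual-8086 mode */
        int long_cs_l;  /* long mode active and CS.L set */
        int cs_db;      /* CS.D/B: 32-bit default operand size */
    };

    /* mirrors the decision order of the patched *_guest_x86_mode() */
    static int guest_x86_mode(const struct guest_state *g)
    {
        if ( !g->cr0_pe )
            return 0;                /* real mode */
        if ( g->eflags_vm )
            return 1;                /* vm86 mode */
        if ( g->long_cs_l )
            return 8;                /* 64-bit mode */
        return g->cs_db ? 4 : 2;     /* 32- or 16-bit protected mode */
    }

    int main(void)
    {
        struct guest_state real   = { 0, 0, 0, 0 };
        struct guest_state vm86   = { 1, 1, 0, 0 };
        struct guest_state prot32 = { 1, 0, 0, 1 };
        struct guest_state long64 = { 1, 0, 1, 0 };

        /* real (0) and vm86 (1) are now distinguishable, so the
         * hypercall path can deny vm86 callers specifically, while
         * handle_mmio() and the SMSW emulation clamp both back to
         * 2-byte addressing */
        printf("%d %d %d %d\n",
               guest_x86_mode(&real), guest_x86_mode(&vm86),
               guest_x86_mode(&prot32), guest_x86_mode(&long64));
        return 0;
    }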