ia64/xen-unstable

changeset 18621:8d993552673a

x86, hvm: Hyper-V guest interface support with small set of enlightenments

A minimal implementation of the Viridian (Hyper-V) guest
interface. The only enlightenments advertised and supported are vAPIC
MSRs and long-spin-wait notifications. The set of enlightenments can
easily be extended in future, as further ones are found to provide a
performance win, and can then be configured via an extended
HVM_PARAM_VIRIDIAN hvm parameter.

Signed-off-by: Peter Johnston <peter.johnston@citrix.com>
Signed-off-by: Tim Deegan <tim.deegan@citrix.com>
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Oct 14 10:45:29 2008 +0100 (2008-10-14)
parents a26194601c8f
children 551c3480beee
files tools/firmware/hvmloader/hvmloader.c tools/python/xen/xend/XendConfig.py tools/python/xen/xend/XendConstants.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xm/create.py tools/python/xen/xm/xenapi_create.py unmodified_drivers/linux-2.6/platform-pci/platform-pci.c xen/arch/x86/hvm/Makefile xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/viridian.c xen/arch/x86/hvm/vlapic.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/traps.c xen/arch/x86/x86_emulate/x86_emulate.c xen/include/asm-x86/hvm/domain.h xen/include/asm-x86/hvm/hvm.h xen/include/asm-x86/hvm/viridian.h xen/include/asm-x86/hvm/vlapic.h xen/include/asm-x86/perfc_defn.h xen/include/public/arch-x86/hvm/save.h xen/include/public/hvm/params.h
line diff
     1.1 --- a/tools/firmware/hvmloader/hvmloader.c	Mon Oct 13 13:15:20 2008 +0100
     1.2 +++ b/tools/firmware/hvmloader/hvmloader.c	Tue Oct 14 10:45:29 2008 +0100
     1.3 @@ -101,30 +101,36 @@ asm (
     1.4  
     1.5  static enum { VGA_none, VGA_std, VGA_cirrus } virtual_vga = VGA_none;
     1.6  
     1.7 -static void
     1.8 -init_hypercalls(void)
     1.9 +static void init_hypercalls(void)
    1.10  {
    1.11      uint32_t eax, ebx, ecx, edx;
    1.12      unsigned long i;
    1.13      char signature[13];
    1.14      xen_extraversion_t extraversion;
    1.15 -
    1.16 -    cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
    1.17 +    uint32_t base;
    1.18  
    1.19 -    *(uint32_t *)(signature + 0) = ebx;
    1.20 -    *(uint32_t *)(signature + 4) = ecx;
    1.21 -    *(uint32_t *)(signature + 8) = edx;
    1.22 -    signature[12] = '\0';
    1.23 +    for ( base = 0x40000000; base < 0x40001000; base += 0x100 )
    1.24 +    {
    1.25 +        cpuid(base, &eax, &ebx, &ecx, &edx);
    1.26  
    1.27 -    BUG_ON(strcmp("XenVMMXenVMM", signature) || (eax < 0x40000002));
    1.28 +        *(uint32_t *)(signature + 0) = ebx;
    1.29 +        *(uint32_t *)(signature + 4) = ecx;
    1.30 +        *(uint32_t *)(signature + 8) = edx;
    1.31 +        signature[12] = '\0';
    1.32 +
    1.33 +        if ( !strcmp("XenVMMXenVMM", signature) )
    1.34 +            break;
    1.35 +    }
    1.36 +
    1.37 +    BUG_ON(strcmp("XenVMMXenVMM", signature) || ((eax - base) < 2));
    1.38  
    1.39      /* Fill in hypercall transfer pages. */
    1.40 -    cpuid(0x40000002, &eax, &ebx, &ecx, &edx);
    1.41 +    cpuid(base + 2, &eax, &ebx, &ecx, &edx);
    1.42      for ( i = 0; i < eax; i++ )
    1.43          wrmsr(ebx, HYPERCALL_PHYSICAL_ADDRESS + (i << 12) + i);
    1.44  
    1.45      /* Print version information. */
    1.46 -    cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
    1.47 +    cpuid(base + 1, &eax, &ebx, &ecx, &edx);
    1.48      hypercall_xen_version(XENVER_extraversion, extraversion);
    1.49      printf("Detected Xen v%u.%u%s\n", eax >> 16, eax & 0xffff, extraversion);
    1.50  }
     2.1 --- a/tools/python/xen/xend/XendConfig.py	Mon Oct 13 13:15:20 2008 +0100
     2.2 +++ b/tools/python/xen/xend/XendConfig.py	Tue Oct 14 10:45:29 2008 +0100
     2.3 @@ -155,6 +155,7 @@ XENAPI_PLATFORM_CFG_TYPES = {
     2.4      'vncdisplay': int,
     2.5      'vnclisten': str,
     2.6      'timer_mode': int,
     2.7 +    'viridian': int,
     2.8      'vncpasswd': str,
     2.9      'vncunused': int,
    2.10      'xauthority': str,
    2.11 @@ -442,6 +443,8 @@ class XendConfig(dict):
    2.12          if self.is_hvm():
    2.13              if 'timer_mode' not in self['platform']:
    2.14                  self['platform']['timer_mode'] = 1
    2.15 +            if 'viridian' not in self['platform']:
    2.16 +                self['platform']['viridian'] = 0
    2.17              if 'rtc_timeoffset' not in self['platform']:
    2.18                  self['platform']['rtc_timeoffset'] = 0
    2.19              if 'hpet' not in self['platform']:
     3.1 --- a/tools/python/xen/xend/XendConstants.py	Mon Oct 13 13:15:20 2008 +0100
     3.2 +++ b/tools/python/xen/xend/XendConstants.py	Tue Oct 14 10:45:29 2008 +0100
     3.3 @@ -43,9 +43,10 @@ HVM_PARAM_STORE_EVTCHN = 2
     3.4  HVM_PARAM_PAE_ENABLED  = 4
     3.5  HVM_PARAM_IOREQ_PFN    = 5
     3.6  HVM_PARAM_BUFIOREQ_PFN = 6
     3.7 -HVM_PARAM_NVRAM_FD     = 7
     3.8 -HVM_PARAM_VHPT_SIZE    = 8
     3.9 -HVM_PARAM_BUFPIOREQ_PFN = 9
    3.10 +HVM_PARAM_NVRAM_FD     = 7 # ia64
    3.11 +HVM_PARAM_VHPT_SIZE    = 8 # ia64
    3.12 +HVM_PARAM_BUFPIOREQ_PFN = 9 # ia64
    3.13 +HVM_PARAM_VIRIDIAN     = 9 # x86
    3.14  HVM_PARAM_TIMER_MODE   = 10
    3.15  HVM_PARAM_HPET_ENABLED = 11
    3.16  HVM_PARAM_ACPI_S_STATE = 14
     4.1 --- a/tools/python/xen/xend/XendDomainInfo.py	Mon Oct 13 13:15:20 2008 +0100
     4.2 +++ b/tools/python/xen/xend/XendDomainInfo.py	Tue Oct 14 10:45:29 2008 +0100
     4.3 @@ -2078,6 +2078,11 @@ class XendDomainInfo:
     4.4              xc.hvm_set_param(self.domid, HVM_PARAM_TIMER_MODE,
     4.5                               long(timer_mode))
     4.6  
     4.7 +        # Set Viridian interface configuration of domain
     4.8 +        viridian = self.info["platform"].get("viridian")
     4.9 +        if arch.type == "x86" and hvm and viridian is not None:
    4.10 +            xc.hvm_set_param(self.domid, HVM_PARAM_VIRIDIAN, long(viridian))
    4.11 +
    4.12          # Optionally enable virtual HPET
    4.13          hpet = self.info["platform"].get("hpet")
    4.14          if hvm and hpet is not None:
     5.1 --- a/tools/python/xen/xm/create.py	Mon Oct 13 13:15:20 2008 +0100
     5.2 +++ b/tools/python/xen/xm/create.py	Tue Oct 14 10:45:29 2008 +0100
     5.3 @@ -218,6 +218,11 @@ gopts.var('timer_mode', val='TIMER_MODE'
     5.4            use="""Timer mode (0=delay virtual time when ticks are missed;
     5.5            1=virtual time is always wallclock time.""")
     5.6  
     5.7 +gopts.var('viridian', val='VIRIDIAN',
     5.8 +          fn=set_int, default=0,
     5.9 +          use="""Expose Viridian interface to x86 HVM guest?
    5.10 +          (Default is 0).""")
    5.11 +
    5.12  gopts.var('acpi', val='ACPI',
    5.13            fn=set_int, default=1,
    5.14            use="Disable or enable ACPI of HVM domain.")
    5.15 @@ -856,7 +861,8 @@ def configure_hvm(config_image, vals):
    5.16               'vnc', 'vncdisplay', 'vncunused', 'vncconsole', 'vnclisten',
    5.17               'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
    5.18               'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet',
    5.19 -             'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check']
    5.20 +             'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check',
    5.21 +             'viridian' ]
    5.22  
    5.23      for a in args:
    5.24          if a in vals.__dict__ and vals.__dict__[a] is not None:
     6.1 --- a/tools/python/xen/xm/xenapi_create.py	Mon Oct 13 13:15:20 2008 +0100
     6.2 +++ b/tools/python/xen/xm/xenapi_create.py	Tue Oct 14 10:45:29 2008 +0100
     6.3 @@ -969,6 +969,7 @@ class sxp2xml:
     6.4              'usbdevice',
     6.5              'hpet',
     6.6              'timer_mode',
     6.7 +            'viridian',
     6.8              'vhpt',
     6.9              'guest_os_type',
    6.10              'hap',
     7.1 --- a/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Mon Oct 13 13:15:20 2008 +0100
     7.2 +++ b/unmodified_drivers/linux-2.6/platform-pci/platform-pci.c	Tue Oct 14 10:45:29 2008 +0100
     7.3 @@ -111,26 +111,37 @@ unsigned long alloc_xen_mmio(unsigned lo
     7.4  
     7.5  #ifndef __ia64__
     7.6  
     7.7 -static int init_hypercall_stubs(void)
     7.8 +static uint32_t xen_cpuid_base(void)
     7.9  {
    7.10 -	uint32_t eax, ebx, ecx, edx, pages, msr, i;
    7.11 +	uint32_t base, eax, ebx, ecx, edx;
    7.12  	char signature[13];
    7.13  
    7.14 -	cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
    7.15 -	*(uint32_t*)(signature + 0) = ebx;
    7.16 -	*(uint32_t*)(signature + 4) = ecx;
    7.17 -	*(uint32_t*)(signature + 8) = edx;
    7.18 -	signature[12] = 0;
    7.19 +	for (base = 0x40000000; base < 0x40001000; base += 0x100) {
    7.20 +		cpuid(base, &eax, &ebx, &ecx, &edx);
    7.21 +		*(uint32_t*)(signature + 0) = ebx;
    7.22 +		*(uint32_t*)(signature + 4) = ecx;
    7.23 +		*(uint32_t*)(signature + 8) = edx;
    7.24 +		signature[12] = 0;
    7.25  
    7.26 -	if (strcmp("XenVMMXenVMM", signature) || (eax < 0x40000002)) {
    7.27 +		if (!strcmp("XenVMMXenVMM", signature) && ((eax - base) >= 2))
    7.28 +			return base;
    7.29 +	}
    7.30 +
    7.31 +	return 0;
    7.32 +}
    7.33 +
    7.34 +static int init_hypercall_stubs(void)
    7.35 +{
    7.36 +	uint32_t eax, ebx, ecx, edx, pages, msr, i, base;
    7.37 +
    7.38 +	base = xen_cpuid_base();
    7.39 +	if (base == 0) {
    7.40  		printk(KERN_WARNING
    7.41 -		       "Detected Xen platform device but not Xen VMM?"
    7.42 -		       " (sig %s, eax %x)\n",
    7.43 -		       signature, eax);
    7.44 +		       "Detected Xen platform device but not Xen VMM?\n");
    7.45  		return -EINVAL;
    7.46  	}
    7.47  
    7.48 -	cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
    7.49 +	cpuid(base + 1, &eax, &ebx, &ecx, &edx);
    7.50  
    7.51  	printk(KERN_INFO "Xen version %d.%d.\n", eax >> 16, eax & 0xffff);
    7.52  
    7.53 @@ -138,7 +149,7 @@ static int init_hypercall_stubs(void)
    7.54  	 * Find largest supported number of hypercall pages.
    7.55  	 * We'll create as many as possible up to this number.
    7.56  	 */
    7.57 -	cpuid(0x40000002, &pages, &msr, &ecx, &edx);
    7.58 +	cpuid(base + 2, &pages, &msr, &ecx, &edx);
    7.59  
    7.60  	/*
    7.61  	 * Use __vmalloc() because vmalloc_exec() is not an exported symbol.
    7.62 @@ -174,18 +185,12 @@ static int init_hypercall_stubs(void)
    7.63  
    7.64  static void resume_hypercall_stubs(void)
    7.65  {
    7.66 -	uint32_t eax, ebx, ecx, edx, pages, msr, i;
    7.67 -	char signature[13];
    7.68 +	uint32_t base, ecx, edx, pages, msr, i;
    7.69  
    7.70 -	cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
    7.71 -	*(uint32_t*)(signature + 0) = ebx;
    7.72 -	*(uint32_t*)(signature + 4) = ecx;
    7.73 -	*(uint32_t*)(signature + 8) = edx;
    7.74 -	signature[12] = 0;
    7.75 +	base = xen_cpuid_base();
    7.76 +	BUG_ON(base == 0);
    7.77  
    7.78 -	BUG_ON(strcmp("XenVMMXenVMM", signature) || (eax < 0x40000002));
    7.79 -
    7.80 -	cpuid(0x40000002, &pages, &msr, &ecx, &edx);
    7.81 +	cpuid(base + 2, &pages, &msr, &ecx, &edx);
    7.82  
    7.83  	if (pages > max_hypercall_stub_pages)
    7.84  		pages = max_hypercall_stub_pages;
     8.1 --- a/xen/arch/x86/hvm/Makefile	Mon Oct 13 13:15:20 2008 +0100
     8.2 +++ b/xen/arch/x86/hvm/Makefile	Tue Oct 14 10:45:29 2008 +0100
     8.3 @@ -18,3 +18,4 @@ obj-y += vpic.o
     8.4  obj-y += save.o
     8.5  obj-y += vmsi.o
     8.6  obj-y += stdvga.o
     8.7 +obj-y += viridian.o
     9.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Oct 13 13:15:20 2008 +0100
     9.2 +++ b/xen/arch/x86/hvm/hvm.c	Tue Oct 14 10:45:29 2008 +0100
     9.3 @@ -1636,6 +1636,9 @@ void hvm_cpuid(unsigned int input, unsig
     9.4  {
     9.5      struct vcpu *v = current;
     9.6  
     9.7 +    if ( cpuid_viridian_leaves(input, eax, ebx, ecx, edx) )
     9.8 +        return;
     9.9 +
    9.10      if ( cpuid_hypervisor_leaves(input, eax, ebx, ecx, edx) )
    9.11          return;
    9.12  
    9.13 @@ -1954,6 +1957,9 @@ int hvm_do_hypercall(struct cpu_user_reg
    9.14          break;
    9.15      }
    9.16  
    9.17 +    if ( (eax & 0x80000000) && is_viridian_domain(curr->domain) )
    9.18 +        return viridian_hypercall(regs);
    9.19 +
    9.20      if ( (eax >= NR_hypercalls) || !hvm_hypercall32_table[eax] )
    9.21      {
    9.22          regs->eax = -ENOSYS;
    9.23 @@ -2380,6 +2386,10 @@ long do_hvm_op(unsigned long op, XEN_GUE
    9.24                  if ( a.value > HVMPTM_one_missed_tick_pending )
    9.25                      rc = -EINVAL;
    9.26                  break;
    9.27 +            case HVM_PARAM_VIRIDIAN:
    9.28 +                if ( a.value > 1 )
    9.29 +                    rc = -EINVAL;
    9.30 +                break;
    9.31              case HVM_PARAM_IDENT_PT:
    9.32                  rc = -EPERM;
    9.33                  if ( !IS_PRIV(current->domain) )
    10.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Oct 13 13:15:20 2008 +0100
    10.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue Oct 14 10:45:29 2008 +0100
    10.3 @@ -1005,7 +1005,8 @@ static int svm_msr_read_intercept(struct
    10.4          break;
    10.5  
    10.6      default:
    10.7 -        if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
    10.8 +        if ( rdmsr_viridian_regs(ecx, &eax, &edx) ||
    10.9 +             rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
   10.10               rdmsr_safe(ecx, eax, edx) == 0 )
   10.11          {
   10.12              regs->eax = eax;
   10.13 @@ -1073,6 +1074,9 @@ static int svm_msr_write_intercept(struc
   10.14          break;
   10.15  
   10.16      default:
   10.17 +        if ( wrmsr_viridian_regs(ecx, regs->eax, regs->edx) )
   10.18 +            break;
   10.19 +
   10.20          switch ( long_mode_do_msr_write(regs) )
   10.21          {
   10.22          case HNDL_unhandled:
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/xen/arch/x86/hvm/viridian.c	Tue Oct 14 10:45:29 2008 +0100
    11.3 @@ -0,0 +1,350 @@
    11.4 +/******************************************************************************
    11.5 + * viridian.c
    11.6 + *
    11.7 + * An implementation of the Viridian hypercall interface.
    11.8 + */
    11.9 +
   11.10 +#include <xen/sched.h>
   11.11 +#include <xen/version.h>
   11.12 +#include <xen/perfc.h>
   11.13 +#include <xen/hypercall.h>
   11.14 +#include <xen/domain_page.h>
   11.15 +#include <asm/paging.h>
   11.16 +#include <asm/p2m.h>
   11.17 +#include <asm/hvm/support.h>
   11.18 +#include <public/sched.h>
   11.19 +#include <public/hvm/hvm_op.h>
   11.20 +
   11.21 +/* Viridian MSR numbers. */
   11.22 +#define VIRIDIAN_MSR_GUEST_OS_ID 0x40000000
   11.23 +#define VIRIDIAN_MSR_HYPERCALL   0x40000001
   11.24 +#define VIRIDIAN_MSR_VP_INDEX    0x40000002
   11.25 +#define VIRIDIAN_MSR_EOI         0x40000070
   11.26 +#define VIRIDIAN_MSR_ICR         0x40000071
   11.27 +#define VIRIDIAN_MSR_TPR         0x40000072
   11.28 +
   11.29 +/* Viridian Hypercall Status Codes. */
   11.30 +#define HV_STATUS_SUCCESS                       0x0000
   11.31 +#define HV_STATUS_INVALID_HYPERCALL_CODE        0x0002
   11.32 +
   11.33 +/* Viridian Hypercall Codes and Parameters. */
   11.34 +#define HvNotifyLongSpinWait    8
   11.35 +
    11.36 +/* Viridian CPUID 40000003, Viridian MSR availability. */
   11.37 +#define CPUID3A_MSR_APIC_ACCESS (1 << 4)
   11.38 +#define CPUID3A_MSR_HYPERCALL   (1 << 5)
   11.39 +#define CPUID3A_MSR_VP_INDEX    (1 << 6)
   11.40 +
    11.41 +/* Viridian CPUID 40000004, Implementation Recommendations. */
   11.42 +#define CPUID4A_MSR_BASED_APIC  (1 << 3)
   11.43 +
   11.44 +int cpuid_viridian_leaves(unsigned int leaf, unsigned int *eax,
   11.45 +                          unsigned int *ebx, unsigned int *ecx,
   11.46 +                          unsigned int *edx)
   11.47 +{
   11.48 +    struct domain *d = current->domain;
   11.49 +
   11.50 +    if ( !is_viridian_domain(d) )
   11.51 +        return 0;
   11.52 +
   11.53 +    leaf -= 0x40000000;
   11.54 +    if ( leaf > 5 )
   11.55 +        return 0;
   11.56 +
   11.57 +    *eax = *ebx = *ecx = *edx = 0;
   11.58 +    switch ( leaf )
   11.59 +    {
   11.60 +    case 0:
   11.61 +        *eax = 0x40000005; /* Maximum leaf */
   11.62 +        *ebx = 0x7263694d; /* Magic numbers  */
   11.63 +        *ecx = 0x666F736F;
   11.64 +        *edx = 0x76482074;
   11.65 +        break;
   11.66 +    case 1:
   11.67 +        *eax = 0x31237648; /* Version number */
   11.68 +        break;
   11.69 +    case 2:
   11.70 +        /* Hypervisor information, but only if the guest has set its
   11.71 +           own version number. */
   11.72 +        if ( d->arch.hvm_domain.viridian.guest_os_id.raw == 0 )
   11.73 +            break;
   11.74 +        *eax = 1; /* Build number */
   11.75 +        *ebx = (xen_major_version() << 16) | xen_minor_version();
   11.76 +        *ecx = 0; /* SP */
   11.77 +        *edx = 0; /* Service branch and number */
   11.78 +        break;
   11.79 +    case 3:
   11.80 +        /* Which hypervisor MSRs are available to the guest */
   11.81 +        *eax = (CPUID3A_MSR_APIC_ACCESS |
   11.82 +                CPUID3A_MSR_HYPERCALL   |
   11.83 +                CPUID3A_MSR_VP_INDEX);
   11.84 +        break;
   11.85 +    case 4:
   11.86 +        /* Recommended hypercall usage. */
   11.87 +        if ( (d->arch.hvm_domain.viridian.guest_os_id.raw == 0) ||
   11.88 +             (d->arch.hvm_domain.viridian.guest_os_id.fields.os < 4) )
   11.89 +            break;
   11.90 +        *eax = CPUID4A_MSR_BASED_APIC;
   11.91 +        *ebx = 2047; /* long spin count */
   11.92 +        break;
   11.93 +    }
   11.94 +
   11.95 +    return 1;
   11.96 +}
   11.97 +
   11.98 +static void enable_hypercall_page(void)
   11.99 +{
  11.100 +    struct domain *d = current->domain;
  11.101 +    unsigned long gmfn = d->arch.hvm_domain.viridian.hypercall_gpa.fields.pfn;
  11.102 +    unsigned long mfn = gmfn_to_mfn(d, gmfn);
  11.103 +    uint8_t *p;
  11.104 +
  11.105 +    if ( !mfn_valid(mfn) ||
  11.106 +         !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) )
  11.107 +    {
  11.108 +        gdprintk(XENLOG_WARNING, "Bad GMFN %lx (MFN %lx)\n", gmfn, mfn);
  11.109 +        return;
  11.110 +    }
  11.111 +
  11.112 +    p = map_domain_page(mfn);
  11.113 +
  11.114 +    /*
   11.115 +     * We set bit 31 in %eax (reserved field in the Viridian hypercall
  11.116 +     * calling convention) to differentiate Xen and Viridian hypercalls.
  11.117 +     */
  11.118 +    *(u8  *)(p + 0) = 0x0d; /* orl $0x80000000, %eax */
  11.119 +    *(u32 *)(p + 1) = 0x80000000;
  11.120 +    *(u8  *)(p + 5) = 0x0f; /* vmcall/vmmcall */
  11.121 +    *(u8  *)(p + 6) = 0x01;
  11.122 +    *(u8  *)(p + 7) = ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
  11.123 +                       ? 0xc1 : 0xd9);
  11.124 +    *(u8  *)(p + 8) = 0xc3; /* ret */
  11.125 +    memset(p + 9, 0xcc, PAGE_SIZE - 9); /* int3, int3, ... */
  11.126 +
  11.127 +    unmap_domain_page(p);
  11.128 +
  11.129 +    put_page_and_type(mfn_to_page(mfn));
  11.130 +}
  11.131 +
  11.132 +int wrmsr_viridian_regs(uint32_t idx, uint32_t eax, uint32_t edx)
  11.133 +{
  11.134 +    struct domain *d = current->domain;
  11.135 +    uint64_t val = ((uint64_t)edx << 32) | eax;
  11.136 +
  11.137 +    if ( !is_viridian_domain(d) )
  11.138 +        return 0;
  11.139 +
  11.140 +    switch ( idx )
  11.141 +    {
  11.142 +    case VIRIDIAN_MSR_GUEST_OS_ID:
  11.143 +        perfc_incr(mshv_wrmsr_osid);
  11.144 +        d->arch.hvm_domain.viridian.guest_os_id.raw = val;
  11.145 +        gdprintk(XENLOG_INFO, "Guest os:\n");
  11.146 +        gdprintk(XENLOG_INFO, "\tvendor: %x\n",
  11.147 +               d->arch.hvm_domain.viridian.guest_os_id.fields.vendor);
  11.148 +        gdprintk(XENLOG_INFO, "\tos: %x\n",
  11.149 +               d->arch.hvm_domain.viridian.guest_os_id.fields.os);
  11.150 +        gdprintk(XENLOG_INFO, "\tmajor: %x\n",
  11.151 +               d->arch.hvm_domain.viridian.guest_os_id.fields.major);
  11.152 +        gdprintk(XENLOG_INFO, "\tminor: %x\n",
  11.153 +               d->arch.hvm_domain.viridian.guest_os_id.fields.minor);
  11.154 +        gdprintk(XENLOG_INFO, "\tsp: %x\n",
  11.155 +               d->arch.hvm_domain.viridian.guest_os_id.fields.service_pack);
  11.156 +        gdprintk(XENLOG_INFO, "\tbuild: %x\n",
  11.157 +               d->arch.hvm_domain.viridian.guest_os_id.fields.build_number);
  11.158 +        break;
  11.159 +
  11.160 +    case VIRIDIAN_MSR_HYPERCALL:
  11.161 +        perfc_incr(mshv_wrmsr_hc_page);
  11.162 +        gdprintk(XENLOG_INFO, "Set hypercall page %"PRIx64".\n", val);
  11.163 +        if ( d->arch.hvm_domain.viridian.guest_os_id.raw == 0 )
  11.164 +            break;
  11.165 +        d->arch.hvm_domain.viridian.hypercall_gpa.raw = val;
  11.166 +        if ( d->arch.hvm_domain.viridian.hypercall_gpa.fields.enabled )
  11.167 +            enable_hypercall_page();
  11.168 +        break;
  11.169 +
  11.170 +    case VIRIDIAN_MSR_VP_INDEX:
  11.171 +        perfc_incr(mshv_wrmsr_vp_index);
  11.172 +        gdprintk(XENLOG_INFO, "Set VP index %"PRIu64".\n", val);
  11.173 +        break;
  11.174 +
  11.175 +    case VIRIDIAN_MSR_EOI:
  11.176 +        perfc_incr(mshv_wrmsr_eoi);
  11.177 +        vlapic_EOI_set(vcpu_vlapic(current));
  11.178 +        break;
  11.179 +
  11.180 +    case VIRIDIAN_MSR_ICR: {
  11.181 +        struct vlapic *vlapic = vcpu_vlapic(current);
  11.182 +        perfc_incr(mshv_wrmsr_icr);
  11.183 +        eax &= ~(1 << 12);
  11.184 +        edx &= 0xff000000;
  11.185 +        vlapic_set_reg(vlapic, APIC_ICR2, edx);
  11.186 +        if ( vlapic_ipi(vlapic, eax, edx) == X86EMUL_OKAY )
  11.187 +            vlapic_set_reg(vlapic, APIC_ICR, eax);
  11.188 +        break;
  11.189 +    }
  11.190 +
  11.191 +    case VIRIDIAN_MSR_TPR:
  11.192 +        perfc_incr(mshv_wrmsr_tpr);
  11.193 +        vlapic_set_reg(vcpu_vlapic(current), APIC_TASKPRI, eax & 0xff);
  11.194 +        break;
  11.195 +
  11.196 +    default:
  11.197 +        return 0;
  11.198 +    }
  11.199 +
  11.200 +    return 1;
  11.201 +}
  11.202 +
  11.203 +int rdmsr_viridian_regs(uint32_t idx, uint32_t *eax, uint32_t *edx)
  11.204 +{
  11.205 +    uint64_t val;
  11.206 +    struct vcpu *v = current;
  11.207 +    
  11.208 +    if ( !is_viridian_domain(v->domain) )
  11.209 +        return 0;
  11.210 +
  11.211 +    switch ( idx )
  11.212 +    {
  11.213 +    case VIRIDIAN_MSR_GUEST_OS_ID:
  11.214 +        perfc_incr(mshv_rdmsr_osid);
  11.215 +        val = v->domain->arch.hvm_domain.viridian.guest_os_id.raw;
  11.216 +        break;
  11.217 +
  11.218 +    case VIRIDIAN_MSR_HYPERCALL:
  11.219 +        perfc_incr(mshv_rdmsr_hc_page);
  11.220 +        val = v->domain->arch.hvm_domain.viridian.hypercall_gpa.raw;
  11.221 +        break;
  11.222 +
  11.223 +    case VIRIDIAN_MSR_VP_INDEX:
  11.224 +        perfc_incr(mshv_rdmsr_vp_index);
  11.225 +        val = v->vcpu_id;
  11.226 +        break;
  11.227 +
  11.228 +    case VIRIDIAN_MSR_ICR:
  11.229 +        perfc_incr(mshv_rdmsr_icr);
  11.230 +        val = (((uint64_t)vlapic_get_reg(vcpu_vlapic(v), APIC_ICR2) << 32) |
  11.231 +               vlapic_get_reg(vcpu_vlapic(v), APIC_ICR));
  11.232 +        break;
  11.233 +
  11.234 +    case VIRIDIAN_MSR_TPR:
  11.235 +        perfc_incr(mshv_rdmsr_tpr);
  11.236 +        val = vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI);
  11.237 +        break;
  11.238 +
  11.239 +    default:
  11.240 +        return 0;
  11.241 +    }
  11.242 +
  11.243 +    *eax = val;
  11.244 +    *edx = val >> 32;
  11.245 +    return 1;
  11.246 +}
  11.247 +
  11.248 +int viridian_hypercall(struct cpu_user_regs *regs)
  11.249 +{
  11.250 +    struct domain *d = current->domain;
  11.251 +    int mode = hvm_guest_x86_mode(current);
  11.252 +    unsigned long input_params_gpa, output_params_gpa;
  11.253 +    uint16_t status = HV_STATUS_SUCCESS;
  11.254 +
  11.255 +    union hypercall_input {
  11.256 +        uint64_t raw;
  11.257 +        struct {
  11.258 +            uint16_t call_code;
  11.259 +            uint16_t rsvd1;
  11.260 +            unsigned rep_count:12;
  11.261 +            unsigned rsvd2:4;
  11.262 +            unsigned rep_start:12;
  11.263 +            unsigned rsvd3:4;
  11.264 +        };
  11.265 +    } input;
  11.266 +
  11.267 +    union hypercall_output {
  11.268 +        uint64_t raw;
  11.269 +        struct {
  11.270 +            uint16_t result;
  11.271 +            uint16_t rsvd1;
  11.272 +            unsigned rep_complete:12;
  11.273 +            unsigned rsvd2:20;
  11.274 +        };
  11.275 +    } output = { 0 };
  11.276 +
  11.277 +    ASSERT(is_viridian_domain(d));
  11.278 +
  11.279 +    switch ( mode )
  11.280 +    {
  11.281 +#ifdef __x86_64__
  11.282 +    case 8:
  11.283 +        input.raw = regs->rcx;
  11.284 +        input_params_gpa = regs->rdx;
  11.285 +        output_params_gpa = regs->r8;
  11.286 +        break;
  11.287 +#endif
  11.288 +    case 4:
  11.289 +        input.raw = ((uint64_t)regs->edx << 32) | regs->eax;
  11.290 +        input_params_gpa = ((uint64_t)regs->ebx << 32) | regs->ecx;
  11.291 +        output_params_gpa = ((uint64_t)regs->edi << 32) | regs->esi;
  11.292 +        break;
  11.293 +    default:
  11.294 +        goto out;
  11.295 +    }
  11.296 +
  11.297 +    switch ( input.call_code )
  11.298 +    {
  11.299 +    case HvNotifyLongSpinWait:
  11.300 +        perfc_incr(mshv_call_long_wait);
  11.301 +        do_sched_op_compat(SCHEDOP_yield, 0);
  11.302 +        status = HV_STATUS_SUCCESS;
  11.303 +        break;
  11.304 +    default:
  11.305 +        status = HV_STATUS_INVALID_HYPERCALL_CODE;
  11.306 +        break;
  11.307 +    }
  11.308 +
  11.309 +out:
  11.310 +    output.result = status;
  11.311 +    switch (mode) {
  11.312 +#ifdef __x86_64__
  11.313 +    case 8:
  11.314 +        regs->rax = output.raw;
  11.315 +        break;
  11.316 +#endif
  11.317 +    default:
  11.318 +        regs->edx = output.raw >> 32;
  11.319 +        regs->eax = output.raw;
  11.320 +        break;
  11.321 +    }
  11.322 +
  11.323 +    return HVM_HCALL_completed;
  11.324 +}
  11.325 +
  11.326 +static int viridian_save_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
  11.327 +{
  11.328 +    struct hvm_viridian_context ctxt;
  11.329 +
  11.330 +    if ( !is_viridian_domain(d) )
  11.331 +        return 0;
  11.332 +
  11.333 +    ctxt.hypercall_gpa = d->arch.hvm_domain.viridian.hypercall_gpa.raw;
  11.334 +    ctxt.guest_os_id   = d->arch.hvm_domain.viridian.guest_os_id.raw;
  11.335 +
  11.336 +    return (hvm_save_entry(VIRIDIAN, 0, h, &ctxt) != 0);
  11.337 +}
  11.338 +
  11.339 +static int viridian_load_cpu_ctxt(struct domain *d, hvm_domain_context_t *h)
  11.340 +{
  11.341 +    struct hvm_viridian_context ctxt;
  11.342 +
  11.343 +    if ( hvm_load_entry(VIRIDIAN, h, &ctxt) != 0 )
  11.344 +        return -EINVAL;
  11.345 +
  11.346 +    d->arch.hvm_domain.viridian.hypercall_gpa.raw = ctxt.hypercall_gpa;
  11.347 +    d->arch.hvm_domain.viridian.guest_os_id.raw   = ctxt.guest_os_id;
  11.348 +
  11.349 +    return 0;
  11.350 +}
  11.351 +
  11.352 +HVM_REGISTER_SAVE_RESTORE(VIRIDIAN, viridian_save_cpu_ctxt,
  11.353 +                          viridian_load_cpu_ctxt, 1, HVMSR_PER_DOM);
    12.1 --- a/xen/arch/x86/hvm/vlapic.c	Mon Oct 13 13:15:20 2008 +0100
    12.2 +++ b/xen/arch/x86/hvm/vlapic.c	Tue Oct 14 10:45:29 2008 +0100
    12.3 @@ -417,7 +417,7 @@ void vlapic_EOI_set(struct vlapic *vlapi
    12.4      hvm_dpci_msi_eoi(current->domain, vector);
    12.5  }
    12.6  
    12.7 -static int vlapic_ipi(
    12.8 +int vlapic_ipi(
    12.9      struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high)
   12.10  {
   12.11      unsigned int dest =         GET_xAPIC_DEST_FIELD(icr_high);
    13.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Oct 13 13:15:20 2008 +0100
    13.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Oct 14 10:45:29 2008 +0100
    13.3 @@ -1677,7 +1677,8 @@ static int vmx_msr_read_intercept(struct
    13.4              break;
    13.5          }
    13.6  
    13.7 -        if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
    13.8 +        if ( rdmsr_viridian_regs(ecx, &eax, &edx) ||
    13.9 +             rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
   13.10               rdmsr_safe(ecx, eax, edx) == 0 )
   13.11          {
   13.12              regs->eax = eax;
   13.13 @@ -1852,6 +1853,10 @@ static int vmx_msr_write_intercept(struc
   13.14      default:
   13.15          if ( vpmu_do_wrmsr(regs) )
   13.16              return X86EMUL_OKAY;
   13.17 +
   13.18 +        if ( wrmsr_viridian_regs(ecx, regs->eax, regs->edx) ) 
   13.19 +            break;
   13.20 +
   13.21          switch ( long_mode_do_msr_write(regs) )
   13.22          {
   13.23              case HNDL_unhandled:
    14.1 --- a/xen/arch/x86/traps.c	Mon Oct 13 13:15:20 2008 +0100
    14.2 +++ b/xen/arch/x86/traps.c	Tue Oct 14 10:45:29 2008 +0100
    14.3 @@ -577,7 +577,11 @@ DO_ERROR_NOCODE(TRAP_simd_error,      si
    14.4  int rdmsr_hypervisor_regs(
    14.5      uint32_t idx, uint32_t *eax, uint32_t *edx)
    14.6  {
    14.7 -    idx -= 0x40000000;
    14.8 +    struct domain *d = current->domain;
    14.9 +    /* Optionally shift out of the way of Viridian architectural MSRs. */
   14.10 +    uint32_t base = is_viridian_domain(d) ? 0x40000200 : 0x40000000;
   14.11 +
   14.12 +    idx -= base;
   14.13      if ( idx > 0 )
   14.14          return 0;
   14.15  
   14.16 @@ -599,8 +603,10 @@ int wrmsr_hypervisor_regs(
   14.17      uint32_t idx, uint32_t eax, uint32_t edx)
   14.18  {
   14.19      struct domain *d = current->domain;
   14.20 -
   14.21 -    idx -= 0x40000000;
   14.22 +    /* Optionally shift out of the way of Viridian architectural MSRs. */
   14.23 +    uint32_t base = is_viridian_domain(d) ? 0x40000200 : 0x40000000;
   14.24 +
   14.25 +    idx -= base;
   14.26      if ( idx > 0 )
   14.27          return 0;
   14.28  
   14.29 @@ -628,7 +634,7 @@ int wrmsr_hypervisor_regs(
   14.30          {
   14.31              gdprintk(XENLOG_WARNING,
   14.32                       "Bad GMFN %lx (MFN %lx) to MSR %08x\n",
   14.33 -                     gmfn, mfn, 0x40000000);
   14.34 +                     gmfn, mfn, base + idx);
   14.35              return 0;
   14.36          }
   14.37  
   14.38 @@ -650,14 +656,18 @@ int wrmsr_hypervisor_regs(
   14.39  int cpuid_hypervisor_leaves(
   14.40      uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
   14.41  {
   14.42 -    idx -= 0x40000000;
   14.43 -    if ( idx > 2 )
   14.44 +    struct domain *d = current->domain;
   14.45 +    /* Optionally shift out of the way of Viridian architectural leaves. */
   14.46 +    uint32_t base = is_viridian_domain(d) ? 0x40000100 : 0x40000000;
   14.47 +
   14.48 +    idx -= base;
   14.49 +    if ( idx > 2 ) 
   14.50          return 0;
   14.51  
   14.52      switch ( idx )
   14.53      {
   14.54      case 0:
   14.55 -        *eax = 0x40000002; /* Largest leaf */
   14.56 +        *eax = base + 2; /* Largest leaf */
   14.57          *ebx = XEN_CPUID_SIGNATURE_EBX;
   14.58          *ecx = XEN_CPUID_SIGNATURE_ECX;
   14.59          *edx = XEN_CPUID_SIGNATURE_EDX;
   14.60 @@ -673,6 +683,8 @@ int cpuid_hypervisor_leaves(
   14.61      case 2:
   14.62          *eax = 1;          /* Number of hypercall-transfer pages */
   14.63          *ebx = 0x40000000; /* MSR base address */
   14.64 +        if ( is_viridian_domain(d) )
   14.65 +            *ebx = 0x40000200;
   14.66          *ecx = 0;          /* Features 1 */
   14.67          *edx = 0;          /* Features 2 */
   14.68          if ( !is_hvm_vcpu(current) )
    15.1 --- a/xen/arch/x86/x86_emulate/x86_emulate.c	Mon Oct 13 13:15:20 2008 +0100
    15.2 +++ b/xen/arch/x86/x86_emulate/x86_emulate.c	Tue Oct 14 10:45:29 2008 +0100
    15.3 @@ -24,8 +24,9 @@
    15.4  /* Operand sizes: 8-bit operands or specified/overridden size. */
    15.5  #define ByteOp      (1<<0) /* 8-bit operands. */
    15.6  /* Destination operand type. */
    15.7 -#define DstBitBase  (0<<1) /* Memory operand, bit string. */
    15.8 -#define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
    15.9 +#define DstNone     (0<<1) /* No destination operand. */
   15.10 +#define DstImplicit (0<<1) /* Destination operand is implicit in the opcode. */
   15.11 +#define DstBitBase  (1<<1) /* Memory operand, bit string. */
   15.12  #define DstReg      (2<<1) /* Register operand. */
   15.13  #define DstMem      (3<<1) /* Memory operand. */
   15.14  #define DstMask     (3<<1)
   15.15 @@ -42,6 +43,8 @@
   15.16  #define ModRM       (1<<6)
   15.17  /* Destination is only written; never read. */
   15.18  #define Mov         (1<<7)
   15.19 +/* All operands are implicit in the opcode. */
   15.20 +#define ImplicitOps (DstImplicit|SrcImplicit)
   15.21  
   15.22  static uint8_t opcode_table[256] = {
   15.23      /* 0x00 - 0x07 */
   15.24 @@ -1174,13 +1177,12 @@ x86_emulate(
   15.25      int override_seg = -1, rc = X86EMUL_OKAY;
   15.26      struct operand src, dst;
   15.27  
   15.28 -    /* Data operand effective address (usually computed from ModRM). */
   15.29 -    struct operand ea;
   15.30 -
   15.31 -    /* Default is a memory operand relative to segment DS. */
   15.32 -    ea.type    = OP_MEM;
   15.33 -    ea.mem.seg = x86_seg_ds;
   15.34 -    ea.mem.off = 0;
   15.35 +    /*
   15.36 +     * Data operand effective address (usually computed from ModRM).
   15.37 +     * Default is a memory operand relative to segment DS.
   15.38 +     */
   15.39 +    struct operand ea = { .type = OP_MEM };
   15.40 +    ea.mem.seg = x86_seg_ds; /* gcc may reject anon union initializer */
   15.41  
   15.42      ctxt->retire.byte = 0;
   15.43  
   15.44 @@ -1408,14 +1410,11 @@ x86_emulate(
   15.45      if ( override_seg != -1 )
   15.46          ea.mem.seg = override_seg;
   15.47  
   15.48 -    /* Special instructions do their own operand decoding. */
   15.49 -    if ( (d & DstMask) == ImplicitOps )
   15.50 -        goto special_insn;
   15.51 -
   15.52      /* Decode and fetch the source operand: register, memory or immediate. */
   15.53      switch ( d & SrcMask )
   15.54      {
   15.55 -    case SrcNone:
   15.56 +    case SrcNone: /* case SrcImplicit: */
   15.57 +        src.type = OP_NONE;
   15.58          break;
   15.59      case SrcReg:
   15.60          src.type = OP_REG;
   15.61 @@ -1479,7 +1478,21 @@ x86_emulate(
   15.62      /* Decode and fetch the destination operand: register or memory. */
   15.63      switch ( d & DstMask )
   15.64      {
   15.65 +    case DstNone: /* case DstImplicit: */
   15.66 +        /*
   15.67 +         * The only implicit-operands instructions allowed a LOCK prefix are
   15.68 +         * CMPXCHG{8,16}B, MOV CRn, MOV DRn.
   15.69 +         */
   15.70 +        generate_exception_if(
   15.71 +            lock_prefix &&
   15.72 +            ((b < 0x20) || (b > 0x23)) && /* MOV CRn/DRn */
   15.73 +            (b != 0xc7),                  /* CMPXCHG{8,16}B */
   15.74 +            EXC_GP, 0);
   15.75 +        dst.type = OP_NONE;
   15.76 +        break;
   15.77 +
   15.78      case DstReg:
   15.79 +        generate_exception_if(lock_prefix, EXC_GP, 0);
   15.80          dst.type = OP_REG;
   15.81          if ( d & ByteOp )
   15.82          {
   15.83 @@ -1535,6 +1548,7 @@ x86_emulate(
   15.84          dst = ea;
   15.85          if ( dst.type == OP_REG )
   15.86          {
   15.87 +            generate_exception_if(lock_prefix, EXC_GP, 0);
   15.88              switch ( dst.bytes )
   15.89              {
   15.90              case 1: dst.val = *(uint8_t  *)dst.reg; break;
   15.91 @@ -1553,9 +1567,6 @@ x86_emulate(
   15.92          break;
   15.93      }
   15.94  
   15.95 -    /* LOCK prefix allowed only on instructions with memory destination. */
   15.96 -    generate_exception_if(lock_prefix && (dst.type != OP_MEM), EXC_GP, 0);
   15.97 -
   15.98      if ( twobyte )
   15.99          goto twobyte_insn;
  15.100  
  15.101 @@ -1618,583 +1629,6 @@ x86_emulate(
  15.102          dst.type = OP_NONE;
  15.103          break;
  15.104  
  15.105 -    case 0x62: /* bound */ {
  15.106 -        unsigned long src_val2;
  15.107 -        int lb, ub, idx;
  15.108 -        generate_exception_if(mode_64bit() || (src.type != OP_MEM),
  15.109 -                              EXC_UD, -1);
  15.110 -        if ( (rc = read_ulong(src.mem.seg, src.mem.off + op_bytes,
  15.111 -                              &src_val2, op_bytes, ctxt, ops)) )
  15.112 -            goto done;
  15.113 -        ub  = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2;
  15.114 -        lb  = (op_bytes == 2) ? (int16_t)src.val  : (int32_t)src.val;
  15.115 -        idx = (op_bytes == 2) ? (int16_t)dst.val  : (int32_t)dst.val;
  15.116 -        generate_exception_if((idx < lb) || (idx > ub), EXC_BR, -1);
  15.117 -        dst.type = OP_NONE;
  15.118 -        break;
  15.119 -    }
  15.120 -
  15.121 -    case 0x63: /* movsxd (x86/64) / arpl (x86/32) */
  15.122 -        if ( mode_64bit() )
  15.123 -        {
  15.124 -            /* movsxd */
  15.125 -            if ( src.type == OP_REG )
  15.126 -                src.val = *(int32_t *)src.reg;
  15.127 -            else if ( (rc = read_ulong(src.mem.seg, src.mem.off,
  15.128 -                                       &src.val, 4, ctxt, ops)) )
  15.129 -                goto done;
  15.130 -            dst.val = (int32_t)src.val;
  15.131 -        }
  15.132 -        else
  15.133 -        {
  15.134 -            /* arpl */
  15.135 -            uint16_t src_val = dst.val;
  15.136 -            dst = src;
  15.137 -            _regs.eflags &= ~EFLG_ZF;
  15.138 -            _regs.eflags |= ((src_val & 3) > (dst.val & 3)) ? EFLG_ZF : 0;
  15.139 -            if ( _regs.eflags & EFLG_ZF )
  15.140 -                dst.val  = (dst.val & ~3) | (src_val & 3);
  15.141 -            else
  15.142 -                dst.type = OP_NONE;
  15.143 -            generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
  15.144 -        }
  15.145 -        break;
  15.146 -
  15.147 -    case 0x69: /* imul imm16/32 */
  15.148 -    case 0x6b: /* imul imm8 */ {
  15.149 -        unsigned long src1; /* ModR/M source operand */
  15.150 -        if ( ea.type == OP_REG )
  15.151 -            src1 = *ea.reg;
  15.152 -        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
  15.153 -                                   &src1, op_bytes, ctxt, ops)) )
  15.154 -            goto done;
  15.155 -        _regs.eflags &= ~(EFLG_OF|EFLG_CF);
  15.156 -        switch ( dst.bytes )
  15.157 -        {
  15.158 -        case 2:
  15.159 -            dst.val = ((uint32_t)(int16_t)src.val *
  15.160 -                       (uint32_t)(int16_t)src1);
  15.161 -            if ( (int16_t)dst.val != (uint32_t)dst.val )
  15.162 -                _regs.eflags |= EFLG_OF|EFLG_CF;
  15.163 -            break;
  15.164 -#ifdef __x86_64__
  15.165 -        case 4:
  15.166 -            dst.val = ((uint64_t)(int32_t)src.val *
  15.167 -                       (uint64_t)(int32_t)src1);
  15.168 -            if ( (int32_t)dst.val != dst.val )
  15.169 -                _regs.eflags |= EFLG_OF|EFLG_CF;
  15.170 -            break;
  15.171 -#endif
  15.172 -        default: {
  15.173 -            unsigned long m[2] = { src.val, src1 };
  15.174 -            if ( imul_dbl(m) )
  15.175 -                _regs.eflags |= EFLG_OF|EFLG_CF;
  15.176 -            dst.val = m[0];
  15.177 -            break;
  15.178 -        }
  15.179 -        }
  15.180 -        break;
  15.181 -    }
  15.182 -
  15.183 -    case 0x82: /* Grp1 (x86/32 only) */
  15.184 -        generate_exception_if(mode_64bit(), EXC_UD, -1);
  15.185 -    case 0x80: case 0x81: case 0x83: /* Grp1 */
  15.186 -        switch ( modrm_reg & 7 )
  15.187 -        {
  15.188 -        case 0: goto add;
  15.189 -        case 1: goto or;
  15.190 -        case 2: goto adc;
  15.191 -        case 3: goto sbb;
  15.192 -        case 4: goto and;
  15.193 -        case 5: goto sub;
  15.194 -        case 6: goto xor;
  15.195 -        case 7: goto cmp;
  15.196 -        }
  15.197 -        break;
  15.198 -
  15.199 -    case 0xa8 ... 0xa9: /* test imm,%%eax */
  15.200 -        dst.reg = (unsigned long *)&_regs.eax;
  15.201 -        dst.val = _regs.eax;
  15.202 -    case 0x84 ... 0x85: test: /* test */
  15.203 -        emulate_2op_SrcV("test", src, dst, _regs.eflags);
  15.204 -        dst.type = OP_NONE;
  15.205 -        break;
  15.206 -
  15.207 -    case 0x86 ... 0x87: xchg: /* xchg */
  15.208 -        /* Write back the register source. */
  15.209 -        switch ( dst.bytes )
  15.210 -        {
  15.211 -        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
  15.212 -        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
  15.213 -        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
  15.214 -        case 8: *src.reg = dst.val; break;
  15.215 -        }
  15.216 -        /* Write back the memory destination with implicit LOCK prefix. */
  15.217 -        dst.val = src.val;
  15.218 -        lock_prefix = 1;
  15.219 -        break;
  15.220 -
  15.221 -    case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
  15.222 -        generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
  15.223 -    case 0x88 ... 0x8b: /* mov */
  15.224 -        dst.val = src.val;
  15.225 -        break;
  15.226 -
  15.227 -    case 0x8c: /* mov Sreg,r/m */ {
  15.228 -        struct segment_register reg;
  15.229 -        enum x86_segment seg = decode_segment(modrm_reg);
  15.230 -        generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
  15.231 -        fail_if(ops->read_segment == NULL);
  15.232 -        if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
  15.233 -            goto done;
  15.234 -        dst.val = reg.sel;
  15.235 -        if ( dst.type == OP_MEM )
  15.236 -            dst.bytes = 2;
  15.237 -        break;
  15.238 -    }
  15.239 -
  15.240 -    case 0x8e: /* mov r/m,Sreg */ {
  15.241 -        enum x86_segment seg = decode_segment(modrm_reg);
  15.242 -        generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
  15.243 -        if ( (rc = load_seg(seg, (uint16_t)src.val, ctxt, ops)) != 0 )
  15.244 -            goto done;
  15.245 -        if ( seg == x86_seg_ss )
  15.246 -            ctxt->retire.flags.mov_ss = 1;
  15.247 -        dst.type = OP_NONE;
  15.248 -        break;
  15.249 -    }
  15.250 -
  15.251 -    case 0x8d: /* lea */
  15.252 -        dst.val = ea.mem.off;
  15.253 -        break;
  15.254 -
  15.255 -    case 0x8f: /* pop (sole member of Grp1a) */
  15.256 -        generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
  15.257 -        /* 64-bit mode: POP defaults to a 64-bit operand. */
  15.258 -        if ( mode_64bit() && (dst.bytes == 4) )
  15.259 -            dst.bytes = 8;
  15.260 -        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
  15.261 -                              &dst.val, dst.bytes, ctxt, ops)) != 0 )
  15.262 -            goto done;
  15.263 -        break;
  15.264 -
  15.265 -    case 0xb0 ... 0xb7: /* mov imm8,r8 */
  15.266 -        dst.reg = decode_register(
  15.267 -            (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0));
  15.268 -        dst.val = src.val;
  15.269 -        break;
  15.270 -
  15.271 -    case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
  15.272 -        if ( dst.bytes == 8 ) /* Fetch more bytes to obtain imm64 */
  15.273 -            src.val = ((uint32_t)src.val |
  15.274 -                       ((uint64_t)insn_fetch_type(uint32_t) << 32));
  15.275 -        dst.reg = decode_register(
  15.276 -            (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
  15.277 -        dst.val = src.val;
  15.278 -        break;
  15.279 -
  15.280 -    case 0xc0 ... 0xc1: grp2: /* Grp2 */
  15.281 -        switch ( modrm_reg & 7 )
  15.282 -        {
  15.283 -        case 0: /* rol */
  15.284 -            emulate_2op_SrcB("rol", src, dst, _regs.eflags);
  15.285 -            break;
  15.286 -        case 1: /* ror */
  15.287 -            emulate_2op_SrcB("ror", src, dst, _regs.eflags);
  15.288 -            break;
  15.289 -        case 2: /* rcl */
  15.290 -            emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
  15.291 -            break;
  15.292 -        case 3: /* rcr */
  15.293 -            emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
  15.294 -            break;
  15.295 -        case 4: /* sal/shl */
  15.296 -        case 6: /* sal/shl */
  15.297 -            emulate_2op_SrcB("sal", src, dst, _regs.eflags);
  15.298 -            break;
  15.299 -        case 5: /* shr */
  15.300 -            emulate_2op_SrcB("shr", src, dst, _regs.eflags);
  15.301 -            break;
  15.302 -        case 7: /* sar */
  15.303 -            emulate_2op_SrcB("sar", src, dst, _regs.eflags);
  15.304 -            break;
  15.305 -        }
  15.306 -        break;
  15.307 -
  15.308 -    case 0xc4: /* les */ {
  15.309 -        unsigned long sel;
  15.310 -        dst.val = x86_seg_es;
  15.311 -    les: /* dst.val identifies the segment */
  15.312 -        generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
  15.313 -        if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
  15.314 -                              &sel, 2, ctxt, ops)) != 0 )
  15.315 -            goto done;
  15.316 -        if ( (rc = load_seg(dst.val, (uint16_t)sel, ctxt, ops)) != 0 )
  15.317 -            goto done;
  15.318 -        dst.val = src.val;
  15.319 -        break;
  15.320 -    }
  15.321 -
  15.322 -    case 0xc5: /* lds */
  15.323 -        dst.val = x86_seg_ds;
  15.324 -        goto les;
  15.325 -
  15.326 -    case 0xd0 ... 0xd1: /* Grp2 */
  15.327 -        src.val = 1;
  15.328 -        goto grp2;
  15.329 -
  15.330 -    case 0xd2 ... 0xd3: /* Grp2 */
  15.331 -        src.val = _regs.ecx;
  15.332 -        goto grp2;
  15.333 -
  15.334 -    case 0xf6 ... 0xf7: /* Grp3 */
  15.335 -        switch ( modrm_reg & 7 )
  15.336 -        {
  15.337 -        case 0 ... 1: /* test */
  15.338 -            /* Special case in Grp3: test has an immediate source operand. */
  15.339 -            src.type = OP_IMM;
  15.340 -            src.bytes = (d & ByteOp) ? 1 : op_bytes;
  15.341 -            if ( src.bytes == 8 ) src.bytes = 4;
  15.342 -            switch ( src.bytes )
  15.343 -            {
  15.344 -            case 1: src.val = insn_fetch_type(int8_t);  break;
  15.345 -            case 2: src.val = insn_fetch_type(int16_t); break;
  15.346 -            case 4: src.val = insn_fetch_type(int32_t); break;
  15.347 -            }
  15.348 -            goto test;
  15.349 -        case 2: /* not */
  15.350 -            dst.val = ~dst.val;
  15.351 -            break;
  15.352 -        case 3: /* neg */
  15.353 -            emulate_1op("neg", dst, _regs.eflags);
  15.354 -            break;
  15.355 -        case 4: /* mul */
  15.356 -            src = dst;
  15.357 -            dst.type = OP_REG;
  15.358 -            dst.reg  = (unsigned long *)&_regs.eax;
  15.359 -            dst.val  = *dst.reg;
  15.360 -            _regs.eflags &= ~(EFLG_OF|EFLG_CF);
  15.361 -            switch ( src.bytes )
  15.362 -            {
  15.363 -            case 1:
  15.364 -                dst.val = (uint8_t)dst.val;
  15.365 -                dst.val *= src.val;
  15.366 -                if ( (uint8_t)dst.val != (uint16_t)dst.val )
  15.367 -                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.368 -                dst.bytes = 2;
  15.369 -                break;
  15.370 -            case 2:
  15.371 -                dst.val = (uint16_t)dst.val;
  15.372 -                dst.val *= src.val;
  15.373 -                if ( (uint16_t)dst.val != (uint32_t)dst.val )
  15.374 -                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.375 -                *(uint16_t *)&_regs.edx = dst.val >> 16;
  15.376 -                break;
  15.377 -#ifdef __x86_64__
  15.378 -            case 4:
  15.379 -                dst.val = (uint32_t)dst.val;
  15.380 -                dst.val *= src.val;
  15.381 -                if ( (uint32_t)dst.val != dst.val )
  15.382 -                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.383 -                _regs.edx = (uint32_t)(dst.val >> 32);
  15.384 -                break;
  15.385 -#endif
  15.386 -            default: {
  15.387 -                unsigned long m[2] = { src.val, dst.val };
  15.388 -                if ( mul_dbl(m) )
  15.389 -                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.390 -                _regs.edx = m[1];
  15.391 -                dst.val  = m[0];
  15.392 -                break;
  15.393 -            }
  15.394 -            }
  15.395 -            break;
  15.396 -        case 5: /* imul */
  15.397 -            src = dst;
  15.398 -            dst.type = OP_REG;
  15.399 -            dst.reg  = (unsigned long *)&_regs.eax;
  15.400 -            dst.val  = *dst.reg;
  15.401 -            _regs.eflags &= ~(EFLG_OF|EFLG_CF);
  15.402 -            switch ( src.bytes )
  15.403 -            {
  15.404 -            case 1:
  15.405 -                dst.val = ((uint16_t)(int8_t)src.val *
  15.406 -                           (uint16_t)(int8_t)dst.val);
  15.407 -                if ( (int8_t)dst.val != (uint16_t)dst.val )
  15.408 -                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.409 -                dst.bytes = 2;
  15.410 -                break;
  15.411 -            case 2:
  15.412 -                dst.val = ((uint32_t)(int16_t)src.val *
  15.413 -                           (uint32_t)(int16_t)dst.val);
  15.414 -                if ( (int16_t)dst.val != (uint32_t)dst.val )
  15.415 -                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.416 -                *(uint16_t *)&_regs.edx = dst.val >> 16;
  15.417 -                break;
  15.418 -#ifdef __x86_64__
  15.419 -            case 4:
  15.420 -                dst.val = ((uint64_t)(int32_t)src.val *
  15.421 -                           (uint64_t)(int32_t)dst.val);
  15.422 -                if ( (int32_t)dst.val != dst.val )
  15.423 -                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.424 -                _regs.edx = (uint32_t)(dst.val >> 32);
  15.425 -                break;
  15.426 -#endif
  15.427 -            default: {
  15.428 -                unsigned long m[2] = { src.val, dst.val };
  15.429 -                if ( imul_dbl(m) )
  15.430 -                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.431 -                _regs.edx = m[1];
  15.432 -                dst.val  = m[0];
  15.433 -                break;
  15.434 -            }
  15.435 -            }
  15.436 -            break;
  15.437 -        case 6: /* div */ {
  15.438 -            unsigned long u[2], v;
  15.439 -            src = dst;
  15.440 -            dst.type = OP_REG;
  15.441 -            dst.reg  = (unsigned long *)&_regs.eax;
  15.442 -            switch ( src.bytes )
  15.443 -            {
  15.444 -            case 1:
  15.445 -                u[0] = (uint16_t)_regs.eax;
  15.446 -                u[1] = 0;
  15.447 -                v    = (uint8_t)src.val;
  15.448 -                generate_exception_if(
  15.449 -                    div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]),
  15.450 -                    EXC_DE, -1);
  15.451 -                dst.val = (uint8_t)u[0];
  15.452 -                ((uint8_t *)&_regs.eax)[1] = u[1];
  15.453 -                break;
  15.454 -            case 2:
  15.455 -                u[0] = ((uint32_t)_regs.edx << 16) | (uint16_t)_regs.eax;
  15.456 -                u[1] = 0;
  15.457 -                v    = (uint16_t)src.val;
  15.458 -                generate_exception_if(
  15.459 -                    div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]),
  15.460 -                    EXC_DE, -1);
  15.461 -                dst.val = (uint16_t)u[0];
  15.462 -                *(uint16_t *)&_regs.edx = u[1];
  15.463 -                break;
  15.464 -#ifdef __x86_64__
  15.465 -            case 4:
  15.466 -                u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax;
  15.467 -                u[1] = 0;
  15.468 -                v    = (uint32_t)src.val;
  15.469 -                generate_exception_if(
  15.470 -                    div_dbl(u, v) || ((uint32_t)u[0] != u[0]),
  15.471 -                    EXC_DE, -1);
  15.472 -                dst.val   = (uint32_t)u[0];
  15.473 -                _regs.edx = (uint32_t)u[1];
  15.474 -                break;
  15.475 -#endif
  15.476 -            default:
  15.477 -                u[0] = _regs.eax;
  15.478 -                u[1] = _regs.edx;
  15.479 -                v    = src.val;
  15.480 -                generate_exception_if(div_dbl(u, v), EXC_DE, -1);
  15.481 -                dst.val   = u[0];
  15.482 -                _regs.edx = u[1];
  15.483 -                break;
  15.484 -            }
  15.485 -            break;
  15.486 -        }
  15.487 -        case 7: /* idiv */ {
  15.488 -            unsigned long u[2], v;
  15.489 -            src = dst;
  15.490 -            dst.type = OP_REG;
  15.491 -            dst.reg  = (unsigned long *)&_regs.eax;
  15.492 -            switch ( src.bytes )
  15.493 -            {
  15.494 -            case 1:
  15.495 -                u[0] = (int16_t)_regs.eax;
  15.496 -                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
  15.497 -                v    = (int8_t)src.val;
  15.498 -                generate_exception_if(
  15.499 -                    idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]),
  15.500 -                    EXC_DE, -1);
  15.501 -                dst.val = (int8_t)u[0];
  15.502 -                ((int8_t *)&_regs.eax)[1] = u[1];
  15.503 -                break;
  15.504 -            case 2:
  15.505 -                u[0] = (int32_t)((_regs.edx << 16) | (uint16_t)_regs.eax);
  15.506 -                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
  15.507 -                v    = (int16_t)src.val;
  15.508 -                generate_exception_if(
  15.509 -                    idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]),
  15.510 -                    EXC_DE, -1);
  15.511 -                dst.val = (int16_t)u[0];
  15.512 -                *(int16_t *)&_regs.edx = u[1];
  15.513 -                break;
  15.514 -#ifdef __x86_64__
  15.515 -            case 4:
  15.516 -                u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax;
  15.517 -                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
  15.518 -                v    = (int32_t)src.val;
  15.519 -                generate_exception_if(
  15.520 -                    idiv_dbl(u, v) || ((int32_t)u[0] != u[0]),
  15.521 -                    EXC_DE, -1);
  15.522 -                dst.val   = (int32_t)u[0];
  15.523 -                _regs.edx = (uint32_t)u[1];
  15.524 -                break;
  15.525 -#endif
  15.526 -            default:
  15.527 -                u[0] = _regs.eax;
  15.528 -                u[1] = _regs.edx;
  15.529 -                v    = src.val;
  15.530 -                generate_exception_if(idiv_dbl(u, v), EXC_DE, -1);
  15.531 -                dst.val   = u[0];
  15.532 -                _regs.edx = u[1];
  15.533 -                break;
  15.534 -            }
  15.535 -            break;
  15.536 -        }
  15.537 -        default:
  15.538 -            goto cannot_emulate;
  15.539 -        }
  15.540 -        break;
  15.541 -
  15.542 -    case 0xfe: /* Grp4 */
  15.543 -        generate_exception_if((modrm_reg & 7) >= 2, EXC_UD, -1);
  15.544 -    case 0xff: /* Grp5 */
  15.545 -        switch ( modrm_reg & 7 )
  15.546 -        {
  15.547 -        case 0: /* inc */
  15.548 -            emulate_1op("inc", dst, _regs.eflags);
  15.549 -            break;
  15.550 -        case 1: /* dec */
  15.551 -            emulate_1op("dec", dst, _regs.eflags);
  15.552 -            break;
  15.553 -        case 2: /* call (near) */
  15.554 -        case 4: /* jmp (near) */
  15.555 -            if ( (dst.bytes != 8) && mode_64bit() )
  15.556 -            {
  15.557 -                dst.bytes = op_bytes = 8;
  15.558 -                if ( dst.type == OP_REG )
  15.559 -                    dst.val = *dst.reg;
  15.560 -                else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
  15.561 -                                           &dst.val, 8, ctxt, ops)) != 0 )
  15.562 -                    goto done;
  15.563 -            }
  15.564 -            src.val = _regs.eip;
  15.565 -            _regs.eip = dst.val;
  15.566 -            if ( (modrm_reg & 7) == 2 )
  15.567 -                goto push; /* call */
  15.568 -            dst.type = OP_NONE;
  15.569 -            break;
  15.570 -        case 3: /* call (far, absolute indirect) */
  15.571 -        case 5: /* jmp (far, absolute indirect) */ {
  15.572 -            unsigned long sel;
  15.573 -
  15.574 -            generate_exception_if(dst.type != OP_MEM, EXC_UD, -1);
  15.575 -
  15.576 -            if ( (rc = read_ulong(dst.mem.seg, dst.mem.off+dst.bytes,
  15.577 -                                  &sel, 2, ctxt, ops)) )
  15.578 -                goto done;
  15.579 -
  15.580 -            if ( (modrm_reg & 7) == 3 ) /* call */
  15.581 -            {
  15.582 -                struct segment_register reg;
  15.583 -                fail_if(ops->read_segment == NULL);
  15.584 -                if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
  15.585 -                     (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
  15.586 -                                      &reg.sel, op_bytes, ctxt)) ||
  15.587 -                     (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
  15.588 -                                      &_regs.eip, op_bytes, ctxt)) )
  15.589 -                    goto done;
  15.590 -            }
  15.591 -
  15.592 -            if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 )
  15.593 -                goto done;
  15.594 -            _regs.eip = dst.val;
  15.595 -
  15.596 -            dst.type = OP_NONE;
  15.597 -            break;
  15.598 -        }
  15.599 -        case 6: /* push */
  15.600 -            /* 64-bit mode: PUSH defaults to a 64-bit operand. */
  15.601 -            if ( mode_64bit() && (dst.bytes == 4) )
  15.602 -            {
  15.603 -                dst.bytes = 8;
  15.604 -                if ( dst.type == OP_REG )
  15.605 -                    dst.val = *dst.reg;
  15.606 -                else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
  15.607 -                                           &dst.val, 8, ctxt, ops)) != 0 )
  15.608 -                    goto done;
  15.609 -            }
  15.610 -            if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
  15.611 -                                  &dst.val, dst.bytes, ctxt)) != 0 )
  15.612 -                goto done;
  15.613 -            dst.type = OP_NONE;
  15.614 -            break;
  15.615 -        case 7:
  15.616 -            generate_exception_if(1, EXC_UD, -1);
  15.617 -        default:
  15.618 -            goto cannot_emulate;
  15.619 -        }
  15.620 -        break;
  15.621 -    }
  15.622 -
  15.623 - writeback:
  15.624 -    switch ( dst.type )
  15.625 -    {
  15.626 -    case OP_REG:
  15.627 -        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
  15.628 -        switch ( dst.bytes )
  15.629 -        {
  15.630 -        case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
  15.631 -        case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
  15.632 -        case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
  15.633 -        case 8: *dst.reg = dst.val; break;
  15.634 -        }
  15.635 -        break;
  15.636 -    case OP_MEM:
  15.637 -        if ( !(d & Mov) && (dst.orig_val == dst.val) &&
  15.638 -             !ctxt->force_writeback )
  15.639 -            /* nothing to do */;
  15.640 -        else if ( lock_prefix )
  15.641 -            rc = ops->cmpxchg(
  15.642 -                dst.mem.seg, dst.mem.off, &dst.orig_val,
  15.643 -                &dst.val, dst.bytes, ctxt);
  15.644 -        else
  15.645 -            rc = ops->write(
  15.646 -                dst.mem.seg, dst.mem.off, &dst.val, dst.bytes, ctxt);
  15.647 -        if ( rc != 0 )
  15.648 -            goto done;
  15.649 -    default:
  15.650 -        break;
  15.651 -    }
  15.652 -
  15.653 -    /* Inject #DB if single-step tracing was enabled at instruction start. */
  15.654 -    if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
  15.655 -         (ops->inject_hw_exception != NULL) )
  15.656 -        rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
  15.657 -
  15.658 -    /* Commit shadow register state. */
  15.659 -    _regs.eflags &= ~EFLG_RF;
  15.660 -    *ctxt->regs = _regs;
  15.661 -
  15.662 - done:
  15.663 -    return rc;
  15.664 -
  15.665 - special_insn:
  15.666 -    dst.type = OP_NONE;
  15.667 -
  15.668 -    /*
  15.669 -     * The only implicit-operands instructions allowed a LOCK prefix are
  15.670 -     * CMPXCHG{8,16}B, MOV CRn, MOV DRn.
  15.671 -     */
  15.672 -    generate_exception_if(lock_prefix &&
  15.673 -                          ((b < 0x20) || (b > 0x23)) && /* MOV CRn/DRn */
  15.674 -                          (b != 0xc7),                  /* CMPXCHG{8,16}B */
  15.675 -                          EXC_GP, 0);
  15.676 -
  15.677 -    if ( twobyte )
  15.678 -        goto twobyte_special_insn;
  15.679 -
  15.680 -    switch ( b )
  15.681 -    {
  15.682      case 0x06: /* push %%es */ {
  15.683          struct segment_register reg;
  15.684          src.val = x86_seg_es;
  15.685 @@ -2370,12 +1804,90 @@ x86_emulate(
  15.686          break;
  15.687      }
  15.688  
  15.689 +    case 0x62: /* bound */ {
  15.690 +        unsigned long src_val2;
  15.691 +        int lb, ub, idx;
  15.692 +        generate_exception_if(mode_64bit() || (src.type != OP_MEM),
  15.693 +                              EXC_UD, -1);
  15.694 +        if ( (rc = read_ulong(src.mem.seg, src.mem.off + op_bytes,
  15.695 +                              &src_val2, op_bytes, ctxt, ops)) )
  15.696 +            goto done;
  15.697 +        ub  = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2;
  15.698 +        lb  = (op_bytes == 2) ? (int16_t)src.val  : (int32_t)src.val;
  15.699 +        idx = (op_bytes == 2) ? (int16_t)dst.val  : (int32_t)dst.val;
  15.700 +        generate_exception_if((idx < lb) || (idx > ub), EXC_BR, -1);
  15.701 +        dst.type = OP_NONE;
  15.702 +        break;
  15.703 +    }
  15.704 +
  15.705 +    case 0x63: /* movsxd (x86/64) / arpl (x86/32) */
  15.706 +        if ( mode_64bit() )
  15.707 +        {
  15.708 +            /* movsxd */
  15.709 +            if ( src.type == OP_REG )
  15.710 +                src.val = *(int32_t *)src.reg;
  15.711 +            else if ( (rc = read_ulong(src.mem.seg, src.mem.off,
  15.712 +                                       &src.val, 4, ctxt, ops)) )
  15.713 +                goto done;
  15.714 +            dst.val = (int32_t)src.val;
  15.715 +        }
  15.716 +        else
  15.717 +        {
  15.718 +            /* arpl */
  15.719 +            uint16_t src_val = dst.val;
  15.720 +            dst = src;
  15.721 +            _regs.eflags &= ~EFLG_ZF;
  15.722 +            _regs.eflags |= ((src_val & 3) > (dst.val & 3)) ? EFLG_ZF : 0;
  15.723 +            if ( _regs.eflags & EFLG_ZF )
  15.724 +                dst.val  = (dst.val & ~3) | (src_val & 3);
  15.725 +            else
  15.726 +                dst.type = OP_NONE;
  15.727 +            generate_exception_if(!in_protmode(ctxt, ops), EXC_UD, -1);
  15.728 +        }
  15.729 +        break;
  15.730 +
  15.731      case 0x68: /* push imm{16,32,64} */
  15.732          src.val = ((op_bytes == 2)
  15.733                     ? (int32_t)insn_fetch_type(int16_t)
  15.734                     : insn_fetch_type(int32_t));
  15.735          goto push;
  15.736  
  15.737 +    case 0x69: /* imul imm16/32 */
  15.738 +    case 0x6b: /* imul imm8 */ {
  15.739 +        unsigned long src1; /* ModR/M source operand */
  15.740 +        if ( ea.type == OP_REG )
  15.741 +            src1 = *ea.reg;
  15.742 +        else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
  15.743 +                                   &src1, op_bytes, ctxt, ops)) )
  15.744 +            goto done;
  15.745 +        _regs.eflags &= ~(EFLG_OF|EFLG_CF);
  15.746 +        switch ( dst.bytes )
  15.747 +        {
  15.748 +        case 2:
  15.749 +            dst.val = ((uint32_t)(int16_t)src.val *
  15.750 +                       (uint32_t)(int16_t)src1);
  15.751 +            if ( (int16_t)dst.val != (uint32_t)dst.val )
  15.752 +                _regs.eflags |= EFLG_OF|EFLG_CF;
  15.753 +            break;
  15.754 +#ifdef __x86_64__
  15.755 +        case 4:
  15.756 +            dst.val = ((uint64_t)(int32_t)src.val *
  15.757 +                       (uint64_t)(int32_t)src1);
  15.758 +            if ( (int32_t)dst.val != dst.val )
  15.759 +                _regs.eflags |= EFLG_OF|EFLG_CF;
  15.760 +            break;
  15.761 +#endif
  15.762 +        default: {
  15.763 +            unsigned long m[2] = { src.val, src1 };
  15.764 +            if ( imul_dbl(m) )
  15.765 +                _regs.eflags |= EFLG_OF|EFLG_CF;
  15.766 +            dst.val = m[0];
  15.767 +            break;
  15.768 +        }
  15.769 +        }
  15.770 +        break;
  15.771 +    }
  15.772 +
  15.773      case 0x6a: /* push imm8 */
  15.774          src.val = insn_fetch_type(int8_t);
  15.775      push:
  15.776 @@ -2457,6 +1969,88 @@ x86_emulate(
  15.777          break;
  15.778      }
  15.779  
  15.780 +    case 0x82: /* Grp1 (x86/32 only) */
  15.781 +        generate_exception_if(mode_64bit(), EXC_UD, -1);
  15.782 +    case 0x80: case 0x81: case 0x83: /* Grp1 */
  15.783 +        switch ( modrm_reg & 7 )
  15.784 +        {
  15.785 +        case 0: goto add;
  15.786 +        case 1: goto or;
  15.787 +        case 2: goto adc;
  15.788 +        case 3: goto sbb;
  15.789 +        case 4: goto and;
  15.790 +        case 5: goto sub;
  15.791 +        case 6: goto xor;
  15.792 +        case 7: goto cmp;
  15.793 +        }
  15.794 +        break;
  15.795 +
  15.796 +    case 0xa8 ... 0xa9: /* test imm,%%eax */
  15.797 +        dst.reg = (unsigned long *)&_regs.eax;
  15.798 +        dst.val = _regs.eax;
  15.799 +    case 0x84 ... 0x85: test: /* test */
  15.800 +        emulate_2op_SrcV("test", src, dst, _regs.eflags);
  15.801 +        dst.type = OP_NONE;
  15.802 +        break;
  15.803 +
  15.804 +    case 0x86 ... 0x87: xchg: /* xchg */
  15.805 +        /* Write back the register source. */
  15.806 +        switch ( dst.bytes )
  15.807 +        {
  15.808 +        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
  15.809 +        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
  15.810 +        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
  15.811 +        case 8: *src.reg = dst.val; break;
  15.812 +        }
  15.813 +        /* Write back the memory destination with implicit LOCK prefix. */
  15.814 +        dst.val = src.val;
  15.815 +        lock_prefix = 1;
  15.816 +        break;
  15.817 +
  15.818 +    case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */
  15.819 +        generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
  15.820 +    case 0x88 ... 0x8b: /* mov */
  15.821 +        dst.val = src.val;
  15.822 +        break;
  15.823 +
  15.824 +    case 0x8c: /* mov Sreg,r/m */ {
  15.825 +        struct segment_register reg;
  15.826 +        enum x86_segment seg = decode_segment(modrm_reg);
  15.827 +        generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
  15.828 +        fail_if(ops->read_segment == NULL);
  15.829 +        if ( (rc = ops->read_segment(seg, &reg, ctxt)) != 0 )
  15.830 +            goto done;
  15.831 +        dst.val = reg.sel;
  15.832 +        if ( dst.type == OP_MEM )
  15.833 +            dst.bytes = 2;
  15.834 +        break;
  15.835 +    }
  15.836 +
  15.837 +    case 0x8e: /* mov r/m,Sreg */ {
  15.838 +        enum x86_segment seg = decode_segment(modrm_reg);
  15.839 +        generate_exception_if(seg == decode_segment_failed, EXC_UD, -1);
  15.840 +        if ( (rc = load_seg(seg, (uint16_t)src.val, ctxt, ops)) != 0 )
  15.841 +            goto done;
  15.842 +        if ( seg == x86_seg_ss )
  15.843 +            ctxt->retire.flags.mov_ss = 1;
  15.844 +        dst.type = OP_NONE;
  15.845 +        break;
  15.846 +    }
  15.847 +
  15.848 +    case 0x8d: /* lea */
  15.849 +        dst.val = ea.mem.off;
  15.850 +        break;
  15.851 +
  15.852 +    case 0x8f: /* pop (sole member of Grp1a) */
  15.853 +        generate_exception_if((modrm_reg & 7) != 0, EXC_UD, -1);
  15.854 +        /* 64-bit mode: POP defaults to a 64-bit operand. */
  15.855 +        if ( mode_64bit() && (dst.bytes == 4) )
  15.856 +            dst.bytes = 8;
  15.857 +        if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
  15.858 +                              &dst.val, dst.bytes, ctxt, ops)) != 0 )
  15.859 +            goto done;
  15.860 +        break;
  15.861 +
  15.862      case 0x90: /* nop / xchg %%r8,%%rax */
  15.863          if ( !(rex_prefix & 1) )
  15.864              break; /* nop */
  15.865 @@ -2673,6 +2267,49 @@ x86_emulate(
  15.866          break;
  15.867      }
  15.868  
  15.869 +    case 0xb0 ... 0xb7: /* mov imm8,r8 */
  15.870 +        dst.reg = decode_register(
  15.871 +            (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0));
  15.872 +        dst.val = src.val;
  15.873 +        break;
  15.874 +
  15.875 +    case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
  15.876 +        if ( dst.bytes == 8 ) /* Fetch more bytes to obtain imm64 */
  15.877 +            src.val = ((uint32_t)src.val |
  15.878 +                       ((uint64_t)insn_fetch_type(uint32_t) << 32));
  15.879 +        dst.reg = decode_register(
  15.880 +            (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
  15.881 +        dst.val = src.val;
  15.882 +        break;
  15.883 +
  15.884 +    case 0xc0 ... 0xc1: grp2: /* Grp2 */
  15.885 +        switch ( modrm_reg & 7 )
  15.886 +        {
  15.887 +        case 0: /* rol */
  15.888 +            emulate_2op_SrcB("rol", src, dst, _regs.eflags);
  15.889 +            break;
  15.890 +        case 1: /* ror */
  15.891 +            emulate_2op_SrcB("ror", src, dst, _regs.eflags);
  15.892 +            break;
  15.893 +        case 2: /* rcl */
  15.894 +            emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
  15.895 +            break;
  15.896 +        case 3: /* rcr */
  15.897 +            emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
  15.898 +            break;
  15.899 +        case 4: /* sal/shl */
  15.900 +        case 6: /* sal/shl */
  15.901 +            emulate_2op_SrcB("sal", src, dst, _regs.eflags);
  15.902 +            break;
  15.903 +        case 5: /* shr */
  15.904 +            emulate_2op_SrcB("shr", src, dst, _regs.eflags);
  15.905 +            break;
  15.906 +        case 7: /* sar */
  15.907 +            emulate_2op_SrcB("sar", src, dst, _regs.eflags);
  15.908 +            break;
  15.909 +        }
  15.910 +        break;
  15.911 +
  15.912      case 0xc2: /* ret imm16 (near) */
  15.913      case 0xc3: /* ret (near) */ {
  15.914          int offset = (b == 0xc2) ? insn_fetch_type(uint16_t) : 0;
  15.915 @@ -2684,6 +2321,24 @@ x86_emulate(
  15.916          break;
  15.917      }
  15.918  
  15.919 +    case 0xc4: /* les */ {
  15.920 +        unsigned long sel;
  15.921 +        dst.val = x86_seg_es;
  15.922 +    les: /* dst.val identifies the segment */
  15.923 +        generate_exception_if(src.type != OP_MEM, EXC_UD, -1);
  15.924 +        if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
  15.925 +                              &sel, 2, ctxt, ops)) != 0 )
  15.926 +            goto done;
  15.927 +        if ( (rc = load_seg(dst.val, (uint16_t)sel, ctxt, ops)) != 0 )
  15.928 +            goto done;
  15.929 +        dst.val = src.val;
  15.930 +        break;
  15.931 +    }
  15.932 +
  15.933 +    case 0xc5: /* lds */
  15.934 +        dst.val = x86_seg_ds;
  15.935 +        goto les;
  15.936 +
  15.937      case 0xc8: /* enter imm16,imm8 */ {
  15.938          uint16_t size = insn_fetch_type(uint16_t);
  15.939          uint8_t depth = insn_fetch_type(uint8_t) & 31;
  15.940 @@ -2800,6 +2455,14 @@ x86_emulate(
  15.941          break;
  15.942      }
  15.943  
  15.944 +    case 0xd0 ... 0xd1: /* Grp2 */
  15.945 +        src.val = 1;
  15.946 +        goto grp2;
  15.947 +
  15.948 +    case 0xd2 ... 0xd3: /* Grp2 */
  15.949 +        src.val = _regs.ecx;
  15.950 +        goto grp2;
  15.951 +
  15.952      case 0xd4: /* aam */ {
  15.953          unsigned int base = insn_fetch_type(uint8_t);
  15.954          uint8_t al = _regs.eax;
  15.955 @@ -3436,6 +3099,214 @@ x86_emulate(
  15.956          _regs.eflags ^= EFLG_CF;
  15.957          break;
  15.958  
  15.959 +    case 0xf6 ... 0xf7: /* Grp3 */
  15.960 +        switch ( modrm_reg & 7 )
  15.961 +        {
  15.962 +        case 0 ... 1: /* test */
  15.963 +            /* Special case in Grp3: test has an immediate source operand. */
  15.964 +            src.type = OP_IMM;
  15.965 +            src.bytes = (d & ByteOp) ? 1 : op_bytes;
  15.966 +            if ( src.bytes == 8 ) src.bytes = 4;
  15.967 +            switch ( src.bytes )
  15.968 +            {
  15.969 +            case 1: src.val = insn_fetch_type(int8_t);  break;
  15.970 +            case 2: src.val = insn_fetch_type(int16_t); break;
  15.971 +            case 4: src.val = insn_fetch_type(int32_t); break;
  15.972 +            }
  15.973 +            goto test;
  15.974 +        case 2: /* not */
  15.975 +            dst.val = ~dst.val;
  15.976 +            break;
  15.977 +        case 3: /* neg */
  15.978 +            emulate_1op("neg", dst, _regs.eflags);
  15.979 +            break;
  15.980 +        case 4: /* mul */
  15.981 +            src = dst;
  15.982 +            dst.type = OP_REG;
  15.983 +            dst.reg  = (unsigned long *)&_regs.eax;
  15.984 +            dst.val  = *dst.reg;
  15.985 +            _regs.eflags &= ~(EFLG_OF|EFLG_CF);
  15.986 +            switch ( src.bytes )
  15.987 +            {
  15.988 +            case 1:
  15.989 +                dst.val = (uint8_t)dst.val;
  15.990 +                dst.val *= src.val;
  15.991 +                if ( (uint8_t)dst.val != (uint16_t)dst.val )
  15.992 +                    _regs.eflags |= EFLG_OF|EFLG_CF;
  15.993 +                dst.bytes = 2;
  15.994 +                break;
  15.995 +            case 2:
  15.996 +                dst.val = (uint16_t)dst.val;
  15.997 +                dst.val *= src.val;
  15.998 +                if ( (uint16_t)dst.val != (uint32_t)dst.val )
  15.999 +                    _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1000 +                *(uint16_t *)&_regs.edx = dst.val >> 16;
 15.1001 +                break;
 15.1002 +#ifdef __x86_64__
 15.1003 +            case 4:
 15.1004 +                dst.val = (uint32_t)dst.val;
 15.1005 +                dst.val *= src.val;
 15.1006 +                if ( (uint32_t)dst.val != dst.val )
 15.1007 +                    _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1008 +                _regs.edx = (uint32_t)(dst.val >> 32);
 15.1009 +                break;
 15.1010 +#endif
 15.1011 +            default: {
 15.1012 +                unsigned long m[2] = { src.val, dst.val };
 15.1013 +                if ( mul_dbl(m) )
 15.1014 +                    _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1015 +                _regs.edx = m[1];
 15.1016 +                dst.val  = m[0];
 15.1017 +                break;
 15.1018 +            }
 15.1019 +            }
 15.1020 +            break;
 15.1021 +        case 5: /* imul */
 15.1022 +            src = dst;
 15.1023 +            dst.type = OP_REG;
 15.1024 +            dst.reg  = (unsigned long *)&_regs.eax;
 15.1025 +            dst.val  = *dst.reg;
 15.1026 +            _regs.eflags &= ~(EFLG_OF|EFLG_CF);
 15.1027 +            switch ( src.bytes )
 15.1028 +            {
 15.1029 +            case 1:
 15.1030 +                dst.val = ((uint16_t)(int8_t)src.val *
 15.1031 +                           (uint16_t)(int8_t)dst.val);
 15.1032 +                if ( (int8_t)dst.val != (uint16_t)dst.val )
 15.1033 +                    _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1034 +                dst.bytes = 2;
 15.1035 +                break;
 15.1036 +            case 2:
 15.1037 +                dst.val = ((uint32_t)(int16_t)src.val *
 15.1038 +                           (uint32_t)(int16_t)dst.val);
 15.1039 +                if ( (int16_t)dst.val != (uint32_t)dst.val )
 15.1040 +                    _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1041 +                *(uint16_t *)&_regs.edx = dst.val >> 16;
 15.1042 +                break;
 15.1043 +#ifdef __x86_64__
 15.1044 +            case 4:
 15.1045 +                dst.val = ((uint64_t)(int32_t)src.val *
 15.1046 +                           (uint64_t)(int32_t)dst.val);
 15.1047 +                if ( (int32_t)dst.val != dst.val )
 15.1048 +                    _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1049 +                _regs.edx = (uint32_t)(dst.val >> 32);
 15.1050 +                break;
 15.1051 +#endif
 15.1052 +            default: {
 15.1053 +                unsigned long m[2] = { src.val, dst.val };
 15.1054 +                if ( imul_dbl(m) )
 15.1055 +                    _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1056 +                _regs.edx = m[1];
 15.1057 +                dst.val  = m[0];
 15.1058 +                break;
 15.1059 +            }
 15.1060 +            }
 15.1061 +            break;
 15.1062 +        case 6: /* div */ {
 15.1063 +            unsigned long u[2], v;
 15.1064 +            src = dst;
 15.1065 +            dst.type = OP_REG;
 15.1066 +            dst.reg  = (unsigned long *)&_regs.eax;
 15.1067 +            switch ( src.bytes )
 15.1068 +            {
 15.1069 +            case 1:
 15.1070 +                u[0] = (uint16_t)_regs.eax;
 15.1071 +                u[1] = 0;
 15.1072 +                v    = (uint8_t)src.val;
 15.1073 +                generate_exception_if(
 15.1074 +                    div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]),
 15.1075 +                    EXC_DE, -1);
 15.1076 +                dst.val = (uint8_t)u[0];
 15.1077 +                ((uint8_t *)&_regs.eax)[1] = u[1];
 15.1078 +                break;
 15.1079 +            case 2:
 15.1080 +                u[0] = ((uint32_t)_regs.edx << 16) | (uint16_t)_regs.eax;
 15.1081 +                u[1] = 0;
 15.1082 +                v    = (uint16_t)src.val;
 15.1083 +                generate_exception_if(
 15.1084 +                    div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]),
 15.1085 +                    EXC_DE, -1);
 15.1086 +                dst.val = (uint16_t)u[0];
 15.1087 +                *(uint16_t *)&_regs.edx = u[1];
 15.1088 +                break;
 15.1089 +#ifdef __x86_64__
 15.1090 +            case 4:
 15.1091 +                u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax;
 15.1092 +                u[1] = 0;
 15.1093 +                v    = (uint32_t)src.val;
 15.1094 +                generate_exception_if(
 15.1095 +                    div_dbl(u, v) || ((uint32_t)u[0] != u[0]),
 15.1096 +                    EXC_DE, -1);
 15.1097 +                dst.val   = (uint32_t)u[0];
 15.1098 +                _regs.edx = (uint32_t)u[1];
 15.1099 +                break;
 15.1100 +#endif
 15.1101 +            default:
 15.1102 +                u[0] = _regs.eax;
 15.1103 +                u[1] = _regs.edx;
 15.1104 +                v    = src.val;
 15.1105 +                generate_exception_if(div_dbl(u, v), EXC_DE, -1);
 15.1106 +                dst.val   = u[0];
 15.1107 +                _regs.edx = u[1];
 15.1108 +                break;
 15.1109 +            }
 15.1110 +            break;
 15.1111 +        }
 15.1112 +        case 7: /* idiv */ {
 15.1113 +            unsigned long u[2], v;
 15.1114 +            src = dst;
 15.1115 +            dst.type = OP_REG;
 15.1116 +            dst.reg  = (unsigned long *)&_regs.eax;
 15.1117 +            switch ( src.bytes )
 15.1118 +            {
 15.1119 +            case 1:
 15.1120 +                u[0] = (int16_t)_regs.eax;
 15.1121 +                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
 15.1122 +                v    = (int8_t)src.val;
 15.1123 +                generate_exception_if(
 15.1124 +                    idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]),
 15.1125 +                    EXC_DE, -1);
 15.1126 +                dst.val = (int8_t)u[0];
 15.1127 +                ((int8_t *)&_regs.eax)[1] = u[1];
 15.1128 +                break;
 15.1129 +            case 2:
 15.1130 +                u[0] = (int32_t)((_regs.edx << 16) | (uint16_t)_regs.eax);
 15.1131 +                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
 15.1132 +                v    = (int16_t)src.val;
 15.1133 +                generate_exception_if(
 15.1134 +                    idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]),
 15.1135 +                    EXC_DE, -1);
 15.1136 +                dst.val = (int16_t)u[0];
 15.1137 +                *(int16_t *)&_regs.edx = u[1];
 15.1138 +                break;
 15.1139 +#ifdef __x86_64__
 15.1140 +            case 4:
 15.1141 +                u[0] = (_regs.edx << 32) | (uint32_t)_regs.eax;
 15.1142 +                u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
 15.1143 +                v    = (int32_t)src.val;
 15.1144 +                generate_exception_if(
 15.1145 +                    idiv_dbl(u, v) || ((int32_t)u[0] != u[0]),
 15.1146 +                    EXC_DE, -1);
 15.1147 +                dst.val   = (int32_t)u[0];
 15.1148 +                _regs.edx = (uint32_t)u[1];
 15.1149 +                break;
 15.1150 +#endif
 15.1151 +            default:
 15.1152 +                u[0] = _regs.eax;
 15.1153 +                u[1] = _regs.edx;
 15.1154 +                v    = src.val;
 15.1155 +                generate_exception_if(idiv_dbl(u, v), EXC_DE, -1);
 15.1156 +                dst.val   = u[0];
 15.1157 +                _regs.edx = u[1];
 15.1158 +                break;
 15.1159 +            }
 15.1160 +            break;
 15.1161 +        }
 15.1162 +        default:
 15.1163 +            goto cannot_emulate;
 15.1164 +        }
 15.1165 +        break;
 15.1166 +
 15.1167      case 0xf8: /* clc */
 15.1168          _regs.eflags &= ~EFLG_CF;
 15.1169          break;
 15.1170 @@ -3465,201 +3336,131 @@ x86_emulate(
 15.1171      case 0xfd: /* std */
 15.1172          _regs.eflags |= EFLG_DF;
 15.1173          break;
 15.1174 -    }
 15.1175 -    goto writeback;
 15.1176 -
 15.1177 - twobyte_insn:
 15.1178 -    switch ( b )
 15.1179 -    {
 15.1180 -    case 0x40 ... 0x4f: /* cmovcc */
 15.1181 -        dst.val = src.val;
 15.1182 -        if ( !test_cc(b, _regs.eflags) )
 15.1183 -            dst.type = OP_NONE;
 15.1184 -        break;
 15.1185 -
 15.1186 -    case 0x90 ... 0x9f: /* setcc */
 15.1187 -        dst.val = test_cc(b, _regs.eflags);
 15.1188 -        break;
 15.1189 -
 15.1190 -    case 0xb0 ... 0xb1: /* cmpxchg */
 15.1191 -        /* Save real source value, then compare EAX against destination. */
 15.1192 -        src.orig_val = src.val;
 15.1193 -        src.val = _regs.eax;
 15.1194 -        emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
 15.1195 -        if ( _regs.eflags & EFLG_ZF )
 15.1196 +
 15.1197 +    case 0xfe: /* Grp4 */
 15.1198 +        generate_exception_if((modrm_reg & 7) >= 2, EXC_UD, -1);
 15.1199 +    case 0xff: /* Grp5 */
 15.1200 +        switch ( modrm_reg & 7 )
 15.1201          {
 15.1202 -            /* Success: write back to memory. */
 15.1203 -            dst.val = src.orig_val;
 15.1204 -        }
 15.1205 -        else
 15.1206 -        {
 15.1207 -            /* Failure: write the value we saw to EAX. */
 15.1208 -            dst.type = OP_REG;
 15.1209 -            dst.reg  = (unsigned long *)&_regs.eax;
 15.1210 -        }
 15.1211 -        break;
 15.1212 -
 15.1213 -    case 0xa3: bt: /* bt */
 15.1214 -        emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
 15.1215 -        dst.type = OP_NONE;
 15.1216 -        break;
 15.1217 -
 15.1218 -    case 0xa4: /* shld imm8,r,r/m */
 15.1219 -    case 0xa5: /* shld %%cl,r,r/m */
 15.1220 -    case 0xac: /* shrd imm8,r,r/m */
 15.1221 -    case 0xad: /* shrd %%cl,r,r/m */ {
 15.1222 -        uint8_t shift, width = dst.bytes << 3;
 15.1223 -        shift = (b & 1) ? (uint8_t)_regs.ecx : insn_fetch_type(uint8_t);
 15.1224 -        if ( (shift &= width - 1) == 0 )
 15.1225 +        case 0: /* inc */
 15.1226 +            emulate_1op("inc", dst, _regs.eflags);
 15.1227              break;
 15.1228 -        dst.orig_val = truncate_word(dst.val, dst.bytes);
 15.1229 -        dst.val = ((shift == width) ? src.val :
 15.1230 -                   (b & 8) ?
 15.1231 -                   /* shrd */
 15.1232 -                   ((dst.orig_val >> shift) |
 15.1233 -                    truncate_word(src.val << (width - shift), dst.bytes)) :
 15.1234 -                   /* shld */
 15.1235 -                   ((dst.orig_val << shift) |
 15.1236 -                    ((src.val >> (width - shift)) & ((1ull << shift) - 1))));
 15.1237 -        dst.val = truncate_word(dst.val, dst.bytes);
 15.1238 -        _regs.eflags &= ~(EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_PF|EFLG_CF);
 15.1239 -        if ( (dst.val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
 15.1240 -            _regs.eflags |= EFLG_CF;
 15.1241 -        if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
 15.1242 -            _regs.eflags |= EFLG_OF;
 15.1243 -        _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? EFLG_SF : 0;
 15.1244 -        _regs.eflags |= (dst.val == 0) ? EFLG_ZF : 0;
 15.1245 -        _regs.eflags |= even_parity(dst.val) ? EFLG_PF : 0;
 15.1246 -        break;
 15.1247 -    }
 15.1248 -
 15.1249 -    case 0xb3: btr: /* btr */
 15.1250 -        emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
 15.1251 -        break;
 15.1252 -
 15.1253 -    case 0xab: bts: /* bts */
 15.1254 -        emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
 15.1255 -        break;
 15.1256 -
 15.1257 -    case 0xaf: /* imul */
 15.1258 -        _regs.eflags &= ~(EFLG_OF|EFLG_CF);
 15.1259 -        switch ( dst.bytes )
 15.1260 -        {
 15.1261 -        case 2:
 15.1262 -            dst.val = ((uint32_t)(int16_t)src.val *
 15.1263 -                       (uint32_t)(int16_t)dst.val);
 15.1264 -            if ( (int16_t)dst.val != (uint32_t)dst.val )
 15.1265 -                _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1266 +        case 1: /* dec */
 15.1267 +            emulate_1op("dec", dst, _regs.eflags);
 15.1268              break;
 15.1269 -#ifdef __x86_64__
 15.1270 -        case 4:
 15.1271 -            dst.val = ((uint64_t)(int32_t)src.val *
 15.1272 -                       (uint64_t)(int32_t)dst.val);
 15.1273 -            if ( (int32_t)dst.val != dst.val )
 15.1274 -                _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1275 +        case 2: /* call (near) */
 15.1276 +        case 4: /* jmp (near) */
 15.1277 +            if ( (dst.bytes != 8) && mode_64bit() )
 15.1278 +            {
 15.1279 +                dst.bytes = op_bytes = 8;
 15.1280 +                if ( dst.type == OP_REG )
 15.1281 +                    dst.val = *dst.reg;
 15.1282 +                else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
 15.1283 +                                           &dst.val, 8, ctxt, ops)) != 0 )
 15.1284 +                    goto done;
 15.1285 +            }
 15.1286 +            src.val = _regs.eip;
 15.1287 +            _regs.eip = dst.val;
 15.1288 +            if ( (modrm_reg & 7) == 2 )
 15.1289 +                goto push; /* call */
 15.1290 +            dst.type = OP_NONE;
 15.1291              break;
 15.1292 -#endif
 15.1293 -        default: {
 15.1294 -            unsigned long m[2] = { src.val, dst.val };
 15.1295 -            if ( imul_dbl(m) )
 15.1296 -                _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1297 -            dst.val = m[0];
 15.1298 +        case 3: /* call (far, absolute indirect) */
 15.1299 +        case 5: /* jmp (far, absolute indirect) */ {
 15.1300 +            unsigned long sel;
 15.1301 +
 15.1302 +            generate_exception_if(dst.type != OP_MEM, EXC_UD, -1);
 15.1303 +
 15.1304 +            if ( (rc = read_ulong(dst.mem.seg, dst.mem.off+dst.bytes,
 15.1305 +                                  &sel, 2, ctxt, ops)) )
 15.1306 +                goto done;
 15.1307 +
 15.1308 +            if ( (modrm_reg & 7) == 3 ) /* call */
 15.1309 +            {
 15.1310 +                struct segment_register reg;
 15.1311 +                fail_if(ops->read_segment == NULL);
 15.1312 +                if ( (rc = ops->read_segment(x86_seg_cs, &reg, ctxt)) ||
 15.1313 +                     (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
 15.1314 +                                      &reg.sel, op_bytes, ctxt)) ||
 15.1315 +                     (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
 15.1316 +                                      &_regs.eip, op_bytes, ctxt)) )
 15.1317 +                    goto done;
 15.1318 +            }
 15.1319 +
 15.1320 +            if ( (rc = load_seg(x86_seg_cs, sel, ctxt, ops)) != 0 )
 15.1321 +                goto done;
 15.1322 +            _regs.eip = dst.val;
 15.1323 +
 15.1324 +            dst.type = OP_NONE;
 15.1325              break;
 15.1326          }
 15.1327 -        }
 15.1328 -        break;
 15.1329 -
 15.1330 -    case 0xb2: /* lss */
 15.1331 -        dst.val = x86_seg_ss;
 15.1332 -        goto les;
 15.1333 -
 15.1334 -    case 0xb4: /* lfs */
 15.1335 -        dst.val = x86_seg_fs;
 15.1336 -        goto les;
 15.1337 -
 15.1338 -    case 0xb5: /* lgs */
 15.1339 -        dst.val = x86_seg_gs;
 15.1340 -        goto les;
 15.1341 -
 15.1342 -    case 0xb6: /* movzx rm8,r{16,32,64} */
 15.1343 -        /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
 15.1344 -        dst.reg   = decode_register(modrm_reg, &_regs, 0);
 15.1345 -        dst.bytes = op_bytes;
 15.1346 -        dst.val   = (uint8_t)src.val;
 15.1347 -        break;
 15.1348 -
 15.1349 -    case 0xbc: /* bsf */ {
 15.1350 -        int zf;
 15.1351 -        asm ( "bsf %2,%0; setz %b1"
 15.1352 -              : "=r" (dst.val), "=q" (zf)
 15.1353 -              : "r" (src.val), "1" (0) );
 15.1354 -        _regs.eflags &= ~EFLG_ZF;
 15.1355 -        if ( zf )
 15.1356 -        {
 15.1357 -            _regs.eflags |= EFLG_ZF;
 15.1358 +        case 6: /* push */
 15.1359 +            /* 64-bit mode: PUSH defaults to a 64-bit operand. */
 15.1360 +            if ( mode_64bit() && (dst.bytes == 4) )
 15.1361 +            {
 15.1362 +                dst.bytes = 8;
 15.1363 +                if ( dst.type == OP_REG )
 15.1364 +                    dst.val = *dst.reg;
 15.1365 +                else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
 15.1366 +                                           &dst.val, 8, ctxt, ops)) != 0 )
 15.1367 +                    goto done;
 15.1368 +            }
 15.1369 +            if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
 15.1370 +                                  &dst.val, dst.bytes, ctxt)) != 0 )
 15.1371 +                goto done;
 15.1372              dst.type = OP_NONE;
 15.1373 +            break;
 15.1374 +        case 7:
 15.1375 +            generate_exception_if(1, EXC_UD, -1);
 15.1376 +        default:
 15.1377 +            goto cannot_emulate;
 15.1378          }
 15.1379          break;
 15.1380      }
 15.1381  
 15.1382 -    case 0xbd: /* bsr */ {
 15.1383 -        int zf;
 15.1384 -        asm ( "bsr %2,%0; setz %b1"
 15.1385 -              : "=r" (dst.val), "=q" (zf)
 15.1386 -              : "r" (src.val), "1" (0) );
 15.1387 -        _regs.eflags &= ~EFLG_ZF;
 15.1388 -        if ( zf )
 15.1389 + writeback:
 15.1390 +    switch ( dst.type )
 15.1391 +    {
 15.1392 +    case OP_REG:
 15.1393 +        /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
 15.1394 +        switch ( dst.bytes )
 15.1395          {
 15.1396 -            _regs.eflags |= EFLG_ZF;
 15.1397 -            dst.type = OP_NONE;
 15.1398 +        case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
 15.1399 +        case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
 15.1400 +        case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
 15.1401 +        case 8: *dst.reg = dst.val; break;
 15.1402          }
 15.1403          break;
 15.1404 +    case OP_MEM:
 15.1405 +        if ( !(d & Mov) && (dst.orig_val == dst.val) &&
 15.1406 +             !ctxt->force_writeback )
 15.1407 +            /* nothing to do */;
 15.1408 +        else if ( lock_prefix )
 15.1409 +            rc = ops->cmpxchg(
 15.1410 +                dst.mem.seg, dst.mem.off, &dst.orig_val,
 15.1411 +                &dst.val, dst.bytes, ctxt);
 15.1412 +        else
 15.1413 +            rc = ops->write(
 15.1414 +                dst.mem.seg, dst.mem.off, &dst.val, dst.bytes, ctxt);
 15.1415 +        if ( rc != 0 )
 15.1416 +            goto done;
 15.1417 +    default:
 15.1418 +        break;
 15.1419      }
 15.1420  
 15.1421 -    case 0xb7: /* movzx rm16,r{16,32,64} */
 15.1422 -        dst.val = (uint16_t)src.val;
 15.1423 -        break;
 15.1424 -
 15.1425 -    case 0xbb: btc: /* btc */
 15.1426 -        emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
 15.1427 -        break;
 15.1428 -
 15.1429 -    case 0xba: /* Grp8 */
 15.1430 -        switch ( modrm_reg & 7 )
 15.1431 -        {
 15.1432 -        case 4: goto bt;
 15.1433 -        case 5: goto bts;
 15.1434 -        case 6: goto btr;
 15.1435 -        case 7: goto btc;
 15.1436 -        default: generate_exception_if(1, EXC_UD, -1);
 15.1437 -        }
 15.1438 -        break;
 15.1439 -
 15.1440 -    case 0xbe: /* movsx rm8,r{16,32,64} */
 15.1441 -        /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
 15.1442 -        dst.reg   = decode_register(modrm_reg, &_regs, 0);
 15.1443 -        dst.bytes = op_bytes;
 15.1444 -        dst.val   = (int8_t)src.val;
 15.1445 -        break;
 15.1446 -
 15.1447 -    case 0xbf: /* movsx rm16,r{16,32,64} */
 15.1448 -        dst.val = (int16_t)src.val;
 15.1449 -        break;
 15.1450 -
 15.1451 -    case 0xc0 ... 0xc1: /* xadd */
 15.1452 -        /* Write back the register source. */
 15.1453 -        switch ( dst.bytes )
 15.1454 -        {
 15.1455 -        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
 15.1456 -        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
 15.1457 -        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
 15.1458 -        case 8: *src.reg = dst.val; break;
 15.1459 -        }
 15.1460 -        goto add;
 15.1461 -    }
 15.1462 -    goto writeback;
 15.1463 -
 15.1464 - twobyte_special_insn:
 15.1465 +    /* Inject #DB if single-step tracing was enabled at instruction start. */
 15.1466 +    if ( (ctxt->regs->eflags & EFLG_TF) && (rc == X86EMUL_OKAY) &&
 15.1467 +         (ops->inject_hw_exception != NULL) )
 15.1468 +        rc = ops->inject_hw_exception(EXC_DB, -1, ctxt) ? : X86EMUL_EXCEPTION;
 15.1469 +
 15.1470 +    /* Commit shadow register state. */
 15.1471 +    _regs.eflags &= ~EFLG_RF;
 15.1472 +    *ctxt->regs = _regs;
 15.1473 +
 15.1474 + done:
 15.1475 +    return rc;
 15.1476 +
 15.1477 + twobyte_insn:
 15.1478      switch ( b )
 15.1479      {
 15.1480      case 0x01: /* Grp7 */ {
 15.1481 @@ -3846,6 +3647,12 @@ x86_emulate(
 15.1482          break;
 15.1483      }
 15.1484  
 15.1485 +    case 0x40 ... 0x4f: /* cmovcc */
 15.1486 +        dst.val = src.val;
 15.1487 +        if ( !test_cc(b, _regs.eflags) )
 15.1488 +            dst.type = OP_NONE;
 15.1489 +        break;
 15.1490 +
 15.1491      case 0x6f: /* movq mm/m64,mm */ {
 15.1492          uint8_t stub[] = { 0x0f, 0x6f, modrm, 0xc3 };
 15.1493          struct fpu_insn_ctxt fic = { .insn_bytes = sizeof(stub)-1 };
 15.1494 @@ -3895,6 +3702,10 @@ x86_emulate(
 15.1495          break;
 15.1496      }
 15.1497  
 15.1498 +    case 0x90 ... 0x9f: /* setcc */
 15.1499 +        dst.val = test_cc(b, _regs.eflags);
 15.1500 +        break;
 15.1501 +
 15.1502      case 0xa0: /* push %%fs */
 15.1503          src.val = x86_seg_fs;
 15.1504          goto push_seg;
 15.1505 @@ -3922,6 +3733,182 @@ x86_emulate(
 15.1506          src.val = x86_seg_gs;
 15.1507          goto pop_seg;
 15.1508  
 15.1509 +    case 0xb0 ... 0xb1: /* cmpxchg */
 15.1510 +        /* Save real source value, then compare EAX against destination. */
 15.1511 +        src.orig_val = src.val;
 15.1512 +        src.val = _regs.eax;
 15.1513 +        emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
 15.1514 +        if ( _regs.eflags & EFLG_ZF )
 15.1515 +        {
 15.1516 +            /* Success: write back to memory. */
 15.1517 +            dst.val = src.orig_val;
 15.1518 +        }
 15.1519 +        else
 15.1520 +        {
 15.1521 +            /* Failure: write the value we saw to EAX. */
 15.1522 +            dst.type = OP_REG;
 15.1523 +            dst.reg  = (unsigned long *)&_regs.eax;
 15.1524 +        }
 15.1525 +        break;
 15.1526 +
 15.1527 +    case 0xa3: bt: /* bt */
 15.1528 +        emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
 15.1529 +        dst.type = OP_NONE;
 15.1530 +        break;
 15.1531 +
 15.1532 +    case 0xa4: /* shld imm8,r,r/m */
 15.1533 +    case 0xa5: /* shld %%cl,r,r/m */
 15.1534 +    case 0xac: /* shrd imm8,r,r/m */
 15.1535 +    case 0xad: /* shrd %%cl,r,r/m */ {
 15.1536 +        uint8_t shift, width = dst.bytes << 3;
 15.1537 +        shift = (b & 1) ? (uint8_t)_regs.ecx : insn_fetch_type(uint8_t);
 15.1538 +        if ( (shift &= width - 1) == 0 )
 15.1539 +            break;
 15.1540 +        dst.orig_val = truncate_word(dst.val, dst.bytes);
 15.1541 +        dst.val = ((shift == width) ? src.val :
 15.1542 +                   (b & 8) ?
 15.1543 +                   /* shrd */
 15.1544 +                   ((dst.orig_val >> shift) |
 15.1545 +                    truncate_word(src.val << (width - shift), dst.bytes)) :
 15.1546 +                   /* shld */
 15.1547 +                   ((dst.orig_val << shift) |
 15.1548 +                    ((src.val >> (width - shift)) & ((1ull << shift) - 1))));
 15.1549 +        dst.val = truncate_word(dst.val, dst.bytes);
 15.1550 +        _regs.eflags &= ~(EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_PF|EFLG_CF);
 15.1551 +        if ( (dst.val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
 15.1552 +            _regs.eflags |= EFLG_CF;
 15.1553 +        if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
 15.1554 +            _regs.eflags |= EFLG_OF;
 15.1555 +        _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? EFLG_SF : 0;
 15.1556 +        _regs.eflags |= (dst.val == 0) ? EFLG_ZF : 0;
 15.1557 +        _regs.eflags |= even_parity(dst.val) ? EFLG_PF : 0;
 15.1558 +        break;
 15.1559 +    }
 15.1560 +
 15.1561 +    case 0xb3: btr: /* btr */
 15.1562 +        emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
 15.1563 +        break;
 15.1564 +
 15.1565 +    case 0xab: bts: /* bts */
 15.1566 +        emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
 15.1567 +        break;
 15.1568 +
 15.1569 +    case 0xaf: /* imul */
 15.1570 +        _regs.eflags &= ~(EFLG_OF|EFLG_CF);
 15.1571 +        switch ( dst.bytes )
 15.1572 +        {
 15.1573 +        case 2:
 15.1574 +            dst.val = ((uint32_t)(int16_t)src.val *
 15.1575 +                       (uint32_t)(int16_t)dst.val);
 15.1576 +            if ( (int16_t)dst.val != (uint32_t)dst.val )
 15.1577 +                _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1578 +            break;
 15.1579 +#ifdef __x86_64__
 15.1580 +        case 4:
 15.1581 +            dst.val = ((uint64_t)(int32_t)src.val *
 15.1582 +                       (uint64_t)(int32_t)dst.val);
 15.1583 +            if ( (int32_t)dst.val != dst.val )
 15.1584 +                _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1585 +            break;
 15.1586 +#endif
 15.1587 +        default: {
 15.1588 +            unsigned long m[2] = { src.val, dst.val };
 15.1589 +            if ( imul_dbl(m) )
 15.1590 +                _regs.eflags |= EFLG_OF|EFLG_CF;
 15.1591 +            dst.val = m[0];
 15.1592 +            break;
 15.1593 +        }
 15.1594 +        }
 15.1595 +        break;
 15.1596 +
 15.1597 +    case 0xb2: /* lss */
 15.1598 +        dst.val = x86_seg_ss;
 15.1599 +        goto les;
 15.1600 +
 15.1601 +    case 0xb4: /* lfs */
 15.1602 +        dst.val = x86_seg_fs;
 15.1603 +        goto les;
 15.1604 +
 15.1605 +    case 0xb5: /* lgs */
 15.1606 +        dst.val = x86_seg_gs;
 15.1607 +        goto les;
 15.1608 +
 15.1609 +    case 0xb6: /* movzx rm8,r{16,32,64} */
 15.1610 +        /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
 15.1611 +        dst.reg   = decode_register(modrm_reg, &_regs, 0);
 15.1612 +        dst.bytes = op_bytes;
 15.1613 +        dst.val   = (uint8_t)src.val;
 15.1614 +        break;
 15.1615 +
 15.1616 +    case 0xbc: /* bsf */ {
 15.1617 +        int zf;
 15.1618 +        asm ( "bsf %2,%0; setz %b1"
 15.1619 +              : "=r" (dst.val), "=q" (zf)
 15.1620 +              : "r" (src.val), "1" (0) );
 15.1621 +        _regs.eflags &= ~EFLG_ZF;
 15.1622 +        if ( zf )
 15.1623 +        {
 15.1624 +            _regs.eflags |= EFLG_ZF;
 15.1625 +            dst.type = OP_NONE;
 15.1626 +        }
 15.1627 +        break;
 15.1628 +    }
 15.1629 +
 15.1630 +    case 0xbd: /* bsr */ {
 15.1631 +        int zf;
 15.1632 +        asm ( "bsr %2,%0; setz %b1"
 15.1633 +              : "=r" (dst.val), "=q" (zf)
 15.1634 +              : "r" (src.val), "1" (0) );
 15.1635 +        _regs.eflags &= ~EFLG_ZF;
 15.1636 +        if ( zf )
 15.1637 +        {
 15.1638 +            _regs.eflags |= EFLG_ZF;
 15.1639 +            dst.type = OP_NONE;
 15.1640 +        }
 15.1641 +        break;
 15.1642 +    }
 15.1643 +
 15.1644 +    case 0xb7: /* movzx rm16,r{16,32,64} */
 15.1645 +        dst.val = (uint16_t)src.val;
 15.1646 +        break;
 15.1647 +
 15.1648 +    case 0xbb: btc: /* btc */
 15.1649 +        emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
 15.1650 +        break;
 15.1651 +
 15.1652 +    case 0xba: /* Grp8 */
 15.1653 +        switch ( modrm_reg & 7 )
 15.1654 +        {
 15.1655 +        case 4: goto bt;
 15.1656 +        case 5: goto bts;
 15.1657 +        case 6: goto btr;
 15.1658 +        case 7: goto btc;
 15.1659 +        default: generate_exception_if(1, EXC_UD, -1);
 15.1660 +        }
 15.1661 +        break;
 15.1662 +
 15.1663 +    case 0xbe: /* movsx rm8,r{16,32,64} */
 15.1664 +        /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
 15.1665 +        dst.reg   = decode_register(modrm_reg, &_regs, 0);
 15.1666 +        dst.bytes = op_bytes;
 15.1667 +        dst.val   = (int8_t)src.val;
 15.1668 +        break;
 15.1669 +
 15.1670 +    case 0xbf: /* movsx rm16,r{16,32,64} */
 15.1671 +        dst.val = (int16_t)src.val;
 15.1672 +        break;
 15.1673 +
 15.1674 +    case 0xc0 ... 0xc1: /* xadd */
 15.1675 +        /* Write back the register source. */
 15.1676 +        switch ( dst.bytes )
 15.1677 +        {
 15.1678 +        case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
 15.1679 +        case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
 15.1680 +        case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
 15.1681 +        case 8: *src.reg = dst.val; break;
 15.1682 +        }
 15.1683 +        goto add;
 15.1684 +
 15.1685      case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
 15.1686          unsigned long old[2], exp[2], new[2];
 15.1687          unsigned int i;
    16.1 --- a/xen/include/asm-x86/hvm/domain.h	Mon Oct 13 13:15:20 2008 +0100
    16.2 +++ b/xen/include/asm-x86/hvm/domain.h	Tue Oct 14 10:45:29 2008 +0100
    16.3 @@ -28,6 +28,7 @@
    16.4  #include <asm/hvm/vioapic.h>
    16.5  #include <asm/hvm/io.h>
    16.6  #include <xen/hvm/iommu.h>
    16.7 +#include <asm/hvm/viridian.h>
    16.8  #include <asm/hvm/vmx/vmcs.h>
    16.9  #include <asm/hvm/svm/vmcb.h>
   16.10  #include <public/hvm/params.h>
   16.11 @@ -74,6 +75,8 @@ struct hvm_domain {
   16.12      /* Pass-through */
   16.13      struct hvm_iommu       hvm_iommu;
   16.14  
   16.15 +    struct viridian_domain viridian;
   16.16 +
   16.17      bool_t                 hap_enabled;
   16.18      bool_t                 qemu_mapcache_invalidate;
   16.19      bool_t                 is_s3_suspended;
    17.1 --- a/xen/include/asm-x86/hvm/hvm.h	Mon Oct 13 13:15:20 2008 +0100
    17.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Tue Oct 14 10:45:29 2008 +0100
    17.3 @@ -219,6 +219,9 @@ hvm_set_segment_register(struct vcpu *v,
    17.4      hvm_funcs.set_segment_register(v, seg, reg);
    17.5  }
    17.6  
    17.7 +#define is_viridian_domain(_d)                                             \
    17.8 + (is_hvm_domain(_d) && ((_d)->arch.hvm_domain.params[HVM_PARAM_VIRIDIAN]))
    17.9 +
   17.10  void hvm_cpuid(unsigned int input, unsigned int *eax, unsigned int *ebx,
   17.11                                     unsigned int *ecx, unsigned int *edx);
   17.12  void hvm_migrate_timers(struct vcpu *v);
    18.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    18.2 +++ b/xen/include/asm-x86/hvm/viridian.h	Tue Oct 14 10:45:29 2008 +0100
    18.3 @@ -0,0 +1,65 @@
    18.4 +/*****************************************************************************
    18.5 + *
    18.6 + * include/asm-x86/hvm/viridian.h
    18.7 + *
    18.8 + * Copyright (c) 2008 Citrix Corp.
    18.9 + *
   18.10 + */
   18.11 +
   18.12 +#ifndef __ASM_X86_HVM_VIRIDIAN_H__
   18.13 +#define __ASM_X86_HVM_VIRIDIAN_H__
   18.14 +
   18.15 +union viridian_guest_os_id
   18.16 +{
   18.17 +    uint64_t raw;
   18.18 +    struct
   18.19 +    {
   18.20 +        uint64_t build_number:16;
   18.21 +        uint64_t service_pack:8;
   18.22 +        uint64_t minor:8;
   18.23 +        uint64_t major:8;
   18.24 +        uint64_t os:8;
   18.25 +        uint64_t vendor:16;
   18.26 +    } fields;
   18.27 +};
   18.28 +
   18.29 +union viridian_hypercall_gpa
   18.30 +{   uint64_t raw;
   18.31 +    struct
   18.32 +    {
   18.33 +        uint64_t enabled:1;
   18.34 +        uint64_t reserved_preserved:11;
   18.35 +        uint64_t pfn:48;
   18.36 +    } fields;
   18.37 +};
   18.38 +
   18.39 +struct viridian_domain
   18.40 +{
   18.41 +    union viridian_guest_os_id guest_os_id;
   18.42 +    union viridian_hypercall_gpa hypercall_gpa;
   18.43 +};
   18.44 +
   18.45 +int
   18.46 +cpuid_viridian_leaves(
   18.47 +    unsigned int leaf,
   18.48 +    unsigned int *eax,
   18.49 +    unsigned int *ebx,
   18.50 +    unsigned int *ecx,
   18.51 +    unsigned int *edx);
   18.52 +
   18.53 +int
   18.54 +wrmsr_viridian_regs(
   18.55 +    uint32_t idx,
   18.56 +    uint32_t eax,
   18.57 +    uint32_t edx);
   18.58 +
   18.59 +int
   18.60 +rdmsr_viridian_regs(
   18.61 +    uint32_t idx,
   18.62 +    uint32_t *eax,
   18.63 +    uint32_t *edx);
   18.64 +
   18.65 +int
   18.66 +viridian_hypercall(struct cpu_user_regs *regs);
   18.67 +
   18.68 +#endif /* __ASM_X86_HVM_VIRIDIAN_H__ */
    19.1 --- a/xen/include/asm-x86/hvm/vlapic.h	Mon Oct 13 13:15:20 2008 +0100
    19.2 +++ b/xen/include/asm-x86/hvm/vlapic.h	Tue Oct 14 10:45:29 2008 +0100
    19.3 @@ -98,4 +98,8 @@ struct vlapic *apic_round_robin(
    19.4  
    19.5  int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda);
    19.6  
    19.7 +void vlapic_EOI_set(struct vlapic *vlapic);
    19.8 +
    19.9 +int vlapic_ipi(struct vlapic *vlapic, uint32_t icr_low, uint32_t icr_high);
   19.10 +
   19.11  #endif /* __ASM_X86_HVM_VLAPIC_H__ */
    20.1 --- a/xen/include/asm-x86/perfc_defn.h	Mon Oct 13 13:15:20 2008 +0100
    20.2 +++ b/xen/include/asm-x86/perfc_defn.h	Tue Oct 14 10:45:29 2008 +0100
    20.3 @@ -111,4 +111,20 @@ PERFCOUNTER(shadow_unsync,         "shad
    20.4  PERFCOUNTER(shadow_unsync_evict,   "shadow OOS evictions")
    20.5  PERFCOUNTER(shadow_resync,         "shadow OOS resyncs")
    20.6  
    20.7 +PERFCOUNTER(mshv_call_sw_addr_space,    "MS Hv Switch Address Space")
    20.8 +PERFCOUNTER(mshv_call_flush_tlb_list,   "MS Hv Flush TLB list")
    20.9 +PERFCOUNTER(mshv_call_flush_tlb_all,    "MS Hv Flush TLB all")
   20.10 +PERFCOUNTER(mshv_call_long_wait,        "MS Hv Notify long wait")
   20.11 +PERFCOUNTER(mshv_rdmsr_osid,            "MS Hv rdmsr Guest OS ID")
   20.12 +PERFCOUNTER(mshv_rdmsr_hc_page,         "MS Hv rdmsr hypercall page")
   20.13 +PERFCOUNTER(mshv_rdmsr_vp_index,        "MS Hv rdmsr vp index")
   20.14 +PERFCOUNTER(mshv_rdmsr_icr,             "MS Hv rdmsr icr")
   20.15 +PERFCOUNTER(mshv_rdmsr_tpr,             "MS Hv rdmsr tpr")
   20.16 +PERFCOUNTER(mshv_wrmsr_osid,            "MS Hv wrmsr Guest OS ID")
   20.17 +PERFCOUNTER(mshv_wrmsr_hc_page,         "MS Hv wrmsr hypercall page")
   20.18 +PERFCOUNTER(mshv_wrmsr_vp_index,        "MS Hv wrmsr vp index")
   20.19 +PERFCOUNTER(mshv_wrmsr_icr,             "MS Hv wrmsr icr")
   20.20 +PERFCOUNTER(mshv_wrmsr_tpr,             "MS Hv wrmsr tpr")
   20.21 +PERFCOUNTER(mshv_wrmsr_eoi,             "MS Hv wrmsr eoi")
   20.22 +
   20.23  /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
    21.1 --- a/xen/include/public/arch-x86/hvm/save.h	Mon Oct 13 13:15:20 2008 +0100
    21.2 +++ b/xen/include/public/arch-x86/hvm/save.h	Tue Oct 14 10:45:29 2008 +0100
    21.3 @@ -421,9 +421,20 @@ struct hvm_hw_mtrr {
    21.4  
    21.5  DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
    21.6  
    21.7 +/*
    21.8 + * Viridian hypervisor context.
    21.9 + */
   21.10 +
   21.11 +struct hvm_viridian_context {
   21.12 +    uint64_t hypercall_gpa;
   21.13 +    uint64_t guest_os_id;
   21.14 +};
   21.15 +
   21.16 +DECLARE_HVM_SAVE_TYPE(VIRIDIAN, 15, struct hvm_viridian_context);
   21.17 +
   21.18  /* 
   21.19   * Largest type-code in use
   21.20   */
   21.21 -#define HVM_SAVE_CODE_MAX 14
   21.22 +#define HVM_SAVE_CODE_MAX 15
   21.23  
   21.24  #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */
    22.1 --- a/xen/include/public/hvm/params.h	Mon Oct 13 13:15:20 2008 +0100
    22.2 +++ b/xen/include/public/hvm/params.h	Tue Oct 14 10:45:29 2008 +0100
    22.3 @@ -51,9 +51,16 @@
    22.4  #define HVM_PARAM_BUFIOREQ_PFN 6
    22.5  
    22.6  #ifdef __ia64__
    22.7 +
    22.8  #define HVM_PARAM_NVRAM_FD     7
    22.9  #define HVM_PARAM_VHPT_SIZE    8
   22.10  #define HVM_PARAM_BUFPIOREQ_PFN	9
   22.11 +
   22.12 +#elif defined(__i386__) || defined(__x86_64__)
   22.13 +
   22.14 +/* Expose Viridian interfaces to this HVM guest? */
   22.15 +#define HVM_PARAM_VIRIDIAN     9
   22.16 +
   22.17  #endif
   22.18  
   22.19  /*