ia64/xen-unstable

changeset 12610:519a74928bd4

[HVM] Non-flat protected mode HVM support.

This is now the full set of changes needed to eliminate the assumption
that segments in protected mode always have zero base addresses. At
the same time, this further simplifies the instruction length
determination code used for MMIO of HVM domains.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kfraser@localhost.localdomain
date Tue Nov 28 11:45:54 2006 +0000 (2006-11-28)
parents 62b0b520ea53
children b75574cb80a3
files xen/arch/x86/hvm/instrlen.c xen/arch/x86/hvm/platform.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmx.c xen/include/asm-x86/hvm/hvm.h
line diff
     1.1 --- a/xen/arch/x86/hvm/instrlen.c	Tue Nov 28 11:43:39 2006 +0000
     1.2 +++ b/xen/arch/x86/hvm/instrlen.c	Tue Nov 28 11:45:54 2006 +0000
     1.3 @@ -20,7 +20,6 @@
     1.4  #include <xen/config.h>
     1.5  #include <xen/sched.h>
     1.6  #include <xen/mm.h>
     1.7 -#include <asm/regs.h>
     1.8  #include <asm-x86/x86_emulate.h>
     1.9  
    1.10  /* read from guest memory */
    1.11 @@ -121,9 +120,7 @@ static uint8_t opcode_table[256] = {
    1.12      ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
    1.13      ByteOp|ImplicitOps, ImplicitOps,
    1.14      /* 0xB0 - 0xBF */
    1.15 -    SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 
    1.16 -    SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, 
    1.17 -    0, 0, 0, 0, 0, 0, 0, 0,
    1.18 +    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1.19      /* 0xC0 - 0xC7 */
    1.20      ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM, 0, 0,
    1.21      0, 0, ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
    1.22 @@ -195,54 +192,52 @@ static uint8_t twobyte_table[256] = {
    1.23  };
    1.24  
    1.25  /* 
    1.26 - * insn_fetch - fetch the next 1 to 4 bytes from instruction stream 
    1.27 - * @_type:   u8, u16, u32, s8, s16, or s32
    1.28 - * @_size:   1, 2, or 4 bytes
    1.29 + * insn_fetch - fetch the next byte from instruction stream
    1.30   */
    1.31 -#define insn_fetch(_type, _size)                                        \
    1.32 -({ unsigned long _x, _ptr = _regs.eip;                                  \
    1.33 -   if ( mode == X86EMUL_MODE_REAL ) _ptr += _regs.cs << 4;              \
    1.34 -   rc = inst_copy_from_guest((unsigned char *)(&(_x)), _ptr, _size);    \
    1.35 -   if ( rc != _size ) goto done;                                        \
    1.36 -   _regs.eip += (_size);                                                \
    1.37 -   length += (_size);                                                   \
    1.38 -   (_type)_x;                                                           \
    1.39 +#define insn_fetch()                                                    \
    1.40 +({ uint8_t _x;                                                          \
    1.41 +   if ( length >= 15 )                                                  \
    1.42 +       return -1;                                                       \
    1.43 +   if ( inst_copy_from_guest(&_x, pc, 1) != 1 ) {                       \
    1.44 +       gdprintk(XENLOG_WARNING,                                         \
    1.45 +                "Cannot read from address %lx (eip %lx, mode %d)\n",    \
    1.46 +                pc, org_pc, mode);                                      \
    1.47 +       return -1;                                                       \
    1.48 +   }                                                                    \
    1.49 +   pc += 1;                                                             \
    1.50 +   length += 1;                                                         \
    1.51 +   _x;                                                                  \
    1.52  })
    1.53  
    1.54  /**
    1.55   * hvm_instruction_length - returns the current instructions length
    1.56   *
    1.57 - * @regs: guest register state
    1.58 + * @org_pc: guest instruction pointer
    1.59   * @mode: guest operating mode
    1.60   *
    1.61   * EXTERNAL this routine calculates the length of the current instruction
    1.62 - * pointed to by eip.  The guest state is _not_ changed by this routine.
    1.63 + * pointed to by org_pc.  The guest state is _not_ changed by this routine.
    1.64   */
    1.65 -int hvm_instruction_length(struct cpu_user_regs *regs, int mode)
    1.66 +int hvm_instruction_length(unsigned long org_pc, int mode)
    1.67  {
    1.68 -    uint8_t b, d, twobyte = 0, rex_prefix = 0;
    1.69 -    uint8_t modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
    1.70 -    unsigned int op_bytes, ad_bytes, i;
    1.71 -    int rc = 0;
    1.72 +    uint8_t b, d, twobyte = 0, rex_prefix = 0, modrm_reg = 0;
    1.73 +    unsigned int op_default, op_bytes, ad_default, ad_bytes, tmp;
    1.74      int length = 0;
    1.75 -    unsigned int tmp;
    1.76 -
    1.77 -    /* Shadow copy of register state. Committed on successful emulation. */
    1.78 -    struct cpu_user_regs _regs = *regs;
    1.79 +    unsigned long pc = org_pc;
    1.80  
    1.81      switch ( mode )
    1.82      {
    1.83      case X86EMUL_MODE_REAL:
    1.84      case X86EMUL_MODE_PROT16:
    1.85 -        op_bytes = ad_bytes = 2;
    1.86 +        op_bytes = op_default = ad_bytes = ad_default = 2;
    1.87          break;
    1.88      case X86EMUL_MODE_PROT32:
    1.89 -        op_bytes = ad_bytes = 4;
    1.90 +        op_bytes = op_default = ad_bytes = ad_default = 4;
    1.91          break;
    1.92  #ifdef __x86_64__
    1.93      case X86EMUL_MODE_PROT64:
    1.94 -        op_bytes = 4;
    1.95 -        ad_bytes = 8;
    1.96 +        op_bytes = op_default = 4;
    1.97 +        ad_bytes = ad_default = 8;
    1.98          break;
    1.99  #endif
   1.100      default:
   1.101 @@ -250,18 +245,18 @@ int hvm_instruction_length(struct cpu_us
   1.102      }
   1.103  
   1.104      /* Legacy prefixes. */
   1.105 -    for ( i = 0; i < 8; i++ )
   1.106 +    for ( ; ; )
   1.107      {
   1.108 -        switch ( b = insn_fetch(uint8_t, 1) )
   1.109 +        switch ( b = insn_fetch() )
   1.110          {
   1.111          case 0x66: /* operand-size override */
   1.112 -            op_bytes ^= 6;      /* switch between 2/4 bytes */
   1.113 +            op_bytes = op_default ^ 6;      /* switch between 2/4 bytes */
   1.114              break;
   1.115          case 0x67: /* address-size override */
   1.116              if ( mode == X86EMUL_MODE_PROT64 )
   1.117 -                ad_bytes ^= 12; /* switch between 4/8 bytes */
   1.118 +                ad_bytes = ad_default ^ 12; /* switch between 4/8 bytes */
   1.119              else
   1.120 -                ad_bytes ^= 6;  /* switch between 2/4 bytes */
   1.121 +                ad_bytes = ad_default ^ 6;  /* switch between 2/4 bytes */
   1.122              break;
   1.123          case 0x2e: /* CS override */
   1.124          case 0x3e: /* DS override */
   1.125 @@ -273,22 +268,26 @@ int hvm_instruction_length(struct cpu_us
   1.126          case 0xf3: /* REP/REPE/REPZ */
   1.127          case 0xf2: /* REPNE/REPNZ */
   1.128              break;
   1.129 +#ifdef __x86_64__
   1.130 +        case 0x40 ... 0x4f:
   1.131 +            if ( mode == X86EMUL_MODE_PROT64 )
   1.132 +            {
   1.133 +                rex_prefix = b;
   1.134 +                continue;
   1.135 +            }
   1.136 +            /* FALLTHRU */
   1.137 +#endif
   1.138          default:
   1.139              goto done_prefixes;
   1.140          }
   1.141 +        rex_prefix = 0;
   1.142      }
   1.143  done_prefixes:
   1.144  
   1.145      /* REX prefix. */
   1.146 -    if ( (mode == X86EMUL_MODE_PROT64) && ((b & 0xf0) == 0x40) )
   1.147 -    {
   1.148 -        rex_prefix = b;
   1.149 -        if ( b & 8 )
   1.150 -            op_bytes = 8;          /* REX.W */
   1.151 -        modrm_reg = (b & 4) << 1;  /* REX.R */
   1.152 -        /* REX.B and REX.X do not need to be decoded. */
   1.153 -        b = insn_fetch(uint8_t, 1);
   1.154 -    }
   1.155 +    if ( rex_prefix & 8 )
   1.156 +        op_bytes = 8;                   /* REX.W */
   1.157 +    /* REX.B, REX.R, and REX.X do not need to be decoded. */
   1.158  
   1.159      /* Opcode byte(s). */
   1.160      d = opcode_table[b];
   1.161 @@ -298,7 +297,7 @@ done_prefixes:
   1.162          if ( b == 0x0f )
   1.163          {
   1.164              twobyte = 1;
   1.165 -            b = insn_fetch(uint8_t, 1);
   1.166 +            b = insn_fetch();
   1.167              d = twobyte_table[b];
   1.168          }
   1.169  
   1.170 @@ -310,11 +309,11 @@ done_prefixes:
   1.171      /* ModRM and SIB bytes. */
   1.172      if ( d & ModRM )
   1.173      {
   1.174 -        modrm = insn_fetch(uint8_t, 1);
   1.175 -        modrm_mod |= (modrm & 0xc0) >> 6;
   1.176 -        modrm_reg |= (modrm & 0x38) >> 3;
   1.177 -        modrm_rm  |= (modrm & 0x07);
   1.178 +        uint8_t modrm = insn_fetch();
   1.179 +        uint8_t modrm_mod = (modrm & 0xc0) >> 6;
   1.180 +        uint8_t modrm_rm  = (modrm & 0x07);
   1.181  
   1.182 +        modrm_reg = (modrm & 0x38) >> 3;
   1.183          if ( modrm_mod == 3 )
   1.184          {
   1.185              gdprintk(XENLOG_WARNING, "Cannot parse ModRM.mod == 3.\n");
   1.186 @@ -330,16 +329,16 @@ done_prefixes:
   1.187                  if ( modrm_rm == 6 ) 
   1.188                  {
   1.189                      length += 2;
   1.190 -                    _regs.eip += 2; /* skip disp16 */
   1.191 +                    pc += 2; /* skip disp16 */
   1.192                  }
   1.193                  break;
   1.194              case 1:
   1.195                  length += 1;
   1.196 -                _regs.eip += 1; /* skip disp8 */
   1.197 +                pc += 1; /* skip disp8 */
   1.198                  break;
   1.199              case 2:
   1.200                  length += 2;
   1.201 -                _regs.eip += 2; /* skip disp16 */
   1.202 +                pc += 2; /* skip disp16 */
   1.203                  break;
   1.204              }
   1.205          }
   1.206 @@ -350,33 +349,34 @@ done_prefixes:
   1.207              {
   1.208              case 0:
   1.209                  if ( (modrm_rm == 4) && 
   1.210 -                     (((insn_fetch(uint8_t, 1)) & 7) 
   1.211 -                        == 5) )
   1.212 +                     ((insn_fetch() & 7) == 5) )
   1.213                  {
   1.214                      length += 4;
   1.215 -                    _regs.eip += 4; /* skip disp32 specified by SIB.base */
   1.216 +                    pc += 4; /* skip disp32 specified by SIB.base */
   1.217                  }
   1.218                  else if ( modrm_rm == 5 )
   1.219                  {
   1.220                      length += 4;
   1.221 -                    _regs.eip += 4; /* skip disp32 */
   1.222 +                    pc += 4; /* skip disp32 */
   1.223                  }
   1.224                  break;
   1.225              case 1:
   1.226                  if ( modrm_rm == 4 )
   1.227                  {
   1.228 -                    insn_fetch(uint8_t, 1);
   1.229 +                    length += 1;
   1.230 +                    pc += 1;
   1.231                  }
   1.232                  length += 1;
   1.233 -                _regs.eip += 1; /* skip disp8 */
   1.234 +                pc += 1; /* skip disp8 */
   1.235                  break;
   1.236              case 2:
   1.237                  if ( modrm_rm == 4 )
   1.238                  {
   1.239 -                    insn_fetch(uint8_t, 1);
   1.240 +                    length += 1;
   1.241 +                    pc += 1;
   1.242                  }
   1.243                  length += 4;
   1.244 -                _regs.eip += 4; /* skip disp32 */
   1.245 +                pc += 4; /* skip disp32 */
   1.246                  break;
   1.247              }
   1.248          }
   1.249 @@ -397,15 +397,12 @@ done_prefixes:
   1.250          tmp = (d & ByteOp) ? 1 : op_bytes;
   1.251          if ( tmp == 8 ) tmp = 4;
   1.252          /* NB. Immediates are sign-extended as necessary. */
   1.253 -        switch ( tmp )
   1.254 -        {
   1.255 -        case 1: insn_fetch(int8_t,  1); break;
   1.256 -        case 2: insn_fetch(int16_t, 2); break;
   1.257 -        case 4: insn_fetch(int32_t, 4); break;
   1.258 -        }
   1.259 +        length += tmp;
   1.260 +        pc += tmp;
   1.261          break;
   1.262      case SrcImmByte:
   1.263 -        insn_fetch(int8_t,  1);
   1.264 +        length += 1;
   1.265 +        pc += 1;
   1.266          break;
   1.267      }
   1.268  
   1.269 @@ -414,13 +411,9 @@ done_prefixes:
   1.270  
   1.271      switch ( b )
   1.272      {
   1.273 -    case 0xa0 ... 0xa1: /* mov */
   1.274 +    case 0xa0 ... 0xa3: /* mov */
   1.275          length += ad_bytes;
   1.276 -        _regs.eip += ad_bytes; /* skip src displacement */
   1.277 -        break;
   1.278 -    case 0xa2 ... 0xa3: /* mov */
   1.279 -        length += ad_bytes;
   1.280 -        _regs.eip += ad_bytes; /* skip dst displacement */
   1.281 +        pc += ad_bytes; /* skip src/dst displacement */
   1.282          break;
   1.283      case 0xf6 ... 0xf7: /* Grp3 */
   1.284          switch ( modrm_reg )
   1.285 @@ -429,23 +422,19 @@ done_prefixes:
   1.286              /* Special case in Grp3: test has an immediate source operand. */
   1.287              tmp = (d & ByteOp) ? 1 : op_bytes;
   1.288              if ( tmp == 8 ) tmp = 4;
   1.289 -            switch ( tmp )
   1.290 -            {
   1.291 -            case 1: insn_fetch(int8_t,  1); break;
   1.292 -            case 2: insn_fetch(int16_t, 2); break;
   1.293 -            case 4: insn_fetch(int32_t, 4); break;
   1.294 -            }
   1.295 -            goto done;
   1.296 +            length += tmp;
   1.297 +            pc += tmp;
   1.298 +            break;
   1.299          }
   1.300          break;
   1.301      }
   1.302  
   1.303  done:
   1.304 -    return length;
   1.305 +    return length < 16 ? length : -1;
   1.306  
   1.307  cannot_emulate:
   1.308      gdprintk(XENLOG_WARNING,
   1.309 -            "Cannot emulate %02x at address %lx (eip %lx, mode %d)\n",
   1.310 -            b, (unsigned long)_regs.eip, (unsigned long)regs->eip, mode);
   1.311 +            "Cannot emulate %02x at address %lx (%lx, mode %d)\n",
   1.312 +            b, pc - 1, org_pc, mode);
   1.313      return -1;
   1.314  }
     2.1 --- a/xen/arch/x86/hvm/platform.c	Tue Nov 28 11:43:39 2006 +0000
     2.2 +++ b/xen/arch/x86/hvm/platform.c	Tue Nov 28 11:45:54 2006 +0000
     2.3 @@ -28,6 +28,7 @@
     2.4  #include <xen/trace.h>
     2.5  #include <xen/sched.h>
     2.6  #include <asm/regs.h>
     2.7 +#include <asm/x86_emulate.h>
     2.8  #include <asm/hvm/hvm.h>
     2.9  #include <asm/hvm/support.h>
    2.10  #include <asm/hvm/io.h>
    2.11 @@ -168,13 +169,15 @@ long get_reg_value(int size, int index, 
    2.12  
    2.13  static inline unsigned char *check_prefix(unsigned char *inst,
    2.14                                            struct hvm_io_op *mmio_op,
    2.15 +                                          unsigned char *ad_size,
    2.16                                            unsigned char *op_size,
    2.17 +                                          unsigned char *seg_sel,
    2.18                                            unsigned char *rex_p)
    2.19  {
    2.20      while ( 1 ) {
    2.21          switch ( *inst ) {
    2.22              /* rex prefix for em64t instructions */
    2.23 -        case 0x40 ... 0x4e:
    2.24 +        case 0x40 ... 0x4f:
    2.25              *rex_p = *inst;
    2.26              break;
    2.27          case 0xf3: /* REPZ */
    2.28 @@ -191,12 +194,13 @@ static inline unsigned char *check_prefi
    2.29          case 0x26: /* ES */
    2.30          case 0x64: /* FS */
    2.31          case 0x65: /* GS */
    2.32 -            //mmio_op->seg_sel = *inst;
    2.33 +            *seg_sel = *inst;
    2.34              break;
    2.35          case 0x66: /* 32bit->16bit */
    2.36              *op_size = WORD;
    2.37              break;
    2.38          case 0x67:
    2.39 +            *ad_size = WORD;
    2.40              break;
    2.41          default:
    2.42              return inst;
    2.43 @@ -205,7 +209,7 @@ static inline unsigned char *check_prefi
    2.44      }
    2.45  }
    2.46  
    2.47 -static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size)
    2.48 +static inline unsigned long get_immediate(int ad_size, const unsigned char *inst, int op_size)
    2.49  {
    2.50      int mod, reg, rm;
    2.51      unsigned long val = 0;
    2.52 @@ -216,16 +220,19 @@ static inline unsigned long get_immediat
    2.53      rm = *inst & 7;
    2.54  
    2.55      inst++; //skip ModR/M byte
    2.56 -    if ( mod != 3 && rm == 4 ) {
    2.57 +    if ( ad_size != WORD && mod != 3 && rm == 4 ) {
    2.58 +        rm = *inst & 7;
    2.59          inst++; //skip SIB byte
    2.60      }
    2.61  
    2.62      switch ( mod ) {
    2.63      case 0:
    2.64 -        if ( rm == 5 || rm == 4 ) {
    2.65 -            if ( op16 )
    2.66 +        if ( ad_size == WORD ) {
    2.67 +            if ( rm == 6 )
    2.68                  inst = inst + 2; //disp16, skip 2 bytes
    2.69 -            else
    2.70 +        }
    2.71 +        else {
    2.72 +            if ( rm == 5 )
    2.73                  inst = inst + 4; //disp32, skip 4 bytes
    2.74          }
    2.75          break;
    2.76 @@ -233,7 +240,7 @@ static inline unsigned long get_immediat
    2.77          inst++; //disp8, skip 1 byte
    2.78          break;
    2.79      case 2:
    2.80 -        if ( op16 )
    2.81 +        if ( ad_size == WORD )
    2.82              inst = inst + 2; //disp16, skip 2 bytes
    2.83          else
    2.84              inst = inst + 4; //disp32, skip 4 bytes
    2.85 @@ -276,7 +283,6 @@ static void init_instruction(struct hvm_
    2.86      mmio_op->instr = 0;
    2.87  
    2.88      mmio_op->flags = 0;
    2.89 -    //mmio_op->seg_sel = 0;
    2.90  
    2.91      mmio_op->operand[0] = 0;
    2.92      mmio_op->operand[1] = 0;
    2.93 @@ -346,25 +352,52 @@ static int reg_mem(unsigned char size, u
    2.94      return DECODE_success;
    2.95  }
    2.96  
    2.97 -static int mmio_decode(int realmode, unsigned char *opcode,
    2.98 -                      struct hvm_io_op *mmio_op, unsigned char *op_size)
    2.99 +static int mmio_decode(int mode, unsigned char *opcode,
   2.100 +                       struct hvm_io_op *mmio_op,
   2.101 +                       unsigned char *ad_size, unsigned char *op_size,
   2.102 +                       unsigned char *seg_sel)
   2.103  {
   2.104      unsigned char size_reg = 0;
   2.105      unsigned char rex = 0;
   2.106      int index;
   2.107  
   2.108 +    *ad_size = 0;
   2.109      *op_size = 0;
   2.110 +    *seg_sel = 0;
   2.111      init_instruction(mmio_op);
   2.112  
   2.113 -    opcode = check_prefix(opcode, mmio_op, op_size, &rex);
   2.114 +    opcode = check_prefix(opcode, mmio_op, ad_size, op_size, seg_sel, &rex);
   2.115  
   2.116 -    if ( realmode ) { /* meaning is reversed */
   2.117 +    switch ( mode ) {
   2.118 +    case X86EMUL_MODE_REAL: /* meaning is reversed */
   2.119 +    case X86EMUL_MODE_PROT16:
   2.120          if ( *op_size == WORD )
   2.121              *op_size = LONG;
   2.122          else if ( *op_size == LONG )
   2.123              *op_size = WORD;
   2.124          else if ( *op_size == 0 )
   2.125              *op_size = WORD;
   2.126 +        if ( *ad_size == WORD )
   2.127 +            *ad_size = LONG;
   2.128 +        else if ( *ad_size == LONG )
   2.129 +            *ad_size = WORD;
   2.130 +        else if ( *ad_size == 0 )
   2.131 +            *ad_size = WORD;
   2.132 +        break;
   2.133 +    case X86EMUL_MODE_PROT32:
   2.134 +        if ( *op_size == 0 )
   2.135 +            *op_size = LONG;
   2.136 +        if ( *ad_size == 0 )
   2.137 +            *ad_size = LONG;
   2.138 +        break;
   2.139 +#ifdef __x86_64__
   2.140 +    case X86EMUL_MODE_PROT64:
   2.141 +        if ( *op_size == 0 )
   2.142 +            *op_size = rex & 0x8 ? QUAD : LONG;
   2.143 +        if ( *ad_size == 0 )
   2.144 +            *ad_size = QUAD;
   2.145 +        break;
   2.146 +#endif
   2.147      }
   2.148  
   2.149      /* the operands order in comments conforms to AT&T convention */
   2.150 @@ -471,10 +504,10 @@ static int mmio_decode(int realmode, uns
   2.151          /* opcode 0x83 always has a single byte operand */
   2.152          if ( opcode[0] == 0x83 )
   2.153              mmio_op->immediate =
   2.154 -                (signed char)get_immediate(realmode, opcode + 1, BYTE);
   2.155 +                (signed char)get_immediate(*ad_size, opcode + 1, BYTE);
   2.156          else
   2.157              mmio_op->immediate =
   2.158 -                get_immediate(realmode, opcode + 1, *op_size);
   2.159 +                get_immediate(*ad_size, opcode + 1, *op_size);
   2.160  
   2.161          mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
   2.162          mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
   2.163 @@ -598,7 +631,7 @@ static int mmio_decode(int realmode, uns
   2.164  
   2.165              mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
   2.166              mmio_op->immediate  =
   2.167 -                    get_immediate(realmode, opcode + 1, *op_size);
   2.168 +                    get_immediate(*ad_size, opcode + 1, *op_size);
   2.169              mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
   2.170  
   2.171              return DECODE_success;
   2.172 @@ -612,7 +645,7 @@ static int mmio_decode(int realmode, uns
   2.173  
   2.174              mmio_op->operand[0] = mk_operand(*op_size, 0, 0, IMMEDIATE);
   2.175              mmio_op->immediate =
   2.176 -                    get_immediate(realmode, opcode + 1, *op_size);
   2.177 +                    get_immediate(*ad_size, opcode + 1, *op_size);
   2.178              mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
   2.179  
   2.180              return DECODE_success;
   2.181 @@ -634,7 +667,7 @@ static int mmio_decode(int realmode, uns
   2.182  
   2.183              mmio_op->operand[0] = mk_operand(size_reg, 0, 0, IMMEDIATE);
   2.184              mmio_op->immediate =
   2.185 -                    get_immediate(realmode, opcode + 1, *op_size);
   2.186 +                    get_immediate(*ad_size, opcode + 1, *op_size);
   2.187              mmio_op->operand[1] = mk_operand(size_reg, 0, 0, MEMORY);
   2.188  
   2.189              return DECODE_success;
   2.190 @@ -697,7 +730,7 @@ static int mmio_decode(int realmode, uns
   2.191              GET_OP_SIZE_FOR_NONEBYTE(*op_size);
   2.192              mmio_op->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE);
   2.193              mmio_op->immediate =
   2.194 -                    (signed char)get_immediate(realmode, opcode + 1, BYTE);
   2.195 +                    (signed char)get_immediate(*ad_size, opcode + 1, BYTE);
   2.196              mmio_op->operand[1] = mk_operand(*op_size, 0, 0, MEMORY);
   2.197              return DECODE_success;
   2.198          }
   2.199 @@ -866,15 +899,15 @@ static void mmio_operands(int type, unsi
   2.200  }
   2.201  
   2.202  #define GET_REPEAT_COUNT() \
   2.203 -     (mmio_op->flags & REPZ ? (realmode ? regs->ecx & 0xFFFF : regs->ecx) : 1)
   2.204 +     (mmio_op->flags & REPZ ? (ad_size == WORD ? regs->ecx & 0xFFFF : regs->ecx) : 1)
   2.205  
   2.206  void handle_mmio(unsigned long gpa)
   2.207  {
   2.208      unsigned long inst_addr;
   2.209      struct hvm_io_op *mmio_op;
   2.210      struct cpu_user_regs *regs;
   2.211 -    unsigned char inst[MAX_INST_LEN], op_size;
   2.212 -    int i, realmode, df, inst_len;
   2.213 +    unsigned char inst[MAX_INST_LEN], ad_size, op_size, seg_sel;
   2.214 +    int i, mode, df, inst_len;
   2.215      struct vcpu *v = current;
   2.216  
   2.217      mmio_op = &v->arch.hvm_vcpu.io_op;
   2.218 @@ -886,27 +919,23 @@ void handle_mmio(unsigned long gpa)
   2.219  
   2.220      df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
   2.221  
   2.222 -    inst_len = hvm_instruction_length(regs, hvm_guest_x86_mode(v));
   2.223 +    mode = hvm_guest_x86_mode(v);
   2.224 +    inst_addr = hvm_get_segment_base(v, seg_cs) + regs->eip;
   2.225 +    inst_len = hvm_instruction_length(inst_addr, mode);
   2.226      if ( inst_len <= 0 )
   2.227      {
   2.228          printk("handle_mmio: failed to get instruction length\n");
   2.229          domain_crash_synchronous();
   2.230      }
   2.231  
   2.232 -    realmode = hvm_realmode(v);
   2.233 -    if ( realmode )
   2.234 -        inst_addr = regs->cs << 4;
   2.235 -    else
   2.236 -        inst_addr = hvm_get_segment_base(current, seg_cs);
   2.237 -    inst_addr += regs->eip;
   2.238 -
   2.239      memset(inst, 0, MAX_INST_LEN);
   2.240      if ( inst_copy_from_guest(inst, inst_addr, inst_len) != inst_len ) {
   2.241          printk("handle_mmio: failed to copy instruction\n");
   2.242          domain_crash_synchronous();
   2.243      }
   2.244  
   2.245 -    if ( mmio_decode(realmode, inst, mmio_op, &op_size) == DECODE_failure ) {
   2.246 +    if ( mmio_decode(mode, inst, mmio_op, &ad_size, &op_size, &seg_sel)
   2.247 +         == DECODE_failure ) {
   2.248          printk("handle_mmio: failed to decode instruction\n");
   2.249          printk("mmio opcode: gpa 0x%lx, len %d:", gpa, inst_len);
   2.250          for ( i = 0; i < inst_len; i++ )
   2.251 @@ -926,29 +955,39 @@ void handle_mmio(unsigned long gpa)
   2.252      {
   2.253          unsigned long count = GET_REPEAT_COUNT();
   2.254          int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
   2.255 -        unsigned long addr = 0;
   2.256 +        unsigned long addr;
   2.257          int dir, size = op_size;
   2.258  
   2.259          ASSERT(count);
   2.260  
   2.261          /* determine non-MMIO address */
   2.262 -        if ( realmode ) {
   2.263 -            if ( ((regs->es << 4) + (regs->edi & 0xFFFF)) == gpa ) {
   2.264 -                dir = IOREQ_WRITE;
   2.265 -                addr = (regs->ds << 4) + (regs->esi & 0xFFFF);
   2.266 -            } else {
   2.267 -                dir = IOREQ_READ;
   2.268 -                addr = (regs->es << 4) + (regs->edi & 0xFFFF);
   2.269 +        addr = regs->edi;
   2.270 +        if ( ad_size == WORD )
   2.271 +            addr &= 0xFFFF;
   2.272 +        addr += hvm_get_segment_base(v, seg_es);
   2.273 +        if ( addr == gpa )
   2.274 +        {
   2.275 +            enum segment seg;
   2.276 +
   2.277 +            dir = IOREQ_WRITE;
   2.278 +            addr = regs->esi;
   2.279 +            if ( ad_size == WORD )
   2.280 +                addr &= 0xFFFF;
   2.281 +            switch ( seg_sel )
   2.282 +            {
   2.283 +            case 0x26: seg = seg_es; break;
   2.284 +            case 0x2e: seg = seg_cs; break;
   2.285 +            case 0x36: seg = seg_ss; break;
   2.286 +            case 0:
   2.287 +            case 0x3e: seg = seg_ds; break;
   2.288 +            case 0x64: seg = seg_fs; break;
   2.289 +            case 0x65: seg = seg_gs; break;
   2.290 +            default: domain_crash_synchronous();
   2.291              }
   2.292 -        } else {
   2.293 -            if ( gpa == regs->edi ) {
   2.294 -                dir = IOREQ_WRITE;
   2.295 -                addr = regs->esi;
   2.296 -            } else {
   2.297 -                dir = IOREQ_READ;
   2.298 -                addr = regs->edi;
   2.299 -            }
   2.300 +            addr += hvm_get_segment_base(v, seg);
   2.301          }
   2.302 +        else
   2.303 +            dir = IOREQ_READ;
   2.304  
   2.305          if ( addr & (size - 1) )
   2.306              gdprintk(XENLOG_WARNING,
     3.1 --- a/xen/arch/x86/hvm/svm/svm.c	Tue Nov 28 11:43:39 2006 +0000
     3.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue Nov 28 11:45:54 2006 +0000
     3.3 @@ -511,18 +511,24 @@ unsigned long svm_get_ctrl_reg(struct vc
     3.4  
     3.5  static unsigned long svm_get_segment_base(struct vcpu *v, enum segment seg)
     3.6  {
     3.7 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     3.8 +    int long_mode = 0;
     3.9 +
    3.10 +#ifdef __x86_64__
    3.11 +    long_mode = vmcb->cs.attributes.fields.l && (vmcb->efer & EFER_LMA);
    3.12 +#endif
    3.13      switch ( seg )
    3.14      {
    3.15 -    case seg_cs: return v->arch.hvm_svm.vmcb->cs.base;
    3.16 -    case seg_ds: return v->arch.hvm_svm.vmcb->ds.base;
    3.17 -    case seg_es: return v->arch.hvm_svm.vmcb->es.base;
    3.18 -    case seg_fs: return v->arch.hvm_svm.vmcb->fs.base;
    3.19 -    case seg_gs: return v->arch.hvm_svm.vmcb->gs.base;
    3.20 -    case seg_ss: return v->arch.hvm_svm.vmcb->ss.base;
    3.21 -    case seg_tr: return v->arch.hvm_svm.vmcb->tr.base;
    3.22 -    case seg_gdtr: return v->arch.hvm_svm.vmcb->gdtr.base;
    3.23 -    case seg_idtr: return v->arch.hvm_svm.vmcb->idtr.base;
    3.24 -    case seg_ldtr: return v->arch.hvm_svm.vmcb->ldtr.base;
    3.25 +    case seg_cs: return long_mode ? 0 : vmcb->cs.base;
    3.26 +    case seg_ds: return long_mode ? 0 : vmcb->ds.base;
    3.27 +    case seg_es: return long_mode ? 0 : vmcb->es.base;
    3.28 +    case seg_fs: return vmcb->fs.base;
    3.29 +    case seg_gs: return vmcb->gs.base;
    3.30 +    case seg_ss: return long_mode ? 0 : vmcb->ss.base;
    3.31 +    case seg_tr: return vmcb->tr.base;
    3.32 +    case seg_gdtr: return vmcb->gdtr.base;
    3.33 +    case seg_idtr: return vmcb->idtr.base;
    3.34 +    case seg_ldtr: return vmcb->ldtr.base;
    3.35      }
    3.36      BUG();
    3.37      return 0;
    3.38 @@ -832,7 +838,6 @@ int start_svm(void)
    3.39      hvm_funcs.store_cpu_guest_regs = svm_store_cpu_guest_regs;
    3.40      hvm_funcs.load_cpu_guest_regs = svm_load_cpu_guest_regs;
    3.41  
    3.42 -    hvm_funcs.realmode = svm_realmode;
    3.43      hvm_funcs.paging_enabled = svm_paging_enabled;
    3.44      hvm_funcs.long_mode_enabled = svm_long_mode_enabled;
    3.45      hvm_funcs.pae_enabled = svm_pae_enabled;
    3.46 @@ -925,7 +930,7 @@ static void svm_do_general_protection_fa
    3.47          printk("Huh? We got a GP Fault with an invalid IDTR!\n");
    3.48          svm_dump_vmcb(__func__, vmcb);
    3.49          svm_dump_regs(__func__, regs);
    3.50 -        svm_dump_inst(vmcb->rip);
    3.51 +        svm_dump_inst(svm_rip2pointer(vmcb));
    3.52          domain_crash(v->domain);
    3.53          return;
    3.54      }
    3.55 @@ -1223,22 +1228,21 @@ static void svm_get_prefix_info(
    3.56  
    3.57  /* Get the address of INS/OUTS instruction */
    3.58  static inline int svm_get_io_address(
    3.59 -    struct vcpu *v, 
    3.60 -    struct cpu_user_regs *regs, unsigned int dir, 
    3.61 +    struct vcpu *v, struct cpu_user_regs *regs,
    3.62 +    unsigned int size, ioio_info_t info,
    3.63      unsigned long *count, unsigned long *addr)
    3.64  {
    3.65      unsigned long        reg;
    3.66      unsigned int         asize = 0;
    3.67      unsigned int         isize;
    3.68 -    int                  long_mode;
    3.69 -    ioio_info_t          info;
    3.70 +    int                  long_mode = 0;
    3.71      segment_selector_t  *seg = NULL;
    3.72      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    3.73  
    3.74 -    info.bytes = vmcb->exitinfo1;
    3.75 -
    3.76 +#ifdef __x86_64__
    3.77      /* If we're in long mode, we shouldn't check the segment presence & limit */
    3.78      long_mode = vmcb->cs.attributes.fields.l && vmcb->efer & EFER_LMA;
    3.79 +#endif
    3.80  
    3.81      /* d field of cs.attributes is 1 for 32-bit, 0 for 16 or 64 bit. 
    3.82       * l field combined with EFER_LMA -> longmode says whether it's 16 or 64 bit. 
    3.83 @@ -1256,11 +1260,9 @@ static inline int svm_get_io_address(
    3.84          isize --;
    3.85  
    3.86      if (isize > 1) 
    3.87 -        svm_get_prefix_info(vmcb, dir, &seg, &asize);
    3.88 -
    3.89 -    ASSERT(dir == IOREQ_READ || dir == IOREQ_WRITE);
    3.90 -
    3.91 -    if (dir == IOREQ_WRITE)
    3.92 +        svm_get_prefix_info(vmcb, info.fields.type, &seg, &asize);
    3.93 +
    3.94 +    if (info.fields.type == IOREQ_WRITE)
    3.95      {
    3.96          reg = regs->esi;
    3.97          if (!seg)               /* If no prefix, used DS. */
    3.98 @@ -1289,6 +1291,8 @@ static inline int svm_get_io_address(
    3.99          *addr = reg;
   3.100          *count = regs->ecx;
   3.101      }
   3.102 +    if (!info.fields.rep)
   3.103 +        *count = 1;
   3.104  
   3.105      if (!long_mode) {
   3.106          if (*addr > seg->limit) 
   3.107 @@ -1301,7 +1305,8 @@ static inline int svm_get_io_address(
   3.108              *addr += seg->base;
   3.109          }
   3.110      }
   3.111 -    
   3.112 +    else if (seg == &vmcb->fs || seg == &vmcb->gs)
   3.113 +        *addr += seg->base;
   3.114  
   3.115      return 1;
   3.116  }
   3.117 @@ -1351,7 +1356,7 @@ static void svm_io_instruction(struct vc
   3.118          unsigned long addr, count;
   3.119          int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
   3.120  
   3.121 -        if (!svm_get_io_address(v, regs, dir, &count, &addr)) 
   3.122 +        if (!svm_get_io_address(v, regs, size, info, &count, &addr))
   3.123          {
   3.124              /* We failed to get a valid address, so don't do the IO operation -
   3.125               * it would just get worse if we do! Hopefully the guest is handing
   3.126 @@ -1365,10 +1370,6 @@ static void svm_io_instruction(struct vc
   3.127          {
   3.128              pio_opp->flags |= REPZ;
   3.129          }
   3.130 -        else 
   3.131 -        {
   3.132 -            count = 1;
   3.133 -        }
   3.134  
   3.135          /*
   3.136           * Handle string pio instructions that cross pages or that
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Tue Nov 28 11:43:39 2006 +0000
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Nov 28 11:45:54 2006 +0000
     4.3 @@ -503,23 +503,32 @@ static unsigned long vmx_get_ctrl_reg(st
     4.4  
     4.5  static unsigned long vmx_get_segment_base(struct vcpu *v, enum segment seg)
     4.6  {
     4.7 -    unsigned long base;
     4.8 +    unsigned long base = 0;
     4.9 +    int long_mode = 0;
    4.10  
    4.11 -    BUG_ON(v != current);
    4.12 +    ASSERT(v == current);
    4.13 +
    4.14 +#ifdef __x86_64__
    4.15 +    if ( vmx_long_mode_enabled(v) &&
    4.16 +         (__vmread(GUEST_CS_AR_BYTES) & (1u<<13)) )
    4.17 +        long_mode = 1;
    4.18 +#endif
    4.19 +
    4.20      switch ( seg )
    4.21      {
    4.22 -    case seg_cs: base = __vmread(GUEST_CS_BASE); break;
    4.23 -    case seg_ds: base = __vmread(GUEST_DS_BASE); break;
    4.24 -    case seg_es: base = __vmread(GUEST_ES_BASE); break;
    4.25 +    case seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break;
    4.26 +    case seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break;
    4.27 +    case seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break;
    4.28      case seg_fs: base = __vmread(GUEST_FS_BASE); break;
    4.29      case seg_gs: base = __vmread(GUEST_GS_BASE); break;
    4.30 -    case seg_ss: base = __vmread(GUEST_SS_BASE); break;
    4.31 +    case seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break;
    4.32      case seg_tr: base = __vmread(GUEST_TR_BASE); break;
    4.33      case seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break;
    4.34      case seg_idtr: base = __vmread(GUEST_IDTR_BASE); break;
    4.35      case seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break;
    4.36 -    default: BUG(); base = 0; break;
    4.37 +    default: BUG(); break;
    4.38      }
    4.39 +
    4.40      return base;
    4.41  }
    4.42  
    4.43 @@ -635,7 +644,6 @@ static void vmx_setup_hvm_funcs(void)
    4.44      hvm_funcs.store_cpu_guest_regs = vmx_store_cpu_guest_regs;
    4.45      hvm_funcs.load_cpu_guest_regs = vmx_load_cpu_guest_regs;
    4.46  
    4.47 -    hvm_funcs.realmode = vmx_realmode;
    4.48      hvm_funcs.paging_enabled = vmx_paging_enabled;
    4.49      hvm_funcs.long_mode_enabled = vmx_long_mode_enabled;
    4.50      hvm_funcs.pae_enabled = vmx_pae_enabled;
    4.51 @@ -949,63 +957,100 @@ static void vmx_do_invlpg(unsigned long 
    4.52  }
    4.53  
    4.54  
    4.55 -static int check_for_null_selector(unsigned long eip, int inst_len, int dir)
    4.56 +static int vmx_check_descriptor(int long_mode, unsigned long eip, int inst_len,
    4.57 +                                enum segment seg, unsigned long *base)
    4.58  {
    4.59 -    unsigned char inst[MAX_INST_LEN];
    4.60 -    unsigned long sel;
    4.61 -    int i;
    4.62 -    int inst_copy_from_guest(unsigned char *, unsigned long, int);
    4.63 +    enum vmcs_field ar_field, base_field;
    4.64 +    u32 ar_bytes;
    4.65  
    4.66 -    /* INS can only use ES segment register, and it can't be overridden */
    4.67 -    if ( dir == IOREQ_READ )
    4.68 +    *base = 0;
    4.69 +    if ( seg != seg_es )
    4.70      {
    4.71 -        sel = __vmread(GUEST_ES_SELECTOR);
    4.72 -        return sel == 0 ? 1 : 0;
    4.73 +        unsigned char inst[MAX_INST_LEN];
    4.74 +        int i;
    4.75 +        extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
    4.76 +
    4.77 +        if ( !long_mode )
    4.78 +            eip += __vmread(GUEST_CS_BASE);
    4.79 +        memset(inst, 0, MAX_INST_LEN);
    4.80 +        if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
    4.81 +        {
    4.82 +            gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
    4.83 +            domain_crash(current->domain);
    4.84 +            return 0;
    4.85 +        }
    4.86 +
    4.87 +        for ( i = 0; i < inst_len; i++ )
    4.88 +        {
    4.89 +            switch ( inst[i] )
    4.90 +            {
    4.91 +            case 0xf3: /* REPZ */
    4.92 +            case 0xf2: /* REPNZ */
    4.93 +            case 0xf0: /* LOCK */
    4.94 +            case 0x66: /* data32 */
    4.95 +            case 0x67: /* addr32 */
    4.96 +#ifdef __x86_64__
    4.97 +            case 0x40 ... 0x4f: /* REX */
    4.98 +#endif
    4.99 +                continue;
   4.100 +            case 0x2e: /* CS */
   4.101 +                seg = seg_cs;
   4.102 +                continue;
   4.103 +            case 0x36: /* SS */
   4.104 +                seg = seg_ss;
   4.105 +                continue;
   4.106 +            case 0x26: /* ES */
   4.107 +                seg = seg_es;
   4.108 +                continue;
   4.109 +            case 0x64: /* FS */
   4.110 +                seg = seg_fs;
   4.111 +                continue;
   4.112 +            case 0x65: /* GS */
   4.113 +                seg = seg_gs;
   4.114 +                continue;
   4.115 +            case 0x3e: /* DS */
   4.116 +                seg = seg_ds;
   4.117 +                continue;
   4.118 +            }
   4.119 +        }
   4.120      }
   4.121  
   4.122 -    memset(inst, 0, MAX_INST_LEN);
   4.123 -    if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
   4.124 +    switch ( seg )
   4.125      {
   4.126 -        gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
   4.127 -        domain_crash(current->domain);
   4.128 +    case seg_cs:
   4.129 +        ar_field = GUEST_CS_AR_BYTES;
   4.130 +        base_field = GUEST_CS_BASE;
   4.131 +        break;
   4.132 +    case seg_ds:
   4.133 +        ar_field = GUEST_DS_AR_BYTES;
   4.134 +        base_field = GUEST_DS_BASE;
   4.135 +        break;
   4.136 +    case seg_es:
   4.137 +        ar_field = GUEST_ES_AR_BYTES;
   4.138 +        base_field = GUEST_ES_BASE;
   4.139 +        break;
   4.140 +    case seg_fs:
   4.141 +        ar_field = GUEST_FS_AR_BYTES;
   4.142 +        base_field = GUEST_FS_BASE;
   4.143 +        break;
   4.144 +    case seg_gs:
   4.145 +        ar_field = GUEST_GS_AR_BYTES;
   4.146 +        base_field = GUEST_GS_BASE;
   4.147 +        break;
   4.148 +    case seg_ss:
   4.149 +        ar_field = GUEST_SS_AR_BYTES;
   4.150 +        base_field = GUEST_SS_BASE;
   4.151 +        break;
   4.152 +    default:
   4.153 +        BUG();
   4.154          return 0;
   4.155      }
   4.156  
   4.157 -    for ( i = 0; i < inst_len; i++ )
   4.158 -    {
   4.159 -        switch ( inst[i] )
   4.160 -        {
   4.161 -        case 0xf3: /* REPZ */
   4.162 -        case 0xf2: /* REPNZ */
   4.163 -        case 0xf0: /* LOCK */
   4.164 -        case 0x66: /* data32 */
   4.165 -        case 0x67: /* addr32 */
   4.166 -            continue;
   4.167 -        case 0x2e: /* CS */
   4.168 -            sel = __vmread(GUEST_CS_SELECTOR);
   4.169 -            break;
   4.170 -        case 0x36: /* SS */
   4.171 -            sel = __vmread(GUEST_SS_SELECTOR);
   4.172 -            break;
   4.173 -        case 0x26: /* ES */
   4.174 -            sel = __vmread(GUEST_ES_SELECTOR);
   4.175 -            break;
   4.176 -        case 0x64: /* FS */
   4.177 -            sel = __vmread(GUEST_FS_SELECTOR);
   4.178 -            break;
   4.179 -        case 0x65: /* GS */
   4.180 -            sel = __vmread(GUEST_GS_SELECTOR);
   4.181 -            break;
   4.182 -        case 0x3e: /* DS */
   4.183 -            /* FALLTHROUGH */
   4.184 -        default:
   4.185 -            /* DS is the default */
   4.186 -            sel = __vmread(GUEST_DS_SELECTOR);
   4.187 -        }
   4.188 -        return sel == 0 ? 1 : 0;
   4.189 -    }
   4.190 +    if ( !long_mode || seg == seg_fs || seg == seg_gs )
   4.191 +        *base = __vmread(base_field);
   4.192 +    ar_bytes = __vmread(ar_field);
   4.193  
   4.194 -    return 0;
   4.195 +    return !(ar_bytes & 0x10000);
   4.196  }
   4.197  
   4.198  static void vmx_io_instruction(unsigned long exit_qualification,
   4.199 @@ -1013,7 +1058,7 @@ static void vmx_io_instruction(unsigned 
   4.200  {
   4.201      struct cpu_user_regs *regs;
   4.202      struct hvm_io_op *pio_opp;
   4.203 -    unsigned long port, size;
   4.204 +    unsigned int port, size;
   4.205      int dir, df, vm86;
   4.206  
   4.207      pio_opp = &current->arch.hvm_vcpu.io_op;
   4.208 @@ -1044,21 +1089,32 @@ static void vmx_io_instruction(unsigned 
   4.209      dir = test_bit(3, &exit_qualification); /* direction */
   4.210  
   4.211      if ( test_bit(4, &exit_qualification) ) { /* string instruction */
   4.212 -        unsigned long addr, count = 1;
   4.213 +        unsigned long addr, count = 1, base;
   4.214 +        u32 ar_bytes;
   4.215          int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
   4.216 +        int long_mode = 0;
   4.217  
   4.218 +        ar_bytes = __vmread(GUEST_CS_AR_BYTES);
   4.219 +#ifdef __x86_64__
   4.220 +        if ( vmx_long_mode_enabled(current) && (ar_bytes & (1u<<13)) )
   4.221 +            long_mode = 1;
   4.222 +#endif
   4.223          addr = __vmread(GUEST_LINEAR_ADDRESS);
   4.224  
   4.225          /*
   4.226           * In protected mode, guest linear address is invalid if the
   4.227           * selector is null.
   4.228           */
   4.229 -        if ( !vm86 && check_for_null_selector(regs->eip, inst_len, dir) )
   4.230 -            addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
   4.231 +        if ( !vmx_check_descriptor(long_mode, regs->eip, inst_len,
   4.232 +                                   dir == IOREQ_WRITE ? seg_ds : seg_es,
   4.233 +                                   &base) )
   4.234 +            addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi;
   4.235  
   4.236          if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */
   4.237              pio_opp->flags |= REPZ;
   4.238 -            count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
   4.239 +            count = regs->ecx;
   4.240 +            if ( !long_mode && (vm86 || !(ar_bytes & (1u<<14))) )
   4.241 +                count &= 0xFFFF;
   4.242          }
   4.243  
   4.244          /*
     5.1 --- a/xen/include/asm-x86/hvm/hvm.h	Tue Nov 28 11:43:39 2006 +0000
     5.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Tue Nov 28 11:45:54 2006 +0000
     5.3 @@ -62,12 +62,13 @@ struct hvm_function_table {
     5.4          struct vcpu *v, struct cpu_user_regs *r);
     5.5      /*
     5.6       * Examine specifics of the guest state:
     5.7 -     * 1) determine whether the guest is in real or vm8086 mode,
     5.8 -     * 2) determine whether paging is enabled,
     5.9 -     * 3) return the current guest control-register value
    5.10 -     * 4) return the current guest segment descriptor base
    5.11 +     * 1) determine whether paging is enabled,
    5.12 +     * 2) determine whether long mode is enabled,
    5.13 +     * 3) determine whether PAE paging is enabled,
    5.14 +     * 4) determine the mode the guest is running in,
    5.15 +     * 5) return the current guest control-register value
    5.16 +     * 6) return the current guest segment descriptor base
    5.17       */
    5.18 -    int (*realmode)(struct vcpu *v);
    5.19      int (*paging_enabled)(struct vcpu *v);
    5.20      int (*long_mode_enabled)(struct vcpu *v);
    5.21      int (*pae_enabled)(struct vcpu *v);
    5.22 @@ -128,12 +129,6 @@ hvm_load_cpu_guest_regs(struct vcpu *v, 
    5.23  }
    5.24  
    5.25  static inline int
    5.26 -hvm_realmode(struct vcpu *v)
    5.27 -{
    5.28 -    return hvm_funcs.realmode(v);
    5.29 -}
    5.30 -
    5.31 -static inline int
    5.32  hvm_paging_enabled(struct vcpu *v)
    5.33  {
    5.34      return hvm_funcs.paging_enabled(v);
    5.35 @@ -157,7 +152,7 @@ hvm_guest_x86_mode(struct vcpu *v)
    5.36      return hvm_funcs.guest_x86_mode(v);
    5.37  }
    5.38  
    5.39 -int hvm_instruction_length(struct cpu_user_regs *regs, int mode);
    5.40 +int hvm_instruction_length(unsigned long pc, int mode);
    5.41  
    5.42  static inline void
    5.43  hvm_update_host_cr3(struct vcpu *v)