ia64/xen-unstable

changeset 17100:09b53f27a18b

hvm: Remove lots of custom trap-and-emulate code and defer to
handle_mmio()->hvm_emulate_one()->x86_emulate().
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Feb 21 18:02:42 2008 +0000 (2008-02-21)
parents 591cfd37bd54
children b21b434b3b1a
files xen/arch/x86/hvm/emulate.c xen/arch/x86/hvm/svm/emulate.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/x86_emulate.c xen/include/asm-x86/hvm/hvm.h xen/include/asm-x86/hvm/svm/emulate.h xen/include/asm-x86/x86_emulate.h
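
The shape of the change: instead of hand-rolled decoding in each vendor's
vmexit handler, the generic emulator x86_emulate() decodes the instruction
and calls back through x86_emulate_ops (this patch adds an .invlpg hook);
the HVM glue in emulate.c resolves seg:offset to a linear address and
forwards it to a new per-vendor hvm_funcs.invlpg_intercept. The standalone
C sketch below models that layering; it is not Xen source, and the toy_*
names are illustrative stand-ins for the functions the diff touches:

#include <stdio.h>

enum x86_segment { x86_seg_ds };

/* Generic-emulator callback table, in the spirit of x86_emulate_ops. */
struct x86_emulate_ops {
    int (*invlpg)(enum x86_segment seg, unsigned long offset);
};

/* Stand-in for the vendor hook hvm_funcs.invlpg_intercept. */
static void (*invlpg_intercept)(unsigned long vaddr);

static void toy_svm_invlpg_intercept(unsigned long vaddr)
{
    /* In Xen this is paging_invlpg() + svm_asid_g_invlpg(). */
    printf("vendor hook: invalidate mapping at %#lx\n", vaddr);
}

/* Stand-in for hvmemul_invlpg(); Xen resolves seg:offset with
 * hvmemul_virtual_to_linear(), here the segment base is assumed 0. */
static int toy_hvmemul_invlpg(enum x86_segment seg, unsigned long offset)
{
    unsigned long addr = offset;
    (void)seg;
    invlpg_intercept(addr);
    return 0; /* X86EMUL_OKAY */
}

static struct x86_emulate_ops toy_ops = { .invlpg = toy_hvmemul_invlpg };

int main(void)
{
    invlpg_intercept = toy_svm_invlpg_intercept;
    /* x86_emulate() would make this call after decoding "invlpg [mem]". */
    toy_ops.invlpg(x86_seg_ds, 0xdeadb000UL);
    return 0;
}
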
line diff
     1.1 --- a/xen/arch/x86/hvm/emulate.c	Thu Feb 21 15:06:37 2008 +0000
     1.2 +++ b/xen/arch/x86/hvm/emulate.c	Thu Feb 21 18:02:42 2008 +0000
     1.3 @@ -666,6 +666,25 @@ static void hvmemul_load_fpu_ctxt(
     1.4          hvm_funcs.fpu_dirty_intercept();
     1.5  }
     1.6  
     1.7 +static int hvmemul_invlpg(
     1.8 +    enum x86_segment seg,
     1.9 +    unsigned long offset,
    1.10 +    struct x86_emulate_ctxt *ctxt)
    1.11 +{
    1.12 +    struct hvm_emulate_ctxt *hvmemul_ctxt =
    1.13 +        container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
    1.14 +    unsigned long addr;
    1.15 +    int rc;
    1.16 +
    1.17 +    rc = hvmemul_virtual_to_linear(
    1.18 +        seg, offset, 1, hvm_access_none, hvmemul_ctxt, &addr);
    1.19 +
    1.20 +    if ( rc == X86EMUL_OKAY )
    1.21 +        hvm_funcs.invlpg_intercept(addr);
    1.22 +
    1.23 +    return rc;
    1.24 +}
    1.25 +
    1.26  static struct x86_emulate_ops hvm_emulate_ops = {
    1.27      .read          = hvmemul_read,
    1.28      .insn_fetch    = hvmemul_insn_fetch,
    1.29 @@ -688,7 +707,8 @@ static struct x86_emulate_ops hvm_emulat
    1.30      .hlt           = hvmemul_hlt,
    1.31      .inject_hw_exception = hvmemul_inject_hw_exception,
    1.32      .inject_sw_interrupt = hvmemul_inject_sw_interrupt,
    1.33 -    .load_fpu_ctxt = hvmemul_load_fpu_ctxt
    1.34 +    .load_fpu_ctxt = hvmemul_load_fpu_ctxt,
    1.35 +    .invlpg        = hvmemul_invlpg
    1.36  };
    1.37  
    1.38  int hvm_emulate_one(
     2.1 --- a/xen/arch/x86/hvm/svm/emulate.c	Thu Feb 21 15:06:37 2008 +0000
     2.2 +++ b/xen/arch/x86/hvm/svm/emulate.c	Thu Feb 21 18:02:42 2008 +0000
     2.3 @@ -14,7 +14,6 @@
     2.4   * You should have received a copy of the GNU General Public License along with
     2.5   * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
     2.6   * Place - Suite 330, Boston, MA 02111-1307 USA.
     2.7 - *
     2.8   */
     2.9  
    2.10  #include <xen/config.h>
    2.11 @@ -28,350 +27,55 @@
    2.12  #include <asm/hvm/svm/vmcb.h>
    2.13  #include <asm/hvm/svm/emulate.h>
    2.14  
    2.15 -
    2.16 -extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
    2.17 -        int inst_len);
    2.18 -
    2.19 -#define REX_PREFIX_BASE 0x40
    2.20 -#define REX_X           0x02
    2.21 -#define REX_W           0x08
    2.22 -#define REX_R           0x04
    2.23 -#define REX_B           0x01
    2.24 -
    2.25 -#define IS_REX_PREFIX(prefix) ((prefix & 0xf0) == REX_PREFIX_BASE)
    2.26 -
    2.27 -#define DECODE_MODRM_MOD(modrm) ((modrm & 0xC0) >> 6)
    2.28 +int inst_copy_from_guest(
    2.29 +    unsigned char *buf, unsigned long guest_eip, int inst_len);
    2.30  
    2.31 -#define DECODE_MODRM_REG(prefix, modrm)                             \
    2.32 -    ((prefix & REX_R) && IS_REX_PREFIX(prefix))                     \
    2.33 -        ? (0x08 | ((modrm >> 3) & 0x07)) : ((modrm >> 3) & 0x07)
    2.34 -
    2.35 -#define DECODE_MODRM_RM(prefix, modrm)                              \
    2.36 -    ((prefix & REX_B) && IS_REX_PREFIX(prefix))                     \
    2.37 -        ? (0x08 | (modrm & 0x07)) : (modrm & 0x07)
    2.38 -
    2.39 -#define DECODE_SIB_SCALE(sib) DECODE_MODRM_MOD(sib)
    2.40 -
    2.41 -#define DECODE_SIB_INDEX(prefix, sib)                               \
    2.42 -    ((prefix & REX_X) && IS_REX_PREFIX(prefix))                     \
    2.43 -        ? (0x08 | ((sib >> 3) & 0x07)) : ((sib >> 3) & 0x07)
    2.44 -
    2.45 -#define DECODE_SIB_BASE(prefix, sib) DECODE_MODRM_RM(prefix, sib)
    2.46 -
    2.47 -
    2.48 -static inline unsigned long DECODE_GPR_VALUE(
    2.49 -    struct cpu_user_regs *regs, u8 gpr_rm)
    2.50 +static unsigned int is_prefix(u8 opc)
    2.51  {
    2.52 -    unsigned long value;
    2.53 -    switch (gpr_rm) 
    2.54 -    { 
    2.55 -    case 0x0: 
    2.56 -        value = regs->eax;
    2.57 -        break;
    2.58 -    case 0x1:
    2.59 -        value = regs->ecx;
    2.60 -        break;
    2.61 -    case 0x2:
    2.62 -        value = regs->edx;
    2.63 -        break;
    2.64 -    case 0x3:
    2.65 -        value = regs->ebx;
    2.66 -        break;
    2.67 -    case 0x4:
    2.68 -        value = regs->esp;
    2.69 -    case 0x5:
    2.70 -        value = regs->ebp;
    2.71 -        break;
    2.72 -    case 0x6:
    2.73 -        value = regs->esi;
    2.74 -        break;
    2.75 -    case 0x7:
    2.76 -        value = regs->edi;
    2.77 -        break;
    2.78 +    switch ( opc )
    2.79 +    {
    2.80 +    case 0x66:
    2.81 +    case 0x67:
    2.82 +    case 0x2E:
    2.83 +    case 0x3E:
    2.84 +    case 0x26:
    2.85 +    case 0x64:
    2.86 +    case 0x65:
    2.87 +    case 0x36:
    2.88 +    case 0xF0:
    2.89 +    case 0xF3:
    2.90 +    case 0xF2:
    2.91  #if __x86_64__
    2.92 -    case 0x8:
    2.93 -        value = regs->r8;
    2.94 -        break;
    2.95 -    case 0x9:
    2.96 -        value = regs->r9;
    2.97 -        break;
    2.98 -    case 0xA:
    2.99 -        value = regs->r10;
   2.100 -        break;
   2.101 -    case 0xB:
   2.102 -        value = regs->r11;
   2.103 -        break;
   2.104 -    case 0xC:
   2.105 -        value = regs->r12;
   2.106 -        break;
   2.107 -    case 0xD:
   2.108 -        value = regs->r13;
   2.109 -        break;
   2.110 -    case 0xE:
   2.111 -        value = regs->r14;
   2.112 -        break;
   2.113 -    case 0xF:
   2.114 -        value = regs->r15;
   2.115 -        break;
   2.116 -#endif
   2.117 -    default:
   2.118 -        printk("Invlaid gpr_rm = %d\n", gpr_rm);
   2.119 -        ASSERT(0);
   2.120 -        value = (unsigned long)-1; /* error retrun */
   2.121 +    case 0x40 ... 0x4f:
   2.122 +#endif /* __x86_64__ */
   2.123 +        return 1;
   2.124      }
   2.125 -    return value;
   2.126 +    return 0;
   2.127  }
   2.128  
   2.129 -
   2.130 -#define CHECK_LENGTH64(num) \
   2.131 -    if (num > length) \
   2.132 -    { \
   2.133 -        *size = 0; \
   2.134 -        return (unsigned long) -1; \
   2.135 -    }
   2.136 -
   2.137 -#define modrm operand [0]
   2.138 -
   2.139 -#define sib operand [1]
   2.140 -
   2.141 -
   2.142 -unsigned long get_effective_addr_modrm64(struct cpu_user_regs *regs, 
   2.143 -                                         const u8 prefix, int inst_len,
   2.144 -                                         const u8 *operand, u8 *size)
   2.145 +static unsigned long svm_rip2pointer(struct vcpu *v)
   2.146  {
   2.147 -    unsigned long effective_addr = (unsigned long) -1;
   2.148 -    u8 length, modrm_mod, modrm_rm;
   2.149 -    u32 disp = 0;
   2.150 -    struct vcpu *v = current;
   2.151 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.152 -
   2.153 -    HVM_DBG_LOG(DBG_LEVEL_1, "prefix = %x, length = %d, operand[0,1] = %x %x",
   2.154 -                prefix, *size, operand[0], operand[1]);
   2.155 -
   2.156 -    if ((NULL == size) || (NULL == operand) || (1 > *size))
   2.157 -    {
   2.158 -        *size = 0;
   2.159 -        return effective_addr;
   2.160 -    }
   2.161 -
   2.162 -    modrm_mod = DECODE_MODRM_MOD(modrm);
   2.163 -    modrm_rm = DECODE_MODRM_RM(prefix, modrm);
   2.164 -
   2.165 -    length = *size;
   2.166 -    *size = 1;
   2.167 -    switch (modrm_rm)
   2.168 -    {
   2.169 -    case 0x4:
   2.170 -#if __x86_64__
   2.171 -    case 0xC:
   2.172 -#endif
   2.173 -        if (modrm_mod < 3)
   2.174 -        {
   2.175 -            *size = length;
   2.176 -            effective_addr = get_effective_addr_sib(vmcb, regs, prefix, operand, size);
   2.177 -        }
   2.178 -        else
   2.179 -        {
   2.180 -            effective_addr = DECODE_GPR_VALUE(regs, modrm_rm);
   2.181 -        }
   2.182 -        break;
   2.183 -
   2.184 -    case 0x5:
   2.185 -        if (0 < modrm_mod)
   2.186 -        {
   2.187 -            effective_addr = regs->ebp;
   2.188 -            *size = 1;
   2.189 -            break;
   2.190 -        }
   2.191 -#if __x86_64__
   2.192 -        /* FALLTHRU */
   2.193 -    case 0xD:
   2.194 -        if (0 < modrm_mod)
   2.195 -        {
   2.196 -            *size = 1;
   2.197 -            effective_addr = regs->r13;
   2.198 -            break;
   2.199 -        }
   2.200 -#endif
   2.201 -
   2.202 -        CHECK_LENGTH64(*size + (u8)sizeof(u32));
   2.203 -
   2.204 -        memcpy (&disp, operand + 1, sizeof (u32));
   2.205 -        *size += sizeof (u32);
   2.206 -
   2.207 -#if __x86_64__
   2.208 -        /* 64-bit mode */
   2.209 -        if (vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v))
   2.210 -            return regs->eip + inst_len + *size + disp;
   2.211 -#endif
   2.212 -        return disp;
   2.213 -
   2.214 -    default:
   2.215 -        effective_addr = DECODE_GPR_VALUE(regs, modrm_rm);
   2.216 -
   2.217 -    }
   2.218 -
   2.219 -    if (3 > modrm_mod)
   2.220 -    {
   2.221 -        if (1 == modrm_mod )
   2.222 -        {
   2.223 -            CHECK_LENGTH64(*size + (u8)sizeof(u8));
   2.224 -            disp = sib;
   2.225 -            *size += sizeof (u8);
   2.226 -        }
   2.227 -        else if (2 == modrm_mod )
   2.228 -        {
   2.229 -            CHECK_LENGTH64(*size + sizeof (u32));
   2.230 -            memcpy (&disp, operand + 1, sizeof (u32));
   2.231 -            *size += sizeof (u32);
   2.232 -        }
   2.233 -
   2.234 -        effective_addr += disp;
   2.235 -    }
   2.236 -
   2.237 -    return effective_addr;
   2.238 -}
   2.239 -
   2.240 -
   2.241 -unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, 
   2.242 -        struct cpu_user_regs *regs, const u8 prefix, const u8 *operand, 
   2.243 -        u8 *size)
   2.244 -{
   2.245 -    unsigned long base, effective_addr = (unsigned long)-1;
   2.246 -    u8 sib_scale, sib_idx, sib_base, length;
   2.247 -    u32 disp = 0;
   2.248 -
   2.249 -    if (NULL == size || NULL == operand || 2 > *size)
   2.250 -    {
   2.251 -        *size = 0;
   2.252 -        return effective_addr;
   2.253 -    }
   2.254 -
   2.255 -    sib_scale = DECODE_SIB_SCALE(sib);
   2.256 -    sib_idx = DECODE_SIB_INDEX(prefix, sib);
   2.257 -    sib_base = DECODE_SIB_BASE(prefix, sib);
   2.258 -
   2.259 -    base = DECODE_GPR_VALUE(regs, sib_base);
   2.260 -
   2.261 -    if ((unsigned long)-1 == base)
   2.262 -    {
   2.263 -        /* 
   2.264 -         * Surely this is wrong. base should be allowed to be -1, even if
   2.265 -         * it's not the usual case...
   2.266 -         */
   2.267 -        *size = 0;
   2.268 -        return base;
   2.269 -    }
   2.270 -
   2.271 -    length = *size;
   2.272 -    *size = 2;
   2.273 -    if (0x5 == (sib_base & 0x5))
   2.274 -    {
   2.275 -        switch (DECODE_MODRM_MOD(modrm))
   2.276 -        {
   2.277 -        case 0:
   2.278 -            CHECK_LENGTH64(*size + (u8)sizeof(u32));
   2.279 -            memcpy (&disp, operand + 2, sizeof(u32));
   2.280 -            *size += sizeof(u32);
   2.281 -            base = disp;
   2.282 -            break;
   2.283 -
   2.284 -        case 1:
   2.285 -            CHECK_LENGTH64(*size + (u8)sizeof (u8));
   2.286 -            *size += sizeof(u8);
   2.287 -            base += operand [2];
   2.288 -            break;
   2.289 -
   2.290 -        case 2:
   2.291 -            CHECK_LENGTH64(*size + (u8)sizeof (u32));
   2.292 -            memcpy(&disp, operand + 2, sizeof(u32));
   2.293 -            *size += sizeof(u32);
   2.294 -            base += disp;
   2.295 -        }
   2.296 -    }
   2.297 -
   2.298 -    if (4 == sib_idx)
   2.299 -        return base;
   2.300 -
   2.301 -    effective_addr = DECODE_GPR_VALUE(regs, sib_idx);
   2.302 -
   2.303 -    effective_addr <<= sib_scale;
   2.304 -
   2.305 -    return (effective_addr + base);
   2.306 -}
   2.307 -
   2.308 -
   2.309 -/* Get the register/mode number of src register in ModRM register. */
   2.310 -unsigned int decode_dest_reg(u8 prefix, u8 m)
   2.311 -{
   2.312 -    return DECODE_MODRM_REG(prefix, m);
   2.313 -}
   2.314 -
   2.315 -unsigned int decode_src_reg(u8 prefix, u8 m)
   2.316 -{
   2.317 -    return DECODE_MODRM_RM(prefix, m);
   2.318 -}
   2.319 -
   2.320 -
   2.321 -unsigned long svm_rip2pointer(struct vcpu *v)
   2.322 -{
   2.323 -    /*
   2.324 -     * The following is subtle. Intuitively this code would be something like:
   2.325 -     *
   2.326 -     *  if (16bit) addr = (cs << 4) + rip; else addr = rip;
   2.327 -     *
   2.328 -     * However, this code doesn't work for code executing after CR0.PE=0,
   2.329 -     * but before the %cs has been updated. We don't get signalled when
   2.330 -     * %cs is update, but fortunately, base contain the valid base address
   2.331 -     * no matter what kind of addressing is used.
   2.332 -     */
   2.333      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.334      unsigned long p = vmcb->cs.base + guest_cpu_user_regs()->eip;
   2.335 -    ASSERT(v == current);
   2.336 -    if (!(vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v)))
   2.337 +    if ( !(vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v)) )
   2.338          return (u32)p; /* mask to 32 bits */
   2.339 -    /* NB. Should mask to 16 bits if in real mode or 16-bit protected mode. */
   2.340      return p;
   2.341  }
   2.342  
   2.343 -
   2.344 -#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
   2.345 -
   2.346  /* 
   2.347   * Here's how it works:
   2.348   * First byte: Length. 
   2.349   * Following bytes: Opcode bytes. 
   2.350   * Special case: Last byte, if zero, doesn't need to match. 
   2.351   */
   2.352 +#define MAKE_INSTR(nm, ...) static const u8 OPCODE_##nm[] = { __VA_ARGS__ }
   2.353  MAKE_INSTR(INVD,   2, 0x0f, 0x08);
   2.354  MAKE_INSTR(WBINVD, 2, 0x0f, 0x09);
   2.355  MAKE_INSTR(CPUID,  2, 0x0f, 0xa2);
   2.356  MAKE_INSTR(RDMSR,  2, 0x0f, 0x32);
   2.357  MAKE_INSTR(WRMSR,  2, 0x0f, 0x30);
   2.358 -MAKE_INSTR(CLI,    1, 0xfa);
   2.359 -MAKE_INSTR(STI,    1, 0xfb);
   2.360 -MAKE_INSTR(RDPMC,  2, 0x0f, 0x33);
   2.361 -MAKE_INSTR(CLGI,   3, 0x0f, 0x01, 0xdd);
   2.362 -MAKE_INSTR(STGI,   3, 0x0f, 0x01, 0xdc);
   2.363 -MAKE_INSTR(VMRUN,  3, 0x0f, 0x01, 0xd8);
   2.364 -MAKE_INSTR(VMLOAD, 3, 0x0f, 0x01, 0xda);
   2.365 -MAKE_INSTR(VMSAVE, 3, 0x0f, 0x01, 0xdb);
   2.366  MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
   2.367 -MAKE_INSTR(PAUSE,  2, 0xf3, 0x90);
   2.368 -MAKE_INSTR(SKINIT, 3, 0x0f, 0x01, 0xde);
   2.369 -MAKE_INSTR(MOV2CR, 3, 0x0f, 0x22, 0x00);
   2.370 -MAKE_INSTR(MOVCR2, 3, 0x0f, 0x20, 0x00);
   2.371 -MAKE_INSTR(MOV2DR, 3, 0x0f, 0x23, 0x00);
   2.372 -MAKE_INSTR(MOVDR2, 3, 0x0f, 0x21, 0x00);
   2.373 -MAKE_INSTR(PUSHF,  1, 0x9c);
   2.374 -MAKE_INSTR(POPF,   1, 0x9d);
   2.375 -MAKE_INSTR(RSM,    2, 0x0f, 0xaa);
   2.376 -MAKE_INSTR(INVLPG, 3, 0x0f, 0x01, 0x00);
   2.377 -MAKE_INSTR(INVLPGA,3, 0x0f, 0x01, 0xdf);
   2.378  MAKE_INSTR(HLT,    1, 0xf4);
   2.379 -MAKE_INSTR(CLTS,   2, 0x0f, 0x06);
   2.380 -MAKE_INSTR(LMSW,   3, 0x0f, 0x01, 0x00);
   2.381 -MAKE_INSTR(SMSW,   3, 0x0f, 0x01, 0x00);
   2.382  MAKE_INSTR(INT3,   1, 0xcc);
   2.383  
   2.384  static const u8 *opc_bytes[INSTR_MAX_COUNT] = 
   2.385 @@ -381,55 +85,24 @@ static const u8 *opc_bytes[INSTR_MAX_COU
   2.386      [INSTR_CPUID]  = OPCODE_CPUID,
   2.387      [INSTR_RDMSR]  = OPCODE_RDMSR,
   2.388      [INSTR_WRMSR]  = OPCODE_WRMSR,
   2.389 -    [INSTR_CLI]    = OPCODE_CLI,
   2.390 -    [INSTR_STI]    = OPCODE_STI,
   2.391 -    [INSTR_RDPMC]  = OPCODE_RDPMC,
   2.392 -    [INSTR_CLGI]   = OPCODE_CLGI,
   2.393 -    [INSTR_STGI]   = OPCODE_STGI,
   2.394 -    [INSTR_VMRUN]  = OPCODE_VMRUN,
   2.395 -    [INSTR_VMLOAD] = OPCODE_VMLOAD,
   2.396 -    [INSTR_VMSAVE] = OPCODE_VMSAVE,
   2.397      [INSTR_VMCALL] = OPCODE_VMCALL,
   2.398 -    [INSTR_PAUSE]  = OPCODE_PAUSE,
   2.399 -    [INSTR_SKINIT] = OPCODE_SKINIT,
   2.400 -    [INSTR_MOV2CR] = OPCODE_MOV2CR,
   2.401 -    [INSTR_MOVCR2] = OPCODE_MOVCR2,
   2.402 -    [INSTR_MOV2DR] = OPCODE_MOV2DR,
   2.403 -    [INSTR_MOVDR2] = OPCODE_MOVDR2,
   2.404 -    [INSTR_PUSHF]  = OPCODE_PUSHF,
   2.405 -    [INSTR_POPF]   = OPCODE_POPF,
   2.406 -    [INSTR_RSM]    = OPCODE_RSM,
   2.407 -    [INSTR_INVLPG] = OPCODE_INVLPG,
   2.408 -    [INSTR_INVLPGA]= OPCODE_INVLPGA,
   2.409 -    [INSTR_CLTS]   = OPCODE_CLTS,
   2.410      [INSTR_HLT]    = OPCODE_HLT,
   2.411 -    [INSTR_LMSW]   = OPCODE_LMSW,
   2.412 -    [INSTR_SMSW]   = OPCODE_SMSW,
   2.413      [INSTR_INT3]   = OPCODE_INT3
   2.414  };
   2.415  
   2.416 -/* 
   2.417 - * Intel has a vmcs entry to give the instruction length. AMD doesn't.  So we
   2.418 - * have to do a little bit of work to find out... 
   2.419 - *
   2.420 - * The caller can either pass a NULL pointer to the guest_eip_buf, or a pointer
   2.421 - * to enough bytes to satisfy the instruction including prefix bytes.
   2.422 - */
   2.423  int __get_instruction_length_from_list(struct vcpu *v,
   2.424          enum instruction_index *list, unsigned int list_count, 
   2.425          u8 *guest_eip_buf, enum instruction_index *match)
   2.426  {
   2.427      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   2.428 -    unsigned int inst_len = 0;
   2.429 -    unsigned int i;
   2.430 -    unsigned int j;
   2.431 +    unsigned int i, j, inst_len = 0;
   2.432      int found = 0;
   2.433      enum instruction_index instr = 0;
   2.434      u8 buffer[MAX_INST_LEN];
   2.435      u8 *buf;
   2.436      const u8 *opcode = NULL;
   2.437  
   2.438 -    if (guest_eip_buf)
   2.439 +    if ( guest_eip_buf )
   2.440      {
   2.441          buf = guest_eip_buf;
   2.442      }
   2.443 @@ -439,53 +112,47 @@ int __get_instruction_length_from_list(s
   2.444          buf = buffer;
   2.445      }
   2.446  
   2.447 -    for (j = 0; j < list_count; j++)
   2.448 +    for ( j = 0; j < list_count; j++ )
   2.449      {
   2.450          instr = list[j];
   2.451          opcode = opc_bytes[instr];
   2.452          ASSERT(opcode);
   2.453  
   2.454 -        while (inst_len < MAX_INST_LEN && 
   2.455 +        while ( (inst_len < MAX_INST_LEN) && 
   2.456                  is_prefix(buf[inst_len]) && 
   2.457 -                !is_prefix(opcode[1]))
   2.458 +                !is_prefix(opcode[1]) )
   2.459              inst_len++;
   2.460  
   2.461          ASSERT(opcode[0] <= 15);    /* Make sure the table is correct. */
   2.462          found = 1;
   2.463  
   2.464 -        for (i = 0; i < opcode[0]; i++)
   2.465 +        for ( i = 0; i < opcode[0]; i++ )
   2.466          {
   2.467              /* If the last byte is zero, we just accept it without checking */
   2.468 -            if (i == opcode[0]-1 && opcode[i+1] == 0)
   2.469 +            if ( (i == (opcode[0]-1)) && (opcode[i+1] == 0) )
   2.470                  break;
   2.471  
   2.472 -            if (buf[inst_len+i] != opcode[i+1])
   2.473 +            if ( buf[inst_len+i] != opcode[i+1] )
   2.474              {
   2.475                  found = 0;
   2.476                  break;
   2.477              }
   2.478          }
   2.479  
   2.480 -        if (found)
   2.481 -            break;
   2.482 -    }
   2.483 -
   2.484 -    /* It's a match */
   2.485 -    if (found)
   2.486 -    {
   2.487 -        inst_len += opcode[0];
   2.488 -
   2.489 -        ASSERT(inst_len <= MAX_INST_LEN);
   2.490 -
   2.491 -        if (match)
   2.492 -            *match = instr;
   2.493 -
   2.494 -        return inst_len;
   2.495 +        if ( found )
   2.496 +            goto done;
   2.497      }
   2.498  
   2.499      printk("%s: Mismatch between expected and actual instruction bytes: "
   2.500              "eip = %lx\n",  __func__, (unsigned long)vmcb->rip);
   2.501      return 0;
   2.502 +
   2.503 + done:
   2.504 +    inst_len += opcode[0];
   2.505 +    ASSERT(inst_len <= MAX_INST_LEN);
   2.506 +    if ( match )
   2.507 +        *match = instr;
   2.508 +    return inst_len;
   2.509  }
   2.510  
   2.511  /*
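
What survives in svm/emulate.c is just the instruction-length matcher. Its
opcode table format -- first byte is the length, the following bytes are the
opcode, and a final zero byte matches anything -- is simple enough to
exercise on its own. Below is a simplified, self-contained model of the
matching loop in __get_instruction_length_from_list() for a single opcode
(REX prefixes and the multi-entry list are omitted); a sketch, not the Xen
function:

#include <stdio.h>

#define MAX_INST_LEN 15

/* Same table format as above: first byte is the length, the rest are
 * opcode bytes, and a trailing zero byte matches any value. */
#define MAKE_INSTR(nm, ...) static const unsigned char OPCODE_##nm[] = { __VA_ARGS__ }
MAKE_INSTR(VMCALL, 3, 0x0f, 0x01, 0xd9);
MAKE_INSTR(HLT,    1, 0xf4);

static int is_prefix(unsigned char opc)
{
    switch ( opc )
    {
    case 0x66: case 0x67: case 0x2E: case 0x3E: case 0x26:
    case 0x64: case 0x65: case 0x36: case 0xF0: case 0xF3: case 0xF2:
        return 1;
    }
    return 0;
}

/* Simplified single-opcode version of the matching loop; returns the
 * instruction length including prefixes, or 0 on mismatch. */
static int match_len(const unsigned char *buf, const unsigned char *opcode)
{
    int i, len = 0;

    /* Skip guest prefix bytes, unless the pattern itself starts with one. */
    while ( (len < MAX_INST_LEN) && is_prefix(buf[len]) &&
            !is_prefix(opcode[1]) )
        len++;

    for ( i = 0; i < opcode[0]; i++ )
    {
        if ( (i == (opcode[0] - 1)) && (opcode[i + 1] == 0) )
            break; /* wildcard final byte */
        if ( buf[len + i] != opcode[i + 1] )
            return 0;
    }

    return len + opcode[0];
}

int main(void)
{
    /* operand-size prefix followed by VMCALL */
    unsigned char guest[MAX_INST_LEN] = { 0x66, 0x0f, 0x01, 0xd9 };

    printf("VMCALL: %d bytes\n", match_len(guest, OPCODE_VMCALL)); /* 4 */
    printf("HLT:    %d bytes\n", match_len(guest, OPCODE_HLT));    /* 0 */
    return 0;
}
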
     3.1 --- a/xen/arch/x86/hvm/svm/svm.c	Thu Feb 21 15:06:37 2008 +0000
     3.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Thu Feb 21 18:02:42 2008 +0000
     3.3 @@ -73,6 +73,7 @@ static void svm_wbinvd_intercept(void);
     3.4  static void svm_fpu_dirty_intercept(void);
     3.5  static int svm_msr_read_intercept(struct cpu_user_regs *regs);
     3.6  static int svm_msr_write_intercept(struct cpu_user_regs *regs);
     3.7 +static void svm_invlpg_intercept(unsigned long vaddr);
     3.8  
     3.9  /* va of hardware host save area     */
    3.10  static void *hsa[NR_CPUS] __read_mostly;
    3.11 @@ -474,28 +475,6 @@ static void svm_sync_vmcb(struct vcpu *v
    3.12      svm_vmsave(arch_svm->vmcb);
    3.13  }
    3.14  
    3.15 -static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
    3.16 -{
    3.17 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    3.18 -    int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
    3.19 -
    3.20 -    switch ( seg )
    3.21 -    {
    3.22 -    case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
    3.23 -    case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
    3.24 -    case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
    3.25 -    case x86_seg_fs: svm_sync_vmcb(v); return vmcb->fs.base;
    3.26 -    case x86_seg_gs: svm_sync_vmcb(v); return vmcb->gs.base;
    3.27 -    case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
    3.28 -    case x86_seg_tr: svm_sync_vmcb(v); return vmcb->tr.base;
    3.29 -    case x86_seg_gdtr: return vmcb->gdtr.base;
    3.30 -    case x86_seg_idtr: return vmcb->idtr.base;
    3.31 -    case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base;
    3.32 -    default: BUG();
    3.33 -    }
    3.34 -    return 0;
    3.35 -}
    3.36 -
    3.37  static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
    3.38                                       struct segment_register *reg)
    3.39  {
    3.40 @@ -804,7 +783,6 @@ static struct hvm_function_table svm_fun
    3.41      .load_cpu_ctxt        = svm_load_vmcb_ctxt,
    3.42      .interrupt_blocked    = svm_interrupt_blocked,
    3.43      .guest_x86_mode       = svm_guest_x86_mode,
    3.44 -    .get_segment_base     = svm_get_segment_base,
    3.45      .get_segment_register = svm_get_segment_register,
    3.46      .set_segment_register = svm_set_segment_register,
    3.47      .update_host_cr3      = svm_update_host_cr3,
    3.48 @@ -820,7 +798,8 @@ static struct hvm_function_table svm_fun
    3.49      .wbinvd_intercept     = svm_wbinvd_intercept,
    3.50      .fpu_dirty_intercept  = svm_fpu_dirty_intercept,
    3.51      .msr_read_intercept   = svm_msr_read_intercept,
    3.52 -    .msr_write_intercept  = svm_msr_write_intercept
    3.53 +    .msr_write_intercept  = svm_msr_write_intercept,
    3.54 +    .invlpg_intercept     = svm_invlpg_intercept
    3.55  };
    3.56  
    3.57  int start_svm(struct cpuinfo_x86 *c)
    3.58 @@ -987,679 +966,12 @@ static void svm_vmexit_do_cpuid(struct c
    3.59      __update_guest_eip(regs, inst_len);
    3.60  }
    3.61  
    3.62 -static unsigned long *get_reg_p(
    3.63 -    unsigned int gpreg, 
    3.64 -    struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
    3.65 -{
    3.66 -    unsigned long *reg_p = NULL;
    3.67 -    switch (gpreg)
    3.68 -    {
    3.69 -    case SVM_REG_EAX:
    3.70 -        reg_p = (unsigned long *)&regs->eax;
    3.71 -        break;
    3.72 -    case SVM_REG_EBX:
    3.73 -        reg_p = (unsigned long *)&regs->ebx;
    3.74 -        break;
    3.75 -    case SVM_REG_ECX:
    3.76 -        reg_p = (unsigned long *)&regs->ecx;
    3.77 -        break;
    3.78 -    case SVM_REG_EDX:
    3.79 -        reg_p = (unsigned long *)&regs->edx;
    3.80 -        break;
    3.81 -    case SVM_REG_EDI:
    3.82 -        reg_p = (unsigned long *)&regs->edi;
    3.83 -        break;
    3.84 -    case SVM_REG_ESI:
    3.85 -        reg_p = (unsigned long *)&regs->esi;
    3.86 -        break;
    3.87 -    case SVM_REG_EBP:
    3.88 -        reg_p = (unsigned long *)&regs->ebp;
    3.89 -        break;
    3.90 -    case SVM_REG_ESP:
    3.91 -        reg_p = (unsigned long *)&regs->esp;
    3.92 -        break;
    3.93 -#ifdef __x86_64__
    3.94 -    case SVM_REG_R8:
    3.95 -        reg_p = (unsigned long *)&regs->r8;
    3.96 -        break;
    3.97 -    case SVM_REG_R9:
    3.98 -        reg_p = (unsigned long *)&regs->r9;
    3.99 -        break;
   3.100 -    case SVM_REG_R10:
   3.101 -        reg_p = (unsigned long *)&regs->r10;
   3.102 -        break;
   3.103 -    case SVM_REG_R11:
   3.104 -        reg_p = (unsigned long *)&regs->r11;
   3.105 -        break;
   3.106 -    case SVM_REG_R12:
   3.107 -        reg_p = (unsigned long *)&regs->r12;
   3.108 -        break;
   3.109 -    case SVM_REG_R13:
   3.110 -        reg_p = (unsigned long *)&regs->r13;
   3.111 -        break;
   3.112 -    case SVM_REG_R14:
   3.113 -        reg_p = (unsigned long *)&regs->r14;
   3.114 -        break;
   3.115 -    case SVM_REG_R15:
   3.116 -        reg_p = (unsigned long *)&regs->r15;
   3.117 -        break;
   3.118 -#endif
   3.119 -    default:
   3.120 -        BUG();
   3.121 -    } 
   3.122 -    
   3.123 -    return reg_p;
   3.124 -}
   3.125 -
   3.126 -
   3.127 -static unsigned long get_reg(
   3.128 -    unsigned int gpreg, struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
   3.129 -{
   3.130 -    unsigned long *gp;
   3.131 -    gp = get_reg_p(gpreg, regs, vmcb);
   3.132 -    return *gp;
   3.133 -}
   3.134 -
   3.135 -
   3.136 -static void set_reg(
   3.137 -    unsigned int gpreg, unsigned long value, 
   3.138 -    struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
   3.139 -{
   3.140 -    unsigned long *gp;
   3.141 -    gp = get_reg_p(gpreg, regs, vmcb);
   3.142 -    *gp = value;
   3.143 -}
   3.144 -                           
   3.145 -
   3.146  static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
   3.147  {
   3.148      HVMTRACE_0D(DR_WRITE, v);
   3.149      __restore_debug_registers(v);
   3.150  }
   3.151  
   3.152 -
   3.153 -static void svm_get_prefix_info(struct vcpu *v, unsigned int dir, 
   3.154 -                                svm_segment_register_t **seg, 
   3.155 -                                unsigned int *asize)
   3.156 -{
   3.157 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   3.158 -    unsigned char inst[MAX_INST_LEN];
   3.159 -    int i;
   3.160 -
   3.161 -    memset(inst, 0, MAX_INST_LEN);
   3.162 -    if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst)) 
   3.163 -        != MAX_INST_LEN) 
   3.164 -    {
   3.165 -        gdprintk(XENLOG_ERR, "get guest instruction failed\n");
   3.166 -        domain_crash(current->domain);
   3.167 -        return;
   3.168 -    }
   3.169 -
   3.170 -    for (i = 0; i < MAX_INST_LEN; i++)
   3.171 -    {
   3.172 -        switch (inst[i])
   3.173 -        {
   3.174 -        case 0xf3: /* REPZ */
   3.175 -        case 0xf2: /* REPNZ */
   3.176 -        case 0xf0: /* LOCK */
   3.177 -        case 0x66: /* data32 */
   3.178 -#ifdef __x86_64__
   3.179 -            /* REX prefixes */
   3.180 -        case 0x40:
   3.181 -        case 0x41:
   3.182 -        case 0x42:
   3.183 -        case 0x43:
   3.184 -        case 0x44:
   3.185 -        case 0x45:
   3.186 -        case 0x46:
   3.187 -        case 0x47:
   3.188 -
   3.189 -        case 0x48:
   3.190 -        case 0x49:
   3.191 -        case 0x4a:
   3.192 -        case 0x4b:
   3.193 -        case 0x4c:
   3.194 -        case 0x4d:
   3.195 -        case 0x4e:
   3.196 -        case 0x4f:
   3.197 -#endif
   3.198 -            continue;
   3.199 -        case 0x67: /* addr32 */
   3.200 -            *asize ^= 48;        /* Switch 16/32 bits */
   3.201 -            continue;
   3.202 -        case 0x2e: /* CS */
   3.203 -            *seg = &vmcb->cs;
   3.204 -            continue;
   3.205 -        case 0x36: /* SS */
   3.206 -            *seg = &vmcb->ss;
   3.207 -            continue;
   3.208 -        case 0x26: /* ES */
   3.209 -            *seg = &vmcb->es;
   3.210 -            continue;
   3.211 -        case 0x64: /* FS */
   3.212 -            svm_sync_vmcb(v);
   3.213 -            *seg = &vmcb->fs;
   3.214 -            continue;
   3.215 -        case 0x65: /* GS */
   3.216 -            svm_sync_vmcb(v);
   3.217 -            *seg = &vmcb->gs;
   3.218 -            continue;
   3.219 -        case 0x3e: /* DS */
   3.220 -            *seg = &vmcb->ds;
   3.221 -            continue;
   3.222 -        default:
   3.223 -            break;
   3.224 -        }
   3.225 -        return;
   3.226 -    }
   3.227 -}
   3.228 -
   3.229 -
   3.230 -/* Get the address of INS/OUTS instruction */
   3.231 -static int svm_get_io_address(
   3.232 -    struct vcpu *v, struct cpu_user_regs *regs,
   3.233 -    unsigned int size, ioio_info_t info,
   3.234 -    unsigned long *count, unsigned long *addr)
   3.235 -{
   3.236 -    unsigned long        reg;
   3.237 -    unsigned int         asize, isize;
   3.238 -    int                  long_mode = 0;
   3.239 -    svm_segment_register_t *seg = NULL;
   3.240 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   3.241 -
   3.242 -    /* If we're in long mode, don't check the segment presence & limit */
   3.243 -    long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
   3.244 -
   3.245 -    /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit. 
   3.246 -     * l field combined with EFER_LMA says whether it's 16 or 64 bit. 
   3.247 -     */
   3.248 -    asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16);
   3.249 -
   3.250 -
   3.251 -    /* The ins/outs instructions are single byte, so if we have got more 
   3.252 -     * than one byte (+ maybe rep-prefix), we have some prefix so we need 
   3.253 -     * to figure out what it is...
   3.254 -     */
   3.255 -    isize = vmcb->exitinfo2 - regs->eip;
   3.256 -
   3.257 -    if (info.fields.rep)
   3.258 -        isize --;
   3.259 -
   3.260 -    if (isize > 1) 
   3.261 -        svm_get_prefix_info(v, info.fields.type, &seg, &asize);
   3.262 -
   3.263 -    if (info.fields.type == IOREQ_WRITE)
   3.264 -    {
   3.265 -        reg = regs->esi;
   3.266 -        if (!seg)               /* If no prefix, used DS. */
   3.267 -            seg = &vmcb->ds;
   3.268 -        if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) {
   3.269 -            svm_inject_exception(TRAP_gp_fault, 0, 0);
   3.270 -            return 0;
   3.271 -        }
   3.272 -    }
   3.273 -    else
   3.274 -    {
   3.275 -        reg = regs->edi;
   3.276 -        seg = &vmcb->es;        /* Note: This is ALWAYS ES. */
   3.277 -        if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) {
   3.278 -            svm_inject_exception(TRAP_gp_fault, 0, 0);
   3.279 -            return 0;
   3.280 -        }
   3.281 -    }
   3.282 -
   3.283 -    /* If the segment isn't present, give GP fault! */
   3.284 -    if (!long_mode && !seg->attr.fields.p) 
   3.285 -    {
   3.286 -        svm_inject_exception(TRAP_gp_fault, 0, 0);
   3.287 -        return 0;
   3.288 -    }
   3.289 -
   3.290 -    if (asize == 16) 
   3.291 -    {
   3.292 -        *addr = (reg & 0xFFFF);
   3.293 -        *count = regs->ecx & 0xffff;
   3.294 -    }
   3.295 -    else
   3.296 -    {
   3.297 -        *addr = reg;
   3.298 -        *count = regs->ecx;
   3.299 -    }
   3.300 -    if (!info.fields.rep)
   3.301 -        *count = 1;
   3.302 -
   3.303 -    if (!long_mode)
   3.304 -    {
   3.305 -        ASSERT(*addr == (u32)*addr);
   3.306 -        if ((u32)(*addr + size - 1) < (u32)*addr ||
   3.307 -            (seg->attr.fields.type & 0xc) != 0x4 ?
   3.308 -            *addr + size - 1 > seg->limit :
   3.309 -            *addr <= seg->limit)
   3.310 -        {
   3.311 -            svm_inject_exception(TRAP_gp_fault, 0, 0);
   3.312 -            return 0;
   3.313 -        }
   3.314 -
   3.315 -        /* Check the limit for repeated instructions, as above we checked only
   3.316 -           the first instance. Truncate the count if a limit violation would
   3.317 -           occur. Note that the checking is not necessary for page granular
   3.318 -           segments as transfers crossing page boundaries will be broken up
   3.319 -           anyway. */
   3.320 -        if (!seg->attr.fields.g && *count > 1)
   3.321 -        {
   3.322 -            if ((seg->attr.fields.type & 0xc) != 0x4)
   3.323 -            {
   3.324 -                /* expand-up */
   3.325 -                if (!(regs->eflags & EF_DF))
   3.326 -                {
   3.327 -                    if (*addr + *count * size - 1 < *addr ||
   3.328 -                        *addr + *count * size - 1 > seg->limit)
   3.329 -                        *count = (seg->limit + 1UL - *addr) / size;
   3.330 -                }
   3.331 -                else
   3.332 -                {
   3.333 -                    if (*count - 1 > *addr / size)
   3.334 -                        *count = *addr / size + 1;
   3.335 -                }
   3.336 -            }
   3.337 -            else
   3.338 -            {
   3.339 -                /* expand-down */
   3.340 -                if (!(regs->eflags & EF_DF))
   3.341 -                {
   3.342 -                    if (*count - 1 > -(s32)*addr / size)
   3.343 -                        *count = -(s32)*addr / size + 1UL;
   3.344 -                }
   3.345 -                else
   3.346 -                {
   3.347 -                    if (*addr < (*count - 1) * size ||
   3.348 -                        *addr - (*count - 1) * size <= seg->limit)
   3.349 -                        *count = (*addr - seg->limit - 1) / size + 1;
   3.350 -                }
   3.351 -            }
   3.352 -            ASSERT(*count);
   3.353 -        }
   3.354 -
   3.355 -        *addr += seg->base;
   3.356 -    }
   3.357 -#ifdef __x86_64__
   3.358 -    else
   3.359 -    {
   3.360 -        if (seg == &vmcb->fs || seg == &vmcb->gs)
   3.361 -            *addr += seg->base;
   3.362 -
   3.363 -        if (!is_canonical_address(*addr) ||
   3.364 -            !is_canonical_address(*addr + size - 1))
   3.365 -        {
   3.366 -            svm_inject_exception(TRAP_gp_fault, 0, 0);
   3.367 -            return 0;
   3.368 -        }
   3.369 -        if (*count > (1UL << 48) / size)
   3.370 -            *count = (1UL << 48) / size;
   3.371 -        if (!(regs->eflags & EF_DF))
   3.372 -        {
   3.373 -            if (*addr + *count * size - 1 < *addr ||
   3.374 -                !is_canonical_address(*addr + *count * size - 1))
   3.375 -                *count = (*addr & ~((1UL << 48) - 1)) / size;
   3.376 -        }
   3.377 -        else
   3.378 -        {
   3.379 -            if ((*count - 1) * size > *addr ||
   3.380 -                !is_canonical_address(*addr + (*count - 1) * size))
   3.381 -                *count = (*addr & ~((1UL << 48) - 1)) / size + 1;
   3.382 -        }
   3.383 -        ASSERT(*count);
   3.384 -    }
   3.385 -#endif
   3.386 -
   3.387 -    return 1;
   3.388 -}
   3.389 -
   3.390 -
   3.391 -static void svm_io_instruction(struct vcpu *v)
   3.392 -{
   3.393 -    struct cpu_user_regs *regs;
   3.394 -    struct hvm_io_op *pio_opp;
   3.395 -    unsigned int port;
   3.396 -    unsigned int size, dir, df;
   3.397 -    ioio_info_t info;
   3.398 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   3.399 -
   3.400 -    pio_opp = &current->arch.hvm_vcpu.io_op;
   3.401 -    pio_opp->instr = INSTR_PIO;
   3.402 -    pio_opp->flags = 0;
   3.403 -
   3.404 -    regs = &pio_opp->io_context;
   3.405 -
   3.406 -    /* Copy current guest state into io instruction state structure. */
   3.407 -    memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
   3.408 -
   3.409 -    info.bytes = vmcb->exitinfo1;
   3.410 -
   3.411 -    port = info.fields.port; /* port used to be addr */
   3.412 -    dir = info.fields.type; /* direction */ 
   3.413 -    df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
   3.414 -
   3.415 -    if (info.fields.sz32) 
   3.416 -        size = 4;
   3.417 -    else if (info.fields.sz16)
   3.418 -        size = 2;
   3.419 -    else 
   3.420 -        size = 1;
   3.421 -
   3.422 -    if (dir==IOREQ_READ)
   3.423 -        HVMTRACE_2D(IO_READ,  v, port, size);
   3.424 -    else
   3.425 -        HVMTRACE_3D(IO_WRITE, v, port, size, regs->eax);
   3.426 -
   3.427 -    HVM_DBG_LOG(DBG_LEVEL_IO, 
   3.428 -                "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
   3.429 -                "exit_qualification = %"PRIx64,
   3.430 -                port, vmcb->cs.sel, (uint64_t)regs->eip, info.bytes);
   3.431 -
   3.432 -    /* string instruction */
   3.433 -    if (info.fields.str)
   3.434 -    { 
   3.435 -        unsigned long addr, count;
   3.436 -        paddr_t paddr;
   3.437 -        unsigned long gfn;
   3.438 -        uint32_t pfec;
   3.439 -        int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
   3.440 -
   3.441 -        if (!svm_get_io_address(v, regs, size, info, &count, &addr))
   3.442 -        {
   3.443 -            /* We failed to get a valid address, so don't do the IO operation -
   3.444 -             * it would just get worse if we do! Hopefully the guest is handing
   3.445 -             * gp-faults... 
   3.446 -             */
   3.447 -            return;
   3.448 -        }
   3.449 -
   3.450 -        /* "rep" prefix */
   3.451 -        if (info.fields.rep) 
   3.452 -        {
   3.453 -            pio_opp->flags |= REPZ;
   3.454 -        }
   3.455 -
   3.456 -        /* Translate the address to a physical address */
   3.457 -        pfec = PFEC_page_present;
   3.458 -        if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
   3.459 -            pfec |= PFEC_write_access;
   3.460 -        if ( vmcb->cpl == 3 )
   3.461 -            pfec |= PFEC_user_mode;
   3.462 -        gfn = paging_gva_to_gfn(v, addr, &pfec);
   3.463 -        if ( gfn == INVALID_GFN ) 
   3.464 -        {
   3.465 -            /* The guest does not have the RAM address mapped. 
   3.466 -             * Need to send in a page fault */
   3.467 -            svm_inject_exception(TRAP_page_fault, pfec, addr);
   3.468 -            return;
   3.469 -        }
   3.470 -        paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
   3.471 -
   3.472 -        /*
   3.473 -         * Handle string pio instructions that cross pages or that
   3.474 -         * are unaligned. See the comments in hvm_platform.c/handle_mmio()
   3.475 -         */
   3.476 -        if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
   3.477 -        {
   3.478 -            unsigned long value = 0;
   3.479 -
   3.480 -            pio_opp->flags |= OVERLAP;
   3.481 -            pio_opp->addr = addr;
   3.482 -
   3.483 -            if (dir == IOREQ_WRITE)   /* OUTS */
   3.484 -            {
   3.485 -                if ( hvm_paging_enabled(current) )
   3.486 -                {
   3.487 -                    int rv = hvm_copy_from_guest_virt(&value, addr, size);
   3.488 -                    if ( rv == HVMCOPY_bad_gva_to_gfn )
   3.489 -                        return; /* exception already injected */
   3.490 -                }
   3.491 -                else
   3.492 -                    (void)hvm_copy_from_guest_phys(&value, addr, size);
   3.493 -            }
   3.494 -            else /* dir != IOREQ_WRITE */
   3.495 -                /* Remember where to write the result, as a *VA*.
   3.496 -                 * Must be a VA so we can handle the page overlap 
   3.497 -                 * correctly in hvm_pio_assist() */
   3.498 -                pio_opp->addr = addr;
   3.499 -
   3.500 -            if (count == 1)
   3.501 -                regs->eip = vmcb->exitinfo2;
   3.502 -
   3.503 -            send_pio_req(port, 1, size, value, dir, df, 0);
   3.504 -        } 
   3.505 -        else 
   3.506 -        {
   3.507 -            unsigned long last_addr = sign > 0 ? addr + count * size - 1
   3.508 -                                               : addr - (count - 1) * size;
   3.509 -
   3.510 -            if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
   3.511 -            {
   3.512 -                if (sign > 0)
   3.513 -                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
   3.514 -                else
   3.515 -                    count = (addr & ~PAGE_MASK) / size + 1;
   3.516 -            }
   3.517 -            else    
   3.518 -                regs->eip = vmcb->exitinfo2;
   3.519 -
   3.520 -            send_pio_req(port, count, size, paddr, dir, df, 1);
   3.521 -        }
   3.522 -    } 
   3.523 -    else 
   3.524 -    {
   3.525 -        /* 
   3.526 -         * On SVM, the RIP of the intruction following the IN/OUT is saved in
   3.527 -         * ExitInfo2
   3.528 -         */
   3.529 -        regs->eip = vmcb->exitinfo2;
   3.530 -
   3.531 -        if (port == 0xe9 && dir == IOREQ_WRITE && size == 1) 
   3.532 -            hvm_print_line(v, regs->eax); /* guest debug output */
   3.533 -    
   3.534 -        send_pio_req(port, 1, size, regs->eax, dir, df, 0);
   3.535 -    }
   3.536 -}
   3.537 -
   3.538 -static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
   3.539 -{
   3.540 -    unsigned long value = 0;
   3.541 -    struct vcpu *v = current;
   3.542 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   3.543 -
   3.544 -    switch ( cr )
   3.545 -    {
   3.546 -    case 0:
   3.547 -        value = v->arch.hvm_vcpu.guest_cr[0];
   3.548 -        break;
   3.549 -    case 3:
   3.550 -        value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
   3.551 -        break;
   3.552 -    case 4:
   3.553 -        value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4];
   3.554 -        break;
   3.555 -    default:
   3.556 -        gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
   3.557 -        domain_crash(v->domain);
   3.558 -        return;
   3.559 -    }
   3.560 -
   3.561 -    HVMTRACE_2D(CR_READ, v, cr, value);
   3.562 -
   3.563 -    set_reg(gp, value, regs, vmcb);
   3.564 -
   3.565 -    HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value);
   3.566 -}
   3.567 -
   3.568 -static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
   3.569 -{
   3.570 -    unsigned long value;
   3.571 -    struct vcpu *v = current;
   3.572 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   3.573 -
   3.574 -    value = get_reg(gpreg, regs, vmcb);
   3.575 -
   3.576 -    HVMTRACE_2D(CR_WRITE, v, cr, value);
   3.577 -
   3.578 -    HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, current = %p",
   3.579 -                cr, value, v);
   3.580 -
   3.581 -    switch ( cr )
   3.582 -    {
   3.583 -    case 0: 
   3.584 -        return !hvm_set_cr0(value);
   3.585 -    case 3:
   3.586 -        return !hvm_set_cr3(value);
   3.587 -    case 4:
   3.588 -        return !hvm_set_cr4(value);
   3.589 -    default:
   3.590 -        gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
   3.591 -        domain_crash(v->domain);
   3.592 -        return 0;
   3.593 -    }
   3.594 -
   3.595 -    return 1;
   3.596 -}
   3.597 -
   3.598 -static void svm_cr_access(
   3.599 -    struct vcpu *v, unsigned int cr, unsigned int type,
   3.600 -    struct cpu_user_regs *regs)
   3.601 -{
   3.602 -    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
   3.603 -    int inst_len = 0;
   3.604 -    int index,addr_size,i;
   3.605 -    unsigned int gpreg,offset;
   3.606 -    unsigned long value,addr;
   3.607 -    u8 buffer[MAX_INST_LEN];   
   3.608 -    u8 prefix = 0;
   3.609 -    u8 modrm;
   3.610 -    enum x86_segment seg;
   3.611 -    int result = 1;
   3.612 -    enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
   3.613 -    enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
   3.614 -    enum instruction_index match;
   3.615 -
   3.616 -    inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
   3.617 -
   3.618 -    /* get index to first actual instruction byte - as we will need to know 
   3.619 -       where the prefix lives later on */
   3.620 -    index = skip_prefix_bytes(buffer, sizeof(buffer));
   3.621 -    
   3.622 -    if ( type == TYPE_MOV_TO_CR )
   3.623 -    {
   3.624 -        inst_len = __get_instruction_length_from_list(
   3.625 -            v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match);
   3.626 -    }
   3.627 -    else /* type == TYPE_MOV_FROM_CR */
   3.628 -    {
   3.629 -        inst_len = __get_instruction_length_from_list(
   3.630 -            v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match);
   3.631 -    }
   3.632 -
   3.633 -    inst_len += index;
   3.634 -
   3.635 -    /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
   3.636 -    if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
   3.637 -        prefix = buffer[index-1];
   3.638 -
   3.639 -    HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long)regs->eip);
   3.640 -
   3.641 -    switch ( match )
   3.642 -
   3.643 -    {
   3.644 -    case INSTR_MOV2CR:
   3.645 -        gpreg = decode_src_reg(prefix, buffer[index+2]);
   3.646 -        result = mov_to_cr(gpreg, cr, regs);
   3.647 -        break;
   3.648 -
   3.649 -    case INSTR_MOVCR2:
   3.650 -        gpreg = decode_src_reg(prefix, buffer[index+2]);
   3.651 -        mov_from_cr(cr, gpreg, regs);
   3.652 -        break;
   3.653 -
   3.654 -    case INSTR_CLTS:
   3.655 -        v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS;
   3.656 -        svm_update_guest_cr(v, 0);
   3.657 -        HVMTRACE_0D(CLTS, current);
   3.658 -        break;
   3.659 -
   3.660 -    case INSTR_LMSW:
   3.661 -        gpreg = decode_src_reg(prefix, buffer[index+2]);
   3.662 -        value = get_reg(gpreg, regs, vmcb) & 0xF;
   3.663 -        value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
   3.664 -        result = !hvm_set_cr0(value);
   3.665 -        HVMTRACE_1D(LMSW, current, value);
   3.666 -        break;
   3.667 -
   3.668 -    case INSTR_SMSW:
   3.669 -        value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF;
   3.670 -        modrm = buffer[index+2];
   3.671 -        addr_size = svm_guest_x86_mode(v);
   3.672 -        if ( addr_size < 2 )
   3.673 -            addr_size = 2;
   3.674 -        if ( likely((modrm & 0xC0) >> 6 == 3) )
   3.675 -        {
   3.676 -            gpreg = decode_src_reg(prefix, modrm);
   3.677 -            set_reg(gpreg, value, regs, vmcb);
   3.678 -        }
   3.679 -        /*
   3.680 -         * For now, only implement decode of the offset mode, since that's the
   3.681 -         * only mode observed in a real-world OS. This code is also making the
   3.682 -         * assumption that we'll never hit this code in long mode.
   3.683 -         */
   3.684 -        else if ( (modrm == 0x26) || (modrm == 0x25) )
   3.685 -        {   
   3.686 -            seg = x86_seg_ds;
   3.687 -            i = index;
   3.688 -            /* Segment or address size overrides? */
   3.689 -            while ( i-- )
   3.690 -            {
   3.691 -                switch ( buffer[i] )
   3.692 -                {
   3.693 -                   case 0x26: seg = x86_seg_es; break;
   3.694 -                   case 0x2e: seg = x86_seg_cs; break;
   3.695 -                   case 0x36: seg = x86_seg_ss; break;
   3.696 -                   case 0x64: seg = x86_seg_fs; break;
   3.697 -                   case 0x65: seg = x86_seg_gs; break;
   3.698 -                   case 0x67: addr_size ^= 6;   break;
   3.699 -                }
   3.700 -            }
   3.701 -            /* Bail unless this really is a seg_base + offset case */
   3.702 -            if ( ((modrm == 0x26) && (addr_size == 4)) ||
   3.703 -                 ((modrm == 0x25) && (addr_size == 2)) )
   3.704 -            {
   3.705 -                gdprintk(XENLOG_ERR, "SMSW emulation at guest address: "
   3.706 -                         "%lx failed due to unhandled addressing mode."
   3.707 -                         "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
   3.708 -                domain_crash(v->domain);
   3.709 -            }
   3.710 -            inst_len += addr_size;
   3.711 -            offset = *(( unsigned int *) ( void *) &buffer[index + 3]);
   3.712 -            offset = ( addr_size == 4 ) ? offset : ( offset & 0xFFFF );
   3.713 -            addr = hvm_get_segment_base(v, seg);
   3.714 -            addr += offset;
   3.715 -            result = (hvm_copy_to_guest_virt(addr, &value, 2)
   3.716 -                      != HVMCOPY_bad_gva_to_gfn);
   3.717 -        }
   3.718 -        else
   3.719 -        {
   3.720 -           gdprintk(XENLOG_ERR, "SMSW emulation at guest address: %lx "
   3.721 -                    "failed due to unhandled addressing mode!"
   3.722 -                    "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
   3.723 -           domain_crash(v->domain);
   3.724 -        }
   3.725 -        break;
   3.726 -
   3.727 -    default:
   3.728 -        BUG();
   3.729 -    }
   3.730 -
   3.731 -    if ( result )
   3.732 -        __update_guest_eip(regs, inst_len);
   3.733 -}
   3.734 -
   3.735  static int svm_msr_read_intercept(struct cpu_user_regs *regs)
   3.736  {
   3.737      u64 msr_content = 0;
   3.738 @@ -1899,68 +1211,12 @@ static void svm_vmexit_do_invalidate_cac
   3.739      __update_guest_eip(regs, inst_len);
   3.740  }
   3.741  
   3.742 -void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
   3.743 +static void svm_invlpg_intercept(unsigned long vaddr)
   3.744  {
   3.745 -    struct vcpu *v = current;
   3.746 -    u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
   3.747 -    unsigned long g_vaddr;
   3.748 -    int inst_len;
   3.749 -
   3.750 -    /* 
   3.751 -     * Unknown how many bytes the invlpg instruction will take.  Use the
   3.752 -     * maximum instruction length here
   3.753 -     */
   3.754 -    if ( inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length )
   3.755 -    {
   3.756 -        gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
   3.757 -        goto crash;
   3.758 -    }
   3.759 -
   3.760 -    if ( invlpga )
   3.761 -    {
   3.762 -        inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
   3.763 -        __update_guest_eip(regs, inst_len);
   3.764 -
   3.765 -        /* 
   3.766 -         * The address is implicit on this instruction. At the moment, we don't
   3.767 -         * use ecx (ASID) to identify individual guests pages 
   3.768 -         */
   3.769 -        g_vaddr = regs->eax;
   3.770 -    }
   3.771 -    else
   3.772 -    {
   3.773 -        /* What about multiple prefix codes? */
   3.774 -        prefix = (is_prefix(opcode[0]) ? opcode[0] : 0);
   3.775 -        inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
   3.776 -        if ( inst_len <= 0 )
   3.777 -        {
   3.778 -            gdprintk(XENLOG_ERR, "Error getting invlpg instr len\n");
   3.779 -            goto crash;
   3.780 -        }
   3.781 -
   3.782 -        inst_len--;
   3.783 -        length -= inst_len;
   3.784 -
   3.785 -        /* 
   3.786 -         * Decode memory operand of the instruction including ModRM, SIB, and
   3.787 -         * displacement to get effective address and length in bytes.  Assume
   3.788 -         * the system in either 32- or 64-bit mode.
   3.789 -         */
   3.790 -        g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
   3.791 -                                             &opcode[inst_len], &length);
   3.792 -
   3.793 -        inst_len += length;
   3.794 -        __update_guest_eip(regs, inst_len);
   3.795 -    }
   3.796 -
   3.797 -    HVMTRACE_3D(INVLPG, v, !!invlpga, g_vaddr, (invlpga ? regs->ecx : 0));
   3.798 -
   3.799 -    paging_invlpg(v, g_vaddr);
   3.800 -    svm_asid_g_invlpg(v, g_vaddr);
   3.801 -    return;
   3.802 -
   3.803 - crash:
   3.804 -    domain_crash(v->domain);
   3.805 +    struct vcpu *curr = current;
   3.806 +    HVMTRACE_2D(INVLPG, curr, 0, vaddr);
   3.807 +    paging_invlpg(curr, vaddr);
   3.808 +    svm_asid_g_invlpg(curr, vaddr);
   3.809  }
   3.810  
   3.811  asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
   3.812 @@ -2094,12 +1350,13 @@ asmlinkage void svm_vmexit_handler(struc
   3.813          svm_vmexit_do_hlt(vmcb, regs);
   3.814          break;
   3.815  
   3.816 +    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
   3.817 +    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
   3.818      case VMEXIT_INVLPG:
   3.819 -        svm_handle_invlpg(0, regs);
   3.820 -        break;
   3.821 -
   3.822      case VMEXIT_INVLPGA:
   3.823 -        svm_handle_invlpg(1, regs);
   3.824 +    case VMEXIT_IOIO:
   3.825 +        if ( !handle_mmio() )
   3.826 +            hvm_inject_exception(TRAP_gp_fault, 0, 0);
   3.827          break;
   3.828  
   3.829      case VMEXIT_VMMCALL:
   3.830 @@ -2114,25 +1371,11 @@ asmlinkage void svm_vmexit_handler(struc
   3.831          }
   3.832          break;
   3.833  
   3.834 -    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
   3.835 -        svm_cr_access(v, exit_reason - VMEXIT_CR0_READ,
   3.836 -                      TYPE_MOV_FROM_CR, regs);
   3.837 -        break;
   3.838 -
   3.839 -    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
   3.840 -        svm_cr_access(v, exit_reason - VMEXIT_CR0_WRITE,
   3.841 -                      TYPE_MOV_TO_CR, regs);
   3.842 -        break;
   3.843 -
   3.844      case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
   3.845      case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
   3.846          svm_dr_access(v, regs);
   3.847          break;
   3.848  
   3.849 -    case VMEXIT_IOIO:
   3.850 -        svm_io_instruction(v);
   3.851 -        break;
   3.852 -
   3.853      case VMEXIT_MSR:
   3.854          svm_do_msr_access(regs);
   3.855          break;
   3.856 @@ -2176,10 +1419,7 @@ asmlinkage void svm_vmexit_handler(struc
   3.857  
   3.858  asmlinkage void svm_trace_vmentry(void)
   3.859  {
   3.860 -    struct vcpu *v = current;
   3.861 -
   3.862 -    /* This is the last C code before the VMRUN instruction. */
   3.863 -    hvmtrace_vmentry(v);
   3.864 +    hvmtrace_vmentry(current);
   3.865  }
   3.866    
   3.867  /*
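
The consolidation is most visible in svm_vmexit_handler() above: CR reads
and writes, INVLPG, INVLPGA and IOIO exits now all funnel into
handle_mmio(), with a #GP injected if emulation fails. A toy dispatcher with
the same shape is sketched below; the VMEXIT_* values are illustrative
placeholders rather than AMD's actual encodings, and the case ranges are the
GCC extension the Xen code already uses:

#include <stdio.h>

/* Illustrative exit codes only -- not AMD's VMEXIT numbering. */
enum vmexit {
    VMEXIT_CR0_READ  = 0,  VMEXIT_CR15_READ  = 15,
    VMEXIT_CR0_WRITE = 16, VMEXIT_CR15_WRITE = 31,
    VMEXIT_INVLPG    = 32, VMEXIT_INVLPGA    = 33,
    VMEXIT_IOIO      = 34, VMEXIT_HLT        = 35,
};

static int handle_mmio(void)
{
    puts("-> handle_mmio() -> hvm_emulate_one() -> x86_emulate()");
    return 1; /* emulation succeeded */
}

static void dispatch(enum vmexit reason)
{
    switch ( reason )
    {
    /* Case ranges, as in the new svm_vmexit_handler() */
    case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
    case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
    case VMEXIT_INVLPG:
    case VMEXIT_INVLPGA:
    case VMEXIT_IOIO:
        if ( !handle_mmio() )
            puts("-> inject #GP");
        break;
    default:
        puts("-> dedicated handler");
    }
}

int main(void)
{
    dispatch(VMEXIT_INVLPG); /* emulator path */
    dispatch(VMEXIT_HLT);    /* still has its own handler */
    return 0;
}
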
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Thu Feb 21 15:06:37 2008 +0000
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Thu Feb 21 18:02:42 2008 +0000
     4.3 @@ -67,6 +67,7 @@ static void vmx_wbinvd_intercept(void);
     4.4  static void vmx_fpu_dirty_intercept(void);
     4.5  static int vmx_msr_read_intercept(struct cpu_user_regs *regs);
     4.6  static int vmx_msr_write_intercept(struct cpu_user_regs *regs);
     4.7 +static void vmx_invlpg_intercept(unsigned long vaddr);
     4.8  
     4.9  static int vmx_domain_initialise(struct domain *d)
    4.10  {
    4.11 @@ -701,35 +702,6 @@ static void vmx_ctxt_switch_to(struct vc
    4.12      vpmu_load(v);
    4.13  }
    4.14  
    4.15 -static unsigned long vmx_get_segment_base(struct vcpu *v, enum x86_segment seg)
    4.16 -{
    4.17 -    unsigned long base = 0;
    4.18 -    int long_mode = 0;
    4.19 -
    4.20 -    ASSERT(v == current);
    4.21 -
    4.22 -    if ( hvm_long_mode_enabled(v) &&
    4.23 -         (__vmread(GUEST_CS_AR_BYTES) & X86_SEG_AR_CS_LM_ACTIVE) )
    4.24 -        long_mode = 1;
    4.25 -
    4.26 -    switch ( seg )
    4.27 -    {
    4.28 -    case x86_seg_cs: if ( !long_mode ) base = __vmread(GUEST_CS_BASE); break;
    4.29 -    case x86_seg_ds: if ( !long_mode ) base = __vmread(GUEST_DS_BASE); break;
    4.30 -    case x86_seg_es: if ( !long_mode ) base = __vmread(GUEST_ES_BASE); break;
    4.31 -    case x86_seg_fs: base = __vmread(GUEST_FS_BASE); break;
    4.32 -    case x86_seg_gs: base = __vmread(GUEST_GS_BASE); break;
    4.33 -    case x86_seg_ss: if ( !long_mode ) base = __vmread(GUEST_SS_BASE); break;
    4.34 -    case x86_seg_tr: base = __vmread(GUEST_TR_BASE); break;
    4.35 -    case x86_seg_gdtr: base = __vmread(GUEST_GDTR_BASE); break;
    4.36 -    case x86_seg_idtr: base = __vmread(GUEST_IDTR_BASE); break;
    4.37 -    case x86_seg_ldtr: base = __vmread(GUEST_LDTR_BASE); break;
    4.38 -    default: BUG(); break;
    4.39 -    }
    4.40 -
    4.41 -    return base;
    4.42 -}
    4.43 -
    4.44  static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
    4.45                                       struct segment_register *reg)
    4.46  {
    4.47 @@ -1068,7 +1040,6 @@ static struct hvm_function_table vmx_fun
    4.48      .load_cpu_ctxt        = vmx_load_vmcs_ctxt,
    4.49      .interrupt_blocked    = vmx_interrupt_blocked,
    4.50      .guest_x86_mode       = vmx_guest_x86_mode,
    4.51 -    .get_segment_base     = vmx_get_segment_base,
    4.52      .get_segment_register = vmx_get_segment_register,
    4.53      .set_segment_register = vmx_set_segment_register,
    4.54      .update_host_cr3      = vmx_update_host_cr3,
    4.55 @@ -1086,7 +1057,8 @@ static struct hvm_function_table vmx_fun
    4.56      .wbinvd_intercept     = vmx_wbinvd_intercept,
    4.57      .fpu_dirty_intercept  = vmx_fpu_dirty_intercept,
    4.58      .msr_read_intercept   = vmx_msr_read_intercept,
    4.59 -    .msr_write_intercept  = vmx_msr_write_intercept
    4.60 +    .msr_write_intercept  = vmx_msr_write_intercept,
    4.61 +    .invlpg_intercept     = vmx_invlpg_intercept
    4.62  };
    4.63  
    4.64  void start_vmx(void)
    4.65 @@ -1261,452 +1233,11 @@ static void vmx_dr_access(unsigned long 
    4.66      __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
    4.67  }
    4.68  
    4.69 -/*
    4.70 - * Invalidate the TLB for va. Invalidate the shadow page corresponding
    4.71 - * the address va.
    4.72 - */
    4.73 -static void vmx_do_invlpg(unsigned long va)
    4.74 -{
    4.75 -    struct vcpu *v = current;
    4.76 -
    4.77 -    HVMTRACE_2D(INVLPG, v, /*invlpga=*/ 0, va);
    4.78 -
    4.79 -    /*
    4.80 -     * We do the safest things first, then try to update the shadow
    4.81 -     * by copying from the guest.
    4.82 -     */
    4.83 -    paging_invlpg(v, va);
    4.84 -}
    4.85 -
    4.86 -/* Get segment for OUTS according to guest instruction. */
    4.87 -static enum x86_segment vmx_outs_get_segment(
    4.88 -    int long_mode, unsigned long eip, int inst_len)
    4.89 -{
    4.90 -    unsigned char inst[MAX_INST_LEN];
    4.91 -    enum x86_segment seg = x86_seg_ds;
    4.92 -    int i;
    4.93 -    extern int inst_copy_from_guest(unsigned char *, unsigned long, int);
    4.94 -
    4.95 -    if ( likely(cpu_has_vmx_ins_outs_instr_info) )
    4.96 -    {
    4.97 -        unsigned int instr_info = __vmread(VMX_INSTRUCTION_INFO);
    4.98 -
    4.99 -        /* Get segment register according to bits 17:15. */
   4.100 -        switch ( (instr_info >> 15) & 7 )
   4.101 -        {
   4.102 -        case 0: seg = x86_seg_es; break;
   4.103 -        case 1: seg = x86_seg_cs; break;
   4.104 -        case 2: seg = x86_seg_ss; break;
   4.105 -        case 3: seg = x86_seg_ds; break;
   4.106 -        case 4: seg = x86_seg_fs; break;
   4.107 -        case 5: seg = x86_seg_gs; break;
   4.108 -        default: BUG();
   4.109 -        }
   4.110 -
   4.111 -        goto out;
   4.112 -    }
   4.113 -
   4.114 -    if ( !long_mode )
   4.115 -        eip += __vmread(GUEST_CS_BASE);
   4.116 -
   4.117 -    memset(inst, 0, MAX_INST_LEN);
   4.118 -    if ( inst_copy_from_guest(inst, eip, inst_len) != inst_len )
   4.119 -    {
   4.120 -        gdprintk(XENLOG_ERR, "Get guest instruction failed\n");
   4.121 -        domain_crash(current->domain);
   4.122 -        goto out;
   4.123 -    }
   4.124 -
   4.125 -    for ( i = 0; i < inst_len; i++ )
   4.126 -    {
   4.127 -        switch ( inst[i] )
   4.128 -        {
   4.129 -        case 0xf3: /* REPZ */
   4.130 -        case 0xf2: /* REPNZ */
   4.131 -        case 0xf0: /* LOCK */
   4.132 -        case 0x66: /* data32 */
   4.133 -        case 0x67: /* addr32 */
   4.134 -#ifdef __x86_64__
   4.135 -        case 0x40 ... 0x4f: /* REX */
   4.136 -#endif
   4.137 -            continue;
   4.138 -        case 0x2e: /* CS */
   4.139 -            seg = x86_seg_cs;
   4.140 -            continue;
   4.141 -        case 0x36: /* SS */
   4.142 -            seg = x86_seg_ss;
   4.143 -            continue;
   4.144 -        case 0x26: /* ES */
   4.145 -            seg = x86_seg_es;
   4.146 -            continue;
   4.147 -        case 0x64: /* FS */
   4.148 -            seg = x86_seg_fs;
   4.149 -            continue;
   4.150 -        case 0x65: /* GS */
   4.151 -            seg = x86_seg_gs;
   4.152 -            continue;
   4.153 -        case 0x3e: /* DS */
   4.154 -            seg = x86_seg_ds;
   4.155 -            continue;
   4.156 -        }
   4.157 -    }
   4.158 -
   4.159 - out:
   4.160 -    return seg;
   4.161 -}
   4.162 -
   4.163 -static int vmx_str_pio_check_descriptor(int long_mode, unsigned long eip,
   4.164 -                                        int inst_len, enum x86_segment seg,
   4.165 -                                        unsigned long *base, u32 *limit,
   4.166 -                                        u32 *ar_bytes)
   4.167 -{
   4.168 -    enum vmcs_field ar_field, base_field, limit_field;
   4.169 -
   4.170 -    *base = 0;
   4.171 -    *limit = 0;
   4.172 -    if ( seg != x86_seg_es )
   4.173 -        seg = vmx_outs_get_segment(long_mode, eip, inst_len);
   4.174 -
   4.175 -    switch ( seg )
   4.176 -    {
   4.177 -    case x86_seg_cs:
   4.178 -        ar_field = GUEST_CS_AR_BYTES;
   4.179 -        base_field = GUEST_CS_BASE;
   4.180 -        limit_field = GUEST_CS_LIMIT;
   4.181 -        break;
   4.182 -    case x86_seg_ds:
   4.183 -        ar_field = GUEST_DS_AR_BYTES;
   4.184 -        base_field = GUEST_DS_BASE;
   4.185 -        limit_field = GUEST_DS_LIMIT;
   4.186 -        break;
   4.187 -    case x86_seg_es:
   4.188 -        ar_field = GUEST_ES_AR_BYTES;
   4.189 -        base_field = GUEST_ES_BASE;
   4.190 -        limit_field = GUEST_ES_LIMIT;
   4.191 -        break;
   4.192 -    case x86_seg_fs:
   4.193 -        ar_field = GUEST_FS_AR_BYTES;
   4.194 -        base_field = GUEST_FS_BASE;
   4.195 -        limit_field = GUEST_FS_LIMIT;
   4.196 -        break;
   4.197 -    case x86_seg_gs:
   4.198 -        ar_field = GUEST_GS_AR_BYTES;
   4.199 -        base_field = GUEST_GS_BASE;
   4.200 -        limit_field = GUEST_GS_LIMIT;
   4.201 -        break;
   4.202 -    case x86_seg_ss:
   4.203 -        ar_field = GUEST_SS_AR_BYTES;
   4.204 -        base_field = GUEST_SS_BASE;
   4.205 -        limit_field = GUEST_SS_LIMIT;
   4.206 -        break;
   4.207 -    default:
   4.208 -        BUG();
   4.209 -        return 0;
   4.210 -    }
   4.211 -
   4.212 -    if ( !long_mode || seg == x86_seg_fs || seg == x86_seg_gs )
   4.213 -    {
   4.214 -        *base = __vmread(base_field);
   4.215 -        *limit = __vmread(limit_field);
   4.216 -    }
   4.217 -    *ar_bytes = __vmread(ar_field);
   4.218 -
   4.219 -    return !(*ar_bytes & X86_SEG_AR_SEG_UNUSABLE);
   4.220 -}
   4.221 -
   4.222 -
   4.223 -static int vmx_str_pio_check_limit(u32 limit, unsigned int size,
   4.224 -                                   u32 ar_bytes, unsigned long addr,
   4.225 -                                   unsigned long base, int df,
   4.226 -                                   unsigned long *count)
   4.227 -{
   4.228 -    unsigned long ea = addr - base;
   4.229 -
   4.230 -    /* Offset must be within limits. */
   4.231 -    ASSERT(ea == (u32)ea);
   4.232 -    if ( (u32)(ea + size - 1) < (u32)ea ||
   4.233 -         (ar_bytes & 0xc) != 0x4 ? ea + size - 1 > limit
   4.234 -                                 : ea <= limit )
   4.235 -        return 0;
   4.236 -
   4.237 -    /* Check the limit for repeated instructions, since above we checked
   4.238 -       only the first instance. Truncate the count if a limit violation
   4.239 -       would occur. Note that the checking is not necessary for page
   4.240 -       granular segments as transfers crossing page boundaries will be
   4.241 -       broken up anyway. */
   4.242 -    if ( !(ar_bytes & X86_SEG_AR_GRANULARITY) && *count > 1 )
   4.243 -    {
   4.244 -        if ( (ar_bytes & 0xc) != 0x4 )
   4.245 -        {
   4.246 -            /* expand-up */
   4.247 -            if ( !df )
   4.248 -            {
   4.249 -                if ( ea + *count * size - 1 < ea ||
   4.250 -                     ea + *count * size - 1 > limit )
   4.251 -                    *count = (limit + 1UL - ea) / size;
   4.252 -            }
   4.253 -            else
   4.254 -            {
   4.255 -                if ( *count - 1 > ea / size )
   4.256 -                    *count = ea / size + 1;
   4.257 -            }
   4.258 -        }
   4.259 -        else
   4.260 -        {
   4.261 -            /* expand-down */
   4.262 -            if ( !df )
   4.263 -            {
   4.264 -                if ( *count - 1 > -(s32)ea / size )
   4.265 -                    *count = -(s32)ea / size + 1UL;
   4.266 -            }
   4.267 -            else
   4.268 -            {
   4.269 -                if ( ea < (*count - 1) * size ||
   4.270 -                     ea - (*count - 1) * size <= limit )
   4.271 -                    *count = (ea - limit - 1) / size + 1;
   4.272 -            }
   4.273 -        }
   4.274 -        ASSERT(*count);
   4.275 -    }
   4.276 -
   4.277 -    return 1;
   4.278 -}
   4.279 -
   4.280 -#ifdef __x86_64__
   4.281 -static int vmx_str_pio_lm_check_limit(struct cpu_user_regs *regs,
   4.282 -                                      unsigned int size,
   4.283 -                                      unsigned long addr,
   4.284 -                                      unsigned long *count)
   4.285 +static void vmx_invlpg_intercept(unsigned long vaddr)
   4.286  {
   4.287 -    if ( !is_canonical_address(addr) ||
   4.288 -         !is_canonical_address(addr + size - 1) )
   4.289 -        return 0;
   4.290 -
   4.291 -    if ( *count > (1UL << 48) / size )
   4.292 -        *count = (1UL << 48) / size;
   4.293 -
   4.294 -    if ( !(regs->eflags & EF_DF) )
   4.295 -    {
   4.296 -        if ( addr + *count * size - 1 < addr ||
   4.297 -             !is_canonical_address(addr + *count * size - 1) )
   4.298 -            *count = (addr & ~((1UL << 48) - 1)) / size;
   4.299 -    }
   4.300 -    else
   4.301 -    {
   4.302 -        if ( (*count - 1) * size > addr ||
   4.303 -             !is_canonical_address(addr + (*count - 1) * size) )
   4.304 -            *count = (addr & ~((1UL << 48) - 1)) / size + 1;
   4.305 -    }
   4.306 -
   4.307 -    ASSERT(*count);
   4.308 -
   4.309 -    return 1;
   4.310 -}
   4.311 -#endif
   4.312 -
   4.313 -static void vmx_send_str_pio(struct cpu_user_regs *regs,
   4.314 -                             struct hvm_io_op *pio_opp,
   4.315 -                             unsigned long inst_len, unsigned int port,
   4.316 -                             int sign, unsigned int size, int dir,
   4.317 -                             int df, unsigned long addr,
   4.318 -                             paddr_t paddr, unsigned long count)
   4.319 -{
   4.320 -    /*
   4.321 -     * Handle string pio instructions that cross pages or that
   4.322 -     * are unaligned. See the comments in hvm_domain.c/handle_mmio()
   4.323 -     */
   4.324 -    if ( (addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK) ) {
   4.325 -        unsigned long value = 0;
   4.326 -
   4.327 -        pio_opp->flags |= OVERLAP;
   4.328 -
   4.329 -        if ( dir == IOREQ_WRITE )   /* OUTS */
   4.330 -        {
   4.331 -            if ( hvm_paging_enabled(current) )
   4.332 -            {
   4.333 -                int rv = hvm_copy_from_guest_virt(&value, addr, size);
   4.334 -                if ( rv == HVMCOPY_bad_gva_to_gfn )
   4.335 -                    return; /* exception already injected */
   4.336 -            }
   4.337 -            else
   4.338 -                (void)hvm_copy_from_guest_phys(&value, addr, size);
   4.339 -        }
   4.340 -        else /* dir != IOREQ_WRITE */
   4.341 -            /* Remember where to write the result, as a *VA*.
   4.342 -             * Must be a VA so we can handle the page overlap
   4.343 -             * correctly in hvm_pio_assist() */
   4.344 -            pio_opp->addr = addr;
   4.345 -
   4.346 -        if ( count == 1 )
   4.347 -            regs->eip += inst_len;
   4.348 -
   4.349 -        send_pio_req(port, 1, size, value, dir, df, 0);
   4.350 -    } else {
   4.351 -        unsigned long last_addr = sign > 0 ? addr + count * size - 1
   4.352 -                                           : addr - (count - 1) * size;
   4.353 -
   4.354 -        if ( (addr & PAGE_MASK) != (last_addr & PAGE_MASK) )
   4.355 -        {
   4.356 -            if ( sign > 0 )
   4.357 -                count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
   4.358 -            else
   4.359 -                count = (addr & ~PAGE_MASK) / size + 1;
   4.360 -        } else
   4.361 -            regs->eip += inst_len;
   4.362 -
   4.363 -        send_pio_req(port, count, size, paddr, dir, df, 1);
   4.364 -    }
   4.365 -}
   4.366 -
   4.367 -static void vmx_do_str_pio(unsigned long exit_qualification,
   4.368 -                           unsigned long inst_len,
   4.369 -                           struct cpu_user_regs *regs,
   4.370 -                           struct hvm_io_op *pio_opp)
   4.371 -{
   4.372 -    unsigned int port, size;
   4.373 -    int dir, df, vm86;
   4.374 -    unsigned long addr, count = 1, base;
   4.375 -    paddr_t paddr;
   4.376 -    unsigned long gfn;
   4.377 -    u32 ar_bytes, limit, pfec;
   4.378 -    int sign;
   4.379 -    int long_mode = 0;
   4.380 -
   4.381 -    vm86 = regs->eflags & X86_EFLAGS_VM ? 1 : 0;
   4.382 -    df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
   4.383 -
   4.384 -    if ( test_bit(6, &exit_qualification) )
   4.385 -        port = (exit_qualification >> 16) & 0xFFFF;
   4.386 -    else
   4.387 -        port = regs->edx & 0xffff;
   4.388 -
   4.389 -    size = (exit_qualification & 7) + 1;
   4.390 -    dir = test_bit(3, &exit_qualification); /* direction */
   4.391 -
   4.392 -    if ( dir == IOREQ_READ )
   4.393 -        HVMTRACE_2D(IO_READ,  current, port, size);
   4.394 -    else
   4.395 -        HVMTRACE_2D(IO_WRITE, current, port, size);
   4.396 -
   4.397 -    sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
   4.398 -    ar_bytes = __vmread(GUEST_CS_AR_BYTES);
   4.399 -    if ( hvm_long_mode_enabled(current) &&
   4.400 -         (ar_bytes & X86_SEG_AR_CS_LM_ACTIVE) )
   4.401 -        long_mode = 1;
   4.402 -    addr = __vmread(GUEST_LINEAR_ADDRESS);
   4.403 -
   4.404 -    if ( test_bit(5, &exit_qualification) ) { /* "rep" prefix */
   4.405 -        pio_opp->flags |= REPZ;
   4.406 -        count = regs->ecx;
   4.407 -        if ( !long_mode &&
   4.408 -            (vm86 || !(ar_bytes & X86_SEG_AR_DEF_OP_SIZE)) )
   4.409 -            count &= 0xFFFF;
   4.410 -    }
   4.411 -
   4.412 -    /*
   4.413 -     * In protected mode, guest linear address is invalid if the
   4.414 -     * selector is null.
   4.415 -     */
   4.416 -    if ( !vmx_str_pio_check_descriptor(long_mode, regs->eip, inst_len,
   4.417 -                                       dir==IOREQ_WRITE ? x86_seg_ds :
   4.418 -                                       x86_seg_es, &base, &limit,
   4.419 -                                       &ar_bytes) ) {
   4.420 -        if ( !long_mode ) {
   4.421 -            vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
   4.422 -            return;
   4.423 -        }
   4.424 -        addr = dir == IOREQ_WRITE ? base + regs->esi : regs->edi;
   4.425 -    }
   4.426 -
   4.427 -    if ( !long_mode )
   4.428 -    {
   4.429 -        /* Segment must be readable for outs and writeable for ins. */
   4.430 -        if ( ((dir == IOREQ_WRITE)
   4.431 -              ? ((ar_bytes & 0xa) == 0x8)
   4.432 -              : ((ar_bytes & 0xa) != 0x2)) ||
   4.433 -             !vmx_str_pio_check_limit(limit, size, ar_bytes,
   4.434 -                                      addr, base, df, &count) )
   4.435 -        {
   4.436 -            vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
   4.437 -            return;
   4.438 -        }
   4.439 -    }
   4.440 -#ifdef __x86_64__
   4.441 -    else if ( !vmx_str_pio_lm_check_limit(regs, size, addr, &count) )
   4.442 -    {
   4.443 -        vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
   4.444 -        return;
   4.445 -    }
   4.446 -#endif
   4.447 -
   4.448 -    /* Translate the address to a physical address */
   4.449 -    pfec = PFEC_page_present;
   4.450 -    if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
   4.451 -        pfec |= PFEC_write_access;
   4.452 -    if ( ((__vmread(GUEST_SS_AR_BYTES) >> 5) & 3) == 3 )
   4.453 -        pfec |= PFEC_user_mode;
   4.454 -    gfn = paging_gva_to_gfn(current, addr, &pfec);
   4.455 -    if ( gfn == INVALID_GFN )
   4.456 -    {
   4.457 -        /* The guest does not have the RAM address mapped.
   4.458 -         * We must inject a page fault. */
   4.459 -        vmx_inject_exception(TRAP_page_fault, pfec, addr);
   4.460 -        return;
   4.461 -    }
   4.462 -    paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
   4.463 -
   4.464 -    vmx_send_str_pio(regs, pio_opp, inst_len, port, sign,
   4.465 -                     size, dir, df, addr, paddr, count);
   4.466 -}
   4.467 -
   4.468 -static void vmx_io_instruction(unsigned long exit_qualification,
   4.469 -                               unsigned long inst_len)
   4.470 -{
   4.471 -    struct cpu_user_regs *regs;
   4.472 -    struct hvm_io_op *pio_opp;
   4.473 -
   4.474 -    pio_opp = &current->arch.hvm_vcpu.io_op;
   4.475 -    pio_opp->instr = INSTR_PIO;
   4.476 -    pio_opp->flags = 0;
   4.477 -
   4.478 -    regs = &pio_opp->io_context;
   4.479 -
   4.480 -    /* Copy current guest state into io instruction state structure. */
   4.481 -    memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
   4.482 -
   4.483 -    HVM_DBG_LOG(DBG_LEVEL_IO, "vm86 %d, eip=%x:%lx, "
   4.484 -                "exit_qualification = %lx",
   4.485 -                regs->eflags & X86_EFLAGS_VM ? 1 : 0,
   4.486 -                regs->cs, (unsigned long)regs->eip, exit_qualification);
   4.487 -
   4.488 -    if ( test_bit(4, &exit_qualification) ) /* string instruction */
   4.489 -        vmx_do_str_pio(exit_qualification, inst_len, regs, pio_opp);
   4.490 -    else
   4.491 -    {
   4.492 -        unsigned int port, size;
   4.493 -        int dir, df;
   4.494 -
   4.495 -        df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
   4.496 -
   4.497 -        if ( test_bit(6, &exit_qualification) )
   4.498 -            port = (exit_qualification >> 16) & 0xFFFF;
   4.499 -        else
   4.500 -            port = regs->edx & 0xffff;
   4.501 -
   4.502 -        size = (exit_qualification & 7) + 1;
   4.503 -        dir = test_bit(3, &exit_qualification); /* direction */
   4.504 -
   4.505 -        if ( dir == IOREQ_READ )
   4.506 -            HVMTRACE_2D(IO_READ,  current, port, size);
   4.507 -        else
   4.508 -            HVMTRACE_3D(IO_WRITE, current, port, size, regs->eax);
   4.509 -
   4.510 -        if ( port == 0xe9 && dir == IOREQ_WRITE && size == 1 )
   4.511 -            hvm_print_line(current, regs->eax); /* guest debug output */
   4.512 -
   4.513 -        regs->eip += inst_len;
   4.514 -        send_pio_req(port, 1, size, regs->eax, dir, df, 0);
   4.515 -    }
   4.516 +    struct vcpu *curr = current;
   4.517 +    HVMTRACE_2D(INVLPG, curr, /*invlpga=*/ 0, vaddr);
   4.518 +    paging_invlpg(curr, vaddr);
   4.519  }
   4.520  
   4.521  #define CASE_SET_REG(REG, reg)      \
   4.522 @@ -2541,7 +2072,7 @@ asmlinkage void vmx_vmexit_handler(struc
   4.523          inst_len = __get_instruction_length(); /* Safe: INVLPG */
   4.524          __update_guest_eip(inst_len);
   4.525          exit_qualification = __vmread(EXIT_QUALIFICATION);
   4.526 -        vmx_do_invlpg(exit_qualification);
   4.527 +        vmx_invlpg_intercept(exit_qualification);
   4.528          break;
   4.529      }
   4.530      case EXIT_REASON_VMCALL:
   4.531 @@ -2570,11 +2101,6 @@ asmlinkage void vmx_vmexit_handler(struc
   4.532          exit_qualification = __vmread(EXIT_QUALIFICATION);
   4.533          vmx_dr_access(exit_qualification, regs);
   4.534          break;
   4.535 -    case EXIT_REASON_IO_INSTRUCTION:
   4.536 -        exit_qualification = __vmread(EXIT_QUALIFICATION);
   4.537 -        inst_len = __get_instruction_length(); /* Safe: IN, INS, OUT, OUTS */
   4.538 -        vmx_io_instruction(exit_qualification, inst_len);
   4.539 -        break;
   4.540      case EXIT_REASON_MSR_READ:
   4.541          inst_len = __get_instruction_length(); /* Safe: RDMSR */
   4.542          if ( vmx_msr_read_intercept(regs) == X86EMUL_OKAY )
   4.543 @@ -2603,15 +2129,11 @@ asmlinkage void vmx_vmexit_handler(struc
   4.544      case EXIT_REASON_TPR_BELOW_THRESHOLD:
   4.545          break;
   4.546  
   4.547 +    case EXIT_REASON_IO_INSTRUCTION:
   4.548      case EXIT_REASON_APIC_ACCESS:
   4.549 -    {
   4.550 -        unsigned long offset;
   4.551 -        exit_qualification = __vmread(EXIT_QUALIFICATION);
   4.552 -        offset = exit_qualification & 0x0fffUL;
   4.553          if ( !handle_mmio() )
   4.554              hvm_inject_exception(TRAP_gp_fault, 0, 0);
   4.555          break;
   4.556 -    }
   4.557  
   4.558      case EXIT_REASON_INVD:
   4.559      case EXIT_REASON_WBINVD:
   4.560 @@ -2632,9 +2154,7 @@ asmlinkage void vmx_vmexit_handler(struc
   4.561  
   4.562  asmlinkage void vmx_trace_vmentry(void)
   4.563  {
   4.564 -    struct vcpu *v = current;
   4.565 -    
   4.566 -    hvmtrace_vmentry(v);
   4.567 +    hvmtrace_vmentry(current);
   4.568  }
   4.569  
   4.570  /*
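
vmx_invlpg_intercept() is now reached on two paths: directly from the INVLPG vmexit, where EXIT_QUALIFICATION carries the linear address, and from the emulator through the new invlpg_intercept table entry. A minimal sketch of the indirection, assuming the hook is populated on every HVM platform:

    /* Sketch: common HVM code flushes a guest virtual address via the
     * function table, never naming a VMX or SVM symbol directly. */
    static inline void hvm_invlpg_intercept(unsigned long vaddr)
    {
        hvm_funcs.invlpg_intercept(vaddr);  /* vmx_/svm_invlpg_intercept */
    }
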
     5.1 --- a/xen/arch/x86/x86_emulate.c	Thu Feb 21 15:06:37 2008 +0000
     5.2 +++ b/xen/arch/x86/x86_emulate.c	Thu Feb 21 18:02:42 2008 +0000
     5.3 @@ -3036,6 +3036,17 @@ x86_emulate(
     5.4          struct segment_register reg;
     5.5          unsigned long base, limit, cr0, cr0w;
     5.6  
     5.7 +        if ( modrm == 0xdf ) /* invlpga */
     5.8 +        {
     5.9 +            generate_exception_if(in_realmode(ctxt, ops), EXC_UD);
    5.10 +            generate_exception_if(!mode_ring0(), EXC_GP);
    5.11 +            fail_if(ops->invlpg == NULL);
    5.12 +            if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.eax),
    5.13 +                                   ctxt)) )
    5.14 +                goto done;
    5.15 +            break;
    5.16 +        }
    5.17 +
    5.18          switch ( modrm_reg & 7 )
    5.19          {
    5.20          case 0: /* sgdt */
    5.21 @@ -3096,6 +3107,13 @@ x86_emulate(
    5.22              if ( (rc = ops->write_cr(0, cr0, ctxt)) )
    5.23                  goto done;
    5.24              break;
    5.25 +        case 7: /* invlpg */
    5.26 +            generate_exception_if(!mode_ring0(), EXC_GP);
    5.27 +            generate_exception_if(ea.type != OP_MEM, EXC_UD);
    5.28 +            fail_if(ops->invlpg == NULL);
    5.29 +            if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) )
    5.30 +                goto done;
    5.31 +            break;
    5.32          default:
    5.33              goto cannot_emulate;
    5.34          }
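
Both new cases key off the ModRM byte of opcode 0F 01: the literal byte 0xDF selects INVLPGA, while reg field /7 with a memory operand selects INVLPG (this emulator rejects the register encoding with #UD). A standalone illustration of that split, using hypothetical names:

    #include <stdint.h>

    /* Illustration: ModRM dispatch for opcode 0F 01, as decoded above. */
    enum g7_insn { G7_OTHER, G7_INVLPGA, G7_INVLPG, G7_UD };

    static enum g7_insn classify_0f01(uint8_t modrm)
    {
        if ( modrm == 0xdf )                /* whole byte: INVLPGA */
            return G7_INVLPGA;
        if ( ((modrm >> 3) & 7) == 7 )      /* reg field /7: INVLPG */
            return ((modrm >> 6) == 3) ? G7_UD : G7_INVLPG;
        return G7_OTHER;                    /* sgdt/sidt/lgdt/lidt/... */
    }
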
     6.1 --- a/xen/include/asm-x86/hvm/hvm.h	Thu Feb 21 15:06:37 2008 +0000
     6.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Thu Feb 21 18:02:42 2008 +0000
     6.3 @@ -81,7 +81,6 @@ struct hvm_function_table {
     6.4       */
     6.5      enum hvm_intblk (*interrupt_blocked)(struct vcpu *v, struct hvm_intack);
     6.6      int (*guest_x86_mode)(struct vcpu *v);
     6.7 -    unsigned long (*get_segment_base)(struct vcpu *v, enum x86_segment seg);
     6.8      void (*get_segment_register)(struct vcpu *v, enum x86_segment seg,
     6.9                                   struct segment_register *reg);
    6.10      void (*set_segment_register)(struct vcpu *v, enum x86_segment seg,
    6.11 @@ -126,6 +125,7 @@ struct hvm_function_table {
    6.12      void (*fpu_dirty_intercept)(void);
    6.13      int (*msr_read_intercept)(struct cpu_user_regs *regs);
    6.14      int (*msr_write_intercept)(struct cpu_user_regs *regs);
    6.15 +    void (*invlpg_intercept)(unsigned long vaddr);
    6.16  };
    6.17  
    6.18  extern struct hvm_function_table hvm_funcs;
    6.19 @@ -198,12 +198,6 @@ hvm_flush_guest_tlbs(void)
    6.20  void hvm_hypercall_page_initialise(struct domain *d,
    6.21                                     void *hypercall_page);
    6.22  
    6.23 -static inline unsigned long
    6.24 -hvm_get_segment_base(struct vcpu *v, enum x86_segment seg)
    6.25 -{
    6.26 -    return hvm_funcs.get_segment_base(v, seg);
    6.27 -}
    6.28 -
    6.29  static inline void
    6.30  hvm_get_segment_register(struct vcpu *v, enum x86_segment seg,
    6.31                           struct segment_register *reg)
    6.32 @@ -321,7 +315,10 @@ void hvm_task_switch(
    6.33      int32_t errcode);
    6.34  
    6.35  enum hvm_access_type {
    6.36 -    hvm_access_insn_fetch, hvm_access_read, hvm_access_write
    6.37 +    hvm_access_insn_fetch,
    6.38 +    hvm_access_none,
    6.39 +    hvm_access_read,
    6.40 +    hvm_access_write
    6.41  };
    6.42  int hvm_virtual_to_linear_addr(
    6.43      enum x86_segment seg,
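
hvm_access_none serves callers that need the segmentation arithmetic without asserting a read or write, which is exactly the INVLPG case: a linear address is formed but no byte is accessed. A hedged sketch of how an access-type check can special-case it (illustrative helper; the real checks live in hvm_virtual_to_linear_addr()):

    /* Sketch: permission portion of a virtual-to-linear translation.
     * hvm_access_none skips the read/write attribute checks. */
    static int access_ok(enum hvm_access_type acc, int readable, int writable)
    {
        switch ( acc )
        {
        case hvm_access_none:
            return 1;                /* address formation only */
        case hvm_access_read:
        case hvm_access_insn_fetch:
            return readable;
        case hvm_access_write:
            return writable;
        }
        return 0;
    }
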
     7.1 --- a/xen/include/asm-x86/hvm/svm/emulate.h	Thu Feb 21 15:06:37 2008 +0000
     7.2 +++ b/xen/include/asm-x86/hvm/svm/emulate.h	Thu Feb 21 18:02:42 2008 +0000
     7.3 @@ -15,31 +15,11 @@
     7.4   * You should have received a copy of the GNU General Public License along with
     7.5   * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
     7.6   * Place - Suite 330, Boston, MA 02111-1307 USA.
     7.7 - *
     7.8   */
     7.9  
    7.10  #ifndef __ASM_X86_HVM_SVM_EMULATE_H__
    7.11  #define __ASM_X86_HVM_SVM_EMULATE_H__
    7.12  
    7.13 -typedef enum OPERATING_MODE_ {
    7.14 -    INVALID_OPERATING_MODE = -1,
    7.15 -    LEGACY_MODE,
    7.16 -    LEGACY_16BIT,
    7.17 -    LONG_MODE,
    7.18 -    COMP_MODE,
    7.19 -    COMP_16BIT,
    7.20 -    OPMODE_16BIT,
    7.21 -
    7.22 -    LEGACY_32BIT,
    7.23 -    COMP_32BIT,
    7.24 -    OPMODE_32BIT,
    7.25 -
    7.26 -    LONG_64BIT,
    7.27 -    UNKNOWN_OP_MODE,
    7.28 -    NUM_OPERATING_MODES
    7.29 -} OPERATING_MODE;
    7.30 -
    7.31 -
    7.32  /* Enumerate some standard instructions that we support */
    7.33  enum instruction_index {
    7.34      INSTR_INVD,
    7.35 @@ -47,49 +27,16 @@ enum instruction_index {
    7.36      INSTR_CPUID,
    7.37      INSTR_RDMSR,
    7.38      INSTR_WRMSR,
    7.39 -    INSTR_CLI,
    7.40 -    INSTR_STI,
    7.41 -    INSTR_RDPMC,
    7.42 -    INSTR_CLGI,
    7.43 -    INSTR_STGI,
    7.44 -    INSTR_VMRUN,
    7.45 -    INSTR_VMLOAD,
    7.46 -    INSTR_VMSAVE,
    7.47      INSTR_VMCALL,
    7.48 -    INSTR_PAUSE,
    7.49 -    INSTR_SKINIT,
    7.50 -    INSTR_MOV2CR, /* Mov register to CR */
    7.51 -    INSTR_MOVCR2, /* Not MOV CR2, but MOV CRn to register  */
    7.52 -    INSTR_MOV2DR,
    7.53 -    INSTR_MOVDR2,
    7.54 -    INSTR_PUSHF,
    7.55 -    INSTR_POPF,
    7.56 -    INSTR_RSM,
    7.57 -    INSTR_INVLPG,
    7.58 -    INSTR_INVLPGA,
    7.59      INSTR_HLT,
    7.60 -    INSTR_CLTS,
    7.61 -    INSTR_LMSW,
    7.62 -    INSTR_SMSW,
    7.63      INSTR_INT3,
    7.64      INSTR_MAX_COUNT /* Must be last - Number of instructions supported */
    7.65  };
    7.66  
    7.67 -
    7.68 -extern unsigned long get_effective_addr_modrm64(
    7.69 -        struct cpu_user_regs *regs, const u8 prefix, int inst_len,
    7.70 -        const u8 *operand, u8 *size);
    7.71 -extern unsigned long get_effective_addr_sib(struct vmcb_struct *vmcb, 
    7.72 -        struct cpu_user_regs *regs, const u8 prefix, const u8 *operand, 
    7.73 -        u8 *size);
    7.74 -extern OPERATING_MODE get_operating_mode (struct vmcb_struct *vmcb);
    7.75 -extern unsigned int decode_dest_reg(u8 prefix, u8 modrm);
    7.76 -extern unsigned int decode_src_reg(u8 prefix, u8 modrm);
    7.77 -extern unsigned long svm_rip2pointer(struct vcpu *v);
    7.78 -extern int __get_instruction_length_from_list(struct vcpu *v,
    7.79 -        enum instruction_index *list, unsigned int list_count, 
    7.80 -        u8 *guest_eip_buf, enum instruction_index *match);
    7.81 -
    7.82 +int __get_instruction_length_from_list(
    7.83 +    struct vcpu *v,
    7.84 +    enum instruction_index *list, unsigned int list_count,
    7.85 +    u8 *guest_eip_buf, enum instruction_index *match);
    7.86  
    7.87  static inline int __get_instruction_length(struct vcpu *v, 
    7.88          enum instruction_index instr, u8 *guest_eip_buf)
    7.89 @@ -98,38 +45,6 @@ static inline int __get_instruction_leng
    7.90          v, &instr, 1, guest_eip_buf, NULL);
    7.91  }
    7.92  
    7.93 -
    7.94 -static inline unsigned int is_prefix(u8 opc)
    7.95 -{
    7.96 -    switch ( opc ) {
    7.97 -    case 0x66:
    7.98 -    case 0x67:
    7.99 -    case 0x2E:
   7.100 -    case 0x3E:
   7.101 -    case 0x26:
   7.102 -    case 0x64:
   7.103 -    case 0x65:
   7.104 -    case 0x36:
   7.105 -    case 0xF0:
   7.106 -    case 0xF3:
   7.107 -    case 0xF2:
   7.108 -#if __x86_64__
   7.109 -    case 0x40 ... 0x4f:
   7.110 -#endif /* __x86_64__ */
   7.111 -        return 1;
   7.112 -    }
   7.113 -    return 0;
   7.114 -}
   7.115 -
   7.116 -
   7.117 -static inline int skip_prefix_bytes(u8 *buf, size_t size)
   7.118 -{
   7.119 -    int index;
   7.120 -    for ( index = 0; index < size && is_prefix(buf[index]); index++ )
   7.121 -        continue;
   7.122 -    return index;
   7.123 -}
   7.124 -
   7.125  #endif /* __ASM_X86_HVM_SVM_EMULATE_H__ */
   7.126  
   7.127  /*
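
With the hand-rolled decode helpers gone, this header keeps only instruction-length lookup for the few intercepts SVM still measures itself. Typical use, sketched with v and regs taken from the vmexit handler and an assumed rIP helper:

    /* Sketch: skip an intercepted VMMCALL by advancing the guest rIP. */
    int inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
    if ( inst_len > 0 )
        __update_guest_eip(regs, inst_len);  /* assumed helper */
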
     8.1 --- a/xen/include/asm-x86/x86_emulate.h	Thu Feb 21 15:06:37 2008 +0000
     8.2 +++ b/xen/include/asm-x86/x86_emulate.h	Thu Feb 21 18:02:42 2008 +0000
     8.3 @@ -354,6 +354,12 @@ struct x86_emulate_ops
     8.4      /* load_fpu_ctxt: Load emulated environment's FPU state onto processor. */
     8.5      void (*load_fpu_ctxt)(
     8.6          struct x86_emulate_ctxt *ctxt);
     8.7 +
     8.8 +    /* invlpg: Invalidate paging structures which map addressed byte. */
     8.9 +    int (*invlpg)(
    8.10 +        enum x86_segment seg,
    8.11 +        unsigned long offset,
    8.12 +        struct x86_emulate_ctxt *ctxt);
    8.13  };
    8.14  
    8.15  struct cpu_user_regs;
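
A back-end with no MMU to flush (for example a harness driving x86_emulate() directly) can satisfy the hook with a stub, since INVLPG has no register-visible result and X86EMUL_OKAY is the only state the emulator needs back. A sketch:

    /* Sketch: minimal invlpg callback for an emulator test harness. */
    static int harness_invlpg(
        enum x86_segment seg,
        unsigned long offset,
        struct x86_emulate_ctxt *ctxt)
    {
        (void)seg; (void)offset; (void)ctxt;
        return X86EMUL_OKAY;  /* nothing to invalidate */
    }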