LCOV - code coverage report
Current view: top level - x86_instruction_emulator/x86_emulate - x86_emulate.c (source / functions) Hit Total Coverage
Test: trace.lcov_info_final Lines: 3105 3307 93.9 %
Date: 2017-04-07 10:24:39 Functions: 36 39 92.3 %

          Line data    Source code
       1             : /******************************************************************************
       2             :  * x86_emulate.c
       3             :  * 
       4             :  * Generic x86 (32-bit and 64-bit) instruction decoder and emulator.
       5             :  * 
       6             :  * Copyright (c) 2005-2007 Keir Fraser
       7             :  * Copyright (c) 2005-2007 XenSource Inc.
       8             :  * 
       9             :  * This program is free software; you can redistribute it and/or modify
      10             :  * it under the terms of the GNU General Public License as published by
      11             :  * the Free Software Foundation; either version 2 of the License, or
      12             :  * (at your option) any later version.
      13             :  * 
      14             :  * This program is distributed in the hope that it will be useful,
      15             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      16             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      17             :  * GNU General Public License for more details.
      18             :  * 
      19             :  * You should have received a copy of the GNU General Public License
      20             :  * along with this program; If not, see <http://www.gnu.org/licenses/>.
      21             :  */
      22             : 
/*
 * Decode attribute flags, OR-ed together into an opcode_desc_t (uint8_t):
 *   bit 0    - ByteOp
 *   bits 1-2 - destination operand type (DstMask)
 *   bits 3-5 - source operand type (SrcMask)
 *   bit 6    - ModRM / vSIB byte present
 *   bit 7    - Mov (aka TwoOp for SIMD)
 */
/* Operand sizes: 8-bit operands or specified/overridden size. */
#define ByteOp      (1<<0) /* 8-bit operands. */
/* Destination operand type. */
#define DstNone     (0<<1) /* No destination operand. */
#define DstImplicit (0<<1) /* Destination operand is implicit in the opcode. */
#define DstBitBase  (1<<1) /* Memory operand, bit string. */
#define DstReg      (2<<1) /* Register operand. */
#define DstEax      DstReg /* Register EAX (aka DstReg with no ModRM) */
#define DstMem      (3<<1) /* Memory operand. */
#define DstMask     (3<<1) /* Mask covering the destination-type field. */
/* Source operand type. */
#define SrcNone     (0<<3) /* No source operand. */
#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
#define SrcReg      (1<<3) /* Register operand. */
#define SrcEax      SrcReg /* Register EAX (aka SrcReg with no ModRM) */
#define SrcMem      (2<<3) /* Memory operand. */
#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
#define SrcImm      (4<<3) /* Immediate operand. */
#define SrcImmByte  (5<<3) /* 8-bit sign-extended immediate operand. */
#define SrcImm16    (6<<3) /* 16-bit zero-extended immediate operand. */
#define SrcMask     (7<<3) /* Mask covering the source-type field. */
/* Generic ModRM decode. */
#define ModRM       (1<<6)
/* vSIB addressing mode (0f38 extension opcodes only), aliasing ModRM. */
#define vSIB        (1<<6)
/* Destination is only written; never read. */
#define Mov         (1<<7)
/* VEX/EVEX (SIMD only): 2nd source operand unused (must be all ones) */
#define TwoOp       Mov
/* All operands are implicit in the opcode. */
#define ImplicitOps (DstImplicit|SrcImplicit)

/* One decode descriptor per opcode; must hold all of the flags above. */
typedef uint8_t opcode_desc_t;
      56             : 
/*
 * Decode attributes for the one-byte opcode map, indexed by opcode byte.
 * A zero entry marks an opcode this emulator does not decode via this
 * table (e.g. prefix bytes 0x26/0x2e/0x36/0x3e/0x64-0x67/0xf0/0xf2/0xf3).
 */
static const opcode_desc_t opcode_table[256] = {
    /* 0x00 - 0x07 */
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
    /* 0x08 - 0x0F */
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, 0,
    /* 0x10 - 0x17 */
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
    /* 0x18 - 0x1F */
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps|Mov, ImplicitOps|Mov,
    /* 0x20 - 0x27 */
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
    /* 0x28 - 0x2F */
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
    /* 0x30 - 0x37 */
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
    /* 0x38 - 0x3F */
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
    /* 0x40 - 0x4F */
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
    /* 0x50 - 0x5F */
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
    /* 0x60 - 0x67 */
    ImplicitOps, ImplicitOps, DstReg|SrcMem|ModRM, DstReg|SrcNone|ModRM|Mov,
    0, 0, 0, 0,
    /* 0x68 - 0x6F */
    DstImplicit|SrcImm|Mov, DstReg|SrcImm|ModRM|Mov,
    DstImplicit|SrcImmByte|Mov, DstReg|SrcImmByte|ModRM|Mov,
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps|Mov,
    /* 0x70 - 0x77 */
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    /* 0x78 - 0x7F */
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    /* 0x80 - 0x87 */
    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImm|ModRM,
    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
    ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
    ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
    /* 0x88 - 0x8F */
    ByteOp|DstMem|SrcReg|ModRM|Mov, DstMem|SrcReg|ModRM|Mov,
    ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
    DstMem|SrcReg|ModRM|Mov, DstReg|SrcNone|ModRM,
    DstReg|SrcMem16|ModRM|Mov, DstMem|SrcNone|ModRM|Mov,
    /* 0x90 - 0x97 */
    DstImplicit|SrcEax, DstImplicit|SrcEax,
    DstImplicit|SrcEax, DstImplicit|SrcEax,
    DstImplicit|SrcEax, DstImplicit|SrcEax,
    DstImplicit|SrcEax, DstImplicit|SrcEax,
    /* 0x98 - 0x9F */
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
    ImplicitOps|Mov, ImplicitOps|Mov, ImplicitOps, ImplicitOps,
    /* 0xA0 - 0xA7 */
    ByteOp|DstEax|SrcMem|Mov, DstEax|SrcMem|Mov,
    ByteOp|DstMem|SrcEax|Mov, DstMem|SrcEax|Mov,
    ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
    ByteOp|ImplicitOps, ImplicitOps,
    /* 0xA8 - 0xAF */
    ByteOp|DstEax|SrcImm, DstEax|SrcImm,
    ByteOp|DstImplicit|SrcEax|Mov, DstImplicit|SrcEax|Mov,
    ByteOp|DstEax|SrcImplicit|Mov, DstEax|SrcImplicit|Mov,
    ByteOp|DstImplicit|SrcEax, DstImplicit|SrcEax,
    /* 0xB0 - 0xB7 */
    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
    ByteOp|DstReg|SrcImm|Mov, ByteOp|DstReg|SrcImm|Mov,
    /* 0xB8 - 0xBF */
    DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
    DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov, DstReg|SrcImm|Mov,
    /* 0xC0 - 0xC7 */
    ByteOp|DstMem|SrcImm|ModRM, DstMem|SrcImmByte|ModRM,
    DstImplicit|SrcImm16, ImplicitOps,
    DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem|ModRM|Mov,
    ByteOp|DstMem|SrcImm|ModRM|Mov, DstMem|SrcImm|ModRM|Mov,
    /* 0xC8 - 0xCF */
    DstImplicit|SrcImm16, ImplicitOps, DstImplicit|SrcImm16, ImplicitOps,
    ImplicitOps, DstImplicit|SrcImmByte, ImplicitOps, ImplicitOps,
    /* 0xD0 - 0xD7 */
    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
    ByteOp|DstMem|SrcImplicit|ModRM, DstMem|SrcImplicit|ModRM,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte, ImplicitOps, ImplicitOps,
    /* 0xD8 - 0xDF */
    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
    ImplicitOps|ModRM, ImplicitOps|ModRM|Mov,
    DstImplicit|SrcMem16|ModRM, ImplicitOps|ModRM|Mov,
    /* 0xE0 - 0xE7 */
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    DstEax|SrcImmByte, DstEax|SrcImmByte,
    DstImplicit|SrcImmByte, DstImplicit|SrcImmByte,
    /* 0xE8 - 0xEF */
    DstImplicit|SrcImm|Mov, DstImplicit|SrcImm,
    ImplicitOps, DstImplicit|SrcImmByte,
    DstEax|SrcImplicit, DstEax|SrcImplicit, ImplicitOps, ImplicitOps,
    /* 0xF0 - 0xF7 */
    0, ImplicitOps, 0, 0,
    ImplicitOps, ImplicitOps, ByteOp|ModRM, ModRM,
    /* 0xF8 - 0xFF */
    ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
    ImplicitOps, ImplicitOps, ByteOp|DstMem|SrcNone|ModRM, DstMem|SrcNone|ModRM
};
     186             : 
/*
 * Classification of how a SIMD instruction's operand size is derived
 * from its legacy/VEX prefix encoding.
 */
enum simd_opsize {
    simd_none,

    /*
     * Ordinary packed integers:
     * - 64 bits without prefix 66 (MMX)
     * - 128 bits with prefix 66 (SSEn)
     * - 128/256 bits depending on VEX.L (AVX)
     */
    simd_packed_int,

    /*
     * Ordinary packed/scalar floating point:
     * - 128 bits without prefix or with prefix 66 (SSEn)
     * - 128/256 bits depending on VEX.L (AVX)
     * - 32 bits with prefix F3 (scalar single)
     * - 64 bits with prefix F2 (scalar double)
     */
    simd_any_fp,

    /*
     * Packed floating point:
     * - 128 bits without prefix or with prefix 66 (SSEn)
     * - 128/256 bits depending on VEX.L (AVX)
     */
    simd_packed_fp,

    /*
     * Single precision packed/scalar floating point:
     * - 128 bits without prefix (SSEn)
     * - 128/256 bits depending on VEX.L, no prefix (AVX)
     * - 32 bits with prefix F3 (scalar)
     */
    simd_single_fp,

    /*
     * Scalar floating point:
     * - 32 bits with low opcode bit clear (scalar single)
     * - 64 bits with low opcode bit set (scalar double)
     */
    simd_scalar_fp,

    /* Operand size encoded in non-standard way. */
    simd_other
};
/* Compact storage type for enum simd_opsize values in the decode tables. */
typedef uint8_t simd_opsize_t;
     233             : 
/*
 * Decode attributes for the two-byte (0x0f-prefixed) opcode map, indexed
 * by the second opcode byte.  "size" classifies SIMD operand sizing (see
 * enum simd_opsize); entries without a size default to simd_none.
 */
static const struct {
    opcode_desc_t desc;
    simd_opsize_t size;
} twobyte_table[256] = {
    [0x00] = { ModRM },
    [0x01] = { ImplicitOps|ModRM },
    [0x02] = { DstReg|SrcMem16|ModRM },
    [0x03] = { DstReg|SrcMem16|ModRM },
    [0x05] = { ImplicitOps },
    [0x06] = { ImplicitOps },
    [0x07] = { ImplicitOps },
    [0x08] = { ImplicitOps },
    [0x09] = { ImplicitOps },
    [0x0b] = { ImplicitOps },
    [0x0d] = { ImplicitOps|ModRM },
    [0x0e] = { ImplicitOps },
    [0x0f] = { ModRM|SrcImmByte },
    [0x10] = { DstImplicit|SrcMem|ModRM|Mov, simd_any_fp },
    [0x11] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp },
    [0x12] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
    [0x13] = { DstMem|SrcImplicit|ModRM|Mov, simd_other },
    [0x14 ... 0x15] = { DstImplicit|SrcMem|ModRM, simd_packed_fp },
    [0x16] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
    [0x17] = { DstMem|SrcImplicit|ModRM|Mov, simd_other },
    [0x18 ... 0x1f] = { ImplicitOps|ModRM },
    [0x20 ... 0x21] = { DstMem|SrcImplicit|ModRM },
    [0x22 ... 0x23] = { DstImplicit|SrcMem|ModRM },
    [0x28] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_fp },
    [0x29] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_fp },
    [0x2a] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
    [0x2b] = { DstMem|SrcImplicit|ModRM|Mov, simd_any_fp },
    [0x2c ... 0x2d] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
    [0x2e ... 0x2f] = { ImplicitOps|ModRM|TwoOp },
    [0x30 ... 0x35] = { ImplicitOps },
    [0x37] = { ImplicitOps },
    [0x38] = { DstReg|SrcMem|ModRM },
    [0x3a] = { DstReg|SrcImmByte|ModRM },
    [0x40 ... 0x4f] = { DstReg|SrcMem|ModRM|Mov },
    [0x50] = { DstReg|SrcImplicit|ModRM|Mov },
    [0x51] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_any_fp },
    [0x52 ... 0x53] = { DstImplicit|SrcMem|ModRM|TwoOp, simd_single_fp },
    [0x54 ... 0x57] = { DstImplicit|SrcMem|ModRM, simd_packed_fp },
    [0x58 ... 0x59] = { DstImplicit|SrcMem|ModRM, simd_any_fp },
    [0x5a ... 0x5b] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
    [0x5c ... 0x5f] = { DstImplicit|SrcMem|ModRM, simd_any_fp },
    [0x60 ... 0x62] = { DstImplicit|SrcMem|ModRM, simd_other },
    [0x63 ... 0x67] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0x68 ... 0x6a] = { DstImplicit|SrcMem|ModRM, simd_other },
    [0x6b ... 0x6d] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0x6e] = { DstImplicit|SrcMem|ModRM|Mov },
    [0x6f] = { DstImplicit|SrcMem|ModRM|Mov, simd_packed_int },
    [0x70] = { SrcImmByte|ModRM|TwoOp, simd_other },
    [0x71 ... 0x73] = { DstImplicit|SrcImmByte|ModRM },
    [0x74 ... 0x76] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0x77] = { DstImplicit|SrcNone },
    [0x78] = { ImplicitOps|ModRM },
    [0x79] = { DstReg|SrcMem|ModRM, simd_packed_int },
    [0x7c ... 0x7d] = { DstImplicit|SrcMem|ModRM, simd_other },
    [0x7e] = { DstMem|SrcImplicit|ModRM|Mov },
    [0x7f] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int },
    [0x80 ... 0x8f] = { DstImplicit|SrcImm },
    [0x90 ... 0x9f] = { ByteOp|DstMem|SrcNone|ModRM|Mov },
    [0xa0 ... 0xa1] = { ImplicitOps|Mov },
    [0xa2] = { ImplicitOps },
    [0xa3] = { DstBitBase|SrcReg|ModRM },
    [0xa4] = { DstMem|SrcImmByte|ModRM },
    [0xa5] = { DstMem|SrcReg|ModRM },
    [0xa6 ... 0xa7] = { ModRM },
    [0xa8 ... 0xa9] = { ImplicitOps|Mov },
    [0xaa] = { ImplicitOps },
    [0xab] = { DstBitBase|SrcReg|ModRM },
    [0xac] = { DstMem|SrcImmByte|ModRM },
    [0xad] = { DstMem|SrcReg|ModRM },
    [0xae] = { ImplicitOps|ModRM },
    [0xaf] = { DstReg|SrcMem|ModRM },
    [0xb0] = { ByteOp|DstMem|SrcReg|ModRM },
    [0xb1] = { DstMem|SrcReg|ModRM },
    [0xb2] = { DstReg|SrcMem|ModRM|Mov },
    [0xb3] = { DstBitBase|SrcReg|ModRM },
    [0xb4 ... 0xb5] = { DstReg|SrcMem|ModRM|Mov },
    [0xb6] = { ByteOp|DstReg|SrcMem|ModRM|Mov },
    [0xb7] = { DstReg|SrcMem16|ModRM|Mov },
    [0xb8] = { DstReg|SrcMem|ModRM },
    [0xb9] = { ModRM },
    [0xba] = { DstBitBase|SrcImmByte|ModRM },
    [0xbb] = { DstBitBase|SrcReg|ModRM },
    [0xbc ... 0xbd] = { DstReg|SrcMem|ModRM },
    [0xbe] = { ByteOp|DstReg|SrcMem|ModRM|Mov },
    [0xbf] = { DstReg|SrcMem16|ModRM|Mov },
    [0xc0] = { ByteOp|DstMem|SrcReg|ModRM },
    [0xc1] = { DstMem|SrcReg|ModRM },
    [0xc2] = { DstImplicit|SrcImmByte|ModRM, simd_any_fp },
    [0xc3] = { DstMem|SrcReg|ModRM|Mov },
    [0xc4] = { DstReg|SrcImmByte|ModRM, simd_packed_int },
    [0xc5] = { DstReg|SrcImmByte|ModRM|Mov },
    [0xc6] = { DstImplicit|SrcImmByte|ModRM, simd_packed_fp },
    [0xc7] = { ImplicitOps|ModRM },
    [0xc8 ... 0xcf] = { ImplicitOps },
    [0xd0] = { DstImplicit|SrcMem|ModRM, simd_other },
    [0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_other },
    [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other },
    [0xd7] = { DstReg|SrcImplicit|ModRM|Mov },
    [0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_other },
    [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
    [0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int },
    [0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
    [0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_other },
    [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0xf7] = { DstMem|SrcMem|ModRM|Mov, simd_packed_int },
    [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
    [0xff] = { ModRM }
};
     351             : 
/*
 * "two_op" and "four_op" below refer to the number of register operands
 * (one of which possibly also allowing to be a memory one). The named
 * operand counts do not include any immediate operands.
 */
/*
 * Decode attributes for the 0x0f 0x38 opcode map, indexed by the third
 * opcode byte.  simd_size holds an enum simd_opsize value; to_mem marks
 * instructions whose memory operand is the destination.
 */
static const struct {
    uint8_t simd_size:5;
    uint8_t to_mem:1;
    uint8_t two_op:1;
    uint8_t vsib:1;
} ext0f38_table[256] = {
    [0x00 ... 0x0b] = { .simd_size = simd_packed_int },
    [0x10] = { .simd_size = simd_packed_int },
    [0x14 ... 0x15] = { .simd_size = simd_packed_fp },
    [0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
    [0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
    [0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
    [0x28 ... 0x29] = { .simd_size = simd_packed_int },
    [0x2a] = { .simd_size = simd_packed_int, .two_op = 1 },
    [0x2b] = { .simd_size = simd_packed_int },
    [0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
    [0x37 ... 0x3f] = { .simd_size = simd_packed_int },
    [0x40] = { .simd_size = simd_packed_int },
    [0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
    [0xc8 ... 0xcd] = { .simd_size = simd_other },
    [0xdb] = { .simd_size = simd_packed_int, .two_op = 1 },
    [0xdc ... 0xdf] = { .simd_size = simd_packed_int },
    [0xf0] = { .two_op = 1 },
    [0xf1] = { .to_mem = 1, .two_op = 1 },
    [0xf2 ... 0xf3] = {},
    [0xf5 ... 0xf7] = {},
};
     384             : 
/* Shift values between src and dst sizes of pmov{s,z}x{b,w,d}{w,d,q}. */
/*
 * NOTE(review): ordering presumably follows the opcode order of the six
 * pmov forms (bw, bd, bq, wd, wq, dq) -- confirm against the decode code
 * that indexes this array.
 */
static const uint8_t pmov_convert_delta[] = { 1, 2, 3, 1, 2, 1 };
     387             : 
/*
 * Decode attributes for the 0x0f 0x3a opcode map, indexed by the third
 * opcode byte.  Same field semantics as ext0f38_table, with four_op
 * marking instructions taking four register/memory operands.
 */
static const struct {
    uint8_t simd_size:5;
    uint8_t to_mem:1;
    uint8_t two_op:1;
    uint8_t four_op:1;
} ext0f3a_table[256] = {
    [0x08 ... 0x09] = { .simd_size = simd_packed_fp, .two_op = 1 },
    [0x0a ... 0x0b] = { .simd_size = simd_scalar_fp },
    [0x0c ... 0x0d] = { .simd_size = simd_packed_fp },
    [0x0e ... 0x0f] = { .simd_size = simd_packed_int },
    [0x14 ... 0x17] = { .simd_size = simd_none, .to_mem = 1, .two_op = 1 },
    [0x20] = { .simd_size = simd_none },
    [0x21] = { .simd_size = simd_other },
    [0x22] = { .simd_size = simd_none },
    [0x40 ... 0x41] = { .simd_size = simd_packed_fp },
    [0x42] = { .simd_size = simd_packed_int },
    [0x44] = { .simd_size = simd_packed_int },
    [0x4a ... 0x4b] = { .simd_size = simd_packed_fp, .four_op = 1 },
    [0x4c] = { .simd_size = simd_packed_int, .four_op = 1 },
    [0x60 ... 0x63] = { .simd_size = simd_packed_int, .two_op = 1 },
    [0xcc] = { .simd_size = simd_other },
    [0xdf] = { .simd_size = simd_packed_int, .two_op = 1 },
    [0xf0] = {},
};
     412             : 
/*
 * Decode attributes for XOP-encoded instructions.
 * NOTE(review): presumably indexed by XOP map number (8f08/8f09/8f0a)
 * relative to the first map -- confirm against the decode logic.
 */
static const opcode_desc_t xop_table[] = {
    DstReg|SrcImmByte|ModRM,
    DstReg|SrcMem|ModRM,
    DstReg|SrcImm|ModRM,
};
     418             : 
/* REX prefix: base value plus the four payload bits. */
#define REX_PREFIX 0x40
#define REX_B 0x01 /* Extension of ModRM r/m, SIB base, or opcode reg. */
#define REX_X 0x02 /* Extension of SIB index. */
#define REX_R 0x04 /* Extension of ModRM reg. */
#define REX_W 0x08 /* 64-bit operand size. */

#define vex_none 0

/* VEX.mmmmm opcode map selector (0 == no VEX prefix present). */
enum vex_opcx {
    vex_0f = vex_none + 1,
    vex_0f38,
    vex_0f3a,
};

/* VEX.pp embedded legacy prefix (0 == none). */
enum vex_pfx {
    vex_66 = vex_none + 1,
    vex_f3,
    vex_f2
};

#define VEX_PREFIX_DOUBLE_MASK 0x1
#define VEX_PREFIX_SCALAR_MASK 0x2

/* Legacy SSE prefix bytes, indexed by (enum vex_pfx value - 1). */
static const uint8_t sse_prefix[] = { 0x66, 0xf3, 0xf2 };
     443             : 
/*
 * The two payload bytes of a 3-byte (0xc4) VEX prefix, overlaid with
 * named bit fields.
 * NOTE(review): relies on the compiler allocating bit fields from the
 * least significant bit upwards, as x86 ABIs do -- not portable beyond
 * such ABIs.
 */
union vex {
    uint8_t raw[2];
    struct {
        uint8_t opcx:5;
        uint8_t b:1;
        uint8_t x:1;
        uint8_t r:1;
        uint8_t pfx:2;
        uint8_t l:1;
        uint8_t reg:4;
        uint8_t w:1;
    };
};
     457             : 
/*
 * Second filler prefix byte for the stub buffer: a REX placeholder on
 * 64-bit builds (so a REX value can later be OR-ed in by copy_REX_VEX),
 * a harmless DS segment override (0x3e) otherwise.
 */
#ifdef __x86_64__
# define PFX2 REX_PREFIX
#else
# define PFX2 0x3e
#endif
#define PFX_BYTES 3
/*
 * Initialize a stub buffer with three placeholder prefix bytes
 * (0x3e, PFX2, 0x0f) and yield a pointer just past them, where the
 * instruction proper gets assembled.
 */
#define init_prefixes(stub) ({ \
    uint8_t *buf_ = get_stub(stub); \
    buf_[0] = 0x3e; \
    buf_[1] = PFX2; \
    buf_[2] = 0x0f; \
    buf_ + 3; \
})

/*
 * Patch the placeholder prefix bytes preceding ptr with either the
 * actual VEX prefix (0xc4 plus the two raw payload bytes) or, for
 * legacy encodings, the SSE prefix implied by vex.pfx plus any REX bits.
 */
#define copy_REX_VEX(ptr, rex, vex) do { \
    if ( (vex).opcx != vex_none ) \
    { \
        if ( !mode_64bit() ) \
            vex.reg |= 8; \
        (ptr)[0 - PFX_BYTES] = 0xc4; \
        (ptr)[1 - PFX_BYTES] = (vex).raw[0]; \
        (ptr)[2 - PFX_BYTES] = (vex).raw[1]; \
    } \
    else \
    { \
        if ( (vex).pfx ) \
            (ptr)[0 - PFX_BYTES] = sse_prefix[(vex).pfx - 1]; \
        /* \
         * "rex" is always zero for other than 64-bit mode, so OR-ing it \
         * into any prefix (and not just REX_PREFIX) is safe on 32-bit \
         * (test harness) builds. \
         */ \
        (ptr)[1 - PFX_BYTES] |= rex; \
    } \
} while (0)
     493             : 
/*
 * The three payload bytes of an EVEX prefix, overlaid with named bit
 * fields.  Same low-to-high bit field allocation caveat as union vex.
 */
union evex {
    uint8_t raw[3];
    struct {
        uint8_t opcx:2;
        uint8_t :2;
        uint8_t R:1;
        uint8_t b:1;
        uint8_t x:1;
        uint8_t r:1;
        uint8_t pfx:2;
        uint8_t evex:1;
        uint8_t reg:4;
        uint8_t w:1;
        uint8_t opmsk:3;
        uint8_t RX:1;
        uint8_t bcst:1;
        uint8_t lr:2;
        uint8_t z:1;
    };
};
     514             : 
     515             : #define rep_prefix()   (vex.pfx >= vex_f3)
     516             : #define repe_prefix()  (vex.pfx == vex_f3)
     517             : #define repne_prefix() (vex.pfx == vex_f2)
     518             : 
     519             : /* Type, address-of, and value of an instruction's operand. */
     520             : struct operand {
     521             :     enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type;
     522             :     unsigned int bytes;
     523             : 
     524             :     /* Operand value. */
     525             :     unsigned long val;
     526             : 
     527             :     /* Original operand value. */
     528             :     unsigned long orig_val;
     529             : 
     530             :     /* OP_REG: Pointer to register field. */
     531             :     unsigned long *reg;
     532             : 
     533             :     /* OP_MEM: Segment and offset. */
     534             :     struct {
     535             :         enum x86_segment seg;
     536             :         unsigned long    off;
     537             :     } mem;
     538             : };
     539             : 
     540             : struct x86_emulate_state {
     541             :     unsigned int op_bytes, ad_bytes;
     542             : 
     543             :     enum {
     544             :         ext_none = vex_none,
     545             :         ext_0f   = vex_0f,
     546             :         ext_0f38 = vex_0f38,
     547             :         ext_0f3a = vex_0f3a,
     548             :         /*
     549             :          * For XOP use values such that the respective instruction field
     550             :          * can be used without adjustment.
     551             :          */
     552             :         ext_8f08 = 8,
     553             :         ext_8f09,
     554             :         ext_8f0a,
     555             :     } ext;
     556             :     uint8_t modrm, modrm_mod, modrm_reg, modrm_rm;
     557             :     uint8_t rex_prefix;
     558             :     bool lock_prefix;
     559             :     bool not_64bit; /* Instruction not available in 64bit. */
     560             :     bool fpu_ctrl;  /* Instruction is an FPU control one. */
     561             :     opcode_desc_t desc;
     562             :     union vex vex;
     563             :     union evex evex;
     564             :     enum simd_opsize simd_size;
     565             : 
     566             :     /*
     567             :      * Data operand effective address (usually computed from ModRM).
     568             :      * Default is a memory operand relative to segment DS.
     569             :      */
     570             :     struct operand ea;
     571             : 
     572             :     /* Immediate operand values, if any. Use otherwise unused fields. */
     573             : #define imm1 ea.val
     574             : #define imm2 ea.orig_val
     575             : 
     576             :     unsigned long ip;
     577             :     struct cpu_user_regs *regs;
     578             : 
     579             : #ifndef NDEBUG
     580             :     /*
     581             :      * Track caller of x86_decode_insn() to spot missing as well as
     582             :      * premature calls to x86_emulate_free_state().
     583             :      */
     584             :     void *caller;
     585             : #endif
     586             : };
     587             : 
     588             : #ifdef __x86_64__
     589             : #define PTR_POISON ((void *)0x8086000000008086UL) /* non-canonical */
     590             : #else
     591             : #define PTR_POISON NULL /* 32-bit builds are for user-space, so NULL is OK. */
     592             : #endif
     593             : 
     594             : typedef union {
     595             :     uint64_t mmx;
     596             :     uint64_t __attribute__ ((aligned(16))) xmm[2];
     597             :     uint64_t __attribute__ ((aligned(32))) ymm[4];
     598             : } mmval_t;
     599             : 
     600             : /*
     601             :  * While proper alignment gets specified above, this doesn't get honored by
     602             :  * the compiler for automatic variables. Use this helper to instantiate a
     603             :  * suitably aligned variable, producing a pointer to access it.
     604             :  */
     605             : #define DECLARE_ALIGNED(type, var)                                        \
     606             :     long __##var[(sizeof(type) + __alignof(type)) / __alignof(long) - 1]; \
     607             :     type *const var##p =                                                  \
     608             :         (void *)(((long)__##var + __alignof(type) - __alignof(__##var))   \
     609             :                  & -__alignof(type))
     610             : 
     611             : #ifdef __GCC_ASM_FLAG_OUTPUTS__
     612             : # define ASM_FLAG_OUT(yes, no) yes
     613             : #else
     614             : # define ASM_FLAG_OUT(yes, no) no
     615             : #endif
     616             : 
     617             : /* Floating point status word definitions. */
     618             : #define FSW_ES    (1U << 7)
     619             : 
     620             : /* MXCSR bit definitions. */
     621             : #define MXCSR_MM  (1U << 17)
     622             : 
     623             : /* Exception definitions. */
     624             : #define EXC_DE  0
     625             : #define EXC_DB  1
     626             : #define EXC_BP  3
     627             : #define EXC_OF  4
     628             : #define EXC_BR  5
     629             : #define EXC_UD  6
     630             : #define EXC_NM  7
     631             : #define EXC_DF  8
     632             : #define EXC_TS 10
     633             : #define EXC_NP 11
     634             : #define EXC_SS 12
     635             : #define EXC_GP 13
     636             : #define EXC_PF 14
     637             : #define EXC_MF 16
     638             : #define EXC_AC 17
     639             : #define EXC_XM 19
     640             : 
     641             : #define EXC_HAS_EC                                                      \
     642             :     ((1u << EXC_DF) | (1u << EXC_TS) | (1u << EXC_NP) |                 \
     643             :      (1u << EXC_SS) | (1u << EXC_GP) | (1u << EXC_PF) | (1u << EXC_AC))
     644             : 
     645             : /* Segment selector error code bits. */
     646             : #define ECODE_EXT (1 << 0)
     647             : #define ECODE_IDT (1 << 1)
     648             : #define ECODE_TI  (1 << 2)
     649             : 
     650             : /*
     651             :  * Instruction emulation:
     652             :  * Most instructions are emulated directly via a fragment of inline assembly
     653             :  * code. This allows us to save/restore EFLAGS and thus very easily pick up
     654             :  * any modified flags.
     655             :  */
     656             : 
     657             : #if defined(__x86_64__)
     658             : #define _LO32 "k"          /* force 32-bit operand */
     659             : #define _STK  "%%rsp"      /* stack pointer */
     660             : #define _BYTES_PER_LONG "8"
     661             : #elif defined(__i386__)
     662             : #define _LO32 ""           /* force 32-bit operand */
     663             : #define _STK  "%%esp"      /* stack pointer */
     664             : #define _BYTES_PER_LONG "4"
     665             : #endif
     666             : 
     667             : /*
     668             :  * These EFLAGS bits are restored from saved value during emulation, and
     669             :  * any changes are written back to the saved value after emulation.
     670             :  */
     671             : #define EFLAGS_MASK (X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \
     672             :                      X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF)
     673             : 
     674             : /*
     675             :  * These EFLAGS bits are modifiable (by POPF and IRET), possibly subject
     676             :  * to further CPL and IOPL constraints.
     677             :  */
     678             : #define EFLAGS_MODIFIABLE (X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_RF | \
     679             :                            X86_EFLAGS_NT | X86_EFLAGS_IOPL | X86_EFLAGS_DF | \
     680             :                            X86_EFLAGS_IF | X86_EFLAGS_TF | EFLAGS_MASK)
     681             : 
     682             : /* Before executing instruction: restore necessary bits in EFLAGS. */
     683             : #define _PRE_EFLAGS(_sav, _msk, _tmp)                           \
     684             : /* EFLAGS = (_sav & _msk) | (EFLAGS & ~_msk); _sav &= ~_msk; */ \
     685             : "movl %"_LO32 _sav",%"_LO32 _tmp"; "                            \
     686             : "push %"_tmp"; "                                                \
     687             : "push %"_tmp"; "                                                \
     688             : "movl %"_msk",%"_LO32 _tmp"; "                                  \
     689             : "andl %"_LO32 _tmp",("_STK"); "                                 \
     690             : "pushf; "                                                       \
     691             : "notl %"_LO32 _tmp"; "                                          \
     692             : "andl %"_LO32 _tmp",("_STK"); "                                 \
     693             : "andl %"_LO32 _tmp",2*"_BYTES_PER_LONG"("_STK"); "              \
     694             : "pop  %"_tmp"; "                                                \
     695             : "orl  %"_LO32 _tmp",("_STK"); "                                 \
     696             : "popf; "                                                        \
     697             : "pop  %"_tmp"; "                                                \
     698             : "movl %"_LO32 _tmp",%"_LO32 _sav"; "
     699             : 
     700             : /* After executing instruction: write-back necessary bits in EFLAGS. */
     701             : #define _POST_EFLAGS(_sav, _msk, _tmp)          \
     702             : /* _sav |= EFLAGS & _msk; */                    \
     703             : "pushf; "                                       \
     704             : "pop  %"_tmp"; "                                \
     705             : "andl %"_msk",%"_LO32 _tmp"; "                  \
     706             : "orl  %"_LO32 _tmp",%"_LO32 _sav"; "
     707             : 
     708             : /* Raw emulation: instruction has two explicit operands. */
     709             : #define __emulate_2op_nobyte(_op,_src,_dst,_eflags, wsx,wsy,wdx,wdy,       \
     710             :                              lsx,lsy,ldx,ldy, qsx,qsy,qdx,qdy)             \
     711             : do{ unsigned long _tmp;                                                    \
     712             :     switch ( (_dst).bytes )                                                \
     713             :     {                                                                      \
     714             :     case 2:                                                                \
     715             :         asm volatile (                                                     \
     716             :             _PRE_EFLAGS("0","4","2")                                       \
     717             :             _op"w %"wsx"3,%"wdx"1; "                                       \
     718             :             _POST_EFLAGS("0","4","2")                                      \
     719             :             : "+g" (_eflags), "+" wdy ((_dst).val), "=&r" (_tmp)           \
     720             :             : wsy ((_src).val), "i" (EFLAGS_MASK) );                       \
     721             :         break;                                                             \
     722             :     case 4:                                                                \
     723             :         asm volatile (                                                     \
     724             :             _PRE_EFLAGS("0","4","2")                                       \
     725             :             _op"l %"lsx"3,%"ldx"1; "                                       \
     726             :             _POST_EFLAGS("0","4","2")                                      \
     727             :             : "+g" (_eflags), "+" ldy ((_dst).val), "=&r" (_tmp)           \
     728             :             : lsy ((_src).val), "i" (EFLAGS_MASK) );                       \
     729             :         break;                                                             \
     730             :     case 8:                                                                \
     731             :         __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy); \
     732             :         break;                                                             \
     733             :     }                                                                      \
     734             : } while (0)
     735             : #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy)\
     736             : do{ unsigned long _tmp;                                                    \
     737             :     switch ( (_dst).bytes )                                                \
     738             :     {                                                                      \
     739             :     case 1:                                                                \
     740             :         asm volatile (                                                     \
     741             :             _PRE_EFLAGS("0","4","2")                                       \
     742             :             _op"b %"_bx"3,%1; "                                            \
     743             :             _POST_EFLAGS("0","4","2")                                      \
     744             :             : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
     745             :             : _by ((_src).val), "i" (EFLAGS_MASK) );                       \
     746             :         break;                                                             \
     747             :     default:                                                               \
     748             :         __emulate_2op_nobyte(_op,_src,_dst,_eflags, _wx,_wy,"","m",        \
     749             :                              _lx,_ly,"","m", _qx,_qy,"","m");              \
     750             :         break;                                                             \
     751             :     }                                                                      \
     752             : } while (0)
     753             : /* Source operand is byte-sized and may be restricted to just %cl. */
     754             : #define emulate_2op_SrcB(_op, _src, _dst, _eflags)                         \
     755             :     __emulate_2op(_op, _src, _dst, _eflags,                                \
     756             :                   "b", "c", "b", "c", "b", "c", "b", "c")
     757             : /* Source operand is byte, word, long or quad sized. */
     758             : #define emulate_2op_SrcV(_op, _src, _dst, _eflags)                         \
     759             :     __emulate_2op(_op, _src, _dst, _eflags,                                \
     760             :                   "b", "q", "w", "r", _LO32, "r", "", "r")
     761             : /* Source operand is word, long or quad sized. */
     762             : #define emulate_2op_SrcV_nobyte(_op, _src, _dst, _eflags)                  \
     763             :     __emulate_2op_nobyte(_op, _src, _dst, _eflags, "w", "r", "", "m",      \
     764             :                          _LO32, "r", "", "m", "", "r", "", "m")
     765             : /* Operands are word, long or quad sized and source may be in memory. */
     766             : #define emulate_2op_SrcV_srcmem(_op, _src, _dst, _eflags)                  \
     767             :     __emulate_2op_nobyte(_op, _src, _dst, _eflags, "", "m", "w", "r",      \
     768             :                          "", "m", _LO32, "r", "", "m", "", "r")
     769             : 
     770             : /* Instruction has only one explicit operand (no source operand). */
     771             : #define emulate_1op(_op,_dst,_eflags)                                      \
     772             : do{ unsigned long _tmp;                                                    \
     773             :     switch ( (_dst).bytes )                                                \
     774             :     {                                                                      \
     775             :     case 1:                                                                \
     776             :         asm volatile (                                                     \
     777             :             _PRE_EFLAGS("0","3","2")                                       \
     778             :             _op"b %1; "                                                    \
     779             :             _POST_EFLAGS("0","3","2")                                      \
     780             :             : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
     781             :             : "i" (EFLAGS_MASK) );                                         \
     782             :         break;                                                             \
     783             :     case 2:                                                                \
     784             :         asm volatile (                                                     \
     785             :             _PRE_EFLAGS("0","3","2")                                       \
     786             :             _op"w %1; "                                                    \
     787             :             _POST_EFLAGS("0","3","2")                                      \
     788             :             : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
     789             :             : "i" (EFLAGS_MASK) );                                         \
     790             :         break;                                                             \
     791             :     case 4:                                                                \
     792             :         asm volatile (                                                     \
     793             :             _PRE_EFLAGS("0","3","2")                                       \
     794             :             _op"l %1; "                                                    \
     795             :             _POST_EFLAGS("0","3","2")                                      \
     796             :             : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)              \
     797             :             : "i" (EFLAGS_MASK) );                                         \
     798             :         break;                                                             \
     799             :     case 8:                                                                \
     800             :         __emulate_1op_8byte(_op, _dst, _eflags);                           \
     801             :         break;                                                             \
     802             :     }                                                                      \
     803             : } while (0)
     804             : 
     805             : /* Emulate an instruction with quadword operands (x86/64 only). */
     806             : #if defined(__x86_64__)
     807             : #define __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy) \
     808             : do{ asm volatile (                                                      \
     809             :         _PRE_EFLAGS("0","4","2")                                        \
     810             :         _op"q %"qsx"3,%"qdx"1; "                                        \
     811             :         _POST_EFLAGS("0","4","2")                                       \
     812             :         : "+g" (_eflags), "+" qdy ((_dst).val), "=&r" (_tmp)            \
     813             :         : qsy ((_src).val), "i" (EFLAGS_MASK) );                        \
     814             : } while (0)
     815             : #define __emulate_1op_8byte(_op, _dst, _eflags)                         \
     816             : do{ asm volatile (                                                      \
     817             :         _PRE_EFLAGS("0","3","2")                                        \
     818             :         _op"q %1; "                                                     \
     819             :         _POST_EFLAGS("0","3","2")                                       \
     820             :         : "+g" (_eflags), "+m" ((_dst).val), "=&r" (_tmp)               \
     821             :         : "i" (EFLAGS_MASK) );                                          \
     822             : } while (0)
     823             : #elif defined(__i386__)
     824             : #define __emulate_2op_8byte(_op, _src, _dst, _eflags, qsx, qsy, qdx, qdy)
     825             : #define __emulate_1op_8byte(_op, _dst, _eflags)
     826             : #endif /* __i386__ */
     827             : 
     828             : #ifdef __XEN__
     829             : # define invoke_stub(pre, post, constraints...) do {                    \
     830             :     union stub_exception_token res_ = { .raw = ~0 };                    \
     831             :     asm volatile ( pre "\n\tcall *%[stub]\n\t" post "\n"                \
     832             :                    ".Lret%=:\n\t"                                       \
     833             :                    ".pushsection .fixup,\"ax\"\n"                       \
     834             :                    ".Lfix%=:\n\t"                                       \
     835             :                    "pop %[exn]\n\t"                                     \
     836             :                    "jmp .Lret%=\n\t"                                    \
     837             :                    ".popsection\n\t"                                    \
     838             :                    _ASM_EXTABLE(.Lret%=, .Lfix%=)                       \
     839             :                    : [exn] "+g" (res_), constraints,                    \
     840             :                      [stub] "rm" (stub.func) );                         \
     841             :     if ( unlikely(~res_.raw) )                                          \
     842             :     {                                                                   \
     843             :         gprintk(XENLOG_WARNING,                                         \
     844             :                 "exception %u (ec=%04x) in emulation stub (line %u)\n", \
     845             :                 res_.fields.trapnr, res_.fields.ec, __LINE__);          \
     846             :         gprintk(XENLOG_INFO, "stub: %"__stringify(MAX_INST_LEN)"ph\n",  \
     847             :                 stub.func);                                             \
     848             :         generate_exception_if(res_.fields.trapnr == EXC_UD, EXC_UD);    \
     849             :         domain_crash(current->domain);                                  \
     850             :         goto cannot_emulate;                                            \
     851             :     }                                                                   \
     852             : } while (0)
     853             : #else
     854             : # define invoke_stub(pre, post, constraints...)                         \
     855             :     asm volatile ( pre "\n\tcall *%[stub]\n\t" post                     \
     856             :                    : constraints, [stub] "rm" (stub.func) )
     857             : #endif
     858             : 
     859             : #define emulate_stub(dst, src...) do {                                  \
     860             :     unsigned long tmp;                                                  \
     861             :     invoke_stub(_PRE_EFLAGS("[efl]", "[msk]", "[tmp]"),                 \
     862             :                 _POST_EFLAGS("[efl]", "[msk]", "[tmp]"),                \
     863             :                 dst, [tmp] "=&r" (tmp), [efl] "+g" (_regs.eflags)       \
     864             :                 : [msk] "i" (EFLAGS_MASK), ## src);                     \
     865             : } while (0)
     866             : 
     867             : /* Fetch next part of the instruction being emulated. */
     868             : #define insn_fetch_bytes(_size)                                         \
     869             : ({ unsigned long _x = 0, _ip = state->ip;                               \
     870             :    state->ip += (_size); /* real hardware doesn't truncate */           \
     871             :    generate_exception_if((uint8_t)(state->ip -                          \
     872             :                                    ctxt->regs->r(ip)) > MAX_INST_LEN,   \
     873             :                          EXC_GP, 0);                                    \
     874             :    rc = ops->insn_fetch(x86_seg_cs, _ip, &_x, (_size), ctxt);           \
     875             :    if ( rc ) goto done;                                                 \
     876             :    _x;                                                                  \
     877             : })
     878             : #define insn_fetch_type(_type) ((_type)insn_fetch_bytes(sizeof(_type)))
     879             : 
     880             : #define truncate_word(ea, byte_width)           \
     881             : ({  unsigned long __ea = (ea);                  \
     882             :     unsigned int _width = (byte_width);         \
     883             :     ((_width == sizeof(unsigned long)) ? __ea : \
     884             :      (__ea & ((1UL << (_width << 3)) - 1)));    \
     885             : })
     886             : #define truncate_ea(ea) truncate_word((ea), ad_bytes)
     887             : 
     888             : #ifdef __x86_64__
     889             : # define mode_64bit() (ctxt->addr_size == 64)
     890             : #else
     891             : # define mode_64bit() false
     892             : #endif
     893             : 
     894             : #define fail_if(p)                                      \
     895             : do {                                                    \
     896             :     rc = (p) ? X86EMUL_UNHANDLEABLE : X86EMUL_OKAY;     \
     897             :     if ( rc ) goto done;                                \
     898             : } while (0)
     899             : 
     900        5218 : static inline int mkec(uint8_t e, int32_t ec, ...)
     901             : {
     902        5218 :     return (e < 32 && ((1u << e) & EXC_HAS_EC)) ? ec : X86_EVENT_NO_EC;
     903             : }
     904             : 
     905             : #define generate_exception_if(p, e, ec...)                                \
     906             : ({  if ( (p) ) {                                                          \
     907             :         x86_emul_hw_exception(e, mkec(e, ##ec, 0), ctxt);                 \
     908             :         rc = X86EMUL_EXCEPTION;                                           \
     909             :         goto done;                                                        \
     910             :     }                                                                     \
     911             : })
     912             : 
     913             : #define generate_exception(e, ec...) generate_exception_if(true, e, ##ec)
     914             : 
     915             : /*
     916             :  * Given byte has even parity (even number of 1s)? SDM Vol. 1 Sec. 3.4.3.1,
     917             :  * "Status Flags": EFLAGS.PF reflects parity of least-sig. byte of result only.
     918             :  */
     919      244658 : static bool even_parity(uint8_t v)
     920             : {
     921      244658 :     asm ( "test %1,%1" ASM_FLAG_OUT(, "; setp %0")
     922             :           : ASM_FLAG_OUT("=@ccp", "=qm") (v) : "q" (v) );
     923             : 
     924      244658 :     return v;
     925             : }
     926             : 
     927             : /* Update address held in a register, based on addressing mode. */
     928             : #define _register_address_increment(reg, inc, byte_width)               \
     929             : do {                                                                    \
     930             :     int _inc = (inc); /* signed type ensures sign extension to long */  \
     931             :     unsigned int _width = (byte_width);                                 \
     932             :     if ( _width == sizeof(unsigned long) )                              \
     933             :         (reg) += _inc;                                                  \
     934             :     else if ( mode_64bit() )                                            \
     935             :         (reg) = ((reg) + _inc) & ((1UL << (_width << 3)) - 1);          \
     936             :     else                                                                \
     937             :         (reg) = ((reg) & ~((1UL << (_width << 3)) - 1)) |               \
     938             :                 (((reg) + _inc) & ((1UL << (_width << 3)) - 1));        \
     939             : } while (0)
/*
 * Adjust a string-op address register by @adj bytes, honouring EFLAGS.DF:
 * decrement when the direction flag is set, increment otherwise.
 */
#define register_address_adjust(reg, adj)                               \
    _register_address_increment(reg,                                    \
                                _regs.eflags & X86_EFLAGS_DF ?          \
                                -(adj) : (adj),                         \
                                ad_bytes)

/* Pre-decrement the stack pointer by @dec; yields the new, truncated rSP. */
#define sp_pre_dec(dec) ({                                              \
    _register_address_increment(_regs.r(sp), -(dec), ctxt->sp_size/8);  \
    truncate_word(_regs.r(sp), ctxt->sp_size/8);                        \
})
/* Yield the current truncated rSP, then post-increment it by @inc. */
#define sp_post_inc(inc) ({                                             \
    unsigned long sp = truncate_word(_regs.r(sp), ctxt->sp_size/8);     \
    _register_address_increment(_regs.r(sp), (inc), ctxt->sp_size/8);   \
    sp;                                                                 \
})

/*
 * Apply a relative branch: truncate the new IP to the operand size, probe
 * that the target is fetchable (zero-length insn_fetch) before committing,
 * and latch pending single-step state from EFLAGS.TF.
 */
#define jmp_rel(rel)                                                    \
do {                                                                    \
    unsigned long ip = _regs.r(ip) + (int)(rel);                        \
    if ( op_bytes == 2 )                                                \
        ip = (uint16_t)ip;                                              \
    else if ( !mode_64bit() )                                           \
        ip = (uint32_t)ip;                                              \
    rc = ops->insn_fetch(x86_seg_cs, ip, NULL, 0, ctxt);                \
    if ( rc ) goto done;                                                \
    _regs.r(ip) = ip;                                                   \
    singlestep = _regs.eflags & X86_EFLAGS_TF;                          \
} while (0)

/*
 * Raise #GP(0) for a far branch target that is non-canonical (64-bit code
 * segment in long mode) or beyond the code segment limit (otherwise).
 */
#define validate_far_branch(cs, ip) ({                                  \
    if ( sizeof(ip) <= 4 ) {                                            \
        ASSERT(!ctxt->lma);                                             \
        generate_exception_if((ip) > (cs)->limit, EXC_GP, 0);           \
    } else                                                              \
        generate_exception_if(ctxt->lma && (cs)->attr.fields.l          \
                              ? !is_canonical_address(ip)               \
                              : (ip) > (cs)->limit, EXC_GP, 0);         \
})

/*
 * Validate and then commit a far branch: update rIP, latch single-step
 * state, and write the new code segment via the write_segment hook.
 */
#define commit_far_branch(cs, newip) ({                                 \
    validate_far_branch(cs, newip);                                     \
    _regs.r(ip) = (newip);                                              \
    singlestep = _regs.eflags & X86_EFLAGS_TF;                          \
    ops->write_segment(x86_seg_cs, cs, ctxt);                           \
})
     985             : 
/*
 * Per-instruction FPU state, shared with fpu_handle_exception() so a fault
 * raised while executing an FPU stub can be recorded and inspected later.
 */
struct fpu_insn_ctxt {
    uint8_t insn_bytes;  /* Length of the stub insn, to skip over on fault. */
    uint8_t type;        /* FPU unit currently acquired (X86EMUL_FPU_*). */
    int8_t exn_raised;   /* Exception vector raised, or negative for none
                          * (check_fpu_exn() tests for >= 0). */
};
     991             : 
/*
 * Exception callback registered via ops->get_fpu(): record the vector of an
 * exception raised while executing an FPU stub, and advance IP past the
 * faulting stub instruction so execution can continue.
 */
static void fpu_handle_exception(void *_fic, struct cpu_user_regs *regs)
{
    struct fpu_insn_ctxt *fic = _fic;
    ASSERT(regs->entry_vector < 0x20); /* CPU exception vectors only. */
    fic->exn_raised = regs->entry_vector;
    regs->r(ip) += fic->insn_bytes; /* Skip the faulting stub insn. */
}
     999             : 
/*
 * Acquire the FPU of the requested @type on behalf of the guest, performing
 * the architectural availability checks (CR0.EM/TS/MP, CR4.OSFXSR/OSXSAVE)
 * and raising #NM/#UD as appropriate.  On X86EMUL_OKAY the caller must
 * balance this with put_fpu().  Note: fail_if() and generate_exception_if()
 * set rc and 'goto done' on failure.
 */
static int _get_fpu(
    enum x86_emulate_fpu_type type,
    struct fpu_insn_ctxt *fic,
    struct x86_emulate_ctxt *ctxt,
    const struct x86_emulate_ops *ops)
{
    int rc;

    fail_if(!ops->get_fpu);
    ASSERT(type != X86EMUL_FPU_none);
    rc = ops->get_fpu(fpu_handle_exception, fic, type, ctxt);

    if ( rc == X86EMUL_OKAY )
    {
        unsigned long cr0;

        /* Full FPU use requires a put_fpu hook to restore state later. */
        fail_if(type == X86EMUL_FPU_fpu && !ops->put_fpu);
        fic->type = type;

        fail_if(!ops->read_cr);
        if ( type >= X86EMUL_FPU_xmm )
        {
            unsigned long cr4;

            rc = ops->read_cr(4, &cr4, ctxt);
            if ( rc != X86EMUL_OKAY )
                return rc;
            /* SSE needs CR4.OSFXSR; AVX and up need CR4.OSXSAVE. */
            generate_exception_if(!(cr4 & ((type == X86EMUL_FPU_xmm)
                                           ? X86_CR4_OSFXSR : X86_CR4_OSXSAVE)),
                                  EXC_UD);
        }

        rc = ops->read_cr(0, &cr0, ctxt);
        if ( rc != X86EMUL_OKAY )
            return rc;
        if ( type >= X86EMUL_FPU_ymm )
        {
            /* Should be unreachable if VEX decoding is working correctly. */
            ASSERT((cr0 & X86_CR0_PE) && !(ctxt->regs->eflags & X86_EFLAGS_VM));
        }
        if ( cr0 & X86_CR0_EM )
        {
            /* CR0.EM: x87 raises #NM, MMX/SSE raise #UD. */
            generate_exception_if(type == X86EMUL_FPU_fpu, EXC_NM);
            generate_exception_if(type == X86EMUL_FPU_mmx, EXC_UD);
            generate_exception_if(type == X86EMUL_FPU_xmm, EXC_UD);
        }
        /* CR0.TS raises #NM, except for WAIT/FWAIT when CR0.MP is clear. */
        generate_exception_if((cr0 & X86_CR0_TS) &&
                              (type != X86EMUL_FPU_wait || (cr0 & X86_CR0_MP)),
                              EXC_NM);
    }

 done:
    return rc;
}
    1054             : 
/* Acquire the FPU via _get_fpu(), bailing to 'done' on any failure. */
#define get_fpu(_type, _fic)                                    \
do {                                                            \
    rc = _get_fpu(_type, _fic, ctxt, ops);                      \
    if ( rc ) goto done;                                        \
} while (0)

/* Re-raise, in the guest, any exception recorded by fpu_handle_exception(). */
#define check_fpu_exn(fic)                                      \
do {                                                            \
    generate_exception_if((fic)->exn_raised >= 0,               \
                          (fic)->exn_raised);                   \
} while (0)

/*
 * As check_fpu_exn(), but first demote #XM to #UD when CR4.OSXMMEXCPT is
 * clear (the architectural behaviour for unmasked SIMD FP exceptions).
 * Relies on a 'cr4' local being in scope at the use site.
 */
#define check_xmm_exn(fic)                                      \
do {                                                            \
    if ( (fic)->exn_raised == EXC_XM && ops->read_cr &&         \
         ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&         \
         !(cr4 & X86_CR4_OSXMMEXCPT) )                          \
        (fic)->exn_raised = EXC_UD;                             \
    check_fpu_exn(fic);                                         \
} while (0)
    1075             : 
/*
 * Release the FPU acquired by _get_fpu().  For non-control x87 insns that
 * completed successfully, also hand the hook the auxiliary state (FIP/FCS,
 * FDP/FDS, opcode) needed to update the x87 environment registers; on late
 * failure the state is simply dropped (X86EMUL_FPU_fpu, NULL aux).
 */
static void put_fpu(
    struct fpu_insn_ctxt *fic,
    bool failed_late,
    const struct x86_emulate_state *state,
    struct x86_emulate_ctxt *ctxt,
    const struct x86_emulate_ops *ops)
{
    if ( unlikely(failed_late) && fic->type == X86EMUL_FPU_fpu )
        ops->put_fpu(ctxt, X86EMUL_FPU_fpu, NULL);
    else if ( unlikely(fic->type == X86EMUL_FPU_fpu) && !state->fpu_ctrl )
    {
        struct x86_emul_fpu_aux aux = {
            .ip = ctxt->regs->r(ip),
            .cs = ctxt->regs->cs,
            .op = ((ctxt->opcode & 7) << 8) | state->modrm,
        };
        struct segment_register sreg;

        /* Prefer the architectural CS selector from the hook if available. */
        if ( ops->read_segment &&
             ops->read_segment(x86_seg_cs, &sreg, ctxt) == X86EMUL_OKAY )
            aux.cs = sreg.sel;
        if ( state->ea.type == OP_MEM )
        {
            /* Memory operand: also record the data pointer and selector. */
            aux.dp = state->ea.mem.off;
            if ( ops->read_segment &&
                 ops->read_segment(state->ea.mem.seg, &sreg,
                                   ctxt) == X86EMUL_OKAY )
                aux.ds = sreg.sel;
            else
                /* No hook: fall back to the raw selector registers. */
                switch ( state->ea.mem.seg )
                {
                case x86_seg_cs: aux.ds = ctxt->regs->cs; break;
                case x86_seg_ds: aux.ds = ctxt->regs->ds; break;
                case x86_seg_es: aux.ds = ctxt->regs->es; break;
                case x86_seg_fs: aux.ds = ctxt->regs->fs; break;
                case x86_seg_gs: aux.ds = ctxt->regs->gs; break;
                case x86_seg_ss: aux.ds = ctxt->regs->ss; break;
                default:         ASSERT_UNREACHABLE();    break;
                }
            aux.dval = true;
        }
        ops->put_fpu(ctxt, X86EMUL_FPU_none, &aux);
    }
    else if ( fic->type != X86EMUL_FPU_none && ops->put_fpu )
        ops->put_fpu(ctxt, X86EMUL_FPU_none, NULL);
    fic->type = X86EMUL_FPU_none;
}
    1123             : 
/*
 * Query the host FPU status word: returns true when no unmasked x87
 * exception is pending (FSW.ES clear), i.e. a result write is safe.
 */
static inline bool fpu_check_write(void)
{
    uint16_t fsw;

    asm ( "fnstsw %0" : "=am" (fsw) );

    return !(fsw & FSW_ES);
}
    1132             : 
/*
 * Helpers to execute an FPU instruction inline.  The "movb $2f-1f,%0"
 * records the insn's byte length in fic.insn_bytes so that
 * fpu_handle_exception() can skip past it if it faults.
 */
#define emulate_fpu_insn(_op)                           \
    asm volatile (                                      \
        "movb $2f-1f,%0 \n"                             \
        "1: " _op "     \n"                             \
        "2:             \n"                             \
        : "=m" (fic.insn_bytes) : : "memory" )

/* As emulate_fpu_insn(), but with a memory destination operand. */
#define emulate_fpu_insn_memdst(_op, _arg)              \
    asm volatile (                                      \
        "movb $2f-1f,%0 \n"                             \
        "1: " _op " %1  \n"                             \
        "2:             \n"                             \
        : "=m" (fic.insn_bytes), "=m" (_arg)            \
        : : "memory" )

/* As emulate_fpu_insn(), but with a memory source operand. */
#define emulate_fpu_insn_memsrc(_op, _arg)              \
    asm volatile (                                      \
        "movb $2f-1f,%0 \n"                             \
        "1: " _op " %1  \n"                             \
        "2:             \n"                             \
        : "=m" (fic.insn_bytes)                         \
        : "m" (_arg) : "memory" )

/*
 * Execute raw FPU insn @bytes from a writable stub, followed by a RET
 * (0xc3) so invoke_stub() can call it like a function.
 */
#define emulate_fpu_insn_stub(bytes...)                                 \
do {                                                                    \
    unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
    fic.insn_bytes = nr_;                                               \
    memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
    invoke_stub("", "", "=m" (fic) : "m" (fic));                        \
    put_stub(stub);                                                     \
} while (0)

/*
 * As emulate_fpu_insn_stub(), but additionally merge the stub's ZF/PF/CF
 * results into the emulated EFLAGS (e.g. for FCOMI-style insns).
 */
#define emulate_fpu_insn_stub_eflags(bytes...)                          \
do {                                                                    \
    unsigned int nr_ = sizeof((uint8_t[]){ bytes });                    \
    unsigned long tmp_;                                                 \
    fic.insn_bytes = nr_;                                               \
    memcpy(get_stub(stub), ((uint8_t[]){ bytes, 0xc3 }), nr_ + 1);      \
    invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),             \
                _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),            \
                [eflags] "+g" (_regs.eflags), [tmp] "=&r" (tmp_),       \
                "+m" (fic)                                              \
                : [mask] "i" (X86_EFLAGS_ZF|X86_EFLAGS_PF|X86_EFLAGS_CF)); \
    put_stub(stub);                                                     \
} while (0)
    1178             : 
    1179      101494 : static inline unsigned long get_loop_count(
    1180             :     const struct cpu_user_regs *regs,
    1181             :     int ad_bytes)
    1182             : {
    1183      101494 :     return (ad_bytes > 4) ? regs->r(cx)
    1184      101494 :                           : (ad_bytes < 4) ? regs->cx : regs->ecx;
    1185             : }
    1186             : 
    1187       42497 : static inline void put_loop_count(
    1188             :     struct cpu_user_regs *regs,
    1189             :     int ad_bytes,
    1190             :     unsigned long count)
    1191             : {
    1192       42497 :     if ( ad_bytes == 2 )
    1193       13188 :         regs->cx = count;
    1194             :     else
    1195       29309 :         regs->r(cx) = ad_bytes == 4 ? (uint32_t)count : count;
    1196       42497 : }
    1197             : 
/*
 * Evaluate the REP prefix: yields the maximum number of iterations to
 * perform in this invocation (1 when no REP prefix is present).  Skips the
 * insn entirely (goto complete_insn) when the count is zero, and clamps to
 * a single iteration when single-stepping so TF traps fire per iteration.
 */
#define get_rep_prefix(using_si, using_di) ({                           \
    unsigned long max_reps = 1;                                         \
    if ( rep_prefix() )                                                 \
        max_reps = get_loop_count(&_regs, ad_bytes);                    \
    if ( max_reps == 0 )                                                \
    {                                                                   \
        /*                                                              \
         * Skip the instruction if no repetitions are required, but     \
         * zero extend involved registers first when using 32-bit       \
         * addressing in 64-bit mode.                                   \
         */                                                             \
        if ( mode_64bit() && ad_bytes == 4 )                            \
        {                                                               \
            _regs.r(cx) = 0;                                            \
            if ( using_si ) _regs.r(si) = _regs.esi;                    \
            if ( using_di ) _regs.r(di) = _regs.edi;                    \
        }                                                               \
        goto complete_insn;                                             \
    }                                                                   \
    if ( max_reps > 1 && (_regs.eflags & X86_EFLAGS_TF) &&              \
         !is_branch_step(ctxt, ops) )                                   \
        max_reps = 1;                                                   \
    max_reps;                                                           \
})
    1222             : 
    1223       41376 : static void __put_rep_prefix(
    1224             :     struct cpu_user_regs *int_regs,
    1225             :     struct cpu_user_regs *ext_regs,
    1226             :     int ad_bytes,
    1227             :     unsigned long reps_completed)
    1228             : {
    1229       41376 :     unsigned long ecx = get_loop_count(int_regs, ad_bytes);
    1230             : 
    1231             :     /* Reduce counter appropriately, and repeat instruction if non-zero. */
    1232       41376 :     ecx -= reps_completed;
    1233       41376 :     if ( ecx != 0 )
    1234       37306 :         int_regs->r(ip) = ext_regs->r(ip);
    1235             : 
    1236       41376 :     put_loop_count(int_regs, ad_bytes, ecx);
    1237       41376 : }
    1238             : 
/*
 * Commit REP iteration accounting; on a pending exception, deliver it with
 * the partially-updated state by completing the insn now.
 */
#define put_rep_prefix(reps_completed) ({                               \
    if ( rep_prefix() )                                                 \
    {                                                                   \
        __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \
        if ( unlikely(rc == X86EMUL_EXCEPTION) )                        \
            goto complete_insn;                                         \
    }                                                                   \
})

/* Clip maximum repetitions so that the index register at most just wraps. */
#define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({                  \
    unsigned long todo__, ea__ = truncate_word(ea, ad_bytes);             \
    if ( !(_regs.eflags & X86_EFLAGS_DF) )                                \
        todo__ = truncate_word(-(ea), ad_bytes) / (bytes_per_rep);        \
    else if ( truncate_word((ea) + (bytes_per_rep) - 1, ad_bytes) < ea__ )\
        todo__ = 1;                                                       \
    else                                                                  \
        todo__ = ea__ / (bytes_per_rep) + 1;                              \
    if ( !todo__ )                                                        \
        (reps) = 1;                                                       \
    else if ( todo__ < (reps) )                                           \
        (reps) = todo__;                                                  \
    ea__;                                                                 \
})
    1263             : 
    1264             : /* Compatibility function: read guest memory, zero-extend result to a ulong. */
    1265     1193198 : static int read_ulong(
    1266             :         enum x86_segment seg,
    1267             :         unsigned long offset,
    1268             :         unsigned long *val,
    1269             :         unsigned int bytes,
    1270             :         struct x86_emulate_ctxt *ctxt,
    1271             :         const struct x86_emulate_ops *ops)
    1272             : {
    1273     1193198 :     *val = 0;
    1274     1193198 :     return ops->read(seg, offset, val, bytes, ctxt);
    1275             : }
    1276             : 
/*
 * Unsigned multiplication with double-word result.
 * IN:  Multiplicand=m[0], Multiplier=m[1]
 * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
 */
static bool mul_dbl(unsigned long m[2])
{
    bool rc;

    /* MUL sets CF/OF when the high half of the product is non-zero. */
    asm ( "mul %1" ASM_FLAG_OUT(, "; seto %2")
          : "+a" (m[0]), "+d" (m[1]), ASM_FLAG_OUT("=@cco", "=qm") (rc) );

    return rc;
}
    1291             : 
/*
 * Signed multiplication with double-word result.
 * IN:  Multiplicand=m[0], Multiplier=m[1]
 * OUT: Return CF/OF (overflow status); Result=m[1]:m[0]
 */
static bool imul_dbl(unsigned long m[2])
{
    bool rc;

    /* IMUL sets CF/OF when the result doesn't fit in a single word. */
    asm ( "imul %1" ASM_FLAG_OUT(, "; seto %2")
          : "+a" (m[0]), "+d" (m[1]), ASM_FLAG_OUT("=@cco", "=qm") (rc) );

    return rc;
}
    1306             : 
/*
 * Unsigned division of double-word dividend.
 * IN:  Dividend=u[1]:u[0], Divisor=v
 * OUT: Return 1: #DE
 *      Return 0: Quotient=u[0], Remainder=u[1]
 */
static bool div_dbl(unsigned long u[2], unsigned long v)
{
    /*
     * u[1] >= v would make the quotient overflow a single word, which -
     * like division by zero - raises #DE on real hardware; check up front
     * as executing DIV here would fault the hypervisor itself.
     */
    if ( (v == 0) || (u[1] >= v) )
        return 1;
    asm ( "div"__OS" %2" : "+a" (u[0]), "+d" (u[1]) : "rm" (v) );
    return 0;
}
    1320             : 
/*
 * Signed division of double-word dividend.
 * IN:  Dividend=u[1]:u[0], Divisor=v
 * OUT: Return 1: #DE
 *      Return 0: Quotient=u[0], Remainder=u[1]
 * NB. We don't use idiv directly as it's moderately hard to work out
 *     ahead of time whether it will #DE, which we cannot allow to happen.
 */
static bool idiv_dbl(unsigned long u[2], long v)
{
    bool negu = (long)u[1] < 0, negv = v < 0;

    /* u = abs(u) */
    if ( negu )
    {
        /* Two's-complement negate of the double word u[1]:u[0]. */
        u[1] = ~u[1];
        if ( (u[0] = -u[0]) == 0 )
            u[1]++;
    }

    /* abs(u) / abs(v) */
    if ( div_dbl(u, negv ? -v : v) )
        return 1;

    /* Remainder has same sign as dividend. It cannot overflow. */
    if ( negu )
        u[1] = -u[1];

    /* Quotient is overflowed if sign bit is set. */
    if ( negu ^ negv )
    {
        /* Quotient should be negative: negate, unless already < 0 ... */
        if ( (long)u[0] >= 0 )
            u[0] = -u[0];
        else if ( (u[0] << 1) != 0 ) /* == 0x80...0 is okay */
            return 1;
    }
    else if ( (long)u[0] < 0 )
        return 1;

    return 0;
}
    1362             : 
    1363             : static bool
    1364      312115 : test_cc(
    1365             :     unsigned int condition, unsigned int flags)
    1366             : {
    1367      312115 :     int rc = 0;
    1368             : 
    1369      312115 :     switch ( (condition & 15) >> 1 )
    1370             :     {
    1371             :     case 0: /* o */
    1372        7912 :         rc |= (flags & X86_EFLAGS_OF);
    1373        7912 :         break;
    1374             :     case 1: /* b/c/nae */
    1375      199175 :         rc |= (flags & X86_EFLAGS_CF);
    1376      199175 :         break;
    1377             :     case 2: /* z/e */
    1378       14454 :         rc |= (flags & X86_EFLAGS_ZF);
    1379       14454 :         break;
    1380             :     case 3: /* be/na */
    1381        7051 :         rc |= (flags & (X86_EFLAGS_CF | X86_EFLAGS_ZF));
    1382        7051 :         break;
    1383             :     case 4: /* s */
    1384        5269 :         rc |= (flags & X86_EFLAGS_SF);
    1385        5269 :         break;
    1386             :     case 5: /* p/pe */
    1387       17256 :         rc |= (flags & X86_EFLAGS_PF);
    1388       17256 :         break;
    1389             :     case 7: /* le/ng */
    1390       49949 :         rc |= (flags & X86_EFLAGS_ZF);
    1391             :         /* fall through */
    1392             :     case 6: /* l/nge */
    1393       60998 :         rc |= (!(flags & X86_EFLAGS_SF) != !(flags & X86_EFLAGS_OF));
    1394       60998 :         break;
    1395             :     }
    1396             : 
    1397             :     /* Odd condition identifiers (lsb == 1) have inverted sense. */
    1398      312115 :     return (!!rc ^ (condition & 1));
    1399             : }
    1400             : 
    1401             : static int
    1402       75781 : get_cpl(
    1403             :     struct x86_emulate_ctxt *ctxt,
    1404             :     const struct x86_emulate_ops  *ops)
    1405             : {
    1406             :     struct segment_register reg;
    1407             : 
    1408       75781 :     if ( ctxt->regs->eflags & X86_EFLAGS_VM )
    1409           0 :         return 3;
    1410             : 
    1411      150138 :     if ( (ops->read_segment == NULL) ||
    1412       74357 :          ops->read_segment(x86_seg_ss, &reg, ctxt) )
    1413        1424 :         return -1;
    1414             : 
    1415       74357 :     return reg.attr.fields.dpl;
    1416             : }
    1417             : 
    1418             : static int
    1419       51983 : _mode_iopl(
    1420             :     struct x86_emulate_ctxt *ctxt,
    1421             :     const struct x86_emulate_ops  *ops)
    1422             : {
    1423       51983 :     int cpl = get_cpl(ctxt, ops);
    1424       51983 :     if ( cpl == -1 )
    1425         531 :         return -1;
    1426       51452 :     return cpl <= MASK_EXTR(ctxt->regs->eflags, X86_EFLAGS_IOPL);
    1427             : }
    1428             : 
/* True iff currently executing at CPL 0; fails the insn if CPL is unknown. */
#define mode_ring0() ({                         \
    int _cpl = get_cpl(ctxt, ops);              \
    fail_if(_cpl < 0);                          \
    (_cpl == 0);                                \
})
/* True iff CPL <= IOPL; fails the insn if the IOPL check can't be made. */
#define mode_iopl() ({                          \
    int _iopl = _mode_iopl(ctxt, ops);          \
    fail_if(_iopl < 0);                         \
    _iopl;                                      \
})
/*
 * True iff virtual interrupt flag handling is in effect for CPL-3 code:
 * CR4.VME in virtual-8086 mode, CR4.PVI otherwise.  Clobbers 'cr4' and may
 * 'goto done' on a read_cr failure.
 */
#define mode_vif() ({                                        \
    cr4 = 0;                                                 \
    if ( ops->read_cr && get_cpl(ctxt, ops) == 3 )           \
    {                                                        \
        rc = ops->read_cr(4, &cr4, ctxt);                    \
        if ( rc != X86EMUL_OKAY ) goto done;                 \
    }                                                        \
    !!(cr4 & (_regs.eflags & X86_EFLAGS_VM ? X86_CR4_VME : X86_CR4_PVI)); \
})
    1448             : 
/*
 * Check whether a @bytes-wide access to I/O port @first_port is permitted.
 * Accesses are allowed outright when CPL <= IOPL (outside virtual-8086
 * mode); otherwise the TSS I/O permission bitmap is consulted, raising
 * #GP(0) when any of the covered bits deny access.  Note that
 * generate_exception_if() jumps to 'done' when it fires.
 */
static int ioport_access_check(
    unsigned int first_port,
    unsigned int bytes,
    struct x86_emulate_ctxt *ctxt,
    const struct x86_emulate_ops *ops)
{
    unsigned long iobmp;
    struct segment_register tr;
    int rc = X86EMUL_OKAY;

    if ( !(ctxt->regs->eflags & X86_EFLAGS_VM) && mode_iopl() )
        return X86EMUL_OKAY;

    fail_if(ops->read_segment == NULL);
    /*
     * X86EMUL_DONE coming back here may be used to defer the port
     * permission check to the respective ioport hook.
     */
    if ( (rc = ops->read_segment(x86_seg_tr, &tr, ctxt)) != 0 )
        return rc == X86EMUL_DONE ? X86EMUL_OKAY : rc;

    /* Ensure the TSS has an io-bitmap-offset field. */
    generate_exception_if(tr.attr.fields.type != 0xb, EXC_GP, 0);

    /* Fetch the bitmap offset from the TSS (at offset 0x66). */
    switch ( rc = read_ulong(x86_seg_tr, 0x66, &iobmp, 2, ctxt, ops) )
    {
    case X86EMUL_OKAY:
        break;

    case X86EMUL_EXCEPTION:
        generate_exception_if(!ctxt->event_pending, EXC_GP, 0);
        /* fallthrough */

    default:
        return rc;
    }

    /* Read two bytes including byte containing first port. */
    switch ( rc = read_ulong(x86_seg_tr, iobmp + first_port / 8,
                             &iobmp, 2, ctxt, ops) )
    {
    case X86EMUL_OKAY:
        break;

    case X86EMUL_EXCEPTION:
        generate_exception_if(!ctxt->event_pending, EXC_GP, 0);
        /* fallthrough */

    default:
        return rc;
    }

    /* Any set bit in the covered range denies the access. */
    generate_exception_if(iobmp & (((1 << bytes) - 1) << (first_port & 7)),
                          EXC_GP, 0);

 done:
    return rc;
}
    1507             : 
    1508             : static bool
    1509       41654 : in_realmode(
    1510             :     struct x86_emulate_ctxt *ctxt,
    1511             :     const struct x86_emulate_ops  *ops)
    1512             : {
    1513             :     unsigned long cr0;
    1514             :     int rc;
    1515             : 
    1516       41654 :     if ( ops->read_cr == NULL )
    1517       11233 :         return 0;
    1518             : 
    1519       30421 :     rc = ops->read_cr(0, &cr0, ctxt);
    1520       30421 :     return (!rc && !(cr0 & X86_CR0_PE));
    1521             : }
    1522             : 
    1523             : static bool
    1524       38128 : in_protmode(
    1525             :     struct x86_emulate_ctxt *ctxt,
    1526             :     const struct x86_emulate_ops  *ops)
    1527             : {
    1528       38128 :     return !(in_realmode(ctxt, ops) || (ctxt->regs->eflags & X86_EFLAGS_VM));
    1529             : }
    1530             : 
/*
 * CPUID output register selectors for vcpu_has()'s 'reg' parameter.
 * Values follow the x86 register encoding order (hence EBX == 3).
 */
#define EAX 0
#define ECX 1
#define EDX 2
#define EBX 3
    1535             : 
    1536       56141 : static bool vcpu_has(
    1537             :     unsigned int eax,
    1538             :     unsigned int reg,
    1539             :     unsigned int bit,
    1540             :     struct x86_emulate_ctxt *ctxt,
    1541             :     const struct x86_emulate_ops *ops)
    1542             : {
    1543             :     struct cpuid_leaf res;
    1544       56141 :     int rc = X86EMUL_OKAY;
    1545             : 
    1546       56141 :     fail_if(!ops->cpuid);
    1547       56141 :     rc = ops->cpuid(eax, 0, &res, ctxt);
    1548       56141 :     if ( rc == X86EMUL_OKAY )
    1549             :     {
    1550       56141 :         switch ( reg )
    1551             :         {
    1552           0 :         case EAX: reg = res.a; break;
    1553        3140 :         case EBX: reg = res.b; break;
    1554       12365 :         case ECX: reg = res.c; break;
    1555       40636 :         case EDX: reg = res.d; break;
    1556           0 :         default: BUG();
    1557             :         }
    1558       56141 :         if ( !(reg & (1U << bit)) )
    1559        6766 :             rc = ~X86EMUL_OKAY;
    1560             :     }
    1561             : 
    1562             :  done:
    1563       56141 :     return rc == X86EMUL_OKAY;
    1564             : }
    1565             : 
/*
 * Per-feature availability wrappers: each names the CPUID leaf (first
 * argument), output register, and bit number carrying the flag, all
 * evaluated via vcpu_has().  The 'ctxt' and 'ops' locals of the invoking
 * function are referenced implicitly.
 */
#define vcpu_has_fpu()         vcpu_has(         1, EDX,  0, ctxt, ops)
#define vcpu_has_sep()         vcpu_has(         1, EDX, 11, ctxt, ops)
#define vcpu_has_cx8()         vcpu_has(         1, EDX,  8, ctxt, ops)
#define vcpu_has_cmov()        vcpu_has(         1, EDX, 15, ctxt, ops)
#define vcpu_has_clflush()     vcpu_has(         1, EDX, 19, ctxt, ops)
#define vcpu_has_mmx()         vcpu_has(         1, EDX, 23, ctxt, ops)
#define vcpu_has_sse()         vcpu_has(         1, EDX, 25, ctxt, ops)
#define vcpu_has_sse2()        vcpu_has(         1, EDX, 26, ctxt, ops)
#define vcpu_has_sse3()        vcpu_has(         1, ECX,  0, ctxt, ops)
#define vcpu_has_pclmulqdq()   vcpu_has(         1, ECX,  1, ctxt, ops)
#define vcpu_has_ssse3()       vcpu_has(         1, ECX,  9, ctxt, ops)
#define vcpu_has_cx16()        vcpu_has(         1, ECX, 13, ctxt, ops)
#define vcpu_has_sse4_1()      vcpu_has(         1, ECX, 19, ctxt, ops)
#define vcpu_has_sse4_2()      vcpu_has(         1, ECX, 20, ctxt, ops)
#define vcpu_has_movbe()       vcpu_has(         1, ECX, 22, ctxt, ops)
#define vcpu_has_popcnt()      vcpu_has(         1, ECX, 23, ctxt, ops)
#define vcpu_has_aesni()       vcpu_has(         1, ECX, 25, ctxt, ops)
#define vcpu_has_avx()         vcpu_has(         1, ECX, 28, ctxt, ops)
#define vcpu_has_rdrand()      vcpu_has(         1, ECX, 30, ctxt, ops)
/* MMXEXT is implied by SSE as well as by the dedicated extended-leaf bit. */
#define vcpu_has_mmxext()     (vcpu_has(0x80000001, EDX, 22, ctxt, ops) || \
                               vcpu_has_sse())
#define vcpu_has_lahf_lm()     vcpu_has(0x80000001, ECX,  0, ctxt, ops)
#define vcpu_has_cr8_legacy()  vcpu_has(0x80000001, ECX,  4, ctxt, ops)
#define vcpu_has_lzcnt()       vcpu_has(0x80000001, ECX,  5, ctxt, ops)
#define vcpu_has_sse4a()       vcpu_has(0x80000001, ECX,  6, ctxt, ops)
#define vcpu_has_misalignsse() vcpu_has(0x80000001, ECX,  7, ctxt, ops)
#define vcpu_has_tbm()         vcpu_has(0x80000001, ECX, 21, ctxt, ops)
#define vcpu_has_bmi1()        vcpu_has(         7, EBX,  3, ctxt, ops)
#define vcpu_has_hle()         vcpu_has(         7, EBX,  4, ctxt, ops)
#define vcpu_has_avx2()        vcpu_has(         7, EBX,  5, ctxt, ops)
#define vcpu_has_bmi2()        vcpu_has(         7, EBX,  8, ctxt, ops)
#define vcpu_has_rtm()         vcpu_has(         7, EBX, 11, ctxt, ops)
#define vcpu_has_mpx()         vcpu_has(         7, EBX, 14, ctxt, ops)
#define vcpu_has_rdseed()      vcpu_has(         7, EBX, 18, ctxt, ops)
#define vcpu_has_adx()         vcpu_has(         7, EBX, 19, ctxt, ops)
#define vcpu_has_smap()        vcpu_has(         7, EBX, 20, ctxt, ops)
#define vcpu_has_clflushopt()  vcpu_has(         7, EBX, 23, ctxt, ops)
#define vcpu_has_clwb()        vcpu_has(         7, EBX, 24, ctxt, ops)
#define vcpu_has_sha()         vcpu_has(         7, EBX, 29, ctxt, ops)
#define vcpu_has_rdpid()       vcpu_has(         7, ECX, 22, ctxt, ops)
#define vcpu_has_clzero()      vcpu_has(0x80000008, EBX,  0, ctxt, ops)

/* Raise #UD if the guest doesn't advertise the named feature. */
#define vcpu_must_have(feat) \
    generate_exception_if(!vcpu_has_##feat(), EXC_UD)

#ifdef __XEN__
/*
 * Note the difference between vcpu_must_have(<feature>) and
 * host_and_vcpu_must_have(<feature>): The latter needs to be used when
 * emulation code is using the same instruction class for carrying out
 * the actual operation.
 */
#define host_and_vcpu_must_have(feat) ({ \
    generate_exception_if(!cpu_has_##feat, EXC_UD); \
    vcpu_must_have(feat); \
})
#else
/*
 * For the test harness both are fine to be used interchangeably, i.e.
 * features known to always be available (e.g. SSE/SSE2) to (64-bit) Xen
 * may be checked for by just vcpu_must_have().
 */
#define host_and_vcpu_must_have(feat) vcpu_must_have(feat)
#endif
    1630             : 
    1631             : static int
    1632        7318 : realmode_load_seg(
    1633             :     enum x86_segment seg,
    1634             :     uint16_t sel,
    1635             :     struct segment_register *sreg,
    1636             :     struct x86_emulate_ctxt *ctxt,
    1637             :     const struct x86_emulate_ops *ops)
    1638             : {
    1639             :     int rc;
    1640             : 
    1641        7318 :     if ( !ops->read_segment )
    1642          41 :         return X86EMUL_UNHANDLEABLE;
    1643             : 
    1644        7277 :     if ( (rc = ops->read_segment(seg, sreg, ctxt)) == X86EMUL_OKAY )
    1645             :     {
    1646        7277 :         sreg->sel  = sel;
    1647        7277 :         sreg->base = (uint32_t)sel << 4;
    1648             :     }
    1649             : 
    1650        7277 :     return rc;
    1651             : }
    1652             : 
/*
 * Protected-mode segment load: fetch and validate the descriptor for
 * @sel, perform the architectural privilege/type checks for @seg, set
 * the Accessed (or TSS Busy) bit, and fill in @sreg.
 *
 * Passing in x86_seg_none means
 * - suppress any exceptions other than #PF,
 * - don't commit any state.
 *
 * @is_ret: the load is on behalf of a (near-privilege) far return, which
 *          changes the CS privilege checks (see inline comment below).
 * Returns X86EMUL_OKAY / X86EMUL_EXCEPTION / other hook error codes.
 */
static int
protmode_load_seg(
    enum x86_segment seg,
    uint16_t sel, bool is_ret,
    struct segment_register *sreg,
    struct x86_emulate_ctxt *ctxt,
    const struct x86_emulate_ops *ops)
{
    /* TI bit (bit 2) of the selector picks LDT vs GDT. */
    enum x86_segment sel_seg = (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr;
    struct { uint32_t a, b; } desc, desc_hi = {};
    uint8_t dpl, rpl;
    int cpl = get_cpl(ctxt, ops);
    /* Accessed bit within desc.b; zeroed below for descriptors lacking it. */
    uint32_t a_flag = 0x100;
    int rc, fault_type = EXC_GP;

    if ( cpl < 0 )
        return X86EMUL_UNHANDLEABLE;

    /* NULL selector? */
    if ( (sel & 0xfffc) == 0 )
    {
        /*
         * Deliberately contorted switch: SS tolerates a NULL selector only
         * in 64-bit mode at CPL != 3 with RPL == CPL; CS/TR never do; all
         * other segments may be loaded NULL (yielding an unusable segment).
         */
        switch ( seg )
        {
        case x86_seg_ss:
            if ( mode_64bit() && (cpl != 3) && (cpl == sel) )
        default:
                break;
            /* fall through */
        case x86_seg_cs:
        case x86_seg_tr:
            goto raise_exn;
        }
        /*
         * AMD keeps the remaining cached fields on a NULL load (attributes
         * excepted); others zero the whole register.
         */
        if ( ctxt->vendor != X86_VENDOR_AMD || !ops->read_segment ||
             ops->read_segment(seg, sreg, ctxt) != X86EMUL_OKAY )
            memset(sreg, 0, sizeof(*sreg));
        else
            sreg->attr.bytes = 0;
        sreg->sel = sel;

        /* Since CPL == SS.DPL, we need to put back DPL. */
        if ( seg == x86_seg_ss )
            sreg->attr.fields.dpl = sel;

        return X86EMUL_OKAY;
    }

    /* System segment descriptors must reside in the GDT. */
    if ( is_x86_system_segment(seg) && (sel & 4) )
        goto raise_exn;

    /* Fetch the 8-byte descriptor. */
    switch ( rc = ops->read(sel_seg, sel & 0xfff8, &desc, sizeof(desc), ctxt) )
    {
    case X86EMUL_OKAY:
        break;

    case X86EMUL_EXCEPTION:
        /* Table-limit violation (no event injected yet) becomes #GP(sel). */
        if ( !ctxt->event_pending )
            goto raise_exn;
        /* fallthrough */

    default:
        return rc;
    }

    /* System segments must have S flag == 0. */
    if ( is_x86_system_segment(seg) && (desc.b & (1u << 12)) )
        goto raise_exn;
    /* User segments must have S flag == 1. */
    if ( is_x86_user_segment(seg) && !(desc.b & (1u << 12)) )
        goto raise_exn;

    dpl = (desc.b >> 13) & 3;
    rpl = sel & 3;

    /* Per-target-register type and privilege checks. */
    switch ( seg )
    {
    case x86_seg_cs:
        /* Code segment? */
        if ( !(desc.b & (1u<<11)) )
            goto raise_exn;
        if ( is_ret
             ? /*
                * Really rpl < cpl, but our sole caller doesn't handle
                * privilege level changes.
                */
               rpl != cpl || (desc.b & (1 << 10) ? dpl > rpl : dpl != rpl)
             : desc.b & (1 << 10)
               /* Conforming segment: check DPL against CPL. */
               ? dpl > cpl
               /* Non-conforming segment: check RPL and DPL against CPL. */
               : rpl > cpl || dpl != cpl )
            goto raise_exn;
        /*
         * 64-bit code segments (L bit set) must have D bit clear.
         * Experimentally in long mode, the L and D bits are checked before
         * the Present bit.
         */
        if ( ctxt->lma && (desc.b & (1 << 21)) && (desc.b & (1 << 22)) )
            goto raise_exn;
        /* CS.RPL is forced to CPL on load. */
        sel = (sel ^ rpl) | cpl;
        break;
    case x86_seg_ss:
        /* Writable data segment? */
        if ( (desc.b & (5u<<9)) != (1u<<9) )
            goto raise_exn;
        if ( (dpl != cpl) || (dpl != rpl) )
            goto raise_exn;
        break;
    case x86_seg_ldtr:
        /* LDT system segment? */
        if ( (desc.b & (15u<<8)) != (2u<<8) )
            goto raise_exn;
        a_flag = 0;
        break;
    case x86_seg_tr:
        /* Available TSS system segment? */
        if ( (desc.b & (15u<<8)) != (9u<<8) )
            goto raise_exn;
        a_flag = 0x200; /* busy flag */
        break;
    default:
        /* Readable code or data segment? */
        if ( (desc.b & (5u<<9)) == (4u<<9) )
            goto raise_exn;
        /* Non-conforming segment: check DPL against RPL and CPL. */
        if ( ((desc.b & (6u<<9)) != (6u<<9)) &&
             ((dpl < cpl) || (dpl < rpl)) )
            goto raise_exn;
        break;
    case x86_seg_none:
        /* Non-conforming segment: check DPL against RPL and CPL. */
        if ( ((desc.b & (0x1c << 8)) != (0x1c << 8)) &&
             ((dpl < cpl) || (dpl < rpl)) )
            return X86EMUL_EXCEPTION;
        a_flag = 0;
        break;
    }

    /* Segment present in memory? */
    if ( !(desc.b & (1 << 15)) && seg != x86_seg_none )
    {
        /* Non-present SS faults with #SS; everything else with #NP. */
        fault_type = seg != x86_seg_ss ? EXC_NP : EXC_SS;
        goto raise_exn;
    }

    if ( !is_x86_user_segment(seg) )
    {
        /* In long mode system descriptors are 16 bytes wide. */
        bool lm = (desc.b & (1u << 12)) ? false : ctxt->lma;

        if ( lm )
        {
            switch ( rc = ops->read(sel_seg, (sel & 0xfff8) + 8,
                                    &desc_hi, sizeof(desc_hi), ctxt) )
            {
            case X86EMUL_OKAY:
                break;

            case X86EMUL_EXCEPTION:
                if ( !ctxt->event_pending )
                    goto raise_exn;
                /* fall through */
            default:
                return rc;
            }
            /* High word must-be-zero bits and canonical-base check. */
            if ( (desc_hi.b & 0x00001f00) ||
                 (seg != x86_seg_none &&
                  !is_canonical_address((uint64_t)desc_hi.a << 32)) )
                goto raise_exn;
        }
    }

    /* Ensure Accessed flag is set. */
    if ( a_flag && !(desc.b & a_flag) )
    {
        uint32_t new_desc_b = desc.b | a_flag;

        /* Atomic update, so a racing descriptor write isn't clobbered. */
        fail_if(!ops->cmpxchg);
        switch ( (rc = ops->cmpxchg(sel_seg, (sel & 0xfff8) + 4, &desc.b,
                                    &new_desc_b, sizeof(desc.b), ctxt)) )
        {
        case X86EMUL_OKAY:
            break;

        case X86EMUL_EXCEPTION:
            if ( !ctxt->event_pending )
                goto raise_exn;
            /* fallthrough */

        default:
            return rc;
        }

        /* Force the Accessed flag in our local copy. */
        desc.b = new_desc_b;
    }

    /* Assemble the cached register from the descriptor words. */
    sreg->base = (((uint64_t)desc_hi.a << 32) |
                  ((desc.b <<  0) & 0xff000000u) |
                  ((desc.b << 16) & 0x00ff0000u) |
                  ((desc.a >> 16) & 0x0000ffffu));
    sreg->attr.bytes = (((desc.b >>  8) & 0x00ffu) |
                        ((desc.b >> 12) & 0x0f00u));
    sreg->limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
    if ( sreg->attr.fields.g )
        sreg->limit = (sreg->limit << 12) | 0xfffu;
    sreg->sel = sel;
    return X86EMUL_OKAY;

 raise_exn:
    /* For x86_seg_none, report the exception but inject nothing. */
    generate_exception_if(seg != x86_seg_none, fault_type, sel & 0xfffc);
    rc = X86EMUL_EXCEPTION;
 done:
    return rc;
}
    1872             : 
    1873             : static int
    1874        9893 : load_seg(
    1875             :     enum x86_segment seg,
    1876             :     uint16_t sel, bool is_ret,
    1877             :     struct segment_register *sreg,
    1878             :     struct x86_emulate_ctxt *ctxt,
    1879             :     const struct x86_emulate_ops *ops)
    1880             : {
    1881             :     struct segment_register reg;
    1882             :     int rc;
    1883             : 
    1884        9893 :     if ( !ops->write_segment )
    1885          94 :         return X86EMUL_UNHANDLEABLE;
    1886             : 
    1887        9799 :     if ( !sreg )
    1888        7320 :         sreg = &reg;
    1889             : 
    1890        9799 :     if ( in_protmode(ctxt, ops) )
    1891        2481 :         rc = protmode_load_seg(seg, sel, is_ret, sreg, ctxt, ops);
    1892             :     else
    1893        7318 :         rc = realmode_load_seg(seg, sel, sreg, ctxt, ops);
    1894             : 
    1895        9799 :     if ( !rc && sreg == &reg )
    1896        6928 :         rc = ops->write_segment(seg, sreg, ctxt);
    1897             : 
    1898        9799 :     return rc;
    1899             : }
    1900             : 
/*
 * Translate a (possibly REX-extended) register encoding into a pointer
 * at the matching GPR field of @regs.
 *
 * @modrm_reg:     register number 0-7 (0-15 on x86-64).
 * @highbyte_regs: non-zero maps encodings 4-7 onto the legacy high-byte
 *                 registers AH/CH/DH/BH instead of xSP/xBP/xSI/xDI.
 */
void *
decode_register(
    uint8_t modrm_reg, struct cpu_user_regs *regs, int highbyte_regs)
{
    void *p;

    switch ( modrm_reg )
    {
    case  0: p = &regs->r(ax); break;
    case  1: p = &regs->r(cx); break;
    case  2: p = &regs->r(dx); break;
    case  3: p = &regs->r(bx); break;
    case  4: p = (highbyte_regs ? &regs->ah : (void *)&regs->r(sp)); break;
    case  5: p = (highbyte_regs ? &regs->ch : (void *)&regs->r(bp)); break;
    case  6: p = (highbyte_regs ? &regs->dh : (void *)&regs->r(si)); break;
    case  7: p = (highbyte_regs ? &regs->bh : (void *)&regs->r(di)); break;
#if defined(__x86_64__)
    case  8: p = &regs->r8;  break;
    case  9: p = &regs->r9;  break;
    case 10: p = &regs->r10; break;
    case 11: p = &regs->r11; break;
    /*
     * NOTE(review): mark_regs_dirty() is presumably needed so r12-r15 are
     * valid/saved before being handed out -- confirm against its
     * definition; it is only applied to these four registers here.
     */
    case 12: mark_regs_dirty(regs); p = &regs->r12; break;
    case 13: mark_regs_dirty(regs); p = &regs->r13; break;
    case 14: mark_regs_dirty(regs); p = &regs->r14; break;
    case 15: mark_regs_dirty(regs); p = &regs->r15; break;
#endif
    default: BUG(); p = NULL; break;
    }

    return p;
}
    1932             : 
    1933         430 : static void *decode_vex_gpr(unsigned int vex_reg, struct cpu_user_regs *regs,
    1934             :                             const struct x86_emulate_ctxt *ctxt)
    1935             : {
    1936         430 :     return decode_register(~vex_reg & (mode_64bit() ? 0xf : 7), regs, 0);
    1937             : }
    1938             : 
    1939        2905 : static bool is_aligned(enum x86_segment seg, unsigned long offs,
    1940             :                        unsigned int size, struct x86_emulate_ctxt *ctxt,
    1941             :                        const struct x86_emulate_ops *ops)
    1942             : {
    1943             :     struct segment_register reg;
    1944             : 
    1945             :     /* Expecting powers of two only. */
    1946        2905 :     ASSERT(!(size & (size - 1)));
    1947             : 
    1948        2905 :     if ( mode_64bit() && seg < x86_seg_fs )
    1949         839 :         memset(&reg, 0, sizeof(reg));
    1950             :     else
    1951             :     {
    1952             :         /* No alignment checking when we have no way to read segment data. */
    1953        2066 :         if ( !ops->read_segment )
    1954        1137 :             return true;
    1955             : 
    1956         929 :         if ( ops->read_segment(seg, &reg, ctxt) != X86EMUL_OKAY )
    1957           0 :             return false;
    1958             :     }
    1959             : 
    1960        1768 :     return !((reg.base + offs) & (size - 1));
    1961             : }
    1962             : 
    1963      152101 : static bool is_branch_step(struct x86_emulate_ctxt *ctxt,
    1964             :                            const struct x86_emulate_ops *ops)
    1965             : {
    1966             :     uint64_t debugctl;
    1967             : 
    1968      399588 :     return ops->read_msr &&
    1969      247487 :            ops->read_msr(MSR_IA32_DEBUGCTLMSR, &debugctl, ctxt) == X86EMUL_OKAY &&
    1970       95386 :            (debugctl & IA32_DEBUGCTLMSR_BTF);
    1971             : }
    1972             : 
    1973        5747 : static bool umip_active(struct x86_emulate_ctxt *ctxt,
    1974             :                         const struct x86_emulate_ops *ops)
    1975             : {
    1976             :     unsigned long cr4;
    1977             : 
    1978             :     /* Intentionally not using mode_ring0() here to avoid its fail_if(). */
    1979       13214 :     return get_cpl(ctxt, ops) > 0 &&
    1980        7491 :            ops->read_cr && ops->read_cr(4, &cr4, ctxt) == X86EMUL_OKAY &&
    1981         872 :            (cr4 & X86_CR4_UMIP);
    1982             : }
    1983             : 
/*
 * MPX bookkeeping for emulated branches: unless the branch carries the
 * F2 (BND) prefix or BNDCFG*.PRESERVE is set, a taken branch clears the
 * bound registers.  Failures are handled by simply leaving BNDn alone.
 */
static void adjust_bnd(struct x86_emulate_ctxt *ctxt,
                       const struct x86_emulate_ops *ops, enum vex_pfx pfx)
{
    uint64_t bndcfg;
    /* NOTE(review): rc looks unused here, but is presumably assigned by
     * the mode_ring0()/fail_if() macro machinery below -- confirm. */
    int rc;

    if ( pfx == vex_f2 || !cpu_has_mpx || !vcpu_has_mpx() )
        return;

    /* Ring 0 uses MSR_IA32_BNDCFGS; other rings the BNDCFGU register. */
    if ( !mode_ring0() )
        bndcfg = read_bndcfgu();
    else if ( !ops->read_msr ||
              ops->read_msr(MSR_IA32_BNDCFGS, &bndcfg, ctxt) != X86EMUL_OKAY )
        return;
    if ( (bndcfg & IA32_BNDCFGS_ENABLE) && !(bndcfg & IA32_BNDCFGS_PRESERVE) )
    {
        /*
         * Using BNDMK or any other MPX instruction here is pointless, as
         * we run with MPX disabled ourselves, and hence they're all no-ops.
         * Therefore we have two ways to clear BNDn: Enable MPX temporarily
         * (in which case executing any suitable non-prefixed branch
         * instruction would do), or use XRSTOR.
         */
        xstate_set_init(XSTATE_BNDREGS);
    }
    /* Jump target for the fail_if()-style macros used above. */
 done:;
}
    2011             : 
/*
 * Fallback read/write hook: unconditionally reports the access as
 * unhandleable, making the emulator abort the current instruction.
 */
int x86emul_unhandleable_rw(
    enum x86_segment seg,
    unsigned long offset,
    void *p_data,
    unsigned int bytes,
    struct x86_emulate_ctxt *ctxt)
{
    return X86EMUL_UNHANDLEABLE;
}
    2021             : 
/*
 * Helper definitions: shorthands mapping frequently used decode-state
 * fields onto bare identifiers, so the decode routines below can refer
 * to them as if they were locals of the current function.
 */
#define op_bytes (state->op_bytes)
#define ad_bytes (state->ad_bytes)
#define ext (state->ext)
#define modrm (state->modrm)
#define modrm_mod (state->modrm_mod)
#define modrm_reg (state->modrm_reg)
#define modrm_rm (state->modrm_rm)
#define rex_prefix (state->rex_prefix)
#define lock_prefix (state->lock_prefix)
#define vex (state->vex)
#define evex (state->evex)
#define ea (state->ea)
    2035             : 
/*
 * Decode-stage special casing for one-byte (non-0f-escaped) opcodes:
 * extra immediate fetching, 64-bit-mode restrictions, and operand-size
 * fix-ups beyond what the generic opcode table expresses.  Returns
 * X86EMUL_OKAY, or an error/exception code via the insn_fetch_* macros
 * (which bail to the 'done' label).
 */
static int
x86_decode_onebyte(
    struct x86_emulate_state *state,
    struct x86_emulate_ctxt *ctxt,
    const struct x86_emulate_ops *ops)
{
    int rc = X86EMUL_OKAY;

    switch ( ctxt->opcode )
    {
    case 0x06: /* push %%es */
    case 0x07: /* pop %%es */
    case 0x0e: /* push %%cs */
    case 0x16: /* push %%ss */
    case 0x17: /* pop %%ss */
    case 0x1e: /* push %%ds */
    case 0x1f: /* pop %%ds */
    case 0x27: /* daa */
    case 0x2f: /* das */
    case 0x37: /* aaa */
    case 0x3f: /* aas */
    case 0x60: /* pusha */
    case 0x61: /* popa */
    case 0x62: /* bound */
    case 0x82: /* Grp1 (x86/32 only) */
    case 0xc4: /* les */
    case 0xc5: /* lds */
    case 0xce: /* into */
    case 0xd4: /* aam */
    case 0xd5: /* aad */
    case 0xd6: /* salc */
        /* All of these are invalid in 64-bit mode; flag for later #UD. */
        state->not_64bit = true;
        break;

    case 0x90: /* nop / pause */
        /* F3 90 is PAUSE; retag the opcode so execution can tell. */
        if ( repe_prefix() )
            ctxt->opcode |= X86EMUL_OPC_F3(0, 0);
        break;

    case 0x9a: /* call (far, absolute) */
    case 0xea: /* jmp (far, absolute) */
        generate_exception_if(mode_64bit(), EXC_UD);

        /* Far pointer: op_bytes-wide offset followed by 16-bit selector. */
        imm1 = insn_fetch_bytes(op_bytes);
        imm2 = insn_fetch_type(uint16_t);
        break;

    case 0xa0: case 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
    case 0xa2: case 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
        /* Source EA is not encoded via ModRM. */
        ea.type = OP_MEM;
        ea.mem.off = insn_fetch_bytes(ad_bytes);
        break;

    case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
        if ( op_bytes == 8 ) /* Fetch more bytes to obtain imm64. */
            imm1 = ((uint32_t)imm1 |
                    ((uint64_t)insn_fetch_type(uint32_t) << 32));
        break;

    case 0xc8: /* enter imm16,imm8 */
        /* imm1 (frame size) presumably already fetched generically. */
        imm2 = insn_fetch_type(uint8_t);
        break;

    case 0xff: /* Grp5 */
        switch ( modrm_reg & 7 )
        {
        case 2: /* call (near) */
        case 4: /* jmp (near) */
        case 6: /* push */
            /* Near branches/push default to 64-bit operands in long mode. */
            if ( mode_64bit() && op_bytes == 4 )
                op_bytes = 8;
            /* fall through */
        case 3: /* call (far, absolute indirect) */
        case 5: /* jmp (far, absolute indirect) */
            state->desc = DstNone | SrcMem | Mov;
            break;
        }
        break;
    }

 done:
    return rc;
}
    2120             : 
/*
 * Decode-time fixups for two-byte (0x0f-prefixed) opcodes.
 *
 * Adjusts ctxt->opcode (folding the legacy 66/F2/F3 prefix into the
 * canonical opcode value via MASK_INSR), state->desc, state->simd_size
 * and op_bytes for opcodes whose operand encoding cannot be described
 * by the static decode tables alone.
 *
 * NOTE(review): modrm_reg, vex, ea, op_bytes, lock_prefix, imm1/imm2
 * etc. are macros over local/decoder state declared elsewhere in this
 * file.  insn_fetch_type() presumably updates rc and jumps to the
 * "done" label below on a failed fetch (the label is otherwise
 * unreferenced here) — confirm against the macro's definition.
 *
 * Returns X86EMUL_OKAY on success, or the failure code produced by an
 * instruction byte fetch.
 */
static int
x86_decode_twobyte(
    struct x86_emulate_state *state,
    struct x86_emulate_ctxt *ctxt,
    const struct x86_emulate_ops *ops)
{
    int rc = X86EMUL_OKAY;

    switch ( ctxt->opcode & X86EMUL_OPC_MASK )
    {
    case 0x00: /* Grp6 */
        /* Bit 0 of modrm_reg is ignored: 0/1, 2/3, 4/5 pair up. */
        switch ( modrm_reg & 6 )
        {
        case 0: /* sldt / str - store a selector to reg/mem */
            state->desc |= DstMem | SrcImplicit | Mov;
            break;
        case 2: case 4: /* lldt / ltr / verr / verw - read a selector */
            state->desc |= SrcMem16;
            break;
        }
        break;

    case 0x78:
        /* SSE4a forms carry two immediate bytes; fetch them first. */
        switch ( vex.pfx )
        {
        case vex_66: /* extrq $imm8, $imm8, xmm */
        case vex_f2: /* insertq $imm8, $imm8, xmm, xmm */
            imm1 = insn_fetch_type(uint8_t);
            imm2 = insn_fetch_type(uint8_t);
            break;
        }
        /* fall through */
    case 0x10 ... 0x18:
    case 0x28 ... 0x2f:
    case 0x50 ... 0x77:
    case 0x79 ... 0x7d:
    case 0x7f:
    case 0xc2 ... 0xc3:
    case 0xc5 ... 0xc6:
    case 0xd0 ... 0xef:
    case 0xf1 ... 0xfe:
        /*
         * For these (mostly SIMD) ranges the 66/F2/F3 prefix selects the
         * instruction, so fold it into the canonical opcode value.
         */
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
        break;

    case 0x20: case 0x22: /* mov to/from cr */
        /* LOCK on mov cr selects %cr8 on CPUs with the CR8D feature. */
        if ( lock_prefix && vcpu_has_cr8_legacy() )
        {
            modrm_reg += 8;
            lock_prefix = false;
        }
        /* fall through */
    case 0x21: case 0x23: /* mov to/from dr */
        ASSERT(ea.type == OP_REG); /* Early operand adjustment ensures this. */
        generate_exception_if(lock_prefix, EXC_UD);
        op_bytes = mode_64bit() ? 8 : 4;
        break;

    case 0x7e:
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
        if ( vex.pfx == vex_f3 ) /* movq xmm/m64,xmm */
        {
            /*
             * Deliberate case label inside the if() body: the VEX form
             * jumps straight here, sharing the legacy form's handling.
             */
    case X86EMUL_OPC_VEX_F3(0, 0x7e): /* vmovq xmm/m64,xmm */
            state->desc = DstImplicit | SrcMem | TwoOp;
            state->simd_size = simd_other;
            /* Avoid the state->desc clobbering of TwoOp below. */
            return X86EMUL_OKAY;
        }
        break;

    case 0xae:
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
        /* fall through */
    case X86EMUL_OPC_VEX(0, 0xae):
        switch ( modrm_reg & 7 )
        {
        case 2: /* {,v}ldmxcsr - load 32-bit MXCSR from memory */
            state->desc = DstImplicit | SrcMem | Mov;
            op_bytes = 4;
            break;

        case 3: /* {,v}stmxcsr - store 32-bit MXCSR to memory */
            state->desc = DstMem | SrcImplicit | Mov;
            op_bytes = 4;
            break;
        }
        break;

    case 0xb8: /* jmpe / popcnt */
        /* Only the F3 (popcnt) form is folded into the opcode. */
        if ( rep_prefix() )
            ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
        break;

        /* Intentionally not handling here despite being modified by F3:
    case 0xbc: bsf / tzcnt
    case 0xbd: bsr / lzcnt
         * They're being dealt with in the execution phase (if at all).
         */

    case 0xc4: /* pinsrw */
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
        /* fall through */
    case X86EMUL_OPC_VEX_66(0, 0xc4): /* vpinsrw */
        state->desc = DstReg | SrcMem16;
        break;

    case 0xf0:
        ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
        if ( vex.pfx == vex_f2 ) /* lddqu mem,xmm */
        {
        /* fall through */
    case X86EMUL_OPC_VEX_F2(0, 0xf0): /* vlddqu mem,{x,y}mm */
            state->desc = DstImplicit | SrcMem | TwoOp;
            state->simd_size = simd_other;
            /* Avoid the state->desc clobbering of TwoOp below. */
            return X86EMUL_OKAY;
        }
        break;
    }

    /*
     * Scalar forms of most VEX-encoded TwoOp instructions have
     * three operands.  Those which do really have two operands
     * should have exited earlier.
     */
    if ( state->simd_size && vex.opcx &&
         (vex.pfx & VEX_PREFIX_SCALAR_MASK) )
        state->desc &= ~TwoOp;

 done:
    return rc;
}
    2252             : 
    2253             : static int
    2254        2702 : x86_decode_0f38(
    2255             :     struct x86_emulate_state *state,
    2256             :     struct x86_emulate_ctxt *ctxt,
    2257             :     const struct x86_emulate_ops *ops)
    2258             : {
    2259        2702 :     switch ( ctxt->opcode & X86EMUL_OPC_MASK )
    2260             :     {
    2261             :     case 0x00 ... 0xef:
    2262             :     case 0xf2 ... 0xf5:
    2263             :     case 0xf7 ... 0xff:
    2264         327 :         op_bytes = 0;
    2265             :         /* fall through */
    2266             :     case 0xf6: /* adcx / adox */
    2267         591 :         ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
    2268         591 :         break;
    2269             : 
    2270             :     case 0xf0: /* movbe / crc32 */
    2271         624 :         state->desc |= repne_prefix() ? ByteOp : Mov;
    2272         624 :         if ( rep_prefix() )
    2273           8 :             ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
    2274         624 :         break;
    2275             : 
    2276             :     case 0xf1: /* movbe / crc32 */
    2277         102 :         if ( repne_prefix() )
    2278           8 :             state->desc = DstReg | SrcMem;
    2279         102 :         if ( rep_prefix() )
    2280           8 :             ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
    2281         102 :         break;
    2282             : 
    2283             :     case X86EMUL_OPC_VEX(0, 0xf2):    /* andn */
    2284             :     case X86EMUL_OPC_VEX(0, 0xf3):    /* Grp 17 */
    2285             :     case X86EMUL_OPC_VEX(0, 0xf5):    /* bzhi */
    2286             :     case X86EMUL_OPC_VEX_F3(0, 0xf5): /* pext */
    2287             :     case X86EMUL_OPC_VEX_F2(0, 0xf5): /* pdep */
    2288             :     case X86EMUL_OPC_VEX_F2(0, 0xf6): /* mulx */
    2289             :     case X86EMUL_OPC_VEX(0, 0xf7):    /* bextr */
    2290             :     case X86EMUL_OPC_VEX_66(0, 0xf7): /* shlx */
    2291             :     case X86EMUL_OPC_VEX_F3(0, 0xf7): /* sarx */
    2292             :     case X86EMUL_OPC_VEX_F2(0, 0xf7): /* shrx */
    2293         470 :         break;
    2294             : 
    2295             :     default:
    2296         915 :         op_bytes = 0;
    2297         915 :         break;
    2298             :     }
    2299             : 
    2300        2702 :     return X86EMUL_OKAY;
    2301             : }
    2302             : 
    2303             : static int
    2304         670 : x86_decode_0f3a(
    2305             :     struct x86_emulate_state *state,
    2306             :     struct x86_emulate_ctxt *ctxt,
    2307             :     const struct x86_emulate_ops *ops)
    2308             : {
    2309         670 :     if ( !vex.opcx )
    2310         353 :         ctxt->opcode |= MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
    2311             : 
    2312         670 :     switch ( ctxt->opcode & X86EMUL_OPC_MASK )
    2313             :     {
    2314             :     case X86EMUL_OPC_66(0, 0x14)
    2315             :      ... X86EMUL_OPC_66(0, 0x17):     /* pextr*, extractps */
    2316             :     case X86EMUL_OPC_VEX_66(0, 0x14)
    2317             :      ... X86EMUL_OPC_VEX_66(0, 0x17): /* vpextr*, vextractps */
    2318             :     case X86EMUL_OPC_VEX_F2(0, 0xf0): /* rorx */
    2319          45 :         break;
    2320             : 
    2321             :     case X86EMUL_OPC_66(0, 0x20):     /* pinsrb */
    2322             :     case X86EMUL_OPC_VEX_66(0, 0x20): /* vpinsrb */
    2323          14 :         state->desc = DstImplicit | SrcMem;
    2324          14 :         if ( modrm_mod != 3 )
    2325          14 :             state->desc |= ByteOp;
    2326          14 :         break;
    2327             : 
    2328             :     case X86EMUL_OPC_66(0, 0x22):     /* pinsr{d,q} */
    2329             :     case X86EMUL_OPC_VEX_66(0, 0x22): /* vpinsr{d,q} */
    2330          32 :         state->desc = DstImplicit | SrcMem;
    2331          32 :         break;
    2332             : 
    2333             :     default:
    2334         579 :         op_bytes = 0;
    2335         579 :         break;
    2336             :     }
    2337             : 
    2338         670 :     return X86EMUL_OKAY;
    2339             : }
    2340             : 
    2341             : static int
    2342     3899535 : x86_decode(
    2343             :     struct x86_emulate_state *state,
    2344             :     struct x86_emulate_ctxt *ctxt,
    2345             :     const struct x86_emulate_ops  *ops)
    2346             : {
    2347             :     uint8_t b, d, sib, sib_index, sib_base;
    2348             :     unsigned int def_op_bytes, def_ad_bytes, opcode;
    2349     3899535 :     enum x86_segment override_seg = x86_seg_none;
    2350     3899535 :     bool pc_rel = false;
    2351     3899535 :     int rc = X86EMUL_OKAY;
    2352             : 
    2353     3899535 :     ASSERT(ops->insn_fetch);
    2354             : 
    2355     3899535 :     memset(state, 0, sizeof(*state));
    2356     3899535 :     ea.type = OP_NONE;
    2357     3899535 :     ea.mem.seg = x86_seg_ds;
    2358     3899535 :     ea.reg = PTR_POISON;
    2359     3899535 :     state->regs = ctxt->regs;
    2360     3899535 :     state->ip = ctxt->regs->r(ip);
    2361             : 
    2362             :     /* Initialise output state in x86_emulate_ctxt */
    2363     3899535 :     ctxt->retire.raw = 0;
    2364     3899535 :     x86_emul_reset_event(ctxt);
    2365             : 
    2366     3899535 :     op_bytes = def_op_bytes = ad_bytes = def_ad_bytes = ctxt->addr_size/8;
    2367     3899535 :     if ( op_bytes == 8 )
    2368             :     {
    2369     1089241 :         op_bytes = def_op_bytes = 4;
    2370             : #ifndef __x86_64__
    2371             :         return X86EMUL_UNHANDLEABLE;
    2372             : #endif
    2373             :     }
    2374             : 
    2375             :     /* Prefix bytes. */
    2376             :     for ( ; ; )
    2377             :     {
    2378     4482042 :         switch ( b = insn_fetch_type(uint8_t) )
    2379             :         {
    2380             :         case 0x66: /* operand-size override */
    2381       57664 :             op_bytes = def_op_bytes ^ 6;
    2382       57664 :             if ( !vex.pfx )
    2383       25845 :                 vex.pfx = vex_66;
    2384       57664 :             break;
    2385             :         case 0x67: /* address-size override */
    2386       33638 :             ad_bytes = def_ad_bytes ^ (mode_64bit() ? 12 : 6);
    2387       33638 :             break;
    2388             :         case 0x2e: /* CS override */
    2389        9376 :             override_seg = x86_seg_cs;
    2390        9376 :             break;
    2391             :         case 0x3e: /* DS override */
    2392        5067 :             override_seg = x86_seg_ds;
    2393        5067 :             break;
    2394             :         case 0x26: /* ES override */
    2395        1544 :             override_seg = x86_seg_es;
    2396        1544 :             break;
    2397             :         case 0x64: /* FS override */
    2398        8375 :             override_seg = x86_seg_fs;
    2399        8375 :             break;
    2400             :         case 0x65: /* GS override */
    2401        5764 :             override_seg = x86_seg_gs;
    2402        5764 :             break;
    2403             :         case 0x36: /* SS override */
    2404        2266 :             override_seg = x86_seg_ss;
    2405        2266 :             break;
    2406             :         case 0xf0: /* LOCK */
    2407        1469 :             lock_prefix = 1;
    2408        1469 :             break;
    2409             :         case 0xf2: /* REPNE/REPNZ */
    2410       18041 :             vex.pfx = vex_f2;
    2411       18041 :             break;
    2412             :         case 0xf3: /* REP/REPE/REPZ */
    2413       41483 :             vex.pfx = vex_f3;
    2414       41483 :             break;
    2415             :         case 0x40 ... 0x4f: /* REX */
    2416     1016982 :             if ( !mode_64bit() )
    2417      619162 :                 goto done_prefixes;
    2418      397820 :             rex_prefix = b;
    2419      397820 :             continue;
    2420             :         default:
    2421     3265424 :             goto done_prefixes;
    2422             :         }
    2423             : 
    2424             :         /* Any legacy prefix after a REX prefix nullifies its effect. */
    2425      184687 :         rex_prefix = 0;
    2426      582507 :     }
    2427             :  done_prefixes:
    2428             : 
    2429     3884586 :     if ( rex_prefix & REX_W )
    2430      147482 :         op_bytes = 8;
    2431             : 
    2432             :     /* Opcode byte(s). */
    2433     3884586 :     d = opcode_table[b];
    2434     3884586 :     if ( d == 0 && b == 0x0f )
    2435             :     {
    2436             :         /* Two-byte opcode. */
    2437      163365 :         b = insn_fetch_type(uint8_t);
    2438      163125 :         d = twobyte_table[b].desc;
    2439      163125 :         switch ( b )
    2440             :         {
    2441             :         default:
    2442      161423 :             opcode = b | MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
    2443      161423 :             ext = ext_0f;
    2444      161423 :             state->simd_size = twobyte_table[b].size;
    2445      161423 :             break;
    2446             :         case 0x38:
    2447        1349 :             b = insn_fetch_type(uint8_t);
    2448        1317 :             opcode = b | MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
    2449        1317 :             ext = ext_0f38;
    2450        1317 :             break;
    2451             :         case 0x3a:
    2452         385 :             b = insn_fetch_type(uint8_t);
    2453         353 :             opcode = b | MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
    2454         353 :             ext = ext_0f3a;
    2455         353 :             break;
    2456             :         }
    2457      163093 :     }
    2458             :     else
    2459     3721341 :         opcode = b;
    2460             : 
    2461             :     /* ModRM and SIB bytes. */
    2462     3884434 :     if ( d & ModRM )
    2463             :     {
    2464     1037807 :         modrm = insn_fetch_type(uint8_t);
    2465     1029735 :         modrm_mod = (modrm & 0xc0) >> 6;
    2466             : 
    2467     1936518 :         if ( !ext && ((b & ~1) == 0xc4 || (b == 0x8f && (modrm & 0x18)) ||
    2468             :                       b == 0x62) )
    2469        8819 :             switch ( def_ad_bytes )
    2470             :             {
    2471             :             default:
    2472           0 :                 BUG(); /* Shouldn't be possible. */
    2473             :             case 2:
    2474        5298 :                 if ( state->regs->eflags & X86_EFLAGS_VM )
    2475           0 :                     break;
    2476             :                 /* fall through */
    2477             :             case 4:
    2478        6934 :                 if ( modrm_mod != 3 || in_realmode(ctxt, ops) )
    2479             :                     break;
    2480             :                 /* fall through */
    2481             :             case 8:
    2482             :                 /* VEX / XOP / EVEX */
    2483        5147 :                 generate_exception_if(rex_prefix || vex.pfx, EXC_UD);
    2484             :                 /*
    2485             :                  * With operand size override disallowed (see above), op_bytes
    2486             :                  * should not have changed from its default.
    2487             :                  */
    2488        5117 :                 ASSERT(op_bytes == def_op_bytes);
    2489             : 
    2490        5117 :                 vex.raw[0] = modrm;
    2491        5117 :                 if ( b == 0xc5 )
    2492             :                 {
    2493        3123 :                     opcode = X86EMUL_OPC_VEX_;
    2494        3123 :                     vex.raw[1] = modrm;
    2495        3123 :                     vex.opcx = vex_0f;
    2496        3123 :                     vex.x = 1;
    2497        3123 :                     vex.b = 1;
    2498        3123 :                     vex.w = 0;
    2499             :                 }
    2500             :                 else
    2501             :                 {
    2502        2105 :                     vex.raw[1] = insn_fetch_type(uint8_t);
    2503        1883 :                     if ( mode_64bit() )
    2504             :                     {
    2505        1192 :                         if ( !vex.b )
    2506        1051 :                             rex_prefix |= REX_B;
    2507        1192 :                         if ( !vex.x )
    2508          84 :                             rex_prefix |= REX_X;
    2509        1192 :                         if ( vex.w )
    2510             :                         {
    2511         141 :                             rex_prefix |= REX_W;
    2512         141 :                             op_bytes = 8;
    2513             :                         }
    2514             :                     }
    2515             :                     else
    2516             :                     {
    2517             :                         /* Operand size fixed at 4 (no override via W bit). */
    2518         691 :                         op_bytes = 4;
    2519         691 :                         vex.b = 1;
    2520             :                     }
    2521        1883 :                     switch ( b )
    2522             :                     {
    2523             :                     case 0x62:
    2524         200 :                         opcode = X86EMUL_OPC_EVEX_;
    2525         200 :                         evex.raw[0] = vex.raw[0];
    2526         200 :                         evex.raw[1] = vex.raw[1];
    2527         216 :                         evex.raw[2] = insn_fetch_type(uint8_t);
    2528             : 
    2529         184 :                         vex.opcx = evex.opcx;
    2530         184 :                         break;
    2531             :                     case 0xc4:
    2532        1627 :                         opcode = X86EMUL_OPC_VEX_;
    2533        1627 :                         break;
    2534             :                     default:
    2535          56 :                         opcode = 0;
    2536          56 :                         break;
    2537             :                     }
    2538             :                 }
    2539        4990 :                 if ( !vex.r )
    2540         502 :                     rex_prefix |= REX_R;
    2541             : 
    2542        4990 :                 ext = vex.opcx;
    2543        4990 :                 if ( b != 0x8f )
    2544             :                 {
    2545        4958 :                     b = insn_fetch_type(uint8_t);
    2546        4910 :                     switch ( ext )
    2547             :                     {
    2548             :                     case vex_0f:
    2549        3176 :                         opcode |= MASK_INSR(0x0f, X86EMUL_OPC_EXT_MASK);
    2550        3176 :                         d = twobyte_table[b].desc;
    2551        3176 :                         state->simd_size = twobyte_table[b].size;
    2552        3176 :                         break;
    2553             :                     case vex_0f38:
    2554        1385 :                         opcode |= MASK_INSR(0x0f38, X86EMUL_OPC_EXT_MASK);
    2555        1385 :                         d = twobyte_table[0x38].desc;
    2556        1385 :                         break;
    2557             :                     case vex_0f3a:
    2558         333 :                         opcode |= MASK_INSR(0x0f3a, X86EMUL_OPC_EXT_MASK);
    2559         333 :                         d = twobyte_table[0x3a].desc;
    2560         333 :                         break;
    2561             :                     default:
    2562          16 :                         rc = X86EMUL_UNHANDLEABLE;
    2563          16 :                         goto done;
    2564             :                     }
    2565             :                 }
    2566          56 :                 else if ( ext < ext_8f08 +
    2567             :                                 sizeof(xop_table) / sizeof(*xop_table) )
    2568             :                 {
    2569          48 :                     b = insn_fetch_type(uint8_t);
    2570          32 :                     opcode |= MASK_INSR(0x8f08 + ext - ext_8f08,
    2571             :                                         X86EMUL_OPC_EXT_MASK);
    2572          32 :                     d = xop_table[ext - ext_8f08];
    2573             :                 }
    2574             :                 else
    2575             :                 {
    2576          16 :                     rc = X86EMUL_UNHANDLEABLE;
    2577          16 :                     goto done;
    2578             :                 }
    2579             : 
    2580        4926 :                 opcode |= b | MASK_INSR(vex.pfx, X86EMUL_OPC_PFX_MASK);
    2581             : 
    2582        4926 :                 if ( !(d & ModRM) )
    2583        1060 :                     break;
    2584             : 
    2585        3905 :                 modrm = insn_fetch_type(uint8_t);
    2586        3827 :                 modrm_mod = (modrm & 0xc0) >> 6;
    2587             : 
    2588        3827 :                 break;
    2589             :             }
    2590             :     }
    2591             : 
    2592     3880138 :     if ( d & ModRM )
    2593             :     {
    2594     1028415 :         d &= ~ModRM;
    2595             : #undef ModRM /* Only its aliases are valid to use from here on. */
    2596     1028415 :         modrm_reg = ((rex_prefix & 4) << 1) | ((modrm & 0x38) >> 3);
    2597     1028415 :         modrm_rm  = modrm & 0x07;
    2598             : 
    2599             :         /*
    2600             :          * Early operand adjustments. Only ones affecting further processing
    2601             :          * prior to the x86_decode_*() calls really belong here. That would
    2602             :          * normally be only addition/removal of SrcImm/SrcImm16, so their
    2603             :          * fetching can be taken care of by the common code below.
    2604             :          */
    2605     1028415 :         switch ( ext )
    2606             :         {
    2607             :         case ext_none:
    2608      901636 :             switch ( b )
    2609             :             {
    2610             :             case 0xf6 ... 0xf7: /* Grp3 */
    2611       17566 :                 switch ( modrm_reg & 7 )
    2612             :                 {
    2613             :                 case 0 ... 1: /* test */
    2614        1241 :                     d |= DstMem | SrcImm;
    2615        1241 :                     break;
    2616             :                 case 2: /* not */
    2617             :                 case 3: /* neg */
    2618         587 :                     d |= DstMem;
    2619         587 :                     break;
    2620             :                 case 4: /* mul */
    2621             :                 case 5: /* imul */
    2622             :                 case 6: /* div */
    2623             :                 case 7: /* idiv */
    2624             :                     /*
    2625             :                      * DstEax isn't really precise for all cases; updates to
    2626             :                      * rDX get handled in an open coded manner.
    2627             :                      */
    2628       15738 :                     d |= DstEax | SrcMem;
    2629       15738 :                     break;
    2630             :                 }
    2631       17566 :                 break;
    2632             :             }
    2633      901636 :             break;
    2634             : 
    2635             :         case ext_0f:
    2636      123367 :             switch ( b )
    2637             :             {
    2638             :             case 0x20: /* mov cr,reg */
    2639             :             case 0x21: /* mov dr,reg */
    2640             :             case 0x22: /* mov reg,cr */
    2641             :             case 0x23: /* mov reg,dr */
    2642             :                 /*
    2643             :                  * Mov to/from cr/dr ignore the encoding of Mod, and behave as
    2644             :                  * if they were encoded as reg/reg instructions.  No futher
    2645             :                  * disp/SIB bytes are fetched.
    2646             :                  */
    2647         981 :                 modrm_mod = 3;
    2648         981 :                 break;
    2649             :             }
    2650      123367 :             break;
    2651             : 
    2652             :         case vex_0f38:
    2653        2702 :             d = ext0f38_table[b].to_mem ? DstMem | SrcReg
    2654             :                                         : DstReg | SrcMem;
    2655        2702 :             if ( ext0f38_table[b].two_op )
    2656        1616 :                 d |= TwoOp;
    2657        2702 :             if ( ext0f38_table[b].vsib )
    2658           0 :                 d |= vSIB;
    2659        2702 :             state->simd_size = ext0f38_table[b].simd_size;
    2660        2702 :             break;
    2661             : 
    2662             :         case vex_0f3a:
    2663             :             /*
    2664             :              * Cannot update d here yet, as the immediate operand still
    2665             :              * needs fetching.
    2666             :              */
    2667             :         default:
    2668         710 :             break;
    2669             :         }
    2670             : 
    2671     1028415 :         if ( modrm_mod == 3 )
    2672             :         {
    2673       65297 :             modrm_rm |= (rex_prefix & 1) << 3;
    2674       65297 :             ea.type = OP_REG;
    2675             :         }
    2676      963118 :         else if ( ad_bytes == 2 )
    2677             :         {
    2678             :             /* 16-bit ModR/M decode. */
    2679      454973 :             generate_exception_if(d & vSIB, EXC_UD);
    2680      454973 :             ea.type = OP_MEM;
    2681      454973 :             switch ( modrm_rm )
    2682             :             {
    2683             :             case 0:
    2684      144307 :                 ea.mem.off = state->regs->bx + state->regs->si;
    2685      144307 :                 break;
    2686             :             case 1:
    2687       56134 :                 ea.mem.off = state->regs->bx + state->regs->di;
    2688       56134 :                 break;
    2689             :             case 2:
    2690       87208 :                 ea.mem.seg = x86_seg_ss;
    2691       87208 :                 ea.mem.off = state->regs->bp + state->regs->si;
    2692       87208 :                 break;
    2693             :             case 3:
    2694      110524 :                 ea.mem.seg = x86_seg_ss;
    2695      110524 :                 ea.mem.off = state->regs->bp + state->regs->di;
    2696      110524 :                 break;
    2697             :             case 4:
    2698       12499 :                 ea.mem.off = state->regs->si;
    2699       12499 :                 break;
    2700             :             case 5:
    2701       14195 :                 ea.mem.off = state->regs->di;
    2702       14195 :                 break;
    2703             :             case 6:
    2704        8929 :                 if ( modrm_mod == 0 )
    2705        3840 :                     break;
    2706        5089 :                 ea.mem.seg = x86_seg_ss;
    2707        5089 :                 ea.mem.off = state->regs->bp;
    2708        5089 :                 break;
    2709             :             case 7:
    2710       21177 :                 ea.mem.off = state->regs->bx;
    2711       21177 :                 break;
    2712             :             }
    2713      454973 :             switch ( modrm_mod )
    2714             :             {
    2715             :             case 0:
    2716      341068 :                 if ( modrm_rm == 6 )
    2717        3840 :                     ea.mem.off = insn_fetch_type(int16_t);
    2718      341042 :                 break;
    2719             :             case 1:
    2720      103221 :                 ea.mem.off += insn_fetch_type(int8_t);
    2721      102513 :                 break;
    2722             :             case 2:
    2723       11182 :                 ea.mem.off += insn_fetch_type(int16_t);
    2724       10894 :                 break;
    2725             :             }
    2726             :         }
    2727             :         else
    2728             :         {
    2729             :             /* 32/64-bit ModR/M decode. */
    2730      508145 :             ea.type = OP_MEM;
    2731      508145 :             if ( modrm_rm == 4 )
    2732             :             {
    2733       21853 :                 sib = insn_fetch_type(uint8_t);
    2734       21755 :                 sib_index = ((sib >> 3) & 7) | ((rex_prefix << 2) & 8);
    2735       21755 :                 sib_base  = (sib & 7) | ((rex_prefix << 3) & 8);
    2736       40045 :                 if ( sib_index != 4 && !(d & vSIB) )
    2737       18290 :                     ea.mem.off = *(long *)decode_register(sib_index,
    2738             :                                                           state->regs, 0);
    2739       21755 :                 ea.mem.off <<= (sib >> 6) & 3;
    2740       21755 :                 if ( (modrm_mod == 0) && ((sib_base & 7) == 5) )
    2741         200 :                     ea.mem.off += insn_fetch_type(int32_t);
    2742       21555 :                 else if ( sib_base == 4 )
    2743             :                 {
    2744        5282 :                     ea.mem.seg  = x86_seg_ss;
    2745        5282 :                     ea.mem.off += state->regs->r(sp);
    2746        5282 :                     if ( !ext && (b == 0x8f) )
    2747             :                         /* POP <rm> computes its EA post increment. */
    2748          16 :                         ea.mem.off += ((mode_64bit() && (op_bytes == 4))
    2749           8 :                                        ? 8 : op_bytes);
    2750             :                 }
    2751       16273 :                 else if ( sib_base == 5 )
    2752             :                 {
    2753        1131 :                     ea.mem.seg  = x86_seg_ss;
    2754        1131 :                     ea.mem.off += state->regs->r(bp);
    2755             :                 }
    2756             :                 else
    2757       15142 :                     ea.mem.off += *(long *)decode_register(sib_base,
    2758             :                                                            state->regs, 0);
    2759             :             }
    2760             :             else
    2761             :             {
    2762      486341 :                 generate_exception_if(d & vSIB, EXC_UD);
    2763      486341 :                 modrm_rm |= (rex_prefix & 1) << 3;
    2764      486341 :                 ea.mem.off = *(long *)decode_register(modrm_rm,
    2765             :                                                       state->regs, 0);
    2766      486341 :                 if ( (modrm_rm == 5) && (modrm_mod != 0) )
    2767       13519 :                     ea.mem.seg = x86_seg_ss;
    2768             :             }
    2769      508080 :             switch ( modrm_mod )
    2770             :             {
    2771             :             case 0:
    2772      351997 :                 if ( (modrm_rm & 7) != 5 )
    2773      346676 :                     break;
    2774        5364 :                 ea.mem.off = insn_fetch_type(int32_t);
    2775        5278 :                 pc_rel = mode_64bit();
    2776        5278 :                 break;
    2777             :             case 1:
    2778      149034 :                 ea.mem.off += insn_fetch_type(int8_t);
    2779      147782 :                 break;
    2780             :             case 2:
    2781        7811 :                 ea.mem.off += insn_fetch_type(int32_t);
    2782        7539 :                 break;
    2783             :             }
    2784             :         }
    2785             :     }
    2786             :     else
    2787             :     {
    2788     2851723 :         modrm_mod = 0xff;
    2789     2851723 :         modrm_reg = modrm_rm = modrm = 0;
    2790             :     }
    2791             : 
    2792     3878744 :     if ( override_seg != x86_seg_none )
    2793       20682 :         ea.mem.seg = override_seg;
    2794             : 
    2795             :     /* Fetch the immediate operand, if present. */
    2796     3878744 :     switch ( d & SrcMask )
    2797             :     {
    2798             :         unsigned int bytes;
    2799             : 
    2800             :     case SrcImm:
    2801      339658 :         if ( !(d & ByteOp) )
    2802      125408 :             bytes = op_bytes != 8 ? op_bytes : 4;
    2803             :         else
    2804             :         {
    2805             :     case SrcImmByte:
    2806      526952 :             bytes = 1;
    2807             :         }
    2808             :         /* NB. Immediates are sign-extended as necessary. */
    2809      652360 :         switch ( bytes )
    2810             :         {
    2811      526952 :         case 1: imm1 = insn_fetch_type(int8_t);  break;
    2812       66672 :         case 2: imm1 = insn_fetch_type(int16_t); break;
    2813       58736 :         case 4: imm1 = insn_fetch_type(int32_t); break;
    2814             :         }
    2815      649287 :         break;
    2816             :     case SrcImm16:
    2817        5897 :         imm1 = insn_fetch_type(uint16_t);
    2818        5699 :         break;
    2819             :     }
    2820             : 
    2821     3875572 :     ctxt->opcode = opcode;
    2822     3875572 :     state->desc = d;
    2823             : 
    2824     3875572 :     switch ( ext )
    2825             :     {
    2826             :     case ext_none:
    2827     3707764 :         rc = x86_decode_onebyte(state, ctxt, ops);
    2828     3707764 :         break;
    2829             : 
    2830             :     case ext_0f:
    2831      164404 :         rc = x86_decode_twobyte(state, ctxt, ops);
    2832      164404 :         break;
    2833             : 
    2834             :     case ext_0f38:
    2835        2702 :         rc = x86_decode_0f38(state, ctxt, ops);
    2836        2702 :         break;
    2837             : 
    2838             :     case ext_0f3a:
    2839         670 :         d = ext0f3a_table[b].to_mem ? DstMem | SrcReg : DstReg | SrcMem;
    2840         670 :         if ( ext0f3a_table[b].two_op )
    2841         385 :             d |= TwoOp;
    2842         285 :         else if ( ext0f3a_table[b].four_op && !mode_64bit() && vex.opcx )
    2843          32 :             imm1 &= 0x7f;
    2844         670 :         state->desc = d;
    2845         670 :         state->simd_size = ext0f3a_table[b].simd_size;
    2846         670 :         rc = x86_decode_0f3a(state, ctxt, ops);
    2847         670 :         break;
    2848             : 
    2849             :     case ext_8f08:
    2850             :     case ext_8f09:
    2851             :     case ext_8f0a:
    2852          32 :         break;
    2853             : 
    2854             :     default:
    2855           0 :         ASSERT_UNREACHABLE();
    2856             :         return X86EMUL_UNHANDLEABLE;
    2857             :     }
    2858             : 
    2859     3875572 :     if ( ea.type == OP_MEM )
    2860             :     {
    2861      963287 :         if ( pc_rel )
    2862        3556 :             ea.mem.off += state->ip;
    2863             : 
    2864      963287 :         ea.mem.off = truncate_ea(ea.mem.off);
    2865             :     }
    2866             : 
    2867             :     /*
    2868             :      * Simple op_bytes calculations. More complicated cases produce 0
    2869             :      * and are further handled during execute.
    2870             :      */
    2871     3875572 :     switch ( state->simd_size )
    2872             :     {
    2873             :     case simd_none:
    2874             :         /*
    2875             :          * When prefix 66 has a meaning different from operand-size override,
    2876             :          * operand size defaults to 4 and can't be overridden to 2.
    2877             :          */
    2878     5774577 :         if ( op_bytes == 2 &&
    2879     1909477 :              (ctxt->opcode & X86EMUL_OPC_PFX_MASK) == X86EMUL_OPC_66(0, 0) )
    2880         665 :             op_bytes = 4;
    2881     3865100 :         break;
    2882             : 
    2883             :     case simd_packed_int:
    2884        3927 :         switch ( vex.pfx )
    2885             :         {
    2886        2698 :         case vex_none: op_bytes = 8;           break;
    2887        1197 :         case vex_66:   op_bytes = 16 << vex.l; break;
    2888          32 :         default:       op_bytes = 0;           break;
    2889             :         }
    2890        3927 :         break;
    2891             : 
    2892             :     case simd_single_fp:
    2893         463 :         if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
    2894             :         {
    2895           8 :             op_bytes = 0;
    2896           8 :             break;
    2897             :     case simd_packed_fp:
    2898         891 :             if ( vex.pfx & VEX_PREFIX_SCALAR_MASK )
    2899             :             {
    2900          16 :                 op_bytes = 0;
    2901          16 :                 break;
    2902             :             }
    2903             :         }
    2904             :         /* fall through */
    2905             :     case simd_any_fp:
    2906        3105 :         switch ( vex.pfx )
    2907             :         {
    2908        3041 :         default:     op_bytes = 16 << vex.l; break;
    2909          32 :         case vex_f3: op_bytes = 4;           break;
    2910          32 :         case vex_f2: op_bytes = 8;           break;
    2911             :         }
    2912        3105 :         break;
    2913             : 
    2914             :     case simd_scalar_fp:
    2915          23 :         op_bytes = 4 << (ctxt->opcode & 1);
    2916          23 :         break;
    2917             : 
    2918             :     default:
    2919        3393 :         op_bytes = 0;
    2920        3393 :         break;
    2921             :     }
    2922             : 
    2923             :  done:
    2924     3899535 :     return rc;
    2925             : }
    2926             : 
/*
 * No insn fetching past this point: decoding is complete, so the
 * instruction-stream fetch helpers must not be used in the execute
 * phase below.
 */
#undef insn_fetch_bytes
#undef insn_fetch_type

/*
 * Undo DEBUG wrapper (presumably a macro aliasing x86_emulate to a
 * debug variant earlier in the file — not visible here) so the
 * definition that follows provides the real x86_emulate() entry point.
 */
#undef x86_emulate
    2933             : 
    2934             : int
    2935     3899535 : x86_emulate(
    2936             :     struct x86_emulate_ctxt *ctxt,
    2937             :     const struct x86_emulate_ops *ops)
    2938             : {
    2939             :     /* Shadow copy of register state. Committed on successful emulation. */
    2940     3899535 :     struct cpu_user_regs _regs = *ctxt->regs;
    2941             :     struct x86_emulate_state state;
    2942             :     int rc;
    2943     3899535 :     uint8_t b, d, *opc = NULL;
    2944     4035837 :     bool singlestep = (_regs.eflags & X86_EFLAGS_TF) &&
    2945      136302 :             !is_branch_step(ctxt, ops);
    2946     3899535 :     bool sfence = false;
    2947     3899535 :     struct operand src = { .reg = PTR_POISON };
    2948     3899535 :     struct operand dst = { .reg = PTR_POISON };
    2949             :     unsigned long cr4;
    2950     3899535 :     struct fpu_insn_ctxt fic = { .type = X86EMUL_FPU_none, .exn_raised = -1 };
    2951     3899535 :     struct x86_emulate_stub stub = {};
    2952     3899535 :     DECLARE_ALIGNED(mmval_t, mmval);
    2953             : 
    2954     3899535 :     ASSERT(ops->read);
    2955             : 
    2956     3899535 :     rc = x86_decode(&state, ctxt, ops);
    2957     3899535 :     if ( rc != X86EMUL_OKAY )
    2958       24892 :         return rc;
    2959             : 
    2960             :     /* Sync rIP to post decode value. */
    2961     3874643 :     _regs.r(ip) = state.ip;
    2962             : 
    2963     3874643 :     if ( ops->validate )
    2964             :     {
    2965             : #ifndef NDEBUG
    2966           0 :         state.caller = __builtin_return_address(0);
    2967             : #endif
    2968           0 :         rc = ops->validate(&state, ctxt);
    2969             : #ifndef NDEBUG
    2970           0 :         state.caller = NULL;
    2971             : #endif
    2972           0 :         if ( rc == X86EMUL_DONE )
    2973           0 :             goto complete_insn;
    2974           0 :         if ( rc != X86EMUL_OKAY )
    2975           0 :             return rc;
    2976             :     }
    2977             : 
    2978     3874643 :     b = ctxt->opcode;
    2979     3874643 :     d = state.desc;
    2980             : #define state (&state)
    2981             : 
    2982     3874643 :     generate_exception_if(state->not_64bit && mode_64bit(), EXC_UD);
    2983             : 
    2984     3873672 :     if ( ea.type == OP_REG )
    2985       88428 :         ea.reg = decode_register(modrm_rm, &_regs,
    2986       88428 :                                  (d & ByteOp) && !rex_prefix);
    2987             : 
    2988     3873672 :     memset(mmvalp, 0xaa /* arbitrary */, sizeof(*mmvalp));
    2989             : 
    2990             :     /* Decode and fetch the source operand: register, memory or immediate. */
    2991     3873672 :     switch ( d & SrcMask )
    2992             :     {
    2993             :     case SrcNone: /* case SrcImplicit: */
    2994     2313610 :         src.type = OP_NONE;
    2995     2313610 :         break;
    2996             :     case SrcReg:
    2997      489763 :         src.type = OP_REG;
    2998      489763 :         if ( d & ByteOp )
    2999             :         {
    3000      332086 :             src.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
    3001      332086 :             src.val = *(uint8_t *)src.reg;
    3002      332086 :             src.bytes = 1;
    3003             :         }
    3004             :         else
    3005             :         {
    3006      157677 :             src.reg = decode_register(modrm_reg, &_regs, 0);
    3007      157677 :             switch ( (src.bytes = op_bytes) )
    3008             :             {
    3009       56904 :             case 2: src.val = *(uint16_t *)src.reg; break;
    3010       92231 :             case 4: src.val = *(uint32_t *)src.reg; break;
    3011        8534 :             case 8: src.val = *(uint64_t *)src.reg; break;
    3012             :             }
    3013             :         }
    3014      489763 :         break;
    3015             :     case SrcMem16:
    3016        9038 :         ea.bytes = 2;
    3017        9038 :         goto srcmem_common;
    3018             :     case SrcMem:
    3019      407806 :         if ( state->simd_size )
    3020        7416 :             break;
    3021      400390 :         ea.bytes = (d & ByteOp) ? 1 : op_bytes;
    3022             :     srcmem_common:
    3023      409428 :         src = ea;
    3024      409428 :         if ( src.type == OP_REG )
    3025             :         {
    3026       35622 :             switch ( src.bytes )
    3027             :             {
    3028        8567 :             case 1: src.val = *(uint8_t  *)src.reg; break;
    3029        4975 :             case 2: src.val = *(uint16_t *)src.reg; break;
    3030       10119 :             case 4: src.val = *(uint32_t *)src.reg; break;
    3031       11953 :             case 8: src.val = *(uint64_t *)src.reg; break;
    3032             :             }
    3033             :         }
    3034      373806 :         else if ( (rc = read_ulong(src.mem.seg, src.mem.off,
    3035             :                                    &src.val, src.bytes, ctxt, ops)) )
    3036        1639 :             goto done;
    3037      407789 :         break;
    3038             :     case SrcImm:
    3039      337801 :         if ( !(d & ByteOp) )
    3040      124256 :             src.bytes = op_bytes != 8 ? op_bytes : 4;
    3041             :         else
    3042             :         {
    3043             :     case SrcImmByte:
    3044      523574 :             src.bytes = 1;
    3045             :         }
    3046      647830 :         src.type  = OP_IMM;
    3047      647830 :         src.val   = imm1;
    3048      647830 :         break;
    3049             :     case SrcImm16:
    3050        5625 :         src.type  = OP_IMM;
    3051        5625 :         src.bytes = 2;
    3052        5625 :         src.val   = imm1;
    3053        5625 :         break;
    3054             :     }
    3055             : 
    3056             :     /* Decode and fetch the destination operand: register or memory. */
    3057     3872033 :     switch ( d & DstMask )
    3058             :     {
    3059             :     case DstNone: /* case DstImplicit: */
    3060             :         /*
    3061             :          * The only implicit-operands instructions allowed a LOCK prefix are
    3062             :          * CMPXCHG{8,16}B (MOV CRn is being handled elsewhere).
    3063             :          */
    3064     2589370 :         generate_exception_if(lock_prefix && (ext != ext_0f || b != 0xc7),
    3065             :                               EXC_UD);
    3066     2589353 :         dst.type = OP_NONE;
    3067     2589353 :         break;
    3068             : 
    3069             :     case DstReg:
    3070      747308 :         generate_exception_if(lock_prefix, EXC_UD);
    3071      747300 :         dst.type = OP_REG;
    3072      747300 :         if ( d & ByteOp )
    3073             :         {
    3074      393424 :             dst.reg = decode_register(modrm_reg, &_regs, (rex_prefix == 0));
    3075      393424 :             dst.val = *(uint8_t *)dst.reg;
    3076      393424 :             dst.bytes = 1;
    3077             :         }
    3078             :         else
    3079             :         {
    3080      353876 :             dst.reg = decode_register(modrm_reg, &_regs, 0);
    3081      353876 :             switch ( (dst.bytes = op_bytes) )
    3082             :             {
    3083      165894 :             case 2: dst.val = *(uint16_t *)dst.reg; break;
    3084      153964 :             case 4: dst.val = *(uint32_t *)dst.reg; break;
    3085       32282 :             case 8: dst.val = *(uint64_t *)dst.reg; break;
    3086             :             }
    3087             :         }
    3088      747300 :         break;
    3089             :     case DstBitBase:
    3090       17424 :         if ( ea.type == OP_MEM )
    3091             :         {
    3092             :             /*
    3093             :              * Instructions such as bt can reference an arbitrary offset from
    3094             :              * their memory operand, but the instruction doing the actual
    3095             :              * emulation needs the appropriate op_bytes read from memory.
    3096             :              * Adjust both the source register and memory operand to make an
    3097             :              * equivalent instruction.
    3098             :              *
    3099             :              * EA       += BitOffset DIV op_bytes*8
    3100             :              * BitOffset = BitOffset MOD op_bytes*8
    3101             :              * DIV truncates towards negative infinity.
    3102             :              * MOD always produces a positive result.
    3103             :              */
    3104       16700 :             if ( op_bytes == 2 )
    3105        6563 :                 src.val = (int16_t)src.val;
    3106       10137 :             else if ( op_bytes == 4 )
    3107        8577 :                 src.val = (int32_t)src.val;
    3108       16700 :             if ( (long)src.val < 0 )
    3109        4094 :                 ea.mem.off -=
    3110        2047 :                     op_bytes + (((-src.val - 1) >> 3) & ~(op_bytes - 1L));
    3111             :             else
    3112       14653 :                 ea.mem.off += (src.val >> 3) & ~(op_bytes - 1L);
    3113             :         }
    3114             : 
    3115             :         /* Bit index always truncated to within range. */
    3116       17424 :         src.val &= (op_bytes << 3) - 1;
    3117             : 
    3118       17424 :         d = (d & ~DstMask) | DstMem;
    3119             :         /* Becomes a normal DstMem operation from here on. */
    3120             :     case DstMem:
    3121      535355 :         if ( state->simd_size )
    3122             :         {
    3123        1623 :             generate_exception_if(lock_prefix, EXC_UD);
    3124        1615 :             break;
    3125             :         }
    3126      533732 :         ea.bytes = (d & ByteOp) ? 1 : op_bytes;
    3127      533732 :         dst = ea;
    3128      533732 :         if ( dst.type == OP_REG )
    3129             :         {
    3130       22447 :             generate_exception_if(lock_prefix, EXC_UD);
    3131       22439 :             switch ( dst.bytes )
    3132             :             {
    3133       14638 :             case 1: dst.val = *(uint8_t  *)dst.reg; break;
    3134        3214 :             case 2: dst.val = *(uint16_t *)dst.reg; break;
    3135        2905 :             case 4: dst.val = *(uint32_t *)dst.reg; break;
    3136        1682 :             case 8: dst.val = *(uint64_t *)dst.reg; break;
    3137             :             }
    3138             :         }
    3139      511285 :         else if ( !(d & Mov) ) /* optimisation - avoid slow emulated read */
    3140             :         {
    3141      474581 :             fail_if(lock_prefix ? !ops->cmpxchg : !ops->write);
    3142      473882 :             if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
    3143             :                                   &dst.val, dst.bytes, ctxt, ops)) )
    3144        1376 :                 goto done;
    3145      472506 :             dst.orig_val = dst.val;
    3146             :         }
    3147             :         else
    3148             :         {
    3149             :             /* Lock prefix is allowed only on RMW instructions. */
    3150       36704 :             generate_exception_if(lock_prefix, EXC_UD);
    3151       36696 :             fail_if(!ops->write);
    3152             :         }
    3153      531592 :         break;
    3154             :     }
    3155             : 
    3156     3869860 :     switch ( ctxt->opcode )
    3157             :     {
    3158             :         enum x86_segment seg;
    3159             :         struct segment_register cs, sreg;
    3160             :         struct cpuid_leaf cpuid_leaf;
    3161             :         uint64_t msr_val;
    3162             :         unsigned long dummy;
    3163             : 
    3164             :     case 0x00 ... 0x05: add: /* add */
    3165      158502 :         emulate_2op_SrcV("add", src, dst, _regs.eflags);
    3166     4013170 :         break;
    3167             : 
    3168             :     case 0x08 ... 0x0d: or:  /* or */
    3169       91845 :         emulate_2op_SrcV("or", src, dst, _regs.eflags);
    3170       91845 :         break;
    3171             : 
    3172             :     case 0x10 ... 0x15: adc: /* adc */
    3173       83144 :         emulate_2op_SrcV("adc", src, dst, _regs.eflags);
    3174       83144 :         break;
    3175             : 
    3176             :     case 0x18 ... 0x1d: sbb: /* sbb */
    3177       88192 :         emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
    3178       88192 :         break;
    3179             : 
    3180             :     case 0x20 ... 0x25: and: /* and */
    3181      227870 :         emulate_2op_SrcV("and", src, dst, _regs.eflags);
    3182      227870 :         break;
    3183             : 
    3184             :     case 0x28 ... 0x2d: sub: /* sub */
    3185       96053 :         emulate_2op_SrcV("sub", src, dst, _regs.eflags);
    3186       96053 :         break;
    3187             : 
    3188             :     case 0x30 ... 0x35: xor: /* xor */
    3189      120579 :         emulate_2op_SrcV("xor", src, dst, _regs.eflags);
    3190      120579 :         break;
    3191             : 
    3192             :     case 0x38 ... 0x3d: cmp: /* cmp */
    3193      188942 :         generate_exception_if(lock_prefix, EXC_UD);
    3194      178350 :         emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
    3195      178350 :         dst.type = OP_NONE;
    3196      178350 :         break;
    3197             : 
    3198             :     case 0x06: /* push %%es */
    3199             :     case 0x0e: /* push %%cs */
    3200             :     case 0x16: /* push %%ss */
    3201             :     case 0x1e: /* push %%ds */
    3202             :     case X86EMUL_OPC(0x0f, 0xa0): /* push %%fs */
    3203             :     case X86EMUL_OPC(0x0f, 0xa8): /* push %%gs */
    3204        4479 :         fail_if(ops->read_segment == NULL);
    3205        4315 :         if ( (rc = ops->read_segment((b >> 3) & 7, &sreg,
    3206             :                                      ctxt)) != X86EMUL_OKAY )
    3207           0 :             goto done;
    3208        4315 :         src.val = sreg.sel;
    3209        4315 :         goto push;
    3210             : 
    3211             :     case 0x07: /* pop %%es */
    3212             :     case 0x17: /* pop %%ss */
    3213             :     case 0x1f: /* pop %%ds */
    3214             :     case X86EMUL_OPC(0x0f, 0xa1): /* pop %%fs */
    3215             :     case X86EMUL_OPC(0x0f, 0xa9): /* pop %%gs */
    3216        6175 :         fail_if(ops->write_segment == NULL);
    3217             :         /* 64-bit mode: POP defaults to a 64-bit operand. */
    3218        6130 :         if ( mode_64bit() && (op_bytes == 4) )
    3219           8 :             op_bytes = 8;
    3220        6130 :         seg = (b >> 3) & 7;
    3221        6130 :         if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes), &dst.val,
    3222        6129 :                               op_bytes, ctxt, ops)) != X86EMUL_OKAY ||
    3223        6129 :              (rc = load_seg(seg, dst.val, 0, NULL, ctxt, ops)) != X86EMUL_OKAY )
    3224             :             goto done;
    3225        5933 :         if ( seg == x86_seg_ss )
    3226        3897 :             ctxt->retire.mov_ss = true;
    3227        5933 :         break;
    3228             : 
    3229             :     case 0x27: /* daa */
    3230             :     case 0x2f: /* das */ {
    3231      231682 :         uint8_t al = _regs.al;
    3232      231682 :         unsigned int eflags = _regs.eflags;
    3233             : 
    3234      231682 :         _regs.eflags &= ~(X86_EFLAGS_CF | X86_EFLAGS_AF | X86_EFLAGS_SF |
    3235             :                           X86_EFLAGS_ZF | X86_EFLAGS_PF);
    3236      231682 :         if ( ((al & 0x0f) > 9) || (eflags & X86_EFLAGS_AF) )
    3237             :         {
    3238      111851 :             _regs.eflags |= X86_EFLAGS_AF;
    3239      111851 :             if ( b == 0x2f && (al < 6 || (eflags & X86_EFLAGS_CF)) )
    3240       61459 :                 _regs.eflags |= X86_EFLAGS_CF;
    3241      111851 :             _regs.al += (b == 0x27) ? 6 : -6;
    3242             :         }
    3243      231682 :         if ( (al > 0x99) || (eflags & X86_EFLAGS_CF) )
    3244             :         {
    3245      108553 :             _regs.al += (b == 0x27) ? 0x60 : -0x60;
    3246      108553 :             _regs.eflags |= X86_EFLAGS_CF;
    3247             :         }
    3248      231682 :         _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0;
    3249      231682 :         _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0;
    3250      231682 :         _regs.eflags |= even_parity(_regs.al) ? X86_EFLAGS_PF : 0;
    3251      231682 :         break;
    3252             :     }
    3253             : 
    3254             :     case 0x37: /* aaa */
    3255             :     case 0x3f: /* aas */
    3256       29007 :         _regs.eflags &= ~X86_EFLAGS_CF;
    3257       29007 :         if ( (_regs.al > 9) || (_regs.eflags & X86_EFLAGS_AF) )
    3258             :         {
    3259       21106 :             _regs.al += (b == 0x37) ? 6 : -6;
    3260       21106 :             _regs.ah += (b == 0x37) ? 1 : -1;
    3261       21106 :             _regs.eflags |= X86_EFLAGS_CF | X86_EFLAGS_AF;
    3262             :         }
    3263       29007 :         _regs.al &= 0x0f;
    3264       29007 :         break;
    3265             : 
    3266             :     case 0x40 ... 0x4f: /* inc/dec reg */
    3267      619162 :         dst.type  = OP_REG;
    3268      619162 :         dst.reg   = decode_register(b & 7, &_regs, 0);
    3269      619162 :         dst.bytes = op_bytes;
    3270      619162 :         dst.val   = *dst.reg;
    3271      619162 :         if ( b & 8 )
    3272      386565 :             emulate_1op("dec", dst, _regs.eflags);
    3273             :         else
    3274      232597 :             emulate_1op("inc", dst, _regs.eflags);
    3275      619162 :         break;
    3276             : 
    3277             :     case 0x50 ... 0x57: /* push reg */
    3278     1970810 :         src.val = *(unsigned long *)decode_register(
    3279      985405 :             (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
    3280      985405 :         goto push;
    3281             : 
    3282             :     case 0x58 ... 0x5f: /* pop reg */
    3283      149693 :         dst.type  = OP_REG;
    3284      299386 :         dst.reg   = decode_register(
    3285      149693 :             (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
    3286      149693 :         dst.bytes = op_bytes;
    3287      149693 :         if ( mode_64bit() && (dst.bytes == 4) )
    3288       35482 :             dst.bytes = 8;
    3289      149693 :         if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
    3290             :                               &dst.val, dst.bytes, ctxt, ops)) != 0 )
    3291         386 :             goto done;
    3292      149307 :         break;
    3293             : 
    3294             :     case 0x60: /* pusha */ {
    3295             :         int i;
    3296       47576 :         unsigned int regs[] = {
    3297       23788 :             _regs.eax, _regs.ecx, _regs.edx, _regs.ebx,
    3298       23788 :             _regs.esp, _regs.ebp, _regs.esi, _regs.edi };
    3299             : 
    3300        6107 :         fail_if(!ops->write);
    3301       52602 :         for ( i = 0; i < 8; i++ )
    3302       93630 :             if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
    3303       46815 :                                   &regs[i], op_bytes, ctxt)) != 0 )
    3304         149 :             goto done;
    3305        5787 :         break;
    3306             :     }
    3307             : 
    3308             :     case 0x61: /* popa */ {
    3309             :         int i;
    3310        3711 :         unsigned int dummy_esp, *regs[] = {
    3311             :             &_regs.edi, &_regs.esi, &_regs.ebp, &dummy_esp,
    3312             :             &_regs.ebx, &_regs.edx, &_regs.ecx, &_regs.eax };
    3313             : 
    3314       32054 :         for ( i = 0; i < 8; i++ )
    3315             :         {
    3316       28575 :             if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
    3317             :                                   &dst.val, op_bytes, ctxt, ops)) != 0 )
    3318         232 :                 goto done;
    3319       28343 :             if ( op_bytes == 2 )
    3320       26007 :                 *(uint16_t *)regs[i] = (uint16_t)dst.val;
    3321             :             else
    3322        2336 :                 *regs[i] = dst.val; /* 64b: zero-ext done by read_ulong() */
    3323             :         }
    3324        3479 :         break;
    3325             :     }
    3326             : 
    3327             :     case 0x62: /* bound */ {
    3328             :         unsigned long src_val2;
    3329             :         int lb, ub, idx;
    3330        3146 :         generate_exception_if(src.type != OP_MEM, EXC_UD);
    3331        2923 :         if ( (rc = read_ulong(src.mem.seg, src.mem.off + op_bytes,
    3332             :                               &src_val2, op_bytes, ctxt, ops)) )
    3333          33 :             goto done;
    3334        2890 :         ub  = (op_bytes == 2) ? (int16_t)src_val2 : (int32_t)src_val2;
    3335        2890 :         lb  = (op_bytes == 2) ? (int16_t)src.val  : (int32_t)src.val;
    3336        2890 :         idx = (op_bytes == 2) ? (int16_t)dst.val  : (int32_t)dst.val;
    3337        2890 :         generate_exception_if((idx < lb) || (idx > ub), EXC_BR);
    3338        2748 :         dst.type = OP_NONE;
    3339        2748 :         break;
    3340             :     }
    3341             : 
    3342             :     case 0x63: /* movsxd (x86/64) / arpl (x86/32) */
    3343       35106 :         if ( mode_64bit() )
    3344             :         {
    3345             :             /* movsxd */
    3346       13791 :             if ( ea.type == OP_REG )
    3347          61 :                 src.val = *ea.reg;
    3348       13730 :             else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
    3349             :                                        &src.val, 4, ctxt, ops)) )
    3350          20 :                 goto done;
    3351       13771 :             dst.val = (int32_t)src.val;
    3352             :         }
    3353             :         else
    3354             :         {
    3355             :             /* arpl */
    3356       21315 :             unsigned int src_rpl = dst.val & 3;
    3357             : 
    3358       21315 :             dst = ea;
    3359       21315 :             dst.bytes = 2;
    3360       21315 :             if ( dst.type == OP_REG )
    3361         149 :                 dst.val = *dst.reg;
    3362       21166 :             else if ( (rc = read_ulong(dst.mem.seg, dst.mem.off,
    3363             :                                        &dst.val, 2, ctxt, ops)) )
    3364         116 :                 goto done;
    3365       21199 :             if ( src_rpl > (dst.val & 3) )
    3366             :             {
    3367        1374 :                 _regs.eflags |= X86_EFLAGS_ZF;
    3368        1374 :                 dst.val = (dst.val & ~3) | src_rpl;
    3369             :             }
    3370             :             else
    3371             :             {
    3372       19825 :                 _regs.eflags &= ~X86_EFLAGS_ZF;
    3373       19825 :                 dst.type = OP_NONE;
    3374             :             }
    3375       21199 :             generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
    3376             :         }
    3377       34954 :         break;
    3378             : 
    3379             :     case 0x68: /* push imm{16,32,64} */
    3380             :     case 0x6a: /* push imm8 */
    3381             :     push:
    3382     1026693 :         ASSERT(d & Mov); /* writeback needed */
    3383     1026693 :         dst.type  = OP_MEM;
    3384     1026693 :         dst.bytes = mode_64bit() && (op_bytes == 4) ? 8 : op_bytes;
    3385     1026693 :         dst.val = src.val;
    3386     1026693 :         dst.mem.seg = x86_seg_ss;
    3387     1026693 :         dst.mem.off = sp_pre_dec(dst.bytes);
    3388     1026693 :         break;
    3389             : 
    3390             :     case 0x69: /* imul imm16/32 */
    3391             :     case 0x6b: /* imul imm8 */
    3392       19567 :         if ( ea.type == OP_REG )
    3393         100 :             dst.val = *ea.reg;
    3394       19467 :         else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
    3395             :                                    &dst.val, op_bytes, ctxt, ops)) )
    3396          42 :             goto done;
    3397       19525 :         goto imul;
    3398             : 
    3399             :     case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ {
    3400       31533 :         unsigned long nr_reps = get_rep_prefix(false, true);
    3401       28637 :         unsigned int port = _regs.dx;
    3402             : 
    3403       28637 :         dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
    3404       28637 :         dst.mem.seg = x86_seg_es;
    3405       28637 :         dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes);
    3406       28637 :         if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 )
    3407        1250 :             goto done;
    3408             :         /* Try the presumably most efficient approach first. */
    3409       28107 :         if ( !ops->rep_ins )
    3410       10195 :             nr_reps = 1;
    3411       28107 :         rc = X86EMUL_UNHANDLEABLE;
    3412       28107 :         if ( nr_reps == 1 && ops->read_io && ops->write )
    3413             :         {
    3414       18868 :             rc = ops->read_io(port, dst.bytes, &dst.val, ctxt);
    3415       18868 :             if ( rc != X86EMUL_UNHANDLEABLE )
    3416       18556 :                 nr_reps = 0;
    3417             :         }
    3418       28107 :         if ( (nr_reps > 1 || rc == X86EMUL_UNHANDLEABLE) && ops->rep_ins )
    3419        9419 :             rc = ops->rep_ins(port, dst.mem.seg, dst.mem.off, dst.bytes,
    3420             :                               &nr_reps, ctxt);
    3421       28107 :         if ( nr_reps >= 1 && rc == X86EMUL_UNHANDLEABLE )
    3422             :         {
    3423         132 :             fail_if(!ops->read_io || !ops->write);
    3424         117 :             if ( (rc = ops->read_io(port, dst.bytes, &dst.val, ctxt)) != 0 )
    3425           8 :                 goto done;
    3426         109 :             nr_reps = 0;
    3427             :         }
    3428       28084 :         if ( !nr_reps && rc == X86EMUL_OKAY )
    3429             :         {
    3430       18441 :             dst.type = OP_MEM;
    3431       18441 :             nr_reps = 1;
    3432             :         }
    3433       28084 :         register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
    3434       28084 :         put_rep_prefix(nr_reps);
    3435       27750 :         if ( rc != X86EMUL_OKAY )
    3436         167 :             goto done;
    3437       27583 :         break;
    3438             :     }
    3439             : 
    3440             :     case 0x6e ... 0x6f: /* outs %esi,%dx */ {
    3441       18781 :         unsigned long nr_reps = get_rep_prefix(true, false);
    3442       18477 :         unsigned int port = _regs.dx;
    3443             : 
    3444       18477 :         dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
    3445       18477 :         ea.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes);
    3446       18477 :         if ( (rc = ioport_access_check(port, dst.bytes, ctxt, ops)) != 0 )
    3447         772 :             goto done;
    3448             :         /* Try the presumably most efficient approach first. */
    3449       18200 :         if ( !ops->rep_outs )
    3450        6212 :             nr_reps = 1;
    3451       18200 :         rc = X86EMUL_UNHANDLEABLE;
    3452       18200 :         if ( nr_reps == 1 && ops->write_io )
    3453             :         {
    3454        7611 :             rc = read_ulong(ea.mem.seg, ea.mem.off, &dst.val, dst.bytes,
    3455             :                             ctxt, ops);
    3456        7611 :             if ( rc != X86EMUL_UNHANDLEABLE )
    3457        6706 :                 nr_reps = 0;
    3458             :         }
    3459       18200 :         if ( (nr_reps > 1 || rc == X86EMUL_UNHANDLEABLE) && ops->rep_outs )
    3460       11087 :             rc = ops->rep_outs(ea.mem.seg, ea.mem.off, port, dst.bytes,
    3461             :                                &nr_reps, ctxt);
    3462       18200 :         if ( nr_reps >= 1 && rc == X86EMUL_UNHANDLEABLE )
    3463             :         {
    3464         407 :             if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &dst.val,
    3465             :                                   dst.bytes, ctxt, ops)) != X86EMUL_OKAY )
    3466          27 :                 goto done;
    3467         380 :             fail_if(ops->write_io == NULL);
    3468         372 :             nr_reps = 0;
    3469             :         }
    3470       18165 :         if ( !nr_reps && rc == X86EMUL_OKAY )
    3471             :         {
    3472        7056 :             if ( (rc = ops->write_io(port, dst.bytes, dst.val, ctxt)) != 0 )
    3473          73 :                 goto done;
    3474        6983 :             nr_reps = 1;
    3475             :         }
    3476       18092 :         register_address_adjust(_regs.r(si), nr_reps * dst.bytes);
    3477       18092 :         put_rep_prefix(nr_reps);
    3478       18052 :         if ( rc != X86EMUL_OKAY )
    3479         110 :             goto done;
    3480       17942 :         break;
    3481             :     }
    3482             : 
    3483             :     case 0x70 ... 0x7f: /* jcc (short) */
    3484      267007 :         if ( test_cc(b, _regs.eflags) )
    3485       47222 :             jmp_rel((int32_t)src.val);
    3486      266750 :         adjust_bnd(ctxt, ops, vex.pfx);
    3487      266750 :         break;
    3488             : 
    3489             :     case 0x80: case 0x81: case 0x82: case 0x83: /* Grp1 */
    3490       22256 :         switch ( modrm_reg & 7 )
    3491             :         {
    3492       11846 :         case 0: goto add;
    3493        4290 :         case 1: goto or;
    3494         863 :         case 2: goto adc;
    3495         474 :         case 3: goto sbb;
    3496        1919 :         case 4: goto and;
    3497         813 :         case 5: goto sub;
    3498        1596 :         case 6: goto xor;
    3499         455 :         case 7: goto cmp;
    3500             :         }
    3501           0 :         break;
    3502             : 
    3503             :     case 0xa8 ... 0xa9: /* test imm,%%eax */
    3504             :     case 0x84 ... 0x85: test: /* test */
    3505        5229 :         emulate_2op_SrcV("test", src, dst, _regs.eflags);
    3506        5229 :         dst.type = OP_NONE;
    3507        5229 :         break;
    3508             : 
    3509             :     case 0x86 ... 0x87: xchg: /* xchg */
    3510             :         /* Write back the register source. */
    3511        8203 :         switch ( dst.bytes )
    3512             :         {
    3513         244 :         case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
    3514        3609 :         case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
    3515        2764 :         case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
    3516        1586 :         case 8: *src.reg = dst.val; break;
    3517             :         }
    3518             :         /* Write back the memory destination with implicit LOCK prefix. */
    3519        8203 :         dst.val = src.val;
    3520        8203 :         lock_prefix = 1;
    3521        8203 :         break;
    3522             : 
    3523             :     case 0xc6: /* Grp11: mov / xabort */
    3524             :     case 0xc7: /* Grp11: mov / xbegin */
    3525         704 :         if ( modrm == 0xf8 && vcpu_has_rtm() )
    3526             :         {
    3527             :             /*
    3528             :              * xbegin unconditionally aborts, xabort is unconditionally
    3529             :              * a nop.
    3530             :              */
    3531         593 :             if ( b & 1 )
    3532             :             {
    3533         513 :                 jmp_rel((int32_t)src.val);
    3534         505 :                 _regs.r(ax) = 0;
    3535             :             }
    3536         585 :             dst.type = OP_NONE;
    3537         585 :             break;
    3538             :         }
    3539         111 :         generate_exception_if((modrm_reg & 7) != 0, EXC_UD);
    3540             :     case 0x88 ... 0x8b: /* mov */
    3541             :     case 0xa0 ... 0xa1: /* mov mem.offs,{%al,%ax,%eax,%rax} */
    3542             :     case 0xa2 ... 0xa3: /* mov {%al,%ax,%eax,%rax},mem.offs */
    3543        2884 :         dst.val = src.val;
    3544        2884 :         break;
    3545             : 
    3546             :     case 0x8c: /* mov Sreg,r/m */
    3547         265 :         seg = modrm_reg & 7; /* REX.R is ignored. */
    3548         265 :         generate_exception_if(!is_x86_user_segment(seg), EXC_UD);
    3549             :     store_selector:
    3550        1439 :         fail_if(ops->read_segment == NULL);
    3551        1431 :         if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != 0 )
    3552           0 :             goto done;
    3553        1431 :         dst.val = sreg.sel;
    3554        1431 :         if ( dst.type == OP_MEM )
    3555        1311 :             dst.bytes = 2;
    3556        1431 :         break;
    3557             : 
    3558             :     case 0x8e: /* mov r/m,Sreg */
    3559         362 :         seg = modrm_reg & 7; /* REX.R is ignored. */
    3560         362 :         generate_exception_if(!is_x86_user_segment(seg) ||
    3561             :                               seg == x86_seg_cs, EXC_UD);
    3562         354 :         if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
    3563          53 :             goto done;
    3564         301 :         if ( seg == x86_seg_ss )
    3565          16 :             ctxt->retire.mov_ss = true;
    3566         301 :         dst.type = OP_NONE;
    3567         301 :         break;
    3568             : 
    3569             :     case 0x8d: /* lea */
    3570         107 :         generate_exception_if(ea.type != OP_MEM, EXC_UD);
    3571          99 :         dst.val = ea.mem.off;
    3572          99 :         break;
    3573             : 
    3574             :     case 0x8f: /* pop (sole member of Grp1a) */
    3575         464 :         generate_exception_if((modrm_reg & 7) != 0, EXC_UD);
    3576             :         /* 64-bit mode: POP defaults to a 64-bit operand. */
    3577         456 :         if ( mode_64bit() && (dst.bytes == 4) )
    3578         287 :             dst.bytes = 8;
    3579         456 :         if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
    3580             :                               &dst.val, dst.bytes, ctxt, ops)) != 0 )
    3581          24 :             goto done;
    3582         432 :         break;
    3583             : 
    3584             :     case 0x90: /* nop / xchg %%r8,%%rax */
    3585             :     case X86EMUL_OPC_F3(0, 0x90): /* pause / xchg %%r8,%%rax */
    3586        2019 :         if ( !(rex_prefix & REX_B) )
    3587        1875 :             break; /* nop / pause */
    3588             :         /* fall through */
    3589             : 
    3590             :     case 0x91 ... 0x97: /* xchg reg,%%rax */
    3591        6392 :         dst.type = OP_REG;
    3592        6392 :         dst.bytes = op_bytes;
    3593       12784 :         dst.reg  = decode_register(
    3594        6392 :             (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
    3595        6392 :         dst.val  = *dst.reg;
    3596        6392 :         goto xchg;
    3597             : 
    3598             :     case 0x98: /* cbw/cwde/cdqe */
    3599         990 :         switch ( op_bytes )
    3600             :         {
    3601         166 :         case 2: _regs.ax = (int8_t)_regs.al; break; /* cbw */
    3602         672 :         case 4: _regs.r(ax) = (uint32_t)(int16_t)_regs.ax; break; /* cwde */
    3603         152 :         case 8: _regs.r(ax) = (int32_t)_regs.eax; break; /* cdqe */
    3604             :         }
    3605         990 :         break;
    3606             : 
    3607             :     case 0x99: /* cwd/cdq/cqo */
    3608        1942 :         switch ( op_bytes )
    3609             :         {
    3610         640 :         case 2: _regs.dx = -((int16_t)_regs.ax < 0); break;
    3611         734 :         case 4: _regs.r(dx) = (uint32_t)-((int32_t)_regs.eax < 0); break;
    3612             : #ifdef __x86_64__
    3613         568 :         case 8: _regs.rdx = -((int64_t)_regs.rax < 0); break;
    3614             : #endif
    3615             :         }
    3616        1942 :         break;
    3617             : 
    3618             :     case 0x9a: /* call (far, absolute) */
    3619        1353 :         ASSERT(!mode_64bit());
    3620             :     far_call:
    3621        1611 :         fail_if(!ops->read_segment || !ops->write);
    3622             : 
    3623        3188 :         if ( (rc = ops->read_segment(x86_seg_cs, &sreg, ctxt)) ||
    3624        3022 :              (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
    3625        1474 :              (validate_far_branch(&cs, imm1),
    3626        1428 :               src.val = sreg.sel,
    3627        1428 :               rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
    3628        1428 :                               &src.val, op_bytes, ctxt)) ||
    3629        1428 :              (rc = ops->write(x86_seg_ss, sp_pre_dec(op_bytes),
    3630        1412 :                               &_regs.r(ip), op_bytes, ctxt)) ||
    3631        1412 :              (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) )
    3632             :             goto done;
    3633             : 
    3634        1395 :         _regs.r(ip) = imm1;
    3635        1395 :         singlestep = _regs.eflags & X86_EFLAGS_TF;
    3636        1395 :         break;
    3637             : 
    3638             :     case 0x9b:  /* wait/fwait */
    3639          27 :         host_and_vcpu_must_have(fpu);
    3640          27 :         get_fpu(X86EMUL_FPU_wait, &fic);
    3641           0 :         fic.insn_bytes = 1;
    3642           0 :         asm volatile ( "fwait" ::: "memory" );
    3643           0 :         check_fpu_exn(&fic);
    3644           0 :         break;
    3645             : 
    3646             :     case 0x9c: /* pushf */
    3647       19733 :         if ( (_regs.eflags & X86_EFLAGS_VM) &&
    3648           0 :              MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3 )
    3649             :         {
    3650           0 :             cr4 = 0;
    3651           0 :             if ( op_bytes == 2 && ops->read_cr )
    3652             :             {
    3653           0 :                 rc = ops->read_cr(4, &cr4, ctxt);
    3654           0 :                 if ( rc != X86EMUL_OKAY )
    3655           0 :                     goto done;
    3656             :             }
    3657           0 :             generate_exception_if(!(cr4 & X86_CR4_VME), EXC_GP, 0);
    3658           0 :             src.val = (_regs.flags & ~X86_EFLAGS_IF) | X86_EFLAGS_IOPL;
    3659           0 :             if ( _regs.eflags & X86_EFLAGS_VIF )
    3660           0 :                 src.val |= X86_EFLAGS_IF;
    3661             :         }
    3662             :         else
    3663       19733 :             src.val = _regs.r(flags) & ~(X86_EFLAGS_VM | X86_EFLAGS_RF);
    3664       19733 :         goto push;
    3665             : 
    3666             :     case 0x9d: /* popf */ {
    3667        3303 :         uint32_t mask = X86_EFLAGS_VIP | X86_EFLAGS_VIF | X86_EFLAGS_VM;
    3668             : 
    3669        3303 :         cr4 = 0;
    3670        3303 :         if ( !mode_ring0() )
    3671             :         {
    3672        2523 :             if ( _regs.eflags & X86_EFLAGS_VM )
    3673             :             {
    3674           0 :                 if ( op_bytes == 2 && ops->read_cr )
    3675             :                 {
    3676           0 :                     rc = ops->read_cr(4, &cr4, ctxt);
    3677           0 :                     if ( rc != X86EMUL_OKAY )
    3678           0 :                         goto done;
    3679             :                 }
    3680           0 :                 generate_exception_if(!(cr4 & X86_CR4_VME) &&
    3681             :                                       MASK_EXTR(_regs.eflags, X86_EFLAGS_IOPL) != 3,
    3682             :                                       EXC_GP, 0);
    3683             :             }
    3684        2523 :             mask |= X86_EFLAGS_IOPL;
    3685        2523 :             if ( !mode_iopl() )
    3686        2523 :                 mask |= X86_EFLAGS_IF;
    3687             :         }
    3688             :         /* 64-bit mode: POP defaults to a 64-bit operand. */
    3689        3279 :         if ( mode_64bit() && (op_bytes == 4) )
    3690         566 :             op_bytes = 8;
    3691        3279 :         if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
    3692             :                               &dst.val, op_bytes, ctxt, ops)) != 0 )
    3693          11 :             goto done;
    3694        3268 :         if ( op_bytes == 2 )
    3695             :         {
    3696        1897 :             dst.val = (uint16_t)dst.val | (_regs.eflags & 0xffff0000u);
    3697        1897 :             if ( cr4 & X86_CR4_VME )
    3698             :             {
    3699           0 :                 if ( dst.val & X86_EFLAGS_IF )
    3700             :                 {
    3701           0 :                     generate_exception_if(_regs.eflags & X86_EFLAGS_VIP,
    3702             :                                           EXC_GP, 0);
    3703           0 :                     dst.val |= X86_EFLAGS_VIF;
    3704             :                 }
    3705             :                 else
    3706           0 :                     dst.val &= ~X86_EFLAGS_VIF;
    3707           0 :                 mask &= ~X86_EFLAGS_VIF;
    3708             :             }
    3709             :         }
    3710        3268 :         dst.val &= EFLAGS_MODIFIABLE;
    3711        3268 :         _regs.eflags &= mask;
    3712        3268 :         _regs.eflags |= (dst.val & ~mask) | X86_EFLAGS_MBS;
    3713        3268 :         break;
    3714             :     }
    3715             : 
    3716             :     case 0x9e: /* sahf */
    3717        1771 :         if ( mode_64bit() )
    3718         792 :             vcpu_must_have(lahf_lm);
    3719        1771 :         *(uint8_t *)&_regs.eflags = (_regs.ah & EFLAGS_MASK) | X86_EFLAGS_MBS;
    3720        1771 :         break;
    3721             : 
    3722             :     case 0x9f: /* lahf */
    3723         191 :         if ( mode_64bit() )
    3724          80 :             vcpu_must_have(lahf_lm);
    3725         191 :         _regs.ah = (_regs.eflags & EFLAGS_MASK) | X86_EFLAGS_MBS;
    3726         191 :         break;
    3727             : 
    3728             :     case 0xa4 ... 0xa5: /* movs */ {
    3729       17602 :         unsigned long nr_reps = get_rep_prefix(true, true);
    3730             : 
    3731       16402 :         dst.bytes = (d & ByteOp) ? 1 : op_bytes;
    3732       16402 :         dst.mem.seg = x86_seg_es;
    3733       16402 :         dst.mem.off = truncate_ea_and_reps(_regs.r(di), nr_reps, dst.bytes);
    3734       16402 :         src.mem.off = truncate_ea_and_reps(_regs.r(si), nr_reps, dst.bytes);
    3735       16998 :         if ( (nr_reps == 1) || !ops->rep_movs ||
    3736         596 :              ((rc = ops->rep_movs(ea.mem.seg, src.mem.off,
    3737             :                                   dst.mem.seg, dst.mem.off, dst.bytes,
    3738             :                                   &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
    3739             :         {
    3740       15937 :             if ( (rc = read_ulong(ea.mem.seg, src.mem.off,
    3741             :                                   &dst.val, dst.bytes, ctxt, ops)) != 0 )
    3742         244 :                 goto done;
    3743       15815 :             dst.type = OP_MEM;
    3744       15815 :             nr_reps = 1;
    3745             :         }
    3746       16280 :         register_address_adjust(_regs.r(si), nr_reps * dst.bytes);
    3747       16280 :         register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
    3748       16280 :         put_rep_prefix(nr_reps);
    3749       16222 :         if ( rc != X86EMUL_OKAY )
    3750           0 :             goto done;
    3751       16222 :         break;
    3752             :     }
    3753             : 
    3754             :     case 0xa6 ... 0xa7: /* cmps */ {
    3755       11565 :         unsigned long next_eip = _regs.r(ip);
    3756             : 
    3757       11565 :         get_rep_prefix(true, true);
    3758       11143 :         src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
    3759       11143 :         if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)),
    3760       11112 :                               &dst.val, dst.bytes, ctxt, ops)) ||
    3761       11112 :              (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)),
    3762             :                               &src.val, src.bytes, ctxt, ops)) )
    3763             :             goto done;
    3764       11055 :         register_address_adjust(_regs.r(si), dst.bytes);
    3765       11055 :         register_address_adjust(_regs.r(di), src.bytes);
    3766       11055 :         put_rep_prefix(1);
    3767             :         /* cmp: dst - src ==> src=*%%edi,dst=*%%esi ==> *%%esi - *%%edi */
    3768       11055 :         emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
    3769       18140 :         if ( (repe_prefix() && !(_regs.eflags & X86_EFLAGS_ZF)) ||
    3770        7160 :              (repne_prefix() && (_regs.eflags & X86_EFLAGS_ZF)) )
    3771        3982 :             _regs.r(ip) = next_eip;
    3772       11055 :         break;
    3773             :     }
    3774             : 
    3775             :     case 0xaa ... 0xab: /* stos */ {
    3776        7778 :         unsigned long nr_reps = get_rep_prefix(false, true);
    3777             : 
    3778        4996 :         dst.bytes = src.bytes;
    3779        4996 :         dst.mem.seg = x86_seg_es;
    3780        4996 :         dst.mem.off = truncate_ea(_regs.r(di));
    3781        5611 :         if ( (nr_reps == 1) || !ops->rep_stos ||
    3782         615 :              ((rc = ops->rep_stos(&src.val,
    3783             :                                   dst.mem.seg, dst.mem.off, dst.bytes,
    3784             :                                   &nr_reps, ctxt)) == X86EMUL_UNHANDLEABLE) )
    3785             :         {
    3786        4389 :             dst.val = src.val;
    3787        4389 :             dst.type = OP_MEM;
    3788        4389 :             nr_reps = 1;
    3789        4389 :             rc = X86EMUL_OKAY;
    3790             :         }
    3791        4996 :         register_address_adjust(_regs.r(di), nr_reps * dst.bytes);
    3792        4996 :         put_rep_prefix(nr_reps);
    3793        4988 :         if ( rc != X86EMUL_OKAY )
    3794           0 :             goto done;
    3795        4988 :         break;
    3796             :     }
    3797             : 
    3798             :     case 0xac ... 0xad: /* lods */
    3799        7258 :         get_rep_prefix(true, false);
    3800        7170 :         if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(si)),
    3801             :                               &dst.val, dst.bytes, ctxt, ops)) != 0 )
    3802          63 :             goto done;
    3803        7107 :         register_address_adjust(_regs.r(si), dst.bytes);
    3804        7107 :         put_rep_prefix(1);
    3805        7107 :         break;
    3806             : 
    3807             :     case 0xae ... 0xaf: /* scas */ {
    3808       24893 :         unsigned long next_eip = _regs.r(ip);
    3809             : 
    3810       24893 :         get_rep_prefix(false, true);
    3811       24693 :         if ( (rc = read_ulong(x86_seg_es, truncate_ea(_regs.r(di)),
    3812             :                               &dst.val, src.bytes, ctxt, ops)) != 0 )
    3813         193 :             goto done;
    3814       24500 :         register_address_adjust(_regs.r(di), src.bytes);
    3815       24500 :         put_rep_prefix(1);
    3816             :         /* cmp: %%eax - *%%edi ==> src=%%eax,dst=*%%edi ==> src - dst */
    3817       24500 :         dst.bytes = src.bytes;
    3818       24500 :         emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
    3819       48701 :         if ( (repe_prefix() && !(_regs.eflags & X86_EFLAGS_ZF)) ||
    3820       35414 :              (repne_prefix() && (_regs.eflags & X86_EFLAGS_ZF)) )
    3821        4788 :             _regs.r(ip) = next_eip;
    3822       24500 :         break;
    3823             :     }
    3824             : 
    3825             :     case 0xb0 ... 0xb7: /* mov imm8,r8 */
    3826       34122 :         dst.reg = decode_register(
    3827       34122 :             (b & 7) | ((rex_prefix & 1) << 3), &_regs, (rex_prefix == 0));
    3828       17061 :         dst.val = src.val;
    3829       17061 :         break;
    3830             : 
    3831             :     case 0xb8 ... 0xbf: /* mov imm{16,32,64},r{16,32,64} */
    3832       15276 :         dst.reg = decode_register(
    3833        7638 :             (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
    3834        7638 :         dst.val = src.val;
    3835        7638 :         break;
    3836             : 
    3837             :     case 0xc0 ... 0xc1: grp2: /* Grp2 */
    3838        7510 :         switch ( modrm_reg & 7 )
    3839             :         {
    3840             :         case 0: /* rol */
    3841         634 :             emulate_2op_SrcB("rol", src, dst, _regs.eflags);
    3842         634 :             break;
    3843             :         case 1: /* ror */
    3844        3495 :             emulate_2op_SrcB("ror", src, dst, _regs.eflags);
    3845        3495 :             break;
    3846             :         case 2: /* rcl */
    3847         621 :             emulate_2op_SrcB("rcl", src, dst, _regs.eflags);
    3848         621 :             break;
    3849             :         case 3: /* rcr */
    3850         460 :             emulate_2op_SrcB("rcr", src, dst, _regs.eflags);
    3851         460 :             break;
    3852             :         case 4: /* sal/shl */
    3853             :         case 6: /* sal/shl */
    3854         544 :             emulate_2op_SrcB("sal", src, dst, _regs.eflags);
    3855         544 :             break;
    3856             :         case 5: /* shr */
    3857         853 :             emulate_2op_SrcB("shr", src, dst, _regs.eflags);
    3858         853 :             break;
    3859             :         case 7: /* sar */
    3860         903 :             emulate_2op_SrcB("sar", src, dst, _regs.eflags);
    3861         903 :             break;
    3862             :         }
    3863        7510 :         break;
    3864             : 
    3865             :     case 0xc2: /* ret imm16 (near) */
    3866             :     case 0xc3: /* ret (near) */
    3867         414 :         op_bytes = ((op_bytes == 4) && mode_64bit()) ? 8 : op_bytes;
    3868         414 :         if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + src.val),
    3869         336 :                               &dst.val, op_bytes, ctxt, ops)) != 0 ||
    3870         336 :              (rc = ops->insn_fetch(x86_seg_cs, dst.val, NULL, 0, ctxt)) )
    3871             :             goto done;
    3872         324 :         _regs.r(ip) = dst.val;
    3873         324 :         adjust_bnd(ctxt, ops, vex.pfx);
    3874         324 :         break;
    3875             : 
    3876             :     case 0xc4: /* les */
    3877             :     case 0xc5: /* lds */
    3878         637 :         seg = (b & 1) * 3; /* es = 0, ds = 3 */
    3879             :     les:
    3880         650 :         generate_exception_if(src.type != OP_MEM, EXC_UD);
    3881         626 :         if ( (rc = read_ulong(src.mem.seg, src.mem.off + src.bytes,
    3882             :                               &dst.val, 2, ctxt, ops)) != X86EMUL_OKAY )
    3883           8 :             goto done;
    3884         618 :         ASSERT(is_x86_user_segment(seg));
    3885         618 :         if ( (rc = load_seg(seg, dst.val, 0, NULL, ctxt, ops)) != X86EMUL_OKAY )
    3886         108 :             goto done;
    3887         510 :         dst.val = src.val;
    3888         510 :         break;
    3889             : 
    3890             :     case 0xc8: /* enter imm16,imm8 */ {
    3891        5247 :         uint8_t depth = imm2 & 31;
    3892             :         int i;
    3893             : 
    3894        5247 :         dst.type = OP_REG;
    3895        5247 :         dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
    3896        5247 :         dst.reg = (unsigned long *)&_regs.r(bp);
    3897        5247 :         fail_if(!ops->write);
    3898        5211 :         if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
    3899             :                               &_regs.r(bp), dst.bytes, ctxt)) )
    3900          43 :             goto done;
    3901        5168 :         dst.val = _regs.r(sp);
    3902             : 
    3903        5168 :         if ( depth > 0 )
    3904             :         {
    3905       16668 :             for ( i = 1; i < depth; i++ )
    3906             :             {
    3907             :                 unsigned long ebp, temp_data;
    3908       12918 :                 ebp = truncate_word(_regs.r(bp) - i*dst.bytes, ctxt->sp_size/8);
    3909       12918 :                 if ( (rc = read_ulong(x86_seg_ss, ebp,
    3910       12833 :                                       &temp_data, dst.bytes, ctxt, ops)) ||
    3911       12833 :                      (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
    3912             :                                       &temp_data, dst.bytes, ctxt)) )
    3913             :                     goto done;
    3914             :             }
    3915        3750 :             if ( (rc = ops->write(x86_seg_ss, sp_pre_dec(dst.bytes),
    3916             :                                   &dst.val, dst.bytes, ctxt)) )
    3917          16 :                 goto done;
    3918             :         }
    3919             : 
    3920        4965 :         sp_pre_dec(src.val);
    3921        4965 :         break;
    3922             :     }
    3923             : 
    3924             :     case 0xc9: /* leave */
    3925             :         /* First writeback, to %%esp. */
    3926         399 :         dst.bytes = (mode_64bit() && (op_bytes == 4)) ? 8 : op_bytes;
    3927         399 :         if ( dst.bytes == 2 )
    3928         128 :             _regs.sp = _regs.bp;
    3929             :         else
    3930         271 :             _regs.r(sp) = dst.bytes == 4 ? _regs.ebp : _regs.r(bp);
    3931             : 
    3932             :         /* Second writeback, to %%ebp. */
    3933         399 :         dst.type = OP_REG;
    3934         399 :         dst.reg = (unsigned long *)&_regs.r(bp);
    3935         399 :         if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(dst.bytes),
    3936             :                               &dst.val, dst.bytes, ctxt, ops)) )
    3937         121 :             goto done;
    3938         278 :         break;
    3939             : 
    3940             :     case 0xca: /* ret imm16 (far) */
    3941             :     case 0xcb: /* ret (far) */
    3942         658 :         if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
    3943         554 :                               &dst.val, op_bytes, ctxt, ops)) ||
    3944         554 :              (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes + src.val),
    3945         442 :                               &src.val, op_bytes, ctxt, ops)) ||
    3946         667 :              (rc = load_seg(x86_seg_cs, src.val, 1, &cs, ctxt, ops)) ||
    3947         240 :              (rc = commit_far_branch(&cs, dst.val)) )
    3948             :             goto done;
    3949         209 :         break;
    3950             : 
    3951             :     case 0xce: /* into */
    3952        3934 :         if ( !(_regs.eflags & X86_EFLAGS_OF) )
    3953        3905 :             break;
    3954             :         /* Fallthrough */
    3955             :     case 0xcc: /* int3 */
    3956             :     case 0xcd: /* int imm8 */
    3957             :     case 0xf1: /* int1 (icebp) */
    3958         186 :         ASSERT(!ctxt->event_pending);
    3959         186 :         switch ( ctxt->opcode )
    3960             :         {
    3961             :         case 0xcc: /* int3 */
    3962          11 :             ctxt->event.vector = EXC_BP;
    3963          11 :             ctxt->event.type = X86_EVENTTYPE_SW_EXCEPTION;
    3964          11 :             break;
    3965             :         case 0xcd: /* int imm8 */
    3966         110 :             ctxt->event.vector = imm1;
    3967         110 :             ctxt->event.type = X86_EVENTTYPE_SW_INTERRUPT;
    3968         110 :             break;
    3969             :         case 0xce: /* into */
    3970          29 :             ctxt->event.vector = EXC_OF;
    3971          29 :             ctxt->event.type = X86_EVENTTYPE_SW_EXCEPTION;
    3972          29 :             break;
    3973             :         case 0xf1: /* icebp */
    3974          36 :             ctxt->event.vector = EXC_DB;
    3975          36 :             ctxt->event.type = X86_EVENTTYPE_PRI_SW_EXCEPTION;
    3976          36 :             break;
    3977             :         }
    3978         186 :         ctxt->event.error_code = X86_EVENT_NO_EC;
    3979         186 :         ctxt->event.insn_len = _regs.r(ip) - ctxt->regs->r(ip);
    3980         186 :         ctxt->event_pending = true;
    3981         186 :         rc = X86EMUL_EXCEPTION;
    3982         186 :         goto done;
    3983             : 
    3984             :     case 0xcf: /* iret */ {
    3985             :         unsigned long sel, eip, eflags;
    3986         224 :         uint32_t mask = X86_EFLAGS_VIP | X86_EFLAGS_VIF | X86_EFLAGS_VM;
    3987             : 
    3988         288 :         fail_if(!in_realmode(ctxt, ops));
    3989         200 :         if ( (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
    3990         199 :                               &eip, op_bytes, ctxt, ops)) ||
    3991         199 :              (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
    3992         192 :                               &sel, op_bytes, ctxt, ops)) ||
    3993         192 :              (rc = read_ulong(x86_seg_ss, sp_post_inc(op_bytes),
    3994             :                               &eflags, op_bytes, ctxt, ops)) )
    3995             :             goto done;
    3996         192 :         if ( op_bytes == 2 )
    3997         104 :             eflags = (uint16_t)eflags | (_regs.eflags & 0xffff0000u);
    3998         192 :         eflags &= EFLAGS_MODIFIABLE;
    3999         192 :         _regs.eflags &= mask;
    4000         192 :         _regs.eflags |= (eflags & ~mask) | X86_EFLAGS_MBS;
    4001         360 :         if ( (rc = load_seg(x86_seg_cs, sel, 1, &cs, ctxt, ops)) ||
    4002         176 :              (rc = commit_far_branch(&cs, (uint32_t)eip)) )
    4003             :             goto done;
    4004         160 :         break;
    4005             :     }
    4006             : 
    4007             :     case 0xd0 ... 0xd1: /* Grp2 */
    4008        4915 :         src.val = 1;
    4009        4915 :         goto grp2;
    4010             : 
    4011             :     case 0xd2 ... 0xd3: /* Grp2 */
    4012        1666 :         src.val = _regs.cl;
    4013        1666 :         goto grp2;
    4014             : 
    4015             :     case 0xd4: /* aam */
    4016             :     case 0xd5: /* aad */ {
    4017        5212 :         unsigned int base = (uint8_t)src.val;
    4018             : 
    4019        5212 :         if ( b & 0x01 )
    4020             :         {
    4021        5044 :             uint16_t ax = _regs.ax;
    4022             : 
    4023        5044 :             _regs.ax = (uint8_t)(ax + ((ax >> 8) * base));
    4024             :         }
    4025             :         else
    4026             :         {
    4027         168 :             uint8_t al = _regs.al;
    4028             : 
    4029         168 :             generate_exception_if(!base, EXC_DE);
    4030         160 :             _regs.ax = ((al / base) << 8) | (al % base);
    4031             :         }
    4032        5204 :         _regs.eflags &= ~(X86_EFLAGS_SF | X86_EFLAGS_ZF | X86_EFLAGS_PF);
    4033        5204 :         _regs.eflags |= !_regs.al ? X86_EFLAGS_ZF : 0;
    4034        5204 :         _regs.eflags |= ((int8_t)_regs.al < 0) ? X86_EFLAGS_SF : 0;
    4035        5204 :         _regs.eflags |= even_parity(_regs.al) ? X86_EFLAGS_PF : 0;
    4036        5204 :         break;
    4037             :     }
    4038             : 
    4039             :     case 0xd6: /* salc */
    4040         120 :         _regs.al = (_regs.eflags & X86_EFLAGS_CF) ? 0xff : 0x00;
    4041         120 :         break;
    4042             : 
    4043             :     case 0xd7: /* xlat */ {
    4044             :         unsigned long al;
    4045             : 
    4046         201 :         if ( (rc = read_ulong(ea.mem.seg, truncate_ea(_regs.r(bx) + _regs.al),
    4047             :                               &al, 1, ctxt, ops)) != 0 )
    4048          41 :             goto done;
    4049         160 :         _regs.al = al;
    4050         160 :         break;
    4051             :     }
    4052             : 
    4053             :     case 0xd8: /* FPU 0xd8 */
    4054        1054 :         host_and_vcpu_must_have(fpu);
    4055        1054 :         get_fpu(X86EMUL_FPU_fpu, &fic);
    4056        1051 :         switch ( modrm )
    4057             :         {
    4058             :         case 0xc0 ... 0xc7: /* fadd %stN,%st */
    4059             :         case 0xc8 ... 0xcf: /* fmul %stN,%st */
    4060             :         case 0xd0 ... 0xd7: /* fcom %stN,%st */
    4061             :         case 0xd8 ... 0xdf: /* fcomp %stN,%st */
    4062             :         case 0xe0 ... 0xe7: /* fsub %stN,%st */
    4063             :         case 0xe8 ... 0xef: /* fsubr %stN,%st */
    4064             :         case 0xf0 ... 0xf7: /* fdiv %stN,%st */
    4065             :         case 0xf8 ... 0xff: /* fdivr %stN,%st */
    4066          40 :             emulate_fpu_insn_stub(0xd8, modrm);
    4067          40 :             break;
    4068             :         default:
    4069        1011 :             ASSERT(ea.type == OP_MEM);
    4070        1011 :             if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4071             :                                  4, ctxt)) != X86EMUL_OKAY )
    4072           0 :                 goto done;
    4073        1011 :             switch ( modrm_reg & 7 )
    4074             :             {
    4075             :             case 0: /* fadd */
    4076         204 :                 emulate_fpu_insn_memsrc("fadds", src.val);
    4077         204 :                 break;
    4078             :             case 1: /* fmul */
    4079          54 :                 emulate_fpu_insn_memsrc("fmuls", src.val);
    4080          54 :                 break;
    4081             :             case 2: /* fcom */
    4082         144 :                 emulate_fpu_insn_memsrc("fcoms", src.val);
    4083         144 :                 break;
    4084             :             case 3: /* fcomp */
    4085         160 :                 emulate_fpu_insn_memsrc("fcomps", src.val);
    4086         160 :                 break;
    4087             :             case 4: /* fsub */
    4088         281 :                 emulate_fpu_insn_memsrc("fsubs", src.val);
    4089         281 :                 break;
    4090             :             case 5: /* fsubr */
    4091          96 :                 emulate_fpu_insn_memsrc("fsubrs", src.val);
    4092          96 :                 break;
    4093             :             case 6: /* fdiv */
    4094          24 :                 emulate_fpu_insn_memsrc("fdivs", src.val);
    4095          24 :                 break;
    4096             :             case 7: /* fdivr */
    4097          48 :                 emulate_fpu_insn_memsrc("fdivrs", src.val);
    4098          48 :                 break;
    4099             :             }
    4100             :         }
    4101        1051 :         check_fpu_exn(&fic);
    4102        1051 :         break;
    4103             : 
    4104             :     case 0xd9: /* FPU 0xd9 */
    4105         552 :         host_and_vcpu_must_have(fpu);
    4106         552 :         get_fpu(X86EMUL_FPU_fpu, &fic);
    4107         550 :         switch ( modrm )
    4108             :         {
    4109             :         case 0xfb: /* fsincos */
    4110           0 :             fail_if(cpu_has_amd_erratum(573));
    4111             :             /* fall through */
    4112             :         case 0xc0 ... 0xc7: /* fld %stN */
    4113             :         case 0xc8 ... 0xcf: /* fxch %stN */
    4114             :         case 0xd0: /* fnop */
    4115             :         case 0xd8 ... 0xdf: /* fstp %stN (alternative encoding) */
    4116             :         case 0xe0: /* fchs */
    4117             :         case 0xe1: /* fabs */
    4118             :         case 0xe4: /* ftst */
    4119             :         case 0xe5: /* fxam */
    4120             :         case 0xe8: /* fld1 */
    4121             :         case 0xe9: /* fldl2t */
    4122             :         case 0xea: /* fldl2e */
    4123             :         case 0xeb: /* fldpi */
    4124             :         case 0xec: /* fldlg2 */
    4125             :         case 0xed: /* fldln2 */
    4126             :         case 0xee: /* fldz */
    4127             :         case 0xf0: /* f2xm1 */
    4128             :         case 0xf1: /* fyl2x */
    4129             :         case 0xf2: /* fptan */
    4130             :         case 0xf3: /* fpatan */
    4131             :         case 0xf4: /* fxtract */
    4132             :         case 0xf5: /* fprem1 */
    4133             :         case 0xf6: /* fdecstp */
    4134             :         case 0xf7: /* fincstp */
    4135             :         case 0xf8: /* fprem */
    4136             :         case 0xf9: /* fyl2xp1 */
    4137             :         case 0xfa: /* fsqrt */
    4138             :         case 0xfc: /* frndint */
    4139             :         case 0xfd: /* fscale */
    4140             :         case 0xfe: /* fsin */
    4141             :         case 0xff: /* fcos */
    4142          24 :             emulate_fpu_insn_stub(0xd9, modrm);
    4143          24 :             break;
    4144             :         default:
    4145         526 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    4146         518 :             dst = ea;
    4147         518 :             switch ( modrm_reg & 7 )
    4148             :             {
    4149             :             case 0: /* fld m32fp */
    4150          48 :                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4151             :                                      4, ctxt)) != X86EMUL_OKAY )
    4152           0 :                     goto done;
    4153          48 :                 emulate_fpu_insn_memsrc("flds", src.val);
    4154          48 :                 dst.type = OP_NONE;
    4155          48 :                 break;
    4156             :             case 2: /* fst m32fp */
    4157          80 :                 emulate_fpu_insn_memdst("fsts", dst.val);
    4158          80 :                 dst.bytes = 4;
    4159          80 :                 break;
    4160             :             case 3: /* fstp m32fp */
    4161         144 :                 emulate_fpu_insn_memdst("fstps", dst.val);
    4162         144 :                 dst.bytes = 4;
    4163         144 :                 break;
    4164             :             case 4: /* fldenv - TODO */
    4165           8 :                 state->fpu_ctrl = true;
    4166           8 :                 goto cannot_emulate;
    4167             :             case 5: /* fldcw m2byte */
    4168         126 :                 state->fpu_ctrl = true;
    4169         126 :                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4170             :                                      2, ctxt)) != X86EMUL_OKAY )
    4171           7 :                     goto done;
    4172         119 :                 emulate_fpu_insn_memsrc("fldcw", src.val);
    4173         119 :                 dst.type = OP_NONE;
    4174         119 :                 break;
    4175             :             case 6: /* fnstenv - TODO */
    4176           8 :                 state->fpu_ctrl = true;
    4177           8 :                 goto cannot_emulate;
    4178             :             case 7: /* fnstcw m2byte */
    4179          96 :                 state->fpu_ctrl = true;
    4180          96 :                 emulate_fpu_insn_memdst("fnstcw", dst.val);
    4181          96 :                 dst.bytes = 2;
    4182          96 :                 break;
    4183             :             default:
    4184           8 :                 generate_exception(EXC_UD);
    4185             :             }
    4186             :             /*
    4187             :              * Control instructions can't raise FPU exceptions, so we need
    4188             :              * to consider suppressing writes only for non-control ones. All
    4189             :              * of them in this group have data width 4.
    4190             :              */
    4191         487 :             if ( dst.type == OP_MEM && dst.bytes == 4 && !fpu_check_write() )
    4192           0 :                 dst.type = OP_NONE;
    4193             :         }
    4194         511 :         check_fpu_exn(&fic);
    4195         511 :         break;
    4196             : 
    4197             :     case 0xda: /* FPU 0xda */
    4198         936 :         host_and_vcpu_must_have(fpu);
    4199         936 :         get_fpu(X86EMUL_FPU_fpu, &fic);
    4200         888 :         switch ( modrm )
    4201             :         {
    4202             :         case 0xc0 ... 0xc7: /* fcmovb %stN */
    4203             :         case 0xc8 ... 0xcf: /* fcmove %stN */
    4204             :         case 0xd0 ... 0xd7: /* fcmovbe %stN */
    4205             :         case 0xd8 ... 0xdf: /* fcmovu %stN */
    4206          24 :             vcpu_must_have(cmov);
    4207          24 :             emulate_fpu_insn_stub_eflags(0xda, modrm);
    4208          24 :             break;
    4209             :         case 0xe9:          /* fucompp */
    4210           8 :             emulate_fpu_insn_stub(0xda, modrm);
    4211           8 :             break;
    4212             :         default:
    4213         856 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    4214         848 :             if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4215             :                                  4, ctxt)) != X86EMUL_OKAY )
    4216           0 :                 goto done;
    4217         848 :             switch ( modrm_reg & 7 )
    4218             :             {
    4219             :             case 0: /* fiadd m32i */
    4220         161 :                 emulate_fpu_insn_memsrc("fiaddl", src.val);
    4221         161 :                 break;
    4222             :             case 1: /* fimul m32i */
    4223          24 :                 emulate_fpu_insn_memsrc("fimull", src.val);
    4224          24 :                 break;
    4225             :             case 2: /* ficom m32i */
    4226          24 :                 emulate_fpu_insn_memsrc("ficoml", src.val);
    4227          24 :                 break;
    4228             :             case 3: /* ficomp m32i */
    4229         149 :                 emulate_fpu_insn_memsrc("ficompl", src.val);
    4230         149 :                 break;
    4231             :             case 4: /* fisub m32i */
    4232         245 :                 emulate_fpu_insn_memsrc("fisubl", src.val);
    4233         245 :                 break;
    4234             :             case 5: /* fisubr m32i */
    4235         144 :                 emulate_fpu_insn_memsrc("fisubrl", src.val);
    4236         144 :                 break;
    4237             :             case 6: /* fidiv m32i */
    4238          53 :                 emulate_fpu_insn_memsrc("fidivl", src.val);
    4239          53 :                 break;
    4240             :             case 7: /* fidivr m32i */
    4241          48 :                 emulate_fpu_insn_memsrc("fidivrl", src.val);
    4242          48 :                 break;
    4243             :             }
    4244             :         }
    4245         880 :         check_fpu_exn(&fic);
    4246         880 :         break;
    4247             : 
    4248             :     case 0xdb: /* FPU 0xdb */
    4249        1273 :         host_and_vcpu_must_have(fpu);
    4250        1273 :         get_fpu(X86EMUL_FPU_fpu, &fic);
    4251        1250 :         switch ( modrm )
    4252             :         {
    4253             :         case 0xc0 ... 0xc7: /* fcmovnb %stN */
    4254             :         case 0xc8 ... 0xcf: /* fcmovne %stN */
    4255             :         case 0xd0 ... 0xd7: /* fcmovnbe %stN */
    4256             :         case 0xd8 ... 0xdf: /* fcmovnu %stN */
    4257             :         case 0xe8 ... 0xef: /* fucomi %stN */
    4258             :         case 0xf0 ... 0xf7: /* fcomi %stN */
    4259         145 :             vcpu_must_have(cmov);
    4260         145 :             emulate_fpu_insn_stub_eflags(0xdb, modrm);
    4261         145 :             break;
    4262             :         case 0xe0: /* fneni - 8087 only, ignored by 287 */
    4263             :         case 0xe1: /* fndisi - 8087 only, ignored by 287 */
    4264             :         case 0xe2: /* fnclex */
    4265             :         case 0xe3: /* fninit */
    4266             :         case 0xe4: /* fnsetpm - 287 only, ignored by 387 */
    4267             :         /* case 0xe5: frstpm - 287 only, #UD on 387 */
    4268          84 :             state->fpu_ctrl = true;
    4269          84 :             emulate_fpu_insn_stub(0xdb, modrm);
    4270          84 :             break;
    4271             :         default:
    4272        1021 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    4273        1013 :             dst = ea;
    4274        1013 :             switch ( modrm_reg & 7 )
    4275             :             {
    4276             :             case 0: /* fild m32i */
    4277         130 :                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4278             :                                      4, ctxt)) != X86EMUL_OKAY )
    4279           0 :                     goto done;
    4280         130 :                 emulate_fpu_insn_memsrc("fildl", src.val);
    4281         130 :                 dst.type = OP_NONE;
    4282         130 :                 break;
    4283             :             case 1: /* fisttp m32i */
    4284           8 :                 host_and_vcpu_must_have(sse3);
    4285           8 :                 emulate_fpu_insn_memdst("fisttpl", dst.val);
    4286           8 :                 dst.bytes = 4;
    4287           8 :                 break;
    4288             :             case 2: /* fist m32i */
    4289         650 :                 emulate_fpu_insn_memdst("fistl", dst.val);
    4290         650 :                 dst.bytes = 4;
    4291         650 :                 break;
    4292             :             case 3: /* fistp m32i */
    4293          85 :                 emulate_fpu_insn_memdst("fistpl", dst.val);
    4294          85 :                 dst.bytes = 4;
    4295          85 :                 break;
    4296             :             case 5: /* fld m80fp */
    4297         112 :                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
    4298             :                                      10, ctxt)) != X86EMUL_OKAY )
    4299           0 :                     goto done;
    4300         112 :                 emulate_fpu_insn_memsrc("fldt", *mmvalp);
    4301         112 :                 dst.type = OP_NONE;
    4302         112 :                 break;
    4303             :             case 7: /* fstp m80fp */
    4304          20 :                 fail_if(!ops->write);
    4305          12 :                 emulate_fpu_insn_memdst("fstpt", *mmvalp);
    4306          24 :                 if ( fpu_check_write() &&
    4307          12 :                      (rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
    4308             :                                       10, ctxt)) != X86EMUL_OKAY )
    4309           0 :                     goto done;
    4310          12 :                 dst.type = OP_NONE;
    4311          12 :                 break;
    4312             :             default:
    4313           8 :                 generate_exception(EXC_UD);
    4314             :             }
    4315         997 :             if ( dst.type == OP_MEM && !fpu_check_write() )
    4316           0 :                 dst.type = OP_NONE;
    4317             :         }
    4318        1226 :         check_fpu_exn(&fic);
    4319        1226 :         break;
    4320             : 
    4321             :     case 0xdc: /* FPU 0xdc */
    4322        1199 :         host_and_vcpu_must_have(fpu);
    4323        1199 :         get_fpu(X86EMUL_FPU_fpu, &fic);
    4324        1172 :         switch ( modrm )
    4325             :         {
    4326             :         case 0xc0 ... 0xc7: /* fadd %st,%stN */
    4327             :         case 0xc8 ... 0xcf: /* fmul %st,%stN */
    4328             :         case 0xd0 ... 0xd7: /* fcom %stN,%st (alternative encoding) */
    4329             :         case 0xd8 ... 0xdf: /* fcomp %stN,%st (alternative encoding) */
    4330             :         case 0xe0 ... 0xe7: /* fsubr %st,%stN */
    4331             :         case 0xe8 ... 0xef: /* fsub %st,%stN */
    4332             :         case 0xf0 ... 0xf7: /* fdivr %st,%stN */
    4333             :         case 0xf8 ... 0xff: /* fdiv %st,%stN */
    4334          32 :             emulate_fpu_insn_stub(0xdc, modrm);
    4335          32 :             break;
    4336             :         default:
    4337        1140 :             ASSERT(ea.type == OP_MEM);
    4338        1140 :             if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4339             :                                  8, ctxt)) != X86EMUL_OKAY )
    4340           8 :                 goto done;
    4341        1132 :             switch ( modrm_reg & 7 )
    4342             :             {
    4343             :             case 0: /* fadd m64fp */
    4344         120 :                 emulate_fpu_insn_memsrc("faddl", src.val);
    4345         120 :                 break;
    4346             :             case 1: /* fmul m64fp */
    4347          94 :                 emulate_fpu_insn_memsrc("fmull", src.val);
    4348          94 :                 break;
    4349             :             case 2: /* fcom m64fp */
    4350         405 :                 emulate_fpu_insn_memsrc("fcoml", src.val);
    4351         405 :                 break;
    4352             :             case 3: /* fcomp m64fp */
    4353         136 :                 emulate_fpu_insn_memsrc("fcompl", src.val);
    4354         136 :                 break;
    4355             :             case 4: /* fsub m64fp */
    4356          88 :                 emulate_fpu_insn_memsrc("fsubl", src.val);
    4357          88 :                 break;
    4358             :             case 5: /* fsubr m64fp */
    4359         108 :                 emulate_fpu_insn_memsrc("fsubrl", src.val);
    4360         108 :                 break;
    4361             :             case 6: /* fdiv m64fp */
    4362          80 :                 emulate_fpu_insn_memsrc("fdivl", src.val);
    4363          80 :                 break;
    4364             :             case 7: /* fdivr m64fp */
    4365         101 :                 emulate_fpu_insn_memsrc("fdivrl", src.val);
    4366         101 :                 break;
    4367             :             }
    4368             :         }
    4369        1164 :         check_fpu_exn(&fic);
    4370        1164 :         break;
    4371             : 
    4372             :     case 0xdd: /* FPU 0xdd */
    4373         451 :         host_and_vcpu_must_have(fpu);
    4374         451 :         get_fpu(X86EMUL_FPU_fpu, &fic);
    4375         434 :         switch ( modrm )
    4376             :         {
    4377             :         case 0xc0 ... 0xc7: /* ffree %stN */
    4378             :         case 0xc8 ... 0xcf: /* fxch %stN (alternative encoding) */
    4379             :         case 0xd0 ... 0xd7: /* fst %stN */
    4380             :         case 0xd8 ... 0xdf: /* fstp %stN */
    4381             :         case 0xe0 ... 0xe7: /* fucom %stN */
    4382             :         case 0xe8 ... 0xef: /* fucomp %stN */
    4383          88 :             emulate_fpu_insn_stub(0xdd, modrm);
    4384          88 :             break;
    4385             :         default:
    4386         346 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    4387         338 :             dst = ea;
    4388         338 :             switch ( modrm_reg & 7 )
    4389             :             {
    4390             :             case 0: /* fld m64fp */;
    4391         130 :                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4392             :                                      8, ctxt)) != X86EMUL_OKAY )
    4393          50 :                     goto done;
    4394          80 :                 emulate_fpu_insn_memsrc("fldl", src.val);
    4395          80 :                 dst.type = OP_NONE;
    4396          80 :                 break;
    4397             :             case 1: /* fisttp m64i */
    4398          80 :                 host_and_vcpu_must_have(sse3);
    4399          80 :                 emulate_fpu_insn_memdst("fisttpll", dst.val);
    4400          80 :                 dst.bytes = 8;
    4401          80 :                 break;
    4402             :             case 2: /* fst m64fp */
    4403          80 :                 emulate_fpu_insn_memdst("fstl", dst.val);
    4404          80 :                 dst.bytes = 8;
    4405          80 :                 break;
    4406             :             case 3: /* fstp m64fp */
    4407           8 :                 emulate_fpu_insn_memdst("fstpl", dst.val);
    4408           8 :                 dst.bytes = 8;
    4409           8 :                 break;
    4410             :             case 4: /* frstor - TODO */
    4411             :             case 6: /* fnsave - TODO */
    4412           8 :                 state->fpu_ctrl = true;
    4413           8 :                 goto cannot_emulate;
    4414             :             case 7: /* fnstsw m2byte */
    4415          24 :                 state->fpu_ctrl = true;
    4416          24 :                 emulate_fpu_insn_memdst("fnstsw", dst.val);
    4417          24 :                 dst.bytes = 2;
    4418          24 :                 break;
    4419             :             default:
    4420           8 :                 generate_exception(EXC_UD);
    4421             :             }
    4422             :             /*
    4423             :              * Control instructions can't raise FPU exceptions, so we need
    4424             :              * to consider suppressing writes only for non-control ones. All
    4425             :              * of them in this group have data width 8.
    4426             :              */
    4427         272 :             if ( dst.type == OP_MEM && dst.bytes == 8 && !fpu_check_write() )
    4428           0 :                 dst.type = OP_NONE;
    4429             :         }
    4430         360 :         check_fpu_exn(&fic);
    4431         360 :         break;
    4432             : 
    4433             :     case 0xde: /* FPU 0xde */
    4434        4013 :         host_and_vcpu_must_have(fpu);
    4435        4013 :         get_fpu(X86EMUL_FPU_fpu, &fic);
    4436        4010 :         switch ( modrm )
    4437             :         {
    4438             :         case 0xc0 ... 0xc7: /* faddp %stN */
    4439             :         case 0xc8 ... 0xcf: /* fmulp %stN */
    4440             :         case 0xd0 ... 0xd7: /* fcomp %stN (alternative encoding) */
    4441             :         case 0xd9: /* fcompp */
    4442             :         case 0xe0 ... 0xe7: /* fsubrp %stN */
    4443             :         case 0xe8 ... 0xef: /* fsubp %stN */
    4444             :         case 0xf0 ... 0xf7: /* fdivrp %stN */
    4445             :         case 0xf8 ... 0xff: /* fdivp %stN */
    4446          24 :             emulate_fpu_insn_stub(0xde, modrm);
    4447          24 :             break;
    4448             :         default:
    4449        3986 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    4450        3978 :             switch ( modrm_reg & 7 )
    4451             :             {
    4452             :             case 0: /* fiadd m16i */
    4453          59 :                 emulate_fpu_insn_memsrc("fiadds", src.val);
    4454          59 :                 break;
    4455             :             case 1: /* fimul m16i */
    4456         376 :                 emulate_fpu_insn_memsrc("fimuls", src.val);
    4457         376 :                 break;
    4458             :             case 2: /* ficom m16i */
    4459         205 :                 emulate_fpu_insn_memsrc("ficoms", src.val);
    4460         205 :                 break;
    4461             :             case 3: /* ficomp m16i */
    4462         160 :                 emulate_fpu_insn_memsrc("ficomps", src.val);
    4463         160 :                 break;
    4464             :             case 4: /* fisub m16i */
    4465         112 :                 emulate_fpu_insn_memsrc("fisubs", src.val);
    4466         112 :                 break;
    4467             :             case 5: /* fisubr m16i */
    4468        2159 :                 emulate_fpu_insn_memsrc("fisubrs", src.val);
    4469        2159 :                 break;
    4470             :             case 6: /* fidiv m16i */
    4471         827 :                 emulate_fpu_insn_memsrc("fidivs", src.val);
    4472         827 :                 break;
    4473             :             case 7: /* fidivr m16i */
    4474          80 :                 emulate_fpu_insn_memsrc("fidivrs", src.val);
    4475          80 :                 break;
    4476             :             }
    4477             :         }
    4478        4002 :         check_fpu_exn(&fic);
    4479        4002 :         break;
    4480             : 
    4481             :     case 0xdf: /* FPU 0xdf */
    4482         993 :         host_and_vcpu_must_have(fpu);
    4483         993 :         get_fpu(X86EMUL_FPU_fpu, &fic);
    4484         969 :         switch ( modrm )
    4485             :         {
    4486             :         case 0xe0:
    4487             :             /* fnstsw %ax */
    4488           8 :             state->fpu_ctrl = true;
    4489           8 :             dst.bytes = 2;
    4490           8 :             dst.type = OP_REG;
    4491           8 :             dst.reg = (void *)&_regs.ax;
    4492           8 :             emulate_fpu_insn_memdst("fnstsw", dst.val);
    4493           8 :             break;
    4494             :         case 0xe8 ... 0xef: /* fucomip %stN */
    4495             :         case 0xf0 ... 0xf7: /* fcomip %stN */
    4496          80 :             vcpu_must_have(cmov);
    4497          80 :             emulate_fpu_insn_stub_eflags(0xdf, modrm);
    4498          80 :             break;
    4499             :         case 0xc0 ... 0xc7: /* ffreep %stN */
    4500             :         case 0xc8 ... 0xcf: /* fxch %stN (alternative encoding) */
    4501             :         case 0xd0 ... 0xd7: /* fstp %stN (alternative encoding) */
    4502             :         case 0xd8 ... 0xdf: /* fstp %stN (alternative encoding) */
    4503          32 :             emulate_fpu_insn_stub(0xdf, modrm);
    4504          32 :             break;
    4505             :         default:
    4506         849 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    4507         841 :             dst = ea;
    4508         841 :             switch ( modrm_reg & 7 )
    4509             :             {
    4510             :             case 0: /* fild m16i */
    4511         240 :                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4512             :                                      2, ctxt)) != X86EMUL_OKAY )
    4513           0 :                     goto done;
    4514         240 :                 emulate_fpu_insn_memsrc("filds", src.val);
    4515         240 :                 dst.type = OP_NONE;
    4516         240 :                 break;
    4517             :             case 1: /* fisttp m16i */
    4518          80 :                 host_and_vcpu_must_have(sse3);
    4519          80 :                 emulate_fpu_insn_memdst("fisttps", dst.val);
    4520          80 :                 dst.bytes = 2;
    4521          80 :                 break;
    4522             :             case 2: /* fist m16i */
    4523         118 :                 emulate_fpu_insn_memdst("fists", dst.val);
    4524         118 :                 dst.bytes = 2;
    4525         118 :                 break;
    4526             :             case 3: /* fistp m16i */
    4527          80 :                 emulate_fpu_insn_memdst("fistps", dst.val);
    4528          80 :                 dst.bytes = 2;
    4529          80 :                 break;
    4530             :             case 4: /* fbld m80dec */
    4531         157 :                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
    4532             :                                      10, ctxt)) != X86EMUL_OKAY )
    4533           4 :                     goto done;
    4534         153 :                 emulate_fpu_insn_memsrc("fbld", *mmvalp);
    4535         153 :                 dst.type = OP_NONE;
    4536         153 :                 break;
    4537             :             case 5: /* fild m64i */
    4538          24 :                 if ( (rc = ops->read(ea.mem.seg, ea.mem.off, &src.val,
    4539             :                                      8, ctxt)) != X86EMUL_OKAY )
    4540           0 :                     goto done;
    4541          24 :                 emulate_fpu_insn_memsrc("fildll", src.val);
    4542          24 :                 dst.type = OP_NONE;
    4543          24 :                 break;
    4544             :             case 6: /* fbstp packed bcd */
    4545          33 :                 fail_if(!ops->write);
    4546          24 :                 emulate_fpu_insn_memdst("fbstp", *mmvalp);
    4547          48 :                 if ( fpu_check_write() &&
    4548          24 :                      (rc = ops->write(ea.mem.seg, ea.mem.off, mmvalp,
    4549             :                                       10, ctxt)) != X86EMUL_OKAY )
    4550           0 :                     goto done;
    4551          24 :                 dst.type = OP_NONE;
    4552          24 :                 break;
    4553             :             case 7: /* fistp m64i */
    4554         109 :                 emulate_fpu_insn_memdst("fistpll", dst.val);
    4555         109 :                 dst.bytes = 8;
    4556         109 :                 break;
    4557             :             }
    4558         828 :             if ( dst.type == OP_MEM && !fpu_check_write() )
    4559           0 :                 dst.type = OP_NONE;
    4560             :         }
    4561         948 :         check_fpu_exn(&fic);
    4562         948 :         break;
    4563             : 
    4564             :     case 0xe0 ... 0xe2: /* loop{,z,nz} */ {
    4565        1192 :         unsigned long count = get_loop_count(&_regs, ad_bytes);
    4566        1192 :         int do_jmp = !(_regs.eflags & X86_EFLAGS_ZF); /* loopnz */
    4567             : 
    4568        1192 :         if ( b == 0xe1 )
    4569         180 :             do_jmp = !do_jmp; /* loopz */
    4570        1012 :         else if ( b == 0xe2 )
    4571         744 :             do_jmp = 1; /* loop */
    4572        1192 :         if ( count != 1 && do_jmp )
    4573        1034 :             jmp_rel((int32_t)src.val);
    4574        1121 :         put_loop_count(&_regs, ad_bytes, count - 1);
    4575        1121 :         break;
    4576             :     }
    4577             : 
    4578             :     case 0xe3: /* jcxz/jecxz (short) */
    4579       13302 :         if ( !get_loop_count(&_regs, ad_bytes) )
    4580       12390 :             jmp_rel((int32_t)src.val);
    4581       13295 :         break;
    4582             : 
    4583             :     case 0xe4: /* in imm8,%al */
    4584             :     case 0xe5: /* in imm8,%eax */
    4585             :     case 0xe6: /* out %al,imm8 */
    4586             :     case 0xe7: /* out %eax,imm8 */
    4587             :     case 0xec: /* in %dx,%al */
    4588             :     case 0xed: /* in %dx,%eax */
    4589             :     case 0xee: /* out %al,%dx */
    4590             :     case 0xef: /* out %eax,%dx */ {
    4591         948 :         unsigned int port = ((b < 0xe8) ? (uint8_t)src.val : _regs.dx);
    4592             : 
    4593         948 :         op_bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
    4594         948 :         if ( (rc = ioport_access_check(port, op_bytes, ctxt, ops)) != 0 )
    4595         323 :             goto done;
    4596         625 :         if ( b & 2 )
    4597             :         {
    4598             :             /* out */
    4599         351 :             fail_if(ops->write_io == NULL);
    4600         320 :             rc = ops->write_io(port, op_bytes, _regs.eax, ctxt);
    4601             :         }
    4602             :         else
    4603             :         {
    4604             :             /* in */
    4605         274 :             dst.bytes = op_bytes;
    4606         274 :             fail_if(ops->read_io == NULL);
    4607         265 :             rc = ops->read_io(port, dst.bytes, &dst.val, ctxt);
    4608             :         }
    4609         585 :         if ( rc != 0 )
    4610             :         {
    4611          37 :             if ( rc == X86EMUL_DONE )
    4612           0 :                 goto complete_insn;
    4613          37 :             goto done;
    4614             :         }
    4615         548 :         break;
    4616             :     }
    4617             : 
    4618             :     case 0xe8: /* call (near) */ {
    4619        2240 :         int32_t rel = src.val;
    4620             : 
    4621        2240 :         op_bytes = ((op_bytes == 4) && mode_64bit()) ? 8 : op_bytes;
    4622        2240 :         src.val = _regs.r(ip);
    4623        2240 :         jmp_rel(rel);
    4624        2076 :         adjust_bnd(ctxt, ops, vex.pfx);
    4625        2076 :         goto push;
    4626             :     }
    4627             : 
    4628             :     case 0xe9: /* jmp (near) */
    4629             :     case 0xeb: /* jmp (short) */
    4630        2327 :         jmp_rel((int32_t)src.val);
    4631        2306 :         if ( !(b & 2) )
    4632        2240 :             adjust_bnd(ctxt, ops, vex.pfx);
    4633        2306 :         break;
    4634             : 
    4635             :     case 0xea: /* jmp (far, absolute) */
    4636          62 :         ASSERT(!mode_64bit());
    4637             :     far_jmp:
    4638         572 :         if ( (rc = load_seg(x86_seg_cs, imm2, 0, &cs, ctxt, ops)) ||
    4639         254 :              (rc = commit_far_branch(&cs, imm1)) )
    4640             :             goto done;
    4641         174 :         break;
    4642             : 
    4643             :     case 0xf4: /* hlt */
    4644         107 :         generate_exception_if(!mode_ring0(), EXC_GP, 0);
    4645          80 :         ctxt->retire.hlt = true;
    4646          80 :         break;
    4647             : 
    4648             :     case 0xf5: /* cmc */
    4649         274 :         _regs.eflags ^= X86_EFLAGS_CF;
    4650         274 :         break;
    4651             : 
    4652             :     case 0xf6 ... 0xf7: /* Grp3 */
    4653       17508 :         if ( (d & DstMask) == DstEax )
    4654       15696 :             dst.reg = (unsigned long *)&_regs.r(ax);
    4655       17508 :         switch ( modrm_reg & 7 )
    4656             :         {
    4657             :             unsigned long u[2], v;
    4658             : 
    4659             :         case 0 ... 1: /* test */
    4660        1605 :             generate_exception_if(lock_prefix, EXC_UD);
    4661        1217 :             goto test;
    4662             :         case 2: /* not */
    4663         184 :             dst.val = ~dst.val;
    4664       35620 :             break;
    4665             :         case 3: /* neg */
    4666         403 :             emulate_1op("neg", dst, _regs.eflags);
    4667         403 :             break;
    4668             :         case 4: /* mul */
    4669        7890 :             _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF);
    4670        7890 :             switch ( dst.bytes )
    4671             :             {
    4672             :             case 1:
    4673        1180 :                 dst.val = _regs.al;
    4674        1180 :                 dst.val *= src.val;
    4675        1180 :                 if ( (uint8_t)dst.val != (uint16_t)dst.val )
    4676         766 :                     _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
    4677        1180 :                 dst.bytes = 2;
    4678        1180 :                 break;
    4679             :             case 2:
    4680         781 :                 dst.val = _regs.ax;
    4681         781 :                 dst.val *= src.val;
    4682         781 :                 if ( (uint16_t)dst.val != (uint32_t)dst.val )
    4683         395 :                     _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
    4684         781 :                 _regs.dx = dst.val >> 16;
    4685         781 :                 break;
    4686             : #ifdef __x86_64__
    4687             :             case 4:
    4688        3324 :                 dst.val = _regs.eax;
    4689        3324 :                 dst.val *= src.val;
    4690        3324 :                 if ( (uint32_t)dst.val != dst.val )
    4691        2195 :                     _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
    4692        3324 :                 _regs.rdx = dst.val >> 32;
    4693        3324 :                 break;
    4694             : #endif
    4695             :             default:
    4696        2605 :                 u[0] = src.val;
    4697        2605 :                 u[1] = _regs.r(ax);
    4698        2605 :                 if ( mul_dbl(u) )
    4699        2352 :                     _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
    4700        2605 :                 _regs.r(dx) = u[1];
    4701        2605 :                 dst.val = u[0];
    4702        2605 :                 break;
    4703             :             }
    4704        7890 :             break;
    4705             :         case 5: /* imul */
    4706             :         imul:
    4707       23208 :             _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_CF);
    4708       23208 :             switch ( dst.bytes )
    4709             :             {
    4710             :             case 1:
    4711         318 :                 dst.val = (int8_t)src.val * (int8_t)_regs.al;
    4712         318 :                 if ( (int8_t)dst.val != (int16_t)dst.val )
    4713         153 :                     _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
    4714         318 :                 ASSERT(b > 0x6b);
    4715         318 :                 dst.bytes = 2;
    4716         318 :                 break;
    4717             :             case 2:
    4718       21090 :                 dst.val = ((uint32_t)(int16_t)src.val *
    4719       10545 :                            (uint32_t)(int16_t)_regs.ax);
    4720       10545 :                 if ( (int16_t)dst.val != (int32_t)dst.val )
    4721        3633 :                     _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
    4722       10545 :                 if ( b > 0x6b )
    4723        1332 :                     _regs.dx = dst.val >> 16;
    4724       10545 :                 break;
    4725             : #ifdef __x86_64__
    4726             :             case 4:
    4727       20202 :                 dst.val = ((uint64_t)(int32_t)src.val *
    4728       10101 :                            (uint64_t)(int32_t)_regs.eax);
    4729       10101 :                 if ( (int32_t)dst.val != dst.val )
    4730        3412 :                     _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
    4731       10101 :                 if ( b > 0x6b )
    4732         275 :                     _regs.rdx = dst.val >> 32;
    4733       10101 :                 break;
    4734             : #endif
    4735             :             default:
    4736        2244 :                 u[0] = src.val;
    4737        2244 :                 u[1] = _regs.r(ax);
    4738        2244 :                 if ( imul_dbl(u) )
    4739         498 :                     _regs.eflags |= X86_EFLAGS_OF | X86_EFLAGS_CF;
    4740        2244 :                 if ( b > 0x6b )
    4741        1758 :                     _regs.r(dx) = u[1];
    4742        2244 :                 dst.val = u[0];
    4743        2244 :                 break;
    4744             :             }
    4745       23208 :             break;
    4746             :         case 6: /* div */
    4747         378 :             switch ( src.bytes )
    4748             :             {
    4749             :             case 1:
    4750          90 :                 u[0] = _regs.ax;
    4751          90 :                 u[1] = 0;
    4752          90 :                 v    = (uint8_t)src.val;
    4753          90 :                 generate_exception_if(
    4754             :                     div_dbl(u, v) || ((uint8_t)u[0] != (uint16_t)u[0]),
    4755             :                     EXC_DE);
    4756          58 :                 dst.val = (uint8_t)u[0];
    4757          58 :                 _regs.ah = u[1];
    4758          58 :                 break;
    4759             :             case 2:
    4760          72 :                 u[0] = (_regs.edx << 16) | _regs.ax;
    4761          72 :                 u[1] = 0;
    4762          72 :                 v    = (uint16_t)src.val;
    4763          72 :                 generate_exception_if(
    4764             :                     div_dbl(u, v) || ((uint16_t)u[0] != (uint32_t)u[0]),
    4765             :                     EXC_DE);
    4766          56 :                 dst.val = (uint16_t)u[0];
    4767          56 :                 _regs.dx = u[1];
    4768          56 :                 break;
    4769             : #ifdef __x86_64__
    4770             :             case 4:
    4771         128 :                 u[0] = (_regs.rdx << 32) | _regs.eax;
    4772         128 :                 u[1] = 0;
    4773         128 :                 v    = (uint32_t)src.val;
    4774         128 :                 generate_exception_if(
    4775             :                     div_dbl(u, v) || ((uint32_t)u[0] != u[0]),
    4776             :                     EXC_DE);
    4777         112 :                 dst.val   = (uint32_t)u[0];
    4778         112 :                 _regs.rdx = (uint32_t)u[1];
    4779         112 :                 break;
    4780             : #endif
    4781             :             default:
    4782          88 :                 u[0] = _regs.r(ax);
    4783          88 :                 u[1] = _regs.r(dx);
    4784          88 :                 v    = src.val;
    4785          88 :                 generate_exception_if(div_dbl(u, v), EXC_DE);
    4786          80 :                 dst.val     = u[0];
    4787          80 :                 _regs.r(dx) = u[1];
    4788          80 :                 break;
    4789             :             }
    4790         306 :             break;
    4791             :         case 7: /* idiv */
    4792        3745 :             switch ( src.bytes )
    4793             :             {
    4794             :             case 1:
    4795         212 :                 u[0] = (int16_t)_regs.ax;
    4796         212 :                 u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
    4797         212 :                 v    = (int8_t)src.val;
    4798         212 :                 generate_exception_if(
    4799             :                     idiv_dbl(u, v) || ((int8_t)u[0] != (int16_t)u[0]),
    4800             :                     EXC_DE);
    4801         175 :                 dst.val = (int8_t)u[0];
    4802         175 :                 _regs.ah = u[1];
    4803         175 :                 break;
    4804             :             case 2:
    4805        1224 :                 u[0] = (int32_t)((_regs.edx << 16) | _regs.ax);
    4806        1224 :                 u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
    4807        1224 :                 v    = (int16_t)src.val;
    4808        1224 :                 generate_exception_if(
    4809             :                     idiv_dbl(u, v) || ((int16_t)u[0] != (int32_t)u[0]),
    4810             :                     EXC_DE);
    4811        1144 :                 dst.val = (int16_t)u[0];
    4812        1144 :                 _regs.dx = u[1];
    4813        1144 :                 break;
    4814             : #ifdef __x86_64__
    4815             :             case 4:
    4816         704 :                 u[0] = (_regs.rdx << 32) | _regs.eax;
    4817         704 :                 u[1] = ((long)u[0] < 0) ? ~0UL : 0UL;
    4818         704 :                 v    = (int32_t)src.val;
    4819         704 :                 generate_exception_if(
    4820             :                     idiv_dbl(u, v) || ((int32_t)u[0] != u[0]),
    4821             :                     EXC_DE);
    4822         602 :                 dst.val   = (int32_t)u[0];
    4823         602 :                 _regs.rdx = (uint32_t)u[1];
    4824         602 :                 break;
    4825             : #endif
    4826             :             default:
    4827        1605 :                 u[0] = _regs.r(ax);
    4828        1605 :                 u[1] = _regs.r(dx);
    4829        1605 :                 v    = src.val;
    4830        1605 :                 generate_exception_if(idiv_dbl(u, v), EXC_DE);
    4831        1524 :                 dst.val     = u[0];
    4832        1524 :                 _regs.r(dx) = u[1];
    4833        1524 :                 break;
    4834             :             }
    4835        3445 :             break;
    4836             :         }
    4837       35436 :         break;
    4838             : 
    4839             :     case 0xf8: /* clc */
    4840       11536 :         _regs.eflags &= ~X86_EFLAGS_CF;
    4841       11536 :         break;
    4842             : 
    4843             :     case 0xf9: /* stc */
    4844         163 :         _regs.eflags |= X86_EFLAGS_CF;
    4845         163 :         break;
    4846             : 
    4847             :     case 0xfa: /* cli */
    4848         533 :         if ( mode_iopl() )
    4849         240 :             _regs.eflags &= ~X86_EFLAGS_IF;
    4850             :         else
    4851             :         {
    4852         239 :             generate_exception_if(!mode_vif(), EXC_GP, 0);
    4853         190 :             _regs.eflags &= ~X86_EFLAGS_VIF;
    4854             :         }
    4855         430 :         break;
    4856             : 
    4857             :     case 0xfb: /* sti */
    4858         865 :         if ( mode_iopl() )
    4859             :         {
    4860         369 :             if ( !(_regs.eflags & X86_EFLAGS_IF) )
    4861         183 :                 ctxt->retire.sti = true;
    4862         369 :             _regs.eflags |= X86_EFLAGS_IF;
    4863             :         }
    4864             :         else
    4865             :         {
    4866         480 :             generate_exception_if((_regs.eflags & X86_EFLAGS_VIP) ||
    4867             :                                   !mode_vif(),
    4868             :                                   EXC_GP, 0);
    4869         462 :             if ( !(_regs.eflags & X86_EFLAGS_VIF) )
    4870         155 :                 ctxt->retire.sti = true;
    4871         462 :             _regs.eflags |= X86_EFLAGS_VIF;
    4872             :         }
    4873         831 :         break;
    4874             : 
    4875             :     case 0xfc: /* cld */
    4876        1418 :         _regs.eflags &= ~X86_EFLAGS_DF;
    4877        1418 :         break;
    4878             : 
    4879             :     case 0xfd: /* std */
    4880         719 :         _regs.eflags |= X86_EFLAGS_DF;
    4881         719 :         break;
    4882             : 
    4883             :     case 0xfe: /* Grp4 */
    4884         686 :         generate_exception_if((modrm_reg & 7) >= 2, EXC_UD);
    4885             :         /* Fallthrough. */
    4886             :     case 0xff: /* Grp5 */
    4887        8788 :         switch ( modrm_reg & 7 )
    4888             :         {
    4889             :         case 0: /* inc */
    4890         938 :             emulate_1op("inc", dst, _regs.eflags);
    4891         938 :             break;
    4892             :         case 1: /* dec */
    4893        4486 :             emulate_1op("dec", dst, _regs.eflags);
    4894        4486 :             break;
    4895             :         case 2: /* call (near) */
    4896        1728 :             dst.val = _regs.r(ip);
    4897        1728 :             if ( (rc = ops->insn_fetch(x86_seg_cs, src.val, NULL, 0, ctxt)) )
    4898           7 :                 goto done;
    4899        1721 :             _regs.r(ip) = src.val;
    4900        1721 :             src.val = dst.val;
    4901        1721 :             adjust_bnd(ctxt, ops, vex.pfx);
    4902        1721 :             goto push;
    4903             :         case 4: /* jmp (near) */
    4904         528 :             if ( (rc = ops->insn_fetch(x86_seg_cs, src.val, NULL, 0, ctxt)) )
    4905           4 :                 goto done;
    4906         524 :             _regs.r(ip) = src.val;
    4907         524 :             dst.type = OP_NONE;
    4908         524 :             adjust_bnd(ctxt, ops, vex.pfx);
    4909         524 :             break;
    4910             :         case 3: /* call (far, absolute indirect) */
    4911             :         case 5: /* jmp (far, absolute indirect) */
    4912         548 :             generate_exception_if(src.type != OP_MEM, EXC_UD);
    4913             : 
    4914         540 :             if ( (rc = read_ulong(src.mem.seg, src.mem.off + op_bytes,
    4915             :                                   &imm2, 2, ctxt, ops)) )
    4916          18 :                 goto done;
    4917         522 :             imm1 = src.val;
    4918         522 :             if ( !(modrm_reg & 4) )
    4919         258 :                 goto far_call;
    4920         264 :             goto far_jmp;
    4921             :         case 6: /* push */
    4922         463 :             goto push;
    4923             :         case 7:
    4924          97 :             generate_exception(EXC_UD);
    4925             :         }
    4926        5948 :         break;
    4927             : 
    4928             :     case X86EMUL_OPC(0x0f, 0x00): /* Grp6 */
    4929        2026 :         seg = (modrm_reg & 1) ? x86_seg_tr : x86_seg_ldtr;
    4930        2026 :         generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
    4931        2018 :         switch ( modrm_reg & 6 )
    4932             :         {
    4933             :         case 0: /* sldt / str */
    4934        1190 :             generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0);
    4935        1182 :             goto store_selector;
    4936             :         case 2: /* lldt / ltr */
    4937         254 :             generate_exception_if(!mode_ring0(), EXC_GP, 0);
    4938         238 :             if ( (rc = load_seg(seg, src.val, 0, NULL, ctxt, ops)) != 0 )
    4939          84 :                 goto done;
    4940         154 :             break;
    4941             :         case 4: /* verr / verw */
    4942         566 :             _regs.eflags &= ~X86_EFLAGS_ZF;
    4943         566 :             switch ( rc = protmode_load_seg(x86_seg_none, src.val, false,
    4944             :                                             &sreg, ctxt, ops) )
    4945             :             {
    4946             :             case X86EMUL_OKAY:
    4947        1051 :                 if ( sreg.attr.fields.s &&
    4948         412 :                      ((modrm_reg & 1) ? ((sreg.attr.fields.type & 0xa) == 0x2)
    4949         244 :                                       : ((sreg.attr.fields.type & 0xa) != 0x8)) )
    4950         219 :                     _regs.eflags |= X86_EFLAGS_ZF;
    4951         395 :                 break;
    4952             :             case X86EMUL_EXCEPTION:
    4953         171 :                 if ( ctxt->event_pending )
    4954             :                 {
    4955          16 :                     ASSERT(ctxt->event.vector == EXC_PF);
    4956             :             default:
    4957          16 :                     goto done;
    4958             :                 }
    4959             :                 /* Instead of the exception, ZF remains cleared. */
    4960         155 :                 rc = X86EMUL_OKAY;
    4961         155 :                 break;
    4962             :             }
    4963         550 :             break;
    4964             :         default:
    4965           8 :             generate_exception_if(true, EXC_UD);
    4966             :             break;
    4967             :         }
    4968         704 :         break;
    4969             : 
    4970             :     case X86EMUL_OPC(0x0f, 0x01): /* Grp7 */ {
    4971             :         unsigned long base, limit, cr0, cr0w;
    4972             : 
    4973        6063 :         switch( modrm )
    4974             :         {
    4975             :         case 0xca: /* clac */
    4976             :         case 0xcb: /* stac */
    4977         404 :             vcpu_must_have(smap);
    4978          56 :             generate_exception_if(vex.pfx || !mode_ring0(), EXC_UD);
    4979             : 
    4980          32 :             _regs.eflags &= ~X86_EFLAGS_AC;
    4981          32 :             if ( modrm == 0xcb )
    4982          24 :                 _regs.eflags |= X86_EFLAGS_AC;
    4983          88 :             goto complete_insn;
    4984             : 
    4985             : #ifdef __XEN__
    4986             :         case 0xd1: /* xsetbv */
    4987             :             generate_exception_if(vex.pfx, EXC_UD);
    4988             :             if ( !ops->read_cr || ops->read_cr(4, &cr4, ctxt) != X86EMUL_OKAY )
    4989             :                 cr4 = 0;
    4990             :             generate_exception_if(!(cr4 & X86_CR4_OSXSAVE), EXC_UD);
    4991             :             generate_exception_if(!mode_ring0() ||
    4992             :                                   handle_xsetbv(_regs.ecx,
    4993             :                                                 _regs.eax | (_regs.rdx << 32)),
    4994             :                                   EXC_GP, 0);
    4995             :             goto complete_insn;
    4996             : #endif
    4997             : 
    4998             :         case 0xd4: /* vmfunc */
    4999          16 :             generate_exception_if(vex.pfx, EXC_UD);
    5000           8 :             fail_if(!ops->vmfunc);
    5001           0 :             if ( (rc = ops->vmfunc(ctxt)) != X86EMUL_OKAY )
    5002           0 :                 goto done;
    5003           0 :             goto complete_insn;
    5004             : 
    5005             :         case 0xd5: /* xend */
    5006           8 :             generate_exception_if(vex.pfx, EXC_UD);
    5007           8 :             generate_exception_if(!vcpu_has_rtm(), EXC_UD);
    5008           8 :             generate_exception_if(vcpu_has_rtm(), EXC_GP, 0);
    5009           0 :             break;
    5010             : 
    5011             :         case 0xd6: /* xtest */
    5012          16 :             generate_exception_if(vex.pfx, EXC_UD);
    5013           8 :             generate_exception_if(!vcpu_has_rtm() && !vcpu_has_hle(),
    5014             :                                   EXC_UD);
    5015             :             /* Neither HLE nor RTM can be active when we get here. */
    5016           8 :             _regs.eflags |= X86_EFLAGS_ZF;
    5017           8 :             goto complete_insn;
    5018             : 
    5019             :         case 0xdf: /* invlpga */
    5020          48 :             generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
    5021          40 :             generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5022          24 :             fail_if(ops->invlpg == NULL);
    5023          16 :             if ( (rc = ops->invlpg(x86_seg_none, truncate_ea(_regs.r(ax)),
    5024             :                                    ctxt)) )
    5025           0 :                 goto done;
    5026          16 :             goto complete_insn;
    5027             : 
    5028             :         case 0xf9: /* rdtscp */
    5029          28 :             fail_if(ops->read_msr == NULL);
    5030          20 :             if ( (rc = ops->read_msr(MSR_TSC_AUX,
    5031             :                                      &msr_val, ctxt)) != X86EMUL_OKAY )
    5032           8 :                 goto done;
    5033          12 :             _regs.r(cx) = (uint32_t)msr_val;
    5034          12 :             goto rdtsc;
    5035             : 
    5036             :         case 0xfc: /* clzero */
    5037             :         {
    5038           8 :             unsigned long zero = 0;
    5039             : 
    5040          16 :             vcpu_must_have(clzero);
    5041             : 
    5042           0 :             base = ad_bytes == 8 ? _regs.r(ax) :
    5043           0 :                    ad_bytes == 4 ? _regs.eax : _regs.ax;
    5044           0 :             limit = 0;
    5045           0 :             if ( vcpu_has_clflush() &&
    5046           0 :                  ops->cpuid(1, 0, &cpuid_leaf, ctxt) == X86EMUL_OKAY )
    5047           0 :                 limit = ((cpuid_leaf.b >> 8) & 0xff) * 8;
    5048           0 :             generate_exception_if(limit < sizeof(long) ||
    5049             :                                   (limit & (limit - 1)), EXC_UD);
    5050           0 :             base &= ~(limit - 1);
    5051           0 :             if ( ops->rep_stos )
    5052             :             {
    5053           0 :                 unsigned long nr_reps = limit / sizeof(zero);
    5054             : 
    5055           0 :                 rc = ops->rep_stos(&zero, ea.mem.seg, base, sizeof(zero),
    5056             :                                    &nr_reps, ctxt);
    5057           0 :                 if ( rc == X86EMUL_OKAY )
    5058             :                 {
    5059           0 :                     base += nr_reps * sizeof(zero);
    5060           0 :                     limit -= nr_reps * sizeof(zero);
    5061             :                 }
    5062           0 :                 else if ( rc != X86EMUL_UNHANDLEABLE )
    5063           0 :                     goto done;
    5064             :             }
    5065           0 :             fail_if(limit && !ops->write);
    5066           0 :             while ( limit )
    5067             :             {
    5068           0 :                 rc = ops->write(ea.mem.seg, base, &zero, sizeof(zero), ctxt);
    5069           0 :                 if ( rc != X86EMUL_OKAY )
    5070           0 :                     goto done;
    5071           0 :                 base += sizeof(zero);
    5072           0 :                 limit -= sizeof(zero);
    5073             :             }
    5074           0 :             goto complete_insn;
    5075             :         }
    5076             :         }
    5077             : 
    5078        5883 :         seg = (modrm_reg & 1) ? x86_seg_idtr : x86_seg_gdtr;
    5079             : 
    5080        5883 :         switch ( modrm_reg & 7 )
    5081             :         {
    5082             :         case 0: /* sgdt */
    5083             :         case 1: /* sidt */
    5084         653 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    5085         645 :             generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0);
    5086         637 :             fail_if(!ops->read_segment || !ops->write);
    5087         615 :             if ( (rc = ops->read_segment(seg, &sreg, ctxt)) )
    5088           0 :                 goto done;
    5089         615 :             if ( mode_64bit() )
    5090         120 :                 op_bytes = 8;
    5091         495 :             else if ( op_bytes == 2 )
    5092             :             {
    5093         383 :                 sreg.base &= 0xffffff;
    5094         383 :                 op_bytes = 4;
    5095             :             }
    5096         615 :             if ( (rc = ops->write(ea.mem.seg, ea.mem.off, &sreg.limit,
    5097         615 :                                   2, ctxt)) != X86EMUL_OKAY ||
    5098         615 :                  (rc = ops->write(ea.mem.seg, ea.mem.off + 2, &sreg.base,
    5099             :                                   op_bytes, ctxt)) != X86EMUL_OKAY )
    5100             :                 goto done;
    5101         607 :             break;
    5102             :         case 2: /* lgdt */
    5103             :         case 3: /* lidt */
    5104         492 :             generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5105         476 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    5106         468 :             fail_if(ops->write_segment == NULL);
    5107         460 :             memset(&sreg, 0, sizeof(sreg));
    5108         460 :             if ( (rc = read_ulong(ea.mem.seg, ea.mem.off+0,
    5109         460 :                                   &limit, 2, ctxt, ops)) ||
    5110         460 :                  (rc = read_ulong(ea.mem.seg, ea.mem.off+2,
    5111         460 :                                   &base, mode_64bit() ? 8 : 4, ctxt, ops)) )
    5112             :                 goto done;
    5113         460 :             generate_exception_if(!is_canonical_address(base), EXC_GP, 0);
    5114         452 :             sreg.base = base;
    5115         452 :             sreg.limit = limit;
    5116         452 :             if ( !mode_64bit() && op_bytes == 2 )
    5117         364 :                 sreg.base &= 0xffffff;
    5118         452 :             if ( (rc = ops->write_segment(seg, &sreg, ctxt)) )
    5119           8 :                 goto done;
    5120         444 :             break;
    5121             :         case 4: /* smsw */
    5122        3912 :             generate_exception_if(umip_active(ctxt, ops), EXC_GP, 0);
    5123        3904 :             if ( ea.type == OP_MEM )
    5124             :             {
    5125        3887 :                 fail_if(!ops->write);
    5126        3879 :                 d |= Mov; /* force writeback */
    5127        3879 :                 ea.bytes = 2;
    5128             :             }
    5129             :             else
    5130          17 :                 ea.bytes = op_bytes;
    5131        3896 :             dst = ea;
    5132        3896 :             fail_if(ops->read_cr == NULL);
    5133        3880 :             if ( (rc = ops->read_cr(0, &dst.val, ctxt)) )
    5134           0 :                 goto done;
    5135        3880 :             break;
    5136             :         case 6: /* lmsw */
    5137         658 :             fail_if(ops->read_cr == NULL);
    5138         650 :             fail_if(ops->write_cr == NULL);
    5139         642 :             generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5140         626 :             if ( (rc = ops->read_cr(0, &cr0, ctxt)) )
    5141           0 :                 goto done;
    5142         626 :             if ( ea.type == OP_REG )
    5143         161 :                 cr0w = *ea.reg;
    5144         465 :             else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off,
    5145             :                                        &cr0w, 2, ctxt, ops)) )
    5146          30 :                 goto done;
    5147             :             /* LMSW can: (1) set bits 0-3; (2) clear bits 1-3. */
    5148         596 :             cr0 = (cr0 & ~0xe) | (cr0w & 0xf);
    5149         596 :             if ( (rc = ops->write_cr(0, cr0, ctxt)) )
    5150           8 :                 goto done;
    5151         588 :             break;
    5152             :         case 7: /* invlpg */
    5153         160 :             generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5154         144 :             generate_exception_if(ea.type != OP_MEM, EXC_UD);
    5155         136 :             fail_if(ops->invlpg == NULL);
    5156         128 :             if ( (rc = ops->invlpg(ea.mem.seg, ea.mem.off, ctxt)) )
    5157           8 :                 goto done;
    5158         120 :             break;
    5159             :         default:
    5160           8 :             goto cannot_emulate;
    5161             :         }
    5162        5639 :         break;
    5163             :     }
    5164             : 
    5165             :     case X86EMUL_OPC(0x0f, 0x02): /* lar */
    5166        1597 :         generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
    5167        1589 :         _regs.eflags &= ~X86_EFLAGS_ZF;
    5168        1589 :         switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, &sreg,
    5169             :                                         ctxt, ops) )
    5170             :         {
    5171             :         case X86EMUL_OKAY:
    5172         764 :             if ( !sreg.attr.fields.s )
    5173             :             {
    5174         489 :                 switch ( sreg.attr.fields.type )
    5175             :                 {
    5176             :                 case 0x01: /* available 16-bit TSS */
    5177             :                 case 0x03: /* busy 16-bit TSS */
    5178             :                 case 0x04: /* 16-bit call gate */
    5179             :                 case 0x05: /* 16/32-bit task gate */
    5180         189 :                     if ( ctxt->lma )
    5181         149 :                         break;
    5182             :                     /* fall through */
    5183             :                 case 0x02: /* LDT */
    5184             :                 case 0x09: /* available 32/64-bit TSS */
    5185             :                 case 0x0b: /* busy 32/64-bit TSS */
    5186             :                 case 0x0c: /* 32/64-bit call gate */
    5187         171 :                     _regs.eflags |= X86_EFLAGS_ZF;
    5188         171 :                     break;
    5189             :                 }
    5190             :             }
    5191             :             else
    5192         275 :                 _regs.eflags |= X86_EFLAGS_ZF;
    5193         764 :             break;
    5194             :         case X86EMUL_EXCEPTION:
    5195         794 :             if ( ctxt->event_pending )
    5196             :             {
    5197           8 :                 ASSERT(ctxt->event.vector == EXC_PF);
    5198             :         default:
    5199          39 :                 goto done;
    5200             :             }
    5201             :             /* Instead of the exception, ZF remains cleared. */
    5202         786 :             rc = X86EMUL_OKAY;
    5203         786 :             break;
    5204             :         }
    5205        1550 :         if ( _regs.eflags & X86_EFLAGS_ZF )
    5206        1338 :             dst.val = ((sreg.attr.bytes & 0xff) << 8) |
    5207         446 :                       ((sreg.limit >> (sreg.attr.fields.g ? 12 : 0)) &
    5208         446 :                        0xf0000) |
    5209         446 :                       ((sreg.attr.bytes & 0xf00) << 12);
    5210             :         else
    5211        1104 :             dst.type = OP_NONE;
    5212        1550 :         break;
    5213             : 
    5214             :     case X86EMUL_OPC(0x0f, 0x03): /* lsl */
    5215        1340 :         generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
    5216        1332 :         _regs.eflags &= ~X86_EFLAGS_ZF;
    5217        1332 :         switch ( rc = protmode_load_seg(x86_seg_none, src.val, false, &sreg,
    5218             :                                         ctxt, ops) )
    5219             :         {
    5220             :         case X86EMUL_OKAY:
    5221        1052 :             if ( !sreg.attr.fields.s )
    5222             :             {
    5223         963 :                 switch ( sreg.attr.fields.type )
    5224             :                 {
    5225             :                 case 0x01: /* available 16-bit TSS */
    5226             :                 case 0x03: /* busy 16-bit TSS */
    5227          53 :                     if ( ctxt->lma )
    5228           8 :                         break;
    5229             :                     /* fall through */
    5230             :                 case 0x02: /* LDT */
    5231             :                 case 0x09: /* available 32/64-bit TSS */
    5232             :                 case 0x0b: /* busy 32/64-bit TSS */
    5233         430 :                     _regs.eflags |= X86_EFLAGS_ZF;
    5234         430 :                     break;
    5235             :                 }
    5236             :             }
    5237             :             else
    5238          89 :                 _regs.eflags |= X86_EFLAGS_ZF;
    5239        1052 :             break;
    5240             :         case X86EMUL_EXCEPTION:
    5241         280 :             if ( ctxt->event_pending )
    5242             :             {
    5243           8 :                 ASSERT(ctxt->event.vector == EXC_PF);
    5244             :         default:
    5245           8 :                 goto done;
    5246             :             }
    5247             :             /* Instead of the exception, ZF remains cleared. */
    5248         272 :             rc = X86EMUL_OKAY;
    5249         272 :             break;
    5250             :         }
    5251        1324 :         if ( _regs.eflags & X86_EFLAGS_ZF )
    5252         519 :             dst.val = sreg.limit;
    5253             :         else
    5254         805 :             dst.type = OP_NONE;
    5255        1324 :         break;
    5256             : 
    5257             :     case X86EMUL_OPC(0x0f, 0x05): /* syscall */
    5258         508 :         generate_exception_if(!in_protmode(ctxt, ops), EXC_UD);
    5259             : 
    5260             :         /* Inject #UD if syscall/sysret are disabled. */
    5261         500 :         fail_if(ops->read_msr == NULL);
    5262         484 :         if ( (rc = ops->read_msr(MSR_EFER, &msr_val, ctxt)) != X86EMUL_OKAY )
    5263           0 :             goto done;
    5264         484 :         generate_exception_if((msr_val & EFER_SCE) == 0, EXC_UD);
    5265             : 
    5266         459 :         if ( (rc = ops->read_msr(MSR_STAR, &msr_val, ctxt)) != X86EMUL_OKAY )
    5267           0 :             goto done;
    5268             : 
    5269         459 :         cs.sel = (msr_val >> 32) & ~3; /* SELECTOR_RPL_MASK */
    5270         459 :         sreg.sel = cs.sel + 8;
    5271             : 
    5272         459 :         cs.base = sreg.base = 0; /* flat segment */
    5273         459 :         cs.limit = sreg.limit = ~0u;  /* 4GB limit */
    5274         459 :         sreg.attr.bytes = 0xc93; /* G+DB+P+S+Data */
    5275             : 
    5276             : #ifdef __x86_64__
    5277         459 :         if ( ctxt->lma )
    5278             :         {
    5279         194 :             cs.attr.bytes = 0xa9b; /* L+DB+P+S+Code */
    5280             : 
    5281         194 :             _regs.rcx = _regs.rip;
    5282         194 :             _regs.r11 = _regs.eflags & ~X86_EFLAGS_RF;
    5283             : 
    5284         194 :             if ( (rc = ops->read_msr(mode_64bit() ? MSR_LSTAR : MSR_CSTAR,
    5285             :                                      &msr_val, ctxt)) != X86EMUL_OKAY )
    5286           0 :                 goto done;
    5287         194 :             _regs.rip = msr_val;
    5288             : 
    5289         194 :             if ( (rc = ops->read_msr(MSR_SYSCALL_MASK,
    5290             :                                      &msr_val, ctxt)) != X86EMUL_OKAY )
    5291           0 :                 goto done;
    5292         194 :             _regs.eflags &= ~(msr_val | X86_EFLAGS_RF);
    5293             :         }
    5294             :         else
    5295             : #endif
    5296             :         {
    5297         265 :             cs.attr.bytes = 0xc9b; /* G+DB+P+S+Code */
    5298             : 
    5299         265 :             _regs.r(cx) = _regs.eip;
    5300         265 :             _regs.eip = msr_val;
    5301         265 :             _regs.eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF | X86_EFLAGS_RF);
    5302             :         }
    5303             : 
    5304         459 :         fail_if(ops->write_segment == NULL);
    5305         850 :         if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) ||
    5306         399 :              (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) )
    5307             :             goto done;
    5308             : 
    5309             :         /*
    5310             :          * SYSCALL (unlike most instructions) evaluates its singlestep action
    5311             :          * based on the resulting EFLAGS.TF, not the starting EFLAGS.TF.
    5312             :          *
    5313             :          * As the #DB is raised after the CPL change and before the OS can
    5314             :          * switch stack, it is a large risk for privilege escalation.
    5315             :          *
    5316             :          * 64bit kernels should mask EFLAGS.TF in MSR_SYSCALL_MASK to avoid any
    5317             :          * vulnerability.  Running the #DB handler on an IST stack is also a
    5318             :          * mitigation.
    5319             :          *
    5320             :          * 32bit kernels have no ability to mask EFLAGS.TF at all.
    5321             :          * Their only mitigation is to use a task gate for handling
    5322             :          * #DB (or to not use enable EFER.SCE to start with).
    5323             :          */
    5324         399 :         singlestep = _regs.eflags & X86_EFLAGS_TF;
    5325         399 :         break;
    5326             : 
    5327             :     case X86EMUL_OPC(0x0f, 0x06): /* clts */
    5328        1420 :         generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5329        1404 :         fail_if((ops->read_cr == NULL) || (ops->write_cr == NULL));
    5330        2776 :         if ( (rc = ops->read_cr(0, &dst.val, ctxt)) != X86EMUL_OKAY ||
    5331        1388 :              (rc = ops->write_cr(0, dst.val & ~X86_CR0_TS, ctxt)) != X86EMUL_OKAY )
    5332             :             goto done;
    5333        1372 :         break;
    5334             : 
    5335             :     case X86EMUL_OPC(0x0f, 0x08): /* invd */
    5336             :     case X86EMUL_OPC(0x0f, 0x09): /* wbinvd */
    5337        1158 :         generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5338        1142 :         fail_if(ops->wbinvd == NULL);
    5339        1134 :         if ( (rc = ops->wbinvd(ctxt)) != 0 )
    5340           8 :             goto done;
    5341        1126 :         break;
    5342             : 
    5343             :     case X86EMUL_OPC(0x0f, 0x0b): /* ud2 */
    5344             :     case X86EMUL_OPC(0x0f, 0xb9): /* ud1 */
    5345             :     case X86EMUL_OPC(0x0f, 0xff): /* ud0 */
    5346          16 :         generate_exception(EXC_UD);
    5347             : 
    5348             :     case X86EMUL_OPC(0x0f, 0x0d): /* GrpP (prefetch) */
    5349             :     case X86EMUL_OPC(0x0f, 0x18): /* Grp16 (prefetch/nop) */
    5350             :     case X86EMUL_OPC(0x0f, 0x19) ... X86EMUL_OPC(0x0f, 0x1f): /* nop */
    5351         818 :         break;
    5352             : 
    5353             : #define CASE_SIMD_PACKED_INT(pfx, opc)       \
    5354             :     case X86EMUL_OPC(pfx, opc):              \
    5355             :     case X86EMUL_OPC_66(pfx, opc)
    5356             : #define CASE_SIMD_SINGLE_FP(kind, pfx, opc)  \
    5357             :     case X86EMUL_OPC##kind(pfx, opc):        \
    5358             :     case X86EMUL_OPC##kind##_F3(pfx, opc)
    5359             : #define CASE_SIMD_DOUBLE_FP(kind, pfx, opc)  \
    5360             :     case X86EMUL_OPC##kind##_66(pfx, opc):   \
    5361             :     case X86EMUL_OPC##kind##_F2(pfx, opc)
    5362             : #define CASE_SIMD_ALL_FP(kind, pfx, opc)     \
    5363             :     CASE_SIMD_SINGLE_FP(kind, pfx, opc):     \
    5364             :     CASE_SIMD_DOUBLE_FP(kind, pfx, opc)
    5365             : #define CASE_SIMD_PACKED_FP(kind, pfx, opc)  \
    5366             :     case X86EMUL_OPC##kind(pfx, opc):        \
    5367             :     case X86EMUL_OPC##kind##_66(pfx, opc)
    5368             : #define CASE_SIMD_SCALAR_FP(kind, pfx, opc)  \
    5369             :     case X86EMUL_OPC##kind##_F3(pfx, opc):   \
    5370             :     case X86EMUL_OPC##kind##_F2(pfx, opc)
    5371             : 
    5372             :     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2b):     /* movnts{s,d} xmm,mem */
    5373           8 :         host_and_vcpu_must_have(sse4a);
    5374             :         /* fall through */
    5375             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x2b):     /* movntp{s,d} xmm,m128 */
    5376             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2b): /* vmovntp{s,d} {x,y}mm,mem */
    5377         859 :         generate_exception_if(ea.type != OP_MEM, EXC_UD);
    5378         821 :         sfence = true;
    5379             :         /* fall through */
    5380             :     CASE_SIMD_ALL_FP(, 0x0f, 0x10):        /* mov{up,s}{s,d} xmm/mem,xmm */
    5381             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x10): /* vmovup{s,d} {x,y}mm/mem,{x,y}mm */
    5382             :     CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x10): /* vmovs{s,d} mem,xmm */
    5383             :                                            /* vmovs{s,d} xmm,xmm,xmm */
    5384             :     CASE_SIMD_ALL_FP(, 0x0f, 0x11):        /* mov{up,s}{s,d} xmm,xmm/mem */
    5385             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x11): /* vmovup{s,d} {x,y}mm,{x,y}mm/mem */
    5386             :     CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x11): /* vmovs{s,d} xmm,mem */
    5387             :                                            /* vmovs{s,d} xmm,xmm,xmm */
    5388             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x14):     /* unpcklp{s,d} xmm/m128,xmm */
    5389             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x14): /* vunpcklp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5390             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x15):     /* unpckhp{s,d} xmm/m128,xmm */
    5391             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x15): /* vunpckhp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5392             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x28):     /* movap{s,d} xmm/m128,xmm */
    5393             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x28): /* vmovap{s,d} {x,y}mm/mem,{x,y}mm */
    5394             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x29):     /* movap{s,d} xmm,xmm/m128 */
    5395             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x29): /* vmovap{s,d} {x,y}mm,{x,y}mm/mem */
    5396             :     CASE_SIMD_ALL_FP(, 0x0f, 0x51):        /* sqrt{p,s}{s,d} xmm/mem,xmm */
    5397             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x51):    /* vsqrtp{s,d} {x,y}mm/mem,{x,y}mm */
    5398             :                                            /* vsqrts{s,d} xmm/m32,xmm,xmm */
    5399             :     CASE_SIMD_SINGLE_FP(, 0x0f, 0x52):     /* rsqrt{p,s}s xmm/mem,xmm */
    5400             :     CASE_SIMD_SINGLE_FP(_VEX, 0x0f, 0x52): /* vrsqrtps {x,y}mm/mem,{x,y}mm */
    5401             :                                            /* vrsqrtss xmm/m32,xmm,xmm */
    5402             :     CASE_SIMD_SINGLE_FP(, 0x0f, 0x53):     /* rcp{p,s}s xmm/mem,xmm */
    5403             :     CASE_SIMD_SINGLE_FP(_VEX, 0x0f, 0x53): /* vrcpps {x,y}mm/mem,{x,y}mm */
    5404             :                                            /* vrcpss xmm/m32,xmm,xmm */
    5405             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x54):     /* andp{s,d} xmm/m128,xmm */
    5406             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x54): /* vandp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5407             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x55):     /* andnp{s,d} xmm/m128,xmm */
    5408             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x55): /* vandnp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5409             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x56):     /* orp{s,d} xmm/m128,xmm */
    5410             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x56): /* vorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5411             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x57):     /* xorp{s,d} xmm/m128,xmm */
    5412             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x57): /* vxorp{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5413             :     CASE_SIMD_ALL_FP(, 0x0f, 0x58):        /* add{p,s}{s,d} xmm/mem,xmm */
    5414             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x58):    /* vadd{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5415             :     CASE_SIMD_ALL_FP(, 0x0f, 0x59):        /* mul{p,s}{s,d} xmm/mem,xmm */
    5416             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x59):    /* vmul{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5417             :     CASE_SIMD_ALL_FP(, 0x0f, 0x5c):        /* sub{p,s}{s,d} xmm/mem,xmm */
    5418             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5c):    /* vsub{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5419             :     CASE_SIMD_ALL_FP(, 0x0f, 0x5d):        /* min{p,s}{s,d} xmm/mem,xmm */
    5420             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5d):    /* vmin{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5421             :     CASE_SIMD_ALL_FP(, 0x0f, 0x5e):        /* div{p,s}{s,d} xmm/mem,xmm */
    5422             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5e):    /* vdiv{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5423             :     CASE_SIMD_ALL_FP(, 0x0f, 0x5f):        /* max{p,s}{s,d} xmm/mem,xmm */
    5424             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5f):    /* vmax{p,s}{s,d} {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5425             :     simd_0f_fp:
    5426        2990 :         if ( vex.opcx == vex_none )
    5427             :         {
    5428        2784 :             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
    5429             :             {
    5430             :     simd_0f_sse2:
    5431         579 :                 vcpu_must_have(sse2);
    5432             :             }
    5433             :             else
    5434        2429 :                 vcpu_must_have(sse);
    5435             :     simd_0f_xmm:
    5436        3048 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    5437             :         }
    5438             :         else
    5439             :         {
    5440             :             /* vmovs{s,d} to/from memory have only two operands. */
    5441         206 :             if ( (b & ~1) == 0x10 && ea.type == OP_MEM )
    5442           0 :                 d |= TwoOp;
    5443             :     simd_0f_avx:
    5444         919 :             host_and_vcpu_must_have(avx);
    5445             :     simd_0f_ymm:
    5446         942 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    5447             :         }
    5448             :     simd_0f_common:
    5449        6528 :         opc = init_prefixes(stub);
    5450        6528 :         opc[0] = b;
    5451        6528 :         opc[1] = modrm;
    5452        6528 :         if ( ea.type == OP_MEM )
    5453             :         {
    5454             :             /* convert memory operand to (%rAX) */
    5455        5961 :             rex_prefix &= ~REX_B;
    5456        5961 :             vex.b = 1;
    5457        5961 :             opc[1] &= 0x38;
    5458             :         }
    5459        6528 :         fic.insn_bytes = PFX_BYTES + 2;
    5460        6528 :         break;
    5461             : 
    5462             :     case X86EMUL_OPC_66(0x0f, 0x12):       /* movlpd m64,xmm */
    5463             :     case X86EMUL_OPC_VEX_66(0x0f, 0x12):   /* vmovlpd m64,xmm,xmm */
    5464             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x13):     /* movlp{s,d} xmm,m64 */
    5465             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x13): /* vmovlp{s,d} xmm,m64 */
    5466             :     case X86EMUL_OPC_66(0x0f, 0x16):       /* movhpd m64,xmm */
    5467             :     case X86EMUL_OPC_VEX_66(0x0f, 0x16):   /* vmovhpd m64,xmm,xmm */
    5468             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x17):     /* movhp{s,d} xmm,m64 */
    5469             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x17): /* vmovhp{s,d} xmm,m64 */
    5470          39 :         generate_exception_if(ea.type != OP_MEM, EXC_UD);
    5471             :         /* fall through */
    5472             :     case X86EMUL_OPC(0x0f, 0x12):          /* movlps m64,xmm */
    5473             :                                            /* movhlps xmm,xmm */
    5474             :     case X86EMUL_OPC_VEX(0x0f, 0x12):      /* vmovlps m64,xmm,xmm */
    5475             :                                            /* vmovhlps xmm,xmm,xmm */
    5476             :     case X86EMUL_OPC(0x0f, 0x16):          /* movhps m64,xmm */
    5477             :                                            /* movlhps xmm,xmm */
    5478             :     case X86EMUL_OPC_VEX(0x0f, 0x16):      /* vmovhps m64,xmm,xmm */
    5479             :                                            /* vmovlhps xmm,xmm,xmm */
    5480          69 :         generate_exception_if(vex.l, EXC_UD);
    5481          61 :         if ( (d & DstMask) != DstMem )
    5482          53 :             d &= ~TwoOp;
    5483          61 :         op_bytes = 8;
    5484          61 :         goto simd_0f_fp;
    5485             : 
    5486             :     case X86EMUL_OPC_F3(0x0f, 0x12):       /* movsldup xmm/m128,xmm */
    5487             :     case X86EMUL_OPC_VEX_F3(0x0f, 0x12):   /* vmovsldup {x,y}mm/mem,{x,y}mm */
    5488             :     case X86EMUL_OPC_F2(0x0f, 0x12):       /* movddup xmm/m64,xmm */
    5489             :     case X86EMUL_OPC_VEX_F2(0x0f, 0x12):   /* vmovddup {x,y}mm/mem,{x,y}mm */
    5490             :     case X86EMUL_OPC_F3(0x0f, 0x16):       /* movshdup xmm/m128,xmm */
    5491             :     case X86EMUL_OPC_VEX_F3(0x0f, 0x16):   /* vmovshdup {x,y}mm/mem,{x,y}mm */
    5492          48 :         d |= TwoOp;
    5493          64 :         op_bytes = !(vex.pfx & VEX_PREFIX_DOUBLE_MASK) || vex.l
    5494          16 :                    ? 16 << vex.l : 8;
    5495             :     simd_0f_sse3_avx:
    5496          88 :         if ( vex.opcx != vex_none )
    5497          48 :             goto simd_0f_avx;
    5498          40 :         host_and_vcpu_must_have(sse3);
    5499          40 :         goto simd_0f_xmm;
    5500             : 
    5501             :     case X86EMUL_OPC(0x0f, 0x20): /* mov cr,reg */
    5502             :     case X86EMUL_OPC(0x0f, 0x21): /* mov dr,reg */
    5503             :     case X86EMUL_OPC(0x0f, 0x22): /* mov reg,cr */
    5504             :     case X86EMUL_OPC(0x0f, 0x23): /* mov reg,dr */
    5505         971 :         generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5506         939 :         if ( b & 2 )
    5507             :         {
    5508             :             /* Write to CR/DR. */
    5509        1502 :             typeof(ops->write_cr) write = (b & 1) ? ops->write_dr
    5510         751 :                                                   : ops->write_cr;
    5511             : 
    5512         751 :             fail_if(!write);
    5513         709 :             rc = write(modrm_reg, src.val, ctxt);
    5514             :         }
    5515             :         else
    5516             :         {
    5517             :             /* Read from CR/DR. */
    5518         188 :             typeof(ops->read_cr) read = (b & 1) ? ops->read_dr : ops->read_cr;
    5519             : 
    5520         188 :             fail_if(!read);
    5521         177 :             rc = read(modrm_reg, &dst.val, ctxt);
    5522             :         }
    5523         886 :         if ( rc != X86EMUL_OKAY )
    5524          16 :             goto done;
    5525         870 :         break;
    5526             : 
    5527             :     case X86EMUL_OPC_66(0x0f, 0x2a):       /* cvtpi2pd mm/m64,xmm */
    5528          16 :         if ( ea.type == OP_REG )
    5529             :         {
    5530             :     case X86EMUL_OPC(0x0f, 0x2a):          /* cvtpi2ps mm/m64,xmm */
    5531             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x2c):     /* cvttp{s,d}2pi xmm/mem,mm */
    5532             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x2d):     /* cvtp{s,d}2pi xmm/mem,mm */
    5533         223 :             host_and_vcpu_must_have(mmx);
    5534             :         }
    5535         231 :         op_bytes = (b & 4) && (vex.pfx & VEX_PREFIX_DOUBLE_MASK) ? 16 : 8;
    5536         231 :         goto simd_0f_fp;
    5537             : 
    5538             :     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2a):     /* cvtsi2s{s,d} r/m,xmm */
    5539             :     CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2a): /* vcvtsi2s{s,d} r/m,xmm,xmm */
    5540          84 :         if ( vex.opcx == vex_none )
    5541             :         {
    5542          16 :             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
    5543           8 :                 vcpu_must_have(sse2);
    5544             :             else
    5545           8 :                 vcpu_must_have(sse);
    5546          16 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    5547             :         }
    5548             :         else
    5549             :         {
    5550          68 :             host_and_vcpu_must_have(avx);
    5551          68 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    5552             :         }
    5553             : 
    5554          60 :         if ( ea.type == OP_MEM )
    5555             :         {
    5556          52 :             rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val,
    5557          52 :                             rex_prefix & REX_W ? 8 : 4, ctxt, ops);
    5558          52 :             if ( rc != X86EMUL_OKAY )
    5559           8 :                 goto done;
    5560             :         }
    5561             :         else
    5562           8 :             src.val = rex_prefix & REX_W ? *ea.reg : (uint32_t)*ea.reg;
    5563             : 
    5564          52 :         state->simd_size = simd_none;
    5565          52 :         goto simd_0f_rm;
    5566             : 
    5567             :     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2c):     /* cvtts{s,d}2si xmm/mem,reg */
    5568             :     CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2c): /* vcvtts{s,d}2si xmm/mem,reg */
    5569             :     CASE_SIMD_SCALAR_FP(, 0x0f, 0x2d):     /* cvts{s,d}2si xmm/mem,reg */
    5570             :     CASE_SIMD_SCALAR_FP(_VEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */
    5571         317 :         if ( vex.opcx == vex_none )
    5572             :         {
    5573          24 :             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
    5574           8 :                 vcpu_must_have(sse2);
    5575             :             else
    5576          16 :                 vcpu_must_have(sse);
    5577          24 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    5578             :         }
    5579             :         else
    5580             :         {
    5581         293 :             if ( ctxt->vendor == X86_VENDOR_AMD )
    5582           0 :                 vex.l = 0;
    5583         293 :             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    5584         285 :             host_and_vcpu_must_have(avx);
    5585         285 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    5586             :         }
    5587             : 
    5588         285 :         opc = init_prefixes(stub);
    5589         285 :         opc[0] = b;
    5590             :         /* Convert GPR destination to %rAX and memory operand to (%rCX). */
    5591         285 :         rex_prefix &= ~REX_R;
    5592         285 :         vex.r = 1;
    5593         285 :         if ( ea.type == OP_MEM )
    5594             :         {
    5595         158 :             rex_prefix &= ~REX_B;
    5596         158 :             vex.b = 1;
    5597         158 :             opc[1] = 0x01;
    5598             : 
    5599         316 :             rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp,
    5600         158 :                            vex.pfx & VEX_PREFIX_DOUBLE_MASK ? 8 : 4, ctxt);
    5601         158 :             if ( rc != X86EMUL_OKAY )
    5602           8 :                 goto done;
    5603             :         }
    5604             :         else
    5605         127 :             opc[1] = modrm & 0xc7;
    5606         277 :         if ( !mode_64bit() )
    5607         183 :             vex.w = 0;
    5608         277 :         fic.insn_bytes = PFX_BYTES + 2;
    5609         277 :         opc[2] = 0xc3;
    5610             : 
    5611         277 :         copy_REX_VEX(opc, rex_prefix, vex);
    5612         277 :         ea.reg = decode_register(modrm_reg, &_regs, 0);
    5613         277 :         invoke_stub("", "", "=a" (*ea.reg), "+m" (fic.exn_raised)
    5614             :                             : "c" (mmvalp), "m" (*mmvalp));
    5615             : 
    5616         277 :         put_stub(stub);
    5617         277 :         check_xmm_exn(&fic);
    5618             : 
    5619         277 :         state->simd_size = simd_none;
    5620         277 :         break;
    5621             : 
    5622             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x2e):     /* ucomis{s,d} xmm/mem,xmm */
    5623             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2e): /* vucomis{s,d} xmm/mem,xmm */
    5624             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x2f):     /* comis{s,d} xmm/mem,xmm */
    5625             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x2f): /* vcomis{s,d} xmm/mem,xmm */
    5626         281 :         if ( vex.opcx == vex_none )
    5627             :         {
    5628          97 :             if ( vex.pfx )
    5629          16 :                 vcpu_must_have(sse2);
    5630             :             else
    5631          81 :                 vcpu_must_have(sse);
    5632          97 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    5633             :         }
    5634             :         else
    5635             :         {
    5636         184 :             generate_exception_if(vex.reg != 0xf, EXC_UD);
    5637         176 :             host_and_vcpu_must_have(avx);
    5638         176 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    5639             :         }
    5640             : 
    5641         265 :         opc = init_prefixes(stub);
    5642         265 :         opc[0] = b;
    5643         265 :         opc[1] = modrm;
    5644         265 :         if ( ea.type == OP_MEM )
    5645             :         {
    5646         241 :             rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, vex.pfx ? 8 : 4,
    5647             :                            ctxt);
    5648         241 :             if ( rc != X86EMUL_OKAY )
    5649           9 :                 goto done;
    5650             : 
    5651             :             /* Convert memory operand to (%rAX). */
    5652         232 :             rex_prefix &= ~REX_B;
    5653         232 :             vex.b = 1;
    5654         232 :             opc[1] &= 0x38;
    5655             :         }
    5656         256 :         fic.insn_bytes = PFX_BYTES + 2;
    5657         256 :         opc[2] = 0xc3;
    5658             : 
    5659         256 :         copy_REX_VEX(opc, rex_prefix, vex);
    5660         256 :         invoke_stub(_PRE_EFLAGS("[eflags]", "[mask]", "[tmp]"),
    5661             :                     _POST_EFLAGS("[eflags]", "[mask]", "[tmp]"),
    5662             :                     [eflags] "+g" (_regs.eflags),
    5663             :                     [tmp] "=&r" (dummy), "+m" (*mmvalp),
    5664             :                     "+m" (fic.exn_raised)
    5665             :                     : [func] "rm" (stub.func), "a" (mmvalp),
    5666             :                       [mask] "i" (EFLAGS_MASK));
    5667             : 
    5668         256 :         put_stub(stub);
    5669         256 :         check_xmm_exn(&fic);
    5670             : 
    5671         256 :         ASSERT(!state->simd_size);
    5672         256 :         break;
    5673             : 
    5674             :     case X86EMUL_OPC(0x0f, 0x30): /* wrmsr */
    5675         145 :         generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5676         129 :         fail_if(ops->write_msr == NULL);
    5677         242 :         if ( (rc = ops->write_msr(_regs.ecx,
    5678         121 :                                   ((uint64_t)_regs.r(dx) << 32) | _regs.eax,
    5679             :                                   ctxt)) != 0 )
    5680          32 :             goto done;
    5681          89 :         break;
    5682             : 
    5683             :     case X86EMUL_OPC(0x0f, 0x31): rdtsc: /* rdtsc */
    5684          60 :         if ( !mode_ring0() )
    5685             :         {
    5686          32 :             fail_if(ops->read_cr == NULL);
    5687          24 :             if ( (rc = ops->read_cr(4, &cr4, ctxt)) )
    5688           0 :                 goto done;
    5689          24 :             generate_exception_if(cr4 & X86_CR4_TSD, EXC_GP, 0);
    5690             :         }
    5691          36 :         fail_if(ops->read_msr == NULL);
    5692          28 :         if ( (rc = ops->read_msr(MSR_IA32_TSC,
    5693             :                                  &msr_val, ctxt)) != X86EMUL_OKAY )
    5694          16 :             goto done;
    5695          12 :         _regs.r(dx) = msr_val >> 32;
    5696          12 :         _regs.r(ax) = (uint32_t)msr_val;
    5697          12 :         break;
    5698             : 
    5699             :     case X86EMUL_OPC(0x0f, 0x32): /* rdmsr */
    5700         168 :         generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5701         144 :         fail_if(ops->read_msr == NULL);
    5702         144 :         if ( (rc = ops->read_msr(_regs.ecx, &msr_val, ctxt)) != X86EMUL_OKAY )
    5703           0 :             goto done;
    5704         144 :         _regs.r(dx) = msr_val >> 32;
    5705         144 :         _regs.r(ax) = (uint32_t)msr_val;
    5706         144 :         break;
    5707             : 
    5708             :     case X86EMUL_OPC(0x0f, 0x40) ... X86EMUL_OPC(0x0f, 0x4f): /* cmovcc */
    5709        8730 :         vcpu_must_have(cmov);
    5710        8730 :         if ( test_cc(b, _regs.eflags) )
    5711        5120 :             dst.val = src.val;
    5712        8730 :         break;
    5713             : 
    5714             :     case X86EMUL_OPC(0x0f, 0x34): /* sysenter */
    5715         830 :         vcpu_must_have(sep);
    5716         830 :         generate_exception_if(mode_ring0(), EXC_GP, 0);
    5717         806 :         generate_exception_if(!in_protmode(ctxt, ops), EXC_GP, 0);
    5718             : 
    5719         798 :         fail_if(ops->read_msr == NULL);
    5720         790 :         if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_CS,
    5721             :                                  &msr_val, ctxt)) != X86EMUL_OKAY )
    5722           0 :             goto done;
    5723             : 
    5724         790 :         generate_exception_if(!(msr_val & 0xfffc), EXC_GP, 0);
    5725             : 
    5726         782 :         _regs.eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF | X86_EFLAGS_RF);
    5727             : 
    5728         782 :         cs.sel = msr_val & ~3; /* SELECTOR_RPL_MASK */
    5729         782 :         cs.base = 0;   /* flat segment */
    5730         782 :         cs.limit = ~0u;  /* 4GB limit */
    5731         782 :         cs.attr.bytes = ctxt->lma ? 0xa9b  /* G+L+P+S+Code */
    5732             :                                   : 0xc9b; /* G+DB+P+S+Code */
    5733             : 
    5734         782 :         sreg.sel = cs.sel + 8;
    5735         782 :         sreg.base = 0;   /* flat segment */
    5736         782 :         sreg.limit = ~0u;  /* 4GB limit */
    5737         782 :         sreg.attr.bytes = 0xc93; /* G+DB+P+S+Data */
    5738             : 
    5739         782 :         fail_if(ops->write_segment == NULL);
    5740        1540 :         if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != 0 ||
    5741         766 :              (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) != 0 )
    5742             :             goto done;
    5743             : 
    5744         766 :         if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_EIP,
    5745             :                                  &msr_val, ctxt)) != X86EMUL_OKAY )
    5746           0 :             goto done;
    5747         766 :         _regs.r(ip) = ctxt->lma ? msr_val : (uint32_t)msr_val;
    5748             : 
    5749         766 :         if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_ESP,
    5750             :                                  &msr_val, ctxt)) != X86EMUL_OKAY )
    5751           0 :             goto done;
    5752         766 :         _regs.r(sp) = ctxt->lma ? msr_val : (uint32_t)msr_val;
    5753             : 
    5754         766 :         singlestep = _regs.eflags & X86_EFLAGS_TF;
    5755         766 :         break;
    5756             : 
    5757             :     case X86EMUL_OPC(0x0f, 0x35): /* sysexit */
    5758         830 :         vcpu_must_have(sep);
    5759         830 :         generate_exception_if(!mode_ring0(), EXC_GP, 0);
    5760         805 :         generate_exception_if(!in_protmode(ctxt, ops), EXC_GP, 0);
    5761             : 
    5762         797 :         fail_if(ops->read_msr == NULL);
    5763         789 :         if ( (rc = ops->read_msr(MSR_IA32_SYSENTER_CS,
    5764             :                                  &msr_val, ctxt)) != X86EMUL_OKAY )
    5765           0 :             goto done;
    5766             : 
    5767         789 :         generate_exception_if(!(msr_val & 0xfffc), EXC_GP, 0);
    5768         781 :         generate_exception_if(op_bytes == 8 &&
    5769             :                               (!is_canonical_address(_regs.r(dx)) ||
    5770             :                                !is_canonical_address(_regs.r(cx))),
    5771             :                               EXC_GP, 0);
    5772             : 
    5773        1546 :         cs.sel = (msr_val | 3) + /* SELECTOR_RPL_MASK */
    5774         773 :                  (op_bytes == 8 ? 32 : 16);
    5775         773 :         cs.base = 0;   /* flat segment */
    5776         773 :         cs.limit = ~0u;  /* 4GB limit */
    5777         773 :         cs.attr.bytes = op_bytes == 8 ? 0xafb  /* L+DB+P+DPL3+S+Code */
    5778             :                                       : 0xcfb; /* G+DB+P+DPL3+S+Code */
    5779             : 
    5780         773 :         sreg.sel = cs.sel + 8;
    5781         773 :         sreg.base = 0;   /* flat segment */
    5782         773 :         sreg.limit = ~0u;  /* 4GB limit */
    5783         773 :         sreg.attr.bytes = 0xcf3; /* G+DB+P+DPL3+S+Data */
    5784             : 
    5785         773 :         fail_if(ops->write_segment == NULL);
    5786        1522 :         if ( (rc = ops->write_segment(x86_seg_cs, &cs, ctxt)) != 0 ||
    5787         749 :              (rc = ops->write_segment(x86_seg_ss, &sreg, ctxt)) != 0 )
    5788             :             goto done;
    5789             : 
    5790         743 :         _regs.r(ip) = op_bytes == 8 ? _regs.r(dx) : _regs.edx;
    5791         743 :         _regs.r(sp) = op_bytes == 8 ? _regs.r(cx) : _regs.ecx;
    5792             : 
    5793         743 :         singlestep = _regs.eflags & X86_EFLAGS_TF;
    5794         743 :         break;
    5795             : 
    5796             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x50):     /* movmskp{s,d} xmm,reg */
    5797             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x50): /* vmovmskp{s,d} {x,y}mm,reg */
    5798             :     CASE_SIMD_PACKED_INT(0x0f, 0xd7):      /* pmovmskb {,x}mm,reg */
    5799             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd7):   /* vpmovmskb {x,y}mm,reg */
    5800          94 :         opc = init_prefixes(stub);
    5801          94 :         opc[0] = b;
    5802             :         /* Convert GPR destination to %rAX. */
    5803          94 :         rex_prefix &= ~REX_R;
    5804          94 :         vex.r = 1;
    5805          94 :         if ( !mode_64bit() )
    5806          54 :             vex.w = 0;
    5807          94 :         opc[1] = modrm & 0xc7;
    5808          94 :         fic.insn_bytes = PFX_BYTES + 2;
    5809             :     simd_0f_to_gpr:
    5810         262 :         opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
    5811             : 
    5812         262 :         generate_exception_if(ea.type != OP_REG, EXC_UD);
    5813             : 
    5814         244 :         if ( vex.opcx == vex_none )
    5815             :         {
    5816         196 :             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
    5817          16 :                 vcpu_must_have(sse2);
    5818             :             else
    5819             :             {
    5820         180 :                 if ( b != 0x50 )
    5821             :                 {
    5822         158 :                     host_and_vcpu_must_have(mmx);
    5823         158 :                     vcpu_must_have(mmxext);
    5824             :                 }
    5825             :                 else
    5826          22 :                     vcpu_must_have(sse);
    5827             :             }
    5828         196 :             if ( b == 0x50 || (vex.pfx & VEX_PREFIX_DOUBLE_MASK) )
    5829          38 :                 get_fpu(X86EMUL_FPU_xmm, &fic);
    5830             :             else
    5831         158 :                 get_fpu(X86EMUL_FPU_mmx, &fic);
    5832             :         }
    5833             :         else
    5834             :         {
    5835          48 :             generate_exception_if(vex.reg != 0xf, EXC_UD);
    5836          40 :             if ( b == 0x50 || !vex.l )
    5837          16 :                 host_and_vcpu_must_have(avx);
    5838             :             else
    5839          24 :                 host_and_vcpu_must_have(avx2);
    5840          40 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    5841             :         }
    5842             : 
    5843         198 :         copy_REX_VEX(opc, rex_prefix, vex);
    5844         198 :         invoke_stub("", "", "=a" (dst.val) : [dummy] "i" (0));
    5845             : 
    5846         198 :         put_stub(stub);
    5847         198 :         check_xmm_exn(&fic);
    5848             : 
    5849         198 :         ASSERT(!state->simd_size);
    5850         198 :         dst.bytes = 4;
    5851         198 :         break;
    5852             : 
    5853             :     CASE_SIMD_ALL_FP(, 0x0f, 0x5a):        /* cvt{p,s}{s,d}2{p,s}{s,d} xmm/mem,xmm */
    5854             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0x5a):    /* vcvtp{s,d}2p{s,d} xmm/mem,xmm */
    5855             :                                            /* vcvts{s,d}2s{s,d} xmm/mem,xmm,xmm */
    5856         412 :         op_bytes = 4 << (((vex.pfx & VEX_PREFIX_SCALAR_MASK) ? 0 : 1 + vex.l) +
    5857         206 :                          !!(vex.pfx & VEX_PREFIX_DOUBLE_MASK));
    5858             :     simd_0f_cvt:
    5859         246 :         if ( vex.opcx == vex_none )
    5860         168 :             goto simd_0f_sse2;
    5861          78 :         goto simd_0f_avx;
    5862             : 
    5863             :     CASE_SIMD_PACKED_FP(, 0x0f, 0x5b):     /* cvt{ps,dq}2{dq,ps} xmm/mem,xmm */
    5864             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0x5b): /* vcvt{ps,dq}2{dq,ps} {x,y}mm/mem,{x,y}mm */
    5865             :     case X86EMUL_OPC_F3(0x0f, 0x5b):       /* cvttps2dq xmm/mem,xmm */
    5866             :     case X86EMUL_OPC_VEX_F3(0x0f, 0x5b):   /* vcvttps2dq {x,y}mm/mem,{x,y}mm */
    5867          32 :         d |= TwoOp;
    5868          32 :         op_bytes = 16 << vex.l;
    5869          32 :         goto simd_0f_cvt;
    5870             : 
    5871             :     CASE_SIMD_PACKED_INT(0x0f, 0x60):    /* punpcklbw {,x}mm/mem,{,x}mm */
    5872             :     case X86EMUL_OPC_VEX_66(0x0f, 0x60): /* vpunpcklbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5873             :     CASE_SIMD_PACKED_INT(0x0f, 0x61):    /* punpcklwd {,x}mm/mem,{,x}mm */
    5874             :     case X86EMUL_OPC_VEX_66(0x0f, 0x61): /* vpunpcklwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5875             :     CASE_SIMD_PACKED_INT(0x0f, 0x62):    /* punpckldq {,x}mm/mem,{,x}mm */
    5876             :     case X86EMUL_OPC_VEX_66(0x0f, 0x62): /* vpunpckldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5877             :     CASE_SIMD_PACKED_INT(0x0f, 0x68):    /* punpckhbw {,x}mm/mem,{,x}mm */
    5878             :     case X86EMUL_OPC_VEX_66(0x0f, 0x68): /* vpunpckhbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5879             :     CASE_SIMD_PACKED_INT(0x0f, 0x69):    /* punpckhwd {,x}mm/mem,{,x}mm */
    5880             :     case X86EMUL_OPC_VEX_66(0x0f, 0x69): /* vpunpckhwd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5881             :     CASE_SIMD_PACKED_INT(0x0f, 0x6a):    /* punpckhdq {,x}mm/mem,{,x}mm */
    5882             :     case X86EMUL_OPC_VEX_66(0x0f, 0x6a): /* vpunpckhdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5883         856 :         op_bytes = vex.pfx ? 16 << vex.l : b & 8 ? 8 : 4;
    5884             :         /* fall through */
    5885             :     CASE_SIMD_PACKED_INT(0x0f, 0x63):    /* packsswb {,x}mm/mem,{,x}mm */
    5886             :     case X86EMUL_OPC_VEX_66(0x0f, 0x63): /* vpacksswb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5887             :     CASE_SIMD_PACKED_INT(0x0f, 0x64):    /* pcmpgtb {,x}mm/mem,{,x}mm */
    5888             :     case X86EMUL_OPC_VEX_66(0x0f, 0x64): /* vpcmpgtb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5889             :     CASE_SIMD_PACKED_INT(0x0f, 0x65):    /* pcmpgtw {,x}mm/mem,{,x}mm */
    5890             :     case X86EMUL_OPC_VEX_66(0x0f, 0x65): /* vpcmpgtw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5891             :     CASE_SIMD_PACKED_INT(0x0f, 0x66):    /* pcmpgtd {,x}mm/mem,{,x}mm */
    5892             :     case X86EMUL_OPC_VEX_66(0x0f, 0x66): /* vpcmpgtd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5893             :     CASE_SIMD_PACKED_INT(0x0f, 0x67):    /* packuswb {,x}mm/mem,{,x}mm */
    5894             :     case X86EMUL_OPC_VEX_66(0x0f, 0x67): /* vpackuswb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5895             :     CASE_SIMD_PACKED_INT(0x0f, 0x6b):    /* packssdw {,x}mm/mem,{,x}mm */
    5896             :     case X86EMUL_OPC_VEX_66(0x0f, 0x6b): /* vpackssdw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5897             :     case X86EMUL_OPC_66(0x0f, 0x6c):     /* punpcklqdq xmm/m128,xmm */
    5898             :     case X86EMUL_OPC_VEX_66(0x0f, 0x6c): /* vpunpcklqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5899             :     case X86EMUL_OPC_66(0x0f, 0x6d):     /* punpckhqdq xmm/m128,xmm */
    5900             :     case X86EMUL_OPC_VEX_66(0x0f, 0x6d): /* vpunpckhqdq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5901             :     CASE_SIMD_PACKED_INT(0x0f, 0x74):    /* pcmpeqb {,x}mm/mem,{,x}mm */
    5902             :     case X86EMUL_OPC_VEX_66(0x0f, 0x74): /* vpcmpeqb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5903             :     CASE_SIMD_PACKED_INT(0x0f, 0x75):    /* pcmpeqw {,x}mm/mem,{,x}mm */
    5904             :     case X86EMUL_OPC_VEX_66(0x0f, 0x75): /* vpcmpeqw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5905             :     CASE_SIMD_PACKED_INT(0x0f, 0x76):    /* pcmpeqd {,x}mm/mem,{,x}mm */
    5906             :     case X86EMUL_OPC_VEX_66(0x0f, 0x76): /* vpcmpeqd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5907             :     case X86EMUL_OPC_66(0x0f, 0xd4):     /* paddq xmm/m128,xmm */
    5908             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd4): /* vpaddq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5909             :     CASE_SIMD_PACKED_INT(0x0f, 0xd5):    /* pmullw {,x}mm/mem,{,x}mm */
    5910             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd5): /* vpmullw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5911             :     CASE_SIMD_PACKED_INT(0x0f, 0xd8):    /* psubusb {,x}mm/mem,{,x}mm */
    5912             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd8): /* vpsubusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5913             :     CASE_SIMD_PACKED_INT(0x0f, 0xd9):    /* psubusw {,x}mm/mem,{,x}mm */
    5914             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd9): /* vpsubusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5915             :     case X86EMUL_OPC_66(0x0f, 0xda):     /* pminub xmm/m128,xmm */
    5916             :     case X86EMUL_OPC_VEX_66(0x0f, 0xda): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5917             :     CASE_SIMD_PACKED_INT(0x0f, 0xdb):    /* pand {,x}mm/mem,{,x}mm */
    5918             :     case X86EMUL_OPC_VEX_66(0x0f, 0xdb): /* vpand {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5919             :     CASE_SIMD_PACKED_INT(0x0f, 0xdc):    /* paddusb {,x}mm/mem,{,x}mm */
    5920             :     case X86EMUL_OPC_VEX_66(0x0f, 0xdc): /* vpaddusb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5921             :     CASE_SIMD_PACKED_INT(0x0f, 0xdd):    /* paddusw {,x}mm/mem,{,x}mm */
    5922             :     case X86EMUL_OPC_VEX_66(0x0f, 0xdd): /* vpaddusw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5923             :     case X86EMUL_OPC_66(0x0f, 0xde):     /* pmaxub xmm/m128,xmm */
    5924             :     case X86EMUL_OPC_VEX_66(0x0f, 0xde): /* vpmaxub {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5925             :     CASE_SIMD_PACKED_INT(0x0f, 0xdf):    /* pandn {,x}mm/mem,{,x}mm */
    5926             :     case X86EMUL_OPC_VEX_66(0x0f, 0xdf): /* vpandn {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5927             :     case X86EMUL_OPC_66(0x0f, 0xe0):     /* pavgb xmm/m128,xmm */
    5928             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe0): /* vpavgb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5929             :     case X86EMUL_OPC_66(0x0f, 0xe3):     /* pavgw xmm/m128,xmm */
    5930             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe3): /* vpavgw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5931             :     case X86EMUL_OPC_66(0x0f, 0xe4):     /* pmulhuw xmm/m128,xmm */
    5932             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe4): /* vpmulhuw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5933             :     CASE_SIMD_PACKED_INT(0x0f, 0xe5):    /* pmulhw {,x}mm/mem,{,x}mm */
    5934             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe5): /* vpmulhw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5935             :     CASE_SIMD_PACKED_INT(0x0f, 0xe8):    /* psubsb {,x}mm/mem,{,x}mm */
    5936             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe8): /* vpsubsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5937             :     CASE_SIMD_PACKED_INT(0x0f, 0xe9):    /* psubsw {,x}mm/mem,{,x}mm */
    5938             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe9): /* vpsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5939             :     case X86EMUL_OPC_66(0x0f, 0xea):     /* pminsw xmm/m128,xmm */
    5940             :     case X86EMUL_OPC_VEX_66(0x0f, 0xea): /* vpminsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5941             :     CASE_SIMD_PACKED_INT(0x0f, 0xeb):    /* por {,x}mm/mem,{,x}mm */
    5942             :     case X86EMUL_OPC_VEX_66(0x0f, 0xeb): /* vpor {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5943             :     CASE_SIMD_PACKED_INT(0x0f, 0xec):    /* paddsb {,x}mm/mem,{,x}mm */
    5944             :     case X86EMUL_OPC_VEX_66(0x0f, 0xec): /* vpaddsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5945             :     CASE_SIMD_PACKED_INT(0x0f, 0xed):    /* paddsw {,x}mm/mem,{,x}mm */
    5946             :     case X86EMUL_OPC_VEX_66(0x0f, 0xed): /* vpaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5947             :     case X86EMUL_OPC_66(0x0f, 0xee):     /* pmaxsw xmm/m128,xmm */
    5948             :     case X86EMUL_OPC_VEX_66(0x0f, 0xee): /* vpmaxsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5949             :     CASE_SIMD_PACKED_INT(0x0f, 0xef):    /* pxor {,x}mm/mem,{,x}mm */
    5950             :     case X86EMUL_OPC_VEX_66(0x0f, 0xef): /* vpxor {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5951             :     case X86EMUL_OPC_66(0x0f, 0xf4):     /* pmuludq xmm/m128,xmm */
    5952             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf4): /* vpmuludq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5953             :     case X86EMUL_OPC_66(0x0f, 0xf6):     /* psadbw xmm/m128,xmm */
    5954             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf6): /* vpsadbw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5955             :     CASE_SIMD_PACKED_INT(0x0f, 0xf8):    /* psubb {,x}mm/mem,{,x}mm */
    5956             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf8): /* vpsubb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5957             :     CASE_SIMD_PACKED_INT(0x0f, 0xf9):    /* psubw {,x}mm/mem,{,x}mm */
    5958             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf9): /* vpsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5959             :     CASE_SIMD_PACKED_INT(0x0f, 0xfa):    /* psubd {,x}mm/mem,{,x}mm */
    5960             :     case X86EMUL_OPC_VEX_66(0x0f, 0xfa): /* vpsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5961             :     case X86EMUL_OPC_66(0x0f, 0xfb):     /* psubq xmm/m128,xmm */
    5962             :     case X86EMUL_OPC_VEX_66(0x0f, 0xfb): /* vpsubq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5963             :     CASE_SIMD_PACKED_INT(0x0f, 0xfc):    /* paddb {,x}mm/mem,{,x}mm */
    5964             :     case X86EMUL_OPC_VEX_66(0x0f, 0xfc): /* vpaddb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5965             :     CASE_SIMD_PACKED_INT(0x0f, 0xfd):    /* paddw {,x}mm/mem,{,x}mm */
    5966             :     case X86EMUL_OPC_VEX_66(0x0f, 0xfd): /* vpaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5967             :     CASE_SIMD_PACKED_INT(0x0f, 0xfe):    /* paddd {,x}mm/mem,{,x}mm */
    5968             :     case X86EMUL_OPC_VEX_66(0x0f, 0xfe): /* vpaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5969             :     simd_0f_int:
    5970        2879 :         if ( vex.opcx != vex_none )
    5971             :         {
    5972             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x00): /* vpshufb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5973             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x01): /* vphaddw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5974             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x02): /* vphaddd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5975             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x03): /* vphaddsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5976             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x04): /* vpmaddubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5977             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x05): /* vphsubw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5978             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x06): /* vphsubd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5979             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x07): /* vphsubsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5980             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x08): /* vpsignb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5981             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x09): /* vpsignw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5982             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x0a): /* vpsignd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5983             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x0b): /* vpmulhrsw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5984             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x1c): /* vpabsb {x,y}mm/mem,{x,y}mm */
    5985             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x1d): /* vpabsw {x,y}mm/mem,{x,y}mm */
    5986             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x1e): /* vpabsd {x,y}mm/mem,{x,y}mm */
    5987             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x28): /* vpmuldq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5988             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x29): /* vpcmpeqq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5989             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x2b): /* vpackusdw {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5990             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x37): /* vpcmpgtq {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5991             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x38): /* vpminsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5992             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x39): /* vpminsd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5993             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x3a): /* vpminub {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5994             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x3b): /* vpminud {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5995             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x3c): /* vpmaxsb {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5996             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x3d): /* vpmaxsd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5997             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x3e): /* vpmaxub {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5998             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x3f): /* vpmaxud {x,y}mm/mem,{x,y}mm,{x,y}mm */
    5999             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x40): /* vpmulld {x,y}mm/mem,{x,y}mm,{x,y}mm */
    6000         545 :             if ( !vex.l )
    6001         522 :                 goto simd_0f_avx;
    6002          23 :             host_and_vcpu_must_have(avx2);
    6003          23 :             goto simd_0f_ymm;
    6004             :         }
    6005        2426 :         if ( vex.pfx )
    6006          32 :             goto simd_0f_sse2;
    6007             :     simd_0f_mmx:
    6008        3098 :         host_and_vcpu_must_have(mmx);
    6009        3098 :         get_fpu(X86EMUL_FPU_mmx, &fic);
    6010        2905 :         goto simd_0f_common;
    6011             : 
    6012             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x41): /* vphminposuw xmm/m128,xmm,xmm */
    6013          33 :         generate_exception_if(vex.l, EXC_UD);
    6014          25 :         goto simd_0f_avx;
    6015             : 
    6016             :     CASE_SIMD_PACKED_INT(0x0f, 0x6e):    /* mov{d,q} r/m,{,x}mm */
    6017             :     case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
    6018             :     CASE_SIMD_PACKED_INT(0x0f, 0x7e):    /* mov{d,q} {,x}mm,r/m */
    6019             :     case X86EMUL_OPC_VEX_66(0x0f, 0x7e): /* vmov{d,q} xmm,r/m */
    6020         389 :         if ( vex.opcx != vex_none )
    6021             :         {
    6022          24 :             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    6023          16 :             host_and_vcpu_must_have(avx);
    6024          16 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    6025             :         }
    6026         365 :         else if ( vex.pfx )
    6027             :         {
    6028          31 :             vcpu_must_have(sse2);
    6029          31 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    6030             :         }
    6031             :         else
    6032             :         {
    6033         334 :             host_and_vcpu_must_have(mmx);
    6034         334 :             get_fpu(X86EMUL_FPU_mmx, &fic);
    6035             :         }
    6036             : 
    6037             :     simd_0f_rm:
    6038         412 :         opc = init_prefixes(stub);
    6039         412 :         opc[0] = b;
    6040             :         /* Convert memory/GPR operand to (%rAX). */
    6041         412 :         rex_prefix &= ~REX_B;
    6042         412 :         vex.b = 1;
    6043         412 :         if ( !mode_64bit() )
    6044         216 :             vex.w = 0;
    6045         412 :         opc[1] = modrm & 0x38;
    6046         412 :         fic.insn_bytes = PFX_BYTES + 2;
    6047         412 :         opc[2] = 0xc3;
    6048             : 
    6049         412 :         copy_REX_VEX(opc, rex_prefix, vex);
    6050         412 :         invoke_stub("", "", "+m" (src.val), "+m" (fic.exn_raised)
    6051             :                             : "a" (&src.val));
    6052         412 :         dst.val = src.val;
    6053             : 
    6054         412 :         put_stub(stub);
    6055         412 :         check_xmm_exn(&fic);
    6056             : 
    6057         412 :         ASSERT(!state->simd_size);
    6058         412 :         break;
    6059             : 
    6060             :     case X86EMUL_OPC_66(0x0f, 0xe7):     /* movntdq xmm,m128 */
    6061             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe7): /* vmovntdq {x,y}mm,mem */
    6062          16 :         generate_exception_if(ea.type != OP_MEM, EXC_UD);
    6063           8 :         sfence = true;
    6064             :         /* fall through */
    6065             :     case X86EMUL_OPC_66(0x0f, 0x6f):     /* movdqa xmm/m128,xmm */
    6066             :     case X86EMUL_OPC_VEX_66(0x0f, 0x6f): /* vmovdqa {x,y}mm/mem,{x,y}mm */
    6067             :     case X86EMUL_OPC_F3(0x0f, 0x6f):     /* movdqu xmm/m128,xmm */
    6068             :     case X86EMUL_OPC_VEX_F3(0x0f, 0x6f): /* vmovdqu {x,y}mm/mem,{x,y}mm */
    6069             :     case X86EMUL_OPC_66(0x0f, 0x7f):     /* movdqa xmm,xmm/m128 */
    6070             :     case X86EMUL_OPC_VEX_66(0x0f, 0x7f): /* vmovdqa {x,y}mm,{x,y}mm/m128 */
    6071             :     case X86EMUL_OPC_F3(0x0f, 0x7f):     /* movdqu xmm,xmm/m128 */
    6072             :     case X86EMUL_OPC_VEX_F3(0x0f, 0x7f): /* vmovdqu {x,y}mm,{x,y}mm/mem */
    6073             :     movdqa:
    6074          56 :         d |= TwoOp;
    6075          56 :         op_bytes = 16 << vex.l;
    6076          56 :         if ( vex.opcx != vex_none )
    6077          32 :             goto simd_0f_avx;
    6078          24 :         goto simd_0f_sse2;
    6079             : 
    6080             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd6): /* vmovq xmm,xmm/m64 */
    6081          16 :         generate_exception_if(vex.l, EXC_UD);
    6082           8 :         d |= TwoOp;
    6083             :         /* fall through */
    6084             :     case X86EMUL_OPC_66(0x0f, 0xd6):     /* movq xmm,xmm/m64 */
    6085             :     case X86EMUL_OPC(0x0f, 0x6f):        /* movq mm/m64,mm */
    6086             :     case X86EMUL_OPC(0x0f, 0x7f):        /* movq mm,mm/m64 */
    6087         490 :         op_bytes = 8;
    6088         490 :         goto simd_0f_int;
    6089             : 
    6090             :     CASE_SIMD_PACKED_INT(0x0f, 0x70):    /* pshuf{w,d} $imm8,{,x}mm/mem,{,x}mm */
    6091             :     case X86EMUL_OPC_VEX_66(0x0f, 0x70): /* vpshufd $imm8,{x,y}mm/mem,{x,y}mm */
    6092             :     case X86EMUL_OPC_F3(0x0f, 0x70):     /* pshufhw $imm8,xmm/m128,xmm */
    6093             :     case X86EMUL_OPC_VEX_F3(0x0f, 0x70): /* vpshufhw $imm8,{x,y}mm/mem,{x,y}mm */
    6094             :     case X86EMUL_OPC_F2(0x0f, 0x70):     /* pshuflw $imm8,xmm/m128,xmm */
    6095             :     case X86EMUL_OPC_VEX_F2(0x0f, 0x70): /* vpshuflw $imm8,{x,y}mm/mem,{x,y}mm */
    6096         470 :         d = (d & ~SrcMask) | SrcMem | TwoOp;
    6097         470 :         op_bytes = vex.pfx ? 16 << vex.l : 8;
    6098             :     simd_0f_int_imm8:
    6099        1251 :         if ( vex.opcx != vex_none )
    6100             :         {
    6101             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0e): /* vpblendw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    6102             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0f): /* vpalignr $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    6103             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x42): /* vmpsadbw $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    6104          56 :             if ( vex.l )
    6105          16 :                 host_and_vcpu_must_have(avx2);
    6106             :             else
    6107             :             {
    6108             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x08): /* vroundps $imm8,{x,y}mm/mem,{x,y}mm */
    6109             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x09): /* vroundpd $imm8,{x,y}mm/mem,{x,y}mm */
    6110             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0a): /* vroundss $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    6111             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0b): /* vroundsd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    6112             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0c): /* vblendps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    6113             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x0d): /* vblendpd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    6114             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x40): /* vdpps $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    6115             :     simd_0f_imm8_avx:
    6116          96 :                 host_and_vcpu_must_have(avx);
    6117             :             }
    6118         112 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    6119             :         }
    6120        1203 :         else if ( vex.pfx )
    6121             :         {
    6122             :     simd_0f_imm8_sse2:
    6123          16 :             vcpu_must_have(sse2);
    6124          16 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    6125             :         }
    6126             :         else
    6127             :         {
    6128        1195 :             host_and_vcpu_must_have(mmx);
    6129        1195 :             vcpu_must_have(mmxext);
    6130        1195 :             get_fpu(X86EMUL_FPU_mmx, &fic);
    6131             :         }
    6132             :     simd_0f_imm8:
    6133        1570 :         opc = init_prefixes(stub);
    6134        1570 :         opc[0] = b;
    6135        1570 :         opc[1] = modrm;
    6136        1570 :         if ( ea.type == OP_MEM )
    6137             :         {
    6138             :             /* Convert memory operand to (%rAX). */
    6139        1546 :             rex_prefix &= ~REX_B;
    6140        1546 :             vex.b = 1;
    6141        1546 :             opc[1] &= 0x38;
    6142             :         }
    6143        1570 :         opc[2] = imm1;
    6144        1570 :         fic.insn_bytes = PFX_BYTES + 3;
    6145        1570 :         break;
    6146             : 
    6147             :     CASE_SIMD_PACKED_INT(0x0f, 0x71):    /* Grp12 */
    6148             :     case X86EMUL_OPC_VEX_66(0x0f, 0x71):
    6149             :     CASE_SIMD_PACKED_INT(0x0f, 0x72):    /* Grp13 */
    6150             :     case X86EMUL_OPC_VEX_66(0x0f, 0x72):
    6151         802 :         switch ( modrm_reg & 7 )
    6152             :         {
    6153             :         case 2: /* psrl{w,d} $imm8,{,x}mm */
    6154             :                 /* vpsrl{w,d} $imm8,{x,y}mm,{x,y}mm */
    6155             :         case 4: /* psra{w,d} $imm8,{,x}mm */
    6156             :                 /* vpsra{w,d} $imm8,{x,y}mm,{x,y}mm */
    6157             :         case 6: /* psll{w,d} $imm8,{,x}mm */
    6158             :                 /* vpsll{w,d} $imm8,{x,y}mm,{x,y}mm */
    6159         794 :             break;
    6160             :         default:
    6161           8 :             goto cannot_emulate;
    6162             :         }
    6163             :     simd_0f_shift_imm:
    6164         823 :         generate_exception_if(ea.type != OP_REG, EXC_UD);
    6165             : 
    6166         734 :         if ( vex.opcx != vex_none )
    6167             :         {
    6168         711 :             if ( vex.l )
    6169          16 :                 host_and_vcpu_must_have(avx2);
    6170             :             else
    6171         695 :                 host_and_vcpu_must_have(avx);
    6172         711 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    6173             :         }
    6174          23 :         else if ( vex.pfx )
    6175             :         {
    6176          15 :             vcpu_must_have(sse2);
    6177          15 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    6178             :         }
    6179             :         else
    6180             :         {
    6181           8 :             host_and_vcpu_must_have(mmx);
    6182           8 :             get_fpu(X86EMUL_FPU_mmx, &fic);
    6183             :         }
    6184             : 
    6185         719 :         opc = init_prefixes(stub);
    6186         719 :         opc[0] = b;
    6187         719 :         opc[1] = modrm;
    6188         719 :         opc[2] = imm1;
    6189         719 :         fic.insn_bytes = PFX_BYTES + 3;
    6190             :     simd_0f_reg_only:
    6191        1155 :         opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
    6192             : 
    6193        1155 :         copy_REX_VEX(opc, rex_prefix, vex);
    6194        1155 :         invoke_stub("", "", [dummy_out] "=g" (dummy) : [dummy_in] "i" (0) );
    6195             : 
    6196        1155 :         put_stub(stub);
    6197        1155 :         check_xmm_exn(&fic);
    6198             : 
    6199        1155 :         ASSERT(!state->simd_size);
    6200        1155 :         break;
    6201             : 
    6202             :     case X86EMUL_OPC(0x0f, 0x73):        /* Grp14 */
    6203          14 :         switch ( modrm_reg & 7 )
    6204             :         {
    6205             :         case 2: /* psrlq $imm8,mm */
    6206             :         case 6: /* psllq $imm8,mm */
    6207           6 :             goto simd_0f_shift_imm;
    6208             :         }
    6209           8 :         goto cannot_emulate;
    6210             : 
    6211             :     case X86EMUL_OPC_66(0x0f, 0x73):
    6212             :     case X86EMUL_OPC_VEX_66(0x0f, 0x73):
    6213          31 :         switch ( modrm_reg & 7 )
    6214             :         {
    6215             :         case 2: /* psrlq $imm8,xmm */
    6216             :                 /* vpsrlq $imm8,{x,y}mm,{x,y}mm */
    6217             :         case 3: /* psrldq $imm8,xmm */
    6218             :                 /* vpsrldq $imm8,{x,y}mm,{x,y}mm */
    6219             :         case 6: /* psllq $imm8,xmm */
    6220             :                 /* vpsllq $imm8,{x,y}mm,{x,y}mm */
    6221             :         case 7: /* pslldq $imm8,xmm */
    6222             :                 /* vpslldq $imm8,{x,y}mm,{x,y}mm */
    6223          23 :             goto simd_0f_shift_imm;
    6224             :         }
    6225           8 :         goto cannot_emulate;
    6226             : 
    6227             :     case X86EMUL_OPC(0x0f, 0x77):        /* emms */
    6228             :     case X86EMUL_OPC_VEX(0x0f, 0x77):    /* vzero{all,upper} */
    6229        1333 :         if ( vex.opcx != vex_none )
    6230             :         {
    6231        1051 :             generate_exception_if(vex.reg != 0xf, EXC_UD);
    6232        1043 :             host_and_vcpu_must_have(avx);
    6233        1043 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    6234             : 
    6235             : #ifdef __x86_64__
    6236        1035 :             if ( !mode_64bit() )
    6237             :             {
    6238             :                 /*
    6239             :                  * Can't use the actual instructions here, as we must not
    6240             :                  * touch YMM8...YMM15.
    6241             :                  */
    6242         875 :                 if ( vex.l )
    6243             :                 {
    6244             :                     /* vpxor %xmmN, %xmmN, %xmmN */
    6245         827 :                     asm volatile ( ".byte 0xc5,0xf9,0xef,0xc0" );
    6246         827 :                     asm volatile ( ".byte 0xc5,0xf1,0xef,0xc9" );
    6247         827 :                     asm volatile ( ".byte 0xc5,0xe9,0xef,0xd2" );
    6248         827 :                     asm volatile ( ".byte 0xc5,0xe1,0xef,0xdb" );
    6249         827 :                     asm volatile ( ".byte 0xc5,0xd9,0xef,0xe4" );
    6250         827 :                     asm volatile ( ".byte 0xc5,0xd1,0xef,0xed" );
    6251         827 :                     asm volatile ( ".byte 0xc5,0xc9,0xef,0xf6" );
    6252         827 :                     asm volatile ( ".byte 0xc5,0xc1,0xef,0xff" );
    6253             :                 }
    6254             :                 else
    6255             :                 {
    6256             :                     /* vpor %xmmN, %xmmN, %xmmN */
    6257          48 :                     asm volatile ( ".byte 0xc5,0xf9,0xeb,0xc0" );
    6258          48 :                     asm volatile ( ".byte 0xc5,0xf1,0xeb,0xc9" );
    6259          48 :                     asm volatile ( ".byte 0xc5,0xe9,0xeb,0xd2" );
    6260          48 :                     asm volatile ( ".byte 0xc5,0xe1,0xeb,0xdb" );
    6261          48 :                     asm volatile ( ".byte 0xc5,0xd9,0xeb,0xe4" );
    6262          48 :                     asm volatile ( ".byte 0xc5,0xd1,0xeb,0xed" );
    6263          48 :                     asm volatile ( ".byte 0xc5,0xc9,0xeb,0xf6" );
    6264          48 :                     asm volatile ( ".byte 0xc5,0xc1,0xeb,0xff" );
    6265             :                 }
    6266             : 
    6267         875 :                 ASSERT(!state->simd_size);
    6268         875 :                 break;
    6269             :             }
    6270             : #endif
    6271             :         }
    6272             :         else
    6273             :         {
    6274         282 :             host_and_vcpu_must_have(mmx);
    6275         282 :             get_fpu(X86EMUL_FPU_mmx, &fic);
    6276             :         }
    6277             : 
    6278         436 :         opc = init_prefixes(stub);
    6279         436 :         opc[0] = b;
    6280         436 :         fic.insn_bytes = PFX_BYTES + 1;
    6281         436 :         goto simd_0f_reg_only;
    6282             : 
    6283             :     case X86EMUL_OPC_66(0x0f, 0x78):     /* Grp17 */
    6284          24 :         switch ( modrm_reg & 7 )
    6285             :         {
    6286             :         case 0: /* extrq $imm8,$imm8,xmm */
    6287          16 :             break;
    6288             :         default:
    6289           8 :             goto cannot_emulate;
    6290             :         }
    6291             :         /* fall through */
    6292             :     case X86EMUL_OPC_F2(0x0f, 0x78):     /* insertq $imm8,$imm8,xmm,xmm */
    6293          24 :         generate_exception_if(ea.type != OP_REG, EXC_UD);
    6294             : 
    6295          16 :         host_and_vcpu_must_have(sse4a);
    6296           0 :         get_fpu(X86EMUL_FPU_xmm, &fic);
    6297             : 
    6298           0 :         opc = init_prefixes(stub);
    6299           0 :         opc[0] = b;
    6300           0 :         opc[1] = modrm;
    6301           0 :         opc[2] = imm1;
    6302           0 :         opc[3] = imm2;
    6303           0 :         fic.insn_bytes = PFX_BYTES + 4;
    6304           0 :         goto simd_0f_reg_only;
    6305             : 
    6306             :     case X86EMUL_OPC_66(0x0f, 0x79):     /* extrq xmm,xmm */
    6307             :     case X86EMUL_OPC_F2(0x0f, 0x79):     /* insertq xmm,xmm */
    6308          16 :         generate_exception_if(ea.type != OP_REG, EXC_UD);
    6309           8 :         host_and_vcpu_must_have(sse4a);
    6310           0 :         op_bytes = 8;
    6311           0 :         goto simd_0f_xmm;
    6312             : 
    6313             :     case X86EMUL_OPC_F3(0x0f, 0x7e):     /* movq xmm/m64,xmm */
    6314             :     case X86EMUL_OPC_VEX_F3(0x0f, 0x7e): /* vmovq xmm/m64,xmm */
    6315          32 :         generate_exception_if(vex.l, EXC_UD);
    6316          24 :         op_bytes = 8;
    6317          24 :         goto simd_0f_int;
    6318             : 
    6319             :     case X86EMUL_OPC_F2(0x0f, 0xf0):     /* lddqu m128,xmm */
    6320             :     case X86EMUL_OPC_VEX_F2(0x0f, 0xf0): /* vlddqu mem,{x,y}mm */
    6321          32 :         generate_exception_if(ea.type != OP_MEM, EXC_UD);
    6322             :         /* fall through */
    6323             :     case X86EMUL_OPC_66(0x0f, 0x7c):     /* haddpd xmm/m128,xmm */
    6324             :     case X86EMUL_OPC_F2(0x0f, 0x7c):     /* haddps xmm/m128,xmm */
    6325             :     case X86EMUL_OPC_VEX_66(0x0f, 0x7c): /* vhaddpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    6326             :     case X86EMUL_OPC_VEX_F2(0x0f, 0x7c): /* vhaddps {x,y}mm/mem,{x,y}mm,{x,y}mm */
    6327             :     case X86EMUL_OPC_66(0x0f, 0x7d):     /* hsubpd xmm/m128,xmm */
    6328             :     case X86EMUL_OPC_F2(0x0f, 0x7d):     /* hsubps xmm/m128,xmm */
    6329             :     case X86EMUL_OPC_VEX_66(0x0f, 0x7d): /* vhsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    6330             :     case X86EMUL_OPC_VEX_F2(0x0f, 0x7d): /* vhsubps {x,y}mm/mem,{x,y}mm,{x,y}mm */
    6331             :     case X86EMUL_OPC_66(0x0f, 0xd0):     /* addsubpd xmm/m128,xmm */
    6332             :     case X86EMUL_OPC_F2(0x0f, 0xd0):     /* addsubps xmm/m128,xmm */
    6333             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd0): /* vaddsubpd {x,y}mm/mem,{x,y}mm,{x,y}mm */
    6334             :     case X86EMUL_OPC_VEX_F2(0x0f, 0xd0): /* vaddsubps {x,y}mm/mem,{x,y}mm,{x,y}mm */
    6335          40 :         op_bytes = 16 << vex.l;
    6336          40 :         goto simd_0f_sse3_avx;
    6337             : 
    6338             :     case X86EMUL_OPC(0x0f, 0x80) ... X86EMUL_OPC(0x0f, 0x8f): /* jcc (near) */
    6339        3016 :         if ( test_cc(b, _regs.eflags) )
    6340        1465 :             jmp_rel((int32_t)src.val);
    6341        2993 :         adjust_bnd(ctxt, ops, vex.pfx);
    6342        2993 :         break;
    6343             : 
    6344             :     case X86EMUL_OPC(0x0f, 0x90) ... X86EMUL_OPC(0x0f, 0x9f): /* setcc */
    6345       33362 :         dst.val = test_cc(b, _regs.eflags);
    6346       33362 :         break;
    6347             : 
    6348             :     case X86EMUL_OPC(0x0f, 0xa2): /* cpuid */
    6349        1058 :         msr_val = 0;
    6350        1058 :         fail_if(ops->cpuid == NULL);
    6351             : 
    6352             :         /* Speculatively read MSR_INTEL_MISC_FEATURES_ENABLES. */
    6353        1464 :         if ( ops->read_msr && !mode_ring0() &&
    6354         406 :              (rc = ops->read_msr(MSR_INTEL_MISC_FEATURES_ENABLES,
    6355             :                                  &msr_val, ctxt)) == X86EMUL_EXCEPTION )
    6356             :         {
    6357             :             /* Not implemented.  Squash the exception and proceed normally. */
    6358         406 :             x86_emul_reset_event(ctxt);
    6359         406 :             rc = X86EMUL_OKAY;
    6360             :         }
    6361        1050 :         if ( rc != X86EMUL_OKAY )
    6362           0 :             goto done;
    6363             : 
    6364        1050 :         generate_exception_if((msr_val & MSR_MISC_FEATURES_CPUID_FAULTING),
    6365             :                               EXC_GP, 0); /* Faulting active? (Inc. CPL test) */
    6366             : 
    6367        1050 :         rc = ops->cpuid(_regs.eax, _regs.ecx, &cpuid_leaf, ctxt);
    6368        1050 :         if ( rc != X86EMUL_OKAY )
    6369           0 :             goto done;
    6370        1050 :         _regs.r(ax) = cpuid_leaf.a;
    6371        1050 :         _regs.r(bx) = cpuid_leaf.b;
    6372        1050 :         _regs.r(cx) = cpuid_leaf.c;
    6373        1050 :         _regs.r(dx) = cpuid_leaf.d;
    6374        1050 :         break;
    6375             : 
    6376             :     case X86EMUL_OPC(0x0f, 0xa3): bt: /* bt */
    6377       12221 :         generate_exception_if(lock_prefix, EXC_UD);
    6378       12213 :         emulate_2op_SrcV_nobyte("bt", src, dst, _regs.eflags);
    6379       12213 :         dst.type = OP_NONE;
    6380       12213 :         break;
    6381             : 
    6382             :     case X86EMUL_OPC(0x0f, 0xa4): /* shld imm8,r,r/m */
    6383             :     case X86EMUL_OPC(0x0f, 0xa5): /* shld %%cl,r,r/m */
    6384             :     case X86EMUL_OPC(0x0f, 0xac): /* shrd imm8,r,r/m */
    6385             :     case X86EMUL_OPC(0x0f, 0xad): /* shrd %%cl,r,r/m */ {
    6386        8808 :         uint8_t shift, width = dst.bytes << 3;
    6387             : 
    6388        8808 :         generate_exception_if(lock_prefix, EXC_UD);
    6389        8800 :         if ( b & 1 )
    6390         423 :             shift = _regs.cl;
    6391             :         else
    6392             :         {
    6393        8377 :             shift = src.val;
    6394        8377 :             src.reg = decode_register(modrm_reg, &_regs, 0);
    6395        8377 :             src.val = truncate_word(*src.reg, dst.bytes);
    6396             :         }
    6397        8800 :         if ( (shift &= width - 1) == 0 )
    6398        1028 :             break;
    6399        7772 :         dst.orig_val = truncate_word(dst.val, dst.bytes);
    6400       23923 :         dst.val = ((shift == width) ? src.val :
    6401        7772 :                    (b & 8) ?
    6402             :                    /* shrd */
    6403        7165 :                    ((dst.orig_val >> shift) |
    6404        7165 :                     truncate_word(src.val << (width - shift), dst.bytes)) :
    6405             :                    /* shld */
    6406        1214 :                    ((dst.orig_val << shift) |
    6407         607 :                     ((src.val >> (width - shift)) & ((1ull << shift) - 1))));
    6408        7772 :         dst.val = truncate_word(dst.val, dst.bytes);
    6409        7772 :         _regs.eflags &= ~(X86_EFLAGS_OF | X86_EFLAGS_SF | X86_EFLAGS_ZF |
    6410             :                           X86_EFLAGS_PF | X86_EFLAGS_CF);
    6411        7772 :         if ( (dst.val >> ((b & 8) ? (shift - 1) : (width - shift))) & 1 )
    6412        2450 :             _regs.eflags |= X86_EFLAGS_CF;
    6413        7772 :         if ( ((dst.val ^ dst.orig_val) >> (width - 1)) & 1 )
    6414        2985 :             _regs.eflags |= X86_EFLAGS_OF;
    6415        7772 :         _regs.eflags |= ((dst.val >> (width - 1)) & 1) ? X86_EFLAGS_SF : 0;
    6416        7772 :         _regs.eflags |= (dst.val == 0) ? X86_EFLAGS_ZF : 0;
    6417        7772 :         _regs.eflags |= even_parity(dst.val) ? X86_EFLAGS_PF : 0;
    6418        7772 :         break;
    6419             :     }
    6420             : 
    6421             :     case X86EMUL_OPC(0x0f, 0xab): bts: /* bts */
    6422        1193 :         emulate_2op_SrcV_nobyte("bts", src, dst, _regs.eflags);
    6423        1193 :         break;
    6424             : 
    6425             :     case X86EMUL_OPC(0x0f, 0xae): case X86EMUL_OPC_66(0x0f, 0xae): /* Grp15 */
    6426        4081 :         switch ( modrm_reg & 7 )
    6427             :         {
    6428             :         case 2: /* ldmxcsr */
    6429          40 :             generate_exception_if(vex.pfx, EXC_UD);
    6430          32 :             vcpu_must_have(sse);
    6431             :         ldmxcsr:
    6432          40 :             generate_exception_if(src.type != OP_MEM, EXC_UD);
    6433          24 :             get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
    6434          16 :             generate_exception_if(src.val & ~mxcsr_mask, EXC_GP, 0);
    6435           8 :             asm volatile ( "ldmxcsr %0" :: "m" (src.val) );
    6436           8 :             break;
    6437             : 
    6438             :         case 3: /* stmxcsr */
    6439          90 :             generate_exception_if(vex.pfx, EXC_UD);
    6440          82 :             vcpu_must_have(sse);
    6441             :         stmxcsr:
    6442         242 :             generate_exception_if(dst.type != OP_MEM, EXC_UD);
    6443         234 :             get_fpu(vex.opcx ? X86EMUL_FPU_ymm : X86EMUL_FPU_xmm, &fic);
    6444         228 :             asm volatile ( "stmxcsr %0" : "=m" (dst.val) );
    6445         228 :             break;
    6446             : 
    6447             :         case 5: /* lfence */
    6448          96 :             fail_if(modrm_mod != 3);
    6449          88 :             generate_exception_if(vex.pfx, EXC_UD);
    6450          80 :             vcpu_must_have(sse2);
    6451          80 :             asm volatile ( "lfence" ::: "memory" );
    6452          80 :             break;
    6453             :         case 6:
    6454         192 :             if ( modrm_mod == 3 ) /* mfence */
    6455             :             {
    6456          88 :                 generate_exception_if(vex.pfx, EXC_UD);
    6457          80 :                 vcpu_must_have(sse2);
    6458          80 :                 asm volatile ( "mfence" ::: "memory" );
    6459          80 :                 break;
    6460             :             }
    6461             :             /* else clwb */
    6462         104 :             fail_if(!vex.pfx);
    6463          96 :             vcpu_must_have(clwb);
    6464          96 :             fail_if(!ops->wbinvd);
    6465          88 :             if ( (rc = ops->wbinvd(ctxt)) != X86EMUL_OKAY )
    6466           0 :                 goto done;
    6467          88 :             break;
    6468             :         case 7:
    6469        3655 :             if ( modrm_mod == 3 ) /* sfence */
    6470             :             {
    6471        2857 :                 generate_exception_if(vex.pfx, EXC_UD);
    6472        2849 :                 vcpu_must_have(mmxext);
    6473        2849 :                 asm volatile ( "sfence" ::: "memory" );
    6474        2849 :                 break;
    6475             :             }
    6476             :             /* else clflush{,opt} */
    6477         798 :             if ( !vex.pfx )
    6478         467 :                 vcpu_must_have(clflush);
    6479             :             else
    6480         331 :                 vcpu_must_have(clflushopt);
    6481         798 :             fail_if(ops->wbinvd == NULL);
    6482         790 :             if ( (rc = ops->wbinvd(ctxt)) != 0 )
    6483          15 :                 goto done;
    6484         775 :             break;
    6485             :         default:
    6486           8 :             goto cannot_emulate;
    6487             :         }
    6488        4108 :         break;
    6489             : 
    6490             :     case X86EMUL_OPC_VEX(0x0f, 0xae): /* Grp15 */
    6491         192 :         switch ( modrm_reg & 7 )
    6492             :         {
    6493             :         case 2: /* vldmxcsr */
    6494          16 :             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    6495           8 :             vcpu_must_have(avx);
    6496           8 :             goto ldmxcsr;
    6497             :         case 3: /* vstmxcsr */
    6498         168 :             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    6499         160 :             vcpu_must_have(avx);
    6500         160 :             goto stmxcsr;
    6501             :         }
    6502           8 :         goto cannot_emulate;
    6503             : 
    6504             :     case X86EMUL_OPC_F3(0x0f, 0xae): /* Grp15 */
    6505         176 :         fail_if(modrm_mod != 3);
    6506         168 :         generate_exception_if((modrm_reg & 4) || !mode_64bit(), EXC_UD);
    6507         152 :         fail_if(!ops->read_cr);
    6508         144 :         if ( (rc = ops->read_cr(4, &cr4, ctxt)) != X86EMUL_OKAY )
    6509           0 :             goto done;
    6510         144 :         generate_exception_if(!(cr4 & X86_CR4_FSGSBASE), EXC_UD);
    6511         136 :         seg = modrm_reg & 1 ? x86_seg_gs : x86_seg_fs;
    6512         136 :         fail_if(!ops->read_segment);
    6513         128 :         if ( (rc = ops->read_segment(seg, &sreg, ctxt)) != X86EMUL_OKAY )
    6514           0 :             goto done;
    6515         128 :         dst.reg = decode_register(modrm_rm, &_regs, 0);
    6516         128 :         if ( !(modrm_reg & 2) )
    6517             :         {
    6518             :             /* rd{f,g}sbase */
    6519          32 :             dst.type = OP_REG;
    6520          32 :             dst.bytes = (op_bytes == 8) ? 8 : 4;
    6521          32 :             dst.val = sreg.base;
    6522             :         }
    6523             :         else
    6524             :         {
    6525             :             /* wr{f,g}sbase */
    6526          96 :             if ( op_bytes == 8 )
    6527             :             {
    6528           0 :                 sreg.base = *dst.reg;
    6529           0 :                 generate_exception_if(!is_canonical_address(sreg.base),
    6530             :                                       EXC_GP, 0);
    6531             :             }
    6532             :             else
    6533          96 :                 sreg.base = (uint32_t)*dst.reg;
    6534          96 :             fail_if(!ops->write_segment);
    6535          81 :             if ( (rc = ops->write_segment(seg, &sreg, ctxt)) != X86EMUL_OKAY )
    6536          81 :                 goto done;
    6537             :         }
    6538          32 :         break;
    6539             : 
    6540             :     case X86EMUL_OPC(0x0f, 0xaf): /* imul */
    6541         376 :         emulate_2op_SrcV_srcmem("imul", src, dst, _regs.eflags);
    6542         376 :         break;
    6543             : 
    6544             :     case X86EMUL_OPC(0x0f, 0xb0): case X86EMUL_OPC(0x0f, 0xb1): /* cmpxchg */
    6545             :         /* Save real source value, then compare EAX against destination. */
    6546         593 :         src.orig_val = src.val;
    6547         593 :         src.val = _regs.r(ax);
    6548             :         /* cmp: %%eax - dst ==> dst and src swapped for macro invocation */
    6549         593 :         emulate_2op_SrcV("cmp", dst, src, _regs.eflags);
    6550         593 :         if ( _regs.eflags & X86_EFLAGS_ZF )
    6551             :         {
    6552             :             /* Success: write back to memory. */
    6553         106 :             dst.val = src.orig_val;
    6554             :         }
    6555             :         else
    6556             :         {
    6557             :             /* Failure: write the value we saw to EAX. */
    6558         487 :             dst.type = OP_REG;
    6559         487 :             dst.reg  = (unsigned long *)&_regs.r(ax);
    6560             :         }
    6561         593 :         break;
    6562             : 
    6563             :     case X86EMUL_OPC(0x0f, 0xb2): /* lss */
    6564             :     case X86EMUL_OPC(0x0f, 0xb4): /* lfs */
    6565             :     case X86EMUL_OPC(0x0f, 0xb5): /* lgs */
    6566          13 :         seg = b & 7;
    6567          13 :         goto les;
    6568             : 
    6569             :     case X86EMUL_OPC(0x0f, 0xb3): btr: /* btr */
    6570        1343 :         emulate_2op_SrcV_nobyte("btr", src, dst, _regs.eflags);
    6571        1343 :         break;
    6572             : 
    6573             :     case X86EMUL_OPC(0x0f, 0xb6): /* movzx rm8,r{16,32,64} */
    6574             :         /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
    6575        4744 :         dst.reg   = decode_register(modrm_reg, &_regs, 0);
    6576        4744 :         dst.bytes = op_bytes;
    6577        4744 :         dst.val   = (uint8_t)src.val;
    6578        4744 :         break;
    6579             : 
    6580             :     case X86EMUL_OPC(0x0f, 0xb7): /* movzx rm16,r{16,32,64} */
    6581          56 :         dst.val = (uint16_t)src.val;
    6582          56 :         break;
    6583             : 
    6584             :     case X86EMUL_OPC_F3(0x0f, 0xb8): /* popcnt r/m,r */
    6585          48 :         host_and_vcpu_must_have(popcnt);
    6586          48 :         asm ( "popcnt %1,%0" : "=r" (dst.val) : "rm" (src.val) );
    6587          48 :         _regs.eflags &= ~EFLAGS_MASK;
    6588          48 :         if ( !dst.val )
    6589          24 :             _regs.eflags |= X86_EFLAGS_ZF;
    6590          48 :         break;
    6591             : 
    6592             :     case X86EMUL_OPC(0x0f, 0xba): /* Grp8 */
    6593        1176 :         switch ( modrm_reg & 7 )
    6594             :         {
    6595         476 :         case 4: goto bt;
    6596          24 :         case 5: goto bts;
    6597         588 :         case 6: goto btr;
    6598          80 :         case 7: goto btc;
    6599           8 :         default: generate_exception(EXC_UD);
    6600             :         }
    6601             :         break;
    6602             : 
    6603             :     case X86EMUL_OPC(0x0f, 0xbb): btc: /* btc */
    6604        2492 :         emulate_2op_SrcV_nobyte("btc", src, dst, _regs.eflags);
    6605        2492 :         break;
    6606             : 
    6607             :     case X86EMUL_OPC(0x0f, 0xbc): /* bsf or tzcnt */
    6608             :     {
    6609             :         bool zf;
    6610             : 
    6611        1397 :         asm ( "bsf %2,%0" ASM_FLAG_OUT(, "; setz %1")
    6612             :               : "=r" (dst.val), ASM_FLAG_OUT("=@ccz", "=qm") (zf)
    6613        1397 :               : "rm" (src.val) );
    6614        1397 :         _regs.eflags &= ~X86_EFLAGS_ZF;
    6615        1397 :         if ( (vex.pfx == vex_f3) && vcpu_has_bmi1() )
    6616             :         {
    6617        1219 :             _regs.eflags &= ~X86_EFLAGS_CF;
    6618        2438 :             if ( zf )
    6619             :             {
    6620         273 :                 _regs.eflags |= X86_EFLAGS_CF;
    6621         273 :                 dst.val = op_bytes * 8;
    6622             :             }
    6623         946 :             else if ( !dst.val )
    6624         503 :                 _regs.eflags |= X86_EFLAGS_ZF;
    6625             :         }
    6626         178 :         else if ( zf )
    6627             :         {
    6628          98 :             _regs.eflags |= X86_EFLAGS_ZF;
    6629          98 :             dst.type = OP_NONE;
    6630             :         }
    6631        1397 :         break;
    6632             :     }
    6633             : 
    6634             :     case X86EMUL_OPC(0x0f, 0xbd): /* bsr or lzcnt */
    6635             :     {
    6636             :         bool zf;
    6637             : 
    6638        3850 :         asm ( "bsr %2,%0" ASM_FLAG_OUT(, "; setz %1")
    6639             :               : "=r" (dst.val), ASM_FLAG_OUT("=@ccz", "=qm") (zf)
    6640        3850 :               : "rm" (src.val) );
    6641        3850 :         _regs.eflags &= ~X86_EFLAGS_ZF;
    6642        3850 :         if ( (vex.pfx == vex_f3) && vcpu_has_lzcnt() )
    6643             :         {
    6644        2966 :             _regs.eflags &= ~X86_EFLAGS_CF;
    6645        5932 :             if ( zf )
    6646             :             {
    6647         152 :                 _regs.eflags |= X86_EFLAGS_CF;
    6648         152 :                 dst.val = op_bytes * 8;
    6649             :             }
    6650             :             else
    6651             :             {
    6652        2814 :                 dst.val = op_bytes * 8 - 1 - dst.val;
    6653        2814 :                 if ( !dst.val )
    6654        1429 :                     _regs.eflags |= X86_EFLAGS_ZF;
    6655             :             }
    6656             :         }
    6657         884 :         else if ( zf )
    6658             :         {
    6659         333 :             _regs.eflags |= X86_EFLAGS_ZF;
    6660         333 :             dst.type = OP_NONE;
    6661             :         }
    6662        3850 :         break;
    6663             :     }
    6664             : 
    6665             :     case X86EMUL_OPC(0x0f, 0xbe): /* movsx rm8,r{16,32,64} */
    6666             :         /* Recompute DstReg as we may have decoded AH/BH/CH/DH. */
    6667       13664 :         dst.reg   = decode_register(modrm_reg, &_regs, 0);
    6668       13664 :         dst.bytes = op_bytes;
    6669       13664 :         dst.val   = (int8_t)src.val;
    6670       13664 :         break;
    6671             : 
    6672             :     case X86EMUL_OPC(0x0f, 0xbf): /* movsx rm16,r{16,32,64} */
    6673          80 :         dst.val = (int16_t)src.val;
    6674          80 :         break;
    6675             : 
    6676             :     case X86EMUL_OPC(0x0f, 0xc0): case X86EMUL_OPC(0x0f, 0xc1): /* xadd */
    6677             :         /* Write back the register source. */
    6678         586 :         switch ( dst.bytes )
    6679             :         {
    6680         144 :         case 1: *(uint8_t  *)src.reg = (uint8_t)dst.val; break;
    6681         144 :         case 2: *(uint16_t *)src.reg = (uint16_t)dst.val; break;
    6682         144 :         case 4: *src.reg = (uint32_t)dst.val; break; /* 64b reg: zero-extend */
    6683         154 :         case 8: *src.reg = dst.val; break;
    6684             :         }
    6685         586 :         goto add;
    6686             : 
    6687             :     CASE_SIMD_ALL_FP(, 0x0f, 0xc2):        /* cmp{p,s}{s,d} $imm8,xmm/mem,xmm */
    6688             :     CASE_SIMD_ALL_FP(_VEX, 0x0f, 0xc2):    /* vcmp{p,s}{s,d} $imm8,{x,y}mm/mem,{x,y}mm */
    6689             :     CASE_SIMD_PACKED_FP(, 0x0f, 0xc6):     /* shufp{s,d} $imm8,xmm/mem,xmm */
    6690             :     CASE_SIMD_PACKED_FP(_VEX, 0x0f, 0xc6): /* vshufp{s,d} $imm8,{x,y}mm/mem,{x,y}mm */
    6691         312 :         d = (d & ~SrcMask) | SrcMem;
    6692         312 :         if ( vex.opcx == vex_none )
    6693             :         {
    6694         304 :             if ( vex.pfx & VEX_PREFIX_DOUBLE_MASK )
    6695           8 :                 goto simd_0f_imm8_sse2;
    6696         296 :             vcpu_must_have(sse);
    6697         296 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    6698         288 :             goto simd_0f_imm8;
    6699             :         }
    6700           8 :         goto simd_0f_imm8_avx;
    6701             : 
    6702             :     case X86EMUL_OPC(0x0f, 0xc3): /* movnti */
    6703             :         /* Ignore the non-temporal hint for now. */
    6704         231 :         vcpu_must_have(sse2);
    6705         231 :         dst.val = src.val;
    6706         231 :         sfence = true;
    6707         231 :         break;
    6708             : 
    6709             :     CASE_SIMD_PACKED_INT(0x0f, 0xc4):      /* pinsrw $imm8,r32/m16,{,x}mm */
    6710             :     case X86EMUL_OPC_VEX_66(0x0f, 0xc4):   /* vpinsrw $imm8,r32/m16,xmm,xmm */
    6711         757 :         generate_exception_if(vex.l, EXC_UD);
    6712         749 :         memcpy(mmvalp, &src.val, 2);
    6713         749 :         ea.type = OP_MEM;
    6714         749 :         goto simd_0f_int_imm8;
    6715             : 
    6716             :     case X86EMUL_OPC_VEX_66(0x0f, 0xc5):   /* vpextrw $imm8,xmm,reg */
    6717          16 :         generate_exception_if(vex.l, EXC_UD);
    6718             :         /* fall through */
    6719             :     CASE_SIMD_PACKED_INT(0x0f, 0xc5):      /* pextrw $imm8,{,x}mm,reg */
    6720         168 :         opc = init_prefixes(stub);
    6721         168 :         opc[0] = b;
    6722             :         /* Convert GPR destination to %rAX. */
    6723         168 :         rex_prefix &= ~REX_R;
    6724         168 :         vex.r = 1;
    6725         168 :         if ( !mode_64bit() )
    6726          96 :             vex.w = 0;
    6727         168 :         opc[1] = modrm & 0xc7;
    6728         168 :         opc[2] = imm1;
    6729         168 :         fic.insn_bytes = PFX_BYTES + 3;
    6730         168 :         goto simd_0f_to_gpr;
    6731             : 
    6732             :     case X86EMUL_OPC(0x0f, 0xc7): /* Grp9 */
    6733             :     {
    6734             :         union {
    6735             :             uint32_t u32[2];
    6736             :             uint64_t u64[2];
    6737             :         } *old, *aux;
    6738             : 
    6739        1302 :         if ( ea.type == OP_REG )
    6740             :         {
    6741             :             bool __maybe_unused carry;
    6742             : 
    6743         432 :             switch ( modrm_reg & 7 )
    6744             :             {
    6745             :             default:
    6746           8 :                 goto cannot_emulate;
    6747             : 
    6748             : #ifdef HAVE_GAS_RDRAND
    6749             :             case 6: /* rdrand */
    6750             :                 generate_exception_if(rep_prefix(), EXC_UD);
    6751             :                 host_and_vcpu_must_have(rdrand);
    6752             :                 dst = ea;
    6753             :                 switch ( op_bytes )
    6754             :                 {
    6755             :                 case 2:
    6756             :                     asm ( "rdrand %w0" ASM_FLAG_OUT(, "; setc %1")
    6757             :                           : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
    6758             :                     break;
    6759             :                 default:
    6760             : # ifdef __x86_64__
    6761             :                     asm ( "rdrand %k0" ASM_FLAG_OUT(, "; setc %1")
    6762             :                           : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
    6763             :                     break;
    6764             :                 case 8:
    6765             : # endif
    6766             :                     asm ( "rdrand %0" ASM_FLAG_OUT(, "; setc %1")
    6767             :                           : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
    6768             :                     break;
    6769             :                 }
    6770             :                 _regs.eflags &= ~EFLAGS_MASK;
    6771             :                 if ( carry )
    6772             :                     _regs.eflags |= X86_EFLAGS_CF;
    6773             :                 break;
    6774             : #endif
    6775             : 
    6776             :             case 7: /* rdseed / rdpid */
    6777         424 :                 if ( repe_prefix() ) /* rdpid */
    6778             :                 {
    6779          88 :                     generate_exception_if(ea.type != OP_REG, EXC_UD);
    6780          88 :                     vcpu_must_have(rdpid);
    6781          88 :                     fail_if(!ops->read_msr);
    6782          80 :                     if ( (rc = ops->read_msr(MSR_TSC_AUX, &msr_val,
    6783             :                                              ctxt)) != X86EMUL_OKAY )
    6784           0 :                         goto done;
    6785          80 :                     dst = ea;
    6786          80 :                     dst.val = msr_val;
    6787          80 :                     dst.bytes = 4;
    6788          80 :                     break;
    6789             :                 }
    6790             : #ifdef HAVE_GAS_RDSEED
    6791             :                 generate_exception_if(rep_prefix(), EXC_UD);
    6792             :                 host_and_vcpu_must_have(rdseed);
    6793             :                 dst = ea;
    6794             :                 switch ( op_bytes )
    6795             :                 {
    6796             :                 case 2:
    6797             :                     asm ( "rdseed %w0" ASM_FLAG_OUT(, "; setc %1")
    6798             :                           : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
    6799             :                     break;
    6800             :                 default:
    6801             : # ifdef __x86_64__
    6802             :                     asm ( "rdseed %k0" ASM_FLAG_OUT(, "; setc %1")
    6803             :                           : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
    6804             :                     break;
    6805             :                 case 8:
    6806             : # endif
    6807             :                     asm ( "rdseed %0" ASM_FLAG_OUT(, "; setc %1")
    6808             :                           : "=r" (dst.val), ASM_FLAG_OUT("=@ccc", "=qm") (carry) );
    6809             :                     break;
    6810             :                 }
    6811             :                 _regs.eflags &= ~EFLAGS_MASK;
    6812             :                 if ( carry )
    6813             :                     _regs.eflags |= X86_EFLAGS_CF;
    6814             :                 break;
    6815             : #endif
    6816             :             }
    6817         416 :             break;
    6818             :         }
    6819             : 
    6820             :         /* cmpxchg8b/cmpxchg16b */
    6821         870 :         generate_exception_if((modrm_reg & 7) != 1, EXC_UD);
    6822         862 :         fail_if(!ops->cmpxchg);
    6823         830 :         if ( rex_prefix & REX_W )
    6824             :         {
    6825         562 :             host_and_vcpu_must_have(cx16);
    6826         562 :             generate_exception_if(!is_aligned(ea.mem.seg, ea.mem.off, 16,
    6827             :                                               ctxt, ops),
    6828             :                                   EXC_GP, 0);
    6829         554 :             op_bytes = 16;
    6830             :         }
    6831             :         else
    6832             :         {
    6833         268 :             vcpu_must_have(cx8);
    6834         268 :             op_bytes = 8;
    6835             :         }
    6836             : 
    6837         822 :         old = container_of(&mmvalp->ymm[0], typeof(*old), u64[0]);
    6838         822 :         aux = container_of(&mmvalp->ymm[2], typeof(*aux), u64[0]);
    6839             : 
    6840             :         /* Get actual old value. */
    6841         822 :         if ( (rc = ops->read(ea.mem.seg, ea.mem.off, old, op_bytes,
    6842             :                              ctxt)) != X86EMUL_OKAY )
    6843          40 :             goto done;
    6844             : 
    6845             :         /* Get expected value. */
    6846         782 :         if ( !(rex_prefix & REX_W) )
    6847             :         {
    6848         252 :             aux->u32[0] = _regs.eax;
    6849         252 :             aux->u32[1] = _regs.edx;
    6850             :         }
    6851             :         else
    6852             :         {
    6853         530 :             aux->u64[0] = _regs.r(ax);
    6854         530 :             aux->u64[1] = _regs.r(dx);
    6855             :         }
    6856             : 
    6857         782 :         if ( memcmp(old, aux, op_bytes) )
    6858             :         {
    6859             :             /* Expected != actual: store actual to rDX:rAX and clear ZF. */
    6860         259 :             _regs.r(ax) = !(rex_prefix & REX_W) ? old->u32[0] : old->u64[0];
    6861         259 :             _regs.r(dx) = !(rex_prefix & REX_W) ? old->u32[1] : old->u64[1];
    6862         259 :             _regs.eflags &= ~X86_EFLAGS_ZF;
    6863             :         }
    6864             :         else
    6865             :         {
    6866             :             /*
    6867             :              * Expected == actual: Get proposed value, attempt atomic cmpxchg
    6868             :              * and set ZF.
    6869             :              */
    6870         523 :             if ( !(rex_prefix & REX_W) )
    6871             :             {
    6872          90 :                 aux->u32[0] = _regs.ebx;
    6873          90 :                 aux->u32[1] = _regs.ecx;
    6874             :             }
    6875             :             else
    6876             :             {
    6877         433 :                 aux->u64[0] = _regs.r(bx);
    6878         433 :                 aux->u64[1] = _regs.r(cx);
    6879             :             }
    6880             : 
    6881         523 :             if ( (rc = ops->cmpxchg(ea.mem.seg, ea.mem.off, old, aux,
    6882             :                                     op_bytes, ctxt)) != X86EMUL_OKAY )
    6883           8 :                 goto done;
    6884         515 :             _regs.eflags |= X86_EFLAGS_ZF;
    6885             :         }
    6886         774 :         break;
    6887             :     }
    6888             : 
    6889             :     case X86EMUL_OPC(0x0f, 0xc8) ... X86EMUL_OPC(0x0f, 0xcf): /* bswap */
    6890       30521 :         dst.type = OP_REG;
    6891       61042 :         dst.reg  = decode_register(
    6892       30521 :             (b & 7) | ((rex_prefix & 1) << 3), &_regs, 0);
    6893       30521 :         switch ( dst.bytes = op_bytes )
    6894             :         {
    6895             :         default: /* case 2: */
    6896             :             /* Undefined behaviour. Writes zero on all tested CPUs. */
    6897        6823 :             dst.val = 0;
    6898        6823 :             break;
    6899             :         case 4:
    6900             : #ifdef __x86_64__
    6901        8914 :             asm ( "bswap %k0" : "=r" (dst.val) : "0" (*(uint32_t *)dst.reg) );
    6902        8914 :             break;
    6903             :         case 8:
    6904             : #endif
    6905       14784 :             asm ( "bswap %0" : "=r" (dst.val) : "0" (*dst.reg) );
    6906       14784 :             break;
    6907             :         }
    6908       30521 :         break;
    6909             : 
    6910             :     CASE_SIMD_PACKED_INT(0x0f, 0xd1):    /* psrlw {,x}mm/mem,{,x}mm */
    6911             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd1): /* vpsrlw xmm/m128,{x,y}mm,{x,y}mm */
    6912             :     CASE_SIMD_PACKED_INT(0x0f, 0xd2):    /* psrld {,x}mm/mem,{,x}mm */
    6913             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd2): /* vpsrld xmm/m128,{x,y}mm,{x,y}mm */
    6914             :     CASE_SIMD_PACKED_INT(0x0f, 0xd3):    /* psrlq {,x}mm/mem,{,x}mm */
    6915             :     case X86EMUL_OPC_VEX_66(0x0f, 0xd3): /* vpsrlq xmm/m128,{x,y}mm,{x,y}mm */
    6916             :     CASE_SIMD_PACKED_INT(0x0f, 0xe1):    /* psraw {,x}mm/mem,{,x}mm */
    6917             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe1): /* vpsraw xmm/m128,{x,y}mm,{x,y}mm */
    6918             :     CASE_SIMD_PACKED_INT(0x0f, 0xe2):    /* psrad {,x}mm/mem,{,x}mm */
    6919             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe2): /* vpsrad xmm/m128,{x,y}mm,{x,y}mm */
    6920             :     CASE_SIMD_PACKED_INT(0x0f, 0xf1):    /* psllw {,x}mm/mem,{,x}mm */
    6921             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf1): /* vpsllw xmm/m128,{x,y}mm,{x,y}mm */
    6922             :     CASE_SIMD_PACKED_INT(0x0f, 0xf2):    /* pslld {,x}mm/mem,{,x}mm */
    6923             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf2): /* vpslld xmm/m128,{x,y}mm,{x,y}mm */
    6924             :     CASE_SIMD_PACKED_INT(0x0f, 0xf3):    /* psllq {,x}mm/mem,{,x}mm */
    6925             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf3): /* vpsllq xmm/m128,{x,y}mm,{x,y}mm */
    6926         441 :         op_bytes = vex.pfx ? 16 : 8;
    6927         441 :         goto simd_0f_int;
    6928             : 
    6929             :     case X86EMUL_OPC(0x0f, 0xd4):        /* paddq mm/m64,mm */
    6930             :     case X86EMUL_OPC(0x0f, 0xf4):        /* pmuludq mm/m64,mm */
    6931             :     case X86EMUL_OPC(0x0f, 0xfb):        /* psubq mm/m64,mm */
    6932         593 :         vcpu_must_have(sse2);
    6933         593 :         goto simd_0f_mmx;
    6934             : 
    6935             :     case X86EMUL_OPC_F3(0x0f, 0xd6):     /* movq2dq mm,xmm */
    6936             :     case X86EMUL_OPC_F2(0x0f, 0xd6):     /* movdq2q xmm,mm */
    6937          16 :         generate_exception_if(ea.type != OP_REG, EXC_UD);
    6938           8 :         op_bytes = 8;
    6939           8 :         host_and_vcpu_must_have(mmx);
    6940           8 :         goto simd_0f_int;
    6941             : 
    6942             :     case X86EMUL_OPC(0x0f, 0xe7):        /* movntq mm,m64 */
    6943          57 :         generate_exception_if(ea.type != OP_MEM, EXC_UD);
    6944          49 :         sfence = true;
    6945             :         /* fall through */
    6946             :     case X86EMUL_OPC(0x0f, 0xda):        /* pminub mm/m64,mm */
    6947             :     case X86EMUL_OPC(0x0f, 0xde):        /* pmaxub mm/m64,mm */
    6948             :     case X86EMUL_OPC(0x0f, 0xea):        /* pminsw mm/m64,mm */
    6949             :     case X86EMUL_OPC(0x0f, 0xee):        /* pmaxsw mm/m64,mm */
    6950             :     case X86EMUL_OPC(0x0f, 0xe0):        /* pavgb mm/m64,mm */
    6951             :     case X86EMUL_OPC(0x0f, 0xe3):        /* pavgw mm/m64,mm */
    6952             :     case X86EMUL_OPC(0x0f, 0xe4):        /* pmulhuw mm/m64,mm */
    6953             :     case X86EMUL_OPC(0x0f, 0xf6):        /* psadbw mm/m64,mm */
    6954         111 :         vcpu_must_have(mmxext);
    6955         111 :         goto simd_0f_mmx;
    6956             : 
    6957             :     case X86EMUL_OPC_66(0x0f, 0xe6):       /* cvttpd2dq xmm/mem,xmm */
    6958             :     case X86EMUL_OPC_VEX_66(0x0f, 0xe6):   /* vcvttpd2dq {x,y}mm/mem,xmm */
    6959             :     case X86EMUL_OPC_F3(0x0f, 0xe6):       /* cvtdq2pd xmm/mem,xmm */
    6960             :     case X86EMUL_OPC_VEX_F3(0x0f, 0xe6):   /* vcvtdq2pd xmm/mem,{x,y}mm */
    6961             :     case X86EMUL_OPC_F2(0x0f, 0xe6):       /* cvtpd2dq xmm/mem,xmm */
    6962             :     case X86EMUL_OPC_VEX_F2(0x0f, 0xe6):   /* vcvtpd2dq {x,y}mm/mem,xmm */
    6963           8 :         d |= TwoOp;
    6964           8 :         op_bytes = 8 << (!!(vex.pfx & VEX_PREFIX_DOUBLE_MASK) + vex.l);
    6965           8 :         goto simd_0f_cvt;
    6966             : 
    6967             :     CASE_SIMD_PACKED_INT(0x0f, 0xf7):    /* maskmov{q,dqu} {,x}mm,{,x}mm */
    6968             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf7): /* vmaskmovdqu xmm,xmm */
    6969         106 :         generate_exception_if(ea.type != OP_REG, EXC_UD);
    6970          98 :         if ( vex.opcx != vex_none )
    6971             :         {
    6972          44 :             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    6973          36 :             d |= TwoOp;
    6974          36 :             host_and_vcpu_must_have(avx);
    6975          36 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    6976             :         }
    6977          54 :         else if ( vex.pfx )
    6978             :         {
    6979          16 :             vcpu_must_have(sse2);
    6980          16 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    6981             :         }
    6982             :         else
    6983             :         {
    6984          38 :             host_and_vcpu_must_have(mmx);
    6985          38 :             vcpu_must_have(mmxext);
    6986          38 :             get_fpu(X86EMUL_FPU_mmx, &fic);
    6987             :         }
    6988             : 
    6989             :         /*
    6990             :          * While we can't reasonably provide fully correct behavior here
    6991             :          * (in particular avoiding the memory read in anticipation of all
    6992             :          * bytes in the range eventually being written), we can (and should)
    6993             :          * still suppress the memory access if all mask bits are clear. Read
    6994             :          * the mask bits via {,v}pmovmskb for that purpose.
    6995             :          */
    6996          67 :         opc = init_prefixes(stub);
    6997          67 :         opc[0] = 0xd7; /* {,v}pmovmskb */
    6998             :         /* (Ab)use "sfence" for latching the original REX.R / VEX.R. */
    6999          67 :         sfence = rex_prefix & REX_R;
    7000             :         /* Convert GPR destination to %rAX. */
    7001          67 :         rex_prefix &= ~REX_R;
    7002          67 :         vex.r = 1;
    7003          67 :         if ( !mode_64bit() )
    7004          32 :             vex.w = 0;
    7005          67 :         opc[1] = modrm & 0xc7;
    7006          67 :         fic.insn_bytes = PFX_BYTES + 2;
    7007          67 :         opc[2] = 0xc3;
    7008             : 
    7009          67 :         copy_REX_VEX(opc, rex_prefix, vex);
    7010          67 :         invoke_stub("", "", "=a" (ea.val) : [dummy] "i" (0));
    7011             : 
    7012          67 :         put_stub(stub);
    7013          67 :         if ( !ea.val )
    7014          39 :             goto complete_insn;
    7015             : 
    7016          28 :         opc = init_prefixes(stub);
    7017          28 :         opc[0] = b;
    7018          28 :         opc[1] = modrm;
    7019             :         /* Restore high bit of XMM destination. */
    7020          28 :         if ( sfence )
    7021             :         {
    7022          21 :             rex_prefix |= REX_R;
    7023          21 :             vex.r = 0;
    7024             :         }
    7025             : 
    7026          28 :         ea.type = OP_MEM;
    7027          28 :         ea.mem.off = truncate_ea(_regs.r(di));
    7028          28 :         sfence = true;
    7029          28 :         break;
    7030             : 
    7031             :     case X86EMUL_OPC(0x0f38, 0x00):    /* pshufb mm/m64,mm */
    7032             :     case X86EMUL_OPC_66(0x0f38, 0x00): /* pshufb xmm/m128,xmm */
    7033             :     case X86EMUL_OPC(0x0f38, 0x01):    /* phaddw mm/m64,mm */
    7034             :     case X86EMUL_OPC_66(0x0f38, 0x01): /* phaddw xmm/m128,xmm */
    7035             :     case X86EMUL_OPC(0x0f38, 0x02):    /* phaddd mm/m64,mm */
    7036             :     case X86EMUL_OPC_66(0x0f38, 0x02): /* phaddd xmm/m128,xmm */
    7037             :     case X86EMUL_OPC(0x0f38, 0x03):    /* phaddsw mm/m64,mm */
    7038             :     case X86EMUL_OPC_66(0x0f38, 0x03): /* phaddsw xmm/m128,xmm */
    7039             :     case X86EMUL_OPC(0x0f38, 0x04):    /* pmaddubsw mm/m64,mm */
    7040             :     case X86EMUL_OPC_66(0x0f38, 0x04): /* pmaddubsw xmm/m128,xmm */
    7041             :     case X86EMUL_OPC(0x0f38, 0x05):    /* phsubw mm/m64,mm */
    7042             :     case X86EMUL_OPC_66(0x0f38, 0x05): /* phsubw xmm/m128,xmm */
    7043             :     case X86EMUL_OPC(0x0f38, 0x06):    /* phsubd mm/m64,mm */
    7044             :     case X86EMUL_OPC_66(0x0f38, 0x06): /* phsubd xmm/m128,xmm */
    7045             :     case X86EMUL_OPC(0x0f38, 0x07):    /* phsubsw mm/m64,mm */
    7046             :     case X86EMUL_OPC_66(0x0f38, 0x07): /* phsubsw xmm/m128,xmm */
    7047             :     case X86EMUL_OPC(0x0f38, 0x08):    /* psignb mm/m64,mm */
    7048             :     case X86EMUL_OPC_66(0x0f38, 0x08): /* psignb xmm/m128,xmm */
    7049             :     case X86EMUL_OPC(0x0f38, 0x09):    /* psignw mm/m64,mm */
    7050             :     case X86EMUL_OPC_66(0x0f38, 0x09): /* psignw xmm/m128,xmm */
    7051             :     case X86EMUL_OPC(0x0f38, 0x0a):    /* psignd mm/m64,mm */
    7052             :     case X86EMUL_OPC_66(0x0f38, 0x0a): /* psignd xmm/m128,xmm */
    7053             :     case X86EMUL_OPC(0x0f38, 0x0b):    /* pmulhrsw mm/m64,mm */
    7054             :     case X86EMUL_OPC_66(0x0f38, 0x0b): /* pmulhrsw xmm/m128,xmm */
    7055             :     case X86EMUL_OPC(0x0f38, 0x1c):    /* pabsb mm/m64,mm */
    7056             :     case X86EMUL_OPC_66(0x0f38, 0x1c): /* pabsb xmm/m128,xmm */
    7057             :     case X86EMUL_OPC(0x0f38, 0x1d):    /* pabsw mm/m64,mm */
    7058             :     case X86EMUL_OPC_66(0x0f38, 0x1d): /* pabsw xmm/m128,xmm */
    7059             :     case X86EMUL_OPC(0x0f38, 0x1e):    /* pabsd mm/m64,mm */
    7060             :     case X86EMUL_OPC_66(0x0f38, 0x1e): /* pabsd xmm/m128,xmm */
    7061          56 :         host_and_vcpu_must_have(ssse3);
    7062          56 :         if ( vex.pfx )
    7063             :         {
    7064             :     simd_0f38_common:
    7065         175 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    7066             :         }
    7067             :         else
    7068             :         {
    7069          24 :             host_and_vcpu_must_have(mmx);
    7070          24 :             get_fpu(X86EMUL_FPU_mmx, &fic);
    7071             :         }
    7072         167 :         opc = init_prefixes(stub);
    7073         167 :         opc[0] = 0x38;
    7074         167 :         opc[1] = b;
    7075         167 :         opc[2] = modrm;
    7076         167 :         if ( ea.type == OP_MEM )
    7077             :         {
    7078             :             /* Convert memory operand to (%rAX). */
    7079          24 :             rex_prefix &= ~REX_B;
    7080          24 :             vex.b = 1;
    7081          24 :             opc[2] &= 0x38;
    7082             :         }
    7083         167 :         fic.insn_bytes = PFX_BYTES + 3;
    7084         167 :         break;
    7085             : 
    7086             :     case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */
    7087             :     case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */
    7088             :     case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */
    7089             :     case X86EMUL_OPC_66(0x0f38, 0x23): /* pmovsxwd xmm/m64,xmm */
    7090             :     case X86EMUL_OPC_66(0x0f38, 0x24): /* pmovsxwq xmm/m32,xmm */
    7091             :     case X86EMUL_OPC_66(0x0f38, 0x25): /* pmovsxdq xmm/m64,xmm */
    7092             :     case X86EMUL_OPC_66(0x0f38, 0x30): /* pmovzxbw xmm/m64,xmm */
    7093             :     case X86EMUL_OPC_66(0x0f38, 0x31): /* pmovzxbd xmm/m32,xmm */
    7094             :     case X86EMUL_OPC_66(0x0f38, 0x32): /* pmovzxbq xmm/m16,xmm */
    7095             :     case X86EMUL_OPC_66(0x0f38, 0x33): /* pmovzxwd xmm/m64,xmm */
    7096             :     case X86EMUL_OPC_66(0x0f38, 0x34): /* pmovzxwq xmm/m32,xmm */
    7097             :     case X86EMUL_OPC_66(0x0f38, 0x35): /* pmovzxdq xmm/m64,xmm */
    7098           8 :         op_bytes = 16 >> pmov_convert_delta[b & 7];
    7099             :         /* fall through */
    7100             :     case X86EMUL_OPC_66(0x0f38, 0x10): /* pblendvb XMM0,xmm/m128,xmm */
    7101             :     case X86EMUL_OPC_66(0x0f38, 0x14): /* blendvps XMM0,xmm/m128,xmm */
    7102             :     case X86EMUL_OPC_66(0x0f38, 0x15): /* blendvpd XMM0,xmm/m128,xmm */
    7103             :     case X86EMUL_OPC_66(0x0f38, 0x28): /* pmuldq xmm/m128,xmm */
    7104             :     case X86EMUL_OPC_66(0x0f38, 0x29): /* pcmpeqq xmm/m128,xmm */
    7105             :     case X86EMUL_OPC_66(0x0f38, 0x2b): /* packusdw xmm/m128,xmm */
    7106             :     case X86EMUL_OPC_66(0x0f38, 0x38): /* pminsb xmm/m128,xmm */
    7107             :     case X86EMUL_OPC_66(0x0f38, 0x39): /* pminsd xmm/m128,xmm */
    7108             :     case X86EMUL_OPC_66(0x0f38, 0x3a): /* pminub xmm/m128,xmm */
    7109             :     case X86EMUL_OPC_66(0x0f38, 0x3b): /* pminud xmm/m128,xmm */
    7110             :     case X86EMUL_OPC_66(0x0f38, 0x3c): /* pmaxsb xmm/m128,xmm */
    7111             :     case X86EMUL_OPC_66(0x0f38, 0x3d): /* pmaxsd xmm/m128,xmm */
    7112             :     case X86EMUL_OPC_66(0x0f38, 0x3e): /* pmaxub xmm/m128,xmm */
    7113             :     case X86EMUL_OPC_66(0x0f38, 0x3f): /* pmaxud xmm/m128,xmm */
    7114             :     case X86EMUL_OPC_66(0x0f38, 0x40): /* pmulld xmm/m128,xmm */
    7115             :     case X86EMUL_OPC_66(0x0f38, 0x41): /* phminposuw xmm/m128,xmm */
    7116          24 :         host_and_vcpu_must_have(sse4_1);
    7117          24 :         goto simd_0f38_common;
    7118             : 
    7119             :     case X86EMUL_OPC_66(0x0f38, 0x17):     /* ptest xmm/m128,xmm */
    7120             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x17): /* vptest {x,y}mm/mem,{x,y}mm */
    7121         353 :         if ( vex.opcx == vex_none )
    7122             :         {
    7123          88 :             host_and_vcpu_must_have(sse4_1);
    7124          88 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    7125             :         }
    7126             :         else
    7127             :         {
    7128         265 :             generate_exception_if(vex.reg != 0xf, EXC_UD);
    7129         257 :             host_and_vcpu_must_have(avx);
    7130         257 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    7131             :         }
    7132             : 
    7133         333 :         opc = init_prefixes(stub);
    7134         333 :         if ( vex.opcx == vex_none )
    7135          83 :             opc++[0] = 0x38;
    7136         333 :         opc[0] = b;
    7137         333 :         opc[1] = modrm;
    7138         333 :         if ( ea.type == OP_MEM )
    7139             :         {
    7140         244 :             rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16 << vex.l, ctxt);
    7141         244 :             if ( rc != X86EMUL_OKAY )
    7142           8 :                 goto done;
    7143             : 
    7144             :             /* Convert memory operand to (%rAX). */
    7145         236 :             rex_prefix &= ~REX_B;
    7146         236 :             vex.b = 1;
    7147         236 :             opc[1] &= 0x38;
    7148             :         }
    7149         325 :         fic.insn_bytes = PFX_BYTES + 2;
    7150         325 :         opc[2] = 0xc3;
    7151         325 :         if ( vex.opcx == vex_none )
    7152             :         {
    7153             :             /* Cover for extra prefix byte. */
    7154          80 :             --opc;
    7155          80 :             ++fic.insn_bytes;
    7156             :         }
    7157             : 
    7158         325 :         copy_REX_VEX(opc, rex_prefix, vex);
    7159         325 :         emulate_stub("+m" (*mmvalp), "a" (mmvalp));
    7160             : 
    7161         325 :         put_stub(stub);
    7162         325 :         check_xmm_exn(&fic);
    7163             : 
    7164         325 :         state->simd_size = simd_none;
    7165         325 :         dst.type = OP_NONE;
    7166         325 :         break;
    7167             : 
    7168             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x20): /* vpmovsxbw xmm/mem,{x,y}mm */
    7169             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x21): /* vpmovsxbd xmm/mem,{x,y}mm */
    7170             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x22): /* vpmovsxbq xmm/mem,{x,y}mm */
    7171             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x23): /* vpmovsxwd xmm/mem,{x,y}mm */
    7172             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x24): /* vpmovsxwq xmm/mem,{x,y}mm */
    7173             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x25): /* vpmovsxdq xmm/mem,{x,y}mm */
    7174             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x30): /* vpmovzxbw xmm/mem,{x,y}mm */
    7175             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x31): /* vpmovzxbd xmm/mem,{x,y}mm */
    7176             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x32): /* vpmovzxbq xmm/mem,{x,y}mm */
    7177             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x33): /* vpmovzxwd xmm/mem,{x,y}mm */
    7178             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x34): /* vpmovzxwq xmm/mem,{x,y}mm */
    7179             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x35): /* vpmovzxdq xmm/mem,{x,y}mm */
    7180         422 :         op_bytes = 16 >> (pmov_convert_delta[b & 7] - vex.l);
    7181         422 :         goto simd_0f_int;
    7182             : 
    7183             :     case X86EMUL_OPC_66(0x0f38, 0x2a):     /* movntdqa m128,xmm */
    7184             :     case X86EMUL_OPC_VEX_66(0x0f38, 0x2a): /* vmovntdqa mem,{x,y}mm */
    7185          32 :         generate_exception_if(ea.type != OP_MEM, EXC_UD);
    7186             :         /* Ignore the non-temporal hint for now, using movdqa instead. */
    7187          24 :         asm volatile ( "mfence" ::: "memory" );
    7188          24 :         b = 0x6f;
    7189          24 :         if ( vex.opcx == vex_none )
    7190           8 :             vcpu_must_have(sse4_1);
    7191             :         else
    7192             :         {
    7193          16 :             vex.opcx = vex_0f;
    7194          16 :             if ( vex.l )
    7195           8 :                 vcpu_must_have(avx2);
    7196             :         }
    7197          24 :         goto movdqa;
    7198             : 
    7199             :     case X86EMUL_OPC_66(0x0f38, 0x37): /* pcmpgtq xmm/m128,xmm */
    7200           8 :         host_and_vcpu_must_have(sse4_2);
    7201           8 :         goto simd_0f38_common;
    7202             : 
    7203             :     case X86EMUL_OPC(0x0f38, 0xc8):     /* sha1nexte xmm/m128,xmm */
    7204             :     case X86EMUL_OPC(0x0f38, 0xc9):     /* sha1msg1 xmm/m128,xmm */
    7205             :     case X86EMUL_OPC(0x0f38, 0xca):     /* sha1msg2 xmm/m128,xmm */
    7206             :     case X86EMUL_OPC(0x0f38, 0xcb):     /* sha256rnds2 XMM0,xmm/m128,xmm */
    7207             :     case X86EMUL_OPC(0x0f38, 0xcc):     /* sha256msg1 xmm/m128,xmm */
    7208             :     case X86EMUL_OPC(0x0f38, 0xcd):     /* sha256msg2 xmm/m128,xmm */
    7209           8 :         host_and_vcpu_must_have(sha);
    7210           0 :         op_bytes = 16;
    7211           0 :         goto simd_0f38_common;
    7212             : 
    7213             :     case X86EMUL_OPC_66(0x0f38, 0xdb):     /* aesimc xmm/m128,xmm */
    7214             :     case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */
    7215             :     case X86EMUL_OPC_66(0x0f38, 0xdc):     /* aesenc xmm/m128,xmm,xmm */
    7216             :     case X86EMUL_OPC_VEX_66(0x0f38, 0xdc): /* vaesenc xmm/m128,xmm,xmm */
    7217             :     case X86EMUL_OPC_66(0x0f38, 0xdd):     /* aesenclast xmm/m128,xmm,xmm */
    7218             :     case X86EMUL_OPC_VEX_66(0x0f38, 0xdd): /* vaesenclast xmm/m128,xmm,xmm */
    7219             :     case X86EMUL_OPC_66(0x0f38, 0xde):     /* aesdec xmm/m128,xmm,xmm */
    7220             :     case X86EMUL_OPC_VEX_66(0x0f38, 0xde): /* vaesdec xmm/m128,xmm,xmm */
    7221             :     case X86EMUL_OPC_66(0x0f38, 0xdf):     /* aesdeclast xmm/m128,xmm,xmm */
    7222             :     case X86EMUL_OPC_VEX_66(0x0f38, 0xdf): /* vaesdeclast xmm/m128,xmm,xmm */
    7223         127 :         host_and_vcpu_must_have(aesni);
    7224         127 :         if ( vex.opcx == vex_none )
    7225         111 :             goto simd_0f38_common;
    7226          16 :         generate_exception_if(vex.l, EXC_UD);
    7227           8 :         goto simd_0f_avx;
    7228             : 
    7229             :     case X86EMUL_OPC(0x0f38, 0xf0): /* movbe m,r */
    7230             :     case X86EMUL_OPC(0x0f38, 0xf1): /* movbe r,m */
    7231         688 :         vcpu_must_have(movbe);
    7232         688 :         switch ( op_bytes )
    7233             :         {
    7234             :         case 2:
    7235         288 :             asm ( "xchg %h0,%b0" : "=Q" (dst.val)
    7236         288 :                                  : "0" (*(uint32_t *)&src.val) );
    7237         288 :             break;
    7238             :         case 4:
    7239             : #ifdef __x86_64__
    7240         352 :             asm ( "bswap %k0" : "=r" (dst.val)
    7241         352 :                               : "0" (*(uint32_t *)&src.val) );
    7242         352 :             break;
    7243             :         case 8:
    7244             : #endif
    7245          48 :             asm ( "bswap %0" : "=r" (dst.val) : "0" (src.val) );
    7246          48 :             break;
    7247             :         default:
    7248           0 :             ASSERT_UNREACHABLE();
    7249             :         }
    7250         688 :         break;
    7251             : #ifdef HAVE_GAS_SSE4_2
    7252             :     case X86EMUL_OPC_F2(0x0f38, 0xf0): /* crc32 r/m8, r{32,64} */
    7253             :     case X86EMUL_OPC_F2(0x0f38, 0xf1): /* crc32 r/m{16,32,64}, r{32,64} */
    7254             :         host_and_vcpu_must_have(sse4_2);
    7255             :         dst.bytes = rex_prefix & REX_W ? 8 : 4;
    7256             :         switch ( op_bytes )
    7257             :         {
    7258             :         case 1:
    7259             :             asm ( "crc32b %1,%k0" : "+r" (dst.val)
    7260             :                                   : "qm" (*(uint8_t *)&src.val) );
    7261             :             break;
    7262             :         case 2:
    7263             :             asm ( "crc32w %1,%k0" : "+r" (dst.val)
    7264             :                                   : "rm" (*(uint16_t *)&src.val) );
    7265             :             break;
    7266             :         case 4:
    7267             :             asm ( "crc32l %1,%k0" : "+r" (dst.val)
    7268             :                                   : "rm" (*(uint32_t *)&src.val) );
    7269             :             break;
    7270             : # ifdef __x86_64__
    7271             :         case 8:
    7272             :             asm ( "crc32q %1,%0" : "+r" (dst.val) : "rm" (src.val) );
    7273             :             break;
    7274             : # endif
    7275             :         default:
    7276             :             ASSERT_UNREACHABLE();
    7277             :         }
    7278             :         break;
    7279             : #endif
    7280             : 
    7281             :     case X86EMUL_OPC_VEX(0x0f38, 0xf2):    /* andn r/m,r,r */
    7282             :     case X86EMUL_OPC_VEX(0x0f38, 0xf5):    /* bzhi r,r/m,r */
    7283             :     case X86EMUL_OPC_VEX_F3(0x0f38, 0xf5): /* pext r/m,r,r */
    7284             :     case X86EMUL_OPC_VEX_F2(0x0f38, 0xf5): /* pdep r/m,r,r */
    7285             :     case X86EMUL_OPC_VEX(0x0f38, 0xf7):    /* bextr r,r/m,r */
    7286             :     case X86EMUL_OPC_VEX_66(0x0f38, 0xf7): /* shlx r,r/m,r */
    7287             :     case X86EMUL_OPC_VEX_F3(0x0f38, 0xf7): /* sarx r,r/m,r */
    7288             :     case X86EMUL_OPC_VEX_F2(0x0f38, 0xf7): /* shrx r,r/m,r */
    7289             :     {
    7290         144 :         uint8_t *buf = get_stub(stub);
    7291         144 :         typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
    7292             : 
    7293         144 :         if ( b == 0xf5 || vex.pfx )
    7294         104 :             host_and_vcpu_must_have(bmi2);
    7295             :         else
    7296          40 :             host_and_vcpu_must_have(bmi1);
    7297         144 :         generate_exception_if(vex.l, EXC_UD);
    7298             : 
    7299         136 :         buf[0] = 0xc4;
    7300         136 :         *pvex = vex;
    7301         136 :         pvex->b = 1;
    7302         136 :         pvex->r = 1;
    7303         136 :         pvex->reg = 0xf; /* rAX */
    7304         136 :         buf[3] = b;
    7305         136 :         buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
    7306         136 :         buf[5] = 0xc3;
    7307             : 
    7308         136 :         src.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
    7309         136 :         emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val), "a" (*src.reg));
    7310             : 
    7311         136 :         put_stub(stub);
    7312         136 :         break;
    7313             :     }
    7314             : 
    7315             :     case X86EMUL_OPC_VEX(0x0f38, 0xf3): /* Grp 17 */
    7316             :     {
    7317         168 :         uint8_t *buf = get_stub(stub);
    7318         168 :         typeof(vex) *pvex = container_of(buf + 1, typeof(vex), raw[0]);
    7319             : 
    7320         168 :         switch ( modrm_reg & 7 )
    7321             :         {
    7322             :         case 1: /* blsr r,r/m */
    7323             :         case 2: /* blsmsk r,r/m */
    7324             :         case 3: /* blsi r,r/m */
    7325         160 :             host_and_vcpu_must_have(bmi1);
    7326         160 :             break;
    7327             :         default:
    7328           8 :             goto cannot_emulate;
    7329             :         }
    7330             : 
    7331         160 :         generate_exception_if(vex.l, EXC_UD);
    7332             : 
    7333         152 :         buf[0] = 0xc4;
    7334         152 :         *pvex = vex;
    7335         152 :         pvex->b = 1;
    7336         152 :         pvex->r = 1;
    7337         152 :         pvex->reg = 0xf; /* rAX */
    7338         152 :         buf[3] = b;
    7339         152 :         buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
    7340         152 :         buf[5] = 0xc3;
    7341             : 
    7342         152 :         dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
    7343         152 :         emulate_stub("=&a" (dst.val), "c" (&src.val));
    7344             : 
    7345         152 :         put_stub(stub);
    7346         152 :         break;
    7347             :     }
    7348             : 
    7349             :     case X86EMUL_OPC_66(0x0f38, 0xf6): /* adcx r/m,r */
    7350             :     case X86EMUL_OPC_F3(0x0f38, 0xf6): /* adox r/m,r */
    7351             :     {
    7352         256 :         unsigned int mask = rep_prefix() ? X86_EFLAGS_OF : X86_EFLAGS_CF;
    7353         256 :         unsigned int aux = _regs.eflags & mask ? ~0 : 0;
    7354             :         bool carry;
    7355             : 
    7356         256 :         vcpu_must_have(adx);
    7357             : #ifdef __x86_64__
    7358         256 :         if ( op_bytes == 8 )
    7359           0 :             asm ( "add %[aux],%[aux]\n\t"
    7360             :                   "adc %[src],%[dst]\n\t"
    7361             :                   ASM_FLAG_OUT(, "setc %[carry]")
    7362           0 :                   : [dst] "+r" (dst.val),
    7363             :                     [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry),
    7364             :                     [aux] "+r" (aux)
    7365           0 :                   : [src] "rm" (src.val) );
    7366             :         else
    7367             : #endif
    7368         256 :             asm ( "add %[aux],%[aux]\n\t"
    7369             :                   "adc %k[src],%k[dst]\n\t"
    7370             :                   ASM_FLAG_OUT(, "setc %[carry]")
    7371         256 :                   : [dst] "+r" (dst.val),
    7372             :                     [carry] ASM_FLAG_OUT("=@ccc", "=qm") (carry),
    7373             :                     [aux] "+r" (aux)
    7374         256 :                   : [src] "rm" (src.val) );
    7375         256 :         if ( carry )
    7376          55 :             _regs.eflags |= mask;
    7377             :         else
    7378         201 :             _regs.eflags &= ~mask;
    7379         256 :         break;
    7380             :     }
    7381             : 
    7382             :     case X86EMUL_OPC_VEX_F2(0x0f38, 0xf6): /* mulx r/m,r,r */
    7383         150 :         vcpu_must_have(bmi2);
    7384         150 :         generate_exception_if(vex.l, EXC_UD);
    7385         142 :         ea.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
    7386         142 :         if ( mode_64bit() && vex.w )
    7387          80 :             asm ( "mulq %3" : "=a" (*ea.reg), "=d" (dst.val)
    7388          40 :                             : "0" (src.val), "rm" (_regs.r(dx)) );
    7389             :         else
    7390         204 :             asm ( "mull %3" : "=a" (*ea.reg), "=d" (dst.val)
    7391         102 :                             : "0" ((uint32_t)src.val), "rm" (_regs.edx) );
    7392         142 :         break;
    7393             : 
    7394             :     case X86EMUL_OPC(0x0f3a, 0x0f):    /* palignr $imm8,mm/m64,mm */
    7395             :     case X86EMUL_OPC_66(0x0f3a, 0x0f): /* palignr $imm8,xmm/m128,xmm */
    7396          64 :         host_and_vcpu_must_have(ssse3);
    7397          64 :         if ( vex.pfx )
    7398             :         {
    7399             :     simd_0f3a_common:
    7400         215 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    7401             :         }
    7402             :         else
    7403             :         {
    7404          39 :             host_and_vcpu_must_have(mmx);
    7405          39 :             get_fpu(X86EMUL_FPU_mmx, &fic);
    7406             :         }
    7407         246 :         opc = init_prefixes(stub);
    7408         246 :         opc[0] = 0x3a;
    7409         246 :         opc[1] = b;
    7410         246 :         opc[2] = modrm;
    7411         246 :         if ( ea.type == OP_MEM )
    7412             :         {
    7413             :             /* Convert memory operand to (%rAX). */
    7414         238 :             rex_prefix &= ~REX_B;
    7415         238 :             vex.b = 1;
    7416         238 :             opc[2] &= 0x38;
    7417             :         }
    7418         246 :         opc[3] = imm1;
    7419         246 :         fic.insn_bytes = PFX_BYTES + 4;
    7420         246 :         break;
    7421             : 
    7422             :     case X86EMUL_OPC_66(0x0f3a, 0x08): /* roundps $imm8,xmm/m128,xmm */
    7423             :     case X86EMUL_OPC_66(0x0f3a, 0x09): /* roundpd $imm8,xmm/m128,xmm */
    7424             :     case X86EMUL_OPC_66(0x0f3a, 0x0a): /* roundss $imm8,xmm/m128,xmm */
    7425             :     case X86EMUL_OPC_66(0x0f3a, 0x0b): /* roundsd $imm8,xmm/m128,xmm */
    7426             :     case X86EMUL_OPC_66(0x0f3a, 0x0c): /* blendps $imm8,xmm/m128,xmm */
    7427             :     case X86EMUL_OPC_66(0x0f3a, 0x0d): /* blendpd $imm8,xmm/m128,xmm */
    7428             :     case X86EMUL_OPC_66(0x0f3a, 0x0e): /* pblendw $imm8,xmm/m128,xmm */
    7429             :     case X86EMUL_OPC_66(0x0f3a, 0x40): /* dpps $imm8,xmm/m128,xmm */
    7430             :     case X86EMUL_OPC_66(0x0f3a, 0x41): /* dppd $imm8,xmm/m128,xmm */
    7431             :     case X86EMUL_OPC_66(0x0f3a, 0x42): /* mpsadbw $imm8,xmm/m128,xmm */
    7432           8 :         host_and_vcpu_must_have(sse4_1);
    7433           8 :         goto simd_0f3a_common;
    7434             : 
    7435             :     case X86EMUL_OPC_66(0x0f3a, 0x14): /* pextrb $imm8,xmm,r/m */
    7436             :     case X86EMUL_OPC_66(0x0f3a, 0x15): /* pextrw $imm8,xmm,r/m */
    7437             :     case X86EMUL_OPC_66(0x0f3a, 0x16): /* pextr{d,q} $imm8,xmm,r/m */
    7438             :     case X86EMUL_OPC_66(0x0f3a, 0x17): /* extractps $imm8,xmm,r/m */
    7439          21 :         host_and_vcpu_must_have(sse4_1);
    7440          21 :         get_fpu(X86EMUL_FPU_xmm, &fic);
    7441             : 
    7442          15 :         opc = init_prefixes(stub);
    7443          15 :         opc++[0] = 0x3a;
    7444             :     pextr:
    7445          31 :         opc[0] = b;
    7446             :         /* Convert memory/GPR operand to (%rAX). */
    7447          31 :         rex_prefix &= ~REX_B;
    7448          31 :         vex.b = 1;
    7449          31 :         if ( !mode_64bit() )
    7450          23 :             vex.w = 0;
    7451          31 :         opc[1] = modrm & 0x38;
    7452          31 :         opc[2] = imm1;
    7453          31 :         fic.insn_bytes = PFX_BYTES + 3;
    7454          31 :         opc[3] = 0xc3;
    7455          31 :         if ( vex.opcx == vex_none )
    7456             :         {
    7457             :             /* Cover for extra prefix byte. */
    7458          15 :             --opc;
    7459          15 :             ++fic.insn_bytes;
    7460             :         }
    7461             : 
    7462          31 :         copy_REX_VEX(opc, rex_prefix, vex);
    7463          31 :         invoke_stub("", "", "=m" (dst.val) : "a" (&dst.val));
    7464             : 
    7465          31 :         put_stub(stub);
    7466          31 :         check_xmm_exn(&fic);
    7467             : 
    7468          31 :         ASSERT(!state->simd_size);
    7469          31 :         dst.bytes = dst.type == OP_REG || b == 0x17 ? 4 : 1 << (b & 3);
    7470          31 :         if ( b == 0x16 && (rex_prefix & REX_W) )
    7471           0 :             dst.bytes = 8;
    7472          31 :         break;
    7473             : 
    7474             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x14): /* vpextrb $imm8,xmm,r/m */
    7475             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x15): /* vpextrw $imm8,xmm,r/m */
    7476             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x16): /* vpextr{d,q} $imm8,xmm,r/m */
    7477             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x17): /* vextractps $imm8,xmm,r/m */
    7478          24 :         generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    7479          16 :         host_and_vcpu_must_have(avx);
    7480          16 :         get_fpu(X86EMUL_FPU_ymm, &fic);
    7481          16 :         opc = init_prefixes(stub);
    7482          16 :         goto pextr;
    7483             : 
    7484             :     case X86EMUL_OPC_66(0x0f3a, 0x20): /* pinsrb $imm8,r32/m8,xmm */
    7485             :     case X86EMUL_OPC_66(0x0f3a, 0x22): /* pinsr{d,q} $imm8,r/m,xmm */
    7486           8 :         host_and_vcpu_must_have(sse4_1);
    7487           8 :         get_fpu(X86EMUL_FPU_xmm, &fic);
    7488           8 :         memcpy(mmvalp, &src.val, op_bytes);
    7489           8 :         ea.type = OP_MEM;
    7490           8 :         op_bytes = src.bytes;
    7491           8 :         d = SrcMem16; /* Fake for the common SIMD code below. */
    7492           8 :         state->simd_size = simd_other;
    7493           8 :         goto simd_0f3a_common;
    7494             : 
    7495             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x20): /* vpinsrb $imm8,r32/m8,xmm,xmm */
    7496             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x22): /* vpinsr{d,q} $imm8,r/m,xmm,xmm */
    7497          32 :         generate_exception_if(vex.l, EXC_UD);
    7498          24 :         memcpy(mmvalp, &src.val, op_bytes);
    7499          24 :         ea.type = OP_MEM;
    7500          24 :         op_bytes = src.bytes;
    7501          24 :         d = SrcMem16; /* Fake for the common SIMD code below. */
    7502          24 :         state->simd_size = simd_other;
    7503          24 :         goto simd_0f_int_imm8;
    7504             : 
    7505             :     case X86EMUL_OPC_66(0x0f3a, 0x21): /* insertps $imm8,xmm/m32,xmm */
    7506           8 :         host_and_vcpu_must_have(sse4_1);
    7507           8 :         op_bytes = 4;
    7508           8 :         goto simd_0f3a_common;
    7509             : 
    7510             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x21): /* vinsertps $imm8,xmm/m128,xmm,xmm */
    7511          16 :         op_bytes = 4;
    7512             :         /* fall through */
    7513             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x41): /* vdppd $imm8,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    7514          24 :         generate_exception_if(vex.l, EXC_UD);
    7515          16 :         goto simd_0f_imm8_avx;
    7516             : 
    7517             :     case X86EMUL_OPC_66(0x0f3a, 0x44):     /* pclmulqdq $imm8,xmm/m128,xmm */
    7518             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x44): /* vpclmulqdq $imm8,xmm/m128,xmm,xmm */
    7519          24 :         host_and_vcpu_must_have(pclmulqdq);
    7520          24 :         if ( vex.opcx == vex_none )
    7521           8 :             goto simd_0f3a_common;
    7522          16 :         generate_exception_if(vex.l, EXC_UD);
    7523           8 :         goto simd_0f_imm8_avx;
    7524             : 
    7525             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x4a): /* vblendvps {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    7526             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x4b): /* vblendvpd {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    7527          16 :         generate_exception_if(vex.w, EXC_UD);
    7528           8 :         goto simd_0f_imm8_avx;
    7529             : 
    7530             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x4c): /* vpblendvb {x,y}mm,{x,y}mm/mem,{x,y}mm,{x,y}mm */
    7531          16 :         generate_exception_if(vex.w, EXC_UD);
    7532           8 :         goto simd_0f_int_imm8;
    7533             : 
    7534             :     case X86EMUL_OPC_66(0x0f3a, 0x60):     /* pcmpestrm $imm8,xmm/m128,xmm */
    7535             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x60): /* vpcmpestrm $imm8,xmm/m128,xmm */
    7536             :     case X86EMUL_OPC_66(0x0f3a, 0x61):     /* pcmpestri $imm8,xmm/m128,xmm */
    7537             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x61): /* vpcmpestri $imm8,xmm/m128,xmm */
    7538             :     case X86EMUL_OPC_66(0x0f3a, 0x62):     /* pcmpistrm $imm8,xmm/m128,xmm */
    7539             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x62): /* vpcmpistrm $imm8,xmm/m128,xmm */
    7540             :     case X86EMUL_OPC_66(0x0f3a, 0x63):     /* pcmpistri $imm8,xmm/m128,xmm */
    7541             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0x63): /* vpcmpistri $imm8,xmm/m128,xmm */
    7542         133 :         if ( vex.opcx == vex_none )
    7543             :         {
    7544          32 :             host_and_vcpu_must_have(sse4_2);
    7545          32 :             get_fpu(X86EMUL_FPU_xmm, &fic);
    7546             :         }
    7547             :         else
    7548             :         {
    7549         101 :             generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    7550          93 :             host_and_vcpu_must_have(avx);
    7551          93 :             get_fpu(X86EMUL_FPU_ymm, &fic);
    7552             :         }
    7553             : 
    7554         125 :         opc = init_prefixes(stub);
    7555         125 :         if ( vex.opcx == vex_none )
    7556          32 :             opc++[0] = 0x3a;
    7557         125 :         opc[0] = b;
    7558         125 :         opc[1] = modrm;
    7559         125 :         if ( ea.type == OP_MEM )
    7560             :         {
    7561             :             /* Convert memory operand to (%rDI). */
    7562          31 :             rex_prefix &= ~REX_B;
    7563          31 :             vex.b = 1;
    7564          31 :             opc[1] &= 0x3f;
    7565          31 :             opc[1] |= 0x07;
    7566             : 
    7567          31 :             rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, 16, ctxt);
    7568          31 :             if ( rc != X86EMUL_OKAY )
    7569          16 :                 goto done;
    7570             :         }
    7571         109 :         opc[2] = imm1;
    7572         109 :         fic.insn_bytes = PFX_BYTES + 3;
    7573         109 :         opc[3] = 0xc3;
    7574         109 :         if ( vex.opcx == vex_none )
    7575             :         {
    7576             :             /* Cover for extra prefix byte. */
    7577          24 :             --opc;
    7578          24 :             ++fic.insn_bytes;
    7579             :         }
    7580             : 
    7581         109 :         copy_REX_VEX(opc, rex_prefix, vex);
    7582             : #ifdef __x86_64__
    7583         109 :         if ( rex_prefix & REX_W )
    7584          14 :             emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
    7585             :                          "a" (_regs.rax), "d" (_regs.rdx));
    7586             :         else
    7587             : #endif
    7588          95 :             emulate_stub("=c" (dst.val), "m" (*mmvalp), "D" (mmvalp),
    7589             :                          "a" (_regs.eax), "d" (_regs.edx));
    7590             : 
    7591         109 :         state->simd_size = simd_none;
    7592         109 :         if ( b & 1 )
    7593          16 :             _regs.r(cx) = (uint32_t)dst.val;
    7594         109 :         dst.type = OP_NONE;
    7595         109 :         break;
    7596             : 
    7597             :     case X86EMUL_OPC(0x0f3a, 0xcc):     /* sha1rnds4 $imm8,xmm/m128,xmm */
    7598           8 :         host_and_vcpu_must_have(sha);
    7599           0 :         op_bytes = 16;
    7600           0 :         goto simd_0f3a_common;
    7601             : 
    7602             :     case X86EMUL_OPC_66(0x0f3a, 0xdf):     /* aeskeygenassist $imm8,xmm/m128,xmm */
    7603             :     case X86EMUL_OPC_VEX_66(0x0f3a, 0xdf): /* vaeskeygenassist $imm8,xmm/m128,xmm */
    7604         174 :         host_and_vcpu_must_have(aesni);
    7605         174 :         if ( vex.opcx == vex_none )
    7606         158 :             goto simd_0f3a_common;
    7607          16 :         generate_exception_if(vex.l, EXC_UD);
    7608           8 :         goto simd_0f_imm8_avx;
    7609             : 
    7610             :     case X86EMUL_OPC_VEX_F2(0x0f3a, 0xf0): /* rorx imm,r/m,r */
    7611           0 :         vcpu_must_have(bmi2);
    7612           0 :         generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    7613           0 :         if ( ea.type == OP_REG )
    7614           0 :             src.val = *ea.reg;
    7615           0 :         else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
    7616             :                                    ctxt, ops)) != X86EMUL_OKAY )
    7617           0 :             goto done;
    7618           0 :         if ( mode_64bit() && vex.w )
    7619           0 :             asm ( "rorq %b1,%0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
    7620             :         else
    7621           0 :             asm ( "rorl %b1,%k0" : "=g" (dst.val) : "c" (imm1), "0" (src.val) );
    7622           0 :         break;
    7623             : 
    7624             :     case X86EMUL_OPC_XOP(09, 0x01): /* XOP Grp1 */
    7625          16 :         switch ( modrm_reg & 7 )
    7626             :         {
    7627             :         case 1: /* blcfill r/m,r */
    7628             :         case 2: /* blsfill r/m,r */
    7629             :         case 3: /* blcs r/m,r */
    7630             :         case 4: /* tzmsk r/m,r */
    7631             :         case 5: /* blcic r/m,r */
    7632             :         case 6: /* blsic r/m,r */
    7633             :         case 7: /* t1mskc r/m,r */
    7634           8 :             host_and_vcpu_must_have(tbm);
    7635           0 :             break;
    7636             :         default:
    7637           8 :             goto cannot_emulate;
    7638             :         }
    7639             : 
    7640             :     xop_09_rm_rv:
    7641             :     {
    7642           0 :         uint8_t *buf = get_stub(stub);
    7643           0 :         typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
    7644             : 
    7645           0 :         generate_exception_if(vex.l, EXC_UD);
    7646             : 
    7647           0 :         buf[0] = 0x8f;
    7648           0 :         *pxop = vex;
    7649           0 :         pxop->b = 1;
    7650           0 :         pxop->r = 1;
    7651           0 :         pxop->reg = 0xf; /* rAX */
    7652           0 :         buf[3] = b;
    7653           0 :         buf[4] = (modrm & 0x38) | 0x01; /* r/m=(%rCX) */
    7654           0 :         buf[5] = 0xc3;
    7655             : 
    7656           0 :         dst.reg = decode_vex_gpr(vex.reg, &_regs, ctxt);
    7657           0 :         emulate_stub([dst] "=&a" (dst.val), "c" (&src.val));
    7658             : 
    7659           0 :         put_stub(stub);
    7660           0 :         break;
    7661             :     }
    7662             : 
    7663             :     case X86EMUL_OPC_XOP(09, 0x02): /* XOP Grp2 */
    7664          16 :         switch ( modrm_reg & 7 )
    7665             :         {
    7666             :         case 1: /* blcmsk r/m,r */
    7667             :         case 6: /* blci r/m,r */
    7668           8 :             host_and_vcpu_must_have(tbm);
    7669           0 :             goto xop_09_rm_rv;
    7670             :         }
    7671           8 :         goto cannot_emulate;
    7672             : 
    7673             :     case X86EMUL_OPC_XOP(0a, 0x10): /* bextr imm,r/m,r */
    7674             :     {
    7675           0 :         uint8_t *buf = get_stub(stub);
    7676           0 :         typeof(vex) *pxop = container_of(buf + 1, typeof(vex), raw[0]);
    7677             : 
    7678           0 :         host_and_vcpu_must_have(tbm);
    7679           0 :         generate_exception_if(vex.l || vex.reg != 0xf, EXC_UD);
    7680             : 
    7681           0 :         if ( ea.type == OP_REG )
    7682           0 :             src.val = *ea.reg;
    7683           0 :         else if ( (rc = read_ulong(ea.mem.seg, ea.mem.off, &src.val, op_bytes,
    7684             :                                    ctxt, ops)) != X86EMUL_OKAY )
    7685           0 :             goto done;
    7686             : 
    7687           0 :         buf[0] = 0x8f;
    7688           0 :         *pxop = vex;
    7689           0 :         pxop->b = 1;
    7690           0 :         pxop->r = 1;
    7691           0 :         buf[3] = b;
    7692           0 :         buf[4] = 0x09; /* reg=rCX r/m=(%rCX) */
    7693           0 :         *(uint32_t *)(buf + 5) = imm1;
    7694           0 :         buf[9] = 0xc3;
    7695             : 
    7696           0 :         emulate_stub([dst] "=&c" (dst.val), "[dst]" (&src.val));
    7697             : 
    7698           0 :         put_stub(stub);
    7699           0 :         break;
    7700             :     }
    7701             : 
    7702             :     default:
    7703             :     cannot_emulate:
    7704         464 :         rc = X86EMUL_UNHANDLEABLE;
    7705         464 :         goto done;
    7706             :     }
    7707             : 
    7708     3854668 :     if ( state->simd_size )
    7709             :     {
    7710        8539 :         generate_exception_if(!op_bytes, EXC_UD);
    7711        8539 :         generate_exception_if(vex.opcx && (d & TwoOp) && vex.reg != 0xf,
    7712             :                               EXC_UD);
    7713             : 
    7714        8507 :         if ( !opc )
    7715           0 :             BUG();
    7716        8507 :         opc[fic.insn_bytes - PFX_BYTES] = 0xc3;
    7717        8507 :         copy_REX_VEX(opc, rex_prefix, vex);
    7718             : 
    7719        8507 :         if ( ea.type == OP_MEM )
    7720             :         {
    7721        7765 :             uint32_t mxcsr = 0;
    7722             : 
    7723       13391 :             if ( op_bytes < 16 ||
    7724             :                  (vex.opcx
    7725        2813 :                   ? /* vmov{{a,nt}p{s,d},dqa,ntdq} are exceptions. */
    7726        1069 :                     ext != ext_0f ||
    7727         707 :                     ((b | 1) != 0x29 && b != 0x2b &&
    7728         241 :                      ((b | 0x10) != 0x7f || vex.pfx != vex_66) &&
    7729             :                      b != 0xe7)
    7730             :                   : /* movup{s,d}, {,mask}movdqu, and lddqu are exceptions. */
    7731        4639 :                     ext == ext_0f &&
    7732        4320 :                     ((b | 1) == 0x11 ||
    7733        2144 :                      ((b | 0x10) == 0x7f && vex.pfx == vex_f3) ||
    7734        2128 :                      b == 0xf7 || b == 0xf0)) )
    7735        5422 :                 mxcsr = MXCSR_MM;
    7736        2343 :             else if ( vcpu_has_misalignsse() )
    7737           0 :                 asm ( "stmxcsr %0" : "=m" (mxcsr) );
    7738        8244 :             generate_exception_if(!(mxcsr & MXCSR_MM) &&
    7739             :                                   !is_aligned(ea.mem.seg, ea.mem.off, op_bytes,
    7740             :                                               ctxt, ops),
    7741             :                                   EXC_GP, 0);
    7742        7682 :             switch ( d & SrcMask )
    7743             :             {
    7744             :             case SrcMem:
    7745        5772 :                 rc = ops->read(ea.mem.seg, ea.mem.off, mmvalp, op_bytes, ctxt);
    7746        5772 :                 if ( rc != X86EMUL_OKAY )
    7747         367 :                     goto done;
    7748             :                 /* fall through */
    7749             :             case SrcMem16:
    7750        6177 :                 dst.type = OP_NONE;
    7751        6177 :                 break;
    7752             :             default:
    7753        1138 :                 if ( (d & DstMask) != DstMem )
    7754             :                 {
    7755           0 :                     ASSERT_UNREACHABLE();
    7756             :                     goto cannot_emulate;
    7757             :                 }
    7758        1138 :                 break;
    7759             :             }
    7760        7315 :             if ( (d & DstMask) == DstMem )
    7761             :             {
    7762        1151 :                 fail_if(!ops->write); /* Check before running the stub. */
    7763        1122 :                 ASSERT(d & Mov);
    7764        1122 :                 dst.type = OP_MEM;
    7765        1122 :                 dst.bytes = op_bytes;
    7766        1122 :                 dst.mem = ea.mem;
    7767             :             }
    7768             :         }
    7769             :         else
    7770         742 :             dst.type = OP_NONE;
    7771             : 
    7772             :         /* {,v}maskmov{q,dqu}, as an exception, uses rDI. */
    7773        8028 :         if ( likely((ctxt->opcode & ~(X86EMUL_OPC_PFX_MASK |
    7774             :                                       X86EMUL_OPC_ENCODING_MASK)) !=
    7775             :                     X86EMUL_OPC(0x0f, 0xf7)) )
    7776        8021 :             invoke_stub("", "", "+m" (*mmvalp), "+m" (fic.exn_raised)
    7777             :                                 : "a" (mmvalp));
    7778             :         else
    7779           7 :             invoke_stub("", "", "+m" (*mmvalp) : "D" (mmvalp));
    7780             : 
    7781        8028 :         put_stub(stub);
    7782        8028 :         check_xmm_exn(&fic);
    7783             :     }
    7784             : 
    7785     3854157 :     switch ( dst.type )
    7786             :     {
    7787             :     case OP_REG:
    7788             :         /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
    7789     1444014 :         switch ( dst.bytes )
    7790             :         {
    7791      330124 :         case 1: *(uint8_t  *)dst.reg = (uint8_t)dst.val; break;
    7792      605909 :         case 2: *(uint16_t *)dst.reg = (uint16_t)dst.val; break;
    7793      418222 :         case 4: *dst.reg = (uint32_t)dst.val; break; /* 64b: zero-ext */
    7794       89759 :         case 8: *dst.reg = dst.val; break;
    7795             :         }
    7796     1444014 :         break;
    7797             :     case OP_MEM:
    7798     1708961 :         if ( !(d & Mov) && (dst.orig_val == dst.val) &&
    7799      217519 :              !ctxt->force_writeback )
    7800             :             /* nothing to do */;
    7801     1273923 :         else if ( lock_prefix )
    7802             :         {
    7803         372 :             fail_if(!ops->cmpxchg);
    7804         364 :             rc = ops->cmpxchg(
    7805             :                 dst.mem.seg, dst.mem.off, &dst.orig_val,
    7806             :                 &dst.val, dst.bytes, ctxt);
    7807             :         }
    7808             :         else
    7809             :         {
    7810     1273551 :             fail_if(!ops->write);
    7811     2546266 :             rc = ops->write(dst.mem.seg, dst.mem.off,
    7812     1273133 :                             !state->simd_size ? &dst.val : (void *)mmvalp,
    7813             :                             dst.bytes, ctxt);
    7814     1273133 :             if ( sfence )
    7815        1023 :                 asm volatile ( "sfence" ::: "memory" );
    7816             :         }
    7817     1491016 :         if ( rc != 0 )
    7818        2094 :             goto done;
    7819             :     default:
    7820     2407623 :         break;
    7821             :     }
    7822             : 
    7823             :  complete_insn: /* Commit shadow register state. */
    7824     3856253 :     put_fpu(&fic, false, state, ctxt, ops);
    7825             : 
    7826             :     /* Zero the upper 32 bits of %rip if not in 64-bit mode. */
    7827     3856253 :     if ( !mode_64bit() )
    7828     2780390 :         _regs.r(ip) = _regs.eip;
    7829             : 
    7830             :     /* Should a singlestep #DB be raised? */
    7831     3856253 :     if ( rc == X86EMUL_OKAY && singlestep && !ctxt->retire.mov_ss )
    7832             :     {
    7833       89493 :         ctxt->retire.singlestep = true;
    7834       89493 :         ctxt->retire.sti = false;
    7835             :     }
    7836             : 
    7837     3856253 :     if ( rc != X86EMUL_DONE )
    7838     3856253 :         *ctxt->regs = _regs;
    7839             :     else
    7840             :     {
    7841           0 :         ctxt->regs->r(ip) = _regs.r(ip);
    7842           0 :         rc = X86EMUL_OKAY;
    7843             :     }
    7844             : 
    7845     3856253 :     ctxt->regs->eflags &= ~X86_EFLAGS_RF;
    7846             : 
    7847             :  done:
    7848     3874643 :     put_fpu(&fic, fic.insn_bytes > 0 && dst.type == OP_MEM, state, ctxt, ops);
    7849     3874643 :     put_stub(stub);
    7850     3874643 :     return rc;
    7851             : #undef state
    7852             : }
    7853             : 
    7854             : #undef op_bytes
    7855             : #undef ad_bytes
    7856             : #undef ext
    7857             : #undef modrm
    7858             : #undef modrm_mod
    7859             : #undef modrm_reg
    7860             : #undef modrm_rm
    7861             : #undef rex_prefix
    7862             : #undef lock_prefix
    7863             : #undef vex
    7864             : #undef ea
    7865             : 
static void __init __maybe_unused build_assertions(void)
{
    /*
     * Compile-time sanity checks only: this function is never called at
     * run time (hence __maybe_unused); it exists solely to host the
     * BUILD_BUG_ON()s below.
     */

    /* Check the values against SReg3 encoding in opcode/ModRM bytes. */
    BUILD_BUG_ON(x86_seg_es != 0);
    BUILD_BUG_ON(x86_seg_cs != 1);
    BUILD_BUG_ON(x86_seg_ss != 2);
    BUILD_BUG_ON(x86_seg_ds != 3);
    BUILD_BUG_ON(x86_seg_fs != 4);
    BUILD_BUG_ON(x86_seg_gs != 5);

    /*
     * Check X86_EVENTTYPE_* against VMCB EVENTINJ and VMCS INTR_INFO type
     * fields.
     */
    BUILD_BUG_ON(X86_EVENTTYPE_EXT_INTR != 0);
    BUILD_BUG_ON(X86_EVENTTYPE_NMI != 2);
    BUILD_BUG_ON(X86_EVENTTYPE_HW_EXCEPTION != 3);
    BUILD_BUG_ON(X86_EVENTTYPE_SW_INTERRUPT != 4);
    BUILD_BUG_ON(X86_EVENTTYPE_PRI_SW_EXCEPTION != 5);
    BUILD_BUG_ON(X86_EVENTTYPE_SW_EXCEPTION != 6);
}
    7887             : 
    7888             : #ifndef NDEBUG
    7889             : /*
    7890             :  * In debug builds, wrap x86_emulate() with some assertions about its expected
    7891             :  * behaviour.
    7892             :  */
    7893     3899535 : int x86_emulate_wrapper(
    7894             :     struct x86_emulate_ctxt *ctxt,
    7895             :     const struct x86_emulate_ops *ops)
    7896             : {
    7897     3899535 :     unsigned long orig_ip = ctxt->regs->r(ip);
    7898             :     int rc;
    7899             : 
    7900     3899535 :     if ( mode_64bit() )
    7901     1089241 :         ASSERT(ctxt->lma);
    7902             : 
    7903     3899535 :     rc = x86_emulate(ctxt, ops);
    7904             : 
    7905             :     /* Retire flags should only be set for successful instruction emulation. */
    7906     3899535 :     if ( rc != X86EMUL_OKAY )
    7907       43722 :         ASSERT(ctxt->retire.raw == 0);
    7908             : 
    7909             :     /* All cases returning X86EMUL_EXCEPTION should have fault semantics. */
    7910     3899535 :     if ( rc == X86EMUL_EXCEPTION )
    7911       30625 :         ASSERT(ctxt->regs->r(ip) == orig_ip);
    7912             : 
    7913             :     /*
    7914             :      * An event being pending should exactly match returning
    7915             :      * X86EMUL_EXCEPTION.  (If this trips, the chances are a codepath has
    7916             :      * called hvm_inject_hw_exception() rather than using
    7917             :      * x86_emul_hw_exception().)
    7918             :      */
    7919     3899535 :     ASSERT(ctxt->event_pending == (rc == X86EMUL_EXCEPTION));
    7920             : 
    7921     3899535 :     return rc;
    7922             : }
    7923             : #endif
    7924             : 
    7925             : #ifdef __XEN__
    7926             : 
    7927             : #include <xen/err.h>
    7928             : 
    7929             : struct x86_emulate_state *
    7930             : x86_decode_insn(
    7931             :     struct x86_emulate_ctxt *ctxt,
    7932             :     int (*insn_fetch)(
    7933             :         enum x86_segment seg, unsigned long offset,
    7934             :         void *p_data, unsigned int bytes,
    7935             :         struct x86_emulate_ctxt *ctxt))
    7936             : {
    7937             :     static DEFINE_PER_CPU(struct x86_emulate_state, state);
    7938             :     struct x86_emulate_state *state = &this_cpu(state);
    7939             :     const struct x86_emulate_ops ops = {
    7940             :         .insn_fetch = insn_fetch,
    7941             :         .read       = x86emul_unhandleable_rw,
    7942             :     };
    7943             :     int rc = x86_decode(state, ctxt, &ops);
    7944             : 
    7945             :     if ( unlikely(rc != X86EMUL_OKAY) )
    7946             :         return ERR_PTR(-rc);
    7947             : 
    7948             : #ifndef NDEBUG
    7949             :     /*
    7950             :      * While we avoid memory allocation (by use of per-CPU data) above,
    7951             :      * nevertheless make sure callers properly release the state structure
    7952             :      * for forward compatibility.
    7953             :      */
    7954             :     if ( state->caller )
    7955             :     {
    7956             :         printk(XENLOG_ERR "Unreleased emulation state acquired by %ps\n",
    7957             :                state->caller);
    7958             :         dump_execution_state();
    7959             :     }
    7960             :     state->caller = __builtin_return_address(0);
    7961             : #endif
    7962             : 
    7963             :     return state;
    7964             : }
    7965             : 
/*
 * Debug-build sanity check for the state accessors below: a state obtained
 * from x86_decode_insn() has ->caller set until x86_emulate_free_state()
 * clears it, so a NULL caller indicates use of an unacquired/released state.
 */
static inline void check_state(const struct x86_emulate_state *state)
{
#ifndef NDEBUG
    ASSERT(state->caller);
#endif
}
    7972             : 
    7973             : #ifndef NDEBUG
/* Release a state acquired via x86_decode_insn() (debug builds only). */
void x86_emulate_free_state(struct x86_emulate_state *state)
{
    check_state(state);
    /* Mark the per-CPU state as released so re-acquisition is detectable. */
    state->caller = NULL;
}
    7979             : #endif
    7980             : 
    7981             : unsigned int
    7982             : x86_insn_opsize(const struct x86_emulate_state *state)
    7983             : {
    7984             :     check_state(state);
    7985             : 
    7986             :     return state->op_bytes << 3;
    7987             : }
    7988             : 
    7989             : int
    7990             : x86_insn_modrm(const struct x86_emulate_state *state,
    7991             :                unsigned int *rm, unsigned int *reg)
    7992             : {
    7993             :     check_state(state);
    7994             : 
    7995             :     if ( state->modrm_mod > 3 )
    7996             :         return -EINVAL;
    7997             : 
    7998             :     if ( rm )
    7999             :         *rm = state->modrm_rm;
    8000             :     if ( reg )
    8001             :         *reg = state->modrm_reg;
    8002             : 
    8003             :     return state->modrm_mod;
    8004             : }
    8005             : 
    8006             : unsigned long
    8007             : x86_insn_operand_ea(const struct x86_emulate_state *state,
    8008             :                     enum x86_segment *seg)
    8009             : {
    8010             :     *seg = state->ea.type == OP_MEM ? state->ea.mem.seg : x86_seg_none;
    8011             : 
    8012             :     check_state(state);
    8013             : 
    8014             :     return state->ea.mem.off;
    8015             : }
    8016             : 
    8017             : bool
    8018             : x86_insn_is_mem_access(const struct x86_emulate_state *state,
    8019             :                        const struct x86_emulate_ctxt *ctxt)
    8020             : {
    8021             :     if ( state->ea.type == OP_MEM )
    8022             :         return ctxt->opcode != 0x8d /* LEA */ &&
    8023             :                (ctxt->opcode != X86EMUL_OPC(0x0f, 0x01) ||
    8024             :                 (state->modrm_reg & 7) != 7) /* INVLPG */;
    8025             : 
    8026             :     switch ( ctxt->opcode )
    8027             :     {
    8028             :     case 0x6c ... 0x6f: /* INS / OUTS */
    8029             :     case 0xa4 ... 0xa7: /* MOVS / CMPS */
    8030             :     case 0xaa ... 0xaf: /* STOS / LODS / SCAS */
    8031             :     case 0xd7:          /* XLAT */
    8032             :     CASE_SIMD_PACKED_INT(0x0f, 0xf7):    /* MASKMOV{Q,DQU} */
    8033             :     case X86EMUL_OPC_VEX_66(0x0f, 0xf7): /* VMASKMOVDQU */
    8034             :         return true;
    8035             : 
    8036             :     case X86EMUL_OPC(0x0f, 0x01):
    8037             :         /* Cover CLZERO. */
    8038             :         return (state->modrm_rm & 7) == 4 && (state->modrm_reg & 7) == 7;
    8039             :     }
    8040             : 
    8041             :     return false;
    8042             : }
    8043             : 
/*
 * Does the decoded instruction write to memory when executed?
 *
 * First classify by the decoder's destination operand type; instructions
 * with an implicit destination (DstImplicit/DstBitBase) then need further
 * opcode-specific inspection below, notably the x87 store/state-save forms.
 */
bool
x86_insn_is_mem_write(const struct x86_emulate_state *state,
                      const struct x86_emulate_ctxt *ctxt)
{
    switch ( state->desc & DstMask )
    {
    case DstMem:
        /* The SrcMem check is to cover {,V}MASKMOV{Q,DQU}. */
        return state->modrm_mod != 3 || (state->desc & SrcMask) == SrcMem;

    case DstBitBase:
    case DstImplicit:
        break;

    default:
        /* Register / immediate destinations never write memory. */
        return false;
    }

    if ( state->modrm_mod == 3 )
        /* CLZERO is the odd one. */
        return ctxt->opcode == X86EMUL_OPC(0x0f, 0x01) &&
               (state->modrm_rm & 7) == 4 && (state->modrm_reg & 7) == 7;

    switch ( ctxt->opcode )
    {
    case 0x6c: case 0x6d:                /* INS */
    case 0xa4: case 0xa5:                /* MOVS */
    case 0xaa: case 0xab:                /* STOS */
    case X86EMUL_OPC(0x0f, 0xab):        /* BTS */
    case X86EMUL_OPC(0x0f, 0xb3):        /* BTR */
    case X86EMUL_OPC(0x0f, 0xbb):        /* BTC */
        return true;

    /* x87 opcodes: only the store forms (selected by ModRM.reg) write. */
    case 0xd9:
        switch ( state->modrm_reg & 7 )
        {
        case 2: /* FST m32fp */
        case 3: /* FSTP m32fp */
        case 6: /* FNSTENV */
        case 7: /* FNSTCW */
            return true;
        }
        break;

    case 0xdb:
        switch ( state->modrm_reg & 7 )
        {
        case 1: /* FISTTP m32i */
        case 2: /* FIST m32i */
        case 3: /* FISTP m32i */
        case 7: /* FSTP m80fp */
            return true;
        }
        break;

    case 0xdd:
        switch ( state->modrm_reg & 7 )
        {
        case 1: /* FISTTP m64i */
        case 2: /* FST m64fp */
        case 3: /* FSTP m64fp */
        case 6: /* FNSAVE */
        case 7: /* FNSTSW */
            return true;
        }
        break;

    case 0xdf:
        switch ( state->modrm_reg & 7 )
        {
        case 1: /* FISTTP m16i */
        case 2: /* FIST m16i */
        case 3: /* FISTP m16i */
        case 6: /* FBSTP */
        case 7: /* FISTP m64i */
            return true;
        }
        break;

    case X86EMUL_OPC(0x0f, 0x01):
        return !(state->modrm_reg & 6); /* SGDT / SIDT */

    case X86EMUL_OPC(0x0f, 0xba):
        return (state->modrm_reg & 7) > 4; /* BTS / BTR / BTC */

    case X86EMUL_OPC(0x0f, 0xc7):
        return (state->modrm_reg & 7) == 1; /* CMPXCHG{8,16}B */
    }

    return false;
}
    8135             : 
    8136             : bool
    8137             : x86_insn_is_portio(const struct x86_emulate_state *state,
    8138             :                    const struct x86_emulate_ctxt *ctxt)
    8139             : {
    8140             :     switch ( ctxt->opcode )
    8141             :     {
    8142             :     case 0x6c ... 0x6f: /* INS / OUTS */
    8143             :     case 0xe4 ... 0xe7: /* IN / OUT imm8 */
    8144             :     case 0xec ... 0xef: /* IN / OUT %dx */
    8145             :         return true;
    8146             :     }
    8147             : 
    8148             :     return false;
    8149             : }
    8150             : 
    8151             : bool
    8152             : x86_insn_is_cr_access(const struct x86_emulate_state *state,
    8153             :                       const struct x86_emulate_ctxt *ctxt)
    8154             : {
    8155             :     switch ( ctxt->opcode )
    8156             :     {
    8157             :         unsigned int ext;
    8158             : 
    8159             :     case X86EMUL_OPC(0x0f, 0x01):
    8160             :         if ( x86_insn_modrm(state, NULL, &ext) >= 0
    8161             :              && (ext & 5) == 4 ) /* SMSW / LMSW */
    8162             :             return true;
    8163             :         break;
    8164             : 
    8165             :     case X86EMUL_OPC(0x0f, 0x06): /* CLTS */
    8166             :     case X86EMUL_OPC(0x0f, 0x20): /* MOV from CRn */
    8167             :     case X86EMUL_OPC(0x0f, 0x22): /* MOV to CRn */
    8168             :         return true;
    8169             :     }
    8170             : 
    8171             :     return false;
    8172             : }
    8173             : 
    8174             : unsigned long
    8175             : x86_insn_immediate(const struct x86_emulate_state *state, unsigned int nr)
    8176             : {
    8177             :     check_state(state);
    8178             : 
    8179             :     switch ( nr )
    8180             :     {
    8181             :     case 0:
    8182             :         return state->imm1;
    8183             :     case 1:
    8184             :         return state->imm2;
    8185             :     }
    8186             : 
    8187             :     return 0;
    8188             : }
    8189             : 
/*
 * Return the byte length of the decoded instruction: the distance from
 * the guest's current instruction pointer (ctxt->regs->r(ip)) to the
 * post-decode pointer recorded in the state (state->ip).
 */
unsigned int
x86_insn_length(const struct x86_emulate_state *state,
                const struct x86_emulate_ctxt *ctxt)
{
    check_state(state);

    return state->ip - ctxt->regs->r(ip);
}
    8198             : 
    8199             : #endif

Generated by: LCOV version 1.11