ia64/xen-unstable

changeset 16849:a878752a83f9

x86_emulate: Provide callbacks for faster emulation of:
REP MOVS, REP INS, REP OUTS.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Jan 22 17:18:02 2008 +0000 (2008-01-22)
parents 0ededc85e6b4
children dbb5a7983775
files xen/arch/x86/x86_emulate.c xen/include/asm-x86/x86_emulate.h
line diff
     1.1 --- a/xen/arch/x86/x86_emulate.c	Tue Jan 22 14:35:17 2008 +0000
     1.2 +++ b/xen/arch/x86/x86_emulate.c	Tue Jan 22 17:18:02 2008 +0000
     1.3 @@ -552,7 +552,7 @@ do {                                    
     1.4                       ? (uint16_t)_regs.eip : (uint32_t)_regs.eip);      \
     1.5  } while (0)
     1.6  
     1.7 -static int __handle_rep_prefix(
     1.8 +static unsigned long __get_rep_prefix(
     1.9      struct cpu_user_regs *int_regs,
    1.10      struct cpu_user_regs *ext_regs,
    1.11      int ad_bytes)
    1.12 @@ -561,11 +561,36 @@ static int __handle_rep_prefix(
    1.13                           (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
    1.14                           int_regs->ecx);
    1.15  
    1.16 -    if ( ecx-- == 0 )
    1.17 -    {
    1.18 +    /* Skip the instruction if no repetitions are required. */
    1.19 +    if ( ecx == 0 )
    1.20          ext_regs->eip = int_regs->eip;
    1.21 -        return 1;
    1.22 -    }
    1.23 +
    1.24 +    return ecx;
    1.25 +}
    1.26 +
    1.27 +#define get_rep_prefix() ({                                             \
    1.28 +    unsigned long max_reps = 1;                                         \
    1.29 +    if ( rep_prefix )                                                   \
    1.30 +        max_reps = __get_rep_prefix(&_regs, ctxt->regs, ad_bytes);      \
    1.31 +    if ( max_reps == 0 )                                                \
    1.32 +        goto done;                                                      \
    1.33 +   max_reps;                                                            \
    1.34 +})
    1.35 +
    1.36 +static void __put_rep_prefix(
    1.37 +    struct cpu_user_regs *int_regs,
    1.38 +    struct cpu_user_regs *ext_regs,
    1.39 +    int ad_bytes,
    1.40 +    unsigned long reps_completed)
    1.41 +{
    1.42 +    unsigned long ecx = ((ad_bytes == 2) ? (uint16_t)int_regs->ecx :
    1.43 +                         (ad_bytes == 4) ? (uint32_t)int_regs->ecx :
    1.44 +                         int_regs->ecx);
    1.45 +
    1.46 +    /* Reduce counter appropriately, and repeat instruction if non-zero. */
    1.47 +    ecx -= reps_completed;
    1.48 +    if ( ecx != 0 )
    1.49 +        int_regs->eip = ext_regs->eip;
    1.50  
    1.51      if ( ad_bytes == 2 )
    1.52          *(uint16_t *)&int_regs->ecx = ecx;
    1.53 @@ -573,15 +598,12 @@ static int __handle_rep_prefix(
    1.54          int_regs->ecx = (uint32_t)ecx;
    1.55      else
    1.56          int_regs->ecx = ecx;
    1.57 -    int_regs->eip = ext_regs->eip;
    1.58 -    return 0;
    1.59  }
    1.60  
    1.61 -#define handle_rep_prefix()                                                \
    1.62 -do {                                                                       \
    1.63 -    if ( rep_prefix && __handle_rep_prefix(&_regs, ctxt->regs, ad_bytes) ) \
    1.64 -        goto done;                                                         \
    1.65 -} while (0)
    1.66 +#define put_rep_prefix(reps_completed) ({                               \
    1.67 +    if ( rep_prefix )                                                   \
    1.68 +        __put_rep_prefix(&_regs, ctxt->regs, ad_bytes, reps_completed); \
    1.69 +})
    1.70  
    1.71  /*
    1.72   * Unsigned multiplication with double-word result.
    1.73 @@ -2051,35 +2073,63 @@ x86_emulate(
    1.74          dst.mem.off = sp_pre_dec(dst.bytes);
    1.75          break;
    1.76  
    1.77 -    case 0x6c ... 0x6d: /* ins %dx,%es:%edi */
    1.78 -        handle_rep_prefix();
    1.79 +    case 0x6c ... 0x6d: /* ins %dx,%es:%edi */ {
    1.80 +        unsigned long nr_reps = get_rep_prefix();
    1.81          generate_exception_if(!mode_iopl(), EXC_GP);
    1.82 -        dst.type  = OP_MEM;
    1.83          dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
    1.84          dst.mem.seg = x86_seg_es;
    1.85          dst.mem.off = truncate_ea(_regs.edi);
    1.86 -        fail_if(ops->read_io == NULL);
    1.87 -        if ( (rc = ops->read_io((uint16_t)_regs.edx, dst.bytes,
    1.88 -                                &dst.val, ctxt)) != 0 )
    1.89 -            goto done;
    1.90 +        if ( (nr_reps > 1) && (ops->rep_ins != NULL) )
    1.91 +        {
    1.92 +            if ( (rc = ops->rep_ins((uint16_t)_regs.edx, dst.mem.seg,
    1.93 +                                    dst.mem.off, dst.bytes,
    1.94 +                                    &nr_reps, ctxt)) != 0 )
    1.95 +                goto done;
    1.96 +        }
    1.97 +        else
    1.98 +        {
    1.99 +            fail_if(ops->read_io == NULL);
   1.100 +            if ( (rc = ops->read_io((uint16_t)_regs.edx, dst.bytes,
   1.101 +                                    &dst.val, ctxt)) != 0 )
   1.102 +                goto done;
   1.103 +            dst.type = OP_MEM;
   1.104 +            nr_reps = 1;
   1.105 +        }
   1.106          register_address_increment(
   1.107 -            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   1.108 +            _regs.edi,
   1.109 +            nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
   1.110 +        put_rep_prefix(nr_reps);
   1.111          break;
   1.112 +    }
   1.113  
   1.114 -    case 0x6e ... 0x6f: /* outs %esi,%dx */
   1.115 -        handle_rep_prefix();
   1.116 +    case 0x6e ... 0x6f: /* outs %esi,%dx */ {
   1.117 +        unsigned long nr_reps = get_rep_prefix();
   1.118          generate_exception_if(!mode_iopl(), EXC_GP);
   1.119          dst.bytes = !(b & 1) ? 1 : (op_bytes == 8) ? 4 : op_bytes;
   1.120 -        if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
   1.121 -                             &dst.val, dst.bytes, ctxt)) != 0 )
   1.122 -            goto done;
   1.123 -        fail_if(ops->write_io == NULL);
   1.124 -        if ( (rc = ops->write_io((uint16_t)_regs.edx, dst.bytes,
   1.125 -                                 dst.val, ctxt)) != 0 )
   1.126 -            goto done;
   1.127 +        if ( (nr_reps > 1) && (ops->rep_outs != NULL) )
   1.128 +        {
   1.129 +            if ( (rc = ops->rep_outs(ea.mem.seg, truncate_ea(_regs.esi),
   1.130 +                                     (uint16_t)_regs.edx, dst.bytes,
   1.131 +                                     &nr_reps, ctxt)) != 0 )
   1.132 +                goto done;
   1.133 +        }
   1.134 +        else
   1.135 +        {
   1.136 +            if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
   1.137 +                                 &dst.val, dst.bytes, ctxt)) != 0 )
   1.138 +                goto done;
   1.139 +            fail_if(ops->write_io == NULL);
   1.140 +            if ( (rc = ops->write_io((uint16_t)_regs.edx, dst.bytes,
   1.141 +                                     dst.val, ctxt)) != 0 )
   1.142 +                goto done;
   1.143 +            nr_reps = 1;
   1.144 +        }
   1.145          register_address_increment(
   1.146 -            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   1.147 +            _regs.esi,
   1.148 +            nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
   1.149 +        put_rep_prefix(nr_reps);
   1.150          break;
   1.151 +    }
   1.152  
   1.153      case 0x70 ... 0x7f: /* jcc (short) */ {
   1.154          int rel = insn_fetch_type(int8_t);
   1.155 @@ -2202,24 +2252,39 @@ x86_emulate(
   1.156          dst.val   = (unsigned long)_regs.eax;
   1.157          break;
   1.158  
   1.159 -    case 0xa4 ... 0xa5: /* movs */
   1.160 -        handle_rep_prefix();
   1.161 -        dst.type  = OP_MEM;
   1.162 +    case 0xa4 ... 0xa5: /* movs */ {
   1.163 +        unsigned long nr_reps = get_rep_prefix();
   1.164          dst.bytes = (d & ByteOp) ? 1 : op_bytes;
   1.165          dst.mem.seg = x86_seg_es;
   1.166          dst.mem.off = truncate_ea(_regs.edi);
   1.167 -        if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
   1.168 -                             &dst.val, dst.bytes, ctxt)) != 0 )
   1.169 -            goto done;
   1.170 +        if ( (nr_reps > 1) && (ops->rep_movs != NULL) )
   1.171 +        {
   1.172 +            if ( (rc = ops->rep_movs(ea.mem.seg, truncate_ea(_regs.esi),
   1.173 +                                     dst.mem.seg, dst.mem.off, dst.bytes,
   1.174 +                                     &nr_reps, ctxt)) != 0 )
   1.175 +                goto done;
   1.176 +        }
   1.177 +        else
   1.178 +        {
   1.179 +            if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
   1.180 +                                 &dst.val, dst.bytes, ctxt)) != 0 )
   1.181 +                goto done;
   1.182 +            dst.type = OP_MEM;
   1.183 +            nr_reps = 1;
   1.184 +        }
   1.185          register_address_increment(
   1.186 -            _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   1.187 +            _regs.esi,
   1.188 +            nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
   1.189          register_address_increment(
   1.190 -            _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   1.191 +            _regs.edi,
   1.192 +            nr_reps * ((_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes));
   1.193 +        put_rep_prefix(nr_reps);
   1.194          break;
   1.195 +    }
   1.196  
   1.197      case 0xa6 ... 0xa7: /* cmps */ {
   1.198          unsigned long next_eip = _regs.eip;
   1.199 -        handle_rep_prefix();
   1.200 +        get_rep_prefix();
   1.201          src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
   1.202          if ( (rc = ops->read(ea.mem.seg, truncate_ea(_regs.esi),
   1.203                               &dst.val, dst.bytes, ctxt)) ||
   1.204 @@ -2230,6 +2295,7 @@ x86_emulate(
   1.205              _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   1.206          register_address_increment(
   1.207              _regs.edi, (_regs.eflags & EFLG_DF) ? -src.bytes : src.bytes);
   1.208 +        put_rep_prefix(1);
   1.209          /* cmp: dst - src ==> src=*%%edi,dst=*%%esi ==> *%%esi - *%%edi */
   1.210          emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
   1.211          if ( ((rep_prefix == REPE_PREFIX) && !(_regs.eflags & EFLG_ZF)) ||
   1.212 @@ -2238,8 +2304,8 @@ x86_emulate(
   1.213          break;
   1.214      }
   1.215  
   1.216 -    case 0xaa ... 0xab: /* stos */
   1.217 -        handle_rep_prefix();
   1.218 +    case 0xaa ... 0xab: /* stos */ {
   1.219 +        /* unsigned long max_reps = */get_rep_prefix();
   1.220          dst.type  = OP_MEM;
   1.221          dst.bytes = (d & ByteOp) ? 1 : op_bytes;
   1.222          dst.mem.seg = x86_seg_es;
   1.223 @@ -2247,10 +2313,12 @@ x86_emulate(
   1.224          dst.val   = _regs.eax;
   1.225          register_address_increment(
   1.226              _regs.edi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   1.227 +        put_rep_prefix(1);
   1.228          break;
   1.229 +    }
   1.230  
   1.231 -    case 0xac ... 0xad: /* lods */
   1.232 -        handle_rep_prefix();
   1.233 +    case 0xac ... 0xad: /* lods */ {
   1.234 +        /* unsigned long max_reps = */get_rep_prefix();
   1.235          dst.type  = OP_REG;
   1.236          dst.bytes = (d & ByteOp) ? 1 : op_bytes;
   1.237          dst.reg   = (unsigned long *)&_regs.eax;
   1.238 @@ -2259,11 +2327,13 @@ x86_emulate(
   1.239              goto done;
   1.240          register_address_increment(
   1.241              _regs.esi, (_regs.eflags & EFLG_DF) ? -dst.bytes : dst.bytes);
   1.242 +        put_rep_prefix(1);
   1.243          break;
   1.244 +    }
   1.245  
   1.246      case 0xae ... 0xaf: /* scas */ {
   1.247          unsigned long next_eip = _regs.eip;
   1.248 -        handle_rep_prefix();
   1.249 +        get_rep_prefix();
   1.250          src.bytes = dst.bytes = (d & ByteOp) ? 1 : op_bytes;
   1.251          dst.val = _regs.eax;
   1.252          if ( (rc = ops->read(x86_seg_es, truncate_ea(_regs.edi),
   1.253 @@ -2271,6 +2341,7 @@ x86_emulate(
   1.254              goto done;
   1.255          register_address_increment(
   1.256              _regs.edi, (_regs.eflags & EFLG_DF) ? -src.bytes : src.bytes);
   1.257 +        put_rep_prefix(1);
   1.258          /* cmp: dst - src ==> src=*%%edi,dst=%%eax ==> %%eax - *%%edi */
   1.259          emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
   1.260          if ( ((rep_prefix == REPE_PREFIX) && !(_regs.eflags & EFLG_ZF)) ||
     2.1 --- a/xen/include/asm-x86/x86_emulate.h	Tue Jan 22 14:35:17 2008 +0000
     2.2 +++ b/xen/include/asm-x86/x86_emulate.h	Tue Jan 22 17:18:02 2008 +0000
     2.3 @@ -175,6 +175,49 @@ struct x86_emulate_ops
     2.4          struct x86_emulate_ctxt *ctxt);
     2.5  
     2.6      /*
     2.7 +     * rep_ins: Emulate INS: <src_port> -> <dst_seg:dst_offset>.
     2.8 +     *  @bytes_per_rep: [IN ] Bytes transferred per repetition.
     2.9 +     *  @reps:  [IN ] Maximum repetitions to be emulated.
    2.10 +     *          [OUT] Number of repetitions actually emulated.
    2.11 +     */
    2.12 +    int (*rep_ins)(
    2.13 +        uint16_t src_port,
    2.14 +        enum x86_segment dst_seg,
    2.15 +        unsigned long dst_offset,
    2.16 +        unsigned int bytes_per_rep,
    2.17 +        unsigned long *reps,
    2.18 +        struct x86_emulate_ctxt *ctxt);
    2.19 +
    2.20 +    /*
    2.21 +     * rep_outs: Emulate OUTS: <src_seg:src_offset> -> <dst_port>.
    2.22 +     *  @bytes_per_rep: [IN ] Bytes transferred per repetition.
    2.23 +     *  @reps:  [IN ] Maximum repetitions to be emulated.
    2.24 +     *          [OUT] Number of repetitions actually emulated.
    2.25 +     */
    2.26 +    int (*rep_outs)(
    2.27 +        enum x86_segment src_seg,
    2.28 +        unsigned long src_offset,
    2.29 +        uint16_t dst_port,
    2.30 +        unsigned int bytes_per_rep,
    2.31 +        unsigned long *reps,
    2.32 +        struct x86_emulate_ctxt *ctxt);
    2.33 +
    2.34 +    /*
    2.35 +     * rep_movs: Emulate MOVS: <src_seg:src_offset> -> <dst_seg:dst_offset>.
    2.36 +     *  @bytes_per_rep: [IN ] Bytes transferred per repetition.
    2.37 +     *  @reps:  [IN ] Maximum repetitions to be emulated.
    2.38 +     *          [OUT] Number of repetitions actually emulated.
    2.39 +     */
    2.40 +    int (*rep_movs)(
    2.41 +        enum x86_segment src_seg,
    2.42 +        unsigned long src_offset,
    2.43 +        enum x86_segment dst_seg,
    2.44 +        unsigned long dst_offset,
    2.45 +        unsigned int bytes_per_rep,
    2.46 +        unsigned long *reps,
    2.47 +        struct x86_emulate_ctxt *ctxt);
    2.48 +
    2.49 +    /*
    2.50       * read_segment: Emulate a read of full context of a segment register.
    2.51       *  @reg:   [OUT] Contents of segment register (visible and hidden state).
    2.52       */