ia64/xen-unstable

changeset 16254:3fe75ef9ca93

x86/64: paravirt 32-on-64 call gate support

As we realized while trying out NetWare's ring 3 support, call gates
didn't work for 32-bit guests on 64-bit hypervisor. Since x86-64
doesn't know 16- or 32-bit call gates, the only option was to emulate
them. The code here was developed against 3.0.4, so hasn't been
checked for potential integration possibilities with the much improved
emulator; nevertheless I want to supply this patch.

As was realized in the course of creating this patch, 64-bit gates
don't work either, and will also need to be emulated if any
environment intends to use them. The patch changes behavior here in
that rather than silently permitting the use of 64-bit gates (with
possibly difficult-to-understand exceptions happening on the first
instruction of the call/jump target) the call/jump itself will now
fault, with the error code indicating the gate that was attempted to
be used. I intend
to complete the emulation to also cover 64-bit gates, but there is one
issue that first needs to be addressed: Whether a gate transitions
from user to kernel mode doesn't depend on the gate, but rather on the
descriptor referenced by the selector held in the gate. As the two can
change independently, this decision can be made only at the point of
use of the gate, and consequently descriptors for kernel code segments
must become distinguishable from user ones, which they currently
aren't as they both get their DPL forced to 3. An initial thought here
is to possibly leverage the otherwise meaningless conforming bit
(i.e. forcing it on for all user code segments, and off for kernel
ones, where then the distinction can be made at the point the
descriptor gets verified/fixed up based on the kernel-supplied DPL
[wouldn't work for old guests when setting the DPL to 3 was still
required to be done by the guest]).

The patch also changes behavior of check_descriptor() in that no
modification is done to the descriptor anymore unless all verification
steps passed, and in that the selector RPL of selectors in call gates
no longer gets fixed up (a comment elsewhere in the code correctly
states that the RPL field here isn't used for anything by the
processor); really, this field is now used on 64-bits to store the
original DPL of the gate, because the architectural one now gets
forced to zero.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir@xensource.com>
date Fri Oct 26 11:40:10 2007 +0100 (2007-10-26)
parents 1438f3255384
children 62426339d644
files xen/arch/x86/traps.c xen/arch/x86/x86_64/mm.c
line diff
     1.1 --- a/xen/arch/x86/traps.c	Fri Oct 26 10:57:03 2007 +0100
     1.2 +++ b/xen/arch/x86/traps.c	Fri Oct 26 11:40:10 2007 +0100
     1.3 @@ -1154,6 +1154,63 @@ static int read_descriptor(unsigned int 
     1.4      return 1;
     1.5  }
     1.6  
     1.7 +#ifdef __x86_64__
     1.8 +static int read_gate_descriptor(unsigned int gate_sel,
     1.9 +                                const struct vcpu *v,
    1.10 +                                unsigned int *sel,
    1.11 +                                unsigned long *off,
    1.12 +                                unsigned int *ar)
    1.13 +{
    1.14 +    struct desc_struct desc;
    1.15 +    const struct desc_struct *pdesc;
    1.16 +
    1.17 +
    1.18 +    pdesc = (const struct desc_struct *)(!(gate_sel & 4) ?
    1.19 +                                         GDT_VIRT_START(v) :
    1.20 +                                         LDT_VIRT_START(v))
    1.21 +            + (gate_sel >> 3);
    1.22 +    if ( gate_sel < 4 ||
    1.23 +         (gate_sel >= FIRST_RESERVED_GDT_BYTE && !(gate_sel & 4)) ||
    1.24 +         __get_user(desc, pdesc) )
    1.25 +        return 0;
    1.26 +
    1.27 +    *sel = (desc.a >> 16) & 0x0000fffc;
    1.28 +    *off = (desc.a & 0x0000ffff) | (desc.b & 0xffff0000);
    1.29 +    *ar = desc.b & 0x0000ffff;
    1.30 +    /*
    1.31 +     * check_descriptor() clears the DPL field and stores the
    1.32 +     * guest requested DPL in the selector's RPL field.
    1.33 +     */
    1.34 +    ASSERT(!(*ar & _SEGMENT_DPL));
    1.35 +    *ar |= (desc.a >> (16 - 13)) & _SEGMENT_DPL;
    1.36 +
    1.37 +    if ( !is_pv_32bit_vcpu(v) )
    1.38 +    {
    1.39 +        if ( (*ar & 0x1f00) != 0x0c00 ||
    1.40 +             (gate_sel >= FIRST_RESERVED_GDT_BYTE - 8 && !(gate_sel & 4)) ||
    1.41 +             __get_user(desc, pdesc + 1) ||
    1.42 +             (desc.b & 0x1f00) )
    1.43 +            return 0;
    1.44 +
    1.45 +        *off |= (unsigned long)desc.a << 32;
    1.46 +        return 1;
    1.47 +    }
    1.48 +
    1.49 +    switch ( *ar & 0x1f00 )
    1.50 +    {
    1.51 +    case 0x0400:
    1.52 +        *off &= 0xffff;
    1.53 +        break;
    1.54 +    case 0x0c00:
    1.55 +        break;
    1.56 +    default:
    1.57 +        return 0;
    1.58 +    }
    1.59 +
    1.60 +    return 1;
    1.61 +}
    1.62 +#endif
    1.63 +
    1.64  /* Has the guest requested sufficient permission for this I/O access? */
    1.65  static inline int guest_io_okay(
    1.66      unsigned int port, unsigned int bytes,
    1.67 @@ -1223,6 +1280,8 @@ void (*pv_post_outb_hook)(unsigned int p
    1.68  #define insn_fetch(type, base, eip, limit)                                  \
    1.69  ({  unsigned long _rc, _ptr = (base) + (eip);                               \
    1.70      type _x;                                                                \
    1.71 +    if ( ad_default < 8 )                                                   \
    1.72 +        _ptr = (unsigned int)_ptr;                                          \
    1.73      if ( (limit) < sizeof(_x) - 1 || (eip) > (limit) - (sizeof(_x) - 1) )   \
    1.74          goto fail;                                                          \
    1.75      if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 )       \
    1.76 @@ -1862,6 +1921,336 @@ static int emulate_privileged_op(struct 
    1.77      return 0;
    1.78  }
    1.79  
    1.80 +static inline int check_stack_limit(unsigned int ar, unsigned int limit,
    1.81 +                                    unsigned int esp, unsigned int decr)
    1.82 +{
    1.83 +    return (((esp - decr) < (esp - 1)) &&
    1.84 +            (!(ar & _SEGMENT_EC) ? (esp - 1) <= limit : (esp - decr) > limit));
    1.85 +}
    1.86 +
    1.87 +static int emulate_gate_op(struct cpu_user_regs *regs)
    1.88 +{
    1.89 +#ifdef __x86_64__
    1.90 +    struct vcpu *v = current;
    1.91 +    unsigned int sel, ar, dpl, nparm, opnd_sel;
    1.92 +    unsigned int op_default, op_bytes, ad_default, ad_bytes;
    1.93 +    unsigned long off, eip, opnd_off, base, limit;
    1.94 +    int jump;
    1.95 +
    1.96 +    /* Check whether this fault is due to the use of a call gate. */
    1.97 +    if ( !read_gate_descriptor(regs->error_code, v, &sel, &off, &ar) ||
    1.98 +         ((ar >> 13) & 3) < (regs->cs & 3) ||
    1.99 +         (ar & _SEGMENT_TYPE) != 0xc00 )
   1.100 +        return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.101 +    if ( !(ar & _SEGMENT_P) )
   1.102 +        return do_guest_trap(TRAP_no_segment, regs, 1);
   1.103 +    dpl = (ar >> 13) & 3;
   1.104 +    nparm = ar & 0x1f;
   1.105 +
   1.106 +    /*
   1.107 +     * Decode instruction (and perhaps operand) to determine RPL,
   1.108 +     * whether this is a jump or a call, and the call return offset.
   1.109 +     */
   1.110 +    if ( !read_descriptor(regs->cs, v, regs, &base, &limit, &ar, 0) ||
   1.111 +         !(ar & _SEGMENT_S) ||
   1.112 +         !(ar & _SEGMENT_P) ||
   1.113 +         !(ar & _SEGMENT_CODE) )
   1.114 +        return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.115 +
   1.116 +    op_bytes = op_default = ar & _SEGMENT_DB ? 4 : 2;
   1.117 +    ad_default = ad_bytes = op_default;
   1.118 +    opnd_sel = opnd_off = 0;
   1.119 +    jump = -1;
   1.120 +    for ( eip = regs->eip; eip - regs->_eip < 10; )
   1.121 +    {
   1.122 +        switch ( insn_fetch(u8, base, eip, limit) )
   1.123 +        {
   1.124 +        case 0x66: /* operand-size override */
   1.125 +            op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
   1.126 +            continue;
   1.127 +        case 0x67: /* address-size override */
   1.128 +            ad_bytes = ad_default != 4 ? 4 : 2; /* switch to 2/4 bytes */
   1.129 +            continue;
   1.130 +        case 0x2e: /* CS override */
   1.131 +            opnd_sel = regs->cs;
   1.132 +            ASSERT(opnd_sel);
   1.133 +            continue;
   1.134 +        case 0x3e: /* DS override */
   1.135 +            opnd_sel = read_sreg(regs, ds);
   1.136 +            if ( !opnd_sel )
   1.137 +                opnd_sel = dpl;
   1.138 +            continue;
   1.139 +        case 0x26: /* ES override */
   1.140 +            opnd_sel = read_sreg(regs, es);
   1.141 +            if ( !opnd_sel )
   1.142 +                opnd_sel = dpl;
   1.143 +            continue;
   1.144 +        case 0x64: /* FS override */
   1.145 +            opnd_sel = read_sreg(regs, fs);
   1.146 +            if ( !opnd_sel )
   1.147 +                opnd_sel = dpl;
   1.148 +            continue;
   1.149 +        case 0x65: /* GS override */
   1.150 +            opnd_sel = read_sreg(regs, gs);
   1.151 +            if ( !opnd_sel )
   1.152 +                opnd_sel = dpl;
   1.153 +            continue;
   1.154 +        case 0x36: /* SS override */
   1.155 +            opnd_sel = regs->ss;
   1.156 +            if ( !opnd_sel )
   1.157 +                opnd_sel = dpl;
   1.158 +            continue;
   1.159 +        case 0xea:
   1.160 +            ++jump;
   1.161 +            /* FALLTHROUGH */
   1.162 +        case 0x9a:
   1.163 +            ++jump;
   1.164 +            opnd_sel = regs->cs;
   1.165 +            opnd_off = eip;
   1.166 +            ad_bytes = ad_default;
   1.167 +            eip += op_bytes + 2;
   1.168 +            break;
   1.169 +        case 0xff:
   1.170 +            {
   1.171 +                unsigned int modrm;
   1.172 +
   1.173 +                switch ( (modrm = insn_fetch(u8, base, eip, limit)) & 0xf8 )
   1.174 +                {
   1.175 +                case 0x28: case 0x68: case 0xa8:
   1.176 +                    ++jump;
   1.177 +                    /* FALLTHROUGH */
   1.178 +                case 0x18: case 0x58: case 0x98:
   1.179 +                    ++jump;
   1.180 +                    if ( ad_bytes != 2 )
   1.181 +                    {
   1.182 +                        if ( (modrm & 7) == 4 )
   1.183 +                        {
   1.184 +                            unsigned int sib = insn_fetch(u8, base, eip, limit);
   1.185 +
   1.186 +                            modrm = (modrm & ~7) | (sib & 7);
   1.187 +                            if ( (sib >>= 3) != 4 )
   1.188 +                                opnd_off = *(unsigned long *)decode_register(sib & 7, regs, 0);
   1.189 +                            opnd_off <<= sib >> 3;
   1.190 +                        }
   1.191 +                        if ( (modrm & 7) != 5 || (modrm & 0xc0) )
   1.192 +                            opnd_off += *(unsigned long *)decode_register(modrm & 7, regs, 0);
   1.193 +                        else
   1.194 +                            modrm |= 0x87;
   1.195 +                        if ( !opnd_sel )
   1.196 +                        {
   1.197 +                            switch ( modrm & 7 )
   1.198 +                            {
   1.199 +                            default:
   1.200 +                                opnd_sel = read_sreg(regs, ds);
   1.201 +                                break;
   1.202 +                            case 4: case 5:
   1.203 +                                opnd_sel = regs->ss;
   1.204 +                                break;
   1.205 +                            }
   1.206 +                        }
   1.207 +                    }
   1.208 +                    else
   1.209 +                    {
   1.210 +                        switch ( modrm & 7 )
   1.211 +                        {
   1.212 +                        case 0: case 1: case 7:
   1.213 +                            opnd_off = regs->ebx;
   1.214 +                            break;
   1.215 +                        case 6:
   1.216 +                            if ( !(modrm & 0xc0) )
   1.217 +                                modrm |= 0x80;
   1.218 +                            else
   1.219 +                        case 2: case 3:
   1.220 +                            {
   1.221 +                                opnd_off = regs->ebp;
   1.222 +                                if ( !opnd_sel )
   1.223 +                                    opnd_sel = regs->ss;
   1.224 +                            }
   1.225 +                            break;
   1.226 +                        }
   1.227 +                        if ( !opnd_sel )
   1.228 +                            opnd_sel = read_sreg(regs, ds);
   1.229 +                        switch ( modrm & 7 )
   1.230 +                        {
   1.231 +                        case 0: case 2: case 4:
   1.232 +                            opnd_off += regs->esi;
   1.233 +                            break;
   1.234 +                        case 1: case 3: case 5:
   1.235 +                            opnd_off += regs->edi;
   1.236 +                            break;
   1.237 +                        }
   1.238 +                    }
   1.239 +                    switch ( modrm & 0xc0 )
   1.240 +                    {
   1.241 +                    case 0x40:
   1.242 +                        opnd_off += insn_fetch(s8, base, eip, limit);
   1.243 +                        break;
   1.244 +                    case 0x80:
   1.245 +                        opnd_off += insn_fetch(s32, base, eip, limit);
   1.246 +                        break;
   1.247 +                    }
   1.248 +                    if ( ad_bytes == 4 )
   1.249 +                        opnd_off = (unsigned int)opnd_off;
   1.250 +                    else if ( ad_bytes == 2 )
   1.251 +                        opnd_off = (unsigned short)opnd_off;
   1.252 +                    break;
   1.253 +                }
   1.254 +            }
   1.255 +            break;
   1.256 +        }
   1.257 +        break;
   1.258 +    }
   1.259 +
   1.260 +    if ( jump < 0 )
   1.261 +    {
   1.262 + fail:
   1.263 +        return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.264 +    }
   1.265 +
   1.266 +    if ( (opnd_sel != regs->cs &&
   1.267 +          !read_descriptor(opnd_sel, v, regs, &base, &limit, &ar, 0)) ||
   1.268 +         !(ar & _SEGMENT_S) ||
   1.269 +         !(ar & _SEGMENT_P) ||
   1.270 +         ((ar & _SEGMENT_CODE) && !(ar & _SEGMENT_WR)) )
   1.271 +        return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.272 +
   1.273 +    opnd_off += op_bytes;
   1.274 +#define ad_default ad_bytes
   1.275 +    opnd_sel = insn_fetch(u16, base, opnd_off, limit);
   1.276 +#undef ad_default
   1.277 +    ASSERT((opnd_sel & ~3) == regs->error_code);
   1.278 +    if ( dpl < (opnd_sel & 3) )
   1.279 +        return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.280 +
   1.281 +    if ( !read_descriptor(sel, v, regs, &base, &limit, &ar, 0) ||
   1.282 +         !(ar & _SEGMENT_S) ||
   1.283 +         !(ar & _SEGMENT_CODE) ||
   1.284 +         (!jump || (ar & _SEGMENT_EC) ?
   1.285 +          ((ar >> 13) & 3) > (regs->cs & 3) :
   1.286 +          ((ar >> 13) & 3) != (regs->cs & 3)) )
   1.287 +    {
   1.288 +        regs->error_code = sel;
   1.289 +        return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.290 +    }
   1.291 +    if ( !(ar & _SEGMENT_P) )
   1.292 +    {
   1.293 +        regs->error_code = sel;
   1.294 +        return do_guest_trap(TRAP_no_segment, regs, 1);
   1.295 +    }
   1.296 +    if ( off > limit )
   1.297 +    {
   1.298 +        regs->error_code = 0;
   1.299 +        return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.300 +    }
   1.301 +
   1.302 +    if ( !jump )
   1.303 +    {
   1.304 +        unsigned int ss, esp, *stkp;
   1.305 +        int rc;
   1.306 +#define push(item) do \
   1.307 +        { \
   1.308 +            --stkp; \
   1.309 +            esp -= 4; \
   1.310 +            rc = __put_user(item, stkp); \
   1.311 +            if ( rc ) \
   1.312 +            { \
   1.313 +                propagate_page_fault((unsigned long)(stkp + 1) - rc, \
   1.314 +                                     PFEC_write_access); \
   1.315 +                return 0; \
   1.316 +            } \
   1.317 +        } while ( 0 )
   1.318 +
   1.319 +        if ( ((ar >> 13) & 3) < (regs->cs & 3) )
   1.320 +        {
   1.321 +            sel |= (ar >> 13) & 3;
   1.322 +            /* Inner stack known only for kernel ring. */
   1.323 +            if ( (sel & 3) != GUEST_KERNEL_RPL(v->domain) )
   1.324 +                return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.325 +            esp = v->arch.guest_context.kernel_sp;
   1.326 +            ss = v->arch.guest_context.kernel_ss;
   1.327 +            if ( (ss & 3) != (sel & 3) ||
   1.328 +                 !read_descriptor(ss, v, regs, &base, &limit, &ar, 0) ||
   1.329 +                 ((ar >> 13) & 3) != (sel & 3) ||
   1.330 +                 !(ar & _SEGMENT_S) ||
   1.331 +                 (ar & _SEGMENT_CODE) ||
   1.332 +                 !(ar & _SEGMENT_WR) )
   1.333 +            {
   1.334 +                regs->error_code = ss & ~3;
   1.335 +                return do_guest_trap(TRAP_invalid_tss, regs, 1);
   1.336 +            }
   1.337 +            if ( !(ar & _SEGMENT_P) ||
   1.338 +                 !check_stack_limit(ar, limit, esp, (4 + nparm) * 4) )
   1.339 +            {
   1.340 +                regs->error_code = ss & ~3;
   1.341 +                return do_guest_trap(TRAP_stack_error, regs, 1);
   1.342 +            }
   1.343 +            stkp = (unsigned int *)(unsigned long)((unsigned int)base + esp);
   1.344 +            if ( !compat_access_ok(stkp - 4 - nparm, (4 + nparm) * 4) )
   1.345 +                return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.346 +            push(regs->ss);
   1.347 +            push(regs->esp);
   1.348 +            if ( nparm )
   1.349 +            {
   1.350 +                const unsigned int *ustkp;
   1.351 +
   1.352 +                if ( !read_descriptor(regs->ss, v, regs, &base, &limit, &ar, 0) ||
   1.353 +                     ((ar >> 13) & 3) != (regs->cs & 3) ||
   1.354 +                     !(ar & _SEGMENT_S) ||
   1.355 +                     (ar & _SEGMENT_CODE) ||
   1.356 +                     !(ar & _SEGMENT_WR) ||
   1.357 +                     !check_stack_limit(ar, limit, esp + nparm * 4, nparm * 4) )
   1.358 +                    return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.359 +                ustkp = (unsigned int *)(unsigned long)((unsigned int)base + regs->_esp + nparm * 4);
   1.360 +                if ( !compat_access_ok(ustkp - nparm, nparm * 4) )
   1.361 +                    return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.362 +                do
   1.363 +                {
   1.364 +                    unsigned int parm;
   1.365 +
   1.366 +                    --ustkp;
   1.367 +                    rc = __get_user(parm, ustkp);
   1.368 +                    if ( rc )
   1.369 +                    {
   1.370 +                        propagate_page_fault((unsigned long)(ustkp + 1) - rc, 0);
   1.371 +                        return 0;
   1.372 +                    }
   1.373 +                    push(parm);
   1.374 +                } while ( --nparm );
   1.375 +            }
   1.376 +        }
   1.377 +        else
   1.378 +        {
   1.379 +            sel |= (regs->cs & 3);
   1.380 +            esp = regs->esp;
   1.381 +            ss = regs->ss;
   1.382 +            if ( !read_descriptor(ss, v, regs, &base, &limit, &ar, 0) ||
   1.383 +                 ((ar >> 13) & 3) != (sel & 3) )
   1.384 +                return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.385 +            if ( !check_stack_limit(ar, limit, esp, 2 * 4) )
   1.386 +            {
   1.387 +                regs->error_code = 0;
   1.388 +                return do_guest_trap(TRAP_stack_error, regs, 1);
   1.389 +            }
   1.390 +            stkp = (unsigned int *)(unsigned long)((unsigned int)base + esp);
   1.391 +            if ( !compat_access_ok(stkp - 2, 2 * 4) )
   1.392 +                return do_guest_trap(TRAP_gp_fault, regs, 1);
   1.393 +        }
   1.394 +        push(regs->cs);
   1.395 +        push(eip);
   1.396 +#undef push
   1.397 +        regs->esp = esp;
   1.398 +        regs->ss = ss;
   1.399 +    }
   1.400 +    else
   1.401 +        sel |= (regs->cs & 3);
   1.402 +
   1.403 +    regs->eip = off;
   1.404 +    regs->cs = sel;
   1.405 +#endif
   1.406 +
   1.407 +    return 0;
   1.408 +}
   1.409 +
   1.410  asmlinkage int do_general_protection(struct cpu_user_regs *regs)
   1.411  {
   1.412      struct vcpu *v = current;
   1.413 @@ -1907,6 +2296,8 @@ asmlinkage int do_general_protection(str
   1.414              return do_guest_trap(vector, regs, 0);
   1.415          }
   1.416      }
   1.417 +    else if ( is_pv_32on64_vcpu(v) && regs->error_code )
   1.418 +        return emulate_gate_op(regs);
   1.419  
   1.420      /* Emulate some simple privileged and I/O instructions. */
   1.421      if ( (regs->error_code == 0) &&
     2.1 --- a/xen/arch/x86/x86_64/mm.c	Fri Oct 26 10:57:03 2007 +0100
     2.2 +++ b/xen/arch/x86/x86_64/mm.c	Fri Oct 26 11:40:10 2007 +0100
     2.3 @@ -383,14 +383,16 @@ int check_descriptor(const struct domain
     2.4  {
     2.5      u32 a = d->a, b = d->b;
     2.6      u16 cs;
     2.7 +    unsigned int dpl;
     2.8  
     2.9      /* A not-present descriptor will always fault, so is safe. */
    2.10      if ( !(b & _SEGMENT_P) ) 
    2.11          goto good;
    2.12  
    2.13      /* Check and fix up the DPL. */
    2.14 -    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
    2.15 -        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);
    2.16 +    dpl = (b >> 13) & 3;
    2.17 +    __fixup_guest_selector(dom, dpl);
    2.18 +    b = (b & ~_SEGMENT_DPL) | (dpl << 13);
    2.19  
    2.20      /* All code and data segments are okay. No base/limit checking. */
    2.21      if ( (b & _SEGMENT_S) )
    2.22 @@ -408,18 +410,31 @@ int check_descriptor(const struct domain
    2.23      if ( (b & _SEGMENT_TYPE) != 0xc00 )
    2.24          goto bad;
    2.25  
    2.26 -    /* Validate and fix up the target code selector. */
    2.27 +    /* Validate the target code selector. */
    2.28      cs = a >> 16;
    2.29 -    fixup_guest_code_selector(dom, cs);
    2.30      if ( !guest_gate_selector_okay(dom, cs) )
    2.31          goto bad;
    2.32 -    a = d->a = (d->a & 0xffffU) | (cs << 16);
    2.33 +    /*
    2.34 +     * Force DPL to zero, causing a GP fault with its error code indicating
    2.35 +     * the gate in use, allowing emulation. This is necessary because with
    2.36 +     * native guests (kernel in ring 3) call gates cannot be used directly
    2.37 +     * to transition from user to kernel mode (and whether a gate is used
    2.38 +     * to enter the kernel can only be determined when the gate is being
    2.39 +     * used), and with compat guests call gates cannot be used at all as
    2.40 +     * there are only 64-bit ones.
    2.41 +     * Store the original DPL in the selector's RPL field.
    2.42 +     */
    2.43 +    b &= ~_SEGMENT_DPL;
    2.44 +    cs = (cs & ~3) | dpl;
    2.45 +    a = (a & 0xffffU) | (cs << 16);
    2.46  
    2.47      /* Reserved bits must be zero. */
    2.48 -    if ( (b & 0xe0) != 0 )
    2.49 +    if ( b & (is_pv_32bit_domain(dom) ? 0xe0 : 0xff) )
    2.50          goto bad;
    2.51          
    2.52   good:
    2.53 +    d->a = a;
    2.54 +    d->b = b;
    2.55      return 1;
    2.56   bad:
    2.57      return 0;