ia64/xen-unstable

changeset   15919:35fb20c4822c
author      kfraser@localhost.localdomain
date        Tue Sep 18 11:49:42 2007 +0100 (2007-09-18)
parents     49700bb716bb
children    3727f7570dff
files       xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/mm/shadow/common.c xen/include/asm-x86/hvm/hvm.h

hvm: Support hardware task switching.

Signed-off-by: Keir Fraser <keir@xensource.com>
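
This changeset adds hvm_virtual_to_linear_addr(), hvm_load_segment_selector() and hvm_task_switch() to xen/arch/x86/hvm/hvm.c, exports hvm_task_switch() and hvm_virtual_to_linear_addr() through asm-x86/hvm/hvm.h, converts the shadow emulator's hvm_translate_linear_addr() to call the common helper, and routes the previously-fatal SVM VMEXIT_TASK_SWITCH and VMX EXIT_REASON_TASK_SWITCH intercepts into hvm_task_switch(). For orientation, below is a minimal standalone sketch of the 8-byte GDT/LDT descriptor unpacking that hvm_load_segment_selector() and hvm_task_switch() perform; the struct and function names are illustrative only and do not appear in the patch.

    /* Illustrative sketch, not part of the changeset. */
    #include <stdint.h>

    struct example_desc { uint32_t a, b; };   /* a = low dword, b = high dword */

    struct example_segreg {
        uint32_t base, limit;
        uint16_t attr;        /* bits 0-7: type/S/DPL/P, bits 8-11: AVL/L/DB/G */
    };

    static void example_unpack_desc(const struct example_desc *d,
                                    struct example_segreg *s)
    {
        /* Base is split across descriptor bits 16-39 and 56-63. */
        s->base  = ((d->b      ) & 0xff000000u) |   /* base 31:24 */
                   ((d->b << 16) & 0x00ff0000u) |   /* base 23:16 */
                   ((d->a >> 16) & 0x0000ffffu);    /* base 15:0  */

        /* Limit is descriptor bits 0-15 plus bits 48-51. */
        s->limit = (d->b & 0x000f0000u) | (d->a & 0x0000ffffu);

        /* Attribute byte (bits 40-47) plus flags nibble (bits 52-55). */
        s->attr  = ((d->b >>  8) & 0x00ffu) | ((d->b >> 12) & 0x0f00u);

        /* Granularity bit set: the limit is expressed in 4K pages. */
        if ( s->attr & 0x800 )
            s->limit = (s->limit << 12) | 0xfffu;
    }
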
line diff
     1.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Sep 17 13:33:09 2007 +0100
     1.2 +++ b/xen/arch/x86/hvm/hvm.c	Tue Sep 18 11:49:42 2007 +0100
     1.3 @@ -672,6 +672,412 @@ int hvm_set_cr4(unsigned long value)
     1.4      return 0;
     1.5  }
     1.6  
     1.7 +int hvm_virtual_to_linear_addr(
     1.8 +    enum x86_segment seg,
     1.9 +    struct segment_register *reg,
    1.10 +    unsigned long offset,
    1.11 +    unsigned int bytes,
    1.12 +    enum hvm_access_type access_type,
    1.13 +    unsigned int addr_size,
    1.14 +    unsigned long *linear_addr)
    1.15 +{
    1.16 +    unsigned long addr = offset;
    1.17 +    uint32_t last_byte;
    1.18 +
    1.19 +    if ( addr_size != 64 )
    1.20 +    {
    1.21 +        /*
    1.22 +         * COMPATIBILITY MODE: Apply segment checks and add base.
    1.23 +         */
    1.24 +
    1.25 +        switch ( access_type )
    1.26 +        {
    1.27 +        case hvm_access_read:
    1.28 +            if ( (reg->attr.fields.type & 0xa) == 0x8 )
    1.29 +                goto gpf; /* execute-only code segment */
    1.30 +            break;
    1.31 +        case hvm_access_write:
    1.32 +            if ( (reg->attr.fields.type & 0xa) != 0x2 )
    1.33 +                goto gpf; /* not a writable data segment */
    1.34 +            break;
    1.35 +        default:
    1.36 +            break;
    1.37 +        }
    1.38 +
    1.39 +        last_byte = offset + bytes - 1;
    1.40 +
    1.41 +        /* Is this a grows-down data segment? Special limit check if so. */
    1.42 +        if ( (reg->attr.fields.type & 0xc) == 0x4 )
    1.43 +        {
    1.44 +            /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
    1.45 +            if ( !reg->attr.fields.db )
    1.46 +                last_byte = (uint16_t)last_byte;
    1.47 +
    1.48 +            /* Check first byte and last byte against respective bounds. */
    1.49 +            if ( (offset <= reg->limit) || (last_byte < offset) )
    1.50 +                goto gpf;
    1.51 +        }
    1.52 +        else if ( (last_byte > reg->limit) || (last_byte < offset) )
    1.53 +            goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */
    1.54 +
    1.55 +        /*
    1.56 +         * Hardware truncates to 32 bits in compatibility mode.
    1.57 +         * It does not truncate to 16 bits in 16-bit address-size mode.
    1.58 +         */
    1.59 +        addr = (uint32_t)(addr + reg->base);
    1.60 +    }
    1.61 +    else
    1.62 +    {
    1.63 +        /*
    1.64 +         * LONG MODE: FS and GS add segment base. Addresses must be canonical.
    1.65 +         */
    1.66 +
    1.67 +        if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
    1.68 +            addr += reg->base;
    1.69 +
    1.70 +        if ( !is_canonical_address(addr) )
    1.71 +            goto gpf;
    1.72 +    }
    1.73 +
    1.74 +    *linear_addr = addr;
    1.75 +    return 1;
    1.76 +
    1.77 + gpf:
    1.78 +    return 0;
    1.79 +}
    1.80 +
    1.81 +static void *hvm_map(unsigned long va, int size)
    1.82 +{
    1.83 +    unsigned long gfn, mfn;
    1.84 +    p2m_type_t p2mt;
    1.85 +
    1.86 +    if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
    1.87 +    {
    1.88 +        hvm_inject_exception(TRAP_page_fault, PFEC_write_access,
    1.89 +                             (va + PAGE_SIZE - 1) & PAGE_MASK);
    1.90 +        return NULL;
    1.91 +    }
    1.92 +
    1.93 +    gfn = paging_gva_to_gfn(current, va);
    1.94 +    mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    1.95 +    if ( !p2m_is_ram(p2mt) )
    1.96 +    {
    1.97 +        hvm_inject_exception(TRAP_page_fault, PFEC_write_access, va);
    1.98 +        return NULL;
    1.99 +    }
   1.100 +
   1.101 +    ASSERT(mfn_valid(mfn));
   1.102 +
   1.103 +    paging_mark_dirty(current->domain, mfn);
   1.104 +
   1.105 +    return (char *)map_domain_page(mfn) + (va & ~PAGE_MASK);
   1.106 +}
   1.107 +
   1.108 +static void hvm_unmap(void *p)
   1.109 +{
   1.110 +    if ( p )
   1.111 +        unmap_domain_page(p);
   1.112 +}
   1.113 +
   1.114 +static int hvm_load_segment_selector(
   1.115 +    struct vcpu *v, enum x86_segment seg, uint16_t sel)
   1.116 +{
   1.117 +    struct segment_register desctab, cs, segr;
   1.118 +    struct desc_struct *pdesc, desc;
   1.119 +    u8 dpl, rpl, cpl;
   1.120 +    int fault_type = TRAP_invalid_tss;
   1.121 +
   1.122 +    /* NULL selector? */
   1.123 +    if ( (sel & 0xfffc) == 0 )
   1.124 +    {
   1.125 +        if ( (seg == x86_seg_cs) || (seg == x86_seg_ss) )
   1.126 +            goto fail;
   1.127 +        memset(&segr, 0, sizeof(segr));
   1.128 +        hvm_set_segment_register(v, seg, &segr);
   1.129 +        return 0;
   1.130 +    }
   1.131 +
   1.132 +    /* LDT descriptor must be in the GDT. */
   1.133 +    if ( (seg == x86_seg_ldtr) && (sel & 4) )
   1.134 +        goto fail;
   1.135 +
   1.136 +    hvm_get_segment_register(v, x86_seg_cs, &cs);
   1.137 +    hvm_get_segment_register(
   1.138 +        v, (sel & 4) ? x86_seg_ldtr : x86_seg_gdtr, &desctab);
   1.139 +
   1.140 +    /* Check against descriptor table limit. */
   1.141 +    if ( ((sel & 0xfff8) + 7) > desctab.limit )
   1.142 +        goto fail;
   1.143 +
   1.144 +    pdesc = hvm_map(desctab.base + (sel & 0xfff8), 8);
   1.145 +    if ( pdesc == NULL )
   1.146 +        goto hvm_map_fail;
   1.147 +
   1.148 +    do {
   1.149 +        desc = *pdesc;
   1.150 +
   1.151 +        /* Segment present in memory? */
   1.152 +        if ( !(desc.b & (1u<<15)) )
   1.153 +        {
   1.154 +            fault_type = TRAP_no_segment;
   1.155 +            goto unmap_and_fail;
   1.156 +        }
   1.157 +
   1.158 +        /* LDT descriptor is a system segment. All others are code/data. */
   1.159 +        if ( (desc.b & (1u<<12)) == ((seg == x86_seg_ldtr) << 12) )
   1.160 +            goto unmap_and_fail;
   1.161 +
   1.162 +        dpl = (desc.b >> 13) & 3;
   1.163 +        rpl = sel & 3;
   1.164 +        cpl = cs.sel & 3;
   1.165 +
   1.166 +        switch ( seg )
   1.167 +        {
   1.168 +        case x86_seg_cs:
   1.169 +            /* Code segment? */
   1.170 +            if ( !(desc.b & (1u<<11)) )
   1.171 +                goto unmap_and_fail;
   1.172 +            /* Non-conforming segment: check DPL against RPL. */
   1.173 +            if ( ((desc.b & (6u<<9)) != 6) && (dpl != rpl) )
   1.174 +                goto unmap_and_fail;
   1.175 +            break;
   1.176 +        case x86_seg_ss:
   1.177 +            /* Writable data segment? */
   1.178 +            if ( (desc.b & (5u<<9)) != (1u<<9) )
   1.179 +                goto unmap_and_fail;
   1.180 +            if ( (dpl != cpl) || (dpl != rpl) )
   1.181 +                goto unmap_and_fail;
   1.182 +            break;
   1.183 +        case x86_seg_ldtr:
   1.184 +            /* LDT system segment? */
   1.185 +            if ( (desc.b & (15u<<8)) != (2u<<8) )
   1.186 +                goto unmap_and_fail;
   1.187 +            goto skip_accessed_flag;
   1.188 +        default:
   1.189 +            /* Readable code or data segment? */
   1.190 +            if ( (desc.b & (5u<<9)) == (4u<<9) )
   1.191 +                goto unmap_and_fail;
   1.192 +            /* Non-conforming segment: check DPL against RPL and CPL. */
   1.193 +            if ( ((desc.b & (6u<<9)) != 6) && ((dpl < cpl) || (dpl < rpl)) )
   1.194 +                goto unmap_and_fail;
   1.195 +            break;
   1.196 +        }
   1.197 +    } while ( !(desc.b & 0x100) && /* Ensure Accessed flag is set */
   1.198 +              (cmpxchg(&pdesc->b, desc.b, desc.b | 0x100) != desc.b) );
   1.199 +
   1.200 +    /* Force the Accessed flag in our local copy. */
   1.201 +    desc.b |= 0x100;
   1.202 +
   1.203 + skip_accessed_flag:
   1.204 +    hvm_unmap(pdesc);
   1.205 +
   1.206 +    segr.base = (((desc.b <<  0) & 0xff000000u) |
   1.207 +                 ((desc.b << 16) & 0x00ff0000u) |
   1.208 +                 ((desc.a >> 16) & 0x0000ffffu));
   1.209 +    segr.attr.bytes = (((desc.b >>  8) & 0x00ffu) |
   1.210 +                       ((desc.b >> 12) & 0x0f00u));
   1.211 +    segr.limit = (desc.b & 0x000f0000u) | (desc.a & 0x0000ffffu);
   1.212 +    if ( segr.attr.fields.g )
   1.213 +        segr.limit = (segr.limit << 12) | 0xfffu;
   1.214 +    segr.sel = sel;
   1.215 +    hvm_set_segment_register(v, seg, &segr);
   1.216 +
   1.217 +    return 0;
   1.218 +
   1.219 + unmap_and_fail:
   1.220 +    hvm_unmap(pdesc);
   1.221 + fail:
   1.222 +    hvm_inject_exception(fault_type, sel & 0xfffc, 0);
   1.223 + hvm_map_fail:
   1.224 +    return 1;
   1.225 +}
   1.226 +
   1.227 +void hvm_task_switch(
   1.228 +    uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
   1.229 +    int32_t errcode)
   1.230 +{
   1.231 +    struct vcpu *v = current;
   1.232 +    struct cpu_user_regs *regs = guest_cpu_user_regs();
   1.233 +    struct segment_register gdt, tr, prev_tr, segr;
   1.234 +    struct desc_struct *optss_desc = NULL, *nptss_desc = NULL, tss_desc;
   1.235 +    unsigned long eflags;
   1.236 +    int exn_raised;
   1.237 +    struct {
   1.238 +        u16 back_link,__blh;
   1.239 +        u32 esp0;
   1.240 +        u16 ss0, _0;
   1.241 +        u32 esp1;
   1.242 +        u16 ss1, _1;
   1.243 +        u32 esp2;
   1.244 +        u16 ss2, _2;
   1.245 +        u32 cr3, eip, eflags, eax, ecx, edx, ebx, esp, ebp, esi, edi;
   1.246 +        u16 es, _3, cs, _4, ss, _5, ds, _6, fs, _7, gs, _8, ldt, _9;
   1.247 +        u16 trace, iomap;
   1.248 +    } *ptss, tss;
   1.249 +
   1.250 +    hvm_get_segment_register(v, x86_seg_gdtr, &gdt);
   1.251 +    hvm_get_segment_register(v, x86_seg_tr, &prev_tr);
   1.252 +
   1.253 +    if ( ((tss_sel & 0xfff8) + 7) > gdt.limit )
   1.254 +    {
   1.255 +        hvm_inject_exception((taskswitch_reason == TSW_iret) ?
   1.256 +                             TRAP_invalid_tss : TRAP_gp_fault,
   1.257 +                             tss_sel & 0xfff8, 0);
   1.258 +        goto out;
   1.259 +    }
   1.260 +
   1.261 +    optss_desc = hvm_map(gdt.base + (prev_tr.sel & 0xfff8), 8);
   1.262 +    if ( optss_desc == NULL )
   1.263 +        goto out;
   1.264 +
   1.265 +    nptss_desc = hvm_map(gdt.base + (tss_sel & 0xfff8), 8);
   1.266 +    if ( nptss_desc == NULL )
   1.267 +        goto out;
   1.268 +
   1.269 +    tss_desc = *nptss_desc;
   1.270 +    tr.sel = tss_sel;
   1.271 +    tr.base = (((tss_desc.b <<  0) & 0xff000000u) |
   1.272 +               ((tss_desc.b << 16) & 0x00ff0000u) |
   1.273 +               ((tss_desc.a >> 16) & 0x0000ffffu));
   1.274 +    tr.limit = (tss_desc.b & 0x000f0000u) | (tss_desc.a & 0x0000ffffu);
   1.275 +    tr.attr.bytes = (((tss_desc.b >>  8) & 0x00ffu) |
   1.276 +                     ((tss_desc.b >> 20) & 0x0f00u));
   1.277 +
   1.278 +    if ( !tr.attr.fields.p )
   1.279 +    {
   1.280 +        hvm_inject_exception(TRAP_no_segment, tss_sel & 0xfff8, 0);
   1.281 +        goto out;
   1.282 +    }
   1.283 +
   1.284 +    if ( tr.attr.fields.type != ((taskswitch_reason == TSW_iret) ? 0xb : 0x9) )
   1.285 +    {
   1.286 +        hvm_inject_exception(
   1.287 +            (taskswitch_reason == TSW_iret) ? TRAP_invalid_tss : TRAP_gp_fault,
   1.288 +            tss_sel & 0xfff8, 0);
   1.289 +        goto out;
   1.290 +    }
   1.291 +
   1.292 +    if ( !tr.attr.fields.g && (tr.limit < (sizeof(tss)-1)) )
   1.293 +    {
   1.294 +        hvm_inject_exception(TRAP_invalid_tss, tss_sel & 0xfff8, 0);
   1.295 +        goto out;
   1.296 +    }
   1.297 +
   1.298 +    hvm_store_cpu_guest_regs(v, regs, NULL);
   1.299 +
   1.300 +    ptss = hvm_map(prev_tr.base, sizeof(tss));
   1.301 +    if ( ptss == NULL )
   1.302 +        goto out;
   1.303 +
   1.304 +    eflags = regs->eflags;
   1.305 +    if ( taskswitch_reason == TSW_iret )
   1.306 +        eflags &= ~X86_EFLAGS_NT;
   1.307 +
   1.308 +    ptss->cr3    = v->arch.hvm_vcpu.guest_cr[3];
   1.309 +    ptss->eip    = regs->eip;
   1.310 +    ptss->eflags = eflags;
   1.311 +    ptss->eax    = regs->eax;
   1.312 +    ptss->ecx    = regs->ecx;
   1.313 +    ptss->edx    = regs->edx;
   1.314 +    ptss->ebx    = regs->ebx;
   1.315 +    ptss->esp    = regs->esp;
   1.316 +    ptss->ebp    = regs->ebp;
   1.317 +    ptss->esi    = regs->esi;
   1.318 +    ptss->edi    = regs->edi;
   1.319 +
   1.320 +    hvm_get_segment_register(v, x86_seg_es, &segr);
   1.321 +    ptss->es = segr.sel;
   1.322 +    hvm_get_segment_register(v, x86_seg_cs, &segr);
   1.323 +    ptss->cs = segr.sel;
   1.324 +    hvm_get_segment_register(v, x86_seg_ss, &segr);
   1.325 +    ptss->ss = segr.sel;
   1.326 +    hvm_get_segment_register(v, x86_seg_ds, &segr);
   1.327 +    ptss->ds = segr.sel;
   1.328 +    hvm_get_segment_register(v, x86_seg_fs, &segr);
   1.329 +    ptss->fs = segr.sel;
   1.330 +    hvm_get_segment_register(v, x86_seg_gs, &segr);
   1.331 +    ptss->gs = segr.sel;
   1.332 +    hvm_get_segment_register(v, x86_seg_ldtr, &segr);
   1.333 +    ptss->ldt = segr.sel;
   1.334 +
   1.335 +    hvm_unmap(ptss);
   1.336 +
   1.337 +    ptss = hvm_map(tr.base, sizeof(tss));
   1.338 +    if ( ptss == NULL )
   1.339 +        goto out;
   1.340 +
   1.341 +    if ( !hvm_set_cr3(ptss->cr3) )
   1.342 +    {
   1.343 +        hvm_unmap(ptss);
   1.344 +        goto out;
   1.345 +    }
   1.346 +
   1.347 +    regs->eip    = ptss->eip;
   1.348 +    regs->eflags = ptss->eflags;
   1.349 +    regs->eax    = ptss->eax;
   1.350 +    regs->ecx    = ptss->ecx;
   1.351 +    regs->edx    = ptss->edx;
   1.352 +    regs->ebx    = ptss->ebx;
   1.353 +    regs->esp    = ptss->esp;
   1.354 +    regs->ebp    = ptss->ebp;
   1.355 +    regs->esi    = ptss->esi;
   1.356 +    regs->edi    = ptss->edi;
   1.357 +
   1.358 +    if ( (taskswitch_reason == TSW_call_or_int) )
   1.359 +    {
   1.360 +        regs->eflags |= X86_EFLAGS_NT;
   1.361 +        ptss->back_link = prev_tr.sel;
   1.362 +    }
   1.363 +
   1.364 +    exn_raised = 0;
   1.365 +    if ( hvm_load_segment_selector(v, x86_seg_es, ptss->es) ||
   1.366 +         hvm_load_segment_selector(v, x86_seg_cs, ptss->cs) ||
   1.367 +         hvm_load_segment_selector(v, x86_seg_ss, ptss->ss) ||
   1.368 +         hvm_load_segment_selector(v, x86_seg_ds, ptss->ds) ||
   1.369 +         hvm_load_segment_selector(v, x86_seg_fs, ptss->fs) ||
   1.370 +         hvm_load_segment_selector(v, x86_seg_gs, ptss->gs) ||
   1.371 +         hvm_load_segment_selector(v, x86_seg_ldtr, ptss->ldt) )
   1.372 +        exn_raised = 1;
   1.373 +
   1.374 +    if ( (ptss->trace & 1) && !exn_raised )
   1.375 +        hvm_inject_exception(TRAP_debug, tss_sel & 0xfff8, 0);
   1.376 +
   1.377 +    hvm_unmap(ptss);
   1.378 +
   1.379 +    tr.attr.fields.type = 0xb; /* busy 32-bit tss */
   1.380 +    hvm_set_segment_register(v, x86_seg_tr, &tr);
   1.381 +    paging_update_cr3(v);
   1.382 +
   1.383 +    v->arch.hvm_vcpu.guest_cr[0] |= X86_CR0_TS;
   1.384 +    hvm_update_guest_cr(v, 0);
   1.385 +
   1.386 +    if ( (taskswitch_reason == TSW_iret) ||
   1.387 +         (taskswitch_reason == TSW_jmp) )
   1.388 +        clear_bit(41, optss_desc); /* clear B flag of old task */
   1.389 +
   1.390 +    if ( taskswitch_reason != TSW_iret )
   1.391 +        set_bit(41, nptss_desc); /* set B flag of new task */
   1.392 +
   1.393 +    if ( errcode >= 0 )
   1.394 +    {
   1.395 +        struct segment_register reg;
   1.396 +        unsigned long linear_addr;
   1.397 +        regs->esp -= 4;
   1.398 +        hvm_get_segment_register(current, x86_seg_ss, &reg);
   1.399 +        /* Todo: do not ignore access faults here. */
   1.400 +        if ( hvm_virtual_to_linear_addr(x86_seg_ss, &reg, regs->esp,
   1.401 +                                        4, hvm_access_write, 32,
   1.402 +                                        &linear_addr) )
   1.403 +            hvm_copy_to_guest_virt(linear_addr, &errcode, 4);
   1.404 +    }
   1.405 +
   1.406 +    hvm_load_cpu_guest_regs(v, regs);
   1.407 +
   1.408 + out:
   1.409 +    hvm_unmap(optss_desc);
   1.410 +    hvm_unmap(nptss_desc);
   1.411 +}
   1.412 +
   1.413  /*
   1.414   * __hvm_copy():
   1.415   *  @buf  = hypervisor buffer
     2.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Sep 17 13:33:09 2007 +0100
     2.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Tue Sep 18 11:49:42 2007 +0100
     2.3 @@ -648,6 +648,8 @@ static void svm_get_segment_register(str
     2.4  {
     2.5      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     2.6  
     2.7 +    ASSERT(v == current);
     2.8 +
     2.9      switch ( seg )
    2.10      {
    2.11      case x86_seg_cs:
    2.12 @@ -694,10 +696,13 @@ static void svm_set_segment_register(str
    2.13  {
    2.14      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    2.15  
    2.16 +    ASSERT(v == current);
    2.17 +
    2.18      switch ( seg )
    2.19      {
    2.20      case x86_seg_cs:
    2.21          memcpy(&vmcb->cs, reg, sizeof(*reg));
    2.22 +        guest_cpu_user_regs()->cs = reg->sel;
    2.23          break;
    2.24      case x86_seg_ds:
    2.25          memcpy(&vmcb->ds, reg, sizeof(*reg));
    2.26 @@ -717,6 +722,7 @@ static void svm_set_segment_register(str
    2.27          break;
    2.28      case x86_seg_ss:
    2.29          memcpy(&vmcb->ss, reg, sizeof(*reg));
    2.30 +        guest_cpu_user_regs()->ss = reg->sel;
    2.31          break;
    2.32      case x86_seg_tr:
    2.33          svm_sync_vmcb(v);
    2.34 @@ -2299,12 +2305,20 @@ asmlinkage void svm_vmexit_handler(struc
    2.35          svm_vmexit_do_invd(v);
    2.36          break;
    2.37  
    2.38 -    case VMEXIT_GDTR_WRITE:
    2.39 -        printk("WRITE to GDTR\n");
    2.40 +    case VMEXIT_TASK_SWITCH: {
    2.41 +        enum hvm_task_switch_reason reason;
    2.42 +        int32_t errcode = -1;
    2.43 +        if ( (vmcb->exitinfo2 >> 36) & 1 )
    2.44 +            reason = TSW_iret;
    2.45 +        else if ( (vmcb->exitinfo2 >> 38) & 1 )
    2.46 +            reason = TSW_jmp;
    2.47 +        else
    2.48 +            reason = TSW_call_or_int;
    2.49 +        if ( (vmcb->exitinfo2 >> 44) & 1 )
    2.50 +            errcode = (uint32_t)vmcb->exitinfo2;
    2.51 +        hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
    2.52          break;
    2.53 -
    2.54 -    case VMEXIT_TASK_SWITCH:
    2.55 -        goto exit_and_crash;
    2.56 +    }
    2.57  
    2.58      case VMEXIT_CPUID:
    2.59          svm_vmexit_do_cpuid(vmcb, regs);
     3.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Sep 17 13:33:09 2007 +0100
     3.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Tue Sep 18 11:49:42 2007 +0100
     3.3 @@ -885,7 +885,7 @@ static unsigned long vmx_get_segment_bas
     3.4  static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
     3.5                                       struct segment_register *reg)
     3.6  {
     3.7 -    u16 attr = 0;
     3.8 +    uint32_t attr = 0;
     3.9  
    3.10      ASSERT(v == current);
    3.11  
    3.12 @@ -960,13 +960,17 @@ static void vmx_get_segment_register(str
    3.13  static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
    3.14                                       struct segment_register *reg)
    3.15  {
    3.16 -    u16 attr;
    3.17 +    uint32_t attr;
    3.18  
    3.19      ASSERT(v == current);
    3.20  
    3.21      attr = reg->attr.bytes;
    3.22      attr = ((attr & 0xf00) << 4) | (attr & 0xff);
    3.23  
    3.24 +    /* Not-present must mean unusable. */
    3.25 +    if ( !reg->attr.fields.p )
    3.26 +        attr |= (1u << 16);
    3.27 +
    3.28      switch ( seg )
    3.29      {
    3.30      case x86_seg_cs:
    3.31 @@ -974,6 +978,7 @@ static void vmx_set_segment_register(str
    3.32          __vmwrite(GUEST_CS_LIMIT, reg->limit);
    3.33          __vmwrite(GUEST_CS_BASE, reg->base);
    3.34          __vmwrite(GUEST_CS_AR_BYTES, attr);
    3.35 +        guest_cpu_user_regs()->cs = reg->sel;
    3.36          break;
    3.37      case x86_seg_ds:
    3.38          __vmwrite(GUEST_DS_SELECTOR, reg->sel);
    3.39 @@ -1004,6 +1009,7 @@ static void vmx_set_segment_register(str
    3.40          __vmwrite(GUEST_SS_LIMIT, reg->limit);
    3.41          __vmwrite(GUEST_SS_BASE, reg->base);
    3.42          __vmwrite(GUEST_SS_AR_BYTES, attr);
    3.43 +        guest_cpu_user_regs()->ss = reg->sel;
    3.44          break;
    3.45      case x86_seg_tr:
    3.46          __vmwrite(GUEST_TR_SELECTOR, reg->sel);
    3.47 @@ -2668,7 +2674,8 @@ asmlinkage void vmx_vmexit_handler(struc
    3.48  
    3.49      /* Event delivery caused this intercept? Queue for redelivery. */
    3.50      idtv_info = __vmread(IDT_VECTORING_INFO);
    3.51 -    if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) )
    3.52 +    if ( unlikely(idtv_info & INTR_INFO_VALID_MASK) &&
    3.53 +         (exit_reason != EXIT_REASON_TASK_SWITCH) )
    3.54      {
    3.55          if ( hvm_event_needs_reinjection((idtv_info>>8)&7, idtv_info&0xff) )
    3.56          {
    3.57 @@ -2785,8 +2792,19 @@ asmlinkage void vmx_vmexit_handler(struc
    3.58          __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
    3.59                    v->arch.hvm_vmx.exec_control);
    3.60          break;
    3.61 -    case EXIT_REASON_TASK_SWITCH:
    3.62 -        goto exit_and_crash;
    3.63 +    case EXIT_REASON_TASK_SWITCH: {
    3.64 +        const enum hvm_task_switch_reason reasons[] = {
    3.65 +            TSW_call_or_int, TSW_iret, TSW_jmp, TSW_call_or_int };
    3.66 +        int32_t errcode = -1;
    3.67 +        exit_qualification = __vmread(EXIT_QUALIFICATION);
    3.68 +        if ( (idtv_info & INTR_INFO_VALID_MASK) &&
    3.69 +             (idtv_info & INTR_INFO_DELIVER_CODE_MASK) )
    3.70 +            errcode = __vmread(IDT_VECTORING_ERROR_CODE);
    3.71 +        hvm_task_switch((uint16_t)exit_qualification,
    3.72 +                        reasons[(exit_qualification >> 30) & 3],
    3.73 +                        errcode);
    3.74 +        break;
    3.75 +    }
    3.76      case EXIT_REASON_CPUID:
    3.77          inst_len = __get_instruction_length(); /* Safe: CPUID */
    3.78          __update_guest_eip(inst_len);
     4.1 --- a/xen/arch/x86/mm/shadow/common.c	Mon Sep 17 13:33:09 2007 +0100
     4.2 +++ b/xen/arch/x86/mm/shadow/common.c	Tue Sep 18 11:49:42 2007 +0100
     4.3 @@ -101,7 +101,7 @@ int _shadow_mode_refcounts(struct domain
     4.4  /* x86 emulator support for the shadow code
     4.5   */
     4.6  
     4.7 -struct segment_register *hvm_get_seg_reg(
     4.8 +static struct segment_register *hvm_get_seg_reg(
     4.9      enum x86_segment seg, struct sh_emulate_ctxt *sh_ctxt)
    4.10  {
    4.11      struct segment_register *seg_reg = &sh_ctxt->seg_reg[seg];
    4.12 @@ -110,10 +110,6 @@ struct segment_register *hvm_get_seg_reg
    4.13      return seg_reg;
    4.14  }
    4.15  
    4.16 -enum hvm_access_type {
    4.17 -    hvm_access_insn_fetch, hvm_access_read, hvm_access_write
    4.18 -};
    4.19 -
    4.20  static int hvm_translate_linear_addr(
    4.21      enum x86_segment seg,
    4.22      unsigned long offset,
    4.23 @@ -123,76 +119,18 @@ static int hvm_translate_linear_addr(
    4.24      unsigned long *paddr)
    4.25  {
    4.26      struct segment_register *reg = hvm_get_seg_reg(seg, sh_ctxt);
    4.27 -    unsigned long limit, addr = offset;
    4.28 -    uint32_t last_byte;
    4.29 -
    4.30 -    if ( sh_ctxt->ctxt.addr_size != 64 )
    4.31 -    {
    4.32 -        /*
    4.33 -         * COMPATIBILITY MODE: Apply segment checks and add base.
    4.34 -         */
    4.35 +    int okay;
    4.36  
    4.37 -        switch ( access_type )
    4.38 -        {
    4.39 -        case hvm_access_read:
    4.40 -            if ( (reg->attr.fields.type & 0xa) == 0x8 )
    4.41 -                goto gpf; /* execute-only code segment */
    4.42 -            break;
    4.43 -        case hvm_access_write:
    4.44 -            if ( (reg->attr.fields.type & 0xa) != 0x2 )
    4.45 -                goto gpf; /* not a writable data segment */
    4.46 -            break;
    4.47 -        default:
    4.48 -            break;
    4.49 -        }
    4.50 -
    4.51 -        /* Calculate the segment limit, including granularity flag. */
    4.52 -        limit = reg->limit;
    4.53 -        if ( reg->attr.fields.g )
    4.54 -            limit = (limit << 12) | 0xfff;
    4.55 -
    4.56 -        last_byte = offset + bytes - 1;
    4.57 +    okay = hvm_virtual_to_linear_addr(
    4.58 +        seg, reg, offset, bytes, access_type, sh_ctxt->ctxt.addr_size, paddr);
    4.59  
    4.60 -        /* Is this a grows-down data segment? Special limit check if so. */
    4.61 -        if ( (reg->attr.fields.type & 0xc) == 0x4 )
    4.62 -        {
    4.63 -            /* Is upper limit 0xFFFF or 0xFFFFFFFF? */
    4.64 -            if ( !reg->attr.fields.db )
    4.65 -                last_byte = (uint16_t)last_byte;
    4.66 -
    4.67 -            /* Check first byte and last byte against respective bounds. */
    4.68 -            if ( (offset <= limit) || (last_byte < offset) )
    4.69 -                goto gpf;
    4.70 -        }
    4.71 -        else if ( (last_byte > limit) || (last_byte < offset) )
    4.72 -            goto gpf; /* last byte is beyond limit or wraps 0xFFFFFFFF */
    4.73 -
    4.74 -        /*
    4.75 -         * Hardware truncates to 32 bits in compatibility mode.
    4.76 -         * It does not truncate to 16 bits in 16-bit address-size mode.
    4.77 -         */
    4.78 -        addr = (uint32_t)(addr + reg->base);
    4.79 -    }
    4.80 -    else
    4.81 +    if ( !okay )
    4.82      {
    4.83 -        /*
    4.84 -         * LONG MODE: FS and GS add segment base. Addresses must be canonical.
    4.85 -         */
    4.86 -
    4.87 -        if ( (seg == x86_seg_fs) || (seg == x86_seg_gs) )
    4.88 -            addr += reg->base;
    4.89 -
    4.90 -        if ( !is_canonical_address(addr) )
    4.91 -            goto gpf;
    4.92 +        hvm_inject_exception(TRAP_gp_fault, 0, 0);
    4.93 +        return X86EMUL_EXCEPTION;
    4.94      }
    4.95  
    4.96 -    *paddr = addr;
    4.97 -    return 0;    
    4.98 -
    4.99 - gpf:
   4.100 -    /* Inject #GP(0). */
   4.101 -    hvm_inject_exception(TRAP_gp_fault, 0, 0);
   4.102 -    return X86EMUL_EXCEPTION;
   4.103 +    return 0;
   4.104  }
   4.105  
   4.106  static int
     5.1 --- a/xen/include/asm-x86/hvm/hvm.h	Mon Sep 17 13:33:09 2007 +0100
     5.2 +++ b/xen/include/asm-x86/hvm/hvm.h	Tue Sep 18 11:49:42 2007 +0100
     5.3 @@ -362,4 +362,21 @@ static inline void hvm_cpu_down(void)
     5.4          hvm_funcs.cpu_down();
     5.5  }
     5.6  
     5.7 +enum hvm_task_switch_reason { TSW_jmp, TSW_iret, TSW_call_or_int };
     5.8 +void hvm_task_switch(
     5.9 +    uint16_t tss_sel, enum hvm_task_switch_reason taskswitch_reason,
    5.10 +    int32_t errcode);
    5.11 +
    5.12 +enum hvm_access_type {
    5.13 +    hvm_access_insn_fetch, hvm_access_read, hvm_access_write
    5.14 +};
    5.15 +int hvm_virtual_to_linear_addr(
    5.16 +    enum x86_segment seg,
    5.17 +    struct segment_register *reg,
    5.18 +    unsigned long offset,
    5.19 +    unsigned int bytes,
    5.20 +    enum hvm_access_type access_type,
    5.21 +    unsigned int addr_size,
    5.22 +    unsigned long *linear_addr);
    5.23 +
    5.24  #endif /* __ASM_X86_HVM_HVM_H__ */