direct-io.hg

changeset 15199:13eca4bf2c69

[XEN] Shadow: emulate a few extra instructions on PAE pagetable writes
in the hope of catching the "other half" write without another enter/exit.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Fri Jun 01 14:32:11 2007 +0100 (2007-06-01)
parents b182bd560e47
children bd3d6b4c52ec
files xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/include/asm-x86/domain.h xen/include/asm-x86/perfc_defn.h
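
For context: a PAE pagetable entry is 64 bits wide, but a 32-bit guest kernel
usually updates it with two separate 32-bit stores. The first store faults into
the shadow code and is emulated; before this patch, the store to the other half
of the entry would fault again, costing a second VM enter/exit. The patch
instead keeps emulating a few instructions past the first write, hoping to
satisfy the second half within the same fault. A minimal sketch of the
guest-side pattern being targeted (hypothetical illustration, not code from
this changeset):

    #include <stdint.h>

    /* Hypothetical guest-side illustration (not from this changeset):
     * a 32-bit PAE kernel updating a 64-bit pagetable entry with two
     * 32-bit stores.  Each store is a separate instruction, and each
     * can fault into the shadow handler. */
    typedef struct { volatile uint32_t lo, hi; } pae_pte_t;

    static void guest_set_pte(pae_pte_t *ptep, uint64_t new_pte)
    {
        /* Write the high word first so the entry is never valid with a
         * stale high half; some kernels also clear the low word before
         * this, adding a third store. */
        ptep->hi = (uint32_t)(new_pte >> 32);
        ptep->lo = (uint32_t)new_pte;
    }

This is also why common.c now caches insn_buf_eip: once emulation continues
past the faulting instruction, the fetch callback must index the prefetched
buffer relative to where the buffer was fetched, not the current eip.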
line diff
     1.1 --- a/xen/arch/x86/mm/shadow/common.c	Thu May 31 16:09:11 2007 +0100
     1.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Jun 01 14:32:11 2007 +0100
     1.3 @@ -248,7 +248,7 @@ hvm_emulate_insn_fetch(enum x86_segment 
     1.4  {
     1.5      struct sh_emulate_ctxt *sh_ctxt =
     1.6          container_of(ctxt, struct sh_emulate_ctxt, ctxt);
     1.7 -    unsigned int insn_off = offset - ctxt->regs->eip;
     1.8 +    unsigned int insn_off = offset - sh_ctxt->insn_buf_eip;
     1.9  
    1.10      /* Fall back if requested bytes are not in the prefetch cache. */
    1.11      if ( unlikely((insn_off + bytes) > sh_ctxt->insn_buf_bytes) )
    1.12 @@ -450,6 +450,7 @@ struct x86_emulate_ops *shadow_init_emul
    1.13      }
    1.14  
    1.15      /* Attempt to prefetch whole instruction. */
    1.16 +    sh_ctxt->insn_buf_eip = regs->eip;
    1.17      sh_ctxt->insn_buf_bytes =
    1.18          (!hvm_translate_linear_addr(
    1.19              x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
    1.20 @@ -461,6 +462,35 @@ struct x86_emulate_ops *shadow_init_emul
    1.21      return &hvm_shadow_emulator_ops;
    1.22  }
    1.23  
    1.24 +/* Update an initialized emulation context to prepare for the next 
    1.25 + * instruction */
    1.26 +void shadow_continue_emulation(struct sh_emulate_ctxt *sh_ctxt, 
    1.27 +                               struct cpu_user_regs *regs)
    1.28 +{
    1.29 +    struct vcpu *v = current;
    1.30 +    unsigned long addr, diff;
    1.31 +
    1.32 +    /* We don't refetch the segment bases, because we don't emulate
    1.33 +     * writes to segment registers */
    1.34 +
    1.35 +    if ( is_hvm_vcpu(v) )
    1.36 +    {
    1.37 +        diff = regs->eip - sh_ctxt->insn_buf_eip;
    1.38 +        if ( diff > sh_ctxt->insn_buf_bytes )
    1.39 +        {
    1.40 +            /* Prefetch more bytes. */
    1.41 +            sh_ctxt->insn_buf_bytes =
    1.42 +                (!hvm_translate_linear_addr(
    1.43 +                    x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
    1.44 +                    hvm_access_insn_fetch, sh_ctxt, &addr) &&
    1.45 +                 !hvm_copy_from_guest_virt(
    1.46 +                     sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
    1.47 +                ? sizeof(sh_ctxt->insn_buf) : 0;
    1.48 +            sh_ctxt->insn_buf_eip = regs->eip;
    1.49 +        }
    1.50 +    }
    1.51 +}
    1.52 +
    1.53  /**************************************************************************/
    1.54  /* Code for "promoting" a guest page to the point where the shadow code is
    1.55   * willing to let it be treated as a guest page table.  This generally
     2.1 --- a/xen/arch/x86/mm/shadow/multi.c	Thu May 31 16:09:11 2007 +0100
     2.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Jun 01 14:32:11 2007 +0100
     2.3 @@ -2871,6 +2871,20 @@ static int sh_page_fault(struct vcpu *v,
     2.4      if ( !shadow_mode_refcounts(d) || !guest_mode(regs) )
     2.5          goto not_a_shadow_fault;
     2.6  
     2.7 +    /*
     2.8 +     * We do not emulate user writes. Instead we use them as a hint that the
     2.9 +     * page is no longer a page table. This behaviour differs from native, but
    2.10 +     * it seems very unlikely that any OS grants user access to page tables.
    2.11 +     */
    2.12 +    if ( (regs->error_code & PFEC_user_mode) )
    2.13 +    {
    2.14 +        SHADOW_PRINTK("user-mode fault to PT, unshadowing mfn %#lx\n", 
    2.15 +                      mfn_x(gmfn));
    2.16 +        perfc_incr(shadow_fault_emulate_failed);
    2.17 +        sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
    2.18 +        goto done;
    2.19 +    }
    2.20 +
    2.21      if ( is_hvm_domain(d) )
    2.22      {
    2.23          /*
    2.24 @@ -2897,14 +2911,7 @@ static int sh_page_fault(struct vcpu *v,
    2.25  
    2.26      emul_ops = shadow_init_emulation(&emul_ctxt, regs);
    2.27  
    2.28 -    /*
    2.29 -     * We do not emulate user writes. Instead we use them as a hint that the
    2.30 -     * page is no longer a page table. This behaviour differs from native, but
    2.31 -     * it seems very unlikely that any OS grants user access to page tables.
    2.32 -     */
    2.33 -    r = X86EMUL_UNHANDLEABLE;
    2.34 -    if ( !(regs->error_code & PFEC_user_mode) )
    2.35 -        r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
    2.36 +    r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
    2.37  
    2.38      /*
    2.39       * NB. We do not unshadow on X86EMUL_EXCEPTION. It's not clear that it
    2.40 @@ -2922,6 +2929,35 @@ static int sh_page_fault(struct vcpu *v,
    2.41          sh_remove_shadows(v, gmfn, 0 /* thorough */, 1 /* must succeed */);
    2.42      }
    2.43  
    2.44 +#if GUEST_PAGING_LEVELS == 3 /* PAE guest */
    2.45 +    if ( r == X86EMUL_OKAY ) {
    2.46 +        int i;
    2.47 +        /* Emulate up to four extra instructions in the hope of catching 
    2.48 +         * the "second half" of a 64-bit pagetable write. */
    2.49 +        for ( i = 0 ; i < 4 ; i++ )
    2.50 +        {
    2.51 +            shadow_continue_emulation(&emul_ctxt, regs);
    2.52 +            v->arch.paging.last_write_was_pt = 0;
    2.53 +            r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
    2.54 +            if ( r == X86EMUL_OKAY )
    2.55 +            {
    2.56 +                if ( v->arch.paging.last_write_was_pt )
    2.57 +                {
    2.58 +                    perfc_incr(shadow_em_ex_pt);
    2.59 +                    break; /* Don't emulate past the other half of the write */
    2.60 +                }
    2.61 +                else 
    2.62 +                    perfc_incr(shadow_em_ex_non_pt);
    2.63 +            }
    2.64 +            else
    2.65 +            {
    2.66 +                perfc_incr(shadow_em_ex_fail);
    2.67 +                break; /* Don't emulate again if we failed! */
    2.68 +            }
    2.69 +        }
    2.70 +    }
    2.71 +#endif /* PAE guest */
    2.72 +
    2.73      /* Emulator has changed the user registers: write back */
    2.74      if ( is_hvm_domain(d) )
    2.75          hvm_load_cpu_guest_regs(v, regs);
    2.76 @@ -3878,6 +3914,11 @@ static inline void * emulate_map_dest(st
    2.77      gfn_t gfn;
    2.78      mfn_t mfn;
    2.79  
    2.80 +    /* We don't emulate user-mode writes to page tables */
    2.81 +    if ( ring_3(sh_ctxt->ctxt.regs) ) 
    2.82 +        return NULL;
    2.83 +
    2.84 +    /* Walk the guest pagetables */
    2.85      guest_walk_tables(v, vaddr, &gw, 1);
    2.86      flags = accumulate_guest_flags(v, &gw);
    2.87      gfn = guest_l1e_get_gfn(gw.eff_l1e);
    2.88 @@ -3885,27 +3926,24 @@ static inline void * emulate_map_dest(st
    2.89      sh_audit_gw(v, &gw);
    2.90      unmap_walk(v, &gw);
    2.91  
    2.92 -    if ( !(flags & _PAGE_PRESENT) )
    2.93 -    {
    2.94 -        errcode = 0;
    2.95 +    errcode = PFEC_write_access;
    2.96 +    if ( !(flags & _PAGE_PRESENT) ) 
    2.97          goto page_fault;
    2.98 -    }
    2.99 -
   2.100 -    if ( !(flags & _PAGE_RW) ||
   2.101 -         (!(flags & _PAGE_USER) && ring_3(sh_ctxt->ctxt.regs)) )
   2.102 +
   2.103 +    errcode |= PFEC_page_present;
   2.104 +    if ( !(flags & _PAGE_RW) ) 
   2.105 +        goto page_fault;
   2.106 +
   2.107 +    if ( mfn_valid(mfn) )
   2.108      {
   2.109 -        errcode = PFEC_page_present;
   2.110 -        goto page_fault;
   2.111 +        *mfnp = mfn;
   2.112 +        v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
   2.113 +        return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
   2.114      }
   2.115 -
   2.116 -    if ( !mfn_valid(mfn) )
   2.117 +    else 
   2.118          return NULL;
   2.119  
   2.120 -    *mfnp = mfn;
   2.121 -    return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
   2.122 -
   2.123   page_fault:
   2.124 -    errcode |= PFEC_write_access;
   2.125      if ( is_hvm_vcpu(v) )
   2.126          hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
   2.127      else
     3.1 --- a/xen/arch/x86/mm/shadow/private.h	Thu May 31 16:09:11 2007 +0100
     3.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Jun 01 14:32:11 2007 +0100
     3.3 @@ -634,9 +634,10 @@ static inline void sh_unpin(struct vcpu 
     3.4  struct sh_emulate_ctxt {
     3.5      struct x86_emulate_ctxt ctxt;
     3.6  
     3.7 -    /* [HVM] Cache of up to 15 bytes of instruction. */
     3.8 -    uint8_t insn_buf[15];
     3.9 +    /* [HVM] Cache of up to 31 bytes of instruction. */
    3.10 +    uint8_t insn_buf[31];
    3.11      uint8_t insn_buf_bytes;
    3.12 +    unsigned long insn_buf_eip;
    3.13  
    3.14      /* [HVM] Cache of segment registers already gathered for this emulation. */
    3.15      unsigned int valid_seg_regs;
    3.16 @@ -645,6 +646,8 @@ struct sh_emulate_ctxt {
    3.17  
    3.18  struct x86_emulate_ops *shadow_init_emulation(
    3.19      struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
    3.20 +void shadow_continue_emulation(
    3.21 +    struct sh_emulate_ctxt *sh_ctxt, struct cpu_user_regs *regs);
    3.22  
    3.23  #endif /* _XEN_SHADOW_PRIVATE_H */
    3.24  
     4.1 --- a/xen/include/asm-x86/domain.h	Thu May 31 16:09:11 2007 +0100
     4.2 +++ b/xen/include/asm-x86/domain.h	Fri Jun 01 14:32:11 2007 +0100
     4.3 @@ -171,6 +171,8 @@ struct paging_vcpu {
     4.4      struct paging_mode *mode;
     4.5      /* HVM guest: paging enabled (CR0.PG)?  */
     4.6      unsigned int translate_enabled:1;
     4.7 +    /* HVM guest: last emulate was to a pagetable */
     4.8 +    unsigned int last_write_was_pt:1;
     4.9  
    4.10      /* paging support extension */
    4.11      struct shadow_vcpu shadow;
     5.1 --- a/xen/include/asm-x86/perfc_defn.h	Thu May 31 16:09:11 2007 +0100
     5.2 +++ b/xen/include/asm-x86/perfc_defn.h	Fri Jun 01 14:32:11 2007 +0100
     5.3 @@ -90,5 +90,8 @@ PERFCOUNTER(shadow_guest_walk,     "shad
     5.4  PERFCOUNTER(shadow_invlpg,         "shadow emulates invlpg")
     5.5  PERFCOUNTER(shadow_invlpg_fault,   "shadow invlpg faults")
     5.6  
     5.7 +PERFCOUNTER(shadow_em_ex_pt,       "shadow extra pt write")
     5.8 +PERFCOUNTER(shadow_em_ex_non_pt,   "shadow extra non-pt-write op")
     5.9 +PERFCOUNTER(shadow_em_ex_fail,     "shadow extra emulation failed")
    5.10  
    5.11  /*#endif*/ /* __XEN_PERFC_DEFN_H__ */
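
Read together, the multi.c hunks amount to the following control flow (a
simplified paraphrase of the diff above, with surrounding context, perfcounter
updates, and declarations elided):

    /* Simplified paraphrase of the PAE retry loop added to
     * sh_page_fault(); identifiers are taken from the diff. */
    r = x86_emulate(&emul_ctxt.ctxt, emul_ops);

    #if GUEST_PAGING_LEVELS == 3 /* PAE guest */
    if ( r == X86EMUL_OKAY )
    {
        int i;
        for ( i = 0; i < 4; i++ )
        {
            /* Re-point the cached instruction buffer at the new eip. */
            shadow_continue_emulation(&emul_ctxt, regs);
            /* emulate_map_dest() sets this if the write hit a pagetable. */
            v->arch.paging.last_write_was_pt = 0;
            r = x86_emulate(&emul_ctxt.ctxt, emul_ops);
            if ( r != X86EMUL_OKAY )
                break;   /* emulation failed: stop retrying */
            if ( v->arch.paging.last_write_was_pt )
                break;   /* caught the other half of the write */
        }
    }
    #endif

Capping the loop at four instructions bounds the time spent in the fault
handler while still covering the common store-high/store-low sequence; the
three new perfcounters record which way each extra emulation went.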