ia64/xen-unstable

changeset 16313:db9f62d8f7f4

[SHADOW] Make the guest PT walker more complete.

We now check access rights and write back the _PAGE_ACCESSED and
_PAGE_DIRTY bits into the guest entries as we walk the tables.
This makes the shadow fault handler simpler, and the various emulation
paths more correct.

This patch doesn't add checking and write-back to the HAP pagetable walker;
it just fixes up its arguments to match the new shadow one.
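
As a minimal illustrative sketch (not part of this changeset): a caller of the
reworked translation interface now builds a page-fault error code describing
the access it is making, passes it by reference, and injects whatever comes
back if the walk fails.  The PFEC_* flags, ring_3(), paging_gva_to_gfn() and
hvm_inject_exception() names below are the ones used in the diff; "vaddr" is
just a placeholder for the address being translated.

    /* Sketch only: translate a guest VA for a user-mode data write. */
    uint32_t pfec = PFEC_page_present | PFEC_write_access;
    unsigned long gfn;

    if ( ring_3(guest_cpu_user_regs()) )
        pfec |= PFEC_user_mode;        /* access is on behalf of ring 3 */

    gfn = paging_gva_to_gfn(current, vaddr, &pfec);
    if ( gfn == INVALID_GFN )
    {
        /* The walker refused the access (or found nothing mapped);
         * pfec now holds the error code to hand back to the guest. */
        hvm_inject_exception(TRAP_page_fault, pfec, vaddr);
        return;
    }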

Signed-off-by: Tim Deegan <Tim.Deegan@eu.citrix.com>
author Tim Deegan <Tim.Deegan@eu.citrix.com>
date Fri Nov 02 15:41:57 2007 +0000 (2007-11-02)
parents 46f91ed0f7d1
children 650cadd1b283
files xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/platform.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/mm/hap/guest_walk.c xen/arch/x86/mm/hap/hap.c xen/arch/x86/mm/hap/private.h xen/arch/x86/mm/p2m.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/private.h xen/arch/x86/mm/shadow/types.h xen/include/asm-x86/hvm/support.h xen/include/asm-x86/paging.h xen/include/asm-x86/perfc_defn.h
line diff
     1.1 --- a/xen/arch/x86/hvm/hvm.c	Fri Nov 02 10:37:59 2007 +0000
     1.2 +++ b/xen/arch/x86/hvm/hvm.c	Fri Nov 02 15:41:57 2007 +0000
     1.3 @@ -931,6 +931,7 @@ static void *hvm_map(unsigned long va, i
     1.4  {
     1.5      unsigned long gfn, mfn;
     1.6      p2m_type_t p2mt;
     1.7 +    uint32_t pfec;
     1.8  
     1.9      if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
    1.10      {
    1.11 @@ -939,11 +940,15 @@ static void *hvm_map(unsigned long va, i
    1.12          return NULL;
    1.13      }
    1.14  
    1.15 -    gfn = paging_gva_to_gfn(current, va);
    1.16 +    /* We're mapping on behalf of the segment-load logic, which might
    1.17 +     * write the accessed flags in the descriptors (in 32-bit mode), but
    1.18 +     * we still treat it as a kernel-mode read (i.e. no access checks). */
    1.19 +    pfec = PFEC_page_present;
    1.20 +    gfn = paging_gva_to_gfn(current, va, &pfec);
    1.21      mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    1.22      if ( !p2m_is_ram(p2mt) )
    1.23      {
    1.24 -        hvm_inject_exception(TRAP_page_fault, PFEC_write_access, va);
    1.25 +        hvm_inject_exception(TRAP_page_fault, pfec, va);
    1.26          return NULL;
    1.27      }
    1.28  
    1.29 @@ -1263,14 +1268,24 @@ void hvm_task_switch(
    1.30   *  @size = number of bytes to copy
    1.31   *  @dir  = copy *to* guest (TRUE) or *from* guest (FALSE)?
    1.32   *  @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)?
    1.33 + *  @fetch = copy is an instruction fetch?
    1.34   * Returns number of bytes failed to copy (0 == complete success).
    1.35   */
    1.36 -static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
    1.37 +static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, 
    1.38 +                      int virt, int fetch)
    1.39  {
    1.40      unsigned long gfn, mfn;
    1.41      p2m_type_t p2mt;
    1.42      char *p;
    1.43      int count, todo;
    1.44 +    uint32_t pfec = PFEC_page_present;
    1.45 +
    1.46 +    if ( dir ) 
    1.47 +        pfec |= PFEC_write_access;
    1.48 +    if ( ring_3(guest_cpu_user_regs()) )
    1.49 +        pfec |= PFEC_user_mode;
    1.50 +    if ( fetch ) 
    1.51 +        pfec |= PFEC_insn_fetch;
    1.52  
    1.53      todo = size;
    1.54      while ( todo > 0 )
    1.55 @@ -1278,7 +1293,7 @@ static int __hvm_copy(void *buf, paddr_t
    1.56          count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
    1.57  
    1.58          if ( virt )
    1.59 -            gfn = paging_gva_to_gfn(current, addr);
    1.60 +            gfn = paging_gva_to_gfn(current, addr, &pfec);
    1.61          else
    1.62              gfn = addr >> PAGE_SHIFT;
    1.63          
    1.64 @@ -1310,22 +1325,27 @@ static int __hvm_copy(void *buf, paddr_t
    1.65  
    1.66  int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size)
    1.67  {
    1.68 -    return __hvm_copy(buf, paddr, size, 1, 0);
    1.69 +    return __hvm_copy(buf, paddr, size, 1, 0, 0);
    1.70  }
    1.71  
    1.72  int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size)
    1.73  {
    1.74 -    return __hvm_copy(buf, paddr, size, 0, 0);
    1.75 +    return __hvm_copy(buf, paddr, size, 0, 0, 0);
    1.76  }
    1.77  
    1.78  int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size)
    1.79  {
    1.80 -    return __hvm_copy(buf, vaddr, size, 1, 1);
    1.81 +    return __hvm_copy(buf, vaddr, size, 1, 1, 0);
    1.82  }
    1.83  
    1.84  int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size)
    1.85  {
    1.86 -    return __hvm_copy(buf, vaddr, size, 0, 1);
    1.87 +    return __hvm_copy(buf, vaddr, size, 0, 1, 0);
    1.88 +}
    1.89 +
    1.90 +int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size)
    1.91 +{
    1.92 +    return __hvm_copy(buf, vaddr, size, 0, 1, hvm_nx_enabled(current));
    1.93  }
    1.94  
    1.95  
     2.1 --- a/xen/arch/x86/hvm/platform.c	Fri Nov 02 10:37:59 2007 +0000
     2.2 +++ b/xen/arch/x86/hvm/platform.c	Fri Nov 02 15:41:57 2007 +0000
     2.3 @@ -833,7 +833,7 @@ int inst_copy_from_guest(unsigned char *
     2.4  {
     2.5      if ( inst_len > MAX_INST_LEN || inst_len <= 0 )
     2.6          return 0;
     2.7 -    if ( hvm_copy_from_guest_virt(buf, guest_eip, inst_len) )
     2.8 +    if ( hvm_fetch_from_guest_virt(buf, guest_eip, inst_len) )
     2.9          return 0;
    2.10      return inst_len;
    2.11  }
    2.12 @@ -1075,6 +1075,7 @@ void handle_mmio(unsigned long gpa)
    2.13          unsigned long addr, gfn; 
    2.14          paddr_t paddr;
    2.15          int dir, size = op_size;
    2.16 +        uint32_t pfec;
    2.17  
    2.18          ASSERT(count);
    2.19  
    2.20 @@ -1082,8 +1083,11 @@ void handle_mmio(unsigned long gpa)
    2.21          addr = regs->edi;
    2.22          if ( ad_size == WORD )
    2.23              addr &= 0xFFFF;
    2.24 -        addr += hvm_get_segment_base(v, x86_seg_es);
    2.25 -        gfn = paging_gva_to_gfn(v, addr);
    2.26 +        addr += hvm_get_segment_base(v, x86_seg_es);        
    2.27 +        pfec = PFEC_page_present | PFEC_write_access;
    2.28 +        if ( ring_3(regs) )
    2.29 +            pfec |= PFEC_user_mode;
    2.30 +        gfn = paging_gva_to_gfn(v, addr, &pfec);
    2.31          paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
    2.32          if ( paddr == gpa )
    2.33          {
    2.34 @@ -1105,7 +1109,8 @@ void handle_mmio(unsigned long gpa)
    2.35              default: domain_crash_synchronous();
    2.36              }
    2.37              addr += hvm_get_segment_base(v, seg);
    2.38 -            gfn = paging_gva_to_gfn(v, addr);
    2.39 +            pfec &= ~PFEC_write_access;
    2.40 +            gfn = paging_gva_to_gfn(v, addr, &pfec);
    2.41              paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
    2.42          }
    2.43          else
    2.44 @@ -1115,12 +1120,9 @@ void handle_mmio(unsigned long gpa)
    2.45          {
    2.46              /* The guest does not have the non-mmio address mapped. 
    2.47               * Need to send in a page fault */
    2.48 -            int errcode = 0;
    2.49 -            /* IO read --> memory write */
    2.50 -            if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
    2.51              regs->eip -= inst_len; /* do not advance %eip */
    2.52              regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
    2.53 -            hvm_inject_exception(TRAP_page_fault, errcode, addr);
    2.54 +            hvm_inject_exception(TRAP_page_fault, pfec, addr);
    2.55              return;
    2.56          }
    2.57  
    2.58 @@ -1308,10 +1310,9 @@ void handle_mmio(unsigned long gpa)
    2.59  
    2.60  DEFINE_PER_CPU(int, guest_handles_in_xen_space);
    2.61  
    2.62 -/* Note that copy_{to,from}_user_hvm don't set the A and D bits on
    2.63 -   PTEs, and require the PTE to be writable even when they're only
    2.64 -   trying to read from it.  The guest is expected to deal with
    2.65 -   this. */
    2.66 +/* Note that copy_{to,from}_user_hvm require the PTE to be writable even
    2.67 +   when they're only trying to read from it.  The guest is expected to
    2.68 +   deal with this. */
    2.69  unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len)
    2.70  {
    2.71      if ( this_cpu(guest_handles_in_xen_space) )
     3.1 --- a/xen/arch/x86/hvm/svm/svm.c	Fri Nov 02 10:37:59 2007 +0000
     3.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Fri Nov 02 15:41:57 2007 +0000
     3.3 @@ -1441,6 +1441,7 @@ static void svm_io_instruction(struct vc
     3.4          unsigned long addr, count;
     3.5          paddr_t paddr;
     3.6          unsigned long gfn;
     3.7 +        uint32_t pfec;
     3.8          int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
     3.9  
    3.10          if (!svm_get_io_address(v, regs, size, info, &count, &addr))
    3.11 @@ -1459,15 +1460,17 @@ static void svm_io_instruction(struct vc
    3.12          }
    3.13  
    3.14          /* Translate the address to a physical address */
    3.15 -        gfn = paging_gva_to_gfn(v, addr);
    3.16 +        pfec = PFEC_page_present;
    3.17 +        if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
    3.18 +            pfec |= PFEC_write_access;
    3.19 +        if ( ring_3(regs) )
    3.20 +            pfec |= PFEC_user_mode;
    3.21 +        gfn = paging_gva_to_gfn(v, addr, &pfec);
    3.22          if ( gfn == INVALID_GFN ) 
    3.23          {
    3.24              /* The guest does not have the RAM address mapped. 
    3.25               * Need to send in a page fault */
    3.26 -            int errcode = 0;
    3.27 -            /* IO read --> memory write */
    3.28 -            if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
    3.29 -            svm_hvm_inject_exception(TRAP_page_fault, errcode, addr);
    3.30 +            svm_hvm_inject_exception(TRAP_page_fault, pfec, addr);
    3.31              return;
    3.32          }
    3.33          paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Fri Nov 02 10:37:59 2007 +0000
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri Nov 02 15:41:57 2007 +0000
     4.3 @@ -1642,7 +1642,7 @@ static void vmx_do_str_pio(unsigned long
     4.4      unsigned long addr, count = 1, base;
     4.5      paddr_t paddr;
     4.6      unsigned long gfn;
     4.7 -    u32 ar_bytes, limit;
     4.8 +    u32 ar_bytes, limit, pfec;
     4.9      int sign;
    4.10      int long_mode = 0;
    4.11  
    4.12 @@ -1714,15 +1714,17 @@ static void vmx_do_str_pio(unsigned long
    4.13  #endif
    4.14  
    4.15      /* Translate the address to a physical address */
    4.16 -    gfn = paging_gva_to_gfn(current, addr);
    4.17 +    pfec = PFEC_page_present;
    4.18 +    if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
    4.19 +        pfec |= PFEC_write_access;
    4.20 +    if ( ring_3(regs) )
    4.21 +        pfec |= PFEC_user_mode;
    4.22 +    gfn = paging_gva_to_gfn(current, addr, &pfec);
    4.23      if ( gfn == INVALID_GFN )
    4.24      {
    4.25          /* The guest does not have the RAM address mapped.
    4.26           * Need to send in a page fault */
    4.27 -        int errcode = 0;
    4.28 -        /* IO read --> memory write */
    4.29 -        if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
    4.30 -        vmx_inject_exception(TRAP_page_fault, errcode, addr);
    4.31 +        vmx_inject_exception(TRAP_page_fault, pfec, addr);
    4.32          return;
    4.33      }
    4.34      paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
     5.1 --- a/xen/arch/x86/mm/hap/guest_walk.c	Fri Nov 02 10:37:59 2007 +0000
     5.2 +++ b/xen/arch/x86/mm/hap/guest_walk.c	Fri Nov 02 15:41:57 2007 +0000
     5.3 @@ -40,7 +40,7 @@
     5.4  #if GUEST_PAGING_LEVELS > CONFIG_PAGING_LEVELS
     5.5  
     5.6  unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
     5.7 -    struct vcpu *v, unsigned long gva)
     5.8 +    struct vcpu *v, unsigned long gva, uint32_t *pfec)
     5.9  {
    5.10      gdprintk(XENLOG_ERR,
    5.11               "Guest paging level is greater than host paging level!\n");
    5.12 @@ -61,7 +61,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
    5.13  #endif
    5.14  
    5.15  unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
    5.16 -    struct vcpu *v, unsigned long gva)
    5.17 +    struct vcpu *v, unsigned long gva, uint32_t *pfec)
    5.18  {
    5.19      unsigned long gcr3 = v->arch.hvm_vcpu.guest_cr[3];
    5.20      int mode = GUEST_PAGING_LEVELS;
     6.1 --- a/xen/arch/x86/mm/hap/hap.c	Fri Nov 02 10:37:59 2007 +0000
     6.2 +++ b/xen/arch/x86/mm/hap/hap.c	Fri Nov 02 15:41:57 2007 +0000
     6.3 @@ -695,7 +695,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
     6.4  }
     6.5  
     6.6  static unsigned long hap_gva_to_gfn_real_mode(
     6.7 -    struct vcpu *v, unsigned long gva)
     6.8 +    struct vcpu *v, unsigned long gva, uint32_t *pfec)
     6.9  {
    6.10      return ((paddr_t)gva >> PAGE_SHIFT);
    6.11  }
     7.1 --- a/xen/arch/x86/mm/hap/private.h	Fri Nov 02 10:37:59 2007 +0000
     7.2 +++ b/xen/arch/x86/mm/hap/private.h	Fri Nov 02 15:41:57 2007 +0000
     7.3 @@ -26,9 +26,12 @@
     7.4  /********************************************/
     7.5  /*          GUEST TRANSLATION FUNCS         */
     7.6  /********************************************/
     7.7 -unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva);
     7.8 -unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva);
     7.9 -unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva);
    7.10 +unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva, 
    7.11 +                                    uint32_t *pfec);
    7.12 +unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva,
    7.13 +                                    uint32_t *pfec);
    7.14 +unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva,
    7.15 +                                    uint32_t *pfec);
    7.16  
    7.17  /********************************************/
    7.18  /*            MISC DEFINITIONS              */
     8.1 --- a/xen/arch/x86/mm/p2m.c	Fri Nov 02 10:37:59 2007 +0000
     8.2 +++ b/xen/arch/x86/mm/p2m.c	Fri Nov 02 15:41:57 2007 +0000
     8.3 @@ -31,7 +31,7 @@
     8.4  
     8.5  /* Debugging and auditing of the P2M code? */
     8.6  #define P2M_AUDIT     0
     8.7 -#define P2M_DEBUGGING 1
     8.8 +#define P2M_DEBUGGING 0
     8.9  
    8.10  /*
    8.11   * The P2M lock.  This protects all updates to the p2m table.
    8.12 @@ -290,11 +290,11 @@ int p2m_alloc_table(struct domain *d,
    8.13                      void (*free_page)(struct domain *d, struct page_info *pg))
    8.14  
    8.15  {
    8.16 -    mfn_t mfn;
    8.17 +    mfn_t mfn = _mfn(INVALID_MFN);
    8.18      struct list_head *entry;
    8.19      struct page_info *page, *p2m_top;
    8.20      unsigned int page_count = 0;
    8.21 -    unsigned long gfn;
    8.22 +    unsigned long gfn = -1UL;
    8.23  
    8.24      p2m_lock(d);
    8.25  
     9.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri Nov 02 10:37:59 2007 +0000
     9.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Nov 02 15:41:57 2007 +0000
     9.3 @@ -150,11 +150,13 @@ hvm_read(enum x86_segment seg,
     9.4          return rc;
     9.5  
     9.6      *val = 0;
     9.7 -    // XXX -- this is WRONG.
     9.8 -    //        It entirely ignores the permissions in the page tables.
     9.9 -    //        In this case, that is only a user vs supervisor access check.
    9.10 -    //
    9.11 -    if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 )
    9.12 +
    9.13 +    if ( access_type == hvm_access_insn_fetch )
    9.14 +        rc = hvm_fetch_from_guest_virt(val, addr, bytes);
    9.15 +    else
    9.16 +        rc = hvm_copy_from_guest_virt(val, addr, bytes);
    9.17 +
    9.18 +    if ( rc == 0 ) 
    9.19          return X86EMUL_OKAY;
    9.20  
    9.21      /* If we got here, there was nothing mapped here, or a bad GFN 
    9.22 @@ -395,7 +397,7 @@ struct x86_emulate_ops *shadow_init_emul
    9.23          (!hvm_translate_linear_addr(
    9.24              x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
    9.25              hvm_access_insn_fetch, sh_ctxt, &addr) &&
    9.26 -         !hvm_copy_from_guest_virt(
    9.27 +         !hvm_fetch_from_guest_virt(
    9.28               sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
    9.29          ? sizeof(sh_ctxt->insn_buf) : 0;
    9.30  
    9.31 @@ -423,7 +425,7 @@ void shadow_continue_emulation(struct sh
    9.32                  (!hvm_translate_linear_addr(
    9.33                      x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
    9.34                      hvm_access_insn_fetch, sh_ctxt, &addr) &&
    9.35 -                 !hvm_copy_from_guest_virt(
    9.36 +                 !hvm_fetch_from_guest_virt(
    9.37                       sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
    9.38                  ? sizeof(sh_ctxt->insn_buf) : 0;
    9.39              sh_ctxt->insn_buf_eip = regs->eip;
    10.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Nov 02 10:37:59 2007 +0000
    10.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Nov 02 15:41:57 2007 +0000
    10.3 @@ -189,7 +189,7 @@ guest_supports_nx(struct vcpu *v)
    10.4      if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
    10.5          return 0;
    10.6      if ( !is_hvm_vcpu(v) )
    10.7 -        return 1;
    10.8 +        return cpu_has_nx;
    10.9      return hvm_nx_enabled(v);
   10.10  }
   10.11  
   10.12 @@ -197,22 +197,119 @@ guest_supports_nx(struct vcpu *v)
   10.13  /**************************************************************************/
   10.14  /* Functions for walking the guest page tables */
   10.15  
   10.16 -
   10.17 -/* Walk the guest pagetables, filling the walk_t with what we see. 
   10.18 - * Takes an uninitialised walk_t.  The caller must call unmap_walk() 
   10.19 - * on the walk_t before discarding it or calling guest_walk_tables again. 
   10.20 - * If "guest_op" is non-zero, we are serving a genuine guest memory access, 
   10.21 +/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
   10.22 +static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec) 
   10.23 +{
   10.24 +    static uint32_t flags[] = {
   10.25 +        /* I/F -  Usr Wr */
   10.26 +        /* 0   0   0   0 */ _PAGE_PRESENT, 
   10.27 +        /* 0   0   0   1 */ _PAGE_PRESENT|_PAGE_RW,
   10.28 +        /* 0   0   1   0 */ _PAGE_PRESENT|_PAGE_USER,
   10.29 +        /* 0   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
   10.30 +        /* 0   1   0   0 */ _PAGE_PRESENT, 
   10.31 +        /* 0   1   0   1 */ _PAGE_PRESENT|_PAGE_RW,
   10.32 +        /* 0   1   1   0 */ _PAGE_PRESENT|_PAGE_USER,
   10.33 +        /* 0   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
   10.34 +        /* 1   0   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT, 
   10.35 +        /* 1   0   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
   10.36 +        /* 1   0   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
   10.37 +        /* 1   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
   10.38 +        /* 1   1   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT, 
   10.39 +        /* 1   1   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
   10.40 +        /* 1   1   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
   10.41 +        /* 1   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
   10.42 +    };
   10.43 +    uint32_t f = flags[(pfec & 0x1f) >> 1];
   10.44 +    /* Don't demand not-NX if the CPU wouldn't enforce it. */
   10.45 +    if ( !guest_supports_nx(v) )
   10.46 +        f &= ~_PAGE_NX_BIT;
   10.47 +    return f;
   10.48 +}
   10.49 +
   10.50 +/* Read, check and modify a guest pagetable entry.  Returns 0 if the
   10.51 + * flags are OK.  Although we use l1e types here, the logic and the bits
   10.52 + * are the same for all types except PAE l3es. */
   10.53 +static int guest_walk_entry(struct vcpu *v, mfn_t gmfn, 
   10.54 +                            void *gp, void *wp,
   10.55 +                            uint32_t flags, int level)
   10.56 +{
   10.57 +    guest_l1e_t e, old_e;
   10.58 +    uint32_t gflags;
   10.59 +    int rc;
   10.60 +
   10.61 +    /* Read the guest entry */
   10.62 +    e = *(guest_l1e_t *)gp;
   10.63 +
   10.64 +    /* Check that all the mandatory flag bits are there.  Invert NX, to
   10.65 +     * calculate as if there were an "X" bit that allowed access. */
   10.66 +    gflags = guest_l1e_get_flags(e) ^ _PAGE_NX_BIT;
   10.67 +    rc = ((gflags & flags) != flags);
   10.68 +    
   10.69 +    /* Set the accessed/dirty bits */
   10.70 +    if ( rc == 0 ) 
   10.71 +    {
   10.72 +        uint32_t bits = _PAGE_ACCESSED;
   10.73 +        if ( (flags & _PAGE_RW) // Implies that the action is a write
   10.74 +             && ((level == 1) || ((level == 2) && (gflags & _PAGE_PSE))) )
   10.75 +            bits |= _PAGE_DIRTY;
   10.76 +        old_e = e;
   10.77 +        e.l1 |= bits;
   10.78 +        SHADOW_PRINTK("flags %lx bits %lx old_e %llx e %llx\n",
   10.79 +                      (unsigned long) flags, 
   10.80 +                      (unsigned long) bits, 
   10.81 +                      (unsigned long long) old_e.l1, 
   10.82 +                      (unsigned long long) e.l1);
    10.83 +        /* Try to write the entry back.  If it's changed under our feet 
   10.84 +         * then leave it alone */
   10.85 +        if ( e.l1 != old_e.l1 )
   10.86 +        {
   10.87 +            (void) cmpxchg(((guest_intpte_t *)gp), old_e.l1, e.l1);
   10.88 +            paging_mark_dirty(v->domain, mfn_x(gmfn));
   10.89 +        }
   10.90 +    }
   10.91 +
   10.92 +    /* Record the entry in the walk */
   10.93 +    *(guest_l1e_t *)wp = e;
   10.94 +    return rc;
   10.95 +}
   10.96 +
   10.97 +/* Walk the guest pagetables, after the manner of a hardware walker. 
   10.98 + *
   10.99 + * Inputs: a vcpu, a virtual address, a walk_t to fill, a 
  10.100 + *         pointer to a pagefault code, and a flag "shadow_op".
  10.101 + * 
  10.102 + * We walk the vcpu's guest pagetables, filling the walk_t with what we
  10.103 + * see and adding any Accessed and Dirty bits that are needed in the
  10.104 + * guest entries.  Using the pagefault code, we check the permissions as
  10.105 + * we go.  For the purposes of reading pagetables we treat all non-RAM
   10.106 + * memory as containing zeroes.
  10.107 + * 
  10.108 + * If "shadow_op" is non-zero, we are serving a genuine guest memory access, 
  10.109   * and must (a) be under the shadow lock, and (b) remove write access
  10.110 - * from any gueat PT pages we see, as we will be using their contents to 
  10.111 - * perform shadow updates.
  10.112 - * Returns 0 for success or non-zero if the guest pagetables are malformed.
  10.113 - * N.B. Finding a not-present entry does not cause a non-zero return code. */
  10.114 -static inline int 
  10.115 -guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
  10.116 + * from any guest PT pages we see, as we will be shadowing them soon
  10.117 + * and will rely on the contents' not having changed.
  10.118 + * 
  10.119 + * Returns 0 for success or non-zero if the walk did not complete.
  10.120 + * N.B. This is different from the old return code but almost no callers
  10.121 + * checked the old return code anyway.
  10.122 + */
  10.123 +static int 
  10.124 +guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, 
  10.125 +                  uint32_t pfec, int shadow_op)
  10.126  {
  10.127      struct domain *d = v->domain;
  10.128      p2m_type_t p2mt;
  10.129 -    ASSERT(!guest_op || shadow_locked_by_me(d));
  10.130 +    guest_l1e_t *l1p;
  10.131 +#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
  10.132 +    guest_l1e_t *l2p;
  10.133 +#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
  10.134 +    guest_l1e_t *l3p;
  10.135 +#endif    
  10.136 +#endif
  10.137 +    uint32_t flags = mandatory_flags(v, pfec);
  10.138 +    int rc;
  10.139 +
  10.140 +    ASSERT(!shadow_op || shadow_locked_by_me(d));
  10.141      
  10.142      perfc_incr(shadow_guest_walk);
  10.143      memset(gw, 0, sizeof(*gw));
  10.144 @@ -220,84 +317,104 @@ guest_walk_tables(struct vcpu *v, unsign
  10.145  
  10.146  #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
  10.147  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
  10.148 -    /* Get l4e from the top level table */
   10.149 +    /* Get the l4e from the top level table and check its flags */
  10.150      gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
  10.151 -    gw->l4e = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable 
  10.152 -        + guest_l4_table_offset(va);
  10.153 -    /* Walk down to the l3e */
  10.154 -    if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
  10.155 -    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
  10.156 +    rc = guest_walk_entry(v, gw->l4mfn,
  10.157 +                          (guest_l4e_t *)v->arch.paging.shadow.guest_vtable
  10.158 +                          + guest_l4_table_offset(va),
  10.159 +                          &gw->l4e, flags, 4);
  10.160 +    if ( rc != 0 ) return rc;
  10.161 +
  10.162 +    /* Map the l3 table */
  10.163 +    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
  10.164      if ( !p2m_is_ram(p2mt) ) return 1;
  10.165      ASSERT(mfn_valid(gw->l3mfn));
  10.166      /* This mfn is a pagetable: make sure the guest can't write to it. */
  10.167 -    if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
  10.168 +    if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
  10.169          flush_tlb_mask(d->domain_dirty_cpumask); 
  10.170 -    gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
  10.171 -        + guest_l3_table_offset(va);
   10.172 +    /* Get the l3e and check its flags */
  10.173 +    l3p = sh_map_domain_page(gw->l3mfn);
  10.174 +    rc = guest_walk_entry(v, gw->l3mfn, l3p + guest_l3_table_offset(va), 
  10.175 +                          &gw->l3e, flags, 3);
  10.176 +    sh_unmap_domain_page(l3p);
  10.177 +    if ( rc != 0 ) return rc;
  10.178 +
  10.179  #else /* PAE only... */
  10.180 -    /* Get l3e from the cache of the guest's top level table */
  10.181 -    gw->l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
  10.182 +
   10.183 +    /* Get l3e from the cache of the top level table and check its flags */
  10.184 +    gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
  10.185 +    if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) ) return 1;
  10.186 +
  10.187  #endif /* PAE or 64... */
  10.188 -    /* Walk down to the l2e */
  10.189 -    if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
  10.190 -    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
  10.191 +
  10.192 +    /* Map the l2 table */
  10.193 +    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
  10.194      if ( !p2m_is_ram(p2mt) ) return 1;
  10.195      ASSERT(mfn_valid(gw->l2mfn));
  10.196      /* This mfn is a pagetable: make sure the guest can't write to it. */
  10.197 -    if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
  10.198 +    if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
  10.199          flush_tlb_mask(d->domain_dirty_cpumask); 
  10.200 -    gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
  10.201 -        + guest_l2_table_offset(va);
  10.202 +    /* Get the l2e */
  10.203 +    l2p = sh_map_domain_page(gw->l2mfn);
  10.204 +    rc = guest_walk_entry(v, gw->l2mfn, l2p + guest_l2_table_offset(va),
  10.205 +                          &gw->l2e, flags, 2);
  10.206 +    sh_unmap_domain_page(l2p);
  10.207 +    if ( rc != 0 ) return rc;
  10.208 +
  10.209  #else /* 32-bit only... */
  10.210 -    /* Get l2e from the top level table */
  10.211 +
  10.212 +    /* Get l2e from the top level table and check its flags */
  10.213      gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
  10.214 -    gw->l2e = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable 
  10.215 -        + guest_l2_table_offset(va);
  10.216 +    rc = guest_walk_entry(v, gw->l2mfn, 
  10.217 +                          (guest_l2e_t *)v->arch.paging.shadow.guest_vtable
  10.218 +                          + guest_l2_table_offset(va),
  10.219 +                          &gw->l2e, flags, 2);
  10.220 +    if ( rc != 0 ) return rc;
  10.221 +
  10.222  #endif /* All levels... */
  10.223 -    
  10.224 -    if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
  10.225 +
  10.226      if ( guest_supports_superpages(v) &&
  10.227 -         (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) ) 
  10.228 +         (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE) ) 
  10.229      {
  10.230          /* Special case: this guest VA is in a PSE superpage, so there's
  10.231           * no guest l1e.  We make one up so that the propagation code
  10.232           * can generate a shadow l1 table.  Start with the gfn of the 
  10.233           * first 4k-page of the superpage. */
  10.234 -        gfn_t start = guest_l2e_get_gfn(*gw->l2e);
  10.235 +        gfn_t start = guest_l2e_get_gfn(gw->l2e);
  10.236          /* Grant full access in the l1e, since all the guest entry's 
  10.237 -         * access controls are enforced in the shadow l2e.  This lets 
  10.238 -         * us reflect l2 changes later without touching the l1s. */
  10.239 +         * access controls are enforced in the shadow l2e. */
  10.240          int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
  10.241                       _PAGE_ACCESSED|_PAGE_DIRTY);
  10.242 -        /* propagate PWT PCD to level 1 for PSE */
  10.243 -        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
  10.244 -            flags |= _PAGE_PWT;
  10.245 -        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
  10.246 -            flags |= _PAGE_PCD;
  10.247          /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
  10.248 -         * of the level 1 */
  10.249 -        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) ) 
  10.250 -            flags |= _PAGE_PAT; 
  10.251 +         * of the level 1. */
  10.252 +        if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE_PAT) ) 
  10.253 +            flags |= _PAGE_PAT;
  10.254 +        /* Copy the cache-control bits to the l1 as well, because we
  10.255 +         * can't represent PAT in the (non-PSE) shadow l2e. :(
  10.256 +         * This could cause problems if a guest ever maps an area of
  10.257 +         * memory with superpages using more than one caching mode. */
  10.258 +        flags |= guest_l2e_get_flags(gw->l2e) & (_PAGE_PWT|_PAGE_PCD);
  10.259          /* Increment the pfn by the right number of 4k pages.  
  10.260           * The ~0x1 is to mask out the PAT bit mentioned above. */
  10.261          start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
  10.262 -        gw->eff_l1e = guest_l1e_from_gfn(start, flags);
  10.263 -        gw->l1e = NULL;
  10.264 +        gw->l1e = guest_l1e_from_gfn(start, flags);
  10.265          gw->l1mfn = _mfn(INVALID_MFN);
  10.266      } 
  10.267      else 
  10.268      {
  10.269          /* Not a superpage: carry on and find the l1e. */
  10.270 -        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
  10.271 +        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
  10.272          if ( !p2m_is_ram(p2mt) ) return 1;
  10.273          ASSERT(mfn_valid(gw->l1mfn));
  10.274          /* This mfn is a pagetable: make sure the guest can't write to it. */
  10.275 -        if ( guest_op 
  10.276 +        if ( shadow_op 
  10.277               && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
  10.278              flush_tlb_mask(d->domain_dirty_cpumask); 
  10.279 -        gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
  10.280 -            + guest_l1_table_offset(va);
  10.281 -        gw->eff_l1e = *gw->l1e;
  10.282 +        l1p = sh_map_domain_page(gw->l1mfn);
  10.283 +        rc = guest_walk_entry(v, gw->l2mfn, l1p + guest_l1_table_offset(va),
  10.284 +                              &gw->l1e, flags, 1);
  10.285 +        sh_unmap_domain_page(l1p);
  10.286 +        if ( rc != 0 ) return rc;
  10.287      }
  10.288  
  10.289      return 0;
  10.290 @@ -308,9 +425,9 @@ guest_walk_tables(struct vcpu *v, unsign
  10.291  static inline gfn_t
  10.292  guest_walk_to_gfn(walk_t *gw)
  10.293  {
  10.294 -    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
  10.295 +    if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
  10.296          return _gfn(INVALID_GFN);
  10.297 -    return guest_l1e_get_gfn(gw->eff_l1e);
  10.298 +    return guest_l1e_get_gfn(gw->l1e);
  10.299  }
  10.300  
  10.301  /* Given a walk_t, translate the gw->va into the guest's notion of the
  10.302 @@ -318,29 +435,12 @@ guest_walk_to_gfn(walk_t *gw)
  10.303  static inline paddr_t
  10.304  guest_walk_to_gpa(walk_t *gw)
  10.305  {
  10.306 -    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
  10.307 +    if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
  10.308          return 0;
  10.309 -    return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
  10.310 +    return guest_l1e_get_paddr(gw->l1e) + (gw->va & ~PAGE_MASK);
  10.311  }
  10.312  
  10.313 -
  10.314 -/* Unmap (and reinitialise) a guest walk.  
  10.315 - * Call this to dispose of any walk filled in by guest_walk_tables() */
  10.316 -static void unmap_walk(struct vcpu *v, walk_t *gw)
  10.317 -{
  10.318 -#if GUEST_PAGING_LEVELS >= 3
  10.319 -#if GUEST_PAGING_LEVELS >= 4
  10.320 -    if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
  10.321 -#endif
  10.322 -    if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
  10.323 -#endif
  10.324 -    if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
  10.325 -#ifdef DEBUG
  10.326 -    memset(gw, 0, sizeof(*gw));
  10.327 -#endif
  10.328 -}
  10.329 -
  10.330 -
  10.331 +#if 0 /* Keep for debugging */
  10.332  /* Pretty-print the contents of a guest-walk */
  10.333  static inline void print_gw(walk_t *gw)
  10.334  {
  10.335 @@ -348,26 +448,17 @@ static inline void print_gw(walk_t *gw)
  10.336  #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
  10.337  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
  10.338      SHADOW_PRINTK("   l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
  10.339 -    SHADOW_PRINTK("   l4e=%p\n", gw->l4e);
  10.340 -    if ( gw->l4e )
  10.341 -        SHADOW_PRINTK("   *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
  10.342 +    SHADOW_PRINTK("   l4e=%" SH_PRI_gpte "\n", gw->l4e.l4);
  10.343      SHADOW_PRINTK("   l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
  10.344  #endif /* PAE or 64... */
  10.345 -    SHADOW_PRINTK("   l3e=%p\n", gw->l3e);
  10.346 -    if ( gw->l3e )
  10.347 -        SHADOW_PRINTK("   *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
  10.348 +    SHADOW_PRINTK("   l3e=%" SH_PRI_gpte "\n", gw->l3e.l3);
  10.349  #endif /* All levels... */
  10.350      SHADOW_PRINTK("   l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
  10.351 -    SHADOW_PRINTK("   l2e=%p\n", gw->l2e);
  10.352 -    if ( gw->l2e )
  10.353 -        SHADOW_PRINTK("   *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
  10.354 +    SHADOW_PRINTK("   l2e=%" SH_PRI_gpte "\n", gw->l2e.l2);
  10.355      SHADOW_PRINTK("   l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
  10.356 -    SHADOW_PRINTK("   l1e=%p\n", gw->l1e);
  10.357 -    if ( gw->l1e )
  10.358 -        SHADOW_PRINTK("   *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
  10.359 -    SHADOW_PRINTK("   eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
  10.360 +    SHADOW_PRINTK("   l1e=%" SH_PRI_gpte "\n", gw->l1e.l1);
  10.361  }
  10.362 -
  10.363 +#endif /* 0 */
  10.364  
  10.365  #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
  10.366  /* Lightweight audit: pass all the shadows associated with this guest walk
  10.367 @@ -404,10 +495,10 @@ static void sh_audit_gw(struct vcpu *v, 
  10.368           && mfn_valid((smfn = get_shadow_status(v, gw->l1mfn, 
  10.369                                                  SH_type_l1_shadow))) )
  10.370          (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
  10.371 -    else if ( gw->l2e
  10.372 -              && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
  10.373 +    else if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PRESENT)
  10.374 +              && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)
  10.375                && mfn_valid( 
  10.376 -              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
  10.377 +              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(gw->l2e)))) )
  10.378          (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
  10.379  }
  10.380  
  10.381 @@ -416,85 +507,6 @@ static void sh_audit_gw(struct vcpu *v, 
  10.382  #endif /* audit code */
  10.383  
  10.384  
  10.385 -
  10.386 -/**************************************************************************/
  10.387 -/* Function to write to the guest tables, for propagating accessed and 
  10.388 - * dirty bits from the shadow to the guest.
  10.389 - * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
  10.390 - * and an operation type.  The guest entry is always passed as an l1e: 
  10.391 - * since we only ever write flags, that's OK.
  10.392 - * Returns the new flag bits of the guest entry. */
  10.393 -
  10.394 -static u32 guest_set_ad_bits(struct vcpu *v,
  10.395 -                             mfn_t gmfn, 
  10.396 -                             guest_l1e_t *ep,
  10.397 -                             unsigned int level, 
  10.398 -                             fetch_type_t ft)
  10.399 -{
  10.400 -    u32 flags;
  10.401 -    int res = 0;
  10.402 -
  10.403 -    ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
  10.404 -    ASSERT(level <= GUEST_PAGING_LEVELS);
  10.405 -    ASSERT(shadow_locked_by_me(v->domain));
  10.406 -
  10.407 -    flags = guest_l1e_get_flags(*ep);
  10.408 -
  10.409 -    /* Only set A and D bits for guest-initiated accesses */
  10.410 -    if ( !(ft & FETCH_TYPE_DEMAND) )
  10.411 -        return flags;
  10.412 -
  10.413 -    ASSERT(mfn_valid(gmfn)
  10.414 -           && (sh_mfn_is_a_page_table(gmfn)
  10.415 -               || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) 
  10.416 -                   == 0)));
  10.417 -
  10.418 -    /* PAE l3s do not have A and D bits */
  10.419 -    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
  10.420 -
  10.421 -    /* Need the D bit as well for writes, in L1es and PSE L2es. */
  10.422 -    if ( ft == ft_demand_write  
  10.423 -         && (level == 1 ||
  10.424 -             (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
  10.425 -    {
  10.426 -        if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) 
  10.427 -             == (_PAGE_DIRTY | _PAGE_ACCESSED) )
  10.428 -            return flags;  /* Guest already has A and D bits set */
  10.429 -        flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
  10.430 -        perfc_incr(shadow_ad_update);
  10.431 -    }
  10.432 -    else 
  10.433 -    {
  10.434 -        if ( flags & _PAGE_ACCESSED )
  10.435 -            return flags;  /* Guest already has A bit set */
  10.436 -        flags |= _PAGE_ACCESSED;
  10.437 -        perfc_incr(shadow_a_update);
  10.438 -    }
  10.439 -
  10.440 -    /* Set the bit(s) */
  10.441 -    paging_mark_dirty(v->domain, mfn_x(gmfn));
  10.442 -    SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
  10.443 -                 "old flags = %#x, new flags = %#x\n", 
  10.444 -                 gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), 
  10.445 -                 flags);
  10.446 -    *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
  10.447 -    
  10.448 -    /* Propagate this change to any other shadows of the page 
  10.449 -     * (only necessary if there is more than one shadow) */
  10.450 -    if ( mfn_to_page(gmfn)->count_info & PGC_page_table )
  10.451 -    {
  10.452 -        u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
  10.453 -        /* More than one type bit set in shadow-flags? */
  10.454 -        if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
  10.455 -            res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
  10.456 -    }
  10.457 -
  10.458 -    /* We should never need to flush the TLB or recopy PAE entries */
  10.459 -    ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
  10.460 -
  10.461 -    return flags;
  10.462 -}
  10.463 -
  10.464  #if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
  10.465  void *
  10.466  sh_guest_map_l1e(struct vcpu *v, unsigned long addr,
  10.467 @@ -509,11 +521,9 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
  10.468      // FIXME!
  10.469  
  10.470      shadow_lock(v->domain);
  10.471 -    guest_walk_tables(v, addr, &gw, 1);
  10.472 -
  10.473 -    if ( gw.l2e &&
  10.474 -         (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
  10.475 -         !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
  10.476 +    guest_walk_tables(v, addr, &gw, 0, 1);
  10.477 +
  10.478 +    if ( mfn_valid(gw.l1mfn) )
  10.479      {
  10.480          if ( gl1mfn )
  10.481              *gl1mfn = mfn_x(gw.l1mfn);
  10.482 @@ -521,7 +531,6 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
  10.483              (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
  10.484      }
  10.485  
  10.486 -    unmap_walk(v, &gw);
  10.487      shadow_unlock(v->domain);
  10.488  
  10.489      return pl1e;
  10.490 @@ -538,9 +547,8 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
  10.491      // FIXME!
  10.492  
  10.493      shadow_lock(v->domain);
  10.494 -    guest_walk_tables(v, addr, &gw, 1);
  10.495 -    *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
  10.496 -    unmap_walk(v, &gw);
  10.497 +    guest_walk_tables(v, addr, &gw, 0, 1);
  10.498 +    *(guest_l1e_t *)eff_l1e = gw.l1e;
  10.499      shadow_unlock(v->domain);
  10.500  }
  10.501  #endif /* CONFIG==SHADOW==GUEST */
  10.502 @@ -636,17 +644,17 @@ unsigned char pat_type_2_pte_flags(unsig
  10.503  
  10.504  static always_inline void
  10.505  _sh_propagate(struct vcpu *v, 
  10.506 -              void *guest_entry_ptr, 
  10.507 -              mfn_t guest_table_mfn, 
  10.508 +              guest_intpte_t guest_intpte,
  10.509                mfn_t target_mfn, 
  10.510                void *shadow_entry_ptr,
  10.511                int level,
  10.512                fetch_type_t ft, 
  10.513                p2m_type_t p2mt)
  10.514  {
  10.515 -    guest_l1e_t *gp = guest_entry_ptr;
  10.516 +    guest_l1e_t guest_entry = { guest_intpte };
  10.517      shadow_l1e_t *sp = shadow_entry_ptr;
  10.518      struct domain *d = v->domain;
  10.519 +    gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
  10.520      u32 pass_thru_flags;
  10.521      u32 gflags, sflags;
  10.522  
  10.523 @@ -660,15 +668,7 @@ static always_inline void
  10.524          goto done;
  10.525      }
  10.526  
  10.527 -    if ( mfn_valid(guest_table_mfn) )
  10.528 -        /* Handle A and D bit propagation into the guest */
  10.529 -        gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
  10.530 -    else 
  10.531 -    {
  10.532 -        /* Must be an fl1e or a prefetch */
  10.533 -        ASSERT(level==1 || !(ft & FETCH_TYPE_DEMAND));
  10.534 -        gflags = guest_l1e_get_flags(*gp);
  10.535 -    }
  10.536 +    gflags = guest_l1e_get_flags(guest_entry);
  10.537  
  10.538      if ( unlikely(!(gflags & _PAGE_PRESENT)) )
  10.539      {
  10.540 @@ -684,7 +684,7 @@ static always_inline void
  10.541      if ( level == 1 && p2mt == p2m_mmio_dm )
  10.542      {
  10.543          /* Guest l1e maps emulated MMIO space */
  10.544 -        *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
  10.545 +        *sp = sh_l1e_mmio(target_gfn, gflags);
  10.546          if ( !d->arch.paging.shadow.has_fast_mmio_entries )
  10.547              d->arch.paging.shadow.has_fast_mmio_entries = 1;
  10.548          goto done;
  10.549 @@ -694,9 +694,6 @@ static always_inline void
  10.550      // case of a prefetch, an invalid mfn means that we can not usefully
  10.551      // shadow anything, and so we return early.
  10.552      //
  10.553 -    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
  10.554 -     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
  10.555 -     * MMIO areas need to be added to the frame-table to make them "valid". */
  10.556      if ( shadow_mode_refcounts(d) && 
  10.557           !mfn_valid(target_mfn) && (p2mt != p2m_mmio_direct) )
  10.558      {
  10.559 @@ -718,20 +715,22 @@ static always_inline void
  10.560          pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT;
  10.561      sflags = gflags & pass_thru_flags;
  10.562  
  10.563 -    /* Only change memory caching type for pass-through domain */
  10.564 +    /*
  10.565 +     * For HVM domains with direct access to MMIO areas, set the correct
  10.566 +     * caching attributes in the shadows to match what was asked for
  10.567 +     */
  10.568      if ( (level == 1) && is_hvm_domain(d) &&
  10.569           !list_empty(&(domain_hvm_iommu(d)->pdev_list)) )
  10.570      {
  10.571          unsigned int type;
  10.572 -        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(guest_l1e_get_gfn(*gp)),
  10.573 -                                          &type) )
  10.574 +        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), &type) )
  10.575              sflags |= pat_type_2_pte_flags(type);
  10.576 -        else if ( v->domain->arch.hvm_domain.is_in_uc_mode )
  10.577 +        else if ( d->arch.hvm_domain.is_in_uc_mode )
  10.578              sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
  10.579          else
  10.580              sflags |= get_pat_flags(v,
  10.581                                      gflags,
  10.582 -                                    guest_l1e_get_paddr(*gp),
  10.583 +                                    gfn_to_paddr(target_gfn),
  10.584                                      mfn_x(target_mfn) << PAGE_SHIFT);
  10.585      }
  10.586  
  10.587 @@ -813,59 +812,55 @@ static always_inline void
  10.588   done:
  10.589      SHADOW_DEBUG(PROPAGATE,
  10.590                   "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
  10.591 -                 fetch_type_names[ft], level, gp->l1, sp->l1);
  10.592 +                 fetch_type_names[ft], level, guest_entry.l1, sp->l1);
  10.593  }
  10.594  
  10.595  
  10.596 -/* These four wrappers give us a little bit of type-safety back around the 
  10.597 - * use of void-* pointers in _sh_propagate(), and allow the compiler to 
  10.598 - * optimize out some level checks. */
  10.599 +/* These four wrappers give us a little bit of type-safety back around
  10.600 + * the use of void-* pointers and intpte types in _sh_propagate(), and
  10.601 + * allow the compiler to optimize out some level checks. */
  10.602  
  10.603  #if GUEST_PAGING_LEVELS >= 4
  10.604  static void
  10.605  l4e_propagate_from_guest(struct vcpu *v, 
  10.606 -                         guest_l4e_t *gl4e,
  10.607 -                         mfn_t gl4mfn,
  10.608 +                         guest_l4e_t gl4e,
  10.609                           mfn_t sl3mfn,
  10.610                           shadow_l4e_t *sl4e,
  10.611                           fetch_type_t ft)
  10.612  {
  10.613 -    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
  10.614 +    _sh_propagate(v, gl4e.l4, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
  10.615  }
  10.616  
  10.617  static void
  10.618  l3e_propagate_from_guest(struct vcpu *v,
  10.619 -                         guest_l3e_t *gl3e,
  10.620 -                         mfn_t gl3mfn, 
  10.621 +                         guest_l3e_t gl3e,
  10.622                           mfn_t sl2mfn, 
  10.623                           shadow_l3e_t *sl3e,
  10.624                           fetch_type_t ft)
  10.625  {
  10.626 -    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
  10.627 +    _sh_propagate(v, gl3e.l3, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
  10.628  }
  10.629  #endif // GUEST_PAGING_LEVELS >= 4
  10.630  
  10.631  static void
  10.632  l2e_propagate_from_guest(struct vcpu *v, 
  10.633 -                         guest_l2e_t *gl2e,
  10.634 -                         mfn_t gl2mfn,
  10.635 +                         guest_l2e_t gl2e,
  10.636                           mfn_t sl1mfn,
  10.637                           shadow_l2e_t *sl2e,
  10.638                           fetch_type_t ft)
  10.639  {
  10.640 -    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
  10.641 +    _sh_propagate(v, gl2e.l2, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
  10.642  }
  10.643  
  10.644  static void
  10.645  l1e_propagate_from_guest(struct vcpu *v, 
  10.646 -                         guest_l1e_t *gl1e,
  10.647 -                         mfn_t gl1mfn,
  10.648 +                         guest_l1e_t gl1e,
  10.649                           mfn_t gmfn, 
  10.650                           shadow_l1e_t *sl1e,
  10.651                           fetch_type_t ft, 
  10.652                           p2m_type_t p2mt)
  10.653  {
  10.654 -    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
  10.655 +    _sh_propagate(v, gl1e.l1, gmfn, sl1e, 1, ft, p2mt);
  10.656  }
  10.657  
  10.658  
  10.659 @@ -1859,8 +1854,7 @@ static shadow_l3e_t * shadow_get_and_cre
  10.660              *sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow);
  10.661          }
  10.662          /* Install the new sl3 table in the sl4e */
  10.663 -        l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn, 
  10.664 -                                 *sl3mfn, &new_sl4e, ft);
  10.665 +        l4e_propagate_from_guest(v, gw->l4e, *sl3mfn, &new_sl4e, ft);
  10.666          r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
  10.667          ASSERT((r & SHADOW_SET_FLUSH) == 0);
  10.668          if ( r & SHADOW_SET_ERROR )
  10.669 @@ -1909,8 +1903,7 @@ static shadow_l2e_t * shadow_get_and_cre
  10.670              *sl2mfn = sh_make_shadow(v, gw->l2mfn, t);
  10.671          }
  10.672          /* Install the new sl2 table in the sl3e */
  10.673 -        l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn, 
  10.674 -                                 *sl2mfn, &new_sl3e, ft);
  10.675 +        l3e_propagate_from_guest(v, gw->l3e, *sl2mfn, &new_sl3e, ft);
  10.676          r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
  10.677          ASSERT((r & SHADOW_SET_FLUSH) == 0);
  10.678          if ( r & SHADOW_SET_ERROR )
  10.679 @@ -1934,7 +1927,7 @@ static shadow_l2e_t * shadow_get_and_cre
  10.680      /* This next line is important: the guest l2 has a 16k
  10.681       * shadow, we need to return the right mfn of the four. This
  10.682       * call will set it for us as a side-effect. */
  10.683 -    (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
  10.684 +    (void) shadow_l2_index(sl2mfn, guest_l2_table_offset(gw->va));
  10.685      /* Reading the top level table is always valid. */
  10.686      return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
  10.687  #endif 
  10.688 @@ -1956,8 +1949,8 @@ static shadow_l1e_t * shadow_get_and_cre
  10.689       * re-do it to fix a PSE dirty bit. */
  10.690      if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT 
  10.691           && likely(ft != ft_demand_write
  10.692 -                   || (guest_l2e_get_flags(*gw->l2e) & _PAGE_DIRTY) 
  10.693 -                   || !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)) )
  10.694 +                   || (shadow_l2e_get_flags(*sl2e) & _PAGE_RW) 
  10.695 +                   || !(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
  10.696      {
  10.697          *sl1mfn = shadow_l2e_get_mfn(*sl2e);
  10.698          ASSERT(mfn_valid(*sl1mfn));
  10.699 @@ -1965,14 +1958,14 @@ static shadow_l1e_t * shadow_get_and_cre
  10.700      else 
  10.701      {
  10.702          shadow_l2e_t new_sl2e;
  10.703 -        int r, flags = guest_l2e_get_flags(*gw->l2e);
  10.704 +        int r, flags = guest_l2e_get_flags(gw->l2e);
  10.705          /* No l1 shadow installed: find and install it. */
  10.706          if ( !(flags & _PAGE_PRESENT) )
  10.707              return NULL; /* No guest page. */
  10.708          if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) ) 
  10.709          {
  10.710              /* Splintering a superpage */
  10.711 -            gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
  10.712 +            gfn_t l2gfn = guest_l2e_get_gfn(gw->l2e);
  10.713              *sl1mfn = get_fl1_shadow_status(v, l2gfn);
  10.714              if ( !mfn_valid(*sl1mfn) ) 
  10.715              {
  10.716 @@ -1992,8 +1985,7 @@ static shadow_l1e_t * shadow_get_and_cre
  10.717              }
  10.718          }
  10.719          /* Install the new sl1 table in the sl2e */
  10.720 -        l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn, 
  10.721 -                                 *sl1mfn, &new_sl2e, ft);
  10.722 +        l2e_propagate_from_guest(v, gw->l2e, *sl1mfn, &new_sl2e, ft);
  10.723          r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
  10.724          ASSERT((r & SHADOW_SET_FLUSH) == 0);        
  10.725          if ( r & SHADOW_SET_ERROR )
  10.726 @@ -2247,7 +2239,7 @@ void sh_unhook_64b_mappings(struct vcpu 
  10.727  static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
  10.728  {
  10.729      shadow_l4e_t new_sl4e;
  10.730 -    guest_l4e_t *new_gl4e = new_ge;
  10.731 +    guest_l4e_t new_gl4e = *(guest_l4e_t *)new_ge;
  10.732      shadow_l4e_t *sl4p = se;
  10.733      mfn_t sl3mfn = _mfn(INVALID_MFN);
  10.734      struct domain *d = v->domain;
  10.735 @@ -2256,17 +2248,16 @@ static int validate_gl4e(struct vcpu *v,
  10.736  
  10.737      perfc_incr(shadow_validate_gl4e_calls);
  10.738  
  10.739 -    if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
  10.740 +    if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT )
  10.741      {
  10.742 -        gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
  10.743 +        gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
  10.744          mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
  10.745          if ( p2m_is_ram(p2mt) )
  10.746              sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
  10.747          else
  10.748              result |= SHADOW_SET_ERROR;
  10.749      }
  10.750 -    l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
  10.751 -                             sl3mfn, &new_sl4e, ft_prefetch);
  10.752 +    l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
  10.753  
  10.754      // check for updates to xen reserved slots
  10.755      if ( !shadow_mode_external(d) )
  10.756 @@ -2301,7 +2292,7 @@ static int validate_gl4e(struct vcpu *v,
  10.757  static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
  10.758  {
  10.759      shadow_l3e_t new_sl3e;
  10.760 -    guest_l3e_t *new_gl3e = new_ge;
  10.761 +    guest_l3e_t new_gl3e = *(guest_l3e_t *)new_ge;
  10.762      shadow_l3e_t *sl3p = se;
  10.763      mfn_t sl2mfn = _mfn(INVALID_MFN);
  10.764      p2m_type_t p2mt;
  10.765 @@ -2309,17 +2300,16 @@ static int validate_gl3e(struct vcpu *v,
  10.766  
  10.767      perfc_incr(shadow_validate_gl3e_calls);
  10.768  
  10.769 -    if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
  10.770 +    if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT )
  10.771      {
  10.772 -        gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
  10.773 +        gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
  10.774          mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
  10.775          if ( p2m_is_ram(p2mt) )
  10.776              sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
  10.777          else
  10.778              result |= SHADOW_SET_ERROR;
  10.779      }
  10.780 -    l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN), 
  10.781 -                             sl2mfn, &new_sl3e, ft_prefetch);
  10.782 +    l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
  10.783      result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
  10.784  
  10.785      return result;
  10.786 @@ -2329,7 +2319,7 @@ static int validate_gl3e(struct vcpu *v,
  10.787  static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
  10.788  {
  10.789      shadow_l2e_t new_sl2e;
  10.790 -    guest_l2e_t *new_gl2e = new_ge;
  10.791 +    guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge;
  10.792      shadow_l2e_t *sl2p = se;
  10.793      mfn_t sl1mfn = _mfn(INVALID_MFN);
  10.794      p2m_type_t p2mt;
  10.795 @@ -2337,11 +2327,11 @@ static int validate_gl2e(struct vcpu *v,
  10.796  
  10.797      perfc_incr(shadow_validate_gl2e_calls);
  10.798  
  10.799 -    if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
  10.800 +    if ( guest_l2e_get_flags(new_gl2e) & _PAGE_PRESENT )
  10.801      {
  10.802 -        gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
  10.803 +        gfn_t gl1gfn = guest_l2e_get_gfn(new_gl2e);
  10.804          if ( guest_supports_superpages(v) &&
  10.805 -             (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
  10.806 +             (guest_l2e_get_flags(new_gl2e) & _PAGE_PSE) )
  10.807          {
  10.808              // superpage -- need to look up the shadow L1 which holds the
   10.809              // splinters...
  10.810 @@ -2364,8 +2354,7 @@ static int validate_gl2e(struct vcpu *v,
  10.811                  result |= SHADOW_SET_ERROR;
  10.812          }
  10.813      }
  10.814 -    l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
  10.815 -                             sl1mfn, &new_sl2e, ft_prefetch);
  10.816 +    l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch);
  10.817  
  10.818      // check for updates to xen reserved slots in PV guests...
  10.819      // XXX -- need to revisit this for PV 3-on-4 guests.
  10.820 @@ -2415,7 +2404,7 @@ static int validate_gl2e(struct vcpu *v,
  10.821  static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
  10.822  {
  10.823      shadow_l1e_t new_sl1e;
  10.824 -    guest_l1e_t *new_gl1e = new_ge;
  10.825 +    guest_l1e_t new_gl1e = *(guest_l1e_t *)new_ge;
  10.826      shadow_l1e_t *sl1p = se;
  10.827      gfn_t gfn;
  10.828      mfn_t gmfn;
  10.829 @@ -2424,11 +2413,10 @@ static int validate_gl1e(struct vcpu *v,
  10.830  
  10.831      perfc_incr(shadow_validate_gl1e_calls);
  10.832  
  10.833 -    gfn = guest_l1e_get_gfn(*new_gl1e);
  10.834 +    gfn = guest_l1e_get_gfn(new_gl1e);
  10.835      gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
  10.836  
  10.837 -    l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
  10.838 -                             ft_prefetch, p2mt);
  10.839 +    l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
  10.840      
  10.841      result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
  10.842      return result;
  10.843 @@ -2615,7 +2603,7 @@ static void sh_prefetch(struct vcpu *v, 
  10.844      int i, dist;
  10.845      gfn_t gfn;
  10.846      mfn_t gmfn;
  10.847 -    guest_l1e_t gl1e;
  10.848 +    guest_l1e_t *gl1p = NULL, gl1e;
  10.849      shadow_l1e_t sl1e;
  10.850      u32 gflags;
  10.851      p2m_type_t p2mt;
  10.852 @@ -2626,16 +2614,23 @@ static void sh_prefetch(struct vcpu *v, 
  10.853      if ( dist > PREFETCH_DISTANCE )
  10.854          dist = PREFETCH_DISTANCE;
  10.855  
  10.856 +    if ( mfn_valid(gw->l1mfn) )
  10.857 +    {
  10.858 +        /* Normal guest page; grab the next guest entry */
  10.859 +        gl1p = sh_map_domain_page(gw->l1mfn);
  10.860 +        gl1p += guest_l1_table_offset(gw->va);
  10.861 +    }
  10.862 +
  10.863      for ( i = 1; i < dist ; i++ ) 
  10.864      {
  10.865          /* No point in prefetching if there's already a shadow */
  10.866          if ( ptr_sl1e[i].l1 != 0 )
  10.867              break;
  10.868  
  10.869 -        if ( gw->l1e )
  10.870 +        if ( mfn_valid(gw->l1mfn) )
  10.871          {
  10.872              /* Normal guest page; grab the next guest entry */
  10.873 -            gl1e = gw->l1e[i];
  10.874 +            gl1e = gl1p[i];
  10.875              /* Not worth continuing if we hit an entry that will need another
  10.876               * fault for A/D-bit propagation anyway */
  10.877              gflags = guest_l1e_get_flags(gl1e);
  10.878 @@ -2647,24 +2642,23 @@ static void sh_prefetch(struct vcpu *v, 
  10.879          else 
  10.880          {
  10.881              /* Fragmented superpage, unless we've been called wrongly */
  10.882 -            ASSERT(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE);
  10.883 +            ASSERT(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE);
  10.884              /* Increment the l1e's GFN by the right number of guest pages */
  10.885              gl1e = guest_l1e_from_gfn(
  10.886 -                _gfn(gfn_x(guest_l1e_get_gfn(gw->eff_l1e)) + i), 
  10.887 -                guest_l1e_get_flags(gw->eff_l1e));
  10.888 +                _gfn(gfn_x(guest_l1e_get_gfn(gw->l1e)) + i), 
  10.889 +                guest_l1e_get_flags(gw->l1e));
  10.890          }
  10.891  
  10.892          /* Look at the gfn that the l1e is pointing at */
  10.893          gfn = guest_l1e_get_gfn(gl1e);
  10.894          gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
  10.895  
  10.896 -        /* Propagate the entry.  Safe to use a pointer to our local 
  10.897 -         * gl1e, since this is not a demand-fetch so there will be no 
  10.898 -         * write-back to the guest. */
  10.899 -        l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
  10.900 -                                 gmfn, &sl1e, ft_prefetch, p2mt);
  10.901 +        /* Propagate the entry.  */
  10.902 +        l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
  10.903          (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
  10.904      }
  10.905 +    if ( gl1p != NULL )
  10.906 +        sh_unmap_domain_page(gl1p);
  10.907  }
  10.908  
  10.909  #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
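
Illustration (not part of the patch): when the guest L2 entry maps a superpage there is no real guest L1 table, so the prefetch loop above re-uses the walker's fabricated l1e and bumps its gfn by the loop index to cover each neighbouring 4k page. A minimal standalone C sketch of that arithmetic, using a simplified, purely illustrative PTE layout:

    #include <stdint.h>
    #include <stdio.h>

    #define FLAG_MASK 0xfffu    /* low 12 bits hold flags in this toy layout */

    static uint64_t l1e_from_gfn(uint64_t gfn, uint64_t flags)
    {
        return (gfn << 12) | (flags & FLAG_MASK);
    }

    int main(void)
    {
        uint64_t base_gfn = 0x1000;   /* first gfn of the guest superpage */
        uint64_t flags    = 0x067;    /* P|RW|US|A|D in the usual encoding */

        /* Each splintered shadow l1 entry targets the next 4k page. */
        for (int i = 0; i < 4; i++) {
            uint64_t gl1e = l1e_from_gfn(base_gfn + i, flags);
            printf("splinter %d -> l1e %#llx\n", i, (unsigned long long)gl1e);
        }
        return 0;
    }
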
  10.910 @@ -2684,7 +2678,6 @@ static int sh_page_fault(struct vcpu *v,
  10.911  {
  10.912      struct domain *d = v->domain;
  10.913      walk_t gw;
  10.914 -    u32 accumulated_gflags;
  10.915      gfn_t gfn;
  10.916      mfn_t gmfn, sl1mfn=_mfn(0);
  10.917      shadow_l1e_t sl1e, *ptr_sl1e;
  10.918 @@ -2769,10 +2762,10 @@ static int sh_page_fault(struct vcpu *v,
  10.919      
  10.920      shadow_audit_tables(v);
  10.921                     
  10.922 -    if ( guest_walk_tables(v, va, &gw, 1) != 0 )
  10.923 +    if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 )
  10.924      {
  10.925 -        SHADOW_PRINTK("malformed guest pagetable\n");
  10.926 -        print_gw(&gw);
  10.927 +        perfc_incr(shadow_fault_bail_real_fault);
  10.928 +        goto not_a_shadow_fault;
  10.929      }
  10.930  
  10.931      /* It's possible that the guest has put pagetables in memory that it has 
  10.932 @@ -2788,64 +2781,12 @@ static int sh_page_fault(struct vcpu *v,
  10.933  
  10.934      sh_audit_gw(v, &gw);
  10.935  
  10.936 -    // We do not look at the gw->l1e, as that will not exist for superpages.
  10.937 -    // Instead, we use the gw->eff_l1e...
  10.938 -    //
  10.939 -    // We need not check all the levels of the guest page table entries for
  10.940 -    // present vs not-present, as the eff_l1e will always be not present if
  10.941 -    // one of the higher level entries is not present.
  10.942 -    //
  10.943 -    if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
  10.944 -    {
  10.945 -        perfc_incr(shadow_fault_bail_not_present);
  10.946 -        goto not_a_shadow_fault;
  10.947 -    }
  10.948 -
  10.949 -    // All levels of the guest page table are now known to be present.
  10.950 -    accumulated_gflags = accumulate_guest_flags(v, &gw);
  10.951 -
  10.952 -    // Check for attempts to access supervisor-only pages from user mode,
  10.953 -    // i.e. ring 3.  Such errors are not caused or dealt with by the shadow
  10.954 -    // code.
  10.955 -    //
  10.956 -    if ( (regs->error_code & PFEC_user_mode) &&
  10.957 -         !(accumulated_gflags & _PAGE_USER) )
  10.958 -    {
  10.959 -        /* illegal user-mode access to supervisor-only page */
  10.960 -        perfc_incr(shadow_fault_bail_user_supervisor);
  10.961 -        goto not_a_shadow_fault;
  10.962 -    }
  10.963 -
  10.964 -    // Was it a write fault?
  10.965 +    /* What kind of access are we dealing with? */
  10.966      ft = ((regs->error_code & PFEC_write_access)
  10.967            ? ft_demand_write : ft_demand_read);
  10.968 -    if ( ft == ft_demand_write )
  10.969 -    {
  10.970 -        if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
  10.971 -        {
  10.972 -            perfc_incr(shadow_fault_bail_ro_mapping);
  10.973 -            goto not_a_shadow_fault;
  10.974 -        }
  10.975 -    }
  10.976 -    else // must have been either an insn fetch or read fault
  10.977 -    {
  10.978 -        // Check for NX bit violations: attempts to execute code that is
  10.979 -        // marked "do not execute".  Such errors are not caused or dealt with
  10.980 -        // by the shadow code.
  10.981 -        //
  10.982 -        if ( regs->error_code & PFEC_insn_fetch )
  10.983 -        {
  10.984 -            if ( accumulated_gflags & _PAGE_NX_BIT )
  10.985 -            {
  10.986 -                /* NX prevented this code fetch */
  10.987 -                perfc_incr(shadow_fault_bail_nx);
  10.988 -                goto not_a_shadow_fault;
  10.989 -            }
  10.990 -        }
  10.991 -    }
  10.992  
  10.993      /* What mfn is the guest trying to access? */
  10.994 -    gfn = guest_l1e_get_gfn(gw.eff_l1e);
  10.995 +    gfn = guest_l1e_get_gfn(gw.l1e);
  10.996      gmfn = gfn_to_mfn(d, gfn, &p2mt);
  10.997  
  10.998      if ( shadow_mode_refcounts(d) && 
  10.999 @@ -2876,14 +2817,12 @@ static int sh_page_fault(struct vcpu *v,
 10.1000           * shadow_set_l*e(), which will have crashed the guest.
 10.1001           * Get out of the fault handler immediately. */
 10.1002          ASSERT(d->is_shutting_down);
 10.1003 -        unmap_walk(v, &gw);
 10.1004          shadow_unlock(d);
 10.1005          return 0;
 10.1006      }
 10.1007  
 10.1008      /* Calculate the shadow entry and write it */
 10.1009 -    l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
 10.1010 -                             gmfn, &sl1e, ft, p2mt);
 10.1011 +    l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt);
 10.1012      r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 10.1013  
 10.1014  #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
 10.1015 @@ -2921,7 +2860,6 @@ static int sh_page_fault(struct vcpu *v,
 10.1016  
 10.1017   done:
 10.1018      sh_audit_gw(v, &gw);
 10.1019 -    unmap_walk(v, &gw);
 10.1020      SHADOW_PRINTK("fixed\n");
 10.1021      shadow_audit_tables(v);
 10.1022      shadow_unlock(d);
 10.1023 @@ -2972,7 +2910,6 @@ static int sh_page_fault(struct vcpu *v,
 10.1024       * take it again when we write to the pagetables.
 10.1025       */
 10.1026      sh_audit_gw(v, &gw);
 10.1027 -    unmap_walk(v, &gw);
 10.1028      shadow_audit_tables(v);
 10.1029      shadow_unlock(d);
 10.1030  
 10.1031 @@ -3033,7 +2970,6 @@ static int sh_page_fault(struct vcpu *v,
 10.1032          goto not_a_shadow_fault;
 10.1033      perfc_incr(shadow_fault_mmio);
 10.1034      sh_audit_gw(v, &gw);
 10.1035 -    unmap_walk(v, &gw);
 10.1036      SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
 10.1037      shadow_audit_tables(v);
 10.1038      reset_early_unshadow(v);
 10.1039 @@ -3043,7 +2979,6 @@ static int sh_page_fault(struct vcpu *v,
 10.1040  
 10.1041   not_a_shadow_fault:
 10.1042      sh_audit_gw(v, &gw);
 10.1043 -    unmap_walk(v, &gw);
 10.1044      SHADOW_PRINTK("not a shadow fault\n");
 10.1045      shadow_audit_tables(v);
 10.1046      reset_early_unshadow(v);
 10.1047 @@ -3129,30 +3064,36 @@ sh_invlpg(struct vcpu *v, unsigned long 
 10.1048  
 10.1049  
 10.1050  static unsigned long
 10.1051 -sh_gva_to_gfn(struct vcpu *v, unsigned long va)
 10.1052 +sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
 10.1053  /* Called to translate a guest virtual address to what the *guest*
 10.1054   * pagetables would map it to. */
 10.1055  {
 10.1056      walk_t gw;
 10.1057      gfn_t gfn;
 10.1058 -    
 10.1059 +
 10.1060  #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 10.1061      struct shadow_vtlb t = {0};
 10.1062 -    if ( vtlb_lookup(v, va, &t) )
 10.1063 +    /* Check the vTLB cache first */
 10.1064 +    if ( vtlb_lookup(v, va, pfec[0], &t) ) 
 10.1065          return t.frame_number;
 10.1066  #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 10.1067  
 10.1068 -    guest_walk_tables(v, va, &gw, 0);
 10.1069 +    if ( guest_walk_tables(v, va, &gw, pfec[0], 0) != 0 )
 10.1070 +    {
 10.1071 +        if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
 10.1072 +            pfec[0] &= ~PFEC_page_present;
 10.1073 +        return INVALID_GFN;
 10.1074 +    }
 10.1075      gfn = guest_walk_to_gfn(&gw);
 10.1076  
 10.1077  #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 10.1078      t.page_number = va >> PAGE_SHIFT;
 10.1079      t.frame_number = gfn_x(gfn);
 10.1080      t.flags = accumulate_guest_flags(v, &gw); 
 10.1081 +    t.pfec = pfec[0];
 10.1082      vtlb_insert(v, t);
 10.1083  #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 10.1084  
 10.1085 -    unmap_walk(v, &gw);
 10.1086      return gfn_x(gfn);
 10.1087  }
 10.1088  
 10.1089 @@ -4006,9 +3947,8 @@ static inline void * emulate_map_dest(st
 10.1090                                        struct sh_emulate_ctxt *sh_ctxt,
 10.1091                                        mfn_t *mfnp)
 10.1092  {
 10.1093 -    walk_t gw;
 10.1094 -    u32 flags, errcode;
 10.1095 -    gfn_t gfn;
 10.1096 +    uint32_t pfec;
 10.1097 +    unsigned long gfn;
 10.1098      mfn_t mfn;
 10.1099      p2m_type_t p2mt;
 10.1100  
 10.1101 @@ -4016,50 +3956,20 @@ static inline void * emulate_map_dest(st
 10.1102      if ( ring_3(sh_ctxt->ctxt.regs) ) 
 10.1103          return NULL;
 10.1104  
 10.1105 -#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 10.1106 -    /* Try the virtual TLB first */
 10.1107 +    /* Translate the VA, and exit with a page-fault if we fail */
 10.1108 +    pfec = PFEC_page_present | PFEC_write_access;
 10.1109 +    gfn = sh_gva_to_gfn(v, vaddr, &pfec);
 10.1110 +    if ( gfn == INVALID_GFN ) 
 10.1111      {
 10.1112 -        struct shadow_vtlb t = {0};
 10.1113 -        if ( vtlb_lookup(v, vaddr, &t) 
 10.1114 -             && ((t.flags & (_PAGE_PRESENT|_PAGE_RW)) 
 10.1115 -                 == (_PAGE_PRESENT|_PAGE_RW)) )
 10.1116 -        {
 10.1117 -            flags = t.flags;
 10.1118 -            gfn = _gfn(t.frame_number);
 10.1119 -        }
 10.1120 +        if ( is_hvm_vcpu(v) )
 10.1121 +            hvm_inject_exception(TRAP_page_fault, pfec, vaddr);
 10.1122          else
 10.1123 -        {
 10.1124 -            /* Need to do the full lookup, just in case permissions
 10.1125 -             * have increased since we cached this entry */
 10.1126 -            
 10.1127 -#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 10.1128 -
 10.1129 -            /* Walk the guest pagetables */
 10.1130 -            guest_walk_tables(v, vaddr, &gw, 1);
 10.1131 -            flags = accumulate_guest_flags(v, &gw);
 10.1132 -            gfn = guest_l1e_get_gfn(gw.eff_l1e);
 10.1133 -            sh_audit_gw(v, &gw);
 10.1134 -            unmap_walk(v, &gw);
 10.1135 -            
 10.1136 -#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 10.1137 -            /* Remember this translation for next time */
 10.1138 -            t.page_number = vaddr >> PAGE_SHIFT;
 10.1139 -            t.frame_number = gfn_x(gfn);
 10.1140 -            t.flags = flags;
 10.1141 -            vtlb_insert(v, t);
 10.1142 -        }
 10.1143 +            propagate_page_fault(vaddr, pfec);
 10.1144 +        return NULL;
 10.1145      }
 10.1146 -#endif
 10.1147 -
 10.1148 -    errcode = PFEC_write_access;
 10.1149 -    if ( !(flags & _PAGE_PRESENT) ) 
 10.1150 -        goto page_fault;
 10.1151 -
 10.1152 -    errcode |= PFEC_page_present;
 10.1153 -    if ( !(flags & _PAGE_RW) ) 
 10.1154 -        goto page_fault;
 10.1155 -
 10.1156 -    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 10.1157 +
 10.1158 +    /* Translate the GFN */
 10.1159 +    mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
 10.1160      if ( p2m_is_ram(p2mt) )
 10.1161      {
 10.1162          ASSERT(mfn_valid(mfn));
 10.1163 @@ -4069,13 +3979,6 @@ static inline void * emulate_map_dest(st
 10.1164      }
 10.1165      else 
 10.1166          return NULL;
 10.1167 -
 10.1168 - page_fault:
 10.1169 -    if ( is_hvm_vcpu(v) )
 10.1170 -        hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
 10.1171 -    else
 10.1172 -        propagate_page_fault(vaddr, errcode);
 10.1173 -    return NULL;
 10.1174  }
 10.1175  
 10.1176  static int safe_not_to_verify_write(mfn_t gmfn, void *dst, void *src, 
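
Illustration (not part of the patch): the rewritten emulate_map_dest() above asks the translator for exactly the rights it needs (present plus write) and, on failure, raises a page fault built from the pfec value the translator handed back. A standalone sketch of that translate-or-fault pattern; translate() and inject_fault() are stand-ins, and the PFEC_* values mirror the standard x86 error-code bits:

    #include <stdint.h>
    #include <stdio.h>

    #define PFEC_page_present  0x01u   /* standard x86 error-code bits */
    #define PFEC_write_access  0x02u
    #define INVALID_GFN        (~0ul)

    /* Stand-in translator: pretend the page is mapped read-only, so write
     * lookups fail but the returned error code keeps the "present" bit. */
    static unsigned long translate(unsigned long va, uint32_t *pfec)
    {
        if (*pfec & PFEC_write_access)
            return INVALID_GFN;
        return va >> 12;
    }

    static void inject_fault(unsigned long va, uint32_t pfec)
    {
        printf("page fault at %#lx, error code %#x\n", va, pfec);
    }

    int main(void)
    {
        unsigned long va = 0x7f001234ul;
        uint32_t pfec = PFEC_page_present | PFEC_write_access;
        unsigned long gfn = translate(va, &pfec);

        if (gfn == INVALID_GFN)
            inject_fault(va, pfec);   /* fault reflects the failed walk */
        else
            printf("gfn %#lx\n", gfn);
        return 0;
    }
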
    11.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Nov 02 10:37:59 2007 +0000
    11.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Nov 02 15:41:57 2007 +0000
    11.3 @@ -665,9 +665,10 @@ void shadow_continue_emulation(
    11.4  #define VTLB_ENTRIES 13
    11.5  
    11.6  struct shadow_vtlb {
    11.7 -    unsigned long page_number;    /* Guest virtual address >> PAGE_SHIFT  */
    11.8 -    unsigned long frame_number;   /* Guest physical address >> PAGE_SHIFT */
    11.9 -    u32 flags;    /* Accumulated guest pte flags, or 0 for an empty slot. */
   11.10 +    unsigned long page_number;      /* Guest virtual address >> PAGE_SHIFT  */
   11.11 +    unsigned long frame_number;     /* Guest physical address >> PAGE_SHIFT */
   11.12 +    uint32_t pfec;  /* Pagefault code for the lookup that filled this entry */
   11.13 +    uint32_t flags; /* Accumulated guest pte flags, or 0 for an empty slot. */
   11.14  };
   11.15  
   11.16  /* Call whenever the guest flushes hit actual TLB */
   11.17 @@ -692,7 +693,7 @@ static inline void vtlb_insert(struct vc
   11.18  }
   11.19  
   11.20  /* Look a translation up in the vTLB.  Returns 0 if not found. */
   11.21 -static inline int vtlb_lookup(struct vcpu *v, unsigned long va,
   11.22 +static inline int vtlb_lookup(struct vcpu *v, unsigned long va, uint32_t pfec,
   11.23                                struct shadow_vtlb *result) 
   11.24  {
   11.25      unsigned long page_number = va >> PAGE_SHIFT;
   11.26 @@ -701,7 +702,9 @@ static inline int vtlb_lookup(struct vcp
   11.27  
   11.28      spin_lock(&v->arch.paging.vtlb_lock);
   11.29      if ( v->arch.paging.vtlb[i].flags != 0 
   11.30 -         && v->arch.paging.vtlb[i].page_number == page_number )
   11.31 +         && v->arch.paging.vtlb[i].page_number == page_number 
   11.32 +         /* Any successful walk that had at least these pfec bits is OK */
   11.33 +         && (v->arch.paging.vtlb[i].pfec & pfec) == pfec )
   11.34      {
   11.35          rv = 1; 
   11.36          result[0] = v->arch.paging.vtlb[i];
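
Illustration (not part of the patch): the extra condition added to vtlb_lookup() above means a cached entry is only reused if the walk that filled it already proved at least the rights the new lookup is asking for. A standalone sketch of that subset test, with PFEC_* values mirroring the x86 error-code bits:

    #include <stdint.h>
    #include <stdio.h>

    #define PFEC_page_present  0x01u
    #define PFEC_write_access  0x02u
    #define PFEC_user_mode     0x04u

    /* A cached walk covers a new lookup iff it proved every bit asked for. */
    static int covers(uint32_t cached_pfec, uint32_t wanted_pfec)
    {
        return (cached_pfec & wanted_pfec) == wanted_pfec;
    }

    int main(void)
    {
        /* Entry filled by a successful user-mode write walk... */
        uint32_t cached = PFEC_page_present | PFEC_write_access | PFEC_user_mode;

        /* ...can serve a later user-mode read lookup (prints 1)... */
        printf("%d\n", covers(cached, PFEC_page_present | PFEC_user_mode));

        /* ...but a read-only walk cannot serve a write lookup (prints 0). */
        cached = PFEC_page_present | PFEC_user_mode;
        printf("%d\n", covers(cached, PFEC_page_present | PFEC_write_access));
        return 0;
    }
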
    12.1 --- a/xen/arch/x86/mm/shadow/types.h	Fri Nov 02 10:37:59 2007 +0000
    12.2 +++ b/xen/arch/x86/mm/shadow/types.h	Fri Nov 02 15:41:57 2007 +0000
    12.3 @@ -251,6 +251,7 @@ TYPE_SAFE(u32,gfn)
    12.4  /* Types of the guest's page tables */
    12.5  typedef l1_pgentry_32_t guest_l1e_t;
    12.6  typedef l2_pgentry_32_t guest_l2e_t;
    12.7 +typedef intpte_32_t guest_intpte_t;
    12.8  
    12.9  /* Access functions for them */
   12.10  static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
   12.11 @@ -319,6 +320,7 @@ typedef l3_pgentry_t guest_l3e_t;
   12.12  #if GUEST_PAGING_LEVELS >= 4
   12.13  typedef l4_pgentry_t guest_l4e_t;
   12.14  #endif
   12.15 +typedef intpte_t guest_intpte_t;
   12.16  
   12.17  /* Access functions for them */
   12.18  static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
   12.19 @@ -419,32 +421,27 @@ gfn_to_paddr(gfn_t gfn)
   12.20  
   12.21  /* Type used for recording a walk through guest pagetables.  It is
   12.22   * filled in by the pagetable walk function, and also used as a cache
   12.23 - * for later walks.  
   12.24 - * Any non-null pointer in this structure represents a mapping of guest
   12.25 - * memory.  We must always call walk_init() before using a walk_t, and 
   12.26 - * call walk_unmap() when we're done. 
   12.27 - * The "Effective l1e" field is used when there isn't an l1e to point to, 
   12.28 - * but we have fabricated an l1e for propagation to the shadow (e.g., 
   12.29 - * for splintering guest superpages into many shadow l1 entries).  */
    12.30 + * for later walks.  When we encounter a superpage l2e, we fabricate an
   12.31 + * l1e for propagation to the shadow (for splintering guest superpages
   12.32 + * into many shadow l1 entries).  */
   12.33  typedef struct shadow_walk_t walk_t;
   12.34  struct shadow_walk_t 
   12.35  {
   12.36      unsigned long va;           /* Address we were looking for */
   12.37  #if GUEST_PAGING_LEVELS >= 3
   12.38  #if GUEST_PAGING_LEVELS >= 4
   12.39 -    guest_l4e_t *l4e;           /* Pointer to guest's level 4 entry */
   12.40 +    guest_l4e_t l4e;            /* Guest's level 4 entry */
   12.41  #endif
   12.42 -    guest_l3e_t *l3e;           /* Pointer to guest's level 3 entry */
   12.43 +    guest_l3e_t l3e;            /* Guest's level 3 entry */
   12.44  #endif
   12.45 -    guest_l2e_t *l2e;           /* Pointer to guest's level 2 entry */
   12.46 -    guest_l1e_t *l1e;           /* Pointer to guest's level 1 entry */
   12.47 -    guest_l1e_t eff_l1e;        /* Effective level 1 entry */
   12.48 +    guest_l2e_t l2e;            /* Guest's level 2 entry */
   12.49 +    guest_l1e_t l1e;            /* Guest's level 1 entry (or fabrication) */
   12.50  #if GUEST_PAGING_LEVELS >= 4
   12.51 -    mfn_t l4mfn;                /* MFN that the level 4 entry is in */
   12.52 -    mfn_t l3mfn;                /* MFN that the level 3 entry is in */
   12.53 +    mfn_t l4mfn;                /* MFN that the level 4 entry was in */
   12.54 +    mfn_t l3mfn;                /* MFN that the level 3 entry was in */
   12.55  #endif
   12.56 -    mfn_t l2mfn;                /* MFN that the level 2 entry is in */
   12.57 -    mfn_t l1mfn;                /* MFN that the level 1 entry is in */
   12.58 +    mfn_t l2mfn;                /* MFN that the level 2 entry was in */
   12.59 +    mfn_t l1mfn;                /* MFN that the level 1 entry was in */
   12.60  };
   12.61  
   12.62  /* macros for dealing with the naming of the internal function names of the
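
Illustration (not part of the patch): because the walk_t above now holds guest entries by value, nothing in it refers to a live mapping and there is no longer an unmap_walk() to call. A standalone sketch of the copy-out pattern, where map_page()/unmap_page() stand in for the domain-page mapping helpers:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint64_t guest_l1e_t;

    struct walk {
        guest_l1e_t l1e;    /* copied guest entry, not a pointer into a mapping */
    };

    /* Stand-ins for the domain-page mapping helpers. */
    static void *map_page(unsigned long mfn)
    {
        static guest_l1e_t fake_page[512];
        (void)mfn;
        return fake_page;
    }

    static void unmap_page(void *p)
    {
        (void)p;
    }

    static void read_l1e(struct walk *w, unsigned long l1mfn, unsigned int index)
    {
        guest_l1e_t *table = map_page(l1mfn);
        w->l1e = table[index];   /* copy by value...                          */
        unmap_page(table);       /* ...so the mapping can go away immediately */
    }

    int main(void)
    {
        struct walk w;
        read_l1e(&w, 0x1234, 7);
        printf("copied l1e: %#llx\n", (unsigned long long)w.l1e);
        return 0;
    }
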
   12.63 @@ -542,7 +539,7 @@ accumulate_guest_flags(struct vcpu *v, w
   12.64  {
   12.65      u32 accumulated_flags;
   12.66  
   12.67 -    if ( unlikely(!(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT)) )
   12.68 +    if ( unlikely(!(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT)) )
   12.69          return 0;
   12.70          
   12.71      // We accumulate the permission flags with bitwise ANDing.
   12.72 @@ -550,17 +547,17 @@ accumulate_guest_flags(struct vcpu *v, w
   12.73      // For the NX bit, however, the polarity is wrong, so we accumulate the
   12.74      // inverse of the NX bit.
   12.75      //
   12.76 -    accumulated_flags =  guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
   12.77 -    accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
   12.78 +    accumulated_flags =  guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
   12.79 +    accumulated_flags &= guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
   12.80  
   12.81      // Note that PAE guests do not have USER or RW or NX bits in their L3s.
   12.82      //
   12.83  #if GUEST_PAGING_LEVELS == 3
   12.84      accumulated_flags &=
   12.85 -        ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
   12.86 +        ~_PAGE_PRESENT | (guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT);
   12.87  #elif GUEST_PAGING_LEVELS >= 4
   12.88 -    accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
   12.89 -    accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
   12.90 +    accumulated_flags &= guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
   12.91 +    accumulated_flags &= guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
   12.92  #endif
   12.93  
   12.94      // Revert the NX bit back to its original polarity
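
Illustration (not part of the patch): accumulate_guest_flags() above ANDs the permission bits across levels, but NX is a deny bit whose effective value is the OR, so it is XORed before the AND and XORed back afterwards. A standalone sketch of that trick; the bit positions are arbitrary stand-ins:

    #include <stdint.h>
    #include <stdio.h>

    #define P   0x1u   /* present  */
    #define RW  0x2u   /* writable */
    #define NX  0x8u   /* no-execute: illustrative position only */

    static uint32_t accumulate(uint32_t l1_flags, uint32_t l2_flags)
    {
        /* Flip NX so it accumulates like the others, then flip it back. */
        uint32_t acc = (l1_flags ^ NX) & (l2_flags ^ NX);
        return acc ^ NX;
    }

    int main(void)
    {
        /* NX set at only one level still forbids execution overall... */
        printf("%#x\n", accumulate(P | RW, P | RW | NX));   /* -> P|RW|NX */

        /* ...while RW cleared at one level removes write permission. */
        printf("%#x\n", accumulate(P | RW, P));             /* -> P */
        return 0;
    }
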
    13.1 --- a/xen/include/asm-x86/hvm/support.h	Fri Nov 02 10:37:59 2007 +0000
    13.2 +++ b/xen/include/asm-x86/hvm/support.h	Fri Nov 02 15:41:57 2007 +0000
    13.3 @@ -86,6 +86,7 @@ int hvm_copy_to_guest_phys(paddr_t paddr
    13.4  int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size);
    13.5  int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size);
    13.6  int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size);
    13.7 +int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size);
    13.8  
    13.9  void hvm_print_line(struct vcpu *v, const char c);
   13.10  void hlt_timer_fn(void *data);
    14.1 --- a/xen/include/asm-x86/paging.h	Fri Nov 02 10:37:59 2007 +0000
    14.2 +++ b/xen/include/asm-x86/paging.h	Fri Nov 02 15:41:57 2007 +0000
    14.3 @@ -105,7 +105,8 @@ struct paging_mode {
    14.4      int           (*page_fault            )(struct vcpu *v, unsigned long va,
    14.5                                              struct cpu_user_regs *regs);
    14.6      int           (*invlpg                )(struct vcpu *v, unsigned long va);
    14.7 -    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
    14.8 +    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va,
    14.9 +                                            uint32_t *pfec);
   14.10      void          (*update_cr3            )(struct vcpu *v, int do_locking);
   14.11      void          (*update_paging_modes   )(struct vcpu *v);
   14.12      void          (*write_p2m_entry       )(struct vcpu *v, unsigned long gfn,
   14.13 @@ -204,12 +205,17 @@ static inline int paging_invlpg(struct v
   14.14  }
   14.15  
   14.16  /* Translate a guest virtual address to the frame number that the
   14.17 - * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest 
   14.18 - * tables don't map this address. */
   14.19 + * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
   14.20 + * tables don't map this address for this kind of access.
   14.21 + * pfec[0] is used to determine which kind of access this is when
   14.22 + * walking the tables.  The caller should set the PFEC_page_present bit
   14.23 + * in pfec[0]; in the failure case, that bit will be cleared if appropriate. */
   14.24  #define INVALID_GFN (-1UL)
   14.25 -static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va)
   14.26 +static inline unsigned long paging_gva_to_gfn(struct vcpu *v, 
   14.27 +                                              unsigned long va,
   14.28 +                                              uint32_t *pfec)
   14.29  {
   14.30 -    return v->arch.paging.mode->gva_to_gfn(v, va);
   14.31 +    return v->arch.paging.mode->gva_to_gfn(v, va, pfec);
   14.32  }
   14.33  
   14.34  /* Update all the things that are derived from the guest's CR3.
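
Illustration (not part of the patch): the comment above defines the pfec[0] contract, where the caller seeds it with PFEC_page_present plus the access bits it needs, and on failure the present bit is cleared only if the address is genuinely unmapped, so callers can tell a not-present fault from a protection fault. A standalone sketch of interpreting the result; values are illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    #define PFEC_page_present 0x01u   /* standard x86 error-code bit */
    #define PFEC_user_mode    0x04u

    static const char *classify_failure(uint32_t pfec)
    {
        /* After a failed walk, the present bit tells the two cases apart. */
        return (pfec & PFEC_page_present) ? "mapped, but access denied"
                                          : "not mapped at all";
    }

    int main(void)
    {
        uint32_t pfec = PFEC_page_present | PFEC_user_mode;  /* user-mode read */

        /* Case 1: translator cleared the present bit, address unmapped. */
        printf("%s\n", classify_failure(pfec & ~PFEC_page_present));

        /* Case 2: present bit still set, so this was a protection failure. */
        printf("%s\n", classify_failure(pfec));
        return 0;
    }
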
    15.1 --- a/xen/include/asm-x86/perfc_defn.h	Fri Nov 02 10:37:59 2007 +0000
    15.2 +++ b/xen/include/asm-x86/perfc_defn.h	Fri Nov 02 15:41:57 2007 +0000
    15.3 @@ -50,12 +50,8 @@ PERFCOUNTER(shadow_fault_fast_gnp, "shad
    15.4  PERFCOUNTER(shadow_fault_fast_mmio, "shadow_fault fast path mmio")
    15.5  PERFCOUNTER(shadow_fault_fast_fail, "shadow_fault fast path error")
    15.6  PERFCOUNTER(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
    15.7 -PERFCOUNTER(shadow_fault_bail_not_present, 
    15.8 -                                        "shadow_fault guest not-present")
    15.9 -PERFCOUNTER(shadow_fault_bail_nx,  "shadow_fault guest NX fault")
   15.10 -PERFCOUNTER(shadow_fault_bail_ro_mapping, "shadow_fault guest R/W fault")
   15.11 -PERFCOUNTER(shadow_fault_bail_user_supervisor, 
   15.12 -                                        "shadow_fault guest U/S fault")
   15.13 +PERFCOUNTER(shadow_fault_bail_real_fault, 
   15.14 +                                        "shadow_fault really guest fault")
   15.15  PERFCOUNTER(shadow_fault_emulate_read, "shadow_fault emulates a read")
   15.16  PERFCOUNTER(shadow_fault_emulate_write, "shadow_fault emulates a write")
   15.17  PERFCOUNTER(shadow_fault_emulate_failed, "shadow_fault emulator fails")