ia64/xen-unstable

changeset 16314:650cadd1b283

Merge.
author Keir Fraser <keir@xensource.com>
date Fri Nov 02 16:38:11 2007 +0000 (2007-11-02)
parents 838e77a41a3c db9f62d8f7f4
children 070da619e65e
files xen/arch/x86/hvm/svm/svm.c
line diff
     1.1 --- a/xen/arch/x86/hvm/hvm.c	Fri Nov 02 16:34:54 2007 +0000
     1.2 +++ b/xen/arch/x86/hvm/hvm.c	Fri Nov 02 16:38:11 2007 +0000
     1.3 @@ -931,6 +931,7 @@ static void *hvm_map(unsigned long va, i
     1.4  {
     1.5      unsigned long gfn, mfn;
     1.6      p2m_type_t p2mt;
     1.7 +    uint32_t pfec;
     1.8  
     1.9      if ( ((va & ~PAGE_MASK) + size) > PAGE_SIZE )
    1.10      {
    1.11 @@ -939,11 +940,15 @@ static void *hvm_map(unsigned long va, i
    1.12          return NULL;
    1.13      }
    1.14  
    1.15 -    gfn = paging_gva_to_gfn(current, va);
    1.16 +    /* We're mapping on behalf of the segment-load logic, which might
    1.17 +     * write the accessed flags in the descriptors (in 32-bit mode), but
    1.18 +     * we still treat it as a kernel-mode read (i.e. no access checks). */
    1.19 +    pfec = PFEC_page_present;
    1.20 +    gfn = paging_gva_to_gfn(current, va, &pfec);
    1.21      mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    1.22      if ( !p2m_is_ram(p2mt) )
    1.23      {
    1.24 -        hvm_inject_exception(TRAP_page_fault, PFEC_write_access, va);
    1.25 +        hvm_inject_exception(TRAP_page_fault, pfec, va);
    1.26          return NULL;
    1.27      }
    1.28  
    1.29 @@ -1263,14 +1268,24 @@ void hvm_task_switch(
    1.30   *  @size = number of bytes to copy
    1.31   *  @dir  = copy *to* guest (TRUE) or *from* guest (FALSE)?
    1.32   *  @virt = addr is *virtual* (TRUE) or *guest physical* (FALSE)?
    1.33 + *  @fetch = copy is an instruction fetch?
    1.34   * Returns number of bytes failed to copy (0 == complete success).
    1.35   */
    1.36 -static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
    1.37 +static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, 
    1.38 +                      int virt, int fetch)
    1.39  {
    1.40      unsigned long gfn, mfn;
    1.41      p2m_type_t p2mt;
    1.42      char *p;
    1.43      int count, todo;
    1.44 +    uint32_t pfec = PFEC_page_present;
    1.45 +
    1.46 +    if ( dir ) 
    1.47 +        pfec |= PFEC_write_access;
    1.48 +    if ( ring_3(guest_cpu_user_regs()) )
    1.49 +        pfec |= PFEC_user_mode;
    1.50 +    if ( fetch ) 
    1.51 +        pfec |= PFEC_insn_fetch;
    1.52  
    1.53      todo = size;
    1.54      while ( todo > 0 )
    1.55 @@ -1278,7 +1293,7 @@ static int __hvm_copy(void *buf, paddr_t
    1.56          count = min_t(int, PAGE_SIZE - (addr & ~PAGE_MASK), todo);
    1.57  
    1.58          if ( virt )
    1.59 -            gfn = paging_gva_to_gfn(current, addr);
    1.60 +            gfn = paging_gva_to_gfn(current, addr, &pfec);
    1.61          else
    1.62              gfn = addr >> PAGE_SHIFT;
    1.63          
    1.64 @@ -1310,22 +1325,27 @@ static int __hvm_copy(void *buf, paddr_t
    1.65  
    1.66  int hvm_copy_to_guest_phys(paddr_t paddr, void *buf, int size)
    1.67  {
    1.68 -    return __hvm_copy(buf, paddr, size, 1, 0);
    1.69 +    return __hvm_copy(buf, paddr, size, 1, 0, 0);
    1.70  }
    1.71  
    1.72  int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size)
    1.73  {
    1.74 -    return __hvm_copy(buf, paddr, size, 0, 0);
    1.75 +    return __hvm_copy(buf, paddr, size, 0, 0, 0);
    1.76  }
    1.77  
    1.78  int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size)
    1.79  {
    1.80 -    return __hvm_copy(buf, vaddr, size, 1, 1);
    1.81 +    return __hvm_copy(buf, vaddr, size, 1, 1, 0);
    1.82  }
    1.83  
    1.84  int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size)
    1.85  {
    1.86 -    return __hvm_copy(buf, vaddr, size, 0, 1);
    1.87 +    return __hvm_copy(buf, vaddr, size, 0, 1, 0);
    1.88 +}
    1.89 +
    1.90 +int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size)
    1.91 +{
    1.92 +    return __hvm_copy(buf, vaddr, size, 0, 1, hvm_nx_enabled(current));
    1.93  }
    1.94  
    1.95  
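The hvm.c hunks above establish the pattern the rest of this changeset applies everywhere a guest virtual address is translated: assemble the pagefault error code up front, pass it to paging_gva_to_gfn() by pointer so the walker can refine it, and inject exactly that code if the translation fails.  A minimal illustrative sketch of a caller, not part of the patch (the helper name map_guest_read() is made up; the PFEC_* flags and functions it uses are the ones visible in the hunks above):

    /* Sketch only: translate a guest virtual address for a read and
     * inject #PF with the walker's error code on failure. */
    static void *map_guest_read(unsigned long va)
    {
        uint32_t pfec = PFEC_page_present;
        unsigned long gfn, mfn;
        p2m_type_t p2mt;

        if ( ring_3(guest_cpu_user_regs()) )
            pfec |= PFEC_user_mode;          /* access made on behalf of CPL 3 */

        gfn = paging_gva_to_gfn(current, va, &pfec); /* walker may refine pfec */
        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
        if ( !p2m_is_ram(p2mt) )
        {
            hvm_inject_exception(TRAP_page_fault, pfec, va);
            return NULL;
        }
        return map_domain_page(mfn);         /* caller unmaps when finished */
    }
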
     2.1 --- a/xen/arch/x86/hvm/platform.c	Fri Nov 02 16:34:54 2007 +0000
     2.2 +++ b/xen/arch/x86/hvm/platform.c	Fri Nov 02 16:38:11 2007 +0000
     2.3 @@ -833,7 +833,7 @@ int inst_copy_from_guest(unsigned char *
     2.4  {
     2.5      if ( inst_len > MAX_INST_LEN || inst_len <= 0 )
     2.6          return 0;
     2.7 -    if ( hvm_copy_from_guest_virt(buf, guest_eip, inst_len) )
     2.8 +    if ( hvm_fetch_from_guest_virt(buf, guest_eip, inst_len) )
     2.9          return 0;
    2.10      return inst_len;
    2.11  }
    2.12 @@ -1075,6 +1075,7 @@ void handle_mmio(unsigned long gpa)
    2.13          unsigned long addr, gfn; 
    2.14          paddr_t paddr;
    2.15          int dir, size = op_size;
    2.16 +        uint32_t pfec;
    2.17  
    2.18          ASSERT(count);
    2.19  
    2.20 @@ -1082,8 +1083,11 @@ void handle_mmio(unsigned long gpa)
    2.21          addr = regs->edi;
    2.22          if ( ad_size == WORD )
    2.23              addr &= 0xFFFF;
    2.24 -        addr += hvm_get_segment_base(v, x86_seg_es);
    2.25 -        gfn = paging_gva_to_gfn(v, addr);
    2.26 +        addr += hvm_get_segment_base(v, x86_seg_es);        
    2.27 +        pfec = PFEC_page_present | PFEC_write_access;
    2.28 +        if ( ring_3(regs) )
    2.29 +            pfec |= PFEC_user_mode;
    2.30 +        gfn = paging_gva_to_gfn(v, addr, &pfec);
    2.31          paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
    2.32          if ( paddr == gpa )
    2.33          {
    2.34 @@ -1105,7 +1109,8 @@ void handle_mmio(unsigned long gpa)
    2.35              default: domain_crash_synchronous();
    2.36              }
    2.37              addr += hvm_get_segment_base(v, seg);
    2.38 -            gfn = paging_gva_to_gfn(v, addr);
    2.39 +            pfec &= ~PFEC_write_access;
    2.40 +            gfn = paging_gva_to_gfn(v, addr, &pfec);
    2.41              paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
    2.42          }
    2.43          else
    2.44 @@ -1115,12 +1120,9 @@ void handle_mmio(unsigned long gpa)
    2.45          {
    2.46              /* The guest does not have the non-mmio address mapped. 
    2.47               * Need to send in a page fault */
    2.48 -            int errcode = 0;
    2.49 -            /* IO read --> memory write */
    2.50 -            if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
    2.51              regs->eip -= inst_len; /* do not advance %eip */
    2.52              regs->eflags |= X86_EFLAGS_RF; /* RF was set by original #PF */
    2.53 -            hvm_inject_exception(TRAP_page_fault, errcode, addr);
    2.54 +            hvm_inject_exception(TRAP_page_fault, pfec, addr);
    2.55              return;
    2.56          }
    2.57  
    2.58 @@ -1308,10 +1310,9 @@ void handle_mmio(unsigned long gpa)
    2.59  
    2.60  DEFINE_PER_CPU(int, guest_handles_in_xen_space);
    2.61  
    2.62 -/* Note that copy_{to,from}_user_hvm don't set the A and D bits on
    2.63 -   PTEs, and require the PTE to be writable even when they're only
    2.64 -   trying to read from it.  The guest is expected to deal with
    2.65 -   this. */
    2.66 +/* Note that copy_{to,from}_user_hvm require the PTE to be writable even
    2.67 +   when they're only trying to read from it.  The guest is expected to
    2.68 +   deal with this. */
    2.69  unsigned long copy_to_user_hvm(void *to, const void *from, unsigned len)
    2.70  {
    2.71      if ( this_cpu(guest_handles_in_xen_space) )
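
In the handle_mmio() MOVS path above, note how a single pfec value serves both translations: the ES:EDI destination is translated with PFEC_write_access set, and that bit is then stripped before the source address is translated.  Condensed for illustration only (dst_addr, src_addr, dst_gfn and src_gfn are stand-ins for the values the real code derives from EDI, ESI and the segment bases):

    pfec = PFEC_page_present | PFEC_write_access;  /* ES:EDI side is written */
    if ( ring_3(regs) )
        pfec |= PFEC_user_mode;
    dst_gfn = paging_gva_to_gfn(v, dst_addr, &pfec);

    pfec &= ~PFEC_write_access;                    /* source side is only read */
    src_gfn = paging_gva_to_gfn(v, src_addr, &pfec);
    /* Whichever side is not the MMIO address must be RAM; if its
     * translation failed, pfec is injected unchanged as the #PF code. */
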
     3.1 --- a/xen/arch/x86/hvm/svm/svm.c	Fri Nov 02 16:34:54 2007 +0000
     3.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Fri Nov 02 16:38:11 2007 +0000
     3.3 @@ -1441,6 +1441,7 @@ static void svm_io_instruction(struct vc
     3.4          unsigned long addr, count;
     3.5          paddr_t paddr;
     3.6          unsigned long gfn;
     3.7 +        uint32_t pfec;
     3.8          int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
     3.9  
    3.10          if (!svm_get_io_address(v, regs, size, info, &count, &addr))
    3.11 @@ -1459,15 +1460,17 @@ static void svm_io_instruction(struct vc
    3.12          }
    3.13  
    3.14          /* Translate the address to a physical address */
    3.15 -        gfn = paging_gva_to_gfn(v, addr);
    3.16 +        pfec = PFEC_page_present;
    3.17 +        if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
    3.18 +            pfec |= PFEC_write_access;
    3.19 +        if ( ring_3(regs) )
    3.20 +            pfec |= PFEC_user_mode;
    3.21 +        gfn = paging_gva_to_gfn(v, addr, &pfec);
    3.22          if ( gfn == INVALID_GFN ) 
    3.23          {
    3.24              /* The guest does not have the RAM address mapped. 
    3.25               * Need to send in a page fault */
    3.26 -            int errcode = 0;
    3.27 -            /* IO read --> memory write */
    3.28 -            if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
    3.29 -            svm_hvm_inject_exception(TRAP_page_fault, errcode, addr);
    3.30 +            svm_hvm_inject_exception(TRAP_page_fault, pfec, addr);
    3.31              return;
    3.32          }
    3.33          paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Fri Nov 02 16:34:54 2007 +0000
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Fri Nov 02 16:38:11 2007 +0000
     4.3 @@ -1642,7 +1642,7 @@ static void vmx_do_str_pio(unsigned long
     4.4      unsigned long addr, count = 1, base;
     4.5      paddr_t paddr;
     4.6      unsigned long gfn;
     4.7 -    u32 ar_bytes, limit;
     4.8 +    u32 ar_bytes, limit, pfec;
     4.9      int sign;
    4.10      int long_mode = 0;
    4.11  
    4.12 @@ -1714,15 +1714,17 @@ static void vmx_do_str_pio(unsigned long
    4.13  #endif
    4.14  
    4.15      /* Translate the address to a physical address */
    4.16 -    gfn = paging_gva_to_gfn(current, addr);
    4.17 +    pfec = PFEC_page_present;
    4.18 +    if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
    4.19 +        pfec |= PFEC_write_access;
    4.20 +    if ( ring_3(regs) )
    4.21 +        pfec |= PFEC_user_mode;
    4.22 +    gfn = paging_gva_to_gfn(current, addr, &pfec);
    4.23      if ( gfn == INVALID_GFN )
    4.24      {
    4.25          /* The guest does not have the RAM address mapped.
    4.26           * Need to send in a page fault */
    4.27 -        int errcode = 0;
    4.28 -        /* IO read --> memory write */
    4.29 -        if ( dir == IOREQ_READ ) errcode |= PFEC_write_access;
    4.30 -        vmx_inject_exception(TRAP_page_fault, errcode, addr);
    4.31 +        vmx_inject_exception(TRAP_page_fault, pfec, addr);
    4.32          return;
    4.33      }
    4.34      paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
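
The PFEC_* flags assembled in the SVM and VMX string-PIO paths above correspond to the architectural x86 #PF error-code bits, so the value handed to the inject-exception helpers reads like a hardware error code.  As a worked example (not taken from the patch itself): for a faulting user-mode INS, whose port read becomes a write on the RAM side, the code above builds

    pfec = PFEC_page_present     /* bit 0: the present/protection bit */
         | PFEC_write_access     /* bit 1: the memory access is a write */
         | PFEC_user_mode;       /* bit 2: the access came from CPL 3 */
    /* i.e. 0x7 before the walk; PFEC_insn_fetch (bit 4) is the other flag
     * used by this changeset, and hvm_fetch_from_guest_virt() sets it only
     * when the guest is running with NX enabled. */
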
     5.1 --- a/xen/arch/x86/mm/hap/guest_walk.c	Fri Nov 02 16:34:54 2007 +0000
     5.2 +++ b/xen/arch/x86/mm/hap/guest_walk.c	Fri Nov 02 16:38:11 2007 +0000
     5.3 @@ -40,7 +40,7 @@
     5.4  #if GUEST_PAGING_LEVELS > CONFIG_PAGING_LEVELS
     5.5  
     5.6  unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
     5.7 -    struct vcpu *v, unsigned long gva)
     5.8 +    struct vcpu *v, unsigned long gva, uint32_t *pfec)
     5.9  {
    5.10      gdprintk(XENLOG_ERR,
    5.11               "Guest paging level is greater than host paging level!\n");
    5.12 @@ -61,7 +61,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
    5.13  #endif
    5.14  
    5.15  unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
    5.16 -    struct vcpu *v, unsigned long gva)
    5.17 +    struct vcpu *v, unsigned long gva, uint32_t *pfec)
    5.18  {
    5.19      unsigned long gcr3 = v->arch.hvm_vcpu.guest_cr[3];
    5.20      int mode = GUEST_PAGING_LEVELS;
     6.1 --- a/xen/arch/x86/mm/hap/hap.c	Fri Nov 02 16:34:54 2007 +0000
     6.2 +++ b/xen/arch/x86/mm/hap/hap.c	Fri Nov 02 16:38:11 2007 +0000
     6.3 @@ -695,7 +695,7 @@ hap_write_p2m_entry(struct vcpu *v, unsi
     6.4  }
     6.5  
     6.6  static unsigned long hap_gva_to_gfn_real_mode(
     6.7 -    struct vcpu *v, unsigned long gva)
     6.8 +    struct vcpu *v, unsigned long gva, uint32_t *pfec)
     6.9  {
    6.10      return ((paddr_t)gva >> PAGE_SHIFT);
    6.11  }
     7.1 --- a/xen/arch/x86/mm/hap/private.h	Fri Nov 02 16:34:54 2007 +0000
     7.2 +++ b/xen/arch/x86/mm/hap/private.h	Fri Nov 02 16:38:11 2007 +0000
     7.3 @@ -26,9 +26,12 @@
     7.4  /********************************************/
     7.5  /*          GUEST TRANSLATION FUNCS         */
     7.6  /********************************************/
     7.7 -unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva);
     7.8 -unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva);
     7.9 -unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva);
    7.10 +unsigned long hap_gva_to_gfn_2level(struct vcpu *v, unsigned long gva, 
    7.11 +                                    uint32_t *pfec);
    7.12 +unsigned long hap_gva_to_gfn_3level(struct vcpu *v, unsigned long gva,
    7.13 +                                    uint32_t *pfec);
    7.14 +unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva,
    7.15 +                                    uint32_t *pfec);
    7.16  
    7.17  /********************************************/
    7.18  /*            MISC DEFINITIONS              */
     8.1 --- a/xen/arch/x86/mm/p2m.c	Fri Nov 02 16:34:54 2007 +0000
     8.2 +++ b/xen/arch/x86/mm/p2m.c	Fri Nov 02 16:38:11 2007 +0000
     8.3 @@ -31,7 +31,7 @@
     8.4  
     8.5  /* Debugging and auditing of the P2M code? */
     8.6  #define P2M_AUDIT     0
     8.7 -#define P2M_DEBUGGING 1
     8.8 +#define P2M_DEBUGGING 0
     8.9  
    8.10  /*
    8.11   * The P2M lock.  This protects all updates to the p2m table.
    8.12 @@ -290,11 +290,11 @@ int p2m_alloc_table(struct domain *d,
    8.13                      void (*free_page)(struct domain *d, struct page_info *pg))
    8.14  
    8.15  {
    8.16 -    mfn_t mfn;
    8.17 +    mfn_t mfn = _mfn(INVALID_MFN);
    8.18      struct list_head *entry;
    8.19      struct page_info *page, *p2m_top;
    8.20      unsigned int page_count = 0;
    8.21 -    unsigned long gfn;
    8.22 +    unsigned long gfn = -1UL;
    8.23  
    8.24      p2m_lock(d);
    8.25  
     9.1 --- a/xen/arch/x86/mm/shadow/common.c	Fri Nov 02 16:34:54 2007 +0000
     9.2 +++ b/xen/arch/x86/mm/shadow/common.c	Fri Nov 02 16:38:11 2007 +0000
     9.3 @@ -150,11 +150,13 @@ hvm_read(enum x86_segment seg,
     9.4          return rc;
     9.5  
     9.6      *val = 0;
     9.7 -    // XXX -- this is WRONG.
     9.8 -    //        It entirely ignores the permissions in the page tables.
     9.9 -    //        In this case, that is only a user vs supervisor access check.
    9.10 -    //
    9.11 -    if ( (rc = hvm_copy_from_guest_virt(val, addr, bytes)) == 0 )
    9.12 +
    9.13 +    if ( access_type == hvm_access_insn_fetch )
    9.14 +        rc = hvm_fetch_from_guest_virt(val, addr, bytes);
    9.15 +    else
    9.16 +        rc = hvm_copy_from_guest_virt(val, addr, bytes);
    9.17 +
    9.18 +    if ( rc == 0 ) 
    9.19          return X86EMUL_OKAY;
    9.20  
    9.21      /* If we got here, there was nothing mapped here, or a bad GFN 
    9.22 @@ -395,7 +397,7 @@ struct x86_emulate_ops *shadow_init_emul
    9.23          (!hvm_translate_linear_addr(
    9.24              x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
    9.25              hvm_access_insn_fetch, sh_ctxt, &addr) &&
    9.26 -         !hvm_copy_from_guest_virt(
    9.27 +         !hvm_fetch_from_guest_virt(
    9.28               sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
    9.29          ? sizeof(sh_ctxt->insn_buf) : 0;
    9.30  
    9.31 @@ -423,7 +425,7 @@ void shadow_continue_emulation(struct sh
    9.32                  (!hvm_translate_linear_addr(
    9.33                      x86_seg_cs, regs->eip, sizeof(sh_ctxt->insn_buf),
    9.34                      hvm_access_insn_fetch, sh_ctxt, &addr) &&
    9.35 -                 !hvm_copy_from_guest_virt(
    9.36 +                 !hvm_fetch_from_guest_virt(
    9.37                       sh_ctxt->insn_buf, addr, sizeof(sh_ctxt->insn_buf)))
    9.38                  ? sizeof(sh_ctxt->insn_buf) : 0;
    9.39              sh_ctxt->insn_buf_eip = regs->eip;
    10.1 --- a/xen/arch/x86/mm/shadow/multi.c	Fri Nov 02 16:34:54 2007 +0000
    10.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Fri Nov 02 16:38:11 2007 +0000
    10.3 @@ -189,7 +189,7 @@ guest_supports_nx(struct vcpu *v)
    10.4      if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
    10.5          return 0;
    10.6      if ( !is_hvm_vcpu(v) )
    10.7 -        return 1;
    10.8 +        return cpu_has_nx;
    10.9      return hvm_nx_enabled(v);
   10.10  }
   10.11  
   10.12 @@ -197,22 +197,119 @@ guest_supports_nx(struct vcpu *v)
   10.13  /**************************************************************************/
   10.14  /* Functions for walking the guest page tables */
   10.15  
   10.16 -
   10.17 -/* Walk the guest pagetables, filling the walk_t with what we see. 
   10.18 - * Takes an uninitialised walk_t.  The caller must call unmap_walk() 
   10.19 - * on the walk_t before discarding it or calling guest_walk_tables again. 
   10.20 - * If "guest_op" is non-zero, we are serving a genuine guest memory access, 
   10.21 +/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
   10.22 +static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec) 
   10.23 +{
   10.24 +    static uint32_t flags[] = {
   10.25 +        /* I/F -  Usr Wr */
   10.26 +        /* 0   0   0   0 */ _PAGE_PRESENT, 
   10.27 +        /* 0   0   0   1 */ _PAGE_PRESENT|_PAGE_RW,
   10.28 +        /* 0   0   1   0 */ _PAGE_PRESENT|_PAGE_USER,
   10.29 +        /* 0   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
   10.30 +        /* 0   1   0   0 */ _PAGE_PRESENT, 
   10.31 +        /* 0   1   0   1 */ _PAGE_PRESENT|_PAGE_RW,
   10.32 +        /* 0   1   1   0 */ _PAGE_PRESENT|_PAGE_USER,
   10.33 +        /* 0   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
   10.34 +        /* 1   0   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT, 
   10.35 +        /* 1   0   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
   10.36 +        /* 1   0   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
   10.37 +        /* 1   0   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
   10.38 +        /* 1   1   0   0 */ _PAGE_PRESENT|_PAGE_NX_BIT, 
   10.39 +        /* 1   1   0   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
   10.40 +        /* 1   1   1   0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
   10.41 +        /* 1   1   1   1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
   10.42 +    };
   10.43 +    uint32_t f = flags[(pfec & 0x1f) >> 1];
   10.44 +    /* Don't demand not-NX if the CPU wouldn't enforce it. */
   10.45 +    if ( !guest_supports_nx(v) )
   10.46 +        f &= ~_PAGE_NX_BIT;
   10.47 +    return f;
   10.48 +}
   10.49 +
   10.50 +/* Read, check and modify a guest pagetable entry.  Returns 0 if the
   10.51 + * flags are OK.  Although we use l1e types here, the logic and the bits
   10.52 + * are the same for all types except PAE l3es. */
   10.53 +static int guest_walk_entry(struct vcpu *v, mfn_t gmfn, 
   10.54 +                            void *gp, void *wp,
   10.55 +                            uint32_t flags, int level)
   10.56 +{
   10.57 +    guest_l1e_t e, old_e;
   10.58 +    uint32_t gflags;
   10.59 +    int rc;
   10.60 +
   10.61 +    /* Read the guest entry */
   10.62 +    e = *(guest_l1e_t *)gp;
   10.63 +
   10.64 +    /* Check that all the mandatory flag bits are there.  Invert NX, to
   10.65 +     * calculate as if there were an "X" bit that allowed access. */
   10.66 +    gflags = guest_l1e_get_flags(e) ^ _PAGE_NX_BIT;
   10.67 +    rc = ((gflags & flags) != flags);
   10.68 +    
   10.69 +    /* Set the accessed/dirty bits */
   10.70 +    if ( rc == 0 ) 
   10.71 +    {
   10.72 +        uint32_t bits = _PAGE_ACCESSED;
   10.73 +        if ( (flags & _PAGE_RW) // Implies that the action is a write
   10.74 +             && ((level == 1) || ((level == 2) && (gflags & _PAGE_PSE))) )
   10.75 +            bits |= _PAGE_DIRTY;
   10.76 +        old_e = e;
   10.77 +        e.l1 |= bits;
   10.78 +        SHADOW_PRINTK("flags %lx bits %lx old_e %llx e %llx\n",
   10.79 +                      (unsigned long) flags, 
   10.80 +                      (unsigned long) bits, 
   10.81 +                      (unsigned long long) old_e.l1, 
   10.82 +                      (unsigned long long) e.l1);
    10.83 +        /* Try to write the entry back.  If it's changed under our feet 
   10.84 +         * then leave it alone */
   10.85 +        if ( e.l1 != old_e.l1 )
   10.86 +        {
   10.87 +            (void) cmpxchg(((guest_intpte_t *)gp), old_e.l1, e.l1);
   10.88 +            paging_mark_dirty(v->domain, mfn_x(gmfn));
   10.89 +        }
   10.90 +    }
   10.91 +
   10.92 +    /* Record the entry in the walk */
   10.93 +    *(guest_l1e_t *)wp = e;
   10.94 +    return rc;
   10.95 +}
   10.96 +
   10.97 +/* Walk the guest pagetables, after the manner of a hardware walker. 
   10.98 + *
    10.99 + * Inputs: a vcpu, a virtual address, a walk_t to fill, a pagefault
   10.100 + *         error code, and a flag "shadow_op".
  10.101 + * 
  10.102 + * We walk the vcpu's guest pagetables, filling the walk_t with what we
  10.103 + * see and adding any Accessed and Dirty bits that are needed in the
  10.104 + * guest entries.  Using the pagefault code, we check the permissions as
  10.105 + * we go.  For the purposes of reading pagetables we treat all non-RAM
   10.106 + * memory as containing zeroes.
  10.107 + * 
  10.108 + * If "shadow_op" is non-zero, we are serving a genuine guest memory access, 
  10.109   * and must (a) be under the shadow lock, and (b) remove write access
  10.110 - * from any gueat PT pages we see, as we will be using their contents to 
  10.111 - * perform shadow updates.
  10.112 - * Returns 0 for success or non-zero if the guest pagetables are malformed.
  10.113 - * N.B. Finding a not-present entry does not cause a non-zero return code. */
  10.114 -static inline int 
  10.115 -guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
  10.116 + * from any guest PT pages we see, as we will be shadowing them soon
  10.117 + * and will rely on the contents' not having changed.
  10.118 + * 
  10.119 + * Returns 0 for success or non-zero if the walk did not complete.
  10.120 + * N.B. This is different from the old return code but almost no callers
  10.121 + * checked the old return code anyway.
  10.122 + */
  10.123 +static int 
  10.124 +guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, 
  10.125 +                  uint32_t pfec, int shadow_op)
  10.126  {
  10.127      struct domain *d = v->domain;
  10.128      p2m_type_t p2mt;
  10.129 -    ASSERT(!guest_op || shadow_locked_by_me(d));
  10.130 +    guest_l1e_t *l1p;
  10.131 +#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
  10.132 +    guest_l1e_t *l2p;
  10.133 +#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
  10.134 +    guest_l1e_t *l3p;
  10.135 +#endif    
  10.136 +#endif
  10.137 +    uint32_t flags = mandatory_flags(v, pfec);
  10.138 +    int rc;
  10.139 +
  10.140 +    ASSERT(!shadow_op || shadow_locked_by_me(d));
  10.141      
  10.142      perfc_incr(shadow_guest_walk);
  10.143      memset(gw, 0, sizeof(*gw));
  10.144 @@ -220,84 +317,104 @@ guest_walk_tables(struct vcpu *v, unsign
  10.145  
  10.146  #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
  10.147  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
  10.148 -    /* Get l4e from the top level table */
   10.149 +    /* Get the l4e from the top level table and check its flags */
  10.150      gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
  10.151 -    gw->l4e = (guest_l4e_t *)v->arch.paging.shadow.guest_vtable 
  10.152 -        + guest_l4_table_offset(va);
  10.153 -    /* Walk down to the l3e */
  10.154 -    if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
  10.155 -    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
  10.156 +    rc = guest_walk_entry(v, gw->l4mfn,
  10.157 +                          (guest_l4e_t *)v->arch.paging.shadow.guest_vtable
  10.158 +                          + guest_l4_table_offset(va),
  10.159 +                          &gw->l4e, flags, 4);
  10.160 +    if ( rc != 0 ) return rc;
  10.161 +
  10.162 +    /* Map the l3 table */
  10.163 +    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
  10.164      if ( !p2m_is_ram(p2mt) ) return 1;
  10.165      ASSERT(mfn_valid(gw->l3mfn));
  10.166      /* This mfn is a pagetable: make sure the guest can't write to it. */
  10.167 -    if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
  10.168 +    if ( shadow_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
  10.169          flush_tlb_mask(d->domain_dirty_cpumask); 
  10.170 -    gw->l3e = ((guest_l3e_t *)sh_map_domain_page(gw->l3mfn))
  10.171 -        + guest_l3_table_offset(va);
   10.172 +    /* Get the l3e and check its flags */
  10.173 +    l3p = sh_map_domain_page(gw->l3mfn);
  10.174 +    rc = guest_walk_entry(v, gw->l3mfn, l3p + guest_l3_table_offset(va), 
  10.175 +                          &gw->l3e, flags, 3);
  10.176 +    sh_unmap_domain_page(l3p);
  10.177 +    if ( rc != 0 ) return rc;
  10.178 +
  10.179  #else /* PAE only... */
  10.180 -    /* Get l3e from the cache of the guest's top level table */
  10.181 -    gw->l3e = (guest_l3e_t *)&v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
  10.182 +
   10.183 +    /* Get l3e from the cache of the top level table and check its flags */
  10.184 +    gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
  10.185 +    if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) ) return 1;
  10.186 +
  10.187  #endif /* PAE or 64... */
  10.188 -    /* Walk down to the l2e */
  10.189 -    if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
  10.190 -    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
  10.191 +
  10.192 +    /* Map the l2 table */
  10.193 +    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
  10.194      if ( !p2m_is_ram(p2mt) ) return 1;
  10.195      ASSERT(mfn_valid(gw->l2mfn));
  10.196      /* This mfn is a pagetable: make sure the guest can't write to it. */
  10.197 -    if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
  10.198 +    if ( shadow_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
  10.199          flush_tlb_mask(d->domain_dirty_cpumask); 
  10.200 -    gw->l2e = ((guest_l2e_t *)sh_map_domain_page(gw->l2mfn))
  10.201 -        + guest_l2_table_offset(va);
  10.202 +    /* Get the l2e */
  10.203 +    l2p = sh_map_domain_page(gw->l2mfn);
  10.204 +    rc = guest_walk_entry(v, gw->l2mfn, l2p + guest_l2_table_offset(va),
  10.205 +                          &gw->l2e, flags, 2);
  10.206 +    sh_unmap_domain_page(l2p);
  10.207 +    if ( rc != 0 ) return rc;
  10.208 +
  10.209  #else /* 32-bit only... */
  10.210 -    /* Get l2e from the top level table */
  10.211 +
  10.212 +    /* Get l2e from the top level table and check its flags */
  10.213      gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
  10.214 -    gw->l2e = (guest_l2e_t *)v->arch.paging.shadow.guest_vtable 
  10.215 -        + guest_l2_table_offset(va);
  10.216 +    rc = guest_walk_entry(v, gw->l2mfn, 
  10.217 +                          (guest_l2e_t *)v->arch.paging.shadow.guest_vtable
  10.218 +                          + guest_l2_table_offset(va),
  10.219 +                          &gw->l2e, flags, 2);
  10.220 +    if ( rc != 0 ) return rc;
  10.221 +
  10.222  #endif /* All levels... */
  10.223 -    
  10.224 -    if ( !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PRESENT) ) return 0;
  10.225 +
  10.226      if ( guest_supports_superpages(v) &&
  10.227 -         (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE) ) 
  10.228 +         (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE) ) 
  10.229      {
  10.230          /* Special case: this guest VA is in a PSE superpage, so there's
  10.231           * no guest l1e.  We make one up so that the propagation code
  10.232           * can generate a shadow l1 table.  Start with the gfn of the 
  10.233           * first 4k-page of the superpage. */
  10.234 -        gfn_t start = guest_l2e_get_gfn(*gw->l2e);
  10.235 +        gfn_t start = guest_l2e_get_gfn(gw->l2e);
  10.236          /* Grant full access in the l1e, since all the guest entry's 
  10.237 -         * access controls are enforced in the shadow l2e.  This lets 
  10.238 -         * us reflect l2 changes later without touching the l1s. */
  10.239 +         * access controls are enforced in the shadow l2e. */
  10.240          int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
  10.241                       _PAGE_ACCESSED|_PAGE_DIRTY);
  10.242 -        /* propagate PWT PCD to level 1 for PSE */
  10.243 -        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PWT) )
  10.244 -            flags |= _PAGE_PWT;
  10.245 -        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PCD) )
  10.246 -            flags |= _PAGE_PCD;
  10.247          /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
  10.248 -         * of the level 1 */
  10.249 -        if ( (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE_PAT) ) 
  10.250 -            flags |= _PAGE_PAT; 
  10.251 +         * of the level 1. */
  10.252 +        if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE_PAT) ) 
  10.253 +            flags |= _PAGE_PAT;
  10.254 +        /* Copy the cache-control bits to the l1 as well, because we
  10.255 +         * can't represent PAT in the (non-PSE) shadow l2e. :(
  10.256 +         * This could cause problems if a guest ever maps an area of
  10.257 +         * memory with superpages using more than one caching mode. */
  10.258 +        flags |= guest_l2e_get_flags(gw->l2e) & (_PAGE_PWT|_PAGE_PCD);
  10.259          /* Increment the pfn by the right number of 4k pages.  
  10.260           * The ~0x1 is to mask out the PAT bit mentioned above. */
  10.261          start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
  10.262 -        gw->eff_l1e = guest_l1e_from_gfn(start, flags);
  10.263 -        gw->l1e = NULL;
  10.264 +        gw->l1e = guest_l1e_from_gfn(start, flags);
  10.265          gw->l1mfn = _mfn(INVALID_MFN);
  10.266      } 
  10.267      else 
  10.268      {
  10.269          /* Not a superpage: carry on and find the l1e. */
  10.270 -        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
  10.271 +        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
  10.272          if ( !p2m_is_ram(p2mt) ) return 1;
  10.273          ASSERT(mfn_valid(gw->l1mfn));
  10.274          /* This mfn is a pagetable: make sure the guest can't write to it. */
  10.275 -        if ( guest_op 
  10.276 +        if ( shadow_op 
  10.277               && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
  10.278              flush_tlb_mask(d->domain_dirty_cpumask); 
  10.279 -        gw->l1e = ((guest_l1e_t *)sh_map_domain_page(gw->l1mfn))
  10.280 -            + guest_l1_table_offset(va);
  10.281 -        gw->eff_l1e = *gw->l1e;
  10.282 +        l1p = sh_map_domain_page(gw->l1mfn);
   10.283 +        rc = guest_walk_entry(v, gw->l1mfn, l1p + guest_l1_table_offset(va),
  10.284 +                              &gw->l1e, flags, 1);
  10.285 +        sh_unmap_domain_page(l1p);
  10.286 +        if ( rc != 0 ) return rc;
  10.287      }
  10.288  
  10.289      return 0;
  10.290 @@ -308,9 +425,9 @@ guest_walk_tables(struct vcpu *v, unsign
  10.291  static inline gfn_t
  10.292  guest_walk_to_gfn(walk_t *gw)
  10.293  {
  10.294 -    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
  10.295 +    if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
  10.296          return _gfn(INVALID_GFN);
  10.297 -    return guest_l1e_get_gfn(gw->eff_l1e);
  10.298 +    return guest_l1e_get_gfn(gw->l1e);
  10.299  }
  10.300  
  10.301  /* Given a walk_t, translate the gw->va into the guest's notion of the
  10.302 @@ -318,29 +435,12 @@ guest_walk_to_gfn(walk_t *gw)
  10.303  static inline paddr_t
  10.304  guest_walk_to_gpa(walk_t *gw)
  10.305  {
  10.306 -    if ( !(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT) )
  10.307 +    if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
  10.308          return 0;
  10.309 -    return guest_l1e_get_paddr(gw->eff_l1e) + (gw->va & ~PAGE_MASK);
  10.310 +    return guest_l1e_get_paddr(gw->l1e) + (gw->va & ~PAGE_MASK);
  10.311  }
  10.312  
  10.313 -
  10.314 -/* Unmap (and reinitialise) a guest walk.  
  10.315 - * Call this to dispose of any walk filled in by guest_walk_tables() */
  10.316 -static void unmap_walk(struct vcpu *v, walk_t *gw)
  10.317 -{
  10.318 -#if GUEST_PAGING_LEVELS >= 3
  10.319 -#if GUEST_PAGING_LEVELS >= 4
  10.320 -    if ( gw->l3e != NULL ) sh_unmap_domain_page(gw->l3e);
  10.321 -#endif
  10.322 -    if ( gw->l2e != NULL ) sh_unmap_domain_page(gw->l2e);
  10.323 -#endif
  10.324 -    if ( gw->l1e != NULL ) sh_unmap_domain_page(gw->l1e);
  10.325 -#ifdef DEBUG
  10.326 -    memset(gw, 0, sizeof(*gw));
  10.327 -#endif
  10.328 -}
  10.329 -
  10.330 -
  10.331 +#if 0 /* Keep for debugging */
  10.332  /* Pretty-print the contents of a guest-walk */
  10.333  static inline void print_gw(walk_t *gw)
  10.334  {
  10.335 @@ -348,26 +448,17 @@ static inline void print_gw(walk_t *gw)
  10.336  #if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
  10.337  #if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
  10.338      SHADOW_PRINTK("   l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
  10.339 -    SHADOW_PRINTK("   l4e=%p\n", gw->l4e);
  10.340 -    if ( gw->l4e )
  10.341 -        SHADOW_PRINTK("   *l4e=%" SH_PRI_gpte "\n", gw->l4e->l4);
  10.342 +    SHADOW_PRINTK("   l4e=%" SH_PRI_gpte "\n", gw->l4e.l4);
  10.343      SHADOW_PRINTK("   l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
  10.344  #endif /* PAE or 64... */
  10.345 -    SHADOW_PRINTK("   l3e=%p\n", gw->l3e);
  10.346 -    if ( gw->l3e )
  10.347 -        SHADOW_PRINTK("   *l3e=%" SH_PRI_gpte "\n", gw->l3e->l3);
  10.348 +    SHADOW_PRINTK("   l3e=%" SH_PRI_gpte "\n", gw->l3e.l3);
  10.349  #endif /* All levels... */
  10.350      SHADOW_PRINTK("   l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
  10.351 -    SHADOW_PRINTK("   l2e=%p\n", gw->l2e);
  10.352 -    if ( gw->l2e )
  10.353 -        SHADOW_PRINTK("   *l2e=%" SH_PRI_gpte "\n", gw->l2e->l2);
  10.354 +    SHADOW_PRINTK("   l2e=%" SH_PRI_gpte "\n", gw->l2e.l2);
  10.355      SHADOW_PRINTK("   l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
  10.356 -    SHADOW_PRINTK("   l1e=%p\n", gw->l1e);
  10.357 -    if ( gw->l1e )
  10.358 -        SHADOW_PRINTK("   *l1e=%" SH_PRI_gpte "\n", gw->l1e->l1);
  10.359 -    SHADOW_PRINTK("   eff_l1e=%" SH_PRI_gpte "\n", gw->eff_l1e.l1);
  10.360 +    SHADOW_PRINTK("   l1e=%" SH_PRI_gpte "\n", gw->l1e.l1);
  10.361  }
  10.362 -
  10.363 +#endif /* 0 */
  10.364  
  10.365  #if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
  10.366  /* Lightweight audit: pass all the shadows associated with this guest walk
  10.367 @@ -404,10 +495,10 @@ static void sh_audit_gw(struct vcpu *v, 
  10.368           && mfn_valid((smfn = get_shadow_status(v, gw->l1mfn, 
  10.369                                                  SH_type_l1_shadow))) )
  10.370          (void) sh_audit_l1_table(v, smfn, _mfn(INVALID_MFN));
  10.371 -    else if ( gw->l2e
  10.372 -              && (guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)
  10.373 +    else if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PRESENT)
  10.374 +              && (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)
  10.375                && mfn_valid( 
  10.376 -              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(*gw->l2e)))) )
  10.377 +              (smfn = get_fl1_shadow_status(v, guest_l2e_get_gfn(gw->l2e)))) )
  10.378          (void) sh_audit_fl1_table(v, smfn, _mfn(INVALID_MFN));
  10.379  }
  10.380  
  10.381 @@ -416,85 +507,6 @@ static void sh_audit_gw(struct vcpu *v, 
  10.382  #endif /* audit code */
  10.383  
  10.384  
  10.385 -
  10.386 -/**************************************************************************/
  10.387 -/* Function to write to the guest tables, for propagating accessed and 
  10.388 - * dirty bits from the shadow to the guest.
  10.389 - * Takes a guest mfn, a pointer to the guest entry, the level of pagetable,
  10.390 - * and an operation type.  The guest entry is always passed as an l1e: 
  10.391 - * since we only ever write flags, that's OK.
  10.392 - * Returns the new flag bits of the guest entry. */
  10.393 -
  10.394 -static u32 guest_set_ad_bits(struct vcpu *v,
  10.395 -                             mfn_t gmfn, 
  10.396 -                             guest_l1e_t *ep,
  10.397 -                             unsigned int level, 
  10.398 -                             fetch_type_t ft)
  10.399 -{
  10.400 -    u32 flags;
  10.401 -    int res = 0;
  10.402 -
  10.403 -    ASSERT(ep && !(((unsigned long)ep) & ((sizeof *ep) - 1)));
  10.404 -    ASSERT(level <= GUEST_PAGING_LEVELS);
  10.405 -    ASSERT(shadow_locked_by_me(v->domain));
  10.406 -
  10.407 -    flags = guest_l1e_get_flags(*ep);
  10.408 -
  10.409 -    /* Only set A and D bits for guest-initiated accesses */
  10.410 -    if ( !(ft & FETCH_TYPE_DEMAND) )
  10.411 -        return flags;
  10.412 -
  10.413 -    ASSERT(mfn_valid(gmfn)
  10.414 -           && (sh_mfn_is_a_page_table(gmfn)
  10.415 -               || ((mfn_to_page(gmfn)->u.inuse.type_info & PGT_count_mask) 
  10.416 -                   == 0)));
  10.417 -
  10.418 -    /* PAE l3s do not have A and D bits */
  10.419 -    ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
  10.420 -
  10.421 -    /* Need the D bit as well for writes, in L1es and PSE L2es. */
  10.422 -    if ( ft == ft_demand_write  
  10.423 -         && (level == 1 ||
  10.424 -             (level == 2 && (flags & _PAGE_PSE) && guest_supports_superpages(v))) )
  10.425 -    {
  10.426 -        if ( (flags & (_PAGE_DIRTY | _PAGE_ACCESSED)) 
  10.427 -             == (_PAGE_DIRTY | _PAGE_ACCESSED) )
  10.428 -            return flags;  /* Guest already has A and D bits set */
  10.429 -        flags |= _PAGE_DIRTY | _PAGE_ACCESSED;
  10.430 -        perfc_incr(shadow_ad_update);
  10.431 -    }
  10.432 -    else 
  10.433 -    {
  10.434 -        if ( flags & _PAGE_ACCESSED )
  10.435 -            return flags;  /* Guest already has A bit set */
  10.436 -        flags |= _PAGE_ACCESSED;
  10.437 -        perfc_incr(shadow_a_update);
  10.438 -    }
  10.439 -
  10.440 -    /* Set the bit(s) */
  10.441 -    paging_mark_dirty(v->domain, mfn_x(gmfn));
  10.442 -    SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
  10.443 -                 "old flags = %#x, new flags = %#x\n", 
  10.444 -                 gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep), 
  10.445 -                 flags);
  10.446 -    *ep = guest_l1e_from_gfn(guest_l1e_get_gfn(*ep), flags);
  10.447 -    
  10.448 -    /* Propagate this change to any other shadows of the page 
  10.449 -     * (only necessary if there is more than one shadow) */
  10.450 -    if ( mfn_to_page(gmfn)->count_info & PGC_page_table )
  10.451 -    {
  10.452 -        u32 shflags = mfn_to_page(gmfn)->shadow_flags & SHF_page_type_mask;
  10.453 -        /* More than one type bit set in shadow-flags? */
  10.454 -        if ( shflags & ~(1UL << find_first_set_bit(shflags)) )
  10.455 -            res = sh_validate_guest_entry(v, gmfn, ep, sizeof (*ep));
  10.456 -    }
  10.457 -
  10.458 -    /* We should never need to flush the TLB or recopy PAE entries */
  10.459 -    ASSERT((res == 0) || (res == SHADOW_SET_CHANGED));
  10.460 -
  10.461 -    return flags;
  10.462 -}
  10.463 -
  10.464  #if (CONFIG_PAGING_LEVELS == GUEST_PAGING_LEVELS) && (CONFIG_PAGING_LEVELS == SHADOW_PAGING_LEVELS)
  10.465  void *
  10.466  sh_guest_map_l1e(struct vcpu *v, unsigned long addr,
  10.467 @@ -509,11 +521,9 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
  10.468      // FIXME!
  10.469  
  10.470      shadow_lock(v->domain);
  10.471 -    guest_walk_tables(v, addr, &gw, 1);
  10.472 -
  10.473 -    if ( gw.l2e &&
  10.474 -         (guest_l2e_get_flags(*gw.l2e) & _PAGE_PRESENT) &&
  10.475 -         !(guest_supports_superpages(v) && (guest_l2e_get_flags(*gw.l2e) & _PAGE_PSE)) )
  10.476 +    guest_walk_tables(v, addr, &gw, 0, 1);
  10.477 +
  10.478 +    if ( mfn_valid(gw.l1mfn) )
  10.479      {
  10.480          if ( gl1mfn )
  10.481              *gl1mfn = mfn_x(gw.l1mfn);
  10.482 @@ -521,7 +531,6 @@ sh_guest_map_l1e(struct vcpu *v, unsigne
  10.483              (guest_l1_table_offset(addr) * sizeof(guest_l1e_t));
  10.484      }
  10.485  
  10.486 -    unmap_walk(v, &gw);
  10.487      shadow_unlock(v->domain);
  10.488  
  10.489      return pl1e;
  10.490 @@ -538,9 +547,8 @@ sh_guest_get_eff_l1e(struct vcpu *v, uns
  10.491      // FIXME!
  10.492  
  10.493      shadow_lock(v->domain);
  10.494 -    guest_walk_tables(v, addr, &gw, 1);
  10.495 -    *(guest_l1e_t *)eff_l1e = gw.eff_l1e;
  10.496 -    unmap_walk(v, &gw);
  10.497 +    guest_walk_tables(v, addr, &gw, 0, 1);
  10.498 +    *(guest_l1e_t *)eff_l1e = gw.l1e;
  10.499      shadow_unlock(v->domain);
  10.500  }
  10.501  #endif /* CONFIG==SHADOW==GUEST */
  10.502 @@ -636,17 +644,17 @@ unsigned char pat_type_2_pte_flags(unsig
  10.503  
  10.504  static always_inline void
  10.505  _sh_propagate(struct vcpu *v, 
  10.506 -              void *guest_entry_ptr, 
  10.507 -              mfn_t guest_table_mfn, 
  10.508 +              guest_intpte_t guest_intpte,
  10.509                mfn_t target_mfn, 
  10.510                void *shadow_entry_ptr,
  10.511                int level,
  10.512                fetch_type_t ft, 
  10.513                p2m_type_t p2mt)
  10.514  {
  10.515 -    guest_l1e_t *gp = guest_entry_ptr;
  10.516 +    guest_l1e_t guest_entry = { guest_intpte };
  10.517      shadow_l1e_t *sp = shadow_entry_ptr;
  10.518      struct domain *d = v->domain;
  10.519 +    gfn_t target_gfn = guest_l1e_get_gfn(guest_entry);
  10.520      u32 pass_thru_flags;
  10.521      u32 gflags, sflags;
  10.522  
  10.523 @@ -660,15 +668,7 @@ static always_inline void
  10.524          goto done;
  10.525      }
  10.526  
  10.527 -    if ( mfn_valid(guest_table_mfn) )
  10.528 -        /* Handle A and D bit propagation into the guest */
  10.529 -        gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
  10.530 -    else 
  10.531 -    {
  10.532 -        /* Must be an fl1e or a prefetch */
  10.533 -        ASSERT(level==1 || !(ft & FETCH_TYPE_DEMAND));
  10.534 -        gflags = guest_l1e_get_flags(*gp);
  10.535 -    }
  10.536 +    gflags = guest_l1e_get_flags(guest_entry);
  10.537  
  10.538      if ( unlikely(!(gflags & _PAGE_PRESENT)) )
  10.539      {
  10.540 @@ -684,7 +684,7 @@ static always_inline void
  10.541      if ( level == 1 && p2mt == p2m_mmio_dm )
  10.542      {
  10.543          /* Guest l1e maps emulated MMIO space */
  10.544 -        *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
  10.545 +        *sp = sh_l1e_mmio(target_gfn, gflags);
  10.546          if ( !d->arch.paging.shadow.has_fast_mmio_entries )
  10.547              d->arch.paging.shadow.has_fast_mmio_entries = 1;
  10.548          goto done;
  10.549 @@ -694,9 +694,6 @@ static always_inline void
  10.550      // case of a prefetch, an invalid mfn means that we can not usefully
  10.551      // shadow anything, and so we return early.
  10.552      //
  10.553 -    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
  10.554 -     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
  10.555 -     * MMIO areas need to be added to the frame-table to make them "valid". */
  10.556      if ( shadow_mode_refcounts(d) && 
  10.557           !mfn_valid(target_mfn) && (p2mt != p2m_mmio_direct) )
  10.558      {
  10.559 @@ -718,20 +715,22 @@ static always_inline void
  10.560          pass_thru_flags |= _PAGE_PAT | _PAGE_PCD | _PAGE_PWT;
  10.561      sflags = gflags & pass_thru_flags;
  10.562  
  10.563 -    /* Only change memory caching type for pass-through domain */
  10.564 +    /*
  10.565 +     * For HVM domains with direct access to MMIO areas, set the correct
  10.566 +     * caching attributes in the shadows to match what was asked for
  10.567 +     */
  10.568      if ( (level == 1) && is_hvm_domain(d) &&
  10.569           !list_empty(&(domain_hvm_iommu(d)->pdev_list)) )
  10.570      {
  10.571          unsigned int type;
  10.572 -        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(guest_l1e_get_gfn(*gp)),
  10.573 -                                          &type) )
  10.574 +        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), &type) )
  10.575              sflags |= pat_type_2_pte_flags(type);
  10.576 -        else if ( v->domain->arch.hvm_domain.is_in_uc_mode )
  10.577 +        else if ( d->arch.hvm_domain.is_in_uc_mode )
  10.578              sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
  10.579          else
  10.580              sflags |= get_pat_flags(v,
  10.581                                      gflags,
  10.582 -                                    guest_l1e_get_paddr(*gp),
  10.583 +                                    gfn_to_paddr(target_gfn),
  10.584                                      mfn_x(target_mfn) << PAGE_SHIFT);
  10.585      }
  10.586  
  10.587 @@ -813,59 +812,55 @@ static always_inline void
  10.588   done:
  10.589      SHADOW_DEBUG(PROPAGATE,
  10.590                   "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
  10.591 -                 fetch_type_names[ft], level, gp->l1, sp->l1);
  10.592 +                 fetch_type_names[ft], level, guest_entry.l1, sp->l1);
  10.593  }
  10.594  
  10.595  
  10.596 -/* These four wrappers give us a little bit of type-safety back around the 
  10.597 - * use of void-* pointers in _sh_propagate(), and allow the compiler to 
  10.598 - * optimize out some level checks. */
  10.599 +/* These four wrappers give us a little bit of type-safety back around
  10.600 + * the use of void-* pointers and intpte types in _sh_propagate(), and
  10.601 + * allow the compiler to optimize out some level checks. */
  10.602  
  10.603  #if GUEST_PAGING_LEVELS >= 4
  10.604  static void
  10.605  l4e_propagate_from_guest(struct vcpu *v, 
  10.606 -                         guest_l4e_t *gl4e,
  10.607 -                         mfn_t gl4mfn,
  10.608 +                         guest_l4e_t gl4e,
  10.609                           mfn_t sl3mfn,
  10.610                           shadow_l4e_t *sl4e,
  10.611                           fetch_type_t ft)
  10.612  {
  10.613 -    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
  10.614 +    _sh_propagate(v, gl4e.l4, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
  10.615  }
  10.616  
  10.617  static void
  10.618  l3e_propagate_from_guest(struct vcpu *v,
  10.619 -                         guest_l3e_t *gl3e,
  10.620 -                         mfn_t gl3mfn, 
  10.621 +                         guest_l3e_t gl3e,
  10.622                           mfn_t sl2mfn, 
  10.623                           shadow_l3e_t *sl3e,
  10.624                           fetch_type_t ft)
  10.625  {
  10.626 -    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
  10.627 +    _sh_propagate(v, gl3e.l3, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
  10.628  }
  10.629  #endif // GUEST_PAGING_LEVELS >= 4
  10.630  
  10.631  static void
  10.632  l2e_propagate_from_guest(struct vcpu *v, 
  10.633 -                         guest_l2e_t *gl2e,
  10.634 -                         mfn_t gl2mfn,
  10.635 +                         guest_l2e_t gl2e,
  10.636                           mfn_t sl1mfn,
  10.637                           shadow_l2e_t *sl2e,
  10.638                           fetch_type_t ft)
  10.639  {
  10.640 -    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
  10.641 +    _sh_propagate(v, gl2e.l2, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
  10.642  }
  10.643  
  10.644  static void
  10.645  l1e_propagate_from_guest(struct vcpu *v, 
  10.646 -                         guest_l1e_t *gl1e,
  10.647 -                         mfn_t gl1mfn,
  10.648 +                         guest_l1e_t gl1e,
  10.649                           mfn_t gmfn, 
  10.650                           shadow_l1e_t *sl1e,
  10.651                           fetch_type_t ft, 
  10.652                           p2m_type_t p2mt)
  10.653  {
  10.654 -    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
  10.655 +    _sh_propagate(v, gl1e.l1, gmfn, sl1e, 1, ft, p2mt);
  10.656  }
  10.657  
  10.658  
  10.659 @@ -1859,8 +1854,7 @@ static shadow_l3e_t * shadow_get_and_cre
  10.660              *sl3mfn = sh_make_shadow(v, gw->l3mfn, SH_type_l3_shadow);
  10.661          }
  10.662          /* Install the new sl3 table in the sl4e */
  10.663 -        l4e_propagate_from_guest(v, gw->l4e, gw->l4mfn, 
  10.664 -                                 *sl3mfn, &new_sl4e, ft);
  10.665 +        l4e_propagate_from_guest(v, gw->l4e, *sl3mfn, &new_sl4e, ft);
  10.666          r = shadow_set_l4e(v, sl4e, new_sl4e, sl4mfn);
  10.667          ASSERT((r & SHADOW_SET_FLUSH) == 0);
  10.668          if ( r & SHADOW_SET_ERROR )
  10.669 @@ -1909,8 +1903,7 @@ static shadow_l2e_t * shadow_get_and_cre
  10.670              *sl2mfn = sh_make_shadow(v, gw->l2mfn, t);
  10.671          }
  10.672          /* Install the new sl2 table in the sl3e */
  10.673 -        l3e_propagate_from_guest(v, gw->l3e, gw->l3mfn, 
  10.674 -                                 *sl2mfn, &new_sl3e, ft);
  10.675 +        l3e_propagate_from_guest(v, gw->l3e, *sl2mfn, &new_sl3e, ft);
  10.676          r = shadow_set_l3e(v, sl3e, new_sl3e, sl3mfn);
  10.677          ASSERT((r & SHADOW_SET_FLUSH) == 0);
  10.678          if ( r & SHADOW_SET_ERROR )
  10.679 @@ -1934,7 +1927,7 @@ static shadow_l2e_t * shadow_get_and_cre
  10.680      /* This next line is important: the guest l2 has a 16k
  10.681       * shadow, we need to return the right mfn of the four. This
  10.682       * call will set it for us as a side-effect. */
  10.683 -    (void) shadow_l2_index(sl2mfn, guest_index(gw->l2e));
  10.684 +    (void) shadow_l2_index(sl2mfn, guest_l2_table_offset(gw->va));
  10.685      /* Reading the top level table is always valid. */
  10.686      return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
  10.687  #endif 
  10.688 @@ -1956,8 +1949,8 @@ static shadow_l1e_t * shadow_get_and_cre
  10.689       * re-do it to fix a PSE dirty bit. */
  10.690      if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT 
  10.691           && likely(ft != ft_demand_write
  10.692 -                   || (guest_l2e_get_flags(*gw->l2e) & _PAGE_DIRTY) 
  10.693 -                   || !(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE)) )
  10.694 +                   || (shadow_l2e_get_flags(*sl2e) & _PAGE_RW) 
  10.695 +                   || !(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE)) )
  10.696      {
  10.697          *sl1mfn = shadow_l2e_get_mfn(*sl2e);
  10.698          ASSERT(mfn_valid(*sl1mfn));
  10.699 @@ -1965,14 +1958,14 @@ static shadow_l1e_t * shadow_get_and_cre
  10.700      else 
  10.701      {
  10.702          shadow_l2e_t new_sl2e;
  10.703 -        int r, flags = guest_l2e_get_flags(*gw->l2e);
  10.704 +        int r, flags = guest_l2e_get_flags(gw->l2e);
  10.705          /* No l1 shadow installed: find and install it. */
  10.706          if ( !(flags & _PAGE_PRESENT) )
  10.707              return NULL; /* No guest page. */
  10.708          if ( guest_supports_superpages(v) && (flags & _PAGE_PSE) ) 
  10.709          {
  10.710              /* Splintering a superpage */
  10.711 -            gfn_t l2gfn = guest_l2e_get_gfn(*gw->l2e);
  10.712 +            gfn_t l2gfn = guest_l2e_get_gfn(gw->l2e);
  10.713              *sl1mfn = get_fl1_shadow_status(v, l2gfn);
  10.714              if ( !mfn_valid(*sl1mfn) ) 
  10.715              {
  10.716 @@ -1992,8 +1985,7 @@ static shadow_l1e_t * shadow_get_and_cre
  10.717              }
  10.718          }
  10.719          /* Install the new sl1 table in the sl2e */
  10.720 -        l2e_propagate_from_guest(v, gw->l2e, gw->l2mfn, 
  10.721 -                                 *sl1mfn, &new_sl2e, ft);
  10.722 +        l2e_propagate_from_guest(v, gw->l2e, *sl1mfn, &new_sl2e, ft);
  10.723          r = shadow_set_l2e(v, sl2e, new_sl2e, sl2mfn);
  10.724          ASSERT((r & SHADOW_SET_FLUSH) == 0);        
  10.725          if ( r & SHADOW_SET_ERROR )
  10.726 @@ -2247,7 +2239,7 @@ void sh_unhook_64b_mappings(struct vcpu 
  10.727  static int validate_gl4e(struct vcpu *v, void *new_ge, mfn_t sl4mfn, void *se)
  10.728  {
  10.729      shadow_l4e_t new_sl4e;
  10.730 -    guest_l4e_t *new_gl4e = new_ge;
  10.731 +    guest_l4e_t new_gl4e = *(guest_l4e_t *)new_ge;
  10.732      shadow_l4e_t *sl4p = se;
  10.733      mfn_t sl3mfn = _mfn(INVALID_MFN);
  10.734      struct domain *d = v->domain;
  10.735 @@ -2256,17 +2248,16 @@ static int validate_gl4e(struct vcpu *v,
  10.736  
  10.737      perfc_incr(shadow_validate_gl4e_calls);
  10.738  
  10.739 -    if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
  10.740 +    if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT )
  10.741      {
  10.742 -        gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
  10.743 +        gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
  10.744          mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
  10.745          if ( p2m_is_ram(p2mt) )
  10.746              sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
  10.747          else
  10.748              result |= SHADOW_SET_ERROR;
  10.749      }
  10.750 -    l4e_propagate_from_guest(v, new_gl4e, _mfn(INVALID_MFN),
  10.751 -                             sl3mfn, &new_sl4e, ft_prefetch);
  10.752 +    l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
  10.753  
  10.754      // check for updates to xen reserved slots
  10.755      if ( !shadow_mode_external(d) )
  10.756 @@ -2301,7 +2292,7 @@ static int validate_gl4e(struct vcpu *v,
  10.757  static int validate_gl3e(struct vcpu *v, void *new_ge, mfn_t sl3mfn, void *se)
  10.758  {
  10.759      shadow_l3e_t new_sl3e;
  10.760 -    guest_l3e_t *new_gl3e = new_ge;
  10.761 +    guest_l3e_t new_gl3e = *(guest_l3e_t *)new_ge;
  10.762      shadow_l3e_t *sl3p = se;
  10.763      mfn_t sl2mfn = _mfn(INVALID_MFN);
  10.764      p2m_type_t p2mt;
  10.765 @@ -2309,17 +2300,16 @@ static int validate_gl3e(struct vcpu *v,
  10.766  
  10.767      perfc_incr(shadow_validate_gl3e_calls);
  10.768  
  10.769 -    if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
  10.770 +    if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT )
  10.771      {
  10.772 -        gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
  10.773 +        gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
  10.774          mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
  10.775          if ( p2m_is_ram(p2mt) )
  10.776              sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
  10.777          else
  10.778              result |= SHADOW_SET_ERROR;
  10.779      }
  10.780 -    l3e_propagate_from_guest(v, new_gl3e, _mfn(INVALID_MFN), 
  10.781 -                             sl2mfn, &new_sl3e, ft_prefetch);
  10.782 +    l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
  10.783      result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
  10.784  
  10.785      return result;
  10.786 @@ -2329,7 +2319,7 @@ static int validate_gl3e(struct vcpu *v,
  10.787  static int validate_gl2e(struct vcpu *v, void *new_ge, mfn_t sl2mfn, void *se)
  10.788  {
  10.789      shadow_l2e_t new_sl2e;
  10.790 -    guest_l2e_t *new_gl2e = new_ge;
  10.791 +    guest_l2e_t new_gl2e = *(guest_l2e_t *)new_ge;
  10.792      shadow_l2e_t *sl2p = se;
  10.793      mfn_t sl1mfn = _mfn(INVALID_MFN);
  10.794      p2m_type_t p2mt;
  10.795 @@ -2337,11 +2327,11 @@ static int validate_gl2e(struct vcpu *v,
  10.796  
  10.797      perfc_incr(shadow_validate_gl2e_calls);
  10.798  
  10.799 -    if ( guest_l2e_get_flags(*new_gl2e) & _PAGE_PRESENT )
  10.800 +    if ( guest_l2e_get_flags(new_gl2e) & _PAGE_PRESENT )
  10.801      {
  10.802 -        gfn_t gl1gfn = guest_l2e_get_gfn(*new_gl2e);
  10.803 +        gfn_t gl1gfn = guest_l2e_get_gfn(new_gl2e);
  10.804          if ( guest_supports_superpages(v) &&
  10.805 -             (guest_l2e_get_flags(*new_gl2e) & _PAGE_PSE) )
  10.806 +             (guest_l2e_get_flags(new_gl2e) & _PAGE_PSE) )
  10.807          {
  10.808              // superpage -- need to look up the shadow L1 which holds the
  10.809              // splitters...
  10.810 @@ -2364,8 +2354,7 @@ static int validate_gl2e(struct vcpu *v,
  10.811                  result |= SHADOW_SET_ERROR;
  10.812          }
  10.813      }
  10.814 -    l2e_propagate_from_guest(v, new_gl2e, _mfn(INVALID_MFN),
  10.815 -                             sl1mfn, &new_sl2e, ft_prefetch);
  10.816 +    l2e_propagate_from_guest(v, new_gl2e, sl1mfn, &new_sl2e, ft_prefetch);
  10.817  
  10.818      // check for updates to xen reserved slots in PV guests...
  10.819      // XXX -- need to revisit this for PV 3-on-4 guests.
  10.820 @@ -2415,7 +2404,7 @@ static int validate_gl2e(struct vcpu *v,
  10.821  static int validate_gl1e(struct vcpu *v, void *new_ge, mfn_t sl1mfn, void *se)
  10.822  {
  10.823      shadow_l1e_t new_sl1e;
  10.824 -    guest_l1e_t *new_gl1e = new_ge;
  10.825 +    guest_l1e_t new_gl1e = *(guest_l1e_t *)new_ge;
  10.826      shadow_l1e_t *sl1p = se;
  10.827      gfn_t gfn;
  10.828      mfn_t gmfn;
  10.829 @@ -2424,11 +2413,10 @@ static int validate_gl1e(struct vcpu *v,
  10.830  
  10.831      perfc_incr(shadow_validate_gl1e_calls);
  10.832  
  10.833 -    gfn = guest_l1e_get_gfn(*new_gl1e);
  10.834 +    gfn = guest_l1e_get_gfn(new_gl1e);
  10.835      gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
  10.836  
  10.837 -    l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
  10.838 -                             ft_prefetch, p2mt);
  10.839 +    l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
  10.840      
  10.841      result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
  10.842      return result;
  10.843 @@ -2615,7 +2603,7 @@ static void sh_prefetch(struct vcpu *v, 
  10.844      int i, dist;
  10.845      gfn_t gfn;
  10.846      mfn_t gmfn;
  10.847 -    guest_l1e_t gl1e;
  10.848 +    guest_l1e_t *gl1p = NULL, gl1e;
  10.849      shadow_l1e_t sl1e;
  10.850      u32 gflags;
  10.851      p2m_type_t p2mt;
  10.852 @@ -2626,16 +2614,23 @@ static void sh_prefetch(struct vcpu *v, 
  10.853      if ( dist > PREFETCH_DISTANCE )
  10.854          dist = PREFETCH_DISTANCE;
  10.855  
  10.856 +    if ( mfn_valid(gw->l1mfn) )
  10.857 +    {
   10.858 +        /* Normal guest page; map its l1 table so we can read the entries */
  10.859 +        gl1p = sh_map_domain_page(gw->l1mfn);
  10.860 +        gl1p += guest_l1_table_offset(gw->va);
  10.861 +    }
  10.862 +
  10.863      for ( i = 1; i < dist ; i++ ) 
  10.864      {
  10.865          /* No point in prefetching if there's already a shadow */
  10.866          if ( ptr_sl1e[i].l1 != 0 )
  10.867              break;
  10.868  
  10.869 -        if ( gw->l1e )
  10.870 +        if ( mfn_valid(gw->l1mfn) )
  10.871          {
  10.872              /* Normal guest page; grab the next guest entry */
  10.873 -            gl1e = gw->l1e[i];
  10.874 +            gl1e = gl1p[i];
  10.875              /* Not worth continuing if we hit an entry that will need another
  10.876               * fault for A/D-bit propagation anyway */
  10.877              gflags = guest_l1e_get_flags(gl1e);
  10.878 @@ -2647,24 +2642,23 @@ static void sh_prefetch(struct vcpu *v, 
  10.879          else 
  10.880          {
  10.881              /* Fragmented superpage, unless we've been called wrongly */
  10.882 -            ASSERT(guest_l2e_get_flags(*gw->l2e) & _PAGE_PSE);
  10.883 +            ASSERT(guest_l2e_get_flags(gw->l2e) & _PAGE_PSE);
  10.884              /* Increment the l1e's GFN by the right number of guest pages */
  10.885              gl1e = guest_l1e_from_gfn(
  10.886 -                _gfn(gfn_x(guest_l1e_get_gfn(gw->eff_l1e)) + i), 
  10.887 -                guest_l1e_get_flags(gw->eff_l1e));
  10.888 +                _gfn(gfn_x(guest_l1e_get_gfn(gw->l1e)) + i), 
  10.889 +                guest_l1e_get_flags(gw->l1e));
  10.890          }
  10.891  
  10.892          /* Look at the gfn that the l1e is pointing at */
  10.893          gfn = guest_l1e_get_gfn(gl1e);
  10.894          gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
  10.895  
  10.896 -        /* Propagate the entry.  Safe to use a pointer to our local 
  10.897 -         * gl1e, since this is not a demand-fetch so there will be no 
  10.898 -         * write-back to the guest. */
  10.899 -        l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
  10.900 -                                 gmfn, &sl1e, ft_prefetch, p2mt);
  10.901 +        /* Propagate the entry.  */
  10.902 +        l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
  10.903          (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
  10.904      }
  10.905 +    if ( gl1p != NULL )
  10.906 +        sh_unmap_domain_page(gl1p);
  10.907  }
  10.908  
  10.909  #endif /* SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH */
  10.910 @@ -2684,7 +2678,6 @@ static int sh_page_fault(struct vcpu *v,
  10.911  {
  10.912      struct domain *d = v->domain;
  10.913      walk_t gw;
  10.914 -    u32 accumulated_gflags;
  10.915      gfn_t gfn;
  10.916      mfn_t gmfn, sl1mfn=_mfn(0);
  10.917      shadow_l1e_t sl1e, *ptr_sl1e;
  10.918 @@ -2769,10 +2762,10 @@ static int sh_page_fault(struct vcpu *v,
  10.919      
  10.920      shadow_audit_tables(v);
  10.921                     
  10.922 -    if ( guest_walk_tables(v, va, &gw, 1) != 0 )
  10.923 +    if ( guest_walk_tables(v, va, &gw, regs->error_code, 1) != 0 )
  10.924      {
  10.925 -        SHADOW_PRINTK("malformed guest pagetable\n");
  10.926 -        print_gw(&gw);
  10.927 +        perfc_incr(shadow_fault_bail_real_fault);
  10.928 +        goto not_a_shadow_fault;
  10.929      }
  10.930  
  10.931      /* It's possible that the guest has put pagetables in memory that it has 
  10.932 @@ -2788,64 +2781,12 @@ static int sh_page_fault(struct vcpu *v,
  10.933  
  10.934      sh_audit_gw(v, &gw);
  10.935  
  10.936 -    // We do not look at the gw->l1e, as that will not exist for superpages.
  10.937 -    // Instead, we use the gw->eff_l1e...
  10.938 -    //
  10.939 -    // We need not check all the levels of the guest page table entries for
  10.940 -    // present vs not-present, as the eff_l1e will always be not present if
  10.941 -    // one of the higher level entries is not present.
  10.942 -    //
  10.943 -    if ( unlikely(!(guest_l1e_get_flags(gw.eff_l1e) & _PAGE_PRESENT)) )
  10.944 -    {
  10.945 -        perfc_incr(shadow_fault_bail_not_present);
  10.946 -        goto not_a_shadow_fault;
  10.947 -    }
  10.948 -
  10.949 -    // All levels of the guest page table are now known to be present.
  10.950 -    accumulated_gflags = accumulate_guest_flags(v, &gw);
  10.951 -
  10.952 -    // Check for attempts to access supervisor-only pages from user mode,
  10.953 -    // i.e. ring 3.  Such errors are not caused or dealt with by the shadow
  10.954 -    // code.
  10.955 -    //
  10.956 -    if ( (regs->error_code & PFEC_user_mode) &&
  10.957 -         !(accumulated_gflags & _PAGE_USER) )
  10.958 -    {
  10.959 -        /* illegal user-mode access to supervisor-only page */
  10.960 -        perfc_incr(shadow_fault_bail_user_supervisor);
  10.961 -        goto not_a_shadow_fault;
  10.962 -    }
  10.963 -
  10.964 -    // Was it a write fault?
  10.965 +    /* What kind of access are we dealing with? */
  10.966      ft = ((regs->error_code & PFEC_write_access)
  10.967            ? ft_demand_write : ft_demand_read);
  10.968 -    if ( ft == ft_demand_write )
  10.969 -    {
  10.970 -        if ( unlikely(!(accumulated_gflags & _PAGE_RW)) )
  10.971 -        {
  10.972 -            perfc_incr(shadow_fault_bail_ro_mapping);
  10.973 -            goto not_a_shadow_fault;
  10.974 -        }
  10.975 -    }
  10.976 -    else // must have been either an insn fetch or read fault
  10.977 -    {
  10.978 -        // Check for NX bit violations: attempts to execute code that is
  10.979 -        // marked "do not execute".  Such errors are not caused or dealt with
  10.980 -        // by the shadow code.
  10.981 -        //
  10.982 -        if ( regs->error_code & PFEC_insn_fetch )
  10.983 -        {
  10.984 -            if ( accumulated_gflags & _PAGE_NX_BIT )
  10.985 -            {
  10.986 -                /* NX prevented this code fetch */
  10.987 -                perfc_incr(shadow_fault_bail_nx);
  10.988 -                goto not_a_shadow_fault;
  10.989 -            }
  10.990 -        }
  10.991 -    }
  10.992  
  10.993      /* What mfn is the guest trying to access? */
  10.994 -    gfn = guest_l1e_get_gfn(gw.eff_l1e);
  10.995 +    gfn = guest_l1e_get_gfn(gw.l1e);
  10.996      gmfn = gfn_to_mfn(d, gfn, &p2mt);
  10.997  
  10.998      if ( shadow_mode_refcounts(d) && 
  10.999 @@ -2876,14 +2817,12 @@ static int sh_page_fault(struct vcpu *v,
 10.1000           * shadow_set_l*e(), which will have crashed the guest.
 10.1001           * Get out of the fault handler immediately. */
 10.1002          ASSERT(d->is_shutting_down);
 10.1003 -        unmap_walk(v, &gw);
 10.1004          shadow_unlock(d);
 10.1005          return 0;
 10.1006      }
 10.1007  
 10.1008      /* Calculate the shadow entry and write it */
 10.1009 -    l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
 10.1010 -                             gmfn, &sl1e, ft, p2mt);
 10.1011 +    l1e_propagate_from_guest(v, gw.l1e, gmfn, &sl1e, ft, p2mt);
 10.1012      r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 10.1013  
 10.1014  #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
 10.1015 @@ -2921,7 +2860,6 @@ static int sh_page_fault(struct vcpu *v,
 10.1016  
 10.1017   done:
 10.1018      sh_audit_gw(v, &gw);
 10.1019 -    unmap_walk(v, &gw);
 10.1020      SHADOW_PRINTK("fixed\n");
 10.1021      shadow_audit_tables(v);
 10.1022      shadow_unlock(d);
 10.1023 @@ -2972,7 +2910,6 @@ static int sh_page_fault(struct vcpu *v,
 10.1024       * take it again when we write to the pagetables.
 10.1025       */
 10.1026      sh_audit_gw(v, &gw);
 10.1027 -    unmap_walk(v, &gw);
 10.1028      shadow_audit_tables(v);
 10.1029      shadow_unlock(d);
 10.1030  
 10.1031 @@ -3033,7 +2970,6 @@ static int sh_page_fault(struct vcpu *v,
 10.1032          goto not_a_shadow_fault;
 10.1033      perfc_incr(shadow_fault_mmio);
 10.1034      sh_audit_gw(v, &gw);
 10.1035 -    unmap_walk(v, &gw);
 10.1036      SHADOW_PRINTK("mmio %#"PRIpaddr"\n", gpa);
 10.1037      shadow_audit_tables(v);
 10.1038      reset_early_unshadow(v);
 10.1039 @@ -3043,7 +2979,6 @@ static int sh_page_fault(struct vcpu *v,
 10.1040  
 10.1041   not_a_shadow_fault:
 10.1042      sh_audit_gw(v, &gw);
 10.1043 -    unmap_walk(v, &gw);
 10.1044      SHADOW_PRINTK("not a shadow fault\n");
 10.1045      shadow_audit_tables(v);
 10.1046      reset_early_unshadow(v);
 10.1047 @@ -3129,30 +3064,36 @@ sh_invlpg(struct vcpu *v, unsigned long 
 10.1048  
 10.1049  
 10.1050  static unsigned long
 10.1051 -sh_gva_to_gfn(struct vcpu *v, unsigned long va)
 10.1052 +sh_gva_to_gfn(struct vcpu *v, unsigned long va, uint32_t *pfec)
 10.1053  /* Called to translate a guest virtual address to what the *guest*
 10.1054   * pagetables would map it to. */
 10.1055  {
 10.1056      walk_t gw;
 10.1057      gfn_t gfn;
 10.1058 -    
 10.1059 +
 10.1060  #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 10.1061      struct shadow_vtlb t = {0};
 10.1062 -    if ( vtlb_lookup(v, va, &t) )
 10.1063 +    /* Check the vTLB cache first */
 10.1064 +    if ( vtlb_lookup(v, va, pfec[0], &t) ) 
 10.1065          return t.frame_number;
 10.1066  #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 10.1067  
 10.1068 -    guest_walk_tables(v, va, &gw, 0);
 10.1069 +    if ( guest_walk_tables(v, va, &gw, pfec[0], 0) != 0 )
 10.1070 +    {
 10.1071 +        if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
 10.1072 +            pfec[0] &= ~PFEC_page_present;
 10.1073 +        return INVALID_GFN;
 10.1074 +    }
 10.1075      gfn = guest_walk_to_gfn(&gw);
 10.1076  
 10.1077  #if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 10.1078      t.page_number = va >> PAGE_SHIFT;
 10.1079      t.frame_number = gfn_x(gfn);
 10.1080      t.flags = accumulate_guest_flags(v, &gw); 
 10.1081 +    t.pfec = pfec[0];
 10.1082      vtlb_insert(v, t);
 10.1083  #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 10.1084  
 10.1085 -    unmap_walk(v, &gw);
 10.1086      return gfn_x(gfn);
 10.1087  }
 10.1088  
 10.1089 @@ -4006,9 +3947,8 @@ static inline void * emulate_map_dest(st
 10.1090                                        struct sh_emulate_ctxt *sh_ctxt,
 10.1091                                        mfn_t *mfnp)
 10.1092  {
 10.1093 -    walk_t gw;
 10.1094 -    u32 flags, errcode;
 10.1095 -    gfn_t gfn;
 10.1096 +    uint32_t pfec;
 10.1097 +    unsigned long gfn;
 10.1098      mfn_t mfn;
 10.1099      p2m_type_t p2mt;
 10.1100  
 10.1101 @@ -4016,50 +3956,20 @@ static inline void * emulate_map_dest(st
 10.1102      if ( ring_3(sh_ctxt->ctxt.regs) ) 
 10.1103          return NULL;
 10.1104  
 10.1105 -#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 10.1106 -    /* Try the virtual TLB first */
 10.1107 +    /* Translate the VA, and exit with a page-fault if we fail */
 10.1108 +    pfec = PFEC_page_present | PFEC_write_access;
 10.1109 +    gfn = sh_gva_to_gfn(v, vaddr, &pfec);
 10.1110 +    if ( gfn == INVALID_GFN ) 
 10.1111      {
 10.1112 -        struct shadow_vtlb t = {0};
 10.1113 -        if ( vtlb_lookup(v, vaddr, &t) 
 10.1114 -             && ((t.flags & (_PAGE_PRESENT|_PAGE_RW)) 
 10.1115 -                 == (_PAGE_PRESENT|_PAGE_RW)) )
 10.1116 -        {
 10.1117 -            flags = t.flags;
 10.1118 -            gfn = _gfn(t.frame_number);
 10.1119 -        }
 10.1120 +        if ( is_hvm_vcpu(v) )
 10.1121 +            hvm_inject_exception(TRAP_page_fault, pfec, vaddr);
 10.1122          else
 10.1123 -        {
 10.1124 -            /* Need to do the full lookup, just in case permissions
 10.1125 -             * have increased since we cached this entry */
 10.1126 -            
 10.1127 -#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
 10.1128 -
 10.1129 -            /* Walk the guest pagetables */
 10.1130 -            guest_walk_tables(v, vaddr, &gw, 1);
 10.1131 -            flags = accumulate_guest_flags(v, &gw);
 10.1132 -            gfn = guest_l1e_get_gfn(gw.eff_l1e);
 10.1133 -            sh_audit_gw(v, &gw);
 10.1134 -            unmap_walk(v, &gw);
 10.1135 -            
 10.1136 -#if (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB)
 10.1137 -            /* Remember this translation for next time */
 10.1138 -            t.page_number = vaddr >> PAGE_SHIFT;
 10.1139 -            t.frame_number = gfn_x(gfn);
 10.1140 -            t.flags = flags;
 10.1141 -            vtlb_insert(v, t);
 10.1142 -        }
 10.1143 +            propagate_page_fault(vaddr, pfec);
 10.1144 +        return NULL;
 10.1145      }
 10.1146 -#endif
 10.1147 -
 10.1148 -    errcode = PFEC_write_access;
 10.1149 -    if ( !(flags & _PAGE_PRESENT) ) 
 10.1150 -        goto page_fault;
 10.1151 -
 10.1152 -    errcode |= PFEC_page_present;
 10.1153 -    if ( !(flags & _PAGE_RW) ) 
 10.1154 -        goto page_fault;
 10.1155 -
 10.1156 -    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 10.1157 +
  10.1158 +    /* Translate the GFN to an MFN */
 10.1159 +    mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
 10.1160      if ( p2m_is_ram(p2mt) )
 10.1161      {
 10.1162          ASSERT(mfn_valid(mfn));
 10.1163 @@ -4069,13 +3979,6 @@ static inline void * emulate_map_dest(st
 10.1164      }
 10.1165      else 
 10.1166          return NULL;
 10.1167 -
 10.1168 - page_fault:
 10.1169 -    if ( is_hvm_vcpu(v) )
 10.1170 -        hvm_inject_exception(TRAP_page_fault, errcode, vaddr);
 10.1171 -    else
 10.1172 -        propagate_page_fault(vaddr, errcode);
 10.1173 -    return NULL;
 10.1174  }
 10.1175  
 10.1176  static int safe_not_to_verify_write(mfn_t gmfn, void *dst, void *src, 
    11.1 --- a/xen/arch/x86/mm/shadow/private.h	Fri Nov 02 16:34:54 2007 +0000
    11.2 +++ b/xen/arch/x86/mm/shadow/private.h	Fri Nov 02 16:38:11 2007 +0000
    11.3 @@ -665,9 +665,10 @@ void shadow_continue_emulation(
    11.4  #define VTLB_ENTRIES 13
    11.5  
    11.6  struct shadow_vtlb {
    11.7 -    unsigned long page_number;    /* Guest virtual address >> PAGE_SHIFT  */
    11.8 -    unsigned long frame_number;   /* Guest physical address >> PAGE_SHIFT */
    11.9 -    u32 flags;    /* Accumulated guest pte flags, or 0 for an empty slot. */
   11.10 +    unsigned long page_number;      /* Guest virtual address >> PAGE_SHIFT  */
   11.11 +    unsigned long frame_number;     /* Guest physical address >> PAGE_SHIFT */
   11.12 +    uint32_t pfec;  /* Pagefault code for the lookup that filled this entry */
   11.13 +    uint32_t flags; /* Accumulated guest pte flags, or 0 for an empty slot. */
   11.14  };
   11.15  
   11.16  /* Call whenever the guest flushes hit actual TLB */
   11.17 @@ -692,7 +693,7 @@ static inline void vtlb_insert(struct vc
   11.18  }
   11.19  
   11.20  /* Look a translation up in the vTLB.  Returns 0 if not found. */
   11.21 -static inline int vtlb_lookup(struct vcpu *v, unsigned long va,
   11.22 +static inline int vtlb_lookup(struct vcpu *v, unsigned long va, uint32_t pfec,
   11.23                                struct shadow_vtlb *result) 
   11.24  {
   11.25      unsigned long page_number = va >> PAGE_SHIFT;
   11.26 @@ -701,7 +702,9 @@ static inline int vtlb_lookup(struct vcp
   11.27  
   11.28      spin_lock(&v->arch.paging.vtlb_lock);
   11.29      if ( v->arch.paging.vtlb[i].flags != 0 
   11.30 -         && v->arch.paging.vtlb[i].page_number == page_number )
   11.31 +         && v->arch.paging.vtlb[i].page_number == page_number 
   11.32 +         /* Any successful walk that had at least these pfec bits is OK */
   11.33 +         && (v->arch.paging.vtlb[i].pfec & pfec) == pfec )
   11.34      {
   11.35          rv = 1; 
   11.36          result[0] = v->arch.paging.vtlb[i];
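
The pfec field changes the vTLB hit rule: a cached translation is reused only when the walk that filled it was performed with at least the error-code bits the current lookup is asking for. A minimal standalone sketch of that containment test (the PFEC_* values below mirror the x86 page-fault error-code bits and are redefined locally purely for illustration):

    #include <assert.h>
    #include <stdint.h>

    /* x86 page-fault error-code bits (same values as Xen's PFEC_* constants) */
    #define PFEC_page_present  (1U << 0)
    #define PFEC_write_access  (1U << 1)

    int main(void)
    {
        uint32_t write_entry = PFEC_page_present | PFEC_write_access; /* filled by a write walk */
        uint32_t read_entry  = PFEC_page_present;                     /* filled by a read walk  */
        uint32_t want_read   = PFEC_page_present;
        uint32_t want_write  = PFEC_page_present | PFEC_write_access;

        /* vtlb_lookup's test: a hit requires (entry.pfec & want) == want */
        assert((write_entry & want_read)  == want_read);   /* a read may reuse a write-filled entry  */
        assert((read_entry  & want_write) != want_write);  /* a write may not reuse a read-filled one */
        return 0;
    }
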
    12.1 --- a/xen/arch/x86/mm/shadow/types.h	Fri Nov 02 16:34:54 2007 +0000
    12.2 +++ b/xen/arch/x86/mm/shadow/types.h	Fri Nov 02 16:38:11 2007 +0000
    12.3 @@ -251,6 +251,7 @@ TYPE_SAFE(u32,gfn)
    12.4  /* Types of the guest's page tables */
    12.5  typedef l1_pgentry_32_t guest_l1e_t;
    12.6  typedef l2_pgentry_32_t guest_l2e_t;
    12.7 +typedef intpte_32_t guest_intpte_t;
    12.8  
    12.9  /* Access functions for them */
   12.10  static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
   12.11 @@ -319,6 +320,7 @@ typedef l3_pgentry_t guest_l3e_t;
   12.12  #if GUEST_PAGING_LEVELS >= 4
   12.13  typedef l4_pgentry_t guest_l4e_t;
   12.14  #endif
   12.15 +typedef intpte_t guest_intpte_t;
   12.16  
   12.17  /* Access functions for them */
   12.18  static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
   12.19 @@ -419,32 +421,27 @@ gfn_to_paddr(gfn_t gfn)
   12.20  
   12.21  /* Type used for recording a walk through guest pagetables.  It is
   12.22   * filled in by the pagetable walk function, and also used as a cache
   12.23 - * for later walks.  
   12.24 - * Any non-null pointer in this structure represents a mapping of guest
   12.25 - * memory.  We must always call walk_init() before using a walk_t, and 
   12.26 - * call walk_unmap() when we're done. 
   12.27 - * The "Effective l1e" field is used when there isn't an l1e to point to, 
   12.28 - * but we have fabricated an l1e for propagation to the shadow (e.g., 
   12.29 - * for splintering guest superpages into many shadow l1 entries).  */
    12.30 + * for later walks.  When we encounter a superpage l2e, we fabricate an
   12.31 + * l1e for propagation to the shadow (for splintering guest superpages
   12.32 + * into many shadow l1 entries).  */
   12.33  typedef struct shadow_walk_t walk_t;
   12.34  struct shadow_walk_t 
   12.35  {
   12.36      unsigned long va;           /* Address we were looking for */
   12.37  #if GUEST_PAGING_LEVELS >= 3
   12.38  #if GUEST_PAGING_LEVELS >= 4
   12.39 -    guest_l4e_t *l4e;           /* Pointer to guest's level 4 entry */
   12.40 -#endif
   12.41 -    guest_l3e_t *l3e;           /* Pointer to guest's level 3 entry */
   12.42 +    guest_l4e_t l4e;            /* Guest's level 4 entry */
   12.43  #endif
   12.44 -    guest_l2e_t *l2e;           /* Pointer to guest's level 2 entry */
   12.45 -    guest_l1e_t *l1e;           /* Pointer to guest's level 1 entry */
   12.46 -    guest_l1e_t eff_l1e;        /* Effective level 1 entry */
   12.47 +    guest_l3e_t l3e;            /* Guest's level 3 entry */
   12.48 +#endif
   12.49 +    guest_l2e_t l2e;            /* Guest's level 2 entry */
   12.50 +    guest_l1e_t l1e;            /* Guest's level 1 entry (or fabrication) */
   12.51  #if GUEST_PAGING_LEVELS >= 4
   12.52 -    mfn_t l4mfn;                /* MFN that the level 4 entry is in */
   12.53 -    mfn_t l3mfn;                /* MFN that the level 3 entry is in */
   12.54 +    mfn_t l4mfn;                /* MFN that the level 4 entry was in */
   12.55 +    mfn_t l3mfn;                /* MFN that the level 3 entry was in */
   12.56  #endif
   12.57 -    mfn_t l2mfn;                /* MFN that the level 2 entry is in */
   12.58 -    mfn_t l1mfn;                /* MFN that the level 1 entry is in */
   12.59 +    mfn_t l2mfn;                /* MFN that the level 2 entry was in */
   12.60 +    mfn_t l1mfn;                /* MFN that the level 1 entry was in */
   12.61  };
   12.62  
   12.63  /* macros for dealing with the naming of the internal function names of the
   12.64 @@ -542,7 +539,7 @@ accumulate_guest_flags(struct vcpu *v, w
   12.65  {
   12.66      u32 accumulated_flags;
   12.67  
   12.68 -    if ( unlikely(!(guest_l1e_get_flags(gw->eff_l1e) & _PAGE_PRESENT)) )
   12.69 +    if ( unlikely(!(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT)) )
   12.70          return 0;
   12.71          
   12.72      // We accumulate the permission flags with bitwise ANDing.
   12.73 @@ -550,17 +547,17 @@ accumulate_guest_flags(struct vcpu *v, w
   12.74      // For the NX bit, however, the polarity is wrong, so we accumulate the
   12.75      // inverse of the NX bit.
   12.76      //
   12.77 -    accumulated_flags =  guest_l1e_get_flags(gw->eff_l1e) ^ _PAGE_NX_BIT;
   12.78 -    accumulated_flags &= guest_l2e_get_flags(*gw->l2e) ^ _PAGE_NX_BIT;
   12.79 +    accumulated_flags =  guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
   12.80 +    accumulated_flags &= guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
   12.81  
   12.82      // Note that PAE guests do not have USER or RW or NX bits in their L3s.
   12.83      //
   12.84  #if GUEST_PAGING_LEVELS == 3
   12.85      accumulated_flags &=
   12.86 -        ~_PAGE_PRESENT | (guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT);
   12.87 +        ~_PAGE_PRESENT | (guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT);
   12.88  #elif GUEST_PAGING_LEVELS >= 4
   12.89 -    accumulated_flags &= guest_l3e_get_flags(*gw->l3e) ^ _PAGE_NX_BIT;
   12.90 -    accumulated_flags &= guest_l4e_get_flags(*gw->l4e) ^ _PAGE_NX_BIT;
   12.91 +    accumulated_flags &= guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
   12.92 +    accumulated_flags &= guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
   12.93  #endif
   12.94  
   12.95      // Revert the NX bit back to its original polarity
    13.1 --- a/xen/include/asm-x86/hvm/support.h	Fri Nov 02 16:34:54 2007 +0000
    13.2 +++ b/xen/include/asm-x86/hvm/support.h	Fri Nov 02 16:38:11 2007 +0000
    13.3 @@ -86,6 +86,7 @@ int hvm_copy_to_guest_phys(paddr_t paddr
    13.4  int hvm_copy_from_guest_phys(void *buf, paddr_t paddr, int size);
    13.5  int hvm_copy_to_guest_virt(unsigned long vaddr, void *buf, int size);
    13.6  int hvm_copy_from_guest_virt(void *buf, unsigned long vaddr, int size);
    13.7 +int hvm_fetch_from_guest_virt(void *buf, unsigned long vaddr, int size);
    13.8  
    13.9  void hvm_print_line(struct vcpu *v, const char c);
   13.10  void hlt_timer_fn(void *data);
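
The new hvm_fetch_from_guest_virt() prototype gives emulation paths a copy routine that marks the access as an instruction fetch rather than a data read. A hedged usage sketch (assuming the same return convention as the neighbouring hvm_copy_* routines, i.e. non-zero means that many bytes were not copied; insn_buf and eip are illustrative names):

    /* Illustration only: pulling instruction bytes for an emulator. */
    uint8_t insn_buf[16];
    if ( hvm_fetch_from_guest_virt(insn_buf, eip, sizeof(insn_buf)) != 0 )
        return X86EMUL_UNHANDLEABLE;   /* fetch hit an unmapped/non-RAM page */
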
    14.1 --- a/xen/include/asm-x86/paging.h	Fri Nov 02 16:34:54 2007 +0000
    14.2 +++ b/xen/include/asm-x86/paging.h	Fri Nov 02 16:38:11 2007 +0000
    14.3 @@ -105,7 +105,8 @@ struct paging_mode {
    14.4      int           (*page_fault            )(struct vcpu *v, unsigned long va,
    14.5                                              struct cpu_user_regs *regs);
    14.6      int           (*invlpg                )(struct vcpu *v, unsigned long va);
    14.7 -    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va);
    14.8 +    unsigned long (*gva_to_gfn            )(struct vcpu *v, unsigned long va,
    14.9 +                                            uint32_t *pfec);
   14.10      void          (*update_cr3            )(struct vcpu *v, int do_locking);
   14.11      void          (*update_paging_modes   )(struct vcpu *v);
   14.12      void          (*write_p2m_entry       )(struct vcpu *v, unsigned long gfn,
   14.13 @@ -204,12 +205,17 @@ static inline int paging_invlpg(struct v
   14.14  }
   14.15  
   14.16  /* Translate a guest virtual address to the frame number that the
   14.17 - * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest 
   14.18 - * tables don't map this address. */
   14.19 + * *guest* pagetables would map it to.  Returns INVALID_GFN if the guest
   14.20 + * tables don't map this address for this kind of access.
   14.21 + * pfec[0] is used to determine which kind of access this is when
   14.22 + * walking the tables.  The caller should set the PFEC_page_present bit
   14.23 + * in pfec[0]; in the failure case, that bit will be cleared if appropriate. */
   14.24  #define INVALID_GFN (-1UL)
   14.25 -static inline unsigned long paging_gva_to_gfn(struct vcpu *v, unsigned long va)
   14.26 +static inline unsigned long paging_gva_to_gfn(struct vcpu *v, 
   14.27 +                                              unsigned long va,
   14.28 +                                              uint32_t *pfec)
   14.29  {
   14.30 -    return v->arch.paging.mode->gva_to_gfn(v, va);
   14.31 +    return v->arch.paging.mode->gva_to_gfn(v, va, pfec);
   14.32  }
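
A hedged sketch of a caller that follows the pfec[0] convention described above (translate_for_write is a hypothetical helper; the fault injection mirrors how other callers in this changeset report failures for HVM guests):

    /* Hypothetical caller: seed pfec with the access type; on failure the
     * (possibly updated) pfec value is what should be reported to the
     * guest as the #PF error code. */
    static unsigned long translate_for_write(struct vcpu *v, unsigned long va)
    {
        uint32_t pfec = PFEC_page_present | PFEC_write_access;
        unsigned long gfn = paging_gva_to_gfn(v, va, &pfec);

        if ( gfn == INVALID_GFN )
            hvm_inject_exception(TRAP_page_fault, pfec, va);
        return gfn;
    }
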
   14.33  
   14.34  /* Update all the things that are derived from the guest's CR3.
    15.1 --- a/xen/include/asm-x86/perfc_defn.h	Fri Nov 02 16:34:54 2007 +0000
    15.2 +++ b/xen/include/asm-x86/perfc_defn.h	Fri Nov 02 16:38:11 2007 +0000
    15.3 @@ -50,12 +50,8 @@ PERFCOUNTER(shadow_fault_fast_gnp, "shad
    15.4  PERFCOUNTER(shadow_fault_fast_mmio, "shadow_fault fast path mmio")
    15.5  PERFCOUNTER(shadow_fault_fast_fail, "shadow_fault fast path error")
    15.6  PERFCOUNTER(shadow_fault_bail_bad_gfn, "shadow_fault guest bad gfn")
    15.7 -PERFCOUNTER(shadow_fault_bail_not_present, 
    15.8 -                                        "shadow_fault guest not-present")
    15.9 -PERFCOUNTER(shadow_fault_bail_nx,  "shadow_fault guest NX fault")
   15.10 -PERFCOUNTER(shadow_fault_bail_ro_mapping, "shadow_fault guest R/W fault")
   15.11 -PERFCOUNTER(shadow_fault_bail_user_supervisor, 
   15.12 -                                        "shadow_fault guest U/S fault")
   15.13 +PERFCOUNTER(shadow_fault_bail_real_fault, 
   15.14 +                                        "shadow_fault really guest fault")
   15.15  PERFCOUNTER(shadow_fault_emulate_read, "shadow_fault emulates a read")
   15.16  PERFCOUNTER(shadow_fault_emulate_write, "shadow_fault emulates a write")
   15.17  PERFCOUNTER(shadow_fault_emulate_failed, "shadow_fault emulator fails")