ia64/xen-unstable

changeset 15863:4633e9604da9

[HVM] Add type information to the p2m map.
This is a base for memory tricks like page sharing, copy-on-write, lazy
allocation, etc. It should also make pass-through MMIO easier to
implement in the p2m.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
author Tim Deegan <Tim.Deegan@xensource.com>
date Mon Sep 10 14:42:30 2007 +0100 (2007-09-10)
parents 1474db8058b2
children 924c153e0cf9
files xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/io.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/mm/hap/guest_walk.c xen/arch/x86/mm/hap/hap.c xen/arch/x86/mm/p2m.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/types.h xen/include/asm-x86/mm.h xen/include/asm-x86/p2m.h
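The description above boils down to a typed p2m lookup: gfn_to_mfn() and its variants now return a p2m_type_t through an out-parameter, and callers branch on that type with predicates such as p2m_is_ram() instead of checking mfn_valid() alone. A minimal sketch of the caller-side pattern, distilled from the hunks below (the helper name is made up for illustration and is not part of the changeset):

    /* Illustrative only: the calling pattern this changeset introduces. */
    static int sketch_typed_lookup(struct domain *d, unsigned long gfn)
    {
        p2m_type_t p2mt;
        mfn_t mfn = gfn_to_mfn(d, gfn, &p2mt);

        if ( p2m_is_ram(p2mt) )
        {
            /* Backed by a real machine frame: safe to map or take a reference. */
            ASSERT(mfn_valid(mfn));
            return 0;
        }

        if ( p2mt == p2m_mmio_dm )
            return 1;       /* emulated MMIO: hand the access to the device model */

        return -EINVAL;     /* nothing usable mapped at this gfn */
    }
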
line diff
     1.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Sep 10 13:59:46 2007 +0100
     1.2 +++ b/xen/arch/x86/hvm/hvm.c	Mon Sep 10 14:42:30 2007 +0100
     1.3 @@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
     1.4      struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
     1.5  {
     1.6      struct page_info *page;
     1.7 +    p2m_type_t p2mt;
     1.8      unsigned long mfn;
     1.9      void *va;
    1.10  
    1.11 -    mfn = gmfn_to_mfn(d, gmfn);
    1.12 -    if ( !mfn_valid(mfn) )
    1.13 +    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
    1.14 +    if ( !p2m_is_ram(p2mt) )
    1.15          return -EINVAL;
    1.16 +    ASSERT(mfn_valid(mfn));
    1.17  
    1.18      page = mfn_to_page(mfn);
    1.19      if ( !get_page_and_type(page, d, PGT_writable_page) )
    1.20 @@ -517,7 +519,8 @@ void hvm_triple_fault(void)
    1.21  int hvm_set_cr0(unsigned long value)
    1.22  {
    1.23      struct vcpu *v = current;
    1.24 -    unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
    1.25 +    p2m_type_t p2mt;
    1.26 +    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
    1.27    
    1.28      HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
    1.29  
    1.30 @@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
    1.31          if ( !paging_mode_hap(v->domain) )
    1.32          {
    1.33              /* The guest CR3 must be pointing to the guest physical. */
    1.34 -            mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
    1.35 -            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
    1.36 +            gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
    1.37 +            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    1.38 +            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || 
    1.39 +                 !get_page(mfn_to_page(mfn), v->domain))
    1.40              {
    1.41                  gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", 
    1.42                           v->arch.hvm_vcpu.guest_cr[3], mfn);
    1.43 @@ -603,16 +608,18 @@ int hvm_set_cr0(unsigned long value)
    1.44  int hvm_set_cr3(unsigned long value)
    1.45  {
    1.46      unsigned long mfn;
    1.47 +    p2m_type_t p2mt;
    1.48      struct vcpu *v = current;
    1.49  
    1.50      if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
    1.51           (value != v->arch.hvm_vcpu.guest_cr[3]) )
    1.52      {
    1.53 -        /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
    1.54 +        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
    1.55          HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
    1.56 -        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
    1.57 -        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
    1.58 -            goto bad_cr3;
    1.59 +        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
    1.60 +        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
    1.61 +             !get_page(mfn_to_page(mfn), v->domain) )
    1.62 +              goto bad_cr3;
    1.63  
    1.64          put_page(pagetable_get_page(v->arch.guest_table));
    1.65          v->arch.guest_table = pagetable_from_pfn(mfn);
    1.66 @@ -677,6 +684,7 @@ int hvm_set_cr4(unsigned long value)
    1.67  static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
    1.68  {
    1.69      unsigned long gfn, mfn;
    1.70 +    p2m_type_t p2mt;
    1.71      char *p;
    1.72      int count, todo;
    1.73  
    1.74 @@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
    1.75          else
    1.76              gfn = addr >> PAGE_SHIFT;
    1.77          
    1.78 -        mfn = get_mfn_from_gpfn(gfn);
    1.79 +        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    1.80  
    1.81 -        if ( mfn == INVALID_MFN )
    1.82 +        if ( !p2m_is_ram(p2mt) )
    1.83              return todo;
    1.84 +        ASSERT(mfn_valid(mfn));
    1.85  
    1.86          p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
    1.87  
     2.1 --- a/xen/arch/x86/hvm/io.c	Mon Sep 10 13:59:46 2007 +0100
     2.2 +++ b/xen/arch/x86/hvm/io.c	Mon Sep 10 14:42:30 2007 +0100
     2.3 @@ -826,9 +826,7 @@ void hvm_io_assist(void)
     2.4      ioreq_t *p;
     2.5      struct cpu_user_regs *regs;
     2.6      struct hvm_io_op *io_opp;
     2.7 -    unsigned long gmfn;
     2.8      struct vcpu *v = current;
     2.9 -    struct domain *d = v->domain;
    2.10  
    2.11      io_opp = &v->arch.hvm_vcpu.io_op;
    2.12      regs   = &io_opp->io_context;
    2.13 @@ -862,13 +860,6 @@ void hvm_io_assist(void)
    2.14      hvm_load_cpu_guest_regs(v, regs);
    2.15      memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
    2.16  
    2.17 -    /* Has memory been dirtied? */
    2.18 -    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
    2.19 -    {
    2.20 -        gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
    2.21 -        paging_mark_dirty(d, gmfn);
    2.22 -    }
    2.23 -
    2.24   out:
    2.25      vcpu_end_shutdown_deferral(v);
    2.26  }
     3.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Sep 10 13:59:46 2007 +0100
     3.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Mon Sep 10 14:42:30 2007 +0100
     3.3 @@ -338,6 +338,7 @@ int svm_vmcb_save(struct vcpu *v, struct
     3.4  int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
     3.5  {
     3.6      unsigned long mfn = 0;
     3.7 +    p2m_type_t p2mt;
     3.8      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     3.9  
    3.10      if ( c->pending_valid &&
    3.11 @@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
    3.12      {
    3.13          if ( c->cr0 & X86_CR0_PG )
    3.14          {
    3.15 -            mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
    3.16 -            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
    3.17 +            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
    3.18 +            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
    3.19              {
    3.20                  gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
    3.21                           c->cr3);
    3.22 @@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
    3.23      return 1;
    3.24  }
    3.25  
    3.26 -static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
    3.27 +static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
    3.28  {
    3.29 -    if (mmio_space(gpa)) {
    3.30 +    p2m_type_t p2mt;
    3.31 +    mfn_t mfn;
    3.32 +    unsigned long gfn = gpa >> PAGE_SHIFT;
    3.33 +
    3.34 +    /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
    3.35 +    mfn = gfn_to_mfn_current(gfn, &p2mt);
    3.36 +    if ( p2mt == p2m_mmio_dm )
    3.37 +    {
    3.38          handle_mmio(gpa);
    3.39 -        return 1;
    3.40 +        return;
    3.41      }
    3.42  
    3.43 -    paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
    3.44 -    return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
    3.45 +    /* Log-dirty: mark the page dirty and let the guest write it again */
    3.46 +    paging_mark_dirty(current->domain, mfn_x(mfn));
    3.47 +    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
    3.48  }
    3.49  
    3.50  static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
    3.51 @@ -2341,8 +2350,7 @@ asmlinkage void svm_vmexit_handler(struc
    3.52  
    3.53      case VMEXIT_NPF:
    3.54          regs->error_code = vmcb->exitinfo1;
    3.55 -        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
    3.56 -            domain_crash(v->domain);
    3.57 +        svm_do_nested_pgfault(vmcb->exitinfo2, regs);
    3.58          break;
    3.59  
    3.60      default:
     4.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Sep 10 13:59:46 2007 +0100
     4.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Sep 10 14:42:30 2007 +0100
     4.3 @@ -566,6 +566,7 @@ void vmx_vmcs_save(struct vcpu *v, struc
     4.4  int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
     4.5  {
     4.6      unsigned long mfn = 0;
     4.7 +    p2m_type_t p2mt;
     4.8  
     4.9      if ( c->pending_valid &&
    4.10           ((c->pending_type == 1) || (c->pending_type > 6) ||
    4.11 @@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
    4.12  
    4.13      if ( c->cr0 & X86_CR0_PG )
    4.14      {
    4.15 -        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
    4.16 -        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
    4.17 +        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
    4.18 +        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
    4.19          {
    4.20              gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
    4.21              return -EINVAL;
    4.22 @@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
    4.23           * Note that this leaf lives at <max-hypervisor-leaf> + 1.
    4.24           */
    4.25          u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
    4.26 -        unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
    4.27 +        p2m_type_t p2mt;
    4.28 +        unsigned long mfn;
    4.29          struct vcpu *v = current;
    4.30          char *p;
    4.31  
    4.32 +        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
    4.33 +
    4.34          gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
    4.35  
    4.36          /* 8-byte aligned valid pseudophys address from vmxassist, please. */
    4.37 -        if ( (value & 7) || (mfn == INVALID_MFN) ||
    4.38 +        if ( (value & 7) || !p2m_is_ram(p2mt) ||
    4.39               !v->arch.hvm_vmx.vmxassist_enabled )
    4.40          {
    4.41              domain_crash(v->domain);
    4.42              return;
    4.43          }
    4.44 +        ASSERT(mfn_valid(mfn));
    4.45  
    4.46          p = map_domain_page(mfn);
    4.47          value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
    4.48 @@ -1905,11 +1910,12 @@ static void vmx_world_save(struct vcpu *
    4.49  static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
    4.50  {
    4.51      unsigned long mfn = 0;
    4.52 +    p2m_type_t p2mt;
    4.53  
    4.54      if ( c->cr0 & X86_CR0_PG )
    4.55      {
    4.56 -        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
    4.57 -        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
    4.58 +        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
    4.59 +        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
    4.60          {
    4.61              gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
    4.62              return -EINVAL;
     5.1 --- a/xen/arch/x86/mm/hap/guest_walk.c	Mon Sep 10 13:59:46 2007 +0100
     5.2 +++ b/xen/arch/x86/mm/hap/guest_walk.c	Mon Sep 10 14:42:30 2007 +0100
     5.3 @@ -28,7 +28,8 @@
     5.4  #include <xen/sched.h>
     5.5  #include <asm/hvm/svm/vmcb.h>
     5.6  #include <asm/domain.h>
     5.7 -#include <asm/shadow.h>
     5.8 +#include <asm/paging.h>
     5.9 +#include <asm/p2m.h>
    5.10  #include <asm/hap.h>
    5.11  
    5.12  #include "private.h"
    5.13 @@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
    5.14      int lev, index;
    5.15      paddr_t gpa = 0;
    5.16      unsigned long gpfn, mfn;
    5.17 +    p2m_type_t p2mt;
    5.18      int success = 1;
    5.19  
    5.20      l1_pgentry_t *l1e;
    5.21 @@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
    5.22      gpfn = (gcr3 >> PAGE_SHIFT);
    5.23      for ( lev = mode; lev >= 1; lev-- )
    5.24      {
    5.25 -        mfn = get_mfn_from_gpfn(gpfn);
    5.26 -        if ( mfn == INVALID_MFN )
    5.27 +        mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
    5.28 +        if ( !p2m_is_ram(p2mt) )
    5.29          {
    5.30              HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
    5.31                         lev);
    5.32              success = 0;
    5.33              break;
    5.34          }
    5.35 +        ASSERT(mfn_valid(mfn));
    5.36 +
    5.37          index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
    5.38  
    5.39  #if GUEST_PAGING_LEVELS >= 4
     6.1 --- a/xen/arch/x86/mm/hap/hap.c	Mon Sep 10 13:59:46 2007 +0100
     6.2 +++ b/xen/arch/x86/mm/hap/hap.c	Mon Sep 10 14:42:30 2007 +0100
     6.3 @@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
     6.4      d->arch.paging.mode |= PG_log_dirty;
     6.5      hap_unlock(d);
     6.6  
     6.7 -    /* set l1e entries of P2M table to NOT_WRITABLE. */
     6.8 -    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
     6.9 +    /* set l1e entries of P2M table to be read-only. */
    6.10 +    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
    6.11      flush_tlb_mask(d->domain_dirty_cpumask);
    6.12      return 0;
    6.13  }
    6.14 @@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain 
    6.15      hap_unlock(d);
    6.16  
    6.17      /* set l1e entries of P2M table with normal mode */
    6.18 -    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
    6.19 +    p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
    6.20      return 0;
    6.21  }
    6.22  
    6.23  void hap_clean_dirty_bitmap(struct domain *d)
    6.24  {
    6.25 -    /* mark physical memory as NOT_WRITEABLE and flush the TLB */
    6.26 -    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
    6.27 +    /* set l1e entries of P2M table to be read-only. */
    6.28 +    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
    6.29      flush_tlb_mask(d->domain_dirty_cpumask);
    6.30  }
    6.31  
     7.1 --- a/xen/arch/x86/mm/p2m.c	Mon Sep 10 13:59:46 2007 +0100
     7.2 +++ b/xen/arch/x86/mm/p2m.c	Mon Sep 10 14:42:30 2007 +0100
     7.3 @@ -4,7 +4,7 @@
     7.4   * physical-to-machine mappings for automatically-translated domains.
     7.5   *
     7.6   * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
     7.7 - * Parts of this code are Copyright (c) 2006 by XenSource Inc.
     7.8 + * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
     7.9   * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
    7.10   * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
    7.11   *
    7.12 @@ -93,6 +93,31 @@
    7.13  #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
    7.14  
    7.15  
    7.16 +/* PTE flags for the various types of p2m entry */
    7.17 +#define P2M_BASE_FLAGS \
    7.18 +        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
    7.19 +
    7.20 +static unsigned long p2m_type_to_flags(p2m_type_t t) 
    7.21 +{
    7.22 +    unsigned long flags = (t & 0x7UL) << 9;
    7.23 +    switch(t)
    7.24 +    {
    7.25 +    case p2m_invalid:
    7.26 +    default:
    7.27 +        return flags;
    7.28 +    case p2m_ram_rw:
    7.29 +        return flags | P2M_BASE_FLAGS | _PAGE_RW;
    7.30 +    case p2m_ram_logdirty:
    7.31 +        return flags | P2M_BASE_FLAGS;
    7.32 +    case p2m_ram_ro:
    7.33 +        return flags | P2M_BASE_FLAGS;
    7.34 +    case p2m_mmio_dm:
    7.35 +        return flags;
    7.36 +    case p2m_mmio_direct:
    7.37 +        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
    7.38 +    }
    7.39 +}
    7.40 +
    7.41  
    7.42  // Find the next level's P2M entry, checking for out-of-range gfn's...
    7.43  // Returns NULL on error.
    7.44 @@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
    7.45  }
    7.46  
    7.47  mfn_t
    7.48 -gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
    7.49 +gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
    7.50  /* Read another domain's p2m entries */
    7.51  {
    7.52      mfn_t mfn;
    7.53 -    paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
    7.54 +    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
    7.55      l2_pgentry_t *l2e;
    7.56      l1_pgentry_t *l1e;
    7.57  
    7.58      ASSERT(paging_mode_translate(d));
    7.59 +
    7.60 +    /* XXX This is for compatibility with the old model, where anything not 
    7.61 +     * XXX marked as RAM was considered to be emulated MMIO space.
    7.62 +     * XXX Once we start explicitly registering MMIO regions in the p2m 
    7.63 +     * XXX we will return p2m_invalid for unmapped gfns */
    7.64 +    *t = p2m_mmio_dm;
    7.65 +
    7.66      mfn = pagetable_get_mfn(d->arch.phys_table);
    7.67  
    7.68 -
    7.69 -    if ( gpfn > d->arch.p2m.max_mapped_pfn )
    7.70 +    if ( gfn > d->arch.p2m.max_mapped_pfn )
    7.71          /* This pfn is higher than the highest the p2m map currently holds */
    7.72          return _mfn(INVALID_MFN);
    7.73  
    7.74 @@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
    7.75          return _mfn(INVALID_MFN);
    7.76      }
    7.77      mfn = _mfn(l1e_get_pfn(*l1e));
    7.78 +    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
    7.79      unmap_domain_page(l1e);
    7.80  
    7.81 -    return mfn;
    7.82 +    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
    7.83 +    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
    7.84  }
    7.85  
    7.86  #if P2M_AUDIT
    7.87 @@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
    7.88          return;
    7.89      P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
    7.90  
    7.91 -    ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
    7.92 -    //ASSERT(mfn_to_gfn(d, mfn) == gfn);
    7.93 -
    7.94 -    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
    7.95 +    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
    7.96      set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
    7.97  }
    7.98  
    7.99 @@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
   7.100                         unsigned long mfn)
   7.101  {
   7.102      unsigned long ogfn;
   7.103 +    p2m_type_t ot;
   7.104      mfn_t omfn;
   7.105  
   7.106      if ( !paging_mode_translate(d) )
   7.107 @@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
   7.108  
   7.109      P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
   7.110  
   7.111 -    omfn = gfn_to_mfn(d, gfn);
   7.112 -    if ( mfn_valid(omfn) )
   7.113 +    omfn = gfn_to_mfn(d, gfn, &ot);
   7.114 +    if ( p2m_is_ram(ot) )
   7.115      {
   7.116 -        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
   7.117 +        ASSERT(mfn_valid(omfn));
   7.118          set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
   7.119      }
   7.120  
   7.121 @@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
   7.122          /* This machine frame is already mapped at another physical address */
   7.123          P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
   7.124                    mfn, ogfn, gfn);
   7.125 -        if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
   7.126 +        omfn = gfn_to_mfn(d, ogfn, &ot);
   7.127 +        if ( p2m_is_ram(ot) )
   7.128          {
   7.129 +            ASSERT(mfn_valid(omfn));
   7.130              P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
   7.131                        ogfn , mfn_x(omfn));
   7.132              if ( mfn_x(omfn) == mfn )
   7.133 @@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
   7.134          }
   7.135      }
   7.136  
   7.137 -    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
   7.138 -    set_gpfn_from_mfn(mfn, gfn);
   7.139 +    if ( mfn_valid(_mfn(mfn)) ) 
   7.140 +    {
   7.141 +        set_p2m_entry(d, gfn, _mfn(mfn),
   7.142 +                  p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
   7.143 +        set_gpfn_from_mfn(mfn, gfn);
   7.144 +    }
   7.145 +    else
   7.146 +    {
   7.147 +        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
   7.148 +                 gfn, mfn);
   7.149 +        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
   7.150 +    }
   7.151  
   7.152      audit_p2m(d);
   7.153      p2m_unlock(d);
   7.154  }
   7.155  
   7.156 -/* This function goes through P2M table and modify l1e flags of all pages. Note
   7.157 - * that physical base address of l1e is intact. This function can be used for
   7.158 - * special purpose, such as marking physical memory as NOT WRITABLE for
   7.159 - * tracking dirty pages during live migration.
   7.160 - */
   7.161 -void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
   7.162 +/* Walk the whole p2m table, changing any entries of the old type
   7.163 + * to the new type.  This is used in hardware-assisted paging to 
    7.164 + * quickly enable or disable log-dirty tracking */
   7.165 +void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
   7.166  {
   7.167 -    unsigned long mfn, gfn;
   7.168 +    unsigned long mfn, gfn, flags;
   7.169      l1_pgentry_t l1e_content;
   7.170      l1_pgentry_t *l1e;
   7.171      l2_pgentry_t *l2e;
   7.172 @@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain 
   7.173  
   7.174                  for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
   7.175                  {
   7.176 -                    if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
   7.177 +                    flags = l1e_get_flags(l1e[i1]);
   7.178 +                    if ( p2m_flags_to_type(flags) != ot )
   7.179                          continue;
   7.180                      mfn = l1e_get_pfn(l1e[i1]);
   7.181                      gfn = get_gpfn_from_mfn(mfn);
   7.182 -                    /* create a new 1le entry using l1e_flags */
   7.183 -                    l1e_content = l1e_from_pfn(mfn, l1e_flags);
    7.184 +                    /* create a new l1e entry with the new type */
    7.185 +                    flags = p2m_type_to_flags(nt);
   7.186 +                    l1e_content = l1e_from_pfn(mfn, flags);
   7.187                      paging_write_p2m_entry(d, gfn, &l1e[i1],
   7.188                                             l1mfn, l1e_content, 1);
   7.189                  }
   7.190 @@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain 
   7.191      p2m_unlock(d);
   7.192  }
   7.193  
   7.194 -/* This function traces through P2M table and modifies l1e flags of a specific
   7.195 - * gpa.
   7.196 - */
   7.197 -int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
   7.198 +/* Modify the p2m type of a single gfn from ot to nt, returning the 
   7.199 + * entry's previous type */
   7.200 +p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, 
   7.201 +                           p2m_type_t ot, p2m_type_t nt)
   7.202  {
   7.203 -    unsigned long gfn;
   7.204 +    p2m_type_t pt;
   7.205      mfn_t mfn;
   7.206  
   7.207      p2m_lock(d);
   7.208  
   7.209 -    gfn = gpa >> PAGE_SHIFT;
   7.210 -    mfn = gfn_to_mfn(d, gfn);
   7.211 -    if ( mfn_valid(mfn) )
   7.212 -        set_p2m_entry(d, gfn, mfn, l1e_flags);
   7.213 +    mfn = gfn_to_mfn(d, gfn, &pt);
   7.214 +    if ( pt == ot )
   7.215 +        set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
   7.216  
   7.217      p2m_unlock(d);
   7.218  
   7.219 -    return 1;
   7.220 +    return pt;
   7.221  }
   7.222  
   7.223  /*
     8.1 --- a/xen/arch/x86/mm/shadow/common.c	Mon Sep 10 13:59:46 2007 +0100
     8.2 +++ b/xen/arch/x86/mm/shadow/common.c	Mon Sep 10 14:42:30 2007 +0100
     8.3 @@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
     8.4                         l1_pgentry_t new, unsigned int level)
     8.5  {
     8.6      struct domain *d = v->domain;
     8.7 -    mfn_t mfn;
     8.8      
     8.9      shadow_lock(d);
    8.10  
    8.11 -    /* handle physmap_add and physmap_remove */
    8.12 -    mfn = gfn_to_mfn(d, gfn);
    8.13 -    if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
    8.14 -        sh_remove_all_shadows_and_parents(v, mfn);
    8.15 -        if ( sh_remove_all_mappings(v, mfn) )
    8.16 -            flush_tlb_mask(d->domain_dirty_cpumask);    
    8.17 +    /* If we're removing an MFN from the p2m, remove it from the shadows too */
    8.18 +    if ( level == 1 )
    8.19 +    {
    8.20 +        mfn_t mfn = _mfn(l1e_get_pfn(*p));
    8.21 +        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
    8.22 +        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) ) 
    8.23 +        {
    8.24 +            sh_remove_all_shadows_and_parents(v, mfn);
    8.25 +            if ( sh_remove_all_mappings(v, mfn) )
    8.26 +                flush_tlb_mask(d->domain_dirty_cpumask);    
    8.27 +        }
    8.28      }
    8.29 -    
    8.30 -    /* update the entry with new content */
    8.31 +
    8.32 +    /* Update the entry with new content */
    8.33      safe_write_pte(p, new);
    8.34  
    8.35      /* install P2M in monitors for PAE Xen */
     9.1 --- a/xen/arch/x86/mm/shadow/multi.c	Mon Sep 10 13:59:46 2007 +0100
     9.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Mon Sep 10 14:42:30 2007 +0100
     9.3 @@ -209,6 +209,7 @@ static inline int
     9.4  guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
     9.5  {
     9.6      struct domain *d = v->domain;
     9.7 +    p2m_type_t p2mt;
     9.8      ASSERT(!guest_op || shadow_locked_by_me(d));
     9.9      
    9.10      perfc_incr(shadow_guest_walk);
    9.11 @@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
    9.12          + guest_l4_table_offset(va);
    9.13      /* Walk down to the l3e */
    9.14      if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
    9.15 -    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
    9.16 -    if ( !mfn_valid(gw->l3mfn) ) return 1;
    9.17 +    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
    9.18 +    if ( !p2m_is_ram(p2mt) ) return 1;
    9.19 +    ASSERT(mfn_valid(gw->l3mfn));
    9.20      /* This mfn is a pagetable: make sure the guest can't write to it. */
    9.21      if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
    9.22          flush_tlb_mask(d->domain_dirty_cpumask); 
    9.23 @@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
    9.24  #endif /* PAE or 64... */
    9.25      /* Walk down to the l2e */
    9.26      if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
    9.27 -    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
    9.28 -    if ( !mfn_valid(gw->l2mfn) ) return 1;
    9.29 +    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
    9.30 +    if ( !p2m_is_ram(p2mt) ) return 1;
    9.31 +    ASSERT(mfn_valid(gw->l2mfn));
    9.32      /* This mfn is a pagetable: make sure the guest can't write to it. */
    9.33      if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
    9.34          flush_tlb_mask(d->domain_dirty_cpumask); 
    9.35 @@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
    9.36      else 
    9.37      {
    9.38          /* Not a superpage: carry on and find the l1e. */
    9.39 -        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
    9.40 -        if ( !mfn_valid(gw->l1mfn) ) return 1;
    9.41 +        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
    9.42 +        if ( !p2m_is_ram(p2mt) ) return 1;
    9.43 +        ASSERT(mfn_valid(gw->l1mfn));
    9.44          /* This mfn is a pagetable: make sure the guest can't write to it. */
    9.45          if ( guest_op 
    9.46               && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
    9.47 @@ -626,7 +630,7 @@ static always_inline void
    9.48                void *shadow_entry_ptr,
    9.49                int level,
    9.50                fetch_type_t ft, 
    9.51 -              int mmio)
    9.52 +              p2m_type_t p2mt)
    9.53  {
    9.54      guest_l1e_t *gp = guest_entry_ptr;
    9.55      shadow_l1e_t *sp = shadow_entry_ptr;
    9.56 @@ -637,6 +641,13 @@ static always_inline void
    9.57      /* We don't shadow PAE l3s */
    9.58      ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
    9.59  
    9.60 +    /* Check there's something for the shadows to map to */
    9.61 +    if ( !p2m_is_valid(p2mt) )
    9.62 +    {
    9.63 +        *sp = shadow_l1e_empty();
    9.64 +        goto done;
    9.65 +    }
    9.66 +
    9.67      if ( mfn_valid(guest_table_mfn) )
    9.68          /* Handle A and D bit propagation into the guest */
    9.69          gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
    9.70 @@ -658,19 +669,22 @@ static always_inline void
    9.71          goto done;
    9.72      }
    9.73  
    9.74 -    if ( level == 1 && mmio )
    9.75 +    if ( level == 1 && p2mt == p2m_mmio_dm )
    9.76      {
    9.77 -        /* Guest l1e maps MMIO space */
    9.78 +        /* Guest l1e maps emulated MMIO space */
    9.79          *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
    9.80          if ( !d->arch.paging.shadow.has_fast_mmio_entries )
    9.81              d->arch.paging.shadow.has_fast_mmio_entries = 1;
    9.82          goto done;
    9.83      }
    9.84  
    9.85 -    // Must have a valid target_mfn, unless this is a prefetch.  In the
    9.86 +    // Must have a valid target_mfn unless this is a prefetch.  In the
    9.87      // case of a prefetch, an invalid mfn means that we can not usefully
    9.88      // shadow anything, and so we return early.
    9.89      //
    9.90 +    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
    9.91 +     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
    9.92 +     * MMIO areas need to be added to the frame-table to make them "valid". */
    9.93      if ( !mfn_valid(target_mfn) )
    9.94      {
    9.95          ASSERT((ft == ft_prefetch));
    9.96 @@ -718,6 +732,8 @@ static always_inline void
    9.97      // Only allow the guest write access to a page a) on a demand fault,
    9.98      // or b) if the page is already marked as dirty.
    9.99      //
   9.100 +    // (We handle log-dirty entirely inside the shadow code, without using the 
   9.101 +    // p2m_ram_logdirty p2m type: only HAP uses that.)
   9.102      if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
   9.103      {
   9.104          if ( ft & FETCH_TYPE_WRITE ) 
   9.105 @@ -725,6 +741,10 @@ static always_inline void
   9.106          else if ( !sh_mfn_is_dirty(d, target_mfn) )
   9.107              sflags &= ~_PAGE_RW;
   9.108      }
   9.109 +
   9.110 +    /* Read-only memory */
   9.111 +    if ( p2mt == p2m_ram_ro ) 
   9.112 +        sflags &= ~_PAGE_RW;
   9.113      
   9.114      // protect guest page tables
   9.115      //
   9.116 @@ -754,7 +774,12 @@ static always_inline void
   9.117          sflags |= _PAGE_USER;
   9.118      }
   9.119  
   9.120 +    /* MMIO addresses should never be cached */
   9.121 +    if ( p2m_is_mmio(p2mt) )
   9.122 +        sflags |= _PAGE_PCD;
   9.123 +
   9.124      *sp = shadow_l1e_from_mfn(target_mfn, sflags);
   9.125 +
   9.126   done:
   9.127      SHADOW_DEBUG(PROPAGATE,
   9.128                   "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
   9.129 @@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
   9.130                           shadow_l4e_t *sl4e,
   9.131                           fetch_type_t ft)
   9.132  {
   9.133 -    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
   9.134 +    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
   9.135  }
   9.136  
   9.137  static void
   9.138 @@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
   9.139                           shadow_l3e_t *sl3e,
   9.140                           fetch_type_t ft)
   9.141  {
   9.142 -    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
   9.143 +    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
   9.144  }
   9.145  #endif // GUEST_PAGING_LEVELS >= 4
   9.146  
   9.147 @@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
   9.148                           shadow_l2e_t *sl2e,
   9.149                           fetch_type_t ft)
   9.150  {
   9.151 -    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
   9.152 +    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
   9.153  }
   9.154  
   9.155  static void
   9.156 @@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
   9.157                           mfn_t gmfn, 
   9.158                           shadow_l1e_t *sl1e,
   9.159                           fetch_type_t ft, 
   9.160 -                         int mmio)
   9.161 +                         p2m_type_t p2mt)
   9.162  {
   9.163 -    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
   9.164 +    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
   9.165  }
   9.166  
   9.167  
   9.168 @@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
   9.169      shadow_l4e_t *sl4p = se;
   9.170      mfn_t sl3mfn = _mfn(INVALID_MFN);
   9.171      struct domain *d = v->domain;
   9.172 +    p2m_type_t p2mt;
   9.173      int result = 0;
   9.174  
   9.175      perfc_incr(shadow_validate_gl4e_calls);
   9.176 @@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
   9.177      if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
   9.178      {
   9.179          gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
   9.180 -        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
   9.181 -        if ( mfn_valid(gl3mfn) )
   9.182 +        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
   9.183 +        if ( p2m_is_ram(p2mt) )
   9.184              sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
   9.185          else
   9.186              result |= SHADOW_SET_ERROR;
   9.187 @@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
   9.188      guest_l3e_t *new_gl3e = new_ge;
   9.189      shadow_l3e_t *sl3p = se;
   9.190      mfn_t sl2mfn = _mfn(INVALID_MFN);
   9.191 +    p2m_type_t p2mt;
   9.192      int result = 0;
   9.193  
   9.194      perfc_incr(shadow_validate_gl3e_calls);
   9.195 @@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
   9.196      if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
   9.197      {
   9.198          gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
   9.199 -        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
   9.200 -        if ( mfn_valid(gl2mfn) )
   9.201 +        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
   9.202 +        if ( p2m_is_ram(p2mt) )
   9.203              sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
   9.204          else
   9.205              result |= SHADOW_SET_ERROR;
   9.206 @@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
   9.207      guest_l2e_t *new_gl2e = new_ge;
   9.208      shadow_l2e_t *sl2p = se;
   9.209      mfn_t sl1mfn = _mfn(INVALID_MFN);
   9.210 +    p2m_type_t p2mt;
   9.211      int result = 0;
   9.212  
   9.213      perfc_incr(shadow_validate_gl2e_calls);
   9.214 @@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
   9.215          }
   9.216          else
   9.217          {
   9.218 -            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
   9.219 -            if ( mfn_valid(gl1mfn) )
   9.220 +            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
   9.221 +            if ( p2m_is_ram(p2mt) )
   9.222                  sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
   9.223              else
   9.224                  result |= SHADOW_SET_ERROR;
   9.225 @@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
   9.226      shadow_l1e_t *sl1p = se;
   9.227      gfn_t gfn;
   9.228      mfn_t gmfn;
   9.229 -    int result = 0, mmio;
   9.230 +    p2m_type_t p2mt;
   9.231 +    int result = 0;
   9.232  
   9.233      perfc_incr(shadow_validate_gl1e_calls);
   9.234  
   9.235      gfn = guest_l1e_get_gfn(*new_gl1e);
   9.236 -    gmfn = gfn_to_mfn(v->domain, gfn);
   9.237 -
   9.238 -    mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
   9.239 +    gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
   9.240 +
   9.241      l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
   9.242 -                             ft_prefetch, mmio);
   9.243 +                             ft_prefetch, p2mt);
   9.244      
   9.245      result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
   9.246      return result;
   9.247 @@ -2554,12 +2582,13 @@ static inline void reset_early_unshadow(
   9.248  static void sh_prefetch(struct vcpu *v, walk_t *gw, 
   9.249                          shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
   9.250  {
   9.251 -    int i, dist, mmio;
   9.252 +    int i, dist;
   9.253      gfn_t gfn;
   9.254      mfn_t gmfn;
   9.255      guest_l1e_t gl1e;
   9.256      shadow_l1e_t sl1e;
   9.257      u32 gflags;
   9.258 +    p2m_type_t p2mt;
   9.259  
   9.260      /* Prefetch no further than the end of the _shadow_ l1 MFN */
   9.261      dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
   9.262 @@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v, 
   9.263  
   9.264          /* Look at the gfn that the l1e is pointing at */
   9.265          gfn = guest_l1e_get_gfn(gl1e);
   9.266 -        gmfn = gfn_to_mfn(v->domain, gfn);
   9.267 -        mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
   9.268 +        gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
   9.269  
   9.270          /* Propagate the entry.  Safe to use a pointer to our local 
   9.271           * gl1e, since this is not a demand-fetch so there will be no 
   9.272           * write-back to the guest. */
   9.273          l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
   9.274 -                                 gmfn, &sl1e, ft_prefetch, mmio);
   9.275 +                                 gmfn, &sl1e, ft_prefetch, p2mt);
   9.276          (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
   9.277      }
   9.278  }
   9.279 @@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
   9.280      paddr_t gpa;
   9.281      struct sh_emulate_ctxt emul_ctxt;
   9.282      struct x86_emulate_ops *emul_ops;
   9.283 -    int r, mmio;
   9.284 +    int r;
   9.285      fetch_type_t ft = 0;
   9.286 +    p2m_type_t p2mt;
   9.287  
   9.288      SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
   9.289                     v->domain->domain_id, v->vcpu_id, va, regs->error_code);
   9.290 @@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
   9.291  
   9.292      /* What mfn is the guest trying to access? */
   9.293      gfn = guest_l1e_get_gfn(gw.eff_l1e);
   9.294 -    gmfn = gfn_to_mfn(d, gfn);
   9.295 -    mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
   9.296 -
   9.297 -    if ( !mmio && !mfn_valid(gmfn) )
   9.298 +    gmfn = gfn_to_mfn(d, gfn, &p2mt);
   9.299 +
   9.300 +    if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
   9.301      {
   9.302          perfc_incr(shadow_fault_bail_bad_gfn);
   9.303          SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
   9.304 @@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
   9.305  
   9.306      /* Calculate the shadow entry and write it */
   9.307      l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
   9.308 -                             gmfn, &sl1e, ft, mmio);
   9.309 +                             gmfn, &sl1e, ft, p2mt);
   9.310      r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
   9.311  
   9.312  #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
   9.313 @@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
   9.314          }
   9.315      }
   9.316  
   9.317 -    if ( mmio ) 
   9.318 +    /* Need to hand off device-model MMIO and writes to read-only
   9.319 +     * memory to the device model */
   9.320 +    if ( p2mt == p2m_mmio_dm 
   9.321 +         || (p2mt == p2m_ram_ro && ft == ft_demand_write) ) 
   9.322      {
   9.323          gpa = guest_walk_to_gpa(&gw);
   9.324          goto mmio;
   9.325 @@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
   9.326          int flush = 0;
   9.327          gfn_t gl2gfn;
   9.328          mfn_t gl2mfn;
   9.329 +        p2m_type_t p2mt;
   9.330          guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
   9.331          /* First, make all four entries read-only. */
   9.332          for ( i = 0; i < 4; i++ )
   9.333 @@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
   9.334              if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
   9.335              {
   9.336                  gl2gfn = guest_l3e_get_gfn(gl3e[i]);
   9.337 -                gl2mfn = gfn_to_mfn(d, gl2gfn);
   9.338 -                flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 
   9.339 +                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
   9.340 +                if ( p2m_is_ram(p2mt) )
   9.341 +                    flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
   9.342              }
   9.343          }
   9.344          if ( flush ) 
   9.345 @@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
   9.346              if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
   9.347              {
   9.348                  gl2gfn = guest_l3e_get_gfn(gl3e[i]);
   9.349 -                gl2mfn = gfn_to_mfn(d, gl2gfn);
   9.350 -                sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
   9.351 -                                       ? SH_type_l2h_shadow 
   9.352 -                                       : SH_type_l2_shadow);
   9.353 +                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
   9.354 +                if ( p2m_is_ram(p2mt) )
   9.355 +                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
   9.356 +                                           ? SH_type_l2h_shadow 
   9.357 +                                           : SH_type_l2_shadow);
   9.358 +                else
   9.359 +                    sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
   9.360              }
   9.361              else
   9.362 -                /* The guest is not present: clear out the shadow. */
   9.363                  sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
   9.364          }
   9.365      }
   9.366 @@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
   9.367      u32 flags, errcode;
   9.368      gfn_t gfn;
   9.369      mfn_t mfn;
   9.370 +    p2m_type_t p2mt;
   9.371  
   9.372      /* We don't emulate user-mode writes to page tables */
   9.373      if ( ring_3(sh_ctxt->ctxt.regs) ) 
   9.374 @@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
   9.375          }
   9.376      }
   9.377  #endif
   9.378 -    mfn = gfn_to_mfn(v->domain, gfn);
   9.379  
   9.380      errcode = PFEC_write_access;
   9.381      if ( !(flags & _PAGE_PRESENT) ) 
   9.382 @@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
   9.383      if ( !(flags & _PAGE_RW) ) 
   9.384          goto page_fault;
   9.385  
   9.386 -    if ( mfn_valid(mfn) )
   9.387 +    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
   9.388 +    if ( p2m_is_ram(p2mt) )
   9.389      {
   9.390 +        ASSERT(mfn_valid(mfn));
   9.391          *mfnp = mfn;
   9.392          v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
   9.393          return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
   9.394 @@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
   9.395  /* Convert this gfn to an mfn in the manner appropriate for the
   9.396   * guest pagetable it's used in (gmfn) */ 
   9.397  {
   9.398 +    p2m_type_t p2mt;
   9.399      if ( !shadow_mode_translate(v->domain) )
   9.400          return _mfn(gfn_x(gfn));
   9.401      
   9.402 @@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
   9.403           != PGT_writable_page ) 
   9.404          return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
   9.405      else 
   9.406 -        return gfn_to_mfn(v->domain, gfn);
   9.407 +        return gfn_to_mfn(v->domain, gfn, &p2mt);
   9.408  } 
   9.409  
   9.410  
    10.1 --- a/xen/arch/x86/mm/shadow/types.h	Mon Sep 10 13:59:46 2007 +0100
    10.2 +++ b/xen/arch/x86/mm/shadow/types.h	Mon Sep 10 14:42:30 2007 +0100
    10.3 @@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
    10.4  
    10.5  /* Override gfn_to_mfn to work with gfn_t */
    10.6  #undef gfn_to_mfn
    10.7 -#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
    10.8 +#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
    10.9  
   10.10  
   10.11  /* Type used for recording a walk through guest pagetables.  It is
    11.1 --- a/xen/include/asm-x86/mm.h	Mon Sep 10 13:59:46 2007 +0100
    11.2 +++ b/xen/include/asm-x86/mm.h	Mon Sep 10 14:42:30 2007 +0100
    11.3 @@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
    11.4        ? get_gpfn_from_mfn(mfn)                          \
    11.5        : (mfn) )
    11.6  
    11.7 -#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))
    11.8 -
    11.9  #define INVALID_MFN             (~0UL)
   11.10  
   11.11  #ifdef CONFIG_COMPAT
    12.1 --- a/xen/include/asm-x86/p2m.h	Mon Sep 10 13:59:46 2007 +0100
    12.2 +++ b/xen/include/asm-x86/p2m.h	Mon Sep 10 14:42:30 2007 +0100
    12.3 @@ -4,7 +4,7 @@
    12.4   * physical-to-machine mappings for automatically-translated domains.
    12.5   *
    12.6   * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
    12.7 - * Parts of this code are Copyright (c) 2006 by XenSource Inc.
    12.8 + * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
    12.9   * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
   12.10   * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
   12.11   *
   12.12 @@ -27,49 +27,141 @@
   12.13  #define _XEN_P2M_H
   12.14  
   12.15  
   12.16 -/* The phys_to_machine_mapping is the reversed mapping of MPT for full
   12.17 - * virtualization.  It is only used by shadow_mode_translate()==true
   12.18 - * guests, so we steal the address space that would have normally
   12.19 - * been used by the read-only MPT map.
   12.20 +/*
   12.21 + * The phys_to_machine_mapping maps guest physical frame numbers 
   12.22 + * to machine frame numbers.  It only exists for paging_mode_translate 
   12.23 + * guests. It is organised in page-table format, which:
   12.24 + *
   12.25 + * (1) allows us to use it directly as the second pagetable in hardware-
   12.26 + *     assisted paging and (hopefully) iommu support; and 
   12.27 + * (2) lets us map it directly into the guest vcpus' virtual address space 
   12.28 + *     as a linear pagetable, so we can read and write it easily.
   12.29 + *
   12.30 + * For (2) we steal the address space that would have normally been used
   12.31 + * by the read-only MPT map in a non-translated guest.  (For 
   12.32 + * paging_mode_external() guests this mapping is in the monitor table.)
   12.33   */
   12.34  #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
   12.35  
   12.36 +/*
   12.37 + * The upper levels of the p2m pagetable always contain full rights; all 
   12.38 + * variation in the access control bits is made in the level-1 PTEs.
   12.39 + * 
   12.40 + * In addition to the phys-to-machine translation, each p2m PTE contains
   12.41 + * *type* information about the gfn it translates, helping Xen to decide
   12.42 + * on the correct course of action when handling a page-fault to that
   12.43 + * guest frame.  We store the type in the "available" bits of the PTEs
   12.44 + * in the table, which gives us 8 possible types on 32-bit systems.
   12.45 + * Further expansions of the type system will only be supported on
   12.46 + * 64-bit Xen.
   12.47 + */
   12.48 +typedef enum {
   12.49 +    p2m_invalid = 0,            /* Nothing mapped here */
   12.50 +    p2m_ram_rw = 1,             /* Normal read/write guest RAM */
   12.51 +    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
   12.52 +    p2m_ram_ro = 3,             /* Read-only; writes go to the device model */
    12.53 +    p2m_mmio_dm = 4,            /* Reads and writes go to the device model */
   12.54 +    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
   12.55 +} p2m_type_t;
   12.56  
   12.57 -/* Read the current domain's P2M table. */
   12.58 -static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
   12.59 -{
   12.60 -    l1_pgentry_t l1e = l1e_empty();
   12.61 -    int ret;
    12.62 +/* We use bitmaps and masks to handle groups of types */
   12.63 +#define p2m_to_mask(_t) (1UL << (_t))
   12.64 +
   12.65 +/* RAM types, which map to real machine frames */
   12.66 +#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)          \
   12.67 +                       | p2m_to_mask(p2m_ram_logdirty)  \
   12.68 +                       | p2m_to_mask(p2m_ram_ro))
   12.69  
   12.70 -    if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
   12.71 -        return _mfn(INVALID_MFN);
   12.72 +/* MMIO types, which don't have to map to anything in the frametable */
   12.73 +#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
   12.74 +                        | p2m_to_mask(p2m_mmio_direct))
   12.75 +
   12.76 +/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
   12.77 +#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
   12.78 +                      | p2m_to_mask(p2m_ram_ro))
   12.79 +
   12.80 +/* Useful predicates */
   12.81 +#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
   12.82 +#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
   12.83 +#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
   12.84 +#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
   12.85  
   12.86 -    /* Don't read off the end of the p2m table */
   12.87 -    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
   12.88 +/* Extract the type from the PTE flags that store it */
   12.89 +static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
   12.90 +{
   12.91 +    /* Type is stored in the "available" bits, 9, 10 and 11 */
   12.92 +    return (flags >> 9) & 0x7;
   12.93 +}
   12.94 + 
   12.95 +/* Read the current domain's p2m table (through the linear mapping). */
   12.96 +static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
   12.97 +{
   12.98 +    mfn_t mfn = _mfn(INVALID_MFN);
   12.99 +    p2m_type_t p2mt = p2m_mmio_dm;
  12.100 +    /* XXX This is for compatibility with the old model, where anything not 
  12.101 +     * XXX marked as RAM was considered to be emulated MMIO space.
  12.102 +     * XXX Once we start explicitly registering MMIO regions in the p2m 
  12.103 +     * XXX we will return p2m_invalid for unmapped gfns */
  12.104 +
  12.105 +    if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
  12.106 +    {
  12.107 +        l1_pgentry_t l1e = l1e_empty();
  12.108 +        int ret;
  12.109  
  12.110 -    ret = __copy_from_user(&l1e,
  12.111 -                           &phys_to_machine_mapping[gfn],
  12.112 -                           sizeof(l1e));
  12.113 +        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
  12.114 +               / sizeof(l1_pgentry_t));
  12.115 +
  12.116 +        /* Need to __copy_from_user because the p2m is sparse and this
  12.117 +         * part might not exist */
  12.118 +        ret = __copy_from_user(&l1e,
  12.119 +                               &phys_to_machine_mapping[gfn],
  12.120 +                               sizeof(l1e));
  12.121  
  12.122 -    if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
  12.123 -        return _mfn(l1e_get_pfn(l1e));
  12.124 +        if ( ret == 0 ) {
  12.125 +            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
   12.126 +            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
  12.127 +            if ( p2m_is_valid(p2mt) )
  12.128 +                mfn = _mfn(l1e_get_pfn(l1e));
  12.129 +            else 
  12.130 +                /* XXX see above */
  12.131 +                p2mt = p2m_mmio_dm;
  12.132 +        }
  12.133 +    }
  12.134  
  12.135 -    return _mfn(INVALID_MFN);
  12.136 +    *t = p2mt;
  12.137 +    return mfn;
  12.138  }
  12.139  
  12.140  /* Read another domain's P2M table, mapping pages as we go */
  12.141 -mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
  12.142 +mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
  12.143  
  12.144  /* General conversion function from gfn to mfn */
  12.145 -#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
  12.146 -static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
  12.147 +#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
  12.148 +static inline mfn_t _gfn_to_mfn(struct domain *d,
  12.149 +                                unsigned long gfn, p2m_type_t *t)
  12.150  {
  12.151      if ( !paging_mode_translate(d) )
  12.152 +    {
  12.153 +        /* Not necessarily true, but for non-translated guests, we claim
  12.154 +         * it's the most generic kind of memory */
  12.155 +        *t = p2m_ram_rw;
  12.156          return _mfn(gfn);
  12.157 +    }
  12.158      if ( likely(current->domain == d) )
  12.159 -        return gfn_to_mfn_current(gfn);
  12.160 +        return gfn_to_mfn_current(gfn, t);
  12.161      else 
  12.162 -        return gfn_to_mfn_foreign(d, gfn);
  12.163 +        return gfn_to_mfn_foreign(d, gfn, t);
  12.164 +}
  12.165 +
  12.166 +/* Compatibility function exporting the old untyped interface */
  12.167 +static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
  12.168 +{
  12.169 +    mfn_t mfn;
  12.170 +    p2m_type_t t;
  12.171 +    mfn = gfn_to_mfn(d, gpfn, &t);
  12.172 +    if ( p2m_is_valid(t) )
  12.173 +        return mfn_x(mfn);
  12.174 +    return INVALID_MFN;
  12.175  }
  12.176  
  12.177  /* General conversion function from mfn to gfn */
  12.178 @@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
  12.179          return mfn_x(mfn);
  12.180  }
  12.181  
  12.182 -/* Compatibility function for HVM code */
  12.183 -static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
  12.184 -{
  12.185 -    return mfn_x(gfn_to_mfn_current(pfn));
  12.186 -}
  12.187 -
  12.188 -/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
  12.189 -static inline int mmio_space(paddr_t gpa)
  12.190 -{
  12.191 -    unsigned long gfn = gpa >> PAGE_SHIFT;
  12.192 -    return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
  12.193 -}
  12.194 -
  12.195  /* Translate the frame number held in an l1e from guest to machine */
  12.196  static inline l1_pgentry_t
  12.197  gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
  12.198 @@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
  12.199  }
  12.200  
  12.201  
  12.202 -
  12.203  /* Init the datastructures for later use by the p2m code */
  12.204  void p2m_init(struct domain *d);
  12.205  
  12.206 @@ -130,11 +208,12 @@ void guest_physmap_add_page(struct domai
  12.207  void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
  12.208                                 unsigned long mfn);
  12.209  
  12.210 -/* set P2M table l1e flags */
  12.211 -void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
  12.212 +/* Change types across all p2m entries in a domain */
  12.213 +void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
  12.214  
  12.215 -/* set P2M table l1e flags for a gpa */
  12.216 -int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
  12.217 +/* Compare-exchange the type of a single p2m entry */
  12.218 +p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
  12.219 +                           p2m_type_t ot, p2m_type_t nt);
  12.220  
  12.221  #endif /* _XEN_P2M_H */
  12.222
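The new p2m.h comment explains that the type is stored in the PTE's three "available" bits (9-11). As a hedged, purely illustrative example of that encoding (p2m_type_to_flags() is static to p2m.c, so the two helpers are not really callable from one place), the round trip looks like this:

    /* Illustrative only: p2m_ram_logdirty (2) round-trips through bits 9-11. */
    unsigned long flags = p2m_type_to_flags(p2m_ram_logdirty);

    ASSERT(p2m_flags_to_type(flags) == p2m_ram_logdirty);
    ASSERT(!(flags & _PAGE_RW));    /* log-dirty entries are read-only, so guest
                                     * writes fault and can be marked dirty */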