ia64/xen-unstable

changeset 15869:42b925c00d8a

merge with xen-unstable.hg (staging)
author Alex Williamson <alex.williamson@hp.com>
date Mon Sep 10 13:58:56 2007 -0600 (2007-09-10)
parents 7d9b20d91102 154769114a82
children 2635119a1766
files xen/arch/ia64/xen/domain.c
     1.1 --- a/tools/libxen/src/xen_xspolicy.c	Mon Sep 10 13:56:34 2007 -0600
     1.2 +++ b/tools/libxen/src/xen_xspolicy.c	Mon Sep 10 13:58:56 2007 -0600
     1.3 @@ -21,8 +21,8 @@
     1.4  #include <stddef.h>
     1.5  #include <stdlib.h>
     1.6  
     1.7 +#include "xen_internal.h"
     1.8  #include "xen/api/xen_common.h"
     1.9 -#include "xen/api/xen_internal.h"
    1.10  #include "xen/api/xen_xspolicy.h"
    1.11  
    1.12  
     2.1 --- a/xen/Makefile	Mon Sep 10 13:56:34 2007 -0600
     2.2 +++ b/xen/Makefile	Mon Sep 10 13:58:56 2007 -0600
     2.3 @@ -35,12 +35,15 @@ build install debug clean distclean csco
     2.4  		$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/hvm
     2.5  	[ -d $(DESTDIR)/usr/include/xen/io ] || \
     2.6  		$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/io
     2.7 +	[ -d $(DESTDIR)/usr/include/xen/xsm ] || \
     2.8 +		$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/xsm
     2.9  	[ -d $(DESTDIR)/usr/include/xen/foreign ] || \
    2.10  		$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/foreign
    2.11  	$(INSTALL_DATA) include/public/*.h $(DESTDIR)/usr/include/xen
    2.12  	$(INSTALL_DATA) include/public/arch-x86/*.h $(DESTDIR)/usr/include/xen/arch-x86
    2.13  	$(INSTALL_DATA) include/public/hvm/*.h $(DESTDIR)/usr/include/xen/hvm
    2.14  	$(INSTALL_DATA) include/public/io/*.h $(DESTDIR)/usr/include/xen/io
    2.15 +	$(INSTALL_DATA) include/public/xsm/*.h $(DESTDIR)/usr/include/xen/xsm
    2.16  	$(INSTALL_DATA) include/public/foreign/*.h $(DESTDIR)/usr/include/xen/foreign
    2.17  	$(INSTALL_DATA) include/public/COPYING $(DESTDIR)/usr/include/xen
    2.18  
     3.1 --- a/xen/arch/ia64/xen/domain.c	Mon Sep 10 13:56:34 2007 -0600
     3.2 +++ b/xen/arch/ia64/xen/domain.c	Mon Sep 10 13:58:56 2007 -0600
     3.3 @@ -1487,7 +1487,7 @@ int __init construct_dom0(struct domain 
     3.4  	return 0;
     3.5  }
     3.6  
     3.7 -void machine_restart(char * __unused)
     3.8 +void machine_restart(void)
     3.9  {
    3.10  	console_start_sync();
    3.11  	if (running_on_sim)
     4.1 --- a/xen/arch/powerpc/domain.c	Mon Sep 10 13:56:34 2007 -0600
     4.2 +++ b/xen/arch/powerpc/domain.c	Mon Sep 10 13:58:56 2007 -0600
     4.3 @@ -119,7 +119,7 @@ void machine_halt(void)
     4.4      machine_fail(__func__);
     4.5  }
     4.6  
     4.7 -void machine_restart(char * __unused)
     4.8 +void machine_restart(void)
     4.9  {
    4.10      console_start_sync();
    4.11      printk("%s called\n", __func__);
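
Both hunks above, together with the x86 hunk in xen/arch/x86/shutdown.c
further down, drop the never-used command-line argument from
machine_restart(), making the prototype uniform across ia64, powerpc and
x86. A sketch of the caller-side effect (the call site is hypothetical):

    /* Before this changeset: */
    machine_restart(NULL);

    /* After it, the prototype is argument-free... */
    void machine_restart(void);

    /* ...so callers simply write: */
    machine_restart();
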
     5.1 --- a/xen/arch/x86/acpi/power.c	Mon Sep 10 13:56:34 2007 -0600
     5.2 +++ b/xen/arch/x86/acpi/power.c	Mon Sep 10 13:58:56 2007 -0600
     5.3 @@ -181,11 +181,6 @@ static long enter_state_helper(void *dat
     5.4  /*
     5.5   * Dom0 issues this hypercall in place of writing pm1a_cnt. Xen then
     5.6   * takes over control and actually puts the system into the sleep state.
     5.7 - *
     5.8 - * Guest may issue a two-phases write to PM1x_CNT, to work
     5.9 - * around poorly implemented hardware. It's better to keep
    5.10 - * this logic here. Two writes can be differentiated by 
    5.11 - * enable bit setting.
    5.12   */
    5.13  int acpi_enter_sleep(struct xenpf_enter_acpi_sleep *sleep)
    5.14  {
    5.15 @@ -204,16 +199,6 @@ int acpi_enter_sleep(struct xenpf_enter_
    5.16      if ( sleep->flags )
    5.17          return -EINVAL;
    5.18  
    5.19 -    /* Write #1 */
    5.20 -    if ( !(sleep->pm1a_cnt_val & ACPI_BITMASK_SLEEP_ENABLE) )
    5.21 -    {
    5.22 -        outw((u16)sleep->pm1a_cnt_val, acpi_sinfo.pm1a_cnt);
    5.23 -        if ( acpi_sinfo.pm1b_cnt )
    5.24 -            outw((u16)sleep->pm1b_cnt_val, acpi_sinfo.pm1b_cnt);
    5.25 -        return 0;
    5.26 -    }
    5.27 -
    5.28 -    /* Write #2 */
    5.29      acpi_sinfo.pm1a_cnt_val = sleep->pm1a_cnt_val;
    5.30      acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
    5.31      acpi_sinfo.sleep_state = sleep->sleep_state;
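
The deleted comment and code implemented a two-phase protocol: a guest
write to PM1x_CNT with ACPI_BITMASK_SLEEP_ENABLE clear ("write #1") was
forwarded straight to the PM1a/PM1b control ports, to humour hardware that
wants the update split in two. After this change the hypercall does no
port I/O of its own; it simply latches the final control values and the
target sleep state, as in this condensed sketch of what remains:

    /* acpi_enter_sleep(), post-change (condensed): */
    acpi_sinfo.pm1a_cnt_val = sleep->pm1a_cnt_val;
    acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
    acpi_sinfo.sleep_state  = sleep->sleep_state;
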
     6.1 --- a/xen/arch/x86/hvm/hvm.c	Mon Sep 10 13:56:34 2007 -0600
     6.2 +++ b/xen/arch/x86/hvm/hvm.c	Mon Sep 10 13:58:56 2007 -0600
     6.3 @@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
     6.4      struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
     6.5  {
     6.6      struct page_info *page;
     6.7 +    p2m_type_t p2mt;
     6.8      unsigned long mfn;
     6.9      void *va;
    6.10  
    6.11 -    mfn = gmfn_to_mfn(d, gmfn);
    6.12 -    if ( !mfn_valid(mfn) )
    6.13 +    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
    6.14 +    if ( !p2m_is_ram(p2mt) )
    6.15          return -EINVAL;
    6.16 +    ASSERT(mfn_valid(mfn));
    6.17  
    6.18      page = mfn_to_page(mfn);
    6.19      if ( !get_page_and_type(page, d, PGT_writable_page) )
    6.20 @@ -517,7 +519,8 @@ void hvm_triple_fault(void)
    6.21  int hvm_set_cr0(unsigned long value)
    6.22  {
    6.23      struct vcpu *v = current;
    6.24 -    unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
    6.25 +    p2m_type_t p2mt;
    6.26 +    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
    6.27    
    6.28      HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
    6.29  
    6.30 @@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
    6.31          if ( !paging_mode_hap(v->domain) )
    6.32          {
    6.33              /* The guest CR3 must be pointing to the guest physical. */
    6.34 -            mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
    6.35 -            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
    6.36 +            gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
    6.37 +            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    6.38 +            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) || 
    6.39 +                 !get_page(mfn_to_page(mfn), v->domain))
    6.40              {
    6.41                  gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n", 
    6.42                           v->arch.hvm_vcpu.guest_cr[3], mfn);
    6.43 @@ -603,16 +608,18 @@ int hvm_set_cr0(unsigned long value)
    6.44  int hvm_set_cr3(unsigned long value)
    6.45  {
    6.46      unsigned long mfn;
    6.47 +    p2m_type_t p2mt;
    6.48      struct vcpu *v = current;
    6.49  
    6.50      if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
    6.51           (value != v->arch.hvm_vcpu.guest_cr[3]) )
    6.52      {
    6.53 -        /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
    6.54 +        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
    6.55          HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
    6.56 -        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
    6.57 -        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
    6.58 -            goto bad_cr3;
    6.59 +        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
    6.60 +        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
    6.61 +             !get_page(mfn_to_page(mfn), v->domain) )
     6.62 +            goto bad_cr3;
    6.63  
    6.64          put_page(pagetable_get_page(v->arch.guest_table));
    6.65          v->arch.guest_table = pagetable_from_pfn(mfn);
    6.66 @@ -677,6 +684,7 @@ int hvm_set_cr4(unsigned long value)
    6.67  static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
    6.68  {
    6.69      unsigned long gfn, mfn;
    6.70 +    p2m_type_t p2mt;
    6.71      char *p;
    6.72      int count, todo;
    6.73  
    6.74 @@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
    6.75          else
    6.76              gfn = addr >> PAGE_SHIFT;
    6.77          
    6.78 -        mfn = get_mfn_from_gpfn(gfn);
    6.79 +        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
    6.80  
    6.81 -        if ( mfn == INVALID_MFN )
    6.82 +        if ( !p2m_is_ram(p2mt) )
    6.83              return todo;
    6.84 +        ASSERT(mfn_valid(mfn));
    6.85  
    6.86          p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
    6.87  
     7.1 --- a/xen/arch/x86/hvm/io.c	Mon Sep 10 13:56:34 2007 -0600
     7.2 +++ b/xen/arch/x86/hvm/io.c	Mon Sep 10 13:58:56 2007 -0600
     7.3 @@ -826,9 +826,7 @@ void hvm_io_assist(void)
     7.4      ioreq_t *p;
     7.5      struct cpu_user_regs *regs;
     7.6      struct hvm_io_op *io_opp;
     7.7 -    unsigned long gmfn;
     7.8      struct vcpu *v = current;
     7.9 -    struct domain *d = v->domain;
    7.10  
    7.11      io_opp = &v->arch.hvm_vcpu.io_op;
    7.12      regs   = &io_opp->io_context;
    7.13 @@ -862,13 +860,6 @@ void hvm_io_assist(void)
    7.14      hvm_load_cpu_guest_regs(v, regs);
    7.15      memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
    7.16  
    7.17 -    /* Has memory been dirtied? */
    7.18 -    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
    7.19 -    {
    7.20 -        gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
    7.21 -        paging_mark_dirty(d, gmfn);
    7.22 -    }
    7.23 -
    7.24   out:
    7.25      vcpu_end_shutdown_deferral(v);
    7.26  }
     8.1 --- a/xen/arch/x86/hvm/svm/intr.c	Mon Sep 10 13:56:34 2007 -0600
     8.2 +++ b/xen/arch/x86/hvm/svm/intr.c	Mon Sep 10 13:58:56 2007 -0600
     8.3 @@ -30,6 +30,7 @@
     8.4  #include <asm/hvm/hvm.h>
     8.5  #include <asm/hvm/io.h>
     8.6  #include <asm/hvm/support.h>
     8.7 +#include <asm/hvm/vlapic.h>
     8.8  #include <asm/hvm/svm/svm.h>
     8.9  #include <asm/hvm/svm/intr.h>
    8.10  #include <xen/event.h>
    8.11 @@ -99,6 +100,33 @@ static void enable_intr_window(struct vc
    8.12      svm_inject_dummy_vintr(v);
    8.13  }
    8.14  
    8.15 +static void update_cr8_intercept(
    8.16 +    struct vcpu *v, enum hvm_intack masked_intr_source)
    8.17 +{
    8.18 +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
    8.19 +    struct vlapic *vlapic = vcpu_vlapic(v);
    8.20 +    int max_irr;
    8.21 +
    8.22 +    vmcb->cr_intercepts &= ~CR_INTERCEPT_CR8_WRITE;
    8.23 +
    8.24 +    /*
    8.25 +     * If ExtInts are masked then that dominates the TPR --- the 'interrupt
    8.26 +     * window' has already been enabled in this case.
    8.27 +     */
    8.28 +    if ( (masked_intr_source == hvm_intack_lapic) ||
    8.29 +         (masked_intr_source == hvm_intack_pic) )
    8.30 +        return;
    8.31 +
    8.32 +    /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
    8.33 +    if ( !vlapic_enabled(vlapic) || 
    8.34 +         ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
    8.35 +        return;
    8.36 +
    8.37 +    /* Highest-priority pending interrupt is masked by the TPR? */
    8.38 +    if ( (vmcb->vintr.fields.tpr & 0xf) >= (max_irr >> 4) )
    8.39 +        vmcb->cr_intercepts |= CR_INTERCEPT_CR8_WRITE;
    8.40 +}
    8.41 +
    8.42  asmlinkage void svm_intr_assist(void) 
    8.43  {
    8.44      struct vcpu *v = current;
    8.45 @@ -113,7 +141,7 @@ asmlinkage void svm_intr_assist(void)
    8.46      do {
    8.47          intr_source = hvm_vcpu_has_pending_irq(v);
    8.48          if ( likely(intr_source == hvm_intack_none) )
    8.49 -            return;
    8.50 +            goto out;
    8.51  
    8.52          /*
    8.53           * Pending IRQs must be delayed if:
    8.54 @@ -133,7 +161,7 @@ asmlinkage void svm_intr_assist(void)
    8.55               !hvm_interrupts_enabled(v, intr_source) )
    8.56          {
    8.57              enable_intr_window(v, intr_source);
    8.58 -            return;
    8.59 +            goto out;
    8.60          }
    8.61      } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
    8.62  
    8.63 @@ -152,6 +180,9 @@ asmlinkage void svm_intr_assist(void)
    8.64      intr_source = hvm_vcpu_has_pending_irq(v);
    8.65      if ( unlikely(intr_source != hvm_intack_none) )
    8.66          enable_intr_window(v, intr_source);
    8.67 +
    8.68 + out:
    8.69 +    update_cr8_intercept(v, intr_source);
    8.70  }
    8.71  
    8.72  /*
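
Two details of update_cr8_intercept() are worth spelling out. First, only
priority classes (the upper nibbles) are compared: a pending vector is
masked when its class is less than or equal to the TPR's class. Second,
SVM's vTPR field holds just that class in its low four bits, which is why
the test reads "vintr.fields.tpr & 0xf" here while the VMX code further
down shifts the full 8-bit APIC_TASKPRI right by four. A sketch of the
masking test written against a full 8-bit TPR (the helper name is
illustrative):

    /* Is the highest pending vector masked by the task-priority register? */
    static inline int tpr_masks_irq(uint8_t tpr, int max_irr)
    {
        return (tpr >> 4) >= (max_irr >> 4);   /* compare priority classes */
    }

When the test is true, the CR8-write intercept is armed so that the
guest's next TPR update re-enters Xen, at which point the pending
interrupt can be delivered.
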
     9.1 --- a/xen/arch/x86/hvm/svm/svm.c	Mon Sep 10 13:56:34 2007 -0600
     9.2 +++ b/xen/arch/x86/hvm/svm/svm.c	Mon Sep 10 13:58:56 2007 -0600
     9.3 @@ -338,6 +338,7 @@ int svm_vmcb_save(struct vcpu *v, struct
     9.4  int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
     9.5  {
     9.6      unsigned long mfn = 0;
     9.7 +    p2m_type_t p2mt;
     9.8      struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
     9.9  
    9.10      if ( c->pending_valid &&
    9.11 @@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
    9.12      {
    9.13          if ( c->cr0 & X86_CR0_PG )
    9.14          {
    9.15 -            mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
    9.16 -            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
    9.17 +            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
    9.18 +            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
    9.19              {
    9.20                  gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
    9.21                           c->cr3);
    9.22 @@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
    9.23      return 1;
    9.24  }
    9.25  
    9.26 -static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
    9.27 +static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
    9.28  {
    9.29 -    if (mmio_space(gpa)) {
    9.30 +    p2m_type_t p2mt;
    9.31 +    mfn_t mfn;
    9.32 +    unsigned long gfn = gpa >> PAGE_SHIFT;
    9.33 +
    9.34 +    /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
    9.35 +    mfn = gfn_to_mfn_current(gfn, &p2mt);
    9.36 +    if ( p2mt == p2m_mmio_dm )
    9.37 +    {
    9.38          handle_mmio(gpa);
    9.39 -        return 1;
    9.40 +        return;
    9.41      }
    9.42  
    9.43 -    paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
    9.44 -    return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
    9.45 +    /* Log-dirty: mark the page dirty and let the guest write it again */
    9.46 +    paging_mark_dirty(current->domain, mfn_x(mfn));
    9.47 +    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
    9.48  }
    9.49  
    9.50  static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
    9.51 @@ -2144,6 +2153,16 @@ asmlinkage void svm_vmexit_handler(struc
    9.52      eventinj_t eventinj;
    9.53      int inst_len, rc;
    9.54  
    9.55 +    /*
    9.56 +     * Before doing anything else, we need to sync up the VLAPIC's TPR with
    9.57 +     * SVM's vTPR if CR8 writes are currently disabled.  It's OK if the 
    9.58 +     * guest doesn't touch the CR8 (e.g. 32-bit Windows) because we update
     9.59 +     * the vTPR on MMIO writes to the TPR.
    9.60 +     */
    9.61 +    if ( !(vmcb->cr_intercepts & CR_INTERCEPT_CR8_WRITE) )
    9.62 +        vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
    9.63 +                       (vmcb->vintr.fields.tpr & 0x0F) << 4);
    9.64 +
    9.65      exit_reason = vmcb->exitcode;
    9.66  
    9.67      HVMTRACE_2D(VMEXIT, v, vmcb->rip, exit_reason);
    9.68 @@ -2341,8 +2360,7 @@ asmlinkage void svm_vmexit_handler(struc
    9.69  
    9.70      case VMEXIT_NPF:
    9.71          regs->error_code = vmcb->exitinfo1;
    9.72 -        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
    9.73 -            domain_crash(v->domain);
    9.74 +        svm_do_nested_pgfault(vmcb->exitinfo2, regs);
    9.75          break;
    9.76  
    9.77      default:
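
With typed p2m entries, svm_do_nested_pgfault() no longer needs a
success/failure return value: a fault on device-model MMIO is handed to
handle_mmio(), and anything else is assumed to be a log-dirty fault, so
the page is marked dirty and promoted back to read-write for the guest's
retried access. Condensed from the hunk above:

    mfn = gfn_to_mfn_current(gfn, &p2mt);
    if ( p2mt == p2m_mmio_dm )
        handle_mmio(gpa);                     /* emulated MMIO */
    else
    {
        /* Log-dirty fault: record it and restore write access. */
        paging_mark_dirty(current->domain, mfn_x(mfn));
        p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
    }
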
    10.1 --- a/xen/arch/x86/hvm/svm/vmcb.c	Mon Sep 10 13:56:34 2007 -0600
    10.2 +++ b/xen/arch/x86/hvm/svm/vmcb.c	Mon Sep 10 13:58:56 2007 -0600
    10.3 @@ -114,23 +114,29 @@ static int construct_vmcb(struct vcpu *v
    10.4      svm_asid_init_vcpu(v);
    10.5  
    10.6      vmcb->general1_intercepts = 
    10.7 -        GENERAL1_INTERCEPT_INTR         | GENERAL1_INTERCEPT_NMI         |
    10.8 -        GENERAL1_INTERCEPT_SMI          | GENERAL1_INTERCEPT_INIT        |
    10.9 -        GENERAL1_INTERCEPT_CPUID        | GENERAL1_INTERCEPT_INVD        |
   10.10 -        GENERAL1_INTERCEPT_HLT          | GENERAL1_INTERCEPT_INVLPG      | 
   10.11 -        GENERAL1_INTERCEPT_INVLPGA      | GENERAL1_INTERCEPT_IOIO_PROT   |
   10.12 -        GENERAL1_INTERCEPT_MSR_PROT     | GENERAL1_INTERCEPT_SHUTDOWN_EVT;
   10.13 +        GENERAL1_INTERCEPT_INTR        | GENERAL1_INTERCEPT_NMI         |
   10.14 +        GENERAL1_INTERCEPT_SMI         | GENERAL1_INTERCEPT_INIT        |
   10.15 +        GENERAL1_INTERCEPT_CPUID       | GENERAL1_INTERCEPT_INVD        |
   10.16 +        GENERAL1_INTERCEPT_HLT         | GENERAL1_INTERCEPT_INVLPG      | 
   10.17 +        GENERAL1_INTERCEPT_INVLPGA     | GENERAL1_INTERCEPT_IOIO_PROT   |
   10.18 +        GENERAL1_INTERCEPT_MSR_PROT    | GENERAL1_INTERCEPT_SHUTDOWN_EVT;
   10.19      vmcb->general2_intercepts = 
   10.20 -        GENERAL2_INTERCEPT_VMRUN  | GENERAL2_INTERCEPT_VMMCALL | 
   10.21 -        GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE  |
   10.22 -        GENERAL2_INTERCEPT_STGI   | GENERAL2_INTERCEPT_CLGI    |
   10.23 -        GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;
   10.24 +        GENERAL2_INTERCEPT_VMRUN       | GENERAL2_INTERCEPT_VMMCALL     |
   10.25 +        GENERAL2_INTERCEPT_VMLOAD      | GENERAL2_INTERCEPT_VMSAVE      |
   10.26 +        GENERAL2_INTERCEPT_STGI        | GENERAL2_INTERCEPT_CLGI        |
   10.27 +        GENERAL2_INTERCEPT_SKINIT      | GENERAL2_INTERCEPT_RDTSCP;
   10.28  
   10.29      /* Intercept all debug-register writes. */
   10.30      vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
   10.31  
   10.32 -    /* Intercept all control-register accesses, except to CR2. */
   10.33 -    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ | CR_INTERCEPT_CR2_WRITE);
   10.34 +    /*
   10.35 +     * Intercept all control-register accesses except for CR2 reads/writes
   10.36 +     * and CR8 reads (and actually CR8 writes, but that's a special case
   10.37 +     * that's handled in svm/intr.c). 
   10.38 +     */
   10.39 +    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ |
   10.40 +                            CR_INTERCEPT_CR2_WRITE |
   10.41 +                            CR_INTERCEPT_CR8_READ);
   10.42  
   10.43      /* I/O and MSR permission bitmaps. */
   10.44      arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
    11.1 --- a/xen/arch/x86/hvm/vmx/intr.c	Mon Sep 10 13:56:34 2007 -0600
    11.2 +++ b/xen/arch/x86/hvm/vmx/intr.c	Mon Sep 10 13:58:56 2007 -0600
    11.3 @@ -107,22 +107,35 @@ static void enable_intr_window(struct vc
    11.4      }
    11.5  }
    11.6  
    11.7 -static void update_tpr_threshold(struct vlapic *vlapic)
    11.8 +static void update_tpr_threshold(
    11.9 +    struct vcpu *v, enum hvm_intack masked_intr_source)
   11.10  {
   11.11 -    int max_irr, tpr;
   11.12 +    struct vlapic *vlapic = vcpu_vlapic(v);
   11.13 +    int max_irr, tpr, threshold = 0;
   11.14  
   11.15      if ( !cpu_has_vmx_tpr_shadow )
   11.16          return;
   11.17  
   11.18 +    /*
   11.19 +     * If ExtInts are masked then that dominates the TPR --- the 'interrupt
   11.20 +     * window' has already been enabled in this case.
   11.21 +     */
   11.22 +    if ( (masked_intr_source == hvm_intack_lapic) ||
   11.23 +         (masked_intr_source == hvm_intack_pic) )
   11.24 +        goto out;
   11.25 +
   11.26 +    /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
   11.27      if ( !vlapic_enabled(vlapic) || 
   11.28           ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
   11.29 -    {
   11.30 -        __vmwrite(TPR_THRESHOLD, 0);
   11.31 -        return;
   11.32 -    }
   11.33 +        goto out;
   11.34  
   11.35 +    /* Highest-priority pending interrupt is masked by the TPR? */
   11.36      tpr = vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xF0;
   11.37 -    __vmwrite(TPR_THRESHOLD, (max_irr > tpr) ? (tpr >> 4) : (max_irr >> 4));
   11.38 +    if ( (tpr >> 4) >= (max_irr >> 4) )
   11.39 +        threshold = max_irr >> 4;
   11.40 +
   11.41 + out:
   11.42 +    __vmwrite(TPR_THRESHOLD, threshold);
   11.43  }
   11.44  
   11.45  asmlinkage void vmx_intr_assist(void)
   11.46 @@ -171,7 +184,7 @@ asmlinkage void vmx_intr_assist(void)
   11.47          enable_intr_window(v, intr_source);
   11.48  
   11.49   out:
   11.50 -    update_tpr_threshold(vcpu_vlapic(v));
   11.51 +    update_tpr_threshold(v, intr_source);
   11.52  }
   11.53  
   11.54  /*
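
VMX reaches the same goal as the SVM CR8-write intercept through the
TPR_THRESHOLD VMCS field: the CPU takes a VM exit when the guest drops the
priority class of its virtualized TPR below the threshold. So
update_tpr_threshold() programs the field with the class of the highest
pending-but-masked interrupt, and with zero when no exit is needed.
Condensed from the hunk above:

    int threshold = 0;
    tpr = vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xF0;
    if ( (tpr >> 4) >= (max_irr >> 4) )   /* pending IRQ masked by TPR? */
        threshold = max_irr >> 4;         /* exit once TPR drops below it */
    __vmwrite(TPR_THRESHOLD, threshold);
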
    12.1 --- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Sep 10 13:56:34 2007 -0600
    12.2 +++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Sep 10 13:58:56 2007 -0600
    12.3 @@ -566,6 +566,7 @@ void vmx_vmcs_save(struct vcpu *v, struc
    12.4  int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
    12.5  {
    12.6      unsigned long mfn = 0;
    12.7 +    p2m_type_t p2mt;
    12.8  
    12.9      if ( c->pending_valid &&
   12.10           ((c->pending_type == 1) || (c->pending_type > 6) ||
   12.11 @@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
   12.12  
   12.13      if ( c->cr0 & X86_CR0_PG )
   12.14      {
   12.15 -        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
   12.16 -        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
   12.17 +        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
   12.18 +        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
   12.19          {
   12.20              gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
   12.21              return -EINVAL;
   12.22 @@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
   12.23           * Note that this leaf lives at <max-hypervisor-leaf> + 1.
   12.24           */
   12.25          u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
   12.26 -        unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
   12.27 +        p2m_type_t p2mt;
   12.28 +        unsigned long mfn;
   12.29          struct vcpu *v = current;
   12.30          char *p;
   12.31  
   12.32 +        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
   12.33 +
   12.34          gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
   12.35  
   12.36          /* 8-byte aligned valid pseudophys address from vmxassist, please. */
   12.37 -        if ( (value & 7) || (mfn == INVALID_MFN) ||
   12.38 +        if ( (value & 7) || !p2m_is_ram(p2mt) ||
   12.39               !v->arch.hvm_vmx.vmxassist_enabled )
   12.40          {
   12.41              domain_crash(v->domain);
   12.42              return;
   12.43          }
   12.44 +        ASSERT(mfn_valid(mfn));
   12.45  
   12.46          p = map_domain_page(mfn);
   12.47          value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
   12.48 @@ -1905,11 +1910,12 @@ static void vmx_world_save(struct vcpu *
   12.49  static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
   12.50  {
   12.51      unsigned long mfn = 0;
   12.52 +    p2m_type_t p2mt;
   12.53  
   12.54      if ( c->cr0 & X86_CR0_PG )
   12.55      {
   12.56 -        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
   12.57 -        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
   12.58 +        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
   12.59 +        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
   12.60          {
   12.61              gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
   12.62              return -EINVAL;
    13.1 --- a/xen/arch/x86/machine_kexec.c	Mon Sep 10 13:56:34 2007 -0600
    13.2 +++ b/xen/arch/x86/machine_kexec.c	Mon Sep 10 13:58:56 2007 -0600
    13.3 @@ -82,9 +82,6 @@ static void __machine_reboot_kexec(void 
    13.4  
    13.5      smp_send_stop();
    13.6  
    13.7 -    disable_IO_APIC();
    13.8 -    hvm_cpu_down();
    13.9 -
   13.10      machine_kexec(image);
   13.11  }
   13.12  
    14.1 --- a/xen/arch/x86/mm/hap/guest_walk.c	Mon Sep 10 13:56:34 2007 -0600
    14.2 +++ b/xen/arch/x86/mm/hap/guest_walk.c	Mon Sep 10 13:58:56 2007 -0600
    14.3 @@ -28,7 +28,8 @@
    14.4  #include <xen/sched.h>
    14.5  #include <asm/hvm/svm/vmcb.h>
    14.6  #include <asm/domain.h>
    14.7 -#include <asm/shadow.h>
    14.8 +#include <asm/paging.h>
    14.9 +#include <asm/p2m.h>
   14.10  #include <asm/hap.h>
   14.11  
   14.12  #include "private.h"
   14.13 @@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
   14.14      int lev, index;
   14.15      paddr_t gpa = 0;
   14.16      unsigned long gpfn, mfn;
   14.17 +    p2m_type_t p2mt;
   14.18      int success = 1;
   14.19  
   14.20      l1_pgentry_t *l1e;
   14.21 @@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
   14.22      gpfn = (gcr3 >> PAGE_SHIFT);
   14.23      for ( lev = mode; lev >= 1; lev-- )
   14.24      {
   14.25 -        mfn = get_mfn_from_gpfn(gpfn);
   14.26 -        if ( mfn == INVALID_MFN )
   14.27 +        mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
   14.28 +        if ( !p2m_is_ram(p2mt) )
   14.29          {
   14.30              HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
   14.31                         lev);
   14.32              success = 0;
   14.33              break;
   14.34          }
   14.35 +        ASSERT(mfn_valid(mfn));
   14.36 +
   14.37          index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
   14.38  
   14.39  #if GUEST_PAGING_LEVELS >= 4
    15.1 --- a/xen/arch/x86/mm/hap/hap.c	Mon Sep 10 13:56:34 2007 -0600
    15.2 +++ b/xen/arch/x86/mm/hap/hap.c	Mon Sep 10 13:58:56 2007 -0600
    15.3 @@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
    15.4      d->arch.paging.mode |= PG_log_dirty;
    15.5      hap_unlock(d);
    15.6  
    15.7 -    /* set l1e entries of P2M table to NOT_WRITABLE. */
    15.8 -    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
    15.9 +    /* set l1e entries of P2M table to be read-only. */
   15.10 +    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
   15.11      flush_tlb_mask(d->domain_dirty_cpumask);
   15.12      return 0;
   15.13  }
   15.14 @@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain 
   15.15      hap_unlock(d);
   15.16  
   15.17      /* set l1e entries of P2M table with normal mode */
   15.18 -    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
   15.19 +    p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
   15.20      return 0;
   15.21  }
   15.22  
   15.23  void hap_clean_dirty_bitmap(struct domain *d)
   15.24  {
   15.25 -    /* mark physical memory as NOT_WRITEABLE and flush the TLB */
   15.26 -    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
   15.27 +    /* set l1e entries of P2M table to be read-only. */
   15.28 +    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
   15.29      flush_tlb_mask(d->domain_dirty_cpumask);
   15.30  }
   15.31  
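
HAP log-dirty tracking is now expressed through p2m types rather than raw
PTE-flag rewrites: enabling the mode (and every subsequent bitmap clean)
demotes all p2m_ram_rw entries to p2m_ram_logdirty, and the resulting
nested page faults (see svm_do_nested_pgfault above) mark pages dirty and
promote them back one at a time. The lifecycle, condensed from the hunks
above:

    /* Enable log-dirty, and again on every bitmap clean: */
    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
    flush_tlb_mask(d->domain_dirty_cpumask);

    /* Disable log-dirty: */
    p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
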
    16.1 --- a/xen/arch/x86/mm/p2m.c	Mon Sep 10 13:56:34 2007 -0600
    16.2 +++ b/xen/arch/x86/mm/p2m.c	Mon Sep 10 13:58:56 2007 -0600
    16.3 @@ -4,7 +4,7 @@
    16.4   * physical-to-machine mappings for automatically-translated domains.
    16.5   *
    16.6   * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
    16.7 - * Parts of this code are Copyright (c) 2006 by XenSource Inc.
    16.8 + * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
    16.9   * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
   16.10   * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
   16.11   *
   16.12 @@ -93,6 +93,31 @@
   16.13  #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
   16.14  
   16.15  
   16.16 +/* PTE flags for the various types of p2m entry */
   16.17 +#define P2M_BASE_FLAGS \
   16.18 +        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
   16.19 +
   16.20 +static unsigned long p2m_type_to_flags(p2m_type_t t) 
   16.21 +{
   16.22 +    unsigned long flags = (t & 0x7UL) << 9;
   16.23 +    switch(t)
   16.24 +    {
   16.25 +    case p2m_invalid:
   16.26 +    default:
   16.27 +        return flags;
   16.28 +    case p2m_ram_rw:
   16.29 +        return flags | P2M_BASE_FLAGS | _PAGE_RW;
   16.30 +    case p2m_ram_logdirty:
   16.31 +        return flags | P2M_BASE_FLAGS;
   16.32 +    case p2m_ram_ro:
   16.33 +        return flags | P2M_BASE_FLAGS;
   16.34 +    case p2m_mmio_dm:
   16.35 +        return flags;
   16.36 +    case p2m_mmio_direct:
   16.37 +        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
   16.38 +    }
   16.39 +}
   16.40 +
   16.41  
   16.42  // Find the next level's P2M entry, checking for out-of-range gfn's...
   16.43  // Returns NULL on error.
   16.44 @@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
   16.45  }
   16.46  
   16.47  mfn_t
   16.48 -gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
   16.49 +gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
   16.50  /* Read another domain's p2m entries */
   16.51  {
   16.52      mfn_t mfn;
   16.53 -    paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
   16.54 +    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
   16.55      l2_pgentry_t *l2e;
   16.56      l1_pgentry_t *l1e;
   16.57  
   16.58      ASSERT(paging_mode_translate(d));
   16.59 +
   16.60 +    /* XXX This is for compatibility with the old model, where anything not 
   16.61 +     * XXX marked as RAM was considered to be emulated MMIO space.
   16.62 +     * XXX Once we start explicitly registering MMIO regions in the p2m 
   16.63 +     * XXX we will return p2m_invalid for unmapped gfns */
   16.64 +    *t = p2m_mmio_dm;
   16.65 +
   16.66      mfn = pagetable_get_mfn(d->arch.phys_table);
   16.67  
   16.68 -
   16.69 -    if ( gpfn > d->arch.p2m.max_mapped_pfn )
   16.70 +    if ( gfn > d->arch.p2m.max_mapped_pfn )
   16.71          /* This pfn is higher than the highest the p2m map currently holds */
   16.72          return _mfn(INVALID_MFN);
   16.73  
   16.74 @@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
   16.75          return _mfn(INVALID_MFN);
   16.76      }
   16.77      mfn = _mfn(l1e_get_pfn(*l1e));
   16.78 +    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
   16.79      unmap_domain_page(l1e);
   16.80  
   16.81 -    return mfn;
   16.82 +    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
   16.83 +    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
   16.84  }
   16.85  
   16.86  #if P2M_AUDIT
   16.87 @@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
   16.88          return;
   16.89      P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
   16.90  
   16.91 -    ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
   16.92 -    //ASSERT(mfn_to_gfn(d, mfn) == gfn);
   16.93 -
   16.94 -    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
   16.95 +    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
   16.96      set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
   16.97  }
   16.98  
   16.99 @@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
  16.100                         unsigned long mfn)
  16.101  {
  16.102      unsigned long ogfn;
  16.103 +    p2m_type_t ot;
  16.104      mfn_t omfn;
  16.105  
  16.106      if ( !paging_mode_translate(d) )
  16.107 @@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
  16.108  
  16.109      P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
  16.110  
  16.111 -    omfn = gfn_to_mfn(d, gfn);
  16.112 -    if ( mfn_valid(omfn) )
  16.113 +    omfn = gfn_to_mfn(d, gfn, &ot);
  16.114 +    if ( p2m_is_ram(ot) )
  16.115      {
  16.116 -        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
  16.117 +        ASSERT(mfn_valid(omfn));
  16.118          set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
  16.119      }
  16.120  
  16.121 @@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
  16.122          /* This machine frame is already mapped at another physical address */
  16.123          P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
  16.124                    mfn, ogfn, gfn);
  16.125 -        if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
  16.126 +        omfn = gfn_to_mfn(d, ogfn, &ot);
  16.127 +        if ( p2m_is_ram(ot) )
  16.128          {
  16.129 +            ASSERT(mfn_valid(omfn));
  16.130              P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
  16.131                        ogfn , mfn_x(omfn));
  16.132              if ( mfn_x(omfn) == mfn )
  16.133 @@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
  16.134          }
  16.135      }
  16.136  
  16.137 -    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
  16.138 -    set_gpfn_from_mfn(mfn, gfn);
  16.139 +    if ( mfn_valid(_mfn(mfn)) ) 
  16.140 +    {
  16.141 +        set_p2m_entry(d, gfn, _mfn(mfn),
  16.142 +                  p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
  16.143 +        set_gpfn_from_mfn(mfn, gfn);
  16.144 +    }
  16.145 +    else
  16.146 +    {
  16.147 +        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
  16.148 +                 gfn, mfn);
  16.149 +        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
  16.150 +    }
  16.151  
  16.152      audit_p2m(d);
  16.153      p2m_unlock(d);
  16.154  }
  16.155  
  16.156 -/* This function goes through P2M table and modify l1e flags of all pages. Note
  16.157 - * that physical base address of l1e is intact. This function can be used for
  16.158 - * special purpose, such as marking physical memory as NOT WRITABLE for
  16.159 - * tracking dirty pages during live migration.
  16.160 - */
  16.161 -void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
  16.162 +/* Walk the whole p2m table, changing any entries of the old type
  16.163 + * to the new type.  This is used in hardware-assisted paging to 
   16.164 + * quickly enable or disable log-dirty tracking */
  16.165 +void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
  16.166  {
  16.167 -    unsigned long mfn, gfn;
  16.168 +    unsigned long mfn, gfn, flags;
  16.169      l1_pgentry_t l1e_content;
  16.170      l1_pgentry_t *l1e;
  16.171      l2_pgentry_t *l2e;
  16.172 @@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain 
  16.173  
  16.174                  for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
  16.175                  {
  16.176 -                    if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
  16.177 +                    flags = l1e_get_flags(l1e[i1]);
  16.178 +                    if ( p2m_flags_to_type(flags) != ot )
  16.179                          continue;
  16.180                      mfn = l1e_get_pfn(l1e[i1]);
  16.181                      gfn = get_gpfn_from_mfn(mfn);
  16.182 -                    /* create a new 1le entry using l1e_flags */
  16.183 -                    l1e_content = l1e_from_pfn(mfn, l1e_flags);
   16.184 +                    /* create a new l1e entry with the new type */
   16.185 +                    flags = p2m_type_to_flags(nt);
  16.186 +                    l1e_content = l1e_from_pfn(mfn, flags);
  16.187                      paging_write_p2m_entry(d, gfn, &l1e[i1],
  16.188                                             l1mfn, l1e_content, 1);
  16.189                  }
  16.190 @@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain 
  16.191      p2m_unlock(d);
  16.192  }
  16.193  
  16.194 -/* This function traces through P2M table and modifies l1e flags of a specific
  16.195 - * gpa.
  16.196 - */
  16.197 -int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
  16.198 +/* Modify the p2m type of a single gfn from ot to nt, returning the 
  16.199 + * entry's previous type */
  16.200 +p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn, 
  16.201 +                           p2m_type_t ot, p2m_type_t nt)
  16.202  {
  16.203 -    unsigned long gfn;
  16.204 +    p2m_type_t pt;
  16.205      mfn_t mfn;
  16.206  
  16.207      p2m_lock(d);
  16.208  
  16.209 -    gfn = gpa >> PAGE_SHIFT;
  16.210 -    mfn = gfn_to_mfn(d, gfn);
  16.211 -    if ( mfn_valid(mfn) )
  16.212 -        set_p2m_entry(d, gfn, mfn, l1e_flags);
  16.213 +    mfn = gfn_to_mfn(d, gfn, &pt);
  16.214 +    if ( pt == ot )
  16.215 +        set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
  16.216  
  16.217      p2m_unlock(d);
  16.218  
  16.219 -    return 1;
  16.220 +    return pt;
  16.221  }
  16.222  
  16.223  /*
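
p2m_type_to_flags() stores the type in the PTE's software-available bits
11:9 ("(t & 0x7UL) << 9") and lets the architectural flag bits express the
access rights each type implies. Its inverse, p2m_flags_to_type(), is used
throughout this file but not shown in these hunks; assuming the same
encoding, it would look like:

    /* Assumed inverse of p2m_type_to_flags(): recover the type from
     * PTE software bits 11:9. */
    static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
    {
        return (flags >> 9) & 0x7;
    }
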
    17.1 --- a/xen/arch/x86/mm/shadow/common.c	Mon Sep 10 13:56:34 2007 -0600
    17.2 +++ b/xen/arch/x86/mm/shadow/common.c	Mon Sep 10 13:58:56 2007 -0600
    17.3 @@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
    17.4                         l1_pgentry_t new, unsigned int level)
    17.5  {
    17.6      struct domain *d = v->domain;
    17.7 -    mfn_t mfn;
    17.8      
    17.9      shadow_lock(d);
   17.10  
   17.11 -    /* handle physmap_add and physmap_remove */
   17.12 -    mfn = gfn_to_mfn(d, gfn);
   17.13 -    if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
   17.14 -        sh_remove_all_shadows_and_parents(v, mfn);
   17.15 -        if ( sh_remove_all_mappings(v, mfn) )
   17.16 -            flush_tlb_mask(d->domain_dirty_cpumask);    
   17.17 +    /* If we're removing an MFN from the p2m, remove it from the shadows too */
   17.18 +    if ( level == 1 )
   17.19 +    {
   17.20 +        mfn_t mfn = _mfn(l1e_get_pfn(*p));
   17.21 +        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
   17.22 +        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) ) 
   17.23 +        {
   17.24 +            sh_remove_all_shadows_and_parents(v, mfn);
   17.25 +            if ( sh_remove_all_mappings(v, mfn) )
   17.26 +                flush_tlb_mask(d->domain_dirty_cpumask);    
   17.27 +        }
   17.28      }
   17.29 -    
   17.30 -    /* update the entry with new content */
   17.31 +
   17.32 +    /* Update the entry with new content */
   17.33      safe_write_pte(p, new);
   17.34  
   17.35      /* install P2M in monitors for PAE Xen */
    18.1 --- a/xen/arch/x86/mm/shadow/multi.c	Mon Sep 10 13:56:34 2007 -0600
    18.2 +++ b/xen/arch/x86/mm/shadow/multi.c	Mon Sep 10 13:58:56 2007 -0600
    18.3 @@ -209,6 +209,7 @@ static inline int
    18.4  guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
    18.5  {
    18.6      struct domain *d = v->domain;
    18.7 +    p2m_type_t p2mt;
    18.8      ASSERT(!guest_op || shadow_locked_by_me(d));
    18.9      
   18.10      perfc_incr(shadow_guest_walk);
   18.11 @@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
   18.12          + guest_l4_table_offset(va);
   18.13      /* Walk down to the l3e */
   18.14      if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
   18.15 -    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
   18.16 -    if ( !mfn_valid(gw->l3mfn) ) return 1;
   18.17 +    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
   18.18 +    if ( !p2m_is_ram(p2mt) ) return 1;
   18.19 +    ASSERT(mfn_valid(gw->l3mfn));
   18.20      /* This mfn is a pagetable: make sure the guest can't write to it. */
   18.21      if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
   18.22          flush_tlb_mask(d->domain_dirty_cpumask); 
   18.23 @@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
   18.24  #endif /* PAE or 64... */
   18.25      /* Walk down to the l2e */
   18.26      if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
   18.27 -    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
   18.28 -    if ( !mfn_valid(gw->l2mfn) ) return 1;
   18.29 +    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
   18.30 +    if ( !p2m_is_ram(p2mt) ) return 1;
   18.31 +    ASSERT(mfn_valid(gw->l2mfn));
   18.32      /* This mfn is a pagetable: make sure the guest can't write to it. */
   18.33      if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
   18.34          flush_tlb_mask(d->domain_dirty_cpumask); 
   18.35 @@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
   18.36      else 
   18.37      {
   18.38          /* Not a superpage: carry on and find the l1e. */
   18.39 -        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
   18.40 -        if ( !mfn_valid(gw->l1mfn) ) return 1;
   18.41 +        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
   18.42 +        if ( !p2m_is_ram(p2mt) ) return 1;
   18.43 +        ASSERT(mfn_valid(gw->l1mfn));
   18.44          /* This mfn is a pagetable: make sure the guest can't write to it. */
   18.45          if ( guest_op 
   18.46               && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
   18.47 @@ -626,7 +630,7 @@ static always_inline void
   18.48                void *shadow_entry_ptr,
   18.49                int level,
   18.50                fetch_type_t ft, 
   18.51 -              int mmio)
   18.52 +              p2m_type_t p2mt)
   18.53  {
   18.54      guest_l1e_t *gp = guest_entry_ptr;
   18.55      shadow_l1e_t *sp = shadow_entry_ptr;
   18.56 @@ -637,6 +641,13 @@ static always_inline void
   18.57      /* We don't shadow PAE l3s */
   18.58      ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
   18.59  
   18.60 +    /* Check there's something for the shadows to map to */
   18.61 +    if ( !p2m_is_valid(p2mt) )
   18.62 +    {
   18.63 +        *sp = shadow_l1e_empty();
   18.64 +        goto done;
   18.65 +    }
   18.66 +
   18.67      if ( mfn_valid(guest_table_mfn) )
   18.68          /* Handle A and D bit propagation into the guest */
   18.69          gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
   18.70 @@ -658,19 +669,22 @@ static always_inline void
   18.71          goto done;
   18.72      }
   18.73  
   18.74 -    if ( level == 1 && mmio )
   18.75 +    if ( level == 1 && p2mt == p2m_mmio_dm )
   18.76      {
   18.77 -        /* Guest l1e maps MMIO space */
   18.78 +        /* Guest l1e maps emulated MMIO space */
   18.79          *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
   18.80          if ( !d->arch.paging.shadow.has_fast_mmio_entries )
   18.81              d->arch.paging.shadow.has_fast_mmio_entries = 1;
   18.82          goto done;
   18.83      }
   18.84  
   18.85 -    // Must have a valid target_mfn, unless this is a prefetch.  In the
   18.86 +    // Must have a valid target_mfn unless this is a prefetch.  In the
   18.87      // case of a prefetch, an invalid mfn means that we can not usefully
   18.88      // shadow anything, and so we return early.
   18.89      //
   18.90 +    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
   18.91 +     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
   18.92 +     * MMIO areas need to be added to the frame-table to make them "valid". */
   18.93      if ( !mfn_valid(target_mfn) )
   18.94      {
   18.95          ASSERT((ft == ft_prefetch));
   18.96 @@ -718,6 +732,8 @@ static always_inline void
   18.97      // Only allow the guest write access to a page a) on a demand fault,
   18.98      // or b) if the page is already marked as dirty.
   18.99      //
  18.100 +    // (We handle log-dirty entirely inside the shadow code, without using the 
  18.101 +    // p2m_ram_logdirty p2m type: only HAP uses that.)
  18.102      if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
  18.103      {
  18.104          if ( ft & FETCH_TYPE_WRITE ) 
  18.105 @@ -725,6 +741,10 @@ static always_inline void
  18.106          else if ( !sh_mfn_is_dirty(d, target_mfn) )
  18.107              sflags &= ~_PAGE_RW;
  18.108      }
  18.109 +
  18.110 +    /* Read-only memory */
  18.111 +    if ( p2mt == p2m_ram_ro ) 
  18.112 +        sflags &= ~_PAGE_RW;
  18.113      
  18.114      // protect guest page tables
  18.115      //
  18.116 @@ -754,7 +774,12 @@ static always_inline void
  18.117          sflags |= _PAGE_USER;
  18.118      }
  18.119  
  18.120 +    /* MMIO addresses should never be cached */
  18.121 +    if ( p2m_is_mmio(p2mt) )
  18.122 +        sflags |= _PAGE_PCD;
  18.123 +
  18.124      *sp = shadow_l1e_from_mfn(target_mfn, sflags);
  18.125 +
  18.126   done:
  18.127      SHADOW_DEBUG(PROPAGATE,
  18.128                   "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
  18.129 @@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
  18.130                           shadow_l4e_t *sl4e,
  18.131                           fetch_type_t ft)
  18.132  {
  18.133 -    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
  18.134 +    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
  18.135  }
  18.136  
  18.137  static void
  18.138 @@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
  18.139                           shadow_l3e_t *sl3e,
  18.140                           fetch_type_t ft)
  18.141  {
  18.142 -    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
  18.143 +    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
  18.144  }
  18.145  #endif // GUEST_PAGING_LEVELS >= 4
  18.146  
  18.147 @@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
  18.148                           shadow_l2e_t *sl2e,
  18.149                           fetch_type_t ft)
  18.150  {
  18.151 -    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
  18.152 +    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
  18.153  }
  18.154  
  18.155  static void
  18.156 @@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
  18.157                           mfn_t gmfn, 
  18.158                           shadow_l1e_t *sl1e,
  18.159                           fetch_type_t ft, 
  18.160 -                         int mmio)
  18.161 +                         p2m_type_t p2mt)
  18.162  {
  18.163 -    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
  18.164 +    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
  18.165  }
  18.166  
  18.167  
  18.168 @@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
  18.169      shadow_l4e_t *sl4p = se;
  18.170      mfn_t sl3mfn = _mfn(INVALID_MFN);
  18.171      struct domain *d = v->domain;
  18.172 +    p2m_type_t p2mt;
  18.173      int result = 0;
  18.174  
  18.175      perfc_incr(shadow_validate_gl4e_calls);
  18.176 @@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
  18.177      if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
  18.178      {
  18.179          gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
  18.180 -        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
  18.181 -        if ( mfn_valid(gl3mfn) )
  18.182 +        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
  18.183 +        if ( p2m_is_ram(p2mt) )
  18.184              sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
  18.185          else
  18.186              result |= SHADOW_SET_ERROR;
  18.187 @@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
  18.188      guest_l3e_t *new_gl3e = new_ge;
  18.189      shadow_l3e_t *sl3p = se;
  18.190      mfn_t sl2mfn = _mfn(INVALID_MFN);
  18.191 +    p2m_type_t p2mt;
  18.192      int result = 0;
  18.193  
  18.194      perfc_incr(shadow_validate_gl3e_calls);
  18.195 @@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
  18.196      if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
  18.197      {
  18.198          gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
  18.199 -        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
  18.200 -        if ( mfn_valid(gl2mfn) )
  18.201 +        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
  18.202 +        if ( p2m_is_ram(p2mt) )
  18.203              sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
  18.204          else
  18.205              result |= SHADOW_SET_ERROR;
  18.206 @@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
  18.207      guest_l2e_t *new_gl2e = new_ge;
  18.208      shadow_l2e_t *sl2p = se;
  18.209      mfn_t sl1mfn = _mfn(INVALID_MFN);
  18.210 +    p2m_type_t p2mt;
  18.211      int result = 0;
  18.212  
  18.213      perfc_incr(shadow_validate_gl2e_calls);
  18.214 @@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
  18.215          }
  18.216          else
  18.217          {
  18.218 -            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
  18.219 -            if ( mfn_valid(gl1mfn) )
  18.220 +            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
  18.221 +            if ( p2m_is_ram(p2mt) )
  18.222                  sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
  18.223              else
  18.224                  result |= SHADOW_SET_ERROR;
  18.225 @@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
  18.226      shadow_l1e_t *sl1p = se;
  18.227      gfn_t gfn;
  18.228      mfn_t gmfn;
  18.229 -    int result = 0, mmio;
  18.230 +    p2m_type_t p2mt;
  18.231 +    int result = 0;
  18.232  
  18.233      perfc_incr(shadow_validate_gl1e_calls);
  18.234  
  18.235      gfn = guest_l1e_get_gfn(*new_gl1e);
  18.236 -    gmfn = gfn_to_mfn(v->domain, gfn);
  18.237 -
  18.238 -    mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
  18.239 +    gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
  18.240 +
  18.241      l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e, 
  18.242 -                             ft_prefetch, mmio);
  18.243 +                             ft_prefetch, p2mt);
  18.244      
  18.245      result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
  18.246      return result;
  18.247 @@ -2554,12 +2582,13 @@ static inline void reset_early_unshadow(
  18.248  static void sh_prefetch(struct vcpu *v, walk_t *gw, 
  18.249                          shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
  18.250  {
  18.251 -    int i, dist, mmio;
  18.252 +    int i, dist;
  18.253      gfn_t gfn;
  18.254      mfn_t gmfn;
  18.255      guest_l1e_t gl1e;
  18.256      shadow_l1e_t sl1e;
  18.257      u32 gflags;
  18.258 +    p2m_type_t p2mt;
  18.259  
  18.260      /* Prefetch no further than the end of the _shadow_ l1 MFN */
  18.261      dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
  18.262 @@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v, 
  18.263  
  18.264          /* Look at the gfn that the l1e is pointing at */
  18.265          gfn = guest_l1e_get_gfn(gl1e);
  18.266 -        gmfn = gfn_to_mfn(v->domain, gfn);
  18.267 -        mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
  18.268 +        gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
  18.269  
  18.270          /* Propagate the entry.  Safe to use a pointer to our local 
  18.271           * gl1e, since this is not a demand-fetch so there will be no 
  18.272           * write-back to the guest. */
  18.273          l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
  18.274 -                                 gmfn, &sl1e, ft_prefetch, mmio);
  18.275 +                                 gmfn, &sl1e, ft_prefetch, p2mt);
  18.276          (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
  18.277      }
  18.278  }
  18.279 @@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
  18.280      paddr_t gpa;
  18.281      struct sh_emulate_ctxt emul_ctxt;
  18.282      struct x86_emulate_ops *emul_ops;
  18.283 -    int r, mmio;
  18.284 +    int r;
  18.285      fetch_type_t ft = 0;
  18.286 +    p2m_type_t p2mt;
  18.287  
  18.288      SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
  18.289                     v->domain->domain_id, v->vcpu_id, va, regs->error_code);
  18.290 @@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
  18.291  
  18.292      /* What mfn is the guest trying to access? */
  18.293      gfn = guest_l1e_get_gfn(gw.eff_l1e);
  18.294 -    gmfn = gfn_to_mfn(d, gfn);
  18.295 -    mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
  18.296 -
  18.297 -    if ( !mmio && !mfn_valid(gmfn) )
  18.298 +    gmfn = gfn_to_mfn(d, gfn, &p2mt);
  18.299 +
  18.300 +    if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
  18.301      {
  18.302          perfc_incr(shadow_fault_bail_bad_gfn);
  18.303          SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n", 
  18.304 @@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
  18.305  
  18.306      /* Calculate the shadow entry and write it */
  18.307      l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn, 
  18.308 -                             gmfn, &sl1e, ft, mmio);
  18.309 +                             gmfn, &sl1e, ft, p2mt);
  18.310      r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
  18.311  
  18.312  #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
  18.313 @@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
  18.314          }
  18.315      }
  18.316  
  18.317 -    if ( mmio ) 
  18.318 +    /* Need to hand off device-model MMIO and writes to read-only
  18.319 +     * memory to the device model */
  18.320 +    if ( p2mt == p2m_mmio_dm 
  18.321 +         || (p2mt == p2m_ram_ro && ft == ft_demand_write) ) 
  18.322      {
  18.323          gpa = guest_walk_to_gpa(&gw);
  18.324          goto mmio;
  18.325 @@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
  18.326          int flush = 0;
  18.327          gfn_t gl2gfn;
  18.328          mfn_t gl2mfn;
  18.329 +        p2m_type_t p2mt;
  18.330          guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
  18.331          /* First, make all four entries read-only. */
  18.332          for ( i = 0; i < 4; i++ )
  18.333 @@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
  18.334              if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
  18.335              {
  18.336                  gl2gfn = guest_l3e_get_gfn(gl3e[i]);
  18.337 -                gl2mfn = gfn_to_mfn(d, gl2gfn);
  18.338 -                flush |= sh_remove_write_access(v, gl2mfn, 2, 0); 
  18.339 +                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
  18.340 +                if ( p2m_is_ram(p2mt) )
  18.341 +                    flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
  18.342              }
  18.343          }
  18.344          if ( flush ) 
  18.345 @@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
  18.346              if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
  18.347              {
  18.348                  gl2gfn = guest_l3e_get_gfn(gl3e[i]);
  18.349 -                gl2mfn = gfn_to_mfn(d, gl2gfn);
  18.350 -                sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
  18.351 -                                       ? SH_type_l2h_shadow 
  18.352 -                                       : SH_type_l2_shadow);
  18.353 +                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
  18.354 +                if ( p2m_is_ram(p2mt) )
  18.355 +                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3) 
  18.356 +                                           ? SH_type_l2h_shadow 
  18.357 +                                           : SH_type_l2_shadow);
  18.358 +                else
  18.359 +                    sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
  18.360              }
  18.361              else
  18.362 -                /* The guest is not present: clear out the shadow. */
  18.363                  sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0); 
  18.364          }
  18.365      }
  18.366 @@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
  18.367      u32 flags, errcode;
  18.368      gfn_t gfn;
  18.369      mfn_t mfn;
  18.370 +    p2m_type_t p2mt;
  18.371  
  18.372      /* We don't emulate user-mode writes to page tables */
  18.373      if ( ring_3(sh_ctxt->ctxt.regs) ) 
  18.374 @@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
  18.375          }
  18.376      }
  18.377  #endif
  18.378 -    mfn = gfn_to_mfn(v->domain, gfn);
  18.379  
  18.380      errcode = PFEC_write_access;
  18.381      if ( !(flags & _PAGE_PRESENT) ) 
  18.382 @@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
  18.383      if ( !(flags & _PAGE_RW) ) 
  18.384          goto page_fault;
  18.385  
  18.386 -    if ( mfn_valid(mfn) )
  18.387 +    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
  18.388 +    if ( p2m_is_ram(p2mt) )
  18.389      {
  18.390 +        ASSERT(mfn_valid(mfn));
  18.391          *mfnp = mfn;
  18.392          v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
  18.393          return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
  18.394 @@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
  18.395  /* Convert this gfn to an mfn in the manner appropriate for the
  18.396   * guest pagetable it's used in (gmfn) */ 
  18.397  {
  18.398 +    p2m_type_t p2mt;
  18.399      if ( !shadow_mode_translate(v->domain) )
  18.400          return _mfn(gfn_x(gfn));
  18.401      
  18.402 @@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
  18.403           != PGT_writable_page ) 
  18.404          return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
  18.405      else 
  18.406 -        return gfn_to_mfn(v->domain, gfn);
  18.407 +        return gfn_to_mfn(v->domain, gfn, &p2mt);
  18.408  } 
  18.409  
  18.410  
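
Taken together, the multi.c hunks above replace the old boolean "mmio" test
with a lookup that returns both an mfn and a p2m type, and then gate every
RAM-only operation on p2m_is_ram(). A minimal sketch of the pattern, using
the generic gfn_to_mfn() interface from p2m.h below (the function name and
the is_write flag are illustrative, not part of this changeset):

    static int wants_device_model(struct domain *d, unsigned long gfn,
                                  int is_write)
    {
        p2m_type_t p2mt;
        mfn_t mfn = gfn_to_mfn(d, gfn, &p2mt);    /* typed lookup */

        if ( p2mt == p2m_mmio_dm
             || (p2mt == p2m_ram_ro && is_write) )
            return 1;                  /* hand off to the device model */

        if ( p2m_is_ram(p2mt) )
            ASSERT(mfn_valid(mfn));    /* RAM types map real frames */
        return 0;                      /* handle in the shadow code */
    }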
    19.1 --- a/xen/arch/x86/mm/shadow/types.h	Mon Sep 10 13:56:34 2007 -0600
    19.2 +++ b/xen/arch/x86/mm/shadow/types.h	Mon Sep 10 13:58:56 2007 -0600
    19.3 @@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
    19.4  
    19.5  /* Override gfn_to_mfn to work with gfn_t */
    19.6  #undef gfn_to_mfn
    19.7 -#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
    19.8 +#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
    19.9  
   19.10  
   19.11  /* Type used for recording a walk through guest pagetables.  It is
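
With the override above, shadow code keeps passing the type-safe gfn_t
wrapper while the new type pointer travels through unchanged. A hypothetical
use (assuming the existing guest_l1e_get_gfn() accessor and a vcpu v):

    p2m_type_t p2mt;
    gfn_t gfn = guest_l1e_get_gfn(gl1e);            /* gfn_t, not a raw long */
    mfn_t mfn = gfn_to_mfn(v->domain, gfn, &p2mt);  /* macro unwraps the gfn_t */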
    20.1 --- a/xen/arch/x86/shutdown.c	Mon Sep 10 13:56:34 2007 -0600
    20.2 +++ b/xen/arch/x86/shutdown.c	Mon Sep 10 13:58:56 2007 -0600
    20.3 @@ -197,7 +197,7 @@ static void machine_real_restart(const u
    20.4  
    20.5  #endif
    20.6  
    20.7 -void machine_restart(char *cmd)
    20.8 +void machine_restart(void)
    20.9  {
   20.10      int i;
   20.11  
   20.12 @@ -216,18 +216,12 @@ void machine_restart(char *cmd)
   20.13              safe_halt();
   20.14      }
   20.15  
   20.16 -    /*
   20.17 -     * Stop all CPUs and turn off local APICs and the IO-APIC, so
   20.18 -     * other OSs see a clean IRQ state.
   20.19 -     */
   20.20      smp_send_stop();
   20.21 -    disable_IO_APIC();
   20.22 -    hvm_cpu_down();
   20.23  
   20.24      /* Rebooting needs to touch the page at absolute address 0. */
   20.25      *((unsigned short *)__va(0x472)) = reboot_mode;
   20.26  
   20.27 -    if (reboot_thru_bios <= 0)
   20.28 +    if ( reboot_thru_bios <= 0 )
   20.29      {
   20.30          for ( ; ; )
   20.31          {
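
With the IRQ-quiescing calls moved out of machine_restart(), the whole
"stop CPUs, silence APICs" job now lives in smp_send_stop() (see the smp.c
hunk below), and the restart path reduces to roughly this flow (a sketch of
the resulting sequence, not new code):

    smp_send_stop();     /* parks other CPUs and disables the local APICs,
                          * the IO-APIC and HVM state */
    *((unsigned short *)__va(0x472)) = reboot_mode;
    /* ...then reboot via the keyboard controller or the BIOS, as before. */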
    21.1 --- a/xen/arch/x86/smp.c	Mon Sep 10 13:56:34 2007 -0600
    21.2 +++ b/xen/arch/x86/smp.c	Mon Sep 10 13:58:56 2007 -0600
    21.3 @@ -279,6 +279,19 @@ int on_selected_cpus(
    21.4  
    21.5      ASSERT(local_irq_is_enabled());
    21.6  
    21.7 +    /* Legacy UP system with no APIC to deliver IPIs? */
    21.8 +    if ( unlikely(!cpu_has_apic) )
    21.9 +    {
   21.10 +        ASSERT(num_online_cpus() == 1);
   21.11 +        if ( cpu_isset(0, selected) )
   21.12 +        {
   21.13 +            local_irq_disable();
   21.14 +            func(info);
   21.15 +            local_irq_enable();
   21.16 +        }
   21.17 +        return 0;
   21.18 +    }
   21.19 +
   21.20      if ( nr_cpus == 0 )
   21.21          return 0;
   21.22  
   21.23 @@ -306,23 +319,33 @@ int on_selected_cpus(
   21.24  
   21.25  static void stop_this_cpu (void *dummy)
   21.26  {
   21.27 -    cpu_clear(smp_processor_id(), cpu_online_map);
   21.28 -
   21.29 -    local_irq_disable();
   21.30      disable_local_APIC();
   21.31      hvm_cpu_down();
   21.32  
   21.33 +    cpu_clear(smp_processor_id(), cpu_online_map);
   21.34 +
   21.35      for ( ; ; )
   21.36          __asm__ __volatile__ ( "hlt" );
   21.37  }
   21.38  
   21.39 +/*
   21.40 + * Stop all CPUs and turn off local APICs and the IO-APIC, so other OSs see a 
   21.41 + * clean IRQ state.
   21.42 + */
   21.43  void smp_send_stop(void)
   21.44  {
   21.45 -    /* Stop all other CPUs in the system. */
   21.46 +    int timeout = 10;
   21.47 +
   21.48      smp_call_function(stop_this_cpu, NULL, 1, 0);
   21.49  
   21.50 +    /* Wait up to 10ms for all other CPUs to go offline. */
   21.51 +    while ( (num_online_cpus() > 1) && (timeout-- > 0) )
   21.52 +        mdelay(1);
   21.53 +
   21.54      local_irq_disable();
   21.55      disable_local_APIC();
   21.56 +    disable_IO_APIC();
   21.57 +    hvm_cpu_down();
   21.58      local_irq_enable();
   21.59  }
   21.60  
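
The APIC-less fallback makes on_selected_cpus() safe on legacy uniprocessor
boxes without callers having to special-case anything. A hypothetical caller,
assuming the existing (cpumask, func, info, retry, wait) signature:

    static void say_hello(void *info)
    {
        printk("CPU%d: %s\n", smp_processor_id(), (char *)info);
    }

    /* With no APIC this runs say_hello() locally, IRQs disabled. */
    on_selected_cpus(cpumask_of_cpu(0), say_hello, "hello", 1, 1);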
    22.1 --- a/xen/common/keyhandler.c	Mon Sep 10 13:56:34 2007 -0600
    22.2 +++ b/xen/common/keyhandler.c	Mon Sep 10 13:58:56 2007 -0600
    22.3 @@ -123,7 +123,7 @@ static void dump_registers(unsigned char
    22.4  static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
    22.5  {
    22.6      printk("'%c' pressed -> rebooting machine\n", key);
    22.7 -    machine_restart(NULL);
    22.8 +    machine_restart();
    22.9  }
   22.10  
   22.11  static void cpuset_print(char *set, int size, cpumask_t mask)
    23.1 --- a/xen/common/shutdown.c	Mon Sep 10 13:56:34 2007 -0600
    23.2 +++ b/xen/common/shutdown.c	Mon Sep 10 13:58:56 2007 -0600
    23.3 @@ -24,7 +24,7 @@ static void maybe_reboot(void)
    23.4          printk("rebooting machine in 5 seconds.\n");
    23.5          watchdog_disable();
    23.6          mdelay(5000);
    23.7 -        machine_restart(NULL);
    23.8 +        machine_restart();
    23.9      }
   23.10  }
   23.11  
   23.12 @@ -50,7 +50,7 @@ void dom0_shutdown(u8 reason)
   23.13      case SHUTDOWN_reboot:
   23.14      {
   23.15          printk("Domain 0 shutdown: rebooting machine.\n");
   23.16 -        machine_restart(NULL);
   23.17 +        machine_restart();
   23.18          break; /* not reached */
   23.19      }
   23.20  
    24.1 --- a/xen/drivers/char/console.c	Mon Sep 10 13:56:34 2007 -0600
    24.2 +++ b/xen/drivers/char/console.c	Mon Sep 10 13:58:56 2007 -0600
    24.3 @@ -895,7 +895,7 @@ void panic(const char *fmt, ...)
    24.4      {
    24.5          watchdog_disable();
    24.6          mdelay(5000);
    24.7 -        machine_restart(NULL);
    24.8 +        machine_restart();
    24.9      }
   24.10  }
   24.11  
    25.1 --- a/xen/include/asm-x86/mm.h	Mon Sep 10 13:56:34 2007 -0600
    25.2 +++ b/xen/include/asm-x86/mm.h	Mon Sep 10 13:58:56 2007 -0600
    25.3 @@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
    25.4        ? get_gpfn_from_mfn(mfn)                          \
    25.5        : (mfn) )
    25.6  
    25.7 -#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))
    25.8 -
    25.9  #define INVALID_MFN             (~0UL)
   25.10  
   25.11  #ifdef CONFIG_COMPAT
    26.1 --- a/xen/include/asm-x86/p2m.h	Mon Sep 10 13:56:34 2007 -0600
    26.2 +++ b/xen/include/asm-x86/p2m.h	Mon Sep 10 13:58:56 2007 -0600
    26.3 @@ -4,7 +4,7 @@
    26.4   * physical-to-machine mappings for automatically-translated domains.
    26.5   *
    26.6   * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
    26.7 - * Parts of this code are Copyright (c) 2006 by XenSource Inc.
    26.8 + * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
    26.9   * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
   26.10   * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
   26.11   *
   26.12 @@ -27,49 +27,141 @@
   26.13  #define _XEN_P2M_H
   26.14  
   26.15  
   26.16 -/* The phys_to_machine_mapping is the reversed mapping of MPT for full
   26.17 - * virtualization.  It is only used by shadow_mode_translate()==true
   26.18 - * guests, so we steal the address space that would have normally
   26.19 - * been used by the read-only MPT map.
   26.20 +/*
   26.21 + * The phys_to_machine_mapping maps guest physical frame numbers 
   26.22 + * to machine frame numbers.  It only exists for paging_mode_translate 
   26.23 + * guests. It is organised in page-table format, which:
   26.24 + *
   26.25 + * (1) allows us to use it directly as the second pagetable in hardware-
   26.26 + *     assisted paging and (hopefully) iommu support; and 
   26.27 + * (2) lets us map it directly into the guest vcpus' virtual address space 
   26.28 + *     as a linear pagetable, so we can read and write it easily.
   26.29 + *
   26.30 + * For (2) we steal the address space that would have normally been used
   26.31 + * by the read-only MPT map in a non-translated guest.  (For 
   26.32 + * paging_mode_external() guests this mapping is in the monitor table.)
   26.33   */
   26.34  #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
   26.35  
   26.36 +/*
   26.37 + * The upper levels of the p2m pagetable always contain full rights; all 
   26.38 + * variation in the access-control bits happens in the level-1 PTEs.
   26.39 + * 
   26.40 + * In addition to the phys-to-machine translation, each p2m PTE contains
   26.41 + * *type* information about the gfn it translates, helping Xen to decide
   26.42 + * on the correct course of action when handling a page-fault to that
   26.43 + * guest frame.  We store the type in the "available" bits of the PTEs
   26.44 + * in the table, which gives us 8 possible types on 32-bit systems.
   26.45 + * Further expansions of the type system will only be supported on
   26.46 + * 64-bit Xen.
   26.47 + */
   26.48 +typedef enum {
   26.49 +    p2m_invalid = 0,            /* Nothing mapped here */
   26.50 +    p2m_ram_rw = 1,             /* Normal read/write guest RAM */
   26.51 +    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
   26.52 +    p2m_ram_ro = 3,             /* Read-only; writes go to the device model */
   26.53 +    p2m_mmio_dm = 4,            /* Reads and writes go to the device model */
   26.54 +    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
   26.55 +} p2m_type_t;
   26.56  
   26.57 -/* Read the current domain's P2M table. */
   26.58 -static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
   26.59 -{
   26.60 -    l1_pgentry_t l1e = l1e_empty();
   26.61 -    int ret;
   26.62 +/* We use bitmaps and masks to handle groups of types */
   26.63 +#define p2m_to_mask(_t) (1UL << (_t))
   26.64 +
   26.65 +/* RAM types, which map to real machine frames */
   26.66 +#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)          \
   26.67 +                       | p2m_to_mask(p2m_ram_logdirty)  \
   26.68 +                       | p2m_to_mask(p2m_ram_ro))
   26.69  
   26.70 -    if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
   26.71 -        return _mfn(INVALID_MFN);
   26.72 +/* MMIO types, which don't have to map to anything in the frametable */
   26.73 +#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
   26.74 +                        | p2m_to_mask(p2m_mmio_direct))
   26.75 +
   26.76 +/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
   26.77 +#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
   26.78 +                      | p2m_to_mask(p2m_ram_ro))
   26.79 +
   26.80 +/* Useful predicates */
   26.81 +#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
   26.82 +#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
   26.83 +#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
   26.84 +#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
   26.85  
   26.86 -    /* Don't read off the end of the p2m table */
   26.87 -    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
   26.88 +/* Extract the type from the PTE flags that store it */
   26.89 +static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
   26.90 +{
   26.91 +    /* The type is stored in the "available" PTE bits 9, 10 and 11 */
   26.92 +    return (flags >> 9) & 0x7;
   26.93 +}
   26.94 + 
   26.95 +/* Read the current domain's p2m table (through the linear mapping). */
   26.96 +static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
   26.97 +{
   26.98 +    mfn_t mfn = _mfn(INVALID_MFN);
   26.99 +    p2m_type_t p2mt = p2m_mmio_dm;
  26.100 +    /* XXX This is for compatibility with the old model, where anything not 
  26.101 +     * XXX marked as RAM was considered to be emulated MMIO space.
  26.102 +     * XXX Once we start explicitly registering MMIO regions in the p2m 
  26.103 +     * XXX we will return p2m_invalid for unmapped gfns */
  26.104 +
  26.105 +    if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
  26.106 +    {
  26.107 +        l1_pgentry_t l1e = l1e_empty();
  26.108 +        int ret;
  26.109  
  26.110 -    ret = __copy_from_user(&l1e,
  26.111 -                           &phys_to_machine_mapping[gfn],
  26.112 -                           sizeof(l1e));
  26.113 +        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) 
  26.114 +               / sizeof(l1_pgentry_t));
  26.115 +
  26.116 +        /* Need to __copy_from_user because the p2m is sparse and this
  26.117 +         * part might not exist */
  26.118 +        ret = __copy_from_user(&l1e,
  26.119 +                               &phys_to_machine_mapping[gfn],
  26.120 +                               sizeof(l1e));
  26.121  
  26.122 -    if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
  26.123 -        return _mfn(l1e_get_pfn(l1e));
  26.124 +        if ( ret == 0 ) {
  26.125 +            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
  26.126 +            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
  26.127 +            if ( p2m_is_valid(p2mt) )
  26.128 +                mfn = _mfn(l1e_get_pfn(l1e));
  26.129 +            else 
  26.130 +                /* XXX see above */
  26.131 +                p2mt = p2m_mmio_dm;
  26.132 +        }
  26.133 +    }
  26.134  
  26.135 -    return _mfn(INVALID_MFN);
  26.136 +    *t = p2mt;
  26.137 +    return mfn;
  26.138  }
  26.139  
  26.140  /* Read another domain's P2M table, mapping pages as we go */
  26.141 -mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
  26.142 +mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
  26.143  
  26.144  /* General conversion function from gfn to mfn */
  26.145 -#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
  26.146 -static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
  26.147 +#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
  26.148 +static inline mfn_t _gfn_to_mfn(struct domain *d,
  26.149 +                                unsigned long gfn, p2m_type_t *t)
  26.150  {
  26.151      if ( !paging_mode_translate(d) )
  26.152 +    {
  26.153 +        /* Not necessarily true, but for non-translated guests, we claim
  26.154 +         * it's the most generic kind of memory */
  26.155 +        *t = p2m_ram_rw;
  26.156          return _mfn(gfn);
  26.157 +    }
  26.158      if ( likely(current->domain == d) )
  26.159 -        return gfn_to_mfn_current(gfn);
  26.160 +        return gfn_to_mfn_current(gfn, t);
  26.161      else 
  26.162 -        return gfn_to_mfn_foreign(d, gfn);
  26.163 +        return gfn_to_mfn_foreign(d, gfn, t);
  26.164 +}
  26.165 +
  26.166 +/* Compatibility function exporting the old untyped interface */
  26.167 +static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
  26.168 +{
  26.169 +    mfn_t mfn;
  26.170 +    p2m_type_t t;
  26.171 +    mfn = gfn_to_mfn(d, gpfn, &t);
  26.172 +    if ( p2m_is_valid(t) )
  26.173 +        return mfn_x(mfn);
  26.174 +    return INVALID_MFN;
  26.175  }
  26.176  
  26.177  /* General conversion function from mfn to gfn */
  26.178 @@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
  26.179          return mfn_x(mfn);
  26.180  }
  26.181  
  26.182 -/* Compatibility function for HVM code */
  26.183 -static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
  26.184 -{
  26.185 -    return mfn_x(gfn_to_mfn_current(pfn));
  26.186 -}
  26.187 -
  26.188 -/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
  26.189 -static inline int mmio_space(paddr_t gpa)
  26.190 -{
  26.191 -    unsigned long gfn = gpa >> PAGE_SHIFT;
  26.192 -    return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
  26.193 -}
  26.194 -
  26.195  /* Translate the frame number held in an l1e from guest to machine */
  26.196  static inline l1_pgentry_t
  26.197  gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
  26.198 @@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
  26.199  }
  26.200  
  26.201  
  26.202 -
  26.203  /* Init the datastructures for later use by the p2m code */
  26.204  void p2m_init(struct domain *d);
  26.205  
  26.206 @@ -130,11 +208,12 @@ void guest_physmap_add_page(struct domai
  26.207  void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
  26.208                                 unsigned long mfn);
  26.209  
  26.210 -/* set P2M table l1e flags */
  26.211 -void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
  26.212 +/* Change types across all p2m entries in a domain */
  26.213 +void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
  26.214  
  26.215 -/* set P2M table l1e flags for a gpa */
  26.216 -int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
  26.217 +/* Compare-exchange the type of a single p2m entry */
  26.218 +p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
  26.219 +                           p2m_type_t ot, p2m_type_t nt);
  26.220  
  26.221  #endif /* _XEN_P2M_H */
  26.222  
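
Since the type occupies PTE bits 9-11, the encode side is just the inverse
shift of p2m_flags_to_type(). A minimal sketch, assuming this layout
(hypothetical helper; a real encoder would also have to clear _PAGE_RW for
the P2M_RO_TYPES, per the comment above):

    /* Pack a p2m type into the "available" PTE bits 9, 10 and 11. */
    static inline unsigned long p2m_type_to_flags(p2m_type_t t)
    {
        return (unsigned long)t << 9;
    }

    /* Round-trip: p2m_flags_to_type(p2m_type_to_flags(t)) == t for all
     * six types, since they fit in three bits. */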
    27.1 --- a/xen/include/xen/shutdown.h	Mon Sep 10 13:56:34 2007 -0600
    27.2 +++ b/xen/include/xen/shutdown.h	Mon Sep 10 13:58:56 2007 -0600
    27.3 @@ -6,7 +6,7 @@ extern int opt_noreboot;
    27.4  
    27.5  void dom0_shutdown(u8 reason);
    27.6  
    27.7 -void machine_restart(char *cmd);
    27.8 +void machine_restart(void);
    27.9  void machine_halt(void);
   27.10  void machine_power_off(void);
   27.11