ia64/xen-unstable
changeset 15869:42b925c00d8a
merge with xen-unstable.hg (staging)
author      Alex Williamson <alex.williamson@hp.com>
date        Mon Sep 10 13:58:56 2007 -0600 (2007-09-10)
parents     7d9b20d91102 154769114a82
children    2635119a1766
files       xen/arch/ia64/xen/domain.c
--- a/tools/libxen/src/xen_xspolicy.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/tools/libxen/src/xen_xspolicy.c	Mon Sep 10 13:58:56 2007 -0600
@@ -21,8 +21,8 @@
 #include <stddef.h>
 #include <stdlib.h>
 
+#include "xen_internal.h"
 #include "xen/api/xen_common.h"
-#include "xen/api/xen_internal.h"
 #include "xen/api/xen_xspolicy.h"
 
--- a/xen/Makefile	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/Makefile	Mon Sep 10 13:58:56 2007 -0600
@@ -35,12 +35,15 @@ build install debug clean distclean csco
 	$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/hvm
 	[ -d $(DESTDIR)/usr/include/xen/io ] || \
 		$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/io
+	[ -d $(DESTDIR)/usr/include/xen/xsm ] || \
+		$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/xsm
 	[ -d $(DESTDIR)/usr/include/xen/foreign ] || \
 		$(INSTALL_DIR) $(DESTDIR)/usr/include/xen/foreign
 	$(INSTALL_DATA) include/public/*.h $(DESTDIR)/usr/include/xen
 	$(INSTALL_DATA) include/public/arch-x86/*.h $(DESTDIR)/usr/include/xen/arch-x86
 	$(INSTALL_DATA) include/public/hvm/*.h $(DESTDIR)/usr/include/xen/hvm
 	$(INSTALL_DATA) include/public/io/*.h $(DESTDIR)/usr/include/xen/io
+	$(INSTALL_DATA) include/public/xsm/*.h $(DESTDIR)/usr/include/xen/xsm
 	$(INSTALL_DATA) include/public/foreign/*.h $(DESTDIR)/usr/include/xen/foreign
 	$(INSTALL_DATA) include/public/COPYING $(DESTDIR)/usr/include/xen
--- a/xen/arch/ia64/xen/domain.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/ia64/xen/domain.c	Mon Sep 10 13:58:56 2007 -0600
@@ -1487,7 +1487,7 @@ int __init construct_dom0(struct domain
 	return 0;
 }
 
-void machine_restart(char * __unused)
+void machine_restart(void)
 {
 	console_start_sync();
 	if (running_on_sim)
--- a/xen/arch/powerpc/domain.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/powerpc/domain.c	Mon Sep 10 13:58:56 2007 -0600
@@ -119,7 +119,7 @@ void machine_halt(void)
     machine_fail(__func__);
 }
 
-void machine_restart(char * __unused)
+void machine_restart(void)
 {
     console_start_sync();
     printk("%s called\n", __func__);
--- a/xen/arch/x86/acpi/power.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/acpi/power.c	Mon Sep 10 13:58:56 2007 -0600
@@ -181,11 +181,6 @@ static long enter_state_helper(void *dat
 /*
  * Dom0 issues this hypercall in place of writing pm1a_cnt. Xen then
  * takes over the control and put the system into sleep state really.
- *
- * Guest may issue a two-phases write to PM1x_CNT, to work
- * around poorly implemented hardware. It's better to keep
- * this logic here. Two writes can be differentiated by
- * enable bit setting.
  */
 int acpi_enter_sleep(struct xenpf_enter_acpi_sleep *sleep)
 {
@@ -204,16 +199,6 @@ int acpi_enter_sleep(struct xenpf_enter_
     if ( sleep->flags )
         return -EINVAL;
 
-    /* Write #1 */
-    if ( !(sleep->pm1a_cnt_val & ACPI_BITMASK_SLEEP_ENABLE) )
-    {
-        outw((u16)sleep->pm1a_cnt_val, acpi_sinfo.pm1a_cnt);
-        if ( acpi_sinfo.pm1b_cnt )
-            outw((u16)sleep->pm1b_cnt_val, acpi_sinfo.pm1b_cnt);
-        return 0;
-    }
-
-    /* Write #2 */
     acpi_sinfo.pm1a_cnt_val = sleep->pm1a_cnt_val;
     acpi_sinfo.pm1b_cnt_val = sleep->pm1b_cnt_val;
     acpi_sinfo.sleep_state = sleep->sleep_state;
--- a/xen/arch/x86/hvm/hvm.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/hvm.c	Mon Sep 10 13:58:56 2007 -0600
@@ -161,12 +161,14 @@ static int hvm_set_ioreq_page(
     struct domain *d, struct hvm_ioreq_page *iorp, unsigned long gmfn)
 {
     struct page_info *page;
+    p2m_type_t p2mt;
     unsigned long mfn;
     void *va;
 
-    mfn = gmfn_to_mfn(d, gmfn);
-    if ( !mfn_valid(mfn) )
+    mfn = mfn_x(gfn_to_mfn(d, gmfn, &p2mt));
+    if ( !p2m_is_ram(p2mt) )
         return -EINVAL;
+    ASSERT(mfn_valid(mfn));
 
     page = mfn_to_page(mfn);
     if ( !get_page_and_type(page, d, PGT_writable_page) )
@@ -517,7 +519,8 @@ void hvm_triple_fault(void)
 int hvm_set_cr0(unsigned long value)
 {
     struct vcpu *v = current;
-    unsigned long mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
+    p2m_type_t p2mt;
+    unsigned long gfn, mfn, old_value = v->arch.hvm_vcpu.guest_cr[0];
 
     HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR0 value = %lx", value);
 
@@ -559,8 +562,10 @@ int hvm_set_cr0(unsigned long value)
         if ( !paging_mode_hap(v->domain) )
         {
             /* The guest CR3 must be pointing to the guest physical. */
-            mfn = get_mfn_from_gpfn(v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain))
+            gfn = v->arch.hvm_vcpu.guest_cr[3]>>PAGE_SHIFT;
+            mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+                 !get_page(mfn_to_page(mfn), v->domain))
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value = %lx (mfn=%lx)\n",
                          v->arch.hvm_vcpu.guest_cr[3], mfn);
@@ -603,16 +608,18 @@ int hvm_set_cr0(unsigned long value)
 int hvm_set_cr3(unsigned long value)
 {
     unsigned long mfn;
+    p2m_type_t p2mt;
     struct vcpu *v = current;
 
     if ( hvm_paging_enabled(v) && !paging_mode_hap(v->domain) &&
          (value != v->arch.hvm_vcpu.guest_cr[3]) )
     {
-        /* Shadow-mode CR3 change. Check PDBR and then make a new shadow. */
+        /* Shadow-mode CR3 change. Check PDBR and update refcounts. */
         HVM_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
-        mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
-            goto bad_cr3;
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !mfn_valid(mfn) ||
+             !get_page(mfn_to_page(mfn), v->domain) )
+            goto bad_cr3;
 
         put_page(pagetable_get_page(v->arch.guest_table));
         v->arch.guest_table = pagetable_from_pfn(mfn);
@@ -677,6 +684,7 @@ int hvm_set_cr4(unsigned long value)
 static int __hvm_copy(void *buf, paddr_t addr, int size, int dir, int virt)
 {
     unsigned long gfn, mfn;
+    p2m_type_t p2mt;
     char *p;
     int count, todo;
 
@@ -690,10 +698,11 @@ static int __hvm_copy(void *buf, paddr_t
         else
             gfn = addr >> PAGE_SHIFT;
 
-        mfn = get_mfn_from_gpfn(gfn);
+        mfn = mfn_x(gfn_to_mfn_current(gfn, &p2mt));
 
-        if ( mfn == INVALID_MFN )
+        if ( !p2m_is_ram(p2mt) )
             return todo;
+        ASSERT(mfn_valid(mfn));
 
         p = (char *)map_domain_page(mfn) + (addr & ~PAGE_MASK);
 
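The hunks above all follow the same new idiom: fetch the mfn and its p2m type together, trust the type (p2m_is_ram) rather than the raw mfn value, and only assert mfn validity afterwards. Below is a minimal standalone sketch of that calling pattern; the stub table, gfn_to_mfn_stub() and the main() harness are invented for illustration and are not part of this changeset:

    #include <stdio.h>

    #define INVALID_MFN (~0UL)

    typedef enum { p2m_invalid, p2m_ram_rw, p2m_ram_logdirty, p2m_ram_ro,
                   p2m_mmio_dm, p2m_mmio_direct } p2m_type_t;

    #define p2m_is_ram(t) ((t) == p2m_ram_rw || (t) == p2m_ram_logdirty || \
                           (t) == p2m_ram_ro)

    /* Stub lookup over a two-entry "p2m", standing in for gfn_to_mfn(). */
    static unsigned long gfn_to_mfn_stub(unsigned long gfn, p2m_type_t *t)
    {
        static const struct { unsigned long mfn; p2m_type_t type; } p2m[] = {
            { 0x1000, p2m_ram_rw }, { INVALID_MFN, p2m_mmio_dm },
        };
        if ( gfn >= sizeof(p2m) / sizeof(p2m[0]) )
        {
            *t = p2m_mmio_dm;
            return INVALID_MFN;
        }
        *t = p2m[gfn].type;
        return p2m[gfn].mfn;
    }

    int main(void)
    {
        p2m_type_t t;
        unsigned long mfn;

        mfn = gfn_to_mfn_stub(1, &t);   /* emulated MMIO: type says bail */
        if ( !p2m_is_ram(t) )
            printf("gfn 1: not RAM (type %d) -> would return -EINVAL\n", (int)t);

        mfn = gfn_to_mfn_stub(0, &t);   /* RAM: the type check passes */
        if ( p2m_is_ram(t) )
            printf("gfn 0: RAM at mfn %#lx -> safe to map\n", mfn);
        return 0;
    }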
--- a/xen/arch/x86/hvm/io.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/io.c	Mon Sep 10 13:58:56 2007 -0600
@@ -826,9 +826,7 @@ void hvm_io_assist(void)
     ioreq_t *p;
     struct cpu_user_regs *regs;
     struct hvm_io_op *io_opp;
-    unsigned long gmfn;
     struct vcpu *v = current;
-    struct domain *d = v->domain;
 
     io_opp = &v->arch.hvm_vcpu.io_op;
     regs = &io_opp->io_context;
@@ -862,13 +860,6 @@ void hvm_io_assist(void)
     hvm_load_cpu_guest_regs(v, regs);
     memcpy(guest_cpu_user_regs(), regs, HVM_CONTEXT_STACK_BYTES);
 
-    /* Has memory been dirtied? */
-    if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
-    {
-        gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
-        paging_mark_dirty(d, gmfn);
-    }
-
  out:
     vcpu_end_shutdown_deferral(v);
 }
--- a/xen/arch/x86/hvm/svm/intr.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/intr.c	Mon Sep 10 13:58:56 2007 -0600
@@ -30,6 +30,7 @@
 #include <asm/hvm/hvm.h>
 #include <asm/hvm/io.h>
 #include <asm/hvm/support.h>
+#include <asm/hvm/vlapic.h>
 #include <asm/hvm/svm/svm.h>
 #include <asm/hvm/svm/intr.h>
 #include <xen/event.h>
@@ -99,6 +100,33 @@ static void enable_intr_window(struct vc
     svm_inject_dummy_vintr(v);
 }
 
+static void update_cr8_intercept(
+    struct vcpu *v, enum hvm_intack masked_intr_source)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    int max_irr;
+
+    vmcb->cr_intercepts &= ~CR_INTERCEPT_CR8_WRITE;
+
+    /*
+     * If ExtInts are masked then that dominates the TPR --- the 'interrupt
+     * window' has already been enabled in this case.
+     */
+    if ( (masked_intr_source == hvm_intack_lapic) ||
+         (masked_intr_source == hvm_intack_pic) )
+        return;
+
+    /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
+    if ( !vlapic_enabled(vlapic) ||
+         ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
+        return;
+
+    /* Highest-priority pending interrupt is masked by the TPR? */
+    if ( (vmcb->vintr.fields.tpr & 0xf) >= (max_irr >> 4) )
+        vmcb->cr_intercepts |= CR_INTERCEPT_CR8_WRITE;
+}
+
 asmlinkage void svm_intr_assist(void)
 {
     struct vcpu *v = current;
@@ -113,7 +141,7 @@ asmlinkage void svm_intr_assist(void)
     do {
         intr_source = hvm_vcpu_has_pending_irq(v);
         if ( likely(intr_source == hvm_intack_none) )
-            return;
+            goto out;
 
         /*
          * Pending IRQs must be delayed if:
@@ -133,7 +161,7 @@ asmlinkage void svm_intr_assist(void)
              !hvm_interrupts_enabled(v, intr_source) )
         {
             enable_intr_window(v, intr_source);
-            return;
+            goto out;
         }
     } while ( !hvm_vcpu_ack_pending_irq(v, intr_source, &intr_vector) );
 
@@ -152,6 +180,9 @@ asmlinkage void svm_intr_assist(void)
     intr_source = hvm_vcpu_has_pending_irq(v);
     if ( unlikely(intr_source != hvm_intack_none) )
         enable_intr_window(v, intr_source);
+
+ out:
+    update_cr8_intercept(v, intr_source);
 }
 
 /*
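The new update_cr8_intercept() test boils down to 4-bit priority-class arithmetic: the APIC masks any pending vector whose class (vector >> 4) is at or below the TPR class, so CR8 writes only need intercepting while such a vector is waiting. A self-contained sketch of just that comparison; the sample register values are invented, and in the patch the inputs come from vmcb->vintr.fields.tpr and vlapic_find_highest_irr():

    #include <stdio.h>

    /* Intercept CR8 writes only while the highest pending vector's 4-bit
     * priority class is masked by the current TPR class: a TPR drop is then
     * the event that could make it deliverable. */
    static int need_cr8_write_intercept(int tpr_class, int max_irr)
    {
        if ( max_irr < 0 )              /* nothing pending at the LAPIC */
            return 0;
        return tpr_class >= (max_irr >> 4);
    }

    int main(void)
    {
        /* TPR class 5 masks pending vector 0x41 (class 4): intercept. */
        printf("%d\n", need_cr8_write_intercept(5, 0x41));
        /* TPR class 3 lets vector 0x61 (class 6) through: no intercept. */
        printf("%d\n", need_cr8_write_intercept(3, 0x61));
        return 0;
    }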
--- a/xen/arch/x86/hvm/svm/svm.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/svm.c	Mon Sep 10 13:58:56 2007 -0600
@@ -338,6 +338,7 @@ int svm_vmcb_save(struct vcpu *v, struct
 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
 
     if ( c->pending_valid &&
@@ -353,8 +354,8 @@ int svm_vmcb_restore(struct vcpu *v, str
     {
         if ( c->cr0 & X86_CR0_PG )
         {
-            mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-            if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+            mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+            if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
             {
                 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
                          c->cr3);
@@ -1004,15 +1005,23 @@ int start_svm(struct cpuinfo_x86 *c)
     return 1;
 }
 
-static int svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
+static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
 {
-    if (mmio_space(gpa)) {
+    p2m_type_t p2mt;
+    mfn_t mfn;
+    unsigned long gfn = gpa >> PAGE_SHIFT;
+
+    /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
+    mfn = gfn_to_mfn_current(gfn, &p2mt);
+    if ( p2mt == p2m_mmio_dm )
+    {
         handle_mmio(gpa);
-        return 1;
+        return;
     }
 
-    paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
-    return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
+    /* Log-dirty: mark the page dirty and let the guest write it again */
+    paging_mark_dirty(current->domain, mfn_x(mfn));
+    p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
 }
 
 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
@@ -2144,6 +2153,16 @@ asmlinkage void svm_vmexit_handler(struc
     eventinj_t eventinj;
     int inst_len, rc;
 
+    /*
+     * Before doing anything else, we need to sync up the VLAPIC's TPR with
+     * SVM's vTPR if CR8 writes are currently disabled. It's OK if the
+     * guest doesn't touch the CR8 (e.g. 32-bit Windows) because we update
+     * the vTPR on MMIO writes to the TPR
+     */
+    if ( !(vmcb->cr_intercepts & CR_INTERCEPT_CR8_WRITE) )
+        vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
+                       (vmcb->vintr.fields.tpr & 0x0F) << 4);
+
     exit_reason = vmcb->exitcode;
 
     HVMTRACE_2D(VMEXIT, v, vmcb->rip, exit_reason);
@@ -2341,8 +2360,7 @@ asmlinkage void svm_vmexit_handler(struc
 
     case VMEXIT_NPF:
         regs->error_code = vmcb->exitinfo1;
-        if ( !svm_do_nested_pgfault(vmcb->exitinfo2, regs) )
-            domain_crash(v->domain);
+        svm_do_nested_pgfault(vmcb->exitinfo2, regs);
         break;
 
     default:
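The vTPR sync at the top of svm_vmexit_handler() is the inverse mapping of the intercept logic above: SVM's vTPR field holds only the 4-bit priority class, which must be shifted back into bits 7:4 of the 8-bit APIC TASKPRI register. A tiny standalone illustration of that conversion (the example value is invented):

    #include <stdio.h>

    int main(void)
    {
        unsigned int vtpr = 0x5;                    /* SVM vTPR: 4-bit class */
        unsigned int taskpri = (vtpr & 0x0F) << 4;  /* TASKPRI: class in bits 7:4 */
        printf("vTPR %#x -> TASKPRI %#x\n", vtpr, taskpri);
        return 0;
    }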
--- a/xen/arch/x86/hvm/svm/vmcb.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/svm/vmcb.c	Mon Sep 10 13:58:56 2007 -0600
@@ -114,23 +114,29 @@ static int construct_vmcb(struct vcpu *v
     svm_asid_init_vcpu(v);
 
     vmcb->general1_intercepts =
-        GENERAL1_INTERCEPT_INTR | GENERAL1_INTERCEPT_NMI |
-        GENERAL1_INTERCEPT_SMI | GENERAL1_INTERCEPT_INIT |
-        GENERAL1_INTERCEPT_CPUID | GENERAL1_INTERCEPT_INVD |
-        GENERAL1_INTERCEPT_HLT | GENERAL1_INTERCEPT_INVLPG |
-        GENERAL1_INTERCEPT_INVLPGA | GENERAL1_INTERCEPT_IOIO_PROT |
-        GENERAL1_INTERCEPT_MSR_PROT | GENERAL1_INTERCEPT_SHUTDOWN_EVT;
+        GENERAL1_INTERCEPT_INTR        | GENERAL1_INTERCEPT_NMI         |
+        GENERAL1_INTERCEPT_SMI         | GENERAL1_INTERCEPT_INIT        |
+        GENERAL1_INTERCEPT_CPUID       | GENERAL1_INTERCEPT_INVD        |
+        GENERAL1_INTERCEPT_HLT         | GENERAL1_INTERCEPT_INVLPG      |
+        GENERAL1_INTERCEPT_INVLPGA     | GENERAL1_INTERCEPT_IOIO_PROT   |
+        GENERAL1_INTERCEPT_MSR_PROT    | GENERAL1_INTERCEPT_SHUTDOWN_EVT;
     vmcb->general2_intercepts =
-        GENERAL2_INTERCEPT_VMRUN | GENERAL2_INTERCEPT_VMMCALL |
-        GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE |
-        GENERAL2_INTERCEPT_STGI | GENERAL2_INTERCEPT_CLGI |
-        GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;
+        GENERAL2_INTERCEPT_VMRUN  | GENERAL2_INTERCEPT_VMMCALL |
+        GENERAL2_INTERCEPT_VMLOAD | GENERAL2_INTERCEPT_VMSAVE  |
+        GENERAL2_INTERCEPT_STGI   | GENERAL2_INTERCEPT_CLGI    |
+        GENERAL2_INTERCEPT_SKINIT | GENERAL2_INTERCEPT_RDTSCP;
 
     /* Intercept all debug-register writes. */
     vmcb->dr_intercepts = DR_INTERCEPT_ALL_WRITES;
 
-    /* Intercept all control-register accesses, except to CR2. */
-    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ | CR_INTERCEPT_CR2_WRITE);
+    /*
+     * Intercept all control-register accesses except for CR2 reads/writes
+     * and CR8 reads (and actually CR8 writes, but that's a special case
+     * that's handled in svm/intr.c).
+     */
+    vmcb->cr_intercepts = ~(CR_INTERCEPT_CR2_READ |
+                            CR_INTERCEPT_CR2_WRITE |
+                            CR_INTERCEPT_CR8_READ);
 
     /* I/O and MSR permission bitmaps. */
     arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
--- a/xen/arch/x86/hvm/vmx/intr.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/intr.c	Mon Sep 10 13:58:56 2007 -0600
@@ -107,22 +107,35 @@ static void enable_intr_window(struct vc
     }
 }
 
-static void update_tpr_threshold(struct vlapic *vlapic)
+static void update_tpr_threshold(
+    struct vcpu *v, enum hvm_intack masked_intr_source)
 {
-    int max_irr, tpr;
+    struct vlapic *vlapic = vcpu_vlapic(v);
+    int max_irr, tpr, threshold = 0;
 
     if ( !cpu_has_vmx_tpr_shadow )
         return;
 
+    /*
+     * If ExtInts are masked then that dominates the TPR --- the 'interrupt
+     * window' has already been enabled in this case.
+     */
+    if ( (masked_intr_source == hvm_intack_lapic) ||
+         (masked_intr_source == hvm_intack_pic) )
+        goto out;
+
+    /* Is there an interrupt pending at the LAPIC? Nothing to do if not. */
     if ( !vlapic_enabled(vlapic) ||
          ((max_irr = vlapic_find_highest_irr(vlapic)) == -1) )
-    {
-        __vmwrite(TPR_THRESHOLD, 0);
-        return;
-    }
+        goto out;
 
+    /* Highest-priority pending interrupt is masked by the TPR? */
     tpr = vlapic_get_reg(vlapic, APIC_TASKPRI) & 0xF0;
-    __vmwrite(TPR_THRESHOLD, (max_irr > tpr) ? (tpr >> 4) : (max_irr >> 4));
+    if ( (tpr >> 4) >= (max_irr >> 4) )
+        threshold = max_irr >> 4;
+
+ out:
+    __vmwrite(TPR_THRESHOLD, threshold);
 }
 
 asmlinkage void vmx_intr_assist(void)
@@ -171,7 +184,7 @@ asmlinkage void vmx_intr_assist(void)
         enable_intr_window(v, intr_source);
 
  out:
-    update_tpr_threshold(vcpu_vlapic(v));
+    update_tpr_threshold(v, intr_source);
 }
 
 /*
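VMX expresses the same policy as SVM's CR8-write intercept, but through the TPR_THRESHOLD field: the CPU VM-exits when the guest drops its TPR class below the threshold, so the patch programs the masked pending vector's class, or 0 when nothing is masked. A standalone sketch of the computation, with invented sample inputs:

    #include <stdio.h>

    /* TPR_THRESHOLD value: the CPU VM-exits when the guest writes a TPR
     * class below this; 0 disables the exit. */
    static int tpr_threshold(int tpr, int max_irr)
    {
        if ( max_irr < 0 )
            return 0;
        return ((tpr >> 4) >= (max_irr >> 4)) ? (max_irr >> 4) : 0;
    }

    int main(void)
    {
        printf("%d\n", tpr_threshold(0x50, 0x41));  /* masked -> threshold 4 */
        printf("%d\n", tpr_threshold(0x30, 0x61));  /* deliverable -> 0 */
        return 0;
    }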
--- a/xen/arch/x86/hvm/vmx/vmx.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/hvm/vmx/vmx.c	Mon Sep 10 13:58:56 2007 -0600
@@ -566,6 +566,7 @@ void vmx_vmcs_save(struct vcpu *v, struc
 int vmx_vmcs_restore(struct vcpu *v, struct hvm_hw_cpu *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->pending_valid &&
          ((c->pending_type == 1) || (c->pending_type > 6) ||
@@ -578,8 +579,8 @@ int vmx_vmcs_restore(struct vcpu *v, str
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n", c->cr3);
             return -EINVAL;
@@ -1292,19 +1293,23 @@ static void vmx_do_cpuid(struct cpu_user
          * Note that this leaf lives at <max-hypervisor-leaf> + 1.
          */
         u64 value = ((u64)regs->edx << 32) | (u32)regs->ecx;
-        unsigned long mfn = get_mfn_from_gpfn(value >> PAGE_SHIFT);
+        p2m_type_t p2mt;
+        unsigned long mfn;
         struct vcpu *v = current;
         char *p;
 
+        mfn = mfn_x(gfn_to_mfn_current(value >> PAGE_SHIFT, &p2mt));
+
         gdprintk(XENLOG_INFO, "Input address is 0x%"PRIx64".\n", value);
 
         /* 8-byte aligned valid pseudophys address from vmxassist, please. */
-        if ( (value & 7) || (mfn == INVALID_MFN) ||
+        if ( (value & 7) || !p2m_is_ram(p2mt) ||
             !v->arch.hvm_vmx.vmxassist_enabled )
        {
            domain_crash(v->domain);
            return;
        }
+        ASSERT(mfn_valid(mfn));
 
         p = map_domain_page(mfn);
         value = *((uint64_t *)(p + (value & (PAGE_SIZE - 1))));
@@ -1905,11 +1910,12 @@ static void vmx_world_save(struct vcpu *
 static int vmx_world_restore(struct vcpu *v, struct vmx_assist_context *c)
 {
     unsigned long mfn = 0;
+    p2m_type_t p2mt;
 
     if ( c->cr0 & X86_CR0_PG )
     {
-        mfn = gmfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT);
-        if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), v->domain) )
+        mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
+        if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
         {
             gdprintk(XENLOG_ERR, "Invalid CR3 value=%x", c->cr3);
             return -EINVAL;
--- a/xen/arch/x86/machine_kexec.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/machine_kexec.c	Mon Sep 10 13:58:56 2007 -0600
@@ -82,9 +82,6 @@ static void __machine_reboot_kexec(void
 
     smp_send_stop();
 
-    disable_IO_APIC();
-    hvm_cpu_down();
-
     machine_kexec(image);
 }
--- a/xen/arch/x86/mm/hap/guest_walk.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/hap/guest_walk.c	Mon Sep 10 13:58:56 2007 -0600
@@ -28,7 +28,8 @@
 #include <xen/sched.h>
 #include <asm/hvm/svm/vmcb.h>
 #include <asm/domain.h>
-#include <asm/shadow.h>
+#include <asm/paging.h>
+#include <asm/p2m.h>
 #include <asm/hap.h>
 
 #include "private.h"
@@ -67,6 +68,7 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     int lev, index;
     paddr_t gpa = 0;
     unsigned long gpfn, mfn;
+    p2m_type_t p2mt;
     int success = 1;
 
     l1_pgentry_t *l1e;
@@ -81,14 +83,16 @@ unsigned long hap_gva_to_gfn(GUEST_PAGIN
     gpfn = (gcr3 >> PAGE_SHIFT);
     for ( lev = mode; lev >= 1; lev-- )
     {
-        mfn = get_mfn_from_gpfn(gpfn);
-        if ( mfn == INVALID_MFN )
+        mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
+        if ( !p2m_is_ram(p2mt) )
         {
             HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
                        lev);
             success = 0;
             break;
         }
+        ASSERT(mfn_valid(mfn));
+
         index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
 
 #if GUEST_PAGING_LEVELS >= 4
--- a/xen/arch/x86/mm/hap/hap.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/hap/hap.c	Mon Sep 10 13:58:56 2007 -0600
@@ -60,8 +60,8 @@ int hap_enable_log_dirty(struct domain *
     d->arch.paging.mode |= PG_log_dirty;
     hap_unlock(d);
 
-    /* set l1e entries of P2M table to NOT_WRITABLE. */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
     return 0;
 }
@@ -73,14 +73,14 @@ int hap_disable_log_dirty(struct domain
     hap_unlock(d);
 
     /* set l1e entries of P2M table with normal mode */
-    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
+    p2m_change_type_global(d, p2m_ram_logdirty, p2m_ram_rw);
     return 0;
 }
 
 void hap_clean_dirty_bitmap(struct domain *d)
 {
-    /* mark physical memory as NOT_WRITEABLE and flush the TLB */
-    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* set l1e entries of P2M table to be read-only. */
+    p2m_change_type_global(d, p2m_ram_rw, p2m_ram_logdirty);
     flush_tlb_mask(d->domain_dirty_cpumask);
 }
--- a/xen/arch/x86/mm/p2m.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/p2m.c	Mon Sep 10 13:58:56 2007 -0600
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
  *
  * Parts of this code are Copyright (c) 2007 by Advanced Micro Devices.
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
  * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
@@ -93,6 +93,31 @@
 #define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
 
 
+/* PTE flags for the various types of p2m entry */
+#define P2M_BASE_FLAGS \
+        (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)
+
+static unsigned long p2m_type_to_flags(p2m_type_t t)
+{
+    unsigned long flags = (t & 0x7UL) << 9;
+    switch(t)
+    {
+    case p2m_invalid:
+    default:
+        return flags;
+    case p2m_ram_rw:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW;
+    case p2m_ram_logdirty:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_ram_ro:
+        return flags | P2M_BASE_FLAGS;
+    case p2m_mmio_dm:
+        return flags;
+    case p2m_mmio_direct:
+        return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+    }
+}
+
 
 // Find the next level's P2M entry, checking for out-of-range gfn's...
 // Returns NULL on error.
@@ -358,19 +383,25 @@ void p2m_teardown(struct domain *d)
 }
 
 mfn_t
-gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn)
+gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
 /* Read another domain's p2m entries */
 {
     mfn_t mfn;
-    paddr_t addr = ((paddr_t)gpfn) << PAGE_SHIFT;
+    paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
     l2_pgentry_t *l2e;
     l1_pgentry_t *l1e;
 
     ASSERT(paging_mode_translate(d));
+
+    /* XXX This is for compatibility with the old model, where anything not
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m
+     * XXX we will return p2m_invalid for unmapped gfns */
+    *t = p2m_mmio_dm;
+
     mfn = pagetable_get_mfn(d->arch.phys_table);
 
-
-    if ( gpfn > d->arch.p2m.max_mapped_pfn )
+    if ( gfn > d->arch.p2m.max_mapped_pfn )
         /* This pfn is higher than the highest the p2m map currently holds */
         return _mfn(INVALID_MFN);
 
@@ -428,9 +459,11 @@ gfn_to_mfn_foreign(struct domain *d, uns
         return _mfn(INVALID_MFN);
     }
     mfn = _mfn(l1e_get_pfn(*l1e));
+    *t = p2m_flags_to_type(l1e_get_flags(*l1e));
     unmap_domain_page(l1e);
 
-    return mfn;
+    ASSERT(mfn_valid(mfn) || !p2m_is_ram(*t));
+    return (p2m_is_valid(*t)) ? mfn : _mfn(INVALID_MFN);
 }
 
 #if P2M_AUDIT
@@ -630,10 +663,7 @@ p2m_remove_page(struct domain *d, unsign
         return;
     P2M_DEBUG("removing gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
-    //ASSERT(mfn_to_gfn(d, mfn) == gfn);
-
-    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+    set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
     set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
 }
 
@@ -653,6 +683,7 @@ guest_physmap_add_page(struct domain *d,
                        unsigned long mfn)
 {
     unsigned long ogfn;
+    p2m_type_t ot;
     mfn_t omfn;
 
     if ( !paging_mode_translate(d) )
@@ -663,10 +694,10 @@ guest_physmap_add_page(struct domain *d,
 
     P2M_DEBUG("adding gfn=%#lx mfn=%#lx\n", gfn, mfn);
 
-    omfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(omfn) )
+    omfn = gfn_to_mfn(d, gfn, &ot);
+    if ( p2m_is_ram(ot) )
     {
-        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
+        ASSERT(mfn_valid(omfn));
         set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
     }
 
@@ -683,8 +714,10 @@ guest_physmap_add_page(struct domain *d,
         /* This machine frame is already mapped at another physical address */
         P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
                   mfn, ogfn, gfn);
-        if ( mfn_valid(omfn = gfn_to_mfn(d, ogfn)) )
+        omfn = gfn_to_mfn(d, ogfn, &ot);
+        if ( p2m_is_ram(ot) )
         {
+            ASSERT(mfn_valid(omfn));
             P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
                       ogfn , mfn_x(omfn));
             if ( mfn_x(omfn) == mfn )
@@ -692,21 +725,29 @@ guest_physmap_add_page(struct domain *d,
         }
     }
 
-    set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
-    set_gpfn_from_mfn(mfn, gfn);
+    if ( mfn_valid(_mfn(mfn)) )
+    {
+        set_p2m_entry(d, gfn, _mfn(mfn),
+                      p2m_type_to_flags(p2m_ram_rw)|__PAGE_HYPERVISOR|_PAGE_USER);
+        set_gpfn_from_mfn(mfn, gfn);
+    }
+    else
+    {
+        gdprintk(XENLOG_WARNING, "Adding bad mfn to p2m map (%#lx -> %#lx)\n",
+                 gfn, mfn);
+        set_p2m_entry(d, gfn, _mfn(INVALID_MFN), 0);
+    }
 
     audit_p2m(d);
     p2m_unlock(d);
 }
 
-/* This function goes through P2M table and modify l1e flags of all pages. Note
- * that physical base address of l1e is intact. This function can be used for
- * special purpose, such as marking physical memory as NOT WRITABLE for
- * tracking dirty pages during live migration.
- */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
+/* Walk the whole p2m table, changing any entries of the old type
+ * to the new type.  This is used in hardware-assisted paging to
+ * quickly enable or disable log-dirty tracking */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt)
 {
-    unsigned long mfn, gfn;
+    unsigned long mfn, gfn, flags;
     l1_pgentry_t l1e_content;
     l1_pgentry_t *l1e;
     l2_pgentry_t *l2e;
@@ -769,12 +810,14 @@ void p2m_set_flags_global(struct domain
 
             for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
             {
-                if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                flags = l1e_get_flags(l1e[i1]);
+                if ( p2m_flags_to_type(flags) != ot )
                     continue;
                 mfn = l1e_get_pfn(l1e[i1]);
                 gfn = get_gpfn_from_mfn(mfn);
-                /* create a new 1le entry using l1e_flags */
-                l1e_content = l1e_from_pfn(mfn, l1e_flags);
+                /* create a new 1le entry with the new type */
+                flags = p2m_flags_to_type(nt);
+                l1e_content = l1e_from_pfn(mfn, flags);
                 paging_write_p2m_entry(d, gfn, &l1e[i1],
                                        l1mfn, l1e_content, 1);
             }
@@ -800,24 +843,23 @@ void p2m_set_flags_global(struct domain
     p2m_unlock(d);
 }
 
-/* This function traces through P2M table and modifies l1e flags of a specific
- * gpa.
- */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
+/* Modify the p2m type of a single gfn from ot to nt, returning the
+ * entry's previous type */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+                           p2m_type_t ot, p2m_type_t nt)
 {
-    unsigned long gfn;
+    p2m_type_t pt;
     mfn_t mfn;
 
     p2m_lock(d);
 
-    gfn = gpa >> PAGE_SHIFT;
-    mfn = gfn_to_mfn(d, gfn);
-    if ( mfn_valid(mfn) )
-        set_p2m_entry(d, gfn, mfn, l1e_flags);
+    mfn = gfn_to_mfn(d, gfn, &pt);
+    if ( pt == ot )
+        set_p2m_entry(d, gfn, mfn, p2m_type_to_flags(nt));
 
     p2m_unlock(d);
 
-    return 1;
+    return pt;
 }
 
 /*
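Central to this patch is storing the p2m type in the three "available" bits (9-11) of each level-1 PTE, so p2m_type_to_flags() and p2m_flags_to_type() must round-trip exactly. The following self-contained sketch models that encoding with simplified x86 flag constants; it is an illustration of the scheme, not the Xen headers:

    #include <assert.h>
    #include <stdio.h>

    #define _PAGE_PRESENT  (1UL << 0)
    #define _PAGE_RW       (1UL << 1)
    #define _PAGE_USER     (1UL << 2)
    #define _PAGE_PCD      (1UL << 4)
    #define _PAGE_ACCESSED (1UL << 5)
    #define _PAGE_DIRTY    (1UL << 6)
    #define BASE (_PAGE_PRESENT | _PAGE_USER | _PAGE_DIRTY | _PAGE_ACCESSED)

    enum { p2m_invalid, p2m_ram_rw, p2m_ram_logdirty, p2m_ram_ro,
           p2m_mmio_dm, p2m_mmio_direct };

    /* Pack the type into the three "available" PTE bits (9-11), plus the
     * access bits the type implies -- log-dirty and ro entries get no RW. */
    static unsigned long type_to_flags(unsigned long t)
    {
        unsigned long flags = (t & 0x7UL) << 9;
        switch ( t )
        {
        case p2m_ram_rw:       return flags | BASE | _PAGE_RW;
        case p2m_ram_logdirty:
        case p2m_ram_ro:       return flags | BASE;
        case p2m_mmio_direct:  return flags | BASE | _PAGE_RW | _PAGE_PCD;
        default:               return flags;   /* invalid, mmio_dm */
        }
    }

    static unsigned long flags_to_type(unsigned long flags)
    {
        return (flags >> 9) & 0x7;   /* bits 9, 10 and 11 */
    }

    int main(void)
    {
        for ( unsigned long t = p2m_invalid; t <= p2m_mmio_direct; t++ )
            assert(flags_to_type(type_to_flags(t)) == t);  /* must round-trip */
        printf("logdirty PTE has RW clear: %s\n",
               (type_to_flags(p2m_ram_logdirty) & _PAGE_RW) ? "no" : "yes");
        return 0;
    }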
--- a/xen/arch/x86/mm/shadow/common.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/common.c	Mon Sep 10 13:58:56 2007 -0600
@@ -2764,19 +2764,23 @@ shadow_write_p2m_entry(struct vcpu *v, u
                        l1_pgentry_t new, unsigned int level)
 {
     struct domain *d = v->domain;
-    mfn_t mfn;
 
     shadow_lock(d);
 
-    /* handle physmap_add and physmap_remove */
-    mfn = gfn_to_mfn(d, gfn);
-    if ( v != NULL && level == 1 && mfn_valid(mfn) ) {
-        sh_remove_all_shadows_and_parents(v, mfn);
-        if ( sh_remove_all_mappings(v, mfn) )
-            flush_tlb_mask(d->domain_dirty_cpumask);
+    /* If we're removing an MFN from the p2m, remove it from the shadows too */
+    if ( level == 1 )
+    {
+        mfn_t mfn = _mfn(l1e_get_pfn(*p));
+        p2m_type_t p2mt = p2m_flags_to_type(l1e_get_flags(*p));
+        if ( p2m_is_valid(p2mt) && mfn_valid(mfn) )
+        {
+            sh_remove_all_shadows_and_parents(v, mfn);
+            if ( sh_remove_all_mappings(v, mfn) )
+                flush_tlb_mask(d->domain_dirty_cpumask);
+        }
     }
-
-    /* update the entry with new content */
+
+    /* Update the entry with new content */
     safe_write_pte(p, new);
 
     /* install P2M in monitors for PAE Xen */
--- a/xen/arch/x86/mm/shadow/multi.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/multi.c	Mon Sep 10 13:58:56 2007 -0600
@@ -209,6 +209,7 @@ static inline int
 guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, int guest_op)
 {
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     ASSERT(!guest_op || shadow_locked_by_me(d));
 
     perfc_incr(shadow_guest_walk);
@@ -223,8 +224,9 @@ guest_walk_tables(struct vcpu *v, unsign
         + guest_l4_table_offset(va);
     /* Walk down to the l3e */
     if ( !(guest_l4e_get_flags(*gw->l4e) & _PAGE_PRESENT) ) return 0;
-    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e));
-    if ( !mfn_valid(gw->l3mfn) ) return 1;
+    gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(*gw->l4e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l3mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l3mfn, 3, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask);
@@ -236,8 +238,9 @@ guest_walk_tables(struct vcpu *v, unsign
 #endif /* PAE or 64... */
     /* Walk down to the l2e */
     if ( !(guest_l3e_get_flags(*gw->l3e) & _PAGE_PRESENT) ) return 0;
-    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e));
-    if ( !mfn_valid(gw->l2mfn) ) return 1;
+    gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(*gw->l3e), &p2mt);
+    if ( !p2m_is_ram(p2mt) ) return 1;
+    ASSERT(mfn_valid(gw->l2mfn));
     /* This mfn is a pagetable: make sure the guest can't write to it. */
     if ( guest_op && sh_remove_write_access(v, gw->l2mfn, 2, va) != 0 )
         flush_tlb_mask(d->domain_dirty_cpumask);
@@ -278,8 +281,9 @@ guest_walk_tables(struct vcpu *v, unsign
     else
     {
         /* Not a superpage: carry on and find the l1e. */
-        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e));
-        if ( !mfn_valid(gw->l1mfn) ) return 1;
+        gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(*gw->l2e), &p2mt);
+        if ( !p2m_is_ram(p2mt) ) return 1;
+        ASSERT(mfn_valid(gw->l1mfn));
         /* This mfn is a pagetable: make sure the guest can't write to it. */
         if ( guest_op
              && sh_remove_write_access(v, gw->l1mfn, 1, va) != 0 )
@@ -626,7 +630,7 @@ static always_inline void
                void *shadow_entry_ptr,
                int level,
                fetch_type_t ft,
-               int mmio)
+               p2m_type_t p2mt)
 {
     guest_l1e_t *gp = guest_entry_ptr;
     shadow_l1e_t *sp = shadow_entry_ptr;
@@ -637,6 +641,13 @@ static always_inline void
     /* We don't shadow PAE l3s */
     ASSERT(GUEST_PAGING_LEVELS > 3 || level != 3);
 
+    /* Check there's something for the shadows to map to */
+    if ( !p2m_is_valid(p2mt) )
+    {
+        *sp = shadow_l1e_empty();
+        goto done;
+    }
+
     if ( mfn_valid(guest_table_mfn) )
         /* Handle A and D bit propagation into the guest */
         gflags = guest_set_ad_bits(v, guest_table_mfn, gp, level, ft);
@@ -658,19 +669,22 @@ static always_inline void
         goto done;
     }
 
-    if ( level == 1 && mmio )
+    if ( level == 1 && p2mt == p2m_mmio_dm )
     {
-        /* Guest l1e maps MMIO space */
+        /* Guest l1e maps emulated MMIO space */
        *sp = sh_l1e_mmio(guest_l1e_get_gfn(*gp), gflags);
        if ( !d->arch.paging.shadow.has_fast_mmio_entries )
            d->arch.paging.shadow.has_fast_mmio_entries = 1;
        goto done;
    }
 
-    // Must have a valid target_mfn, unless this is a prefetch.  In the
+    // Must have a valid target_mfn unless this is a prefetch.  In the
     // case of a prefetch, an invalid mfn means that we can not usefully
     // shadow anything, and so we return early.
     //
+    /* N.B. For pass-through MMIO, either this test needs to be relaxed,
+     * and shadow_set_l1e() trained to handle non-valid MFNs (ugh), or the
+     * MMIO areas need to be added to the frame-table to make them "valid". */
     if ( !mfn_valid(target_mfn) )
     {
         ASSERT((ft == ft_prefetch));
@@ -718,6 +732,8 @@ static always_inline void
     // Only allow the guest write access to a page a) on a demand fault,
     // or b) if the page is already marked as dirty.
     //
+    // (We handle log-dirty entirely inside the shadow code, without using the
+    // p2m_ram_logdirty p2m type: only HAP uses that.)
     if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
     {
         if ( ft & FETCH_TYPE_WRITE )
@@ -725,6 +741,10 @@ static always_inline void
         else if ( !sh_mfn_is_dirty(d, target_mfn) )
             sflags &= ~_PAGE_RW;
     }
+
+    /* Read-only memory */
+    if ( p2mt == p2m_ram_ro )
+        sflags &= ~_PAGE_RW;
 
     // protect guest page tables
     //
@@ -754,7 +774,12 @@ static always_inline void
         sflags |= _PAGE_USER;
     }
 
+    /* MMIO addresses should never be cached */
+    if ( p2m_is_mmio(p2mt) )
+        sflags |= _PAGE_PCD;
+
     *sp = shadow_l1e_from_mfn(target_mfn, sflags);
+
  done:
     SHADOW_DEBUG(PROPAGATE,
                  "%s level %u guest %" SH_PRI_gpte " shadow %" SH_PRI_pte "\n",
@@ -775,7 +800,7 @@ l4e_propagate_from_guest(struct vcpu *v,
                          shadow_l4e_t *sl4e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, 0);
+    _sh_propagate(v, gl4e, gl4mfn, sl3mfn, sl4e, 4, ft, p2m_ram_rw);
 }
 
 static void
@@ -786,7 +811,7 @@ l3e_propagate_from_guest(struct vcpu *v,
                          shadow_l3e_t *sl3e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, 0);
+    _sh_propagate(v, gl3e, gl3mfn, sl2mfn, sl3e, 3, ft, p2m_ram_rw);
 }
 #endif // GUEST_PAGING_LEVELS >= 4
 
@@ -798,7 +823,7 @@ l2e_propagate_from_guest(struct vcpu *v,
                          shadow_l2e_t *sl2e,
                          fetch_type_t ft)
 {
-    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, 0);
+    _sh_propagate(v, gl2e, gl2mfn, sl1mfn, sl2e, 2, ft, p2m_ram_rw);
 }
 
 static void
@@ -808,9 +833,9 @@ l1e_propagate_from_guest(struct vcpu *v,
                          mfn_t gmfn,
                          shadow_l1e_t *sl1e,
                          fetch_type_t ft,
-                         int mmio)
+                         p2m_type_t p2mt)
 {
-    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, mmio);
+    _sh_propagate(v, gl1e, gl1mfn, gmfn, sl1e, 1, ft, p2mt);
 }
 
 
@@ -2196,6 +2221,7 @@ static int validate_gl4e(struct vcpu *v,
     shadow_l4e_t *sl4p = se;
     mfn_t sl3mfn = _mfn(INVALID_MFN);
     struct domain *d = v->domain;
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl4e_calls);
@@ -2203,8 +2229,8 @@ static int validate_gl4e(struct vcpu *v,
     if ( guest_l4e_get_flags(*new_gl4e) & _PAGE_PRESENT )
     {
         gfn_t gl3gfn = guest_l4e_get_gfn(*new_gl4e);
-        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn);
-        if ( mfn_valid(gl3mfn) )
+        mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2248,6 +2274,7 @@ static int validate_gl3e(struct vcpu *v,
     guest_l3e_t *new_gl3e = new_ge;
     shadow_l3e_t *sl3p = se;
     mfn_t sl2mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl3e_calls);
@@ -2255,8 +2282,8 @@ static int validate_gl3e(struct vcpu *v,
     if ( guest_l3e_get_flags(*new_gl3e) & _PAGE_PRESENT )
     {
         gfn_t gl2gfn = guest_l3e_get_gfn(*new_gl3e);
-        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn);
-        if ( mfn_valid(gl2mfn) )
+        mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
+        if ( p2m_is_ram(p2mt) )
             sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
         else
             result |= SHADOW_SET_ERROR;
@@ -2275,6 +2302,7 @@ static int validate_gl2e(struct vcpu *v,
     guest_l2e_t *new_gl2e = new_ge;
     shadow_l2e_t *sl2p = se;
     mfn_t sl1mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt;
     int result = 0;
 
     perfc_incr(shadow_validate_gl2e_calls);
@@ -2299,8 +2327,8 @@ static int validate_gl2e(struct vcpu *v,
         }
         else
         {
-            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn);
-            if ( mfn_valid(gl1mfn) )
+            mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
+            if ( p2m_is_ram(p2mt) )
                 sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
             else
                 result |= SHADOW_SET_ERROR;
@@ -2361,16 +2389,16 @@ static int validate_gl1e(struct vcpu *v,
     shadow_l1e_t *sl1p = se;
     gfn_t gfn;
     mfn_t gmfn;
-    int result = 0, mmio;
+    p2m_type_t p2mt;
+    int result = 0;
 
     perfc_incr(shadow_validate_gl1e_calls);
 
     gfn = guest_l1e_get_gfn(*new_gl1e);
-    gmfn = gfn_to_mfn(v->domain, gfn);
-
-    mmio = (is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)));
+    gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+
     l1e_propagate_from_guest(v, new_gl1e, _mfn(INVALID_MFN), gmfn, &new_sl1e,
-                             ft_prefetch, mmio);
+                             ft_prefetch, p2mt);
 
     result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
     return result;
@@ -2554,12 +2582,13 @@ static inline void reset_early_unshadow(
 static void sh_prefetch(struct vcpu *v, walk_t *gw,
                         shadow_l1e_t *ptr_sl1e, mfn_t sl1mfn)
 {
-    int i, dist, mmio;
+    int i, dist;
     gfn_t gfn;
     mfn_t gmfn;
     guest_l1e_t gl1e;
     shadow_l1e_t sl1e;
     u32 gflags;
+    p2m_type_t p2mt;
 
     /* Prefetch no further than the end of the _shadow_ l1 MFN */
     dist = (PAGE_SIZE - ((unsigned long)ptr_sl1e & ~PAGE_MASK)) / sizeof sl1e;
@@ -2597,14 +2626,13 @@ static void sh_prefetch(struct vcpu *v,
 
         /* Look at the gfn that the l1e is pointing at */
         gfn = guest_l1e_get_gfn(gl1e);
-        gmfn = gfn_to_mfn(v->domain, gfn);
-        mmio = ( is_hvm_vcpu(v) && mmio_space(gfn_to_paddr(gfn)) );
+        gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
 
         /* Propagate the entry.  Safe to use a pointer to our local
          * gl1e, since this is not a demand-fetch so there will be no
         * write-back to the guest. */
         l1e_propagate_from_guest(v, &gl1e, _mfn(INVALID_MFN),
-                                 gmfn, &sl1e, ft_prefetch, mmio);
+                                 gmfn, &sl1e, ft_prefetch, p2mt);
         (void) shadow_set_l1e(v, ptr_sl1e + i, sl1e, sl1mfn);
     }
 }
@@ -2633,8 +2661,9 @@ static int sh_page_fault(struct vcpu *v,
     paddr_t gpa;
     struct sh_emulate_ctxt emul_ctxt;
     struct x86_emulate_ops *emul_ops;
-    int r, mmio;
+    int r;
     fetch_type_t ft = 0;
+    p2m_type_t p2mt;
 
     SHADOW_PRINTK("d:v=%u:%u va=%#lx err=%u\n",
                   v->domain->domain_id, v->vcpu_id, va, regs->error_code);
@@ -2787,10 +2816,9 @@ static int sh_page_fault(struct vcpu *v,
 
     /* What mfn is the guest trying to access? */
     gfn = guest_l1e_get_gfn(gw.eff_l1e);
-    gmfn = gfn_to_mfn(d, gfn);
-    mmio = (is_hvm_domain(d) && mmio_space(gfn_to_paddr(gfn)));
-
-    if ( !mmio && !mfn_valid(gmfn) )
+    gmfn = gfn_to_mfn(d, gfn, &p2mt);
+
+    if ( !p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn)) )
     {
         perfc_incr(shadow_fault_bail_bad_gfn);
         SHADOW_PRINTK("BAD gfn=%"SH_PRI_gfn" gmfn=%"PRI_mfn"\n",
@@ -2821,7 +2849,7 @@ static int sh_page_fault(struct vcpu *v,
 
     /* Calculate the shadow entry and write it */
     l1e_propagate_from_guest(v, (gw.l1e) ? gw.l1e : &gw.eff_l1e, gw.l1mfn,
-                             gmfn, &sl1e, ft, mmio);
+                             gmfn, &sl1e, ft, p2mt);
     r = shadow_set_l1e(v, ptr_sl1e, sl1e, sl1mfn);
 
 #if SHADOW_OPTIMIZATIONS & SHOPT_PREFETCH
@@ -2844,7 +2872,10 @@ static int sh_page_fault(struct vcpu *v,
         }
     }
 
-    if ( mmio )
+    /* Need to hand off device-model MMIO and writes to read-only
+     * memory to the device model */
+    if ( p2mt == p2m_mmio_dm
+         || (p2mt == p2m_ram_ro && ft == ft_demand_write) )
     {
         gpa = guest_walk_to_gpa(&gw);
         goto mmio;
@@ -3598,6 +3629,7 @@ sh_update_cr3(struct vcpu *v, int do_loc
         int flush = 0;
         gfn_t gl2gfn;
         mfn_t gl2mfn;
+        p2m_type_t p2mt;
         guest_l3e_t *gl3e = (guest_l3e_t*)&v->arch.paging.shadow.gl3e;
         /* First, make all four entries read-only. */
         for ( i = 0; i < 4; i++ )
@@ -3605,8 +3637,9 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
             {
                 gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn(d, gl2gfn);
-                flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
+                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                if ( p2m_is_ram(p2mt) )
+                    flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
             }
         }
         if ( flush )
@@ -3617,13 +3650,15 @@ sh_update_cr3(struct vcpu *v, int do_loc
             if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
            {
                gl2gfn = guest_l3e_get_gfn(gl3e[i]);
-                gl2mfn = gfn_to_mfn(d, gl2gfn);
-                sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
-                                       ? SH_type_l2h_shadow
-                                       : SH_type_l2_shadow);
+                gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+                if ( p2m_is_ram(p2mt) )
+                    sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
+                                           ? SH_type_l2h_shadow
+                                           : SH_type_l2_shadow);
+                else
+                    sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
            }
            else
-                /* The guest is not present: clear out the shadow. */
                 sh_set_toplevel_shadow(v, i, _mfn(INVALID_MFN), 0);
         }
     }
@@ -3932,6 +3967,7 @@ static inline void * emulate_map_dest(st
     u32 flags, errcode;
     gfn_t gfn;
     mfn_t mfn;
+    p2m_type_t p2mt;
 
     /* We don't emulate user-mode writes to page tables */
     if ( ring_3(sh_ctxt->ctxt.regs) )
@@ -3971,7 +4007,6 @@ static inline void * emulate_map_dest(st
         }
     }
 #endif
-    mfn = gfn_to_mfn(v->domain, gfn);
 
     errcode = PFEC_write_access;
     if ( !(flags & _PAGE_PRESENT) )
@@ -3981,8 +4016,10 @@ static inline void * emulate_map_dest(st
     if ( !(flags & _PAGE_RW) )
         goto page_fault;
 
-    if ( mfn_valid(mfn) )
+    mfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+    if ( p2m_is_ram(p2mt) )
     {
+        ASSERT(mfn_valid(mfn));
         *mfnp = mfn;
         v->arch.paging.last_write_was_pt = !!sh_mfn_is_a_page_table(mfn);
         return sh_map_domain_page(mfn) + (vaddr & ~PAGE_MASK);
@@ -4231,6 +4268,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
 /* Convert this gfn to an mfn in the manner appropriate for the
  * guest pagetable it's used in (gmfn) */
 {
+    p2m_type_t p2mt;
     if ( !shadow_mode_translate(v->domain) )
         return _mfn(gfn_x(gfn));
 
@@ -4238,7 +4276,7 @@ audit_gfn_to_mfn(struct vcpu *v, gfn_t g
          != PGT_writable_page )
         return _mfn(gfn_x(gfn)); /* This is a paging-disabled shadow */
     else
-        return gfn_to_mfn(v->domain, gfn);
+        return gfn_to_mfn(v->domain, gfn, &p2mt);
 }
 
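In _sh_propagate() the shadow PTE permissions are now derived from the p2m type: p2m_ram_ro entries lose _PAGE_RW, and MMIO types gain _PAGE_PCD. A standalone sketch of just that flag post-processing, with the constants and enum simplified from the patch:

    #include <stdio.h>

    #define _PAGE_RW  (1UL << 1)
    #define _PAGE_PCD (1UL << 4)

    typedef enum { p2m_invalid, p2m_ram_rw, p2m_ram_logdirty, p2m_ram_ro,
                   p2m_mmio_dm, p2m_mmio_direct } p2m_type_t;

    /* Post-process candidate shadow-PTE flags by backing type, mirroring
     * the two clauses the patch adds to _sh_propagate(). */
    static unsigned long adjust_sflags(unsigned long sflags, p2m_type_t p2mt)
    {
        if ( p2mt == p2m_ram_ro )      /* read-only memory: strip write */
            sflags &= ~_PAGE_RW;
        if ( p2mt == p2m_mmio_dm || p2mt == p2m_mmio_direct )
            sflags |= _PAGE_PCD;       /* MMIO must never be cached */
        return sflags;
    }

    int main(void)
    {
        printf("ram_ro:      %#lx\n", adjust_sflags(_PAGE_RW, p2m_ram_ro));
        printf("mmio_direct: %#lx\n", adjust_sflags(_PAGE_RW, p2m_mmio_direct));
        return 0;
    }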
--- a/xen/arch/x86/mm/shadow/types.h	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/mm/shadow/types.h	Mon Sep 10 13:58:56 2007 -0600
@@ -414,7 +414,7 @@ gfn_to_paddr(gfn_t gfn)
 
 /* Override gfn_to_mfn to work with gfn_t */
 #undef gfn_to_mfn
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), gfn_x(g))
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
 
 
 /* Type used for recording a walk through guest pagetables.  It is
--- a/xen/arch/x86/shutdown.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/shutdown.c	Mon Sep 10 13:58:56 2007 -0600
@@ -197,7 +197,7 @@ static void machine_real_restart(const u
 
 #endif
 
-void machine_restart(char *cmd)
+void machine_restart(void)
 {
     int i;
 
@@ -216,18 +216,12 @@ void machine_restart(char *cmd)
         safe_halt();
     }
 
-    /*
-     * Stop all CPUs and turn off local APICs and the IO-APIC, so
-     * other OSs see a clean IRQ state.
-     */
     smp_send_stop();
-    disable_IO_APIC();
-    hvm_cpu_down();
 
     /* Rebooting needs to touch the page at absolute address 0. */
     *((unsigned short *)__va(0x472)) = reboot_mode;
 
-    if (reboot_thru_bios <= 0)
+    if ( reboot_thru_bios <= 0 )
     {
         for ( ; ; )
         {
--- a/xen/arch/x86/smp.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/arch/x86/smp.c	Mon Sep 10 13:58:56 2007 -0600
@@ -279,6 +279,19 @@ int on_selected_cpus(
 
     ASSERT(local_irq_is_enabled());
 
+    /* Legacy UP system with no APIC to deliver IPIs? */
+    if ( unlikely(!cpu_has_apic) )
+    {
+        ASSERT(num_online_cpus() == 1);
+        if ( cpu_isset(0, selected) )
+        {
+            local_irq_disable();
+            func(info);
+            local_irq_enable();
+        }
+        return 0;
+    }
+
     if ( nr_cpus == 0 )
         return 0;
 
@@ -306,23 +319,33 @@ int on_selected_cpus(
 
 static void stop_this_cpu (void *dummy)
 {
-    cpu_clear(smp_processor_id(), cpu_online_map);
-
-    local_irq_disable();
     disable_local_APIC();
     hvm_cpu_down();
 
+    cpu_clear(smp_processor_id(), cpu_online_map);
+
     for ( ; ; )
         __asm__ __volatile__ ( "hlt" );
 }
 
+/*
+ * Stop all CPUs and turn off local APICs and the IO-APIC, so other OSs see a
+ * clean IRQ state.
+ */
 void smp_send_stop(void)
 {
-    /* Stop all other CPUs in the system. */
+    int timeout = 10;
+
     smp_call_function(stop_this_cpu, NULL, 1, 0);
 
+    /* Wait 10ms for all other CPUs to go offline. */
+    while ( (num_online_cpus() > 1) && (timeout-- > 0) )
+        mdelay(1);
+
     local_irq_disable();
     disable_local_APIC();
+    disable_IO_APIC();
+    hvm_cpu_down();
     local_irq_enable();
 }
--- a/xen/common/keyhandler.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/common/keyhandler.c	Mon Sep 10 13:58:56 2007 -0600
@@ -123,7 +123,7 @@ static void dump_registers(unsigned char
 static void halt_machine(unsigned char key, struct cpu_user_regs *regs)
 {
     printk("'%c' pressed -> rebooting machine\n", key);
-    machine_restart(NULL);
+    machine_restart();
 }
 
 static void cpuset_print(char *set, int size, cpumask_t mask)
--- a/xen/common/shutdown.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/common/shutdown.c	Mon Sep 10 13:58:56 2007 -0600
@@ -24,7 +24,7 @@ static void maybe_reboot(void)
         printk("rebooting machine in 5 seconds.\n");
         watchdog_disable();
         mdelay(5000);
-        machine_restart(NULL);
+        machine_restart();
     }
 }
 
@@ -50,7 +50,7 @@ void dom0_shutdown(u8 reason)
     case SHUTDOWN_reboot:
     {
         printk("Domain 0 shutdown: rebooting machine.\n");
-        machine_restart(NULL);
+        machine_restart();
         break; /* not reached */
     }
--- a/xen/drivers/char/console.c	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/drivers/char/console.c	Mon Sep 10 13:58:56 2007 -0600
@@ -895,7 +895,7 @@ void panic(const char *fmt, ...)
     {
         watchdog_disable();
         mdelay(5000);
-        machine_restart(NULL);
+        machine_restart();
     }
 }
--- a/xen/include/asm-x86/mm.h	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/asm-x86/mm.h	Mon Sep 10 13:58:56 2007 -0600
@@ -328,8 +328,6 @@ TYPE_SAFE(unsigned long,mfn);
                          ? get_gpfn_from_mfn(mfn)          \
                          : (mfn) )
 
-#define gmfn_to_mfn(_d, gpfn)  mfn_x(gfn_to_mfn(_d, gpfn))
-
 #define INVALID_MFN             (~0UL)
 
 #ifdef CONFIG_COMPAT
--- a/xen/include/asm-x86/p2m.h	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/asm-x86/p2m.h	Mon Sep 10 13:58:56 2007 -0600
@@ -4,7 +4,7 @@
  * physical-to-machine mappings for automatically-translated domains.
 *
 * Copyright (c) 2007 Advanced Micro Devices (Wei Huang)
- * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006-2007 by XenSource Inc.
 * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
 * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
 *
@@ -27,49 +27,141 @@
 #define _XEN_P2M_H
 
 
-/* The phys_to_machine_mapping is the reversed mapping of MPT for full
- * virtualization.  It is only used by shadow_mode_translate()==true
- * guests, so we steal the address space that would have normally
- * been used by the read-only MPT map.
+/*
+ * The phys_to_machine_mapping maps guest physical frame numbers
+ * to machine frame numbers.  It only exists for paging_mode_translate
+ * guests. It is organised in page-table format, which:
+ *
+ * (1) allows us to use it directly as the second pagetable in hardware-
+ *     assisted paging and (hopefully) iommu support; and
+ * (2) lets us map it directly into the guest vcpus' virtual address space
+ *     as a linear pagetable, so we can read and write it easily.
+ *
+ * For (2) we steal the address space that would have normally been used
+ * by the read-only MPT map in a non-translated guest.  (For
+ * paging_mode_external() guests this mapping is in the monitor table.)
 */
 #define phys_to_machine_mapping ((l1_pgentry_t *)RO_MPT_VIRT_START)
 
+/*
+ * The upper levels of the p2m pagetable always contain full rights; all
+ * variation in the access control bits is made in the level-1 PTEs.
+ *
+ * In addition to the phys-to-machine translation, each p2m PTE contains
+ * *type* information about the gfn it translates, helping Xen to decide
+ * on the correct course of action when handling a page-fault to that
+ * guest frame.  We store the type in the "available" bits of the PTEs
+ * in the table, which gives us 8 possible types on 32-bit systems.
+ * Further expansions of the type system will only be supported on
+ * 64-bit Xen.
+ */
+typedef enum {
+    p2m_invalid = 0,            /* Nothing mapped here */
+    p2m_ram_rw = 1,             /* Normal read/write guest RAM */
+    p2m_ram_logdirty = 2,       /* Temporarily read-only for log-dirty */
+    p2m_ram_ro = 3,             /* Read-only; writes go to the device model */
+    p2m_mmio_dm = 4,            /* Reads and writes go to the device model */
+    p2m_mmio_direct = 5,        /* Read/write mapping of genuine MMIO area */
+} p2m_type_t;
 
+/* We use bitmaps and masks to handle groups of types */
+#define p2m_to_mask(_t) (1UL << (_t))
+
+/* RAM types, which map to real machine frames */
+#define P2M_RAM_TYPES (p2m_to_mask(p2m_ram_rw)          \
+                       | p2m_to_mask(p2m_ram_logdirty)  \
+                       | p2m_to_mask(p2m_ram_ro))
+
+/* MMIO types, which don't have to map to anything in the frametable */
+#define P2M_MMIO_TYPES (p2m_to_mask(p2m_mmio_dm)        \
+                        | p2m_to_mask(p2m_mmio_direct))
+
+/* Read-only types, which must have the _PAGE_RW bit clear in their PTEs */
+#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty)     \
+                      | p2m_to_mask(p2m_ram_ro))
+
+/* Useful predicates */
+#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
+#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
+#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+
+/* Extract the type from the PTE flags that store it */
+static inline p2m_type_t p2m_flags_to_type(unsigned long flags)
+{
+    /* Type is stored in the "available" bits, 9, 10 and 11 */
+    return (flags >> 9) & 0x7;
+}
+
+/* Read the current domain's p2m table (through the linear mapping). */
+static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+{
+    mfn_t mfn = _mfn(INVALID_MFN);
+    p2m_type_t p2mt = p2m_mmio_dm;
+    /* XXX This is for compatibility with the old model, where anything not
+     * XXX marked as RAM was considered to be emulated MMIO space.
+     * XXX Once we start explicitly registering MMIO regions in the p2m
+     * XXX we will return p2m_invalid for unmapped gfns */
+
+    if ( gfn <= current->domain->arch.p2m.max_mapped_pfn )
+    {
+        l1_pgentry_t l1e = l1e_empty();
+        int ret;
 
-/* Read the current domain's P2M table. */
-static inline mfn_t gfn_to_mfn_current(unsigned long gfn)
-{
-    l1_pgentry_t l1e = l1e_empty();
-    int ret;
-
-    if ( gfn > current->domain->arch.p2m.max_mapped_pfn )
-        return _mfn(INVALID_MFN);
-
-    /* Don't read off the end of the p2m table */
-    ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START) / sizeof(l1_pgentry_t));
+        ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
+               / sizeof(l1_pgentry_t));
 
-    ret = __copy_from_user(&l1e,
-                           &phys_to_machine_mapping[gfn],
-                           sizeof(l1e));
+        /* Need to __copy_from_user because the p2m is sparse and this
+         * part might not exist */
+        ret = __copy_from_user(&l1e,
+                               &phys_to_machine_mapping[gfn],
+                               sizeof(l1e));
 
-    if ( (ret == 0) && (l1e_get_flags(l1e) & _PAGE_PRESENT) )
-        return _mfn(l1e_get_pfn(l1e));
+        if ( ret == 0 ) {
+            p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+            ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(*t));
+            if ( p2m_is_valid(p2mt) )
+                mfn = _mfn(l1e_get_pfn(l1e));
+            else
+                /* XXX see above */
+                p2mt = p2m_mmio_dm;
+        }
+    }
 
-    return _mfn(INVALID_MFN);
+    *t = p2mt;
+    return mfn;
 }
 
 /* Read another domain's P2M table, mapping pages as we go */
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
+mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t);
 
 /* General conversion function from gfn to mfn */
-#define gfn_to_mfn(d, g) _gfn_to_mfn((d), (g))
-static inline mfn_t _gfn_to_mfn(struct domain *d, unsigned long gfn)
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
+static inline mfn_t _gfn_to_mfn(struct domain *d,
+                                unsigned long gfn, p2m_type_t *t)
 {
     if ( !paging_mode_translate(d) )
+    {
+        /* Not necessarily true, but for non-translated guests, we claim
+         * it's the most generic kind of memory */
+        *t = p2m_ram_rw;
         return _mfn(gfn);
+    }
     if ( likely(current->domain == d) )
-        return gfn_to_mfn_current(gfn);
+        return gfn_to_mfn_current(gfn, t);
     else
-        return gfn_to_mfn_foreign(d, gfn);
+        return gfn_to_mfn_foreign(d, gfn, t);
+}
+
+/* Compatibility function exporting the old untyped interface */
+static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
+{
+    mfn_t mfn;
+    p2m_type_t t;
+    mfn = gfn_to_mfn(d, gpfn, &t);
+    if ( p2m_is_valid(t) )
+        return mfn_x(mfn);
+    return INVALID_MFN;
 }
 
 /* General conversion function from mfn to gfn */
@@ -81,19 +173,6 @@ static inline unsigned long mfn_to_gfn(s
     return mfn_x(mfn);
 }
 
-/* Compatibility function for HVM code */
-static inline unsigned long get_mfn_from_gpfn(unsigned long pfn)
-{
-    return mfn_x(gfn_to_mfn_current(pfn));
-}
-
-/* Is this guest address an mmio one? (i.e. not defined in p2m map) */
-static inline int mmio_space(paddr_t gpa)
-{
-    unsigned long gfn = gpa >> PAGE_SHIFT;
-    return !mfn_valid(mfn_x(gfn_to_mfn_current(gfn)));
-}
-
 /* Translate the frame number held in an l1e from guest to machine */
 static inline l1_pgentry_t
 gl1e_to_ml1e(struct domain *d, l1_pgentry_t l1e)
@@ -105,7 +184,6 @@ gl1e_to_ml1e(struct domain *d, l1_pgentr
 }
 
 
-
 /* Init the datastructures for later use by the p2m code */
 void p2m_init(struct domain *d);
 
@@ -130,11 +208,12 @@ void guest_physmap_add_page(struct domai
 void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
                                unsigned long mfn);
 
-/* set P2M table l1e flags */
-void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
+/* Change types across all p2m entries in a domain */
+void p2m_change_type_global(struct domain *d, p2m_type_t ot, p2m_type_t nt);
 
-/* set P2M table l1e flags for a gpa */
-int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
+/* Compare-exchange the type of a single p2m entry */
+p2m_type_t p2m_change_type(struct domain *d, unsigned long gfn,
+                           p2m_type_t ot, p2m_type_t nt);
 
 #endif /* _XEN_P2M_H */
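The grouping macros in the header rely on turning each type into a one-bit mask so that class membership tests are a single AND. A self-contained sketch of the predicate scheme, mirroring the macros above with simplified names (illustration only, not the Xen header):

    #include <stdio.h>

    typedef enum { p2m_invalid, p2m_ram_rw, p2m_ram_logdirty, p2m_ram_ro,
                   p2m_mmio_dm, p2m_mmio_direct } p2m_type_t;

    #define MASK(t)    (1UL << (t))
    #define RAM_TYPES  (MASK(p2m_ram_rw) | MASK(p2m_ram_logdirty) | MASK(p2m_ram_ro))
    #define MMIO_TYPES (MASK(p2m_mmio_dm) | MASK(p2m_mmio_direct))
    #define RO_TYPES   (MASK(p2m_ram_logdirty) | MASK(p2m_ram_ro))

    int main(void)
    {
        const char *names[] = { "invalid", "ram_rw", "ram_logdirty",
                                "ram_ro", "mmio_dm", "mmio_direct" };
        /* One table-driven pass over all six types. */
        for ( int t = p2m_invalid; t <= p2m_mmio_direct; t++ )
            printf("%-12s ram=%d mmio=%d readonly=%d valid=%d\n", names[t],
                   !!(MASK(t) & RAM_TYPES), !!(MASK(t) & MMIO_TYPES),
                   !!(MASK(t) & RO_TYPES),
                   !!(MASK(t) & (RAM_TYPES | MMIO_TYPES)));
        return 0;
    }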
--- a/xen/include/xen/shutdown.h	Mon Sep 10 13:56:34 2007 -0600
+++ b/xen/include/xen/shutdown.h	Mon Sep 10 13:58:56 2007 -0600
@@ -6,7 +6,7 @@ extern int opt_noreboot;
 
 void dom0_shutdown(u8 reason);
 
-void machine_restart(char *cmd);
+void machine_restart(void);
 void machine_halt(void);
 void machine_power_off(void);