From: Keir Fraser
Date: Mon, 29 Oct 2007 16:49:02 +0000 (+0000)
Subject: x86: allow pv guests to disable TSC for applications
X-Git-Tag: 3.2.0-rc1~196^2~17
X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=e33d4a73162dac1fe6861f3a5e96de89e30b6a91;p=xen.git

x86: allow pv guests to disable TSC for applications

Linux, under CONFIG_SECCOMP, has been capable of hiding the TSC from
processes for quite a while. This patch enables this to actually work
for pv kernels, by allowing them to control CR4.TSD (and, as a simple
thing to do at the same time, CR4.DE).

Applies cleanly only on top of the previously submitted debug register
handling patch.

Signed-off-by: Jan Beulich

Also clean up CR4 and EFER handling, and hack-n-slash header file
inclusion madness to get the tree building again.

Signed-off-by: Keir Fraser
---

diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index cbaa5ecbe7..7db32263c1 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
index ae4d2149cf..f14fda92fb 100644
--- a/xen/arch/x86/acpi/power.c
+++ b/xen/arch/x86/acpi/power.c
@@ -155,6 +155,10 @@ static int enter_state(u32 state)
 
     pmprintk(XENLOG_DEBUG, "Back to C.");
 
+    /* Restore CR4 and EFER from cached values. */
+    write_cr4(read_cr4());
+    write_efer(read_efer());
+
     device_power_up();
 
     pmprintk(XENLOG_INFO, "Finishing wakeup from ACPI S%d state.", state);
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index e874ad7eb2..6ca6f0cd9e 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -50,7 +50,8 @@
 #endif
 
 DEFINE_PER_CPU(struct vcpu *, curr_vcpu);
-DEFINE_PER_CPU(__u64, efer);
+DEFINE_PER_CPU(u64, efer);
+DEFINE_PER_CPU(unsigned long, cr4);
 
 static void unmap_vcpu_info(struct vcpu *v);
 
@@ -413,6 +414,8 @@ int vcpu_initialise(struct vcpu *v)
             v->arch.schedule_tail = continue_idle_domain;
             v->arch.cr3 = __pa(idle_pg_table);
         }
+
+        v->arch.guest_context.ctrlreg[4] = mmu_cr4_features;
     }
 
     v->arch.perdomain_ptes =
@@ -568,13 +571,28 @@ void arch_domain_destroy(struct domain *d)
     free_xenheap_page(d->shared_info);
 }
 
+unsigned long pv_guest_cr4_fixup(unsigned long guest_cr4)
+{
+    unsigned long hv_cr4 = read_cr4(), hv_cr4_mask = ~X86_CR4_TSD;
+    if ( cpu_has_de )
+        hv_cr4_mask &= ~X86_CR4_DE;
+
+    if ( (guest_cr4 & hv_cr4_mask) !=
+         (hv_cr4 & hv_cr4_mask & ~(X86_CR4_PGE|X86_CR4_PSE)) )
+        gdprintk(XENLOG_WARNING,
+                 "Attempt to change CR4 flags %08lx -> %08lx\n",
+                 hv_cr4 & ~(X86_CR4_PGE|X86_CR4_PSE), guest_cr4);
+
+    return (hv_cr4 & hv_cr4_mask) | (guest_cr4 & ~hv_cr4_mask);
+}
+
 /* This is called by arch_final_setup_guest and do_boot_vcpu */
 int arch_set_info_guest(
     struct vcpu *v, vcpu_guest_context_u c)
 {
     struct domain *d = v->domain;
     unsigned long cr3_pfn = INVALID_MFN;
-    unsigned long flags;
+    unsigned long flags, cr4;
     int i, rc = 0, compat;
 
     /* The context is a compat-mode one if the target domain is compat-mode;
@@ -665,6 +683,10 @@ int arch_set_info_guest(
     /* Ensure real hardware interrupts are enabled. */
     v->arch.guest_context.user_regs.eflags |= EF_IE;
 
+    cr4 = v->arch.guest_context.ctrlreg[4];
+    v->arch.guest_context.ctrlreg[4] =
+        (cr4 == 0) ? mmu_cr4_features : pv_guest_cr4_fixup(cr4);
+
     if ( v->is_initialised )
         goto out;
@@ -1194,6 +1216,9 @@ static void paravirt_ctxt_switch_to(struct vcpu *v)
 {
     set_int80_direct_trap(v);
     switch_kernel_stack(v);
+
+    if ( unlikely(read_cr4() != v->arch.guest_context.ctrlreg[4]) )
+        write_cr4(v->arch.guest_context.ctrlreg[4]);
 }
 
 #define loaddebug(_v,_reg) \
diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c
index cb0fbb7580..5a012d4a0d 100644
--- a/xen/arch/x86/flushtlb.c
+++ b/xen/arch/x86/flushtlb.c
@@ -83,9 +83,12 @@ void write_cr3(unsigned long cr3)
         hvm_flush_guest_tlbs();
 
 #ifdef USER_MAPPINGS_ARE_GLOBAL
-    __pge_off();
-    asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
-    __pge_on();
+    {
+        unsigned long cr4 = read_cr4();
+        write_cr4(cr4 & ~X86_CR4_PGE);
+        asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
+        write_cr4(cr4);
+    }
 #else
     asm volatile ( "mov %0, %%cr3" : : "r" (cr3) : "memory" );
 #endif
@@ -124,8 +127,7 @@ void flush_area_local(const void *va, unsigned int flags)
             hvm_flush_guest_tlbs();
 
 #ifndef USER_MAPPINGS_ARE_GLOBAL
-        if ( !(flags & FLUSH_TLB_GLOBAL) ||
-             !(mmu_cr4_features & X86_CR4_PGE) )
+        if ( !(flags & FLUSH_TLB_GLOBAL) || !(read_cr4() & X86_CR4_PGE) )
         {
             asm volatile ( "mov %0, %%cr3"
                            : : "r" (read_cr3()) : "memory" );
@@ -133,9 +135,10 @@
         else
 #endif
         {
-            __pge_off();
+            unsigned long cr4 = read_cr4();
+            write_cr4(cr4 & ~X86_CR4_PGE);
             barrier();
-            __pge_on();
+            write_cr4(cr4);
         }
 
         post_flush(t);
diff --git a/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-detect.c b/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-detect.c
index c4c8af3c92..032ac0028d 100644
--- a/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-detect.c
+++ b/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-detect.c
@@ -18,6 +18,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include
+#include
 #include
 #include
 #include
diff --git a/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-init.c b/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-init.c
index 7bd73f18f0..78af93ddba 100644
--- a/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-init.c
+++ b/xen/arch/x86/hvm/svm/amd_iommu/amd-iommu-init.c
@@ -18,6 +18,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
 
+#include
+#include
 #include
 #include
 #include
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 899aad2baf..b5dbe893d2 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -441,7 +441,7 @@ static enum hvm_intblk svm_interrupt_blocked(
     ASSERT((intack.source == hvm_intsrc_pic) ||
            (intack.source == hvm_intsrc_lapic));
 
-    if ( irq_masked(guest_cpu_user_regs()->eflags) )
+    if ( !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
         return hvm_intblk_rflags_ie;
 
     if ( (intack.source == hvm_intsrc_lapic) &&
diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
index 363f88442d..96b2ca6b6d 100644
--- a/xen/arch/x86/hvm/vmx/vmcs.c
+++ b/xen/arch/x86/hvm/vmx/vmcs.c
@@ -498,7 +498,7 @@ static int construct_vmcs(struct vcpu *v)
 
     /* Host control registers. */
     __vmwrite(HOST_CR0, read_cr0() | X86_CR0_TS);
-    __vmwrite(HOST_CR4, read_cr4());
+    __vmwrite(HOST_CR4, mmu_cr4_features);
 
     /* Host CS:RIP. */
     __vmwrite(HOST_CS_SELECTOR, __HYPERVISOR_CS);
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 75bceb035f..d74442b931 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -727,6 +727,10 @@ static void vmx_ctxt_switch_from(struct vcpu *v)
 
 static void vmx_ctxt_switch_to(struct vcpu *v)
 {
+    /* HOST_CR4 in VMCS is always mmu_cr4_features. Sync CR4 now. */
+    if ( unlikely(read_cr4() != mmu_cr4_features) )
+        write_cr4(mmu_cr4_features);
+
     vmx_restore_guest_msrs(v);
     vmx_restore_dr(v);
 }
@@ -990,7 +994,7 @@ static enum hvm_intblk vmx_interrupt_blocked(
     ASSERT((intack.source == hvm_intsrc_pic) ||
            (intack.source == hvm_intsrc_lapic));
 
-    if ( irq_masked(guest_cpu_user_regs()->eflags) )
+    if ( !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
         return hvm_intblk_rflags_ie;
 
     if ( intack.source == hvm_intsrc_lapic )
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 47d81c4ada..1555f58941 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -415,6 +415,8 @@ void __init __start_xen(unsigned long mbi_p)
     set_current((struct vcpu *)0xfffff000); /* debug sanity */
     idle_vcpu[0] = current;
     set_processor_id(0); /* needed early, for smp_processor_id() */
+    rdmsrl(MSR_EFER, this_cpu(efer));
+    asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );
 
     smp_prepare_boot_cpu();
diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c
index 5f7585acf8..c8886e947a 100644
--- a/xen/arch/x86/smp.c
+++ b/xen/arch/x86/smp.c
@@ -86,6 +86,12 @@ static inline void check_IPI_mask(cpumask_t cpumask)
     ASSERT(!cpus_empty(cpumask));
 }
 
+void apic_wait_icr_idle(void)
+{
+    while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
+        cpu_relax();
+}
+
 void send_IPI_mask_flat(cpumask_t cpumask, int vector)
 {
     unsigned long mask = cpus_addr(cpumask)[0];
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 0ed832d511..101918ac8f 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -495,6 +495,8 @@ void __devinit start_secondary(void *unused)
     set_processor_id(cpu);
     set_current(idle_vcpu[cpu]);
     this_cpu(curr_vcpu) = idle_vcpu[cpu];
+    rdmsrl(MSR_EFER, this_cpu(efer));
+    asm volatile ( "mov %%cr4,%0" : "=r" (this_cpu(cr4)) );
 
     percpu_traps_init();
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 9ac1d52cbb..7248189304 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -1794,10 +1794,8 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         break;
 
     case 4: /* Write CR4 */
-        if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
-            gdprintk(XENLOG_WARNING,
-                     "Attempt to change CR4 flags %08lx -> %08lx\n",
-                     read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE), *reg);
+        v->arch.guest_context.ctrlreg[4] = pv_guest_cr4_fixup(*reg);
+        write_cr4(v->arch.guest_context.ctrlreg[4]);
         break;
 
     default:
@@ -1868,6 +1866,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         }
         break;
 
+    case 0x31: /* RDTSC */
+        rdtsc(regs->eax, regs->edx);
+        break;
+
     case 0x32: /* RDMSR */
         switch ( regs->ecx )
         {
diff --git a/xen/include/asm-x86/amd-iommu.h b/xen/include/asm-x86/amd-iommu.h
index fb5c87f94d..e91158b6a7 100644
--- a/xen/include/asm-x86/amd-iommu.h
+++ b/xen/include/asm-x86/amd-iommu.h
@@ -22,8 +22,8 @@
 
 #include
 #include
+#include
 #include
-#include
 #include
 
 #define iommu_found() (!list_empty(&amd_iommu_head))
diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
index fd0ee81a01..9b1308481a 100644
--- a/xen/include/asm-x86/apic.h
+++ b/xen/include/asm-x86/apic.h
@@ -2,9 +2,7 @@
 #define __ASM_APIC_H
 
 #include
-#include
 #include
-#include
 #include
 
 #define Dprintk(x...)
@@ -51,11 +49,7 @@ static __inline u32 apic_read(unsigned long reg)
     return *((volatile u32 *)(APIC_BASE+reg));
 }
 
-static __inline__ void apic_wait_icr_idle(void)
-{
-    while ( apic_read( APIC_ICR ) & APIC_ICR_BUSY )
-        cpu_relax();
-}
+void apic_wait_icr_idle(void);
 
 int get_physical_broadcast(void);
diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
index 498c2fe591..28fbd7b1f7 100644
--- a/xen/include/asm-x86/domain.h
+++ b/xen/include/asm-x86/domain.h
@@ -350,6 +350,8 @@ struct arch_vcpu
 /* Continue the current hypercall via func(data) on specified cpu. */
 int continue_hypercall_on_cpu(int cpu, long (*func)(void *data), void *data);
 
+unsigned long pv_guest_cr4_fixup(unsigned long guest_cr4);
+
 #endif /* __ASM_DOMAIN_H__ */
 
 /*
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 019ecb7acd..1a0287a884 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -149,13 +149,6 @@ static inline int register_buffered_io_handler(
     return register_io_handler(d, addr, size, action, HVM_BUFFERED_IO);
 }
 
-#if defined(__i386__) || defined(__x86_64__)
-static inline int irq_masked(unsigned long eflags)
-{
-    return ((eflags & X86_EFLAGS_IF) == 0);
-}
-#endif
-
 extern void send_pio_req(unsigned long port, unsigned long count, int size,
                          paddr_t value, int dir, int df, int value_is_ptr);
 void send_timeoffset_req(unsigned long timeoff);
diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h
index a211eb5e9f..b5107c8e68 100644
--- a/xen/include/asm-x86/hvm/irq.h
+++ b/xen/include/asm-x86/hvm/irq.h
@@ -24,6 +24,7 @@
 
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/xen/include/asm-x86/io_apic.h b/xen/include/asm-x86/io_apic.h
index b8731bf278..e8e102a6b8 100644
--- a/xen/include/asm-x86/io_apic.h
+++ b/xen/include/asm-x86/io_apic.h
@@ -2,9 +2,10 @@
 #define __ASM_IO_APIC_H
 
 #include
-#include
 #include
 #include
+#include
+#include
 
 /*
  * Intel IO-APIC support for SMP and UP systems.
diff --git a/xen/include/asm-x86/iommu.h b/xen/include/asm-x86/iommu.h
index 2a119f0d66..e2ceffc7de 100644
--- a/xen/include/asm-x86/iommu.h
+++ b/xen/include/asm-x86/iommu.h
@@ -21,11 +21,8 @@
 #define _IOMMU_H_
 
 #include
-#include
-#include
+#include
 #include
-#include
-#include
 #include
 #include
 #include
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index 79d37a33ad..96a078824e 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -90,16 +90,14 @@ static inline void wrmsrl(unsigned int msr, __u64 val)
                           : "c" (counter))
 
-DECLARE_PER_CPU(__u64, efer);
+DECLARE_PER_CPU(u64, efer);
 
-static inline __u64 read_efer(void)
+static inline u64 read_efer(void)
 {
-    if (!this_cpu(efer))
-        rdmsrl(MSR_EFER, this_cpu(efer));
     return this_cpu(efer);
 }
 
-static inline void write_efer(__u64 val)
+static inline void write_efer(u64 val)
 {
     this_cpu(efer) = val;
     wrmsrl(MSR_EFER, val);
diff --git a/xen/include/asm-x86/page.h b/xen/include/asm-x86/page.h
index 0ddbd0e214..958eee0292 100644
--- a/xen/include/asm-x86/page.h
+++ b/xen/include/asm-x86/page.h
@@ -294,9 +294,6 @@ void paging_init(void);
 void setup_idle_pagetable(void);
 #endif /* !defined(__ASSEMBLY__) */
 
-#define __pge_off() write_cr4(mmu_cr4_features & ~X86_CR4_PGE)
-#define __pge_on() write_cr4(mmu_cr4_features)
-
 #define _PAGE_PRESENT 0x001U
 #define _PAGE_RW 0x002U
 #define _PAGE_USER 0x004U
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index 0af78e21c1..e2285cc5f3 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -8,6 +8,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -298,16 +300,17 @@ static inline unsigned long read_cr2(void)
     return cr2;
 }
 
+DECLARE_PER_CPU(unsigned long, cr4);
+
 static inline unsigned long read_cr4(void)
 {
-    unsigned long cr4;
-    asm volatile ( "mov %%cr4,%0\n\t" : "=r" (cr4) );
-    return cr4;
-}
-
+    return this_cpu(cr4);
+}
+
 static inline void write_cr4(unsigned long val)
 {
-    asm volatile ( "mov %0,%%cr4" : : "r" ((unsigned long)val) );
+    this_cpu(cr4) = val;
+    asm volatile ( "mov %0,%%cr4" : : "r" (val) );
 }
 
 /* Clear and set 'TS' bit respectively */
@@ -332,13 +335,13 @@ extern unsigned long mmu_cr4_features;
 static always_inline void set_in_cr4 (unsigned long mask)
 {
     mmu_cr4_features |= mask;
-    write_cr4(mmu_cr4_features);
+    write_cr4(read_cr4() | mask);
 }
 
 static always_inline void clear_in_cr4 (unsigned long mask)
 {
-    mmu_cr4_features &= ~mask;
-    write_cr4(mmu_cr4_features);
+    mmu_cr4_features &= ~mask;
+    write_cr4(read_cr4() & ~mask);
 }
 
 /*
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index 9b36a64437..55b8cf7705 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -13,7 +13,6 @@
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #ifndef __ASSEMBLY__
-#include
 #include
 #include
 #ifdef CONFIG_X86_IO_APIC
diff --git a/xen/include/asm-x86/x86_32/elf.h b/xen/include/asm-x86/x86_32/elf.h
index 1aa4e07148..1abcaaf283 100644
--- a/xen/include/asm-x86/x86_32/elf.h
+++ b/xen/include/asm-x86/x86_32/elf.h
@@ -1,8 +1,6 @@
 #ifndef __X86_32_ELF_H__
 #define __X86_32_ELF_H__
 
-#include
-
 typedef struct {
     unsigned long ebx;
     unsigned long ecx;
@@ -40,7 +38,7 @@ static inline void elf_core_save_regs(ELF_Gregset *core_regs,
     asm volatile("movw %%fs, %%ax;" :"=a"(core_regs->fs));
     asm volatile("movw %%gs, %%ax;" :"=a"(core_regs->gs));
     /* orig_eax not filled in for now */
-    core_regs->eip = (unsigned long)current_text_addr();
+    core_regs->eip = (unsigned long)elf_core_save_regs;
volatile("movw %%cs, %%ax;" :"=a"(core_regs->cs)); asm volatile("pushfl; popl %0" :"=m"(core_regs->eflags)); asm volatile("movl %%esp,%0" : "=m"(core_regs->esp)); diff --git a/xen/include/asm-x86/x86_64/elf.h b/xen/include/asm-x86/x86_64/elf.h index 39c90b76f6..df92ec0e64 100644 --- a/xen/include/asm-x86/x86_64/elf.h +++ b/xen/include/asm-x86/x86_64/elf.h @@ -1,8 +1,6 @@ #ifndef __X86_64_ELF_H__ #define __X86_64_ELF_H__ -#include - typedef struct { unsigned long r15; unsigned long r14; @@ -54,7 +52,7 @@ static inline void elf_core_save_regs(ELF_Gregset *core_regs, asm volatile("movq %%rsi,%0" : "=m"(core_regs->rsi)); asm volatile("movq %%rdi,%0" : "=m"(core_regs->rdi)); /* orig_rax not filled in for now */ - core_regs->rip = (unsigned long)current_text_addr(); + core_regs->rip = (unsigned long)elf_core_save_regs; asm volatile("movl %%cs, %%eax;" :"=a"(core_regs->cs)); asm volatile("pushfq; popq %0" :"=m"(core_regs->eflags)); asm volatile("movq %%rsp,%0" : "=m"(core_regs->rsp));