From: Jan Beulich Date: Thu, 18 Feb 2016 14:02:16 +0000 (+0100) Subject: x86: avoid flush IPI when possible X-Git-Url: http://xenbits.xensource.com/gitweb?a=commitdiff_plain;h=4598cca54f39da025b83d287f8f88a00c95e5b03;p=people%2Fliuw%2Fxen.git x86: avoid flush IPI when possible Since CLFLUSH, unlike WBINVD, is a cache coherency domain wide flush, there's no need to IPI other CPUs if this is the only flushing being requested. (As a secondary change, move a local variable into the scope where it's actually needed.) As a further secondary change, also eliminate another leftover from 32-bit days: invalidate_interrupt() can clear FLUSH_TLB_GLOBAL alongside FLUSH_TLB, since write_ptbase() (as a descendant of __sync_local_execstate()) now unconditionally fiddles with CR4.PGE. Signed-off-by: Jan Beulich Reviewed-by: Andrew Cooper --- diff --git a/xen/arch/x86/flushtlb.c b/xen/arch/x86/flushtlb.c index 961bb1dd02..582a1e254f 100644 --- a/xen/arch/x86/flushtlb.c +++ b/xen/arch/x86/flushtlb.c @@ -91,9 +91,13 @@ void write_cr3(unsigned long cr3) local_irq_restore(flags); } -void flush_area_local(const void *va, unsigned int flags) +/* + * The return value of this function is the passed in "flags" argument with + * bits cleared that have been fully (i.e. system-wide) taken care of, i.e. + * namely not requiring any further action on remote CPUs. 
+ */ +unsigned int flush_area_local(const void *va, unsigned int flags) { - const struct cpuinfo_x86 *c = &current_cpu_data; unsigned int order = (flags - 1) & FLUSH_ORDER_MASK; unsigned long irqfl; @@ -130,6 +134,7 @@ void flush_area_local(const void *va, unsigned int flags) if ( flags & FLUSH_CACHE ) { + const struct cpuinfo_x86 *c = &current_cpu_data; unsigned long i, sz = 0; if ( order < (BITS_PER_LONG - PAGE_SHIFT) ) @@ -146,6 +151,7 @@ void flush_area_local(const void *va, unsigned int flags) "data16 clflush %0", /* clflushopt */ X86_FEATURE_CLFLUSHOPT, "m" (((const char *)va)[i])); + flags &= ~FLUSH_CACHE; } else { @@ -154,4 +160,6 @@ void flush_area_local(const void *va, unsigned int flags) } local_irq_restore(irqfl); + + return flags; } diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c index 988b9c2557..4abb16f3bb 100644 --- a/xen/arch/x86/smp.c +++ b/xen/arch/x86/smp.c @@ -205,26 +205,30 @@ static unsigned int flush_flags; void invalidate_interrupt(struct cpu_user_regs *regs) { + unsigned int flags = flush_flags; ack_APIC_irq(); perfc_incr(ipis); - if ( !__sync_local_execstate() || - (flush_flags & (FLUSH_TLB_GLOBAL | FLUSH_CACHE)) ) - flush_area_local(flush_va, flush_flags); + if ( __sync_local_execstate() ) + flags &= ~(FLUSH_TLB | FLUSH_TLB_GLOBAL); + flush_area_local(flush_va, flags); cpumask_clear_cpu(smp_processor_id(), &flush_cpumask); } void flush_area_mask(const cpumask_t *mask, const void *va, unsigned int flags) { + unsigned int cpu = smp_processor_id(); + ASSERT(local_irq_is_enabled()); - if ( cpumask_test_cpu(smp_processor_id(), mask) ) - flush_area_local(va, flags); + if ( cpumask_test_cpu(cpu, mask) ) + flags = flush_area_local(va, flags); - if ( !cpumask_subset(mask, cpumask_of(smp_processor_id())) ) + if ( (flags & ~FLUSH_ORDER_MASK) && + !cpumask_subset(mask, cpumask_of(cpu)) ) { spin_lock(&flush_lock); cpumask_and(&flush_cpumask, mask, &cpu_online_map); - cpumask_clear_cpu(smp_processor_id(), &flush_cpumask); + cpumask_clear_cpu(cpu, 
&flush_cpumask); flush_va = va; flush_flags = flags; send_IPI_mask(&flush_cpumask, INVALIDATE_TLB_VECTOR); diff --git a/xen/include/asm-x86/flushtlb.h b/xen/include/asm-x86/flushtlb.h index 8dbe2d61a1..4ea31c2c0e 100644 --- a/xen/include/asm-x86/flushtlb.h +++ b/xen/include/asm-x86/flushtlb.h @@ -87,7 +87,7 @@ void write_cr3(unsigned long cr3); #define FLUSH_CACHE 0x400 /* Flush local TLBs/caches. */ -void flush_area_local(const void *va, unsigned int flags); +unsigned int flush_area_local(const void *va, unsigned int flags); #define flush_local(flags) flush_area_local(NULL, flags) /* Flush specified CPUs' TLBs/caches */