ia64/xen-unstable
changeset 15425:79b180596baf
x86: introduce specialized clear_page()
More than doubles the performance of page clearing on reasonably recent
processors (those that support SSE2).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
More than doubles the performance of page clearing on reasonably recent
processors (those that support SSE2).
Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author | kfraser@localhost.localdomain |
---|---|
date | Wed Jun 20 16:18:03 2007 +0100 (2007-06-20) |
parents | 005dd6b1cf8e |
children | 799b3e4bfeac |
files | xen/arch/x86/Makefile xen/arch/x86/clear_page.S xen/arch/x86/domain.c xen/arch/x86/x86_64/Makefile xen/arch/x86/x86_64/mm.c xen/include/asm-x86/page.h |
line diff
1.1 --- a/xen/arch/x86/Makefile Wed Jun 20 15:33:14 2007 +0100 1.2 +++ b/xen/arch/x86/Makefile Wed Jun 20 16:18:03 2007 +0100 1.3 @@ -10,6 +10,7 @@ subdir-$(x86_64) += x86_64 1.4 1.5 obj-y += apic.o 1.6 obj-y += bitops.o 1.7 +obj-y += clear_page.o 1.8 obj-y += compat.o 1.9 obj-y += delay.o 1.10 obj-y += dmi_scan.o
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/xen/arch/x86/clear_page.S Wed Jun 20 16:18:03 2007 +0100 2.3 @@ -0,0 +1,26 @@ 2.4 +#include <xen/config.h> 2.5 +#include <asm/page.h> 2.6 + 2.7 +#ifdef __i386__ 2.8 +#define ptr_reg %edx 2.9 +#else 2.10 +#define ptr_reg %rdi 2.11 +#endif 2.12 + 2.13 +ENTRY(clear_page_sse2) 2.14 +#ifdef __i386__ 2.15 + mov 4(%esp), ptr_reg 2.16 +#endif 2.17 + mov $PAGE_SIZE/16, %ecx 2.18 + xor %eax,%eax 2.19 + 2.20 +0: dec %ecx 2.21 + movnti %eax, (ptr_reg) 2.22 + movnti %eax, 4(ptr_reg) 2.23 + movnti %eax, 8(ptr_reg) 2.24 + movnti %eax, 12(ptr_reg) 2.25 + lea 16(ptr_reg), ptr_reg 2.26 + jnz 0b 2.27 + 2.28 + sfence 2.29 + ret
3.1 --- a/xen/arch/x86/domain.c Wed Jun 20 15:33:14 2007 +0100 3.2 +++ b/xen/arch/x86/domain.c Wed Jun 20 16:18:03 2007 +0100 3.3 @@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v, 3.4 pg = alloc_domheap_page(NULL); 3.5 if ( !pg ) 3.6 return -ENOMEM; 3.7 - d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg)); 3.8 + d->arch.mm_arg_xlat_l3 = page_to_virt(pg); 3.9 + clear_page(d->arch.mm_arg_xlat_l3); 3.10 } 3.11 3.12 l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] = 3.13 @@ -444,7 +445,8 @@ int arch_domain_create(struct domain *d) 3.14 3.15 if ( (pg = alloc_domheap_page(NULL)) == NULL ) 3.16 goto fail; 3.17 - d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg)); 3.18 + d->arch.mm_perdomain_l2 = page_to_virt(pg); 3.19 + clear_page(d->arch.mm_perdomain_l2); 3.20 for ( i = 0; i < (1 << pdpt_order); i++ ) 3.21 d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] = 3.22 l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i, 3.23 @@ -452,7 +454,8 @@ int arch_domain_create(struct domain *d) 3.24 3.25 if ( (pg = alloc_domheap_page(NULL)) == NULL ) 3.26 goto fail; 3.27 - d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg)); 3.28 + d->arch.mm_perdomain_l3 = page_to_virt(pg); 3.29 + clear_page(d->arch.mm_perdomain_l3); 3.30 d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] = 3.31 l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2), 3.32 __PAGE_HYPERVISOR);
4.1 --- a/xen/arch/x86/x86_64/Makefile Wed Jun 20 15:33:14 2007 +0100 4.2 +++ b/xen/arch/x86/x86_64/Makefile Wed Jun 20 16:18:03 2007 +0100 4.3 @@ -1,12 +1,12 @@ 4.4 subdir-y += compat 4.5 4.6 obj-y += entry.o 4.7 -obj-y += compat_kexec.o 4.8 obj-y += gpr_switch.o 4.9 obj-y += mm.o 4.10 obj-y += traps.o 4.11 4.12 obj-$(CONFIG_COMPAT) += compat.o 4.13 +obj-$(CONFIG_COMPAT) += compat_kexec.o 4.14 obj-$(CONFIG_COMPAT) += domain.o 4.15 obj-$(CONFIG_COMPAT) += physdev.o 4.16 obj-$(CONFIG_COMPAT) += platform_hypercall.o
5.1 --- a/xen/arch/x86/x86_64/mm.c Wed Jun 20 15:33:14 2007 +0100 5.2 +++ b/xen/arch/x86/x86_64/mm.c Wed Jun 20 16:18:03 2007 +0100 5.3 @@ -106,7 +106,8 @@ void __init paging_init(void) 5.4 /* Create user-accessible L2 directory to map the MPT for guests. */ 5.5 if ( (l2_pg = alloc_domheap_page(NULL)) == NULL ) 5.6 goto nomem; 5.7 - l3_ro_mpt = clear_page(page_to_virt(l2_pg)); 5.8 + l3_ro_mpt = page_to_virt(l2_pg); 5.9 + clear_page(l3_ro_mpt); 5.10 l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)], 5.11 l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER)); 5.12 5.13 @@ -132,7 +133,8 @@ void __init paging_init(void) 5.14 if ( (l2_pg = alloc_domheap_page(NULL)) == NULL ) 5.15 goto nomem; 5.16 va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT); 5.17 - l2_ro_mpt = clear_page(page_to_virt(l2_pg)); 5.18 + l2_ro_mpt = page_to_virt(l2_pg); 5.19 + clear_page(l2_ro_mpt); 5.20 l3e_write(&l3_ro_mpt[l3_table_offset(va)], 5.21 l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER)); 5.22 l2_ro_mpt += l2_table_offset(va); 5.23 @@ -152,7 +154,8 @@ void __init paging_init(void) 5.24 l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]); 5.25 if ( (l2_pg = alloc_domheap_page(NULL)) == NULL ) 5.26 goto nomem; 5.27 - compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg)); 5.28 + compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg); 5.29 + clear_page(l2_ro_mpt); 5.30 l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)], 5.31 l3e_from_page(l2_pg, __PAGE_HYPERVISOR)); 5.32 l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
6.1 --- a/xen/include/asm-x86/page.h Wed Jun 20 15:33:14 2007 +0100 6.2 +++ b/xen/include/asm-x86/page.h Wed Jun 20 16:18:03 2007 +0100 6.3 @@ -192,8 +192,9 @@ static inline l4_pgentry_t l4e_from_padd 6.4 #define pgentry_ptr_to_slot(_p) \ 6.5 (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p))) 6.6 6.7 +#ifndef __ASSEMBLY__ 6.8 + 6.9 /* Page-table type. */ 6.10 -#ifndef __ASSEMBLY__ 6.11 #if CONFIG_PAGING_LEVELS == 2 6.12 /* x86_32 default */ 6.13 typedef struct { u32 pfn; } pagetable_t; 6.14 @@ -214,9 +215,11 @@ typedef struct { u64 pfn; } pagetable_t; 6.15 #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg)) 6.16 #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT) 6.17 #define pagetable_null() pagetable_from_pfn(0) 6.18 -#endif 6.19 6.20 -#define clear_page(_p) memset((void *)(_p), 0, PAGE_SIZE) 6.21 +void clear_page_sse2(void *); 6.22 +#define clear_page(_p) (cpu_has_xmm2 ? \ 6.23 + clear_page_sse2((void *)(_p)) : \ 6.24 + (void)memset((void *)(_p), 0, PAGE_SIZE)) 6.25 #define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE) 6.26 6.27 #define mfn_valid(mfn) ((mfn) < max_page) 6.28 @@ -245,6 +248,8 @@ typedef struct { u64 pfn; } pagetable_t; 6.29 #define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT) 6.30 #define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT)) 6.31 6.32 +#endif /* !defined(__ASSEMBLY__) */ 6.33 + 6.34 /* High table entries are reserved by the hypervisor. */ 6.35 #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) 6.36 #define DOMAIN_ENTRIES_PER_L2_PAGETABLE \