ia64/xen-unstable

changeset 15425:79b180596baf

x86: introduce specialized clear_page()

More than doubles the performance of page clearing on reasonably recent
processors (those that support SSE2).

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Jun 20 16:18:03 2007 +0100 (2007-06-20)
parents 005dd6b1cf8e
children 799b3e4bfeac
files xen/arch/x86/Makefile xen/arch/x86/clear_page.S xen/arch/x86/domain.c xen/arch/x86/x86_64/Makefile xen/arch/x86/x86_64/mm.c xen/include/asm-x86/page.h
line diff
     1.1 --- a/xen/arch/x86/Makefile	Wed Jun 20 15:33:14 2007 +0100
     1.2 +++ b/xen/arch/x86/Makefile	Wed Jun 20 16:18:03 2007 +0100
     1.3 @@ -10,6 +10,7 @@ subdir-$(x86_64) += x86_64
     1.4  
     1.5  obj-y += apic.o
     1.6  obj-y += bitops.o
     1.7 +obj-y += clear_page.o
     1.8  obj-y += compat.o
     1.9  obj-y += delay.o
    1.10  obj-y += dmi_scan.o
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/xen/arch/x86/clear_page.S	Wed Jun 20 16:18:03 2007 +0100
     2.3 @@ -0,0 +1,26 @@
     2.4 +#include <xen/config.h>
     2.5 +#include <asm/page.h>
     2.6 +
     2.7 +#ifdef __i386__
     2.8 +#define ptr_reg %edx
     2.9 +#else
    2.10 +#define ptr_reg %rdi
    2.11 +#endif
    2.12 +
    2.13 +ENTRY(clear_page_sse2)            /* zero PAGE_SIZE bytes starting at the pointer argument, using non-temporal stores */
    2.14 +#ifdef __i386__                   /* on x86_64 ptr_reg is %rdi and is used as-is (presumably the first-argument register) */
    2.15 +        mov     4(%esp), ptr_reg  /* i386: fetch the pointer argument from the stack slot above the return address */
    2.16 +#endif
    2.17 +        mov     $PAGE_SIZE/16, %ecx   /* iteration count: each pass of the loop below stores 16 bytes */
    2.18 +        xor     %eax,%eax         /* %eax = 0, the value written by every store */
    2.19 +
    2.20 +0:      dec     %ecx              /* count down first; movnti and lea leave the flags alone, so jnz below tests this result */
    2.21 +        movnti  %eax, (ptr_reg)   /* four 4-byte non-temporal stores of zero (hint to minimise cache pollution) */
    2.22 +        movnti  %eax, 4(ptr_reg)
    2.23 +        movnti  %eax, 8(ptr_reg)
    2.24 +        movnti  %eax, 12(ptr_reg)
    2.25 +        lea     16(ptr_reg), ptr_reg  /* advance the pointer by 16 bytes without modifying the flags */
    2.26 +        jnz     0b                /* repeat until the counter decremented above reaches zero */
    2.27 +
    2.28 +        sfence                    /* make the preceding non-temporal stores globally visible before any later stores */
    2.29 +        ret
     3.1 --- a/xen/arch/x86/domain.c	Wed Jun 20 15:33:14 2007 +0100
     3.2 +++ b/xen/arch/x86/domain.c	Wed Jun 20 16:18:03 2007 +0100
     3.3 @@ -151,7 +151,8 @@ int setup_arg_xlat_area(struct vcpu *v, 
     3.4          pg = alloc_domheap_page(NULL);
     3.5          if ( !pg )
     3.6              return -ENOMEM;
     3.7 -        d->arch.mm_arg_xlat_l3 = clear_page(page_to_virt(pg));
     3.8 +        d->arch.mm_arg_xlat_l3 = page_to_virt(pg);
     3.9 +        clear_page(d->arch.mm_arg_xlat_l3);
    3.10      }
    3.11  
    3.12      l4tab[l4_table_offset(COMPAT_ARG_XLAT_VIRT_BASE)] =
    3.13 @@ -444,7 +445,8 @@ int arch_domain_create(struct domain *d)
    3.14  
    3.15      if ( (pg = alloc_domheap_page(NULL)) == NULL )
    3.16          goto fail;
    3.17 -    d->arch.mm_perdomain_l2 = clear_page(page_to_virt(pg));
    3.18 +    d->arch.mm_perdomain_l2 = page_to_virt(pg);
    3.19 +    clear_page(d->arch.mm_perdomain_l2);
    3.20      for ( i = 0; i < (1 << pdpt_order); i++ )
    3.21          d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)+i] =
    3.22              l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt)+i,
    3.23 @@ -452,7 +454,8 @@ int arch_domain_create(struct domain *d)
    3.24  
    3.25      if ( (pg = alloc_domheap_page(NULL)) == NULL )
    3.26          goto fail;
    3.27 -    d->arch.mm_perdomain_l3 = clear_page(page_to_virt(pg));
    3.28 +    d->arch.mm_perdomain_l3 = page_to_virt(pg);
    3.29 +    clear_page(d->arch.mm_perdomain_l3);
    3.30      d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
    3.31          l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
    3.32                              __PAGE_HYPERVISOR);
     4.1 --- a/xen/arch/x86/x86_64/Makefile	Wed Jun 20 15:33:14 2007 +0100
     4.2 +++ b/xen/arch/x86/x86_64/Makefile	Wed Jun 20 16:18:03 2007 +0100
     4.3 @@ -1,12 +1,12 @@
     4.4  subdir-y += compat
     4.5  
     4.6  obj-y += entry.o
     4.7 -obj-y += compat_kexec.o
     4.8  obj-y += gpr_switch.o
     4.9  obj-y += mm.o
    4.10  obj-y += traps.o
    4.11  
    4.12  obj-$(CONFIG_COMPAT) += compat.o
    4.13 +obj-$(CONFIG_COMPAT) += compat_kexec.o
    4.14  obj-$(CONFIG_COMPAT) += domain.o
    4.15  obj-$(CONFIG_COMPAT) += physdev.o
    4.16  obj-$(CONFIG_COMPAT) += platform_hypercall.o
     5.1 --- a/xen/arch/x86/x86_64/mm.c	Wed Jun 20 15:33:14 2007 +0100
     5.2 +++ b/xen/arch/x86/x86_64/mm.c	Wed Jun 20 16:18:03 2007 +0100
     5.3 @@ -106,7 +106,8 @@ void __init paging_init(void)
     5.4      /* Create user-accessible L2 directory to map the MPT for guests. */
     5.5      if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
     5.6          goto nomem;
     5.7 -    l3_ro_mpt = clear_page(page_to_virt(l2_pg));
     5.8 +    l3_ro_mpt = page_to_virt(l2_pg);
     5.9 +    clear_page(l3_ro_mpt);
    5.10      l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
    5.11                l4e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
    5.12  
    5.13 @@ -132,7 +133,8 @@ void __init paging_init(void)
    5.14              if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
    5.15                  goto nomem;
    5.16              va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
    5.17 -            l2_ro_mpt = clear_page(page_to_virt(l2_pg));
    5.18 +            l2_ro_mpt = page_to_virt(l2_pg);
    5.19 +            clear_page(l2_ro_mpt);
    5.20              l3e_write(&l3_ro_mpt[l3_table_offset(va)],
    5.21                        l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
    5.22              l2_ro_mpt += l2_table_offset(va);
    5.23 @@ -152,7 +154,8 @@ void __init paging_init(void)
    5.24          l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
    5.25          if ( (l2_pg = alloc_domheap_page(NULL)) == NULL )
    5.26              goto nomem;
    5.27 -        compat_idle_pg_table_l2 = l2_ro_mpt = clear_page(page_to_virt(l2_pg));
    5.28 +        compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
    5.29 +        clear_page(l2_ro_mpt);
    5.30          l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
    5.31                    l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
    5.32          l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
     6.1 --- a/xen/include/asm-x86/page.h	Wed Jun 20 15:33:14 2007 +0100
     6.2 +++ b/xen/include/asm-x86/page.h	Wed Jun 20 16:18:03 2007 +0100
     6.3 @@ -192,8 +192,9 @@ static inline l4_pgentry_t l4e_from_padd
     6.4  #define pgentry_ptr_to_slot(_p)    \
     6.5      (((unsigned long)(_p) & ~PAGE_MASK) / sizeof(*(_p)))
     6.6  
     6.7 +#ifndef __ASSEMBLY__
     6.8 +
     6.9  /* Page-table type. */
    6.10 -#ifndef __ASSEMBLY__
    6.11  #if CONFIG_PAGING_LEVELS == 2
    6.12  /* x86_32 default */
    6.13  typedef struct { u32 pfn; } pagetable_t;
    6.14 @@ -214,9 +215,11 @@ typedef struct { u64 pfn; } pagetable_t;
    6.15  #define pagetable_from_page(pg) pagetable_from_pfn(page_to_mfn(pg))
    6.16  #define pagetable_from_paddr(p) pagetable_from_pfn((p)>>PAGE_SHIFT)
    6.17  #define pagetable_null()        pagetable_from_pfn(0)
    6.18 -#endif
    6.19  
    6.20 -#define clear_page(_p)      memset((void *)(_p), 0, PAGE_SIZE)
    6.21 +void clear_page_sse2(void *);
    6.22 +#define clear_page(_p)      (cpu_has_xmm2 ?                             \
    6.23 +                             clear_page_sse2((void *)(_p)) :            \
    6.24 +                             (void)memset((void *)(_p), 0, PAGE_SIZE))
    6.25  #define copy_page(_t,_f)    memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
    6.26  
    6.27  #define mfn_valid(mfn)      ((mfn) < max_page)
    6.28 @@ -245,6 +248,8 @@ typedef struct { u64 pfn; } pagetable_t;
    6.29  #define pfn_to_paddr(pfn)   ((paddr_t)(pfn) << PAGE_SHIFT)
    6.30  #define paddr_to_pfn(pa)    ((unsigned long)((pa) >> PAGE_SHIFT))
    6.31  
    6.32 +#endif /* !defined(__ASSEMBLY__) */
    6.33 +
    6.34  /* High table entries are reserved by the hypervisor. */
    6.35  #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
    6.36  #define DOMAIN_ENTRIES_PER_L2_PAGETABLE     \