direct-io.hg

changeset 6450:5978be010bec

The attached patch enables x86_64 xenlinux with "late pin, early
unpin", which is already implemented for x86_32: page tables are pinned
at the hypervisor only once an mm is first switched to, and unpinned as
soon as the address space begins to be torn down. Since we now pin only
the root of the page-table hierarchy rather than every level, overall
performance improves, especially for workloads with heavy
memory-management activity.
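
In outline, the new lifecycle looks like the sketch below. This is a
simplified summary of the mm_pin()/mm_unpin() machinery the patch adds
in pageattr.c and hooks up in mmu_context.h; the real code also
maintains the mm_unpinned list, handles the separate user PGD, and
takes page_table_lock:

	/* Late pin: a new mm's page tables stay ordinary writable
	 * memory.  Only when the mm is first switched to do we walk its
	 * page tables, make every level read-only, and pin the root PGD
	 * with the hypervisor. */
	static inline void switch_mm(struct mm_struct *prev,
				     struct mm_struct *next,
				     struct task_struct *tsk)
	{
		if (likely(prev != next)) {
			if (!next->context.pinned)
				mm_pin(next);	/* RO-walk + xen_pgd_pin() */
			/* ... load cr3 via mmuext_op as before ... */
		}
	}

	/* Early unpin: when exit_mmap() starts tearing the address space
	 * down and we are the last user, unpin immediately so the page
	 * tables become plain writable pages again and the bulk of the
	 * unmap needs no hypervisor validation. */
	void _arch_exit_mmap(struct mm_struct *mm)
	{
		if (mm->context.pinned && atomic_read(&mm->mm_count) == 1)
			mm_unpin(mm);	/* xen_pgd_unpin() + RW-walk */
	}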

On an 8-way x86_64 xenlinux (dom0) the kernel build improved by about
10% (using make -j32). Even on a small setup such as a UP HT system, I
see about a 3% performance gain with a kernel build (make -j4).

Lmbench also shows improvements in fork/exec/sh:
Processor, Processes - times in microseconds - smaller is better
--------------------------------------------------------------------
Host      OS            Mhz null null open slct  sig  sig fork exec   sh
                            call  I/O stat clos  TCP inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ----
Linux 2.6.12- 3786 1.13 1.36 3.93 6.04 10.5 1.43 4.33 536. 1446 3614
Linux 2.6.12- 3786 1.13 1.36 3.91 6.03 10.4 1.44 4.38 346. 1050 2831
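
Reading the first row as the unpatched kernel and the second as the
patched one (smaller is better), fork latency drops from 536 to 346
microseconds (about 35%), exec from 1446 to 1050 (about 27%), and sh
from 3614 to 2831 (about 22%).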

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Aug 26 11:02:14 2005 +0000 (2005-08-26)
parents edeee85c90b1
children 2b95125015a5
files linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/kernel/ldt.c	Fri Aug 26 11:02:14 2005 +0000
@@ -105,14 +105,19 @@ int init_new_context(struct task_struct 
 	struct mm_struct * old_mm;
 	int retval = 0;
 
+	memset(&mm->context, 0, sizeof(mm->context));
 	init_MUTEX(&mm->context.sem);
-	mm->context.size = 0;
 	old_mm = current->mm;
 	if (old_mm && old_mm->context.size > 0) {
 		down(&old_mm->context.sem);
 		retval = copy_ldt(&mm->context, &old_mm->context);
 		up(&old_mm->context.sem);
 	}
+	if (retval == 0) {
+		spin_lock(&mm_unpinned_lock);
+		list_add(&mm->context.unpinned, &mm_unpinned);
+		spin_unlock(&mm_unpinned_lock);
+	}
 	return retval;
 }
 
@@ -134,6 +139,11 @@ void destroy_context(struct mm_struct *m
 			kfree(mm->context.ldt);
 		mm->context.size = 0;
 	}
+	if (!mm->context.pinned) {
+		spin_lock(&mm_unpinned_lock);
+		list_del(&mm->context.unpinned);
+		spin_unlock(&mm_unpinned_lock);
+	}
 }
 
 static int read_ldt(void __user * ptr, unsigned long bytecount)
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/init.c	Fri Aug 26 11:02:14 2005 +0000
@@ -712,6 +712,7 @@ void __init paging_init(void)
         HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
 
         memset(empty_zero_page, 0, sizeof(empty_zero_page));
+	init_mm.context.pinned = 1;
 
 #ifdef CONFIG_XEN_PHYSDEV_ACCESS
 	{
--- a/linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/arch/xen/x86_64/mm/pageattr.c	Fri Aug 26 11:02:14 2005 +0000
@@ -12,19 +12,145 @@
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
+#include <asm/io.h>
+
+#ifdef CONFIG_XEN
 #include <asm/pgalloc.h>
-#include <asm/io.h>
+#include <asm/mmu_context.h>
+
+LIST_HEAD(mm_unpinned);
+DEFINE_SPINLOCK(mm_unpinned_lock);
+
+static inline void mm_walk_set_prot(void *pt, pgprot_t flags)
+{
+	struct page *page = virt_to_page(pt);
+	unsigned long pfn = page_to_pfn(page);
+
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		       (unsigned long)__va(pfn << PAGE_SHIFT),
+		       pfn_pte(pfn, flags), 0));
+}
+
+static void mm_walk(struct mm_struct *mm, pgprot_t flags)
+{
+	pgd_t       *pgd;
+	pud_t       *pud;
+	pmd_t       *pmd;
+	pte_t       *pte;
+	int          g,u,m;
+
+	pgd = mm->pgd;
+	for (g = 0; g <= USER_PTRS_PER_PGD; g++, pgd++) {
+		if (pgd_none(*pgd))
+			continue;
+		pud = pud_offset(pgd, 0);
+		if (PTRS_PER_PUD > 1) /* not folded */
+			mm_walk_set_prot(pud,flags);
+		for (u = 0; u < PTRS_PER_PUD; u++, pud++) {
+			if (pud_none(*pud))
+				continue;
+			pmd = pmd_offset(pud, 0);
+			if (PTRS_PER_PMD > 1) /* not folded */
+				mm_walk_set_prot(pmd,flags);
+			for (m = 0; m < PTRS_PER_PMD; m++, pmd++) {
+				if (pmd_none(*pmd))
+					continue;
+				pte = pte_offset_kernel(pmd,0);
+				mm_walk_set_prot(pte,flags);
+			}
+		}
+	}
+}
+
+void mm_pin(struct mm_struct *mm)
+{
+	spin_lock(&mm->page_table_lock);
+
+	mm_walk(mm, PAGE_KERNEL_RO);
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		       (unsigned long)mm->pgd,
+		       pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL_RO),
+		       UVMF_TLB_FLUSH));
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		       (unsigned long)__user_pgd(mm->pgd),
+		       pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL_RO),
+		       UVMF_TLB_FLUSH));
+	xen_pgd_pin(__pa(mm->pgd)); /* kernel */
+	xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */
+	mm->context.pinned = 1;
+	spin_lock(&mm_unpinned_lock);
+	list_del(&mm->context.unpinned);
+	spin_unlock(&mm_unpinned_lock);
+
+	spin_unlock(&mm->page_table_lock);
+}
+
+void mm_unpin(struct mm_struct *mm)
+{
+	spin_lock(&mm->page_table_lock);
+
+	xen_pgd_unpin(__pa(mm->pgd));
+	xen_pgd_unpin(__pa(__user_pgd(mm->pgd)));
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		       (unsigned long)mm->pgd,
+		       pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, PAGE_KERNEL), 0));
+	BUG_ON(HYPERVISOR_update_va_mapping(
+		       (unsigned long)__user_pgd(mm->pgd),
+		       pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, PAGE_KERNEL), 0));
+	mm_walk(mm, PAGE_KERNEL);
+	xen_tlb_flush();
+	mm->context.pinned = 0;
+	spin_lock(&mm_unpinned_lock);
+	list_add(&mm->context.unpinned, &mm_unpinned);
+	spin_unlock(&mm_unpinned_lock);
+
+	spin_unlock(&mm->page_table_lock);
+}
+
+void mm_pin_all(void)
+{
+	while (!list_empty(&mm_unpinned))
+		mm_pin(list_entry(mm_unpinned.next, struct mm_struct,
+				  context.unpinned));
+}
+
+void _arch_exit_mmap(struct mm_struct *mm)
+{
+    struct task_struct *tsk = current;
+
+    task_lock(tsk);
+
+    /*
+     * We aggressively remove defunct pgd from cr3. We execute unmap_vmas()
+     * *much* faster this way, as no tlb flushes means bigger wrpt batches.
+     */
+    if ( tsk->active_mm == mm )
+    {
+        tsk->active_mm = &init_mm;
+        atomic_inc(&init_mm.mm_count);
+
+        switch_mm(mm, &init_mm, tsk);
+
+        atomic_dec(&mm->mm_count);
+        BUG_ON(atomic_read(&mm->mm_count) == 0);
+    }
+
+    task_unlock(tsk);
+
+    if ( mm->context.pinned && (atomic_read(&mm->mm_count) == 1) )
+        mm_unpin(mm);
+}
 
 void pte_free(struct page *pte)
 {
-        pte_t *ptep;
-
-        ptep = pfn_to_kaddr(page_to_pfn(pte));
+	unsigned long va = (unsigned long)__va(page_to_pfn(pte)<<PAGE_SHIFT);
 
-        xen_pte_unpin(__pa(ptep));
-        make_page_writable(ptep);
-	__free_page(pte); 
+	if (!pte_write(*virt_to_ptep(va)))
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			va, pfn_pte(page_to_pfn(pte), PAGE_KERNEL), 0));
+	__free_page(pte);
 }
+#endif	/* CONFIG_XEN */
 
 static inline pte_t *lookup_address(unsigned long address) 
 { 
@@ -78,7 +204,7 @@ static void flush_kernel_map(void *addre
 	} else
 		asm volatile("wbinvd":::"memory"); 
 	if (address)
-                __flush_tlb_one((unsigned long) address);
+		__flush_tlb_one(address);
 	else
 		__flush_tlb_all();
 }
@@ -166,14 +292,17 @@ static int
 		BUG();
 
 	/* on x86-64 the direct mapping set at boot is not using 4k pages */
-// 	BUG_ON(PageReserved(kpte_page));
 	/*
 	 * ..., but the XEN guest kernels (currently) do:
 	 * If the pte was reserved, it means it was created at boot
 	 * time (not via split_large_page) and in turn we must not
 	 * replace it with a large page.
	 */
-	if (!PageReserved(kpte_page)) {
+#ifndef CONFIG_XEN
+ 	BUG_ON(PageReserved(kpte_page));
+#else
+	if (!PageReserved(kpte_page))
+#endif
 		switch (page_count(kpte_page)) {
 		case 1:
 			save_page(address, kpte_page);
@@ -182,7 +311,6 @@ static int
 		case 0:
 			BUG(); /* memleak and failed 2M page regeneration */
 	 	}
-	}
 	return 0;
 } 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu.h	Fri Aug 26 11:02:14 2005 +0000
@@ -0,0 +1,33 @@
+#ifndef __x86_64_MMU_H
+#define __x86_64_MMU_H
+
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+
+/*
+ * The x86_64 doesn't have a mmu context, but
+ * we put the segment information here.
+ *
+ * cpu_vm_mask is used to optimize ldt flushing.
+ */
+typedef struct { 
+	void *ldt;
+	rwlock_t ldtlock; 
+	int size;
+	struct semaphore sem; 
+#ifdef CONFIG_XEN
+	unsigned pinned:1;
+	struct list_head unpinned;
+#endif
+} mm_context_t;
+
+#ifdef CONFIG_XEN
+extern struct list_head mm_unpinned;
+extern spinlock_t mm_unpinned_lock;
+
+/* mm/memory.c:exit_mmap hook */
+extern void _arch_exit_mmap(struct mm_struct *mm);
+#define arch_exit_mmap(_mm) _arch_exit_mmap(_mm)
+#endif
+
+#endif
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/mmu_context.h	Fri Aug 26 11:02:14 2005 +0000
@@ -58,6 +58,9 @@ static inline void __prepare_arch_switch
 	}
 }
 
+extern void mm_pin(struct mm_struct *mm);
+extern void mm_unpin(struct mm_struct *mm);
+void mm_pin_all(void);
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
 			     struct task_struct *tsk)
@@ -66,6 +69,9 @@ static inline void switch_mm(struct mm_s
 	struct mmuext_op _op[3], *op = _op;
 
 	if (likely(prev != next)) {
+		if (!next->context.pinned)
+			mm_pin(next);
+
 		/* stop flush ipis for the previous mm */
 		clear_bit(cpu, &prev->cpu_vm_mask);
 #if 0  /* XEN: no lazy tlb */
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/pgalloc.h	Fri Aug 26 11:02:14 2005 +0000
@@ -21,12 +21,27 @@ static inline void pmd_populate_kernel(s
 
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte)
 {
-	set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+	if (unlikely((mm)->context.pinned)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			       (unsigned long)__va(page_to_pfn(pte) << PAGE_SHIFT),
+			       pfn_pte(page_to_pfn(pte), PAGE_KERNEL_RO), 0));
+		set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT)));
+	} else {
+		*(pmd) = __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT));
+	}
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
-	set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+	if (unlikely((mm)->context.pinned)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			       (unsigned long)pmd,
+			       pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, 
+				       PAGE_KERNEL_RO), 0));
+		set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd)));
+	} else {
+		*(pud) =  __pud(_PAGE_TABLE | __pa(pmd));
+	}
 }
 
 /*
@@ -35,53 +50,54 @@ static inline void pud_populate(struct m
  */
 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 {
-        set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
-        set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
-}
-
-extern __inline__ pmd_t *get_pmd(void)
-{
-        pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
-        if (!pmd)
-		return NULL;
-        make_page_readonly(pmd);
-        xen_pmd_pin(__pa(pmd));
-	return pmd;
+	if (unlikely((mm)->context.pinned)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			       (unsigned long)pud,
+			       pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, 
+				       PAGE_KERNEL_RO), 0));
+		set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(pud)));
+		set_pgd(__user_pgd(pgd), __pgd(_PAGE_TABLE | __pa(pud)));
+	} else {
+		*(pgd) =  __pgd(_PAGE_TABLE | __pa(pud));
+		*(__user_pgd(pgd)) = *(pgd);
+	}
 }
 
 extern __inline__ void pmd_free(pmd_t *pmd)
 {
-	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-        xen_pmd_unpin(__pa(pmd));
-        make_page_writable(pmd);
+	pte_t *ptep = virt_to_ptep(pmd);
+
+	if (!pte_write(*ptep)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)pmd,
+			pfn_pte(virt_to_phys(pmd)>>PAGE_SHIFT, PAGE_KERNEL),
+			0));
+	}
 	free_page((unsigned long)pmd);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
         pmd_t *pmd = (pmd_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-        if (!pmd)
-		return NULL;
-        make_page_readonly(pmd);
-        xen_pmd_pin(__pa(pmd)); 
         return pmd;
 }
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
         pud_t *pud = (pud_t *) get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-        if (!pud)
-		return NULL;
-        make_page_readonly(pud);
-        xen_pud_pin(__pa(pud)); 
         return pud;
 }
 
 static inline void pud_free(pud_t *pud)
 {
-	BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-        xen_pud_unpin(__pa(pud));
-        make_page_writable(pud);
+	pte_t *ptep = virt_to_ptep(pud);
+
+	if (!pte_write(*ptep)) {
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			(unsigned long)pud,
+			pfn_pte(virt_to_phys(pud)>>PAGE_SHIFT, PAGE_KERNEL),
+			0));
+	}
 	free_page((unsigned long)pud);
 }
 
@@ -107,10 +123,6 @@ static inline pgd_t *pgd_alloc(struct mm
 	       (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
 
 	memset(__user_pgd(pgd), 0, PAGE_SIZE); /* clean up user pgd */
-        make_pages_readonly(pgd, 2);
-
-        xen_pgd_pin(__pa(pgd)); /* kernel */
-        xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */
         /*
          * Set level3_user_pgt for vsyscall area
         */
@@ -121,31 +133,45 @@ static inline pgd_t *pgd_alloc(struct mm
 
 static inline void pgd_free(pgd_t *pgd)
 {
-	BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
-        xen_pgd_unpin(__pa(pgd));
-        xen_pgd_unpin(__pa(__user_pgd(pgd)));
-        make_pages_writable(pgd, 2);
+	pte_t *ptep = virt_to_ptep(pgd);
+
+	if (!pte_write(*ptep)) {
+		xen_pgd_unpin(__pa(pgd));
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			       (unsigned long)pgd,
+			       pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
+			       0));
+	}
+
+	ptep = virt_to_ptep(__user_pgd(pgd));
+
+	if (!pte_write(*ptep)) {
+		xen_pgd_unpin(__pa(__user_pgd(pgd)));
+		BUG_ON(HYPERVISOR_update_va_mapping(
+			       (unsigned long)__user_pgd(pgd),
+			       pfn_pte(virt_to_phys(__user_pgd(pgd))>>PAGE_SHIFT, 
+				       PAGE_KERNEL),
+			       0));
+	}
+
 	free_pages((unsigned long)pgd, 1);
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
         pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-        if (!pte)
-		return NULL;
-        make_page_readonly(pte);
-        xen_pte_pin(__pa(pte));
+        if (pte)
+		make_page_readonly(pte);
+
 	return pte;
 }
 
 static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = (void *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!pte)
-		return NULL;
-        make_page_readonly(pte);
-        xen_pte_pin(__pa(pte));
-	return virt_to_page((unsigned long)pte);
+	struct page *pte;
+
+	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+	return pte;
 }
 
 /* Should really implement gc for free page table pages. This could be
--- a/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h	Fri Aug 26 11:00:14 2005 +0000
+++ b/linux-2.6-xen-sparse/include/asm-xen/asm-x86_64/tlbflush.h	Fri Aug 26 11:02:14 2005 +0000
@@ -18,7 +18,7 @@ extern unsigned long pgkern_mask;
 
 #define __flush_tlb_all() __flush_tlb_global()
 
-#define __flush_tlb_one(addr)	xen_invlpg(addr)
+#define __flush_tlb_one(addr)	xen_invlpg((unsigned long)addr)
 
 
 /*