ia64/xen-unstable

changeset 8046:b0338759544e

Big reworking of the SHARED_KERNEL_PMD logic. Includes several
bug fixes for PAE, and reverts my previous changeset that
broke non-PAE.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Thu Nov 24 23:21:48 2005 +0100 (2005-11-24)
parents 78b5e590be34
children cb215a84d1af
files linux-2.6-xen-sparse/arch/xen/i386/mm/init.c linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c patches/linux-2.6.12/pmd-shared.patch
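
Throughout the diff below, the new code tests PTRS_PER_PMD at run time
instead of CONFIG_X86_PAE at compile time. For reference, a comment-only
sketch of the convention (constant values taken from the stock i386
page-table headers, not from this changeset):

	/*
	 * i386 paging levels and the constants that distinguish them:
	 *
	 *   non-PAE (2-level): PTRS_PER_PGD = 1024, PTRS_PER_PMD = 1
	 *   PAE     (3-level): PTRS_PER_PGD = 4,    PTRS_PER_PMD = 512
	 *
	 * So "PTRS_PER_PMD == 1" selects the non-PAE paths and
	 * "PTRS_PER_PMD > 1" the PAE paths; since both are compile-time
	 * constants, the compiler still discards the dead branch.
	 */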
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c	Thu Nov 24 19:57:01 2005 +0000
     1.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/init.c	Thu Nov 24 23:21:48 2005 +0100
     1.3 @@ -708,7 +708,7 @@ void __init pgtable_cache_init(void)
     1.4  			panic("pgtable_cache_init(): cannot create pmd cache");
     1.5  	}
     1.6  	pgd_cache = kmem_cache_create("pgd",
     1.7 -#if 0 /* How the heck _this_ works in native linux ??? */
     1.8 +#ifndef CONFIG_XEN
     1.9  				PTRS_PER_PGD*sizeof(pgd_t),
    1.10  				PTRS_PER_PGD*sizeof(pgd_t),
    1.11  #else
    1.12 @@ -717,7 +717,7 @@ void __init pgtable_cache_init(void)
    1.13  #endif
    1.14  				0,
    1.15  				pgd_ctor,
    1.16 -				pgd_dtor);
    1.17 +				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
    1.18  	if (!pgd_cache)
    1.19  		panic("pgtable_cache_init(): Cannot create pgd cache");
    1.20  }
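
The other half of this hunk: pgd_dtor is now registered only when
PTRS_PER_PMD == 1. Its only job is unlinking the pgd from pgd_list, and
after this change PAE pgds join and leave that list in pgd_alloc()/pgd_free()
instead (and only when the kernel pmd is unshared), so a cache-level
destructor would be wrong for them. An abridged sketch, with the body taken
from the pgtable.c context further down:

	/* Registered as the pgd cache destructor only for non-PAE builds. */
	void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
	{
		unsigned long flags; /* can be called from interrupt context */

		spin_lock_irqsave(&pgd_lock, flags);
		pgd_list_del(pgd);
		spin_unlock_irqrestore(&pgd_lock, flags);
	}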
     2.1 --- a/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Thu Nov 24 19:57:01 2005 +0000
     2.2 +++ b/linux-2.6-xen-sparse/arch/xen/i386/mm/pgtable.c	Thu Nov 24 23:21:48 2005 +0100
     2.3 @@ -28,8 +28,6 @@
     2.4  #include <asm/hypervisor.h>
     2.5  
     2.6  static void pgd_test_and_unpin(pgd_t *pgd);
     2.7 -#define suspend_disable	preempt_disable
     2.8 -#define suspend_enable	preempt_enable
     2.9  
    2.10  void show_mem(void)
    2.11  {
    2.12 @@ -279,26 +277,31 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
    2.13  {
    2.14  	unsigned long flags;
    2.15  
    2.16 -#ifdef CONFIG_X86_PAE
    2.17 -	/* Ensure pgd resides below 4GB. */
    2.18 -	int rc = xen_create_contiguous_region((unsigned long)pgd, 0, 32);
    2.19 -	BUG_ON(rc);
    2.20 +	if (PTRS_PER_PMD > 1) {
    2.21 +#ifdef CONFIG_XEN
    2.22 +		/* Ensure pgd resides below 4GB. */
    2.23 +		int rc = xen_create_contiguous_region(
    2.24 +			(unsigned long)pgd, 0, 32);
    2.25 +		BUG_ON(rc);
    2.26  #endif
    2.27 -
    2.28 -	if (HAVE_SHARED_KERNEL_PMD) {
    2.29 +		if (HAVE_SHARED_KERNEL_PMD)
    2.30 +			memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
    2.31 +			       swapper_pg_dir, sizeof(pgd_t));
    2.32 +	} else {
    2.33 +		if (!HAVE_SHARED_KERNEL_PMD)
    2.34 +			spin_lock_irqsave(&pgd_lock, flags);
    2.35  		memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
    2.36  		       swapper_pg_dir + USER_PTRS_PER_PGD,
    2.37  		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
    2.38 -		return;
    2.39 +		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
    2.40 +		if (!HAVE_SHARED_KERNEL_PMD) {
    2.41 +			pgd_list_add(pgd);
    2.42 +			spin_unlock_irqrestore(&pgd_lock, flags);
    2.43 +		}
    2.44  	}
    2.45 -
    2.46 -	memset(pgd, 0, PTRS_PER_PGD*sizeof(pgd_t));
    2.47 -
    2.48 -	spin_lock_irqsave(&pgd_lock, flags);
    2.49 -	pgd_list_add(pgd);
    2.50 -	spin_unlock_irqrestore(&pgd_lock, flags);
    2.51  }
    2.52  
    2.53 +/* never called when PTRS_PER_PMD > 1 */
    2.54  void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
    2.55  {
    2.56  	unsigned long flags; /* can be called from interrupt context */
    2.57 @@ -315,7 +318,7 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
    2.58  
    2.59  pgd_t *pgd_alloc(struct mm_struct *mm)
    2.60  {
    2.61 -	int i = 0;
    2.62 +	int i;
    2.63  	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
    2.64  
    2.65  	pgd_test_and_unpin(pgd);
    2.66 @@ -323,34 +326,31 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
    2.67  	if (PTRS_PER_PMD == 1 || !pgd)
    2.68  		return pgd;
    2.69  
    2.70 +	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
    2.71 +		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
    2.72 +		if (!pmd)
    2.73 +			goto out_oom;
    2.74 +		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
    2.75 +	}
    2.76 +
    2.77  	if (!HAVE_SHARED_KERNEL_PMD) {
    2.78 -		/* alloc and copy kernel pmd */
    2.79  		unsigned long flags;
    2.80  		pgd_t *copy_pgd = pgd_offset_k(PAGE_OFFSET);
    2.81  		pud_t *copy_pud = pud_offset(copy_pgd, PAGE_OFFSET);
    2.82  		pmd_t *copy_pmd = pmd_offset(copy_pud, PAGE_OFFSET);
    2.83  		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
    2.84 -		if (0 == pmd)
    2.85 +		++i;
    2.86 +		if (!pmd)
    2.87  			goto out_oom;
    2.88  
    2.89  		spin_lock_irqsave(&pgd_lock, flags);
    2.90  		memcpy(pmd, copy_pmd, PAGE_SIZE);
    2.91 -		spin_unlock_irqrestore(&pgd_lock, flags);
    2.92  		make_lowmem_page_readonly(pmd);
    2.93  		set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
    2.94 +		pgd_list_add(pgd);
    2.95 +		spin_unlock_irqrestore(&pgd_lock, flags);
    2.96  	}
    2.97  
    2.98 -	/* alloc user pmds */
    2.99 -	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
   2.100 -		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
   2.101 -		if (!pmd)
   2.102 -			goto out_oom;
   2.103 -		suspend_disable();
   2.104 -		if (test_bit(PG_pinned, &virt_to_page(pgd)->flags))
   2.105 -			make_lowmem_page_readonly(pmd);
   2.106 -		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
   2.107 -		suspend_enable();
   2.108 -	}
   2.109  	return pgd;
   2.110  
   2.111  out_oom:
   2.112 @@ -364,28 +364,25 @@ void pgd_free(pgd_t *pgd)
   2.113  {
   2.114  	int i;
   2.115  
   2.116 -	suspend_disable();
   2.117  	pgd_test_and_unpin(pgd);
   2.118  
   2.119  	/* in the PAE case user pgd entries are overwritten before usage */
   2.120  	if (PTRS_PER_PMD > 1) {
   2.121  		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
   2.122  			pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
   2.123 -			set_pgd(&pgd[i], __pgd(0));
   2.124 -			make_lowmem_page_writable(pmd);
   2.125  			kmem_cache_free(pmd_cache, pmd);
   2.126  		}
   2.127  		if (!HAVE_SHARED_KERNEL_PMD) {
   2.128 +			unsigned long flags;
   2.129  			pmd_t *pmd = (void *)__va(pgd_val(pgd[USER_PTRS_PER_PGD])-1);
   2.130 -			set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(0));
   2.131 +			spin_lock_irqsave(&pgd_lock, flags);
   2.132 +			pgd_list_del(pgd);
   2.133 +			spin_unlock_irqrestore(&pgd_lock, flags);
   2.134  			make_lowmem_page_writable(pmd);
   2.135  			memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
   2.136  			kmem_cache_free(pmd_cache, pmd);
   2.137  		}
   2.138  	}
   2.139 -
   2.140 -	suspend_enable();
   2.141 -
   2.142  	/* in the non-PAE case, free_pgtables() clears user pgd entries */
   2.143  	kmem_cache_free(pgd_cache, pgd);
   2.144  }
   2.145 @@ -510,9 +507,6 @@ static void pgd_walk(pgd_t *pgd_base, pg
   2.146  
   2.147  static void __pgd_pin(pgd_t *pgd)
   2.148  {
   2.149 -	/* PAE PGDs with no kernel PMD cannot be pinned. Bail right now. */
   2.150 -	if ((PTRS_PER_PMD > 1) && pgd_none(pgd[USER_PTRS_PER_PGD]))
   2.151 -		return;
   2.152  	pgd_walk(pgd, PAGE_KERNEL_RO);
   2.153  	xen_pgd_pin(__pa(pgd));
   2.154  	set_bit(PG_pinned, &virt_to_page(pgd)->flags);
   2.155 @@ -527,10 +521,8 @@ static void __pgd_unpin(pgd_t *pgd)
   2.156  
   2.157  static void pgd_test_and_unpin(pgd_t *pgd)
   2.158  {
   2.159 -	suspend_disable();
   2.160  	if (test_bit(PG_pinned, &virt_to_page(pgd)->flags))
   2.161  		__pgd_unpin(pgd);
   2.162 -	suspend_enable();
   2.163  }
   2.164  
   2.165  void mm_pin(struct mm_struct *mm)
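
Condensed flow of the reworked PAE allocation path above: user pmds are
installed first, and when the kernel pmd is unshared, copying it, installing
it, and publishing the pgd on pgd_list all happen inside one pgd_lock
critical section, so list walkers never see a pgd whose kernel pmd is
missing. That appears to be why the old "PAE PGDs with no kernel PMD cannot
be pinned" bail-out in __pgd_pin() can go away. A sketch under a
hypothetical name, error unwinding elided; "kernel_pmd" stands in for the
pgd_offset_k/pud_offset/pmd_offset walk in the real code:

	pgd_t *pgd_alloc_sketch(struct mm_struct *mm)
	{
		int i;
		unsigned long flags;
		pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);

		pgd_test_and_unpin(pgd);    /* Xen: strip any leftover pin */
		if (PTRS_PER_PMD == 1 || !pgd)
			return pgd;         /* non-PAE, or allocation failed */

		/* user pmds first; the low bit of each entry is _PAGE_PRESENT */
		for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
			pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
			set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
		}

		if (!HAVE_SHARED_KERNEL_PMD) {
			pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);

			spin_lock_irqsave(&pgd_lock, flags);
			memcpy(pmd, kernel_pmd, PAGE_SIZE); /* private kernel mappings */
			make_lowmem_page_readonly(pmd);     /* Xen: pagetables must be RO */
			set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
			pgd_list_add(pgd);  /* publish only once fully formed */
			spin_unlock_irqrestore(&pgd_lock, flags);
		}
		return pgd;
	}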
     3.1 --- a/patches/linux-2.6.12/pmd-shared.patch	Thu Nov 24 19:57:01 2005 +0000
     3.2 +++ b/patches/linux-2.6.12/pmd-shared.patch	Thu Nov 24 23:21:48 2005 +0100
     3.3 @@ -1,15 +1,3 @@
     3.4 -diff -urNpP linux-2.6.12/arch/i386/mm/init.c linux-2.6.12.new/arch/i386/mm/init.c
     3.5 ---- linux-2.6.12/arch/i386/mm/init.c	2005-06-17 20:48:29.000000000 +0100
     3.6 -+++ linux-2.6.12.new/arch/i386/mm/init.c	2005-07-11 16:28:09.778165582 +0100
     3.7 -@@ -634,7 +634,7 @@ void __init pgtable_cache_init(void)
     3.8 - 				PTRS_PER_PGD*sizeof(pgd_t),
     3.9 - 				0,
    3.10 - 				pgd_ctor,
    3.11 --				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
    3.12 -+				pgd_dtor);
    3.13 - 	if (!pgd_cache)
    3.14 - 		panic("pgtable_cache_init(): Cannot create pgd cache");
    3.15 - }
    3.16  diff -urNpP linux-2.6.12/arch/i386/mm/pageattr.c linux-2.6.12.new/arch/i386/mm/pageattr.c
    3.17  --- linux-2.6.12/arch/i386/mm/pageattr.c	2005-06-17 20:48:29.000000000 +0100
    3.18  +++ linux-2.6.12.new/arch/i386/mm/pageattr.c	2005-07-11 16:28:09.775165494 +0100
    3.19 @@ -23,31 +11,45 @@ diff -urNpP linux-2.6.12/arch/i386/mm/pa
    3.20   
    3.21   	spin_lock_irqsave(&pgd_lock, flags);
    3.22  diff -urNpP linux-2.6.12/arch/i386/mm/pgtable.c linux-2.6.12.new/arch/i386/mm/pgtable.c
    3.23 ---- linux-2.6.12/arch/i386/mm/pgtable.c	2005-06-17 20:48:29.000000000 +0100
    3.24 -+++ linux-2.6.12.new/arch/i386/mm/pgtable.c	2005-07-11 16:32:01.478023726 +0100
    3.25 -@@ -199,14 +199,14 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
    3.26 +--- linux-2.6.12/arch/i386/mm/pgtable.c	2005-11-24 21:51:49.000000000 +0000
    3.27 ++++ linux-2.6.12.new/arch/i386/mm/pgtable.c	2005-11-24 22:06:04.000000000 +0000
    3.28 +@@ -199,19 +199,22 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
    3.29   {
    3.30   	unsigned long flags;
    3.31   
    3.32  -	if (PTRS_PER_PMD == 1)
    3.33 -+	if (!HAVE_SHARED_KERNEL_PMD)
    3.34 - 		spin_lock_irqsave(&pgd_lock, flags);
    3.35 - 
    3.36 - 	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
    3.37 - 			swapper_pg_dir + USER_PTRS_PER_PGD,
    3.38 - 			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
    3.39 - 
    3.40 +-		spin_lock_irqsave(&pgd_lock, flags);
    3.41 +-
    3.42 +-	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
    3.43 +-			swapper_pg_dir + USER_PTRS_PER_PGD,
    3.44 +-			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
    3.45 +-
    3.46  -	if (PTRS_PER_PMD > 1)
    3.47 -+	if (HAVE_SHARED_KERNEL_PMD)
    3.48 - 		return;
    3.49 - 
    3.50 - 	pgd_list_add(pgd);
    3.51 -@@ -214,11 +214,13 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
    3.52 - 	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
    3.53 +-		return;
    3.54 +-
    3.55 +-	pgd_list_add(pgd);
    3.56 +-	spin_unlock_irqrestore(&pgd_lock, flags);
    3.57 +-	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
    3.58 ++	if (PTRS_PER_PMD > 1) {
    3.59 ++		if (HAVE_SHARED_KERNEL_PMD)
    3.60 ++			memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
    3.61 ++			       swapper_pg_dir, sizeof(pgd_t));
    3.62 ++	} else {
    3.63 ++		if (!HAVE_SHARED_KERNEL_PMD)
    3.64 ++			spin_lock_irqsave(&pgd_lock, flags);
    3.65 ++		memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
    3.66 ++		       swapper_pg_dir + USER_PTRS_PER_PGD,
    3.67 ++		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
    3.68 ++		memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
    3.69 ++		if (!HAVE_SHARED_KERNEL_PMD) {
    3.70 ++			pgd_list_add(pgd);
    3.71 ++			spin_unlock_irqrestore(&pgd_lock, flags);
    3.72 ++		}
    3.73 ++	}
    3.74   }
    3.75   
    3.76 --/* never called when PTRS_PER_PMD > 1 */
    3.77 - void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
    3.78 + /* never called when PTRS_PER_PMD > 1 */
    3.79 +@@ -219,6 +222,9 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
    3.80   {
    3.81   	unsigned long flags; /* can be called from interrupt context */
    3.82   
    3.83 @@ -57,38 +59,32 @@ diff -urNpP linux-2.6.12/arch/i386/mm/pg
    3.84   	spin_lock_irqsave(&pgd_lock, flags);
    3.85   	pgd_list_del(pgd);
    3.86   	spin_unlock_irqrestore(&pgd_lock, flags);
    3.87 -@@ -226,12 +228,29 @@ void pgd_dtor(void *pgd, kmem_cache_t *c
    3.88 - 
    3.89 - pgd_t *pgd_alloc(struct mm_struct *mm)
    3.90 - {
    3.91 --	int i;
    3.92 -+	int i = 0;
    3.93 - 	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
    3.94 - 
    3.95 - 	if (PTRS_PER_PMD == 1 || !pgd)
    3.96 - 		return pgd;
    3.97 - 
    3.98 +@@ -238,6 +244,24 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
    3.99 + 			goto out_oom;
   3.100 + 		set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
   3.101 + 	}
   3.102 ++
   3.103  +	if (!HAVE_SHARED_KERNEL_PMD) {
   3.104 -+		/* alloc and copy kernel pmd */
   3.105  +		unsigned long flags;
   3.106  +		pgd_t *copy_pgd = pgd_offset_k(PAGE_OFFSET);
   3.107  +		pud_t *copy_pud = pud_offset(copy_pgd, PAGE_OFFSET);
   3.108  +		pmd_t *copy_pmd = pmd_offset(copy_pud, PAGE_OFFSET);
   3.109  +		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
   3.110 -+		if (0 == pmd)
    3.111 ++		++i;
   3.112 ++		if (!pmd)
   3.113  +			goto out_oom;
   3.114  +
   3.115  +		spin_lock_irqsave(&pgd_lock, flags);
   3.116  +		memcpy(pmd, copy_pmd, PAGE_SIZE);
   3.117 ++		set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
   3.118 ++		pgd_list_add(pgd);
   3.119  +		spin_unlock_irqrestore(&pgd_lock, flags);
   3.120 -+		set_pgd(&pgd[USER_PTRS_PER_PGD], __pgd(1 + __pa(pmd)));
   3.121  +	}
   3.122  +
   3.123 -+	/* alloc user pmds */
   3.124 - 	for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
   3.125 - 		pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
   3.126 - 		if (!pmd)
   3.127 -@@ -252,9 +271,16 @@ void pgd_free(pgd_t *pgd)
   3.128 + 	return pgd;
   3.129 + 
   3.130 + out_oom:
   3.131 +@@ -252,9 +276,21 @@ void pgd_free(pgd_t *pgd)
   3.132   	int i;
   3.133   
   3.134   	/* in the PAE case user pgd entries are overwritten before usage */
   3.135 @@ -101,7 +97,12 @@ diff -urNpP linux-2.6.12/arch/i386/mm/pg
   3.136  +			kmem_cache_free(pmd_cache, pmd);
   3.137  +		}
   3.138  +		if (!HAVE_SHARED_KERNEL_PMD) {
   3.139 ++			unsigned long flags;
   3.140  +			pmd_t *pmd = (void *)__va(pgd_val(pgd[USER_PTRS_PER_PGD])-1);
   3.141 ++			spin_lock_irqsave(&pgd_lock, flags);
   3.142 ++			pgd_list_del(pgd);
   3.143 ++			spin_unlock_irqrestore(&pgd_lock, flags);
   3.144 ++			memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
   3.145  +			kmem_cache_free(pmd_cache, pmd);
   3.146  +		}
   3.147  +	}
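
For symmetry, the matching teardown from the pgd_free() hunks above,
condensed into its post-patch shape. Entries were stored as
__pgd(1 + __pa(pmd)), i.e. the pmd's physical address with the low present
bit set, so pgd_val(...) - 1 recovers the physical address; the pgd leaves
pgd_list before its private kernel pmd is made writable again, cleared, and
freed. A sketch under a hypothetical name, Xen tree assumed:

	void pgd_free_sketch(pgd_t *pgd)
	{
		int i;

		pgd_test_and_unpin(pgd);   /* Xen: unpin before teardown */

		/* in the PAE case user pgd entries are overwritten before usage */
		if (PTRS_PER_PMD > 1) {
			for (i = 0; i < USER_PTRS_PER_PGD; ++i)
				kmem_cache_free(pmd_cache,
						(void *)__va(pgd_val(pgd[i]) - 1));
			if (!HAVE_SHARED_KERNEL_PMD) {
				unsigned long flags;
				pmd_t *pmd = (void *)__va(pgd_val(pgd[USER_PTRS_PER_PGD]) - 1);

				spin_lock_irqsave(&pgd_lock, flags);
				pgd_list_del(pgd);              /* unpublish first */
				spin_unlock_irqrestore(&pgd_lock, flags);
				make_lowmem_page_writable(pmd); /* undo Xen's RO mapping */
				memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
				kmem_cache_free(pmd_cache, pmd);
			}
		}
		/* in the non-PAE case, free_pgtables() clears user pgd entries */
		kmem_cache_free(pgd_cache, pgd);
	}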