ia64/xen-unstable

changeset 10402:8070050cc30f

[LINUX][PAE] Improve allocation strategy when PAE pgdirs must be below 4GB.
Moving the re-allocation of the pgdir to low memory out of pgd_ctor() and
into pgd_alloc() has several advantages:
1. Avoids a race with save/restore, whereby the pgdir could end up above
4GB again after a restore.
2. If the pgdir cannot be re-allocated, we can return failure to the caller
rather than BUG().
3. Slightly reduces diff against native Linux code.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Jun 14 12:36:06 2006 +0100 (2006-06-14)
parents 5552bc2c3716
children 1e49997c8146
files linux-2.6-xen-sparse/arch/i386/mm/init-xen.c linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c
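In essence, the new strategy is: pgd_alloc() allocates the pgdir as usual
(possibly above 4GB), then exchanges its backing frame for memory below 4GB
while holding pgd_lock, preserving the page contents via a temporary copy.
A condensed sketch of that logic, simplified from the hunks below (pmd setup
and the out_oom unwinding are elided; note that any failure path taken with
pgd_lock held must also drop the lock before unwinding):

	/* Sketch: PAE pgdir must live below 4GB and the hypervisor does
	 * not advertise XENFEAT_pae_pgdir_above_4gb. */
	int rc;
	unsigned long flags;
	pgd_t *pgd_tmp;
	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL); /* may be >4GB */

	if (!pgd)
		return NULL;
	pgd_tmp = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
	if (!pgd_tmp)
		goto out_oom;			/* fail cleanly, not BUG() */
	memcpy(pgd_tmp, pgd, PAGE_SIZE);	/* the exchange clobbers data */

	spin_lock_irqsave(&pgd_lock, flags);	/* fences off save/restore */
	rc = xen_create_contiguous_region((unsigned long)pgd, 0, 32);
	memcpy(pgd, pgd_tmp, PAGE_SIZE);	/* restore the saved contents */
	kmem_cache_free(pgd_cache, pgd_tmp);
	if (rc)
		goto out_oom;
	/* ... the remainder of pgd_alloc() runs under pgd_lock ... */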
line diff
     1.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c	Wed Jun 14 11:19:53 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/init-xen.c	Wed Jun 14 12:36:06 2006 +0100
     1.3 @@ -763,7 +763,7 @@ void __init pgtable_cache_init(void)
     1.4  #endif
     1.5  				0,
     1.6  				pgd_ctor,
     1.7 -				pgd_dtor);
     1.8 +				PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
     1.9  	if (!pgd_cache)
    1.10  		panic("pgtable_cache_init(): Cannot create pgd cache");
    1.11  }
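The hunk above changes only the final argument of the kmem_cache_create()
call in pgtable_cache_init(): the destructor is registered only in the
non-PAE case, since with PAE the pgdir is now taken off pgd_list by
pgd_free() itself (second file below). For context, the surrounding call
reads roughly as follows (a sketch; the #ifdef selecting the size and
alignment arguments is elided, with the values shown being the PAE case):

	pgd_cache = kmem_cache_create("pgd",
				      PAGE_SIZE,	/* object size */
				      PAGE_SIZE,	/* alignment */
				      0,		/* SLAB flags */
				      pgd_ctor,
				      PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
	if (!pgd_cache)
		panic("pgtable_cache_init(): Cannot create pgd cache");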
     2.1 --- a/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Wed Jun 14 11:19:53 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/arch/i386/mm/pgtable-xen.c	Wed Jun 14 12:36:06 2006 +0100
     2.3 @@ -300,11 +300,6 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
     2.4  	unsigned long flags;
     2.5  
     2.6  	if (PTRS_PER_PMD > 1) {
     2.7 -		if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
     2.8 -			int rc = xen_create_contiguous_region(
     2.9 -				(unsigned long)pgd, 0, 32);
    2.10 -			BUG_ON(rc);
    2.11 -		}
    2.12  		if (HAVE_SHARED_KERNEL_PMD)
    2.13  			clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
    2.14  					swapper_pg_dir + USER_PTRS_PER_PGD,
    2.15 @@ -320,26 +315,22 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
    2.16  	}
    2.17  }
    2.18  
    2.19 +/* never called when PTRS_PER_PMD > 1 */
    2.20  void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
    2.21  {
    2.22  	unsigned long flags; /* can be called from interrupt context */
    2.23  
    2.24 -	if (PTRS_PER_PMD > 1) {
    2.25 -		if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
    2.26 -			xen_destroy_contiguous_region((unsigned long)pgd, 0);
    2.27 -	} else {
    2.28 -		spin_lock_irqsave(&pgd_lock, flags);
    2.29 -		pgd_list_del(pgd);
    2.30 -		spin_unlock_irqrestore(&pgd_lock, flags);
    2.31 +	spin_lock_irqsave(&pgd_lock, flags);
    2.32 +	pgd_list_del(pgd);
    2.33 +	spin_unlock_irqrestore(&pgd_lock, flags);
    2.34  
    2.35 -		pgd_test_and_unpin(pgd);
    2.36 -	}
    2.37 +	pgd_test_and_unpin(pgd);
    2.38  }
    2.39  
    2.40  pgd_t *pgd_alloc(struct mm_struct *mm)
    2.41  {
    2.42  	int i;
    2.43 -	pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
    2.44 +	pgd_t *pgd_tmp = NULL, *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
    2.45  
    2.46  	pgd_test_and_unpin(pgd);
    2.47  
    2.48 @@ -363,7 +354,26 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
    2.49  			set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
    2.50  		}
    2.51  
     2.52 +		/* xen_create_contiguous_region() loses page data; make a temp copy. */
    2.53 +		if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
    2.54 +			pgd_tmp = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
    2.55 +			if (!pgd_tmp)
    2.56 +				goto out_oom;
    2.57 +			memcpy(pgd_tmp, pgd, PAGE_SIZE);
    2.58 +		}
    2.59 +
    2.60  		spin_lock_irqsave(&pgd_lock, flags);
    2.61 +
     2.62 +		/* Protect against save/restore: move below 4GB under pgd_lock. */
    2.63 +		if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)) {
    2.64 +			int rc = xen_create_contiguous_region(
    2.65 +				(unsigned long)pgd, 0, 32);
    2.66 +			memcpy(pgd, pgd_tmp, PAGE_SIZE);
    2.67 +			kmem_cache_free(pgd_cache, pgd_tmp);
    2.68 +			if (rc)
    2.69 +				goto out_oom;
    2.70 +		}
    2.71 +
    2.72  		for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
    2.73  			unsigned long v = (unsigned long)i << PGDIR_SHIFT;
    2.74  			pgd_t *kpgd = pgd_offset_k(v);
    2.75 @@ -374,7 +384,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
    2.76  			make_lowmem_page_readonly(
    2.77  				pmd, XENFEAT_writable_page_tables);
    2.78  		}
    2.79 +
    2.80  		pgd_list_add(pgd);
    2.81 +
    2.82  		spin_unlock_irqrestore(&pgd_lock, flags);
    2.83  	}
    2.84  
    2.85 @@ -399,11 +411,15 @@ void pgd_free(pgd_t *pgd)
    2.86  			pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
    2.87  			kmem_cache_free(pmd_cache, pmd);
    2.88  		}
    2.89 +
    2.90  		if (!HAVE_SHARED_KERNEL_PMD) {
    2.91  			unsigned long flags;
    2.92  			spin_lock_irqsave(&pgd_lock, flags);
    2.93  			pgd_list_del(pgd);
    2.94  			spin_unlock_irqrestore(&pgd_lock, flags);
    2.95 +
    2.96 +			pgd_test_and_unpin(pgd);
    2.97 +
    2.98  			for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
    2.99  				pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
   2.100  				make_lowmem_page_writable(
   2.101 @@ -411,8 +427,13 @@ void pgd_free(pgd_t *pgd)
   2.102  				memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
   2.103  				kmem_cache_free(pmd_cache, pmd);
   2.104  			}
   2.105 +
   2.106 +			if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
   2.107 +				xen_destroy_contiguous_region(
   2.108 +					(unsigned long)pgd, 0);
   2.109  		}
   2.110  	}
   2.111 +
   2.112  	/* in the non-PAE case, free_pgtables() clears user pgd entries */
   2.113  	kmem_cache_free(pgd_cache, pgd);
   2.114  }
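The free path undoes the exchange last: the pgdir is unlisted and unpinned,
its kernel pmds are made writable and released, and only then is the
below-4GB frame handed back before the page returns to the slab. For the
PAE, !HAVE_SHARED_KERNEL_PMD case, the resulting tail of pgd_free() reads
roughly as follows (a condensed sketch assembled from the hunks above):

	spin_lock_irqsave(&pgd_lock, flags);
	pgd_list_del(pgd);		/* off the global pgdir list */
	spin_unlock_irqrestore(&pgd_lock, flags);

	pgd_test_and_unpin(pgd);	/* unpin before modifying the tables */

	for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
		pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
		make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables);
		memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
		kmem_cache_free(pmd_cache, pmd);
	}

	/* Undo the pgd_alloc()-time exchange before freeing the page. */
	if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
		xen_destroy_contiguous_region((unsigned long)pgd, 0);

	kmem_cache_free(pgd_cache, pgd);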