ia64/xen-unstable

changeset 2263:8a90852f79d1

bitkeeper revision 1.1159.36.1 (41224664I9csn5Rc83-pM_anatKyCg)

Fix VESA BIOS mapping issue. Clean up workqueue/taskqueue confusion.
author kaf24@scramble.cl.cam.ac.uk
date Tue Aug 17 17:54:44 2004 +0000 (2004-08-17)
parents 8346a7a9c1e1
children 0dd4b96ce798
files	.rootkeys
	linux-2.4.26-xen-sparse/arch/xen/mm/Makefile
	linux-2.4.26-xen-sparse/arch/xen/mm/fault.c
	linux-2.4.26-xen-sparse/include/asm-xen/pgalloc.h
	linux-2.4.26-xen-sparse/include/asm-xen/pgtable.h
	linux-2.4.26-xen-sparse/include/asm-xen/queues.h
	linux-2.4.26-xen-sparse/mkbuildtree
	linux-2.4.26-xen-sparse/mm/mmap.c
	linux-2.6.7-xen-sparse/arch/xen/i386/mm/Makefile
	linux-2.6.7-xen-sparse/arch/xen/i386/mm/fault.c
	linux-2.6.7-xen-sparse/arch/xen/i386/mm/mmap.c
	linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c
	linux-2.6.7-xen-sparse/arch/xen/kernel/ctrl_if.c
	linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c
	linux-2.6.7-xen-sparse/arch/xen/kernel/reboot.c
	linux-2.6.7-xen-sparse/drivers/xen/console/console.c
	linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h
	linux-2.6.7-xen-sparse/include/asm-xen/ctrl_if.h
	linux-2.6.7-xen-sparse/include/asm-xen/queues.h
	linux-2.6.7-xen-sparse/mm/mmap.c
line diff
     1.1 --- a/.rootkeys	Tue Aug 17 09:35:05 2004 +0000
     1.2 +++ b/.rootkeys	Tue Aug 17 17:54:44 2004 +0000
     1.3 @@ -103,6 +103,7 @@ 3e5a4e67uTYU5oEnIDjxuaez8njjqg linux-2.4
     1.4  3e5a4e67X7JyupgdYkgDX19Huj2sAw linux-2.4.26-xen-sparse/include/asm-xen/pgtable-2level.h
     1.5  3e5a4e67gr4NLGtQ5CvSLimMYZlkOA linux-2.4.26-xen-sparse/include/asm-xen/pgtable.h
     1.6  3e5a4e676uK4xErTBDH6XJREn9LSyg linux-2.4.26-xen-sparse/include/asm-xen/processor.h
     1.7 +41224663YBCUMX1kVo_HRUtgaHTi7w linux-2.4.26-xen-sparse/include/asm-xen/queues.h
     1.8  3e5a4e68uJz-xI0IBVMD7xRLQKJDFg linux-2.4.26-xen-sparse/include/asm-xen/segment.h
     1.9  3e5a4e68Nfdh6QcOKUTGCaYkf2LmYA linux-2.4.26-xen-sparse/include/asm-xen/smp.h
    1.10  4062f7e2PzFOUGT0PaE7A0VprTU3JQ linux-2.4.26-xen-sparse/include/asm-xen/synch_bitops.h
    1.11 @@ -118,7 +119,6 @@ 401c059bjLBFYHRD4Py2uM3eA1D4zQ linux-2.4
    1.12  3e6e7c1efbQe93xCvOpOVCnXTMmQ5w linux-2.4.26-xen-sparse/mkbuildtree
    1.13  406aeeafkrnCuIVWLFv3kfn4uAD5Eg linux-2.4.26-xen-sparse/mm/highmem.c
    1.14  3e5a4e68GxCIaFH4sy01v1wjapetaA linux-2.4.26-xen-sparse/mm/memory.c
    1.15 -411ce99d_uOUTK61pkqbdIAi1CIaSA linux-2.4.26-xen-sparse/mm/mmap.c
    1.16  3f108af5VxPkLv13tXpXgoRKALQtXQ linux-2.4.26-xen-sparse/mm/mprotect.c
    1.17  3e5a4e681xMPdF9xCMwpyfuYMySU5g linux-2.4.26-xen-sparse/mm/mremap.c
    1.18  409ba2e7akOFqQUg6Qyg2s28xcXiMg linux-2.4.26-xen-sparse/mm/page_alloc.c
    1.19 @@ -159,7 +159,6 @@ 4118cc35CbY8rfGVspF5O-7EkXBEAA linux-2.6
    1.20  40f562383SKvDStdtrvzr5fyCbW4rw linux-2.6.7-xen-sparse/arch/xen/i386/mm/hypervisor.c
    1.21  40f56239xcNylAxuGsQHwi1AyMLV8w linux-2.6.7-xen-sparse/arch/xen/i386/mm/init.c
    1.22  41062ab7CjxC1UBaFhOMWWdhHkIUyg linux-2.6.7-xen-sparse/arch/xen/i386/mm/ioremap.c
    1.23 -411b9db3oFpYQc4C-_mO2lRTcSz8UQ linux-2.6.7-xen-sparse/arch/xen/i386/mm/mmap.c
    1.24  40f5623906UYHv1rsVUeRc0tFT0dWw linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c
    1.25  4107adf12ndy94MidCaivDibJ3pPAg linux-2.6.7-xen-sparse/arch/xen/i386/pci/Makefile
    1.26  4107adf1WcCgkhsdLTRGX52cOG1vJg linux-2.6.7-xen-sparse/arch/xen/i386/pci/direct.c
    1.27 @@ -240,10 +239,10 @@ 40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6
    1.28  40f5623aGPlsm0u1LTO-NVZ6AGzNRQ linux-2.6.7-xen-sparse/include/asm-xen/hypervisor.h
    1.29  40f5623cndVUFlkxpf7Lfx7xu8madQ linux-2.6.7-xen-sparse/include/asm-xen/multicall.h
    1.30  3f108af1ylCIm82H052FVTfXACBHrw linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h
    1.31 +4122466356eIBnC9ot44WSVVIFyhQA linux-2.6.7-xen-sparse/include/asm-xen/queues.h
    1.32  3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.7-xen-sparse/include/asm-xen/suspend.h
    1.33  3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h
    1.34  40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.7-xen-sparse/mkbuildtree
    1.35 -411b9db3dpQAK-pcP8WwcRHZGn2eKg linux-2.6.7-xen-sparse/mm/mmap.c
    1.36  410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.7-xen-sparse/mm/page_alloc.c
    1.37  40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Make.defs
    1.38  3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile
     2.1 --- a/linux-2.4.26-xen-sparse/arch/xen/mm/Makefile	Tue Aug 17 09:35:05 2004 +0000
     2.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/mm/Makefile	Tue Aug 17 17:54:44 2004 +0000
     2.3 @@ -9,7 +9,7 @@
     2.4  
     2.5  O_TARGET := mm.o
     2.6  
     2.7 -obj-y	 := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o mmap.o
     2.8 +obj-y	 := init.o fault.o extable.o pageattr.o hypervisor.o ioremap.o
     2.9  
    2.10  export-objs := pageattr.o
    2.11  
     3.1 --- a/linux-2.4.26-xen-sparse/arch/xen/mm/fault.c	Tue Aug 17 09:35:05 2004 +0000
     3.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/mm/fault.c	Tue Aug 17 17:54:44 2004 +0000
     3.3 @@ -121,10 +121,8 @@ asmlinkage void do_page_fault(struct pt_
     3.4  	 * (error_code & 4) == 0, and that the fault was not a
     3.5  	 * protection error (error_code & 1) == 0.
     3.6  	 */
     3.7 -	if (unlikely(address >= TASK_SIZE) ||
     3.8 -	    unlikely(address < (FIRST_USER_PGD_NR<<PGDIR_SHIFT)))
     3.9 -		if (!(error_code & 5))
    3.10 -			goto vmalloc_fault;
    3.11 +	if (address >= TASK_SIZE && !(error_code & 5))
    3.12 +		goto vmalloc_fault;
    3.13  
    3.14  	mm = tsk->mm;
    3.15  	info.si_code = SEGV_MAPERR;
     4.1 --- a/linux-2.4.26-xen-sparse/include/asm-xen/pgalloc.h	Tue Aug 17 09:35:05 2004 +0000
     4.2 +++ b/linux-2.4.26-xen-sparse/include/asm-xen/pgalloc.h	Tue Aug 17 17:54:44 2004 +0000
     4.3 @@ -54,15 +54,11 @@ static inline pgd_t *get_pgd_slow(void)
     4.4  			if (!pmd)
     4.5  				goto out_oom;
     4.6  			clear_page(pmd);
     4.7 -			set_pgd(pgd + FIRST_USER_PGD_NR, __pgd(1 + __pa(pmd)));
     4.8 +			set_pgd(pgd + i, __pgd(1 + __pa(pmd)));
     4.9  		}
    4.10 -		memcpy(pgd,
    4.11 -			swapper_pg_dir,
    4.12 -			FIRST_USER_PGD_NR * sizeof(pgd_t));
    4.13 -		memcpy(pgd + FIRST_USER_PGD_NR + USER_PTRS_PER_PGD,
    4.14 -			swapper_pg_dir + FIRST_USER_PGD_NR + USER_PTRS_PER_PGD,
    4.15 -			(PTRS_PER_PGD - USER_PTRS_PER_PGD -
    4.16 -			 FIRST_USER_PGD_NR) * sizeof(pgd_t));
    4.17 +		memcpy(pgd + USER_PTRS_PER_PGD,
    4.18 +			init_mm.pgd + USER_PTRS_PER_PGD,
    4.19 +			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
    4.20  	}
    4.21  	return pgd;
    4.22  out_oom:
    4.23 @@ -79,15 +75,10 @@ static inline pgd_t *get_pgd_slow(void)
    4.24  	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL);
    4.25  
    4.26  	if (pgd) {
    4.27 -		memset(pgd + FIRST_USER_PGD_NR,
    4.28 -			0, USER_PTRS_PER_PGD*sizeof(pgd_t));
    4.29 -		memcpy(pgd,
    4.30 -			init_mm.pgd,
    4.31 -			FIRST_USER_PGD_NR * sizeof(pgd_t));
    4.32 -		memcpy(pgd + FIRST_USER_PGD_NR + USER_PTRS_PER_PGD,
    4.33 -			init_mm.pgd + FIRST_USER_PGD_NR + USER_PTRS_PER_PGD,
    4.34 -			(PTRS_PER_PGD - USER_PTRS_PER_PGD -
    4.35 -			 FIRST_USER_PGD_NR) * sizeof(pgd_t));
    4.36 +		memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
    4.37 +		memcpy(pgd + USER_PTRS_PER_PGD,
    4.38 +			init_mm.pgd + USER_PTRS_PER_PGD,
    4.39 +			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
    4.40                  __make_page_readonly(pgd);
    4.41  		queue_pgd_pin(__pa(pgd));
    4.42  	}
     5.1 --- a/linux-2.4.26-xen-sparse/include/asm-xen/pgtable.h	Tue Aug 17 09:35:05 2004 +0000
     5.2 +++ b/linux-2.4.26-xen-sparse/include/asm-xen/pgtable.h	Tue Aug 17 17:54:44 2004 +0000
     5.3 @@ -83,16 +83,16 @@ extern void pgtable_cache_init(void);
     5.4  #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
     5.5  #define PGDIR_MASK	(~(PGDIR_SIZE-1))
     5.6  
     5.7 -#define FIRST_USER_PGD_NR	(1)
     5.8 -#define USER_PTRS_PER_PGD	((TASK_SIZE/PGDIR_SIZE)-FIRST_USER_PGD_NR)
     5.9 +#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
    5.10 +#define FIRST_USER_PGD_NR	0
    5.11  
    5.12 -#if 0 /* XEN */
    5.13  #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
    5.14  #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
    5.15 +
    5.16  #define TWOLEVEL_PGDIR_SHIFT	22
    5.17  #define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
    5.18  #define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
    5.19 -#endif
    5.20 +
    5.21  
    5.22  #ifndef __ASSEMBLY__
    5.23  /* 4MB is just a nice "safety zone". Also, we align to a fresh pde. */
    5.24 @@ -367,7 +367,4 @@ static inline unsigned long arbitrary_vi
    5.25  
    5.26  #define io_remap_page_range remap_page_range
    5.27  
    5.28 -#define HAVE_ARCH_UNMAPPED_AREA
    5.29 -#define HAVE_ARCH_CHECK_FIXED_MAPPING
    5.30 -
    5.31  #endif /* _I386_PGTABLE_H */
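
For illustration only (not part of the changeset): the effect of the pgtable.h change above, assuming the conventional i386 values TASK_SIZE = 0xC0000000 and PGDIR_SHIFT = 22 (4 MiB per page-directory entry) -- the actual values in a given Xen configuration may differ. With FIRST_USER_PGD_NR back at 0, the low 4 MiB is ordinary user address space again, which is consistent with the "VESA BIOS mapping" fix in the commit message (the removed arch_check_fixed_mapping()/arch_get_unmapped_area() hooks refused mappings below FIRST_USER_PGD_NR<<PGDIR_SHIFT) and is why the sparse mm/mmap.c copies are deleted later in this changeset.

    /* Illustrative arithmetic only; TASK_SIZE and PGDIR_SHIFT are assumed
     * values, not taken from this tree. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long task_size  = 0xC0000000UL;  /* assumed 3 GiB/1 GiB split */
        unsigned long pgdir_size = 1UL << 22;     /* 4 MiB per PGD entry */

        /* Old definitions: entry 0 reserved, user mappings start at 4 MiB. */
        unsigned long old_user_ptrs = task_size / pgdir_size - 1;  /* 767 */

        /* New definitions: FIRST_USER_PGD_NR == 0, so every slot below
         * TASK_SIZE belongs to user space. */
        unsigned long new_user_ptrs = task_size / pgdir_size;      /* 768 */

        printf("old USER_PTRS_PER_PGD = %lu\n", old_user_ptrs);
        printf("new USER_PTRS_PER_PGD = %lu\n", new_user_ptrs);
        return 0;
    }
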
     6.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     6.2 +++ b/linux-2.4.26-xen-sparse/include/asm-xen/queues.h	Tue Aug 17 17:54:44 2004 +0000
     6.3 @@ -0,0 +1,28 @@
     6.4 +
     6.5 +/*
     6.6 + * Oh dear. Task queues were removed from Linux 2.6 and replaced by work 
     6.7 + * queues. Unfortunately the semantics is not the same. With task queues we 
     6.8 + * can defer work until a particular event occurs -- this is not
     6.9 + * straightforwardly done with work queues (queued work is performed asap, or
    6.10 + * after some fixed timeout). Conversely, work queues are a (slightly) neater
    6.11 + * way of deferring work to a process context than using task queues in 2.4.
    6.12 + * 
    6.13 + * So, what we do here is a bit weird:
    6.14 + *  1. On 2.4, we emulate work queues over task queues.
    6.15 + *  2. On 2.6, we emulate task queues over work queues.
    6.16 + * 
    6.17 + * Note how much harder the latter is. :-)
    6.18 + */
    6.19 +
    6.20 +#ifndef __QUEUES_H__
    6.21 +#define __QUEUES_H__
    6.22 +
    6.23 +#include <linux/version.h>
    6.24 +#include <linux/list.h>
    6.25 +#include <linux/tqueue.h>
    6.26 +
    6.27 +#define DECLARE_WORK(_name, _fn, _arg) \
    6.28 +    struct tq_struct _name = { .routine = _fn, .data = _arg }
    6.29 +#define schedule_work(_w) schedule_task(_w)
    6.30 +
    6.31 +#endif /* __QUEUES_H__ */
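
For illustration only (not part of the changeset): a minimal sketch of how driver code might use this shim, so that the same source builds against the 2.4 task-queue emulation above and against 2.6 work queues (the changeset also adds a 2.6.7 asm-xen/queues.h, not shown in this hunk). The handler and caller names below are hypothetical, not functions from this tree; only DECLARE_WORK and schedule_work come from the header.

    /* Hypothetical driver fragment using the compatibility macros. */
    #include <asm-xen/queues.h>   /* 2.4: tq_struct-based shim shown above */

    static void rx_refill(void *unused)
    {
        /* Deferred work: runs later in process context, not in the IRQ. */
    }

    /* Expands to a struct tq_struct on 2.4, a struct work_struct on 2.6.7. */
    static DECLARE_WORK(rx_refill_work, rx_refill, NULL);

    static void rx_interrupt_handler(void)
    {
        /* Queue the deferred work; on 2.4 this maps to schedule_task(). */
        schedule_work(&rx_refill_work);
    }
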
     7.1 --- a/linux-2.4.26-xen-sparse/mkbuildtree	Tue Aug 17 09:35:05 2004 +0000
     7.2 +++ b/linux-2.4.26-xen-sparse/mkbuildtree	Tue Aug 17 17:54:44 2004 +0000
     7.3 @@ -243,7 +243,6 @@ cd ${AD}/arch/xen/mm
     7.4  ln -sf ../../i386/mm/extable.c 
     7.5  ln -sf ../../i386/mm/pageattr.c 
     7.6  ln -sf ../../../${LINUX_26}/arch/xen/i386/mm/hypervisor.c
     7.7 -ln -sf ../../../${LINUX_26}/arch/xen/i386/mm/mmap.c
     7.8  
     7.9  cd ${AD}/arch/xen/drivers/console
    7.10  ln -sf ../../../../${LINUX_26}/drivers/xen/console/console.c 
     8.1 --- a/linux-2.4.26-xen-sparse/mm/mmap.c	Tue Aug 17 09:35:05 2004 +0000
     8.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.3 @@ -1,1219 +0,0 @@
     8.4 -/*
     8.5 - *	linux/mm/mmap.c
     8.6 - *
     8.7 - * Written by obz.
     8.8 - */
     8.9 -#include <linux/slab.h>
    8.10 -#include <linux/shm.h>
    8.11 -#include <linux/mman.h>
    8.12 -#include <linux/pagemap.h>
    8.13 -#include <linux/swap.h>
    8.14 -#include <linux/swapctl.h>
    8.15 -#include <linux/smp_lock.h>
    8.16 -#include <linux/init.h>
    8.17 -#include <linux/file.h>
    8.18 -#include <linux/fs.h>
    8.19 -#include <linux/personality.h>
    8.20 -#include <linux/mount.h>
    8.21 -
    8.22 -#include <asm/uaccess.h>
    8.23 -#include <asm/pgalloc.h>
    8.24 -
    8.25 -/*
    8.26 - * WARNING: the debugging will use recursive algorithms so never enable this
    8.27 - * unless you know what you are doing.
    8.28 - */
    8.29 -#undef DEBUG_MM_RB
    8.30 -
    8.31 -/* description of effects of mapping type and prot in current implementation.
    8.32 - * this is due to the limited x86 page protection hardware.  The expected
    8.33 - * behavior is in parens:
    8.34 - *
    8.35 - * map_type	prot
    8.36 - *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
    8.37 - * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
    8.38 - *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
    8.39 - *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
    8.40 - *		
    8.41 - * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
    8.42 - *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
    8.43 - *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
    8.44 - *
    8.45 - */
    8.46 -pgprot_t protection_map[16] = {
    8.47 -	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
    8.48 -	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
    8.49 -};
    8.50 -
    8.51 -int sysctl_overcommit_memory;
    8.52 -int max_map_count = DEFAULT_MAX_MAP_COUNT;
    8.53 -
    8.54 -/* Check that a process has enough memory to allocate a
    8.55 - * new virtual mapping.
    8.56 - */
    8.57 -int vm_enough_memory(long pages)
    8.58 -{
    8.59 -	/* Stupid algorithm to decide if we have enough memory: while
    8.60 -	 * simple, it hopefully works in most obvious cases.. Easy to
    8.61 -	 * fool it, but this should catch most mistakes.
    8.62 -	 */
    8.63 -	/* 23/11/98 NJC: Somewhat less stupid version of algorithm,
    8.64 -	 * which tries to do "TheRightThing".  Instead of using half of
    8.65 -	 * (buffers+cache), use the minimum values.  Allow an extra 2%
    8.66 -	 * of num_physpages for safety margin.
    8.67 -	 */
    8.68 -
    8.69 -	unsigned long free;
    8.70 -	
    8.71 -        /* Sometimes we want to use more memory than we have. */
    8.72 -	if (sysctl_overcommit_memory)
    8.73 -	    return 1;
    8.74 -
    8.75 -	/* The page cache contains buffer pages these days.. */
    8.76 -	free = page_cache_size;
    8.77 -	free += nr_free_pages();
    8.78 -	free += nr_swap_pages;
    8.79 -
    8.80 -	/*
    8.81 -	 * This double-counts: the nrpages are both in the page-cache
    8.82 -	 * and in the swapper space. At the same time, this compensates
    8.83 -	 * for the swap-space over-allocation (ie "nr_swap_pages" being
    8.84 -	 * too small.
    8.85 -	 */
    8.86 -	free += swapper_space.nrpages;
    8.87 -
    8.88 -	/*
    8.89 -	 * The code below doesn't account for free space in the inode
    8.90 -	 * and dentry slab cache, slab cache fragmentation, inodes and
    8.91 -	 * dentries which will become freeable under VM load, etc.
    8.92 -	 * Lets just hope all these (complex) factors balance out...
    8.93 -	 */
    8.94 -	free += (dentry_stat.nr_unused * sizeof(struct dentry)) >> PAGE_SHIFT;
    8.95 -	free += (inodes_stat.nr_unused * sizeof(struct inode)) >> PAGE_SHIFT;
    8.96 -
    8.97 -	return free > pages;
    8.98 -}
    8.99 -
   8.100 -/* Remove one vm structure from the inode's i_mapping address space. */
   8.101 -static inline void __remove_shared_vm_struct(struct vm_area_struct *vma)
   8.102 -{
   8.103 -	struct file * file = vma->vm_file;
   8.104 -
   8.105 -	if (file) {
   8.106 -		struct inode *inode = file->f_dentry->d_inode;
   8.107 -		if (vma->vm_flags & VM_DENYWRITE)
   8.108 -			atomic_inc(&inode->i_writecount);
   8.109 -		if(vma->vm_next_share)
   8.110 -			vma->vm_next_share->vm_pprev_share = vma->vm_pprev_share;
   8.111 -		*vma->vm_pprev_share = vma->vm_next_share;
   8.112 -	}
   8.113 -}
   8.114 -
   8.115 -static inline void remove_shared_vm_struct(struct vm_area_struct *vma)
   8.116 -{
   8.117 -	lock_vma_mappings(vma);
   8.118 -	__remove_shared_vm_struct(vma);
   8.119 -	unlock_vma_mappings(vma);
   8.120 -}
   8.121 -
   8.122 -void lock_vma_mappings(struct vm_area_struct *vma)
   8.123 -{
   8.124 -	struct address_space *mapping;
   8.125 -
   8.126 -	mapping = NULL;
   8.127 -	if (vma->vm_file)
   8.128 -		mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
   8.129 -	if (mapping)
   8.130 -		spin_lock(&mapping->i_shared_lock);
   8.131 -}
   8.132 -
   8.133 -void unlock_vma_mappings(struct vm_area_struct *vma)
   8.134 -{
   8.135 -	struct address_space *mapping;
   8.136 -
   8.137 -	mapping = NULL;
   8.138 -	if (vma->vm_file)
   8.139 -		mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
   8.140 -	if (mapping)
   8.141 -		spin_unlock(&mapping->i_shared_lock);
   8.142 -}
   8.143 -
   8.144 -/*
   8.145 - *  sys_brk() for the most part doesn't need the global kernel
   8.146 - *  lock, except when an application is doing something nasty
   8.147 - *  like trying to un-brk an area that has already been mapped
   8.148 - *  to a regular file.  in this case, the unmapping will need
   8.149 - *  to invoke file system routines that need the global lock.
   8.150 - */
   8.151 -asmlinkage unsigned long sys_brk(unsigned long brk)
   8.152 -{
   8.153 -	unsigned long rlim, retval;
   8.154 -	unsigned long newbrk, oldbrk;
   8.155 -	struct mm_struct *mm = current->mm;
   8.156 -
   8.157 -	down_write(&mm->mmap_sem);
   8.158 -
   8.159 -	if (brk < mm->end_code)
   8.160 -		goto out;
   8.161 -	newbrk = PAGE_ALIGN(brk);
   8.162 -	oldbrk = PAGE_ALIGN(mm->brk);
   8.163 -	if (oldbrk == newbrk)
   8.164 -		goto set_brk;
   8.165 -
   8.166 -	/* Always allow shrinking brk. */
   8.167 -	if (brk <= mm->brk) {
   8.168 -		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
   8.169 -			goto set_brk;
   8.170 -		goto out;
   8.171 -	}
   8.172 -
   8.173 -	/* Check against rlimit.. */
   8.174 -	rlim = current->rlim[RLIMIT_DATA].rlim_cur;
   8.175 -	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
   8.176 -		goto out;
   8.177 -
   8.178 -	/* Check against existing mmap mappings. */
   8.179 -	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
   8.180 -		goto out;
   8.181 -
   8.182 -	/* Check if we have enough memory.. */
   8.183 -	if (!vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT))
   8.184 -		goto out;
   8.185 -
   8.186 -	/* Ok, looks good - let it rip. */
   8.187 -	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
   8.188 -		goto out;
   8.189 -set_brk:
   8.190 -	mm->brk = brk;
   8.191 -out:
   8.192 -	retval = mm->brk;
   8.193 -	up_write(&mm->mmap_sem);
   8.194 -	return retval;
   8.195 -}
   8.196 -
   8.197 -/* Combine the mmap "prot" and "flags" argument into one "vm_flags" used
   8.198 - * internally. Essentially, translate the "PROT_xxx" and "MAP_xxx" bits
   8.199 - * into "VM_xxx".
   8.200 - */
   8.201 -static inline unsigned long calc_vm_flags(unsigned long prot, unsigned long flags)
   8.202 -{
   8.203 -#define _trans(x,bit1,bit2) \
   8.204 -((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
   8.205 -
   8.206 -	unsigned long prot_bits, flag_bits;
   8.207 -	prot_bits =
   8.208 -		_trans(prot, PROT_READ, VM_READ) |
   8.209 -		_trans(prot, PROT_WRITE, VM_WRITE) |
   8.210 -		_trans(prot, PROT_EXEC, VM_EXEC);
   8.211 -	flag_bits =
   8.212 -		_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN) |
   8.213 -		_trans(flags, MAP_DENYWRITE, VM_DENYWRITE) |
   8.214 -		_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE);
   8.215 -	return prot_bits | flag_bits;
   8.216 -#undef _trans
   8.217 -}
   8.218 -
   8.219 -#ifdef DEBUG_MM_RB
   8.220 -static int browse_rb(rb_node_t * rb_node) {
   8.221 -	int i = 0;
   8.222 -	if (rb_node) {
   8.223 -		i++;
   8.224 -		i += browse_rb(rb_node->rb_left);
   8.225 -		i += browse_rb(rb_node->rb_right);
   8.226 -	}
   8.227 -	return i;
   8.228 -}
   8.229 -
   8.230 -static void validate_mm(struct mm_struct * mm) {
   8.231 -	int bug = 0;
   8.232 -	int i = 0;
   8.233 -	struct vm_area_struct * tmp = mm->mmap;
   8.234 -	while (tmp) {
   8.235 -		tmp = tmp->vm_next;
   8.236 -		i++;
   8.237 -	}
   8.238 -	if (i != mm->map_count)
   8.239 -		printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
   8.240 -	i = browse_rb(mm->mm_rb.rb_node);
   8.241 -	if (i != mm->map_count)
   8.242 -		printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
   8.243 -	if (bug)
   8.244 -		BUG();
   8.245 -}
   8.246 -#else
   8.247 -#define validate_mm(mm) do { } while (0)
   8.248 -#endif
   8.249 -
   8.250 -static struct vm_area_struct * find_vma_prepare(struct mm_struct * mm, unsigned long addr,
   8.251 -						struct vm_area_struct ** pprev,
   8.252 -						rb_node_t *** rb_link, rb_node_t ** rb_parent)
   8.253 -{
   8.254 -	struct vm_area_struct * vma;
   8.255 -	rb_node_t ** __rb_link, * __rb_parent, * rb_prev;
   8.256 -
   8.257 -	__rb_link = &mm->mm_rb.rb_node;
   8.258 -	rb_prev = __rb_parent = NULL;
   8.259 -	vma = NULL;
   8.260 -
   8.261 -	while (*__rb_link) {
   8.262 -		struct vm_area_struct *vma_tmp;
   8.263 -
   8.264 -		__rb_parent = *__rb_link;
   8.265 -		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
   8.266 -
   8.267 -		if (vma_tmp->vm_end > addr) {
   8.268 -			vma = vma_tmp;
   8.269 -			if (vma_tmp->vm_start <= addr)
   8.270 -				return vma;
   8.271 -			__rb_link = &__rb_parent->rb_left;
   8.272 -		} else {
   8.273 -			rb_prev = __rb_parent;
   8.274 -			__rb_link = &__rb_parent->rb_right;
   8.275 -		}
   8.276 -	}
   8.277 -
   8.278 -	*pprev = NULL;
   8.279 -	if (rb_prev)
   8.280 -		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
   8.281 -	*rb_link = __rb_link;
   8.282 -	*rb_parent = __rb_parent;
   8.283 -	return vma;
   8.284 -}
   8.285 -
   8.286 -static inline void __vma_link_list(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
   8.287 -				   rb_node_t * rb_parent)
   8.288 -{
   8.289 -	if (prev) {
   8.290 -		vma->vm_next = prev->vm_next;
   8.291 -		prev->vm_next = vma;
   8.292 -	} else {
   8.293 -		mm->mmap = vma;
   8.294 -		if (rb_parent)
   8.295 -			vma->vm_next = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
   8.296 -		else
   8.297 -			vma->vm_next = NULL;
   8.298 -	}
   8.299 -}
   8.300 -
   8.301 -static inline void __vma_link_rb(struct mm_struct * mm, struct vm_area_struct * vma,
   8.302 -				 rb_node_t ** rb_link, rb_node_t * rb_parent)
   8.303 -{
   8.304 -	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
   8.305 -	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
   8.306 -}
   8.307 -
   8.308 -static inline void __vma_link_file(struct vm_area_struct * vma)
   8.309 -{
   8.310 -	struct file * file;
   8.311 -
   8.312 -	file = vma->vm_file;
   8.313 -	if (file) {
   8.314 -		struct inode * inode = file->f_dentry->d_inode;
   8.315 -		struct address_space *mapping = inode->i_mapping;
   8.316 -		struct vm_area_struct **head;
   8.317 -
   8.318 -		if (vma->vm_flags & VM_DENYWRITE)
   8.319 -			atomic_dec(&inode->i_writecount);
   8.320 -
   8.321 -		head = &mapping->i_mmap;
   8.322 -		if (vma->vm_flags & VM_SHARED)
   8.323 -			head = &mapping->i_mmap_shared;
   8.324 -      
   8.325 -		/* insert vma into inode's share list */
   8.326 -		if((vma->vm_next_share = *head) != NULL)
   8.327 -			(*head)->vm_pprev_share = &vma->vm_next_share;
   8.328 -		*head = vma;
   8.329 -		vma->vm_pprev_share = head;
   8.330 -	}
   8.331 -}
   8.332 -
   8.333 -static void __vma_link(struct mm_struct * mm, struct vm_area_struct * vma,  struct vm_area_struct * prev,
   8.334 -		       rb_node_t ** rb_link, rb_node_t * rb_parent)
   8.335 -{
   8.336 -	__vma_link_list(mm, vma, prev, rb_parent);
   8.337 -	__vma_link_rb(mm, vma, rb_link, rb_parent);
   8.338 -	__vma_link_file(vma);
   8.339 -}
   8.340 -
   8.341 -static inline void vma_link(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev,
   8.342 -			    rb_node_t ** rb_link, rb_node_t * rb_parent)
   8.343 -{
   8.344 -	lock_vma_mappings(vma);
   8.345 -	spin_lock(&mm->page_table_lock);
   8.346 -	__vma_link(mm, vma, prev, rb_link, rb_parent);
   8.347 -	spin_unlock(&mm->page_table_lock);
   8.348 -	unlock_vma_mappings(vma);
   8.349 -
   8.350 -	mm->map_count++;
   8.351 -	validate_mm(mm);
   8.352 -}
   8.353 -
   8.354 -static int vma_merge(struct mm_struct * mm, struct vm_area_struct * prev,
   8.355 -		     rb_node_t * rb_parent, unsigned long addr, unsigned long end, unsigned long vm_flags)
   8.356 -{
   8.357 -	spinlock_t * lock = &mm->page_table_lock;
   8.358 -	if (!prev) {
   8.359 -		prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
   8.360 -		goto merge_next;
   8.361 -	}
   8.362 -	if (prev->vm_end == addr && can_vma_merge(prev, vm_flags)) {
   8.363 -		struct vm_area_struct * next;
   8.364 -
   8.365 -		spin_lock(lock);
   8.366 -		prev->vm_end = end;
   8.367 -		next = prev->vm_next;
   8.368 -		if (next && prev->vm_end == next->vm_start && can_vma_merge(next, vm_flags)) {
   8.369 -			prev->vm_end = next->vm_end;
   8.370 -			__vma_unlink(mm, next, prev);
   8.371 -			spin_unlock(lock);
   8.372 -
   8.373 -			mm->map_count--;
   8.374 -			kmem_cache_free(vm_area_cachep, next);
   8.375 -			return 1;
   8.376 -		}
   8.377 -		spin_unlock(lock);
   8.378 -		return 1;
   8.379 -	}
   8.380 -
   8.381 -	prev = prev->vm_next;
   8.382 -	if (prev) {
   8.383 - merge_next:
   8.384 -		if (!can_vma_merge(prev, vm_flags))
   8.385 -			return 0;
   8.386 -		if (end == prev->vm_start) {
   8.387 -			spin_lock(lock);
   8.388 -			prev->vm_start = addr;
   8.389 -			spin_unlock(lock);
   8.390 -			return 1;
   8.391 -		}
   8.392 -	}
   8.393 -
   8.394 -	return 0;
   8.395 -}
   8.396 -
   8.397 -unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len,
   8.398 -	unsigned long prot, unsigned long flags, unsigned long pgoff)
   8.399 -{
   8.400 -	struct mm_struct * mm = current->mm;
   8.401 -	struct vm_area_struct * vma, * prev;
   8.402 -	unsigned int vm_flags;
   8.403 -	int correct_wcount = 0;
   8.404 -	int error;
   8.405 -	rb_node_t ** rb_link, * rb_parent;
   8.406 -
   8.407 -	if (file) {
   8.408 -		if (!file->f_op || !file->f_op->mmap)
   8.409 -			return -ENODEV;
   8.410 -
   8.411 -		if ((prot & PROT_EXEC) && (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
   8.412 -			return -EPERM;
   8.413 -	}
   8.414 -
   8.415 -	if (!len)
   8.416 -		return addr;
   8.417 -
   8.418 -	len = PAGE_ALIGN(len);
   8.419 -
   8.420 -	if (len > TASK_SIZE || len == 0)
   8.421 -		return -EINVAL;
   8.422 -
   8.423 -	/* offset overflow? */
   8.424 -	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
   8.425 -		return -EINVAL;
   8.426 -
   8.427 -	/* Too many mappings? */
   8.428 -	if (mm->map_count > max_map_count)
   8.429 -		return -ENOMEM;
   8.430 -
   8.431 -	/* Obtain the address to map to. we verify (or select) it and ensure
   8.432 -	 * that it represents a valid section of the address space.
   8.433 -	 */
   8.434 -	addr = get_unmapped_area(file, addr, len, pgoff, flags);
   8.435 -	if (addr & ~PAGE_MASK)
   8.436 -		return addr;
   8.437 -
   8.438 -	/* Do simple checking here so the lower-level routines won't have
   8.439 -	 * to. we assume access permissions have been handled by the open
   8.440 -	 * of the memory object, so we don't do any here.
   8.441 -	 */
   8.442 -	vm_flags = calc_vm_flags(prot,flags) | mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
   8.443 -
   8.444 -	/* mlock MCL_FUTURE? */
   8.445 -	if (vm_flags & VM_LOCKED) {
   8.446 -		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
   8.447 -		locked += len;
   8.448 -		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
   8.449 -			return -EAGAIN;
   8.450 -	}
   8.451 -
   8.452 -	if (file) {
   8.453 -		switch (flags & MAP_TYPE) {
   8.454 -		case MAP_SHARED:
   8.455 -			if ((prot & PROT_WRITE) && !(file->f_mode & FMODE_WRITE))
   8.456 -				return -EACCES;
   8.457 -
   8.458 -			/* Make sure we don't allow writing to an append-only file.. */
   8.459 -			if (IS_APPEND(file->f_dentry->d_inode) && (file->f_mode & FMODE_WRITE))
   8.460 -				return -EACCES;
   8.461 -
   8.462 -			/* make sure there are no mandatory locks on the file. */
   8.463 -			if (locks_verify_locked(file->f_dentry->d_inode))
   8.464 -				return -EAGAIN;
   8.465 -
   8.466 -			vm_flags |= VM_SHARED | VM_MAYSHARE;
   8.467 -			if (!(file->f_mode & FMODE_WRITE))
   8.468 -				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
   8.469 -
   8.470 -			/* fall through */
   8.471 -		case MAP_PRIVATE:
   8.472 -			if (!(file->f_mode & FMODE_READ))
   8.473 -				return -EACCES;
   8.474 -			break;
   8.475 -
   8.476 -		default:
   8.477 -			return -EINVAL;
   8.478 -		}
   8.479 -	} else {
   8.480 -		vm_flags |= VM_SHARED | VM_MAYSHARE;
   8.481 -		switch (flags & MAP_TYPE) {
   8.482 -		default:
   8.483 -			return -EINVAL;
   8.484 -		case MAP_PRIVATE:
   8.485 -			vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
   8.486 -			/* fall through */
   8.487 -		case MAP_SHARED:
   8.488 -			break;
   8.489 -		}
   8.490 -	}
   8.491 -
   8.492 -	/* Clear old maps */
   8.493 -munmap_back:
   8.494 -	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
   8.495 -	if (vma && vma->vm_start < addr + len) {
   8.496 -		if (do_munmap(mm, addr, len))
   8.497 -			return -ENOMEM;
   8.498 -		goto munmap_back;
   8.499 -	}
   8.500 -
   8.501 -	/* Check against address space limit. */
   8.502 -	if ((mm->total_vm << PAGE_SHIFT) + len
   8.503 -	    > current->rlim[RLIMIT_AS].rlim_cur)
   8.504 -		return -ENOMEM;
   8.505 -
   8.506 -	/* Private writable mapping? Check memory availability.. */
   8.507 -	if ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_WRITE &&
   8.508 -	    !(flags & MAP_NORESERVE)				 &&
   8.509 -	    !vm_enough_memory(len >> PAGE_SHIFT))
   8.510 -		return -ENOMEM;
   8.511 -
   8.512 -	/* Can we just expand an old anonymous mapping? */
   8.513 -	if (!file && !(vm_flags & VM_SHARED) && rb_parent)
   8.514 -		if (vma_merge(mm, prev, rb_parent, addr, addr + len, vm_flags))
   8.515 -			goto out;
   8.516 -
   8.517 -	/* Determine the object being mapped and call the appropriate
   8.518 -	 * specific mapper. the address has already been validated, but
   8.519 -	 * not unmapped, but the maps are removed from the list.
   8.520 -	 */
   8.521 -	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
   8.522 -	if (!vma)
   8.523 -		return -ENOMEM;
   8.524 -
   8.525 -	vma->vm_mm = mm;
   8.526 -	vma->vm_start = addr;
   8.527 -	vma->vm_end = addr + len;
   8.528 -	vma->vm_flags = vm_flags;
   8.529 -	vma->vm_page_prot = protection_map[vm_flags & 0x0f];
   8.530 -	vma->vm_ops = NULL;
   8.531 -	vma->vm_pgoff = pgoff;
   8.532 -	vma->vm_file = NULL;
   8.533 -	vma->vm_private_data = NULL;
   8.534 -	vma->vm_raend = 0;
   8.535 -
   8.536 -	if (file) {
   8.537 -		error = -EINVAL;
   8.538 -		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
   8.539 -			goto free_vma;
   8.540 -		if (vm_flags & VM_DENYWRITE) {
   8.541 -			error = deny_write_access(file);
   8.542 -			if (error)
   8.543 -				goto free_vma;
   8.544 -			correct_wcount = 1;
   8.545 -		}
   8.546 -		vma->vm_file = file;
   8.547 -		get_file(file);
   8.548 -		error = file->f_op->mmap(file, vma);
   8.549 -		if (error)
   8.550 -			goto unmap_and_free_vma;
   8.551 -	} else if (flags & MAP_SHARED) {
   8.552 -		error = shmem_zero_setup(vma);
   8.553 -		if (error)
   8.554 -			goto free_vma;
   8.555 -	}
   8.556 -
   8.557 -	/* Can addr have changed??
   8.558 -	 *
   8.559 -	 * Answer: Yes, several device drivers can do it in their
   8.560 -	 *         f_op->mmap method. -DaveM
   8.561 -	 */
   8.562 -	if (addr != vma->vm_start) {
   8.563 -		/*
   8.564 -		 * It is a bit too late to pretend changing the virtual
   8.565 -		 * area of the mapping, we just corrupted userspace
   8.566 -		 * in the do_munmap, so FIXME (not in 2.4 to avoid breaking
   8.567 -		 * the driver API).
   8.568 -		 */
   8.569 -		struct vm_area_struct * stale_vma;
   8.570 -		/* Since addr changed, we rely on the mmap op to prevent 
   8.571 -		 * collisions with existing vmas and just use find_vma_prepare 
   8.572 -		 * to update the tree pointers.
   8.573 -		 */
   8.574 -		addr = vma->vm_start;
   8.575 -		stale_vma = find_vma_prepare(mm, addr, &prev,
   8.576 -						&rb_link, &rb_parent);
   8.577 -		/*
   8.578 -		 * Make sure the lowlevel driver did its job right.
   8.579 -		 */
   8.580 -		if (unlikely(stale_vma && stale_vma->vm_start < vma->vm_end)) {
   8.581 -			printk(KERN_ERR "buggy mmap operation: [<%p>]\n",
   8.582 -				file ? file->f_op->mmap : NULL);
   8.583 -			BUG();
   8.584 -		}
   8.585 -	}
   8.586 -
   8.587 -	vma_link(mm, vma, prev, rb_link, rb_parent);
   8.588 -	if (correct_wcount)
   8.589 -		atomic_inc(&file->f_dentry->d_inode->i_writecount);
   8.590 -
   8.591 -out:	
   8.592 -	mm->total_vm += len >> PAGE_SHIFT;
   8.593 -	if (vm_flags & VM_LOCKED) {
   8.594 -		mm->locked_vm += len >> PAGE_SHIFT;
   8.595 -		make_pages_present(addr, addr + len);
   8.596 -	}
   8.597 -	return addr;
   8.598 -
   8.599 -unmap_and_free_vma:
   8.600 -	if (correct_wcount)
   8.601 -		atomic_inc(&file->f_dentry->d_inode->i_writecount);
   8.602 -	vma->vm_file = NULL;
   8.603 -	fput(file);
   8.604 -
   8.605 -	/* Undo any partial mapping done by a device driver. */
   8.606 -	zap_page_range(mm, vma->vm_start, vma->vm_end - vma->vm_start);
   8.607 -free_vma:
   8.608 -	kmem_cache_free(vm_area_cachep, vma);
   8.609 -	return error;
   8.610 -}
   8.611 -
   8.612 -/* Get an address range which is currently unmapped.
   8.613 - * For shmat() with addr=0.
   8.614 - *
   8.615 - * Ugly calling convention alert:
   8.616 - * Return value with the low bits set means error value,
   8.617 - * ie
   8.618 - *	if (ret & ~PAGE_MASK)
   8.619 - *		error = ret;
   8.620 - *
   8.621 - * This function "knows" that -ENOMEM has the bits set.
   8.622 - */
   8.623 -#ifndef HAVE_ARCH_UNMAPPED_AREA
   8.624 -static inline unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
   8.625 -{
   8.626 -	struct vm_area_struct *vma;
   8.627 -
   8.628 -	if (len > TASK_SIZE)
   8.629 -		return -ENOMEM;
   8.630 -
   8.631 -	if (addr) {
   8.632 -		addr = PAGE_ALIGN(addr);
   8.633 -		vma = find_vma(current->mm, addr);
   8.634 -		if (TASK_SIZE - len >= addr &&
   8.635 -		    (!vma || addr + len <= vma->vm_start))
   8.636 -			return addr;
   8.637 -	}
   8.638 -	addr = PAGE_ALIGN(TASK_UNMAPPED_BASE);
   8.639 -
   8.640 -	for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
   8.641 -		/* At this point:  (!vma || addr < vma->vm_end). */
   8.642 -		if (TASK_SIZE - len < addr)
   8.643 -			return -ENOMEM;
   8.644 -		if (!vma || addr + len <= vma->vm_start)
   8.645 -			return addr;
   8.646 -		addr = vma->vm_end;
   8.647 -	}
   8.648 -}
   8.649 -#else
   8.650 -extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
   8.651 -#endif	
   8.652 -
   8.653 -#ifndef HAVE_ARCH_CHECK_FIXED_MAPPING
   8.654 -#define arch_check_fixed_mapping(_file,_addr,_len,_pgoff,_flags) 0
   8.655 -#else
   8.656 -extern unsigned long
   8.657 -arch_check_fixed_mapping(struct file *, unsigned long, unsigned long,
   8.658 -			unsigned long, unsigned long);
   8.659 -#endif
   8.660 -
   8.661 -unsigned long get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags)
   8.662 -{
   8.663 -	unsigned long ret;
   8.664 -
   8.665 -	if (flags & MAP_FIXED) {
   8.666 -		if (addr > TASK_SIZE - len)
   8.667 -			return -ENOMEM;
   8.668 -		if (addr & ~PAGE_MASK)
   8.669 -			return -EINVAL;
   8.670 -		ret = arch_check_fixed_mapping(file, addr, len, pgoff, flags);
   8.671 -		if (ret != 0)
   8.672 -			return ret;
   8.673 -		return addr;
   8.674 -	}
   8.675 -
   8.676 -	if (file && file->f_op && file->f_op->get_unmapped_area)
   8.677 -		return file->f_op->get_unmapped_area(file, addr, len, pgoff, flags);
   8.678 -
   8.679 -	return arch_get_unmapped_area(file, addr, len, pgoff, flags);
   8.680 -}
   8.681 -
   8.682 -/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
   8.683 -struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
   8.684 -{
   8.685 -	struct vm_area_struct *vma = NULL;
   8.686 -
   8.687 -	if (mm) {
   8.688 -		/* Check the cache first. */
   8.689 -		/* (Cache hit rate is typically around 35%.) */
   8.690 -		vma = mm->mmap_cache;
   8.691 -		if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
   8.692 -			rb_node_t * rb_node;
   8.693 -
   8.694 -			rb_node = mm->mm_rb.rb_node;
   8.695 -			vma = NULL;
   8.696 -
   8.697 -			while (rb_node) {
   8.698 -				struct vm_area_struct * vma_tmp;
   8.699 -
   8.700 -				vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
   8.701 -
   8.702 -				if (vma_tmp->vm_end > addr) {
   8.703 -					vma = vma_tmp;
   8.704 -					if (vma_tmp->vm_start <= addr)
   8.705 -						break;
   8.706 -					rb_node = rb_node->rb_left;
   8.707 -				} else
   8.708 -					rb_node = rb_node->rb_right;
   8.709 -			}
   8.710 -			if (vma)
   8.711 -				mm->mmap_cache = vma;
   8.712 -		}
   8.713 -	}
   8.714 -	return vma;
   8.715 -}
   8.716 -
   8.717 -/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
   8.718 -struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
   8.719 -				      struct vm_area_struct **pprev)
   8.720 -{
   8.721 -	if (mm) {
   8.722 -		/* Go through the RB tree quickly. */
   8.723 -		struct vm_area_struct * vma;
   8.724 -		rb_node_t * rb_node, * rb_last_right, * rb_prev;
   8.725 -		
   8.726 -		rb_node = mm->mm_rb.rb_node;
   8.727 -		rb_last_right = rb_prev = NULL;
   8.728 -		vma = NULL;
   8.729 -
   8.730 -		while (rb_node) {
   8.731 -			struct vm_area_struct * vma_tmp;
   8.732 -
   8.733 -			vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
   8.734 -
   8.735 -			if (vma_tmp->vm_end > addr) {
   8.736 -				vma = vma_tmp;
   8.737 -				rb_prev = rb_last_right;
   8.738 -				if (vma_tmp->vm_start <= addr)
   8.739 -					break;
   8.740 -				rb_node = rb_node->rb_left;
   8.741 -			} else {
   8.742 -				rb_last_right = rb_node;
   8.743 -				rb_node = rb_node->rb_right;
   8.744 -			}
   8.745 -		}
   8.746 -		if (vma) {
   8.747 -			if (vma->vm_rb.rb_left) {
   8.748 -				rb_prev = vma->vm_rb.rb_left;
   8.749 -				while (rb_prev->rb_right)
   8.750 -					rb_prev = rb_prev->rb_right;
   8.751 -			}
   8.752 -			*pprev = NULL;
   8.753 -			if (rb_prev)
   8.754 -				*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
   8.755 -			if ((rb_prev ? (*pprev)->vm_next : mm->mmap) != vma)
   8.756 -				BUG();
   8.757 -			return vma;
   8.758 -		}
   8.759 -	}
   8.760 -	*pprev = NULL;
   8.761 -	return NULL;
   8.762 -}
   8.763 -
   8.764 -struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
   8.765 -{
   8.766 -	struct vm_area_struct * vma;
   8.767 -	unsigned long start;
   8.768 -
   8.769 -	addr &= PAGE_MASK;
   8.770 -	vma = find_vma(mm,addr);
   8.771 -	if (!vma)
   8.772 -		return NULL;
   8.773 -	if (vma->vm_start <= addr)
   8.774 -		return vma;
   8.775 -	if (!(vma->vm_flags & VM_GROWSDOWN))
   8.776 -		return NULL;
   8.777 -	start = vma->vm_start;
   8.778 -	if (expand_stack(vma, addr))
   8.779 -		return NULL;
   8.780 -	if (vma->vm_flags & VM_LOCKED) {
   8.781 -		make_pages_present(addr, start);
   8.782 -	}
   8.783 -	return vma;
   8.784 -}
   8.785 -
   8.786 -/* Normal function to fix up a mapping
   8.787 - * This function is the default for when an area has no specific
   8.788 - * function.  This may be used as part of a more specific routine.
   8.789 - * This function works out what part of an area is affected and
   8.790 - * adjusts the mapping information.  Since the actual page
   8.791 - * manipulation is done in do_mmap(), none need be done here,
   8.792 - * though it would probably be more appropriate.
   8.793 - *
   8.794 - * By the time this function is called, the area struct has been
   8.795 - * removed from the process mapping list, so it needs to be
   8.796 - * reinserted if necessary.
   8.797 - *
   8.798 - * The 4 main cases are:
   8.799 - *    Unmapping the whole area
   8.800 - *    Unmapping from the start of the segment to a point in it
   8.801 - *    Unmapping from an intermediate point to the end
   8.802 - *    Unmapping between to intermediate points, making a hole.
   8.803 - *
   8.804 - * Case 4 involves the creation of 2 new areas, for each side of
   8.805 - * the hole.  If possible, we reuse the existing area rather than
   8.806 - * allocate a new one, and the return indicates whether the old
   8.807 - * area was reused.
   8.808 - */
   8.809 -static struct vm_area_struct * unmap_fixup(struct mm_struct *mm, 
   8.810 -	struct vm_area_struct *area, unsigned long addr, size_t len, 
   8.811 -	struct vm_area_struct *extra)
   8.812 -{
   8.813 -	struct vm_area_struct *mpnt;
   8.814 -	unsigned long end = addr + len;
   8.815 -
   8.816 -	area->vm_mm->total_vm -= len >> PAGE_SHIFT;
   8.817 -	if (area->vm_flags & VM_LOCKED)
   8.818 -		area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
   8.819 -
   8.820 -	/* Unmapping the whole area. */
   8.821 -	if (addr == area->vm_start && end == area->vm_end) {
   8.822 -		if (area->vm_ops && area->vm_ops->close)
   8.823 -			area->vm_ops->close(area);
   8.824 -		if (area->vm_file)
   8.825 -			fput(area->vm_file);
   8.826 -		kmem_cache_free(vm_area_cachep, area);
   8.827 -		return extra;
   8.828 -	}
   8.829 -
   8.830 -	/* Work out to one of the ends. */
   8.831 -	if (end == area->vm_end) {
   8.832 -		/*
   8.833 -		 * here area isn't visible to the semaphore-less readers
   8.834 -		 * so we don't need to update it under the spinlock.
   8.835 -		 */
   8.836 -		area->vm_end = addr;
   8.837 -		lock_vma_mappings(area);
   8.838 -		spin_lock(&mm->page_table_lock);
   8.839 -	} else if (addr == area->vm_start) {
   8.840 -		area->vm_pgoff += (end - area->vm_start) >> PAGE_SHIFT;
   8.841 -		/* same locking considerations of the above case */
   8.842 -		area->vm_start = end;
   8.843 -		lock_vma_mappings(area);
   8.844 -		spin_lock(&mm->page_table_lock);
   8.845 -	} else {
   8.846 -	/* Unmapping a hole: area->vm_start < addr <= end < area->vm_end */
   8.847 -		/* Add end mapping -- leave beginning for below */
   8.848 -		mpnt = extra;
   8.849 -		extra = NULL;
   8.850 -
   8.851 -		mpnt->vm_mm = area->vm_mm;
   8.852 -		mpnt->vm_start = end;
   8.853 -		mpnt->vm_end = area->vm_end;
   8.854 -		mpnt->vm_page_prot = area->vm_page_prot;
   8.855 -		mpnt->vm_flags = area->vm_flags;
   8.856 -		mpnt->vm_raend = 0;
   8.857 -		mpnt->vm_ops = area->vm_ops;
   8.858 -		mpnt->vm_pgoff = area->vm_pgoff + ((end - area->vm_start) >> PAGE_SHIFT);
   8.859 -		mpnt->vm_file = area->vm_file;
   8.860 -		mpnt->vm_private_data = area->vm_private_data;
   8.861 -		if (mpnt->vm_file)
   8.862 -			get_file(mpnt->vm_file);
   8.863 -		if (mpnt->vm_ops && mpnt->vm_ops->open)
   8.864 -			mpnt->vm_ops->open(mpnt);
   8.865 -		area->vm_end = addr;	/* Truncate area */
   8.866 -
   8.867 -		/* Because mpnt->vm_file == area->vm_file this locks
   8.868 -		 * things correctly.
   8.869 -		 */
   8.870 -		lock_vma_mappings(area);
   8.871 -		spin_lock(&mm->page_table_lock);
   8.872 -		__insert_vm_struct(mm, mpnt);
   8.873 -	}
   8.874 -
   8.875 -	__insert_vm_struct(mm, area);
   8.876 -	spin_unlock(&mm->page_table_lock);
   8.877 -	unlock_vma_mappings(area);
   8.878 -	return extra;
   8.879 -}
   8.880 -
   8.881 -/*
   8.882 - * Try to free as many page directory entries as we can,
   8.883 - * without having to work very hard at actually scanning
   8.884 - * the page tables themselves.
   8.885 - *
   8.886 - * Right now we try to free page tables if we have a nice
   8.887 - * PGDIR-aligned area that got free'd up. We could be more
   8.888 - * granular if we want to, but this is fast and simple,
   8.889 - * and covers the bad cases.
   8.890 - *
   8.891 - * "prev", if it exists, points to a vma before the one
   8.892 - * we just free'd - but there's no telling how much before.
   8.893 - */
   8.894 -static void free_pgtables(struct mm_struct * mm, struct vm_area_struct *prev,
   8.895 -	unsigned long start, unsigned long end)
   8.896 -{
   8.897 -	unsigned long first = start & PGDIR_MASK;
   8.898 -	unsigned long last = end + PGDIR_SIZE - 1;
   8.899 -	unsigned long start_index, end_index;
   8.900 -
   8.901 -	if (!prev) {
   8.902 -		prev = mm->mmap;
   8.903 -		if (!prev)
   8.904 -			goto no_mmaps;
   8.905 -		if (prev->vm_end > start) {
   8.906 -			if (last > prev->vm_start)
   8.907 -				last = prev->vm_start;
   8.908 -			goto no_mmaps;
   8.909 -		}
   8.910 -	}
   8.911 -	for (;;) {
   8.912 -		struct vm_area_struct *next = prev->vm_next;
   8.913 -
   8.914 -		if (next) {
   8.915 -			if (next->vm_start < start) {
   8.916 -				prev = next;
   8.917 -				continue;
   8.918 -			}
   8.919 -			if (last > next->vm_start)
   8.920 -				last = next->vm_start;
   8.921 -		}
   8.922 -		if (prev->vm_end > first)
   8.923 -			first = prev->vm_end + PGDIR_SIZE - 1;
   8.924 -		break;
   8.925 -	}
   8.926 -no_mmaps:
   8.927 -	if (last < first)
   8.928 -		return;
   8.929 -	/*
   8.930 -	 * If the PGD bits are not consecutive in the virtual address, the
   8.931 -	 * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
   8.932 -	 */
   8.933 -	start_index = pgd_index(first);
   8.934 -	end_index = pgd_index(last);
   8.935 -	if (end_index > start_index) {
   8.936 -		clear_page_tables(mm, start_index, end_index - start_index);
   8.937 -		flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
   8.938 -	}
   8.939 -}
   8.940 -
   8.941 -/* Munmap is split into 2 main parts -- this part which finds
   8.942 - * what needs doing, and the areas themselves, which do the
   8.943 - * work.  This now handles partial unmappings.
   8.944 - * Jeremy Fitzhardine <jeremy@sw.oz.au>
   8.945 - */
   8.946 -int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
   8.947 -{
   8.948 -	struct vm_area_struct *mpnt, *prev, **npp, *free, *extra;
   8.949 -
   8.950 -	if ((addr & ~PAGE_MASK) || addr > TASK_SIZE || len > TASK_SIZE-addr)
   8.951 -		return -EINVAL;
   8.952 -
   8.953 -	if ((len = PAGE_ALIGN(len)) == 0)
   8.954 -		return -EINVAL;
   8.955 -
   8.956 -	/* Check if this memory area is ok - put it on the temporary
   8.957 -	 * list if so..  The checks here are pretty simple --
   8.958 -	 * every area affected in some way (by any overlap) is put
   8.959 -	 * on the list.  If nothing is put on, nothing is affected.
   8.960 -	 */
   8.961 -	mpnt = find_vma_prev(mm, addr, &prev);
   8.962 -	if (!mpnt)
   8.963 -		return 0;
   8.964 -	/* we have  addr < mpnt->vm_end  */
   8.965 -
   8.966 -	if (mpnt->vm_start >= addr+len)
   8.967 -		return 0;
   8.968 -
   8.969 -	/* If we'll make "hole", check the vm areas limit */
   8.970 -	if ((mpnt->vm_start < addr && mpnt->vm_end > addr+len)
   8.971 -	    && mm->map_count >= max_map_count)
   8.972 -		return -ENOMEM;
   8.973 -
   8.974 -	/*
   8.975 -	 * We may need one additional vma to fix up the mappings ... 
   8.976 -	 * and this is the last chance for an easy error exit.
   8.977 -	 */
   8.978 -	extra = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
   8.979 -	if (!extra)
   8.980 -		return -ENOMEM;
   8.981 -
   8.982 -	npp = (prev ? &prev->vm_next : &mm->mmap);
   8.983 -	free = NULL;
   8.984 -	spin_lock(&mm->page_table_lock);
   8.985 -	for ( ; mpnt && mpnt->vm_start < addr+len; mpnt = *npp) {
   8.986 -		*npp = mpnt->vm_next;
   8.987 -		mpnt->vm_next = free;
   8.988 -		free = mpnt;
   8.989 -		rb_erase(&mpnt->vm_rb, &mm->mm_rb);
   8.990 -	}
   8.991 -	mm->mmap_cache = NULL;	/* Kill the cache. */
   8.992 -	spin_unlock(&mm->page_table_lock);
   8.993 -
   8.994 -	/* Ok - we have the memory areas we should free on the 'free' list,
   8.995 -	 * so release them, and unmap the page range..
   8.996 -	 * If the one of the segments is only being partially unmapped,
   8.997 -	 * it will put new vm_area_struct(s) into the address space.
   8.998 -	 * In that case we have to be careful with VM_DENYWRITE.
   8.999 -	 */
  8.1000 -	while ((mpnt = free) != NULL) {
  8.1001 -		unsigned long st, end, size;
  8.1002 -		struct file *file = NULL;
  8.1003 -
  8.1004 -		free = free->vm_next;
  8.1005 -
  8.1006 -		st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
  8.1007 -		end = addr+len;
  8.1008 -		end = end > mpnt->vm_end ? mpnt->vm_end : end;
  8.1009 -		size = end - st;
  8.1010 -
  8.1011 -		if (mpnt->vm_flags & VM_DENYWRITE &&
  8.1012 -		    (st != mpnt->vm_start || end != mpnt->vm_end) &&
  8.1013 -		    (file = mpnt->vm_file) != NULL) {
  8.1014 -			atomic_dec(&file->f_dentry->d_inode->i_writecount);
  8.1015 -		}
  8.1016 -		remove_shared_vm_struct(mpnt);
  8.1017 -		mm->map_count--;
  8.1018 -
  8.1019 -		zap_page_range(mm, st, size);
  8.1020 -
  8.1021 -		/*
  8.1022 -		 * Fix the mapping, and free the old area if it wasn't reused.
  8.1023 -		 */
  8.1024 -		extra = unmap_fixup(mm, mpnt, st, size, extra);
  8.1025 -		if (file)
  8.1026 -			atomic_inc(&file->f_dentry->d_inode->i_writecount);
  8.1027 -	}
  8.1028 -	validate_mm(mm);
  8.1029 -
  8.1030 -	/* Release the extra vma struct if it wasn't used */
  8.1031 -	if (extra)
  8.1032 -		kmem_cache_free(vm_area_cachep, extra);
  8.1033 -
  8.1034 -	free_pgtables(mm, prev, addr, addr+len);
  8.1035 -
  8.1036 -	return 0;
  8.1037 -}
  8.1038 -
  8.1039 -asmlinkage long sys_munmap(unsigned long addr, size_t len)
  8.1040 -{
  8.1041 -	int ret;
  8.1042 -	struct mm_struct *mm = current->mm;
  8.1043 -
  8.1044 -	down_write(&mm->mmap_sem);
  8.1045 -	ret = do_munmap(mm, addr, len);
  8.1046 -	up_write(&mm->mmap_sem);
  8.1047 -	return ret;
  8.1048 -}
  8.1049 -
  8.1050 -/*
  8.1051 - *  this is really a simplified "do_mmap".  it only handles
  8.1052 - *  anonymous maps.  eventually we may be able to do some
  8.1053 - *  brk-specific accounting here.
  8.1054 - */
  8.1055 -unsigned long do_brk(unsigned long addr, unsigned long len)
  8.1056 -{
  8.1057 -	struct mm_struct * mm = current->mm;
  8.1058 -	struct vm_area_struct * vma, * prev;
  8.1059 -	unsigned long flags;
  8.1060 -	rb_node_t ** rb_link, * rb_parent;
  8.1061 -
  8.1062 -	len = PAGE_ALIGN(len);
  8.1063 -	if (!len)
  8.1064 -		return addr;
  8.1065 -
  8.1066 -	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
  8.1067 -		return -EINVAL;
  8.1068 -
  8.1069 -	/*
  8.1070 -	 * mlock MCL_FUTURE?
  8.1071 -	 */
  8.1072 -	if (mm->def_flags & VM_LOCKED) {
  8.1073 -		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
  8.1074 -		locked += len;
  8.1075 -		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
  8.1076 -			return -EAGAIN;
  8.1077 -	}
  8.1078 -
  8.1079 -	/*
  8.1080 -	 * Clear old maps.  this also does some error checking for us
  8.1081 -	 */
  8.1082 - munmap_back:
  8.1083 -	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
  8.1084 -	if (vma && vma->vm_start < addr + len) {
  8.1085 -		if (do_munmap(mm, addr, len))
  8.1086 -			return -ENOMEM;
  8.1087 -		goto munmap_back;
  8.1088 -	}
  8.1089 -
  8.1090 -	/* Check against address space limits *after* clearing old maps... */
  8.1091 -	if ((mm->total_vm << PAGE_SHIFT) + len
  8.1092 -	    > current->rlim[RLIMIT_AS].rlim_cur)
  8.1093 -		return -ENOMEM;
  8.1094 -
  8.1095 -	if (mm->map_count > max_map_count)
  8.1096 -		return -ENOMEM;
  8.1097 -
  8.1098 -	if (!vm_enough_memory(len >> PAGE_SHIFT))
  8.1099 -		return -ENOMEM;
  8.1100 -
  8.1101 -	flags = VM_DATA_DEFAULT_FLAGS | mm->def_flags;
  8.1102 -
  8.1103 -	/* Can we just expand an old anonymous mapping? */
  8.1104 -	if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, flags))
  8.1105 -		goto out;
  8.1106 -
  8.1107 -	/*
  8.1108 -	 * create a vma struct for an anonymous mapping
  8.1109 -	 */
  8.1110 -	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
  8.1111 -	if (!vma)
  8.1112 -		return -ENOMEM;
  8.1113 -
  8.1114 -	vma->vm_mm = mm;
  8.1115 -	vma->vm_start = addr;
  8.1116 -	vma->vm_end = addr + len;
  8.1117 -	vma->vm_flags = flags;
  8.1118 -	vma->vm_page_prot = protection_map[flags & 0x0f];
  8.1119 -	vma->vm_ops = NULL;
  8.1120 -	vma->vm_pgoff = 0;
  8.1121 -	vma->vm_file = NULL;
  8.1122 -	vma->vm_private_data = NULL;
  8.1123 -
  8.1124 -	vma_link(mm, vma, prev, rb_link, rb_parent);
  8.1125 -
  8.1126 -out:
  8.1127 -	mm->total_vm += len >> PAGE_SHIFT;
  8.1128 -	if (flags & VM_LOCKED) {
  8.1129 -		mm->locked_vm += len >> PAGE_SHIFT;
  8.1130 -		make_pages_present(addr, addr + len);
  8.1131 -	}
  8.1132 -	return addr;
  8.1133 -}
  8.1134 -
  8.1135 -/* Build the RB tree corresponding to the VMA list. */
  8.1136 -void build_mmap_rb(struct mm_struct * mm)
  8.1137 -{
  8.1138 -	struct vm_area_struct * vma;
  8.1139 -	rb_node_t ** rb_link, * rb_parent;
  8.1140 -
  8.1141 -	mm->mm_rb = RB_ROOT;
  8.1142 -	rb_link = &mm->mm_rb.rb_node;
  8.1143 -	rb_parent = NULL;
  8.1144 -	for (vma = mm->mmap; vma; vma = vma->vm_next) {
  8.1145 -		__vma_link_rb(mm, vma, rb_link, rb_parent);
  8.1146 -		rb_parent = &vma->vm_rb;
  8.1147 -		rb_link = &rb_parent->rb_right;
  8.1148 -	}
  8.1149 -}
  8.1150 -
  8.1151 -/* Release all mmaps. */
  8.1152 -void exit_mmap(struct mm_struct * mm)
  8.1153 -{
  8.1154 -	struct vm_area_struct * mpnt;
  8.1155 -
  8.1156 -	release_segments(mm);
  8.1157 -	spin_lock(&mm->page_table_lock);
  8.1158 -	mpnt = mm->mmap;
  8.1159 -	mm->mmap = mm->mmap_cache = NULL;
  8.1160 -	mm->mm_rb = RB_ROOT;
  8.1161 -	mm->rss = 0;
  8.1162 -	spin_unlock(&mm->page_table_lock);
  8.1163 -	mm->total_vm = 0;
  8.1164 -	mm->locked_vm = 0;
  8.1165 -
  8.1166 -	flush_cache_mm(mm);
  8.1167 -	while (mpnt) {
  8.1168 -		struct vm_area_struct * next = mpnt->vm_next;
  8.1169 -		unsigned long start = mpnt->vm_start;
  8.1170 -		unsigned long end = mpnt->vm_end;
  8.1171 -		unsigned long size = end - start;
  8.1172 -
  8.1173 -		if (mpnt->vm_ops) {
  8.1174 -			if (mpnt->vm_ops->close)
  8.1175 -				mpnt->vm_ops->close(mpnt);
  8.1176 -		}
  8.1177 -		mm->map_count--;
  8.1178 -		remove_shared_vm_struct(mpnt);
  8.1179 -		zap_page_range(mm, start, size);
  8.1180 -		if (mpnt->vm_file)
  8.1181 -			fput(mpnt->vm_file);
  8.1182 -		kmem_cache_free(vm_area_cachep, mpnt);
  8.1183 -		mpnt = next;
  8.1184 -	}
  8.1185 -
  8.1186 -	/* This is just debugging */
  8.1187 -	if (mm->map_count)
  8.1188 -		BUG();
  8.1189 -
  8.1190 -	clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
  8.1191 -
  8.1192 -	flush_tlb_mm(mm);
  8.1193 -}
  8.1194 -
  8.1195 -/* Insert vm structure into process list sorted by address
  8.1196 - * and into the inode's i_mmap ring.  If vm_file is non-NULL
  8.1197 - * then the i_shared_lock must be held here.
  8.1198 - */
  8.1199 -void __insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
  8.1200 -{
  8.1201 -	struct vm_area_struct * __vma, * prev;
  8.1202 -	rb_node_t ** rb_link, * rb_parent;
  8.1203 -
  8.1204 -	__vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
  8.1205 -	if (__vma && __vma->vm_start < vma->vm_end)
  8.1206 -		BUG();
  8.1207 -	__vma_link(mm, vma, prev, rb_link, rb_parent);
  8.1208 -	mm->map_count++;
  8.1209 -	validate_mm(mm);
  8.1210 -}
  8.1211 -
  8.1212 -void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
  8.1213 -{
  8.1214 -	struct vm_area_struct * __vma, * prev;
  8.1215 -	rb_node_t ** rb_link, * rb_parent;
  8.1216 -
  8.1217 -	__vma = find_vma_prepare(mm, vma->vm_start, &prev, &rb_link, &rb_parent);
  8.1218 -	if (__vma && __vma->vm_start < vma->vm_end)
  8.1219 -		BUG();
  8.1220 -	vma_link(mm, vma, prev, rb_link, rb_parent);
  8.1221 -	validate_mm(mm);
  8.1222 -}
     9.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/Makefile	Tue Aug 17 09:35:05 2004 +0000
     9.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/Makefile	Tue Aug 17 17:54:44 2004 +0000
     9.3 @@ -6,7 +6,7 @@ XENARCH	:= $(subst ",,$(CONFIG_XENARCH))
     9.4  
     9.5  CFLAGS	+= -Iarch/$(XENARCH)/mm
     9.6  
     9.7 -obj-y	:= init.o fault.o ioremap.o pgtable.o hypervisor.o mmap.o
     9.8 +obj-y	:= init.o fault.o ioremap.o pgtable.o hypervisor.o
     9.9  c-obj-y	:= extable.o pageattr.o 
    9.10  
    9.11  c-obj-$(CONFIG_DISCONTIGMEM)	+= discontig.o
    10.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/fault.c	Tue Aug 17 09:35:05 2004 +0000
    10.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/fault.c	Tue Aug 17 17:54:44 2004 +0000
    10.3 @@ -248,8 +248,7 @@ asmlinkage void do_page_fault(struct pt_
    10.4  	 * (error_code & 4) == 0, and that the fault was not a
    10.5  	 * protection error (error_code & 1) == 0.
    10.6  	 */
    10.7 -	if (unlikely(address >= TASK_SIZE) ||
    10.8 -	    unlikely(address < (FIRST_USER_PGD_NR<<PGDIR_SHIFT))) { 
    10.9 +	if (unlikely(address >= TASK_SIZE)) { 
   10.10  		if (!(error_code & 5))
   10.11  			goto vmalloc_fault;
   10.12  		/* 
    11.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/mmap.c	Tue Aug 17 09:35:05 2004 +0000
    11.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.3 @@ -1,70 +0,0 @@
    11.4 -
    11.5 -#include <linux/slab.h>
    11.6 -#include <linux/version.h>
    11.7 -#include <linux/mman.h>
    11.8 -#include <linux/init.h>
    11.9 -#include <asm/pgalloc.h>
   11.10 -
   11.11 -unsigned long
   11.12 -arch_get_unmapped_area(struct file *filp, unsigned long addr,
   11.13 -		unsigned long len, unsigned long pgoff, unsigned long flags)
   11.14 -{
   11.15 -	struct mm_struct *mm = current->mm;
   11.16 -	struct vm_area_struct *vma;
   11.17 -	unsigned long start_addr;
   11.18 -
   11.19 -	if (len > TASK_SIZE)
   11.20 -		return -ENOMEM;
   11.21 -
   11.22 -	if (addr) {
   11.23 -		addr = PAGE_ALIGN(addr);
   11.24 -		vma = find_vma(mm, addr);
   11.25 -		if (((TASK_SIZE - len) >= addr) &&
   11.26 -		    (addr >= (FIRST_USER_PGD_NR<<PGDIR_SHIFT)) &&
   11.27 -		    (!vma || ((addr + len) <= vma->vm_start)))
   11.28 -			return addr;
   11.29 -	}
   11.30 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   11.31 -	start_addr = addr = mm->free_area_cache;
   11.32 -#else
   11.33 -	start_addr = addr = PAGE_ALIGN(TASK_UNMAPPED_BASE);
   11.34 -#endif
   11.35 -
   11.36 -full_search:
   11.37 -	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
   11.38 -		/* At this point:  (!vma || addr < vma->vm_end). */
   11.39 -		if (TASK_SIZE - len < addr) {
   11.40 -			/*
   11.41 -			 * Start a new search - just in case we missed
   11.42 -			 * some holes.
   11.43 -			 */
   11.44 -			if (start_addr != TASK_UNMAPPED_BASE) {
   11.45 -				start_addr = addr = TASK_UNMAPPED_BASE;
   11.46 -				goto full_search;
   11.47 -			}
   11.48 -			return -ENOMEM;
   11.49 -		}
   11.50 -		if (!vma || addr + len <= vma->vm_start) {
   11.51 -			/*
   11.52 -			 * Remember the place where we stopped the search:
   11.53 -			 */
   11.54 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   11.55 -			mm->free_area_cache = addr + len;
   11.56 -#endif
   11.57 -			return addr;
   11.58 -		}
   11.59 -		addr = vma->vm_end;
   11.60 -	}
   11.61 -}
   11.62 -
   11.63 -unsigned long
   11.64 -arch_check_fixed_mapping(struct file *filp, unsigned long addr,
   11.65 -		unsigned long len, unsigned long pgoff, unsigned long flags)
   11.66 -{
   11.67 -	if (addr < (FIRST_USER_PGD_NR<<PGDIR_SHIFT)) {
   11.68 -		printk(KERN_ALERT "WARNING: Preventing a mmap() request by %s at 0x%08lx, len %08lx\n",
   11.69 -		current->comm, addr, len);
   11.70 -		return -EINVAL;
   11.71 -	}
   11.72 -	return 0;
   11.73 -}
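
Both deleted hooks enforced a floor on user mappings at the base of the first user PGD slot. A minimal sketch of that bound, using the constants from asm-xen/pgtable.h and assuming two-level paging (below_user_floor is an invented helper, for illustration only):

/* Old header: FIRST_USER_PGD_NR = 1  ->  floor = 1 << 22 = 4MB.
 * New header: FIRST_USER_PGD_NR = 0  ->  floor = 0, nothing to reject,
 * so the generic get_unmapped_area() path is sufficient. */
static inline int below_user_floor(unsigned long addr)
{
    return addr < ((unsigned long)FIRST_USER_PGD_NR << PGDIR_SHIFT);
}
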
    12.1 --- a/linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue Aug 17 09:35:05 2004 +0000
    12.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/i386/mm/pgtable.c	Tue Aug 17 17:54:44 2004 +0000
    12.3 @@ -258,21 +258,16 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
    12.4  	if (PTRS_PER_PMD == 1)
    12.5  		spin_lock_irqsave(&pgd_lock, flags);
    12.6  
    12.7 -	memcpy((pgd_t *)pgd,
    12.8 -			swapper_pg_dir,
    12.9 -			FIRST_USER_PGD_NR * sizeof(pgd_t));
   12.10 -	memcpy((pgd_t *)pgd + FIRST_USER_PGD_NR + USER_PTRS_PER_PGD,
   12.11 -			swapper_pg_dir + FIRST_USER_PGD_NR + USER_PTRS_PER_PGD,
   12.12 -			(PTRS_PER_PGD - USER_PTRS_PER_PGD -
   12.13 -			 FIRST_USER_PGD_NR) * sizeof(pgd_t));
   12.14 +	memcpy((pgd_t *)pgd + USER_PTRS_PER_PGD,
   12.15 +			swapper_pg_dir + USER_PTRS_PER_PGD,
   12.16 +			(PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
   12.17  
   12.18  	if (PTRS_PER_PMD > 1)
   12.19  		goto out;
   12.20  
   12.21  	pgd_list_add(pgd);
   12.22  	spin_unlock_irqrestore(&pgd_lock, flags);
   12.23 -	memset((pgd_t *)pgd + FIRST_USER_PGD_NR,
   12.24 -			0, USER_PTRS_PER_PGD*sizeof(pgd_t));
   12.25 +	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
   12.26   out:
   12.27  	__make_page_readonly(pgd);
   12.28  	queue_pgd_pin(__pa(pgd));
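
With user PGD slots now starting at index 0, the constructor needs just one copy (kernel half) and one clear (user half). A sketch of the layout it leaves behind, assuming the usual TASK_SIZE = 0xC0000000 / two-level-paging configuration:

/*
 * Layout established by the simplified pgd_ctor() above (illustrative;
 * the 768/256 split assumes TASK_SIZE = 0xC0000000, PGDIR_SIZE = 4MB):
 *
 *   slots   0 .. 767   user half   -> cleared, private to this mm
 *   slots 768 .. 1023  kernel half -> copied from swapper_pg_dir
 *
 * The old code had to special-case slot 0, copying it from swapper_pg_dir,
 * because FIRST_USER_PGD_NR was 1.
 */
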
    13.1 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/ctrl_if.c	Tue Aug 17 09:35:05 2004 +0000
    13.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/ctrl_if.c	Tue Aug 17 17:54:44 2004 +0000
    13.3 @@ -58,13 +58,14 @@ static struct {
    13.4      unsigned long      id;
    13.5  } ctrl_if_txmsg_id_mapping[CONTROL_RING_SIZE];
    13.6  
    13.7 -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    13.8 -static struct tq_struct ctrl_if_rxmsg_deferred_tq;
    13.9 +/* For received messages that must be deferred to process context. */
   13.10 +static void __ctrl_if_rxmsg_deferred(void *unused);
   13.11 +static DECLARE_WORK(ctrl_if_rxmsg_deferred_work,
   13.12 +                    __ctrl_if_rxmsg_deferred,
   13.13 +                    NULL);
   13.14 +
   13.15 +/* Deferred callbacks for people waiting for space in the transmit ring. */
   13.16  static DECLARE_TASK_QUEUE(ctrl_if_tx_tq);
   13.17 -#else
   13.18 -static struct work_struct ctrl_if_rxmsg_deferred_work;
   13.19 -static struct workqueue_struct *ctrl_if_tx_wq = NULL;
   13.20 -#endif
   13.21  
   13.22  static DECLARE_WAIT_QUEUE_HEAD(ctrl_if_tx_wait);
   13.23  static void __ctrl_if_tx_tasklet(unsigned long data);
   13.24 @@ -127,9 +128,7 @@ static void __ctrl_if_tx_tasklet(unsigne
   13.25      if ( was_full && !TX_FULL(ctrl_if) )
   13.26      {
   13.27          wake_up(&ctrl_if_tx_wait);
   13.28 -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
   13.29          run_task_queue(&ctrl_if_tx_tq);
   13.30 -#endif
   13.31      }
   13.32  }
   13.33  
   13.34 @@ -184,11 +183,7 @@ static void __ctrl_if_rx_tasklet(unsigne
   13.35      {
   13.36          wmb();
   13.37          ctrl_if_rxmsg_deferred_prod = dp;
   13.38 -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
   13.39 -        schedule_task(&ctrl_if_rxmsg_deferred_tq);
   13.40 -#else
   13.41          schedule_work(&ctrl_if_rxmsg_deferred_work);
   13.42 -#endif
   13.43      }
   13.44  }
   13.45  
   13.46 @@ -285,7 +280,7 @@ int ctrl_if_send_message_block(
   13.47      return rc;
   13.48  }
   13.49  
   13.50 -int ctrl_if_enqueue_space_callback(struct work_struct *work)
   13.51 +int ctrl_if_enqueue_space_callback(struct tq_struct *task)
   13.52  {
   13.53      control_if_t *ctrl_if = get_ctrl_if();
   13.54  
   13.55 @@ -293,14 +288,7 @@ int ctrl_if_enqueue_space_callback(struc
   13.56      if ( !TX_FULL(ctrl_if) )
   13.57          return 0;
   13.58  
   13.59 -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
   13.60 -    (void)queue_task(work, &ctrl_if_tx_tq);
   13.61 -#else
   13.62 -    if ( ctrl_if_tx_wq )
   13.63 -        (void)queue_work(ctrl_if_tx_wq, work);
   13.64 -    else
   13.65 -        return 1;
   13.66 -#endif
   13.67 +    (void)queue_task(task, &ctrl_if_tx_tq);
   13.68  
   13.69      /*
   13.70       * We may race execution of the task queue, so return re-checked status. If
   13.71 @@ -439,13 +427,6 @@ void __init ctrl_if_init(void)
   13.72  
   13.73      for ( i = 0; i < 256; i++ )
   13.74          ctrl_if_rxmsg_handler[i] = ctrl_if_rxmsg_default_handler;
   13.75 -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
   13.76 -    ctrl_if_rxmsg_deferred_tq.routine = __ctrl_if_rxmsg_deferred;
   13.77 -#else
   13.78 -    INIT_WORK(&ctrl_if_rxmsg_deferred_work,
   13.79 -              (void *)__ctrl_if_rxmsg_deferred,
   13.80 -              NULL);
   13.81 -#endif
   13.82  
   13.83      spin_lock_init(&ctrl_if_lock);
   13.84  
   13.85 @@ -457,11 +438,6 @@ void __init ctrl_if_init(void)
   13.86  static int __init ctrl_if_late_setup(void)
   13.87  {
   13.88      safe_to_schedule_task = 1;
   13.89 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   13.90 -    ctrl_if_tx_wq = create_workqueue("ctrl_if_tx");
   13.91 -    if ( ctrl_if_tx_wq == NULL )
   13.92 -        return 1;
   13.93 -#endif
   13.94      return 0;
   13.95  }
   13.96  __initcall(ctrl_if_late_setup);
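
For context, a minimal sketch of how a client uses the restored tq_struct-based space callback; the my_* names are invented, and the return-value semantics follow the code and comments above:

#include <asm-xen/ctrl_if.h>
#include <asm-xen/queues.h>

/* Hypothetical client waiting for room in the control transmit ring. */
static void my_space_fn(void *unused)
{
    /* Runs in process context once the tx tasklet has drained the ring
     * and called run_task_queue(&ctrl_if_tx_tq): retry the send here. */
}
static struct tq_struct my_space_task;

static void my_wait_for_space(void)
{
    INIT_TQUEUE(&my_space_task, my_space_fn, NULL);
    if ( ctrl_if_enqueue_space_callback(&my_space_task) == 0 )
    {
        /* Ring is no longer full: just retry the send immediately.  The
         * callback may still run as well, so it must be safe to invoke
         * redundantly. */
    }
    /* Return value 1: the callback /will/ run when space appears. */
}
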
    14.1 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Tue Aug 17 09:35:05 2004 +0000
    14.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/fixup.c	Tue Aug 17 17:54:44 2004 +0000
    14.3 @@ -30,11 +30,12 @@
    14.4  #include <linux/pagemap.h>
    14.5  #include <linux/vmalloc.h>
    14.6  #include <linux/highmem.h>
    14.7 +#include <linux/mman.h>
    14.8  #include <asm/fixmap.h>
    14.9  #include <asm/pgtable.h>
   14.10  #include <asm/uaccess.h>
   14.11  
   14.12 -#if 0
   14.13 +#if 1
   14.14  #define ASSERT(_p) \
   14.15      if ( !(_p) ) { printk("Assertion '%s' failed, line %d, file %s", #_p , \
   14.16      __LINE__, __FILE__); *(int*)0=0; }
   14.17 @@ -50,23 +51,13 @@
   14.18  #define TestSetPageLocked(_p) TryLockPage(_p)
   14.19  #define PageAnon(_p)          0 /* no equivalent in 2.4 */
   14.20  #define pte_offset_kernel     pte_offset
   14.21 -extern int __vmalloc_area_pages(unsigned long address,
   14.22 -                                unsigned long size,
   14.23 -                                int gfp_mask,
   14.24 -                                pgprot_t prot,
   14.25 -                                struct page ***pages);
   14.26 -#else
   14.27 -static inline int __vmalloc_area_pages(unsigned long address,
   14.28 -                                unsigned long size,
   14.29 -                                int gfp_mask,
   14.30 -                                pgprot_t prot,
   14.31 -                                struct page ***pages)
   14.32 -{
   14.33 -    struct vm_struct vma;
   14.34 -    vma.addr = (void *)address;
   14.35 -    vma.size = size + PAGE_SIZE; /* retarded interface */
   14.36 -    return map_vm_area(&vma, prot, pages);
   14.37 -}
   14.38 +#define remap_page_range(_a,_b,_c,_d,_e) remap_page_range(_b,_c,_d,_e)
   14.39 +#define daemonize(_n)                   \
   14.40 +    do {                                \
   14.41 +        daemonize();                    \
   14.42 +        strcpy(current->comm, _n);      \
   14.43 +        sigfillset(&current->blocked);  \
   14.44 +    } while ( 0 )
   14.45  #endif
   14.46  
   14.47  static unsigned char *fixup_buf;
   14.48 @@ -235,6 +226,64 @@ static unsigned int parse_insn(unsigned 
   14.49      return ((pb - insn) + 1 + (d & INSN_SUFFIX_BYTES));
   14.50  }
   14.51  
   14.52 +#define SUCCESS 1
   14.53 +#define FAIL    0
   14.54 +static int map_fixup_buf(struct mm_struct *mm)
   14.55 +{
   14.56 +    struct vm_area_struct *vma;
   14.57 +
   14.58 +    /* Already mapped? This is a pretty safe check. */
   14.59 +    if ( ((vma = find_vma(current->mm, FIXUP_BUF_USER)) != NULL) &&
   14.60 +         (vma->vm_start <= FIXUP_BUF_USER) &&
   14.61 +         (vma->vm_flags == (VM_READ | VM_MAYREAD | VM_RESERVED)) &&
   14.62 +         (vma->vm_file == NULL) )
   14.63 +        return SUCCESS;
   14.64 +
   14.65 +    if ( (vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL)) == NULL )
   14.66 +    {
   14.67 +        DPRINTK("Cannot allocate VMA.");
   14.68 +        return FAIL;
   14.69 +    }
   14.70 +
   14.71 +    memset(vma, 0, sizeof(*vma));
   14.72 +
   14.73 +    vma->vm_mm        = mm;
   14.74 +    vma->vm_flags     = VM_READ | VM_MAYREAD | VM_RESERVED;
   14.75 +    vma->vm_page_prot = PAGE_READONLY;
   14.76 +
   14.77 +    down_write(&mm->mmap_sem);
   14.78 +
   14.79 +    vma->vm_start = get_unmapped_area(
   14.80 +        NULL, FIXUP_BUF_USER, FIXUP_BUF_SIZE,
   14.81 +        0, MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED);
   14.82 +    if ( vma->vm_start != FIXUP_BUF_USER )
   14.83 +    {
   14.84 +        DPRINTK("Cannot allocate low-memory-region VMA.");
   14.85 +        up_write(&mm->mmap_sem);
   14.86 +        kmem_cache_free(vm_area_cachep, vma);
   14.87 +        return FAIL;
   14.88 +    }
   14.89 +
   14.90 +    vma->vm_end = vma->vm_start + FIXUP_BUF_SIZE;
   14.91 +
   14.92 +    if ( remap_page_range(vma, vma->vm_start, __pa(fixup_buf), 
   14.93 +                          vma->vm_end - vma->vm_start, vma->vm_page_prot) )
   14.94 +    {
   14.95 +        DPRINTK("Cannot map low-memory-region VMA.");
   14.96 +        up_write(&mm->mmap_sem);
   14.97 +        kmem_cache_free(vm_area_cachep, vma);
   14.98 +        return FAIL;
   14.99 +    }
  14.100 +
  14.101 +    insert_vm_struct(mm, vma);
  14.102 +    
  14.103 +    mm->total_vm += FIXUP_BUF_SIZE >> PAGE_SHIFT;
  14.104 +
  14.105 +    up_write(&mm->mmap_sem);
  14.106 +
  14.107 +    return SUCCESS;
  14.108 +}
  14.109 +
  14.110  /*
  14.111   * Mainly this function checks that our patches can't erroneously get flushed
  14.112   * to a file on disc, which would screw us after reboot!
  14.113 @@ -251,7 +300,8 @@ static int safe_to_patch(struct mm_struc
  14.114      if ( addr <= (FIXUP_BUF_USER + FIXUP_BUF_SIZE) )
  14.115          return SUCCESS;
  14.116  
  14.117 -    if ( (vma = find_vma(current->mm, addr)) == NULL )
  14.118 +    if ( ((vma = find_vma(current->mm, addr)) == NULL) ||
  14.119 +         (vma->vm_start > addr) )
  14.120      {
  14.121          DPRINTK("No VMA contains fault address.");
  14.122          return FAIL;
  14.123 @@ -314,6 +364,9 @@ asmlinkage void do_fixup_4gb_segment(str
  14.124          return;
  14.125      }
  14.126  
  14.127 +    if ( unlikely(!map_fixup_buf(mm)) )
  14.128 +        goto out;
  14.129 +
  14.130      /* Hold the mmap_sem to prevent the mapping from disappearing under us. */
  14.131      down_read(&mm->mmap_sem);
  14.132  
  14.133 @@ -669,8 +722,14 @@ asmlinkage void do_fixup_4gb_segment(str
  14.134  
  14.135      /* Find the physical page that is to be patched. */
  14.136      pgd = pgd_offset(current->mm, eip);
  14.137 +    if ( unlikely(!pgd_present(*pgd)) )
  14.138 +        goto unlock_and_out;
  14.139      pmd = pmd_offset(pgd, eip);
  14.140 +    if ( unlikely(!pmd_present(*pmd)) )
  14.141 +        goto unlock_and_out;
  14.142      pte = pte_offset_kernel(pmd, eip);
  14.143 +    if ( unlikely(!pte_present(*pte)) )
  14.144 +        goto unlock_and_out;
  14.145      page = pte_page(*pte);
  14.146  
  14.147      /*
  14.148 @@ -680,8 +739,7 @@ asmlinkage void do_fixup_4gb_segment(str
  14.149      if ( unlikely(TestSetPageLocked(page)) )
  14.150      {
  14.151          DPRINTK("Page is locked.");
  14.152 -        spin_unlock(&mm->page_table_lock);
  14.153 -        goto out;
  14.154 +        goto unlock_and_out;
  14.155      }
  14.156  
  14.157      /*
  14.158 @@ -692,8 +750,7 @@ asmlinkage void do_fixup_4gb_segment(str
  14.159      {
  14.160          DPRINTK("Page is dirty or anonymous.");
  14.161          unlock_page(page);
  14.162 -        spin_unlock(&mm->page_table_lock);
  14.163 -        goto out;
  14.164 +        goto unlock_and_out;
  14.165      }
  14.166  
  14.167      veip = kmap(page);
  14.168 @@ -709,30 +766,43 @@ asmlinkage void do_fixup_4gb_segment(str
  14.169  
  14.170   out:
  14.171      up_read(&mm->mmap_sem);
  14.172 +    return;
  14.173 +
  14.174 + unlock_and_out:
  14.175 +    spin_unlock(&mm->page_table_lock);
  14.176 +    up_read(&mm->mmap_sem);
  14.177 +    return;
  14.178 +}
  14.179 +
  14.180 +static int fixup_thread(void *unused)
  14.181 +{
  14.182 +    daemonize("segfixup");
  14.183 +    
  14.184 +    for ( ; ; )
  14.185 +    {
  14.186 +        set_current_state(TASK_INTERRUPTIBLE);
  14.187 +        schedule();
  14.188 +    }
  14.189  }
  14.190  
  14.191  static int nosegfixup = 0;
  14.192  
  14.193  static int __init fixup_init(void)
  14.194  {
  14.195 -    struct page *_pages[1<<FIXUP_BUF_ORDER], **pages=_pages;
  14.196      int i;
  14.197  
  14.198 -    if ( nosegfixup )
  14.199 -        return 0;
  14.200 -
  14.201 -    HYPERVISOR_vm_assist(VMASST_CMD_enable,
  14.202 -                         VMASST_TYPE_4gb_segments_notify);
  14.203 +    nosegfixup = 1; /* XXX */
  14.204  
  14.205 -    fixup_buf = (char *)__get_free_pages(GFP_ATOMIC, FIXUP_BUF_ORDER);
  14.206 -    for ( i = 0; i < (1<<FIXUP_BUF_ORDER); i++ )
  14.207 -        _pages[i] = virt_to_page(fixup_buf) + i;
  14.208 -
  14.209 -    if ( __vmalloc_area_pages(FIXUP_BUF_USER, FIXUP_BUF_SIZE, 
  14.210 -                              0, PAGE_READONLY, &pages) != 0 )
  14.211 -        BUG();
  14.212 -
  14.213 -    memset(fixup_hash, 0, sizeof(fixup_hash));
  14.214 +    if ( !nosegfixup )
  14.215 +    {
  14.216 +        HYPERVISOR_vm_assist(VMASST_CMD_enable,
  14.217 +                             VMASST_TYPE_4gb_segments_notify);
  14.218 +        fixup_buf = (char *)__get_free_pages(GFP_ATOMIC, FIXUP_BUF_ORDER);
  14.219 +        for ( i = 0; i < (1 << FIXUP_BUF_ORDER); i++ )
  14.220 +            SetPageReserved(virt_to_page(fixup_buf) + i);
  14.221 +        memset(fixup_hash, 0, sizeof(fixup_hash));
  14.222 +        (void)kernel_thread(fixup_thread, NULL, CLONE_FS | CLONE_FILES);
  14.223 +    }
  14.224  
  14.225      return 0;
  14.226  }
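
The two new 2.4-compatibility macros above smooth over API differences so the rest of the file can be written in 2.6 style. A sketch of what they expand to on a 2.4 build (argument names as in map_fixup_buf() and fixup_thread() above):

/* Illustrative expansions on 2.4:
 *
 *   remap_page_range(vma, vma->vm_start, __pa(fixup_buf), size, prot)
 *     becomes the four-argument 2.4 call, dropping the vma parameter:
 *   remap_page_range(vma->vm_start, __pa(fixup_buf), size, prot)
 *
 *   daemonize("segfixup")
 *     becomes 2.4's argument-less daemonize() followed by setting
 *     current->comm and blocking all signals -- roughly what 2.6's
 *     daemonize(name) already does internally.
 */
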
    15.1 --- a/linux-2.6.7-xen-sparse/arch/xen/kernel/reboot.c	Tue Aug 17 09:35:05 2004 +0000
    15.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/kernel/reboot.c	Tue Aug 17 17:54:44 2004 +0000
    15.3 @@ -14,10 +14,7 @@ static int errno;
    15.4  #include <asm-xen/hypervisor.h>
    15.5  #include <asm-xen/hypervisor-ifs/dom0_ops.h>
    15.6  #include <asm-xen/suspend.h>
    15.7 -
    15.8 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    15.9 -int reboot_thru_bios = 0;	/* for dmi_scan.c */
   15.10 -#endif
   15.11 +#include <asm-xen/queues.h>
   15.12  
   15.13  void machine_restart(char * __unused)
   15.14  {
   15.15 @@ -27,19 +24,11 @@ void machine_restart(char * __unused)
   15.16  	HYPERVISOR_reboot();
   15.17  }
   15.18  
   15.19 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   15.20 -EXPORT_SYMBOL(machine_restart);
   15.21 -#endif
   15.22 -
   15.23  void machine_halt(void)
   15.24  {
   15.25  	machine_power_off();
   15.26  }
   15.27  
   15.28 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   15.29 -EXPORT_SYMBOL(machine_halt);
   15.30 -#endif
   15.31 -
   15.32  void machine_power_off(void)
   15.33  {
   15.34  	/* We really want to get pending console data out before we die. */
   15.35 @@ -49,11 +38,13 @@ void machine_power_off(void)
   15.36  }
   15.37  
   15.38  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   15.39 +int reboot_thru_bios = 0;	/* for dmi_scan.c */
   15.40 +EXPORT_SYMBOL(machine_restart);
   15.41 +EXPORT_SYMBOL(machine_halt);
   15.42  EXPORT_SYMBOL(machine_power_off);
   15.43  #endif
   15.44  
   15.45  
   15.46 -
   15.47  /******************************************************************************
   15.48   * Stop/pickle callback handling.
   15.49   */
   15.50 @@ -65,7 +56,9 @@ static int shutting_down = -1;
   15.51  
   15.52  static void __do_suspend(void)
   15.53  {
   15.54 -    int i,j;
   15.55 +    int i, j;
   15.56 +    suspend_record_t *suspend_record;
   15.57 +
   15.58      /* Hmmm... a cleaner interface to suspend/resume blkdevs would be nice. */
   15.59      extern void blkdev_suspend(void);
   15.60      extern void blkdev_resume(void);
   15.61 @@ -76,10 +69,8 @@ static void __do_suspend(void)
   15.62      extern unsigned long max_pfn;
   15.63      extern unsigned long *pfn_to_mfn_frame_list;
   15.64  
   15.65 -    suspend_record_t *suspend_record     = NULL;
   15.66 -
   15.67 -    if ( (suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL))
   15.68 -         == NULL )
   15.69 +    suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL);
   15.70 +    if ( suspend_record == NULL )
   15.71          goto out;
   15.72  
   15.73      suspend_record->nr_pfns = max_pfn; /* final number of pfns */
   15.74 @@ -205,11 +196,7 @@ static void __shutdown_handler(void *unu
   15.75  
   15.76  static void shutdown_handler(ctrl_msg_t *msg, unsigned long id)
   15.77  {
   15.78 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   15.79      static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
   15.80 -#else
   15.81 -    static struct tq_struct shutdown_tq;
   15.82 -#endif
   15.83  
   15.84      if ( (shutting_down == -1) &&
   15.85           ((msg->subtype == CMSG_SHUTDOWN_POWEROFF) ||
   15.86 @@ -217,12 +204,7 @@ static void shutdown_handler(ctrl_msg_t 
   15.87            (msg->subtype == CMSG_SHUTDOWN_SUSPEND)) )
   15.88      {
   15.89          shutting_down = msg->subtype;
   15.90 -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   15.91          schedule_work(&shutdown_work);
   15.92 -#else
   15.93 -        shutdown_tq.routine = __shutdown_handler;
   15.94 -        schedule_task(&shutdown_tq);
   15.95 -#endif
   15.96      }
   15.97      else
   15.98      {
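
With asm-xen/queues.h supplying the work-queue API on 2.4 as well, the deferral pattern can be used without version guards. A minimal sketch (my_* names invented):

#include <asm-xen/queues.h>   /* DECLARE_WORK/schedule_work on 2.4 too */

/* Hypothetical deferral out of interrupt/callback context. */
static void my_deferred_handler(void *unused)
{
    /* Process context: sleeping, taking semaphores etc. is fine. */
}
static DECLARE_WORK(my_deferred_work, my_deferred_handler, NULL);

static void my_event_callback(void)
{
    schedule_work(&my_deferred_work);   /* handler runs shortly afterwards */
}
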
    16.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/console/console.c	Tue Aug 17 09:35:05 2004 +0000
    16.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/console/console.c	Tue Aug 17 17:54:44 2004 +0000
    16.3 @@ -74,16 +74,12 @@ static void __xencons_tx_flush(void);
    16.4  /* This task is used to defer sending console data until there is space. */
    16.5  static void xencons_tx_flush_task_routine(void *data);
    16.6  
    16.7 +static struct tq_struct xencons_tx_flush_task;
    16.8 +
    16.9  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   16.10  static struct tty_driver *xencons_driver;
   16.11 -static DECLARE_WORK(xencons_tx_flush_task,
   16.12 -                    xencons_tx_flush_task_routine,
   16.13 -                    NULL);
   16.14  #else
   16.15  static struct tty_driver xencons_driver;
   16.16 -static struct tq_struct xencons_tx_flush_task = {
   16.17 -    routine: xencons_tx_flush_task_routine
   16.18 -};
   16.19  #endif
   16.20  
   16.21  
   16.22 @@ -635,6 +631,8 @@ static int __init xencons_init(void)
   16.23      if ( xc_mode == XC_OFF )
   16.24          return 0;
   16.25  
   16.26 +    INIT_TQUEUE(&xencons_tx_flush_task, xencons_tx_flush_task_routine, NULL);
   16.27 +
   16.28  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
   16.29      xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ? 
   16.30                                        1 : MAX_NR_CONSOLES);
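
A single INIT_TQUEUE() at init time replaces the two version-specific static initialisers removed above; the pattern, with invented my_* names, is simply:

#include <linux/init.h>
#include <asm-xen/queues.h>

static struct tq_struct my_flush_task;

static void my_flush_routine(void *data)
{
    /* push any buffered output */
}

static int __init my_init(void)
{
    INIT_TQUEUE(&my_flush_task, my_flush_routine, NULL);
    return 0;
}
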
    17.1 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Tue Aug 17 09:35:05 2004 +0000
    17.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/pgtable.h	Tue Aug 17 17:54:44 2004 +0000
    17.3 @@ -64,17 +64,16 @@ void paging_init(void);
    17.4  #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
    17.5  #define PGDIR_MASK	(~(PGDIR_SIZE-1))
    17.6  
    17.7 -#define FIRST_USER_PGD_NR	1
    17.8 -#define USER_PTRS_PER_PGD	((TASK_SIZE/PGDIR_SIZE) - FIRST_USER_PGD_NR)
    17.9 +#define USER_PTRS_PER_PGD	(TASK_SIZE/PGDIR_SIZE)
   17.10 +#define FIRST_USER_PGD_NR	0
   17.11  
   17.12 -#if 0 /* XEN */
   17.13  #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
   17.14  #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
   17.15  
   17.16  #define TWOLEVEL_PGDIR_SHIFT	22
   17.17  #define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
   17.18  #define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
   17.19 -#endif
   17.20 +
   17.21  
   17.22  #ifndef __ASSEMBLY__
   17.23  /* Just any arbitrary offset to the start of the vmalloc VM area: the
   17.24 @@ -462,7 +461,4 @@ static inline unsigned long arbitrary_vi
   17.25  #define __HAVE_ARCH_PTE_SAME
   17.26  #include <asm-generic/pgtable.h>
   17.27  
   17.28 -#define HAVE_ARCH_UNMAPPED_AREA
   17.29 -#define HAVE_ARCH_CHECK_FIXED_MAPPING
   17.30 -
   17.31  #endif /* _I386_PGTABLE_H */
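
A worked example of the header change, assuming the usual i386 configuration (TASK_SIZE = 0xC0000000, two-level paging, PGDIR_SIZE = 4MB, PTRS_PER_PGD = 1024):

/*
 *   old:  FIRST_USER_PGD_NR = 1
 *         USER_PTRS_PER_PGD = 0xC0000000/0x400000 - 1 = 767
 *         -> PGD slot 0 (virtual 0..4MB) was not a user slot
 *
 *   new:  FIRST_USER_PGD_NR = 0
 *         USER_PTRS_PER_PGD = 768
 *         -> slot 0 is an ordinary user slot, so low mappings need no
 *            arch-specific special casing
 */
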
    18.1 --- a/linux-2.6.7-xen-sparse/include/asm-xen/ctrl_if.h	Tue Aug 17 09:35:05 2004 +0000
    18.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/ctrl_if.h	Tue Aug 17 17:54:44 2004 +0000
    18.3 @@ -10,11 +10,7 @@
    18.4  #define __ASM_XEN__CTRL_IF_H__
    18.5  
    18.6  #include <asm-xen/hypervisor.h>
    18.7 -
    18.8 -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
    18.9 -#include <linux/tqueue.h>
   18.10 -#define work_struct tq_struct
   18.11 -#endif
   18.12 +#include <asm-xen/queues.h>
   18.13  
   18.14  typedef control_msg_t ctrl_msg_t;
   18.15  
   18.16 @@ -69,7 +65,7 @@ int ctrl_if_send_message_block(
   18.17   * still be executed. If this function returns 1 then the callback /will/ be
   18.18   * executed when space becomes available.
   18.19   */
   18.20 -int ctrl_if_enqueue_space_callback(struct work_struct *task);
   18.21 +int ctrl_if_enqueue_space_callback(struct tq_struct *task);
   18.22  
   18.23  /*
   18.24   * Send a response (@msg) to a message from the domain controller. This will 
    19.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    19.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/queues.h	Tue Aug 17 17:54:44 2004 +0000
    19.3 @@ -0,0 +1,75 @@
    19.4 +
    19.5 +/*
    19.6 + * Oh dear. Task queues were removed from Linux 2.6 and replaced by work 
     19.7 + * queues. Unfortunately the semantics are not the same. With task queues we 
    19.8 + * can defer work until a particular event occurs -- this is not
    19.9 + * straightforwardly done with work queues (queued work is performed asap, or
   19.10 + * after some fixed timeout). Conversely, work queues are a (slightly) neater
   19.11 + * way of deferring work to a process context than using task queues in 2.4.
   19.12 + * 
   19.13 + * So, what we do here is a bit weird:
   19.14 + *  1. On 2.4, we emulate work queues over task queues.
   19.15 + *  2. On 2.6, we emulate task queues over work queues.
   19.16 + * 
   19.17 + * Note how much harder the latter is. :-)
   19.18 + */
   19.19 +
   19.20 +#ifndef __QUEUES_H__
   19.21 +#define __QUEUES_H__
   19.22 +
   19.23 +#include <linux/version.h>
   19.24 +#include <linux/list.h>
   19.25 +#include <linux/workqueue.h>
   19.26 +
   19.27 +struct tq_struct { 
   19.28 +    struct work_struct work;
   19.29 +    struct list_head   list;
   19.30 +    unsigned long      pending;
   19.31 +};
   19.32 +#define INIT_TQUEUE(_name, _fn, _arg)               \
   19.33 +    do {                                            \
   19.34 +        INIT_LIST_HEAD(&(_name)->list);             \
   19.35 +        (_name)->pending = 0;                       \
   19.36 +        INIT_WORK(&(_name)->work, (_fn), (_arg));   \
   19.37 +    } while ( 0 )
   19.38 +
   19.39 +typedef struct {
   19.40 +    struct list_head list;
   19.41 +    spinlock_t       lock;
   19.42 +} task_queue;
   19.43 +#define DECLARE_TASK_QUEUE(_name) \
   19.44 +    task_queue _name = { LIST_HEAD_INIT((_name).list), SPIN_LOCK_UNLOCKED }
   19.45 +
   19.46 +static inline int queue_task(struct tq_struct *tqe, task_queue *tql)
   19.47 +{
   19.48 +    unsigned long flags;
   19.49 +    if ( test_and_set_bit(0, &tqe->pending) )
   19.50 +        return 0;
   19.51 +    spin_lock_irqsave(&tql->lock, flags);
   19.52 +    list_add_tail(&tqe->list, &tql->list);
   19.53 +    spin_unlock_irqrestore(&tql->lock, flags);
   19.54 +    return 1;
   19.55 +}
   19.56 +
   19.57 +static inline void run_task_queue(task_queue *tql)
   19.58 +{
   19.59 +    struct list_head head, *ent;
   19.60 +    struct tq_struct *tqe;
   19.61 +    unsigned long flags;
   19.62 +
   19.63 +    spin_lock_irqsave(&tql->lock, flags);
   19.64 +    list_add(&head, &tql->list);
   19.65 +    list_del_init(&tql->list);
   19.66 +    spin_unlock_irqrestore(&tql->lock, flags);
   19.67 +
   19.68 +    while ( !list_empty(&head) )
   19.69 +    {
   19.70 +        ent = head.next;
   19.71 +        list_del_init(ent);
   19.72 +        tqe = list_entry(ent, struct tq_struct, list);
   19.73 +        wmb(); tqe->pending = 0;
   19.74 +        schedule_work(&tqe->work);
   19.75 +    }
   19.76 +}
   19.77 +
   19.78 +#endif /* __QUEUES_H__ */
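
A minimal usage sketch of the 2.6 emulation above (my_* names invented; the real user in this changeset is ctrl_if.c's transmit-space callback list):

#include <asm-xen/queues.h>

static void my_deferred_fn(void *unused)
{
    /* Eventually runs in process context, via schedule_work(). */
}

static struct tq_struct my_task;
static DECLARE_TASK_QUEUE(my_tq);

static void my_producer(void)
{
    INIT_TQUEUE(&my_task, my_deferred_fn, NULL);
    (void)queue_task(&my_task, &my_tq);   /* no-op while already pending */
}

/* Called when the awaited event happens (e.g. ring space appears): every
 * pending tq_struct is handed to schedule_work() and runs soon after,
 * but -- unlike 2.4 -- always in keventd's process context. */
static void my_event(void)
{
    run_task_queue(&my_tq);
}
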
    20.1 --- a/linux-2.6.7-xen-sparse/mm/mmap.c	Tue Aug 17 09:35:05 2004 +0000
    20.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.3 @@ -1,1816 +0,0 @@
    20.4 -/*
    20.5 - * mm/mmap.c
    20.6 - *
    20.7 - * Written by obz.
    20.8 - *
    20.9 - * Address space accounting code	<alan@redhat.com>
   20.10 - */
   20.11 -
   20.12 -#include <linux/slab.h>
   20.13 -#include <linux/shm.h>
   20.14 -#include <linux/mman.h>
   20.15 -#include <linux/pagemap.h>
   20.16 -#include <linux/swap.h>
   20.17 -#include <linux/syscalls.h>
   20.18 -#include <linux/init.h>
   20.19 -#include <linux/file.h>
   20.20 -#include <linux/fs.h>
   20.21 -#include <linux/personality.h>
   20.22 -#include <linux/security.h>
   20.23 -#include <linux/hugetlb.h>
   20.24 -#include <linux/profile.h>
   20.25 -#include <linux/module.h>
   20.26 -#include <linux/mount.h>
   20.27 -#include <linux/mempolicy.h>
   20.28 -#include <linux/rmap.h>
   20.29 -
   20.30 -#include <asm/uaccess.h>
   20.31 -#include <asm/pgalloc.h>
   20.32 -#include <asm/cacheflush.h>
   20.33 -#include <asm/tlb.h>
   20.34 -
   20.35 -/*
   20.36 - * WARNING: the debugging will use recursive algorithms so never enable this
   20.37 - * unless you know what you are doing.
   20.38 - */
   20.39 -#undef DEBUG_MM_RB
   20.40 -
   20.41 -/* description of effects of mapping type and prot in current implementation.
   20.42 - * this is due to the limited x86 page protection hardware.  The expected
   20.43 - * behavior is in parens:
   20.44 - *
   20.45 - * map_type	prot
   20.46 - *		PROT_NONE	PROT_READ	PROT_WRITE	PROT_EXEC
   20.47 - * MAP_SHARED	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
   20.48 - *		w: (no) no	w: (no) no	w: (yes) yes	w: (no) no
   20.49 - *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
   20.50 - *		
   20.51 - * MAP_PRIVATE	r: (no) no	r: (yes) yes	r: (no) yes	r: (no) yes
   20.52 - *		w: (no) no	w: (no) no	w: (copy) copy	w: (no) no
   20.53 - *		x: (no) no	x: (no) yes	x: (no) yes	x: (yes) yes
   20.54 - *
   20.55 - */
   20.56 -pgprot_t protection_map[16] = {
   20.57 -	__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
   20.58 -	__S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
   20.59 -};
   20.60 -
   20.61 -int sysctl_overcommit_memory = 0;	/* default is heuristic overcommit */
   20.62 -int sysctl_overcommit_ratio = 50;	/* default is 50% */
   20.63 -int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
   20.64 -atomic_t vm_committed_space = ATOMIC_INIT(0);
   20.65 -
   20.66 -EXPORT_SYMBOL(sysctl_overcommit_memory);
   20.67 -EXPORT_SYMBOL(sysctl_overcommit_ratio);
   20.68 -EXPORT_SYMBOL(sysctl_max_map_count);
   20.69 -EXPORT_SYMBOL(vm_committed_space);
   20.70 -
   20.71 -/*
   20.72 - * Requires inode->i_mapping->i_mmap_lock
   20.73 - */
   20.74 -static void __remove_shared_vm_struct(struct vm_area_struct *vma,
   20.75 -		struct file *file, struct address_space *mapping)
   20.76 -{
   20.77 -	if (vma->vm_flags & VM_DENYWRITE)
   20.78 -		atomic_inc(&file->f_dentry->d_inode->i_writecount);
   20.79 -	if (vma->vm_flags & VM_SHARED)
   20.80 -		mapping->i_mmap_writable--;
   20.81 -
   20.82 -	flush_dcache_mmap_lock(mapping);
   20.83 -	if (unlikely(vma->vm_flags & VM_NONLINEAR))
   20.84 -		list_del_init(&vma->shared.vm_set.list);
   20.85 -	else
   20.86 -		vma_prio_tree_remove(vma, &mapping->i_mmap);
   20.87 -	flush_dcache_mmap_unlock(mapping);
   20.88 -}
   20.89 -
   20.90 -/*
   20.91 - * Remove one vm structure and free it.
   20.92 - */
   20.93 -static void remove_vm_struct(struct vm_area_struct *vma)
   20.94 -{
   20.95 -	struct file *file = vma->vm_file;
   20.96 -
   20.97 -	if (file) {
   20.98 -		struct address_space *mapping = file->f_mapping;
   20.99 -		spin_lock(&mapping->i_mmap_lock);
  20.100 -		__remove_shared_vm_struct(vma, file, mapping);
  20.101 -		spin_unlock(&mapping->i_mmap_lock);
  20.102 -	}
  20.103 -	if (vma->vm_ops && vma->vm_ops->close)
  20.104 -		vma->vm_ops->close(vma);
  20.105 -	if (file)
  20.106 -		fput(file);
  20.107 -	anon_vma_unlink(vma);
  20.108 -	mpol_free(vma_policy(vma));
  20.109 -	kmem_cache_free(vm_area_cachep, vma);
  20.110 -}
  20.111 -
  20.112 -/*
  20.113 - *  sys_brk() for the most part doesn't need the global kernel
  20.114 - *  lock, except when an application is doing something nasty
  20.115 - *  like trying to un-brk an area that has already been mapped
  20.116 - *  to a regular file.  in this case, the unmapping will need
  20.117 - *  to invoke file system routines that need the global lock.
  20.118 - */
  20.119 -asmlinkage unsigned long sys_brk(unsigned long brk)
  20.120 -{
  20.121 -	unsigned long rlim, retval;
  20.122 -	unsigned long newbrk, oldbrk;
  20.123 -	struct mm_struct *mm = current->mm;
  20.124 -
  20.125 -	down_write(&mm->mmap_sem);
  20.126 -
  20.127 -	if (brk < mm->end_code)
  20.128 -		goto out;
  20.129 -	newbrk = PAGE_ALIGN(brk);
  20.130 -	oldbrk = PAGE_ALIGN(mm->brk);
  20.131 -	if (oldbrk == newbrk)
  20.132 -		goto set_brk;
  20.133 -
  20.134 -	/* Always allow shrinking brk. */
  20.135 -	if (brk <= mm->brk) {
  20.136 -		if (!do_munmap(mm, newbrk, oldbrk-newbrk))
  20.137 -			goto set_brk;
  20.138 -		goto out;
  20.139 -	}
  20.140 -
  20.141 -	/* Check against rlimit.. */
  20.142 -	rlim = current->rlim[RLIMIT_DATA].rlim_cur;
  20.143 -	if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
  20.144 -		goto out;
  20.145 -
  20.146 -	/* Check against existing mmap mappings. */
  20.147 -	if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
  20.148 -		goto out;
  20.149 -
  20.150 -	/* Ok, looks good - let it rip. */
  20.151 -	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
  20.152 -		goto out;
  20.153 -set_brk:
  20.154 -	mm->brk = brk;
  20.155 -out:
  20.156 -	retval = mm->brk;
  20.157 -	up_write(&mm->mmap_sem);
  20.158 -	return retval;
  20.159 -}
  20.160 -
  20.161 -#ifdef DEBUG_MM_RB
  20.162 -static int browse_rb(struct rb_root *root)
  20.163 -{
  20.164 -	int i = 0, j;
  20.165 -	struct rb_node *nd, *pn = NULL;
  20.166 -	unsigned long prev = 0, pend = 0;
  20.167 -
  20.168 -	for (nd = rb_first(root); nd; nd = rb_next(nd)) {
  20.169 -		struct vm_area_struct *vma;
  20.170 -		vma = rb_entry(nd, struct vm_area_struct, vm_rb);
  20.171 -		if (vma->vm_start < prev)
  20.172 -			printk("vm_start %lx prev %lx\n", vma->vm_start, prev), i = -1;
  20.173 -		if (vma->vm_start < pend)
  20.174 -			printk("vm_start %lx pend %lx\n", vma->vm_start, pend);
  20.175 -		if (vma->vm_start > vma->vm_end)
  20.176 -			printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
  20.177 -		i++;
  20.178 -		pn = nd;
  20.179 -	}
  20.180 -	j = 0;
  20.181 -	for (nd = pn; nd; nd = rb_prev(nd)) {
  20.182 -		j++;
  20.183 -	}
  20.184 -	if (i != j)
  20.185 -		printk("backwards %d, forwards %d\n", j, i), i = 0;
  20.186 -	return i;
  20.187 -}
  20.188 -
  20.189 -void validate_mm(struct mm_struct *mm)
  20.190 -{
  20.191 -	int bug = 0;
  20.192 -	int i = 0;
  20.193 -	struct vm_area_struct *tmp = mm->mmap;
  20.194 -	while (tmp) {
  20.195 -		tmp = tmp->vm_next;
  20.196 -		i++;
  20.197 -	}
  20.198 -	if (i != mm->map_count)
  20.199 -		printk("map_count %d vm_next %d\n", mm->map_count, i), bug = 1;
  20.200 -	i = browse_rb(&mm->mm_rb);
  20.201 -	if (i != mm->map_count)
  20.202 -		printk("map_count %d rb %d\n", mm->map_count, i), bug = 1;
  20.203 -	if (bug)
  20.204 -		BUG();
  20.205 -}
  20.206 -#else
  20.207 -#define validate_mm(mm) do { } while (0)
  20.208 -#endif
  20.209 -
  20.210 -static struct vm_area_struct *
  20.211 -find_vma_prepare(struct mm_struct *mm, unsigned long addr,
  20.212 -		struct vm_area_struct **pprev, struct rb_node ***rb_link,
  20.213 -		struct rb_node ** rb_parent)
  20.214 -{
  20.215 -	struct vm_area_struct * vma;
  20.216 -	struct rb_node ** __rb_link, * __rb_parent, * rb_prev;
  20.217 -
  20.218 -	__rb_link = &mm->mm_rb.rb_node;
  20.219 -	rb_prev = __rb_parent = NULL;
  20.220 -	vma = NULL;
  20.221 -
  20.222 -	while (*__rb_link) {
  20.223 -		struct vm_area_struct *vma_tmp;
  20.224 -
  20.225 -		__rb_parent = *__rb_link;
  20.226 -		vma_tmp = rb_entry(__rb_parent, struct vm_area_struct, vm_rb);
  20.227 -
  20.228 -		if (vma_tmp->vm_end > addr) {
  20.229 -			vma = vma_tmp;
  20.230 -			if (vma_tmp->vm_start <= addr)
  20.231 -				return vma;
  20.232 -			__rb_link = &__rb_parent->rb_left;
  20.233 -		} else {
  20.234 -			rb_prev = __rb_parent;
  20.235 -			__rb_link = &__rb_parent->rb_right;
  20.236 -		}
  20.237 -	}
  20.238 -
  20.239 -	*pprev = NULL;
  20.240 -	if (rb_prev)
  20.241 -		*pprev = rb_entry(rb_prev, struct vm_area_struct, vm_rb);
  20.242 -	*rb_link = __rb_link;
  20.243 -	*rb_parent = __rb_parent;
  20.244 -	return vma;
  20.245 -}
  20.246 -
  20.247 -static inline void
  20.248 -__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
  20.249 -		struct vm_area_struct *prev, struct rb_node *rb_parent)
  20.250 -{
  20.251 -	if (prev) {
  20.252 -		vma->vm_next = prev->vm_next;
  20.253 -		prev->vm_next = vma;
  20.254 -	} else {
  20.255 -		mm->mmap = vma;
  20.256 -		if (rb_parent)
  20.257 -			vma->vm_next = rb_entry(rb_parent,
  20.258 -					struct vm_area_struct, vm_rb);
  20.259 -		else
  20.260 -			vma->vm_next = NULL;
  20.261 -	}
  20.262 -}
  20.263 -
  20.264 -void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
  20.265 -		struct rb_node **rb_link, struct rb_node *rb_parent)
  20.266 -{
  20.267 -	rb_link_node(&vma->vm_rb, rb_parent, rb_link);
  20.268 -	rb_insert_color(&vma->vm_rb, &mm->mm_rb);
  20.269 -}
  20.270 -
  20.271 -static inline void __vma_link_file(struct vm_area_struct *vma)
  20.272 -{
  20.273 -	struct file * file;
  20.274 -
  20.275 -	file = vma->vm_file;
  20.276 -	if (file) {
  20.277 -		struct address_space *mapping = file->f_mapping;
  20.278 -
  20.279 -		if (vma->vm_flags & VM_DENYWRITE)
  20.280 -			atomic_dec(&file->f_dentry->d_inode->i_writecount);
  20.281 -		if (vma->vm_flags & VM_SHARED)
  20.282 -			mapping->i_mmap_writable++;
  20.283 -
  20.284 -		flush_dcache_mmap_lock(mapping);
  20.285 -		if (unlikely(vma->vm_flags & VM_NONLINEAR))
  20.286 -			list_add_tail(&vma->shared.vm_set.list,
  20.287 -					&mapping->i_mmap_nonlinear);
  20.288 -		else
  20.289 -			vma_prio_tree_insert(vma, &mapping->i_mmap);
  20.290 -		flush_dcache_mmap_unlock(mapping);
  20.291 -	}
  20.292 -}
  20.293 -
  20.294 -static void
  20.295 -__vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
  20.296 -	struct vm_area_struct *prev, struct rb_node **rb_link,
  20.297 -	struct rb_node *rb_parent)
  20.298 -{
  20.299 -	__vma_link_list(mm, vma, prev, rb_parent);
  20.300 -	__vma_link_rb(mm, vma, rb_link, rb_parent);
  20.301 -	__anon_vma_link(vma);
  20.302 -}
  20.303 -
  20.304 -static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
  20.305 -			struct vm_area_struct *prev, struct rb_node **rb_link,
  20.306 -			struct rb_node *rb_parent)
  20.307 -{
  20.308 -	struct address_space *mapping = NULL;
  20.309 -
  20.310 -	if (vma->vm_file)
  20.311 -		mapping = vma->vm_file->f_mapping;
  20.312 -
  20.313 -	if (mapping)
  20.314 -		spin_lock(&mapping->i_mmap_lock);
  20.315 -	anon_vma_lock(vma);
  20.316 -
  20.317 -	__vma_link(mm, vma, prev, rb_link, rb_parent);
  20.318 -	__vma_link_file(vma);
  20.319 -
  20.320 -	anon_vma_unlock(vma);
  20.321 -	if (mapping)
  20.322 -		spin_unlock(&mapping->i_mmap_lock);
  20.323 -
  20.324 -	mark_mm_hugetlb(mm, vma);
  20.325 -	mm->map_count++;
  20.326 -	validate_mm(mm);
  20.327 -}
  20.328 -
  20.329 -/*
  20.330 - * Helper for vma_adjust in the split_vma insert case:
  20.331 - * insert vm structure into list and rbtree and anon_vma,
  20.332 - * but it has already been inserted into prio_tree earlier.
  20.333 - */
  20.334 -static void
  20.335 -__insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
  20.336 -{
  20.337 -	struct vm_area_struct * __vma, * prev;
  20.338 -	struct rb_node ** rb_link, * rb_parent;
  20.339 -
  20.340 -	__vma = find_vma_prepare(mm, vma->vm_start,&prev, &rb_link, &rb_parent);
  20.341 -	if (__vma && __vma->vm_start < vma->vm_end)
  20.342 -		BUG();
  20.343 -	__vma_link(mm, vma, prev, rb_link, rb_parent);
  20.344 -	mm->map_count++;
  20.345 -}
  20.346 -
  20.347 -static inline void
  20.348 -__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
  20.349 -		struct vm_area_struct *prev)
  20.350 -{
  20.351 -	prev->vm_next = vma->vm_next;
  20.352 -	rb_erase(&vma->vm_rb, &mm->mm_rb);
  20.353 -	if (mm->mmap_cache == vma)
  20.354 -		mm->mmap_cache = prev;
  20.355 -}
  20.356 -
  20.357 -/*
  20.358 - * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
  20.359 - * is already present in an i_mmap tree without adjusting the tree.
  20.360 - * The following helper function should be used when such adjustments
  20.361 - * are necessary.  The "insert" vma (if any) is to be inserted
  20.362 - * before we drop the necessary locks.
  20.363 - */
  20.364 -void vma_adjust(struct vm_area_struct *vma, unsigned long start,
  20.365 -	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
  20.366 -{
  20.367 -	struct mm_struct *mm = vma->vm_mm;
  20.368 -	struct vm_area_struct *next = vma->vm_next;
  20.369 -	struct address_space *mapping = NULL;
  20.370 -	struct prio_tree_root *root = NULL;
  20.371 -	struct file *file = vma->vm_file;
  20.372 -	struct anon_vma *anon_vma = NULL;
  20.373 -	long adjust_next = 0;
  20.374 -	int remove_next = 0;
  20.375 -
  20.376 -	if (next && !insert) {
  20.377 -		if (end >= next->vm_end) {
  20.378 -			/*
  20.379 -			 * vma expands, overlapping all the next, and
  20.380 -			 * perhaps the one after too (mprotect case 6).
  20.381 -			 */
  20.382 -again:			remove_next = 1 + (end > next->vm_end);
  20.383 -			end = next->vm_end;
  20.384 -			anon_vma = next->anon_vma;
  20.385 -		} else if (end > next->vm_start) {
  20.386 -			/*
  20.387 -			 * vma expands, overlapping part of the next:
  20.388 -			 * mprotect case 5 shifting the boundary up.
  20.389 -			 */
  20.390 -			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
  20.391 -			anon_vma = next->anon_vma;
  20.392 -		} else if (end < vma->vm_end) {
  20.393 -			/*
  20.394 -			 * vma shrinks, and !insert tells it's not
  20.395 -			 * split_vma inserting another: so it must be
  20.396 -			 * mprotect case 4 shifting the boundary down.
  20.397 -			 */
  20.398 -			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
  20.399 -			anon_vma = next->anon_vma;
  20.400 -		}
  20.401 -	}
  20.402 -
  20.403 -	if (file) {
  20.404 -		mapping = file->f_mapping;
  20.405 -		if (!(vma->vm_flags & VM_NONLINEAR))
  20.406 -			root = &mapping->i_mmap;
  20.407 -		spin_lock(&mapping->i_mmap_lock);
  20.408 -		if (insert) {
  20.409 -			/*
  20.410 -			 * Put into prio_tree now, so instantiated pages
  20.411 -			 * are visible to arm/parisc __flush_dcache_page
  20.412 -			 * throughout; but we cannot insert into address
  20.413 -			 * space until vma start or end is updated.
  20.414 -			 */
  20.415 -			__vma_link_file(insert);
  20.416 -		}
  20.417 -	}
  20.418 -
  20.419 -	/*
  20.420 -	 * When changing only vma->vm_end, we don't really need
  20.421 -	 * anon_vma lock: but is that case worth optimizing out?
  20.422 -	 */
  20.423 -	if (vma->anon_vma)
  20.424 -		anon_vma = vma->anon_vma;
  20.425 -	if (anon_vma)
  20.426 -		spin_lock(&anon_vma->lock);
  20.427 -
  20.428 -	if (root) {
  20.429 -		flush_dcache_mmap_lock(mapping);
  20.430 -		vma_prio_tree_remove(vma, root);
  20.431 -		if (adjust_next)
  20.432 -			vma_prio_tree_remove(next, root);
  20.433 -	}
  20.434 -
  20.435 -	vma->vm_start = start;
  20.436 -	vma->vm_end = end;
  20.437 -	vma->vm_pgoff = pgoff;
  20.438 -	if (adjust_next) {
  20.439 -		next->vm_start += adjust_next << PAGE_SHIFT;
  20.440 -		next->vm_pgoff += adjust_next;
  20.441 -	}
  20.442 -
  20.443 -	if (root) {
  20.444 -		if (adjust_next) {
  20.445 -			vma_prio_tree_init(next);
  20.446 -			vma_prio_tree_insert(next, root);
  20.447 -		}
  20.448 -		vma_prio_tree_init(vma);
  20.449 -		vma_prio_tree_insert(vma, root);
  20.450 -		flush_dcache_mmap_unlock(mapping);
  20.451 -	}
  20.452 -
  20.453 -	if (remove_next) {
  20.454 -		/*
  20.455 -		 * vma_merge has merged next into vma, and needs
  20.456 -		 * us to remove next before dropping the locks.
  20.457 -		 */
  20.458 -		__vma_unlink(mm, next, vma);
  20.459 -		if (file)
  20.460 -			__remove_shared_vm_struct(next, file, mapping);
  20.461 -		if (next->anon_vma)
  20.462 -			__anon_vma_merge(vma, next);
  20.463 -	} else if (insert) {
  20.464 -		/*
  20.465 -		 * split_vma has split insert from vma, and needs
  20.466 -		 * us to insert it before dropping the locks
  20.467 -		 * (it may either follow vma or precede it).
  20.468 -		 */
  20.469 -		__insert_vm_struct(mm, insert);
  20.470 -	}
  20.471 -
  20.472 -	if (anon_vma)
  20.473 -		spin_unlock(&anon_vma->lock);
  20.474 -	if (mapping)
  20.475 -		spin_unlock(&mapping->i_mmap_lock);
  20.476 -
  20.477 -	if (remove_next) {
  20.478 -		if (file)
  20.479 -			fput(file);
  20.480 -		mm->map_count--;
  20.481 -		mpol_free(vma_policy(next));
  20.482 -		kmem_cache_free(vm_area_cachep, next);
  20.483 -		/*
  20.484 -		 * In mprotect's case 6 (see comments on vma_merge),
  20.485 -		 * we must remove another next too. It would clutter
  20.486 -		 * up the code too much to do both in one go.
  20.487 -		 */
  20.488 -		if (remove_next == 2) {
  20.489 -			next = vma->vm_next;
  20.490 -			goto again;
  20.491 -		}
  20.492 -	}
  20.493 -
  20.494 -	validate_mm(mm);
  20.495 -}
  20.496 -
  20.497 -/*
  20.498 - * If the vma has a ->close operation then the driver probably needs to release
  20.499 - * per-vma resources, so we don't attempt to merge those.
  20.500 - */
  20.501 -#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED)
  20.502 -
  20.503 -static inline int is_mergeable_vma(struct vm_area_struct *vma,
  20.504 -			struct file *file, unsigned long vm_flags)
  20.505 -{
  20.506 -	if (vma->vm_flags != vm_flags)
  20.507 -		return 0;
  20.508 -	if (vma->vm_file != file)
  20.509 -		return 0;
  20.510 -	if (vma->vm_ops && vma->vm_ops->close)
  20.511 -		return 0;
  20.512 -	return 1;
  20.513 -}
  20.514 -
  20.515 -static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
  20.516 -					struct anon_vma *anon_vma2)
  20.517 -{
  20.518 -	return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
  20.519 -}
  20.520 -
  20.521 -/*
  20.522 - * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
  20.523 - * in front of (at a lower virtual address and file offset than) the vma.
  20.524 - *
  20.525 - * We cannot merge two vmas if they have differently assigned (non-NULL)
  20.526 - * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
  20.527 - *
  20.528 - * We don't check here for the merged mmap wrapping around the end of pagecache
  20.529 - * indices (16TB on ia32) because do_mmap_pgoff() does not permit mmap's which
  20.530 - * wrap, nor mmaps which cover the final page at index -1UL.
  20.531 - */
  20.532 -static int
  20.533 -can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
  20.534 -	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
  20.535 -{
  20.536 -	if (is_mergeable_vma(vma, file, vm_flags) &&
  20.537 -	    is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
  20.538 -		if (vma->vm_pgoff == vm_pgoff)
  20.539 -			return 1;
  20.540 -	}
  20.541 -	return 0;
  20.542 -}
  20.543 -
  20.544 -/*
  20.545 - * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
  20.546 - * beyond (at a higher virtual address and file offset than) the vma.
  20.547 - *
  20.548 - * We cannot merge two vmas if they have differently assigned (non-NULL)
  20.549 - * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
  20.550 - */
  20.551 -static int
  20.552 -can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
  20.553 -	struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
  20.554 -{
  20.555 -	if (is_mergeable_vma(vma, file, vm_flags) &&
  20.556 -	    is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
  20.557 -		pgoff_t vm_pglen;
  20.558 -		vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
  20.559 -		if (vma->vm_pgoff + vm_pglen == vm_pgoff)
  20.560 -			return 1;
  20.561 -	}
  20.562 -	return 0;
  20.563 -}
  20.564 -
  20.565 -/*
  20.566 - * Given a mapping request (addr,end,vm_flags,file,pgoff), figure out
  20.567 - * whether that can be merged with its predecessor or its successor.
  20.568 - * Or both (it neatly fills a hole).
  20.569 - *
  20.570 - * In most cases - when called for mmap, brk or mremap - [addr,end) is
  20.571 - * certain not to be mapped by the time vma_merge is called; but when
  20.572 - * called for mprotect, it is certain to be already mapped (either at
  20.573 - * an offset within prev, or at the start of next), and the flags of
  20.574 - * this area are about to be changed to vm_flags - and the no-change
  20.575 - * case has already been eliminated.
  20.576 - *
  20.577 - * The following mprotect cases have to be considered, where AAAA is
  20.578 - * the area passed down from mprotect_fixup, never extending beyond one
  20.579 - * vma, PPPPPP is the prev vma specified, and NNNNNN the next vma after:
  20.580 - *
  20.581 - *     AAAA             AAAA                AAAA          AAAA
  20.582 - *    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPPPNNNNNN    PPPPNNNNXXXX
  20.583 - *    cannot merge    might become    might become    might become
  20.584 - *                    PPNNNNNNNNNN    PPPPPPPPPPNN    PPPPPPPPPPPP 6 or
  20.585 - *    mmap, brk or    case 4 below    case 5 below    PPPPPPPPXXXX 7 or
  20.586 - *    mremap move:                                    PPPPNNNNNNNN 8
  20.587 - *        AAAA
  20.588 - *    PPPP    NNNN    PPPPPPPPPPPP    PPPPPPPPNNNN    PPPPNNNNNNNN
  20.589 - *    might become    case 1 below    case 2 below    case 3 below
  20.590 - *
  20.591 - * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX:
  20.592 - * mprotect_fixup updates vm_flags & vm_page_prot on successful return.
  20.593 - */
  20.594 -struct vm_area_struct *vma_merge(struct mm_struct *mm,
  20.595 -			struct vm_area_struct *prev, unsigned long addr,
  20.596 -			unsigned long end, unsigned long vm_flags,
  20.597 -		     	struct anon_vma *anon_vma, struct file *file,
  20.598 -			pgoff_t pgoff, struct mempolicy *policy)
  20.599 -{
  20.600 -	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
  20.601 -	struct vm_area_struct *area, *next;
  20.602 -
  20.603 -	/*
  20.604 -	 * We later require that vma->vm_flags == vm_flags,
  20.605 -	 * so this tests vma->vm_flags & VM_SPECIAL, too.
  20.606 -	 */
  20.607 -	if (vm_flags & VM_SPECIAL)
  20.608 -		return NULL;
  20.609 -
  20.610 -	if (prev)
  20.611 -		next = prev->vm_next;
  20.612 -	else
  20.613 -		next = mm->mmap;
  20.614 -	area = next;
  20.615 -	if (next && next->vm_end == end)		/* cases 6, 7, 8 */
  20.616 -		next = next->vm_next;
  20.617 -
  20.618 -	/*
  20.619 -	 * Can it merge with the predecessor?
  20.620 -	 */
  20.621 -	if (prev && prev->vm_end == addr &&
  20.622 -  			mpol_equal(vma_policy(prev), policy) &&
  20.623 -			can_vma_merge_after(prev, vm_flags,
  20.624 -						anon_vma, file, pgoff)) {
  20.625 -		/*
  20.626 -		 * OK, it can.  Can we now merge in the successor as well?
  20.627 -		 */
  20.628 -		if (next && end == next->vm_start &&
  20.629 -				mpol_equal(policy, vma_policy(next)) &&
  20.630 -				can_vma_merge_before(next, vm_flags,
  20.631 -					anon_vma, file, pgoff+pglen) &&
  20.632 -				is_mergeable_anon_vma(prev->anon_vma,
  20.633 -						      next->anon_vma)) {
  20.634 -							/* cases 1, 6 */
  20.635 -			vma_adjust(prev, prev->vm_start,
  20.636 -				next->vm_end, prev->vm_pgoff, NULL);
  20.637 -		} else					/* cases 2, 5, 7 */
  20.638 -			vma_adjust(prev, prev->vm_start,
  20.639 -				end, prev->vm_pgoff, NULL);
  20.640 -		return prev;
  20.641 -	}
  20.642 -
  20.643 -	/*
  20.644 -	 * Can this new request be merged in front of next?
  20.645 -	 */
  20.646 -	if (next && end == next->vm_start &&
  20.647 - 			mpol_equal(policy, vma_policy(next)) &&
  20.648 -			can_vma_merge_before(next, vm_flags,
  20.649 -					anon_vma, file, pgoff+pglen)) {
  20.650 -		if (prev && addr < prev->vm_end)	/* case 4 */
  20.651 -			vma_adjust(prev, prev->vm_start,
  20.652 -				addr, prev->vm_pgoff, NULL);
  20.653 -		else					/* cases 3, 8 */
  20.654 -			vma_adjust(area, addr, next->vm_end,
  20.655 -				next->vm_pgoff - pglen, NULL);
  20.656 -		return area;
  20.657 -	}
  20.658 -
  20.659 -	return NULL;
  20.660 -}
  20.661 -
  20.662 -/*
  20.663 - * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  20.664 - * neighbouring vmas for a suitable anon_vma, before it goes off
  20.665 - * to allocate a new anon_vma.  It checks because a repetitive
  20.666 - * sequence of mprotects and faults may otherwise lead to distinct
  20.667 - * anon_vmas being allocated, preventing vma merge in subsequent
  20.668 - * mprotect.
  20.669 - */
  20.670 -struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
  20.671 -{
  20.672 -	struct vm_area_struct *near;
  20.673 -	unsigned long vm_flags;
  20.674 -
  20.675 -	near = vma->vm_next;
  20.676 -	if (!near)
  20.677 -		goto try_prev;
  20.678 -
  20.679 -	/*
  20.680 -	 * Since only mprotect tries to remerge vmas, match flags
  20.681 -	 * which might be mprotected into each other later on.
  20.682 -	 * Neither mlock nor madvise tries to remerge at present,
  20.683 -	 * so leave their flags as obstructing a merge.
  20.684 -	 */
  20.685 -	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
  20.686 -	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
  20.687 -
  20.688 -	if (near->anon_vma && vma->vm_end == near->vm_start &&
  20.689 - 			mpol_equal(vma_policy(vma), vma_policy(near)) &&
  20.690 -			can_vma_merge_before(near, vm_flags,
  20.691 -				NULL, vma->vm_file, vma->vm_pgoff +
  20.692 -				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
  20.693 -		return near->anon_vma;
  20.694 -try_prev:
  20.695 -	/*
  20.696 -	 * It is potentially slow to have to call find_vma_prev here.
  20.697 -	 * But it's only on the first write fault on the vma, not
  20.698 -	 * every time, and we could devise a way to avoid it later
  20.699 -	 * (e.g. stash info in next's anon_vma_node when assigning
  20.700 -	 * an anon_vma, or when trying vma_merge).  Another time.
  20.701 -	 */
  20.702 -	if (find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma)
  20.703 -		BUG();
  20.704 -	if (!near)
  20.705 -		goto none;
  20.706 -
  20.707 -	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
  20.708 -	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
  20.709 -
  20.710 -	if (near->anon_vma && near->vm_end == vma->vm_start &&
  20.711 -  			mpol_equal(vma_policy(near), vma_policy(vma)) &&
  20.712 -			can_vma_merge_after(near, vm_flags,
  20.713 -				NULL, vma->vm_file, vma->vm_pgoff))
  20.714 -		return near->anon_vma;
  20.715 -none:
  20.716 -	/*
  20.717 -	 * There's no absolute need to look only at touching neighbours:
  20.718 -	 * we could search further afield for "compatible" anon_vmas.
  20.719 -	 * But it would probably just be a waste of time searching,
  20.720 -	 * or lead to too many vmas hanging off the same anon_vma.
  20.721 -	 * We're trying to allow mprotect remerging later on,
  20.722 -	 * not trying to minimize memory used for anon_vmas.
  20.723 -	 */
  20.724 -	return NULL;
  20.725 -}
  20.726 -
  20.727 -/*
  20.728 - * The caller must hold down_write(current->mm->mmap_sem).
  20.729 - */
  20.730 -
  20.731 -unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
  20.732 -			unsigned long len, unsigned long prot,
  20.733 -			unsigned long flags, unsigned long pgoff)
  20.734 -{
  20.735 -	struct mm_struct * mm = current->mm;
  20.736 -	struct vm_area_struct * vma, * prev;
  20.737 -	struct inode *inode;
  20.738 -	unsigned int vm_flags;
  20.739 -	int correct_wcount = 0;
  20.740 -	int error;
  20.741 -	struct rb_node ** rb_link, * rb_parent;
  20.742 -	int accountable = 1;
  20.743 -	unsigned long charged = 0;
  20.744 -
  20.745 -	if (file) {
  20.746 -		if (is_file_hugepages(file))
  20.747 -			accountable = 0;
  20.748 -
  20.749 -		if (!file->f_op || !file->f_op->mmap)
  20.750 -			return -ENODEV;
  20.751 -
  20.752 -		if ((prot & PROT_EXEC) &&
  20.753 -		    (file->f_vfsmnt->mnt_flags & MNT_NOEXEC))
  20.754 -			return -EPERM;
  20.755 -	}
  20.756 -
  20.757 -	if (!len)
  20.758 -		return addr;
  20.759 -
  20.760 -	/* Careful about overflows.. */
  20.761 -	len = PAGE_ALIGN(len);
  20.762 -	if (!len || len > TASK_SIZE)
  20.763 -		return -EINVAL;
  20.764 -
  20.765 -	/* offset overflow? */
  20.766 -	if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
  20.767 -		return -EINVAL;
  20.768 -
  20.769 -	/* Too many mappings? */
  20.770 -	if (mm->map_count > sysctl_max_map_count)
  20.771 -		return -ENOMEM;
  20.772 -
  20.773 -	/* Obtain the address to map to. we verify (or select) it and ensure
  20.774 -	 * that it represents a valid section of the address space.
  20.775 -	 */
  20.776 -	addr = get_unmapped_area(file, addr, len, pgoff, flags);
  20.777 -	if (addr & ~PAGE_MASK)
  20.778 -		return addr;
  20.779 -
  20.780 -	/* Do simple checking here so the lower-level routines won't have
   20.781 -	 * to. We assume access permissions have been handled by the open
  20.782 -	 * of the memory object, so we don't do any here.
  20.783 -	 */
  20.784 -	vm_flags = calc_vm_prot_bits(prot) | calc_vm_flag_bits(flags) |
  20.785 -			mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
  20.786 -
  20.787 -	if (flags & MAP_LOCKED) {
  20.788 -		if (!capable(CAP_IPC_LOCK))
  20.789 -			return -EPERM;
  20.790 -		vm_flags |= VM_LOCKED;
  20.791 -	}
  20.792 -	/* mlock MCL_FUTURE? */
  20.793 -	if (vm_flags & VM_LOCKED) {
  20.794 -		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
  20.795 -		locked += len;
  20.796 -		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
  20.797 -			return -EAGAIN;
  20.798 -	}
  20.799 -
  20.800 -	inode = file ? file->f_dentry->d_inode : NULL;
  20.801 -
  20.802 -	if (file) {
  20.803 -		switch (flags & MAP_TYPE) {
  20.804 -		case MAP_SHARED:
  20.805 -			if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
  20.806 -				return -EACCES;
  20.807 -
  20.808 -			/*
  20.809 -			 * Make sure we don't allow writing to an append-only
  20.810 -			 * file..
  20.811 -			 */
  20.812 -			if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
  20.813 -				return -EACCES;
  20.814 -
  20.815 -			/*
  20.816 -			 * Make sure there are no mandatory locks on the file.
  20.817 -			 */
  20.818 -			if (locks_verify_locked(inode))
  20.819 -				return -EAGAIN;
  20.820 -
  20.821 -			vm_flags |= VM_SHARED | VM_MAYSHARE;
  20.822 -			if (!(file->f_mode & FMODE_WRITE))
  20.823 -				vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
  20.824 -
  20.825 -			/* fall through */
  20.826 -		case MAP_PRIVATE:
  20.827 -			if (!(file->f_mode & FMODE_READ))
  20.828 -				return -EACCES;
  20.829 -			break;
  20.830 -
  20.831 -		default:
  20.832 -			return -EINVAL;
  20.833 -		}
  20.834 -	} else {
  20.835 -		switch (flags & MAP_TYPE) {
  20.836 -		case MAP_SHARED:
  20.837 -			vm_flags |= VM_SHARED | VM_MAYSHARE;
  20.838 -			break;
  20.839 -		case MAP_PRIVATE:
  20.840 -			/*
  20.841 -			 * Set pgoff according to addr for anon_vma.
  20.842 -			 */
  20.843 -			pgoff = addr >> PAGE_SHIFT;
  20.844 -			break;
  20.845 -		default:
  20.846 -			return -EINVAL;
  20.847 -		}
  20.848 -	}
  20.849 -
  20.850 -	error = security_file_mmap(file, prot, flags);
  20.851 -	if (error)
  20.852 -		return error;
  20.853 -		
  20.854 -	/* Clear old maps */
  20.855 -	error = -ENOMEM;
  20.856 -munmap_back:
  20.857 -	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
  20.858 -	if (vma && vma->vm_start < addr + len) {
  20.859 -		if (do_munmap(mm, addr, len))
  20.860 -			return -ENOMEM;
  20.861 -		goto munmap_back;
  20.862 -	}
  20.863 -
  20.864 -	/* Check against address space limit. */
  20.865 -	if ((mm->total_vm << PAGE_SHIFT) + len
  20.866 -	    > current->rlim[RLIMIT_AS].rlim_cur)
  20.867 -		return -ENOMEM;
  20.868 -
  20.869 -	if (accountable && (!(flags & MAP_NORESERVE) ||
  20.870 -			sysctl_overcommit_memory > 1)) {
  20.871 -		if (vm_flags & VM_SHARED) {
  20.872 -			/* Check memory availability in shmem_file_setup? */
  20.873 -			vm_flags |= VM_ACCOUNT;
  20.874 -		} else if (vm_flags & VM_WRITE) {
  20.875 -			/*
  20.876 -			 * Private writable mapping: check memory availability
  20.877 -			 */
  20.878 -			charged = len >> PAGE_SHIFT;
  20.879 -			if (security_vm_enough_memory(charged))
  20.880 -				return -ENOMEM;
  20.881 -			vm_flags |= VM_ACCOUNT;
  20.882 -		}
  20.883 -	}
  20.884 -
  20.885 -	/*
  20.886 -	 * Can we just expand an old private anonymous mapping?
  20.887 -	 * The VM_SHARED test is necessary because shmem_zero_setup
  20.888 -	 * will create the file object for a shared anonymous map below.
  20.889 -	 */
  20.890 -	if (!file && !(vm_flags & VM_SHARED) &&
  20.891 -	    vma_merge(mm, prev, addr, addr + len, vm_flags,
  20.892 -					NULL, NULL, pgoff, NULL))
  20.893 -		goto out;
  20.894 -
  20.895 -	/*
  20.896 -	 * Determine the object being mapped and call the appropriate
   20.898 -	 * specific mapper. The address has already been validated but
   20.898 -	 * not yet unmapped; the maps have, however, been removed from the list.
  20.899 -	 */
  20.900 -	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
  20.901 -	if (!vma) {
  20.902 -		error = -ENOMEM;
  20.903 -		goto unacct_error;
  20.904 -	}
  20.905 -	memset(vma, 0, sizeof(*vma));
  20.906 -
  20.907 -	vma->vm_mm = mm;
  20.908 -	vma->vm_start = addr;
  20.909 -	vma->vm_end = addr + len;
  20.910 -	vma->vm_flags = vm_flags;
  20.911 -	vma->vm_page_prot = protection_map[vm_flags & 0x0f];
  20.912 -	vma->vm_pgoff = pgoff;
  20.913 -
  20.914 -	if (file) {
  20.915 -		error = -EINVAL;
  20.916 -		if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
  20.917 -			goto free_vma;
  20.918 -		if (vm_flags & VM_DENYWRITE) {
  20.919 -			error = deny_write_access(file);
  20.920 -			if (error)
  20.921 -				goto free_vma;
  20.922 -			correct_wcount = 1;
  20.923 -		}
  20.924 -		vma->vm_file = file;
  20.925 -		get_file(file);
  20.926 -		error = file->f_op->mmap(file, vma);
  20.927 -		if (error)
  20.928 -			goto unmap_and_free_vma;
  20.929 -	} else if (vm_flags & VM_SHARED) {
  20.930 -		error = shmem_zero_setup(vma);
  20.931 -		if (error)
  20.932 -			goto free_vma;
  20.933 -	}
  20.934 -
  20.935 -	/* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform
  20.936 -	 * shmem_zero_setup (perhaps called through /dev/zero's ->mmap)
  20.937 -	 * that memory reservation must be checked; but that reservation
  20.938 -	 * belongs to shared memory object, not to vma: so now clear it.
  20.939 -	 */
  20.940 -	if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT))
  20.941 -		vma->vm_flags &= ~VM_ACCOUNT;
  20.942 -
  20.943 -	/* Can addr have changed??
  20.944 -	 *
  20.945 -	 * Answer: Yes, several device drivers can do it in their
  20.946 -	 *         f_op->mmap method. -DaveM
  20.947 -	 */
  20.948 -	addr = vma->vm_start;
  20.949 -
  20.950 -	if (!file || !vma_merge(mm, prev, addr, vma->vm_end,
  20.951 -			vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) {
  20.952 -		vma_link(mm, vma, prev, rb_link, rb_parent);
  20.953 -		if (correct_wcount)
  20.954 -			atomic_inc(&inode->i_writecount);
  20.955 -	} else {
  20.956 -		if (file) {
  20.957 -			if (correct_wcount)
  20.958 -				atomic_inc(&inode->i_writecount);
  20.959 -			fput(file);
  20.960 -		}
  20.961 -		mpol_free(vma_policy(vma));
  20.962 -		kmem_cache_free(vm_area_cachep, vma);
  20.963 -	}
  20.964 -out:	
  20.965 -	mm->total_vm += len >> PAGE_SHIFT;
  20.966 -	if (vm_flags & VM_LOCKED) {
  20.967 -		mm->locked_vm += len >> PAGE_SHIFT;
  20.968 -		make_pages_present(addr, addr + len);
  20.969 -	}
  20.970 -	if (flags & MAP_POPULATE) {
  20.971 -		up_write(&mm->mmap_sem);
  20.972 -		sys_remap_file_pages(addr, len, 0,
  20.973 -					pgoff, flags & MAP_NONBLOCK);
  20.974 -		down_write(&mm->mmap_sem);
  20.975 -	}
  20.976 -	return addr;
  20.977 -
  20.978 -unmap_and_free_vma:
  20.979 -	if (correct_wcount)
  20.980 -		atomic_inc(&inode->i_writecount);
  20.981 -	vma->vm_file = NULL;
  20.982 -	fput(file);
  20.983 -
  20.984 -	/* Undo any partial mapping done by a device driver. */
  20.985 -	zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
  20.986 -free_vma:
  20.987 -	kmem_cache_free(vm_area_cachep, vma);
  20.988 -unacct_error:
  20.989 -	if (charged)
  20.990 -		vm_unacct_memory(charged);
  20.991 -	return error;
  20.992 -}
  20.993 -
  20.994 -EXPORT_SYMBOL(do_mmap_pgoff);
  20.995 -
  20.996 -/* Get an address range which is currently unmapped.
  20.997 - * For shmat() with addr=0.
  20.998 - *
  20.999 - * Ugly calling convention alert:
 20.1000 - * Return value with the low bits set means error value,
 20.1001 - * ie
 20.1002 - *	if (ret & ~PAGE_MASK)
 20.1003 - *		error = ret;
 20.1004 - *
 20.1005 - * This function "knows" that -ENOMEM has the bits set.
 20.1006 - */
 20.1007 -#ifndef HAVE_ARCH_UNMAPPED_AREA
 20.1008 -static inline unsigned long
 20.1009 -arch_get_unmapped_area(struct file *filp, unsigned long addr,
 20.1010 -		unsigned long len, unsigned long pgoff, unsigned long flags)
 20.1011 -{
 20.1012 -	struct mm_struct *mm = current->mm;
 20.1013 -	struct vm_area_struct *vma;
 20.1014 -	unsigned long start_addr;
 20.1015 -
 20.1016 -	if (len > TASK_SIZE)
 20.1017 -		return -ENOMEM;
 20.1018 -
 20.1019 -	if (addr) {
 20.1020 -		addr = PAGE_ALIGN(addr);
 20.1021 -		vma = find_vma(mm, addr);
 20.1022 -		if (TASK_SIZE - len >= addr &&
 20.1023 -		    (!vma || addr + len <= vma->vm_start))
 20.1024 -			return addr;
 20.1025 -	}
 20.1026 -	start_addr = addr = mm->free_area_cache;
 20.1027 -
 20.1028 -full_search:
 20.1029 -	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
 20.1030 -		/* At this point:  (!vma || addr < vma->vm_end). */
 20.1031 -		if (TASK_SIZE - len < addr) {
 20.1032 -			/*
 20.1033 -			 * Start a new search - just in case we missed
 20.1034 -			 * some holes.
 20.1035 -			 */
 20.1036 -			if (start_addr != TASK_UNMAPPED_BASE) {
 20.1037 -				start_addr = addr = TASK_UNMAPPED_BASE;
 20.1038 -				goto full_search;
 20.1039 -			}
 20.1040 -			return -ENOMEM;
 20.1041 -		}
 20.1042 -		if (!vma || addr + len <= vma->vm_start) {
 20.1043 -			/*
 20.1044 -			 * Remember the place where we stopped the search:
 20.1045 -			 */
 20.1046 -			mm->free_area_cache = addr + len;
 20.1047 -			return addr;
 20.1048 -		}
 20.1049 -		addr = vma->vm_end;
 20.1050 -	}
 20.1051 -}
 20.1052 -#else
 20.1053 -extern unsigned long
 20.1054 -arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
 20.1055 -			unsigned long, unsigned long);
 20.1056 -#endif	
 20.1057 -
 20.1058 -#ifndef HAVE_ARCH_CHECK_FIXED_MAPPING
 20.1059 -#define arch_check_fixed_mapping(_file,_addr,_len,_pgoff,_flags) 0
 20.1060 -#else
 20.1061 -extern unsigned long
 20.1062 -arch_check_fixed_mapping(struct file *, unsigned long, unsigned long,
 20.1063 -			unsigned long, unsigned long);
 20.1064 -#endif
 20.1065 -
 20.1066 -unsigned long
 20.1067 -get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 20.1068 -		unsigned long pgoff, unsigned long flags)
 20.1069 -{
 20.1070 -	if (flags & MAP_FIXED) {
 20.1071 -		unsigned long ret;
 20.1072 -
 20.1073 -		if (addr > TASK_SIZE - len)
 20.1074 -			return -ENOMEM;
 20.1075 -		if (addr & ~PAGE_MASK)
 20.1076 -			return -EINVAL;
 20.1077 -		ret = arch_check_fixed_mapping(file, addr, len, pgoff, flags);
 20.1078 -		if (ret != 0)
 20.1079 -			return ret;
 20.1080 -		if (file && is_file_hugepages(file))  {
 20.1081 -			/*
 20.1082 -			 * Check if the given range is hugepage aligned, and
 20.1083 -			 * can be made suitable for hugepages.
 20.1084 -			 */
 20.1085 -			ret = prepare_hugepage_range(addr, len);
 20.1086 -		} else {
 20.1087 -			/*
 20.1088 -			 * Ensure that a normal request is not falling in a
 20.1089 -			 * reserved hugepage range.  For some archs like IA-64,
 20.1090 -			 * there is a separate region for hugepages.
 20.1091 -			 */
 20.1092 -			ret = is_hugepage_only_range(addr, len);
 20.1093 -		}
 20.1094 -		if (ret)
 20.1095 -			return -EINVAL;
 20.1096 -		return addr;
 20.1097 -	}
 20.1098 -
 20.1099 -	if (file && file->f_op && file->f_op->get_unmapped_area)
 20.1100 -		return file->f_op->get_unmapped_area(file, addr, len,
 20.1101 -						pgoff, flags);
 20.1102 -
 20.1103 -	return arch_get_unmapped_area(file, addr, len, pgoff, flags);
 20.1104 -}
 20.1105 -
 20.1106 -EXPORT_SYMBOL(get_unmapped_area);
 20.1107 -
 20.1108 -/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 20.1109 -struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr)
 20.1110 -{
 20.1111 -	struct vm_area_struct *vma = NULL;
 20.1112 -
 20.1113 -	if (mm) {
 20.1114 -		/* Check the cache first. */
 20.1115 -		/* (Cache hit rate is typically around 35%.) */
 20.1116 -		vma = mm->mmap_cache;
 20.1117 -		if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
 20.1118 -			struct rb_node * rb_node;
 20.1119 -
 20.1120 -			rb_node = mm->mm_rb.rb_node;
 20.1121 -			vma = NULL;
 20.1122 -
 20.1123 -			while (rb_node) {
 20.1124 -				struct vm_area_struct * vma_tmp;
 20.1125 -
 20.1126 -				vma_tmp = rb_entry(rb_node,
 20.1127 -						struct vm_area_struct, vm_rb);
 20.1128 -
 20.1129 -				if (vma_tmp->vm_end > addr) {
 20.1130 -					vma = vma_tmp;
 20.1131 -					if (vma_tmp->vm_start <= addr)
 20.1132 -						break;
 20.1133 -					rb_node = rb_node->rb_left;
 20.1134 -				} else
 20.1135 -					rb_node = rb_node->rb_right;
 20.1136 -			}
 20.1137 -			if (vma)
 20.1138 -				mm->mmap_cache = vma;
 20.1139 -		}
 20.1140 -	}
 20.1141 -	return vma;
 20.1142 -}
 20.1143 -
 20.1144 -EXPORT_SYMBOL(find_vma);
 20.1145 -
 20.1146 -/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
 20.1147 -struct vm_area_struct *
 20.1148 -find_vma_prev(struct mm_struct *mm, unsigned long addr,
 20.1149 -			struct vm_area_struct **pprev)
 20.1150 -{
 20.1151 -	struct vm_area_struct *vma = NULL, *prev = NULL;
 20.1152 -	struct rb_node * rb_node;
 20.1153 -	if (!mm)
 20.1154 -		goto out;
 20.1155 -
 20.1156 -	/* Guard against addr being lower than the first VMA */
 20.1157 -	vma = mm->mmap;
 20.1158 -
 20.1159 -	/* Go through the RB tree quickly. */
 20.1160 -	rb_node = mm->mm_rb.rb_node;
 20.1161 -
 20.1162 -	while (rb_node) {
 20.1163 -		struct vm_area_struct *vma_tmp;
 20.1164 -		vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
 20.1165 -
 20.1166 -		if (addr < vma_tmp->vm_end) {
 20.1167 -			rb_node = rb_node->rb_left;
 20.1168 -		} else {
 20.1169 -			prev = vma_tmp;
 20.1170 -			if (!prev->vm_next || (addr < prev->vm_next->vm_end))
 20.1171 -				break;
 20.1172 -			rb_node = rb_node->rb_right;
 20.1173 -		}
 20.1174 -	}
 20.1175 -
 20.1176 -out:
 20.1177 -	*pprev = prev;
 20.1178 -	return prev ? prev->vm_next : vma;
 20.1179 -}
 20.1180 -
 20.1181 -#ifdef CONFIG_STACK_GROWSUP
 20.1182 -/*
 20.1183 - * vma is the first one with address > vma->vm_end.  Have to extend vma.
 20.1184 - */
 20.1185 -int expand_stack(struct vm_area_struct * vma, unsigned long address)
 20.1186 -{
 20.1187 -	unsigned long grow;
 20.1188 -
 20.1189 -	if (!(vma->vm_flags & VM_GROWSUP))
 20.1190 -		return -EFAULT;
 20.1191 -
 20.1192 -	/*
 20.1193 -	 * We must make sure the anon_vma is allocated
 20.1194 -	 * so that the anon_vma locking is not a noop.
 20.1195 -	 */
 20.1196 -	if (unlikely(anon_vma_prepare(vma)))
 20.1197 -		return -ENOMEM;
 20.1198 -	anon_vma_lock(vma);
 20.1199 -
 20.1200 -	/*
 20.1201 -	 * vma->vm_start/vm_end cannot change under us because the caller
 20.1202 -	 * is required to hold the mmap_sem in read mode.  We need the
 20.1203 -	 * anon_vma lock to serialize against concurrent expand_stacks.
 20.1204 -	 */
 20.1205 -	address += 4 + PAGE_SIZE - 1;
 20.1206 -	address &= PAGE_MASK;
 20.1207 -	grow = (address - vma->vm_end) >> PAGE_SHIFT;
 20.1208 -
 20.1209 -	/* Overcommit.. */
 20.1210 -	if (security_vm_enough_memory(grow)) {
 20.1211 -		anon_vma_unlock(vma);
 20.1212 -		return -ENOMEM;
 20.1213 -	}
 20.1214 -	
 20.1215 -	if (address - vma->vm_start > current->rlim[RLIMIT_STACK].rlim_cur ||
 20.1216 -			((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
 20.1217 -			current->rlim[RLIMIT_AS].rlim_cur) {
 20.1218 -		anon_vma_unlock(vma);
 20.1219 -		vm_unacct_memory(grow);
 20.1220 -		return -ENOMEM;
 20.1221 -	}
 20.1222 -	vma->vm_end = address;
 20.1223 -	vma->vm_mm->total_vm += grow;
 20.1224 -	if (vma->vm_flags & VM_LOCKED)
 20.1225 -		vma->vm_mm->locked_vm += grow;
 20.1226 -	anon_vma_unlock(vma);
 20.1227 -	return 0;
 20.1228 -}
 20.1229 -
 20.1230 -struct vm_area_struct *
 20.1231 -find_extend_vma(struct mm_struct *mm, unsigned long addr)
 20.1232 -{
 20.1233 -	struct vm_area_struct *vma, *prev;
 20.1234 -
 20.1235 -	addr &= PAGE_MASK;
 20.1236 -	vma = find_vma_prev(mm, addr, &prev);
 20.1237 -	if (vma && (vma->vm_start <= addr))
 20.1238 -		return vma;
 20.1239 -	if (!prev || expand_stack(prev, addr))
 20.1240 -		return NULL;
 20.1241 -	if (prev->vm_flags & VM_LOCKED) {
 20.1242 -		make_pages_present(addr, prev->vm_end);
 20.1243 -	}
 20.1244 -	return prev;
 20.1245 -}
 20.1246 -#else
 20.1247 -/*
 20.1248 - * vma is the first one with address < vma->vm_start.  Have to extend vma.
 20.1249 - */
 20.1250 -int expand_stack(struct vm_area_struct *vma, unsigned long address)
 20.1251 -{
 20.1252 -	unsigned long grow;
 20.1253 -
 20.1254 -	/*
 20.1255 -	 * We must make sure the anon_vma is allocated
 20.1256 -	 * so that the anon_vma locking is not a noop.
 20.1257 -	 */
 20.1258 -	if (unlikely(anon_vma_prepare(vma)))
 20.1259 -		return -ENOMEM;
 20.1260 -	anon_vma_lock(vma);
 20.1261 -
 20.1262 -	/*
 20.1263 -	 * vma->vm_start/vm_end cannot change under us because the caller
 20.1264 -	 * is required to hold the mmap_sem in read mode.  We need the
 20.1265 -	 * anon_vma lock to serialize against concurrent expand_stacks.
 20.1266 -	 */
 20.1267 -	address &= PAGE_MASK;
 20.1268 -	grow = (vma->vm_start - address) >> PAGE_SHIFT;
 20.1269 -
 20.1270 -	/* Overcommit.. */
 20.1271 -	if (security_vm_enough_memory(grow)) {
 20.1272 -		anon_vma_unlock(vma);
 20.1273 -		return -ENOMEM;
 20.1274 -	}
 20.1275 -	
 20.1276 -	if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
 20.1277 -			((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) >
 20.1278 -			current->rlim[RLIMIT_AS].rlim_cur) {
 20.1279 -		anon_vma_unlock(vma);
 20.1280 -		vm_unacct_memory(grow);
 20.1281 -		return -ENOMEM;
 20.1282 -	}
 20.1283 -	vma->vm_start = address;
 20.1284 -	vma->vm_pgoff -= grow;
 20.1285 -	vma->vm_mm->total_vm += grow;
 20.1286 -	if (vma->vm_flags & VM_LOCKED)
 20.1287 -		vma->vm_mm->locked_vm += grow;
 20.1288 -	anon_vma_unlock(vma);
 20.1289 -	return 0;
 20.1290 -}
 20.1291 -
 20.1292 -struct vm_area_struct *
 20.1293 -find_extend_vma(struct mm_struct * mm, unsigned long addr)
 20.1294 -{
 20.1295 -	struct vm_area_struct * vma;
 20.1296 -	unsigned long start;
 20.1297 -
 20.1298 -	addr &= PAGE_MASK;
 20.1299 -	vma = find_vma(mm,addr);
 20.1300 -	if (!vma)
 20.1301 -		return NULL;
 20.1302 -	if (vma->vm_start <= addr)
 20.1303 -		return vma;
 20.1304 -	if (!(vma->vm_flags & VM_GROWSDOWN))
 20.1305 -		return NULL;
 20.1306 -	start = vma->vm_start;
 20.1307 -	if (expand_stack(vma, addr))
 20.1308 -		return NULL;
 20.1309 -	if (vma->vm_flags & VM_LOCKED) {
 20.1310 -		make_pages_present(addr, start);
 20.1311 -	}
 20.1312 -	return vma;
 20.1313 -}
 20.1314 -#endif
 20.1315 -
 20.1316 -/*
 20.1317 - * Try to free as many page directory entries as we can,
 20.1318 - * without having to work very hard at actually scanning
 20.1319 - * the page tables themselves.
 20.1320 - *
 20.1321 - * Right now we try to free page tables if we have a nice
 20.1322 - * PGDIR-aligned area that got free'd up. We could be more
 20.1323 - * granular if we want to, but this is fast and simple,
 20.1324 - * and covers the bad cases.
 20.1325 - *
 20.1326 - * "prev", if it exists, points to a vma before the one
 20.1327 - * we just free'd - but there's no telling how much before.
 20.1328 - */
 20.1329 -static void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
 20.1330 -	unsigned long start, unsigned long end)
 20.1331 -{
 20.1332 -	unsigned long first = start & PGDIR_MASK;
 20.1333 -	unsigned long last = end + PGDIR_SIZE - 1;
 20.1334 -	unsigned long start_index, end_index;
 20.1335 -	struct mm_struct *mm = tlb->mm;
 20.1336 -
 20.1337 -	if (!prev) {
 20.1338 -		prev = mm->mmap;
 20.1339 -		if (!prev)
 20.1340 -			goto no_mmaps;
 20.1341 -		if (prev->vm_end > start) {
 20.1342 -			if (last > prev->vm_start)
 20.1343 -				last = prev->vm_start;
 20.1344 -			goto no_mmaps;
 20.1345 -		}
 20.1346 -	}
 20.1347 -	for (;;) {
 20.1348 -		struct vm_area_struct *next = prev->vm_next;
 20.1349 -
 20.1350 -		if (next) {
 20.1351 -			if (next->vm_start < start) {
 20.1352 -				prev = next;
 20.1353 -				continue;
 20.1354 -			}
 20.1355 -			if (last > next->vm_start)
 20.1356 -				last = next->vm_start;
 20.1357 -		}
 20.1358 -		if (prev->vm_end > first)
 20.1359 -			first = prev->vm_end + PGDIR_SIZE - 1;
 20.1360 -		break;
 20.1361 -	}
 20.1362 -no_mmaps:
 20.1363 -	if (last < first)	/* for arches with discontiguous pgd indices */
 20.1364 -		return;
 20.1365 -	/*
 20.1366 -	 * If the PGD bits are not consecutive in the virtual address, the
 20.1367 -	 * old method of shifting the VA >> by PGDIR_SHIFT doesn't work.
 20.1368 -	 */
 20.1369 -	start_index = pgd_index(first);
 20.1370 -	if (start_index < FIRST_USER_PGD_NR)
 20.1371 -		start_index = FIRST_USER_PGD_NR;
 20.1372 -	end_index = pgd_index(last);
 20.1373 -	if (end_index > start_index) {
 20.1374 -		clear_page_tables(tlb, start_index, end_index - start_index);
 20.1375 -		flush_tlb_pgtables(mm, first & PGDIR_MASK, last & PGDIR_MASK);
 20.1376 -	}
 20.1377 -}
 20.1378 -
 20.1379 -/* Normal function to fix up a mapping
 20.1380 - * This function is the default for when an area has no specific
 20.1381 - * function.  This may be used as part of a more specific routine.
 20.1382 - *
 20.1383 - * By the time this function is called, the area struct has been
 20.1384 - * removed from the process mapping list.
 20.1385 - */
 20.1386 -static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
 20.1387 -{
 20.1388 -	size_t len = area->vm_end - area->vm_start;
 20.1389 -
 20.1390 -	area->vm_mm->total_vm -= len >> PAGE_SHIFT;
 20.1391 -	if (area->vm_flags & VM_LOCKED)
 20.1392 -		area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
 20.1393 -	/*
 20.1394 -	 * Is this a new hole at the lowest possible address?
 20.1395 -	 */
 20.1396 -	if (area->vm_start >= TASK_UNMAPPED_BASE &&
 20.1397 -				area->vm_start < area->vm_mm->free_area_cache)
 20.1398 -	      area->vm_mm->free_area_cache = area->vm_start;
 20.1399 -
 20.1400 -	remove_vm_struct(area);
 20.1401 -}
 20.1402 -
 20.1403 -/*
 20.1404 - * Update the VMA and inode share lists.
 20.1405 - *
 20.1406 - * Ok - we have the memory areas we should free on the 'free' list,
 20.1407 - * so release them, and do the vma updates.
 20.1408 - */
 20.1409 -static void unmap_vma_list(struct mm_struct *mm,
 20.1410 -	struct vm_area_struct *mpnt)
 20.1411 -{
 20.1412 -	do {
 20.1413 -		struct vm_area_struct *next = mpnt->vm_next;
 20.1414 -		unmap_vma(mm, mpnt);
 20.1415 -		mpnt = next;
 20.1416 -	} while (mpnt != NULL);
 20.1417 -	validate_mm(mm);
 20.1418 -}
 20.1419 -
 20.1420 -/*
 20.1421 - * Get rid of page table information in the indicated region.
 20.1422 - *
 20.1423 - * Called with the page table lock held.
 20.1424 - */
 20.1425 -static void unmap_region(struct mm_struct *mm,
 20.1426 -	struct vm_area_struct *vma,
 20.1427 -	struct vm_area_struct *prev,
 20.1428 -	unsigned long start,
 20.1429 -	unsigned long end)
 20.1430 -{
 20.1431 -	struct mmu_gather *tlb;
 20.1432 -	unsigned long nr_accounted = 0;
 20.1433 -
 20.1434 -	lru_add_drain();
 20.1435 -	tlb = tlb_gather_mmu(mm, 0);
 20.1436 -	unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
 20.1437 -	vm_unacct_memory(nr_accounted);
 20.1438 -
 20.1439 -	if (is_hugepage_only_range(start, end - start))
 20.1440 -		hugetlb_free_pgtables(tlb, prev, start, end);
 20.1441 -	else
 20.1442 -		free_pgtables(tlb, prev, start, end);
 20.1443 -	tlb_finish_mmu(tlb, start, end);
 20.1444 -}
 20.1445 -
 20.1446 -/*
 20.1447 - * Create a list of vma's touched by the unmap, removing them from the mm's
 20.1448 - * vma list as we go..
 20.1449 - */
 20.1450 -static void
 20.1451 -detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 20.1452 -	struct vm_area_struct *prev, unsigned long end)
 20.1453 -{
 20.1454 -	struct vm_area_struct **insertion_point;
 20.1455 -	struct vm_area_struct *tail_vma = NULL;
 20.1456 -
 20.1457 -	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 20.1458 -	do {
 20.1459 -		rb_erase(&vma->vm_rb, &mm->mm_rb);
 20.1460 -		mm->map_count--;
 20.1461 -		tail_vma = vma;
 20.1462 -		vma = vma->vm_next;
 20.1463 -	} while (vma && vma->vm_start < end);
 20.1464 -	*insertion_point = vma;
 20.1465 -	tail_vma->vm_next = NULL;
 20.1466 -	mm->mmap_cache = NULL;		/* Kill the cache. */
 20.1467 -}
 20.1468 -
 20.1469 -/*
  20.1470 - * Split a vma into two pieces at address 'addr'; a new vma is allocated
  20.1471 - * either for the first part or the tail.
 20.1472 - */
 20.1473 -int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 20.1474 -	      unsigned long addr, int new_below)
 20.1475 -{
 20.1476 -	struct mempolicy *pol;
 20.1477 -	struct vm_area_struct *new;
 20.1478 -
 20.1479 -	if (mm->map_count >= sysctl_max_map_count)
 20.1480 -		return -ENOMEM;
 20.1481 -
 20.1482 -	new = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 20.1483 -	if (!new)
 20.1484 -		return -ENOMEM;
 20.1485 -
 20.1486 -	/* most fields are the same, copy all, and then fixup */
 20.1487 -	*new = *vma;
 20.1488 -	vma_prio_tree_init(new);
 20.1489 -
 20.1490 -	if (new_below)
 20.1491 -		new->vm_end = addr;
 20.1492 -	else {
 20.1493 -		new->vm_start = addr;
 20.1494 -		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
 20.1495 -	}
 20.1496 -
 20.1497 -	pol = mpol_copy(vma_policy(vma));
 20.1498 -	if (IS_ERR(pol)) {
 20.1499 -		kmem_cache_free(vm_area_cachep, new);
 20.1500 -		return PTR_ERR(pol);
 20.1501 -	}
 20.1502 -	vma_set_policy(new, pol);
 20.1503 -
 20.1504 -	if (new->vm_file)
 20.1505 -		get_file(new->vm_file);
 20.1506 -
 20.1507 -	if (new->vm_ops && new->vm_ops->open)
 20.1508 -		new->vm_ops->open(new);
 20.1509 -
 20.1510 -	if (new_below)
 20.1511 -		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 20.1512 -			((addr - new->vm_start) >> PAGE_SHIFT), new);
 20.1513 -	else
 20.1514 -		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 20.1515 -
 20.1516 -	return 0;
 20.1517 -}
 20.1518 -
 20.1519 -/* Munmap is split into 2 main parts -- this part which finds
 20.1520 - * what needs doing, and the areas themselves, which do the
 20.1521 - * work.  This now handles partial unmappings.
 20.1522 - * Jeremy Fitzhardinge <jeremy@goop.org>
 20.1523 - */
 20.1524 -int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 20.1525 -{
 20.1526 -	unsigned long end;
 20.1527 -	struct vm_area_struct *mpnt, *prev, *last;
 20.1528 -
 20.1529 -	if ((start & ~PAGE_MASK) || start > TASK_SIZE || len > TASK_SIZE-start)
 20.1530 -		return -EINVAL;
 20.1531 -
 20.1532 -	if ((len = PAGE_ALIGN(len)) == 0)
 20.1533 -		return -EINVAL;
 20.1534 -
 20.1535 -	/* Find the first overlapping VMA */
 20.1536 -	mpnt = find_vma_prev(mm, start, &prev);
 20.1537 -	if (!mpnt)
 20.1538 -		return 0;
 20.1539 -	/* we have  start < mpnt->vm_end  */
 20.1540 -
 20.1541 -	if (is_vm_hugetlb_page(mpnt)) {
 20.1542 -		int ret = is_aligned_hugepage_range(start, len);
 20.1543 -
 20.1544 -		if (ret)
 20.1545 -			return ret;
 20.1546 -	}
 20.1547 -
 20.1548 -	/* if it doesn't overlap, we have nothing.. */
 20.1549 -	end = start + len;
 20.1550 -	if (mpnt->vm_start >= end)
 20.1551 -		return 0;
 20.1552 -
 20.1553 -	/* Something will probably happen, so notify. */
 20.1554 -	if (mpnt->vm_file && (mpnt->vm_flags & VM_EXEC))
 20.1555 -		profile_exec_unmap(mm);
 20.1556 - 
 20.1557 -	/*
 20.1558 -	 * If we need to split any vma, do it now to save pain later.
 20.1559 -	 *
 20.1560 -	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
 20.1561 -	 * unmapped vm_area_struct will remain in use: so lower split_vma
 20.1562 -	 * places tmp vma above, and higher split_vma places tmp vma below.
 20.1563 -	 */
 20.1564 -	if (start > mpnt->vm_start) {
 20.1565 -		if (split_vma(mm, mpnt, start, 0))
 20.1566 -			return -ENOMEM;
 20.1567 -		prev = mpnt;
 20.1568 -	}
 20.1569 -
 20.1570 -	/* Does it split the last one? */
 20.1571 -	last = find_vma(mm, end);
 20.1572 -	if (last && end > last->vm_start) {
 20.1573 -		if (split_vma(mm, last, end, 1))
 20.1574 -			return -ENOMEM;
 20.1575 -	}
 20.1576 -	mpnt = prev? prev->vm_next: mm->mmap;
 20.1577 -
 20.1578 -	/*
 20.1579 -	 * Remove the vma's, and unmap the actual pages
 20.1580 -	 */
 20.1581 -	detach_vmas_to_be_unmapped(mm, mpnt, prev, end);
 20.1582 -	spin_lock(&mm->page_table_lock);
 20.1583 -	unmap_region(mm, mpnt, prev, start, end);
 20.1584 -	spin_unlock(&mm->page_table_lock);
 20.1585 -
 20.1586 -	/* Fix up all other VM information */
 20.1587 -	unmap_vma_list(mm, mpnt);
 20.1588 -
 20.1589 -	return 0;
 20.1590 -}
 20.1591 -
 20.1592 -EXPORT_SYMBOL(do_munmap);
 20.1593 -
 20.1594 -asmlinkage long sys_munmap(unsigned long addr, size_t len)
 20.1595 -{
 20.1596 -	int ret;
 20.1597 -	struct mm_struct *mm = current->mm;
 20.1598 -
 20.1599 -	down_write(&mm->mmap_sem);
 20.1600 -	ret = do_munmap(mm, addr, len);
 20.1601 -	up_write(&mm->mmap_sem);
 20.1602 -	return ret;
 20.1603 -}
 20.1604 -
 20.1605 -/*
  20.1606 - *  This is really a simplified "do_mmap".  It only handles
  20.1607 - *  anonymous maps.  Eventually we may be able to do some
 20.1608 - *  brk-specific accounting here.
 20.1609 - */
 20.1610 -unsigned long do_brk(unsigned long addr, unsigned long len)
 20.1611 -{
 20.1612 -	struct mm_struct * mm = current->mm;
 20.1613 -	struct vm_area_struct * vma, * prev;
 20.1614 -	unsigned long flags;
 20.1615 -	struct rb_node ** rb_link, * rb_parent;
 20.1616 -	pgoff_t pgoff = addr >> PAGE_SHIFT;
 20.1617 -
 20.1618 -	len = PAGE_ALIGN(len);
 20.1619 -	if (!len)
 20.1620 -		return addr;
 20.1621 -
 20.1622 -	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
 20.1623 -		return -EINVAL;
 20.1624 -
 20.1625 -	/*
 20.1626 -	 * mlock MCL_FUTURE?
 20.1627 -	 */
 20.1628 -	if (mm->def_flags & VM_LOCKED) {
 20.1629 -		unsigned long locked = mm->locked_vm << PAGE_SHIFT;
 20.1630 -		locked += len;
 20.1631 -		if (locked > current->rlim[RLIMIT_MEMLOCK].rlim_cur)
 20.1632 -			return -EAGAIN;
 20.1633 -	}
 20.1634 -
 20.1635 -	/*
  20.1636 -	 * Clear old maps.  This also does some error checking for us
 20.1637 -	 */
 20.1638 - munmap_back:
 20.1639 -	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 20.1640 -	if (vma && vma->vm_start < addr + len) {
 20.1641 -		if (do_munmap(mm, addr, len))
 20.1642 -			return -ENOMEM;
 20.1643 -		goto munmap_back;
 20.1644 -	}
 20.1645 -
 20.1646 -	/* Check against address space limits *after* clearing old maps... */
 20.1647 -	if ((mm->total_vm << PAGE_SHIFT) + len
 20.1648 -	    > current->rlim[RLIMIT_AS].rlim_cur)
 20.1649 -		return -ENOMEM;
 20.1650 -
 20.1651 -	if (mm->map_count > sysctl_max_map_count)
 20.1652 -		return -ENOMEM;
 20.1653 -
 20.1654 -	if (security_vm_enough_memory(len >> PAGE_SHIFT))
 20.1655 -		return -ENOMEM;
 20.1656 -
 20.1657 -	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 20.1658 -
 20.1659 -	/* Can we just expand an old private anonymous mapping? */
 20.1660 -	if (vma_merge(mm, prev, addr, addr + len, flags,
 20.1661 -					NULL, NULL, pgoff, NULL))
 20.1662 -		goto out;
 20.1663 -
 20.1664 -	/*
 20.1665 -	 * create a vma struct for an anonymous mapping
 20.1666 -	 */
 20.1667 -	vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 20.1668 -	if (!vma) {
 20.1669 -		vm_unacct_memory(len >> PAGE_SHIFT);
 20.1670 -		return -ENOMEM;
 20.1671 -	}
 20.1672 -	memset(vma, 0, sizeof(*vma));
 20.1673 -
 20.1674 -	vma->vm_mm = mm;
 20.1675 -	vma->vm_start = addr;
 20.1676 -	vma->vm_end = addr + len;
 20.1677 -	vma->vm_pgoff = pgoff;
 20.1678 -	vma->vm_flags = flags;
 20.1679 -	vma->vm_page_prot = protection_map[flags & 0x0f];
 20.1680 -	vma_link(mm, vma, prev, rb_link, rb_parent);
 20.1681 -out:
 20.1682 -	mm->total_vm += len >> PAGE_SHIFT;
 20.1683 -	if (flags & VM_LOCKED) {
 20.1684 -		mm->locked_vm += len >> PAGE_SHIFT;
 20.1685 -		make_pages_present(addr, addr + len);
 20.1686 -	}
 20.1687 -	return addr;
 20.1688 -}
 20.1689 -
 20.1690 -EXPORT_SYMBOL(do_brk);
 20.1691 -
 20.1692 -/* Release all mmaps. */
 20.1693 -void exit_mmap(struct mm_struct *mm)
 20.1694 -{
 20.1695 -	struct mmu_gather *tlb;
 20.1696 -	struct vm_area_struct *vma;
 20.1697 -	unsigned long nr_accounted = 0;
 20.1698 -
 20.1699 -	profile_exit_mmap(mm);
 20.1700 - 
 20.1701 -	lru_add_drain();
 20.1702 -
 20.1703 -	spin_lock(&mm->page_table_lock);
 20.1704 -
 20.1705 -	tlb = tlb_gather_mmu(mm, 1);
 20.1706 -	flush_cache_mm(mm);
 20.1707 -	/* Use ~0UL here to ensure all VMAs in the mm are unmapped */
 20.1708 -	mm->map_count -= unmap_vmas(&tlb, mm, mm->mmap, 0,
 20.1709 -					~0UL, &nr_accounted, NULL);
 20.1710 -	vm_unacct_memory(nr_accounted);
 20.1711 -	BUG_ON(mm->map_count);	/* This is just debugging */
 20.1712 -	clear_page_tables(tlb, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
 20.1713 -	tlb_finish_mmu(tlb, 0, MM_VM_SIZE(mm));
 20.1714 -
 20.1715 -	vma = mm->mmap;
 20.1716 -	mm->mmap = mm->mmap_cache = NULL;
 20.1717 -	mm->mm_rb = RB_ROOT;
 20.1718 -	mm->rss = 0;
 20.1719 -	mm->total_vm = 0;
 20.1720 -	mm->locked_vm = 0;
 20.1721 -
 20.1722 -	spin_unlock(&mm->page_table_lock);
 20.1723 -
 20.1724 -	/*
 20.1725 -	 * Walk the list again, actually closing and freeing it
 20.1726 -	 * without holding any MM locks.
 20.1727 -	 */
 20.1728 -	while (vma) {
 20.1729 -		struct vm_area_struct *next = vma->vm_next;
 20.1730 -		remove_vm_struct(vma);
 20.1731 -		vma = next;
 20.1732 -	}
 20.1733 -}
 20.1734 -
 20.1735 -/* Insert vm structure into process list sorted by address
 20.1736 - * and into the inode's i_mmap tree.  If vm_file is non-NULL
 20.1737 - * then i_mmap_lock is taken here.
 20.1738 - */
 20.1739 -void insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 20.1740 -{
 20.1741 -	struct vm_area_struct * __vma, * prev;
 20.1742 -	struct rb_node ** rb_link, * rb_parent;
 20.1743 -
 20.1744 -	/*
 20.1745 -	 * The vm_pgoff of a purely anonymous vma should be irrelevant
 20.1746 -	 * until its first write fault, when page's anon_vma and index
 20.1747 -	 * are set.  But now set the vm_pgoff it will almost certainly
 20.1748 -	 * end up with (unless mremap moves it elsewhere before that
  20.1749 - * first write fault), so /proc/pid/maps tells a consistent story.
 20.1750 -	 *
 20.1751 -	 * By setting it to reflect the virtual start address of the
 20.1752 -	 * vma, merges and splits can happen in a seamless way, just
 20.1753 -	 * using the existing file pgoff checks and manipulations.
 20.1754 -	 * Similarly in do_mmap_pgoff and in do_brk.
 20.1755 -	 */
 20.1756 -	if (!vma->vm_file) {
 20.1757 -		BUG_ON(vma->anon_vma);
 20.1758 -		vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
 20.1759 -	}
 20.1760 -	__vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
 20.1761 -	if (__vma && __vma->vm_start < vma->vm_end)
 20.1762 -		BUG();
 20.1763 -	vma_link(mm, vma, prev, rb_link, rb_parent);
 20.1764 -}
 20.1765 -
 20.1766 -/*
 20.1767 - * Copy the vma structure to a new location in the same mm,
 20.1768 - * prior to moving page table entries, to effect an mremap move.
 20.1769 - */
 20.1770 -struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 20.1771 -	unsigned long addr, unsigned long len, pgoff_t pgoff)
 20.1772 -{
 20.1773 -	struct vm_area_struct *vma = *vmap;
 20.1774 -	unsigned long vma_start = vma->vm_start;
 20.1775 -	struct mm_struct *mm = vma->vm_mm;
 20.1776 -	struct vm_area_struct *new_vma, *prev;
 20.1777 -	struct rb_node **rb_link, *rb_parent;
 20.1778 -	struct mempolicy *pol;
 20.1779 -
 20.1780 -	/*
 20.1781 -	 * If anonymous vma has not yet been faulted, update new pgoff
 20.1782 -	 * to match new location, to increase its chance of merging.
 20.1783 -	 */
 20.1784 -	if (!vma->vm_file && !vma->anon_vma)
 20.1785 -		pgoff = addr >> PAGE_SHIFT;
 20.1786 -
 20.1787 -	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 20.1788 -	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
 20.1789 -			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
 20.1790 -	if (new_vma) {
 20.1791 -		/*
 20.1792 -		 * Source vma may have been merged into new_vma
 20.1793 -		 */
 20.1794 -		if (vma_start >= new_vma->vm_start &&
 20.1795 -		    vma_start < new_vma->vm_end)
 20.1796 -			*vmap = new_vma;
 20.1797 -	} else {
 20.1798 -		new_vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
 20.1799 -		if (new_vma) {
 20.1800 -			*new_vma = *vma;
 20.1801 -			vma_prio_tree_init(new_vma);
 20.1802 -			pol = mpol_copy(vma_policy(vma));
 20.1803 -			if (IS_ERR(pol)) {
 20.1804 -				kmem_cache_free(vm_area_cachep, new_vma);
 20.1805 -				return NULL;
 20.1806 -			}
 20.1807 -			vma_set_policy(new_vma, pol);
 20.1808 -			new_vma->vm_start = addr;
 20.1809 -			new_vma->vm_end = addr + len;
 20.1810 -			new_vma->vm_pgoff = pgoff;
 20.1811 -			if (new_vma->vm_file)
 20.1812 -				get_file(new_vma->vm_file);
 20.1813 -			if (new_vma->vm_ops && new_vma->vm_ops->open)
 20.1814 -				new_vma->vm_ops->open(new_vma);
 20.1815 -			vma_link(mm, new_vma, prev, rb_link, rb_parent);
 20.1816 -		}
 20.1817 -	}
 20.1818 -	return new_vma;
 20.1819 -}