ia64/xen-unstable

changeset 5665:8bd2e8933277

Manual merge.
author kaf24@firebug.cl.cam.ac.uk
date Mon Jul 04 15:41:21 2005 +0000 (2005-07-04)
parents abc0a8f65be7 9b1866006aea
children 4b052d8a9a2f
files docs/misc/hg-cheatsheet.txt linux-2.6.11-xen-sparse/arch/xen/x86_64/kernel/traps.c linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.11-xen-sparse/include/linux/mm.h linux-2.6.11-xen-sparse/mm/memory.c tools/libxc/xc_gnttab.c xen/arch/x86/domain.c xen/arch/x86/vmx.c xen/arch/x86/vmx_platform.c xen/arch/x86/vmx_vmcs.c xen/include/asm-x86/msr.h xen/include/asm-x86/vmx_intercept.h xen/include/asm-x86/vmx_vmcs.h xen/include/public/grant_table.h
line diff
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/linux-2.6.11-xen-sparse/include/linux/mm.h	Mon Jul 04 15:41:21 2005 +0000
     4.3 @@ -0,0 +1,865 @@
     4.4 +#ifndef _LINUX_MM_H
     4.5 +#define _LINUX_MM_H
     4.6 +
     4.7 +#include <linux/sched.h>
     4.8 +#include <linux/errno.h>
     4.9 +
    4.10 +#ifdef __KERNEL__
    4.11 +
    4.12 +#include <linux/config.h>
    4.13 +#include <linux/gfp.h>
    4.14 +#include <linux/list.h>
    4.15 +#include <linux/mmzone.h>
    4.16 +#include <linux/rbtree.h>
    4.17 +#include <linux/prio_tree.h>
    4.18 +#include <linux/fs.h>
    4.19 +
    4.20 +struct mempolicy;
    4.21 +struct anon_vma;
    4.22 +
    4.23 +#ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
    4.24 +extern unsigned long max_mapnr;
    4.25 +#endif
    4.26 +
    4.27 +extern unsigned long num_physpages;
    4.28 +extern void * high_memory;
    4.29 +extern unsigned long vmalloc_earlyreserve;
    4.30 +extern int page_cluster;
    4.31 +
    4.32 +#ifdef CONFIG_SYSCTL
    4.33 +extern int sysctl_legacy_va_layout;
    4.34 +#else
    4.35 +#define sysctl_legacy_va_layout 0
    4.36 +#endif
    4.37 +
    4.38 +#include <asm/page.h>
    4.39 +#include <asm/pgtable.h>
    4.40 +#include <asm/processor.h>
    4.41 +#include <asm/atomic.h>
    4.42 +
    4.43 +#ifndef MM_VM_SIZE
    4.44 +#define MM_VM_SIZE(mm)	((TASK_SIZE + PGDIR_SIZE - 1) & PGDIR_MASK)
    4.45 +#endif
    4.46 +
    4.47 +#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
    4.48 +
    4.49 +/*
    4.50 + * Linux kernel virtual memory manager primitives.
    4.51 + * The idea being to have a "virtual" mm in the same way
    4.52 + * we have a virtual fs - giving a cleaner interface to the
    4.53 + * mm details, and allowing different kinds of memory mappings
    4.54 + * (from shared memory to executable loading to arbitrary
    4.55 + * mmap() functions).
    4.56 + */
    4.57 +
    4.58 +/*
     4.59 + * This struct defines a VMM memory area. There is one of these
    4.60 + * per VM-area/task.  A VM area is any part of the process virtual memory
    4.61 + * space that has a special rule for the page-fault handlers (ie a shared
    4.62 + * library, the executable area etc).
    4.63 + */
    4.64 +struct vm_area_struct {
    4.65 +	struct mm_struct * vm_mm;	/* The address space we belong to. */
    4.66 +	unsigned long vm_start;		/* Our start address within vm_mm. */
    4.67 +	unsigned long vm_end;		/* The first byte after our end address
    4.68 +					   within vm_mm. */
    4.69 +
    4.70 +	/* linked list of VM areas per task, sorted by address */
    4.71 +	struct vm_area_struct *vm_next;
    4.72 +
    4.73 +	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
    4.74 +	unsigned long vm_flags;		/* Flags, listed below. */
    4.75 +
    4.76 +	struct rb_node vm_rb;
    4.77 +
    4.78 +	/*
    4.79 +	 * For areas with an address space and backing store,
    4.80 +	 * linkage into the address_space->i_mmap prio tree, or
    4.81 +	 * linkage to the list of like vmas hanging off its node, or
    4.82 +	 * linkage of vma in the address_space->i_mmap_nonlinear list.
    4.83 +	 */
    4.84 +	union {
    4.85 +		struct {
    4.86 +			struct list_head list;
    4.87 +			void *parent;	/* aligns with prio_tree_node parent */
    4.88 +			struct vm_area_struct *head;
    4.89 +		} vm_set;
    4.90 +
    4.91 +		struct raw_prio_tree_node prio_tree_node;
    4.92 +	} shared;
    4.93 +
    4.94 +	/*
    4.95 +	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
    4.96 +	 * list, after a COW of one of the file pages.  A MAP_SHARED vma
    4.97 +	 * can only be in the i_mmap tree.  An anonymous MAP_PRIVATE, stack
    4.98 +	 * or brk vma (with NULL file) can only be in an anon_vma list.
    4.99 +	 */
   4.100 +	struct list_head anon_vma_node;	/* Serialized by anon_vma->lock */
   4.101 +	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
   4.102 +
   4.103 +	/* Function pointers to deal with this struct. */
   4.104 +	struct vm_operations_struct * vm_ops;
   4.105 +
   4.106 +	/* Information about our backing store: */
   4.107 +	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
   4.108 +					   units, *not* PAGE_CACHE_SIZE */
   4.109 +	struct file * vm_file;		/* File we map to (can be NULL). */
   4.110 +	void * vm_private_data;		/* was vm_pte (shared mem) */
   4.111 +	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
   4.112 +
   4.113 +#ifndef CONFIG_MMU
   4.114 +	atomic_t vm_usage;		/* refcount (VMAs shared if !MMU) */
   4.115 +#endif
   4.116 +#ifdef CONFIG_NUMA
   4.117 +	struct mempolicy *vm_policy;	/* NUMA policy for the VMA */
   4.118 +#endif
   4.119 +};
   4.120 +
   4.121 +/*
   4.122 + * This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
   4.123 + * disabled, then there's a single shared list of VMAs maintained by the
    4.124 + * system, and mms subscribe to these individually
   4.125 + */
   4.126 +struct vm_list_struct {
   4.127 +	struct vm_list_struct	*next;
   4.128 +	struct vm_area_struct	*vma;
   4.129 +};
   4.130 +
   4.131 +#ifndef CONFIG_MMU
   4.132 +extern struct rb_root nommu_vma_tree;
   4.133 +extern struct rw_semaphore nommu_vma_sem;
   4.134 +
   4.135 +extern unsigned int kobjsize(const void *objp);
   4.136 +#endif
   4.137 +
   4.138 +/*
   4.139 + * vm_flags..
   4.140 + */
   4.141 +#define VM_READ		0x00000001	/* currently active flags */
   4.142 +#define VM_WRITE	0x00000002
   4.143 +#define VM_EXEC		0x00000004
   4.144 +#define VM_SHARED	0x00000008
   4.145 +
   4.146 +#define VM_MAYREAD	0x00000010	/* limits for mprotect() etc */
   4.147 +#define VM_MAYWRITE	0x00000020
   4.148 +#define VM_MAYEXEC	0x00000040
   4.149 +#define VM_MAYSHARE	0x00000080
   4.150 +
   4.151 +#define VM_GROWSDOWN	0x00000100	/* general info on the segment */
   4.152 +#define VM_GROWSUP	0x00000200
   4.153 +#define VM_SHM		0x00000400	/* shared memory area, don't swap out */
   4.154 +#define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
   4.155 +
   4.156 +#define VM_EXECUTABLE	0x00001000
   4.157 +#define VM_LOCKED	0x00002000
   4.158 +#define VM_IO           0x00004000	/* Memory mapped I/O or similar */
   4.159 +
   4.160 +					/* Used by sys_madvise() */
   4.161 +#define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
   4.162 +#define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */
   4.163 +
   4.164 +#define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
   4.165 +#define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
   4.166 +#define VM_RESERVED	0x00080000	/* Don't unmap it from swap_out */
   4.167 +#define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
   4.168 +#define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
   4.169 +#define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
   4.170 +#define VM_FOREIGN      0x01000000      /* Has pages belonging to another VM */
   4.171 +
   4.172 +#ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
   4.173 +#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
   4.174 +#endif
   4.175 +
   4.176 +#ifdef CONFIG_STACK_GROWSUP
   4.177 +#define VM_STACK_FLAGS	(VM_GROWSUP | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
   4.178 +#else
   4.179 +#define VM_STACK_FLAGS	(VM_GROWSDOWN | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
   4.180 +#endif
   4.181 +
   4.182 +#define VM_READHINTMASK			(VM_SEQ_READ | VM_RAND_READ)
   4.183 +#define VM_ClearReadHint(v)		(v)->vm_flags &= ~VM_READHINTMASK
   4.184 +#define VM_NormalReadHint(v)		(!((v)->vm_flags & VM_READHINTMASK))
   4.185 +#define VM_SequentialReadHint(v)	((v)->vm_flags & VM_SEQ_READ)
   4.186 +#define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
   4.187 +
   4.188 +/*
   4.189 + * mapping from the currently active vm_flags protection bits (the
   4.190 + * low four bits) to a page protection mask..
   4.191 + */
   4.192 +extern pgprot_t protection_map[16];
   4.193 +
   4.194 +
   4.195 +/*
   4.196 + * These are the virtual MM functions - opening of an area, closing and
   4.197 + * unmapping it (needed to keep files on disk up-to-date etc), pointer
   4.198 + * to the functions called when a no-page or a wp-page exception occurs. 
   4.199 + */
   4.200 +struct vm_operations_struct {
   4.201 +	void (*open)(struct vm_area_struct * area);
   4.202 +	void (*close)(struct vm_area_struct * area);
   4.203 +	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
   4.204 +	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
   4.205 +#ifdef CONFIG_NUMA
   4.206 +	int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new);
   4.207 +	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
   4.208 +					unsigned long addr);
   4.209 +#endif
   4.210 +};
   4.211 +
   4.212 +struct mmu_gather;
   4.213 +struct inode;
   4.214 +
   4.215 +#ifdef ARCH_HAS_ATOMIC_UNSIGNED
   4.216 +typedef unsigned page_flags_t;
   4.217 +#else
   4.218 +typedef unsigned long page_flags_t;
   4.219 +#endif
   4.220 +
   4.221 +/*
   4.222 + * Each physical page in the system has a struct page associated with
   4.223 + * it to keep track of whatever it is we are using the page for at the
   4.224 + * moment. Note that we have no way to track which tasks are using
   4.225 + * a page.
   4.226 + */
   4.227 +struct page {
   4.228 +	page_flags_t flags;		/* Atomic flags, some possibly
   4.229 +					 * updated asynchronously */
   4.230 +	atomic_t _count;		/* Usage count, see below. */
   4.231 +	atomic_t _mapcount;		/* Count of ptes mapped in mms,
   4.232 +					 * to show when page is mapped
   4.233 +					 * & limit reverse map searches.
   4.234 +					 */
   4.235 +	unsigned long private;		/* Mapping-private opaque data:
   4.236 +					 * usually used for buffer_heads
   4.237 +					 * if PagePrivate set; used for
   4.238 +					 * swp_entry_t if PageSwapCache
   4.239 +					 * When page is free, this indicates
   4.240 +					 * order in the buddy system.
   4.241 +					 */
   4.242 +	struct address_space *mapping;	/* If low bit clear, points to
   4.243 +					 * inode address_space, or NULL.
   4.244 +					 * If page mapped as anonymous
   4.245 +					 * memory, low bit is set, and
   4.246 +					 * it points to anon_vma object:
   4.247 +					 * see PAGE_MAPPING_ANON below.
   4.248 +					 */
   4.249 +	pgoff_t index;			/* Our offset within mapping. */
   4.250 +	struct list_head lru;		/* Pageout list, eg. active_list
   4.251 +					 * protected by zone->lru_lock !
   4.252 +					 */
   4.253 +	/*
   4.254 +	 * On machines where all RAM is mapped into kernel address space,
   4.255 +	 * we can simply calculate the virtual address. On machines with
   4.256 +	 * highmem some memory is mapped into kernel virtual memory
   4.257 +	 * dynamically, so we need a place to store that address.
   4.258 +	 * Note that this field could be 16 bits on x86 ... ;)
   4.259 +	 *
   4.260 +	 * Architectures with slow multiplication can define
   4.261 +	 * WANT_PAGE_VIRTUAL in asm/page.h
   4.262 +	 */
   4.263 +#if defined(WANT_PAGE_VIRTUAL)
   4.264 +	void *virtual;			/* Kernel virtual address (NULL if
   4.265 +					   not kmapped, ie. highmem) */
   4.266 +#endif /* WANT_PAGE_VIRTUAL */
   4.267 +};
   4.268 +
   4.269 +/*
   4.270 + * FIXME: take this include out, include page-flags.h in
   4.271 + * files which need it (119 of them)
   4.272 + */
   4.273 +#include <linux/page-flags.h>
   4.274 +
   4.275 +/*
   4.276 + * Methods to modify the page usage count.
   4.277 + *
   4.278 + * What counts for a page usage:
   4.279 + * - cache mapping   (page->mapping)
   4.280 + * - private data    (page->private)
   4.281 + * - page mapped in a task's page tables, each mapping
   4.282 + *   is counted separately
   4.283 + *
   4.284 + * Also, many kernel routines increase the page count before a critical
   4.285 + * routine so they can be sure the page doesn't go away from under them.
   4.286 + *
   4.287 + * Since 2.6.6 (approx), a free page has ->_count = -1.  This is so that we
   4.288 + * can use atomic_add_negative(-1, page->_count) to detect when the page
   4.289 + * becomes free and so that we can also use atomic_inc_and_test to atomically
   4.290 + * detect when we just tried to grab a ref on a page which some other CPU has
   4.291 + * already deemed to be freeable.
   4.292 + *
   4.293 + * NO code should make assumptions about this internal detail!  Use the provided
   4.294 + * macros which retain the old rules: page_count(page) == 0 is a free page.
   4.295 + */
   4.296 +
   4.297 +/*
   4.298 + * Drop a ref, return true if the logical refcount fell to zero (the page has
   4.299 + * no users)
   4.300 + */
   4.301 +#define put_page_testzero(p)				\
   4.302 +	({						\
   4.303 +		BUG_ON(page_count(p) == 0);		\
   4.304 +		atomic_add_negative(-1, &(p)->_count);	\
   4.305 +	})
   4.306 +
   4.307 +/*
   4.308 + * Grab a ref, return true if the page previously had a logical refcount of
   4.309 + * zero.  ie: returns true if we just grabbed an already-deemed-to-be-free page
   4.310 + */
   4.311 +#define get_page_testone(p)	atomic_inc_and_test(&(p)->_count)
   4.312 +
   4.313 +#define set_page_count(p,v) 	atomic_set(&(p)->_count, v - 1)
   4.314 +#define __put_page(p)		atomic_dec(&(p)->_count)
   4.315 +
   4.316 +extern void FASTCALL(__page_cache_release(struct page *));
   4.317 +
   4.318 +#ifdef CONFIG_HUGETLB_PAGE
   4.319 +
   4.320 +static inline int page_count(struct page *p)
   4.321 +{
   4.322 +	if (PageCompound(p))
   4.323 +		p = (struct page *)p->private;
   4.324 +	return atomic_read(&(p)->_count) + 1;
   4.325 +}
   4.326 +
   4.327 +static inline void get_page(struct page *page)
   4.328 +{
   4.329 +	if (unlikely(PageCompound(page)))
   4.330 +		page = (struct page *)page->private;
   4.331 +	atomic_inc(&page->_count);
   4.332 +}
   4.333 +
   4.334 +void put_page(struct page *page);
   4.335 +
   4.336 +#else		/* CONFIG_HUGETLB_PAGE */
   4.337 +
   4.338 +#define page_count(p)		(atomic_read(&(p)->_count) + 1)
   4.339 +
   4.340 +static inline void get_page(struct page *page)
   4.341 +{
   4.342 +	atomic_inc(&page->_count);
   4.343 +}
   4.344 +
   4.345 +static inline void put_page(struct page *page)
   4.346 +{
   4.347 +	if (!PageReserved(page) && put_page_testzero(page))
   4.348 +		__page_cache_release(page);
   4.349 +}
   4.350 +
   4.351 +#endif		/* CONFIG_HUGETLB_PAGE */
   4.352 +
   4.353 +/*
   4.354 + * Multiple processes may "see" the same page. E.g. for untouched
   4.355 + * mappings of /dev/null, all processes see the same page full of
   4.356 + * zeroes, and text pages of executables and shared libraries have
   4.357 + * only one copy in memory, at most, normally.
   4.358 + *
   4.359 + * For the non-reserved pages, page_count(page) denotes a reference count.
   4.360 + *   page_count() == 0 means the page is free.
   4.361 + *   page_count() == 1 means the page is used for exactly one purpose
   4.362 + *   (e.g. a private data page of one process).
   4.363 + *
   4.364 + * A page may be used for kmalloc() or anyone else who does a
   4.365 + * __get_free_page(). In this case the page_count() is at least 1, and
   4.366 + * all other fields are unused but should be 0 or NULL. The
   4.367 + * management of this page is the responsibility of the one who uses
   4.368 + * it.
   4.369 + *
   4.370 + * The other pages (we may call them "process pages") are completely
   4.371 + * managed by the Linux memory manager: I/O, buffers, swapping etc.
   4.372 + * The following discussion applies only to them.
   4.373 + *
   4.374 + * A page may belong to an inode's memory mapping. In this case,
   4.375 + * page->mapping is the pointer to the inode, and page->index is the
   4.376 + * file offset of the page, in units of PAGE_CACHE_SIZE.
   4.377 + *
   4.378 + * A page contains an opaque `private' member, which belongs to the
   4.379 + * page's address_space.  Usually, this is the address of a circular
   4.380 + * list of the page's disk buffers.
   4.381 + *
   4.382 + * For pages belonging to inodes, the page_count() is the number of
   4.383 + * attaches, plus 1 if `private' contains something, plus one for
   4.384 + * the page cache itself.
   4.385 + *
   4.386 + * All pages belonging to an inode are in these doubly linked lists:
   4.387 + * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages;
   4.388 + * using the page->list list_head. These fields are also used for
    4.389 + * freelist management (when page_count()==0).
   4.390 + *
   4.391 + * There is also a per-mapping radix tree mapping index to the page
   4.392 + * in memory if present. The tree is rooted at mapping->root.  
   4.393 + *
   4.394 + * All process pages can do I/O:
   4.395 + * - inode pages may need to be read from disk,
   4.396 + * - inode pages which have been modified and are MAP_SHARED may need
   4.397 + *   to be written to disk,
   4.398 + * - private pages which have been modified may need to be swapped out
   4.399 + *   to swap space and (later) to be read back into memory.
   4.400 + */
   4.401 +
   4.402 +/*
   4.403 + * The zone field is never updated after free_area_init_core()
   4.404 + * sets it, so none of the operations on it need to be atomic.
   4.405 + * We'll have up to (MAX_NUMNODES * MAX_NR_ZONES) zones total,
   4.406 + * so we use (MAX_NODES_SHIFT + MAX_ZONES_SHIFT) here to get enough bits.
   4.407 + */
   4.408 +#define NODEZONE_SHIFT (sizeof(page_flags_t)*8 - MAX_NODES_SHIFT - MAX_ZONES_SHIFT)
   4.409 +#define NODEZONE(node, zone)	((node << ZONES_SHIFT) | zone)
   4.410 +
   4.411 +static inline unsigned long page_zonenum(struct page *page)
   4.412 +{
   4.413 +	return (page->flags >> NODEZONE_SHIFT) & (~(~0UL << ZONES_SHIFT));
   4.414 +}
   4.415 +static inline unsigned long page_to_nid(struct page *page)
   4.416 +{
   4.417 +	return (page->flags >> (NODEZONE_SHIFT + ZONES_SHIFT));
   4.418 +}
   4.419 +
   4.420 +struct zone;
   4.421 +extern struct zone *zone_table[];
   4.422 +
   4.423 +static inline struct zone *page_zone(struct page *page)
   4.424 +{
   4.425 +	return zone_table[page->flags >> NODEZONE_SHIFT];
   4.426 +}
   4.427 +
   4.428 +static inline void set_page_zone(struct page *page, unsigned long nodezone_num)
   4.429 +{
   4.430 +	page->flags &= ~(~0UL << NODEZONE_SHIFT);
   4.431 +	page->flags |= nodezone_num << NODEZONE_SHIFT;
   4.432 +}
   4.433 +
   4.434 +#ifndef CONFIG_DISCONTIGMEM
   4.435 +/* The array of struct pages - for discontigmem use pgdat->lmem_map */
   4.436 +extern struct page *mem_map;
   4.437 +#endif
   4.438 +
   4.439 +static inline void *lowmem_page_address(struct page *page)
   4.440 +{
   4.441 +	return __va(page_to_pfn(page) << PAGE_SHIFT);
   4.442 +}
   4.443 +
   4.444 +#if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
   4.445 +#define HASHED_PAGE_VIRTUAL
   4.446 +#endif
   4.447 +
   4.448 +#if defined(WANT_PAGE_VIRTUAL)
   4.449 +#define page_address(page) ((page)->virtual)
   4.450 +#define set_page_address(page, address)			\
   4.451 +	do {						\
   4.452 +		(page)->virtual = (address);		\
   4.453 +	} while(0)
   4.454 +#define page_address_init()  do { } while(0)
   4.455 +#endif
   4.456 +
   4.457 +#if defined(HASHED_PAGE_VIRTUAL)
   4.458 +void *page_address(struct page *page);
   4.459 +void set_page_address(struct page *page, void *virtual);
   4.460 +void page_address_init(void);
   4.461 +#endif
   4.462 +
   4.463 +#if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
   4.464 +#define page_address(page) lowmem_page_address(page)
   4.465 +#define set_page_address(page, address)  do { } while(0)
   4.466 +#define page_address_init()  do { } while(0)
   4.467 +#endif
   4.468 +
   4.469 +/*
   4.470 + * On an anonymous page mapped into a user virtual memory area,
   4.471 + * page->mapping points to its anon_vma, not to a struct address_space;
   4.472 + * with the PAGE_MAPPING_ANON bit set to distinguish it.
   4.473 + *
   4.474 + * Please note that, confusingly, "page_mapping" refers to the inode
   4.475 + * address_space which maps the page from disk; whereas "page_mapped"
   4.476 + * refers to user virtual address space into which the page is mapped.
   4.477 + */
   4.478 +#define PAGE_MAPPING_ANON	1
   4.479 +
   4.480 +extern struct address_space swapper_space;
   4.481 +static inline struct address_space *page_mapping(struct page *page)
   4.482 +{
   4.483 +	struct address_space *mapping = page->mapping;
   4.484 +
   4.485 +	if (unlikely(PageSwapCache(page)))
   4.486 +		mapping = &swapper_space;
   4.487 +	else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON))
   4.488 +		mapping = NULL;
   4.489 +	return mapping;
   4.490 +}
   4.491 +
   4.492 +static inline int PageAnon(struct page *page)
   4.493 +{
   4.494 +	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0;
   4.495 +}
   4.496 +
   4.497 +/*
   4.498 + * Return the pagecache index of the passed page.  Regular pagecache pages
   4.499 + * use ->index whereas swapcache pages use ->private
   4.500 + */
   4.501 +static inline pgoff_t page_index(struct page *page)
   4.502 +{
   4.503 +	if (unlikely(PageSwapCache(page)))
   4.504 +		return page->private;
   4.505 +	return page->index;
   4.506 +}
   4.507 +
   4.508 +/*
   4.509 + * The atomic page->_mapcount, like _count, starts from -1:
   4.510 + * so that transitions both from it and to it can be tracked,
   4.511 + * using atomic_inc_and_test and atomic_add_negative(-1).
   4.512 + */
   4.513 +static inline void reset_page_mapcount(struct page *page)
   4.514 +{
   4.515 +	atomic_set(&(page)->_mapcount, -1);
   4.516 +}
   4.517 +
   4.518 +static inline int page_mapcount(struct page *page)
   4.519 +{
   4.520 +	return atomic_read(&(page)->_mapcount) + 1;
   4.521 +}
   4.522 +
   4.523 +/*
   4.524 + * Return true if this page is mapped into pagetables.
   4.525 + */
   4.526 +static inline int page_mapped(struct page *page)
   4.527 +{
   4.528 +	return atomic_read(&(page)->_mapcount) >= 0;
   4.529 +}
   4.530 +
   4.531 +/*
   4.532 + * Error return values for the *_nopage functions
   4.533 + */
   4.534 +#define NOPAGE_SIGBUS	(NULL)
   4.535 +#define NOPAGE_OOM	((struct page *) (-1))
   4.536 +
   4.537 +/*
   4.538 + * Different kinds of faults, as returned by handle_mm_fault().
   4.539 + * Used to decide whether a process gets delivered SIGBUS or
   4.540 + * just gets major/minor fault counters bumped up.
   4.541 + */
   4.542 +#define VM_FAULT_OOM	(-1)
   4.543 +#define VM_FAULT_SIGBUS	0
   4.544 +#define VM_FAULT_MINOR	1
   4.545 +#define VM_FAULT_MAJOR	2
   4.546 +
   4.547 +#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
   4.548 +
   4.549 +extern void show_free_areas(void);
   4.550 +
   4.551 +#ifdef CONFIG_SHMEM
   4.552 +struct page *shmem_nopage(struct vm_area_struct *vma,
   4.553 +			unsigned long address, int *type);
   4.554 +int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *new);
   4.555 +struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
   4.556 +					unsigned long addr);
   4.557 +int shmem_lock(struct file *file, int lock, struct user_struct *user);
   4.558 +#else
   4.559 +#define shmem_nopage filemap_nopage
   4.560 +#define shmem_lock(a, b, c) 	({0;})	/* always in memory, no need to lock */
   4.561 +#define shmem_set_policy(a, b)	(0)
   4.562 +#define shmem_get_policy(a, b)	(NULL)
   4.563 +#endif
   4.564 +struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);
   4.565 +
   4.566 +int shmem_zero_setup(struct vm_area_struct *);
   4.567 +
   4.568 +static inline int can_do_mlock(void)
   4.569 +{
   4.570 +	if (capable(CAP_IPC_LOCK))
   4.571 +		return 1;
   4.572 +	if (current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur != 0)
   4.573 +		return 1;
   4.574 +	return 0;
   4.575 +}
   4.576 +extern int user_shm_lock(size_t, struct user_struct *);
   4.577 +extern void user_shm_unlock(size_t, struct user_struct *);
   4.578 +
   4.579 +/*
   4.580 + * Parameter block passed down to zap_pte_range in exceptional cases.
   4.581 + */
   4.582 +struct zap_details {
   4.583 +	struct vm_area_struct *nonlinear_vma;	/* Check page->index if set */
   4.584 +	struct address_space *check_mapping;	/* Check page->mapping if set */
   4.585 +	pgoff_t	first_index;			/* Lowest page->index to unmap */
   4.586 +	pgoff_t last_index;			/* Highest page->index to unmap */
   4.587 +	spinlock_t *i_mmap_lock;		/* For unmap_mapping_range: */
   4.588 +	unsigned long break_addr;		/* Where unmap_vmas stopped */
   4.589 +	unsigned long truncate_count;		/* Compare vm_truncate_count */
   4.590 +};
   4.591 +
   4.592 +void zap_page_range(struct vm_area_struct *vma, unsigned long address,
   4.593 +		unsigned long size, struct zap_details *);
   4.594 +int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
   4.595 +		struct vm_area_struct *start_vma, unsigned long start_addr,
   4.596 +		unsigned long end_addr, unsigned long *nr_accounted,
   4.597 +		struct zap_details *);
   4.598 +void clear_page_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end);
   4.599 +int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
   4.600 +			struct vm_area_struct *vma);
   4.601 +int zeromap_page_range(struct vm_area_struct *vma, unsigned long from,
   4.602 +			unsigned long size, pgprot_t prot);
   4.603 +void unmap_mapping_range(struct address_space *mapping,
   4.604 +		loff_t const holebegin, loff_t const holelen, int even_cows);
   4.605 +
   4.606 +static inline void unmap_shared_mapping_range(struct address_space *mapping,
   4.607 +		loff_t const holebegin, loff_t const holelen)
   4.608 +{
   4.609 +	unmap_mapping_range(mapping, holebegin, holelen, 0);
   4.610 +}
   4.611 +
   4.612 +extern int vmtruncate(struct inode * inode, loff_t offset);
   4.613 +extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
   4.614 +extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
   4.615 +extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
   4.616 +extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
   4.617 +extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
   4.618 +extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
   4.619 +extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
   4.620 +extern int make_pages_present(unsigned long addr, unsigned long end);
   4.621 +extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
   4.622 +void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
   4.623 +
   4.624 +int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
   4.625 +		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
   4.626 +
   4.627 +int __set_page_dirty_buffers(struct page *page);
   4.628 +int __set_page_dirty_nobuffers(struct page *page);
   4.629 +int redirty_page_for_writepage(struct writeback_control *wbc,
   4.630 +				struct page *page);
   4.631 +int FASTCALL(set_page_dirty(struct page *page));
   4.632 +int set_page_dirty_lock(struct page *page);
   4.633 +int clear_page_dirty_for_io(struct page *page);
   4.634 +
   4.635 +extern unsigned long do_mremap(unsigned long addr,
   4.636 +			       unsigned long old_len, unsigned long new_len,
   4.637 +			       unsigned long flags, unsigned long new_addr);
   4.638 +
   4.639 +/*
   4.640 + * Prototype to add a shrinker callback for ageable caches.
   4.641 + * 
   4.642 + * These functions are passed a count `nr_to_scan' and a gfpmask.  They should
   4.643 + * scan `nr_to_scan' objects, attempting to free them.
   4.644 + *
    4.645 + * The callback must return the number of objects which remain in the cache.
    4.646 + *
    4.647 + * The callback will be passed nr_to_scan == 0 when the VM is querying the
   4.648 + * cache size, so a fastpath for that case is appropriate.
   4.649 + */
   4.650 +typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
   4.651 +
   4.652 +/*
   4.653 + * Add an aging callback.  The int is the number of 'seeks' it takes
   4.654 + * to recreate one of the objects that these functions age.
   4.655 + */
   4.656 +
   4.657 +#define DEFAULT_SEEKS 2
   4.658 +struct shrinker;
   4.659 +extern struct shrinker *set_shrinker(int, shrinker_t);
   4.660 +extern void remove_shrinker(struct shrinker *shrinker);
   4.661 +
   4.662 +/*
   4.663 + * On a two-level or three-level page table, this ends up being trivial. Thus
   4.664 + * the inlining and the symmetry break with pte_alloc_map() that does all
   4.665 + * of this out-of-line.
   4.666 + */
   4.667 +/*
   4.668 + * The following ifdef needed to get the 4level-fixup.h header to work.
   4.669 + * Remove it when 4level-fixup.h has been removed.
   4.670 + */
   4.671 +#ifdef CONFIG_MMU
   4.672 +#ifndef __ARCH_HAS_4LEVEL_HACK 
   4.673 +static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
   4.674 +{
   4.675 +	if (pgd_none(*pgd))
   4.676 +		return __pud_alloc(mm, pgd, address);
   4.677 +	return pud_offset(pgd, address);
   4.678 +}
   4.679 +
   4.680 +static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
   4.681 +{
   4.682 +	if (pud_none(*pud))
   4.683 +		return __pmd_alloc(mm, pud, address);
   4.684 +	return pmd_offset(pud, address);
   4.685 +}
   4.686 +#endif
   4.687 +#endif /* CONFIG_MMU */
   4.688 +
   4.689 +extern void free_area_init(unsigned long * zones_size);
   4.690 +extern void free_area_init_node(int nid, pg_data_t *pgdat,
   4.691 +	unsigned long * zones_size, unsigned long zone_start_pfn, 
   4.692 +	unsigned long *zholes_size);
   4.693 +extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
   4.694 +extern void mem_init(void);
   4.695 +extern void show_mem(void);
   4.696 +extern void si_meminfo(struct sysinfo * val);
   4.697 +extern void si_meminfo_node(struct sysinfo *val, int nid);
   4.698 +
   4.699 +/* prio_tree.c */
   4.700 +void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
   4.701 +void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
   4.702 +void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
   4.703 +struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
   4.704 +	struct prio_tree_iter *iter);
   4.705 +
   4.706 +#define vma_prio_tree_foreach(vma, iter, root, begin, end)	\
   4.707 +	for (prio_tree_iter_init(iter, root, begin, end), vma = NULL;	\
   4.708 +		(vma = vma_prio_tree_next(vma, iter)); )
   4.709 +
   4.710 +static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
   4.711 +					struct list_head *list)
   4.712 +{
   4.713 +	vma->shared.vm_set.parent = NULL;
   4.714 +	list_add_tail(&vma->shared.vm_set.list, list);
   4.715 +}
   4.716 +
   4.717 +/* mmap.c */
   4.718 +extern int __vm_enough_memory(long pages, int cap_sys_admin);
   4.719 +extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
   4.720 +	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
   4.721 +extern struct vm_area_struct *vma_merge(struct mm_struct *,
   4.722 +	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
   4.723 +	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
   4.724 +	struct mempolicy *);
   4.725 +extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
   4.726 +extern int split_vma(struct mm_struct *,
   4.727 +	struct vm_area_struct *, unsigned long addr, int new_below);
   4.728 +extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
   4.729 +extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
   4.730 +	struct rb_node **, struct rb_node *);
   4.731 +extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
   4.732 +	unsigned long addr, unsigned long len, pgoff_t pgoff);
   4.733 +extern void exit_mmap(struct mm_struct *);
   4.734 +
   4.735 +extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
   4.736 +
   4.737 +extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
   4.738 +	unsigned long len, unsigned long prot,
   4.739 +	unsigned long flag, unsigned long pgoff);
   4.740 +
   4.741 +static inline unsigned long do_mmap(struct file *file, unsigned long addr,
   4.742 +	unsigned long len, unsigned long prot,
   4.743 +	unsigned long flag, unsigned long offset)
   4.744 +{
   4.745 +	unsigned long ret = -EINVAL;
   4.746 +	if ((offset + PAGE_ALIGN(len)) < offset)
   4.747 +		goto out;
   4.748 +	if (!(offset & ~PAGE_MASK))
   4.749 +		ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
   4.750 +out:
   4.751 +	return ret;
   4.752 +}
   4.753 +
   4.754 +extern int do_munmap(struct mm_struct *, unsigned long, size_t);
   4.755 +
   4.756 +extern unsigned long do_brk(unsigned long, unsigned long);
   4.757 +
   4.758 +/* filemap.c */
   4.759 +extern unsigned long page_unuse(struct page *);
   4.760 +extern void truncate_inode_pages(struct address_space *, loff_t);
   4.761 +
   4.762 +/* generic vm_area_ops exported for stackable file systems */
   4.763 +extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
   4.764 +extern int filemap_populate(struct vm_area_struct *, unsigned long,
   4.765 +		unsigned long, pgprot_t, unsigned long, int);
   4.766 +
   4.767 +/* mm/page-writeback.c */
   4.768 +int write_one_page(struct page *page, int wait);
   4.769 +
   4.770 +/* readahead.c */
   4.771 +#define VM_MAX_READAHEAD	128	/* kbytes */
   4.772 +#define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
   4.773 +#define VM_MAX_CACHE_HIT    	256	/* max pages in a row in cache before
   4.774 +					 * turning readahead off */
   4.775 +
   4.776 +int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
   4.777 +			unsigned long offset, unsigned long nr_to_read);
   4.778 +int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
   4.779 +			unsigned long offset, unsigned long nr_to_read);
   4.780 +unsigned long  page_cache_readahead(struct address_space *mapping,
   4.781 +			  struct file_ra_state *ra,
   4.782 +			  struct file *filp,
   4.783 +			  unsigned long offset,
   4.784 +			  unsigned long size);
   4.785 +void handle_ra_miss(struct address_space *mapping, 
   4.786 +		    struct file_ra_state *ra, pgoff_t offset);
   4.787 +unsigned long max_sane_readahead(unsigned long nr);
   4.788 +
   4.789 +/* Do stack extension */
   4.790 +extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
   4.791 +
   4.792 +/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
   4.793 +extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
   4.794 +extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
   4.795 +					     struct vm_area_struct **pprev);
   4.796 +
   4.797 +/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
   4.798 +   NULL if none.  Assume start_addr < end_addr. */
   4.799 +static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
   4.800 +{
   4.801 +	struct vm_area_struct * vma = find_vma(mm,start_addr);
   4.802 +
   4.803 +	if (vma && end_addr <= vma->vm_start)
   4.804 +		vma = NULL;
   4.805 +	return vma;
   4.806 +}
   4.807 +
   4.808 +static inline unsigned long vma_pages(struct vm_area_struct *vma)
   4.809 +{
   4.810 +	return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
   4.811 +}
   4.812 +
   4.813 +extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
   4.814 +
   4.815 +extern struct page * vmalloc_to_page(void *addr);
   4.816 +extern unsigned long vmalloc_to_pfn(void *addr);
   4.817 +extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
   4.818 +		int write);
   4.819 +extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
   4.820 +int remap_pfn_range(struct vm_area_struct *, unsigned long,
   4.821 +		unsigned long, unsigned long, pgprot_t);
   4.822 +/* Allow arch override for mapping of device and I/O (non-RAM) pages. */
   4.823 +#ifndef io_remap_pfn_range
   4.824 +#define io_remap_pfn_range remap_pfn_range
   4.825 +#endif
   4.826 +
   4.827 +#ifdef CONFIG_PROC_FS
   4.828 +void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
   4.829 +#else
   4.830 +static inline void __vm_stat_account(struct mm_struct *mm,
   4.831 +			unsigned long flags, struct file *file, long pages)
   4.832 +{
   4.833 +}
   4.834 +#endif /* CONFIG_PROC_FS */
   4.835 +
   4.836 +static inline void vm_stat_account(struct vm_area_struct *vma)
   4.837 +{
   4.838 +	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
   4.839 +							vma_pages(vma));
   4.840 +}
   4.841 +
   4.842 +static inline void vm_stat_unaccount(struct vm_area_struct *vma)
   4.843 +{
   4.844 +	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
   4.845 +							-vma_pages(vma));
   4.846 +}
   4.847 +
   4.848 +/* update per process rss and vm hiwater data */
   4.849 +extern void update_mem_hiwater(void);
   4.850 +
   4.851 +#ifndef CONFIG_DEBUG_PAGEALLOC
   4.852 +static inline void
   4.853 +kernel_map_pages(struct page *page, int numpages, int enable)
   4.854 +{
   4.855 +}
   4.856 +#endif
   4.857 +
   4.858 +extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk);
   4.859 +#ifdef	__HAVE_ARCH_GATE_AREA
   4.860 +int in_gate_area_no_task(unsigned long addr);
   4.861 +int in_gate_area(struct task_struct *task, unsigned long addr);
   4.862 +#else
   4.863 +int in_gate_area_no_task(unsigned long addr);
   4.864 +#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);})
   4.865 +#endif	/* __HAVE_ARCH_GATE_AREA */
   4.866 +
   4.867 +#endif /* __KERNEL__ */
   4.868 +#endif /* _LINUX_MM_H */
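
The refcounting comments in the new mm.h above document that page->_count stores the logical reference count minus one, that page_count() == 0 denotes a free page, and that callers must go through the provided helpers rather than touching _count directly. A minimal sketch of that usage pattern, assuming the non-CONFIG_HUGETLB_PAGE definitions above; the function name pin_page_example() is illustrative only, not part of this changeset:

    #include <linux/mm.h>

    /* Pin a page across some operation so it cannot be freed underneath us. */
    static void pin_page_example(struct page *page)
    {
            get_page(page);         /* atomically bumps page->_count */

            /* ... safe to use the page here; our reference keeps it live ... */

            put_page(page);         /* drops the reference; if the logical count
                                     * reaches zero and the page is not reserved,
                                     * __page_cache_release() frees it */
    }
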
     5.1 --- a/linux-2.6.11-xen-sparse/mm/memory.c	Mon Jul 04 15:34:57 2005 +0000
     5.2 +++ b/linux-2.6.11-xen-sparse/mm/memory.c	Mon Jul 04 15:41:21 2005 +0000
     5.3 @@ -907,6 +907,24 @@ int get_user_pages(struct task_struct *t
     5.4  			continue;
     5.5  		}
     5.6  
     5.7 +                if (vma && (vma->vm_flags & VM_FOREIGN))
     5.8 +                {
     5.9 +                    struct page **map = vma->vm_private_data;
    5.10 +                    int offset = (start - vma->vm_start) >> PAGE_SHIFT;
    5.11 +
    5.12 +                    if (map[offset] != NULL) {
    5.13 +                        if (pages) {
    5.14 +                            pages[i] = map[offset];
    5.15 +                        } 
    5.16 +                        if (vmas) 
    5.17 +                            vmas[i] = vma;
    5.18 +                        i++;
    5.19 +                        start += PAGE_SIZE;
    5.20 +                        len--;
    5.21 +                        continue;
    5.22 +                    }
    5.23 +                }
    5.24 +
    5.25  		if (!vma || (vma->vm_flags & VM_IO)
    5.26  				|| !(flags & vma->vm_flags))
    5.27  			return i ? : -EFAULT;