ia64/xen-unstable

changeset 2305:cbbe40349d37

bitkeeper revision 1.1159.42.8 (4124f66fUINxrel-POThC1of633DIA)

Clean up network-backend driver changes to common files.
There is now a 'foreign page' hook into the page allocator, and
a CONFIG_ option for forcing page-sized rx skbs.
author kaf24@scramble.cl.cam.ac.uk
date Thu Aug 19 18:50:23 2004 +0000 (2004-08-19)
parents a2ba99e7ce71
children 0f47aec8946e 51bac4ba69d2
files .rootkeys linux-2.4.26-xen-sparse/arch/xen/config.in linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0 linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU linux-2.4.26-xen-sparse/include/linux/mm.h linux-2.4.26-xen-sparse/mm/page_alloc.c linux-2.6.7-xen-sparse/arch/xen/Kconfig linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c linux-2.6.7-xen-sparse/include/linux/page-flags.h linux-2.6.7-xen-sparse/include/linux/skbuff.h linux-2.6.7-xen-sparse/mm/page_alloc.c
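Before the diff itself, here is a minimal sketch of how the new foreign-page hook fits together, pieced from the hunks below. The names SetPageForeign, PageForeignDestructor, set_page_count and the check in __free_pages_ok() are all introduced or used by this changeset; my_page_release() and my_mark_page_foreign() are hypothetical helpers for illustration only.

    /* Illustrative sketch only -- not part of the changeset.  Assumes
     * CONFIG_FOREIGN_PAGES=y and the macros added by this changeset
     * (SetPageForeign, PageForeignDestructor) in <linux/mm.h> (2.4) or
     * <linux/page-flags.h> (2.6).  my_page_release() is a made-up destructor
     * standing in for the driver's real netif_page_release(). */
    #include <linux/mm.h>

    static void my_page_release(struct page *page)
    {
        /* Reclaim the page for the driver's own pool instead of letting it
         * fall back into the buddy allocator. */
        set_page_count(page, 1);
    }

    static void my_mark_page_foreign(struct page *page)
    {
        SetPageForeign(page);
        /* PageForeignDestructor() aliases page->mapping; storing the
         * destructor here mirrors what netback_init() does below. */
        PageForeignDestructor(page) = my_page_release;
    }

    /* The allocator-side hook, as added to __free_pages_ok() below:
     *
     *     if (PageForeign(page))
     *         return (PageForeignDestructor(page))(page);
     */

The destructor pointer lives in page->mapping, which the allocator otherwise requires to be NULL when a page is freed, so the hook costs no extra space in struct page.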
line diff
     1.1 --- a/.rootkeys	Thu Aug 19 17:17:18 2004 +0000
     1.2 +++ b/.rootkeys	Thu Aug 19 18:50:23 2004 +0000
     1.3 @@ -111,6 +111,7 @@ 3e5a4e68mTr0zcp9SXDbnd-XLrrfxw linux-2.4
     1.4  3f1056a9L_kqHcFheV00KbKBzv9j5w linux-2.4.26-xen-sparse/include/asm-xen/vga.h
     1.5  40659defgWA92arexpMGn8X3QMDj3w linux-2.4.26-xen-sparse/include/asm-xen/xor.h
     1.6  3f056927gMHl7mWB89rb73JahbhQIA linux-2.4.26-xen-sparse/include/linux/blk.h
     1.7 +4124f66fPHG6yvB_vXmesjvzrJ3yMg linux-2.4.26-xen-sparse/include/linux/mm.h
     1.8  401c0590D_kwJDU59X8NyvqSv_Cl2A linux-2.4.26-xen-sparse/include/linux/sched.h
     1.9  40a248afgI0_JKthdYAe8beVfXSTpQ linux-2.4.26-xen-sparse/include/linux/skbuff.h
    1.10  401c0592pLrp_aCbQRo9GXiYQQaVVA linux-2.4.26-xen-sparse/include/linux/timer.h
    1.11 @@ -243,6 +244,8 @@ 4122466356eIBnC9ot44WSVVIFyhQA linux-2.6
    1.12  3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.7-xen-sparse/include/asm-xen/suspend.h
    1.13  3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h
    1.14  4124d8c4aocX7A-jIbuGraWN84pxGQ linux-2.6.7-xen-sparse/include/linux/bio.h
    1.15 +4124f66fp5QwbDHEfoUIa7pqO5Xhag linux-2.6.7-xen-sparse/include/linux/page-flags.h
    1.16 +4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6.7-xen-sparse/include/linux/skbuff.h
    1.17  40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.7-xen-sparse/mkbuildtree
    1.18  410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.7-xen-sparse/mm/page_alloc.c
    1.19  40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Make.defs
     2.1 --- a/linux-2.4.26-xen-sparse/arch/xen/config.in	Thu Aug 19 17:17:18 2004 +0000
     2.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/config.in	Thu Aug 19 18:50:23 2004 +0000
     2.3 @@ -20,7 +20,10 @@ endmenu
     2.4  # The IBM S/390 patch needs this.
     2.5  define_bool CONFIG_NO_IDLE_HZ y
     2.6  
     2.7 -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" != "y" ]; then
     2.8 +if [ "$CONFIG_XEN_PHYSDEV_ACCESS" == "y" ]; then
     2.9 +   define_bool CONFIG_FOREIGN_PAGES y
    2.10 +else
    2.11 +   define_bool CONFIG_FOREIGN_PAGES n
    2.12     define_bool CONFIG_NETDEVICES y
    2.13     define_bool CONFIG_VT n
    2.14  fi
     3.1 --- a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0	Thu Aug 19 17:17:18 2004 +0000
     3.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0	Thu Aug 19 18:50:23 2004 +0000
     3.3 @@ -13,6 +13,7 @@ CONFIG_UID16=y
     3.4  CONFIG_XEN_PRIVILEGED_GUEST=y
     3.5  CONFIG_XEN_PHYSDEV_ACCESS=y
     3.6  CONFIG_NO_IDLE_HZ=y
     3.7 +CONFIG_FOREIGN_PAGES=y
     3.8  
     3.9  #
    3.10  # Code maturity level options
     4.1 --- a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU	Thu Aug 19 17:17:18 2004 +0000
     4.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU	Thu Aug 19 18:50:23 2004 +0000
     4.3 @@ -13,6 +13,7 @@ CONFIG_UID16=y
     4.4  # CONFIG_XEN_PRIVILEGED_GUEST is not set
     4.5  # CONFIG_XEN_PHYSDEV_ACCESS is not set
     4.6  CONFIG_NO_IDLE_HZ=y
     4.7 +# CONFIG_FOREIGN_PAGES is not set
     4.8  CONFIG_NETDEVICES=y
     4.9  # CONFIG_VT is not set
    4.10  
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/linux-2.4.26-xen-sparse/include/linux/mm.h	Thu Aug 19 18:50:23 2004 +0000
     5.3 @@ -0,0 +1,703 @@
     5.4 +#ifndef _LINUX_MM_H
     5.5 +#define _LINUX_MM_H
     5.6 +
     5.7 +#include <linux/sched.h>
     5.8 +#include <linux/errno.h>
     5.9 +
    5.10 +#ifdef __KERNEL__
    5.11 +
    5.12 +#include <linux/config.h>
    5.13 +#include <linux/string.h>
    5.14 +#include <linux/list.h>
    5.15 +#include <linux/mmzone.h>
    5.16 +#include <linux/swap.h>
    5.17 +#include <linux/rbtree.h>
    5.18 +
    5.19 +extern unsigned long max_mapnr;
    5.20 +extern unsigned long num_physpages;
    5.21 +extern unsigned long num_mappedpages;
    5.22 +extern void * high_memory;
    5.23 +extern int page_cluster;
    5.24 +/* The inactive_clean lists are per zone. */
    5.25 +extern struct list_head active_list;
    5.26 +extern struct list_head inactive_list;
    5.27 +
    5.28 +#include <asm/page.h>
    5.29 +#include <asm/pgtable.h>
    5.30 +#include <asm/atomic.h>
    5.31 +
    5.32 +/*
    5.33 + * Linux kernel virtual memory manager primitives.
    5.34 + * The idea being to have a "virtual" mm in the same way
    5.35 + * we have a virtual fs - giving a cleaner interface to the
    5.36 + * mm details, and allowing different kinds of memory mappings
    5.37 + * (from shared memory to executable loading to arbitrary
    5.38 + * mmap() functions).
    5.39 + */
    5.40 +
    5.41 +/*
     5.42 + * This struct defines a VMM memory area. There is one of these
    5.43 + * per VM-area/task.  A VM area is any part of the process virtual memory
    5.44 + * space that has a special rule for the page-fault handlers (ie a shared
    5.45 + * library, the executable area etc).
    5.46 + */
    5.47 +struct vm_area_struct {
    5.48 +	struct mm_struct * vm_mm;	/* The address space we belong to. */
    5.49 +	unsigned long vm_start;		/* Our start address within vm_mm. */
    5.50 +	unsigned long vm_end;		/* The first byte after our end address
    5.51 +					   within vm_mm. */
    5.52 +
    5.53 +	/* linked list of VM areas per task, sorted by address */
    5.54 +	struct vm_area_struct *vm_next;
    5.55 +
    5.56 +	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
    5.57 +	unsigned long vm_flags;		/* Flags, listed below. */
    5.58 +
    5.59 +	rb_node_t vm_rb;
    5.60 +
    5.61 +	/*
    5.62 +	 * For areas with an address space and backing store,
    5.63 +	 * one of the address_space->i_mmap{,shared} lists,
    5.64 +	 * for shm areas, the list of attaches, otherwise unused.
    5.65 +	 */
    5.66 +	struct vm_area_struct *vm_next_share;
    5.67 +	struct vm_area_struct **vm_pprev_share;
    5.68 +
    5.69 +	/* Function pointers to deal with this struct. */
    5.70 +	struct vm_operations_struct * vm_ops;
    5.71 +
    5.72 +	/* Information about our backing store: */
    5.73 +	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
    5.74 +					   units, *not* PAGE_CACHE_SIZE */
    5.75 +	struct file * vm_file;		/* File we map to (can be NULL). */
    5.76 +	unsigned long vm_raend;		/* XXX: put full readahead info here. */
    5.77 +	void * vm_private_data;		/* was vm_pte (shared mem) */
    5.78 +};
    5.79 +
    5.80 +/*
    5.81 + * vm_flags..
    5.82 + */
    5.83 +#define VM_READ		0x00000001	/* currently active flags */
    5.84 +#define VM_WRITE	0x00000002
    5.85 +#define VM_EXEC		0x00000004
    5.86 +#define VM_SHARED	0x00000008
    5.87 +
    5.88 +#define VM_MAYREAD	0x00000010	/* limits for mprotect() etc */
    5.89 +#define VM_MAYWRITE	0x00000020
    5.90 +#define VM_MAYEXEC	0x00000040
    5.91 +#define VM_MAYSHARE	0x00000080
    5.92 +
    5.93 +#define VM_GROWSDOWN	0x00000100	/* general info on the segment */
    5.94 +#define VM_GROWSUP	0x00000200
    5.95 +#define VM_SHM		0x00000400	/* shared memory area, don't swap out */
    5.96 +#define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
    5.97 +
    5.98 +#define VM_EXECUTABLE	0x00001000
    5.99 +#define VM_LOCKED	0x00002000
   5.100 +#define VM_IO           0x00004000	/* Memory mapped I/O or similar */
   5.101 +
   5.102 +					/* Used by sys_madvise() */
   5.103 +#define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
   5.104 +#define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */
   5.105 +
   5.106 +#define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
   5.107 +#define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
   5.108 +#define VM_RESERVED	0x00080000	/* Don't unmap it from swap_out */
   5.109 +
   5.110 +#ifndef VM_STACK_FLAGS
   5.111 +#define VM_STACK_FLAGS	0x00000177
   5.112 +#endif
   5.113 +
   5.114 +#define VM_READHINTMASK			(VM_SEQ_READ | VM_RAND_READ)
   5.115 +#define VM_ClearReadHint(v)		(v)->vm_flags &= ~VM_READHINTMASK
   5.116 +#define VM_NormalReadHint(v)		(!((v)->vm_flags & VM_READHINTMASK))
   5.117 +#define VM_SequentialReadHint(v)	((v)->vm_flags & VM_SEQ_READ)
   5.118 +#define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
   5.119 +
   5.120 +/* read ahead limits */
   5.121 +extern int vm_min_readahead;
   5.122 +extern int vm_max_readahead;
   5.123 +
   5.124 +/*
   5.125 + * mapping from the currently active vm_flags protection bits (the
   5.126 + * low four bits) to a page protection mask..
   5.127 + */
   5.128 +extern pgprot_t protection_map[16];
   5.129 +
   5.130 +
   5.131 +/*
   5.132 + * These are the virtual MM functions - opening of an area, closing and
   5.133 + * unmapping it (needed to keep files on disk up-to-date etc), pointer
   5.134 + * to the functions called when a no-page or a wp-page exception occurs. 
   5.135 + */
   5.136 +struct vm_operations_struct {
   5.137 +	void (*open)(struct vm_area_struct * area);
   5.138 +	void (*close)(struct vm_area_struct * area);
   5.139 +	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused);
   5.140 +};
   5.141 +
   5.142 +/*
   5.143 + * Each physical page in the system has a struct page associated with
   5.144 + * it to keep track of whatever it is we are using the page for at the
   5.145 + * moment. Note that we have no way to track which tasks are using
   5.146 + * a page.
   5.147 + *
   5.148 + * Try to keep the most commonly accessed fields in single cache lines
   5.149 + * here (16 bytes or greater).  This ordering should be particularly
   5.150 + * beneficial on 32-bit processors.
   5.151 + *
   5.152 + * The first line is data used in page cache lookup, the second line
   5.153 + * is used for linear searches (eg. clock algorithm scans). 
   5.154 + *
   5.155 + * TODO: make this structure smaller, it could be as small as 32 bytes.
   5.156 + */
   5.157 +typedef struct page {
   5.158 +	struct list_head list;		/* ->mapping has some page lists. */
   5.159 +	struct address_space *mapping;	/* The inode (or ...) we belong to. */
   5.160 +	unsigned long index;		/* Our offset within mapping. */
   5.161 +	struct page *next_hash;		/* Next page sharing our hash bucket in
   5.162 +					   the pagecache hash table. */
   5.163 +	atomic_t count;			/* Usage count, see below. */
   5.164 +	unsigned long flags;		/* atomic flags, some possibly
   5.165 +					   updated asynchronously */
   5.166 +	struct list_head lru;		/* Pageout list, eg. active_list;
   5.167 +					   protected by pagemap_lru_lock !! */
   5.168 +	struct page **pprev_hash;	/* Complement to *next_hash. */
   5.169 +	struct buffer_head * buffers;	/* Buffer maps us to a disk block. */
   5.170 +
   5.171 +	/*
   5.172 +	 * On machines where all RAM is mapped into kernel address space,
   5.173 +	 * we can simply calculate the virtual address. On machines with
   5.174 +	 * highmem some memory is mapped into kernel virtual memory
   5.175 +	 * dynamically, so we need a place to store that address.
   5.176 +	 * Note that this field could be 16 bits on x86 ... ;)
   5.177 +	 *
   5.178 +	 * Architectures with slow multiplication can define
   5.179 +	 * WANT_PAGE_VIRTUAL in asm/page.h
   5.180 +	 */
   5.181 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
   5.182 +	void *virtual;			/* Kernel virtual address (NULL if
   5.183 +					   not kmapped, ie. highmem) */
    5.184 +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.185 +} mem_map_t;
   5.186 +
   5.187 +/*
   5.188 + * Methods to modify the page usage count.
   5.189 + *
   5.190 + * What counts for a page usage:
   5.191 + * - cache mapping   (page->mapping)
   5.192 + * - disk mapping    (page->buffers)
   5.193 + * - page mapped in a task's page tables, each mapping
   5.194 + *   is counted separately
   5.195 + *
   5.196 + * Also, many kernel routines increase the page count before a critical
   5.197 + * routine so they can be sure the page doesn't go away from under them.
   5.198 + */
   5.199 +#define get_page(p)		atomic_inc(&(p)->count)
   5.200 +#define put_page(p)		__free_page(p)
   5.201 +#define put_page_testzero(p) 	atomic_dec_and_test(&(p)->count)
   5.202 +#define page_count(p)		atomic_read(&(p)->count)
   5.203 +#define set_page_count(p,v) 	atomic_set(&(p)->count, v)
   5.204 +
   5.205 +/*
   5.206 + * Various page->flags bits:
   5.207 + *
   5.208 + * PG_reserved is set for special pages, which can never be swapped
   5.209 + * out. Some of them might not even exist (eg empty_bad_page)...
   5.210 + *
   5.211 + * Multiple processes may "see" the same page. E.g. for untouched
   5.212 + * mappings of /dev/null, all processes see the same page full of
   5.213 + * zeroes, and text pages of executables and shared libraries have
   5.214 + * only one copy in memory, at most, normally.
   5.215 + *
   5.216 + * For the non-reserved pages, page->count denotes a reference count.
   5.217 + *   page->count == 0 means the page is free.
   5.218 + *   page->count == 1 means the page is used for exactly one purpose
   5.219 + *   (e.g. a private data page of one process).
   5.220 + *
   5.221 + * A page may be used for kmalloc() or anyone else who does a
   5.222 + * __get_free_page(). In this case the page->count is at least 1, and
   5.223 + * all other fields are unused but should be 0 or NULL. The
   5.224 + * management of this page is the responsibility of the one who uses
   5.225 + * it.
   5.226 + *
   5.227 + * The other pages (we may call them "process pages") are completely
   5.228 + * managed by the Linux memory manager: I/O, buffers, swapping etc.
   5.229 + * The following discussion applies only to them.
   5.230 + *
   5.231 + * A page may belong to an inode's memory mapping. In this case,
   5.232 + * page->mapping is the pointer to the inode, and page->index is the
   5.233 + * file offset of the page, in units of PAGE_CACHE_SIZE.
   5.234 + *
   5.235 + * A page may have buffers allocated to it. In this case,
   5.236 + * page->buffers is a circular list of these buffer heads. Else,
   5.237 + * page->buffers == NULL.
   5.238 + *
   5.239 + * For pages belonging to inodes, the page->count is the number of
   5.240 + * attaches, plus 1 if buffers are allocated to the page, plus one
   5.241 + * for the page cache itself.
   5.242 + *
   5.243 + * All pages belonging to an inode are in these doubly linked lists:
   5.244 + * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages;
   5.245 + * using the page->list list_head. These fields are also used for
    5.246 + * freelist management (when page->count==0).
   5.247 + *
   5.248 + * There is also a hash table mapping (mapping,index) to the page
   5.249 + * in memory if present. The lists for this hash table use the fields
   5.250 + * page->next_hash and page->pprev_hash.
   5.251 + *
   5.252 + * All process pages can do I/O:
   5.253 + * - inode pages may need to be read from disk,
   5.254 + * - inode pages which have been modified and are MAP_SHARED may need
   5.255 + *   to be written to disk,
   5.256 + * - private pages which have been modified may need to be swapped out
   5.257 + *   to swap space and (later) to be read back into memory.
   5.258 + * During disk I/O, PG_locked is used. This bit is set before I/O
   5.259 + * and reset when I/O completes. page_waitqueue(page) is a wait queue of all
   5.260 + * tasks waiting for the I/O on this page to complete.
   5.261 + * PG_uptodate tells whether the page's contents is valid.
   5.262 + * When a read completes, the page becomes uptodate, unless a disk I/O
   5.263 + * error happened.
   5.264 + *
   5.265 + * For choosing which pages to swap out, inode pages carry a
   5.266 + * PG_referenced bit, which is set any time the system accesses
   5.267 + * that page through the (mapping,index) hash table. This referenced
   5.268 + * bit, together with the referenced bit in the page tables, is used
   5.269 + * to manipulate page->age and move the page across the active,
   5.270 + * inactive_dirty and inactive_clean lists.
   5.271 + *
   5.272 + * Note that the referenced bit, the page->lru list_head and the
   5.273 + * active, inactive_dirty and inactive_clean lists are protected by
   5.274 + * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
   5.275 + *
   5.276 + * PG_skip is used on sparc/sparc64 architectures to "skip" certain
   5.277 + * parts of the address space.
   5.278 + *
   5.279 + * PG_error is set to indicate that an I/O error occurred on this page.
   5.280 + *
   5.281 + * PG_arch_1 is an architecture specific page state bit.  The generic
   5.282 + * code guarantees that this bit is cleared for a page when it first
   5.283 + * is entered into the page cache.
   5.284 + *
   5.285 + * PG_highmem pages are not permanently mapped into the kernel virtual
   5.286 + * address space, they need to be kmapped separately for doing IO on
   5.287 + * the pages. The struct page (these bits with information) are always
   5.288 + * mapped into kernel address space...
   5.289 + */
   5.290 +#define PG_locked		 0	/* Page is locked. Don't touch. */
   5.291 +#define PG_error		 1
   5.292 +#define PG_referenced		 2
   5.293 +#define PG_uptodate		 3
   5.294 +#define PG_dirty		 4
   5.295 +#define PG_unused		 5
   5.296 +#define PG_lru			 6
   5.297 +#define PG_active		 7
   5.298 +#define PG_slab			 8
   5.299 +#define PG_skip			10
   5.300 +#define PG_highmem		11
   5.301 +#define PG_checked		12	/* kill me in 2.5.<early>. */
   5.302 +#define PG_arch_1		13
   5.303 +#define PG_reserved		14
   5.304 +#define PG_launder		15	/* written out by VM pressure.. */
   5.305 +#define PG_fs_1			16	/* Filesystem specific */
   5.306 +#define PG_foreign		21	/* Page belongs to foreign allocator */
   5.307 +
   5.308 +#ifndef arch_set_page_uptodate
   5.309 +#define arch_set_page_uptodate(page)
   5.310 +#endif
   5.311 +
   5.312 +/* Make it prettier to test the above... */
   5.313 +#define UnlockPage(page)	unlock_page(page)
   5.314 +#define Page_Uptodate(page)	test_bit(PG_uptodate, &(page)->flags)
   5.315 +#define SetPageUptodate(page) \
   5.316 +	do {								\
   5.317 +		arch_set_page_uptodate(page);				\
   5.318 +		set_bit(PG_uptodate, &(page)->flags);			\
   5.319 +	} while (0)
   5.320 +#define ClearPageUptodate(page)	clear_bit(PG_uptodate, &(page)->flags)
   5.321 +#define PageDirty(page)		test_bit(PG_dirty, &(page)->flags)
   5.322 +#define SetPageDirty(page)	set_bit(PG_dirty, &(page)->flags)
   5.323 +#define ClearPageDirty(page)	clear_bit(PG_dirty, &(page)->flags)
   5.324 +#define PageLocked(page)	test_bit(PG_locked, &(page)->flags)
   5.325 +#define LockPage(page)		set_bit(PG_locked, &(page)->flags)
   5.326 +#define TryLockPage(page)	test_and_set_bit(PG_locked, &(page)->flags)
   5.327 +#define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
   5.328 +#define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
   5.329 +#define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)
   5.330 +#define PageLaunder(page)	test_bit(PG_launder, &(page)->flags)
   5.331 +#define SetPageLaunder(page)	set_bit(PG_launder, &(page)->flags)
   5.332 +#define ClearPageLaunder(page)	clear_bit(PG_launder, &(page)->flags)
   5.333 +#define ClearPageArch1(page)	clear_bit(PG_arch_1, &(page)->flags)
   5.334 +
   5.335 +/* A foreign page uses a custom destructor rather than the buddy allocator. */
   5.336 +#ifdef CONFIG_FOREIGN_PAGES
   5.337 +#define PageForeign(page)	test_bit(PG_foreign, &(page)->flags)
   5.338 +#define SetPageForeign(page)	set_bit(PG_foreign, &(page)->flags)
   5.339 +#define ClearPageForeign(page)	clear_bit(PG_foreign, &(page)->flags)
   5.340 +#define PageForeignDestructor(page)	\
   5.341 +	( (void (*) (struct page *)) (page)->mapping )
   5.342 +#else
   5.343 +#define PageForeign(page)	0
   5.344 +#define PageForeignDestructor(page)	void
   5.345 +#endif
   5.346 +
   5.347 +/*
   5.348 + * The zone field is never updated after free_area_init_core()
   5.349 + * sets it, so none of the operations on it need to be atomic.
   5.350 + */
   5.351 +#define NODE_SHIFT 4
   5.352 +#define ZONE_SHIFT (BITS_PER_LONG - 8)
   5.353 +
   5.354 +struct zone_struct;
   5.355 +extern struct zone_struct *zone_table[];
   5.356 +
   5.357 +static inline zone_t *page_zone(struct page *page)
   5.358 +{
   5.359 +	return zone_table[page->flags >> ZONE_SHIFT];
   5.360 +}
   5.361 +
   5.362 +static inline void set_page_zone(struct page *page, unsigned long zone_num)
   5.363 +{
   5.364 +	page->flags &= ~(~0UL << ZONE_SHIFT);
   5.365 +	page->flags |= zone_num << ZONE_SHIFT;
   5.366 +}
   5.367 +
   5.368 +/*
   5.369 + * In order to avoid #ifdefs within C code itself, we define
   5.370 + * set_page_address to a noop for non-highmem machines, where
   5.371 + * the field isn't useful.
   5.372 + * The same is true for page_address() in arch-dependent code.
   5.373 + */
   5.374 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
   5.375 +
   5.376 +#define set_page_address(page, address)			\
   5.377 +	do {						\
   5.378 +		(page)->virtual = (address);		\
   5.379 +	} while(0)
   5.380 +
   5.381 +#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.382 +#define set_page_address(page, address)  do { } while(0)
   5.383 +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.384 +
   5.385 +/*
   5.386 + * Permanent address of a page. Obviously must never be
   5.387 + * called on a highmem page.
   5.388 + */
   5.389 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
   5.390 +
   5.391 +#define page_address(page) ((page)->virtual)
   5.392 +
   5.393 +#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.394 +
   5.395 +#define page_address(page)						\
   5.396 +	__va( (((page) - page_zone(page)->zone_mem_map) << PAGE_SHIFT)	\
   5.397 +			+ page_zone(page)->zone_start_paddr)
   5.398 +
   5.399 +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.400 +
   5.401 +extern void FASTCALL(set_page_dirty(struct page *));
   5.402 +
   5.403 +/*
   5.404 + * The first mb is necessary to safely close the critical section opened by the
   5.405 + * TryLockPage(), the second mb is necessary to enforce ordering between
   5.406 + * the clear_bit and the read of the waitqueue (to avoid SMP races with a
   5.407 + * parallel wait_on_page).
   5.408 + */
   5.409 +#define PageError(page)		test_bit(PG_error, &(page)->flags)
   5.410 +#define SetPageError(page)	set_bit(PG_error, &(page)->flags)
   5.411 +#define ClearPageError(page)	clear_bit(PG_error, &(page)->flags)
   5.412 +#define PageReferenced(page)	test_bit(PG_referenced, &(page)->flags)
   5.413 +#define SetPageReferenced(page)	set_bit(PG_referenced, &(page)->flags)
   5.414 +#define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
   5.415 +#define PageTestandClearReferenced(page)	test_and_clear_bit(PG_referenced, &(page)->flags)
   5.416 +#define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
   5.417 +#define PageSetSlab(page)	set_bit(PG_slab, &(page)->flags)
   5.418 +#define PageClearSlab(page)	clear_bit(PG_slab, &(page)->flags)
   5.419 +#define PageReserved(page)	test_bit(PG_reserved, &(page)->flags)
   5.420 +
   5.421 +#define PageActive(page)	test_bit(PG_active, &(page)->flags)
   5.422 +#define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
   5.423 +#define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
   5.424 +
   5.425 +#define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
   5.426 +#define TestSetPageLRU(page)	test_and_set_bit(PG_lru, &(page)->flags)
   5.427 +#define TestClearPageLRU(page)	test_and_clear_bit(PG_lru, &(page)->flags)
   5.428 +
   5.429 +#ifdef CONFIG_HIGHMEM
   5.430 +#define PageHighMem(page)		test_bit(PG_highmem, &(page)->flags)
   5.431 +#else
   5.432 +#define PageHighMem(page)		0 /* needed to optimize away at compile time */
   5.433 +#endif
   5.434 +
   5.435 +#define SetPageReserved(page)		set_bit(PG_reserved, &(page)->flags)
   5.436 +#define ClearPageReserved(page)		clear_bit(PG_reserved, &(page)->flags)
   5.437 +
   5.438 +/*
   5.439 + * Error return values for the *_nopage functions
   5.440 + */
   5.441 +#define NOPAGE_SIGBUS	(NULL)
   5.442 +#define NOPAGE_OOM	((struct page *) (-1))
   5.443 +
   5.444 +/* The array of struct pages */
   5.445 +extern mem_map_t * mem_map;
   5.446 +
   5.447 +/*
   5.448 + * There is only one page-allocator function, and two main namespaces to
   5.449 + * it. The alloc_page*() variants return 'struct page *' and as such
   5.450 + * can allocate highmem pages, the *get*page*() variants return
   5.451 + * virtual kernel addresses to the allocated page(s).
   5.452 + */
   5.453 +extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order));
   5.454 +extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist));
   5.455 +extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order);
   5.456 +
   5.457 +static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
   5.458 +{
   5.459 +	/*
   5.460 +	 * Gets optimized away by the compiler.
   5.461 +	 */
   5.462 +	if (order >= MAX_ORDER)
   5.463 +		return NULL;
   5.464 +	return _alloc_pages(gfp_mask, order);
   5.465 +}
   5.466 +
   5.467 +#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
   5.468 +
   5.469 +extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
   5.470 +extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
   5.471 +
   5.472 +#define __get_free_page(gfp_mask) \
   5.473 +		__get_free_pages((gfp_mask),0)
   5.474 +
   5.475 +#define __get_dma_pages(gfp_mask, order) \
   5.476 +		__get_free_pages((gfp_mask) | GFP_DMA,(order))
   5.477 +
   5.478 +/*
   5.479 + * The old interface name will be removed in 2.5:
   5.480 + */
   5.481 +#define get_free_page get_zeroed_page
   5.482 +
   5.483 +/*
   5.484 + * There is only one 'core' page-freeing function.
   5.485 + */
   5.486 +extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
   5.487 +extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
   5.488 +
   5.489 +#define __free_page(page) __free_pages((page), 0)
   5.490 +#define free_page(addr) free_pages((addr),0)
   5.491 +
   5.492 +extern void show_free_areas(void);
   5.493 +extern void show_free_areas_node(pg_data_t *pgdat);
   5.494 +
   5.495 +extern void clear_page_tables(struct mm_struct *, unsigned long, int);
   5.496 +
   5.497 +extern int fail_writepage(struct page *);
   5.498 +struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused);
   5.499 +struct file *shmem_file_setup(char * name, loff_t size);
   5.500 +extern void shmem_lock(struct file * file, int lock);
   5.501 +extern int shmem_zero_setup(struct vm_area_struct *);
   5.502 +
   5.503 +extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
   5.504 +extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
   5.505 +extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
   5.506 +extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
   5.507 +
   5.508 +extern int vmtruncate(struct inode * inode, loff_t offset);
   5.509 +extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
   5.510 +extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
   5.511 +extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
   5.512 +extern int make_pages_present(unsigned long addr, unsigned long end);
   5.513 +extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
   5.514 +extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
   5.515 +extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
   5.516 +extern int ptrace_attach(struct task_struct *tsk);
   5.517 +extern int ptrace_detach(struct task_struct *, unsigned int);
   5.518 +extern void ptrace_disable(struct task_struct *);
   5.519 +extern int ptrace_check_attach(struct task_struct *task, int kill);
   5.520 +
   5.521 +int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
   5.522 +		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
   5.523 +
   5.524 +/*
   5.525 + * On a two-level page table, this ends up being trivial. Thus the
   5.526 + * inlining and the symmetry break with pte_alloc() that does all
   5.527 + * of this out-of-line.
   5.528 + */
   5.529 +static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
   5.530 +{
   5.531 +	if (pgd_none(*pgd))
   5.532 +		return __pmd_alloc(mm, pgd, address);
   5.533 +	return pmd_offset(pgd, address);
   5.534 +}
   5.535 +
   5.536 +extern int pgt_cache_water[2];
   5.537 +extern int check_pgt_cache(void);
   5.538 +
   5.539 +extern void free_area_init(unsigned long * zones_size);
   5.540 +extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
   5.541 +	unsigned long * zones_size, unsigned long zone_start_paddr, 
   5.542 +	unsigned long *zholes_size);
   5.543 +extern void mem_init(void);
   5.544 +extern void show_mem(void);
   5.545 +extern void si_meminfo(struct sysinfo * val);
   5.546 +extern void swapin_readahead(swp_entry_t);
   5.547 +
   5.548 +extern struct address_space swapper_space;
   5.549 +#define PageSwapCache(page) ((page)->mapping == &swapper_space)
   5.550 +
   5.551 +static inline int is_page_cache_freeable(struct page * page)
   5.552 +{
   5.553 +	return page_count(page) - !!page->buffers == 1;
   5.554 +}
   5.555 +
   5.556 +extern int FASTCALL(can_share_swap_page(struct page *));
   5.557 +extern int FASTCALL(remove_exclusive_swap_page(struct page *));
   5.558 +
   5.559 +extern void __free_pte(pte_t);
   5.560 +
   5.561 +/* mmap.c */
   5.562 +extern void lock_vma_mappings(struct vm_area_struct *);
   5.563 +extern void unlock_vma_mappings(struct vm_area_struct *);
   5.564 +extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
   5.565 +extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
   5.566 +extern void build_mmap_rb(struct mm_struct *);
   5.567 +extern void exit_mmap(struct mm_struct *);
   5.568 +
   5.569 +extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
   5.570 +
   5.571 +extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
   5.572 +	unsigned long len, unsigned long prot,
   5.573 +	unsigned long flag, unsigned long pgoff);
   5.574 +
   5.575 +static inline unsigned long do_mmap(struct file *file, unsigned long addr,
   5.576 +	unsigned long len, unsigned long prot,
   5.577 +	unsigned long flag, unsigned long offset)
   5.578 +{
   5.579 +	unsigned long ret = -EINVAL;
   5.580 +	if ((offset + PAGE_ALIGN(len)) < offset)
   5.581 +		goto out;
   5.582 +	if (!(offset & ~PAGE_MASK))
   5.583 +		ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
   5.584 +out:
   5.585 +	return ret;
   5.586 +}
   5.587 +
   5.588 +extern int do_munmap(struct mm_struct *, unsigned long, size_t);
   5.589 +
   5.590 +extern unsigned long do_brk(unsigned long, unsigned long);
   5.591 +
   5.592 +static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev)
   5.593 +{
   5.594 +	prev->vm_next = vma->vm_next;
   5.595 +	rb_erase(&vma->vm_rb, &mm->mm_rb);
   5.596 +	if (mm->mmap_cache == vma)
   5.597 +		mm->mmap_cache = prev;
   5.598 +}
   5.599 +
   5.600 +static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags)
   5.601 +{
   5.602 +	if (!vma->vm_file && vma->vm_flags == vm_flags)
   5.603 +		return 1;
   5.604 +	else
   5.605 +		return 0;
   5.606 +}
   5.607 +
   5.608 +struct zone_t;
   5.609 +/* filemap.c */
   5.610 +extern void remove_inode_page(struct page *);
   5.611 +extern unsigned long page_unuse(struct page *);
   5.612 +extern void truncate_inode_pages(struct address_space *, loff_t);
   5.613 +
   5.614 +/* generic vm_area_ops exported for stackable file systems */
   5.615 +extern int filemap_sync(struct vm_area_struct *, unsigned long,	size_t, unsigned int);
   5.616 +extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int);
   5.617 +
   5.618 +/*
   5.619 + * GFP bitmasks..
   5.620 + */
   5.621 +/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */
   5.622 +#define __GFP_DMA	0x01
   5.623 +#define __GFP_HIGHMEM	0x02
   5.624 +
   5.625 +/* Action modifiers - doesn't change the zoning */
   5.626 +#define __GFP_WAIT	0x10	/* Can wait and reschedule? */
   5.627 +#define __GFP_HIGH	0x20	/* Should access emergency pools? */
   5.628 +#define __GFP_IO	0x40	/* Can start low memory physical IO? */
   5.629 +#define __GFP_HIGHIO	0x80	/* Can start high mem physical IO? */
   5.630 +#define __GFP_FS	0x100	/* Can call down to low-level FS? */
   5.631 +
   5.632 +#define GFP_NOHIGHIO	(__GFP_HIGH | __GFP_WAIT | __GFP_IO)
   5.633 +#define GFP_NOIO	(__GFP_HIGH | __GFP_WAIT)
   5.634 +#define GFP_NOFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
   5.635 +#define GFP_ATOMIC	(__GFP_HIGH)
   5.636 +#define GFP_USER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
   5.637 +#define GFP_HIGHUSER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
   5.638 +#define GFP_KERNEL	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
   5.639 +#define GFP_NFS		(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
   5.640 +#define GFP_KSWAPD	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
   5.641 +
   5.642 +/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
   5.643 +   platforms, used as appropriate on others */
   5.644 +
   5.645 +#define GFP_DMA		__GFP_DMA
   5.646 +
   5.647 +static inline unsigned int pf_gfp_mask(unsigned int gfp_mask)
   5.648 +{
   5.649 +	/* avoid all memory balancing I/O methods if this task cannot block on I/O */
   5.650 +	if (current->flags & PF_NOIO)
   5.651 +		gfp_mask &= ~(__GFP_IO | __GFP_HIGHIO | __GFP_FS);
   5.652 +
   5.653 +	return gfp_mask;
   5.654 +}
   5.655 +	
   5.656 +/* vma is the first one with  address < vma->vm_end,
   5.657 + * and even  address < vma->vm_start. Have to extend vma. */
   5.658 +static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
   5.659 +{
   5.660 +	unsigned long grow;
   5.661 +
   5.662 +	/*
   5.663 +	 * vma->vm_start/vm_end cannot change under us because the caller is required
   5.664 +	 * to hold the mmap_sem in write mode. We need to get the spinlock only
   5.665 +	 * before relocating the vma range ourself.
   5.666 +	 */
   5.667 +	address &= PAGE_MASK;
   5.668 + 	spin_lock(&vma->vm_mm->page_table_lock);
   5.669 +	grow = (vma->vm_start - address) >> PAGE_SHIFT;
   5.670 +	if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
   5.671 +	    ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) {
   5.672 +		spin_unlock(&vma->vm_mm->page_table_lock);
   5.673 +		return -ENOMEM;
   5.674 +	}
   5.675 +	vma->vm_start = address;
   5.676 +	vma->vm_pgoff -= grow;
   5.677 +	vma->vm_mm->total_vm += grow;
   5.678 +	if (vma->vm_flags & VM_LOCKED)
   5.679 +		vma->vm_mm->locked_vm += grow;
   5.680 +	spin_unlock(&vma->vm_mm->page_table_lock);
   5.681 +	return 0;
   5.682 +}
   5.683 +
   5.684 +/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
   5.685 +extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
   5.686 +extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
   5.687 +					     struct vm_area_struct **pprev);
   5.688 +
   5.689 +/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
   5.690 +   NULL if none.  Assume start_addr < end_addr. */
   5.691 +static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
   5.692 +{
   5.693 +	struct vm_area_struct * vma = find_vma(mm,start_addr);
   5.694 +
   5.695 +	if (vma && end_addr <= vma->vm_start)
   5.696 +		vma = NULL;
   5.697 +	return vma;
   5.698 +}
   5.699 +
   5.700 +extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
   5.701 +
   5.702 +extern struct page * vmalloc_to_page(void *addr);
   5.703 +
   5.704 +#endif /* __KERNEL__ */
   5.705 +
   5.706 +#endif
     6.1 --- a/linux-2.4.26-xen-sparse/mm/page_alloc.c	Thu Aug 19 17:17:18 2004 +0000
     6.2 +++ b/linux-2.4.26-xen-sparse/mm/page_alloc.c	Thu Aug 19 18:50:23 2004 +0000
     6.3 @@ -89,6 +89,9 @@ static void __free_pages_ok (struct page
     6.4  	struct page *base;
     6.5  	zone_t *zone;
     6.6  
     6.7 +	if (PageForeign(page))
     6.8 +		return (PageForeignDestructor(page))(page);
     6.9 +
    6.10  	/*
    6.11  	 * Yes, think what happens when other parts of the kernel take 
    6.12  	 * a reference to a page in order to pin it for io. -ben
    6.13 @@ -102,7 +105,7 @@ static void __free_pages_ok (struct page
    6.14  	if (page->buffers)
    6.15  		BUG();
    6.16  	if (page->mapping)
    6.17 -		return (*(void(*)(struct page *))page->mapping)(page);
    6.18 +		BUG();
    6.19  	if (!VALID_PAGE(page))
    6.20  		BUG();
    6.21  	if (PageLocked(page))
     7.1 --- a/linux-2.6.7-xen-sparse/arch/xen/Kconfig	Thu Aug 19 17:17:18 2004 +0000
     7.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/Kconfig	Thu Aug 19 18:50:23 2004 +0000
     7.3 @@ -44,6 +44,16 @@ config XEN_WRITABLE_PAGETABLES
     7.4  
     7.5  endmenu
     7.6  
     7.7 +config FOREIGN_PAGES
     7.8 +	bool
     7.9 +	default y if XEN_PHYSDEV_ACCESS
    7.10 +	default n if !XEN_PHYSDEV_ACCESS
    7.11 +
    7.12 +config PAGESIZED_SKBS
    7.13 +	bool
    7.14 +	default y if XEN_PHYSDEV_ACCESS
    7.15 +	default n if !XEN_PHYSDEV_ACCESS
    7.16 +
    7.17  #config VT
    7.18  #	bool
    7.19  #	default y
     8.1 --- a/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig	Thu Aug 19 17:17:18 2004 +0000
     8.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig	Thu Aug 19 18:50:23 2004 +0000
     8.3 @@ -10,6 +10,8 @@ CONFIG_NO_IDLE_HZ=y
     8.4  #
     8.5  CONFIG_XEN_PRIVILEGED_GUEST=y
     8.6  CONFIG_XEN_PHYSDEV_ACCESS=y
     8.7 +CONFIG_FOREIGN_PAGES=y
     8.8 +CONFIG_PAGESIZED_SKBS=y
     8.9  CONFIG_X86=y
    8.10  # CONFIG_X86_64 is not set
    8.11  
     9.1 --- a/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig	Thu Aug 19 17:17:18 2004 +0000
     9.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig	Thu Aug 19 18:50:23 2004 +0000
     9.3 @@ -10,6 +10,8 @@ CONFIG_NO_IDLE_HZ=y
     9.4  #
     9.5  # CONFIG_XEN_PRIVILEGED_GUEST is not set
     9.6  # CONFIG_XEN_PHYSDEV_ACCESS is not set
     9.7 +# CONFIG_FOREIGN_PAGES is not set
     9.8 +# CONFIG_PAGESIZED_SKBS is not set
     9.9  CONFIG_X86=y
    9.10  # CONFIG_X86_64 is not set
    9.11  
    10.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c	Thu Aug 19 17:17:18 2004 +0000
    10.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c	Thu Aug 19 18:50:23 2004 +0000
    10.3 @@ -376,7 +376,6 @@ static void net_tx_action(unsigned long 
    10.4      netif_tx_request_t txreq;
    10.5      u16 pending_idx;
    10.6      NETIF_RING_IDX i;
    10.7 -    struct page *page;
    10.8      multicall_entry_t *mcl;
    10.9      PEND_RING_IDX dc, dp;
   10.10  
   10.11 @@ -567,10 +566,9 @@ static void net_tx_action(unsigned long 
   10.12                 (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
   10.13                 PKT_PROT_LEN);
   10.14  
   10.15 -        page = virt_to_page(MMAP_VADDR(pending_idx));
   10.16 -
   10.17          /* Append the packet payload as a fragment. */
   10.18 -        skb_shinfo(skb)->frags[0].page        = page;
   10.19 +        skb_shinfo(skb)->frags[0].page        = 
   10.20 +            virt_to_page(MMAP_VADDR(pending_idx));
   10.21          skb_shinfo(skb)->frags[0].size        = txreq.size - PKT_PROT_LEN;
   10.22          skb_shinfo(skb)->frags[0].page_offset = 
   10.23              (txreq.addr + PKT_PROT_LEN) & ~PAGE_MASK;
   10.24 @@ -581,17 +579,6 @@ static void net_tx_action(unsigned long 
   10.25          skb->dev      = netif->dev;
   10.26          skb->protocol = eth_type_trans(skb, skb->dev);
   10.27  
   10.28 -        /*
   10.29 -         * Destructor information. We hideously abuse the 'mapping' pointer,
   10.30 -         * which isn't otherwise used by us. The page deallocator is modified
   10.31 -         * to interpret a non-NULL value as a destructor function to be called.
   10.32 -         * This works okay because in all other cases the pointer must be NULL
   10.33 -         * when the page is freed (normally Linux will explicitly bug out if
   10.34 -         * it sees otherwise.
   10.35 -         */
   10.36 -        page->mapping = (struct address_space *)netif_page_release;
   10.37 -        set_page_count(page, 1);
   10.38 -
   10.39          netif->stats.tx_bytes += txreq.size;
   10.40          netif->stats.tx_packets++;
   10.41  
   10.42 @@ -607,8 +594,8 @@ static void netif_page_release(struct pa
   10.43      unsigned long flags;
   10.44      u16 pending_idx = page - virt_to_page(mmap_vstart);
   10.45  
   10.46 -    /* Stop the abuse. */
   10.47 -    page->mapping = NULL;
   10.48 +    /* Ready for next use. */
   10.49 +    set_page_count(page, 1);
   10.50  
   10.51      spin_lock_irqsave(&dealloc_lock, flags);
   10.52      dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
   10.53 @@ -742,6 +729,7 @@ static irqreturn_t netif_be_dbg(int irq,
   10.54  static int __init netback_init(void)
   10.55  {
   10.56      int i;
   10.57 +    struct page *page;
   10.58  
   10.59      if ( !(start_info.flags & SIF_NET_BE_DOMAIN) &&
   10.60  	 !(start_info.flags & SIF_INITDOMAIN) )
   10.61 @@ -757,6 +745,13 @@ static int __init netback_init(void)
   10.62      if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
   10.63          BUG();
   10.64  
   10.65 +    for ( i = 0; i < MAX_PENDING_REQS; i++ )
   10.66 +    {
   10.67 +        page = virt_to_page(MMAP_VADDR(i));
   10.68 +        SetPageForeign(page);
   10.69 +        PageForeignDestructor(page) = netif_page_release;
   10.70 +    }
   10.71 +
   10.72      pending_cons = 0;
   10.73      pending_prod = MAX_PENDING_REQS;
   10.74      for ( i = 0; i < MAX_PENDING_REQS; i++ )
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/linux-2.6.7-xen-sparse/include/linux/page-flags.h	Thu Aug 19 18:50:23 2004 +0000
    11.3 @@ -0,0 +1,343 @@
    11.4 +/*
    11.5 + * Macros for manipulating and testing page->flags
    11.6 + */
    11.7 +
    11.8 +#ifndef PAGE_FLAGS_H
    11.9 +#define PAGE_FLAGS_H
   11.10 +
   11.11 +#include <linux/percpu.h>
   11.12 +#include <linux/cache.h>
   11.13 +#include <asm/pgtable.h>
   11.14 +
   11.15 +/*
   11.16 + * Various page->flags bits:
   11.17 + *
   11.18 + * PG_reserved is set for special pages, which can never be swapped out. Some
   11.19 + * of them might not even exist (eg empty_bad_page)...
   11.20 + *
   11.21 + * The PG_private bitflag is set if page->private contains a valid value.
   11.22 + *
   11.23 + * During disk I/O, PG_locked is used. This bit is set before I/O and
   11.24 + * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
   11.25 + * waiting for the I/O on this page to complete.
   11.26 + *
   11.27 + * PG_uptodate tells whether the page's contents is valid.  When a read
   11.28 + * completes, the page becomes uptodate, unless a disk I/O error happened.
   11.29 + *
   11.30 + * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
   11.31 + * which is set any time the system accesses that page through the (mapping,
   11.32 + * index) hash table.  This referenced bit, together with the referenced bit
   11.33 + * in the page tables, is used to manipulate page->age and move the page across
   11.34 + * the active, inactive_dirty and inactive_clean lists.
   11.35 + *
   11.36 + * Note that the referenced bit, the page->lru list_head and the active,
   11.37 + * inactive_dirty and inactive_clean lists are protected by the
   11.38 + * zone->lru_lock, and *NOT* by the usual PG_locked bit!
   11.39 + *
   11.40 + * PG_error is set to indicate that an I/O error occurred on this page.
   11.41 + *
   11.42 + * PG_arch_1 is an architecture specific page state bit.  The generic code
   11.43 + * guarantees that this bit is cleared for a page when it first is entered into
   11.44 + * the page cache.
   11.45 + *
   11.46 + * PG_highmem pages are not permanently mapped into the kernel virtual address
   11.47 + * space, they need to be kmapped separately for doing IO on the pages.  The
   11.48 + * struct page (these bits with information) are always mapped into kernel
   11.49 + * address space...
   11.50 + */
   11.51 +
   11.52 +/*
   11.53 + * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
   11.54 + * locked- and dirty-page accounting.  The top eight bits of page->flags are
   11.55 + * used for page->zone, so putting flag bits there doesn't work.
   11.56 + */
   11.57 +#define PG_locked	 	 0	/* Page is locked. Don't touch. */
   11.58 +#define PG_error		 1
   11.59 +#define PG_referenced		 2
   11.60 +#define PG_uptodate		 3
   11.61 +
   11.62 +#define PG_dirty	 	 4
   11.63 +#define PG_lru			 5
   11.64 +#define PG_active		 6
   11.65 +#define PG_slab			 7	/* slab debug (Suparna wants this) */
   11.66 +
   11.67 +#define PG_highmem		 8
   11.68 +#define PG_checked		 9	/* kill me in 2.5.<early>. */
   11.69 +#define PG_arch_1		10
   11.70 +#define PG_reserved		11
   11.71 +
   11.72 +#define PG_private		12	/* Has something at ->private */
   11.73 +#define PG_writeback		13	/* Page is under writeback */
   11.74 +#define PG_nosave		14	/* Used for system suspend/resume */
   11.75 +#define PG_maplock		15	/* Lock bit for rmap to ptes */
   11.76 +
   11.77 +#define PG_swapcache		16	/* Swap page: swp_entry_t in private */
   11.78 +#define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
   11.79 +#define PG_reclaim		18	/* To be reclaimed asap */
   11.80 +#define PG_compound		19	/* Part of a compound page */
   11.81 +
   11.82 +#define PG_anon			20	/* Anonymous: anon_vma in mapping */
   11.83 +#define PG_foreign		21	/* Page belongs to foreign allocator */
   11.84 +
   11.85 +
   11.86 +/*
   11.87 + * Global page accounting.  One instance per CPU.  Only unsigned longs are
   11.88 + * allowed.
   11.89 + */
   11.90 +struct page_state {
   11.91 +	unsigned long nr_dirty;		/* Dirty writeable pages */
   11.92 +	unsigned long nr_writeback;	/* Pages under writeback */
   11.93 +	unsigned long nr_unstable;	/* NFS unstable pages */
   11.94 +	unsigned long nr_page_table_pages;/* Pages used for pagetables */
   11.95 +	unsigned long nr_mapped;	/* mapped into pagetables */
   11.96 +	unsigned long nr_slab;		/* In slab */
   11.97 +#define GET_PAGE_STATE_LAST nr_slab
   11.98 +
   11.99 +	/*
  11.100 +	 * The below are zeroed by get_page_state().  Use get_full_page_state()
  11.101 +	 * to add up all these.
  11.102 +	 */
  11.103 +	unsigned long pgpgin;		/* Disk reads */
  11.104 +	unsigned long pgpgout;		/* Disk writes */
  11.105 +	unsigned long pswpin;		/* swap reads */
  11.106 +	unsigned long pswpout;		/* swap writes */
  11.107 +	unsigned long pgalloc_high;	/* page allocations */
  11.108 +
  11.109 +	unsigned long pgalloc_normal;
  11.110 +	unsigned long pgalloc_dma;
  11.111 +	unsigned long pgfree;		/* page freeings */
  11.112 +	unsigned long pgactivate;	/* pages moved inactive->active */
  11.113 +	unsigned long pgdeactivate;	/* pages moved active->inactive */
  11.114 +
  11.115 +	unsigned long pgfault;		/* faults (major+minor) */
  11.116 +	unsigned long pgmajfault;	/* faults (major only) */
  11.117 +	unsigned long pgrefill_high;	/* inspected in refill_inactive_zone */
  11.118 +	unsigned long pgrefill_normal;
  11.119 +	unsigned long pgrefill_dma;
  11.120 +
  11.121 +	unsigned long pgsteal_high;	/* total highmem pages reclaimed */
  11.122 +	unsigned long pgsteal_normal;
  11.123 +	unsigned long pgsteal_dma;
  11.124 +	unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
  11.125 +	unsigned long pgscan_kswapd_normal;
  11.126 +
  11.127 +	unsigned long pgscan_kswapd_dma;
  11.128 +	unsigned long pgscan_direct_high;/* total highmem pages scanned */
  11.129 +	unsigned long pgscan_direct_normal;
  11.130 +	unsigned long pgscan_direct_dma;
  11.131 +	unsigned long pginodesteal;	/* pages reclaimed via inode freeing */
  11.132 +
  11.133 +	unsigned long slabs_scanned;	/* slab objects scanned */
  11.134 +	unsigned long kswapd_steal;	/* pages reclaimed by kswapd */
  11.135 +	unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
  11.136 +	unsigned long pageoutrun;	/* kswapd's calls to page reclaim */
  11.137 +	unsigned long allocstall;	/* direct reclaim calls */
  11.138 +
  11.139 +	unsigned long pgrotated;	/* pages rotated to tail of the LRU */
  11.140 +};
  11.141 +
  11.142 +DECLARE_PER_CPU(struct page_state, page_states);
  11.143 +
  11.144 +extern void get_page_state(struct page_state *ret);
  11.145 +extern void get_full_page_state(struct page_state *ret);
  11.146 +extern unsigned long __read_page_state(unsigned offset);
  11.147 +
  11.148 +#define read_page_state(member) \
  11.149 +	__read_page_state(offsetof(struct page_state, member))
  11.150 +
  11.151 +#define mod_page_state(member, delta)					\
  11.152 +	do {								\
  11.153 +		unsigned long flags;					\
  11.154 +		local_irq_save(flags);					\
  11.155 +		__get_cpu_var(page_states).member += (delta);		\
  11.156 +		local_irq_restore(flags);				\
  11.157 +	} while (0)
  11.158 +
  11.159 +
  11.160 +#define inc_page_state(member)	mod_page_state(member, 1UL)
  11.161 +#define dec_page_state(member)	mod_page_state(member, 0UL - 1)
  11.162 +#define add_page_state(member,delta) mod_page_state(member, (delta))
  11.163 +#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
  11.164 +
  11.165 +#define mod_page_state_zone(zone, member, delta)			\
  11.166 +	do {								\
  11.167 +		unsigned long flags;					\
  11.168 +		local_irq_save(flags);					\
  11.169 +		if (is_highmem(zone))					\
  11.170 +			__get_cpu_var(page_states).member##_high += (delta);\
  11.171 +		else if (is_normal(zone))				\
  11.172 +			__get_cpu_var(page_states).member##_normal += (delta);\
  11.173 +		else							\
  11.174 +			__get_cpu_var(page_states).member##_dma += (delta);\
  11.175 +		local_irq_restore(flags);				\
  11.176 +	} while (0)
  11.177 +
  11.178 +/*
  11.179 + * Manipulation of page state flags
  11.180 + */
  11.181 +#define PageLocked(page)		\
  11.182 +		test_bit(PG_locked, &(page)->flags)
  11.183 +#define SetPageLocked(page)		\
  11.184 +		set_bit(PG_locked, &(page)->flags)
  11.185 +#define TestSetPageLocked(page)		\
  11.186 +		test_and_set_bit(PG_locked, &(page)->flags)
  11.187 +#define ClearPageLocked(page)		\
  11.188 +		clear_bit(PG_locked, &(page)->flags)
  11.189 +#define TestClearPageLocked(page)	\
  11.190 +		test_and_clear_bit(PG_locked, &(page)->flags)
  11.191 +
  11.192 +#define PageError(page)		test_bit(PG_error, &(page)->flags)
  11.193 +#define SetPageError(page)	set_bit(PG_error, &(page)->flags)
  11.194 +#define ClearPageError(page)	clear_bit(PG_error, &(page)->flags)
  11.195 +
  11.196 +#define PageReferenced(page)	test_bit(PG_referenced, &(page)->flags)
  11.197 +#define SetPageReferenced(page)	set_bit(PG_referenced, &(page)->flags)
  11.198 +#define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
  11.199 +#define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
  11.200 +
  11.201 +#ifndef arch_set_page_uptodate
  11.202 +#define arch_set_page_uptodate(page) do { } while (0)
  11.203 +#endif
  11.204 +
  11.205 +#define PageUptodate(page)	test_bit(PG_uptodate, &(page)->flags)
  11.206 +#define SetPageUptodate(page) \
  11.207 +	do {								\
  11.208 +		arch_set_page_uptodate(page);				\
  11.209 +		set_bit(PG_uptodate, &(page)->flags);			\
  11.210 +	} while (0)
  11.211 +#define ClearPageUptodate(page)	clear_bit(PG_uptodate, &(page)->flags)
  11.212 +
  11.213 +#define PageDirty(page)		test_bit(PG_dirty, &(page)->flags)
  11.214 +#define SetPageDirty(page)	set_bit(PG_dirty, &(page)->flags)
  11.215 +#define TestSetPageDirty(page)	test_and_set_bit(PG_dirty, &(page)->flags)
  11.216 +#define ClearPageDirty(page)	clear_bit(PG_dirty, &(page)->flags)
  11.217 +#define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags)
  11.218 +
  11.219 +#define SetPageLRU(page)	set_bit(PG_lru, &(page)->flags)
  11.220 +#define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
  11.221 +#define TestSetPageLRU(page)	test_and_set_bit(PG_lru, &(page)->flags)
  11.222 +#define TestClearPageLRU(page)	test_and_clear_bit(PG_lru, &(page)->flags)
  11.223 +
  11.224 +#define PageActive(page)	test_bit(PG_active, &(page)->flags)
  11.225 +#define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
  11.226 +#define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
  11.227 +#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags)
  11.228 +#define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags)
  11.229 +
  11.230 +#define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
  11.231 +#define SetPageSlab(page)	set_bit(PG_slab, &(page)->flags)
  11.232 +#define ClearPageSlab(page)	clear_bit(PG_slab, &(page)->flags)
  11.233 +#define TestClearPageSlab(page)	test_and_clear_bit(PG_slab, &(page)->flags)
  11.234 +#define TestSetPageSlab(page)	test_and_set_bit(PG_slab, &(page)->flags)
  11.235 +
  11.236 +#ifdef CONFIG_HIGHMEM
  11.237 +#define PageHighMem(page)	test_bit(PG_highmem, &(page)->flags)
  11.238 +#else
  11.239 +#define PageHighMem(page)	0 /* needed to optimize away at compile time */
  11.240 +#endif
  11.241 +
  11.242 +#define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
  11.243 +#define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
  11.244 +#define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)
  11.245 +
  11.246 +#define PageReserved(page)	test_bit(PG_reserved, &(page)->flags)
  11.247 +#define SetPageReserved(page)	set_bit(PG_reserved, &(page)->flags)
  11.248 +#define ClearPageReserved(page)	clear_bit(PG_reserved, &(page)->flags)
  11.249 +
  11.250 +#define SetPagePrivate(page)	set_bit(PG_private, &(page)->flags)
  11.251 +#define ClearPagePrivate(page)	clear_bit(PG_private, &(page)->flags)
  11.252 +#define PagePrivate(page)	test_bit(PG_private, &(page)->flags)
  11.253 +
  11.254 +#define PageWriteback(page)	test_bit(PG_writeback, &(page)->flags)
  11.255 +#define SetPageWriteback(page)						\
  11.256 +	do {								\
  11.257 +		if (!test_and_set_bit(PG_writeback,			\
  11.258 +				&(page)->flags))			\
  11.259 +			inc_page_state(nr_writeback);			\
  11.260 +	} while (0)
  11.261 +#define TestSetPageWriteback(page)					\
  11.262 +	({								\
  11.263 +		int ret;						\
  11.264 +		ret = test_and_set_bit(PG_writeback,			\
  11.265 +					&(page)->flags);		\
  11.266 +		if (!ret)						\
  11.267 +			inc_page_state(nr_writeback);			\
  11.268 +		ret;							\
  11.269 +	})
  11.270 +#define ClearPageWriteback(page)					\
  11.271 +	do {								\
  11.272 +		if (test_and_clear_bit(PG_writeback,			\
  11.273 +				&(page)->flags))			\
  11.274 +			dec_page_state(nr_writeback);			\
  11.275 +	} while (0)
  11.276 +#define TestClearPageWriteback(page)					\
  11.277 +	({								\
  11.278 +		int ret;						\
  11.279 +		ret = test_and_clear_bit(PG_writeback,			\
  11.280 +				&(page)->flags);			\
  11.281 +		if (ret)						\
  11.282 +			dec_page_state(nr_writeback);			\
  11.283 +		ret;							\
  11.284 +	})
  11.285 +
  11.286 +#define PageNosave(page)	test_bit(PG_nosave, &(page)->flags)
  11.287 +#define SetPageNosave(page)	set_bit(PG_nosave, &(page)->flags)
  11.288 +#define TestSetPageNosave(page)	test_and_set_bit(PG_nosave, &(page)->flags)
  11.289 +#define ClearPageNosave(page)		clear_bit(PG_nosave, &(page)->flags)
  11.290 +#define TestClearPageNosave(page)	test_and_clear_bit(PG_nosave, &(page)->flags)
  11.291 +
  11.292 +#define PageMappedToDisk(page)	test_bit(PG_mappedtodisk, &(page)->flags)
  11.293 +#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
  11.294 +#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
  11.295 +
  11.296 +#define PageReclaim(page)	test_bit(PG_reclaim, &(page)->flags)
  11.297 +#define SetPageReclaim(page)	set_bit(PG_reclaim, &(page)->flags)
  11.298 +#define ClearPageReclaim(page)	clear_bit(PG_reclaim, &(page)->flags)
  11.299 +#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags)
  11.300 +
  11.301 +#define PageCompound(page)	test_bit(PG_compound, &(page)->flags)
  11.302 +#define SetPageCompound(page)	set_bit(PG_compound, &(page)->flags)
  11.303 +#define ClearPageCompound(page)	clear_bit(PG_compound, &(page)->flags)
  11.304 +
  11.305 +#define PageAnon(page)		test_bit(PG_anon, &(page)->flags)
  11.306 +#define SetPageAnon(page)	set_bit(PG_anon, &(page)->flags)
  11.307 +#define ClearPageAnon(page)	clear_bit(PG_anon, &(page)->flags)
  11.308 +
  11.309 +/* A foreign page uses a custom destructor rather than the buddy allocator. */
  11.310 +#ifdef CONFIG_FOREIGN_PAGES
  11.311 +#define PageForeign(page)	test_bit(PG_foreign, &(page)->flags)
  11.312 +#define SetPageForeign(page)	set_bit(PG_foreign, &(page)->flags)
  11.313 +#define ClearPageForeign(page)	clear_bit(PG_foreign, &(page)->flags)
  11.314 +#define PageForeignDestructor(page)	\
  11.315 +	( (void (*) (struct page *)) (page)->mapping )
  11.316 +#else
  11.317 +#define PageForeign(page)	0
  11.318 +#define PageForeignDestructor(page)	void
  11.319 +#endif
  11.320 +
  11.321 +#ifdef CONFIG_SWAP
  11.322 +#define PageSwapCache(page)	test_bit(PG_swapcache, &(page)->flags)
  11.323 +#define SetPageSwapCache(page)	set_bit(PG_swapcache, &(page)->flags)
  11.324 +#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags)
  11.325 +#else
  11.326 +#define PageSwapCache(page)	0
  11.327 +#endif
  11.328 +
  11.329 +struct page;	/* forward declaration */
  11.330 +
  11.331 +int test_clear_page_dirty(struct page *page);
  11.332 +int __clear_page_dirty(struct page *page);
  11.333 +int test_clear_page_writeback(struct page *page);
  11.334 +int test_set_page_writeback(struct page *page);
  11.335 +
  11.336 +static inline void clear_page_dirty(struct page *page)
  11.337 +{
  11.338 +	test_clear_page_dirty(page);
  11.339 +}
  11.340 +
  11.341 +static inline void set_page_writeback(struct page *page)
  11.342 +{
  11.343 +	test_set_page_writeback(page);
  11.344 +}
  11.345 +
  11.346 +#endif	/* PAGE_FLAGS_H */
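
A page flagged PG_foreign carries its own release callback: the driver stores a function pointer in page->mapping, and PageForeignDestructor() casts it back when the page is freed (see the page_alloc.c hunk further down). A minimal sketch of how a backend driver might use the hook; the function names are illustrative, not the actual netback code:

static void my_page_release(struct page *page)
{
	/* Invoked by the page allocator instead of the buddy free path. */
	ClearPageForeign(page);
	page->mapping = NULL;
	/* ...return the page to the driver's private pool... */
}

static void my_make_page_foreign(struct page *page)
{
	/* PageForeignDestructor() will read this pointer back out. */
	page->mapping = (struct address_space *)my_page_release;
	SetPageForeign(page);
}
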
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/linux-2.6.7-xen-sparse/include/linux/skbuff.h	Thu Aug 19 18:50:23 2004 +0000
    12.3 @@ -0,0 +1,1073 @@
    12.4 +/*
    12.5 + *	Definitions for the 'struct sk_buff' memory handlers.
    12.6 + *
    12.7 + *	Authors:
    12.8 + *		Alan Cox, <gw4pts@gw4pts.ampr.org>
    12.9 + *		Florian La Roche, <rzsfl@rz.uni-sb.de>
   12.10 + *
   12.11 + *	This program is free software; you can redistribute it and/or
   12.12 + *	modify it under the terms of the GNU General Public License
   12.13 + *	as published by the Free Software Foundation; either version
   12.14 + *	2 of the License, or (at your option) any later version.
   12.15 + */
   12.16 +
   12.17 +#ifndef _LINUX_SKBUFF_H
   12.18 +#define _LINUX_SKBUFF_H
   12.19 +
   12.20 +#include <linux/config.h>
   12.21 +#include <linux/kernel.h>
   12.22 +#include <linux/compiler.h>
   12.23 +#include <linux/time.h>
   12.24 +#include <linux/cache.h>
   12.25 +
   12.26 +#include <asm/atomic.h>
   12.27 +#include <asm/types.h>
   12.28 +#include <linux/spinlock.h>
   12.29 +#include <linux/mm.h>
   12.30 +#include <linux/highmem.h>
   12.31 +#include <linux/poll.h>
   12.32 +#include <linux/net.h>
   12.33 +
   12.34 +#define HAVE_ALLOC_SKB		/* For the drivers to know */
   12.35 +#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
   12.36 +#define SLAB_SKB 		/* Slabified skbuffs 	   */
   12.37 +
   12.38 +#define CHECKSUM_NONE 0
   12.39 +#define CHECKSUM_HW 1
   12.40 +#define CHECKSUM_UNNECESSARY 2
   12.41 +
   12.42 +#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
   12.43 +				 ~(SMP_CACHE_BYTES - 1))
   12.44 +#define SKB_MAX_ORDER(X, ORDER)	(((PAGE_SIZE << (ORDER)) - (X) - \
   12.45 +				  sizeof(struct skb_shared_info)) & \
   12.46 +				  ~(SMP_CACHE_BYTES - 1))
   12.47 +#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X), 0))
   12.48 +#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0, 2))
   12.49 +
   12.50 +/* A. Checksumming of received packets by device.
   12.51 + *
   12.52 + *	NONE: device failed to checksum this packet.
   12.53 + *		skb->csum is undefined.
   12.54 + *
    12.55 + *	UNNECESSARY: device parsed the packet and has already verified the checksum.
   12.56 + *		skb->csum is undefined.
    12.57 + *	      It is a bad option but, unfortunately, many vendors do this.
    12.58 + *	      Apparently with the secret goal of selling you a new device
    12.59 + *	      when you add a new protocol to your host, e.g. IPv6. 8)
   12.60 + *
   12.61 + *	HW: the most generic way. Device supplied checksum of _all_
   12.62 + *	    the packet as seen by netif_rx in skb->csum.
   12.63 + *	    NOTE: Even if device supports only some protocols, but
   12.64 + *	    is able to produce some skb->csum, it MUST use HW,
   12.65 + *	    not UNNECESSARY.
   12.66 + *
   12.67 + * B. Checksumming on output.
   12.68 + *
   12.69 + *	NONE: skb is checksummed by protocol or csum is not required.
   12.70 + *
   12.71 + *	HW: device is required to csum packet as seen by hard_start_xmit
   12.72 + *	from skb->h.raw to the end and to record the checksum
   12.73 + *	at skb->h.raw+skb->csum.
   12.74 + *
   12.75 + *	Device must show its capabilities in dev->features, set
   12.76 + *	at device setup time.
    12.77 + *	NETIF_F_HW_CSUM	- it is a clever device, able to checksum
   12.78 + *			  everything.
   12.79 + *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
   12.80 + *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
   12.81 + *			  TCP/UDP over IPv4. Sigh. Vendors like this
    12.82 + *			  way for some unknown reason. Though, see the comment above
   12.83 + *			  about CHECKSUM_UNNECESSARY. 8)
   12.84 + *
   12.85 + *	Any questions? No questions, good. 		--ANK
   12.86 + */
   12.87 +
   12.88 +#ifdef __i386__
   12.89 +#define NET_CALLER(arg) (*(((void **)&arg) - 1))
   12.90 +#else
   12.91 +#define NET_CALLER(arg) __builtin_return_address(0)
   12.92 +#endif
   12.93 +
   12.94 +#ifdef CONFIG_NETFILTER
   12.95 +struct nf_conntrack {
   12.96 +	atomic_t use;
   12.97 +	void (*destroy)(struct nf_conntrack *);
   12.98 +};
   12.99 +
  12.100 +struct nf_ct_info {
  12.101 +	struct nf_conntrack *master;
  12.102 +};
  12.103 +
  12.104 +#ifdef CONFIG_BRIDGE_NETFILTER
  12.105 +struct nf_bridge_info {
  12.106 +	atomic_t use;
  12.107 +	struct net_device *physindev;
  12.108 +	struct net_device *physoutdev;
  12.109 +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
  12.110 +	struct net_device *netoutdev;
  12.111 +#endif
  12.112 +	unsigned int mask;
  12.113 +	unsigned long data[32 / sizeof(unsigned long)];
  12.114 +};
  12.115 +#endif
  12.116 +
  12.117 +#endif
  12.118 +
  12.119 +struct sk_buff_head {
  12.120 +	/* These two members must be first. */
  12.121 +	struct sk_buff	*next;
  12.122 +	struct sk_buff	*prev;
  12.123 +
  12.124 +	__u32		qlen;
  12.125 +	spinlock_t	lock;
  12.126 +};
  12.127 +
  12.128 +struct sk_buff;
  12.129 +
  12.130 +/* To allow 64K frame to be packed as single skb without frag_list */
  12.131 +#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
  12.132 +
  12.133 +typedef struct skb_frag_struct skb_frag_t;
  12.134 +
  12.135 +struct skb_frag_struct {
  12.136 +	struct page *page;
  12.137 +	__u16 page_offset;
  12.138 +	__u16 size;
  12.139 +};
  12.140 +
  12.141 +/* This data is invariant across clones and lives at
  12.142 + * the end of the header data, ie. at skb->end.
  12.143 + */
  12.144 +struct skb_shared_info {
  12.145 +	atomic_t	dataref;
  12.146 +	unsigned int	nr_frags;
  12.147 +	unsigned short	tso_size;
  12.148 +	unsigned short	tso_segs;
  12.149 +	struct sk_buff	*frag_list;
  12.150 +	skb_frag_t	frags[MAX_SKB_FRAGS];
  12.151 +};
  12.152 +
  12.153 +/** 
  12.154 + *	struct sk_buff - socket buffer
  12.155 + *	@next: Next buffer in list
  12.156 + *	@prev: Previous buffer in list
  12.157 + *	@list: List we are on
  12.158 + *	@sk: Socket we are owned by
  12.159 + *	@stamp: Time we arrived
  12.160 + *	@dev: Device we arrived on/are leaving by
  12.161 + *      @real_dev: The real device we are using
  12.162 + *	@h: Transport layer header
  12.163 + *	@nh: Network layer header
  12.164 + *	@mac: Link layer header
  12.165 + *	@dst: FIXME: Describe this field
  12.166 + *	@cb: Control buffer. Free for use by every layer. Put private vars here
  12.167 + *	@len: Length of actual data
  12.168 + *	@data_len: Data length
  12.169 + *	@mac_len: Length of link layer header
  12.170 + *	@csum: Checksum
  12.171 + *	@__unused: Dead field, may be reused
  12.172 + *	@cloned: Head may be cloned (check refcnt to be sure)
  12.173 + *	@pkt_type: Packet class
  12.174 + *	@ip_summed: Driver fed us an IP checksum
  12.175 + *	@priority: Packet queueing priority
  12.176 + *	@users: User count - see {datagram,tcp}.c
  12.177 + *	@protocol: Packet protocol from driver
  12.178 + *	@security: Security level of packet
  12.179 + *	@truesize: Buffer size 
  12.180 + *	@head: Head of buffer
  12.181 + *	@data: Data head pointer
  12.182 + *	@tail: Tail pointer
  12.183 + *	@end: End pointer
  12.184 + *	@destructor: Destruct function
  12.185 + *	@nfmark: Can be used for communication between hooks
  12.186 + *	@nfcache: Cache info
  12.187 + *	@nfct: Associated connection, if any
  12.188 + *	@nf_debug: Netfilter debugging
  12.189 + *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  12.190 + *      @private: Data which is private to the HIPPI implementation
  12.191 + *	@tc_index: Traffic control index
  12.192 + */
  12.193 +
  12.194 +struct sk_buff {
  12.195 +	/* These two members must be first. */
  12.196 +	struct sk_buff		*next;
  12.197 +	struct sk_buff		*prev;
  12.198 +
  12.199 +	struct sk_buff_head	*list;
  12.200 +	struct sock		*sk;
  12.201 +	struct timeval		stamp;
  12.202 +	struct net_device	*dev;
  12.203 +	struct net_device	*real_dev;
  12.204 +
  12.205 +	union {
  12.206 +		struct tcphdr	*th;
  12.207 +		struct udphdr	*uh;
  12.208 +		struct icmphdr	*icmph;
  12.209 +		struct igmphdr	*igmph;
  12.210 +		struct iphdr	*ipiph;
  12.211 +		struct ipv6hdr	*ipv6h;
  12.212 +		unsigned char	*raw;
  12.213 +	} h;
  12.214 +
  12.215 +	union {
  12.216 +		struct iphdr	*iph;
  12.217 +		struct ipv6hdr	*ipv6h;
  12.218 +		struct arphdr	*arph;
  12.219 +		unsigned char	*raw;
  12.220 +	} nh;
  12.221 +
  12.222 +	union {
  12.223 +	  	struct ethhdr	*ethernet;
  12.224 +	  	unsigned char 	*raw;
  12.225 +	} mac;
  12.226 +
  12.227 +	struct  dst_entry	*dst;
  12.228 +	struct	sec_path	*sp;
  12.229 +
  12.230 +	/*
  12.231 +	 * This is the control buffer. It is free to use for every
  12.232 +	 * layer. Please put your private variables there. If you
  12.233 +	 * want to keep them across layers you have to do a skb_clone()
  12.234 +	 * first. This is owned by whoever has the skb queued ATM.
  12.235 +	 */
  12.236 +	char			cb[48];
  12.237 +
  12.238 +	unsigned int		len,
  12.239 +				data_len,
  12.240 +				mac_len,
  12.241 +				csum;
  12.242 +	unsigned char		local_df,
  12.243 +				cloned,
  12.244 +				pkt_type,
  12.245 +				ip_summed;
  12.246 +	__u32			priority;
  12.247 +	unsigned short		protocol,
  12.248 +				security;
  12.249 +
  12.250 +	void			(*destructor)(struct sk_buff *skb);
  12.251 +#ifdef CONFIG_NETFILTER
  12.252 +        unsigned long		nfmark;
  12.253 +	__u32			nfcache;
  12.254 +	struct nf_ct_info	*nfct;
  12.255 +#ifdef CONFIG_NETFILTER_DEBUG
  12.256 +        unsigned int		nf_debug;
  12.257 +#endif
  12.258 +#ifdef CONFIG_BRIDGE_NETFILTER
  12.259 +	struct nf_bridge_info	*nf_bridge;
  12.260 +#endif
  12.261 +#endif /* CONFIG_NETFILTER */
  12.262 +#if defined(CONFIG_HIPPI)
  12.263 +	union {
  12.264 +		__u32		ifield;
  12.265 +	} private;
  12.266 +#endif
  12.267 +#ifdef CONFIG_NET_SCHED
  12.268 +       __u32			tc_index;               /* traffic control index */
  12.269 +#endif
  12.270 +
  12.271 +	/* These elements must be at the end, see alloc_skb() for details.  */
  12.272 +	unsigned int		truesize;
  12.273 +	atomic_t		users;
  12.274 +	unsigned char		*head,
  12.275 +				*data,
  12.276 +				*tail,
  12.277 +				*end;
  12.278 +};
  12.279 +
  12.280 +#ifdef __KERNEL__
  12.281 +/*
  12.282 + *	Handling routines are only of interest to the kernel
  12.283 + */
  12.284 +#include <linux/slab.h>
  12.285 +
  12.286 +#include <asm/system.h>
  12.287 +
  12.288 +extern void	       __kfree_skb(struct sk_buff *skb);
  12.289 +extern struct sk_buff *alloc_skb(unsigned int size, int priority);
  12.290 +extern void	       kfree_skbmem(struct sk_buff *skb);
  12.291 +extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority);
  12.292 +extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
  12.293 +extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask);
  12.294 +extern int	       pskb_expand_head(struct sk_buff *skb,
  12.295 +					int nhead, int ntail, int gfp_mask);
  12.296 +extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
  12.297 +					    unsigned int headroom);
  12.298 +extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
  12.299 +				       int newheadroom, int newtailroom,
  12.300 +				       int priority);
  12.301 +extern struct sk_buff *		skb_pad(struct sk_buff *skb, int pad);
  12.302 +#define dev_kfree_skb(a)	kfree_skb(a)
  12.303 +extern void	      skb_over_panic(struct sk_buff *skb, int len,
  12.304 +				     void *here);
  12.305 +extern void	      skb_under_panic(struct sk_buff *skb, int len,
  12.306 +				      void *here);
  12.307 +
  12.308 +/* Internal */
  12.309 +#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
  12.310 +
  12.311 +/**
  12.312 + *	skb_queue_empty - check if a queue is empty
  12.313 + *	@list: queue head
  12.314 + *
  12.315 + *	Returns true if the queue is empty, false otherwise.
  12.316 + */
  12.317 +static inline int skb_queue_empty(const struct sk_buff_head *list)
  12.318 +{
  12.319 +	return list->next == (struct sk_buff *)list;
  12.320 +}
  12.321 +
  12.322 +/**
  12.323 + *	skb_get - reference buffer
  12.324 + *	@skb: buffer to reference
  12.325 + *
  12.326 + *	Makes another reference to a socket buffer and returns a pointer
  12.327 + *	to the buffer.
  12.328 + */
  12.329 +static inline struct sk_buff *skb_get(struct sk_buff *skb)
  12.330 +{
  12.331 +	atomic_inc(&skb->users);
  12.332 +	return skb;
  12.333 +}
  12.334 +
  12.335 +/*
   12.336 + * If users == 1, we are the only owner and can avoid a redundant
  12.337 + * atomic change.
  12.338 + */
  12.339 +
  12.340 +/**
  12.341 + *	kfree_skb - free an sk_buff
  12.342 + *	@skb: buffer to free
  12.343 + *
  12.344 + *	Drop a reference to the buffer and free it if the usage count has
  12.345 + *	hit zero.
  12.346 + */
  12.347 +static inline void kfree_skb(struct sk_buff *skb)
  12.348 +{
  12.349 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  12.350 +		__kfree_skb(skb);
  12.351 +}
  12.352 +
  12.353 +/* Use this if you didn't touch the skb state [for fast switching] */
  12.354 +static inline void kfree_skb_fast(struct sk_buff *skb)
  12.355 +{
  12.356 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  12.357 +		kfree_skbmem(skb);
  12.358 +}
  12.359 +
  12.360 +/**
  12.361 + *	skb_cloned - is the buffer a clone
  12.362 + *	@skb: buffer to check
  12.363 + *
  12.364 + *	Returns true if the buffer was generated with skb_clone() and is
  12.365 + *	one of multiple shared copies of the buffer. Cloned buffers are
  12.366 + *	shared data so must not be written to under normal circumstances.
  12.367 + */
  12.368 +static inline int skb_cloned(const struct sk_buff *skb)
  12.369 +{
  12.370 +	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
  12.371 +}
  12.372 +
  12.373 +/**
  12.374 + *	skb_shared - is the buffer shared
  12.375 + *	@skb: buffer to check
  12.376 + *
  12.377 + *	Returns true if more than one person has a reference to this
  12.378 + *	buffer.
  12.379 + */
  12.380 +static inline int skb_shared(const struct sk_buff *skb)
  12.381 +{
  12.382 +	return atomic_read(&skb->users) != 1;
  12.383 +}
  12.384 +
  12.385 +/**
  12.386 + *	skb_share_check - check if buffer is shared and if so clone it
  12.387 + *	@skb: buffer to check
  12.388 + *	@pri: priority for memory allocation
  12.389 + *
  12.390 + *	If the buffer is shared the buffer is cloned and the old copy
  12.391 + *	drops a reference. A new clone with a single reference is returned.
  12.392 + *	If the buffer is not shared the original buffer is returned. When
   12.393 + *	called from interrupt context or with spinlocks held, @pri must
   12.394 + *	be %GFP_ATOMIC.
  12.395 + *
  12.396 + *	NULL is returned on a memory allocation failure.
  12.397 + */
  12.398 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
  12.399 +{
  12.400 +	might_sleep_if(pri & __GFP_WAIT);
  12.401 +	if (skb_shared(skb)) {
  12.402 +		struct sk_buff *nskb = skb_clone(skb, pri);
  12.403 +		kfree_skb(skb);
  12.404 +		skb = nskb;
  12.405 +	}
  12.406 +	return skb;
  12.407 +}
  12.408 +
  12.409 +/*
   12.410 + *	Copy shared buffers into a new sk_buff. We effectively do COW on
   12.411 + *	packets to handle cases where we have a local reader and a forwarding
   12.412 + *	path, and a couple of other messy ones. The normal one is tcpdumping
   12.413 + *	a packet that's being forwarded.
  12.414 + */
  12.415 +
  12.416 +/**
  12.417 + *	skb_unshare - make a copy of a shared buffer
  12.418 + *	@skb: buffer to check
  12.419 + *	@pri: priority for memory allocation
  12.420 + *
  12.421 + *	If the socket buffer is a clone then this function creates a new
  12.422 + *	copy of the data, drops a reference count on the old copy and returns
  12.423 + *	the new copy with the reference count at 1. If the buffer is not a clone
  12.424 + *	the original buffer is returned. When called with a spinlock held or
  12.425 + *	from interrupt state @pri must be %GFP_ATOMIC
  12.426 + *
  12.427 + *	%NULL is returned on a memory allocation failure.
  12.428 + */
  12.429 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
  12.430 +{
  12.431 +	might_sleep_if(pri & __GFP_WAIT);
  12.432 +	if (skb_cloned(skb)) {
  12.433 +		struct sk_buff *nskb = skb_copy(skb, pri);
  12.434 +		kfree_skb(skb);	/* Free our shared copy */
  12.435 +		skb = nskb;
  12.436 +	}
  12.437 +	return skb;
  12.438 +}
  12.439 +
  12.440 +/**
  12.441 + *	skb_peek
  12.442 + *	@list_: list to peek at
  12.443 + *
  12.444 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  12.445 + *	be careful with this one. A peek leaves the buffer on the
  12.446 + *	list and someone else may run off with it. You must hold
  12.447 + *	the appropriate locks or have a private queue to do this.
  12.448 + *
  12.449 + *	Returns %NULL for an empty list or a pointer to the head element.
  12.450 + *	The reference count is not incremented and the reference is therefore
  12.451 + *	volatile. Use with caution.
  12.452 + */
  12.453 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
  12.454 +{
  12.455 +	struct sk_buff *list = ((struct sk_buff *)list_)->next;
  12.456 +	if (list == (struct sk_buff *)list_)
  12.457 +		list = NULL;
  12.458 +	return list;
  12.459 +}
  12.460 +
  12.461 +/**
  12.462 + *	skb_peek_tail
  12.463 + *	@list_: list to peek at
  12.464 + *
  12.465 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  12.466 + *	be careful with this one. A peek leaves the buffer on the
  12.467 + *	list and someone else may run off with it. You must hold
  12.468 + *	the appropriate locks or have a private queue to do this.
  12.469 + *
  12.470 + *	Returns %NULL for an empty list or a pointer to the tail element.
  12.471 + *	The reference count is not incremented and the reference is therefore
  12.472 + *	volatile. Use with caution.
  12.473 + */
  12.474 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
  12.475 +{
  12.476 +	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
  12.477 +	if (list == (struct sk_buff *)list_)
  12.478 +		list = NULL;
  12.479 +	return list;
  12.480 +}
  12.481 +
  12.482 +/**
  12.483 + *	skb_queue_len	- get queue length
  12.484 + *	@list_: list to measure
  12.485 + *
  12.486 + *	Return the length of an &sk_buff queue.
  12.487 + */
  12.488 +static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
  12.489 +{
  12.490 +	return list_->qlen;
  12.491 +}
  12.492 +
  12.493 +static inline void skb_queue_head_init(struct sk_buff_head *list)
  12.494 +{
  12.495 +	spin_lock_init(&list->lock);
  12.496 +	list->prev = list->next = (struct sk_buff *)list;
  12.497 +	list->qlen = 0;
  12.498 +}
  12.499 +
  12.500 +/*
  12.501 + *	Insert an sk_buff at the start of a list.
  12.502 + *
  12.503 + *	The "__skb_xxxx()" functions are the non-atomic ones that
  12.504 + *	can only be called with interrupts disabled.
  12.505 + */
  12.506 +
  12.507 +/**
  12.508 + *	__skb_queue_head - queue a buffer at the list head
  12.509 + *	@list: list to use
  12.510 + *	@newsk: buffer to queue
  12.511 + *
  12.512 + *	Queue a buffer at the start of a list. This function takes no locks
  12.513 + *	and you must therefore hold required locks before calling it.
  12.514 + *
  12.515 + *	A buffer cannot be placed on two lists at the same time.
  12.516 + */
  12.517 +extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
  12.518 +static inline void __skb_queue_head(struct sk_buff_head *list,
  12.519 +				    struct sk_buff *newsk)
  12.520 +{
  12.521 +	struct sk_buff *prev, *next;
  12.522 +
  12.523 +	newsk->list = list;
  12.524 +	list->qlen++;
  12.525 +	prev = (struct sk_buff *)list;
  12.526 +	next = prev->next;
  12.527 +	newsk->next = next;
  12.528 +	newsk->prev = prev;
  12.529 +	next->prev  = prev->next = newsk;
  12.530 +}
  12.531 +
  12.532 +/**
  12.533 + *	__skb_queue_tail - queue a buffer at the list tail
  12.534 + *	@list: list to use
  12.535 + *	@newsk: buffer to queue
  12.536 + *
  12.537 + *	Queue a buffer at the end of a list. This function takes no locks
  12.538 + *	and you must therefore hold required locks before calling it.
  12.539 + *
  12.540 + *	A buffer cannot be placed on two lists at the same time.
  12.541 + */
  12.542 +extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
  12.543 +static inline void __skb_queue_tail(struct sk_buff_head *list,
  12.544 +				   struct sk_buff *newsk)
  12.545 +{
  12.546 +	struct sk_buff *prev, *next;
  12.547 +
  12.548 +	newsk->list = list;
  12.549 +	list->qlen++;
  12.550 +	next = (struct sk_buff *)list;
  12.551 +	prev = next->prev;
  12.552 +	newsk->next = next;
  12.553 +	newsk->prev = prev;
  12.554 +	next->prev  = prev->next = newsk;
  12.555 +}
  12.556 +
  12.557 +
  12.558 +/**
  12.559 + *	__skb_dequeue - remove from the head of the queue
  12.560 + *	@list: list to dequeue from
  12.561 + *
  12.562 + *	Remove the head of the list. This function does not take any locks
  12.563 + *	so must be used with appropriate locks held only. The head item is
  12.564 + *	returned or %NULL if the list is empty.
  12.565 + */
  12.566 +extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);
  12.567 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  12.568 +{
  12.569 +	struct sk_buff *next, *prev, *result;
  12.570 +
  12.571 +	prev = (struct sk_buff *) list;
  12.572 +	next = prev->next;
  12.573 +	result = NULL;
  12.574 +	if (next != prev) {
  12.575 +		result	     = next;
  12.576 +		next	     = next->next;
  12.577 +		list->qlen--;
  12.578 +		next->prev   = prev;
  12.579 +		prev->next   = next;
  12.580 +		result->next = result->prev = NULL;
  12.581 +		result->list = NULL;
  12.582 +	}
  12.583 +	return result;
  12.584 +}
  12.585 +
  12.586 +
  12.587 +/*
  12.588 + *	Insert a packet on a list.
  12.589 + */
  12.590 +extern void        skb_insert(struct sk_buff *old, struct sk_buff *newsk);
  12.591 +static inline void __skb_insert(struct sk_buff *newsk,
  12.592 +				struct sk_buff *prev, struct sk_buff *next,
  12.593 +				struct sk_buff_head *list)
  12.594 +{
  12.595 +	newsk->next = next;
  12.596 +	newsk->prev = prev;
  12.597 +	next->prev  = prev->next = newsk;
  12.598 +	newsk->list = list;
  12.599 +	list->qlen++;
  12.600 +}
  12.601 +
  12.602 +/*
  12.603 + *	Place a packet after a given packet in a list.
  12.604 + */
  12.605 +extern void	   skb_append(struct sk_buff *old, struct sk_buff *newsk);
  12.606 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
  12.607 +{
  12.608 +	__skb_insert(newsk, old, old->next, old->list);
  12.609 +}
  12.610 +
  12.611 +/*
  12.612 + * remove sk_buff from list. _Must_ be called atomically, and with
   12.613 + * the list known.
  12.614 + */
  12.615 +extern void	   skb_unlink(struct sk_buff *skb);
  12.616 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  12.617 +{
  12.618 +	struct sk_buff *next, *prev;
  12.619 +
  12.620 +	list->qlen--;
  12.621 +	next	   = skb->next;
  12.622 +	prev	   = skb->prev;
  12.623 +	skb->next  = skb->prev = NULL;
  12.624 +	skb->list  = NULL;
  12.625 +	next->prev = prev;
  12.626 +	prev->next = next;
  12.627 +}
  12.628 +
  12.629 +
  12.630 +/* XXX: more streamlined implementation */
  12.631 +
  12.632 +/**
  12.633 + *	__skb_dequeue_tail - remove from the tail of the queue
  12.634 + *	@list: list to dequeue from
  12.635 + *
  12.636 + *	Remove the tail of the list. This function does not take any locks
  12.637 + *	so must be used with appropriate locks held only. The tail item is
  12.638 + *	returned or %NULL if the list is empty.
  12.639 + */
  12.640 +extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
  12.641 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
  12.642 +{
  12.643 +	struct sk_buff *skb = skb_peek_tail(list);
  12.644 +	if (skb)
  12.645 +		__skb_unlink(skb, list);
  12.646 +	return skb;
  12.647 +}
  12.648 +
  12.649 +
  12.650 +static inline int skb_is_nonlinear(const struct sk_buff *skb)
  12.651 +{
  12.652 +	return skb->data_len;
  12.653 +}
  12.654 +
  12.655 +static inline unsigned int skb_headlen(const struct sk_buff *skb)
  12.656 +{
  12.657 +	return skb->len - skb->data_len;
  12.658 +}
  12.659 +
  12.660 +static inline int skb_pagelen(const struct sk_buff *skb)
  12.661 +{
  12.662 +	int i, len = 0;
  12.663 +
  12.664 +	for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
  12.665 +		len += skb_shinfo(skb)->frags[i].size;
  12.666 +	return len + skb_headlen(skb);
  12.667 +}
  12.668 +
  12.669 +static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
  12.670 +{
  12.671 +	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  12.672 +	frag->page = page;
  12.673 +	frag->page_offset = off;
  12.674 +	frag->size = size;
  12.675 +	skb_shinfo(skb)->nr_frags = i+1;
  12.676 +}
  12.677 +
  12.678 +#define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
  12.679 +#define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->frag_list)
  12.680 +#define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))
  12.681 +
  12.682 +/*
  12.683 + *	Add data to an sk_buff
  12.684 + */
  12.685 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
  12.686 +{
  12.687 +	unsigned char *tmp = skb->tail;
  12.688 +	SKB_LINEAR_ASSERT(skb);
  12.689 +	skb->tail += len;
  12.690 +	skb->len  += len;
  12.691 +	return tmp;
  12.692 +}
  12.693 +
  12.694 +/**
  12.695 + *	skb_put - add data to a buffer
  12.696 + *	@skb: buffer to use
  12.697 + *	@len: amount of data to add
  12.698 + *
  12.699 + *	This function extends the used data area of the buffer. If this would
  12.700 + *	exceed the total buffer size the kernel will panic. A pointer to the
  12.701 + *	first byte of the extra data is returned.
  12.702 + */
  12.703 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
  12.704 +{
  12.705 +	unsigned char *tmp = skb->tail;
  12.706 +	SKB_LINEAR_ASSERT(skb);
  12.707 +	skb->tail += len;
  12.708 +	skb->len  += len;
  12.709 +	if (unlikely(skb->tail>skb->end))
  12.710 +		skb_over_panic(skb, len, current_text_addr());
  12.711 +	return tmp;
  12.712 +}
  12.713 +
  12.714 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
  12.715 +{
  12.716 +	skb->data -= len;
  12.717 +	skb->len  += len;
  12.718 +	return skb->data;
  12.719 +}
  12.720 +
  12.721 +/**
  12.722 + *	skb_push - add data to the start of a buffer
  12.723 + *	@skb: buffer to use
  12.724 + *	@len: amount of data to add
  12.725 + *
  12.726 + *	This function extends the used data area of the buffer at the buffer
  12.727 + *	start. If this would exceed the total buffer headroom the kernel will
  12.728 + *	panic. A pointer to the first byte of the extra data is returned.
  12.729 + */
  12.730 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
  12.731 +{
  12.732 +	skb->data -= len;
  12.733 +	skb->len  += len;
  12.734 +	if (unlikely(skb->data<skb->head))
  12.735 +		skb_under_panic(skb, len, current_text_addr());
  12.736 +	return skb->data;
  12.737 +}
  12.738 +
  12.739 +static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
  12.740 +{
  12.741 +	skb->len -= len;
  12.742 +	BUG_ON(skb->len < skb->data_len);
  12.743 +	return skb->data += len;
  12.744 +}
  12.745 +
  12.746 +/**
  12.747 + *	skb_pull - remove data from the start of a buffer
  12.748 + *	@skb: buffer to use
  12.749 + *	@len: amount of data to remove
  12.750 + *
  12.751 + *	This function removes data from the start of a buffer, returning
  12.752 + *	the memory to the headroom. A pointer to the next data in the buffer
  12.753 + *	is returned. Once the data has been pulled future pushes will overwrite
  12.754 + *	the old data.
  12.755 + */
  12.756 +static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
  12.757 +{
  12.758 +	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
  12.759 +}
  12.760 +
  12.761 +extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
  12.762 +
  12.763 +static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
  12.764 +{
  12.765 +	if (len > skb_headlen(skb) &&
  12.766 +	    !__pskb_pull_tail(skb, len-skb_headlen(skb)))
  12.767 +		return NULL;
  12.768 +	skb->len -= len;
  12.769 +	return skb->data += len;
  12.770 +}
  12.771 +
  12.772 +static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len)
  12.773 +{
  12.774 +	return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
  12.775 +}
  12.776 +
  12.777 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
  12.778 +{
  12.779 +	if (likely(len <= skb_headlen(skb)))
  12.780 +		return 1;
  12.781 +	if (unlikely(len > skb->len))
  12.782 +		return 0;
  12.783 +	return __pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL;
  12.784 +}
  12.785 +
  12.786 +/**
  12.787 + *	skb_headroom - bytes at buffer head
  12.788 + *	@skb: buffer to check
  12.789 + *
  12.790 + *	Return the number of bytes of free space at the head of an &sk_buff.
  12.791 + */
  12.792 +static inline int skb_headroom(const struct sk_buff *skb)
  12.793 +{
  12.794 +	return skb->data - skb->head;
  12.795 +}
  12.796 +
  12.797 +/**
  12.798 + *	skb_tailroom - bytes at buffer end
  12.799 + *	@skb: buffer to check
  12.800 + *
  12.801 + *	Return the number of bytes of free space at the tail of an sk_buff
  12.802 + */
  12.803 +static inline int skb_tailroom(const struct sk_buff *skb)
  12.804 +{
  12.805 +	return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
  12.806 +}
  12.807 +
  12.808 +/**
  12.809 + *	skb_reserve - adjust headroom
  12.810 + *	@skb: buffer to alter
  12.811 + *	@len: bytes to move
  12.812 + *
  12.813 + *	Increase the headroom of an empty &sk_buff by reducing the tail
  12.814 + *	room. This is only allowed for an empty buffer.
  12.815 + */
  12.816 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
  12.817 +{
  12.818 +	skb->data += len;
  12.819 +	skb->tail += len;
  12.820 +}
  12.821 +
  12.822 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
  12.823 +
  12.824 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
  12.825 +{
  12.826 +	if (!skb->data_len) {
  12.827 +		skb->len  = len;
  12.828 +		skb->tail = skb->data + len;
  12.829 +	} else
  12.830 +		___pskb_trim(skb, len, 0);
  12.831 +}
  12.832 +
  12.833 +/**
  12.834 + *	skb_trim - remove end from a buffer
  12.835 + *	@skb: buffer to alter
  12.836 + *	@len: new length
  12.837 + *
  12.838 + *	Cut the length of a buffer down by removing data from the tail. If
  12.839 + *	the buffer is already under the length specified it is not modified.
  12.840 + */
  12.841 +static inline void skb_trim(struct sk_buff *skb, unsigned int len)
  12.842 +{
  12.843 +	if (skb->len > len)
  12.844 +		__skb_trim(skb, len);
  12.845 +}
  12.846 +
  12.847 +
  12.848 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
  12.849 +{
  12.850 +	if (!skb->data_len) {
  12.851 +		skb->len  = len;
  12.852 +		skb->tail = skb->data+len;
  12.853 +		return 0;
  12.854 +	}
  12.855 +	return ___pskb_trim(skb, len, 1);
  12.856 +}
  12.857 +
  12.858 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
  12.859 +{
  12.860 +	return (len < skb->len) ? __pskb_trim(skb, len) : 0;
  12.861 +}
  12.862 +
  12.863 +/**
  12.864 + *	skb_orphan - orphan a buffer
  12.865 + *	@skb: buffer to orphan
  12.866 + *
  12.867 + *	If a buffer currently has an owner then we call the owner's
  12.868 + *	destructor function and make the @skb unowned. The buffer continues
  12.869 + *	to exist but is no longer charged to its former owner.
  12.870 + */
  12.871 +static inline void skb_orphan(struct sk_buff *skb)
  12.872 +{
  12.873 +	if (skb->destructor)
  12.874 +		skb->destructor(skb);
  12.875 +	skb->destructor = NULL;
  12.876 +	skb->sk		= NULL;
  12.877 +}
  12.878 +
  12.879 +/**
  12.880 + *	__skb_queue_purge - empty a list
  12.881 + *	@list: list to empty
  12.882 + *
  12.883 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  12.884 + *	the list and one reference dropped. This function does not take the
  12.885 + *	list lock and the caller must hold the relevant locks to use it.
  12.886 + */
  12.887 +extern void skb_queue_purge(struct sk_buff_head *list);
  12.888 +static inline void __skb_queue_purge(struct sk_buff_head *list)
  12.889 +{
  12.890 +	struct sk_buff *skb;
  12.891 +	while ((skb = __skb_dequeue(list)) != NULL)
  12.892 +		kfree_skb(skb);
  12.893 +}
  12.894 +
  12.895 +/**
  12.896 + *	__dev_alloc_skb - allocate an skbuff for sending
  12.897 + *	@length: length to allocate
  12.898 + *	@gfp_mask: get_free_pages mask, passed to alloc_skb
  12.899 + *
  12.900 + *	Allocate a new &sk_buff and assign it a usage count of one. The
  12.901 + *	buffer has unspecified headroom built in. Users should allocate
  12.902 + *	the headroom they think they need without accounting for the
  12.903 + *	built in space. The built in space is used for optimisations.
  12.904 + *
   12.905 + *	%NULL is returned if there is no free memory.
  12.906 + */
  12.907 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
  12.908 +					      int gfp_mask)
  12.909 +{
  12.910 +	struct sk_buff *skb;
  12.911 +#ifdef CONFIG_PAGESIZED_SKBS
  12.912 +	length = max(length, (unsigned int)(PAGE_SIZE - 16));
  12.913 +#endif
  12.914 +	skb = alloc_skb(length + 16, gfp_mask);
  12.915 +	if (likely(skb))
  12.916 +		skb_reserve(skb, 16);
  12.917 +	return skb;
  12.918 +}
  12.919 +
  12.920 +/**
  12.921 + *	dev_alloc_skb - allocate an skbuff for sending
  12.922 + *	@length: length to allocate
  12.923 + *
  12.924 + *	Allocate a new &sk_buff and assign it a usage count of one. The
  12.925 + *	buffer has unspecified headroom built in. Users should allocate
  12.926 + *	the headroom they think they need without accounting for the
  12.927 + *	built in space. The built in space is used for optimisations.
  12.928 + *
   12.929 + *	%NULL is returned if there is no free memory. Although this function
  12.930 + *	allocates memory it can be called from an interrupt.
  12.931 + */
  12.932 +static inline struct sk_buff *dev_alloc_skb(unsigned int length)
  12.933 +{
  12.934 +	return __dev_alloc_skb(length, GFP_ATOMIC);
  12.935 +}
  12.936 +
  12.937 +/**
  12.938 + *	skb_cow - copy header of skb when it is required
  12.939 + *	@skb: buffer to cow
  12.940 + *	@headroom: needed headroom
  12.941 + *
  12.942 + *	If the skb passed lacks sufficient headroom or its data part
  12.943 + *	is shared, data is reallocated. If reallocation fails, an error
  12.944 + *	is returned and original skb is not changed.
  12.945 + *
  12.946 + *	The result is skb with writable area skb->head...skb->tail
  12.947 + *	and at least @headroom of space at head.
  12.948 + */
  12.949 +static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
  12.950 +{
  12.951 +	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
  12.952 +
  12.953 +	if (delta < 0)
  12.954 +		delta = 0;
  12.955 +
  12.956 +	if (delta || skb_cloned(skb))
  12.957 +		return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
  12.958 +	return 0;
  12.959 +}
  12.960 +
  12.961 +/**
  12.962 + *	skb_padto	- pad an skbuff up to a minimal size
  12.963 + *	@skb: buffer to pad
  12.964 + *	@len: minimal length
  12.965 + *
  12.966 + *	Pads up a buffer to ensure the trailing bytes exist and are
  12.967 + *	blanked. If the buffer already contains sufficient data it
  12.968 + *	is untouched. Returns the buffer, which may be a replacement
  12.969 + *	for the original, or NULL for out of memory - in which case
  12.970 + *	the original buffer is still freed.
  12.971 + */
  12.972 + 
  12.973 +static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len)
  12.974 +{
  12.975 +	unsigned int size = skb->len;
  12.976 +	if (likely(size >= len))
  12.977 +		return skb;
  12.978 +	return skb_pad(skb, len-size);
  12.979 +}
  12.980 +
  12.981 +/**
  12.982 + *	skb_linearize - convert paged skb to linear one
   12.983 + *	@skb: buffer to linearize
  12.984 + *	@gfp: allocation mode
  12.985 + *
  12.986 + *	If there is no free memory -ENOMEM is returned, otherwise zero
  12.987 + *	is returned and the old skb data released.
  12.988 + */
  12.989 +extern int __skb_linearize(struct sk_buff *skb, int gfp);
  12.990 +static inline int skb_linearize(struct sk_buff *skb, int gfp)
  12.991 +{
  12.992 +	return __skb_linearize(skb, gfp);
  12.993 +}
  12.994 +
  12.995 +static inline void *kmap_skb_frag(const skb_frag_t *frag)
  12.996 +{
  12.997 +#ifdef CONFIG_HIGHMEM
  12.998 +	BUG_ON(in_irq());
  12.999 +
 12.1000 +	local_bh_disable();
 12.1001 +#endif
 12.1002 +	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
 12.1003 +}
 12.1004 +
 12.1005 +static inline void kunmap_skb_frag(void *vaddr)
 12.1006 +{
 12.1007 +	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 12.1008 +#ifdef CONFIG_HIGHMEM
 12.1009 +	local_bh_enable();
 12.1010 +#endif
 12.1011 +}
 12.1012 +
 12.1013 +#define skb_queue_walk(queue, skb) \
 12.1014 +		for (skb = (queue)->next, prefetch(skb->next);	\
 12.1015 +		     (skb != (struct sk_buff *)(queue));	\
 12.1016 +		     skb = skb->next, prefetch(skb->next))
 12.1017 +
 12.1018 +
 12.1019 +extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 12.1020 +					 int noblock, int *err);
 12.1021 +extern unsigned int    datagram_poll(struct file *file, struct socket *sock,
 12.1022 +				     struct poll_table_struct *wait);
 12.1023 +extern int	       skb_copy_datagram(const struct sk_buff *from,
 12.1024 +					 int offset, char __user *to, int size);
 12.1025 +extern int	       skb_copy_datagram_iovec(const struct sk_buff *from,
 12.1026 +					       int offset, struct iovec *to,
 12.1027 +					       int size);
 12.1028 +extern int	       skb_copy_and_csum_datagram(const struct sk_buff *skb,
 12.1029 +						  int offset, u8 __user *to,
 12.1030 +						  int len, unsigned int *csump);
 12.1031 +extern int	       skb_copy_and_csum_datagram_iovec(const
 12.1032 +							struct sk_buff *skb,
 12.1033 +							int hlen,
 12.1034 +							struct iovec *iov);
 12.1035 +extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
 12.1036 +extern unsigned int    skb_checksum(const struct sk_buff *skb, int offset,
 12.1037 +				    int len, unsigned int csum);
 12.1038 +extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
 12.1039 +				     void *to, int len);
 12.1040 +extern unsigned int    skb_copy_and_csum_bits(const struct sk_buff *skb,
 12.1041 +					      int offset, u8 *to, int len,
 12.1042 +					      unsigned int csum);
 12.1043 +extern void	       skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 12.1044 +
 12.1045 +extern void skb_init(void);
 12.1046 +extern void skb_add_mtu(int mtu);
 12.1047 +
 12.1048 +#ifdef CONFIG_NETFILTER
 12.1049 +static inline void nf_conntrack_put(struct nf_ct_info *nfct)
 12.1050 +{
 12.1051 +	if (nfct && atomic_dec_and_test(&nfct->master->use))
 12.1052 +		nfct->master->destroy(nfct->master);
 12.1053 +}
 12.1054 +static inline void nf_conntrack_get(struct nf_ct_info *nfct)
 12.1055 +{
 12.1056 +	if (nfct)
 12.1057 +		atomic_inc(&nfct->master->use);
 12.1058 +}
 12.1059 +
 12.1060 +#ifdef CONFIG_BRIDGE_NETFILTER
 12.1061 +static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 12.1062 +{
 12.1063 +	if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
 12.1064 +		kfree(nf_bridge);
 12.1065 +}
 12.1066 +static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
 12.1067 +{
 12.1068 +	if (nf_bridge)
 12.1069 +		atomic_inc(&nf_bridge->use);
 12.1070 +}
 12.1071 +#endif
 12.1072 +
 12.1073 +#endif
 12.1074 +
 12.1075 +#endif	/* __KERNEL__ */
 12.1076 +#endif	/* _LINUX_SKBUFF_H */
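
With CONFIG_PAGESIZED_SKBS enabled, __dev_alloc_skb() above rounds every request up to PAGE_SIZE - 16 before adding the usual 16 bytes of headroom, so the data area of each receive skb spans a whole page. A worked example, assuming PAGE_SIZE is 4096 (the values are illustrative):

/* Request 1500 bytes for an Ethernet frame:
 *   length = max(1500, 4096 - 16) = 4080
 *   alloc_skb(4080 + 16, GFP_ATOMIC) gives a 4096-byte data area, and
 *   skb_reserve(skb, 16) then leaves 16 bytes of headroom.
 */
struct sk_buff *skb = dev_alloc_skb(1500);
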
    13.1 --- a/linux-2.6.7-xen-sparse/mm/page_alloc.c	Thu Aug 19 17:17:18 2004 +0000
    13.2 +++ b/linux-2.6.7-xen-sparse/mm/page_alloc.c	Thu Aug 19 18:50:23 2004 +0000
    13.3 @@ -497,9 +497,8 @@ static void fastcall free_hot_cold_page(
    13.4  	struct per_cpu_pages *pcp;
    13.5  	unsigned long flags;
    13.6  
    13.7 -	/* XXX Xen: use mapping pointer as skb/data-page destructor */
    13.8 -	if (page->mapping)
    13.9 -		return (*(void(*)(struct page *))page->mapping)(page);
   13.10 +	if (PageForeign(page))
   13.11 +		return (PageForeignDestructor(page))(page);
   13.12  
   13.13  	kernel_map_pages(page, 1, 0);
   13.14  	inc_page_state(pgfree);
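
The hunk above replaces the earlier open-coded test on page->mapping with the PageForeign() check, so a page lent out by a backend driver never enters the per-CPU hot/cold lists or the buddy free lists. Roughly, the free path now behaves like this sketch (illustrative, not a verbatim copy of the function):

static void free_hot_cold_page_sketch(struct page *page)
{
	if (PageForeign(page)) {
		/* Hand the page straight back via the destructor
		 * stored in page->mapping. */
		(PageForeignDestructor(page))(page);
		return;
	}
	kernel_map_pages(page, 1, 0);
	inc_page_state(pgfree);
	/* ...rest of the normal per-CPU free path... */
}
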