ia64/xen-unstable

changeset 2306:0f47aec8946e

bitkeeper revision 1.1159.45.3 (4124f6c9BN9jHyHMznjiaS-Qw12Rtw)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/labyrinth/iap10/xeno-clone/xeno.bk
author iap10@labyrinth.cl.cam.ac.uk
date Thu Aug 19 18:51:53 2004 +0000 (2004-08-19)
parents f8ccc0daf252 cbbe40349d37
children ca553f0e10ea
files .rootkeys linux-2.4.26-xen-sparse/arch/xen/config.in linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0 linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU linux-2.4.26-xen-sparse/include/linux/mm.h linux-2.4.26-xen-sparse/mm/page_alloc.c linux-2.6.7-xen-sparse/arch/xen/Kconfig linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/io.h linux-2.6.7-xen-sparse/include/linux/bio.h linux-2.6.7-xen-sparse/include/linux/page-flags.h linux-2.6.7-xen-sparse/include/linux/skbuff.h linux-2.6.7-xen-sparse/mm/page_alloc.c
line diff
     1.1 --- a/.rootkeys	Thu Aug 19 16:09:39 2004 +0000
     1.2 +++ b/.rootkeys	Thu Aug 19 18:51:53 2004 +0000
     1.3 @@ -111,6 +111,7 @@ 3e5a4e68mTr0zcp9SXDbnd-XLrrfxw linux-2.4
     1.4  3f1056a9L_kqHcFheV00KbKBzv9j5w linux-2.4.26-xen-sparse/include/asm-xen/vga.h
     1.5  40659defgWA92arexpMGn8X3QMDj3w linux-2.4.26-xen-sparse/include/asm-xen/xor.h
     1.6  3f056927gMHl7mWB89rb73JahbhQIA linux-2.4.26-xen-sparse/include/linux/blk.h
     1.7 +4124f66fPHG6yvB_vXmesjvzrJ3yMg linux-2.4.26-xen-sparse/include/linux/mm.h
     1.8  401c0590D_kwJDU59X8NyvqSv_Cl2A linux-2.4.26-xen-sparse/include/linux/sched.h
     1.9  40a248afgI0_JKthdYAe8beVfXSTpQ linux-2.4.26-xen-sparse/include/linux/skbuff.h
    1.10  401c0592pLrp_aCbQRo9GXiYQQaVVA linux-2.4.26-xen-sparse/include/linux/timer.h
    1.11 @@ -242,6 +243,9 @@ 3f108af1ylCIm82H052FVTfXACBHrw linux-2.6
    1.12  4122466356eIBnC9ot44WSVVIFyhQA linux-2.6.7-xen-sparse/include/asm-xen/queues.h
    1.13  3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.7-xen-sparse/include/asm-xen/suspend.h
    1.14  3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h
    1.15 +4124d8c4aocX7A-jIbuGraWN84pxGQ linux-2.6.7-xen-sparse/include/linux/bio.h
    1.16 +4124f66fp5QwbDHEfoUIa7pqO5Xhag linux-2.6.7-xen-sparse/include/linux/page-flags.h
    1.17 +4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6.7-xen-sparse/include/linux/skbuff.h
    1.18  40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.7-xen-sparse/mkbuildtree
    1.19  410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.7-xen-sparse/mm/page_alloc.c
    1.20  40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Make.defs
     2.1 --- a/linux-2.4.26-xen-sparse/arch/xen/config.in	Thu Aug 19 16:09:39 2004 +0000
     2.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/config.in	Thu Aug 19 18:51:53 2004 +0000
     2.3 @@ -20,7 +20,10 @@ endmenu
     2.4  # The IBM S/390 patch needs this.
     2.5  define_bool CONFIG_NO_IDLE_HZ y
     2.6  
     2.7 -if [ "$CONFIG_XEN_PHYSDEV_ACCESS" != "y" ]; then
     2.8 +if [ "$CONFIG_XEN_PHYSDEV_ACCESS" == "y" ]; then
     2.9 +   define_bool CONFIG_FOREIGN_PAGES y
    2.10 +else
    2.11 +   define_bool CONFIG_FOREIGN_PAGES n
    2.12     define_bool CONFIG_NETDEVICES y
    2.13     define_bool CONFIG_VT n
    2.14  fi
    2.15 @@ -103,8 +106,6 @@ if [ "$CONFIG_HIGHMEM" = "y" ]; then
    2.16     bool 'HIGHMEM I/O support' CONFIG_HIGHIO
    2.17  fi
    2.18  
    2.19 -define_int CONFIG_FORCE_MAX_ZONEORDER 12
    2.20 -
    2.21  #bool 'Symmetric multi-processing support' CONFIG_SMP
    2.22  #if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
    2.23  #   define_bool CONFIG_HAVE_DEC_LOCK y
     3.1 --- a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0	Thu Aug 19 16:09:39 2004 +0000
     3.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0	Thu Aug 19 18:51:53 2004 +0000
     3.3 @@ -13,6 +13,7 @@ CONFIG_UID16=y
     3.4  CONFIG_XEN_PRIVILEGED_GUEST=y
     3.5  CONFIG_XEN_PHYSDEV_ACCESS=y
     3.6  CONFIG_NO_IDLE_HZ=y
     3.7 +CONFIG_FOREIGN_PAGES=y
     3.8  
     3.9  #
    3.10  # Code maturity level options
    3.11 @@ -50,7 +51,6 @@ CONFIG_X86_TSC=y
    3.12  CONFIG_X86_L1_CACHE_SHIFT=5
    3.13  CONFIG_NOHIGHMEM=y
    3.14  # CONFIG_HIGHMEM4G is not set
    3.15 -CONFIG_FORCE_MAX_ZONEORDER=12
    3.16  
    3.17  #
    3.18  # General setup
     4.1 --- a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU	Thu Aug 19 16:09:39 2004 +0000
     4.2 +++ b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU	Thu Aug 19 18:51:53 2004 +0000
     4.3 @@ -13,6 +13,7 @@ CONFIG_UID16=y
     4.4  # CONFIG_XEN_PRIVILEGED_GUEST is not set
     4.5  # CONFIG_XEN_PHYSDEV_ACCESS is not set
     4.6  CONFIG_NO_IDLE_HZ=y
     4.7 +# CONFIG_FOREIGN_PAGES is not set
     4.8  CONFIG_NETDEVICES=y
     4.9  # CONFIG_VT is not set
    4.10  
    4.11 @@ -52,7 +53,6 @@ CONFIG_X86_TSC=y
    4.12  CONFIG_X86_L1_CACHE_SHIFT=5
    4.13  CONFIG_NOHIGHMEM=y
    4.14  # CONFIG_HIGHMEM4G is not set
    4.15 -CONFIG_FORCE_MAX_ZONEORDER=12
    4.16  
    4.17  #
    4.18  # General setup
     5.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     5.2 +++ b/linux-2.4.26-xen-sparse/include/linux/mm.h	Thu Aug 19 18:51:53 2004 +0000
     5.3 @@ -0,0 +1,703 @@
     5.4 +#ifndef _LINUX_MM_H
     5.5 +#define _LINUX_MM_H
     5.6 +
     5.7 +#include <linux/sched.h>
     5.8 +#include <linux/errno.h>
     5.9 +
    5.10 +#ifdef __KERNEL__
    5.11 +
    5.12 +#include <linux/config.h>
    5.13 +#include <linux/string.h>
    5.14 +#include <linux/list.h>
    5.15 +#include <linux/mmzone.h>
    5.16 +#include <linux/swap.h>
    5.17 +#include <linux/rbtree.h>
    5.18 +
    5.19 +extern unsigned long max_mapnr;
    5.20 +extern unsigned long num_physpages;
    5.21 +extern unsigned long num_mappedpages;
    5.22 +extern void * high_memory;
    5.23 +extern int page_cluster;
    5.24 +/* The inactive_clean lists are per zone. */
    5.25 +extern struct list_head active_list;
    5.26 +extern struct list_head inactive_list;
    5.27 +
    5.28 +#include <asm/page.h>
    5.29 +#include <asm/pgtable.h>
    5.30 +#include <asm/atomic.h>
    5.31 +
    5.32 +/*
    5.33 + * Linux kernel virtual memory manager primitives.
    5.34 + * The idea being to have a "virtual" mm in the same way
    5.35 + * we have a virtual fs - giving a cleaner interface to the
    5.36 + * mm details, and allowing different kinds of memory mappings
    5.37 + * (from shared memory to executable loading to arbitrary
    5.38 + * mmap() functions).
    5.39 + */
    5.40 +
    5.41 +/*
    5.42 + * This struct defines a memory VMM memory area. There is one of these
    5.43 + * per VM-area/task.  A VM area is any part of the process virtual memory
    5.44 + * space that has a special rule for the page-fault handlers (ie a shared
    5.45 + * library, the executable area etc).
    5.46 + */
    5.47 +struct vm_area_struct {
    5.48 +	struct mm_struct * vm_mm;	/* The address space we belong to. */
    5.49 +	unsigned long vm_start;		/* Our start address within vm_mm. */
    5.50 +	unsigned long vm_end;		/* The first byte after our end address
    5.51 +					   within vm_mm. */
    5.52 +
    5.53 +	/* linked list of VM areas per task, sorted by address */
    5.54 +	struct vm_area_struct *vm_next;
    5.55 +
    5.56 +	pgprot_t vm_page_prot;		/* Access permissions of this VMA. */
    5.57 +	unsigned long vm_flags;		/* Flags, listed below. */
    5.58 +
    5.59 +	rb_node_t vm_rb;
    5.60 +
    5.61 +	/*
    5.62 +	 * For areas with an address space and backing store,
    5.63 +	 * one of the address_space->i_mmap{,shared} lists,
    5.64 +	 * for shm areas, the list of attaches, otherwise unused.
    5.65 +	 */
    5.66 +	struct vm_area_struct *vm_next_share;
    5.67 +	struct vm_area_struct **vm_pprev_share;
    5.68 +
    5.69 +	/* Function pointers to deal with this struct. */
    5.70 +	struct vm_operations_struct * vm_ops;
    5.71 +
    5.72 +	/* Information about our backing store: */
    5.73 +	unsigned long vm_pgoff;		/* Offset (within vm_file) in PAGE_SIZE
    5.74 +					   units, *not* PAGE_CACHE_SIZE */
    5.75 +	struct file * vm_file;		/* File we map to (can be NULL). */
    5.76 +	unsigned long vm_raend;		/* XXX: put full readahead info here. */
    5.77 +	void * vm_private_data;		/* was vm_pte (shared mem) */
    5.78 +};
    5.79 +
    5.80 +/*
    5.81 + * vm_flags..
    5.82 + */
    5.83 +#define VM_READ		0x00000001	/* currently active flags */
    5.84 +#define VM_WRITE	0x00000002
    5.85 +#define VM_EXEC		0x00000004
    5.86 +#define VM_SHARED	0x00000008
    5.87 +
    5.88 +#define VM_MAYREAD	0x00000010	/* limits for mprotect() etc */
    5.89 +#define VM_MAYWRITE	0x00000020
    5.90 +#define VM_MAYEXEC	0x00000040
    5.91 +#define VM_MAYSHARE	0x00000080
    5.92 +
    5.93 +#define VM_GROWSDOWN	0x00000100	/* general info on the segment */
    5.94 +#define VM_GROWSUP	0x00000200
    5.95 +#define VM_SHM		0x00000400	/* shared memory area, don't swap out */
    5.96 +#define VM_DENYWRITE	0x00000800	/* ETXTBSY on write attempts.. */
    5.97 +
    5.98 +#define VM_EXECUTABLE	0x00001000
    5.99 +#define VM_LOCKED	0x00002000
   5.100 +#define VM_IO           0x00004000	/* Memory mapped I/O or similar */
   5.101 +
   5.102 +					/* Used by sys_madvise() */
   5.103 +#define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
   5.104 +#define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */
   5.105 +
   5.106 +#define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
   5.107 +#define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
   5.108 +#define VM_RESERVED	0x00080000	/* Don't unmap it from swap_out */
   5.109 +
   5.110 +#ifndef VM_STACK_FLAGS
   5.111 +#define VM_STACK_FLAGS	0x00000177
   5.112 +#endif
   5.113 +
   5.114 +#define VM_READHINTMASK			(VM_SEQ_READ | VM_RAND_READ)
   5.115 +#define VM_ClearReadHint(v)		(v)->vm_flags &= ~VM_READHINTMASK
   5.116 +#define VM_NormalReadHint(v)		(!((v)->vm_flags & VM_READHINTMASK))
   5.117 +#define VM_SequentialReadHint(v)	((v)->vm_flags & VM_SEQ_READ)
   5.118 +#define VM_RandomReadHint(v)		((v)->vm_flags & VM_RAND_READ)
   5.119 +
   5.120 +/* read ahead limits */
   5.121 +extern int vm_min_readahead;
   5.122 +extern int vm_max_readahead;
   5.123 +
   5.124 +/*
   5.125 + * mapping from the currently active vm_flags protection bits (the
   5.126 + * low four bits) to a page protection mask..
   5.127 + */
   5.128 +extern pgprot_t protection_map[16];
   5.129 +
   5.130 +
   5.131 +/*
   5.132 + * These are the virtual MM functions - opening of an area, closing and
   5.133 + * unmapping it (needed to keep files on disk up-to-date etc), pointer
   5.134 + * to the functions called when a no-page or a wp-page exception occurs. 
   5.135 + */
   5.136 +struct vm_operations_struct {
   5.137 +	void (*open)(struct vm_area_struct * area);
   5.138 +	void (*close)(struct vm_area_struct * area);
   5.139 +	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused);
   5.140 +};
   5.141 +
   5.142 +/*
   5.143 + * Each physical page in the system has a struct page associated with
   5.144 + * it to keep track of whatever it is we are using the page for at the
   5.145 + * moment. Note that we have no way to track which tasks are using
   5.146 + * a page.
   5.147 + *
   5.148 + * Try to keep the most commonly accessed fields in single cache lines
   5.149 + * here (16 bytes or greater).  This ordering should be particularly
   5.150 + * beneficial on 32-bit processors.
   5.151 + *
   5.152 + * The first line is data used in page cache lookup, the second line
   5.153 + * is used for linear searches (eg. clock algorithm scans). 
   5.154 + *
   5.155 + * TODO: make this structure smaller, it could be as small as 32 bytes.
   5.156 + */
   5.157 +typedef struct page {
   5.158 +	struct list_head list;		/* ->mapping has some page lists. */
   5.159 +	struct address_space *mapping;	/* The inode (or ...) we belong to. */
   5.160 +	unsigned long index;		/* Our offset within mapping. */
   5.161 +	struct page *next_hash;		/* Next page sharing our hash bucket in
   5.162 +					   the pagecache hash table. */
   5.163 +	atomic_t count;			/* Usage count, see below. */
   5.164 +	unsigned long flags;		/* atomic flags, some possibly
   5.165 +					   updated asynchronously */
   5.166 +	struct list_head lru;		/* Pageout list, eg. active_list;
   5.167 +					   protected by pagemap_lru_lock !! */
   5.168 +	struct page **pprev_hash;	/* Complement to *next_hash. */
   5.169 +	struct buffer_head * buffers;	/* Buffer maps us to a disk block. */
   5.170 +
   5.171 +	/*
   5.172 +	 * On machines where all RAM is mapped into kernel address space,
   5.173 +	 * we can simply calculate the virtual address. On machines with
   5.174 +	 * highmem some memory is mapped into kernel virtual memory
   5.175 +	 * dynamically, so we need a place to store that address.
   5.176 +	 * Note that this field could be 16 bits on x86 ... ;)
   5.177 +	 *
   5.178 +	 * Architectures with slow multiplication can define
   5.179 +	 * WANT_PAGE_VIRTUAL in asm/page.h
   5.180 +	 */
   5.181 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
   5.182 +	void *virtual;			/* Kernel virtual address (NULL if
   5.183 +					   not kmapped, ie. highmem) */
    5.184 +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.185 +} mem_map_t;
   5.186 +
   5.187 +/*
   5.188 + * Methods to modify the page usage count.
   5.189 + *
   5.190 + * What counts for a page usage:
   5.191 + * - cache mapping   (page->mapping)
   5.192 + * - disk mapping    (page->buffers)
   5.193 + * - page mapped in a task's page tables, each mapping
   5.194 + *   is counted separately
   5.195 + *
   5.196 + * Also, many kernel routines increase the page count before a critical
   5.197 + * routine so they can be sure the page doesn't go away from under them.
   5.198 + */
   5.199 +#define get_page(p)		atomic_inc(&(p)->count)
   5.200 +#define put_page(p)		__free_page(p)
   5.201 +#define put_page_testzero(p) 	atomic_dec_and_test(&(p)->count)
   5.202 +#define page_count(p)		atomic_read(&(p)->count)
   5.203 +#define set_page_count(p,v) 	atomic_set(&(p)->count, v)
   5.204 +
   5.205 +/*
   5.206 + * Various page->flags bits:
   5.207 + *
   5.208 + * PG_reserved is set for special pages, which can never be swapped
   5.209 + * out. Some of them might not even exist (eg empty_bad_page)...
   5.210 + *
   5.211 + * Multiple processes may "see" the same page. E.g. for untouched
   5.212 + * mappings of /dev/null, all processes see the same page full of
   5.213 + * zeroes, and text pages of executables and shared libraries have
   5.214 + * only one copy in memory, at most, normally.
   5.215 + *
   5.216 + * For the non-reserved pages, page->count denotes a reference count.
   5.217 + *   page->count == 0 means the page is free.
   5.218 + *   page->count == 1 means the page is used for exactly one purpose
   5.219 + *   (e.g. a private data page of one process).
   5.220 + *
   5.221 + * A page may be used for kmalloc() or anyone else who does a
   5.222 + * __get_free_page(). In this case the page->count is at least 1, and
   5.223 + * all other fields are unused but should be 0 or NULL. The
   5.224 + * management of this page is the responsibility of the one who uses
   5.225 + * it.
   5.226 + *
   5.227 + * The other pages (we may call them "process pages") are completely
   5.228 + * managed by the Linux memory manager: I/O, buffers, swapping etc.
   5.229 + * The following discussion applies only to them.
   5.230 + *
   5.231 + * A page may belong to an inode's memory mapping. In this case,
   5.232 + * page->mapping is the pointer to the inode, and page->index is the
   5.233 + * file offset of the page, in units of PAGE_CACHE_SIZE.
   5.234 + *
   5.235 + * A page may have buffers allocated to it. In this case,
   5.236 + * page->buffers is a circular list of these buffer heads. Else,
   5.237 + * page->buffers == NULL.
   5.238 + *
   5.239 + * For pages belonging to inodes, the page->count is the number of
   5.240 + * attaches, plus 1 if buffers are allocated to the page, plus one
   5.241 + * for the page cache itself.
   5.242 + *
   5.243 + * All pages belonging to an inode are in these doubly linked lists:
   5.244 + * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages;
   5.245 + * using the page->list list_head. These fields are also used for
    5.246 + * freelist management (when page->count==0).
   5.247 + *
   5.248 + * There is also a hash table mapping (mapping,index) to the page
   5.249 + * in memory if present. The lists for this hash table use the fields
   5.250 + * page->next_hash and page->pprev_hash.
   5.251 + *
   5.252 + * All process pages can do I/O:
   5.253 + * - inode pages may need to be read from disk,
   5.254 + * - inode pages which have been modified and are MAP_SHARED may need
   5.255 + *   to be written to disk,
   5.256 + * - private pages which have been modified may need to be swapped out
   5.257 + *   to swap space and (later) to be read back into memory.
   5.258 + * During disk I/O, PG_locked is used. This bit is set before I/O
   5.259 + * and reset when I/O completes. page_waitqueue(page) is a wait queue of all
   5.260 + * tasks waiting for the I/O on this page to complete.
   5.261 + * PG_uptodate tells whether the page's contents is valid.
   5.262 + * When a read completes, the page becomes uptodate, unless a disk I/O
   5.263 + * error happened.
   5.264 + *
   5.265 + * For choosing which pages to swap out, inode pages carry a
   5.266 + * PG_referenced bit, which is set any time the system accesses
   5.267 + * that page through the (mapping,index) hash table. This referenced
   5.268 + * bit, together with the referenced bit in the page tables, is used
   5.269 + * to manipulate page->age and move the page across the active,
   5.270 + * inactive_dirty and inactive_clean lists.
   5.271 + *
   5.272 + * Note that the referenced bit, the page->lru list_head and the
   5.273 + * active, inactive_dirty and inactive_clean lists are protected by
   5.274 + * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
   5.275 + *
   5.276 + * PG_skip is used on sparc/sparc64 architectures to "skip" certain
   5.277 + * parts of the address space.
   5.278 + *
   5.279 + * PG_error is set to indicate that an I/O error occurred on this page.
   5.280 + *
   5.281 + * PG_arch_1 is an architecture specific page state bit.  The generic
   5.282 + * code guarantees that this bit is cleared for a page when it first
   5.283 + * is entered into the page cache.
   5.284 + *
   5.285 + * PG_highmem pages are not permanently mapped into the kernel virtual
   5.286 + * address space, they need to be kmapped separately for doing IO on
   5.287 + * the pages. The struct page (these bits with information) are always
   5.288 + * mapped into kernel address space...
   5.289 + */
   5.290 +#define PG_locked		 0	/* Page is locked. Don't touch. */
   5.291 +#define PG_error		 1
   5.292 +#define PG_referenced		 2
   5.293 +#define PG_uptodate		 3
   5.294 +#define PG_dirty		 4
   5.295 +#define PG_unused		 5
   5.296 +#define PG_lru			 6
   5.297 +#define PG_active		 7
   5.298 +#define PG_slab			 8
   5.299 +#define PG_skip			10
   5.300 +#define PG_highmem		11
   5.301 +#define PG_checked		12	/* kill me in 2.5.<early>. */
   5.302 +#define PG_arch_1		13
   5.303 +#define PG_reserved		14
   5.304 +#define PG_launder		15	/* written out by VM pressure.. */
   5.305 +#define PG_fs_1			16	/* Filesystem specific */
   5.306 +#define PG_foreign		21	/* Page belongs to foreign allocator */
   5.307 +
   5.308 +#ifndef arch_set_page_uptodate
   5.309 +#define arch_set_page_uptodate(page)
   5.310 +#endif
   5.311 +
   5.312 +/* Make it prettier to test the above... */
   5.313 +#define UnlockPage(page)	unlock_page(page)
   5.314 +#define Page_Uptodate(page)	test_bit(PG_uptodate, &(page)->flags)
   5.315 +#define SetPageUptodate(page) \
   5.316 +	do {								\
   5.317 +		arch_set_page_uptodate(page);				\
   5.318 +		set_bit(PG_uptodate, &(page)->flags);			\
   5.319 +	} while (0)
   5.320 +#define ClearPageUptodate(page)	clear_bit(PG_uptodate, &(page)->flags)
   5.321 +#define PageDirty(page)		test_bit(PG_dirty, &(page)->flags)
   5.322 +#define SetPageDirty(page)	set_bit(PG_dirty, &(page)->flags)
   5.323 +#define ClearPageDirty(page)	clear_bit(PG_dirty, &(page)->flags)
   5.324 +#define PageLocked(page)	test_bit(PG_locked, &(page)->flags)
   5.325 +#define LockPage(page)		set_bit(PG_locked, &(page)->flags)
   5.326 +#define TryLockPage(page)	test_and_set_bit(PG_locked, &(page)->flags)
   5.327 +#define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
   5.328 +#define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
   5.329 +#define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)
   5.330 +#define PageLaunder(page)	test_bit(PG_launder, &(page)->flags)
   5.331 +#define SetPageLaunder(page)	set_bit(PG_launder, &(page)->flags)
   5.332 +#define ClearPageLaunder(page)	clear_bit(PG_launder, &(page)->flags)
   5.333 +#define ClearPageArch1(page)	clear_bit(PG_arch_1, &(page)->flags)
   5.334 +
   5.335 +/* A foreign page uses a custom destructor rather than the buddy allocator. */
   5.336 +#ifdef CONFIG_FOREIGN_PAGES
   5.337 +#define PageForeign(page)	test_bit(PG_foreign, &(page)->flags)
   5.338 +#define SetPageForeign(page)	set_bit(PG_foreign, &(page)->flags)
   5.339 +#define ClearPageForeign(page)	clear_bit(PG_foreign, &(page)->flags)
   5.340 +#define PageForeignDestructor(page)	\
   5.341 +	( (void (*) (struct page *)) (page)->mapping )
   5.342 +#else
   5.343 +#define PageForeign(page)	0
   5.344 +#define PageForeignDestructor(page)	void
   5.345 +#endif
   5.346 +
   5.347 +/*
   5.348 + * The zone field is never updated after free_area_init_core()
   5.349 + * sets it, so none of the operations on it need to be atomic.
   5.350 + */
   5.351 +#define NODE_SHIFT 4
   5.352 +#define ZONE_SHIFT (BITS_PER_LONG - 8)
   5.353 +
   5.354 +struct zone_struct;
   5.355 +extern struct zone_struct *zone_table[];
   5.356 +
   5.357 +static inline zone_t *page_zone(struct page *page)
   5.358 +{
   5.359 +	return zone_table[page->flags >> ZONE_SHIFT];
   5.360 +}
   5.361 +
   5.362 +static inline void set_page_zone(struct page *page, unsigned long zone_num)
   5.363 +{
   5.364 +	page->flags &= ~(~0UL << ZONE_SHIFT);
   5.365 +	page->flags |= zone_num << ZONE_SHIFT;
   5.366 +}
   5.367 +
   5.368 +/*
   5.369 + * In order to avoid #ifdefs within C code itself, we define
   5.370 + * set_page_address to a noop for non-highmem machines, where
   5.371 + * the field isn't useful.
   5.372 + * The same is true for page_address() in arch-dependent code.
   5.373 + */
   5.374 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
   5.375 +
   5.376 +#define set_page_address(page, address)			\
   5.377 +	do {						\
   5.378 +		(page)->virtual = (address);		\
   5.379 +	} while(0)
   5.380 +
   5.381 +#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.382 +#define set_page_address(page, address)  do { } while(0)
   5.383 +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.384 +
   5.385 +/*
   5.386 + * Permanent address of a page. Obviously must never be
   5.387 + * called on a highmem page.
   5.388 + */
   5.389 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL)
   5.390 +
   5.391 +#define page_address(page) ((page)->virtual)
   5.392 +
   5.393 +#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.394 +
   5.395 +#define page_address(page)						\
   5.396 +	__va( (((page) - page_zone(page)->zone_mem_map) << PAGE_SHIFT)	\
   5.397 +			+ page_zone(page)->zone_start_paddr)
   5.398 +
   5.399 +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */
   5.400 +
   5.401 +extern void FASTCALL(set_page_dirty(struct page *));
   5.402 +
   5.403 +/*
   5.404 + * The first mb is necessary to safely close the critical section opened by the
   5.405 + * TryLockPage(), the second mb is necessary to enforce ordering between
   5.406 + * the clear_bit and the read of the waitqueue (to avoid SMP races with a
   5.407 + * parallel wait_on_page).
   5.408 + */
   5.409 +#define PageError(page)		test_bit(PG_error, &(page)->flags)
   5.410 +#define SetPageError(page)	set_bit(PG_error, &(page)->flags)
   5.411 +#define ClearPageError(page)	clear_bit(PG_error, &(page)->flags)
   5.412 +#define PageReferenced(page)	test_bit(PG_referenced, &(page)->flags)
   5.413 +#define SetPageReferenced(page)	set_bit(PG_referenced, &(page)->flags)
   5.414 +#define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
   5.415 +#define PageTestandClearReferenced(page)	test_and_clear_bit(PG_referenced, &(page)->flags)
   5.416 +#define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
   5.417 +#define PageSetSlab(page)	set_bit(PG_slab, &(page)->flags)
   5.418 +#define PageClearSlab(page)	clear_bit(PG_slab, &(page)->flags)
   5.419 +#define PageReserved(page)	test_bit(PG_reserved, &(page)->flags)
   5.420 +
   5.421 +#define PageActive(page)	test_bit(PG_active, &(page)->flags)
   5.422 +#define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
   5.423 +#define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
   5.424 +
   5.425 +#define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
   5.426 +#define TestSetPageLRU(page)	test_and_set_bit(PG_lru, &(page)->flags)
   5.427 +#define TestClearPageLRU(page)	test_and_clear_bit(PG_lru, &(page)->flags)
   5.428 +
   5.429 +#ifdef CONFIG_HIGHMEM
   5.430 +#define PageHighMem(page)		test_bit(PG_highmem, &(page)->flags)
   5.431 +#else
   5.432 +#define PageHighMem(page)		0 /* needed to optimize away at compile time */
   5.433 +#endif
   5.434 +
   5.435 +#define SetPageReserved(page)		set_bit(PG_reserved, &(page)->flags)
   5.436 +#define ClearPageReserved(page)		clear_bit(PG_reserved, &(page)->flags)
   5.437 +
   5.438 +/*
   5.439 + * Error return values for the *_nopage functions
   5.440 + */
   5.441 +#define NOPAGE_SIGBUS	(NULL)
   5.442 +#define NOPAGE_OOM	((struct page *) (-1))
   5.443 +
   5.444 +/* The array of struct pages */
   5.445 +extern mem_map_t * mem_map;
   5.446 +
   5.447 +/*
   5.448 + * There is only one page-allocator function, and two main namespaces to
   5.449 + * it. The alloc_page*() variants return 'struct page *' and as such
   5.450 + * can allocate highmem pages, the *get*page*() variants return
   5.451 + * virtual kernel addresses to the allocated page(s).
   5.452 + */
   5.453 +extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order));
   5.454 +extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist));
   5.455 +extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order);
   5.456 +
   5.457 +static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
   5.458 +{
   5.459 +	/*
   5.460 +	 * Gets optimized away by the compiler.
   5.461 +	 */
   5.462 +	if (order >= MAX_ORDER)
   5.463 +		return NULL;
   5.464 +	return _alloc_pages(gfp_mask, order);
   5.465 +}
   5.466 +
   5.467 +#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
   5.468 +
   5.469 +extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order));
   5.470 +extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask));
   5.471 +
   5.472 +#define __get_free_page(gfp_mask) \
   5.473 +		__get_free_pages((gfp_mask),0)
   5.474 +
   5.475 +#define __get_dma_pages(gfp_mask, order) \
   5.476 +		__get_free_pages((gfp_mask) | GFP_DMA,(order))
   5.477 +
   5.478 +/*
   5.479 + * The old interface name will be removed in 2.5:
   5.480 + */
   5.481 +#define get_free_page get_zeroed_page
   5.482 +
   5.483 +/*
   5.484 + * There is only one 'core' page-freeing function.
   5.485 + */
   5.486 +extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
   5.487 +extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
   5.488 +
   5.489 +#define __free_page(page) __free_pages((page), 0)
   5.490 +#define free_page(addr) free_pages((addr),0)
   5.491 +
   5.492 +extern void show_free_areas(void);
   5.493 +extern void show_free_areas_node(pg_data_t *pgdat);
   5.494 +
   5.495 +extern void clear_page_tables(struct mm_struct *, unsigned long, int);
   5.496 +
   5.497 +extern int fail_writepage(struct page *);
   5.498 +struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused);
   5.499 +struct file *shmem_file_setup(char * name, loff_t size);
   5.500 +extern void shmem_lock(struct file * file, int lock);
   5.501 +extern int shmem_zero_setup(struct vm_area_struct *);
   5.502 +
   5.503 +extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size);
   5.504 +extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma);
   5.505 +extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot);
   5.506 +extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot);
   5.507 +
   5.508 +extern int vmtruncate(struct inode * inode, loff_t offset);
   5.509 +extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
   5.510 +extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
   5.511 +extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
   5.512 +extern int make_pages_present(unsigned long addr, unsigned long end);
   5.513 +extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
   5.514 +extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len);
   5.515 +extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len);
   5.516 +extern int ptrace_attach(struct task_struct *tsk);
   5.517 +extern int ptrace_detach(struct task_struct *, unsigned int);
   5.518 +extern void ptrace_disable(struct task_struct *);
   5.519 +extern int ptrace_check_attach(struct task_struct *task, int kill);
   5.520 +
   5.521 +int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
   5.522 +		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
   5.523 +
   5.524 +/*
   5.525 + * On a two-level page table, this ends up being trivial. Thus the
   5.526 + * inlining and the symmetry break with pte_alloc() that does all
   5.527 + * of this out-of-line.
   5.528 + */
   5.529 +static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
   5.530 +{
   5.531 +	if (pgd_none(*pgd))
   5.532 +		return __pmd_alloc(mm, pgd, address);
   5.533 +	return pmd_offset(pgd, address);
   5.534 +}
   5.535 +
   5.536 +extern int pgt_cache_water[2];
   5.537 +extern int check_pgt_cache(void);
   5.538 +
   5.539 +extern void free_area_init(unsigned long * zones_size);
   5.540 +extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
   5.541 +	unsigned long * zones_size, unsigned long zone_start_paddr, 
   5.542 +	unsigned long *zholes_size);
   5.543 +extern void mem_init(void);
   5.544 +extern void show_mem(void);
   5.545 +extern void si_meminfo(struct sysinfo * val);
   5.546 +extern void swapin_readahead(swp_entry_t);
   5.547 +
   5.548 +extern struct address_space swapper_space;
   5.549 +#define PageSwapCache(page) ((page)->mapping == &swapper_space)
   5.550 +
   5.551 +static inline int is_page_cache_freeable(struct page * page)
   5.552 +{
   5.553 +	return page_count(page) - !!page->buffers == 1;
   5.554 +}
   5.555 +
   5.556 +extern int FASTCALL(can_share_swap_page(struct page *));
   5.557 +extern int FASTCALL(remove_exclusive_swap_page(struct page *));
   5.558 +
   5.559 +extern void __free_pte(pte_t);
   5.560 +
   5.561 +/* mmap.c */
   5.562 +extern void lock_vma_mappings(struct vm_area_struct *);
   5.563 +extern void unlock_vma_mappings(struct vm_area_struct *);
   5.564 +extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
   5.565 +extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
   5.566 +extern void build_mmap_rb(struct mm_struct *);
   5.567 +extern void exit_mmap(struct mm_struct *);
   5.568 +
   5.569 +extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
   5.570 +
   5.571 +extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
   5.572 +	unsigned long len, unsigned long prot,
   5.573 +	unsigned long flag, unsigned long pgoff);
   5.574 +
   5.575 +static inline unsigned long do_mmap(struct file *file, unsigned long addr,
   5.576 +	unsigned long len, unsigned long prot,
   5.577 +	unsigned long flag, unsigned long offset)
   5.578 +{
   5.579 +	unsigned long ret = -EINVAL;
   5.580 +	if ((offset + PAGE_ALIGN(len)) < offset)
   5.581 +		goto out;
   5.582 +	if (!(offset & ~PAGE_MASK))
   5.583 +		ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
   5.584 +out:
   5.585 +	return ret;
   5.586 +}
   5.587 +
   5.588 +extern int do_munmap(struct mm_struct *, unsigned long, size_t);
   5.589 +
   5.590 +extern unsigned long do_brk(unsigned long, unsigned long);
   5.591 +
   5.592 +static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev)
   5.593 +{
   5.594 +	prev->vm_next = vma->vm_next;
   5.595 +	rb_erase(&vma->vm_rb, &mm->mm_rb);
   5.596 +	if (mm->mmap_cache == vma)
   5.597 +		mm->mmap_cache = prev;
   5.598 +}
   5.599 +
   5.600 +static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags)
   5.601 +{
   5.602 +	if (!vma->vm_file && vma->vm_flags == vm_flags)
   5.603 +		return 1;
   5.604 +	else
   5.605 +		return 0;
   5.606 +}
   5.607 +
   5.608 +struct zone_t;
   5.609 +/* filemap.c */
   5.610 +extern void remove_inode_page(struct page *);
   5.611 +extern unsigned long page_unuse(struct page *);
   5.612 +extern void truncate_inode_pages(struct address_space *, loff_t);
   5.613 +
   5.614 +/* generic vm_area_ops exported for stackable file systems */
   5.615 +extern int filemap_sync(struct vm_area_struct *, unsigned long,	size_t, unsigned int);
   5.616 +extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int);
   5.617 +
   5.618 +/*
   5.619 + * GFP bitmasks..
   5.620 + */
   5.621 +/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */
   5.622 +#define __GFP_DMA	0x01
   5.623 +#define __GFP_HIGHMEM	0x02
   5.624 +
   5.625 +/* Action modifiers - doesn't change the zoning */
   5.626 +#define __GFP_WAIT	0x10	/* Can wait and reschedule? */
   5.627 +#define __GFP_HIGH	0x20	/* Should access emergency pools? */
   5.628 +#define __GFP_IO	0x40	/* Can start low memory physical IO? */
   5.629 +#define __GFP_HIGHIO	0x80	/* Can start high mem physical IO? */
   5.630 +#define __GFP_FS	0x100	/* Can call down to low-level FS? */
   5.631 +
   5.632 +#define GFP_NOHIGHIO	(__GFP_HIGH | __GFP_WAIT | __GFP_IO)
   5.633 +#define GFP_NOIO	(__GFP_HIGH | __GFP_WAIT)
   5.634 +#define GFP_NOFS	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
   5.635 +#define GFP_ATOMIC	(__GFP_HIGH)
   5.636 +#define GFP_USER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
   5.637 +#define GFP_HIGHUSER	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
   5.638 +#define GFP_KERNEL	(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
   5.639 +#define GFP_NFS		(__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
   5.640 +#define GFP_KSWAPD	(             __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
   5.641 +
   5.642 +/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
   5.643 +   platforms, used as appropriate on others */
   5.644 +
   5.645 +#define GFP_DMA		__GFP_DMA
   5.646 +
   5.647 +static inline unsigned int pf_gfp_mask(unsigned int gfp_mask)
   5.648 +{
   5.649 +	/* avoid all memory balancing I/O methods if this task cannot block on I/O */
   5.650 +	if (current->flags & PF_NOIO)
   5.651 +		gfp_mask &= ~(__GFP_IO | __GFP_HIGHIO | __GFP_FS);
   5.652 +
   5.653 +	return gfp_mask;
   5.654 +}
   5.655 +	
   5.656 +/* vma is the first one with  address < vma->vm_end,
   5.657 + * and even  address < vma->vm_start. Have to extend vma. */
   5.658 +static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
   5.659 +{
   5.660 +	unsigned long grow;
   5.661 +
   5.662 +	/*
   5.663 +	 * vma->vm_start/vm_end cannot change under us because the caller is required
   5.664 +	 * to hold the mmap_sem in write mode. We need to get the spinlock only
   5.665 +	 * before relocating the vma range ourself.
   5.666 +	 */
   5.667 +	address &= PAGE_MASK;
   5.668 + 	spin_lock(&vma->vm_mm->page_table_lock);
   5.669 +	grow = (vma->vm_start - address) >> PAGE_SHIFT;
   5.670 +	if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
   5.671 +	    ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) {
   5.672 +		spin_unlock(&vma->vm_mm->page_table_lock);
   5.673 +		return -ENOMEM;
   5.674 +	}
   5.675 +	vma->vm_start = address;
   5.676 +	vma->vm_pgoff -= grow;
   5.677 +	vma->vm_mm->total_vm += grow;
   5.678 +	if (vma->vm_flags & VM_LOCKED)
   5.679 +		vma->vm_mm->locked_vm += grow;
   5.680 +	spin_unlock(&vma->vm_mm->page_table_lock);
   5.681 +	return 0;
   5.682 +}
   5.683 +
   5.684 +/* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
   5.685 +extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
   5.686 +extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
   5.687 +					     struct vm_area_struct **pprev);
   5.688 +
   5.689 +/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
   5.690 +   NULL if none.  Assume start_addr < end_addr. */
   5.691 +static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
   5.692 +{
   5.693 +	struct vm_area_struct * vma = find_vma(mm,start_addr);
   5.694 +
   5.695 +	if (vma && end_addr <= vma->vm_start)
   5.696 +		vma = NULL;
   5.697 +	return vma;
   5.698 +}
   5.699 +
   5.700 +extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
   5.701 +
   5.702 +extern struct page * vmalloc_to_page(void *addr);
   5.703 +
   5.704 +#endif /* __KERNEL__ */
   5.705 +
   5.706 +#endif
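
Apart from the PG_foreign machinery, this mm.h appears to be a straight copy of the stock 2.4.26 header: the additions are the PG_foreign bit (21), the CONFIG_FOREIGN_PAGES-guarded PageForeign/SetPageForeign/ClearPageForeign helpers, and PageForeignDestructor, which reinterprets page->mapping as a destructor function pointer. One detail worth sanity-checking is that the new flag stays clear of the zone index, which this header packs into the top eight bits of page->flags (ZONE_SHIFT = BITS_PER_LONG - 8). A minimal standalone check of that layout for the 32-bit builds this tree targets (plain userspace C, not kernel code):

    #include <assert.h>
    #include <stdio.h>

    #define BITS_PER_LONG 32              /* i386, the case this sparse tree builds */
    #define ZONE_SHIFT    (BITS_PER_LONG - 8)
    #define PG_fs_1       16              /* highest pre-existing flag in this header */
    #define PG_foreign    21              /* new in this changeset */

    int main(void)
    {
        /* The zone index occupies bits [ZONE_SHIFT, BITS_PER_LONG), so every
         * page flag, including the new one, must sit strictly below it. */
        assert(PG_foreign > PG_fs_1);
        assert(PG_foreign < ZONE_SHIFT);
        printf("PG_foreign (bit %d) fits below ZONE_SHIFT (%d)\n",
               PG_foreign, ZONE_SHIFT);
        return 0;
    }
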
     6.1 --- a/linux-2.4.26-xen-sparse/mm/page_alloc.c	Thu Aug 19 16:09:39 2004 +0000
     6.2 +++ b/linux-2.4.26-xen-sparse/mm/page_alloc.c	Thu Aug 19 18:51:53 2004 +0000
     6.3 @@ -89,6 +89,9 @@ static void __free_pages_ok (struct page
     6.4  	struct page *base;
     6.5  	zone_t *zone;
     6.6  
     6.7 +	if (PageForeign(page))
     6.8 +		return (PageForeignDestructor(page))(page);
     6.9 +
    6.10  	/*
    6.11  	 * Yes, think what happens when other parts of the kernel take 
    6.12  	 * a reference to a page in order to pin it for io. -ben
    6.13 @@ -102,7 +105,7 @@ static void __free_pages_ok (struct page
    6.14  	if (page->buffers)
    6.15  		BUG();
    6.16  	if (page->mapping)
    6.17 -		return (*(void(*)(struct page *))page->mapping)(page);
    6.18 +		BUG();
    6.19  	if (!VALID_PAGE(page))
    6.20  		BUG();
    6.21  	if (PageLocked(page))
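
Together with the new mm.h macros, the hunk above changes how specially owned pages leave the allocator: previously any page reaching __free_pages_ok() with a non-NULL ->mapping was silently treated as carrying a destructor in that field, whereas now that case is a BUG() and only pages explicitly flagged PG_foreign are diverted to their destructor before they can touch the buddy free lists. A rough userspace model of the new free path, with an explicit destructor field standing in for the page->mapping trick (illustrative stand-in types, not the kernel's):

    #include <stdio.h>

    struct page {
        unsigned long flags;
        void (*destructor)(struct page *);  /* the kernel smuggles this through
                                               page->mapping; modelled directly here */
    };

    #define PG_foreign 21
    #define PageForeign(p)           (((p)->flags >> PG_foreign) & 1UL)
    #define SetPageForeign(p)        ((p)->flags |= 1UL << PG_foreign)
    #define PageForeignDestructor(p) ((p)->destructor)

    static void buddy_free(struct page *p)
    {
        printf("page %p returned to the buddy allocator\n", (void *)p);
    }

    /* Models the top of __free_pages_ok() after this changeset. */
    static void free_pages_ok(struct page *p)
    {
        if (PageForeign(p)) {
            PageForeignDestructor(p)(p);  /* foreign pages never hit the free lists */
            return;
        }
        buddy_free(p);
    }

    static void driver_release(struct page *p)
    {
        printf("driver-owned page %p reclaimed by its destructor\n", (void *)p);
    }

    int main(void)
    {
        struct page normal  = { 0, 0 };
        struct page foreign = { 0, driver_release };
        SetPageForeign(&foreign);

        free_pages_ok(&normal);   /* -> buddy allocator */
        free_pages_ok(&foreign);  /* -> driver_release() */
        return 0;
    }
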
     7.1 --- a/linux-2.6.7-xen-sparse/arch/xen/Kconfig	Thu Aug 19 16:09:39 2004 +0000
     7.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/Kconfig	Thu Aug 19 18:51:53 2004 +0000
     7.3 @@ -44,11 +44,15 @@ config XEN_WRITABLE_PAGETABLES
     7.4  
     7.5  endmenu
     7.6  
     7.7 -# Xen's block device backend driver needs 2^12 pages
     7.8 -config FORCE_MAX_ZONEORDER
     7.9 -        int
    7.10 -        default "12" if XEN_PHYSDEV_ACCESS
    7.11 -        default "11" if !XEN_PHYSDEV_ACCESS
    7.12 +config FOREIGN_PAGES
    7.13 +	bool
    7.14 +	default y if XEN_PHYSDEV_ACCESS
    7.15 +	default n if !XEN_PHYSDEV_ACCESS
    7.16 +
    7.17 +config PAGESIZED_SKBS
    7.18 +	bool
    7.19 +	default y if XEN_PHYSDEV_ACCESS
    7.20 +	default n if !XEN_PHYSDEV_ACCESS
    7.21  
    7.22  #config VT
    7.23  #	bool
     8.1 --- a/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig	Thu Aug 19 16:09:39 2004 +0000
     8.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig	Thu Aug 19 18:51:53 2004 +0000
     8.3 @@ -10,7 +10,8 @@ CONFIG_NO_IDLE_HZ=y
     8.4  #
     8.5  CONFIG_XEN_PRIVILEGED_GUEST=y
     8.6  CONFIG_XEN_PHYSDEV_ACCESS=y
     8.7 -CONFIG_FORCE_MAX_ZONEORDER=12
     8.8 +CONFIG_FOREIGN_PAGES=y
     8.9 +CONFIG_PAGESIZED_SKBS=y
    8.10  CONFIG_X86=y
    8.11  # CONFIG_X86_64 is not set
    8.12  
     9.1 --- a/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig	Thu Aug 19 16:09:39 2004 +0000
     9.2 +++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig	Thu Aug 19 18:51:53 2004 +0000
     9.3 @@ -10,7 +10,8 @@ CONFIG_NO_IDLE_HZ=y
     9.4  #
     9.5  # CONFIG_XEN_PRIVILEGED_GUEST is not set
     9.6  # CONFIG_XEN_PHYSDEV_ACCESS is not set
     9.7 -CONFIG_FORCE_MAX_ZONEORDER=11
     9.8 +# CONFIG_FOREIGN_PAGES is not set
     9.9 +# CONFIG_PAGESIZED_SKBS is not set
    9.10  CONFIG_X86=y
    9.11  # CONFIG_X86_64 is not set
    9.12  
    10.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c	Thu Aug 19 16:09:39 2004 +0000
    10.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c	Thu Aug 19 18:51:53 2004 +0000
    10.3 @@ -24,22 +24,15 @@
    10.4  #define MAX_PENDING_REQS 64
    10.5  #define BATCH_PER_DOMAIN 16
    10.6  
    10.7 -/*
    10.8 - * NB. We place a page of padding between each buffer page to avoid incorrect
    10.9 - * merging of requests by the IDE and SCSI merging routines. Otherwise, two
   10.10 - * adjacent buffers in a scatter-gather request would have adjacent page
   10.11 - * numbers: since the merge routines don't realise that this is in *pseudophys*
   10.12 - * space, not real space, they may collapse the s-g elements!
   10.13 - */
   10.14  static unsigned long mmap_vstart;
   10.15  #define MMAP_PAGES_PER_REQUEST \
   10.16 -    (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))
   10.17 +    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
   10.18  #define MMAP_PAGES             \
   10.19      (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
   10.20  #define MMAP_VADDR(_req,_seg)                        \
   10.21      (mmap_vstart +                                   \
   10.22       ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
   10.23 -     ((_seg) * 2 * PAGE_SIZE))
   10.24 +     ((_seg) * PAGE_SIZE))
   10.25  
   10.26  /*
   10.27   * Each outstanding request that we've passed to the lower device layers has a 
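
The hunk above removes the page of padding that blkback used to interleave between mapped buffer pages to stop the generic IDE/SCSI merge routines from collapsing scatter-gather elements that only look adjacent in pseudophysical space (the concern described in the deleted comment). The same changeset instead handles the problem in the merge test itself, via the BIOVEC_PHYS_MERGEABLE definition added to asm-i386/io.h further down, so the padding and half of the reserved virtual address range are no longer needed; this halving is presumably also why FORCE_MAX_ZONEORDER, whose stated purpose in the old Kconfig was the block backend's large contiguous region, is dropped from the configs in this changeset. A back-of-the-envelope sketch of the footprint, assuming a hypothetical segment limit of 11 (BLKIF_MAX_SEGMENTS_PER_REQUEST is defined elsewhere and not shown in this diff):

    #include <stdio.h>

    #define MAX_PENDING_REQS 64
    #define SEGS             11   /* hypothetical stand-in for BLKIF_MAX_SEGMENTS_PER_REQUEST */

    int main(void)
    {
        unsigned long old_per_req = 2 * (SEGS + 1);  /* buffer page + padding page per segment */
        unsigned long new_per_req = SEGS + 1;        /* buffer pages only */

        printf("pages per request:  %lu -> %lu\n", old_per_req, new_per_req);
        printf("whole mmap region:  %lu -> %lu pages\n",
               (unsigned long)MAX_PENDING_REQS * old_per_req,
               (unsigned long)MAX_PENDING_REQS * new_per_req);
        return 0;
    }
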
    11.1 --- a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c	Thu Aug 19 16:09:39 2004 +0000
    11.2 +++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c	Thu Aug 19 18:51:53 2004 +0000
    11.3 @@ -376,7 +376,6 @@ static void net_tx_action(unsigned long 
    11.4      netif_tx_request_t txreq;
    11.5      u16 pending_idx;
    11.6      NETIF_RING_IDX i;
    11.7 -    struct page *page;
    11.8      multicall_entry_t *mcl;
    11.9      PEND_RING_IDX dc, dp;
   11.10  
   11.11 @@ -567,10 +566,9 @@ static void net_tx_action(unsigned long 
   11.12                 (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
   11.13                 PKT_PROT_LEN);
   11.14  
   11.15 -        page = virt_to_page(MMAP_VADDR(pending_idx));
   11.16 -
   11.17          /* Append the packet payload as a fragment. */
   11.18 -        skb_shinfo(skb)->frags[0].page        = page;
   11.19 +        skb_shinfo(skb)->frags[0].page        = 
   11.20 +            virt_to_page(MMAP_VADDR(pending_idx));
   11.21          skb_shinfo(skb)->frags[0].size        = txreq.size - PKT_PROT_LEN;
   11.22          skb_shinfo(skb)->frags[0].page_offset = 
   11.23              (txreq.addr + PKT_PROT_LEN) & ~PAGE_MASK;
   11.24 @@ -581,17 +579,6 @@ static void net_tx_action(unsigned long 
   11.25          skb->dev      = netif->dev;
   11.26          skb->protocol = eth_type_trans(skb, skb->dev);
   11.27  
   11.28 -        /*
   11.29 -         * Destructor information. We hideously abuse the 'mapping' pointer,
   11.30 -         * which isn't otherwise used by us. The page deallocator is modified
   11.31 -         * to interpret a non-NULL value as a destructor function to be called.
   11.32 -         * This works okay because in all other cases the pointer must be NULL
   11.33 -         * when the page is freed (normally Linux will explicitly bug out if
   11.34 -         * it sees otherwise.
   11.35 -         */
   11.36 -        page->mapping = (struct address_space *)netif_page_release;
   11.37 -        set_page_count(page, 1);
   11.38 -
   11.39          netif->stats.tx_bytes += txreq.size;
   11.40          netif->stats.tx_packets++;
   11.41  
   11.42 @@ -607,8 +594,8 @@ static void netif_page_release(struct pa
   11.43      unsigned long flags;
   11.44      u16 pending_idx = page - virt_to_page(mmap_vstart);
   11.45  
   11.46 -    /* Stop the abuse. */
   11.47 -    page->mapping = NULL;
   11.48 +    /* Ready for next use. */
   11.49 +    set_page_count(page, 1);
   11.50  
   11.51      spin_lock_irqsave(&dealloc_lock, flags);
   11.52      dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
   11.53 @@ -742,6 +729,7 @@ static irqreturn_t netif_be_dbg(int irq,
   11.54  static int __init netback_init(void)
   11.55  {
   11.56      int i;
   11.57 +    struct page *page;
   11.58  
   11.59      if ( !(start_info.flags & SIF_NET_BE_DOMAIN) &&
   11.60  	 !(start_info.flags & SIF_INITDOMAIN) )
   11.61 @@ -757,6 +745,13 @@ static int __init netback_init(void)
   11.62      if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
   11.63          BUG();
   11.64  
   11.65 +    for ( i = 0; i < MAX_PENDING_REQS; i++ )
   11.66 +    {
   11.67 +        page = virt_to_page(MMAP_VADDR(i));
   11.68 +        SetPageForeign(page);
   11.69 +        PageForeignDestructor(page) = netif_page_release;
   11.70 +    }
   11.71 +
   11.72      pending_cons = 0;
   11.73      pending_prod = MAX_PENDING_REQS;
   11.74      for ( i = 0; i < MAX_PENDING_REQS; i++ )
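
The netback changes above move the destructor wiring out of the per-packet transmit path: rather than pointing page->mapping at netif_page_release for every packet and clearing it again on release, each page of the mmap region is flagged PG_foreign once at init with netif_page_release registered as its destructor, and the release handler now only re-arms the page count and queues the slot for the dealloc tasklet. A small userspace model of that round trip (made-up helper names, logic heavily simplified):

    #include <stdio.h>

    #define MAX_PENDING_REQS 64
    #define MASK_PEND_IDX(i) ((i) & (MAX_PENDING_REQS - 1))

    struct page { int count; };

    static struct page mmap_pages[MAX_PENDING_REQS];      /* stand-in for the mapped region */
    static unsigned short dealloc_ring[MAX_PENDING_REQS];
    static unsigned int dealloc_prod;

    /* Foreign-page destructor: make the struct page reusable and tell the
     * dealloc path which pending slot can have its foreign frame unmapped. */
    static void netif_page_release_model(struct page *page)
    {
        unsigned short pending_idx = (unsigned short)(page - mmap_pages);
        page->count = 1;                                   /* ready for the next packet */
        dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
    }

    /* What dropping the last reference boils down to for a PG_foreign page. */
    static void put_page_model(struct page *page)
    {
        if (--page->count == 0)
            netif_page_release_model(page);
    }

    int main(void)
    {
        struct page *page = &mmap_pages[5];
        page->count = 1;        /* as set at init and re-armed after each release */
        put_page_model(page);   /* network stack drops the skb fragment's reference */
        printf("slot %u queued for dealloc, page count back to %d\n",
               (unsigned)dealloc_ring[0], page->count);
        return 0;
    }
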
    12.1 --- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/io.h	Thu Aug 19 16:09:39 2004 +0000
    12.2 +++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/io.h	Thu Aug 19 18:51:53 2004 +0000
    12.3 @@ -88,6 +88,13 @@ static inline void * phys_to_virt(unsign
    12.4  #define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
    12.5  #define page_to_phys(page)       (phys_to_machine(page_to_pseudophys(page)))
    12.6  
    12.7 +#define bio_to_pseudophys(bio)	(page_to_pseudophys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
    12.8 +#define bvec_to_pseudophys(bv)	(page_to_pseudophys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
    12.9 +
   12.10 +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
   12.11 +	(((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) && \
   12.12 +	 ((bvec_to_pseudophys((vec1)) + (vec1)->bv_len) == bvec_to_pseudophys((vec2))))
   12.13 +
   12.14  extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
   12.15  
   12.16  /**
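
The BIOVEC_PHYS_MERGEABLE added above only lets two bio_vecs merge when they are contiguous in both views of memory: the machine view (bvec_to_phys, which resolves through phys_to_machine via the page_to_phys above) and the guest pseudophysical view (the new bvec_to_pseudophys). Under Xen those two address spaces are related by an arbitrary per-page mapping, so a test on either one alone can merge buffers that are not contiguous in the other. The generic definition in the bio.h copy added below checks only bvec_to_phys and is wrapped in #ifndef BIOVEC_PHYS_MERGEABLE, so the stricter test defined here in asm/io.h is what every block driver ends up using. A self-contained sketch of how the two checks can disagree, using an invented four-entry pseudophys-to-machine table (not real Xen data structures):

    #include <stdio.h>
    #include <stdbool.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    /* Invented pseudophys-frame -> machine-frame table for a tiny guest. */
    static const unsigned long pfn_to_mfn[] = { 0x100, 0x2a7, 0x2a8, 0x050 };

    struct bvec { unsigned long pfn, offset, len; };

    static unsigned long machine_addr(const struct bvec *v)
    {
        return (pfn_to_mfn[v->pfn] << PAGE_SHIFT) + v->offset;
    }

    static unsigned long pseudophys_addr(const struct bvec *v)
    {
        return (v->pfn << PAGE_SHIFT) + v->offset;
    }

    /* Mirrors the structure of the new macro: merge only if contiguous in both spaces. */
    static bool phys_mergeable(const struct bvec *a, const struct bvec *b)
    {
        return machine_addr(a) + a->len == machine_addr(b) &&
               pseudophys_addr(a) + a->len == pseudophys_addr(b);
    }

    int main(void)
    {
        struct bvec v1 = { 1, 0, PAGE_SIZE };
        struct bvec v2 = { 2, 0, PAGE_SIZE };
        struct bvec v3 = { 3, 0, PAGE_SIZE };

        /* pfns 1 and 2 are adjacent both as machine frames and as pseudophys pages. */
        printf("v1+v2: %s\n", phys_mergeable(&v1, &v2) ? "merge" : "keep separate");
        /* pfns 2 and 3 are adjacent pseudophys pages, but their machine frames are not. */
        printf("v2+v3: %s\n", phys_mergeable(&v2, &v3) ? "merge" : "keep separate");
        return 0;
    }
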
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/linux-2.6.7-xen-sparse/include/linux/bio.h	Thu Aug 19 18:51:53 2004 +0000
    13.3 @@ -0,0 +1,304 @@
    13.4 +/*
    13.5 + * 2.5 block I/O model
    13.6 + *
    13.7 + * Copyright (C) 2001 Jens Axboe <axboe@suse.de>
    13.8 + *
    13.9 + * This program is free software; you can redistribute it and/or modify
   13.10 + * it under the terms of the GNU General Public License version 2 as
   13.11 + * published by the Free Software Foundation.
   13.12 + *
   13.13 + * This program is distributed in the hope that it will be useful,
   13.14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
   13.15 +
   13.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13.17 + * GNU General Public License for more details.
   13.18 + *
   13.19 + * You should have received a copy of the GNU General Public Licens
   13.20 + * along with this program; if not, write to the Free Software
   13.21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
   13.22 + */
   13.23 +#ifndef __LINUX_BIO_H
   13.24 +#define __LINUX_BIO_H
   13.25 +
   13.26 +#include <linux/highmem.h>
   13.27 +#include <linux/mempool.h>
   13.28 +
   13.29 +/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
   13.30 +#include <asm/io.h>
   13.31 +#ifndef BIO_VMERGE_BOUNDARY
   13.32 +#define BIO_VMERGE_BOUNDARY	0
   13.33 +#endif
   13.34 +
   13.35 +#define BIO_DEBUG
   13.36 +
   13.37 +#ifdef BIO_DEBUG
   13.38 +#define BIO_BUG_ON	BUG_ON
   13.39 +#else
   13.40 +#define BIO_BUG_ON
   13.41 +#endif
   13.42 +
   13.43 +#define BIO_MAX_PAGES		(256)
   13.44 +#define BIO_MAX_SIZE		(BIO_MAX_PAGES << PAGE_CACHE_SHIFT)
   13.45 +#define BIO_MAX_SECTORS		(BIO_MAX_SIZE >> 9)
   13.46 +
   13.47 +/*
   13.48 + * was unsigned short, but we might as well be ready for > 64kB I/O pages
   13.49 + */
   13.50 +struct bio_vec {
   13.51 +	struct page	*bv_page;
   13.52 +	unsigned int	bv_len;
   13.53 +	unsigned int	bv_offset;
   13.54 +};
   13.55 +
   13.56 +struct bio;
   13.57 +typedef int (bio_end_io_t) (struct bio *, unsigned int, int);
   13.58 +typedef void (bio_destructor_t) (struct bio *);
   13.59 +
   13.60 +/*
   13.61 + * main unit of I/O for the block layer and lower layers (ie drivers and
   13.62 + * stacking drivers)
   13.63 + */
   13.64 +struct bio {
   13.65 +	sector_t		bi_sector;
   13.66 +	struct bio		*bi_next;	/* request queue link */
   13.67 +	struct block_device	*bi_bdev;
   13.68 +	unsigned long		bi_flags;	/* status, command, etc */
   13.69 +	unsigned long		bi_rw;		/* bottom bits READ/WRITE,
   13.70 +						 * top bits priority
   13.71 +						 */
   13.72 +
   13.73 +	unsigned short		bi_vcnt;	/* how many bio_vec's */
   13.74 +	unsigned short		bi_idx;		/* current index into bvl_vec */
   13.75 +
   13.76 +	/* Number of segments in this BIO after
   13.77 +	 * physical address coalescing is performed.
   13.78 +	 */
   13.79 +	unsigned short		bi_phys_segments;
   13.80 +
   13.81 +	/* Number of segments after physical and DMA remapping
   13.82 +	 * hardware coalescing is performed.
   13.83 +	 */
   13.84 +	unsigned short		bi_hw_segments;
   13.85 +
   13.86 +	unsigned int		bi_size;	/* residual I/O count */
   13.87 +	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
   13.88 +
   13.89 +	struct bio_vec		*bi_io_vec;	/* the actual vec list */
   13.90 +
   13.91 +	bio_end_io_t		*bi_end_io;
   13.92 +	atomic_t		bi_cnt;		/* pin count */
   13.93 +
   13.94 +	void			*bi_private;
   13.95 +
   13.96 +	bio_destructor_t	*bi_destructor;	/* destructor */
   13.97 +};
   13.98 +
   13.99 +/*
  13.100 + * bio flags
  13.101 + */
  13.102 +#define BIO_UPTODATE	0	/* ok after I/O completion */
  13.103 +#define BIO_RW_BLOCK	1	/* RW_AHEAD set, and read/write would block */
  13.104 +#define BIO_EOF		2	/* out-out-bounds error */
  13.105 +#define BIO_SEG_VALID	3	/* nr_hw_seg valid */
  13.106 +#define BIO_CLONED	4	/* doesn't own data */
  13.107 +#define BIO_BOUNCED	5	/* bio is a bounce bio */
  13.108 +#define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
  13.109 +
  13.110 +/*
  13.111 + * top 4 bits of bio flags indicate the pool this bio came from
  13.112 + */
  13.113 +#define BIO_POOL_BITS		(4)
  13.114 +#define BIO_POOL_OFFSET		(BITS_PER_LONG - BIO_POOL_BITS)
  13.115 +#define BIO_POOL_MASK		(1UL << BIO_POOL_OFFSET)
  13.116 +#define BIO_POOL_IDX(bio)	((bio)->bi_flags >> BIO_POOL_OFFSET)	
  13.117 +
  13.118 +/*
  13.119 + * bio bi_rw flags
  13.120 + *
  13.121 + * bit 0 -- read (not set) or write (set)
  13.122 + * bit 1 -- rw-ahead when set
  13.123 + * bit 2 -- barrier
  13.124 + * bit 3 -- fail fast, don't want low level driver retries
  13.125 + * bit 4 -- synchronous I/O hint: the block layer will unplug immediately
  13.126 + */
  13.127 +#define BIO_RW		0
  13.128 +#define BIO_RW_AHEAD	1
  13.129 +#define BIO_RW_BARRIER	2
  13.130 +#define BIO_RW_FAILFAST	3
  13.131 +#define BIO_RW_SYNC	4
  13.132 +
  13.133 +/*
  13.134 + * various member access, note that bio_data should of course not be used
  13.135 + * on highmem page vectors
  13.136 + */
  13.137 +#define bio_iovec_idx(bio, idx)	(&((bio)->bi_io_vec[(idx)]))
  13.138 +#define bio_iovec(bio)		bio_iovec_idx((bio), (bio)->bi_idx)
  13.139 +#define bio_page(bio)		bio_iovec((bio))->bv_page
  13.140 +#define bio_offset(bio)		bio_iovec((bio))->bv_offset
  13.141 +#define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
  13.142 +#define bio_sectors(bio)	((bio)->bi_size >> 9)
  13.143 +#define bio_cur_sectors(bio)	(bio_iovec(bio)->bv_len >> 9)
  13.144 +#define bio_data(bio)		(page_address(bio_page((bio))) + bio_offset((bio)))
  13.145 +#define bio_barrier(bio)	((bio)->bi_rw & (1 << BIO_RW_BARRIER))
  13.146 +#define bio_sync(bio)		((bio)->bi_rw & (1 << BIO_RW_SYNC))
  13.147 +
  13.148 +/*
  13.149 + * will die
  13.150 + */
  13.151 +#define bio_to_phys(bio)	(page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
  13.152 +#define bvec_to_phys(bv)	(page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
  13.153 +
  13.154 +/*
  13.155 + * queues that have highmem support enabled may still need to revert to
  13.156 + * PIO transfers occasionally and thus map high pages temporarily. For
  13.157 + * permanent PIO fall back, user is probably better off disabling highmem
  13.158 + * I/O completely on that queue (see ide-dma for example)
  13.159 + */
  13.160 +#define __bio_kmap_atomic(bio, idx, kmtype)				\
  13.161 +	(kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page, kmtype) +	\
  13.162 +		bio_iovec_idx((bio), (idx))->bv_offset)
  13.163 +
  13.164 +#define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr, kmtype)
  13.165 +
  13.166 +/*
  13.167 + * merge helpers etc
  13.168 + */
  13.169 +
  13.170 +#define __BVEC_END(bio)		bio_iovec_idx((bio), (bio)->bi_vcnt - 1)
  13.171 +#define __BVEC_START(bio)	bio_iovec_idx((bio), (bio)->bi_idx)
  13.172 +/* Platforms may set this to restrict multi-page buffer merging. */
  13.173 +#ifndef BIOVEC_PHYS_MERGEABLE
  13.174 +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
  13.175 +	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
  13.176 +#endif
  13.177 +#define BIOVEC_VIRT_MERGEABLE(vec1, vec2)	\
  13.178 +	((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0)
  13.179 +#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
  13.180 +	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
  13.181 +#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
  13.182 +	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask)
  13.183 +#define BIO_SEG_BOUNDARY(q, b1, b2) \
  13.184 +	BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2)))
  13.185 +
  13.186 +#define bio_io_error(bio, bytes) bio_endio((bio), (bytes), -EIO)
  13.187 +
  13.188 +/*
  13.189 + * drivers should not use the __ version unless they _really_ want to
  13.190 + * run through the entire bio and not just pending pieces
  13.191 + */
  13.192 +#define __bio_for_each_segment(bvl, bio, i, start_idx)			\
  13.193 +	for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx);	\
  13.194 +	     i < (bio)->bi_vcnt;					\
  13.195 +	     bvl++, i++)
  13.196 +
  13.197 +#define bio_for_each_segment(bvl, bio, i)				\
  13.198 +	__bio_for_each_segment(bvl, bio, i, (bio)->bi_idx)
  13.199 +
  13.200 +/*
  13.201 + * get a reference to a bio, so it won't disappear. the intended use is
  13.202 + * something like:
  13.203 + *
  13.204 + * bio_get(bio);
  13.205 + * submit_bio(rw, bio);
  13.206 + * if (bio->bi_flags ...)
  13.207 + *	do_something
  13.208 + * bio_put(bio);
  13.209 + *
  13.210 + * without the bio_get(), it could potentially complete I/O before submit_bio
  13.211 + * returns. and then bio would be freed memory when if (bio->bi_flags ...)
  13.212 + * runs
  13.213 + */
  13.214 +#define bio_get(bio)	atomic_inc(&(bio)->bi_cnt)
  13.215 +
  13.216 +
  13.217 +/*
  13.218 + * A bio_pair is used when we need to split a bio.
  13.219 + * This can only happen for a bio that refers to just one
  13.220 + * page of data, and in the unusual situation when the
  13.221 + * page crosses a chunk/device boundary
  13.222 + *
  13.223 + * The address of the master bio is stored in bio1.bi_private
  13.224 + * The address of the pool the pair was allocated from is stored
  13.225 + *   in bio2.bi_private
  13.226 + */
  13.227 +struct bio_pair {
  13.228 +	struct bio	bio1, bio2;
  13.229 +	struct bio_vec	bv1, bv2;
  13.230 +	atomic_t	cnt;
  13.231 +	int		error;
  13.232 +};
  13.233 +extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool,
  13.234 +				  int first_sectors);
  13.235 +extern mempool_t *bio_split_pool;
  13.236 +extern void bio_pair_release(struct bio_pair *dbio);
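
A sketch of the intended call pattern, loosely modeled on how a striping driver might split a single-page bio at a boundary (generic_make_request() from <linux/blkdev.h> is assumed; names are illustrative):

	static void example_split_and_submit(struct bio *bio, int first_sectors)
	{
		struct bio_pair *bp = bio_split(bio, bio_split_pool, first_sectors);

		/* bio1 covers first_sectors, bio2 covers the remainder */
		generic_make_request(&bp->bio1);
		generic_make_request(&bp->bio2);
		bio_pair_release(bp);
	}
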
  13.237 +
  13.238 +extern struct bio *bio_alloc(int, int);
  13.239 +extern void bio_put(struct bio *);
  13.240 +
  13.241 +extern void bio_endio(struct bio *, unsigned int, int);
  13.242 +struct request_queue;
  13.243 +extern int bio_phys_segments(struct request_queue *, struct bio *);
  13.244 +extern int bio_hw_segments(struct request_queue *, struct bio *);
  13.245 +
  13.246 +extern void __bio_clone(struct bio *, struct bio *);
  13.247 +extern struct bio *bio_clone(struct bio *, int);
  13.248 +
  13.249 +extern void bio_init(struct bio *);
  13.250 +
  13.251 +extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
  13.252 +extern int bio_get_nr_vecs(struct block_device *);
  13.253 +extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
  13.254 +				unsigned long, unsigned int, int);
  13.255 +extern void bio_unmap_user(struct bio *, int);
  13.256 +extern void bio_set_pages_dirty(struct bio *bio);
  13.257 +extern void bio_check_pages_dirty(struct bio *bio);
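
A minimal sketch of building and submitting a one-page bio with the helpers above, assuming submit_bio() and READ from <linux/fs.h> and a caller-supplied completion handler (all names here are illustrative):

	static void example_read_page(struct block_device *bdev, struct page *page,
				      sector_t sector, bio_end_io_t *done)
	{
		struct bio *bio = bio_alloc(GFP_KERNEL, 1);

		if (!bio)
			return;
		bio->bi_bdev   = bdev;
		bio->bi_sector = sector;
		bio->bi_end_io = done;		/* handler should bio_put() the bio */
		bio_add_page(bio, page, PAGE_SIZE, 0);
		submit_bio(READ, bio);
	}
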
  13.258 +
  13.259 +#ifdef CONFIG_HIGHMEM
  13.260 +/*
  13.261 + * remember to add offset! and never ever reenable interrupts between a
  13.262 + * bvec_kmap_irq and bvec_kunmap_irq!!
  13.263 + *
  13.264 + * This function MUST be inlined - it plays with the CPU interrupt flags.
  13.265 + * Hence the `extern inline'.
  13.266 + */
  13.267 +extern inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
  13.268 +{
  13.269 +	unsigned long addr;
  13.270 +
  13.271 +	/*
  13.272 +	 * might not be a highmem page, but the preempt/irq count
  13.273 +	 * balancing is a lot nicer this way
  13.274 +	 */
  13.275 +	local_irq_save(*flags);
  13.276 +	addr = (unsigned long) kmap_atomic(bvec->bv_page, KM_BIO_SRC_IRQ);
  13.277 +
  13.278 +	BUG_ON(addr & ~PAGE_MASK);
  13.279 +
  13.280 +	return (char *) addr + bvec->bv_offset;
  13.281 +}
  13.282 +
  13.283 +extern inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
  13.284 +{
  13.285 +	unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
  13.286 +
  13.287 +	kunmap_atomic((void *) ptr, KM_BIO_SRC_IRQ);
  13.288 +	local_irq_restore(*flags);
  13.289 +}
  13.290 +
  13.291 +#else
  13.292 +#define bvec_kmap_irq(bvec, flags)	(page_address((bvec)->bv_page) + (bvec)->bv_offset)
  13.293 +#define bvec_kunmap_irq(buf, flags)	do { *(flags) = 0; } while (0)
  13.294 +#endif
  13.295 +
  13.296 +extern inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
  13.297 +				   unsigned long *flags)
  13.298 +{
  13.299 +	return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags);
  13.300 +}
  13.301 +#define __bio_kunmap_irq(buf, flags)	bvec_kunmap_irq(buf, flags)
  13.302 +
  13.303 +#define bio_kmap_irq(bio, flags) \
  13.304 +	__bio_kmap_irq((bio), (bio)->bi_idx, (flags))
  13.305 +#define bio_kunmap_irq(buf,flags)	__bio_kunmap_irq(buf, flags)
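
For example, copying the contents of a single bio_vec out to a scratch buffer could look like this (an illustrative helper; memcpy() comes from <linux/string.h>):

	static void example_copy_from_bvec(struct bio_vec *bvec, void *dst)
	{
		unsigned long flags;
		char *buf = bvec_kmap_irq(bvec, &flags);

		memcpy(dst, buf, bvec->bv_len);
		bvec_kunmap_irq(buf, &flags);	/* interrupts stay off in between */
	}
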
  13.306 +
  13.307 +#endif /* __LINUX_BIO_H */
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/linux-2.6.7-xen-sparse/include/linux/page-flags.h	Thu Aug 19 18:51:53 2004 +0000
    14.3 @@ -0,0 +1,343 @@
    14.4 +/*
    14.5 + * Macros for manipulating and testing page->flags
    14.6 + */
    14.7 +
    14.8 +#ifndef PAGE_FLAGS_H
    14.9 +#define PAGE_FLAGS_H
   14.10 +
   14.11 +#include <linux/percpu.h>
   14.12 +#include <linux/cache.h>
   14.13 +#include <asm/pgtable.h>
   14.14 +
   14.15 +/*
   14.16 + * Various page->flags bits:
   14.17 + *
   14.18 + * PG_reserved is set for special pages, which can never be swapped out. Some
   14.19 + * of them might not even exist (eg empty_bad_page)...
   14.20 + *
   14.21 + * The PG_private bitflag is set if page->private contains a valid value.
   14.22 + *
   14.23 + * During disk I/O, PG_locked is used. This bit is set before I/O and
   14.24 + * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
   14.25 + * waiting for the I/O on this page to complete.
   14.26 + *
    14.27 + * PG_uptodate tells whether the page's contents are valid.  When a read
   14.28 + * completes, the page becomes uptodate, unless a disk I/O error happened.
   14.29 + *
   14.30 + * For choosing which pages to swap out, inode pages carry a PG_referenced bit,
   14.31 + * which is set any time the system accesses that page through the (mapping,
   14.32 + * index) hash table.  This referenced bit, together with the referenced bit
   14.33 + * in the page tables, is used to manipulate page->age and move the page across
   14.34 + * the active, inactive_dirty and inactive_clean lists.
   14.35 + *
   14.36 + * Note that the referenced bit, the page->lru list_head and the active,
   14.37 + * inactive_dirty and inactive_clean lists are protected by the
   14.38 + * zone->lru_lock, and *NOT* by the usual PG_locked bit!
   14.39 + *
   14.40 + * PG_error is set to indicate that an I/O error occurred on this page.
   14.41 + *
   14.42 + * PG_arch_1 is an architecture specific page state bit.  The generic code
   14.43 + * guarantees that this bit is cleared for a page when it first is entered into
   14.44 + * the page cache.
   14.45 + *
   14.46 + * PG_highmem pages are not permanently mapped into the kernel virtual address
    14.47 + * space; they need to be kmapped separately when doing I/O on them.  The
    14.48 + * struct page itself (which holds these flag bits) is always mapped into
    14.49 + * kernel address space...
   14.50 + */
   14.51 +
   14.52 +/*
   14.53 + * Don't use the *_dontuse flags.  Use the macros.  Otherwise you'll break
   14.54 + * locked- and dirty-page accounting.  The top eight bits of page->flags are
   14.55 + * used for page->zone, so putting flag bits there doesn't work.
   14.56 + */
   14.57 +#define PG_locked	 	 0	/* Page is locked. Don't touch. */
   14.58 +#define PG_error		 1
   14.59 +#define PG_referenced		 2
   14.60 +#define PG_uptodate		 3
   14.61 +
   14.62 +#define PG_dirty	 	 4
   14.63 +#define PG_lru			 5
   14.64 +#define PG_active		 6
   14.65 +#define PG_slab			 7	/* slab debug (Suparna wants this) */
   14.66 +
   14.67 +#define PG_highmem		 8
   14.68 +#define PG_checked		 9	/* kill me in 2.5.<early>. */
   14.69 +#define PG_arch_1		10
   14.70 +#define PG_reserved		11
   14.71 +
   14.72 +#define PG_private		12	/* Has something at ->private */
   14.73 +#define PG_writeback		13	/* Page is under writeback */
   14.74 +#define PG_nosave		14	/* Used for system suspend/resume */
   14.75 +#define PG_maplock		15	/* Lock bit for rmap to ptes */
   14.76 +
   14.77 +#define PG_swapcache		16	/* Swap page: swp_entry_t in private */
   14.78 +#define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
   14.79 +#define PG_reclaim		18	/* To be reclaimed asap */
   14.80 +#define PG_compound		19	/* Part of a compound page */
   14.81 +
   14.82 +#define PG_anon			20	/* Anonymous: anon_vma in mapping */
   14.83 +#define PG_foreign		21	/* Page belongs to foreign allocator */
   14.84 +
   14.85 +
   14.86 +/*
   14.87 + * Global page accounting.  One instance per CPU.  Only unsigned longs are
   14.88 + * allowed.
   14.89 + */
   14.90 +struct page_state {
   14.91 +	unsigned long nr_dirty;		/* Dirty writeable pages */
   14.92 +	unsigned long nr_writeback;	/* Pages under writeback */
   14.93 +	unsigned long nr_unstable;	/* NFS unstable pages */
   14.94 +	unsigned long nr_page_table_pages;/* Pages used for pagetables */
   14.95 +	unsigned long nr_mapped;	/* mapped into pagetables */
   14.96 +	unsigned long nr_slab;		/* In slab */
   14.97 +#define GET_PAGE_STATE_LAST nr_slab
   14.98 +
   14.99 +	/*
  14.100 +	 * The below are zeroed by get_page_state().  Use get_full_page_state()
  14.101 +	 * to add up all these.
  14.102 +	 */
  14.103 +	unsigned long pgpgin;		/* Disk reads */
  14.104 +	unsigned long pgpgout;		/* Disk writes */
  14.105 +	unsigned long pswpin;		/* swap reads */
  14.106 +	unsigned long pswpout;		/* swap writes */
  14.107 +	unsigned long pgalloc_high;	/* page allocations */
  14.108 +
  14.109 +	unsigned long pgalloc_normal;
  14.110 +	unsigned long pgalloc_dma;
  14.111 +	unsigned long pgfree;		/* page freeings */
  14.112 +	unsigned long pgactivate;	/* pages moved inactive->active */
  14.113 +	unsigned long pgdeactivate;	/* pages moved active->inactive */
  14.114 +
  14.115 +	unsigned long pgfault;		/* faults (major+minor) */
  14.116 +	unsigned long pgmajfault;	/* faults (major only) */
  14.117 +	unsigned long pgrefill_high;	/* inspected in refill_inactive_zone */
  14.118 +	unsigned long pgrefill_normal;
  14.119 +	unsigned long pgrefill_dma;
  14.120 +
  14.121 +	unsigned long pgsteal_high;	/* total highmem pages reclaimed */
  14.122 +	unsigned long pgsteal_normal;
  14.123 +	unsigned long pgsteal_dma;
  14.124 +	unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
  14.125 +	unsigned long pgscan_kswapd_normal;
  14.126 +
  14.127 +	unsigned long pgscan_kswapd_dma;
  14.128 +	unsigned long pgscan_direct_high;/* total highmem pages scanned */
  14.129 +	unsigned long pgscan_direct_normal;
  14.130 +	unsigned long pgscan_direct_dma;
  14.131 +	unsigned long pginodesteal;	/* pages reclaimed via inode freeing */
  14.132 +
  14.133 +	unsigned long slabs_scanned;	/* slab objects scanned */
  14.134 +	unsigned long kswapd_steal;	/* pages reclaimed by kswapd */
  14.135 +	unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
  14.136 +	unsigned long pageoutrun;	/* kswapd's calls to page reclaim */
  14.137 +	unsigned long allocstall;	/* direct reclaim calls */
  14.138 +
  14.139 +	unsigned long pgrotated;	/* pages rotated to tail of the LRU */
  14.140 +};
  14.141 +
  14.142 +DECLARE_PER_CPU(struct page_state, page_states);
  14.143 +
  14.144 +extern void get_page_state(struct page_state *ret);
  14.145 +extern void get_full_page_state(struct page_state *ret);
  14.146 +extern unsigned long __read_page_state(unsigned offset);
  14.147 +
  14.148 +#define read_page_state(member) \
  14.149 +	__read_page_state(offsetof(struct page_state, member))
  14.150 +
  14.151 +#define mod_page_state(member, delta)					\
  14.152 +	do {								\
  14.153 +		unsigned long flags;					\
  14.154 +		local_irq_save(flags);					\
  14.155 +		__get_cpu_var(page_states).member += (delta);		\
  14.156 +		local_irq_restore(flags);				\
  14.157 +	} while (0)
  14.158 +
  14.159 +
  14.160 +#define inc_page_state(member)	mod_page_state(member, 1UL)
  14.161 +#define dec_page_state(member)	mod_page_state(member, 0UL - 1)
  14.162 +#define add_page_state(member,delta) mod_page_state(member, (delta))
  14.163 +#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
  14.164 +
  14.165 +#define mod_page_state_zone(zone, member, delta)			\
  14.166 +	do {								\
  14.167 +		unsigned long flags;					\
  14.168 +		local_irq_save(flags);					\
  14.169 +		if (is_highmem(zone))					\
  14.170 +			__get_cpu_var(page_states).member##_high += (delta);\
  14.171 +		else if (is_normal(zone))				\
  14.172 +			__get_cpu_var(page_states).member##_normal += (delta);\
  14.173 +		else							\
  14.174 +			__get_cpu_var(page_states).member##_dma += (delta);\
  14.175 +		local_irq_restore(flags);				\
  14.176 +	} while (0)
  14.177 +
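
As a rough illustration of how callers bump these per-CPU counters (a hypothetical fragment modeled on the fault path, not part of this file):

	static void example_account_fault(int major)
	{
		/* inc_page_state() disables interrupts around the per-CPU update */
		inc_page_state(pgfault);
		if (major)
			inc_page_state(pgmajfault);
	}
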
  14.178 +/*
  14.179 + * Manipulation of page state flags
  14.180 + */
  14.181 +#define PageLocked(page)		\
  14.182 +		test_bit(PG_locked, &(page)->flags)
  14.183 +#define SetPageLocked(page)		\
  14.184 +		set_bit(PG_locked, &(page)->flags)
  14.185 +#define TestSetPageLocked(page)		\
  14.186 +		test_and_set_bit(PG_locked, &(page)->flags)
  14.187 +#define ClearPageLocked(page)		\
  14.188 +		clear_bit(PG_locked, &(page)->flags)
  14.189 +#define TestClearPageLocked(page)	\
  14.190 +		test_and_clear_bit(PG_locked, &(page)->flags)
  14.191 +
  14.192 +#define PageError(page)		test_bit(PG_error, &(page)->flags)
  14.193 +#define SetPageError(page)	set_bit(PG_error, &(page)->flags)
  14.194 +#define ClearPageError(page)	clear_bit(PG_error, &(page)->flags)
  14.195 +
  14.196 +#define PageReferenced(page)	test_bit(PG_referenced, &(page)->flags)
  14.197 +#define SetPageReferenced(page)	set_bit(PG_referenced, &(page)->flags)
  14.198 +#define ClearPageReferenced(page)	clear_bit(PG_referenced, &(page)->flags)
  14.199 +#define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags)
  14.200 +
  14.201 +#ifndef arch_set_page_uptodate
  14.202 +#define arch_set_page_uptodate(page) do { } while (0)
  14.203 +#endif
  14.204 +
  14.205 +#define PageUptodate(page)	test_bit(PG_uptodate, &(page)->flags)
  14.206 +#define SetPageUptodate(page) \
  14.207 +	do {								\
  14.208 +		arch_set_page_uptodate(page);				\
  14.209 +		set_bit(PG_uptodate, &(page)->flags);			\
  14.210 +	} while (0)
  14.211 +#define ClearPageUptodate(page)	clear_bit(PG_uptodate, &(page)->flags)
  14.212 +
  14.213 +#define PageDirty(page)		test_bit(PG_dirty, &(page)->flags)
  14.214 +#define SetPageDirty(page)	set_bit(PG_dirty, &(page)->flags)
  14.215 +#define TestSetPageDirty(page)	test_and_set_bit(PG_dirty, &(page)->flags)
  14.216 +#define ClearPageDirty(page)	clear_bit(PG_dirty, &(page)->flags)
  14.217 +#define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags)
  14.218 +
  14.219 +#define SetPageLRU(page)	set_bit(PG_lru, &(page)->flags)
  14.220 +#define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
  14.221 +#define TestSetPageLRU(page)	test_and_set_bit(PG_lru, &(page)->flags)
  14.222 +#define TestClearPageLRU(page)	test_and_clear_bit(PG_lru, &(page)->flags)
  14.223 +
  14.224 +#define PageActive(page)	test_bit(PG_active, &(page)->flags)
  14.225 +#define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
  14.226 +#define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
  14.227 +#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags)
  14.228 +#define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags)
  14.229 +
  14.230 +#define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
  14.231 +#define SetPageSlab(page)	set_bit(PG_slab, &(page)->flags)
  14.232 +#define ClearPageSlab(page)	clear_bit(PG_slab, &(page)->flags)
  14.233 +#define TestClearPageSlab(page)	test_and_clear_bit(PG_slab, &(page)->flags)
  14.234 +#define TestSetPageSlab(page)	test_and_set_bit(PG_slab, &(page)->flags)
  14.235 +
  14.236 +#ifdef CONFIG_HIGHMEM
  14.237 +#define PageHighMem(page)	test_bit(PG_highmem, &(page)->flags)
  14.238 +#else
  14.239 +#define PageHighMem(page)	0 /* needed to optimize away at compile time */
  14.240 +#endif
  14.241 +
  14.242 +#define PageChecked(page)	test_bit(PG_checked, &(page)->flags)
  14.243 +#define SetPageChecked(page)	set_bit(PG_checked, &(page)->flags)
  14.244 +#define ClearPageChecked(page)	clear_bit(PG_checked, &(page)->flags)
  14.245 +
  14.246 +#define PageReserved(page)	test_bit(PG_reserved, &(page)->flags)
  14.247 +#define SetPageReserved(page)	set_bit(PG_reserved, &(page)->flags)
  14.248 +#define ClearPageReserved(page)	clear_bit(PG_reserved, &(page)->flags)
  14.249 +
  14.250 +#define SetPagePrivate(page)	set_bit(PG_private, &(page)->flags)
  14.251 +#define ClearPagePrivate(page)	clear_bit(PG_private, &(page)->flags)
  14.252 +#define PagePrivate(page)	test_bit(PG_private, &(page)->flags)
  14.253 +
  14.254 +#define PageWriteback(page)	test_bit(PG_writeback, &(page)->flags)
  14.255 +#define SetPageWriteback(page)						\
  14.256 +	do {								\
  14.257 +		if (!test_and_set_bit(PG_writeback,			\
  14.258 +				&(page)->flags))			\
  14.259 +			inc_page_state(nr_writeback);			\
  14.260 +	} while (0)
  14.261 +#define TestSetPageWriteback(page)					\
  14.262 +	({								\
  14.263 +		int ret;						\
  14.264 +		ret = test_and_set_bit(PG_writeback,			\
  14.265 +					&(page)->flags);		\
  14.266 +		if (!ret)						\
  14.267 +			inc_page_state(nr_writeback);			\
  14.268 +		ret;							\
  14.269 +	})
  14.270 +#define ClearPageWriteback(page)					\
  14.271 +	do {								\
  14.272 +		if (test_and_clear_bit(PG_writeback,			\
  14.273 +				&(page)->flags))			\
  14.274 +			dec_page_state(nr_writeback);			\
  14.275 +	} while (0)
  14.276 +#define TestClearPageWriteback(page)					\
  14.277 +	({								\
  14.278 +		int ret;						\
  14.279 +		ret = test_and_clear_bit(PG_writeback,			\
  14.280 +				&(page)->flags);			\
  14.281 +		if (ret)						\
  14.282 +			dec_page_state(nr_writeback);			\
  14.283 +		ret;							\
  14.284 +	})
  14.285 +
  14.286 +#define PageNosave(page)	test_bit(PG_nosave, &(page)->flags)
  14.287 +#define SetPageNosave(page)	set_bit(PG_nosave, &(page)->flags)
  14.288 +#define TestSetPageNosave(page)	test_and_set_bit(PG_nosave, &(page)->flags)
  14.289 +#define ClearPageNosave(page)		clear_bit(PG_nosave, &(page)->flags)
  14.290 +#define TestClearPageNosave(page)	test_and_clear_bit(PG_nosave, &(page)->flags)
  14.291 +
  14.292 +#define PageMappedToDisk(page)	test_bit(PG_mappedtodisk, &(page)->flags)
  14.293 +#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
  14.294 +#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
  14.295 +
  14.296 +#define PageReclaim(page)	test_bit(PG_reclaim, &(page)->flags)
  14.297 +#define SetPageReclaim(page)	set_bit(PG_reclaim, &(page)->flags)
  14.298 +#define ClearPageReclaim(page)	clear_bit(PG_reclaim, &(page)->flags)
  14.299 +#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags)
  14.300 +
  14.301 +#define PageCompound(page)	test_bit(PG_compound, &(page)->flags)
  14.302 +#define SetPageCompound(page)	set_bit(PG_compound, &(page)->flags)
  14.303 +#define ClearPageCompound(page)	clear_bit(PG_compound, &(page)->flags)
  14.304 +
  14.305 +#define PageAnon(page)		test_bit(PG_anon, &(page)->flags)
  14.306 +#define SetPageAnon(page)	set_bit(PG_anon, &(page)->flags)
  14.307 +#define ClearPageAnon(page)	clear_bit(PG_anon, &(page)->flags)
  14.308 +
  14.309 +/* A foreign page uses a custom destructor rather than the buddy allocator. */
  14.310 +#ifdef CONFIG_FOREIGN_PAGES
  14.311 +#define PageForeign(page)	test_bit(PG_foreign, &(page)->flags)
  14.312 +#define SetPageForeign(page)	set_bit(PG_foreign, &(page)->flags)
  14.313 +#define ClearPageForeign(page)	clear_bit(PG_foreign, &(page)->flags)
  14.314 +#define PageForeignDestructor(page)	\
  14.315 +	( (void (*) (struct page *)) (page)->mapping )
  14.316 +#else
  14.317 +#define PageForeign(page)	0
  14.318 +#define PageForeignDestructor(page)	void
  14.319 +#endif
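
A rough sketch of how a backend driver and the page allocator might cooperate through these macros, assuming (as the macros imply) that the destructor pointer is stashed in page->mapping before the page is handed out; the names below are purely illustrative:

	static void example_page_release(struct page *page);	/* illustrative dtor */

	static void example_mark_foreign(struct page *page)
	{
		page->mapping = (struct address_space *)example_page_release;
		SetPageForeign(page);
	}

	/* ...and on the free path, instead of returning the page to the buddy allocator: */
	static int example_try_foreign_free(struct page *page)
	{
		if (!PageForeign(page))
			return 0;
		(PageForeignDestructor(page))(page);
		return 1;
	}
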
  14.320 +
  14.321 +#ifdef CONFIG_SWAP
  14.322 +#define PageSwapCache(page)	test_bit(PG_swapcache, &(page)->flags)
  14.323 +#define SetPageSwapCache(page)	set_bit(PG_swapcache, &(page)->flags)
  14.324 +#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags)
  14.325 +#else
  14.326 +#define PageSwapCache(page)	0
  14.327 +#endif
  14.328 +
  14.329 +struct page;	/* forward declaration */
  14.330 +
  14.331 +int test_clear_page_dirty(struct page *page);
  14.332 +int __clear_page_dirty(struct page *page);
  14.333 +int test_clear_page_writeback(struct page *page);
  14.334 +int test_set_page_writeback(struct page *page);
  14.335 +
  14.336 +static inline void clear_page_dirty(struct page *page)
  14.337 +{
  14.338 +	test_clear_page_dirty(page);
  14.339 +}
  14.340 +
  14.341 +static inline void set_page_writeback(struct page *page)
  14.342 +{
  14.343 +	test_set_page_writeback(page);
  14.344 +}
  14.345 +
  14.346 +#endif	/* PAGE_FLAGS_H */
    15.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    15.2 +++ b/linux-2.6.7-xen-sparse/include/linux/skbuff.h	Thu Aug 19 18:51:53 2004 +0000
    15.3 @@ -0,0 +1,1073 @@
    15.4 +/*
    15.5 + *	Definitions for the 'struct sk_buff' memory handlers.
    15.6 + *
    15.7 + *	Authors:
    15.8 + *		Alan Cox, <gw4pts@gw4pts.ampr.org>
    15.9 + *		Florian La Roche, <rzsfl@rz.uni-sb.de>
   15.10 + *
   15.11 + *	This program is free software; you can redistribute it and/or
   15.12 + *	modify it under the terms of the GNU General Public License
   15.13 + *	as published by the Free Software Foundation; either version
   15.14 + *	2 of the License, or (at your option) any later version.
   15.15 + */
   15.16 +
   15.17 +#ifndef _LINUX_SKBUFF_H
   15.18 +#define _LINUX_SKBUFF_H
   15.19 +
   15.20 +#include <linux/config.h>
   15.21 +#include <linux/kernel.h>
   15.22 +#include <linux/compiler.h>
   15.23 +#include <linux/time.h>
   15.24 +#include <linux/cache.h>
   15.25 +
   15.26 +#include <asm/atomic.h>
   15.27 +#include <asm/types.h>
   15.28 +#include <linux/spinlock.h>
   15.29 +#include <linux/mm.h>
   15.30 +#include <linux/highmem.h>
   15.31 +#include <linux/poll.h>
   15.32 +#include <linux/net.h>
   15.33 +
   15.34 +#define HAVE_ALLOC_SKB		/* For the drivers to know */
   15.35 +#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
   15.36 +#define SLAB_SKB 		/* Slabified skbuffs 	   */
   15.37 +
   15.38 +#define CHECKSUM_NONE 0
   15.39 +#define CHECKSUM_HW 1
   15.40 +#define CHECKSUM_UNNECESSARY 2
   15.41 +
   15.42 +#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES - 1)) & \
   15.43 +				 ~(SMP_CACHE_BYTES - 1))
   15.44 +#define SKB_MAX_ORDER(X, ORDER)	(((PAGE_SIZE << (ORDER)) - (X) - \
   15.45 +				  sizeof(struct skb_shared_info)) & \
   15.46 +				  ~(SMP_CACHE_BYTES - 1))
   15.47 +#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X), 0))
   15.48 +#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0, 2))
   15.49 +
   15.50 +/* A. Checksumming of received packets by device.
   15.51 + *
   15.52 + *	NONE: device failed to checksum this packet.
   15.53 + *		skb->csum is undefined.
   15.54 + *
    15.55 + *	UNNECESSARY: device parsed the packet and claims to have verified
    15.56 + *		the checksum itself.  skb->csum is undefined.
    15.57 + *	      This is a bad option, but unfortunately many vendors do it,
    15.58 + *	      apparently with the secret goal of selling you a new device
    15.59 + *	      once you add a new protocol (e.g. IPv6) to your host. 8)
   15.60 + *
   15.61 + *	HW: the most generic way. Device supplied checksum of _all_
   15.62 + *	    the packet as seen by netif_rx in skb->csum.
   15.63 + *	    NOTE: Even if device supports only some protocols, but
   15.64 + *	    is able to produce some skb->csum, it MUST use HW,
   15.65 + *	    not UNNECESSARY.
   15.66 + *
   15.67 + * B. Checksumming on output.
   15.68 + *
   15.69 + *	NONE: skb is checksummed by protocol or csum is not required.
   15.70 + *
   15.71 + *	HW: device is required to csum packet as seen by hard_start_xmit
   15.72 + *	from skb->h.raw to the end and to record the checksum
   15.73 + *	at skb->h.raw+skb->csum.
   15.74 + *
   15.75 + *	Device must show its capabilities in dev->features, set
   15.76 + *	at device setup time.
    15.77 + *	NETIF_F_HW_CSUM	- a clever device; it can checksum
    15.78 + *			  everything.
    15.79 + *	NETIF_F_NO_CSUM - loopback or reliable single-hop media.
    15.80 + *	NETIF_F_IP_CSUM - a dumb device; it can only checksum
    15.81 + *			  TCP/UDP over IPv4.  Sigh.  Vendors prefer it this
    15.82 + *			  way for reasons unknown; though, see the comment above
    15.83 + *			  about CHECKSUM_UNNECESSARY. 8)
   15.84 + *
   15.85 + *	Any questions? No questions, good. 		--ANK
   15.86 + */
   15.87 +
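
As an illustration of the receive-side convention above, a driver fragment might do the following (eth_type_trans() and netif_rx() from <linux/etherdevice.h>/<linux/netdevice.h> are assumed; this is a sketch, not code from this patch):

	static void example_rx(struct net_device *dev, struct sk_buff *skb,
			       int hw_ok, unsigned int hw_csum)
	{
		skb->dev = dev;
		skb->protocol = eth_type_trans(skb, dev);
		if (hw_ok) {
			skb->ip_summed = CHECKSUM_HW;	/* full checksum in skb->csum */
			skb->csum = hw_csum;
		} else {
			skb->ip_summed = CHECKSUM_NONE;	/* let the stack verify it */
		}
		netif_rx(skb);
	}
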
   15.88 +#ifdef __i386__
   15.89 +#define NET_CALLER(arg) (*(((void **)&arg) - 1))
   15.90 +#else
   15.91 +#define NET_CALLER(arg) __builtin_return_address(0)
   15.92 +#endif
   15.93 +
   15.94 +#ifdef CONFIG_NETFILTER
   15.95 +struct nf_conntrack {
   15.96 +	atomic_t use;
   15.97 +	void (*destroy)(struct nf_conntrack *);
   15.98 +};
   15.99 +
  15.100 +struct nf_ct_info {
  15.101 +	struct nf_conntrack *master;
  15.102 +};
  15.103 +
  15.104 +#ifdef CONFIG_BRIDGE_NETFILTER
  15.105 +struct nf_bridge_info {
  15.106 +	atomic_t use;
  15.107 +	struct net_device *physindev;
  15.108 +	struct net_device *physoutdev;
  15.109 +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
  15.110 +	struct net_device *netoutdev;
  15.111 +#endif
  15.112 +	unsigned int mask;
  15.113 +	unsigned long data[32 / sizeof(unsigned long)];
  15.114 +};
  15.115 +#endif
  15.116 +
  15.117 +#endif
  15.118 +
  15.119 +struct sk_buff_head {
  15.120 +	/* These two members must be first. */
  15.121 +	struct sk_buff	*next;
  15.122 +	struct sk_buff	*prev;
  15.123 +
  15.124 +	__u32		qlen;
  15.125 +	spinlock_t	lock;
  15.126 +};
  15.127 +
  15.128 +struct sk_buff;
  15.129 +
  15.130 +/* To allow 64K frame to be packed as single skb without frag_list */
  15.131 +#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
  15.132 +
  15.133 +typedef struct skb_frag_struct skb_frag_t;
  15.134 +
  15.135 +struct skb_frag_struct {
  15.136 +	struct page *page;
  15.137 +	__u16 page_offset;
  15.138 +	__u16 size;
  15.139 +};
  15.140 +
  15.141 +/* This data is invariant across clones and lives at
  15.142 + * the end of the header data, ie. at skb->end.
  15.143 + */
  15.144 +struct skb_shared_info {
  15.145 +	atomic_t	dataref;
  15.146 +	unsigned int	nr_frags;
  15.147 +	unsigned short	tso_size;
  15.148 +	unsigned short	tso_segs;
  15.149 +	struct sk_buff	*frag_list;
  15.150 +	skb_frag_t	frags[MAX_SKB_FRAGS];
  15.151 +};
  15.152 +
  15.153 +/** 
  15.154 + *	struct sk_buff - socket buffer
  15.155 + *	@next: Next buffer in list
  15.156 + *	@prev: Previous buffer in list
  15.157 + *	@list: List we are on
  15.158 + *	@sk: Socket we are owned by
  15.159 + *	@stamp: Time we arrived
  15.160 + *	@dev: Device we arrived on/are leaving by
  15.161 + *      @real_dev: The real device we are using
  15.162 + *	@h: Transport layer header
  15.163 + *	@nh: Network layer header
  15.164 + *	@mac: Link layer header
  15.165 + *	@dst: FIXME: Describe this field
  15.166 + *	@cb: Control buffer. Free for use by every layer. Put private vars here
  15.167 + *	@len: Length of actual data
  15.168 + *	@data_len: Data length
  15.169 + *	@mac_len: Length of link layer header
  15.170 + *	@csum: Checksum
  15.171 + *	@__unused: Dead field, may be reused
  15.172 + *	@cloned: Head may be cloned (check refcnt to be sure)
  15.173 + *	@pkt_type: Packet class
  15.174 + *	@ip_summed: Driver fed us an IP checksum
  15.175 + *	@priority: Packet queueing priority
  15.176 + *	@users: User count - see {datagram,tcp}.c
  15.177 + *	@protocol: Packet protocol from driver
  15.178 + *	@security: Security level of packet
  15.179 + *	@truesize: Buffer size 
  15.180 + *	@head: Head of buffer
  15.181 + *	@data: Data head pointer
  15.182 + *	@tail: Tail pointer
  15.183 + *	@end: End pointer
  15.184 + *	@destructor: Destruct function
  15.185 + *	@nfmark: Can be used for communication between hooks
  15.186 + *	@nfcache: Cache info
  15.187 + *	@nfct: Associated connection, if any
  15.188 + *	@nf_debug: Netfilter debugging
  15.189 + *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  15.190 + *      @private: Data which is private to the HIPPI implementation
  15.191 + *	@tc_index: Traffic control index
  15.192 + */
  15.193 +
  15.194 +struct sk_buff {
  15.195 +	/* These two members must be first. */
  15.196 +	struct sk_buff		*next;
  15.197 +	struct sk_buff		*prev;
  15.198 +
  15.199 +	struct sk_buff_head	*list;
  15.200 +	struct sock		*sk;
  15.201 +	struct timeval		stamp;
  15.202 +	struct net_device	*dev;
  15.203 +	struct net_device	*real_dev;
  15.204 +
  15.205 +	union {
  15.206 +		struct tcphdr	*th;
  15.207 +		struct udphdr	*uh;
  15.208 +		struct icmphdr	*icmph;
  15.209 +		struct igmphdr	*igmph;
  15.210 +		struct iphdr	*ipiph;
  15.211 +		struct ipv6hdr	*ipv6h;
  15.212 +		unsigned char	*raw;
  15.213 +	} h;
  15.214 +
  15.215 +	union {
  15.216 +		struct iphdr	*iph;
  15.217 +		struct ipv6hdr	*ipv6h;
  15.218 +		struct arphdr	*arph;
  15.219 +		unsigned char	*raw;
  15.220 +	} nh;
  15.221 +
  15.222 +	union {
  15.223 +	  	struct ethhdr	*ethernet;
  15.224 +	  	unsigned char 	*raw;
  15.225 +	} mac;
  15.226 +
  15.227 +	struct  dst_entry	*dst;
  15.228 +	struct	sec_path	*sp;
  15.229 +
  15.230 +	/*
  15.231 +	 * This is the control buffer. It is free to use for every
  15.232 +	 * layer. Please put your private variables there. If you
  15.233 +	 * want to keep them across layers you have to do a skb_clone()
  15.234 +	 * first. This is owned by whoever has the skb queued ATM.
  15.235 +	 */
  15.236 +	char			cb[48];
  15.237 +
  15.238 +	unsigned int		len,
  15.239 +				data_len,
  15.240 +				mac_len,
  15.241 +				csum;
  15.242 +	unsigned char		local_df,
  15.243 +				cloned,
  15.244 +				pkt_type,
  15.245 +				ip_summed;
  15.246 +	__u32			priority;
  15.247 +	unsigned short		protocol,
  15.248 +				security;
  15.249 +
  15.250 +	void			(*destructor)(struct sk_buff *skb);
  15.251 +#ifdef CONFIG_NETFILTER
  15.252 +        unsigned long		nfmark;
  15.253 +	__u32			nfcache;
  15.254 +	struct nf_ct_info	*nfct;
  15.255 +#ifdef CONFIG_NETFILTER_DEBUG
  15.256 +        unsigned int		nf_debug;
  15.257 +#endif
  15.258 +#ifdef CONFIG_BRIDGE_NETFILTER
  15.259 +	struct nf_bridge_info	*nf_bridge;
  15.260 +#endif
  15.261 +#endif /* CONFIG_NETFILTER */
  15.262 +#if defined(CONFIG_HIPPI)
  15.263 +	union {
  15.264 +		__u32		ifield;
  15.265 +	} private;
  15.266 +#endif
  15.267 +#ifdef CONFIG_NET_SCHED
  15.268 +       __u32			tc_index;               /* traffic control index */
  15.269 +#endif
  15.270 +
  15.271 +	/* These elements must be at the end, see alloc_skb() for details.  */
  15.272 +	unsigned int		truesize;
  15.273 +	atomic_t		users;
  15.274 +	unsigned char		*head,
  15.275 +				*data,
  15.276 +				*tail,
  15.277 +				*end;
  15.278 +};
  15.279 +
  15.280 +#ifdef __KERNEL__
  15.281 +/*
  15.282 + *	Handling routines are only of interest to the kernel
  15.283 + */
  15.284 +#include <linux/slab.h>
  15.285 +
  15.286 +#include <asm/system.h>
  15.287 +
  15.288 +extern void	       __kfree_skb(struct sk_buff *skb);
  15.289 +extern struct sk_buff *alloc_skb(unsigned int size, int priority);
  15.290 +extern void	       kfree_skbmem(struct sk_buff *skb);
  15.291 +extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority);
  15.292 +extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
  15.293 +extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask);
  15.294 +extern int	       pskb_expand_head(struct sk_buff *skb,
  15.295 +					int nhead, int ntail, int gfp_mask);
  15.296 +extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
  15.297 +					    unsigned int headroom);
  15.298 +extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
  15.299 +				       int newheadroom, int newtailroom,
  15.300 +				       int priority);
  15.301 +extern struct sk_buff *		skb_pad(struct sk_buff *skb, int pad);
  15.302 +#define dev_kfree_skb(a)	kfree_skb(a)
  15.303 +extern void	      skb_over_panic(struct sk_buff *skb, int len,
  15.304 +				     void *here);
  15.305 +extern void	      skb_under_panic(struct sk_buff *skb, int len,
  15.306 +				      void *here);
  15.307 +
  15.308 +/* Internal */
  15.309 +#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
  15.310 +
  15.311 +/**
  15.312 + *	skb_queue_empty - check if a queue is empty
  15.313 + *	@list: queue head
  15.314 + *
  15.315 + *	Returns true if the queue is empty, false otherwise.
  15.316 + */
  15.317 +static inline int skb_queue_empty(const struct sk_buff_head *list)
  15.318 +{
  15.319 +	return list->next == (struct sk_buff *)list;
  15.320 +}
  15.321 +
  15.322 +/**
  15.323 + *	skb_get - reference buffer
  15.324 + *	@skb: buffer to reference
  15.325 + *
  15.326 + *	Makes another reference to a socket buffer and returns a pointer
  15.327 + *	to the buffer.
  15.328 + */
  15.329 +static inline struct sk_buff *skb_get(struct sk_buff *skb)
  15.330 +{
  15.331 +	atomic_inc(&skb->users);
  15.332 +	return skb;
  15.333 +}
  15.334 +
  15.335 +/*
   15.336 + * If users == 1, we are the only owner and can avoid a redundant
   15.337 + * atomic operation.
  15.338 + */
  15.339 +
  15.340 +/**
  15.341 + *	kfree_skb - free an sk_buff
  15.342 + *	@skb: buffer to free
  15.343 + *
  15.344 + *	Drop a reference to the buffer and free it if the usage count has
  15.345 + *	hit zero.
  15.346 + */
  15.347 +static inline void kfree_skb(struct sk_buff *skb)
  15.348 +{
  15.349 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  15.350 +		__kfree_skb(skb);
  15.351 +}
  15.352 +
  15.353 +/* Use this if you didn't touch the skb state [for fast switching] */
  15.354 +static inline void kfree_skb_fast(struct sk_buff *skb)
  15.355 +{
  15.356 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  15.357 +		kfree_skbmem(skb);
  15.358 +}
  15.359 +
  15.360 +/**
  15.361 + *	skb_cloned - is the buffer a clone
  15.362 + *	@skb: buffer to check
  15.363 + *
  15.364 + *	Returns true if the buffer was generated with skb_clone() and is
  15.365 + *	one of multiple shared copies of the buffer. Cloned buffers are
  15.366 + *	shared data so must not be written to under normal circumstances.
  15.367 + */
  15.368 +static inline int skb_cloned(const struct sk_buff *skb)
  15.369 +{
  15.370 +	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
  15.371 +}
  15.372 +
  15.373 +/**
  15.374 + *	skb_shared - is the buffer shared
  15.375 + *	@skb: buffer to check
  15.376 + *
  15.377 + *	Returns true if more than one person has a reference to this
  15.378 + *	buffer.
  15.379 + */
  15.380 +static inline int skb_shared(const struct sk_buff *skb)
  15.381 +{
  15.382 +	return atomic_read(&skb->users) != 1;
  15.383 +}
  15.384 +
  15.385 +/**
  15.386 + *	skb_share_check - check if buffer is shared and if so clone it
  15.387 + *	@skb: buffer to check
  15.388 + *	@pri: priority for memory allocation
  15.389 + *
  15.390 + *	If the buffer is shared the buffer is cloned and the old copy
  15.391 + *	drops a reference. A new clone with a single reference is returned.
  15.392 + *	If the buffer is not shared the original buffer is returned. When
   15.393 + *	called from interrupt context or with spinlocks held, @pri must
   15.394 + *	be %GFP_ATOMIC.
  15.395 + *
  15.396 + *	NULL is returned on a memory allocation failure.
  15.397 + */
  15.398 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
  15.399 +{
  15.400 +	might_sleep_if(pri & __GFP_WAIT);
  15.401 +	if (skb_shared(skb)) {
  15.402 +		struct sk_buff *nskb = skb_clone(skb, pri);
  15.403 +		kfree_skb(skb);
  15.404 +		skb = nskb;
  15.405 +	}
  15.406 +	return skb;
  15.407 +}
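
A typical call pattern (an illustrative receive handler, not part of this header):

	static int example_handler(struct sk_buff *skb)
	{
		skb = skb_share_check(skb, GFP_ATOMIC);
		if (!skb)
			return 0;	/* clone failed; the original was already dropped */
		/* ... safe to modify the skb's metadata from here on ... */
		kfree_skb(skb);
		return 0;
	}
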
  15.408 +
  15.409 +/*
  15.410 + *	Copy shared buffers into a new sk_buff. We effectively do COW on
   15.411 + *	packets to handle cases where we have a local reader and also forward
   15.412 + *	the packet, plus a couple of other messy ones. The common one is
   15.413 + *	tcpdumping a packet that's being forwarded.
  15.414 + */
  15.415 +
  15.416 +/**
  15.417 + *	skb_unshare - make a copy of a shared buffer
  15.418 + *	@skb: buffer to check
  15.419 + *	@pri: priority for memory allocation
  15.420 + *
  15.421 + *	If the socket buffer is a clone then this function creates a new
  15.422 + *	copy of the data, drops a reference count on the old copy and returns
  15.423 + *	the new copy with the reference count at 1. If the buffer is not a clone
  15.424 + *	the original buffer is returned. When called with a spinlock held or
   15.425 + *	from interrupt context, @pri must be %GFP_ATOMIC.
  15.426 + *
  15.427 + *	%NULL is returned on a memory allocation failure.
  15.428 + */
  15.429 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
  15.430 +{
  15.431 +	might_sleep_if(pri & __GFP_WAIT);
  15.432 +	if (skb_cloned(skb)) {
  15.433 +		struct sk_buff *nskb = skb_copy(skb, pri);
  15.434 +		kfree_skb(skb);	/* Free our shared copy */
  15.435 +		skb = nskb;
  15.436 +	}
  15.437 +	return skb;
  15.438 +}
  15.439 +
  15.440 +/**
  15.441 + *	skb_peek
  15.442 + *	@list_: list to peek at
  15.443 + *
  15.444 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  15.445 + *	be careful with this one. A peek leaves the buffer on the
  15.446 + *	list and someone else may run off with it. You must hold
  15.447 + *	the appropriate locks or have a private queue to do this.
  15.448 + *
  15.449 + *	Returns %NULL for an empty list or a pointer to the head element.
  15.450 + *	The reference count is not incremented and the reference is therefore
  15.451 + *	volatile. Use with caution.
  15.452 + */
  15.453 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
  15.454 +{
  15.455 +	struct sk_buff *list = ((struct sk_buff *)list_)->next;
  15.456 +	if (list == (struct sk_buff *)list_)
  15.457 +		list = NULL;
  15.458 +	return list;
  15.459 +}
  15.460 +
  15.461 +/**
  15.462 + *	skb_peek_tail
  15.463 + *	@list_: list to peek at
  15.464 + *
  15.465 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  15.466 + *	be careful with this one. A peek leaves the buffer on the
  15.467 + *	list and someone else may run off with it. You must hold
  15.468 + *	the appropriate locks or have a private queue to do this.
  15.469 + *
  15.470 + *	Returns %NULL for an empty list or a pointer to the tail element.
  15.471 + *	The reference count is not incremented and the reference is therefore
  15.472 + *	volatile. Use with caution.
  15.473 + */
  15.474 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
  15.475 +{
  15.476 +	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
  15.477 +	if (list == (struct sk_buff *)list_)
  15.478 +		list = NULL;
  15.479 +	return list;
  15.480 +}
  15.481 +
  15.482 +/**
  15.483 + *	skb_queue_len	- get queue length
  15.484 + *	@list_: list to measure
  15.485 + *
  15.486 + *	Return the length of an &sk_buff queue.
  15.487 + */
  15.488 +static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
  15.489 +{
  15.490 +	return list_->qlen;
  15.491 +}
  15.492 +
  15.493 +static inline void skb_queue_head_init(struct sk_buff_head *list)
  15.494 +{
  15.495 +	spin_lock_init(&list->lock);
  15.496 +	list->prev = list->next = (struct sk_buff *)list;
  15.497 +	list->qlen = 0;
  15.498 +}
  15.499 +
  15.500 +/*
  15.501 + *	Insert an sk_buff at the start of a list.
  15.502 + *
  15.503 + *	The "__skb_xxxx()" functions are the non-atomic ones that
  15.504 + *	can only be called with interrupts disabled.
  15.505 + */
  15.506 +
  15.507 +/**
  15.508 + *	__skb_queue_head - queue a buffer at the list head
  15.509 + *	@list: list to use
  15.510 + *	@newsk: buffer to queue
  15.511 + *
  15.512 + *	Queue a buffer at the start of a list. This function takes no locks
  15.513 + *	and you must therefore hold required locks before calling it.
  15.514 + *
  15.515 + *	A buffer cannot be placed on two lists at the same time.
  15.516 + */
  15.517 +extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
  15.518 +static inline void __skb_queue_head(struct sk_buff_head *list,
  15.519 +				    struct sk_buff *newsk)
  15.520 +{
  15.521 +	struct sk_buff *prev, *next;
  15.522 +
  15.523 +	newsk->list = list;
  15.524 +	list->qlen++;
  15.525 +	prev = (struct sk_buff *)list;
  15.526 +	next = prev->next;
  15.527 +	newsk->next = next;
  15.528 +	newsk->prev = prev;
  15.529 +	next->prev  = prev->next = newsk;
  15.530 +}
  15.531 +
  15.532 +/**
  15.533 + *	__skb_queue_tail - queue a buffer at the list tail
  15.534 + *	@list: list to use
  15.535 + *	@newsk: buffer to queue
  15.536 + *
  15.537 + *	Queue a buffer at the end of a list. This function takes no locks
  15.538 + *	and you must therefore hold required locks before calling it.
  15.539 + *
  15.540 + *	A buffer cannot be placed on two lists at the same time.
  15.541 + */
  15.542 +extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);
  15.543 +static inline void __skb_queue_tail(struct sk_buff_head *list,
  15.544 +				   struct sk_buff *newsk)
  15.545 +{
  15.546 +	struct sk_buff *prev, *next;
  15.547 +
  15.548 +	newsk->list = list;
  15.549 +	list->qlen++;
  15.550 +	next = (struct sk_buff *)list;
  15.551 +	prev = next->prev;
  15.552 +	newsk->next = next;
  15.553 +	newsk->prev = prev;
  15.554 +	next->prev  = prev->next = newsk;
  15.555 +}
  15.556 +
  15.557 +
  15.558 +/**
  15.559 + *	__skb_dequeue - remove from the head of the queue
  15.560 + *	@list: list to dequeue from
  15.561 + *
  15.562 + *	Remove the head of the list. This function does not take any locks
  15.563 + *	so must be used with appropriate locks held only. The head item is
  15.564 + *	returned or %NULL if the list is empty.
  15.565 + */
  15.566 +extern struct sk_buff *skb_dequeue(struct sk_buff_head *list);
  15.567 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  15.568 +{
  15.569 +	struct sk_buff *next, *prev, *result;
  15.570 +
  15.571 +	prev = (struct sk_buff *) list;
  15.572 +	next = prev->next;
  15.573 +	result = NULL;
  15.574 +	if (next != prev) {
  15.575 +		result	     = next;
  15.576 +		next	     = next->next;
  15.577 +		list->qlen--;
  15.578 +		next->prev   = prev;
  15.579 +		prev->next   = next;
  15.580 +		result->next = result->prev = NULL;
  15.581 +		result->list = NULL;
  15.582 +	}
  15.583 +	return result;
  15.584 +}
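
Putting the queue primitives together, a simple producer/consumer might use the locking variants like this (illustrative only):

	static struct sk_buff_head example_rxq;

	static void example_queue_init(void)
	{
		skb_queue_head_init(&example_rxq);
	}

	static void example_produce(struct sk_buff *skb)
	{
		skb_queue_tail(&example_rxq, skb);	/* takes the queue lock */
	}

	static struct sk_buff *example_consume(void)
	{
		return skb_dequeue(&example_rxq);	/* NULL when empty */
	}
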
  15.585 +
  15.586 +
  15.587 +/*
  15.588 + *	Insert a packet on a list.
  15.589 + */
  15.590 +extern void        skb_insert(struct sk_buff *old, struct sk_buff *newsk);
  15.591 +static inline void __skb_insert(struct sk_buff *newsk,
  15.592 +				struct sk_buff *prev, struct sk_buff *next,
  15.593 +				struct sk_buff_head *list)
  15.594 +{
  15.595 +	newsk->next = next;
  15.596 +	newsk->prev = prev;
  15.597 +	next->prev  = prev->next = newsk;
  15.598 +	newsk->list = list;
  15.599 +	list->qlen++;
  15.600 +}
  15.601 +
  15.602 +/*
  15.603 + *	Place a packet after a given packet in a list.
  15.604 + */
  15.605 +extern void	   skb_append(struct sk_buff *old, struct sk_buff *newsk);
  15.606 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
  15.607 +{
  15.608 +	__skb_insert(newsk, old, old->next, old->list);
  15.609 +}
  15.610 +
  15.611 +/*
   15.612 + * Remove an sk_buff from a list. _Must_ be called atomically, and with
   15.613 + * the list known.
  15.614 + */
  15.615 +extern void	   skb_unlink(struct sk_buff *skb);
  15.616 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  15.617 +{
  15.618 +	struct sk_buff *next, *prev;
  15.619 +
  15.620 +	list->qlen--;
  15.621 +	next	   = skb->next;
  15.622 +	prev	   = skb->prev;
  15.623 +	skb->next  = skb->prev = NULL;
  15.624 +	skb->list  = NULL;
  15.625 +	next->prev = prev;
  15.626 +	prev->next = next;
  15.627 +}
  15.628 +
  15.629 +
  15.630 +/* XXX: more streamlined implementation */
  15.631 +
  15.632 +/**
  15.633 + *	__skb_dequeue_tail - remove from the tail of the queue
  15.634 + *	@list: list to dequeue from
  15.635 + *
  15.636 + *	Remove the tail of the list. This function does not take any locks
  15.637 + *	so must be used with appropriate locks held only. The tail item is
  15.638 + *	returned or %NULL if the list is empty.
  15.639 + */
  15.640 +extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);
  15.641 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
  15.642 +{
  15.643 +	struct sk_buff *skb = skb_peek_tail(list);
  15.644 +	if (skb)
  15.645 +		__skb_unlink(skb, list);
  15.646 +	return skb;
  15.647 +}
  15.648 +
  15.649 +
  15.650 +static inline int skb_is_nonlinear(const struct sk_buff *skb)
  15.651 +{
  15.652 +	return skb->data_len;
  15.653 +}
  15.654 +
  15.655 +static inline unsigned int skb_headlen(const struct sk_buff *skb)
  15.656 +{
  15.657 +	return skb->len - skb->data_len;
  15.658 +}
  15.659 +
  15.660 +static inline int skb_pagelen(const struct sk_buff *skb)
  15.661 +{
  15.662 +	int i, len = 0;
  15.663 +
  15.664 +	for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
  15.665 +		len += skb_shinfo(skb)->frags[i].size;
  15.666 +	return len + skb_headlen(skb);
  15.667 +}
  15.668 +
  15.669 +static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
  15.670 +{
  15.671 +	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  15.672 +	frag->page = page;
  15.673 +	frag->page_offset = off;
  15.674 +	frag->size = size;
  15.675 +	skb_shinfo(skb)->nr_frags = i+1;
  15.676 +}
  15.677 +
  15.678 +#define SKB_PAGE_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->nr_frags)
  15.679 +#define SKB_FRAG_ASSERT(skb) 	BUG_ON(skb_shinfo(skb)->frag_list)
  15.680 +#define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))
  15.681 +
  15.682 +/*
  15.683 + *	Add data to an sk_buff
  15.684 + */
  15.685 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
  15.686 +{
  15.687 +	unsigned char *tmp = skb->tail;
  15.688 +	SKB_LINEAR_ASSERT(skb);
  15.689 +	skb->tail += len;
  15.690 +	skb->len  += len;
  15.691 +	return tmp;
  15.692 +}
  15.693 +
  15.694 +/**
  15.695 + *	skb_put - add data to a buffer
  15.696 + *	@skb: buffer to use
  15.697 + *	@len: amount of data to add
  15.698 + *
  15.699 + *	This function extends the used data area of the buffer. If this would
  15.700 + *	exceed the total buffer size the kernel will panic. A pointer to the
  15.701 + *	first byte of the extra data is returned.
  15.702 + */
  15.703 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
  15.704 +{
  15.705 +	unsigned char *tmp = skb->tail;
  15.706 +	SKB_LINEAR_ASSERT(skb);
  15.707 +	skb->tail += len;
  15.708 +	skb->len  += len;
  15.709 +	if (unlikely(skb->tail>skb->end))
  15.710 +		skb_over_panic(skb, len, current_text_addr());
  15.711 +	return tmp;
  15.712 +}
  15.713 +
  15.714 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
  15.715 +{
  15.716 +	skb->data -= len;
  15.717 +	skb->len  += len;
  15.718 +	return skb->data;
  15.719 +}
  15.720 +
  15.721 +/**
  15.722 + *	skb_push - add data to the start of a buffer
  15.723 + *	@skb: buffer to use
  15.724 + *	@len: amount of data to add
  15.725 + *
  15.726 + *	This function extends the used data area of the buffer at the buffer
  15.727 + *	start. If this would exceed the total buffer headroom the kernel will
  15.728 + *	panic. A pointer to the first byte of the extra data is returned.
  15.729 + */
  15.730 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
  15.731 +{
  15.732 +	skb->data -= len;
  15.733 +	skb->len  += len;
  15.734 +	if (unlikely(skb->data<skb->head))
  15.735 +		skb_under_panic(skb, len, current_text_addr());
  15.736 +	return skb->data;
  15.737 +}
  15.738 +
  15.739 +static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
  15.740 +{
  15.741 +	skb->len -= len;
  15.742 +	BUG_ON(skb->len < skb->data_len);
  15.743 +	return skb->data += len;
  15.744 +}
  15.745 +
  15.746 +/**
  15.747 + *	skb_pull - remove data from the start of a buffer
  15.748 + *	@skb: buffer to use
  15.749 + *	@len: amount of data to remove
  15.750 + *
  15.751 + *	This function removes data from the start of a buffer, returning
  15.752 + *	the memory to the headroom. A pointer to the next data in the buffer
  15.753 + *	is returned. Once the data has been pulled future pushes will overwrite
  15.754 + *	the old data.
  15.755 + */
  15.756 +static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
  15.757 +{
  15.758 +	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
  15.759 +}
  15.760 +
  15.761 +extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
  15.762 +
  15.763 +static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
  15.764 +{
  15.765 +	if (len > skb_headlen(skb) &&
  15.766 +	    !__pskb_pull_tail(skb, len-skb_headlen(skb)))
  15.767 +		return NULL;
  15.768 +	skb->len -= len;
  15.769 +	return skb->data += len;
  15.770 +}
  15.771 +
  15.772 +static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len)
  15.773 +{
  15.774 +	return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len);
  15.775 +}
  15.776 +
  15.777 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
  15.778 +{
  15.779 +	if (likely(len <= skb_headlen(skb)))
  15.780 +		return 1;
  15.781 +	if (unlikely(len > skb->len))
  15.782 +		return 0;
  15.783 +	return __pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL;
  15.784 +}
  15.785 +
  15.786 +/**
  15.787 + *	skb_headroom - bytes at buffer head
  15.788 + *	@skb: buffer to check
  15.789 + *
  15.790 + *	Return the number of bytes of free space at the head of an &sk_buff.
  15.791 + */
  15.792 +static inline int skb_headroom(const struct sk_buff *skb)
  15.793 +{
  15.794 +	return skb->data - skb->head;
  15.795 +}
  15.796 +
  15.797 +/**
  15.798 + *	skb_tailroom - bytes at buffer end
  15.799 + *	@skb: buffer to check
  15.800 + *
  15.801 + *	Return the number of bytes of free space at the tail of an sk_buff
  15.802 + */
  15.803 +static inline int skb_tailroom(const struct sk_buff *skb)
  15.804 +{
  15.805 +	return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
  15.806 +}
  15.807 +
  15.808 +/**
  15.809 + *	skb_reserve - adjust headroom
  15.810 + *	@skb: buffer to alter
  15.811 + *	@len: bytes to move
  15.812 + *
  15.813 + *	Increase the headroom of an empty &sk_buff by reducing the tail
  15.814 + *	room. This is only allowed for an empty buffer.
  15.815 + */
  15.816 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
  15.817 +{
  15.818 +	skb->data += len;
  15.819 +	skb->tail += len;
  15.820 +}
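
A minimal sketch of the usual transmit-side construction with the helpers above (memcpy() from <linux/string.h>; the 16-byte headroom figure is just an example):

	static struct sk_buff *example_build_frame(const void *payload,
						   unsigned int len)
	{
		struct sk_buff *skb = alloc_skb(len + 16, GFP_ATOMIC);

		if (!skb)
			return NULL;
		skb_reserve(skb, 16);			/* room for lower-layer headers */
		memcpy(skb_put(skb, len), payload, len);/* append the payload */
		/* protocol layers can later skb_push() their headers into the headroom */
		return skb;
	}
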
  15.821 +
  15.822 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
  15.823 +
  15.824 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
  15.825 +{
  15.826 +	if (!skb->data_len) {
  15.827 +		skb->len  = len;
  15.828 +		skb->tail = skb->data + len;
  15.829 +	} else
  15.830 +		___pskb_trim(skb, len, 0);
  15.831 +}
  15.832 +
  15.833 +/**
  15.834 + *	skb_trim - remove end from a buffer
  15.835 + *	@skb: buffer to alter
  15.836 + *	@len: new length
  15.837 + *
  15.838 + *	Cut the length of a buffer down by removing data from the tail. If
  15.839 + *	the buffer is already under the length specified it is not modified.
  15.840 + */
  15.841 +static inline void skb_trim(struct sk_buff *skb, unsigned int len)
  15.842 +{
  15.843 +	if (skb->len > len)
  15.844 +		__skb_trim(skb, len);
  15.845 +}
  15.846 +
  15.847 +
  15.848 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
  15.849 +{
  15.850 +	if (!skb->data_len) {
  15.851 +		skb->len  = len;
  15.852 +		skb->tail = skb->data+len;
  15.853 +		return 0;
  15.854 +	}
  15.855 +	return ___pskb_trim(skb, len, 1);
  15.856 +}
  15.857 +
  15.858 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
  15.859 +{
  15.860 +	return (len < skb->len) ? __pskb_trim(skb, len) : 0;
  15.861 +}
  15.862 +
  15.863 +/**
  15.864 + *	skb_orphan - orphan a buffer
  15.865 + *	@skb: buffer to orphan
  15.866 + *
  15.867 + *	If a buffer currently has an owner then we call the owner's
  15.868 + *	destructor function and make the @skb unowned. The buffer continues
  15.869 + *	to exist but is no longer charged to its former owner.
  15.870 + */
  15.871 +static inline void skb_orphan(struct sk_buff *skb)
  15.872 +{
  15.873 +	if (skb->destructor)
  15.874 +		skb->destructor(skb);
  15.875 +	skb->destructor = NULL;
  15.876 +	skb->sk		= NULL;
  15.877 +}
  15.878 +
  15.879 +/**
  15.880 + *	__skb_queue_purge - empty a list
  15.881 + *	@list: list to empty
  15.882 + *
  15.883 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  15.884 + *	the list and one reference dropped. This function does not take the
  15.885 + *	list lock and the caller must hold the relevant locks to use it.
  15.886 + */
  15.887 +extern void skb_queue_purge(struct sk_buff_head *list);
  15.888 +static inline void __skb_queue_purge(struct sk_buff_head *list)
  15.889 +{
  15.890 +	struct sk_buff *skb;
  15.891 +	while ((skb = __skb_dequeue(list)) != NULL)
  15.892 +		kfree_skb(skb);
  15.893 +}
  15.894 +
  15.895 +/**
  15.896 + *	__dev_alloc_skb - allocate an skbuff for sending
  15.897 + *	@length: length to allocate
  15.898 + *	@gfp_mask: get_free_pages mask, passed to alloc_skb
  15.899 + *
  15.900 + *	Allocate a new &sk_buff and assign it a usage count of one. The
  15.901 + *	buffer has unspecified headroom built in. Users should allocate
  15.902 + *	the headroom they think they need without accounting for the
  15.903 + *	built in space. The built in space is used for optimisations.
  15.904 + *
   15.905 + *	%NULL is returned if there is no free memory.
  15.906 + */
  15.907 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
  15.908 +					      int gfp_mask)
  15.909 +{
  15.910 +	struct sk_buff *skb;
  15.911 +#ifdef CONFIG_PAGESIZED_SKBS
  15.912 +	length = max(length, (unsigned int)(PAGE_SIZE - 16));
  15.913 +#endif
  15.914 +	skb = alloc_skb(length + 16, gfp_mask);
  15.915 +	if (likely(skb))
  15.916 +		skb_reserve(skb, 16);
  15.917 +	return skb;
  15.918 +}
  15.919 +
  15.920 +/**
  15.921 + *	dev_alloc_skb - allocate an skbuff for sending
  15.922 + *	@length: length to allocate
  15.923 + *
  15.924 + *	Allocate a new &sk_buff and assign it a usage count of one. The
  15.925 + *	buffer has unspecified headroom built in. Users should allocate
  15.926 + *	the headroom they think they need without accounting for the
  15.927 + *	built in space. The built in space is used for optimisations.
  15.928 + *
   15.929 + *	%NULL is returned if there is no free memory. Although this function
  15.930 + *	allocates memory it can be called from an interrupt.
  15.931 + */
  15.932 +static inline struct sk_buff *dev_alloc_skb(unsigned int length)
  15.933 +{
  15.934 +	return __dev_alloc_skb(length, GFP_ATOMIC);
  15.935 +}
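
For receive buffers the usual pattern is a small extra reserve so the IP header lands aligned after the 14-byte Ethernet header (illustrative fragment):

	static struct sk_buff *example_rx_alloc(unsigned int frame_len)
	{
		struct sk_buff *skb = dev_alloc_skb(frame_len + 2);

		if (skb)
			skb_reserve(skb, 2);	/* 14 + 2 puts the IP header on a 16-byte boundary */
		return skb;
	}
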
  15.936 +
  15.937 +/**
  15.938 + *	skb_cow - copy header of skb when it is required
  15.939 + *	@skb: buffer to cow
  15.940 + *	@headroom: needed headroom
  15.941 + *
  15.942 + *	If the skb passed lacks sufficient headroom or its data part
  15.943 + *	is shared, data is reallocated. If reallocation fails, an error
  15.944 + *	is returned and original skb is not changed.
  15.945 + *
  15.946 + *	The result is skb with writable area skb->head...skb->tail
  15.947 + *	and at least @headroom of space at head.
  15.948 + */
  15.949 +static inline int skb_cow(struct sk_buff *skb, unsigned int headroom)
  15.950 +{
  15.951 +	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
  15.952 +
  15.953 +	if (delta < 0)
  15.954 +		delta = 0;
  15.955 +
  15.956 +	if (delta || skb_cloned(skb))
  15.957 +		return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC);
  15.958 +	return 0;
  15.959 +}
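
A sketch of the intended use before modifying headers in place (illustrative; -ENOMEM comes from <linux/errno.h>):

	static int example_make_writable(struct sk_buff *skb)
	{
		/* ensure a private data area with at least 16 bytes of headroom */
		if (skb_cow(skb, 16))
			return -ENOMEM;
		/* ... skb->data may now be modified in place ... */
		return 0;
	}
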
  15.960 +
  15.961 +/**
  15.962 + *	skb_padto	- pad an skbuff up to a minimal size
  15.963 + *	@skb: buffer to pad
  15.964 + *	@len: minimal length
  15.965 + *
  15.966 + *	Pads up a buffer to ensure the trailing bytes exist and are
  15.967 + *	blanked. If the buffer already contains sufficient data it
  15.968 + *	is untouched. Returns the buffer, which may be a replacement
  15.969 + *	for the original, or NULL for out of memory - in which case
  15.970 + *	the original buffer is still freed.
  15.971 + */
  15.972 + 
  15.973 +static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len)
  15.974 +{
  15.975 +	unsigned int size = skb->len;
  15.976 +	if (likely(size >= len))
  15.977 +		return skb;
  15.978 +	return skb_pad(skb, len-size);
  15.979 +}
  15.980 +
  15.981 +/**
  15.982 + *	skb_linearize - convert paged skb to linear one
  15.983 + *	@skb: buffer to linearize
  15.984 + *	@gfp: allocation mode
  15.985 + *
  15.986 + *	If there is no free memory, -ENOMEM is returned; otherwise zero
  15.987 + *	is returned and the old skb data is released.
  15.988 + */
  15.989 +extern int __skb_linearize(struct sk_buff *skb, int gfp);
  15.990 +static inline int skb_linearize(struct sk_buff *skb, int gfp)
  15.991 +{
  15.992 +	return __skb_linearize(skb, gfp);
  15.993 +}
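
A minimal sketch (not part of the changeset): a driver without scatter-gather
support can use skb_linearize() to flatten a paged skb before handing it to
hardware that needs one contiguous buffer.

	/* In a transmit path, before DMA-mapping skb->data: */
	if (skb_shinfo(skb)->nr_frags != 0 &&
	    skb_linearize(skb, GFP_ATOMIC) != 0) {
		dev_kfree_skb(skb);	/* no memory to linearize: drop */
		return 0;
	}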
  15.994 +
  15.995 +static inline void *kmap_skb_frag(const skb_frag_t *frag)
  15.996 +{
  15.997 +#ifdef CONFIG_HIGHMEM
  15.998 +	BUG_ON(in_irq());
  15.999 +
 15.1000 +	local_bh_disable();
 15.1001 +#endif
 15.1002 +	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
 15.1003 +}
 15.1004 +
 15.1005 +static inline void kunmap_skb_frag(void *vaddr)
 15.1006 +{
 15.1007 +	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 15.1008 +#ifdef CONFIG_HIGHMEM
 15.1009 +	local_bh_enable();
 15.1010 +#endif
 15.1011 +}
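
A minimal sketch (not part of the changeset) pairing the two helpers above to
copy one page fragment out of a paged skb. 'copy_one_frag' and 'buf' are
hypothetical; frag->page_offset and frag->size are the real skb_frag_t fields.

	static void copy_one_frag(const struct sk_buff *skb, int i, void *buf)
	{
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		void *vaddr = kmap_skb_frag(frag);	/* disables BHs on HIGHMEM */

		memcpy(buf, vaddr + frag->page_offset, frag->size);
		kunmap_skb_frag(vaddr);			/* re-enables BHs */
	}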
 15.1012 +
 15.1013 +#define skb_queue_walk(queue, skb) \
 15.1014 +		for (skb = (queue)->next, prefetch(skb->next);	\
 15.1015 +		     (skb != (struct sk_buff *)(queue));	\
 15.1016 +		     skb = skb->next, prefetch(skb->next))
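
A minimal sketch (not part of the changeset) of skb_queue_walk(); as with the
other queue helpers, the caller is assumed to hold the queue's lock while
walking it.

	static unsigned int count_queued(struct sk_buff_head *queue)
	{
		struct sk_buff *skb;
		unsigned int n = 0;

		skb_queue_walk(queue, skb)
			n++;
		return n;
	}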
 15.1017 +
 15.1018 +
 15.1019 +extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 15.1020 +					 int noblock, int *err);
 15.1021 +extern unsigned int    datagram_poll(struct file *file, struct socket *sock,
 15.1022 +				     struct poll_table_struct *wait);
 15.1023 +extern int	       skb_copy_datagram(const struct sk_buff *from,
 15.1024 +					 int offset, char __user *to, int size);
 15.1025 +extern int	       skb_copy_datagram_iovec(const struct sk_buff *from,
 15.1026 +					       int offset, struct iovec *to,
 15.1027 +					       int size);
 15.1028 +extern int	       skb_copy_and_csum_datagram(const struct sk_buff *skb,
 15.1029 +						  int offset, u8 __user *to,
 15.1030 +						  int len, unsigned int *csump);
 15.1031 +extern int	       skb_copy_and_csum_datagram_iovec(const
 15.1032 +							struct sk_buff *skb,
 15.1033 +							int hlen,
 15.1034 +							struct iovec *iov);
 15.1035 +extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
 15.1036 +extern unsigned int    skb_checksum(const struct sk_buff *skb, int offset,
 15.1037 +				    int len, unsigned int csum);
 15.1038 +extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
 15.1039 +				     void *to, int len);
 15.1040 +extern unsigned int    skb_copy_and_csum_bits(const struct sk_buff *skb,
 15.1041 +					      int offset, u8 *to, int len,
 15.1042 +					      unsigned int csum);
 15.1043 +extern void	       skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 15.1044 +
 15.1045 +extern void skb_init(void);
 15.1046 +extern void skb_add_mtu(int mtu);
 15.1047 +
 15.1048 +#ifdef CONFIG_NETFILTER
 15.1049 +static inline void nf_conntrack_put(struct nf_ct_info *nfct)
 15.1050 +{
 15.1051 +	if (nfct && atomic_dec_and_test(&nfct->master->use))
 15.1052 +		nfct->master->destroy(nfct->master);
 15.1053 +}
 15.1054 +static inline void nf_conntrack_get(struct nf_ct_info *nfct)
 15.1055 +{
 15.1056 +	if (nfct)
 15.1057 +		atomic_inc(&nfct->master->use);
 15.1058 +}
 15.1059 +
 15.1060 +#ifdef CONFIG_BRIDGE_NETFILTER
 15.1061 +static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
 15.1062 +{
 15.1063 +	if (nf_bridge && atomic_dec_and_test(&nf_bridge->use))
 15.1064 +		kfree(nf_bridge);
 15.1065 +}
 15.1066 +static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
 15.1067 +{
 15.1068 +	if (nf_bridge)
 15.1069 +		atomic_inc(&nf_bridge->use);
 15.1070 +}
 15.1071 +#endif
 15.1072 +
 15.1073 +#endif
 15.1074 +
 15.1075 +#endif	/* __KERNEL__ */
 15.1076 +#endif	/* _LINUX_SKBUFF_H */
    16.1 --- a/linux-2.6.7-xen-sparse/mm/page_alloc.c	Thu Aug 19 16:09:39 2004 +0000
    16.2 +++ b/linux-2.6.7-xen-sparse/mm/page_alloc.c	Thu Aug 19 18:51:53 2004 +0000
    16.3 @@ -497,9 +497,8 @@ static void fastcall free_hot_cold_page(
    16.4  	struct per_cpu_pages *pcp;
    16.5  	unsigned long flags;
    16.6  
    16.7 -	/* XXX Xen: use mapping pointer as skb/data-page destructor */
    16.8 -	if (page->mapping)
    16.9 -		return (*(void(*)(struct page *))page->mapping)(page);
   16.10 +	if (PageForeign(page))
   16.11 +		return (PageForeignDestructor(page))(page);
   16.12  
   16.13  	kernel_map_pages(page, 1, 0);
   16.14  	inc_page_state(pgfree);
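
A minimal sketch (not part of the changeset) of how a backend driver might use
the foreign-page hook above. It assumes the SetPageForeign()/ClearPageForeign()
helpers added to page-flags.h by this changeset take (page, destructor) and
(page) respectively; 'my_page_release' and the pool handling are illustrative
only.

	static void my_page_release(struct page *page)
	{
		/* Called from the page-free path in place of returning the
		 * page to the buddy allocator. */
		ClearPageForeign(page);
		/* ... return the page to the backend's private pool, or unmap
		 *     the foreign frame via the hypervisor ... */
	}

	static void my_grant_page_to_stack(struct page *page)
	{
		SetPageForeign(page, my_page_release);
		/* Once attached to an skb, the page comes back through
		 * my_page_release() when the network stack frees it. */
	}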