ia64/xen-unstable
changeset 2306:0f47aec8946e
bitkeeper revision 1.1159.45.3 (4124f6c9BN9jHyHMznjiaS-Qw12Rtw)
Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/labyrinth/iap10/xeno-clone/xeno.bk
line diff
--- a/.rootkeys Thu Aug 19 16:09:39 2004 +0000
+++ b/.rootkeys Thu Aug 19 18:51:53 2004 +0000
@@ -111,6 +111,7 @@ 3e5a4e68mTr0zcp9SXDbnd-XLrrfxw linux-2.4
 3f1056a9L_kqHcFheV00KbKBzv9j5w linux-2.4.26-xen-sparse/include/asm-xen/vga.h
 40659defgWA92arexpMGn8X3QMDj3w linux-2.4.26-xen-sparse/include/asm-xen/xor.h
 3f056927gMHl7mWB89rb73JahbhQIA linux-2.4.26-xen-sparse/include/linux/blk.h
+4124f66fPHG6yvB_vXmesjvzrJ3yMg linux-2.4.26-xen-sparse/include/linux/mm.h
 401c0590D_kwJDU59X8NyvqSv_Cl2A linux-2.4.26-xen-sparse/include/linux/sched.h
 40a248afgI0_JKthdYAe8beVfXSTpQ linux-2.4.26-xen-sparse/include/linux/skbuff.h
 401c0592pLrp_aCbQRo9GXiYQQaVVA linux-2.4.26-xen-sparse/include/linux/timer.h
@@ -242,6 +243,9 @@ 3f108af1ylCIm82H052FVTfXACBHrw linux-2.6
 4122466356eIBnC9ot44WSVVIFyhQA linux-2.6.7-xen-sparse/include/asm-xen/queues.h
 3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.7-xen-sparse/include/asm-xen/suspend.h
 3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.7-xen-sparse/include/asm-xen/xen_proc.h
+4124d8c4aocX7A-jIbuGraWN84pxGQ linux-2.6.7-xen-sparse/include/linux/bio.h
+4124f66fp5QwbDHEfoUIa7pqO5Xhag linux-2.6.7-xen-sparse/include/linux/page-flags.h
+4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6.7-xen-sparse/include/linux/skbuff.h
 40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.7-xen-sparse/mkbuildtree
 410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.7-xen-sparse/mm/page_alloc.c
 40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Make.defs
--- a/linux-2.4.26-xen-sparse/arch/xen/config.in Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.4.26-xen-sparse/arch/xen/config.in Thu Aug 19 18:51:53 2004 +0000
@@ -20,7 +20,10 @@ endmenu
 # The IBM S/390 patch needs this.
 define_bool CONFIG_NO_IDLE_HZ y
 
-if [ "$CONFIG_XEN_PHYSDEV_ACCESS" != "y" ]; then
+if [ "$CONFIG_XEN_PHYSDEV_ACCESS" == "y" ]; then
+   define_bool CONFIG_FOREIGN_PAGES y
+else
+   define_bool CONFIG_FOREIGN_PAGES n
    define_bool CONFIG_NETDEVICES y
    define_bool CONFIG_VT n
 fi
@@ -103,8 +106,6 @@ if [ "$CONFIG_HIGHMEM" = "y" ]; then
    bool 'HIGHMEM I/O support' CONFIG_HIGHIO
 fi
 
-define_int CONFIG_FORCE_MAX_ZONEORDER 12
-
 #bool 'Symmetric multi-processing support' CONFIG_SMP
 #if [ "$CONFIG_SMP" = "y" -a "$CONFIG_X86_CMPXCHG" = "y" ]; then
 #   define_bool CONFIG_HAVE_DEC_LOCK y
--- a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0 Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xen0 Thu Aug 19 18:51:53 2004 +0000
@@ -13,6 +13,7 @@ CONFIG_UID16=y
 CONFIG_XEN_PRIVILEGED_GUEST=y
 CONFIG_XEN_PHYSDEV_ACCESS=y
 CONFIG_NO_IDLE_HZ=y
+CONFIG_FOREIGN_PAGES=y
 
 #
 # Code maturity level options
@@ -50,7 +51,6 @@ CONFIG_X86_TSC=y
 CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
-CONFIG_FORCE_MAX_ZONEORDER=12
 
 #
 # General setup
--- a/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.4.26-xen-sparse/arch/xen/defconfig-xenU Thu Aug 19 18:51:53 2004 +0000
@@ -13,6 +13,7 @@ CONFIG_UID16=y
 # CONFIG_XEN_PRIVILEGED_GUEST is not set
 # CONFIG_XEN_PHYSDEV_ACCESS is not set
 CONFIG_NO_IDLE_HZ=y
+# CONFIG_FOREIGN_PAGES is not set
 CONFIG_NETDEVICES=y
 # CONFIG_VT is not set
 
@@ -52,7 +53,6 @@ CONFIG_X86_TSC=y
 CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
-CONFIG_FORCE_MAX_ZONEORDER=12
 
 #
 # General setup
5.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 5.2 +++ b/linux-2.4.26-xen-sparse/include/linux/mm.h Thu Aug 19 18:51:53 2004 +0000 5.3 @@ -0,0 +1,703 @@ 5.4 +#ifndef _LINUX_MM_H 5.5 +#define _LINUX_MM_H 5.6 + 5.7 +#include <linux/sched.h> 5.8 +#include <linux/errno.h> 5.9 + 5.10 +#ifdef __KERNEL__ 5.11 + 5.12 +#include <linux/config.h> 5.13 +#include <linux/string.h> 5.14 +#include <linux/list.h> 5.15 +#include <linux/mmzone.h> 5.16 +#include <linux/swap.h> 5.17 +#include <linux/rbtree.h> 5.18 + 5.19 +extern unsigned long max_mapnr; 5.20 +extern unsigned long num_physpages; 5.21 +extern unsigned long num_mappedpages; 5.22 +extern void * high_memory; 5.23 +extern int page_cluster; 5.24 +/* The inactive_clean lists are per zone. */ 5.25 +extern struct list_head active_list; 5.26 +extern struct list_head inactive_list; 5.27 + 5.28 +#include <asm/page.h> 5.29 +#include <asm/pgtable.h> 5.30 +#include <asm/atomic.h> 5.31 + 5.32 +/* 5.33 + * Linux kernel virtual memory manager primitives. 5.34 + * The idea being to have a "virtual" mm in the same way 5.35 + * we have a virtual fs - giving a cleaner interface to the 5.36 + * mm details, and allowing different kinds of memory mappings 5.37 + * (from shared memory to executable loading to arbitrary 5.38 + * mmap() functions). 5.39 + */ 5.40 + 5.41 +/* 5.42 + * This struct defines a memory VMM memory area. There is one of these 5.43 + * per VM-area/task. A VM area is any part of the process virtual memory 5.44 + * space that has a special rule for the page-fault handlers (ie a shared 5.45 + * library, the executable area etc). 5.46 + */ 5.47 +struct vm_area_struct { 5.48 + struct mm_struct * vm_mm; /* The address space we belong to. */ 5.49 + unsigned long vm_start; /* Our start address within vm_mm. */ 5.50 + unsigned long vm_end; /* The first byte after our end address 5.51 + within vm_mm. */ 5.52 + 5.53 + /* linked list of VM areas per task, sorted by address */ 5.54 + struct vm_area_struct *vm_next; 5.55 + 5.56 + pgprot_t vm_page_prot; /* Access permissions of this VMA. */ 5.57 + unsigned long vm_flags; /* Flags, listed below. */ 5.58 + 5.59 + rb_node_t vm_rb; 5.60 + 5.61 + /* 5.62 + * For areas with an address space and backing store, 5.63 + * one of the address_space->i_mmap{,shared} lists, 5.64 + * for shm areas, the list of attaches, otherwise unused. 5.65 + */ 5.66 + struct vm_area_struct *vm_next_share; 5.67 + struct vm_area_struct **vm_pprev_share; 5.68 + 5.69 + /* Function pointers to deal with this struct. */ 5.70 + struct vm_operations_struct * vm_ops; 5.71 + 5.72 + /* Information about our backing store: */ 5.73 + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE 5.74 + units, *not* PAGE_CACHE_SIZE */ 5.75 + struct file * vm_file; /* File we map to (can be NULL). */ 5.76 + unsigned long vm_raend; /* XXX: put full readahead info here. */ 5.77 + void * vm_private_data; /* was vm_pte (shared mem) */ 5.78 +}; 5.79 + 5.80 +/* 5.81 + * vm_flags.. 
5.82 + */ 5.83 +#define VM_READ 0x00000001 /* currently active flags */ 5.84 +#define VM_WRITE 0x00000002 5.85 +#define VM_EXEC 0x00000004 5.86 +#define VM_SHARED 0x00000008 5.87 + 5.88 +#define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ 5.89 +#define VM_MAYWRITE 0x00000020 5.90 +#define VM_MAYEXEC 0x00000040 5.91 +#define VM_MAYSHARE 0x00000080 5.92 + 5.93 +#define VM_GROWSDOWN 0x00000100 /* general info on the segment */ 5.94 +#define VM_GROWSUP 0x00000200 5.95 +#define VM_SHM 0x00000400 /* shared memory area, don't swap out */ 5.96 +#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ 5.97 + 5.98 +#define VM_EXECUTABLE 0x00001000 5.99 +#define VM_LOCKED 0x00002000 5.100 +#define VM_IO 0x00004000 /* Memory mapped I/O or similar */ 5.101 + 5.102 + /* Used by sys_madvise() */ 5.103 +#define VM_SEQ_READ 0x00008000 /* App will access data sequentially */ 5.104 +#define VM_RAND_READ 0x00010000 /* App will not benefit from clustered reads */ 5.105 + 5.106 +#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ 5.107 +#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ 5.108 +#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */ 5.109 + 5.110 +#ifndef VM_STACK_FLAGS 5.111 +#define VM_STACK_FLAGS 0x00000177 5.112 +#endif 5.113 + 5.114 +#define VM_READHINTMASK (VM_SEQ_READ | VM_RAND_READ) 5.115 +#define VM_ClearReadHint(v) (v)->vm_flags &= ~VM_READHINTMASK 5.116 +#define VM_NormalReadHint(v) (!((v)->vm_flags & VM_READHINTMASK)) 5.117 +#define VM_SequentialReadHint(v) ((v)->vm_flags & VM_SEQ_READ) 5.118 +#define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) 5.119 + 5.120 +/* read ahead limits */ 5.121 +extern int vm_min_readahead; 5.122 +extern int vm_max_readahead; 5.123 + 5.124 +/* 5.125 + * mapping from the currently active vm_flags protection bits (the 5.126 + * low four bits) to a page protection mask.. 5.127 + */ 5.128 +extern pgprot_t protection_map[16]; 5.129 + 5.130 + 5.131 +/* 5.132 + * These are the virtual MM functions - opening of an area, closing and 5.133 + * unmapping it (needed to keep files on disk up-to-date etc), pointer 5.134 + * to the functions called when a no-page or a wp-page exception occurs. 5.135 + */ 5.136 +struct vm_operations_struct { 5.137 + void (*open)(struct vm_area_struct * area); 5.138 + void (*close)(struct vm_area_struct * area); 5.139 + struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused); 5.140 +}; 5.141 + 5.142 +/* 5.143 + * Each physical page in the system has a struct page associated with 5.144 + * it to keep track of whatever it is we are using the page for at the 5.145 + * moment. Note that we have no way to track which tasks are using 5.146 + * a page. 5.147 + * 5.148 + * Try to keep the most commonly accessed fields in single cache lines 5.149 + * here (16 bytes or greater). This ordering should be particularly 5.150 + * beneficial on 32-bit processors. 5.151 + * 5.152 + * The first line is data used in page cache lookup, the second line 5.153 + * is used for linear searches (eg. clock algorithm scans). 5.154 + * 5.155 + * TODO: make this structure smaller, it could be as small as 32 bytes. 5.156 + */ 5.157 +typedef struct page { 5.158 + struct list_head list; /* ->mapping has some page lists. */ 5.159 + struct address_space *mapping; /* The inode (or ...) we belong to. */ 5.160 + unsigned long index; /* Our offset within mapping. */ 5.161 + struct page *next_hash; /* Next page sharing our hash bucket in 5.162 + the pagecache hash table. 
*/ 5.163 + atomic_t count; /* Usage count, see below. */ 5.164 + unsigned long flags; /* atomic flags, some possibly 5.165 + updated asynchronously */ 5.166 + struct list_head lru; /* Pageout list, eg. active_list; 5.167 + protected by pagemap_lru_lock !! */ 5.168 + struct page **pprev_hash; /* Complement to *next_hash. */ 5.169 + struct buffer_head * buffers; /* Buffer maps us to a disk block. */ 5.170 + 5.171 + /* 5.172 + * On machines where all RAM is mapped into kernel address space, 5.173 + * we can simply calculate the virtual address. On machines with 5.174 + * highmem some memory is mapped into kernel virtual memory 5.175 + * dynamically, so we need a place to store that address. 5.176 + * Note that this field could be 16 bits on x86 ... ;) 5.177 + * 5.178 + * Architectures with slow multiplication can define 5.179 + * WANT_PAGE_VIRTUAL in asm/page.h 5.180 + */ 5.181 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) 5.182 + void *virtual; /* Kernel virtual address (NULL if 5.183 + not kmapped, ie. highmem) */ 5.184 +#endif /* CONFIG_HIGMEM || WANT_PAGE_VIRTUAL */ 5.185 +} mem_map_t; 5.186 + 5.187 +/* 5.188 + * Methods to modify the page usage count. 5.189 + * 5.190 + * What counts for a page usage: 5.191 + * - cache mapping (page->mapping) 5.192 + * - disk mapping (page->buffers) 5.193 + * - page mapped in a task's page tables, each mapping 5.194 + * is counted separately 5.195 + * 5.196 + * Also, many kernel routines increase the page count before a critical 5.197 + * routine so they can be sure the page doesn't go away from under them. 5.198 + */ 5.199 +#define get_page(p) atomic_inc(&(p)->count) 5.200 +#define put_page(p) __free_page(p) 5.201 +#define put_page_testzero(p) atomic_dec_and_test(&(p)->count) 5.202 +#define page_count(p) atomic_read(&(p)->count) 5.203 +#define set_page_count(p,v) atomic_set(&(p)->count, v) 5.204 + 5.205 +/* 5.206 + * Various page->flags bits: 5.207 + * 5.208 + * PG_reserved is set for special pages, which can never be swapped 5.209 + * out. Some of them might not even exist (eg empty_bad_page)... 5.210 + * 5.211 + * Multiple processes may "see" the same page. E.g. for untouched 5.212 + * mappings of /dev/null, all processes see the same page full of 5.213 + * zeroes, and text pages of executables and shared libraries have 5.214 + * only one copy in memory, at most, normally. 5.215 + * 5.216 + * For the non-reserved pages, page->count denotes a reference count. 5.217 + * page->count == 0 means the page is free. 5.218 + * page->count == 1 means the page is used for exactly one purpose 5.219 + * (e.g. a private data page of one process). 5.220 + * 5.221 + * A page may be used for kmalloc() or anyone else who does a 5.222 + * __get_free_page(). In this case the page->count is at least 1, and 5.223 + * all other fields are unused but should be 0 or NULL. The 5.224 + * management of this page is the responsibility of the one who uses 5.225 + * it. 5.226 + * 5.227 + * The other pages (we may call them "process pages") are completely 5.228 + * managed by the Linux memory manager: I/O, buffers, swapping etc. 5.229 + * The following discussion applies only to them. 5.230 + * 5.231 + * A page may belong to an inode's memory mapping. In this case, 5.232 + * page->mapping is the pointer to the inode, and page->index is the 5.233 + * file offset of the page, in units of PAGE_CACHE_SIZE. 5.234 + * 5.235 + * A page may have buffers allocated to it. In this case, 5.236 + * page->buffers is a circular list of these buffer heads. 
Else, 5.237 + * page->buffers == NULL. 5.238 + * 5.239 + * For pages belonging to inodes, the page->count is the number of 5.240 + * attaches, plus 1 if buffers are allocated to the page, plus one 5.241 + * for the page cache itself. 5.242 + * 5.243 + * All pages belonging to an inode are in these doubly linked lists: 5.244 + * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages; 5.245 + * using the page->list list_head. These fields are also used for 5.246 + * freelist managemet (when page->count==0). 5.247 + * 5.248 + * There is also a hash table mapping (mapping,index) to the page 5.249 + * in memory if present. The lists for this hash table use the fields 5.250 + * page->next_hash and page->pprev_hash. 5.251 + * 5.252 + * All process pages can do I/O: 5.253 + * - inode pages may need to be read from disk, 5.254 + * - inode pages which have been modified and are MAP_SHARED may need 5.255 + * to be written to disk, 5.256 + * - private pages which have been modified may need to be swapped out 5.257 + * to swap space and (later) to be read back into memory. 5.258 + * During disk I/O, PG_locked is used. This bit is set before I/O 5.259 + * and reset when I/O completes. page_waitqueue(page) is a wait queue of all 5.260 + * tasks waiting for the I/O on this page to complete. 5.261 + * PG_uptodate tells whether the page's contents is valid. 5.262 + * When a read completes, the page becomes uptodate, unless a disk I/O 5.263 + * error happened. 5.264 + * 5.265 + * For choosing which pages to swap out, inode pages carry a 5.266 + * PG_referenced bit, which is set any time the system accesses 5.267 + * that page through the (mapping,index) hash table. This referenced 5.268 + * bit, together with the referenced bit in the page tables, is used 5.269 + * to manipulate page->age and move the page across the active, 5.270 + * inactive_dirty and inactive_clean lists. 5.271 + * 5.272 + * Note that the referenced bit, the page->lru list_head and the 5.273 + * active, inactive_dirty and inactive_clean lists are protected by 5.274 + * the pagemap_lru_lock, and *NOT* by the usual PG_locked bit! 5.275 + * 5.276 + * PG_skip is used on sparc/sparc64 architectures to "skip" certain 5.277 + * parts of the address space. 5.278 + * 5.279 + * PG_error is set to indicate that an I/O error occurred on this page. 5.280 + * 5.281 + * PG_arch_1 is an architecture specific page state bit. The generic 5.282 + * code guarantees that this bit is cleared for a page when it first 5.283 + * is entered into the page cache. 5.284 + * 5.285 + * PG_highmem pages are not permanently mapped into the kernel virtual 5.286 + * address space, they need to be kmapped separately for doing IO on 5.287 + * the pages. The struct page (these bits with information) are always 5.288 + * mapped into kernel address space... 5.289 + */ 5.290 +#define PG_locked 0 /* Page is locked. Don't touch. */ 5.291 +#define PG_error 1 5.292 +#define PG_referenced 2 5.293 +#define PG_uptodate 3 5.294 +#define PG_dirty 4 5.295 +#define PG_unused 5 5.296 +#define PG_lru 6 5.297 +#define PG_active 7 5.298 +#define PG_slab 8 5.299 +#define PG_skip 10 5.300 +#define PG_highmem 11 5.301 +#define PG_checked 12 /* kill me in 2.5.<early>. */ 5.302 +#define PG_arch_1 13 5.303 +#define PG_reserved 14 5.304 +#define PG_launder 15 /* written out by VM pressure.. 
*/ 5.305 +#define PG_fs_1 16 /* Filesystem specific */ 5.306 +#define PG_foreign 21 /* Page belongs to foreign allocator */ 5.307 + 5.308 +#ifndef arch_set_page_uptodate 5.309 +#define arch_set_page_uptodate(page) 5.310 +#endif 5.311 + 5.312 +/* Make it prettier to test the above... */ 5.313 +#define UnlockPage(page) unlock_page(page) 5.314 +#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags) 5.315 +#define SetPageUptodate(page) \ 5.316 + do { \ 5.317 + arch_set_page_uptodate(page); \ 5.318 + set_bit(PG_uptodate, &(page)->flags); \ 5.319 + } while (0) 5.320 +#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) 5.321 +#define PageDirty(page) test_bit(PG_dirty, &(page)->flags) 5.322 +#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) 5.323 +#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) 5.324 +#define PageLocked(page) test_bit(PG_locked, &(page)->flags) 5.325 +#define LockPage(page) set_bit(PG_locked, &(page)->flags) 5.326 +#define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags) 5.327 +#define PageChecked(page) test_bit(PG_checked, &(page)->flags) 5.328 +#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) 5.329 +#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) 5.330 +#define PageLaunder(page) test_bit(PG_launder, &(page)->flags) 5.331 +#define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags) 5.332 +#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags) 5.333 +#define ClearPageArch1(page) clear_bit(PG_arch_1, &(page)->flags) 5.334 + 5.335 +/* A foreign page uses a custom destructor rather than the buddy allocator. */ 5.336 +#ifdef CONFIG_FOREIGN_PAGES 5.337 +#define PageForeign(page) test_bit(PG_foreign, &(page)->flags) 5.338 +#define SetPageForeign(page) set_bit(PG_foreign, &(page)->flags) 5.339 +#define ClearPageForeign(page) clear_bit(PG_foreign, &(page)->flags) 5.340 +#define PageForeignDestructor(page) \ 5.341 + ( (void (*) (struct page *)) (page)->mapping ) 5.342 +#else 5.343 +#define PageForeign(page) 0 5.344 +#define PageForeignDestructor(page) void 5.345 +#endif 5.346 + 5.347 +/* 5.348 + * The zone field is never updated after free_area_init_core() 5.349 + * sets it, so none of the operations on it need to be atomic. 5.350 + */ 5.351 +#define NODE_SHIFT 4 5.352 +#define ZONE_SHIFT (BITS_PER_LONG - 8) 5.353 + 5.354 +struct zone_struct; 5.355 +extern struct zone_struct *zone_table[]; 5.356 + 5.357 +static inline zone_t *page_zone(struct page *page) 5.358 +{ 5.359 + return zone_table[page->flags >> ZONE_SHIFT]; 5.360 +} 5.361 + 5.362 +static inline void set_page_zone(struct page *page, unsigned long zone_num) 5.363 +{ 5.364 + page->flags &= ~(~0UL << ZONE_SHIFT); 5.365 + page->flags |= zone_num << ZONE_SHIFT; 5.366 +} 5.367 + 5.368 +/* 5.369 + * In order to avoid #ifdefs within C code itself, we define 5.370 + * set_page_address to a noop for non-highmem machines, where 5.371 + * the field isn't useful. 5.372 + * The same is true for page_address() in arch-dependent code. 5.373 + */ 5.374 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) 5.375 + 5.376 +#define set_page_address(page, address) \ 5.377 + do { \ 5.378 + (page)->virtual = (address); \ 5.379 + } while(0) 5.380 + 5.381 +#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ 5.382 +#define set_page_address(page, address) do { } while(0) 5.383 +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ 5.384 + 5.385 +/* 5.386 + * Permanent address of a page. 
Obviously must never be 5.387 + * called on a highmem page. 5.388 + */ 5.389 +#if defined(CONFIG_HIGHMEM) || defined(WANT_PAGE_VIRTUAL) 5.390 + 5.391 +#define page_address(page) ((page)->virtual) 5.392 + 5.393 +#else /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ 5.394 + 5.395 +#define page_address(page) \ 5.396 + __va( (((page) - page_zone(page)->zone_mem_map) << PAGE_SHIFT) \ 5.397 + + page_zone(page)->zone_start_paddr) 5.398 + 5.399 +#endif /* CONFIG_HIGHMEM || WANT_PAGE_VIRTUAL */ 5.400 + 5.401 +extern void FASTCALL(set_page_dirty(struct page *)); 5.402 + 5.403 +/* 5.404 + * The first mb is necessary to safely close the critical section opened by the 5.405 + * TryLockPage(), the second mb is necessary to enforce ordering between 5.406 + * the clear_bit and the read of the waitqueue (to avoid SMP races with a 5.407 + * parallel wait_on_page). 5.408 + */ 5.409 +#define PageError(page) test_bit(PG_error, &(page)->flags) 5.410 +#define SetPageError(page) set_bit(PG_error, &(page)->flags) 5.411 +#define ClearPageError(page) clear_bit(PG_error, &(page)->flags) 5.412 +#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) 5.413 +#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) 5.414 +#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) 5.415 +#define PageTestandClearReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) 5.416 +#define PageSlab(page) test_bit(PG_slab, &(page)->flags) 5.417 +#define PageSetSlab(page) set_bit(PG_slab, &(page)->flags) 5.418 +#define PageClearSlab(page) clear_bit(PG_slab, &(page)->flags) 5.419 +#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) 5.420 + 5.421 +#define PageActive(page) test_bit(PG_active, &(page)->flags) 5.422 +#define SetPageActive(page) set_bit(PG_active, &(page)->flags) 5.423 +#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags) 5.424 + 5.425 +#define PageLRU(page) test_bit(PG_lru, &(page)->flags) 5.426 +#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags) 5.427 +#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags) 5.428 + 5.429 +#ifdef CONFIG_HIGHMEM 5.430 +#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) 5.431 +#else 5.432 +#define PageHighMem(page) 0 /* needed to optimize away at compile time */ 5.433 +#endif 5.434 + 5.435 +#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) 5.436 +#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) 5.437 + 5.438 +/* 5.439 + * Error return values for the *_nopage functions 5.440 + */ 5.441 +#define NOPAGE_SIGBUS (NULL) 5.442 +#define NOPAGE_OOM ((struct page *) (-1)) 5.443 + 5.444 +/* The array of struct pages */ 5.445 +extern mem_map_t * mem_map; 5.446 + 5.447 +/* 5.448 + * There is only one page-allocator function, and two main namespaces to 5.449 + * it. The alloc_page*() variants return 'struct page *' and as such 5.450 + * can allocate highmem pages, the *get*page*() variants return 5.451 + * virtual kernel addresses to the allocated page(s). 5.452 + */ 5.453 +extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order)); 5.454 +extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); 5.455 +extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); 5.456 + 5.457 +static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order) 5.458 +{ 5.459 + /* 5.460 + * Gets optimized away by the compiler. 
5.461 + */ 5.462 + if (order >= MAX_ORDER) 5.463 + return NULL; 5.464 + return _alloc_pages(gfp_mask, order); 5.465 +} 5.466 + 5.467 +#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) 5.468 + 5.469 +extern unsigned long FASTCALL(__get_free_pages(unsigned int gfp_mask, unsigned int order)); 5.470 +extern unsigned long FASTCALL(get_zeroed_page(unsigned int gfp_mask)); 5.471 + 5.472 +#define __get_free_page(gfp_mask) \ 5.473 + __get_free_pages((gfp_mask),0) 5.474 + 5.475 +#define __get_dma_pages(gfp_mask, order) \ 5.476 + __get_free_pages((gfp_mask) | GFP_DMA,(order)) 5.477 + 5.478 +/* 5.479 + * The old interface name will be removed in 2.5: 5.480 + */ 5.481 +#define get_free_page get_zeroed_page 5.482 + 5.483 +/* 5.484 + * There is only one 'core' page-freeing function. 5.485 + */ 5.486 +extern void FASTCALL(__free_pages(struct page *page, unsigned int order)); 5.487 +extern void FASTCALL(free_pages(unsigned long addr, unsigned int order)); 5.488 + 5.489 +#define __free_page(page) __free_pages((page), 0) 5.490 +#define free_page(addr) free_pages((addr),0) 5.491 + 5.492 +extern void show_free_areas(void); 5.493 +extern void show_free_areas_node(pg_data_t *pgdat); 5.494 + 5.495 +extern void clear_page_tables(struct mm_struct *, unsigned long, int); 5.496 + 5.497 +extern int fail_writepage(struct page *); 5.498 +struct page * shmem_nopage(struct vm_area_struct * vma, unsigned long address, int unused); 5.499 +struct file *shmem_file_setup(char * name, loff_t size); 5.500 +extern void shmem_lock(struct file * file, int lock); 5.501 +extern int shmem_zero_setup(struct vm_area_struct *); 5.502 + 5.503 +extern void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long size); 5.504 +extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); 5.505 +extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); 5.506 +extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); 5.507 + 5.508 +extern int vmtruncate(struct inode * inode, loff_t offset); 5.509 +extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); 5.510 +extern pte_t *FASTCALL(pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); 5.511 +extern int handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); 5.512 +extern int make_pages_present(unsigned long addr, unsigned long end); 5.513 +extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); 5.514 +extern int ptrace_readdata(struct task_struct *tsk, unsigned long src, char *dst, int len); 5.515 +extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long dst, int len); 5.516 +extern int ptrace_attach(struct task_struct *tsk); 5.517 +extern int ptrace_detach(struct task_struct *, unsigned int); 5.518 +extern void ptrace_disable(struct task_struct *); 5.519 +extern int ptrace_check_attach(struct task_struct *task, int kill); 5.520 + 5.521 +int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, 5.522 + int len, int write, int force, struct page **pages, struct vm_area_struct **vmas); 5.523 + 5.524 +/* 5.525 + * On a two-level page table, this ends up being trivial. Thus the 5.526 + * inlining and the symmetry break with pte_alloc() that does all 5.527 + * of this out-of-line. 
5.528 + */ 5.529 +static inline pmd_t *pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) 5.530 +{ 5.531 + if (pgd_none(*pgd)) 5.532 + return __pmd_alloc(mm, pgd, address); 5.533 + return pmd_offset(pgd, address); 5.534 +} 5.535 + 5.536 +extern int pgt_cache_water[2]; 5.537 +extern int check_pgt_cache(void); 5.538 + 5.539 +extern void free_area_init(unsigned long * zones_size); 5.540 +extern void free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap, 5.541 + unsigned long * zones_size, unsigned long zone_start_paddr, 5.542 + unsigned long *zholes_size); 5.543 +extern void mem_init(void); 5.544 +extern void show_mem(void); 5.545 +extern void si_meminfo(struct sysinfo * val); 5.546 +extern void swapin_readahead(swp_entry_t); 5.547 + 5.548 +extern struct address_space swapper_space; 5.549 +#define PageSwapCache(page) ((page)->mapping == &swapper_space) 5.550 + 5.551 +static inline int is_page_cache_freeable(struct page * page) 5.552 +{ 5.553 + return page_count(page) - !!page->buffers == 1; 5.554 +} 5.555 + 5.556 +extern int FASTCALL(can_share_swap_page(struct page *)); 5.557 +extern int FASTCALL(remove_exclusive_swap_page(struct page *)); 5.558 + 5.559 +extern void __free_pte(pte_t); 5.560 + 5.561 +/* mmap.c */ 5.562 +extern void lock_vma_mappings(struct vm_area_struct *); 5.563 +extern void unlock_vma_mappings(struct vm_area_struct *); 5.564 +extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *); 5.565 +extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *); 5.566 +extern void build_mmap_rb(struct mm_struct *); 5.567 +extern void exit_mmap(struct mm_struct *); 5.568 + 5.569 +extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); 5.570 + 5.571 +extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, 5.572 + unsigned long len, unsigned long prot, 5.573 + unsigned long flag, unsigned long pgoff); 5.574 + 5.575 +static inline unsigned long do_mmap(struct file *file, unsigned long addr, 5.576 + unsigned long len, unsigned long prot, 5.577 + unsigned long flag, unsigned long offset) 5.578 +{ 5.579 + unsigned long ret = -EINVAL; 5.580 + if ((offset + PAGE_ALIGN(len)) < offset) 5.581 + goto out; 5.582 + if (!(offset & ~PAGE_MASK)) 5.583 + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); 5.584 +out: 5.585 + return ret; 5.586 +} 5.587 + 5.588 +extern int do_munmap(struct mm_struct *, unsigned long, size_t); 5.589 + 5.590 +extern unsigned long do_brk(unsigned long, unsigned long); 5.591 + 5.592 +static inline void __vma_unlink(struct mm_struct * mm, struct vm_area_struct * vma, struct vm_area_struct * prev) 5.593 +{ 5.594 + prev->vm_next = vma->vm_next; 5.595 + rb_erase(&vma->vm_rb, &mm->mm_rb); 5.596 + if (mm->mmap_cache == vma) 5.597 + mm->mmap_cache = prev; 5.598 +} 5.599 + 5.600 +static inline int can_vma_merge(struct vm_area_struct * vma, unsigned long vm_flags) 5.601 +{ 5.602 + if (!vma->vm_file && vma->vm_flags == vm_flags) 5.603 + return 1; 5.604 + else 5.605 + return 0; 5.606 +} 5.607 + 5.608 +struct zone_t; 5.609 +/* filemap.c */ 5.610 +extern void remove_inode_page(struct page *); 5.611 +extern unsigned long page_unuse(struct page *); 5.612 +extern void truncate_inode_pages(struct address_space *, loff_t); 5.613 + 5.614 +/* generic vm_area_ops exported for stackable file systems */ 5.615 +extern int filemap_sync(struct vm_area_struct *, unsigned long, size_t, unsigned int); 5.616 +extern struct page *filemap_nopage(struct 
vm_area_struct *, unsigned long, int); 5.617 + 5.618 +/* 5.619 + * GFP bitmasks.. 5.620 + */ 5.621 +/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low four bits) */ 5.622 +#define __GFP_DMA 0x01 5.623 +#define __GFP_HIGHMEM 0x02 5.624 + 5.625 +/* Action modifiers - doesn't change the zoning */ 5.626 +#define __GFP_WAIT 0x10 /* Can wait and reschedule? */ 5.627 +#define __GFP_HIGH 0x20 /* Should access emergency pools? */ 5.628 +#define __GFP_IO 0x40 /* Can start low memory physical IO? */ 5.629 +#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */ 5.630 +#define __GFP_FS 0x100 /* Can call down to low-level FS? */ 5.631 + 5.632 +#define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO) 5.633 +#define GFP_NOIO (__GFP_HIGH | __GFP_WAIT) 5.634 +#define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO) 5.635 +#define GFP_ATOMIC (__GFP_HIGH) 5.636 +#define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) 5.637 +#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM) 5.638 +#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) 5.639 +#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) 5.640 +#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS) 5.641 + 5.642 +/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some 5.643 + platforms, used as appropriate on others */ 5.644 + 5.645 +#define GFP_DMA __GFP_DMA 5.646 + 5.647 +static inline unsigned int pf_gfp_mask(unsigned int gfp_mask) 5.648 +{ 5.649 + /* avoid all memory balancing I/O methods if this task cannot block on I/O */ 5.650 + if (current->flags & PF_NOIO) 5.651 + gfp_mask &= ~(__GFP_IO | __GFP_HIGHIO | __GFP_FS); 5.652 + 5.653 + return gfp_mask; 5.654 +} 5.655 + 5.656 +/* vma is the first one with address < vma->vm_end, 5.657 + * and even address < vma->vm_start. Have to extend vma. */ 5.658 +static inline int expand_stack(struct vm_area_struct * vma, unsigned long address) 5.659 +{ 5.660 + unsigned long grow; 5.661 + 5.662 + /* 5.663 + * vma->vm_start/vm_end cannot change under us because the caller is required 5.664 + * to hold the mmap_sem in write mode. We need to get the spinlock only 5.665 + * before relocating the vma range ourself. 5.666 + */ 5.667 + address &= PAGE_MASK; 5.668 + spin_lock(&vma->vm_mm->page_table_lock); 5.669 + grow = (vma->vm_start - address) >> PAGE_SHIFT; 5.670 + if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || 5.671 + ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) { 5.672 + spin_unlock(&vma->vm_mm->page_table_lock); 5.673 + return -ENOMEM; 5.674 + } 5.675 + vma->vm_start = address; 5.676 + vma->vm_pgoff -= grow; 5.677 + vma->vm_mm->total_vm += grow; 5.678 + if (vma->vm_flags & VM_LOCKED) 5.679 + vma->vm_mm->locked_vm += grow; 5.680 + spin_unlock(&vma->vm_mm->page_table_lock); 5.681 + return 0; 5.682 +} 5.683 + 5.684 +/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */ 5.685 +extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr); 5.686 +extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr, 5.687 + struct vm_area_struct **pprev); 5.688 + 5.689 +/* Look up the first VMA which intersects the interval start_addr..end_addr-1, 5.690 + NULL if none. Assume start_addr < end_addr. 
*/ 5.691 +static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) 5.692 +{ 5.693 + struct vm_area_struct * vma = find_vma(mm,start_addr); 5.694 + 5.695 + if (vma && end_addr <= vma->vm_start) 5.696 + vma = NULL; 5.697 + return vma; 5.698 +} 5.699 + 5.700 +extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr); 5.701 + 5.702 +extern struct page * vmalloc_to_page(void *addr); 5.703 + 5.704 +#endif /* __KERNEL__ */ 5.705 + 5.706 +#endif
--- a/linux-2.4.26-xen-sparse/mm/page_alloc.c Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.4.26-xen-sparse/mm/page_alloc.c Thu Aug 19 18:51:53 2004 +0000
@@ -89,6 +89,9 @@ static void __free_pages_ok (struct page
 	struct page *base;
 	zone_t *zone;
 
+	if (PageForeign(page))
+		return (PageForeignDestructor(page))(page);
+
 	/*
 	 * Yes, think what happens when other parts of the kernel take
 	 * a reference to a page in order to pin it for io. -ben
@@ -102,7 +105,7 @@ static void __free_pages_ok (struct page
 	if (page->buffers)
 		BUG();
 	if (page->mapping)
-		return (*(void(*)(struct page *))page->mapping)(page);
+		BUG();
 	if (!VALID_PAGE(page))
 		BUG();
 	if (PageLocked(page))
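The page_alloc.c hook above diverts any page marked PG_foreign to a per-page destructor instead of the buddy free path, replacing the old abuse of page->mapping as a function pointer. A minimal sketch of how a backend driver adopts this, mirroring the netback changes further down in this changeset (my_page_release and my_setup_foreign_page are hypothetical names; SetPageForeign, PageForeignDestructor and set_page_count are the macros/helpers this changeset uses):

static void my_page_release(struct page *page)
{
    /* Called from __free_pages_ok() when the last reference is dropped;
     * reset the count so the page is ready for its next use instead of
     * being returned to the buddy allocator. */
    set_page_count(page, 1);
}

static void my_setup_foreign_page(struct page *page)
{
    /* Mark the page as foreign and record the destructor (stored in
     * page->mapping by the PageForeignDestructor() macro). */
    SetPageForeign(page);
    PageForeignDestructor(page) = my_page_release;
}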
--- a/linux-2.6.7-xen-sparse/arch/xen/Kconfig Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.6.7-xen-sparse/arch/xen/Kconfig Thu Aug 19 18:51:53 2004 +0000
@@ -44,11 +44,15 @@ config XEN_WRITABLE_PAGETABLES
 
 endmenu
 
-# Xen's block device backend driver needs 2^12 pages
-config FORCE_MAX_ZONEORDER
-	int
-	default "12" if XEN_PHYSDEV_ACCESS
-	default "11" if !XEN_PHYSDEV_ACCESS
+config FOREIGN_PAGES
+	bool
+	default y if XEN_PHYSDEV_ACCESS
+	default n if !XEN_PHYSDEV_ACCESS
+
+config PAGESIZED_SKBS
+	bool
+	default y if XEN_PHYSDEV_ACCESS
+	default n if !XEN_PHYSDEV_ACCESS
 
 #config VT
 #	bool
--- a/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xen0_defconfig Thu Aug 19 18:51:53 2004 +0000
@@ -10,7 +10,8 @@ CONFIG_NO_IDLE_HZ=y
 #
 CONFIG_XEN_PRIVILEGED_GUEST=y
 CONFIG_XEN_PHYSDEV_ACCESS=y
-CONFIG_FORCE_MAX_ZONEORDER=12
+CONFIG_FOREIGN_PAGES=y
+CONFIG_PAGESIZED_SKBS=y
 CONFIG_X86=y
 # CONFIG_X86_64 is not set
 
--- a/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.6.7-xen-sparse/arch/xen/configs/xenU_defconfig Thu Aug 19 18:51:53 2004 +0000
@@ -10,7 +10,8 @@ CONFIG_NO_IDLE_HZ=y
 #
 # CONFIG_XEN_PRIVILEGED_GUEST is not set
 # CONFIG_XEN_PHYSDEV_ACCESS is not set
-CONFIG_FORCE_MAX_ZONEORDER=11
+# CONFIG_FOREIGN_PAGES is not set
+# CONFIG_PAGESIZED_SKBS is not set
 CONFIG_X86=y
 # CONFIG_X86_64 is not set
 
--- a/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.6.7-xen-sparse/drivers/xen/blkback/blkback.c Thu Aug 19 18:51:53 2004 +0000
@@ -24,22 +24,15 @@
 #define MAX_PENDING_REQS 64
 #define BATCH_PER_DOMAIN 16
 
-/*
- * NB. We place a page of padding between each buffer page to avoid incorrect
- * merging of requests by the IDE and SCSI merging routines. Otherwise, two
- * adjacent buffers in a scatter-gather request would have adjacent page
- * numbers: since the merge routines don't realise that this is in *pseudophys*
- * space, not real space, they may collapse the s-g elements!
- */
 static unsigned long mmap_vstart;
 #define MMAP_PAGES_PER_REQUEST \
-    (2 * (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1))
+    (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1)
 #define MMAP_PAGES \
     (MAX_PENDING_REQS * MMAP_PAGES_PER_REQUEST)
 #define MMAP_VADDR(_req,_seg) \
     (mmap_vstart + \
      ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \
-     ((_seg) * 2 * PAGE_SIZE))
+     ((_seg) * PAGE_SIZE))
 
 /*
  * Each outstanding request that we've passed to the lower device layers has a
--- a/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.6.7-xen-sparse/drivers/xen/netback/netback.c Thu Aug 19 18:51:53 2004 +0000
@@ -376,7 +376,6 @@ static void net_tx_action(unsigned long
     netif_tx_request_t txreq;
     u16 pending_idx;
     NETIF_RING_IDX i;
-    struct page *page;
     multicall_entry_t *mcl;
     PEND_RING_IDX dc, dp;
 
@@ -567,10 +566,9 @@ static void net_tx_action(unsigned long
                (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
                PKT_PROT_LEN);
 
-        page = virt_to_page(MMAP_VADDR(pending_idx));
-
         /* Append the packet payload as a fragment. */
-        skb_shinfo(skb)->frags[0].page = page;
+        skb_shinfo(skb)->frags[0].page =
+            virt_to_page(MMAP_VADDR(pending_idx));
         skb_shinfo(skb)->frags[0].size = txreq.size - PKT_PROT_LEN;
         skb_shinfo(skb)->frags[0].page_offset =
             (txreq.addr + PKT_PROT_LEN) & ~PAGE_MASK;
@@ -581,17 +579,6 @@ static void net_tx_action(unsigned long
         skb->dev      = netif->dev;
         skb->protocol = eth_type_trans(skb, skb->dev);
 
-        /*
-         * Destructor information. We hideously abuse the 'mapping' pointer,
-         * which isn't otherwise used by us. The page deallocator is modified
-         * to interpret a non-NULL value as a destructor function to be called.
-         * This works okay because in all other cases the pointer must be NULL
-         * when the page is freed (normally Linux will explicitly bug out if
-         * it sees otherwise.
-         */
-        page->mapping = (struct address_space *)netif_page_release;
-        set_page_count(page, 1);
-
         netif->stats.tx_bytes += txreq.size;
         netif->stats.tx_packets++;
 
@@ -607,8 +594,8 @@ static void netif_page_release(struct pa
     unsigned long flags;
     u16 pending_idx = page - virt_to_page(mmap_vstart);
 
-    /* Stop the abuse. */
-    page->mapping = NULL;
+    /* Ready for next use. */
+    set_page_count(page, 1);
 
     spin_lock_irqsave(&dealloc_lock, flags);
     dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
@@ -742,6 +729,7 @@ static irqreturn_t netif_be_dbg(int irq,
 static int __init netback_init(void)
 {
     int i;
+    struct page *page;
 
     if ( !(start_info.flags & SIF_NET_BE_DOMAIN) &&
          !(start_info.flags & SIF_INITDOMAIN) )
@@ -757,6 +745,13 @@ static int __init netback_init(void)
     if ( (mmap_vstart = allocate_empty_lowmem_region(MAX_PENDING_REQS)) == 0 )
         BUG();
 
+    for ( i = 0; i < MAX_PENDING_REQS; i++ )
+    {
+        page = virt_to_page(MMAP_VADDR(i));
+        SetPageForeign(page);
+        PageForeignDestructor(page) = netif_page_release;
+    }
+
     pending_cons = 0;
     pending_prod = MAX_PENDING_REQS;
     for ( i = 0; i < MAX_PENDING_REQS; i++ )
--- a/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/io.h Thu Aug 19 16:09:39 2004 +0000
+++ b/linux-2.6.7-xen-sparse/include/asm-xen/asm-i386/io.h Thu Aug 19 18:51:53 2004 +0000
@@ -88,6 +88,13 @@ static inline void * phys_to_virt(unsign
 #define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
 #define page_to_phys(page)       (phys_to_machine(page_to_pseudophys(page)))
 
+#define bio_to_pseudophys(bio)   (page_to_pseudophys(bio_page((bio))) + (unsigned long) bio_offset((bio)))
+#define bvec_to_pseudophys(bv)   (page_to_pseudophys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
+
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
+    (((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) && \
+     ((bvec_to_pseudophys((vec1)) + (vec1)->bv_len) == bvec_to_pseudophys((vec2))))
+
 extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
 
 /**
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 13.2 +++ b/linux-2.6.7-xen-sparse/include/linux/bio.h Thu Aug 19 18:51:53 2004 +0000 13.3 @@ -0,0 +1,304 @@ 13.4 +/* 13.5 + * 2.5 block I/O model 13.6 + * 13.7 + * Copyright (C) 2001 Jens Axboe <axboe@suse.de> 13.8 + * 13.9 + * This program is free software; you can redistribute it and/or modify 13.10 + * it under the terms of the GNU General Public License version 2 as 13.11 + * published by the Free Software Foundation. 13.12 + * 13.13 + * This program is distributed in the hope that it will be useful, 13.14 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13.15 + 13.16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13.17 + * GNU General Public License for more details. 13.18 + * 13.19 + * You should have received a copy of the GNU General Public Licens 13.20 + * along with this program; if not, write to the Free Software 13.21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- 13.22 + */ 13.23 +#ifndef __LINUX_BIO_H 13.24 +#define __LINUX_BIO_H 13.25 + 13.26 +#include <linux/highmem.h> 13.27 +#include <linux/mempool.h> 13.28 + 13.29 +/* Platforms may set this to teach the BIO layer about IOMMU hardware. */ 13.30 +#include <asm/io.h> 13.31 +#ifndef BIO_VMERGE_BOUNDARY 13.32 +#define BIO_VMERGE_BOUNDARY 0 13.33 +#endif 13.34 + 13.35 +#define BIO_DEBUG 13.36 + 13.37 +#ifdef BIO_DEBUG 13.38 +#define BIO_BUG_ON BUG_ON 13.39 +#else 13.40 +#define BIO_BUG_ON 13.41 +#endif 13.42 + 13.43 +#define BIO_MAX_PAGES (256) 13.44 +#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_CACHE_SHIFT) 13.45 +#define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) 13.46 + 13.47 +/* 13.48 + * was unsigned short, but we might as well be ready for > 64kB I/O pages 13.49 + */ 13.50 +struct bio_vec { 13.51 + struct page *bv_page; 13.52 + unsigned int bv_len; 13.53 + unsigned int bv_offset; 13.54 +}; 13.55 + 13.56 +struct bio; 13.57 +typedef int (bio_end_io_t) (struct bio *, unsigned int, int); 13.58 +typedef void (bio_destructor_t) (struct bio *); 13.59 + 13.60 +/* 13.61 + * main unit of I/O for the block layer and lower layers (ie drivers and 13.62 + * stacking drivers) 13.63 + */ 13.64 +struct bio { 13.65 + sector_t bi_sector; 13.66 + struct bio *bi_next; /* request queue link */ 13.67 + struct block_device *bi_bdev; 13.68 + unsigned long bi_flags; /* status, command, etc */ 13.69 + unsigned long bi_rw; /* bottom bits READ/WRITE, 13.70 + * top bits priority 13.71 + */ 13.72 + 13.73 + unsigned short bi_vcnt; /* how many bio_vec's */ 13.74 + unsigned short bi_idx; /* current index into bvl_vec */ 13.75 + 13.76 + /* Number of segments in this BIO after 13.77 + * physical address coalescing is performed. 13.78 + */ 13.79 + unsigned short bi_phys_segments; 13.80 + 13.81 + /* Number of segments after physical and DMA remapping 13.82 + * hardware coalescing is performed. 
13.83 + */ 13.84 + unsigned short bi_hw_segments; 13.85 + 13.86 + unsigned int bi_size; /* residual I/O count */ 13.87 + unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ 13.88 + 13.89 + struct bio_vec *bi_io_vec; /* the actual vec list */ 13.90 + 13.91 + bio_end_io_t *bi_end_io; 13.92 + atomic_t bi_cnt; /* pin count */ 13.93 + 13.94 + void *bi_private; 13.95 + 13.96 + bio_destructor_t *bi_destructor; /* destructor */ 13.97 +}; 13.98 + 13.99 +/* 13.100 + * bio flags 13.101 + */ 13.102 +#define BIO_UPTODATE 0 /* ok after I/O completion */ 13.103 +#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ 13.104 +#define BIO_EOF 2 /* out-out-bounds error */ 13.105 +#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ 13.106 +#define BIO_CLONED 4 /* doesn't own data */ 13.107 +#define BIO_BOUNCED 5 /* bio is a bounce bio */ 13.108 +#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) 13.109 + 13.110 +/* 13.111 + * top 4 bits of bio flags indicate the pool this bio came from 13.112 + */ 13.113 +#define BIO_POOL_BITS (4) 13.114 +#define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) 13.115 +#define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) 13.116 +#define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) 13.117 + 13.118 +/* 13.119 + * bio bi_rw flags 13.120 + * 13.121 + * bit 0 -- read (not set) or write (set) 13.122 + * bit 1 -- rw-ahead when set 13.123 + * bit 2 -- barrier 13.124 + * bit 3 -- fail fast, don't want low level driver retries 13.125 + * bit 4 -- synchronous I/O hint: the block layer will unplug immediately 13.126 + */ 13.127 +#define BIO_RW 0 13.128 +#define BIO_RW_AHEAD 1 13.129 +#define BIO_RW_BARRIER 2 13.130 +#define BIO_RW_FAILFAST 3 13.131 +#define BIO_RW_SYNC 4 13.132 + 13.133 +/* 13.134 + * various member access, note that bio_data should of course not be used 13.135 + * on highmem page vectors 13.136 + */ 13.137 +#define bio_iovec_idx(bio, idx) (&((bio)->bi_io_vec[(idx)])) 13.138 +#define bio_iovec(bio) bio_iovec_idx((bio), (bio)->bi_idx) 13.139 +#define bio_page(bio) bio_iovec((bio))->bv_page 13.140 +#define bio_offset(bio) bio_iovec((bio))->bv_offset 13.141 +#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) 13.142 +#define bio_sectors(bio) ((bio)->bi_size >> 9) 13.143 +#define bio_cur_sectors(bio) (bio_iovec(bio)->bv_len >> 9) 13.144 +#define bio_data(bio) (page_address(bio_page((bio))) + bio_offset((bio))) 13.145 +#define bio_barrier(bio) ((bio)->bi_rw & (1 << BIO_RW_BARRIER)) 13.146 +#define bio_sync(bio) ((bio)->bi_rw & (1 << BIO_RW_SYNC)) 13.147 + 13.148 +/* 13.149 + * will die 13.150 + */ 13.151 +#define bio_to_phys(bio) (page_to_phys(bio_page((bio))) + (unsigned long) bio_offset((bio))) 13.152 +#define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) 13.153 + 13.154 +/* 13.155 + * queues that have highmem support enabled may still need to revert to 13.156 + * PIO transfers occasionally and thus map high pages temporarily. 
For 13.157 + * permanent PIO fall back, user is probably better off disabling highmem 13.158 + * I/O completely on that queue (see ide-dma for example) 13.159 + */ 13.160 +#define __bio_kmap_atomic(bio, idx, kmtype) \ 13.161 + (kmap_atomic(bio_iovec_idx((bio), (idx))->bv_page, kmtype) + \ 13.162 + bio_iovec_idx((bio), (idx))->bv_offset) 13.163 + 13.164 +#define __bio_kunmap_atomic(addr, kmtype) kunmap_atomic(addr, kmtype) 13.165 + 13.166 +/* 13.167 + * merge helpers etc 13.168 + */ 13.169 + 13.170 +#define __BVEC_END(bio) bio_iovec_idx((bio), (bio)->bi_vcnt - 1) 13.171 +#define __BVEC_START(bio) bio_iovec_idx((bio), (bio)->bi_idx) 13.172 +/* Platforms may set this to restrict multi-page buffer merging. */ 13.173 +#ifndef BIOVEC_PHYS_MERGEABLE 13.174 +#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \ 13.175 + ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) 13.176 +#endif 13.177 +#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ 13.178 + ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) 13.179 +#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ 13.180 + (((addr1) | (mask)) == (((addr2) - 1) | (mask))) 13.181 +#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ 13.182 + __BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, (q)->seg_boundary_mask) 13.183 +#define BIO_SEG_BOUNDARY(q, b1, b2) \ 13.184 + BIOVEC_SEG_BOUNDARY((q), __BVEC_END((b1)), __BVEC_START((b2))) 13.185 + 13.186 +#define bio_io_error(bio, bytes) bio_endio((bio), (bytes), -EIO) 13.187 + 13.188 +/* 13.189 + * drivers should not use the __ version unless they _really_ want to 13.190 + * run through the entire bio and not just pending pieces 13.191 + */ 13.192 +#define __bio_for_each_segment(bvl, bio, i, start_idx) \ 13.193 + for (bvl = bio_iovec_idx((bio), (start_idx)), i = (start_idx); \ 13.194 + i < (bio)->bi_vcnt; \ 13.195 + bvl++, i++) 13.196 + 13.197 +#define bio_for_each_segment(bvl, bio, i) \ 13.198 + __bio_for_each_segment(bvl, bio, i, (bio)->bi_idx) 13.199 + 13.200 +/* 13.201 + * get a reference to a bio, so it won't disappear. the intended use is 13.202 + * something like: 13.203 + * 13.204 + * bio_get(bio); 13.205 + * submit_bio(rw, bio); 13.206 + * if (bio->bi_flags ...) 13.207 + * do_something 13.208 + * bio_put(bio); 13.209 + * 13.210 + * without the bio_get(), it could potentially complete I/O before submit_bio 13.211 + * returns. and then bio would be freed memory when if (bio->bi_flags ...) 13.212 + * runs 13.213 + */ 13.214 +#define bio_get(bio) atomic_inc(&(bio)->bi_cnt) 13.215 + 13.216 + 13.217 +/* 13.218 + * A bio_pair is used when we need to split a bio. 
13.219 + * This can only happen for a bio that refers to just one 13.220 + * page of data, and in the unusual situation when the 13.221 + * page crosses a chunk/device boundary 13.222 + * 13.223 + * The address of the master bio is stored in bio1.bi_private 13.224 + * The address of the pool the pair was allocated from is stored 13.225 + * in bio2.bi_private 13.226 + */ 13.227 +struct bio_pair { 13.228 + struct bio bio1, bio2; 13.229 + struct bio_vec bv1, bv2; 13.230 + atomic_t cnt; 13.231 + int error; 13.232 +}; 13.233 +extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, 13.234 + int first_sectors); 13.235 +extern mempool_t *bio_split_pool; 13.236 +extern void bio_pair_release(struct bio_pair *dbio); 13.237 + 13.238 +extern struct bio *bio_alloc(int, int); 13.239 +extern void bio_put(struct bio *); 13.240 + 13.241 +extern void bio_endio(struct bio *, unsigned int, int); 13.242 +struct request_queue; 13.243 +extern int bio_phys_segments(struct request_queue *, struct bio *); 13.244 +extern int bio_hw_segments(struct request_queue *, struct bio *); 13.245 + 13.246 +extern void __bio_clone(struct bio *, struct bio *); 13.247 +extern struct bio *bio_clone(struct bio *, int); 13.248 + 13.249 +extern void bio_init(struct bio *); 13.250 + 13.251 +extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); 13.252 +extern int bio_get_nr_vecs(struct block_device *); 13.253 +extern struct bio *bio_map_user(struct request_queue *, struct block_device *, 13.254 + unsigned long, unsigned int, int); 13.255 +extern void bio_unmap_user(struct bio *, int); 13.256 +extern void bio_set_pages_dirty(struct bio *bio); 13.257 +extern void bio_check_pages_dirty(struct bio *bio); 13.258 + 13.259 +#ifdef CONFIG_HIGHMEM 13.260 +/* 13.261 + * remember to add offset! and never ever reenable interrupts between a 13.262 + * bvec_kmap_irq and bvec_kunmap_irq!! 13.263 + * 13.264 + * This function MUST be inlined - it plays with the CPU interrupt flags. 13.265 + * Hence the `extern inline'. 13.266 + */ 13.267 +extern inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) 13.268 +{ 13.269 + unsigned long addr; 13.270 + 13.271 + /* 13.272 + * might not be a highmem page, but the preempt/irq count 13.273 + * balancing is a lot nicer this way 13.274 + */ 13.275 + local_irq_save(*flags); 13.276 + addr = (unsigned long) kmap_atomic(bvec->bv_page, KM_BIO_SRC_IRQ); 13.277 + 13.278 + BUG_ON(addr & ~PAGE_MASK); 13.279 + 13.280 + return (char *) addr + bvec->bv_offset; 13.281 +} 13.282 + 13.283 +extern inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) 13.284 +{ 13.285 + unsigned long ptr = (unsigned long) buffer & PAGE_MASK; 13.286 + 13.287 + kunmap_atomic((void *) ptr, KM_BIO_SRC_IRQ); 13.288 + local_irq_restore(*flags); 13.289 +} 13.290 + 13.291 +#else 13.292 +#define bvec_kmap_irq(bvec, flags) (page_address((bvec)->bv_page) + (bvec)->bv_offset) 13.293 +#define bvec_kunmap_irq(buf, flags) do { *(flags) = 0; } while (0) 13.294 +#endif 13.295 + 13.296 +extern inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, 13.297 + unsigned long *flags) 13.298 +{ 13.299 + return bvec_kmap_irq(bio_iovec_idx(bio, idx), flags); 13.300 +} 13.301 +#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) 13.302 + 13.303 +#define bio_kmap_irq(bio, flags) \ 13.304 + __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) 13.305 +#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) 13.306 + 13.307 +#endif /* __LINUX_BIO_H */
14.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 14.2 +++ b/linux-2.6.7-xen-sparse/include/linux/page-flags.h Thu Aug 19 18:51:53 2004 +0000 14.3 @@ -0,0 +1,343 @@ 14.4 +/* 14.5 + * Macros for manipulating and testing page->flags 14.6 + */ 14.7 + 14.8 +#ifndef PAGE_FLAGS_H 14.9 +#define PAGE_FLAGS_H 14.10 + 14.11 +#include <linux/percpu.h> 14.12 +#include <linux/cache.h> 14.13 +#include <asm/pgtable.h> 14.14 + 14.15 +/* 14.16 + * Various page->flags bits: 14.17 + * 14.18 + * PG_reserved is set for special pages, which can never be swapped out. Some 14.19 + * of them might not even exist (eg empty_bad_page)... 14.20 + * 14.21 + * The PG_private bitflag is set if page->private contains a valid value. 14.22 + * 14.23 + * During disk I/O, PG_locked is used. This bit is set before I/O and 14.24 + * reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks 14.25 + * waiting for the I/O on this page to complete. 14.26 + * 14.27 + * PG_uptodate tells whether the page's contents is valid. When a read 14.28 + * completes, the page becomes uptodate, unless a disk I/O error happened. 14.29 + * 14.30 + * For choosing which pages to swap out, inode pages carry a PG_referenced bit, 14.31 + * which is set any time the system accesses that page through the (mapping, 14.32 + * index) hash table. This referenced bit, together with the referenced bit 14.33 + * in the page tables, is used to manipulate page->age and move the page across 14.34 + * the active, inactive_dirty and inactive_clean lists. 14.35 + * 14.36 + * Note that the referenced bit, the page->lru list_head and the active, 14.37 + * inactive_dirty and inactive_clean lists are protected by the 14.38 + * zone->lru_lock, and *NOT* by the usual PG_locked bit! 14.39 + * 14.40 + * PG_error is set to indicate that an I/O error occurred on this page. 14.41 + * 14.42 + * PG_arch_1 is an architecture specific page state bit. The generic code 14.43 + * guarantees that this bit is cleared for a page when it first is entered into 14.44 + * the page cache. 14.45 + * 14.46 + * PG_highmem pages are not permanently mapped into the kernel virtual address 14.47 + * space, they need to be kmapped separately for doing IO on the pages. The 14.48 + * struct page (these bits with information) are always mapped into kernel 14.49 + * address space... 14.50 + */ 14.51 + 14.52 +/* 14.53 + * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break 14.54 + * locked- and dirty-page accounting. The top eight bits of page->flags are 14.55 + * used for page->zone, so putting flag bits there doesn't work. 14.56 + */ 14.57 +#define PG_locked 0 /* Page is locked. Don't touch. */ 14.58 +#define PG_error 1 14.59 +#define PG_referenced 2 14.60 +#define PG_uptodate 3 14.61 + 14.62 +#define PG_dirty 4 14.63 +#define PG_lru 5 14.64 +#define PG_active 6 14.65 +#define PG_slab 7 /* slab debug (Suparna wants this) */ 14.66 + 14.67 +#define PG_highmem 8 14.68 +#define PG_checked 9 /* kill me in 2.5.<early>. 
*/ 14.69 +#define PG_arch_1 10 14.70 +#define PG_reserved 11 14.71 + 14.72 +#define PG_private 12 /* Has something at ->private */ 14.73 +#define PG_writeback 13 /* Page is under writeback */ 14.74 +#define PG_nosave 14 /* Used for system suspend/resume */ 14.75 +#define PG_maplock 15 /* Lock bit for rmap to ptes */ 14.76 + 14.77 +#define PG_swapcache 16 /* Swap page: swp_entry_t in private */ 14.78 +#define PG_mappedtodisk 17 /* Has blocks allocated on-disk */ 14.79 +#define PG_reclaim 18 /* To be reclaimed asap */ 14.80 +#define PG_compound 19 /* Part of a compound page */ 14.81 + 14.82 +#define PG_anon 20 /* Anonymous: anon_vma in mapping */ 14.83 +#define PG_foreign 21 /* Page belongs to foreign allocator */ 14.84 + 14.85 + 14.86 +/* 14.87 + * Global page accounting. One instance per CPU. Only unsigned longs are 14.88 + * allowed. 14.89 + */ 14.90 +struct page_state { 14.91 + unsigned long nr_dirty; /* Dirty writeable pages */ 14.92 + unsigned long nr_writeback; /* Pages under writeback */ 14.93 + unsigned long nr_unstable; /* NFS unstable pages */ 14.94 + unsigned long nr_page_table_pages;/* Pages used for pagetables */ 14.95 + unsigned long nr_mapped; /* mapped into pagetables */ 14.96 + unsigned long nr_slab; /* In slab */ 14.97 +#define GET_PAGE_STATE_LAST nr_slab 14.98 + 14.99 + /* 14.100 + * The below are zeroed by get_page_state(). Use get_full_page_state() 14.101 + * to add up all these. 14.102 + */ 14.103 + unsigned long pgpgin; /* Disk reads */ 14.104 + unsigned long pgpgout; /* Disk writes */ 14.105 + unsigned long pswpin; /* swap reads */ 14.106 + unsigned long pswpout; /* swap writes */ 14.107 + unsigned long pgalloc_high; /* page allocations */ 14.108 + 14.109 + unsigned long pgalloc_normal; 14.110 + unsigned long pgalloc_dma; 14.111 + unsigned long pgfree; /* page freeings */ 14.112 + unsigned long pgactivate; /* pages moved inactive->active */ 14.113 + unsigned long pgdeactivate; /* pages moved active->inactive */ 14.114 + 14.115 + unsigned long pgfault; /* faults (major+minor) */ 14.116 + unsigned long pgmajfault; /* faults (major only) */ 14.117 + unsigned long pgrefill_high; /* inspected in refill_inactive_zone */ 14.118 + unsigned long pgrefill_normal; 14.119 + unsigned long pgrefill_dma; 14.120 + 14.121 + unsigned long pgsteal_high; /* total highmem pages reclaimed */ 14.122 + unsigned long pgsteal_normal; 14.123 + unsigned long pgsteal_dma; 14.124 + unsigned long pgscan_kswapd_high;/* total highmem pages scanned */ 14.125 + unsigned long pgscan_kswapd_normal; 14.126 + 14.127 + unsigned long pgscan_kswapd_dma; 14.128 + unsigned long pgscan_direct_high;/* total highmem pages scanned */ 14.129 + unsigned long pgscan_direct_normal; 14.130 + unsigned long pgscan_direct_dma; 14.131 + unsigned long pginodesteal; /* pages reclaimed via inode freeing */ 14.132 + 14.133 + unsigned long slabs_scanned; /* slab objects scanned */ 14.134 + unsigned long kswapd_steal; /* pages reclaimed by kswapd */ 14.135 + unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */ 14.136 + unsigned long pageoutrun; /* kswapd's calls to page reclaim */ 14.137 + unsigned long allocstall; /* direct reclaim calls */ 14.138 + 14.139 + unsigned long pgrotated; /* pages rotated to tail of the LRU */ 14.140 +}; 14.141 + 14.142 +DECLARE_PER_CPU(struct page_state, page_states); 14.143 + 14.144 +extern void get_page_state(struct page_state *ret); 14.145 +extern void get_full_page_state(struct page_state *ret); 14.146 +extern unsigned long __read_page_state(unsigned offset); 14.147 + 14.148 
+#define read_page_state(member) \ 14.149 + __read_page_state(offsetof(struct page_state, member)) 14.150 + 14.151 +#define mod_page_state(member, delta) \ 14.152 + do { \ 14.153 + unsigned long flags; \ 14.154 + local_irq_save(flags); \ 14.155 + __get_cpu_var(page_states).member += (delta); \ 14.156 + local_irq_restore(flags); \ 14.157 + } while (0) 14.158 + 14.159 + 14.160 +#define inc_page_state(member) mod_page_state(member, 1UL) 14.161 +#define dec_page_state(member) mod_page_state(member, 0UL - 1) 14.162 +#define add_page_state(member,delta) mod_page_state(member, (delta)) 14.163 +#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta)) 14.164 + 14.165 +#define mod_page_state_zone(zone, member, delta) \ 14.166 + do { \ 14.167 + unsigned long flags; \ 14.168 + local_irq_save(flags); \ 14.169 + if (is_highmem(zone)) \ 14.170 + __get_cpu_var(page_states).member##_high += (delta);\ 14.171 + else if (is_normal(zone)) \ 14.172 + __get_cpu_var(page_states).member##_normal += (delta);\ 14.173 + else \ 14.174 + __get_cpu_var(page_states).member##_dma += (delta);\ 14.175 + local_irq_restore(flags); \ 14.176 + } while (0) 14.177 + 14.178 +/* 14.179 + * Manipulation of page state flags 14.180 + */ 14.181 +#define PageLocked(page) \ 14.182 + test_bit(PG_locked, &(page)->flags) 14.183 +#define SetPageLocked(page) \ 14.184 + set_bit(PG_locked, &(page)->flags) 14.185 +#define TestSetPageLocked(page) \ 14.186 + test_and_set_bit(PG_locked, &(page)->flags) 14.187 +#define ClearPageLocked(page) \ 14.188 + clear_bit(PG_locked, &(page)->flags) 14.189 +#define TestClearPageLocked(page) \ 14.190 + test_and_clear_bit(PG_locked, &(page)->flags) 14.191 + 14.192 +#define PageError(page) test_bit(PG_error, &(page)->flags) 14.193 +#define SetPageError(page) set_bit(PG_error, &(page)->flags) 14.194 +#define ClearPageError(page) clear_bit(PG_error, &(page)->flags) 14.195 + 14.196 +#define PageReferenced(page) test_bit(PG_referenced, &(page)->flags) 14.197 +#define SetPageReferenced(page) set_bit(PG_referenced, &(page)->flags) 14.198 +#define ClearPageReferenced(page) clear_bit(PG_referenced, &(page)->flags) 14.199 +#define TestClearPageReferenced(page) test_and_clear_bit(PG_referenced, &(page)->flags) 14.200 + 14.201 +#ifndef arch_set_page_uptodate 14.202 +#define arch_set_page_uptodate(page) do { } while (0) 14.203 +#endif 14.204 + 14.205 +#define PageUptodate(page) test_bit(PG_uptodate, &(page)->flags) 14.206 +#define SetPageUptodate(page) \ 14.207 + do { \ 14.208 + arch_set_page_uptodate(page); \ 14.209 + set_bit(PG_uptodate, &(page)->flags); \ 14.210 + } while (0) 14.211 +#define ClearPageUptodate(page) clear_bit(PG_uptodate, &(page)->flags) 14.212 + 14.213 +#define PageDirty(page) test_bit(PG_dirty, &(page)->flags) 14.214 +#define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) 14.215 +#define TestSetPageDirty(page) test_and_set_bit(PG_dirty, &(page)->flags) 14.216 +#define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) 14.217 +#define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags) 14.218 + 14.219 +#define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) 14.220 +#define PageLRU(page) test_bit(PG_lru, &(page)->flags) 14.221 +#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags) 14.222 +#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags) 14.223 + 14.224 +#define PageActive(page) test_bit(PG_active, &(page)->flags) 14.225 +#define SetPageActive(page) set_bit(PG_active, &(page)->flags) 14.226 +#define 
ClearPageActive(page) clear_bit(PG_active, &(page)->flags) 14.227 +#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags) 14.228 +#define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags) 14.229 + 14.230 +#define PageSlab(page) test_bit(PG_slab, &(page)->flags) 14.231 +#define SetPageSlab(page) set_bit(PG_slab, &(page)->flags) 14.232 +#define ClearPageSlab(page) clear_bit(PG_slab, &(page)->flags) 14.233 +#define TestClearPageSlab(page) test_and_clear_bit(PG_slab, &(page)->flags) 14.234 +#define TestSetPageSlab(page) test_and_set_bit(PG_slab, &(page)->flags) 14.235 + 14.236 +#ifdef CONFIG_HIGHMEM 14.237 +#define PageHighMem(page) test_bit(PG_highmem, &(page)->flags) 14.238 +#else 14.239 +#define PageHighMem(page) 0 /* needed to optimize away at compile time */ 14.240 +#endif 14.241 + 14.242 +#define PageChecked(page) test_bit(PG_checked, &(page)->flags) 14.243 +#define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) 14.244 +#define ClearPageChecked(page) clear_bit(PG_checked, &(page)->flags) 14.245 + 14.246 +#define PageReserved(page) test_bit(PG_reserved, &(page)->flags) 14.247 +#define SetPageReserved(page) set_bit(PG_reserved, &(page)->flags) 14.248 +#define ClearPageReserved(page) clear_bit(PG_reserved, &(page)->flags) 14.249 + 14.250 +#define SetPagePrivate(page) set_bit(PG_private, &(page)->flags) 14.251 +#define ClearPagePrivate(page) clear_bit(PG_private, &(page)->flags) 14.252 +#define PagePrivate(page) test_bit(PG_private, &(page)->flags) 14.253 + 14.254 +#define PageWriteback(page) test_bit(PG_writeback, &(page)->flags) 14.255 +#define SetPageWriteback(page) \ 14.256 + do { \ 14.257 + if (!test_and_set_bit(PG_writeback, \ 14.258 + &(page)->flags)) \ 14.259 + inc_page_state(nr_writeback); \ 14.260 + } while (0) 14.261 +#define TestSetPageWriteback(page) \ 14.262 + ({ \ 14.263 + int ret; \ 14.264 + ret = test_and_set_bit(PG_writeback, \ 14.265 + &(page)->flags); \ 14.266 + if (!ret) \ 14.267 + inc_page_state(nr_writeback); \ 14.268 + ret; \ 14.269 + }) 14.270 +#define ClearPageWriteback(page) \ 14.271 + do { \ 14.272 + if (test_and_clear_bit(PG_writeback, \ 14.273 + &(page)->flags)) \ 14.274 + dec_page_state(nr_writeback); \ 14.275 + } while (0) 14.276 +#define TestClearPageWriteback(page) \ 14.277 + ({ \ 14.278 + int ret; \ 14.279 + ret = test_and_clear_bit(PG_writeback, \ 14.280 + &(page)->flags); \ 14.281 + if (ret) \ 14.282 + dec_page_state(nr_writeback); \ 14.283 + ret; \ 14.284 + }) 14.285 + 14.286 +#define PageNosave(page) test_bit(PG_nosave, &(page)->flags) 14.287 +#define SetPageNosave(page) set_bit(PG_nosave, &(page)->flags) 14.288 +#define TestSetPageNosave(page) test_and_set_bit(PG_nosave, &(page)->flags) 14.289 +#define ClearPageNosave(page) clear_bit(PG_nosave, &(page)->flags) 14.290 +#define TestClearPageNosave(page) test_and_clear_bit(PG_nosave, &(page)->flags) 14.291 + 14.292 +#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags) 14.293 +#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags) 14.294 +#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags) 14.295 + 14.296 +#define PageReclaim(page) test_bit(PG_reclaim, &(page)->flags) 14.297 +#define SetPageReclaim(page) set_bit(PG_reclaim, &(page)->flags) 14.298 +#define ClearPageReclaim(page) clear_bit(PG_reclaim, &(page)->flags) 14.299 +#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags) 14.300 + 14.301 +#define PageCompound(page) test_bit(PG_compound, &(page)->flags) 
14.302 +#define SetPageCompound(page) set_bit(PG_compound, &(page)->flags) 14.303 +#define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags) 14.304 + 14.305 +#define PageAnon(page) test_bit(PG_anon, &(page)->flags) 14.306 +#define SetPageAnon(page) set_bit(PG_anon, &(page)->flags) 14.307 +#define ClearPageAnon(page) clear_bit(PG_anon, &(page)->flags) 14.308 + 14.309 +/* A foreign page uses a custom destructor rather than the buddy allocator. */ 14.310 +#ifdef CONFIG_FOREIGN_PAGES 14.311 +#define PageForeign(page) test_bit(PG_foreign, &(page)->flags) 14.312 +#define SetPageForeign(page) set_bit(PG_foreign, &(page)->flags) 14.313 +#define ClearPageForeign(page) clear_bit(PG_foreign, &(page)->flags) 14.314 +#define PageForeignDestructor(page) \ 14.315 + ( (void (*) (struct page *)) (page)->mapping ) 14.316 +#else 14.317 +#define PageForeign(page) 0 14.318 +#define PageForeignDestructor(page) void 14.319 +#endif 14.320 + 14.321 +#ifdef CONFIG_SWAP 14.322 +#define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags) 14.323 +#define SetPageSwapCache(page) set_bit(PG_swapcache, &(page)->flags) 14.324 +#define ClearPageSwapCache(page) clear_bit(PG_swapcache, &(page)->flags) 14.325 +#else 14.326 +#define PageSwapCache(page) 0 14.327 +#endif 14.328 + 14.329 +struct page; /* forward declaration */ 14.330 + 14.331 +int test_clear_page_dirty(struct page *page); 14.332 +int __clear_page_dirty(struct page *page); 14.333 +int test_clear_page_writeback(struct page *page); 14.334 +int test_set_page_writeback(struct page *page); 14.335 + 14.336 +static inline void clear_page_dirty(struct page *page) 14.337 +{ 14.338 + test_clear_page_dirty(page); 14.339 +} 14.340 + 14.341 +static inline void set_page_writeback(struct page *page) 14.342 +{ 14.343 + test_set_page_writeback(page); 14.344 +} 14.345 + 14.346 +#endif /* PAGE_FLAGS_H */
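The Xen-specific addition in this header is PG_foreign together with PageForeign()/SetPageForeign()/ClearPageForeign() and PageForeignDestructor(), which reinterprets page->mapping as a free routine supplied by the page's owner. A minimal sketch of how an owner might use that contract is shown below; the helper names are hypothetical, and the cast simply mirrors the one inside PageForeignDestructor().

/*
 * Hypothetical owner of a foreign page.  The destructor is parked in
 * page->mapping, exactly where PageForeignDestructor() above looks for
 * it; the page allocator will call it instead of freeing the page.
 */
static void example_foreign_free(struct page *page)
{
	/* Return the page to whatever pool the owner took it from. */
	ClearPageForeign(page);
	page->mapping = NULL;
}

static void example_make_page_foreign(struct page *page)
{
	page->mapping = (struct address_space *)example_foreign_free;
	SetPageForeign(page);
}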
15.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 15.2 +++ b/linux-2.6.7-xen-sparse/include/linux/skbuff.h Thu Aug 19 18:51:53 2004 +0000 15.3 @@ -0,0 +1,1073 @@ 15.4 +/* 15.5 + * Definitions for the 'struct sk_buff' memory handlers. 15.6 + * 15.7 + * Authors: 15.8 + * Alan Cox, <gw4pts@gw4pts.ampr.org> 15.9 + * Florian La Roche, <rzsfl@rz.uni-sb.de> 15.10 + * 15.11 + * This program is free software; you can redistribute it and/or 15.12 + * modify it under the terms of the GNU General Public License 15.13 + * as published by the Free Software Foundation; either version 15.14 + * 2 of the License, or (at your option) any later version. 15.15 + */ 15.16 + 15.17 +#ifndef _LINUX_SKBUFF_H 15.18 +#define _LINUX_SKBUFF_H 15.19 + 15.20 +#include <linux/config.h> 15.21 +#include <linux/kernel.h> 15.22 +#include <linux/compiler.h> 15.23 +#include <linux/time.h> 15.24 +#include <linux/cache.h> 15.25 + 15.26 +#include <asm/atomic.h> 15.27 +#include <asm/types.h> 15.28 +#include <linux/spinlock.h> 15.29 +#include <linux/mm.h> 15.30 +#include <linux/highmem.h> 15.31 +#include <linux/poll.h> 15.32 +#include <linux/net.h> 15.33 + 15.34 +#define HAVE_ALLOC_SKB /* For the drivers to know */ 15.35 +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 15.36 +#define SLAB_SKB /* Slabified skbuffs */ 15.37 + 15.38 +#define CHECKSUM_NONE 0 15.39 +#define CHECKSUM_HW 1 15.40 +#define CHECKSUM_UNNECESSARY 2 15.41 + 15.42 +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ 15.43 + ~(SMP_CACHE_BYTES - 1)) 15.44 +#define SKB_MAX_ORDER(X, ORDER) (((PAGE_SIZE << (ORDER)) - (X) - \ 15.45 + sizeof(struct skb_shared_info)) & \ 15.46 + ~(SMP_CACHE_BYTES - 1)) 15.47 +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) 15.48 +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) 15.49 + 15.50 +/* A. Checksumming of received packets by device. 15.51 + * 15.52 + * NONE: device failed to checksum this packet. 15.53 + * skb->csum is undefined. 15.54 + * 15.55 + * UNNECESSARY: device parsed packet and wouldbe verified checksum. 15.56 + * skb->csum is undefined. 15.57 + * It is bad option, but, unfortunately, many of vendors do this. 15.58 + * Apparently with secret goal to sell you new device, when you 15.59 + * will add new protocol to your host. F.e. IPv6. 8) 15.60 + * 15.61 + * HW: the most generic way. Device supplied checksum of _all_ 15.62 + * the packet as seen by netif_rx in skb->csum. 15.63 + * NOTE: Even if device supports only some protocols, but 15.64 + * is able to produce some skb->csum, it MUST use HW, 15.65 + * not UNNECESSARY. 15.66 + * 15.67 + * B. Checksumming on output. 15.68 + * 15.69 + * NONE: skb is checksummed by protocol or csum is not required. 15.70 + * 15.71 + * HW: device is required to csum packet as seen by hard_start_xmit 15.72 + * from skb->h.raw to the end and to record the checksum 15.73 + * at skb->h.raw+skb->csum. 15.74 + * 15.75 + * Device must show its capabilities in dev->features, set 15.76 + * at device setup time. 15.77 + * NETIF_F_HW_CSUM - it is clever device, it is able to checksum 15.78 + * everything. 15.79 + * NETIF_F_NO_CSUM - loopback or reliable single hop media. 15.80 + * NETIF_F_IP_CSUM - device is dumb. It is able to csum only 15.81 + * TCP/UDP over IPv4. Sigh. Vendors like this 15.82 + * way by an unknown reason. Though, see comment above 15.83 + * about CHECKSUM_UNNECESSARY. 8) 15.84 + * 15.85 + * Any questions? No questions, good. 
--ANK 15.86 + */ 15.87 + 15.88 +#ifdef __i386__ 15.89 +#define NET_CALLER(arg) (*(((void **)&arg) - 1)) 15.90 +#else 15.91 +#define NET_CALLER(arg) __builtin_return_address(0) 15.92 +#endif 15.93 + 15.94 +#ifdef CONFIG_NETFILTER 15.95 +struct nf_conntrack { 15.96 + atomic_t use; 15.97 + void (*destroy)(struct nf_conntrack *); 15.98 +}; 15.99 + 15.100 +struct nf_ct_info { 15.101 + struct nf_conntrack *master; 15.102 +}; 15.103 + 15.104 +#ifdef CONFIG_BRIDGE_NETFILTER 15.105 +struct nf_bridge_info { 15.106 + atomic_t use; 15.107 + struct net_device *physindev; 15.108 + struct net_device *physoutdev; 15.109 +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) 15.110 + struct net_device *netoutdev; 15.111 +#endif 15.112 + unsigned int mask; 15.113 + unsigned long data[32 / sizeof(unsigned long)]; 15.114 +}; 15.115 +#endif 15.116 + 15.117 +#endif 15.118 + 15.119 +struct sk_buff_head { 15.120 + /* These two members must be first. */ 15.121 + struct sk_buff *next; 15.122 + struct sk_buff *prev; 15.123 + 15.124 + __u32 qlen; 15.125 + spinlock_t lock; 15.126 +}; 15.127 + 15.128 +struct sk_buff; 15.129 + 15.130 +/* To allow 64K frame to be packed as single skb without frag_list */ 15.131 +#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) 15.132 + 15.133 +typedef struct skb_frag_struct skb_frag_t; 15.134 + 15.135 +struct skb_frag_struct { 15.136 + struct page *page; 15.137 + __u16 page_offset; 15.138 + __u16 size; 15.139 +}; 15.140 + 15.141 +/* This data is invariant across clones and lives at 15.142 + * the end of the header data, ie. at skb->end. 15.143 + */ 15.144 +struct skb_shared_info { 15.145 + atomic_t dataref; 15.146 + unsigned int nr_frags; 15.147 + unsigned short tso_size; 15.148 + unsigned short tso_segs; 15.149 + struct sk_buff *frag_list; 15.150 + skb_frag_t frags[MAX_SKB_FRAGS]; 15.151 +}; 15.152 + 15.153 +/** 15.154 + * struct sk_buff - socket buffer 15.155 + * @next: Next buffer in list 15.156 + * @prev: Previous buffer in list 15.157 + * @list: List we are on 15.158 + * @sk: Socket we are owned by 15.159 + * @stamp: Time we arrived 15.160 + * @dev: Device we arrived on/are leaving by 15.161 + * @real_dev: The real device we are using 15.162 + * @h: Transport layer header 15.163 + * @nh: Network layer header 15.164 + * @mac: Link layer header 15.165 + * @dst: FIXME: Describe this field 15.166 + * @cb: Control buffer. Free for use by every layer. 
Put private vars here 15.167 + * @len: Length of actual data 15.168 + * @data_len: Data length 15.169 + * @mac_len: Length of link layer header 15.170 + * @csum: Checksum 15.171 + * @__unused: Dead field, may be reused 15.172 + * @cloned: Head may be cloned (check refcnt to be sure) 15.173 + * @pkt_type: Packet class 15.174 + * @ip_summed: Driver fed us an IP checksum 15.175 + * @priority: Packet queueing priority 15.176 + * @users: User count - see {datagram,tcp}.c 15.177 + * @protocol: Packet protocol from driver 15.178 + * @security: Security level of packet 15.179 + * @truesize: Buffer size 15.180 + * @head: Head of buffer 15.181 + * @data: Data head pointer 15.182 + * @tail: Tail pointer 15.183 + * @end: End pointer 15.184 + * @destructor: Destruct function 15.185 + * @nfmark: Can be used for communication between hooks 15.186 + * @nfcache: Cache info 15.187 + * @nfct: Associated connection, if any 15.188 + * @nf_debug: Netfilter debugging 15.189 + * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c 15.190 + * @private: Data which is private to the HIPPI implementation 15.191 + * @tc_index: Traffic control index 15.192 + */ 15.193 + 15.194 +struct sk_buff { 15.195 + /* These two members must be first. */ 15.196 + struct sk_buff *next; 15.197 + struct sk_buff *prev; 15.198 + 15.199 + struct sk_buff_head *list; 15.200 + struct sock *sk; 15.201 + struct timeval stamp; 15.202 + struct net_device *dev; 15.203 + struct net_device *real_dev; 15.204 + 15.205 + union { 15.206 + struct tcphdr *th; 15.207 + struct udphdr *uh; 15.208 + struct icmphdr *icmph; 15.209 + struct igmphdr *igmph; 15.210 + struct iphdr *ipiph; 15.211 + struct ipv6hdr *ipv6h; 15.212 + unsigned char *raw; 15.213 + } h; 15.214 + 15.215 + union { 15.216 + struct iphdr *iph; 15.217 + struct ipv6hdr *ipv6h; 15.218 + struct arphdr *arph; 15.219 + unsigned char *raw; 15.220 + } nh; 15.221 + 15.222 + union { 15.223 + struct ethhdr *ethernet; 15.224 + unsigned char *raw; 15.225 + } mac; 15.226 + 15.227 + struct dst_entry *dst; 15.228 + struct sec_path *sp; 15.229 + 15.230 + /* 15.231 + * This is the control buffer. It is free to use for every 15.232 + * layer. Please put your private variables there. If you 15.233 + * want to keep them across layers you have to do a skb_clone() 15.234 + * first. This is owned by whoever has the skb queued ATM. 15.235 + */ 15.236 + char cb[48]; 15.237 + 15.238 + unsigned int len, 15.239 + data_len, 15.240 + mac_len, 15.241 + csum; 15.242 + unsigned char local_df, 15.243 + cloned, 15.244 + pkt_type, 15.245 + ip_summed; 15.246 + __u32 priority; 15.247 + unsigned short protocol, 15.248 + security; 15.249 + 15.250 + void (*destructor)(struct sk_buff *skb); 15.251 +#ifdef CONFIG_NETFILTER 15.252 + unsigned long nfmark; 15.253 + __u32 nfcache; 15.254 + struct nf_ct_info *nfct; 15.255 +#ifdef CONFIG_NETFILTER_DEBUG 15.256 + unsigned int nf_debug; 15.257 +#endif 15.258 +#ifdef CONFIG_BRIDGE_NETFILTER 15.259 + struct nf_bridge_info *nf_bridge; 15.260 +#endif 15.261 +#endif /* CONFIG_NETFILTER */ 15.262 +#if defined(CONFIG_HIPPI) 15.263 + union { 15.264 + __u32 ifield; 15.265 + } private; 15.266 +#endif 15.267 +#ifdef CONFIG_NET_SCHED 15.268 + __u32 tc_index; /* traffic control index */ 15.269 +#endif 15.270 + 15.271 + /* These elements must be at the end, see alloc_skb() for details. 
*/ 15.272 + unsigned int truesize; 15.273 + atomic_t users; 15.274 + unsigned char *head, 15.275 + *data, 15.276 + *tail, 15.277 + *end; 15.278 +}; 15.279 + 15.280 +#ifdef __KERNEL__ 15.281 +/* 15.282 + * Handling routines are only of interest to the kernel 15.283 + */ 15.284 +#include <linux/slab.h> 15.285 + 15.286 +#include <asm/system.h> 15.287 + 15.288 +extern void __kfree_skb(struct sk_buff *skb); 15.289 +extern struct sk_buff *alloc_skb(unsigned int size, int priority); 15.290 +extern void kfree_skbmem(struct sk_buff *skb); 15.291 +extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority); 15.292 +extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority); 15.293 +extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask); 15.294 +extern int pskb_expand_head(struct sk_buff *skb, 15.295 + int nhead, int ntail, int gfp_mask); 15.296 +extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, 15.297 + unsigned int headroom); 15.298 +extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 15.299 + int newheadroom, int newtailroom, 15.300 + int priority); 15.301 +extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); 15.302 +#define dev_kfree_skb(a) kfree_skb(a) 15.303 +extern void skb_over_panic(struct sk_buff *skb, int len, 15.304 + void *here); 15.305 +extern void skb_under_panic(struct sk_buff *skb, int len, 15.306 + void *here); 15.307 + 15.308 +/* Internal */ 15.309 +#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 15.310 + 15.311 +/** 15.312 + * skb_queue_empty - check if a queue is empty 15.313 + * @list: queue head 15.314 + * 15.315 + * Returns true if the queue is empty, false otherwise. 15.316 + */ 15.317 +static inline int skb_queue_empty(const struct sk_buff_head *list) 15.318 +{ 15.319 + return list->next == (struct sk_buff *)list; 15.320 +} 15.321 + 15.322 +/** 15.323 + * skb_get - reference buffer 15.324 + * @skb: buffer to reference 15.325 + * 15.326 + * Makes another reference to a socket buffer and returns a pointer 15.327 + * to the buffer. 15.328 + */ 15.329 +static inline struct sk_buff *skb_get(struct sk_buff *skb) 15.330 +{ 15.331 + atomic_inc(&skb->users); 15.332 + return skb; 15.333 +} 15.334 + 15.335 +/* 15.336 + * If users == 1, we are the only owner and are can avoid redundant 15.337 + * atomic change. 15.338 + */ 15.339 + 15.340 +/** 15.341 + * kfree_skb - free an sk_buff 15.342 + * @skb: buffer to free 15.343 + * 15.344 + * Drop a reference to the buffer and free it if the usage count has 15.345 + * hit zero. 15.346 + */ 15.347 +static inline void kfree_skb(struct sk_buff *skb) 15.348 +{ 15.349 + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 15.350 + __kfree_skb(skb); 15.351 +} 15.352 + 15.353 +/* Use this if you didn't touch the skb state [for fast switching] */ 15.354 +static inline void kfree_skb_fast(struct sk_buff *skb) 15.355 +{ 15.356 + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 15.357 + kfree_skbmem(skb); 15.358 +} 15.359 + 15.360 +/** 15.361 + * skb_cloned - is the buffer a clone 15.362 + * @skb: buffer to check 15.363 + * 15.364 + * Returns true if the buffer was generated with skb_clone() and is 15.365 + * one of multiple shared copies of the buffer. Cloned buffers are 15.366 + * shared data so must not be written to under normal circumstances. 
15.367 + */ 15.368 +static inline int skb_cloned(const struct sk_buff *skb) 15.369 +{ 15.370 + return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; 15.371 +} 15.372 + 15.373 +/** 15.374 + * skb_shared - is the buffer shared 15.375 + * @skb: buffer to check 15.376 + * 15.377 + * Returns true if more than one person has a reference to this 15.378 + * buffer. 15.379 + */ 15.380 +static inline int skb_shared(const struct sk_buff *skb) 15.381 +{ 15.382 + return atomic_read(&skb->users) != 1; 15.383 +} 15.384 + 15.385 +/** 15.386 + * skb_share_check - check if buffer is shared and if so clone it 15.387 + * @skb: buffer to check 15.388 + * @pri: priority for memory allocation 15.389 + * 15.390 + * If the buffer is shared the buffer is cloned and the old copy 15.391 + * drops a reference. A new clone with a single reference is returned. 15.392 + * If the buffer is not shared the original buffer is returned. When 15.393 + * being called from interrupt status or with spinlocks held pri must 15.394 + * be GFP_ATOMIC. 15.395 + * 15.396 + * NULL is returned on a memory allocation failure. 15.397 + */ 15.398 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) 15.399 +{ 15.400 + might_sleep_if(pri & __GFP_WAIT); 15.401 + if (skb_shared(skb)) { 15.402 + struct sk_buff *nskb = skb_clone(skb, pri); 15.403 + kfree_skb(skb); 15.404 + skb = nskb; 15.405 + } 15.406 + return skb; 15.407 +} 15.408 + 15.409 +/* 15.410 + * Copy shared buffers into a new sk_buff. We effectively do COW on 15.411 + * packets to handle cases where we have a local reader and forward 15.412 + * and a couple of other messy ones. The normal one is tcpdumping 15.413 + * a packet thats being forwarded. 15.414 + */ 15.415 + 15.416 +/** 15.417 + * skb_unshare - make a copy of a shared buffer 15.418 + * @skb: buffer to check 15.419 + * @pri: priority for memory allocation 15.420 + * 15.421 + * If the socket buffer is a clone then this function creates a new 15.422 + * copy of the data, drops a reference count on the old copy and returns 15.423 + * the new copy with the reference count at 1. If the buffer is not a clone 15.424 + * the original buffer is returned. When called with a spinlock held or 15.425 + * from interrupt state @pri must be %GFP_ATOMIC 15.426 + * 15.427 + * %NULL is returned on a memory allocation failure. 15.428 + */ 15.429 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) 15.430 +{ 15.431 + might_sleep_if(pri & __GFP_WAIT); 15.432 + if (skb_cloned(skb)) { 15.433 + struct sk_buff *nskb = skb_copy(skb, pri); 15.434 + kfree_skb(skb); /* Free our shared copy */ 15.435 + skb = nskb; 15.436 + } 15.437 + return skb; 15.438 +} 15.439 + 15.440 +/** 15.441 + * skb_peek 15.442 + * @list_: list to peek at 15.443 + * 15.444 + * Peek an &sk_buff. Unlike most other operations you _MUST_ 15.445 + * be careful with this one. A peek leaves the buffer on the 15.446 + * list and someone else may run off with it. You must hold 15.447 + * the appropriate locks or have a private queue to do this. 15.448 + * 15.449 + * Returns %NULL for an empty list or a pointer to the head element. 15.450 + * The reference count is not incremented and the reference is therefore 15.451 + * volatile. Use with caution. 
15.452 + */ 15.453 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) 15.454 +{ 15.455 + struct sk_buff *list = ((struct sk_buff *)list_)->next; 15.456 + if (list == (struct sk_buff *)list_) 15.457 + list = NULL; 15.458 + return list; 15.459 +} 15.460 + 15.461 +/** 15.462 + * skb_peek_tail 15.463 + * @list_: list to peek at 15.464 + * 15.465 + * Peek an &sk_buff. Unlike most other operations you _MUST_ 15.466 + * be careful with this one. A peek leaves the buffer on the 15.467 + * list and someone else may run off with it. You must hold 15.468 + * the appropriate locks or have a private queue to do this. 15.469 + * 15.470 + * Returns %NULL for an empty list or a pointer to the tail element. 15.471 + * The reference count is not incremented and the reference is therefore 15.472 + * volatile. Use with caution. 15.473 + */ 15.474 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) 15.475 +{ 15.476 + struct sk_buff *list = ((struct sk_buff *)list_)->prev; 15.477 + if (list == (struct sk_buff *)list_) 15.478 + list = NULL; 15.479 + return list; 15.480 +} 15.481 + 15.482 +/** 15.483 + * skb_queue_len - get queue length 15.484 + * @list_: list to measure 15.485 + * 15.486 + * Return the length of an &sk_buff queue. 15.487 + */ 15.488 +static inline __u32 skb_queue_len(const struct sk_buff_head *list_) 15.489 +{ 15.490 + return list_->qlen; 15.491 +} 15.492 + 15.493 +static inline void skb_queue_head_init(struct sk_buff_head *list) 15.494 +{ 15.495 + spin_lock_init(&list->lock); 15.496 + list->prev = list->next = (struct sk_buff *)list; 15.497 + list->qlen = 0; 15.498 +} 15.499 + 15.500 +/* 15.501 + * Insert an sk_buff at the start of a list. 15.502 + * 15.503 + * The "__skb_xxxx()" functions are the non-atomic ones that 15.504 + * can only be called with interrupts disabled. 15.505 + */ 15.506 + 15.507 +/** 15.508 + * __skb_queue_head - queue a buffer at the list head 15.509 + * @list: list to use 15.510 + * @newsk: buffer to queue 15.511 + * 15.512 + * Queue a buffer at the start of a list. This function takes no locks 15.513 + * and you must therefore hold required locks before calling it. 15.514 + * 15.515 + * A buffer cannot be placed on two lists at the same time. 15.516 + */ 15.517 +extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); 15.518 +static inline void __skb_queue_head(struct sk_buff_head *list, 15.519 + struct sk_buff *newsk) 15.520 +{ 15.521 + struct sk_buff *prev, *next; 15.522 + 15.523 + newsk->list = list; 15.524 + list->qlen++; 15.525 + prev = (struct sk_buff *)list; 15.526 + next = prev->next; 15.527 + newsk->next = next; 15.528 + newsk->prev = prev; 15.529 + next->prev = prev->next = newsk; 15.530 +} 15.531 + 15.532 +/** 15.533 + * __skb_queue_tail - queue a buffer at the list tail 15.534 + * @list: list to use 15.535 + * @newsk: buffer to queue 15.536 + * 15.537 + * Queue a buffer at the end of a list. This function takes no locks 15.538 + * and you must therefore hold required locks before calling it. 15.539 + * 15.540 + * A buffer cannot be placed on two lists at the same time. 
15.541 + */ 15.542 +extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); 15.543 +static inline void __skb_queue_tail(struct sk_buff_head *list, 15.544 + struct sk_buff *newsk) 15.545 +{ 15.546 + struct sk_buff *prev, *next; 15.547 + 15.548 + newsk->list = list; 15.549 + list->qlen++; 15.550 + next = (struct sk_buff *)list; 15.551 + prev = next->prev; 15.552 + newsk->next = next; 15.553 + newsk->prev = prev; 15.554 + next->prev = prev->next = newsk; 15.555 +} 15.556 + 15.557 + 15.558 +/** 15.559 + * __skb_dequeue - remove from the head of the queue 15.560 + * @list: list to dequeue from 15.561 + * 15.562 + * Remove the head of the list. This function does not take any locks 15.563 + * so must be used with appropriate locks held only. The head item is 15.564 + * returned or %NULL if the list is empty. 15.565 + */ 15.566 +extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); 15.567 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) 15.568 +{ 15.569 + struct sk_buff *next, *prev, *result; 15.570 + 15.571 + prev = (struct sk_buff *) list; 15.572 + next = prev->next; 15.573 + result = NULL; 15.574 + if (next != prev) { 15.575 + result = next; 15.576 + next = next->next; 15.577 + list->qlen--; 15.578 + next->prev = prev; 15.579 + prev->next = next; 15.580 + result->next = result->prev = NULL; 15.581 + result->list = NULL; 15.582 + } 15.583 + return result; 15.584 +} 15.585 + 15.586 + 15.587 +/* 15.588 + * Insert a packet on a list. 15.589 + */ 15.590 +extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk); 15.591 +static inline void __skb_insert(struct sk_buff *newsk, 15.592 + struct sk_buff *prev, struct sk_buff *next, 15.593 + struct sk_buff_head *list) 15.594 +{ 15.595 + newsk->next = next; 15.596 + newsk->prev = prev; 15.597 + next->prev = prev->next = newsk; 15.598 + newsk->list = list; 15.599 + list->qlen++; 15.600 +} 15.601 + 15.602 +/* 15.603 + * Place a packet after a given packet in a list. 15.604 + */ 15.605 +extern void skb_append(struct sk_buff *old, struct sk_buff *newsk); 15.606 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) 15.607 +{ 15.608 + __skb_insert(newsk, old, old->next, old->list); 15.609 +} 15.610 + 15.611 +/* 15.612 + * remove sk_buff from list. _Must_ be called atomically, and with 15.613 + * the list known.. 15.614 + */ 15.615 +extern void skb_unlink(struct sk_buff *skb); 15.616 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 15.617 +{ 15.618 + struct sk_buff *next, *prev; 15.619 + 15.620 + list->qlen--; 15.621 + next = skb->next; 15.622 + prev = skb->prev; 15.623 + skb->next = skb->prev = NULL; 15.624 + skb->list = NULL; 15.625 + next->prev = prev; 15.626 + prev->next = next; 15.627 +} 15.628 + 15.629 + 15.630 +/* XXX: more streamlined implementation */ 15.631 + 15.632 +/** 15.633 + * __skb_dequeue_tail - remove from the tail of the queue 15.634 + * @list: list to dequeue from 15.635 + * 15.636 + * Remove the tail of the list. This function does not take any locks 15.637 + * so must be used with appropriate locks held only. The tail item is 15.638 + * returned or %NULL if the list is empty. 
15.639 + */ 15.640 +extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); 15.641 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) 15.642 +{ 15.643 + struct sk_buff *skb = skb_peek_tail(list); 15.644 + if (skb) 15.645 + __skb_unlink(skb, list); 15.646 + return skb; 15.647 +} 15.648 + 15.649 + 15.650 +static inline int skb_is_nonlinear(const struct sk_buff *skb) 15.651 +{ 15.652 + return skb->data_len; 15.653 +} 15.654 + 15.655 +static inline unsigned int skb_headlen(const struct sk_buff *skb) 15.656 +{ 15.657 + return skb->len - skb->data_len; 15.658 +} 15.659 + 15.660 +static inline int skb_pagelen(const struct sk_buff *skb) 15.661 +{ 15.662 + int i, len = 0; 15.663 + 15.664 + for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) 15.665 + len += skb_shinfo(skb)->frags[i].size; 15.666 + return len + skb_headlen(skb); 15.667 +} 15.668 + 15.669 +static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size) 15.670 +{ 15.671 + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 15.672 + frag->page = page; 15.673 + frag->page_offset = off; 15.674 + frag->size = size; 15.675 + skb_shinfo(skb)->nr_frags = i+1; 15.676 +} 15.677 + 15.678 +#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) 15.679 +#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) 15.680 +#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) 15.681 + 15.682 +/* 15.683 + * Add data to an sk_buff 15.684 + */ 15.685 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 15.686 +{ 15.687 + unsigned char *tmp = skb->tail; 15.688 + SKB_LINEAR_ASSERT(skb); 15.689 + skb->tail += len; 15.690 + skb->len += len; 15.691 + return tmp; 15.692 +} 15.693 + 15.694 +/** 15.695 + * skb_put - add data to a buffer 15.696 + * @skb: buffer to use 15.697 + * @len: amount of data to add 15.698 + * 15.699 + * This function extends the used data area of the buffer. If this would 15.700 + * exceed the total buffer size the kernel will panic. A pointer to the 15.701 + * first byte of the extra data is returned. 15.702 + */ 15.703 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 15.704 +{ 15.705 + unsigned char *tmp = skb->tail; 15.706 + SKB_LINEAR_ASSERT(skb); 15.707 + skb->tail += len; 15.708 + skb->len += len; 15.709 + if (unlikely(skb->tail>skb->end)) 15.710 + skb_over_panic(skb, len, current_text_addr()); 15.711 + return tmp; 15.712 +} 15.713 + 15.714 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) 15.715 +{ 15.716 + skb->data -= len; 15.717 + skb->len += len; 15.718 + return skb->data; 15.719 +} 15.720 + 15.721 +/** 15.722 + * skb_push - add data to the start of a buffer 15.723 + * @skb: buffer to use 15.724 + * @len: amount of data to add 15.725 + * 15.726 + * This function extends the used data area of the buffer at the buffer 15.727 + * start. If this would exceed the total buffer headroom the kernel will 15.728 + * panic. A pointer to the first byte of the extra data is returned. 
15.729 + */ 15.730 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) 15.731 +{ 15.732 + skb->data -= len; 15.733 + skb->len += len; 15.734 + if (unlikely(skb->data<skb->head)) 15.735 + skb_under_panic(skb, len, current_text_addr()); 15.736 + return skb->data; 15.737 +} 15.738 + 15.739 +static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) 15.740 +{ 15.741 + skb->len -= len; 15.742 + BUG_ON(skb->len < skb->data_len); 15.743 + return skb->data += len; 15.744 +} 15.745 + 15.746 +/** 15.747 + * skb_pull - remove data from the start of a buffer 15.748 + * @skb: buffer to use 15.749 + * @len: amount of data to remove 15.750 + * 15.751 + * This function removes data from the start of a buffer, returning 15.752 + * the memory to the headroom. A pointer to the next data in the buffer 15.753 + * is returned. Once the data has been pulled future pushes will overwrite 15.754 + * the old data. 15.755 + */ 15.756 +static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) 15.757 +{ 15.758 + return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len); 15.759 +} 15.760 + 15.761 +extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); 15.762 + 15.763 +static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) 15.764 +{ 15.765 + if (len > skb_headlen(skb) && 15.766 + !__pskb_pull_tail(skb, len-skb_headlen(skb))) 15.767 + return NULL; 15.768 + skb->len -= len; 15.769 + return skb->data += len; 15.770 +} 15.771 + 15.772 +static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len) 15.773 +{ 15.774 + return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); 15.775 +} 15.776 + 15.777 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) 15.778 +{ 15.779 + if (likely(len <= skb_headlen(skb))) 15.780 + return 1; 15.781 + if (unlikely(len > skb->len)) 15.782 + return 0; 15.783 + return __pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL; 15.784 +} 15.785 + 15.786 +/** 15.787 + * skb_headroom - bytes at buffer head 15.788 + * @skb: buffer to check 15.789 + * 15.790 + * Return the number of bytes of free space at the head of an &sk_buff. 15.791 + */ 15.792 +static inline int skb_headroom(const struct sk_buff *skb) 15.793 +{ 15.794 + return skb->data - skb->head; 15.795 +} 15.796 + 15.797 +/** 15.798 + * skb_tailroom - bytes at buffer end 15.799 + * @skb: buffer to check 15.800 + * 15.801 + * Return the number of bytes of free space at the tail of an sk_buff 15.802 + */ 15.803 +static inline int skb_tailroom(const struct sk_buff *skb) 15.804 +{ 15.805 + return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; 15.806 +} 15.807 + 15.808 +/** 15.809 + * skb_reserve - adjust headroom 15.810 + * @skb: buffer to alter 15.811 + * @len: bytes to move 15.812 + * 15.813 + * Increase the headroom of an empty &sk_buff by reducing the tail 15.814 + * room. This is only allowed for an empty buffer. 
15.815 + */ 15.816 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len) 15.817 +{ 15.818 + skb->data += len; 15.819 + skb->tail += len; 15.820 +} 15.821 + 15.822 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); 15.823 + 15.824 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) 15.825 +{ 15.826 + if (!skb->data_len) { 15.827 + skb->len = len; 15.828 + skb->tail = skb->data + len; 15.829 + } else 15.830 + ___pskb_trim(skb, len, 0); 15.831 +} 15.832 + 15.833 +/** 15.834 + * skb_trim - remove end from a buffer 15.835 + * @skb: buffer to alter 15.836 + * @len: new length 15.837 + * 15.838 + * Cut the length of a buffer down by removing data from the tail. If 15.839 + * the buffer is already under the length specified it is not modified. 15.840 + */ 15.841 +static inline void skb_trim(struct sk_buff *skb, unsigned int len) 15.842 +{ 15.843 + if (skb->len > len) 15.844 + __skb_trim(skb, len); 15.845 +} 15.846 + 15.847 + 15.848 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) 15.849 +{ 15.850 + if (!skb->data_len) { 15.851 + skb->len = len; 15.852 + skb->tail = skb->data+len; 15.853 + return 0; 15.854 + } 15.855 + return ___pskb_trim(skb, len, 1); 15.856 +} 15.857 + 15.858 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len) 15.859 +{ 15.860 + return (len < skb->len) ? __pskb_trim(skb, len) : 0; 15.861 +} 15.862 + 15.863 +/** 15.864 + * skb_orphan - orphan a buffer 15.865 + * @skb: buffer to orphan 15.866 + * 15.867 + * If a buffer currently has an owner then we call the owner's 15.868 + * destructor function and make the @skb unowned. The buffer continues 15.869 + * to exist but is no longer charged to its former owner. 15.870 + */ 15.871 +static inline void skb_orphan(struct sk_buff *skb) 15.872 +{ 15.873 + if (skb->destructor) 15.874 + skb->destructor(skb); 15.875 + skb->destructor = NULL; 15.876 + skb->sk = NULL; 15.877 +} 15.878 + 15.879 +/** 15.880 + * __skb_queue_purge - empty a list 15.881 + * @list: list to empty 15.882 + * 15.883 + * Delete all buffers on an &sk_buff list. Each buffer is removed from 15.884 + * the list and one reference dropped. This function does not take the 15.885 + * list lock and the caller must hold the relevant locks to use it. 15.886 + */ 15.887 +extern void skb_queue_purge(struct sk_buff_head *list); 15.888 +static inline void __skb_queue_purge(struct sk_buff_head *list) 15.889 +{ 15.890 + struct sk_buff *skb; 15.891 + while ((skb = __skb_dequeue(list)) != NULL) 15.892 + kfree_skb(skb); 15.893 +} 15.894 + 15.895 +/** 15.896 + * __dev_alloc_skb - allocate an skbuff for sending 15.897 + * @length: length to allocate 15.898 + * @gfp_mask: get_free_pages mask, passed to alloc_skb 15.899 + * 15.900 + * Allocate a new &sk_buff and assign it a usage count of one. The 15.901 + * buffer has unspecified headroom built in. Users should allocate 15.902 + * the headroom they think they need without accounting for the 15.903 + * built in space. The built in space is used for optimisations. 15.904 + * 15.905 + * %NULL is returned in there is no free memory. 
15.906 + */ 15.907 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length, 15.908 + int gfp_mask) 15.909 +{ 15.910 + struct sk_buff *skb; 15.911 +#ifdef CONFIG_PAGESIZED_SKBS 15.912 + length = max(length, (unsigned int)(PAGE_SIZE - 16)); 15.913 +#endif 15.914 + skb = alloc_skb(length + 16, gfp_mask); 15.915 + if (likely(skb)) 15.916 + skb_reserve(skb, 16); 15.917 + return skb; 15.918 +} 15.919 + 15.920 +/** 15.921 + * dev_alloc_skb - allocate an skbuff for sending 15.922 + * @length: length to allocate 15.923 + * 15.924 + * Allocate a new &sk_buff and assign it a usage count of one. The 15.925 + * buffer has unspecified headroom built in. Users should allocate 15.926 + * the headroom they think they need without accounting for the 15.927 + * built in space. The built in space is used for optimisations. 15.928 + * 15.929 + * %NULL is returned in there is no free memory. Although this function 15.930 + * allocates memory it can be called from an interrupt. 15.931 + */ 15.932 +static inline struct sk_buff *dev_alloc_skb(unsigned int length) 15.933 +{ 15.934 + return __dev_alloc_skb(length, GFP_ATOMIC); 15.935 +} 15.936 + 15.937 +/** 15.938 + * skb_cow - copy header of skb when it is required 15.939 + * @skb: buffer to cow 15.940 + * @headroom: needed headroom 15.941 + * 15.942 + * If the skb passed lacks sufficient headroom or its data part 15.943 + * is shared, data is reallocated. If reallocation fails, an error 15.944 + * is returned and original skb is not changed. 15.945 + * 15.946 + * The result is skb with writable area skb->head...skb->tail 15.947 + * and at least @headroom of space at head. 15.948 + */ 15.949 +static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) 15.950 +{ 15.951 + int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); 15.952 + 15.953 + if (delta < 0) 15.954 + delta = 0; 15.955 + 15.956 + if (delta || skb_cloned(skb)) 15.957 + return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC); 15.958 + return 0; 15.959 +} 15.960 + 15.961 +/** 15.962 + * skb_padto - pad an skbuff up to a minimal size 15.963 + * @skb: buffer to pad 15.964 + * @len: minimal length 15.965 + * 15.966 + * Pads up a buffer to ensure the trailing bytes exist and are 15.967 + * blanked. If the buffer already contains sufficient data it 15.968 + * is untouched. Returns the buffer, which may be a replacement 15.969 + * for the original, or NULL for out of memory - in which case 15.970 + * the original buffer is still freed. 15.971 + */ 15.972 + 15.973 +static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) 15.974 +{ 15.975 + unsigned int size = skb->len; 15.976 + if (likely(size >= len)) 15.977 + return skb; 15.978 + return skb_pad(skb, len-size); 15.979 +} 15.980 + 15.981 +/** 15.982 + * skb_linearize - convert paged skb to linear one 15.983 + * @skb: buffer to linarize 15.984 + * @gfp: allocation mode 15.985 + * 15.986 + * If there is no free memory -ENOMEM is returned, otherwise zero 15.987 + * is returned and the old skb data released. 
15.988 + */ 15.989 +extern int __skb_linearize(struct sk_buff *skb, int gfp); 15.990 +static inline int skb_linearize(struct sk_buff *skb, int gfp) 15.991 +{ 15.992 + return __skb_linearize(skb, gfp); 15.993 +} 15.994 + 15.995 +static inline void *kmap_skb_frag(const skb_frag_t *frag) 15.996 +{ 15.997 +#ifdef CONFIG_HIGHMEM 15.998 + BUG_ON(in_irq()); 15.999 + 15.1000 + local_bh_disable(); 15.1001 +#endif 15.1002 + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); 15.1003 +} 15.1004 + 15.1005 +static inline void kunmap_skb_frag(void *vaddr) 15.1006 +{ 15.1007 + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); 15.1008 +#ifdef CONFIG_HIGHMEM 15.1009 + local_bh_enable(); 15.1010 +#endif 15.1011 +} 15.1012 + 15.1013 +#define skb_queue_walk(queue, skb) \ 15.1014 + for (skb = (queue)->next, prefetch(skb->next); \ 15.1015 + (skb != (struct sk_buff *)(queue)); \ 15.1016 + skb = skb->next, prefetch(skb->next)) 15.1017 + 15.1018 + 15.1019 +extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, 15.1020 + int noblock, int *err); 15.1021 +extern unsigned int datagram_poll(struct file *file, struct socket *sock, 15.1022 + struct poll_table_struct *wait); 15.1023 +extern int skb_copy_datagram(const struct sk_buff *from, 15.1024 + int offset, char __user *to, int size); 15.1025 +extern int skb_copy_datagram_iovec(const struct sk_buff *from, 15.1026 + int offset, struct iovec *to, 15.1027 + int size); 15.1028 +extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, 15.1029 + int offset, u8 __user *to, 15.1030 + int len, unsigned int *csump); 15.1031 +extern int skb_copy_and_csum_datagram_iovec(const 15.1032 + struct sk_buff *skb, 15.1033 + int hlen, 15.1034 + struct iovec *iov); 15.1035 +extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); 15.1036 +extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, 15.1037 + int len, unsigned int csum); 15.1038 +extern int skb_copy_bits(const struct sk_buff *skb, int offset, 15.1039 + void *to, int len); 15.1040 +extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, 15.1041 + int offset, u8 *to, int len, 15.1042 + unsigned int csum); 15.1043 +extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 15.1044 + 15.1045 +extern void skb_init(void); 15.1046 +extern void skb_add_mtu(int mtu); 15.1047 + 15.1048 +#ifdef CONFIG_NETFILTER 15.1049 +static inline void nf_conntrack_put(struct nf_ct_info *nfct) 15.1050 +{ 15.1051 + if (nfct && atomic_dec_and_test(&nfct->master->use)) 15.1052 + nfct->master->destroy(nfct->master); 15.1053 +} 15.1054 +static inline void nf_conntrack_get(struct nf_ct_info *nfct) 15.1055 +{ 15.1056 + if (nfct) 15.1057 + atomic_inc(&nfct->master->use); 15.1058 +} 15.1059 + 15.1060 +#ifdef CONFIG_BRIDGE_NETFILTER 15.1061 +static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) 15.1062 +{ 15.1063 + if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) 15.1064 + kfree(nf_bridge); 15.1065 +} 15.1066 +static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) 15.1067 +{ 15.1068 + if (nf_bridge) 15.1069 + atomic_inc(&nf_bridge->use); 15.1070 +} 15.1071 +#endif 15.1072 + 15.1073 +#endif 15.1074 + 15.1075 +#endif /* __KERNEL__ */ 15.1076 +#endif /* _LINUX_SKBUFF_H */
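The main Xen-visible change in this copy of skbuff.h appears to be the CONFIG_PAGESIZED_SKBS rounding in __dev_alloc_skb(), which bumps the requested length up to at least PAGE_SIZE - 16 so the data area always occupies a whole page, presumably so buffers can be exchanged wholesale between domains. A sketch of a conventional receive-path use of the helpers documented above follows; the 2-byte alignment reserve and the function name are driver conventions, not anything mandated by this header.

/*
 * Sketch of a driver receive path built on the helpers above.  The
 * 2-byte reserve is the usual trick to align the IP header;
 * dev_alloc_skb() already leaves 16 bytes of headroom of its own.
 */
static struct sk_buff *example_build_rx_skb(const void *frame, unsigned int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);	/* GFP_ATOMIC inside */

	if (!skb)
		return NULL;

	skb_reserve(skb, 2);			/* align the IP header */
	memcpy(skb_put(skb, len), frame, len);	/* grow the data area, copy */
	return skb;
}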
16.1 --- a/linux-2.6.7-xen-sparse/mm/page_alloc.c	Thu Aug 19 16:09:39 2004 +0000
16.2 +++ b/linux-2.6.7-xen-sparse/mm/page_alloc.c	Thu Aug 19 18:51:53 2004 +0000
16.3 @@ -497,9 +497,8 @@ static void fastcall free_hot_cold_page(
16.4  	struct per_cpu_pages *pcp;
16.5  	unsigned long flags;
16.6  
16.7 -	/* XXX Xen: use mapping pointer as skb/data-page destructor */
16.8 -	if (page->mapping)
16.9 -		return (*(void(*)(struct page *))page->mapping)(page);
16.10 +	if (PageForeign(page))
16.11 +		return (PageForeignDestructor(page))(page);
16.12  
16.13 	kernel_map_pages(page, 1, 0);
16.14 	inc_page_state(pgfree);
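This hunk is what the new page flag exists for: free_hot_cold_page() no longer keys off a non-NULL page->mapping but off PageForeign(), so only pages explicitly marked as foreign are diverted to their owner's destructor instead of going back to the per-CPU and buddy free lists. An illustrative expansion of the test, using the macros from page-flags.h above (a sketch only; the hunk itself is authoritative):

/*
 * Illustrative expansion of the new test in free_hot_cold_page():
 * foreign pages are handed to the destructor stored in page->mapping
 * rather than being returned to the allocator's free lists.
 */
static int example_divert_foreign_page(struct page *page)
{
	if (!PageForeign(page))
		return 0;			/* normal allocator path */

	(PageForeignDestructor(page))(page);	/* owner-supplied free routine */
	return 1;				/* handled; caller should stop */
}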