ia64/xen-unstable

changeset 108:de280362dfb0

bitkeeper revision 1.15.3.3 (3e3917752EttVE6JZH0loCHxmqGWrQ)

GuestOS modifications to allocate SKB data from pre-allocated pages.
Zero copy RX-path stuff.
author akw27@boulderdash.cl.cam.ac.uk
date Thu Jan 30 12:15:49 2003 +0000 (2003-01-30)
parents 91f5e18965d9
children cb2688ed1a23
files .rootkeys xen-2.4.16/include/hypervisor-ifs/network.h xenolinux-2.4.16-sparse/include/linux/skbuff.h xenolinux-2.4.16-sparse/net/core/skbuff.c
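
In outline, the diff below adds a pool of pre-allocated pages for receive
buffers: init_net_pages() grabs a physically contiguous chunk with
__get_free_pages(), records each page's virtual address and pte pointer in a
net_page_info table, and strings the entries onto net_page_list;
alloc_zc_skb() then backs skb->data with one whole page taken from that pool
and marks the skb SKB_ZERO_COPY, and __dev_alloc_skb() is switched over to
alloc_zc_skb(). A minimal sketch of how a guest network driver would be
expected to drive this (hypothetical driver code, not part of the changeset;
only init_net_pages, dev_alloc_skb, NUM_NET_PAGES and the new sk_buff fields
come from the diff, and it assumes NUM_NET_PAGES is the order passed to
init_net_pages()):

#include <linux/skbuff.h>

/* Provided by the new net/core/skbuff.c code below. */
extern void init_net_pages(unsigned long order_pages);

/* Hypothetical one-time pool setup in the driver init path:
 * reserves 2^NUM_NET_PAGES pages for zero-copy RX buffers. */
static void guest_rx_init(void)
{
        init_net_pages(NUM_NET_PAGES);
}

/* Hypothetical RX-ring refill: dev_alloc_skb() now ends up in
 * alloc_zc_skb(), so the data area is a whole page taken from
 * net_page_list and skb->skb_type is SKB_ZERO_COPY. */
static struct sk_buff *guest_rx_refill_one(void)
{
        struct sk_buff *skb = dev_alloc_skb(1500);
        if (skb == NULL)
                return NULL;

        /* skb->net_page->ppte records the backing pte, presumably what a
         * zero-copy RX ring descriptor hands to the hypervisor. */
        return skb;
}
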
line diff
     1.1 --- a/.rootkeys	Wed Jan 29 11:17:07 2003 +0000
     1.2 +++ b/.rootkeys	Thu Jan 30 12:15:49 2003 +0000
     1.3 @@ -399,9 +399,11 @@ 3ddb79bb3cMSs_k2X5Oq2hOIBvmPYA xenolinux
     1.4  3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h
     1.5  3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h
     1.6  3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h
     1.7 +3e37c39fVCSGQENtY6g7muaq_THliw xenolinux-2.4.16-sparse/include/linux/skbuff.h
     1.8  3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h
     1.9  3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c
    1.10  3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk
    1.11  3e15d52e0_j129JPvo7xfYGndVFpwQ xenolinux-2.4.16-sparse/mm/memory.c
    1.12  3e15d535DLvpzTrLRUIerB69LpJD1g xenolinux-2.4.16-sparse/mm/mremap.c
    1.13  3e15d531m1Y1_W8ki64AFOU_ua4C4w xenolinux-2.4.16-sparse/mm/swapfile.c
    1.14 +3e37c312QFuzIxXsuAgO6IRt3Tp96Q xenolinux-2.4.16-sparse/net/core/skbuff.c
     2.1 --- a/xen-2.4.16/include/hypervisor-ifs/network.h	Wed Jan 29 11:17:07 2003 +0000
     2.2 +++ b/xen-2.4.16/include/hypervisor-ifs/network.h	Thu Jan 30 12:15:49 2003 +0000
     2.3 @@ -26,8 +26,8 @@ typedef struct rx_entry_st {
     2.4          int           status; /* per descriptor status. */
     2.5  } rx_entry_t;
     2.6  
     2.7 -#define TX_RING_SIZE 1024
     2.8 -#define RX_RING_SIZE 1024
     2.9 +#define TX_RING_SIZE 256
    2.10 +#define RX_RING_SIZE 256
    2.11  typedef struct net_ring_st {
    2.12      /*
    2.13       * Guest OS places packets into ring at tx_prod.
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/xenolinux-2.4.16-sparse/include/linux/skbuff.h	Thu Jan 30 12:15:49 2003 +0000
     3.3 @@ -0,0 +1,1185 @@
     3.4 +/*
     3.5 + *	Definitions for the 'struct sk_buff' memory handlers.
     3.6 + *
     3.7 + *	Authors:
     3.8 + *		Alan Cox, <gw4pts@gw4pts.ampr.org>
     3.9 + *		Florian La Roche, <rzsfl@rz.uni-sb.de>
    3.10 + *
    3.11 + *	This program is free software; you can redistribute it and/or
    3.12 + *	modify it under the terms of the GNU General Public License
    3.13 + *	as published by the Free Software Foundation; either version
    3.14 + *	2 of the License, or (at your option) any later version.
    3.15 + */
    3.16 + 
    3.17 +#ifndef _LINUX_SKBUFF_H
    3.18 +#define _LINUX_SKBUFF_H
    3.19 +
    3.20 +#include <linux/config.h>
    3.21 +#include <linux/kernel.h>
    3.22 +#include <linux/sched.h>
    3.23 +#include <linux/time.h>
    3.24 +#include <linux/cache.h>
    3.25 +
    3.26 +#include <asm/atomic.h>
    3.27 +#include <asm/types.h>
    3.28 +#include <linux/spinlock.h>
    3.29 +#include <linux/mm.h>
    3.30 +#include <linux/highmem.h>
    3.31 +
    3.32 +/* Zero Copy additions:
    3.33 + *
    3.34 + * (1) there are now two types of skb, as indicated by the skb_type field.
    3.35 + *     this is because, at least for the time being, there are two separate types 
    3.36 + *     of memory that may be allocated to skb->data.
    3.37 + *
    3.38 + * (2) until discontiguous memory is fully supported, there will be a free list of pages
    3.39 + *     to be used by the net RX code.  This list will be allocated in the driver init code
    3.40 + *     but is declared here because the socket free code needs to return pages to it.
    3.41 + */
    3.42 +
    3.43 +// for skb->skb_type:
    3.44 +
    3.45 +#define SKB_NORMAL          0
    3.46 +#define SKB_ZERO_COPY       1
    3.47 +
    3.48 +#define NUM_NET_PAGES       9 // about 2MB of buffers. (2^9 pages)
    3.49 +struct net_page_info {
    3.50 +        struct list_head list;
    3.51 +        unsigned long   virt_addr;
    3.52 +        unsigned long   ppte;
    3.53 +};
    3.54 +
    3.55 +extern char *net_page_chunk;
    3.56 +extern struct net_page_info *net_page_table;
    3.57 +extern struct list_head net_page_list;
    3.58 +extern spinlock_t net_page_list_lock;
    3.59 +extern unsigned int net_pages;
    3.60 +
    3.61 +/* End zero copy additions */
    3.62 +
    3.63 +#define HAVE_ALLOC_SKB		/* For the drivers to know */
    3.64 +#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
    3.65 +#define SLAB_SKB 		/* Slabified skbuffs 	   */
    3.66 +
    3.67 +#define CHECKSUM_NONE 0
    3.68 +#define CHECKSUM_HW 1
    3.69 +#define CHECKSUM_UNNECESSARY 2
    3.70 +
    3.71 +#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
    3.72 +#define SKB_MAX_ORDER(X,ORDER)	(((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
    3.73 +#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X),0))
    3.74 +#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0,2))
    3.75 +
    3.76 +/* A. Checksumming of received packets by device.
    3.77 + *
    3.78 + *	NONE: device failed to checksum this packet.
    3.79 + *		skb->csum is undefined.
    3.80 + *
    3.81 + *	UNNECESSARY: device parsed packet and wouldbe verified checksum.
    3.82 + *		skb->csum is undefined.
    3.83 + *	      It is bad option, but, unfortunately, many of vendors do this.
    3.84 + *	      Apparently with secret goal to sell you new device, when you
    3.85 + *	      will add new protocol to your host. F.e. IPv6. 8)
    3.86 + *
    3.87 + *	HW: the most generic way. Device supplied checksum of _all_
    3.88 + *	    the packet as seen by netif_rx in skb->csum.
    3.89 + *	    NOTE: Even if device supports only some protocols, but
    3.90 + *	    is able to produce some skb->csum, it MUST use HW,
    3.91 + *	    not UNNECESSARY.
    3.92 + *
    3.93 + * B. Checksumming on output.
    3.94 + *
    3.95 + *	NONE: skb is checksummed by protocol or csum is not required.
    3.96 + *
    3.97 + *	HW: device is required to csum packet as seen by hard_start_xmit
    3.98 + *	from skb->h.raw to the end and to record the checksum
    3.99 + *	at skb->h.raw+skb->csum.
   3.100 + *
   3.101 + *	Device must show its capabilities in dev->features, set
   3.102 + *	at device setup time.
   3.103 + *	NETIF_F_HW_CSUM	- it is clever device, it is able to checksum
   3.104 + *			  everything.
   3.105 + *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
   3.106 + *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
   3.107 + *			  TCP/UDP over IPv4. Sigh. Vendors like this
   3.108 + *			  way by an unknown reason. Though, see comment above
   3.109 + *			  about CHECKSUM_UNNECESSARY. 8)
   3.110 + *
   3.111 + *	Any questions? No questions, good. 		--ANK
   3.112 + */
   3.113 +
   3.114 +#ifdef __i386__
   3.115 +#define NET_CALLER(arg) (*(((void**)&arg)-1))
   3.116 +#else
   3.117 +#define NET_CALLER(arg) __builtin_return_address(0)
   3.118 +#endif
   3.119 +
   3.120 +#ifdef CONFIG_NETFILTER
   3.121 +struct nf_conntrack {
   3.122 +	atomic_t use;
   3.123 +	void (*destroy)(struct nf_conntrack *);
   3.124 +};
   3.125 +
   3.126 +struct nf_ct_info {
   3.127 +	struct nf_conntrack *master;
   3.128 +};
   3.129 +#endif
   3.130 +
   3.131 +struct sk_buff_head {
   3.132 +	/* These two members must be first. */
   3.133 +	struct sk_buff	* next;
   3.134 +	struct sk_buff	* prev;
   3.135 +
   3.136 +	__u32		qlen;
   3.137 +	spinlock_t	lock;
   3.138 +};
   3.139 +
   3.140 +struct sk_buff;
   3.141 +
   3.142 +#define MAX_SKB_FRAGS 6
   3.143 +
   3.144 +typedef struct skb_frag_struct skb_frag_t;
   3.145 +
   3.146 +struct skb_frag_struct
   3.147 +{
   3.148 +	struct page *page;
   3.149 +	__u16 page_offset;
   3.150 +	__u16 size;
   3.151 +};
   3.152 +
   3.153 +/* This data is invariant across clones and lives at
   3.154 + * the end of the header data, ie. at skb->end.
   3.155 + */
   3.156 +struct skb_shared_info {
   3.157 +	atomic_t	dataref;
   3.158 +	unsigned int	nr_frags;
   3.159 +	struct sk_buff	*frag_list;
   3.160 +	skb_frag_t	frags[MAX_SKB_FRAGS];
   3.161 +};
   3.162 +
   3.163 +struct sk_buff {
   3.164 +	/* These two members must be first. */
   3.165 +	struct sk_buff	* next;			/* Next buffer in list 				*/
   3.166 +	struct sk_buff	* prev;			/* Previous buffer in list 			*/
   3.167 +
   3.168 +	struct sk_buff_head * list;		/* List we are on				*/
   3.169 +	struct sock	*sk;			/* Socket we are owned by 			*/
   3.170 +	struct timeval	stamp;			/* Time we arrived				*/
   3.171 +	struct net_device	*dev;		/* Device we arrived on/are leaving by		*/
   3.172 +
   3.173 +	/* Transport layer header */
   3.174 +	union
   3.175 +	{
   3.176 +		struct tcphdr	*th;
   3.177 +		struct udphdr	*uh;
   3.178 +		struct icmphdr	*icmph;
   3.179 +		struct igmphdr	*igmph;
   3.180 +		struct iphdr	*ipiph;
   3.181 +		struct spxhdr	*spxh;
   3.182 +		unsigned char	*raw;
   3.183 +	} h;
   3.184 +
   3.185 +	/* Network layer header */
   3.186 +	union
   3.187 +	{
   3.188 +		struct iphdr	*iph;
   3.189 +		struct ipv6hdr	*ipv6h;
   3.190 +		struct arphdr	*arph;
   3.191 +		struct ipxhdr	*ipxh;
   3.192 +		unsigned char	*raw;
   3.193 +	} nh;
   3.194 +  
   3.195 +	/* Link layer header */
   3.196 +	union 
   3.197 +	{	
   3.198 +	  	struct ethhdr	*ethernet;
   3.199 +	  	unsigned char 	*raw;
   3.200 +	} mac;
   3.201 +
   3.202 +	struct  dst_entry *dst;
   3.203 +
   3.204 +	/* 
   3.205 +	 * This is the control buffer. It is free to use for every
   3.206 +	 * layer. Please put your private variables there. If you
   3.207 +	 * want to keep them across layers you have to do a skb_clone()
   3.208 +	 * first. This is owned by whoever has the skb queued ATM.
   3.209 +	 */ 
   3.210 +	char		cb[48];	 
   3.211 +
   3.212 +	unsigned int 	len;			/* Length of actual data			*/
   3.213 + 	unsigned int 	data_len;
   3.214 +	unsigned int	csum;			/* Checksum 					*/
   3.215 +	unsigned char 	__unused,		/* Dead field, may be reused			*/
   3.216 +			cloned, 		/* head may be cloned (check refcnt to be sure). */
   3.217 +  			pkt_type,		/* Packet class					*/
   3.218 +  			ip_summed;		/* Driver fed us an IP checksum			*/
   3.219 +	__u32		priority;		/* Packet queueing priority			*/
   3.220 +	atomic_t	users;			/* User count - see datagram.c,tcp.c 		*/
   3.221 +	unsigned short	protocol;		/* Packet protocol from driver. 		*/
   3.222 +	unsigned short	security;		/* Security level of packet			*/
   3.223 +	unsigned int	truesize;		/* Buffer size 					*/
   3.224 +
   3.225 +	unsigned char	*head;			/* Head of buffer 				*/
   3.226 +	unsigned char	*data;			/* Data head pointer				*/
   3.227 +	unsigned char	*tail;			/* Tail pointer					*/
   3.228 +	unsigned char 	*end;			/* End pointer					*/
   3.229 +
   3.230 +	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
   3.231 +#ifdef CONFIG_NETFILTER
   3.232 +	/* Can be used for communication between hooks. */
   3.233 +        unsigned long	nfmark;
   3.234 +	/* Cache info */
   3.235 +	__u32		nfcache;
   3.236 +	/* Associated connection, if any */
   3.237 +	struct nf_ct_info *nfct;
   3.238 +#ifdef CONFIG_NETFILTER_DEBUG
   3.239 +        unsigned int nf_debug;
   3.240 +#endif
   3.241 +#endif /*CONFIG_NETFILTER*/
   3.242 +
   3.243 +#if defined(CONFIG_HIPPI)
   3.244 +	union{
   3.245 +		__u32	ifield;
   3.246 +	} private;
   3.247 +#endif
   3.248 +
   3.249 +#ifdef CONFIG_NET_SCHED
   3.250 +       __u32           tc_index;                /* traffic control index */
   3.251 +#endif
   3.252 +       unsigned int     skb_type;                /* for zero copy handling.                      */
   3.253 +       struct net_page_info *net_page;
   3.254 +};
   3.255 +
   3.256 +#define SK_WMEM_MAX	65535
   3.257 +#define SK_RMEM_MAX	65535
   3.258 +
   3.259 +#ifdef __KERNEL__
   3.260 +/*
   3.261 + *	Handling routines are only of interest to the kernel
   3.262 + */
   3.263 +#include <linux/slab.h>
   3.264 +
   3.265 +#include <asm/system.h>
   3.266 +
   3.267 +extern void			__kfree_skb(struct sk_buff *skb);
   3.268 +extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
   3.269 +extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
   3.270 +extern void			kfree_skbmem(struct sk_buff *skb);
   3.271 +extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
   3.272 +extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
   3.273 +extern struct sk_buff *		pskb_copy(struct sk_buff *skb, int gfp_mask);
   3.274 +extern int			pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
   3.275 +extern struct sk_buff *		skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
   3.276 +extern struct sk_buff *		skb_copy_expand(const struct sk_buff *skb, 
   3.277 +						int newheadroom,
   3.278 +						int newtailroom,
   3.279 +						int priority);
   3.280 +#define dev_kfree_skb(a)	kfree_skb(a)
   3.281 +extern void	skb_over_panic(struct sk_buff *skb, int len, void *here);
   3.282 +extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
   3.283 +
   3.284 +/* Internal */
   3.285 +#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
   3.286 +
   3.287 +/**
   3.288 + *	skb_queue_empty - check if a queue is empty
   3.289 + *	@list: queue head
   3.290 + *
   3.291 + *	Returns true if the queue is empty, false otherwise.
   3.292 + */
   3.293 + 
   3.294 +static inline int skb_queue_empty(struct sk_buff_head *list)
   3.295 +{
   3.296 +	return (list->next == (struct sk_buff *) list);
   3.297 +}
   3.298 +
   3.299 +/**
   3.300 + *	skb_get - reference buffer
   3.301 + *	@skb: buffer to reference
   3.302 + *
   3.303 + *	Makes another reference to a socket buffer and returns a pointer
   3.304 + *	to the buffer.
   3.305 + */
   3.306 + 
   3.307 +static inline struct sk_buff *skb_get(struct sk_buff *skb)
   3.308 +{
   3.309 +	atomic_inc(&skb->users);
   3.310 +	return skb;
   3.311 +}
   3.312 +
   3.313 +/*
   3.314 + * If users==1, we are the only owner and can avoid redundant
   3.315 + * atomic change.
   3.316 + */
   3.317 + 
   3.318 +/**
   3.319 + *	kfree_skb - free an sk_buff
   3.320 + *	@skb: buffer to free
   3.321 + *
   3.322 + *	Drop a reference to the buffer and free it if the usage count has
   3.323 + *	hit zero.
   3.324 + */
   3.325 + 
   3.326 +static inline void kfree_skb(struct sk_buff *skb)
   3.327 +{
   3.328 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
   3.329 +		__kfree_skb(skb);
   3.330 +}
   3.331 +
   3.332 +/* Use this if you didn't touch the skb state [for fast switching] */
   3.333 +static inline void kfree_skb_fast(struct sk_buff *skb)
   3.334 +{
   3.335 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
   3.336 +		kfree_skbmem(skb);	
   3.337 +}
   3.338 +
   3.339 +/**
   3.340 + *	skb_cloned - is the buffer a clone
   3.341 + *	@skb: buffer to check
   3.342 + *
   3.343 + *	Returns true if the buffer was generated with skb_clone() and is
   3.344 + *	one of multiple shared copies of the buffer. Cloned buffers are
   3.345 + *	shared data so must not be written to under normal circumstances.
   3.346 + */
   3.347 +
   3.348 +static inline int skb_cloned(struct sk_buff *skb)
   3.349 +{
   3.350 +	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
   3.351 +}
   3.352 +
   3.353 +/**
   3.354 + *	skb_shared - is the buffer shared
   3.355 + *	@skb: buffer to check
   3.356 + *
   3.357 + *	Returns true if more than one person has a reference to this
   3.358 + *	buffer.
   3.359 + */
   3.360 + 
   3.361 +static inline int skb_shared(struct sk_buff *skb)
   3.362 +{
   3.363 +	return (atomic_read(&skb->users) != 1);
   3.364 +}
   3.365 +
   3.366 +/** 
   3.367 + *	skb_share_check - check if buffer is shared and if so clone it
   3.368 + *	@skb: buffer to check
   3.369 + *	@pri: priority for memory allocation
   3.370 + *	
   3.371 + *	If the buffer is shared the buffer is cloned and the old copy
   3.372 + *	drops a reference. A new clone with a single reference is returned.
   3.373 + *	If the buffer is not shared the original buffer is returned. When
   3.374 + *	being called from interrupt status or with spinlocks held pri must
   3.375 + *	be GFP_ATOMIC.
   3.376 + *
   3.377 + *	NULL is returned on a memory allocation failure.
   3.378 + */
   3.379 + 
   3.380 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
   3.381 +{
   3.382 +	if (skb_shared(skb)) {
   3.383 +		struct sk_buff *nskb;
   3.384 +		nskb = skb_clone(skb, pri);
   3.385 +		kfree_skb(skb);
   3.386 +		return nskb;
   3.387 +	}
   3.388 +	return skb;
   3.389 +}
   3.390 +
   3.391 +
   3.392 +/*
   3.393 + *	Copy shared buffers into a new sk_buff. We effectively do COW on
   3.394 + *	packets to handle cases where we have a local reader and forward
   3.395 + *	and a couple of other messy ones. The normal one is tcpdumping
   3.396 + *	a packet thats being forwarded.
   3.397 + */
   3.398 + 
   3.399 +/**
   3.400 + *	skb_unshare - make a copy of a shared buffer
   3.401 + *	@skb: buffer to check
   3.402 + *	@pri: priority for memory allocation
   3.403 + *
   3.404 + *	If the socket buffer is a clone then this function creates a new
   3.405 + *	copy of the data, drops a reference count on the old copy and returns
   3.406 + *	the new copy with the reference count at 1. If the buffer is not a clone
   3.407 + *	the original buffer is returned. When called with a spinlock held or
   3.408 + *	from interrupt state @pri must be %GFP_ATOMIC
   3.409 + *
   3.410 + *	%NULL is returned on a memory allocation failure.
   3.411 + */
   3.412 + 
   3.413 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
   3.414 +{
   3.415 +	struct sk_buff *nskb;
   3.416 +	if(!skb_cloned(skb))
   3.417 +		return skb;
   3.418 +	nskb=skb_copy(skb, pri);
   3.419 +	kfree_skb(skb);		/* Free our shared copy */
   3.420 +	return nskb;
   3.421 +}
   3.422 +
   3.423 +/**
   3.424 + *	skb_peek
   3.425 + *	@list_: list to peek at
   3.426 + *
   3.427 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
   3.428 + *	be careful with this one. A peek leaves the buffer on the
   3.429 + *	list and someone else may run off with it. You must hold
   3.430 + *	the appropriate locks or have a private queue to do this.
   3.431 + *
   3.432 + *	Returns %NULL for an empty list or a pointer to the head element.
   3.433 + *	The reference count is not incremented and the reference is therefore
   3.434 + *	volatile. Use with caution.
   3.435 + */
   3.436 + 
   3.437 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
   3.438 +{
   3.439 +	struct sk_buff *list = ((struct sk_buff *)list_)->next;
   3.440 +	if (list == (struct sk_buff *)list_)
   3.441 +		list = NULL;
   3.442 +	return list;
   3.443 +}
   3.444 +
   3.445 +/**
   3.446 + *	skb_peek_tail
   3.447 + *	@list_: list to peek at
   3.448 + *
   3.449 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
   3.450 + *	be careful with this one. A peek leaves the buffer on the
   3.451 + *	list and someone else may run off with it. You must hold
   3.452 + *	the appropriate locks or have a private queue to do this.
   3.453 + *
   3.454 + *	Returns %NULL for an empty list or a pointer to the tail element.
   3.455 + *	The reference count is not incremented and the reference is therefore
   3.456 + *	volatile. Use with caution.
   3.457 + */
   3.458 +
   3.459 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
   3.460 +{
   3.461 +	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
   3.462 +	if (list == (struct sk_buff *)list_)
   3.463 +		list = NULL;
   3.464 +	return list;
   3.465 +}
   3.466 +
   3.467 +/**
   3.468 + *	skb_queue_len	- get queue length
   3.469 + *	@list_: list to measure
   3.470 + *
   3.471 + *	Return the length of an &sk_buff queue. 
   3.472 + */
   3.473 + 
   3.474 +static inline __u32 skb_queue_len(struct sk_buff_head *list_)
   3.475 +{
   3.476 +	return(list_->qlen);
   3.477 +}
   3.478 +
   3.479 +static inline void skb_queue_head_init(struct sk_buff_head *list)
   3.480 +{
   3.481 +	spin_lock_init(&list->lock);
   3.482 +	list->prev = (struct sk_buff *)list;
   3.483 +	list->next = (struct sk_buff *)list;
   3.484 +	list->qlen = 0;
   3.485 +}
   3.486 +
   3.487 +/*
   3.488 + *	Insert an sk_buff at the start of a list.
   3.489 + *
   3.490 + *	The "__skb_xxxx()" functions are the non-atomic ones that
   3.491 + *	can only be called with interrupts disabled.
   3.492 + */
   3.493 +
   3.494 +/**
   3.495 + *	__skb_queue_head - queue a buffer at the list head
   3.496 + *	@list: list to use
   3.497 + *	@newsk: buffer to queue
   3.498 + *
   3.499 + *	Queue a buffer at the start of a list. This function takes no locks
   3.500 + *	and you must therefore hold required locks before calling it.
   3.501 + *
   3.502 + *	A buffer cannot be placed on two lists at the same time.
   3.503 + */	
   3.504 + 
   3.505 +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
   3.506 +{
   3.507 +	struct sk_buff *prev, *next;
   3.508 +
   3.509 +	newsk->list = list;
   3.510 +	list->qlen++;
   3.511 +	prev = (struct sk_buff *)list;
   3.512 +	next = prev->next;
   3.513 +	newsk->next = next;
   3.514 +	newsk->prev = prev;
   3.515 +	next->prev = newsk;
   3.516 +	prev->next = newsk;
   3.517 +}
   3.518 +
   3.519 +
   3.520 +/**
   3.521 + *	skb_queue_head - queue a buffer at the list head
   3.522 + *	@list: list to use
   3.523 + *	@newsk: buffer to queue
   3.524 + *
   3.525 + *	Queue a buffer at the start of the list. This function takes the
   3.526 + *	list lock and can be used safely with other locking &sk_buff functions
   3.527 + *	safely.
   3.528 + *
   3.529 + *	A buffer cannot be placed on two lists at the same time.
   3.530 + */	
   3.531 +
   3.532 +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
   3.533 +{
   3.534 +	unsigned long flags;
   3.535 +
   3.536 +	spin_lock_irqsave(&list->lock, flags);
   3.537 +	__skb_queue_head(list, newsk);
   3.538 +	spin_unlock_irqrestore(&list->lock, flags);
   3.539 +}
   3.540 +
   3.541 +/**
   3.542 + *	__skb_queue_tail - queue a buffer at the list tail
   3.543 + *	@list: list to use
   3.544 + *	@newsk: buffer to queue
   3.545 + *
   3.546 + *	Queue a buffer at the end of a list. This function takes no locks
   3.547 + *	and you must therefore hold required locks before calling it.
   3.548 + *
   3.549 + *	A buffer cannot be placed on two lists at the same time.
   3.550 + */	
   3.551 + 
   3.552 +
   3.553 +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
   3.554 +{
   3.555 +	struct sk_buff *prev, *next;
   3.556 +
   3.557 +	newsk->list = list;
   3.558 +	list->qlen++;
   3.559 +	next = (struct sk_buff *)list;
   3.560 +	prev = next->prev;
   3.561 +	newsk->next = next;
   3.562 +	newsk->prev = prev;
   3.563 +	next->prev = newsk;
   3.564 +	prev->next = newsk;
   3.565 +}
   3.566 +
   3.567 +/**
   3.568 + *	skb_queue_tail - queue a buffer at the list tail
   3.569 + *	@list: list to use
   3.570 + *	@newsk: buffer to queue
   3.571 + *
   3.572 + *	Queue a buffer at the tail of the list. This function takes the
   3.573 + *	list lock and can be used safely with other locking &sk_buff functions
   3.574 + *	safely.
   3.575 + *
   3.576 + *	A buffer cannot be placed on two lists at the same time.
   3.577 + */	
   3.578 +
   3.579 +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
   3.580 +{
   3.581 +	unsigned long flags;
   3.582 +
   3.583 +	spin_lock_irqsave(&list->lock, flags);
   3.584 +	__skb_queue_tail(list, newsk);
   3.585 +	spin_unlock_irqrestore(&list->lock, flags);
   3.586 +}
   3.587 +
   3.588 +/**
   3.589 + *	__skb_dequeue - remove from the head of the queue
   3.590 + *	@list: list to dequeue from
   3.591 + *
   3.592 + *	Remove the head of the list. This function does not take any locks
   3.593 + *	so must be used with appropriate locks held only. The head item is
   3.594 + *	returned or %NULL if the list is empty.
   3.595 + */
   3.596 +
   3.597 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
   3.598 +{
   3.599 +	struct sk_buff *next, *prev, *result;
   3.600 +
   3.601 +	prev = (struct sk_buff *) list;
   3.602 +	next = prev->next;
   3.603 +	result = NULL;
   3.604 +	if (next != prev) {
   3.605 +		result = next;
   3.606 +		next = next->next;
   3.607 +		list->qlen--;
   3.608 +		next->prev = prev;
   3.609 +		prev->next = next;
   3.610 +		result->next = NULL;
   3.611 +		result->prev = NULL;
   3.612 +		result->list = NULL;
   3.613 +	}
   3.614 +	return result;
   3.615 +}
   3.616 +
   3.617 +/**
   3.618 + *	skb_dequeue - remove from the head of the queue
   3.619 + *	@list: list to dequeue from
   3.620 + *
   3.621 + *	Remove the head of the list. The list lock is taken so the function
   3.622 + *	may be used safely with other locking list functions. The head item is
   3.623 + *	returned or %NULL if the list is empty.
   3.624 + */
   3.625 +
   3.626 +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
   3.627 +{
   3.628 +	long flags;
   3.629 +	struct sk_buff *result;
   3.630 +
   3.631 +	spin_lock_irqsave(&list->lock, flags);
   3.632 +	result = __skb_dequeue(list);
   3.633 +	spin_unlock_irqrestore(&list->lock, flags);
   3.634 +	return result;
   3.635 +}
   3.636 +
   3.637 +/*
   3.638 + *	Insert a packet on a list.
   3.639 + */
   3.640 +
   3.641 +static inline void __skb_insert(struct sk_buff *newsk,
   3.642 +	struct sk_buff * prev, struct sk_buff *next,
   3.643 +	struct sk_buff_head * list)
   3.644 +{
   3.645 +	newsk->next = next;
   3.646 +	newsk->prev = prev;
   3.647 +	next->prev = newsk;
   3.648 +	prev->next = newsk;
   3.649 +	newsk->list = list;
   3.650 +	list->qlen++;
   3.651 +}
   3.652 +
   3.653 +/**
   3.654 + *	skb_insert	-	insert a buffer
   3.655 + *	@old: buffer to insert before
   3.656 + *	@newsk: buffer to insert
   3.657 + *
   3.658 + *	Place a packet before a given packet in a list. The list locks are taken
   3.659 + *	and this function is atomic with respect to other list locked calls
   3.660 + *	A buffer cannot be placed on two lists at the same time.
   3.661 + */
   3.662 +
   3.663 +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
   3.664 +{
   3.665 +	unsigned long flags;
   3.666 +
   3.667 +	spin_lock_irqsave(&old->list->lock, flags);
   3.668 +	__skb_insert(newsk, old->prev, old, old->list);
   3.669 +	spin_unlock_irqrestore(&old->list->lock, flags);
   3.670 +}
   3.671 +
   3.672 +/*
   3.673 + *	Place a packet after a given packet in a list.
   3.674 + */
   3.675 +
   3.676 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
   3.677 +{
   3.678 +	__skb_insert(newsk, old, old->next, old->list);
   3.679 +}
   3.680 +
   3.681 +/**
   3.682 + *	skb_append	-	append a buffer
   3.683 + *	@old: buffer to insert after
   3.684 + *	@newsk: buffer to insert
   3.685 + *
   3.686 + *	Place a packet after a given packet in a list. The list locks are taken
   3.687 + *	and this function is atomic with respect to other list locked calls.
   3.688 + *	A buffer cannot be placed on two lists at the same time.
   3.689 + */
   3.690 +
   3.691 +
   3.692 +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
   3.693 +{
   3.694 +	unsigned long flags;
   3.695 +
   3.696 +	spin_lock_irqsave(&old->list->lock, flags);
   3.697 +	__skb_append(old, newsk);
   3.698 +	spin_unlock_irqrestore(&old->list->lock, flags);
   3.699 +}
   3.700 +
   3.701 +/*
   3.702 + * remove sk_buff from list. _Must_ be called atomically, and with
   3.703 + * the list known..
   3.704 + */
   3.705 + 
   3.706 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
   3.707 +{
   3.708 +	struct sk_buff * next, * prev;
   3.709 +
   3.710 +	list->qlen--;
   3.711 +	next = skb->next;
   3.712 +	prev = skb->prev;
   3.713 +	skb->next = NULL;
   3.714 +	skb->prev = NULL;
   3.715 +	skb->list = NULL;
   3.716 +	next->prev = prev;
   3.717 +	prev->next = next;
   3.718 +}
   3.719 +
   3.720 +/**
   3.721 + *	skb_unlink	-	remove a buffer from a list
   3.722 + *	@skb: buffer to remove
   3.723 + *
   3.724 + *	Remove a buffer from the list it is on. The list locks are taken
   3.725 + *	and this function is atomic with respect to other list locked calls
   3.726 + *	
   3.727 + *	Works even without knowing the list it is sitting on, which can be 
   3.728 + *	handy at times. It also means that THE LIST MUST EXIST when you 
   3.729 + *	unlink. Thus a list must have its contents unlinked before it is
   3.730 + *	destroyed.
   3.731 + */
   3.732 +
   3.733 +static inline void skb_unlink(struct sk_buff *skb)
   3.734 +{
   3.735 +	struct sk_buff_head *list = skb->list;
   3.736 +
   3.737 +	if(list) {
   3.738 +		unsigned long flags;
   3.739 +
   3.740 +		spin_lock_irqsave(&list->lock, flags);
   3.741 +		if(skb->list == list)
   3.742 +			__skb_unlink(skb, skb->list);
   3.743 +		spin_unlock_irqrestore(&list->lock, flags);
   3.744 +	}
   3.745 +}
   3.746 +
   3.747 +/* XXX: more streamlined implementation */
   3.748 +
   3.749 +/**
   3.750 + *	__skb_dequeue_tail - remove from the tail of the queue
   3.751 + *	@list: list to dequeue from
   3.752 + *
   3.753 + *	Remove the tail of the list. This function does not take any locks
   3.754 + *	so must be used with appropriate locks held only. The tail item is
   3.755 + *	returned or %NULL if the list is empty.
   3.756 + */
   3.757 +
   3.758 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
   3.759 +{
   3.760 +	struct sk_buff *skb = skb_peek_tail(list); 
   3.761 +	if (skb)
   3.762 +		__skb_unlink(skb, list);
   3.763 +	return skb;
   3.764 +}
   3.765 +
   3.766 +/**
   3.767 + *	skb_dequeue_tail - remove from the tail of the queue
   3.768 + *	@list: list to dequeue from
   3.769 + *
   3.770 + *	Remove the tail of the list. The list lock is taken so the function
   3.771 + *	may be used safely with other locking list functions. The tail item is
   3.772 + *	returned or %NULL if the list is empty.
   3.773 + */
   3.774 +
   3.775 +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
   3.776 +{
   3.777 +	long flags;
   3.778 +	struct sk_buff *result;
   3.779 +
   3.780 +	spin_lock_irqsave(&list->lock, flags);
   3.781 +	result = __skb_dequeue_tail(list);
   3.782 +	spin_unlock_irqrestore(&list->lock, flags);
   3.783 +	return result;
   3.784 +}
   3.785 +
   3.786 +static inline int skb_is_nonlinear(const struct sk_buff *skb)
   3.787 +{
   3.788 +	return skb->data_len;
   3.789 +}
   3.790 +
   3.791 +static inline int skb_headlen(const struct sk_buff *skb)
   3.792 +{
   3.793 +	return skb->len - skb->data_len;
   3.794 +}
   3.795 +
   3.796 +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0)
   3.797 +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0)
   3.798 +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
   3.799 +
   3.800 +/*
   3.801 + *	Add data to an sk_buff
   3.802 + */
   3.803 + 
   3.804 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
   3.805 +{
   3.806 +	unsigned char *tmp=skb->tail;
   3.807 +	SKB_LINEAR_ASSERT(skb);
   3.808 +	skb->tail+=len;
   3.809 +	skb->len+=len;
   3.810 +	return tmp;
   3.811 +}
   3.812 +
   3.813 +/**
   3.814 + *	skb_put - add data to a buffer
   3.815 + *	@skb: buffer to use 
   3.816 + *	@len: amount of data to add
   3.817 + *
   3.818 + *	This function extends the used data area of the buffer. If this would
   3.819 + *	exceed the total buffer size the kernel will panic. A pointer to the
   3.820 + *	first byte of the extra data is returned.
   3.821 + */
   3.822 + 
   3.823 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
   3.824 +{
   3.825 +	unsigned char *tmp=skb->tail;
   3.826 +	SKB_LINEAR_ASSERT(skb);
   3.827 +	skb->tail+=len;
   3.828 +	skb->len+=len;
   3.829 +	if(skb->tail>skb->end) {
   3.830 +		skb_over_panic(skb, len, current_text_addr());
   3.831 +	}
   3.832 +	return tmp;
   3.833 +}
   3.834 +
   3.835 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
   3.836 +{
   3.837 +	skb->data-=len;
   3.838 +	skb->len+=len;
   3.839 +	return skb->data;
   3.840 +}
   3.841 +
   3.842 +/**
   3.843 + *	skb_push - add data to the start of a buffer
   3.844 + *	@skb: buffer to use 
   3.845 + *	@len: amount of data to add
   3.846 + *
   3.847 + *	This function extends the used data area of the buffer at the buffer
   3.848 + *	start. If this would exceed the total buffer headroom the kernel will
   3.849 + *	panic. A pointer to the first byte of the extra data is returned.
   3.850 + */
   3.851 +
   3.852 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
   3.853 +{
   3.854 +	skb->data-=len;
   3.855 +	skb->len+=len;
   3.856 +	if(skb->data<skb->head) {
   3.857 +		skb_under_panic(skb, len, current_text_addr());
   3.858 +	}
   3.859 +	return skb->data;
   3.860 +}
   3.861 +
   3.862 +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
   3.863 +{
   3.864 +	skb->len-=len;
   3.865 +	if (skb->len < skb->data_len)
   3.866 +		BUG();
   3.867 +	return 	skb->data+=len;
   3.868 +}
   3.869 +
   3.870 +/**
   3.871 + *	skb_pull - remove data from the start of a buffer
   3.872 + *	@skb: buffer to use 
   3.873 + *	@len: amount of data to remove
   3.874 + *
   3.875 + *	This function removes data from the start of a buffer, returning
   3.876 + *	the memory to the headroom. A pointer to the next data in the buffer
   3.877 + *	is returned. Once the data has been pulled future pushes will overwrite
   3.878 + *	the old data.
   3.879 + */
   3.880 +
   3.881 +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
   3.882 +{	
   3.883 +	if (len > skb->len)
   3.884 +		return NULL;
   3.885 +	return __skb_pull(skb,len);
   3.886 +}
   3.887 +
   3.888 +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
   3.889 +
   3.890 +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
   3.891 +{
   3.892 +	if (len > skb_headlen(skb) &&
   3.893 +	    __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
   3.894 +		return NULL;
   3.895 +	skb->len -= len;
   3.896 +	return 	skb->data += len;
   3.897 +}
   3.898 +
   3.899 +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
   3.900 +{	
   3.901 +	if (len > skb->len)
   3.902 +		return NULL;
   3.903 +	return __pskb_pull(skb,len);
   3.904 +}
   3.905 +
   3.906 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
   3.907 +{
   3.908 +	if (len <= skb_headlen(skb))
   3.909 +		return 1;
   3.910 +	if (len > skb->len)
   3.911 +		return 0;
   3.912 +	return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
   3.913 +}
   3.914 +
   3.915 +/**
   3.916 + *	skb_headroom - bytes at buffer head
   3.917 + *	@skb: buffer to check
   3.918 + *
   3.919 + *	Return the number of bytes of free space at the head of an &sk_buff.
   3.920 + */
   3.921 + 
   3.922 +static inline int skb_headroom(const struct sk_buff *skb)
   3.923 +{
   3.924 +	return skb->data-skb->head;
   3.925 +}
   3.926 +
   3.927 +/**
   3.928 + *	skb_tailroom - bytes at buffer end
   3.929 + *	@skb: buffer to check
   3.930 + *
   3.931 + *	Return the number of bytes of free space at the tail of an sk_buff
   3.932 + */
   3.933 +
   3.934 +static inline int skb_tailroom(const struct sk_buff *skb)
   3.935 +{
   3.936 +	return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
   3.937 +}
   3.938 +
   3.939 +/**
   3.940 + *	skb_reserve - adjust headroom
   3.941 + *	@skb: buffer to alter
   3.942 + *	@len: bytes to move
   3.943 + *
   3.944 + *	Increase the headroom of an empty &sk_buff by reducing the tail
   3.945 + *	room. This is only allowed for an empty buffer.
   3.946 + */
   3.947 +
   3.948 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
   3.949 +{
   3.950 +	skb->data+=len;
   3.951 +	skb->tail+=len;
   3.952 +}
   3.953 +
   3.954 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
   3.955 +
   3.956 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
   3.957 +{
   3.958 +	if (!skb->data_len) {
   3.959 +		skb->len = len;
   3.960 +		skb->tail = skb->data+len;
   3.961 +	} else {
   3.962 +		___pskb_trim(skb, len, 0);
   3.963 +	}
   3.964 +}
   3.965 +
   3.966 +/**
   3.967 + *	skb_trim - remove end from a buffer
   3.968 + *	@skb: buffer to alter
   3.969 + *	@len: new length
   3.970 + *
   3.971 + *	Cut the length of a buffer down by removing data from the tail. If
   3.972 + *	the buffer is already under the length specified it is not modified.
   3.973 + */
   3.974 +
   3.975 +static inline void skb_trim(struct sk_buff *skb, unsigned int len)
   3.976 +{
   3.977 +	if (skb->len > len) {
   3.978 +		__skb_trim(skb, len);
   3.979 +	}
   3.980 +}
   3.981 +
   3.982 +
   3.983 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
   3.984 +{
   3.985 +	if (!skb->data_len) {
   3.986 +		skb->len = len;
   3.987 +		skb->tail = skb->data+len;
   3.988 +		return 0;
   3.989 +	} else {
   3.990 +		return ___pskb_trim(skb, len, 1);
   3.991 +	}
   3.992 +}
   3.993 +
   3.994 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
   3.995 +{
   3.996 +	if (len < skb->len)
   3.997 +		return __pskb_trim(skb, len);
   3.998 +	return 0;
   3.999 +}
  3.1000 +
  3.1001 +/**
  3.1002 + *	skb_orphan - orphan a buffer
  3.1003 + *	@skb: buffer to orphan
  3.1004 + *
  3.1005 + *	If a buffer currently has an owner then we call the owner's
  3.1006 + *	destructor function and make the @skb unowned. The buffer continues
  3.1007 + *	to exist but is no longer charged to its former owner.
  3.1008 + */
  3.1009 +
  3.1010 +
  3.1011 +static inline void skb_orphan(struct sk_buff *skb)
  3.1012 +{
  3.1013 +	if (skb->destructor)
  3.1014 +		skb->destructor(skb);
  3.1015 +	skb->destructor = NULL;
  3.1016 +	skb->sk = NULL;
  3.1017 +}
  3.1018 +
  3.1019 +/**
  3.1020 + *	skb_queue_purge - empty a list
  3.1021 + *	@list: list to empty
  3.1022 + *
  3.1023 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  3.1024 + *	the list and one reference dropped. This function takes the list
  3.1025 + *	lock and is atomic with respect to other list locking functions.
  3.1026 + */
  3.1027 +
  3.1028 +
  3.1029 +static inline void skb_queue_purge(struct sk_buff_head *list)
  3.1030 +{
  3.1031 +	struct sk_buff *skb;
  3.1032 +	while ((skb=skb_dequeue(list))!=NULL)
  3.1033 +		kfree_skb(skb);
  3.1034 +}
  3.1035 +
  3.1036 +/**
  3.1037 + *	__skb_queue_purge - empty a list
  3.1038 + *	@list: list to empty
  3.1039 + *
  3.1040 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  3.1041 + *	the list and one reference dropped. This function does not take the
  3.1042 + *	list lock and the caller must hold the relevant locks to use it.
  3.1043 + */
  3.1044 +
  3.1045 +
  3.1046 +static inline void __skb_queue_purge(struct sk_buff_head *list)
  3.1047 +{
  3.1048 +	struct sk_buff *skb;
  3.1049 +	while ((skb=__skb_dequeue(list))!=NULL)
  3.1050 +		kfree_skb(skb);
  3.1051 +}
  3.1052 +
  3.1053 +/**
  3.1054 + *	__dev_alloc_skb - allocate an skbuff for sending
  3.1055 + *	@length: length to allocate
  3.1056 + *	@gfp_mask: get_free_pages mask, passed to alloc_skb
  3.1057 + *
  3.1058 + *	Allocate a new &sk_buff and assign it a usage count of one. The
  3.1059 + *	buffer has unspecified headroom built in. Users should allocate
  3.1060 + *	the headroom they think they need without accounting for the
  3.1061 + *	built in space. The built in space is used for optimisations.
  3.1062 + *
  3.1063 + *	%NULL is returned if there is no free memory.
  3.1064 + */
  3.1065 + 
  3.1066 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
  3.1067 +					      int gfp_mask)
  3.1068 +{
  3.1069 +	struct sk_buff *skb;
  3.1070 +
  3.1071 +	//skb = alloc_skb(length+16, gfp_mask);
  3.1072 +        skb = alloc_zc_skb(length+16, gfp_mask);
  3.1073 +	if (skb)
  3.1074 +		skb_reserve(skb,16);
  3.1075 +	return skb;
  3.1076 +}
  3.1077 +
  3.1078 +/**
  3.1079 + *	dev_alloc_skb - allocate an skbuff for sending
  3.1080 + *	@length: length to allocate
  3.1081 + *
  3.1082 + *	Allocate a new &sk_buff and assign it a usage count of one. The
  3.1083 + *	buffer has unspecified headroom built in. Users should allocate
  3.1084 + *	the headroom they think they need without accounting for the
  3.1085 + *	built in space. The built in space is used for optimisations.
  3.1086 + *
  3.1087 + *	%NULL is returned if there is no free memory. Although this function
  3.1088 + *	allocates memory it can be called from an interrupt.
  3.1089 + */
  3.1090 + 
  3.1091 +static inline struct sk_buff *dev_alloc_skb(unsigned int length)
  3.1092 +{
  3.1093 +	return __dev_alloc_skb(length, GFP_ATOMIC);
  3.1094 +}
  3.1095 +
  3.1096 +/**
  3.1097 + *	skb_cow - copy header of skb when it is required
  3.1098 + *	@skb: buffer to cow
  3.1099 + *	@headroom: needed headroom
  3.1100 + *
  3.1101 + *	If the skb passed lacks sufficient headroom or its data part
  3.1102 + *	is shared, data is reallocated. If reallocation fails, an error
  3.1103 + *	is returned and original skb is not changed.
  3.1104 + *
  3.1105 + *	The result is skb with writable area skb->head...skb->tail
  3.1106 + *	and at least @headroom of space at head.
  3.1107 + */
  3.1108 +
  3.1109 +static inline int
  3.1110 +skb_cow(struct sk_buff *skb, unsigned int headroom)
  3.1111 +{
  3.1112 +	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
  3.1113 +
  3.1114 +	if (delta < 0)
  3.1115 +		delta = 0;
  3.1116 +
  3.1117 +	if (delta || skb_cloned(skb))
  3.1118 +		return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
  3.1119 +	return 0;
  3.1120 +}
  3.1121 +
  3.1122 +/**
  3.1123 + *	skb_linearize - convert paged skb to linear one
  3.1124 + *	@skb: buffer to linearize
  3.1125 + *	@gfp: allocation mode
  3.1126 + *
  3.1127 + *	If there is no free memory -ENOMEM is returned, otherwise zero
  3.1128 + *	is returned and the old skb data released.  */
  3.1129 +int skb_linearize(struct sk_buff *skb, int gfp);
  3.1130 +
  3.1131 +static inline void *kmap_skb_frag(const skb_frag_t *frag)
  3.1132 +{
  3.1133 +#ifdef CONFIG_HIGHMEM
  3.1134 +	if (in_irq())
  3.1135 +		BUG();
  3.1136 +
  3.1137 +	local_bh_disable();
  3.1138 +#endif
  3.1139 +	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
  3.1140 +}
  3.1141 +
  3.1142 +static inline void kunmap_skb_frag(void *vaddr)
  3.1143 +{
  3.1144 +	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
  3.1145 +#ifdef CONFIG_HIGHMEM
  3.1146 +	local_bh_enable();
  3.1147 +#endif
  3.1148 +}
  3.1149 +
  3.1150 +#define skb_queue_walk(queue, skb) \
  3.1151 +		for (skb = (queue)->next;			\
  3.1152 +		     (skb != (struct sk_buff *)(queue));	\
  3.1153 +		     skb=skb->next)
  3.1154 +
  3.1155 +
  3.1156 +extern struct sk_buff *		skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
  3.1157 +extern unsigned int		datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
  3.1158 +extern int			skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
  3.1159 +extern int			skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
  3.1160 +extern int			skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
  3.1161 +extern int			skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
  3.1162 +extern void			skb_free_datagram(struct sock * sk, struct sk_buff *skb);
  3.1163 +
  3.1164 +extern unsigned int		skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
  3.1165 +extern int			skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
  3.1166 +extern unsigned int		skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
  3.1167 +extern void			skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
  3.1168 +
  3.1169 +extern void skb_init(void);
  3.1170 +extern void skb_add_mtu(int mtu);
  3.1171 +
  3.1172 +#ifdef CONFIG_NETFILTER
  3.1173 +static inline void
  3.1174 +nf_conntrack_put(struct nf_ct_info *nfct)
  3.1175 +{
  3.1176 +	if (nfct && atomic_dec_and_test(&nfct->master->use))
  3.1177 +		nfct->master->destroy(nfct->master);
  3.1178 +}
  3.1179 +static inline void
  3.1180 +nf_conntrack_get(struct nf_ct_info *nfct)
  3.1181 +{
  3.1182 +	if (nfct)
  3.1183 +		atomic_inc(&nfct->master->use);
  3.1184 +}
  3.1185 +#endif
  3.1186 +
  3.1187 +#endif	/* __KERNEL__ */
  3.1188 +#endif	/* _LINUX_SKBUFF_H */
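
The "Zero Copy additions" comment at the top of this header notes that the
socket free code has to return pages to net_page_list, which is why struct
sk_buff gains the net_page pointer. The release-side change itself is in the
net/core/skbuff.c diff that follows (truncated in this listing just as
skb_release_data() begins), so the snippet below is only a hedged sketch of
what that path presumably looks like, assuming it keys off skb_type and uses
free_net_page() from the diff; it is not the changeset's verbatim code, and
fragment/clone handling is omitted.

/* Sketch only: free an skb's data area, returning zero-copy pages to the
 * pool instead of kfree()ing them. */
extern void free_net_page(struct net_page_info *np); /* net/core/skbuff.c */

static void release_data_sketch(struct sk_buff *skb)
{
        if (skb->skb_type == SKB_ZERO_COPY)
                free_net_page(skb->net_page);   /* back onto net_page_list */
        else
                kfree(skb->head);               /* normal kmalloc()ed data */
}
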
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c	Thu Jan 30 12:15:49 2003 +0000
     4.3 @@ -0,0 +1,1366 @@
     4.4 +/*
     4.5 + *	Routines having to do with the 'struct sk_buff' memory handlers.
     4.6 + *
     4.7 + *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
     4.8 + *			Florian La Roche <rzsfl@rz.uni-sb.de>
     4.9 + *
    4.10 + *	Version:	$Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
    4.11 + *
    4.12 + *	Fixes:	
    4.13 + *		Alan Cox	:	Fixed the worst of the load balancer bugs.
    4.14 + *		Dave Platt	:	Interrupt stacking fix.
    4.15 + *	Richard Kooijman	:	Timestamp fixes.
    4.16 + *		Alan Cox	:	Changed buffer format.
    4.17 + *		Alan Cox	:	destructor hook for AF_UNIX etc.
    4.18 + *		Linus Torvalds	:	Better skb_clone.
    4.19 + *		Alan Cox	:	Added skb_copy.
    4.20 + *		Alan Cox	:	Added all the changed routines Linus
    4.21 + *					only put in the headers
    4.22 + *		Ray VanTassle	:	Fixed --skb->lock in free
    4.23 + *		Alan Cox	:	skb_copy copy arp field
    4.24 + *		Andi Kleen	:	slabified it.
    4.25 + *
    4.26 + *	NOTE:
    4.27 + *		The __skb_ routines should be called with interrupts 
    4.28 + *	disabled, or you better be *real* sure that the operation is atomic 
    4.29 + *	with respect to whatever list is being frobbed (e.g. via lock_sock()
    4.30 + *	or via disabling bottom half handlers, etc).
    4.31 + *
    4.32 + *	This program is free software; you can redistribute it and/or
    4.33 + *	modify it under the terms of the GNU General Public License
    4.34 + *	as published by the Free Software Foundation; either version
    4.35 + *	2 of the License, or (at your option) any later version.
    4.36 + */
    4.37 +
    4.38 +/*
    4.39 + *	The functions in this file will not compile correctly with gcc 2.4.x
    4.40 + */
    4.41 +
    4.42 +#include <linux/config.h>
    4.43 +#include <linux/types.h>
    4.44 +#include <linux/kernel.h>
    4.45 +#include <linux/sched.h>
    4.46 +#include <linux/mm.h>
    4.47 +#include <linux/interrupt.h>
    4.48 +#include <linux/in.h>
    4.49 +#include <linux/inet.h>
    4.50 +#include <linux/slab.h>
    4.51 +#include <linux/netdevice.h>
    4.52 +#include <linux/string.h>
    4.53 +#include <linux/skbuff.h>
    4.54 +#include <linux/cache.h>
    4.55 +#include <linux/init.h>
    4.56 +#include <linux/highmem.h>
    4.57 +#include <linux/spinlock.h>
    4.58 +
    4.59 +#include <net/ip.h>
    4.60 +#include <net/protocol.h>
    4.61 +#include <net/dst.h>
    4.62 +#include <net/tcp.h>
    4.63 +#include <net/udp.h>
    4.64 +#include <net/sock.h>
    4.65 +
    4.66 +#include <asm/uaccess.h>
    4.67 +#include <asm/system.h>
    4.68 +
    4.69 +/* zc globals: */
    4.70 +char *net_page_chunk;
    4.71 +struct net_page_info *net_page_table;
    4.72 +struct list_head net_page_list;
    4.73 +spinlock_t net_page_list_lock = SPIN_LOCK_UNLOCKED;
    4.74 +unsigned int net_pages;
    4.75 +
    4.76 +
    4.77 +
    4.78 +int sysctl_hot_list_len = 128;
    4.79 +
    4.80 +static kmem_cache_t *skbuff_head_cache;
    4.81 +
    4.82 +static union {
    4.83 +	struct sk_buff_head	list;
    4.84 +	char			pad[SMP_CACHE_BYTES];
    4.85 +} skb_head_pool[NR_CPUS];
    4.86 +
    4.87 +/*
    4.88 + *	Keep out-of-line to prevent kernel bloat.
    4.89 + *	__builtin_return_address is not used because it is not always
    4.90 + *	reliable. 
    4.91 + */
    4.92 +
    4.93 +/**
    4.94 + *	skb_over_panic	- 	private function
    4.95 + *	@skb: buffer
    4.96 + *	@sz: size
    4.97 + *	@here: address
    4.98 + *
    4.99 + *	Out of line support code for skb_put(). Not user callable.
   4.100 + */
   4.101 + 
   4.102 +void skb_over_panic(struct sk_buff *skb, int sz, void *here)
   4.103 +{
   4.104 +	printk("skput:over: %p:%d put:%d dev:%s", 
   4.105 +		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   4.106 +	BUG();
   4.107 +}
   4.108 +
   4.109 +/**
   4.110 + *	skb_under_panic	- 	private function
   4.111 + *	@skb: buffer
   4.112 + *	@sz: size
   4.113 + *	@here: address
   4.114 + *
   4.115 + *	Out of line support code for skb_push(). Not user callable.
   4.116 + */
   4.117 + 
   4.118 +
   4.119 +void skb_under_panic(struct sk_buff *skb, int sz, void *here)
   4.120 +{
   4.121 +        printk("skput:under: %p:%d put:%d dev:%s",
   4.122 +                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   4.123 +	BUG();
   4.124 +}
   4.125 +
   4.126 +static __inline__ struct sk_buff *skb_head_from_pool(void)
   4.127 +{
   4.128 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   4.129 +
   4.130 +	if (skb_queue_len(list)) {
   4.131 +		struct sk_buff *skb;
   4.132 +		unsigned long flags;
   4.133 +
   4.134 +		local_irq_save(flags);
   4.135 +		skb = __skb_dequeue(list);
   4.136 +		local_irq_restore(flags);
   4.137 +		return skb;
   4.138 +	}
   4.139 +	return NULL;
   4.140 +}
   4.141 +
   4.142 +static __inline__ void skb_head_to_pool(struct sk_buff *skb)
   4.143 +{
   4.144 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   4.145 +
   4.146 +	if (skb_queue_len(list) < sysctl_hot_list_len) {
   4.147 +		unsigned long flags;
   4.148 +
   4.149 +		local_irq_save(flags);
   4.150 +		__skb_queue_head(list, skb);
   4.151 +		local_irq_restore(flags);
   4.152 +
   4.153 +		return;
   4.154 +	}
   4.155 +	kmem_cache_free(skbuff_head_cache, skb);
   4.156 +}
   4.157 +
   4.158 +
   4.159 +/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
   4.160 + *	'private' fields and also do memory statistics to find all the
   4.161 + *	[BEEP] leaks.
   4.162 + * 
   4.163 + */
   4.164 +
   4.165 +/**
   4.166 + *	alloc_skb	-	allocate a network buffer
   4.167 + *	@size: size to allocate
   4.168 + *	@gfp_mask: allocation mask
   4.169 + *
   4.170 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
   4.171 + *	tail room of size bytes. The object has a reference count of one.
   4.172 + *	The return is the buffer. On a failure the return is %NULL.
   4.173 + *
   4.174 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
   4.175 + *	%GFP_ATOMIC.
   4.176 + */
   4.177 + 
   4.178 +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
   4.179 +{
   4.180 +	struct sk_buff *skb;
   4.181 +	u8 *data;
   4.182 +
   4.183 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   4.184 +		static int count = 0;
   4.185 +		if (++count < 5) {
   4.186 +			printk(KERN_ERR "alloc_skb called nonatomically "
   4.187 +			       "from interrupt %p\n", NET_CALLER(size));
   4.188 + 			BUG();
   4.189 +		}
   4.190 +		gfp_mask &= ~__GFP_WAIT;
   4.191 +	}
   4.192 +
   4.193 +	/* Get the HEAD */
   4.194 +	skb = skb_head_from_pool();
   4.195 +	if (skb == NULL) {
   4.196 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   4.197 +		if (skb == NULL)
   4.198 +			goto nohead;
   4.199 +	}
   4.200 +
   4.201 +	/* Get the DATA. Size must match skb_add_mtu(). */
   4.202 +	size = SKB_DATA_ALIGN(size);
   4.203 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   4.204 +	if (data == NULL)
   4.205 +		goto nodata;
   4.206 +
   4.207 +	/* XXX: does not include slab overhead */ 
   4.208 +	skb->truesize = size + sizeof(struct sk_buff);
   4.209 +
   4.210 +	/* Load the data pointers. */
   4.211 +	skb->head = data;
   4.212 +	skb->data = data;
   4.213 +	skb->tail = data;
   4.214 +	skb->end = data + size;
   4.215 +
   4.216 +	/* Set up other state */
   4.217 +	skb->len = 0;
   4.218 +	skb->cloned = 0;
   4.219 +	skb->data_len = 0;
   4.220 +        skb->skb_type = SKB_NORMAL;
   4.221 +
   4.222 +	atomic_set(&skb->users, 1); 
   4.223 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   4.224 +	skb_shinfo(skb)->nr_frags = 0;
   4.225 +	skb_shinfo(skb)->frag_list = NULL;
   4.226 +	return skb;
   4.227 +
   4.228 +nodata:
   4.229 +	skb_head_to_pool(skb);
   4.230 +nohead:
   4.231 +	return NULL;
   4.232 +}
   4.233 +
   4.234 +/* begin zc code additions: */
   4.235 +
   4.236 +void init_net_pages(unsigned long order_pages)
   4.237 +{
   4.238 +        int i;
   4.239 +        struct net_page_info *np;
   4.240 +        pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
   4.241 +        unsigned long nr_pages = 1 << order_pages;
   4.242 +        
   4.243 +        net_page_chunk = (char *)__get_free_pages(GFP_KERNEL, order_pages);
   4.244 +        net_page_table = kmalloc(nr_pages * sizeof(struct net_page_info), GFP_KERNEL);
   4.245 +
   4.246 +        INIT_LIST_HEAD(&net_page_list);
   4.247 +
   4.248 +        for (i = 0; i < nr_pages; i++) 
   4.249 +        {
   4.250 +                np = net_page_table + i;
   4.251 +                np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE);
   4.252 +                
   4.253 +                // now fill the pte pointer:
   4.254 +                np->ppte = 0xdeadbeef;
   4.255 +                pgd = pgd_offset_k(np->virt_addr);
   4.256 +                if (!pgd_none(*pgd))
   4.257 +                {
   4.258 +                    pmd = pmd_offset(pgd, np->virt_addr);
   4.259 +                    if (!pmd_none(*pmd))
   4.260 +                    {
   4.261 +                            ptep = pte_offset(pmd, np->virt_addr);
   4.262 +                            np->ppte = (unsigned long)ptep; // need to virt_to_phys this?
   4.263 +                    }
   4.264 +                }
   4.265 +
   4.266 +                list_add_tail(&np->list, &net_page_list);
   4.267 +        }
   4.268 +        net_pages = nr_pages;
   4.269 +        
   4.270 +
   4.271 +}
   4.272 +
   4.273 +struct net_page_info *get_net_page(void)
   4.274 +{
   4.275 +    struct list_head *list_ptr;
   4.276 +    struct net_page_info *np;
   4.277 +    unsigned long flags;
   4.278 +
   4.279 +    if (!net_pages) 
   4.280 +    {
   4.281 +            return NULL;
   4.282 +    }
   4.283 +    spin_lock_irqsave(&net_page_list_lock, flags);
   4.284 +    
   4.285 +    list_ptr = net_page_list.next;
   4.286 +    np = list_entry(list_ptr, struct net_page_info, list);
   4.287 +    list_del(&np->list);
   4.288 +    net_pages--;
   4.289 +    
   4.290 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
   4.291 +    
   4.292 +    return np;
   4.293 +}
   4.294 +
   4.295 +void free_net_page(struct net_page_info *np)
   4.296 +{
   4.297 +    unsigned long flags;
   4.298 +  
   4.299 +    if (np == NULL) return;
   4.300 +    
   4.301 +    spin_lock_irqsave(&net_page_list_lock, flags);
   4.302 +    
   4.303 +    list_add_tail(&np->list, &net_page_list);
   4.304 +    net_pages++;
   4.305 +
   4.306 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
   4.307 +}
   4.308 +
   4.309 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
   4.310 +{
   4.311 +	struct sk_buff *skb;
   4.312 +	u8 *data;
   4.313 +
   4.314 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   4.315 +		static int count = 0;
   4.316 +		if (++count < 5) {
   4.317 +			printk(KERN_ERR "alloc_skb called nonatomically "
   4.318 +			       "from interrupt %p\n", NET_CALLER(size));
   4.319 + 			BUG();
   4.320 +		}
   4.321 +		gfp_mask &= ~__GFP_WAIT;
   4.322 +	}
   4.323 +
   4.324 +	/* Get the HEAD */
   4.325 +	skb = skb_head_from_pool();
   4.326 +	if (skb == NULL) {
   4.327 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   4.328 +		if (skb == NULL)
   4.329 +			goto nohead;
   4.330 +	}
   4.331 +
   4.332 +	/* Get the DATA. Size must match skb_add_mtu(). */
   4.333 +	size = SKB_DATA_ALIGN(size);
   4.334 +        if (size > PAGE_SIZE)
   4.335 +        {
    4.336 +                printk(KERN_WARNING "alloc_zc_skb: requested size exceeds PAGE_SIZE, clamping.\n");
   4.337 +                size = PAGE_SIZE;
   4.338 +        }
   4.339 +	skb->net_page = get_net_page();
   4.340 +        if (skb->net_page == NULL)
   4.341 +        {
   4.342 +                goto nodata;
   4.343 +        }
   4.344 +        data = (u8 *)skb->net_page->virt_addr;
   4.345 +	if (data == NULL)
   4.346 +		goto nodata;
   4.347 +	/* XXX: does not include slab overhead */ 
   4.348 +	skb->truesize = size + sizeof(struct sk_buff);
   4.349 +
   4.350 +	/* Load the data pointers. */
   4.351 +	skb->head = data;
   4.352 +	skb->data = data;
   4.353 +	skb->tail = data;
   4.354 +	skb->end = data + size;
   4.355 +
   4.356 +	/* Set up other state */
   4.357 +	skb->len = 0;
   4.358 +	skb->cloned = 0;
   4.359 +	skb->data_len = 0;
   4.360 +        skb->skb_type = SKB_ZERO_COPY;
   4.361 +
   4.362 +	atomic_set(&skb->users, 1); 
   4.363 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   4.364 +	skb_shinfo(skb)->nr_frags = 0;
   4.365 +	skb_shinfo(skb)->frag_list = NULL;
   4.366 +	return skb;
   4.367 +
   4.368 +nodata:
   4.369 +	skb_head_to_pool(skb);
   4.370 +nohead:
   4.371 +	return NULL;
   4.372 +}
   4.373 +
   4.374 +/* end zc code additions: */
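
A hedged sketch of how a receive path might consume the zero-copy additions above.
The driver function, dev and len are assumptions for illustration (a normal
<linux/netdevice.h> driver context is presumed) and do not appear in this changeset:

	static void example_zc_rx(struct net_device *dev, unsigned int len)
	{
		struct sk_buff *skb;

		/* Data lands in a page drawn from the pre-allocated pool. */
		skb = alloc_zc_skb(len + 16, GFP_ATOMIC);
		if (skb == NULL)
			return;					/* drop: pool or slab exhausted */
		skb_reserve(skb, 16);				/* align the IP header */
		/* ... receive 'len' bytes into skb->data ... */
		skb_put(skb, len);
		skb->dev = dev;
		skb->protocol = eth_type_trans(skb, dev);
		netif_rx(skb);		/* kfree_skb() later returns the page via free_net_page() */
	}
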
   4.375 +
   4.376 +/*
   4.377 + *	Slab constructor for a skb head. 
   4.378 + */ 
   4.379 +static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
   4.380 +				  unsigned long flags)
   4.381 +{
   4.382 +	struct sk_buff *skb = p;
   4.383 +
   4.384 +	skb->next = NULL;
   4.385 +	skb->prev = NULL;
   4.386 +	skb->list = NULL;
   4.387 +	skb->sk = NULL;
   4.388 +	skb->stamp.tv_sec=0;	/* No idea about time */
   4.389 +	skb->dev = NULL;
   4.390 +	skb->dst = NULL;
   4.391 +	memset(skb->cb, 0, sizeof(skb->cb));
   4.392 +	skb->pkt_type = PACKET_HOST;	/* Default type */
   4.393 +	skb->ip_summed = 0;
   4.394 +	skb->priority = 0;
   4.395 +	skb->security = 0;	/* By default packets are insecure */
   4.396 +	skb->destructor = NULL;
   4.397 +
   4.398 +#ifdef CONFIG_NETFILTER
   4.399 +	skb->nfmark = skb->nfcache = 0;
   4.400 +	skb->nfct = NULL;
   4.401 +#ifdef CONFIG_NETFILTER_DEBUG
   4.402 +	skb->nf_debug = 0;
   4.403 +#endif
   4.404 +#endif
   4.405 +#ifdef CONFIG_NET_SCHED
   4.406 +	skb->tc_index = 0;
   4.407 +#endif
   4.408 +}
   4.409 +
   4.410 +static void skb_drop_fraglist(struct sk_buff *skb)
   4.411 +{
   4.412 +	struct sk_buff *list = skb_shinfo(skb)->frag_list;
   4.413 +
   4.414 +	skb_shinfo(skb)->frag_list = NULL;
   4.415 +
   4.416 +	do {
   4.417 +		struct sk_buff *this = list;
   4.418 +		list = list->next;
   4.419 +		kfree_skb(this);
   4.420 +	} while (list);
   4.421 +}
   4.422 +
   4.423 +static void skb_clone_fraglist(struct sk_buff *skb)
   4.424 +{
   4.425 +	struct sk_buff *list;
   4.426 +
   4.427 +	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
   4.428 +		skb_get(list);
   4.429 +}
   4.430 +
   4.431 +static void skb_release_data(struct sk_buff *skb)
   4.432 +{
   4.433 +	if (!skb->cloned ||
   4.434 +	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
   4.435 +		if (skb_shinfo(skb)->nr_frags) {
   4.436 +			int i;
   4.437 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
   4.438 +				put_page(skb_shinfo(skb)->frags[i].page);
   4.439 +		}
   4.440 +
   4.441 +		if (skb_shinfo(skb)->frag_list)
   4.442 +			skb_drop_fraglist(skb);
   4.443 +
   4.444 +                if (skb->skb_type == SKB_NORMAL)
   4.445 +                {
   4.446 +		    kfree(skb->head);
    4.447 +                } else { /* SKB_ZERO_COPY */
   4.448 +                    free_net_page(skb->net_page);
   4.449 +                }
   4.450 +	}
   4.451 +}
   4.452 +
   4.453 +/*
   4.454 + *	Free an skbuff by memory without cleaning the state. 
   4.455 + */
   4.456 +void kfree_skbmem(struct sk_buff *skb)
   4.457 +{
   4.458 +	skb_release_data(skb);
   4.459 +	skb_head_to_pool(skb);
   4.460 +}
   4.461 +
   4.462 +/**
   4.463 + *	__kfree_skb - private function 
   4.464 + *	@skb: buffer
   4.465 + *
   4.466 + *	Free an sk_buff. Release anything attached to the buffer. 
   4.467 + *	Clean the state. This is an internal helper function. Users should
   4.468 + *	always call kfree_skb
   4.469 + */
   4.470 +
   4.471 +void __kfree_skb(struct sk_buff *skb)
   4.472 +{
   4.473 +	if (skb->list) {
   4.474 +	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
   4.475 +		       "on a list (from %p).\n", NET_CALLER(skb));
   4.476 +		BUG();
   4.477 +	}
   4.478 +
   4.479 +	dst_release(skb->dst);
   4.480 +	if(skb->destructor) {
   4.481 +		if (in_irq()) {
   4.482 +			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
   4.483 +				NET_CALLER(skb));
   4.484 +		}
   4.485 +		skb->destructor(skb);
   4.486 +	}
   4.487 +#ifdef CONFIG_NETFILTER
   4.488 +	nf_conntrack_put(skb->nfct);
   4.489 +#endif
   4.490 +	skb_headerinit(skb, NULL, 0);  /* clean state */
   4.491 +	kfree_skbmem(skb);
   4.492 +}
   4.493 +
   4.494 +/**
   4.495 + *	skb_clone	-	duplicate an sk_buff
   4.496 + *	@skb: buffer to clone
   4.497 + *	@gfp_mask: allocation priority
   4.498 + *
   4.499 + *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
   4.500 + *	copies share the same packet data but not structure. The new
    4.501 + *	buffer has a reference count of 1. If the allocation fails the
    4.502 + *	function returns %NULL; otherwise the new buffer is returned.
   4.503 + *	
   4.504 + *	If this function is called from an interrupt gfp_mask() must be
   4.505 + *	%GFP_ATOMIC.
   4.506 + */
   4.507 +
   4.508 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
   4.509 +{
   4.510 +	struct sk_buff *n;
   4.511 +
   4.512 +	n = skb_head_from_pool();
   4.513 +	if (!n) {
   4.514 +		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
   4.515 +		if (!n)
   4.516 +			return NULL;
   4.517 +	}
   4.518 +
   4.519 +#define C(x) n->x = skb->x
   4.520 +
   4.521 +	n->next = n->prev = NULL;
   4.522 +	n->list = NULL;
   4.523 +	n->sk = NULL;
   4.524 +	C(stamp);
   4.525 +	C(dev);
   4.526 +	C(h);
   4.527 +	C(nh);
   4.528 +	C(mac);
   4.529 +	C(dst);
   4.530 +	dst_clone(n->dst);
   4.531 +	memcpy(n->cb, skb->cb, sizeof(skb->cb));
   4.532 +	C(len);
   4.533 +	C(data_len);
   4.534 +	C(csum);
   4.535 +	n->cloned = 1;
   4.536 +	C(pkt_type);
   4.537 +	C(ip_summed);
   4.538 +	C(priority);
   4.539 +	atomic_set(&n->users, 1);
   4.540 +	C(protocol);
   4.541 +	C(security);
   4.542 +	C(truesize);
   4.543 +	C(head);
   4.544 +	C(data);
   4.545 +	C(tail);
   4.546 +	C(end);
   4.547 +	n->destructor = NULL;
   4.548 +#ifdef CONFIG_NETFILTER
   4.549 +	C(nfmark);
   4.550 +	C(nfcache);
   4.551 +	C(nfct);
   4.552 +#ifdef CONFIG_NETFILTER_DEBUG
   4.553 +	C(nf_debug);
   4.554 +#endif
   4.555 +#endif /*CONFIG_NETFILTER*/
   4.556 +#if defined(CONFIG_HIPPI)
   4.557 +	C(private);
   4.558 +#endif
   4.559 +#ifdef CONFIG_NET_SCHED
   4.560 +	C(tc_index);
   4.561 +#endif
   4.562 +        C(skb_type);
   4.563 +        C(net_page);
   4.564 +	atomic_inc(&(skb_shinfo(skb)->dataref));
   4.565 +	skb->cloned = 1;
   4.566 +#ifdef CONFIG_NETFILTER
   4.567 +	nf_conntrack_get(skb->nfct);
   4.568 +#endif
   4.569 +	return n;
   4.570 +}
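
Note that skb_type and net_page are copied into the clone, so a clone of an
SKB_ZERO_COPY buffer shares the same pool page; the page is handed back to
free_net_page() only when the last reference to the data is dropped. A small
illustration (example_clone and orig are hypothetical):

	static void example_clone(struct sk_buff *orig)
	{
		struct sk_buff *c = skb_clone(orig, GFP_ATOMIC);

		if (c != NULL) {
			/* c->data == orig->data: the data area (and any net_page)
			 * is shared; only the sk_buff head is new. */
			kfree_skb(c);	/* shared data is freed with the last reference */
		}
	}
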
   4.571 +
   4.572 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
   4.573 +{
   4.574 +	/*
   4.575 +	 *	Shift between the two data areas in bytes
   4.576 +	 */
   4.577 +	unsigned long offset = new->data - old->data;
   4.578 +
   4.579 +	new->list=NULL;
   4.580 +	new->sk=NULL;
   4.581 +	new->dev=old->dev;
   4.582 +	new->priority=old->priority;
   4.583 +	new->protocol=old->protocol;
   4.584 +	new->dst=dst_clone(old->dst);
   4.585 +	new->h.raw=old->h.raw+offset;
   4.586 +	new->nh.raw=old->nh.raw+offset;
   4.587 +	new->mac.raw=old->mac.raw+offset;
   4.588 +	memcpy(new->cb, old->cb, sizeof(old->cb));
   4.589 +	atomic_set(&new->users, 1);
   4.590 +	new->pkt_type=old->pkt_type;
   4.591 +	new->stamp=old->stamp;
   4.592 +	new->destructor = NULL;
   4.593 +	new->security=old->security;
   4.594 +#ifdef CONFIG_NETFILTER
   4.595 +	new->nfmark=old->nfmark;
   4.596 +	new->nfcache=old->nfcache;
   4.597 +	new->nfct=old->nfct;
   4.598 +	nf_conntrack_get(new->nfct);
   4.599 +#ifdef CONFIG_NETFILTER_DEBUG
   4.600 +	new->nf_debug=old->nf_debug;
   4.601 +#endif
   4.602 +#endif
   4.603 +#ifdef CONFIG_NET_SCHED
   4.604 +	new->tc_index = old->tc_index;
   4.605 +#endif
   4.606 +}
   4.607 +
   4.608 +/**
   4.609 + *	skb_copy	-	create private copy of an sk_buff
   4.610 + *	@skb: buffer to copy
   4.611 + *	@gfp_mask: allocation priority
   4.612 + *
   4.613 + *	Make a copy of both an &sk_buff and its data. This is used when the
   4.614 + *	caller wishes to modify the data and needs a private copy of the 
   4.615 + *	data to alter. Returns %NULL on failure or the pointer to the buffer
   4.616 + *	on success. The returned buffer has a reference count of 1.
   4.617 + *
   4.618 + *	As by-product this function converts non-linear &sk_buff to linear
   4.619 + *	one, so that &sk_buff becomes completely private and caller is allowed
   4.620 + *	to modify all the data of returned buffer. This means that this
   4.621 + *	function is not recommended for use in circumstances when only
   4.622 + *	header is going to be modified. Use pskb_copy() instead.
   4.623 + */
   4.624 + 
   4.625 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
   4.626 +{
   4.627 +	struct sk_buff *n;
   4.628 +	int headerlen = skb->data-skb->head;
   4.629 +
   4.630 +	/*
   4.631 +	 *	Allocate the copy buffer
   4.632 +	 */
   4.633 +	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
   4.634 +	if(n==NULL)
   4.635 +		return NULL;
   4.636 +
   4.637 +	/* Set the data pointer */
   4.638 +	skb_reserve(n,headerlen);
   4.639 +	/* Set the tail pointer and length */
   4.640 +	skb_put(n,skb->len);
   4.641 +	n->csum = skb->csum;
   4.642 +	n->ip_summed = skb->ip_summed;
   4.643 +
   4.644 +	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
   4.645 +		BUG();
   4.646 +
   4.647 +	copy_skb_header(n, skb);
   4.648 +
   4.649 +	return n;
   4.650 +}
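
Because the copy is allocated with alloc_skb(), it comes back as SKB_NORMAL even
when the original was SKB_ZERO_COPY, which makes skb_copy() the safe (if expensive)
way to obtain a writable buffer independent of the pool page. A brief sketch with an
illustrative helper name:

	static void example_private_copy(struct sk_buff *skb)
	{
		struct sk_buff *priv = skb_copy(skb, GFP_ATOMIC);

		if (priv != NULL) {
			/* priv owns its own data (SKB_NORMAL);
			 * ... modify priv->data as needed; the original is untouched ... */
			kfree_skb(priv);
		}
	}
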
   4.651 +
   4.652 +/* Keep head the same: replace data */
   4.653 +int skb_linearize(struct sk_buff *skb, int gfp_mask)
   4.654 +{
   4.655 +	unsigned int size;
   4.656 +	u8 *data;
   4.657 +	long offset;
   4.658 +	int headerlen = skb->data - skb->head;
   4.659 +	int expand = (skb->tail+skb->data_len) - skb->end;
   4.660 +
   4.661 +	if (skb_shared(skb))
   4.662 +		BUG();
   4.663 +
   4.664 +	if (expand <= 0)
   4.665 +		expand = 0;
   4.666 +
   4.667 +	size = (skb->end - skb->head + expand);
   4.668 +	size = SKB_DATA_ALIGN(size);
   4.669 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   4.670 +	if (data == NULL)
   4.671 +		return -ENOMEM;
   4.672 +
   4.673 +	/* Copy entire thing */
   4.674 +	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
   4.675 +		BUG();
   4.676 +
   4.677 +	/* Offset between the two in bytes */
   4.678 +	offset = data - skb->head;
   4.679 +
   4.680 +	/* Free old data. */
   4.681 +	skb_release_data(skb);
   4.682 +
   4.683 +	skb->head = data;
   4.684 +	skb->end  = data + size;
   4.685 +
   4.686 +	/* Set up new pointers */
   4.687 +	skb->h.raw += offset;
   4.688 +	skb->nh.raw += offset;
   4.689 +	skb->mac.raw += offset;
   4.690 +	skb->tail += offset;
   4.691 +	skb->data += offset;
   4.692 +
   4.693 +	/* Set up shinfo */
   4.694 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   4.695 +	skb_shinfo(skb)->nr_frags = 0;
   4.696 +	skb_shinfo(skb)->frag_list = NULL;
   4.697 +
   4.698 +	/* We are no longer a clone, even if we were. */
   4.699 +	skb->cloned = 0;
   4.700 +
   4.701 +	skb->tail += skb->data_len;
   4.702 +	skb->data_len = 0;
   4.703 +	return 0;
   4.704 +}
   4.705 +
   4.706 +
   4.707 +/**
   4.708 + *	pskb_copy	-	create copy of an sk_buff with private head.
   4.709 + *	@skb: buffer to copy
   4.710 + *	@gfp_mask: allocation priority
   4.711 + *
   4.712 + *	Make a copy of both an &sk_buff and part of its data, located
   4.713 + *	in header. Fragmented data remain shared. This is used when
   4.714 + *	the caller wishes to modify only header of &sk_buff and needs
   4.715 + *	private copy of the header to alter. Returns %NULL on failure
   4.716 + *	or the pointer to the buffer on success.
   4.717 + *	The returned buffer has a reference count of 1.
   4.718 + */
   4.719 +
   4.720 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
   4.721 +{
   4.722 +	struct sk_buff *n;
   4.723 +
   4.724 +	/*
   4.725 +	 *	Allocate the copy buffer
   4.726 +	 */
   4.727 +	n=alloc_skb(skb->end - skb->head, gfp_mask);
   4.728 +	if(n==NULL)
   4.729 +		return NULL;
   4.730 +
   4.731 +	/* Set the data pointer */
   4.732 +	skb_reserve(n,skb->data-skb->head);
   4.733 +	/* Set the tail pointer and length */
   4.734 +	skb_put(n,skb_headlen(skb));
   4.735 +	/* Copy the bytes */
   4.736 +	memcpy(n->data, skb->data, n->len);
   4.737 +	n->csum = skb->csum;
   4.738 +	n->ip_summed = skb->ip_summed;
   4.739 +
   4.740 +	n->data_len = skb->data_len;
   4.741 +	n->len = skb->len;
   4.742 +
   4.743 +	if (skb_shinfo(skb)->nr_frags) {
   4.744 +		int i;
   4.745 +
   4.746 +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   4.747 +			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
   4.748 +			get_page(skb_shinfo(n)->frags[i].page);
   4.749 +		}
   4.750 +		skb_shinfo(n)->nr_frags = i;
   4.751 +	}
   4.752 +
   4.753 +	if (skb_shinfo(skb)->frag_list) {
   4.754 +		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
   4.755 +		skb_clone_fraglist(n);
   4.756 +	}
   4.757 +
   4.758 +	copy_skb_header(n, skb);
   4.759 +
   4.760 +	return n;
   4.761 +}
   4.762 +
   4.763 +/**
   4.764 + *	pskb_expand_head - reallocate header of &sk_buff
   4.765 + *	@skb: buffer to reallocate
   4.766 + *	@nhead: room to add at head
   4.767 + *	@ntail: room to add at tail
   4.768 + *	@gfp_mask: allocation priority
   4.769 + *
   4.770 + *	Expands (or creates identical copy, if &nhead and &ntail are zero)
   4.771 + *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
    4.772 + *	reference count of 1. Returns zero on success, or a negative error
    4.773 + *	code if expansion failed; in that case the &sk_buff is not changed.
   4.774 + *
   4.775 + *	All the pointers pointing into skb header may change and must be
   4.776 + *	reloaded after call to this function.
   4.777 + */
   4.778 +
   4.779 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
   4.780 +{
   4.781 +	int i;
   4.782 +	u8 *data;
   4.783 +	int size = nhead + (skb->end - skb->head) + ntail;
   4.784 +	long off;
   4.785 +
   4.786 +	if (skb_shared(skb))
   4.787 +		BUG();
   4.788 +
   4.789 +	size = SKB_DATA_ALIGN(size);
   4.790 +
   4.791 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   4.792 +	if (data == NULL)
   4.793 +		goto nodata;
   4.794 +
   4.795 +	/* Copy only real data... and, alas, header. This should be
   4.796 +	 * optimized for the cases when header is void. */
   4.797 +	memcpy(data+nhead, skb->head, skb->tail-skb->head);
   4.798 +	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
   4.799 +
   4.800 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
   4.801 +		get_page(skb_shinfo(skb)->frags[i].page);
   4.802 +
   4.803 +	if (skb_shinfo(skb)->frag_list)
   4.804 +		skb_clone_fraglist(skb);
   4.805 +
   4.806 +	skb_release_data(skb);
   4.807 +
   4.808 +	off = (data+nhead) - skb->head;
   4.809 +
   4.810 +	skb->head = data;
   4.811 +	skb->end  = data+size;
   4.812 +
   4.813 +	skb->data += off;
   4.814 +	skb->tail += off;
   4.815 +	skb->mac.raw += off;
   4.816 +	skb->h.raw += off;
   4.817 +	skb->nh.raw += off;
   4.818 +	skb->cloned = 0;
   4.819 +	atomic_set(&skb_shinfo(skb)->dataref, 1);
   4.820 +	return 0;
   4.821 +
   4.822 +nodata:
   4.823 +	return -ENOMEM;
   4.824 +}
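
Since the data area is reallocated, any pointers a caller has cached into the old
header are stale after a successful call. A hedged sketch of adding headroom to a
cloned skb (example_add_headroom is illustrative, not part of the patch):

	static int example_add_headroom(struct sk_buff *skb)
	{
		/* Caller must hold the only reference: skb_shared() would BUG(). */
		if (pskb_expand_head(skb, 16, 0, GFP_ATOMIC) != 0)
			return -ENOMEM;		/* skb is left untouched on failure */
		/* skb->head/data have moved; nh.raw, h.raw, mac.raw were fixed up
		 * above, but any pointers cached by the caller must be re-derived. */
		return 0;
	}
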
   4.825 +
   4.826 +/* Make private copy of skb with writable head and some headroom */
   4.827 +
   4.828 +struct sk_buff *
   4.829 +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
   4.830 +{
   4.831 +	struct sk_buff *skb2;
   4.832 +	int delta = headroom - skb_headroom(skb);
   4.833 +
   4.834 +	if (delta <= 0)
   4.835 +		return pskb_copy(skb, GFP_ATOMIC);
   4.836 +
   4.837 +	skb2 = skb_clone(skb, GFP_ATOMIC);
   4.838 +	if (skb2 == NULL ||
   4.839 +	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
   4.840 +		return skb2;
   4.841 +
   4.842 +	kfree_skb(skb2);
   4.843 +	return NULL;
   4.844 +}
   4.845 +
   4.846 +
   4.847 +/**
   4.848 + *	skb_copy_expand	-	copy and expand sk_buff
   4.849 + *	@skb: buffer to copy
   4.850 + *	@newheadroom: new free bytes at head
   4.851 + *	@newtailroom: new free bytes at tail
   4.852 + *	@gfp_mask: allocation priority
   4.853 + *
   4.854 + *	Make a copy of both an &sk_buff and its data and while doing so 
   4.855 + *	allocate additional space.
   4.856 + *
   4.857 + *	This is used when the caller wishes to modify the data and needs a 
   4.858 + *	private copy of the data to alter as well as more space for new fields.
   4.859 + *	Returns %NULL on failure or the pointer to the buffer
   4.860 + *	on success. The returned buffer has a reference count of 1.
   4.861 + *
   4.862 + *	You must pass %GFP_ATOMIC as the allocation priority if this function
   4.863 + *	is called from an interrupt.
   4.864 + */
   4.865 + 
   4.866 +
   4.867 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
   4.868 +				int newheadroom,
   4.869 +				int newtailroom,
   4.870 +				int gfp_mask)
   4.871 +{
   4.872 +	struct sk_buff *n;
   4.873 +
   4.874 +	/*
   4.875 +	 *	Allocate the copy buffer
   4.876 +	 */
   4.877 + 	 
   4.878 +	n=alloc_skb(newheadroom + skb->len + newtailroom,
   4.879 +		    gfp_mask);
   4.880 +	if(n==NULL)
   4.881 +		return NULL;
   4.882 +
   4.883 +	skb_reserve(n,newheadroom);
   4.884 +
   4.885 +	/* Set the tail pointer and length */
   4.886 +	skb_put(n,skb->len);
   4.887 +
   4.888 +	/* Copy the data only. */
   4.889 +	if (skb_copy_bits(skb, 0, n->data, skb->len))
   4.890 +		BUG();
   4.891 +
   4.892 +	copy_skb_header(n, skb);
   4.893 +	return n;
   4.894 +}
   4.895 +
    4.896 +/* Trims skb to length len. It can change skb pointers if "realloc" is 1.
    4.897 + * If realloc==0 and trimming is impossible without changing the data,
    4.898 + * it is a BUG().
   4.899 + */
   4.900 +
   4.901 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
   4.902 +{
   4.903 +	int offset = skb_headlen(skb);
   4.904 +	int nfrags = skb_shinfo(skb)->nr_frags;
   4.905 +	int i;
   4.906 +
   4.907 +	for (i=0; i<nfrags; i++) {
   4.908 +		int end = offset + skb_shinfo(skb)->frags[i].size;
   4.909 +		if (end > len) {
   4.910 +			if (skb_cloned(skb)) {
   4.911 +				if (!realloc)
   4.912 +					BUG();
   4.913 +				if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
   4.914 +					return -ENOMEM;
   4.915 +			}
   4.916 +			if (len <= offset) {
   4.917 +				put_page(skb_shinfo(skb)->frags[i].page);
   4.918 +				skb_shinfo(skb)->nr_frags--;
   4.919 +			} else {
   4.920 +				skb_shinfo(skb)->frags[i].size = len-offset;
   4.921 +			}
   4.922 +		}
   4.923 +		offset = end;
   4.924 +	}
   4.925 +
   4.926 +	if (offset < len) {
   4.927 +		skb->data_len -= skb->len - len;
   4.928 +		skb->len = len;
   4.929 +	} else {
   4.930 +		if (len <= skb_headlen(skb)) {
   4.931 +			skb->len = len;
   4.932 +			skb->data_len = 0;
   4.933 +			skb->tail = skb->data + len;
   4.934 +			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
   4.935 +				skb_drop_fraglist(skb);
   4.936 +		} else {
   4.937 +			skb->data_len -= skb->len - len;
   4.938 +			skb->len = len;
   4.939 +		}
   4.940 +	}
   4.941 +
   4.942 +	return 0;
   4.943 +}
   4.944 +
   4.945 +/**
   4.946 + *	__pskb_pull_tail - advance tail of skb header 
   4.947 + *	@skb: buffer to reallocate
   4.948 + *	@delta: number of bytes to advance tail
   4.949 + *
    4.950 + *	The function makes sense only on a fragmented &sk_buff:
    4.951 + *	it expands the header, moving its tail forward and copying the
    4.952 + *	necessary data from the fragmented part.
   4.953 + *
   4.954 + *	&sk_buff MUST have reference count of 1.
   4.955 + *
   4.956 + *	Returns %NULL (and &sk_buff does not change) if pull failed
   4.957 + *	or value of new tail of skb in the case of success.
   4.958 + *
   4.959 + *	All the pointers pointing into skb header may change and must be
   4.960 + *	reloaded after call to this function.
   4.961 + */
   4.962 +
   4.963 +/* Moves tail of skb head forward, copying data from fragmented part,
   4.964 + * when it is necessary.
   4.965 + * 1. It may fail due to malloc failure.
   4.966 + * 2. It may change skb pointers.
   4.967 + *
   4.968 + * It is pretty complicated. Luckily, it is called only in exceptional cases.
   4.969 + */
   4.970 +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
   4.971 +{
   4.972 +	int i, k, eat;
   4.973 +
    4.974 +	/* If the skb does not have enough free space at the tail, get a new
    4.975 +	 * one plus 128 bytes for future expansions. If we have enough
    4.976 +	 * room at the tail, reallocate without expansion only if skb is cloned.
   4.977 +	 */
   4.978 +	eat = (skb->tail+delta) - skb->end;
   4.979 +
   4.980 +	if (eat > 0 || skb_cloned(skb)) {
   4.981 +		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
   4.982 +			return NULL;
   4.983 +	}
   4.984 +
   4.985 +	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
   4.986 +		BUG();
   4.987 +
   4.988 +	/* Optimization: no fragments, no reasons to preestimate
   4.989 +	 * size of pulled pages. Superb.
   4.990 +	 */
   4.991 +	if (skb_shinfo(skb)->frag_list == NULL)
   4.992 +		goto pull_pages;
   4.993 +
   4.994 +	/* Estimate size of pulled pages. */
   4.995 +	eat = delta;
   4.996 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
   4.997 +		if (skb_shinfo(skb)->frags[i].size >= eat)
   4.998 +			goto pull_pages;
   4.999 +		eat -= skb_shinfo(skb)->frags[i].size;
  4.1000 +	}
  4.1001 +
   4.1002 +	/* If we need to update the frag list, we are in trouble.
   4.1003 +	 * Certainly, it is possible to add an offset to the skb data,
   4.1004 +	 * but taking into account that pulling is expected to
   4.1005 +	 * be a very rare operation, it is worth fighting against
   4.1006 +	 * further bloating of the skb head and crucifying ourselves here instead.
   4.1007 +	 * Pure masochism, indeed. 8)8)
   4.1008 +	 */
  4.1009 +	if (eat) {
  4.1010 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
  4.1011 +		struct sk_buff *clone = NULL;
  4.1012 +		struct sk_buff *insp = NULL;
  4.1013 +
  4.1014 +		do {
  4.1015 +			if (list == NULL)
  4.1016 +				BUG();
  4.1017 +
  4.1018 +			if (list->len <= eat) {
  4.1019 +				/* Eaten as whole. */
  4.1020 +				eat -= list->len;
  4.1021 +				list = list->next;
  4.1022 +				insp = list;
  4.1023 +			} else {
  4.1024 +				/* Eaten partially. */
  4.1025 +
  4.1026 +				if (skb_shared(list)) {
  4.1027 +					/* Sucks! We need to fork list. :-( */
  4.1028 +					clone = skb_clone(list, GFP_ATOMIC);
  4.1029 +					if (clone == NULL)
  4.1030 +						return NULL;
  4.1031 +					insp = list->next;
  4.1032 +					list = clone;
  4.1033 +				} else {
  4.1034 +					/* This may be pulled without
  4.1035 +					 * problems. */
  4.1036 +					insp = list;
  4.1037 +				}
  4.1038 +				if (pskb_pull(list, eat) == NULL) {
  4.1039 +					if (clone)
  4.1040 +						kfree_skb(clone);
  4.1041 +					return NULL;
  4.1042 +				}
  4.1043 +				break;
  4.1044 +			}
  4.1045 +		} while (eat);
  4.1046 +
  4.1047 +		/* Free pulled out fragments. */
  4.1048 +		while ((list = skb_shinfo(skb)->frag_list) != insp) {
  4.1049 +			skb_shinfo(skb)->frag_list = list->next;
  4.1050 +			kfree_skb(list);
  4.1051 +		}
  4.1052 +		/* And insert new clone at head. */
  4.1053 +		if (clone) {
  4.1054 +			clone->next = list;
  4.1055 +			skb_shinfo(skb)->frag_list = clone;
  4.1056 +		}
  4.1057 +	}
  4.1058 +	/* Success! Now we may commit changes to skb data. */
  4.1059 +
  4.1060 +pull_pages:
  4.1061 +	eat = delta;
  4.1062 +	k = 0;
  4.1063 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  4.1064 +		if (skb_shinfo(skb)->frags[i].size <= eat) {
  4.1065 +			put_page(skb_shinfo(skb)->frags[i].page);
  4.1066 +			eat -= skb_shinfo(skb)->frags[i].size;
  4.1067 +		} else {
  4.1068 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
  4.1069 +			if (eat) {
  4.1070 +				skb_shinfo(skb)->frags[k].page_offset += eat;
  4.1071 +				skb_shinfo(skb)->frags[k].size -= eat;
  4.1072 +				eat = 0;
  4.1073 +			}
  4.1074 +			k++;
  4.1075 +		}
  4.1076 +	}
  4.1077 +	skb_shinfo(skb)->nr_frags = k;
  4.1078 +
  4.1079 +	skb->tail += delta;
  4.1080 +	skb->data_len -= delta;
  4.1081 +
  4.1082 +	return skb->tail;
  4.1083 +}
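
In stock 2.4 this is normally reached via pskb_may_pull() (declared in skbuff.h),
which only falls back to __pskb_pull_tail() when the requested bytes are not already
in the linear header. A hedged usage sketch (example_pull is illustrative):

	static int example_pull(struct sk_buff *skb)
	{
		/* Ensure the first 20 bytes (a minimal IP header) are linear. */
		if (!pskb_may_pull(skb, 20))
			return -EINVAL;		/* packet shorter than 20 bytes */
		/* skb->data now covers at least 20 contiguous bytes. */
		return 0;
	}
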
  4.1084 +
  4.1085 +/* Copy some data bits from skb to kernel buffer. */
  4.1086 +
  4.1087 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
  4.1088 +{
  4.1089 +	int i, copy;
  4.1090 +	int start = skb->len - skb->data_len;
  4.1091 +
  4.1092 +	if (offset > (int)skb->len-len)
  4.1093 +		goto fault;
  4.1094 +
  4.1095 +	/* Copy header. */
  4.1096 +	if ((copy = start-offset) > 0) {
  4.1097 +		if (copy > len)
  4.1098 +			copy = len;
  4.1099 +		memcpy(to, skb->data + offset, copy);
  4.1100 +		if ((len -= copy) == 0)
  4.1101 +			return 0;
  4.1102 +		offset += copy;
  4.1103 +		to += copy;
  4.1104 +	}
  4.1105 +
  4.1106 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  4.1107 +		int end;
  4.1108 +
  4.1109 +		BUG_TRAP(start <= offset+len);
  4.1110 +
  4.1111 +		end = start + skb_shinfo(skb)->frags[i].size;
  4.1112 +		if ((copy = end-offset) > 0) {
  4.1113 +			u8 *vaddr;
  4.1114 +
  4.1115 +			if (copy > len)
  4.1116 +				copy = len;
  4.1117 +
  4.1118 +			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
  4.1119 +			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
  4.1120 +			       offset-start, copy);
  4.1121 +			kunmap_skb_frag(vaddr);
  4.1122 +
  4.1123 +			if ((len -= copy) == 0)
  4.1124 +				return 0;
  4.1125 +			offset += copy;
  4.1126 +			to += copy;
  4.1127 +		}
  4.1128 +		start = end;
  4.1129 +	}
  4.1130 +
  4.1131 +	if (skb_shinfo(skb)->frag_list) {
  4.1132 +		struct sk_buff *list;
  4.1133 +
  4.1134 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  4.1135 +			int end;
  4.1136 +
  4.1137 +			BUG_TRAP(start <= offset+len);
  4.1138 +
  4.1139 +			end = start + list->len;
  4.1140 +			if ((copy = end-offset) > 0) {
  4.1141 +				if (copy > len)
  4.1142 +					copy = len;
  4.1143 +				if (skb_copy_bits(list, offset-start, to, copy))
  4.1144 +					goto fault;
  4.1145 +				if ((len -= copy) == 0)
  4.1146 +					return 0;
  4.1147 +				offset += copy;
  4.1148 +				to += copy;
  4.1149 +			}
  4.1150 +			start = end;
  4.1151 +		}
  4.1152 +	}
  4.1153 +	if (len == 0)
  4.1154 +		return 0;
  4.1155 +
  4.1156 +fault:
  4.1157 +	return -EFAULT;
  4.1158 +}
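
The copy works whether the bytes live in the linear header, in page frags, or down
the frag_list, so callers need not care where the data landed. A short sketch (the
helper name and 64-byte buffer size are arbitrary choices for illustration):

	static int example_peek_header(const struct sk_buff *skb, u8 *out)
	{
		/* Flatten the first 64 bytes of the packet into 'out'. */
		if (skb_copy_bits(skb, 0, out, 64) < 0)
			return -EFAULT;		/* skb shorter than 64 bytes */
		return 0;
	}
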
  4.1159 +
  4.1160 +/* Checksum skb data. */
  4.1161 +
  4.1162 +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
  4.1163 +{
  4.1164 +	int i, copy;
  4.1165 +	int start = skb->len - skb->data_len;
  4.1166 +	int pos = 0;
  4.1167 +
  4.1168 +	/* Checksum header. */
  4.1169 +	if ((copy = start-offset) > 0) {
  4.1170 +		if (copy > len)
  4.1171 +			copy = len;
  4.1172 +		csum = csum_partial(skb->data+offset, copy, csum);
  4.1173 +		if ((len -= copy) == 0)
  4.1174 +			return csum;
  4.1175 +		offset += copy;
  4.1176 +		pos = copy;
  4.1177 +	}
  4.1178 +
  4.1179 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  4.1180 +		int end;
  4.1181 +
  4.1182 +		BUG_TRAP(start <= offset+len);
  4.1183 +
  4.1184 +		end = start + skb_shinfo(skb)->frags[i].size;
  4.1185 +		if ((copy = end-offset) > 0) {
  4.1186 +			unsigned int csum2;
  4.1187 +			u8 *vaddr;
  4.1188 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  4.1189 +
  4.1190 +			if (copy > len)
  4.1191 +				copy = len;
  4.1192 +			vaddr = kmap_skb_frag(frag);
  4.1193 +			csum2 = csum_partial(vaddr + frag->page_offset +
  4.1194 +					     offset-start, copy, 0);
  4.1195 +			kunmap_skb_frag(vaddr);
  4.1196 +			csum = csum_block_add(csum, csum2, pos);
  4.1197 +			if (!(len -= copy))
  4.1198 +				return csum;
  4.1199 +			offset += copy;
  4.1200 +			pos += copy;
  4.1201 +		}
  4.1202 +		start = end;
  4.1203 +	}
  4.1204 +
  4.1205 +	if (skb_shinfo(skb)->frag_list) {
  4.1206 +		struct sk_buff *list;
  4.1207 +
  4.1208 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  4.1209 +			int end;
  4.1210 +
  4.1211 +			BUG_TRAP(start <= offset+len);
  4.1212 +
  4.1213 +			end = start + list->len;
  4.1214 +			if ((copy = end-offset) > 0) {
  4.1215 +				unsigned int csum2;
  4.1216 +				if (copy > len)
  4.1217 +					copy = len;
  4.1218 +				csum2 = skb_checksum(list, offset-start, copy, 0);
  4.1219 +				csum = csum_block_add(csum, csum2, pos);
  4.1220 +				if ((len -= copy) == 0)
  4.1221 +					return csum;
  4.1222 +				offset += copy;
  4.1223 +				pos += copy;
  4.1224 +			}
  4.1225 +			start = end;
  4.1226 +		}
  4.1227 +	}
  4.1228 +	if (len == 0)
  4.1229 +		return csum;
  4.1230 +
  4.1231 +	BUG();
  4.1232 +	return csum;
  4.1233 +}
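
skb_checksum() walks the linear header, the page frags and the frag_list in one pass;
folding its 32-bit running sum gives the final 16-bit checksum. A brief sketch, with
an illustrative helper name:

	static unsigned short example_full_csum(const struct sk_buff *skb)
	{
		/* Checksum the whole (possibly fragmented) packet, then fold to 16 bits. */
		return csum_fold(skb_checksum(skb, 0, skb->len, 0));
	}
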
  4.1234 +
  4.1235 +/* Both of above in one bottle. */
  4.1236 +
  4.1237 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
  4.1238 +{
  4.1239 +	int i, copy;
  4.1240 +	int start = skb->len - skb->data_len;
  4.1241 +	int pos = 0;
  4.1242 +
  4.1243 +	/* Copy header. */
  4.1244 +	if ((copy = start-offset) > 0) {
  4.1245 +		if (copy > len)
  4.1246 +			copy = len;
  4.1247 +		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
  4.1248 +		if ((len -= copy) == 0)
  4.1249 +			return csum;
  4.1250 +		offset += copy;
  4.1251 +		to += copy;
  4.1252 +		pos = copy;
  4.1253 +	}
  4.1254 +
  4.1255 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  4.1256 +		int end;
  4.1257 +
  4.1258 +		BUG_TRAP(start <= offset+len);
  4.1259 +
  4.1260 +		end = start + skb_shinfo(skb)->frags[i].size;
  4.1261 +		if ((copy = end-offset) > 0) {
  4.1262 +			unsigned int csum2;
  4.1263 +			u8 *vaddr;
  4.1264 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  4.1265 +
  4.1266 +			if (copy > len)
  4.1267 +				copy = len;
  4.1268 +			vaddr = kmap_skb_frag(frag);
  4.1269 +			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
  4.1270 +						      offset-start, to, copy, 0);
  4.1271 +			kunmap_skb_frag(vaddr);
  4.1272 +			csum = csum_block_add(csum, csum2, pos);
  4.1273 +			if (!(len -= copy))
  4.1274 +				return csum;
  4.1275 +			offset += copy;
  4.1276 +			to += copy;
  4.1277 +			pos += copy;
  4.1278 +		}
  4.1279 +		start = end;
  4.1280 +	}
  4.1281 +
  4.1282 +	if (skb_shinfo(skb)->frag_list) {
  4.1283 +		struct sk_buff *list;
  4.1284 +
  4.1285 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  4.1286 +			unsigned int csum2;
  4.1287 +			int end;
  4.1288 +
  4.1289 +			BUG_TRAP(start <= offset+len);
  4.1290 +
  4.1291 +			end = start + list->len;
  4.1292 +			if ((copy = end-offset) > 0) {
  4.1293 +				if (copy > len)
  4.1294 +					copy = len;
  4.1295 +				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
  4.1296 +				csum = csum_block_add(csum, csum2, pos);
  4.1297 +				if ((len -= copy) == 0)
  4.1298 +					return csum;
  4.1299 +				offset += copy;
  4.1300 +				to += copy;
  4.1301 +				pos += copy;
  4.1302 +			}
  4.1303 +			start = end;
  4.1304 +		}
  4.1305 +	}
  4.1306 +	if (len == 0)
  4.1307 +		return csum;
  4.1308 +
  4.1309 +	BUG();
  4.1310 +	return csum;
  4.1311 +}
  4.1312 +
  4.1313 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
  4.1314 +{
  4.1315 +	unsigned int csum;
  4.1316 +	long csstart;
  4.1317 +
  4.1318 +	if (skb->ip_summed == CHECKSUM_HW)
  4.1319 +		csstart = skb->h.raw - skb->data;
  4.1320 +	else
  4.1321 +		csstart = skb->len - skb->data_len;
  4.1322 +
  4.1323 +	if (csstart > skb->len - skb->data_len)
  4.1324 +		BUG();
  4.1325 +
  4.1326 +	memcpy(to, skb->data, csstart);
  4.1327 +
  4.1328 +	csum = 0;
  4.1329 +	if (csstart != skb->len)
  4.1330 +		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
  4.1331 +				skb->len-csstart, 0);
  4.1332 +
  4.1333 +	if (skb->ip_summed == CHECKSUM_HW) {
  4.1334 +		long csstuff = csstart + skb->csum;
  4.1335 +
  4.1336 +		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
  4.1337 +	}
  4.1338 +}
  4.1339 +
  4.1340 +#if 0
  4.1341 +/* 
  4.1342 + * 	Tune the memory allocator for a new MTU size.
  4.1343 + */
  4.1344 +void skb_add_mtu(int mtu)
  4.1345 +{
  4.1346 +	/* Must match allocation in alloc_skb */
  4.1347 +	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
  4.1348 +
  4.1349 +	kmem_add_cache_size(mtu);
  4.1350 +}
  4.1351 +#endif
  4.1352 +
  4.1353 +void __init skb_init(void)
  4.1354 +{
  4.1355 +	int i;
  4.1356 +
  4.1357 +	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
  4.1358 +					      sizeof(struct sk_buff),
  4.1359 +					      0,
  4.1360 +					      SLAB_HWCACHE_ALIGN,
  4.1361 +					      skb_headerinit, NULL);
  4.1362 +	if (!skbuff_head_cache)
  4.1363 +		panic("cannot create skbuff cache");
  4.1364 +
  4.1365 +        init_net_pages(NUM_NET_PAGES);
  4.1366 +
  4.1367 +	for (i=0; i<NR_CPUS; i++)
  4.1368 +		skb_queue_head_init(&skb_head_pool[i].list);
  4.1369 +}