direct-io.hg

changeset 2508:359b48f3b4f8

bitkeeper revision 1.1159.79.10 (41505c58w6Hk7xUbFnsEdE5r462MrQ)

Rewrite our skb cache. Fix frontend driver to ask for sensibly-sized
skbuffs.
author kaf24@freefall.cl.cam.ac.uk
date Tue Sep 21 16:52:40 2004 +0000 (2004-09-21)
parents 37953cf6f0dd
children ae5765bc62c2
files .rootkeys linux-2.4.27-xen-sparse/include/linux/skbuff.h linux-2.4.27-xen-sparse/net/core/skbuff.c linux-2.6.8.1-xen-sparse/arch/xen/kernel/skbuff.c linux-2.6.8.1-xen-sparse/drivers/xen/netback/netback.c linux-2.6.8.1-xen-sparse/drivers/xen/netfront/netfront.c linux-2.6.8.1-xen-sparse/include/linux/skbuff.h linux-2.6.8.1-xen-sparse/net/core/skbuff.c
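The diff below is large, so here is a condensed sketch of what it does: a new alloc_skb_from_cache() helper is added to the 2.4 and 2.6 sparse copies of net/core/skbuff.c, Xen's __dev_alloc_skb() is rewritten to use it instead of allocating via alloc_skb() and then swapping out the data area, and the netfront driver now asks for MTU-sized receive buffers rather than a full PAGE_SIZE skbuff per packet. All identifiers are taken from the hunks below; the function bodies are abridged, not drop-in code.

    /*
     * Condensed sketch of the new allocation path (abridged from the
     * hunks in this changeset, not a complete file).
     */

    /* Added to net/core/skbuff.c: allocate the skb head as usual, but
     * take the data area from a caller-supplied slab cache. */
    extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                                unsigned int size, int gfp_mask);

    /* arch/xen/kernel/skbuff.c: __dev_alloc_skb() now calls the helper
     * directly, replacing the old alloc_skb() + kfree(skb->head) dance. */
    struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
    {
        struct sk_buff *skb;

        skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
        if (likely(skb != NULL))
            skb_reserve(skb, 16);      /* the usual 16 bytes of headroom */
        return skb;
    }

    /* netfront: receive buffers are sized to the device MTU plus headroom. */
    #define RX_HEADROOM 100
    #define alloc_xen_skb(_l) __dev_alloc_skb((_l), GFP_ATOMIC | __GFP_NOWARN)
    /* ...in network_alloc_rx_buffers():
     *     skb = alloc_xen_skb(dev->mtu + RX_HEADROOM);            */

The full hunks, including the overflow handling that netfront's netif_poll() now performs when a received packet does not fit the smaller buffer, follow in the line diff.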
line diff
     1.1 --- a/.rootkeys	Tue Sep 21 13:05:21 2004 +0000
     1.2 +++ b/.rootkeys	Tue Sep 21 16:52:40 2004 +0000
     1.3 @@ -126,6 +126,7 @@ 3e5a4e681xMPdF9xCMwpyfuYMySU5g linux-2.4
     1.4  409ba2e7akOFqQUg6Qyg2s28xcXiMg linux-2.4.27-xen-sparse/mm/page_alloc.c
     1.5  3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.27-xen-sparse/mm/swapfile.c
     1.6  41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.27-xen-sparse/mm/vmalloc.c
     1.7 +41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.27-xen-sparse/net/core/skbuff.c
     1.8  413aa1d0oNP8HXLvfPuMe6cSroUfSA linux-2.6.8.1-patches/agpgart.patch
     1.9  413aa1d0ewvSv-ohnNnQQNGsbPTTNA linux-2.6.8.1-patches/drm.patch
    1.10  40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.8.1-xen-sparse/arch/xen/Kconfig
    1.11 @@ -259,6 +260,7 @@ 4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6
    1.12  40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.8.1-xen-sparse/mkbuildtree
    1.13  412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.8.1-xen-sparse/mm/memory.c
    1.14  410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.8.1-xen-sparse/mm/page_alloc.c
    1.15 +41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.8.1-xen-sparse/net/core/skbuff.c
    1.16  4149ec79wMpIHdvbntxqVGLRZZjPxw linux-2.6.8.1-xen-sparse/net/ipv4/raw.c
    1.17  413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile
    1.18  413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree
     2.1 --- a/linux-2.4.27-xen-sparse/include/linux/skbuff.h	Tue Sep 21 13:05:21 2004 +0000
     2.2 +++ b/linux-2.4.27-xen-sparse/include/linux/skbuff.h	Tue Sep 21 16:52:40 2004 +0000
     2.3 @@ -231,6 +231,7 @@ struct sk_buff {
     2.4  
     2.5  extern void			__kfree_skb(struct sk_buff *skb);
     2.6  extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
     2.7 +extern struct sk_buff *		alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, int priority);
     2.8  extern void			kfree_skbmem(struct sk_buff *skb);
     2.9  extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
    2.10  extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/linux-2.4.27-xen-sparse/net/core/skbuff.c	Tue Sep 21 16:52:40 2004 +0000
     3.3 @@ -0,0 +1,1309 @@
     3.4 +/*
     3.5 + *	Routines having to do with the 'struct sk_buff' memory handlers.
     3.6 + *
     3.7 + *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
     3.8 + *			Florian La Roche <rzsfl@rz.uni-sb.de>
     3.9 + *
    3.10 + *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
    3.11 + *
    3.12 + *	Fixes:	
    3.13 + *		Alan Cox	:	Fixed the worst of the load balancer bugs.
    3.14 + *		Dave Platt	:	Interrupt stacking fix.
    3.15 + *	Richard Kooijman	:	Timestamp fixes.
    3.16 + *		Alan Cox	:	Changed buffer format.
    3.17 + *		Alan Cox	:	destructor hook for AF_UNIX etc.
    3.18 + *		Linus Torvalds	:	Better skb_clone.
    3.19 + *		Alan Cox	:	Added skb_copy.
    3.20 + *		Alan Cox	:	Added all the changed routines Linus
    3.21 + *					only put in the headers
    3.22 + *		Ray VanTassle	:	Fixed --skb->lock in free
    3.23 + *		Alan Cox	:	skb_copy copy arp field
    3.24 + *		Andi Kleen	:	slabified it.
    3.25 + *
    3.26 + *	NOTE:
    3.27 + *		The __skb_ routines should be called with interrupts 
    3.28 + *	disabled, or you better be *real* sure that the operation is atomic 
    3.29 + *	with respect to whatever list is being frobbed (e.g. via lock_sock()
    3.30 + *	or via disabling bottom half handlers, etc).
    3.31 + *
    3.32 + *	This program is free software; you can redistribute it and/or
    3.33 + *	modify it under the terms of the GNU General Public License
    3.34 + *	as published by the Free Software Foundation; either version
    3.35 + *	2 of the License, or (at your option) any later version.
    3.36 + */
    3.37 +
    3.38 +/*
    3.39 + *	The functions in this file will not compile correctly with gcc 2.4.x
    3.40 + */
    3.41 +
    3.42 +#include <linux/config.h>
    3.43 +#include <linux/types.h>
    3.44 +#include <linux/kernel.h>
    3.45 +#include <linux/sched.h>
    3.46 +#include <linux/mm.h>
    3.47 +#include <linux/interrupt.h>
    3.48 +#include <linux/in.h>
    3.49 +#include <linux/inet.h>
    3.50 +#include <linux/slab.h>
    3.51 +#include <linux/netdevice.h>
    3.52 +#include <linux/string.h>
    3.53 +#include <linux/skbuff.h>
    3.54 +#include <linux/cache.h>
    3.55 +#include <linux/rtnetlink.h>
    3.56 +#include <linux/init.h>
    3.57 +#include <linux/highmem.h>
    3.58 +
    3.59 +#include <net/protocol.h>
    3.60 +#include <net/dst.h>
    3.61 +#include <net/sock.h>
    3.62 +#include <net/checksum.h>
    3.63 +
    3.64 +#include <asm/uaccess.h>
    3.65 +#include <asm/system.h>
    3.66 +
    3.67 +int sysctl_hot_list_len = 128;
    3.68 +
    3.69 +static kmem_cache_t *skbuff_head_cache;
    3.70 +
    3.71 +static union {
    3.72 +	struct sk_buff_head	list;
    3.73 +	char			pad[SMP_CACHE_BYTES];
    3.74 +} skb_head_pool[NR_CPUS];
    3.75 +
    3.76 +/*
    3.77 + *	Keep out-of-line to prevent kernel bloat.
    3.78 + *	__builtin_return_address is not used because it is not always
    3.79 + *	reliable. 
    3.80 + */
    3.81 +
    3.82 +/**
    3.83 + *	skb_over_panic	- 	private function
    3.84 + *	@skb: buffer
    3.85 + *	@sz: size
    3.86 + *	@here: address
    3.87 + *
    3.88 + *	Out of line support code for skb_put(). Not user callable.
    3.89 + */
    3.90 + 
    3.91 +void skb_over_panic(struct sk_buff *skb, int sz, void *here)
    3.92 +{
    3.93 +	printk("skput:over: %p:%d put:%d dev:%s", 
    3.94 +		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
    3.95 +	BUG();
    3.96 +}
    3.97 +
    3.98 +/**
    3.99 + *	skb_under_panic	- 	private function
   3.100 + *	@skb: buffer
   3.101 + *	@sz: size
   3.102 + *	@here: address
   3.103 + *
   3.104 + *	Out of line support code for skb_push(). Not user callable.
   3.105 + */
   3.106 + 
   3.107 +
   3.108 +void skb_under_panic(struct sk_buff *skb, int sz, void *here)
   3.109 +{
   3.110 +        printk("skput:under: %p:%d put:%d dev:%s",
   3.111 +                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   3.112 +	BUG();
   3.113 +}
   3.114 +
   3.115 +static __inline__ struct sk_buff *skb_head_from_pool(void)
   3.116 +{
   3.117 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   3.118 +
   3.119 +	if (skb_queue_len(list)) {
   3.120 +		struct sk_buff *skb;
   3.121 +		unsigned long flags;
   3.122 +
   3.123 +		local_irq_save(flags);
   3.124 +		skb = __skb_dequeue(list);
   3.125 +		local_irq_restore(flags);
   3.126 +		return skb;
   3.127 +	}
   3.128 +	return NULL;
   3.129 +}
   3.130 +
   3.131 +static __inline__ void skb_head_to_pool(struct sk_buff *skb)
   3.132 +{
   3.133 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   3.134 +
   3.135 +	if (skb_queue_len(list) < sysctl_hot_list_len) {
   3.136 +		unsigned long flags;
   3.137 +
   3.138 +		local_irq_save(flags);
   3.139 +		__skb_queue_head(list, skb);
   3.140 +		local_irq_restore(flags);
   3.141 +
   3.142 +		return;
   3.143 +	}
   3.144 +	kmem_cache_free(skbuff_head_cache, skb);
   3.145 +}
   3.146 +
   3.147 +
   3.148 +/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
   3.149 + *	'private' fields and also do memory statistics to find all the
   3.150 + *	[BEEP] leaks.
   3.151 + * 
   3.152 + */
   3.153 +
   3.154 +/**
   3.155 + *	alloc_skb	-	allocate a network buffer
   3.156 + *	@size: size to allocate
   3.157 + *	@gfp_mask: allocation mask
   3.158 + *
   3.159 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
   3.160 + *	tail room of size bytes. The object has a reference count of one.
   3.161 + *	The return is the buffer. On a failure the return is %NULL.
   3.162 + *
   3.163 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
   3.164 + *	%GFP_ATOMIC.
   3.165 + */
   3.166 + 
   3.167 +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
   3.168 +{
   3.169 +	struct sk_buff *skb;
   3.170 +	u8 *data;
   3.171 +
   3.172 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   3.173 +		static int count = 0;
   3.174 +		if (++count < 5) {
   3.175 +			printk(KERN_ERR "alloc_skb called nonatomically "
   3.176 +			       "from interrupt %p\n", NET_CALLER(size));
   3.177 + 			BUG();
   3.178 +		}
   3.179 +		gfp_mask &= ~__GFP_WAIT;
   3.180 +	}
   3.181 +
   3.182 +	/* Get the HEAD */
   3.183 +	skb = skb_head_from_pool();
   3.184 +	if (skb == NULL) {
   3.185 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   3.186 +		if (skb == NULL)
   3.187 +			goto nohead;
   3.188 +	}
   3.189 +
   3.190 +	/* Get the DATA. Size must match skb_add_mtu(). */
   3.191 +	size = SKB_DATA_ALIGN(size);
   3.192 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   3.193 +	if (data == NULL)
   3.194 +		goto nodata;
   3.195 +
   3.196 +	/* XXX: does not include slab overhead */ 
   3.197 +	skb->truesize = size + sizeof(struct sk_buff);
   3.198 +
   3.199 +	/* Load the data pointers. */
   3.200 +	skb->head = data;
   3.201 +	skb->data = data;
   3.202 +	skb->tail = data;
   3.203 +	skb->end = data + size;
   3.204 +
   3.205 +	/* Set up other state */
   3.206 +	skb->len = 0;
   3.207 +	skb->cloned = 0;
   3.208 +	skb->data_len = 0;
   3.209 +
   3.210 +	atomic_set(&skb->users, 1); 
   3.211 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   3.212 +	skb_shinfo(skb)->nr_frags = 0;
   3.213 +	skb_shinfo(skb)->frag_list = NULL;
   3.214 +	return skb;
   3.215 +
   3.216 +nodata:
   3.217 +	skb_head_to_pool(skb);
   3.218 +nohead:
   3.219 +	return NULL;
   3.220 +}
   3.221 +
   3.222 +/**
   3.223 + *	alloc_skb_from_cache	-	allocate a network buffer
   3.224 + *	@cp: kmem_cache from which to allocate the data area
   3.225 + *           (object size must be big enough for @size bytes + skb overheads)
   3.226 + *	@size: size to allocate
   3.227 + *	@gfp_mask: allocation mask
   3.228 + *
   3.229 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
   3.230 + *	tail room of size bytes. The object has a reference count of one.
   3.231 + *	The return is the buffer. On a failure the return is %NULL.
   3.232 + *
   3.233 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
   3.234 + *	%GFP_ATOMIC.
   3.235 + */
   3.236 + 
   3.237 +struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
   3.238 +				     unsigned int size, int gfp_mask)
   3.239 +{
   3.240 +	struct sk_buff *skb;
   3.241 +	u8 *data;
   3.242 +
   3.243 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   3.244 +		static int count = 0;
   3.245 +		if (++count < 5) {
   3.246 +			printk(KERN_ERR "alloc_skb called nonatomically "
   3.247 +			       "from interrupt %p\n", NET_CALLER(size));
   3.248 + 			BUG();
   3.249 +		}
   3.250 +		gfp_mask &= ~__GFP_WAIT;
   3.251 +	}
   3.252 +
   3.253 +	/* Get the HEAD */
   3.254 +	skb = skb_head_from_pool();
   3.255 +	if (skb == NULL) {
   3.256 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   3.257 +		if (skb == NULL)
   3.258 +			goto nohead;
   3.259 +	}
   3.260 +
   3.261 +	/* Get the DATA. */
   3.262 +	size = SKB_DATA_ALIGN(size);
   3.263 +	data = kmem_cache_alloc(cp, gfp_mask);
   3.264 +	if (data == NULL)
   3.265 +		goto nodata;
   3.266 +
   3.267 +	/* XXX: does not include slab overhead */ 
   3.268 +	skb->truesize = size + sizeof(struct sk_buff);
   3.269 +
   3.270 +	/* Load the data pointers. */
   3.271 +	skb->head = data;
   3.272 +	skb->data = data;
   3.273 +	skb->tail = data;
   3.274 +	skb->end = data + size;
   3.275 +
   3.276 +	/* Set up other state */
   3.277 +	skb->len = 0;
   3.278 +	skb->cloned = 0;
   3.279 +	skb->data_len = 0;
   3.280 +
   3.281 +	atomic_set(&skb->users, 1); 
   3.282 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   3.283 +	skb_shinfo(skb)->nr_frags = 0;
   3.284 +	skb_shinfo(skb)->frag_list = NULL;
   3.285 +	return skb;
   3.286 +
   3.287 +nodata:
   3.288 +	skb_head_to_pool(skb);
   3.289 +nohead:
   3.290 +	return NULL;
   3.291 +}
   3.292 +
   3.293 +
   3.294 +/*
   3.295 + *	Slab constructor for a skb head. 
   3.296 + */ 
   3.297 +static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
   3.298 +				  unsigned long flags)
   3.299 +{
   3.300 +	struct sk_buff *skb = p;
   3.301 +
   3.302 +	skb->next = NULL;
   3.303 +	skb->prev = NULL;
   3.304 +	skb->list = NULL;
   3.305 +	skb->sk = NULL;
   3.306 +	skb->stamp.tv_sec=0;	/* No idea about time */
   3.307 +	skb->dev = NULL;
   3.308 +	skb->real_dev = NULL;
   3.309 +	skb->dst = NULL;
   3.310 +	memset(skb->cb, 0, sizeof(skb->cb));
   3.311 +	skb->pkt_type = PACKET_HOST;	/* Default type */
   3.312 +	skb->ip_summed = 0;
   3.313 +	skb->priority = 0;
   3.314 +	skb->security = 0;	/* By default packets are insecure */
   3.315 +	skb->destructor = NULL;
   3.316 +
   3.317 +#ifdef CONFIG_NETFILTER
   3.318 +	skb->nfmark = skb->nfcache = 0;
   3.319 +	skb->nfct = NULL;
   3.320 +#ifdef CONFIG_NETFILTER_DEBUG
   3.321 +	skb->nf_debug = 0;
   3.322 +#endif
   3.323 +#endif
   3.324 +#ifdef CONFIG_NET_SCHED
   3.325 +	skb->tc_index = 0;
   3.326 +#endif
   3.327 +}
   3.328 +
   3.329 +static void skb_drop_fraglist(struct sk_buff *skb)
   3.330 +{
   3.331 +	struct sk_buff *list = skb_shinfo(skb)->frag_list;
   3.332 +
   3.333 +	skb_shinfo(skb)->frag_list = NULL;
   3.334 +
   3.335 +	do {
   3.336 +		struct sk_buff *this = list;
   3.337 +		list = list->next;
   3.338 +		kfree_skb(this);
   3.339 +	} while (list);
   3.340 +}
   3.341 +
   3.342 +static void skb_clone_fraglist(struct sk_buff *skb)
   3.343 +{
   3.344 +	struct sk_buff *list;
   3.345 +
   3.346 +	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
   3.347 +		skb_get(list);
   3.348 +}
   3.349 +
   3.350 +static void skb_release_data(struct sk_buff *skb)
   3.351 +{
   3.352 +	if (!skb->cloned ||
   3.353 +	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
   3.354 +		if (skb_shinfo(skb)->nr_frags) {
   3.355 +			int i;
   3.356 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
   3.357 +				put_page(skb_shinfo(skb)->frags[i].page);
   3.358 +		}
   3.359 +
   3.360 +		if (skb_shinfo(skb)->frag_list)
   3.361 +			skb_drop_fraglist(skb);
   3.362 +
   3.363 +		kfree(skb->head);
   3.364 +	}
   3.365 +}
   3.366 +
   3.367 +/*
   3.368 + *	Free an skbuff by memory without cleaning the state. 
   3.369 + */
   3.370 +void kfree_skbmem(struct sk_buff *skb)
   3.371 +{
   3.372 +	skb_release_data(skb);
   3.373 +	skb_head_to_pool(skb);
   3.374 +}
   3.375 +
   3.376 +/**
   3.377 + *	__kfree_skb - private function 
   3.378 + *	@skb: buffer
   3.379 + *
   3.380 + *	Free an sk_buff. Release anything attached to the buffer. 
   3.381 + *	Clean the state. This is an internal helper function. Users should
   3.382 + *	always call kfree_skb
   3.383 + */
   3.384 +
   3.385 +void __kfree_skb(struct sk_buff *skb)
   3.386 +{
   3.387 +	if (skb->list) {
   3.388 +	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
   3.389 +		       "on a list (from %p).\n", NET_CALLER(skb));
   3.390 +		BUG();
   3.391 +	}
   3.392 +
   3.393 +	dst_release(skb->dst);
   3.394 +	if(skb->destructor) {
   3.395 +		if (in_irq()) {
   3.396 +			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
   3.397 +				NET_CALLER(skb));
   3.398 +		}
   3.399 +		skb->destructor(skb);
   3.400 +	}
   3.401 +#ifdef CONFIG_NETFILTER
   3.402 +	nf_conntrack_put(skb->nfct);
   3.403 +#endif
   3.404 +	skb_headerinit(skb, NULL, 0);  /* clean state */
   3.405 +	kfree_skbmem(skb);
   3.406 +}
   3.407 +
   3.408 +/**
   3.409 + *	skb_clone	-	duplicate an sk_buff
   3.410 + *	@skb: buffer to clone
   3.411 + *	@gfp_mask: allocation priority
   3.412 + *
   3.413 + *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
   3.414 + *	copies share the same packet data but not structure. The new
   3.415 + *	buffer has a reference count of 1. If the allocation fails the 
   3.416 + *	function returns %NULL otherwise the new buffer is returned.
   3.417 + *	
   3.418 + *	If this function is called from an interrupt gfp_mask() must be
   3.419 + *	%GFP_ATOMIC.
   3.420 + */
   3.421 +
   3.422 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
   3.423 +{
   3.424 +	struct sk_buff *n;
   3.425 +
   3.426 +	n = skb_head_from_pool();
   3.427 +	if (!n) {
   3.428 +		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
   3.429 +		if (!n)
   3.430 +			return NULL;
   3.431 +	}
   3.432 +
   3.433 +#define C(x) n->x = skb->x
   3.434 +
   3.435 +	n->next = n->prev = NULL;
   3.436 +	n->list = NULL;
   3.437 +	n->sk = NULL;
   3.438 +	C(stamp);
   3.439 +	C(dev);
   3.440 +	C(real_dev);
   3.441 +	C(h);
   3.442 +	C(nh);
   3.443 +	C(mac);
   3.444 +	C(dst);
   3.445 +	dst_clone(n->dst);
   3.446 +	memcpy(n->cb, skb->cb, sizeof(skb->cb));
   3.447 +	C(len);
   3.448 +	C(data_len);
   3.449 +	C(csum);
   3.450 +	n->cloned = 1;
   3.451 +	C(pkt_type);
   3.452 +	C(ip_summed);
   3.453 +	C(priority);
   3.454 +	atomic_set(&n->users, 1);
   3.455 +	C(protocol);
   3.456 +	C(security);
   3.457 +	C(truesize);
   3.458 +	C(head);
   3.459 +	C(data);
   3.460 +	C(tail);
   3.461 +	C(end);
   3.462 +	n->destructor = NULL;
   3.463 +#ifdef CONFIG_NETFILTER
   3.464 +	C(nfmark);
   3.465 +	C(nfcache);
   3.466 +	C(nfct);
   3.467 +#ifdef CONFIG_NETFILTER_DEBUG
   3.468 +	C(nf_debug);
   3.469 +#endif
   3.470 +#endif /*CONFIG_NETFILTER*/
   3.471 +#if defined(CONFIG_HIPPI)
   3.472 +	C(private);
   3.473 +#endif
   3.474 +#ifdef CONFIG_NET_SCHED
   3.475 +	C(tc_index);
   3.476 +#endif
   3.477 +
   3.478 +	atomic_inc(&(skb_shinfo(skb)->dataref));
   3.479 +	skb->cloned = 1;
   3.480 +#ifdef CONFIG_NETFILTER
   3.481 +	nf_conntrack_get(skb->nfct);
   3.482 +#endif
   3.483 +	return n;
   3.484 +}
   3.485 +
   3.486 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
   3.487 +{
   3.488 +	/*
   3.489 +	 *	Shift between the two data areas in bytes
   3.490 +	 */
   3.491 +	unsigned long offset = new->data - old->data;
   3.492 +
   3.493 +	new->list=NULL;
   3.494 +	new->sk=NULL;
   3.495 +	new->dev=old->dev;
   3.496 +	new->real_dev=old->real_dev;
   3.497 +	new->priority=old->priority;
   3.498 +	new->protocol=old->protocol;
   3.499 +	new->dst=dst_clone(old->dst);
   3.500 +	new->h.raw=old->h.raw+offset;
   3.501 +	new->nh.raw=old->nh.raw+offset;
   3.502 +	new->mac.raw=old->mac.raw+offset;
   3.503 +	memcpy(new->cb, old->cb, sizeof(old->cb));
   3.504 +	atomic_set(&new->users, 1);
   3.505 +	new->pkt_type=old->pkt_type;
   3.506 +	new->stamp=old->stamp;
   3.507 +	new->destructor = NULL;
   3.508 +	new->security=old->security;
   3.509 +#ifdef CONFIG_NETFILTER
   3.510 +	new->nfmark=old->nfmark;
   3.511 +	new->nfcache=old->nfcache;
   3.512 +	new->nfct=old->nfct;
   3.513 +	nf_conntrack_get(new->nfct);
   3.514 +#ifdef CONFIG_NETFILTER_DEBUG
   3.515 +	new->nf_debug=old->nf_debug;
   3.516 +#endif
   3.517 +#endif
   3.518 +#ifdef CONFIG_NET_SCHED
   3.519 +	new->tc_index = old->tc_index;
   3.520 +#endif
   3.521 +}
   3.522 +
   3.523 +/**
   3.524 + *	skb_copy	-	create private copy of an sk_buff
   3.525 + *	@skb: buffer to copy
   3.526 + *	@gfp_mask: allocation priority
   3.527 + *
   3.528 + *	Make a copy of both an &sk_buff and its data. This is used when the
   3.529 + *	caller wishes to modify the data and needs a private copy of the 
   3.530 + *	data to alter. Returns %NULL on failure or the pointer to the buffer
   3.531 + *	on success. The returned buffer has a reference count of 1.
   3.532 + *
   3.533 + *	As by-product this function converts non-linear &sk_buff to linear
   3.534 + *	one, so that &sk_buff becomes completely private and caller is allowed
   3.535 + *	to modify all the data of returned buffer. This means that this
   3.536 + *	function is not recommended for use in circumstances when only
   3.537 + *	header is going to be modified. Use pskb_copy() instead.
   3.538 + */
   3.539 + 
   3.540 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
   3.541 +{
   3.542 +	struct sk_buff *n;
   3.543 +	int headerlen = skb->data-skb->head;
   3.544 +
   3.545 +	/*
   3.546 +	 *	Allocate the copy buffer
   3.547 +	 */
   3.548 +	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
   3.549 +	if(n==NULL)
   3.550 +		return NULL;
   3.551 +
   3.552 +	/* Set the data pointer */
   3.553 +	skb_reserve(n,headerlen);
   3.554 +	/* Set the tail pointer and length */
   3.555 +	skb_put(n,skb->len);
   3.556 +	n->csum = skb->csum;
   3.557 +	n->ip_summed = skb->ip_summed;
   3.558 +
   3.559 +	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
   3.560 +		BUG();
   3.561 +
   3.562 +	copy_skb_header(n, skb);
   3.563 +
   3.564 +	return n;
   3.565 +}
   3.566 +
   3.567 +/* Keep head the same: replace data */
   3.568 +int skb_linearize(struct sk_buff *skb, int gfp_mask)
   3.569 +{
   3.570 +	unsigned int size;
   3.571 +	u8 *data;
   3.572 +	long offset;
   3.573 +	int headerlen = skb->data - skb->head;
   3.574 +	int expand = (skb->tail+skb->data_len) - skb->end;
   3.575 +
   3.576 +	if (skb_shared(skb))
   3.577 +		BUG();
   3.578 +
   3.579 +	if (expand <= 0)
   3.580 +		expand = 0;
   3.581 +
   3.582 +	size = (skb->end - skb->head + expand);
   3.583 +	size = SKB_DATA_ALIGN(size);
   3.584 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   3.585 +	if (data == NULL)
   3.586 +		return -ENOMEM;
   3.587 +
   3.588 +	/* Copy entire thing */
   3.589 +	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
   3.590 +		BUG();
   3.591 +
   3.592 +	/* Offset between the two in bytes */
   3.593 +	offset = data - skb->head;
   3.594 +
   3.595 +	/* Free old data. */
   3.596 +	skb_release_data(skb);
   3.597 +
   3.598 +	skb->head = data;
   3.599 +	skb->end  = data + size;
   3.600 +
   3.601 +	/* Set up new pointers */
   3.602 +	skb->h.raw += offset;
   3.603 +	skb->nh.raw += offset;
   3.604 +	skb->mac.raw += offset;
   3.605 +	skb->tail += offset;
   3.606 +	skb->data += offset;
   3.607 +
   3.608 +	/* Set up shinfo */
   3.609 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   3.610 +	skb_shinfo(skb)->nr_frags = 0;
   3.611 +	skb_shinfo(skb)->frag_list = NULL;
   3.612 +
   3.613 +	/* We are no longer a clone, even if we were. */
   3.614 +	skb->cloned = 0;
   3.615 +
   3.616 +	skb->tail += skb->data_len;
   3.617 +	skb->data_len = 0;
   3.618 +	return 0;
   3.619 +}
   3.620 +
   3.621 +
   3.622 +/**
   3.623 + *	pskb_copy	-	create copy of an sk_buff with private head.
   3.624 + *	@skb: buffer to copy
   3.625 + *	@gfp_mask: allocation priority
   3.626 + *
   3.627 + *	Make a copy of both an &sk_buff and part of its data, located
   3.628 + *	in header. Fragmented data remain shared. This is used when
   3.629 + *	the caller wishes to modify only header of &sk_buff and needs
   3.630 + *	private copy of the header to alter. Returns %NULL on failure
   3.631 + *	or the pointer to the buffer on success.
   3.632 + *	The returned buffer has a reference count of 1.
   3.633 + */
   3.634 +
   3.635 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
   3.636 +{
   3.637 +	struct sk_buff *n;
   3.638 +
   3.639 +	/*
   3.640 +	 *	Allocate the copy buffer
   3.641 +	 */
   3.642 +	n=alloc_skb(skb->end - skb->head, gfp_mask);
   3.643 +	if(n==NULL)
   3.644 +		return NULL;
   3.645 +
   3.646 +	/* Set the data pointer */
   3.647 +	skb_reserve(n,skb->data-skb->head);
   3.648 +	/* Set the tail pointer and length */
   3.649 +	skb_put(n,skb_headlen(skb));
   3.650 +	/* Copy the bytes */
   3.651 +	memcpy(n->data, skb->data, n->len);
   3.652 +	n->csum = skb->csum;
   3.653 +	n->ip_summed = skb->ip_summed;
   3.654 +
   3.655 +	n->data_len = skb->data_len;
   3.656 +	n->len = skb->len;
   3.657 +
   3.658 +	if (skb_shinfo(skb)->nr_frags) {
   3.659 +		int i;
   3.660 +
   3.661 +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   3.662 +			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
   3.663 +			get_page(skb_shinfo(n)->frags[i].page);
   3.664 +		}
   3.665 +		skb_shinfo(n)->nr_frags = i;
   3.666 +	}
   3.667 +
   3.668 +	if (skb_shinfo(skb)->frag_list) {
   3.669 +		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
   3.670 +		skb_clone_fraglist(n);
   3.671 +	}
   3.672 +
   3.673 +	copy_skb_header(n, skb);
   3.674 +
   3.675 +	return n;
   3.676 +}
   3.677 +
   3.678 +/**
   3.679 + *	pskb_expand_head - reallocate header of &sk_buff
   3.680 + *	@skb: buffer to reallocate
   3.681 + *	@nhead: room to add at head
   3.682 + *	@ntail: room to add at tail
   3.683 + *	@gfp_mask: allocation priority
   3.684 + *
   3.685 + *	Expands (or creates identical copy, if &nhead and &ntail are zero)
   3.686 + *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
   3.687 + *	reference count of 1. Returns zero in the case of success or error,
   3.688 + *	if expansion failed. In the last case, &sk_buff is not changed.
   3.689 + *
   3.690 + *	All the pointers pointing into skb header may change and must be
   3.691 + *	reloaded after call to this function.
   3.692 + */
   3.693 +
   3.694 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
   3.695 +{
   3.696 +	int i;
   3.697 +	u8 *data;
   3.698 +	int size = nhead + (skb->end - skb->head) + ntail;
   3.699 +	long off;
   3.700 +
   3.701 +	if (skb_shared(skb))
   3.702 +		BUG();
   3.703 +
   3.704 +	size = SKB_DATA_ALIGN(size);
   3.705 +
   3.706 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   3.707 +	if (data == NULL)
   3.708 +		goto nodata;
   3.709 +
   3.710 +	/* Copy only real data... and, alas, header. This should be
   3.711 +	 * optimized for the cases when header is void. */
   3.712 +	memcpy(data+nhead, skb->head, skb->tail-skb->head);
   3.713 +	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
   3.714 +
   3.715 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
   3.716 +		get_page(skb_shinfo(skb)->frags[i].page);
   3.717 +
   3.718 +	if (skb_shinfo(skb)->frag_list)
   3.719 +		skb_clone_fraglist(skb);
   3.720 +
   3.721 +	skb_release_data(skb);
   3.722 +
   3.723 +	off = (data+nhead) - skb->head;
   3.724 +
   3.725 +	skb->head = data;
   3.726 +	skb->end  = data+size;
   3.727 +
   3.728 +	skb->data += off;
   3.729 +	skb->tail += off;
   3.730 +	skb->mac.raw += off;
   3.731 +	skb->h.raw += off;
   3.732 +	skb->nh.raw += off;
   3.733 +	skb->cloned = 0;
   3.734 +	atomic_set(&skb_shinfo(skb)->dataref, 1);
   3.735 +	return 0;
   3.736 +
   3.737 +nodata:
   3.738 +	return -ENOMEM;
   3.739 +}
   3.740 +
   3.741 +/* Make private copy of skb with writable head and some headroom */
   3.742 +
   3.743 +struct sk_buff *
   3.744 +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
   3.745 +{
   3.746 +	struct sk_buff *skb2;
   3.747 +	int delta = headroom - skb_headroom(skb);
   3.748 +
   3.749 +	if (delta <= 0)
   3.750 +		return pskb_copy(skb, GFP_ATOMIC);
   3.751 +
   3.752 +	skb2 = skb_clone(skb, GFP_ATOMIC);
   3.753 +	if (skb2 == NULL ||
   3.754 +	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
   3.755 +		return skb2;
   3.756 +
   3.757 +	kfree_skb(skb2);
   3.758 +	return NULL;
   3.759 +}
   3.760 +
   3.761 +
   3.762 +/**
   3.763 + *	skb_copy_expand	-	copy and expand sk_buff
   3.764 + *	@skb: buffer to copy
   3.765 + *	@newheadroom: new free bytes at head
   3.766 + *	@newtailroom: new free bytes at tail
   3.767 + *	@gfp_mask: allocation priority
   3.768 + *
   3.769 + *	Make a copy of both an &sk_buff and its data and while doing so 
   3.770 + *	allocate additional space.
   3.771 + *
   3.772 + *	This is used when the caller wishes to modify the data and needs a 
   3.773 + *	private copy of the data to alter as well as more space for new fields.
   3.774 + *	Returns %NULL on failure or the pointer to the buffer
   3.775 + *	on success. The returned buffer has a reference count of 1.
   3.776 + *
   3.777 + *	You must pass %GFP_ATOMIC as the allocation priority if this function
   3.778 + *	is called from an interrupt.
   3.779 + */
   3.780 + 
   3.781 +
   3.782 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
   3.783 +				int newheadroom,
   3.784 +				int newtailroom,
   3.785 +				int gfp_mask)
   3.786 +{
   3.787 +	struct sk_buff *n;
   3.788 +
   3.789 +	/*
   3.790 +	 *	Allocate the copy buffer
   3.791 +	 */
   3.792 + 	 
   3.793 +	n=alloc_skb(newheadroom + skb->len + newtailroom,
   3.794 +		    gfp_mask);
   3.795 +	if(n==NULL)
   3.796 +		return NULL;
   3.797 +
   3.798 +	skb_reserve(n,newheadroom);
   3.799 +
   3.800 +	/* Set the tail pointer and length */
   3.801 +	skb_put(n,skb->len);
   3.802 +
   3.803 +	/* Copy the data only. */
   3.804 +	if (skb_copy_bits(skb, 0, n->data, skb->len))
   3.805 +		BUG();
   3.806 +
   3.807 +	copy_skb_header(n, skb);
   3.808 +	return n;
   3.809 +}
   3.810 +
   3.811 +/**
   3.812 + *	skb_pad			-	zero pad the tail of an skb
   3.813 + *	@skb: buffer to pad
   3.814 + *	@pad: space to pad
   3.815 + *
   3.816 + *	Ensure that a buffer is followed by a padding area that is zero
   3.817 + *	filled. Used by network drivers which may DMA or transfer data
   3.818 + *	beyond the buffer end onto the wire.
   3.819 + *
   3.820 + *	May return NULL in out of memory cases.
   3.821 + */
   3.822 + 
   3.823 +struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
   3.824 +{
   3.825 +	struct sk_buff *nskb;
   3.826 +	
   3.827 +	/* If the skbuff is non linear tailroom is always zero.. */
   3.828 +	if(skb_tailroom(skb) >= pad)
   3.829 +	{
   3.830 +		memset(skb->data+skb->len, 0, pad);
   3.831 +		return skb;
   3.832 +	}
   3.833 +	
   3.834 +	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
   3.835 +	kfree_skb(skb);
   3.836 +	if(nskb)
   3.837 +		memset(nskb->data+nskb->len, 0, pad);
   3.838 +	return nskb;
   3.839 +}	
   3.840 + 
   3.841 +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
   3.842 + * If realloc==0 and trimming is impossible without change of data,
   3.843 + * it is BUG().
   3.844 + */
   3.845 +
   3.846 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
   3.847 +{
   3.848 +	int offset = skb_headlen(skb);
   3.849 +	int nfrags = skb_shinfo(skb)->nr_frags;
   3.850 +	int i;
   3.851 +
   3.852 +	for (i=0; i<nfrags; i++) {
   3.853 +		int end = offset + skb_shinfo(skb)->frags[i].size;
   3.854 +		if (end > len) {
   3.855 +			if (skb_cloned(skb)) {
   3.856 +				if (!realloc)
   3.857 +					BUG();
   3.858 +				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
   3.859 +					return -ENOMEM;
   3.860 +			}
   3.861 +			if (len <= offset) {
   3.862 +				put_page(skb_shinfo(skb)->frags[i].page);
   3.863 +				skb_shinfo(skb)->nr_frags--;
   3.864 +			} else {
   3.865 +				skb_shinfo(skb)->frags[i].size = len-offset;
   3.866 +			}
   3.867 +		}
   3.868 +		offset = end;
   3.869 +	}
   3.870 +
   3.871 +	if (offset < len) {
   3.872 +		skb->data_len -= skb->len - len;
   3.873 +		skb->len = len;
   3.874 +	} else {
   3.875 +		if (len <= skb_headlen(skb)) {
   3.876 +			skb->len = len;
   3.877 +			skb->data_len = 0;
   3.878 +			skb->tail = skb->data + len;
   3.879 +			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
   3.880 +				skb_drop_fraglist(skb);
   3.881 +		} else {
   3.882 +			skb->data_len -= skb->len - len;
   3.883 +			skb->len = len;
   3.884 +		}
   3.885 +	}
   3.886 +
   3.887 +	return 0;
   3.888 +}
   3.889 +
   3.890 +/**
   3.891 + *	__pskb_pull_tail - advance tail of skb header 
   3.892 + *	@skb: buffer to reallocate
   3.893 + *	@delta: number of bytes to advance tail
   3.894 + *
   3.895 + *	The function makes a sense only on a fragmented &sk_buff,
   3.896 + *	it expands header moving its tail forward and copying necessary
   3.897 + *	data from fragmented part.
   3.898 + *
   3.899 + *	&sk_buff MUST have reference count of 1.
   3.900 + *
   3.901 + *	Returns %NULL (and &sk_buff does not change) if pull failed
   3.902 + *	or value of new tail of skb in the case of success.
   3.903 + *
   3.904 + *	All the pointers pointing into skb header may change and must be
   3.905 + *	reloaded after call to this function.
   3.906 + */
   3.907 +
   3.908 +/* Moves tail of skb head forward, copying data from fragmented part,
   3.909 + * when it is necessary.
   3.910 + * 1. It may fail due to malloc failure.
   3.911 + * 2. It may change skb pointers.
   3.912 + *
   3.913 + * It is pretty complicated. Luckily, it is called only in exceptional cases.
   3.914 + */
   3.915 +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
   3.916 +{
   3.917 +	int i, k, eat;
   3.918 +
   3.919 +	/* If skb has not enough free space at tail, get new one
   3.920 +	 * plus 128 bytes for future expansions. If we have enough
   3.921 +	 * room at tail, reallocate without expansion only if skb is cloned.
   3.922 +	 */
   3.923 +	eat = (skb->tail+delta) - skb->end;
   3.924 +
   3.925 +	if (eat > 0 || skb_cloned(skb)) {
   3.926 +		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
   3.927 +			return NULL;
   3.928 +	}
   3.929 +
   3.930 +	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
   3.931 +		BUG();
   3.932 +
   3.933 +	/* Optimization: no fragments, no reasons to preestimate
   3.934 +	 * size of pulled pages. Superb.
   3.935 +	 */
   3.936 +	if (skb_shinfo(skb)->frag_list == NULL)
   3.937 +		goto pull_pages;
   3.938 +
   3.939 +	/* Estimate size of pulled pages. */
   3.940 +	eat = delta;
   3.941 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
   3.942 +		if (skb_shinfo(skb)->frags[i].size >= eat)
   3.943 +			goto pull_pages;
   3.944 +		eat -= skb_shinfo(skb)->frags[i].size;
   3.945 +	}
   3.946 +
   3.947 +	/* If we need update frag list, we are in troubles.
   3.948 +	 * Certainly, it possible to add an offset to skb data,
   3.949 +	 * but taking into account that pulling is expected to
   3.950 +	 * be very rare operation, it is worth to fight against
   3.951 +	 * further bloating skb head and crucify ourselves here instead.
   3.952 +	 * Pure masohism, indeed. 8)8)
   3.953 +	 */
   3.954 +	if (eat) {
   3.955 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
   3.956 +		struct sk_buff *clone = NULL;
   3.957 +		struct sk_buff *insp = NULL;
   3.958 +
   3.959 +		do {
   3.960 +			if (list == NULL)
   3.961 +				BUG();
   3.962 +
   3.963 +			if (list->len <= eat) {
   3.964 +				/* Eaten as whole. */
   3.965 +				eat -= list->len;
   3.966 +				list = list->next;
   3.967 +				insp = list;
   3.968 +			} else {
   3.969 +				/* Eaten partially. */
   3.970 +
   3.971 +				if (skb_shared(list)) {
   3.972 +					/* Sucks! We need to fork list. :-( */
   3.973 +					clone = skb_clone(list, GFP_ATOMIC);
   3.974 +					if (clone == NULL)
   3.975 +						return NULL;
   3.976 +					insp = list->next;
   3.977 +					list = clone;
   3.978 +				} else {
   3.979 +					/* This may be pulled without
   3.980 +					 * problems. */
   3.981 +					insp = list;
   3.982 +				}
   3.983 +				if (pskb_pull(list, eat) == NULL) {
   3.984 +					if (clone)
   3.985 +						kfree_skb(clone);
   3.986 +					return NULL;
   3.987 +				}
   3.988 +				break;
   3.989 +			}
   3.990 +		} while (eat);
   3.991 +
   3.992 +		/* Free pulled out fragments. */
   3.993 +		while ((list = skb_shinfo(skb)->frag_list) != insp) {
   3.994 +			skb_shinfo(skb)->frag_list = list->next;
   3.995 +			kfree_skb(list);
   3.996 +		}
   3.997 +		/* And insert new clone at head. */
   3.998 +		if (clone) {
   3.999 +			clone->next = list;
  3.1000 +			skb_shinfo(skb)->frag_list = clone;
  3.1001 +		}
  3.1002 +	}
  3.1003 +	/* Success! Now we may commit changes to skb data. */
  3.1004 +
  3.1005 +pull_pages:
  3.1006 +	eat = delta;
  3.1007 +	k = 0;
  3.1008 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  3.1009 +		if (skb_shinfo(skb)->frags[i].size <= eat) {
  3.1010 +			put_page(skb_shinfo(skb)->frags[i].page);
  3.1011 +			eat -= skb_shinfo(skb)->frags[i].size;
  3.1012 +		} else {
  3.1013 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
  3.1014 +			if (eat) {
  3.1015 +				skb_shinfo(skb)->frags[k].page_offset += eat;
  3.1016 +				skb_shinfo(skb)->frags[k].size -= eat;
  3.1017 +				eat = 0;
  3.1018 +			}
  3.1019 +			k++;
  3.1020 +		}
  3.1021 +	}
  3.1022 +	skb_shinfo(skb)->nr_frags = k;
  3.1023 +
  3.1024 +	skb->tail += delta;
  3.1025 +	skb->data_len -= delta;
  3.1026 +
  3.1027 +	return skb->tail;
  3.1028 +}
  3.1029 +
  3.1030 +/* Copy some data bits from skb to kernel buffer. */
  3.1031 +
  3.1032 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
  3.1033 +{
  3.1034 +	int i, copy;
  3.1035 +	int start = skb->len - skb->data_len;
  3.1036 +
  3.1037 +	if (offset > (int)skb->len-len)
  3.1038 +		goto fault;
  3.1039 +
  3.1040 +	/* Copy header. */
  3.1041 +	if ((copy = start-offset) > 0) {
  3.1042 +		if (copy > len)
  3.1043 +			copy = len;
  3.1044 +		memcpy(to, skb->data + offset, copy);
  3.1045 +		if ((len -= copy) == 0)
  3.1046 +			return 0;
  3.1047 +		offset += copy;
  3.1048 +		to += copy;
  3.1049 +	}
  3.1050 +
  3.1051 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  3.1052 +		int end;
  3.1053 +
  3.1054 +		BUG_TRAP(start <= offset+len);
  3.1055 +
  3.1056 +		end = start + skb_shinfo(skb)->frags[i].size;
  3.1057 +		if ((copy = end-offset) > 0) {
  3.1058 +			u8 *vaddr;
  3.1059 +
  3.1060 +			if (copy > len)
  3.1061 +				copy = len;
  3.1062 +
  3.1063 +			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
  3.1064 +			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
  3.1065 +			       offset-start, copy);
  3.1066 +			kunmap_skb_frag(vaddr);
  3.1067 +
  3.1068 +			if ((len -= copy) == 0)
  3.1069 +				return 0;
  3.1070 +			offset += copy;
  3.1071 +			to += copy;
  3.1072 +		}
  3.1073 +		start = end;
  3.1074 +	}
  3.1075 +
  3.1076 +	if (skb_shinfo(skb)->frag_list) {
  3.1077 +		struct sk_buff *list;
  3.1078 +
  3.1079 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  3.1080 +			int end;
  3.1081 +
  3.1082 +			BUG_TRAP(start <= offset+len);
  3.1083 +
  3.1084 +			end = start + list->len;
  3.1085 +			if ((copy = end-offset) > 0) {
  3.1086 +				if (copy > len)
  3.1087 +					copy = len;
  3.1088 +				if (skb_copy_bits(list, offset-start, to, copy))
  3.1089 +					goto fault;
  3.1090 +				if ((len -= copy) == 0)
  3.1091 +					return 0;
  3.1092 +				offset += copy;
  3.1093 +				to += copy;
  3.1094 +			}
  3.1095 +			start = end;
  3.1096 +		}
  3.1097 +	}
  3.1098 +	if (len == 0)
  3.1099 +		return 0;
  3.1100 +
  3.1101 +fault:
  3.1102 +	return -EFAULT;
  3.1103 +}
  3.1104 +
  3.1105 +/* Checksum skb data. */
  3.1106 +
  3.1107 +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
  3.1108 +{
  3.1109 +	int i, copy;
  3.1110 +	int start = skb->len - skb->data_len;
  3.1111 +	int pos = 0;
  3.1112 +
  3.1113 +	/* Checksum header. */
  3.1114 +	if ((copy = start-offset) > 0) {
  3.1115 +		if (copy > len)
  3.1116 +			copy = len;
  3.1117 +		csum = csum_partial(skb->data+offset, copy, csum);
  3.1118 +		if ((len -= copy) == 0)
  3.1119 +			return csum;
  3.1120 +		offset += copy;
  3.1121 +		pos = copy;
  3.1122 +	}
  3.1123 +
  3.1124 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  3.1125 +		int end;
  3.1126 +
  3.1127 +		BUG_TRAP(start <= offset+len);
  3.1128 +
  3.1129 +		end = start + skb_shinfo(skb)->frags[i].size;
  3.1130 +		if ((copy = end-offset) > 0) {
  3.1131 +			unsigned int csum2;
  3.1132 +			u8 *vaddr;
  3.1133 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  3.1134 +
  3.1135 +			if (copy > len)
  3.1136 +				copy = len;
  3.1137 +			vaddr = kmap_skb_frag(frag);
  3.1138 +			csum2 = csum_partial(vaddr + frag->page_offset +
  3.1139 +					     offset-start, copy, 0);
  3.1140 +			kunmap_skb_frag(vaddr);
  3.1141 +			csum = csum_block_add(csum, csum2, pos);
  3.1142 +			if (!(len -= copy))
  3.1143 +				return csum;
  3.1144 +			offset += copy;
  3.1145 +			pos += copy;
  3.1146 +		}
  3.1147 +		start = end;
  3.1148 +	}
  3.1149 +
  3.1150 +	if (skb_shinfo(skb)->frag_list) {
  3.1151 +		struct sk_buff *list;
  3.1152 +
  3.1153 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  3.1154 +			int end;
  3.1155 +
  3.1156 +			BUG_TRAP(start <= offset+len);
  3.1157 +
  3.1158 +			end = start + list->len;
  3.1159 +			if ((copy = end-offset) > 0) {
  3.1160 +				unsigned int csum2;
  3.1161 +				if (copy > len)
  3.1162 +					copy = len;
  3.1163 +				csum2 = skb_checksum(list, offset-start, copy, 0);
  3.1164 +				csum = csum_block_add(csum, csum2, pos);
  3.1165 +				if ((len -= copy) == 0)
  3.1166 +					return csum;
  3.1167 +				offset += copy;
  3.1168 +				pos += copy;
  3.1169 +			}
  3.1170 +			start = end;
  3.1171 +		}
  3.1172 +	}
  3.1173 +	if (len == 0)
  3.1174 +		return csum;
  3.1175 +
  3.1176 +	BUG();
  3.1177 +	return csum;
  3.1178 +}
  3.1179 +
  3.1180 +/* Both of above in one bottle. */
  3.1181 +
  3.1182 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
  3.1183 +{
  3.1184 +	int i, copy;
  3.1185 +	int start = skb->len - skb->data_len;
  3.1186 +	int pos = 0;
  3.1187 +
  3.1188 +	/* Copy header. */
  3.1189 +	if ((copy = start-offset) > 0) {
  3.1190 +		if (copy > len)
  3.1191 +			copy = len;
  3.1192 +		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
  3.1193 +		if ((len -= copy) == 0)
  3.1194 +			return csum;
  3.1195 +		offset += copy;
  3.1196 +		to += copy;
  3.1197 +		pos = copy;
  3.1198 +	}
  3.1199 +
  3.1200 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  3.1201 +		int end;
  3.1202 +
  3.1203 +		BUG_TRAP(start <= offset+len);
  3.1204 +
  3.1205 +		end = start + skb_shinfo(skb)->frags[i].size;
  3.1206 +		if ((copy = end-offset) > 0) {
  3.1207 +			unsigned int csum2;
  3.1208 +			u8 *vaddr;
  3.1209 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  3.1210 +
  3.1211 +			if (copy > len)
  3.1212 +				copy = len;
  3.1213 +			vaddr = kmap_skb_frag(frag);
  3.1214 +			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
  3.1215 +						      offset-start, to, copy, 0);
  3.1216 +			kunmap_skb_frag(vaddr);
  3.1217 +			csum = csum_block_add(csum, csum2, pos);
  3.1218 +			if (!(len -= copy))
  3.1219 +				return csum;
  3.1220 +			offset += copy;
  3.1221 +			to += copy;
  3.1222 +			pos += copy;
  3.1223 +		}
  3.1224 +		start = end;
  3.1225 +	}
  3.1226 +
  3.1227 +	if (skb_shinfo(skb)->frag_list) {
  3.1228 +		struct sk_buff *list;
  3.1229 +
  3.1230 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  3.1231 +			unsigned int csum2;
  3.1232 +			int end;
  3.1233 +
  3.1234 +			BUG_TRAP(start <= offset+len);
  3.1235 +
  3.1236 +			end = start + list->len;
  3.1237 +			if ((copy = end-offset) > 0) {
  3.1238 +				if (copy > len)
  3.1239 +					copy = len;
  3.1240 +				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
  3.1241 +				csum = csum_block_add(csum, csum2, pos);
  3.1242 +				if ((len -= copy) == 0)
  3.1243 +					return csum;
  3.1244 +				offset += copy;
  3.1245 +				to += copy;
  3.1246 +				pos += copy;
  3.1247 +			}
  3.1248 +			start = end;
  3.1249 +		}
  3.1250 +	}
  3.1251 +	if (len == 0)
  3.1252 +		return csum;
  3.1253 +
  3.1254 +	BUG();
  3.1255 +	return csum;
  3.1256 +}
  3.1257 +
  3.1258 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
  3.1259 +{
  3.1260 +	unsigned int csum;
  3.1261 +	long csstart;
  3.1262 +
  3.1263 +	if (skb->ip_summed == CHECKSUM_HW)
  3.1264 +		csstart = skb->h.raw - skb->data;
  3.1265 +	else
  3.1266 +		csstart = skb->len - skb->data_len;
  3.1267 +
  3.1268 +	if (csstart > skb->len - skb->data_len)
  3.1269 +		BUG();
  3.1270 +
  3.1271 +	memcpy(to, skb->data, csstart);
  3.1272 +
  3.1273 +	csum = 0;
  3.1274 +	if (csstart != skb->len)
  3.1275 +		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
  3.1276 +				skb->len-csstart, 0);
  3.1277 +
  3.1278 +	if (skb->ip_summed == CHECKSUM_HW) {
  3.1279 +		long csstuff = csstart + skb->csum;
  3.1280 +
  3.1281 +		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
  3.1282 +	}
  3.1283 +}
  3.1284 +
  3.1285 +#if 0
  3.1286 +/* 
  3.1287 + * 	Tune the memory allocator for a new MTU size.
  3.1288 + */
  3.1289 +void skb_add_mtu(int mtu)
  3.1290 +{
  3.1291 +	/* Must match allocation in alloc_skb */
  3.1292 +	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
  3.1293 +
  3.1294 +	kmem_add_cache_size(mtu);
  3.1295 +}
  3.1296 +#endif
  3.1297 +
  3.1298 +void __init skb_init(void)
  3.1299 +{
  3.1300 +	int i;
  3.1301 +
  3.1302 +	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
  3.1303 +					      sizeof(struct sk_buff),
  3.1304 +					      0,
  3.1305 +					      SLAB_HWCACHE_ALIGN,
  3.1306 +					      skb_headerinit, NULL);
  3.1307 +	if (!skbuff_head_cache)
  3.1308 +		panic("cannot create skbuff cache");
  3.1309 +
  3.1310 +	for (i=0; i<NR_CPUS; i++)
  3.1311 +		skb_queue_head_init(&skb_head_pool[i].list);
  3.1312 +}
     4.1 --- a/linux-2.6.8.1-xen-sparse/arch/xen/kernel/skbuff.c	Tue Sep 21 13:05:21 2004 +0000
     4.2 +++ b/linux-2.6.8.1-xen-sparse/arch/xen/kernel/skbuff.c	Tue Sep 21 16:52:40 2004 +0000
     4.3 @@ -27,34 +27,9 @@ EXPORT_SYMBOL(__dev_alloc_skb);
     4.4  struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
     4.5  {
     4.6      struct sk_buff *skb;
     4.7 -    u8             *new_data, *new_shinfo; 
     4.8 -
     4.9 -    /*
    4.10 -     * Yuk! There is no way to get a skbuff head without allocating the
    4.11 -     * data area using kmalloc(). So we do that and then replace the default
    4.12 -     * data area with our own.
    4.13 -     */
    4.14 -    skb = alloc_skb(0, gfp_mask);
    4.15 -    if ( unlikely(skb == NULL) )
    4.16 -        return NULL;
    4.17 -
    4.18 -    new_data = kmem_cache_alloc(skbuff_cachep, gfp_mask);
    4.19 -    if ( new_data == NULL )
    4.20 -    {
    4.21 -        dev_kfree_skb(skb);
    4.22 -        return NULL;
    4.23 -    }
    4.24 -
    4.25 -    new_shinfo = new_data + XEN_SKB_SIZE;
    4.26 -    memcpy(new_shinfo, skb_shinfo(skb), sizeof(struct skb_shared_info));
    4.27 -
    4.28 -    kfree(skb->head);
    4.29 -
    4.30 -    skb->head = new_data;
    4.31 -    skb->data = skb->tail = new_data + 16; /* __dev_alloc_skb does this */
    4.32 -    skb->end  = new_shinfo;
    4.33 -    skb->truesize = 1500;                  /* is this important? */
    4.34 -
    4.35 +    skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
    4.36 +    if ( likely(skb != NULL) )
    4.37 +        skb_reserve(skb, 16);
    4.38      return skb;
    4.39  }
    4.40  
     5.1 --- a/linux-2.6.8.1-xen-sparse/drivers/xen/netback/netback.c	Tue Sep 21 13:05:21 2004 +0000
     5.2 +++ b/linux-2.6.8.1-xen-sparse/drivers/xen/netback/netback.c	Tue Sep 21 16:52:40 2004 +0000
     5.3 @@ -144,11 +144,12 @@ int netif_be_start_xmit(struct sk_buff *
     5.4       */
     5.5      if ( skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb) )
     5.6      {
     5.7 -        struct sk_buff *nskb = dev_alloc_skb(PAGE_SIZE);
     5.8          int hlen = skb->data - skb->head;
     5.9 +        struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
    5.10          if ( unlikely(nskb == NULL) )
    5.11              goto drop;
    5.12 -        skb_reserve(nskb, hlen);
    5.13 +        /* Account for any reservation already made by dev_alloc_skb(). */
    5.14 +        skb_reserve(nskb, hlen - (nskb->data - nskb->head));
    5.15          __skb_put(nskb, skb->len);
    5.16          (void)skb_copy_bits(skb, -hlen, nskb->head, hlen + skb->len);
    5.17          nskb->dev = skb->dev;
     6.1 --- a/linux-2.6.8.1-xen-sparse/drivers/xen/netfront/netfront.c	Tue Sep 21 13:05:21 2004 +0000
     6.2 +++ b/linux-2.6.8.1-xen-sparse/drivers/xen/netfront/netfront.c	Tue Sep 21 16:52:40 2004 +0000
     6.3 @@ -39,7 +39,17 @@
     6.4  #ifndef __GFP_NOWARN
     6.5  #define __GFP_NOWARN 0
     6.6  #endif
     6.7 -#define alloc_skb_page() __dev_alloc_skb(PAGE_SIZE, GFP_ATOMIC|__GFP_NOWARN)
     6.8 +#define alloc_xen_skb(_l) __dev_alloc_skb((_l), GFP_ATOMIC|__GFP_NOWARN)
     6.9 +
    6.10 +#define init_skb_shinfo(_skb)                         \
    6.11 +    do {                                              \
    6.12 +        atomic_set(&(skb_shinfo(_skb)->dataref), 1);  \
    6.13 +        skb_shinfo(_skb)->nr_frags = 0;               \
    6.14 +        skb_shinfo(_skb)->frag_list = NULL;           \
    6.15 +    } while ( 0 )
    6.16 +
    6.17 +/* Allow headroom on each rx pkt for Ethernet header, alignment padding, ... */
    6.18 +#define RX_HEADROOM 100
    6.19  
    6.20  /*
    6.21   * If the backend driver is pipelining transmit requests then we can be very
    6.22 @@ -249,7 +259,7 @@ static void network_tx_buf_gc(struct net
    6.23              id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
    6.24              skb = np->tx_skbs[id];
    6.25              ADD_ID_TO_FREELIST(np->tx_skbs, id);
    6.26 -            dev_kfree_skb_any(skb);
    6.27 +            dev_kfree_skb_irq(skb);
    6.28          }
    6.29          
    6.30          np->tx_resp_cons = prod;
    6.31 @@ -292,7 +302,7 @@ static void network_alloc_rx_buffers(str
    6.32          return;
    6.33  
    6.34      do {
    6.35 -        if ( unlikely((skb = alloc_skb_page()) == NULL) )
    6.36 +        if ( unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL) )
    6.37              break;
    6.38  
    6.39          skb->dev = dev;
    6.40 @@ -368,7 +378,7 @@ static int network_start_xmit(struct sk_
    6.41                    PAGE_SIZE) )
    6.42      {
    6.43          struct sk_buff *new_skb;
    6.44 -        if ( unlikely((new_skb = alloc_skb_page()) == NULL) )
    6.45 +        if ( unlikely((new_skb = alloc_xen_skb(skb->len)) == NULL) )
    6.46              goto drop;
    6.47          skb_put(new_skb, skb->len);
    6.48          memcpy(new_skb->data, skb->data, skb->len);
    6.49 @@ -446,7 +456,7 @@ static irqreturn_t netif_int(int irq, vo
    6.50  static int netif_poll(struct net_device *dev, int *pbudget)
    6.51  {
    6.52      struct net_private *np = dev->priv;
    6.53 -    struct sk_buff *skb;
    6.54 +    struct sk_buff *skb, *nskb;
    6.55      netif_rx_response_t *rx;
    6.56      NETIF_RING_IDX i, rp;
    6.57      mmu_update_t *mmu = rx_mmu;
    6.58 @@ -494,8 +504,10 @@ static int netif_poll(struct net_device 
    6.59          skb = np->rx_skbs[rx->id];
    6.60          ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
    6.61  
    6.62 -        skb->data = skb->tail = skb->head + (rx->addr & ~PAGE_MASK);
    6.63 -        skb_put(skb, rx->status);
    6.64 +        /* NB. We handle skb overflow later. */
    6.65 +        skb->data = skb->head + (rx->addr & ~PAGE_MASK);
    6.66 +        skb->len  = rx->status;
    6.67 +        skb->tail = skb->data + skb->len;
    6.68  
    6.69          np->stats.rx_packets++;
    6.70          np->stats.rx_bytes += rx->status;
    6.71 @@ -529,10 +541,39 @@ static int netif_poll(struct net_device 
    6.72  
    6.73      while ( (skb = __skb_dequeue(&rxq)) != NULL )
    6.74      {
    6.75 +        /*
    6.76 +         * Enough room in skbuff for the data we were passed? Also, Linux 
    6.77 +         * expects at least 16 bytes headroom in each receive buffer.
    6.78 +         */
    6.79 +        if ( unlikely(skb->tail > skb->end) ||
    6.80 +             unlikely((skb->data - skb->head) < 16) )
    6.81 +        {
    6.82 +            nskb = NULL;
    6.83 +
    6.84 +            /* Only copy the packet if it fits in the current MTU. */
    6.85 +            if ( skb->len <= (dev->mtu + ETH_HLEN) )
    6.86 +            {
    6.87 +                if ( (nskb = alloc_xen_skb(skb->len + 2)) != NULL )
    6.88 +                {
    6.89 +                    skb_reserve(nskb, 2);
    6.90 +                    skb_put(nskb, skb->len);
    6.91 +                    memcpy(nskb->data, skb->data, skb->len);
    6.92 +                }
    6.93 +            }
    6.94 +
    6.95 +            /* Reinitialise and then destroy the old skbuff. */
    6.96 +            skb->len  = 0;
    6.97 +            skb->tail = skb->data;
    6.98 +            init_skb_shinfo(skb);
    6.99 +            dev_kfree_skb(skb);
   6.100 +
   6.101 +            /* Switch old for new, if we copied the buffer. */
   6.102 +            if ( (skb = nskb) == NULL )
   6.103 +                continue;
   6.104 +        }
   6.105 +        
   6.106          /* Set the shared-info area, which is hidden behind the real data. */
   6.107 -        atomic_set(&(skb_shinfo(skb)->dataref), 1);
   6.108 -        skb_shinfo(skb)->nr_frags = 0;
   6.109 -        skb_shinfo(skb)->frag_list = NULL;
   6.110 +        init_skb_shinfo(skb);
   6.111  
   6.112          /* Ethernet-specific work. Delayed to here as it peeks the header. */
   6.113          skb->protocol = eth_type_trans(skb, dev);
   6.114 @@ -596,8 +637,8 @@ static void network_connect(struct net_d
   6.115      netif_tx_request_t *tx;
   6.116  
   6.117      np = dev->priv;
   6.118 -    spin_lock_irq(&np->rx_lock);
   6.119 -    spin_lock(&np->tx_lock);
   6.120 +    spin_lock_irq(&np->tx_lock);
   6.121 +    spin_lock(&np->rx_lock);
   6.122  
   6.123      /* Recovery procedure: */
   6.124  
   6.125 @@ -664,8 +705,8 @@ printk(KERN_ALERT"Netfront recovered tx=
   6.126      if ( np->user_state == UST_OPEN )
   6.127          netif_start_queue(dev);
   6.128  
   6.129 -    spin_unlock(&np->tx_lock);
   6.130 -    spin_unlock_irq(&np->rx_lock);
   6.131 +    spin_unlock(&np->rx_lock);
   6.132 +    spin_unlock_irq(&np->tx_lock);
   6.133  }
   6.134  
   6.135  static void netif_status_change(netif_fe_interface_status_changed_t *status)
     7.1 --- a/linux-2.6.8.1-xen-sparse/include/linux/skbuff.h	Tue Sep 21 13:05:21 2004 +0000
     7.2 +++ b/linux-2.6.8.1-xen-sparse/include/linux/skbuff.h	Tue Sep 21 16:52:40 2004 +0000
     7.3 @@ -293,6 +293,8 @@ struct sk_buff {
     7.4  
     7.5  extern void	       __kfree_skb(struct sk_buff *skb);
     7.6  extern struct sk_buff *alloc_skb(unsigned int size, int priority);
     7.7 +extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
     7.8 +					    unsigned int size, int priority);
     7.9  extern void	       kfree_skbmem(struct sk_buff *skb);
    7.10  extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority);
    7.11  extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/linux-2.6.8.1-xen-sparse/net/core/skbuff.c	Tue Sep 21 16:52:40 2004 +0000
     8.3 @@ -0,0 +1,1521 @@
     8.4 +/*
     8.5 + *	Routines having to do with the 'struct sk_buff' memory handlers.
     8.6 + *
     8.7 + *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
     8.8 + *			Florian La Roche <rzsfl@rz.uni-sb.de>
     8.9 + *
    8.10 + *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
    8.11 + *
    8.12 + *	Fixes:
    8.13 + *		Alan Cox	:	Fixed the worst of the load
    8.14 + *					balancer bugs.
    8.15 + *		Dave Platt	:	Interrupt stacking fix.
    8.16 + *	Richard Kooijman	:	Timestamp fixes.
    8.17 + *		Alan Cox	:	Changed buffer format.
    8.18 + *		Alan Cox	:	destructor hook for AF_UNIX etc.
    8.19 + *		Linus Torvalds	:	Better skb_clone.
    8.20 + *		Alan Cox	:	Added skb_copy.
    8.21 + *		Alan Cox	:	Added all the changed routines Linus
    8.22 + *					only put in the headers
    8.23 + *		Ray VanTassle	:	Fixed --skb->lock in free
    8.24 + *		Alan Cox	:	skb_copy copy arp field
    8.25 + *		Andi Kleen	:	slabified it.
    8.26 + *		Robert Olsson	:	Removed skb_head_pool
    8.27 + *
    8.28 + *	NOTE:
    8.29 + *		The __skb_ routines should be called with interrupts
    8.30 + *	disabled, or you better be *real* sure that the operation is atomic
    8.31 + *	with respect to whatever list is being frobbed (e.g. via lock_sock()
    8.32 + *	or via disabling bottom half handlers, etc).
    8.33 + *
    8.34 + *	This program is free software; you can redistribute it and/or
    8.35 + *	modify it under the terms of the GNU General Public License
    8.36 + *	as published by the Free Software Foundation; either version
    8.37 + *	2 of the License, or (at your option) any later version.
    8.38 + */
    8.39 +
    8.40 +/*
    8.41 + *	The functions in this file will not compile correctly with gcc 2.4.x
    8.42 + */
    8.43 +
    8.44 +#include <linux/config.h>
    8.45 +#include <linux/module.h>
    8.46 +#include <linux/types.h>
    8.47 +#include <linux/kernel.h>
    8.48 +#include <linux/sched.h>
    8.49 +#include <linux/mm.h>
    8.50 +#include <linux/interrupt.h>
    8.51 +#include <linux/in.h>
    8.52 +#include <linux/inet.h>
    8.53 +#include <linux/slab.h>
    8.54 +#include <linux/netdevice.h>
    8.55 +#ifdef CONFIG_NET_CLS_ACT
    8.56 +#include <net/pkt_sched.h>
    8.57 +#endif
    8.58 +#include <linux/string.h>
    8.59 +#include <linux/skbuff.h>
    8.60 +#include <linux/cache.h>
    8.61 +#include <linux/rtnetlink.h>
    8.62 +#include <linux/init.h>
    8.63 +#include <linux/highmem.h>
    8.64 +
    8.65 +#include <net/protocol.h>
    8.66 +#include <net/dst.h>
    8.67 +#include <net/sock.h>
    8.68 +#include <net/checksum.h>
    8.69 +#include <net/xfrm.h>
    8.70 +
    8.71 +#include <asm/uaccess.h>
    8.72 +#include <asm/system.h>
    8.73 +
    8.74 +static kmem_cache_t *skbuff_head_cache;
    8.75 +
    8.76 +/*
    8.77 + *	Keep out-of-line to prevent kernel bloat.
    8.78 + *	__builtin_return_address is not used because it is not always
    8.79 + *	reliable.
    8.80 + */
    8.81 +
    8.82 +/**
    8.83 + *	skb_over_panic	- 	private function
    8.84 + *	@skb: buffer
    8.85 + *	@sz: size
    8.86 + *	@here: address
    8.87 + *
    8.88 + *	Out of line support code for skb_put(). Not user callable.
    8.89 + */
    8.90 +void skb_over_panic(struct sk_buff *skb, int sz, void *here)
    8.91 +{
    8.92 +	printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
    8.93 +		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
    8.94 +	BUG();
    8.95 +}
    8.96 +
    8.97 +/**
    8.98 + *	skb_under_panic	- 	private function
    8.99 + *	@skb: buffer
   8.100 + *	@sz: size
   8.101 + *	@here: address
   8.102 + *
   8.103 + *	Out of line support code for skb_push(). Not user callable.
   8.104 + */
   8.105 +
   8.106 +void skb_under_panic(struct sk_buff *skb, int sz, void *here)
   8.107 +{
   8.108 +	printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
   8.109 +               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   8.110 +	BUG();
   8.111 +}
   8.112 +
   8.113 +/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
   8.114 + *	'private' fields and also do memory statistics to find all the
   8.115 + *	[BEEP] leaks.
   8.116 + *
   8.117 + */
   8.118 +
   8.119 +/**
   8.120 + *	alloc_skb	-	allocate a network buffer
   8.121 + *	@size: size to allocate
   8.122 + *	@gfp_mask: allocation mask
   8.123 + *
   8.124 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
   8.125 + *	tail room of size bytes. The object has a reference count of one.
   8.126 + *	The return is the buffer. On a failure the return is %NULL.
   8.127 + *
   8.128 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
   8.129 + *	%GFP_ATOMIC.
   8.130 + */
   8.131 +struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
   8.132 +{
   8.133 +	struct sk_buff *skb;
   8.134 +	u8 *data;
   8.135 +
   8.136 +	/* Get the HEAD */
   8.137 +	skb = kmem_cache_alloc(skbuff_head_cache,
   8.138 +			       gfp_mask & ~__GFP_DMA);
   8.139 +	if (!skb)
   8.140 +		goto out;
   8.141 +
   8.142 +	/* Get the DATA. Size must match skb_add_mtu(). */
   8.143 +	size = SKB_DATA_ALIGN(size);
   8.144 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   8.145 +	if (!data)
   8.146 +		goto nodata;
   8.147 +
   8.148 +	memset(skb, 0, offsetof(struct sk_buff, truesize));
   8.149 +	skb->truesize = size + sizeof(struct sk_buff);
   8.150 +	atomic_set(&skb->users, 1);
   8.151 +	skb->head = data;
   8.152 +	skb->data = data;
   8.153 +	skb->tail = data;
   8.154 +	skb->end  = data + size;
   8.155 +
   8.156 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   8.157 +	skb_shinfo(skb)->nr_frags  = 0;
   8.158 +	skb_shinfo(skb)->tso_size = 0;
   8.159 +	skb_shinfo(skb)->tso_segs = 0;
   8.160 +	skb_shinfo(skb)->frag_list = NULL;
   8.161 +out:
   8.162 +	return skb;
   8.163 +nodata:
   8.164 +	kmem_cache_free(skbuff_head_cache, skb);
   8.165 +	skb = NULL;
   8.166 +	goto out;
   8.167 +}
   8.168 +
   8.169 +/**
   8.170 + *	alloc_skb_from_cache	-	allocate a network buffer
   8.171 + *	@cp: kmem_cache from which to allocate the data area
   8.172 + *           (object size must be big enough for @size bytes + skb overheads)
   8.173 + *	@size: size to allocate
   8.174 + *	@gfp_mask: allocation mask
   8.175 + *
   8.176 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
   8.177 + *	tail room of size bytes. The object has a reference count of one.
   8.178 + *	The return is the buffer. On a failure the return is %NULL.
   8.179 + *
   8.180 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
   8.181 + *	%GFP_ATOMIC.
   8.182 + */
   8.183 +struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
   8.184 +				     unsigned int size, int gfp_mask)
   8.185 +{
   8.186 +	struct sk_buff *skb;
   8.187 +	u8 *data;
   8.188 +
   8.189 +	/* Get the HEAD */
   8.190 +	skb = kmem_cache_alloc(skbuff_head_cache,
   8.191 +			       gfp_mask & ~__GFP_DMA);
   8.192 +	if (!skb)
   8.193 +		goto out;
   8.194 +
   8.195 +	/* Get the DATA. */
   8.196 +	size = SKB_DATA_ALIGN(size);
   8.197 +	data = kmem_cache_alloc(cp, gfp_mask);
   8.198 +	if (!data)
   8.199 +		goto nodata;
   8.200 +
   8.201 +	memset(skb, 0, offsetof(struct sk_buff, truesize));
   8.202 +	skb->truesize = size + sizeof(struct sk_buff);
   8.203 +	atomic_set(&skb->users, 1);
   8.204 +	skb->head = data;
   8.205 +	skb->data = data;
   8.206 +	skb->tail = data;
   8.207 +	skb->end  = data + size;
   8.208 +
   8.209 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   8.210 +	skb_shinfo(skb)->nr_frags  = 0;
   8.211 +	skb_shinfo(skb)->tso_size = 0;
   8.212 +	skb_shinfo(skb)->tso_segs = 0;
   8.213 +	skb_shinfo(skb)->frag_list = NULL;
   8.214 +out:
   8.215 +	return skb;
   8.216 +nodata:
   8.217 +	kmem_cache_free(skbuff_head_cache, skb);
   8.218 +	skb = NULL;
   8.219 +	goto out;
   8.220 +}
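
/*
 * Illustrative sketch, not part of the patch: how a frontend driver might
 * use the new alloc_skb_from_cache() to take receive-buffer data areas from
 * its own slab (the changeset's stated goal of "sensibly-sized skbuffs").
 * The cache name and sizes below are assumptions for the example only; the
 * object size must cover SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info).
 */
static kmem_cache_t *example_rx_cache;		/* hypothetical per-driver cache */

static int example_rx_cache_init(void)
{
	example_rx_cache = kmem_cache_create("example_rx_cache", 2048, 0,
					     SLAB_HWCACHE_ALIGN, NULL, NULL);
	return example_rx_cache ? 0 : -ENOMEM;
}

static struct sk_buff *example_alloc_rx_skb(void)
{
	/* Data area comes from example_rx_cache; the sk_buff head itself is
	 * still taken from skbuff_head_cache, exactly as in alloc_skb(). */
	return alloc_skb_from_cache(example_rx_cache, 1500, GFP_ATOMIC);
}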
   8.221 +
   8.222 +
   8.223 +static void skb_drop_fraglist(struct sk_buff *skb)
   8.224 +{
   8.225 +	struct sk_buff *list = skb_shinfo(skb)->frag_list;
   8.226 +
   8.227 +	skb_shinfo(skb)->frag_list = NULL;
   8.228 +
   8.229 +	do {
   8.230 +		struct sk_buff *this = list;
   8.231 +		list = list->next;
   8.232 +		kfree_skb(this);
   8.233 +	} while (list);
   8.234 +}
   8.235 +
   8.236 +static void skb_clone_fraglist(struct sk_buff *skb)
   8.237 +{
   8.238 +	struct sk_buff *list;
   8.239 +
   8.240 +	for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
   8.241 +		skb_get(list);
   8.242 +}
   8.243 +
   8.244 +void skb_release_data(struct sk_buff *skb)
   8.245 +{
   8.246 +	if (!skb->cloned ||
   8.247 +	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
   8.248 +		if (skb_shinfo(skb)->nr_frags) {
   8.249 +			int i;
   8.250 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
   8.251 +				put_page(skb_shinfo(skb)->frags[i].page);
   8.252 +		}
   8.253 +
   8.254 +		if (skb_shinfo(skb)->frag_list)
   8.255 +			skb_drop_fraglist(skb);
   8.256 +
   8.257 +		kfree(skb->head);
   8.258 +	}
   8.259 +}
   8.260 +
   8.261 +/*
    8.262 + *	Free an skbuff's memory without cleaning the state.
   8.263 + */
   8.264 +void kfree_skbmem(struct sk_buff *skb)
   8.265 +{
   8.266 +	skb_release_data(skb);
   8.267 +	kmem_cache_free(skbuff_head_cache, skb);
   8.268 +}
   8.269 +
   8.270 +/**
   8.271 + *	__kfree_skb - private function
   8.272 + *	@skb: buffer
   8.273 + *
   8.274 + *	Free an sk_buff. Release anything attached to the buffer.
   8.275 + *	Clean the state. This is an internal helper function. Users should
   8.276 + *	always call kfree_skb
   8.277 + */
   8.278 +
   8.279 +void __kfree_skb(struct sk_buff *skb)
   8.280 +{
   8.281 +	if (skb->list) {
   8.282 +	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
   8.283 +		       "on a list (from %p).\n", NET_CALLER(skb));
   8.284 +		BUG();
   8.285 +	}
   8.286 +
   8.287 +	dst_release(skb->dst);
   8.288 +#ifdef CONFIG_XFRM
   8.289 +	secpath_put(skb->sp);
   8.290 +#endif
   8.291 +	if(skb->destructor) {
   8.292 +		if (in_irq())
   8.293 +			printk(KERN_WARNING "Warning: kfree_skb on "
   8.294 +					    "hard IRQ %p\n", NET_CALLER(skb));
   8.295 +		skb->destructor(skb);
   8.296 +	}
   8.297 +#ifdef CONFIG_NETFILTER
   8.298 +	nf_conntrack_put(skb->nfct);
   8.299 +#ifdef CONFIG_BRIDGE_NETFILTER
   8.300 +	nf_bridge_put(skb->nf_bridge);
   8.301 +#endif
   8.302 +#endif
   8.303 +/* XXX: IS this still necessary? - JHS */
   8.304 +#ifdef CONFIG_NET_SCHED
   8.305 +	skb->tc_index = 0;
   8.306 +#ifdef CONFIG_NET_CLS_ACT
   8.307 +	skb->tc_verd = 0;
   8.308 +	skb->tc_classid = 0;
   8.309 +#endif
   8.310 +#endif
   8.311 +
   8.312 +	kfree_skbmem(skb);
   8.313 +}
   8.314 +
   8.315 +/**
   8.316 + *	skb_clone	-	duplicate an sk_buff
   8.317 + *	@skb: buffer to clone
   8.318 + *	@gfp_mask: allocation priority
   8.319 + *
   8.320 + *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
   8.321 + *	copies share the same packet data but not structure. The new
   8.322 + *	buffer has a reference count of 1. If the allocation fails the
   8.323 + *	function returns %NULL otherwise the new buffer is returned.
   8.324 + *
   8.325 + *	If this function is called from an interrupt gfp_mask() must be
   8.326 + *	%GFP_ATOMIC.
   8.327 + */
   8.328 +
   8.329 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
   8.330 +{
   8.331 +	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
   8.332 +
   8.333 +	if (!n) 
   8.334 +		return NULL;
   8.335 +
   8.336 +#define C(x) n->x = skb->x
   8.337 +
   8.338 +	n->next = n->prev = NULL;
   8.339 +	n->list = NULL;
   8.340 +	n->sk = NULL;
   8.341 +	C(stamp);
   8.342 +	C(dev);
   8.343 +	C(real_dev);
   8.344 +	C(h);
   8.345 +	C(nh);
   8.346 +	C(mac);
   8.347 +	C(dst);
   8.348 +	dst_clone(skb->dst);
   8.349 +	C(sp);
   8.350 +#ifdef CONFIG_INET
   8.351 +	secpath_get(skb->sp);
   8.352 +#endif
   8.353 +	memcpy(n->cb, skb->cb, sizeof(skb->cb));
   8.354 +	C(len);
   8.355 +	C(data_len);
   8.356 +	C(csum);
   8.357 +	C(local_df);
   8.358 +	n->cloned = 1;
   8.359 +	C(pkt_type);
   8.360 +	C(ip_summed);
   8.361 +	C(priority);
   8.362 +	C(protocol);
   8.363 +	C(security);
   8.364 +	n->destructor = NULL;
   8.365 +#ifdef CONFIG_NETFILTER
   8.366 +	C(nfmark);
   8.367 +	C(nfcache);
   8.368 +	C(nfct);
   8.369 +	nf_conntrack_get(skb->nfct);
   8.370 +#ifdef CONFIG_NETFILTER_DEBUG
   8.371 +	C(nf_debug);
   8.372 +#endif
   8.373 +#ifdef CONFIG_BRIDGE_NETFILTER
   8.374 +	C(nf_bridge);
   8.375 +	nf_bridge_get(skb->nf_bridge);
   8.376 +#endif
   8.377 +#endif /*CONFIG_NETFILTER*/
   8.378 +#if defined(CONFIG_HIPPI)
   8.379 +	C(private);
   8.380 +#endif
   8.381 +#ifdef CONFIG_NET_SCHED
   8.382 +	C(tc_index);
   8.383 +#ifdef CONFIG_NET_CLS_ACT
   8.384 +	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
   8.385 +	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
   8.386 +	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
   8.387 +	C(input_dev);
   8.388 +	C(tc_classid);
   8.389 +#endif
   8.390 +
   8.391 +#endif
   8.392 +	C(truesize);
   8.393 +	atomic_set(&n->users, 1);
   8.394 +	C(head);
   8.395 +	C(data);
   8.396 +	C(tail);
   8.397 +	C(end);
   8.398 +
   8.399 +	atomic_inc(&(skb_shinfo(skb)->dataref));
   8.400 +	skb->cloned = 1;
   8.401 +
   8.402 +	return n;
   8.403 +}
   8.404 +
   8.405 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
   8.406 +{
   8.407 +	/*
   8.408 +	 *	Shift between the two data areas in bytes
   8.409 +	 */
   8.410 +	unsigned long offset = new->data - old->data;
   8.411 +
   8.412 +	new->list	= NULL;
   8.413 +	new->sk		= NULL;
   8.414 +	new->dev	= old->dev;
   8.415 +	new->real_dev	= old->real_dev;
   8.416 +	new->priority	= old->priority;
   8.417 +	new->protocol	= old->protocol;
   8.418 +	new->dst	= dst_clone(old->dst);
   8.419 +#ifdef CONFIG_INET
   8.420 +	new->sp		= secpath_get(old->sp);
   8.421 +#endif
   8.422 +	new->h.raw	= old->h.raw + offset;
   8.423 +	new->nh.raw	= old->nh.raw + offset;
   8.424 +	new->mac.raw	= old->mac.raw + offset;
   8.425 +	memcpy(new->cb, old->cb, sizeof(old->cb));
   8.426 +	new->local_df	= old->local_df;
   8.427 +	new->pkt_type	= old->pkt_type;
   8.428 +	new->stamp	= old->stamp;
   8.429 +	new->destructor = NULL;
   8.430 +	new->security	= old->security;
   8.431 +#ifdef CONFIG_NETFILTER
   8.432 +	new->nfmark	= old->nfmark;
   8.433 +	new->nfcache	= old->nfcache;
   8.434 +	new->nfct	= old->nfct;
   8.435 +	nf_conntrack_get(old->nfct);
   8.436 +#ifdef CONFIG_NETFILTER_DEBUG
   8.437 +	new->nf_debug	= old->nf_debug;
   8.438 +#endif
   8.439 +#ifdef CONFIG_BRIDGE_NETFILTER
   8.440 +	new->nf_bridge	= old->nf_bridge;
   8.441 +	nf_bridge_get(old->nf_bridge);
   8.442 +#endif
   8.443 +#endif
   8.444 +#ifdef CONFIG_NET_SCHED
   8.445 +#ifdef CONFIG_NET_CLS_ACT
   8.446 +	new->tc_verd = old->tc_verd;
   8.447 +#endif
   8.448 +	new->tc_index	= old->tc_index;
   8.449 +#endif
   8.450 +	atomic_set(&new->users, 1);
   8.451 +}
   8.452 +
   8.453 +/**
   8.454 + *	skb_copy	-	create private copy of an sk_buff
   8.455 + *	@skb: buffer to copy
   8.456 + *	@gfp_mask: allocation priority
   8.457 + *
   8.458 + *	Make a copy of both an &sk_buff and its data. This is used when the
   8.459 + *	caller wishes to modify the data and needs a private copy of the
   8.460 + *	data to alter. Returns %NULL on failure or the pointer to the buffer
   8.461 + *	on success. The returned buffer has a reference count of 1.
   8.462 + *
    8.463 + *	As a by-product, this function converts a non-linear &sk_buff to a
    8.464 + *	linear one, so that the &sk_buff becomes completely private and the
    8.465 + *	caller is allowed to modify all the data of the returned buffer. This
    8.466 + *	means that this function is not recommended for use in circumstances
    8.467 + *	when only the header is going to be modified. Use pskb_copy() instead.
   8.468 + */
   8.469 +
   8.470 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
   8.471 +{
   8.472 +	int headerlen = skb->data - skb->head;
   8.473 +	/*
   8.474 +	 *	Allocate the copy buffer
   8.475 +	 */
   8.476 +	struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
   8.477 +				      gfp_mask);
   8.478 +	if (!n)
   8.479 +		return NULL;
   8.480 +
   8.481 +	/* Set the data pointer */
   8.482 +	skb_reserve(n, headerlen);
   8.483 +	/* Set the tail pointer and length */
   8.484 +	skb_put(n, skb->len);
   8.485 +	n->csum	     = skb->csum;
   8.486 +	n->ip_summed = skb->ip_summed;
   8.487 +
   8.488 +	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
   8.489 +		BUG();
   8.490 +
   8.491 +	copy_skb_header(n, skb);
   8.492 +	return n;
   8.493 +}
   8.494 +
   8.495 +
   8.496 +/**
   8.497 + *	pskb_copy	-	create copy of an sk_buff with private head.
   8.498 + *	@skb: buffer to copy
   8.499 + *	@gfp_mask: allocation priority
   8.500 + *
   8.501 + *	Make a copy of both an &sk_buff and part of its data, located
   8.502 + *	in header. Fragmented data remain shared. This is used when
   8.503 + *	the caller wishes to modify only header of &sk_buff and needs
   8.504 + *	private copy of the header to alter. Returns %NULL on failure
   8.505 + *	or the pointer to the buffer on success.
   8.506 + *	The returned buffer has a reference count of 1.
   8.507 + */
   8.508 +
   8.509 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
   8.510 +{
   8.511 +	/*
   8.512 +	 *	Allocate the copy buffer
   8.513 +	 */
   8.514 +	struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
   8.515 +
   8.516 +	if (!n)
   8.517 +		goto out;
   8.518 +
   8.519 +	/* Set the data pointer */
   8.520 +	skb_reserve(n, skb->data - skb->head);
   8.521 +	/* Set the tail pointer and length */
   8.522 +	skb_put(n, skb_headlen(skb));
   8.523 +	/* Copy the bytes */
   8.524 +	memcpy(n->data, skb->data, n->len);
   8.525 +	n->csum	     = skb->csum;
   8.526 +	n->ip_summed = skb->ip_summed;
   8.527 +
   8.528 +	n->data_len  = skb->data_len;
   8.529 +	n->len	     = skb->len;
   8.530 +
   8.531 +	if (skb_shinfo(skb)->nr_frags) {
   8.532 +		int i;
   8.533 +
   8.534 +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   8.535 +			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
   8.536 +			get_page(skb_shinfo(n)->frags[i].page);
   8.537 +		}
   8.538 +		skb_shinfo(n)->nr_frags = i;
   8.539 +	}
   8.540 +	skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
   8.541 +	skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
   8.542 +
   8.543 +	if (skb_shinfo(skb)->frag_list) {
   8.544 +		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
   8.545 +		skb_clone_fraglist(n);
   8.546 +	}
   8.547 +
   8.548 +	copy_skb_header(n, skb);
   8.549 +out:
   8.550 +	return n;
   8.551 +}
   8.552 +
   8.553 +/**
   8.554 + *	pskb_expand_head - reallocate header of &sk_buff
   8.555 + *	@skb: buffer to reallocate
   8.556 + *	@nhead: room to add at head
   8.557 + *	@ntail: room to add at tail
   8.558 + *	@gfp_mask: allocation priority
   8.559 + *
   8.560 + *	Expands (or creates identical copy, if &nhead and &ntail are zero)
   8.561 + *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
    8.562 + *	reference count of 1. Returns zero on success, or an error code
    8.563 + *	if expansion failed. In the latter case, the &sk_buff is not changed.
   8.564 + *
   8.565 + *	All the pointers pointing into skb header may change and must be
   8.566 + *	reloaded after call to this function.
   8.567 + */
   8.568 +
   8.569 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
   8.570 +{
   8.571 +	int i;
   8.572 +	u8 *data;
   8.573 +	int size = nhead + (skb->end - skb->head) + ntail;
   8.574 +	long off;
   8.575 +
   8.576 +	if (skb_shared(skb))
   8.577 +		BUG();
   8.578 +
   8.579 +	size = SKB_DATA_ALIGN(size);
   8.580 +
   8.581 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   8.582 +	if (!data)
   8.583 +		goto nodata;
   8.584 +
   8.585 +	/* Copy only real data... and, alas, header. This should be
   8.586 +	 * optimized for the cases when header is void. */
   8.587 +	memcpy(data + nhead, skb->head, skb->tail - skb->head);
   8.588 +	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
   8.589 +
   8.590 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
   8.591 +		get_page(skb_shinfo(skb)->frags[i].page);
   8.592 +
   8.593 +	if (skb_shinfo(skb)->frag_list)
   8.594 +		skb_clone_fraglist(skb);
   8.595 +
   8.596 +	skb_release_data(skb);
   8.597 +
   8.598 +	off = (data + nhead) - skb->head;
   8.599 +
   8.600 +	skb->head     = data;
   8.601 +	skb->end      = data + size;
   8.602 +	skb->data    += off;
   8.603 +	skb->tail    += off;
   8.604 +	skb->mac.raw += off;
   8.605 +	skb->h.raw   += off;
   8.606 +	skb->nh.raw  += off;
   8.607 +	skb->cloned   = 0;
   8.608 +	atomic_set(&skb_shinfo(skb)->dataref, 1);
   8.609 +	return 0;
   8.610 +
   8.611 +nodata:
   8.612 +	return -ENOMEM;
   8.613 +}
   8.614 +
   8.615 +/* Make private copy of skb with writable head and some headroom */
   8.616 +
   8.617 +struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
   8.618 +{
   8.619 +	struct sk_buff *skb2;
   8.620 +	int delta = headroom - skb_headroom(skb);
   8.621 +
   8.622 +	if (delta <= 0)
   8.623 +		skb2 = pskb_copy(skb, GFP_ATOMIC);
   8.624 +	else {
   8.625 +		skb2 = skb_clone(skb, GFP_ATOMIC);
   8.626 +		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
   8.627 +					     GFP_ATOMIC)) {
   8.628 +			kfree_skb(skb2);
   8.629 +			skb2 = NULL;
   8.630 +		}
   8.631 +	}
   8.632 +	return skb2;
   8.633 +}
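
/*
 * Illustrative sketch, not part of the patch: using skb_realloc_headroom()
 * above to guarantee writable headroom before pushing a new header in front
 * of a buffer whose head may be shared. The 16-byte header size is just an
 * example value, not something taken from this changeset.
 */
static struct sk_buff *example_push_header(struct sk_buff *skb)
{
	if (skb_headroom(skb) < 16 || skb_cloned(skb)) {
		struct sk_buff *nskb = skb_realloc_headroom(skb, 16);
		kfree_skb(skb);			/* drop our ref on the original */
		if (!nskb)
			return NULL;
		skb = nskb;
	}
	memset(skb_push(skb, 16), 0, 16);	/* space for the new header */
	return skb;
}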
   8.634 +
   8.635 +
   8.636 +/**
   8.637 + *	skb_copy_expand	-	copy and expand sk_buff
   8.638 + *	@skb: buffer to copy
   8.639 + *	@newheadroom: new free bytes at head
   8.640 + *	@newtailroom: new free bytes at tail
   8.641 + *	@gfp_mask: allocation priority
   8.642 + *
   8.643 + *	Make a copy of both an &sk_buff and its data and while doing so
   8.644 + *	allocate additional space.
   8.645 + *
   8.646 + *	This is used when the caller wishes to modify the data and needs a
   8.647 + *	private copy of the data to alter as well as more space for new fields.
   8.648 + *	Returns %NULL on failure or the pointer to the buffer
   8.649 + *	on success. The returned buffer has a reference count of 1.
   8.650 + *
   8.651 + *	You must pass %GFP_ATOMIC as the allocation priority if this function
   8.652 + *	is called from an interrupt.
   8.653 + *
   8.654 + *	BUG ALERT: ip_summed is not copied. Why does this work? Is it used
   8.655 + *	only by netfilter in the cases when checksum is recalculated? --ANK
   8.656 + */
   8.657 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
   8.658 +				int newheadroom, int newtailroom, int gfp_mask)
   8.659 +{
   8.660 +	/*
   8.661 +	 *	Allocate the copy buffer
   8.662 +	 */
   8.663 +	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
   8.664 +				      gfp_mask);
   8.665 +	int head_copy_len, head_copy_off;
   8.666 +
   8.667 +	if (!n)
   8.668 +		return NULL;
   8.669 +
   8.670 +	skb_reserve(n, newheadroom);
   8.671 +
   8.672 +	/* Set the tail pointer and length */
   8.673 +	skb_put(n, skb->len);
   8.674 +
   8.675 +	head_copy_len = skb_headroom(skb);
   8.676 +	head_copy_off = 0;
   8.677 +	if (newheadroom <= head_copy_len)
   8.678 +		head_copy_len = newheadroom;
   8.679 +	else
   8.680 +		head_copy_off = newheadroom - head_copy_len;
   8.681 +
   8.682 +	/* Copy the linear header and data. */
   8.683 +	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
   8.684 +			  skb->len + head_copy_len))
   8.685 +		BUG();
   8.686 +
   8.687 +	copy_skb_header(n, skb);
   8.688 +	skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
   8.689 +	skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
   8.690 +
   8.691 +	return n;
   8.692 +}
   8.693 +
   8.694 +/**
   8.695 + *	skb_pad			-	zero pad the tail of an skb
   8.696 + *	@skb: buffer to pad
   8.697 + *	@pad: space to pad
   8.698 + *
   8.699 + *	Ensure that a buffer is followed by a padding area that is zero
   8.700 + *	filled. Used by network drivers which may DMA or transfer data
   8.701 + *	beyond the buffer end onto the wire.
   8.702 + *
   8.703 + *	May return NULL in out of memory cases.
   8.704 + */
   8.705 + 
   8.706 +struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
   8.707 +{
   8.708 +	struct sk_buff *nskb;
   8.709 +	
    8.710 +	/* If the skbuff is non-linear, tailroom is always zero. */
   8.711 +	if (skb_tailroom(skb) >= pad) {
   8.712 +		memset(skb->data+skb->len, 0, pad);
   8.713 +		return skb;
   8.714 +	}
   8.715 +	
   8.716 +	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
   8.717 +	kfree_skb(skb);
   8.718 +	if (nskb)
   8.719 +		memset(nskb->data+nskb->len, 0, pad);
   8.720 +	return nskb;
   8.721 +}	
   8.722 + 
    8.723 +/* Trims skb to length len. It can change skb pointers if "realloc" is 1.
    8.724 + * If realloc==0 and trimming is impossible without changing the data,
    8.725 + * it is a BUG().
   8.726 + */
   8.727 +
   8.728 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
   8.729 +{
   8.730 +	int offset = skb_headlen(skb);
   8.731 +	int nfrags = skb_shinfo(skb)->nr_frags;
   8.732 +	int i;
   8.733 +
   8.734 +	for (i = 0; i < nfrags; i++) {
   8.735 +		int end = offset + skb_shinfo(skb)->frags[i].size;
   8.736 +		if (end > len) {
   8.737 +			if (skb_cloned(skb)) {
   8.738 +				if (!realloc)
   8.739 +					BUG();
   8.740 +				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
   8.741 +					return -ENOMEM;
   8.742 +			}
   8.743 +			if (len <= offset) {
   8.744 +				put_page(skb_shinfo(skb)->frags[i].page);
   8.745 +				skb_shinfo(skb)->nr_frags--;
   8.746 +			} else {
   8.747 +				skb_shinfo(skb)->frags[i].size = len - offset;
   8.748 +			}
   8.749 +		}
   8.750 +		offset = end;
   8.751 +	}
   8.752 +
   8.753 +	if (offset < len) {
   8.754 +		skb->data_len -= skb->len - len;
   8.755 +		skb->len       = len;
   8.756 +	} else {
   8.757 +		if (len <= skb_headlen(skb)) {
   8.758 +			skb->len      = len;
   8.759 +			skb->data_len = 0;
   8.760 +			skb->tail     = skb->data + len;
   8.761 +			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
   8.762 +				skb_drop_fraglist(skb);
   8.763 +		} else {
   8.764 +			skb->data_len -= skb->len - len;
   8.765 +			skb->len       = len;
   8.766 +		}
   8.767 +	}
   8.768 +
   8.769 +	return 0;
   8.770 +}
   8.771 +
   8.772 +/**
   8.773 + *	__pskb_pull_tail - advance tail of skb header
   8.774 + *	@skb: buffer to reallocate
   8.775 + *	@delta: number of bytes to advance tail
   8.776 + *
    8.777 + *	The function only makes sense on a fragmented &sk_buff;
    8.778 + *	it expands the header, moving its tail forward and copying the
    8.779 + *	necessary data from the fragmented part.
   8.780 + *
   8.781 + *	&sk_buff MUST have reference count of 1.
   8.782 + *
   8.783 + *	Returns %NULL (and &sk_buff does not change) if pull failed
   8.784 + *	or value of new tail of skb in the case of success.
   8.785 + *
   8.786 + *	All the pointers pointing into skb header may change and must be
   8.787 + *	reloaded after call to this function.
   8.788 + */
   8.789 +
   8.790 +/* Moves tail of skb head forward, copying data from fragmented part,
   8.791 + * when it is necessary.
   8.792 + * 1. It may fail due to malloc failure.
   8.793 + * 2. It may change skb pointers.
   8.794 + *
   8.795 + * It is pretty complicated. Luckily, it is called only in exceptional cases.
   8.796 + */
   8.797 +unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
   8.798 +{
    8.799 +	/* If the skb does not have enough free space at the tail, get a new
    8.800 +	 * one plus 128 bytes for future expansions. If we have enough room
    8.801 +	 * at the tail, reallocate without expansion only if the skb is cloned.
   8.802 +	 */
   8.803 +	int i, k, eat = (skb->tail + delta) - skb->end;
   8.804 +
   8.805 +	if (eat > 0 || skb_cloned(skb)) {
   8.806 +		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
   8.807 +				     GFP_ATOMIC))
   8.808 +			return NULL;
   8.809 +	}
   8.810 +
   8.811 +	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
   8.812 +		BUG();
   8.813 +
   8.814 +	/* Optimization: no fragments, no reasons to preestimate
   8.815 +	 * size of pulled pages. Superb.
   8.816 +	 */
   8.817 +	if (!skb_shinfo(skb)->frag_list)
   8.818 +		goto pull_pages;
   8.819 +
   8.820 +	/* Estimate size of pulled pages. */
   8.821 +	eat = delta;
   8.822 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   8.823 +		if (skb_shinfo(skb)->frags[i].size >= eat)
   8.824 +			goto pull_pages;
   8.825 +		eat -= skb_shinfo(skb)->frags[i].size;
   8.826 +	}
   8.827 +
    8.828 +	/* If we need to update the frag list, we are in trouble.
    8.829 +	 * Certainly, it is possible to add an offset to the skb data,
    8.830 +	 * but taking into account that pulling is expected to
    8.831 +	 * be a very rare operation, it is worth fighting against
    8.832 +	 * further bloating of the skb head and crucifying ourselves here instead.
    8.833 +	 * Pure masochism, indeed. 8)8)
   8.834 +	 */
   8.835 +	if (eat) {
   8.836 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
   8.837 +		struct sk_buff *clone = NULL;
   8.838 +		struct sk_buff *insp = NULL;
   8.839 +
   8.840 +		do {
   8.841 +			if (!list)
   8.842 +				BUG();
   8.843 +
   8.844 +			if (list->len <= eat) {
   8.845 +				/* Eaten as whole. */
   8.846 +				eat -= list->len;
   8.847 +				list = list->next;
   8.848 +				insp = list;
   8.849 +			} else {
   8.850 +				/* Eaten partially. */
   8.851 +
   8.852 +				if (skb_shared(list)) {
   8.853 +					/* Sucks! We need to fork list. :-( */
   8.854 +					clone = skb_clone(list, GFP_ATOMIC);
   8.855 +					if (!clone)
   8.856 +						return NULL;
   8.857 +					insp = list->next;
   8.858 +					list = clone;
   8.859 +				} else {
   8.860 +					/* This may be pulled without
   8.861 +					 * problems. */
   8.862 +					insp = list;
   8.863 +				}
   8.864 +				if (!pskb_pull(list, eat)) {
   8.865 +					if (clone)
   8.866 +						kfree_skb(clone);
   8.867 +					return NULL;
   8.868 +				}
   8.869 +				break;
   8.870 +			}
   8.871 +		} while (eat);
   8.872 +
   8.873 +		/* Free pulled out fragments. */
   8.874 +		while ((list = skb_shinfo(skb)->frag_list) != insp) {
   8.875 +			skb_shinfo(skb)->frag_list = list->next;
   8.876 +			kfree_skb(list);
   8.877 +		}
   8.878 +		/* And insert new clone at head. */
   8.879 +		if (clone) {
   8.880 +			clone->next = list;
   8.881 +			skb_shinfo(skb)->frag_list = clone;
   8.882 +		}
   8.883 +	}
   8.884 +	/* Success! Now we may commit changes to skb data. */
   8.885 +
   8.886 +pull_pages:
   8.887 +	eat = delta;
   8.888 +	k = 0;
   8.889 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   8.890 +		if (skb_shinfo(skb)->frags[i].size <= eat) {
   8.891 +			put_page(skb_shinfo(skb)->frags[i].page);
   8.892 +			eat -= skb_shinfo(skb)->frags[i].size;
   8.893 +		} else {
   8.894 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
   8.895 +			if (eat) {
   8.896 +				skb_shinfo(skb)->frags[k].page_offset += eat;
   8.897 +				skb_shinfo(skb)->frags[k].size -= eat;
   8.898 +				eat = 0;
   8.899 +			}
   8.900 +			k++;
   8.901 +		}
   8.902 +	}
   8.903 +	skb_shinfo(skb)->nr_frags = k;
   8.904 +
   8.905 +	skb->tail     += delta;
   8.906 +	skb->data_len -= delta;
   8.907 +
   8.908 +	return skb->tail;
   8.909 +}
   8.910 +
   8.911 +/* Copy some data bits from skb to kernel buffer. */
   8.912 +
   8.913 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
   8.914 +{
   8.915 +	int i, copy;
   8.916 +	int start = skb_headlen(skb);
   8.917 +
   8.918 +	if (offset > (int)skb->len - len)
   8.919 +		goto fault;
   8.920 +
   8.921 +	/* Copy header. */
   8.922 +	if ((copy = start - offset) > 0) {
   8.923 +		if (copy > len)
   8.924 +			copy = len;
   8.925 +		memcpy(to, skb->data + offset, copy);
   8.926 +		if ((len -= copy) == 0)
   8.927 +			return 0;
   8.928 +		offset += copy;
   8.929 +		to     += copy;
   8.930 +	}
   8.931 +
   8.932 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   8.933 +		int end;
   8.934 +
   8.935 +		BUG_TRAP(start <= offset + len);
   8.936 +
   8.937 +		end = start + skb_shinfo(skb)->frags[i].size;
   8.938 +		if ((copy = end - offset) > 0) {
   8.939 +			u8 *vaddr;
   8.940 +
   8.941 +			if (copy > len)
   8.942 +				copy = len;
   8.943 +
   8.944 +			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
   8.945 +			memcpy(to,
   8.946 +			       vaddr + skb_shinfo(skb)->frags[i].page_offset+
   8.947 +			       offset - start, copy);
   8.948 +			kunmap_skb_frag(vaddr);
   8.949 +
   8.950 +			if ((len -= copy) == 0)
   8.951 +				return 0;
   8.952 +			offset += copy;
   8.953 +			to     += copy;
   8.954 +		}
   8.955 +		start = end;
   8.956 +	}
   8.957 +
   8.958 +	if (skb_shinfo(skb)->frag_list) {
   8.959 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
   8.960 +
   8.961 +		for (; list; list = list->next) {
   8.962 +			int end;
   8.963 +
   8.964 +			BUG_TRAP(start <= offset + len);
   8.965 +
   8.966 +			end = start + list->len;
   8.967 +			if ((copy = end - offset) > 0) {
   8.968 +				if (copy > len)
   8.969 +					copy = len;
   8.970 +				if (skb_copy_bits(list, offset - start,
   8.971 +						  to, copy))
   8.972 +					goto fault;
   8.973 +				if ((len -= copy) == 0)
   8.974 +					return 0;
   8.975 +				offset += copy;
   8.976 +				to     += copy;
   8.977 +			}
   8.978 +			start = end;
   8.979 +		}
   8.980 +	}
   8.981 +	if (!len)
   8.982 +		return 0;
   8.983 +
   8.984 +fault:
   8.985 +	return -EFAULT;
   8.986 +}
   8.987 +
   8.988 +/* Keep iterating until skb_iter_next returns false. */
   8.989 +void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i)
   8.990 +{
   8.991 +	i->len = skb_headlen(skb);
   8.992 +	i->data = (unsigned char *)skb->data;
   8.993 +	i->nextfrag = 0;
   8.994 +	i->fraglist = NULL;
   8.995 +}
   8.996 +
   8.997 +int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i)
   8.998 +{
   8.999 +	/* Unmap previous, if not head fragment. */
  8.1000 +	if (i->nextfrag)
  8.1001 +		kunmap_skb_frag(i->data);
  8.1002 +
  8.1003 +	if (i->fraglist) {
  8.1004 +	fraglist:
  8.1005 +		/* We're iterating through fraglist. */
  8.1006 +		if (i->nextfrag < skb_shinfo(i->fraglist)->nr_frags) {
  8.1007 +			i->data = kmap_skb_frag(&skb_shinfo(i->fraglist)
  8.1008 +						->frags[i->nextfrag]);
  8.1009 +			i->len = skb_shinfo(i->fraglist)->frags[i->nextfrag]
  8.1010 +				.size;
  8.1011 +			i->nextfrag++;
  8.1012 +			return 1;
  8.1013 +		}
  8.1014 +		/* Fragments with fragments?  Too hard! */
  8.1015 +		BUG_ON(skb_shinfo(i->fraglist)->frag_list);
  8.1016 +		i->fraglist = i->fraglist->next;
  8.1017 +		if (!i->fraglist)
  8.1018 +			goto end;
  8.1019 +
  8.1020 +		i->len = skb_headlen(i->fraglist);
  8.1021 +		i->data = i->fraglist->data;
  8.1022 +		i->nextfrag = 0;
  8.1023 +		return 1;
  8.1024 +	}
  8.1025 +
  8.1026 +	if (i->nextfrag < skb_shinfo(skb)->nr_frags) {
  8.1027 +		i->data = kmap_skb_frag(&skb_shinfo(skb)->frags[i->nextfrag]);
  8.1028 +		i->len = skb_shinfo(skb)->frags[i->nextfrag].size;
  8.1029 +		i->nextfrag++;
  8.1030 +		return 1;
  8.1031 +	}
  8.1032 +
  8.1033 +	i->fraglist = skb_shinfo(skb)->frag_list;
  8.1034 +	if (i->fraglist)
  8.1035 +		goto fraglist;
  8.1036 +
  8.1037 +end:
  8.1038 +	/* Bug trap for callers */
  8.1039 +	i->data = NULL;
  8.1040 +	return 0;
  8.1041 +}
  8.1042 +
  8.1043 +void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i)
  8.1044 +{
  8.1045 +	/* Unmap previous, if not head fragment. */
  8.1046 +	if (i->data && i->nextfrag)
  8.1047 +		kunmap_skb_frag(i->data);
  8.1048 +	/* Bug trap for callers */
  8.1049 +	i->data = NULL;
  8.1050 +}
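
/*
 * Illustrative sketch, not part of the patch: walking every byte of a
 * possibly non-linear skb with the iterator above. Each step yields a mapped
 * (data, len) chunk; skb_iter_next() unmaps the previous chunk, and
 * skb_iter_abort() is only needed if the loop is abandoned early.
 */
static unsigned int example_count_bytes(const struct sk_buff *skb)
{
	struct skb_iter it;
	unsigned int total = 0;

	skb_iter_first(skb, &it);
	do {
		total += it.len;	/* it.data points at it.len bytes */
	} while (skb_iter_next(skb, &it));

	return total;
}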
  8.1051 +
  8.1052 +/* Checksum skb data. */
  8.1053 +
  8.1054 +unsigned int skb_checksum(const struct sk_buff *skb, int offset,
  8.1055 +			  int len, unsigned int csum)
  8.1056 +{
  8.1057 +	int start = skb_headlen(skb);
  8.1058 +	int i, copy = start - offset;
  8.1059 +	int pos = 0;
  8.1060 +
  8.1061 +	/* Checksum header. */
  8.1062 +	if (copy > 0) {
  8.1063 +		if (copy > len)
  8.1064 +			copy = len;
  8.1065 +		csum = csum_partial(skb->data + offset, copy, csum);
  8.1066 +		if ((len -= copy) == 0)
  8.1067 +			return csum;
  8.1068 +		offset += copy;
  8.1069 +		pos	= copy;
  8.1070 +	}
  8.1071 +
  8.1072 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  8.1073 +		int end;
  8.1074 +
  8.1075 +		BUG_TRAP(start <= offset + len);
  8.1076 +
  8.1077 +		end = start + skb_shinfo(skb)->frags[i].size;
  8.1078 +		if ((copy = end - offset) > 0) {
  8.1079 +			unsigned int csum2;
  8.1080 +			u8 *vaddr;
  8.1081 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  8.1082 +
  8.1083 +			if (copy > len)
  8.1084 +				copy = len;
  8.1085 +			vaddr = kmap_skb_frag(frag);
  8.1086 +			csum2 = csum_partial(vaddr + frag->page_offset +
  8.1087 +					     offset - start, copy, 0);
  8.1088 +			kunmap_skb_frag(vaddr);
  8.1089 +			csum = csum_block_add(csum, csum2, pos);
  8.1090 +			if (!(len -= copy))
  8.1091 +				return csum;
  8.1092 +			offset += copy;
  8.1093 +			pos    += copy;
  8.1094 +		}
  8.1095 +		start = end;
  8.1096 +	}
  8.1097 +
  8.1098 +	if (skb_shinfo(skb)->frag_list) {
  8.1099 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
  8.1100 +
  8.1101 +		for (; list; list = list->next) {
  8.1102 +			int end;
  8.1103 +
  8.1104 +			BUG_TRAP(start <= offset + len);
  8.1105 +
  8.1106 +			end = start + list->len;
  8.1107 +			if ((copy = end - offset) > 0) {
  8.1108 +				unsigned int csum2;
  8.1109 +				if (copy > len)
  8.1110 +					copy = len;
  8.1111 +				csum2 = skb_checksum(list, offset - start,
  8.1112 +						     copy, 0);
  8.1113 +				csum = csum_block_add(csum, csum2, pos);
  8.1114 +				if ((len -= copy) == 0)
  8.1115 +					return csum;
  8.1116 +				offset += copy;
  8.1117 +				pos    += copy;
  8.1118 +			}
  8.1119 +			start = end;
  8.1120 +		}
  8.1121 +	}
  8.1122 +	if (len)
  8.1123 +		BUG();
  8.1124 +
  8.1125 +	return csum;
  8.1126 +}
  8.1127 +
  8.1128 +/* Both of above in one bottle. */
  8.1129 +
  8.1130 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
  8.1131 +				    u8 *to, int len, unsigned int csum)
  8.1132 +{
  8.1133 +	int start = skb_headlen(skb);
  8.1134 +	int i, copy = start - offset;
  8.1135 +	int pos = 0;
  8.1136 +
  8.1137 +	/* Copy header. */
  8.1138 +	if (copy > 0) {
  8.1139 +		if (copy > len)
  8.1140 +			copy = len;
  8.1141 +		csum = csum_partial_copy_nocheck(skb->data + offset, to,
  8.1142 +						 copy, csum);
  8.1143 +		if ((len -= copy) == 0)
  8.1144 +			return csum;
  8.1145 +		offset += copy;
  8.1146 +		to     += copy;
  8.1147 +		pos	= copy;
  8.1148 +	}
  8.1149 +
  8.1150 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  8.1151 +		int end;
  8.1152 +
  8.1153 +		BUG_TRAP(start <= offset + len);
  8.1154 +
  8.1155 +		end = start + skb_shinfo(skb)->frags[i].size;
  8.1156 +		if ((copy = end - offset) > 0) {
  8.1157 +			unsigned int csum2;
  8.1158 +			u8 *vaddr;
  8.1159 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  8.1160 +
  8.1161 +			if (copy > len)
  8.1162 +				copy = len;
  8.1163 +			vaddr = kmap_skb_frag(frag);
  8.1164 +			csum2 = csum_partial_copy_nocheck(vaddr +
  8.1165 +							  frag->page_offset +
  8.1166 +							  offset - start, to,
  8.1167 +							  copy, 0);
  8.1168 +			kunmap_skb_frag(vaddr);
  8.1169 +			csum = csum_block_add(csum, csum2, pos);
  8.1170 +			if (!(len -= copy))
  8.1171 +				return csum;
  8.1172 +			offset += copy;
  8.1173 +			to     += copy;
  8.1174 +			pos    += copy;
  8.1175 +		}
  8.1176 +		start = end;
  8.1177 +	}
  8.1178 +
  8.1179 +	if (skb_shinfo(skb)->frag_list) {
  8.1180 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
  8.1181 +
  8.1182 +		for (; list; list = list->next) {
  8.1183 +			unsigned int csum2;
  8.1184 +			int end;
  8.1185 +
  8.1186 +			BUG_TRAP(start <= offset + len);
  8.1187 +
  8.1188 +			end = start + list->len;
  8.1189 +			if ((copy = end - offset) > 0) {
  8.1190 +				if (copy > len)
  8.1191 +					copy = len;
  8.1192 +				csum2 = skb_copy_and_csum_bits(list,
  8.1193 +							       offset - start,
  8.1194 +							       to, copy, 0);
  8.1195 +				csum = csum_block_add(csum, csum2, pos);
  8.1196 +				if ((len -= copy) == 0)
  8.1197 +					return csum;
  8.1198 +				offset += copy;
  8.1199 +				to     += copy;
  8.1200 +				pos    += copy;
  8.1201 +			}
  8.1202 +			start = end;
  8.1203 +		}
  8.1204 +	}
  8.1205 +	if (len)
  8.1206 +		BUG();
  8.1207 +	return csum;
  8.1208 +}
  8.1209 +
  8.1210 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
  8.1211 +{
  8.1212 +	unsigned int csum;
  8.1213 +	long csstart;
  8.1214 +
  8.1215 +	if (skb->ip_summed == CHECKSUM_HW)
  8.1216 +		csstart = skb->h.raw - skb->data;
  8.1217 +	else
  8.1218 +		csstart = skb_headlen(skb);
  8.1219 +
  8.1220 +	if (csstart > skb_headlen(skb))
  8.1221 +		BUG();
  8.1222 +
  8.1223 +	memcpy(to, skb->data, csstart);
  8.1224 +
  8.1225 +	csum = 0;
  8.1226 +	if (csstart != skb->len)
  8.1227 +		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
  8.1228 +					      skb->len - csstart, 0);
  8.1229 +
  8.1230 +	if (skb->ip_summed == CHECKSUM_HW) {
  8.1231 +		long csstuff = csstart + skb->csum;
  8.1232 +
  8.1233 +		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
  8.1234 +	}
  8.1235 +}
  8.1236 +
  8.1237 +/**
  8.1238 + *	skb_dequeue - remove from the head of the queue
  8.1239 + *	@list: list to dequeue from
  8.1240 + *
  8.1241 + *	Remove the head of the list. The list lock is taken so the function
  8.1242 + *	may be used safely with other locking list functions. The head item is
  8.1243 + *	returned or %NULL if the list is empty.
  8.1244 + */
  8.1245 +
  8.1246 +struct sk_buff *skb_dequeue(struct sk_buff_head *list)
  8.1247 +{
  8.1248 +	unsigned long flags;
  8.1249 +	struct sk_buff *result;
  8.1250 +
  8.1251 +	spin_lock_irqsave(&list->lock, flags);
  8.1252 +	result = __skb_dequeue(list);
  8.1253 +	spin_unlock_irqrestore(&list->lock, flags);
  8.1254 +	return result;
  8.1255 +}
  8.1256 +
  8.1257 +/**
  8.1258 + *	skb_dequeue_tail - remove from the tail of the queue
  8.1259 + *	@list: list to dequeue from
  8.1260 + *
  8.1261 + *	Remove the tail of the list. The list lock is taken so the function
  8.1262 + *	may be used safely with other locking list functions. The tail item is
  8.1263 + *	returned or %NULL if the list is empty.
  8.1264 + */
  8.1265 +struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
  8.1266 +{
  8.1267 +	unsigned long flags;
  8.1268 +	struct sk_buff *result;
  8.1269 +
  8.1270 +	spin_lock_irqsave(&list->lock, flags);
  8.1271 +	result = __skb_dequeue_tail(list);
  8.1272 +	spin_unlock_irqrestore(&list->lock, flags);
  8.1273 +	return result;
  8.1274 +}
  8.1275 +
  8.1276 +/**
  8.1277 + *	skb_queue_purge - empty a list
  8.1278 + *	@list: list to empty
  8.1279 + *
  8.1280 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  8.1281 + *	the list and one reference dropped. This function takes the list
  8.1282 + *	lock and is atomic with respect to other list locking functions.
  8.1283 + */
  8.1284 +void skb_queue_purge(struct sk_buff_head *list)
  8.1285 +{
  8.1286 +	struct sk_buff *skb;
  8.1287 +	while ((skb = skb_dequeue(list)) != NULL)
  8.1288 +		kfree_skb(skb);
  8.1289 +}
  8.1290 +
  8.1291 +/**
  8.1292 + *	skb_queue_head - queue a buffer at the list head
  8.1293 + *	@list: list to use
  8.1294 + *	@newsk: buffer to queue
  8.1295 + *
   8.1296 + *	Queue a buffer at the start of the list. This function takes the
   8.1297 + *	list lock and can be used safely with other locking &sk_buff
   8.1298 + *	functions.
  8.1299 + *
  8.1300 + *	A buffer cannot be placed on two lists at the same time.
  8.1301 + */
  8.1302 +void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  8.1303 +{
  8.1304 +	unsigned long flags;
  8.1305 +
  8.1306 +	spin_lock_irqsave(&list->lock, flags);
  8.1307 +	__skb_queue_head(list, newsk);
  8.1308 +	spin_unlock_irqrestore(&list->lock, flags);
  8.1309 +}
  8.1310 +
  8.1311 +/**
  8.1312 + *	skb_queue_tail - queue a buffer at the list tail
  8.1313 + *	@list: list to use
  8.1314 + *	@newsk: buffer to queue
  8.1315 + *
   8.1316 + *	Queue a buffer at the tail of the list. This function takes the
   8.1317 + *	list lock and can be used safely with other locking &sk_buff
   8.1318 + *	functions.
  8.1319 + *
  8.1320 + *	A buffer cannot be placed on two lists at the same time.
  8.1321 + */
  8.1322 +void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  8.1323 +{
  8.1324 +	unsigned long flags;
  8.1325 +
  8.1326 +	spin_lock_irqsave(&list->lock, flags);
  8.1327 +	__skb_queue_tail(list, newsk);
  8.1328 +	spin_unlock_irqrestore(&list->lock, flags);
  8.1329 +}
  8.1330 +/**
  8.1331 + *	skb_unlink	-	remove a buffer from a list
  8.1332 + *	@skb: buffer to remove
  8.1333 + *
   8.1334 + *	Remove a packet from the list it is on. The list locks are taken
   8.1335 + *	and this function is atomic with respect to other list locked calls.
  8.1336 + *
  8.1337 + *	Works even without knowing the list it is sitting on, which can be
  8.1338 + *	handy at times. It also means that THE LIST MUST EXIST when you
  8.1339 + *	unlink. Thus a list must have its contents unlinked before it is
  8.1340 + *	destroyed.
  8.1341 + */
  8.1342 +void skb_unlink(struct sk_buff *skb)
  8.1343 +{
  8.1344 +	struct sk_buff_head *list = skb->list;
  8.1345 +
  8.1346 +	if (list) {
  8.1347 +		unsigned long flags;
  8.1348 +
  8.1349 +		spin_lock_irqsave(&list->lock, flags);
  8.1350 +		if (skb->list == list)
  8.1351 +			__skb_unlink(skb, skb->list);
  8.1352 +		spin_unlock_irqrestore(&list->lock, flags);
  8.1353 +	}
  8.1354 +}
  8.1355 +
  8.1356 +
  8.1357 +/**
  8.1358 + *	skb_append	-	append a buffer
  8.1359 + *	@old: buffer to insert after
  8.1360 + *	@newsk: buffer to insert
  8.1361 + *
  8.1362 + *	Place a packet after a given packet in a list. The list locks are taken
  8.1363 + *	and this function is atomic with respect to other list locked calls.
  8.1364 + *	A buffer cannot be placed on two lists at the same time.
  8.1365 + */
  8.1366 +
  8.1367 +void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  8.1368 +{
  8.1369 +	unsigned long flags;
  8.1370 +
  8.1371 +	spin_lock_irqsave(&old->list->lock, flags);
  8.1372 +	__skb_append(old, newsk);
  8.1373 +	spin_unlock_irqrestore(&old->list->lock, flags);
  8.1374 +}
  8.1375 +
  8.1376 +
  8.1377 +/**
  8.1378 + *	skb_insert	-	insert a buffer
  8.1379 + *	@old: buffer to insert before
  8.1380 + *	@newsk: buffer to insert
  8.1381 + *
  8.1382 + *	Place a packet before a given packet in a list. The list locks are taken
  8.1383 + *	and this function is atomic with respect to other list locked calls
  8.1384 + *	A buffer cannot be placed on two lists at the same time.
  8.1385 + */
  8.1386 +
  8.1387 +void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
  8.1388 +{
  8.1389 +	unsigned long flags;
  8.1390 +
  8.1391 +	spin_lock_irqsave(&old->list->lock, flags);
  8.1392 +	__skb_insert(newsk, old->prev, old, old->list);
  8.1393 +	spin_unlock_irqrestore(&old->list->lock, flags);
  8.1394 +}
  8.1395 +
  8.1396 +#if 0
  8.1397 +/*
  8.1398 + * 	Tune the memory allocator for a new MTU size.
  8.1399 + */
  8.1400 +void skb_add_mtu(int mtu)
  8.1401 +{
  8.1402 +	/* Must match allocation in alloc_skb */
  8.1403 +	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
  8.1404 +
  8.1405 +	kmem_add_cache_size(mtu);
  8.1406 +}
  8.1407 +#endif
  8.1408 +
  8.1409 +static void inline skb_split_inside_header(struct sk_buff *skb,
  8.1410 +					   struct sk_buff* skb1,
  8.1411 +					   const u32 len, const int pos)
  8.1412 +{
  8.1413 +	int i;
  8.1414 +
  8.1415 +	memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
  8.1416 +
  8.1417 +	/* And move data appendix as is. */
  8.1418 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
  8.1419 +		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
  8.1420 +
  8.1421 +	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
  8.1422 +	skb_shinfo(skb)->nr_frags  = 0;
  8.1423 +	skb1->data_len		   = skb->data_len;
  8.1424 +	skb1->len		   += skb1->data_len;
  8.1425 +	skb->data_len		   = 0;
  8.1426 +	skb->len		   = len;
  8.1427 +	skb->tail		   = skb->data + len;
  8.1428 +}
  8.1429 +
  8.1430 +static void inline skb_split_no_header(struct sk_buff *skb,
  8.1431 +				       struct sk_buff* skb1,
  8.1432 +				       const u32 len, int pos)
  8.1433 +{
  8.1434 +	int i, k = 0;
  8.1435 +	const int nfrags = skb_shinfo(skb)->nr_frags;
  8.1436 +
  8.1437 +	skb_shinfo(skb)->nr_frags = 0;
  8.1438 +	skb1->len		  = skb1->data_len = skb->len - len;
  8.1439 +	skb->len		  = len;
  8.1440 +	skb->data_len		  = len - pos;
  8.1441 +
  8.1442 +	for (i = 0; i < nfrags; i++) {
  8.1443 +		int size = skb_shinfo(skb)->frags[i].size;
  8.1444 +
  8.1445 +		if (pos + size > len) {
  8.1446 +			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
  8.1447 +
  8.1448 +			if (pos < len) {
   8.1449 +				/* Split the frag.
   8.1450 +				 * We have two variants in this case:
   8.1451 +				 * 1. Move the whole frag to the second
   8.1452 +				 *    part, if it is possible. E.g.
   8.1453 +				 *    this approach is mandatory for TUX,
   8.1454 +				 *    where splitting is expensive.
   8.1455 +				 * 2. Split accurately. We do the latter here.
  8.1456 +				 */
  8.1457 +				get_page(skb_shinfo(skb)->frags[i].page);
  8.1458 +				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
  8.1459 +				skb_shinfo(skb1)->frags[0].size -= len - pos;
  8.1460 +				skb_shinfo(skb)->frags[i].size	= len - pos;
  8.1461 +				skb_shinfo(skb)->nr_frags++;
  8.1462 +			}
  8.1463 +			k++;
  8.1464 +		} else
  8.1465 +			skb_shinfo(skb)->nr_frags++;
  8.1466 +		pos += size;
  8.1467 +	}
  8.1468 +	skb_shinfo(skb1)->nr_frags = k;
  8.1469 +}
  8.1470 +
  8.1471 +/**
  8.1472 + * skb_split - Split fragmented skb to two parts at length len.
  8.1473 + */
  8.1474 +void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
  8.1475 +{
  8.1476 +	int pos = skb_headlen(skb);
  8.1477 +
  8.1478 +	if (len < pos)	/* Split line is inside header. */
  8.1479 +		skb_split_inside_header(skb, skb1, len, pos);
  8.1480 +	else		/* Second chunk has no header, nothing to copy. */
  8.1481 +		skb_split_no_header(skb, skb1, len, pos);
  8.1482 +}
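
/*
 * Illustrative sketch, not part of the patch: splitting a private (uncloned)
 * packet at 'len' bytes in the style of TCP fragmentation. The second skb
 * must be freshly allocated with enough tailroom for any linear data beyond
 * 'len'; the 'hroom' headroom value is an example parameter only.
 */
static struct sk_buff *example_split(struct sk_buff *skb, u32 len, int hroom)
{
	struct sk_buff *skb1 = alloc_skb(skb_headlen(skb) + hroom, GFP_ATOMIC);

	if (!skb1)
		return NULL;
	skb_reserve(skb1, hroom);	/* room to push headers later */
	skb_split(skb, skb1, len);	/* skb keeps [0, len); skb1 gets the rest */
	return skb1;
}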
  8.1483 +
  8.1484 +void __init skb_init(void)
  8.1485 +{
  8.1486 +	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
  8.1487 +					      sizeof(struct sk_buff),
  8.1488 +					      0,
  8.1489 +					      SLAB_HWCACHE_ALIGN,
  8.1490 +					      NULL, NULL);
  8.1491 +	if (!skbuff_head_cache)
  8.1492 +		panic("cannot create skbuff cache");
  8.1493 +}
  8.1494 +
  8.1495 +EXPORT_SYMBOL(___pskb_trim);
  8.1496 +EXPORT_SYMBOL(__kfree_skb);
  8.1497 +EXPORT_SYMBOL(__pskb_pull_tail);
  8.1498 +EXPORT_SYMBOL(alloc_skb);
  8.1499 +EXPORT_SYMBOL(pskb_copy);
  8.1500 +EXPORT_SYMBOL(pskb_expand_head);
  8.1501 +EXPORT_SYMBOL(skb_checksum);
  8.1502 +EXPORT_SYMBOL(skb_clone);
  8.1503 +EXPORT_SYMBOL(skb_clone_fraglist);
  8.1504 +EXPORT_SYMBOL(skb_copy);
  8.1505 +EXPORT_SYMBOL(skb_copy_and_csum_bits);
  8.1506 +EXPORT_SYMBOL(skb_copy_and_csum_dev);
  8.1507 +EXPORT_SYMBOL(skb_copy_bits);
  8.1508 +EXPORT_SYMBOL(skb_copy_expand);
  8.1509 +EXPORT_SYMBOL(skb_over_panic);
  8.1510 +EXPORT_SYMBOL(skb_pad);
  8.1511 +EXPORT_SYMBOL(skb_realloc_headroom);
  8.1512 +EXPORT_SYMBOL(skb_under_panic);
  8.1513 +EXPORT_SYMBOL(skb_dequeue);
  8.1514 +EXPORT_SYMBOL(skb_dequeue_tail);
  8.1515 +EXPORT_SYMBOL(skb_insert);
  8.1516 +EXPORT_SYMBOL(skb_queue_purge);
  8.1517 +EXPORT_SYMBOL(skb_queue_head);
  8.1518 +EXPORT_SYMBOL(skb_queue_tail);
  8.1519 +EXPORT_SYMBOL(skb_unlink);
  8.1520 +EXPORT_SYMBOL(skb_append);
  8.1521 +EXPORT_SYMBOL(skb_split);
  8.1522 +EXPORT_SYMBOL(skb_iter_first);
  8.1523 +EXPORT_SYMBOL(skb_iter_next);
  8.1524 +EXPORT_SYMBOL(skb_iter_abort);