ia64/xen-unstable

view linux-2.4-xen-sparse/net/core/skbuff.c @ 6832:5959fae4722a

Set NE bit for VMX guest CR0. The VMCS guest CR0.NE bit must
be set, otherwise VM entry fails.

Signed-off-by: Chengyuan Li <chengyuan.li@intel.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Sep 14 13:37:50 2005 +0000 (2005-09-14)
/*
 * Routines having to do with the 'struct sk_buff' memory handlers.
 *
 * Authors:     Alan Cox <iiitac@pyr.swan.ac.uk>
 *              Florian La Roche <rzsfl@rz.uni-sb.de>
 *
 * Version:     $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
 *
 * Fixes:
 *      Alan Cox        :       Fixed the worst of the load balancer bugs.
 *      Dave Platt      :       Interrupt stacking fix.
 *      Richard Kooijman:       Timestamp fixes.
 *      Alan Cox        :       Changed buffer format.
 *      Alan Cox        :       destructor hook for AF_UNIX etc.
 *      Linus Torvalds  :       Better skb_clone.
 *      Alan Cox        :       Added skb_copy.
 *      Alan Cox        :       Added all the changed routines Linus
 *                              only put in the headers
 *      Ray VanTassle   :       Fixed --skb->lock in free
 *      Alan Cox        :       skb_copy copy arp field
 *      Andi Kleen      :       slabified it.
 *
 * NOTE:
 *      The __skb_ routines should be called with interrupts
 * disabled, or you better be *real* sure that the operation is atomic
 * with respect to whatever list is being frobbed (e.g. via lock_sock()
 * or via disabling bottom half handlers, etc).
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
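/*
 * Editor's illustrative sketch, not part of the original file: the NOTE
 * above distinguishes the lockless __skb_ queue helpers from their locking
 * counterparts.  The function and queue names below are hypothetical; this
 * is only one way the described discipline might look.
 */
#if 0
static void example_enqueue(struct sk_buff_head *queue, struct sk_buff *skb)
{
        unsigned long flags;

        /* __skb_queue_head() leaves locking to the caller. */
        spin_lock_irqsave(&queue->lock, flags);
        __skb_queue_head(queue, skb);
        spin_unlock_irqrestore(&queue->lock, flags);

        /* Equivalent self-locking form: skb_queue_head(queue, skb); */
}
#endif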
/*
 * The functions in this file will not compile correctly with gcc 2.4.x
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/cache.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/highmem.h>

#include <net/protocol.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>

#include <asm/uaccess.h>
#include <asm/system.h>

int sysctl_hot_list_len = 128;

static kmem_cache_t *skbuff_head_cache;

static union {
        struct sk_buff_head list;
        char                pad[SMP_CACHE_BYTES];
} skb_head_pool[NR_CPUS];
/*
 * Keep out-of-line to prevent kernel bloat.
 * __builtin_return_address is not used because it is not always
 * reliable.
 */

/**
 *      skb_over_panic  -       private function
 *      @skb: buffer
 *      @sz: size
 *      @here: address
 *
 *      Out of line support code for skb_put(). Not user callable.
 */

void skb_over_panic(struct sk_buff *skb, int sz, void *here)
{
        printk("skput:over: %p:%d put:%d dev:%s",
               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
        BUG();
}

/**
 *      skb_under_panic -       private function
 *      @skb: buffer
 *      @sz: size
 *      @here: address
 *
 *      Out of line support code for skb_push(). Not user callable.
 */

void skb_under_panic(struct sk_buff *skb, int sz, void *here)
{
        printk("skput:under: %p:%d put:%d dev:%s",
               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
        BUG();
}
static __inline__ struct sk_buff *skb_head_from_pool(void)
{
        struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

        if (skb_queue_len(list)) {
                struct sk_buff *skb;
                unsigned long flags;

                local_irq_save(flags);
                skb = __skb_dequeue(list);
                local_irq_restore(flags);
                return skb;
        }
        return NULL;
}

static __inline__ void skb_head_to_pool(struct sk_buff *skb)
{
        struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;

        if (skb_queue_len(list) < sysctl_hot_list_len) {
                unsigned long flags;

                local_irq_save(flags);
                __skb_queue_head(list, skb);
                local_irq_restore(flags);

                return;
        }
        kmem_cache_free(skbuff_head_cache, skb);
}
/*      Allocate a new skbuff. We do this ourselves so we can fill in a few
 *      'private' fields and also do memory statistics to find all the
 *      [BEEP] leaks.
 *
 */

/**
 *      alloc_skb       -       allocate a network buffer
 *      @size: size to allocate
 *      @gfp_mask: allocation mask
 *
 *      Allocate a new &sk_buff. The returned buffer has no headroom and a
 *      tail room of size bytes. The object has a reference count of one.
 *      The return is the buffer. On a failure the return is %NULL.
 *
 *      Buffers may only be allocated from interrupts using a @gfp_mask of
 *      %GFP_ATOMIC.
 */

struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
{
        struct sk_buff *skb;
        u8 *data;

        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
                static int count = 0;
                if (++count < 5) {
                        printk(KERN_ERR "alloc_skb called nonatomically "
                               "from interrupt %p\n", NET_CALLER(size));
                        BUG();
                }
                gfp_mask &= ~__GFP_WAIT;
        }

        /* Get the HEAD */
        skb = skb_head_from_pool();
        if (skb == NULL) {
                skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
                if (skb == NULL)
                        goto nohead;
        }

        /* Get the DATA. Size must match skb_add_mtu(). */
        size = SKB_DATA_ALIGN(size);
        data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
        if (data == NULL)
                goto nodata;

        /* XXX: does not include slab overhead */
        skb->truesize = size + sizeof(struct sk_buff);

        /* Load the data pointers. */
        skb->head = data;
        skb->data = data;
        skb->tail = data;
        skb->end = data + size;

        /* Set up other state */
        skb->len = 0;
        skb->cloned = 0;
        skb->data_len = 0;

        atomic_set(&skb->users, 1);
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;
        return skb;

nodata:
        skb_head_to_pool(skb);
nohead:
        return NULL;
}
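/*
 * Editor's illustrative sketch, not part of the original file: a typical
 * alloc_skb() call from process context.  The 16-byte headroom, the helper
 * name and the payload argument are hypothetical.
 */
#if 0
static struct sk_buff *example_build_packet(const void *payload,
                                            unsigned int len)
{
        struct sk_buff *skb;

        /* Room for the payload plus 16 bytes of link-layer headroom. */
        skb = alloc_skb(len + 16, GFP_KERNEL);
        if (skb == NULL)
                return NULL;

        skb_reserve(skb, 16);                    /* claim the headroom     */
        memcpy(skb_put(skb, len), payload, len); /* extend tail, copy data */
        return skb;
}
#endif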
/**
 *      alloc_skb_from_cache    -       allocate a network buffer
 *      @cp: kmem_cache from which to allocate the data area
 *           (object size must be big enough for @size bytes + skb overheads)
 *      @size: size to allocate
 *      @gfp_mask: allocation mask
 *
 *      Allocate a new &sk_buff. The returned buffer has no headroom and a
 *      tail room of size bytes. The object has a reference count of one.
 *      The return is the buffer. On a failure the return is %NULL.
 *
 *      Buffers may only be allocated from interrupts using a @gfp_mask of
 *      %GFP_ATOMIC.
 */

struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                     unsigned int size, int gfp_mask)
{
        struct sk_buff *skb;
        u8 *data;

        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
                static int count = 0;
                if (++count < 5) {
                        printk(KERN_ERR "alloc_skb called nonatomically "
                               "from interrupt %p\n", NET_CALLER(size));
                        BUG();
                }
                gfp_mask &= ~__GFP_WAIT;
        }

        /* Get the HEAD */
        skb = skb_head_from_pool();
        if (skb == NULL) {
                skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
                if (skb == NULL)
                        goto nohead;
        }

        /* Get the DATA. */
        size = SKB_DATA_ALIGN(size);
        data = kmem_cache_alloc(cp, gfp_mask);
        if (data == NULL)
                goto nodata;

        /* XXX: does not include slab overhead */
        skb->truesize = size + sizeof(struct sk_buff);

        /* Load the data pointers. */
        skb->head = data;
        skb->data = data;
        skb->tail = data;
        skb->end = data + size;

        /* Set up other state */
        skb->len = 0;
        skb->cloned = 0;
        skb->data_len = 0;

        atomic_set(&skb->users, 1);
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;
        return skb;

nodata:
        skb_head_to_pool(skb);
nohead:
        return NULL;
}
/*
 * Slab constructor for a skb head.
 */
static inline void skb_headerinit(void *p, kmem_cache_t *cache,
                                  unsigned long flags)
{
        struct sk_buff *skb = p;

        skb->next = NULL;
        skb->prev = NULL;
        skb->list = NULL;
        skb->sk = NULL;
        skb->stamp.tv_sec=0;    /* No idea about time */
        skb->dev = NULL;
        skb->real_dev = NULL;
        skb->dst = NULL;
        memset(skb->cb, 0, sizeof(skb->cb));
        skb->pkt_type = PACKET_HOST;    /* Default type */
        skb->ip_summed = 0;
        skb->priority = 0;
        skb->security = 0;      /* By default packets are insecure */
        skb->destructor = NULL;

#ifdef CONFIG_NETFILTER
        skb->nfmark = skb->nfcache = 0;
        skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
        skb->nf_debug = 0;
#endif
#endif
#ifdef CONFIG_NET_SCHED
        skb->tc_index = 0;
#endif
}

static void skb_drop_fraglist(struct sk_buff *skb)
{
        struct sk_buff *list = skb_shinfo(skb)->frag_list;

        skb_shinfo(skb)->frag_list = NULL;

        do {
                struct sk_buff *this = list;
                list = list->next;
                kfree_skb(this);
        } while (list);
}

static void skb_clone_fraglist(struct sk_buff *skb)
{
        struct sk_buff *list;

        for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
                skb_get(list);
}
static void skb_release_data(struct sk_buff *skb)
{
        if (!skb->cloned ||
            atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
                if (skb_shinfo(skb)->nr_frags) {
                        int i;
                        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                                put_page(skb_shinfo(skb)->frags[i].page);
                }

                if (skb_shinfo(skb)->frag_list)
                        skb_drop_fraglist(skb);

                kfree(skb->head);
        }
}

/*
 * Free an skbuff by memory without cleaning the state.
 */
void kfree_skbmem(struct sk_buff *skb)
{
        skb_release_data(skb);
        skb_head_to_pool(skb);
}
/**
 *      __kfree_skb - private function
 *      @skb: buffer
 *
 *      Free an sk_buff. Release anything attached to the buffer.
 *      Clean the state. This is an internal helper function. Users should
 *      always call kfree_skb
 */

void __kfree_skb(struct sk_buff *skb)
{
        if (skb->list) {
                printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
                       "on a list (from %p).\n", NET_CALLER(skb));
                BUG();
        }

        dst_release(skb->dst);
        if(skb->destructor) {
                if (in_irq()) {
                        printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
                               NET_CALLER(skb));
                }
                skb->destructor(skb);
        }
#ifdef CONFIG_NETFILTER
        nf_conntrack_put(skb->nfct);
#endif
        skb_headerinit(skb, NULL, 0);  /* clean state */
        kfree_skbmem(skb);
}
/**
 *      skb_clone       -       duplicate an sk_buff
 *      @skb: buffer to clone
 *      @gfp_mask: allocation priority
 *
 *      Duplicate an &sk_buff. The new one is not owned by a socket. Both
 *      copies share the same packet data but not structure. The new
 *      buffer has a reference count of 1. If the allocation fails the
 *      function returns %NULL otherwise the new buffer is returned.
 *
 *      If this function is called from an interrupt gfp_mask() must be
 *      %GFP_ATOMIC.
 */

struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
{
        struct sk_buff *n;

        n = skb_head_from_pool();
        if (!n) {
                n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
                if (!n)
                        return NULL;
        }

#define C(x) n->x = skb->x

        n->next = n->prev = NULL;
        n->list = NULL;
        n->sk = NULL;
        C(stamp);
        C(dev);
        C(real_dev);
        C(h);
        C(nh);
        C(mac);
        C(dst);
        dst_clone(n->dst);
        memcpy(n->cb, skb->cb, sizeof(skb->cb));
        C(len);
        C(data_len);
        C(csum);
        n->cloned = 1;
        C(pkt_type);
        C(ip_summed);
        C(priority);
        atomic_set(&n->users, 1);
        C(protocol);
        C(security);
        C(truesize);
        C(head);
        C(data);
        C(tail);
        C(end);
        n->destructor = NULL;
#ifdef CONFIG_NETFILTER
        C(nfmark);
        C(nfcache);
        C(nfct);
#ifdef CONFIG_NETFILTER_DEBUG
        C(nf_debug);
#endif
#endif /*CONFIG_NETFILTER*/
#if defined(CONFIG_HIPPI)
        C(private);
#endif
#ifdef CONFIG_NET_SCHED
        C(tc_index);
#endif

        atomic_inc(&(skb_shinfo(skb)->dataref));
        skb->cloned = 1;
#ifdef CONFIG_NETFILTER
        nf_conntrack_get(skb->nfct);
#endif
        return n;
}
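/*
 * Editor's illustrative sketch, not part of the original file: skb_clone()
 * duplicates only the sk_buff header and shares the packet data, so it is
 * the cheap choice when the data will merely be read.  The helper name is
 * hypothetical.
 */
#if 0
static void example_clone_for_tap(struct sk_buff *skb)
{
        struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

        if (clone == NULL)
                return;
        /* ... hand `clone' to a read-only consumer ... */
        kfree_skb(clone);       /* drops only the clone's reference */
}
#endif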
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
        /*
         * Shift between the two data areas in bytes
         */
        unsigned long offset = new->data - old->data;

        new->list=NULL;
        new->sk=NULL;
        new->dev=old->dev;
        new->real_dev=old->real_dev;
        new->priority=old->priority;
        new->protocol=old->protocol;
        new->dst=dst_clone(old->dst);
        new->h.raw=old->h.raw+offset;
        new->nh.raw=old->nh.raw+offset;
        new->mac.raw=old->mac.raw+offset;
        memcpy(new->cb, old->cb, sizeof(old->cb));
        atomic_set(&new->users, 1);
        new->pkt_type=old->pkt_type;
        new->stamp=old->stamp;
        new->destructor = NULL;
        new->security=old->security;
#ifdef CONFIG_NETFILTER
        new->nfmark=old->nfmark;
        new->nfcache=old->nfcache;
        new->nfct=old->nfct;
        nf_conntrack_get(new->nfct);
#ifdef CONFIG_NETFILTER_DEBUG
        new->nf_debug=old->nf_debug;
#endif
#endif
#ifdef CONFIG_NET_SCHED
        new->tc_index = old->tc_index;
#endif
}
/**
 *      skb_copy        -       create private copy of an sk_buff
 *      @skb: buffer to copy
 *      @gfp_mask: allocation priority
 *
 *      Make a copy of both an &sk_buff and its data. This is used when the
 *      caller wishes to modify the data and needs a private copy of the
 *      data to alter. Returns %NULL on failure or the pointer to the buffer
 *      on success. The returned buffer has a reference count of 1.
 *
 *      As a by-product this function converts a non-linear &sk_buff to a
 *      linear one, so that the &sk_buff becomes completely private and the
 *      caller is allowed to modify all the data of the returned buffer. This
 *      means that this function is not recommended for use in circumstances
 *      when only the header is going to be modified. Use pskb_copy() instead.
 */

struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
{
        struct sk_buff *n;
        int headerlen = skb->data-skb->head;

        /*
         * Allocate the copy buffer
         */
        n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
        if(n==NULL)
                return NULL;

        /* Set the data pointer */
        skb_reserve(n,headerlen);
        /* Set the tail pointer and length */
        skb_put(n,skb->len);
        n->csum = skb->csum;
        n->ip_summed = skb->ip_summed;

        if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
                BUG();

        copy_skb_header(n, skb);

        return n;
}
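/*
 * Editor's illustrative sketch, not part of the original file: skb_copy()
 * is the right call when the payload itself must be modified, since the
 * result is fully private (and linear).  The byte flip is a placeholder
 * modification and assumes at least one byte of payload.
 */
#if 0
static struct sk_buff *example_private_copy(struct sk_buff *skb)
{
        struct sk_buff *copy = skb_copy(skb, GFP_ATOMIC);

        if (copy == NULL)
                return NULL;
        copy->data[0] ^= 0xff;  /* safe: no other user sees this buffer */
        return copy;
}
#endif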
/* Keep head the same: replace data */
int skb_linearize(struct sk_buff *skb, int gfp_mask)
{
        unsigned int size;
        u8 *data;
        long offset;
        int headerlen = skb->data - skb->head;
        int expand = (skb->tail+skb->data_len) - skb->end;

        if (skb_shared(skb))
                BUG();

        if (expand <= 0)
                expand = 0;

        size = (skb->end - skb->head + expand);
        size = SKB_DATA_ALIGN(size);
        data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
        if (data == NULL)
                return -ENOMEM;

        /* Copy entire thing */
        if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
                BUG();

        /* Offset between the two in bytes */
        offset = data - skb->head;

        /* Free old data. */
        skb_release_data(skb);

        skb->head = data;
        skb->end = data + size;

        /* Set up new pointers */
        skb->h.raw += offset;
        skb->nh.raw += offset;
        skb->mac.raw += offset;
        skb->tail += offset;
        skb->data += offset;

        /* Set up shinfo */
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;

        /* We are no longer a clone, even if we were. */
        skb->cloned = 0;

        skb->tail += skb->data_len;
        skb->data_len = 0;
        return 0;
}
/**
 *      pskb_copy       -       create copy of an sk_buff with private head.
 *      @skb: buffer to copy
 *      @gfp_mask: allocation priority
 *
 *      Make a copy of both an &sk_buff and part of its data, located
 *      in header. Fragmented data remain shared. This is used when
 *      the caller wishes to modify only header of &sk_buff and needs
 *      private copy of the header to alter. Returns %NULL on failure
 *      or the pointer to the buffer on success.
 *      The returned buffer has a reference count of 1.
 */

struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
{
        struct sk_buff *n;

        /*
         * Allocate the copy buffer
         */
        n=alloc_skb(skb->end - skb->head, gfp_mask);
        if(n==NULL)
                return NULL;

        /* Set the data pointer */
        skb_reserve(n,skb->data-skb->head);
        /* Set the tail pointer and length */
        skb_put(n,skb_headlen(skb));
        /* Copy the bytes */
        memcpy(n->data, skb->data, n->len);
        n->csum = skb->csum;
        n->ip_summed = skb->ip_summed;

        n->data_len = skb->data_len;
        n->len = skb->len;

        if (skb_shinfo(skb)->nr_frags) {
                int i;

                for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                        skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
                        get_page(skb_shinfo(n)->frags[i].page);
                }
                skb_shinfo(n)->nr_frags = i;
        }

        if (skb_shinfo(skb)->frag_list) {
                skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
                skb_clone_fraglist(n);
        }

        copy_skb_header(n, skb);

        return n;
}
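/*
 * Editor's illustrative sketch, not part of the original file: pskb_copy()
 * privatizes only the header area; paged fragments stay shared, so only the
 * headers may be edited in the result.  The helper name is hypothetical.
 */
#if 0
static struct sk_buff *example_edit_headers(struct sk_buff *skb)
{
        struct sk_buff *copy = pskb_copy(skb, GFP_ATOMIC);

        if (copy == NULL)
                return NULL;
        /* copy->data .. copy->tail is private; frags remain shared. */
        return copy;
}
#endif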
/**
 *      pskb_expand_head - reallocate header of &sk_buff
 *      @skb: buffer to reallocate
 *      @nhead: room to add at head
 *      @ntail: room to add at tail
 *      @gfp_mask: allocation priority
 *
 *      Expands (or creates identical copy, if &nhead and &ntail are zero)
 *      header of skb. &sk_buff itself is not changed. &sk_buff MUST have
 *      reference count of 1. Returns zero on success or a negative error
 *      code if expansion failed; in the latter case &sk_buff is not changed.
 *
 *      All the pointers pointing into skb header may change and must be
 *      reloaded after call to this function.
 */

int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
{
        int i;
        u8 *data;
        int size = nhead + (skb->end - skb->head) + ntail;
        long off;

        if (skb_shared(skb))
                BUG();

        size = SKB_DATA_ALIGN(size);

        data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
        if (data == NULL)
                goto nodata;

        /* Copy only real data... and, alas, header. This should be
         * optimized for the cases when header is void. */
        memcpy(data+nhead, skb->head, skb->tail-skb->head);
        memcpy(data+size, skb->end, sizeof(struct skb_shared_info));

        for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
                get_page(skb_shinfo(skb)->frags[i].page);

        if (skb_shinfo(skb)->frag_list)
                skb_clone_fraglist(skb);

        skb_release_data(skb);

        off = (data+nhead) - skb->head;

        skb->head = data;
        skb->end = data+size;

        skb->data += off;
        skb->tail += off;
        skb->mac.raw += off;
        skb->h.raw += off;
        skb->nh.raw += off;
        skb->cloned = 0;
        atomic_set(&skb_shinfo(skb)->dataref, 1);
        return 0;

nodata:
        return -ENOMEM;
}
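/*
 * Editor's illustrative sketch, not part of the original file: growing
 * headroom in place with pskb_expand_head().  The caller must hold the only
 * reference (skb_shared() would BUG()), and any cached pointers into the
 * old header must be re-derived afterwards, as the comment above warns.
 * The helper name and `needed' parameter are hypothetical.
 */
#if 0
static int example_grow_headroom(struct sk_buff *skb, int needed)
{
        if (skb_headroom(skb) >= needed)
                return 0;
        return pskb_expand_head(skb, SKB_DATA_ALIGN(needed), 0, GFP_ATOMIC);
}
#endif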
/* Make private copy of skb with writable head and some headroom */

struct sk_buff *
skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
{
        struct sk_buff *skb2;
        int delta = headroom - skb_headroom(skb);

        if (delta <= 0)
                return pskb_copy(skb, GFP_ATOMIC);

        skb2 = skb_clone(skb, GFP_ATOMIC);
        if (skb2 == NULL ||
            !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
                return skb2;

        kfree_skb(skb2);
        return NULL;
}
/**
 *      skb_copy_expand -       copy and expand sk_buff
 *      @skb: buffer to copy
 *      @newheadroom: new free bytes at head
 *      @newtailroom: new free bytes at tail
 *      @gfp_mask: allocation priority
 *
 *      Make a copy of both an &sk_buff and its data and while doing so
 *      allocate additional space.
 *
 *      This is used when the caller wishes to modify the data and needs a
 *      private copy of the data to alter as well as more space for new fields.
 *      Returns %NULL on failure or the pointer to the buffer
 *      on success. The returned buffer has a reference count of 1.
 *
 *      You must pass %GFP_ATOMIC as the allocation priority if this function
 *      is called from an interrupt.
 */

struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
                                int newheadroom,
                                int newtailroom,
                                int gfp_mask)
{
        struct sk_buff *n;

        /*
         * Allocate the copy buffer
         */
        n=alloc_skb(newheadroom + skb->len + newtailroom,
                    gfp_mask);
        if(n==NULL)
                return NULL;

        skb_reserve(n,newheadroom);

        /* Set the tail pointer and length */
        skb_put(n,skb->len);

        /* Copy the data only. */
        if (skb_copy_bits(skb, 0, n->data, skb->len))
                BUG();

        copy_skb_header(n, skb);
        return n;
}
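/*
 * Editor's illustrative sketch, not part of the original file:
 * skb_copy_expand() combines a private copy with extra head/tail space,
 * here used to prepend a hypothetical 8-byte encapsulation header.
 */
#if 0
static struct sk_buff *example_add_encap_header(struct sk_buff *skb)
{
        struct sk_buff *n;

        n = skb_copy_expand(skb, skb_headroom(skb) + 8, skb_tailroom(skb),
                            GFP_ATOMIC);
        if (n == NULL)
                return NULL;
        memset(skb_push(n, 8), 0, 8);   /* placeholder header bytes */
        return n;
}
#endif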
/**
 *      skb_pad                 -       zero pad the tail of an skb
 *      @skb: buffer to pad
 *      @pad: space to pad
 *
 *      Ensure that a buffer is followed by a padding area that is zero
 *      filled. Used by network drivers which may DMA or transfer data
 *      beyond the buffer end onto the wire.
 *
 *      May return NULL in out of memory cases.
 */

struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
{
        struct sk_buff *nskb;

        /* If the skbuff is non linear tailroom is always zero.. */
        if(skb_tailroom(skb) >= pad)
        {
                memset(skb->data+skb->len, 0, pad);
                return skb;
        }

        nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
        kfree_skb(skb);
        if(nskb)
                memset(nskb->data+nskb->len, 0, pad);
        return nskb;
}
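/*
 * Editor's illustrative sketch, not part of the original file: a driver
 * padding short Ethernet frames to the 60-byte minimum before handing them
 * to hardware.  Note that on the copy path skb_pad() frees the original skb,
 * so the caller must use the returned pointer.  The helper name is
 * hypothetical.
 */
#if 0
static struct sk_buff *example_pad_min_eth(struct sk_buff *skb)
{
        if (skb->len < 60)
                return skb_pad(skb, 60 - skb->len);
        return skb;
}
#endif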
/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
 * If realloc==0 and trimming is impossible without change of data,
 * it is BUG().
 */

int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
{
        int offset = skb_headlen(skb);
        int nfrags = skb_shinfo(skb)->nr_frags;
        int i;

        for (i=0; i<nfrags; i++) {
                int end = offset + skb_shinfo(skb)->frags[i].size;
                if (end > len) {
                        if (skb_cloned(skb)) {
                                if (!realloc)
                                        BUG();
                                if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
                                        return -ENOMEM;
                        }
                        if (len <= offset) {
                                put_page(skb_shinfo(skb)->frags[i].page);
                                skb_shinfo(skb)->nr_frags--;
                        } else {
                                skb_shinfo(skb)->frags[i].size = len-offset;
                        }
                }
                offset = end;
        }

        if (offset < len) {
                skb->data_len -= skb->len - len;
                skb->len = len;
        } else {
                if (len <= skb_headlen(skb)) {
                        skb->len = len;
                        skb->data_len = 0;
                        skb->tail = skb->data + len;
                        if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
                                skb_drop_fraglist(skb);
                } else {
                        skb->data_len -= skb->len - len;
                        skb->len = len;
                }
        }

        return 0;
}
/**
 *      __pskb_pull_tail - advance tail of skb header
 *      @skb: buffer to reallocate
 *      @delta: number of bytes to advance tail
 *
 *      The function makes sense only on a fragmented &sk_buff,
 *      it expands header moving its tail forward and copying necessary
 *      data from fragmented part.
 *
 *      &sk_buff MUST have reference count of 1.
 *
 *      Returns %NULL (and &sk_buff does not change) if pull failed
 *      or value of new tail of skb in the case of success.
 *
 *      All the pointers pointing into skb header may change and must be
 *      reloaded after call to this function.
 */

/* Moves tail of skb head forward, copying data from fragmented part,
 * when it is necessary.
 * 1. It may fail due to malloc failure.
 * 2. It may change skb pointers.
 *
 * It is pretty complicated. Luckily, it is called only in exceptional cases.
 */
unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
{
        int i, k, eat;

        /* If skb has not enough free space at tail, get new one
         * plus 128 bytes for future expansions. If we have enough
         * room at tail, reallocate without expansion only if skb is cloned.
         */
        eat = (skb->tail+delta) - skb->end;

        if (eat > 0 || skb_cloned(skb)) {
                if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
                        return NULL;
        }

        if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
                BUG();

        /* Optimization: no fragments, no reasons to preestimate
         * size of pulled pages. Superb.
         */
        if (skb_shinfo(skb)->frag_list == NULL)
                goto pull_pages;

        /* Estimate size of pulled pages. */
        eat = delta;
        for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
                if (skb_shinfo(skb)->frags[i].size >= eat)
                        goto pull_pages;
                eat -= skb_shinfo(skb)->frags[i].size;
        }

        /* If we need to update the frag list, we are in trouble.
         * Certainly, it is possible to add an offset to skb data,
         * but taking into account that pulling is expected to
         * be very rare operation, it is worth to fight against
         * further bloating skb head and crucify ourselves here instead.
         * Pure masochism, indeed. 8)8)
         */
        if (eat) {
                struct sk_buff *list = skb_shinfo(skb)->frag_list;
                struct sk_buff *clone = NULL;
                struct sk_buff *insp = NULL;

                do {
                        if (list == NULL)
                                BUG();

                        if (list->len <= eat) {
                                /* Eaten as whole. */
                                eat -= list->len;
                                list = list->next;
                                insp = list;
                        } else {
                                /* Eaten partially. */

                                if (skb_shared(list)) {
                                        /* Sucks! We need to fork list. :-( */
                                        clone = skb_clone(list, GFP_ATOMIC);
                                        if (clone == NULL)
                                                return NULL;
                                        insp = list->next;
                                        list = clone;
                                } else {
                                        /* This may be pulled without
                                         * problems. */
                                        insp = list;
                                }
                                if (pskb_pull(list, eat) == NULL) {
                                        if (clone)
                                                kfree_skb(clone);
                                        return NULL;
                                }
                                break;
                        }
                } while (eat);

                /* Free pulled out fragments. */
                while ((list = skb_shinfo(skb)->frag_list) != insp) {
                        skb_shinfo(skb)->frag_list = list->next;
                        kfree_skb(list);
                }
                /* And insert new clone at head. */
                if (clone) {
                        clone->next = list;
                        skb_shinfo(skb)->frag_list = clone;
                }
        }
        /* Success! Now we may commit changes to skb data. */

pull_pages:
        eat = delta;
        k = 0;
        for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
                if (skb_shinfo(skb)->frags[i].size <= eat) {
                        put_page(skb_shinfo(skb)->frags[i].page);
                        eat -= skb_shinfo(skb)->frags[i].size;
                } else {
                        skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
                        if (eat) {
                                skb_shinfo(skb)->frags[k].page_offset += eat;
                                skb_shinfo(skb)->frags[k].size -= eat;
                                eat = 0;
                        }
                        k++;
                }
        }
        skb_shinfo(skb)->nr_frags = k;

        skb->tail += delta;
        skb->data_len -= delta;

        return skb->tail;
}
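/*
 * Editor's illustrative sketch, not part of the original file: callers
 * normally reach __pskb_pull_tail() through pskb_may_pull(), which makes
 * the first `len' bytes linear before a header is parsed.  The 20-byte
 * figure and the helper name are hypothetical.
 */
#if 0
static int example_parse_header(struct sk_buff *skb)
{
        if (!pskb_may_pull(skb, 20))
                return -EINVAL;         /* packet shorter than the header */
        /* skb->data[0..19] are now guaranteed to be in the linear area. */
        return skb->data[0];
}
#endif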
/* Copy some data bits from skb to kernel buffer. */

int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
{
        int i, copy;
        int start = skb->len - skb->data_len;

        if (offset > (int)skb->len-len)
                goto fault;

        /* Copy header. */
        if ((copy = start-offset) > 0) {
                if (copy > len)
                        copy = len;
                memcpy(to, skb->data + offset, copy);
                if ((len -= copy) == 0)
                        return 0;
                offset += copy;
                to += copy;
        }

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;

                BUG_TRAP(start <= offset+len);

                end = start + skb_shinfo(skb)->frags[i].size;
                if ((copy = end-offset) > 0) {
                        u8 *vaddr;

                        if (copy > len)
                                copy = len;

                        vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
                        memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
                               offset-start, copy);
                        kunmap_skb_frag(vaddr);

                        if ((len -= copy) == 0)
                                return 0;
                        offset += copy;
                        to += copy;
                }
                start = end;
        }

        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *list;

                for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
                        int end;

                        BUG_TRAP(start <= offset+len);

                        end = start + list->len;
                        if ((copy = end-offset) > 0) {
                                if (copy > len)
                                        copy = len;
                                if (skb_copy_bits(list, offset-start, to, copy))
                                        goto fault;
                                if ((len -= copy) == 0)
                                        return 0;
                                offset += copy;
                                to += copy;
                        }
                        start = end;
                }
        }

        if (len == 0)
                return 0;

fault:
        return -EFAULT;
}
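/*
 * Editor's illustrative sketch, not part of the original file:
 * skb_copy_bits() gathers bytes from the linear area, the page fragments
 * and the frag_list into a flat buffer, which is handy when a field may
 * straddle fragments.  The wrapper name is hypothetical.
 */
#if 0
static int example_peek(const struct sk_buff *skb, int offset,
                        void *buf, int len)
{
        if (skb_copy_bits(skb, offset, buf, len))
                return -EFAULT;         /* offset/len outside the packet */
        return 0;
}
#endif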
/* Checksum skb data. */

unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
{
        int i, copy;
        int start = skb->len - skb->data_len;
        int pos = 0;

        /* Checksum header. */
        if ((copy = start-offset) > 0) {
                if (copy > len)
                        copy = len;
                csum = csum_partial(skb->data+offset, copy, csum);
                if ((len -= copy) == 0)
                        return csum;
                offset += copy;
                pos = copy;
        }

        for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
                int end;

                BUG_TRAP(start <= offset+len);

                end = start + skb_shinfo(skb)->frags[i].size;
                if ((copy = end-offset) > 0) {
                        unsigned int csum2;
                        u8 *vaddr;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

                        if (copy > len)
                                copy = len;
                        vaddr = kmap_skb_frag(frag);
                        csum2 = csum_partial(vaddr + frag->page_offset +
                                             offset-start, copy, 0);
                        kunmap_skb_frag(vaddr);
                        csum = csum_block_add(csum, csum2, pos);
                        if (!(len -= copy))
                                return csum;
                        offset += copy;
                        pos += copy;
                }
                start = end;
        }

        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *list;

                for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
                        int end;

                        BUG_TRAP(start <= offset+len);

                        end = start + list->len;
                        if ((copy = end-offset) > 0) {
                                unsigned int csum2;
                                if (copy > len)
                                        copy = len;
                                csum2 = skb_checksum(list, offset-start, copy, 0);
                                csum = csum_block_add(csum, csum2, pos);
                                if ((len -= copy) == 0)
                                        return csum;
                                offset += copy;
                                pos += copy;
                        }
                        start = end;
                }
        }

        if (len == 0)
                return csum;

        BUG();
        return csum;
}
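/*
 * Editor's illustrative sketch, not part of the original file: folding the
 * 32-bit partial sum returned by skb_checksum() into the 16-bit Internet
 * checksum of the whole packet.  The helper name is hypothetical.
 */
#if 0
static unsigned short example_full_csum(const struct sk_buff *skb)
{
        return csum_fold(skb_checksum(skb, 0, skb->len, 0));
}
#endif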
/* Both of above in one bottle. */

unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
{
        int i, copy;
        int start = skb->len - skb->data_len;
        int pos = 0;

        /* Copy header. */
        if ((copy = start-offset) > 0) {
                if (copy > len)
                        copy = len;
                csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
                if ((len -= copy) == 0)
                        return csum;
                offset += copy;
                to += copy;
                pos = copy;
        }

        for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
                int end;

                BUG_TRAP(start <= offset+len);

                end = start + skb_shinfo(skb)->frags[i].size;
                if ((copy = end-offset) > 0) {
                        unsigned int csum2;
                        u8 *vaddr;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

                        if (copy > len)
                                copy = len;
                        vaddr = kmap_skb_frag(frag);
                        csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
                                                          offset-start, to, copy, 0);
                        kunmap_skb_frag(vaddr);
                        csum = csum_block_add(csum, csum2, pos);
                        if (!(len -= copy))
                                return csum;
                        offset += copy;
                        to += copy;
                        pos += copy;
                }
                start = end;
        }

        if (skb_shinfo(skb)->frag_list) {
                struct sk_buff *list;

                for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
                        unsigned int csum2;
                        int end;

                        BUG_TRAP(start <= offset+len);

                        end = start + list->len;
                        if ((copy = end-offset) > 0) {
                                if (copy > len)
                                        copy = len;
                                csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
                                csum = csum_block_add(csum, csum2, pos);
                                if ((len -= copy) == 0)
                                        return csum;
                                offset += copy;
                                to += copy;
                                pos += copy;
                        }
                        start = end;
                }
        }

        if (len == 0)
                return csum;

        BUG();
        return csum;
}
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
        unsigned int csum;
        long csstart;

        if (skb->ip_summed == CHECKSUM_HW)
                csstart = skb->h.raw - skb->data;
        else
                csstart = skb->len - skb->data_len;

        if (csstart > skb->len - skb->data_len)
                BUG();

        memcpy(to, skb->data, csstart);

        csum = 0;
        if (csstart != skb->len)
                csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
                                              skb->len-csstart, 0);

        if (skb->ip_summed == CHECKSUM_HW) {
                long csstuff = csstart + skb->csum;

                *((unsigned short *)(to + csstuff)) = csum_fold(csum);
        }
}
#if 0
/*
 * Tune the memory allocator for a new MTU size.
 */
void skb_add_mtu(int mtu)
{
        /* Must match allocation in alloc_skb */
        mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);

        kmem_add_cache_size(mtu);
}
#endif
void __init skb_init(void)
{
        int i;

        skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
                                              sizeof(struct sk_buff),
                                              0,
                                              SLAB_HWCACHE_ALIGN,
                                              skb_headerinit, NULL);
        if (!skbuff_head_cache)
                panic("cannot create skbuff cache");

        for (i=0; i<NR_CPUS; i++)
                skb_queue_head_init(&skb_head_pool[i].list);
}