ia64/xen-unstable

view linux-2.4-xen-sparse/net/core/skbuff.c @ 7238:971e7c7411b3

Raise an exception if an error appears on the pipes to our children, and make
sure that the child's pipes are closed even under that exception. Move the
handling of POLLHUP to the end of the loop, so that we guarantee to read any
remaining data from the child if POLLHUP and POLLIN appear at the same time.
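
For illustration only, a minimal C sketch of the ordering described above: drain POLLIN before honouring POLLHUP, report pipe errors to the caller, and close the pipe on every path. The actual change is to the Python control tools, not to this file; child_fd and the buffer handling here are hypothetical.

    #include <poll.h>
    #include <unistd.h>

    static int drain_child(int child_fd)
    {
            char buf[4096];
            struct pollfd pfd = { .fd = child_fd, .events = POLLIN };
            int rc = 0;

            for (;;) {
                    if (poll(&pfd, 1, -1) < 0 ||
                        (pfd.revents & (POLLERR | POLLNVAL))) {
                            rc = -1;          /* error on the pipe: report it */
                            break;
                    }
                    if (pfd.revents & POLLIN) {
                            ssize_t n = read(child_fd, buf, sizeof(buf));
                            if (n < 0) {
                                    rc = -1;
                                    break;
                            }
                            if (n > 0)
                                    continue; /* keep reading before acting on POLLHUP */
                            break;            /* n == 0: end of stream */
                    }
                    if (pfd.revents & POLLHUP)
                            break;            /* no data left and the child has closed */
            }
            close(child_fd);                  /* close the pipe even on the error path */
            return rc;
    }
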

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@ewan
date Thu Oct 06 10:13:11 2005 +0100 (2005-10-06)
parents 06d84bf87159
children
line source
1 /*
2 * Routines having to do with the 'struct sk_buff' memory handlers.
3 *
4 * Authors: Alan Cox <iiitac@pyr.swan.ac.uk>
5 * Florian La Roche <rzsfl@rz.uni-sb.de>
6 *
7 * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
8 *
9 * Fixes:
10 * Alan Cox : Fixed the worst of the load balancer bugs.
11 * Dave Platt : Interrupt stacking fix.
12 * Richard Kooijman : Timestamp fixes.
13 * Alan Cox : Changed buffer format.
14 * Alan Cox : destructor hook for AF_UNIX etc.
15 * Linus Torvalds : Better skb_clone.
16 * Alan Cox : Added skb_copy.
17 * Alan Cox : Added all the changed routines Linus
18 * only put in the headers
19 * Ray VanTassle : Fixed --skb->lock in free
20 * Alan Cox : skb_copy copy arp field
21 * Andi Kleen : slabified it.
22 *
23 * NOTE:
24 * The __skb_ routines should be called with interrupts
25 * disabled, or you better be *real* sure that the operation is atomic
26 * with respect to whatever list is being frobbed (e.g. via lock_sock()
27 * or via disabling bottom half handlers, etc).
28 *
29 * This program is free software; you can redistribute it and/or
30 * modify it under the terms of the GNU General Public License
31 * as published by the Free Software Foundation; either version
32 * 2 of the License, or (at your option) any later version.
33 */
35 /*
36 * The functions in this file will not compile correctly with gcc 2.4.x
37 */
39 #include <linux/config.h>
40 #include <linux/types.h>
41 #include <linux/kernel.h>
42 #include <linux/sched.h>
43 #include <linux/mm.h>
44 #include <linux/interrupt.h>
45 #include <linux/in.h>
46 #include <linux/inet.h>
47 #include <linux/slab.h>
48 #include <linux/netdevice.h>
49 #include <linux/string.h>
50 #include <linux/skbuff.h>
51 #include <linux/cache.h>
52 #include <linux/rtnetlink.h>
53 #include <linux/init.h>
54 #include <linux/highmem.h>
56 #include <net/protocol.h>
57 #include <net/dst.h>
58 #include <net/sock.h>
59 #include <net/checksum.h>
61 #include <asm/uaccess.h>
62 #include <asm/system.h>
64 int sysctl_hot_list_len = 128;
66 static kmem_cache_t *skbuff_head_cache;
68 static union {
69 struct sk_buff_head list;
70 char pad[SMP_CACHE_BYTES];
71 } skb_head_pool[NR_CPUS];
73 /*
74 * Keep out-of-line to prevent kernel bloat.
75 * __builtin_return_address is not used because it is not always
76 * reliable.
77 */
79 /**
80 * skb_over_panic - private function
81 * @skb: buffer
82 * @sz: size
83 * @here: address
84 *
85 * Out of line support code for skb_put(). Not user callable.
86 */
88 void skb_over_panic(struct sk_buff *skb, int sz, void *here)
89 {
90 printk("skput:over: %p:%d put:%d dev:%s",
91 here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
92 BUG();
93 }
95 /**
96 * skb_under_panic - private function
97 * @skb: buffer
98 * @sz: size
99 * @here: address
100 *
101 * Out of line support code for skb_push(). Not user callable.
102 */
105 void skb_under_panic(struct sk_buff *skb, int sz, void *here)
106 {
107 printk("skput:under: %p:%d put:%d dev:%s",
108 here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
109 BUG();
110 }
112 static __inline__ struct sk_buff *skb_head_from_pool(void)
113 {
114 struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
116 if (skb_queue_len(list)) {
117 struct sk_buff *skb;
118 unsigned long flags;
120 local_irq_save(flags);
121 skb = __skb_dequeue(list);
122 local_irq_restore(flags);
123 return skb;
124 }
125 return NULL;
126 }
128 static __inline__ void skb_head_to_pool(struct sk_buff *skb)
129 {
130 struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
132 if (skb_queue_len(list) < sysctl_hot_list_len) {
133 unsigned long flags;
135 local_irq_save(flags);
136 __skb_queue_head(list, skb);
137 local_irq_restore(flags);
139 return;
140 }
141 kmem_cache_free(skbuff_head_cache, skb);
142 }
145 /* Allocate a new skbuff. We do this ourselves so we can fill in a few
146 * 'private' fields and also do memory statistics to find all the
147 * [BEEP] leaks.
148 *
149 */
151 /**
152 * alloc_skb - allocate a network buffer
153 * @size: size to allocate
154 * @gfp_mask: allocation mask
155 *
156 * Allocate a new &sk_buff. The returned buffer has no headroom and a
157 * tail room of size bytes. The object has a reference count of one.
158 * The return is the buffer. On a failure the return is %NULL.
159 *
160 * Buffers may only be allocated from interrupts using a @gfp_mask of
161 * %GFP_ATOMIC.
162 */
164 struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
165 {
166 struct sk_buff *skb;
167 u8 *data;
169 if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
170 static int count = 0;
171 if (++count < 5) {
172 printk(KERN_ERR "alloc_skb called nonatomically "
173 "from interrupt %p\n", NET_CALLER(size));
174 BUG();
175 }
176 gfp_mask &= ~__GFP_WAIT;
177 }
179 /* Get the HEAD */
180 skb = skb_head_from_pool();
181 if (skb == NULL) {
182 skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
183 if (skb == NULL)
184 goto nohead;
185 }
187 /* Get the DATA. Size must match skb_add_mtu(). */
188 size = SKB_DATA_ALIGN(size);
189 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
190 if (data == NULL)
191 goto nodata;
193 /* XXX: does not include slab overhead */
194 skb->truesize = size + sizeof(struct sk_buff);
196 /* Load the data pointers. */
197 skb->head = data;
198 skb->data = data;
199 skb->tail = data;
200 skb->end = data + size;
202 /* Set up other state */
203 skb->len = 0;
204 skb->cloned = 0;
205 skb->data_len = 0;
207 atomic_set(&skb->users, 1);
208 atomic_set(&(skb_shinfo(skb)->dataref), 1);
209 skb_shinfo(skb)->nr_frags = 0;
210 skb_shinfo(skb)->frag_list = NULL;
211 return skb;
213 nodata:
214 skb_head_to_pool(skb);
215 nohead:
216 return NULL;
217 }
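/*
 * Illustrative use (hypothetical driver receive path, not from this file):
 * reserve headroom up front so that later header pushes do not need a
 * reallocation. "RX_RESERVE", "len" and "hw_buf" are made-up names.
 *
 *	skb = alloc_skb(len + RX_RESERVE, GFP_ATOMIC);
 *	if (skb == NULL)
 *		return;				(drop on allocation failure)
 *	skb_reserve(skb, RX_RESERVE);		(RX_RESERVE bytes of headroom, len of tailroom)
 *	memcpy(skb_put(skb, len), hw_buf, len);
 */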
219 /**
220 * alloc_skb_from_cache - allocate a network buffer
221 * @cp: kmem_cache from which to allocate the data area
222 * (object size must be big enough for @size bytes + skb overheads)
223 * @size: size to allocate
224 * @gfp_mask: allocation mask
225 *
226 * Allocate a new &sk_buff. The returned buffer has no headroom and a
227 * tail room of size bytes. The object has a reference count of one.
228 * The return is the buffer. On a failure the return is %NULL.
229 *
230 * Buffers may only be allocated from interrupts using a @gfp_mask of
231 * %GFP_ATOMIC.
232 */
234 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
235 unsigned int size, int gfp_mask)
236 {
237 struct sk_buff *skb;
238 u8 *data;
240 if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
241 static int count = 0;
242 if (++count < 5) {
243 printk(KERN_ERR "alloc_skb called nonatomically "
244 "from interrupt %p\n", NET_CALLER(size));
245 BUG();
246 }
247 gfp_mask &= ~__GFP_WAIT;
248 }
250 /* Get the HEAD */
251 skb = skb_head_from_pool();
252 if (skb == NULL) {
253 skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
254 if (skb == NULL)
255 goto nohead;
256 }
258 /* Get the DATA. */
259 size = SKB_DATA_ALIGN(size);
260 data = kmem_cache_alloc(cp, gfp_mask);
261 if (data == NULL)
262 goto nodata;
264 /* XXX: does not include slab overhead */
265 skb->truesize = size + sizeof(struct sk_buff);
267 /* Load the data pointers. */
268 skb->head = data;
269 skb->data = data;
270 skb->tail = data;
271 skb->end = data + size;
273 /* Set up other state */
274 skb->len = 0;
275 skb->cloned = 0;
276 skb->data_len = 0;
278 atomic_set(&skb->users, 1);
279 atomic_set(&(skb_shinfo(skb)->dataref), 1);
280 skb_shinfo(skb)->nr_frags = 0;
281 skb_shinfo(skb)->frag_list = NULL;
282 return skb;
284 nodata:
285 skb_head_to_pool(skb);
286 nohead:
287 return NULL;
288 }
291 /*
292 * Slab constructor for a skb head.
293 */
294 static inline void skb_headerinit(void *p, kmem_cache_t *cache,
295 unsigned long flags)
296 {
297 struct sk_buff *skb = p;
299 skb->next = NULL;
300 skb->prev = NULL;
301 skb->list = NULL;
302 skb->sk = NULL;
303 skb->stamp.tv_sec=0; /* No idea about time */
304 skb->dev = NULL;
305 skb->real_dev = NULL;
306 skb->dst = NULL;
307 memset(skb->cb, 0, sizeof(skb->cb));
308 skb->pkt_type = PACKET_HOST; /* Default type */
309 skb->ip_summed = 0;
310 skb->priority = 0;
311 skb->security = 0; /* By default packets are insecure */
312 skb->destructor = NULL;
314 #ifdef CONFIG_NETFILTER
315 skb->nfmark = skb->nfcache = 0;
316 skb->nfct = NULL;
317 #ifdef CONFIG_NETFILTER_DEBUG
318 skb->nf_debug = 0;
319 #endif
320 #endif
321 #ifdef CONFIG_NET_SCHED
322 skb->tc_index = 0;
323 #endif
324 }
326 static void skb_drop_fraglist(struct sk_buff *skb)
327 {
328 struct sk_buff *list = skb_shinfo(skb)->frag_list;
330 skb_shinfo(skb)->frag_list = NULL;
332 do {
333 struct sk_buff *this = list;
334 list = list->next;
335 kfree_skb(this);
336 } while (list);
337 }
339 static void skb_clone_fraglist(struct sk_buff *skb)
340 {
341 struct sk_buff *list;
343 for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
344 skb_get(list);
345 }
347 static void skb_release_data(struct sk_buff *skb)
348 {
349 if (!skb->cloned ||
350 atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
351 if (skb_shinfo(skb)->nr_frags) {
352 int i;
353 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
354 put_page(skb_shinfo(skb)->frags[i].page);
355 }
357 if (skb_shinfo(skb)->frag_list)
358 skb_drop_fraglist(skb);
360 kfree(skb->head);
361 }
362 }
364 /*
365 * Free an skbuff by memory without cleaning the state.
366 */
367 void kfree_skbmem(struct sk_buff *skb)
368 {
369 skb_release_data(skb);
370 skb_head_to_pool(skb);
371 }
373 /**
374 * __kfree_skb - private function
375 * @skb: buffer
376 *
377 * Free an sk_buff. Release anything attached to the buffer.
378 * Clean the state. This is an internal helper function. Users should
379 * always call kfree_skb
380 */
382 void __kfree_skb(struct sk_buff *skb)
383 {
384 if (skb->list) {
385 printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
386 "on a list (from %p).\n", NET_CALLER(skb));
387 BUG();
388 }
390 dst_release(skb->dst);
391 if(skb->destructor) {
392 if (in_irq()) {
393 printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
394 NET_CALLER(skb));
395 }
396 skb->destructor(skb);
397 }
398 #ifdef CONFIG_NETFILTER
399 nf_conntrack_put(skb->nfct);
400 #endif
401 skb_headerinit(skb, NULL, 0); /* clean state */
402 kfree_skbmem(skb);
403 }
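/*
 * Note: callers normally go through kfree_skb() (an inline in
 * <linux/skbuff.h>), which drops the skb->users reference and only calls
 * __kfree_skb() once the count reaches zero; calling __kfree_skb() directly
 * bypasses that reference counting.
 */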
405 /**
406 * skb_clone - duplicate an sk_buff
407 * @skb: buffer to clone
408 * @gfp_mask: allocation priority
409 *
410 * Duplicate an &sk_buff. The new one is not owned by a socket. Both
411 * copies share the same packet data but not structure. The new
412 * buffer has a reference count of 1. If the allocation fails the
413 * function returns %NULL otherwise the new buffer is returned.
414 *
415 * If this function is called from an interrupt, @gfp_mask must be
416 * %GFP_ATOMIC.
417 */
419 struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
420 {
421 struct sk_buff *n;
423 n = skb_head_from_pool();
424 if (!n) {
425 n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
426 if (!n)
427 return NULL;
428 }
430 #define C(x) n->x = skb->x
432 n->next = n->prev = NULL;
433 n->list = NULL;
434 n->sk = NULL;
435 C(stamp);
436 C(dev);
437 C(real_dev);
438 C(h);
439 C(nh);
440 C(mac);
441 C(dst);
442 dst_clone(n->dst);
443 memcpy(n->cb, skb->cb, sizeof(skb->cb));
444 C(len);
445 C(data_len);
446 C(csum);
447 n->cloned = 1;
448 C(pkt_type);
449 C(ip_summed);
450 C(priority);
451 atomic_set(&n->users, 1);
452 C(protocol);
453 C(security);
454 C(truesize);
455 C(head);
456 C(data);
457 C(tail);
458 C(end);
459 n->destructor = NULL;
460 #ifdef CONFIG_NETFILTER
461 C(nfmark);
462 C(nfcache);
463 C(nfct);
464 #ifdef CONFIG_NETFILTER_DEBUG
465 C(nf_debug);
466 #endif
467 #endif /*CONFIG_NETFILTER*/
468 #if defined(CONFIG_HIPPI)
469 C(private);
470 #endif
471 #ifdef CONFIG_NET_SCHED
472 C(tc_index);
473 #endif
475 atomic_inc(&(skb_shinfo(skb)->dataref));
476 skb->cloned = 1;
477 #ifdef CONFIG_NETFILTER
478 nf_conntrack_get(skb->nfct);
479 #endif
480 return n;
481 }
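/*
 * Illustrative use (hypothetical caller, not from this file): a clone shares
 * the packet data with the original, so anything that intends to write the
 * payload must take a private copy first.
 *
 *	nskb = skb_clone(skb, GFP_ATOMIC);	(cheap: the data area is shared)
 *	...
 *	if (skb_cloned(skb))
 *		(switch to skb_copy() or pskb_copy() before modifying the data)
 */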
483 static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
484 {
485 /*
486 * Shift between the two data areas in bytes
487 */
488 unsigned long offset = new->data - old->data;
490 new->list=NULL;
491 new->sk=NULL;
492 new->dev=old->dev;
493 new->real_dev=old->real_dev;
494 new->priority=old->priority;
495 new->protocol=old->protocol;
496 new->dst=dst_clone(old->dst);
497 new->h.raw=old->h.raw+offset;
498 new->nh.raw=old->nh.raw+offset;
499 new->mac.raw=old->mac.raw+offset;
500 memcpy(new->cb, old->cb, sizeof(old->cb));
501 atomic_set(&new->users, 1);
502 new->pkt_type=old->pkt_type;
503 new->stamp=old->stamp;
504 new->destructor = NULL;
505 new->security=old->security;
506 #ifdef CONFIG_NETFILTER
507 new->nfmark=old->nfmark;
508 new->nfcache=old->nfcache;
509 new->nfct=old->nfct;
510 nf_conntrack_get(new->nfct);
511 #ifdef CONFIG_NETFILTER_DEBUG
512 new->nf_debug=old->nf_debug;
513 #endif
514 #endif
515 #ifdef CONFIG_NET_SCHED
516 new->tc_index = old->tc_index;
517 #endif
518 }
520 /**
521 * skb_copy - create private copy of an sk_buff
522 * @skb: buffer to copy
523 * @gfp_mask: allocation priority
524 *
525 * Make a copy of both an &sk_buff and its data. This is used when the
526 * caller wishes to modify the data and needs a private copy of the
527 * data to alter. Returns %NULL on failure or the pointer to the buffer
528 * on success. The returned buffer has a reference count of 1.
529 *
530 * As by-product this function converts non-linear &sk_buff to linear
531 * one, so that &sk_buff becomes completely private and caller is allowed
532 * to modify all the data of returned buffer. This means that this
533 * function is not recommended for use in circumstances when only
534 * header is going to be modified. Use pskb_copy() instead.
535 */
537 struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
538 {
539 struct sk_buff *n;
540 int headerlen = skb->data-skb->head;
542 /*
543 * Allocate the copy buffer
544 */
545 n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
546 if(n==NULL)
547 return NULL;
549 /* Set the data pointer */
550 skb_reserve(n,headerlen);
551 /* Set the tail pointer and length */
552 skb_put(n,skb->len);
553 n->csum = skb->csum;
554 n->ip_summed = skb->ip_summed;
556 if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
557 BUG();
559 copy_skb_header(n, skb);
561 return n;
562 }
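/*
 * Rough guide to the copy primitives above and below (descriptive only):
 *	skb_clone()  - new struct sk_buff, data area shared; cheapest, read-only use
 *	pskb_copy()  - private linear header, page fragments still shared
 *	skb_copy()   - fully private, linearised copy of all data; most expensive
 */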
564 /* Keep head the same: replace data */
565 int skb_linearize(struct sk_buff *skb, int gfp_mask)
566 {
567 unsigned int size;
568 u8 *data;
569 long offset;
570 int headerlen = skb->data - skb->head;
571 int expand = (skb->tail+skb->data_len) - skb->end;
573 if (skb_shared(skb))
574 BUG();
576 if (expand <= 0)
577 expand = 0;
579 size = (skb->end - skb->head + expand);
580 size = SKB_DATA_ALIGN(size);
581 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
582 if (data == NULL)
583 return -ENOMEM;
585 /* Copy entire thing */
586 if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
587 BUG();
589 /* Offset between the two in bytes */
590 offset = data - skb->head;
592 /* Free old data. */
593 skb_release_data(skb);
595 skb->head = data;
596 skb->end = data + size;
598 /* Set up new pointers */
599 skb->h.raw += offset;
600 skb->nh.raw += offset;
601 skb->mac.raw += offset;
602 skb->tail += offset;
603 skb->data += offset;
605 /* Set up shinfo */
606 atomic_set(&(skb_shinfo(skb)->dataref), 1);
607 skb_shinfo(skb)->nr_frags = 0;
608 skb_shinfo(skb)->frag_list = NULL;
610 /* We are no longer a clone, even if we were. */
611 skb->cloned = 0;
613 skb->tail += skb->data_len;
614 skb->data_len = 0;
615 return 0;
616 }
619 /**
620 * pskb_copy - create copy of an sk_buff with private head.
621 * @skb: buffer to copy
622 * @gfp_mask: allocation priority
623 *
624 * Make a copy of both an &sk_buff and part of its data, located
625 * in header. Fragmented data remain shared. This is used when
626 * the caller wishes to modify only header of &sk_buff and needs
627 * private copy of the header to alter. Returns %NULL on failure
628 * or the pointer to the buffer on success.
629 * The returned buffer has a reference count of 1.
630 */
632 struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
633 {
634 struct sk_buff *n;
636 /*
637 * Allocate the copy buffer
638 */
639 n=alloc_skb(skb->end - skb->head, gfp_mask);
640 if(n==NULL)
641 return NULL;
643 /* Set the data pointer */
644 skb_reserve(n,skb->data-skb->head);
645 /* Set the tail pointer and length */
646 skb_put(n,skb_headlen(skb));
647 /* Copy the bytes */
648 memcpy(n->data, skb->data, n->len);
649 n->csum = skb->csum;
650 n->ip_summed = skb->ip_summed;
652 n->data_len = skb->data_len;
653 n->len = skb->len;
655 if (skb_shinfo(skb)->nr_frags) {
656 int i;
658 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
659 skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
660 get_page(skb_shinfo(n)->frags[i].page);
661 }
662 skb_shinfo(n)->nr_frags = i;
663 }
665 if (skb_shinfo(skb)->frag_list) {
666 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
667 skb_clone_fraglist(n);
668 }
670 copy_skb_header(n, skb);
672 return n;
673 }
675 /**
676 * pskb_expand_head - reallocate header of &sk_buff
677 * @skb: buffer to reallocate
678 * @nhead: room to add at head
679 * @ntail: room to add at tail
680 * @gfp_mask: allocation priority
681 *
682 * Expands (or creates an identical copy, if @nhead and @ntail are zero)
683 * the header of the skb. The &sk_buff itself is not changed and MUST have
684 * a reference count of 1. Returns zero on success, or a negative error
685 * code if expansion failed; in that case the &sk_buff is left unchanged.
686 *
687 * All the pointers pointing into skb header may change and must be
688 * reloaded after call to this function.
689 */
691 int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
692 {
693 int i;
694 u8 *data;
695 int size = nhead + (skb->end - skb->head) + ntail;
696 long off;
698 if (skb_shared(skb))
699 BUG();
701 size = SKB_DATA_ALIGN(size);
703 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
704 if (data == NULL)
705 goto nodata;
707 /* Copy only real data... and, alas, header. This should be
708 * optimized for the cases when header is void. */
709 memcpy(data+nhead, skb->head, skb->tail-skb->head);
710 memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
712 for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
713 get_page(skb_shinfo(skb)->frags[i].page);
715 if (skb_shinfo(skb)->frag_list)
716 skb_clone_fraglist(skb);
718 skb_release_data(skb);
720 off = (data+nhead) - skb->head;
722 skb->head = data;
723 skb->end = data+size;
725 skb->data += off;
726 skb->tail += off;
727 skb->mac.raw += off;
728 skb->h.raw += off;
729 skb->nh.raw += off;
730 skb->cloned = 0;
731 atomic_set(&skb_shinfo(skb)->dataref, 1);
732 return 0;
734 nodata:
735 return -ENOMEM;
736 }
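/*
 * Illustrative use (hypothetical caller): because pskb_expand_head()
 * reallocates the header, raw pointers into the old buffer must be
 * recomputed afterwards, typically by keeping offsets instead.
 *
 *	int off = (u8 *)iph - skb->data;	("iph" is a made-up example)
 *	if (pskb_expand_head(skb, nhead, 0, GFP_ATOMIC))
 *		goto drop;
 *	iph = (struct iphdr *)(skb->data + off);
 */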
738 /* Make private copy of skb with writable head and some headroom */
740 struct sk_buff *
741 skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
742 {
743 struct sk_buff *skb2;
744 int delta = headroom - skb_headroom(skb);
746 if (delta <= 0)
747 return pskb_copy(skb, GFP_ATOMIC);
749 skb2 = skb_clone(skb, GFP_ATOMIC);
750 if (skb2 == NULL ||
751 !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
752 return skb2;
754 kfree_skb(skb2);
755 return NULL;
756 }
759 /**
760 * skb_copy_expand - copy and expand sk_buff
761 * @skb: buffer to copy
762 * @newheadroom: new free bytes at head
763 * @newtailroom: new free bytes at tail
764 * @gfp_mask: allocation priority
765 *
766 * Make a copy of both an &sk_buff and its data and while doing so
767 * allocate additional space.
768 *
769 * This is used when the caller wishes to modify the data and needs a
770 * private copy of the data to alter as well as more space for new fields.
771 * Returns %NULL on failure or the pointer to the buffer
772 * on success. The returned buffer has a reference count of 1.
773 *
774 * You must pass %GFP_ATOMIC as the allocation priority if this function
775 * is called from an interrupt.
776 */
779 struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
780 int newheadroom,
781 int newtailroom,
782 int gfp_mask)
783 {
784 struct sk_buff *n;
786 /*
787 * Allocate the copy buffer
788 */
790 n=alloc_skb(newheadroom + skb->len + newtailroom,
791 gfp_mask);
792 if(n==NULL)
793 return NULL;
795 skb_reserve(n,newheadroom);
797 /* Set the tail pointer and length */
798 skb_put(n,skb->len);
800 /* Copy the data only. */
801 if (skb_copy_bits(skb, 0, n->data, skb->len))
802 BUG();
804 copy_skb_header(n, skb);
805 return n;
806 }
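/*
 * Illustrative use (hypothetical encapsulation path): grow the headroom when
 * the incoming buffer does not have room for an extra header ("hh_len" is a
 * made-up name).
 *
 *	if (skb_headroom(skb) < hh_len) {
 *		struct sk_buff *nskb = skb_copy_expand(skb, hh_len,
 *						       skb_tailroom(skb), GFP_ATOMIC);
 *		kfree_skb(skb);
 *		if (nskb == NULL)
 *			return;
 *		skb = nskb;
 *	}
 */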
808 /**
809 * skb_pad - zero pad the tail of an skb
810 * @skb: buffer to pad
811 * @pad: space to pad
812 *
813 * Ensure that a buffer is followed by a padding area that is zero
814 * filled. Used by network drivers which may DMA or transfer data
815 * beyond the buffer end onto the wire.
816 *
817 * May return NULL in out of memory cases.
818 */
820 struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
821 {
822 struct sk_buff *nskb;
824 /* If the skbuff is non linear tailroom is always zero.. */
825 if(skb_tailroom(skb) >= pad)
826 {
827 memset(skb->data+skb->len, 0, pad);
828 return skb;
829 }
831 nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
832 kfree_skb(skb);
833 if(nskb)
834 memset(nskb->data+nskb->len, 0, pad);
835 return nskb;
836 }
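/*
 * Illustrative use (hypothetical transmit path): this skb_pad() frees the
 * buffer it was given when it has to reallocate, so the old pointer must not
 * be used after a %NULL return.
 *
 *	if (skb->len < ETH_ZLEN) {
 *		skb = skb_pad(skb, ETH_ZLEN - skb->len);
 *		if (skb == NULL)
 *			return 0;	(original buffer already freed)
 *	}
 */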
838 /* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
839 * If realloc==0 and trimming is impossible without change of data,
840 * it is BUG().
841 */
843 int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
844 {
845 int offset = skb_headlen(skb);
846 int nfrags = skb_shinfo(skb)->nr_frags;
847 int i;
849 for (i=0; i<nfrags; i++) {
850 int end = offset + skb_shinfo(skb)->frags[i].size;
851 if (end > len) {
852 if (skb_cloned(skb)) {
853 if (!realloc)
854 BUG();
855 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
856 return -ENOMEM;
857 }
858 if (len <= offset) {
859 put_page(skb_shinfo(skb)->frags[i].page);
860 skb_shinfo(skb)->nr_frags--;
861 } else {
862 skb_shinfo(skb)->frags[i].size = len-offset;
863 }
864 }
865 offset = end;
866 }
868 if (offset < len) {
869 skb->data_len -= skb->len - len;
870 skb->len = len;
871 } else {
872 if (len <= skb_headlen(skb)) {
873 skb->len = len;
874 skb->data_len = 0;
875 skb->tail = skb->data + len;
876 if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
877 skb_drop_fraglist(skb);
878 } else {
879 skb->data_len -= skb->len - len;
880 skb->len = len;
881 }
882 }
884 return 0;
885 }
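/*
 * Note: callers normally use the skb_trim()/pskb_trim() wrappers from
 * <linux/skbuff.h>, which handle the simple linear case inline and fall back
 * to ___pskb_trim() for nonlinear buffers; drivers are not expected to call
 * ___pskb_trim() directly.
 */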
887 /**
888 * __pskb_pull_tail - advance tail of skb header
889 * @skb: buffer to reallocate
890 * @delta: number of bytes to advance tail
891 *
892 * The function makes sense only on a fragmented &sk_buff;
893 * it expands the header, moving its tail forward and copying the necessary
894 * data from the fragmented part.
895 *
896 * &sk_buff MUST have reference count of 1.
897 *
898 * Returns %NULL (and the &sk_buff is left unchanged) if the pull failed,
899 * or the value of the new tail of the skb on success.
900 *
901 * All the pointers pointing into skb header may change and must be
902 * reloaded after call to this function.
903 */
905 /* Moves tail of skb head forward, copying data from fragmented part,
906 * when it is necessary.
907 * 1. It may fail due to malloc failure.
908 * 2. It may change skb pointers.
909 *
910 * It is pretty complicated. Luckily, it is called only in exceptional cases.
911 */
912 unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
913 {
914 int i, k, eat;
916 /* If skb has not enough free space at tail, get new one
917 * plus 128 bytes for future expansions. If we have enough
918 * room at tail, reallocate without expansion only if skb is cloned.
919 */
920 eat = (skb->tail+delta) - skb->end;
922 if (eat > 0 || skb_cloned(skb)) {
923 if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
924 return NULL;
925 }
927 if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
928 BUG();
930 /* Optimization: no fragments, no reasons to preestimate
931 * size of pulled pages. Superb.
932 */
933 if (skb_shinfo(skb)->frag_list == NULL)
934 goto pull_pages;
936 /* Estimate size of pulled pages. */
937 eat = delta;
938 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
939 if (skb_shinfo(skb)->frags[i].size >= eat)
940 goto pull_pages;
941 eat -= skb_shinfo(skb)->frags[i].size;
942 }
944 /* If we need to update the frag list, we are in trouble.
945 * Certainly, it is possible to add an offset to the skb data,
946 * but taking into account that pulling is expected to
947 * be a very rare operation, it is worth fighting against
948 * further bloating of the skb head and crucifying ourselves here instead.
949 * Pure masochism, indeed. 8)8)
950 */
951 if (eat) {
952 struct sk_buff *list = skb_shinfo(skb)->frag_list;
953 struct sk_buff *clone = NULL;
954 struct sk_buff *insp = NULL;
956 do {
957 if (list == NULL)
958 BUG();
960 if (list->len <= eat) {
961 /* Eaten as whole. */
962 eat -= list->len;
963 list = list->next;
964 insp = list;
965 } else {
966 /* Eaten partially. */
968 if (skb_shared(list)) {
969 /* Sucks! We need to fork list. :-( */
970 clone = skb_clone(list, GFP_ATOMIC);
971 if (clone == NULL)
972 return NULL;
973 insp = list->next;
974 list = clone;
975 } else {
976 /* This may be pulled without
977 * problems. */
978 insp = list;
979 }
980 if (pskb_pull(list, eat) == NULL) {
981 if (clone)
982 kfree_skb(clone);
983 return NULL;
984 }
985 break;
986 }
987 } while (eat);
989 /* Free pulled out fragments. */
990 while ((list = skb_shinfo(skb)->frag_list) != insp) {
991 skb_shinfo(skb)->frag_list = list->next;
992 kfree_skb(list);
993 }
994 /* And insert new clone at head. */
995 if (clone) {
996 clone->next = list;
997 skb_shinfo(skb)->frag_list = clone;
998 }
999 }
1000 /* Success! Now we may commit changes to skb data. */
1002 pull_pages:
1003 eat = delta;
1004 k = 0;
1005 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
1006 if (skb_shinfo(skb)->frags[i].size <= eat) {
1007 put_page(skb_shinfo(skb)->frags[i].page);
1008 eat -= skb_shinfo(skb)->frags[i].size;
1009 } else {
1010 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1011 if (eat) {
1012 skb_shinfo(skb)->frags[k].page_offset += eat;
1013 skb_shinfo(skb)->frags[k].size -= eat;
1014 eat = 0;
1015 }
1016 k++;
1017 }
1018 }
1019 skb_shinfo(skb)->nr_frags = k;
1021 skb->tail += delta;
1022 skb->data_len -= delta;
1024 return skb->tail;
1025 }
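/*
 * Illustrative use (hypothetical protocol input path): header accesses are
 * normally guarded by pskb_may_pull() from <linux/skbuff.h>, which calls
 * __pskb_pull_tail() only when the requested bytes are not yet linear.
 *
 *	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 *		goto drop;
 *	iph = skb->nh.iph;	(first sizeof(*iph) bytes are now in the linear area)
 */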
1027 /* Copy some data bits from skb to kernel buffer. */
1029 int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1030 {
1031 int i, copy;
1032 int start = skb->len - skb->data_len;
1034 if (offset > (int)skb->len-len)
1035 goto fault;
1037 /* Copy header. */
1038 if ((copy = start-offset) > 0) {
1039 if (copy > len)
1040 copy = len;
1041 memcpy(to, skb->data + offset, copy);
1042 if ((len -= copy) == 0)
1043 return 0;
1044 offset += copy;
1045 to += copy;
1046 }
1048 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1049 int end;
1051 BUG_TRAP(start <= offset+len);
1053 end = start + skb_shinfo(skb)->frags[i].size;
1054 if ((copy = end-offset) > 0) {
1055 u8 *vaddr;
1057 if (copy > len)
1058 copy = len;
1060 vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
1061 memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
1062 offset-start, copy);
1063 kunmap_skb_frag(vaddr);
1065 if ((len -= copy) == 0)
1066 return 0;
1067 offset += copy;
1068 to += copy;
1069 }
1070 start = end;
1071 }
1073 if (skb_shinfo(skb)->frag_list) {
1074 struct sk_buff *list;
1076 for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
1077 int end;
1079 BUG_TRAP(start <= offset+len);
1081 end = start + list->len;
1082 if ((copy = end-offset) > 0) {
1083 if (copy > len)
1084 copy = len;
1085 if (skb_copy_bits(list, offset-start, to, copy))
1086 goto fault;
1087 if ((len -= copy) == 0)
1088 return 0;
1089 offset += copy;
1090 to += copy;
1091 }
1092 start = end;
1093 }
1094 }
1095 if (len == 0)
1096 return 0;
1098 fault:
1099 return -EFAULT;
1100 }
1102 /* Checksum skb data. */
1104 unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
1105 {
1106 int i, copy;
1107 int start = skb->len - skb->data_len;
1108 int pos = 0;
1110 /* Checksum header. */
1111 if ((copy = start-offset) > 0) {
1112 if (copy > len)
1113 copy = len;
1114 csum = csum_partial(skb->data+offset, copy, csum);
1115 if ((len -= copy) == 0)
1116 return csum;
1117 offset += copy;
1118 pos = copy;
1119 }
1121 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
1122 int end;
1124 BUG_TRAP(start <= offset+len);
1126 end = start + skb_shinfo(skb)->frags[i].size;
1127 if ((copy = end-offset) > 0) {
1128 unsigned int csum2;
1129 u8 *vaddr;
1130 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1132 if (copy > len)
1133 copy = len;
1134 vaddr = kmap_skb_frag(frag);
1135 csum2 = csum_partial(vaddr + frag->page_offset +
1136 offset-start, copy, 0);
1137 kunmap_skb_frag(vaddr);
1138 csum = csum_block_add(csum, csum2, pos);
1139 if (!(len -= copy))
1140 return csum;
1141 offset += copy;
1142 pos += copy;
1143 }
1144 start = end;
1145 }
1147 if (skb_shinfo(skb)->frag_list) {
1148 struct sk_buff *list;
1150 for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
1151 int end;
1153 BUG_TRAP(start <= offset+len);
1155 end = start + list->len;
1156 if ((copy = end-offset) > 0) {
1157 unsigned int csum2;
1158 if (copy > len)
1159 copy = len;
1160 csum2 = skb_checksum(list, offset-start, copy, 0);
1161 csum = csum_block_add(csum, csum2, pos);
1162 if ((len -= copy) == 0)
1163 return csum;
1164 offset += copy;
1165 pos += copy;
1166 }
1167 start = end;
1168 }
1169 }
1170 if (len == 0)
1171 return csum;
1173 BUG();
1174 return csum;
1175 }
1177 /* Both of above in one bottle. */
1179 unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
1180 {
1181 int i, copy;
1182 int start = skb->len - skb->data_len;
1183 int pos = 0;
1185 /* Copy header. */
1186 if ((copy = start-offset) > 0) {
1187 if (copy > len)
1188 copy = len;
1189 csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
1190 if ((len -= copy) == 0)
1191 return csum;
1192 offset += copy;
1193 to += copy;
1194 pos = copy;
1195 }
1197 for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
1198 int end;
1200 BUG_TRAP(start <= offset+len);
1202 end = start + skb_shinfo(skb)->frags[i].size;
1203 if ((copy = end-offset) > 0) {
1204 unsigned int csum2;
1205 u8 *vaddr;
1206 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1208 if (copy > len)
1209 copy = len;
1210 vaddr = kmap_skb_frag(frag);
1211 csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
1212 offset-start, to, copy, 0);
1213 kunmap_skb_frag(vaddr);
1214 csum = csum_block_add(csum, csum2, pos);
1215 if (!(len -= copy))
1216 return csum;
1217 offset += copy;
1218 to += copy;
1219 pos += copy;
1220 }
1221 start = end;
1222 }
1224 if (skb_shinfo(skb)->frag_list) {
1225 struct sk_buff *list;
1227 for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
1228 unsigned int csum2;
1229 int end;
1231 BUG_TRAP(start <= offset+len);
1233 end = start + list->len;
1234 if ((copy = end-offset) > 0) {
1235 if (copy > len)
1236 copy = len;
1237 csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
1238 csum = csum_block_add(csum, csum2, pos);
1239 if ((len -= copy) == 0)
1240 return csum;
1241 offset += copy;
1242 to += copy;
1243 pos += copy;
1244 }
1245 start = end;
1246 }
1247 }
1248 if (len == 0)
1249 return csum;
1251 BUG();
1252 return csum;
1253 }
1255 void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1256 {
1257 unsigned int csum;
1258 long csstart;
1260 if (skb->ip_summed == CHECKSUM_HW)
1261 csstart = skb->h.raw - skb->data;
1262 else
1263 csstart = skb->len - skb->data_len;
1265 if (csstart > skb->len - skb->data_len)
1266 BUG();
1268 memcpy(to, skb->data, csstart);
1270 csum = 0;
1271 if (csstart != skb->len)
1272 csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
1273 skb->len-csstart, 0);
1275 if (skb->ip_summed == CHECKSUM_HW) {
1276 long csstuff = csstart + skb->csum;
1278 *((unsigned short *)(to + csstuff)) = csum_fold(csum);
1279 }
1280 }
1282 #if 0
1283 /*
1284 * Tune the memory allocator for a new MTU size.
1285 */
1286 void skb_add_mtu(int mtu)
1287 {
1288 /* Must match allocation in alloc_skb */
1289 mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
1291 kmem_add_cache_size(mtu);
1292 }
1293 #endif
1295 void __init skb_init(void)
1296 {
1297 int i;
1299 skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
1300 sizeof(struct sk_buff),
1301 0,
1302 SLAB_HWCACHE_ALIGN,
1303 skb_headerinit, NULL);
1304 if (!skbuff_head_cache)
1305 panic("cannot create skbuff cache");
1307 for (i=0; i<NR_CPUS; i++)
1308 skb_queue_head_init(&skb_head_pool[i].list);
1309 }