direct-io.hg
changeset 112:cb2688ed1a23
bitkeeper revision 1.15.1.12 (3e3bb848bQ7Yn0pGaQ3H5n8g3MYQWQ)
Merge boulderdash.cl.cam.ac.uk:/usr/groups/xeno/users/akw27/xeno
into boulderdash.cl.cam.ac.uk:/anfs/scratch/boulderdash/akw27/argh/xeno
author | akw27@boulderdash.cl.cam.ac.uk |
---|---|
date | Sat Feb 01 12:06:32 2003 +0000 (2003-02-01) |
parents | 033b3540eda0 de280362dfb0 |
children | 82679de8a1ca |
files | .rootkeys xen-2.4.16/common/domain.c xen-2.4.16/common/memory.c xen-2.4.16/drivers/net/tulip/interrupt.c xen-2.4.16/include/asm-i386/pci.h xen-2.4.16/include/hypervisor-ifs/network.h xen-2.4.16/include/xeno/mm.h xen-2.4.16/include/xeno/skbuff.h xen-2.4.16/net/dev.c xen-2.4.16/net/eth.c xen-2.4.16/net/skbuff.c xenolinux-2.4.16-sparse/include/linux/skbuff.h xenolinux-2.4.16-sparse/net/core/skbuff.c |
line diff
1.1 --- a/.rootkeys Tue Jan 28 16:13:04 2003 +0000 1.2 +++ b/.rootkeys Sat Feb 01 12:06:32 2003 +0000 1.3 @@ -399,9 +399,11 @@ 3ddb79bb3cMSs_k2X5Oq2hOIBvmPYA xenolinux 1.4 3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h 1.5 3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h 1.6 3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h 1.7 +3e37c39fVCSGQENtY6g7muaq_THliw xenolinux-2.4.16-sparse/include/linux/skbuff.h 1.8 3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h 1.9 3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c 1.10 3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk 1.11 3e15d52e0_j129JPvo7xfYGndVFpwQ xenolinux-2.4.16-sparse/mm/memory.c 1.12 3e15d535DLvpzTrLRUIerB69LpJD1g xenolinux-2.4.16-sparse/mm/mremap.c 1.13 3e15d531m1Y1_W8ki64AFOU_ua4C4w xenolinux-2.4.16-sparse/mm/swapfile.c 1.14 +3e37c312QFuzIxXsuAgO6IRt3Tp96Q xenolinux-2.4.16-sparse/net/core/skbuff.c
2.1 --- a/xen-2.4.16/common/domain.c Tue Jan 28 16:13:04 2003 +0000 2.2 +++ b/xen-2.4.16/common/domain.c Sat Feb 01 12:06:32 2003 +0000 2.3 @@ -334,10 +334,13 @@ static unsigned int alloc_new_dom_mem(st 2.4 struct pfn_info *pf, *pf_head; 2.5 unsigned int alloc_pfns; 2.6 unsigned int req_pages; 2.7 + unsigned long flags; 2.8 2.9 /* how many pages do we need to alloc? */ 2.10 req_pages = kbytes >> (PAGE_SHIFT - 10); 2.11 2.12 + spin_lock_irqsave(&free_list_lock, flags); 2.13 + 2.14 /* is there enough mem to serve the request? */ 2.15 if(req_pages > free_pfns) 2.16 return -1; 2.17 @@ -369,6 +372,8 @@ static unsigned int alloc_new_dom_mem(st 2.18 2.19 free_pfns--; 2.20 } 2.21 + 2.22 + spin_unlock_irqrestore(&free_list_lock, flags); 2.23 2.24 p->tot_pages = req_pages; 2.25
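The hunk above guards the page free list with spin_lock_irqsave()/spin_unlock_irqrestore() while a new domain's memory is reserved. Below is a minimal user-space model of that locking pattern (a pthread mutex stands in for free_list_lock, a counter stands in for the page pool; all names are illustrative, not the Xen structures). The sketch also releases the lock on the "not enough memory" path before returning.

```c
/* Illustrative model of the locked free-list reservation in the hunk above.
 * A pthread mutex stands in for free_list_lock; names are hypothetical. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t free_list_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int free_pfns = 1024;          /* pages currently free */

/* Reserve req_pages from the free pool, or fail without side effects. */
static int alloc_dom_pages(unsigned int req_pages)
{
    int ret = -1;

    pthread_mutex_lock(&free_list_lock);
    if (req_pages <= free_pfns) {
        free_pfns -= req_pages;                /* "unlink" the pages */
        ret = 0;
    }
    /* Unlock on both the success and the failure path. */
    pthread_mutex_unlock(&free_list_lock);
    return ret;
}

int main(void)
{
    printf("alloc 100 pages: %d (free_pfns=%u)\n", alloc_dom_pages(100), free_pfns);
    printf("alloc 5000 pages: %d (free_pfns=%u)\n", alloc_dom_pages(5000), free_pfns);
    return 0;
}
```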
3.1 --- a/xen-2.4.16/common/memory.c Tue Jan 28 16:13:04 2003 +0000 3.2 +++ b/xen-2.4.16/common/memory.c Sat Feb 01 12:06:32 2003 +0000 3.3 @@ -206,6 +206,7 @@ unsigned long frame_table_size; 3.4 unsigned long max_page; 3.5 3.6 struct list_head free_list; 3.7 +spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED; 3.8 unsigned int free_pfns; 3.9 3.10 static int tlb_flush[NR_CPUS]; 3.11 @@ -219,6 +220,7 @@ void __init init_frametable(unsigned lon 3.12 { 3.13 struct pfn_info *pf; 3.14 unsigned long page_index; 3.15 + unsigned long flags; 3.16 3.17 memset(tlb_flush, 0, sizeof(tlb_flush)); 3.18 3.19 @@ -232,6 +234,7 @@ void __init init_frametable(unsigned lon 3.20 memset(frame_table, 0, frame_table_size); 3.21 3.22 /* Put all domain-allocatable memory on a free list. */ 3.23 + spin_lock_irqsave(&free_list_lock, flags); 3.24 INIT_LIST_HEAD(&free_list); 3.25 for( page_index = (MAX_MONITOR_ADDRESS + frame_table_size) >> PAGE_SHIFT; 3.26 page_index < nr_pages; 3.27 @@ -240,6 +243,7 @@ void __init init_frametable(unsigned lon 3.28 pf = list_entry(&frame_table[page_index].list, struct pfn_info, list); 3.29 list_add_tail(&pf->list, &free_list); 3.30 } 3.31 + spin_unlock_irqrestore(&free_list_lock, flags); 3.32 } 3.33 3.34
4.1 --- a/xen-2.4.16/drivers/net/tulip/interrupt.c Tue Jan 28 16:13:04 2003 +0000 4.2 +++ b/xen-2.4.16/drivers/net/tulip/interrupt.c Sat Feb 01 12:06:32 2003 +0000 4.3 @@ -170,8 +170,9 @@ static int tulip_rx(struct net_device *d 4.4 #endif 4.5 /* Check if the packet is long enough to accept without copying 4.6 to a minimally-sized skbuff. */ 4.7 - if (pkt_len < tulip_rx_copybreak 4.8 - && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { 4.9 + //if (pkt_len < tulip_rx_copybreak 4.10 + // && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) { 4.11 + if (0) { 4.12 skb->dev = dev; 4.13 skb_reserve(skb, 2); /* 16 byte align the IP header */ 4.14 pci_dma_sync_single(tp->pdev,
5.1 --- a/xen-2.4.16/include/asm-i386/pci.h Tue Jan 28 16:13:04 2003 +0000 5.2 +++ b/xen-2.4.16/include/asm-i386/pci.h Sat Feb 01 12:06:32 2003 +0000 5.3 @@ -75,7 +75,19 @@ static inline dma_addr_t pci_map_single( 5.4 if (direction == PCI_DMA_NONE) 5.5 BUG(); 5.6 flush_write_buffers(); 5.7 - return virt_to_bus(ptr); 5.8 + 5.9 + if ((unsigned long) ptr > PAGE_OFFSET) 5.10 + return virt_to_bus(ptr); 5.11 + 5.12 + /* If an address that is not in hypervisor VM is passed to this 5.13 + * function (ie > PAGE_OFFSET) we assume that the passer knows 5.14 + * what they are doing, and have passed a physical address that 5.15 + * should not be converted here. This is a little hackish, but 5.16 + * is being added to allow references to domain memory in order 5.17 + * to support zero-copy network code. 5.18 + */ 5.19 + 5.20 + return (dma_addr_t) ptr; 5.21 } 5.22 5.23 /* Unmap a single streaming mode DMA translation. The dma_addr and size
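As the comment in this hunk explains, pci_map_single() now discriminates on the address value: pointers above PAGE_OFFSET are ordinary hypervisor-virtual buffers and go through virt_to_bus(), while anything else is trusted to already be a physical address referring to domain memory (the zero-copy case) and is passed through unchanged. A hedged user-space sketch of that check follows; the PAGE_OFFSET value and the virt_to_bus() stub are made-up stand-ins.

```c
/* Sketch of the address test added to pci_map_single() above.
 * PAGE_OFFSET and the conversion stub are illustrative values only. */
#include <stdint.h>
#include <stdio.h>

#define PAGE_OFFSET 0xFC000000UL   /* hypothetical hypervisor virtual base */

typedef uint32_t dma_addr_t;

/* Stand-in for virt_to_bus(): virtual-to-bus is a fixed offset here. */
static dma_addr_t virt_to_bus_stub(unsigned long vaddr)
{
    return (dma_addr_t)(vaddr - PAGE_OFFSET);
}

static dma_addr_t map_single(unsigned long ptr)
{
    if (ptr > PAGE_OFFSET)
        return virt_to_bus_stub(ptr);   /* normal hypervisor-virtual buffer */

    /* Otherwise trust the caller: the value is taken to be a physical
     * address in domain memory (zero-copy receive) and is not converted. */
    return (dma_addr_t)ptr;
}

int main(void)
{
    printf("hypervisor buffer -> %#x\n", (unsigned)map_single(PAGE_OFFSET + 0x1000));
    printf("domain physical   -> %#x\n", (unsigned)map_single(0x00200000));
    return 0;
}
```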
6.1 --- a/xen-2.4.16/include/hypervisor-ifs/network.h Tue Jan 28 16:13:04 2003 +0000 6.2 +++ b/xen-2.4.16/include/hypervisor-ifs/network.h Sat Feb 01 12:06:32 2003 +0000 6.3 @@ -26,8 +26,8 @@ typedef struct rx_entry_st { 6.4 int status; /* per descriptor status. */ 6.5 } rx_entry_t; 6.6 6.7 -#define TX_RING_SIZE 1024 6.8 -#define RX_RING_SIZE 1024 6.9 +#define TX_RING_SIZE 256 6.10 +#define RX_RING_SIZE 256 6.11 typedef struct net_ring_st { 6.12 /* 6.13 * Guest OS places packets into ring at tx_prod.
7.1 --- a/xen-2.4.16/include/xeno/mm.h Tue Jan 28 16:13:04 2003 +0000 7.2 +++ b/xen-2.4.16/include/xeno/mm.h Sat Feb 01 12:06:32 2003 +0000 7.3 @@ -7,6 +7,7 @@ 7.4 #include <asm/desc.h> 7.5 #include <xeno/list.h> 7.6 #include <hypervisor-ifs/hypervisor-if.h> 7.7 +#include <xeno/spinlock.h> 7.8 7.9 /* XXX KAF: These may die eventually, but so many refs in slab.c :((( */ 7.10 7.11 @@ -110,6 +111,7 @@ typedef struct pfn_info { 7.12 extern frame_table_t * frame_table; 7.13 extern unsigned long frame_table_size; 7.14 extern struct list_head free_list; 7.15 +extern spinlock_t free_list_lock; 7.16 extern unsigned int free_pfns; 7.17 extern unsigned long max_page; 7.18 void init_frametable(unsigned long nr_pages);
8.1 --- a/xen-2.4.16/include/xeno/skbuff.h Tue Jan 28 16:13:04 2003 +0000 8.2 +++ b/xen-2.4.16/include/xeno/skbuff.h Sat Feb 01 12:06:32 2003 +0000 8.3 @@ -34,6 +34,10 @@ 8.4 #define VIF_DROP -3 8.5 #define VIF_ANY_INTERFACE -4 8.6 8.7 +//skb_type values: 8.8 +#define SKB_NORMAL 0 8.9 +#define SKB_ZERO_COPY 1 8.10 + 8.11 #define HAVE_ALLOC_SKB /* For the drivers to know */ 8.12 #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 8.13 #define SLAB_SKB /* Slabified skbuffs */ 8.14 @@ -187,7 +191,7 @@ struct sk_buff { 8.15 unsigned int data_len; 8.16 unsigned int csum; /* Checksum */ 8.17 unsigned char __unused, /* Dead field, may be reused */ 8.18 - cloned, /* head may be cloned (check refcnt to be sure). */ 8.19 + cloned, /* head may be cloned (check refcnt to be sure) */ 8.20 pkt_type, /* Packet class */ 8.21 ip_summed; /* Driver fed us an IP checksum */ 8.22 __u32 priority; /* Packet queueing priority */ 8.23 @@ -203,8 +207,12 @@ struct sk_buff { 8.24 8.25 void (*destructor)(struct sk_buff *); /* Destruct function */ 8.26 8.27 - int src_vif; /* vif we came from */ 8.28 - int dst_vif; /* vif we are bound for */ 8.29 + unsigned int skb_type; /* SKB_NORMAL or SKB_ZERO_COPY */ 8.30 + struct pfn_info *pf; /* record of physical pf address for freeing */ 8.31 + int src_vif; /* vif we came from */ 8.32 + int dst_vif; /* vif we are bound for */ 8.33 + struct skb_shared_info shinfo; /* shared info is no longer shared in Xen. */ 8.34 + 8.35 8.36 8.37 8.38 @@ -244,6 +252,7 @@ struct sk_buff { 8.39 8.40 extern void __kfree_skb(struct sk_buff *skb); 8.41 extern struct sk_buff * alloc_skb(unsigned int size, int priority); 8.42 +extern struct sk_buff * alloc_zc_skb(unsigned int size, int priority); 8.43 extern void kfree_skbmem(struct sk_buff *skb); 8.44 extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); 8.45 extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); 8.46 @@ -259,7 +268,8 @@ extern void skb_over_panic(struct sk_buf 8.47 extern void skb_under_panic(struct sk_buff *skb, int len, void *here); 8.48 8.49 /* Internal */ 8.50 -#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 8.51 +//#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 8.52 +#define skb_shinfo(SKB) ((struct skb_shared_info *)(&(SKB)->shinfo)) 8.53 8.54 /** 8.55 * skb_queue_empty - check if a queue is empty 8.56 @@ -1045,7 +1055,8 @@ static inline struct sk_buff *__dev_allo 8.57 { 8.58 struct sk_buff *skb; 8.59 8.60 - skb = alloc_skb(length+16, gfp_mask); 8.61 + //skb = alloc_skb(length+16, gfp_mask); 8.62 + skb = alloc_zc_skb(length+16, gfp_mask); 8.63 if (skb) 8.64 skb_reserve(skb,16); 8.65 return skb;
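This hunk tags each sk_buff with skb_type (SKB_NORMAL or SKB_ZERO_COPY), records the backing pfn_info, and redefines skb_shinfo() to use a copy of skb_shared_info embedded in the sk_buff itself, presumably because a zero-copy data area is a raw domain page with nothing reserved past skb->end. The following sketch contrasts the two skb_shinfo() placements; the struct names are illustrative, not the kernel definitions.

```c
/* Contrast of the two skb_shinfo() layouts touched by the hunk above:
 * classically the shared info sits at skb->end inside the data buffer;
 * here it is embedded in the skb header instead. Illustrative types only. */
#include <stdio.h>
#include <stdlib.h>

struct shared_info { int dataref; int nr_frags; };

struct skb_embedded {
    unsigned char *head, *end;
    struct shared_info shinfo;       /* lives in the skb itself */
};

/* Old style: shared info appended after the data area. */
#define SHINFO_AT_END(head, size) ((struct shared_info *)((head) + (size)))
/* New style, as in the redefined skb_shinfo(): use the embedded field. */
#define SHINFO_EMBEDDED(skb)      (&(skb)->shinfo)

int main(void)
{
    size_t size = 256;
    unsigned char *buf = malloc(size + sizeof(struct shared_info));
    struct skb_embedded skb = { .head = buf, .end = buf + size };

    SHINFO_AT_END(buf, size)->dataref = 1;   /* old placement */
    SHINFO_EMBEDDED(&skb)->dataref = 1;      /* new placement */

    printf("end-of-buffer shinfo at %p, embedded shinfo at %p\n",
           (void *)SHINFO_AT_END(buf, size), (void *)SHINFO_EMBEDDED(&skb));
    free(buf);
    return 0;
}
```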
9.1 --- a/xen-2.4.16/net/dev.c Tue Jan 28 16:13:04 2003 +0000 9.2 +++ b/xen-2.4.16/net/dev.c Sat Feb 01 12:06:32 2003 +0000 9.3 @@ -30,6 +30,7 @@ 9.4 #include <linux/pkt_sched.h> 9.5 9.6 #include <linux/event.h> 9.7 +#include <asm/domain_page.h> 9.8 9.9 #define BUG_TRAP ASSERT 9.10 #define notifier_call_chain(_a,_b,_c) ((void)0) 9.11 @@ -695,6 +696,21 @@ int netif_rx(struct sk_buff *skb) 9.12 if (skb->stamp.tv_sec == 0) 9.13 get_fast_time(&skb->stamp); 9.14 9.15 + /* Attempt to handle zero-copy packets here: */ 9.16 + if (skb->skb_type == SKB_ZERO_COPY) 9.17 + { 9.18 + skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); 9.19 + 9.20 + /* remapping this address really screws up all the skb pointers. We need 9.21 + * to map them all here sufficiently to get the packet demultiplexed. 9.22 + */ 9.23 + 9.24 + skb->data = skb->head; 9.25 + skb_reserve(skb,16); // need to ensure that all the drivers and not just tulip do this. 9.26 + skb->mac.raw = skb->data; 9.27 + skb->data += ETH_HLEN; 9.28 + } 9.29 + 9.30 /* The code is rearranged so that the path is the most 9.31 short when CPU is congested, but is still operating. 9.32 */ 9.33 @@ -747,10 +763,18 @@ drop: 9.34 netdev_rx_stat[this_cpu].dropped++; 9.35 local_irq_restore(flags); 9.36 9.37 + if (skb->skb_type == SKB_ZERO_COPY) 9.38 + unmap_domain_mem(skb->head); 9.39 + 9.40 kfree_skb(skb); 9.41 return NET_RX_DROP; 9.42 9.43 found: 9.44 + if (skb->skb_type == SKB_ZERO_COPY) { 9.45 + unmap_domain_mem(skb->head); 9.46 + //skb->head = (u8 *)((skb->pf - frame_table) << PAGE_SHIFT); 9.47 + skb->head = skb->data = skb->tail = (void *)0xdeadbeef; 9.48 + } 9.49 hyp_event_notify(cpu_mask); 9.50 local_irq_restore(flags); 9.51 return 0; 9.52 @@ -930,8 +954,28 @@ void flush_rx_queue(void) 9.53 rx = shadow_ring->rx_ring+i; 9.54 if ( (skb->len + ETH_HLEN) < rx->size ) 9.55 rx->size = skb->len + ETH_HLEN; 9.56 + 9.57 + /* remap the packet again. This is very temporary and will shortly be 9.58 + * replaced with a page swizzle. 9.59 + */ 9.60 + 9.61 + if (skb->skb_type == SKB_ZERO_COPY) 9.62 + { 9.63 + skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); 9.64 + skb->data = skb->head; 9.65 + skb_reserve(skb,16); 9.66 + skb->mac.raw = skb->data; 9.67 + skb->data += ETH_HLEN; 9.68 + } 9.69 + 9.70 copy_to_user((void *)rx->addr, skb->mac.raw, rx->size); 9.71 copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx)); 9.72 + 9.73 + if (skb->skb_type == SKB_ZERO_COPY) 9.74 + { 9.75 + unmap_domain_mem(skb->head); 9.76 + skb->head = skb->data = skb->tail = (void *)0xdeadbeef; 9.77 + } 9.78 } 9.79 net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1); 9.80 if ( net_ring->rx_cons == net_ring->rx_event )
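In netif_rx() and flush_rx_queue() a zero-copy buffer is only mapped into the hypervisor's address space for as long as its headers are needed: map_domain_mem() before demultiplexing or copying, unmap_domain_mem() afterwards, and the stale pointers are poisoned with 0xdeadbeef. The sketch below models that map / use / unmap / poison life cycle in ordinary C, with an anonymous mmap standing in for mapping a domain frame; all names are illustrative.

```c
/* Illustrative model of the map -> demultiplex -> unmap -> poison sequence
 * used for SKB_ZERO_COPY buffers in the dev.c hunks above. */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

struct fake_skb {
    unsigned char *head, *data;   /* only the fields the sketch needs */
};

int main(void)
{
    struct fake_skb skb;

    /* "map_domain_mem": make the foreign frame addressable. */
    skb.head = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (skb.head == MAP_FAILED)
        return 1;

    /* Rebuild the pointers the stack expects: reserve 16 bytes, then
     * treat the next 14 bytes as the Ethernet header. */
    skb.data = skb.head + 16;
    memset(skb.data, 0xAA, 14);          /* pretend to demultiplex */

    /* "unmap_domain_mem", then poison the now-invalid pointers so any
     * later dereference fails loudly rather than silently. */
    munmap(skb.head, 4096);
    skb.head = skb.data = (unsigned char *)0xdeadbeef;

    printf("poisoned head=%p\n", (void *)skb.head);
    return 0;
}
```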
10.1 --- a/xen-2.4.16/net/eth.c Tue Jan 28 16:13:04 2003 +0000 10.2 +++ b/xen-2.4.16/net/eth.c Sat Feb 01 12:06:32 2003 +0000 10.3 @@ -161,52 +161,62 @@ unsigned short eth_type_trans(struct sk_ 10.4 struct ethhdr *eth; 10.5 unsigned char *rawp; 10.6 10.7 - skb->mac.raw=skb->data; 10.8 - skb_pull(skb,dev->hard_header_len); 10.9 - eth= skb->mac.ethernet; 10.10 + if (skb->skb_type == SKB_ZERO_COPY) 10.11 + { 10.12 + skb_pull(skb,dev->hard_header_len); 10.13 + skb->mac.raw= (void *)0xdeadbeef; 10.14 + return htons(ETH_P_802_2); 10.15 + 10.16 + } else { // SKB_NORMAL 10.17 + 10.18 + skb->mac.raw=skb->data; 10.19 + skb_pull(skb,dev->hard_header_len); 10.20 + eth= skb->mac.ethernet; 10.21 10.22 - if(*eth->h_dest&1) 10.23 - { 10.24 - if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) 10.25 + if(*eth->h_dest&1) 10.26 + { 10.27 + if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) 10.28 skb->pkt_type=PACKET_BROADCAST; 10.29 else 10.30 skb->pkt_type=PACKET_MULTICAST; 10.31 - } 10.32 + } 10.33 10.34 - /* 10.35 - * This ALLMULTI check should be redundant by 1.4 10.36 - * so don't forget to remove it. 10.37 - * 10.38 - * Seems, you forgot to remove it. All silly devices 10.39 - * seems to set IFF_PROMISC. 10.40 - */ 10.41 + /* 10.42 + * This ALLMULTI check should be redundant by 1.4 10.43 + * so don't forget to remove it. 10.44 + * 10.45 + * Seems, you forgot to remove it. All silly devices 10.46 + * seems to set IFF_PROMISC. 10.47 + */ 10.48 10.49 - else if(1 /*dev->flags&IFF_PROMISC*/) 10.50 - { 10.51 + else if(1 /*dev->flags&IFF_PROMISC*/) 10.52 + { 10.53 if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) 10.54 skb->pkt_type=PACKET_OTHERHOST; 10.55 - } 10.56 + } 10.57 10.58 - if (ntohs(eth->h_proto) >= 1536) 10.59 + if (ntohs(eth->h_proto) >= 1536) 10.60 return eth->h_proto; 10.61 10.62 - rawp = skb->data; 10.63 + rawp = skb->data; 10.64 10.65 - /* 10.66 - * This is a magic hack to spot IPX packets. Older Novell breaks 10.67 - * the protocol design and runs IPX over 802.3 without an 802.2 LLC 10.68 - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This 10.69 - * won't work for fault tolerant netware but does for the rest. 10.70 - */ 10.71 - if (*(unsigned short *)rawp == 0xFFFF) 10.72 + /* 10.73 + * This is a magic hack to spot IPX packets. Older Novell breaks 10.74 + * the protocol design and runs IPX over 802.3 without an 802.2 LLC 10.75 + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This 10.76 + * won't work for fault tolerant netware but does for the rest. 10.77 + */ 10.78 + if (*(unsigned short *)rawp == 0xFFFF) 10.79 return htons(ETH_P_802_3); 10.80 10.81 - /* 10.82 - * Real 802.2 LLC 10.83 - */ 10.84 - return htons(ETH_P_802_2); 10.85 + /* 10.86 + * Real 802.2 LLC 10.87 + */ 10.88 + return htons(ETH_P_802_2); 10.89 + } 10.90 } 10.91 10.92 + 10.93 int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) 10.94 { 10.95 struct ethhdr *eth = skb->mac.ethernet;
11.1 --- a/xen-2.4.16/net/skbuff.c Tue Jan 28 16:13:04 2003 +0000 11.2 +++ b/xen-2.4.16/net/skbuff.c Sat Feb 01 12:06:32 2003 +0000 11.3 @@ -149,6 +149,102 @@ static __inline__ void skb_head_to_pool( 11.4 kmem_cache_free(skbuff_head_cache, skb); 11.5 } 11.6 11.7 +static inline u8 *alloc_skb_data_page(struct sk_buff *skb) 11.8 +{ 11.9 + struct list_head *list_ptr; 11.10 + struct pfn_info *pf; 11.11 + unsigned long flags; 11.12 + 11.13 + spin_lock_irqsave(&free_list_lock, flags); 11.14 + 11.15 + if (!free_pfns) return NULL; 11.16 + 11.17 + list_ptr = free_list.next; 11.18 + pf = list_entry(list_ptr, struct pfn_info, list); 11.19 + pf->flags = 0; // owned by dom0 11.20 + list_del(&pf->list); 11.21 + pf->next = pf->prev = (pf - frame_table); 11.22 + free_pfns--; 11.23 + 11.24 + spin_unlock_irqrestore(&free_list_lock, flags); 11.25 + 11.26 + skb->pf = pf; 11.27 + return (u8 *)((pf - frame_table) << PAGE_SHIFT); 11.28 +} 11.29 + 11.30 +static inline void dealloc_skb_data_page(struct sk_buff *skb) 11.31 +{ 11.32 + struct pfn_info *pf; 11.33 + unsigned long flags; 11.34 + 11.35 + pf = skb->pf; 11.36 + 11.37 + spin_lock_irqsave(&free_list_lock, flags); 11.38 + 11.39 + list_add_tail(&pf->list, &free_list); 11.40 + free_pfns++; 11.41 + 11.42 + spin_unlock_irqrestore(&free_list_lock, flags); 11.43 +} 11.44 + 11.45 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) 11.46 +{ 11.47 + struct sk_buff *skb; 11.48 + u8 *data; 11.49 + 11.50 + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { 11.51 + static int count = 0; 11.52 + if (++count < 5) { 11.53 + printk(KERN_ERR "alloc_skb called nonatomically " 11.54 + "from interrupt %p\n", NET_CALLER(size)); 11.55 + BUG(); 11.56 + } 11.57 + gfp_mask &= ~__GFP_WAIT; 11.58 + } 11.59 + 11.60 + /* Get the HEAD */ 11.61 + skb = skb_head_from_pool(); 11.62 + if (skb == NULL) { 11.63 + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); 11.64 + if (skb == NULL) 11.65 + goto nohead; 11.66 + } 11.67 + 11.68 + /* Get the DATA. Size must match skb_add_mtu(). */ 11.69 + size = SKB_DATA_ALIGN(size); 11.70 + data = alloc_skb_data_page(skb); 11.71 + if (data == NULL) 11.72 + goto nodata; 11.73 + 11.74 + /* XXX: does not include slab overhead */ 11.75 + skb->truesize = size + sizeof(struct sk_buff); 11.76 + 11.77 + /* Load the data pointers. */ 11.78 + skb->head = data; 11.79 + skb->data = data; 11.80 + skb->tail = data; 11.81 + skb->end = data + size; 11.82 + 11.83 + /* Set up other state */ 11.84 + skb->len = 0; 11.85 + skb->cloned = 0; 11.86 + skb->data_len = 0; 11.87 + skb->src_vif = VIF_UNKNOWN_INTERFACE; 11.88 + skb->dst_vif = VIF_UNKNOWN_INTERFACE; 11.89 + skb->skb_type = SKB_ZERO_COPY; 11.90 + 11.91 + atomic_set(&skb->users, 1); 11.92 + atomic_set(&(skb_shinfo(skb)->dataref), 1); 11.93 + skb_shinfo(skb)->nr_frags = 0; 11.94 + skb_shinfo(skb)->frag_list = NULL; 11.95 + return skb; 11.96 + 11.97 +nodata: 11.98 + skb_head_to_pool(skb); 11.99 +nohead: 11.100 + return NULL; 11.101 +} 11.102 + 11.103 11.104 /* Allocate a new skbuff. 
We do this ourselves so we can fill in a few 11.105 * 'private' fields and also do memory statistics to find all the 11.106 @@ -213,6 +309,7 @@ struct sk_buff *alloc_skb(unsigned int s 11.107 skb->data_len = 0; 11.108 skb->src_vif = VIF_UNKNOWN_INTERFACE; 11.109 skb->dst_vif = VIF_UNKNOWN_INTERFACE; 11.110 + skb->skb_type = SKB_NORMAL; 11.111 11.112 atomic_set(&skb->users, 1); 11.113 atomic_set(&(skb_shinfo(skb)->dataref), 1); 11.114 @@ -295,7 +392,13 @@ static void skb_release_data(struct sk_b 11.115 if (skb_shinfo(skb)->frag_list) 11.116 skb_drop_fraglist(skb); 11.117 11.118 - kfree(skb->head); 11.119 + if (skb->skb_type == SKB_NORMAL) { 11.120 + kfree(skb->head); 11.121 + } else if (skb->skb_type == SKB_ZERO_COPY) { 11.122 + dealloc_skb_data_page(skb); 11.123 + } else { 11.124 + printk("skb_release_data called with unknown skb type!\n"); 11.125 + } 11.126 } 11.127 } 11.128
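alloc_zc_skb() differs from alloc_skb() mainly in where the data area comes from: alloc_skb_data_page() unlinks a whole page from the hypervisor's frame free list under free_list_lock, and dealloc_skb_data_page() returns it when the skb is released. A small free-list model of that allocate/free pairing is sketched below; the list, lock, and page type are stand-ins, not the Xen frame-table structures, and the sketch drops the lock on the empty-list path before returning.

```c
/* Model of the page-backed skb data allocation in the hunk above: pages
 * are taken from and returned to a locked free list. Names are illustrative. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct page {
    struct page *next;            /* a singly linked free list is enough here */
};

static struct page *free_list;
static unsigned int free_pfns;
static pthread_mutex_t free_list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct page *alloc_data_page(void)
{
    struct page *pg = NULL;

    pthread_mutex_lock(&free_list_lock);
    if (free_pfns) {              /* release the lock on the empty path too */
        pg = free_list;
        free_list = pg->next;
        free_pfns--;
    }
    pthread_mutex_unlock(&free_list_lock);
    return pg;
}

static void dealloc_data_page(struct page *pg)
{
    pthread_mutex_lock(&free_list_lock);
    pg->next = free_list;
    free_list = pg;
    free_pfns++;
    pthread_mutex_unlock(&free_list_lock);
}

int main(void)
{
    /* Seed the free list with a few pages. */
    for (int i = 0; i < 4; i++)
        dealloc_data_page(malloc(sizeof(struct page)));

    struct page *pg = alloc_data_page();
    printf("got page %p, %u left\n", (void *)pg, free_pfns);
    dealloc_data_page(pg);
    return 0;
}
```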
12.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 12.2 +++ b/xenolinux-2.4.16-sparse/include/linux/skbuff.h Sat Feb 01 12:06:32 2003 +0000 12.3 @@ -0,0 +1,1185 @@ 12.4 +/* 12.5 + * Definitions for the 'struct sk_buff' memory handlers. 12.6 + * 12.7 + * Authors: 12.8 + * Alan Cox, <gw4pts@gw4pts.ampr.org> 12.9 + * Florian La Roche, <rzsfl@rz.uni-sb.de> 12.10 + * 12.11 + * This program is free software; you can redistribute it and/or 12.12 + * modify it under the terms of the GNU General Public License 12.13 + * as published by the Free Software Foundation; either version 12.14 + * 2 of the License, or (at your option) any later version. 12.15 + */ 12.16 + 12.17 +#ifndef _LINUX_SKBUFF_H 12.18 +#define _LINUX_SKBUFF_H 12.19 + 12.20 +#include <linux/config.h> 12.21 +#include <linux/kernel.h> 12.22 +#include <linux/sched.h> 12.23 +#include <linux/time.h> 12.24 +#include <linux/cache.h> 12.25 + 12.26 +#include <asm/atomic.h> 12.27 +#include <asm/types.h> 12.28 +#include <linux/spinlock.h> 12.29 +#include <linux/mm.h> 12.30 +#include <linux/highmem.h> 12.31 + 12.32 +/* Zero Copy additions: 12.33 + * 12.34 + * (1) there are now two types of skb, as indicated by the skb_type field. 12.35 + * this is because, at least for the time being, there are two seperate types 12.36 + * of memory that may be allocated to skb->data. 12.37 + * 12.38 + * (2) until discontiguous memory is fully supported, there will be a free list of pages 12.39 + * to be used by the net RX code. This list will be allocated in the driver init code 12.40 + * but is declared here because the socket free code needs to return pages to it. 12.41 + */ 12.42 + 12.43 +// for skb->skb_type: 12.44 + 12.45 +#define SKB_NORMAL 0 12.46 +#define SKB_ZERO_COPY 1 12.47 + 12.48 +#define NUM_NET_PAGES 9 // about 1Meg of buffers. (2^9) 12.49 +struct net_page_info { 12.50 + struct list_head list; 12.51 + unsigned long virt_addr; 12.52 + unsigned long ppte; 12.53 +}; 12.54 + 12.55 +extern char *net_page_chunk; 12.56 +extern struct net_page_info *net_page_table; 12.57 +extern struct list_head net_page_list; 12.58 +extern spinlock_t net_page_list_lock; 12.59 +extern unsigned int net_pages; 12.60 + 12.61 +/* End zero copy additions */ 12.62 + 12.63 +#define HAVE_ALLOC_SKB /* For the drivers to know */ 12.64 +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 12.65 +#define SLAB_SKB /* Slabified skbuffs */ 12.66 + 12.67 +#define CHECKSUM_NONE 0 12.68 +#define CHECKSUM_HW 1 12.69 +#define CHECKSUM_UNNECESSARY 2 12.70 + 12.71 +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) 12.72 +#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) 12.73 +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) 12.74 +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) 12.75 + 12.76 +/* A. Checksumming of received packets by device. 12.77 + * 12.78 + * NONE: device failed to checksum this packet. 12.79 + * skb->csum is undefined. 12.80 + * 12.81 + * UNNECESSARY: device parsed packet and wouldbe verified checksum. 12.82 + * skb->csum is undefined. 12.83 + * It is bad option, but, unfortunately, many of vendors do this. 12.84 + * Apparently with secret goal to sell you new device, when you 12.85 + * will add new protocol to your host. F.e. IPv6. 8) 12.86 + * 12.87 + * HW: the most generic way. Device supplied checksum of _all_ 12.88 + * the packet as seen by netif_rx in skb->csum. 
12.89 + * NOTE: Even if device supports only some protocols, but 12.90 + * is able to produce some skb->csum, it MUST use HW, 12.91 + * not UNNECESSARY. 12.92 + * 12.93 + * B. Checksumming on output. 12.94 + * 12.95 + * NONE: skb is checksummed by protocol or csum is not required. 12.96 + * 12.97 + * HW: device is required to csum packet as seen by hard_start_xmit 12.98 + * from skb->h.raw to the end and to record the checksum 12.99 + * at skb->h.raw+skb->csum. 12.100 + * 12.101 + * Device must show its capabilities in dev->features, set 12.102 + * at device setup time. 12.103 + * NETIF_F_HW_CSUM - it is clever device, it is able to checksum 12.104 + * everything. 12.105 + * NETIF_F_NO_CSUM - loopback or reliable single hop media. 12.106 + * NETIF_F_IP_CSUM - device is dumb. It is able to csum only 12.107 + * TCP/UDP over IPv4. Sigh. Vendors like this 12.108 + * way by an unknown reason. Though, see comment above 12.109 + * about CHECKSUM_UNNECESSARY. 8) 12.110 + * 12.111 + * Any questions? No questions, good. --ANK 12.112 + */ 12.113 + 12.114 +#ifdef __i386__ 12.115 +#define NET_CALLER(arg) (*(((void**)&arg)-1)) 12.116 +#else 12.117 +#define NET_CALLER(arg) __builtin_return_address(0) 12.118 +#endif 12.119 + 12.120 +#ifdef CONFIG_NETFILTER 12.121 +struct nf_conntrack { 12.122 + atomic_t use; 12.123 + void (*destroy)(struct nf_conntrack *); 12.124 +}; 12.125 + 12.126 +struct nf_ct_info { 12.127 + struct nf_conntrack *master; 12.128 +}; 12.129 +#endif 12.130 + 12.131 +struct sk_buff_head { 12.132 + /* These two members must be first. */ 12.133 + struct sk_buff * next; 12.134 + struct sk_buff * prev; 12.135 + 12.136 + __u32 qlen; 12.137 + spinlock_t lock; 12.138 +}; 12.139 + 12.140 +struct sk_buff; 12.141 + 12.142 +#define MAX_SKB_FRAGS 6 12.143 + 12.144 +typedef struct skb_frag_struct skb_frag_t; 12.145 + 12.146 +struct skb_frag_struct 12.147 +{ 12.148 + struct page *page; 12.149 + __u16 page_offset; 12.150 + __u16 size; 12.151 +}; 12.152 + 12.153 +/* This data is invariant across clones and lives at 12.154 + * the end of the header data, ie. at skb->end. 12.155 + */ 12.156 +struct skb_shared_info { 12.157 + atomic_t dataref; 12.158 + unsigned int nr_frags; 12.159 + struct sk_buff *frag_list; 12.160 + skb_frag_t frags[MAX_SKB_FRAGS]; 12.161 +}; 12.162 + 12.163 +struct sk_buff { 12.164 + /* These two members must be first. */ 12.165 + struct sk_buff * next; /* Next buffer in list */ 12.166 + struct sk_buff * prev; /* Previous buffer in list */ 12.167 + 12.168 + struct sk_buff_head * list; /* List we are on */ 12.169 + struct sock *sk; /* Socket we are owned by */ 12.170 + struct timeval stamp; /* Time we arrived */ 12.171 + struct net_device *dev; /* Device we arrived on/are leaving by */ 12.172 + 12.173 + /* Transport layer header */ 12.174 + union 12.175 + { 12.176 + struct tcphdr *th; 12.177 + struct udphdr *uh; 12.178 + struct icmphdr *icmph; 12.179 + struct igmphdr *igmph; 12.180 + struct iphdr *ipiph; 12.181 + struct spxhdr *spxh; 12.182 + unsigned char *raw; 12.183 + } h; 12.184 + 12.185 + /* Network layer header */ 12.186 + union 12.187 + { 12.188 + struct iphdr *iph; 12.189 + struct ipv6hdr *ipv6h; 12.190 + struct arphdr *arph; 12.191 + struct ipxhdr *ipxh; 12.192 + unsigned char *raw; 12.193 + } nh; 12.194 + 12.195 + /* Link layer header */ 12.196 + union 12.197 + { 12.198 + struct ethhdr *ethernet; 12.199 + unsigned char *raw; 12.200 + } mac; 12.201 + 12.202 + struct dst_entry *dst; 12.203 + 12.204 + /* 12.205 + * This is the control buffer. 
It is free to use for every 12.206 + * layer. Please put your private variables there. If you 12.207 + * want to keep them across layers you have to do a skb_clone() 12.208 + * first. This is owned by whoever has the skb queued ATM. 12.209 + */ 12.210 + char cb[48]; 12.211 + 12.212 + unsigned int len; /* Length of actual data */ 12.213 + unsigned int data_len; 12.214 + unsigned int csum; /* Checksum */ 12.215 + unsigned char __unused, /* Dead field, may be reused */ 12.216 + cloned, /* head may be cloned (check refcnt to be sure). */ 12.217 + pkt_type, /* Packet class */ 12.218 + ip_summed; /* Driver fed us an IP checksum */ 12.219 + __u32 priority; /* Packet queueing priority */ 12.220 + atomic_t users; /* User count - see datagram.c,tcp.c */ 12.221 + unsigned short protocol; /* Packet protocol from driver. */ 12.222 + unsigned short security; /* Security level of packet */ 12.223 + unsigned int truesize; /* Buffer size */ 12.224 + 12.225 + unsigned char *head; /* Head of buffer */ 12.226 + unsigned char *data; /* Data head pointer */ 12.227 + unsigned char *tail; /* Tail pointer */ 12.228 + unsigned char *end; /* End pointer */ 12.229 + 12.230 + void (*destructor)(struct sk_buff *); /* Destruct function */ 12.231 +#ifdef CONFIG_NETFILTER 12.232 + /* Can be used for communication between hooks. */ 12.233 + unsigned long nfmark; 12.234 + /* Cache info */ 12.235 + __u32 nfcache; 12.236 + /* Associated connection, if any */ 12.237 + struct nf_ct_info *nfct; 12.238 +#ifdef CONFIG_NETFILTER_DEBUG 12.239 + unsigned int nf_debug; 12.240 +#endif 12.241 +#endif /*CONFIG_NETFILTER*/ 12.242 + 12.243 +#if defined(CONFIG_HIPPI) 12.244 + union{ 12.245 + __u32 ifield; 12.246 + } private; 12.247 +#endif 12.248 + 12.249 +#ifdef CONFIG_NET_SCHED 12.250 + __u32 tc_index; /* traffic control index */ 12.251 +#endif 12.252 + unsigned int skb_type; /* for zero copy handling. 
*/ 12.253 + struct net_page_info *net_page; 12.254 +}; 12.255 + 12.256 +#define SK_WMEM_MAX 65535 12.257 +#define SK_RMEM_MAX 65535 12.258 + 12.259 +#ifdef __KERNEL__ 12.260 +/* 12.261 + * Handling routines are only of interest to the kernel 12.262 + */ 12.263 +#include <linux/slab.h> 12.264 + 12.265 +#include <asm/system.h> 12.266 + 12.267 +extern void __kfree_skb(struct sk_buff *skb); 12.268 +extern struct sk_buff * alloc_skb(unsigned int size, int priority); 12.269 +extern struct sk_buff * alloc_zc_skb(unsigned int size, int priority); 12.270 +extern void kfree_skbmem(struct sk_buff *skb); 12.271 +extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); 12.272 +extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); 12.273 +extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); 12.274 +extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); 12.275 +extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); 12.276 +extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, 12.277 + int newheadroom, 12.278 + int newtailroom, 12.279 + int priority); 12.280 +#define dev_kfree_skb(a) kfree_skb(a) 12.281 +extern void skb_over_panic(struct sk_buff *skb, int len, void *here); 12.282 +extern void skb_under_panic(struct sk_buff *skb, int len, void *here); 12.283 + 12.284 +/* Internal */ 12.285 +#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 12.286 + 12.287 +/** 12.288 + * skb_queue_empty - check if a queue is empty 12.289 + * @list: queue head 12.290 + * 12.291 + * Returns true if the queue is empty, false otherwise. 12.292 + */ 12.293 + 12.294 +static inline int skb_queue_empty(struct sk_buff_head *list) 12.295 +{ 12.296 + return (list->next == (struct sk_buff *) list); 12.297 +} 12.298 + 12.299 +/** 12.300 + * skb_get - reference buffer 12.301 + * @skb: buffer to reference 12.302 + * 12.303 + * Makes another reference to a socket buffer and returns a pointer 12.304 + * to the buffer. 12.305 + */ 12.306 + 12.307 +static inline struct sk_buff *skb_get(struct sk_buff *skb) 12.308 +{ 12.309 + atomic_inc(&skb->users); 12.310 + return skb; 12.311 +} 12.312 + 12.313 +/* 12.314 + * If users==1, we are the only owner and are can avoid redundant 12.315 + * atomic change. 12.316 + */ 12.317 + 12.318 +/** 12.319 + * kfree_skb - free an sk_buff 12.320 + * @skb: buffer to free 12.321 + * 12.322 + * Drop a reference to the buffer and free it if the usage count has 12.323 + * hit zero. 12.324 + */ 12.325 + 12.326 +static inline void kfree_skb(struct sk_buff *skb) 12.327 +{ 12.328 + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 12.329 + __kfree_skb(skb); 12.330 +} 12.331 + 12.332 +/* Use this if you didn't touch the skb state [for fast switching] */ 12.333 +static inline void kfree_skb_fast(struct sk_buff *skb) 12.334 +{ 12.335 + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 12.336 + kfree_skbmem(skb); 12.337 +} 12.338 + 12.339 +/** 12.340 + * skb_cloned - is the buffer a clone 12.341 + * @skb: buffer to check 12.342 + * 12.343 + * Returns true if the buffer was generated with skb_clone() and is 12.344 + * one of multiple shared copies of the buffer. Cloned buffers are 12.345 + * shared data so must not be written to under normal circumstances. 
12.346 + */ 12.347 + 12.348 +static inline int skb_cloned(struct sk_buff *skb) 12.349 +{ 12.350 + return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; 12.351 +} 12.352 + 12.353 +/** 12.354 + * skb_shared - is the buffer shared 12.355 + * @skb: buffer to check 12.356 + * 12.357 + * Returns true if more than one person has a reference to this 12.358 + * buffer. 12.359 + */ 12.360 + 12.361 +static inline int skb_shared(struct sk_buff *skb) 12.362 +{ 12.363 + return (atomic_read(&skb->users) != 1); 12.364 +} 12.365 + 12.366 +/** 12.367 + * skb_share_check - check if buffer is shared and if so clone it 12.368 + * @skb: buffer to check 12.369 + * @pri: priority for memory allocation 12.370 + * 12.371 + * If the buffer is shared the buffer is cloned and the old copy 12.372 + * drops a reference. A new clone with a single reference is returned. 12.373 + * If the buffer is not shared the original buffer is returned. When 12.374 + * being called from interrupt status or with spinlocks held pri must 12.375 + * be GFP_ATOMIC. 12.376 + * 12.377 + * NULL is returned on a memory allocation failure. 12.378 + */ 12.379 + 12.380 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) 12.381 +{ 12.382 + if (skb_shared(skb)) { 12.383 + struct sk_buff *nskb; 12.384 + nskb = skb_clone(skb, pri); 12.385 + kfree_skb(skb); 12.386 + return nskb; 12.387 + } 12.388 + return skb; 12.389 +} 12.390 + 12.391 + 12.392 +/* 12.393 + * Copy shared buffers into a new sk_buff. We effectively do COW on 12.394 + * packets to handle cases where we have a local reader and forward 12.395 + * and a couple of other messy ones. The normal one is tcpdumping 12.396 + * a packet thats being forwarded. 12.397 + */ 12.398 + 12.399 +/** 12.400 + * skb_unshare - make a copy of a shared buffer 12.401 + * @skb: buffer to check 12.402 + * @pri: priority for memory allocation 12.403 + * 12.404 + * If the socket buffer is a clone then this function creates a new 12.405 + * copy of the data, drops a reference count on the old copy and returns 12.406 + * the new copy with the reference count at 1. If the buffer is not a clone 12.407 + * the original buffer is returned. When called with a spinlock held or 12.408 + * from interrupt state @pri must be %GFP_ATOMIC 12.409 + * 12.410 + * %NULL is returned on a memory allocation failure. 12.411 + */ 12.412 + 12.413 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) 12.414 +{ 12.415 + struct sk_buff *nskb; 12.416 + if(!skb_cloned(skb)) 12.417 + return skb; 12.418 + nskb=skb_copy(skb, pri); 12.419 + kfree_skb(skb); /* Free our shared copy */ 12.420 + return nskb; 12.421 +} 12.422 + 12.423 +/** 12.424 + * skb_peek 12.425 + * @list_: list to peek at 12.426 + * 12.427 + * Peek an &sk_buff. Unlike most other operations you _MUST_ 12.428 + * be careful with this one. A peek leaves the buffer on the 12.429 + * list and someone else may run off with it. You must hold 12.430 + * the appropriate locks or have a private queue to do this. 12.431 + * 12.432 + * Returns %NULL for an empty list or a pointer to the head element. 12.433 + * The reference count is not incremented and the reference is therefore 12.434 + * volatile. Use with caution. 
12.435 + */ 12.436 + 12.437 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) 12.438 +{ 12.439 + struct sk_buff *list = ((struct sk_buff *)list_)->next; 12.440 + if (list == (struct sk_buff *)list_) 12.441 + list = NULL; 12.442 + return list; 12.443 +} 12.444 + 12.445 +/** 12.446 + * skb_peek_tail 12.447 + * @list_: list to peek at 12.448 + * 12.449 + * Peek an &sk_buff. Unlike most other operations you _MUST_ 12.450 + * be careful with this one. A peek leaves the buffer on the 12.451 + * list and someone else may run off with it. You must hold 12.452 + * the appropriate locks or have a private queue to do this. 12.453 + * 12.454 + * Returns %NULL for an empty list or a pointer to the tail element. 12.455 + * The reference count is not incremented and the reference is therefore 12.456 + * volatile. Use with caution. 12.457 + */ 12.458 + 12.459 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) 12.460 +{ 12.461 + struct sk_buff *list = ((struct sk_buff *)list_)->prev; 12.462 + if (list == (struct sk_buff *)list_) 12.463 + list = NULL; 12.464 + return list; 12.465 +} 12.466 + 12.467 +/** 12.468 + * skb_queue_len - get queue length 12.469 + * @list_: list to measure 12.470 + * 12.471 + * Return the length of an &sk_buff queue. 12.472 + */ 12.473 + 12.474 +static inline __u32 skb_queue_len(struct sk_buff_head *list_) 12.475 +{ 12.476 + return(list_->qlen); 12.477 +} 12.478 + 12.479 +static inline void skb_queue_head_init(struct sk_buff_head *list) 12.480 +{ 12.481 + spin_lock_init(&list->lock); 12.482 + list->prev = (struct sk_buff *)list; 12.483 + list->next = (struct sk_buff *)list; 12.484 + list->qlen = 0; 12.485 +} 12.486 + 12.487 +/* 12.488 + * Insert an sk_buff at the start of a list. 12.489 + * 12.490 + * The "__skb_xxxx()" functions are the non-atomic ones that 12.491 + * can only be called with interrupts disabled. 12.492 + */ 12.493 + 12.494 +/** 12.495 + * __skb_queue_head - queue a buffer at the list head 12.496 + * @list: list to use 12.497 + * @newsk: buffer to queue 12.498 + * 12.499 + * Queue a buffer at the start of a list. This function takes no locks 12.500 + * and you must therefore hold required locks before calling it. 12.501 + * 12.502 + * A buffer cannot be placed on two lists at the same time. 12.503 + */ 12.504 + 12.505 +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 12.506 +{ 12.507 + struct sk_buff *prev, *next; 12.508 + 12.509 + newsk->list = list; 12.510 + list->qlen++; 12.511 + prev = (struct sk_buff *)list; 12.512 + next = prev->next; 12.513 + newsk->next = next; 12.514 + newsk->prev = prev; 12.515 + next->prev = newsk; 12.516 + prev->next = newsk; 12.517 +} 12.518 + 12.519 + 12.520 +/** 12.521 + * skb_queue_head - queue a buffer at the list head 12.522 + * @list: list to use 12.523 + * @newsk: buffer to queue 12.524 + * 12.525 + * Queue a buffer at the start of the list. This function takes the 12.526 + * list lock and can be used safely with other locking &sk_buff functions 12.527 + * safely. 12.528 + * 12.529 + * A buffer cannot be placed on two lists at the same time. 
12.530 + */ 12.531 + 12.532 +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 12.533 +{ 12.534 + unsigned long flags; 12.535 + 12.536 + spin_lock_irqsave(&list->lock, flags); 12.537 + __skb_queue_head(list, newsk); 12.538 + spin_unlock_irqrestore(&list->lock, flags); 12.539 +} 12.540 + 12.541 +/** 12.542 + * __skb_queue_tail - queue a buffer at the list tail 12.543 + * @list: list to use 12.544 + * @newsk: buffer to queue 12.545 + * 12.546 + * Queue a buffer at the end of a list. This function takes no locks 12.547 + * and you must therefore hold required locks before calling it. 12.548 + * 12.549 + * A buffer cannot be placed on two lists at the same time. 12.550 + */ 12.551 + 12.552 + 12.553 +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 12.554 +{ 12.555 + struct sk_buff *prev, *next; 12.556 + 12.557 + newsk->list = list; 12.558 + list->qlen++; 12.559 + next = (struct sk_buff *)list; 12.560 + prev = next->prev; 12.561 + newsk->next = next; 12.562 + newsk->prev = prev; 12.563 + next->prev = newsk; 12.564 + prev->next = newsk; 12.565 +} 12.566 + 12.567 +/** 12.568 + * skb_queue_tail - queue a buffer at the list tail 12.569 + * @list: list to use 12.570 + * @newsk: buffer to queue 12.571 + * 12.572 + * Queue a buffer at the tail of the list. This function takes the 12.573 + * list lock and can be used safely with other locking &sk_buff functions 12.574 + * safely. 12.575 + * 12.576 + * A buffer cannot be placed on two lists at the same time. 12.577 + */ 12.578 + 12.579 +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 12.580 +{ 12.581 + unsigned long flags; 12.582 + 12.583 + spin_lock_irqsave(&list->lock, flags); 12.584 + __skb_queue_tail(list, newsk); 12.585 + spin_unlock_irqrestore(&list->lock, flags); 12.586 +} 12.587 + 12.588 +/** 12.589 + * __skb_dequeue - remove from the head of the queue 12.590 + * @list: list to dequeue from 12.591 + * 12.592 + * Remove the head of the list. This function does not take any locks 12.593 + * so must be used with appropriate locks held only. The head item is 12.594 + * returned or %NULL if the list is empty. 12.595 + */ 12.596 + 12.597 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) 12.598 +{ 12.599 + struct sk_buff *next, *prev, *result; 12.600 + 12.601 + prev = (struct sk_buff *) list; 12.602 + next = prev->next; 12.603 + result = NULL; 12.604 + if (next != prev) { 12.605 + result = next; 12.606 + next = next->next; 12.607 + list->qlen--; 12.608 + next->prev = prev; 12.609 + prev->next = next; 12.610 + result->next = NULL; 12.611 + result->prev = NULL; 12.612 + result->list = NULL; 12.613 + } 12.614 + return result; 12.615 +} 12.616 + 12.617 +/** 12.618 + * skb_dequeue - remove from the head of the queue 12.619 + * @list: list to dequeue from 12.620 + * 12.621 + * Remove the head of the list. The list lock is taken so the function 12.622 + * may be used safely with other locking list functions. The head item is 12.623 + * returned or %NULL if the list is empty. 12.624 + */ 12.625 + 12.626 +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) 12.627 +{ 12.628 + long flags; 12.629 + struct sk_buff *result; 12.630 + 12.631 + spin_lock_irqsave(&list->lock, flags); 12.632 + result = __skb_dequeue(list); 12.633 + spin_unlock_irqrestore(&list->lock, flags); 12.634 + return result; 12.635 +} 12.636 + 12.637 +/* 12.638 + * Insert a packet on a list. 
12.639 + */ 12.640 + 12.641 +static inline void __skb_insert(struct sk_buff *newsk, 12.642 + struct sk_buff * prev, struct sk_buff *next, 12.643 + struct sk_buff_head * list) 12.644 +{ 12.645 + newsk->next = next; 12.646 + newsk->prev = prev; 12.647 + next->prev = newsk; 12.648 + prev->next = newsk; 12.649 + newsk->list = list; 12.650 + list->qlen++; 12.651 +} 12.652 + 12.653 +/** 12.654 + * skb_insert - insert a buffer 12.655 + * @old: buffer to insert before 12.656 + * @newsk: buffer to insert 12.657 + * 12.658 + * Place a packet before a given packet in a list. The list locks are taken 12.659 + * and this function is atomic with respect to other list locked calls 12.660 + * A buffer cannot be placed on two lists at the same time. 12.661 + */ 12.662 + 12.663 +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) 12.664 +{ 12.665 + unsigned long flags; 12.666 + 12.667 + spin_lock_irqsave(&old->list->lock, flags); 12.668 + __skb_insert(newsk, old->prev, old, old->list); 12.669 + spin_unlock_irqrestore(&old->list->lock, flags); 12.670 +} 12.671 + 12.672 +/* 12.673 + * Place a packet after a given packet in a list. 12.674 + */ 12.675 + 12.676 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) 12.677 +{ 12.678 + __skb_insert(newsk, old, old->next, old->list); 12.679 +} 12.680 + 12.681 +/** 12.682 + * skb_append - append a buffer 12.683 + * @old: buffer to insert after 12.684 + * @newsk: buffer to insert 12.685 + * 12.686 + * Place a packet after a given packet in a list. The list locks are taken 12.687 + * and this function is atomic with respect to other list locked calls. 12.688 + * A buffer cannot be placed on two lists at the same time. 12.689 + */ 12.690 + 12.691 + 12.692 +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) 12.693 +{ 12.694 + unsigned long flags; 12.695 + 12.696 + spin_lock_irqsave(&old->list->lock, flags); 12.697 + __skb_append(old, newsk); 12.698 + spin_unlock_irqrestore(&old->list->lock, flags); 12.699 +} 12.700 + 12.701 +/* 12.702 + * remove sk_buff from list. _Must_ be called atomically, and with 12.703 + * the list known.. 12.704 + */ 12.705 + 12.706 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 12.707 +{ 12.708 + struct sk_buff * next, * prev; 12.709 + 12.710 + list->qlen--; 12.711 + next = skb->next; 12.712 + prev = skb->prev; 12.713 + skb->next = NULL; 12.714 + skb->prev = NULL; 12.715 + skb->list = NULL; 12.716 + next->prev = prev; 12.717 + prev->next = next; 12.718 +} 12.719 + 12.720 +/** 12.721 + * skb_unlink - remove a buffer from a list 12.722 + * @skb: buffer to remove 12.723 + * 12.724 + * Place a packet after a given packet in a list. The list locks are taken 12.725 + * and this function is atomic with respect to other list locked calls 12.726 + * 12.727 + * Works even without knowing the list it is sitting on, which can be 12.728 + * handy at times. It also means that THE LIST MUST EXIST when you 12.729 + * unlink. Thus a list must have its contents unlinked before it is 12.730 + * destroyed. 
12.731 + */ 12.732 + 12.733 +static inline void skb_unlink(struct sk_buff *skb) 12.734 +{ 12.735 + struct sk_buff_head *list = skb->list; 12.736 + 12.737 + if(list) { 12.738 + unsigned long flags; 12.739 + 12.740 + spin_lock_irqsave(&list->lock, flags); 12.741 + if(skb->list == list) 12.742 + __skb_unlink(skb, skb->list); 12.743 + spin_unlock_irqrestore(&list->lock, flags); 12.744 + } 12.745 +} 12.746 + 12.747 +/* XXX: more streamlined implementation */ 12.748 + 12.749 +/** 12.750 + * __skb_dequeue_tail - remove from the tail of the queue 12.751 + * @list: list to dequeue from 12.752 + * 12.753 + * Remove the tail of the list. This function does not take any locks 12.754 + * so must be used with appropriate locks held only. The tail item is 12.755 + * returned or %NULL if the list is empty. 12.756 + */ 12.757 + 12.758 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) 12.759 +{ 12.760 + struct sk_buff *skb = skb_peek_tail(list); 12.761 + if (skb) 12.762 + __skb_unlink(skb, list); 12.763 + return skb; 12.764 +} 12.765 + 12.766 +/** 12.767 + * skb_dequeue - remove from the head of the queue 12.768 + * @list: list to dequeue from 12.769 + * 12.770 + * Remove the head of the list. The list lock is taken so the function 12.771 + * may be used safely with other locking list functions. The tail item is 12.772 + * returned or %NULL if the list is empty. 12.773 + */ 12.774 + 12.775 +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) 12.776 +{ 12.777 + long flags; 12.778 + struct sk_buff *result; 12.779 + 12.780 + spin_lock_irqsave(&list->lock, flags); 12.781 + result = __skb_dequeue_tail(list); 12.782 + spin_unlock_irqrestore(&list->lock, flags); 12.783 + return result; 12.784 +} 12.785 + 12.786 +static inline int skb_is_nonlinear(const struct sk_buff *skb) 12.787 +{ 12.788 + return skb->data_len; 12.789 +} 12.790 + 12.791 +static inline int skb_headlen(const struct sk_buff *skb) 12.792 +{ 12.793 + return skb->len - skb->data_len; 12.794 +} 12.795 + 12.796 +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0) 12.797 +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0) 12.798 +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0) 12.799 + 12.800 +/* 12.801 + * Add data to an sk_buff 12.802 + */ 12.803 + 12.804 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 12.805 +{ 12.806 + unsigned char *tmp=skb->tail; 12.807 + SKB_LINEAR_ASSERT(skb); 12.808 + skb->tail+=len; 12.809 + skb->len+=len; 12.810 + return tmp; 12.811 +} 12.812 + 12.813 +/** 12.814 + * skb_put - add data to a buffer 12.815 + * @skb: buffer to use 12.816 + * @len: amount of data to add 12.817 + * 12.818 + * This function extends the used data area of the buffer. If this would 12.819 + * exceed the total buffer size the kernel will panic. A pointer to the 12.820 + * first byte of the extra data is returned. 
12.821 + */ 12.822 + 12.823 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 12.824 +{ 12.825 + unsigned char *tmp=skb->tail; 12.826 + SKB_LINEAR_ASSERT(skb); 12.827 + skb->tail+=len; 12.828 + skb->len+=len; 12.829 + if(skb->tail>skb->end) { 12.830 + skb_over_panic(skb, len, current_text_addr()); 12.831 + } 12.832 + return tmp; 12.833 +} 12.834 + 12.835 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) 12.836 +{ 12.837 + skb->data-=len; 12.838 + skb->len+=len; 12.839 + return skb->data; 12.840 +} 12.841 + 12.842 +/** 12.843 + * skb_push - add data to the start of a buffer 12.844 + * @skb: buffer to use 12.845 + * @len: amount of data to add 12.846 + * 12.847 + * This function extends the used data area of the buffer at the buffer 12.848 + * start. If this would exceed the total buffer headroom the kernel will 12.849 + * panic. A pointer to the first byte of the extra data is returned. 12.850 + */ 12.851 + 12.852 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) 12.853 +{ 12.854 + skb->data-=len; 12.855 + skb->len+=len; 12.856 + if(skb->data<skb->head) { 12.857 + skb_under_panic(skb, len, current_text_addr()); 12.858 + } 12.859 + return skb->data; 12.860 +} 12.861 + 12.862 +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) 12.863 +{ 12.864 + skb->len-=len; 12.865 + if (skb->len < skb->data_len) 12.866 + BUG(); 12.867 + return skb->data+=len; 12.868 +} 12.869 + 12.870 +/** 12.871 + * skb_pull - remove data from the start of a buffer 12.872 + * @skb: buffer to use 12.873 + * @len: amount of data to remove 12.874 + * 12.875 + * This function removes data from the start of a buffer, returning 12.876 + * the memory to the headroom. A pointer to the next data in the buffer 12.877 + * is returned. Once the data has been pulled future pushes will overwrite 12.878 + * the old data. 12.879 + */ 12.880 + 12.881 +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) 12.882 +{ 12.883 + if (len > skb->len) 12.884 + return NULL; 12.885 + return __skb_pull(skb,len); 12.886 +} 12.887 + 12.888 +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); 12.889 + 12.890 +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) 12.891 +{ 12.892 + if (len > skb_headlen(skb) && 12.893 + __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) 12.894 + return NULL; 12.895 + skb->len -= len; 12.896 + return skb->data += len; 12.897 +} 12.898 + 12.899 +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) 12.900 +{ 12.901 + if (len > skb->len) 12.902 + return NULL; 12.903 + return __pskb_pull(skb,len); 12.904 +} 12.905 + 12.906 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) 12.907 +{ 12.908 + if (len <= skb_headlen(skb)) 12.909 + return 1; 12.910 + if (len > skb->len) 12.911 + return 0; 12.912 + return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); 12.913 +} 12.914 + 12.915 +/** 12.916 + * skb_headroom - bytes at buffer head 12.917 + * @skb: buffer to check 12.918 + * 12.919 + * Return the number of bytes of free space at the head of an &sk_buff. 
12.920 + */ 12.921 + 12.922 +static inline int skb_headroom(const struct sk_buff *skb) 12.923 +{ 12.924 + return skb->data-skb->head; 12.925 +} 12.926 + 12.927 +/** 12.928 + * skb_tailroom - bytes at buffer end 12.929 + * @skb: buffer to check 12.930 + * 12.931 + * Return the number of bytes of free space at the tail of an sk_buff 12.932 + */ 12.933 + 12.934 +static inline int skb_tailroom(const struct sk_buff *skb) 12.935 +{ 12.936 + return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; 12.937 +} 12.938 + 12.939 +/** 12.940 + * skb_reserve - adjust headroom 12.941 + * @skb: buffer to alter 12.942 + * @len: bytes to move 12.943 + * 12.944 + * Increase the headroom of an empty &sk_buff by reducing the tail 12.945 + * room. This is only allowed for an empty buffer. 12.946 + */ 12.947 + 12.948 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len) 12.949 +{ 12.950 + skb->data+=len; 12.951 + skb->tail+=len; 12.952 +} 12.953 + 12.954 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); 12.955 + 12.956 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) 12.957 +{ 12.958 + if (!skb->data_len) { 12.959 + skb->len = len; 12.960 + skb->tail = skb->data+len; 12.961 + } else { 12.962 + ___pskb_trim(skb, len, 0); 12.963 + } 12.964 +} 12.965 + 12.966 +/** 12.967 + * skb_trim - remove end from a buffer 12.968 + * @skb: buffer to alter 12.969 + * @len: new length 12.970 + * 12.971 + * Cut the length of a buffer down by removing data from the tail. If 12.972 + * the buffer is already under the length specified it is not modified. 12.973 + */ 12.974 + 12.975 +static inline void skb_trim(struct sk_buff *skb, unsigned int len) 12.976 +{ 12.977 + if (skb->len > len) { 12.978 + __skb_trim(skb, len); 12.979 + } 12.980 +} 12.981 + 12.982 + 12.983 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) 12.984 +{ 12.985 + if (!skb->data_len) { 12.986 + skb->len = len; 12.987 + skb->tail = skb->data+len; 12.988 + return 0; 12.989 + } else { 12.990 + return ___pskb_trim(skb, len, 1); 12.991 + } 12.992 +} 12.993 + 12.994 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len) 12.995 +{ 12.996 + if (len < skb->len) 12.997 + return __pskb_trim(skb, len); 12.998 + return 0; 12.999 +} 12.1000 + 12.1001 +/** 12.1002 + * skb_orphan - orphan a buffer 12.1003 + * @skb: buffer to orphan 12.1004 + * 12.1005 + * If a buffer currently has an owner then we call the owner's 12.1006 + * destructor function and make the @skb unowned. The buffer continues 12.1007 + * to exist but is no longer charged to its former owner. 12.1008 + */ 12.1009 + 12.1010 + 12.1011 +static inline void skb_orphan(struct sk_buff *skb) 12.1012 +{ 12.1013 + if (skb->destructor) 12.1014 + skb->destructor(skb); 12.1015 + skb->destructor = NULL; 12.1016 + skb->sk = NULL; 12.1017 +} 12.1018 + 12.1019 +/** 12.1020 + * skb_purge - empty a list 12.1021 + * @list: list to empty 12.1022 + * 12.1023 + * Delete all buffers on an &sk_buff list. Each buffer is removed from 12.1024 + * the list and one reference dropped. This function takes the list 12.1025 + * lock and is atomic with respect to other list locking functions. 
12.1026 + */ 12.1027 + 12.1028 + 12.1029 +static inline void skb_queue_purge(struct sk_buff_head *list) 12.1030 +{ 12.1031 + struct sk_buff *skb; 12.1032 + while ((skb=skb_dequeue(list))!=NULL) 12.1033 + kfree_skb(skb); 12.1034 +} 12.1035 + 12.1036 +/** 12.1037 + * __skb_purge - empty a list 12.1038 + * @list: list to empty 12.1039 + * 12.1040 + * Delete all buffers on an &sk_buff list. Each buffer is removed from 12.1041 + * the list and one reference dropped. This function does not take the 12.1042 + * list lock and the caller must hold the relevant locks to use it. 12.1043 + */ 12.1044 + 12.1045 + 12.1046 +static inline void __skb_queue_purge(struct sk_buff_head *list) 12.1047 +{ 12.1048 + struct sk_buff *skb; 12.1049 + while ((skb=__skb_dequeue(list))!=NULL) 12.1050 + kfree_skb(skb); 12.1051 +} 12.1052 + 12.1053 +/** 12.1054 + * __dev_alloc_skb - allocate an skbuff for sending 12.1055 + * @length: length to allocate 12.1056 + * @gfp_mask: get_free_pages mask, passed to alloc_skb 12.1057 + * 12.1058 + * Allocate a new &sk_buff and assign it a usage count of one. The 12.1059 + * buffer has unspecified headroom built in. Users should allocate 12.1060 + * the headroom they think they need without accounting for the 12.1061 + * built in space. The built in space is used for optimisations. 12.1062 + * 12.1063 + * %NULL is returned in there is no free memory. 12.1064 + */ 12.1065 + 12.1066 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length, 12.1067 + int gfp_mask) 12.1068 +{ 12.1069 + struct sk_buff *skb; 12.1070 + 12.1071 + //skb = alloc_skb(length+16, gfp_mask); 12.1072 + skb = alloc_zc_skb(length+16, gfp_mask); 12.1073 + if (skb) 12.1074 + skb_reserve(skb,16); 12.1075 + return skb; 12.1076 +} 12.1077 + 12.1078 +/** 12.1079 + * dev_alloc_skb - allocate an skbuff for sending 12.1080 + * @length: length to allocate 12.1081 + * 12.1082 + * Allocate a new &sk_buff and assign it a usage count of one. The 12.1083 + * buffer has unspecified headroom built in. Users should allocate 12.1084 + * the headroom they think they need without accounting for the 12.1085 + * built in space. The built in space is used for optimisations. 12.1086 + * 12.1087 + * %NULL is returned in there is no free memory. Although this function 12.1088 + * allocates memory it can be called from an interrupt. 12.1089 + */ 12.1090 + 12.1091 +static inline struct sk_buff *dev_alloc_skb(unsigned int length) 12.1092 +{ 12.1093 + return __dev_alloc_skb(length, GFP_ATOMIC); 12.1094 +} 12.1095 + 12.1096 +/** 12.1097 + * skb_cow - copy header of skb when it is required 12.1098 + * @skb: buffer to cow 12.1099 + * @headroom: needed headroom 12.1100 + * 12.1101 + * If the skb passed lacks sufficient headroom or its data part 12.1102 + * is shared, data is reallocated. If reallocation fails, an error 12.1103 + * is returned and original skb is not changed. 12.1104 + * 12.1105 + * The result is skb with writable area skb->head...skb->tail 12.1106 + * and at least @headroom of space at head. 12.1107 + */ 12.1108 + 12.1109 +static inline int 12.1110 +skb_cow(struct sk_buff *skb, unsigned int headroom) 12.1111 +{ 12.1112 + int delta = (headroom > 16 ? 
headroom : 16) - skb_headroom(skb); 12.1113 + 12.1114 + if (delta < 0) 12.1115 + delta = 0; 12.1116 + 12.1117 + if (delta || skb_cloned(skb)) 12.1118 + return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); 12.1119 + return 0; 12.1120 +} 12.1121 + 12.1122 +/** 12.1123 + * skb_linearize - convert paged skb to linear one 12.1124 + * @skb: buffer to linarize 12.1125 + * @gfp: allocation mode 12.1126 + * 12.1127 + * If there is no free memory -ENOMEM is returned, otherwise zero 12.1128 + * is returned and the old skb data released. */ 12.1129 +int skb_linearize(struct sk_buff *skb, int gfp); 12.1130 + 12.1131 +static inline void *kmap_skb_frag(const skb_frag_t *frag) 12.1132 +{ 12.1133 +#ifdef CONFIG_HIGHMEM 12.1134 + if (in_irq()) 12.1135 + BUG(); 12.1136 + 12.1137 + local_bh_disable(); 12.1138 +#endif 12.1139 + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); 12.1140 +} 12.1141 + 12.1142 +static inline void kunmap_skb_frag(void *vaddr) 12.1143 +{ 12.1144 + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); 12.1145 +#ifdef CONFIG_HIGHMEM 12.1146 + local_bh_enable(); 12.1147 +#endif 12.1148 +} 12.1149 + 12.1150 +#define skb_queue_walk(queue, skb) \ 12.1151 + for (skb = (queue)->next; \ 12.1152 + (skb != (struct sk_buff *)(queue)); \ 12.1153 + skb=skb->next) 12.1154 + 12.1155 + 12.1156 +extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); 12.1157 +extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); 12.1158 +extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); 12.1159 +extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); 12.1160 +extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); 12.1161 +extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); 12.1162 +extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); 12.1163 + 12.1164 +extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); 12.1165 +extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); 12.1166 +extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); 12.1167 +extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 12.1168 + 12.1169 +extern void skb_init(void); 12.1170 +extern void skb_add_mtu(int mtu); 12.1171 + 12.1172 +#ifdef CONFIG_NETFILTER 12.1173 +static inline void 12.1174 +nf_conntrack_put(struct nf_ct_info *nfct) 12.1175 +{ 12.1176 + if (nfct && atomic_dec_and_test(&nfct->master->use)) 12.1177 + nfct->master->destroy(nfct->master); 12.1178 +} 12.1179 +static inline void 12.1180 +nf_conntrack_get(struct nf_ct_info *nfct) 12.1181 +{ 12.1182 + if (nfct) 12.1183 + atomic_inc(&nfct->master->use); 12.1184 +} 12.1185 +#endif 12.1186 + 12.1187 +#endif /* __KERNEL__ */ 12.1188 +#endif /* _LINUX_SKBUFF_H */
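The guest-side header mirrors the hypervisor changes: as its introductory comment says, there are now two kinds of skb, selected by skb_type, and zero-copy buffers are backed by entries in a driver-owned net_page_info list rather than kmalloc'd data, so the release path has to branch on that tag. A hedged sketch of the tagged-release pattern follows; every structure here is an illustrative stand-in.

```c
/* Sketch of the skb_type discrimination introduced by this header: normal
 * buffers are freed with free(), zero-copy buffers go back to a page pool.
 * Everything here is an illustrative stand-in for the real structures. */
#include <stdio.h>
#include <stdlib.h>

#define SKB_NORMAL    0
#define SKB_ZERO_COPY 1

struct net_page_info { int in_use; };          /* stand-in for the real entry */

struct fake_skb {
    unsigned int skb_type;
    unsigned char *head;                       /* kmalloc'd data, if normal */
    struct net_page_info *net_page;            /* RX page, if zero-copy */
};

static void release_data(struct fake_skb *skb)
{
    if (skb->skb_type == SKB_NORMAL)
        free(skb->head);                       /* ordinary kmalloc/slab path */
    else if (skb->skb_type == SKB_ZERO_COPY)
        skb->net_page->in_use = 0;             /* return page to the RX pool */
    else
        fprintf(stderr, "unknown skb type %u\n", skb->skb_type);
}

int main(void)
{
    struct net_page_info page = { .in_use = 1 };
    struct fake_skb a = { SKB_NORMAL, malloc(64), NULL };
    struct fake_skb b = { SKB_ZERO_COPY, NULL, &page };

    release_data(&a);
    release_data(&b);
    printf("zero-copy page in_use=%d\n", page.in_use);
    return 0;
}
```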
13.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 13.2 +++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c Sat Feb 01 12:06:32 2003 +0000 13.3 @@ -0,0 +1,1366 @@ 13.4 +/* 13.5 + * Routines having to do with the 'struct sk_buff' memory handlers. 13.6 + * 13.7 + * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> 13.8 + * Florian La Roche <rzsfl@rz.uni-sb.de> 13.9 + * 13.10 + * Version: $Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $ 13.11 + * 13.12 + * Fixes: 13.13 + * Alan Cox : Fixed the worst of the load balancer bugs. 13.14 + * Dave Platt : Interrupt stacking fix. 13.15 + * Richard Kooijman : Timestamp fixes. 13.16 + * Alan Cox : Changed buffer format. 13.17 + * Alan Cox : destructor hook for AF_UNIX etc. 13.18 + * Linus Torvalds : Better skb_clone. 13.19 + * Alan Cox : Added skb_copy. 13.20 + * Alan Cox : Added all the changed routines Linus 13.21 + * only put in the headers 13.22 + * Ray VanTassle : Fixed --skb->lock in free 13.23 + * Alan Cox : skb_copy copy arp field 13.24 + * Andi Kleen : slabified it. 13.25 + * 13.26 + * NOTE: 13.27 + * The __skb_ routines should be called with interrupts 13.28 + * disabled, or you better be *real* sure that the operation is atomic 13.29 + * with respect to whatever list is being frobbed (e.g. via lock_sock() 13.30 + * or via disabling bottom half handlers, etc). 13.31 + * 13.32 + * This program is free software; you can redistribute it and/or 13.33 + * modify it under the terms of the GNU General Public License 13.34 + * as published by the Free Software Foundation; either version 13.35 + * 2 of the License, or (at your option) any later version. 13.36 + */ 13.37 + 13.38 +/* 13.39 + * The functions in this file will not compile correctly with gcc 2.4.x 13.40 + */ 13.41 + 13.42 +#include <linux/config.h> 13.43 +#include <linux/types.h> 13.44 +#include <linux/kernel.h> 13.45 +#include <linux/sched.h> 13.46 +#include <linux/mm.h> 13.47 +#include <linux/interrupt.h> 13.48 +#include <linux/in.h> 13.49 +#include <linux/inet.h> 13.50 +#include <linux/slab.h> 13.51 +#include <linux/netdevice.h> 13.52 +#include <linux/string.h> 13.53 +#include <linux/skbuff.h> 13.54 +#include <linux/cache.h> 13.55 +#include <linux/init.h> 13.56 +#include <linux/highmem.h> 13.57 +#include <linux/spinlock.h> 13.58 + 13.59 +#include <net/ip.h> 13.60 +#include <net/protocol.h> 13.61 +#include <net/dst.h> 13.62 +#include <net/tcp.h> 13.63 +#include <net/udp.h> 13.64 +#include <net/sock.h> 13.65 + 13.66 +#include <asm/uaccess.h> 13.67 +#include <asm/system.h> 13.68 + 13.69 +/* zc globals: */ 13.70 +char *net_page_chunk; 13.71 +struct net_page_info *net_page_table; 13.72 +struct list_head net_page_list; 13.73 +spinlock_t net_page_list_lock = SPIN_LOCK_UNLOCKED; 13.74 +unsigned int net_pages; 13.75 + 13.76 + 13.77 + 13.78 +int sysctl_hot_list_len = 128; 13.79 + 13.80 +static kmem_cache_t *skbuff_head_cache; 13.81 + 13.82 +static union { 13.83 + struct sk_buff_head list; 13.84 + char pad[SMP_CACHE_BYTES]; 13.85 +} skb_head_pool[NR_CPUS]; 13.86 + 13.87 +/* 13.88 + * Keep out-of-line to prevent kernel bloat. 13.89 + * __builtin_return_address is not used because it is not always 13.90 + * reliable. 13.91 + */ 13.92 + 13.93 +/** 13.94 + * skb_over_panic - private function 13.95 + * @skb: buffer 13.96 + * @sz: size 13.97 + * @here: address 13.98 + * 13.99 + * Out of line support code for skb_put(). Not user callable. 
13.100 + */ 13.101 + 13.102 +void skb_over_panic(struct sk_buff *skb, int sz, void *here) 13.103 +{ 13.104 + printk("skput:over: %p:%d put:%d dev:%s", 13.105 + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); 13.106 + BUG(); 13.107 +} 13.108 + 13.109 +/** 13.110 + * skb_under_panic - private function 13.111 + * @skb: buffer 13.112 + * @sz: size 13.113 + * @here: address 13.114 + * 13.115 + * Out of line support code for skb_push(). Not user callable. 13.116 + */ 13.117 + 13.118 + 13.119 +void skb_under_panic(struct sk_buff *skb, int sz, void *here) 13.120 +{ 13.121 + printk("skput:under: %p:%d put:%d dev:%s", 13.122 + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); 13.123 + BUG(); 13.124 +} 13.125 + 13.126 +static __inline__ struct sk_buff *skb_head_from_pool(void) 13.127 +{ 13.128 + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; 13.129 + 13.130 + if (skb_queue_len(list)) { 13.131 + struct sk_buff *skb; 13.132 + unsigned long flags; 13.133 + 13.134 + local_irq_save(flags); 13.135 + skb = __skb_dequeue(list); 13.136 + local_irq_restore(flags); 13.137 + return skb; 13.138 + } 13.139 + return NULL; 13.140 +} 13.141 + 13.142 +static __inline__ void skb_head_to_pool(struct sk_buff *skb) 13.143 +{ 13.144 + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; 13.145 + 13.146 + if (skb_queue_len(list) < sysctl_hot_list_len) { 13.147 + unsigned long flags; 13.148 + 13.149 + local_irq_save(flags); 13.150 + __skb_queue_head(list, skb); 13.151 + local_irq_restore(flags); 13.152 + 13.153 + return; 13.154 + } 13.155 + kmem_cache_free(skbuff_head_cache, skb); 13.156 +} 13.157 + 13.158 + 13.159 +/* Allocate a new skbuff. We do this ourselves so we can fill in a few 13.160 + * 'private' fields and also do memory statistics to find all the 13.161 + * [BEEP] leaks. 13.162 + * 13.163 + */ 13.164 + 13.165 +/** 13.166 + * alloc_skb - allocate a network buffer 13.167 + * @size: size to allocate 13.168 + * @gfp_mask: allocation mask 13.169 + * 13.170 + * Allocate a new &sk_buff. The returned buffer has no headroom and a 13.171 + * tail room of size bytes. The object has a reference count of one. 13.172 + * The return is the buffer. On a failure the return is %NULL. 13.173 + * 13.174 + * Buffers may only be allocated from interrupts using a @gfp_mask of 13.175 + * %GFP_ATOMIC. 13.176 + */ 13.177 + 13.178 +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) 13.179 +{ 13.180 + struct sk_buff *skb; 13.181 + u8 *data; 13.182 + 13.183 + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { 13.184 + static int count = 0; 13.185 + if (++count < 5) { 13.186 + printk(KERN_ERR "alloc_skb called nonatomically " 13.187 + "from interrupt %p\n", NET_CALLER(size)); 13.188 + BUG(); 13.189 + } 13.190 + gfp_mask &= ~__GFP_WAIT; 13.191 + } 13.192 + 13.193 + /* Get the HEAD */ 13.194 + skb = skb_head_from_pool(); 13.195 + if (skb == NULL) { 13.196 + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); 13.197 + if (skb == NULL) 13.198 + goto nohead; 13.199 + } 13.200 + 13.201 + /* Get the DATA. Size must match skb_add_mtu(). */ 13.202 + size = SKB_DATA_ALIGN(size); 13.203 + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 13.204 + if (data == NULL) 13.205 + goto nodata; 13.206 + 13.207 + /* XXX: does not include slab overhead */ 13.208 + skb->truesize = size + sizeof(struct sk_buff); 13.209 + 13.210 + /* Load the data pointers. 
*/ 13.211 + skb->head = data; 13.212 + skb->data = data; 13.213 + skb->tail = data; 13.214 + skb->end = data + size; 13.215 + 13.216 + /* Set up other state */ 13.217 + skb->len = 0; 13.218 + skb->cloned = 0; 13.219 + skb->data_len = 0; 13.220 + skb->skb_type = SKB_NORMAL; 13.221 + 13.222 + atomic_set(&skb->users, 1); 13.223 + atomic_set(&(skb_shinfo(skb)->dataref), 1); 13.224 + skb_shinfo(skb)->nr_frags = 0; 13.225 + skb_shinfo(skb)->frag_list = NULL; 13.226 + return skb; 13.227 + 13.228 +nodata: 13.229 + skb_head_to_pool(skb); 13.230 +nohead: 13.231 + return NULL; 13.232 +} 13.233 + 13.234 +/* begin zc code additions: */ 13.235 + 13.236 +void init_net_pages(unsigned long order_pages) 13.237 +{ 13.238 + int i; 13.239 + struct net_page_info *np; 13.240 + pgd_t *pgd; pmd_t *pmd; pte_t *ptep; 13.241 + unsigned long nr_pages = 1 << order_pages; 13.242 + 13.243 + net_page_chunk = (char *)__get_free_pages(GFP_KERNEL, order_pages); 13.244 + net_page_table = kmalloc(nr_pages * sizeof(struct net_page_info), GFP_KERNEL); 13.245 + 13.246 + INIT_LIST_HEAD(&net_page_list); 13.247 + 13.248 + for (i = 0; i < nr_pages; i++) 13.249 + { 13.250 + np = net_page_table + i; 13.251 + np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE); 13.252 + 13.253 + // now fill the pte pointer: 13.254 + np->ppte = 0xdeadbeef; 13.255 + pgd = pgd_offset_k(np->virt_addr); 13.256 + if (!pgd_none(*pgd)) 13.257 + { 13.258 + pmd = pmd_offset(pgd, np->virt_addr); 13.259 + if (!pmd_none(*pmd)) 13.260 + { 13.261 + ptep = pte_offset(pmd, np->virt_addr); 13.262 + np->ppte = (unsigned long)ptep; // neet to virt_to_phys this? 13.263 + } 13.264 + } 13.265 + 13.266 + list_add_tail(&np->list, &net_page_list); 13.267 + } 13.268 + net_pages = nr_pages; 13.269 + 13.270 + 13.271 +} 13.272 + 13.273 +struct net_page_info *get_net_page(void) 13.274 +{ 13.275 + struct list_head *list_ptr; 13.276 + struct net_page_info *np; 13.277 + unsigned long flags; 13.278 + 13.279 + if (!net_pages) 13.280 + { 13.281 + return NULL; 13.282 + } 13.283 + spin_lock_irqsave(&net_page_list_lock, flags); 13.284 + 13.285 + list_ptr = net_page_list.next; 13.286 + np = list_entry(list_ptr, struct net_page_info, list); 13.287 + list_del(&np->list); 13.288 + net_pages--; 13.289 + 13.290 + spin_unlock_irqrestore(&net_page_list_lock, flags); 13.291 + 13.292 + return np; 13.293 +} 13.294 + 13.295 +void free_net_page(struct net_page_info *np) 13.296 +{ 13.297 + unsigned long flags; 13.298 + 13.299 + if (np == NULL) return; 13.300 + 13.301 + spin_lock_irqsave(&net_page_list_lock, flags); 13.302 + 13.303 + list_add_tail(&np->list, &net_page_list); 13.304 + net_pages++; 13.305 + 13.306 + spin_unlock_irqrestore(&net_page_list_lock, flags); 13.307 +} 13.308 + 13.309 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) 13.310 +{ 13.311 + struct sk_buff *skb; 13.312 + u8 *data; 13.313 + 13.314 + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { 13.315 + static int count = 0; 13.316 + if (++count < 5) { 13.317 + printk(KERN_ERR "alloc_skb called nonatomically " 13.318 + "from interrupt %p\n", NET_CALLER(size)); 13.319 + BUG(); 13.320 + } 13.321 + gfp_mask &= ~__GFP_WAIT; 13.322 + } 13.323 + 13.324 + /* Get the HEAD */ 13.325 + skb = skb_head_from_pool(); 13.326 + if (skb == NULL) { 13.327 + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); 13.328 + if (skb == NULL) 13.329 + goto nohead; 13.330 + } 13.331 + 13.332 + /* Get the DATA. Size must match skb_add_mtu(). 
*/ 13.333 + size = SKB_DATA_ALIGN(size); 13.334 + if (size > PAGE_SIZE) 13.335 + { 13.336 + printk("alloc_zc_skb called with unruly size.\n"); 13.337 + size = PAGE_SIZE; 13.338 + } 13.339 + skb->net_page = get_net_page(); 13.340 + if (skb->net_page == NULL) 13.341 + { 13.342 + goto nodata; 13.343 + } 13.344 + data = (u8 *)skb->net_page->virt_addr; 13.345 + if (data == NULL) 13.346 + goto nodata; 13.347 + /* XXX: does not include slab overhead */ 13.348 + skb->truesize = size + sizeof(struct sk_buff); 13.349 + 13.350 + /* Load the data pointers. */ 13.351 + skb->head = data; 13.352 + skb->data = data; 13.353 + skb->tail = data; 13.354 + skb->end = data + size; 13.355 + 13.356 + /* Set up other state */ 13.357 + skb->len = 0; 13.358 + skb->cloned = 0; 13.359 + skb->data_len = 0; 13.360 + skb->skb_type = SKB_ZERO_COPY; 13.361 + 13.362 + atomic_set(&skb->users, 1); 13.363 + atomic_set(&(skb_shinfo(skb)->dataref), 1); 13.364 + skb_shinfo(skb)->nr_frags = 0; 13.365 + skb_shinfo(skb)->frag_list = NULL; 13.366 + return skb; 13.367 + 13.368 +nodata: 13.369 + skb_head_to_pool(skb); 13.370 +nohead: 13.371 + return NULL; 13.372 +} 13.373 + 13.374 +/* end zc code additions: */ 13.375 + 13.376 +/* 13.377 + * Slab constructor for a skb head. 13.378 + */ 13.379 +static inline void skb_headerinit(void *p, kmem_cache_t *cache, 13.380 + unsigned long flags) 13.381 +{ 13.382 + struct sk_buff *skb = p; 13.383 + 13.384 + skb->next = NULL; 13.385 + skb->prev = NULL; 13.386 + skb->list = NULL; 13.387 + skb->sk = NULL; 13.388 + skb->stamp.tv_sec=0; /* No idea about time */ 13.389 + skb->dev = NULL; 13.390 + skb->dst = NULL; 13.391 + memset(skb->cb, 0, sizeof(skb->cb)); 13.392 + skb->pkt_type = PACKET_HOST; /* Default type */ 13.393 + skb->ip_summed = 0; 13.394 + skb->priority = 0; 13.395 + skb->security = 0; /* By default packets are insecure */ 13.396 + skb->destructor = NULL; 13.397 + 13.398 +#ifdef CONFIG_NETFILTER 13.399 + skb->nfmark = skb->nfcache = 0; 13.400 + skb->nfct = NULL; 13.401 +#ifdef CONFIG_NETFILTER_DEBUG 13.402 + skb->nf_debug = 0; 13.403 +#endif 13.404 +#endif 13.405 +#ifdef CONFIG_NET_SCHED 13.406 + skb->tc_index = 0; 13.407 +#endif 13.408 +} 13.409 + 13.410 +static void skb_drop_fraglist(struct sk_buff *skb) 13.411 +{ 13.412 + struct sk_buff *list = skb_shinfo(skb)->frag_list; 13.413 + 13.414 + skb_shinfo(skb)->frag_list = NULL; 13.415 + 13.416 + do { 13.417 + struct sk_buff *this = list; 13.418 + list = list->next; 13.419 + kfree_skb(this); 13.420 + } while (list); 13.421 +} 13.422 + 13.423 +static void skb_clone_fraglist(struct sk_buff *skb) 13.424 +{ 13.425 + struct sk_buff *list; 13.426 + 13.427 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) 13.428 + skb_get(list); 13.429 +} 13.430 + 13.431 +static void skb_release_data(struct sk_buff *skb) 13.432 +{ 13.433 + if (!skb->cloned || 13.434 + atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { 13.435 + if (skb_shinfo(skb)->nr_frags) { 13.436 + int i; 13.437 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 13.438 + put_page(skb_shinfo(skb)->frags[i].page); 13.439 + } 13.440 + 13.441 + if (skb_shinfo(skb)->frag_list) 13.442 + skb_drop_fraglist(skb); 13.443 + 13.444 + if (skb->skb_type == SKB_NORMAL) 13.445 + { 13.446 + kfree(skb->head); 13.447 + } else {// SKB_ZERO_COPY 13.448 + free_net_page(skb->net_page); 13.449 + } 13.450 + } 13.451 +} 13.452 + 13.453 +/* 13.454 + * Free an skbuff by memory without cleaning the state. 
13.455 + */ 13.456 +void kfree_skbmem(struct sk_buff *skb) 13.457 +{ 13.458 + skb_release_data(skb); 13.459 + skb_head_to_pool(skb); 13.460 +} 13.461 + 13.462 +/** 13.463 + * __kfree_skb - private function 13.464 + * @skb: buffer 13.465 + * 13.466 + * Free an sk_buff. Release anything attached to the buffer. 13.467 + * Clean the state. This is an internal helper function. Users should 13.468 + * always call kfree_skb 13.469 + */ 13.470 + 13.471 +void __kfree_skb(struct sk_buff *skb) 13.472 +{ 13.473 + if (skb->list) { 13.474 + printk(KERN_WARNING "Warning: kfree_skb passed an skb still " 13.475 + "on a list (from %p).\n", NET_CALLER(skb)); 13.476 + BUG(); 13.477 + } 13.478 + 13.479 + dst_release(skb->dst); 13.480 + if(skb->destructor) { 13.481 + if (in_irq()) { 13.482 + printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", 13.483 + NET_CALLER(skb)); 13.484 + } 13.485 + skb->destructor(skb); 13.486 + } 13.487 +#ifdef CONFIG_NETFILTER 13.488 + nf_conntrack_put(skb->nfct); 13.489 +#endif 13.490 + skb_headerinit(skb, NULL, 0); /* clean state */ 13.491 + kfree_skbmem(skb); 13.492 +} 13.493 + 13.494 +/** 13.495 + * skb_clone - duplicate an sk_buff 13.496 + * @skb: buffer to clone 13.497 + * @gfp_mask: allocation priority 13.498 + * 13.499 + * Duplicate an &sk_buff. The new one is not owned by a socket. Both 13.500 + * copies share the same packet data but not structure. The new 13.501 + * buffer has a reference count of 1. If the allocation fails the 13.502 + * function returns %NULL otherwise the new buffer is returned. 13.503 + * 13.504 + * If this function is called from an interrupt gfp_mask() must be 13.505 + * %GFP_ATOMIC. 13.506 + */ 13.507 + 13.508 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) 13.509 +{ 13.510 + struct sk_buff *n; 13.511 + 13.512 + n = skb_head_from_pool(); 13.513 + if (!n) { 13.514 + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 13.515 + if (!n) 13.516 + return NULL; 13.517 + } 13.518 + 13.519 +#define C(x) n->x = skb->x 13.520 + 13.521 + n->next = n->prev = NULL; 13.522 + n->list = NULL; 13.523 + n->sk = NULL; 13.524 + C(stamp); 13.525 + C(dev); 13.526 + C(h); 13.527 + C(nh); 13.528 + C(mac); 13.529 + C(dst); 13.530 + dst_clone(n->dst); 13.531 + memcpy(n->cb, skb->cb, sizeof(skb->cb)); 13.532 + C(len); 13.533 + C(data_len); 13.534 + C(csum); 13.535 + n->cloned = 1; 13.536 + C(pkt_type); 13.537 + C(ip_summed); 13.538 + C(priority); 13.539 + atomic_set(&n->users, 1); 13.540 + C(protocol); 13.541 + C(security); 13.542 + C(truesize); 13.543 + C(head); 13.544 + C(data); 13.545 + C(tail); 13.546 + C(end); 13.547 + n->destructor = NULL; 13.548 +#ifdef CONFIG_NETFILTER 13.549 + C(nfmark); 13.550 + C(nfcache); 13.551 + C(nfct); 13.552 +#ifdef CONFIG_NETFILTER_DEBUG 13.553 + C(nf_debug); 13.554 +#endif 13.555 +#endif /*CONFIG_NETFILTER*/ 13.556 +#if defined(CONFIG_HIPPI) 13.557 + C(private); 13.558 +#endif 13.559 +#ifdef CONFIG_NET_SCHED 13.560 + C(tc_index); 13.561 +#endif 13.562 + C(skb_type); 13.563 + C(net_page); 13.564 + atomic_inc(&(skb_shinfo(skb)->dataref)); 13.565 + skb->cloned = 1; 13.566 +#ifdef CONFIG_NETFILTER 13.567 + nf_conntrack_get(skb->nfct); 13.568 +#endif 13.569 + return n; 13.570 +} 13.571 + 13.572 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 13.573 +{ 13.574 + /* 13.575 + * Shift between the two data areas in bytes 13.576 + */ 13.577 + unsigned long offset = new->data - old->data; 13.578 + 13.579 + new->list=NULL; 13.580 + new->sk=NULL; 13.581 + new->dev=old->dev; 13.582 + 
new->priority=old->priority; 13.583 + new->protocol=old->protocol; 13.584 + new->dst=dst_clone(old->dst); 13.585 + new->h.raw=old->h.raw+offset; 13.586 + new->nh.raw=old->nh.raw+offset; 13.587 + new->mac.raw=old->mac.raw+offset; 13.588 + memcpy(new->cb, old->cb, sizeof(old->cb)); 13.589 + atomic_set(&new->users, 1); 13.590 + new->pkt_type=old->pkt_type; 13.591 + new->stamp=old->stamp; 13.592 + new->destructor = NULL; 13.593 + new->security=old->security; 13.594 +#ifdef CONFIG_NETFILTER 13.595 + new->nfmark=old->nfmark; 13.596 + new->nfcache=old->nfcache; 13.597 + new->nfct=old->nfct; 13.598 + nf_conntrack_get(new->nfct); 13.599 +#ifdef CONFIG_NETFILTER_DEBUG 13.600 + new->nf_debug=old->nf_debug; 13.601 +#endif 13.602 +#endif 13.603 +#ifdef CONFIG_NET_SCHED 13.604 + new->tc_index = old->tc_index; 13.605 +#endif 13.606 +} 13.607 + 13.608 +/** 13.609 + * skb_copy - create private copy of an sk_buff 13.610 + * @skb: buffer to copy 13.611 + * @gfp_mask: allocation priority 13.612 + * 13.613 + * Make a copy of both an &sk_buff and its data. This is used when the 13.614 + * caller wishes to modify the data and needs a private copy of the 13.615 + * data to alter. Returns %NULL on failure or the pointer to the buffer 13.616 + * on success. The returned buffer has a reference count of 1. 13.617 + * 13.618 + * As by-product this function converts non-linear &sk_buff to linear 13.619 + * one, so that &sk_buff becomes completely private and caller is allowed 13.620 + * to modify all the data of returned buffer. This means that this 13.621 + * function is not recommended for use in circumstances when only 13.622 + * header is going to be modified. Use pskb_copy() instead. 13.623 + */ 13.624 + 13.625 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) 13.626 +{ 13.627 + struct sk_buff *n; 13.628 + int headerlen = skb->data-skb->head; 13.629 + 13.630 + /* 13.631 + * Allocate the copy buffer 13.632 + */ 13.633 + n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); 13.634 + if(n==NULL) 13.635 + return NULL; 13.636 + 13.637 + /* Set the data pointer */ 13.638 + skb_reserve(n,headerlen); 13.639 + /* Set the tail pointer and length */ 13.640 + skb_put(n,skb->len); 13.641 + n->csum = skb->csum; 13.642 + n->ip_summed = skb->ip_summed; 13.643 + 13.644 + if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len)) 13.645 + BUG(); 13.646 + 13.647 + copy_skb_header(n, skb); 13.648 + 13.649 + return n; 13.650 +} 13.651 + 13.652 +/* Keep head the same: replace data */ 13.653 +int skb_linearize(struct sk_buff *skb, int gfp_mask) 13.654 +{ 13.655 + unsigned int size; 13.656 + u8 *data; 13.657 + long offset; 13.658 + int headerlen = skb->data - skb->head; 13.659 + int expand = (skb->tail+skb->data_len) - skb->end; 13.660 + 13.661 + if (skb_shared(skb)) 13.662 + BUG(); 13.663 + 13.664 + if (expand <= 0) 13.665 + expand = 0; 13.666 + 13.667 + size = (skb->end - skb->head + expand); 13.668 + size = SKB_DATA_ALIGN(size); 13.669 + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 13.670 + if (data == NULL) 13.671 + return -ENOMEM; 13.672 + 13.673 + /* Copy entire thing */ 13.674 + if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len)) 13.675 + BUG(); 13.676 + 13.677 + /* Offset between the two in bytes */ 13.678 + offset = data - skb->head; 13.679 + 13.680 + /* Free old data. 
*/ 13.681 + skb_release_data(skb); 13.682 + 13.683 + skb->head = data; 13.684 + skb->end = data + size; 13.685 + 13.686 + /* Set up new pointers */ 13.687 + skb->h.raw += offset; 13.688 + skb->nh.raw += offset; 13.689 + skb->mac.raw += offset; 13.690 + skb->tail += offset; 13.691 + skb->data += offset; 13.692 + 13.693 + /* Set up shinfo */ 13.694 + atomic_set(&(skb_shinfo(skb)->dataref), 1); 13.695 + skb_shinfo(skb)->nr_frags = 0; 13.696 + skb_shinfo(skb)->frag_list = NULL; 13.697 + 13.698 + /* We are no longer a clone, even if we were. */ 13.699 + skb->cloned = 0; 13.700 + 13.701 + skb->tail += skb->data_len; 13.702 + skb->data_len = 0; 13.703 + return 0; 13.704 +} 13.705 + 13.706 + 13.707 +/** 13.708 + * pskb_copy - create copy of an sk_buff with private head. 13.709 + * @skb: buffer to copy 13.710 + * @gfp_mask: allocation priority 13.711 + * 13.712 + * Make a copy of both an &sk_buff and part of its data, located 13.713 + * in header. Fragmented data remain shared. This is used when 13.714 + * the caller wishes to modify only header of &sk_buff and needs 13.715 + * private copy of the header to alter. Returns %NULL on failure 13.716 + * or the pointer to the buffer on success. 13.717 + * The returned buffer has a reference count of 1. 13.718 + */ 13.719 + 13.720 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) 13.721 +{ 13.722 + struct sk_buff *n; 13.723 + 13.724 + /* 13.725 + * Allocate the copy buffer 13.726 + */ 13.727 + n=alloc_skb(skb->end - skb->head, gfp_mask); 13.728 + if(n==NULL) 13.729 + return NULL; 13.730 + 13.731 + /* Set the data pointer */ 13.732 + skb_reserve(n,skb->data-skb->head); 13.733 + /* Set the tail pointer and length */ 13.734 + skb_put(n,skb_headlen(skb)); 13.735 + /* Copy the bytes */ 13.736 + memcpy(n->data, skb->data, n->len); 13.737 + n->csum = skb->csum; 13.738 + n->ip_summed = skb->ip_summed; 13.739 + 13.740 + n->data_len = skb->data_len; 13.741 + n->len = skb->len; 13.742 + 13.743 + if (skb_shinfo(skb)->nr_frags) { 13.744 + int i; 13.745 + 13.746 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 13.747 + skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 13.748 + get_page(skb_shinfo(n)->frags[i].page); 13.749 + } 13.750 + skb_shinfo(n)->nr_frags = i; 13.751 + } 13.752 + 13.753 + if (skb_shinfo(skb)->frag_list) { 13.754 + skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 13.755 + skb_clone_fraglist(n); 13.756 + } 13.757 + 13.758 + copy_skb_header(n, skb); 13.759 + 13.760 + return n; 13.761 +} 13.762 + 13.763 +/** 13.764 + * pskb_expand_head - reallocate header of &sk_buff 13.765 + * @skb: buffer to reallocate 13.766 + * @nhead: room to add at head 13.767 + * @ntail: room to add at tail 13.768 + * @gfp_mask: allocation priority 13.769 + * 13.770 + * Expands (or creates identical copy, if &nhead and &ntail are zero) 13.771 + * header of skb. &sk_buff itself is not changed. &sk_buff MUST have 13.772 + * reference count of 1. Returns zero in the case of success or error, 13.773 + * if expansion failed. In the last case, &sk_buff is not changed. 13.774 + * 13.775 + * All the pointers pointing into skb header may change and must be 13.776 + * reloaded after call to this function. 
13.777 + */ 13.778 + 13.779 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) 13.780 +{ 13.781 + int i; 13.782 + u8 *data; 13.783 + int size = nhead + (skb->end - skb->head) + ntail; 13.784 + long off; 13.785 + 13.786 + if (skb_shared(skb)) 13.787 + BUG(); 13.788 + 13.789 + size = SKB_DATA_ALIGN(size); 13.790 + 13.791 + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 13.792 + if (data == NULL) 13.793 + goto nodata; 13.794 + 13.795 + /* Copy only real data... and, alas, header. This should be 13.796 + * optimized for the cases when header is void. */ 13.797 + memcpy(data+nhead, skb->head, skb->tail-skb->head); 13.798 + memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); 13.799 + 13.800 + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) 13.801 + get_page(skb_shinfo(skb)->frags[i].page); 13.802 + 13.803 + if (skb_shinfo(skb)->frag_list) 13.804 + skb_clone_fraglist(skb); 13.805 + 13.806 + skb_release_data(skb); 13.807 + 13.808 + off = (data+nhead) - skb->head; 13.809 + 13.810 + skb->head = data; 13.811 + skb->end = data+size; 13.812 + 13.813 + skb->data += off; 13.814 + skb->tail += off; 13.815 + skb->mac.raw += off; 13.816 + skb->h.raw += off; 13.817 + skb->nh.raw += off; 13.818 + skb->cloned = 0; 13.819 + atomic_set(&skb_shinfo(skb)->dataref, 1); 13.820 + return 0; 13.821 + 13.822 +nodata: 13.823 + return -ENOMEM; 13.824 +} 13.825 + 13.826 +/* Make private copy of skb with writable head and some headroom */ 13.827 + 13.828 +struct sk_buff * 13.829 +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) 13.830 +{ 13.831 + struct sk_buff *skb2; 13.832 + int delta = headroom - skb_headroom(skb); 13.833 + 13.834 + if (delta <= 0) 13.835 + return pskb_copy(skb, GFP_ATOMIC); 13.836 + 13.837 + skb2 = skb_clone(skb, GFP_ATOMIC); 13.838 + if (skb2 == NULL || 13.839 + !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) 13.840 + return skb2; 13.841 + 13.842 + kfree_skb(skb2); 13.843 + return NULL; 13.844 +} 13.845 + 13.846 + 13.847 +/** 13.848 + * skb_copy_expand - copy and expand sk_buff 13.849 + * @skb: buffer to copy 13.850 + * @newheadroom: new free bytes at head 13.851 + * @newtailroom: new free bytes at tail 13.852 + * @gfp_mask: allocation priority 13.853 + * 13.854 + * Make a copy of both an &sk_buff and its data and while doing so 13.855 + * allocate additional space. 13.856 + * 13.857 + * This is used when the caller wishes to modify the data and needs a 13.858 + * private copy of the data to alter as well as more space for new fields. 13.859 + * Returns %NULL on failure or the pointer to the buffer 13.860 + * on success. The returned buffer has a reference count of 1. 13.861 + * 13.862 + * You must pass %GFP_ATOMIC as the allocation priority if this function 13.863 + * is called from an interrupt. 13.864 + */ 13.865 + 13.866 + 13.867 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 13.868 + int newheadroom, 13.869 + int newtailroom, 13.870 + int gfp_mask) 13.871 +{ 13.872 + struct sk_buff *n; 13.873 + 13.874 + /* 13.875 + * Allocate the copy buffer 13.876 + */ 13.877 + 13.878 + n=alloc_skb(newheadroom + skb->len + newtailroom, 13.879 + gfp_mask); 13.880 + if(n==NULL) 13.881 + return NULL; 13.882 + 13.883 + skb_reserve(n,newheadroom); 13.884 + 13.885 + /* Set the tail pointer and length */ 13.886 + skb_put(n,skb->len); 13.887 + 13.888 + /* Copy the data only. 
*/ 13.889 + if (skb_copy_bits(skb, 0, n->data, skb->len)) 13.890 + BUG(); 13.891 + 13.892 + copy_skb_header(n, skb); 13.893 + return n; 13.894 +} 13.895 + 13.896 +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. 13.897 + * If realloc==0 and trimming is impossible without change of data, 13.898 + * it is BUG(). 13.899 + */ 13.900 + 13.901 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) 13.902 +{ 13.903 + int offset = skb_headlen(skb); 13.904 + int nfrags = skb_shinfo(skb)->nr_frags; 13.905 + int i; 13.906 + 13.907 + for (i=0; i<nfrags; i++) { 13.908 + int end = offset + skb_shinfo(skb)->frags[i].size; 13.909 + if (end > len) { 13.910 + if (skb_cloned(skb)) { 13.911 + if (!realloc) 13.912 + BUG(); 13.913 + if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 13.914 + return -ENOMEM; 13.915 + } 13.916 + if (len <= offset) { 13.917 + put_page(skb_shinfo(skb)->frags[i].page); 13.918 + skb_shinfo(skb)->nr_frags--; 13.919 + } else { 13.920 + skb_shinfo(skb)->frags[i].size = len-offset; 13.921 + } 13.922 + } 13.923 + offset = end; 13.924 + } 13.925 + 13.926 + if (offset < len) { 13.927 + skb->data_len -= skb->len - len; 13.928 + skb->len = len; 13.929 + } else { 13.930 + if (len <= skb_headlen(skb)) { 13.931 + skb->len = len; 13.932 + skb->data_len = 0; 13.933 + skb->tail = skb->data + len; 13.934 + if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) 13.935 + skb_drop_fraglist(skb); 13.936 + } else { 13.937 + skb->data_len -= skb->len - len; 13.938 + skb->len = len; 13.939 + } 13.940 + } 13.941 + 13.942 + return 0; 13.943 +} 13.944 + 13.945 +/** 13.946 + * __pskb_pull_tail - advance tail of skb header 13.947 + * @skb: buffer to reallocate 13.948 + * @delta: number of bytes to advance tail 13.949 + * 13.950 + * The function makes a sense only on a fragmented &sk_buff, 13.951 + * it expands header moving its tail forward and copying necessary 13.952 + * data from fragmented part. 13.953 + * 13.954 + * &sk_buff MUST have reference count of 1. 13.955 + * 13.956 + * Returns %NULL (and &sk_buff does not change) if pull failed 13.957 + * or value of new tail of skb in the case of success. 13.958 + * 13.959 + * All the pointers pointing into skb header may change and must be 13.960 + * reloaded after call to this function. 13.961 + */ 13.962 + 13.963 +/* Moves tail of skb head forward, copying data from fragmented part, 13.964 + * when it is necessary. 13.965 + * 1. It may fail due to malloc failure. 13.966 + * 2. It may change skb pointers. 13.967 + * 13.968 + * It is pretty complicated. Luckily, it is called only in exceptional cases. 13.969 + */ 13.970 +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta) 13.971 +{ 13.972 + int i, k, eat; 13.973 + 13.974 + /* If skb has not enough free space at tail, get new one 13.975 + * plus 128 bytes for future expansions. If we have enough 13.976 + * room at tail, reallocate without expansion only if skb is cloned. 13.977 + */ 13.978 + eat = (skb->tail+delta) - skb->end; 13.979 + 13.980 + if (eat > 0 || skb_cloned(skb)) { 13.981 + if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC)) 13.982 + return NULL; 13.983 + } 13.984 + 13.985 + if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) 13.986 + BUG(); 13.987 + 13.988 + /* Optimization: no fragments, no reasons to preestimate 13.989 + * size of pulled pages. Superb. 13.990 + */ 13.991 + if (skb_shinfo(skb)->frag_list == NULL) 13.992 + goto pull_pages; 13.993 + 13.994 + /* Estimate size of pulled pages. 
*/ 13.995 + eat = delta; 13.996 + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 13.997 + if (skb_shinfo(skb)->frags[i].size >= eat) 13.998 + goto pull_pages; 13.999 + eat -= skb_shinfo(skb)->frags[i].size; 13.1000 + } 13.1001 + 13.1002 + /* If we need update frag list, we are in troubles. 13.1003 + * Certainly, it possible to add an offset to skb data, 13.1004 + * but taking into account that pulling is expected to 13.1005 + * be very rare operation, it is worth to fight against 13.1006 + * further bloating skb head and crucify ourselves here instead. 13.1007 + * Pure masohism, indeed. 8)8) 13.1008 + */ 13.1009 + if (eat) { 13.1010 + struct sk_buff *list = skb_shinfo(skb)->frag_list; 13.1011 + struct sk_buff *clone = NULL; 13.1012 + struct sk_buff *insp = NULL; 13.1013 + 13.1014 + do { 13.1015 + if (list == NULL) 13.1016 + BUG(); 13.1017 + 13.1018 + if (list->len <= eat) { 13.1019 + /* Eaten as whole. */ 13.1020 + eat -= list->len; 13.1021 + list = list->next; 13.1022 + insp = list; 13.1023 + } else { 13.1024 + /* Eaten partially. */ 13.1025 + 13.1026 + if (skb_shared(list)) { 13.1027 + /* Sucks! We need to fork list. :-( */ 13.1028 + clone = skb_clone(list, GFP_ATOMIC); 13.1029 + if (clone == NULL) 13.1030 + return NULL; 13.1031 + insp = list->next; 13.1032 + list = clone; 13.1033 + } else { 13.1034 + /* This may be pulled without 13.1035 + * problems. */ 13.1036 + insp = list; 13.1037 + } 13.1038 + if (pskb_pull(list, eat) == NULL) { 13.1039 + if (clone) 13.1040 + kfree_skb(clone); 13.1041 + return NULL; 13.1042 + } 13.1043 + break; 13.1044 + } 13.1045 + } while (eat); 13.1046 + 13.1047 + /* Free pulled out fragments. */ 13.1048 + while ((list = skb_shinfo(skb)->frag_list) != insp) { 13.1049 + skb_shinfo(skb)->frag_list = list->next; 13.1050 + kfree_skb(list); 13.1051 + } 13.1052 + /* And insert new clone at head. */ 13.1053 + if (clone) { 13.1054 + clone->next = list; 13.1055 + skb_shinfo(skb)->frag_list = clone; 13.1056 + } 13.1057 + } 13.1058 + /* Success! Now we may commit changes to skb data. */ 13.1059 + 13.1060 +pull_pages: 13.1061 + eat = delta; 13.1062 + k = 0; 13.1063 + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 13.1064 + if (skb_shinfo(skb)->frags[i].size <= eat) { 13.1065 + put_page(skb_shinfo(skb)->frags[i].page); 13.1066 + eat -= skb_shinfo(skb)->frags[i].size; 13.1067 + } else { 13.1068 + skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 13.1069 + if (eat) { 13.1070 + skb_shinfo(skb)->frags[k].page_offset += eat; 13.1071 + skb_shinfo(skb)->frags[k].size -= eat; 13.1072 + eat = 0; 13.1073 + } 13.1074 + k++; 13.1075 + } 13.1076 + } 13.1077 + skb_shinfo(skb)->nr_frags = k; 13.1078 + 13.1079 + skb->tail += delta; 13.1080 + skb->data_len -= delta; 13.1081 + 13.1082 + return skb->tail; 13.1083 +} 13.1084 + 13.1085 +/* Copy some data bits from skb to kernel buffer. */ 13.1086 + 13.1087 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 13.1088 +{ 13.1089 + int i, copy; 13.1090 + int start = skb->len - skb->data_len; 13.1091 + 13.1092 + if (offset > (int)skb->len-len) 13.1093 + goto fault; 13.1094 + 13.1095 + /* Copy header. 
*/ 13.1096 + if ((copy = start-offset) > 0) { 13.1097 + if (copy > len) 13.1098 + copy = len; 13.1099 + memcpy(to, skb->data + offset, copy); 13.1100 + if ((len -= copy) == 0) 13.1101 + return 0; 13.1102 + offset += copy; 13.1103 + to += copy; 13.1104 + } 13.1105 + 13.1106 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 13.1107 + int end; 13.1108 + 13.1109 + BUG_TRAP(start <= offset+len); 13.1110 + 13.1111 + end = start + skb_shinfo(skb)->frags[i].size; 13.1112 + if ((copy = end-offset) > 0) { 13.1113 + u8 *vaddr; 13.1114 + 13.1115 + if (copy > len) 13.1116 + copy = len; 13.1117 + 13.1118 + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 13.1119 + memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+ 13.1120 + offset-start, copy); 13.1121 + kunmap_skb_frag(vaddr); 13.1122 + 13.1123 + if ((len -= copy) == 0) 13.1124 + return 0; 13.1125 + offset += copy; 13.1126 + to += copy; 13.1127 + } 13.1128 + start = end; 13.1129 + } 13.1130 + 13.1131 + if (skb_shinfo(skb)->frag_list) { 13.1132 + struct sk_buff *list; 13.1133 + 13.1134 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { 13.1135 + int end; 13.1136 + 13.1137 + BUG_TRAP(start <= offset+len); 13.1138 + 13.1139 + end = start + list->len; 13.1140 + if ((copy = end-offset) > 0) { 13.1141 + if (copy > len) 13.1142 + copy = len; 13.1143 + if (skb_copy_bits(list, offset-start, to, copy)) 13.1144 + goto fault; 13.1145 + if ((len -= copy) == 0) 13.1146 + return 0; 13.1147 + offset += copy; 13.1148 + to += copy; 13.1149 + } 13.1150 + start = end; 13.1151 + } 13.1152 + } 13.1153 + if (len == 0) 13.1154 + return 0; 13.1155 + 13.1156 +fault: 13.1157 + return -EFAULT; 13.1158 +} 13.1159 + 13.1160 +/* Checksum skb data. */ 13.1161 + 13.1162 +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum) 13.1163 +{ 13.1164 + int i, copy; 13.1165 + int start = skb->len - skb->data_len; 13.1166 + int pos = 0; 13.1167 + 13.1168 + /* Checksum header. 
*/ 13.1169 + if ((copy = start-offset) > 0) { 13.1170 + if (copy > len) 13.1171 + copy = len; 13.1172 + csum = csum_partial(skb->data+offset, copy, csum); 13.1173 + if ((len -= copy) == 0) 13.1174 + return csum; 13.1175 + offset += copy; 13.1176 + pos = copy; 13.1177 + } 13.1178 + 13.1179 + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 13.1180 + int end; 13.1181 + 13.1182 + BUG_TRAP(start <= offset+len); 13.1183 + 13.1184 + end = start + skb_shinfo(skb)->frags[i].size; 13.1185 + if ((copy = end-offset) > 0) { 13.1186 + unsigned int csum2; 13.1187 + u8 *vaddr; 13.1188 + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 13.1189 + 13.1190 + if (copy > len) 13.1191 + copy = len; 13.1192 + vaddr = kmap_skb_frag(frag); 13.1193 + csum2 = csum_partial(vaddr + frag->page_offset + 13.1194 + offset-start, copy, 0); 13.1195 + kunmap_skb_frag(vaddr); 13.1196 + csum = csum_block_add(csum, csum2, pos); 13.1197 + if (!(len -= copy)) 13.1198 + return csum; 13.1199 + offset += copy; 13.1200 + pos += copy; 13.1201 + } 13.1202 + start = end; 13.1203 + } 13.1204 + 13.1205 + if (skb_shinfo(skb)->frag_list) { 13.1206 + struct sk_buff *list; 13.1207 + 13.1208 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { 13.1209 + int end; 13.1210 + 13.1211 + BUG_TRAP(start <= offset+len); 13.1212 + 13.1213 + end = start + list->len; 13.1214 + if ((copy = end-offset) > 0) { 13.1215 + unsigned int csum2; 13.1216 + if (copy > len) 13.1217 + copy = len; 13.1218 + csum2 = skb_checksum(list, offset-start, copy, 0); 13.1219 + csum = csum_block_add(csum, csum2, pos); 13.1220 + if ((len -= copy) == 0) 13.1221 + return csum; 13.1222 + offset += copy; 13.1223 + pos += copy; 13.1224 + } 13.1225 + start = end; 13.1226 + } 13.1227 + } 13.1228 + if (len == 0) 13.1229 + return csum; 13.1230 + 13.1231 + BUG(); 13.1232 + return csum; 13.1233 +} 13.1234 + 13.1235 +/* Both of above in one bottle. */ 13.1236 + 13.1237 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum) 13.1238 +{ 13.1239 + int i, copy; 13.1240 + int start = skb->len - skb->data_len; 13.1241 + int pos = 0; 13.1242 + 13.1243 + /* Copy header. 
*/ 13.1244 + if ((copy = start-offset) > 0) { 13.1245 + if (copy > len) 13.1246 + copy = len; 13.1247 + csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum); 13.1248 + if ((len -= copy) == 0) 13.1249 + return csum; 13.1250 + offset += copy; 13.1251 + to += copy; 13.1252 + pos = copy; 13.1253 + } 13.1254 + 13.1255 + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 13.1256 + int end; 13.1257 + 13.1258 + BUG_TRAP(start <= offset+len); 13.1259 + 13.1260 + end = start + skb_shinfo(skb)->frags[i].size; 13.1261 + if ((copy = end-offset) > 0) { 13.1262 + unsigned int csum2; 13.1263 + u8 *vaddr; 13.1264 + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 13.1265 + 13.1266 + if (copy > len) 13.1267 + copy = len; 13.1268 + vaddr = kmap_skb_frag(frag); 13.1269 + csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + 13.1270 + offset-start, to, copy, 0); 13.1271 + kunmap_skb_frag(vaddr); 13.1272 + csum = csum_block_add(csum, csum2, pos); 13.1273 + if (!(len -= copy)) 13.1274 + return csum; 13.1275 + offset += copy; 13.1276 + to += copy; 13.1277 + pos += copy; 13.1278 + } 13.1279 + start = end; 13.1280 + } 13.1281 + 13.1282 + if (skb_shinfo(skb)->frag_list) { 13.1283 + struct sk_buff *list; 13.1284 + 13.1285 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { 13.1286 + unsigned int csum2; 13.1287 + int end; 13.1288 + 13.1289 + BUG_TRAP(start <= offset+len); 13.1290 + 13.1291 + end = start + list->len; 13.1292 + if ((copy = end-offset) > 0) { 13.1293 + if (copy > len) 13.1294 + copy = len; 13.1295 + csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0); 13.1296 + csum = csum_block_add(csum, csum2, pos); 13.1297 + if ((len -= copy) == 0) 13.1298 + return csum; 13.1299 + offset += copy; 13.1300 + to += copy; 13.1301 + pos += copy; 13.1302 + } 13.1303 + start = end; 13.1304 + } 13.1305 + } 13.1306 + if (len == 0) 13.1307 + return csum; 13.1308 + 13.1309 + BUG(); 13.1310 + return csum; 13.1311 +} 13.1312 + 13.1313 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 13.1314 +{ 13.1315 + unsigned int csum; 13.1316 + long csstart; 13.1317 + 13.1318 + if (skb->ip_summed == CHECKSUM_HW) 13.1319 + csstart = skb->h.raw - skb->data; 13.1320 + else 13.1321 + csstart = skb->len - skb->data_len; 13.1322 + 13.1323 + if (csstart > skb->len - skb->data_len) 13.1324 + BUG(); 13.1325 + 13.1326 + memcpy(to, skb->data, csstart); 13.1327 + 13.1328 + csum = 0; 13.1329 + if (csstart != skb->len) 13.1330 + csum = skb_copy_and_csum_bits(skb, csstart, to+csstart, 13.1331 + skb->len-csstart, 0); 13.1332 + 13.1333 + if (skb->ip_summed == CHECKSUM_HW) { 13.1334 + long csstuff = csstart + skb->csum; 13.1335 + 13.1336 + *((unsigned short *)(to + csstuff)) = csum_fold(csum); 13.1337 + } 13.1338 +} 13.1339 + 13.1340 +#if 0 13.1341 +/* 13.1342 + * Tune the memory allocator for a new MTU size. 
13.1343 + */ 13.1344 +void skb_add_mtu(int mtu) 13.1345 +{ 13.1346 + /* Must match allocation in alloc_skb */ 13.1347 + mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); 13.1348 + 13.1349 + kmem_add_cache_size(mtu); 13.1350 +} 13.1351 +#endif 13.1352 + 13.1353 +void __init skb_init(void) 13.1354 +{ 13.1355 + int i; 13.1356 + 13.1357 + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 13.1358 + sizeof(struct sk_buff), 13.1359 + 0, 13.1360 + SLAB_HWCACHE_ALIGN, 13.1361 + skb_headerinit, NULL); 13.1362 + if (!skbuff_head_cache) 13.1363 + panic("cannot create skbuff cache"); 13.1364 + 13.1365 + init_net_pages(NUM_NET_PAGES); 13.1366 + 13.1367 + for (i=0; i<NR_CPUS; i++) 13.1368 + skb_queue_head_init(&skb_head_pool[i].list); 13.1369 +}
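Putting the zc additions together: init_net_pages() carves a physically contiguous chunk into single pages, records a pointer to each page's PTE in net_page_table, and strings the pages onto net_page_list; alloc_zc_skb() takes one page per buffer via get_net_page() and marks the skb SKB_ZERO_COPY; skb_release_data() then returns the page with free_net_page() instead of kfree(), so the pool recycles under net_page_list_lock. A minimal sketch of one trip through that lifecycle; illustrative only, and the caller, frame sizes and memset are assumptions, not part of the changeset:

#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/string.h>

/* Sketch: allocate from the zero-copy pool, touch the data, free it.
 * kfree_skb() ends in skb_release_data(), which sees skb_type ==
 * SKB_ZERO_COPY and calls free_net_page() rather than kfree(). */
static void example_zc_roundtrip(void)
{
        struct sk_buff *skb;

        skb = alloc_zc_skb(ETH_FRAME_LEN, GFP_ATOMIC);
        if (skb == NULL)
                return;                 /* pool empty or no skb head */

        memset(skb_put(skb, ETH_ZLEN), 0, ETH_ZLEN);  /* dummy frame */

        kfree_skb(skb);                 /* page goes back onto net_page_list */
}

Because get_net_page() and free_net_page() serialise on net_page_list_lock with spin_lock_irqsave(), alloc_zc_skb() stays safe to call from interrupt context, matching the GFP_ATOMIC discipline that alloc_skb() already enforces.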