ia64/xen-unstable

changeset 112:cb2688ed1a23

bitkeeper revision 1.15.1.12 (3e3bb848bQ7Yn0pGaQ3H5n8g3MYQWQ)

Merge boulderdash.cl.cam.ac.uk:/usr/groups/xeno/users/akw27/xeno
into boulderdash.cl.cam.ac.uk:/anfs/scratch/boulderdash/akw27/argh/xeno
author akw27@boulderdash.cl.cam.ac.uk
date Sat Feb 01 12:06:32 2003 +0000 (2003-02-01)
parents 033b3540eda0 de280362dfb0
children 82679de8a1ca
files .rootkeys xen-2.4.16/common/domain.c xen-2.4.16/common/memory.c xen-2.4.16/drivers/net/tulip/interrupt.c xen-2.4.16/include/asm-i386/pci.h xen-2.4.16/include/hypervisor-ifs/network.h xen-2.4.16/include/xeno/mm.h xen-2.4.16/include/xeno/skbuff.h xen-2.4.16/net/dev.c xen-2.4.16/net/eth.c xen-2.4.16/net/skbuff.c xenolinux-2.4.16-sparse/include/linux/skbuff.h xenolinux-2.4.16-sparse/net/core/skbuff.c
line diff
     1.1 --- a/.rootkeys	Tue Jan 28 16:13:04 2003 +0000
     1.2 +++ b/.rootkeys	Sat Feb 01 12:06:32 2003 +0000
     1.3 @@ -399,9 +399,11 @@ 3ddb79bb3cMSs_k2X5Oq2hOIBvmPYA xenolinux
     1.4  3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h
     1.5  3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h
     1.6  3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h
     1.7 +3e37c39fVCSGQENtY6g7muaq_THliw xenolinux-2.4.16-sparse/include/linux/skbuff.h
     1.8  3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h
     1.9  3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c
    1.10  3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk
    1.11  3e15d52e0_j129JPvo7xfYGndVFpwQ xenolinux-2.4.16-sparse/mm/memory.c
    1.12  3e15d535DLvpzTrLRUIerB69LpJD1g xenolinux-2.4.16-sparse/mm/mremap.c
    1.13  3e15d531m1Y1_W8ki64AFOU_ua4C4w xenolinux-2.4.16-sparse/mm/swapfile.c
    1.14 +3e37c312QFuzIxXsuAgO6IRt3Tp96Q xenolinux-2.4.16-sparse/net/core/skbuff.c
     2.1 --- a/xen-2.4.16/common/domain.c	Tue Jan 28 16:13:04 2003 +0000
     2.2 +++ b/xen-2.4.16/common/domain.c	Sat Feb 01 12:06:32 2003 +0000
     2.3 @@ -334,10 +334,13 @@ static unsigned int alloc_new_dom_mem(st
     2.4      struct pfn_info *pf, *pf_head;
     2.5      unsigned int alloc_pfns;
     2.6      unsigned int req_pages;
     2.7 +    unsigned long flags;
     2.8  
     2.9      /* how many pages do we need to alloc? */
    2.10      req_pages = kbytes >> (PAGE_SHIFT - 10);
    2.11  
    2.12 +    spin_lock_irqsave(&free_list_lock, flags);
    2.13 +    
    2.14      /* is there enough mem to serve the request? */   
    2.15      if(req_pages > free_pfns)
    2.16          return -1;
    2.17 @@ -369,6 +372,8 @@ static unsigned int alloc_new_dom_mem(st
    2.18  
    2.19          free_pfns--;
    2.20      }
    2.21 +   
    2.22 +    spin_unlock_irqrestore(&free_list_lock, flags);
    2.23      
    2.24      p->tot_pages = req_pages;
    2.25  
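The domain.c hunk above brackets the domain-memory allocation with the new free_list_lock, since the zero-copy network code later in this changeset allocates pages from the same free list. As quoted, the early "return -1" on the out-of-memory check still holds the lock; a minimal sketch of the intended pattern, using only identifiers that appear in this changeset and with the error path unlocked, looks like this:

    /* Sketch: allocate req_pages frames under free_list_lock, releasing
     * the lock on every exit path (not the literal domain.c code). */
    spin_lock_irqsave(&free_list_lock, flags);
    if ( req_pages > free_pfns )
    {
        spin_unlock_irqrestore(&free_list_lock, flags);
        return -1;
    }
    for ( alloc_pfns = 0; alloc_pfns < req_pages; alloc_pfns++ )
    {
        pf = list_entry(free_list.next, struct pfn_info, list);
        list_del(&pf->list);
        free_pfns--;
        /* ... link pf into the domain's page list as domain.c does ... */
    }
    spin_unlock_irqrestore(&free_list_lock, flags);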
     3.1 --- a/xen-2.4.16/common/memory.c	Tue Jan 28 16:13:04 2003 +0000
     3.2 +++ b/xen-2.4.16/common/memory.c	Sat Feb 01 12:06:32 2003 +0000
     3.3 @@ -206,6 +206,7 @@ unsigned long frame_table_size;
     3.4  unsigned long max_page;
     3.5  
     3.6  struct list_head free_list;
     3.7 +spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED;
     3.8  unsigned int free_pfns;
     3.9  
    3.10  static int tlb_flush[NR_CPUS];
    3.11 @@ -219,6 +220,7 @@ void __init init_frametable(unsigned lon
    3.12  {
    3.13      struct pfn_info *pf;
    3.14      unsigned long page_index;
    3.15 +    unsigned long flags;
    3.16  
    3.17      memset(tlb_flush, 0, sizeof(tlb_flush));
    3.18  
    3.19 @@ -232,6 +234,7 @@ void __init init_frametable(unsigned lon
    3.20      memset(frame_table, 0, frame_table_size);
    3.21  
    3.22      /* Put all domain-allocatable memory on a free list. */
    3.23 +    spin_lock_irqsave(&free_list_lock, flags);
    3.24      INIT_LIST_HEAD(&free_list);
    3.25      for( page_index = (MAX_MONITOR_ADDRESS + frame_table_size) >> PAGE_SHIFT; 
    3.26           page_index < nr_pages; 
    3.27 @@ -240,6 +243,7 @@ void __init init_frametable(unsigned lon
    3.28          pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
    3.29          list_add_tail(&pf->list, &free_list);
    3.30      }
    3.31 +    spin_unlock_irqrestore(&free_list_lock, flags);
    3.32  }
    3.33  
    3.34  
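memory.c now defines free_list_lock (exported through xeno/mm.h below) and holds it while init_frametable() populates free_list, so every consumer of the free list is expected to follow the same spin_lock_irqsave() discipline. A minimal sketch of such a consumer; the helper name count_free_frames is hypothetical and only for illustration:

    /* Hypothetical helper: walk free_list under free_list_lock. */
    static unsigned int count_free_frames(void)
    {
        struct list_head *ent;
        unsigned long flags;
        unsigned int n = 0;

        spin_lock_irqsave(&free_list_lock, flags);
        list_for_each(ent, &free_list)
            n++;
        spin_unlock_irqrestore(&free_list_lock, flags);

        return n;
    }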
     4.1 --- a/xen-2.4.16/drivers/net/tulip/interrupt.c	Tue Jan 28 16:13:04 2003 +0000
     4.2 +++ b/xen-2.4.16/drivers/net/tulip/interrupt.c	Sat Feb 01 12:06:32 2003 +0000
     4.3 @@ -170,8 +170,9 @@ static int tulip_rx(struct net_device *d
     4.4  #endif
     4.5  			/* Check if the packet is long enough to accept without copying
     4.6  			   to a minimally-sized skbuff. */
     4.7 -			if (pkt_len < tulip_rx_copybreak
     4.8 -				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
     4.9 +			//if (pkt_len < tulip_rx_copybreak
    4.10 +			//	&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
    4.11 +                        if (0) {
    4.12  				skb->dev = dev;
    4.13  				skb_reserve(skb, 2);	/* 16 byte align the IP header */
    4.14  				pci_dma_sync_single(tp->pdev,
     5.1 --- a/xen-2.4.16/include/asm-i386/pci.h	Tue Jan 28 16:13:04 2003 +0000
     5.2 +++ b/xen-2.4.16/include/asm-i386/pci.h	Sat Feb 01 12:06:32 2003 +0000
     5.3 @@ -75,7 +75,19 @@ static inline dma_addr_t pci_map_single(
     5.4  	if (direction == PCI_DMA_NONE)
     5.5  		BUG();
     5.6  	flush_write_buffers();
     5.7 -	return virt_to_bus(ptr);
     5.8 +
     5.9 +        if ((unsigned long) ptr > PAGE_OFFSET)
    5.10 +	    return virt_to_bus(ptr);
    5.11 +
     5.12 +        /* If an address outside hypervisor VM is passed to this 
     5.13 +         * function (i.e. not above PAGE_OFFSET), we assume the caller 
     5.14 +         * knows what they are doing and has passed a physical address 
     5.15 +         * that should not be converted here.  This is a little hackish, 
     5.16 +         * but is added to allow references to domain memory in order 
     5.17 +         * to support zero-copy network code.
     5.18 +         */
    5.19 +        
    5.20 +        return (dma_addr_t) ptr;
    5.21  }
    5.22  
    5.23  /* Unmap a single streaming mode DMA translation.  The dma_addr and size
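The pci_map_single() change implements a simple translation rule: addresses above PAGE_OFFSET are hypervisor virtual addresses and go through virt_to_bus() as before, while anything else is assumed to already be a physical (machine) address naming domain memory and is handed to the device untranslated. A condensed sketch of that rule; the helper name net_buffer_to_bus is hypothetical:

    /* Sketch of the rule added above (not the literal pci.h code). */
    static inline dma_addr_t net_buffer_to_bus(void *ptr)
    {
        if ((unsigned long)ptr > PAGE_OFFSET)
            return virt_to_bus(ptr);      /* hypervisor virtual address */
        return (dma_addr_t)ptr;           /* already a physical/machine address */
    }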
     6.1 --- a/xen-2.4.16/include/hypervisor-ifs/network.h	Tue Jan 28 16:13:04 2003 +0000
     6.2 +++ b/xen-2.4.16/include/hypervisor-ifs/network.h	Sat Feb 01 12:06:32 2003 +0000
     6.3 @@ -26,8 +26,8 @@ typedef struct rx_entry_st {
     6.4          int           status; /* per descriptor status. */
     6.5  } rx_entry_t;
     6.6  
     6.7 -#define TX_RING_SIZE 1024
     6.8 -#define RX_RING_SIZE 1024
     6.9 +#define TX_RING_SIZE 256
    6.10 +#define RX_RING_SIZE 256
    6.11  typedef struct net_ring_st {
    6.12      /*
    6.13       * Guest OS places packets into ring at tx_prod.
     7.1 --- a/xen-2.4.16/include/xeno/mm.h	Tue Jan 28 16:13:04 2003 +0000
     7.2 +++ b/xen-2.4.16/include/xeno/mm.h	Sat Feb 01 12:06:32 2003 +0000
     7.3 @@ -7,6 +7,7 @@
     7.4  #include <asm/desc.h>
     7.5  #include <xeno/list.h>
     7.6  #include <hypervisor-ifs/hypervisor-if.h>
     7.7 +#include <xeno/spinlock.h>
     7.8  
     7.9  /* XXX KAF: These may die eventually, but so many refs in slab.c :((( */
    7.10  
    7.11 @@ -110,6 +111,7 @@ typedef struct pfn_info {
    7.12  extern frame_table_t * frame_table;
    7.13  extern unsigned long frame_table_size;
    7.14  extern struct list_head free_list;
    7.15 +extern spinlock_t free_list_lock;
    7.16  extern unsigned int free_pfns;
    7.17  extern unsigned long max_page;
    7.18  void init_frametable(unsigned long nr_pages);
     8.1 --- a/xen-2.4.16/include/xeno/skbuff.h	Tue Jan 28 16:13:04 2003 +0000
     8.2 +++ b/xen-2.4.16/include/xeno/skbuff.h	Sat Feb 01 12:06:32 2003 +0000
     8.3 @@ -34,6 +34,10 @@
     8.4  #define VIF_DROP                -3
     8.5  #define VIF_ANY_INTERFACE       -4
     8.6  
     8.7 +//skb_type values:
     8.8 +#define SKB_NORMAL               0
     8.9 +#define SKB_ZERO_COPY            1
    8.10 +
    8.11  #define HAVE_ALLOC_SKB		/* For the drivers to know */
    8.12  #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
    8.13  #define SLAB_SKB 		/* Slabified skbuffs 	   */
    8.14 @@ -187,7 +191,7 @@ struct sk_buff {
    8.15   	unsigned int 	data_len;
    8.16  	unsigned int	csum;			/* Checksum 					*/
    8.17  	unsigned char 	__unused,		/* Dead field, may be reused			*/
    8.18 -			cloned, 		/* head may be cloned (check refcnt to be sure). */
    8.19 +			cloned, 		/* head may be cloned (check refcnt to be sure) */
    8.20    			pkt_type,		/* Packet class					*/
    8.21    			ip_summed;		/* Driver fed us an IP checksum			*/
    8.22  	__u32		priority;		/* Packet queueing priority			*/
    8.23 @@ -203,8 +207,12 @@ struct sk_buff {
    8.24  
    8.25  	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
    8.26  
    8.27 -        int src_vif;                            /* vif we came from */
    8.28 -        int dst_vif;                            /* vif we are bound for */
    8.29 +        unsigned int    skb_type;               /* SKB_NORMAL or SKB_ZERO_COPY                  */
    8.30 +        struct pfn_info *pf;                    /* record of physical pf address for freeing    */
    8.31 +        int src_vif;                            /* vif we came from                             */
    8.32 +        int dst_vif;                            /* vif we are bound for                         */
    8.33 +        struct skb_shared_info shinfo;          /* shared info is no longer shared in Xen.      */
    8.34 +        
    8.35  
    8.36                  
    8.37          
    8.38 @@ -244,6 +252,7 @@ struct sk_buff {
    8.39  
    8.40  extern void			__kfree_skb(struct sk_buff *skb);
    8.41  extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
    8.42 +extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
    8.43  extern void			kfree_skbmem(struct sk_buff *skb);
    8.44  extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
    8.45  extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
    8.46 @@ -259,7 +268,8 @@ extern void	skb_over_panic(struct sk_buf
    8.47  extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
    8.48  
    8.49  /* Internal */
    8.50 -#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
    8.51 +//#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
    8.52 +#define skb_shinfo(SKB)     ((struct skb_shared_info *)(&(SKB)->shinfo))
    8.53  
    8.54  /**
    8.55   *	skb_queue_empty - check if a queue is empty
    8.56 @@ -1045,7 +1055,8 @@ static inline struct sk_buff *__dev_allo
    8.57  {
    8.58  	struct sk_buff *skb;
    8.59  
    8.60 -	skb = alloc_skb(length+16, gfp_mask);
    8.61 +	//skb = alloc_skb(length+16, gfp_mask);
    8.62 +        skb = alloc_zc_skb(length+16, gfp_mask);
    8.63  	if (skb)
    8.64  		skb_reserve(skb,16);
    8.65  	return skb;
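In the hypervisor's skbuff.h, sk_buff gains a skb_type field (SKB_NORMAL or SKB_ZERO_COPY), a pf pointer recording the backing frame, and its own shinfo member: for a zero-copy skb, skb->end points into a domain page that is only transiently mapped, so the shared info block can no longer live at the end of the data buffer and skb_shinfo() is redirected into the sk_buff itself. __dev_alloc_skb() is also switched to alloc_zc_skb(), so driver receive buffers now come from the domain free list. A short sketch of the resulting invariants, assuming the usual dev_alloc_skb() wrapper around __dev_alloc_skb() and the hypervisor's ASSERT() macro; the helper name grab_rx_buffer is made up for illustration:

    /* Sketch: a driver RX buffer is now a zero-copy skb whose shared
     * info lives inside struct sk_buff rather than at skb->end. */
    static struct sk_buff *grab_rx_buffer(void)
    {
        struct sk_buff *skb = dev_alloc_skb(1536);   /* backed by alloc_zc_skb() */
        if (skb == NULL)
            return NULL;
        ASSERT(skb->skb_type == SKB_ZERO_COPY);
        ASSERT(skb_shinfo(skb) == &skb->shinfo);
        return skb;
    }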
     9.1 --- a/xen-2.4.16/net/dev.c	Tue Jan 28 16:13:04 2003 +0000
     9.2 +++ b/xen-2.4.16/net/dev.c	Sat Feb 01 12:06:32 2003 +0000
     9.3 @@ -30,6 +30,7 @@
     9.4  #include <linux/pkt_sched.h>
     9.5  
     9.6  #include <linux/event.h>
     9.7 +#include <asm/domain_page.h>
     9.8  
     9.9  #define BUG_TRAP ASSERT
    9.10  #define notifier_call_chain(_a,_b,_c) ((void)0)
    9.11 @@ -695,6 +696,21 @@ int netif_rx(struct sk_buff *skb)
    9.12  	if (skb->stamp.tv_sec == 0)
    9.13  		get_fast_time(&skb->stamp);
    9.14  
    9.15 +        /* Attempt to handle zero-copy packets here: */
    9.16 +        if (skb->skb_type == SKB_ZERO_COPY)
    9.17 +        {
    9.18 +                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
    9.19 +
    9.20 +                /* remapping this address really screws up all the skb pointers.  We need 
    9.21 +                 * to map them all here sufficiently to get the packet demultiplexed.
    9.22 +                 */
    9.23 +                
    9.24 +                skb->data = skb->head;
    9.25 +                skb_reserve(skb,16); // need to ensure that all the drivers and not just tulip do this.
    9.26 +                skb->mac.raw = skb->data;
    9.27 +                skb->data += ETH_HLEN;
    9.28 +        }
    9.29 +        
    9.30  	/* The code is rearranged so that the path is the most
    9.31  	   short when CPU is congested, but is still operating.
    9.32  	 */
    9.33 @@ -747,10 +763,18 @@ drop:
    9.34  	netdev_rx_stat[this_cpu].dropped++;
    9.35  	local_irq_restore(flags);
    9.36  
    9.37 +        if (skb->skb_type == SKB_ZERO_COPY)
    9.38 +                unmap_domain_mem(skb->head);
    9.39 +        
    9.40  	kfree_skb(skb);
    9.41  	return NET_RX_DROP;
    9.42  
    9.43  found:
    9.44 +        if (skb->skb_type == SKB_ZERO_COPY) {
    9.45 +                unmap_domain_mem(skb->head);
    9.46 +                //skb->head = (u8 *)((skb->pf - frame_table) << PAGE_SHIFT);
    9.47 +                skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
    9.48 +        }
    9.49          hyp_event_notify(cpu_mask);
    9.50          local_irq_restore(flags);
    9.51          return 0;
    9.52 @@ -930,8 +954,28 @@ void flush_rx_queue(void)
    9.53                      rx = shadow_ring->rx_ring+i;
    9.54                      if ( (skb->len + ETH_HLEN) < rx->size )
    9.55                          rx->size = skb->len + ETH_HLEN;
    9.56 +
    9.57 +                    /* remap the packet again.  This is very temporary and will shortly be
    9.58 +                     * replaced with a page swizzle.
    9.59 +                     */
    9.60 +
    9.61 +                    if (skb->skb_type == SKB_ZERO_COPY)
    9.62 +                    {
    9.63 +                        skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
    9.64 +                        skb->data = skb->head;
    9.65 +                        skb_reserve(skb,16); 
    9.66 +                        skb->mac.raw = skb->data;
    9.67 +                        skb->data += ETH_HLEN;
    9.68 +                    }
    9.69 +                                                                        
    9.70                      copy_to_user((void *)rx->addr, skb->mac.raw, rx->size);
    9.71                      copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx));
    9.72 +                    
    9.73 +                    if (skb->skb_type == SKB_ZERO_COPY)
    9.74 +                    {
    9.75 +                        unmap_domain_mem(skb->head);
    9.76 +                        skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
    9.77 +                    }
    9.78                  }
    9.79                  net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
    9.80                  if ( net_ring->rx_cons == net_ring->rx_event )
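netif_rx() and flush_rx_queue() handle SKB_ZERO_COPY buffers by mapping the backing domain page into hypervisor space just long enough to demultiplex the packet (and, later, to copy the header region into the guest's receive ring), then unmapping it and poisoning the stale pointers with 0xdeadbeef. A minimal sketch of that transient-mapping pattern, factored into two hypothetical helpers that use only calls appearing in the hunks above:

    /* Sketch: map a zero-copy skb's domain page and set up the pointers
     * the demux code needs (mirrors the fix-ups in netif_rx above). */
    static void zc_skb_map(struct sk_buff *skb)
    {
        skb->head = (u8 *)map_domain_mem((skb->pf - frame_table) << PAGE_SHIFT);
        skb->data = skb->head;
        skb_reserve(skb, 16);             /* same 16-byte reserve as the driver */
        skb->mac.raw = skb->data;
        skb->data += ETH_HLEN;
    }

    /* Sketch: drop the transient mapping and poison the pointers so any
     * later use of the unmapped address fails loudly. */
    static void zc_skb_unmap(struct sk_buff *skb)
    {
        unmap_domain_mem(skb->head);
        skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
    }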
    10.1 --- a/xen-2.4.16/net/eth.c	Tue Jan 28 16:13:04 2003 +0000
    10.2 +++ b/xen-2.4.16/net/eth.c	Sat Feb 01 12:06:32 2003 +0000
    10.3 @@ -161,52 +161,62 @@ unsigned short eth_type_trans(struct sk_
    10.4  	struct ethhdr *eth;
    10.5  	unsigned char *rawp;
    10.6  	
    10.7 -	skb->mac.raw=skb->data;
    10.8 -	skb_pull(skb,dev->hard_header_len);
    10.9 -	eth= skb->mac.ethernet;
   10.10 +        if (skb->skb_type == SKB_ZERO_COPY)
   10.11 +        {
   10.12 +            skb_pull(skb,dev->hard_header_len);
   10.13 +            skb->mac.raw= (void *)0xdeadbeef;
   10.14 +            return htons(ETH_P_802_2);
   10.15 +            
   10.16 +        } else { // SKB_NORMAL
   10.17 +        
   10.18 +	    skb->mac.raw=skb->data;
   10.19 +	    skb_pull(skb,dev->hard_header_len);
   10.20 +	    eth= skb->mac.ethernet;
   10.21  	
   10.22 -	if(*eth->h_dest&1)
   10.23 -	{
   10.24 -		if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
   10.25 +	    if(*eth->h_dest&1)
   10.26 +	    {
   10.27 +	    	if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
   10.28  			skb->pkt_type=PACKET_BROADCAST;
   10.29  		else
   10.30  			skb->pkt_type=PACKET_MULTICAST;
   10.31 -	}
   10.32 +	    }
   10.33  	
   10.34 -	/*
   10.35 -	 *	This ALLMULTI check should be redundant by 1.4
   10.36 -	 *	so don't forget to remove it.
   10.37 -	 *
   10.38 -	 *	Seems, you forgot to remove it. All silly devices
   10.39 -	 *	seems to set IFF_PROMISC.
   10.40 -	 */
   10.41 +	    /*
   10.42 +	    *	This ALLMULTI check should be redundant by 1.4
   10.43 +	    *	so don't forget to remove it.
   10.44 +	    *
   10.45 +	    *	Seems, you forgot to remove it. All silly devices
   10.46 +	    *	seems to set IFF_PROMISC.
   10.47 +	    */
   10.48  	 
   10.49 -	else if(1 /*dev->flags&IFF_PROMISC*/)
   10.50 -	{
   10.51 +	    else if(1 /*dev->flags&IFF_PROMISC*/)
   10.52 +	    {
   10.53  		if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
   10.54  			skb->pkt_type=PACKET_OTHERHOST;
   10.55 -	}
   10.56 +	    }
   10.57  	
   10.58 -	if (ntohs(eth->h_proto) >= 1536)
   10.59 +	    if (ntohs(eth->h_proto) >= 1536)
   10.60  		return eth->h_proto;
   10.61  		
   10.62 -	rawp = skb->data;
   10.63 +	    rawp = skb->data;
   10.64  	
   10.65 -	/*
   10.66 -	 *	This is a magic hack to spot IPX packets. Older Novell breaks
   10.67 -	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
   10.68 -	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
   10.69 -	 *	won't work for fault tolerant netware but does for the rest.
   10.70 -	 */
   10.71 -	if (*(unsigned short *)rawp == 0xFFFF)
   10.72 +	    /*
   10.73 +	    *	This is a magic hack to spot IPX packets. Older Novell breaks
   10.74 +	    *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
   10.75 +	    *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
   10.76 +	    *	won't work for fault tolerant netware but does for the rest.
   10.77 +	    */
   10.78 +	    if (*(unsigned short *)rawp == 0xFFFF)
   10.79  		return htons(ETH_P_802_3);
   10.80  		
   10.81 -	/*
   10.82 -	 *	Real 802.2 LLC
   10.83 -	 */
   10.84 -	return htons(ETH_P_802_2);
   10.85 +	    /*
   10.86 +	    *	Real 802.2 LLC
   10.87 +	    */
   10.88 +	    return htons(ETH_P_802_2);
   10.89 +        }
   10.90  }
   10.91  
   10.92 +
   10.93  int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
   10.94  {
   10.95  	struct ethhdr *eth = skb->mac.ethernet;
    11.1 --- a/xen-2.4.16/net/skbuff.c	Tue Jan 28 16:13:04 2003 +0000
    11.2 +++ b/xen-2.4.16/net/skbuff.c	Sat Feb 01 12:06:32 2003 +0000
    11.3 @@ -149,6 +149,102 @@ static __inline__ void skb_head_to_pool(
    11.4  	kmem_cache_free(skbuff_head_cache, skb);
    11.5  }
    11.6  
    11.7 +static inline u8 *alloc_skb_data_page(struct sk_buff *skb)
    11.8 +{
    11.9 +        struct list_head *list_ptr;
   11.10 +        struct pfn_info  *pf;
   11.11 +        unsigned long flags;
   11.12 +        
   11.13 +        spin_lock_irqsave(&free_list_lock, flags);
   11.14 +
   11.15 +        if (!free_pfns) return NULL;
   11.16 +
   11.17 +        list_ptr = free_list.next;
   11.18 +        pf = list_entry(list_ptr, struct pfn_info, list);
   11.19 +        pf->flags = 0; // owned by dom0
   11.20 +        list_del(&pf->list);
   11.21 +        pf->next = pf->prev = (pf - frame_table);
   11.22 +        free_pfns--;
   11.23 +
   11.24 +        spin_unlock_irqrestore(&free_list_lock, flags);
   11.25 +
   11.26 +        skb->pf = pf;
   11.27 +        return (u8 *)((pf - frame_table) << PAGE_SHIFT);
   11.28 +}
   11.29 +
   11.30 +static inline void dealloc_skb_data_page(struct sk_buff *skb)
   11.31 +{
   11.32 +        struct pfn_info  *pf;
   11.33 +        unsigned long flags;
   11.34 +
   11.35 +        pf = skb->pf;
   11.36 +
   11.37 +        spin_lock_irqsave(&free_list_lock, flags);
   11.38 +
   11.39 +        list_add_tail(&pf->list, &free_list);
   11.40 +        free_pfns++;
   11.41 +
   11.42 +        spin_unlock_irqrestore(&free_list_lock, flags);
   11.43 +}
   11.44 +
   11.45 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
   11.46 +{
   11.47 +        struct sk_buff *skb;
   11.48 +        u8 *data;
   11.49 +
   11.50 +        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   11.51 +                static int count = 0;
   11.52 +                if (++count < 5) {
   11.53 +                        printk(KERN_ERR "alloc_skb called nonatomically "
   11.54 +                               "from interrupt %p\n", NET_CALLER(size));
   11.55 +                        BUG();
   11.56 +                }
   11.57 +                gfp_mask &= ~__GFP_WAIT;
   11.58 +        }
   11.59 +
   11.60 +        /* Get the HEAD */
   11.61 +        skb = skb_head_from_pool();
   11.62 +        if (skb == NULL) {
   11.63 +                skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   11.64 +                if (skb == NULL)
   11.65 +                        goto nohead;
   11.66 +        }
   11.67 +
   11.68 +        /* Get the DATA. Size must match skb_add_mtu(). */
   11.69 +        size = SKB_DATA_ALIGN(size);
   11.70 +        data = alloc_skb_data_page(skb);
   11.71 +        if (data == NULL)
   11.72 +                goto nodata;
   11.73 +
   11.74 +        /* XXX: does not include slab overhead */
   11.75 +        skb->truesize = size + sizeof(struct sk_buff);
   11.76 +
   11.77 +        /* Load the data pointers. */
   11.78 +        skb->head = data;
   11.79 +        skb->data = data;
   11.80 +        skb->tail = data;
   11.81 +        skb->end = data + size;
   11.82 +
   11.83 +        /* Set up other state */
   11.84 +        skb->len = 0;
   11.85 +        skb->cloned = 0;
   11.86 +        skb->data_len = 0;
   11.87 +        skb->src_vif = VIF_UNKNOWN_INTERFACE;
   11.88 +        skb->dst_vif = VIF_UNKNOWN_INTERFACE;
   11.89 +        skb->skb_type = SKB_ZERO_COPY;
   11.90 +
   11.91 +        atomic_set(&skb->users, 1);
   11.92 +        atomic_set(&(skb_shinfo(skb)->dataref), 1);
   11.93 +        skb_shinfo(skb)->nr_frags = 0;
   11.94 +        skb_shinfo(skb)->frag_list = NULL;
   11.95 +        return skb;
   11.96 +
   11.97 +nodata:
   11.98 +        skb_head_to_pool(skb);
   11.99 +nohead:
  11.100 +        return NULL;
  11.101 +}
  11.102 +
  11.103  
  11.104  /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  11.105   *	'private' fields and also do memory statistics to find all the
  11.106 @@ -213,6 +309,7 @@ struct sk_buff *alloc_skb(unsigned int s
  11.107  	skb->data_len = 0;
  11.108          skb->src_vif = VIF_UNKNOWN_INTERFACE;
  11.109          skb->dst_vif = VIF_UNKNOWN_INTERFACE;
  11.110 +        skb->skb_type = SKB_NORMAL;
  11.111  
  11.112  	atomic_set(&skb->users, 1); 
  11.113  	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  11.114 @@ -295,7 +392,13 @@ static void skb_release_data(struct sk_b
  11.115  		if (skb_shinfo(skb)->frag_list)
  11.116  			skb_drop_fraglist(skb);
  11.117  
  11.118 -		kfree(skb->head);
  11.119 +                if (skb->skb_type == SKB_NORMAL) {
  11.120 +		    kfree(skb->head);
  11.121 +                } else if (skb->skb_type == SKB_ZERO_COPY) {
  11.122 +                    dealloc_skb_data_page(skb);
  11.123 +                } else {
  11.124 +                    printk("skb_release_data called with unknown skb type!\n");
  11.125 +                }
  11.126  	}
  11.127  }
  11.128  
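alloc_zc_skb() parallels alloc_skb() but takes its data area straight from the hypervisor free list: alloc_skb_data_page() pops one pfn_info under free_list_lock, records it in skb->pf and returns the page's machine address (not a mapped virtual address), while dealloc_skb_data_page() returns the page when skb_release_data() frees an SKB_ZERO_COPY buffer. As quoted, the !free_pfns early return in alloc_skb_data_page() exits with free_list_lock still held; a minimal sketch of the pop with that path unlocked, using only identifiers from the hunk:

    /* Sketch: take one page for a zero-copy skb, releasing the lock on
     * the empty-list path too (not the literal skbuff.c code). */
    spin_lock_irqsave(&free_list_lock, flags);
    if (!free_pfns) {
        spin_unlock_irqrestore(&free_list_lock, flags);
        return NULL;
    }
    pf = list_entry(free_list.next, struct pfn_info, list);
    pf->flags = 0;                         /* owned by dom0 */
    list_del(&pf->list);
    pf->next = pf->prev = (pf - frame_table);
    free_pfns--;
    spin_unlock_irqrestore(&free_list_lock, flags);

    skb->pf = pf;
    return (u8 *)((pf - frame_table) << PAGE_SHIFT);   /* machine address */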
    12.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    12.2 +++ b/xenolinux-2.4.16-sparse/include/linux/skbuff.h	Sat Feb 01 12:06:32 2003 +0000
    12.3 @@ -0,0 +1,1185 @@
    12.4 +/*
    12.5 + *	Definitions for the 'struct sk_buff' memory handlers.
    12.6 + *
    12.7 + *	Authors:
    12.8 + *		Alan Cox, <gw4pts@gw4pts.ampr.org>
    12.9 + *		Florian La Roche, <rzsfl@rz.uni-sb.de>
   12.10 + *
   12.11 + *	This program is free software; you can redistribute it and/or
   12.12 + *	modify it under the terms of the GNU General Public License
   12.13 + *	as published by the Free Software Foundation; either version
   12.14 + *	2 of the License, or (at your option) any later version.
   12.15 + */
   12.16 + 
   12.17 +#ifndef _LINUX_SKBUFF_H
   12.18 +#define _LINUX_SKBUFF_H
   12.19 +
   12.20 +#include <linux/config.h>
   12.21 +#include <linux/kernel.h>
   12.22 +#include <linux/sched.h>
   12.23 +#include <linux/time.h>
   12.24 +#include <linux/cache.h>
   12.25 +
   12.26 +#include <asm/atomic.h>
   12.27 +#include <asm/types.h>
   12.28 +#include <linux/spinlock.h>
   12.29 +#include <linux/mm.h>
   12.30 +#include <linux/highmem.h>
   12.31 +
   12.32 +/* Zero Copy additions:
   12.33 + *
   12.34 + * (1) there are now two types of skb, as indicated by the skb_type field.
    12.35 + *     this is because, at least for the time being, there are two separate types 
   12.36 + *     of memory that may be allocated to skb->data.
   12.37 + *
   12.38 + * (2) until discontiguous memory is fully supported, there will be a free list of pages
   12.39 + *     to be used by the net RX code.  This list will be allocated in the driver init code
   12.40 + *     but is declared here because the socket free code needs to return pages to it.
   12.41 + */
   12.42 +
   12.43 +// for skb->skb_type:
   12.44 +
   12.45 +#define SKB_NORMAL          0
   12.46 +#define SKB_ZERO_COPY       1
   12.47 +
   12.48 +#define NUM_NET_PAGES       9 // about 1Meg of buffers. (2^9)
   12.49 +struct net_page_info {
   12.50 +        struct list_head list;
   12.51 +        unsigned long   virt_addr;
   12.52 +        unsigned long   ppte;
   12.53 +};
   12.54 +
   12.55 +extern char *net_page_chunk;
   12.56 +extern struct net_page_info *net_page_table;
   12.57 +extern struct list_head net_page_list;
   12.58 +extern spinlock_t net_page_list_lock;
   12.59 +extern unsigned int net_pages;
   12.60 +
   12.61 +/* End zero copy additions */
   12.62 +
   12.63 +#define HAVE_ALLOC_SKB		/* For the drivers to know */
   12.64 +#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
   12.65 +#define SLAB_SKB 		/* Slabified skbuffs 	   */
   12.66 +
   12.67 +#define CHECKSUM_NONE 0
   12.68 +#define CHECKSUM_HW 1
   12.69 +#define CHECKSUM_UNNECESSARY 2
   12.70 +
   12.71 +#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
   12.72 +#define SKB_MAX_ORDER(X,ORDER)	(((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
   12.73 +#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X),0))
   12.74 +#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0,2))
   12.75 +
   12.76 +/* A. Checksumming of received packets by device.
   12.77 + *
   12.78 + *	NONE: device failed to checksum this packet.
   12.79 + *		skb->csum is undefined.
   12.80 + *
   12.81 + *	UNNECESSARY: device parsed packet and wouldbe verified checksum.
   12.82 + *		skb->csum is undefined.
   12.83 + *	      It is bad option, but, unfortunately, many of vendors do this.
   12.84 + *	      Apparently with secret goal to sell you new device, when you
   12.85 + *	      will add new protocol to your host. F.e. IPv6. 8)
   12.86 + *
   12.87 + *	HW: the most generic way. Device supplied checksum of _all_
   12.88 + *	    the packet as seen by netif_rx in skb->csum.
   12.89 + *	    NOTE: Even if device supports only some protocols, but
   12.90 + *	    is able to produce some skb->csum, it MUST use HW,
   12.91 + *	    not UNNECESSARY.
   12.92 + *
   12.93 + * B. Checksumming on output.
   12.94 + *
   12.95 + *	NONE: skb is checksummed by protocol or csum is not required.
   12.96 + *
   12.97 + *	HW: device is required to csum packet as seen by hard_start_xmit
   12.98 + *	from skb->h.raw to the end and to record the checksum
   12.99 + *	at skb->h.raw+skb->csum.
  12.100 + *
  12.101 + *	Device must show its capabilities in dev->features, set
  12.102 + *	at device setup time.
  12.103 + *	NETIF_F_HW_CSUM	- it is clever device, it is able to checksum
  12.104 + *			  everything.
  12.105 + *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
  12.106 + *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
  12.107 + *			  TCP/UDP over IPv4. Sigh. Vendors like this
  12.108 + *			  way by an unknown reason. Though, see comment above
  12.109 + *			  about CHECKSUM_UNNECESSARY. 8)
  12.110 + *
  12.111 + *	Any questions? No questions, good. 		--ANK
  12.112 + */
  12.113 +
  12.114 +#ifdef __i386__
  12.115 +#define NET_CALLER(arg) (*(((void**)&arg)-1))
  12.116 +#else
  12.117 +#define NET_CALLER(arg) __builtin_return_address(0)
  12.118 +#endif
  12.119 +
  12.120 +#ifdef CONFIG_NETFILTER
  12.121 +struct nf_conntrack {
  12.122 +	atomic_t use;
  12.123 +	void (*destroy)(struct nf_conntrack *);
  12.124 +};
  12.125 +
  12.126 +struct nf_ct_info {
  12.127 +	struct nf_conntrack *master;
  12.128 +};
  12.129 +#endif
  12.130 +
  12.131 +struct sk_buff_head {
  12.132 +	/* These two members must be first. */
  12.133 +	struct sk_buff	* next;
  12.134 +	struct sk_buff	* prev;
  12.135 +
  12.136 +	__u32		qlen;
  12.137 +	spinlock_t	lock;
  12.138 +};
  12.139 +
  12.140 +struct sk_buff;
  12.141 +
  12.142 +#define MAX_SKB_FRAGS 6
  12.143 +
  12.144 +typedef struct skb_frag_struct skb_frag_t;
  12.145 +
  12.146 +struct skb_frag_struct
  12.147 +{
  12.148 +	struct page *page;
  12.149 +	__u16 page_offset;
  12.150 +	__u16 size;
  12.151 +};
  12.152 +
  12.153 +/* This data is invariant across clones and lives at
  12.154 + * the end of the header data, ie. at skb->end.
  12.155 + */
  12.156 +struct skb_shared_info {
  12.157 +	atomic_t	dataref;
  12.158 +	unsigned int	nr_frags;
  12.159 +	struct sk_buff	*frag_list;
  12.160 +	skb_frag_t	frags[MAX_SKB_FRAGS];
  12.161 +};
  12.162 +
  12.163 +struct sk_buff {
  12.164 +	/* These two members must be first. */
  12.165 +	struct sk_buff	* next;			/* Next buffer in list 				*/
  12.166 +	struct sk_buff	* prev;			/* Previous buffer in list 			*/
  12.167 +
  12.168 +	struct sk_buff_head * list;		/* List we are on				*/
  12.169 +	struct sock	*sk;			/* Socket we are owned by 			*/
  12.170 +	struct timeval	stamp;			/* Time we arrived				*/
  12.171 +	struct net_device	*dev;		/* Device we arrived on/are leaving by		*/
  12.172 +
  12.173 +	/* Transport layer header */
  12.174 +	union
  12.175 +	{
  12.176 +		struct tcphdr	*th;
  12.177 +		struct udphdr	*uh;
  12.178 +		struct icmphdr	*icmph;
  12.179 +		struct igmphdr	*igmph;
  12.180 +		struct iphdr	*ipiph;
  12.181 +		struct spxhdr	*spxh;
  12.182 +		unsigned char	*raw;
  12.183 +	} h;
  12.184 +
  12.185 +	/* Network layer header */
  12.186 +	union
  12.187 +	{
  12.188 +		struct iphdr	*iph;
  12.189 +		struct ipv6hdr	*ipv6h;
  12.190 +		struct arphdr	*arph;
  12.191 +		struct ipxhdr	*ipxh;
  12.192 +		unsigned char	*raw;
  12.193 +	} nh;
  12.194 +  
  12.195 +	/* Link layer header */
  12.196 +	union 
  12.197 +	{	
  12.198 +	  	struct ethhdr	*ethernet;
  12.199 +	  	unsigned char 	*raw;
  12.200 +	} mac;
  12.201 +
  12.202 +	struct  dst_entry *dst;
  12.203 +
  12.204 +	/* 
  12.205 +	 * This is the control buffer. It is free to use for every
  12.206 +	 * layer. Please put your private variables there. If you
  12.207 +	 * want to keep them across layers you have to do a skb_clone()
  12.208 +	 * first. This is owned by whoever has the skb queued ATM.
  12.209 +	 */ 
  12.210 +	char		cb[48];	 
  12.211 +
  12.212 +	unsigned int 	len;			/* Length of actual data			*/
  12.213 + 	unsigned int 	data_len;
  12.214 +	unsigned int	csum;			/* Checksum 					*/
  12.215 +	unsigned char 	__unused,		/* Dead field, may be reused			*/
  12.216 +			cloned, 		/* head may be cloned (check refcnt to be sure). */
  12.217 +  			pkt_type,		/* Packet class					*/
  12.218 +  			ip_summed;		/* Driver fed us an IP checksum			*/
  12.219 +	__u32		priority;		/* Packet queueing priority			*/
  12.220 +	atomic_t	users;			/* User count - see datagram.c,tcp.c 		*/
  12.221 +	unsigned short	protocol;		/* Packet protocol from driver. 		*/
  12.222 +	unsigned short	security;		/* Security level of packet			*/
  12.223 +	unsigned int	truesize;		/* Buffer size 					*/
  12.224 +
  12.225 +	unsigned char	*head;			/* Head of buffer 				*/
  12.226 +	unsigned char	*data;			/* Data head pointer				*/
  12.227 +	unsigned char	*tail;			/* Tail pointer					*/
  12.228 +	unsigned char 	*end;			/* End pointer					*/
  12.229 +
  12.230 +	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
  12.231 +#ifdef CONFIG_NETFILTER
  12.232 +	/* Can be used for communication between hooks. */
  12.233 +        unsigned long	nfmark;
  12.234 +	/* Cache info */
  12.235 +	__u32		nfcache;
  12.236 +	/* Associated connection, if any */
  12.237 +	struct nf_ct_info *nfct;
  12.238 +#ifdef CONFIG_NETFILTER_DEBUG
  12.239 +        unsigned int nf_debug;
  12.240 +#endif
  12.241 +#endif /*CONFIG_NETFILTER*/
  12.242 +
  12.243 +#if defined(CONFIG_HIPPI)
  12.244 +	union{
  12.245 +		__u32	ifield;
  12.246 +	} private;
  12.247 +#endif
  12.248 +
  12.249 +#ifdef CONFIG_NET_SCHED
  12.250 +       __u32           tc_index;                /* traffic control index */
  12.251 +#endif
  12.252 +       unsigned int     skb_type;                /* for zero copy handling.                      */
  12.253 +       struct net_page_info *net_page;
  12.254 +};
  12.255 +
  12.256 +#define SK_WMEM_MAX	65535
  12.257 +#define SK_RMEM_MAX	65535
  12.258 +
  12.259 +#ifdef __KERNEL__
  12.260 +/*
  12.261 + *	Handling routines are only of interest to the kernel
  12.262 + */
  12.263 +#include <linux/slab.h>
  12.264 +
  12.265 +#include <asm/system.h>
  12.266 +
  12.267 +extern void			__kfree_skb(struct sk_buff *skb);
  12.268 +extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
  12.269 +extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
  12.270 +extern void			kfree_skbmem(struct sk_buff *skb);
  12.271 +extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
  12.272 +extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
  12.273 +extern struct sk_buff *		pskb_copy(struct sk_buff *skb, int gfp_mask);
  12.274 +extern int			pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
  12.275 +extern struct sk_buff *		skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
  12.276 +extern struct sk_buff *		skb_copy_expand(const struct sk_buff *skb, 
  12.277 +						int newheadroom,
  12.278 +						int newtailroom,
  12.279 +						int priority);
  12.280 +#define dev_kfree_skb(a)	kfree_skb(a)
  12.281 +extern void	skb_over_panic(struct sk_buff *skb, int len, void *here);
  12.282 +extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
  12.283 +
  12.284 +/* Internal */
  12.285 +#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
  12.286 +
  12.287 +/**
  12.288 + *	skb_queue_empty - check if a queue is empty
  12.289 + *	@list: queue head
  12.290 + *
  12.291 + *	Returns true if the queue is empty, false otherwise.
  12.292 + */
  12.293 + 
  12.294 +static inline int skb_queue_empty(struct sk_buff_head *list)
  12.295 +{
  12.296 +	return (list->next == (struct sk_buff *) list);
  12.297 +}
  12.298 +
  12.299 +/**
  12.300 + *	skb_get - reference buffer
  12.301 + *	@skb: buffer to reference
  12.302 + *
  12.303 + *	Makes another reference to a socket buffer and returns a pointer
  12.304 + *	to the buffer.
  12.305 + */
  12.306 + 
  12.307 +static inline struct sk_buff *skb_get(struct sk_buff *skb)
  12.308 +{
  12.309 +	atomic_inc(&skb->users);
  12.310 +	return skb;
  12.311 +}
  12.312 +
  12.313 +/*
  12.314 + * If users==1, we are the only owner and are can avoid redundant
  12.315 + * atomic change.
  12.316 + */
  12.317 + 
  12.318 +/**
  12.319 + *	kfree_skb - free an sk_buff
  12.320 + *	@skb: buffer to free
  12.321 + *
  12.322 + *	Drop a reference to the buffer and free it if the usage count has
  12.323 + *	hit zero.
  12.324 + */
  12.325 + 
  12.326 +static inline void kfree_skb(struct sk_buff *skb)
  12.327 +{
  12.328 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  12.329 +		__kfree_skb(skb);
  12.330 +}
  12.331 +
  12.332 +/* Use this if you didn't touch the skb state [for fast switching] */
  12.333 +static inline void kfree_skb_fast(struct sk_buff *skb)
  12.334 +{
  12.335 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  12.336 +		kfree_skbmem(skb);	
  12.337 +}
  12.338 +
  12.339 +/**
  12.340 + *	skb_cloned - is the buffer a clone
  12.341 + *	@skb: buffer to check
  12.342 + *
  12.343 + *	Returns true if the buffer was generated with skb_clone() and is
  12.344 + *	one of multiple shared copies of the buffer. Cloned buffers are
  12.345 + *	shared data so must not be written to under normal circumstances.
  12.346 + */
  12.347 +
  12.348 +static inline int skb_cloned(struct sk_buff *skb)
  12.349 +{
  12.350 +	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
  12.351 +}
  12.352 +
  12.353 +/**
  12.354 + *	skb_shared - is the buffer shared
  12.355 + *	@skb: buffer to check
  12.356 + *
  12.357 + *	Returns true if more than one person has a reference to this
  12.358 + *	buffer.
  12.359 + */
  12.360 + 
  12.361 +static inline int skb_shared(struct sk_buff *skb)
  12.362 +{
  12.363 +	return (atomic_read(&skb->users) != 1);
  12.364 +}
  12.365 +
  12.366 +/** 
  12.367 + *	skb_share_check - check if buffer is shared and if so clone it
  12.368 + *	@skb: buffer to check
  12.369 + *	@pri: priority for memory allocation
  12.370 + *	
  12.371 + *	If the buffer is shared the buffer is cloned and the old copy
  12.372 + *	drops a reference. A new clone with a single reference is returned.
  12.373 + *	If the buffer is not shared the original buffer is returned. When
  12.374 + *	being called from interrupt status or with spinlocks held pri must
  12.375 + *	be GFP_ATOMIC.
  12.376 + *
  12.377 + *	NULL is returned on a memory allocation failure.
  12.378 + */
  12.379 + 
  12.380 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
  12.381 +{
  12.382 +	if (skb_shared(skb)) {
  12.383 +		struct sk_buff *nskb;
  12.384 +		nskb = skb_clone(skb, pri);
  12.385 +		kfree_skb(skb);
  12.386 +		return nskb;
  12.387 +	}
  12.388 +	return skb;
  12.389 +}
  12.390 +
  12.391 +
  12.392 +/*
  12.393 + *	Copy shared buffers into a new sk_buff. We effectively do COW on
  12.394 + *	packets to handle cases where we have a local reader and forward
  12.395 + *	and a couple of other messy ones. The normal one is tcpdumping
  12.396 + *	a packet thats being forwarded.
  12.397 + */
  12.398 + 
  12.399 +/**
  12.400 + *	skb_unshare - make a copy of a shared buffer
  12.401 + *	@skb: buffer to check
  12.402 + *	@pri: priority for memory allocation
  12.403 + *
  12.404 + *	If the socket buffer is a clone then this function creates a new
  12.405 + *	copy of the data, drops a reference count on the old copy and returns
  12.406 + *	the new copy with the reference count at 1. If the buffer is not a clone
  12.407 + *	the original buffer is returned. When called with a spinlock held or
  12.408 + *	from interrupt state @pri must be %GFP_ATOMIC
  12.409 + *
  12.410 + *	%NULL is returned on a memory allocation failure.
  12.411 + */
  12.412 + 
  12.413 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
  12.414 +{
  12.415 +	struct sk_buff *nskb;
  12.416 +	if(!skb_cloned(skb))
  12.417 +		return skb;
  12.418 +	nskb=skb_copy(skb, pri);
  12.419 +	kfree_skb(skb);		/* Free our shared copy */
  12.420 +	return nskb;
  12.421 +}
  12.422 +
  12.423 +/**
  12.424 + *	skb_peek
  12.425 + *	@list_: list to peek at
  12.426 + *
  12.427 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  12.428 + *	be careful with this one. A peek leaves the buffer on the
  12.429 + *	list and someone else may run off with it. You must hold
  12.430 + *	the appropriate locks or have a private queue to do this.
  12.431 + *
  12.432 + *	Returns %NULL for an empty list or a pointer to the head element.
  12.433 + *	The reference count is not incremented and the reference is therefore
  12.434 + *	volatile. Use with caution.
  12.435 + */
  12.436 + 
  12.437 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
  12.438 +{
  12.439 +	struct sk_buff *list = ((struct sk_buff *)list_)->next;
  12.440 +	if (list == (struct sk_buff *)list_)
  12.441 +		list = NULL;
  12.442 +	return list;
  12.443 +}
  12.444 +
  12.445 +/**
  12.446 + *	skb_peek_tail
  12.447 + *	@list_: list to peek at
  12.448 + *
  12.449 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  12.450 + *	be careful with this one. A peek leaves the buffer on the
  12.451 + *	list and someone else may run off with it. You must hold
  12.452 + *	the appropriate locks or have a private queue to do this.
  12.453 + *
  12.454 + *	Returns %NULL for an empty list or a pointer to the tail element.
  12.455 + *	The reference count is not incremented and the reference is therefore
  12.456 + *	volatile. Use with caution.
  12.457 + */
  12.458 +
  12.459 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
  12.460 +{
  12.461 +	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
  12.462 +	if (list == (struct sk_buff *)list_)
  12.463 +		list = NULL;
  12.464 +	return list;
  12.465 +}
  12.466 +
  12.467 +/**
  12.468 + *	skb_queue_len	- get queue length
  12.469 + *	@list_: list to measure
  12.470 + *
  12.471 + *	Return the length of an &sk_buff queue. 
  12.472 + */
  12.473 + 
  12.474 +static inline __u32 skb_queue_len(struct sk_buff_head *list_)
  12.475 +{
  12.476 +	return(list_->qlen);
  12.477 +}
  12.478 +
  12.479 +static inline void skb_queue_head_init(struct sk_buff_head *list)
  12.480 +{
  12.481 +	spin_lock_init(&list->lock);
  12.482 +	list->prev = (struct sk_buff *)list;
  12.483 +	list->next = (struct sk_buff *)list;
  12.484 +	list->qlen = 0;
  12.485 +}
  12.486 +
  12.487 +/*
  12.488 + *	Insert an sk_buff at the start of a list.
  12.489 + *
  12.490 + *	The "__skb_xxxx()" functions are the non-atomic ones that
  12.491 + *	can only be called with interrupts disabled.
  12.492 + */
  12.493 +
  12.494 +/**
  12.495 + *	__skb_queue_head - queue a buffer at the list head
  12.496 + *	@list: list to use
  12.497 + *	@newsk: buffer to queue
  12.498 + *
  12.499 + *	Queue a buffer at the start of a list. This function takes no locks
  12.500 + *	and you must therefore hold required locks before calling it.
  12.501 + *
  12.502 + *	A buffer cannot be placed on two lists at the same time.
  12.503 + */	
  12.504 + 
  12.505 +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  12.506 +{
  12.507 +	struct sk_buff *prev, *next;
  12.508 +
  12.509 +	newsk->list = list;
  12.510 +	list->qlen++;
  12.511 +	prev = (struct sk_buff *)list;
  12.512 +	next = prev->next;
  12.513 +	newsk->next = next;
  12.514 +	newsk->prev = prev;
  12.515 +	next->prev = newsk;
  12.516 +	prev->next = newsk;
  12.517 +}
  12.518 +
  12.519 +
  12.520 +/**
  12.521 + *	skb_queue_head - queue a buffer at the list head
  12.522 + *	@list: list to use
  12.523 + *	@newsk: buffer to queue
  12.524 + *
  12.525 + *	Queue a buffer at the start of the list. This function takes the
  12.526 + *	list lock and can be used safely with other locking &sk_buff functions
  12.527 + *	safely.
  12.528 + *
  12.529 + *	A buffer cannot be placed on two lists at the same time.
  12.530 + */	
  12.531 +
  12.532 +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  12.533 +{
  12.534 +	unsigned long flags;
  12.535 +
  12.536 +	spin_lock_irqsave(&list->lock, flags);
  12.537 +	__skb_queue_head(list, newsk);
  12.538 +	spin_unlock_irqrestore(&list->lock, flags);
  12.539 +}
  12.540 +
  12.541 +/**
  12.542 + *	__skb_queue_tail - queue a buffer at the list tail
  12.543 + *	@list: list to use
  12.544 + *	@newsk: buffer to queue
  12.545 + *
  12.546 + *	Queue a buffer at the end of a list. This function takes no locks
  12.547 + *	and you must therefore hold required locks before calling it.
  12.548 + *
  12.549 + *	A buffer cannot be placed on two lists at the same time.
  12.550 + */	
  12.551 + 
  12.552 +
  12.553 +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  12.554 +{
  12.555 +	struct sk_buff *prev, *next;
  12.556 +
  12.557 +	newsk->list = list;
  12.558 +	list->qlen++;
  12.559 +	next = (struct sk_buff *)list;
  12.560 +	prev = next->prev;
  12.561 +	newsk->next = next;
  12.562 +	newsk->prev = prev;
  12.563 +	next->prev = newsk;
  12.564 +	prev->next = newsk;
  12.565 +}
  12.566 +
  12.567 +/**
  12.568 + *	skb_queue_tail - queue a buffer at the list tail
  12.569 + *	@list: list to use
  12.570 + *	@newsk: buffer to queue
  12.571 + *
  12.572 + *	Queue a buffer at the tail of the list. This function takes the
  12.573 + *	list lock and can be used safely with other locking &sk_buff functions
  12.574 + *	safely.
  12.575 + *
  12.576 + *	A buffer cannot be placed on two lists at the same time.
  12.577 + */	
  12.578 +
  12.579 +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  12.580 +{
  12.581 +	unsigned long flags;
  12.582 +
  12.583 +	spin_lock_irqsave(&list->lock, flags);
  12.584 +	__skb_queue_tail(list, newsk);
  12.585 +	spin_unlock_irqrestore(&list->lock, flags);
  12.586 +}
  12.587 +
  12.588 +/**
  12.589 + *	__skb_dequeue - remove from the head of the queue
  12.590 + *	@list: list to dequeue from
  12.591 + *
  12.592 + *	Remove the head of the list. This function does not take any locks
  12.593 + *	so must be used with appropriate locks held only. The head item is
  12.594 + *	returned or %NULL if the list is empty.
  12.595 + */
  12.596 +
  12.597 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  12.598 +{
  12.599 +	struct sk_buff *next, *prev, *result;
  12.600 +
  12.601 +	prev = (struct sk_buff *) list;
  12.602 +	next = prev->next;
  12.603 +	result = NULL;
  12.604 +	if (next != prev) {
  12.605 +		result = next;
  12.606 +		next = next->next;
  12.607 +		list->qlen--;
  12.608 +		next->prev = prev;
  12.609 +		prev->next = next;
  12.610 +		result->next = NULL;
  12.611 +		result->prev = NULL;
  12.612 +		result->list = NULL;
  12.613 +	}
  12.614 +	return result;
  12.615 +}
  12.616 +
  12.617 +/**
  12.618 + *	skb_dequeue - remove from the head of the queue
  12.619 + *	@list: list to dequeue from
  12.620 + *
  12.621 + *	Remove the head of the list. The list lock is taken so the function
  12.622 + *	may be used safely with other locking list functions. The head item is
  12.623 + *	returned or %NULL if the list is empty.
  12.624 + */
  12.625 +
  12.626 +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
  12.627 +{
  12.628 +	long flags;
  12.629 +	struct sk_buff *result;
  12.630 +
  12.631 +	spin_lock_irqsave(&list->lock, flags);
  12.632 +	result = __skb_dequeue(list);
  12.633 +	spin_unlock_irqrestore(&list->lock, flags);
  12.634 +	return result;
  12.635 +}
  12.636 +
  12.637 +/*
  12.638 + *	Insert a packet on a list.
  12.639 + */
  12.640 +
  12.641 +static inline void __skb_insert(struct sk_buff *newsk,
  12.642 +	struct sk_buff * prev, struct sk_buff *next,
  12.643 +	struct sk_buff_head * list)
  12.644 +{
  12.645 +	newsk->next = next;
  12.646 +	newsk->prev = prev;
  12.647 +	next->prev = newsk;
  12.648 +	prev->next = newsk;
  12.649 +	newsk->list = list;
  12.650 +	list->qlen++;
  12.651 +}
  12.652 +
  12.653 +/**
  12.654 + *	skb_insert	-	insert a buffer
  12.655 + *	@old: buffer to insert before
  12.656 + *	@newsk: buffer to insert
  12.657 + *
  12.658 + *	Place a packet before a given packet in a list. The list locks are taken
  12.659 + *	and this function is atomic with respect to other list locked calls
  12.660 + *	A buffer cannot be placed on two lists at the same time.
  12.661 + */
  12.662 +
  12.663 +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
  12.664 +{
  12.665 +	unsigned long flags;
  12.666 +
  12.667 +	spin_lock_irqsave(&old->list->lock, flags);
  12.668 +	__skb_insert(newsk, old->prev, old, old->list);
  12.669 +	spin_unlock_irqrestore(&old->list->lock, flags);
  12.670 +}
  12.671 +
  12.672 +/*
  12.673 + *	Place a packet after a given packet in a list.
  12.674 + */
  12.675 +
  12.676 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
  12.677 +{
  12.678 +	__skb_insert(newsk, old, old->next, old->list);
  12.679 +}
  12.680 +
  12.681 +/**
  12.682 + *	skb_append	-	append a buffer
  12.683 + *	@old: buffer to insert after
  12.684 + *	@newsk: buffer to insert
  12.685 + *
  12.686 + *	Place a packet after a given packet in a list. The list locks are taken
  12.687 + *	and this function is atomic with respect to other list locked calls.
  12.688 + *	A buffer cannot be placed on two lists at the same time.
  12.689 + */
  12.690 +
  12.691 +
  12.692 +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  12.693 +{
  12.694 +	unsigned long flags;
  12.695 +
  12.696 +	spin_lock_irqsave(&old->list->lock, flags);
  12.697 +	__skb_append(old, newsk);
  12.698 +	spin_unlock_irqrestore(&old->list->lock, flags);
  12.699 +}
  12.700 +
  12.701 +/*
  12.702 + * remove sk_buff from list. _Must_ be called atomically, and with
  12.703 + * the list known..
  12.704 + */
  12.705 + 
  12.706 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  12.707 +{
  12.708 +	struct sk_buff * next, * prev;
  12.709 +
  12.710 +	list->qlen--;
  12.711 +	next = skb->next;
  12.712 +	prev = skb->prev;
  12.713 +	skb->next = NULL;
  12.714 +	skb->prev = NULL;
  12.715 +	skb->list = NULL;
  12.716 +	next->prev = prev;
  12.717 +	prev->next = next;
  12.718 +}
  12.719 +
  12.720 +/**
  12.721 + *	skb_unlink	-	remove a buffer from a list
  12.722 + *	@skb: buffer to remove
  12.723 + *
  12.724 + *	Place a packet after a given packet in a list. The list locks are taken
  12.725 + *	and this function is atomic with respect to other list locked calls
  12.726 + *	
  12.727 + *	Works even without knowing the list it is sitting on, which can be 
  12.728 + *	handy at times. It also means that THE LIST MUST EXIST when you 
  12.729 + *	unlink. Thus a list must have its contents unlinked before it is
  12.730 + *	destroyed.
  12.731 + */
  12.732 +
  12.733 +static inline void skb_unlink(struct sk_buff *skb)
  12.734 +{
  12.735 +	struct sk_buff_head *list = skb->list;
  12.736 +
  12.737 +	if(list) {
  12.738 +		unsigned long flags;
  12.739 +
  12.740 +		spin_lock_irqsave(&list->lock, flags);
  12.741 +		if(skb->list == list)
  12.742 +			__skb_unlink(skb, skb->list);
  12.743 +		spin_unlock_irqrestore(&list->lock, flags);
  12.744 +	}
  12.745 +}
  12.746 +
  12.747 +/* XXX: more streamlined implementation */
  12.748 +
  12.749 +/**
  12.750 + *	__skb_dequeue_tail - remove from the tail of the queue
  12.751 + *	@list: list to dequeue from
  12.752 + *
  12.753 + *	Remove the tail of the list. This function does not take any locks
  12.754 + *	so must be used with appropriate locks held only. The tail item is
  12.755 + *	returned or %NULL if the list is empty.
  12.756 + */
  12.757 +
  12.758 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
  12.759 +{
  12.760 +	struct sk_buff *skb = skb_peek_tail(list); 
  12.761 +	if (skb)
  12.762 +		__skb_unlink(skb, list);
  12.763 +	return skb;
  12.764 +}
  12.765 +
  12.766 +/**
  12.767 + *	skb_dequeue - remove from the head of the queue
  12.768 + *	@list: list to dequeue from
  12.769 + *
  12.770 + *	Remove the head of the list. The list lock is taken so the function
  12.771 + *	may be used safely with other locking list functions. The tail item is
  12.772 + *	returned or %NULL if the list is empty.
  12.773 + */
  12.774 +
  12.775 +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
  12.776 +{
  12.777 +	long flags;
  12.778 +	struct sk_buff *result;
  12.779 +
  12.780 +	spin_lock_irqsave(&list->lock, flags);
  12.781 +	result = __skb_dequeue_tail(list);
  12.782 +	spin_unlock_irqrestore(&list->lock, flags);
  12.783 +	return result;
  12.784 +}
  12.785 +
  12.786 +static inline int skb_is_nonlinear(const struct sk_buff *skb)
  12.787 +{
  12.788 +	return skb->data_len;
  12.789 +}
  12.790 +
  12.791 +static inline int skb_headlen(const struct sk_buff *skb)
  12.792 +{
  12.793 +	return skb->len - skb->data_len;
  12.794 +}
  12.795 +
  12.796 +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0)
  12.797 +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0)
  12.798 +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
  12.799 +
  12.800 +/*
  12.801 + *	Add data to an sk_buff
  12.802 + */
  12.803 + 
  12.804 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
  12.805 +{
  12.806 +	unsigned char *tmp=skb->tail;
  12.807 +	SKB_LINEAR_ASSERT(skb);
  12.808 +	skb->tail+=len;
  12.809 +	skb->len+=len;
  12.810 +	return tmp;
  12.811 +}
  12.812 +
  12.813 +/**
  12.814 + *	skb_put - add data to a buffer
  12.815 + *	@skb: buffer to use 
  12.816 + *	@len: amount of data to add
  12.817 + *
  12.818 + *	This function extends the used data area of the buffer. If this would
  12.819 + *	exceed the total buffer size the kernel will panic. A pointer to the
  12.820 + *	first byte of the extra data is returned.
  12.821 + */
  12.822 + 
  12.823 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
  12.824 +{
  12.825 +	unsigned char *tmp=skb->tail;
  12.826 +	SKB_LINEAR_ASSERT(skb);
  12.827 +	skb->tail+=len;
  12.828 +	skb->len+=len;
  12.829 +	if(skb->tail>skb->end) {
  12.830 +		skb_over_panic(skb, len, current_text_addr());
  12.831 +	}
  12.832 +	return tmp;
  12.833 +}
  12.834 +
  12.835 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
  12.836 +{
  12.837 +	skb->data-=len;
  12.838 +	skb->len+=len;
  12.839 +	return skb->data;
  12.840 +}
  12.841 +
  12.842 +/**
  12.843 + *	skb_push - add data to the start of a buffer
  12.844 + *	@skb: buffer to use 
  12.845 + *	@len: amount of data to add
  12.846 + *
  12.847 + *	This function extends the used data area of the buffer at the buffer
  12.848 + *	start. If this would exceed the total buffer headroom the kernel will
  12.849 + *	panic. A pointer to the first byte of the extra data is returned.
  12.850 + */
  12.851 +
  12.852 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
  12.853 +{
  12.854 +	skb->data-=len;
  12.855 +	skb->len+=len;
  12.856 +	if(skb->data<skb->head) {
  12.857 +		skb_under_panic(skb, len, current_text_addr());
  12.858 +	}
  12.859 +	return skb->data;
  12.860 +}
  12.861 +
  12.862 +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
  12.863 +{
  12.864 +	skb->len-=len;
  12.865 +	if (skb->len < skb->data_len)
  12.866 +		BUG();
  12.867 +	return 	skb->data+=len;
  12.868 +}
  12.869 +
  12.870 +/**
  12.871 + *	skb_pull - remove data from the start of a buffer
  12.872 + *	@skb: buffer to use 
  12.873 + *	@len: amount of data to remove
  12.874 + *
  12.875 + *	This function removes data from the start of a buffer, returning
  12.876 + *	the memory to the headroom. A pointer to the next data in the buffer
  12.877 + *	is returned. Once the data has been pulled future pushes will overwrite
  12.878 + *	the old data.
  12.879 + */
  12.880 +
  12.881 +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
  12.882 +{	
  12.883 +	if (len > skb->len)
  12.884 +		return NULL;
  12.885 +	return __skb_pull(skb,len);
  12.886 +}
  12.887 +
  12.888 +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
  12.889 +
  12.890 +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
  12.891 +{
  12.892 +	if (len > skb_headlen(skb) &&
  12.893 +	    __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
  12.894 +		return NULL;
  12.895 +	skb->len -= len;
  12.896 +	return 	skb->data += len;
  12.897 +}
  12.898 +
  12.899 +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
  12.900 +{	
  12.901 +	if (len > skb->len)
  12.902 +		return NULL;
  12.903 +	return __pskb_pull(skb,len);
  12.904 +}
  12.905 +
  12.906 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
  12.907 +{
  12.908 +	if (len <= skb_headlen(skb))
  12.909 +		return 1;
  12.910 +	if (len > skb->len)
  12.911 +		return 0;
  12.912 +	return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
  12.913 +}
  12.914 +
  12.915 +/**
  12.916 + *	skb_headroom - bytes at buffer head
  12.917 + *	@skb: buffer to check
  12.918 + *
  12.919 + *	Return the number of bytes of free space at the head of an &sk_buff.
  12.920 + */
  12.921 + 
  12.922 +static inline int skb_headroom(const struct sk_buff *skb)
  12.923 +{
  12.924 +	return skb->data-skb->head;
  12.925 +}
  12.926 +
  12.927 +/**
  12.928 + *	skb_tailroom - bytes at buffer end
  12.929 + *	@skb: buffer to check
  12.930 + *
  12.931 + *	Return the number of bytes of free space at the tail of an sk_buff
  12.932 + */
  12.933 +
  12.934 +static inline int skb_tailroom(const struct sk_buff *skb)
  12.935 +{
  12.936 +	return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
  12.937 +}
  12.938 +
  12.939 +/**
  12.940 + *	skb_reserve - adjust headroom
  12.941 + *	@skb: buffer to alter
  12.942 + *	@len: bytes to move
  12.943 + *
  12.944 + *	Increase the headroom of an empty &sk_buff by reducing the tail
  12.945 + *	room. This is only allowed for an empty buffer.
  12.946 + */
  12.947 +
  12.948 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
  12.949 +{
  12.950 +	skb->data+=len;
  12.951 +	skb->tail+=len;
  12.952 +}
  12.953 +
  12.954 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
  12.955 +
  12.956 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
  12.957 +{
  12.958 +	if (!skb->data_len) {
  12.959 +		skb->len = len;
  12.960 +		skb->tail = skb->data+len;
  12.961 +	} else {
  12.962 +		___pskb_trim(skb, len, 0);
  12.963 +	}
  12.964 +}
  12.965 +
  12.966 +/**
  12.967 + *	skb_trim - remove end from a buffer
  12.968 + *	@skb: buffer to alter
  12.969 + *	@len: new length
  12.970 + *
  12.971 + *	Cut the length of a buffer down by removing data from the tail. If
  12.972 + *	the buffer is already under the length specified it is not modified.
  12.973 + */
  12.974 +
  12.975 +static inline void skb_trim(struct sk_buff *skb, unsigned int len)
  12.976 +{
  12.977 +	if (skb->len > len) {
  12.978 +		__skb_trim(skb, len);
  12.979 +	}
  12.980 +}
  12.981 +
  12.982 +
  12.983 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
  12.984 +{
  12.985 +	if (!skb->data_len) {
  12.986 +		skb->len = len;
  12.987 +		skb->tail = skb->data+len;
  12.988 +		return 0;
  12.989 +	} else {
  12.990 +		return ___pskb_trim(skb, len, 1);
  12.991 +	}
  12.992 +}
  12.993 +
  12.994 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
  12.995 +{
  12.996 +	if (len < skb->len)
  12.997 +		return __pskb_trim(skb, len);
  12.998 +	return 0;
  12.999 +}
 12.1000 +
 12.1001 +/**
 12.1002 + *	skb_orphan - orphan a buffer
 12.1003 + *	@skb: buffer to orphan
 12.1004 + *
 12.1005 + *	If a buffer currently has an owner then we call the owner's
 12.1006 + *	destructor function and make the @skb unowned. The buffer continues
 12.1007 + *	to exist but is no longer charged to its former owner.
 12.1008 + */
 12.1009 +
 12.1010 +
 12.1011 +static inline void skb_orphan(struct sk_buff *skb)
 12.1012 +{
 12.1013 +	if (skb->destructor)
 12.1014 +		skb->destructor(skb);
 12.1015 +	skb->destructor = NULL;
 12.1016 +	skb->sk = NULL;
 12.1017 +}
 12.1018 +
 12.1019 +/**
  12.1020 + *	skb_queue_purge - empty a list
 12.1021 + *	@list: list to empty
 12.1022 + *
 12.1023 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 12.1024 + *	the list and one reference dropped. This function takes the list
 12.1025 + *	lock and is atomic with respect to other list locking functions.
 12.1026 + */
 12.1027 +
 12.1028 +
 12.1029 +static inline void skb_queue_purge(struct sk_buff_head *list)
 12.1030 +{
 12.1031 +	struct sk_buff *skb;
 12.1032 +	while ((skb=skb_dequeue(list))!=NULL)
 12.1033 +		kfree_skb(skb);
 12.1034 +}
 12.1035 +
 12.1036 +/**
  12.1037 + *	__skb_queue_purge - empty a list
 12.1038 + *	@list: list to empty
 12.1039 + *
 12.1040 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 12.1041 + *	the list and one reference dropped. This function does not take the
 12.1042 + *	list lock and the caller must hold the relevant locks to use it.
 12.1043 + */
 12.1044 +
 12.1045 +
 12.1046 +static inline void __skb_queue_purge(struct sk_buff_head *list)
 12.1047 +{
 12.1048 +	struct sk_buff *skb;
 12.1049 +	while ((skb=__skb_dequeue(list))!=NULL)
 12.1050 +		kfree_skb(skb);
 12.1051 +}
 12.1052 +
 12.1053 +/**
 12.1054 + *	__dev_alloc_skb - allocate an skbuff for sending
 12.1055 + *	@length: length to allocate
 12.1056 + *	@gfp_mask: get_free_pages mask, passed to alloc_skb
 12.1057 + *
 12.1058 + *	Allocate a new &sk_buff and assign it a usage count of one. The
 12.1059 + *	buffer has unspecified headroom built in. Users should allocate
 12.1060 + *	the headroom they think they need without accounting for the
 12.1061 + *	built in space. The built in space is used for optimisations.
 12.1062 + *
  12.1063 + *	%NULL is returned if there is no free memory.
 12.1064 + */
 12.1065 + 
 12.1066 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 12.1067 +					      int gfp_mask)
 12.1068 +{
 12.1069 +	struct sk_buff *skb;
 12.1070 +
 12.1071 +	//skb = alloc_skb(length+16, gfp_mask);
 12.1072 +        skb = alloc_zc_skb(length+16, gfp_mask);
 12.1073 +	if (skb)
 12.1074 +		skb_reserve(skb,16);
 12.1075 +	return skb;
 12.1076 +}
 12.1077 +
 12.1078 +/**
 12.1079 + *	dev_alloc_skb - allocate an skbuff for sending
 12.1080 + *	@length: length to allocate
 12.1081 + *
 12.1082 + *	Allocate a new &sk_buff and assign it a usage count of one. The
 12.1083 + *	buffer has unspecified headroom built in. Users should allocate
 12.1084 + *	the headroom they think they need without accounting for the
 12.1085 + *	built in space. The built in space is used for optimisations.
 12.1086 + *
  12.1087 + *	%NULL is returned if there is no free memory. Although this function
 12.1088 + *	allocates memory it can be called from an interrupt.
 12.1089 + */
 12.1090 + 
 12.1091 +static inline struct sk_buff *dev_alloc_skb(unsigned int length)
 12.1092 +{
 12.1093 +	return __dev_alloc_skb(length, GFP_ATOMIC);
 12.1094 +}
 12.1095 +
 12.1096 +/**
 12.1097 + *	skb_cow - copy header of skb when it is required
 12.1098 + *	@skb: buffer to cow
 12.1099 + *	@headroom: needed headroom
 12.1100 + *
 12.1101 + *	If the skb passed lacks sufficient headroom or its data part
 12.1102 + *	is shared, data is reallocated. If reallocation fails, an error
 12.1103 + *	is returned and original skb is not changed.
 12.1104 + *
 12.1105 + *	The result is skb with writable area skb->head...skb->tail
 12.1106 + *	and at least @headroom of space at head.
 12.1107 + */
 12.1108 +
 12.1109 +static inline int
 12.1110 +skb_cow(struct sk_buff *skb, unsigned int headroom)
 12.1111 +{
 12.1112 +	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
 12.1113 +
 12.1114 +	if (delta < 0)
 12.1115 +		delta = 0;
 12.1116 +
 12.1117 +	if (delta || skb_cloned(skb))
 12.1118 +		return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
 12.1119 +	return 0;
 12.1120 +}
 12.1121 +
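[Illustrative sketch, not part of the changeset: skb_cow() is the usual guard a caller runs before modifying header bytes in place, since it guarantees an unshared data area with at least the requested headroom. A hypothetical caller might look like this:

static int example_mangle_header(struct sk_buff *skb)
{
	int err = skb_cow(skb, 16);	/* make data private, >= 16 bytes headroom */
	if (err)
		return err;		/* reallocation failed; skb is unchanged */
	skb->data[0] ^= 0x01;		/* now safe to modify header bytes in place */
	return 0;
}
]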
 12.1122 +/**
 12.1123 + *	skb_linearize - convert paged skb to linear one
  12.1124 + *	@skb: buffer to linearize
 12.1125 + *	@gfp: allocation mode
 12.1126 + *
 12.1127 + *	If there is no free memory -ENOMEM is returned, otherwise zero
 12.1128 + *	is returned and the old skb data released.  */
 12.1129 +int skb_linearize(struct sk_buff *skb, int gfp);
 12.1130 +
 12.1131 +static inline void *kmap_skb_frag(const skb_frag_t *frag)
 12.1132 +{
 12.1133 +#ifdef CONFIG_HIGHMEM
 12.1134 +	if (in_irq())
 12.1135 +		BUG();
 12.1136 +
 12.1137 +	local_bh_disable();
 12.1138 +#endif
 12.1139 +	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
 12.1140 +}
 12.1141 +
 12.1142 +static inline void kunmap_skb_frag(void *vaddr)
 12.1143 +{
 12.1144 +	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 12.1145 +#ifdef CONFIG_HIGHMEM
 12.1146 +	local_bh_enable();
 12.1147 +#endif
 12.1148 +}
 12.1149 +
 12.1150 +#define skb_queue_walk(queue, skb) \
 12.1151 +		for (skb = (queue)->next;			\
 12.1152 +		     (skb != (struct sk_buff *)(queue));	\
 12.1153 +		     skb=skb->next)
 12.1154 +
 12.1155 +
 12.1156 +extern struct sk_buff *		skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
 12.1157 +extern unsigned int		datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
 12.1158 +extern int			skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
 12.1159 +extern int			skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
 12.1160 +extern int			skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
 12.1161 +extern int			skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
 12.1162 +extern void			skb_free_datagram(struct sock * sk, struct sk_buff *skb);
 12.1163 +
 12.1164 +extern unsigned int		skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
 12.1165 +extern int			skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
 12.1166 +extern unsigned int		skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
 12.1167 +extern void			skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 12.1168 +
 12.1169 +extern void skb_init(void);
 12.1170 +extern void skb_add_mtu(int mtu);
 12.1171 +
 12.1172 +#ifdef CONFIG_NETFILTER
 12.1173 +static inline void
 12.1174 +nf_conntrack_put(struct nf_ct_info *nfct)
 12.1175 +{
 12.1176 +	if (nfct && atomic_dec_and_test(&nfct->master->use))
 12.1177 +		nfct->master->destroy(nfct->master);
 12.1178 +}
 12.1179 +static inline void
 12.1180 +nf_conntrack_get(struct nf_ct_info *nfct)
 12.1181 +{
 12.1182 +	if (nfct)
 12.1183 +		atomic_inc(&nfct->master->use);
 12.1184 +}
 12.1185 +#endif
 12.1186 +
 12.1187 +#endif	/* __KERNEL__ */
 12.1188 +#endif	/* _LINUX_SKBUFF_H */
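[Illustrative sketch, not part of the changeset: the data-area helpers declared above all adjust the same four pointers (head, data, tail, end). A typical transmit-side sequence, with made-up sizes and a hypothetical caller, might be:

#include <linux/skbuff.h>
#include <linux/string.h>

static struct sk_buff *example_build_frame(const void *payload, unsigned int len,
					   unsigned int hdr_len)
{
	/* dev_alloc_skb() already reserves 16 bytes of headroom (and, with
	 * this changeset, draws its data from the zero-copy page pool). */
	struct sk_buff *skb = dev_alloc_skb(hdr_len + len);

	if (skb == NULL)
		return NULL;

	skb_reserve(skb, hdr_len);			/* leave room for a header  */
	memcpy(skb_put(skb, len), payload, len);	/* append payload at tail   */
	memset(skb_push(skb, hdr_len), 0, hdr_len);	/* prepend the header       */
	/* a receiver would strip it again with skb_pull(skb, hdr_len) */
	return skb;
}
]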
    13.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    13.2 +++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c	Sat Feb 01 12:06:32 2003 +0000
    13.3 @@ -0,0 +1,1366 @@
    13.4 +/*
    13.5 + *	Routines having to do with the 'struct sk_buff' memory handlers.
    13.6 + *
    13.7 + *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
    13.8 + *			Florian La Roche <rzsfl@rz.uni-sb.de>
    13.9 + *
   13.10 + *	Version:	$Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
   13.11 + *
   13.12 + *	Fixes:	
   13.13 + *		Alan Cox	:	Fixed the worst of the load balancer bugs.
   13.14 + *		Dave Platt	:	Interrupt stacking fix.
   13.15 + *	Richard Kooijman	:	Timestamp fixes.
   13.16 + *		Alan Cox	:	Changed buffer format.
   13.17 + *		Alan Cox	:	destructor hook for AF_UNIX etc.
   13.18 + *		Linus Torvalds	:	Better skb_clone.
   13.19 + *		Alan Cox	:	Added skb_copy.
   13.20 + *		Alan Cox	:	Added all the changed routines Linus
   13.21 + *					only put in the headers
   13.22 + *		Ray VanTassle	:	Fixed --skb->lock in free
   13.23 + *		Alan Cox	:	skb_copy copy arp field
   13.24 + *		Andi Kleen	:	slabified it.
   13.25 + *
   13.26 + *	NOTE:
   13.27 + *		The __skb_ routines should be called with interrupts 
   13.28 + *	disabled, or you better be *real* sure that the operation is atomic 
   13.29 + *	with respect to whatever list is being frobbed (e.g. via lock_sock()
   13.30 + *	or via disabling bottom half handlers, etc).
   13.31 + *
   13.32 + *	This program is free software; you can redistribute it and/or
   13.33 + *	modify it under the terms of the GNU General Public License
   13.34 + *	as published by the Free Software Foundation; either version
   13.35 + *	2 of the License, or (at your option) any later version.
   13.36 + */
   13.37 +
   13.38 +/*
   13.39 + *	The functions in this file will not compile correctly with gcc 2.4.x
   13.40 + */
   13.41 +
   13.42 +#include <linux/config.h>
   13.43 +#include <linux/types.h>
   13.44 +#include <linux/kernel.h>
   13.45 +#include <linux/sched.h>
   13.46 +#include <linux/mm.h>
   13.47 +#include <linux/interrupt.h>
   13.48 +#include <linux/in.h>
   13.49 +#include <linux/inet.h>
   13.50 +#include <linux/slab.h>
   13.51 +#include <linux/netdevice.h>
   13.52 +#include <linux/string.h>
   13.53 +#include <linux/skbuff.h>
   13.54 +#include <linux/cache.h>
   13.55 +#include <linux/init.h>
   13.56 +#include <linux/highmem.h>
   13.57 +#include <linux/spinlock.h>
   13.58 +
   13.59 +#include <net/ip.h>
   13.60 +#include <net/protocol.h>
   13.61 +#include <net/dst.h>
   13.62 +#include <net/tcp.h>
   13.63 +#include <net/udp.h>
   13.64 +#include <net/sock.h>
   13.65 +
   13.66 +#include <asm/uaccess.h>
   13.67 +#include <asm/system.h>
   13.68 +
   13.69 +/* zc globals: */
   13.70 +char *net_page_chunk;
   13.71 +struct net_page_info *net_page_table;
   13.72 +struct list_head net_page_list;
   13.73 +spinlock_t net_page_list_lock = SPIN_LOCK_UNLOCKED;
   13.74 +unsigned int net_pages;
   13.75 +
   13.76 +
   13.77 +
   13.78 +int sysctl_hot_list_len = 128;
   13.79 +
   13.80 +static kmem_cache_t *skbuff_head_cache;
   13.81 +
   13.82 +static union {
   13.83 +	struct sk_buff_head	list;
   13.84 +	char			pad[SMP_CACHE_BYTES];
   13.85 +} skb_head_pool[NR_CPUS];
   13.86 +
   13.87 +/*
   13.88 + *	Keep out-of-line to prevent kernel bloat.
   13.89 + *	__builtin_return_address is not used because it is not always
   13.90 + *	reliable. 
   13.91 + */
   13.92 +
   13.93 +/**
   13.94 + *	skb_over_panic	- 	private function
   13.95 + *	@skb: buffer
   13.96 + *	@sz: size
   13.97 + *	@here: address
   13.98 + *
   13.99 + *	Out of line support code for skb_put(). Not user callable.
  13.100 + */
  13.101 + 
  13.102 +void skb_over_panic(struct sk_buff *skb, int sz, void *here)
  13.103 +{
  13.104 +	printk("skput:over: %p:%d put:%d dev:%s", 
  13.105 +		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
  13.106 +	BUG();
  13.107 +}
  13.108 +
  13.109 +/**
  13.110 + *	skb_under_panic	- 	private function
  13.111 + *	@skb: buffer
  13.112 + *	@sz: size
  13.113 + *	@here: address
  13.114 + *
  13.115 + *	Out of line support code for skb_push(). Not user callable.
  13.116 + */
  13.117 + 
  13.118 +
  13.119 +void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  13.120 +{
  13.121 +        printk("skput:under: %p:%d put:%d dev:%s",
  13.122 +                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
  13.123 +	BUG();
  13.124 +}
  13.125 +
  13.126 +static __inline__ struct sk_buff *skb_head_from_pool(void)
  13.127 +{
  13.128 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
  13.129 +
  13.130 +	if (skb_queue_len(list)) {
  13.131 +		struct sk_buff *skb;
  13.132 +		unsigned long flags;
  13.133 +
  13.134 +		local_irq_save(flags);
  13.135 +		skb = __skb_dequeue(list);
  13.136 +		local_irq_restore(flags);
  13.137 +		return skb;
  13.138 +	}
  13.139 +	return NULL;
  13.140 +}
  13.141 +
  13.142 +static __inline__ void skb_head_to_pool(struct sk_buff *skb)
  13.143 +{
  13.144 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
  13.145 +
  13.146 +	if (skb_queue_len(list) < sysctl_hot_list_len) {
  13.147 +		unsigned long flags;
  13.148 +
  13.149 +		local_irq_save(flags);
  13.150 +		__skb_queue_head(list, skb);
  13.151 +		local_irq_restore(flags);
  13.152 +
  13.153 +		return;
  13.154 +	}
  13.155 +	kmem_cache_free(skbuff_head_cache, skb);
  13.156 +}
  13.157 +
  13.158 +
  13.159 +/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  13.160 + *	'private' fields and also do memory statistics to find all the
  13.161 + *	[BEEP] leaks.
  13.162 + * 
  13.163 + */
  13.164 +
  13.165 +/**
  13.166 + *	alloc_skb	-	allocate a network buffer
  13.167 + *	@size: size to allocate
  13.168 + *	@gfp_mask: allocation mask
  13.169 + *
  13.170 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
  13.171 + *	tail room of size bytes. The object has a reference count of one.
  13.172 + *	The return is the buffer. On a failure the return is %NULL.
  13.173 + *
  13.174 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
  13.175 + *	%GFP_ATOMIC.
  13.176 + */
  13.177 + 
  13.178 +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
  13.179 +{
  13.180 +	struct sk_buff *skb;
  13.181 +	u8 *data;
  13.182 +
  13.183 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  13.184 +		static int count = 0;
  13.185 +		if (++count < 5) {
  13.186 +			printk(KERN_ERR "alloc_skb called nonatomically "
  13.187 +			       "from interrupt %p\n", NET_CALLER(size));
  13.188 + 			BUG();
  13.189 +		}
  13.190 +		gfp_mask &= ~__GFP_WAIT;
  13.191 +	}
  13.192 +
  13.193 +	/* Get the HEAD */
  13.194 +	skb = skb_head_from_pool();
  13.195 +	if (skb == NULL) {
  13.196 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  13.197 +		if (skb == NULL)
  13.198 +			goto nohead;
  13.199 +	}
  13.200 +
  13.201 +	/* Get the DATA. Size must match skb_add_mtu(). */
  13.202 +	size = SKB_DATA_ALIGN(size);
  13.203 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  13.204 +	if (data == NULL)
  13.205 +		goto nodata;
  13.206 +
  13.207 +	/* XXX: does not include slab overhead */ 
  13.208 +	skb->truesize = size + sizeof(struct sk_buff);
  13.209 +
  13.210 +	/* Load the data pointers. */
  13.211 +	skb->head = data;
  13.212 +	skb->data = data;
  13.213 +	skb->tail = data;
  13.214 +	skb->end = data + size;
  13.215 +
  13.216 +	/* Set up other state */
  13.217 +	skb->len = 0;
  13.218 +	skb->cloned = 0;
  13.219 +	skb->data_len = 0;
  13.220 +        skb->skb_type = SKB_NORMAL;
  13.221 +
  13.222 +	atomic_set(&skb->users, 1); 
  13.223 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  13.224 +	skb_shinfo(skb)->nr_frags = 0;
  13.225 +	skb_shinfo(skb)->frag_list = NULL;
  13.226 +	return skb;
  13.227 +
  13.228 +nodata:
  13.229 +	skb_head_to_pool(skb);
  13.230 +nohead:
  13.231 +	return NULL;
  13.232 +}
  13.233 +
  13.234 +/* begin zc code additions: */
  13.235 +
  13.236 +void init_net_pages(unsigned long order_pages)
  13.237 +{
  13.238 +        int i;
  13.239 +        struct net_page_info *np;
  13.240 +        pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
  13.241 +        unsigned long nr_pages = 1 << order_pages;
  13.242 +        
  13.243 +        net_page_chunk = (char *)__get_free_pages(GFP_KERNEL, order_pages);
  13.244 +        net_page_table = kmalloc(nr_pages * sizeof(struct net_page_info), GFP_KERNEL);
  13.245 +
  13.246 +        INIT_LIST_HEAD(&net_page_list);
  13.247 +
  13.248 +        for (i = 0; i < nr_pages; i++) 
  13.249 +        {
  13.250 +                np = net_page_table + i;
  13.251 +                np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE);
  13.252 +                
  13.253 +                // now fill the pte pointer:
  13.254 +                np->ppte = 0xdeadbeef;
  13.255 +                pgd = pgd_offset_k(np->virt_addr);
  13.256 +                if (!pgd_none(*pgd))
  13.257 +                {
  13.258 +                    pmd = pmd_offset(pgd, np->virt_addr);
  13.259 +                    if (!pmd_none(*pmd))
  13.260 +                    {
  13.261 +                            ptep = pte_offset(pmd, np->virt_addr);
   13.262 +                            np->ppte = (unsigned long)ptep; // need to virt_to_phys this?
  13.263 +                    }
  13.264 +                }
  13.265 +
  13.266 +                list_add_tail(&np->list, &net_page_list);
  13.267 +        }
  13.268 +        net_pages = nr_pages;
  13.269 +        
  13.270 +
  13.271 +}
  13.272 +
  13.273 +struct net_page_info *get_net_page(void)
  13.274 +{
  13.275 +    struct list_head *list_ptr;
  13.276 +    struct net_page_info *np;
  13.277 +    unsigned long flags;
  13.278 +
  13.279 +    if (!net_pages) 
  13.280 +    {
  13.281 +            return NULL;
  13.282 +    }
  13.283 +    spin_lock_irqsave(&net_page_list_lock, flags);
  13.284 +    
  13.285 +    list_ptr = net_page_list.next;
  13.286 +    np = list_entry(list_ptr, struct net_page_info, list);
  13.287 +    list_del(&np->list);
  13.288 +    net_pages--;
  13.289 +    
  13.290 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
  13.291 +    
  13.292 +    return np;
  13.293 +}
  13.294 +
  13.295 +void free_net_page(struct net_page_info *np)
  13.296 +{
  13.297 +    unsigned long flags;
  13.298 +  
  13.299 +    if (np == NULL) return;
  13.300 +    
  13.301 +    spin_lock_irqsave(&net_page_list_lock, flags);
  13.302 +    
  13.303 +    list_add_tail(&np->list, &net_page_list);
  13.304 +    net_pages++;
  13.305 +
  13.306 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
  13.307 +}
  13.308 +
  13.309 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
  13.310 +{
  13.311 +	struct sk_buff *skb;
  13.312 +	u8 *data;
  13.313 +
  13.314 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  13.315 +		static int count = 0;
  13.316 +		if (++count < 5) {
  13.317 +			printk(KERN_ERR "alloc_skb called nonatomically "
  13.318 +			       "from interrupt %p\n", NET_CALLER(size));
  13.319 + 			BUG();
  13.320 +		}
  13.321 +		gfp_mask &= ~__GFP_WAIT;
  13.322 +	}
  13.323 +
  13.324 +	/* Get the HEAD */
  13.325 +	skb = skb_head_from_pool();
  13.326 +	if (skb == NULL) {
  13.327 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  13.328 +		if (skb == NULL)
  13.329 +			goto nohead;
  13.330 +	}
  13.331 +
  13.332 +	/* Get the DATA. Size must match skb_add_mtu(). */
  13.333 +	size = SKB_DATA_ALIGN(size);
  13.334 +        if (size > PAGE_SIZE)
  13.335 +        {
  13.336 +                printk("alloc_zc_skb called with unruly size.\n");
  13.337 +                size = PAGE_SIZE;
  13.338 +        }
  13.339 +	skb->net_page = get_net_page();
  13.340 +        if (skb->net_page == NULL)
  13.341 +        {
  13.342 +                goto nodata;
  13.343 +        }
  13.344 +        data = (u8 *)skb->net_page->virt_addr;
  13.345 +	if (data == NULL)
  13.346 +		goto nodata;
  13.347 +	/* XXX: does not include slab overhead */ 
  13.348 +	skb->truesize = size + sizeof(struct sk_buff);
  13.349 +
  13.350 +	/* Load the data pointers. */
  13.351 +	skb->head = data;
  13.352 +	skb->data = data;
  13.353 +	skb->tail = data;
  13.354 +	skb->end = data + size;
  13.355 +
  13.356 +	/* Set up other state */
  13.357 +	skb->len = 0;
  13.358 +	skb->cloned = 0;
  13.359 +	skb->data_len = 0;
  13.360 +        skb->skb_type = SKB_ZERO_COPY;
  13.361 +
  13.362 +	atomic_set(&skb->users, 1); 
  13.363 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  13.364 +	skb_shinfo(skb)->nr_frags = 0;
  13.365 +	skb_shinfo(skb)->frag_list = NULL;
  13.366 +	return skb;
  13.367 +
  13.368 +nodata:
  13.369 +	skb_head_to_pool(skb);
  13.370 +nohead:
  13.371 +	return NULL;
  13.372 +}
  13.373 +
  13.374 +/* end zc code additions: */
  13.375 +
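[Illustrative sketch, not part of the changeset: the zero-copy additions above give three entry points. init_net_pages() carves out a contiguous chunk of pages and records, for each one, its virtual address and a pointer to its kernel PTE; get_net_page()/free_net_page() hand pages out of and back into that pool under net_page_list_lock; and alloc_zc_skb() wraps a pool page in an sk_buff marked SKB_ZERO_COPY, so that skb_release_data() below returns the page to the pool instead of kfree()ing skb->head. A hypothetical receive-path caller (assuming the usual skbuff.h declarations; the 1500-byte size is an arbitrary example) might be:

static struct sk_buff *example_zc_rx_alloc(void)
{
	struct sk_buff *skb = alloc_zc_skb(1500, GFP_ATOMIC);

	if (skb == NULL)
		return NULL;	/* skb head pool or net page pool exhausted */

	/* skb->head now points into net_page_chunk, and skb->net_page->ppte
	 * holds a pointer to the PTE mapping that page (see init_net_pages). */
	return skb;
}

static void example_zc_rx_free(struct sk_buff *skb)
{
	/* kfree_skb() ends up in skb_release_data(), which sees
	 * skb_type == SKB_ZERO_COPY and calls free_net_page()
	 * rather than kfree(skb->head). */
	kfree_skb(skb);
}
]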
  13.376 +/*
  13.377 + *	Slab constructor for a skb head. 
  13.378 + */ 
  13.379 +static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
  13.380 +				  unsigned long flags)
  13.381 +{
  13.382 +	struct sk_buff *skb = p;
  13.383 +
  13.384 +	skb->next = NULL;
  13.385 +	skb->prev = NULL;
  13.386 +	skb->list = NULL;
  13.387 +	skb->sk = NULL;
  13.388 +	skb->stamp.tv_sec=0;	/* No idea about time */
  13.389 +	skb->dev = NULL;
  13.390 +	skb->dst = NULL;
  13.391 +	memset(skb->cb, 0, sizeof(skb->cb));
  13.392 +	skb->pkt_type = PACKET_HOST;	/* Default type */
  13.393 +	skb->ip_summed = 0;
  13.394 +	skb->priority = 0;
  13.395 +	skb->security = 0;	/* By default packets are insecure */
  13.396 +	skb->destructor = NULL;
  13.397 +
  13.398 +#ifdef CONFIG_NETFILTER
  13.399 +	skb->nfmark = skb->nfcache = 0;
  13.400 +	skb->nfct = NULL;
  13.401 +#ifdef CONFIG_NETFILTER_DEBUG
  13.402 +	skb->nf_debug = 0;
  13.403 +#endif
  13.404 +#endif
  13.405 +#ifdef CONFIG_NET_SCHED
  13.406 +	skb->tc_index = 0;
  13.407 +#endif
  13.408 +}
  13.409 +
  13.410 +static void skb_drop_fraglist(struct sk_buff *skb)
  13.411 +{
  13.412 +	struct sk_buff *list = skb_shinfo(skb)->frag_list;
  13.413 +
  13.414 +	skb_shinfo(skb)->frag_list = NULL;
  13.415 +
  13.416 +	do {
  13.417 +		struct sk_buff *this = list;
  13.418 +		list = list->next;
  13.419 +		kfree_skb(this);
  13.420 +	} while (list);
  13.421 +}
  13.422 +
  13.423 +static void skb_clone_fraglist(struct sk_buff *skb)
  13.424 +{
  13.425 +	struct sk_buff *list;
  13.426 +
  13.427 +	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
  13.428 +		skb_get(list);
  13.429 +}
  13.430 +
  13.431 +static void skb_release_data(struct sk_buff *skb)
  13.432 +{
  13.433 +	if (!skb->cloned ||
  13.434 +	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
  13.435 +		if (skb_shinfo(skb)->nr_frags) {
  13.436 +			int i;
  13.437 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
  13.438 +				put_page(skb_shinfo(skb)->frags[i].page);
  13.439 +		}
  13.440 +
  13.441 +		if (skb_shinfo(skb)->frag_list)
  13.442 +			skb_drop_fraglist(skb);
  13.443 +
  13.444 +                if (skb->skb_type == SKB_NORMAL)
  13.445 +                {
  13.446 +		    kfree(skb->head);
  13.447 +                } else {// SKB_ZERO_COPY
  13.448 +                    free_net_page(skb->net_page);
  13.449 +                }
  13.450 +	}
  13.451 +}
  13.452 +
  13.453 +/*
  13.454 + *	Free an skbuff by memory without cleaning the state. 
  13.455 + */
  13.456 +void kfree_skbmem(struct sk_buff *skb)
  13.457 +{
  13.458 +	skb_release_data(skb);
  13.459 +	skb_head_to_pool(skb);
  13.460 +}
  13.461 +
  13.462 +/**
  13.463 + *	__kfree_skb - private function 
  13.464 + *	@skb: buffer
  13.465 + *
  13.466 + *	Free an sk_buff. Release anything attached to the buffer. 
  13.467 + *	Clean the state. This is an internal helper function. Users should
  13.468 + *	always call kfree_skb
  13.469 + */
  13.470 +
  13.471 +void __kfree_skb(struct sk_buff *skb)
  13.472 +{
  13.473 +	if (skb->list) {
  13.474 +	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
  13.475 +		       "on a list (from %p).\n", NET_CALLER(skb));
  13.476 +		BUG();
  13.477 +	}
  13.478 +
  13.479 +	dst_release(skb->dst);
  13.480 +	if(skb->destructor) {
  13.481 +		if (in_irq()) {
  13.482 +			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
  13.483 +				NET_CALLER(skb));
  13.484 +		}
  13.485 +		skb->destructor(skb);
  13.486 +	}
  13.487 +#ifdef CONFIG_NETFILTER
  13.488 +	nf_conntrack_put(skb->nfct);
  13.489 +#endif
  13.490 +	skb_headerinit(skb, NULL, 0);  /* clean state */
  13.491 +	kfree_skbmem(skb);
  13.492 +}
  13.493 +
  13.494 +/**
  13.495 + *	skb_clone	-	duplicate an sk_buff
  13.496 + *	@skb: buffer to clone
  13.497 + *	@gfp_mask: allocation priority
  13.498 + *
  13.499 + *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
  13.500 + *	copies share the same packet data but not structure. The new
  13.501 + *	buffer has a reference count of 1. If the allocation fails the 
  13.502 + *	function returns %NULL otherwise the new buffer is returned.
  13.503 + *	
  13.504 + *	If this function is called from an interrupt gfp_mask() must be
  13.505 + *	%GFP_ATOMIC.
  13.506 + */
  13.507 +
  13.508 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
  13.509 +{
  13.510 +	struct sk_buff *n;
  13.511 +
  13.512 +	n = skb_head_from_pool();
  13.513 +	if (!n) {
  13.514 +		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
  13.515 +		if (!n)
  13.516 +			return NULL;
  13.517 +	}
  13.518 +
  13.519 +#define C(x) n->x = skb->x
  13.520 +
  13.521 +	n->next = n->prev = NULL;
  13.522 +	n->list = NULL;
  13.523 +	n->sk = NULL;
  13.524 +	C(stamp);
  13.525 +	C(dev);
  13.526 +	C(h);
  13.527 +	C(nh);
  13.528 +	C(mac);
  13.529 +	C(dst);
  13.530 +	dst_clone(n->dst);
  13.531 +	memcpy(n->cb, skb->cb, sizeof(skb->cb));
  13.532 +	C(len);
  13.533 +	C(data_len);
  13.534 +	C(csum);
  13.535 +	n->cloned = 1;
  13.536 +	C(pkt_type);
  13.537 +	C(ip_summed);
  13.538 +	C(priority);
  13.539 +	atomic_set(&n->users, 1);
  13.540 +	C(protocol);
  13.541 +	C(security);
  13.542 +	C(truesize);
  13.543 +	C(head);
  13.544 +	C(data);
  13.545 +	C(tail);
  13.546 +	C(end);
  13.547 +	n->destructor = NULL;
  13.548 +#ifdef CONFIG_NETFILTER
  13.549 +	C(nfmark);
  13.550 +	C(nfcache);
  13.551 +	C(nfct);
  13.552 +#ifdef CONFIG_NETFILTER_DEBUG
  13.553 +	C(nf_debug);
  13.554 +#endif
  13.555 +#endif /*CONFIG_NETFILTER*/
  13.556 +#if defined(CONFIG_HIPPI)
  13.557 +	C(private);
  13.558 +#endif
  13.559 +#ifdef CONFIG_NET_SCHED
  13.560 +	C(tc_index);
  13.561 +#endif
  13.562 +        C(skb_type);
  13.563 +        C(net_page);
  13.564 +	atomic_inc(&(skb_shinfo(skb)->dataref));
  13.565 +	skb->cloned = 1;
  13.566 +#ifdef CONFIG_NETFILTER
  13.567 +	nf_conntrack_get(skb->nfct);
  13.568 +#endif
  13.569 +	return n;
  13.570 +}
  13.571 +
  13.572 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  13.573 +{
  13.574 +	/*
  13.575 +	 *	Shift between the two data areas in bytes
  13.576 +	 */
  13.577 +	unsigned long offset = new->data - old->data;
  13.578 +
  13.579 +	new->list=NULL;
  13.580 +	new->sk=NULL;
  13.581 +	new->dev=old->dev;
  13.582 +	new->priority=old->priority;
  13.583 +	new->protocol=old->protocol;
  13.584 +	new->dst=dst_clone(old->dst);
  13.585 +	new->h.raw=old->h.raw+offset;
  13.586 +	new->nh.raw=old->nh.raw+offset;
  13.587 +	new->mac.raw=old->mac.raw+offset;
  13.588 +	memcpy(new->cb, old->cb, sizeof(old->cb));
  13.589 +	atomic_set(&new->users, 1);
  13.590 +	new->pkt_type=old->pkt_type;
  13.591 +	new->stamp=old->stamp;
  13.592 +	new->destructor = NULL;
  13.593 +	new->security=old->security;
  13.594 +#ifdef CONFIG_NETFILTER
  13.595 +	new->nfmark=old->nfmark;
  13.596 +	new->nfcache=old->nfcache;
  13.597 +	new->nfct=old->nfct;
  13.598 +	nf_conntrack_get(new->nfct);
  13.599 +#ifdef CONFIG_NETFILTER_DEBUG
  13.600 +	new->nf_debug=old->nf_debug;
  13.601 +#endif
  13.602 +#endif
  13.603 +#ifdef CONFIG_NET_SCHED
  13.604 +	new->tc_index = old->tc_index;
  13.605 +#endif
  13.606 +}
  13.607 +
  13.608 +/**
  13.609 + *	skb_copy	-	create private copy of an sk_buff
  13.610 + *	@skb: buffer to copy
  13.611 + *	@gfp_mask: allocation priority
  13.612 + *
  13.613 + *	Make a copy of both an &sk_buff and its data. This is used when the
  13.614 + *	caller wishes to modify the data and needs a private copy of the 
  13.615 + *	data to alter. Returns %NULL on failure or the pointer to the buffer
  13.616 + *	on success. The returned buffer has a reference count of 1.
  13.617 + *
   13.618 + *	As a by-product, this function converts a non-linear &sk_buff into a
   13.619 + *	linear one, so that the &sk_buff becomes completely private and the
   13.620 + *	caller is allowed to modify all the data of the returned buffer. This
   13.621 + *	means that this function is not recommended when only the header is
   13.622 + *	going to be modified. Use pskb_copy() instead.
  13.623 + */
  13.624 + 
  13.625 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
  13.626 +{
  13.627 +	struct sk_buff *n;
  13.628 +	int headerlen = skb->data-skb->head;
  13.629 +
  13.630 +	/*
  13.631 +	 *	Allocate the copy buffer
  13.632 +	 */
  13.633 +	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
  13.634 +	if(n==NULL)
  13.635 +		return NULL;
  13.636 +
  13.637 +	/* Set the data pointer */
  13.638 +	skb_reserve(n,headerlen);
  13.639 +	/* Set the tail pointer and length */
  13.640 +	skb_put(n,skb->len);
  13.641 +	n->csum = skb->csum;
  13.642 +	n->ip_summed = skb->ip_summed;
  13.643 +
  13.644 +	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
  13.645 +		BUG();
  13.646 +
  13.647 +	copy_skb_header(n, skb);
  13.648 +
  13.649 +	return n;
  13.650 +}
  13.651 +
  13.652 +/* Keep head the same: replace data */
  13.653 +int skb_linearize(struct sk_buff *skb, int gfp_mask)
  13.654 +{
  13.655 +	unsigned int size;
  13.656 +	u8 *data;
  13.657 +	long offset;
  13.658 +	int headerlen = skb->data - skb->head;
  13.659 +	int expand = (skb->tail+skb->data_len) - skb->end;
  13.660 +
  13.661 +	if (skb_shared(skb))
  13.662 +		BUG();
  13.663 +
  13.664 +	if (expand <= 0)
  13.665 +		expand = 0;
  13.666 +
  13.667 +	size = (skb->end - skb->head + expand);
  13.668 +	size = SKB_DATA_ALIGN(size);
  13.669 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  13.670 +	if (data == NULL)
  13.671 +		return -ENOMEM;
  13.672 +
  13.673 +	/* Copy entire thing */
  13.674 +	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
  13.675 +		BUG();
  13.676 +
  13.677 +	/* Offset between the two in bytes */
  13.678 +	offset = data - skb->head;
  13.679 +
  13.680 +	/* Free old data. */
  13.681 +	skb_release_data(skb);
  13.682 +
  13.683 +	skb->head = data;
  13.684 +	skb->end  = data + size;
  13.685 +
  13.686 +	/* Set up new pointers */
  13.687 +	skb->h.raw += offset;
  13.688 +	skb->nh.raw += offset;
  13.689 +	skb->mac.raw += offset;
  13.690 +	skb->tail += offset;
  13.691 +	skb->data += offset;
  13.692 +
  13.693 +	/* Set up shinfo */
  13.694 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  13.695 +	skb_shinfo(skb)->nr_frags = 0;
  13.696 +	skb_shinfo(skb)->frag_list = NULL;
  13.697 +
  13.698 +	/* We are no longer a clone, even if we were. */
  13.699 +	skb->cloned = 0;
  13.700 +
  13.701 +	skb->tail += skb->data_len;
  13.702 +	skb->data_len = 0;
  13.703 +	return 0;
  13.704 +}
  13.705 +
  13.706 +
  13.707 +/**
  13.708 + *	pskb_copy	-	create copy of an sk_buff with private head.
  13.709 + *	@skb: buffer to copy
  13.710 + *	@gfp_mask: allocation priority
  13.711 + *
   13.712 + *	Make a copy of both an &sk_buff and part of its data, located
   13.713 + *	in the header. Fragmented data remain shared. This is used when
   13.714 + *	the caller wishes to modify only the header of the &sk_buff and
   13.715 + *	needs a private copy of the header to alter. Returns %NULL on
   13.716 + *	failure or the pointer to the buffer on success.
  13.717 + *	The returned buffer has a reference count of 1.
  13.718 + */
  13.719 +
  13.720 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
  13.721 +{
  13.722 +	struct sk_buff *n;
  13.723 +
  13.724 +	/*
  13.725 +	 *	Allocate the copy buffer
  13.726 +	 */
  13.727 +	n=alloc_skb(skb->end - skb->head, gfp_mask);
  13.728 +	if(n==NULL)
  13.729 +		return NULL;
  13.730 +
  13.731 +	/* Set the data pointer */
  13.732 +	skb_reserve(n,skb->data-skb->head);
  13.733 +	/* Set the tail pointer and length */
  13.734 +	skb_put(n,skb_headlen(skb));
  13.735 +	/* Copy the bytes */
  13.736 +	memcpy(n->data, skb->data, n->len);
  13.737 +	n->csum = skb->csum;
  13.738 +	n->ip_summed = skb->ip_summed;
  13.739 +
  13.740 +	n->data_len = skb->data_len;
  13.741 +	n->len = skb->len;
  13.742 +
  13.743 +	if (skb_shinfo(skb)->nr_frags) {
  13.744 +		int i;
  13.745 +
  13.746 +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  13.747 +			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
  13.748 +			get_page(skb_shinfo(n)->frags[i].page);
  13.749 +		}
  13.750 +		skb_shinfo(n)->nr_frags = i;
  13.751 +	}
  13.752 +
  13.753 +	if (skb_shinfo(skb)->frag_list) {
  13.754 +		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
  13.755 +		skb_clone_fraglist(n);
  13.756 +	}
  13.757 +
  13.758 +	copy_skb_header(n, skb);
  13.759 +
  13.760 +	return n;
  13.761 +}
  13.762 +
  13.763 +/**
  13.764 + *	pskb_expand_head - reallocate header of &sk_buff
  13.765 + *	@skb: buffer to reallocate
  13.766 + *	@nhead: room to add at head
  13.767 + *	@ntail: room to add at tail
  13.768 + *	@gfp_mask: allocation priority
  13.769 + *
   13.770 + *	Expands (or creates an identical copy, if @nhead and @ntail are zero)
   13.771 + *	the header of the skb. The &sk_buff itself is not changed. The &sk_buff
   13.772 + *	MUST have a reference count of 1. Returns zero on success, or an error
   13.773 + *	code if expansion failed. In the latter case, the &sk_buff is not changed.
  13.774 + *
  13.775 + *	All the pointers pointing into skb header may change and must be
  13.776 + *	reloaded after call to this function.
  13.777 + */
  13.778 +
  13.779 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
  13.780 +{
  13.781 +	int i;
  13.782 +	u8 *data;
  13.783 +	int size = nhead + (skb->end - skb->head) + ntail;
  13.784 +	long off;
  13.785 +
  13.786 +	if (skb_shared(skb))
  13.787 +		BUG();
  13.788 +
  13.789 +	size = SKB_DATA_ALIGN(size);
  13.790 +
  13.791 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  13.792 +	if (data == NULL)
  13.793 +		goto nodata;
  13.794 +
  13.795 +	/* Copy only real data... and, alas, header. This should be
  13.796 +	 * optimized for the cases when header is void. */
  13.797 +	memcpy(data+nhead, skb->head, skb->tail-skb->head);
  13.798 +	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
  13.799 +
  13.800 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
  13.801 +		get_page(skb_shinfo(skb)->frags[i].page);
  13.802 +
  13.803 +	if (skb_shinfo(skb)->frag_list)
  13.804 +		skb_clone_fraglist(skb);
  13.805 +
  13.806 +	skb_release_data(skb);
  13.807 +
  13.808 +	off = (data+nhead) - skb->head;
  13.809 +
  13.810 +	skb->head = data;
  13.811 +	skb->end  = data+size;
  13.812 +
  13.813 +	skb->data += off;
  13.814 +	skb->tail += off;
  13.815 +	skb->mac.raw += off;
  13.816 +	skb->h.raw += off;
  13.817 +	skb->nh.raw += off;
  13.818 +	skb->cloned = 0;
  13.819 +	atomic_set(&skb_shinfo(skb)->dataref, 1);
  13.820 +	return 0;
  13.821 +
  13.822 +nodata:
  13.823 +	return -ENOMEM;
  13.824 +}
  13.825 +
  13.826 +/* Make private copy of skb with writable head and some headroom */
  13.827 +
  13.828 +struct sk_buff *
  13.829 +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
  13.830 +{
  13.831 +	struct sk_buff *skb2;
  13.832 +	int delta = headroom - skb_headroom(skb);
  13.833 +
  13.834 +	if (delta <= 0)
  13.835 +		return pskb_copy(skb, GFP_ATOMIC);
  13.836 +
  13.837 +	skb2 = skb_clone(skb, GFP_ATOMIC);
  13.838 +	if (skb2 == NULL ||
  13.839 +	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
  13.840 +		return skb2;
  13.841 +
  13.842 +	kfree_skb(skb2);
  13.843 +	return NULL;
  13.844 +}
  13.845 +
  13.846 +
  13.847 +/**
  13.848 + *	skb_copy_expand	-	copy and expand sk_buff
  13.849 + *	@skb: buffer to copy
  13.850 + *	@newheadroom: new free bytes at head
  13.851 + *	@newtailroom: new free bytes at tail
  13.852 + *	@gfp_mask: allocation priority
  13.853 + *
  13.854 + *	Make a copy of both an &sk_buff and its data and while doing so 
  13.855 + *	allocate additional space.
  13.856 + *
  13.857 + *	This is used when the caller wishes to modify the data and needs a 
  13.858 + *	private copy of the data to alter as well as more space for new fields.
  13.859 + *	Returns %NULL on failure or the pointer to the buffer
  13.860 + *	on success. The returned buffer has a reference count of 1.
  13.861 + *
  13.862 + *	You must pass %GFP_ATOMIC as the allocation priority if this function
  13.863 + *	is called from an interrupt.
  13.864 + */
  13.865 + 
  13.866 +
  13.867 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
  13.868 +				int newheadroom,
  13.869 +				int newtailroom,
  13.870 +				int gfp_mask)
  13.871 +{
  13.872 +	struct sk_buff *n;
  13.873 +
  13.874 +	/*
  13.875 +	 *	Allocate the copy buffer
  13.876 +	 */
  13.877 + 	 
  13.878 +	n=alloc_skb(newheadroom + skb->len + newtailroom,
  13.879 +		    gfp_mask);
  13.880 +	if(n==NULL)
  13.881 +		return NULL;
  13.882 +
  13.883 +	skb_reserve(n,newheadroom);
  13.884 +
  13.885 +	/* Set the tail pointer and length */
  13.886 +	skb_put(n,skb->len);
  13.887 +
  13.888 +	/* Copy the data only. */
  13.889 +	if (skb_copy_bits(skb, 0, n->data, skb->len))
  13.890 +		BUG();
  13.891 +
  13.892 +	copy_skb_header(n, skb);
  13.893 +	return n;
  13.894 +}
  13.895 +
  13.896 +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
  13.897 + * If realloc==0 and trimming is impossible without change of data,
  13.898 + * it is BUG().
  13.899 + */
  13.900 +
  13.901 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
  13.902 +{
  13.903 +	int offset = skb_headlen(skb);
  13.904 +	int nfrags = skb_shinfo(skb)->nr_frags;
  13.905 +	int i;
  13.906 +
  13.907 +	for (i=0; i<nfrags; i++) {
  13.908 +		int end = offset + skb_shinfo(skb)->frags[i].size;
  13.909 +		if (end > len) {
  13.910 +			if (skb_cloned(skb)) {
  13.911 +				if (!realloc)
  13.912 +					BUG();
  13.913 +				if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
  13.914 +					return -ENOMEM;
  13.915 +			}
  13.916 +			if (len <= offset) {
  13.917 +				put_page(skb_shinfo(skb)->frags[i].page);
  13.918 +				skb_shinfo(skb)->nr_frags--;
  13.919 +			} else {
  13.920 +				skb_shinfo(skb)->frags[i].size = len-offset;
  13.921 +			}
  13.922 +		}
  13.923 +		offset = end;
  13.924 +	}
  13.925 +
  13.926 +	if (offset < len) {
  13.927 +		skb->data_len -= skb->len - len;
  13.928 +		skb->len = len;
  13.929 +	} else {
  13.930 +		if (len <= skb_headlen(skb)) {
  13.931 +			skb->len = len;
  13.932 +			skb->data_len = 0;
  13.933 +			skb->tail = skb->data + len;
  13.934 +			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
  13.935 +				skb_drop_fraglist(skb);
  13.936 +		} else {
  13.937 +			skb->data_len -= skb->len - len;
  13.938 +			skb->len = len;
  13.939 +		}
  13.940 +	}
  13.941 +
  13.942 +	return 0;
  13.943 +}
  13.944 +
  13.945 +/**
  13.946 + *	__pskb_pull_tail - advance tail of skb header 
  13.947 + *	@skb: buffer to reallocate
  13.948 + *	@delta: number of bytes to advance tail
  13.949 + *
   13.950 + *	The function only makes sense on a fragmented &sk_buff:
   13.951 + *	it expands the header, moving its tail forward and copying the
   13.952 + *	necessary data from the fragmented part.
  13.953 + *
  13.954 + *	&sk_buff MUST have reference count of 1.
  13.955 + *
  13.956 + *	Returns %NULL (and &sk_buff does not change) if pull failed
  13.957 + *	or value of new tail of skb in the case of success.
  13.958 + *
  13.959 + *	All the pointers pointing into skb header may change and must be
  13.960 + *	reloaded after call to this function.
  13.961 + */
  13.962 +
  13.963 +/* Moves tail of skb head forward, copying data from fragmented part,
  13.964 + * when it is necessary.
  13.965 + * 1. It may fail due to malloc failure.
  13.966 + * 2. It may change skb pointers.
  13.967 + *
  13.968 + * It is pretty complicated. Luckily, it is called only in exceptional cases.
  13.969 + */
  13.970 +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
  13.971 +{
  13.972 +	int i, k, eat;
  13.973 +
   13.974 +	/* If the skb does not have enough free space at the tail, get a new
   13.975 +	 * one plus 128 bytes for future expansions. If we have enough room
   13.976 +	 * at the tail, reallocate without expansion only if the skb is cloned.
   13.977 +	 */
  13.978 +	eat = (skb->tail+delta) - skb->end;
  13.979 +
  13.980 +	if (eat > 0 || skb_cloned(skb)) {
  13.981 +		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
  13.982 +			return NULL;
  13.983 +	}
  13.984 +
  13.985 +	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
  13.986 +		BUG();
  13.987 +
  13.988 +	/* Optimization: no fragments, no reasons to preestimate
  13.989 +	 * size of pulled pages. Superb.
  13.990 +	 */
  13.991 +	if (skb_shinfo(skb)->frag_list == NULL)
  13.992 +		goto pull_pages;
  13.993 +
  13.994 +	/* Estimate size of pulled pages. */
  13.995 +	eat = delta;
  13.996 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  13.997 +		if (skb_shinfo(skb)->frags[i].size >= eat)
  13.998 +			goto pull_pages;
  13.999 +		eat -= skb_shinfo(skb)->frags[i].size;
 13.1000 +	}
 13.1001 +
  13.1002 +	/* If we need to update the frag list, we are in trouble.
  13.1003 +	 * Certainly, it is possible to add an offset to the skb data,
  13.1004 +	 * but taking into account that pulling is expected to be a
  13.1005 +	 * very rare operation, it is worth fighting against further
  13.1006 +	 * bloating of the skb head and crucifying ourselves here instead.
  13.1007 +	 * Pure masochism, indeed. 8)8)
  13.1008 +	 */
 13.1009 +	if (eat) {
 13.1010 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 13.1011 +		struct sk_buff *clone = NULL;
 13.1012 +		struct sk_buff *insp = NULL;
 13.1013 +
 13.1014 +		do {
 13.1015 +			if (list == NULL)
 13.1016 +				BUG();
 13.1017 +
 13.1018 +			if (list->len <= eat) {
 13.1019 +				/* Eaten as whole. */
 13.1020 +				eat -= list->len;
 13.1021 +				list = list->next;
 13.1022 +				insp = list;
 13.1023 +			} else {
 13.1024 +				/* Eaten partially. */
 13.1025 +
 13.1026 +				if (skb_shared(list)) {
 13.1027 +					/* Sucks! We need to fork list. :-( */
 13.1028 +					clone = skb_clone(list, GFP_ATOMIC);
 13.1029 +					if (clone == NULL)
 13.1030 +						return NULL;
 13.1031 +					insp = list->next;
 13.1032 +					list = clone;
 13.1033 +				} else {
 13.1034 +					/* This may be pulled without
 13.1035 +					 * problems. */
 13.1036 +					insp = list;
 13.1037 +				}
 13.1038 +				if (pskb_pull(list, eat) == NULL) {
 13.1039 +					if (clone)
 13.1040 +						kfree_skb(clone);
 13.1041 +					return NULL;
 13.1042 +				}
 13.1043 +				break;
 13.1044 +			}
 13.1045 +		} while (eat);
 13.1046 +
 13.1047 +		/* Free pulled out fragments. */
 13.1048 +		while ((list = skb_shinfo(skb)->frag_list) != insp) {
 13.1049 +			skb_shinfo(skb)->frag_list = list->next;
 13.1050 +			kfree_skb(list);
 13.1051 +		}
 13.1052 +		/* And insert new clone at head. */
 13.1053 +		if (clone) {
 13.1054 +			clone->next = list;
 13.1055 +			skb_shinfo(skb)->frag_list = clone;
 13.1056 +		}
 13.1057 +	}
 13.1058 +	/* Success! Now we may commit changes to skb data. */
 13.1059 +
 13.1060 +pull_pages:
 13.1061 +	eat = delta;
 13.1062 +	k = 0;
 13.1063 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 13.1064 +		if (skb_shinfo(skb)->frags[i].size <= eat) {
 13.1065 +			put_page(skb_shinfo(skb)->frags[i].page);
 13.1066 +			eat -= skb_shinfo(skb)->frags[i].size;
 13.1067 +		} else {
 13.1068 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
 13.1069 +			if (eat) {
 13.1070 +				skb_shinfo(skb)->frags[k].page_offset += eat;
 13.1071 +				skb_shinfo(skb)->frags[k].size -= eat;
 13.1072 +				eat = 0;
 13.1073 +			}
 13.1074 +			k++;
 13.1075 +		}
 13.1076 +	}
 13.1077 +	skb_shinfo(skb)->nr_frags = k;
 13.1078 +
 13.1079 +	skb->tail += delta;
 13.1080 +	skb->data_len -= delta;
 13.1081 +
 13.1082 +	return skb->tail;
 13.1083 +}
 13.1084 +
 13.1085 +/* Copy some data bits from skb to kernel buffer. */
 13.1086 +
 13.1087 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 13.1088 +{
 13.1089 +	int i, copy;
 13.1090 +	int start = skb->len - skb->data_len;
 13.1091 +
 13.1092 +	if (offset > (int)skb->len-len)
 13.1093 +		goto fault;
 13.1094 +
 13.1095 +	/* Copy header. */
 13.1096 +	if ((copy = start-offset) > 0) {
 13.1097 +		if (copy > len)
 13.1098 +			copy = len;
 13.1099 +		memcpy(to, skb->data + offset, copy);
 13.1100 +		if ((len -= copy) == 0)
 13.1101 +			return 0;
 13.1102 +		offset += copy;
 13.1103 +		to += copy;
 13.1104 +	}
 13.1105 +
 13.1106 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 13.1107 +		int end;
 13.1108 +
 13.1109 +		BUG_TRAP(start <= offset+len);
 13.1110 +
 13.1111 +		end = start + skb_shinfo(skb)->frags[i].size;
 13.1112 +		if ((copy = end-offset) > 0) {
 13.1113 +			u8 *vaddr;
 13.1114 +
 13.1115 +			if (copy > len)
 13.1116 +				copy = len;
 13.1117 +
 13.1118 +			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
 13.1119 +			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
 13.1120 +			       offset-start, copy);
 13.1121 +			kunmap_skb_frag(vaddr);
 13.1122 +
 13.1123 +			if ((len -= copy) == 0)
 13.1124 +				return 0;
 13.1125 +			offset += copy;
 13.1126 +			to += copy;
 13.1127 +		}
 13.1128 +		start = end;
 13.1129 +	}
 13.1130 +
 13.1131 +	if (skb_shinfo(skb)->frag_list) {
 13.1132 +		struct sk_buff *list;
 13.1133 +
 13.1134 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 13.1135 +			int end;
 13.1136 +
 13.1137 +			BUG_TRAP(start <= offset+len);
 13.1138 +
 13.1139 +			end = start + list->len;
 13.1140 +			if ((copy = end-offset) > 0) {
 13.1141 +				if (copy > len)
 13.1142 +					copy = len;
 13.1143 +				if (skb_copy_bits(list, offset-start, to, copy))
 13.1144 +					goto fault;
 13.1145 +				if ((len -= copy) == 0)
 13.1146 +					return 0;
 13.1147 +				offset += copy;
 13.1148 +				to += copy;
 13.1149 +			}
 13.1150 +			start = end;
 13.1151 +		}
 13.1152 +	}
 13.1153 +	if (len == 0)
 13.1154 +		return 0;
 13.1155 +
 13.1156 +fault:
 13.1157 +	return -EFAULT;
 13.1158 +}
 13.1159 +
 13.1160 +/* Checksum skb data. */
 13.1161 +
 13.1162 +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
 13.1163 +{
 13.1164 +	int i, copy;
 13.1165 +	int start = skb->len - skb->data_len;
 13.1166 +	int pos = 0;
 13.1167 +
 13.1168 +	/* Checksum header. */
 13.1169 +	if ((copy = start-offset) > 0) {
 13.1170 +		if (copy > len)
 13.1171 +			copy = len;
 13.1172 +		csum = csum_partial(skb->data+offset, copy, csum);
 13.1173 +		if ((len -= copy) == 0)
 13.1174 +			return csum;
 13.1175 +		offset += copy;
 13.1176 +		pos = copy;
 13.1177 +	}
 13.1178 +
 13.1179 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 13.1180 +		int end;
 13.1181 +
 13.1182 +		BUG_TRAP(start <= offset+len);
 13.1183 +
 13.1184 +		end = start + skb_shinfo(skb)->frags[i].size;
 13.1185 +		if ((copy = end-offset) > 0) {
 13.1186 +			unsigned int csum2;
 13.1187 +			u8 *vaddr;
 13.1188 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 13.1189 +
 13.1190 +			if (copy > len)
 13.1191 +				copy = len;
 13.1192 +			vaddr = kmap_skb_frag(frag);
 13.1193 +			csum2 = csum_partial(vaddr + frag->page_offset +
 13.1194 +					     offset-start, copy, 0);
 13.1195 +			kunmap_skb_frag(vaddr);
 13.1196 +			csum = csum_block_add(csum, csum2, pos);
 13.1197 +			if (!(len -= copy))
 13.1198 +				return csum;
 13.1199 +			offset += copy;
 13.1200 +			pos += copy;
 13.1201 +		}
 13.1202 +		start = end;
 13.1203 +	}
 13.1204 +
 13.1205 +	if (skb_shinfo(skb)->frag_list) {
 13.1206 +		struct sk_buff *list;
 13.1207 +
 13.1208 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 13.1209 +			int end;
 13.1210 +
 13.1211 +			BUG_TRAP(start <= offset+len);
 13.1212 +
 13.1213 +			end = start + list->len;
 13.1214 +			if ((copy = end-offset) > 0) {
 13.1215 +				unsigned int csum2;
 13.1216 +				if (copy > len)
 13.1217 +					copy = len;
 13.1218 +				csum2 = skb_checksum(list, offset-start, copy, 0);
 13.1219 +				csum = csum_block_add(csum, csum2, pos);
 13.1220 +				if ((len -= copy) == 0)
 13.1221 +					return csum;
 13.1222 +				offset += copy;
 13.1223 +				pos += copy;
 13.1224 +			}
 13.1225 +			start = end;
 13.1226 +		}
 13.1227 +	}
 13.1228 +	if (len == 0)
 13.1229 +		return csum;
 13.1230 +
 13.1231 +	BUG();
 13.1232 +	return csum;
 13.1233 +}
 13.1234 +
 13.1235 +/* Both of above in one bottle. */
 13.1236 +
 13.1237 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
 13.1238 +{
 13.1239 +	int i, copy;
 13.1240 +	int start = skb->len - skb->data_len;
 13.1241 +	int pos = 0;
 13.1242 +
 13.1243 +	/* Copy header. */
 13.1244 +	if ((copy = start-offset) > 0) {
 13.1245 +		if (copy > len)
 13.1246 +			copy = len;
 13.1247 +		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
 13.1248 +		if ((len -= copy) == 0)
 13.1249 +			return csum;
 13.1250 +		offset += copy;
 13.1251 +		to += copy;
 13.1252 +		pos = copy;
 13.1253 +	}
 13.1254 +
 13.1255 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 13.1256 +		int end;
 13.1257 +
 13.1258 +		BUG_TRAP(start <= offset+len);
 13.1259 +
 13.1260 +		end = start + skb_shinfo(skb)->frags[i].size;
 13.1261 +		if ((copy = end-offset) > 0) {
 13.1262 +			unsigned int csum2;
 13.1263 +			u8 *vaddr;
 13.1264 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 13.1265 +
 13.1266 +			if (copy > len)
 13.1267 +				copy = len;
 13.1268 +			vaddr = kmap_skb_frag(frag);
 13.1269 +			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
 13.1270 +						      offset-start, to, copy, 0);
 13.1271 +			kunmap_skb_frag(vaddr);
 13.1272 +			csum = csum_block_add(csum, csum2, pos);
 13.1273 +			if (!(len -= copy))
 13.1274 +				return csum;
 13.1275 +			offset += copy;
 13.1276 +			to += copy;
 13.1277 +			pos += copy;
 13.1278 +		}
 13.1279 +		start = end;
 13.1280 +	}
 13.1281 +
 13.1282 +	if (skb_shinfo(skb)->frag_list) {
 13.1283 +		struct sk_buff *list;
 13.1284 +
 13.1285 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 13.1286 +			unsigned int csum2;
 13.1287 +			int end;
 13.1288 +
 13.1289 +			BUG_TRAP(start <= offset+len);
 13.1290 +
 13.1291 +			end = start + list->len;
 13.1292 +			if ((copy = end-offset) > 0) {
 13.1293 +				if (copy > len)
 13.1294 +					copy = len;
 13.1295 +				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
 13.1296 +				csum = csum_block_add(csum, csum2, pos);
 13.1297 +				if ((len -= copy) == 0)
 13.1298 +					return csum;
 13.1299 +				offset += copy;
 13.1300 +				to += copy;
 13.1301 +				pos += copy;
 13.1302 +			}
 13.1303 +			start = end;
 13.1304 +		}
 13.1305 +	}
 13.1306 +	if (len == 0)
 13.1307 +		return csum;
 13.1308 +
 13.1309 +	BUG();
 13.1310 +	return csum;
 13.1311 +}
 13.1312 +
 13.1313 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 13.1314 +{
 13.1315 +	unsigned int csum;
 13.1316 +	long csstart;
 13.1317 +
 13.1318 +	if (skb->ip_summed == CHECKSUM_HW)
 13.1319 +		csstart = skb->h.raw - skb->data;
 13.1320 +	else
 13.1321 +		csstart = skb->len - skb->data_len;
 13.1322 +
 13.1323 +	if (csstart > skb->len - skb->data_len)
 13.1324 +		BUG();
 13.1325 +
 13.1326 +	memcpy(to, skb->data, csstart);
 13.1327 +
 13.1328 +	csum = 0;
 13.1329 +	if (csstart != skb->len)
 13.1330 +		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
 13.1331 +				skb->len-csstart, 0);
 13.1332 +
 13.1333 +	if (skb->ip_summed == CHECKSUM_HW) {
 13.1334 +		long csstuff = csstart + skb->csum;
 13.1335 +
 13.1336 +		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
 13.1337 +	}
 13.1338 +}
 13.1339 +
 13.1340 +#if 0
 13.1341 +/* 
 13.1342 + * 	Tune the memory allocator for a new MTU size.
 13.1343 + */
 13.1344 +void skb_add_mtu(int mtu)
 13.1345 +{
 13.1346 +	/* Must match allocation in alloc_skb */
 13.1347 +	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
 13.1348 +
 13.1349 +	kmem_add_cache_size(mtu);
 13.1350 +}
 13.1351 +#endif
 13.1352 +
 13.1353 +void __init skb_init(void)
 13.1354 +{
 13.1355 +	int i;
 13.1356 +
 13.1357 +	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 13.1358 +					      sizeof(struct sk_buff),
 13.1359 +					      0,
 13.1360 +					      SLAB_HWCACHE_ALIGN,
 13.1361 +					      skb_headerinit, NULL);
 13.1362 +	if (!skbuff_head_cache)
 13.1363 +		panic("cannot create skbuff cache");
 13.1364 +
 13.1365 +        init_net_pages(NUM_NET_PAGES);
 13.1366 +
 13.1367 +	for (i=0; i<NR_CPUS; i++)
 13.1368 +		skb_queue_head_init(&skb_head_pool[i].list);
 13.1369 +}