ia64/xen-unstable

changeset 122:b1b1608f5d5c

bitkeeper revision 1.22.1.8 (3e4117feir_tT1ncjYWsGFnRPS64sg)

fix conflicts between VM and net updates.
author akw27@boulderdash.cl.cam.ac.uk
date Wed Feb 05 13:56:14 2003 +0000 (2003-02-05)
parents 658b3aeca0e5 82679de8a1ca
children 3549eb0ec2db
files .rootkeys BitKeeper/etc/logging_ok xen-2.4.16/common/domain.c xen-2.4.16/common/event.c xen-2.4.16/common/memory.c xen-2.4.16/common/network.c xen-2.4.16/drivers/net/tulip/interrupt.c xen-2.4.16/include/asm-i386/flushtlb.h xen-2.4.16/include/asm-i386/page.h xen-2.4.16/include/asm-i386/pci.h xen-2.4.16/include/hypervisor-ifs/network.h xen-2.4.16/include/xeno/mm.h xen-2.4.16/include/xeno/skbuff.h xen-2.4.16/include/xeno/vif.h xen-2.4.16/net/dev.c xen-2.4.16/net/eth.c xen-2.4.16/net/skbuff.c xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c xenolinux-2.4.16-sparse/include/asm-xeno/io.h xenolinux-2.4.16-sparse/include/linux/skbuff.h xenolinux-2.4.16-sparse/net/core/skbuff.c
line diff
     1.1 --- a/.rootkeys	Tue Feb 04 22:08:19 2003 +0000
     1.2 +++ b/.rootkeys	Wed Feb 05 13:56:14 2003 +0000
     1.3 @@ -135,6 +135,7 @@ 3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen-2.4.1
     1.4  3e20b82fl1jmQiKdLy7fxMcutfpjWA xen-2.4.16/include/asm-i386/domain_page.h
     1.5  3ddb79c2O729EttZTYu1c8LcsUO_GQ xen-2.4.16/include/asm-i386/elf.h
     1.6  3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen-2.4.16/include/asm-i386/fixmap.h
     1.7 +3e2d29944GI24gf7vOP_7x8EyuqxeA xen-2.4.16/include/asm-i386/flushtlb.h
     1.8  3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen-2.4.16/include/asm-i386/hardirq.h
     1.9  3ddb79c3BFEIwXR4IsWbwp4BoL4DkA xen-2.4.16/include/asm-i386/hdreg.h
    1.10  3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen-2.4.16/include/asm-i386/i387.h
    1.11 @@ -401,9 +402,11 @@ 3ddb79bb3cMSs_k2X5Oq2hOIBvmPYA xenolinux
    1.12  3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h
    1.13  3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h
    1.14  3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h
    1.15 +3e37c39fVCSGQENtY6g7muaq_THliw xenolinux-2.4.16-sparse/include/linux/skbuff.h
    1.16  3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h
    1.17  3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c
    1.18  3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk
    1.19  3e15d52e0_j129JPvo7xfYGndVFpwQ xenolinux-2.4.16-sparse/mm/memory.c
    1.20  3e15d535DLvpzTrLRUIerB69LpJD1g xenolinux-2.4.16-sparse/mm/mremap.c
    1.21  3e15d531m1Y1_W8ki64AFOU_ua4C4w xenolinux-2.4.16-sparse/mm/swapfile.c
    1.22 +3e37c312QFuzIxXsuAgO6IRt3Tp96Q xenolinux-2.4.16-sparse/net/core/skbuff.c
     2.1 --- a/BitKeeper/etc/logging_ok	Tue Feb 04 22:08:19 2003 +0000
     2.2 +++ b/BitKeeper/etc/logging_ok	Wed Feb 05 13:56:14 2003 +0000
     2.3 @@ -1,5 +1,6 @@
     2.4  akw27@boulderdash.cl.cam.ac.uk
     2.5  akw27@labyrinth.cl.cam.ac.uk
     2.6 +akw27@plucky.localdomain
     2.7  bd240@boulderdash.cl.cam.ac.uk
     2.8  iap10@labyrinth.cl.cam.ac.uk
     2.9  kaf24@labyrinth.cl.cam.ac.uk
     3.1 --- a/xen-2.4.16/common/domain.c	Tue Feb 04 22:08:19 2003 +0000
     3.2 +++ b/xen-2.4.16/common/domain.c	Wed Feb 05 13:56:14 2003 +0000
     3.3 @@ -11,6 +11,7 @@
     3.4  #include <xeno/dom0_ops.h>
     3.5  #include <asm/io.h>
     3.6  #include <asm/domain_page.h>
     3.7 +#include <asm/flushtlb.h>
     3.8  #include <asm/msr.h>
     3.9  #include <xeno/multiboot.h>
    3.10  
    3.11 @@ -353,10 +354,13 @@ unsigned int alloc_new_dom_mem(struct ta
    3.12      struct pfn_info *pf, *pf_head;
    3.13      unsigned int alloc_pfns;
    3.14      unsigned int req_pages;
    3.15 +    unsigned long flags;
    3.16  
    3.17      /* how many pages do we need to alloc? */
    3.18      req_pages = kbytes >> (PAGE_SHIFT - 10);
    3.19  
    3.20 +    spin_lock_irqsave(&free_list_lock, flags);
    3.21 +    
    3.22      /* is there enough mem to serve the request? */   
    3.23      if(req_pages > free_pfns)
    3.24          return -1;
    3.25 @@ -387,6 +391,8 @@ unsigned int alloc_new_dom_mem(struct ta
    3.26  
    3.27          free_pfns--;
    3.28      }
    3.29 +   
    3.30 +    spin_unlock_irqrestore(&free_list_lock, flags);
    3.31      
    3.32      p->tot_pages = req_pages;
    3.33  
    3.34 @@ -544,6 +550,7 @@ static unsigned long alloc_page_from_dom
    3.35   */
    3.36  int setup_guestos(struct task_struct *p, dom0_newdomain_t *params)
    3.37  {
    3.38 +
    3.39      struct list_head *list_ent;
    3.40      char *src, *dst;
    3.41      int i, dom = p->domain;
    3.42 @@ -704,8 +711,7 @@ int setup_guestos(struct task_struct *p,
    3.43  
    3.44      /* Install the new page tables. */
    3.45      __cli();
    3.46 -    __asm__ __volatile__ (
    3.47 -        "mov %%eax,%%cr3" : : "a" (pagetable_val(p->mm.pagetable)));
    3.48 +    __write_cr3_counted(pagetable_val(p->mm.pagetable));
    3.49  
    3.50      /* Copy the guest OS image. */
    3.51      src = (char *)__va(mod[0].mod_start + 12);
    3.52 @@ -777,8 +783,7 @@ int setup_guestos(struct task_struct *p,
    3.53      }
    3.54  
    3.55      /* Reinstate the caller's page tables. */
    3.56 -    __asm__ __volatile__ (
    3.57 -        "mov %%eax,%%cr3" : : "a" (pagetable_val(current->mm.pagetable)));    
    3.58 +    __write_cr3_counted(pagetable_val(current->mm.pagetable));
    3.59      __sti();
    3.60  
    3.61      new_thread(p, 
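
The alloc_new_dom_mem() hunk above introduces free_list_lock around the free-list walk. A minimal sketch of that locking discipline follows; the helper name is illustrative, not part of the changeset, and, as an assumption of the sketch rather than something the hunk above does, it also drops the lock on the insufficient-memory path before returning.

    /* Illustrative only: every access to free_list/free_pfns is bracketed
     * by free_list_lock.  Unlocking before the early return is an
     * assumption of this sketch. */
    static int grab_free_frames(unsigned int req_pages)
    {
        struct pfn_info *pf;
        unsigned int alloc_pfns;
        unsigned long flags;

        spin_lock_irqsave(&free_list_lock, flags);

        if ( req_pages > free_pfns )
        {
            spin_unlock_irqrestore(&free_list_lock, flags);
            return -1;
        }

        for ( alloc_pfns = 0; alloc_pfns < req_pages; alloc_pfns++ )
        {
            pf = list_entry(free_list.next, struct pfn_info, list);
            list_del(&pf->list);
            free_pfns--;
            /* ... per-frame bookkeeping as in the hunk above ... */
        }

        spin_unlock_irqrestore(&free_list_lock, flags);
        return 0;
    }
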
     4.1 --- a/xen-2.4.16/common/event.c	Tue Feb 04 22:08:19 2003 +0000
     4.2 +++ b/xen-2.4.16/common/event.c	Wed Feb 05 13:56:14 2003 +0000
     4.3 @@ -14,13 +14,13 @@
     4.4  typedef void (*hyp_event_callback_fn_t)(void);
     4.5  
     4.6  extern void schedule(void);
     4.7 -extern void flush_rx_queue(void);
     4.8 +extern void update_shared_ring(void);
     4.9  
    4.10  /* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
    4.11  static hyp_event_callback_fn_t event_call_fn[] = 
    4.12  {
    4.13      schedule,
    4.14 -    flush_rx_queue,
    4.15 +    update_shared_ring,
    4.16      kill_domain
    4.17  };
    4.18  
     5.1 --- a/xen-2.4.16/common/memory.c	Tue Feb 04 22:08:19 2003 +0000
     5.2 +++ b/xen-2.4.16/common/memory.c	Wed Feb 05 13:56:14 2003 +0000
     5.3 @@ -171,6 +171,7 @@
     5.4  #include <xeno/sched.h>
     5.5  #include <xeno/errno.h>
     5.6  #include <asm/page.h>
     5.7 +#include <asm/flushtlb.h>
     5.8  #include <asm/io.h>
     5.9  #include <asm/uaccess.h>
    5.10  #include <asm/domain_page.h>
    5.11 @@ -205,6 +206,7 @@ unsigned long frame_table_size;
    5.12  unsigned long max_page;
    5.13  
    5.14  struct list_head free_list;
    5.15 +spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED;
    5.16  unsigned int free_pfns;
    5.17  
    5.18  static int tlb_flush[NR_CPUS];
    5.19 @@ -218,6 +220,7 @@ void __init init_frametable(unsigned lon
    5.20  {
    5.21      struct pfn_info *pf;
    5.22      unsigned long page_index;
    5.23 +    unsigned long flags;
    5.24  
    5.25      memset(tlb_flush, 0, sizeof(tlb_flush));
    5.26  
    5.27 @@ -231,6 +234,7 @@ void __init init_frametable(unsigned lon
    5.28          ((__pa(frame_table) + frame_table_size) >> PAGE_SHIFT);
    5.29  
    5.30      /* Put all domain-allocatable memory on a free list. */
    5.31 +    spin_lock_irqsave(&free_list_lock, flags);
    5.32      INIT_LIST_HEAD(&free_list);
    5.33      for( page_index = (__pa(frame_table) + frame_table_size) >> PAGE_SHIFT; 
    5.34           page_index < nr_pages; 
    5.35 @@ -239,6 +243,7 @@ void __init init_frametable(unsigned lon
    5.36          pf = list_entry(&frame_table[page_index].list, struct pfn_info, list);
    5.37          list_add_tail(&pf->list, &free_list);
    5.38      }
    5.39 +    spin_unlock_irqrestore(&free_list_lock, flags);
    5.40  }
    5.41  
    5.42  
    5.43 @@ -697,7 +702,6 @@ static int do_extended_command(unsigned 
    5.44      return err;
    5.45  }
    5.46  
    5.47 -
    5.48  int do_process_page_updates(page_update_request_t *ureqs, int count)
    5.49  {
    5.50      page_update_request_t req;
    5.51 @@ -807,11 +811,10 @@ int do_process_page_updates(page_update_
    5.52      if ( tlb_flush[smp_processor_id()] )
    5.53      {
    5.54          tlb_flush[smp_processor_id()] = 0;
    5.55 -        __asm__ __volatile__ (
    5.56 -            "movl %%eax,%%cr3" : : 
    5.57 -            "a" (pagetable_val(current->mm.pagetable)));
    5.58 +        __write_cr3_counted(pagetable_val(current->mm.pagetable));
    5.59  
    5.60      }
    5.61  
    5.62      return(0);
    5.63  }
    5.64 +
     6.1 --- a/xen-2.4.16/common/network.c	Tue Feb 04 22:08:19 2003 +0000
     6.2 +++ b/xen-2.4.16/common/network.c	Wed Feb 05 13:56:14 2003 +0000
     6.3 @@ -49,6 +49,7 @@ net_vif_t *create_net_vif(int domain)
     6.4  {
     6.5      net_vif_t *new_vif;
     6.6      net_ring_t *new_ring;
     6.7 +    net_shadow_ring_t *shadow_ring;
     6.8      struct task_struct *dom_task;
     6.9      
    6.10      if ( !(dom_task = find_domain_by_id(domain)) ) 
    6.11 @@ -64,7 +65,27 @@ net_vif_t *create_net_vif(int domain)
    6.12      new_ring = dom_task->net_ring_base + dom_task->num_net_vifs;
    6.13      memset(new_ring, 0, sizeof(net_ring_t));
    6.14  
    6.15 +    // allocate the shadow ring.  
    6.16 +    // maybe these should be kmem_cache instead of kmalloc?
    6.17 +    
    6.18 +    shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL);
    6.19 +    if (shadow_ring == NULL) goto fail;
    6.20 +    
    6.21 +    shadow_ring->tx_ring = kmalloc(TX_RING_SIZE 
    6.22 +                    * sizeof(tx_shadow_entry_t), GFP_KERNEL);
    6.23 +    shadow_ring->rx_ring = kmalloc(RX_RING_SIZE
    6.24 +                    * sizeof(rx_shadow_entry_t), GFP_KERNEL);
    6.25 +    if ((shadow_ring->tx_ring == NULL) || (shadow_ring->rx_ring == NULL))
    6.26 +            goto fail;
    6.27 +
    6.28 +    shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0;
    6.29 +    
    6.30 +    // fill in the new vif struct.
    6.31 +    
    6.32      new_vif->net_ring = new_ring;
    6.33 +    new_vif->shadow_ring = shadow_ring;
    6.34 +    
    6.35 +                    
    6.36      skb_queue_head_init(&new_vif->skb_list);
    6.37      new_vif->domain = domain;
    6.38      
    6.39 @@ -77,6 +98,10 @@ net_vif_t *create_net_vif(int domain)
    6.40      dom_task->num_net_vifs++;
    6.41      
    6.42      return new_vif;
    6.43 +    
    6.44 +fail:
    6.45 +    printk("VIF allocation failed!\n");
    6.46 +    return NULL;
    6.47  }
    6.48  
    6.49  /* delete_net_vif - Delete the last vif in the given domain. 
    6.50 @@ -101,7 +126,10 @@ void destroy_net_vif(struct task_struct 
    6.51      write_lock(&sys_vif_lock);
    6.52      sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed
    6.53      write_unlock(&sys_vif_lock);        
    6.54 -    
    6.55 +   
    6.56 +    kfree(p->net_vif_list[i]->shadow_ring->tx_ring);
    6.57 +    kfree(p->net_vif_list[i]->shadow_ring->rx_ring);
    6.58 +    kfree(p->net_vif_list[i]->shadow_ring);
    6.59      kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
    6.60  }
    6.61  
     7.1 --- a/xen-2.4.16/drivers/net/tulip/interrupt.c	Tue Feb 04 22:08:19 2003 +0000
     7.2 +++ b/xen-2.4.16/drivers/net/tulip/interrupt.c	Wed Feb 05 13:56:14 2003 +0000
     7.3 @@ -170,8 +170,9 @@ static int tulip_rx(struct net_device *d
     7.4  #endif
     7.5  			/* Check if the packet is long enough to accept without copying
     7.6  			   to a minimally-sized skbuff. */
     7.7 -			if (pkt_len < tulip_rx_copybreak
     7.8 -				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
     7.9 +			//if (pkt_len < tulip_rx_copybreak
    7.10 +			//	&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
    7.11 +                        if (0) {
    7.12  				skb->dev = dev;
    7.13  				skb_reserve(skb, 2);	/* 16 byte align the IP header */
    7.14  				pci_dma_sync_single(tp->pdev,
     8.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     8.2 +++ b/xen-2.4.16/include/asm-i386/flushtlb.h	Wed Feb 05 13:56:14 2003 +0000
     8.3 @@ -0,0 +1,48 @@
     8.4 +/******************************************************************************
     8.5 + * flushtlb.h
     8.6 + * 
     8.7 + * TLB flush macros that count flushes.  Counting is used to enforce 
      8.8 + * zero-copy safety, particularly for the network code.
     8.9 + *
    8.10 + * akw - Jan 21, 2003
    8.11 + */
    8.12 +
    8.13 +#ifndef __FLUSHTLB_H
    8.14 +#define __FLUSHTLB_H
    8.15 +
    8.16 +#include <xeno/smp.h>
    8.17 +
    8.18 +unsigned long tlb_flush_count[NR_CPUS];
    8.19 +//#if 0 
    8.20 +#define __read_cr3(__var)                                               \
    8.21 +    do {                                                                \
    8.22 +                __asm__ __volatile (                                    \
    8.23 +                        "movl %%cr3, %0;"                               \
    8.24 +                        : "=r" (__var));                                \
    8.25 +    } while (0)
    8.26 +//#endif
    8.27 +
    8.28 +#define __write_cr3_counted(__pa)                                       \
    8.29 +    do {                                                                \
    8.30 +                __asm__ __volatile__ (                                  \
    8.31 +                        "movl %0, %%cr3;"                               \
    8.32 +                        :: "r" (__pa)                                    \
    8.33 +                        : "memory");                                    \
    8.34 +                tlb_flush_count[smp_processor_id()]++;                  \
    8.35 +    } while (0)
    8.36 +
    8.37 +//#endif
    8.38 +#define __flush_tlb_counted()                                           \
    8.39 +        do {                                                            \
    8.40 +                unsigned int tmpreg;                                    \
    8.41 +                                                                        \
    8.42 +                __asm__ __volatile__(                                   \
    8.43 +                        "movl %%cr3, %0;  # flush TLB \n"               \
    8.44 +                        "movl %0, %%cr3;                "               \
    8.45 +                        : "=r" (tmpreg)                                \
    8.46 +                        :: "memory");                                   \
    8.47 +                tlb_flush_count[smp_processor_id()]++;                  \
    8.48 +        } while (0)
    8.49 +
    8.50 +#endif
    8.51 +                           
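
The counters defined above are consumed by the zero-copy receive path: when do_net_update() (net/dev.c, later in this changeset) clears _PAGE_PRESENT on the PTE backing a posted receive buffer, it records the current per-CPU count in the shadow descriptor, and deliver_packet()/flush_rx_queue() force a flush if the count has not moved since. A simplified sketch of that pairing (helper names are illustrative, not part of the changeset):

    static inline void record_flush_count(rx_shadow_entry_t *rx)
    {
        /* Snapshot taken at the point the guest PTE is invalidated. */
        rx->flush_count = tlb_flush_count[smp_processor_id()];
    }

    static inline void flush_if_stale(rx_shadow_entry_t *rx)
    {
        /* An unchanged counter means no CR3 reload has happened since the
         * snapshot, so stale translations may still be cached. */
        if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
            flush_tlb_all();
    }
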
     9.1 --- a/xen-2.4.16/include/asm-i386/page.h	Tue Feb 04 22:08:19 2003 +0000
     9.2 +++ b/xen-2.4.16/include/asm-i386/page.h	Wed Feb 05 13:56:14 2003 +0000
     9.3 @@ -91,36 +91,36 @@ typedef struct { unsigned long pt_lo; } 
     9.4  #include <asm/processor.h>
     9.5  #include <asm/fixmap.h>
     9.6  #include <asm/bitops.h>
     9.7 +#include <asm/flushtlb.h>
     9.8  
     9.9  extern l2_pgentry_t idle0_pg_table[ENTRIES_PER_L2_PAGETABLE];
    9.10  extern l2_pgentry_t *idle_pg_table[NR_CPUS];
    9.11  extern void paging_init(void);
    9.12  
    9.13 -#define __flush_tlb()							\
    9.14 -	do {								\
    9.15 -		unsigned int tmpreg;					\
    9.16 -									\
    9.17 -		__asm__ __volatile__(					\
    9.18 -			"movl %%cr3, %0;  # flush TLB \n"		\
    9.19 -			"movl %0, %%cr3;              \n"		\
    9.20 -			: "=r" (tmpreg)					\
    9.21 -			:: "memory");					\
    9.22 -	} while (0)
    9.23 +#define __flush_tlb() __flush_tlb_counted()
    9.24  
    9.25  /* Flush global pages as well. */
    9.26 +
    9.27 +#define __pge_off()                                                     \
    9.28 +        do {                                                            \
    9.29 +                __asm__ __volatile__(                                   \
    9.30 +                        "movl %0, %%cr4;  # turn off PGE     "          \
    9.31 +                        :: "r" (mmu_cr4_features & ~X86_CR4_PGE));      \
    9.32 +        } while (0)
    9.33 +
    9.34 +#define __pge_on()                                                      \
    9.35 +        do {                                                            \
    9.36 +                __asm__ __volatile__(                                   \
    9.37 +                        "movl %0, %%cr4;  # turn off PGE     "          \
    9.38 +                        :: "r" (mmu_cr4_features));                     \
    9.39 +        } while (0)
    9.40 +
    9.41 +
    9.42  #define __flush_tlb_all()						\
    9.43  	do {								\
    9.44 -		unsigned int tmpreg;					\
    9.45 -									\
    9.46 -		__asm__ __volatile__(					\
    9.47 -			"movl %1, %%cr4;  # turn off PGE     \n"	\
    9.48 -			"movl %%cr3, %0;  # flush TLB        \n"	\
    9.49 -			"movl %0, %%cr3;                     \n"	\
    9.50 -			"movl %2, %%cr4;  # turn PGE back on \n"	\
    9.51 -			: "=&r" (tmpreg)				\
    9.52 -			: "r" (mmu_cr4_features & ~X86_CR4_PGE),	\
    9.53 -			  "r" (mmu_cr4_features)			\
    9.54 -			: "memory");					\
    9.55 +                __pge_off();                                            \
    9.56 +		__flush_tlb_counted();					\
    9.57 +                __pge_on();                                             \
    9.58  	} while (0)
    9.59  
    9.60  #define __flush_tlb_one(__addr) \
    10.1 --- a/xen-2.4.16/include/asm-i386/pci.h	Tue Feb 04 22:08:19 2003 +0000
    10.2 +++ b/xen-2.4.16/include/asm-i386/pci.h	Wed Feb 05 13:56:14 2003 +0000
    10.3 @@ -75,7 +75,19 @@ static inline dma_addr_t pci_map_single(
    10.4  	if (direction == PCI_DMA_NONE)
    10.5  		BUG();
    10.6  	flush_write_buffers();
    10.7 -	return virt_to_bus(ptr);
    10.8 +
    10.9 +        if ((unsigned long) ptr > PAGE_OFFSET)
   10.10 +	    return virt_to_bus(ptr);
   10.11 +
   10.12 +        /* If an address that is not in hypervisor VM is passed to this 
    10.13 +         * function (i.e. below PAGE_OFFSET), we assume that the caller knows 
    10.14 +         * what it is doing and has passed a physical address that 
   10.15 +         * should not be converted here.  This is a little hackish, but 
   10.16 +         * is being added to allow references to domain memory in order 
   10.17 +         * to support zero-copy network code.
   10.18 +         */
   10.19 +        
   10.20 +        return (dma_addr_t) ptr;
   10.21  }
   10.22  
   10.23  /* Unmap a single streaming mode DMA translation.  The dma_addr and size
    11.1 --- a/xen-2.4.16/include/hypervisor-ifs/network.h	Tue Feb 04 22:08:19 2003 +0000
    11.2 +++ b/xen-2.4.16/include/hypervisor-ifs/network.h	Wed Feb 05 13:56:14 2003 +0000
    11.3 @@ -15,17 +15,19 @@
    11.4  #include <linux/types.h>
    11.5  
    11.6  typedef struct tx_entry_st {
    11.7 -	unsigned long addr; /* virtual address */
    11.8 -	unsigned long size; /* in bytes */
    11.9 +	unsigned long addr;   /* virtual address */
   11.10 +	unsigned long size;   /* in bytes */
   11.11 +        int           status; /* per descriptor status. */
   11.12  } tx_entry_t;
   11.13  
   11.14  typedef struct rx_entry_st {
   11.15 -	unsigned long addr; /* virtual address */
   11.16 -	unsigned long size; /* in bytes */
   11.17 +	unsigned long addr;   /* virtual address */
   11.18 +	unsigned long size;   /* in bytes */
   11.19 +        int           status; /* per descriptor status. */
   11.20  } rx_entry_t;
   11.21  
   11.22 -#define TX_RING_SIZE 1024
   11.23 -#define RX_RING_SIZE 1024
   11.24 +#define TX_RING_SIZE 256
   11.25 +#define RX_RING_SIZE 256
   11.26  typedef struct net_ring_st {
   11.27      /*
   11.28       * Guest OS places packets into ring at tx_prod.
   11.29 @@ -111,4 +113,12 @@ typedef struct net_rule_ent_st
   11.30  /* Drop a new rule down to the network tables. */
   11.31  int add_net_rule(net_rule_t *rule);
   11.32  
   11.33 +
   11.34 +/* Descriptor status values:
   11.35 + */
   11.36 +
   11.37 +#define RING_STATUS_OK               0  // Everything is gravy.
   11.38 +#define RING_STATUS_ERR_CFU         -1  // Copy from user problems.
   11.39 +#define RING_STATUS_BAD_PAGE        -2  // What they gave us was pure evil.
   11.40 +
   11.41  #endif
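
The smaller rings remain powers of two because the producer/consumer indices in net/dev.c (later in this changeset) advance with a mask rather than a modulo, and the new per-descriptor status field is what the guest driver checks before handing a buffer to its stack. An illustrative sketch of both, simplified from hunks elsewhere in this changeset (the helper name is hypothetical):

    #define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1))   /* e.g. 255 -> 0 */

    /* Guest-side consumption of the status field, simplified from the
     * xenolinux network_rx_int() hunk near the end of this changeset. */
    static void drain_rx_ring(net_ring_t *ring, unsigned int *rx_idx)
    {
        unsigned int i;
        for ( i = *rx_idx; i != ring->rx_cons; i = RX_RING_INC(i) )
        {
            if ( ring->rx_ring[i].status != RING_STATUS_OK )
                continue;            /* hypervisor flagged this buffer bad */
            /* ... hand ring->rx_ring[i] up to the stack ... */
        }
        *rx_idx = i;
    }
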
    12.1 --- a/xen-2.4.16/include/xeno/mm.h	Tue Feb 04 22:08:19 2003 +0000
    12.2 +++ b/xen-2.4.16/include/xeno/mm.h	Wed Feb 05 13:56:14 2003 +0000
    12.3 @@ -7,6 +7,7 @@
    12.4  #include <asm/desc.h>
    12.5  #include <xeno/list.h>
    12.6  #include <hypervisor-ifs/hypervisor-if.h>
    12.7 +#include <xeno/spinlock.h>
    12.8  
    12.9  /* XXX KAF: These may die eventually, but so many refs in slab.c :((( */
   12.10  
   12.11 @@ -108,6 +109,7 @@ typedef struct pfn_info {
   12.12  extern frame_table_t * frame_table;
   12.13  extern unsigned long frame_table_size;
   12.14  extern struct list_head free_list;
   12.15 +extern spinlock_t free_list_lock;
   12.16  extern unsigned int free_pfns;
   12.17  extern unsigned long max_page;
   12.18  void init_frametable(unsigned long nr_pages);
    13.1 --- a/xen-2.4.16/include/xeno/skbuff.h	Tue Feb 04 22:08:19 2003 +0000
    13.2 +++ b/xen-2.4.16/include/xeno/skbuff.h	Wed Feb 05 13:56:14 2003 +0000
    13.3 @@ -34,6 +34,10 @@
    13.4  #define VIF_DROP                -3
    13.5  #define VIF_ANY_INTERFACE       -4
    13.6  
    13.7 +//skb_type values:
    13.8 +#define SKB_NORMAL               0
    13.9 +#define SKB_ZERO_COPY            1
   13.10 +
   13.11  #define HAVE_ALLOC_SKB		/* For the drivers to know */
   13.12  #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
   13.13  #define SLAB_SKB 		/* Slabified skbuffs 	   */
   13.14 @@ -187,7 +191,7 @@ struct sk_buff {
   13.15   	unsigned int 	data_len;
   13.16  	unsigned int	csum;			/* Checksum 					*/
   13.17  	unsigned char 	__unused,		/* Dead field, may be reused			*/
   13.18 -			cloned, 		/* head may be cloned (check refcnt to be sure). */
   13.19 +			cloned, 		/* head may be cloned (check refcnt to be sure) */
   13.20    			pkt_type,		/* Packet class					*/
   13.21    			ip_summed;		/* Driver fed us an IP checksum			*/
   13.22  	__u32		priority;		/* Packet queueing priority			*/
   13.23 @@ -203,8 +207,12 @@ struct sk_buff {
   13.24  
   13.25  	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
   13.26  
   13.27 -        int src_vif;                            /* vif we came from */
   13.28 -        int dst_vif;                            /* vif we are bound for */
   13.29 +        unsigned int    skb_type;               /* SKB_NORMAL or SKB_ZERO_COPY                  */
   13.30 +        struct pfn_info *pf;                    /* record of physical pf address for freeing    */
   13.31 +        int src_vif;                            /* vif we came from                             */
   13.32 +        int dst_vif;                            /* vif we are bound for                         */
   13.33 +        struct skb_shared_info shinfo;          /* shared info is no longer shared in Xen.      */
   13.34 +        
   13.35  
   13.36                  
   13.37          
   13.38 @@ -244,6 +252,7 @@ struct sk_buff {
   13.39  
   13.40  extern void			__kfree_skb(struct sk_buff *skb);
   13.41  extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
   13.42 +extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
   13.43  extern void			kfree_skbmem(struct sk_buff *skb);
   13.44  extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
   13.45  extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
   13.46 @@ -259,7 +268,8 @@ extern void	skb_over_panic(struct sk_buf
   13.47  extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
   13.48  
   13.49  /* Internal */
   13.50 -#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
   13.51 +//#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
   13.52 +#define skb_shinfo(SKB)     ((struct skb_shared_info *)(&(SKB)->shinfo))
   13.53  
   13.54  /**
   13.55   *	skb_queue_empty - check if a queue is empty
   13.56 @@ -1045,7 +1055,8 @@ static inline struct sk_buff *__dev_allo
   13.57  {
   13.58  	struct sk_buff *skb;
   13.59  
   13.60 -	skb = alloc_skb(length+16, gfp_mask);
   13.61 +	//skb = alloc_skb(length+16, gfp_mask);
   13.62 +        skb = alloc_zc_skb(length+16, gfp_mask);
   13.63  	if (skb)
   13.64  		skb_reserve(skb,16);
   13.65  	return skb;
    14.1 --- a/xen-2.4.16/include/xeno/vif.h	Tue Feb 04 22:08:19 2003 +0000
    14.2 +++ b/xen-2.4.16/include/xeno/vif.h	Wed Feb 05 13:56:14 2003 +0000
    14.3 @@ -18,9 +18,37 @@
    14.4  #include <hypervisor-ifs/network.h>
    14.5  #include <xeno/skbuff.h>
    14.6  
    14.7 +/* 
    14.8 + * shadow ring structures are used to protect the descriptors from
    14.9 + * tampering after they have been passed to the hypervisor.
   14.10 + *
   14.11 + * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
   14.12 + */
   14.13 +
   14.14 +typedef struct tx_shadow_entry_st {
   14.15 +    unsigned long addr;
   14.16 +    unsigned long size;
   14.17 +    int           status;
   14.18 +    unsigned long flush_count;
   14.19 +} tx_shadow_entry_t;
   14.20 +
   14.21 +typedef struct rx_shadow_entry_st {
   14.22 +    unsigned long addr;
   14.23 +    unsigned long size;
   14.24 +    int           status;
   14.25 +    unsigned long flush_count;
   14.26 +} rx_shadow_entry_t;
   14.27 +
   14.28 +typedef struct net_shadow_ring_st {
   14.29 +    tx_shadow_entry_t *tx_ring;
   14.30 +    rx_shadow_entry_t *rx_ring;
   14.31 +    unsigned int rx_prod, rx_cons, rx_idx;
   14.32 +} net_shadow_ring_t;
   14.33 +
   14.34  typedef struct net_vif_st {
   14.35 -    net_ring_t  *net_ring;
   14.36 -    int          id;
   14.37 +    net_ring_t          *net_ring;
   14.38 +    net_shadow_ring_t   *shadow_ring;
   14.39 +    int                 id;
   14.40      struct sk_buff_head skb_list;
   14.41      unsigned int domain;
   14.42      // rules table goes here in next revision.
   14.43 @@ -40,3 +68,8 @@ void destroy_net_vif(struct task_struct 
   14.44  void add_default_net_rule(int vif_id, u32 ipaddr);
   14.45  int net_get_target_vif(struct sk_buff *skb);
   14.46  void add_default_net_rule(int vif_id, u32 ipaddr);
   14.47 +
   14.48 +/* status fields per-descriptor:
   14.49 + */
   14.50 +
   14.51 +
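
The shadow ring holds a hypervisor-private copy of every descriptor, so a guest cannot change addr/size after posting a buffer. do_net_update() (net/dev.c, below) copies each new RX descriptor into the shadow ring and validates the page it names before marking it usable; a condensed sketch of that step (helper name is illustrative, not the changeset's code):

    static int shadow_one_rx_desc(net_ring_t *ring, net_shadow_ring_t *shadow,
                                  unsigned int i)
    {
        rx_shadow_entry_t *rx = shadow->rx_ring + i;
        struct pfn_info *page;

        if ( copy_from_user(rx, ring->rx_ring + i, sizeof(rx_entry_t)) )
            return (rx->status = RING_STATUS_ERR_CFU);

        /* rx->addr names the guest PTE that maps the receive buffer; the
         * frame holding it must be an L1 page table owned by this domain.
         * (The hunk below additionally checks the PTE's present bit and
         * the target frame's reference count.) */
        page = frame_table + (rx->addr >> PAGE_SHIFT);
        if ( !(page->flags & PGT_l1_page_table) ||
             ((page->flags & PG_domain_mask) != current->domain) )
            return (rx->status = RING_STATUS_BAD_PAGE);

        return (rx->status = RING_STATUS_OK);
    }
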
    15.1 --- a/xen-2.4.16/net/dev.c	Tue Feb 04 22:08:19 2003 +0000
    15.2 +++ b/xen-2.4.16/net/dev.c	Wed Feb 05 13:56:14 2003 +0000
    15.3 @@ -30,6 +30,8 @@
    15.4  #include <linux/pkt_sched.h>
    15.5  
    15.6  #include <linux/event.h>
    15.7 +#include <asm/domain_page.h>
    15.8 +#include <asm/pgalloc.h>
    15.9  
   15.10  #define BUG_TRAP ASSERT
   15.11  #define notifier_call_chain(_a,_b,_c) ((void)0)
   15.12 @@ -38,6 +40,12 @@
   15.13  #define rtnl_unlock() ((void)0)
   15.14  #define dst_init() ((void)0)
   15.15  
   15.16 +// Ring defines:
   15.17 +#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
   15.18 +#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
   15.19 +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
   15.20 +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
   15.21 +
   15.22  struct net_device *the_dev = NULL;
   15.23  
   15.24  /*
   15.25 @@ -47,11 +55,11 @@ struct net_device *the_dev = NULL;
   15.26  struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
   15.27  
   15.28  
   15.29 -/*****************************************************************************************
   15.30 +/*********************************************************************************
   15.31  
   15.32  			    Device Interface Subroutines
   15.33  
   15.34 -******************************************************************************************/
   15.35 +**********************************************************************************/
   15.36  
   15.37  /**
   15.38   *	__dev_get_by_name	- find a device by its name 
   15.39 @@ -661,7 +669,83 @@ static void get_sample_stats(int cpu)
   15.40  	softnet_data[cpu].avg_blog = avg_blog;
   15.41  }
   15.42  
   15.43 +void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
   15.44 +{
   15.45 +        net_shadow_ring_t *shadow_ring;
   15.46 +        rx_shadow_entry_t *rx;
   15.47 +        unsigned long *g_pte, tmp;
   15.48 +        struct pfn_info *g_pfn, *h_pfn;
   15.49 +        unsigned int i; //, nvif;
   15.50  
   15.51 +        if (skb->skb_type != SKB_ZERO_COPY) 
   15.52 +            return;
   15.53 +        
   15.54 +        /*
   15.55 +         * Write the virtual MAC address into the destination field
   15.56 +         * of the ethernet packet. Furthermore, do the same for ARP
   15.57 +         * reply packets. This is easy because the virtual MAC address
   15.58 +         * is always 00-[nn]-00-00-00-00, where the second sixteen bits 
   15.59 +         * of the MAC are the vif's id.  This is to differentiate between
   15.60 +         * vifs on guests that have more than one.
   15.61 +         *
   15.62 +         * In zero copy, the data pointers for the packet have to have been 
   15.63 +         * mapped in by the caller.
   15.64 +         */
   15.65 +
   15.66 +        memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
   15.67 +//        *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif;
   15.68 +        if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
   15.69 +        {
   15.70 +            memset(skb->nh.raw + 18, 0, ETH_ALEN);
   15.71 +//            *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif;
   15.72 +        }
   15.73 +        shadow_ring = vif->shadow_ring;
   15.74 +
   15.75 +        //Advance to next good buffer.
   15.76 +        for (i = shadow_ring->rx_cons; 
   15.77 +             (i != shadow_ring->rx_prod) 
   15.78 +             && ( shadow_ring->rx_ring[i].status != RING_STATUS_OK );
   15.79 +             i = RX_RING_INC(i));
   15.80 +            
   15.81 +        if (( i != shadow_ring->rx_prod ) &&
   15.82 +            ( shadow_ring->rx_ring[i].status == RING_STATUS_OK ))
   15.83 +        {
   15.84 +            rx = shadow_ring->rx_ring+i;
   15.85 +            if ( (skb->len + ETH_HLEN) < rx->size )
   15.86 +                rx->size = skb->len + ETH_HLEN;
   15.87 +                        
   15.88 +            if (rx->flush_count == tlb_flush_count[smp_processor_id()])
   15.89 +                flush_tlb_all();
   15.90 +            
   15.91 +            g_pte = map_domain_mem(rx->addr);
   15.92 +
   15.93 +            g_pfn =  frame_table + (*g_pte >> PAGE_SHIFT);
   15.94 +            h_pfn = skb->pf;
   15.95 +
   15.96 +            //flip and/or set relevant pf_info fields.
   15.97 +            tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp;
   15.98 +            tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp;
   15.99 +            tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp;
  15.100 +            h_pfn->tot_count = 1;
  15.101 +            h_pfn->type_count = g_pfn->type_count;
  15.102 +            g_pfn->tot_count = g_pfn->type_count = 0;
  15.103 +            h_pfn->flags = current->domain | PGT_l1_page_table;
  15.104 +            g_pfn->flags = PGT_l1_page_table;
  15.105 +            //point guest pte at the new page:
  15.106 +            *g_pte = (*g_pte & ~PAGE_MASK) 
  15.107 +                | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
  15.108 +            *g_pte |= _PAGE_PRESENT;
  15.109 +                
  15.110 +            unmap_domain_mem(g_pte);
  15.111 +            skb->pf = g_pfn; // return the guest pfn to be put on the free list
  15.112 +                
  15.113 +            shadow_ring->rx_cons = RX_RING_INC(i);
  15.114 +        }
  15.115 +}
  15.116 +
  15.117 +/* Deliver skb to an old protocol, which is not threaded well
  15.118 +   or which do not understand shared skbs.
  15.119 + */
  15.120  /**
  15.121   *	netif_rx	-	post buffer to the network code
  15.122   *	@skb: buffer to post
  15.123 @@ -686,21 +770,38 @@ int netif_rx(struct sk_buff *skb)
  15.124  #ifdef CONFIG_SMP
  15.125          unsigned long cpu_mask;
  15.126  #endif
  15.127 +        
  15.128          struct task_struct *p;
  15.129  	int this_cpu = smp_processor_id();
  15.130  	struct softnet_data *queue;
  15.131  	unsigned long flags;
  15.132          net_vif_t *vif;
  15.133  
  15.134 +	local_irq_save(flags);
  15.135 +        
  15.136  	if (skb->stamp.tv_sec == 0)
  15.137  		get_fast_time(&skb->stamp);
  15.138  
  15.139 +        /* Attempt to handle zero-copy packets here: */
  15.140 +        if (skb->skb_type == SKB_ZERO_COPY)
  15.141 +        {
  15.142 +                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
  15.143 +
  15.144 +                /* remapping this address really screws up all the skb pointers.  We need 
  15.145 +                 * to map them all here sufficiently to get the packet demultiplexed.
  15.146 +                 */
  15.147 +                
  15.148 +                skb->data = skb->head;
  15.149 +                skb_reserve(skb,16); // need to ensure that all the drivers and not just tulip do this.
  15.150 +                skb->mac.raw = skb->data;
  15.151 +                skb->data += ETH_HLEN;
  15.152 +                skb->nh.raw = skb->data;
  15.153 +        }
  15.154 +        
  15.155  	/* The code is rearranged so that the path is the most
  15.156  	   short when CPU is congested, but is still operating.
  15.157  	 */
  15.158  	queue = &softnet_data[this_cpu];
  15.159 -
  15.160 -	local_irq_save(flags);
  15.161          
  15.162  	netdev_rx_stat[this_cpu].total++;
  15.163  
  15.164 @@ -733,7 +834,7 @@ int netif_rx(struct sk_buff *skb)
  15.165              do {
  15.166                  if ( p->domain != vif->domain ) continue;
  15.167                  if ( vif->skb_list.qlen > 100 ) break;
  15.168 -                skb_queue_tail(&vif->skb_list, skb);
  15.169 +                deliver_packet(skb, vif);
  15.170                  cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
  15.171                  read_unlock(&tasklist_lock);
  15.172                  goto found;
  15.173 @@ -745,20 +846,24 @@ int netif_rx(struct sk_buff *skb)
  15.174  
  15.175  drop:
  15.176  	netdev_rx_stat[this_cpu].dropped++;
  15.177 -	local_irq_restore(flags);
  15.178 -
  15.179 +        if (skb->skb_type == SKB_ZERO_COPY)
  15.180 +                unmap_domain_mem(skb->head);
  15.181  	kfree_skb(skb);
  15.182 +        local_irq_restore(flags);
  15.183  	return NET_RX_DROP;
  15.184  
  15.185  found:
  15.186 +        if (skb->skb_type == SKB_ZERO_COPY) {
  15.187 +                unmap_domain_mem(skb->head);
  15.188 +                skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
  15.189 +        }
  15.190 +        kfree_skb(skb);
  15.191          hyp_event_notify(cpu_mask);
  15.192          local_irq_restore(flags);
  15.193          return 0;
  15.194  }
  15.195  
  15.196 -/* Deliver skb to an old protocol, which is not threaded well
  15.197 -   or which do not understand shared skbs.
  15.198 - */
  15.199 +
  15.200  static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last)
  15.201  {
  15.202  	static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
  15.203 @@ -873,15 +978,46 @@ static inline void handle_diverter(struc
  15.204  }
  15.205  #endif   /* CONFIG_NET_DIVERT */
  15.206  
  15.207 +void update_shared_ring(void)
  15.208 +{
  15.209 +    rx_shadow_entry_t *rx;
  15.210 +    shared_info_t *s = current->shared_info;
  15.211 +    net_ring_t *net_ring;
  15.212 +    net_shadow_ring_t *shadow_ring;
  15.213 +    unsigned int nvif;
  15.214  
  15.215 +    clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
  15.216 +    for (nvif = 0; nvif < current->num_net_vifs; nvif++)
  15.217 +    {
  15.218 +        net_ring = current->net_vif_list[nvif]->net_ring;
  15.219 +        shadow_ring = current->net_vif_list[nvif]->shadow_ring;
  15.220 +        while ((shadow_ring->rx_idx != shadow_ring->rx_cons) 
  15.221 +                && (net_ring->rx_cons != net_ring->rx_prod))
  15.222 +        {
  15.223 +            rx = shadow_ring->rx_ring+shadow_ring->rx_idx;
  15.224 +            copy_to_user(net_ring->rx_ring + net_ring->rx_cons, rx, sizeof(rx_entry_t));
  15.225 +
  15.226 +            shadow_ring->rx_idx = RX_RING_INC(shadow_ring->rx_idx);
  15.227 +            net_ring->rx_cons   = RX_RING_INC(net_ring->rx_cons);
  15.228 +
  15.229 +            if ( net_ring->rx_cons == net_ring->rx_event )
  15.230 +                set_bit(_EVENT_NET_RX_FOR_VIF(nvif), &s->events);
  15.231 +            
  15.232 +        }
  15.233 +    }
  15.234 +}
  15.235 +            
  15.236  void flush_rx_queue(void)
  15.237  {
  15.238      struct sk_buff *skb;
  15.239      shared_info_t *s = current->shared_info;
  15.240      net_ring_t *net_ring;
  15.241 +    net_shadow_ring_t *shadow_ring;
  15.242      unsigned int i, nvif;
  15.243 -    rx_entry_t rx;
  15.244 -
  15.245 +    rx_shadow_entry_t *rx;
  15.246 +    unsigned long *g_pte, tmp;
  15.247 +    struct pfn_info *g_pfn, *h_pfn;
  15.248 +    
  15.249      /* I have changed this to batch flush all vifs for a guest
  15.250       * at once, whenever this is called.  Since the guest is about to be
  15.251       * scheduled and issued an RX interrupt for one nic, it might as well
  15.252 @@ -893,15 +1029,17 @@ void flush_rx_queue(void)
  15.253       * loop can be replaced with a translation to the specific NET 
  15.254       * interrupt to serve. --akw
  15.255       */
  15.256 -    
  15.257      clear_bit(_HYP_EVENT_NET_RX, &current->hyp_events);
  15.258  
  15.259      for (nvif = 0; nvif < current->num_net_vifs; nvif++)
  15.260      {
  15.261          net_ring = current->net_vif_list[nvif]->net_ring;
  15.262 +        shadow_ring = current->net_vif_list[nvif]->shadow_ring;
  15.263          while ( (skb = skb_dequeue(&current->net_vif_list[nvif]->skb_list)) 
  15.264                          != NULL )
  15.265          {
  15.266 +            //temporary hack to stop processing non-zc skbs.
  15.267 +            if (skb->skb_type == SKB_NORMAL) continue;
  15.268              /*
  15.269               * Write the virtual MAC address into the destination field
  15.270               * of the ethernet packet. Furthermore, do the same for ARP
  15.271 @@ -912,6 +1050,16 @@ void flush_rx_queue(void)
  15.272               * second sixteen bits, which are the per-host vif id.
  15.273               * (so eth0 should be 00-00-..., eth1 is 00-01-...)
  15.274               */
  15.275 +            
  15.276 +            if (skb->skb_type == SKB_ZERO_COPY)
  15.277 +            {
  15.278 +                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
  15.279 +                skb->data = skb->head;
  15.280 +                skb_reserve(skb,16); 
  15.281 +                skb->mac.raw = skb->data;
  15.282 +                skb->data += ETH_HLEN;
  15.283 +            }
  15.284 +            
  15.285              memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
  15.286              *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif;
  15.287              if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
  15.288 @@ -920,15 +1068,84 @@ void flush_rx_queue(void)
  15.289                  *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif;
  15.290              }
  15.291  
  15.292 +            if (skb->skb_type == SKB_ZERO_COPY)
  15.293 +            {
  15.294 +                unmap_domain_mem(skb->head);
  15.295 +            }
  15.296 +
  15.297              i = net_ring->rx_cons;
  15.298              if ( i != net_ring->rx_prod )
  15.299              {
  15.300 -                if ( !copy_from_user(&rx, net_ring->rx_ring+i, sizeof(rx)) )
  15.301 +                net_ring->rx_ring[i].status = shadow_ring->rx_ring[i].status;
  15.302 +                if ( shadow_ring->rx_ring[i].status == RING_STATUS_OK)
  15.303                  {
  15.304 -                    if ( (skb->len + ETH_HLEN) < rx.size )
  15.305 -                        rx.size = skb->len + ETH_HLEN;
  15.306 -                    copy_to_user((void *)rx.addr, skb->mac.raw, rx.size);
  15.307 -                    copy_to_user(net_ring->rx_ring+i, &rx, sizeof(rx));
  15.308 +                    rx = shadow_ring->rx_ring+i;
  15.309 +                    if ( (skb->len + ETH_HLEN) < rx->size )
  15.310 +                        rx->size = skb->len + ETH_HLEN;
  15.311 +
  15.312 +                    /* remap the packet again.  This is very temporary and will shortly be
  15.313 +                     * replaced with a page swizzle.
  15.314 +                     */
  15.315 +
  15.316 +                    /*if (skb->skb_type == SKB_ZERO_COPY)
  15.317 +                    {
  15.318 +                        skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
  15.319 +                        skb->data = skb->head;
  15.320 +                        skb_reserve(skb,16); 
  15.321 +                        skb->mac.raw = skb->data;
  15.322 +                        skb->data += ETH_HLEN;
  15.323 +                    }
  15.324 +                                                                        
  15.325 +                    copy_to_user((void *)rx->addr, skb->mac.raw, rx->size);
  15.326 +                    copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx));
  15.327 +                    
  15.328 +                    if (skb->skb_type == SKB_ZERO_COPY)
  15.329 +                    {
  15.330 +                        unmap_domain_mem(skb->head);
  15.331 +                        skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
  15.332 +                    }*/
  15.333 +
  15.334 +                    //presumably I don't need to rewalk the guest page table
  15.335 +                    //here.
  15.336 +                    if (skb->skb_type == SKB_ZERO_COPY) 
  15.337 +                    {
  15.338 +                        // g_pfn is the frame FROM the guest being given up
  15.339 +                        // h_pfn is the frame FROM the hypervisor, passing up.
  15.340 +                        
  15.341 +                        if (rx->flush_count == tlb_flush_count[smp_processor_id()])
  15.342 +                        {
  15.343 +                            flush_tlb_all();
  15.344 +                        }
  15.345 +                        
  15.346 +                        g_pte = map_domain_mem(rx->addr);
  15.347 +                        
  15.348 +                        //g_pfn = frame_table + (rx->addr >> PAGE_SHIFT);
  15.349 +                        g_pfn =  frame_table + (*g_pte >> PAGE_SHIFT);
  15.350 +                        h_pfn = skb->pf;
  15.351 +
  15.352 +
  15.353 +                        tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp;
  15.354 +                        tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp;
  15.355 +                        tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp;
  15.356 +                        
  15.357 +                        h_pfn->tot_count = 1;
  15.358 +                        h_pfn->type_count = g_pfn->type_count;
  15.359 +                        g_pfn->tot_count = g_pfn->type_count = 0;
  15.360 +                        
  15.361 +                        h_pfn->flags = current->domain | PGT_l1_page_table;
  15.362 +                        g_pfn->flags = PGT_l1_page_table;
  15.363 +
  15.364 +
  15.365 +                        *g_pte = (*g_pte & ~PAGE_MASK) | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
  15.366 +
  15.367 +                        *g_pte |= _PAGE_PRESENT;
  15.368 +                        unmap_domain_mem(g_pte);
  15.369 +
  15.370 +                        skb->pf = g_pfn; // return the guest pfn to be put on the free list
  15.371 +                    } else {
  15.372 +                        BUG(); //got a non-zero copy skb.  which is not good.
  15.373 +                    }
  15.374 +                    
  15.375                  }
  15.376                  net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
  15.377                  if ( net_ring->rx_cons == net_ring->rx_event )
  15.378 @@ -1916,23 +2133,30 @@ int __init net_dev_init(void)
  15.379   * Called from guest OS to notify updates to its transmit and/or receive
  15.380   * descriptor rings.
  15.381   */
  15.382 -#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
  15.383 -#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
  15.384 -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
  15.385 -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
  15.386 +
  15.387  long do_net_update(void)
  15.388  {
  15.389      shared_info_t *shared = current->shared_info;    
  15.390 -    net_ring_t *net_ring = current->net_ring_base;
  15.391 +    net_ring_t *net_ring;
  15.392 +    net_shadow_ring_t *shadow_ring;
  15.393      net_vif_t *current_vif;
  15.394      unsigned int i, j;
  15.395      struct sk_buff *skb;
  15.396      tx_entry_t tx;
  15.397 -
  15.398 +    rx_shadow_entry_t *rx;
  15.399 +    unsigned long pfn;
  15.400 +    struct pfn_info *page;
  15.401 +    unsigned long *g_pte;
  15.402 +    
  15.403 +    
  15.404      for ( j = 0; j < current->num_net_vifs; j++)
  15.405      {
  15.406          current_vif = current->net_vif_list[j];
  15.407          net_ring = current_vif->net_ring;
  15.408 +
  15.409 +        /* First, we send out pending TX descriptors if they exist on this ring.
  15.410 +         */
  15.411 +        
  15.412          for ( i = net_ring->tx_cons; i != net_ring->tx_prod; i = TX_RING_INC(i) )
  15.413          {
  15.414              if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
  15.415 @@ -1982,6 +2206,7 @@ long do_net_update(void)
  15.416                  net_get_target_vif(skb);
  15.417                  if ( skb->dst_vif > VIF_PHYSICAL_INTERFACE )
  15.418                  {
  15.419 +printk("LOCAL DELIVERY!\n");
  15.420                      (void)netif_rx(skb);
  15.421                  }
  15.422                  else if ( skb->dst_vif == VIF_PHYSICAL_INTERFACE )
  15.423 @@ -1997,8 +2222,50 @@ long do_net_update(void)
  15.424              }
  15.425          }
  15.426          net_ring->tx_cons = i;
  15.427 +
  15.428 +        /* Next, pull any new RX descriptors across to the shadow ring.
  15.429 +         */
  15.430 +    
  15.431 +        shadow_ring = current_vif->shadow_ring;
  15.432 +
  15.433 +        for (i = shadow_ring->rx_prod; i != net_ring->rx_prod; i = RX_RING_INC(i))
  15.434 +        {
  15.435 +            /* This copy assumes that rx_shadow_entry_t is an extension of 
   15.436 +             * rx_entry_t; extra fields must be tacked on to the end.
  15.437 +             */
  15.438 +            if ( copy_from_user( shadow_ring->rx_ring+i, net_ring->rx_ring+i, 
  15.439 +                                 sizeof (rx_entry_t) ) )
  15.440 +            {
  15.441 +                shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
  15.442 +                continue;
  15.443 +            } else {
  15.444 +                    
  15.445 +                rx = shadow_ring->rx_ring + i;
  15.446 +                pfn = rx->addr >> PAGE_SHIFT;
  15.447 +                page = frame_table + pfn;
  15.448 +                
  15.449 +                shadow_ring->rx_ring[i].status = RING_STATUS_OK;
  15.450 +
  15.451 +               if  (!(page->flags & PGT_l1_page_table) 
  15.452 +                    || !((page->flags & PG_domain_mask) == current->domain))
  15.453 +                       shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE; 
  15.454 +
  15.455 +
  15.456 +                g_pte = map_domain_mem(rx->addr);
  15.457 +
  15.458 +                if (!(*g_pte & _PAGE_PRESENT))
  15.459 +                        shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
  15.460 +                page = (*g_pte >> PAGE_SHIFT) + frame_table;
  15.461 +                if (page->tot_count != 1) 
  15.462 +                        shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
  15.463 +                
  15.464 +                *g_pte &= ~_PAGE_PRESENT;
  15.465 +                rx->flush_count = tlb_flush_count[smp_processor_id()];
  15.466 +                unmap_domain_mem(g_pte);
  15.467 +            }
  15.468 +        }
  15.469 +        shadow_ring->rx_prod = net_ring->rx_prod;
  15.470      }
  15.471 -
  15.472      return 0;
  15.473  }
  15.474  
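
The core of the zero-copy receive path above is the page flip: the frame holding the received data (skb->pf) is handed to the guest by rewriting the guest's PTE, and the frame the guest originally posted is reclaimed for the free list. A condensed restatement of that flip, with the redundant flags swap dropped (illustrative only, not the exact code):

    static void flip_rx_page(struct sk_buff *skb, rx_shadow_entry_t *rx)
    {
        unsigned long *g_pte, tmp;
        struct pfn_info *g_pfn, *h_pfn;

        g_pte = map_domain_mem(rx->addr);  /* rx->addr: machine address of the guest PTE */
        g_pfn = frame_table + (*g_pte >> PAGE_SHIFT);  /* frame the guest posted         */
        h_pfn = skb->pf;                               /* frame holding the packet data  */

        /* Swap the list links of the two frames. */
        tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp;
        tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp;

        /* h_pfn takes over the reference counts and is tagged as owned by
         * the current domain; g_pfn is left with zero counts so it can be
         * returned to the free list. */
        h_pfn->tot_count  = 1;
        h_pfn->type_count = g_pfn->type_count;
        g_pfn->tot_count  = g_pfn->type_count = 0;
        h_pfn->flags = current->domain | PGT_l1_page_table;
        g_pfn->flags = PGT_l1_page_table;

        /* Re-point the guest PTE at h_pfn and make it present again. */
        *g_pte = (*g_pte & ~PAGE_MASK) |
                 (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK);
        *g_pte |= _PAGE_PRESENT;
        unmap_domain_mem(g_pte);

        skb->pf = g_pfn;   /* hand the guest's old frame back for freeing */
    }
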
    16.1 --- a/xen-2.4.16/net/eth.c	Tue Feb 04 22:08:19 2003 +0000
    16.2 +++ b/xen-2.4.16/net/eth.c	Wed Feb 05 13:56:14 2003 +0000
    16.3 @@ -161,52 +161,62 @@ unsigned short eth_type_trans(struct sk_
    16.4  	struct ethhdr *eth;
    16.5  	unsigned char *rawp;
    16.6  	
    16.7 -	skb->mac.raw=skb->data;
    16.8 -	skb_pull(skb,dev->hard_header_len);
    16.9 -	eth= skb->mac.ethernet;
   16.10 +        if (skb->skb_type == SKB_ZERO_COPY)
   16.11 +        {
   16.12 +            skb_pull(skb,dev->hard_header_len);
   16.13 +            skb->mac.raw= (void *)0xdeadbeef;
   16.14 +            return htons(ETH_P_802_2);
   16.15 +            
   16.16 +        } else { // SKB_NORMAL
   16.17 +        
   16.18 +	    skb->mac.raw=skb->data;
   16.19 +	    skb_pull(skb,dev->hard_header_len);
   16.20 +	    eth= skb->mac.ethernet;
   16.21  	
   16.22 -	if(*eth->h_dest&1)
   16.23 -	{
   16.24 -		if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
   16.25 +	    if(*eth->h_dest&1)
   16.26 +	    {
   16.27 +	    	if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
   16.28  			skb->pkt_type=PACKET_BROADCAST;
   16.29  		else
   16.30  			skb->pkt_type=PACKET_MULTICAST;
   16.31 -	}
   16.32 +	    }
   16.33  	
   16.34 -	/*
   16.35 -	 *	This ALLMULTI check should be redundant by 1.4
   16.36 -	 *	so don't forget to remove it.
   16.37 -	 *
   16.38 -	 *	Seems, you forgot to remove it. All silly devices
   16.39 -	 *	seems to set IFF_PROMISC.
   16.40 -	 */
   16.41 +	    /*
   16.42 +	    *	This ALLMULTI check should be redundant by 1.4
   16.43 +	    *	so don't forget to remove it.
   16.44 +	    *
   16.45 +	    *	Seems, you forgot to remove it. All silly devices
   16.46 +	    *	seems to set IFF_PROMISC.
   16.47 +	    */
   16.48  	 
   16.49 -	else if(1 /*dev->flags&IFF_PROMISC*/)
   16.50 -	{
   16.51 +	    else if(1 /*dev->flags&IFF_PROMISC*/)
   16.52 +	    {
   16.53  		if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
   16.54  			skb->pkt_type=PACKET_OTHERHOST;
   16.55 -	}
   16.56 +	    }
   16.57  	
   16.58 -	if (ntohs(eth->h_proto) >= 1536)
   16.59 +	    if (ntohs(eth->h_proto) >= 1536)
   16.60  		return eth->h_proto;
   16.61  		
   16.62 -	rawp = skb->data;
   16.63 +	    rawp = skb->data;
   16.64  	
   16.65 -	/*
   16.66 -	 *	This is a magic hack to spot IPX packets. Older Novell breaks
   16.67 -	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
   16.68 -	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
   16.69 -	 *	won't work for fault tolerant netware but does for the rest.
   16.70 -	 */
   16.71 -	if (*(unsigned short *)rawp == 0xFFFF)
   16.72 +	    /*
   16.73 +	    *	This is a magic hack to spot IPX packets. Older Novell breaks
   16.74 +	    *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
   16.75 +	    *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
   16.76 +	    *	won't work for fault tolerant netware but does for the rest.
   16.77 +	    */
   16.78 +	    if (*(unsigned short *)rawp == 0xFFFF)
   16.79  		return htons(ETH_P_802_3);
   16.80  		
   16.81 -	/*
   16.82 -	 *	Real 802.2 LLC
   16.83 -	 */
   16.84 -	return htons(ETH_P_802_2);
   16.85 +	    /*
   16.86 +	    *	Real 802.2 LLC
   16.87 +	    */
   16.88 +	    return htons(ETH_P_802_2);
   16.89 +        }
   16.90  }
   16.91  
   16.92 +
   16.93  int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
   16.94  {
   16.95  	struct ethhdr *eth = skb->mac.ethernet;
    17.1 --- a/xen-2.4.16/net/skbuff.c	Tue Feb 04 22:08:19 2003 +0000
    17.2 +++ b/xen-2.4.16/net/skbuff.c	Wed Feb 05 13:56:14 2003 +0000
    17.3 @@ -149,6 +149,104 @@ static __inline__ void skb_head_to_pool(
    17.4  	kmem_cache_free(skbuff_head_cache, skb);
    17.5  }
    17.6  
    17.7 +static inline u8 *alloc_skb_data_page(struct sk_buff *skb)
    17.8 +{
    17.9 +        struct list_head *list_ptr;
   17.10 +        struct pfn_info  *pf;
   17.11 +        unsigned long flags;
   17.12 +        
   17.13 +        spin_lock_irqsave(&free_list_lock, flags);
   17.14 +
   17.15 +        if (!free_pfns) return NULL;
   17.16 +
   17.17 +        list_ptr = free_list.next;
   17.18 +        pf = list_entry(list_ptr, struct pfn_info, list);
   17.19 +        pf->flags = 0; // owned by dom0
   17.20 +        list_del(&pf->list);
   17.21 +        pf->next = pf->prev = (pf - frame_table);
   17.22 +        free_pfns--;
   17.23 +
   17.24 +        spin_unlock_irqrestore(&free_list_lock, flags);
   17.25 +
   17.26 +        skb->pf = pf;
   17.27 +        return (u8 *)((pf - frame_table) << PAGE_SHIFT);
   17.28 +}
   17.29 +
   17.30 +static inline void dealloc_skb_data_page(struct sk_buff *skb)
   17.31 +{
   17.32 +        struct pfn_info  *pf;
   17.33 +        unsigned long flags;
   17.34 +
   17.35 +        pf = skb->pf;
   17.36 +
   17.37 +        spin_lock_irqsave(&free_list_lock, flags);
   17.38 +        
   17.39 +        list_add(&pf->list, &free_list);
   17.40 +        free_pfns++;
   17.41 +
   17.42 +        spin_unlock_irqrestore(&free_list_lock, flags);
   17.43 +}
   17.44 +
   17.45 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
   17.46 +{
   17.47 +        struct sk_buff *skb;
   17.48 +        u8 *data;
   17.49 +
   17.50 +        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   17.51 +                static int count = 0;
   17.52 +                if (++count < 5) {
   17.53 +                        printk(KERN_ERR "alloc_skb called nonatomically "
   17.54 +                               "from interrupt %p\n", NET_CALLER(size));
   17.55 +                        BUG();
   17.56 +                }
   17.57 +                gfp_mask &= ~__GFP_WAIT;
   17.58 +        }
   17.59 +
   17.60 +        /* Get the HEAD */
   17.61 +        skb = skb_head_from_pool();
   17.62 +        if (skb == NULL) {
   17.63 +                skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   17.64 +                if (skb == NULL)
   17.65 +                        goto nohead;
   17.66 +        }
   17.67 +
   17.68 +        /* Get the DATA. Size must match skb_add_mtu(). */
   17.69 +        size = SKB_DATA_ALIGN(size);
   17.70 +        data = alloc_skb_data_page(skb);
   17.71 +
   17.72 +        if (data == NULL)
   17.73 +                goto nodata;
   17.74 +
   17.75 +        /* XXX: does not include slab overhead */
   17.76 +        skb->truesize = size + sizeof(struct sk_buff);
   17.77 +
   17.78 +        /* Load the data pointers. */
   17.79 +        skb->head = data;
   17.80 +        skb->data = data;
   17.81 +        skb->tail = data;
   17.82 +        skb->end = data + size;
   17.83 +
   17.84 +        /* Set up other state */
   17.85 +        skb->len = 0;
   17.86 +        skb->cloned = 0;
   17.87 +        skb->data_len = 0;
   17.88 +        skb->src_vif = VIF_UNKNOWN_INTERFACE;
   17.89 +        skb->dst_vif = VIF_UNKNOWN_INTERFACE;
   17.90 +        skb->skb_type = SKB_ZERO_COPY;
   17.91 +
   17.92 +        atomic_set(&skb->users, 1);
   17.93 +        atomic_set(&(skb_shinfo(skb)->dataref), 1);
   17.94 +        skb_shinfo(skb)->nr_frags = 0;
   17.95 +        skb_shinfo(skb)->frag_list = NULL;
   17.96 +
   17.97 +        return skb;
   17.98 +
   17.99 +nodata:
  17.100 +        skb_head_to_pool(skb);
  17.101 +nohead:
  17.102 +        return NULL;
  17.103 +}
  17.104 +
  17.105  
  17.106  /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  17.107   *	'private' fields and also do memory statistics to find all the
  17.108 @@ -213,6 +311,7 @@ struct sk_buff *alloc_skb(unsigned int s
  17.109  	skb->data_len = 0;
  17.110          skb->src_vif = VIF_UNKNOWN_INTERFACE;
  17.111          skb->dst_vif = VIF_UNKNOWN_INTERFACE;
  17.112 +        skb->skb_type = SKB_NORMAL;
  17.113  
  17.114  	atomic_set(&skb->users, 1); 
  17.115  	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  17.116 @@ -284,6 +383,7 @@ static void skb_clone_fraglist(struct sk
  17.117  
  17.118  static void skb_release_data(struct sk_buff *skb)
  17.119  {
  17.120 +
  17.121  	if (!skb->cloned ||
  17.122  	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
  17.123  		if (skb_shinfo(skb)->nr_frags) {
  17.124 @@ -295,7 +395,12 @@ static void skb_release_data(struct sk_b
  17.125  		if (skb_shinfo(skb)->frag_list)
  17.126  			skb_drop_fraglist(skb);
  17.127  
  17.128 -		kfree(skb->head);
  17.129 +                if (skb->skb_type == SKB_NORMAL) {
  17.130 +		    kfree(skb->head);
   17.131 +                } else if (skb->skb_type == SKB_ZERO_COPY) {
   17.131 +                    dealloc_skb_data_page(skb);
  17.132 +                } else {
  17.133 +                    BUG(); //skb_release_data called with unknown skb type!
  17.134 +                }
  17.135  	}
  17.136  }
  17.137  
  17.138 @@ -333,6 +438,7 @@ void __kfree_skb(struct sk_buff *skb)
  17.139  		}
  17.140  		skb->destructor(skb);
  17.141  	}
  17.142 +
  17.143  #ifdef CONFIG_NETFILTER
  17.144  	nf_conntrack_put(skb->nfct);
  17.145  #endif
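Taken together, these xen-2.4.16/net/skbuff.c changes tag each skb with the scheme that allocated its data so the release path can choose the matching free routine. A usage sketch (illustrative only, not part of the changeset):

    struct sk_buff *zc  = alloc_zc_skb(1500, GFP_ATOMIC);  /* data page taken from the free_pfns list */
    struct sk_buff *std = alloc_skb(1500, GFP_ATOMIC);     /* data from kmalloc(), as before          */

    kfree_skb(zc);   /* skb_release_data(): SKB_ZERO_COPY -> dealloc_skb_data_page() */
    kfree_skb(std);  /* skb_release_data(): SKB_NORMAL    -> kfree(skb->head)        */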
    18.1 --- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c	Tue Feb 04 22:08:19 2003 +0000
    18.2 +++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c	Wed Feb 05 13:56:14 2003 +0000
    18.3 @@ -192,9 +192,9 @@ static void network_alloc_rx_buffers(str
    18.4          skb = dev_alloc_skb(RX_BUF_SIZE);
    18.5          if ( skb == NULL ) break;
    18.6          skb->dev = dev;
    18.7 -        skb_reserve(skb, 2); /* word align the IP header */
    18.8 +        //skb_reserve(skb, 2); /* word align the IP header */
    18.9          np->rx_skb_ring[i] = skb;
   18.10 -        np->net_ring->rx_ring[i].addr = (unsigned long)skb->data;
   18.11 +        np->net_ring->rx_ring[i].addr = (unsigned long)skb->net_page->ppte; //data;
   18.12          np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
   18.13      }
   18.14  
   18.15 @@ -276,10 +276,18 @@ static void network_rx_int(int irq, void
   18.16   again:
   18.17      for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
   18.18      {
   18.19 +        if (np->net_ring->rx_ring[i].status != RING_STATUS_OK)
   18.20 +        {
   18.21 +                printk("bad buffer on RX ring!(%d)\n", 
   18.22 +                                np->net_ring->rx_ring[i].status);
   18.23 +                continue;
   18.24 +        }
   18.25          skb = np->rx_skb_ring[i];
   18.26 +        
   18.27          skb_put(skb, np->net_ring->rx_ring[i].size);
   18.28          skb->protocol = eth_type_trans(skb, dev);
   18.29          np->stats.rx_packets++;
   18.30 +
   18.31          np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
   18.32          netif_rx(skb);
   18.33          dev->last_rx = jiffies;
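With this change the guest driver no longer posts a virtual data pointer into the receive ring; it posts the machine address of the page table entry that maps the buffer page (skb->net_page->ppte, computed once per page in init_net_pages()), presumably so that Xen can deliver an incoming frame by updating that PTE. A per-slot sketch (illustrative; names as in the patch):

    skb = dev_alloc_skb(RX_BUF_SIZE);                        /* now backed by a net_page          */
    np->rx_skb_ring[i] = skb;
    np->net_ring->rx_ring[i].addr = skb->net_page->ppte;     /* machine address of the page's PTE */
    np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16;
    /* On completion, network_rx_int() checks rx_ring[i].status against RING_STATUS_OK
       before handing the skb to netif_rx(), as shown above. */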
    19.1 --- a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h	Tue Feb 04 22:08:19 2003 +0000
    19.2 +++ b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h	Wed Feb 05 13:56:14 2003 +0000
    19.3 @@ -2,7 +2,7 @@
    19.4  #define _ASM_IO_H
    19.5  
    19.6  #include <linux/config.h>
    19.7 -
    19.8 +#include <asm/hypervisor.h>
    19.9  /*
   19.10   * This file contains the definitions for the x86 IO instructions
   19.11   * inb/inw/inl/outb/outw/outl and the "string versions" of the same
   19.12 @@ -74,6 +74,22 @@ static inline void * phys_to_virt(unsign
   19.13  }
   19.14  
   19.15  /*
    19.16 + * Change virtual addresses to machine addresses and vice versa.
   19.17 + * These are equally trivial.
   19.18 + */
   19.19 +
   19.20 +static inline unsigned long virt_to_mach(volatile void * address)
   19.21 +{
   19.22 +       return __pa(address) + (unsigned long) start_info.phys_base;
   19.23 +}
   19.24 +
   19.25 +static inline void *mach_to_virt(unsigned long address)
   19.26 +{
   19.27 +        return __va(address) - (unsigned long) start_info.phys_base;
   19.28 +}
   19.29 +
   19.30 +
   19.31 +/*
   19.32   * Change "struct page" to physical address.
   19.33   */
   19.34  #define page_to_phys(page)	((page - mem_map) << PAGE_SHIFT)
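These helpers assume the domain's pseudo-physical memory is one contiguous region starting at start_info.phys_base in machine memory. A usage sketch (not part of the patch) for handing a buffer address to the hypervisor:

    char *buf        = (char *)__get_free_page(GFP_KERNEL);  /* kernel virtual address        */
    unsigned long pa = __pa(buf);                            /* guest (pseudo-)physical       */
    unsigned long ma = virt_to_mach(buf);                    /* == pa + start_info.phys_base  */

    /* ma is the value hypervisor-visible structures should carry;          */
    /* mach_to_virt(ma) recovers the original kernel virtual address, buf.  */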
    20.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    20.2 +++ b/xenolinux-2.4.16-sparse/include/linux/skbuff.h	Wed Feb 05 13:56:14 2003 +0000
    20.3 @@ -0,0 +1,1185 @@
    20.4 +/*
    20.5 + *	Definitions for the 'struct sk_buff' memory handlers.
    20.6 + *
    20.7 + *	Authors:
    20.8 + *		Alan Cox, <gw4pts@gw4pts.ampr.org>
    20.9 + *		Florian La Roche, <rzsfl@rz.uni-sb.de>
   20.10 + *
   20.11 + *	This program is free software; you can redistribute it and/or
   20.12 + *	modify it under the terms of the GNU General Public License
   20.13 + *	as published by the Free Software Foundation; either version
   20.14 + *	2 of the License, or (at your option) any later version.
   20.15 + */
   20.16 + 
   20.17 +#ifndef _LINUX_SKBUFF_H
   20.18 +#define _LINUX_SKBUFF_H
   20.19 +
   20.20 +#include <linux/config.h>
   20.21 +#include <linux/kernel.h>
   20.22 +#include <linux/sched.h>
   20.23 +#include <linux/time.h>
   20.24 +#include <linux/cache.h>
   20.25 +
   20.26 +#include <asm/atomic.h>
   20.27 +#include <asm/types.h>
   20.28 +#include <linux/spinlock.h>
   20.29 +#include <linux/mm.h>
   20.30 +#include <linux/highmem.h>
   20.31 +
    20.32 +/* Zero Copy additions:
    20.33 + *
    20.34 + * (1) There are now two types of skb, as indicated by the skb_type field.
    20.35 + *     This is because, at least for the time being, there are two separate types
    20.36 + *     of memory that may be allocated to skb->data.
    20.37 + *
    20.38 + * (2) Until discontiguous memory is fully supported, there will be a free list of pages
    20.39 + *     to be used by the net RX code.  This list will be allocated in the driver init code
    20.40 + *     but is declared here because the socket free code needs to return pages to it.
    20.41 + */
   20.42 +
   20.43 +// for skb->skb_type:
   20.44 +
   20.45 +#define SKB_NORMAL          0
   20.46 +#define SKB_ZERO_COPY       1
   20.47 +
    20.48 +#define NUM_NET_PAGES       9 // 2^9 pages == 2MB of buffers
   20.49 +struct net_page_info {
   20.50 +        struct list_head list;
   20.51 +        unsigned long   virt_addr;
   20.52 +        unsigned long   ppte;
   20.53 +};
   20.54 +
   20.55 +extern char *net_page_chunk;
   20.56 +extern struct net_page_info *net_page_table;
   20.57 +extern struct list_head net_page_list;
   20.58 +extern spinlock_t net_page_list_lock;
   20.59 +extern unsigned int net_pages;
   20.60 +
   20.61 +/* End zero copy additions */
   20.62 +
   20.63 +#define HAVE_ALLOC_SKB		/* For the drivers to know */
   20.64 +#define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
   20.65 +#define SLAB_SKB 		/* Slabified skbuffs 	   */
   20.66 +
   20.67 +#define CHECKSUM_NONE 0
   20.68 +#define CHECKSUM_HW 1
   20.69 +#define CHECKSUM_UNNECESSARY 2
   20.70 +
   20.71 +#define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
   20.72 +#define SKB_MAX_ORDER(X,ORDER)	(((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
   20.73 +#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X),0))
   20.74 +#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0,2))
   20.75 +
   20.76 +/* A. Checksumming of received packets by device.
   20.77 + *
   20.78 + *	NONE: device failed to checksum this packet.
   20.79 + *		skb->csum is undefined.
   20.80 + *
    20.81 + *	UNNECESSARY: device parsed the packet and claims to have verified the checksum.
    20.82 + *		skb->csum is undefined.
    20.83 + *	      This is a bad option, but unfortunately many vendors do this.
    20.84 + *	      Apparently with the secret goal of selling you a new device when you
    20.85 + *	      add a new protocol to your host. F.e. IPv6. 8)
   20.86 + *
   20.87 + *	HW: the most generic way. Device supplied checksum of _all_
   20.88 + *	    the packet as seen by netif_rx in skb->csum.
   20.89 + *	    NOTE: Even if device supports only some protocols, but
   20.90 + *	    is able to produce some skb->csum, it MUST use HW,
   20.91 + *	    not UNNECESSARY.
   20.92 + *
   20.93 + * B. Checksumming on output.
   20.94 + *
   20.95 + *	NONE: skb is checksummed by protocol or csum is not required.
   20.96 + *
   20.97 + *	HW: device is required to csum packet as seen by hard_start_xmit
   20.98 + *	from skb->h.raw to the end and to record the checksum
   20.99 + *	at skb->h.raw+skb->csum.
  20.100 + *
  20.101 + *	Device must show its capabilities in dev->features, set
  20.102 + *	at device setup time.
  20.103 + *	NETIF_F_HW_CSUM	- it is clever device, it is able to checksum
  20.104 + *			  everything.
  20.105 + *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
  20.106 + *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
  20.107 + *			  TCP/UDP over IPv4. Sigh. Vendors like this
  20.108 + *			  way by an unknown reason. Though, see comment above
  20.109 + *			  about CHECKSUM_UNNECESSARY. 8)
  20.110 + *
  20.111 + *	Any questions? No questions, good. 		--ANK
  20.112 + */
  20.113 +
  20.114 +#ifdef __i386__
  20.115 +#define NET_CALLER(arg) (*(((void**)&arg)-1))
  20.116 +#else
  20.117 +#define NET_CALLER(arg) __builtin_return_address(0)
  20.118 +#endif
  20.119 +
  20.120 +#ifdef CONFIG_NETFILTER
  20.121 +struct nf_conntrack {
  20.122 +	atomic_t use;
  20.123 +	void (*destroy)(struct nf_conntrack *);
  20.124 +};
  20.125 +
  20.126 +struct nf_ct_info {
  20.127 +	struct nf_conntrack *master;
  20.128 +};
  20.129 +#endif
  20.130 +
  20.131 +struct sk_buff_head {
  20.132 +	/* These two members must be first. */
  20.133 +	struct sk_buff	* next;
  20.134 +	struct sk_buff	* prev;
  20.135 +
  20.136 +	__u32		qlen;
  20.137 +	spinlock_t	lock;
  20.138 +};
  20.139 +
  20.140 +struct sk_buff;
  20.141 +
  20.142 +#define MAX_SKB_FRAGS 6
  20.143 +
  20.144 +typedef struct skb_frag_struct skb_frag_t;
  20.145 +
  20.146 +struct skb_frag_struct
  20.147 +{
  20.148 +	struct page *page;
  20.149 +	__u16 page_offset;
  20.150 +	__u16 size;
  20.151 +};
  20.152 +
  20.153 +/* This data is invariant across clones and lives at
  20.154 + * the end of the header data, ie. at skb->end.
  20.155 + */
  20.156 +struct skb_shared_info {
  20.157 +	atomic_t	dataref;
  20.158 +	unsigned int	nr_frags;
  20.159 +	struct sk_buff	*frag_list;
  20.160 +	skb_frag_t	frags[MAX_SKB_FRAGS];
  20.161 +};
  20.162 +
  20.163 +struct sk_buff {
  20.164 +	/* These two members must be first. */
  20.165 +	struct sk_buff	* next;			/* Next buffer in list 				*/
  20.166 +	struct sk_buff	* prev;			/* Previous buffer in list 			*/
  20.167 +
  20.168 +	struct sk_buff_head * list;		/* List we are on				*/
  20.169 +	struct sock	*sk;			/* Socket we are owned by 			*/
  20.170 +	struct timeval	stamp;			/* Time we arrived				*/
  20.171 +	struct net_device	*dev;		/* Device we arrived on/are leaving by		*/
  20.172 +
  20.173 +	/* Transport layer header */
  20.174 +	union
  20.175 +	{
  20.176 +		struct tcphdr	*th;
  20.177 +		struct udphdr	*uh;
  20.178 +		struct icmphdr	*icmph;
  20.179 +		struct igmphdr	*igmph;
  20.180 +		struct iphdr	*ipiph;
  20.181 +		struct spxhdr	*spxh;
  20.182 +		unsigned char	*raw;
  20.183 +	} h;
  20.184 +
  20.185 +	/* Network layer header */
  20.186 +	union
  20.187 +	{
  20.188 +		struct iphdr	*iph;
  20.189 +		struct ipv6hdr	*ipv6h;
  20.190 +		struct arphdr	*arph;
  20.191 +		struct ipxhdr	*ipxh;
  20.192 +		unsigned char	*raw;
  20.193 +	} nh;
  20.194 +  
  20.195 +	/* Link layer header */
  20.196 +	union 
  20.197 +	{	
  20.198 +	  	struct ethhdr	*ethernet;
  20.199 +	  	unsigned char 	*raw;
  20.200 +	} mac;
  20.201 +
  20.202 +	struct  dst_entry *dst;
  20.203 +
  20.204 +	/* 
  20.205 +	 * This is the control buffer. It is free to use for every
  20.206 +	 * layer. Please put your private variables there. If you
  20.207 +	 * want to keep them across layers you have to do a skb_clone()
  20.208 +	 * first. This is owned by whoever has the skb queued ATM.
  20.209 +	 */ 
  20.210 +	char		cb[48];	 
  20.211 +
  20.212 +	unsigned int 	len;			/* Length of actual data			*/
  20.213 + 	unsigned int 	data_len;
  20.214 +	unsigned int	csum;			/* Checksum 					*/
  20.215 +	unsigned char 	__unused,		/* Dead field, may be reused			*/
  20.216 +			cloned, 		/* head may be cloned (check refcnt to be sure). */
  20.217 +  			pkt_type,		/* Packet class					*/
  20.218 +  			ip_summed;		/* Driver fed us an IP checksum			*/
  20.219 +	__u32		priority;		/* Packet queueing priority			*/
  20.220 +	atomic_t	users;			/* User count - see datagram.c,tcp.c 		*/
  20.221 +	unsigned short	protocol;		/* Packet protocol from driver. 		*/
  20.222 +	unsigned short	security;		/* Security level of packet			*/
  20.223 +	unsigned int	truesize;		/* Buffer size 					*/
  20.224 +
  20.225 +	unsigned char	*head;			/* Head of buffer 				*/
  20.226 +	unsigned char	*data;			/* Data head pointer				*/
  20.227 +	unsigned char	*tail;			/* Tail pointer					*/
  20.228 +	unsigned char 	*end;			/* End pointer					*/
  20.229 +
  20.230 +	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
  20.231 +#ifdef CONFIG_NETFILTER
  20.232 +	/* Can be used for communication between hooks. */
  20.233 +        unsigned long	nfmark;
  20.234 +	/* Cache info */
  20.235 +	__u32		nfcache;
  20.236 +	/* Associated connection, if any */
  20.237 +	struct nf_ct_info *nfct;
  20.238 +#ifdef CONFIG_NETFILTER_DEBUG
  20.239 +        unsigned int nf_debug;
  20.240 +#endif
  20.241 +#endif /*CONFIG_NETFILTER*/
  20.242 +
  20.243 +#if defined(CONFIG_HIPPI)
  20.244 +	union{
  20.245 +		__u32	ifield;
  20.246 +	} private;
  20.247 +#endif
  20.248 +
  20.249 +#ifdef CONFIG_NET_SCHED
  20.250 +       __u32           tc_index;                /* traffic control index */
  20.251 +#endif
  20.252 +       unsigned int     skb_type;                /* for zero copy handling.                      */
  20.253 +       struct net_page_info *net_page;
  20.254 +};
  20.255 +
  20.256 +#define SK_WMEM_MAX	65535
  20.257 +#define SK_RMEM_MAX	65535
  20.258 +
  20.259 +#ifdef __KERNEL__
  20.260 +/*
  20.261 + *	Handling routines are only of interest to the kernel
  20.262 + */
  20.263 +#include <linux/slab.h>
  20.264 +
  20.265 +#include <asm/system.h>
  20.266 +
  20.267 +extern void			__kfree_skb(struct sk_buff *skb);
  20.268 +extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
  20.269 +extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
  20.270 +extern void			kfree_skbmem(struct sk_buff *skb);
  20.271 +extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
  20.272 +extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
  20.273 +extern struct sk_buff *		pskb_copy(struct sk_buff *skb, int gfp_mask);
  20.274 +extern int			pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
  20.275 +extern struct sk_buff *		skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
  20.276 +extern struct sk_buff *		skb_copy_expand(const struct sk_buff *skb, 
  20.277 +						int newheadroom,
  20.278 +						int newtailroom,
  20.279 +						int priority);
  20.280 +#define dev_kfree_skb(a)	kfree_skb(a)
  20.281 +extern void	skb_over_panic(struct sk_buff *skb, int len, void *here);
  20.282 +extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
  20.283 +
  20.284 +/* Internal */
  20.285 +#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
  20.286 +
  20.287 +/**
  20.288 + *	skb_queue_empty - check if a queue is empty
  20.289 + *	@list: queue head
  20.290 + *
  20.291 + *	Returns true if the queue is empty, false otherwise.
  20.292 + */
  20.293 + 
  20.294 +static inline int skb_queue_empty(struct sk_buff_head *list)
  20.295 +{
  20.296 +	return (list->next == (struct sk_buff *) list);
  20.297 +}
  20.298 +
  20.299 +/**
  20.300 + *	skb_get - reference buffer
  20.301 + *	@skb: buffer to reference
  20.302 + *
  20.303 + *	Makes another reference to a socket buffer and returns a pointer
  20.304 + *	to the buffer.
  20.305 + */
  20.306 + 
  20.307 +static inline struct sk_buff *skb_get(struct sk_buff *skb)
  20.308 +{
  20.309 +	atomic_inc(&skb->users);
  20.310 +	return skb;
  20.311 +}
  20.312 +
  20.313 +/*
   20.314 + * If users==1, we are the only owner and can avoid redundant
  20.315 + * atomic change.
  20.316 + */
  20.317 + 
  20.318 +/**
  20.319 + *	kfree_skb - free an sk_buff
  20.320 + *	@skb: buffer to free
  20.321 + *
  20.322 + *	Drop a reference to the buffer and free it if the usage count has
  20.323 + *	hit zero.
  20.324 + */
  20.325 + 
  20.326 +static inline void kfree_skb(struct sk_buff *skb)
  20.327 +{
  20.328 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  20.329 +		__kfree_skb(skb);
  20.330 +}
  20.331 +
  20.332 +/* Use this if you didn't touch the skb state [for fast switching] */
  20.333 +static inline void kfree_skb_fast(struct sk_buff *skb)
  20.334 +{
  20.335 +	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
  20.336 +		kfree_skbmem(skb);	
  20.337 +}
  20.338 +
  20.339 +/**
  20.340 + *	skb_cloned - is the buffer a clone
  20.341 + *	@skb: buffer to check
  20.342 + *
  20.343 + *	Returns true if the buffer was generated with skb_clone() and is
  20.344 + *	one of multiple shared copies of the buffer. Cloned buffers are
  20.345 + *	shared data so must not be written to under normal circumstances.
  20.346 + */
  20.347 +
  20.348 +static inline int skb_cloned(struct sk_buff *skb)
  20.349 +{
  20.350 +	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
  20.351 +}
  20.352 +
  20.353 +/**
  20.354 + *	skb_shared - is the buffer shared
  20.355 + *	@skb: buffer to check
  20.356 + *
  20.357 + *	Returns true if more than one person has a reference to this
  20.358 + *	buffer.
  20.359 + */
  20.360 + 
  20.361 +static inline int skb_shared(struct sk_buff *skb)
  20.362 +{
  20.363 +	return (atomic_read(&skb->users) != 1);
  20.364 +}
  20.365 +
  20.366 +/** 
  20.367 + *	skb_share_check - check if buffer is shared and if so clone it
  20.368 + *	@skb: buffer to check
  20.369 + *	@pri: priority for memory allocation
  20.370 + *	
  20.371 + *	If the buffer is shared the buffer is cloned and the old copy
  20.372 + *	drops a reference. A new clone with a single reference is returned.
  20.373 + *	If the buffer is not shared the original buffer is returned. When
  20.374 + *	being called from interrupt status or with spinlocks held pri must
  20.375 + *	be GFP_ATOMIC.
  20.376 + *
  20.377 + *	NULL is returned on a memory allocation failure.
  20.378 + */
  20.379 + 
  20.380 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
  20.381 +{
  20.382 +	if (skb_shared(skb)) {
  20.383 +		struct sk_buff *nskb;
  20.384 +		nskb = skb_clone(skb, pri);
  20.385 +		kfree_skb(skb);
  20.386 +		return nskb;
  20.387 +	}
  20.388 +	return skb;
  20.389 +}
  20.390 +
  20.391 +
  20.392 +/*
  20.393 + *	Copy shared buffers into a new sk_buff. We effectively do COW on
  20.394 + *	packets to handle cases where we have a local reader and forward
  20.395 + *	and a couple of other messy ones. The normal one is tcpdumping
   20.396 + *	a packet that's being forwarded.
  20.397 + */
  20.398 + 
  20.399 +/**
  20.400 + *	skb_unshare - make a copy of a shared buffer
  20.401 + *	@skb: buffer to check
  20.402 + *	@pri: priority for memory allocation
  20.403 + *
  20.404 + *	If the socket buffer is a clone then this function creates a new
  20.405 + *	copy of the data, drops a reference count on the old copy and returns
  20.406 + *	the new copy with the reference count at 1. If the buffer is not a clone
  20.407 + *	the original buffer is returned. When called with a spinlock held or
  20.408 + *	from interrupt state @pri must be %GFP_ATOMIC
  20.409 + *
  20.410 + *	%NULL is returned on a memory allocation failure.
  20.411 + */
  20.412 + 
  20.413 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
  20.414 +{
  20.415 +	struct sk_buff *nskb;
  20.416 +	if(!skb_cloned(skb))
  20.417 +		return skb;
  20.418 +	nskb=skb_copy(skb, pri);
  20.419 +	kfree_skb(skb);		/* Free our shared copy */
  20.420 +	return nskb;
  20.421 +}
  20.422 +
  20.423 +/**
  20.424 + *	skb_peek
  20.425 + *	@list_: list to peek at
  20.426 + *
  20.427 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  20.428 + *	be careful with this one. A peek leaves the buffer on the
  20.429 + *	list and someone else may run off with it. You must hold
  20.430 + *	the appropriate locks or have a private queue to do this.
  20.431 + *
  20.432 + *	Returns %NULL for an empty list or a pointer to the head element.
  20.433 + *	The reference count is not incremented and the reference is therefore
  20.434 + *	volatile. Use with caution.
  20.435 + */
  20.436 + 
  20.437 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
  20.438 +{
  20.439 +	struct sk_buff *list = ((struct sk_buff *)list_)->next;
  20.440 +	if (list == (struct sk_buff *)list_)
  20.441 +		list = NULL;
  20.442 +	return list;
  20.443 +}
  20.444 +
  20.445 +/**
  20.446 + *	skb_peek_tail
  20.447 + *	@list_: list to peek at
  20.448 + *
  20.449 + *	Peek an &sk_buff. Unlike most other operations you _MUST_
  20.450 + *	be careful with this one. A peek leaves the buffer on the
  20.451 + *	list and someone else may run off with it. You must hold
  20.452 + *	the appropriate locks or have a private queue to do this.
  20.453 + *
  20.454 + *	Returns %NULL for an empty list or a pointer to the tail element.
  20.455 + *	The reference count is not incremented and the reference is therefore
  20.456 + *	volatile. Use with caution.
  20.457 + */
  20.458 +
  20.459 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
  20.460 +{
  20.461 +	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
  20.462 +	if (list == (struct sk_buff *)list_)
  20.463 +		list = NULL;
  20.464 +	return list;
  20.465 +}
  20.466 +
  20.467 +/**
  20.468 + *	skb_queue_len	- get queue length
  20.469 + *	@list_: list to measure
  20.470 + *
  20.471 + *	Return the length of an &sk_buff queue. 
  20.472 + */
  20.473 + 
  20.474 +static inline __u32 skb_queue_len(struct sk_buff_head *list_)
  20.475 +{
  20.476 +	return(list_->qlen);
  20.477 +}
  20.478 +
  20.479 +static inline void skb_queue_head_init(struct sk_buff_head *list)
  20.480 +{
  20.481 +	spin_lock_init(&list->lock);
  20.482 +	list->prev = (struct sk_buff *)list;
  20.483 +	list->next = (struct sk_buff *)list;
  20.484 +	list->qlen = 0;
  20.485 +}
  20.486 +
  20.487 +/*
  20.488 + *	Insert an sk_buff at the start of a list.
  20.489 + *
  20.490 + *	The "__skb_xxxx()" functions are the non-atomic ones that
  20.491 + *	can only be called with interrupts disabled.
  20.492 + */
  20.493 +
  20.494 +/**
  20.495 + *	__skb_queue_head - queue a buffer at the list head
  20.496 + *	@list: list to use
  20.497 + *	@newsk: buffer to queue
  20.498 + *
  20.499 + *	Queue a buffer at the start of a list. This function takes no locks
  20.500 + *	and you must therefore hold required locks before calling it.
  20.501 + *
  20.502 + *	A buffer cannot be placed on two lists at the same time.
  20.503 + */	
  20.504 + 
  20.505 +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  20.506 +{
  20.507 +	struct sk_buff *prev, *next;
  20.508 +
  20.509 +	newsk->list = list;
  20.510 +	list->qlen++;
  20.511 +	prev = (struct sk_buff *)list;
  20.512 +	next = prev->next;
  20.513 +	newsk->next = next;
  20.514 +	newsk->prev = prev;
  20.515 +	next->prev = newsk;
  20.516 +	prev->next = newsk;
  20.517 +}
  20.518 +
  20.519 +
  20.520 +/**
  20.521 + *	skb_queue_head - queue a buffer at the list head
  20.522 + *	@list: list to use
  20.523 + *	@newsk: buffer to queue
  20.524 + *
  20.525 + *	Queue a buffer at the start of the list. This function takes the
   20.526 + *	list lock and can be used safely with other locking &sk_buff functions.
  20.528 + *
  20.529 + *	A buffer cannot be placed on two lists at the same time.
  20.530 + */	
  20.531 +
  20.532 +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
  20.533 +{
  20.534 +	unsigned long flags;
  20.535 +
  20.536 +	spin_lock_irqsave(&list->lock, flags);
  20.537 +	__skb_queue_head(list, newsk);
  20.538 +	spin_unlock_irqrestore(&list->lock, flags);
  20.539 +}
  20.540 +
  20.541 +/**
  20.542 + *	__skb_queue_tail - queue a buffer at the list tail
  20.543 + *	@list: list to use
  20.544 + *	@newsk: buffer to queue
  20.545 + *
  20.546 + *	Queue a buffer at the end of a list. This function takes no locks
  20.547 + *	and you must therefore hold required locks before calling it.
  20.548 + *
  20.549 + *	A buffer cannot be placed on two lists at the same time.
  20.550 + */	
  20.551 + 
  20.552 +
  20.553 +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  20.554 +{
  20.555 +	struct sk_buff *prev, *next;
  20.556 +
  20.557 +	newsk->list = list;
  20.558 +	list->qlen++;
  20.559 +	next = (struct sk_buff *)list;
  20.560 +	prev = next->prev;
  20.561 +	newsk->next = next;
  20.562 +	newsk->prev = prev;
  20.563 +	next->prev = newsk;
  20.564 +	prev->next = newsk;
  20.565 +}
  20.566 +
  20.567 +/**
  20.568 + *	skb_queue_tail - queue a buffer at the list tail
  20.569 + *	@list: list to use
  20.570 + *	@newsk: buffer to queue
  20.571 + *
  20.572 + *	Queue a buffer at the tail of the list. This function takes the
   20.573 + *	list lock and can be used safely with other locking &sk_buff functions.
  20.575 + *
  20.576 + *	A buffer cannot be placed on two lists at the same time.
  20.577 + */	
  20.578 +
  20.579 +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
  20.580 +{
  20.581 +	unsigned long flags;
  20.582 +
  20.583 +	spin_lock_irqsave(&list->lock, flags);
  20.584 +	__skb_queue_tail(list, newsk);
  20.585 +	spin_unlock_irqrestore(&list->lock, flags);
  20.586 +}
  20.587 +
  20.588 +/**
  20.589 + *	__skb_dequeue - remove from the head of the queue
  20.590 + *	@list: list to dequeue from
  20.591 + *
  20.592 + *	Remove the head of the list. This function does not take any locks
  20.593 + *	so must be used with appropriate locks held only. The head item is
  20.594 + *	returned or %NULL if the list is empty.
  20.595 + */
  20.596 +
  20.597 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
  20.598 +{
  20.599 +	struct sk_buff *next, *prev, *result;
  20.600 +
  20.601 +	prev = (struct sk_buff *) list;
  20.602 +	next = prev->next;
  20.603 +	result = NULL;
  20.604 +	if (next != prev) {
  20.605 +		result = next;
  20.606 +		next = next->next;
  20.607 +		list->qlen--;
  20.608 +		next->prev = prev;
  20.609 +		prev->next = next;
  20.610 +		result->next = NULL;
  20.611 +		result->prev = NULL;
  20.612 +		result->list = NULL;
  20.613 +	}
  20.614 +	return result;
  20.615 +}
  20.616 +
  20.617 +/**
  20.618 + *	skb_dequeue - remove from the head of the queue
  20.619 + *	@list: list to dequeue from
  20.620 + *
  20.621 + *	Remove the head of the list. The list lock is taken so the function
  20.622 + *	may be used safely with other locking list functions. The head item is
  20.623 + *	returned or %NULL if the list is empty.
  20.624 + */
  20.625 +
  20.626 +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
  20.627 +{
  20.628 +	long flags;
  20.629 +	struct sk_buff *result;
  20.630 +
  20.631 +	spin_lock_irqsave(&list->lock, flags);
  20.632 +	result = __skb_dequeue(list);
  20.633 +	spin_unlock_irqrestore(&list->lock, flags);
  20.634 +	return result;
  20.635 +}
  20.636 +
  20.637 +/*
  20.638 + *	Insert a packet on a list.
  20.639 + */
  20.640 +
  20.641 +static inline void __skb_insert(struct sk_buff *newsk,
  20.642 +	struct sk_buff * prev, struct sk_buff *next,
  20.643 +	struct sk_buff_head * list)
  20.644 +{
  20.645 +	newsk->next = next;
  20.646 +	newsk->prev = prev;
  20.647 +	next->prev = newsk;
  20.648 +	prev->next = newsk;
  20.649 +	newsk->list = list;
  20.650 +	list->qlen++;
  20.651 +}
  20.652 +
  20.653 +/**
  20.654 + *	skb_insert	-	insert a buffer
  20.655 + *	@old: buffer to insert before
  20.656 + *	@newsk: buffer to insert
  20.657 + *
  20.658 + *	Place a packet before a given packet in a list. The list locks are taken
  20.659 + *	and this function is atomic with respect to other list locked calls
  20.660 + *	A buffer cannot be placed on two lists at the same time.
  20.661 + */
  20.662 +
  20.663 +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
  20.664 +{
  20.665 +	unsigned long flags;
  20.666 +
  20.667 +	spin_lock_irqsave(&old->list->lock, flags);
  20.668 +	__skb_insert(newsk, old->prev, old, old->list);
  20.669 +	spin_unlock_irqrestore(&old->list->lock, flags);
  20.670 +}
  20.671 +
  20.672 +/*
  20.673 + *	Place a packet after a given packet in a list.
  20.674 + */
  20.675 +
  20.676 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
  20.677 +{
  20.678 +	__skb_insert(newsk, old, old->next, old->list);
  20.679 +}
  20.680 +
  20.681 +/**
  20.682 + *	skb_append	-	append a buffer
  20.683 + *	@old: buffer to insert after
  20.684 + *	@newsk: buffer to insert
  20.685 + *
  20.686 + *	Place a packet after a given packet in a list. The list locks are taken
  20.687 + *	and this function is atomic with respect to other list locked calls.
  20.688 + *	A buffer cannot be placed on two lists at the same time.
  20.689 + */
  20.690 +
  20.691 +
  20.692 +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
  20.693 +{
  20.694 +	unsigned long flags;
  20.695 +
  20.696 +	spin_lock_irqsave(&old->list->lock, flags);
  20.697 +	__skb_append(old, newsk);
  20.698 +	spin_unlock_irqrestore(&old->list->lock, flags);
  20.699 +}
  20.700 +
  20.701 +/*
  20.702 + * remove sk_buff from list. _Must_ be called atomically, and with
  20.703 + * the list known..
  20.704 + */
  20.705 + 
  20.706 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
  20.707 +{
  20.708 +	struct sk_buff * next, * prev;
  20.709 +
  20.710 +	list->qlen--;
  20.711 +	next = skb->next;
  20.712 +	prev = skb->prev;
  20.713 +	skb->next = NULL;
  20.714 +	skb->prev = NULL;
  20.715 +	skb->list = NULL;
  20.716 +	next->prev = prev;
  20.717 +	prev->next = next;
  20.718 +}
  20.719 +
  20.720 +/**
  20.721 + *	skb_unlink	-	remove a buffer from a list
  20.722 + *	@skb: buffer to remove
  20.723 + *
   20.724 + *	Remove a buffer from the list it is on. The list locks are taken and
   20.725 + *	this function is atomic with respect to other list locked calls.
  20.726 + *	
  20.727 + *	Works even without knowing the list it is sitting on, which can be 
  20.728 + *	handy at times. It also means that THE LIST MUST EXIST when you 
  20.729 + *	unlink. Thus a list must have its contents unlinked before it is
  20.730 + *	destroyed.
  20.731 + */
  20.732 +
  20.733 +static inline void skb_unlink(struct sk_buff *skb)
  20.734 +{
  20.735 +	struct sk_buff_head *list = skb->list;
  20.736 +
  20.737 +	if(list) {
  20.738 +		unsigned long flags;
  20.739 +
  20.740 +		spin_lock_irqsave(&list->lock, flags);
  20.741 +		if(skb->list == list)
  20.742 +			__skb_unlink(skb, skb->list);
  20.743 +		spin_unlock_irqrestore(&list->lock, flags);
  20.744 +	}
  20.745 +}
  20.746 +
  20.747 +/* XXX: more streamlined implementation */
  20.748 +
  20.749 +/**
  20.750 + *	__skb_dequeue_tail - remove from the tail of the queue
  20.751 + *	@list: list to dequeue from
  20.752 + *
  20.753 + *	Remove the tail of the list. This function does not take any locks
  20.754 + *	so must be used with appropriate locks held only. The tail item is
  20.755 + *	returned or %NULL if the list is empty.
  20.756 + */
  20.757 +
  20.758 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
  20.759 +{
  20.760 +	struct sk_buff *skb = skb_peek_tail(list); 
  20.761 +	if (skb)
  20.762 +		__skb_unlink(skb, list);
  20.763 +	return skb;
  20.764 +}
  20.765 +
  20.766 +/**
   20.767 + *	skb_dequeue_tail - remove from the tail of the queue
   20.768 + *	@list: list to dequeue from
   20.769 + *
   20.770 + *	Remove the tail of the list. The list lock is taken so the function
   20.771 + *	may be used safely with other locking list functions. The tail item is
  20.772 + *	returned or %NULL if the list is empty.
  20.773 + */
  20.774 +
  20.775 +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
  20.776 +{
  20.777 +	long flags;
  20.778 +	struct sk_buff *result;
  20.779 +
  20.780 +	spin_lock_irqsave(&list->lock, flags);
  20.781 +	result = __skb_dequeue_tail(list);
  20.782 +	spin_unlock_irqrestore(&list->lock, flags);
  20.783 +	return result;
  20.784 +}
  20.785 +
  20.786 +static inline int skb_is_nonlinear(const struct sk_buff *skb)
  20.787 +{
  20.788 +	return skb->data_len;
  20.789 +}
  20.790 +
  20.791 +static inline int skb_headlen(const struct sk_buff *skb)
  20.792 +{
  20.793 +	return skb->len - skb->data_len;
  20.794 +}
  20.795 +
  20.796 +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0)
  20.797 +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0)
  20.798 +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
  20.799 +
  20.800 +/*
  20.801 + *	Add data to an sk_buff
  20.802 + */
  20.803 + 
  20.804 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
  20.805 +{
  20.806 +	unsigned char *tmp=skb->tail;
  20.807 +	SKB_LINEAR_ASSERT(skb);
  20.808 +	skb->tail+=len;
  20.809 +	skb->len+=len;
  20.810 +	return tmp;
  20.811 +}
  20.812 +
  20.813 +/**
  20.814 + *	skb_put - add data to a buffer
  20.815 + *	@skb: buffer to use 
  20.816 + *	@len: amount of data to add
  20.817 + *
  20.818 + *	This function extends the used data area of the buffer. If this would
  20.819 + *	exceed the total buffer size the kernel will panic. A pointer to the
  20.820 + *	first byte of the extra data is returned.
  20.821 + */
  20.822 + 
  20.823 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
  20.824 +{
  20.825 +	unsigned char *tmp=skb->tail;
  20.826 +	SKB_LINEAR_ASSERT(skb);
  20.827 +	skb->tail+=len;
  20.828 +	skb->len+=len;
  20.829 +	if(skb->tail>skb->end) {
  20.830 +		skb_over_panic(skb, len, current_text_addr());
  20.831 +	}
  20.832 +	return tmp;
  20.833 +}
  20.834 +
  20.835 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
  20.836 +{
  20.837 +	skb->data-=len;
  20.838 +	skb->len+=len;
  20.839 +	return skb->data;
  20.840 +}
  20.841 +
  20.842 +/**
  20.843 + *	skb_push - add data to the start of a buffer
  20.844 + *	@skb: buffer to use 
  20.845 + *	@len: amount of data to add
  20.846 + *
  20.847 + *	This function extends the used data area of the buffer at the buffer
  20.848 + *	start. If this would exceed the total buffer headroom the kernel will
  20.849 + *	panic. A pointer to the first byte of the extra data is returned.
  20.850 + */
  20.851 +
  20.852 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
  20.853 +{
  20.854 +	skb->data-=len;
  20.855 +	skb->len+=len;
  20.856 +	if(skb->data<skb->head) {
  20.857 +		skb_under_panic(skb, len, current_text_addr());
  20.858 +	}
  20.859 +	return skb->data;
  20.860 +}
  20.861 +
  20.862 +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
  20.863 +{
  20.864 +	skb->len-=len;
  20.865 +	if (skb->len < skb->data_len)
  20.866 +		BUG();
  20.867 +	return 	skb->data+=len;
  20.868 +}
  20.869 +
  20.870 +/**
  20.871 + *	skb_pull - remove data from the start of a buffer
  20.872 + *	@skb: buffer to use 
  20.873 + *	@len: amount of data to remove
  20.874 + *
  20.875 + *	This function removes data from the start of a buffer, returning
  20.876 + *	the memory to the headroom. A pointer to the next data in the buffer
  20.877 + *	is returned. Once the data has been pulled future pushes will overwrite
  20.878 + *	the old data.
  20.879 + */
  20.880 +
  20.881 +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
  20.882 +{	
  20.883 +	if (len > skb->len)
  20.884 +		return NULL;
  20.885 +	return __skb_pull(skb,len);
  20.886 +}
  20.887 +
  20.888 +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
  20.889 +
  20.890 +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
  20.891 +{
  20.892 +	if (len > skb_headlen(skb) &&
  20.893 +	    __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
  20.894 +		return NULL;
  20.895 +	skb->len -= len;
  20.896 +	return 	skb->data += len;
  20.897 +}
  20.898 +
  20.899 +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
  20.900 +{	
  20.901 +	if (len > skb->len)
  20.902 +		return NULL;
  20.903 +	return __pskb_pull(skb,len);
  20.904 +}
  20.905 +
  20.906 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
  20.907 +{
  20.908 +	if (len <= skb_headlen(skb))
  20.909 +		return 1;
  20.910 +	if (len > skb->len)
  20.911 +		return 0;
  20.912 +	return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
  20.913 +}
  20.914 +
  20.915 +/**
  20.916 + *	skb_headroom - bytes at buffer head
  20.917 + *	@skb: buffer to check
  20.918 + *
  20.919 + *	Return the number of bytes of free space at the head of an &sk_buff.
  20.920 + */
  20.921 + 
  20.922 +static inline int skb_headroom(const struct sk_buff *skb)
  20.923 +{
  20.924 +	return skb->data-skb->head;
  20.925 +}
  20.926 +
  20.927 +/**
  20.928 + *	skb_tailroom - bytes at buffer end
  20.929 + *	@skb: buffer to check
  20.930 + *
  20.931 + *	Return the number of bytes of free space at the tail of an sk_buff
  20.932 + */
  20.933 +
  20.934 +static inline int skb_tailroom(const struct sk_buff *skb)
  20.935 +{
  20.936 +	return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
  20.937 +}
  20.938 +
  20.939 +/**
  20.940 + *	skb_reserve - adjust headroom
  20.941 + *	@skb: buffer to alter
  20.942 + *	@len: bytes to move
  20.943 + *
  20.944 + *	Increase the headroom of an empty &sk_buff by reducing the tail
  20.945 + *	room. This is only allowed for an empty buffer.
  20.946 + */
  20.947 +
  20.948 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
  20.949 +{
  20.950 +	skb->data+=len;
  20.951 +	skb->tail+=len;
  20.952 +}
  20.953 +
  20.954 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
  20.955 +
  20.956 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
  20.957 +{
  20.958 +	if (!skb->data_len) {
  20.959 +		skb->len = len;
  20.960 +		skb->tail = skb->data+len;
  20.961 +	} else {
  20.962 +		___pskb_trim(skb, len, 0);
  20.963 +	}
  20.964 +}
  20.965 +
  20.966 +/**
  20.967 + *	skb_trim - remove end from a buffer
  20.968 + *	@skb: buffer to alter
  20.969 + *	@len: new length
  20.970 + *
  20.971 + *	Cut the length of a buffer down by removing data from the tail. If
  20.972 + *	the buffer is already under the length specified it is not modified.
  20.973 + */
  20.974 +
  20.975 +static inline void skb_trim(struct sk_buff *skb, unsigned int len)
  20.976 +{
  20.977 +	if (skb->len > len) {
  20.978 +		__skb_trim(skb, len);
  20.979 +	}
  20.980 +}
  20.981 +
  20.982 +
  20.983 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
  20.984 +{
  20.985 +	if (!skb->data_len) {
  20.986 +		skb->len = len;
  20.987 +		skb->tail = skb->data+len;
  20.988 +		return 0;
  20.989 +	} else {
  20.990 +		return ___pskb_trim(skb, len, 1);
  20.991 +	}
  20.992 +}
  20.993 +
  20.994 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
  20.995 +{
  20.996 +	if (len < skb->len)
  20.997 +		return __pskb_trim(skb, len);
  20.998 +	return 0;
  20.999 +}
 20.1000 +
 20.1001 +/**
 20.1002 + *	skb_orphan - orphan a buffer
 20.1003 + *	@skb: buffer to orphan
 20.1004 + *
 20.1005 + *	If a buffer currently has an owner then we call the owner's
 20.1006 + *	destructor function and make the @skb unowned. The buffer continues
 20.1007 + *	to exist but is no longer charged to its former owner.
 20.1008 + */
 20.1009 +
 20.1010 +
 20.1011 +static inline void skb_orphan(struct sk_buff *skb)
 20.1012 +{
 20.1013 +	if (skb->destructor)
 20.1014 +		skb->destructor(skb);
 20.1015 +	skb->destructor = NULL;
 20.1016 +	skb->sk = NULL;
 20.1017 +}
 20.1018 +
 20.1019 +/**
  20.1020 + *	skb_queue_purge - empty a list
 20.1021 + *	@list: list to empty
 20.1022 + *
 20.1023 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 20.1024 + *	the list and one reference dropped. This function takes the list
 20.1025 + *	lock and is atomic with respect to other list locking functions.
 20.1026 + */
 20.1027 +
 20.1028 +
 20.1029 +static inline void skb_queue_purge(struct sk_buff_head *list)
 20.1030 +{
 20.1031 +	struct sk_buff *skb;
 20.1032 +	while ((skb=skb_dequeue(list))!=NULL)
 20.1033 +		kfree_skb(skb);
 20.1034 +}
 20.1035 +
 20.1036 +/**
  20.1037 + *	__skb_queue_purge - empty a list
 20.1038 + *	@list: list to empty
 20.1039 + *
 20.1040 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 20.1041 + *	the list and one reference dropped. This function does not take the
 20.1042 + *	list lock and the caller must hold the relevant locks to use it.
 20.1043 + */
 20.1044 +
 20.1045 +
 20.1046 +static inline void __skb_queue_purge(struct sk_buff_head *list)
 20.1047 +{
 20.1048 +	struct sk_buff *skb;
 20.1049 +	while ((skb=__skb_dequeue(list))!=NULL)
 20.1050 +		kfree_skb(skb);
 20.1051 +}
 20.1052 +
 20.1053 +/**
 20.1054 + *	__dev_alloc_skb - allocate an skbuff for sending
 20.1055 + *	@length: length to allocate
 20.1056 + *	@gfp_mask: get_free_pages mask, passed to alloc_skb
 20.1057 + *
 20.1058 + *	Allocate a new &sk_buff and assign it a usage count of one. The
 20.1059 + *	buffer has unspecified headroom built in. Users should allocate
 20.1060 + *	the headroom they think they need without accounting for the
 20.1061 + *	built in space. The built in space is used for optimisations.
 20.1062 + *
  20.1063 + *	%NULL is returned if there is no free memory.
 20.1064 + */
 20.1065 + 
 20.1066 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 20.1067 +					      int gfp_mask)
 20.1068 +{
 20.1069 +	struct sk_buff *skb;
 20.1070 +
 20.1071 +	//skb = alloc_skb(length+16, gfp_mask);
 20.1072 +        skb = alloc_zc_skb(length+16, gfp_mask);
 20.1073 +	if (skb)
 20.1074 +		skb_reserve(skb,16);
 20.1075 +	return skb;
 20.1076 +}
 20.1077 +
 20.1078 +/**
 20.1079 + *	dev_alloc_skb - allocate an skbuff for sending
 20.1080 + *	@length: length to allocate
 20.1081 + *
 20.1082 + *	Allocate a new &sk_buff and assign it a usage count of one. The
 20.1083 + *	buffer has unspecified headroom built in. Users should allocate
 20.1084 + *	the headroom they think they need without accounting for the
 20.1085 + *	built in space. The built in space is used for optimisations.
 20.1086 + *
  20.1087 + *	%NULL is returned if there is no free memory. Although this function
 20.1088 + *	allocates memory it can be called from an interrupt.
 20.1089 + */
 20.1090 + 
 20.1091 +static inline struct sk_buff *dev_alloc_skb(unsigned int length)
 20.1092 +{
 20.1093 +	return __dev_alloc_skb(length, GFP_ATOMIC);
 20.1094 +}
 20.1095 +
 20.1096 +/**
 20.1097 + *	skb_cow - copy header of skb when it is required
 20.1098 + *	@skb: buffer to cow
 20.1099 + *	@headroom: needed headroom
 20.1100 + *
 20.1101 + *	If the skb passed lacks sufficient headroom or its data part
 20.1102 + *	is shared, data is reallocated. If reallocation fails, an error
 20.1103 + *	is returned and original skb is not changed.
 20.1104 + *
 20.1105 + *	The result is skb with writable area skb->head...skb->tail
 20.1106 + *	and at least @headroom of space at head.
 20.1107 + */
 20.1108 +
 20.1109 +static inline int
 20.1110 +skb_cow(struct sk_buff *skb, unsigned int headroom)
 20.1111 +{
 20.1112 +	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
 20.1113 +
 20.1114 +	if (delta < 0)
 20.1115 +		delta = 0;
 20.1116 +
 20.1117 +	if (delta || skb_cloned(skb))
 20.1118 +		return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
 20.1119 +	return 0;
 20.1120 +}
 20.1121 +
 20.1122 +/**
 20.1123 + *	skb_linearize - convert paged skb to linear one
  20.1124 + *	@skb: buffer to linearize
 20.1125 + *	@gfp: allocation mode
 20.1126 + *
 20.1127 + *	If there is no free memory -ENOMEM is returned, otherwise zero
 20.1128 + *	is returned and the old skb data released.  */
 20.1129 +int skb_linearize(struct sk_buff *skb, int gfp);
 20.1130 +
 20.1131 +static inline void *kmap_skb_frag(const skb_frag_t *frag)
 20.1132 +{
 20.1133 +#ifdef CONFIG_HIGHMEM
 20.1134 +	if (in_irq())
 20.1135 +		BUG();
 20.1136 +
 20.1137 +	local_bh_disable();
 20.1138 +#endif
 20.1139 +	return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ);
 20.1140 +}
 20.1141 +
 20.1142 +static inline void kunmap_skb_frag(void *vaddr)
 20.1143 +{
 20.1144 +	kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
 20.1145 +#ifdef CONFIG_HIGHMEM
 20.1146 +	local_bh_enable();
 20.1147 +#endif
 20.1148 +}
 20.1149 +
 20.1150 +#define skb_queue_walk(queue, skb) \
 20.1151 +		for (skb = (queue)->next;			\
 20.1152 +		     (skb != (struct sk_buff *)(queue));	\
 20.1153 +		     skb=skb->next)
 20.1154 +
 20.1155 +
 20.1156 +extern struct sk_buff *		skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err);
 20.1157 +extern unsigned int		datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait);
 20.1158 +extern int			skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
 20.1159 +extern int			skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size);
 20.1160 +extern int			skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
 20.1161 +extern int			skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov);
 20.1162 +extern void			skb_free_datagram(struct sock * sk, struct sk_buff *skb);
 20.1163 +
 20.1164 +extern unsigned int		skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
 20.1165 +extern int			skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
 20.1166 +extern unsigned int		skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
 20.1167 +extern void			skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
 20.1168 +
 20.1169 +extern void skb_init(void);
 20.1170 +extern void skb_add_mtu(int mtu);
 20.1171 +
 20.1172 +#ifdef CONFIG_NETFILTER
 20.1173 +static inline void
 20.1174 +nf_conntrack_put(struct nf_ct_info *nfct)
 20.1175 +{
 20.1176 +	if (nfct && atomic_dec_and_test(&nfct->master->use))
 20.1177 +		nfct->master->destroy(nfct->master);
 20.1178 +}
 20.1179 +static inline void
 20.1180 +nf_conntrack_get(struct nf_ct_info *nfct)
 20.1181 +{
 20.1182 +	if (nfct)
 20.1183 +		atomic_inc(&nfct->master->use);
 20.1184 +}
 20.1185 +#endif
 20.1186 +
 20.1187 +#endif	/* __KERNEL__ */
 20.1188 +#endif	/* _LINUX_SKBUFF_H */
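For the zero-copy case the new fields at the end of struct sk_buff record where the data page came from. After the modified __dev_alloc_skb() above, a driver sees roughly the following (sketch, not part of the header):

    struct sk_buff *skb = dev_alloc_skb(RX_BUF_SIZE);

    /* skb->skb_type == SKB_ZERO_COPY                                               */
    /* skb->net_page points at the page's { virt_addr, ppte } bookkeeping record    */
    /* skb->head     == (u8 *)skb->net_page->virt_addr                              */
    /* skb->data     == skb->head + 16, from the skb_reserve() in __dev_alloc_skb() */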
    21.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    21.2 +++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c	Wed Feb 05 13:56:14 2003 +0000
    21.3 @@ -0,0 +1,1368 @@
    21.4 +/*
    21.5 + *	Routines having to do with the 'struct sk_buff' memory handlers.
    21.6 + *
    21.7 + *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
    21.8 + *			Florian La Roche <rzsfl@rz.uni-sb.de>
    21.9 + *
   21.10 + *	Version:	$Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $
   21.11 + *
   21.12 + *	Fixes:	
   21.13 + *		Alan Cox	:	Fixed the worst of the load balancer bugs.
   21.14 + *		Dave Platt	:	Interrupt stacking fix.
   21.15 + *	Richard Kooijman	:	Timestamp fixes.
   21.16 + *		Alan Cox	:	Changed buffer format.
   21.17 + *		Alan Cox	:	destructor hook for AF_UNIX etc.
   21.18 + *		Linus Torvalds	:	Better skb_clone.
   21.19 + *		Alan Cox	:	Added skb_copy.
   21.20 + *		Alan Cox	:	Added all the changed routines Linus
   21.21 + *					only put in the headers
   21.22 + *		Ray VanTassle	:	Fixed --skb->lock in free
   21.23 + *		Alan Cox	:	skb_copy copy arp field
   21.24 + *		Andi Kleen	:	slabified it.
   21.25 + *
   21.26 + *	NOTE:
   21.27 + *		The __skb_ routines should be called with interrupts 
   21.28 + *	disabled, or you better be *real* sure that the operation is atomic 
   21.29 + *	with respect to whatever list is being frobbed (e.g. via lock_sock()
   21.30 + *	or via disabling bottom half handlers, etc).
   21.31 + *
   21.32 + *	This program is free software; you can redistribute it and/or
   21.33 + *	modify it under the terms of the GNU General Public License
   21.34 + *	as published by the Free Software Foundation; either version
   21.35 + *	2 of the License, or (at your option) any later version.
   21.36 + */
   21.37 +
   21.38 +/*
   21.39 + *	The functions in this file will not compile correctly with gcc 2.4.x
   21.40 + */
   21.41 +
   21.42 +#include <linux/config.h>
   21.43 +#include <linux/types.h>
   21.44 +#include <linux/kernel.h>
   21.45 +#include <linux/sched.h>
   21.46 +#include <linux/mm.h>
   21.47 +#include <linux/interrupt.h>
   21.48 +#include <linux/in.h>
   21.49 +#include <linux/inet.h>
   21.50 +#include <linux/slab.h>
   21.51 +#include <linux/netdevice.h>
   21.52 +#include <linux/string.h>
   21.53 +#include <linux/skbuff.h>
   21.54 +#include <linux/cache.h>
   21.55 +#include <linux/init.h>
   21.56 +#include <linux/highmem.h>
   21.57 +#include <linux/spinlock.h>
   21.58 +
   21.59 +#include <net/ip.h>
   21.60 +#include <net/protocol.h>
   21.61 +#include <net/dst.h>
   21.62 +#include <net/tcp.h>
   21.63 +#include <net/udp.h>
   21.64 +#include <net/sock.h>
   21.65 +#include <asm/io.h>
   21.66 +#include <asm/uaccess.h>
   21.67 +#include <asm/system.h>
   21.68 +
   21.69 +/* zc globals: */
   21.70 +char *net_page_chunk;
   21.71 +struct net_page_info *net_page_table;
   21.72 +struct list_head net_page_list;
   21.73 +spinlock_t net_page_list_lock = SPIN_LOCK_UNLOCKED;
   21.74 +unsigned int net_pages;
   21.75 +
   21.76 +
   21.77 +
   21.78 +int sysctl_hot_list_len = 128;
   21.79 +
   21.80 +static kmem_cache_t *skbuff_head_cache;
   21.81 +
   21.82 +static union {
   21.83 +	struct sk_buff_head	list;
   21.84 +	char			pad[SMP_CACHE_BYTES];
   21.85 +} skb_head_pool[NR_CPUS];
   21.86 +
   21.87 +/*
   21.88 + *	Keep out-of-line to prevent kernel bloat.
   21.89 + *	__builtin_return_address is not used because it is not always
   21.90 + *	reliable. 
   21.91 + */
   21.92 +
   21.93 +/**
   21.94 + *	skb_over_panic	- 	private function
   21.95 + *	@skb: buffer
   21.96 + *	@sz: size
   21.97 + *	@here: address
   21.98 + *
   21.99 + *	Out of line support code for skb_put(). Not user callable.
  21.100 + */
  21.101 + 
  21.102 +void skb_over_panic(struct sk_buff *skb, int sz, void *here)
  21.103 +{
  21.104 +	printk("skput:over: %p:%d put:%d dev:%s", 
  21.105 +		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
  21.106 +	BUG();
  21.107 +}
  21.108 +
  21.109 +/**
  21.110 + *	skb_under_panic	- 	private function
  21.111 + *	@skb: buffer
  21.112 + *	@sz: size
  21.113 + *	@here: address
  21.114 + *
  21.115 + *	Out of line support code for skb_push(). Not user callable.
  21.116 + */
  21.117 + 
  21.118 +
  21.119 +void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  21.120 +{
  21.121 +        printk("skput:under: %p:%d put:%d dev:%s",
  21.122 +                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
  21.123 +	BUG();
  21.124 +}
  21.125 +
  21.126 +static __inline__ struct sk_buff *skb_head_from_pool(void)
  21.127 +{
  21.128 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
  21.129 +
  21.130 +	if (skb_queue_len(list)) {
  21.131 +		struct sk_buff *skb;
  21.132 +		unsigned long flags;
  21.133 +
  21.134 +		local_irq_save(flags);
  21.135 +		skb = __skb_dequeue(list);
  21.136 +		local_irq_restore(flags);
  21.137 +		return skb;
  21.138 +	}
  21.139 +	return NULL;
  21.140 +}
  21.141 +
  21.142 +static __inline__ void skb_head_to_pool(struct sk_buff *skb)
  21.143 +{
  21.144 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
  21.145 +
  21.146 +	if (skb_queue_len(list) < sysctl_hot_list_len) {
  21.147 +		unsigned long flags;
  21.148 +
  21.149 +		local_irq_save(flags);
  21.150 +		__skb_queue_head(list, skb);
  21.151 +		local_irq_restore(flags);
  21.152 +
  21.153 +		return;
  21.154 +	}
  21.155 +	kmem_cache_free(skbuff_head_cache, skb);
  21.156 +}
  21.157 +
  21.158 +
  21.159 +/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  21.160 + *	'private' fields and also do memory statistics to find all the
  21.161 + *	[BEEP] leaks.
  21.162 + * 
  21.163 + */
  21.164 +
  21.165 +/**
  21.166 + *	alloc_skb	-	allocate a network buffer
  21.167 + *	@size: size to allocate
  21.168 + *	@gfp_mask: allocation mask
  21.169 + *
  21.170 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
  21.171 + *	tail room of size bytes. The object has a reference count of one.
  21.172 + *	The return is the buffer. On a failure the return is %NULL.
  21.173 + *
  21.174 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
  21.175 + *	%GFP_ATOMIC.
  21.176 + */
  21.177 + 
  21.178 +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
  21.179 +{
  21.180 +	struct sk_buff *skb;
  21.181 +	u8 *data;
  21.182 +
  21.183 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  21.184 +		static int count = 0;
  21.185 +		if (++count < 5) {
  21.186 +			printk(KERN_ERR "alloc_skb called nonatomically "
  21.187 +			       "from interrupt %p\n", NET_CALLER(size));
  21.188 + 			BUG();
  21.189 +		}
  21.190 +		gfp_mask &= ~__GFP_WAIT;
  21.191 +	}
  21.192 +
  21.193 +	/* Get the HEAD */
  21.194 +	skb = skb_head_from_pool();
  21.195 +	if (skb == NULL) {
  21.196 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  21.197 +		if (skb == NULL)
  21.198 +			goto nohead;
  21.199 +	}
  21.200 +
  21.201 +	/* Get the DATA. Size must match skb_add_mtu(). */
  21.202 +	size = SKB_DATA_ALIGN(size);
  21.203 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  21.204 +	if (data == NULL)
  21.205 +		goto nodata;
  21.206 +
  21.207 +	/* XXX: does not include slab overhead */ 
  21.208 +	skb->truesize = size + sizeof(struct sk_buff);
  21.209 +
  21.210 +	/* Load the data pointers. */
  21.211 +	skb->head = data;
  21.212 +	skb->data = data;
  21.213 +	skb->tail = data;
  21.214 +	skb->end = data + size;
  21.215 +
  21.216 +	/* Set up other state */
  21.217 +	skb->len = 0;
  21.218 +	skb->cloned = 0;
  21.219 +	skb->data_len = 0;
  21.220 +        skb->skb_type = SKB_NORMAL;
  21.221 +
  21.222 +	atomic_set(&skb->users, 1); 
  21.223 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  21.224 +	skb_shinfo(skb)->nr_frags = 0;
  21.225 +	skb_shinfo(skb)->frag_list = NULL;
  21.226 +	return skb;
  21.227 +
  21.228 +nodata:
  21.229 +	skb_head_to_pool(skb);
  21.230 +nohead:
  21.231 +	return NULL;
  21.232 +}
  21.233 +
  21.234 +/* begin zc code additions: */
  21.235 +
  21.236 +void init_net_pages(unsigned long order_pages)
  21.237 +{
  21.238 +        int i;
  21.239 +        struct net_page_info *np;
  21.240 +        pgd_t *pgd; pmd_t *pmd; pte_t *ptep;
  21.241 +        unsigned long nr_pages = 1 << order_pages;
  21.242 +        
  21.243 +        net_page_chunk = (char *)__get_free_pages(GFP_KERNEL, order_pages);
  21.244 +        net_page_table = kmalloc(nr_pages * sizeof(struct net_page_info), GFP_KERNEL);
  21.245 +
  21.246 +        INIT_LIST_HEAD(&net_page_list);
  21.247 +
  21.248 +        for (i = 0; i < nr_pages; i++) 
  21.249 +        {
  21.250 +                np = net_page_table + i;
  21.251 +                np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE);
  21.252 +
  21.253 +                // now fill the pte pointer:
  21.254 +                np->ppte = 0xdeadbeef;
  21.255 +                pgd = pgd_offset_k(np->virt_addr);
  21.256 +                if (pgd_none(*pgd) || pgd_bad(*pgd)) BUG();
  21.257 +
  21.258 +                pmd = pmd_offset(pgd, np->virt_addr);
  21.259 +                if (pmd_none(*pmd) || pmd_bad(*pmd)) BUG();
  21.260 +
  21.261 +                ptep = pte_offset(pmd, np->virt_addr);
  21.262 +                np->ppte = (unsigned long)virt_to_mach(ptep);
  21.263 +
  21.264 +                list_add_tail(&np->list, &net_page_list);
  21.265 +        }
  21.266 +        net_pages = nr_pages;
  21.269 +}
  21.270 +
  21.271 +struct net_page_info *get_net_page(void)
  21.272 +{
  21.273 +    struct list_head *list_ptr;
  21.274 +    struct net_page_info *np;
  21.275 +    unsigned long flags;
  21.276 +
  21.277 +    if (!net_pages) 
  21.278 +    {
  21.279 +            return NULL;
  21.280 +    }
  21.281 +    spin_lock_irqsave(&net_page_list_lock, flags);
  21.282 +    
  21.283 +    list_ptr = net_page_list.next;
  21.284 +    np = list_entry(list_ptr, struct net_page_info, list);
  21.285 +    list_del(&np->list);
  21.286 +    net_pages--;
  21.287 +    
  21.288 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
  21.289 +    
  21.290 +    return np;
  21.291 +}
  21.292 +
  21.293 +void free_net_page(struct net_page_info *np)
  21.294 +{
  21.295 +    unsigned long flags;
  21.296 +  
  21.297 +    if (np == NULL) return;
  21.298 +    
  21.299 +    spin_lock_irqsave(&net_page_list_lock, flags);
  21.300 +    
  21.301 +    list_add(&np->list, &net_page_list);
  21.302 +    net_pages++;
  21.303 +
  21.304 +    spin_unlock_irqrestore(&net_page_list_lock, flags);
  21.305 +
  21.306 +}
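
A short sketch of the pool discipline get_net_page()/free_net_page() implement:
a fixed set of pages is handed out and returned under net_page_list_lock. The
caller below is hypothetical and assumes the struct net_page_info declaration
from this changeset's skbuff.h changes is visible.

    #include <linux/skbuff.h>
    #include <linux/string.h>
    #include <asm/page.h>

    /* Hypothetical caller: borrow one pool page, scribble on it, return it. */
    static void touch_one_net_page(void)
    {
            struct net_page_info *np = get_net_page();

            if (np == NULL)
                    return;                               /* pool exhausted */

            memset((void *)np->virt_addr, 0, PAGE_SIZE);  /* the page is ours until freed */
            /* np->ppte records the machine address of the pte mapping this page */

            free_net_page(np);                            /* back on net_page_list */
    }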
  21.307 +
  21.308 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
  21.309 +{
  21.310 +	struct sk_buff *skb;
  21.311 +	u8 *data;
  21.312 +
  21.313 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  21.314 +		static int count = 0;
  21.315 +		if (++count < 5) {
  21.316 +			printk(KERN_ERR "alloc_zc_skb called nonatomically "
  21.317 +			       "from interrupt %p\n", NET_CALLER(size));
  21.318 + 			BUG();
  21.319 +		}
  21.320 +		gfp_mask &= ~__GFP_WAIT;
  21.321 +	}
  21.322 +
  21.323 +	/* Get the HEAD */
  21.324 +	skb = skb_head_from_pool();
  21.325 +	if (skb == NULL) {
  21.326 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  21.327 +		if (skb == NULL)
  21.328 +			goto nohead;
  21.329 +	}
  21.330 +
  21.331 +	/* Get the DATA. Size must match skb_add_mtu(). */
  21.332 +	size = SKB_DATA_ALIGN(size);
  21.333 +        if (size > PAGE_SIZE)
  21.334 +        {
  21.335 +                printk(KERN_WARNING "alloc_zc_skb called with oversize request; clamping to PAGE_SIZE.\n");
  21.336 +                size = PAGE_SIZE;
  21.337 +        }
  21.338 +	skb->net_page = get_net_page();
  21.339 +        if (skb->net_page == NULL)
  21.340 +        {
  21.341 +                goto nodata;
  21.342 +        }
  21.343 +        data = (u8 *)skb->net_page->virt_addr;
  21.344 +	if (data == NULL)
  21.345 +		goto nodata;
  21.346 +	/* XXX: does not include slab overhead */ 
  21.347 +	skb->truesize = size + sizeof(struct sk_buff);
  21.348 +
  21.349 +	/* Load the data pointers. */
  21.350 +	skb->head = data;
  21.351 +	skb->data = data;
  21.352 +	skb->tail = data;
  21.353 +	skb->end = data + size;
  21.354 +
  21.355 +	/* Set up other state */
  21.356 +	skb->len = 0;
  21.357 +	skb->cloned = 0;
  21.358 +	skb->data_len = 0;
  21.359 +        skb->skb_type = SKB_ZERO_COPY;
  21.360 +
  21.361 +	atomic_set(&skb->users, 1); 
  21.362 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  21.363 +	skb_shinfo(skb)->nr_frags = 0;
  21.364 +	skb_shinfo(skb)->frag_list = NULL;
  21.365 +	return skb;
  21.366 +
  21.367 +nodata:
  21.368 +	skb_head_to_pool(skb);
  21.369 +nohead:
  21.370 +	return NULL;
  21.371 +}
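
A sketch of how a receive path might use alloc_zc_skb() in place of
dev_alloc_skb(), so that incoming data lands in one of the pool pages above.
The refill helper and its use of ETH_FRAME_LEN are hypothetical.

    #include <linux/skbuff.h>
    #include <linux/netdevice.h>
    #include <linux/if_ether.h>

    /* Hypothetical rx-ring refill using the zero-copy allocator. */
    static struct sk_buff *refill_rx_slot(struct net_device *dev)
    {
            struct sk_buff *skb;

            skb = alloc_zc_skb(ETH_FRAME_LEN, GFP_ATOMIC);
            if (skb == NULL)
                    return NULL;           /* head pool or page pool exhausted */

            skb->dev = dev;
            /* skb->data points into skb->net_page->virt_addr, a whole pool page */
            return skb;
    }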
  21.372 +
  21.373 +/* end zc code additions: */
  21.374 +
  21.375 +/*
  21.376 + *	Slab constructor for a skb head. 
  21.377 + */ 
  21.378 +static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
  21.379 +				  unsigned long flags)
  21.380 +{
  21.381 +	struct sk_buff *skb = p;
  21.382 +
  21.383 +	skb->next = NULL;
  21.384 +	skb->prev = NULL;
  21.385 +	skb->list = NULL;
  21.386 +	skb->sk = NULL;
  21.387 +	skb->stamp.tv_sec=0;	/* No idea about time */
  21.388 +	skb->dev = NULL;
  21.389 +	skb->dst = NULL;
  21.390 +	memset(skb->cb, 0, sizeof(skb->cb));
  21.391 +	skb->pkt_type = PACKET_HOST;	/* Default type */
  21.392 +	skb->ip_summed = 0;
  21.393 +	skb->priority = 0;
  21.394 +	skb->security = 0;	/* By default packets are insecure */
  21.395 +	skb->destructor = NULL;
  21.396 +
  21.397 +#ifdef CONFIG_NETFILTER
  21.398 +	skb->nfmark = skb->nfcache = 0;
  21.399 +	skb->nfct = NULL;
  21.400 +#ifdef CONFIG_NETFILTER_DEBUG
  21.401 +	skb->nf_debug = 0;
  21.402 +#endif
  21.403 +#endif
  21.404 +#ifdef CONFIG_NET_SCHED
  21.405 +	skb->tc_index = 0;
  21.406 +#endif
  21.407 +}
  21.408 +
  21.409 +static void skb_drop_fraglist(struct sk_buff *skb)
  21.410 +{
  21.411 +	struct sk_buff *list = skb_shinfo(skb)->frag_list;
  21.412 +
  21.413 +	skb_shinfo(skb)->frag_list = NULL;
  21.414 +
  21.415 +	do {
  21.416 +		struct sk_buff *this = list;
  21.417 +		list = list->next;
  21.418 +		kfree_skb(this);
  21.419 +	} while (list);
  21.420 +}
  21.421 +
  21.422 +static void skb_clone_fraglist(struct sk_buff *skb)
  21.423 +{
  21.424 +	struct sk_buff *list;
  21.425 +
  21.426 +	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
  21.427 +		skb_get(list);
  21.428 +}
  21.429 +
  21.430 +static void skb_release_data(struct sk_buff *skb)
  21.431 +{
  21.432 +        if (!skb->cloned ||
  21.433 +	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
  21.434 +		if (skb_shinfo(skb)->nr_frags) {
  21.435 +			int i;
  21.436 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
  21.437 +			{
  21.438 +				put_page(skb_shinfo(skb)->frags[i].page);
  21.439 +			}
  21.440 +		}
  21.441 +
  21.442 +		if (skb_shinfo(skb)->frag_list)
  21.443 +			skb_drop_fraglist(skb);
  21.444 +
  21.445 +		if (skb->skb_type == SKB_NORMAL)
  21.446 +		{
  21.447 +			kfree(skb->head);
  21.448 +		} else { /* SKB_ZERO_COPY */
  21.449 +			free_net_page(skb->net_page);
  21.450 +		}
  21.451 +	}
  21.452 +
  21.453 +}
  21.454 +
  21.455 +/*
  21.456 + *	Free an skbuff by memory without cleaning the state. 
  21.457 + */
  21.458 +void kfree_skbmem(struct sk_buff *skb)
  21.459 +{
  21.460 +	skb_release_data(skb);
  21.461 +	skb_head_to_pool(skb);
  21.462 +}
  21.463 +
  21.464 +/**
  21.465 + *	__kfree_skb - private function 
  21.466 + *	@skb: buffer
  21.467 + *
  21.468 + *	Free an &sk_buff. Release anything attached to the buffer and
  21.469 + *	clean its state. This is an internal helper function; users should
  21.470 + *	always call kfree_skb() instead.
  21.471 + */
  21.472 +
  21.473 +void __kfree_skb(struct sk_buff *skb)
  21.474 +{
  21.475 +	if (skb->list) {
  21.476 +	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
  21.477 +		       "on a list (from %p).\n", NET_CALLER(skb));
  21.478 +		BUG();
  21.479 +	}
  21.480 +
  21.481 +	dst_release(skb->dst);
  21.482 +	if(skb->destructor) {
  21.483 +		if (in_irq()) {
  21.484 +			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
  21.485 +				NET_CALLER(skb));
  21.486 +		}
  21.487 +		skb->destructor(skb);
  21.488 +	}
  21.489 +#ifdef CONFIG_NETFILTER
  21.490 +	nf_conntrack_put(skb->nfct);
  21.491 +#endif
  21.492 +	skb_headerinit(skb, NULL, 0);  /* clean state */
  21.493 +	kfree_skbmem(skb);
  21.494 +}
  21.495 +
  21.496 +/**
  21.497 + *	skb_clone	-	duplicate an sk_buff
  21.498 + *	@skb: buffer to clone
  21.499 + *	@gfp_mask: allocation priority
  21.500 + *
  21.501 + *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
  21.502 + *	copies share the same packet data but not structure. The new
  21.503 + *	buffer has a reference count of 1. If the allocation fails the
  21.504 + *	function returns %NULL; otherwise the new buffer is returned.
  21.505 + *
  21.506 + *	If this function is called from an interrupt, @gfp_mask must be
  21.507 + *	%GFP_ATOMIC.
  21.508 + */
  21.509 +
  21.510 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
  21.511 +{
  21.512 +	struct sk_buff *n;
  21.513 +
  21.514 +	n = skb_head_from_pool();
  21.515 +	if (!n) {
  21.516 +		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
  21.517 +		if (!n)
  21.518 +			return NULL;
  21.519 +	}
  21.520 +
  21.521 +#define C(x) n->x = skb->x
  21.522 +
  21.523 +	n->next = n->prev = NULL;
  21.524 +	n->list = NULL;
  21.525 +	n->sk = NULL;
  21.526 +	C(stamp);
  21.527 +	C(dev);
  21.528 +	C(h);
  21.529 +	C(nh);
  21.530 +	C(mac);
  21.531 +	C(dst);
  21.532 +	dst_clone(n->dst);
  21.533 +	memcpy(n->cb, skb->cb, sizeof(skb->cb));
  21.534 +	C(len);
  21.535 +	C(data_len);
  21.536 +	C(csum);
  21.537 +	n->cloned = 1;
  21.538 +	C(pkt_type);
  21.539 +	C(ip_summed);
  21.540 +	C(priority);
  21.541 +	atomic_set(&n->users, 1);
  21.542 +	C(protocol);
  21.543 +	C(security);
  21.544 +	C(truesize);
  21.545 +	C(head);
  21.546 +	C(data);
  21.547 +	C(tail);
  21.548 +	C(end);
  21.549 +	n->destructor = NULL;
  21.550 +#ifdef CONFIG_NETFILTER
  21.551 +	C(nfmark);
  21.552 +	C(nfcache);
  21.553 +	C(nfct);
  21.554 +#ifdef CONFIG_NETFILTER_DEBUG
  21.555 +	C(nf_debug);
  21.556 +#endif
  21.557 +#endif /*CONFIG_NETFILTER*/
  21.558 +#if defined(CONFIG_HIPPI)
  21.559 +	C(private);
  21.560 +#endif
  21.561 +#ifdef CONFIG_NET_SCHED
  21.562 +	C(tc_index);
  21.563 +#endif
  21.564 +        C(skb_type);
  21.565 +        C(net_page);
  21.566 +	atomic_inc(&(skb_shinfo(skb)->dataref));
  21.567 +	skb->cloned = 1;
  21.568 +#ifdef CONFIG_NETFILTER
  21.569 +	nf_conntrack_get(skb->nfct);
  21.570 +#endif
  21.571 +	return n;
  21.572 +}
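
A sketch of the sharing semantics described above: a clone gets its own
struct sk_buff but points at the same data, so only header fields may be
touched without first taking a private copy. The tap helper is hypothetical.

    #include <linux/skbuff.h>

    /* Hypothetical packet tap: keep a reference to the data without copying it. */
    static struct sk_buff *tap_packet(struct sk_buff *skb)
    {
            struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);

            if (clone == NULL)
                    return NULL;
            /* clone->data == skb->data; both buffers now count in shinfo->dataref.
             * Writing payload bytes through either one would be a bug. */
            return clone;
    }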
  21.573 +
  21.574 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  21.575 +{
  21.576 +	/*
  21.577 +	 *	Shift between the two data areas in bytes
  21.578 +	 */
  21.579 +	unsigned long offset = new->data - old->data;
  21.580 +
  21.581 +	new->list=NULL;
  21.582 +	new->sk=NULL;
  21.583 +	new->dev=old->dev;
  21.584 +	new->priority=old->priority;
  21.585 +	new->protocol=old->protocol;
  21.586 +	new->dst=dst_clone(old->dst);
  21.587 +	new->h.raw=old->h.raw+offset;
  21.588 +	new->nh.raw=old->nh.raw+offset;
  21.589 +	new->mac.raw=old->mac.raw+offset;
  21.590 +	memcpy(new->cb, old->cb, sizeof(old->cb));
  21.591 +	atomic_set(&new->users, 1);
  21.592 +	new->pkt_type=old->pkt_type;
  21.593 +	new->stamp=old->stamp;
  21.594 +	new->destructor = NULL;
  21.595 +	new->security=old->security;
  21.596 +#ifdef CONFIG_NETFILTER
  21.597 +	new->nfmark=old->nfmark;
  21.598 +	new->nfcache=old->nfcache;
  21.599 +	new->nfct=old->nfct;
  21.600 +	nf_conntrack_get(new->nfct);
  21.601 +#ifdef CONFIG_NETFILTER_DEBUG
  21.602 +	new->nf_debug=old->nf_debug;
  21.603 +#endif
  21.604 +#endif
  21.605 +#ifdef CONFIG_NET_SCHED
  21.606 +	new->tc_index = old->tc_index;
  21.607 +#endif
  21.608 +}
  21.609 +
  21.610 +/**
  21.611 + *	skb_copy	-	create private copy of an sk_buff
  21.612 + *	@skb: buffer to copy
  21.613 + *	@gfp_mask: allocation priority
  21.614 + *
  21.615 + *	Make a copy of both an &sk_buff and its data. This is used when the
  21.616 + *	caller wishes to modify the data and needs a private copy of the 
  21.617 + *	data to alter. Returns %NULL on failure or the pointer to the buffer
  21.618 + *	on success. The returned buffer has a reference count of 1.
  21.619 + *
  21.620 + *	As a by-product this function converts a non-linear &sk_buff into a
  21.621 + *	linear one, so the &sk_buff becomes completely private and the caller
  21.622 + *	may modify all the data of the returned buffer. This means the
  21.623 + *	function is not recommended when only the header is going to be
  21.624 + *	modified; use pskb_copy() instead.
  21.625 + */
  21.626 + 
  21.627 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
  21.628 +{
  21.629 +	struct sk_buff *n;
  21.630 +	int headerlen = skb->data-skb->head;
  21.631 +
  21.632 +	/*
  21.633 +	 *	Allocate the copy buffer
  21.634 +	 */
  21.635 +	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
  21.636 +	if(n==NULL)
  21.637 +		return NULL;
  21.638 +
  21.639 +	/* Set the data pointer */
  21.640 +	skb_reserve(n,headerlen);
  21.641 +	/* Set the tail pointer and length */
  21.642 +	skb_put(n,skb->len);
  21.643 +	n->csum = skb->csum;
  21.644 +	n->ip_summed = skb->ip_summed;
  21.645 +
  21.646 +	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
  21.647 +		BUG();
  21.648 +
  21.649 +	copy_skb_header(n, skb);
  21.650 +
  21.651 +	return n;
  21.652 +}
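
A sketch of when skb_copy() is the right call: the caller wants to rewrite
payload bytes, so it needs a fully private, linear copy first. The mangling
helper is hypothetical.

    #include <linux/skbuff.h>

    /* Hypothetical payload rewrite: take a private copy before modifying data. */
    static struct sk_buff *mangle_payload(struct sk_buff *skb)
    {
            struct sk_buff *priv = skb_copy(skb, GFP_ATOMIC);

            if (priv == NULL)
                    return NULL;

            if (priv->len > 0)
                    priv->data[0] ^= 0xff; /* safe: priv shares nothing with skb */
            kfree_skb(skb);                /* drop our reference to the original */
            return priv;
    }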
  21.653 +
  21.654 +/* Keep head the same: replace data */
  21.655 +int skb_linearize(struct sk_buff *skb, int gfp_mask)
  21.656 +{
  21.657 +	unsigned int size;
  21.658 +	u8 *data;
  21.659 +	long offset;
  21.660 +	int headerlen = skb->data - skb->head;
  21.661 +	int expand = (skb->tail+skb->data_len) - skb->end;
  21.662 +
  21.663 +	if (skb_shared(skb))
  21.664 +		BUG();
  21.665 +
  21.666 +	if (expand <= 0)
  21.667 +		expand = 0;
  21.668 +
  21.669 +	size = (skb->end - skb->head + expand);
  21.670 +	size = SKB_DATA_ALIGN(size);
  21.671 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  21.672 +	if (data == NULL)
  21.673 +		return -ENOMEM;
  21.674 +
  21.675 +	/* Copy entire thing */
  21.676 +	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
  21.677 +		BUG();
  21.678 +
  21.679 +	/* Offset between the two in bytes */
  21.680 +	offset = data - skb->head;
  21.681 +
  21.682 +	/* Free old data. */
  21.683 +	skb_release_data(skb);
  21.684 +
  21.685 +	skb->head = data;
  21.686 +	skb->end  = data + size;
  21.687 +
  21.688 +	/* Set up new pointers */
  21.689 +	skb->h.raw += offset;
  21.690 +	skb->nh.raw += offset;
  21.691 +	skb->mac.raw += offset;
  21.692 +	skb->tail += offset;
  21.693 +	skb->data += offset;
  21.694 +
  21.695 +	/* Set up shinfo */
  21.696 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  21.697 +	skb_shinfo(skb)->nr_frags = 0;
  21.698 +	skb_shinfo(skb)->frag_list = NULL;
  21.699 +
  21.700 +	/* We are no longer a clone, even if we were. */
  21.701 +	skb->cloned = 0;
  21.702 +
  21.703 +	skb->tail += skb->data_len;
  21.704 +	skb->data_len = 0;
  21.705 +	return 0;
  21.706 +}
  21.707 +
  21.708 +
  21.709 +/**
  21.710 + *	pskb_copy	-	create copy of an sk_buff with private head.
  21.711 + *	@skb: buffer to copy
  21.712 + *	@gfp_mask: allocation priority
  21.713 + *
  21.714 + *	Make a copy of an &sk_buff and the part of its data located in the
  21.715 + *	linear header; fragmented data remains shared. This is used when the
  21.716 + *	caller wishes to modify only the header of an &sk_buff and needs a
  21.717 + *	private copy of that header to alter. Returns %NULL on failure or
  21.718 + *	the pointer to the buffer on success.
  21.719 + *	The returned buffer has a reference count of 1.
  21.720 + */
  21.721 +
  21.722 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
  21.723 +{
  21.724 +	struct sk_buff *n;
  21.725 +
  21.726 +	/*
  21.727 +	 *	Allocate the copy buffer
  21.728 +	 */
  21.729 +	n=alloc_skb(skb->end - skb->head, gfp_mask);
  21.730 +	if(n==NULL)
  21.731 +		return NULL;
  21.732 +
  21.733 +	/* Set the data pointer */
  21.734 +	skb_reserve(n,skb->data-skb->head);
  21.735 +	/* Set the tail pointer and length */
  21.736 +	skb_put(n,skb_headlen(skb));
  21.737 +	/* Copy the bytes */
  21.738 +	memcpy(n->data, skb->data, n->len);
  21.739 +	n->csum = skb->csum;
  21.740 +	n->ip_summed = skb->ip_summed;
  21.741 +
  21.742 +	n->data_len = skb->data_len;
  21.743 +	n->len = skb->len;
  21.744 +
  21.745 +	if (skb_shinfo(skb)->nr_frags) {
  21.746 +		int i;
  21.747 +
  21.748 +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  21.749 +			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
  21.750 +			get_page(skb_shinfo(n)->frags[i].page);
  21.751 +		}
  21.752 +		skb_shinfo(n)->nr_frags = i;
  21.753 +	}
  21.754 +
  21.755 +	if (skb_shinfo(skb)->frag_list) {
  21.756 +		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
  21.757 +		skb_clone_fraglist(n);
  21.758 +	}
  21.759 +
  21.760 +	copy_skb_header(n, skb);
  21.761 +
  21.762 +	return n;
  21.763 +}
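
A sketch contrasting pskb_copy() with skb_copy(): only the linear header is
duplicated and the fragments stay shared, which suits header-only edits. The
TTL tweak is hypothetical and assumes skb->nh already points at a linear IP
header; the IP checksum update is omitted.

    #include <linux/skbuff.h>
    #include <linux/ip.h>

    /* Hypothetical header-only edit on a private copy of the header. */
    static struct sk_buff *decrement_ttl(struct sk_buff *skb)
    {
            struct sk_buff *n = pskb_copy(skb, GFP_ATOMIC);

            if (n == NULL)
                    return NULL;

            n->nh.iph->ttl--;  /* header bytes are private; frags are still shared */
            kfree_skb(skb);
            return n;
    }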
  21.764 +
  21.765 +/**
  21.766 + *	pskb_expand_head - reallocate header of &sk_buff
  21.767 + *	@skb: buffer to reallocate
  21.768 + *	@nhead: room to add at head
  21.769 + *	@ntail: room to add at tail
  21.770 + *	@gfp_mask: allocation priority
  21.771 + *
  21.772 + *	Expands (or creates an identical copy, if @nhead and @ntail are zero)
  21.773 + *	the header of the skb. The &sk_buff itself is not changed and MUST
  21.774 + *	have a reference count of 1. Returns zero on success, or a negative
  21.775 + *	error code if expansion failed; in that case the &sk_buff is unchanged.
  21.776 + *
  21.777 + *	All the pointers pointing into skb header may change and must be
  21.778 + *	reloaded after call to this function.
  21.779 + */
  21.780 +
  21.781 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
  21.782 +{
  21.783 +	int i;
  21.784 +	u8 *data;
  21.785 +	int size = nhead + (skb->end - skb->head) + ntail;
  21.786 +	long off;
  21.787 +
  21.788 +	if (skb_shared(skb))
  21.789 +		BUG();
  21.790 +
  21.791 +	size = SKB_DATA_ALIGN(size);
  21.792 +
  21.793 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  21.794 +	if (data == NULL)
  21.795 +		goto nodata;
  21.796 +
  21.797 +	/* Copy only real data... and, alas, header. This should be
  21.798 +	 * optimized for the cases when header is void. */
  21.799 +	memcpy(data+nhead, skb->head, skb->tail-skb->head);
  21.800 +	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
  21.801 +
  21.802 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
  21.803 +		get_page(skb_shinfo(skb)->frags[i].page);
  21.804 +
  21.805 +	if (skb_shinfo(skb)->frag_list)
  21.806 +		skb_clone_fraglist(skb);
  21.807 +
  21.808 +	skb_release_data(skb);
  21.809 +
  21.810 +	off = (data+nhead) - skb->head;
  21.811 +
  21.812 +	skb->head = data;
  21.813 +	skb->end  = data+size;
  21.814 +
  21.815 +	skb->data += off;
  21.816 +	skb->tail += off;
  21.817 +	skb->mac.raw += off;
  21.818 +	skb->h.raw += off;
  21.819 +	skb->nh.raw += off;
  21.820 +	skb->cloned = 0;
  21.821 +	atomic_set(&skb_shinfo(skb)->dataref, 1);
  21.822 +	return 0;
  21.823 +
  21.824 +nodata:
  21.825 +	return -ENOMEM;
  21.826 +}
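
A sketch of the usual reason to call pskb_expand_head(): make room in front of
the data before pushing an extra header, remembering that every cached pointer
into the old header is stale afterwards. The 8-byte tag is hypothetical, and
the caller is assumed to hold the only reference (skb_shared() would BUG here).

    #include <linux/skbuff.h>
    #include <linux/string.h>
    #include <linux/errno.h>

    /* Hypothetical encapsulation: ensure 8 bytes of headroom, then push a tag. */
    static int push_tag(struct sk_buff *skb, const void *tag)
    {
            if (skb_headroom(skb) < 8 || skb_cloned(skb)) {
                    if (pskb_expand_head(skb, 8, 0, GFP_ATOMIC))
                            return -ENOMEM;
                    /* skb->data, skb->nh.raw etc. have all moved */
            }
            memcpy(skb_push(skb, 8), tag, 8);
            return 0;
    }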
  21.827 +
  21.828 +/* Make private copy of skb with writable head and some headroom */
  21.829 +
  21.830 +struct sk_buff *
  21.831 +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
  21.832 +{
  21.833 +	struct sk_buff *skb2;
  21.834 +	int delta = headroom - skb_headroom(skb);
  21.835 +
  21.836 +	if (delta <= 0)
  21.837 +		return pskb_copy(skb, GFP_ATOMIC);
  21.838 +
  21.839 +	skb2 = skb_clone(skb, GFP_ATOMIC);
  21.840 +	if (skb2 == NULL ||
  21.841 +	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
  21.842 +		return skb2;
  21.843 +
  21.844 +	kfree_skb(skb2);
  21.845 +	return NULL;
  21.846 +}
  21.847 +
  21.848 +
  21.849 +/**
  21.850 + *	skb_copy_expand	-	copy and expand sk_buff
  21.851 + *	@skb: buffer to copy
  21.852 + *	@newheadroom: new free bytes at head
  21.853 + *	@newtailroom: new free bytes at tail
  21.854 + *	@gfp_mask: allocation priority
  21.855 + *
  21.856 + *	Make a copy of both an &sk_buff and its data and while doing so 
  21.857 + *	allocate additional space.
  21.858 + *
  21.859 + *	This is used when the caller wishes to modify the data and needs a 
  21.860 + *	private copy of the data to alter as well as more space for new fields.
  21.861 + *	Returns %NULL on failure or the pointer to the buffer
  21.862 + *	on success. The returned buffer has a reference count of 1.
  21.863 + *
  21.864 + *	You must pass %GFP_ATOMIC as the allocation priority if this function
  21.865 + *	is called from an interrupt.
  21.866 + */
  21.867 + 
  21.868 +
  21.869 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
  21.870 +				int newheadroom,
  21.871 +				int newtailroom,
  21.872 +				int gfp_mask)
  21.873 +{
  21.874 +	struct sk_buff *n;
  21.875 +
  21.876 +	/*
  21.877 +	 *	Allocate the copy buffer
  21.878 +	 */
  21.879 + 	 
  21.880 +	n=alloc_skb(newheadroom + skb->len + newtailroom,
  21.881 +		    gfp_mask);
  21.882 +	if(n==NULL)
  21.883 +		return NULL;
  21.884 +
  21.885 +	skb_reserve(n,newheadroom);
  21.886 +
  21.887 +	/* Set the tail pointer and length */
  21.888 +	skb_put(n,skb->len);
  21.889 +
  21.890 +	/* Copy the data only. */
  21.891 +	if (skb_copy_bits(skb, 0, n->data, skb->len))
  21.892 +		BUG();
  21.893 +
  21.894 +	copy_skb_header(n, skb);
  21.895 +	return n;
  21.896 +}
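
A sketch of skb_copy_expand() as documented above: copy the packet while
reserving extra headroom for a new outer header. The 32-byte figure is
hypothetical.

    #include <linux/skbuff.h>

    /* Hypothetical re-encapsulation: private copy with 32 bytes of fresh headroom. */
    static struct sk_buff *copy_for_encap(const struct sk_buff *skb)
    {
            struct sk_buff *n = skb_copy_expand(skb, 32, 0, GFP_ATOMIC);

            if (n == NULL)
                    return NULL;
            /* n now has room for skb_push(n, 32) without any reallocation */
            return n;
    }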
  21.897 +
  21.898 +/* Trims skb to length len. It can change skb pointers if "realloc" is 1.
  21.899 + * If realloc==0 and trimming is impossible without reallocating the data,
  21.900 + * it is a BUG().
  21.901 + */
  21.902 +
  21.903 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
  21.904 +{
  21.905 +	int offset = skb_headlen(skb);
  21.906 +	int nfrags = skb_shinfo(skb)->nr_frags;
  21.907 +	int i;
  21.908 +
  21.909 +	for (i=0; i<nfrags; i++) {
  21.910 +		int end = offset + skb_shinfo(skb)->frags[i].size;
  21.911 +		if (end > len) {
  21.912 +			if (skb_cloned(skb)) {
  21.913 +				if (!realloc)
  21.914 +					BUG();
  21.915 +				if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
  21.916 +					return -ENOMEM;
  21.917 +			}
  21.918 +			if (len <= offset) {
  21.919 +				put_page(skb_shinfo(skb)->frags[i].page);
  21.920 +				skb_shinfo(skb)->nr_frags--;
  21.921 +			} else {
  21.922 +				skb_shinfo(skb)->frags[i].size = len-offset;
  21.923 +			}
  21.924 +		}
  21.925 +		offset = end;
  21.926 +	}
  21.927 +
  21.928 +	if (offset < len) {
  21.929 +		skb->data_len -= skb->len - len;
  21.930 +		skb->len = len;
  21.931 +	} else {
  21.932 +		if (len <= skb_headlen(skb)) {
  21.933 +			skb->len = len;
  21.934 +			skb->data_len = 0;
  21.935 +			skb->tail = skb->data + len;
  21.936 +			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
  21.937 +				skb_drop_fraglist(skb);
  21.938 +		} else {
  21.939 +			skb->data_len -= skb->len - len;
  21.940 +			skb->len = len;
  21.941 +		}
  21.942 +	}
  21.943 +
  21.944 +	return 0;
  21.945 +}
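
Callers normally reach this through pskb_trim(), the inline wrapper in
<linux/skbuff.h> that passes realloc=1 when the buffer is non-linear. A small
sketch, with a hypothetical padding-removal caller:

    #include <linux/skbuff.h>

    /* Hypothetical pad stripping: trim a frame down to the length IP reports. */
    static int strip_padding(struct sk_buff *skb, unsigned int ip_len)
    {
            if (skb->len <= ip_len)
                    return 0;              /* nothing to trim */
            return pskb_trim(skb, ip_len); /* may reallocate a cloned head */
    }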
  21.946 +
  21.947 +/**
  21.948 + *	__pskb_pull_tail - advance tail of skb header 
  21.949 + *	@skb: buffer to reallocate
  21.950 + *	@delta: number of bytes to advance tail
  21.951 + *
  21.952 + *	This function only makes sense on a fragmented &sk_buff: it expands
  21.953 + *	the header, moving its tail forward and copying the necessary data
  21.954 + *	from the fragmented part.
  21.955 + *
  21.956 + *	The &sk_buff MUST have a reference count of 1.
  21.957 + *
  21.958 + *	Returns %NULL (and the &sk_buff is unchanged) if the pull failed,
  21.959 + *	or the value of the new tail of the skb on success.
  21.960 + *
  21.961 + *	All the pointers pointing into skb header may change and must be
  21.962 + *	reloaded after call to this function.
  21.963 + */
  21.964 +
  21.965 +/* Moves tail of skb head forward, copying data from fragmented part,
  21.966 + * when it is necessary.
  21.967 + * 1. It may fail due to malloc failure.
  21.968 + * 2. It may change skb pointers.
  21.969 + *
  21.970 + * It is pretty complicated. Luckily, it is called only in exceptional cases.
  21.971 + */
  21.972 +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
  21.973 +{
  21.974 +	int i, k, eat;
  21.975 +
  21.976 +	/* If skb has not enough free space at tail, get new one
  21.977 +	 * plus 128 bytes for future expansions. If we have enough
  21.978 +	 * room at tail, reallocate without expansion only if skb is cloned.
  21.979 +	 */
  21.980 +	eat = (skb->tail+delta) - skb->end;
  21.981 +
  21.982 +	if (eat > 0 || skb_cloned(skb)) {
  21.983 +		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
  21.984 +			return NULL;
  21.985 +	}
  21.986 +
  21.987 +	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
  21.988 +		BUG();
  21.989 +
  21.990 +	/* Optimization: no fragments, no reason to pre-estimate the
  21.991 +	 * size of pulled pages. Superb.
  21.992 +	 */
  21.993 +	if (skb_shinfo(skb)->frag_list == NULL)
  21.994 +		goto pull_pages;
  21.995 +
  21.996 +	/* Estimate size of pulled pages. */
  21.997 +	eat = delta;
  21.998 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  21.999 +		if (skb_shinfo(skb)->frags[i].size >= eat)
 21.1000 +			goto pull_pages;
 21.1001 +		eat -= skb_shinfo(skb)->frags[i].size;
 21.1002 +	}
 21.1003 +
 21.1004 +	/* If we need to update the frag list, we are in trouble.
 21.1005 +	 * Certainly, it is possible to add an offset to the skb data,
 21.1006 +	 * but taking into account that pulling is expected to be a
 21.1007 +	 * very rare operation, it is worth fighting against further
 21.1008 +	 * bloating of the skb head and crucifying ourselves here instead.
 21.1009 +	 * Pure masochism, indeed. 8)8)
 21.1010 +	 */
 21.1011 +	if (eat) {
 21.1012 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 21.1013 +		struct sk_buff *clone = NULL;
 21.1014 +		struct sk_buff *insp = NULL;
 21.1015 +
 21.1016 +		do {
 21.1017 +			if (list == NULL)
 21.1018 +				BUG();
 21.1019 +
 21.1020 +			if (list->len <= eat) {
 21.1021 +				/* Eaten as whole. */
 21.1022 +				eat -= list->len;
 21.1023 +				list = list->next;
 21.1024 +				insp = list;
 21.1025 +			} else {
 21.1026 +				/* Eaten partially. */
 21.1027 +
 21.1028 +				if (skb_shared(list)) {
 21.1029 +					/* Sucks! We need to fork list. :-( */
 21.1030 +					clone = skb_clone(list, GFP_ATOMIC);
 21.1031 +					if (clone == NULL)
 21.1032 +						return NULL;
 21.1033 +					insp = list->next;
 21.1034 +					list = clone;
 21.1035 +				} else {
 21.1036 +					/* This may be pulled without
 21.1037 +					 * problems. */
 21.1038 +					insp = list;
 21.1039 +				}
 21.1040 +				if (pskb_pull(list, eat) == NULL) {
 21.1041 +					if (clone)
 21.1042 +						kfree_skb(clone);
 21.1043 +					return NULL;
 21.1044 +				}
 21.1045 +				break;
 21.1046 +			}
 21.1047 +		} while (eat);
 21.1048 +
 21.1049 +		/* Free pulled out fragments. */
 21.1050 +		while ((list = skb_shinfo(skb)->frag_list) != insp) {
 21.1051 +			skb_shinfo(skb)->frag_list = list->next;
 21.1052 +			kfree_skb(list);
 21.1053 +		}
 21.1054 +		/* And insert new clone at head. */
 21.1055 +		if (clone) {
 21.1056 +			clone->next = list;
 21.1057 +			skb_shinfo(skb)->frag_list = clone;
 21.1058 +		}
 21.1059 +	}
 21.1060 +	/* Success! Now we may commit changes to skb data. */
 21.1061 +
 21.1062 +pull_pages:
 21.1063 +	eat = delta;
 21.1064 +	k = 0;
 21.1065 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 21.1066 +		if (skb_shinfo(skb)->frags[i].size <= eat) {
 21.1067 +			put_page(skb_shinfo(skb)->frags[i].page);
 21.1068 +			eat -= skb_shinfo(skb)->frags[i].size;
 21.1069 +		} else {
 21.1070 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
 21.1071 +			if (eat) {
 21.1072 +				skb_shinfo(skb)->frags[k].page_offset += eat;
 21.1073 +				skb_shinfo(skb)->frags[k].size -= eat;
 21.1074 +				eat = 0;
 21.1075 +			}
 21.1076 +			k++;
 21.1077 +		}
 21.1078 +	}
 21.1079 +	skb_shinfo(skb)->nr_frags = k;
 21.1080 +
 21.1081 +	skb->tail += delta;
 21.1082 +	skb->data_len -= delta;
 21.1083 +
 21.1084 +	return skb->tail;
 21.1085 +}
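
Most code reaches this through pskb_may_pull(), the inline in <linux/skbuff.h>
that calls __pskb_pull_tail() only when the bytes it needs are still held in
fragments. A sketch of a hypothetical parser, assuming skb->data points at the
IP header:

    #include <linux/skbuff.h>
    #include <linux/ip.h>
    #include <linux/errno.h>

    /* Hypothetical parser: make sure the IP header is linear before reading it. */
    static int peek_ip_protocol(struct sk_buff *skb)
    {
            if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                    return -EINVAL;        /* too short, or the pull failed */

            /* re-read pointers: __pskb_pull_tail may have moved the header */
            return ((struct iphdr *)skb->data)->protocol;
    }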
 21.1086 +
 21.1087 +/* Copy some data bits from skb to kernel buffer. */
 21.1088 +
 21.1089 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 21.1090 +{
 21.1091 +	int i, copy;
 21.1092 +	int start = skb->len - skb->data_len;
 21.1093 +
 21.1094 +	if (offset > (int)skb->len-len)
 21.1095 +		goto fault;
 21.1096 +
 21.1097 +	/* Copy header. */
 21.1098 +	if ((copy = start-offset) > 0) {
 21.1099 +		if (copy > len)
 21.1100 +			copy = len;
 21.1101 +		memcpy(to, skb->data + offset, copy);
 21.1102 +		if ((len -= copy) == 0)
 21.1103 +			return 0;
 21.1104 +		offset += copy;
 21.1105 +		to += copy;
 21.1106 +	}
 21.1107 +
 21.1108 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 21.1109 +		int end;
 21.1110 +
 21.1111 +		BUG_TRAP(start <= offset+len);
 21.1112 +
 21.1113 +		end = start + skb_shinfo(skb)->frags[i].size;
 21.1114 +		if ((copy = end-offset) > 0) {
 21.1115 +			u8 *vaddr;
 21.1116 +
 21.1117 +			if (copy > len)
 21.1118 +				copy = len;
 21.1119 +
 21.1120 +			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
 21.1121 +			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
 21.1122 +			       offset-start, copy);
 21.1123 +			kunmap_skb_frag(vaddr);
 21.1124 +
 21.1125 +			if ((len -= copy) == 0)
 21.1126 +				return 0;
 21.1127 +			offset += copy;
 21.1128 +			to += copy;
 21.1129 +		}
 21.1130 +		start = end;
 21.1131 +	}
 21.1132 +
 21.1133 +	if (skb_shinfo(skb)->frag_list) {
 21.1134 +		struct sk_buff *list;
 21.1135 +
 21.1136 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 21.1137 +			int end;
 21.1138 +
 21.1139 +			BUG_TRAP(start <= offset+len);
 21.1140 +
 21.1141 +			end = start + list->len;
 21.1142 +			if ((copy = end-offset) > 0) {
 21.1143 +				if (copy > len)
 21.1144 +					copy = len;
 21.1145 +				if (skb_copy_bits(list, offset-start, to, copy))
 21.1146 +					goto fault;
 21.1147 +				if ((len -= copy) == 0)
 21.1148 +					return 0;
 21.1149 +				offset += copy;
 21.1150 +				to += copy;
 21.1151 +			}
 21.1152 +			start = end;
 21.1153 +		}
 21.1154 +	}
 21.1155 +	if (len == 0)
 21.1156 +		return 0;
 21.1157 +
 21.1158 +fault:
 21.1159 +	return -EFAULT;
 21.1160 +}
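
A sketch of skb_copy_bits() gathering possibly fragmented packet bytes into a
flat local buffer; the 64-byte peek window is hypothetical.

    #include <linux/skbuff.h>
    #include <linux/errno.h>

    /* Hypothetical inspection: flatten up to the first 64 bytes of the packet. */
    static int peek_bytes(const struct sk_buff *skb, unsigned char *buf)
    {
            int len = skb->len < 64 ? skb->len : 64;

            if (skb_copy_bits(skb, 0, buf, len))
                    return -EFAULT;        /* offset/length outside the packet */
            return len;
    }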
 21.1161 +
 21.1162 +/* Checksum skb data. */
 21.1163 +
 21.1164 +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
 21.1165 +{
 21.1166 +	int i, copy;
 21.1167 +	int start = skb->len - skb->data_len;
 21.1168 +	int pos = 0;
 21.1169 +
 21.1170 +	/* Checksum header. */
 21.1171 +	if ((copy = start-offset) > 0) {
 21.1172 +		if (copy > len)
 21.1173 +			copy = len;
 21.1174 +		csum = csum_partial(skb->data+offset, copy, csum);
 21.1175 +		if ((len -= copy) == 0)
 21.1176 +			return csum;
 21.1177 +		offset += copy;
 21.1178 +		pos = copy;
 21.1179 +	}
 21.1180 +
 21.1181 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 21.1182 +		int end;
 21.1183 +
 21.1184 +		BUG_TRAP(start <= offset+len);
 21.1185 +
 21.1186 +		end = start + skb_shinfo(skb)->frags[i].size;
 21.1187 +		if ((copy = end-offset) > 0) {
 21.1188 +			unsigned int csum2;
 21.1189 +			u8 *vaddr;
 21.1190 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 21.1191 +
 21.1192 +			if (copy > len)
 21.1193 +				copy = len;
 21.1194 +			vaddr = kmap_skb_frag(frag);
 21.1195 +			csum2 = csum_partial(vaddr + frag->page_offset +
 21.1196 +					     offset-start, copy, 0);
 21.1197 +			kunmap_skb_frag(vaddr);
 21.1198 +			csum = csum_block_add(csum, csum2, pos);
 21.1199 +			if (!(len -= copy))
 21.1200 +				return csum;
 21.1201 +			offset += copy;
 21.1202 +			pos += copy;
 21.1203 +		}
 21.1204 +		start = end;
 21.1205 +	}
 21.1206 +
 21.1207 +	if (skb_shinfo(skb)->frag_list) {
 21.1208 +		struct sk_buff *list;
 21.1209 +
 21.1210 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 21.1211 +			int end;
 21.1212 +
 21.1213 +			BUG_TRAP(start <= offset+len);
 21.1214 +
 21.1215 +			end = start + list->len;
 21.1216 +			if ((copy = end-offset) > 0) {
 21.1217 +				unsigned int csum2;
 21.1218 +				if (copy > len)
 21.1219 +					copy = len;
 21.1220 +				csum2 = skb_checksum(list, offset-start, copy, 0);
 21.1221 +				csum = csum_block_add(csum, csum2, pos);
 21.1222 +				if ((len -= copy) == 0)
 21.1223 +					return csum;
 21.1224 +				offset += copy;
 21.1225 +				pos += copy;
 21.1226 +			}
 21.1227 +			start = end;
 21.1228 +		}
 21.1229 +	}
 21.1230 +	if (len == 0)
 21.1231 +		return csum;
 21.1232 +
 21.1233 +	BUG();
 21.1234 +	return csum;
 21.1235 +}
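
A sketch of skb_checksum() computing an Internet checksum over the whole
buffer regardless of how it is fragmented; folding the 32-bit partial sum is
left to csum_fold(). The helper is hypothetical.

    #include <linux/skbuff.h>
    #include <net/checksum.h>

    /* Hypothetical verification: 16-bit Internet checksum over the full packet. */
    static unsigned short whole_packet_csum(const struct sk_buff *skb)
    {
            unsigned int csum = skb_checksum(skb, 0, skb->len, 0);

            return csum_fold(csum);        /* fold the 32-bit sum down to 16 bits */
    }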
 21.1236 +
 21.1237 +/* Both of above in one bottle. */
 21.1238 +
 21.1239 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
 21.1240 +{
 21.1241 +	int i, copy;
 21.1242 +	int start = skb->len - skb->data_len;
 21.1243 +	int pos = 0;
 21.1244 +
 21.1245 +	/* Copy header. */
 21.1246 +	if ((copy = start-offset) > 0) {
 21.1247 +		if (copy > len)
 21.1248 +			copy = len;
 21.1249 +		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
 21.1250 +		if ((len -= copy) == 0)
 21.1251 +			return csum;
 21.1252 +		offset += copy;
 21.1253 +		to += copy;
 21.1254 +		pos = copy;
 21.1255 +	}
 21.1256 +
 21.1257 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 21.1258 +		int end;
 21.1259 +
 21.1260 +		BUG_TRAP(start <= offset+len);
 21.1261 +
 21.1262 +		end = start + skb_shinfo(skb)->frags[i].size;
 21.1263 +		if ((copy = end-offset) > 0) {
 21.1264 +			unsigned int csum2;
 21.1265 +			u8 *vaddr;
 21.1266 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 21.1267 +
 21.1268 +			if (copy > len)
 21.1269 +				copy = len;
 21.1270 +			vaddr = kmap_skb_frag(frag);
 21.1271 +			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
 21.1272 +						      offset-start, to, copy, 0);
 21.1273 +			kunmap_skb_frag(vaddr);
 21.1274 +			csum = csum_block_add(csum, csum2, pos);
 21.1275 +			if (!(len -= copy))
 21.1276 +				return csum;
 21.1277 +			offset += copy;
 21.1278 +			to += copy;
 21.1279 +			pos += copy;
 21.1280 +		}
 21.1281 +		start = end;
 21.1282 +	}
 21.1283 +
 21.1284 +	if (skb_shinfo(skb)->frag_list) {
 21.1285 +		struct sk_buff *list;
 21.1286 +
 21.1287 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 21.1288 +			unsigned int csum2;
 21.1289 +			int end;
 21.1290 +
 21.1291 +			BUG_TRAP(start <= offset+len);
 21.1292 +
 21.1293 +			end = start + list->len;
 21.1294 +			if ((copy = end-offset) > 0) {
 21.1295 +				if (copy > len)
 21.1296 +					copy = len;
 21.1297 +				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
 21.1298 +				csum = csum_block_add(csum, csum2, pos);
 21.1299 +				if ((len -= copy) == 0)
 21.1300 +					return csum;
 21.1301 +				offset += copy;
 21.1302 +				to += copy;
 21.1303 +				pos += copy;
 21.1304 +			}
 21.1305 +			start = end;
 21.1306 +		}
 21.1307 +	}
 21.1308 +	if (len == 0)
 21.1309 +		return csum;
 21.1310 +
 21.1311 +	BUG();
 21.1312 +	return csum;
 21.1313 +}
 21.1314 +
 21.1315 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 21.1316 +{
 21.1317 +	unsigned int csum;
 21.1318 +	long csstart;
 21.1319 +
 21.1320 +	if (skb->ip_summed == CHECKSUM_HW)
 21.1321 +		csstart = skb->h.raw - skb->data;
 21.1322 +	else
 21.1323 +		csstart = skb->len - skb->data_len;
 21.1324 +
 21.1325 +	if (csstart > skb->len - skb->data_len)
 21.1326 +		BUG();
 21.1327 +
 21.1328 +	memcpy(to, skb->data, csstart);
 21.1329 +
 21.1330 +	csum = 0;
 21.1331 +	if (csstart != skb->len)
 21.1332 +		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
 21.1333 +				skb->len-csstart, 0);
 21.1334 +
 21.1335 +	if (skb->ip_summed == CHECKSUM_HW) {
 21.1336 +		long csstuff = csstart + skb->csum;
 21.1337 +
 21.1338 +		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
 21.1339 +	}
 21.1340 +}
 21.1341 +
 21.1342 +#if 0
 21.1343 +/* 
 21.1344 + * 	Tune the memory allocator for a new MTU size.
 21.1345 + */
 21.1346 +void skb_add_mtu(int mtu)
 21.1347 +{
 21.1348 +	/* Must match allocation in alloc_skb */
 21.1349 +	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
 21.1350 +
 21.1351 +	kmem_add_cache_size(mtu);
 21.1352 +}
 21.1353 +#endif
 21.1354 +
 21.1355 +void __init skb_init(void)
 21.1356 +{
 21.1357 +	int i;
 21.1358 +
 21.1359 +	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 21.1360 +					      sizeof(struct sk_buff),
 21.1361 +					      0,
 21.1362 +					      SLAB_HWCACHE_ALIGN,
 21.1363 +					      skb_headerinit, NULL);
 21.1364 +	if (!skbuff_head_cache)
 21.1365 +		panic("cannot create skbuff cache");
 21.1366 +
 21.1367 +        init_net_pages(NUM_NET_PAGES);
 21.1368 +
 21.1369 +	for (i=0; i<NR_CPUS; i++)
 21.1370 +		skb_queue_head_init(&skb_head_pool[i].list);
 21.1371 +}