ia64/xen-unstable
changeset 122:b1b1608f5d5c
bitkeeper revision 1.22.1.8 (3e4117feir_tT1ncjYWsGFnRPS64sg)
fix conflicts between VM and net updates.
--- a/.rootkeys	Tue Feb 04 22:08:19 2003 +0000
+++ b/.rootkeys	Wed Feb 05 13:56:14 2003 +0000
@@ -135,6 +135,7 @@ 3ddb79c34BFiXjBJ_cCKB0aCsV1IDw xen-2.4.1
 3e20b82fl1jmQiKdLy7fxMcutfpjWA xen-2.4.16/include/asm-i386/domain_page.h
 3ddb79c2O729EttZTYu1c8LcsUO_GQ xen-2.4.16/include/asm-i386/elf.h
 3ddb79c3NU8Zy40OTrq3D-i30Y3t4A xen-2.4.16/include/asm-i386/fixmap.h
+3e2d29944GI24gf7vOP_7x8EyuqxeA xen-2.4.16/include/asm-i386/flushtlb.h
 3ddb79c39o75zPP0T1aQQ4mNrCAN2w xen-2.4.16/include/asm-i386/hardirq.h
 3ddb79c3BFEIwXR4IsWbwp4BoL4DkA xen-2.4.16/include/asm-i386/hdreg.h
 3ddb79c3TMDjkxVndKFKnGiwY0HzDg xen-2.4.16/include/asm-i386/i387.h
@@ -401,9 +402,11 @@ 3ddb79bb3cMSs_k2X5Oq2hOIBvmPYA xenolinux
 3ddb79ba2qYtIQAT_-vCFkkZUXu_UQ xenolinux-2.4.16-sparse/include/asm-xeno/user.h
 3ddb79bbqhb9X9qWOz5Bv4wOzrkITg xenolinux-2.4.16-sparse/include/asm-xeno/vga.h
 3ddb79bbA52x94o6uwDYsbzrH2hjzA xenolinux-2.4.16-sparse/include/asm-xeno/xor.h
+3e37c39fVCSGQENtY6g7muaq_THliw xenolinux-2.4.16-sparse/include/linux/skbuff.h
 3ddb79bb_7YG4U75ZmEic9YXWTW7Vw xenolinux-2.4.16-sparse/include/linux/sunrpc/debug.h
 3ddb79bcxkVPfWlZ1PQKvDrfArzOVw xenolinux-2.4.16-sparse/kernel/panic.c
 3ddb79bbP31im-mx2NbfthSeqty1Dg xenolinux-2.4.16-sparse/mk
 3e15d52e0_j129JPvo7xfYGndVFpwQ xenolinux-2.4.16-sparse/mm/memory.c
 3e15d535DLvpzTrLRUIerB69LpJD1g xenolinux-2.4.16-sparse/mm/mremap.c
 3e15d531m1Y1_W8ki64AFOU_ua4C4w xenolinux-2.4.16-sparse/mm/swapfile.c
+3e37c312QFuzIxXsuAgO6IRt3Tp96Q xenolinux-2.4.16-sparse/net/core/skbuff.c
--- a/BitKeeper/etc/logging_ok	Tue Feb 04 22:08:19 2003 +0000
+++ b/BitKeeper/etc/logging_ok	Wed Feb 05 13:56:14 2003 +0000
@@ -1,5 +1,6 @@
 akw27@boulderdash.cl.cam.ac.uk
 akw27@labyrinth.cl.cam.ac.uk
+akw27@plucky.localdomain
 bd240@boulderdash.cl.cam.ac.uk
 iap10@labyrinth.cl.cam.ac.uk
 kaf24@labyrinth.cl.cam.ac.uk
3.1 --- a/xen-2.4.16/common/domain.c Tue Feb 04 22:08:19 2003 +0000 3.2 +++ b/xen-2.4.16/common/domain.c Wed Feb 05 13:56:14 2003 +0000 3.3 @@ -11,6 +11,7 @@ 3.4 #include <xeno/dom0_ops.h> 3.5 #include <asm/io.h> 3.6 #include <asm/domain_page.h> 3.7 +#include <asm/flushtlb.h> 3.8 #include <asm/msr.h> 3.9 #include <xeno/multiboot.h> 3.10 3.11 @@ -353,10 +354,13 @@ unsigned int alloc_new_dom_mem(struct ta 3.12 struct pfn_info *pf, *pf_head; 3.13 unsigned int alloc_pfns; 3.14 unsigned int req_pages; 3.15 + unsigned long flags; 3.16 3.17 /* how many pages do we need to alloc? */ 3.18 req_pages = kbytes >> (PAGE_SHIFT - 10); 3.19 3.20 + spin_lock_irqsave(&free_list_lock, flags); 3.21 + 3.22 /* is there enough mem to serve the request? */ 3.23 if(req_pages > free_pfns) 3.24 return -1; 3.25 @@ -387,6 +391,8 @@ unsigned int alloc_new_dom_mem(struct ta 3.26 3.27 free_pfns--; 3.28 } 3.29 + 3.30 + spin_unlock_irqrestore(&free_list_lock, flags); 3.31 3.32 p->tot_pages = req_pages; 3.33 3.34 @@ -544,6 +550,7 @@ static unsigned long alloc_page_from_dom 3.35 */ 3.36 int setup_guestos(struct task_struct *p, dom0_newdomain_t *params) 3.37 { 3.38 + 3.39 struct list_head *list_ent; 3.40 char *src, *dst; 3.41 int i, dom = p->domain; 3.42 @@ -704,8 +711,7 @@ int setup_guestos(struct task_struct *p, 3.43 3.44 /* Install the new page tables. */ 3.45 __cli(); 3.46 - __asm__ __volatile__ ( 3.47 - "mov %%eax,%%cr3" : : "a" (pagetable_val(p->mm.pagetable))); 3.48 + __write_cr3_counted(pagetable_val(p->mm.pagetable)); 3.49 3.50 /* Copy the guest OS image. */ 3.51 src = (char *)__va(mod[0].mod_start + 12); 3.52 @@ -777,8 +783,7 @@ int setup_guestos(struct task_struct *p, 3.53 } 3.54 3.55 /* Reinstate the caller's page tables. */ 3.56 - __asm__ __volatile__ ( 3.57 - "mov %%eax,%%cr3" : : "a" (pagetable_val(current->mm.pagetable))); 3.58 + __write_cr3_counted(pagetable_val(current->mm.pagetable)); 3.59 __sti(); 3.60 3.61 new_thread(p,
--- a/xen-2.4.16/common/event.c	Tue Feb 04 22:08:19 2003 +0000
+++ b/xen-2.4.16/common/event.c	Wed Feb 05 13:56:14 2003 +0000
@@ -14,13 +14,13 @@
 typedef void (*hyp_event_callback_fn_t)(void);
 
 extern void schedule(void);
-extern void flush_rx_queue(void);
+extern void update_shared_ring(void);
 
 /* Ordering must match definitions of _HYP_EVENT_* in xeno/sched.h */
 static hyp_event_callback_fn_t event_call_fn[] =
 {
     schedule,
-    flush_rx_queue,
+    update_shared_ring,
     kill_domain
 };
 
5.1 --- a/xen-2.4.16/common/memory.c Tue Feb 04 22:08:19 2003 +0000 5.2 +++ b/xen-2.4.16/common/memory.c Wed Feb 05 13:56:14 2003 +0000 5.3 @@ -171,6 +171,7 @@ 5.4 #include <xeno/sched.h> 5.5 #include <xeno/errno.h> 5.6 #include <asm/page.h> 5.7 +#include <asm/flushtlb.h> 5.8 #include <asm/io.h> 5.9 #include <asm/uaccess.h> 5.10 #include <asm/domain_page.h> 5.11 @@ -205,6 +206,7 @@ unsigned long frame_table_size; 5.12 unsigned long max_page; 5.13 5.14 struct list_head free_list; 5.15 +spinlock_t free_list_lock = SPIN_LOCK_UNLOCKED; 5.16 unsigned int free_pfns; 5.17 5.18 static int tlb_flush[NR_CPUS]; 5.19 @@ -218,6 +220,7 @@ void __init init_frametable(unsigned lon 5.20 { 5.21 struct pfn_info *pf; 5.22 unsigned long page_index; 5.23 + unsigned long flags; 5.24 5.25 memset(tlb_flush, 0, sizeof(tlb_flush)); 5.26 5.27 @@ -231,6 +234,7 @@ void __init init_frametable(unsigned lon 5.28 ((__pa(frame_table) + frame_table_size) >> PAGE_SHIFT); 5.29 5.30 /* Put all domain-allocatable memory on a free list. */ 5.31 + spin_lock_irqsave(&free_list_lock, flags); 5.32 INIT_LIST_HEAD(&free_list); 5.33 for( page_index = (__pa(frame_table) + frame_table_size) >> PAGE_SHIFT; 5.34 page_index < nr_pages; 5.35 @@ -239,6 +243,7 @@ void __init init_frametable(unsigned lon 5.36 pf = list_entry(&frame_table[page_index].list, struct pfn_info, list); 5.37 list_add_tail(&pf->list, &free_list); 5.38 } 5.39 + spin_unlock_irqrestore(&free_list_lock, flags); 5.40 } 5.41 5.42 5.43 @@ -697,7 +702,6 @@ static int do_extended_command(unsigned 5.44 return err; 5.45 } 5.46 5.47 - 5.48 int do_process_page_updates(page_update_request_t *ureqs, int count) 5.49 { 5.50 page_update_request_t req; 5.51 @@ -807,11 +811,10 @@ int do_process_page_updates(page_update_ 5.52 if ( tlb_flush[smp_processor_id()] ) 5.53 { 5.54 tlb_flush[smp_processor_id()] = 0; 5.55 - __asm__ __volatile__ ( 5.56 - "movl %%eax,%%cr3" : : 5.57 - "a" (pagetable_val(current->mm.pagetable))); 5.58 + __write_cr3_counted(pagetable_val(current->mm.pagetable)); 5.59 5.60 } 5.61 5.62 return(0); 5.63 } 5.64 +
6.1 --- a/xen-2.4.16/common/network.c Tue Feb 04 22:08:19 2003 +0000 6.2 +++ b/xen-2.4.16/common/network.c Wed Feb 05 13:56:14 2003 +0000 6.3 @@ -49,6 +49,7 @@ net_vif_t *create_net_vif(int domain) 6.4 { 6.5 net_vif_t *new_vif; 6.6 net_ring_t *new_ring; 6.7 + net_shadow_ring_t *shadow_ring; 6.8 struct task_struct *dom_task; 6.9 6.10 if ( !(dom_task = find_domain_by_id(domain)) ) 6.11 @@ -64,7 +65,27 @@ net_vif_t *create_net_vif(int domain) 6.12 new_ring = dom_task->net_ring_base + dom_task->num_net_vifs; 6.13 memset(new_ring, 0, sizeof(net_ring_t)); 6.14 6.15 + // allocate the shadow ring. 6.16 + // maybe these should be kmem_cache instead of kmalloc? 6.17 + 6.18 + shadow_ring = kmalloc(sizeof(net_shadow_ring_t), GFP_KERNEL); 6.19 + if (shadow_ring == NULL) goto fail; 6.20 + 6.21 + shadow_ring->tx_ring = kmalloc(TX_RING_SIZE 6.22 + * sizeof(tx_shadow_entry_t), GFP_KERNEL); 6.23 + shadow_ring->rx_ring = kmalloc(RX_RING_SIZE 6.24 + * sizeof(rx_shadow_entry_t), GFP_KERNEL); 6.25 + if ((shadow_ring->tx_ring == NULL) || (shadow_ring->rx_ring == NULL)) 6.26 + goto fail; 6.27 + 6.28 + shadow_ring->rx_prod = shadow_ring->rx_cons = shadow_ring->rx_idx = 0; 6.29 + 6.30 + // fill in the new vif struct. 6.31 + 6.32 new_vif->net_ring = new_ring; 6.33 + new_vif->shadow_ring = shadow_ring; 6.34 + 6.35 + 6.36 skb_queue_head_init(&new_vif->skb_list); 6.37 new_vif->domain = domain; 6.38 6.39 @@ -77,6 +98,10 @@ net_vif_t *create_net_vif(int domain) 6.40 dom_task->num_net_vifs++; 6.41 6.42 return new_vif; 6.43 + 6.44 +fail: 6.45 + printk("VIF allocation failed!\n"); 6.46 + return NULL; 6.47 } 6.48 6.49 /* delete_net_vif - Delete the last vif in the given domain. 6.50 @@ -101,7 +126,10 @@ void destroy_net_vif(struct task_struct 6.51 write_lock(&sys_vif_lock); 6.52 sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed 6.53 write_unlock(&sys_vif_lock); 6.54 - 6.55 + 6.56 + kfree(p->net_vif_list[i]->shadow_ring->tx_ring); 6.57 + kfree(p->net_vif_list[i]->shadow_ring->rx_ring); 6.58 + kfree(p->net_vif_list[i]->shadow_ring); 6.59 kmem_cache_free(net_vif_cache, p->net_vif_list[i]); 6.60 } 6.61
--- a/xen-2.4.16/drivers/net/tulip/interrupt.c	Tue Feb 04 22:08:19 2003 +0000
+++ b/xen-2.4.16/drivers/net/tulip/interrupt.c	Wed Feb 05 13:56:14 2003 +0000
@@ -170,8 +170,9 @@ static int tulip_rx(struct net_device *d
 #endif
             /* Check if the packet is long enough to accept without copying
                to a minimally-sized skbuff. */
-            if (pkt_len < tulip_rx_copybreak
-                && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
+            //if (pkt_len < tulip_rx_copybreak
+            //    && (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
+            if (0) {
                 skb->dev = dev;
                 skb_reserve(skb, 2);    /* 16 byte align the IP header */
                 pci_dma_sync_single(tp->pdev,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xen-2.4.16/include/asm-i386/flushtlb.h	Wed Feb 05 13:56:14 2003 +0000
@@ -0,0 +1,48 @@
+/******************************************************************************
+ * flushtlb.h
+ *
+ * TLB flush macros that count flushes.  Counting is used to enforce
+ * zero-copy safety, particularily for the network code.
+ *
+ * akw - Jan 21, 2003
+ */
+
+#ifndef __FLUSHTLB_H
+#define __FLUSHTLB_H
+
+#include <xeno/smp.h>
+
+unsigned long tlb_flush_count[NR_CPUS];
+//#if 0
+#define __read_cr3(__var)                                         \
+    do {                                                          \
+        __asm__ __volatile (                                      \
+                "movl %%cr3, %0;"                                 \
+                : "=r" (__var));                                  \
+    } while (0)
+//#endif
+
+#define __write_cr3_counted(__pa)                                 \
+    do {                                                          \
+        __asm__ __volatile__ (                                    \
+                "movl %0, %%cr3;"                                 \
+                :: "r" (__pa)                                     \
+                : "memory");                                      \
+        tlb_flush_count[smp_processor_id()]++;                    \
+    } while (0)
+
+//#endif
+#define __flush_tlb_counted()                                     \
+    do {                                                          \
+        unsigned int tmpreg;                                      \
+                                                                  \
+        __asm__ __volatile__(                                     \
+                "movl %%cr3, %0;  # flush TLB \n"                 \
+                "movl %0, %%cr3;  "                               \
+                : "=r" (tmpreg)                                   \
+                :: "memory");                                     \
+        tlb_flush_count[smp_processor_id()]++;                    \
+    } while (0)
+
+#endif
+
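The counter added here is what makes the zero-copy hand-over safe: when the hypervisor clears a guest PTE for a receive buffer it records the current value of tlb_flush_count[] in the shadow descriptor, and before relying on that mapping actually being gone it checks whether any flush has happened since (net/dev.c below does exactly this). A minimal sketch of the pattern, using the fields this changeset adds; the helper names stamp_rx_descriptor and ensure_flushed_since are invented for illustration:

#include <xeno/vif.h>        /* rx_shadow_entry_t (added below in vif.h) */
#include <asm/flushtlb.h>    /* tlb_flush_count[]                        */

/* Record the flush epoch in which the guest PTE was invalidated. */
static inline void stamp_rx_descriptor(rx_shadow_entry_t *rx)
{
    rx->flush_count = tlb_flush_count[smp_processor_id()];
}

/* If no TLB flush has occurred since the stamp, a stale translation of the
 * handed-over frame may still be cached, so force one (net/dev.c calls
 * flush_tlb_all() in the same situation). */
static inline void ensure_flushed_since(rx_shadow_entry_t *rx)
{
    if ( rx->flush_count == tlb_flush_count[smp_processor_id()] )
        flush_tlb_all();
}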
9.1 --- a/xen-2.4.16/include/asm-i386/page.h Tue Feb 04 22:08:19 2003 +0000 9.2 +++ b/xen-2.4.16/include/asm-i386/page.h Wed Feb 05 13:56:14 2003 +0000 9.3 @@ -91,36 +91,36 @@ typedef struct { unsigned long pt_lo; } 9.4 #include <asm/processor.h> 9.5 #include <asm/fixmap.h> 9.6 #include <asm/bitops.h> 9.7 +#include <asm/flushtlb.h> 9.8 9.9 extern l2_pgentry_t idle0_pg_table[ENTRIES_PER_L2_PAGETABLE]; 9.10 extern l2_pgentry_t *idle_pg_table[NR_CPUS]; 9.11 extern void paging_init(void); 9.12 9.13 -#define __flush_tlb() \ 9.14 - do { \ 9.15 - unsigned int tmpreg; \ 9.16 - \ 9.17 - __asm__ __volatile__( \ 9.18 - "movl %%cr3, %0; # flush TLB \n" \ 9.19 - "movl %0, %%cr3; \n" \ 9.20 - : "=r" (tmpreg) \ 9.21 - :: "memory"); \ 9.22 - } while (0) 9.23 +#define __flush_tlb() __flush_tlb_counted() 9.24 9.25 /* Flush global pages as well. */ 9.26 + 9.27 +#define __pge_off() \ 9.28 + do { \ 9.29 + __asm__ __volatile__( \ 9.30 + "movl %0, %%cr4; # turn off PGE " \ 9.31 + :: "r" (mmu_cr4_features & ~X86_CR4_PGE)); \ 9.32 + } while (0) 9.33 + 9.34 +#define __pge_on() \ 9.35 + do { \ 9.36 + __asm__ __volatile__( \ 9.37 + "movl %0, %%cr4; # turn off PGE " \ 9.38 + :: "r" (mmu_cr4_features)); \ 9.39 + } while (0) 9.40 + 9.41 + 9.42 #define __flush_tlb_all() \ 9.43 do { \ 9.44 - unsigned int tmpreg; \ 9.45 - \ 9.46 - __asm__ __volatile__( \ 9.47 - "movl %1, %%cr4; # turn off PGE \n" \ 9.48 - "movl %%cr3, %0; # flush TLB \n" \ 9.49 - "movl %0, %%cr3; \n" \ 9.50 - "movl %2, %%cr4; # turn PGE back on \n" \ 9.51 - : "=&r" (tmpreg) \ 9.52 - : "r" (mmu_cr4_features & ~X86_CR4_PGE), \ 9.53 - "r" (mmu_cr4_features) \ 9.54 - : "memory"); \ 9.55 + __pge_off(); \ 9.56 + __flush_tlb_counted(); \ 9.57 + __pge_on(); \ 9.58 } while (0) 9.59 9.60 #define __flush_tlb_one(__addr) \
--- a/xen-2.4.16/include/asm-i386/pci.h	Tue Feb 04 22:08:19 2003 +0000
+++ b/xen-2.4.16/include/asm-i386/pci.h	Wed Feb 05 13:56:14 2003 +0000
@@ -75,7 +75,19 @@ static inline dma_addr_t pci_map_single(
     if (direction == PCI_DMA_NONE)
         BUG();
     flush_write_buffers();
-    return virt_to_bus(ptr);
+
+    if ((unsigned long) ptr > PAGE_OFFSET)
+        return virt_to_bus(ptr);
+
+    /* If an address that is not in hypervisor VM is passed to this
+     * function (ie > PAGE_OFFSET) we assume that the passer knows
+     * what they are doing, and have passed a physical address that
+     * should not be converted here.  This is a little hackish, but
+     * is being added to allow references to domain memory in order
+     * to support zero-copy network code.
+     */
+
+    return (dma_addr_t) ptr;
 }
 
 /* Unmap a single streaming mode DMA translation.  The dma_addr and size
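For context, a hedged sketch (not part of the changeset) of how this pass-through is expected to be used: an SKB_ZERO_COPY buffer's data pointer is already a machine address below PAGE_OFFSET (it is set up that way by alloc_zc_skb() in net/skbuff.c below), so the modified pci_map_single() hands it to the device unchanged, while ordinary hypervisor-virtual buffers still go through virt_to_bus(). The function name map_rx_buffer and the len parameter are illustrative only:

static dma_addr_t map_rx_buffer(struct pci_dev *pdev, struct sk_buff *skb,
                                size_t len)
{
    /* skb->data < PAGE_OFFSET for SKB_ZERO_COPY skbs, so this returns the
     * machine address untouched; for normal skbs it translates as before. */
    return pci_map_single(pdev, skb->data, len, PCI_DMA_FROMDEVICE);
}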
--- a/xen-2.4.16/include/hypervisor-ifs/network.h	Tue Feb 04 22:08:19 2003 +0000
+++ b/xen-2.4.16/include/hypervisor-ifs/network.h	Wed Feb 05 13:56:14 2003 +0000
@@ -15,17 +15,19 @@
 #include <linux/types.h>
 
 typedef struct tx_entry_st {
-    unsigned long addr; /* virtual address */
-    unsigned long size; /* in bytes */
+    unsigned long addr;   /* virtual address */
+    unsigned long size;   /* in bytes */
+    int           status; /* per descriptor status. */
 } tx_entry_t;
 
 typedef struct rx_entry_st {
-    unsigned long addr; /* virtual address */
-    unsigned long size; /* in bytes */
+    unsigned long addr;   /* virtual address */
+    unsigned long size;   /* in bytes */
+    int           status; /* per descriptor status. */
 } rx_entry_t;
 
-#define TX_RING_SIZE 1024
-#define RX_RING_SIZE 1024
+#define TX_RING_SIZE 256
+#define RX_RING_SIZE 256
 typedef struct net_ring_st {
     /*
      * Guest OS places packets into ring at tx_prod.
@@ -111,4 +113,12 @@ typedef struct net_rule_ent_st
 /* Drop a new rule down to the network tables. */
 int add_net_rule(net_rule_t *rule);
 
+
+/* Descriptor status values:
+ */
+
+#define RING_STATUS_OK               0  // Everything is gravy.
+#define RING_STATUS_ERR_CFU         -1  // Copy from user problems.
+#define RING_STATUS_BAD_PAGE        -2  // What they gave us was pure evil.
+
 #endif
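The ring sizes stay powers of two because every producer/consumer index in this changeset is advanced with a mask rather than a modulo (the *_RING_INC/*_RING_ADD macros added to net/dev.c). A short illustrative consumer loop, assuming only the fields and RING_STATUS_* values defined above; walk_rx_ring is a made-up name:

#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1))   /* as in net/dev.c */

static void walk_rx_ring(net_ring_t *ring)
{
    unsigned int i;

    for ( i = ring->rx_cons; i != ring->rx_prod; i = RX_RING_INC(i) )
    {
        if ( ring->rx_ring[i].status != RING_STATUS_OK )
            continue;               /* descriptor was rejected upstream */
        /* ... consume ring->rx_ring[i] ... */
    }
    ring->rx_cons = i;              /* everything up to rx_prod handled */
}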
--- a/xen-2.4.16/include/xeno/mm.h	Tue Feb 04 22:08:19 2003 +0000
+++ b/xen-2.4.16/include/xeno/mm.h	Wed Feb 05 13:56:14 2003 +0000
@@ -7,6 +7,7 @@
 #include <asm/desc.h>
 #include <xeno/list.h>
 #include <hypervisor-ifs/hypervisor-if.h>
+#include <xeno/spinlock.h>
 
 /* XXX KAF: These may die eventually, but so many refs in slab.c :((( */
 
@@ -108,6 +109,7 @@ typedef struct pfn_info {
 extern frame_table_t * frame_table;
 extern unsigned long frame_table_size;
 extern struct list_head free_list;
+extern spinlock_t free_list_lock;
 extern unsigned int free_pfns;
 extern unsigned long max_page;
 void init_frametable(unsigned long nr_pages);
13.1 --- a/xen-2.4.16/include/xeno/skbuff.h Tue Feb 04 22:08:19 2003 +0000 13.2 +++ b/xen-2.4.16/include/xeno/skbuff.h Wed Feb 05 13:56:14 2003 +0000 13.3 @@ -34,6 +34,10 @@ 13.4 #define VIF_DROP -3 13.5 #define VIF_ANY_INTERFACE -4 13.6 13.7 +//skb_type values: 13.8 +#define SKB_NORMAL 0 13.9 +#define SKB_ZERO_COPY 1 13.10 + 13.11 #define HAVE_ALLOC_SKB /* For the drivers to know */ 13.12 #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 13.13 #define SLAB_SKB /* Slabified skbuffs */ 13.14 @@ -187,7 +191,7 @@ struct sk_buff { 13.15 unsigned int data_len; 13.16 unsigned int csum; /* Checksum */ 13.17 unsigned char __unused, /* Dead field, may be reused */ 13.18 - cloned, /* head may be cloned (check refcnt to be sure). */ 13.19 + cloned, /* head may be cloned (check refcnt to be sure) */ 13.20 pkt_type, /* Packet class */ 13.21 ip_summed; /* Driver fed us an IP checksum */ 13.22 __u32 priority; /* Packet queueing priority */ 13.23 @@ -203,8 +207,12 @@ struct sk_buff { 13.24 13.25 void (*destructor)(struct sk_buff *); /* Destruct function */ 13.26 13.27 - int src_vif; /* vif we came from */ 13.28 - int dst_vif; /* vif we are bound for */ 13.29 + unsigned int skb_type; /* SKB_NORMAL or SKB_ZERO_COPY */ 13.30 + struct pfn_info *pf; /* record of physical pf address for freeing */ 13.31 + int src_vif; /* vif we came from */ 13.32 + int dst_vif; /* vif we are bound for */ 13.33 + struct skb_shared_info shinfo; /* shared info is no longer shared in Xen. */ 13.34 + 13.35 13.36 13.37 13.38 @@ -244,6 +252,7 @@ struct sk_buff { 13.39 13.40 extern void __kfree_skb(struct sk_buff *skb); 13.41 extern struct sk_buff * alloc_skb(unsigned int size, int priority); 13.42 +extern struct sk_buff * alloc_zc_skb(unsigned int size, int priority); 13.43 extern void kfree_skbmem(struct sk_buff *skb); 13.44 extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); 13.45 extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); 13.46 @@ -259,7 +268,8 @@ extern void skb_over_panic(struct sk_buf 13.47 extern void skb_under_panic(struct sk_buff *skb, int len, void *here); 13.48 13.49 /* Internal */ 13.50 -#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 13.51 +//#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 13.52 +#define skb_shinfo(SKB) ((struct skb_shared_info *)(&(SKB)->shinfo)) 13.53 13.54 /** 13.55 * skb_queue_empty - check if a queue is empty 13.56 @@ -1045,7 +1055,8 @@ static inline struct sk_buff *__dev_allo 13.57 { 13.58 struct sk_buff *skb; 13.59 13.60 - skb = alloc_skb(length+16, gfp_mask); 13.61 + //skb = alloc_skb(length+16, gfp_mask); 13.62 + skb = alloc_zc_skb(length+16, gfp_mask); 13.63 if (skb) 13.64 skb_reserve(skb,16); 13.65 return skb;
--- a/xen-2.4.16/include/xeno/vif.h	Tue Feb 04 22:08:19 2003 +0000
+++ b/xen-2.4.16/include/xeno/vif.h	Wed Feb 05 13:56:14 2003 +0000
@@ -18,9 +18,37 @@
 #include <hypervisor-ifs/network.h>
 #include <xeno/skbuff.h>
 
+/*
+ * shadow ring structures are used to protect the descriptors from
+ * tampering after they have been passed to the hypervisor.
+ *
+ * TX_RING_SIZE and RX_RING_SIZE are defined in the shared network.h.
+ */
+
+typedef struct tx_shadow_entry_st {
+    unsigned long addr;
+    unsigned long size;
+    int           status;
+    unsigned long flush_count;
+} tx_shadow_entry_t;
+
+typedef struct rx_shadow_entry_st {
+    unsigned long addr;
+    unsigned long size;
+    int           status;
+    unsigned long flush_count;
+} rx_shadow_entry_t;
+
+typedef struct net_shadow_ring_st {
+    tx_shadow_entry_t *tx_ring;
+    rx_shadow_entry_t *rx_ring;
+    unsigned int rx_prod, rx_cons, rx_idx;
+} net_shadow_ring_t;
+
 typedef struct net_vif_st {
-    net_ring_t *net_ring;
-    int id;
+    net_ring_t         *net_ring;
+    net_shadow_ring_t  *shadow_ring;
+    int                 id;
     struct sk_buff_head skb_list;
     unsigned int domain;
     // rules table goes here in next revision.
@@ -40,3 +68,8 @@ void destroy_net_vif(struct task_struct
 void add_default_net_rule(int vif_id, u32 ipaddr);
 int net_get_target_vif(struct sk_buff *skb);
 void add_default_net_rule(int vif_id, u32 ipaddr);
+
+/* status fields per-descriptor:
+ */
+
+
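The shadow ring exists so that a descriptor is validated once and then never re-read from guest-writable memory. A condensed sketch of its lifecycle; the real logic lives in net/dev.c below, the wrapper function and its arguments are invented for illustration, and validation, locking and the page flip are omitted:

static void shadow_ring_example(net_ring_t *net_ring,
                                net_shadow_ring_t *shadow_ring,
                                unsigned int i)
{
    rx_shadow_entry_t *rx;

    /* Hypercall time (do_net_update): snapshot the guest descriptor so it
     * cannot change after it has been checked. */
    if ( copy_from_user(shadow_ring->rx_ring + i, net_ring->rx_ring + i,
                        sizeof(rx_entry_t)) )
        shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU;
    else
        shadow_ring->rx_ring[i].status = RING_STATUS_OK;

    /* Delivery time (deliver_packet): work only from the shadow copy. */
    rx = shadow_ring->rx_ring + shadow_ring->rx_cons;

    /* Completion time (update_shared_ring): publish the result back to the
     * guest-visible ring. */
    copy_to_user(net_ring->rx_ring + net_ring->rx_cons, rx,
                 sizeof(rx_entry_t));
}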
15.1 --- a/xen-2.4.16/net/dev.c Tue Feb 04 22:08:19 2003 +0000 15.2 +++ b/xen-2.4.16/net/dev.c Wed Feb 05 13:56:14 2003 +0000 15.3 @@ -30,6 +30,8 @@ 15.4 #include <linux/pkt_sched.h> 15.5 15.6 #include <linux/event.h> 15.7 +#include <asm/domain_page.h> 15.8 +#include <asm/pgalloc.h> 15.9 15.10 #define BUG_TRAP ASSERT 15.11 #define notifier_call_chain(_a,_b,_c) ((void)0) 15.12 @@ -38,6 +40,12 @@ 15.13 #define rtnl_unlock() ((void)0) 15.14 #define dst_init() ((void)0) 15.15 15.16 +// Ring defines: 15.17 +#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) 15.18 +#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) 15.19 +#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) 15.20 +#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) 15.21 + 15.22 struct net_device *the_dev = NULL; 15.23 15.24 /* 15.25 @@ -47,11 +55,11 @@ struct net_device *the_dev = NULL; 15.26 struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned; 15.27 15.28 15.29 -/***************************************************************************************** 15.30 +/********************************************************************************* 15.31 15.32 Device Interface Subroutines 15.33 15.34 -******************************************************************************************/ 15.35 +**********************************************************************************/ 15.36 15.37 /** 15.38 * __dev_get_by_name - find a device by its name 15.39 @@ -661,7 +669,83 @@ static void get_sample_stats(int cpu) 15.40 softnet_data[cpu].avg_blog = avg_blog; 15.41 } 15.42 15.43 +void deliver_packet(struct sk_buff *skb, net_vif_t *vif) 15.44 +{ 15.45 + net_shadow_ring_t *shadow_ring; 15.46 + rx_shadow_entry_t *rx; 15.47 + unsigned long *g_pte, tmp; 15.48 + struct pfn_info *g_pfn, *h_pfn; 15.49 + unsigned int i; //, nvif; 15.50 15.51 + if (skb->skb_type != SKB_ZERO_COPY) 15.52 + return; 15.53 + 15.54 + /* 15.55 + * Write the virtual MAC address into the destination field 15.56 + * of the ethernet packet. Furthermore, do the same for ARP 15.57 + * reply packets. This is easy because the virtual MAC address 15.58 + * is always 00-[nn]-00-00-00-00, where the second sixteen bits 15.59 + * of the MAC are the vif's id. This is to differentiate between 15.60 + * vifs on guests that have more than one. 15.61 + * 15.62 + * In zero copy, the data pointers for the packet have to have been 15.63 + * mapped in by the caller. 15.64 + */ 15.65 + 15.66 + memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN); 15.67 +// *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif; 15.68 + if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP ) 15.69 + { 15.70 + memset(skb->nh.raw + 18, 0, ETH_ALEN); 15.71 +// *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif; 15.72 + } 15.73 + shadow_ring = vif->shadow_ring; 15.74 + 15.75 + //Advance to next good buffer. 
15.76 + for (i = shadow_ring->rx_cons; 15.77 + (i != shadow_ring->rx_prod) 15.78 + && ( shadow_ring->rx_ring[i].status != RING_STATUS_OK ); 15.79 + i = RX_RING_INC(i)); 15.80 + 15.81 + if (( i != shadow_ring->rx_prod ) && 15.82 + ( shadow_ring->rx_ring[i].status == RING_STATUS_OK )) 15.83 + { 15.84 + rx = shadow_ring->rx_ring+i; 15.85 + if ( (skb->len + ETH_HLEN) < rx->size ) 15.86 + rx->size = skb->len + ETH_HLEN; 15.87 + 15.88 + if (rx->flush_count == tlb_flush_count[smp_processor_id()]) 15.89 + flush_tlb_all(); 15.90 + 15.91 + g_pte = map_domain_mem(rx->addr); 15.92 + 15.93 + g_pfn = frame_table + (*g_pte >> PAGE_SHIFT); 15.94 + h_pfn = skb->pf; 15.95 + 15.96 + //flip and/or set relevant pf_info fields. 15.97 + tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp; 15.98 + tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp; 15.99 + tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp; 15.100 + h_pfn->tot_count = 1; 15.101 + h_pfn->type_count = g_pfn->type_count; 15.102 + g_pfn->tot_count = g_pfn->type_count = 0; 15.103 + h_pfn->flags = current->domain | PGT_l1_page_table; 15.104 + g_pfn->flags = PGT_l1_page_table; 15.105 + //point guest pte at the new page: 15.106 + *g_pte = (*g_pte & ~PAGE_MASK) 15.107 + | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK); 15.108 + *g_pte |= _PAGE_PRESENT; 15.109 + 15.110 + unmap_domain_mem(g_pte); 15.111 + skb->pf = g_pfn; // return the guest pfn to be put on the free list 15.112 + 15.113 + shadow_ring->rx_cons = RX_RING_INC(i); 15.114 + } 15.115 +} 15.116 + 15.117 +/* Deliver skb to an old protocol, which is not threaded well 15.118 + or which do not understand shared skbs. 15.119 + */ 15.120 /** 15.121 * netif_rx - post buffer to the network code 15.122 * @skb: buffer to post 15.123 @@ -686,21 +770,38 @@ int netif_rx(struct sk_buff *skb) 15.124 #ifdef CONFIG_SMP 15.125 unsigned long cpu_mask; 15.126 #endif 15.127 + 15.128 struct task_struct *p; 15.129 int this_cpu = smp_processor_id(); 15.130 struct softnet_data *queue; 15.131 unsigned long flags; 15.132 net_vif_t *vif; 15.133 15.134 + local_irq_save(flags); 15.135 + 15.136 if (skb->stamp.tv_sec == 0) 15.137 get_fast_time(&skb->stamp); 15.138 15.139 + /* Attempt to handle zero-copy packets here: */ 15.140 + if (skb->skb_type == SKB_ZERO_COPY) 15.141 + { 15.142 + skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); 15.143 + 15.144 + /* remapping this address really screws up all the skb pointers. We need 15.145 + * to map them all here sufficiently to get the packet demultiplexed. 15.146 + */ 15.147 + 15.148 + skb->data = skb->head; 15.149 + skb_reserve(skb,16); // need to ensure that all the drivers and not just tulip do this. 15.150 + skb->mac.raw = skb->data; 15.151 + skb->data += ETH_HLEN; 15.152 + skb->nh.raw = skb->data; 15.153 + } 15.154 + 15.155 /* The code is rearranged so that the path is the most 15.156 short when CPU is congested, but is still operating. 
15.157 */ 15.158 queue = &softnet_data[this_cpu]; 15.159 - 15.160 - local_irq_save(flags); 15.161 15.162 netdev_rx_stat[this_cpu].total++; 15.163 15.164 @@ -733,7 +834,7 @@ int netif_rx(struct sk_buff *skb) 15.165 do { 15.166 if ( p->domain != vif->domain ) continue; 15.167 if ( vif->skb_list.qlen > 100 ) break; 15.168 - skb_queue_tail(&vif->skb_list, skb); 15.169 + deliver_packet(skb, vif); 15.170 cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX); 15.171 read_unlock(&tasklist_lock); 15.172 goto found; 15.173 @@ -745,20 +846,24 @@ int netif_rx(struct sk_buff *skb) 15.174 15.175 drop: 15.176 netdev_rx_stat[this_cpu].dropped++; 15.177 - local_irq_restore(flags); 15.178 - 15.179 + if (skb->skb_type == SKB_ZERO_COPY) 15.180 + unmap_domain_mem(skb->head); 15.181 kfree_skb(skb); 15.182 + local_irq_restore(flags); 15.183 return NET_RX_DROP; 15.184 15.185 found: 15.186 + if (skb->skb_type == SKB_ZERO_COPY) { 15.187 + unmap_domain_mem(skb->head); 15.188 + skb->head = skb->data = skb->tail = (void *)0xdeadbeef; 15.189 + } 15.190 + kfree_skb(skb); 15.191 hyp_event_notify(cpu_mask); 15.192 local_irq_restore(flags); 15.193 return 0; 15.194 } 15.195 15.196 -/* Deliver skb to an old protocol, which is not threaded well 15.197 - or which do not understand shared skbs. 15.198 - */ 15.199 + 15.200 static int deliver_to_old_ones(struct packet_type *pt, struct sk_buff *skb, int last) 15.201 { 15.202 static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED; 15.203 @@ -873,15 +978,46 @@ static inline void handle_diverter(struc 15.204 } 15.205 #endif /* CONFIG_NET_DIVERT */ 15.206 15.207 +void update_shared_ring(void) 15.208 +{ 15.209 + rx_shadow_entry_t *rx; 15.210 + shared_info_t *s = current->shared_info; 15.211 + net_ring_t *net_ring; 15.212 + net_shadow_ring_t *shadow_ring; 15.213 + unsigned int nvif; 15.214 15.215 + clear_bit(_HYP_EVENT_NET_RX, ¤t->hyp_events); 15.216 + for (nvif = 0; nvif < current->num_net_vifs; nvif++) 15.217 + { 15.218 + net_ring = current->net_vif_list[nvif]->net_ring; 15.219 + shadow_ring = current->net_vif_list[nvif]->shadow_ring; 15.220 + while ((shadow_ring->rx_idx != shadow_ring->rx_cons) 15.221 + && (net_ring->rx_cons != net_ring->rx_prod)) 15.222 + { 15.223 + rx = shadow_ring->rx_ring+shadow_ring->rx_idx; 15.224 + copy_to_user(net_ring->rx_ring + net_ring->rx_cons, rx, sizeof(rx_entry_t)); 15.225 + 15.226 + shadow_ring->rx_idx = RX_RING_INC(shadow_ring->rx_idx); 15.227 + net_ring->rx_cons = RX_RING_INC(net_ring->rx_cons); 15.228 + 15.229 + if ( net_ring->rx_cons == net_ring->rx_event ) 15.230 + set_bit(_EVENT_NET_RX_FOR_VIF(nvif), &s->events); 15.231 + 15.232 + } 15.233 + } 15.234 +} 15.235 + 15.236 void flush_rx_queue(void) 15.237 { 15.238 struct sk_buff *skb; 15.239 shared_info_t *s = current->shared_info; 15.240 net_ring_t *net_ring; 15.241 + net_shadow_ring_t *shadow_ring; 15.242 unsigned int i, nvif; 15.243 - rx_entry_t rx; 15.244 - 15.245 + rx_shadow_entry_t *rx; 15.246 + unsigned long *g_pte, tmp; 15.247 + struct pfn_info *g_pfn, *h_pfn; 15.248 + 15.249 /* I have changed this to batch flush all vifs for a guest 15.250 * at once, whenever this is called. Since the guest is about to be 15.251 * scheduled and issued an RX interrupt for one nic, it might as well 15.252 @@ -893,15 +1029,17 @@ void flush_rx_queue(void) 15.253 * loop can be replaced with a translation to the specific NET 15.254 * interrupt to serve. 
--akw 15.255 */ 15.256 - 15.257 clear_bit(_HYP_EVENT_NET_RX, ¤t->hyp_events); 15.258 15.259 for (nvif = 0; nvif < current->num_net_vifs; nvif++) 15.260 { 15.261 net_ring = current->net_vif_list[nvif]->net_ring; 15.262 + shadow_ring = current->net_vif_list[nvif]->shadow_ring; 15.263 while ( (skb = skb_dequeue(¤t->net_vif_list[nvif]->skb_list)) 15.264 != NULL ) 15.265 { 15.266 + //temporary hack to stop processing non-zc skbs. 15.267 + if (skb->skb_type == SKB_NORMAL) continue; 15.268 /* 15.269 * Write the virtual MAC address into the destination field 15.270 * of the ethernet packet. Furthermore, do the same for ARP 15.271 @@ -912,6 +1050,16 @@ void flush_rx_queue(void) 15.272 * second sixteen bits, which are the per-host vif id. 15.273 * (so eth0 should be 00-00-..., eth1 is 00-01-...) 15.274 */ 15.275 + 15.276 + if (skb->skb_type == SKB_ZERO_COPY) 15.277 + { 15.278 + skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); 15.279 + skb->data = skb->head; 15.280 + skb_reserve(skb,16); 15.281 + skb->mac.raw = skb->data; 15.282 + skb->data += ETH_HLEN; 15.283 + } 15.284 + 15.285 memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN); 15.286 *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif; 15.287 if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP ) 15.288 @@ -920,15 +1068,84 @@ void flush_rx_queue(void) 15.289 *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif; 15.290 } 15.291 15.292 + if (skb->skb_type == SKB_ZERO_COPY) 15.293 + { 15.294 + unmap_domain_mem(skb->head); 15.295 + } 15.296 + 15.297 i = net_ring->rx_cons; 15.298 if ( i != net_ring->rx_prod ) 15.299 { 15.300 - if ( !copy_from_user(&rx, net_ring->rx_ring+i, sizeof(rx)) ) 15.301 + net_ring->rx_ring[i].status = shadow_ring->rx_ring[i].status; 15.302 + if ( shadow_ring->rx_ring[i].status == RING_STATUS_OK) 15.303 { 15.304 - if ( (skb->len + ETH_HLEN) < rx.size ) 15.305 - rx.size = skb->len + ETH_HLEN; 15.306 - copy_to_user((void *)rx.addr, skb->mac.raw, rx.size); 15.307 - copy_to_user(net_ring->rx_ring+i, &rx, sizeof(rx)); 15.308 + rx = shadow_ring->rx_ring+i; 15.309 + if ( (skb->len + ETH_HLEN) < rx->size ) 15.310 + rx->size = skb->len + ETH_HLEN; 15.311 + 15.312 + /* remap the packet again. This is very temporary and will shortly be 15.313 + * replaced with a page swizzle. 15.314 + */ 15.315 + 15.316 + /*if (skb->skb_type == SKB_ZERO_COPY) 15.317 + { 15.318 + skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT)); 15.319 + skb->data = skb->head; 15.320 + skb_reserve(skb,16); 15.321 + skb->mac.raw = skb->data; 15.322 + skb->data += ETH_HLEN; 15.323 + } 15.324 + 15.325 + copy_to_user((void *)rx->addr, skb->mac.raw, rx->size); 15.326 + copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx)); 15.327 + 15.328 + if (skb->skb_type == SKB_ZERO_COPY) 15.329 + { 15.330 + unmap_domain_mem(skb->head); 15.331 + skb->head = skb->data = skb->tail = (void *)0xdeadbeef; 15.332 + }*/ 15.333 + 15.334 + //presumably I don't need to rewalk the guest page table 15.335 + //here. 15.336 + if (skb->skb_type == SKB_ZERO_COPY) 15.337 + { 15.338 + // g_pfn is the frame FROM the guest being given up 15.339 + // h_pfn is the frame FROM the hypervisor, passing up. 
15.340 + 15.341 + if (rx->flush_count == tlb_flush_count[smp_processor_id()]) 15.342 + { 15.343 + flush_tlb_all(); 15.344 + } 15.345 + 15.346 + g_pte = map_domain_mem(rx->addr); 15.347 + 15.348 + //g_pfn = frame_table + (rx->addr >> PAGE_SHIFT); 15.349 + g_pfn = frame_table + (*g_pte >> PAGE_SHIFT); 15.350 + h_pfn = skb->pf; 15.351 + 15.352 + 15.353 + tmp = g_pfn->next; g_pfn->next = h_pfn->next; h_pfn->next = tmp; 15.354 + tmp = g_pfn->prev; g_pfn->prev = h_pfn->prev; h_pfn->prev = tmp; 15.355 + tmp = g_pfn->flags; g_pfn->flags = h_pfn->flags; h_pfn->flags = tmp; 15.356 + 15.357 + h_pfn->tot_count = 1; 15.358 + h_pfn->type_count = g_pfn->type_count; 15.359 + g_pfn->tot_count = g_pfn->type_count = 0; 15.360 + 15.361 + h_pfn->flags = current->domain | PGT_l1_page_table; 15.362 + g_pfn->flags = PGT_l1_page_table; 15.363 + 15.364 + 15.365 + *g_pte = (*g_pte & ~PAGE_MASK) | (((h_pfn - frame_table) << PAGE_SHIFT) & PAGE_MASK); 15.366 + 15.367 + *g_pte |= _PAGE_PRESENT; 15.368 + unmap_domain_mem(g_pte); 15.369 + 15.370 + skb->pf = g_pfn; // return the guest pfn to be put on the free list 15.371 + } else { 15.372 + BUG(); //got a non-zero copy skb. which is not good. 15.373 + } 15.374 + 15.375 } 15.376 net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1); 15.377 if ( net_ring->rx_cons == net_ring->rx_event ) 15.378 @@ -1916,23 +2133,30 @@ int __init net_dev_init(void) 15.379 * Called from guest OS to notify updates to its transmit and/or receive 15.380 * descriptor rings. 15.381 */ 15.382 -#define TX_RING_INC(_i) (((_i)+1) & (TX_RING_SIZE-1)) 15.383 -#define RX_RING_INC(_i) (((_i)+1) & (RX_RING_SIZE-1)) 15.384 -#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1)) 15.385 -#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1)) 15.386 + 15.387 long do_net_update(void) 15.388 { 15.389 shared_info_t *shared = current->shared_info; 15.390 - net_ring_t *net_ring = current->net_ring_base; 15.391 + net_ring_t *net_ring; 15.392 + net_shadow_ring_t *shadow_ring; 15.393 net_vif_t *current_vif; 15.394 unsigned int i, j; 15.395 struct sk_buff *skb; 15.396 tx_entry_t tx; 15.397 - 15.398 + rx_shadow_entry_t *rx; 15.399 + unsigned long pfn; 15.400 + struct pfn_info *page; 15.401 + unsigned long *g_pte; 15.402 + 15.403 + 15.404 for ( j = 0; j < current->num_net_vifs; j++) 15.405 { 15.406 current_vif = current->net_vif_list[j]; 15.407 net_ring = current_vif->net_ring; 15.408 + 15.409 + /* First, we send out pending TX descriptors if they exist on this ring. 15.410 + */ 15.411 + 15.412 for ( i = net_ring->tx_cons; i != net_ring->tx_prod; i = TX_RING_INC(i) ) 15.413 { 15.414 if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) ) 15.415 @@ -1982,6 +2206,7 @@ long do_net_update(void) 15.416 net_get_target_vif(skb); 15.417 if ( skb->dst_vif > VIF_PHYSICAL_INTERFACE ) 15.418 { 15.419 +printk("LOCAL DELIVERY!\n"); 15.420 (void)netif_rx(skb); 15.421 } 15.422 else if ( skb->dst_vif == VIF_PHYSICAL_INTERFACE ) 15.423 @@ -1997,8 +2222,50 @@ long do_net_update(void) 15.424 } 15.425 } 15.426 net_ring->tx_cons = i; 15.427 + 15.428 + /* Next, pull any new RX descriptors across to the shadow ring. 15.429 + */ 15.430 + 15.431 + shadow_ring = current_vif->shadow_ring; 15.432 + 15.433 + for (i = shadow_ring->rx_prod; i != net_ring->rx_prod; i = RX_RING_INC(i)) 15.434 + { 15.435 + /* This copy assumes that rx_shadow_entry_t is an extension of 15.436 + * rx_net_entry_t extra fields must be tacked on to the end. 
15.437 + */ 15.438 + if ( copy_from_user( shadow_ring->rx_ring+i, net_ring->rx_ring+i, 15.439 + sizeof (rx_entry_t) ) ) 15.440 + { 15.441 + shadow_ring->rx_ring[i].status = RING_STATUS_ERR_CFU; 15.442 + continue; 15.443 + } else { 15.444 + 15.445 + rx = shadow_ring->rx_ring + i; 15.446 + pfn = rx->addr >> PAGE_SHIFT; 15.447 + page = frame_table + pfn; 15.448 + 15.449 + shadow_ring->rx_ring[i].status = RING_STATUS_OK; 15.450 + 15.451 + if (!(page->flags & PGT_l1_page_table) 15.452 + || !((page->flags & PG_domain_mask) == current->domain)) 15.453 + shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE; 15.454 + 15.455 + 15.456 + g_pte = map_domain_mem(rx->addr); 15.457 + 15.458 + if (!(*g_pte & _PAGE_PRESENT)) 15.459 + shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE; 15.460 + page = (*g_pte >> PAGE_SHIFT) + frame_table; 15.461 + if (page->tot_count != 1) 15.462 + shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE; 15.463 + 15.464 + *g_pte &= ~_PAGE_PRESENT; 15.465 + rx->flush_count = tlb_flush_count[smp_processor_id()]; 15.466 + unmap_domain_mem(g_pte); 15.467 + } 15.468 + } 15.469 + shadow_ring->rx_prod = net_ring->rx_prod; 15.470 } 15.471 - 15.472 return 0; 15.473 } 15.474
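The densest part of the dev.c changes is the check that a guest-supplied receive descriptor really refers to a page the hypervisor may give away. Pulled out of do_net_update() and reorganized into a single hypothetical helper (the real code sets the shadow descriptor's status field and keeps going rather than returning early), the checks look roughly like this:

static int validate_rx_buffer(rx_shadow_entry_t *rx)
{
    struct pfn_info *pte_page, *buf_page;
    unsigned long   *g_pte;

    /* rx->addr must point at a PTE inside one of the caller's own L1
     * page-table pages. */
    pte_page = frame_table + (rx->addr >> PAGE_SHIFT);
    if ( !(pte_page->flags & PGT_l1_page_table) ||
         ((pte_page->flags & PG_domain_mask) != current->domain) )
        return RING_STATUS_BAD_PAGE;

    g_pte = map_domain_mem(rx->addr);

    /* The PTE must be present, and the frame it maps must not be shared,
     * since the hypervisor is about to hand it over. */
    if ( !(*g_pte & _PAGE_PRESENT) )
    {
        unmap_domain_mem(g_pte);
        return RING_STATUS_BAD_PAGE;
    }

    buf_page = frame_table + (*g_pte >> PAGE_SHIFT);
    if ( buf_page->tot_count != 1 )
    {
        unmap_domain_mem(g_pte);
        return RING_STATUS_BAD_PAGE;
    }

    /* Unmap the frame from the guest and remember the flush epoch so a
     * stale TLB entry can be caught later (see flushtlb.h above). */
    *g_pte &= ~_PAGE_PRESENT;
    rx->flush_count = tlb_flush_count[smp_processor_id()];
    unmap_domain_mem(g_pte);

    return RING_STATUS_OK;
}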
16.1 --- a/xen-2.4.16/net/eth.c Tue Feb 04 22:08:19 2003 +0000 16.2 +++ b/xen-2.4.16/net/eth.c Wed Feb 05 13:56:14 2003 +0000 16.3 @@ -161,52 +161,62 @@ unsigned short eth_type_trans(struct sk_ 16.4 struct ethhdr *eth; 16.5 unsigned char *rawp; 16.6 16.7 - skb->mac.raw=skb->data; 16.8 - skb_pull(skb,dev->hard_header_len); 16.9 - eth= skb->mac.ethernet; 16.10 + if (skb->skb_type == SKB_ZERO_COPY) 16.11 + { 16.12 + skb_pull(skb,dev->hard_header_len); 16.13 + skb->mac.raw= (void *)0xdeadbeef; 16.14 + return htons(ETH_P_802_2); 16.15 + 16.16 + } else { // SKB_NORMAL 16.17 + 16.18 + skb->mac.raw=skb->data; 16.19 + skb_pull(skb,dev->hard_header_len); 16.20 + eth= skb->mac.ethernet; 16.21 16.22 - if(*eth->h_dest&1) 16.23 - { 16.24 - if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) 16.25 + if(*eth->h_dest&1) 16.26 + { 16.27 + if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0) 16.28 skb->pkt_type=PACKET_BROADCAST; 16.29 else 16.30 skb->pkt_type=PACKET_MULTICAST; 16.31 - } 16.32 + } 16.33 16.34 - /* 16.35 - * This ALLMULTI check should be redundant by 1.4 16.36 - * so don't forget to remove it. 16.37 - * 16.38 - * Seems, you forgot to remove it. All silly devices 16.39 - * seems to set IFF_PROMISC. 16.40 - */ 16.41 + /* 16.42 + * This ALLMULTI check should be redundant by 1.4 16.43 + * so don't forget to remove it. 16.44 + * 16.45 + * Seems, you forgot to remove it. All silly devices 16.46 + * seems to set IFF_PROMISC. 16.47 + */ 16.48 16.49 - else if(1 /*dev->flags&IFF_PROMISC*/) 16.50 - { 16.51 + else if(1 /*dev->flags&IFF_PROMISC*/) 16.52 + { 16.53 if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) 16.54 skb->pkt_type=PACKET_OTHERHOST; 16.55 - } 16.56 + } 16.57 16.58 - if (ntohs(eth->h_proto) >= 1536) 16.59 + if (ntohs(eth->h_proto) >= 1536) 16.60 return eth->h_proto; 16.61 16.62 - rawp = skb->data; 16.63 + rawp = skb->data; 16.64 16.65 - /* 16.66 - * This is a magic hack to spot IPX packets. Older Novell breaks 16.67 - * the protocol design and runs IPX over 802.3 without an 802.2 LLC 16.68 - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This 16.69 - * won't work for fault tolerant netware but does for the rest. 16.70 - */ 16.71 - if (*(unsigned short *)rawp == 0xFFFF) 16.72 + /* 16.73 + * This is a magic hack to spot IPX packets. Older Novell breaks 16.74 + * the protocol design and runs IPX over 802.3 without an 802.2 LLC 16.75 + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This 16.76 + * won't work for fault tolerant netware but does for the rest. 16.77 + */ 16.78 + if (*(unsigned short *)rawp == 0xFFFF) 16.79 return htons(ETH_P_802_3); 16.80 16.81 - /* 16.82 - * Real 802.2 LLC 16.83 - */ 16.84 - return htons(ETH_P_802_2); 16.85 + /* 16.86 + * Real 802.2 LLC 16.87 + */ 16.88 + return htons(ETH_P_802_2); 16.89 + } 16.90 } 16.91 16.92 + 16.93 int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) 16.94 { 16.95 struct ethhdr *eth = skb->mac.ethernet;
17.1 --- a/xen-2.4.16/net/skbuff.c Tue Feb 04 22:08:19 2003 +0000 17.2 +++ b/xen-2.4.16/net/skbuff.c Wed Feb 05 13:56:14 2003 +0000 17.3 @@ -149,6 +149,104 @@ static __inline__ void skb_head_to_pool( 17.4 kmem_cache_free(skbuff_head_cache, skb); 17.5 } 17.6 17.7 +static inline u8 *alloc_skb_data_page(struct sk_buff *skb) 17.8 +{ 17.9 + struct list_head *list_ptr; 17.10 + struct pfn_info *pf; 17.11 + unsigned long flags; 17.12 + 17.13 + spin_lock_irqsave(&free_list_lock, flags); 17.14 + 17.15 + if (!free_pfns) return NULL; 17.16 + 17.17 + list_ptr = free_list.next; 17.18 + pf = list_entry(list_ptr, struct pfn_info, list); 17.19 + pf->flags = 0; // owned by dom0 17.20 + list_del(&pf->list); 17.21 + pf->next = pf->prev = (pf - frame_table); 17.22 + free_pfns--; 17.23 + 17.24 + spin_unlock_irqrestore(&free_list_lock, flags); 17.25 + 17.26 + skb->pf = pf; 17.27 + return (u8 *)((pf - frame_table) << PAGE_SHIFT); 17.28 +} 17.29 + 17.30 +static inline void dealloc_skb_data_page(struct sk_buff *skb) 17.31 +{ 17.32 + struct pfn_info *pf; 17.33 + unsigned long flags; 17.34 + 17.35 + pf = skb->pf; 17.36 + 17.37 + spin_lock_irqsave(&free_list_lock, flags); 17.38 + 17.39 + list_add(&pf->list, &free_list); 17.40 + free_pfns++; 17.41 + 17.42 + spin_unlock_irqrestore(&free_list_lock, flags); 17.43 +} 17.44 + 17.45 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) 17.46 +{ 17.47 + struct sk_buff *skb; 17.48 + u8 *data; 17.49 + 17.50 + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { 17.51 + static int count = 0; 17.52 + if (++count < 5) { 17.53 + printk(KERN_ERR "alloc_skb called nonatomically " 17.54 + "from interrupt %p\n", NET_CALLER(size)); 17.55 + BUG(); 17.56 + } 17.57 + gfp_mask &= ~__GFP_WAIT; 17.58 + } 17.59 + 17.60 + /* Get the HEAD */ 17.61 + skb = skb_head_from_pool(); 17.62 + if (skb == NULL) { 17.63 + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); 17.64 + if (skb == NULL) 17.65 + goto nohead; 17.66 + } 17.67 + 17.68 + /* Get the DATA. Size must match skb_add_mtu(). */ 17.69 + size = SKB_DATA_ALIGN(size); 17.70 + data = alloc_skb_data_page(skb); 17.71 + 17.72 + if (data == NULL) 17.73 + goto nodata; 17.74 + 17.75 + /* XXX: does not include slab overhead */ 17.76 + skb->truesize = size + sizeof(struct sk_buff); 17.77 + 17.78 + /* Load the data pointers. */ 17.79 + skb->head = data; 17.80 + skb->data = data; 17.81 + skb->tail = data; 17.82 + skb->end = data + size; 17.83 + 17.84 + /* Set up other state */ 17.85 + skb->len = 0; 17.86 + skb->cloned = 0; 17.87 + skb->data_len = 0; 17.88 + skb->src_vif = VIF_UNKNOWN_INTERFACE; 17.89 + skb->dst_vif = VIF_UNKNOWN_INTERFACE; 17.90 + skb->skb_type = SKB_ZERO_COPY; 17.91 + 17.92 + atomic_set(&skb->users, 1); 17.93 + atomic_set(&(skb_shinfo(skb)->dataref), 1); 17.94 + skb_shinfo(skb)->nr_frags = 0; 17.95 + skb_shinfo(skb)->frag_list = NULL; 17.96 + 17.97 + return skb; 17.98 + 17.99 +nodata: 17.100 + skb_head_to_pool(skb); 17.101 +nohead: 17.102 + return NULL; 17.103 +} 17.104 + 17.105 17.106 /* Allocate a new skbuff. 
We do this ourselves so we can fill in a few 17.107 * 'private' fields and also do memory statistics to find all the 17.108 @@ -213,6 +311,7 @@ struct sk_buff *alloc_skb(unsigned int s 17.109 skb->data_len = 0; 17.110 skb->src_vif = VIF_UNKNOWN_INTERFACE; 17.111 skb->dst_vif = VIF_UNKNOWN_INTERFACE; 17.112 + skb->skb_type = SKB_NORMAL; 17.113 17.114 atomic_set(&skb->users, 1); 17.115 atomic_set(&(skb_shinfo(skb)->dataref), 1); 17.116 @@ -284,6 +383,7 @@ static void skb_clone_fraglist(struct sk 17.117 17.118 static void skb_release_data(struct sk_buff *skb) 17.119 { 17.120 + 17.121 if (!skb->cloned || 17.122 atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { 17.123 if (skb_shinfo(skb)->nr_frags) { 17.124 @@ -295,7 +395,12 @@ static void skb_release_data(struct sk_b 17.125 if (skb_shinfo(skb)->frag_list) 17.126 skb_drop_fraglist(skb); 17.127 17.128 - kfree(skb->head); 17.129 + if (skb->skb_type == SKB_NORMAL) { 17.130 + kfree(skb->head); 17.131 + } else if (skb->skb_type == SKB_ZERO_COPY) { dealloc_skb_data_page(skb); 17.132 + } else { 17.133 + BUG(); //skb_release_data called with unknown skb type! 17.134 + } 17.135 } 17.136 } 17.137 17.138 @@ -333,6 +438,7 @@ void __kfree_skb(struct sk_buff *skb) 17.139 } 17.140 skb->destructor(skb); 17.141 } 17.142 + 17.143 #ifdef CONFIG_NETFILTER 17.144 nf_conntrack_put(skb->nfct); 17.145 #endif
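Because __dev_alloc_skb() in include/xeno/skbuff.h now calls alloc_zc_skb(), any driver using dev_alloc_skb() gets a zero-copy buffer whose data area is a whole frame taken from the free list. A hypothetical refill routine showing the allocate/free pairing; example_rx_refill and the 1500-byte size are illustrative only:

static void example_rx_refill(struct net_device *dev)
{
    struct sk_buff *skb = dev_alloc_skb(1500 + 16);  /* -> alloc_zc_skb() */
    if ( skb == NULL )
        return;

    skb->dev = dev;
    /* skb->skb_type == SKB_ZERO_COPY, skb->pf points at the pfn_info for
     * the data frame, and skb->data holds that frame's machine address. */

    /* ... hand skb->data to the NIC for DMA ... */

    kfree_skb(skb);  /* skb_release_data() sees SKB_ZERO_COPY and returns
                      * the frame to the free list via
                      * dealloc_skb_data_page() instead of kfree(). */
}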
--- a/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c	Tue Feb 04 22:08:19 2003 +0000
+++ b/xenolinux-2.4.16-sparse/arch/xeno/drivers/network/network.c	Wed Feb 05 13:56:14 2003 +0000
@@ -192,9 +192,9 @@ static void network_alloc_rx_buffers(str
         skb = dev_alloc_skb(RX_BUF_SIZE);
         if ( skb == NULL ) break;
         skb->dev = dev;
-        skb_reserve(skb, 2); /* word align the IP header */
+        //skb_reserve(skb, 2); /* word align the IP header */
         np->rx_skb_ring[i] = skb;
-        np->net_ring->rx_ring[i].addr = (unsigned long)skb->data;
+        np->net_ring->rx_ring[i].addr = (unsigned long)skb->net_page->ppte; //data;
         np->net_ring->rx_ring[i].size = RX_BUF_SIZE - 16; /* arbitrary */
     }
 
@@ -276,10 +276,18 @@ static void network_rx_int(int irq, void
  again:
     for ( i = np->rx_idx; i != np->net_ring->rx_cons; i = RX_RING_INC(i) )
     {
+        if (np->net_ring->rx_ring[i].status != RING_STATUS_OK)
+        {
+            printk("bad buffer on RX ring!(%d)\n",
+                    np->net_ring->rx_ring[i].status);
+            continue;
+        }
         skb = np->rx_skb_ring[i];
+
         skb_put(skb, np->net_ring->rx_ring[i].size);
         skb->protocol = eth_type_trans(skb, dev);
         np->stats.rx_packets++;
+
         np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
         netif_rx(skb);
         dev->last_rx = jiffies;
--- a/xenolinux-2.4.16-sparse/include/asm-xeno/io.h	Tue Feb 04 22:08:19 2003 +0000
+++ b/xenolinux-2.4.16-sparse/include/asm-xeno/io.h	Wed Feb 05 13:56:14 2003 +0000
@@ -2,7 +2,7 @@
 #define _ASM_IO_H
 
 #include <linux/config.h>
-
+#include <asm/hypervisor.h>
 /*
  * This file contains the definitions for the x86 IO instructions
  * inb/inw/inl/outb/outw/outl and the "string versions" of the same
@@ -74,6 +74,22 @@ static inline void * phys_to_virt(unsign
 }
 
 /*
+ * Change virtual addresses to machine addresses and vv.
+ * These are equally trivial.
+ */
+
+static inline unsigned long virt_to_mach(volatile void * address)
+{
+    return __pa(address) + (unsigned long) start_info.phys_base;
+}
+
+static inline void *mach_to_virt(unsigned long address)
+{
+    return __va(address) - (unsigned long) start_info.phys_base;
+}
+
+
+/*
  * Change "struct page" to physical address.
  */
 #define page_to_phys(page)	((page - mem_map) << PAGE_SHIFT)
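virt_to_mach()/mach_to_virt() translate between the guest's kernel-virtual addresses and real machine addresses, assuming, as the helpers themselves do, that the domain occupies one contiguous machine region starting at start_info.phys_base. A trivial round-trip example; check_mach_translation is an invented name:

static int check_mach_translation(void *buf)
{
    unsigned long mach = virt_to_mach(buf);   /* __pa(buf) + phys_base   */
    return mach_to_virt(mach) == buf;         /* translates back to the
                                               * same virtual address    */
}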
20.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 20.2 +++ b/xenolinux-2.4.16-sparse/include/linux/skbuff.h Wed Feb 05 13:56:14 2003 +0000 20.3 @@ -0,0 +1,1185 @@ 20.4 +/* 20.5 + * Definitions for the 'struct sk_buff' memory handlers. 20.6 + * 20.7 + * Authors: 20.8 + * Alan Cox, <gw4pts@gw4pts.ampr.org> 20.9 + * Florian La Roche, <rzsfl@rz.uni-sb.de> 20.10 + * 20.11 + * This program is free software; you can redistribute it and/or 20.12 + * modify it under the terms of the GNU General Public License 20.13 + * as published by the Free Software Foundation; either version 20.14 + * 2 of the License, or (at your option) any later version. 20.15 + */ 20.16 + 20.17 +#ifndef _LINUX_SKBUFF_H 20.18 +#define _LINUX_SKBUFF_H 20.19 + 20.20 +#include <linux/config.h> 20.21 +#include <linux/kernel.h> 20.22 +#include <linux/sched.h> 20.23 +#include <linux/time.h> 20.24 +#include <linux/cache.h> 20.25 + 20.26 +#include <asm/atomic.h> 20.27 +#include <asm/types.h> 20.28 +#include <linux/spinlock.h> 20.29 +#include <linux/mm.h> 20.30 +#include <linux/highmem.h> 20.31 + 20.32 +/* Zero Copy additions: 20.33 + * 20.34 + * (1) there are now two types of skb, as indicated by the skb_type field. 20.35 + * this is because, at least for the time being, there are two seperate types 20.36 + * of memory that may be allocated to skb->data. 20.37 + * 20.38 + * (2) until discontiguous memory is fully supported, there will be a free list of pages 20.39 + * to be used by the net RX code. This list will be allocated in the driver init code 20.40 + * but is declared here because the socket free code needs to return pages to it. 20.41 + */ 20.42 + 20.43 +// for skb->skb_type: 20.44 + 20.45 +#define SKB_NORMAL 0 20.46 +#define SKB_ZERO_COPY 1 20.47 + 20.48 +#define NUM_NET_PAGES 9 // about 1Meg of buffers. (2^9) 20.49 +struct net_page_info { 20.50 + struct list_head list; 20.51 + unsigned long virt_addr; 20.52 + unsigned long ppte; 20.53 +}; 20.54 + 20.55 +extern char *net_page_chunk; 20.56 +extern struct net_page_info *net_page_table; 20.57 +extern struct list_head net_page_list; 20.58 +extern spinlock_t net_page_list_lock; 20.59 +extern unsigned int net_pages; 20.60 + 20.61 +/* End zero copy additions */ 20.62 + 20.63 +#define HAVE_ALLOC_SKB /* For the drivers to know */ 20.64 +#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ 20.65 +#define SLAB_SKB /* Slabified skbuffs */ 20.66 + 20.67 +#define CHECKSUM_NONE 0 20.68 +#define CHECKSUM_HW 1 20.69 +#define CHECKSUM_UNNECESSARY 2 20.70 + 20.71 +#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1)) 20.72 +#define SKB_MAX_ORDER(X,ORDER) (((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1)) 20.73 +#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X),0)) 20.74 +#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0,2)) 20.75 + 20.76 +/* A. Checksumming of received packets by device. 20.77 + * 20.78 + * NONE: device failed to checksum this packet. 20.79 + * skb->csum is undefined. 20.80 + * 20.81 + * UNNECESSARY: device parsed packet and wouldbe verified checksum. 20.82 + * skb->csum is undefined. 20.83 + * It is bad option, but, unfortunately, many of vendors do this. 20.84 + * Apparently with secret goal to sell you new device, when you 20.85 + * will add new protocol to your host. F.e. IPv6. 8) 20.86 + * 20.87 + * HW: the most generic way. Device supplied checksum of _all_ 20.88 + * the packet as seen by netif_rx in skb->csum. 
20.89 + * NOTE: Even if device supports only some protocols, but 20.90 + * is able to produce some skb->csum, it MUST use HW, 20.91 + * not UNNECESSARY. 20.92 + * 20.93 + * B. Checksumming on output. 20.94 + * 20.95 + * NONE: skb is checksummed by protocol or csum is not required. 20.96 + * 20.97 + * HW: device is required to csum packet as seen by hard_start_xmit 20.98 + * from skb->h.raw to the end and to record the checksum 20.99 + * at skb->h.raw+skb->csum. 20.100 + * 20.101 + * Device must show its capabilities in dev->features, set 20.102 + * at device setup time. 20.103 + * NETIF_F_HW_CSUM - it is clever device, it is able to checksum 20.104 + * everything. 20.105 + * NETIF_F_NO_CSUM - loopback or reliable single hop media. 20.106 + * NETIF_F_IP_CSUM - device is dumb. It is able to csum only 20.107 + * TCP/UDP over IPv4. Sigh. Vendors like this 20.108 + * way by an unknown reason. Though, see comment above 20.109 + * about CHECKSUM_UNNECESSARY. 8) 20.110 + * 20.111 + * Any questions? No questions, good. --ANK 20.112 + */ 20.113 + 20.114 +#ifdef __i386__ 20.115 +#define NET_CALLER(arg) (*(((void**)&arg)-1)) 20.116 +#else 20.117 +#define NET_CALLER(arg) __builtin_return_address(0) 20.118 +#endif 20.119 + 20.120 +#ifdef CONFIG_NETFILTER 20.121 +struct nf_conntrack { 20.122 + atomic_t use; 20.123 + void (*destroy)(struct nf_conntrack *); 20.124 +}; 20.125 + 20.126 +struct nf_ct_info { 20.127 + struct nf_conntrack *master; 20.128 +}; 20.129 +#endif 20.130 + 20.131 +struct sk_buff_head { 20.132 + /* These two members must be first. */ 20.133 + struct sk_buff * next; 20.134 + struct sk_buff * prev; 20.135 + 20.136 + __u32 qlen; 20.137 + spinlock_t lock; 20.138 +}; 20.139 + 20.140 +struct sk_buff; 20.141 + 20.142 +#define MAX_SKB_FRAGS 6 20.143 + 20.144 +typedef struct skb_frag_struct skb_frag_t; 20.145 + 20.146 +struct skb_frag_struct 20.147 +{ 20.148 + struct page *page; 20.149 + __u16 page_offset; 20.150 + __u16 size; 20.151 +}; 20.152 + 20.153 +/* This data is invariant across clones and lives at 20.154 + * the end of the header data, ie. at skb->end. 20.155 + */ 20.156 +struct skb_shared_info { 20.157 + atomic_t dataref; 20.158 + unsigned int nr_frags; 20.159 + struct sk_buff *frag_list; 20.160 + skb_frag_t frags[MAX_SKB_FRAGS]; 20.161 +}; 20.162 + 20.163 +struct sk_buff { 20.164 + /* These two members must be first. */ 20.165 + struct sk_buff * next; /* Next buffer in list */ 20.166 + struct sk_buff * prev; /* Previous buffer in list */ 20.167 + 20.168 + struct sk_buff_head * list; /* List we are on */ 20.169 + struct sock *sk; /* Socket we are owned by */ 20.170 + struct timeval stamp; /* Time we arrived */ 20.171 + struct net_device *dev; /* Device we arrived on/are leaving by */ 20.172 + 20.173 + /* Transport layer header */ 20.174 + union 20.175 + { 20.176 + struct tcphdr *th; 20.177 + struct udphdr *uh; 20.178 + struct icmphdr *icmph; 20.179 + struct igmphdr *igmph; 20.180 + struct iphdr *ipiph; 20.181 + struct spxhdr *spxh; 20.182 + unsigned char *raw; 20.183 + } h; 20.184 + 20.185 + /* Network layer header */ 20.186 + union 20.187 + { 20.188 + struct iphdr *iph; 20.189 + struct ipv6hdr *ipv6h; 20.190 + struct arphdr *arph; 20.191 + struct ipxhdr *ipxh; 20.192 + unsigned char *raw; 20.193 + } nh; 20.194 + 20.195 + /* Link layer header */ 20.196 + union 20.197 + { 20.198 + struct ethhdr *ethernet; 20.199 + unsigned char *raw; 20.200 + } mac; 20.201 + 20.202 + struct dst_entry *dst; 20.203 + 20.204 + /* 20.205 + * This is the control buffer. 
It is free to use for every 20.206 + * layer. Please put your private variables there. If you 20.207 + * want to keep them across layers you have to do a skb_clone() 20.208 + * first. This is owned by whoever has the skb queued ATM. 20.209 + */ 20.210 + char cb[48]; 20.211 + 20.212 + unsigned int len; /* Length of actual data */ 20.213 + unsigned int data_len; 20.214 + unsigned int csum; /* Checksum */ 20.215 + unsigned char __unused, /* Dead field, may be reused */ 20.216 + cloned, /* head may be cloned (check refcnt to be sure). */ 20.217 + pkt_type, /* Packet class */ 20.218 + ip_summed; /* Driver fed us an IP checksum */ 20.219 + __u32 priority; /* Packet queueing priority */ 20.220 + atomic_t users; /* User count - see datagram.c,tcp.c */ 20.221 + unsigned short protocol; /* Packet protocol from driver. */ 20.222 + unsigned short security; /* Security level of packet */ 20.223 + unsigned int truesize; /* Buffer size */ 20.224 + 20.225 + unsigned char *head; /* Head of buffer */ 20.226 + unsigned char *data; /* Data head pointer */ 20.227 + unsigned char *tail; /* Tail pointer */ 20.228 + unsigned char *end; /* End pointer */ 20.229 + 20.230 + void (*destructor)(struct sk_buff *); /* Destruct function */ 20.231 +#ifdef CONFIG_NETFILTER 20.232 + /* Can be used for communication between hooks. */ 20.233 + unsigned long nfmark; 20.234 + /* Cache info */ 20.235 + __u32 nfcache; 20.236 + /* Associated connection, if any */ 20.237 + struct nf_ct_info *nfct; 20.238 +#ifdef CONFIG_NETFILTER_DEBUG 20.239 + unsigned int nf_debug; 20.240 +#endif 20.241 +#endif /*CONFIG_NETFILTER*/ 20.242 + 20.243 +#if defined(CONFIG_HIPPI) 20.244 + union{ 20.245 + __u32 ifield; 20.246 + } private; 20.247 +#endif 20.248 + 20.249 +#ifdef CONFIG_NET_SCHED 20.250 + __u32 tc_index; /* traffic control index */ 20.251 +#endif 20.252 + unsigned int skb_type; /* for zero copy handling. 
*/ 20.253 + struct net_page_info *net_page; 20.254 +}; 20.255 + 20.256 +#define SK_WMEM_MAX 65535 20.257 +#define SK_RMEM_MAX 65535 20.258 + 20.259 +#ifdef __KERNEL__ 20.260 +/* 20.261 + * Handling routines are only of interest to the kernel 20.262 + */ 20.263 +#include <linux/slab.h> 20.264 + 20.265 +#include <asm/system.h> 20.266 + 20.267 +extern void __kfree_skb(struct sk_buff *skb); 20.268 +extern struct sk_buff * alloc_skb(unsigned int size, int priority); 20.269 +extern struct sk_buff * alloc_zc_skb(unsigned int size, int priority); 20.270 +extern void kfree_skbmem(struct sk_buff *skb); 20.271 +extern struct sk_buff * skb_clone(struct sk_buff *skb, int priority); 20.272 +extern struct sk_buff * skb_copy(const struct sk_buff *skb, int priority); 20.273 +extern struct sk_buff * pskb_copy(struct sk_buff *skb, int gfp_mask); 20.274 +extern int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask); 20.275 +extern struct sk_buff * skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); 20.276 +extern struct sk_buff * skb_copy_expand(const struct sk_buff *skb, 20.277 + int newheadroom, 20.278 + int newtailroom, 20.279 + int priority); 20.280 +#define dev_kfree_skb(a) kfree_skb(a) 20.281 +extern void skb_over_panic(struct sk_buff *skb, int len, void *here); 20.282 +extern void skb_under_panic(struct sk_buff *skb, int len, void *here); 20.283 + 20.284 +/* Internal */ 20.285 +#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) 20.286 + 20.287 +/** 20.288 + * skb_queue_empty - check if a queue is empty 20.289 + * @list: queue head 20.290 + * 20.291 + * Returns true if the queue is empty, false otherwise. 20.292 + */ 20.293 + 20.294 +static inline int skb_queue_empty(struct sk_buff_head *list) 20.295 +{ 20.296 + return (list->next == (struct sk_buff *) list); 20.297 +} 20.298 + 20.299 +/** 20.300 + * skb_get - reference buffer 20.301 + * @skb: buffer to reference 20.302 + * 20.303 + * Makes another reference to a socket buffer and returns a pointer 20.304 + * to the buffer. 20.305 + */ 20.306 + 20.307 +static inline struct sk_buff *skb_get(struct sk_buff *skb) 20.308 +{ 20.309 + atomic_inc(&skb->users); 20.310 + return skb; 20.311 +} 20.312 + 20.313 +/* 20.314 + * If users==1, we are the only owner and are can avoid redundant 20.315 + * atomic change. 20.316 + */ 20.317 + 20.318 +/** 20.319 + * kfree_skb - free an sk_buff 20.320 + * @skb: buffer to free 20.321 + * 20.322 + * Drop a reference to the buffer and free it if the usage count has 20.323 + * hit zero. 20.324 + */ 20.325 + 20.326 +static inline void kfree_skb(struct sk_buff *skb) 20.327 +{ 20.328 + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 20.329 + __kfree_skb(skb); 20.330 +} 20.331 + 20.332 +/* Use this if you didn't touch the skb state [for fast switching] */ 20.333 +static inline void kfree_skb_fast(struct sk_buff *skb) 20.334 +{ 20.335 + if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) 20.336 + kfree_skbmem(skb); 20.337 +} 20.338 + 20.339 +/** 20.340 + * skb_cloned - is the buffer a clone 20.341 + * @skb: buffer to check 20.342 + * 20.343 + * Returns true if the buffer was generated with skb_clone() and is 20.344 + * one of multiple shared copies of the buffer. Cloned buffers are 20.345 + * shared data so must not be written to under normal circumstances. 
20.346 + */ 20.347 + 20.348 +static inline int skb_cloned(struct sk_buff *skb) 20.349 +{ 20.350 + return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; 20.351 +} 20.352 + 20.353 +/** 20.354 + * skb_shared - is the buffer shared 20.355 + * @skb: buffer to check 20.356 + * 20.357 + * Returns true if more than one person has a reference to this 20.358 + * buffer. 20.359 + */ 20.360 + 20.361 +static inline int skb_shared(struct sk_buff *skb) 20.362 +{ 20.363 + return (atomic_read(&skb->users) != 1); 20.364 +} 20.365 + 20.366 +/** 20.367 + * skb_share_check - check if buffer is shared and if so clone it 20.368 + * @skb: buffer to check 20.369 + * @pri: priority for memory allocation 20.370 + * 20.371 + * If the buffer is shared the buffer is cloned and the old copy 20.372 + * drops a reference. A new clone with a single reference is returned. 20.373 + * If the buffer is not shared the original buffer is returned. When 20.374 + * being called from interrupt status or with spinlocks held pri must 20.375 + * be GFP_ATOMIC. 20.376 + * 20.377 + * NULL is returned on a memory allocation failure. 20.378 + */ 20.379 + 20.380 +static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) 20.381 +{ 20.382 + if (skb_shared(skb)) { 20.383 + struct sk_buff *nskb; 20.384 + nskb = skb_clone(skb, pri); 20.385 + kfree_skb(skb); 20.386 + return nskb; 20.387 + } 20.388 + return skb; 20.389 +} 20.390 + 20.391 + 20.392 +/* 20.393 + * Copy shared buffers into a new sk_buff. We effectively do COW on 20.394 + * packets to handle cases where we have a local reader and forward 20.395 + * and a couple of other messy ones. The normal one is tcpdumping 20.396 + * a packet thats being forwarded. 20.397 + */ 20.398 + 20.399 +/** 20.400 + * skb_unshare - make a copy of a shared buffer 20.401 + * @skb: buffer to check 20.402 + * @pri: priority for memory allocation 20.403 + * 20.404 + * If the socket buffer is a clone then this function creates a new 20.405 + * copy of the data, drops a reference count on the old copy and returns 20.406 + * the new copy with the reference count at 1. If the buffer is not a clone 20.407 + * the original buffer is returned. When called with a spinlock held or 20.408 + * from interrupt state @pri must be %GFP_ATOMIC 20.409 + * 20.410 + * %NULL is returned on a memory allocation failure. 20.411 + */ 20.412 + 20.413 +static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) 20.414 +{ 20.415 + struct sk_buff *nskb; 20.416 + if(!skb_cloned(skb)) 20.417 + return skb; 20.418 + nskb=skb_copy(skb, pri); 20.419 + kfree_skb(skb); /* Free our shared copy */ 20.420 + return nskb; 20.421 +} 20.422 + 20.423 +/** 20.424 + * skb_peek 20.425 + * @list_: list to peek at 20.426 + * 20.427 + * Peek an &sk_buff. Unlike most other operations you _MUST_ 20.428 + * be careful with this one. A peek leaves the buffer on the 20.429 + * list and someone else may run off with it. You must hold 20.430 + * the appropriate locks or have a private queue to do this. 20.431 + * 20.432 + * Returns %NULL for an empty list or a pointer to the head element. 20.433 + * The reference count is not incremented and the reference is therefore 20.434 + * volatile. Use with caution. 
20.435 + */ 20.436 + 20.437 +static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) 20.438 +{ 20.439 + struct sk_buff *list = ((struct sk_buff *)list_)->next; 20.440 + if (list == (struct sk_buff *)list_) 20.441 + list = NULL; 20.442 + return list; 20.443 +} 20.444 + 20.445 +/** 20.446 + * skb_peek_tail 20.447 + * @list_: list to peek at 20.448 + * 20.449 + * Peek an &sk_buff. Unlike most other operations you _MUST_ 20.450 + * be careful with this one. A peek leaves the buffer on the 20.451 + * list and someone else may run off with it. You must hold 20.452 + * the appropriate locks or have a private queue to do this. 20.453 + * 20.454 + * Returns %NULL for an empty list or a pointer to the tail element. 20.455 + * The reference count is not incremented and the reference is therefore 20.456 + * volatile. Use with caution. 20.457 + */ 20.458 + 20.459 +static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) 20.460 +{ 20.461 + struct sk_buff *list = ((struct sk_buff *)list_)->prev; 20.462 + if (list == (struct sk_buff *)list_) 20.463 + list = NULL; 20.464 + return list; 20.465 +} 20.466 + 20.467 +/** 20.468 + * skb_queue_len - get queue length 20.469 + * @list_: list to measure 20.470 + * 20.471 + * Return the length of an &sk_buff queue. 20.472 + */ 20.473 + 20.474 +static inline __u32 skb_queue_len(struct sk_buff_head *list_) 20.475 +{ 20.476 + return(list_->qlen); 20.477 +} 20.478 + 20.479 +static inline void skb_queue_head_init(struct sk_buff_head *list) 20.480 +{ 20.481 + spin_lock_init(&list->lock); 20.482 + list->prev = (struct sk_buff *)list; 20.483 + list->next = (struct sk_buff *)list; 20.484 + list->qlen = 0; 20.485 +} 20.486 + 20.487 +/* 20.488 + * Insert an sk_buff at the start of a list. 20.489 + * 20.490 + * The "__skb_xxxx()" functions are the non-atomic ones that 20.491 + * can only be called with interrupts disabled. 20.492 + */ 20.493 + 20.494 +/** 20.495 + * __skb_queue_head - queue a buffer at the list head 20.496 + * @list: list to use 20.497 + * @newsk: buffer to queue 20.498 + * 20.499 + * Queue a buffer at the start of a list. This function takes no locks 20.500 + * and you must therefore hold required locks before calling it. 20.501 + * 20.502 + * A buffer cannot be placed on two lists at the same time. 20.503 + */ 20.504 + 20.505 +static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 20.506 +{ 20.507 + struct sk_buff *prev, *next; 20.508 + 20.509 + newsk->list = list; 20.510 + list->qlen++; 20.511 + prev = (struct sk_buff *)list; 20.512 + next = prev->next; 20.513 + newsk->next = next; 20.514 + newsk->prev = prev; 20.515 + next->prev = newsk; 20.516 + prev->next = newsk; 20.517 +} 20.518 + 20.519 + 20.520 +/** 20.521 + * skb_queue_head - queue a buffer at the list head 20.522 + * @list: list to use 20.523 + * @newsk: buffer to queue 20.524 + * 20.525 + * Queue a buffer at the start of the list. This function takes the 20.526 + * list lock and can be used safely with other locking &sk_buff functions 20.527 + * safely. 20.528 + * 20.529 + * A buffer cannot be placed on two lists at the same time. 
20.530 + */ 20.531 + 20.532 +static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) 20.533 +{ 20.534 + unsigned long flags; 20.535 + 20.536 + spin_lock_irqsave(&list->lock, flags); 20.537 + __skb_queue_head(list, newsk); 20.538 + spin_unlock_irqrestore(&list->lock, flags); 20.539 +} 20.540 + 20.541 +/** 20.542 + * __skb_queue_tail - queue a buffer at the list tail 20.543 + * @list: list to use 20.544 + * @newsk: buffer to queue 20.545 + * 20.546 + * Queue a buffer at the end of a list. This function takes no locks 20.547 + * and you must therefore hold required locks before calling it. 20.548 + * 20.549 + * A buffer cannot be placed on two lists at the same time. 20.550 + */ 20.551 + 20.552 + 20.553 +static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 20.554 +{ 20.555 + struct sk_buff *prev, *next; 20.556 + 20.557 + newsk->list = list; 20.558 + list->qlen++; 20.559 + next = (struct sk_buff *)list; 20.560 + prev = next->prev; 20.561 + newsk->next = next; 20.562 + newsk->prev = prev; 20.563 + next->prev = newsk; 20.564 + prev->next = newsk; 20.565 +} 20.566 + 20.567 +/** 20.568 + * skb_queue_tail - queue a buffer at the list tail 20.569 + * @list: list to use 20.570 + * @newsk: buffer to queue 20.571 + * 20.572 + * Queue a buffer at the tail of the list. This function takes the 20.573 + * list lock and can be used safely with other locking &sk_buff functions 20.574 + * safely. 20.575 + * 20.576 + * A buffer cannot be placed on two lists at the same time. 20.577 + */ 20.578 + 20.579 +static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) 20.580 +{ 20.581 + unsigned long flags; 20.582 + 20.583 + spin_lock_irqsave(&list->lock, flags); 20.584 + __skb_queue_tail(list, newsk); 20.585 + spin_unlock_irqrestore(&list->lock, flags); 20.586 +} 20.587 + 20.588 +/** 20.589 + * __skb_dequeue - remove from the head of the queue 20.590 + * @list: list to dequeue from 20.591 + * 20.592 + * Remove the head of the list. This function does not take any locks 20.593 + * so must be used with appropriate locks held only. The head item is 20.594 + * returned or %NULL if the list is empty. 20.595 + */ 20.596 + 20.597 +static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) 20.598 +{ 20.599 + struct sk_buff *next, *prev, *result; 20.600 + 20.601 + prev = (struct sk_buff *) list; 20.602 + next = prev->next; 20.603 + result = NULL; 20.604 + if (next != prev) { 20.605 + result = next; 20.606 + next = next->next; 20.607 + list->qlen--; 20.608 + next->prev = prev; 20.609 + prev->next = next; 20.610 + result->next = NULL; 20.611 + result->prev = NULL; 20.612 + result->list = NULL; 20.613 + } 20.614 + return result; 20.615 +} 20.616 + 20.617 +/** 20.618 + * skb_dequeue - remove from the head of the queue 20.619 + * @list: list to dequeue from 20.620 + * 20.621 + * Remove the head of the list. The list lock is taken so the function 20.622 + * may be used safely with other locking list functions. The head item is 20.623 + * returned or %NULL if the list is empty. 20.624 + */ 20.625 + 20.626 +static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list) 20.627 +{ 20.628 + long flags; 20.629 + struct sk_buff *result; 20.630 + 20.631 + spin_lock_irqsave(&list->lock, flags); 20.632 + result = __skb_dequeue(list); 20.633 + spin_unlock_irqrestore(&list->lock, flags); 20.634 + return result; 20.635 +} 20.636 + 20.637 +/* 20.638 + * Insert a packet on a list. 
20.639 + */ 20.640 + 20.641 +static inline void __skb_insert(struct sk_buff *newsk, 20.642 + struct sk_buff * prev, struct sk_buff *next, 20.643 + struct sk_buff_head * list) 20.644 +{ 20.645 + newsk->next = next; 20.646 + newsk->prev = prev; 20.647 + next->prev = newsk; 20.648 + prev->next = newsk; 20.649 + newsk->list = list; 20.650 + list->qlen++; 20.651 +} 20.652 + 20.653 +/** 20.654 + * skb_insert - insert a buffer 20.655 + * @old: buffer to insert before 20.656 + * @newsk: buffer to insert 20.657 + * 20.658 + * Place a packet before a given packet in a list. The list locks are taken 20.659 + * and this function is atomic with respect to other list locked calls 20.660 + * A buffer cannot be placed on two lists at the same time. 20.661 + */ 20.662 + 20.663 +static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk) 20.664 +{ 20.665 + unsigned long flags; 20.666 + 20.667 + spin_lock_irqsave(&old->list->lock, flags); 20.668 + __skb_insert(newsk, old->prev, old, old->list); 20.669 + spin_unlock_irqrestore(&old->list->lock, flags); 20.670 +} 20.671 + 20.672 +/* 20.673 + * Place a packet after a given packet in a list. 20.674 + */ 20.675 + 20.676 +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) 20.677 +{ 20.678 + __skb_insert(newsk, old, old->next, old->list); 20.679 +} 20.680 + 20.681 +/** 20.682 + * skb_append - append a buffer 20.683 + * @old: buffer to insert after 20.684 + * @newsk: buffer to insert 20.685 + * 20.686 + * Place a packet after a given packet in a list. The list locks are taken 20.687 + * and this function is atomic with respect to other list locked calls. 20.688 + * A buffer cannot be placed on two lists at the same time. 20.689 + */ 20.690 + 20.691 + 20.692 +static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk) 20.693 +{ 20.694 + unsigned long flags; 20.695 + 20.696 + spin_lock_irqsave(&old->list->lock, flags); 20.697 + __skb_append(old, newsk); 20.698 + spin_unlock_irqrestore(&old->list->lock, flags); 20.699 +} 20.700 + 20.701 +/* 20.702 + * remove sk_buff from list. _Must_ be called atomically, and with 20.703 + * the list known.. 20.704 + */ 20.705 + 20.706 +static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) 20.707 +{ 20.708 + struct sk_buff * next, * prev; 20.709 + 20.710 + list->qlen--; 20.711 + next = skb->next; 20.712 + prev = skb->prev; 20.713 + skb->next = NULL; 20.714 + skb->prev = NULL; 20.715 + skb->list = NULL; 20.716 + next->prev = prev; 20.717 + prev->next = next; 20.718 +} 20.719 + 20.720 +/** 20.721 + * skb_unlink - remove a buffer from a list 20.722 + * @skb: buffer to remove 20.723 + * 20.724 + * Place a packet after a given packet in a list. The list locks are taken 20.725 + * and this function is atomic with respect to other list locked calls 20.726 + * 20.727 + * Works even without knowing the list it is sitting on, which can be 20.728 + * handy at times. It also means that THE LIST MUST EXIST when you 20.729 + * unlink. Thus a list must have its contents unlinked before it is 20.730 + * destroyed. 
20.731 + */ 20.732 + 20.733 +static inline void skb_unlink(struct sk_buff *skb) 20.734 +{ 20.735 + struct sk_buff_head *list = skb->list; 20.736 + 20.737 + if(list) { 20.738 + unsigned long flags; 20.739 + 20.740 + spin_lock_irqsave(&list->lock, flags); 20.741 + if(skb->list == list) 20.742 + __skb_unlink(skb, skb->list); 20.743 + spin_unlock_irqrestore(&list->lock, flags); 20.744 + } 20.745 +} 20.746 + 20.747 +/* XXX: more streamlined implementation */ 20.748 + 20.749 +/** 20.750 + * __skb_dequeue_tail - remove from the tail of the queue 20.751 + * @list: list to dequeue from 20.752 + * 20.753 + * Remove the tail of the list. This function does not take any locks 20.754 + * so must be used with appropriate locks held only. The tail item is 20.755 + * returned or %NULL if the list is empty. 20.756 + */ 20.757 + 20.758 +static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) 20.759 +{ 20.760 + struct sk_buff *skb = skb_peek_tail(list); 20.761 + if (skb) 20.762 + __skb_unlink(skb, list); 20.763 + return skb; 20.764 +} 20.765 + 20.766 +/** 20.767 + * skb_dequeue - remove from the head of the queue 20.768 + * @list: list to dequeue from 20.769 + * 20.770 + * Remove the head of the list. The list lock is taken so the function 20.771 + * may be used safely with other locking list functions. The tail item is 20.772 + * returned or %NULL if the list is empty. 20.773 + */ 20.774 + 20.775 +static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) 20.776 +{ 20.777 + long flags; 20.778 + struct sk_buff *result; 20.779 + 20.780 + spin_lock_irqsave(&list->lock, flags); 20.781 + result = __skb_dequeue_tail(list); 20.782 + spin_unlock_irqrestore(&list->lock, flags); 20.783 + return result; 20.784 +} 20.785 + 20.786 +static inline int skb_is_nonlinear(const struct sk_buff *skb) 20.787 +{ 20.788 + return skb->data_len; 20.789 +} 20.790 + 20.791 +static inline int skb_headlen(const struct sk_buff *skb) 20.792 +{ 20.793 + return skb->len - skb->data_len; 20.794 +} 20.795 + 20.796 +#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0) 20.797 +#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0) 20.798 +#define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0) 20.799 + 20.800 +/* 20.801 + * Add data to an sk_buff 20.802 + */ 20.803 + 20.804 +static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) 20.805 +{ 20.806 + unsigned char *tmp=skb->tail; 20.807 + SKB_LINEAR_ASSERT(skb); 20.808 + skb->tail+=len; 20.809 + skb->len+=len; 20.810 + return tmp; 20.811 +} 20.812 + 20.813 +/** 20.814 + * skb_put - add data to a buffer 20.815 + * @skb: buffer to use 20.816 + * @len: amount of data to add 20.817 + * 20.818 + * This function extends the used data area of the buffer. If this would 20.819 + * exceed the total buffer size the kernel will panic. A pointer to the 20.820 + * first byte of the extra data is returned. 
20.821 + */ 20.822 + 20.823 +static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) 20.824 +{ 20.825 + unsigned char *tmp=skb->tail; 20.826 + SKB_LINEAR_ASSERT(skb); 20.827 + skb->tail+=len; 20.828 + skb->len+=len; 20.829 + if(skb->tail>skb->end) { 20.830 + skb_over_panic(skb, len, current_text_addr()); 20.831 + } 20.832 + return tmp; 20.833 +} 20.834 + 20.835 +static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) 20.836 +{ 20.837 + skb->data-=len; 20.838 + skb->len+=len; 20.839 + return skb->data; 20.840 +} 20.841 + 20.842 +/** 20.843 + * skb_push - add data to the start of a buffer 20.844 + * @skb: buffer to use 20.845 + * @len: amount of data to add 20.846 + * 20.847 + * This function extends the used data area of the buffer at the buffer 20.848 + * start. If this would exceed the total buffer headroom the kernel will 20.849 + * panic. A pointer to the first byte of the extra data is returned. 20.850 + */ 20.851 + 20.852 +static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) 20.853 +{ 20.854 + skb->data-=len; 20.855 + skb->len+=len; 20.856 + if(skb->data<skb->head) { 20.857 + skb_under_panic(skb, len, current_text_addr()); 20.858 + } 20.859 + return skb->data; 20.860 +} 20.861 + 20.862 +static inline char *__skb_pull(struct sk_buff *skb, unsigned int len) 20.863 +{ 20.864 + skb->len-=len; 20.865 + if (skb->len < skb->data_len) 20.866 + BUG(); 20.867 + return skb->data+=len; 20.868 +} 20.869 + 20.870 +/** 20.871 + * skb_pull - remove data from the start of a buffer 20.872 + * @skb: buffer to use 20.873 + * @len: amount of data to remove 20.874 + * 20.875 + * This function removes data from the start of a buffer, returning 20.876 + * the memory to the headroom. A pointer to the next data in the buffer 20.877 + * is returned. Once the data has been pulled future pushes will overwrite 20.878 + * the old data. 20.879 + */ 20.880 + 20.881 +static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len) 20.882 +{ 20.883 + if (len > skb->len) 20.884 + return NULL; 20.885 + return __skb_pull(skb,len); 20.886 +} 20.887 + 20.888 +extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta); 20.889 + 20.890 +static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len) 20.891 +{ 20.892 + if (len > skb_headlen(skb) && 20.893 + __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL) 20.894 + return NULL; 20.895 + skb->len -= len; 20.896 + return skb->data += len; 20.897 +} 20.898 + 20.899 +static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len) 20.900 +{ 20.901 + if (len > skb->len) 20.902 + return NULL; 20.903 + return __pskb_pull(skb,len); 20.904 +} 20.905 + 20.906 +static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) 20.907 +{ 20.908 + if (len <= skb_headlen(skb)) 20.909 + return 1; 20.910 + if (len > skb->len) 20.911 + return 0; 20.912 + return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL); 20.913 +} 20.914 + 20.915 +/** 20.916 + * skb_headroom - bytes at buffer head 20.917 + * @skb: buffer to check 20.918 + * 20.919 + * Return the number of bytes of free space at the head of an &sk_buff. 
20.920 + */ 20.921 + 20.922 +static inline int skb_headroom(const struct sk_buff *skb) 20.923 +{ 20.924 + return skb->data-skb->head; 20.925 +} 20.926 + 20.927 +/** 20.928 + * skb_tailroom - bytes at buffer end 20.929 + * @skb: buffer to check 20.930 + * 20.931 + * Return the number of bytes of free space at the tail of an sk_buff 20.932 + */ 20.933 + 20.934 +static inline int skb_tailroom(const struct sk_buff *skb) 20.935 +{ 20.936 + return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail; 20.937 +} 20.938 + 20.939 +/** 20.940 + * skb_reserve - adjust headroom 20.941 + * @skb: buffer to alter 20.942 + * @len: bytes to move 20.943 + * 20.944 + * Increase the headroom of an empty &sk_buff by reducing the tail 20.945 + * room. This is only allowed for an empty buffer. 20.946 + */ 20.947 + 20.948 +static inline void skb_reserve(struct sk_buff *skb, unsigned int len) 20.949 +{ 20.950 + skb->data+=len; 20.951 + skb->tail+=len; 20.952 +} 20.953 + 20.954 +extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); 20.955 + 20.956 +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) 20.957 +{ 20.958 + if (!skb->data_len) { 20.959 + skb->len = len; 20.960 + skb->tail = skb->data+len; 20.961 + } else { 20.962 + ___pskb_trim(skb, len, 0); 20.963 + } 20.964 +} 20.965 + 20.966 +/** 20.967 + * skb_trim - remove end from a buffer 20.968 + * @skb: buffer to alter 20.969 + * @len: new length 20.970 + * 20.971 + * Cut the length of a buffer down by removing data from the tail. If 20.972 + * the buffer is already under the length specified it is not modified. 20.973 + */ 20.974 + 20.975 +static inline void skb_trim(struct sk_buff *skb, unsigned int len) 20.976 +{ 20.977 + if (skb->len > len) { 20.978 + __skb_trim(skb, len); 20.979 + } 20.980 +} 20.981 + 20.982 + 20.983 +static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) 20.984 +{ 20.985 + if (!skb->data_len) { 20.986 + skb->len = len; 20.987 + skb->tail = skb->data+len; 20.988 + return 0; 20.989 + } else { 20.990 + return ___pskb_trim(skb, len, 1); 20.991 + } 20.992 +} 20.993 + 20.994 +static inline int pskb_trim(struct sk_buff *skb, unsigned int len) 20.995 +{ 20.996 + if (len < skb->len) 20.997 + return __pskb_trim(skb, len); 20.998 + return 0; 20.999 +} 20.1000 + 20.1001 +/** 20.1002 + * skb_orphan - orphan a buffer 20.1003 + * @skb: buffer to orphan 20.1004 + * 20.1005 + * If a buffer currently has an owner then we call the owner's 20.1006 + * destructor function and make the @skb unowned. The buffer continues 20.1007 + * to exist but is no longer charged to its former owner. 20.1008 + */ 20.1009 + 20.1010 + 20.1011 +static inline void skb_orphan(struct sk_buff *skb) 20.1012 +{ 20.1013 + if (skb->destructor) 20.1014 + skb->destructor(skb); 20.1015 + skb->destructor = NULL; 20.1016 + skb->sk = NULL; 20.1017 +} 20.1018 + 20.1019 +/** 20.1020 + * skb_purge - empty a list 20.1021 + * @list: list to empty 20.1022 + * 20.1023 + * Delete all buffers on an &sk_buff list. Each buffer is removed from 20.1024 + * the list and one reference dropped. This function takes the list 20.1025 + * lock and is atomic with respect to other list locking functions. 
20.1026 + */ 20.1027 + 20.1028 + 20.1029 +static inline void skb_queue_purge(struct sk_buff_head *list) 20.1030 +{ 20.1031 + struct sk_buff *skb; 20.1032 + while ((skb=skb_dequeue(list))!=NULL) 20.1033 + kfree_skb(skb); 20.1034 +} 20.1035 + 20.1036 +/** 20.1037 + * __skb_purge - empty a list 20.1038 + * @list: list to empty 20.1039 + * 20.1040 + * Delete all buffers on an &sk_buff list. Each buffer is removed from 20.1041 + * the list and one reference dropped. This function does not take the 20.1042 + * list lock and the caller must hold the relevant locks to use it. 20.1043 + */ 20.1044 + 20.1045 + 20.1046 +static inline void __skb_queue_purge(struct sk_buff_head *list) 20.1047 +{ 20.1048 + struct sk_buff *skb; 20.1049 + while ((skb=__skb_dequeue(list))!=NULL) 20.1050 + kfree_skb(skb); 20.1051 +} 20.1052 + 20.1053 +/** 20.1054 + * __dev_alloc_skb - allocate an skbuff for sending 20.1055 + * @length: length to allocate 20.1056 + * @gfp_mask: get_free_pages mask, passed to alloc_skb 20.1057 + * 20.1058 + * Allocate a new &sk_buff and assign it a usage count of one. The 20.1059 + * buffer has unspecified headroom built in. Users should allocate 20.1060 + * the headroom they think they need without accounting for the 20.1061 + * built in space. The built in space is used for optimisations. 20.1062 + * 20.1063 + * %NULL is returned in there is no free memory. 20.1064 + */ 20.1065 + 20.1066 +static inline struct sk_buff *__dev_alloc_skb(unsigned int length, 20.1067 + int gfp_mask) 20.1068 +{ 20.1069 + struct sk_buff *skb; 20.1070 + 20.1071 + //skb = alloc_skb(length+16, gfp_mask); 20.1072 + skb = alloc_zc_skb(length+16, gfp_mask); 20.1073 + if (skb) 20.1074 + skb_reserve(skb,16); 20.1075 + return skb; 20.1076 +} 20.1077 + 20.1078 +/** 20.1079 + * dev_alloc_skb - allocate an skbuff for sending 20.1080 + * @length: length to allocate 20.1081 + * 20.1082 + * Allocate a new &sk_buff and assign it a usage count of one. The 20.1083 + * buffer has unspecified headroom built in. Users should allocate 20.1084 + * the headroom they think they need without accounting for the 20.1085 + * built in space. The built in space is used for optimisations. 20.1086 + * 20.1087 + * %NULL is returned in there is no free memory. Although this function 20.1088 + * allocates memory it can be called from an interrupt. 20.1089 + */ 20.1090 + 20.1091 +static inline struct sk_buff *dev_alloc_skb(unsigned int length) 20.1092 +{ 20.1093 + return __dev_alloc_skb(length, GFP_ATOMIC); 20.1094 +} 20.1095 + 20.1096 +/** 20.1097 + * skb_cow - copy header of skb when it is required 20.1098 + * @skb: buffer to cow 20.1099 + * @headroom: needed headroom 20.1100 + * 20.1101 + * If the skb passed lacks sufficient headroom or its data part 20.1102 + * is shared, data is reallocated. If reallocation fails, an error 20.1103 + * is returned and original skb is not changed. 20.1104 + * 20.1105 + * The result is skb with writable area skb->head...skb->tail 20.1106 + * and at least @headroom of space at head. 20.1107 + */ 20.1108 + 20.1109 +static inline int 20.1110 +skb_cow(struct sk_buff *skb, unsigned int headroom) 20.1111 +{ 20.1112 + int delta = (headroom > 16 ? 
headroom : 16) - skb_headroom(skb); 20.1113 + 20.1114 + if (delta < 0) 20.1115 + delta = 0; 20.1116 + 20.1117 + if (delta || skb_cloned(skb)) 20.1118 + return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC); 20.1119 + return 0; 20.1120 +} 20.1121 + 20.1122 +/** 20.1123 + * skb_linearize - convert paged skb to linear one 20.1124 + * @skb: buffer to linarize 20.1125 + * @gfp: allocation mode 20.1126 + * 20.1127 + * If there is no free memory -ENOMEM is returned, otherwise zero 20.1128 + * is returned and the old skb data released. */ 20.1129 +int skb_linearize(struct sk_buff *skb, int gfp); 20.1130 + 20.1131 +static inline void *kmap_skb_frag(const skb_frag_t *frag) 20.1132 +{ 20.1133 +#ifdef CONFIG_HIGHMEM 20.1134 + if (in_irq()) 20.1135 + BUG(); 20.1136 + 20.1137 + local_bh_disable(); 20.1138 +#endif 20.1139 + return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); 20.1140 +} 20.1141 + 20.1142 +static inline void kunmap_skb_frag(void *vaddr) 20.1143 +{ 20.1144 + kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); 20.1145 +#ifdef CONFIG_HIGHMEM 20.1146 + local_bh_enable(); 20.1147 +#endif 20.1148 +} 20.1149 + 20.1150 +#define skb_queue_walk(queue, skb) \ 20.1151 + for (skb = (queue)->next; \ 20.1152 + (skb != (struct sk_buff *)(queue)); \ 20.1153 + skb=skb->next) 20.1154 + 20.1155 + 20.1156 +extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); 20.1157 +extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); 20.1158 +extern int skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size); 20.1159 +extern int skb_copy_datagram_iovec(const struct sk_buff *from, int offset, struct iovec *to,int size); 20.1160 +extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump); 20.1161 +extern int skb_copy_and_csum_datagram_iovec(const struct sk_buff *skb, int hlen, struct iovec *iov); 20.1162 +extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); 20.1163 + 20.1164 +extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum); 20.1165 +extern int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); 20.1166 +extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum); 20.1167 +extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); 20.1168 + 20.1169 +extern void skb_init(void); 20.1170 +extern void skb_add_mtu(int mtu); 20.1171 + 20.1172 +#ifdef CONFIG_NETFILTER 20.1173 +static inline void 20.1174 +nf_conntrack_put(struct nf_ct_info *nfct) 20.1175 +{ 20.1176 + if (nfct && atomic_dec_and_test(&nfct->master->use)) 20.1177 + nfct->master->destroy(nfct->master); 20.1178 +} 20.1179 +static inline void 20.1180 +nf_conntrack_get(struct nf_ct_info *nfct) 20.1181 +{ 20.1182 + if (nfct) 20.1183 + atomic_inc(&nfct->master->use); 20.1184 +} 20.1185 +#endif 20.1186 + 20.1187 +#endif /* __KERNEL__ */ 20.1188 +#endif /* _LINUX_SKBUFF_H */
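The header hunk above completes the zero-copy plumbing in the skb API: __dev_alloc_skb() now takes its data area from alloc_zc_skb() rather than alloc_skb(), and the new skb_type and net_page fields let the free path hand the data page back to the pool. A minimal sketch of the intended calling sequence follows; it is illustrative only, the helper name example_rx_skb is hypothetical, and it assumes the SKB_ZERO_COPY constant and struct net_page_info that the rest of this header provides (not visible in the hunks quoted here).

    #include <linux/skbuff.h>

    /* Allocate a zero-copy receive buffer for a frame of pkt_len bytes.
     * dev_alloc_skb() bottoms out in alloc_zc_skb(), so skb->data points
     * into one page of the net_page_table pool and skb->skb_type is
     * SKB_ZERO_COPY. */
    static struct sk_buff *example_rx_skb(unsigned int pkt_len)
    {
        struct sk_buff *skb;

        skb = dev_alloc_skb(pkt_len);   /* GFP_ATOMIC, 16 bytes of headroom */
        if (skb == NULL)
            return NULL;                /* skb-head cache or page pool empty */

        /* skb->net_page->ppte holds the machine address of the PTE mapping
         * the data page (filled in by init_net_pages() below), presumably so
         * the page can later be remapped to the receiver instead of copied. */
        skb_put(skb, pkt_len);          /* mark pkt_len bytes of data as used */
        return skb;
    }

Dropping the last reference with kfree_skb() ends up in skb_release_data() in the new net/core/skbuff.c below, which for SKB_ZERO_COPY buffers returns the page with free_net_page() rather than kfree()ing skb->head.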
21.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 21.2 +++ b/xenolinux-2.4.16-sparse/net/core/skbuff.c Wed Feb 05 13:56:14 2003 +0000 21.3 @@ -0,0 +1,1368 @@ 21.4 +/* 21.5 + * Routines having to do with the 'struct sk_buff' memory handlers. 21.6 + * 21.7 + * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> 21.8 + * Florian La Roche <rzsfl@rz.uni-sb.de> 21.9 + * 21.10 + * Version: $Id: skbuff.c,v 1.89 2001/08/06 13:25:02 davem Exp $ 21.11 + * 21.12 + * Fixes: 21.13 + * Alan Cox : Fixed the worst of the load balancer bugs. 21.14 + * Dave Platt : Interrupt stacking fix. 21.15 + * Richard Kooijman : Timestamp fixes. 21.16 + * Alan Cox : Changed buffer format. 21.17 + * Alan Cox : destructor hook for AF_UNIX etc. 21.18 + * Linus Torvalds : Better skb_clone. 21.19 + * Alan Cox : Added skb_copy. 21.20 + * Alan Cox : Added all the changed routines Linus 21.21 + * only put in the headers 21.22 + * Ray VanTassle : Fixed --skb->lock in free 21.23 + * Alan Cox : skb_copy copy arp field 21.24 + * Andi Kleen : slabified it. 21.25 + * 21.26 + * NOTE: 21.27 + * The __skb_ routines should be called with interrupts 21.28 + * disabled, or you better be *real* sure that the operation is atomic 21.29 + * with respect to whatever list is being frobbed (e.g. via lock_sock() 21.30 + * or via disabling bottom half handlers, etc). 21.31 + * 21.32 + * This program is free software; you can redistribute it and/or 21.33 + * modify it under the terms of the GNU General Public License 21.34 + * as published by the Free Software Foundation; either version 21.35 + * 2 of the License, or (at your option) any later version. 21.36 + */ 21.37 + 21.38 +/* 21.39 + * The functions in this file will not compile correctly with gcc 2.4.x 21.40 + */ 21.41 + 21.42 +#include <linux/config.h> 21.43 +#include <linux/types.h> 21.44 +#include <linux/kernel.h> 21.45 +#include <linux/sched.h> 21.46 +#include <linux/mm.h> 21.47 +#include <linux/interrupt.h> 21.48 +#include <linux/in.h> 21.49 +#include <linux/inet.h> 21.50 +#include <linux/slab.h> 21.51 +#include <linux/netdevice.h> 21.52 +#include <linux/string.h> 21.53 +#include <linux/skbuff.h> 21.54 +#include <linux/cache.h> 21.55 +#include <linux/init.h> 21.56 +#include <linux/highmem.h> 21.57 +#include <linux/spinlock.h> 21.58 + 21.59 +#include <net/ip.h> 21.60 +#include <net/protocol.h> 21.61 +#include <net/dst.h> 21.62 +#include <net/tcp.h> 21.63 +#include <net/udp.h> 21.64 +#include <net/sock.h> 21.65 +#include <asm/io.h> 21.66 +#include <asm/uaccess.h> 21.67 +#include <asm/system.h> 21.68 + 21.69 +/* zc globals: */ 21.70 +char *net_page_chunk; 21.71 +struct net_page_info *net_page_table; 21.72 +struct list_head net_page_list; 21.73 +spinlock_t net_page_list_lock = SPIN_LOCK_UNLOCKED; 21.74 +unsigned int net_pages; 21.75 + 21.76 + 21.77 + 21.78 +int sysctl_hot_list_len = 128; 21.79 + 21.80 +static kmem_cache_t *skbuff_head_cache; 21.81 + 21.82 +static union { 21.83 + struct sk_buff_head list; 21.84 + char pad[SMP_CACHE_BYTES]; 21.85 +} skb_head_pool[NR_CPUS]; 21.86 + 21.87 +/* 21.88 + * Keep out-of-line to prevent kernel bloat. 21.89 + * __builtin_return_address is not used because it is not always 21.90 + * reliable. 21.91 + */ 21.92 + 21.93 +/** 21.94 + * skb_over_panic - private function 21.95 + * @skb: buffer 21.96 + * @sz: size 21.97 + * @here: address 21.98 + * 21.99 + * Out of line support code for skb_put(). Not user callable. 
21.100 + */ 21.101 + 21.102 +void skb_over_panic(struct sk_buff *skb, int sz, void *here) 21.103 +{ 21.104 + printk("skput:over: %p:%d put:%d dev:%s", 21.105 + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); 21.106 + BUG(); 21.107 +} 21.108 + 21.109 +/** 21.110 + * skb_under_panic - private function 21.111 + * @skb: buffer 21.112 + * @sz: size 21.113 + * @here: address 21.114 + * 21.115 + * Out of line support code for skb_push(). Not user callable. 21.116 + */ 21.117 + 21.118 + 21.119 +void skb_under_panic(struct sk_buff *skb, int sz, void *here) 21.120 +{ 21.121 + printk("skput:under: %p:%d put:%d dev:%s", 21.122 + here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>"); 21.123 + BUG(); 21.124 +} 21.125 + 21.126 +static __inline__ struct sk_buff *skb_head_from_pool(void) 21.127 +{ 21.128 + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; 21.129 + 21.130 + if (skb_queue_len(list)) { 21.131 + struct sk_buff *skb; 21.132 + unsigned long flags; 21.133 + 21.134 + local_irq_save(flags); 21.135 + skb = __skb_dequeue(list); 21.136 + local_irq_restore(flags); 21.137 + return skb; 21.138 + } 21.139 + return NULL; 21.140 +} 21.141 + 21.142 +static __inline__ void skb_head_to_pool(struct sk_buff *skb) 21.143 +{ 21.144 + struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list; 21.145 + 21.146 + if (skb_queue_len(list) < sysctl_hot_list_len) { 21.147 + unsigned long flags; 21.148 + 21.149 + local_irq_save(flags); 21.150 + __skb_queue_head(list, skb); 21.151 + local_irq_restore(flags); 21.152 + 21.153 + return; 21.154 + } 21.155 + kmem_cache_free(skbuff_head_cache, skb); 21.156 +} 21.157 + 21.158 + 21.159 +/* Allocate a new skbuff. We do this ourselves so we can fill in a few 21.160 + * 'private' fields and also do memory statistics to find all the 21.161 + * [BEEP] leaks. 21.162 + * 21.163 + */ 21.164 + 21.165 +/** 21.166 + * alloc_skb - allocate a network buffer 21.167 + * @size: size to allocate 21.168 + * @gfp_mask: allocation mask 21.169 + * 21.170 + * Allocate a new &sk_buff. The returned buffer has no headroom and a 21.171 + * tail room of size bytes. The object has a reference count of one. 21.172 + * The return is the buffer. On a failure the return is %NULL. 21.173 + * 21.174 + * Buffers may only be allocated from interrupts using a @gfp_mask of 21.175 + * %GFP_ATOMIC. 21.176 + */ 21.177 + 21.178 +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask) 21.179 +{ 21.180 + struct sk_buff *skb; 21.181 + u8 *data; 21.182 + 21.183 + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { 21.184 + static int count = 0; 21.185 + if (++count < 5) { 21.186 + printk(KERN_ERR "alloc_skb called nonatomically " 21.187 + "from interrupt %p\n", NET_CALLER(size)); 21.188 + BUG(); 21.189 + } 21.190 + gfp_mask &= ~__GFP_WAIT; 21.191 + } 21.192 + 21.193 + /* Get the HEAD */ 21.194 + skb = skb_head_from_pool(); 21.195 + if (skb == NULL) { 21.196 + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); 21.197 + if (skb == NULL) 21.198 + goto nohead; 21.199 + } 21.200 + 21.201 + /* Get the DATA. Size must match skb_add_mtu(). */ 21.202 + size = SKB_DATA_ALIGN(size); 21.203 + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 21.204 + if (data == NULL) 21.205 + goto nodata; 21.206 + 21.207 + /* XXX: does not include slab overhead */ 21.208 + skb->truesize = size + sizeof(struct sk_buff); 21.209 + 21.210 + /* Load the data pointers. 
*/ 21.211 + skb->head = data; 21.212 + skb->data = data; 21.213 + skb->tail = data; 21.214 + skb->end = data + size; 21.215 + 21.216 + /* Set up other state */ 21.217 + skb->len = 0; 21.218 + skb->cloned = 0; 21.219 + skb->data_len = 0; 21.220 + skb->skb_type = SKB_NORMAL; 21.221 + 21.222 + atomic_set(&skb->users, 1); 21.223 + atomic_set(&(skb_shinfo(skb)->dataref), 1); 21.224 + skb_shinfo(skb)->nr_frags = 0; 21.225 + skb_shinfo(skb)->frag_list = NULL; 21.226 + return skb; 21.227 + 21.228 +nodata: 21.229 + skb_head_to_pool(skb); 21.230 +nohead: 21.231 + return NULL; 21.232 +} 21.233 + 21.234 +/* begin zc code additions: */ 21.235 + 21.236 +void init_net_pages(unsigned long order_pages) 21.237 +{ 21.238 + int i; 21.239 + struct net_page_info *np; 21.240 + pgd_t *pgd; pmd_t *pmd; pte_t *ptep; 21.241 + unsigned long nr_pages = 1 << order_pages; 21.242 + 21.243 + net_page_chunk = (char *)__get_free_pages(GFP_KERNEL, order_pages); 21.244 + net_page_table = kmalloc(nr_pages * sizeof(struct net_page_info), GFP_KERNEL); 21.245 + 21.246 + INIT_LIST_HEAD(&net_page_list); 21.247 + 21.248 + for (i = 0; i < nr_pages; i++) 21.249 + { 21.250 + np = net_page_table + i; 21.251 + np->virt_addr = (unsigned long)net_page_chunk + (i * PAGE_SIZE); 21.252 + 21.253 + // now fill the pte pointer: 21.254 + np->ppte = 0xdeadbeef; 21.255 + pgd = pgd_offset_k(np->virt_addr); 21.256 + if (pgd_none(*pgd) || pgd_bad(*pgd)) BUG(); 21.257 + 21.258 + if (pmd_none(*pmd)) BUG(); 21.259 + if (pmd_bad(*pmd)) BUG(); 21.260 + 21.261 + ptep = pte_offset(pmd, np->virt_addr); 21.262 + np->ppte = (unsigned long)virt_to_mach(ptep); 21.263 + 21.264 + list_add_tail(&np->list, &net_page_list); 21.265 + } 21.266 + net_pages = nr_pages; 21.267 + 21.268 + 21.269 +} 21.270 + 21.271 +struct net_page_info *get_net_page(void) 21.272 +{ 21.273 + struct list_head *list_ptr; 21.274 + struct net_page_info *np; 21.275 + unsigned long flags; 21.276 + 21.277 + if (!net_pages) 21.278 + { 21.279 + return NULL; 21.280 + } 21.281 + spin_lock_irqsave(&net_page_list_lock, flags); 21.282 + 21.283 + list_ptr = net_page_list.next; 21.284 + np = list_entry(list_ptr, struct net_page_info, list); 21.285 + list_del(&np->list); 21.286 + net_pages--; 21.287 + 21.288 + spin_unlock_irqrestore(&net_page_list_lock, flags); 21.289 + 21.290 + return np; 21.291 +} 21.292 + 21.293 +void free_net_page(struct net_page_info *np) 21.294 +{ 21.295 + unsigned long flags; 21.296 + 21.297 + if (np == NULL) return; 21.298 + 21.299 + spin_lock_irqsave(&net_page_list_lock, flags); 21.300 + 21.301 + list_add(&np->list, &net_page_list); 21.302 + net_pages++; 21.303 + 21.304 + spin_unlock_irqrestore(&net_page_list_lock, flags); 21.305 + 21.306 +} 21.307 + 21.308 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask) 21.309 +{ 21.310 + struct sk_buff *skb; 21.311 + u8 *data; 21.312 + 21.313 + if (in_interrupt() && (gfp_mask & __GFP_WAIT)) { 21.314 + static int count = 0; 21.315 + if (++count < 5) { 21.316 + printk(KERN_ERR "alloc_skb called nonatomically " 21.317 + "from interrupt %p\n", NET_CALLER(size)); 21.318 + BUG(); 21.319 + } 21.320 + gfp_mask &= ~__GFP_WAIT; 21.321 + } 21.322 + 21.323 + /* Get the HEAD */ 21.324 + skb = skb_head_from_pool(); 21.325 + if (skb == NULL) { 21.326 + skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA); 21.327 + if (skb == NULL) 21.328 + goto nohead; 21.329 + } 21.330 + 21.331 + /* Get the DATA. Size must match skb_add_mtu(). 
*/ 21.332 + size = SKB_DATA_ALIGN(size); 21.333 + if (size > PAGE_SIZE) 21.334 + { 21.335 + printk("alloc_zc_skb called with unruly size.\n"); 21.336 + size = PAGE_SIZE; 21.337 + } 21.338 + skb->net_page = get_net_page(); 21.339 + if (skb->net_page == NULL) 21.340 + { 21.341 + goto nodata; 21.342 + } 21.343 + data = (u8 *)skb->net_page->virt_addr; 21.344 + if (data == NULL) 21.345 + goto nodata; 21.346 + /* XXX: does not include slab overhead */ 21.347 + skb->truesize = size + sizeof(struct sk_buff); 21.348 + 21.349 + /* Load the data pointers. */ 21.350 + skb->head = data; 21.351 + skb->data = data; 21.352 + skb->tail = data; 21.353 + skb->end = data + size; 21.354 + 21.355 + /* Set up other state */ 21.356 + skb->len = 0; 21.357 + skb->cloned = 0; 21.358 + skb->data_len = 0; 21.359 + skb->skb_type = SKB_ZERO_COPY; 21.360 + 21.361 + atomic_set(&skb->users, 1); 21.362 + atomic_set(&(skb_shinfo(skb)->dataref), 1); 21.363 + skb_shinfo(skb)->nr_frags = 0; 21.364 + skb_shinfo(skb)->frag_list = NULL; 21.365 + return skb; 21.366 + 21.367 +nodata: 21.368 + skb_head_to_pool(skb); 21.369 +nohead: 21.370 + return NULL; 21.371 +} 21.372 + 21.373 +/* end zc code additions: */ 21.374 + 21.375 +/* 21.376 + * Slab constructor for a skb head. 21.377 + */ 21.378 +static inline void skb_headerinit(void *p, kmem_cache_t *cache, 21.379 + unsigned long flags) 21.380 +{ 21.381 + struct sk_buff *skb = p; 21.382 + 21.383 + skb->next = NULL; 21.384 + skb->prev = NULL; 21.385 + skb->list = NULL; 21.386 + skb->sk = NULL; 21.387 + skb->stamp.tv_sec=0; /* No idea about time */ 21.388 + skb->dev = NULL; 21.389 + skb->dst = NULL; 21.390 + memset(skb->cb, 0, sizeof(skb->cb)); 21.391 + skb->pkt_type = PACKET_HOST; /* Default type */ 21.392 + skb->ip_summed = 0; 21.393 + skb->priority = 0; 21.394 + skb->security = 0; /* By default packets are insecure */ 21.395 + skb->destructor = NULL; 21.396 + 21.397 +#ifdef CONFIG_NETFILTER 21.398 + skb->nfmark = skb->nfcache = 0; 21.399 + skb->nfct = NULL; 21.400 +#ifdef CONFIG_NETFILTER_DEBUG 21.401 + skb->nf_debug = 0; 21.402 +#endif 21.403 +#endif 21.404 +#ifdef CONFIG_NET_SCHED 21.405 + skb->tc_index = 0; 21.406 +#endif 21.407 +} 21.408 + 21.409 +static void skb_drop_fraglist(struct sk_buff *skb) 21.410 +{ 21.411 + struct sk_buff *list = skb_shinfo(skb)->frag_list; 21.412 + 21.413 + skb_shinfo(skb)->frag_list = NULL; 21.414 + 21.415 + do { 21.416 + struct sk_buff *this = list; 21.417 + list = list->next; 21.418 + kfree_skb(this); 21.419 + } while (list); 21.420 +} 21.421 + 21.422 +static void skb_clone_fraglist(struct sk_buff *skb) 21.423 +{ 21.424 + struct sk_buff *list; 21.425 + 21.426 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) 21.427 + skb_get(list); 21.428 +} 21.429 + 21.430 +static void skb_release_data(struct sk_buff *skb) 21.431 +{ 21.432 + if (!skb->cloned || 21.433 + atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { 21.434 + if (skb_shinfo(skb)->nr_frags) { 21.435 + int i; 21.436 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 21.437 +{ 21.438 + put_page(skb_shinfo(skb)->frags[i].page); 21.439 +} 21.440 + } 21.441 + 21.442 + if (skb_shinfo(skb)->frag_list) 21.443 + skb_drop_fraglist(skb); 21.444 + 21.445 + if (skb->skb_type == SKB_NORMAL) 21.446 + { 21.447 + kfree(skb->head); 21.448 + } else {// SKB_ZERO_COPY 21.449 + free_net_page(skb->net_page); 21.450 + } 21.451 + } 21.452 + 21.453 +} 21.454 + 21.455 +/* 21.456 + * Free an skbuff by memory without cleaning the state. 
21.457 + */ 21.458 +void kfree_skbmem(struct sk_buff *skb) 21.459 +{ 21.460 + skb_release_data(skb); 21.461 + skb_head_to_pool(skb); 21.462 +} 21.463 + 21.464 +/** 21.465 + * __kfree_skb - private function 21.466 + * @skb: buffer 21.467 + * 21.468 + * Free an sk_buff. Release anything attached to the buffer. 21.469 + * Clean the state. This is an internal helper function. Users should 21.470 + * always call kfree_skb 21.471 + */ 21.472 + 21.473 +void __kfree_skb(struct sk_buff *skb) 21.474 +{ 21.475 + if (skb->list) { 21.476 + printk(KERN_WARNING "Warning: kfree_skb passed an skb still " 21.477 + "on a list (from %p).\n", NET_CALLER(skb)); 21.478 + BUG(); 21.479 + } 21.480 + 21.481 + dst_release(skb->dst); 21.482 + if(skb->destructor) { 21.483 + if (in_irq()) { 21.484 + printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n", 21.485 + NET_CALLER(skb)); 21.486 + } 21.487 + skb->destructor(skb); 21.488 + } 21.489 +#ifdef CONFIG_NETFILTER 21.490 + nf_conntrack_put(skb->nfct); 21.491 +#endif 21.492 + skb_headerinit(skb, NULL, 0); /* clean state */ 21.493 + kfree_skbmem(skb); 21.494 +} 21.495 + 21.496 +/** 21.497 + * skb_clone - duplicate an sk_buff 21.498 + * @skb: buffer to clone 21.499 + * @gfp_mask: allocation priority 21.500 + * 21.501 + * Duplicate an &sk_buff. The new one is not owned by a socket. Both 21.502 + * copies share the same packet data but not structure. The new 21.503 + * buffer has a reference count of 1. If the allocation fails the 21.504 + * function returns %NULL otherwise the new buffer is returned. 21.505 + * 21.506 + * If this function is called from an interrupt gfp_mask() must be 21.507 + * %GFP_ATOMIC. 21.508 + */ 21.509 + 21.510 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) 21.511 +{ 21.512 + struct sk_buff *n; 21.513 + 21.514 + n = skb_head_from_pool(); 21.515 + if (!n) { 21.516 + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 21.517 + if (!n) 21.518 + return NULL; 21.519 + } 21.520 + 21.521 +#define C(x) n->x = skb->x 21.522 + 21.523 + n->next = n->prev = NULL; 21.524 + n->list = NULL; 21.525 + n->sk = NULL; 21.526 + C(stamp); 21.527 + C(dev); 21.528 + C(h); 21.529 + C(nh); 21.530 + C(mac); 21.531 + C(dst); 21.532 + dst_clone(n->dst); 21.533 + memcpy(n->cb, skb->cb, sizeof(skb->cb)); 21.534 + C(len); 21.535 + C(data_len); 21.536 + C(csum); 21.537 + n->cloned = 1; 21.538 + C(pkt_type); 21.539 + C(ip_summed); 21.540 + C(priority); 21.541 + atomic_set(&n->users, 1); 21.542 + C(protocol); 21.543 + C(security); 21.544 + C(truesize); 21.545 + C(head); 21.546 + C(data); 21.547 + C(tail); 21.548 + C(end); 21.549 + n->destructor = NULL; 21.550 +#ifdef CONFIG_NETFILTER 21.551 + C(nfmark); 21.552 + C(nfcache); 21.553 + C(nfct); 21.554 +#ifdef CONFIG_NETFILTER_DEBUG 21.555 + C(nf_debug); 21.556 +#endif 21.557 +#endif /*CONFIG_NETFILTER*/ 21.558 +#if defined(CONFIG_HIPPI) 21.559 + C(private); 21.560 +#endif 21.561 +#ifdef CONFIG_NET_SCHED 21.562 + C(tc_index); 21.563 +#endif 21.564 + C(skb_type); 21.565 + C(net_page); 21.566 + atomic_inc(&(skb_shinfo(skb)->dataref)); 21.567 + skb->cloned = 1; 21.568 +#ifdef CONFIG_NETFILTER 21.569 + nf_conntrack_get(skb->nfct); 21.570 +#endif 21.571 + return n; 21.572 +} 21.573 + 21.574 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 21.575 +{ 21.576 + /* 21.577 + * Shift between the two data areas in bytes 21.578 + */ 21.579 + unsigned long offset = new->data - old->data; 21.580 + 21.581 + new->list=NULL; 21.582 + new->sk=NULL; 21.583 + new->dev=old->dev; 21.584 + 
new->priority=old->priority; 21.585 + new->protocol=old->protocol; 21.586 + new->dst=dst_clone(old->dst); 21.587 + new->h.raw=old->h.raw+offset; 21.588 + new->nh.raw=old->nh.raw+offset; 21.589 + new->mac.raw=old->mac.raw+offset; 21.590 + memcpy(new->cb, old->cb, sizeof(old->cb)); 21.591 + atomic_set(&new->users, 1); 21.592 + new->pkt_type=old->pkt_type; 21.593 + new->stamp=old->stamp; 21.594 + new->destructor = NULL; 21.595 + new->security=old->security; 21.596 +#ifdef CONFIG_NETFILTER 21.597 + new->nfmark=old->nfmark; 21.598 + new->nfcache=old->nfcache; 21.599 + new->nfct=old->nfct; 21.600 + nf_conntrack_get(new->nfct); 21.601 +#ifdef CONFIG_NETFILTER_DEBUG 21.602 + new->nf_debug=old->nf_debug; 21.603 +#endif 21.604 +#endif 21.605 +#ifdef CONFIG_NET_SCHED 21.606 + new->tc_index = old->tc_index; 21.607 +#endif 21.608 +} 21.609 + 21.610 +/** 21.611 + * skb_copy - create private copy of an sk_buff 21.612 + * @skb: buffer to copy 21.613 + * @gfp_mask: allocation priority 21.614 + * 21.615 + * Make a copy of both an &sk_buff and its data. This is used when the 21.616 + * caller wishes to modify the data and needs a private copy of the 21.617 + * data to alter. Returns %NULL on failure or the pointer to the buffer 21.618 + * on success. The returned buffer has a reference count of 1. 21.619 + * 21.620 + * As by-product this function converts non-linear &sk_buff to linear 21.621 + * one, so that &sk_buff becomes completely private and caller is allowed 21.622 + * to modify all the data of returned buffer. This means that this 21.623 + * function is not recommended for use in circumstances when only 21.624 + * header is going to be modified. Use pskb_copy() instead. 21.625 + */ 21.626 + 21.627 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) 21.628 +{ 21.629 + struct sk_buff *n; 21.630 + int headerlen = skb->data-skb->head; 21.631 + 21.632 + /* 21.633 + * Allocate the copy buffer 21.634 + */ 21.635 + n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask); 21.636 + if(n==NULL) 21.637 + return NULL; 21.638 + 21.639 + /* Set the data pointer */ 21.640 + skb_reserve(n,headerlen); 21.641 + /* Set the tail pointer and length */ 21.642 + skb_put(n,skb->len); 21.643 + n->csum = skb->csum; 21.644 + n->ip_summed = skb->ip_summed; 21.645 + 21.646 + if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len)) 21.647 + BUG(); 21.648 + 21.649 + copy_skb_header(n, skb); 21.650 + 21.651 + return n; 21.652 +} 21.653 + 21.654 +/* Keep head the same: replace data */ 21.655 +int skb_linearize(struct sk_buff *skb, int gfp_mask) 21.656 +{ 21.657 + unsigned int size; 21.658 + u8 *data; 21.659 + long offset; 21.660 + int headerlen = skb->data - skb->head; 21.661 + int expand = (skb->tail+skb->data_len) - skb->end; 21.662 + 21.663 + if (skb_shared(skb)) 21.664 + BUG(); 21.665 + 21.666 + if (expand <= 0) 21.667 + expand = 0; 21.668 + 21.669 + size = (skb->end - skb->head + expand); 21.670 + size = SKB_DATA_ALIGN(size); 21.671 + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 21.672 + if (data == NULL) 21.673 + return -ENOMEM; 21.674 + 21.675 + /* Copy entire thing */ 21.676 + if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len)) 21.677 + BUG(); 21.678 + 21.679 + /* Offset between the two in bytes */ 21.680 + offset = data - skb->head; 21.681 + 21.682 + /* Free old data. 
*/ 21.683 + skb_release_data(skb); 21.684 + 21.685 + skb->head = data; 21.686 + skb->end = data + size; 21.687 + 21.688 + /* Set up new pointers */ 21.689 + skb->h.raw += offset; 21.690 + skb->nh.raw += offset; 21.691 + skb->mac.raw += offset; 21.692 + skb->tail += offset; 21.693 + skb->data += offset; 21.694 + 21.695 + /* Set up shinfo */ 21.696 + atomic_set(&(skb_shinfo(skb)->dataref), 1); 21.697 + skb_shinfo(skb)->nr_frags = 0; 21.698 + skb_shinfo(skb)->frag_list = NULL; 21.699 + 21.700 + /* We are no longer a clone, even if we were. */ 21.701 + skb->cloned = 0; 21.702 + 21.703 + skb->tail += skb->data_len; 21.704 + skb->data_len = 0; 21.705 + return 0; 21.706 +} 21.707 + 21.708 + 21.709 +/** 21.710 + * pskb_copy - create copy of an sk_buff with private head. 21.711 + * @skb: buffer to copy 21.712 + * @gfp_mask: allocation priority 21.713 + * 21.714 + * Make a copy of both an &sk_buff and part of its data, located 21.715 + * in header. Fragmented data remain shared. This is used when 21.716 + * the caller wishes to modify only header of &sk_buff and needs 21.717 + * private copy of the header to alter. Returns %NULL on failure 21.718 + * or the pointer to the buffer on success. 21.719 + * The returned buffer has a reference count of 1. 21.720 + */ 21.721 + 21.722 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) 21.723 +{ 21.724 + struct sk_buff *n; 21.725 + 21.726 + /* 21.727 + * Allocate the copy buffer 21.728 + */ 21.729 + n=alloc_skb(skb->end - skb->head, gfp_mask); 21.730 + if(n==NULL) 21.731 + return NULL; 21.732 + 21.733 + /* Set the data pointer */ 21.734 + skb_reserve(n,skb->data-skb->head); 21.735 + /* Set the tail pointer and length */ 21.736 + skb_put(n,skb_headlen(skb)); 21.737 + /* Copy the bytes */ 21.738 + memcpy(n->data, skb->data, n->len); 21.739 + n->csum = skb->csum; 21.740 + n->ip_summed = skb->ip_summed; 21.741 + 21.742 + n->data_len = skb->data_len; 21.743 + n->len = skb->len; 21.744 + 21.745 + if (skb_shinfo(skb)->nr_frags) { 21.746 + int i; 21.747 + 21.748 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 21.749 + skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; 21.750 + get_page(skb_shinfo(n)->frags[i].page); 21.751 + } 21.752 + skb_shinfo(n)->nr_frags = i; 21.753 + } 21.754 + 21.755 + if (skb_shinfo(skb)->frag_list) { 21.756 + skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 21.757 + skb_clone_fraglist(n); 21.758 + } 21.759 + 21.760 + copy_skb_header(n, skb); 21.761 + 21.762 + return n; 21.763 +} 21.764 + 21.765 +/** 21.766 + * pskb_expand_head - reallocate header of &sk_buff 21.767 + * @skb: buffer to reallocate 21.768 + * @nhead: room to add at head 21.769 + * @ntail: room to add at tail 21.770 + * @gfp_mask: allocation priority 21.771 + * 21.772 + * Expands (or creates identical copy, if &nhead and &ntail are zero) 21.773 + * header of skb. &sk_buff itself is not changed. &sk_buff MUST have 21.774 + * reference count of 1. Returns zero in the case of success or error, 21.775 + * if expansion failed. In the last case, &sk_buff is not changed. 21.776 + * 21.777 + * All the pointers pointing into skb header may change and must be 21.778 + * reloaded after call to this function. 
21.779 + */ 21.780 + 21.781 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) 21.782 +{ 21.783 + int i; 21.784 + u8 *data; 21.785 + int size = nhead + (skb->end - skb->head) + ntail; 21.786 + long off; 21.787 + 21.788 + if (skb_shared(skb)) 21.789 + BUG(); 21.790 + 21.791 + size = SKB_DATA_ALIGN(size); 21.792 + 21.793 + data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); 21.794 + if (data == NULL) 21.795 + goto nodata; 21.796 + 21.797 + /* Copy only real data... and, alas, header. This should be 21.798 + * optimized for the cases when header is void. */ 21.799 + memcpy(data+nhead, skb->head, skb->tail-skb->head); 21.800 + memcpy(data+size, skb->end, sizeof(struct skb_shared_info)); 21.801 + 21.802 + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) 21.803 + get_page(skb_shinfo(skb)->frags[i].page); 21.804 + 21.805 + if (skb_shinfo(skb)->frag_list) 21.806 + skb_clone_fraglist(skb); 21.807 + 21.808 + skb_release_data(skb); 21.809 + 21.810 + off = (data+nhead) - skb->head; 21.811 + 21.812 + skb->head = data; 21.813 + skb->end = data+size; 21.814 + 21.815 + skb->data += off; 21.816 + skb->tail += off; 21.817 + skb->mac.raw += off; 21.818 + skb->h.raw += off; 21.819 + skb->nh.raw += off; 21.820 + skb->cloned = 0; 21.821 + atomic_set(&skb_shinfo(skb)->dataref, 1); 21.822 + return 0; 21.823 + 21.824 +nodata: 21.825 + return -ENOMEM; 21.826 +} 21.827 + 21.828 +/* Make private copy of skb with writable head and some headroom */ 21.829 + 21.830 +struct sk_buff * 21.831 +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) 21.832 +{ 21.833 + struct sk_buff *skb2; 21.834 + int delta = headroom - skb_headroom(skb); 21.835 + 21.836 + if (delta <= 0) 21.837 + return pskb_copy(skb, GFP_ATOMIC); 21.838 + 21.839 + skb2 = skb_clone(skb, GFP_ATOMIC); 21.840 + if (skb2 == NULL || 21.841 + !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) 21.842 + return skb2; 21.843 + 21.844 + kfree_skb(skb2); 21.845 + return NULL; 21.846 +} 21.847 + 21.848 + 21.849 +/** 21.850 + * skb_copy_expand - copy and expand sk_buff 21.851 + * @skb: buffer to copy 21.852 + * @newheadroom: new free bytes at head 21.853 + * @newtailroom: new free bytes at tail 21.854 + * @gfp_mask: allocation priority 21.855 + * 21.856 + * Make a copy of both an &sk_buff and its data and while doing so 21.857 + * allocate additional space. 21.858 + * 21.859 + * This is used when the caller wishes to modify the data and needs a 21.860 + * private copy of the data to alter as well as more space for new fields. 21.861 + * Returns %NULL on failure or the pointer to the buffer 21.862 + * on success. The returned buffer has a reference count of 1. 21.863 + * 21.864 + * You must pass %GFP_ATOMIC as the allocation priority if this function 21.865 + * is called from an interrupt. 21.866 + */ 21.867 + 21.868 + 21.869 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 21.870 + int newheadroom, 21.871 + int newtailroom, 21.872 + int gfp_mask) 21.873 +{ 21.874 + struct sk_buff *n; 21.875 + 21.876 + /* 21.877 + * Allocate the copy buffer 21.878 + */ 21.879 + 21.880 + n=alloc_skb(newheadroom + skb->len + newtailroom, 21.881 + gfp_mask); 21.882 + if(n==NULL) 21.883 + return NULL; 21.884 + 21.885 + skb_reserve(n,newheadroom); 21.886 + 21.887 + /* Set the tail pointer and length */ 21.888 + skb_put(n,skb->len); 21.889 + 21.890 + /* Copy the data only. 
*/ 21.891 + if (skb_copy_bits(skb, 0, n->data, skb->len)) 21.892 + BUG(); 21.893 + 21.894 + copy_skb_header(n, skb); 21.895 + return n; 21.896 +} 21.897 + 21.898 +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. 21.899 + * If realloc==0 and trimming is impossible without change of data, 21.900 + * it is BUG(). 21.901 + */ 21.902 + 21.903 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) 21.904 +{ 21.905 + int offset = skb_headlen(skb); 21.906 + int nfrags = skb_shinfo(skb)->nr_frags; 21.907 + int i; 21.908 + 21.909 + for (i=0; i<nfrags; i++) { 21.910 + int end = offset + skb_shinfo(skb)->frags[i].size; 21.911 + if (end > len) { 21.912 + if (skb_cloned(skb)) { 21.913 + if (!realloc) 21.914 + BUG(); 21.915 + if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 21.916 + return -ENOMEM; 21.917 + } 21.918 + if (len <= offset) { 21.919 + put_page(skb_shinfo(skb)->frags[i].page); 21.920 + skb_shinfo(skb)->nr_frags--; 21.921 + } else { 21.922 + skb_shinfo(skb)->frags[i].size = len-offset; 21.923 + } 21.924 + } 21.925 + offset = end; 21.926 + } 21.927 + 21.928 + if (offset < len) { 21.929 + skb->data_len -= skb->len - len; 21.930 + skb->len = len; 21.931 + } else { 21.932 + if (len <= skb_headlen(skb)) { 21.933 + skb->len = len; 21.934 + skb->data_len = 0; 21.935 + skb->tail = skb->data + len; 21.936 + if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) 21.937 + skb_drop_fraglist(skb); 21.938 + } else { 21.939 + skb->data_len -= skb->len - len; 21.940 + skb->len = len; 21.941 + } 21.942 + } 21.943 + 21.944 + return 0; 21.945 +} 21.946 + 21.947 +/** 21.948 + * __pskb_pull_tail - advance tail of skb header 21.949 + * @skb: buffer to reallocate 21.950 + * @delta: number of bytes to advance tail 21.951 + * 21.952 + * The function makes a sense only on a fragmented &sk_buff, 21.953 + * it expands header moving its tail forward and copying necessary 21.954 + * data from fragmented part. 21.955 + * 21.956 + * &sk_buff MUST have reference count of 1. 21.957 + * 21.958 + * Returns %NULL (and &sk_buff does not change) if pull failed 21.959 + * or value of new tail of skb in the case of success. 21.960 + * 21.961 + * All the pointers pointing into skb header may change and must be 21.962 + * reloaded after call to this function. 21.963 + */ 21.964 + 21.965 +/* Moves tail of skb head forward, copying data from fragmented part, 21.966 + * when it is necessary. 21.967 + * 1. It may fail due to malloc failure. 21.968 + * 2. It may change skb pointers. 21.969 + * 21.970 + * It is pretty complicated. Luckily, it is called only in exceptional cases. 21.971 + */ 21.972 +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta) 21.973 +{ 21.974 + int i, k, eat; 21.975 + 21.976 + /* If skb has not enough free space at tail, get new one 21.977 + * plus 128 bytes for future expansions. If we have enough 21.978 + * room at tail, reallocate without expansion only if skb is cloned. 21.979 + */ 21.980 + eat = (skb->tail+delta) - skb->end; 21.981 + 21.982 + if (eat > 0 || skb_cloned(skb)) { 21.983 + if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC)) 21.984 + return NULL; 21.985 + } 21.986 + 21.987 + if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) 21.988 + BUG(); 21.989 + 21.990 + /* Optimization: no fragments, no reasons to preestimate 21.991 + * size of pulled pages. Superb. 21.992 + */ 21.993 + if (skb_shinfo(skb)->frag_list == NULL) 21.994 + goto pull_pages; 21.995 + 21.996 + /* Estimate size of pulled pages. 
*/ 21.997 +	eat = delta; 21.998 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 21.999 +		if (skb_shinfo(skb)->frags[i].size >= eat) 21.1000 +			goto pull_pages; 21.1001 +		eat -= skb_shinfo(skb)->frags[i].size; 21.1002 +	} 21.1003 + 21.1004 +	/* If we need to update the frag list, we are in trouble. 21.1005 +	 * Certainly, it is possible to add an offset to the skb data, 21.1006 +	 * but taking into account that pulling is expected to 21.1007 +	 * be a very rare operation, it is worth fighting against 21.1008 +	 * further bloating of the skb head and crucifying ourselves here instead. 21.1009 +	 * Pure masochism, indeed. 8)8) 21.1010 +	 */ 21.1011 +	if (eat) { 21.1012 +		struct sk_buff *list = skb_shinfo(skb)->frag_list; 21.1013 +		struct sk_buff *clone = NULL; 21.1014 +		struct sk_buff *insp = NULL; 21.1015 + 21.1016 +		do { 21.1017 +			if (list == NULL) 21.1018 +				BUG(); 21.1019 + 21.1020 +			if (list->len <= eat) { 21.1021 +				/* Eaten as a whole. */ 21.1022 +				eat -= list->len; 21.1023 +				list = list->next; 21.1024 +				insp = list; 21.1025 +			} else { 21.1026 +				/* Eaten partially. */ 21.1027 + 21.1028 +				if (skb_shared(list)) { 21.1029 +					/* Sucks! We need to fork the list. :-( */ 21.1030 +					clone = skb_clone(list, GFP_ATOMIC); 21.1031 +					if (clone == NULL) 21.1032 +						return NULL; 21.1033 +					insp = list->next; 21.1034 +					list = clone; 21.1035 +				} else { 21.1036 +					/* This may be pulled without 21.1037 +					 * problems. */ 21.1038 +					insp = list; 21.1039 +				} 21.1040 +				if (pskb_pull(list, eat) == NULL) { 21.1041 +					if (clone) 21.1042 +						kfree_skb(clone); 21.1043 +					return NULL; 21.1044 +				} 21.1045 +				break; 21.1046 +			} 21.1047 +		} while (eat); 21.1048 + 21.1049 +		/* Free the pulled-out fragments. */ 21.1050 +		while ((list = skb_shinfo(skb)->frag_list) != insp) { 21.1051 +			skb_shinfo(skb)->frag_list = list->next; 21.1052 +			kfree_skb(list); 21.1053 +		} 21.1054 +		/* And insert the new clone at the head. */ 21.1055 +		if (clone) { 21.1056 +			clone->next = list; 21.1057 +			skb_shinfo(skb)->frag_list = clone; 21.1058 +		} 21.1059 +	} 21.1060 +	/* Success! Now we may commit changes to skb data. */ 21.1061 + 21.1062 +pull_pages: 21.1063 +	eat = delta; 21.1064 +	k = 0; 21.1065 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 21.1066 +		if (skb_shinfo(skb)->frags[i].size <= eat) { 21.1067 +			put_page(skb_shinfo(skb)->frags[i].page); 21.1068 +			eat -= skb_shinfo(skb)->frags[i].size; 21.1069 +		} else { 21.1070 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 21.1071 +			if (eat) { 21.1072 +				skb_shinfo(skb)->frags[k].page_offset += eat; 21.1073 +				skb_shinfo(skb)->frags[k].size -= eat; 21.1074 +				eat = 0; 21.1075 +			} 21.1076 +			k++; 21.1077 +		} 21.1078 +	} 21.1079 +	skb_shinfo(skb)->nr_frags = k; 21.1080 + 21.1081 +	skb->tail += delta; 21.1082 +	skb->data_len -= delta; 21.1083 + 21.1084 +	return skb->tail; 21.1085 +} 21.1086 + 21.1087 +/* Copy some data bits from skb to kernel buffer. */ 21.1088 + 21.1089 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) 21.1090 +{ 21.1091 +	int i, copy; 21.1092 +	int start = skb->len - skb->data_len; 21.1093 + 21.1094 +	if (offset > (int)skb->len-len) 21.1095 +		goto fault; 21.1096 + 21.1097 +	/* Copy header. 
*/ 21.1098 + if ((copy = start-offset) > 0) { 21.1099 + if (copy > len) 21.1100 + copy = len; 21.1101 + memcpy(to, skb->data + offset, copy); 21.1102 + if ((len -= copy) == 0) 21.1103 + return 0; 21.1104 + offset += copy; 21.1105 + to += copy; 21.1106 + } 21.1107 + 21.1108 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 21.1109 + int end; 21.1110 + 21.1111 + BUG_TRAP(start <= offset+len); 21.1112 + 21.1113 + end = start + skb_shinfo(skb)->frags[i].size; 21.1114 + if ((copy = end-offset) > 0) { 21.1115 + u8 *vaddr; 21.1116 + 21.1117 + if (copy > len) 21.1118 + copy = len; 21.1119 + 21.1120 + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); 21.1121 + memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+ 21.1122 + offset-start, copy); 21.1123 + kunmap_skb_frag(vaddr); 21.1124 + 21.1125 + if ((len -= copy) == 0) 21.1126 + return 0; 21.1127 + offset += copy; 21.1128 + to += copy; 21.1129 + } 21.1130 + start = end; 21.1131 + } 21.1132 + 21.1133 + if (skb_shinfo(skb)->frag_list) { 21.1134 + struct sk_buff *list; 21.1135 + 21.1136 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { 21.1137 + int end; 21.1138 + 21.1139 + BUG_TRAP(start <= offset+len); 21.1140 + 21.1141 + end = start + list->len; 21.1142 + if ((copy = end-offset) > 0) { 21.1143 + if (copy > len) 21.1144 + copy = len; 21.1145 + if (skb_copy_bits(list, offset-start, to, copy)) 21.1146 + goto fault; 21.1147 + if ((len -= copy) == 0) 21.1148 + return 0; 21.1149 + offset += copy; 21.1150 + to += copy; 21.1151 + } 21.1152 + start = end; 21.1153 + } 21.1154 + } 21.1155 + if (len == 0) 21.1156 + return 0; 21.1157 + 21.1158 +fault: 21.1159 + return -EFAULT; 21.1160 +} 21.1161 + 21.1162 +/* Checksum skb data. */ 21.1163 + 21.1164 +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum) 21.1165 +{ 21.1166 + int i, copy; 21.1167 + int start = skb->len - skb->data_len; 21.1168 + int pos = 0; 21.1169 + 21.1170 + /* Checksum header. 
*/ 21.1171 + if ((copy = start-offset) > 0) { 21.1172 + if (copy > len) 21.1173 + copy = len; 21.1174 + csum = csum_partial(skb->data+offset, copy, csum); 21.1175 + if ((len -= copy) == 0) 21.1176 + return csum; 21.1177 + offset += copy; 21.1178 + pos = copy; 21.1179 + } 21.1180 + 21.1181 + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 21.1182 + int end; 21.1183 + 21.1184 + BUG_TRAP(start <= offset+len); 21.1185 + 21.1186 + end = start + skb_shinfo(skb)->frags[i].size; 21.1187 + if ((copy = end-offset) > 0) { 21.1188 + unsigned int csum2; 21.1189 + u8 *vaddr; 21.1190 + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 21.1191 + 21.1192 + if (copy > len) 21.1193 + copy = len; 21.1194 + vaddr = kmap_skb_frag(frag); 21.1195 + csum2 = csum_partial(vaddr + frag->page_offset + 21.1196 + offset-start, copy, 0); 21.1197 + kunmap_skb_frag(vaddr); 21.1198 + csum = csum_block_add(csum, csum2, pos); 21.1199 + if (!(len -= copy)) 21.1200 + return csum; 21.1201 + offset += copy; 21.1202 + pos += copy; 21.1203 + } 21.1204 + start = end; 21.1205 + } 21.1206 + 21.1207 + if (skb_shinfo(skb)->frag_list) { 21.1208 + struct sk_buff *list; 21.1209 + 21.1210 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { 21.1211 + int end; 21.1212 + 21.1213 + BUG_TRAP(start <= offset+len); 21.1214 + 21.1215 + end = start + list->len; 21.1216 + if ((copy = end-offset) > 0) { 21.1217 + unsigned int csum2; 21.1218 + if (copy > len) 21.1219 + copy = len; 21.1220 + csum2 = skb_checksum(list, offset-start, copy, 0); 21.1221 + csum = csum_block_add(csum, csum2, pos); 21.1222 + if ((len -= copy) == 0) 21.1223 + return csum; 21.1224 + offset += copy; 21.1225 + pos += copy; 21.1226 + } 21.1227 + start = end; 21.1228 + } 21.1229 + } 21.1230 + if (len == 0) 21.1231 + return csum; 21.1232 + 21.1233 + BUG(); 21.1234 + return csum; 21.1235 +} 21.1236 + 21.1237 +/* Both of above in one bottle. */ 21.1238 + 21.1239 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum) 21.1240 +{ 21.1241 + int i, copy; 21.1242 + int start = skb->len - skb->data_len; 21.1243 + int pos = 0; 21.1244 + 21.1245 + /* Copy header. 
*/ 21.1246 + if ((copy = start-offset) > 0) { 21.1247 + if (copy > len) 21.1248 + copy = len; 21.1249 + csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum); 21.1250 + if ((len -= copy) == 0) 21.1251 + return csum; 21.1252 + offset += copy; 21.1253 + to += copy; 21.1254 + pos = copy; 21.1255 + } 21.1256 + 21.1257 + for (i=0; i<skb_shinfo(skb)->nr_frags; i++) { 21.1258 + int end; 21.1259 + 21.1260 + BUG_TRAP(start <= offset+len); 21.1261 + 21.1262 + end = start + skb_shinfo(skb)->frags[i].size; 21.1263 + if ((copy = end-offset) > 0) { 21.1264 + unsigned int csum2; 21.1265 + u8 *vaddr; 21.1266 + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 21.1267 + 21.1268 + if (copy > len) 21.1269 + copy = len; 21.1270 + vaddr = kmap_skb_frag(frag); 21.1271 + csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset + 21.1272 + offset-start, to, copy, 0); 21.1273 + kunmap_skb_frag(vaddr); 21.1274 + csum = csum_block_add(csum, csum2, pos); 21.1275 + if (!(len -= copy)) 21.1276 + return csum; 21.1277 + offset += copy; 21.1278 + to += copy; 21.1279 + pos += copy; 21.1280 + } 21.1281 + start = end; 21.1282 + } 21.1283 + 21.1284 + if (skb_shinfo(skb)->frag_list) { 21.1285 + struct sk_buff *list; 21.1286 + 21.1287 + for (list = skb_shinfo(skb)->frag_list; list; list=list->next) { 21.1288 + unsigned int csum2; 21.1289 + int end; 21.1290 + 21.1291 + BUG_TRAP(start <= offset+len); 21.1292 + 21.1293 + end = start + list->len; 21.1294 + if ((copy = end-offset) > 0) { 21.1295 + if (copy > len) 21.1296 + copy = len; 21.1297 + csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0); 21.1298 + csum = csum_block_add(csum, csum2, pos); 21.1299 + if ((len -= copy) == 0) 21.1300 + return csum; 21.1301 + offset += copy; 21.1302 + to += copy; 21.1303 + pos += copy; 21.1304 + } 21.1305 + start = end; 21.1306 + } 21.1307 + } 21.1308 + if (len == 0) 21.1309 + return csum; 21.1310 + 21.1311 + BUG(); 21.1312 + return csum; 21.1313 +} 21.1314 + 21.1315 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 21.1316 +{ 21.1317 + unsigned int csum; 21.1318 + long csstart; 21.1319 + 21.1320 + if (skb->ip_summed == CHECKSUM_HW) 21.1321 + csstart = skb->h.raw - skb->data; 21.1322 + else 21.1323 + csstart = skb->len - skb->data_len; 21.1324 + 21.1325 + if (csstart > skb->len - skb->data_len) 21.1326 + BUG(); 21.1327 + 21.1328 + memcpy(to, skb->data, csstart); 21.1329 + 21.1330 + csum = 0; 21.1331 + if (csstart != skb->len) 21.1332 + csum = skb_copy_and_csum_bits(skb, csstart, to+csstart, 21.1333 + skb->len-csstart, 0); 21.1334 + 21.1335 + if (skb->ip_summed == CHECKSUM_HW) { 21.1336 + long csstuff = csstart + skb->csum; 21.1337 + 21.1338 + *((unsigned short *)(to + csstuff)) = csum_fold(csum); 21.1339 + } 21.1340 +} 21.1341 + 21.1342 +#if 0 21.1343 +/* 21.1344 + * Tune the memory allocator for a new MTU size. 
21.1345 + */ 21.1346 +void skb_add_mtu(int mtu) 21.1347 +{ 21.1348 + /* Must match allocation in alloc_skb */ 21.1349 + mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); 21.1350 + 21.1351 + kmem_add_cache_size(mtu); 21.1352 +} 21.1353 +#endif 21.1354 + 21.1355 +void __init skb_init(void) 21.1356 +{ 21.1357 + int i; 21.1358 + 21.1359 + skbuff_head_cache = kmem_cache_create("skbuff_head_cache", 21.1360 + sizeof(struct sk_buff), 21.1361 + 0, 21.1362 + SLAB_HWCACHE_ALIGN, 21.1363 + skb_headerinit, NULL); 21.1364 + if (!skbuff_head_cache) 21.1365 + panic("cannot create skbuff cache"); 21.1366 + 21.1367 + init_net_pages(NUM_NET_PAGES); 21.1368 + 21.1369 + for (i=0; i<NR_CPUS; i++) 21.1370 + skb_queue_head_init(&skb_head_pool[i].list); 21.1371 +}
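
The headroom helpers imported above (skb_realloc_headroom() and skb_copy_expand()) exist so that code which must prepend an encapsulation header can obtain a buffer with a private, writable header area and enough space in front of skb->data. A minimal sketch of that calling pattern, assuming a hypothetical ENC_HLEN header length (everything other than the skb API itself is illustrative):

    #include <linux/skbuff.h>
    #include <linux/string.h>

    #define ENC_HLEN 8	/* hypothetical encapsulation header length */

    /* Sketch only: guarantee ENC_HLEN writable bytes of headroom, then
     * prepend a zeroed header. skb_realloc_headroom() returns a separate
     * sk_buff with a private head, so the original reference is dropped
     * on both paths.
     */
    static struct sk_buff *enc_prepend(struct sk_buff *skb)
    {
        if (skb_headroom(skb) < ENC_HLEN || skb_cloned(skb)) {
            struct sk_buff *nskb = skb_realloc_headroom(skb, ENC_HLEN);
            kfree_skb(skb);
            if (nskb == NULL)
                return NULL;	/* allocation failed */
            skb = nskb;
        }
        memset(skb_push(skb, ENC_HLEN), 0, ENC_HLEN);
        return skb;
    }

skb_copy_expand() covers the same need when extra tailroom is required as well, at the cost of always copying the data.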
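__pskb_pull_tail() is rarely called directly; protocol handlers normally reach it through the pskb_may_pull() inline in skbuff.h, which linearises just enough of a fragmented buffer to make a header readable. A sketch of that pattern, assuming an IPv4 receive path (the example function and error codes are illustrative):

    #include <linux/skbuff.h>
    #include <linux/ip.h>
    #include <linux/errno.h>

    /* Sketch only: make sure the whole IP header, options included, is in
     * the linear head area before dereferencing it. A successful pull may
     * reallocate skb->data, so header pointers are re-derived from the skb
     * (whose mac/h/nh pointers are adjusted by pskb_expand_head() above).
     */
    static int examine_ip_header(struct sk_buff *skb)
    {
        struct iphdr *iph;

        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
            return -EINVAL;	/* truncated packet */

        iph = skb->nh.iph;
        if (!pskb_may_pull(skb, iph->ihl * 4))
            return -EINVAL;	/* options not fully present */

        return skb->nh.iph->protocol;	/* reload after the second pull */
    }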
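skb_copy_bits() and skb_checksum() are the primitives that let callers treat a possibly nonlinear skb as one flat byte range: the former copies an arbitrary window into a linear kernel buffer, the latter accumulates a ones-complement checksum over such a window, walking the head, the page fragments and the frag_list alike. A hedged sketch (the eight-byte peek buffer is purely illustrative):

    #include <linux/skbuff.h>
    #include <net/checksum.h>

    /* Sketch only: copy the first bytes of the packet into a stack buffer
     * and verify an Internet checksum taken over the whole skb. For a
     * correct checksum, folding the accumulated sum yields zero.
     */
    static int packet_checksum_ok(const struct sk_buff *skb)
    {
        unsigned char peek[8];

        if (skb->len >= sizeof(peek) &&
            skb_copy_bits(skb, 0, peek, sizeof(peek)) < 0)
            return 0;	/* requested window outside the buffer */

        return csum_fold(skb_checksum(skb, 0, skb->len, 0)) == 0;
    }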