direct-io.hg

changeset 2519:ab0042a6d724

bitkeeper revision 1.1159.1.167 (4151ef22u9Wl4yRwTMkDLTop-6XohA)

Merge ssh://xenbk@gandalf.hpl.hp.com//var/bk/xeno-unstable.bk
into labyrinth.cl.cam.ac.uk:/auto/anfs/scratch/labyrinth/iap10/xeno-clone/xeno.bk

author:   iap10@labyrinth.cl.cam.ac.uk
date:     Wed Sep 22 21:31:14 2004 +0000 (2004-09-22)
parents:  f7b2e90dac20 b1a31ec18454
children: 0bd31a3188c2 a33e97958db8 28ffe4d8f416
files:
    .rootkeys
    linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c
    linux-2.4.27-xen-sparse/include/linux/skbuff.h
    linux-2.4.27-xen-sparse/net/core/skbuff.c
    linux-2.6.8.1-xen-sparse/arch/xen/i386/kernel/entry.S
    linux-2.6.8.1-xen-sparse/arch/xen/i386/kernel/pci-dma.c
    linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/hypervisor.c
    linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/ioremap.c
    linux-2.6.8.1-xen-sparse/arch/xen/kernel/reboot.c
    linux-2.6.8.1-xen-sparse/arch/xen/kernel/skbuff.c
    linux-2.6.8.1-xen-sparse/drivers/xen/netback/netback.c
    linux-2.6.8.1-xen-sparse/drivers/xen/netfront/netfront.c
    linux-2.6.8.1-xen-sparse/include/linux/skbuff.h
    linux-2.6.8.1-xen-sparse/net/core/skbuff.c
    netbsd-2.0-xen-sparse/nbconfig-xen
    netbsd-2.0-xen-sparse/nbmake-xen
    netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN
    netbsd-2.0-xen-sparse/sys/arch/xen/i386/hypervisor_machdep.c
    netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S
    netbsd-2.0-xen-sparse/sys/arch/xen/i386/vector.S
    netbsd-2.0-xen-sparse/sys/arch/xen/include/evtchn.h
    netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h
    netbsd-2.0-xen-sparse/sys/arch/xen/include/xen.h
    netbsd-2.0-xen-sparse/sys/arch/xen/xen/clock.c
    netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c
    xen/arch/x86/domain.c
    xen/common/kernel.c
line diff
     1.1 --- a/.rootkeys	Wed Sep 22 11:02:20 2004 +0000
     1.2 +++ b/.rootkeys	Wed Sep 22 21:31:14 2004 +0000
     1.3 @@ -126,6 +126,7 @@ 3e5a4e681xMPdF9xCMwpyfuYMySU5g linux-2.4
     1.4  409ba2e7akOFqQUg6Qyg2s28xcXiMg linux-2.4.27-xen-sparse/mm/page_alloc.c
     1.5  3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.27-xen-sparse/mm/swapfile.c
     1.6  41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.27-xen-sparse/mm/vmalloc.c
     1.7 +41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.27-xen-sparse/net/core/skbuff.c
     1.8  413aa1d0oNP8HXLvfPuMe6cSroUfSA linux-2.6.8.1-patches/agpgart.patch
     1.9  413aa1d0ewvSv-ohnNnQQNGsbPTTNA linux-2.6.8.1-patches/drm.patch
    1.10  40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.8.1-xen-sparse/arch/xen/Kconfig
    1.11 @@ -259,6 +260,7 @@ 4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6
    1.12  40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.8.1-xen-sparse/mkbuildtree
    1.13  412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.8.1-xen-sparse/mm/memory.c
    1.14  410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.8.1-xen-sparse/mm/page_alloc.c
    1.15 +41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.8.1-xen-sparse/net/core/skbuff.c
    1.16  4149ec79wMpIHdvbntxqVGLRZZjPxw linux-2.6.8.1-xen-sparse/net/ipv4/raw.c
    1.17  413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile
    1.18  413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree
     2.1 --- a/linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c	Wed Sep 22 11:02:20 2004 +0000
     2.2 +++ b/linux-2.4.27-xen-sparse/arch/xen/drivers/balloon/balloon.c	Wed Sep 22 21:31:14 2004 +0000
     2.3 @@ -81,8 +81,8 @@ static unsigned long inflate_balloon(uns
     2.4  
     2.5      for ( i = 0; i < num_pages; i++, currp++ )
     2.6      {
     2.7 -	struct page *page = alloc_page(GFP_HIGHUSER);
     2.8 -	unsigned long pfn = page - mem_map;
     2.9 +        struct page *page = alloc_page(GFP_HIGHUSER);
    2.10 +        unsigned long pfn = page - mem_map;
    2.11  
    2.12          /* If allocation fails then free all reserved pages. */
    2.13          if ( page == NULL )
    2.14 @@ -92,7 +92,7 @@ static unsigned long inflate_balloon(uns
    2.15              currp = parray;
    2.16              for ( j = 0; j < i; j++, currp++ )
    2.17                  __free_page((struct page *) (mem_map + *currp));
    2.18 -	    ret = -EFAULT;
    2.19 +            ret = -EFAULT;
    2.20              goto cleanup;
    2.21          }
    2.22  
    2.23 @@ -102,10 +102,10 @@ static unsigned long inflate_balloon(uns
    2.24  
    2.25      for ( i = 0, currp = parray; i < num_pages; i++, currp++ )
    2.26      {
    2.27 -	unsigned long mfn = phys_to_machine_mapping[*currp];
    2.28 +        unsigned long mfn = phys_to_machine_mapping[*currp];
    2.29          curraddr = (unsigned long)page_address(mem_map + *currp);
    2.30          /* Blow away page contents for security, and also p.t. ref if any. */
    2.31 -	if ( curraddr != 0 )
    2.32 +        if ( curraddr != 0 )
    2.33          {
    2.34              scrub_pages(curraddr, 1);
    2.35              queue_l1_entry_update(get_ptep(curraddr), 0);
    2.36 @@ -122,7 +122,8 @@ static unsigned long inflate_balloon(uns
    2.37          *currp = mfn;
    2.38      }
    2.39  
    2.40 -    XEN_flush_page_update_queue();
    2.41 +    /* Flush updates through and flush the TLB. */
    2.42 +    xen_tlb_flush();
    2.43  
    2.44      ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
    2.45                                  parray, num_pages, 0);
    2.46 @@ -168,7 +169,7 @@ static unsigned long process_returned_pa
    2.47          {
    2.48              phys_to_machine_mapping[i] = *curr;
    2.49              queue_machphys_update(*curr, i);
    2.50 -	    if (i<max_low_pfn)
    2.51 +            if (i<max_low_pfn)
    2.52                queue_l1_entry_update(
    2.53                  get_ptep((unsigned long)__va(i << PAGE_SHIFT)),
    2.54                  ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
    2.55 @@ -191,7 +192,7 @@ unsigned long deflate_balloon(unsigned l
    2.56      if ( num_pages > credit )
    2.57      {
    2.58          printk(KERN_ERR "deflate_balloon: %lu pages > %lu credit.\n",
    2.59 -			num_pages, credit);
    2.60 +               num_pages, credit);
    2.61          return -EAGAIN;
    2.62      }
    2.63  
    2.64 @@ -202,21 +203,19 @@ unsigned long deflate_balloon(unsigned l
    2.65          return 0;
    2.66      }
    2.67  
    2.68 -    XEN_flush_page_update_queue();
    2.69 -
    2.70      ret = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
    2.71                                  parray, num_pages, 0);
    2.72      if ( unlikely(ret != num_pages) )
    2.73      {
    2.74          printk(KERN_ERR "deflate_balloon: xen increase_reservation err %lx\n",
    2.75 -			ret);
    2.76 +               ret);
    2.77          goto cleanup;
    2.78      }
    2.79  
    2.80      if ( (ret = process_returned_pages(parray, num_pages)) < num_pages )
    2.81      {
    2.82          printk(KERN_WARNING
    2.83 -	   "deflate_balloon: restored only %lx of %lx pages.\n",
    2.84 +               "deflate_balloon: restored only %lx of %lx pages.\n",
    2.85             ret, num_pages);
    2.86          goto cleanup;
    2.87      }
    2.88 @@ -323,7 +322,6 @@ claim_new_pages(unsigned long num_pages)
    2.89          return 0;
    2.90      }
    2.91  
    2.92 -    XEN_flush_page_update_queue();
    2.93      new_page_cnt = HYPERVISOR_dom_mem_op(MEMOP_increase_reservation, 
    2.94                                  parray, num_pages, 0);
    2.95      if ( new_page_cnt != num_pages )
    2.96 @@ -332,48 +330,50 @@ claim_new_pages(unsigned long num_pages)
    2.97              "claim_new_pages: xen granted only %lu of %lu requested pages\n",
    2.98              new_page_cnt, num_pages);
    2.99  
   2.100 -	/* 
   2.101 -	 * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
   2.102 -	 * usually can dribble out a few pages and then hangs.
   2.103 -	 */
   2.104 -	if ( new_page_cnt < 1000 )
   2.105 +        /* 
   2.106 +         * Avoid xen lockup when user forgot to setdomainmaxmem. Xen
   2.107 +         * usually can dribble out a few pages and then hangs.
   2.108 +         */
   2.109 +        if ( new_page_cnt < 1000 )
   2.110          {
   2.111              printk(KERN_WARNING "Remember to use setdomainmaxmem\n");
   2.112 -	    HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
   2.113 +            HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
   2.114                                  parray, new_page_cnt, 0);
   2.115              return -EFAULT;
   2.116 -	}
   2.117 +        }
   2.118      }
   2.119      memcpy(phys_to_machine_mapping+most_seen_pages, parray,
   2.120             new_page_cnt * sizeof(unsigned long));
   2.121  
   2.122      pagetable_extend(most_seen_pages,new_page_cnt);
   2.123  
   2.124 -    for (pfn = most_seen_pages, curr = parray;
   2.125 -	    pfn < most_seen_pages+new_page_cnt;
   2.126 -            pfn++, curr++ )
   2.127 +    for ( pfn = most_seen_pages, curr = parray;
   2.128 +          pfn < most_seen_pages+new_page_cnt;
   2.129 +          pfn++, curr++ )
   2.130      {
   2.131          struct page *page = mem_map + pfn;
   2.132  
   2.133  #ifndef CONFIG_HIGHMEM
   2.134 -	if (pfn>=max_low_pfn) {
   2.135 +        if ( pfn>=max_low_pfn )
   2.136 +        {
   2.137              printk(KERN_WARNING "Warning only %ldMB will be used.\n",
   2.138                 pfn>>PAGE_TO_MB_SHIFT);
   2.139              printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
   2.140 -	    break;
   2.141 -	}
   2.142 +            break;
   2.143 +        }
   2.144  #endif
   2.145 -	queue_machphys_update(*curr, pfn);
   2.146 -	XEN_flush_page_update_queue();
   2.147 -	if (pfn<max_low_pfn)  {
   2.148 -		queue_l1_entry_update(get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
   2.149 -			((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
   2.150 -		XEN_flush_page_update_queue();
   2.151 -		}
   2.152 -
   2.153 +        queue_machphys_update(*curr, pfn);
   2.154 +        if ( pfn < max_low_pfn )
   2.155 +            queue_l1_entry_update(
   2.156 +                get_ptep((unsigned long)__va(pfn << PAGE_SHIFT)),
   2.157 +                ((*curr) << PAGE_SHIFT) | pgprot_val(PAGE_KERNEL));
   2.158 +        
   2.159 +        XEN_flush_page_update_queue();
   2.160 +        
   2.161          /* this next bit mimics arch/xen/mm/init.c:one_highpage_init() */
   2.162          ClearPageReserved(page);
   2.163 -        if (pfn>=max_low_pfn) set_bit(PG_highmem, &page->flags);
   2.164 +        if ( pfn >= max_low_pfn )
   2.165 +            set_bit(PG_highmem, &page->flags);
   2.166          set_page_count(page, 1);
   2.167          __free_page(page);
   2.168      }
   2.169 @@ -395,9 +395,8 @@ static int balloon_write(struct file *fi
   2.170      if ( !capable(CAP_SYS_ADMIN) )
   2.171          return -EPERM;
   2.172  
   2.173 -    if (count>sizeof memstring) {
   2.174 +    if ( count > sizeof(memstring) )
   2.175          return -EFBIG;
   2.176 -    }
   2.177  
   2.178      len = strnlen_user(buffer, count);
   2.179      if ( len == 0 ) return -EBADMSG;
   2.180 @@ -406,39 +405,47 @@ static int balloon_write(struct file *fi
   2.181          return -EFAULT;
   2.182  
   2.183      endchar = memstring;
   2.184 -    for(i=0; i<len; ++i,++endchar) {
   2.185 -        if ('0'>memstring[i] || memstring[i]>'9') break;
   2.186 -    }
   2.187 -    if (i==0) return -EBADMSG;
   2.188 +    for ( i = 0; i < len; ++i, ++endchar )
   2.189 +        if ( (memstring[i] < '0') || (memstring[i] > '9') )
   2.190 +            break;
   2.191 +    if ( i == 0 )
   2.192 +        return -EBADMSG;
   2.193  
   2.194      targetbytes = memparse(memstring,&endchar);
   2.195      target = targetbytes >> PAGE_SHIFT;
   2.196  
   2.197 -    if (target < current_pages) {
   2.198 +    if ( target < current_pages )
   2.199 +    {
   2.200          int change = inflate_balloon(current_pages-target);
   2.201 -        if (change<=0) return change;
   2.202 +        if ( change <= 0 )
   2.203 +            return change;
   2.204  
   2.205          current_pages -= change;
   2.206          printk(KERN_INFO "Relinquish %dMB to xen. Domain now has %luMB\n",
   2.207              change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   2.208      }
   2.209 -    else if (target > current_pages) {
   2.210 +    else if ( target > current_pages )
   2.211 +    {
   2.212          int change, reclaim = min(target,most_seen_pages) - current_pages;
   2.213  
   2.214 -        if (reclaim) {
   2.215 +        if ( reclaim )
   2.216 +        {
   2.217              change = deflate_balloon( reclaim);
   2.218 -            if (change<=0) return change;
   2.219 +            if ( change <= 0 )
   2.220 +                return change;
   2.221              current_pages += change;
   2.222              printk(KERN_INFO "Reclaim %dMB from xen. Domain now has %luMB\n",
   2.223                  change>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   2.224          }
   2.225  
   2.226 -        if (most_seen_pages<target) {
   2.227 +        if ( most_seen_pages < target )
   2.228 +        {
   2.229              int growth = claim_new_pages(target-most_seen_pages);
   2.230 -	    if (growth<=0) return growth;
   2.231 +            if ( growth <= 0 )
   2.232 +                return growth;
   2.233              most_seen_pages += growth;
   2.234              current_pages += growth;
   2.235 -            printk(KERN_INFO "Granted %dMB new mem by xen. Domain now has %luMB\n",
   2.236 +            printk(KERN_INFO "Granted %dMB new mem. Dom now has %luMB\n",
   2.237                  growth>>PAGE_TO_MB_SHIFT, current_pages>>PAGE_TO_MB_SHIFT);
   2.238          }
   2.239      }
     3.1 --- a/linux-2.4.27-xen-sparse/include/linux/skbuff.h	Wed Sep 22 11:02:20 2004 +0000
     3.2 +++ b/linux-2.4.27-xen-sparse/include/linux/skbuff.h	Wed Sep 22 21:31:14 2004 +0000
     3.3 @@ -231,6 +231,7 @@ struct sk_buff {
     3.4  
     3.5  extern void			__kfree_skb(struct sk_buff *skb);
     3.6  extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
     3.7 +extern struct sk_buff *		alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, int priority);
     3.8  extern void			kfree_skbmem(struct sk_buff *skb);
     3.9  extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
    3.10  extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/linux-2.4.27-xen-sparse/net/core/skbuff.c	Wed Sep 22 21:31:14 2004 +0000
     4.3 @@ -0,0 +1,1309 @@
     4.4 +/*
     4.5 + *	Routines having to do with the 'struct sk_buff' memory handlers.
     4.6 + *
     4.7 + *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
     4.8 + *			Florian La Roche <rzsfl@rz.uni-sb.de>
     4.9 + *
    4.10 + *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
    4.11 + *
    4.12 + *	Fixes:	
    4.13 + *		Alan Cox	:	Fixed the worst of the load balancer bugs.
    4.14 + *		Dave Platt	:	Interrupt stacking fix.
    4.15 + *	Richard Kooijman	:	Timestamp fixes.
    4.16 + *		Alan Cox	:	Changed buffer format.
    4.17 + *		Alan Cox	:	destructor hook for AF_UNIX etc.
    4.18 + *		Linus Torvalds	:	Better skb_clone.
    4.19 + *		Alan Cox	:	Added skb_copy.
    4.20 + *		Alan Cox	:	Added all the changed routines Linus
    4.21 + *					only put in the headers
    4.22 + *		Ray VanTassle	:	Fixed --skb->lock in free
    4.23 + *		Alan Cox	:	skb_copy copy arp field
    4.24 + *		Andi Kleen	:	slabified it.
    4.25 + *
    4.26 + *	NOTE:
    4.27 + *		The __skb_ routines should be called with interrupts 
    4.28 + *	disabled, or you better be *real* sure that the operation is atomic 
    4.29 + *	with respect to whatever list is being frobbed (e.g. via lock_sock()
    4.30 + *	or via disabling bottom half handlers, etc).
    4.31 + *
    4.32 + *	This program is free software; you can redistribute it and/or
    4.33 + *	modify it under the terms of the GNU General Public License
    4.34 + *	as published by the Free Software Foundation; either version
    4.35 + *	2 of the License, or (at your option) any later version.
    4.36 + */
    4.37 +
    4.38 +/*
    4.39 + *	The functions in this file will not compile correctly with gcc 2.4.x
    4.40 + */
    4.41 +
    4.42 +#include <linux/config.h>
    4.43 +#include <linux/types.h>
    4.44 +#include <linux/kernel.h>
    4.45 +#include <linux/sched.h>
    4.46 +#include <linux/mm.h>
    4.47 +#include <linux/interrupt.h>
    4.48 +#include <linux/in.h>
    4.49 +#include <linux/inet.h>
    4.50 +#include <linux/slab.h>
    4.51 +#include <linux/netdevice.h>
    4.52 +#include <linux/string.h>
    4.53 +#include <linux/skbuff.h>
    4.54 +#include <linux/cache.h>
    4.55 +#include <linux/rtnetlink.h>
    4.56 +#include <linux/init.h>
    4.57 +#include <linux/highmem.h>
    4.58 +
    4.59 +#include <net/protocol.h>
    4.60 +#include <net/dst.h>
    4.61 +#include <net/sock.h>
    4.62 +#include <net/checksum.h>
    4.63 +
    4.64 +#include <asm/uaccess.h>
    4.65 +#include <asm/system.h>
    4.66 +
    4.67 +int sysctl_hot_list_len = 128;
    4.68 +
    4.69 +static kmem_cache_t *skbuff_head_cache;
    4.70 +
    4.71 +static union {
    4.72 +	struct sk_buff_head	list;
    4.73 +	char			pad[SMP_CACHE_BYTES];
    4.74 +} skb_head_pool[NR_CPUS];
    4.75 +
    4.76 +/*
    4.77 + *	Keep out-of-line to prevent kernel bloat.
    4.78 + *	__builtin_return_address is not used because it is not always
    4.79 + *	reliable. 
    4.80 + */
    4.81 +
    4.82 +/**
    4.83 + *	skb_over_panic	- 	private function
    4.84 + *	@skb: buffer
    4.85 + *	@sz: size
    4.86 + *	@here: address
    4.87 + *
    4.88 + *	Out of line support code for skb_put(). Not user callable.
    4.89 + */
    4.90 + 
    4.91 +void skb_over_panic(struct sk_buff *skb, int sz, void *here)
    4.92 +{
    4.93 +	printk("skput:over: %p:%d put:%d dev:%s", 
    4.94 +		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
    4.95 +	BUG();
    4.96 +}
    4.97 +
    4.98 +/**
    4.99 + *	skb_under_panic	- 	private function
   4.100 + *	@skb: buffer
   4.101 + *	@sz: size
   4.102 + *	@here: address
   4.103 + *
   4.104 + *	Out of line support code for skb_push(). Not user callable.
   4.105 + */
   4.106 + 
   4.107 +
   4.108 +void skb_under_panic(struct sk_buff *skb, int sz, void *here)
   4.109 +{
   4.110 +        printk("skput:under: %p:%d put:%d dev:%s",
   4.111 +                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   4.112 +	BUG();
   4.113 +}
   4.114 +
   4.115 +static __inline__ struct sk_buff *skb_head_from_pool(void)
   4.116 +{
   4.117 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   4.118 +
   4.119 +	if (skb_queue_len(list)) {
   4.120 +		struct sk_buff *skb;
   4.121 +		unsigned long flags;
   4.122 +
   4.123 +		local_irq_save(flags);
   4.124 +		skb = __skb_dequeue(list);
   4.125 +		local_irq_restore(flags);
   4.126 +		return skb;
   4.127 +	}
   4.128 +	return NULL;
   4.129 +}
   4.130 +
   4.131 +static __inline__ void skb_head_to_pool(struct sk_buff *skb)
   4.132 +{
   4.133 +	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   4.134 +
   4.135 +	if (skb_queue_len(list) < sysctl_hot_list_len) {
   4.136 +		unsigned long flags;
   4.137 +
   4.138 +		local_irq_save(flags);
   4.139 +		__skb_queue_head(list, skb);
   4.140 +		local_irq_restore(flags);
   4.141 +
   4.142 +		return;
   4.143 +	}
   4.144 +	kmem_cache_free(skbuff_head_cache, skb);
   4.145 +}
   4.146 +
   4.147 +
   4.148 +/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
   4.149 + *	'private' fields and also do memory statistics to find all the
   4.150 + *	[BEEP] leaks.
   4.151 + * 
   4.152 + */
   4.153 +
   4.154 +/**
   4.155 + *	alloc_skb	-	allocate a network buffer
   4.156 + *	@size: size to allocate
   4.157 + *	@gfp_mask: allocation mask
   4.158 + *
   4.159 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
   4.160 + *	tail room of size bytes. The object has a reference count of one.
   4.161 + *	The return is the buffer. On a failure the return is %NULL.
   4.162 + *
   4.163 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
   4.164 + *	%GFP_ATOMIC.
   4.165 + */
   4.166 + 
   4.167 +struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
   4.168 +{
   4.169 +	struct sk_buff *skb;
   4.170 +	u8 *data;
   4.171 +
   4.172 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   4.173 +		static int count = 0;
   4.174 +		if (++count < 5) {
   4.175 +			printk(KERN_ERR "alloc_skb called nonatomically "
   4.176 +			       "from interrupt %p\n", NET_CALLER(size));
   4.177 + 			BUG();
   4.178 +		}
   4.179 +		gfp_mask &= ~__GFP_WAIT;
   4.180 +	}
   4.181 +
   4.182 +	/* Get the HEAD */
   4.183 +	skb = skb_head_from_pool();
   4.184 +	if (skb == NULL) {
   4.185 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   4.186 +		if (skb == NULL)
   4.187 +			goto nohead;
   4.188 +	}
   4.189 +
   4.190 +	/* Get the DATA. Size must match skb_add_mtu(). */
   4.191 +	size = SKB_DATA_ALIGN(size);
   4.192 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   4.193 +	if (data == NULL)
   4.194 +		goto nodata;
   4.195 +
   4.196 +	/* XXX: does not include slab overhead */ 
   4.197 +	skb->truesize = size + sizeof(struct sk_buff);
   4.198 +
   4.199 +	/* Load the data pointers. */
   4.200 +	skb->head = data;
   4.201 +	skb->data = data;
   4.202 +	skb->tail = data;
   4.203 +	skb->end = data + size;
   4.204 +
   4.205 +	/* Set up other state */
   4.206 +	skb->len = 0;
   4.207 +	skb->cloned = 0;
   4.208 +	skb->data_len = 0;
   4.209 +
   4.210 +	atomic_set(&skb->users, 1); 
   4.211 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   4.212 +	skb_shinfo(skb)->nr_frags = 0;
   4.213 +	skb_shinfo(skb)->frag_list = NULL;
   4.214 +	return skb;
   4.215 +
   4.216 +nodata:
   4.217 +	skb_head_to_pool(skb);
   4.218 +nohead:
   4.219 +	return NULL;
   4.220 +}
   4.221 +
   4.222 +/**
   4.223 + *	alloc_skb_from_cache	-	allocate a network buffer
   4.224 + *	@cp: kmem_cache from which to allocate the data area
   4.225 + *           (object size must be big enough for @size bytes + skb overheads)
   4.226 + *	@size: size to allocate
   4.227 + *	@gfp_mask: allocation mask
   4.228 + *
   4.229 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
   4.230 + *	tail room of size bytes. The object has a reference count of one.
   4.231 + *	The return is the buffer. On a failure the return is %NULL.
   4.232 + *
   4.233 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
   4.234 + *	%GFP_ATOMIC.
   4.235 + */
   4.236 + 
   4.237 +struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
   4.238 +				     unsigned int size, int gfp_mask)
   4.239 +{
   4.240 +	struct sk_buff *skb;
   4.241 +	u8 *data;
   4.242 +
   4.243 +	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
   4.244 +		static int count = 0;
   4.245 +		if (++count < 5) {
   4.246 +			printk(KERN_ERR "alloc_skb called nonatomically "
   4.247 +			       "from interrupt %p\n", NET_CALLER(size));
   4.248 + 			BUG();
   4.249 +		}
   4.250 +		gfp_mask &= ~__GFP_WAIT;
   4.251 +	}
   4.252 +
   4.253 +	/* Get the HEAD */
   4.254 +	skb = skb_head_from_pool();
   4.255 +	if (skb == NULL) {
   4.256 +		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
   4.257 +		if (skb == NULL)
   4.258 +			goto nohead;
   4.259 +	}
   4.260 +
   4.261 +	/* Get the DATA. */
   4.262 +	size = SKB_DATA_ALIGN(size);
   4.263 +	data = kmem_cache_alloc(cp, gfp_mask);
   4.264 +	if (data == NULL)
   4.265 +		goto nodata;
   4.266 +
   4.267 +	/* XXX: does not include slab overhead */ 
   4.268 +	skb->truesize = size + sizeof(struct sk_buff);
   4.269 +
   4.270 +	/* Load the data pointers. */
   4.271 +	skb->head = data;
   4.272 +	skb->data = data;
   4.273 +	skb->tail = data;
   4.274 +	skb->end = data + size;
   4.275 +
   4.276 +	/* Set up other state */
   4.277 +	skb->len = 0;
   4.278 +	skb->cloned = 0;
   4.279 +	skb->data_len = 0;
   4.280 +
   4.281 +	atomic_set(&skb->users, 1); 
   4.282 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   4.283 +	skb_shinfo(skb)->nr_frags = 0;
   4.284 +	skb_shinfo(skb)->frag_list = NULL;
   4.285 +	return skb;
   4.286 +
   4.287 +nodata:
   4.288 +	skb_head_to_pool(skb);
   4.289 +nohead:
   4.290 +	return NULL;
   4.291 +}
   4.292 +
   4.293 +
   4.294 +/*
   4.295 + *	Slab constructor for a skb head. 
   4.296 + */ 
   4.297 +static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
   4.298 +				  unsigned long flags)
   4.299 +{
   4.300 +	struct sk_buff *skb = p;
   4.301 +
   4.302 +	skb->next = NULL;
   4.303 +	skb->prev = NULL;
   4.304 +	skb->list = NULL;
   4.305 +	skb->sk = NULL;
   4.306 +	skb->stamp.tv_sec=0;	/* No idea about time */
   4.307 +	skb->dev = NULL;
   4.308 +	skb->real_dev = NULL;
   4.309 +	skb->dst = NULL;
   4.310 +	memset(skb->cb, 0, sizeof(skb->cb));
   4.311 +	skb->pkt_type = PACKET_HOST;	/* Default type */
   4.312 +	skb->ip_summed = 0;
   4.313 +	skb->priority = 0;
   4.314 +	skb->security = 0;	/* By default packets are insecure */
   4.315 +	skb->destructor = NULL;
   4.316 +
   4.317 +#ifdef CONFIG_NETFILTER
   4.318 +	skb->nfmark = skb->nfcache = 0;
   4.319 +	skb->nfct = NULL;
   4.320 +#ifdef CONFIG_NETFILTER_DEBUG
   4.321 +	skb->nf_debug = 0;
   4.322 +#endif
   4.323 +#endif
   4.324 +#ifdef CONFIG_NET_SCHED
   4.325 +	skb->tc_index = 0;
   4.326 +#endif
   4.327 +}
   4.328 +
   4.329 +static void skb_drop_fraglist(struct sk_buff *skb)
   4.330 +{
   4.331 +	struct sk_buff *list = skb_shinfo(skb)->frag_list;
   4.332 +
   4.333 +	skb_shinfo(skb)->frag_list = NULL;
   4.334 +
   4.335 +	do {
   4.336 +		struct sk_buff *this = list;
   4.337 +		list = list->next;
   4.338 +		kfree_skb(this);
   4.339 +	} while (list);
   4.340 +}
   4.341 +
   4.342 +static void skb_clone_fraglist(struct sk_buff *skb)
   4.343 +{
   4.344 +	struct sk_buff *list;
   4.345 +
   4.346 +	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
   4.347 +		skb_get(list);
   4.348 +}
   4.349 +
   4.350 +static void skb_release_data(struct sk_buff *skb)
   4.351 +{
   4.352 +	if (!skb->cloned ||
   4.353 +	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
   4.354 +		if (skb_shinfo(skb)->nr_frags) {
   4.355 +			int i;
   4.356 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
   4.357 +				put_page(skb_shinfo(skb)->frags[i].page);
   4.358 +		}
   4.359 +
   4.360 +		if (skb_shinfo(skb)->frag_list)
   4.361 +			skb_drop_fraglist(skb);
   4.362 +
   4.363 +		kfree(skb->head);
   4.364 +	}
   4.365 +}
   4.366 +
   4.367 +/*
   4.368 + *	Free an skbuff by memory without cleaning the state. 
   4.369 + */
   4.370 +void kfree_skbmem(struct sk_buff *skb)
   4.371 +{
   4.372 +	skb_release_data(skb);
   4.373 +	skb_head_to_pool(skb);
   4.374 +}
   4.375 +
   4.376 +/**
   4.377 + *	__kfree_skb - private function 
   4.378 + *	@skb: buffer
   4.379 + *
   4.380 + *	Free an sk_buff. Release anything attached to the buffer. 
   4.381 + *	Clean the state. This is an internal helper function. Users should
   4.382 + *	always call kfree_skb
   4.383 + */
   4.384 +
   4.385 +void __kfree_skb(struct sk_buff *skb)
   4.386 +{
   4.387 +	if (skb->list) {
   4.388 +	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
   4.389 +		       "on a list (from %p).\n", NET_CALLER(skb));
   4.390 +		BUG();
   4.391 +	}
   4.392 +
   4.393 +	dst_release(skb->dst);
   4.394 +	if(skb->destructor) {
   4.395 +		if (in_irq()) {
   4.396 +			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
   4.397 +				NET_CALLER(skb));
   4.398 +		}
   4.399 +		skb->destructor(skb);
   4.400 +	}
   4.401 +#ifdef CONFIG_NETFILTER
   4.402 +	nf_conntrack_put(skb->nfct);
   4.403 +#endif
   4.404 +	skb_headerinit(skb, NULL, 0);  /* clean state */
   4.405 +	kfree_skbmem(skb);
   4.406 +}
   4.407 +
   4.408 +/**
   4.409 + *	skb_clone	-	duplicate an sk_buff
   4.410 + *	@skb: buffer to clone
   4.411 + *	@gfp_mask: allocation priority
   4.412 + *
   4.413 + *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
   4.414 + *	copies share the same packet data but not structure. The new
   4.415 + *	buffer has a reference count of 1. If the allocation fails the 
   4.416 + *	function returns %NULL otherwise the new buffer is returned.
   4.417 + *	
   4.418 + *	If this function is called from an interrupt gfp_mask() must be
   4.419 + *	%GFP_ATOMIC.
   4.420 + */
   4.421 +
   4.422 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
   4.423 +{
   4.424 +	struct sk_buff *n;
   4.425 +
   4.426 +	n = skb_head_from_pool();
   4.427 +	if (!n) {
   4.428 +		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
   4.429 +		if (!n)
   4.430 +			return NULL;
   4.431 +	}
   4.432 +
   4.433 +#define C(x) n->x = skb->x
   4.434 +
   4.435 +	n->next = n->prev = NULL;
   4.436 +	n->list = NULL;
   4.437 +	n->sk = NULL;
   4.438 +	C(stamp);
   4.439 +	C(dev);
   4.440 +	C(real_dev);
   4.441 +	C(h);
   4.442 +	C(nh);
   4.443 +	C(mac);
   4.444 +	C(dst);
   4.445 +	dst_clone(n->dst);
   4.446 +	memcpy(n->cb, skb->cb, sizeof(skb->cb));
   4.447 +	C(len);
   4.448 +	C(data_len);
   4.449 +	C(csum);
   4.450 +	n->cloned = 1;
   4.451 +	C(pkt_type);
   4.452 +	C(ip_summed);
   4.453 +	C(priority);
   4.454 +	atomic_set(&n->users, 1);
   4.455 +	C(protocol);
   4.456 +	C(security);
   4.457 +	C(truesize);
   4.458 +	C(head);
   4.459 +	C(data);
   4.460 +	C(tail);
   4.461 +	C(end);
   4.462 +	n->destructor = NULL;
   4.463 +#ifdef CONFIG_NETFILTER
   4.464 +	C(nfmark);
   4.465 +	C(nfcache);
   4.466 +	C(nfct);
   4.467 +#ifdef CONFIG_NETFILTER_DEBUG
   4.468 +	C(nf_debug);
   4.469 +#endif
   4.470 +#endif /*CONFIG_NETFILTER*/
   4.471 +#if defined(CONFIG_HIPPI)
   4.472 +	C(private);
   4.473 +#endif
   4.474 +#ifdef CONFIG_NET_SCHED
   4.475 +	C(tc_index);
   4.476 +#endif
   4.477 +
   4.478 +	atomic_inc(&(skb_shinfo(skb)->dataref));
   4.479 +	skb->cloned = 1;
   4.480 +#ifdef CONFIG_NETFILTER
   4.481 +	nf_conntrack_get(skb->nfct);
   4.482 +#endif
   4.483 +	return n;
   4.484 +}
   4.485 +
   4.486 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
   4.487 +{
   4.488 +	/*
   4.489 +	 *	Shift between the two data areas in bytes
   4.490 +	 */
   4.491 +	unsigned long offset = new->data - old->data;
   4.492 +
   4.493 +	new->list=NULL;
   4.494 +	new->sk=NULL;
   4.495 +	new->dev=old->dev;
   4.496 +	new->real_dev=old->real_dev;
   4.497 +	new->priority=old->priority;
   4.498 +	new->protocol=old->protocol;
   4.499 +	new->dst=dst_clone(old->dst);
   4.500 +	new->h.raw=old->h.raw+offset;
   4.501 +	new->nh.raw=old->nh.raw+offset;
   4.502 +	new->mac.raw=old->mac.raw+offset;
   4.503 +	memcpy(new->cb, old->cb, sizeof(old->cb));
   4.504 +	atomic_set(&new->users, 1);
   4.505 +	new->pkt_type=old->pkt_type;
   4.506 +	new->stamp=old->stamp;
   4.507 +	new->destructor = NULL;
   4.508 +	new->security=old->security;
   4.509 +#ifdef CONFIG_NETFILTER
   4.510 +	new->nfmark=old->nfmark;
   4.511 +	new->nfcache=old->nfcache;
   4.512 +	new->nfct=old->nfct;
   4.513 +	nf_conntrack_get(new->nfct);
   4.514 +#ifdef CONFIG_NETFILTER_DEBUG
   4.515 +	new->nf_debug=old->nf_debug;
   4.516 +#endif
   4.517 +#endif
   4.518 +#ifdef CONFIG_NET_SCHED
   4.519 +	new->tc_index = old->tc_index;
   4.520 +#endif
   4.521 +}
   4.522 +
   4.523 +/**
   4.524 + *	skb_copy	-	create private copy of an sk_buff
   4.525 + *	@skb: buffer to copy
   4.526 + *	@gfp_mask: allocation priority
   4.527 + *
   4.528 + *	Make a copy of both an &sk_buff and its data. This is used when the
   4.529 + *	caller wishes to modify the data and needs a private copy of the 
   4.530 + *	data to alter. Returns %NULL on failure or the pointer to the buffer
   4.531 + *	on success. The returned buffer has a reference count of 1.
   4.532 + *
   4.533 + *	As by-product this function converts non-linear &sk_buff to linear
   4.534 + *	one, so that &sk_buff becomes completely private and caller is allowed
   4.535 + *	to modify all the data of returned buffer. This means that this
   4.536 + *	function is not recommended for use in circumstances when only
   4.537 + *	header is going to be modified. Use pskb_copy() instead.
   4.538 + */
   4.539 + 
   4.540 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
   4.541 +{
   4.542 +	struct sk_buff *n;
   4.543 +	int headerlen = skb->data-skb->head;
   4.544 +
   4.545 +	/*
   4.546 +	 *	Allocate the copy buffer
   4.547 +	 */
   4.548 +	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
   4.549 +	if(n==NULL)
   4.550 +		return NULL;
   4.551 +
   4.552 +	/* Set the data pointer */
   4.553 +	skb_reserve(n,headerlen);
   4.554 +	/* Set the tail pointer and length */
   4.555 +	skb_put(n,skb->len);
   4.556 +	n->csum = skb->csum;
   4.557 +	n->ip_summed = skb->ip_summed;
   4.558 +
   4.559 +	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
   4.560 +		BUG();
   4.561 +
   4.562 +	copy_skb_header(n, skb);
   4.563 +
   4.564 +	return n;
   4.565 +}
   4.566 +
   4.567 +/* Keep head the same: replace data */
   4.568 +int skb_linearize(struct sk_buff *skb, int gfp_mask)
   4.569 +{
   4.570 +	unsigned int size;
   4.571 +	u8 *data;
   4.572 +	long offset;
   4.573 +	int headerlen = skb->data - skb->head;
   4.574 +	int expand = (skb->tail+skb->data_len) - skb->end;
   4.575 +
   4.576 +	if (skb_shared(skb))
   4.577 +		BUG();
   4.578 +
   4.579 +	if (expand <= 0)
   4.580 +		expand = 0;
   4.581 +
   4.582 +	size = (skb->end - skb->head + expand);
   4.583 +	size = SKB_DATA_ALIGN(size);
   4.584 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   4.585 +	if (data == NULL)
   4.586 +		return -ENOMEM;
   4.587 +
   4.588 +	/* Copy entire thing */
   4.589 +	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
   4.590 +		BUG();
   4.591 +
   4.592 +	/* Offset between the two in bytes */
   4.593 +	offset = data - skb->head;
   4.594 +
   4.595 +	/* Free old data. */
   4.596 +	skb_release_data(skb);
   4.597 +
   4.598 +	skb->head = data;
   4.599 +	skb->end  = data + size;
   4.600 +
   4.601 +	/* Set up new pointers */
   4.602 +	skb->h.raw += offset;
   4.603 +	skb->nh.raw += offset;
   4.604 +	skb->mac.raw += offset;
   4.605 +	skb->tail += offset;
   4.606 +	skb->data += offset;
   4.607 +
   4.608 +	/* Set up shinfo */
   4.609 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   4.610 +	skb_shinfo(skb)->nr_frags = 0;
   4.611 +	skb_shinfo(skb)->frag_list = NULL;
   4.612 +
   4.613 +	/* We are no longer a clone, even if we were. */
   4.614 +	skb->cloned = 0;
   4.615 +
   4.616 +	skb->tail += skb->data_len;
   4.617 +	skb->data_len = 0;
   4.618 +	return 0;
   4.619 +}
   4.620 +
   4.621 +
   4.622 +/**
   4.623 + *	pskb_copy	-	create copy of an sk_buff with private head.
   4.624 + *	@skb: buffer to copy
   4.625 + *	@gfp_mask: allocation priority
   4.626 + *
   4.627 + *	Make a copy of both an &sk_buff and part of its data, located
   4.628 + *	in header. Fragmented data remain shared. This is used when
   4.629 + *	the caller wishes to modify only header of &sk_buff and needs
   4.630 + *	private copy of the header to alter. Returns %NULL on failure
   4.631 + *	or the pointer to the buffer on success.
   4.632 + *	The returned buffer has a reference count of 1.
   4.633 + */
   4.634 +
   4.635 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
   4.636 +{
   4.637 +	struct sk_buff *n;
   4.638 +
   4.639 +	/*
   4.640 +	 *	Allocate the copy buffer
   4.641 +	 */
   4.642 +	n=alloc_skb(skb->end - skb->head, gfp_mask);
   4.643 +	if(n==NULL)
   4.644 +		return NULL;
   4.645 +
   4.646 +	/* Set the data pointer */
   4.647 +	skb_reserve(n,skb->data-skb->head);
   4.648 +	/* Set the tail pointer and length */
   4.649 +	skb_put(n,skb_headlen(skb));
   4.650 +	/* Copy the bytes */
   4.651 +	memcpy(n->data, skb->data, n->len);
   4.652 +	n->csum = skb->csum;
   4.653 +	n->ip_summed = skb->ip_summed;
   4.654 +
   4.655 +	n->data_len = skb->data_len;
   4.656 +	n->len = skb->len;
   4.657 +
   4.658 +	if (skb_shinfo(skb)->nr_frags) {
   4.659 +		int i;
   4.660 +
   4.661 +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
   4.662 +			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
   4.663 +			get_page(skb_shinfo(n)->frags[i].page);
   4.664 +		}
   4.665 +		skb_shinfo(n)->nr_frags = i;
   4.666 +	}
   4.667 +
   4.668 +	if (skb_shinfo(skb)->frag_list) {
   4.669 +		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
   4.670 +		skb_clone_fraglist(n);
   4.671 +	}
   4.672 +
   4.673 +	copy_skb_header(n, skb);
   4.674 +
   4.675 +	return n;
   4.676 +}
   4.677 +
   4.678 +/**
   4.679 + *	pskb_expand_head - reallocate header of &sk_buff
   4.680 + *	@skb: buffer to reallocate
   4.681 + *	@nhead: room to add at head
   4.682 + *	@ntail: room to add at tail
   4.683 + *	@gfp_mask: allocation priority
   4.684 + *
   4.685 + *	Expands (or creates identical copy, if &nhead and &ntail are zero)
   4.686 + *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
   4.687 + *	reference count of 1. Returns zero in the case of success or error,
   4.688 + *	if expansion failed. In the last case, &sk_buff is not changed.
   4.689 + *
   4.690 + *	All the pointers pointing into skb header may change and must be
   4.691 + *	reloaded after call to this function.
   4.692 + */
   4.693 +
   4.694 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
   4.695 +{
   4.696 +	int i;
   4.697 +	u8 *data;
   4.698 +	int size = nhead + (skb->end - skb->head) + ntail;
   4.699 +	long off;
   4.700 +
   4.701 +	if (skb_shared(skb))
   4.702 +		BUG();
   4.703 +
   4.704 +	size = SKB_DATA_ALIGN(size);
   4.705 +
   4.706 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
   4.707 +	if (data == NULL)
   4.708 +		goto nodata;
   4.709 +
   4.710 +	/* Copy only real data... and, alas, header. This should be
   4.711 +	 * optimized for the cases when header is void. */
   4.712 +	memcpy(data+nhead, skb->head, skb->tail-skb->head);
   4.713 +	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
   4.714 +
   4.715 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
   4.716 +		get_page(skb_shinfo(skb)->frags[i].page);
   4.717 +
   4.718 +	if (skb_shinfo(skb)->frag_list)
   4.719 +		skb_clone_fraglist(skb);
   4.720 +
   4.721 +	skb_release_data(skb);
   4.722 +
   4.723 +	off = (data+nhead) - skb->head;
   4.724 +
   4.725 +	skb->head = data;
   4.726 +	skb->end  = data+size;
   4.727 +
   4.728 +	skb->data += off;
   4.729 +	skb->tail += off;
   4.730 +	skb->mac.raw += off;
   4.731 +	skb->h.raw += off;
   4.732 +	skb->nh.raw += off;
   4.733 +	skb->cloned = 0;
   4.734 +	atomic_set(&skb_shinfo(skb)->dataref, 1);
   4.735 +	return 0;
   4.736 +
   4.737 +nodata:
   4.738 +	return -ENOMEM;
   4.739 +}
   4.740 +
   4.741 +/* Make private copy of skb with writable head and some headroom */
   4.742 +
   4.743 +struct sk_buff *
   4.744 +skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
   4.745 +{
   4.746 +	struct sk_buff *skb2;
   4.747 +	int delta = headroom - skb_headroom(skb);
   4.748 +
   4.749 +	if (delta <= 0)
   4.750 +		return pskb_copy(skb, GFP_ATOMIC);
   4.751 +
   4.752 +	skb2 = skb_clone(skb, GFP_ATOMIC);
   4.753 +	if (skb2 == NULL ||
   4.754 +	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
   4.755 +		return skb2;
   4.756 +
   4.757 +	kfree_skb(skb2);
   4.758 +	return NULL;
   4.759 +}
   4.760 +
   4.761 +
   4.762 +/**
   4.763 + *	skb_copy_expand	-	copy and expand sk_buff
   4.764 + *	@skb: buffer to copy
   4.765 + *	@newheadroom: new free bytes at head
   4.766 + *	@newtailroom: new free bytes at tail
   4.767 + *	@gfp_mask: allocation priority
   4.768 + *
   4.769 + *	Make a copy of both an &sk_buff and its data and while doing so 
   4.770 + *	allocate additional space.
   4.771 + *
   4.772 + *	This is used when the caller wishes to modify the data and needs a 
   4.773 + *	private copy of the data to alter as well as more space for new fields.
   4.774 + *	Returns %NULL on failure or the pointer to the buffer
   4.775 + *	on success. The returned buffer has a reference count of 1.
   4.776 + *
   4.777 + *	You must pass %GFP_ATOMIC as the allocation priority if this function
   4.778 + *	is called from an interrupt.
   4.779 + */
   4.780 + 
   4.781 +
   4.782 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
   4.783 +				int newheadroom,
   4.784 +				int newtailroom,
   4.785 +				int gfp_mask)
   4.786 +{
   4.787 +	struct sk_buff *n;
   4.788 +
   4.789 +	/*
   4.790 +	 *	Allocate the copy buffer
   4.791 +	 */
   4.792 + 	 
   4.793 +	n=alloc_skb(newheadroom + skb->len + newtailroom,
   4.794 +		    gfp_mask);
   4.795 +	if(n==NULL)
   4.796 +		return NULL;
   4.797 +
   4.798 +	skb_reserve(n,newheadroom);
   4.799 +
   4.800 +	/* Set the tail pointer and length */
   4.801 +	skb_put(n,skb->len);
   4.802 +
   4.803 +	/* Copy the data only. */
   4.804 +	if (skb_copy_bits(skb, 0, n->data, skb->len))
   4.805 +		BUG();
   4.806 +
   4.807 +	copy_skb_header(n, skb);
   4.808 +	return n;
   4.809 +}
   4.810 +
   4.811 +/**
   4.812 + *	skb_pad			-	zero pad the tail of an skb
   4.813 + *	@skb: buffer to pad
   4.814 + *	@pad: space to pad
   4.815 + *
   4.816 + *	Ensure that a buffer is followed by a padding area that is zero
   4.817 + *	filled. Used by network drivers which may DMA or transfer data
   4.818 + *	beyond the buffer end onto the wire.
   4.819 + *
   4.820 + *	May return NULL in out of memory cases.
   4.821 + */
   4.822 + 
   4.823 +struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
   4.824 +{
   4.825 +	struct sk_buff *nskb;
   4.826 +	
   4.827 +	/* If the skbuff is non linear tailroom is always zero.. */
   4.828 +	if(skb_tailroom(skb) >= pad)
   4.829 +	{
   4.830 +		memset(skb->data+skb->len, 0, pad);
   4.831 +		return skb;
   4.832 +	}
   4.833 +	
   4.834 +	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
   4.835 +	kfree_skb(skb);
   4.836 +	if(nskb)
   4.837 +		memset(nskb->data+nskb->len, 0, pad);
   4.838 +	return nskb;
   4.839 +}	
   4.840 + 
   4.841 +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
   4.842 + * If realloc==0 and trimming is impossible without change of data,
   4.843 + * it is BUG().
   4.844 + */
   4.845 +
   4.846 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
   4.847 +{
   4.848 +	int offset = skb_headlen(skb);
   4.849 +	int nfrags = skb_shinfo(skb)->nr_frags;
   4.850 +	int i;
   4.851 +
   4.852 +	for (i=0; i<nfrags; i++) {
   4.853 +		int end = offset + skb_shinfo(skb)->frags[i].size;
   4.854 +		if (end > len) {
   4.855 +			if (skb_cloned(skb)) {
   4.856 +				if (!realloc)
   4.857 +					BUG();
   4.858 +				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
   4.859 +					return -ENOMEM;
   4.860 +			}
   4.861 +			if (len <= offset) {
   4.862 +				put_page(skb_shinfo(skb)->frags[i].page);
   4.863 +				skb_shinfo(skb)->nr_frags--;
   4.864 +			} else {
   4.865 +				skb_shinfo(skb)->frags[i].size = len-offset;
   4.866 +			}
   4.867 +		}
   4.868 +		offset = end;
   4.869 +	}
   4.870 +
   4.871 +	if (offset < len) {
   4.872 +		skb->data_len -= skb->len - len;
   4.873 +		skb->len = len;
   4.874 +	} else {
   4.875 +		if (len <= skb_headlen(skb)) {
   4.876 +			skb->len = len;
   4.877 +			skb->data_len = 0;
   4.878 +			skb->tail = skb->data + len;
   4.879 +			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
   4.880 +				skb_drop_fraglist(skb);
   4.881 +		} else {
   4.882 +			skb->data_len -= skb->len - len;
   4.883 +			skb->len = len;
   4.884 +		}
   4.885 +	}
   4.886 +
   4.887 +	return 0;
   4.888 +}
   4.889 +
   4.890 +/**
   4.891 + *	__pskb_pull_tail - advance tail of skb header 
   4.892 + *	@skb: buffer to reallocate
   4.893 + *	@delta: number of bytes to advance tail
   4.894 + *
   4.895 + *	The function makes a sense only on a fragmented &sk_buff,
   4.896 + *	it expands header moving its tail forward and copying necessary
   4.897 + *	data from fragmented part.
   4.898 + *
   4.899 + *	&sk_buff MUST have reference count of 1.
   4.900 + *
   4.901 + *	Returns %NULL (and &sk_buff does not change) if pull failed
   4.902 + *	or value of new tail of skb in the case of success.
   4.903 + *
   4.904 + *	All the pointers pointing into skb header may change and must be
   4.905 + *	reloaded after call to this function.
   4.906 + */
   4.907 +
   4.908 +/* Moves tail of skb head forward, copying data from fragmented part,
   4.909 + * when it is necessary.
   4.910 + * 1. It may fail due to malloc failure.
   4.911 + * 2. It may change skb pointers.
   4.912 + *
   4.913 + * It is pretty complicated. Luckily, it is called only in exceptional cases.
   4.914 + */
   4.915 +unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
   4.916 +{
   4.917 +	int i, k, eat;
   4.918 +
   4.919 +	/* If skb has not enough free space at tail, get new one
   4.920 +	 * plus 128 bytes for future expansions. If we have enough
   4.921 +	 * room at tail, reallocate without expansion only if skb is cloned.
   4.922 +	 */
   4.923 +	eat = (skb->tail+delta) - skb->end;
   4.924 +
   4.925 +	if (eat > 0 || skb_cloned(skb)) {
   4.926 +		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
   4.927 +			return NULL;
   4.928 +	}
   4.929 +
   4.930 +	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
   4.931 +		BUG();
   4.932 +
   4.933 +	/* Optimization: no fragments, no reasons to preestimate
   4.934 +	 * size of pulled pages. Superb.
   4.935 +	 */
   4.936 +	if (skb_shinfo(skb)->frag_list == NULL)
   4.937 +		goto pull_pages;
   4.938 +
   4.939 +	/* Estimate size of pulled pages. */
   4.940 +	eat = delta;
   4.941 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
   4.942 +		if (skb_shinfo(skb)->frags[i].size >= eat)
   4.943 +			goto pull_pages;
   4.944 +		eat -= skb_shinfo(skb)->frags[i].size;
   4.945 +	}
   4.946 +
   4.947 +	/* If we need update frag list, we are in troubles.
   4.948 +	 * Certainly, it possible to add an offset to skb data,
   4.949 +	 * but taking into account that pulling is expected to
   4.950 +	 * be very rare operation, it is worth to fight against
   4.951 +	 * further bloating skb head and crucify ourselves here instead.
   4.952 +	 * Pure masohism, indeed. 8)8)
   4.953 +	 */
   4.954 +	if (eat) {
   4.955 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
   4.956 +		struct sk_buff *clone = NULL;
   4.957 +		struct sk_buff *insp = NULL;
   4.958 +
   4.959 +		do {
   4.960 +			if (list == NULL)
   4.961 +				BUG();
   4.962 +
   4.963 +			if (list->len <= eat) {
   4.964 +				/* Eaten as whole. */
   4.965 +				eat -= list->len;
   4.966 +				list = list->next;
   4.967 +				insp = list;
   4.968 +			} else {
   4.969 +				/* Eaten partially. */
   4.970 +
   4.971 +				if (skb_shared(list)) {
   4.972 +					/* Sucks! We need to fork list. :-( */
   4.973 +					clone = skb_clone(list, GFP_ATOMIC);
   4.974 +					if (clone == NULL)
   4.975 +						return NULL;
   4.976 +					insp = list->next;
   4.977 +					list = clone;
   4.978 +				} else {
   4.979 +					/* This may be pulled without
   4.980 +					 * problems. */
   4.981 +					insp = list;
   4.982 +				}
   4.983 +				if (pskb_pull(list, eat) == NULL) {
   4.984 +					if (clone)
   4.985 +						kfree_skb(clone);
   4.986 +					return NULL;
   4.987 +				}
   4.988 +				break;
   4.989 +			}
   4.990 +		} while (eat);
   4.991 +
   4.992 +		/* Free pulled out fragments. */
   4.993 +		while ((list = skb_shinfo(skb)->frag_list) != insp) {
   4.994 +			skb_shinfo(skb)->frag_list = list->next;
   4.995 +			kfree_skb(list);
   4.996 +		}
   4.997 +		/* And insert new clone at head. */
   4.998 +		if (clone) {
   4.999 +			clone->next = list;
  4.1000 +			skb_shinfo(skb)->frag_list = clone;
  4.1001 +		}
  4.1002 +	}
  4.1003 +	/* Success! Now we may commit changes to skb data. */
  4.1004 +
  4.1005 +pull_pages:
  4.1006 +	eat = delta;
  4.1007 +	k = 0;
  4.1008 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  4.1009 +		if (skb_shinfo(skb)->frags[i].size <= eat) {
  4.1010 +			put_page(skb_shinfo(skb)->frags[i].page);
  4.1011 +			eat -= skb_shinfo(skb)->frags[i].size;
  4.1012 +		} else {
  4.1013 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
  4.1014 +			if (eat) {
  4.1015 +				skb_shinfo(skb)->frags[k].page_offset += eat;
  4.1016 +				skb_shinfo(skb)->frags[k].size -= eat;
  4.1017 +				eat = 0;
  4.1018 +			}
  4.1019 +			k++;
  4.1020 +		}
  4.1021 +	}
  4.1022 +	skb_shinfo(skb)->nr_frags = k;
  4.1023 +
  4.1024 +	skb->tail += delta;
  4.1025 +	skb->data_len -= delta;
  4.1026 +
  4.1027 +	return skb->tail;
  4.1028 +}
  4.1029 +
  4.1030 +/* Copy some data bits from skb to kernel buffer. */
  4.1031 +
  4.1032 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
  4.1033 +{
  4.1034 +	int i, copy;
  4.1035 +	int start = skb->len - skb->data_len;
  4.1036 +
  4.1037 +	if (offset > (int)skb->len-len)
  4.1038 +		goto fault;
  4.1039 +
  4.1040 +	/* Copy header. */
  4.1041 +	if ((copy = start-offset) > 0) {
  4.1042 +		if (copy > len)
  4.1043 +			copy = len;
  4.1044 +		memcpy(to, skb->data + offset, copy);
  4.1045 +		if ((len -= copy) == 0)
  4.1046 +			return 0;
  4.1047 +		offset += copy;
  4.1048 +		to += copy;
  4.1049 +	}
  4.1050 +
  4.1051 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  4.1052 +		int end;
  4.1053 +
  4.1054 +		BUG_TRAP(start <= offset+len);
  4.1055 +
  4.1056 +		end = start + skb_shinfo(skb)->frags[i].size;
  4.1057 +		if ((copy = end-offset) > 0) {
  4.1058 +			u8 *vaddr;
  4.1059 +
  4.1060 +			if (copy > len)
  4.1061 +				copy = len;
  4.1062 +
  4.1063 +			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
  4.1064 +			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
  4.1065 +			       offset-start, copy);
  4.1066 +			kunmap_skb_frag(vaddr);
  4.1067 +
  4.1068 +			if ((len -= copy) == 0)
  4.1069 +				return 0;
  4.1070 +			offset += copy;
  4.1071 +			to += copy;
  4.1072 +		}
  4.1073 +		start = end;
  4.1074 +	}
  4.1075 +
  4.1076 +	if (skb_shinfo(skb)->frag_list) {
  4.1077 +		struct sk_buff *list;
  4.1078 +
  4.1079 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  4.1080 +			int end;
  4.1081 +
  4.1082 +			BUG_TRAP(start <= offset+len);
  4.1083 +
  4.1084 +			end = start + list->len;
  4.1085 +			if ((copy = end-offset) > 0) {
  4.1086 +				if (copy > len)
  4.1087 +					copy = len;
  4.1088 +				if (skb_copy_bits(list, offset-start, to, copy))
  4.1089 +					goto fault;
  4.1090 +				if ((len -= copy) == 0)
  4.1091 +					return 0;
  4.1092 +				offset += copy;
  4.1093 +				to += copy;
  4.1094 +			}
  4.1095 +			start = end;
  4.1096 +		}
  4.1097 +	}
  4.1098 +	if (len == 0)
  4.1099 +		return 0;
  4.1100 +
  4.1101 +fault:
  4.1102 +	return -EFAULT;
  4.1103 +}
  4.1104 +
  4.1105 +/* Checksum skb data. */
  4.1106 +
  4.1107 +unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
  4.1108 +{
  4.1109 +	int i, copy;
  4.1110 +	int start = skb->len - skb->data_len;
  4.1111 +	int pos = 0;
  4.1112 +
  4.1113 +	/* Checksum header. */
  4.1114 +	if ((copy = start-offset) > 0) {
  4.1115 +		if (copy > len)
  4.1116 +			copy = len;
  4.1117 +		csum = csum_partial(skb->data+offset, copy, csum);
  4.1118 +		if ((len -= copy) == 0)
  4.1119 +			return csum;
  4.1120 +		offset += copy;
  4.1121 +		pos = copy;
  4.1122 +	}
  4.1123 +
  4.1124 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  4.1125 +		int end;
  4.1126 +
  4.1127 +		BUG_TRAP(start <= offset+len);
  4.1128 +
  4.1129 +		end = start + skb_shinfo(skb)->frags[i].size;
  4.1130 +		if ((copy = end-offset) > 0) {
  4.1131 +			unsigned int csum2;
  4.1132 +			u8 *vaddr;
  4.1133 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  4.1134 +
  4.1135 +			if (copy > len)
  4.1136 +				copy = len;
  4.1137 +			vaddr = kmap_skb_frag(frag);
  4.1138 +			csum2 = csum_partial(vaddr + frag->page_offset +
  4.1139 +					     offset-start, copy, 0);
  4.1140 +			kunmap_skb_frag(vaddr);
  4.1141 +			csum = csum_block_add(csum, csum2, pos);
  4.1142 +			if (!(len -= copy))
  4.1143 +				return csum;
  4.1144 +			offset += copy;
  4.1145 +			pos += copy;
  4.1146 +		}
  4.1147 +		start = end;
  4.1148 +	}
  4.1149 +
  4.1150 +	if (skb_shinfo(skb)->frag_list) {
  4.1151 +		struct sk_buff *list;
  4.1152 +
  4.1153 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  4.1154 +			int end;
  4.1155 +
  4.1156 +			BUG_TRAP(start <= offset+len);
  4.1157 +
  4.1158 +			end = start + list->len;
  4.1159 +			if ((copy = end-offset) > 0) {
  4.1160 +				unsigned int csum2;
  4.1161 +				if (copy > len)
  4.1162 +					copy = len;
  4.1163 +				csum2 = skb_checksum(list, offset-start, copy, 0);
  4.1164 +				csum = csum_block_add(csum, csum2, pos);
  4.1165 +				if ((len -= copy) == 0)
  4.1166 +					return csum;
  4.1167 +				offset += copy;
  4.1168 +				pos += copy;
  4.1169 +			}
  4.1170 +			start = end;
  4.1171 +		}
  4.1172 +	}
  4.1173 +	if (len == 0)
  4.1174 +		return csum;
  4.1175 +
  4.1176 +	BUG();
  4.1177 +	return csum;
  4.1178 +}
  4.1179 +
  4.1180 +/* Both of above in one bottle. */
  4.1181 +
  4.1182 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
  4.1183 +{
  4.1184 +	int i, copy;
  4.1185 +	int start = skb->len - skb->data_len;
  4.1186 +	int pos = 0;
  4.1187 +
  4.1188 +	/* Copy header. */
  4.1189 +	if ((copy = start-offset) > 0) {
  4.1190 +		if (copy > len)
  4.1191 +			copy = len;
  4.1192 +		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
  4.1193 +		if ((len -= copy) == 0)
  4.1194 +			return csum;
  4.1195 +		offset += copy;
  4.1196 +		to += copy;
  4.1197 +		pos = copy;
  4.1198 +	}
  4.1199 +
  4.1200 +	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
  4.1201 +		int end;
  4.1202 +
  4.1203 +		BUG_TRAP(start <= offset+len);
  4.1204 +
  4.1205 +		end = start + skb_shinfo(skb)->frags[i].size;
  4.1206 +		if ((copy = end-offset) > 0) {
  4.1207 +			unsigned int csum2;
  4.1208 +			u8 *vaddr;
  4.1209 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
  4.1210 +
  4.1211 +			if (copy > len)
  4.1212 +				copy = len;
  4.1213 +			vaddr = kmap_skb_frag(frag);
  4.1214 +			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
  4.1215 +						      offset-start, to, copy, 0);
  4.1216 +			kunmap_skb_frag(vaddr);
  4.1217 +			csum = csum_block_add(csum, csum2, pos);
  4.1218 +			if (!(len -= copy))
  4.1219 +				return csum;
  4.1220 +			offset += copy;
  4.1221 +			to += copy;
  4.1222 +			pos += copy;
  4.1223 +		}
  4.1224 +		start = end;
  4.1225 +	}
  4.1226 +
  4.1227 +	if (skb_shinfo(skb)->frag_list) {
  4.1228 +		struct sk_buff *list;
  4.1229 +
  4.1230 +		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
  4.1231 +			unsigned int csum2;
  4.1232 +			int end;
  4.1233 +
  4.1234 +			BUG_TRAP(start <= offset+len);
  4.1235 +
  4.1236 +			end = start + list->len;
  4.1237 +			if ((copy = end-offset) > 0) {
  4.1238 +				if (copy > len)
  4.1239 +					copy = len;
  4.1240 +				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
  4.1241 +				csum = csum_block_add(csum, csum2, pos);
  4.1242 +				if ((len -= copy) == 0)
  4.1243 +					return csum;
  4.1244 +				offset += copy;
  4.1245 +				to += copy;
  4.1246 +				pos += copy;
  4.1247 +			}
  4.1248 +			start = end;
  4.1249 +		}
  4.1250 +	}
  4.1251 +	if (len == 0)
  4.1252 +		return csum;
  4.1253 +
  4.1254 +	BUG();
  4.1255 +	return csum;
  4.1256 +}
  4.1257 +
  4.1258 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
  4.1259 +{
  4.1260 +	unsigned int csum;
  4.1261 +	long csstart;
  4.1262 +
  4.1263 +	if (skb->ip_summed == CHECKSUM_HW)
  4.1264 +		csstart = skb->h.raw - skb->data;
  4.1265 +	else
  4.1266 +		csstart = skb->len - skb->data_len;
  4.1267 +
  4.1268 +	if (csstart > skb->len - skb->data_len)
  4.1269 +		BUG();
  4.1270 +
  4.1271 +	memcpy(to, skb->data, csstart);
  4.1272 +
  4.1273 +	csum = 0;
  4.1274 +	if (csstart != skb->len)
  4.1275 +		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
  4.1276 +				skb->len-csstart, 0);
  4.1277 +
  4.1278 +	if (skb->ip_summed == CHECKSUM_HW) {
  4.1279 +		long csstuff = csstart + skb->csum;
  4.1280 +
  4.1281 +		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
  4.1282 +	}
  4.1283 +}
  4.1284 +
  4.1285 +#if 0
  4.1286 +/* 
  4.1287 + * 	Tune the memory allocator for a new MTU size.
  4.1288 + */
  4.1289 +void skb_add_mtu(int mtu)
  4.1290 +{
  4.1291 +	/* Must match allocation in alloc_skb */
  4.1292 +	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
  4.1293 +
  4.1294 +	kmem_add_cache_size(mtu);
  4.1295 +}
  4.1296 +#endif
  4.1297 +
  4.1298 +void __init skb_init(void)
  4.1299 +{
  4.1300 +	int i;
  4.1301 +
  4.1302 +	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
  4.1303 +					      sizeof(struct sk_buff),
  4.1304 +					      0,
  4.1305 +					      SLAB_HWCACHE_ALIGN,
  4.1306 +					      skb_headerinit, NULL);
  4.1307 +	if (!skbuff_head_cache)
  4.1308 +		panic("cannot create skbuff cache");
  4.1309 +
  4.1310 +	for (i=0; i<NR_CPUS; i++)
  4.1311 +		skb_queue_head_init(&skb_head_pool[i].list);
  4.1312 +}
     5.1 --- a/linux-2.6.8.1-xen-sparse/arch/xen/i386/kernel/entry.S	Wed Sep 22 11:02:20 2004 +0000
     5.2 +++ b/linux-2.6.8.1-xen-sparse/arch/xen/i386/kernel/entry.S	Wed Sep 22 21:31:14 2004 +0000
     5.3 @@ -854,7 +854,7 @@ ENTRY(sys_call_table)
     5.4  	.long sys_iopl		/* 110 */
     5.5  	.long sys_vhangup
     5.6  	.long sys_ni_syscall	/* old "idle" system call */
     5.7 -	.long sys_vm86old
     5.8 +	.long sys_ni_syscall	/* disable sys_vm86old */
     5.9  	.long sys_wait4
    5.10  	.long sys_swapoff	/* 115 */
    5.11  	.long sys_sysinfo
     6.1 --- a/linux-2.6.8.1-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Wed Sep 22 11:02:20 2004 +0000
     6.2 +++ b/linux-2.6.8.1-xen-sparse/arch/xen/i386/kernel/pci-dma.c	Wed Sep 22 21:31:14 2004 +0000
     6.3 @@ -84,7 +84,8 @@ void *dma_alloc_coherent(struct device *
     6.4  			phys_to_machine_mapping[(__pa(ret)>>PAGE_SHIFT)+i] =
     6.5  				pfn+i;
     6.6  		}
     6.7 -		flush_page_update_queue();
     6.8 +		/* Flush updates through and flush the TLB. */
     6.9 +		xen_tlb_flush();
    6.10  	}
    6.11  
    6.12  	memset(ret, 0, size);
     7.1 --- a/linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/hypervisor.c	Wed Sep 22 11:02:20 2004 +0000
     7.2 +++ b/linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/hypervisor.c	Wed Sep 22 21:31:14 2004 +0000
     7.3 @@ -430,7 +430,8 @@ unsigned long allocate_empty_lowmem_regi
     7.4          phys_to_machine_mapping[__pa(vstart)>>PAGE_SHIFT] = INVALID_P2M_ENTRY;
     7.5      }
     7.6  
     7.7 -    flush_page_update_queue();
     7.8 +    /* Flush updates through and flush the TLB. */
     7.9 +    xen_tlb_flush();
    7.10  
    7.11      ret = HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation, 
    7.12                                  pfn_array, 1<<order, 0);
     8.1 --- a/linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/ioremap.c	Wed Sep 22 11:02:20 2004 +0000
     8.2 +++ b/linux-2.6.8.1-xen-sparse/arch/xen/i386/mm/ioremap.c	Wed Sep 22 21:31:14 2004 +0000
     8.3 @@ -381,11 +381,11 @@ static inline int direct_remap_area_pmd(
     8.4      if (address >= end)
     8.5          BUG();
     8.6      do {
     8.7 -        pte_t *pte = pte_alloc_kernel(mm, pmd, address);
     8.8 +        pte_t *pte = pte_alloc_map(mm, pmd, address);
     8.9          if (!pte)
    8.10              return -ENOMEM;
    8.11          direct_remap_area_pte(pte, address, end - address, v);
    8.12 -
    8.13 +	pte_unmap(pte);
    8.14          address = (address + PMD_SIZE) & PMD_MASK;
    8.15          pmd++;
    8.16      } while (address && (address < end));
     9.1 --- a/linux-2.6.8.1-xen-sparse/arch/xen/kernel/reboot.c	Wed Sep 22 11:02:20 2004 +0000
     9.2 +++ b/linux-2.6.8.1-xen-sparse/arch/xen/kernel/reboot.c	Wed Sep 22 21:31:14 2004 +0000
     9.3 @@ -65,7 +65,7 @@ static void __do_suspend(void)
     9.4      extern void blkdev_suspend(void);
     9.5      extern void blkdev_resume(void);
     9.6  #endif
     9.7 -#ifdef CONFIG_XEN_NETIF_FRONTEND
     9.8 +#ifdef CONFIG_XEN_NETDEV_FRONTEND
     9.9      extern void netif_suspend(void);
    9.10      extern void netif_resume(void);    
    9.11  #endif
    9.12 @@ -82,7 +82,7 @@ static void __do_suspend(void)
    9.13  
    9.14      __cli();
    9.15  
    9.16 -#ifdef CONFIG_XEN_NETIF_FRONTEND
    9.17 +#ifdef CONFIG_XEN_NETDEV_FRONTEND
    9.18      netif_suspend();
    9.19  #endif
    9.20  
    9.21 @@ -143,7 +143,7 @@ static void __do_suspend(void)
    9.22      blkdev_resume();
    9.23  #endif
    9.24  
    9.25 -#ifdef CONFIG_XEN_NETIF_FRONTEND
    9.26 +#ifdef CONFIG_XEN_NETDEV_FRONTEND
    9.27      netif_resume();
    9.28  #endif
    9.29  
    10.1 --- a/linux-2.6.8.1-xen-sparse/arch/xen/kernel/skbuff.c	Wed Sep 22 11:02:20 2004 +0000
    10.2 +++ b/linux-2.6.8.1-xen-sparse/arch/xen/kernel/skbuff.c	Wed Sep 22 21:31:14 2004 +0000
    10.3 @@ -20,38 +20,16 @@ EXPORT_SYMBOL(__dev_alloc_skb);
    10.4  /* Referenced in netback.c. */
    10.5  /*static*/ kmem_cache_t *skbuff_cachep;
    10.6  
    10.7 +/* Size must be cacheline-aligned (alloc_skb uses SKB_DATA_ALIGN). */
    10.8 +#define XEN_SKB_SIZE \
    10.9 +    ((PAGE_SIZE - sizeof(struct skb_shared_info)) & ~(SMP_CACHE_BYTES - 1))
   10.10 +
   10.11  struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask)
   10.12  {
   10.13      struct sk_buff *skb;
   10.14 -    u8             *new_data, *new_shinfo; 
   10.15 -
   10.16 -    /*
   10.17 -     * Yuk! There is no way to get a skbuff head without allocating the
   10.18 -     * data area using kmalloc(). So we do that and then replace the default
   10.19 -     * data area with our own.
   10.20 -     */
   10.21 -    skb = alloc_skb(0, gfp_mask);
   10.22 -    if ( unlikely(skb == NULL) )
   10.23 -        return NULL;
   10.24 -
   10.25 -    new_data = kmem_cache_alloc(skbuff_cachep, gfp_mask);
   10.26 -    if ( new_data == NULL )
   10.27 -    {
   10.28 -        dev_kfree_skb(skb);
   10.29 -        return NULL;
   10.30 -    }
   10.31 -
   10.32 -    new_shinfo = 
   10.33 -        new_data + PAGE_SIZE - sizeof(struct skb_shared_info);
   10.34 -    memcpy(new_shinfo, skb_shinfo(skb), sizeof(struct skb_shared_info));
   10.35 -
   10.36 -    kfree(skb->head);
   10.37 -
   10.38 -    skb->head = new_data;
   10.39 -    skb->data = skb->tail = new_data + 16; /* __dev_alloc_skb does this */
   10.40 -    skb->end  = new_shinfo;
   10.41 -    skb->truesize = 1500;                  /* is this important? */
   10.42 -
   10.43 +    skb = alloc_skb_from_cache(skbuff_cachep, length + 16, gfp_mask);
   10.44 +    if ( likely(skb != NULL) )
   10.45 +        skb_reserve(skb, 16);
   10.46      return skb;
   10.47  }
   10.48  
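The cache passed to alloc_skb_from_cache() above (skbuff_cachep) is created elsewhere in this file, outside this hunk. A sketch of the intended pattern, with the cache name, size and flags assumed rather than taken from the patch: the slab provides page-sized, page-aligned data areas so that each skb data area occupies exactly one page, which is what the Xen I/O paths rely on.

/* Sketch only: create a slab of page-sized objects for skb data areas,
 * then allocate network buffers from it.  Names and flags are assumed. */
static kmem_cache_t *xen_skb_cachep;

static int __init example_init(void)
{
	xen_skb_cachep = kmem_cache_create("xen-skb", PAGE_SIZE, PAGE_SIZE,
					   0, NULL, NULL);
	if (xen_skb_cachep == NULL)
		return -ENOMEM;
	return 0;
}

static struct sk_buff *example_alloc(unsigned int len)
{
	struct sk_buff *skb;

	/* 16 bytes of headroom, as __dev_alloc_skb() reserves above. */
	skb = alloc_skb_from_cache(xen_skb_cachep, len + 16, GFP_ATOMIC);
	if (skb != NULL)
		skb_reserve(skb, 16);
	return skb;
}
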
    11.1 --- a/linux-2.6.8.1-xen-sparse/drivers/xen/netback/netback.c	Wed Sep 22 11:02:20 2004 +0000
    11.2 +++ b/linux-2.6.8.1-xen-sparse/drivers/xen/netback/netback.c	Wed Sep 22 21:31:14 2004 +0000
    11.3 @@ -138,19 +138,18 @@ int netif_be_start_xmit(struct sk_buff *
    11.4       * We do not copy the packet unless:
    11.5       *  1. The data is shared; or
    11.6       *  2. The data is not allocated from our special cache.
    11.7 -     * The copying method is taken from skb_copy().
    11.8       * NB. We also cannot cope with fragmented packets, but we won't get
    11.9       *     any because we do not advertise the NETIF_F_SG feature.
   11.10       */
   11.11      if ( skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb) )
   11.12      {
   11.13 -        struct sk_buff *nskb = dev_alloc_skb(PAGE_SIZE);
   11.14          int hlen = skb->data - skb->head;
   11.15 +        struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
   11.16          if ( unlikely(nskb == NULL) )
   11.17              goto drop;
   11.18          skb_reserve(nskb, hlen);
   11.19          __skb_put(nskb, skb->len);
   11.20 -        (void)skb_copy_bits(skb, -hlen, nskb->head, hlen + skb->len);
   11.21 +        (void)skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen);
   11.22          nskb->dev = skb->dev;
   11.23          dev_kfree_skb(skb);
   11.24          skb = nskb;
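Note the negative offset passed to skb_copy_bits() above: it makes the copy start inside the headroom, so the already-pulled link-layer header between skb->head and skb->data is copied along with the payload. The same idiom in isolation (copy_whole_packet is an illustrative helper, not part of the patch):

/* Copy an entire packet, including the hlen bytes of already-pulled
 * header that sit between skb->head and skb->data, into a flat buffer.
 * 'buf' must be at least hlen + skb->len bytes long. */
static void copy_whole_packet(const struct sk_buff *skb, u8 *buf)
{
	int hlen = skb->data - skb->head;

	/* A negative offset makes skb_copy_bits() start inside the headroom. */
	if (skb_copy_bits(skb, -hlen, buf, hlen + skb->len))
		BUG();
}
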
    12.1 --- a/linux-2.6.8.1-xen-sparse/drivers/xen/netfront/netfront.c	Wed Sep 22 11:02:20 2004 +0000
    12.2 +++ b/linux-2.6.8.1-xen-sparse/drivers/xen/netfront/netfront.c	Wed Sep 22 21:31:14 2004 +0000
    12.3 @@ -39,7 +39,17 @@
    12.4  #ifndef __GFP_NOWARN
    12.5  #define __GFP_NOWARN 0
    12.6  #endif
    12.7 -#define alloc_skb_page() __dev_alloc_skb(PAGE_SIZE, GFP_ATOMIC|__GFP_NOWARN)
    12.8 +#define alloc_xen_skb(_l) __dev_alloc_skb((_l), GFP_ATOMIC|__GFP_NOWARN)
    12.9 +
   12.10 +#define init_skb_shinfo(_skb)                         \
   12.11 +    do {                                              \
   12.12 +        atomic_set(&(skb_shinfo(_skb)->dataref), 1);  \
   12.13 +        skb_shinfo(_skb)->nr_frags = 0;               \
   12.14 +        skb_shinfo(_skb)->frag_list = NULL;           \
   12.15 +    } while ( 0 )
   12.16 +
   12.17 +/* Allow headroom on each rx pkt for Ethernet header, alignment padding, ... */
   12.18 +#define RX_HEADROOM 100
   12.19  
   12.20  /*
   12.21   * If the backend driver is pipelining transmit requests then we can be very
   12.22 @@ -249,7 +259,7 @@ static void network_tx_buf_gc(struct net
   12.23              id  = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
   12.24              skb = np->tx_skbs[id];
   12.25              ADD_ID_TO_FREELIST(np->tx_skbs, id);
   12.26 -            dev_kfree_skb_any(skb);
   12.27 +            dev_kfree_skb_irq(skb);
   12.28          }
   12.29          
   12.30          np->tx_resp_cons = prod;
   12.31 @@ -292,7 +302,7 @@ static void network_alloc_rx_buffers(str
   12.32          return;
   12.33  
   12.34      do {
   12.35 -        if ( unlikely((skb = alloc_skb_page()) == NULL) )
   12.36 +        if ( unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL) )
   12.37              break;
   12.38  
   12.39          skb->dev = dev;
   12.40 @@ -367,13 +377,14 @@ static int network_start_xmit(struct sk_
   12.41      if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
   12.42                    PAGE_SIZE) )
   12.43      {
   12.44 -        struct sk_buff *new_skb;
   12.45 -        if ( unlikely((new_skb = alloc_skb_page()) == NULL) )
   12.46 +        struct sk_buff *nskb;
   12.47 +        if ( unlikely((nskb = alloc_xen_skb(skb->len)) == NULL) )
   12.48              goto drop;
   12.49 -        skb_put(new_skb, skb->len);
   12.50 -        memcpy(new_skb->data, skb->data, skb->len);
   12.51 +        skb_put(nskb, skb->len);
   12.52 +        memcpy(nskb->data, skb->data, skb->len);
   12.53 +        nskb->dev = skb->dev;
   12.54          dev_kfree_skb(skb);
   12.55 -        skb = new_skb;
   12.56 +        skb = nskb;
   12.57      }
   12.58      
   12.59      spin_lock_irq(&np->tx_lock);
   12.60 @@ -446,7 +457,7 @@ static irqreturn_t netif_int(int irq, vo
   12.61  static int netif_poll(struct net_device *dev, int *pbudget)
   12.62  {
   12.63      struct net_private *np = dev->priv;
   12.64 -    struct sk_buff *skb;
   12.65 +    struct sk_buff *skb, *nskb;
   12.66      netif_rx_response_t *rx;
   12.67      NETIF_RING_IDX i, rp;
   12.68      mmu_update_t *mmu = rx_mmu;
   12.69 @@ -494,8 +505,10 @@ static int netif_poll(struct net_device 
   12.70          skb = np->rx_skbs[rx->id];
   12.71          ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
   12.72  
   12.73 -        skb->data = skb->tail = skb->head + (rx->addr & ~PAGE_MASK);
   12.74 -        skb_put(skb, rx->status);
   12.75 +        /* NB. We handle skb overflow later. */
   12.76 +        skb->data = skb->head + (rx->addr & ~PAGE_MASK);
   12.77 +        skb->len  = rx->status;
   12.78 +        skb->tail = skb->data + skb->len;
   12.79  
   12.80          np->stats.rx_packets++;
   12.81          np->stats.rx_bytes += rx->status;
   12.82 @@ -529,10 +542,47 @@ static int netif_poll(struct net_device 
   12.83  
   12.84      while ( (skb = __skb_dequeue(&rxq)) != NULL )
   12.85      {
   12.86 +        /*
   12.87 +         * Enough room in skbuff for the data we were passed? Also, Linux 
   12.88 +         * expects at least 16 bytes headroom in each receive buffer.
   12.89 +         */
   12.90 +        if ( unlikely(skb->tail > skb->end) ||
   12.91 +             unlikely((skb->data - skb->head) < 16) )
   12.92 +        {
   12.93 +            nskb = NULL;
   12.94 +
   12.95 +            /* Only copy the packet if it fits in the current MTU. */
   12.96 +            if ( skb->len <= (dev->mtu + ETH_HLEN) )
   12.97 +            {
   12.98 +                if ( (skb->tail > skb->end) && net_ratelimit() )
   12.99 +                    printk(KERN_INFO "Received packet needs %d bytes more "
 12.100 +                           "room.\n", skb->tail - skb->end);
  12.101 +
  12.102 +                if ( (nskb = alloc_xen_skb(skb->len + 2)) != NULL )
  12.103 +                {
  12.104 +                    skb_reserve(nskb, 2);
  12.105 +                    skb_put(nskb, skb->len);
  12.106 +                    memcpy(nskb->data, skb->data, skb->len);
  12.107 +                    nskb->dev = skb->dev;
  12.108 +                }
  12.109 +            }
  12.110 +            else if ( net_ratelimit() )
  12.111 +                printk(KERN_INFO "Received packet too big for MTU "
  12.112 +                       "(%d > %d)\n", skb->len - ETH_HLEN, dev->mtu);
  12.113 +
  12.114 +            /* Reinitialise and then destroy the old skbuff. */
  12.115 +            skb->len  = 0;
  12.116 +            skb->tail = skb->data;
  12.117 +            init_skb_shinfo(skb);
  12.118 +            dev_kfree_skb(skb);
  12.119 +
  12.120 +            /* Switch old for new, if we copied the buffer. */
  12.121 +            if ( (skb = nskb) == NULL )
  12.122 +                continue;
  12.123 +        }
  12.124 +        
  12.125          /* Set the shared-info area, which is hidden behind the real data. */
  12.126 -        atomic_set(&(skb_shinfo(skb)->dataref), 1);
  12.127 -        skb_shinfo(skb)->nr_frags = 0;
  12.128 -        skb_shinfo(skb)->frag_list = NULL;
  12.129 +        init_skb_shinfo(skb);
  12.130  
  12.131          /* Ethernet-specific work. Delayed to here as it peeks the header. */
  12.132          skb->protocol = eth_type_trans(skb, dev);
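The skb_reserve(nskb, 2) in the copy path above is the usual IP-alignment trick: two bytes of padding plus the 14-byte Ethernet header put the IP header on a 4-byte boundary. The idiom on its own, as a sketch (alloc_aligned_rx_skb is illustrative; alloc_xen_skb is the macro defined earlier in this file):

/* Allocate a receive buffer so that, after the 14-byte Ethernet header,
 * the IP header starts on a 4-byte boundary. */
static struct sk_buff *alloc_aligned_rx_skb(unsigned int len)
{
	struct sk_buff *skb = alloc_xen_skb(len + 2);

	if (skb != NULL)
		skb_reserve(skb, 2);	/* 2 + 14 = 16, so the IP header is aligned */
	return skb;
}
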
  12.133 @@ -596,8 +646,8 @@ static void network_connect(struct net_d
  12.134      netif_tx_request_t *tx;
  12.135  
  12.136      np = dev->priv;
  12.137 -    spin_lock_irq(&np->rx_lock);
  12.138 -    spin_lock(&np->tx_lock);
  12.139 +    spin_lock_irq(&np->tx_lock);
  12.140 +    spin_lock(&np->rx_lock);
  12.141  
  12.142      /* Recovery procedure: */
  12.143  
  12.144 @@ -664,8 +714,8 @@ printk(KERN_ALERT"Netfront recovered tx=
  12.145      if ( np->user_state == UST_OPEN )
  12.146          netif_start_queue(dev);
  12.147  
  12.148 -    spin_unlock(&np->tx_lock);
  12.149 -    spin_unlock_irq(&np->rx_lock);
  12.150 +    spin_unlock(&np->rx_lock);
  12.151 +    spin_unlock_irq(&np->tx_lock);
  12.152  }
  12.153  
  12.154  static void netif_status_change(netif_fe_interface_status_changed_t *status)
    13.1 --- a/linux-2.6.8.1-xen-sparse/include/linux/skbuff.h	Wed Sep 22 11:02:20 2004 +0000
    13.2 +++ b/linux-2.6.8.1-xen-sparse/include/linux/skbuff.h	Wed Sep 22 21:31:14 2004 +0000
    13.3 @@ -293,6 +293,8 @@ struct sk_buff {
    13.4  
    13.5  extern void	       __kfree_skb(struct sk_buff *skb);
    13.6  extern struct sk_buff *alloc_skb(unsigned int size, int priority);
    13.7 +extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
    13.8 +					    unsigned int size, int priority);
    13.9  extern void	       kfree_skbmem(struct sk_buff *skb);
   13.10  extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority);
   13.11  extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
    14.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    14.2 +++ b/linux-2.6.8.1-xen-sparse/net/core/skbuff.c	Wed Sep 22 21:31:14 2004 +0000
    14.3 @@ -0,0 +1,1521 @@
    14.4 +/*
    14.5 + *	Routines having to do with the 'struct sk_buff' memory handlers.
    14.6 + *
    14.7 + *	Authors:	Alan Cox <iiitac@pyr.swan.ac.uk>
    14.8 + *			Florian La Roche <rzsfl@rz.uni-sb.de>
    14.9 + *
   14.10 + *	Version:	$Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $
   14.11 + *
   14.12 + *	Fixes:
   14.13 + *		Alan Cox	:	Fixed the worst of the load
   14.14 + *					balancer bugs.
   14.15 + *		Dave Platt	:	Interrupt stacking fix.
   14.16 + *	Richard Kooijman	:	Timestamp fixes.
   14.17 + *		Alan Cox	:	Changed buffer format.
   14.18 + *		Alan Cox	:	destructor hook for AF_UNIX etc.
   14.19 + *		Linus Torvalds	:	Better skb_clone.
   14.20 + *		Alan Cox	:	Added skb_copy.
   14.21 + *		Alan Cox	:	Added all the changed routines Linus
   14.22 + *					only put in the headers
   14.23 + *		Ray VanTassle	:	Fixed --skb->lock in free
   14.24 + *		Alan Cox	:	skb_copy copy arp field
   14.25 + *		Andi Kleen	:	slabified it.
   14.26 + *		Robert Olsson	:	Removed skb_head_pool
   14.27 + *
   14.28 + *	NOTE:
   14.29 + *		The __skb_ routines should be called with interrupts
   14.30 + *	disabled, or you better be *real* sure that the operation is atomic
   14.31 + *	with respect to whatever list is being frobbed (e.g. via lock_sock()
   14.32 + *	or via disabling bottom half handlers, etc).
   14.33 + *
   14.34 + *	This program is free software; you can redistribute it and/or
   14.35 + *	modify it under the terms of the GNU General Public License
   14.36 + *	as published by the Free Software Foundation; either version
   14.37 + *	2 of the License, or (at your option) any later version.
   14.38 + */
   14.39 +
   14.40 +/*
   14.41 + *	The functions in this file will not compile correctly with gcc 2.4.x
   14.42 + */
   14.43 +
   14.44 +#include <linux/config.h>
   14.45 +#include <linux/module.h>
   14.46 +#include <linux/types.h>
   14.47 +#include <linux/kernel.h>
   14.48 +#include <linux/sched.h>
   14.49 +#include <linux/mm.h>
   14.50 +#include <linux/interrupt.h>
   14.51 +#include <linux/in.h>
   14.52 +#include <linux/inet.h>
   14.53 +#include <linux/slab.h>
   14.54 +#include <linux/netdevice.h>
   14.55 +#ifdef CONFIG_NET_CLS_ACT
   14.56 +#include <net/pkt_sched.h>
   14.57 +#endif
   14.58 +#include <linux/string.h>
   14.59 +#include <linux/skbuff.h>
   14.60 +#include <linux/cache.h>
   14.61 +#include <linux/rtnetlink.h>
   14.62 +#include <linux/init.h>
   14.63 +#include <linux/highmem.h>
   14.64 +
   14.65 +#include <net/protocol.h>
   14.66 +#include <net/dst.h>
   14.67 +#include <net/sock.h>
   14.68 +#include <net/checksum.h>
   14.69 +#include <net/xfrm.h>
   14.70 +
   14.71 +#include <asm/uaccess.h>
   14.72 +#include <asm/system.h>
   14.73 +
   14.74 +static kmem_cache_t *skbuff_head_cache;
   14.75 +
   14.76 +/*
   14.77 + *	Keep out-of-line to prevent kernel bloat.
   14.78 + *	__builtin_return_address is not used because it is not always
   14.79 + *	reliable.
   14.80 + */
   14.81 +
   14.82 +/**
   14.83 + *	skb_over_panic	- 	private function
   14.84 + *	@skb: buffer
   14.85 + *	@sz: size
   14.86 + *	@here: address
   14.87 + *
   14.88 + *	Out of line support code for skb_put(). Not user callable.
   14.89 + */
   14.90 +void skb_over_panic(struct sk_buff *skb, int sz, void *here)
   14.91 +{
   14.92 +	printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s",
   14.93 +		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   14.94 +	BUG();
   14.95 +}
   14.96 +
   14.97 +/**
   14.98 + *	skb_under_panic	- 	private function
   14.99 + *	@skb: buffer
  14.100 + *	@sz: size
  14.101 + *	@here: address
  14.102 + *
  14.103 + *	Out of line support code for skb_push(). Not user callable.
  14.104 + */
  14.105 +
  14.106 +void skb_under_panic(struct sk_buff *skb, int sz, void *here)
  14.107 +{
  14.108 +	printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s",
  14.109 +               here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
  14.110 +	BUG();
  14.111 +}
  14.112 +
  14.113 +/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  14.114 + *	'private' fields and also do memory statistics to find all the
  14.115 + *	[BEEP] leaks.
  14.116 + *
  14.117 + */
  14.118 +
  14.119 +/**
  14.120 + *	alloc_skb	-	allocate a network buffer
  14.121 + *	@size: size to allocate
  14.122 + *	@gfp_mask: allocation mask
  14.123 + *
  14.124 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
  14.125 + *	tail room of size bytes. The object has a reference count of one.
  14.126 + *	The return is the buffer. On a failure the return is %NULL.
  14.127 + *
  14.128 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
  14.129 + *	%GFP_ATOMIC.
  14.130 + */
  14.131 +struct sk_buff *alloc_skb(unsigned int size, int gfp_mask)
  14.132 +{
  14.133 +	struct sk_buff *skb;
  14.134 +	u8 *data;
  14.135 +
  14.136 +	/* Get the HEAD */
  14.137 +	skb = kmem_cache_alloc(skbuff_head_cache,
  14.138 +			       gfp_mask & ~__GFP_DMA);
  14.139 +	if (!skb)
  14.140 +		goto out;
  14.141 +
  14.142 +	/* Get the DATA. Size must match skb_add_mtu(). */
  14.143 +	size = SKB_DATA_ALIGN(size);
  14.144 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  14.145 +	if (!data)
  14.146 +		goto nodata;
  14.147 +
  14.148 +	memset(skb, 0, offsetof(struct sk_buff, truesize));
  14.149 +	skb->truesize = size + sizeof(struct sk_buff);
  14.150 +	atomic_set(&skb->users, 1);
  14.151 +	skb->head = data;
  14.152 +	skb->data = data;
  14.153 +	skb->tail = data;
  14.154 +	skb->end  = data + size;
  14.155 +
  14.156 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  14.157 +	skb_shinfo(skb)->nr_frags  = 0;
  14.158 +	skb_shinfo(skb)->tso_size = 0;
  14.159 +	skb_shinfo(skb)->tso_segs = 0;
  14.160 +	skb_shinfo(skb)->frag_list = NULL;
  14.161 +out:
  14.162 +	return skb;
  14.163 +nodata:
  14.164 +	kmem_cache_free(skbuff_head_cache, skb);
  14.165 +	skb = NULL;
  14.166 +	goto out;
  14.167 +}
  14.168 +
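As a usage note, callers normally reserve headroom right after alloc_skb() and then append the payload with skb_put(); a minimal sketch (build_packet is illustrative):

/* Typical allocation pattern: reserve room for headers to be pushed
 * later, then append 'len' bytes of payload. */
static struct sk_buff *build_packet(const void *payload, unsigned int len,
				    unsigned int headroom)
{
	struct sk_buff *skb = alloc_skb(headroom + len, GFP_ATOMIC);

	if (skb == NULL)
		return NULL;
	skb_reserve(skb, headroom);		 /* leave space for headers */
	memcpy(skb_put(skb, len), payload, len); /* append the payload     */
	return skb;
}
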
  14.169 +/**
  14.170 + *	alloc_skb_from_cache	-	allocate a network buffer
  14.171 + *	@cp: kmem_cache from which to allocate the data area
  14.172 + *           (object size must be big enough for @size bytes + skb overheads)
  14.173 + *	@size: size to allocate
  14.174 + *	@gfp_mask: allocation mask
  14.175 + *
  14.176 + *	Allocate a new &sk_buff. The returned buffer has no headroom and a
  14.177 + *	tail room of size bytes. The object has a reference count of one.
  14.178 + *	The return is the buffer. On a failure the return is %NULL.
  14.179 + *
  14.180 + *	Buffers may only be allocated from interrupts using a @gfp_mask of
  14.181 + *	%GFP_ATOMIC.
  14.182 + */
  14.183 +struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
  14.184 +				     unsigned int size, int gfp_mask)
  14.185 +{
  14.186 +	struct sk_buff *skb;
  14.187 +	u8 *data;
  14.188 +
  14.189 +	/* Get the HEAD */
  14.190 +	skb = kmem_cache_alloc(skbuff_head_cache,
  14.191 +			       gfp_mask & ~__GFP_DMA);
  14.192 +	if (!skb)
  14.193 +		goto out;
  14.194 +
  14.195 +	/* Get the DATA. */
  14.196 +	size = SKB_DATA_ALIGN(size);
  14.197 +	data = kmem_cache_alloc(cp, gfp_mask);
  14.198 +	if (!data)
  14.199 +		goto nodata;
  14.200 +
  14.201 +	memset(skb, 0, offsetof(struct sk_buff, truesize));
  14.202 +	skb->truesize = size + sizeof(struct sk_buff);
  14.203 +	atomic_set(&skb->users, 1);
  14.204 +	skb->head = data;
  14.205 +	skb->data = data;
  14.206 +	skb->tail = data;
  14.207 +	skb->end  = data + size;
  14.208 +
  14.209 +	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  14.210 +	skb_shinfo(skb)->nr_frags  = 0;
  14.211 +	skb_shinfo(skb)->tso_size = 0;
  14.212 +	skb_shinfo(skb)->tso_segs = 0;
  14.213 +	skb_shinfo(skb)->frag_list = NULL;
  14.214 +out:
  14.215 +	return skb;
  14.216 +nodata:
  14.217 +	kmem_cache_free(skbuff_head_cache, skb);
  14.218 +	skb = NULL;
  14.219 +	goto out;
  14.220 +}
  14.221 +
  14.222 +
  14.223 +static void skb_drop_fraglist(struct sk_buff *skb)
  14.224 +{
  14.225 +	struct sk_buff *list = skb_shinfo(skb)->frag_list;
  14.226 +
  14.227 +	skb_shinfo(skb)->frag_list = NULL;
  14.228 +
  14.229 +	do {
  14.230 +		struct sk_buff *this = list;
  14.231 +		list = list->next;
  14.232 +		kfree_skb(this);
  14.233 +	} while (list);
  14.234 +}
  14.235 +
  14.236 +static void skb_clone_fraglist(struct sk_buff *skb)
  14.237 +{
  14.238 +	struct sk_buff *list;
  14.239 +
  14.240 +	for (list = skb_shinfo(skb)->frag_list; list; list = list->next)
  14.241 +		skb_get(list);
  14.242 +}
  14.243 +
  14.244 +void skb_release_data(struct sk_buff *skb)
  14.245 +{
  14.246 +	if (!skb->cloned ||
  14.247 +	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
  14.248 +		if (skb_shinfo(skb)->nr_frags) {
  14.249 +			int i;
  14.250 +			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
  14.251 +				put_page(skb_shinfo(skb)->frags[i].page);
  14.252 +		}
  14.253 +
  14.254 +		if (skb_shinfo(skb)->frag_list)
  14.255 +			skb_drop_fraglist(skb);
  14.256 +
  14.257 +		kfree(skb->head);
  14.258 +	}
  14.259 +}
  14.260 +
  14.261 +/*
  14.262 + *	Free the skbuff's memory without cleaning its state.
  14.263 + */
  14.264 +void kfree_skbmem(struct sk_buff *skb)
  14.265 +{
  14.266 +	skb_release_data(skb);
  14.267 +	kmem_cache_free(skbuff_head_cache, skb);
  14.268 +}
  14.269 +
  14.270 +/**
  14.271 + *	__kfree_skb - private function
  14.272 + *	@skb: buffer
  14.273 + *
  14.274 + *	Free an sk_buff. Release anything attached to the buffer.
  14.275 + *	Clean the state. This is an internal helper function. Users should
  14.276 + *	always call kfree_skb
  14.277 + */
  14.278 +
  14.279 +void __kfree_skb(struct sk_buff *skb)
  14.280 +{
  14.281 +	if (skb->list) {
  14.282 +	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
  14.283 +		       "on a list (from %p).\n", NET_CALLER(skb));
  14.284 +		BUG();
  14.285 +	}
  14.286 +
  14.287 +	dst_release(skb->dst);
  14.288 +#ifdef CONFIG_XFRM
  14.289 +	secpath_put(skb->sp);
  14.290 +#endif
  14.291 +	if(skb->destructor) {
  14.292 +		if (in_irq())
  14.293 +			printk(KERN_WARNING "Warning: kfree_skb on "
  14.294 +					    "hard IRQ %p\n", NET_CALLER(skb));
  14.295 +		skb->destructor(skb);
  14.296 +	}
  14.297 +#ifdef CONFIG_NETFILTER
  14.298 +	nf_conntrack_put(skb->nfct);
  14.299 +#ifdef CONFIG_BRIDGE_NETFILTER
  14.300 +	nf_bridge_put(skb->nf_bridge);
  14.301 +#endif
  14.302 +#endif
  14.303 +/* XXX: IS this still necessary? - JHS */
  14.304 +#ifdef CONFIG_NET_SCHED
  14.305 +	skb->tc_index = 0;
  14.306 +#ifdef CONFIG_NET_CLS_ACT
  14.307 +	skb->tc_verd = 0;
  14.308 +	skb->tc_classid = 0;
  14.309 +#endif
  14.310 +#endif
  14.311 +
  14.312 +	kfree_skbmem(skb);
  14.313 +}
  14.314 +
  14.315 +/**
  14.316 + *	skb_clone	-	duplicate an sk_buff
  14.317 + *	@skb: buffer to clone
  14.318 + *	@gfp_mask: allocation priority
  14.319 + *
  14.320 + *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
  14.321 + *	copies share the same packet data but not structure. The new
  14.322 + *	buffer has a reference count of 1. If the allocation fails the
  14.323 + *	function returns %NULL otherwise the new buffer is returned.
  14.324 + *
  14.325 + *	If this function is called from an interrupt gfp_mask() must be
  14.326 + *	%GFP_ATOMIC.
  14.327 + */
  14.328 +
  14.329 +struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
  14.330 +{
  14.331 +	struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
  14.332 +
  14.333 +	if (!n) 
  14.334 +		return NULL;
  14.335 +
  14.336 +#define C(x) n->x = skb->x
  14.337 +
  14.338 +	n->next = n->prev = NULL;
  14.339 +	n->list = NULL;
  14.340 +	n->sk = NULL;
  14.341 +	C(stamp);
  14.342 +	C(dev);
  14.343 +	C(real_dev);
  14.344 +	C(h);
  14.345 +	C(nh);
  14.346 +	C(mac);
  14.347 +	C(dst);
  14.348 +	dst_clone(skb->dst);
  14.349 +	C(sp);
  14.350 +#ifdef CONFIG_INET
  14.351 +	secpath_get(skb->sp);
  14.352 +#endif
  14.353 +	memcpy(n->cb, skb->cb, sizeof(skb->cb));
  14.354 +	C(len);
  14.355 +	C(data_len);
  14.356 +	C(csum);
  14.357 +	C(local_df);
  14.358 +	n->cloned = 1;
  14.359 +	C(pkt_type);
  14.360 +	C(ip_summed);
  14.361 +	C(priority);
  14.362 +	C(protocol);
  14.363 +	C(security);
  14.364 +	n->destructor = NULL;
  14.365 +#ifdef CONFIG_NETFILTER
  14.366 +	C(nfmark);
  14.367 +	C(nfcache);
  14.368 +	C(nfct);
  14.369 +	nf_conntrack_get(skb->nfct);
  14.370 +#ifdef CONFIG_NETFILTER_DEBUG
  14.371 +	C(nf_debug);
  14.372 +#endif
  14.373 +#ifdef CONFIG_BRIDGE_NETFILTER
  14.374 +	C(nf_bridge);
  14.375 +	nf_bridge_get(skb->nf_bridge);
  14.376 +#endif
  14.377 +#endif /*CONFIG_NETFILTER*/
  14.378 +#if defined(CONFIG_HIPPI)
  14.379 +	C(private);
  14.380 +#endif
  14.381 +#ifdef CONFIG_NET_SCHED
  14.382 +	C(tc_index);
  14.383 +#ifdef CONFIG_NET_CLS_ACT
  14.384 +	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
  14.385 +	n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd);
  14.386 +	n->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
  14.387 +	C(input_dev);
  14.388 +	C(tc_classid);
  14.389 +#endif
  14.390 +
  14.391 +#endif
  14.392 +	C(truesize);
  14.393 +	atomic_set(&n->users, 1);
  14.394 +	C(head);
  14.395 +	C(data);
  14.396 +	C(tail);
  14.397 +	C(end);
  14.398 +
  14.399 +	atomic_inc(&(skb_shinfo(skb)->dataref));
  14.400 +	skb->cloned = 1;
  14.401 +
  14.402 +	return n;
  14.403 +}
  14.404 +
  14.405 +static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  14.406 +{
  14.407 +	/*
  14.408 +	 *	Shift between the two data areas in bytes
  14.409 +	 */
  14.410 +	unsigned long offset = new->data - old->data;
  14.411 +
  14.412 +	new->list	= NULL;
  14.413 +	new->sk		= NULL;
  14.414 +	new->dev	= old->dev;
  14.415 +	new->real_dev	= old->real_dev;
  14.416 +	new->priority	= old->priority;
  14.417 +	new->protocol	= old->protocol;
  14.418 +	new->dst	= dst_clone(old->dst);
  14.419 +#ifdef CONFIG_INET
  14.420 +	new->sp		= secpath_get(old->sp);
  14.421 +#endif
  14.422 +	new->h.raw	= old->h.raw + offset;
  14.423 +	new->nh.raw	= old->nh.raw + offset;
  14.424 +	new->mac.raw	= old->mac.raw + offset;
  14.425 +	memcpy(new->cb, old->cb, sizeof(old->cb));
  14.426 +	new->local_df	= old->local_df;
  14.427 +	new->pkt_type	= old->pkt_type;
  14.428 +	new->stamp	= old->stamp;
  14.429 +	new->destructor = NULL;
  14.430 +	new->security	= old->security;
  14.431 +#ifdef CONFIG_NETFILTER
  14.432 +	new->nfmark	= old->nfmark;
  14.433 +	new->nfcache	= old->nfcache;
  14.434 +	new->nfct	= old->nfct;
  14.435 +	nf_conntrack_get(old->nfct);
  14.436 +#ifdef CONFIG_NETFILTER_DEBUG
  14.437 +	new->nf_debug	= old->nf_debug;
  14.438 +#endif
  14.439 +#ifdef CONFIG_BRIDGE_NETFILTER
  14.440 +	new->nf_bridge	= old->nf_bridge;
  14.441 +	nf_bridge_get(old->nf_bridge);
  14.442 +#endif
  14.443 +#endif
  14.444 +#ifdef CONFIG_NET_SCHED
  14.445 +#ifdef CONFIG_NET_CLS_ACT
  14.446 +	new->tc_verd = old->tc_verd;
  14.447 +#endif
  14.448 +	new->tc_index	= old->tc_index;
  14.449 +#endif
  14.450 +	atomic_set(&new->users, 1);
  14.451 +}
  14.452 +
  14.453 +/**
  14.454 + *	skb_copy	-	create private copy of an sk_buff
  14.455 + *	@skb: buffer to copy
  14.456 + *	@gfp_mask: allocation priority
  14.457 + *
  14.458 + *	Make a copy of both an &sk_buff and its data. This is used when the
  14.459 + *	caller wishes to modify the data and needs a private copy of the
  14.460 + *	data to alter. Returns %NULL on failure or the pointer to the buffer
  14.461 + *	on success. The returned buffer has a reference count of 1.
  14.462 + *
  14.463 + *	As by-product this function converts non-linear &sk_buff to linear
  14.464 + *	one, so that &sk_buff becomes completely private and caller is allowed
  14.465 + *	to modify all the data of returned buffer. This means that this
  14.466 + *	function is not recommended for use in circumstances when only
  14.467 + *	header is going to be modified. Use pskb_copy() instead.
  14.468 + */
  14.469 +
  14.470 +struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
  14.471 +{
  14.472 +	int headerlen = skb->data - skb->head;
  14.473 +	/*
  14.474 +	 *	Allocate the copy buffer
  14.475 +	 */
  14.476 +	struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len,
  14.477 +				      gfp_mask);
  14.478 +	if (!n)
  14.479 +		return NULL;
  14.480 +
  14.481 +	/* Set the data pointer */
  14.482 +	skb_reserve(n, headerlen);
  14.483 +	/* Set the tail pointer and length */
  14.484 +	skb_put(n, skb->len);
  14.485 +	n->csum	     = skb->csum;
  14.486 +	n->ip_summed = skb->ip_summed;
  14.487 +
  14.488 +	if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
  14.489 +		BUG();
  14.490 +
  14.491 +	copy_skb_header(n, skb);
  14.492 +	return n;
  14.493 +}
  14.494 +
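A typical caller of skb_copy(), sketched for reference (edit_packet is illustrative): take a private linear copy before modifying bytes that might otherwise be shared with a clone.

/* Take a private, linear copy before modifying packet bytes in place. */
static struct sk_buff *edit_packet(struct sk_buff *skb)
{
	struct sk_buff *copy = skb_copy(skb, GFP_ATOMIC);

	if (copy == NULL)
		return NULL;
	if (copy->len > 0)
		copy->data[0] ^= 0x01;	/* safe: no other reference sees this data */
	kfree_skb(skb);
	return copy;
}
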
  14.495 +
  14.496 +/**
  14.497 + *	pskb_copy	-	create copy of an sk_buff with private head.
  14.498 + *	@skb: buffer to copy
  14.499 + *	@gfp_mask: allocation priority
  14.500 + *
  14.501 + *	Make a copy of both an &sk_buff and part of its data, located
  14.502 + *	in header. Fragmented data remain shared. This is used when
  14.503 + *	the caller wishes to modify only header of &sk_buff and needs
  14.504 + *	private copy of the header to alter. Returns %NULL on failure
  14.505 + *	or the pointer to the buffer on success.
  14.506 + *	The returned buffer has a reference count of 1.
  14.507 + */
  14.508 +
  14.509 +struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
  14.510 +{
  14.511 +	/*
  14.512 +	 *	Allocate the copy buffer
  14.513 +	 */
  14.514 +	struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask);
  14.515 +
  14.516 +	if (!n)
  14.517 +		goto out;
  14.518 +
  14.519 +	/* Set the data pointer */
  14.520 +	skb_reserve(n, skb->data - skb->head);
  14.521 +	/* Set the tail pointer and length */
  14.522 +	skb_put(n, skb_headlen(skb));
  14.523 +	/* Copy the bytes */
  14.524 +	memcpy(n->data, skb->data, n->len);
  14.525 +	n->csum	     = skb->csum;
  14.526 +	n->ip_summed = skb->ip_summed;
  14.527 +
  14.528 +	n->data_len  = skb->data_len;
  14.529 +	n->len	     = skb->len;
  14.530 +
  14.531 +	if (skb_shinfo(skb)->nr_frags) {
  14.532 +		int i;
  14.533 +
  14.534 +		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  14.535 +			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
  14.536 +			get_page(skb_shinfo(n)->frags[i].page);
  14.537 +		}
  14.538 +		skb_shinfo(n)->nr_frags = i;
  14.539 +	}
  14.540 +	skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
  14.541 +	skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
  14.542 +
  14.543 +	if (skb_shinfo(skb)->frag_list) {
  14.544 +		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
  14.545 +		skb_clone_fraglist(n);
  14.546 +	}
  14.547 +
  14.548 +	copy_skb_header(n, skb);
  14.549 +out:
  14.550 +	return n;
  14.551 +}
  14.552 +
  14.553 +/**
  14.554 + *	pskb_expand_head - reallocate header of &sk_buff
  14.555 + *	@skb: buffer to reallocate
  14.556 + *	@nhead: room to add at head
  14.557 + *	@ntail: room to add at tail
  14.558 + *	@gfp_mask: allocation priority
  14.559 + *
  14.560 + *	Expands (or creates identical copy, if &nhead and &ntail are zero)
  14.561 + *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
  14.562 + *	reference count of 1. Returns zero on success, or a negative error
  14.563 + *	code if expansion failed; in that case the &sk_buff is not changed.
  14.564 + *
  14.565 + *	All the pointers pointing into skb header may change and must be
  14.566 + *	reloaded after call to this function.
  14.567 + */
  14.568 +
  14.569 +int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
  14.570 +{
  14.571 +	int i;
  14.572 +	u8 *data;
  14.573 +	int size = nhead + (skb->end - skb->head) + ntail;
  14.574 +	long off;
  14.575 +
  14.576 +	if (skb_shared(skb))
  14.577 +		BUG();
  14.578 +
  14.579 +	size = SKB_DATA_ALIGN(size);
  14.580 +
  14.581 +	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  14.582 +	if (!data)
  14.583 +		goto nodata;
  14.584 +
  14.585 +	/* Copy only real data... and, alas, header. This should be
  14.586 +	 * optimized for the cases when header is void. */
  14.587 +	memcpy(data + nhead, skb->head, skb->tail - skb->head);
  14.588 +	memcpy(data + size, skb->end, sizeof(struct skb_shared_info));
  14.589 +
  14.590 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
  14.591 +		get_page(skb_shinfo(skb)->frags[i].page);
  14.592 +
  14.593 +	if (skb_shinfo(skb)->frag_list)
  14.594 +		skb_clone_fraglist(skb);
  14.595 +
  14.596 +	skb_release_data(skb);
  14.597 +
  14.598 +	off = (data + nhead) - skb->head;
  14.599 +
  14.600 +	skb->head     = data;
  14.601 +	skb->end      = data + size;
  14.602 +	skb->data    += off;
  14.603 +	skb->tail    += off;
  14.604 +	skb->mac.raw += off;
  14.605 +	skb->h.raw   += off;
  14.606 +	skb->nh.raw  += off;
  14.607 +	skb->cloned   = 0;
  14.608 +	atomic_set(&skb_shinfo(skb)->dataref, 1);
  14.609 +	return 0;
  14.610 +
  14.611 +nodata:
  14.612 +	return -ENOMEM;
  14.613 +}
  14.614 +
  14.615 +/* Make private copy of skb with writable head and some headroom */
  14.616 +
  14.617 +struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
  14.618 +{
  14.619 +	struct sk_buff *skb2;
  14.620 +	int delta = headroom - skb_headroom(skb);
  14.621 +
  14.622 +	if (delta <= 0)
  14.623 +		skb2 = pskb_copy(skb, GFP_ATOMIC);
  14.624 +	else {
  14.625 +		skb2 = skb_clone(skb, GFP_ATOMIC);
  14.626 +		if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0,
  14.627 +					     GFP_ATOMIC)) {
  14.628 +			kfree_skb(skb2);
  14.629 +			skb2 = NULL;
  14.630 +		}
  14.631 +	}
  14.632 +	return skb2;
  14.633 +}
  14.634 +
  14.635 +
  14.636 +/**
  14.637 + *	skb_copy_expand	-	copy and expand sk_buff
  14.638 + *	@skb: buffer to copy
  14.639 + *	@newheadroom: new free bytes at head
  14.640 + *	@newtailroom: new free bytes at tail
  14.641 + *	@gfp_mask: allocation priority
  14.642 + *
  14.643 + *	Make a copy of both an &sk_buff and its data and while doing so
  14.644 + *	allocate additional space.
  14.645 + *
  14.646 + *	This is used when the caller wishes to modify the data and needs a
  14.647 + *	private copy of the data to alter as well as more space for new fields.
  14.648 + *	Returns %NULL on failure or the pointer to the buffer
  14.649 + *	on success. The returned buffer has a reference count of 1.
  14.650 + *
  14.651 + *	You must pass %GFP_ATOMIC as the allocation priority if this function
  14.652 + *	is called from an interrupt.
  14.653 + *
  14.654 + *	BUG ALERT: ip_summed is not copied. Why does this work? Is it used
  14.655 + *	only by netfilter in the cases when checksum is recalculated? --ANK
  14.656 + */
  14.657 +struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
  14.658 +				int newheadroom, int newtailroom, int gfp_mask)
  14.659 +{
  14.660 +	/*
  14.661 +	 *	Allocate the copy buffer
  14.662 +	 */
  14.663 +	struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom,
  14.664 +				      gfp_mask);
  14.665 +	int head_copy_len, head_copy_off;
  14.666 +
  14.667 +	if (!n)
  14.668 +		return NULL;
  14.669 +
  14.670 +	skb_reserve(n, newheadroom);
  14.671 +
  14.672 +	/* Set the tail pointer and length */
  14.673 +	skb_put(n, skb->len);
  14.674 +
  14.675 +	head_copy_len = skb_headroom(skb);
  14.676 +	head_copy_off = 0;
  14.677 +	if (newheadroom <= head_copy_len)
  14.678 +		head_copy_len = newheadroom;
  14.679 +	else
  14.680 +		head_copy_off = newheadroom - head_copy_len;
  14.681 +
  14.682 +	/* Copy the linear header and data. */
  14.683 +	if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
  14.684 +			  skb->len + head_copy_len))
  14.685 +		BUG();
  14.686 +
  14.687 +	copy_skb_header(n, skb);
  14.688 +	skb_shinfo(n)->tso_size = skb_shinfo(skb)->tso_size;
  14.689 +	skb_shinfo(n)->tso_segs = skb_shinfo(skb)->tso_segs;
  14.690 +
  14.691 +	return n;
  14.692 +}
  14.693 +
  14.694 +/**
  14.695 + *	skb_pad			-	zero pad the tail of an skb
  14.696 + *	@skb: buffer to pad
  14.697 + *	@pad: space to pad
  14.698 + *
  14.699 + *	Ensure that a buffer is followed by a padding area that is zero
  14.700 + *	filled. Used by network drivers which may DMA or transfer data
  14.701 + *	beyond the buffer end onto the wire.
  14.702 + *
  14.703 + *	May return NULL in out of memory cases.
  14.704 + */
  14.705 + 
  14.706 +struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
  14.707 +{
  14.708 +	struct sk_buff *nskb;
  14.709 +	
  14.710 +	/* If the skbuff is non linear tailroom is always zero.. */
  14.711 +	if (skb_tailroom(skb) >= pad) {
  14.712 +		memset(skb->data+skb->len, 0, pad);
  14.713 +		return skb;
  14.714 +	}
  14.715 +	
  14.716 +	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
  14.717 +	kfree_skb(skb);
  14.718 +	if (nskb)
  14.719 +		memset(nskb->data+nskb->len, 0, pad);
  14.720 +	return nskb;
  14.721 +}	
  14.722 + 
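The classic user of skb_pad() is a driver zero-padding runt Ethernet frames up to the 60-byte minimum before transmission. A sketch, assuming ETH_ZLEN from <linux/if_ether.h>; note that skb_pad() zeroes the tail but does not change skb->len, and may return a different skb or NULL:

/* Zero-pad a runt Ethernet frame before transmission.  The driver then
 * transmits max(skb->len, ETH_ZLEN) bytes. */
static struct sk_buff *pad_runt_frame(struct sk_buff *skb, unsigned int *tx_len)
{
	*tx_len = skb->len;
	if (skb->len < ETH_ZLEN) {
		skb = skb_pad(skb, ETH_ZLEN - skb->len);
		*tx_len = ETH_ZLEN;
	}
	return skb;	/* NULL if the padded copy could not be allocated */
}
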
  14.723 +/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
  14.724 + * If realloc==0 and trimming is impossible without changing the data,
  14.725 + * it is a BUG().
  14.726 + */
  14.727 +
  14.728 +int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
  14.729 +{
  14.730 +	int offset = skb_headlen(skb);
  14.731 +	int nfrags = skb_shinfo(skb)->nr_frags;
  14.732 +	int i;
  14.733 +
  14.734 +	for (i = 0; i < nfrags; i++) {
  14.735 +		int end = offset + skb_shinfo(skb)->frags[i].size;
  14.736 +		if (end > len) {
  14.737 +			if (skb_cloned(skb)) {
  14.738 +				if (!realloc)
  14.739 +					BUG();
  14.740 +				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
  14.741 +					return -ENOMEM;
  14.742 +			}
  14.743 +			if (len <= offset) {
  14.744 +				put_page(skb_shinfo(skb)->frags[i].page);
  14.745 +				skb_shinfo(skb)->nr_frags--;
  14.746 +			} else {
  14.747 +				skb_shinfo(skb)->frags[i].size = len - offset;
  14.748 +			}
  14.749 +		}
  14.750 +		offset = end;
  14.751 +	}
  14.752 +
  14.753 +	if (offset < len) {
  14.754 +		skb->data_len -= skb->len - len;
  14.755 +		skb->len       = len;
  14.756 +	} else {
  14.757 +		if (len <= skb_headlen(skb)) {
  14.758 +			skb->len      = len;
  14.759 +			skb->data_len = 0;
  14.760 +			skb->tail     = skb->data + len;
  14.761 +			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
  14.762 +				skb_drop_fraglist(skb);
  14.763 +		} else {
  14.764 +			skb->data_len -= skb->len - len;
  14.765 +			skb->len       = len;
  14.766 +		}
  14.767 +	}
  14.768 +
  14.769 +	return 0;
  14.770 +}
  14.771 +
  14.772 +/**
  14.773 + *	__pskb_pull_tail - advance tail of skb header
  14.774 + *	@skb: buffer to reallocate
  14.775 + *	@delta: number of bytes to advance tail
  14.776 + *
  14.777 + *	The function makes sense only on a fragmented &sk_buff,
  14.778 + *	it expands header moving its tail forward and copying necessary
  14.779 + *	data from fragmented part.
  14.780 + *
  14.781 + *	&sk_buff MUST have reference count of 1.
  14.782 + *
  14.783 + *	Returns %NULL (and &sk_buff does not change) if pull failed
  14.784 + *	or value of new tail of skb in the case of success.
  14.785 + *
  14.786 + *	All the pointers pointing into skb header may change and must be
  14.787 + *	reloaded after call to this function.
  14.788 + */
  14.789 +
  14.790 +/* Moves tail of skb head forward, copying data from fragmented part,
  14.791 + * when it is necessary.
  14.792 + * 1. It may fail due to malloc failure.
  14.793 + * 2. It may change skb pointers.
  14.794 + *
  14.795 + * It is pretty complicated. Luckily, it is called only in exceptional cases.
  14.796 + */
  14.797 +unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
  14.798 +{
  14.799 +	/* If the skb does not have enough free space at the tail, get a new
  14.800 +	 * one plus 128 bytes for future expansions. If we have enough room
  14.801 +	 * at the tail, reallocate without expansion only if the skb is cloned.
  14.802 +	 */
  14.803 +	int i, k, eat = (skb->tail + delta) - skb->end;
  14.804 +
  14.805 +	if (eat > 0 || skb_cloned(skb)) {
  14.806 +		if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0,
  14.807 +				     GFP_ATOMIC))
  14.808 +			return NULL;
  14.809 +	}
  14.810 +
  14.811 +	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
  14.812 +		BUG();
  14.813 +
  14.814 +	/* Optimization: no fragments, no reasons to preestimate
  14.815 +	 * size of pulled pages. Superb.
  14.816 +	 */
  14.817 +	if (!skb_shinfo(skb)->frag_list)
  14.818 +		goto pull_pages;
  14.819 +
  14.820 +	/* Estimate size of pulled pages. */
  14.821 +	eat = delta;
  14.822 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  14.823 +		if (skb_shinfo(skb)->frags[i].size >= eat)
  14.824 +			goto pull_pages;
  14.825 +		eat -= skb_shinfo(skb)->frags[i].size;
  14.826 +	}
  14.827 +
  14.828 +	/* If we need to update the frag list, we are in trouble.
  14.829 +	 * Certainly, it is possible to add an offset to the skb data,
  14.830 +	 * but taking into account that pulling is expected to
  14.831 +	 * be a very rare operation, it is worth fighting against
  14.832 +	 * further bloating of the skb head and crucifying ourselves here instead.
  14.833 +	 * Pure masochism, indeed. 8)8)
  14.834 +	 */
  14.835 +	if (eat) {
  14.836 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
  14.837 +		struct sk_buff *clone = NULL;
  14.838 +		struct sk_buff *insp = NULL;
  14.839 +
  14.840 +		do {
  14.841 +			if (!list)
  14.842 +				BUG();
  14.843 +
  14.844 +			if (list->len <= eat) {
  14.845 +				/* Eaten as whole. */
  14.846 +				eat -= list->len;
  14.847 +				list = list->next;
  14.848 +				insp = list;
  14.849 +			} else {
  14.850 +				/* Eaten partially. */
  14.851 +
  14.852 +				if (skb_shared(list)) {
  14.853 +					/* Sucks! We need to fork list. :-( */
  14.854 +					clone = skb_clone(list, GFP_ATOMIC);
  14.855 +					if (!clone)
  14.856 +						return NULL;
  14.857 +					insp = list->next;
  14.858 +					list = clone;
  14.859 +				} else {
  14.860 +					/* This may be pulled without
  14.861 +					 * problems. */
  14.862 +					insp = list;
  14.863 +				}
  14.864 +				if (!pskb_pull(list, eat)) {
  14.865 +					if (clone)
  14.866 +						kfree_skb(clone);
  14.867 +					return NULL;
  14.868 +				}
  14.869 +				break;
  14.870 +			}
  14.871 +		} while (eat);
  14.872 +
  14.873 +		/* Free pulled out fragments. */
  14.874 +		while ((list = skb_shinfo(skb)->frag_list) != insp) {
  14.875 +			skb_shinfo(skb)->frag_list = list->next;
  14.876 +			kfree_skb(list);
  14.877 +		}
  14.878 +		/* And insert new clone at head. */
  14.879 +		if (clone) {
  14.880 +			clone->next = list;
  14.881 +			skb_shinfo(skb)->frag_list = clone;
  14.882 +		}
  14.883 +	}
  14.884 +	/* Success! Now we may commit changes to skb data. */
  14.885 +
  14.886 +pull_pages:
  14.887 +	eat = delta;
  14.888 +	k = 0;
  14.889 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  14.890 +		if (skb_shinfo(skb)->frags[i].size <= eat) {
  14.891 +			put_page(skb_shinfo(skb)->frags[i].page);
  14.892 +			eat -= skb_shinfo(skb)->frags[i].size;
  14.893 +		} else {
  14.894 +			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
  14.895 +			if (eat) {
  14.896 +				skb_shinfo(skb)->frags[k].page_offset += eat;
  14.897 +				skb_shinfo(skb)->frags[k].size -= eat;
  14.898 +				eat = 0;
  14.899 +			}
  14.900 +			k++;
  14.901 +		}
  14.902 +	}
  14.903 +	skb_shinfo(skb)->nr_frags = k;
  14.904 +
  14.905 +	skb->tail     += delta;
  14.906 +	skb->data_len -= delta;
  14.907 +
  14.908 +	return skb->tail;
  14.909 +}
  14.910 +
  14.911 +/* Copy some data bits from skb to kernel buffer. */
  14.912 +
  14.913 +int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
  14.914 +{
  14.915 +	int i, copy;
  14.916 +	int start = skb_headlen(skb);
  14.917 +
  14.918 +	if (offset > (int)skb->len - len)
  14.919 +		goto fault;
  14.920 +
  14.921 +	/* Copy header. */
  14.922 +	if ((copy = start - offset) > 0) {
  14.923 +		if (copy > len)
  14.924 +			copy = len;
  14.925 +		memcpy(to, skb->data + offset, copy);
  14.926 +		if ((len -= copy) == 0)
  14.927 +			return 0;
  14.928 +		offset += copy;
  14.929 +		to     += copy;
  14.930 +	}
  14.931 +
  14.932 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  14.933 +		int end;
  14.934 +
  14.935 +		BUG_TRAP(start <= offset + len);
  14.936 +
  14.937 +		end = start + skb_shinfo(skb)->frags[i].size;
  14.938 +		if ((copy = end - offset) > 0) {
  14.939 +			u8 *vaddr;
  14.940 +
  14.941 +			if (copy > len)
  14.942 +				copy = len;
  14.943 +
  14.944 +			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
  14.945 +			memcpy(to,
  14.946 +			       vaddr + skb_shinfo(skb)->frags[i].page_offset+
  14.947 +			       offset - start, copy);
  14.948 +			kunmap_skb_frag(vaddr);
  14.949 +
  14.950 +			if ((len -= copy) == 0)
  14.951 +				return 0;
  14.952 +			offset += copy;
  14.953 +			to     += copy;
  14.954 +		}
  14.955 +		start = end;
  14.956 +	}
  14.957 +
  14.958 +	if (skb_shinfo(skb)->frag_list) {
  14.959 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
  14.960 +
  14.961 +		for (; list; list = list->next) {
  14.962 +			int end;
  14.963 +
  14.964 +			BUG_TRAP(start <= offset + len);
  14.965 +
  14.966 +			end = start + list->len;
  14.967 +			if ((copy = end - offset) > 0) {
  14.968 +				if (copy > len)
  14.969 +					copy = len;
  14.970 +				if (skb_copy_bits(list, offset - start,
  14.971 +						  to, copy))
  14.972 +					goto fault;
  14.973 +				if ((len -= copy) == 0)
  14.974 +					return 0;
  14.975 +				offset += copy;
  14.976 +				to     += copy;
  14.977 +			}
  14.978 +			start = end;
  14.979 +		}
  14.980 +	}
  14.981 +	if (!len)
  14.982 +		return 0;
  14.983 +
  14.984 +fault:
  14.985 +	return -EFAULT;
  14.986 +}
  14.987 +
  14.988 +/* Keep iterating until skb_iter_next returns false. */
  14.989 +void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i)
  14.990 +{
  14.991 +	i->len = skb_headlen(skb);
  14.992 +	i->data = (unsigned char *)skb->data;
  14.993 +	i->nextfrag = 0;
  14.994 +	i->fraglist = NULL;
  14.995 +}
  14.996 +
  14.997 +int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i)
  14.998 +{
  14.999 +	/* Unmap previous, if not head fragment. */
 14.1000 +	if (i->nextfrag)
 14.1001 +		kunmap_skb_frag(i->data);
 14.1002 +
 14.1003 +	if (i->fraglist) {
 14.1004 +	fraglist:
 14.1005 +		/* We're iterating through fraglist. */
 14.1006 +		if (i->nextfrag < skb_shinfo(i->fraglist)->nr_frags) {
 14.1007 +			i->data = kmap_skb_frag(&skb_shinfo(i->fraglist)
 14.1008 +						->frags[i->nextfrag]);
 14.1009 +			i->len = skb_shinfo(i->fraglist)->frags[i->nextfrag]
 14.1010 +				.size;
 14.1011 +			i->nextfrag++;
 14.1012 +			return 1;
 14.1013 +		}
 14.1014 +		/* Fragments with fragments?  Too hard! */
 14.1015 +		BUG_ON(skb_shinfo(i->fraglist)->frag_list);
 14.1016 +		i->fraglist = i->fraglist->next;
 14.1017 +		if (!i->fraglist)
 14.1018 +			goto end;
 14.1019 +
 14.1020 +		i->len = skb_headlen(i->fraglist);
 14.1021 +		i->data = i->fraglist->data;
 14.1022 +		i->nextfrag = 0;
 14.1023 +		return 1;
 14.1024 +	}
 14.1025 +
 14.1026 +	if (i->nextfrag < skb_shinfo(skb)->nr_frags) {
 14.1027 +		i->data = kmap_skb_frag(&skb_shinfo(skb)->frags[i->nextfrag]);
 14.1028 +		i->len = skb_shinfo(skb)->frags[i->nextfrag].size;
 14.1029 +		i->nextfrag++;
 14.1030 +		return 1;
 14.1031 +	}
 14.1032 +
 14.1033 +	i->fraglist = skb_shinfo(skb)->frag_list;
 14.1034 +	if (i->fraglist)
 14.1035 +		goto fraglist;
 14.1036 +
 14.1037 +end:
 14.1038 +	/* Bug trap for callers */
 14.1039 +	i->data = NULL;
 14.1040 +	return 0;
 14.1041 +}
 14.1042 +
 14.1043 +void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i)
 14.1044 +{
 14.1045 +	/* Unmap previous, if not head fragment. */
 14.1046 +	if (i->data && i->nextfrag)
 14.1047 +		kunmap_skb_frag(i->data);
 14.1048 +	/* Bug trap for callers */
 14.1049 +	i->data = NULL;
 14.1050 +}
 14.1051 +
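These skb_iter_* helpers are an addition in this tree for walking every byte of a possibly non-linear skb without linearising it (struct skb_iter is declared in the corresponding linux/skbuff.h, not shown in this hunk). A minimal usage sketch that sums all payload bytes:

/* Walk all data in a (possibly fragmented) skb using the iterator API
 * above, processing one contiguous chunk per iteration. */
static unsigned long sum_skb_bytes(const struct sk_buff *skb)
{
	struct skb_iter iter;
	unsigned long total = 0;
	unsigned int j;

	skb_iter_first(skb, &iter);
	do {
		for (j = 0; j < iter.len; j++)
			total += iter.data[j];
	} while (skb_iter_next(skb, &iter));
	/* No skb_iter_abort() needed: the loop ran to completion. */
	return total;
}
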
 14.1052 +/* Checksum skb data. */
 14.1053 +
 14.1054 +unsigned int skb_checksum(const struct sk_buff *skb, int offset,
 14.1055 +			  int len, unsigned int csum)
 14.1056 +{
 14.1057 +	int start = skb_headlen(skb);
 14.1058 +	int i, copy = start - offset;
 14.1059 +	int pos = 0;
 14.1060 +
 14.1061 +	/* Checksum header. */
 14.1062 +	if (copy > 0) {
 14.1063 +		if (copy > len)
 14.1064 +			copy = len;
 14.1065 +		csum = csum_partial(skb->data + offset, copy, csum);
 14.1066 +		if ((len -= copy) == 0)
 14.1067 +			return csum;
 14.1068 +		offset += copy;
 14.1069 +		pos	= copy;
 14.1070 +	}
 14.1071 +
 14.1072 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 14.1073 +		int end;
 14.1074 +
 14.1075 +		BUG_TRAP(start <= offset + len);
 14.1076 +
 14.1077 +		end = start + skb_shinfo(skb)->frags[i].size;
 14.1078 +		if ((copy = end - offset) > 0) {
 14.1079 +			unsigned int csum2;
 14.1080 +			u8 *vaddr;
 14.1081 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 14.1082 +
 14.1083 +			if (copy > len)
 14.1084 +				copy = len;
 14.1085 +			vaddr = kmap_skb_frag(frag);
 14.1086 +			csum2 = csum_partial(vaddr + frag->page_offset +
 14.1087 +					     offset - start, copy, 0);
 14.1088 +			kunmap_skb_frag(vaddr);
 14.1089 +			csum = csum_block_add(csum, csum2, pos);
 14.1090 +			if (!(len -= copy))
 14.1091 +				return csum;
 14.1092 +			offset += copy;
 14.1093 +			pos    += copy;
 14.1094 +		}
 14.1095 +		start = end;
 14.1096 +	}
 14.1097 +
 14.1098 +	if (skb_shinfo(skb)->frag_list) {
 14.1099 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 14.1100 +
 14.1101 +		for (; list; list = list->next) {
 14.1102 +			int end;
 14.1103 +
 14.1104 +			BUG_TRAP(start <= offset + len);
 14.1105 +
 14.1106 +			end = start + list->len;
 14.1107 +			if ((copy = end - offset) > 0) {
 14.1108 +				unsigned int csum2;
 14.1109 +				if (copy > len)
 14.1110 +					copy = len;
 14.1111 +				csum2 = skb_checksum(list, offset - start,
 14.1112 +						     copy, 0);
 14.1113 +				csum = csum_block_add(csum, csum2, pos);
 14.1114 +				if ((len -= copy) == 0)
 14.1115 +					return csum;
 14.1116 +				offset += copy;
 14.1117 +				pos    += copy;
 14.1118 +			}
 14.1119 +			start = end;
 14.1120 +		}
 14.1121 +	}
 14.1122 +	if (len)
 14.1123 +		BUG();
 14.1124 +
 14.1125 +	return csum;
 14.1126 +}
 14.1127 +
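For reference, a caller that needs the final 16-bit Internet checksum folds the 32-bit running sum returned by skb_checksum(), e.g.:

/* Compute the folded 16-bit ones-complement checksum of an entire skb,
 * including any page fragments and the frag_list. */
static inline unsigned short skb_full_csum(const struct sk_buff *skb)
{
	return csum_fold(skb_checksum(skb, 0, skb->len, 0));
}
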
 14.1128 +/* Both of above in one bottle. */
 14.1129 +
 14.1130 +unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 14.1131 +				    u8 *to, int len, unsigned int csum)
 14.1132 +{
 14.1133 +	int start = skb_headlen(skb);
 14.1134 +	int i, copy = start - offset;
 14.1135 +	int pos = 0;
 14.1136 +
 14.1137 +	/* Copy header. */
 14.1138 +	if (copy > 0) {
 14.1139 +		if (copy > len)
 14.1140 +			copy = len;
 14.1141 +		csum = csum_partial_copy_nocheck(skb->data + offset, to,
 14.1142 +						 copy, csum);
 14.1143 +		if ((len -= copy) == 0)
 14.1144 +			return csum;
 14.1145 +		offset += copy;
 14.1146 +		to     += copy;
 14.1147 +		pos	= copy;
 14.1148 +	}
 14.1149 +
 14.1150 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 14.1151 +		int end;
 14.1152 +
 14.1153 +		BUG_TRAP(start <= offset + len);
 14.1154 +
 14.1155 +		end = start + skb_shinfo(skb)->frags[i].size;
 14.1156 +		if ((copy = end - offset) > 0) {
 14.1157 +			unsigned int csum2;
 14.1158 +			u8 *vaddr;
 14.1159 +			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 14.1160 +
 14.1161 +			if (copy > len)
 14.1162 +				copy = len;
 14.1163 +			vaddr = kmap_skb_frag(frag);
 14.1164 +			csum2 = csum_partial_copy_nocheck(vaddr +
 14.1165 +							  frag->page_offset +
 14.1166 +							  offset - start, to,
 14.1167 +							  copy, 0);
 14.1168 +			kunmap_skb_frag(vaddr);
 14.1169 +			csum = csum_block_add(csum, csum2, pos);
 14.1170 +			if (!(len -= copy))
 14.1171 +				return csum;
 14.1172 +			offset += copy;
 14.1173 +			to     += copy;
 14.1174 +			pos    += copy;
 14.1175 +		}
 14.1176 +		start = end;
 14.1177 +	}
 14.1178 +
 14.1179 +	if (skb_shinfo(skb)->frag_list) {
 14.1180 +		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 14.1181 +
 14.1182 +		for (; list; list = list->next) {
 14.1183 +			unsigned int csum2;
 14.1184 +			int end;
 14.1185 +
 14.1186 +			BUG_TRAP(start <= offset + len);
 14.1187 +
 14.1188 +			end = start + list->len;
 14.1189 +			if ((copy = end - offset) > 0) {
 14.1190 +				if (copy > len)
 14.1191 +					copy = len;
 14.1192 +				csum2 = skb_copy_and_csum_bits(list,
 14.1193 +							       offset - start,
 14.1194 +							       to, copy, 0);
 14.1195 +				csum = csum_block_add(csum, csum2, pos);
 14.1196 +				if ((len -= copy) == 0)
 14.1197 +					return csum;
 14.1198 +				offset += copy;
 14.1199 +				to     += copy;
 14.1200 +				pos    += copy;
 14.1201 +			}
 14.1202 +			start = end;
 14.1203 +		}
 14.1204 +	}
 14.1205 +	if (len)
 14.1206 +		BUG();
 14.1207 +	return csum;
 14.1208 +}
 14.1209 +
 14.1210 +void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 14.1211 +{
 14.1212 +	unsigned int csum;
 14.1213 +	long csstart;
 14.1214 +
 14.1215 +	if (skb->ip_summed == CHECKSUM_HW)
 14.1216 +		csstart = skb->h.raw - skb->data;
 14.1217 +	else
 14.1218 +		csstart = skb_headlen(skb);
 14.1219 +
 14.1220 +	if (csstart > skb_headlen(skb))
 14.1221 +		BUG();
 14.1222 +
 14.1223 +	memcpy(to, skb->data, csstart);
 14.1224 +
 14.1225 +	csum = 0;
 14.1226 +	if (csstart != skb->len)
 14.1227 +		csum = skb_copy_and_csum_bits(skb, csstart, to + csstart,
 14.1228 +					      skb->len - csstart, 0);
 14.1229 +
 14.1230 +	if (skb->ip_summed == CHECKSUM_HW) {
 14.1231 +		long csstuff = csstart + skb->csum;
 14.1232 +
 14.1233 +		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
 14.1234 +	}
 14.1235 +}
 14.1236 +
 14.1237 +/**
 14.1238 + *	skb_dequeue - remove from the head of the queue
 14.1239 + *	@list: list to dequeue from
 14.1240 + *
 14.1241 + *	Remove the head of the list. The list lock is taken so the function
 14.1242 + *	may be used safely with other locking list functions. The head item is
 14.1243 + *	returned or %NULL if the list is empty.
 14.1244 + */
 14.1245 +
 14.1246 +struct sk_buff *skb_dequeue(struct sk_buff_head *list)
 14.1247 +{
 14.1248 +	unsigned long flags;
 14.1249 +	struct sk_buff *result;
 14.1250 +
 14.1251 +	spin_lock_irqsave(&list->lock, flags);
 14.1252 +	result = __skb_dequeue(list);
 14.1253 +	spin_unlock_irqrestore(&list->lock, flags);
 14.1254 +	return result;
 14.1255 +}
 14.1256 +
 14.1257 +/**
 14.1258 + *	skb_dequeue_tail - remove from the tail of the queue
 14.1259 + *	@list: list to dequeue from
 14.1260 + *
 14.1261 + *	Remove the tail of the list. The list lock is taken so the function
 14.1262 + *	may be used safely with other locking list functions. The tail item is
 14.1263 + *	returned or %NULL if the list is empty.
 14.1264 + */
 14.1265 +struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
 14.1266 +{
 14.1267 +	unsigned long flags;
 14.1268 +	struct sk_buff *result;
 14.1269 +
 14.1270 +	spin_lock_irqsave(&list->lock, flags);
 14.1271 +	result = __skb_dequeue_tail(list);
 14.1272 +	spin_unlock_irqrestore(&list->lock, flags);
 14.1273 +	return result;
 14.1274 +}
 14.1275 +
 14.1276 +/**
 14.1277 + *	skb_queue_purge - empty a list
 14.1278 + *	@list: list to empty
 14.1279 + *
 14.1280 + *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 14.1281 + *	the list and one reference dropped. This function takes the list
 14.1282 + *	lock and is atomic with respect to other list locking functions.
 14.1283 + */
 14.1284 +void skb_queue_purge(struct sk_buff_head *list)
 14.1285 +{
 14.1286 +	struct sk_buff *skb;
 14.1287 +	while ((skb = skb_dequeue(list)) != NULL)
 14.1288 +		kfree_skb(skb);
 14.1289 +}
 14.1290 +
 14.1291 +/**
 14.1292 + *	skb_queue_head - queue a buffer at the list head
 14.1293 + *	@list: list to use
 14.1294 + *	@newsk: buffer to queue
 14.1295 + *
 14.1296 + *	Queue a buffer at the start of the list. This function takes the
 14.1297 + *	list lock and can be used safely with other locking &sk_buff
 14.1298 + *	functions.
 14.1299 + *
 14.1300 + *	A buffer cannot be placed on two lists at the same time.
 14.1301 + */
 14.1302 +void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
 14.1303 +{
 14.1304 +	unsigned long flags;
 14.1305 +
 14.1306 +	spin_lock_irqsave(&list->lock, flags);
 14.1307 +	__skb_queue_head(list, newsk);
 14.1308 +	spin_unlock_irqrestore(&list->lock, flags);
 14.1309 +}
 14.1310 +
 14.1311 +/**
 14.1312 + *	skb_queue_tail - queue a buffer at the list tail
 14.1313 + *	@list: list to use
 14.1314 + *	@newsk: buffer to queue
 14.1315 + *
 14.1316 + *	Queue a buffer at the tail of the list. This function takes the
 14.1317 + *	list lock and can be used safely with other locking &sk_buff
 14.1318 + *	functions.
 14.1319 + *
 14.1320 + *	A buffer cannot be placed on two lists at the same time.
 14.1321 + */
 14.1322 +void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
 14.1323 +{
 14.1324 +	unsigned long flags;
 14.1325 +
 14.1326 +	spin_lock_irqsave(&list->lock, flags);
 14.1327 +	__skb_queue_tail(list, newsk);
 14.1328 +	spin_unlock_irqrestore(&list->lock, flags);
 14.1329 +}
 14.1330 +/**
 14.1331 + *	skb_unlink	-	remove a buffer from a list
 14.1332 + *	@skb: buffer to remove
 14.1333 + *
 14.1334 + *	Remove a packet from its list. The list locks are taken and this
 14.1335 + *	function is atomic with respect to other list locked calls.
 14.1336 + *
 14.1337 + *	Works even without knowing the list it is sitting on, which can be
 14.1338 + *	handy at times. It also means that THE LIST MUST EXIST when you
 14.1339 + *	unlink. Thus a list must have its contents unlinked before it is
 14.1340 + *	destroyed.
 14.1341 + */
 14.1342 +void skb_unlink(struct sk_buff *skb)
 14.1343 +{
 14.1344 +	struct sk_buff_head *list = skb->list;
 14.1345 +
 14.1346 +	if (list) {
 14.1347 +		unsigned long flags;
 14.1348 +
 14.1349 +		spin_lock_irqsave(&list->lock, flags);
 14.1350 +		if (skb->list == list)
 14.1351 +			__skb_unlink(skb, skb->list);
 14.1352 +		spin_unlock_irqrestore(&list->lock, flags);
 14.1353 +	}
 14.1354 +}
 14.1355 +
 14.1356 +
 14.1357 +/**
 14.1358 + *	skb_append	-	append a buffer
 14.1359 + *	@old: buffer to insert after
 14.1360 + *	@newsk: buffer to insert
 14.1361 + *
 14.1362 + *	Place a packet after a given packet in a list. The list locks are taken
 14.1363 + *	and this function is atomic with respect to other list locked calls.
 14.1364 + *	A buffer cannot be placed on two lists at the same time.
 14.1365 + */
 14.1366 +
 14.1367 +void skb_append(struct sk_buff *old, struct sk_buff *newsk)
 14.1368 +{
 14.1369 +	unsigned long flags;
 14.1370 +
 14.1371 +	spin_lock_irqsave(&old->list->lock, flags);
 14.1372 +	__skb_append(old, newsk);
 14.1373 +	spin_unlock_irqrestore(&old->list->lock, flags);
 14.1374 +}
 14.1375 +
 14.1376 +
 14.1377 +/**
 14.1378 + *	skb_insert	-	insert a buffer
 14.1379 + *	@old: buffer to insert before
 14.1380 + *	@newsk: buffer to insert
 14.1381 + *
 14.1382 + *	Place a packet before a given packet in a list. The list locks are taken
 14.1383 + *	and this function is atomic with respect to other list locked calls.
 14.1384 + *	A buffer cannot be placed on two lists at the same time.
 14.1385 + */
 14.1386 +
 14.1387 +void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
 14.1388 +{
 14.1389 +	unsigned long flags;
 14.1390 +
 14.1391 +	spin_lock_irqsave(&old->list->lock, flags);
 14.1392 +	__skb_insert(newsk, old->prev, old, old->list);
 14.1393 +	spin_unlock_irqrestore(&old->list->lock, flags);
 14.1394 +}
 14.1395 +
 14.1396 +#if 0
 14.1397 +/*
 14.1398 + * 	Tune the memory allocator for a new MTU size.
 14.1399 + */
 14.1400 +void skb_add_mtu(int mtu)
 14.1401 +{
 14.1402 +	/* Must match allocation in alloc_skb */
 14.1403 +	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
 14.1404 +
 14.1405 +	kmem_add_cache_size(mtu);
 14.1406 +}
 14.1407 +#endif
 14.1408 +
 14.1409 +static inline void skb_split_inside_header(struct sk_buff *skb,
 14.1410 +					   struct sk_buff *skb1,
 14.1411 +					   const u32 len, const int pos)
 14.1412 +{
 14.1413 +	int i;
 14.1414 +
 14.1415 +	memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len);
 14.1416 +
 14.1417 +	/* And move data appendix as is. */
 14.1418 +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
 14.1419 +		skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i];
 14.1420 +
 14.1421 +	skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags;
 14.1422 +	skb_shinfo(skb)->nr_frags  = 0;
 14.1423 +	skb1->data_len		   = skb->data_len;
 14.1424 +	skb1->len		   += skb1->data_len;
 14.1425 +	skb->data_len		   = 0;
 14.1426 +	skb->len		   = len;
 14.1427 +	skb->tail		   = skb->data + len;
 14.1428 +}
 14.1429 +
 14.1430 +static inline void skb_split_no_header(struct sk_buff *skb,
 14.1431 +				       struct sk_buff *skb1,
 14.1432 +				       const u32 len, int pos)
 14.1433 +{
 14.1434 +	int i, k = 0;
 14.1435 +	const int nfrags = skb_shinfo(skb)->nr_frags;
 14.1436 +
 14.1437 +	skb_shinfo(skb)->nr_frags = 0;
 14.1438 +	skb1->len		  = skb1->data_len = skb->len - len;
 14.1439 +	skb->len		  = len;
 14.1440 +	skb->data_len		  = len - pos;
 14.1441 +
 14.1442 +	for (i = 0; i < nfrags; i++) {
 14.1443 +		int size = skb_shinfo(skb)->frags[i].size;
 14.1444 +
 14.1445 +		if (pos + size > len) {
 14.1446 +			skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i];
 14.1447 +
 14.1448 +			if (pos < len) {
 14.1449 +				/* Split frag.
 14.1450 +				 * We have two variants in this case:
 14.1451 +				 * 1. Move the whole frag to the second
 14.1452 +				 *    part, if possible.  E.g. this
 14.1453 +				 *    approach is mandatory for TUX,
 14.1454 +				 *    where splitting is expensive.
 14.1455 +				 * 2. Split the frag accurately, which is
 14.1456 +				 *    what we do here.
 14.1456 +				 */
 14.1457 +				get_page(skb_shinfo(skb)->frags[i].page);
 14.1458 +				skb_shinfo(skb1)->frags[0].page_offset += len - pos;
 14.1459 +				skb_shinfo(skb1)->frags[0].size -= len - pos;
 14.1460 +				skb_shinfo(skb)->frags[i].size	= len - pos;
 14.1461 +				skb_shinfo(skb)->nr_frags++;
 14.1462 +			}
 14.1463 +			k++;
 14.1464 +		} else
 14.1465 +			skb_shinfo(skb)->nr_frags++;
 14.1466 +		pos += size;
 14.1467 +	}
 14.1468 +	skb_shinfo(skb1)->nr_frags = k;
 14.1469 +}
 14.1470 +
 14.1471 +/**
 14.1472 + * skb_split - Split a fragmented skb into two parts at length @len.
 14.1473 + */
 14.1474 +void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
 14.1475 +{
 14.1476 +	int pos = skb_headlen(skb);
 14.1477 +
 14.1478 +	if (len < pos)	/* Split line is inside header. */
 14.1479 +		skb_split_inside_header(skb, skb1, len, pos);
 14.1480 +	else		/* Second chunk has no header, nothing to copy. */
 14.1481 +		skb_split_no_header(skb, skb1, len, pos);
 14.1482 +}
 14.1483 +
 14.1484 +void __init skb_init(void)
 14.1485 +{
 14.1486 +	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 14.1487 +					      sizeof(struct sk_buff),
 14.1488 +					      0,
 14.1489 +					      SLAB_HWCACHE_ALIGN,
 14.1490 +					      NULL, NULL);
 14.1491 +	if (!skbuff_head_cache)
 14.1492 +		panic("cannot create skbuff cache");
 14.1493 +}
 14.1494 +
 14.1495 +EXPORT_SYMBOL(___pskb_trim);
 14.1496 +EXPORT_SYMBOL(__kfree_skb);
 14.1497 +EXPORT_SYMBOL(__pskb_pull_tail);
 14.1498 +EXPORT_SYMBOL(alloc_skb);
 14.1499 +EXPORT_SYMBOL(pskb_copy);
 14.1500 +EXPORT_SYMBOL(pskb_expand_head);
 14.1501 +EXPORT_SYMBOL(skb_checksum);
 14.1502 +EXPORT_SYMBOL(skb_clone);
 14.1503 +EXPORT_SYMBOL(skb_clone_fraglist);
 14.1504 +EXPORT_SYMBOL(skb_copy);
 14.1505 +EXPORT_SYMBOL(skb_copy_and_csum_bits);
 14.1506 +EXPORT_SYMBOL(skb_copy_and_csum_dev);
 14.1507 +EXPORT_SYMBOL(skb_copy_bits);
 14.1508 +EXPORT_SYMBOL(skb_copy_expand);
 14.1509 +EXPORT_SYMBOL(skb_over_panic);
 14.1510 +EXPORT_SYMBOL(skb_pad);
 14.1511 +EXPORT_SYMBOL(skb_realloc_headroom);
 14.1512 +EXPORT_SYMBOL(skb_under_panic);
 14.1513 +EXPORT_SYMBOL(skb_dequeue);
 14.1514 +EXPORT_SYMBOL(skb_dequeue_tail);
 14.1515 +EXPORT_SYMBOL(skb_insert);
 14.1516 +EXPORT_SYMBOL(skb_queue_purge);
 14.1517 +EXPORT_SYMBOL(skb_queue_head);
 14.1518 +EXPORT_SYMBOL(skb_queue_tail);
 14.1519 +EXPORT_SYMBOL(skb_unlink);
 14.1520 +EXPORT_SYMBOL(skb_append);
 14.1521 +EXPORT_SYMBOL(skb_split);
 14.1522 +EXPORT_SYMBOL(skb_iter_first);
 14.1523 +EXPORT_SYMBOL(skb_iter_next);
 14.1524 +EXPORT_SYMBOL(skb_iter_abort);
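
The queue helpers re-exported above (skb_queue_tail(), skb_dequeue(), skb_queue_purge()) all take list->lock with spin_lock_irqsave(), so they can be mixed freely between process and interrupt context. A minimal usage sketch follows (illustrative only, not part of this changeset; my_rx_queue, my_rx_init(), my_rx_interrupt(), my_rx_drain(), my_rx_shutdown() and my_deliver() are hypothetical names):

	#include <linux/skbuff.h>

	static struct sk_buff_head my_rx_queue;

	static void my_rx_init(void)
	{
		skb_queue_head_init(&my_rx_queue);
	}

	/* Hypothetical consumer; shown freeing the buffer for brevity. */
	static void my_deliver(struct sk_buff *skb)
	{
		kfree_skb(skb);
	}

	/* Interrupt context: safe because skb_queue_tail() disables
	 * interrupts around the list manipulation. */
	static void my_rx_interrupt(struct sk_buff *skb)
	{
		skb_queue_tail(&my_rx_queue, skb);
	}

	/* Process context: drain one buffer at a time. */
	static void my_rx_drain(void)
	{
		struct sk_buff *skb;

		while ((skb = skb_dequeue(&my_rx_queue)) != NULL)
			my_deliver(skb);
	}

	/* Teardown: drop anything still queued. */
	static void my_rx_shutdown(void)
	{
		skb_queue_purge(&my_rx_queue);
	}
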
    15.1 --- a/netbsd-2.0-xen-sparse/nbconfig-xen	Wed Sep 22 11:02:20 2004 +0000
    15.2 +++ b/netbsd-2.0-xen-sparse/nbconfig-xen	Wed Sep 22 21:31:14 2004 +0000
    15.3 @@ -5,6 +5,7 @@
    15.4  : ${HARCH:=$(uname -i)}
    15.5  : ${NETBSD_RELEASE:=$(basename $(cd $(dirname $0) && pwd) | sed 's/netbsd-\([0-9]\+\.[0-9]\+\).*/\1/')}
    15.6  : ${NETBSD_VERSION:=$(basename $(cd $(dirname $0) && pwd) | sed 's/netbsd-\([0-9]\+\.[0-9]\+.*\)-xen.*/\1/')}
    15.7 +: ${TOPDIR:=$(cd $(dirname $0) && pwd | sed 's/\(netbsd-[0-9]\+\.[0-9]\+.*-xen[^/]*\)/\1/')}
    15.8  
    15.9  TOOLDIR="$TOPDIR/../netbsd-${NETBSD_RELEASE}-tools/$HOS-$HARCH"; export TOOLDIR
   15.10  
    16.1 --- a/netbsd-2.0-xen-sparse/nbmake-xen	Wed Sep 22 11:02:20 2004 +0000
    16.2 +++ b/netbsd-2.0-xen-sparse/nbmake-xen	Wed Sep 22 21:31:14 2004 +0000
    16.3 @@ -7,6 +7,7 @@
    16.4  : ${HARCH:=$(uname -i)}
    16.5  : ${NETBSD_RELEASE:=$(basename $(cd $(dirname $0) && pwd) | sed 's/netbsd-\([0-9]\+\.[0-9]\+\).*/\1/')}
    16.6  : ${NETBSD_VERSION:=$(basename $(cd $(dirname $0) && pwd) | sed 's/netbsd-\([0-9]\+\.[0-9]\+.*\)-xen.*/\1/')}
    16.7 +: ${TOPDIR:=$(cd $(dirname $0) && pwd | sed 's/\(netbsd-[0-9]\+\.[0-9]\+.*-xen[^/]*\)/\1/')}
    16.8  
    16.9  NETBSDSRCDIR="$TOPDIR"; export NETBSDSRCDIR
   16.10  DESTDIR="$TOPDIR/root"; export DESTDIR
    17.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN	Wed Sep 22 11:02:20 2004 +0000
    17.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/conf/XEN	Wed Sep 22 21:31:14 2004 +0000
    17.3 @@ -13,7 +13,6 @@ maxusers	32		# estimated number of users
    17.4  #
    17.5  options		XEN
    17.6  #options		DOM0OPS
    17.7 -options		HZ=50
    17.8  
    17.9  #options 	I586_CPU
   17.10  options 	I686_CPU
    18.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/hypervisor_machdep.c	Wed Sep 22 11:02:20 2004 +0000
    18.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/hypervisor_machdep.c	Wed Sep 22 21:31:14 2004 +0000
    18.3 @@ -151,7 +151,7 @@ stipending()
    18.4  	return (ret);
    18.5  }
    18.6  
    18.7 -void do_hypervisor_callback(struct trapframe *regs)
    18.8 +void do_hypervisor_callback(struct intrframe *regs)
    18.9  {
   18.10  	uint32_t l1;
   18.11  	unsigned long l2;
    19.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S	Wed Sep 22 11:02:20 2004 +0000
    19.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/locore.S	Wed Sep 22 21:31:14 2004 +0000
    19.3 @@ -1567,6 +1567,7 @@ idle_zero:
    19.4  	pushl	$IPL_NONE
    19.5  	call	_C_LABEL(Xspllower)
    19.6  	addl	$4,%esp
    19.7 +	jmp	idle_start
    19.8  4:
    19.9  	call	_C_LABEL(uvm_pageidlezero)
   19.10  	CLI(%eax)
   19.11 @@ -1577,6 +1578,9 @@ idle_loop:
   19.12  	movl	_C_LABEL(uvm)+UVM_PAGE_IDLE_ZERO,%ecx
   19.13  	testl	%ecx,%ecx
   19.14  	jnz	idle_zero
   19.15 +	call	_C_LABEL(idle_block)
   19.16 +	cmpl	$0,_C_LABEL(sched_whichqs)
   19.17 +	jnz	idle_exit
   19.18  	STIC(%eax)
   19.19      	jz	4f
   19.20  	call	_C_LABEL(stipending)
    20.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/i386/vector.S	Wed Sep 22 11:02:20 2004 +0000
    20.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/i386/vector.S	Wed Sep 22 21:31:14 2004 +0000
    20.3 @@ -1347,9 +1347,10 @@ ENTRY(hypervisor_callback)
    20.4          jb   11f
    20.5          cmpl $ecrit,%eax
    20.6          jb   critical_region_fixup
    20.7 -11:     push %esp
    20.8 +11:     pushl CPUVAR(ILEVEL)
    20.9 +        push %esp
   20.10          call do_hypervisor_callback
   20.11 -        add  $4,%esp
   20.12 +        add  $8,%esp
   20.13          movl HYPERVISOR_shared_info,%esi
   20.14          xorl %eax,%eax
   20.15          movb TF_CS(%esp),%cl
    21.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/evtchn.h	Wed Sep 22 11:02:20 2004 +0000
    21.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/include/evtchn.h	Wed Sep 22 21:31:14 2004 +0000
    21.3 @@ -43,7 +43,7 @@ typedef int (*ev_handler_t)(void *);
    21.4  
    21.5  void events_default_setup(void);
    21.6  void init_events(void);
    21.7 -unsigned int do_event(int, struct trapframe *);
    21.8 +unsigned int do_event(int, struct intrframe *);
    21.9  int event_set_handler(int, ev_handler_t, void *, int);
   21.10  
   21.11  int bind_virq_to_irq(int);
    22.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h	Wed Sep 22 11:02:20 2004 +0000
    22.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/include/hypervisor.h	Wed Sep 22 21:31:14 2004 +0000
    22.3 @@ -83,7 +83,7 @@ extern union start_info_union start_info
    22.4  
    22.5  
    22.6  /* hypervisor.c */
    22.7 -void do_hypervisor_callback(struct trapframe *regs);
    22.8 +void do_hypervisor_callback(struct intrframe *regs);
    22.9  void hypervisor_notify_via_evtchn(unsigned int);
   22.10  void hypervisor_enable_irq(unsigned int);
   22.11  void hypervisor_disable_irq(unsigned int);
    23.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/include/xen.h	Wed Sep 22 11:02:20 2004 +0000
    23.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/include/xen.h	Wed Sep 22 21:31:14 2004 +0000
    23.3 @@ -54,6 +54,8 @@ void	xenmachmem_init(void);
    23.4  void	xenprivcmd_init(void);
    23.5  void	xenvfr_init(void);
    23.6  
    23.7 +void	idle_block(void);
    23.8 +
    23.9  #ifdef XENDEBUG
   23.10  void printk(const char *, ...);
   23.11  void vprintk(const char *, va_list);
    24.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/clock.c	Wed Sep 22 11:02:20 2004 +0000
    24.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/clock.c	Wed Sep 22 21:31:14 2004 +0000
    24.3 @@ -51,16 +51,20 @@
    24.4  
    24.5  #include "config_time.h"		/* for CONFIG_TIME */
    24.6  
    24.7 -static int xen_timer_handler(void *, struct trapframe *);
    24.8 +static int xen_timer_handler(void *, struct intrframe *);
    24.9  
   24.10  /* These are periodically updated in shared_info, and then copied here. */
   24.11 -static unsigned long shadow_tsc_stamp;
   24.12 -static u_int64_t shadow_system_time;
   24.13 +static uint64_t shadow_tsc_stamp;
   24.14 +static uint64_t shadow_system_time;
   24.15  static unsigned long shadow_time_version;
   24.16  static struct timeval shadow_tv;
   24.17  
   24.18  static int timeset;
   24.19  
   24.20 +static uint64_t processed_system_time;
   24.21 +
   24.22 +#define NS_PER_TICK (1000000000ULL/hz)
   24.23 +
   24.24  /*
   24.25   * Reads a consistent set of time-base values from Xen, into a shadow data
   24.26   * area.  Must be called at splclock.
   24.27 @@ -79,6 +83,16 @@ get_time_values_from_xen(void)
   24.28  	} while (shadow_time_version != HYPERVISOR_shared_info->time_version1);
   24.29  }
   24.30  
   24.31 +static uint64_t
   24.32 +get_tsc_offset_ns(void)
   24.33 +{
   24.34 +	uint32_t tsc_delta;
   24.35 +	struct cpu_info *ci = curcpu();
   24.36 +
   24.37 +	tsc_delta = cpu_counter32() - shadow_tsc_stamp;
   24.38 +	return (uint64_t)tsc_delta * 1000000000ULL / cpu_frequency(ci);
   24.39 +}
   24.40 +
   24.41  void
   24.42  inittodr(time_t base)
   24.43  {
   24.44 @@ -190,14 +204,19 @@ xen_initclocks()
   24.45  {
   24.46  	int irq = bind_virq_to_irq(VIRQ_TIMER);
   24.47  
   24.48 +	get_time_values_from_xen();
   24.49 +	processed_system_time = shadow_system_time;
   24.50 +
   24.51  	event_set_handler(irq, (int (*)(void *))xen_timer_handler,
   24.52  	    NULL, IPL_CLOCK);
   24.53  	hypervisor_enable_irq(irq);
   24.54  }
   24.55  
   24.56  static int
   24.57 -xen_timer_handler(void *arg, struct trapframe *regs)
   24.58 +xen_timer_handler(void *arg, struct intrframe *regs)
   24.59  {
   24.60 +	int64_t delta;
   24.61 +
   24.62  #if defined(I586_CPU) || defined(I686_CPU)
   24.63  	static int microset_iter; /* call cc_microset once/sec */
   24.64  	struct cpu_info *ci = curcpu();
   24.65 @@ -223,7 +242,13 @@ xen_timer_handler(void *arg, struct trap
   24.66  
   24.67  	get_time_values_from_xen();
   24.68  
   24.69 -	hardclock((struct clockframe *)regs);
   24.70 +	delta = (int64_t)(shadow_system_time + get_tsc_offset_ns() -
   24.71 +			  processed_system_time);
   24.72 +	while (delta >= NS_PER_TICK) {
   24.73 +		hardclock(regs);
   24.74 +		delta -= NS_PER_TICK;
   24.75 +		processed_system_time += NS_PER_TICK;
   24.76 +	}
   24.77  
   24.78  	return 0;
   24.79  }
   24.80 @@ -232,3 +257,17 @@ void
   24.81  setstatclockrate(int arg)
   24.82  {
   24.83  }
   24.84 +
   24.85 +void
   24.86 +idle_block(void)
   24.87 +{
   24.88 +
   24.89 +	/*
   24.90 +	 * Set the timer to fire when we expect the next timer
   24.91 +	 * interrupt.  We could set it later than that if we could
   24.92 +	 * easily find out when hardclock will next have work
   24.93 +	 * (callouts) to process.
   24.94 +	 */
   24.95 +	if (HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK) == 0)
   24.96 +		HYPERVISOR_block();
   24.97 +}
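
A worked example of the new tick accounting (illustrative figures, not taken from the changeset): with hz = 100, NS_PER_TICK is 10 000 000 ns. If the domain has been blocked or descheduled for about 35 ms when the next timer event arrives, the delta computed in xen_timer_handler() is roughly 35 000 000 ns, so hardclock() is called three times, processed_system_time advances by 30 000 000 ns, and the remaining ~5 ms carries forward to the next interrupt. Ticks elapsed while blocked in idle_block() are therefore caught up rather than lost, which is presumably also why options HZ=50 could be dropped from the XEN kernel config above.
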
    25.1 --- a/netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c	Wed Sep 22 11:02:20 2004 +0000
    25.2 +++ b/netbsd-2.0-xen-sparse/sys/arch/xen/xen/evtchn.c	Wed Sep 22 21:31:14 2004 +0000
    25.3 @@ -123,9 +123,13 @@ init_events()
    25.4  }
    25.5  
    25.6  unsigned int
    25.7 -do_event(int irq, struct trapframe *regs)
    25.8 +do_event(int irq, struct intrframe *regs)
    25.9  {
   25.10  	struct cpu_info *ci;
   25.11 +	int ilevel;
   25.12 +	struct intrhand *ih;
   25.13 +	int	(*ih_fun)(void *, void *);
   25.14 +	extern struct uvmexp uvmexp;
   25.15  
   25.16  	if (irq >= NR_IRQS) {
   25.17  #ifdef DIAGNOSTIC
   25.18 @@ -144,15 +148,46 @@ do_event(int irq, struct trapframe *regs
   25.19  
   25.20  	hypervisor_acknowledge_irq(irq);
   25.21  	if (ci->ci_isources[irq] == NULL) {
   25.22 +		hypervisor_enable_irq(irq);
   25.23 +		return 0;
   25.24 +	}
   25.25 +	ilevel = ci->ci_ilevel;
   25.26 +	if (ci->ci_isources[irq]->is_maxlevel <= ilevel) {
   25.27 +		ci->ci_ipending |= 1 << irq;
   25.28 +		/* leave masked */
   25.29  		return 0;
   25.30  	}
   25.31 -	__asm__ __volatile__ (
   25.32 -		"   movl $1f,%%esi	;"
   25.33 -		"   jmp  *%%eax		;"
   25.34 -		"1:			"
   25.35 -		: : "a" (ci->ci_isources[irq]->is_recurse),
   25.36 -		"b" (ci->ci_ilevel)
   25.37 -		: "esi", "ecx", "edx", "memory");
   25.38 +	uvmexp.intrs++;
   25.39 +	ci->ci_isources[irq]->is_evcnt.ev_count++;
   25.40 +	ci->ci_ilevel = ci->ci_isources[irq]->is_maxlevel;
   25.41 +	/* sti */
   25.42 +	ci->ci_idepth++;
   25.43 +#ifdef MULTIPROCESSOR
   25.44 +	x86_intlock(regs);
   25.45 +#endif
   25.46 +	ih = ci->ci_isources[irq]->is_handlers;
   25.47 +	while (ih != NULL) {
   25.48 +		if (ih->ih_level <= ilevel) {
   25.49 +#ifdef MULTIPROCESSOR
   25.50 +			x86_intunlock(regs);
   25.51 +#endif
   25.52 +			ci->ci_ipending |= 1 << irq;
   25.53 +			/* leave masked */
   25.54 +			ci->ci_idepth--;
   25.55 +			splx(ilevel);
   25.56 +			return 0;
   25.57 +		}
   25.58 +		ci->ci_ilevel = ih->ih_level;
   25.59 +		ih_fun = (void *)ih->ih_fun;
   25.60 +		ih_fun(ih->ih_arg, regs);
   25.61 +		ih = ih->ih_next;
   25.62 +	}
   25.63 +#ifdef MULTIPROCESSOR
   25.64 +	x86_intunlock(regs);
   25.65 +#endif
   25.66 +	hypervisor_enable_irq(irq);
   25.67 +	ci->ci_idepth--;
   25.68 +	splx(ilevel);
   25.69  
   25.70  	if (0 && irq == 4)
   25.71  		printf("do_event %d done, ipending %08x\n", irq,
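
For reference, a hedged sketch of how a handler reached by the new dispatch loop is registered (illustrative only; xennet_handler() and xennet_attach_events() are hypothetical names, and IPL_NET stands in for whatever level the real driver uses):

	/* Handler called directly from do_event() with the IPL raised to
	 * the source's maximum level. */
	static int
	xennet_handler(void *arg)
	{
		/* acknowledge the device, queue work, ... */
		return 1;
	}

	void
	xennet_attach_events(int irq)
	{
		event_set_handler(irq, xennet_handler, NULL, IPL_NET);
		hypervisor_enable_irq(irq);
	}

If the current ci_ilevel is already at or above the source's maximum level, do_event() now records the channel in ci_ipending and leaves it masked instead of recursing through is_recurse, so the event is presumably replayed when splx() lowers the priority level.
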
    26.1 --- a/xen/arch/x86/domain.c	Wed Sep 22 11:02:20 2004 +0000
    26.2 +++ b/xen/arch/x86/domain.c	Wed Sep 22 21:31:14 2004 +0000
    26.3 @@ -862,7 +862,7 @@ int construct_dom0(struct domain *p,
    26.4      zap_low_mappings(); /* Do the same for the idle page tables. */
    26.5      
    26.6      /* Give up the VGA console if DOM0 is configured to grab it. */
    26.7 -    console_endboot(strstr(cmdline, "tty0") != NULL);
    26.8 +    console_endboot(cmdline && strstr(cmdline, "tty0"));
    26.9  
   26.10      /* DOM0 gets access to everything. */
   26.11      physdev_init_dom0(p);
    27.1 --- a/xen/common/kernel.c	Wed Sep 22 11:02:20 2004 +0000
    27.2 +++ b/xen/common/kernel.c	Wed Sep 22 21:31:14 2004 +0000
    27.3 @@ -148,7 +148,7 @@ void cmain(multiboot_info_t *mbi)
    27.4      {
    27.5          unsigned char *opt_end, *opt;
    27.6          while ( *cmdline == ' ' ) cmdline++;
    27.7 -        cmdline = strchr(cmdline, ' ');
    27.8 +        cmdline = strchr(cmdline, ' '); /* skip the image name */
    27.9          while ( cmdline != NULL )
   27.10          {
   27.11              while ( *cmdline == ' ' ) cmdline++;
   27.12 @@ -326,6 +326,15 @@ void cmain(multiboot_info_t *mbi)
   27.13  
   27.14      shadow_mode_init();
   27.15  
   27.16 +    /* Grab the DOM0 command line. Skip past the image name. */
   27.17 +    cmdline = (unsigned char *)(mod[0].string ? __va(mod[0].string) : NULL);
   27.18 +    if ( cmdline != NULL )
   27.19 +    {
   27.20 +        while ( *cmdline == ' ' ) cmdline++;
   27.21 +        if ( (cmdline = strchr(cmdline, ' ')) != NULL )
   27.22 +            while ( *cmdline == ' ' ) cmdline++;
   27.23 +    }
   27.24 +
   27.25      /*
   27.26       * We're going to setup domain0 using the module(s) that we stashed safely
   27.27       * above our heap. The second module, if present, is an initrd ramdisk.
   27.28 @@ -338,7 +347,7 @@ void cmain(multiboot_info_t *mbi)
   27.29                          (mod[1].mod_start-mod[0].mod_start),
   27.30                          (mbi->mods_count == 1) ? 0 :
   27.31                          mod[mbi->mods_count-1].mod_end - mod[1].mod_start,
   27.32 -                        __va(mod[0].string)) != 0)
   27.33 +                        cmdline) != 0)
   27.34          panic("Could not set up DOM0 guest OS\n");
   27.35  
   27.36      /* The stash space for the initial kernel image can now be freed up. */
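
To illustrate the effect of the new DOM0 command-line handling (example string only, not taken from the changeset): if mod[0].string is "vmlinuz root=/dev/sda1 console=tty0", the added block skips any leading spaces, strchr() steps past the image name "vmlinuz", and the second skip leaves cmdline pointing at "root=/dev/sda1 console=tty0". That trimmed string is what construct_dom0() now receives, and it is also the string the earlier domain.c hunk searches for "tty0", with the added NULL check covering the case where the module carries no command line at all.
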