ia64/xen-unstable

changeset 232:f73ef0280d7e

bitkeeper revision 1.90 (3e5a2a0eLNdNDmT58mo-G1hnrOmMig)

Many files:
More updates to network tx path. New scheduler. More code ripped out. Nearly finished now.
author kaf24@labyrinth.cl.cam.ac.uk
date Mon Feb 24 14:19:58 2003 +0000 (2003-02-24)
parents 5262380c8422
children 4c0c962a443f
files xen-2.4.16/common/network.c xen-2.4.16/common/softirq.c xen-2.4.16/drivers/block/xen_block.c xen-2.4.16/include/hypervisor-ifs/network.h xen-2.4.16/include/xeno/interrupt.h xen-2.4.16/include/xeno/netdevice.h xen-2.4.16/include/xeno/skbuff.h xen-2.4.16/include/xeno/vif.h xen-2.4.16/net/dev.c xen-2.4.16/net/devinit.c xen-2.4.16/net/skbuff.c
line diff
     1.1 --- a/xen-2.4.16/common/network.c	Sun Feb 23 11:22:39 2003 +0000
     1.2 +++ b/xen-2.4.16/common/network.c	Mon Feb 24 14:19:58 2003 +0000
     1.3 @@ -81,9 +81,9 @@ net_vif_t *create_net_vif(int domain)
     1.4      new_vif->net_ring = new_ring;
     1.5      new_vif->shadow_ring = shadow_ring;
     1.6      
     1.7 -                    
     1.8 -    skb_queue_head_init(&new_vif->skb_list);
     1.9 -    new_vif->domain = domain;
    1.10 +    new_vif->domain = find_domain_by_id(domain);
    1.11 +
    1.12 +    new_vif->list.next = NULL;
    1.13      
    1.14      write_lock(&sys_vif_lock);
    1.15      new_vif->id = sys_vif_count;
    1.16 @@ -114,16 +114,11 @@ fail:
    1.17  
    1.18  void destroy_net_vif(struct task_struct *p)
    1.19  {
    1.20 -    struct sk_buff *skb;
    1.21      int i;
    1.22  
    1.23      if ( p->num_net_vifs <= 0 ) return; // nothing to do.
    1.24      
    1.25      i = --p->num_net_vifs;
    1.26 -    while ( (skb = skb_dequeue(&p->net_vif_list[i]->skb_list)) != NULL )
    1.27 -    {
    1.28 -        kfree_skb(skb);
    1.29 -    }
    1.30      
    1.31      write_lock(&sys_vif_lock);
    1.32      sys_vif_list[p->net_vif_list[i]->id] = NULL; // system vif list not gc'ed
    1.33 @@ -145,17 +140,12 @@ void vif_query(vif_query_t *vq)
    1.34      char buf[128];
    1.35      int i;
    1.36  
    1.37 -    if ( !(dom_task = find_domain_by_id(vq->domain)) )
    1.38 -    {
    1.39 -        return;
    1.40 -    }
    1.41 +    if ( !(dom_task = find_domain_by_id(vq->domain)) ) return;
    1.42  
    1.43      *buf = '\0';
    1.44  
    1.45 -    for (i=0; i < dom_task->num_net_vifs; i++)
    1.46 -    {
    1.47 +    for ( i = 0; i < dom_task->num_net_vifs; i++ )
    1.48          sprintf(buf + strlen(buf), "%d\n", dom_task->net_vif_list[i]->id);
    1.49 -    }
    1.50  
    1.51      copy_to_user(vq->buf, buf, strlen(buf) + 1);
    1.52      
    1.53 @@ -171,12 +161,12 @@ void print_vif_list()
    1.54      net_vif_t *v;
    1.55  
    1.56      printk("Currently, there are %d VIFs.\n", sys_vif_count);
    1.57 -    for (i=0; i<sys_vif_count; i++)
    1.58 +    for ( i = 0; i<sys_vif_count; i++ )
    1.59      {
    1.60          v = sys_vif_list[i];
    1.61          printk("] VIF Entry %d(%d):\n", i, v->id);
    1.62          printk("   > net_ring*:  %p\n", v->net_ring);
    1.63 -        printk("   > domain   :  %u\n", v->domain);
    1.64 +        printk("   > domain   :  %u\n", v->domain->domain);
    1.65      }
    1.66  }
    1.67  
     2.1 --- a/xen-2.4.16/common/softirq.c	Sun Feb 23 11:22:39 2003 +0000
     2.2 +++ b/xen-2.4.16/common/softirq.c	Mon Feb 24 14:19:58 2003 +0000
     2.3 @@ -166,10 +166,9 @@ static void tasklet_action(struct softir
     2.4  				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
     2.5  					BUG();
     2.6  				t->func(t->data);
     2.7 -				tasklet_unlock(t);
     2.8 -				continue;
     2.9  			}
    2.10  			tasklet_unlock(t);
    2.11 +			continue;
    2.12  		}
    2.13  
    2.14  		local_irq_disable();
    2.15 @@ -200,10 +199,9 @@ static void tasklet_hi_action(struct sof
    2.16  				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
    2.17  					BUG();
    2.18  				t->func(t->data);
    2.19 -				tasklet_unlock(t);
    2.20 -				continue;
    2.21  			}
    2.22  			tasklet_unlock(t);
    2.23 +			continue;
    2.24  		}
    2.25  
    2.26  		local_irq_disable();
     3.1 --- a/xen-2.4.16/drivers/block/xen_block.c	Sun Feb 23 11:22:39 2003 +0000
     3.2 +++ b/xen-2.4.16/drivers/block/xen_block.c	Mon Feb 24 14:19:58 2003 +0000
     3.3 @@ -70,18 +70,6 @@ static void remove_from_blkdev_list(stru
     3.4      spin_unlock_irqrestore(&io_schedule_list_lock, flags);
     3.5  }
     3.6  
     3.7 -static void add_to_blkdev_list(struct task_struct *p)
     3.8 -{
     3.9 -    unsigned long flags;
    3.10 -    if ( __on_blkdev_list(p) ) return;
    3.11 -    spin_lock_irqsave(&io_schedule_list_lock, flags);
    3.12 -    if ( !__on_blkdev_list(p) )
    3.13 -    {
    3.14 -        list_add(&p->blkdev_list, &io_schedule_list);
    3.15 -    }
    3.16 -    spin_unlock_irqrestore(&io_schedule_list_lock, flags);
    3.17 -}
    3.18 -
    3.19  static void add_to_blkdev_list_tail(struct task_struct *p)
    3.20  {
    3.21      unsigned long flags;
     4.1 --- a/xen-2.4.16/include/hypervisor-ifs/network.h	Sun Feb 23 11:22:39 2003 +0000
     4.2 +++ b/xen-2.4.16/include/hypervisor-ifs/network.h	Mon Feb 24 14:19:58 2003 +0000
     4.3 @@ -15,15 +15,15 @@
     4.4  #include <linux/types.h>
     4.5  
     4.6  typedef struct tx_entry_st {
     4.7 -	unsigned long addr;   /* virtual address */
     4.8 -	unsigned long size;   /* in bytes */
     4.9 -        int           status; /* per descriptor status. */
    4.10 +	unsigned long  addr;   /* machine address of packet */
    4.11 +	unsigned short size;   /* in bytes */
    4.12 +        unsigned short status; /* per descriptor status. */
    4.13  } tx_entry_t;
    4.14  
    4.15  typedef struct rx_entry_st {
    4.16 -	unsigned long addr;   /* virtual address */
    4.17 -	unsigned long size;   /* in bytes */
    4.18 -        int           status; /* per descriptor status. */
    4.19 +	unsigned long  addr;   /* machine address of PTE to swizzle */
    4.20 +	unsigned short size;   /* in bytes */
    4.21 +        unsigned short status; /* per descriptor status. */
    4.22  } rx_entry_t;
    4.23  
    4.24  #define TX_RING_SIZE 256
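
The descriptor shrink above is the interface-visible part of the reworked tx path: the guest hands over a machine address plus a 16-bit length, with the status packed into the remaining 16 bits. A minimal standalone sketch of the resulting layout, assuming plain C and using only the definitions visible in this hunk (the enclosing net_ring_t is not shown here and is omitted):

#include <stdio.h>

typedef struct tx_entry_st {
    unsigned long  addr;   /* machine address of packet */
    unsigned short size;   /* in bytes */
    unsigned short status; /* per descriptor status */
} tx_entry_t;

#define TX_RING_SIZE 256

int main(void)
{
    static tx_entry_t ring[TX_RING_SIZE];
    unsigned int prod = 0;

    /* Fill one descriptor with a placeholder machine address and length. */
    ring[prod].addr   = 0x12345000UL;
    ring[prod].size   = 1514;
    ring[prod].status = 0;
    prod = (prod + 1) % TX_RING_SIZE;  /* producer index wraps at ring size */

    printf("tx descriptor is %u bytes\n", (unsigned int)sizeof(tx_entry_t));
    return 0;
}

The producer-index wrap shown here is only illustrative; the real ring handling lives in net/dev.c.
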
     5.1 --- a/xen-2.4.16/include/xeno/interrupt.h	Sun Feb 23 11:22:39 2003 +0000
     5.2 +++ b/xen-2.4.16/include/xeno/interrupt.h	Mon Feb 24 14:19:58 2003 +0000
     5.3 @@ -172,13 +172,17 @@ static inline void tasklet_disable(struc
     5.4  static inline void tasklet_enable(struct tasklet_struct *t)
     5.5  {
     5.6  	smp_mb__before_atomic_dec();
     5.7 -	atomic_dec(&t->count);
     5.8 +	if (atomic_dec_and_test(&t->count) &&
     5.9 +	    test_bit(TASKLET_STATE_SCHED, &t->state))
    5.10 +		__tasklet_schedule(t);
    5.11  }
    5.12  
    5.13  static inline void tasklet_hi_enable(struct tasklet_struct *t)
    5.14  {
    5.15  	smp_mb__before_atomic_dec();
    5.16 -	atomic_dec(&t->count);
    5.17 +	if (atomic_dec_and_test(&t->count) &&
    5.18 +	    test_bit(TASKLET_STATE_SCHED, &t->state))
    5.19 +		__tasklet_hi_schedule(t);
    5.20  }
    5.21  
    5.22  extern void tasklet_kill(struct tasklet_struct *t);
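
This pairs with the softirq.c hunk earlier in the changeset: tasklet_action() now skips, rather than re-queues, a tasklet whose disable count is non-zero, so tasklet_enable()/tasklet_hi_enable() must re-schedule anything that was marked SCHED while disabled. A usage sketch of the pattern this covers, kernel-style rather than standalone, assuming DECLARE_TASKLET() from the Linux-derived header is still available and with every other name invented for illustration:

static void my_handler(unsigned long data);
static DECLARE_TASKLET(my_tasklet, my_handler, 0);

static void reconfigure(void)
{
    tasklet_disable(&my_tasklet);   /* count > 0: handler cannot run        */

    /* ... update state shared with the handler; an interrupt may call
     *     tasklet_schedule(&my_tasklet) meanwhile, leaving only SCHED set  */

    tasklet_enable(&my_tasklet);    /* count reaches 0: with this change a
                                       pending SCHED bit now triggers
                                       __tasklet_schedule() immediately     */
}
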
     6.1 --- a/xen-2.4.16/include/xeno/netdevice.h	Sun Feb 23 11:22:39 2003 +0000
     6.2 +++ b/xen-2.4.16/include/xeno/netdevice.h	Mon Feb 24 14:19:58 2003 +0000
     6.3 @@ -38,29 +38,11 @@
     6.4  #ifdef __KERNEL__
     6.5  #include <xeno/config.h>
     6.6  
     6.7 -struct divert_blk;
     6.8  struct vlan_group;
     6.9  
    6.10 -#define HAVE_ALLOC_NETDEV		/* feature macro: alloc_xxxdev
    6.11 -					   functions are available. */
    6.12 -
    6.13 -#define NET_XMIT_SUCCESS	0
    6.14 -#define NET_XMIT_DROP		1	/* skb dropped			*/
    6.15 -#define NET_XMIT_CN		2	/* congestion notification	*/
    6.16 -#define NET_XMIT_POLICED	3	/* skb is shot by police	*/
    6.17 -#define NET_XMIT_BYPASS		4	/* packet does not leave via dequeue;
    6.18 -					   (TC use only - dev_queue_xmit
    6.19 -					   returns this as NET_XMIT_SUCCESS) */
    6.20 -
    6.21  /* Backlog congestion levels */
    6.22  #define NET_RX_SUCCESS		0   /* keep 'em coming, baby */
    6.23  #define NET_RX_DROP		1  /* packet dropped */
    6.24 -#define NET_RX_CN_LOW		2   /* storm alert, just in case */
    6.25 -#define NET_RX_CN_MOD		3   /* Storm on its way! */
    6.26 -#define NET_RX_CN_HIGH		4   /* The storm is here */
    6.27 -#define NET_RX_BAD		5  /* packet dropped due to kernel error */
    6.28 -
    6.29 -#define net_xmit_errno(e)	((e) != NET_XMIT_CN ? -ENOBUFS : 0)
    6.30  
    6.31  #endif
    6.32  
    6.33 @@ -182,7 +164,6 @@ enum netdev_state_t
    6.34  	__LINK_STATE_XOFF=0,
    6.35  	__LINK_STATE_START,
    6.36  	__LINK_STATE_PRESENT,
    6.37 -	__LINK_STATE_SCHED,
    6.38  	__LINK_STATE_NOCARRIER
    6.39  };
    6.40  
    6.41 @@ -409,7 +390,6 @@ extern struct net_device	*dev_alloc(cons
    6.42  extern int		dev_alloc_name(struct net_device *dev, const char *name);
    6.43  extern int		dev_open(struct net_device *dev);
    6.44  extern int		dev_close(struct net_device *dev);
    6.45 -extern int		dev_queue_xmit(struct sk_buff *skb);
    6.46  extern int		register_netdevice(struct net_device *dev);
    6.47  extern int		unregister_netdevice(struct net_device *dev);
    6.48  extern void dev_shutdown(struct net_device *dev);
    6.49 @@ -430,30 +410,14 @@ static inline int unregister_gifconf(uns
    6.50  
    6.51  extern struct tasklet_struct net_tx_tasklet;
    6.52  
    6.53 -
    6.54 -struct softnet_data
    6.55 -{
    6.56 -	struct net_device	*output_queue;
    6.57 -	struct sk_buff		*completion_queue;
    6.58 -} __attribute__((__aligned__(SMP_CACHE_BYTES)));
    6.59 -
    6.60 -
    6.61 -extern struct softnet_data softnet_data[NR_CPUS];
    6.62 +extern struct list_head net_schedule_list;
    6.63 +extern spinlock_t net_schedule_list_lock;
    6.64  
    6.65  #define HAVE_NETIF_QUEUE
    6.66  
    6.67  static inline void __netif_schedule(struct net_device *dev)
    6.68  {
    6.69 -	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
    6.70 -		unsigned long flags;
    6.71 -		int cpu = smp_processor_id();
    6.72 -
    6.73 -		local_irq_save(flags);
    6.74 -		dev->next_sched = softnet_data[cpu].output_queue;
    6.75 -		softnet_data[cpu].output_queue = dev;
    6.76 -                tasklet_schedule(&net_tx_tasklet);
    6.77 -		local_irq_restore(flags);
    6.78 -	}
    6.79 +	tasklet_schedule(&net_tx_tasklet);
    6.80  }
    6.81  
    6.82  static inline void netif_schedule(struct net_device *dev)
    6.83 @@ -488,41 +452,18 @@ static inline int netif_running(struct n
    6.84  	return test_bit(__LINK_STATE_START, &dev->state);
    6.85  }
    6.86  
    6.87 -/* Use this variant when it is known for sure that it
    6.88 - * is executing from interrupt context.
    6.89 +
    6.90 +/*
    6.91 + * Xen does not need deferred skb freeing, as all destructor hook functions 
    6.92 + * are IRQ safe. Linux needed more care for some destructors...
    6.93   */
    6.94 -static inline void dev_kfree_skb_irq(struct sk_buff *skb)
    6.95 -{
    6.96 -	if (atomic_dec_and_test(&skb->users)) {
    6.97 -		int cpu = smp_processor_id();
    6.98 -		unsigned long flags;
    6.99 -
   6.100 -		local_irq_save(flags);
   6.101 -		skb->next = softnet_data[cpu].completion_queue;
   6.102 -		softnet_data[cpu].completion_queue = skb;
   6.103 -                tasklet_schedule(&net_tx_tasklet);
   6.104 -		local_irq_restore(flags);
   6.105 -	}
   6.106 -}
   6.107 -
   6.108 -/* Use this variant in places where it could be invoked
   6.109 - * either from interrupt or non-interrupt context.
   6.110 - */
   6.111 -static inline void dev_kfree_skb_any(struct sk_buff *skb)
   6.112 -{
   6.113 -	if (in_irq())
   6.114 -		dev_kfree_skb_irq(skb);
   6.115 -	else
   6.116 -		dev_kfree_skb(skb);
   6.117 -}
   6.118 +#define dev_kfree_skb_irq(_skb) dev_kfree_skb(_skb)
   6.119 +#define dev_kfree_skb_any(_skb) dev_kfree_skb(_skb)
   6.120  
   6.121  extern void		net_call_rx_atomic(void (*fn)(void));
   6.122 -#define HAVE_NETIF_RX 1
   6.123  extern int		netif_rx(struct sk_buff *skb);
   6.124  extern int		dev_ioctl(unsigned int cmd, void *);
   6.125  extern int		dev_change_flags(struct net_device *, unsigned);
   6.126 -extern void		dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
   6.127 -
   6.128  extern void		dev_init(void);
   6.129  
   6.130  extern int		netdev_nit;
   6.131 @@ -654,9 +595,7 @@ extern void		dev_load(const char *name);
   6.132  extern void		dev_mcast_init(void);
   6.133  extern int		netdev_register_fc(struct net_device *dev, void (*stimul)(struct net_device *dev));
   6.134  extern void		netdev_unregister_fc(int bit);
   6.135 -extern int		netdev_max_backlog;
   6.136  extern unsigned long	netdev_fc_xoff;
   6.137 -extern atomic_t netdev_dropping;
   6.138  extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
   6.139  extern struct sk_buff * skb_checksum_help(struct sk_buff *skb);
   6.140  
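
With the per-CPU softnet_data output and completion queues gone, __netif_schedule() collapses to a single tasklet_schedule(&net_tx_tasklet), and deferred skb freeing disappears on the rationale given above (all destructor hooks in this tree are IRQ safe). A driver-side sketch of a tx-completion path under this interface, kernel-style and not standalone, assuming netif_wake_queue() is retained from the Linux header and with the function itself hypothetical:

static void nic_tx_complete(struct net_device *dev, struct sk_buff *skb)
{
    dev_kfree_skb_irq(skb);        /* now plain dev_kfree_skb(): immediate
                                      freeing is fine even in IRQ context   */
    if ( netif_queue_stopped(dev) )
        netif_wake_queue(dev);     /* __netif_schedule() just schedules
                                      net_tx_tasklet                        */
}
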
     7.1 --- a/xen-2.4.16/include/xeno/skbuff.h	Sun Feb 23 11:22:39 2003 +0000
     7.2 +++ b/xen-2.4.16/include/xeno/skbuff.h	Mon Feb 24 14:19:58 2003 +0000
     7.3 @@ -19,21 +19,23 @@
     7.4  #include <linux/time.h>
     7.5  #include <linux/timer.h>
     7.6  #include <linux/cache.h>
     7.7 -
     7.8 +#include <linux/slab.h>
     7.9 +#include <asm/system.h>
    7.10  #include <asm/atomic.h>
    7.11  #include <asm/types.h>
    7.12  #include <linux/spinlock.h>
    7.13  #include <linux/mm.h>
    7.14  
    7.15 -// vif special values.
    7.16 +/* vif special values */
    7.17  #define VIF_PHYSICAL_INTERFACE  -1
    7.18  #define VIF_UNKNOWN_INTERFACE   -2
    7.19  #define VIF_DROP                -3
    7.20  #define VIF_ANY_INTERFACE       -4
    7.21  
    7.22 -//skb_type values:
    7.23 -#define SKB_NORMAL               0
    7.24 -#define SKB_ZERO_COPY            1
    7.25 +/* skb_type values */
    7.26 +#define SKB_NORMAL               0 /* A Linux-style skbuff: no strangeness */
    7.27 +#define SKB_ZERO_COPY            1 /* Zero copy skbs are used for receive  */
    7.28 +#define SKB_NODATA               2 /* Data allocation not handled by us    */
    7.29  
    7.30  #define HAVE_ALLOC_SKB		/* For the drivers to know */
    7.31  #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
    7.32 @@ -44,9 +46,6 @@
    7.33  #define CHECKSUM_UNNECESSARY 2
    7.34  
    7.35  #define SKB_DATA_ALIGN(X)	(((X) + (SMP_CACHE_BYTES-1)) & ~(SMP_CACHE_BYTES-1))
    7.36 -#define SKB_MAX_ORDER(X,ORDER)	(((PAGE_SIZE<<(ORDER)) - (X) - sizeof(struct skb_shared_info))&~(SMP_CACHE_BYTES-1))
    7.37 -#define SKB_MAX_HEAD(X)		(SKB_MAX_ORDER((X),0))
    7.38 -#define SKB_MAX_ALLOC		(SKB_MAX_ORDER(0,2))
    7.39  
    7.40  /* A. Checksumming of received packets by device.
    7.41   *
    7.42 @@ -82,8 +81,6 @@
    7.43   *			  TCP/UDP over IPv4. Sigh. Vendors like this
    7.44   *			  way by an unknown reason. Though, see comment above
    7.45   *			  about CHECKSUM_UNNECESSARY. 8)
    7.46 - *
    7.47 - *	Any questions? No questions, good. 		--ANK
    7.48   */
    7.49  
    7.50  #ifdef __i386__
    7.51 @@ -92,171 +89,98 @@
    7.52  #define NET_CALLER(arg) __builtin_return_address(0)
    7.53  #endif
    7.54  
    7.55 -#ifdef CONFIG_NETFILTER
    7.56 -struct nf_conntrack {
    7.57 -	atomic_t use;
    7.58 -	void (*destroy)(struct nf_conntrack *);
    7.59 -};
    7.60 +struct sk_buff_head {
    7.61 +    /* These two members must be first. */
    7.62 +    struct sk_buff	* next;
    7.63 +    struct sk_buff	* prev;
    7.64  
    7.65 -struct nf_ct_info {
    7.66 -	struct nf_conntrack *master;
    7.67 -};
    7.68 -#endif
    7.69 -
    7.70 -struct sk_buff_head {
    7.71 -	/* These two members must be first. */
    7.72 -	struct sk_buff	* next;
    7.73 -	struct sk_buff	* prev;
    7.74 -
    7.75 -	__u32		qlen;
    7.76 -	spinlock_t	lock;
    7.77 +    __u32		qlen;
    7.78 +    spinlock_t	lock;
    7.79  };
    7.80  
    7.81 -struct sk_buff;
    7.82 -
    7.83 -#define MAX_SKB_FRAGS 6
    7.84 -
    7.85 -typedef struct skb_frag_struct skb_frag_t;
    7.86 +#define MAX_SKB_FRAGS 1 /* KAF: was 6 */
    7.87  
    7.88 -struct skb_frag_struct
    7.89 -{
    7.90 -	struct pfn_info *page;
    7.91 -	__u16 page_offset;
    7.92 -	__u16 size;
    7.93 -};
    7.94 +typedef struct skb_frag_struct {
    7.95 +    struct pfn_info *page;
    7.96 +    __u16 page_offset;
    7.97 +    __u16 size;
    7.98 +} skb_frag_t;
    7.99  
   7.100 -/* This data is invariant across clones and lives at
   7.101 - * the end of the header data, ie. at skb->end.
   7.102 - */
   7.103  struct skb_shared_info {
   7.104 -	atomic_t	dataref;
   7.105 -	unsigned int	nr_frags;
   7.106 -	struct sk_buff	*frag_list;
   7.107 -	skb_frag_t	frags[MAX_SKB_FRAGS];
   7.108 +    unsigned int nr_frags;
   7.109 +    skb_frag_t	frags[MAX_SKB_FRAGS];
   7.110  };
   7.111  
   7.112  struct sk_buff {
   7.113 -	/* These two members must be first. */
   7.114 -	struct sk_buff	* next;			/* Next buffer in list 				*/
   7.115 -	struct sk_buff	* prev;			/* Previous buffer in list 			*/
   7.116 +    /* These two members must be first. */
   7.117 +    struct sk_buff	* next;			/* Next buffer in list 				*/
   7.118 +    struct sk_buff	* prev;			/* Previous buffer in list 			*/
   7.119  
   7.120 -	struct sk_buff_head * list;		/* List we are on				*/
   7.121 -	struct net_device	*dev;		/* Device we arrived on/are leaving by		*/
   7.122 +    struct sk_buff_head * list;		/* List we are on				*/
   7.123 +    struct net_device	*dev;		/* Device we arrived on/are leaving by		*/
   7.124  
   7.125 -	/* Transport layer header */
   7.126 -	union
   7.127 -	{
   7.128 -		struct tcphdr	*th;
   7.129 -		struct udphdr	*uh;
   7.130 -		struct icmphdr	*icmph;
   7.131 -		struct igmphdr	*igmph;
   7.132 -		struct iphdr	*ipiph;
   7.133 -		struct spxhdr	*spxh;
   7.134 -		unsigned char	*raw;
   7.135 -	} h;
   7.136 +    /* Transport layer header */
   7.137 +    union
   7.138 +    {
   7.139 +        struct tcphdr	*th;
   7.140 +        struct udphdr	*uh;
   7.141 +        struct icmphdr	*icmph;
   7.142 +        struct igmphdr	*igmph;
   7.143 +        struct iphdr	*ipiph;
   7.144 +        struct spxhdr	*spxh;
   7.145 +        unsigned char	*raw;
   7.146 +    } h;
   7.147  
   7.148 -	/* Network layer header */
   7.149 -	union
   7.150 -	{
   7.151 -		struct iphdr	*iph;
   7.152 -		struct ipv6hdr	*ipv6h;
   7.153 -		struct arphdr	*arph;
   7.154 -		struct ipxhdr	*ipxh;
   7.155 -		unsigned char	*raw;
   7.156 -	} nh;
   7.157 +    /* Network layer header */
   7.158 +    union
   7.159 +    {
   7.160 +        struct iphdr	*iph;
   7.161 +        struct ipv6hdr	*ipv6h;
   7.162 +        struct arphdr	*arph;
   7.163 +        struct ipxhdr	*ipxh;
   7.164 +        unsigned char	*raw;
   7.165 +    } nh;
   7.166    
   7.167 -	/* Link layer header */
   7.168 -	union 
   7.169 -	{	
   7.170 -	  	struct ethhdr	*ethernet;
   7.171 -	  	unsigned char 	*raw;
   7.172 -	} mac;
   7.173 +    /* Link layer header */
   7.174 +    union 
   7.175 +    {	
   7.176 +        struct ethhdr	*ethernet;
   7.177 +        unsigned char 	*raw;
   7.178 +    } mac;
   7.179  
   7.180 -	unsigned int 	len;			/* Length of actual data			*/
   7.181 - 	unsigned int 	data_len;
   7.182 -	unsigned int	csum;			/* Checksum 					*/
   7.183 -	unsigned char 	skb_type,
   7.184 -			cloned, 		/* head may be cloned (check refcnt to be sure) */
   7.185 -  			pkt_type,		/* Packet class					*/
   7.186 -  			ip_summed;		/* Driver fed us an IP checksum			*/
   7.187 -	atomic_t	users;			/* User count - see datagram.c,tcp.c 		*/
   7.188 -	unsigned short	protocol;		/* Packet protocol from driver. 		*/
   7.189 -	unsigned char	*head;			/* Head of buffer 				*/
   7.190 -	unsigned char	*data;			/* Data head pointer				*/
   7.191 -	unsigned char	*tail;			/* Tail pointer					*/
   7.192 -	unsigned char 	*end;			/* End pointer					*/
   7.193 +    unsigned int 	len;			/* Length of actual data			*/
   7.194 +    unsigned int 	data_len;
   7.195 +    unsigned int	csum;			/* Checksum 					*/
   7.196 +    unsigned char 	skb_type,
   7.197 +        pkt_type,		/* Packet class					*/
   7.198 +        ip_summed;		/* Driver fed us an IP checksum			*/
   7.199 +    unsigned short	protocol;		/* Packet protocol from driver. 		*/
   7.200 +    unsigned char	*head;			/* Head of buffer 				*/
   7.201 +    unsigned char	*data;			/* Data head pointer				*/
   7.202 +    unsigned char	*tail;			/* Tail pointer					*/
   7.203 +    unsigned char 	*end;			/* End pointer					*/
   7.204  
   7.205 -	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
   7.206 -        struct pfn_info *pf;                    /* record of physical pf address for freeing    */
   7.207 -        int src_vif;                            /* vif we came from                             */
   7.208 -        int dst_vif;                            /* vif we are bound for                         */
   7.209 -        struct skb_shared_info shinfo;          /* shared info is no longer shared in Xen.      */
   7.210 +    void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
   7.211 +    struct pfn_info *pf;                    /* record of physical pf address for freeing    */
   7.212 +    int src_vif;                            /* vif we came from                             */
   7.213 +    int dst_vif;                            /* vif we are bound for                         */
   7.214 +    struct skb_shared_info shinfo;          /* shared info is no longer shared in Xen.      */
   7.215  };
   7.216  
   7.217 -#define SK_WMEM_MAX	65535
   7.218 -#define SK_RMEM_MAX	65535
   7.219 -
   7.220 -#ifdef __KERNEL__
   7.221 -/*
   7.222 - *	Handling routines are only of interest to the kernel
   7.223 - */
   7.224 -#include <linux/slab.h>
   7.225 -
   7.226 -#include <asm/system.h>
   7.227 -
   7.228 -extern void			__kfree_skb(struct sk_buff *skb);
   7.229 -extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
   7.230 -extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
   7.231 -extern void			kfree_skbmem(struct sk_buff *skb);
   7.232 -extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
   7.233 -extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
   7.234 -extern struct sk_buff *		pskb_copy(struct sk_buff *skb, int gfp_mask);
   7.235 -extern int			pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask);
   7.236 -extern struct sk_buff *		skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom);
   7.237 -extern struct sk_buff *		skb_copy_expand(const struct sk_buff *skb, 
   7.238 -						int newheadroom,
   7.239 -						int newtailroom,
   7.240 -						int priority);
   7.241 +extern void	       __kfree_skb(struct sk_buff *skb);
   7.242 +extern struct sk_buff *alloc_skb(unsigned int size, int priority);
   7.243 +extern struct sk_buff *alloc_skb_nodata(int priority);
   7.244 +extern struct sk_buff *alloc_zc_skb(unsigned int size, int priority);
   7.245 +extern void	       kfree_skbmem(struct sk_buff *skb);
   7.246 +extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority);
   7.247  #define dev_kfree_skb(a)	kfree_skb(a)
   7.248  extern void	skb_over_panic(struct sk_buff *skb, int len, void *here);
   7.249  extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
   7.250  
   7.251 -/* Internal */
   7.252 -//#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
   7.253 +/* In Xen, we don't clone skbs, so shared data can go in the sk_buff struct. */
   7.254  #define skb_shinfo(SKB)     ((struct skb_shared_info *)(&(SKB)->shinfo))
   7.255  
   7.256  /**
   7.257 - *	skb_queue_empty - check if a queue is empty
   7.258 - *	@list: queue head
   7.259 - *
   7.260 - *	Returns true if the queue is empty, false otherwise.
   7.261 - */
   7.262 - 
   7.263 -static inline int skb_queue_empty(struct sk_buff_head *list)
   7.264 -{
   7.265 -	return (list->next == (struct sk_buff *) list);
   7.266 -}
   7.267 -
   7.268 -/**
   7.269 - *	skb_get - reference buffer
   7.270 - *	@skb: buffer to reference
   7.271 - *
   7.272 - *	Makes another reference to a socket buffer and returns a pointer
   7.273 - *	to the buffer.
   7.274 - */
   7.275 - 
   7.276 -static inline struct sk_buff *skb_get(struct sk_buff *skb)
   7.277 -{
   7.278 -	atomic_inc(&skb->users);
   7.279 -	return skb;
   7.280 -}
   7.281 -
   7.282 -/*
   7.283 - * If users==1, we are the only owner and are can avoid redundant
   7.284 - * atomic change.
   7.285 - */
   7.286 - 
   7.287 -/**
   7.288   *	kfree_skb - free an sk_buff
   7.289   *	@skb: buffer to free
   7.290   *
   7.291 @@ -266,143 +190,7 @@ static inline struct sk_buff *skb_get(st
   7.292   
   7.293  static inline void kfree_skb(struct sk_buff *skb)
   7.294  {
   7.295 -	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
   7.296 -		__kfree_skb(skb);
   7.297 -}
   7.298 -
   7.299 -/* Use this if you didn't touch the skb state [for fast switching] */
   7.300 -static inline void kfree_skb_fast(struct sk_buff *skb)
   7.301 -{
   7.302 -	if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users))
   7.303 -		kfree_skbmem(skb);	
   7.304 -}
   7.305 -
   7.306 -/**
   7.307 - *	skb_cloned - is the buffer a clone
   7.308 - *	@skb: buffer to check
   7.309 - *
   7.310 - *	Returns true if the buffer was generated with skb_clone() and is
   7.311 - *	one of multiple shared copies of the buffer. Cloned buffers are
   7.312 - *	shared data so must not be written to under normal circumstances.
   7.313 - */
   7.314 -
   7.315 -static inline int skb_cloned(struct sk_buff *skb)
   7.316 -{
   7.317 -	return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1;
   7.318 -}
   7.319 -
   7.320 -/**
   7.321 - *	skb_shared - is the buffer shared
   7.322 - *	@skb: buffer to check
   7.323 - *
   7.324 - *	Returns true if more than one person has a reference to this
   7.325 - *	buffer.
   7.326 - */
   7.327 - 
   7.328 -static inline int skb_shared(struct sk_buff *skb)
   7.329 -{
   7.330 -	return (atomic_read(&skb->users) != 1);
   7.331 -}
   7.332 -
   7.333 -/** 
   7.334 - *	skb_share_check - check if buffer is shared and if so clone it
   7.335 - *	@skb: buffer to check
   7.336 - *	@pri: priority for memory allocation
   7.337 - *	
   7.338 - *	If the buffer is shared the buffer is cloned and the old copy
   7.339 - *	drops a reference. A new clone with a single reference is returned.
   7.340 - *	If the buffer is not shared the original buffer is returned. When
   7.341 - *	being called from interrupt status or with spinlocks held pri must
   7.342 - *	be GFP_ATOMIC.
   7.343 - *
   7.344 - *	NULL is returned on a memory allocation failure.
   7.345 - */
   7.346 - 
   7.347 -static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri)
   7.348 -{
   7.349 -	if (skb_shared(skb)) {
   7.350 -		struct sk_buff *nskb;
   7.351 -		nskb = skb_clone(skb, pri);
   7.352 -		kfree_skb(skb);
   7.353 -		return nskb;
   7.354 -	}
   7.355 -	return skb;
   7.356 -}
   7.357 -
   7.358 -
   7.359 -/*
   7.360 - *	Copy shared buffers into a new sk_buff. We effectively do COW on
   7.361 - *	packets to handle cases where we have a local reader and forward
   7.362 - *	and a couple of other messy ones. The normal one is tcpdumping
   7.363 - *	a packet thats being forwarded.
   7.364 - */
   7.365 - 
   7.366 -/**
   7.367 - *	skb_unshare - make a copy of a shared buffer
   7.368 - *	@skb: buffer to check
   7.369 - *	@pri: priority for memory allocation
   7.370 - *
   7.371 - *	If the socket buffer is a clone then this function creates a new
   7.372 - *	copy of the data, drops a reference count on the old copy and returns
   7.373 - *	the new copy with the reference count at 1. If the buffer is not a clone
   7.374 - *	the original buffer is returned. When called with a spinlock held or
   7.375 - *	from interrupt state @pri must be %GFP_ATOMIC
   7.376 - *
   7.377 - *	%NULL is returned on a memory allocation failure.
   7.378 - */
   7.379 - 
   7.380 -static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri)
   7.381 -{
   7.382 -	struct sk_buff *nskb;
   7.383 -	if(!skb_cloned(skb))
   7.384 -		return skb;
   7.385 -	nskb=skb_copy(skb, pri);
   7.386 -	kfree_skb(skb);		/* Free our shared copy */
   7.387 -	return nskb;
   7.388 -}
   7.389 -
   7.390 -/**
   7.391 - *	skb_peek
   7.392 - *	@list_: list to peek at
   7.393 - *
   7.394 - *	Peek an &sk_buff. Unlike most other operations you _MUST_
   7.395 - *	be careful with this one. A peek leaves the buffer on the
   7.396 - *	list and someone else may run off with it. You must hold
   7.397 - *	the appropriate locks or have a private queue to do this.
   7.398 - *
   7.399 - *	Returns %NULL for an empty list or a pointer to the head element.
   7.400 - *	The reference count is not incremented and the reference is therefore
   7.401 - *	volatile. Use with caution.
   7.402 - */
   7.403 - 
   7.404 -static inline struct sk_buff *skb_peek(struct sk_buff_head *list_)
   7.405 -{
   7.406 -	struct sk_buff *list = ((struct sk_buff *)list_)->next;
   7.407 -	if (list == (struct sk_buff *)list_)
   7.408 -		list = NULL;
   7.409 -	return list;
   7.410 -}
   7.411 -
   7.412 -/**
   7.413 - *	skb_peek_tail
   7.414 - *	@list_: list to peek at
   7.415 - *
   7.416 - *	Peek an &sk_buff. Unlike most other operations you _MUST_
   7.417 - *	be careful with this one. A peek leaves the buffer on the
   7.418 - *	list and someone else may run off with it. You must hold
   7.419 - *	the appropriate locks or have a private queue to do this.
   7.420 - *
   7.421 - *	Returns %NULL for an empty list or a pointer to the tail element.
   7.422 - *	The reference count is not incremented and the reference is therefore
   7.423 - *	volatile. Use with caution.
   7.424 - */
   7.425 -
   7.426 -static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_)
   7.427 -{
   7.428 -	struct sk_buff *list = ((struct sk_buff *)list_)->prev;
   7.429 -	if (list == (struct sk_buff *)list_)
   7.430 -		list = NULL;
   7.431 -	return list;
   7.432 +    __kfree_skb(skb);
   7.433  }
   7.434  
   7.435  /**
   7.436 @@ -414,24 +202,17 @@ static inline struct sk_buff *skb_peek_t
   7.437   
   7.438  static inline __u32 skb_queue_len(struct sk_buff_head *list_)
   7.439  {
   7.440 -	return(list_->qlen);
   7.441 +    return(list_->qlen);
   7.442  }
   7.443  
   7.444  static inline void skb_queue_head_init(struct sk_buff_head *list)
   7.445  {
   7.446 -	spin_lock_init(&list->lock);
   7.447 -	list->prev = (struct sk_buff *)list;
   7.448 -	list->next = (struct sk_buff *)list;
   7.449 -	list->qlen = 0;
   7.450 +    spin_lock_init(&list->lock);
   7.451 +    list->prev = (struct sk_buff *)list;
   7.452 +    list->next = (struct sk_buff *)list;
   7.453 +    list->qlen = 0;
   7.454  }
   7.455  
   7.456 -/*
   7.457 - *	Insert an sk_buff at the start of a list.
   7.458 - *
   7.459 - *	The "__skb_xxxx()" functions are the non-atomic ones that
   7.460 - *	can only be called with interrupts disabled.
   7.461 - */
   7.462 -
   7.463  /**
   7.464   *	__skb_queue_head - queue a buffer at the list head
   7.465   *	@list: list to use
   7.466 @@ -445,85 +226,16 @@ static inline void skb_queue_head_init(s
   7.467   
   7.468  static inline void __skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
   7.469  {
   7.470 -	struct sk_buff *prev, *next;
   7.471 -
   7.472 -	newsk->list = list;
   7.473 -	list->qlen++;
   7.474 -	prev = (struct sk_buff *)list;
   7.475 -	next = prev->next;
   7.476 -	newsk->next = next;
   7.477 -	newsk->prev = prev;
   7.478 -	next->prev = newsk;
   7.479 -	prev->next = newsk;
   7.480 -}
   7.481 -
   7.482 -
   7.483 -/**
   7.484 - *	skb_queue_head - queue a buffer at the list head
   7.485 - *	@list: list to use
   7.486 - *	@newsk: buffer to queue
   7.487 - *
   7.488 - *	Queue a buffer at the start of the list. This function takes the
   7.489 - *	list lock and can be used safely with other locking &sk_buff functions
   7.490 - *	safely.
   7.491 - *
   7.492 - *	A buffer cannot be placed on two lists at the same time.
   7.493 - */	
   7.494 -
   7.495 -static inline void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
   7.496 -{
   7.497 -	unsigned long flags;
   7.498 -
   7.499 -	spin_lock_irqsave(&list->lock, flags);
   7.500 -	__skb_queue_head(list, newsk);
   7.501 -	spin_unlock_irqrestore(&list->lock, flags);
   7.502 -}
   7.503 +    struct sk_buff *prev, *next;
   7.504  
   7.505 -/**
   7.506 - *	__skb_queue_tail - queue a buffer at the list tail
   7.507 - *	@list: list to use
   7.508 - *	@newsk: buffer to queue
   7.509 - *
   7.510 - *	Queue a buffer at the end of a list. This function takes no locks
   7.511 - *	and you must therefore hold required locks before calling it.
   7.512 - *
   7.513 - *	A buffer cannot be placed on two lists at the same time.
   7.514 - */	
   7.515 - 
   7.516 -
   7.517 -static inline void __skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
   7.518 -{
   7.519 -	struct sk_buff *prev, *next;
   7.520 -
   7.521 -	newsk->list = list;
   7.522 -	list->qlen++;
   7.523 -	next = (struct sk_buff *)list;
   7.524 -	prev = next->prev;
   7.525 -	newsk->next = next;
   7.526 -	newsk->prev = prev;
   7.527 -	next->prev = newsk;
   7.528 -	prev->next = newsk;
   7.529 -}
   7.530 -
   7.531 -/**
   7.532 - *	skb_queue_tail - queue a buffer at the list tail
   7.533 - *	@list: list to use
   7.534 - *	@newsk: buffer to queue
   7.535 - *
   7.536 - *	Queue a buffer at the tail of the list. This function takes the
   7.537 - *	list lock and can be used safely with other locking &sk_buff functions
   7.538 - *	safely.
   7.539 - *
   7.540 - *	A buffer cannot be placed on two lists at the same time.
   7.541 - */	
   7.542 -
   7.543 -static inline void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
   7.544 -{
   7.545 -	unsigned long flags;
   7.546 -
   7.547 -	spin_lock_irqsave(&list->lock, flags);
   7.548 -	__skb_queue_tail(list, newsk);
   7.549 -	spin_unlock_irqrestore(&list->lock, flags);
   7.550 +    newsk->list = list;
   7.551 +    list->qlen++;
   7.552 +    prev = (struct sk_buff *)list;
   7.553 +    next = prev->next;
   7.554 +    newsk->next = next;
   7.555 +    newsk->prev = prev;
   7.556 +    next->prev = newsk;
   7.557 +    prev->next = newsk;
   7.558  }
   7.559  
   7.560  /**
   7.561 @@ -537,205 +249,29 @@ static inline void skb_queue_tail(struct
   7.562  
   7.563  static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
   7.564  {
   7.565 -	struct sk_buff *next, *prev, *result;
   7.566 -
   7.567 -	prev = (struct sk_buff *) list;
   7.568 -	next = prev->next;
   7.569 -	result = NULL;
   7.570 -	if (next != prev) {
   7.571 -		result = next;
   7.572 -		next = next->next;
   7.573 -		list->qlen--;
   7.574 -		next->prev = prev;
   7.575 -		prev->next = next;
   7.576 -		result->next = NULL;
   7.577 -		result->prev = NULL;
   7.578 -		result->list = NULL;
   7.579 -	}
   7.580 -	return result;
   7.581 -}
   7.582 -
   7.583 -/**
   7.584 - *	skb_dequeue - remove from the head of the queue
   7.585 - *	@list: list to dequeue from
   7.586 - *
   7.587 - *	Remove the head of the list. The list lock is taken so the function
   7.588 - *	may be used safely with other locking list functions. The head item is
   7.589 - *	returned or %NULL if the list is empty.
   7.590 - */
   7.591 -
   7.592 -static inline struct sk_buff *skb_dequeue(struct sk_buff_head *list)
   7.593 -{
   7.594 -	long flags;
   7.595 -	struct sk_buff *result;
   7.596 -
   7.597 -	spin_lock_irqsave(&list->lock, flags);
   7.598 -	result = __skb_dequeue(list);
   7.599 -	spin_unlock_irqrestore(&list->lock, flags);
   7.600 -	return result;
   7.601 -}
   7.602 -
   7.603 -/*
   7.604 - *	Insert a packet on a list.
   7.605 - */
   7.606 -
   7.607 -static inline void __skb_insert(struct sk_buff *newsk,
   7.608 -	struct sk_buff * prev, struct sk_buff *next,
   7.609 -	struct sk_buff_head * list)
   7.610 -{
   7.611 -	newsk->next = next;
   7.612 -	newsk->prev = prev;
   7.613 -	next->prev = newsk;
   7.614 -	prev->next = newsk;
   7.615 -	newsk->list = list;
   7.616 -	list->qlen++;
   7.617 -}
   7.618 -
   7.619 -/**
   7.620 - *	skb_insert	-	insert a buffer
   7.621 - *	@old: buffer to insert before
   7.622 - *	@newsk: buffer to insert
   7.623 - *
   7.624 - *	Place a packet before a given packet in a list. The list locks are taken
   7.625 - *	and this function is atomic with respect to other list locked calls
   7.626 - *	A buffer cannot be placed on two lists at the same time.
   7.627 - */
   7.628 -
   7.629 -static inline void skb_insert(struct sk_buff *old, struct sk_buff *newsk)
   7.630 -{
   7.631 -	unsigned long flags;
   7.632 -
   7.633 -	spin_lock_irqsave(&old->list->lock, flags);
   7.634 -	__skb_insert(newsk, old->prev, old, old->list);
   7.635 -	spin_unlock_irqrestore(&old->list->lock, flags);
   7.636 -}
   7.637 -
   7.638 -/*
   7.639 - *	Place a packet after a given packet in a list.
   7.640 - */
   7.641 -
   7.642 -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk)
   7.643 -{
   7.644 -	__skb_insert(newsk, old, old->next, old->list);
   7.645 -}
   7.646 -
   7.647 -/**
   7.648 - *	skb_append	-	append a buffer
   7.649 - *	@old: buffer to insert after
   7.650 - *	@newsk: buffer to insert
   7.651 - *
   7.652 - *	Place a packet after a given packet in a list. The list locks are taken
   7.653 - *	and this function is atomic with respect to other list locked calls.
   7.654 - *	A buffer cannot be placed on two lists at the same time.
   7.655 - */
   7.656 -
   7.657 +    struct sk_buff *next, *prev, *result;
   7.658  
   7.659 -static inline void skb_append(struct sk_buff *old, struct sk_buff *newsk)
   7.660 -{
   7.661 -	unsigned long flags;
   7.662 -
   7.663 -	spin_lock_irqsave(&old->list->lock, flags);
   7.664 -	__skb_append(old, newsk);
   7.665 -	spin_unlock_irqrestore(&old->list->lock, flags);
   7.666 -}
   7.667 -
   7.668 -/*
   7.669 - * remove sk_buff from list. _Must_ be called atomically, and with
   7.670 - * the list known..
   7.671 - */
   7.672 - 
   7.673 -static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
   7.674 -{
   7.675 -	struct sk_buff * next, * prev;
   7.676 -
   7.677 -	list->qlen--;
   7.678 -	next = skb->next;
   7.679 -	prev = skb->prev;
   7.680 -	skb->next = NULL;
   7.681 -	skb->prev = NULL;
   7.682 -	skb->list = NULL;
   7.683 -	next->prev = prev;
   7.684 -	prev->next = next;
   7.685 -}
   7.686 -
   7.687 -/**
   7.688 - *	skb_unlink	-	remove a buffer from a list
   7.689 - *	@skb: buffer to remove
   7.690 - *
   7.691 - *	Place a packet after a given packet in a list. The list locks are taken
   7.692 - *	and this function is atomic with respect to other list locked calls
   7.693 - *	
   7.694 - *	Works even without knowing the list it is sitting on, which can be 
   7.695 - *	handy at times. It also means that THE LIST MUST EXIST when you 
   7.696 - *	unlink. Thus a list must have its contents unlinked before it is
   7.697 - *	destroyed.
   7.698 - */
   7.699 -
   7.700 -static inline void skb_unlink(struct sk_buff *skb)
   7.701 -{
   7.702 -	struct sk_buff_head *list = skb->list;
   7.703 -
   7.704 -	if(list) {
   7.705 -		unsigned long flags;
   7.706 -
   7.707 -		spin_lock_irqsave(&list->lock, flags);
   7.708 -		if(skb->list == list)
   7.709 -			__skb_unlink(skb, skb->list);
   7.710 -		spin_unlock_irqrestore(&list->lock, flags);
   7.711 -	}
   7.712 -}
   7.713 -
   7.714 -/* XXX: more streamlined implementation */
   7.715 -
   7.716 -/**
   7.717 - *	__skb_dequeue_tail - remove from the tail of the queue
   7.718 - *	@list: list to dequeue from
   7.719 - *
   7.720 - *	Remove the tail of the list. This function does not take any locks
   7.721 - *	so must be used with appropriate locks held only. The tail item is
   7.722 - *	returned or %NULL if the list is empty.
   7.723 - */
   7.724 -
   7.725 -static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
   7.726 -{
   7.727 -	struct sk_buff *skb = skb_peek_tail(list); 
   7.728 -	if (skb)
   7.729 -		__skb_unlink(skb, list);
   7.730 -	return skb;
   7.731 -}
   7.732 -
   7.733 -/**
   7.734 - *	skb_dequeue - remove from the head of the queue
   7.735 - *	@list: list to dequeue from
   7.736 - *
   7.737 - *	Remove the head of the list. The list lock is taken so the function
   7.738 - *	may be used safely with other locking list functions. The tail item is
   7.739 - *	returned or %NULL if the list is empty.
   7.740 - */
   7.741 -
   7.742 -static inline struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
   7.743 -{
   7.744 -	long flags;
   7.745 -	struct sk_buff *result;
   7.746 -
   7.747 -	spin_lock_irqsave(&list->lock, flags);
   7.748 -	result = __skb_dequeue_tail(list);
   7.749 -	spin_unlock_irqrestore(&list->lock, flags);
   7.750 -	return result;
   7.751 +    prev = (struct sk_buff *) list;
   7.752 +    next = prev->next;
   7.753 +    result = NULL;
   7.754 +    if (next != prev) {
   7.755 +        result = next;
   7.756 +        next = next->next;
   7.757 +        list->qlen--;
   7.758 +        next->prev = prev;
   7.759 +        prev->next = next;
   7.760 +        result->next = NULL;
   7.761 +        result->prev = NULL;
   7.762 +        result->list = NULL;
   7.763 +    }
   7.764 +    return result;
   7.765  }
   7.766  
   7.767  static inline int skb_is_nonlinear(const struct sk_buff *skb)
   7.768  {
   7.769 -	return skb->data_len;
   7.770 +    return skb->data_len;
   7.771  }
   7.772  
   7.773 -static inline int skb_headlen(const struct sk_buff *skb)
   7.774 -{
   7.775 -	return skb->len - skb->data_len;
   7.776 -}
   7.777 -
   7.778 -#define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) BUG(); } while (0)
   7.779 -#define SKB_FRAG_ASSERT(skb) do { if (skb_shinfo(skb)->frag_list) BUG(); } while (0)
   7.780  #define SKB_LINEAR_ASSERT(skb) do { if (skb_is_nonlinear(skb)) BUG(); } while (0)
   7.781  
   7.782  /*
   7.783 @@ -744,11 +280,11 @@ static inline int skb_headlen(const stru
   7.784   
   7.785  static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len)
   7.786  {
   7.787 -	unsigned char *tmp=skb->tail;
   7.788 -	SKB_LINEAR_ASSERT(skb);
   7.789 -	skb->tail+=len;
   7.790 -	skb->len+=len;
   7.791 -	return tmp;
   7.792 +    unsigned char *tmp=skb->tail;
   7.793 +    SKB_LINEAR_ASSERT(skb);
   7.794 +    skb->tail+=len;
   7.795 +    skb->len+=len;
   7.796 +    return tmp;
   7.797  }
   7.798  
   7.799  /**
   7.800 @@ -763,21 +299,21 @@ static inline unsigned char *__skb_put(s
   7.801   
   7.802  static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len)
   7.803  {
   7.804 -	unsigned char *tmp=skb->tail;
   7.805 -	SKB_LINEAR_ASSERT(skb);
   7.806 -	skb->tail+=len;
   7.807 -	skb->len+=len;
   7.808 -	if(skb->tail>skb->end) {
   7.809 -		skb_over_panic(skb, len, current_text_addr());
   7.810 -	}
   7.811 -	return tmp;
   7.812 +    unsigned char *tmp=skb->tail;
   7.813 +    SKB_LINEAR_ASSERT(skb);
   7.814 +    skb->tail+=len;
   7.815 +    skb->len+=len;
   7.816 +    if(skb->tail>skb->end) {
   7.817 +        skb_over_panic(skb, len, current_text_addr());
   7.818 +    }
   7.819 +    return tmp;
   7.820  }
   7.821  
   7.822  static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len)
   7.823  {
   7.824 -	skb->data-=len;
   7.825 -	skb->len+=len;
   7.826 -	return skb->data;
   7.827 +    skb->data-=len;
   7.828 +    skb->len+=len;
   7.829 +    return skb->data;
   7.830  }
   7.831  
   7.832  /**
   7.833 @@ -792,20 +328,20 @@ static inline unsigned char *__skb_push(
   7.834  
   7.835  static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len)
   7.836  {
   7.837 -	skb->data-=len;
   7.838 -	skb->len+=len;
   7.839 -	if(skb->data<skb->head) {
   7.840 -		skb_under_panic(skb, len, current_text_addr());
   7.841 -	}
   7.842 -	return skb->data;
   7.843 +    skb->data-=len;
   7.844 +    skb->len+=len;
   7.845 +    if(skb->data<skb->head) {
   7.846 +        skb_under_panic(skb, len, current_text_addr());
   7.847 +    }
   7.848 +    return skb->data;
   7.849  }
   7.850  
   7.851  static inline char *__skb_pull(struct sk_buff *skb, unsigned int len)
   7.852  {
   7.853 -	skb->len-=len;
   7.854 -	if (skb->len < skb->data_len)
   7.855 -		BUG();
   7.856 -	return 	skb->data+=len;
   7.857 +    skb->len-=len;
   7.858 +    if (skb->len < skb->data_len)
   7.859 +        BUG();
   7.860 +    return 	skb->data+=len;
   7.861  }
   7.862  
   7.863  /**
   7.864 @@ -821,60 +357,9 @@ static inline char *__skb_pull(struct sk
   7.865  
   7.866  static inline unsigned char * skb_pull(struct sk_buff *skb, unsigned int len)
   7.867  {	
   7.868 -	if (len > skb->len)
   7.869 -		return NULL;
   7.870 -	return __skb_pull(skb,len);
   7.871 -}
   7.872 -
   7.873 -extern unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta);
   7.874 -
   7.875 -static inline char *__pskb_pull(struct sk_buff *skb, unsigned int len)
   7.876 -{
   7.877 -	if (len > skb_headlen(skb) &&
   7.878 -	    __pskb_pull_tail(skb, len-skb_headlen(skb)) == NULL)
   7.879 -		return NULL;
   7.880 -	skb->len -= len;
   7.881 -	return 	skb->data += len;
   7.882 -}
   7.883 -
   7.884 -static inline unsigned char * pskb_pull(struct sk_buff *skb, unsigned int len)
   7.885 -{	
   7.886 -	if (len > skb->len)
   7.887 -		return NULL;
   7.888 -	return __pskb_pull(skb,len);
   7.889 -}
   7.890 -
   7.891 -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len)
   7.892 -{
   7.893 -	if (len <= skb_headlen(skb))
   7.894 -		return 1;
   7.895 -	if (len > skb->len)
   7.896 -		return 0;
   7.897 -	return (__pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL);
   7.898 -}
   7.899 -
   7.900 -/**
   7.901 - *	skb_headroom - bytes at buffer head
   7.902 - *	@skb: buffer to check
   7.903 - *
   7.904 - *	Return the number of bytes of free space at the head of an &sk_buff.
   7.905 - */
   7.906 - 
   7.907 -static inline int skb_headroom(const struct sk_buff *skb)
   7.908 -{
   7.909 -	return skb->data-skb->head;
   7.910 -}
   7.911 -
   7.912 -/**
   7.913 - *	skb_tailroom - bytes at buffer end
   7.914 - *	@skb: buffer to check
   7.915 - *
   7.916 - *	Return the number of bytes of free space at the tail of an sk_buff
   7.917 - */
   7.918 -
   7.919 -static inline int skb_tailroom(const struct sk_buff *skb)
   7.920 -{
   7.921 -	return skb_is_nonlinear(skb) ? 0 : skb->end-skb->tail;
   7.922 +    if (len > skb->len)
   7.923 +        return NULL;
   7.924 +    return __skb_pull(skb,len);
   7.925  }
   7.926  
   7.927  /**
   7.928 @@ -888,106 +373,8 @@ static inline int skb_tailroom(const str
   7.929  
   7.930  static inline void skb_reserve(struct sk_buff *skb, unsigned int len)
   7.931  {
   7.932 -	skb->data+=len;
   7.933 -	skb->tail+=len;
   7.934 -}
   7.935 -
   7.936 -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
   7.937 -
   7.938 -static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
   7.939 -{
   7.940 -	if (!skb->data_len) {
   7.941 -		skb->len = len;
   7.942 -		skb->tail = skb->data+len;
   7.943 -	} else {
   7.944 -		___pskb_trim(skb, len, 0);
   7.945 -	}
   7.946 -}
   7.947 -
   7.948 -/**
   7.949 - *	skb_trim - remove end from a buffer
   7.950 - *	@skb: buffer to alter
   7.951 - *	@len: new length
   7.952 - *
   7.953 - *	Cut the length of a buffer down by removing data from the tail. If
   7.954 - *	the buffer is already under the length specified it is not modified.
   7.955 - */
   7.956 -
   7.957 -static inline void skb_trim(struct sk_buff *skb, unsigned int len)
   7.958 -{
   7.959 -	if (skb->len > len) {
   7.960 -		__skb_trim(skb, len);
   7.961 -	}
   7.962 -}
   7.963 -
   7.964 -
   7.965 -static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
   7.966 -{
   7.967 -	if (!skb->data_len) {
   7.968 -		skb->len = len;
   7.969 -		skb->tail = skb->data+len;
   7.970 -		return 0;
   7.971 -	} else {
   7.972 -		return ___pskb_trim(skb, len, 1);
   7.973 -	}
   7.974 -}
   7.975 -
   7.976 -static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
   7.977 -{
   7.978 -	if (len < skb->len)
   7.979 -		return __pskb_trim(skb, len);
   7.980 -	return 0;
   7.981 -}
   7.982 -
   7.983 -/**
   7.984 - *	skb_orphan - orphan a buffer
   7.985 - *	@skb: buffer to orphan
   7.986 - *
   7.987 - *	If a buffer currently has an owner then we call the owner's
   7.988 - *	destructor function and make the @skb unowned. The buffer continues
   7.989 - *	to exist but is no longer charged to its former owner.
   7.990 - */
   7.991 -
   7.992 -
   7.993 -static inline void skb_orphan(struct sk_buff *skb)
   7.994 -{
   7.995 -	if (skb->destructor)
   7.996 -		skb->destructor(skb);
   7.997 -	skb->destructor = NULL;
   7.998 -}
   7.999 -
  7.1000 -/**
  7.1001 - *	skb_purge - empty a list
  7.1002 - *	@list: list to empty
  7.1003 - *
  7.1004 - *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  7.1005 - *	the list and one reference dropped. This function takes the list
  7.1006 - *	lock and is atomic with respect to other list locking functions.
  7.1007 - */
  7.1008 -
  7.1009 -
  7.1010 -static inline void skb_queue_purge(struct sk_buff_head *list)
  7.1011 -{
  7.1012 -	struct sk_buff *skb;
  7.1013 -	while ((skb=skb_dequeue(list))!=NULL)
  7.1014 -		kfree_skb(skb);
  7.1015 -}
  7.1016 -
  7.1017 -/**
  7.1018 - *	__skb_purge - empty a list
  7.1019 - *	@list: list to empty
  7.1020 - *
  7.1021 - *	Delete all buffers on an &sk_buff list. Each buffer is removed from
  7.1022 - *	the list and one reference dropped. This function does not take the
  7.1023 - *	list lock and the caller must hold the relevant locks to use it.
  7.1024 - */
  7.1025 -
  7.1026 -
  7.1027 -static inline void __skb_queue_purge(struct sk_buff_head *list)
  7.1028 -{
  7.1029 -	struct sk_buff *skb;
  7.1030 -	while ((skb=__skb_dequeue(list))!=NULL)
  7.1031 -		kfree_skb(skb);
  7.1032 +    skb->data+=len;
  7.1033 +    skb->tail+=len;
  7.1034  }
  7.1035  
  7.1036  /**
  7.1037 @@ -1006,13 +393,11 @@ static inline void __skb_queue_purge(str
  7.1038  static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
  7.1039  					      int gfp_mask)
  7.1040  {
  7.1041 -	struct sk_buff *skb;
  7.1042 -
  7.1043 -	//skb = alloc_skb(length+16, gfp_mask);
  7.1044 -        skb = alloc_zc_skb(length+16, gfp_mask);
  7.1045 -	if (skb)
  7.1046 -		skb_reserve(skb,16);
  7.1047 -	return skb;
  7.1048 +    struct sk_buff *skb;
  7.1049 +    skb = alloc_zc_skb(length+16, gfp_mask);
  7.1050 +    if (skb)
  7.1051 +        skb_reserve(skb,16);
  7.1052 +    return skb;
  7.1053  }
  7.1054  
  7.1055  /**
  7.1056 @@ -1030,84 +415,20 @@ static inline struct sk_buff *__dev_allo
  7.1057   
  7.1058  static inline struct sk_buff *dev_alloc_skb(unsigned int length)
  7.1059  {
  7.1060 -	return __dev_alloc_skb(length, GFP_ATOMIC);
  7.1061 +    return __dev_alloc_skb(length, GFP_ATOMIC);
  7.1062  }
  7.1063  
  7.1064 -/**
  7.1065 - *	skb_cow - copy header of skb when it is required
  7.1066 - *	@skb: buffer to cow
  7.1067 - *	@headroom: needed headroom
  7.1068 - *
  7.1069 - *	If the skb passed lacks sufficient headroom or its data part
  7.1070 - *	is shared, data is reallocated. If reallocation fails, an error
  7.1071 - *	is returned and original skb is not changed.
  7.1072 - *
  7.1073 - *	The result is skb with writable area skb->head...skb->tail
  7.1074 - *	and at least @headroom of space at head.
  7.1075 - */
  7.1076 -
  7.1077 -static inline int
  7.1078 -skb_cow(struct sk_buff *skb, unsigned int headroom)
  7.1079 -{
  7.1080 -	int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb);
  7.1081 -
  7.1082 -	if (delta < 0)
  7.1083 -		delta = 0;
  7.1084 -
  7.1085 -	if (delta || skb_cloned(skb))
  7.1086 -		return pskb_expand_head(skb, (delta+15)&~15, 0, GFP_ATOMIC);
  7.1087 -	return 0;
  7.1088 -}
  7.1089 -
  7.1090 -/**
  7.1091 - *	skb_linearize - convert paged skb to linear one
  7.1092 - *	@skb: buffer to linarize
  7.1093 - *	@gfp: allocation mode
  7.1094 - *
  7.1095 - *	If there is no free memory -ENOMEM is returned, otherwise zero
  7.1096 - *	is returned and the old skb data released.  */
  7.1097 -int skb_linearize(struct sk_buff *skb, int gfp);
  7.1098 -
  7.1099  static inline void *kmap_skb_frag(const skb_frag_t *frag)
  7.1100  {
  7.1101 -	return page_address(frag->page);
  7.1102 +    return page_address(frag->page);
  7.1103  }
  7.1104  
  7.1105  static inline void kunmap_skb_frag(void *vaddr)
  7.1106  {
  7.1107  }
  7.1108  
  7.1109 -#define skb_queue_walk(queue, skb) \
  7.1110 -		for (skb = (queue)->next;			\
  7.1111 -		     (skb != (struct sk_buff *)(queue));	\
  7.1112 -		     skb=skb->next)
  7.1113 -
  7.1114 -
  7.1115 -extern int			skb_copy_datagram(const struct sk_buff *from, int offset, char *to,int size);
  7.1116 -extern int			skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int *csump);
  7.1117 -
  7.1118 -extern unsigned int		skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum);
  7.1119 -extern int			skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len);
  7.1120 -extern unsigned int		skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum);
  7.1121 -extern void			skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
  7.1122 -
  7.1123 +extern int skb_copy_bits(const struct sk_buff *skb, 
  7.1124 +                         int offset, void *to, int len);
  7.1125  extern void skb_init(void);
  7.1126 -extern void skb_add_mtu(int mtu);
  7.1127  
  7.1128 -#ifdef CONFIG_NETFILTER
  7.1129 -static inline void
  7.1130 -nf_conntrack_put(struct nf_ct_info *nfct)
  7.1131 -{
  7.1132 -	if (nfct && atomic_dec_and_test(&nfct->master->use))
  7.1133 -		nfct->master->destroy(nfct->master);
  7.1134 -}
  7.1135 -static inline void
  7.1136 -nf_conntrack_get(struct nf_ct_info *nfct)
  7.1137 -{
  7.1138 -	if (nfct)
  7.1139 -		atomic_inc(&nfct->master->use);
  7.1140 -}
  7.1141 -#endif
  7.1142 -
  7.1143 -#endif	/* __KERNEL__ */
  7.1144  #endif	/* _LINUX_SKBUFF_H */
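
The net effect of the skbuff.h rewrite is a much smaller sk_buff contract: no clones, no users reference count, and skb_shinfo() pointing at the shinfo member embedded in the struct itself, so kfree_skb() frees unconditionally. A short allocate/use/free sketch under these rules, kernel-style and not standalone, with the sizes chosen arbitrarily:

static void skb_example(void)
{
    struct sk_buff *skb = dev_alloc_skb(1514); /* backed by alloc_zc_skb()   */
    if ( skb == NULL )
        return;

    skb_put(skb, 60);                  /* claim space for a minimal frame    */
    skb_shinfo(skb)->nr_frags = 0;     /* frag metadata lives in skb->shinfo,
                                          not at skb->end                    */
    kfree_skb(skb);                    /* calls __kfree_skb() directly: there
                                          is no 'users' refcount in this tree */
}
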
     8.1 --- a/xen-2.4.16/include/xeno/vif.h	Sun Feb 23 11:22:39 2003 +0000
     8.2 +++ b/xen-2.4.16/include/xeno/vif.h	Mon Feb 24 14:19:58 2003 +0000
     8.3 @@ -26,16 +26,17 @@
     8.4   */
     8.5  
     8.6  typedef struct rx_shadow_entry_st {
     8.7 -    unsigned long addr;
     8.8 -    unsigned long size;
     8.9 -    int           status;
    8.10 -    unsigned long flush_count;
    8.11 +    unsigned long  addr;
    8.12 +    unsigned short size;
    8.13 +    unsigned short status;
    8.14 +    unsigned long  flush_count;
    8.15  } rx_shadow_entry_t;
    8.16  
    8.17  typedef struct tx_shadow_entry_st {
    8.18 -    unsigned long addr;
    8.19 -    unsigned long size;
    8.20 -    int           status;
    8.21 +    void          *header;
    8.22 +    unsigned long  payload;
    8.23 +    unsigned short size;
    8.24 +    unsigned short status;
    8.25  } tx_shadow_entry_t;
    8.26  
    8.27  typedef struct net_shadow_ring_st {
    8.28 @@ -46,12 +47,11 @@ typedef struct net_shadow_ring_st {
    8.29  } net_shadow_ring_t;
    8.30  
    8.31  typedef struct net_vif_st {
    8.32 -    net_ring_t          *net_ring;
    8.33 -    net_shadow_ring_t   *shadow_ring;
    8.34 +    net_ring_t         *net_ring;
    8.35 +    net_shadow_ring_t  *shadow_ring;
    8.36      int                 id;
    8.37 -    struct sk_buff_head skb_list;
    8.38 -    unsigned int domain;
    8.39 -    // rules table goes here in next revision.
    8.40 +    struct task_struct *domain;
    8.41 +    struct list_head    list;
    8.42  } net_vif_t;
    8.43  
    8.44  /* VIF-related defines. */
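
The vif now records its owning task_struct directly and carries a list_head, pairing with the net_schedule_list / net_schedule_list_lock externs added to netdevice.h above: vifs with pending tx work can sit on a single hypervisor-wide list that net_tx_tasklet drains. A sketch of one plausible enqueue helper follows; the function name, the not-already-queued test and the policy are assumptions, only the data structures come from this changeset:

static void vif_schedule_tx(net_vif_t *vif)
{
    unsigned long flags;

    spin_lock_irqsave(&net_schedule_list_lock, flags);
    if ( vif->list.next == NULL )                 /* matches the NULL init
                                                     in create_net_vif()     */
        list_add_tail(&vif->list, &net_schedule_list);
    spin_unlock_irqrestore(&net_schedule_list_lock, flags);

    tasklet_schedule(&net_tx_tasklet);            /* kick the tx scheduler   */
}
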
     9.1 --- a/xen-2.4.16/net/dev.c	Sun Feb 23 11:22:39 2003 +0000
     9.2 +++ b/xen-2.4.16/net/dev.c	Mon Feb 24 14:19:58 2003 +0000
     9.3 @@ -37,7 +37,6 @@
     9.4  #define rtmsg_ifinfo(_a,_b,_c) ((void)0)
     9.5  #define rtnl_lock() ((void)0)
     9.6  #define rtnl_unlock() ((void)0)
     9.7 -#define dst_init() ((void)0)
     9.8  
     9.9  #if 0
    9.10  #define DPRINTK(_f, _a...) printk(_f , ## _a)
    9.11 @@ -53,11 +52,12 @@
    9.12  struct net_device *the_dev = NULL;
    9.13  
    9.14  /*
    9.15 - *	Device drivers call our routines to queue packets here. We empty the
    9.16 - *	queue in the local softnet handler.
    9.17 + * Transmitted packets are fragmented, so we can copy the important headers 
    9.18 + * before checking them for validity. Avoids need for page protection.
    9.19   */
    9.20 -struct softnet_data softnet_data[NR_CPUS] __cacheline_aligned;
    9.21 -
    9.22 +/* Ethernet + IP headers */
    9.23 +#define PKT_PROT_LEN (ETH_HLEN + 20)
    9.24 +static kmem_cache_t *net_header_cachep;
    9.25  
    9.26  /**
    9.27   *	__dev_get_by_name	- find a device by its name 
    9.28 @@ -105,14 +105,6 @@ struct net_device *dev_get_by_name(const
    9.29      return dev;
    9.30  }
    9.31  
    9.32 -/* 
    9.33 -   Return value is changed to int to prevent illegal usage in future.
    9.34 -   It is still legal to use to check for device existance.
    9.35 -
    9.36 -   User should understand, that the result returned by this function
    9.37 -   is meaningless, if it was not issued under rtnl semaphore.
    9.38 - */
    9.39 -
    9.40  /**
    9.41   *	dev_get	-	test if a device exists
    9.42   *	@name:	name to test for
    9.43 @@ -483,142 +475,13 @@ illegal_highdma(struct net_device *dev, 
    9.44  #define illegal_highdma(dev, skb)	(0)
    9.45  #endif
    9.46  
    9.47 -/*
    9.48 - * dev_queue_xmit - transmit a buffer
    9.49 - * @skb: buffer to transmit
    9.50 - *	
    9.51 - * Queue a buffer for transmission to a network device. The caller must
    9.52 - * have set the device and priority and built the buffer before calling this 
    9.53 - * function. The function can be called from an interrupt.
    9.54 - *
    9.55 - * A negative errno code is returned on a failure. A success does not
    9.56 - * guarantee the frame will be transmitted as it may be dropped due
    9.57 - * to congestion or traffic shaping.
    9.58 - */
    9.59 -
    9.60 -int dev_queue_xmit(struct sk_buff *skb)
    9.61 -{
    9.62 -    struct net_device *dev = skb->dev;
    9.63 -        
    9.64 -    if (!(dev->features&NETIF_F_SG)) 
    9.65 -    {
    9.66 -        printk("NIC doesn't do scatter-gather!\n");
    9.67 -        BUG();
    9.68 -    }
    9.69 -        
    9.70 -    if (skb_shinfo(skb)->frag_list &&
    9.71 -        !(dev->features&NETIF_F_FRAGLIST) &&
    9.72 -        skb_linearize(skb, GFP_ATOMIC) != 0) {
    9.73 -        kfree_skb(skb);
    9.74 -        return -ENOMEM;
    9.75 -    }
    9.76 -
    9.77 -    spin_lock_bh(&dev->queue_lock);
    9.78 -    if (dev->flags&IFF_UP) {
    9.79 -        int cpu = smp_processor_id();
    9.80 -
    9.81 -        if (dev->xmit_lock_owner != cpu) {
    9.82 -            spin_unlock(&dev->queue_lock);
    9.83 -            spin_lock(&dev->xmit_lock);
    9.84 -            dev->xmit_lock_owner = cpu;
    9.85 -
    9.86 -            if (!netif_queue_stopped(dev)) {
    9.87 -                if (dev->hard_start_xmit(skb, dev) == 0) {
    9.88 -                    dev->xmit_lock_owner = -1;
    9.89 -                    spin_unlock_bh(&dev->xmit_lock);
    9.90 -                    return 0;
    9.91 -                }
    9.92 -            }
    9.93 -            dev->xmit_lock_owner = -1;
    9.94 -            spin_unlock_bh(&dev->xmit_lock);
    9.95 -            kfree_skb(skb);
    9.96 -            return -ENETDOWN;
    9.97 -        }
    9.98 -    }
    9.99 -    spin_unlock_bh(&dev->queue_lock);
   9.100 -
   9.101 -    kfree_skb(skb);
   9.102 -    return -ENETDOWN;
   9.103 -}
   9.104 -
   9.105  
   9.106  /*=======================================================================
   9.107  			Receiver routines
   9.108    =======================================================================*/
   9.109  
   9.110 -int netdev_max_backlog = 300;
   9.111 -/* These numbers are selected based on intuition and some
   9.112 - * experimentatiom, if you have more scientific way of doing this
   9.113 - * please go ahead and fix things.
   9.114 - */
   9.115 -int no_cong_thresh = 10;
   9.116 -int no_cong = 20;
   9.117 -int lo_cong = 100;
   9.118 -int mod_cong = 290;
   9.119 -
   9.120  struct netif_rx_stats netdev_rx_stat[NR_CPUS];
   9.121  
   9.122 -
   9.123 -#ifdef CONFIG_NET_HW_FLOWCONTROL
   9.124 -atomic_t netdev_dropping = ATOMIC_INIT(0);
   9.125 -static unsigned long netdev_fc_mask = 1;
   9.126 -unsigned long netdev_fc_xoff = 0;
   9.127 -spinlock_t netdev_fc_lock = SPIN_LOCK_UNLOCKED;
   9.128 -
   9.129 -static struct
   9.130 -{
   9.131 -    void (*stimul)(struct net_device *);
   9.132 -    struct net_device *dev;
   9.133 -} netdev_fc_slots[BITS_PER_LONG];
   9.134 -
   9.135 -int netdev_register_fc(struct net_device *dev, 
   9.136 -                       void (*stimul)(struct net_device *dev))
   9.137 -{
   9.138 -    int bit = 0;
   9.139 -    unsigned long flags;
   9.140 -
   9.141 -    spin_lock_irqsave(&netdev_fc_lock, flags);
   9.142 -    if (netdev_fc_mask != ~0UL) {
   9.143 -        bit = ffz(netdev_fc_mask);
   9.144 -        netdev_fc_slots[bit].stimul = stimul;
   9.145 -        netdev_fc_slots[bit].dev = dev;
   9.146 -        set_bit(bit, &netdev_fc_mask);
   9.147 -        clear_bit(bit, &netdev_fc_xoff);
   9.148 -    }
   9.149 -    spin_unlock_irqrestore(&netdev_fc_lock, flags);
   9.150 -    return bit;
   9.151 -}
   9.152 -
   9.153 -void netdev_unregister_fc(int bit)
   9.154 -{
   9.155 -    unsigned long flags;
   9.156 -
   9.157 -    spin_lock_irqsave(&netdev_fc_lock, flags);
   9.158 -    if (bit > 0) {
   9.159 -        netdev_fc_slots[bit].stimul = NULL;
   9.160 -        netdev_fc_slots[bit].dev = NULL;
   9.161 -        clear_bit(bit, &netdev_fc_mask);
   9.162 -        clear_bit(bit, &netdev_fc_xoff);
   9.163 -    }
   9.164 -    spin_unlock_irqrestore(&netdev_fc_lock, flags);
   9.165 -}
   9.166 -
   9.167 -static void netdev_wakeup(void)
   9.168 -{
   9.169 -    unsigned long xoff;
   9.170 -
   9.171 -    spin_lock(&netdev_fc_lock);
   9.172 -    xoff = netdev_fc_xoff;
   9.173 -    netdev_fc_xoff = 0;
   9.174 -    while (xoff) {
   9.175 -        int i = ffz(~xoff);
   9.176 -        xoff &= ~(1<<i);
   9.177 -        netdev_fc_slots[i].stimul(netdev_fc_slots[i].dev);
   9.178 -    }
   9.179 -    spin_unlock(&netdev_fc_lock);
   9.180 -}
   9.181 -#endif
   9.182 -
   9.183  void deliver_packet(struct sk_buff *skb, net_vif_t *vif)
   9.184  {
   9.185      net_shadow_ring_t *shadow_ring;
   9.186 @@ -677,9 +540,6 @@ void deliver_packet(struct sk_buff *skb,
   9.187      shadow_ring->rx_cons = RX_RING_INC(i);
   9.188  }
   9.189  
   9.190 -/* Deliver skb to an old protocol, which is not threaded well
   9.191 -   or which do not understand shared skbs.
   9.192 - */
   9.193  /**
   9.194   *	netif_rx	-	post buffer to the network code
   9.195   *	@skb: buffer to post
   9.196 @@ -691,12 +551,7 @@ void deliver_packet(struct sk_buff *skb,
   9.197   *      
   9.198   *	return values:
   9.199   *	NET_RX_SUCCESS	(no congestion)           
   9.200 - *	NET_RX_CN_LOW     (low congestion) 
   9.201 - *	NET_RX_CN_MOD     (moderate congestion)
   9.202 - *	NET_RX_CN_HIGH    (high congestion) 
   9.203   *	NET_RX_DROP    (packet was dropped)
   9.204 - *      
   9.205 - *      
   9.206   */
   9.207  
   9.208  int netif_rx(struct sk_buff *skb)
   9.209 @@ -707,7 +562,6 @@ int netif_rx(struct sk_buff *skb)
   9.210          
   9.211      struct task_struct *p;
   9.212      int this_cpu = smp_processor_id();
   9.213 -    struct softnet_data *queue;
   9.214      unsigned long flags;
   9.215      net_vif_t *vif;
   9.216  
   9.217 @@ -733,8 +587,6 @@ int netif_rx(struct sk_buff *skb)
   9.218      skb->data += ETH_HLEN;
   9.219      skb->nh.raw = skb->data;
   9.220          
   9.221 -    queue = &softnet_data[this_cpu];
   9.222 -        
   9.223      netdev_rx_stat[this_cpu].total++;
   9.224  
   9.225      if ( skb->src_vif == VIF_UNKNOWN_INTERFACE )
   9.226 @@ -762,8 +614,7 @@ int netif_rx(struct sk_buff *skb)
   9.227          read_lock(&tasklist_lock);
   9.228          p = &idle0_task;
   9.229          do {
   9.230 -            if ( p->domain != vif->domain ) continue;
   9.231 -            if ( vif->skb_list.qlen > 100 ) break;
   9.232 +            if ( p != vif->domain ) continue;
   9.233              deliver_packet(skb, vif);
   9.234              cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
   9.235              read_unlock(&tasklist_lock);
   9.236 @@ -787,124 +638,146 @@ int netif_rx(struct sk_buff *skb)
   9.237      kfree_skb(skb);
   9.238      hyp_event_notify(cpu_mask);
   9.239      local_irq_restore(flags);
   9.240 -    return 0;
   9.241 +    return NET_RX_SUCCESS;
   9.242 +}
   9.243 +
   9.244 +
   9.245 +/*************************************************************
   9.246 + * NEW TRANSMIT SCHEDULER
   9.247 + */
   9.248 +
   9.249 +struct list_head net_schedule_list;
   9.250 +spinlock_t net_schedule_list_lock;
   9.251 +
   9.252 +static int __on_net_schedule_list(net_vif_t *vif)
   9.253 +{
   9.254 +    return vif->list.next != NULL;
   9.255 +}
   9.256 +
   9.257 +static void remove_from_net_schedule_list(net_vif_t *vif)
   9.258 +{
   9.259 +    unsigned long flags;
   9.260 +    if ( !__on_net_schedule_list(vif) ) return;
   9.261 +    spin_lock_irqsave(&net_schedule_list_lock, flags);
   9.262 +    if ( __on_net_schedule_list(vif) )
   9.263 +    {
   9.264 +        list_del(&vif->list);
   9.265 +        vif->list.next = NULL;
   9.266 +    }
   9.267 +    spin_unlock_irqrestore(&net_schedule_list_lock, flags);
   9.268 +}
   9.269 +
   9.270 +static void add_to_net_schedule_list_tail(net_vif_t *vif)
   9.271 +{
   9.272 +    unsigned long flags;
   9.273 +    if ( __on_net_schedule_list(vif) ) return;
   9.274 +    spin_lock_irqsave(&net_schedule_list_lock, flags);
   9.275 +    if ( !__on_net_schedule_list(vif) )
   9.276 +    {
   9.277 +        list_add_tail(&vif->list, &net_schedule_list);
   9.278 +    }
   9.279 +    spin_unlock_irqrestore(&net_schedule_list_lock, flags);
   9.280  }
   9.281  
   9.282  
   9.283 -static int deliver_to_old_ones(struct packet_type *pt, 
   9.284 -                               struct sk_buff *skb, int last)
   9.285 +/* Destructor function for tx skbs. */
   9.286 +static void tx_skb_release(struct sk_buff *skb)
   9.287  {
   9.288 -    static spinlock_t net_bh_lock = SPIN_LOCK_UNLOCKED;
   9.289 -    int ret = NET_RX_DROP;
   9.290 +    int i;
   9.291 +    net_ring_t *ring;
   9.292 +    
   9.293 +    for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
   9.294 +        put_page_tot(skb_shinfo(skb)->frags[i].page);
   9.295  
   9.296 +    if ( skb->skb_type == SKB_NODATA )
   9.297 +        kmem_cache_free(net_header_cachep, skb->head);
   9.298  
   9.299 -    if (!last) {
   9.300 -        skb = skb_clone(skb, GFP_ATOMIC);
   9.301 -        if (skb == NULL)
   9.302 -            return ret;
   9.303 -    }
   9.304 -    if (skb_is_nonlinear(skb) && skb_linearize(skb, GFP_ATOMIC) != 0) {
   9.305 -        kfree_skb(skb);
   9.306 -        return ret;
   9.307 -    }
   9.308 +    skb_shinfo(skb)->nr_frags = 0; 
   9.309  
   9.310 -    /* The assumption (correct one) is that old protocols
   9.311 -       did not depened on BHs different of NET_BH and TIMER_BH.
   9.312 -    */
   9.313 -
   9.314 -    /* Emulate NET_BH with special spinlock */
   9.315 -    spin_lock(&net_bh_lock);
   9.316 +    /*
   9.317 +     * XXX This assumes that, per vif, SKBs are processed in-order!
   9.318 +     * Also assumes no concurrency. This is safe because each vif
   9.319 +     * maps to one NIC. This is executed in NIC interrupt code, so we have
   9.320 +     * mutual exclusion from do_IRQ().
   9.321 +     */
   9.322 +    ring = sys_vif_list[skb->src_vif]->net_ring;
   9.323 +    ring->tx_cons = TX_RING_INC(ring->tx_cons);
   9.324  
   9.325 -    /* Disable timers and wait for all timers completion */
   9.326 -    tasklet_disable(bh_task_vec+TIMER_BH);
   9.327 -
   9.328 -    ret = pt->func(skb, skb->dev, pt);
   9.329 -
   9.330 -    tasklet_hi_enable(bh_task_vec+TIMER_BH);
   9.331 -    spin_unlock(&net_bh_lock);
   9.332 -    return ret;
   9.333 +    if ( ring->tx_cons == ring->tx_event )
   9.334 +        set_bit(_EVENT_NET_TX, 
   9.335 +                &sys_vif_list[skb->src_vif]->domain->shared_info->events);
   9.336  }
   9.337  
   9.338 +    
   9.339  static void net_tx_action(unsigned long unused)
   9.340  {
   9.341 -    int cpu = smp_processor_id();
   9.342 -
   9.343 -    if (softnet_data[cpu].completion_queue) {
   9.344 -        struct sk_buff *clist;
   9.345 -
   9.346 -        local_irq_disable();
   9.347 -        clist = softnet_data[cpu].completion_queue;
   9.348 -        softnet_data[cpu].completion_queue = NULL;
   9.349 -        local_irq_enable();
   9.350 -
   9.351 -        while (clist != NULL) {
   9.352 -            struct sk_buff *skb = clist;
   9.353 -            clist = clist->next;
   9.354 +    struct net_device *dev = the_dev;
   9.355 +    struct list_head *ent;
   9.356 +    struct sk_buff *skb;
   9.357 +    net_vif_t *vif;
   9.358 +    tx_shadow_entry_t *tx;
   9.359 +    int pending_bytes = 0, pending_bytes_max = 1;
   9.360  
   9.361 -            BUG_TRAP(atomic_read(&skb->users) == 0);
   9.362 -            __kfree_skb(skb);
   9.363 -        }
   9.364 -    }
   9.365 -
   9.366 -    if (softnet_data[cpu].output_queue) {
   9.367 -        struct net_device *head;
   9.368 +    spin_lock(&dev->xmit_lock);
   9.369 +    while ( !netif_queue_stopped(dev) &&
   9.370 +            (pending_bytes < pending_bytes_max) &&
   9.371 +            !list_empty(&net_schedule_list) )
   9.372 +    {
   9.373 +        /* Get a vif from the list with work to do. */
   9.374 +        ent = net_schedule_list.next;
   9.375 +        vif = list_entry(ent, net_vif_t, list);
   9.376 +        remove_from_net_schedule_list(vif);
   9.377 +        if ( vif->shadow_ring->tx_idx == vif->shadow_ring->tx_prod )
   9.378 +            continue;
   9.379  
   9.380 -        local_irq_disable();
   9.381 -        head = softnet_data[cpu].output_queue;
   9.382 -        softnet_data[cpu].output_queue = NULL;
   9.383 -        local_irq_enable();
   9.384 -
   9.385 -        while (head != NULL) {
   9.386 -            struct net_device *dev = head;
   9.387 -            head = head->next_sched;
   9.388 -
   9.389 -            smp_mb__before_clear_bit();
   9.390 -            clear_bit(__LINK_STATE_SCHED, &dev->state);
   9.391 +        /* Check the chosen entry is good. */
   9.392 +        tx = &vif->shadow_ring->tx_ring[vif->shadow_ring->tx_idx];
   9.393 +        if ( tx->status != RING_STATUS_OK ) goto skip_desc;
   9.394  
   9.395 -            if (spin_trylock(&dev->queue_lock)) {
   9.396 -				/*qdisc_run(dev); XXX KAF */
   9.397 -                spin_unlock(&dev->queue_lock);
   9.398 -            } else {
   9.399 -                netif_schedule(dev);
   9.400 -            }
   9.401 +        if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
   9.402 +        {
   9.403 +            add_to_net_schedule_list_tail(vif);
   9.404 +            printk("Out of memory in net_tx_action()!\n");
   9.405 +            goto out;
   9.406          }
   9.407 -    }
   9.408 -}
   9.409 -DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
   9.410 -
   9.411 -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
   9.412 -void (*br_handle_frame_hook)(struct sk_buff *skb) = NULL;
   9.413 -#endif
   9.414 +        
   9.415 +        skb->destructor = tx_skb_release;
   9.416 +        
   9.417 +        skb->head = skb->data = tx->header;
   9.418 +        skb->end  = skb->tail = skb->head + PKT_PROT_LEN;
   9.419 +        
   9.420 +        skb->dev      = the_dev;
   9.421 +        skb->src_vif  = vif->id;
   9.422 +        skb->dst_vif  = VIF_PHYSICAL_INTERFACE;
   9.423 +        skb->mac.raw  = skb->data; 
   9.424 +        
   9.425 +        skb_shinfo(skb)->frags[0].page        = frame_table +
   9.426 +            (tx->payload >> PAGE_SHIFT);
   9.427 +        skb_shinfo(skb)->frags[0].size        = tx->size - PKT_PROT_LEN;
   9.428 +        skb_shinfo(skb)->frags[0].page_offset = tx->payload & ~PAGE_MASK;
   9.429 +        skb_shinfo(skb)->nr_frags = 1;
   9.430  
   9.431 -static __inline__ int handle_bridge(struct sk_buff *skb,
   9.432 -                                    struct packet_type *pt_prev)
   9.433 -{
   9.434 -    int ret = NET_RX_DROP;
   9.435 +        skb->data_len = tx->size - PKT_PROT_LEN;
   9.436 +        skb->len      = tx->size;
   9.437  
   9.438 -    if (pt_prev) {
   9.439 -        if (!pt_prev->data)
   9.440 -            ret = deliver_to_old_ones(pt_prev, skb, 0);
   9.441 -        else {
   9.442 -            atomic_inc(&skb->users);
   9.443 -            ret = pt_prev->func(skb, skb->dev, pt_prev);
   9.444 +        /* Transmit should always work, or the queue would be stopped. */
   9.445 +        if ( dev->hard_start_xmit(skb, dev) != 0 )
   9.446 +        {
   9.447 +            add_to_net_schedule_list_tail(vif);
   9.448 +            printk("Weird failure in hard_start_xmit!\n");
   9.449 +            goto out;
   9.450          }
   9.451 +
   9.452 +    skip_desc:
   9.453 +        vif->shadow_ring->tx_idx = TX_RING_INC(vif->shadow_ring->tx_idx);
   9.454 +        if ( vif->shadow_ring->tx_idx != vif->shadow_ring->tx_prod )
   9.455 +            add_to_net_schedule_list_tail(vif);
   9.456      }
   9.457 -
   9.458 -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
   9.459 -    br_handle_frame_hook(skb);
   9.460 -#endif
   9.461 -    return ret;
   9.462 + out:
   9.463 +    spin_unlock(&dev->xmit_lock);
   9.464  }
   9.465  
   9.466 -
   9.467 -#ifdef CONFIG_NET_DIVERT
   9.468 -static inline void handle_diverter(struct sk_buff *skb)
   9.469 -{
   9.470 -    /* if diversion is supported on device, then divert */
   9.471 -    if (skb->dev->divert && skb->dev->divert->divert)
   9.472 -        divert_frame(skb);
   9.473 -}
   9.474 -#endif   /* CONFIG_NET_DIVERT */
   9.475 +DECLARE_TASKLET_DISABLED(net_tx_tasklet, net_tx_action, 0);
   9.476  
   9.477  
   9.478  /*
   9.479 @@ -1809,22 +1682,20 @@ extern void dv_init(void);
   9.480  int __init net_dev_init(void)
   9.481  {
   9.482      struct net_device *dev, **dp;
   9.483 -    int i;
   9.484  
   9.485      if ( !dev_boot_phase )
   9.486          return 0;
   9.487  
   9.488 -    /* KAF: was sone in socket_init, but that top-half stuff is gone. */
   9.489      skb_init();
   9.490  
   9.491 -    /* Initialise the packet receive queues. */
   9.492 -    for ( i = 0; i < NR_CPUS; i++ )
   9.493 -    {
   9.494 -        struct softnet_data *queue;
   9.495 -        queue = &softnet_data[i];
   9.496 -        queue->completion_queue = NULL;
   9.497 -    }
   9.498 -	
   9.499 +    net_header_cachep = kmem_cache_create(
   9.500 +        "net_header_cache", 
   9.501 +        (PKT_PROT_LEN + sizeof(void *) - 1) & ~(sizeof(void *) - 1),
   9.502 +        0, SLAB_HWCACHE_ALIGN, NULL, NULL);
   9.503 +
   9.504 +    spin_lock_init(&net_schedule_list_lock);
   9.505 +    INIT_LIST_HEAD(&net_schedule_list);
   9.506 +
   9.507      /*
   9.508       *	Add the devices.
   9.509       *	If the call to dev->init fails, the dev is removed
   9.510 @@ -1887,13 +1758,8 @@ int __init net_dev_init(void)
   9.511  
   9.512      dev_boot_phase = 0;
   9.513  
   9.514 -    dst_init();
   9.515      dev_mcast_init();
   9.516  
   9.517 -#ifdef CONFIG_NET_SCHED
   9.518 -    pktsched_init();
   9.519 -#endif
   9.520 -
   9.521      /*
   9.522       *	Initialise network devices
   9.523       */
   9.524 @@ -1920,36 +1786,6 @@ inline int init_tx_header(u8 *data, unsi
   9.525  }
   9.526  
   9.527  
   9.528 -/* 
   9.529 - * tx_skb_release
   9.530 - *
   9.531 - * skb destructor function that is attached to zero-copy tx skbs before 
   9.532 - * they are passed to the device driver for transmission.  The destructor 
   9.533 - * is responsible for unlinking the fragment pointer to the skb data that 
   9.534 - * is in guest memory, and decrementing the tot_count on the packet pages 
   9.535 - * pfn_info.
   9.536 - */
   9.537 -
   9.538 -void tx_skb_release(struct sk_buff *skb)
   9.539 -{
   9.540 -    int i;
   9.541 -    
   9.542 -    for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
   9.543 -        skb_shinfo(skb)->frags[i].page->tot_count--;
   9.544 -    
   9.545 -    skb_shinfo(skb)->nr_frags = 0; 
   9.546 -
   9.547 -    /*
   9.548 -     * XXX This assumes that, per vif, SKBs are processed in-order!
   9.549 -     * Also, like lots of code in here -- we assume direct access to the
   9.550 -     * consumer and producer indexes. This is likely safe for the
   9.551 -     * forseeable future.
   9.552 -     */
   9.553 -    sys_vif_list[skb->src_vif]->net_ring->tx_cons = 
   9.554 -        TX_RING_INC(sys_vif_list[skb->src_vif]->net_ring->tx_cons);
   9.555 -}
   9.556 -
   9.557 -    
   9.558  /*
   9.559   * do_net_update:
   9.560   * 
   9.561 @@ -1957,12 +1793,8 @@ void tx_skb_release(struct sk_buff *skb)
   9.562   * descriptor rings.
   9.563   */
   9.564  
   9.565 -/* Ethernet + IP headers */
   9.566 -#define PKT_PROT_LEN (ETH_HLEN + 20)
   9.567 -
   9.568  long do_net_update(void)
   9.569  {
   9.570 -    shared_info_t *shared = current->shared_info;    
   9.571      net_ring_t *net_ring;
   9.572      net_shadow_ring_t *shadow_ring;
   9.573      net_vif_t *current_vif;
   9.574 @@ -1988,16 +1820,20 @@ long do_net_update(void)
   9.575           * PHASE 1 -- TRANSMIT RING
   9.576           */
   9.577  
   9.578 -        for ( i = shadow_ring->tx_cons; 
   9.579 +        for ( i = shadow_ring->tx_prod; 
   9.580                i != net_ring->tx_prod; 
   9.581                i = TX_RING_INC(i) )
   9.582          {
   9.583              if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
   9.584              {
   9.585                  DPRINTK("Bad copy_from_user for tx net descriptor\n");
   9.586 +                shadow_ring->tx_ring[i].status = RING_STATUS_ERR_CFU;
   9.587                  continue;
   9.588              }
   9.589  
   9.590 +            shadow_ring->tx_ring[i].size   = tx.size;
   9.591 +            shadow_ring->tx_ring[i].status = RING_STATUS_BAD_PAGE;
   9.592 +
   9.593              if ( tx.size < PKT_PROT_LEN )
   9.594              {
   9.595                  DPRINTK("Runt packet %ld\n", tx.size);
   9.596 @@ -2010,41 +1846,35 @@ long do_net_update(void)
   9.597                          tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
   9.598                  continue;
   9.599              }
   9.600 -            
   9.601 -            if ( TX_RING_INC(i) == net_ring->tx_event )
   9.602 +
   9.603 +            pfn  = tx.addr >> PAGE_SHIFT;
   9.604 +            page = frame_table + pfn;
   9.605 +            if ( (pfn >= max_page) || 
   9.606 +                 ((page->flags & PG_domain_mask) != current->domain) ) 
   9.607              {
   9.608 -                set_bit(_EVENT_NET_TX, &shared->events);
   9.609 +                DPRINTK("Bad page frame\n");
   9.610 +                continue;
   9.611              }
   9.612 -
   9.613 -            /* 
   9.614 -             * Map the skb in from the guest, and get it's delivery target.
   9.615 -             * We need this to know whether the packet is to be sent locally
   9.616 -             * or remotely.
   9.617 -             */
   9.618              
   9.619              g_data = map_domain_mem(tx.addr);
   9.620  
   9.621              protocol = __constant_htons(
   9.622                  init_tx_header(g_data, tx.size, the_dev));
   9.623              if ( protocol == 0 )
   9.624 -            {
   9.625 -                unmap_domain_mem(g_data);
   9.626 -                continue;
   9.627 -            }
   9.628 +                goto unmap_and_continue;
   9.629  
   9.630              target = __net_get_target_vif(g_data, tx.size, current_vif->id);
   9.631  
   9.632 -            if (target > VIF_PHYSICAL_INTERFACE )
   9.633 +            if ( target > VIF_PHYSICAL_INTERFACE )
   9.634              {
   9.635                  /* Local delivery */
   9.636 -                skb = dev_alloc_skb(tx.size);
   9.637 +                if ( (skb = dev_alloc_skb(tx.size)) == NULL ) 
   9.638 +                    goto unmap_and_continue;
   9.639 +                
   9.640 +                skb->destructor = tx_skb_release;
   9.641  
   9.642 -                if (skb == NULL) 
   9.643 -                {
   9.644 -                    unmap_domain_mem(g_data);
   9.645 -                    continue;
   9.646 -                }
   9.647 -                
   9.648 +                shadow_ring->tx_ring[i].status = RING_STATUS_OK;
   9.649 +
   9.650                  skb->src_vif = current_vif->id;
   9.651                  skb->dst_vif = target;
   9.652                  skb->protocol = protocol;
   9.653 @@ -2058,52 +1888,26 @@ long do_net_update(void)
   9.654                  unmap_domain_mem(skb->head);
   9.655                  skb->data += ETH_HLEN;
   9.656                  (void)netif_rx(skb);
   9.657 -                unmap_domain_mem(g_data);
   9.658              }
   9.659              else if ( target == VIF_PHYSICAL_INTERFACE )
   9.660              {
   9.661 -                /*
   9.662 -                 * External delivery: create a fragmented SKB, consisting of a
   9.663 -                 * small copied section for the header, then a reference to the
   9.664 -                 * in-place payload.
   9.665 -                 */                
   9.666 -                skb = alloc_skb(PKT_PROT_LEN, GFP_KERNEL);
   9.667 -                if (skb == NULL) 
   9.668 -                    continue;
   9.669 -            
   9.670 -                skb_put(skb, PKT_PROT_LEN);
   9.671 -                memcpy(skb->data, g_data, PKT_PROT_LEN);
   9.672 -                unmap_domain_mem(g_data);
   9.673 -
   9.674 -                skb->dev = the_dev;
   9.675 -                skb->src_vif = current_vif->id;
   9.676 -                skb->dst_vif = target;
   9.677 -                skb->protocol = protocol; 
   9.678 -                skb->mac.raw=skb->data; 
   9.679 +                shadow_ring->tx_ring[i].header = 
   9.680 +                    kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
   9.681 +                if ( shadow_ring->tx_ring[i].header == NULL ) 
   9.682 +                    goto unmap_and_continue;
   9.683 +                memcpy(shadow_ring->tx_ring[i].header, g_data, PKT_PROT_LEN);
   9.684 +                shadow_ring->tx_ring[i].payload = tx.addr + PKT_PROT_LEN;
   9.685 +                shadow_ring->tx_ring[i].status = RING_STATUS_OK;
   9.686 +                get_page_tot(page);
   9.687 +            }
   9.688  
   9.689 -                /* One more reference to guest page for duration of transfer */
   9.690 -                page = (tx.addr >> PAGE_SHIFT) + frame_table;
   9.691 -                page->tot_count++;
   9.692 -                
   9.693 -                /* We have special destructor to deal with guest frag. */
   9.694 -                skb->destructor = &tx_skb_release;
   9.695 -
   9.696 -                skb_shinfo(skb)->frags[0].page = page;
   9.697 -                skb_shinfo(skb)->frags[0].size = tx.size - PKT_PROT_LEN;
   9.698 -                skb_shinfo(skb)->frags[0].page_offset 
   9.699 -                    = (tx.addr & ~PAGE_MASK) + PKT_PROT_LEN;
   9.700 -                skb_shinfo(skb)->nr_frags = 1;
   9.701 -                skb->data_len = tx.size - skb->len;
   9.702 -                skb->len = tx.size;
   9.703 -                
   9.704 -                dev_queue_xmit(skb);
   9.705 -            }
   9.706 -            else
   9.707 -            {
   9.708 -                unmap_domain_mem(g_data);
   9.709 -            }
   9.710 +        unmap_and_continue:
   9.711 +            unmap_domain_mem(g_data);
   9.712          }
   9.713 -        shadow_ring->tx_cons = i;
   9.714 +        smp_wmb(); /* Let other CPUs see new descriptors first. */
   9.715 +        shadow_ring->tx_prod = i;
   9.716 +        add_to_net_schedule_list_tail(current_vif);
   9.717 +        tasklet_schedule(&net_tx_tasklet); /* XXX */
   9.718  
   9.719          /*
   9.720           * PHASE 2 -- RECEIVE RING
   9.721 @@ -2131,9 +1935,10 @@ long do_net_update(void)
   9.722              
   9.723              shadow_ring->rx_ring[i].status = RING_STATUS_BAD_PAGE;
   9.724              
   9.725 -            if  ( page->flags != (PGT_l1_page_table | current->domain) ) 
   9.726 +            if ( (pfn >= max_page) || 
   9.727 +                 (page->flags != (PGT_l1_page_table | current->domain)) ) 
   9.728              {
   9.729 -                DPRINTK("Bad page flags\n");
   9.730 +                DPRINTK("Bad page frame containing ppte\n");
   9.731                  continue;
   9.732              }
   9.733              
   9.734 @@ -2175,7 +1980,7 @@ int setup_network_devices(void)
   9.735      int ret;
   9.736      extern char opt_ifname[];
   9.737      struct net_device *dev = dev_get_by_name(opt_ifname);
   9.738 -    
   9.739 +
   9.740      if ( dev == NULL ) 
   9.741      {
   9.742          printk("Could not find device %s\n", opt_ifname);
   9.743 @@ -2191,6 +1996,8 @@ int setup_network_devices(void)
   9.744      printk("Device %s opened and ready for use.\n", opt_ifname);
   9.745      the_dev = dev;
   9.746  
   9.747 +    tasklet_enable(&net_tx_tasklet);
   9.748 +
   9.749      return 1;
   9.750  }
   9.751  
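The transmit path above keeps three indices per shadow ring: do_net_update() advances tx_prod as it validates and shadows guest descriptors, net_tx_action() walks tx_idx toward tx_prod as packets are handed to the NIC, and tx_skb_release() advances the guest-visible tx_cons, setting _EVENT_NET_TX when it reaches the guest's tx_event mark. TX_RING_INC is not defined in this changeset; the sketch below assumes it is a simple modular increment over a power-of-two ring, and TX_RING_SIZE here is an arbitrary illustrative value.

    #include <stdio.h>

    /* Assumed ring geometry, for illustration only; the real definitions live
     * in the ring interface headers, which this changeset does not touch. */
    #define TX_RING_SIZE    256
    #define TX_RING_INC(_i) (((_i) + 1) & (TX_RING_SIZE - 1))

    int main(void)
    {
        unsigned int prod = 0, cons = 0, event = 3;
        int i;

        /* Guest side (cf. do_net_update): queue four descriptors. */
        for (i = 0; i < 4; i++)
            prod = TX_RING_INC(prod);

        /* Completion side (cf. tx_skb_release): retire them one at a time;
         * when the consumer index reaches the guest's tx_event mark, the
         * owning domain would be notified via _EVENT_NET_TX. */
        while (cons != prod) {
            cons = TX_RING_INC(cons);
            if (cons == event)
                printf("raise _EVENT_NET_TX at cons=%u\n", cons);
        }
        printf("ring drained: cons=%u prod=%u\n", cons, prod);
        return 0;
    }

On the header side, PKT_PROT_LEN is ETH_HLEN + 20 = 34 bytes, and the net_header_cache size expression in net_dev_init() rounds that up to the next multiple of sizeof(void *), i.e. 36 bytes on a 32-bit build, so each copied Ethernet-plus-IP header occupies one pointer-aligned slab object.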
    10.1 --- a/xen-2.4.16/net/devinit.c	Sun Feb 23 11:22:39 2003 +0000
    10.2 +++ b/xen-2.4.16/net/devinit.c	Mon Feb 24 14:19:58 2003 +0000
    10.3 @@ -97,11 +97,6 @@ void dev_activate(struct net_device *dev
    10.4  void dev_deactivate(struct net_device *dev)
    10.5  {
    10.6      dev_watchdog_down(dev);
    10.7 -
    10.8 -    while (test_bit(__LINK_STATE_SCHED, &dev->state)) {
    10.9 -        current->policy |= SCHED_YIELD;
   10.10 -        schedule();
   10.11 -    }
   10.12  }
   10.13  
   10.14  void dev_init_scheduler(struct net_device *dev)
    11.1 --- a/xen-2.4.16/net/skbuff.c	Sun Feb 23 11:22:39 2003 +0000
    11.2 +++ b/xen-2.4.16/net/skbuff.c	Mon Feb 24 14:19:58 2003 +0000
    11.3 @@ -32,10 +32,6 @@
    11.4   *	2 of the License, or (at your option) any later version.
    11.5   */
    11.6  
    11.7 -/*
    11.8 - *	The functions in this file will not compile correctly with gcc 2.4.x
    11.9 - */
   11.10 -
   11.11  #include <linux/config.h>
   11.12  #include <linux/lib.h>
   11.13  #include <linux/errno.h>
   11.14 @@ -55,16 +51,13 @@
   11.15  
   11.16  #define BUG_TRAP ASSERT
   11.17  
   11.18 -#define put_page(_p) ((void)0) /* XXXX KAF */
   11.19 -#define get_page(_p) ((void)0)
   11.20 -
   11.21  int sysctl_hot_list_len = 128;
   11.22  
   11.23  static kmem_cache_t *skbuff_head_cache;
   11.24  
   11.25  static union {
   11.26 -	struct sk_buff_head	list;
   11.27 -	char			pad[SMP_CACHE_BYTES];
   11.28 +    struct sk_buff_head	list;
   11.29 +    char			pad[SMP_CACHE_BYTES];
   11.30  } skb_head_pool[NR_CPUS];
   11.31  
   11.32  /*
   11.33 @@ -84,9 +77,9 @@ static union {
   11.34   
   11.35  void skb_over_panic(struct sk_buff *skb, int sz, void *here)
   11.36  {
   11.37 -	printk("skput:over: %p:%d put:%d dev:%s", 
   11.38 -		here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   11.39 -	BUG();
   11.40 +    printk("skput:over: %p:%d put:%d dev:%s", 
   11.41 +           here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   11.42 +    BUG();
   11.43  }
   11.44  
   11.45  /**
   11.46 @@ -101,148 +94,90 @@ void skb_over_panic(struct sk_buff *skb,
   11.47  
   11.48  void skb_under_panic(struct sk_buff *skb, int sz, void *here)
   11.49  {
   11.50 -        printk("skput:under: %p:%d put:%d dev:%s",
   11.51 -                here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   11.52 -	BUG();
   11.53 +    printk("skput:under: %p:%d put:%d dev:%s",
   11.54 +           here, skb->len, sz, skb->dev ? skb->dev->name : "<NULL>");
   11.55 +    BUG();
   11.56  }
   11.57  
   11.58  static __inline__ struct sk_buff *skb_head_from_pool(void)
   11.59  {
   11.60 -	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   11.61 +    struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   11.62  
   11.63 -	if (skb_queue_len(list)) {
   11.64 -		struct sk_buff *skb;
   11.65 -		unsigned long flags;
   11.66 +    if (skb_queue_len(list)) {
   11.67 +        struct sk_buff *skb;
   11.68 +        unsigned long flags;
   11.69  
   11.70 -		local_irq_save(flags);
   11.71 -		skb = __skb_dequeue(list);
   11.72 -		local_irq_restore(flags);
   11.73 -		return skb;
   11.74 -	}
   11.75 -	return NULL;
   11.76 +        local_irq_save(flags);
   11.77 +        skb = __skb_dequeue(list);
   11.78 +        local_irq_restore(flags);
   11.79 +        return skb;
   11.80 +    }
   11.81 +    return NULL;
   11.82  }
   11.83  
   11.84  static __inline__ void skb_head_to_pool(struct sk_buff *skb)
   11.85  {
   11.86 -	struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   11.87 +    struct sk_buff_head *list = &skb_head_pool[smp_processor_id()].list;
   11.88  
   11.89 -	if (skb_queue_len(list) < sysctl_hot_list_len) {
   11.90 -		unsigned long flags;
   11.91 +    if (skb_queue_len(list) < sysctl_hot_list_len) {
   11.92 +        unsigned long flags;
   11.93  
   11.94 -		local_irq_save(flags);
   11.95 -		__skb_queue_head(list, skb);
   11.96 -		local_irq_restore(flags);
   11.97 +        local_irq_save(flags);
   11.98 +        __skb_queue_head(list, skb);
   11.99 +        local_irq_restore(flags);
  11.100  
  11.101 -		return;
  11.102 -	}
  11.103 -	kmem_cache_free(skbuff_head_cache, skb);
  11.104 +        return;
  11.105 +    }
  11.106 +    kmem_cache_free(skbuff_head_cache, skb);
  11.107  }
  11.108  
  11.109  static inline u8 *alloc_skb_data_page(struct sk_buff *skb)
  11.110  {
  11.111 -        struct list_head *list_ptr;
  11.112 -        struct pfn_info  *pf;
  11.113 -        unsigned long flags;
  11.114 +    struct list_head *list_ptr;
  11.115 +    struct pfn_info  *pf;
  11.116 +    unsigned long flags;
  11.117          
  11.118 -        spin_lock_irqsave(&free_list_lock, flags);
  11.119 +    spin_lock_irqsave(&free_list_lock, flags);
  11.120  
  11.121 -        if (!free_pfns) return NULL;
  11.122 +    if (!free_pfns) return NULL;
  11.123  
  11.124 -        list_ptr = free_list.next;
  11.125 -        pf = list_entry(list_ptr, struct pfn_info, list);
  11.126 -        pf->flags = 0; /* owned by dom0 */
  11.127 -        list_del(&pf->list);
  11.128 -        free_pfns--;
  11.129 +    list_ptr = free_list.next;
  11.130 +    pf = list_entry(list_ptr, struct pfn_info, list);
  11.131 +    pf->flags = 0; /* owned by dom0 */
  11.132 +    list_del(&pf->list);
  11.133 +    free_pfns--;
  11.134  
  11.135 -        spin_unlock_irqrestore(&free_list_lock, flags);
  11.136 +    spin_unlock_irqrestore(&free_list_lock, flags);
  11.137  
  11.138 -        skb->pf = pf;
  11.139 -        return (u8 *)((pf - frame_table) << PAGE_SHIFT);
  11.140 +    skb->pf = pf;
  11.141 +    return (u8 *)((pf - frame_table) << PAGE_SHIFT);
  11.142  }
  11.143  
  11.144  static inline void dealloc_skb_data_page(struct sk_buff *skb)
  11.145  {
  11.146 -        struct pfn_info  *pf;
  11.147 -        unsigned long flags;
  11.148 +    struct pfn_info  *pf;
  11.149 +    unsigned long flags;
  11.150  
  11.151 -        pf = skb->pf;
  11.152 +    pf = skb->pf;
  11.153  
  11.154 -        spin_lock_irqsave(&free_list_lock, flags);
  11.155 +    spin_lock_irqsave(&free_list_lock, flags);
  11.156          
  11.157 -        list_add(&pf->list, &free_list);
  11.158 -        free_pfns++;
  11.159 +    list_add(&pf->list, &free_list);
  11.160 +    free_pfns++;
  11.161  
  11.162 -        spin_unlock_irqrestore(&free_list_lock, flags);
  11.163 +    spin_unlock_irqrestore(&free_list_lock, flags);
  11.164  
  11.165  }
  11.166  
  11.167 -struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
  11.168 +static inline void INTERRUPT_CHECK(int gfp_mask)
  11.169  {
  11.170 -        struct sk_buff *skb;
  11.171 -        u8 *data;
  11.172 -
  11.173 -        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  11.174 -                static int count = 0;
  11.175 -                if (++count < 5) {
  11.176 -                        printk(KERN_ERR "alloc_skb called nonatomically "
  11.177 -                               "from interrupt %p\n", NET_CALLER(size));
  11.178 -                        BUG();
  11.179 -                }
  11.180 -                gfp_mask &= ~__GFP_WAIT;
  11.181 -        }
  11.182 -
  11.183 -        /* Get the HEAD */
  11.184 -        skb = skb_head_from_pool();
  11.185 -        if (skb == NULL) {
  11.186 -                skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  11.187 -                if (skb == NULL)
  11.188 -                        goto nohead;
  11.189 -        }
  11.190 -
  11.191 -        /* Get the DATA. Size must match skb_add_mtu(). */
  11.192 -        size = SKB_DATA_ALIGN(size);
  11.193 -        data = alloc_skb_data_page(skb);
  11.194 -
  11.195 -        if (data == NULL)
  11.196 -                goto nodata;
  11.197 -
  11.198 -        /* A FAKE virtual address, so that pci_map_xxx dor the right thing. */
  11.199 -        data = phys_to_virt((unsigned long)data); 
  11.200 -        
  11.201 -        /* Load the data pointers. */
  11.202 -        skb->head = data;
  11.203 -        skb->data = data;
  11.204 -        skb->tail = data;
  11.205 -        skb->end = data + size;
  11.206 -
  11.207 -        /* Set up other state */
  11.208 -        skb->len = 0;
  11.209 -        skb->cloned = 0;
  11.210 -        skb->data_len = 0;
  11.211 -        skb->src_vif = VIF_UNKNOWN_INTERFACE;
  11.212 -        skb->dst_vif = VIF_UNKNOWN_INTERFACE;
  11.213 -        skb->skb_type = SKB_ZERO_COPY;
  11.214 -
  11.215 -        atomic_set(&skb->users, 1);
  11.216 -        atomic_set(&(skb_shinfo(skb)->dataref), 1);
  11.217 -        skb_shinfo(skb)->nr_frags = 0;
  11.218 -        skb_shinfo(skb)->frag_list = NULL;
  11.219 -
  11.220 -        return skb;
  11.221 -
  11.222 -nodata:
  11.223 -        skb_head_to_pool(skb);
  11.224 -nohead:
  11.225 -        return NULL;
  11.226 +    if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  11.227 +        printk(KERN_ERR "alloc_skb called nonatomically\n");
  11.228 +        BUG();
  11.229 +    }
  11.230  }
  11.231  
  11.232  
  11.233 -/* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  11.234 - *	'private' fields and also do memory statistics to find all the
  11.235 - *	[BEEP] leaks.
  11.236 - * 
  11.237 - */
  11.238 -
  11.239  /**
  11.240   *	alloc_skb	-	allocate a network buffer
  11.241   *	@size: size to allocate
  11.242 @@ -258,57 +193,113 @@ nohead:
  11.243   
  11.244  struct sk_buff *alloc_skb(unsigned int size,int gfp_mask)
  11.245  {
  11.246 -	struct sk_buff *skb;
  11.247 -	u8 *data;
  11.248 +    struct sk_buff *skb;
  11.249 +    u8 *data;
  11.250 +
  11.251 +    INTERRUPT_CHECK(gfp_mask);
  11.252 +
  11.253 +    /* Get the HEAD */
  11.254 +    skb = skb_head_from_pool();
  11.255 +    if (skb == NULL) {
  11.256 +        skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  11.257 +        if (skb == NULL)
  11.258 +            goto nohead;
  11.259 +    }
  11.260 +
  11.261 +    /* Get the DATA. Size must match skb_add_mtu(). */
  11.262 +    size = SKB_DATA_ALIGN(size);
  11.263 +    data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  11.264 +    if (data == NULL)
  11.265 +        goto nodata;
  11.266  
  11.267 -	if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
  11.268 -		static int count = 0;
  11.269 -		if (++count < 5) {
  11.270 -			printk(KERN_ERR "alloc_skb called nonatomically "
  11.271 -			       "from interrupt %p\n", NET_CALLER(size));
  11.272 - 			BUG();
  11.273 -		}
  11.274 -		gfp_mask &= ~__GFP_WAIT;
  11.275 -	}
  11.276 +    /* Load the data pointers. */
  11.277 +    skb->head = data;
  11.278 +    skb->data = data;
  11.279 +    skb->tail = data;
  11.280 +    skb->end = data + size;
  11.281 +
  11.282 +    /* Set up other state */
  11.283 +    skb->len = 0;
  11.284 +    skb->data_len = 0;
  11.285 +    skb->src_vif = VIF_UNKNOWN_INTERFACE;
  11.286 +    skb->dst_vif = VIF_UNKNOWN_INTERFACE;
  11.287 +    skb->skb_type = SKB_NORMAL;
  11.288  
  11.289 -	/* Get the HEAD */
  11.290 -	skb = skb_head_from_pool();
  11.291 -	if (skb == NULL) {
  11.292 -		skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  11.293 -		if (skb == NULL)
  11.294 -			goto nohead;
  11.295 -	}
  11.296 +    skb_shinfo(skb)->nr_frags = 0;
  11.297 +    return skb;
  11.298 +
  11.299 + nodata:
  11.300 +    skb_head_to_pool(skb);
  11.301 + nohead:
  11.302 +    return NULL;
  11.303 +}
  11.304 +
  11.305 +
  11.306 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
  11.307 +{
  11.308 +    struct sk_buff *skb;
  11.309 +    u8 *data;
  11.310 +
  11.311 +    INTERRUPT_CHECK(gfp_mask);
  11.312  
  11.313 -	/* Get the DATA. Size must match skb_add_mtu(). */
  11.314 -	size = SKB_DATA_ALIGN(size);
  11.315 -	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  11.316 -	if (data == NULL)
  11.317 -		goto nodata;
  11.318 +    /* Get the HEAD */
  11.319 +    skb = skb_head_from_pool();
  11.320 +    if (skb == NULL) {
  11.321 +        skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  11.322 +        if (skb == NULL)
  11.323 +            goto nohead;
  11.324 +    }
  11.325 +
  11.326 +    /* Get the DATA. Size must match skb_add_mtu(). */
  11.327 +    size = SKB_DATA_ALIGN(size);
  11.328 +    data = alloc_skb_data_page(skb);
  11.329  
  11.330 -	/* Load the data pointers. */
  11.331 -	skb->head = data;
  11.332 -	skb->data = data;
  11.333 -	skb->tail = data;
  11.334 -	skb->end = data + size;
  11.335 +    if (data == NULL)
  11.336 +        goto nodata;
  11.337 +
  11.338 +    /* A FAKE virtual address, so that pci_map_xxx does the right thing. */
  11.339 +    data = phys_to_virt((unsigned long)data); 
  11.340 +        
  11.341 +    /* Load the data pointers. */
  11.342 +    skb->head = data;
  11.343 +    skb->data = data;
  11.344 +    skb->tail = data;
  11.345 +    skb->end = data + size;
  11.346  
  11.347 -	/* Set up other state */
  11.348 -	skb->len = 0;
  11.349 -	skb->cloned = 0;
  11.350 -	skb->data_len = 0;
  11.351 -        skb->src_vif = VIF_UNKNOWN_INTERFACE;
  11.352 -        skb->dst_vif = VIF_UNKNOWN_INTERFACE;
  11.353 -        skb->skb_type = SKB_NORMAL;
  11.354 +    /* Set up other state */
  11.355 +    skb->len = 0;
  11.356 +    skb->data_len = 0;
  11.357 +    skb->src_vif = VIF_UNKNOWN_INTERFACE;
  11.358 +    skb->dst_vif = VIF_UNKNOWN_INTERFACE;
  11.359 +    skb->skb_type = SKB_ZERO_COPY;
  11.360 +
  11.361 +    skb_shinfo(skb)->nr_frags = 0;
  11.362 +
  11.363 +    return skb;
  11.364 +
  11.365 + nodata:
  11.366 +    skb_head_to_pool(skb);
  11.367 + nohead:
  11.368 +    return NULL;
  11.369 +}
  11.370  
  11.371 -	atomic_set(&skb->users, 1); 
  11.372 -	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  11.373 -	skb_shinfo(skb)->nr_frags = 0;
  11.374 -	skb_shinfo(skb)->frag_list = NULL;
  11.375 -	return skb;
  11.376 +
  11.377 +struct sk_buff *alloc_skb_nodata(int gfp_mask)
  11.378 +{
  11.379 +    struct sk_buff *skb;
  11.380 +
  11.381 +    INTERRUPT_CHECK(gfp_mask);
  11.382  
  11.383 -nodata:
  11.384 -	skb_head_to_pool(skb);
  11.385 -nohead:
  11.386 -	return NULL;
  11.387 +    /* Get the HEAD */
  11.388 +    skb = skb_head_from_pool();
  11.389 +    if (skb == NULL) {
  11.390 +        skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
  11.391 +        if (skb == NULL)
  11.392 +            return NULL;
  11.393 +    }
  11.394 +
  11.395 +    skb->skb_type = SKB_NODATA;
  11.396 +    return skb;
  11.397  }
  11.398  
  11.399  
  11.400 @@ -318,76 +309,34 @@ nohead:
  11.401  static inline void skb_headerinit(void *p, kmem_cache_t *cache, 
  11.402  				  unsigned long flags)
  11.403  {
  11.404 -	struct sk_buff *skb = p;
  11.405 -
  11.406 -	skb->next = NULL;
  11.407 -	skb->prev = NULL;
  11.408 -	skb->list = NULL;
  11.409 -	skb->dev = NULL;
  11.410 -	skb->pkt_type = PACKET_HOST;	/* Default type */
  11.411 -	skb->ip_summed = 0;
  11.412 -	skb->destructor = NULL;
  11.413 +    struct sk_buff *skb = p;
  11.414  
  11.415 -#ifdef CONFIG_NETFILTER
  11.416 -	skb->nfmark = skb->nfcache = 0;
  11.417 -	skb->nfct = NULL;
  11.418 -#ifdef CONFIG_NETFILTER_DEBUG
  11.419 -	skb->nf_debug = 0;
  11.420 -#endif
  11.421 -#endif
  11.422 -#ifdef CONFIG_NET_SCHED
  11.423 -	skb->tc_index = 0;
  11.424 -#endif
  11.425 -}
  11.426 -
  11.427 -static void skb_drop_fraglist(struct sk_buff *skb)
  11.428 -{
  11.429 -	struct sk_buff *list = skb_shinfo(skb)->frag_list;
  11.430 -
  11.431 -	skb_shinfo(skb)->frag_list = NULL;
  11.432 -
  11.433 -	do {
  11.434 -		struct sk_buff *this = list;
  11.435 -		list = list->next;
  11.436 -		kfree_skb(this);
  11.437 -	} while (list);
  11.438 -}
  11.439 -
  11.440 -static void skb_clone_fraglist(struct sk_buff *skb)
  11.441 -{
  11.442 -	struct sk_buff *list;
  11.443 -
  11.444 -	for (list = skb_shinfo(skb)->frag_list; list; list=list->next)
  11.445 -		skb_get(list);
  11.446 +    skb->next = NULL;
  11.447 +    skb->prev = NULL;
  11.448 +    skb->list = NULL;
  11.449 +    skb->dev = NULL;
  11.450 +    skb->pkt_type = PACKET_HOST;	/* Default type */
  11.451 +    skb->ip_summed = 0;
  11.452 +    skb->destructor = NULL;
  11.453  }
  11.454  
  11.455  static void skb_release_data(struct sk_buff *skb)
  11.456  {
  11.457 -
  11.458 -	if (!skb->cloned ||
  11.459 -	    atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) {
  11.460 -		if (skb_shinfo(skb)->nr_frags) {
  11.461 -			int i;
  11.462 -			for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
  11.463 -				put_page(skb_shinfo(skb)->frags[i].page);
  11.464 -		}
  11.465 -
  11.466 -		if (skb_shinfo(skb)->frag_list)
  11.467 -			skb_drop_fraglist(skb);
  11.468 +    if (skb_shinfo(skb)->nr_frags) BUG();
  11.469  
  11.470 -                if (skb->skb_type == SKB_NORMAL) 
  11.471 -                {
  11.472 -		    kfree(skb->head);
  11.473 -                } 
  11.474 -                else if (skb->skb_type == SKB_ZERO_COPY) 
  11.475 -                {
  11.476 -                    dealloc_skb_data_page(skb);
  11.477 -                } 
  11.478 -                else 
  11.479 -                {
  11.480 -                    BUG();
  11.481 -                }
  11.482 -	}
  11.483 +    switch ( skb->skb_type )
  11.484 +    {
  11.485 +    case SKB_NORMAL:
  11.486 +        kfree(skb->head);
  11.487 +        break;
  11.488 +    case SKB_ZERO_COPY:
  11.489 +        dealloc_skb_data_page(skb);
  11.490 +        break;
  11.491 +    case SKB_NODATA:
  11.492 +        break;
  11.493 +    default:
  11.494 +        BUG();
  11.495 +    }
  11.496  }
  11.497  
  11.498  /*
  11.499 @@ -395,8 +344,8 @@ static void skb_release_data(struct sk_b
  11.500   */
  11.501  void kfree_skbmem(struct sk_buff *skb)
  11.502  {
  11.503 -	skb_release_data(skb);
  11.504 -	skb_head_to_pool(skb);
  11.505 +    skb_release_data(skb);
  11.506 +    skb_head_to_pool(skb);
  11.507  }
  11.508  
  11.509  /**
  11.510 @@ -410,124 +359,32 @@ void kfree_skbmem(struct sk_buff *skb)
  11.511  
  11.512  void __kfree_skb(struct sk_buff *skb)
  11.513  {
  11.514 -	if (skb->list) {
  11.515 -	 	printk(KERN_WARNING "Warning: kfree_skb passed an skb still "
  11.516 -		       "on a list (from %p).\n", NET_CALLER(skb));
  11.517 -		BUG();
  11.518 -	}
  11.519 -
  11.520 -	if(skb->destructor) {
  11.521 -		if (in_irq()) {
  11.522 -			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
  11.523 -				NET_CALLER(skb));
  11.524 -		}
  11.525 -		skb->destructor(skb);
  11.526 -	}
  11.527 -
  11.528 -#ifdef CONFIG_NETFILTER
  11.529 -	nf_conntrack_put(skb->nfct);
  11.530 -#endif
  11.531 -	skb_headerinit(skb, NULL, 0);  /* clean state */
  11.532 -	kfree_skbmem(skb);
  11.533 -}
  11.534 -
  11.535 -/**
  11.536 - *	skb_clone	-	duplicate an sk_buff
  11.537 - *	@skb: buffer to clone
  11.538 - *	@gfp_mask: allocation priority
  11.539 - *
  11.540 - *	Duplicate an &sk_buff. The new one is not owned by a socket. Both
  11.541 - *	copies share the same packet data but not structure. The new
  11.542 - *	buffer has a reference count of 1. If the allocation fails the 
  11.543 - *	function returns %NULL otherwise the new buffer is returned.
  11.544 - *	
  11.545 - *	If this function is called from an interrupt gfp_mask() must be
  11.546 - *	%GFP_ATOMIC.
  11.547 - */
  11.548 -
  11.549 -struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
  11.550 -{
  11.551 -	struct sk_buff *n;
  11.552 +    if ( skb->list )
  11.553 +        panic(KERN_WARNING "Warning: kfree_skb passed an skb still "
  11.554 +              "on a list (from %p).\n", NET_CALLER(skb));
  11.555  
  11.556 -	n = skb_head_from_pool();
  11.557 -	if (!n) {
  11.558 -		n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
  11.559 -		if (!n)
  11.560 -			return NULL;
  11.561 -	}
  11.562 -
  11.563 -#define C(x) n->x = skb->x
  11.564 +    if ( skb->destructor )
  11.565 +        skb->destructor(skb);
  11.566  
  11.567 -	n->next = n->prev = NULL;
  11.568 -	n->list = NULL;
  11.569 -	C(dev);
  11.570 -	C(h);
  11.571 -	C(nh);
  11.572 -	C(mac);
  11.573 -	C(len);
  11.574 -	C(data_len);
  11.575 -	C(csum);
  11.576 -	n->cloned = 1;
  11.577 -	C(pkt_type);
  11.578 -	C(ip_summed);
  11.579 -	atomic_set(&n->users, 1);
  11.580 -	C(protocol);
  11.581 -	C(head);
  11.582 -	C(data);
  11.583 -	C(tail);
  11.584 -	C(end);
  11.585 -	n->destructor = NULL;
  11.586 -#ifdef CONFIG_NETFILTER
  11.587 -	C(nfmark);
  11.588 -	C(nfcache);
  11.589 -	C(nfct);
  11.590 -#ifdef CONFIG_NETFILTER_DEBUG
  11.591 -	C(nf_debug);
  11.592 -#endif
  11.593 -#endif /*CONFIG_NETFILTER*/
  11.594 -#if defined(CONFIG_HIPPI)
  11.595 -	C(private);
  11.596 -#endif
  11.597 -#ifdef CONFIG_NET_SCHED
  11.598 -	C(tc_index);
  11.599 -#endif
  11.600 -
  11.601 -	atomic_inc(&(skb_shinfo(skb)->dataref));
  11.602 -	skb->cloned = 1;
  11.603 -#ifdef CONFIG_NETFILTER
  11.604 -	nf_conntrack_get(skb->nfct);
  11.605 -#endif
  11.606 -	return n;
  11.607 +    skb_headerinit(skb, NULL, 0);  /* clean state */
  11.608 +    kfree_skbmem(skb);
  11.609  }
  11.610  
  11.611  static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
  11.612  {
  11.613 -	/*
  11.614 -	 *	Shift between the two data areas in bytes
  11.615 -	 */
  11.616 -	unsigned long offset = new->data - old->data;
  11.617 +    /*
  11.618 +     *	Shift between the two data areas in bytes
  11.619 +     */
  11.620 +    unsigned long offset = new->data - old->data;
  11.621  
  11.622 -	new->list=NULL;
  11.623 -	new->dev=old->dev;
  11.624 -	new->protocol=old->protocol;
  11.625 -	new->h.raw=old->h.raw+offset;
  11.626 -	new->nh.raw=old->nh.raw+offset;
  11.627 -	new->mac.raw=old->mac.raw+offset;
  11.628 -	atomic_set(&new->users, 1);
  11.629 -	new->pkt_type=old->pkt_type;
  11.630 -	new->destructor = NULL;
  11.631 -#ifdef CONFIG_NETFILTER
  11.632 -	new->nfmark=old->nfmark;
  11.633 -	new->nfcache=old->nfcache;
  11.634 -	new->nfct=old->nfct;
  11.635 -	nf_conntrack_get(new->nfct);
  11.636 -#ifdef CONFIG_NETFILTER_DEBUG
  11.637 -	new->nf_debug=old->nf_debug;
  11.638 -#endif
  11.639 -#endif
  11.640 -#ifdef CONFIG_NET_SCHED
  11.641 -	new->tc_index = old->tc_index;
  11.642 -#endif
  11.643 +    new->list=NULL;
  11.644 +    new->dev=old->dev;
  11.645 +    new->protocol=old->protocol;
  11.646 +    new->h.raw=old->h.raw+offset;
  11.647 +    new->nh.raw=old->nh.raw+offset;
  11.648 +    new->mac.raw=old->mac.raw+offset;
  11.649 +    new->pkt_type=old->pkt_type;
  11.650 +    new->destructor = NULL;
  11.651  }
  11.652  
  11.653  /**
  11.654 @@ -549,748 +406,96 @@ static void copy_skb_header(struct sk_bu
  11.655   
  11.656  struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
  11.657  {
  11.658 -	struct sk_buff *n;
  11.659 -	int headerlen = skb->data-skb->head;
  11.660 -
  11.661 -	/*
  11.662 -	 *	Allocate the copy buffer
  11.663 -	 */
  11.664 -	n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
  11.665 -	if(n==NULL)
  11.666 -		return NULL;
  11.667 -
  11.668 -	/* Set the data pointer */
  11.669 -	skb_reserve(n,headerlen);
  11.670 -	/* Set the tail pointer and length */
  11.671 -	skb_put(n,skb->len);
  11.672 -	n->csum = skb->csum;
  11.673 -	n->ip_summed = skb->ip_summed;
  11.674 -
  11.675 -	if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
  11.676 -		BUG();
  11.677 -
  11.678 -	copy_skb_header(n, skb);
  11.679 -
  11.680 -	return n;
  11.681 -}
  11.682 -
  11.683 -/* Keep head the same: replace data */
  11.684 -int skb_linearize(struct sk_buff *skb, int gfp_mask)
  11.685 -{
  11.686 -	unsigned int size;
  11.687 -	u8 *data;
  11.688 -	long offset;
  11.689 -	int headerlen = skb->data - skb->head;
  11.690 -	int expand = (skb->tail+skb->data_len) - skb->end;
  11.691 -
  11.692 -	if (skb_shared(skb))
  11.693 -		BUG();
  11.694 -
  11.695 -	if (expand <= 0)
  11.696 -		expand = 0;
  11.697 -
  11.698 -	size = (skb->end - skb->head + expand);
  11.699 -	size = SKB_DATA_ALIGN(size);
  11.700 -	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  11.701 -	if (data == NULL)
  11.702 -		return -ENOMEM;
  11.703 -
  11.704 -	/* Copy entire thing */
  11.705 -	if (skb_copy_bits(skb, -headerlen, data, headerlen+skb->len))
  11.706 -		BUG();
  11.707 -
  11.708 -	/* Offset between the two in bytes */
  11.709 -	offset = data - skb->head;
  11.710 -
  11.711 -	/* Free old data. */
  11.712 -	skb_release_data(skb);
  11.713 -
  11.714 -	skb->head = data;
  11.715 -	skb->end  = data + size;
  11.716 -
  11.717 -	/* Set up new pointers */
  11.718 -	skb->h.raw += offset;
  11.719 -	skb->nh.raw += offset;
  11.720 -	skb->mac.raw += offset;
  11.721 -	skb->tail += offset;
  11.722 -	skb->data += offset;
  11.723 -
  11.724 -	/* Set up shinfo */
  11.725 -	atomic_set(&(skb_shinfo(skb)->dataref), 1);
  11.726 -	skb_shinfo(skb)->nr_frags = 0;
  11.727 -	skb_shinfo(skb)->frag_list = NULL;
  11.728 -
  11.729 -	/* We are no longer a clone, even if we were. */
  11.730 -	skb->cloned = 0;
  11.731 -
  11.732 -	skb->tail += skb->data_len;
  11.733 -	skb->data_len = 0;
  11.734 -	return 0;
  11.735 -}
  11.736 -
  11.737 -
  11.738 -/**
  11.739 - *	pskb_copy	-	create copy of an sk_buff with private head.
  11.740 - *	@skb: buffer to copy
  11.741 - *	@gfp_mask: allocation priority
  11.742 - *
  11.743 - *	Make a copy of both an &sk_buff and part of its data, located
  11.744 - *	in header. Fragmented data remain shared. This is used when
  11.745 - *	the caller wishes to modify only header of &sk_buff and needs
  11.746 - *	private copy of the header to alter. Returns %NULL on failure
  11.747 - *	or the pointer to the buffer on success.
  11.748 - *	The returned buffer has a reference count of 1.
  11.749 - */
  11.750 -
  11.751 -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask)
  11.752 -{
  11.753 -	struct sk_buff *n;
  11.754 -
  11.755 -	/*
  11.756 -	 *	Allocate the copy buffer
  11.757 -	 */
  11.758 -	n=alloc_skb(skb->end - skb->head, gfp_mask);
  11.759 -	if(n==NULL)
  11.760 -		return NULL;
  11.761 +    struct sk_buff *n;
  11.762 +    int headerlen = skb->data-skb->head;
  11.763  
  11.764 -	/* Set the data pointer */
  11.765 -	skb_reserve(n,skb->data-skb->head);
  11.766 -	/* Set the tail pointer and length */
  11.767 -	skb_put(n,skb_headlen(skb));
  11.768 -	/* Copy the bytes */
  11.769 -	memcpy(n->data, skb->data, n->len);
  11.770 -	n->csum = skb->csum;
  11.771 -	n->ip_summed = skb->ip_summed;
  11.772 -
  11.773 -	n->data_len = skb->data_len;
  11.774 -	n->len = skb->len;
  11.775 -
  11.776 -	if (skb_shinfo(skb)->nr_frags) {
  11.777 -		int i;
  11.778 -
  11.779 -		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
  11.780 -			skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i];
  11.781 -			get_page(skb_shinfo(n)->frags[i].page);
  11.782 -		}
  11.783 -		skb_shinfo(n)->nr_frags = i;
  11.784 -	}
  11.785 -
  11.786 -	if (skb_shinfo(skb)->frag_list) {
  11.787 -		skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
  11.788 -		skb_clone_fraglist(n);
  11.789 -	}
  11.790 -
  11.791 -	copy_skb_header(n, skb);
  11.792 -
  11.793 -	return n;
  11.794 -}
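
pskb_copy(), per the comment above, duplicates only the header area while any paged data stays shared with the original. A minimal usage sketch (the helper name is illustrative):

    /* Sketch only: obtain a header the caller may modify without
     * disturbing other holders of the original buffer. */
    static struct sk_buff *writable_header_copy(struct sk_buff *skb)
    {
        struct sk_buff *n = pskb_copy(skb, GFP_ATOMIC);
        if (n == NULL)
            return NULL;      /* allocation failure */
        /* n's header bytes are private; fragment pages remain shared. */
        return n;
    }
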
  11.795 -
  11.796 -/**
  11.797 - *	pskb_expand_head - reallocate header of &sk_buff
  11.798 - *	@skb: buffer to reallocate
  11.799 - *	@nhead: room to add at head
  11.800 - *	@ntail: room to add at tail
  11.801 - *	@gfp_mask: allocation priority
  11.802 - *
  11.803 - *	Expands (or creates identical copy, if &nhead and &ntail are zero)
  11.804 - *	header of skb. &sk_buff itself is not changed. &sk_buff MUST have
  11.805 - *	reference count of 1. Returns zero on success, or an error code
  11.806 - *	if expansion failed; in the latter case, &sk_buff is not changed.
  11.807 - *
  11.808 - *	All the pointers pointing into skb header may change and must be
  11.809 - *	reloaded after call to this function.
  11.810 - */
  11.811 -
  11.812 -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask)
  11.813 -{
  11.814 -	int i;
  11.815 -	u8 *data;
  11.816 -	int size = nhead + (skb->end - skb->head) + ntail;
  11.817 -	long off;
  11.818 -
  11.819 -	if (skb_shared(skb))
  11.820 -		BUG();
  11.821 -
  11.822 -	size = SKB_DATA_ALIGN(size);
  11.823 -
  11.824 -	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
  11.825 -	if (data == NULL)
  11.826 -		goto nodata;
  11.827 -
  11.828 -	/* Copy only real data... and, alas, header. This should be
  11.829 -	 * optimized for the cases when header is void. */
  11.830 -	memcpy(data+nhead, skb->head, skb->tail-skb->head);
  11.831 -	memcpy(data+size, skb->end, sizeof(struct skb_shared_info));
  11.832 -
  11.833 -	for (i=0; i<skb_shinfo(skb)->nr_frags; i++)
  11.834 -		get_page(skb_shinfo(skb)->frags[i].page);
  11.835 -
  11.836 -	if (skb_shinfo(skb)->frag_list)
  11.837 -		skb_clone_fraglist(skb);
  11.838 -
  11.839 -	skb_release_data(skb);
  11.840 -
  11.841 -	off = (data+nhead) - skb->head;
  11.842 -
  11.843 -	skb->head = data;
  11.844 -	skb->end  = data+size;
  11.845 -
  11.846 -	skb->data += off;
  11.847 -	skb->tail += off;
  11.848 -	skb->mac.raw += off;
  11.849 -	skb->h.raw += off;
  11.850 -	skb->nh.raw += off;
  11.851 -	skb->cloned = 0;
  11.852 -	atomic_set(&skb_shinfo(skb)->dataref, 1);
  11.853 -	return 0;
  11.854 -
  11.855 -nodata:
  11.856 -	return -ENOMEM;
  11.857 -}
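
pskb_expand_head() reallocates the header of a non-shared skb, adding nhead/ntail bytes of room; as the comment above notes, every pointer into the old header is stale after a successful call. A minimal sketch of the usual pattern of guaranteeing headroom before pushing a new outer header (helper name illustrative):

    /* Sketch only: ensure at least 'hdrlen' bytes of headroom exist.
     * Returns 0 on success, -ENOMEM if the new head could not be allocated. */
    static int ensure_headroom(struct sk_buff *skb, int hdrlen)
    {
        if (skb_headroom(skb) >= hdrlen)
            return 0;
        return pskb_expand_head(skb, SKB_DATA_ALIGN(hdrlen), 0, GFP_ATOMIC);
    }

skb_realloc_headroom(), removed just below, wraps the same idea but hands back a fresh copy instead of modifying the skb in place.
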
  11.858 -
  11.859 -/* Make private copy of skb with writable head and some headroom */
  11.860 -
  11.861 -struct sk_buff *
  11.862 -skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
  11.863 -{
  11.864 -	struct sk_buff *skb2;
  11.865 -	int delta = headroom - skb_headroom(skb);
  11.866 -
  11.867 -	if (delta <= 0)
  11.868 -		return pskb_copy(skb, GFP_ATOMIC);
  11.869 -
  11.870 -	skb2 = skb_clone(skb, GFP_ATOMIC);
  11.871 -	if (skb2 == NULL ||
  11.872 -	    !pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
  11.873 -		return skb2;
  11.874 -
  11.875 -	kfree_skb(skb2);
  11.876 -	return NULL;
  11.877 -}
  11.878 -
  11.879 +    /*
  11.880 +     *	Allocate the copy buffer
  11.881 +     */
  11.882 +    n=alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
  11.883 +    if(n==NULL)
  11.884 +        return NULL;
  11.885  
  11.886 -/**
  11.887 - *	skb_copy_expand	-	copy and expand sk_buff
  11.888 - *	@skb: buffer to copy
  11.889 - *	@newheadroom: new free bytes at head
  11.890 - *	@newtailroom: new free bytes at tail
  11.891 - *	@gfp_mask: allocation priority
  11.892 - *
  11.893 - *	Make a copy of both an &sk_buff and its data and while doing so 
  11.894 - *	allocate additional space.
  11.895 - *
  11.896 - *	This is used when the caller wishes to modify the data and needs a 
  11.897 - *	private copy of the data to alter as well as more space for new fields.
  11.898 - *	Returns %NULL on failure or the pointer to the buffer
  11.899 - *	on success. The returned buffer has a reference count of 1.
  11.900 - *
  11.901 - *	You must pass %GFP_ATOMIC as the allocation priority if this function
  11.902 - *	is called from an interrupt.
  11.903 - */
  11.904 - 
  11.905 -
  11.906 -struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
  11.907 -				int newheadroom,
  11.908 -				int newtailroom,
  11.909 -				int gfp_mask)
  11.910 -{
  11.911 -	struct sk_buff *n;
  11.912 -
  11.913 -	/*
  11.914 -	 *	Allocate the copy buffer
  11.915 -	 */
  11.916 - 	 
  11.917 -	n=alloc_skb(newheadroom + skb->len + newtailroom,
  11.918 -		    gfp_mask);
  11.919 -	if(n==NULL)
  11.920 -		return NULL;
  11.921 -
  11.922 -	skb_reserve(n,newheadroom);
  11.923 -
  11.924 -	/* Set the tail pointer and length */
  11.925 -	skb_put(n,skb->len);
  11.926 -
  11.927 -	/* Copy the data only. */
  11.928 -	if (skb_copy_bits(skb, 0, n->data, skb->len))
  11.929 -		BUG();
  11.930 -
  11.931 -	copy_skb_header(n, skb);
  11.932 -	return n;
  11.933 -}
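
skb_copy_expand() makes a full private copy while reserving fresh head and tail room; its comment stresses that GFP_ATOMIC must be used when calling from interrupt context. A minimal sketch (helper name illustrative):

    /* Sketch only: private copy with room for an extra outer header,
     * safe to request from interrupt/softirq context. */
    static struct sk_buff *copy_with_headroom(const struct sk_buff *skb, int hdrlen)
    {
        struct sk_buff *n = skb_copy_expand(skb, hdrlen, 0, GFP_ATOMIC);
        if (n != NULL)
            skb_push(n, hdrlen);   /* claim the new headroom for the header */
        return n;
    }
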
  11.934 -
  11.935 -/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
  11.936 - * If realloc==0 and trimming is impossible without change of data,
  11.937 - * it is BUG().
  11.938 - */
  11.939 -
  11.940 -int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
  11.941 -{
  11.942 -	int offset = skb_headlen(skb);
  11.943 -	int nfrags = skb_shinfo(skb)->nr_frags;
  11.944 -	int i;
  11.945 -
  11.946 -	for (i=0; i<nfrags; i++) {
  11.947 -		int end = offset + skb_shinfo(skb)->frags[i].size;
  11.948 -		if (end > len) {
  11.949 -			if (skb_cloned(skb)) {
  11.950 -				if (!realloc)
  11.951 -					BUG();
  11.952 -				if (!pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
  11.953 -					return -ENOMEM;
  11.954 -			}
  11.955 -			if (len <= offset) {
  11.956 -				put_page(skb_shinfo(skb)->frags[i].page);
  11.957 -				skb_shinfo(skb)->nr_frags--;
  11.958 -			} else {
  11.959 -				skb_shinfo(skb)->frags[i].size = len-offset;
  11.960 -			}
  11.961 -		}
  11.962 -		offset = end;
  11.963 -	}
  11.964 -
  11.965 -	if (offset < len) {
  11.966 -		skb->data_len -= skb->len - len;
  11.967 -		skb->len = len;
  11.968 -	} else {
  11.969 -		if (len <= skb_headlen(skb)) {
  11.970 -			skb->len = len;
  11.971 -			skb->data_len = 0;
  11.972 -			skb->tail = skb->data + len;
  11.973 -			if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
  11.974 -				skb_drop_fraglist(skb);
  11.975 -		} else {
  11.976 -			skb->data_len -= skb->len - len;
  11.977 -			skb->len = len;
  11.978 -		}
  11.979 -	}
  11.980 -
  11.981 -	return 0;
  11.982 -}
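
___pskb_trim() shortens a buffer to len bytes, dropping or shrinking page fragments as needed; with realloc set it may also reallocate the head of a cloned skb, as the comment above explains. A minimal sketch (helper name illustrative):

    /* Sketch only: cut a possibly fragmented packet down to 'len' bytes,
     * permitting head reallocation for cloned skbs (realloc == 1). */
    static int trim_packet(struct sk_buff *skb, unsigned int len)
    {
        if (len >= skb->len)
            return 0;                  /* already short enough */
        return ___pskb_trim(skb, len, 1);
    }
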
  11.983 -
  11.984 -/**
  11.985 - *	__pskb_pull_tail - advance tail of skb header 
  11.986 - *	@skb: buffer to reallocate
  11.987 - *	@delta: number of bytes to advance tail
  11.988 - *
   11.989 - *	The function makes sense only on a fragmented &sk_buff,
  11.990 - *	it expands header moving its tail forward and copying necessary
  11.991 - *	data from fragmented part.
  11.992 - *
  11.993 - *	&sk_buff MUST have reference count of 1.
  11.994 - *
  11.995 - *	Returns %NULL (and &sk_buff does not change) if pull failed
  11.996 - *	or value of new tail of skb in the case of success.
  11.997 - *
  11.998 - *	All the pointers pointing into skb header may change and must be
  11.999 - *	reloaded after call to this function.
 11.1000 - */
 11.1001 +    /* Set the data pointer */
 11.1002 +    skb_reserve(n,headerlen);
 11.1003 +    /* Set the tail pointer and length */
 11.1004 +    skb_put(n,skb->len);
 11.1005 +    n->csum = skb->csum;
 11.1006 +    n->ip_summed = skb->ip_summed;
 11.1007  
 11.1008 -/* Moves tail of skb head forward, copying data from fragmented part,
 11.1009 - * when it is necessary.
 11.1010 - * 1. It may fail due to malloc failure.
 11.1011 - * 2. It may change skb pointers.
 11.1012 - *
 11.1013 - * It is pretty complicated. Luckily, it is called only in exceptional cases.
 11.1014 - */
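
Callers normally reach __pskb_pull_tail() indirectly, through pskb_pull(), which falls back to it whenever the requested bytes are not yet in the linear header area. A minimal sketch of that outer usage (helper name illustrative):

    /* Sketch only: strip an outer header of 'hlen' bytes from the front of
     * a possibly fragmented packet; pskb_pull() invokes __pskb_pull_tail()
     * internally when the bytes are not yet linear. */
    static unsigned char *strip_outer_header(struct sk_buff *skb, unsigned int hlen)
    {
        if (hlen > skb->len)
            return NULL;
        return pskb_pull(skb, hlen);   /* NULL if the pull fails */
    }
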
 11.1015 -unsigned char * __pskb_pull_tail(struct sk_buff *skb, int delta)
 11.1016 -{
 11.1017 -	int i, k, eat;
 11.1018 -
 11.1019 -	/* If skb has not enough free space at tail, get new one
 11.1020 -	 * plus 128 bytes for future expansions. If we have enough
 11.1021 -	 * room at tail, reallocate without expansion only if skb is cloned.
 11.1022 -	 */
 11.1023 -	eat = (skb->tail+delta) - skb->end;
 11.1024 -
 11.1025 -	if (eat > 0 || skb_cloned(skb)) {
 11.1026 -		if (pskb_expand_head(skb, 0, eat>0 ? eat+128 : 0, GFP_ATOMIC))
 11.1027 -			return NULL;
 11.1028 -	}
 11.1029 -
 11.1030 -	if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta))
 11.1031 -		BUG();
 11.1032 -
 11.1033 -	/* Optimization: no fragments, no reasons to preestimate
 11.1034 -	 * size of pulled pages. Superb.
 11.1035 -	 */
 11.1036 -	if (skb_shinfo(skb)->frag_list == NULL)
 11.1037 -		goto pull_pages;
 11.1038 -
 11.1039 -	/* Estimate size of pulled pages. */
 11.1040 -	eat = delta;
 11.1041 -	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 11.1042 -		if (skb_shinfo(skb)->frags[i].size >= eat)
 11.1043 -			goto pull_pages;
 11.1044 -		eat -= skb_shinfo(skb)->frags[i].size;
 11.1045 -	}
 11.1046 -
  11.1047 -	/* If we need to update the frag list, we are in trouble.
  11.1048 -	 * Certainly, it is possible to add an offset to skb data,
  11.1049 -	 * but taking into account that pulling is expected to
  11.1050 -	 * be a very rare operation, it is worth fighting against
  11.1051 -	 * further bloating of the skb head and crucifying ourselves here instead.
  11.1052 -	 * Pure masochism, indeed. 8)8)
 11.1053 -	 */
 11.1054 -	if (eat) {
 11.1055 -		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 11.1056 -		struct sk_buff *clone = NULL;
 11.1057 -		struct sk_buff *insp = NULL;
 11.1058 -
 11.1059 -		do {
 11.1060 -			if (list == NULL)
 11.1061 -				BUG();
 11.1062 +    if (skb_copy_bits(skb, -headerlen, n->head, headerlen+skb->len))
 11.1063 +        BUG();
 11.1064  
 11.1065 -			if (list->len <= eat) {
 11.1066 -				/* Eaten as whole. */
 11.1067 -				eat -= list->len;
 11.1068 -				list = list->next;
 11.1069 -				insp = list;
 11.1070 -			} else {
 11.1071 -				/* Eaten partially. */
 11.1072 -
 11.1073 -				if (skb_shared(list)) {
 11.1074 -					/* Sucks! We need to fork list. :-( */
 11.1075 -					clone = skb_clone(list, GFP_ATOMIC);
 11.1076 -					if (clone == NULL)
 11.1077 -						return NULL;
 11.1078 -					insp = list->next;
 11.1079 -					list = clone;
 11.1080 -				} else {
 11.1081 -					/* This may be pulled without
 11.1082 -					 * problems. */
 11.1083 -					insp = list;
 11.1084 -				}
 11.1085 -				if (pskb_pull(list, eat) == NULL) {
 11.1086 -					if (clone)
 11.1087 -						kfree_skb(clone);
 11.1088 -					return NULL;
 11.1089 -				}
 11.1090 -				break;
 11.1091 -			}
 11.1092 -		} while (eat);
 11.1093 +    copy_skb_header(n, skb);
 11.1094  
 11.1095 -		/* Free pulled out fragments. */
 11.1096 -		while ((list = skb_shinfo(skb)->frag_list) != insp) {
 11.1097 -			skb_shinfo(skb)->frag_list = list->next;
 11.1098 -			kfree_skb(list);
 11.1099 -		}
 11.1100 -		/* And insert new clone at head. */
 11.1101 -		if (clone) {
 11.1102 -			clone->next = list;
 11.1103 -			skb_shinfo(skb)->frag_list = clone;
 11.1104 -		}
 11.1105 -	}
 11.1106 -	/* Success! Now we may commit changes to skb data. */
 11.1107 -
 11.1108 -pull_pages:
 11.1109 -	eat = delta;
 11.1110 -	k = 0;
 11.1111 -	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 11.1112 -		if (skb_shinfo(skb)->frags[i].size <= eat) {
 11.1113 -			put_page(skb_shinfo(skb)->frags[i].page);
 11.1114 -			eat -= skb_shinfo(skb)->frags[i].size;
 11.1115 -		} else {
 11.1116 -			skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
 11.1117 -			if (eat) {
 11.1118 -				skb_shinfo(skb)->frags[k].page_offset += eat;
 11.1119 -				skb_shinfo(skb)->frags[k].size -= eat;
 11.1120 -				eat = 0;
 11.1121 -			}
 11.1122 -			k++;
 11.1123 -		}
 11.1124 -	}
 11.1125 -	skb_shinfo(skb)->nr_frags = k;
 11.1126 -
 11.1127 -	skb->tail += delta;
 11.1128 -	skb->data_len -= delta;
 11.1129 -
 11.1130 -	return skb->tail;
 11.1131 +    return n;
 11.1132  }
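
The simplified copy routine assembled above (this is the body of skb_copy() in the stock kernel) sizes its allocation as end - head + data_len and then lets skb_copy_bits() pull the header plus the entire, possibly fragmented, payload into the new buffer, so the result is always linear and fully independent of the original. A minimal usage sketch (helper name illustrative):

    /* Sketch only: take a self-contained linear duplicate and, on success,
     * drop the caller's reference to the original. */
    static struct sk_buff *detach_copy(struct sk_buff *skb)
    {
        struct sk_buff *n = skb_copy(skb, GFP_ATOMIC);
        if (n == NULL)
            return NULL;       /* caller still owns the original */
        kfree_skb(skb);        /* the copy shares nothing with the original */
        return n;
    }
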
 11.1133  
 11.1134  /* Copy some data bits from skb to kernel buffer. */
 11.1135  
 11.1136  int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
 11.1137  {
 11.1138 -	int i, copy;
 11.1139 -	int start = skb->len - skb->data_len;
 11.1140 -
 11.1141 -	if (offset > (int)skb->len-len)
 11.1142 -		goto fault;
 11.1143 -
 11.1144 -	/* Copy header. */
 11.1145 -	if ((copy = start-offset) > 0) {
 11.1146 -		if (copy > len)
 11.1147 -			copy = len;
 11.1148 -		memcpy(to, skb->data + offset, copy);
 11.1149 -		if ((len -= copy) == 0)
 11.1150 -			return 0;
 11.1151 -		offset += copy;
 11.1152 -		to += copy;
 11.1153 -	}
 11.1154 -
 11.1155 -	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 11.1156 -		int end;
 11.1157 -
 11.1158 -		BUG_TRAP(start <= offset+len);
 11.1159 +    int i, copy;
 11.1160 +    int start = skb->len - skb->data_len;
 11.1161  
 11.1162 -		end = start + skb_shinfo(skb)->frags[i].size;
 11.1163 -		if ((copy = end-offset) > 0) {
 11.1164 -			u8 *vaddr;
 11.1165 -
 11.1166 -			if (copy > len)
 11.1167 -				copy = len;
 11.1168 -
 11.1169 -			vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
 11.1170 -			memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
 11.1171 -			       offset-start, copy);
 11.1172 -			kunmap_skb_frag(vaddr);
 11.1173 -
 11.1174 -			if ((len -= copy) == 0)
 11.1175 -				return 0;
 11.1176 -			offset += copy;
 11.1177 -			to += copy;
 11.1178 -		}
 11.1179 -		start = end;
 11.1180 -	}
 11.1181 -
 11.1182 -	if (skb_shinfo(skb)->frag_list) {
 11.1183 -		struct sk_buff *list;
 11.1184 -
 11.1185 -		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 11.1186 -			int end;
 11.1187 -
 11.1188 -			BUG_TRAP(start <= offset+len);
 11.1189 +    if (offset > (int)skb->len-len)
 11.1190 +        goto fault;
 11.1191  
 11.1192 -			end = start + list->len;
 11.1193 -			if ((copy = end-offset) > 0) {
 11.1194 -				if (copy > len)
 11.1195 -					copy = len;
 11.1196 -				if (skb_copy_bits(list, offset-start, to, copy))
 11.1197 -					goto fault;
 11.1198 -				if ((len -= copy) == 0)
 11.1199 -					return 0;
 11.1200 -				offset += copy;
 11.1201 -				to += copy;
 11.1202 -			}
 11.1203 -			start = end;
 11.1204 -		}
 11.1205 -	}
 11.1206 -	if (len == 0)
 11.1207 -		return 0;
 11.1208 -
 11.1209 -fault:
 11.1210 -	return -EFAULT;
 11.1211 -}
 11.1212 -
 11.1213 -/* Checksum skb data. */
 11.1214 -
 11.1215 -#if 0
 11.1216 -
 11.1217 -unsigned int skb_checksum(const struct sk_buff *skb, int offset, int len, unsigned int csum)
 11.1218 -{
 11.1219 -	int i, copy;
 11.1220 -	int start = skb->len - skb->data_len;
 11.1221 -	int pos = 0;
 11.1222 +    /* Copy header. */
 11.1223 +    if ((copy = start-offset) > 0) {
 11.1224 +        if (copy > len)
 11.1225 +            copy = len;
 11.1226 +        memcpy(to, skb->data + offset, copy);
 11.1227 +        if ((len -= copy) == 0)
 11.1228 +            return 0;
 11.1229 +        offset += copy;
 11.1230 +        to += copy;
 11.1231 +    }
 11.1232  
 11.1233 -	/* Checksum header. */
 11.1234 -	if ((copy = start-offset) > 0) {
 11.1235 -		if (copy > len)
 11.1236 -			copy = len;
 11.1237 -		csum = csum_partial(skb->data+offset, copy, csum);
 11.1238 -		if ((len -= copy) == 0)
 11.1239 -			return csum;
 11.1240 -		offset += copy;
 11.1241 -		pos = copy;
 11.1242 -	}
 11.1243 -
 11.1244 -	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 11.1245 -		int end;
 11.1246 -
 11.1247 -		BUG_TRAP(start <= offset+len);
 11.1248 -
 11.1249 -		end = start + skb_shinfo(skb)->frags[i].size;
 11.1250 -		if ((copy = end-offset) > 0) {
 11.1251 -			unsigned int csum2;
 11.1252 -			u8 *vaddr;
 11.1253 -			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 11.1254 +    for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 11.1255 +        int end;
 11.1256  
 11.1257 -			if (copy > len)
 11.1258 -				copy = len;
 11.1259 -			vaddr = kmap_skb_frag(frag);
 11.1260 -			csum2 = csum_partial(vaddr + frag->page_offset +
 11.1261 -					     offset-start, copy, 0);
 11.1262 -			kunmap_skb_frag(vaddr);
 11.1263 -			csum = csum_block_add(csum, csum2, pos);
 11.1264 -			if (!(len -= copy))
 11.1265 -				return csum;
 11.1266 -			offset += copy;
 11.1267 -			pos += copy;
 11.1268 -		}
 11.1269 -		start = end;
 11.1270 -	}
 11.1271 -
 11.1272 -	if (skb_shinfo(skb)->frag_list) {
 11.1273 -		struct sk_buff *list;
 11.1274 -
 11.1275 -		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 11.1276 -			int end;
 11.1277 -
 11.1278 -			BUG_TRAP(start <= offset+len);
 11.1279 +        BUG_TRAP(start <= offset+len);
 11.1280  
 11.1281 -			end = start + list->len;
 11.1282 -			if ((copy = end-offset) > 0) {
 11.1283 -				unsigned int csum2;
 11.1284 -				if (copy > len)
 11.1285 -					copy = len;
 11.1286 -				csum2 = skb_checksum(list, offset-start, copy, 0);
 11.1287 -				csum = csum_block_add(csum, csum2, pos);
 11.1288 -				if ((len -= copy) == 0)
 11.1289 -					return csum;
 11.1290 -				offset += copy;
 11.1291 -				pos += copy;
 11.1292 -			}
 11.1293 -			start = end;
 11.1294 -		}
 11.1295 -	}
 11.1296 -	if (len == 0)
 11.1297 -		return csum;
 11.1298 -
 11.1299 -	BUG();
 11.1300 -	return csum;
 11.1301 -}
 11.1302 -
 11.1303 -/* Both of above in one bottle. */
 11.1304 -
 11.1305 -unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len, unsigned int csum)
 11.1306 -{
 11.1307 -	int i, copy;
 11.1308 -	int start = skb->len - skb->data_len;
 11.1309 -	int pos = 0;
 11.1310 +        end = start + skb_shinfo(skb)->frags[i].size;
 11.1311 +        if ((copy = end-offset) > 0) {
 11.1312 +            u8 *vaddr;
 11.1313  
 11.1314 -	/* Copy header. */
 11.1315 -	if ((copy = start-offset) > 0) {
 11.1316 -		if (copy > len)
 11.1317 -			copy = len;
 11.1318 -		csum = csum_partial_copy_nocheck(skb->data+offset, to, copy, csum);
 11.1319 -		if ((len -= copy) == 0)
 11.1320 -			return csum;
 11.1321 -		offset += copy;
 11.1322 -		to += copy;
 11.1323 -		pos = copy;
 11.1324 -	}
 11.1325 -
 11.1326 -	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
 11.1327 -		int end;
 11.1328 -
 11.1329 -		BUG_TRAP(start <= offset+len);
 11.1330 +            if (copy > len)
 11.1331 +                copy = len;
 11.1332  
 11.1333 -		end = start + skb_shinfo(skb)->frags[i].size;
 11.1334 -		if ((copy = end-offset) > 0) {
 11.1335 -			unsigned int csum2;
 11.1336 -			u8 *vaddr;
 11.1337 -			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 11.1338 -
 11.1339 -			if (copy > len)
 11.1340 -				copy = len;
 11.1341 -			vaddr = kmap_skb_frag(frag);
 11.1342 -			csum2 = csum_partial_copy_nocheck(vaddr + frag->page_offset +
 11.1343 -						      offset-start, to, copy, 0);
 11.1344 -			kunmap_skb_frag(vaddr);
 11.1345 -			csum = csum_block_add(csum, csum2, pos);
 11.1346 -			if (!(len -= copy))
 11.1347 -				return csum;
 11.1348 -			offset += copy;
 11.1349 -			to += copy;
 11.1350 -			pos += copy;
 11.1351 -		}
 11.1352 -		start = end;
 11.1353 -	}
 11.1354 +            vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
 11.1355 +            memcpy(to, vaddr+skb_shinfo(skb)->frags[i].page_offset+
 11.1356 +                   offset-start, copy);
 11.1357 +            kunmap_skb_frag(vaddr);
 11.1358  
 11.1359 -	if (skb_shinfo(skb)->frag_list) {
 11.1360 -		struct sk_buff *list;
 11.1361 -
 11.1362 -		for (list = skb_shinfo(skb)->frag_list; list; list=list->next) {
 11.1363 -			unsigned int csum2;
 11.1364 -			int end;
 11.1365 -
 11.1366 -			BUG_TRAP(start <= offset+len);
 11.1367 -
 11.1368 -			end = start + list->len;
 11.1369 -			if ((copy = end-offset) > 0) {
 11.1370 -				if (copy > len)
 11.1371 -					copy = len;
 11.1372 -				csum2 = skb_copy_and_csum_bits(list, offset-start, to, copy, 0);
 11.1373 -				csum = csum_block_add(csum, csum2, pos);
 11.1374 -				if ((len -= copy) == 0)
 11.1375 -					return csum;
 11.1376 -				offset += copy;
 11.1377 -				to += copy;
 11.1378 -				pos += copy;
 11.1379 -			}
 11.1380 -			start = end;
 11.1381 -		}
 11.1382 -	}
 11.1383 -	if (len == 0)
 11.1384 -		return csum;
 11.1385 -
 11.1386 -	BUG();
 11.1387 -	return csum;
 11.1388 -}
 11.1389 -
 11.1390 -void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
 11.1391 -{
 11.1392 -	unsigned int csum;
 11.1393 -	long csstart;
 11.1394 +            if ((len -= copy) == 0)
 11.1395 +                return 0;
 11.1396 +            offset += copy;
 11.1397 +            to += copy;
 11.1398 +        }
 11.1399 +        start = end;
 11.1400 +    }
 11.1401  
 11.1402 -	if (skb->ip_summed == CHECKSUM_HW)
 11.1403 -		csstart = skb->h.raw - skb->data;
 11.1404 -	else
 11.1405 -		csstart = skb->len - skb->data_len;
 11.1406 -
 11.1407 -	if (csstart > skb->len - skb->data_len)
 11.1408 -		BUG();
 11.1409 -
 11.1410 -	memcpy(to, skb->data, csstart);
 11.1411 -
 11.1412 -	csum = 0;
 11.1413 -	if (csstart != skb->len)
 11.1414 -		csum = skb_copy_and_csum_bits(skb, csstart, to+csstart,
 11.1415 -				skb->len-csstart, 0);
 11.1416 -
 11.1417 -	if (skb->ip_summed == CHECKSUM_HW) {
 11.1418 -		long csstuff = csstart + skb->csum;
 11.1419 +    if (len == 0)
 11.1420 +        return 0;
 11.1421  
 11.1422 -		*((unsigned short *)(to + csstuff)) = csum_fold(csum);
 11.1423 -	}
 11.1424 + fault:
 11.1425 +    return -EFAULT;
 11.1426  }
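
skb_copy_bits() walks the linear header, the page fragments and the frag list in turn, copying len bytes starting at offset into a flat destination buffer; it returns 0 on success and -EFAULT if the requested range runs past the data. A minimal usage sketch (helper name illustrative):

    /* Sketch only: copy the leading bytes of a possibly fragmented packet
     * into a flat buffer so they can be inspected in one piece. */
    static int peek_front(const struct sk_buff *skb, void *buf, int len)
    {
        if (len > (int)skb->len)
            len = skb->len;
        return skb_copy_bits(skb, 0, buf, len);
    }
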
 11.1427  
 11.1428 -#endif
 11.1429 -
 11.1430 -#if 0
 11.1431 -/* 
 11.1432 - * 	Tune the memory allocator for a new MTU size.
 11.1433 - */
 11.1434 -void skb_add_mtu(int mtu)
 11.1435 -{
 11.1436 -	/* Must match allocation in alloc_skb */
 11.1437 -	mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info);
 11.1438 -
 11.1439 -	kmem_add_cache_size(mtu);
 11.1440 -}
 11.1441 -#endif
 11.1442 -
 11.1443  void __init skb_init(void)
 11.1444  {
 11.1445 -	int i;
 11.1446 +    int i;
 11.1447  
 11.1448 -	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 11.1449 -					      sizeof(struct sk_buff),
 11.1450 -					      0,
 11.1451 -					      SLAB_HWCACHE_ALIGN,
 11.1452 -					      skb_headerinit, NULL);
 11.1453 -	if (!skbuff_head_cache)
 11.1454 -		panic("cannot create skbuff cache");
 11.1455 +    skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
 11.1456 +                                          sizeof(struct sk_buff),
 11.1457 +                                          0,
 11.1458 +                                          SLAB_HWCACHE_ALIGN,
 11.1459 +                                          skb_headerinit, NULL);
 11.1460 +    if (!skbuff_head_cache)
 11.1461 +        panic("cannot create skbuff cache");
 11.1462  
 11.1463 -	for (i=0; i<NR_CPUS; i++)
 11.1464 -		skb_queue_head_init(&skb_head_pool[i].list);
 11.1465 +    for (i=0; i<NR_CPUS; i++)
 11.1466 +        skb_queue_head_init(&skb_head_pool[i].list);
 11.1467  }