ia64/xen-unstable

changeset 107:91f5e18965d9

bitkeeper revision 1.15.3.2 (3e37b8332YRktwAjVLsh2PyFFW2XNw)

RX data is now received into a domain-memory page, but is still copied at the end.
author akw27@boulderdash.cl.cam.ac.uk
date Wed Jan 29 11:17:07 2003 +0000 (2003-01-29)
parents 0ce34da1b61d
children de280362dfb0
files xen-2.4.16/drivers/net/tulip/interrupt.c xen-2.4.16/include/asm-i386/pci.h xen-2.4.16/include/xeno/skbuff.h xen-2.4.16/net/dev.c xen-2.4.16/net/eth.c xen-2.4.16/net/skbuff.c
line diff
     1.1 --- a/xen-2.4.16/drivers/net/tulip/interrupt.c	Wed Jan 29 08:59:21 2003 +0000
     1.2 +++ b/xen-2.4.16/drivers/net/tulip/interrupt.c	Wed Jan 29 11:17:07 2003 +0000
     1.3 @@ -170,8 +170,9 @@ static int tulip_rx(struct net_device *d
     1.4  #endif
     1.5  			/* Check if the packet is long enough to accept without copying
     1.6  			   to a minimally-sized skbuff. */
     1.7 -			if (pkt_len < tulip_rx_copybreak
     1.8 -				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
     1.9 +			//if (pkt_len < tulip_rx_copybreak
    1.10 +			//	&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
    1.11 +                        if (0) {
    1.12  				skb->dev = dev;
    1.13  				skb_reserve(skb, 2);	/* 16 byte align the IP header */
    1.14  				pci_dma_sync_single(tp->pdev,
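
The hunk above forces tulip's rx path to always hand the pre-allocated buffer up the stack: normally packets shorter than tulip_rx_copybreak are copied into a freshly allocated minimal skb so the original rx buffer can be recycled to the NIC, but with rx buffers now living in domain-memory pages that copy would defeat the zero-copy scheme. A minimal sketch of the policy being disabled (tulip_should_copy is a hypothetical name, not in the driver):

    /* sketch: the decision tulip_rx() used to make, now pinned to the
     * no-copy branch by the `if (0)' above */
    static int tulip_should_copy(unsigned int pkt_len)
    {
            /* was: pkt_len < tulip_rx_copybreak -- copy small packets
             * into a minimal skb and recycle the domain-page rx buffer.
             * Disabled unconditionally for zero-copy rx. */
            return 0;
    }
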
     2.1 --- a/xen-2.4.16/include/asm-i386/pci.h	Wed Jan 29 08:59:21 2003 +0000
     2.2 +++ b/xen-2.4.16/include/asm-i386/pci.h	Wed Jan 29 11:17:07 2003 +0000
     2.3 @@ -75,7 +75,19 @@ static inline dma_addr_t pci_map_single(
     2.4  	if (direction == PCI_DMA_NONE)
     2.5  		BUG();
     2.6  	flush_write_buffers();
     2.7 -	return virt_to_bus(ptr);
     2.8 +
     2.9 +        if ((unsigned long) ptr > PAGE_OFFSET)
    2.10 +	    return virt_to_bus(ptr);
    2.11 +
     2.12 +        /* If an address that is not in hypervisor VM is passed to this 
     2.13 +         * function (i.e. <= PAGE_OFFSET), we assume that the caller knows 
     2.14 +         * what they are doing, and has passed a physical address that 
     2.15 +         * should not be converted here.  This is a little hackish, but 
     2.16 +         * is being added to allow references to domain memory in order 
     2.17 +         * to support zero-copy network code.
    2.18 +         */
    2.19 +        
    2.20 +        return (dma_addr_t) ptr;
    2.21  }
    2.22  
    2.23  /* Unmap a single streaming mode DMA translation.  The dma_addr and size
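
For callers, the relaxed pci_map_single() above means a raw physical address below PAGE_OFFSET passes straight through while ordinary hypervisor-virtual pointers are translated as before. A hedged sketch of the intended use (pdev and PKT_BUF_SZ are assumed driver context; the address computation matches alloc_skb_data_page() in the net/skbuff.c hunk below):

    /* pf is a struct pfn_info * taken off Xen's free list */
    u8 *data = (u8 *)((pf - frame_table) << PAGE_SHIFT); /* physical, < PAGE_OFFSET */
    dma_addr_t bus = pci_map_single(pdev, data, PKT_BUF_SZ, PCI_DMA_FROMDEVICE);
    /* bus == (dma_addr_t)data: returned untranslated, no virt_to_bus() */
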
     3.1 --- a/xen-2.4.16/include/xeno/skbuff.h	Wed Jan 29 08:59:21 2003 +0000
     3.2 +++ b/xen-2.4.16/include/xeno/skbuff.h	Wed Jan 29 11:17:07 2003 +0000
     3.3 @@ -34,6 +34,10 @@
     3.4  #define VIF_DROP                -3
     3.5  #define VIF_ANY_INTERFACE       -4
     3.6  
     3.7 +//skb_type values:
     3.8 +#define SKB_NORMAL               0
     3.9 +#define SKB_ZERO_COPY            1
    3.10 +
    3.11  #define HAVE_ALLOC_SKB		/* For the drivers to know */
    3.12  #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
    3.13  #define SLAB_SKB 		/* Slabified skbuffs 	   */
    3.14 @@ -187,7 +191,7 @@ struct sk_buff {
    3.15   	unsigned int 	data_len;
    3.16  	unsigned int	csum;			/* Checksum 					*/
    3.17  	unsigned char 	__unused,		/* Dead field, may be reused			*/
    3.18 -			cloned, 		/* head may be cloned (check refcnt to be sure). */
    3.19 +			cloned, 		/* head may be cloned (check refcnt to be sure) */
    3.20    			pkt_type,		/* Packet class					*/
    3.21    			ip_summed;		/* Driver fed us an IP checksum			*/
    3.22  	__u32		priority;		/* Packet queueing priority			*/
    3.23 @@ -203,8 +207,12 @@ struct sk_buff {
    3.24  
    3.25  	void 		(*destructor)(struct sk_buff *);	/* Destruct function		*/
    3.26  
    3.27 -        int src_vif;                            /* vif we came from */
    3.28 -        int dst_vif;                            /* vif we are bound for */
    3.29 +        unsigned int    skb_type;               /* SKB_NORMAL or SKB_ZERO_COPY                  */
    3.30 +        struct pfn_info *pf;                    /* record of physical pf address for freeing    */
    3.31 +        int src_vif;                            /* vif we came from                             */
    3.32 +        int dst_vif;                            /* vif we are bound for                         */
    3.33 +        struct skb_shared_info shinfo;          /* shared info is no longer shared in Xen.      */
    3.34 +        
    3.35  
    3.36                  
    3.37          
    3.38 @@ -244,6 +252,7 @@ struct sk_buff {
    3.39  
    3.40  extern void			__kfree_skb(struct sk_buff *skb);
    3.41  extern struct sk_buff *		alloc_skb(unsigned int size, int priority);
    3.42 +extern struct sk_buff *         alloc_zc_skb(unsigned int size, int priority);
    3.43  extern void			kfree_skbmem(struct sk_buff *skb);
    3.44  extern struct sk_buff *		skb_clone(struct sk_buff *skb, int priority);
    3.45  extern struct sk_buff *		skb_copy(const struct sk_buff *skb, int priority);
    3.46 @@ -259,7 +268,8 @@ extern void	skb_over_panic(struct sk_buf
    3.47  extern void	skb_under_panic(struct sk_buff *skb, int len, void *here);
    3.48  
    3.49  /* Internal */
    3.50 -#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
    3.51 +//#define skb_shinfo(SKB)		((struct skb_shared_info *)((SKB)->end))
    3.52 +#define skb_shinfo(SKB)     ((struct skb_shared_info *)(&(SKB)->shinfo))
    3.53  
    3.54  /**
    3.55   *	skb_queue_empty - check if a queue is empty
    3.56 @@ -1045,7 +1055,8 @@ static inline struct sk_buff *__dev_allo
    3.57  {
    3.58  	struct sk_buff *skb;
    3.59  
    3.60 -	skb = alloc_skb(length+16, gfp_mask);
    3.61 +	//skb = alloc_skb(length+16, gfp_mask);
    3.62 +        skb = alloc_zc_skb(length+16, gfp_mask);
    3.63  	if (skb)
    3.64  		skb_reserve(skb,16);
    3.65  	return skb;
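
The skb_shinfo() change above is forced by the new buffer placement: for a zero-copy skb, head/end hold a physical address into a domain page that is normally not mapped into hypervisor space, so the shared info can no longer sit just past the data as (SKB)->end assumed. Embedding it in the skb head keeps the accessor safe, at the cost noted in the hunk (shared info is no longer shared). A small sketch of the difference:

    struct sk_buff *skb = alloc_zc_skb(1600, GFP_ATOMIC);  /* sketch only */
    if (skb != NULL) {
            /* old macro: (struct skb_shared_info *)skb->end would point
             * into the unmapped domain page and fault on access;
             * new macro: &skb->shinfo lives in the skb head, so this is
             * always safe: */
            int refs = atomic_read(&skb_shinfo(skb)->dataref);
    }
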
     4.1 --- a/xen-2.4.16/net/dev.c	Wed Jan 29 08:59:21 2003 +0000
     4.2 +++ b/xen-2.4.16/net/dev.c	Wed Jan 29 11:17:07 2003 +0000
     4.3 @@ -30,6 +30,7 @@
     4.4  #include <linux/pkt_sched.h>
     4.5  
     4.6  #include <linux/event.h>
     4.7 +#include <asm/domain_page.h>
     4.8  
     4.9  #define BUG_TRAP ASSERT
    4.10  #define notifier_call_chain(_a,_b,_c) ((void)0)
    4.11 @@ -695,6 +696,21 @@ int netif_rx(struct sk_buff *skb)
    4.12  	if (skb->stamp.tv_sec == 0)
    4.13  		get_fast_time(&skb->stamp);
    4.14  
    4.15 +        /* Attempt to handle zero-copy packets here: */
    4.16 +        if (skb->skb_type == SKB_ZERO_COPY)
    4.17 +        {
    4.18 +                skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
    4.19 +
    4.20 +                /* remapping this address really screws up all the skb pointers.  We need 
     4.21 +                 * to fix them all up here sufficiently to get the packet demultiplexed.
    4.22 +                 */
    4.23 +                
    4.24 +                skb->data = skb->head;
     4.25 +                skb_reserve(skb,16); // need to ensure that all drivers, not just tulip, do this.
    4.26 +                skb->mac.raw = skb->data;
    4.27 +                skb->data += ETH_HLEN;
    4.28 +        }
    4.29 +        
    4.30  	/* The code is rearranged so that the path is the most
    4.31  	   short when CPU is congested, but is still operating.
    4.32  	 */
    4.33 @@ -747,10 +763,18 @@ drop:
    4.34  	netdev_rx_stat[this_cpu].dropped++;
    4.35  	local_irq_restore(flags);
    4.36  
    4.37 +        if (skb->skb_type == SKB_ZERO_COPY)
    4.38 +                unmap_domain_mem(skb->head);
    4.39 +        
    4.40  	kfree_skb(skb);
    4.41  	return NET_RX_DROP;
    4.42  
    4.43  found:
    4.44 +        if (skb->skb_type == SKB_ZERO_COPY) {
    4.45 +                unmap_domain_mem(skb->head);
    4.46 +                //skb->head = (u8 *)((skb->pf - frame_table) << PAGE_SHIFT);
    4.47 +                skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
    4.48 +        }
    4.49          hyp_event_notify(cpu_mask);
    4.50          local_irq_restore(flags);
    4.51          return 0;
    4.52 @@ -930,8 +954,28 @@ void flush_rx_queue(void)
    4.53                      rx = shadow_ring->rx_ring+i;
    4.54                      if ( (skb->len + ETH_HLEN) < rx->size )
    4.55                          rx->size = skb->len + ETH_HLEN;
    4.56 +
    4.57 +                    /* remap the packet again.  This is very temporary and will shortly be
    4.58 +                     * replaced with a page swizzle.
    4.59 +                     */
    4.60 +
    4.61 +                    if (skb->skb_type == SKB_ZERO_COPY)
    4.62 +                    {
    4.63 +                        skb->head = (u8 *)map_domain_mem(((skb->pf - frame_table) << PAGE_SHIFT));
    4.64 +                        skb->data = skb->head;
    4.65 +                        skb_reserve(skb,16); 
    4.66 +                        skb->mac.raw = skb->data;
    4.67 +                        skb->data += ETH_HLEN;
    4.68 +                    }
    4.69 +                                                                        
    4.70                      copy_to_user((void *)rx->addr, skb->mac.raw, rx->size);
    4.71                      copy_to_user(net_ring->rx_ring+i, rx, sizeof(rx));
    4.72 +                    
    4.73 +                    if (skb->skb_type == SKB_ZERO_COPY)
    4.74 +                    {
    4.75 +                        unmap_domain_mem(skb->head);
    4.76 +                        skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
    4.77 +                    }
    4.78                  }
    4.79                  net_ring->rx_cons = (i+1) & (RX_RING_SIZE-1);
    4.80                  if ( net_ring->rx_cons == net_ring->rx_event )
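
Taken together, the two net/dev.c hunks give a zero-copy buffer this lifecycle: the domain page is mapped into hypervisor space while headers are parsed in netif_rx(), unmapped and poisoned, then mapped again in flush_rx_queue() for the copy_to_user() into the guest's ring. A condensed sketch of the repeated pattern (zc_map_skb/zc_unmap_skb are hypothetical helper names, not in the patch):

    static void zc_map_skb(struct sk_buff *skb)
    {
            skb->head = (u8 *)map_domain_mem((skb->pf - frame_table) << PAGE_SHIFT);
            skb->data = skb->head;
            skb_reserve(skb, 16);      /* mirror the driver's 16-byte reserve */
            skb->mac.raw = skb->data;
            skb->data += ETH_HLEN;     /* step over the ethernet header */
    }

    static void zc_unmap_skb(struct sk_buff *skb)
    {
            unmap_domain_mem(skb->head);
            /* poison so any stale dereference faults loudly */
            skb->head = skb->data = skb->tail = (void *)0xdeadbeef;
    }
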
     5.1 --- a/xen-2.4.16/net/eth.c	Wed Jan 29 08:59:21 2003 +0000
     5.2 +++ b/xen-2.4.16/net/eth.c	Wed Jan 29 11:17:07 2003 +0000
     5.3 @@ -161,52 +161,62 @@ unsigned short eth_type_trans(struct sk_
     5.4  	struct ethhdr *eth;
     5.5  	unsigned char *rawp;
     5.6  	
     5.7 -	skb->mac.raw=skb->data;
     5.8 -	skb_pull(skb,dev->hard_header_len);
     5.9 -	eth= skb->mac.ethernet;
    5.10 +        if (skb->skb_type == SKB_ZERO_COPY)
    5.11 +        {
    5.12 +            skb_pull(skb,dev->hard_header_len);
    5.13 +            skb->mac.raw= (void *)0xdeadbeef;
    5.14 +            return htons(ETH_P_802_2);
    5.15 +            
    5.16 +        } else { // SKB_NORMAL
    5.17 +        
    5.18 +	    skb->mac.raw=skb->data;
    5.19 +	    skb_pull(skb,dev->hard_header_len);
    5.20 +	    eth= skb->mac.ethernet;
    5.21  	
    5.22 -	if(*eth->h_dest&1)
    5.23 -	{
    5.24 -		if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
    5.25 +	    if(*eth->h_dest&1)
    5.26 +	    {
    5.27 +	    	if(memcmp(eth->h_dest,dev->broadcast, ETH_ALEN)==0)
    5.28  			skb->pkt_type=PACKET_BROADCAST;
    5.29  		else
    5.30  			skb->pkt_type=PACKET_MULTICAST;
    5.31 -	}
    5.32 +	    }
    5.33  	
    5.34 -	/*
    5.35 -	 *	This ALLMULTI check should be redundant by 1.4
    5.36 -	 *	so don't forget to remove it.
    5.37 -	 *
    5.38 -	 *	Seems, you forgot to remove it. All silly devices
    5.39 -	 *	seems to set IFF_PROMISC.
    5.40 -	 */
     5.41 +	    /*
     5.42 +	     *	This ALLMULTI check should be redundant by 1.4
     5.43 +	     *	so don't forget to remove it.
     5.44 +	     *
     5.45 +	     *	Seems you forgot to remove it. All silly devices
     5.46 +	     *	seem to set IFF_PROMISC.
     5.47 +	     */
    5.48  	 
    5.49 -	else if(1 /*dev->flags&IFF_PROMISC*/)
    5.50 -	{
    5.51 +	    else if(1 /*dev->flags&IFF_PROMISC*/)
    5.52 +	    {
    5.53  		if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN))
    5.54  			skb->pkt_type=PACKET_OTHERHOST;
    5.55 -	}
    5.56 +	    }
    5.57  	
    5.58 -	if (ntohs(eth->h_proto) >= 1536)
    5.59 +	    if (ntohs(eth->h_proto) >= 1536)
    5.60  		return eth->h_proto;
    5.61  		
    5.62 -	rawp = skb->data;
    5.63 +	    rawp = skb->data;
    5.64  	
    5.65 -	/*
    5.66 -	 *	This is a magic hack to spot IPX packets. Older Novell breaks
    5.67 -	 *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
    5.68 -	 *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
    5.69 -	 *	won't work for fault tolerant netware but does for the rest.
    5.70 -	 */
    5.71 -	if (*(unsigned short *)rawp == 0xFFFF)
     5.72 +	    /*
     5.73 +	     *	This is a magic hack to spot IPX packets. Older Novell breaks
     5.74 +	     *	the protocol design and runs IPX over 802.3 without an 802.2 LLC
     5.75 +	     *	layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This
     5.76 +	     *	won't work for fault tolerant netware but does for the rest.
     5.77 +	     */
    5.78 +	    if (*(unsigned short *)rawp == 0xFFFF)
    5.79  		return htons(ETH_P_802_3);
    5.80  		
    5.81 -	/*
    5.82 -	 *	Real 802.2 LLC
    5.83 -	 */
    5.84 -	return htons(ETH_P_802_2);
     5.85 +	    /*
     5.86 +	     *	Real 802.2 LLC
     5.87 +	     */
    5.88 +	    return htons(ETH_P_802_2);
    5.89 +        }
    5.90  }
    5.91  
    5.92 +
    5.93  int eth_header_parse(struct sk_buff *skb, unsigned char *haddr)
    5.94  {
    5.95  	struct ethhdr *eth = skb->mac.ethernet;
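
Most of the eth.c hunk is re-indentation; the behavioural change is the early return for zero-copy skbs, whose header bytes sit in domain memory that may be unmapped when classification runs and so must not be dereferenced. The essential shape, with the unchanged SKB_NORMAL body elided:

    unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev)
    {
            if (skb->skb_type == SKB_ZERO_COPY) {
                    skb_pull(skb, dev->hard_header_len);
                    skb->mac.raw = (void *)0xdeadbeef;  /* poisoned */
                    /* placeholder type; real demux happens later against
                     * the shadow ring */
                    return htons(ETH_P_802_2);
            }
            /* ... original classification, unchanged, for SKB_NORMAL ... */
    }
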
     6.1 --- a/xen-2.4.16/net/skbuff.c	Wed Jan 29 08:59:21 2003 +0000
     6.2 +++ b/xen-2.4.16/net/skbuff.c	Wed Jan 29 11:17:07 2003 +0000
     6.3 @@ -149,6 +149,102 @@ static __inline__ void skb_head_to_pool(
     6.4  	kmem_cache_free(skbuff_head_cache, skb);
     6.5  }
     6.6  
     6.7 +static inline u8 *alloc_skb_data_page(struct sk_buff *skb)
     6.8 +{
     6.9 +        struct list_head *list_ptr;
    6.10 +        struct pfn_info  *pf;
    6.11 +        unsigned long flags;
    6.12 +        
    6.13 +        spin_lock_irqsave(&free_list_lock, flags);
    6.14 +
     6.15 +        if (!free_pfns) { spin_unlock_irqrestore(&free_list_lock, flags); return NULL; }
    6.16 +
    6.17 +        list_ptr = free_list.next;
    6.18 +        pf = list_entry(list_ptr, struct pfn_info, list);
    6.19 +        pf->flags = 0; // owned by dom0
    6.20 +        list_del(&pf->list);
    6.21 +        pf->next = pf->prev = (pf - frame_table);
    6.22 +        free_pfns--;
    6.23 +
    6.24 +        spin_unlock_irqrestore(&free_list_lock, flags);
    6.25 +
    6.26 +        skb->pf = pf;
    6.27 +        return (u8 *)((pf - frame_table) << PAGE_SHIFT);
    6.28 +}
    6.29 +
    6.30 +static inline void dealloc_skb_data_page(struct sk_buff *skb)
    6.31 +{
    6.32 +        struct pfn_info  *pf;
    6.33 +        unsigned long flags;
    6.34 +
    6.35 +        pf = skb->pf;
    6.36 +
    6.37 +        spin_lock_irqsave(&free_list_lock, flags);
    6.38 +
    6.39 +        list_add_tail(&pf->list, &free_list);
    6.40 +        free_pfns++;
    6.41 +
    6.42 +        spin_unlock_irqrestore(&free_list_lock, flags);
    6.43 +}
    6.44 +
    6.45 +struct sk_buff *alloc_zc_skb(unsigned int size,int gfp_mask)
    6.46 +{
    6.47 +        struct sk_buff *skb;
    6.48 +        u8 *data;
    6.49 +
    6.50 +        if (in_interrupt() && (gfp_mask & __GFP_WAIT)) {
    6.51 +                static int count = 0;
    6.52 +                if (++count < 5) {
    6.53 +                        printk(KERN_ERR "alloc_skb called nonatomically "
    6.54 +                               "from interrupt %p\n", NET_CALLER(size));
    6.55 +                        BUG();
    6.56 +                }
    6.57 +                gfp_mask &= ~__GFP_WAIT;
    6.58 +        }
    6.59 +
    6.60 +        /* Get the HEAD */
    6.61 +        skb = skb_head_from_pool();
    6.62 +        if (skb == NULL) {
    6.63 +                skb = kmem_cache_alloc(skbuff_head_cache, gfp_mask & ~__GFP_DMA);
    6.64 +                if (skb == NULL)
    6.65 +                        goto nohead;
    6.66 +        }
    6.67 +
    6.68 +        /* Get the DATA. Size must match skb_add_mtu(). */
    6.69 +        size = SKB_DATA_ALIGN(size);
    6.70 +        data = alloc_skb_data_page(skb);
    6.71 +        if (data == NULL)
    6.72 +                goto nodata;
    6.73 +
    6.74 +        /* XXX: does not include slab overhead */
    6.75 +        skb->truesize = size + sizeof(struct sk_buff);
    6.76 +
    6.77 +        /* Load the data pointers. */
    6.78 +        skb->head = data;
    6.79 +        skb->data = data;
    6.80 +        skb->tail = data;
    6.81 +        skb->end = data + size;
    6.82 +
    6.83 +        /* Set up other state */
    6.84 +        skb->len = 0;
    6.85 +        skb->cloned = 0;
    6.86 +        skb->data_len = 0;
    6.87 +        skb->src_vif = VIF_UNKNOWN_INTERFACE;
    6.88 +        skb->dst_vif = VIF_UNKNOWN_INTERFACE;
    6.89 +        skb->skb_type = SKB_ZERO_COPY;
    6.90 +
    6.91 +        atomic_set(&skb->users, 1);
    6.92 +        atomic_set(&(skb_shinfo(skb)->dataref), 1);
    6.93 +        skb_shinfo(skb)->nr_frags = 0;
    6.94 +        skb_shinfo(skb)->frag_list = NULL;
    6.95 +        return skb;
    6.96 +
    6.97 +nodata:
    6.98 +        skb_head_to_pool(skb);
    6.99 +nohead:
   6.100 +        return NULL;
   6.101 +}
   6.102 +
   6.103  
   6.104  /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
   6.105   *	'private' fields and also do memory statistics to find all the
   6.106 @@ -213,6 +309,7 @@ struct sk_buff *alloc_skb(unsigned int s
   6.107  	skb->data_len = 0;
   6.108          skb->src_vif = VIF_UNKNOWN_INTERFACE;
   6.109          skb->dst_vif = VIF_UNKNOWN_INTERFACE;
   6.110 +        skb->skb_type = SKB_NORMAL;
   6.111  
   6.112  	atomic_set(&skb->users, 1); 
   6.113  	atomic_set(&(skb_shinfo(skb)->dataref), 1);
   6.114 @@ -295,7 +392,13 @@ static void skb_release_data(struct sk_b
   6.115  		if (skb_shinfo(skb)->frag_list)
   6.116  			skb_drop_fraglist(skb);
   6.117  
   6.118 -		kfree(skb->head);
   6.119 +                if (skb->skb_type == SKB_NORMAL) {
   6.120 +		    kfree(skb->head);
   6.121 +                } else if (skb->skb_type == SKB_ZERO_COPY) {
   6.122 +                    dealloc_skb_data_page(skb);
   6.123 +                } else {
   6.124 +                    printk("skb_release_data called with unknown skb type!\n");
   6.125 +                }
   6.126  	}
   6.127  }
   6.128
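
End to end, the allocator changes mean __dev_alloc_skb() (rewired in the skbuff.h hunk) now returns a zero-copy skb whose data page came off Xen's free_list, and kfree_skb() returns the page there via the new branch in skb_release_data(). A sketch of the rx-refill usage this enables, in the style of the tulip driver (PKT_BUF_SZ, tp and entry are assumed driver context):

    struct sk_buff *skb = dev_alloc_skb(PKT_BUF_SZ);  /* -> alloc_zc_skb() */
    if (skb != NULL) {
            skb->dev = dev;
            /* skb->data is a physical address into a domain page, so the
             * relaxed pci_map_single() hands it back untranslated: */
            tp->rx_ring[entry].buffer1 =
                    cpu_to_le32(pci_map_single(tp->pdev, skb->data,
                                               PKT_BUF_SZ, PCI_DMA_FROMDEVICE));
    }
    /* on the free side, skb_release_data() sees SKB_ZERO_COPY and calls
     * dealloc_skb_data_page() instead of kfree(skb->head) */
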