ia64/xen-unstable

changeset 10309:aecdb4c52fa7

[NET] front: Transmit SG packets if supported

This patch adds scatter-and-gather transmission support to the frontend.
This allows the MTU to be raised right away and opens up the potential
for TSO in the future.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author kaf24@firebug.cl.cam.ac.uk
date Mon Jun 05 16:33:49 2006 +0100 (2006-06-05)
parents 50db8c95e65d
children e3af1912794b
files linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c xen/include/public/io/ring.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Mon Jun 05 16:13:47 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Mon Jun 05 16:33:49 2006 +0100
     1.3 @@ -45,6 +45,7 @@
     1.4  #include <linux/bitops.h>
     1.5  #include <linux/ethtool.h>
     1.6  #include <linux/in.h>
     1.7 +#include <linux/if_ether.h>
     1.8  #include <net/sock.h>
     1.9  #include <net/pkt_sched.h>
    1.10  #include <net/arp.h>
    1.11 @@ -173,6 +174,11 @@ static void xennet_sysfs_delif(struct ne
    1.12  #define xennet_sysfs_delif(dev) do { } while(0)
    1.13  #endif
    1.14  
    1.15 +static inline int xennet_can_sg(struct net_device *dev)
    1.16 +{
    1.17 +	return dev->features & NETIF_F_SG;
    1.18 +}
    1.19 +
    1.20  /**
    1.21   * Entry point to this code when a new device is created.  Allocate the basic
    1.22   * structures and the ring buffers for communication with the backend, and
    1.23 @@ -307,8 +313,6 @@ again:
    1.24  		goto destroy_ring;
    1.25  	}
    1.26  
    1.27 -	xenbus_switch_state(dev, XenbusStateConnected);
    1.28 -
    1.29  	return 0;
    1.30  
    1.31   abort_transaction:
    1.32 @@ -370,12 +374,9 @@ static int setup_device(struct xenbus_de
    1.33  		goto fail;
    1.34  
    1.35  	memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
    1.36 -	network_connect(netdev);
    1.37  	info->irq = bind_evtchn_to_irqhandler(
    1.38  		info->evtchn, netif_int, SA_SAMPLE_RANDOM, netdev->name,
    1.39  		netdev);
    1.40 -	(void)send_fake_arp(netdev);
    1.41 -	show_device(info);
    1.42  
    1.43  	return 0;
    1.44  
    1.45 @@ -391,17 +392,26 @@ static int setup_device(struct xenbus_de
    1.46  static void backend_changed(struct xenbus_device *dev,
    1.47  			    enum xenbus_state backend_state)
    1.48  {
    1.49 +	struct netfront_info *np = dev->data;
    1.50 +	struct net_device *netdev = np->netdev;
    1.51 +
    1.52  	DPRINTK("\n");
    1.53  
    1.54  	switch (backend_state) {
    1.55  	case XenbusStateInitialising:
    1.56 -	case XenbusStateInitWait:
    1.57  	case XenbusStateInitialised:
    1.58  	case XenbusStateConnected:
    1.59  	case XenbusStateUnknown:
    1.60  	case XenbusStateClosed:
    1.61  		break;
    1.62  
    1.63 +	case XenbusStateInitWait:
    1.64 +		network_connect(netdev);
    1.65 +		xenbus_switch_state(dev, XenbusStateConnected);
    1.66 +		(void)send_fake_arp(netdev);
    1.67 +		show_device(np);
    1.68 +		break;
    1.69 +
    1.70  	case XenbusStateClosing:
    1.71  		netfront_closing(dev);
    1.72  		break;
    1.73 @@ -452,13 +462,17 @@ static int network_open(struct net_devic
    1.74  	return 0;
    1.75  }
    1.76  
    1.77 +static inline int netfront_tx_slot_available(struct netfront_info *np)
    1.78 +{
    1.79 +	return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1;
    1.80 +}
    1.81 +
    1.82  static inline void network_maybe_wake_tx(struct net_device *dev)
    1.83  {
    1.84  	struct netfront_info *np = netdev_priv(dev);
    1.85  
    1.86  	if (unlikely(netif_queue_stopped(dev)) &&
    1.87 -	    !RING_FULL(&np->tx) &&
    1.88 -	    !gnttab_empty_grant_references(&np->gref_tx_head) &&
    1.89 +	    netfront_tx_slot_available(np) &&
    1.90  	    likely(netif_running(dev)))
    1.91  		netif_wake_queue(dev);
    1.92  }
    1.93 @@ -638,36 +652,95 @@ static void network_alloc_rx_buffers(str
    1.94  	RING_PUSH_REQUESTS(&np->rx);
    1.95  }
    1.96  
    1.97 +static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
    1.98 +			      struct netif_tx_request *tx)
    1.99 +{
   1.100 +	struct netfront_info *np = netdev_priv(dev);
   1.101 +	char *data = skb->data;
   1.102 +	unsigned long mfn;
   1.103 +	RING_IDX prod = np->tx.req_prod_pvt;
   1.104 +	int frags = skb_shinfo(skb)->nr_frags;
   1.105 +	unsigned int offset = offset_in_page(data);
   1.106 +	unsigned int len = skb_headlen(skb);
   1.107 +	unsigned int id;
   1.108 +	grant_ref_t ref;
   1.109 +	int i;
   1.110 +
   1.111 +	while (len > PAGE_SIZE - offset) {
   1.112 +		tx->size = PAGE_SIZE - offset;
   1.113 +		tx->flags |= NETTXF_more_data;
   1.114 +		len -= tx->size;
   1.115 +		data += tx->size;
   1.116 +		offset = 0;
   1.117 +
   1.118 +		id = get_id_from_freelist(np->tx_skbs);
   1.119 +		np->tx_skbs[id] = skb_get(skb);
   1.120 +		tx = RING_GET_REQUEST(&np->tx, prod++);
   1.121 +		tx->id = id;
   1.122 +		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
   1.123 +		BUG_ON((signed short)ref < 0);
   1.124 +
   1.125 +		mfn = virt_to_mfn(data);
   1.126 +		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
   1.127 +						mfn, GNTMAP_readonly);
   1.128 +
   1.129 +		tx->gref = np->grant_tx_ref[id] = ref;
   1.130 +		tx->offset = offset;
   1.131 +		tx->size = len;
   1.132 +		tx->flags = 0;
   1.133 +	}
   1.134 +
   1.135 +	for (i = 0; i < frags; i++) {
   1.136 +		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
   1.137 +
   1.138 +		tx->flags |= NETTXF_more_data;
   1.139 +
   1.140 +		id = get_id_from_freelist(np->tx_skbs);
   1.141 +		np->tx_skbs[id] = skb_get(skb);
   1.142 +		tx = RING_GET_REQUEST(&np->tx, prod++);
   1.143 +		tx->id = id;
   1.144 +		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
   1.145 +		BUG_ON((signed short)ref < 0);
   1.146 +
   1.147 +		mfn = pfn_to_mfn(page_to_pfn(frag->page));
   1.148 +		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
   1.149 +						mfn, GNTMAP_readonly);
   1.150 +
   1.151 +		tx->gref = np->grant_tx_ref[id] = ref;
   1.152 +		tx->offset = frag->page_offset;
   1.153 +		tx->size = frag->size;
   1.154 +		tx->flags = 0;
   1.155 +	}
   1.156 +
   1.157 +	np->tx.req_prod_pvt = prod;
   1.158 +}
   1.159  
   1.160  static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
   1.161  {
   1.162  	unsigned short id;
   1.163  	struct netfront_info *np = netdev_priv(dev);
   1.164  	struct netif_tx_request *tx;
   1.165 +	char *data = skb->data;
   1.166  	RING_IDX i;
   1.167  	grant_ref_t ref;
   1.168  	unsigned long mfn;
   1.169  	int notify;
   1.170 +	int frags = skb_shinfo(skb)->nr_frags;
   1.171 +	unsigned int offset = offset_in_page(data);
   1.172 +	unsigned int len = skb_headlen(skb);
   1.173  
   1.174 -	if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
   1.175 -		     PAGE_SIZE)) {
   1.176 -		struct sk_buff *nskb;
   1.177 -		nskb = __dev_alloc_skb(skb->len, GFP_ATOMIC|__GFP_NOWARN);
   1.178 -		if (unlikely(nskb == NULL))
   1.179 -			goto drop;
   1.180 -		skb_put(nskb, skb->len);
   1.181 -		memcpy(nskb->data, skb->data, skb->len);
   1.182 -		/* Copy only the header fields we use in this driver. */
   1.183 -		nskb->dev = skb->dev;
   1.184 -		nskb->ip_summed = skb->ip_summed;
   1.185 -		nskb->proto_data_valid = skb->proto_data_valid;
   1.186 -		dev_kfree_skb(skb);
   1.187 -		skb = nskb;
   1.188 +	frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
   1.189 +	if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
   1.190 +		printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
   1.191 +		       frags);
   1.192 +		dump_stack();
   1.193 +		goto drop;
   1.194  	}
   1.195  
   1.196  	spin_lock_irq(&np->tx_lock);
   1.197  
   1.198 -	if (unlikely(!netif_carrier_ok(dev))) {
   1.199 +	if (unlikely(!netif_carrier_ok(dev) ||
   1.200 +		     (frags > 1 && !xennet_can_sg(dev)))) {
   1.201  		spin_unlock_irq(&np->tx_lock);
   1.202  		goto drop;
   1.203  	}
   1.204 @@ -682,12 +755,12 @@ static int network_start_xmit(struct sk_
   1.205  	tx->id   = id;
   1.206  	ref = gnttab_claim_grant_reference(&np->gref_tx_head);
   1.207  	BUG_ON((signed short)ref < 0);
   1.208 -	mfn = virt_to_mfn(skb->data);
   1.209 +	mfn = virt_to_mfn(data);
   1.210  	gnttab_grant_foreign_access_ref(
   1.211  		ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
   1.212  	tx->gref = np->grant_tx_ref[id] = ref;
   1.213 -	tx->offset = (unsigned long)skb->data & ~PAGE_MASK;
   1.214 -	tx->size = skb->len;
   1.215 +	tx->offset = offset;
   1.216 +	tx->size = len;
   1.217  
   1.218  	tx->flags = 0;
   1.219  	if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
   1.220 @@ -696,14 +769,17 @@ static int network_start_xmit(struct sk_
   1.221  		tx->flags |= NETTXF_data_validated;
   1.222  
   1.223  	np->tx.req_prod_pvt = i + 1;
   1.224 +
   1.225 +	xennet_make_frags(skb, dev, tx);
   1.226 +	tx->size = skb->len;
   1.227 +
   1.228  	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
   1.229  	if (notify)
   1.230  		notify_remote_via_irq(np->irq);
   1.231  
   1.232  	network_tx_buf_gc(dev);
   1.233  
   1.234 -	if (RING_FULL(&np->tx) ||
   1.235 -	    gnttab_empty_grant_references(&np->gref_tx_head))
   1.236 +	if (!netfront_tx_slot_available(np))
   1.237  		netif_stop_queue(dev);
   1.238  
   1.239  	spin_unlock_irq(&np->tx_lock);
   1.240 @@ -963,6 +1039,38 @@ static struct net_device_stats *network_
   1.241  	return &np->stats;
   1.242  }
   1.243  
   1.244 +static int xennet_change_mtu(struct net_device *dev, int mtu)
   1.245 +{
   1.246 +	int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
   1.247 +
   1.248 +	if (mtu > max)
   1.249 +		return -EINVAL;
   1.250 +	dev->mtu = mtu;
   1.251 +	return 0;
   1.252 +}
   1.253 +
   1.254 +static int xennet_set_sg(struct net_device *dev, u32 data)
   1.255 +{
   1.256 +	if (data) {
   1.257 +		struct netfront_info *np = netdev_priv(dev);
   1.258 +		int val;
   1.259 +
   1.260 +		if (xenbus_scanf(XBT_NULL, np->xbdev->otherend, "feature-sg",
   1.261 +				 "%d", &val) < 0)
   1.262 +			val = 0;
   1.263 +		if (!val)
   1.264 +			return -ENOSYS;
   1.265 +	} else if (dev->mtu > ETH_DATA_LEN)
   1.266 +		dev->mtu = ETH_DATA_LEN;
   1.267 +
   1.268 +	return ethtool_op_set_sg(dev, data);
   1.269 +}
   1.270 +
   1.271 +static void xennet_set_features(struct net_device *dev)
   1.272 +{
   1.273 +	xennet_set_sg(dev, 1);
   1.274 +}
   1.275 +
   1.276  static void network_connect(struct net_device *dev)
   1.277  {
   1.278  	struct netfront_info *np;
   1.279 @@ -970,6 +1078,8 @@ static void network_connect(struct net_d
   1.280  	struct netif_tx_request *tx;
   1.281  	struct sk_buff *skb;
   1.282  
   1.283 +	xennet_set_features(dev);
   1.284 +
   1.285  	np = netdev_priv(dev);
   1.286  	spin_lock_irq(&np->tx_lock);
   1.287  	spin_lock(&np->rx_lock);
   1.288 @@ -1081,6 +1191,8 @@ static struct ethtool_ops network_ethtoo
   1.289  {
   1.290  	.get_tx_csum = ethtool_op_get_tx_csum,
   1.291  	.set_tx_csum = ethtool_op_set_tx_csum,
   1.292 +	.get_sg = ethtool_op_get_sg,
   1.293 +	.set_sg = xennet_set_sg,
   1.294  };
   1.295  
   1.296  #ifdef CONFIG_SYSFS
   1.297 @@ -1297,6 +1409,7 @@ static struct net_device * __devinit cre
   1.298  	netdev->poll            = netif_poll;
   1.299  	netdev->set_multicast_list = network_set_multicast_list;
   1.300  	netdev->uninit          = netif_uninit;
   1.301 +	netdev->change_mtu	= xennet_change_mtu;
   1.302  	netdev->weight          = 64;
   1.303  	netdev->features        = NETIF_F_IP_CSUM;
   1.304  
     2.1 --- a/xen/include/public/io/ring.h	Mon Jun 05 16:13:47 2006 +0100
     2.2 +++ b/xen/include/public/io/ring.h	Mon Jun 05 16:33:49 2006 +0100
     2.3 @@ -151,11 +151,15 @@ typedef struct __name##_back_ring __name
     2.4  #define RING_SIZE(_r)                                                   \
     2.5      ((_r)->nr_ents)
     2.6  
     2.7 +/* Number of free requests (for use on front side only). */
     2.8 +#define RING_FREE_REQUESTS(_r)						\
     2.9 +    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
    2.10 +
    2.11  /* Test if there is an empty slot available on the front ring.
    2.12   * (This is only meaningful from the front. )
    2.13   */
    2.14  #define RING_FULL(_r)                                                   \
    2.15 -    (((_r)->req_prod_pvt - (_r)->rsp_cons) == RING_SIZE(_r))
    2.16 +    (RING_FREE_REQUESTS(_r) == 0)
    2.17  
    2.18  /* Test if there are outstanding messages to be processed on a ring. */
    2.19  #define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \