ia64/xen-unstable

changeset 10558:224597479812

[NET] back: Add TSO support

This patch adds TCP Segmentation Offload (TSO) support to the backend.
It also advertises this fact through xenbus so that the frontend can
detect this and send through TSO requests only if it is supported.

This is done using an extra request slot which is indicated by a flag
in the first slot. In the future, checksum offload can be done in the
same way.

The extra request slot must not be generated if the backend does not
support the appropriate feature bits. For now this is simply feature-tso.

If the frontend detects the presence of the appropriate feature bits,
it may generate TX requests which have the appropriate request flags
set that indicates the presence of an extra request slot with the extra
information.

On the backend the extra request slot is read if and only if the request
flags are set in the TX request.

This protocol allows more feature bits to be added in the future without
breaking compatibility. At least the hardware checksum bit is planned.

Even though only TSO is supported for now, the code actually supports
GSO, so it can be applied to any other protocol. The only missing bit
is the detection of host support for a specific GSO protocol. Once that
is added, we can advertise all supported protocols to the guest.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Jun 28 12:04:32 2006 +0100 (2006-06-28)
parents bb46d03f5f1d
children 4b51d081378d 25cd5216b30c
files linux-2.6-xen-sparse/drivers/xen/netback/interface.c linux-2.6-xen-sparse/drivers/xen/netback/netback.c linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c xen/include/public/io/netif.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Wed Jun 28 12:03:57 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Wed Jun 28 12:04:32 2006 +0100
     1.3 @@ -37,9 +37,9 @@
     1.4  static void __netif_up(netif_t *netif)
     1.5  {
     1.6  	struct net_device *dev = netif->dev;
     1.7 -	spin_lock_bh(&dev->xmit_lock);
     1.8 +	netif_tx_lock_bh(dev);
     1.9  	netif->active = 1;
    1.10 -	spin_unlock_bh(&dev->xmit_lock);
    1.11 +	netif_tx_unlock_bh(dev);
    1.12  	enable_irq(netif->irq);
    1.13  	netif_schedule_work(netif);
    1.14  }
    1.15 @@ -48,9 +48,9 @@ static void __netif_down(netif_t *netif)
    1.16  {
    1.17  	struct net_device *dev = netif->dev;
    1.18  	disable_irq(netif->irq);
    1.19 -	spin_lock_bh(&dev->xmit_lock);
    1.20 +	netif_tx_lock_bh(dev);
    1.21  	netif->active = 0;
    1.22 -	spin_unlock_bh(&dev->xmit_lock);
    1.23 +	netif_tx_unlock_bh(dev);
    1.24  	netif_deschedule_work(netif);
    1.25  }
    1.26  
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Wed Jun 28 12:03:57 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Wed Jun 28 12:04:32 2006 +0100
     2.3 @@ -490,14 +490,16 @@ inline static void net_tx_action_dealloc
     2.4  	}
     2.5  }
     2.6  
     2.7 -static void netbk_tx_err(netif_t *netif, RING_IDX end)
     2.8 +static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
     2.9  {
    2.10  	RING_IDX cons = netif->tx.req_cons;
    2.11  
    2.12  	do {
    2.13 -		netif_tx_request_t *txp = RING_GET_REQUEST(&netif->tx, cons);
    2.14  		make_tx_response(netif, txp, NETIF_RSP_ERROR);
    2.15 -	} while (++cons < end);
    2.16 +		if (++cons >= end)
    2.17 +			break;
    2.18 +		txp = RING_GET_REQUEST(&netif->tx, cons);
    2.19 +	} while (1);
    2.20  	netif->tx.req_cons = cons;
    2.21  	netif_schedule_work(netif);
    2.22  	netif_put(netif);
    2.23 @@ -508,7 +510,7 @@ static int netbk_count_requests(netif_t 
    2.24  {
    2.25  	netif_tx_request_t *first = txp;
    2.26  	RING_IDX cons = netif->tx.req_cons;
    2.27 -	int frags = 1;
    2.28 +	int frags = 0;
    2.29  
    2.30  	while (txp->flags & NETTXF_more_data) {
    2.31  		if (frags >= work_to_do) {
    2.32 @@ -543,7 +545,7 @@ static gnttab_map_grant_ref_t *netbk_get
    2.33  	skb_frag_t *frags = shinfo->frags;
    2.34  	netif_tx_request_t *txp;
    2.35  	unsigned long pending_idx = *((u16 *)skb->data);
    2.36 -	RING_IDX cons = netif->tx.req_cons + 1;
    2.37 +	RING_IDX cons = netif->tx.req_cons;
    2.38  	int i, start;
    2.39  
    2.40  	/* Skip first skb fragment if it is on same page as header fragment. */
    2.41 @@ -668,6 +670,7 @@ static void net_tx_action(unsigned long 
    2.42  	struct sk_buff *skb;
    2.43  	netif_t *netif;
    2.44  	netif_tx_request_t txreq;
    2.45 +	struct netif_tx_extra txtra;
    2.46  	u16 pending_idx;
    2.47  	RING_IDX i;
    2.48  	gnttab_map_grant_ref_t *mop;
    2.49 @@ -726,22 +729,37 @@ static void net_tx_action(unsigned long 
    2.50  		}
    2.51  		netif->remaining_credit -= txreq.size;
    2.52  
    2.53 +		work_to_do--;
    2.54 +		netif->tx.req_cons = ++i;
    2.55 +
    2.56 +		if (txreq.flags & NETTXF_extra_info) {
    2.57 +			if (work_to_do-- <= 0) {
    2.58 +				DPRINTK("Missing extra info\n");
    2.59 +				netbk_tx_err(netif, &txreq, i);
    2.60 +				continue;
    2.61 +			}
    2.62 +
    2.63 +			memcpy(&txtra, RING_GET_REQUEST(&netif->tx, i),
    2.64 +			       sizeof(txtra));
    2.65 +			netif->tx.req_cons = ++i;
    2.66 +		}
    2.67 +
    2.68  		ret = netbk_count_requests(netif, &txreq, work_to_do);
    2.69  		if (unlikely(ret < 0)) {
    2.70 -			netbk_tx_err(netif, i - ret);
    2.71 +			netbk_tx_err(netif, &txreq, i - ret);
    2.72  			continue;
    2.73  		}
    2.74  		i += ret;
    2.75  
    2.76  		if (unlikely(ret > MAX_SKB_FRAGS + 1)) {
    2.77  			DPRINTK("Too many frags\n");
    2.78 -			netbk_tx_err(netif, i);
    2.79 +			netbk_tx_err(netif, &txreq, i);
    2.80  			continue;
    2.81  		}
    2.82  
    2.83  		if (unlikely(txreq.size < ETH_HLEN)) {
    2.84  			DPRINTK("Bad packet size: %d\n", txreq.size);
    2.85 -			netbk_tx_err(netif, i);
    2.86 +			netbk_tx_err(netif, &txreq, i);
    2.87  			continue; 
    2.88  		}
    2.89  
    2.90 @@ -750,26 +768,32 @@ static void net_tx_action(unsigned long 
    2.91  			DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", 
    2.92  				txreq.offset, txreq.size, 
    2.93  				(txreq.offset &~PAGE_MASK) + txreq.size);
    2.94 -			netbk_tx_err(netif, i);
    2.95 +			netbk_tx_err(netif, &txreq, i);
    2.96  			continue;
    2.97  		}
    2.98  
    2.99  		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
   2.100  
   2.101  		data_len = (txreq.size > PKT_PROT_LEN &&
   2.102 -			    ret < MAX_SKB_FRAGS + 1) ?
   2.103 +			    ret < MAX_SKB_FRAGS) ?
   2.104  			PKT_PROT_LEN : txreq.size;
   2.105  
   2.106  		skb = alloc_skb(data_len+16, GFP_ATOMIC);
   2.107  		if (unlikely(skb == NULL)) {
   2.108  			DPRINTK("Can't allocate a skb in start_xmit.\n");
   2.109 -			netbk_tx_err(netif, i);
   2.110 +			netbk_tx_err(netif, &txreq, i);
   2.111  			break;
   2.112  		}
   2.113  
   2.114  		/* Packets passed to netif_rx() must have some headroom. */
   2.115  		skb_reserve(skb, 16);
   2.116  
   2.117 +		if (txreq.flags & NETTXF_gso) {
   2.118 +			skb_shinfo(skb)->gso_size = txtra.u.gso.size;
   2.119 +			skb_shinfo(skb)->gso_segs = txtra.u.gso.segs;
   2.120 +			skb_shinfo(skb)->gso_type = txtra.u.gso.type;
   2.121 +		}
   2.122 +
   2.123  		gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
   2.124  				  GNTMAP_host_map | GNTMAP_readonly,
   2.125  				  txreq.gref, netif->domid);
   2.126 @@ -782,7 +806,7 @@ static void net_tx_action(unsigned long 
   2.127  
   2.128  		__skb_put(skb, data_len);
   2.129  
   2.130 -		skb_shinfo(skb)->nr_frags = ret - 1;
   2.131 +		skb_shinfo(skb)->nr_frags = ret;
   2.132  		if (data_len < txreq.size) {
   2.133  			skb_shinfo(skb)->nr_frags++;
   2.134  			skb_shinfo(skb)->frags[0].page =
   2.135 @@ -909,6 +933,9 @@ static void make_tx_response(netif_t *ne
   2.136  	resp->id     = txp->id;
   2.137  	resp->status = st;
   2.138  
   2.139 +	if (txp->flags & NETTXF_extra_info)
   2.140 +		RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
   2.141 +
   2.142  	netif->tx.rsp_prod_pvt = ++i;
   2.143  	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
   2.144  	if (notify)
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Wed Jun 28 12:03:57 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Wed Jun 28 12:04:32 2006 +0100
     3.3 @@ -101,6 +101,12 @@ static int netback_probe(struct xenbus_d
     3.4  			goto abort_transaction;
     3.5  		}
     3.6  
     3.7 +		err = xenbus_printf(xbt, dev->nodename, "feature-tso", "%d", 1);
     3.8 +		if (err) {
     3.9 +			message = "writing feature-tso";
    3.10 +			goto abort_transaction;
    3.11 +		}
    3.12 +
    3.13  		err = xenbus_transaction_end(xbt, 0);
    3.14  	} while (err == -EAGAIN);
    3.15  
     4.1 --- a/xen/include/public/io/netif.h	Wed Jun 28 12:03:57 2006 +0100
     4.2 +++ b/xen/include/public/io/netif.h	Wed Jun 28 12:04:32 2006 +0100
     4.3 @@ -19,6 +19,16 @@
     4.4   * the appropriate req_event or rsp_event field in the shared ring.
     4.5   */
     4.6  
     4.7 +/*
     4.8 + * This is the 'wire' format for packets:
     4.9 + *  Request 1: netif_tx_request -- NETTXF_* (any flags)
    4.10 + * [Request 2: netif_tx_extra]  (only if request 1 has NETTXF_extra_info)
    4.11 + *  Request 3: netif_tx_request -- NETTXF_more_data
    4.12 + *  Request 4: netif_tx_request -- NETTXF_more_data
    4.13 + *  ...
    4.14 + *  Request N: netif_tx_request -- 0
    4.15 + */
    4.16 +
    4.17  /* Protocol checksum field is blank in the packet (hardware offload)? */
    4.18  #define _NETTXF_csum_blank     (0)
    4.19  #define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
    4.20 @@ -27,10 +37,17 @@
    4.21  #define _NETTXF_data_validated (1)
    4.22  #define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
    4.23  
    4.24 -/* Packet continues in the request. */
    4.25 +/* Packet continues in the next request descriptor. */
    4.26  #define _NETTXF_more_data      (2)
    4.27  #define  NETTXF_more_data      (1U<<_NETTXF_more_data)
    4.28  
    4.29 +/* Packet has GSO fields in the following descriptor (netif_tx_extra.u.gso). */
    4.30 +#define _NETTXF_gso            (3)
    4.31 +#define  NETTXF_gso            (1U<<_NETTXF_gso)
    4.32 +
    4.33 +/* This descriptor is followed by an extra-info descriptor (netif_tx_extra). */
    4.34 +#define  NETTXF_extra_info     (NETTXF_gso)
    4.35 +
    4.36  struct netif_tx_request {
    4.37      grant_ref_t gref;      /* Reference to buffer page */
    4.38      uint16_t offset;       /* Offset within buffer page */
    4.39 @@ -40,6 +57,18 @@ struct netif_tx_request {
    4.40  };
    4.41  typedef struct netif_tx_request netif_tx_request_t;
    4.42  
    4.43 +/* This structure needs to fit within netif_tx_request for compatibility. */
    4.44 +struct netif_tx_extra {
    4.45 +    union {
    4.46 +        /* NETTXF_gso: Generic Segmentation Offload. */
    4.47 +        struct netif_tx_gso {
    4.48 +            uint16_t size;	   /* GSO MSS. */
    4.49 +            uint16_t segs;	   /* GSO segment count. */
    4.50 +            uint16_t type;	   /* GSO type. */
    4.51 +        } gso;
    4.52 +    } u;
    4.53 +};
    4.54 +
    4.55  struct netif_tx_response {
    4.56      uint16_t id;
    4.57      int16_t  status;       /* NETIF_RSP_* */
    4.58 @@ -78,6 +107,8 @@ DEFINE_RING_TYPES(netif_rx, struct netif
    4.59  #define NETIF_RSP_DROPPED         -2
    4.60  #define NETIF_RSP_ERROR           -1
    4.61  #define NETIF_RSP_OKAY             0
    4.62 +/* No response: used for auxiliary requests (e.g., netif_tx_extra). */
    4.63 +#define NETIF_RSP_NULL             1
    4.64  
    4.65  #endif
    4.66