ia64/xen-unstable

changeset 15237:c4f62fe631e4

[NET] back: Add lazy copying

This patch adds lazy copying using the new unmap_and_replace grant
table operation.

We keep a list of pending entries in arrival order. We process
this list every time net_tx_action is invoked, and we ensure
that net_tx_action runs within one second of the arrival of the
first packet on the list.
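
Concretely, the tail of net_tx_action re-arms a dedicated timer
against the oldest outstanding entry (excerpted from the patch
below):

	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
	    !list_empty(&pending_inuse_head)) {
		struct netbk_tx_pending_inuse *oldest;

		/* The list is kept in arrival order, so its head is
		 * the oldest entry; fire within HZ (one second) of
		 * its arrival. */
		oldest = list_entry(pending_inuse_head.next,
				    struct netbk_tx_pending_inuse, list);
		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
	}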

When we process the list, any entry that has been around for more
than half a second is copied. This allows us to free the grant
table entry and return it to domU.
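
A simplified sketch of the aging walk in net_tx_action_dealloc
(the full loop in the patch below also handles -EBUSY and -ENOENT
from copy_pending_req):

	list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
		/* Entries are in arrival order: once one is too
		 * young to copy, all later ones are too. */
		if (time_after(inuse->alloc_time + HZ / 2, jiffies))
			break;

		/* Old enough: replace the granted page with a local
		 * copy, freeing the grant, and move the entry to
		 * the local done list ('list' in the patch below). */
		if (copy_pending_req(inuse - pending_inuse) == 0)
			list_move_tail(&inuse->list, &list);
	}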

If the new grant table operation is not available (e.g., an old
hypervisor, or an architecture that does not support it yet), we
simply copy each packet as we receive it using skb_linearize. We
also disable SG/TSO in this case.
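
The fallback is a single check in net_tx_action (excerpted from
the patch below); skb_linearize copies a fragmented skb into one
contiguous buffer, so the granted pages can be released promptly:

	if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
	    unlikely(skb_linearize(skb))) {
		/* Linearization failed (e.g. out of memory);
		 * drop the packet rather than hold the grant. */
		DPRINTK("Can't linearize skb in net_tx_action.\n");
		kfree_skb(skb);
		continue;
	}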

By default the new code is disabled. To enable it, load the
module with the argument copy_skb=1.
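
Mode selection happens once in netback_init (excerpted from the
patch below); a zero-count GNTTABOP_unmap_and_replace call serves
as a feature probe, returning nonzero when the hypervisor lacks
the operation:

	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
	if (MODPARM_copy_skb) {
		/* Zero-count probe: fails (e.g. -ENOSYS) when the
		 * hypervisor does not implement unmap_and_replace. */
		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
					      NULL, 0))
			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
		else
			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
	}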

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author kfraser@localhost.localdomain
date Wed May 30 10:47:05 2007 +0100 (2007-05-30)
parents 45f939d0c724
children 63211a8027fa
files linux-2.6-xen-sparse/drivers/xen/netback/common.h linux-2.6-xen-sparse/drivers/xen/netback/netback.c linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Wed May 30 10:46:13 2007 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Wed May 30 10:47:05 2007 +0100
     1.3 @@ -114,6 +114,14 @@ typedef struct netif_st {
     1.4  #define netback_carrier_off(netif)	((netif)->carrier = 0)
     1.5  #define netback_carrier_ok(netif)	((netif)->carrier)
     1.6  
     1.7 +enum {
     1.8 +	NETBK_DONT_COPY_SKB,
     1.9 +	NETBK_DELAYED_COPY_SKB,
    1.10 +	NETBK_ALWAYS_COPY_SKB,
    1.11 +};
    1.12 +
    1.13 +extern int netbk_copy_skb_mode;
    1.14 +
    1.15  #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
    1.16  #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
    1.17  
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Wed May 30 10:46:13 2007 +0100
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Wed May 30 10:47:05 2007 +0100
     2.3 @@ -49,6 +49,11 @@ struct netbk_rx_meta {
     2.4  	int copy:1;
     2.5  };
     2.6  
     2.7 +struct netbk_tx_pending_inuse {
     2.8 +	struct list_head list;
     2.9 +	unsigned long alloc_time;
    2.10 +};
    2.11 +
    2.12  static void netif_idx_release(u16 pending_idx);
    2.13  static void netif_page_release(struct page *page);
    2.14  static void make_tx_response(netif_t *netif, 
    2.15 @@ -68,15 +73,21 @@ static void net_rx_action(unsigned long 
    2.16  static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
    2.17  
    2.18  static struct timer_list net_timer;
    2.19 +static struct timer_list netbk_tx_pending_timer;
    2.20  
    2.21  #define MAX_PENDING_REQS 256
    2.22  
    2.23  static struct sk_buff_head rx_queue;
    2.24  
    2.25  static struct page **mmap_pages;
    2.26 +static inline unsigned long idx_to_pfn(unsigned int idx)
    2.27 +{
    2.28 +	return page_to_pfn(mmap_pages[idx]);
    2.29 +}
    2.30 +
    2.31  static inline unsigned long idx_to_kaddr(unsigned int idx)
    2.32  {
    2.33 -	return (unsigned long)pfn_to_kaddr(page_to_pfn(mmap_pages[idx]));
    2.34 +	return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
    2.35  }
    2.36  
    2.37  #define PKT_PROT_LEN 64
    2.38 @@ -95,6 +106,10 @@ static PEND_RING_IDX pending_prod, pendi
    2.39  static u16 dealloc_ring[MAX_PENDING_REQS];
    2.40  static PEND_RING_IDX dealloc_prod, dealloc_cons;
    2.41  
    2.42 +/* Doubly-linked list of in-use pending entries. */
    2.43 +static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
    2.44 +static LIST_HEAD(pending_inuse_head);
    2.45 +
    2.46  static struct sk_buff_head tx_queue;
    2.47  
    2.48  static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
    2.49 @@ -108,6 +123,13 @@ static spinlock_t net_schedule_list_lock
    2.50  static unsigned long mfn_list[MAX_MFN_ALLOC];
    2.51  static unsigned int alloc_index = 0;
    2.52  
    2.53 +/* Setting this allows the safe use of this driver without netloop. */
    2.54 +static int MODPARM_copy_skb;
    2.55 +module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
    2.56 +MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
    2.57 +
    2.58 +int netbk_copy_skb_mode;
    2.59 +
    2.60  static inline unsigned long alloc_mfn(void)
    2.61  {
    2.62  	BUG_ON(alloc_index == 0);
    2.63 @@ -719,6 +741,11 @@ static void net_alarm(unsigned long unus
    2.64  	tasklet_schedule(&net_rx_tasklet);
    2.65  }
    2.66  
    2.67 +static void netbk_tx_pending_timeout(unsigned long unused)
    2.68 +{
    2.69 +	tasklet_schedule(&net_tx_tasklet);
    2.70 +}
    2.71 +
    2.72  struct net_device_stats *netif_be_get_stats(struct net_device *dev)
    2.73  {
    2.74  	netif_t *netif = netdev_priv(dev);
    2.75 @@ -812,46 +839,97 @@ static void tx_credit_callback(unsigned 
    2.76  	netif_schedule_work(netif);
    2.77  }
    2.78  
    2.79 +static inline int copy_pending_req(PEND_RING_IDX pending_idx)
    2.80 +{
    2.81 +	return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
    2.82 +				      &mmap_pages[pending_idx]);
    2.83 +}
    2.84 +
    2.85  inline static void net_tx_action_dealloc(void)
    2.86  {
    2.87 +	struct netbk_tx_pending_inuse *inuse, *n;
    2.88  	gnttab_unmap_grant_ref_t *gop;
    2.89  	u16 pending_idx;
    2.90  	PEND_RING_IDX dc, dp;
    2.91  	netif_t *netif;
    2.92  	int ret;
    2.93 +	LIST_HEAD(list);
    2.94  
    2.95  	dc = dealloc_cons;
    2.96 -	dp = dealloc_prod;
    2.97 -
    2.98 -	/* Ensure we see all indexes enqueued by netif_idx_release(). */
    2.99 -	smp_rmb();
   2.100 +	gop = tx_unmap_ops;
   2.101  
   2.102  	/*
   2.103  	 * Free up any grants we have finished using
   2.104  	 */
   2.105 -	gop = tx_unmap_ops;
   2.106 -	while (dc != dp) {
   2.107 -		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
   2.108 -		gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
   2.109 -				    GNTMAP_host_map,
   2.110 -				    grant_tx_handle[pending_idx]);
   2.111 -		gop++;
   2.112 -	}
   2.113 +	do {
   2.114 +		dp = dealloc_prod;
   2.115 +
   2.116 +		/* Ensure we see all indices enqueued by netif_idx_release(). */
   2.117 +		smp_rmb();
   2.118 +
   2.119 +		while (dc != dp) {
   2.120 +			unsigned long pfn;
   2.121 +
   2.122 +			pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
   2.123 +			list_move_tail(&pending_inuse[pending_idx].list, &list);
   2.124 +
   2.125 +			pfn = idx_to_pfn(pending_idx);
   2.126 +			/* Already unmapped? */
   2.127 +			if (!phys_to_machine_mapping_valid(pfn))
   2.128 +				continue;
   2.129 +
   2.130 +			gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
   2.131 +					    GNTMAP_host_map,
   2.132 +					    grant_tx_handle[pending_idx]);
   2.133 +			gop++;
   2.134 +		}
   2.135 +
   2.136 +		if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
   2.137 +		    list_empty(&pending_inuse_head))
   2.138 +			break;
   2.139 +
   2.140 +		/* Copy any entries that have been pending for too long. */
   2.141 +		list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
   2.142 +			if (time_after(inuse->alloc_time + HZ / 2, jiffies))
   2.143 +				break;
   2.144 +
   2.145 +			switch (copy_pending_req(inuse - pending_inuse)) {
   2.146 +			case 0:
   2.147 +				list_move_tail(&inuse->list, &list);
   2.148 +				continue;
   2.149 +			case -EBUSY:
   2.150 +				list_del_init(&inuse->list);
   2.151 +				continue;
   2.152 +			case -ENOENT:
   2.153 +				continue;
   2.154 +			}
   2.155 +
   2.156 +			break;
   2.157 +		}
   2.158 +	} while (dp != dealloc_prod);
   2.159 +
   2.160 +	dealloc_cons = dc;
   2.161 +
   2.162  	ret = HYPERVISOR_grant_table_op(
   2.163  		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
   2.164  	BUG_ON(ret);
   2.165  
   2.166 -	while (dealloc_cons != dp) {
   2.167 -		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
   2.168 +	list_for_each_entry_safe(inuse, n, &list, list) {
   2.169 +		pending_idx = inuse - pending_inuse;
   2.170  
   2.171  		netif = pending_tx_info[pending_idx].netif;
   2.172  
   2.173  		make_tx_response(netif, &pending_tx_info[pending_idx].req, 
   2.174  				 NETIF_RSP_OKAY);
   2.175  
   2.176 +		/* Ready for next use. */
   2.177 +		gnttab_reset_grant_page(mmap_pages[pending_idx]);
   2.178 +
   2.179  		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
   2.180  
   2.181  		netif_put(netif);
   2.182 +
   2.183 +		list_del_init(&inuse->list);
   2.184  	}
   2.185  }
   2.186  
   2.187 @@ -1023,6 +1101,11 @@ static void netbk_fill_frags(struct sk_b
   2.188  		unsigned long pending_idx;
   2.189  
   2.190  		pending_idx = (unsigned long)frag->page;
   2.191 +
   2.192 +		pending_inuse[pending_idx].alloc_time = jiffies;
   2.193 +		list_add_tail(&pending_inuse[pending_idx].list,
   2.194 +			      &pending_inuse_head);
   2.195 +
   2.196  		txp = &pending_tx_info[pending_idx].req;
   2.197  		frag->page = virt_to_page(idx_to_kaddr(pending_idx));
   2.198  		frag->size = txp->size;
   2.199 @@ -1311,9 +1394,25 @@ static void net_tx_action(unsigned long 
   2.200  		netif->stats.rx_bytes += skb->len;
   2.201  		netif->stats.rx_packets++;
   2.202  
   2.203 +		if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
   2.204 +		    unlikely(skb_linearize(skb))) {
   2.205 +			DPRINTK("Can't linearize skb in net_tx_action.\n");
   2.206 +			kfree_skb(skb);
   2.207 +			continue;
   2.208 +		}
   2.209 +
   2.210  		netif_rx(skb);
   2.211  		netif->dev->last_rx = jiffies;
   2.212  	}
   2.213 +
   2.214 +	if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
   2.215 +	    !list_empty(&pending_inuse_head)) {
   2.216 +		struct netbk_tx_pending_inuse *oldest;
   2.217 +
   2.218 +		oldest = list_entry(pending_inuse_head.next,
   2.219 +				    struct netbk_tx_pending_inuse, list);
   2.220 +		mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
   2.221 +	}
   2.222  }
   2.223  
   2.224  static void netif_idx_release(u16 pending_idx)
   2.225 @@ -1333,9 +1432,6 @@ static void netif_idx_release(u16 pendin
   2.226  
   2.227  static void netif_page_release(struct page *page)
   2.228  {
   2.229 -	/* Ready for next use. */
   2.230 -	init_page_count(page);
   2.231 -
   2.232  	netif_idx_release(netif_page_index(page));
   2.233  }
   2.234  
   2.235 @@ -1457,6 +1553,10 @@ static int __init netback_init(void)
   2.236  	net_timer.data = 0;
   2.237  	net_timer.function = net_alarm;
   2.238  
   2.239 +	init_timer(&netbk_tx_pending_timer);
   2.240 +	netbk_tx_pending_timer.data = 0;
   2.241 +	netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
   2.242 +
   2.243  	mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
   2.244  	if (mmap_pages == NULL) {
   2.245  		printk("%s: out of memory\n", __FUNCTION__);
   2.246 @@ -1467,6 +1567,7 @@ static int __init netback_init(void)
   2.247  		page = mmap_pages[i];
   2.248  		SetPageForeign(page, netif_page_release);
   2.249  		netif_page_index(page) = i;
   2.250 +		INIT_LIST_HEAD(&pending_inuse[i].list);
   2.251  	}
   2.252  
   2.253  	pending_cons = 0;
   2.254 @@ -1477,6 +1578,15 @@ static int __init netback_init(void)
   2.255  	spin_lock_init(&net_schedule_list_lock);
   2.256  	INIT_LIST_HEAD(&net_schedule_list);
   2.257  
   2.258 +	netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
   2.259 +	if (MODPARM_copy_skb) {
   2.260 +		if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
   2.261 +					      NULL, 0))
   2.262 +			netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
   2.263 +		else
   2.264 +			netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
   2.265 +	}
   2.266 +
   2.267  	netif_xenbus_init();
   2.268  
   2.269  #ifdef NETBE_DEBUG_INTERRUPT
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Wed May 30 10:46:13 2007 +0100
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Wed May 30 10:47:05 2007 +0100
     3.3 @@ -62,6 +62,7 @@ static int netback_probe(struct xenbus_d
     3.4  	const char *message;
     3.5  	struct xenbus_transaction xbt;
     3.6  	int err;
     3.7 +	int sg;
     3.8  	struct backend_info *be = kzalloc(sizeof(struct backend_info),
     3.9  					  GFP_KERNEL);
    3.10  	if (!be) {
    3.11 @@ -73,6 +74,10 @@ static int netback_probe(struct xenbus_d
    3.12  	be->dev = dev;
    3.13  	dev->dev.driver_data = be;
    3.14  
    3.15 +	sg = 1;
    3.16 +	if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
    3.17 +		sg = 0;
    3.18 +
    3.19  	do {
    3.20  		err = xenbus_transaction_start(&xbt);
    3.21  		if (err) {
    3.22 @@ -80,14 +85,14 @@ static int netback_probe(struct xenbus_d
    3.23  			goto fail;
    3.24  		}
    3.25  
    3.26 -		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
    3.27 +		err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
    3.28  		if (err) {
    3.29  			message = "writing feature-sg";
    3.30  			goto abort_transaction;
    3.31  		}
    3.32  
    3.33  		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
    3.34 -				    "%d", 1);
    3.35 +				    "%d", sg);
    3.36  		if (err) {
    3.37  			message = "writing feature-gso-tcpv4";
    3.38  			goto abort_transaction;