ia64/xen-unstable

changeset 10882:9f29252c23b6

[NET] back: Transmit SG packets if supported

This patch adds scatter-gather transmission support to the backend.
This allows the MTU to be raised immediately and opens the way to TSO
support in the future.
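
A minimal userspace sketch of the slot accounting behind this change
(illustrative only, not part of the patch; DEMO_PAGE_SIZE and
DEMO_MAX_SKB_FRAGS are assumed values for a 4K-page kernel, and the real
driver simply uses skb_shinfo(skb)->nr_frags): an SG frame consumes one
rx ring slot for its linear head plus one per page fragment, which is why
netbk_queue_full() below keeps MAX_SKB_FRAGS slots of headroom.

    #include <stdio.h>

    #define DEMO_PAGE_SIZE     4096u
    #define DEMO_MAX_SKB_FRAGS 18   /* assumed: (65536/4096) + 2 on 4K pages */

    /* Ring slots consumed by one packet: one for the linear head plus
     * one per page-sized fragment carrying the remaining payload. */
    static int slots_for_packet(unsigned int nonlinear_len)
    {
            unsigned int frags =
                    (nonlinear_len + DEMO_PAGE_SIZE - 1) / DEMO_PAGE_SIZE;
            return 1 + (int)frags;
    }

    int main(void)
    {
            /* A standard 1500-byte frame fits in the linear head alone. */
            printf("MTU  1500: %d slot(s)\n", slots_for_packet(0));

            /* A near-64KB frame (the new netbk_change_mtu() ceiling)
             * spills into about 16 page fragments plus the head slot. */
            printf("MTU 65535: %d slot(s)\n", slots_for_packet(65535 - 256));

            /* Hence the queue-full check insists on MAX_SKB_FRAGS free
             * slots before accepting another packet. */
            printf("headroom : %d slot(s)\n", DEMO_MAX_SKB_FRAGS);
            return 0;
    }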

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author kfraser@localhost.localdomain
date Mon Jul 31 17:42:13 2006 +0100 (2006-07-31)
parents 5f5a2f282032
children 485616ab73e3
files	linux-2.6-xen-sparse/drivers/xen/netback/common.h
	linux-2.6-xen-sparse/drivers/xen/netback/interface.c
	linux-2.6-xen-sparse/drivers/xen/netback/netback.c
	linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c
	xen/include/public/io/netif.h
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Mon Jul 31 17:35:43 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/common.h	Mon Jul 31 17:42:13 2006 +0100
     1.3 @@ -129,4 +129,10 @@ static inline int netbk_can_queue(struct
     1.4  	return netif->can_queue;
     1.5  }
     1.6  
     1.7 +static inline int netbk_can_sg(struct net_device *dev)
     1.8 +{
     1.9 +	netif_t *netif = netdev_priv(dev);
    1.10 +	return netif->features & NETIF_F_SG;
    1.11 +}
    1.12 +
    1.13  #endif /* __NETIF__BACKEND__COMMON_H__ */
     2.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Mon Jul 31 17:35:43 2006 +0100
     2.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/interface.c	Mon Jul 31 17:42:13 2006 +0100
     2.3 @@ -62,10 +62,34 @@ static int net_close(struct net_device *
     2.4  	return 0;
     2.5  }
     2.6  
     2.7 +static int netbk_change_mtu(struct net_device *dev, int mtu)
     2.8 +{
     2.9 +	int max = netbk_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
    2.10 +
    2.11 +	if (mtu > max)
    2.12 +		return -EINVAL;
    2.13 +	dev->mtu = mtu;
    2.14 +	return 0;
    2.15 +}
    2.16 +
    2.17 +static int netbk_set_sg(struct net_device *dev, u32 data)
    2.18 +{
    2.19 +	if (data) {
    2.20 +		netif_t *netif = netdev_priv(dev);
    2.21 +
    2.22 +		if (!(netif->features & NETIF_F_SG))
    2.23 +			return -ENOSYS;
    2.24 +	}
    2.25 +
    2.26 +	return ethtool_op_set_sg(dev, data);
    2.27 +}
    2.28 +
    2.29  static struct ethtool_ops network_ethtool_ops =
    2.30  {
    2.31  	.get_tx_csum = ethtool_op_get_tx_csum,
    2.32  	.set_tx_csum = ethtool_op_set_tx_csum,
    2.33 +	.get_sg = ethtool_op_get_sg,
    2.34 +	.set_sg = netbk_set_sg,
    2.35  	.get_link = ethtool_op_get_link,
    2.36  };
    2.37  
    2.38 @@ -101,6 +125,7 @@ netif_t *netif_alloc(domid_t domid, unsi
    2.39  	dev->get_stats       = netif_be_get_stats;
    2.40  	dev->open            = net_open;
    2.41  	dev->stop            = net_close;
    2.42 +	dev->change_mtu	     = netbk_change_mtu;
    2.43  	dev->features        = NETIF_F_IP_CSUM;
    2.44  
    2.45  	SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
     3.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Mon Jul 31 17:35:43 2006 +0100
     3.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/netback.c	Mon Jul 31 17:42:13 2006 +0100
     3.3 @@ -40,6 +40,11 @@
     3.4  
     3.5  /*#define NETBE_DEBUG_INTERRUPT*/
     3.6  
     3.7 +struct netbk_rx_meta {
     3.8 +	skb_frag_t frag;
     3.9 +	int id;
    3.10 +};
    3.11 +
    3.12  static void netif_idx_release(u16 pending_idx);
    3.13  static void netif_page_release(struct page *page);
    3.14  static void make_tx_response(netif_t *netif, 
    3.15 @@ -100,21 +105,27 @@ static spinlock_t net_schedule_list_lock
    3.16  static unsigned long mfn_list[MAX_MFN_ALLOC];
    3.17  static unsigned int alloc_index = 0;
    3.18  
    3.19 -static unsigned long alloc_mfn(void)
    3.20 +static inline unsigned long alloc_mfn(void)
    3.21  {
    3.22 -	unsigned long mfn = 0;
    3.23 +	return mfn_list[--alloc_index];
    3.24 +}
    3.25 +
    3.26 +static int check_mfn(int nr)
    3.27 +{
    3.28  	struct xen_memory_reservation reservation = {
    3.29 -		.nr_extents   = MAX_MFN_ALLOC,
    3.30  		.extent_order = 0,
    3.31  		.domid        = DOMID_SELF
    3.32  	};
    3.33 -	set_xen_guest_handle(reservation.extent_start, mfn_list);
    3.34 -	if ( unlikely(alloc_index == 0) )
    3.35 -		alloc_index = HYPERVISOR_memory_op(
    3.36 -			XENMEM_increase_reservation, &reservation);
    3.37 -	if ( alloc_index != 0 )
    3.38 -		mfn = mfn_list[--alloc_index];
    3.39 -	return mfn;
    3.40 +
    3.41 +	if (likely(alloc_index >= nr))
    3.42 +		return 0;
    3.43 +
    3.44 +	set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
    3.45 +	reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
    3.46 +	alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
    3.47 +					    &reservation);
    3.48 +
    3.49 +	return alloc_index >= nr ? 0 : -ENOMEM;
    3.50  }
    3.51  
    3.52  static inline void maybe_schedule_tx_action(void)
    3.53 @@ -136,12 +147,87 @@ static inline int is_xen_skb(struct sk_b
    3.54  	return (cp == skbuff_cachep);
    3.55  }
    3.56  
    3.57 +static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
    3.58 +{
    3.59 +	struct skb_shared_info *ninfo;
    3.60 +	struct sk_buff *nskb;
    3.61 +	unsigned long offset;
    3.62 +	int ret;
    3.63 +	int len;
    3.64 +	int headlen;
    3.65 +
    3.66 +	nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
    3.67 +	if (unlikely(!nskb))
    3.68 +		goto err;
    3.69 +
    3.70 +	skb_reserve(nskb, 16);
    3.71 +	headlen = nskb->end - nskb->data;
    3.72 +	if (headlen > skb_headlen(skb))
    3.73 +		headlen = skb_headlen(skb);
    3.74 +	ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
    3.75 +	BUG_ON(ret);
    3.76 +
    3.77 +	ninfo = skb_shinfo(nskb);
    3.78 +	ninfo->gso_size = skb_shinfo(skb)->gso_size;
    3.79 +	ninfo->gso_type = skb_shinfo(skb)->gso_type;
    3.80 +
    3.81 +	offset = headlen;
    3.82 +	len = skb->len - headlen;
    3.83 +
    3.84 +	nskb->len = skb->len;
    3.85 +	nskb->data_len = len;
    3.86 +	nskb->truesize += len;
    3.87 +
    3.88 +	while (len) {
    3.89 +		struct page *page;
    3.90 +		int copy;
    3.91 +		int zero;
    3.92 +
    3.93 +		if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
    3.94 +			dump_stack();
    3.95 +			goto err_free;
    3.96 +		}
    3.97 +
    3.98 +		copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
    3.99 +		zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
   3.100 +
   3.101 +		page = alloc_page(GFP_ATOMIC | zero);
   3.102 +		if (unlikely(!page))
   3.103 +			goto err_free;
   3.104 +
   3.105 +		ret = skb_copy_bits(skb, offset, page_address(page), copy);
   3.106 +		BUG_ON(ret);
   3.107 +
   3.108 +		ninfo->frags[ninfo->nr_frags].page = page;
   3.109 +		ninfo->frags[ninfo->nr_frags].page_offset = 0;
   3.110 +		ninfo->frags[ninfo->nr_frags].size = copy;
   3.111 +		ninfo->nr_frags++;
   3.112 +
   3.113 +		offset += copy;
   3.114 +		len -= copy;
   3.115 +	}
   3.116 +
   3.117 +	offset = nskb->data - skb->data;
   3.118 +
   3.119 +	nskb->h.raw = skb->h.raw + offset;
   3.120 +	nskb->nh.raw = skb->nh.raw + offset;
   3.121 +	nskb->mac.raw = skb->mac.raw + offset;
   3.122 +
   3.123 +	return nskb;
   3.124 +
   3.125 + err_free:
   3.126 +	kfree_skb(nskb);
   3.127 + err:
   3.128 +	return NULL;
   3.129 +}
   3.130 +
   3.131  static inline int netbk_queue_full(netif_t *netif)
   3.132  {
   3.133  	RING_IDX peek = netif->rx_req_cons_peek;
   3.134  
   3.135 -	return ((netif->rx.sring->req_prod - peek) <= 0) ||
   3.136 -	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) <= 0);
   3.137 +	return ((netif->rx.sring->req_prod - peek) <= MAX_SKB_FRAGS) ||
   3.138 +	       ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) <=
   3.139 +		MAX_SKB_FRAGS);
   3.140  }
   3.141  
   3.142  int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
   3.143 @@ -165,20 +251,12 @@ int netif_be_start_xmit(struct sk_buff *
   3.144  	 * We do not copy the packet unless:
   3.145  	 *  1. The data is shared; or
   3.146  	 *  2. The data is not allocated from our special cache.
   3.147 -	 * NB. We also couldn't cope with fragmented packets, but we won't get
   3.148 -	 *     any because we not advertise the NETIF_F_SG feature.
   3.149 +	 *  3. The data is fragmented.
   3.150  	 */
   3.151 -	if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
   3.152 -		int hlen = skb->data - skb->head;
   3.153 -		int ret;
   3.154 -		struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
   3.155 +	if (skb_cloned(skb) || skb_is_nonlinear(skb) || !is_xen_skb(skb)) {
   3.156 +		struct sk_buff *nskb = netbk_copy_skb(skb);
   3.157  		if ( unlikely(nskb == NULL) )
   3.158  			goto drop;
   3.159 -		skb_reserve(nskb, hlen);
   3.160 -		__skb_put(nskb, skb->len);
   3.161 -		ret = skb_copy_bits(skb, -hlen, nskb->data - hlen,
   3.162 -				     skb->len + hlen);
   3.163 -		BUG_ON(ret);
   3.164  		/* Copy only the header fields we use in this driver. */
   3.165  		nskb->dev = skb->dev;
   3.166  		nskb->ip_summed = skb->ip_summed;
   3.167 @@ -187,11 +265,12 @@ int netif_be_start_xmit(struct sk_buff *
   3.168  		skb = nskb;
   3.169  	}
   3.170  
   3.171 -	netif->rx_req_cons_peek++;
   3.172 +	netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1;
   3.173  	netif_get(netif);
   3.174  
   3.175  	if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
   3.176 -		netif->rx.sring->req_event = netif->rx_req_cons_peek + 1;
   3.177 +		netif->rx.sring->req_event = netif->rx_req_cons_peek +
   3.178 +			MAX_SKB_FRAGS + 1;
   3.179  		mb(); /* request notification /then/ check & stop the queue */
   3.180  		if (netbk_queue_full(netif))
   3.181  			netif_stop_queue(dev);
   3.182 @@ -227,116 +306,80 @@ int xen_network_done(void)
   3.183  }
   3.184  #endif
   3.185  
   3.186 -static void net_rx_action(unsigned long unused)
   3.187 +static u16 netbk_gop_frag(netif_t *netif, struct page *page, int count, int i)
   3.188  {
   3.189 -	netif_t *netif = NULL; 
   3.190 -	s8 status;
   3.191 -	u16 size, id, irq, flags;
   3.192 -	multicall_entry_t *mcl;
   3.193 -	mmu_update_t *mmu;
   3.194 -	gnttab_transfer_t *gop;
   3.195 -	unsigned long vdata, old_mfn, new_mfn;
   3.196 -	struct sk_buff_head rxq;
   3.197 -	struct sk_buff *skb;
   3.198 -	int notify_nr = 0;
   3.199 -	int ret;
   3.200 -	/*
   3.201 -	 * Putting hundreds of bytes on the stack is considered rude.
   3.202 -	 * Static works because a tasklet can only be on one CPU at any time.
   3.203 -	 */
   3.204 -	static u16 notify_list[NET_RX_RING_SIZE];
   3.205 +	multicall_entry_t *mcl = rx_mcl + count;
   3.206 +	mmu_update_t *mmu = rx_mmu + count;
   3.207 +	gnttab_transfer_t *gop = grant_rx_op + count;
   3.208 +	netif_rx_request_t *req;
   3.209 +	unsigned long old_mfn, new_mfn;
   3.210  
   3.211 -	skb_queue_head_init(&rxq);
   3.212 +	old_mfn = virt_to_mfn(page_address(page));
   3.213  
   3.214 -	mcl = rx_mcl;
   3.215 -	mmu = rx_mmu;
   3.216 -	gop = grant_rx_op;
   3.217 -
   3.218 -	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
   3.219 -		netif   = netdev_priv(skb->dev);
   3.220 -		vdata   = (unsigned long)skb->data;
   3.221 -		old_mfn = virt_to_mfn(vdata);
   3.222 +	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   3.223 +		new_mfn = alloc_mfn();
   3.224  
   3.225 -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   3.226 -			/* Memory squeeze? Back off for an arbitrary while. */
   3.227 -			if ((new_mfn = alloc_mfn()) == 0) {
   3.228 -				if ( net_ratelimit() )
   3.229 -					WPRINTK("Memory squeeze in netback "
   3.230 -						"driver.\n");
   3.231 -				mod_timer(&net_timer, jiffies + HZ);
   3.232 -				skb_queue_head(&rx_queue, skb);
   3.233 -				break;
   3.234 -			}
   3.235 -			/*
   3.236 -			 * Set the new P2M table entry before reassigning
   3.237 -			 * the old data page. Heed the comment in
   3.238 -			 * pgtable-2level.h:pte_page(). :-)
   3.239 -			 */
   3.240 -			set_phys_to_machine(
   3.241 -				__pa(skb->data) >> PAGE_SHIFT,
   3.242 -				new_mfn);
   3.243 +		/*
   3.244 +		 * Set the new P2M table entry before reassigning
   3.245 +		 * the old data page. Heed the comment in
   3.246 +		 * pgtable-2level.h:pte_page(). :-)
   3.247 +		 */
   3.248 +		set_phys_to_machine(page_to_pfn(page), new_mfn);
   3.249  
   3.250 -			MULTI_update_va_mapping(mcl, vdata,
   3.251 -						pfn_pte_ma(new_mfn,
   3.252 -							   PAGE_KERNEL), 0);
   3.253 -			mcl++;
   3.254 -
   3.255 -			mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
   3.256 -				MMU_MACHPHYS_UPDATE;
   3.257 -			mmu->val = __pa(vdata) >> PAGE_SHIFT;
   3.258 -			mmu++;
   3.259 -		}
   3.260 +		MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
   3.261 +					pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
   3.262  
   3.263 -		gop->mfn = old_mfn;
   3.264 -		gop->domid = netif->domid;
   3.265 -		gop->ref = RING_GET_REQUEST(
   3.266 -			&netif->rx, netif->rx.req_cons)->gref;
   3.267 -		netif->rx.req_cons++;
   3.268 -		gop++;
   3.269 -
   3.270 -		__skb_queue_tail(&rxq, skb);
   3.271 -
   3.272 -		/* Filled the batch queue? */
   3.273 -		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
   3.274 -			break;
   3.275 +		mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
   3.276 +			MMU_MACHPHYS_UPDATE;
   3.277 +		mmu->val = page_to_pfn(page);
   3.278  	}
   3.279  
   3.280 -	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   3.281 -		if (mcl == rx_mcl)
   3.282 -			return;
   3.283 -
   3.284 -		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
   3.285 +	req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
   3.286 +	gop->mfn = old_mfn;
   3.287 +	gop->domid = netif->domid;
   3.288 +	gop->ref = req->gref;
   3.289 +	return req->id;
   3.290 +}
   3.291  
   3.292 -		if (mmu - rx_mmu) {
   3.293 -			mcl->op = __HYPERVISOR_mmu_update;
   3.294 -			mcl->args[0] = (unsigned long)rx_mmu;
   3.295 -			mcl->args[1] = mmu - rx_mmu;
   3.296 -			mcl->args[2] = 0;
   3.297 -			mcl->args[3] = DOMID_SELF;
   3.298 -			mcl++;
   3.299 -		}
   3.300 +static void netbk_gop_skb(struct sk_buff *skb, struct netbk_rx_meta *meta,
   3.301 +			  int count)
   3.302 +{
   3.303 +	netif_t *netif = netdev_priv(skb->dev);
   3.304 +	int nr_frags = skb_shinfo(skb)->nr_frags;
   3.305 +	int i;
   3.306  
   3.307 -		ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
   3.308 -		BUG_ON(ret != 0);
   3.309 +	for (i = 0; i < nr_frags; i++) {
   3.310 +		meta[++count].frag = skb_shinfo(skb)->frags[i];
   3.311 +		meta[count].id = netbk_gop_frag(netif, meta[count].frag.page,
   3.312 +						count, i + 1);
   3.313  	}
   3.314  
   3.315 -	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, 
   3.316 -					gop - grant_rx_op);
   3.317 -	BUG_ON(ret != 0);
   3.318 +	/*
   3.319 +	 * This must occur at the end to ensure that we don't trash
   3.320 +	 * skb_shinfo until we're done.
   3.321 +	 */
   3.322 +	meta[count - nr_frags].id = netbk_gop_frag(netif,
   3.323 +						   virt_to_page(skb->data),
   3.324 +						   count - nr_frags, 0);
   3.325 +	netif->rx.req_cons += nr_frags + 1;
   3.326 +}
   3.327  
   3.328 -	mcl = rx_mcl;
   3.329 -	gop = grant_rx_op;
   3.330 -	while ((skb = __skb_dequeue(&rxq)) != NULL) {
   3.331 -		netif   = netdev_priv(skb->dev);
   3.332 -		size    = skb->tail - skb->data;
   3.333 +static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
   3.334 +{
   3.335 +	int i;
   3.336 +
   3.337 +	for (i = 0; i < nr_frags; i++)
   3.338 +		put_page(meta[i].frag.page);
   3.339 +}
   3.340  
   3.341 -		atomic_set(&(skb_shinfo(skb)->dataref), 1);
   3.342 -		skb_shinfo(skb)->nr_frags = 0;
   3.343 -		skb_shinfo(skb)->frag_list = NULL;
   3.344 +static int netbk_check_gop(int nr_frags, domid_t domid, int count)
   3.345 +{
   3.346 +	multicall_entry_t *mcl = rx_mcl + count;
   3.347 +	gnttab_transfer_t *gop = grant_rx_op + count;
   3.348 +	int status = NETIF_RSP_OKAY;
   3.349 +	int i;
   3.350  
   3.351 -		netif->stats.tx_bytes += size;
   3.352 -		netif->stats.tx_packets++;
   3.353 -
   3.354 +	for (i = 0; i <= nr_frags; i++) {
   3.355  		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   3.356  			/* The update_va_mapping() must not fail. */
   3.357  			BUG_ON(mcl->result != 0);
   3.358 @@ -344,10 +387,9 @@ static void net_rx_action(unsigned long 
   3.359  		}
   3.360  
   3.361  		/* Check the reassignment error code. */
   3.362 -		status = NETIF_RSP_OKAY;
   3.363  		if (gop->status != 0) { 
   3.364  			DPRINTK("Bad status %d from grant transfer to DOM%u\n",
   3.365 -				gop->status, netif->domid);
   3.366 +				gop->status, domid);
   3.367  			/*
   3.368  			 * Page no longer belongs to us unless GNTST_bad_page,
   3.369  			 * but that should be a fatal error anyway.
   3.370 @@ -355,17 +397,128 @@ static void net_rx_action(unsigned long 
   3.371  			BUG_ON(gop->status == GNTST_bad_page);
   3.372  			status = NETIF_RSP_ERROR; 
   3.373  		}
   3.374 -		irq = netif->irq;
   3.375 -		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
   3.376 -		flags = 0;
   3.377 +		gop++;
   3.378 +	}
   3.379 +
   3.380 +	return status;
   3.381 +}
   3.382 +
   3.383 +static void netbk_add_frag_responses(netif_t *netif, int status,
   3.384 +				     struct netbk_rx_meta *meta, int nr_frags)
   3.385 +{
   3.386 +	int i;
   3.387 +
   3.388 +	for (i = 0; i < nr_frags; i++) {
   3.389 +		int id = meta[i].id;
   3.390 +		int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
   3.391 +
   3.392 +		make_rx_response(netif, id, status, meta[i].frag.page_offset,
   3.393 +				 meta[i].frag.size, flags);
   3.394 +	}
   3.395 +}
   3.396 +
   3.397 +static void net_rx_action(unsigned long unused)
   3.398 +{
   3.399 +	netif_t *netif = NULL; 
   3.400 +	s8 status;
   3.401 +	u16 id, irq, flags;
   3.402 +	multicall_entry_t *mcl;
   3.403 +	struct sk_buff_head rxq;
   3.404 +	struct sk_buff *skb;
   3.405 +	int notify_nr = 0;
   3.406 +	int ret;
   3.407 +	int nr_frags;
   3.408 +	int count;
   3.409 +
   3.410 +	/*
   3.411 +	 * Putting hundreds of bytes on the stack is considered rude.
   3.412 +	 * Static works because a tasklet can only be on one CPU at any time.
   3.413 +	 */
   3.414 +	static u16 notify_list[NET_RX_RING_SIZE];
   3.415 +	static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
   3.416 +
   3.417 +	skb_queue_head_init(&rxq);
   3.418 +
   3.419 +	count = 0;
   3.420 +
   3.421 +	while ((skb = skb_dequeue(&rx_queue)) != NULL) {
   3.422 +		nr_frags = skb_shinfo(skb)->nr_frags;
   3.423 +		*(int *)skb->cb = nr_frags;
   3.424 +
   3.425 +		if (!xen_feature(XENFEAT_auto_translated_physmap) &&
   3.426 +		    check_mfn(nr_frags + 1)) {
   3.427 +			/* Memory squeeze? Back off for an arbitrary while. */
   3.428 +			if ( net_ratelimit() )
   3.429 +				WPRINTK("Memory squeeze in netback "
   3.430 +					"driver.\n");
   3.431 +			mod_timer(&net_timer, jiffies + HZ);
   3.432 +			skb_queue_head(&rx_queue, skb);
   3.433 +			break;
   3.434 +		}
   3.435 +
   3.436 +		netbk_gop_skb(skb, meta, count);
   3.437 +
   3.438 +		count += nr_frags + 1;
   3.439 +
   3.440 +		__skb_queue_tail(&rxq, skb);
   3.441 +
   3.442 +		/* Filled the batch queue? */
   3.443 +		if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
   3.444 +			break;
   3.445 +	}
   3.446 +
   3.447 +	if (!count)
   3.448 +		return;
   3.449 +
   3.450 +	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   3.451 +		mcl = rx_mcl + count;
   3.452 +
   3.453 +		mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
   3.454 +
   3.455 +		mcl->op = __HYPERVISOR_mmu_update;
   3.456 +		mcl->args[0] = (unsigned long)rx_mmu;
   3.457 +		mcl->args[1] = count;
   3.458 +		mcl->args[2] = 0;
   3.459 +		mcl->args[3] = DOMID_SELF;
   3.460 +
   3.461 +		ret = HYPERVISOR_multicall(rx_mcl, count + 1);
   3.462 +		BUG_ON(ret != 0);
   3.463 +	}
   3.464 +
   3.465 +	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, count);
   3.466 +	BUG_ON(ret != 0);
   3.467 +
   3.468 +	count = 0;
   3.469 +	while ((skb = __skb_dequeue(&rxq)) != NULL) {
   3.470 +		nr_frags = *(int *)skb->cb;
   3.471 +
   3.472 +		atomic_set(&(skb_shinfo(skb)->dataref), 1);
   3.473 +		skb_shinfo(skb)->nr_frags = 0;
   3.474 +		skb_shinfo(skb)->frag_list = NULL;
   3.475 +
   3.476 +		netif = netdev_priv(skb->dev);
   3.477 +		netif->stats.tx_bytes += skb->len;
   3.478 +		netif->stats.tx_packets++;
   3.479 +
   3.480 +		netbk_free_pages(nr_frags, meta + count + 1);
   3.481 +		status = netbk_check_gop(nr_frags, netif->domid, count);
   3.482 +
   3.483 +		id = meta[count].id;
   3.484 +		flags = nr_frags ? NETRXF_more_data : 0;
   3.485 +
   3.486  		if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
   3.487  			flags |= NETRXF_csum_blank | NETRXF_data_validated;
   3.488  		else if (skb->proto_data_valid) /* remote but checksummed? */
   3.489  			flags |= NETRXF_data_validated;
   3.490 -		if (make_rx_response(netif, id, status,
   3.491 -				     (unsigned long)skb->data & ~PAGE_MASK,
   3.492 -				     size, flags) &&
   3.493 -		    (rx_notify[irq] == 0)) {
   3.494 +
   3.495 +		make_rx_response(netif, id, status, offset_in_page(skb->data),
   3.496 +				 skb_headlen(skb), flags);
   3.497 +		netbk_add_frag_responses(netif, status, meta + count + 1,
   3.498 +					 nr_frags);
   3.499 +
   3.500 +		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
   3.501 +		irq = netif->irq;
   3.502 +		if (ret && !rx_notify[irq]) {
   3.503  			rx_notify[irq] = 1;
   3.504  			notify_list[notify_nr++] = irq;
   3.505  		}
   3.506 @@ -376,7 +529,7 @@ static void net_rx_action(unsigned long 
   3.507  
   3.508  		netif_put(netif);
   3.509  		dev_kfree_skb(skb);
   3.510 -		gop++;
   3.511 +		count += nr_frags + 1;
   3.512  	}
   3.513  
   3.514  	while (notify_nr != 0) {
   3.515 @@ -1046,7 +1199,6 @@ static int make_rx_response(netif_t *net
   3.516  {
   3.517  	RING_IDX i = netif->rx.rsp_prod_pvt;
   3.518  	netif_rx_response_t *resp;
   3.519 -	int notify;
   3.520  
   3.521  	resp = RING_GET_RESPONSE(&netif->rx, i);
   3.522  	resp->offset     = offset;
   3.523 @@ -1057,9 +1209,8 @@ static int make_rx_response(netif_t *net
   3.524  		resp->status = (s16)st;
   3.525  
   3.526  	netif->rx.rsp_prod_pvt = ++i;
   3.527 -	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
   3.528  
   3.529 -	return notify;
   3.530 +	return 0;
   3.531  }
   3.532  
   3.533  #ifdef NETBE_DEBUG_INTERRUPT
     4.1 --- a/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Mon Jul 31 17:35:43 2006 +0100
     4.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netback/xenbus.c	Mon Jul 31 17:42:13 2006 +0100
     4.3 @@ -377,6 +377,13 @@ static int connect_rings(struct backend_
     4.4  		/* Must be non-zero for pfifo_fast to work. */
     4.5  		be->netif->dev->tx_queue_len = 1;
     4.6  
     4.7 +	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg", "%d", &val) < 0)
     4.8 +		val = 0;
     4.9 +	if (val) {
    4.10 +		be->netif->features |= NETIF_F_SG;
    4.11 +		be->netif->dev->features |= NETIF_F_SG;
    4.12 +	}
    4.13 +
    4.14  	/* Map the shared frame, irq etc. */
    4.15  	err = netif_map(be->netif, tx_ring_ref, rx_ring_ref, evtchn);
    4.16  	if (err) {
     5.1 --- a/xen/include/public/io/netif.h	Mon Jul 31 17:35:43 2006 +0100
     5.2 +++ b/xen/include/public/io/netif.h	Mon Jul 31 17:42:13 2006 +0100
     5.3 @@ -124,6 +124,10 @@ typedef struct netif_rx_request netif_rx
     5.4  #define _NETRXF_csum_blank     (1)
     5.5  #define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
     5.6  
     5.7 +/* Packet continues in the next request descriptor. */
     5.8 +#define _NETRXF_more_data      (2)
     5.9 +#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
    5.10 +
    5.11  struct netif_rx_response {
    5.12      uint16_t id;
    5.13      uint16_t offset;       /* Offset in page of start of received packet  */