ia64/xen-unstable

changeset 11168:d4efff1beedb

[NET] front: Allow packets to be copied on the receive path instead of flipped.
Signed-off-by: Steven Smith <ssmith@xensource.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Wed Aug 16 16:11:53 2006 +0100 (2006-08-16)
parents 6217dbbba0af
children 4b145f81fce4
files linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c
line diff
     1.1 --- a/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Wed Aug 16 16:11:12 2006 +0100
     1.2 +++ b/linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c	Wed Aug 16 16:11:53 2006 +0100
     1.3 @@ -83,6 +83,7 @@ struct netfront_info {
     1.4  
     1.5  	unsigned int handle;
     1.6  	unsigned int evtchn, irq;
     1.7 +	unsigned int copying_receiver;
     1.8  
     1.9  	/* Receive-ring batched refills. */
    1.10  #define RX_MIN_TARGET 8
    1.11 @@ -171,7 +172,7 @@ static inline grant_ref_t xennet_get_rx_
    1.12  
    1.13  static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
    1.14  static int setup_device(struct xenbus_device *, struct netfront_info *);
    1.15 -static struct net_device *create_netdev(int, struct xenbus_device *);
    1.16 +static struct net_device *create_netdev(int, int, struct xenbus_device *);
    1.17  
    1.18  static void netfront_closing(struct xenbus_device *);
    1.19  
    1.20 @@ -213,6 +214,7 @@ static int __devinit netfront_probe(stru
    1.21  	struct net_device *netdev;
    1.22  	struct netfront_info *info;
    1.23  	unsigned int handle;
    1.24 +	unsigned feature_rx_copy;
    1.25  
    1.26  	err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
    1.27  	if (err != 1) {
    1.28 @@ -220,7 +222,22 @@ static int __devinit netfront_probe(stru
    1.29  		return err;
    1.30  	}
    1.31  
    1.32 -	netdev = create_netdev(handle, dev);
    1.33 +#ifndef CONFIG_XEN
    1.34 +	err = xenbus_scanf(XBT_NIL, dev->otherend, "feature-rx-copy", "%u",
    1.35 +			   &feature_rx_copy);
    1.36 +	if (err != 1) {
    1.37 +		xenbus_dev_fatal(dev, err, "reading feature-rx-copy");
    1.38 +		return err;
    1.39 +	}
    1.40 +	if (!feature_rx_copy) {
    1.41 +		xenbus_dev_fatal(dev, 0, "need a copy-capable backend");
    1.42 +		return -EINVAL;
    1.43 +	}
    1.44 +#else
    1.45 +	feature_rx_copy = 0;
    1.46 +#endif
    1.47 +
    1.48 +	netdev = create_netdev(handle, feature_rx_copy, dev);
    1.49  	if (IS_ERR(netdev)) {
    1.50  		err = PTR_ERR(netdev);
    1.51  		xenbus_dev_fatal(dev, err, "creating netdev");
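
The hunk above is the whole frontend half of the negotiation: the backend advertises feature-rx-copy under its own xenstore directory, and a frontend that cannot fall back to page flipping must refuse to connect when the flag is absent or zero. A condensed sketch of that probe-time check, reusing only the xenbus calls from the hunk (the helper name backend_can_rx_copy and the -ENOENT mapping for a zero return are this sketch's own choices; the hunk simply propagates err):

	/*
	 * Sketch: read the backend's feature-rx-copy node.  Returns 1 if
	 * the backend can copy packets into frontend buffers, 0 if it can
	 * only flip pages, or a negative errno.
	 */
	static int backend_can_rx_copy(struct xenbus_device *dev)
	{
		unsigned int feature_rx_copy;
		int err;

		err = xenbus_scanf(XBT_NIL, dev->otherend,
				   "feature-rx-copy", "%u", &feature_rx_copy);
		if (err < 0)
			return err;
		if (err != 1)
			return -ENOENT;	/* node missing or malformed */
		return feature_rx_copy ? 1 : 0;
	}
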
    1.52 @@ -326,6 +343,13 @@ again:
    1.53  		goto abort_transaction;
    1.54  	}
    1.55  
    1.56 +	err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
    1.57 +			    info->copying_receiver);
    1.58 +	if (err) {
    1.59 +		message = "writing request-rx-copy";
    1.60 +		goto abort_transaction;
    1.61 +	}
    1.62 +
    1.63  	err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
    1.64  	if (err) {
    1.65  		message = "writing feature-rx-notify";
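
The request-rx-copy node written here is the frontend's half of the handshake; the backend reads it when the connection comes up. After the transaction commits, the relevant nodes look roughly like this (a sketch of the conventional vif layout, with <front>, <back> and <handle> standing for the two domids and the device handle; request-rx-copy carries info->copying_receiver, so it is "1" only for a copying receiver):

	/local/domain/<front>/device/vif/<handle>/request-rx-copy    = "1"
	/local/domain/<front>/device/vif/<handle>/feature-rx-notify  = "1"
	/local/domain/<back>/backend/vif/<front>/<handle>/feature-rx-copy = "1"
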
    1.66 @@ -597,6 +621,8 @@ static void network_alloc_rx_buffers(str
    1.67  	grant_ref_t ref;
    1.68   	unsigned long pfn;
    1.69   	void *vaddr;
    1.70 +	int nr_flips;
    1.71 +	netif_rx_request_t *req;
    1.72  
    1.73  	if (unlikely(!netif_carrier_ok(dev)))
    1.74  		return;
    1.75 @@ -652,7 +678,7 @@ no_skb:
    1.76  		np->rx_target = np->rx_max_target;
    1.77  
    1.78   refill:
    1.79 -	for (i = 0; ; i++) {
    1.80 +	for (nr_flips = i = 0; ; i++) {
    1.81  		if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
    1.82  			break;
    1.83  
    1.84 @@ -663,7 +689,6 @@ no_skb:
    1.85  		BUG_ON(np->rx_skbs[id]);
    1.86  		np->rx_skbs[id] = skb;
    1.87  
    1.88 -		RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
    1.89  		ref = gnttab_claim_grant_reference(&np->gref_rx_head);
    1.90  		BUG_ON((signed short)ref < 0);
    1.91  		np->grant_rx_ref[id] = ref;
    1.92 @@ -671,49 +696,67 @@ no_skb:
    1.93  		pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
    1.94  		vaddr = page_address(skb_shinfo(skb)->frags[0].page);
    1.95  
    1.96 -		gnttab_grant_foreign_transfer_ref(ref,
    1.97 -						  np->xbdev->otherend_id, pfn);
    1.98 -		RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
    1.99 -		np->rx_pfn_array[i] = pfn_to_mfn(pfn);
   1.100 +		req = RING_GET_REQUEST(&np->rx, req_prod + i);
   1.101 +		if (!np->copying_receiver) {
   1.102 +			gnttab_grant_foreign_transfer_ref(ref,
   1.103 +							  np->xbdev->otherend_id,
   1.104 +							  pfn);
    1.105 +			np->rx_pfn_array[nr_flips++] = pfn_to_mfn(pfn);
   1.106 +			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   1.107 +				/* Remove this page before passing
   1.108 +				 * back to Xen. */
   1.109 +				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
   1.110 +				MULTI_update_va_mapping(np->rx_mcl+i,
   1.111 +							(unsigned long)vaddr,
   1.112 +							__pte(0), 0);
   1.113 +			}
   1.114 +		} else {
   1.115 +			gnttab_grant_foreign_access_ref(ref,
   1.116 +							np->xbdev->otherend_id,
   1.117 +							pfn,
   1.118 +							0);
   1.119 +		}
   1.120  
   1.121 -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   1.122 -			/* Remove this page before passing back to Xen. */
   1.123 -			set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
   1.124 -			MULTI_update_va_mapping(np->rx_mcl+i,
   1.125 -						(unsigned long)vaddr,
   1.126 -						__pte(0), 0);
   1.127 -		}
   1.128 +		req->id = id;
   1.129 +		req->gref = ref;
   1.130  	}
   1.131  
   1.132 -	/* Tell the ballon driver what is going on. */
   1.133 -	balloon_update_driver_allowance(i);
    1.134 +	if (nr_flips != 0) {
    1.135 +		/* Tell the balloon driver what is going on. */
   1.136 +		balloon_update_driver_allowance(i);
   1.137  
   1.138 -	set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array);
   1.139 -	reservation.nr_extents   = i;
   1.140 -	reservation.extent_order = 0;
   1.141 -	reservation.address_bits = 0;
   1.142 -	reservation.domid        = DOMID_SELF;
   1.143 +		set_xen_guest_handle(reservation.extent_start,
   1.144 +				     np->rx_pfn_array);
   1.145 +		reservation.nr_extents   = nr_flips;
   1.146 +		reservation.extent_order = 0;
   1.147 +		reservation.address_bits = 0;
   1.148 +		reservation.domid        = DOMID_SELF;
   1.149  
   1.150 -	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   1.151 -		/* After all PTEs have been zapped, flush the TLB. */
   1.152 -		np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
   1.153 -			UVMF_TLB_FLUSH|UVMF_ALL;
   1.154 +		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   1.155 +			/* After all PTEs have been zapped, flush the TLB. */
   1.156 +			np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
   1.157 +				UVMF_TLB_FLUSH|UVMF_ALL;
   1.158  
   1.159 -		/* Give away a batch of pages. */
   1.160 -		np->rx_mcl[i].op = __HYPERVISOR_memory_op;
   1.161 -		np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
   1.162 -		np->rx_mcl[i].args[1] = (unsigned long)&reservation;
   1.163 +			/* Give away a batch of pages. */
   1.164 +			np->rx_mcl[i].op = __HYPERVISOR_memory_op;
   1.165 +			np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
   1.166 +			np->rx_mcl[i].args[1] = (unsigned long)&reservation;
   1.167  
   1.168 -		/* Zap PTEs and give away pages in one big multicall. */
   1.169 -		(void)HYPERVISOR_multicall(np->rx_mcl, i+1);
   1.170 +			/* Zap PTEs and give away pages in one big
   1.171 +			 * multicall. */
   1.172 +			(void)HYPERVISOR_multicall(np->rx_mcl, i+1);
   1.173  
   1.174 -		/* Check return status of HYPERVISOR_memory_op(). */
   1.175 -		if (unlikely(np->rx_mcl[i].result != i))
   1.176 -			panic("Unable to reduce memory reservation\n");
   1.177 -	} else
   1.178 -		if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
   1.179 -					 &reservation) != i)
   1.180 -			panic("Unable to reduce memory reservation\n");
   1.181 +			/* Check return status of HYPERVISOR_memory_op(). */
   1.182 +			if (unlikely(np->rx_mcl[i].result != i))
   1.183 +				panic("Unable to reduce memory reservation\n");
   1.184 +		} else {
   1.185 +			if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
   1.186 +						 &reservation) != i)
   1.187 +				panic("Unable to reduce memory reservation\n");
   1.188 +		}
   1.189 +	} else {
   1.190 +		wmb();
   1.191 +	}
   1.192  
   1.193  	/* Above is a suitable barrier to ensure backend will see requests. */
   1.194  	np->rx.req_prod_pvt = req_prod + i;
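
Stripped of the p2m and multicall bookkeeping that only the flip path needs, the per-buffer work in the refill loop above reduces to a choice of grant operation (a sketch reusing the calls and variables from the hunk):

	if (!np->copying_receiver) {
		/* Flip: the page itself is transferred to the backend,
		 * so it must first vanish from our p2m and page tables. */
		gnttab_grant_foreign_transfer_ref(ref, np->xbdev->otherend_id,
						  pfn);
	} else {
		/* Copy: the backend only gets write access to the page,
		 * which stays mapped; no balloon or TLB work follows. */
		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
						pfn, 0);
	}
	req->id   = id;
	req->gref = ref;

That is also why the copy-only case can end with a bare wmb(): with no PTEs zapped and no pages given away, ordering the request writes before the req_prod update is all the backend needs.
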
   1.195 @@ -961,10 +1004,11 @@ int xennet_get_extras(struct netfront_in
   1.196  
   1.197  static int xennet_get_responses(struct netfront_info *np,
   1.198  				struct netfront_rx_info *rinfo, RING_IDX rp,
   1.199 -				struct sk_buff_head *list, int count)
   1.200 +				struct sk_buff_head *list, int *mcl_offset_p)
   1.201  {
   1.202 -	struct mmu_update *mmu = np->rx_mmu + count;
   1.203 -	struct multicall_entry *mcl = np->rx_mcl + count;
   1.204 +	int mcl_offset = *mcl_offset_p;
   1.205 +	struct mmu_update *mmu;
   1.206 +	struct multicall_entry *mcl;
   1.207  	struct netif_rx_response *rx = &rinfo->rx;
   1.208  	struct netif_extra_info *extras = rinfo->extras;
   1.209  	RING_IDX cons = np->rx.rsp_cons;
   1.210 @@ -973,6 +1017,7 @@ static int xennet_get_responses(struct n
   1.211  	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
   1.212  	int frags = 1;
   1.213  	int err = 0;
   1.214 +	unsigned long ret;
   1.215  
   1.216  	if (rx->flags & NETRXF_extra_info) {
   1.217  		err = xennet_get_extras(np, extras, rp);
   1.218 @@ -988,6 +1033,7 @@ static int xennet_get_responses(struct n
   1.219  				WPRINTK("rx->offset: %x, size: %u\n",
   1.220  					rx->offset, rx->status);
   1.221  			err = -EINVAL;
   1.222 +			goto next;
   1.223  		}
   1.224  
   1.225  		/*
   1.226 @@ -1001,36 +1047,48 @@ static int xennet_get_responses(struct n
   1.227  			goto next;
   1.228  		}
   1.229  
   1.230 -		/* Memory pressure, insufficient buffer headroom, ... */
   1.231 -		if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
   1.232 -			if (net_ratelimit())
   1.233 -				WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
   1.234 -					rx->id, rx->status);
   1.235 -			xennet_move_rx_slot(np, skb, ref);
   1.236 -			err = -ENOMEM;
   1.237 -			goto next;
   1.238 +		if (!np->copying_receiver) {
   1.239 +			/* Memory pressure, insufficient buffer
   1.240 +			 * headroom, ... */
   1.241 +			if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
   1.242 +				if (net_ratelimit())
   1.243 +					WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
   1.244 +						rx->id, rx->status);
   1.245 +				xennet_move_rx_slot(np, skb, ref);
   1.246 +				err = -ENOMEM;
   1.247 +				goto next;
   1.248 +			}
   1.249 +
   1.250 +			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   1.251 +				/* Remap the page. */
   1.252 +				struct page *page =
   1.253 +					skb_shinfo(skb)->frags[0].page;
   1.254 +				unsigned long pfn = page_to_pfn(page);
   1.255 +				void *vaddr = page_address(page);
   1.256 +
   1.257 +				mcl = np->rx_mcl + mcl_offset;
   1.258 +				mmu = np->rx_mmu + mcl_offset;
   1.259 +
   1.260 +				MULTI_update_va_mapping(mcl,
   1.261 +							(unsigned long)vaddr,
   1.262 +							pfn_pte_ma(mfn,
   1.263 +								   PAGE_KERNEL),
   1.264 +							0);
   1.265 +				mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
   1.266 +					| MMU_MACHPHYS_UPDATE;
   1.267 +				mmu->val = pfn;
   1.268 +
   1.269 +				mcl_offset++;
   1.270 +
   1.271 +				set_phys_to_machine(pfn, mfn);
   1.272 +			}
   1.273 +		} else {
   1.274 +			ret = gnttab_end_foreign_access_ref(ref, 0);
   1.275 +			BUG_ON(!ret);
   1.276  		}
   1.277  
   1.278  		gnttab_release_grant_reference(&np->gref_rx_head, ref);
   1.279  
   1.280 -		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
   1.281 -			/* Remap the page. */
   1.282 -			struct page *page = skb_shinfo(skb)->frags[0].page;
   1.283 -			unsigned long pfn = page_to_pfn(page);
   1.284 -			void *vaddr = page_address(page);
   1.285 -
   1.286 -			MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
   1.287 -						pfn_pte_ma(mfn, PAGE_KERNEL),
   1.288 -						0);
   1.289 -			mcl++;
   1.290 -			mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
   1.291 -				| MMU_MACHPHYS_UPDATE;
   1.292 -			mmu->val = pfn;
   1.293 -			mmu++;
   1.294 -
   1.295 -			set_phys_to_machine(pfn, mfn);
   1.296 -		}
   1.297 -
   1.298  		__skb_queue_tail(list, skb);
   1.299  
   1.300  next:
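
Completion is the mirror image of the refill path: a sketch of the per-response grant teardown above (the remap multicall that the flip path queues when !xen_feature(XENFEAT_auto_translated_physmap) is omitted; names are as in the hunk):

	if (np->copying_receiver) {
		/* Copy: revoke the backend's access grant.  This cannot
		 * fail once the backend has responded, hence the BUG_ON. */
		ret = gnttab_end_foreign_access_ref(ref, 0);
		BUG_ON(!ret);
	} else {
		/* Flip: accept the transferred machine frame; 0 means the
		 * backend could not fulfil the request (memory pressure). */
		mfn = gnttab_end_foreign_transfer_ref(ref);
	}
	gnttab_release_grant_reference(&np->gref_rx_head, ref);

Since only the flip path consumes rx_mcl/rx_mmu slots, one multicall entry per page can no longer be assumed; that is what the new *mcl_offset_p in/out parameter accounts for, and why netif_poll below stops deriving pages_done from skb_queue_len(&tmpq).
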
   1.301 @@ -1056,6 +1114,8 @@ next:
   1.302  		err = -E2BIG;
   1.303  	}
   1.304  
   1.305 +	*mcl_offset_p = mcl_offset;
   1.306 +
   1.307  	return err;
   1.308  }
   1.309  
   1.310 @@ -1155,8 +1215,7 @@ static int netif_poll(struct net_device 
   1.311  		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
   1.312  		memset(extras, 0, sizeof(extras));
   1.313  
   1.314 -		err = xennet_get_responses(np, &rinfo, rp, &tmpq, pages_done);
   1.315 -		pages_done += skb_queue_len(&tmpq);
   1.316 +		err = xennet_get_responses(np, &rinfo, rp, &tmpq, &pages_done);
   1.317  
   1.318  		if (unlikely(err)) {
   1.319  err:
   1.320 @@ -1383,6 +1442,7 @@ static void network_connect(struct net_d
   1.321  	int i, requeue_idx;
   1.322  	struct sk_buff *skb;
   1.323  	grant_ref_t ref;
   1.324 +	netif_rx_request_t *req;
   1.325  
   1.326  	xennet_set_features(dev);
   1.327  
   1.328 @@ -1390,12 +1450,12 @@ static void network_connect(struct net_d
   1.329  	spin_lock(&np->rx_lock);
   1.330  
   1.331  	/*
   1.332 -         * Recovery procedure:
   1.333 +	 * Recovery procedure:
   1.334  	 *  NB. Freelist index entries are always going to be less than
   1.335  	 *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
   1.336  	 *  greater than PAGE_OFFSET: we use this property to distinguish
   1.337  	 *  them.
   1.338 -         */
   1.339 +	 */
   1.340  
   1.341  	/* Step 1: Discard all pending TX packet fragments. */
   1.342  	for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
   1.343 @@ -1419,13 +1479,20 @@ static void network_connect(struct net_d
   1.344  
   1.345  		skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
   1.346  		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
   1.347 +		req = RING_GET_REQUEST(&np->rx, requeue_idx);
   1.348  
   1.349 -		gnttab_grant_foreign_transfer_ref(
   1.350 -			ref, np->xbdev->otherend_id,
   1.351 -			page_to_pfn(skb_shinfo(skb)->frags->page));
   1.352 -
   1.353 -		RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref;
   1.354 -		RING_GET_REQUEST(&np->rx, requeue_idx)->id   = requeue_idx;
   1.355 +		if (!np->copying_receiver) {
   1.356 +			gnttab_grant_foreign_transfer_ref(
   1.357 +				ref, np->xbdev->otherend_id,
   1.358 +				page_to_pfn(skb_shinfo(skb)->frags->page));
   1.359 +		} else {
   1.360 +			gnttab_grant_foreign_access_ref(
   1.361 +				ref, np->xbdev->otherend_id,
   1.362 +				page_to_pfn(skb_shinfo(skb)->frags->page),
   1.363 +				0);
   1.364 +		}
   1.365 +		req->gref = ref;
   1.366 +		req->id   = requeue_idx;
   1.367  
   1.368  		requeue_idx++;
   1.369  	}
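
Recovery after suspend/resume follows the same split: each receive buffer still outstanding is regranted in whichever mode was negotiated, then republished on the ring. A minimal sketch of the per-slot step in the loop above (with page standing for skb_shinfo(skb)->frags->page):

	if (!np->copying_receiver)
		gnttab_grant_foreign_transfer_ref(ref, np->xbdev->otherend_id,
						  page_to_pfn(page));
	else
		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
						page_to_pfn(page), 0);
	req->gref = ref;
	req->id   = requeue_idx;
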
   1.370 @@ -1608,13 +1675,8 @@ static void network_set_multicast_list(s
   1.371  {
   1.372  }
   1.373  
   1.374 -/** Create a network device.
   1.375 - * @param handle device handle
   1.376 - * @param val return parameter for created device
   1.377 - * @return 0 on success, error code otherwise
   1.378 - */
   1.379 -static struct net_device * __devinit create_netdev(int handle,
   1.380 -						   struct xenbus_device *dev)
   1.381 +static struct net_device * __devinit
   1.382 +create_netdev(int handle, int copying_receiver, struct xenbus_device *dev)
   1.383  {
   1.384  	int i, err = 0;
   1.385  	struct net_device *netdev = NULL;
   1.386 @@ -1627,9 +1689,10 @@ static struct net_device * __devinit cre
   1.387  		return ERR_PTR(-ENOMEM);
   1.388  	}
   1.389  
   1.390 -	np                = netdev_priv(netdev);
   1.391 -	np->handle        = handle;
   1.392 -	np->xbdev         = dev;
   1.393 +	np                   = netdev_priv(netdev);
   1.394 +	np->handle           = handle;
   1.395 +	np->xbdev            = dev;
   1.396 +	np->copying_receiver = copying_receiver;
   1.397  
   1.398  	netif_carrier_off(netdev);
   1.399