
view linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c @ 11142:ebd289e3d205

[NET] front: Fix features on resume when csum is off

When the netfront driver is resumed, the features are renegotiated with
the backend. However, I forgot to take into account the status of the TX
checksum setting. When TX checksum is disabled by the user, we cannot
enable SG or TSO since both require checksum offload. This patch makes
xennet check the checksum setting before renegotiating SG or TSO.

This bug was fixed thanks to a report from Anton Burtsev.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author kfraser@localhost.localdomain
date Wed Aug 16 14:26:59 2006 +0100 (2006-08-16)
parents d48842f924d0
children d4efff1beedb
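
In outline, the fix makes the resume-time feature renegotiation respect the
user's TX checksum setting. A condensed sketch of that path is below; it
mirrors xennet_set_features() in the listing that follows (which
network_connect() calls on resume) and is an illustration, not a standalone
build:

    /* Sketch of the resume-time feature renegotiation (mirrors
     * xennet_set_features() in the listing below). */
    static void xennet_set_features(struct net_device *dev)
    {
            /* Turn off all GSO bits except ROBUST, and disable SG,
             * before renegotiating with the backend. */
            dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
            dev->features |= NETIF_F_GSO_ROBUST;
            xennet_set_sg(dev, 0);

            /* SG and TSO both require checksum offload, so if the user
             * has turned TX checksumming off we must not re-enable them. */
            if (!(dev->features & NETIF_F_ALL_CSUM))
                    return;

            if (!xennet_set_sg(dev, 1))     /* negotiates feature-sg */
                    xennet_set_tso(dev, 1); /* then feature-gso-tcpv4 */
    }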
line source
1 /******************************************************************************
2 * Virtual network driver for conversing with remote driver backends.
3 *
4 * Copyright (c) 2002-2005, K A Fraser
5 * Copyright (c) 2005, XenSource Ltd
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation; or, when distributed
10 * separately from the Linux kernel or incorporated into other
11 * software packages, subject to the following license:
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this source file (the "Software"), to deal in the Software without
15 * restriction, including without limitation the rights to use, copy, modify,
16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
17 * and to permit persons to whom the Software is furnished to do so, subject to
18 * the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 * IN THE SOFTWARE.
30 */
32 #include <linux/config.h>
33 #include <linux/module.h>
34 #include <linux/version.h>
35 #include <linux/kernel.h>
36 #include <linux/sched.h>
37 #include <linux/slab.h>
38 #include <linux/string.h>
39 #include <linux/errno.h>
40 #include <linux/netdevice.h>
41 #include <linux/inetdevice.h>
42 #include <linux/etherdevice.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
45 #include <linux/bitops.h>
46 #include <linux/ethtool.h>
47 #include <linux/in.h>
48 #include <linux/if_ether.h>
49 #include <linux/io.h>
50 #include <net/sock.h>
51 #include <net/pkt_sched.h>
52 #include <net/arp.h>
53 #include <net/route.h>
54 #include <asm/uaccess.h>
55 #include <xen/evtchn.h>
56 #include <xen/xenbus.h>
57 #include <xen/interface/io/netif.h>
58 #include <xen/interface/memory.h>
59 #include <xen/balloon.h>
60 #include <asm/page.h>
61 #include <asm/uaccess.h>
62 #include <xen/interface/grant_table.h>
63 #include <xen/gnttab.h>
65 #define RX_COPY_THRESHOLD 256
67 #define GRANT_INVALID_REF 0
69 #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
70 #define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
72 struct netfront_info {
73 struct list_head list;
74 struct net_device *netdev;
76 struct net_device_stats stats;
78 struct netif_tx_front_ring tx;
79 struct netif_rx_front_ring rx;
81 spinlock_t tx_lock;
82 spinlock_t rx_lock;
84 unsigned int handle;
85 unsigned int evtchn, irq;
87 /* Receive-ring batched refills. */
88 #define RX_MIN_TARGET 8
89 #define RX_DFL_MIN_TARGET 64
90 #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
91 unsigned rx_min_target, rx_max_target, rx_target;
92 struct sk_buff_head rx_batch;
94 struct timer_list rx_refill_timer;
96 /*
97 * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
98 * is an index into a chain of free entries.
99 */
100 struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
101 struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
103 #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
104 grant_ref_t gref_tx_head;
105 grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
106 grant_ref_t gref_rx_head;
107 grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
109 struct xenbus_device *xbdev;
110 int tx_ring_ref;
111 int rx_ring_ref;
112 u8 mac[ETH_ALEN];
114 unsigned long rx_pfn_array[NET_RX_RING_SIZE];
115 struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
116 struct mmu_update rx_mmu[NET_RX_RING_SIZE];
117 };
119 struct netfront_rx_info {
120 struct netif_rx_response rx;
121 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
122 };
124 /*
125 * Access macros for acquiring and freeing slots in tx_skbs[].
126 */
128 static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
129 {
130 list[id] = list[0];
131 list[0] = (void *)(unsigned long)id;
132 }
134 static inline unsigned short get_id_from_freelist(struct sk_buff **list)
135 {
136 unsigned int id = (unsigned int)(unsigned long)list[0];
137 list[0] = list[id];
138 return id;
139 }
141 static inline int xennet_rxidx(RING_IDX idx)
142 {
143 return idx & (NET_RX_RING_SIZE - 1);
144 }
146 static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
147 RING_IDX ri)
148 {
149 int i = xennet_rxidx(ri);
150 struct sk_buff *skb = np->rx_skbs[i];
151 np->rx_skbs[i] = NULL;
152 return skb;
153 }
155 static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
156 RING_IDX ri)
157 {
158 int i = xennet_rxidx(ri);
159 grant_ref_t ref = np->grant_rx_ref[i];
160 np->grant_rx_ref[i] = GRANT_INVALID_REF;
161 return ref;
162 }
164 #define DPRINTK(fmt, args...) \
165 pr_debug("netfront (%s:%d) " fmt, \
166 __FUNCTION__, __LINE__, ##args)
167 #define IPRINTK(fmt, args...) \
168 printk(KERN_INFO "netfront: " fmt, ##args)
169 #define WPRINTK(fmt, args...) \
170 printk(KERN_WARNING "netfront: " fmt, ##args)
172 static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
173 static int setup_device(struct xenbus_device *, struct netfront_info *);
174 static struct net_device *create_netdev(int, struct xenbus_device *);
176 static void netfront_closing(struct xenbus_device *);
178 static void end_access(int, void *);
179 static void netif_disconnect_backend(struct netfront_info *);
180 static void close_netdev(struct netfront_info *);
181 static void netif_free(struct netfront_info *);
183 static void network_connect(struct net_device *);
184 static void network_tx_buf_gc(struct net_device *);
185 static void network_alloc_rx_buffers(struct net_device *);
186 static int send_fake_arp(struct net_device *);
188 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
190 #ifdef CONFIG_SYSFS
191 static int xennet_sysfs_addif(struct net_device *netdev);
192 static void xennet_sysfs_delif(struct net_device *netdev);
193 #else /* !CONFIG_SYSFS */
194 #define xennet_sysfs_addif(dev) (0)
195 #define xennet_sysfs_delif(dev) do { } while(0)
196 #endif
198 static inline int xennet_can_sg(struct net_device *dev)
199 {
200 return dev->features & NETIF_F_SG;
201 }
203 /**
204 * Entry point to this code when a new device is created. Allocate the basic
205 * structures and the ring buffers for communication with the backend, and
206 * inform the backend of the appropriate details for those. Switch to
207 * Connected state.
208 */
209 static int __devinit netfront_probe(struct xenbus_device *dev,
210 const struct xenbus_device_id *id)
211 {
212 int err;
213 struct net_device *netdev;
214 struct netfront_info *info;
215 unsigned int handle;
217 err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
218 if (err != 1) {
219 xenbus_dev_fatal(dev, err, "reading handle");
220 return err;
221 }
223 netdev = create_netdev(handle, dev);
224 if (IS_ERR(netdev)) {
225 err = PTR_ERR(netdev);
226 xenbus_dev_fatal(dev, err, "creating netdev");
227 return err;
228 }
230 info = netdev_priv(netdev);
231 dev->dev.driver_data = info;
233 err = talk_to_backend(dev, info);
234 if (err) {
235 xennet_sysfs_delif(info->netdev);
236 unregister_netdev(netdev);
237 free_netdev(netdev);
238 dev->dev.driver_data = NULL;
239 return err;
240 }
242 return 0;
243 }
246 /**
247 * We are reconnecting to the backend, due to a suspend/resume, or a backend
248 * driver restart. We tear down our netif structure and recreate it, but
249 * leave the device-layer structures intact so that this is transparent to the
250 * rest of the kernel.
251 */
252 static int netfront_resume(struct xenbus_device *dev)
253 {
254 struct netfront_info *info = dev->dev.driver_data;
256 DPRINTK("%s\n", dev->nodename);
258 netif_disconnect_backend(info);
259 return talk_to_backend(dev, info);
260 }
262 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
263 {
264 char *s, *e, *macstr;
265 int i;
267 macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
268 if (IS_ERR(macstr))
269 return PTR_ERR(macstr);
271 for (i = 0; i < ETH_ALEN; i++) {
272 mac[i] = simple_strtoul(s, &e, 16);
273 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
274 kfree(macstr);
275 return -ENOENT;
276 }
277 s = e+1;
278 }
280 kfree(macstr);
281 return 0;
282 }
284 /* Common code used when first setting up, and when resuming. */
285 static int talk_to_backend(struct xenbus_device *dev,
286 struct netfront_info *info)
287 {
288 const char *message;
289 struct xenbus_transaction xbt;
290 int err;
292 err = xen_net_read_mac(dev, info->mac);
293 if (err) {
294 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
295 goto out;
296 }
298 /* Create shared ring, alloc event channel. */
299 err = setup_device(dev, info);
300 if (err)
301 goto out;
303 again:
304 err = xenbus_transaction_start(&xbt);
305 if (err) {
306 xenbus_dev_fatal(dev, err, "starting transaction");
307 goto destroy_ring;
308 }
310 err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
311 info->tx_ring_ref);
312 if (err) {
313 message = "writing tx ring-ref";
314 goto abort_transaction;
315 }
316 err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
317 info->rx_ring_ref);
318 if (err) {
319 message = "writing rx ring-ref";
320 goto abort_transaction;
321 }
322 err = xenbus_printf(xbt, dev->nodename,
323 "event-channel", "%u", info->evtchn);
324 if (err) {
325 message = "writing event-channel";
326 goto abort_transaction;
327 }
329 err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
330 if (err) {
331 message = "writing feature-rx-notify";
332 goto abort_transaction;
333 }
335 err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
336 if (err) {
337 message = "writing feature-sg";
338 goto abort_transaction;
339 }
341 err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
342 if (err) {
343 message = "writing feature-gso-tcpv4";
344 goto abort_transaction;
345 }
347 err = xenbus_transaction_end(xbt, 0);
348 if (err) {
349 if (err == -EAGAIN)
350 goto again;
351 xenbus_dev_fatal(dev, err, "completing transaction");
352 goto destroy_ring;
353 }
355 return 0;
357 abort_transaction:
358 xenbus_transaction_end(xbt, 1);
359 xenbus_dev_fatal(dev, err, "%s", message);
360 destroy_ring:
361 netif_free(info);
362 out:
363 return err;
364 }
367 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
368 {
369 struct netif_tx_sring *txs;
370 struct netif_rx_sring *rxs;
371 int err;
372 struct net_device *netdev = info->netdev;
374 info->tx_ring_ref = GRANT_INVALID_REF;
375 info->rx_ring_ref = GRANT_INVALID_REF;
376 info->rx.sring = NULL;
377 info->tx.sring = NULL;
378 info->irq = 0;
380 txs = (struct netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
381 if (!txs) {
382 err = -ENOMEM;
383 xenbus_dev_fatal(dev, err, "allocating tx ring page");
384 goto fail;
385 }
386 SHARED_RING_INIT(txs);
387 FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
389 err = xenbus_grant_ring(dev, virt_to_mfn(txs));
390 if (err < 0) {
391 free_page((unsigned long)txs);
392 goto fail;
393 }
394 info->tx_ring_ref = err;
396 rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
397 if (!rxs) {
398 err = -ENOMEM;
399 xenbus_dev_fatal(dev, err, "allocating rx ring page");
400 goto fail;
401 }
402 SHARED_RING_INIT(rxs);
403 FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
405 err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
406 if (err < 0) {
407 free_page((unsigned long)rxs);
408 goto fail;
409 }
410 info->rx_ring_ref = err;
412 err = xenbus_alloc_evtchn(dev, &info->evtchn);
413 if (err)
414 goto fail;
416 memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
417 err = bind_evtchn_to_irqhandler(info->evtchn, netif_int,
418 SA_SAMPLE_RANDOM, netdev->name, netdev);
419 if (err < 0)
420 goto fail;
421 info->irq = err;
422 return 0;
424 fail:
425 netif_free(info);
426 return err;
427 }
430 /**
431 * Callback received when the backend's state changes.
432 */
433 static void backend_changed(struct xenbus_device *dev,
434 enum xenbus_state backend_state)
435 {
436 struct netfront_info *np = dev->dev.driver_data;
437 struct net_device *netdev = np->netdev;
439 DPRINTK("\n");
441 switch (backend_state) {
442 case XenbusStateInitialising:
443 case XenbusStateInitialised:
444 case XenbusStateConnected:
445 case XenbusStateUnknown:
446 case XenbusStateClosed:
447 break;
449 case XenbusStateInitWait:
450 network_connect(netdev);
451 xenbus_switch_state(dev, XenbusStateConnected);
452 (void)send_fake_arp(netdev);
453 break;
455 case XenbusStateClosing:
456 netfront_closing(dev);
457 break;
458 }
459 }
462 /** Send a packet on a net device to encourage switches to learn the
463 * MAC. We send a fake ARP request.
464 *
465 * @param dev device
466 * @return 0 on success, error code otherwise
467 */
468 static int send_fake_arp(struct net_device *dev)
469 {
470 struct sk_buff *skb;
471 u32 src_ip, dst_ip;
473 dst_ip = INADDR_BROADCAST;
474 src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
476 /* No IP? Then nothing to do. */
477 if (src_ip == 0)
478 return 0;
480 skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
481 dst_ip, dev, src_ip,
482 /*dst_hw*/ NULL, /*src_hw*/ NULL,
483 /*target_hw*/ dev->dev_addr);
484 if (skb == NULL)
485 return -ENOMEM;
487 return dev_queue_xmit(skb);
488 }
491 static int network_open(struct net_device *dev)
492 {
493 struct netfront_info *np = netdev_priv(dev);
495 memset(&np->stats, 0, sizeof(np->stats));
497 network_alloc_rx_buffers(dev);
498 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
500 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
501 netif_rx_schedule(dev);
503 netif_start_queue(dev);
505 return 0;
506 }
508 static inline int netfront_tx_slot_available(struct netfront_info *np)
509 {
510 return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 2;
511 }
513 static inline void network_maybe_wake_tx(struct net_device *dev)
514 {
515 struct netfront_info *np = netdev_priv(dev);
517 if (unlikely(netif_queue_stopped(dev)) &&
518 netfront_tx_slot_available(np) &&
519 likely(netif_running(dev)))
520 netif_wake_queue(dev);
521 }
523 static void network_tx_buf_gc(struct net_device *dev)
524 {
525 RING_IDX cons, prod;
526 unsigned short id;
527 struct netfront_info *np = netdev_priv(dev);
528 struct sk_buff *skb;
530 if (unlikely(!netif_carrier_ok(dev)))
531 return;
533 do {
534 prod = np->tx.sring->rsp_prod;
535 rmb(); /* Ensure we see responses up to 'rp'. */
537 for (cons = np->tx.rsp_cons; cons != prod; cons++) {
538 struct netif_tx_response *txrsp;
540 txrsp = RING_GET_RESPONSE(&np->tx, cons);
541 if (txrsp->status == NETIF_RSP_NULL)
542 continue;
544 id = txrsp->id;
545 skb = np->tx_skbs[id];
546 if (unlikely(gnttab_query_foreign_access(
547 np->grant_tx_ref[id]) != 0)) {
548 printk(KERN_ALERT "network_tx_buf_gc: warning "
549 "-- grant still in use by backend "
550 "domain.\n");
551 BUG();
552 }
553 gnttab_end_foreign_access_ref(
554 np->grant_tx_ref[id], GNTMAP_readonly);
555 gnttab_release_grant_reference(
556 &np->gref_tx_head, np->grant_tx_ref[id]);
557 np->grant_tx_ref[id] = GRANT_INVALID_REF;
558 add_id_to_freelist(np->tx_skbs, id);
559 dev_kfree_skb_irq(skb);
560 }
562 np->tx.rsp_cons = prod;
564 /*
565 * Set a new event, then check for race with update of tx_cons.
566 * Note that it is essential to schedule a callback, no matter
567 * how few buffers are pending. Even if there is space in the
568 * transmit ring, higher layers may be blocked because too much
569 * data is outstanding: in such cases notification from Xen is
570 * likely to be the only kick that we'll get.
571 */
572 np->tx.sring->rsp_event =
573 prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
574 mb();
575 } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
577 network_maybe_wake_tx(dev);
578 }
581 static void rx_refill_timeout(unsigned long data)
582 {
583 struct net_device *dev = (struct net_device *)data;
584 netif_rx_schedule(dev);
585 }
588 static void network_alloc_rx_buffers(struct net_device *dev)
589 {
590 unsigned short id;
591 struct netfront_info *np = netdev_priv(dev);
592 struct sk_buff *skb;
593 struct page *page;
594 int i, batch_target, notify;
595 RING_IDX req_prod = np->rx.req_prod_pvt;
596 struct xen_memory_reservation reservation;
597 grant_ref_t ref;
598 unsigned long pfn;
599 void *vaddr;
601 if (unlikely(!netif_carrier_ok(dev)))
602 return;
604 /*
605 * Allocate skbuffs greedily, even though we batch updates to the
606 * receive ring. This creates a less bursty demand on the memory
607 * allocator, so should reduce the chance of failed allocation requests
608 * both for ourself and for other kernel subsystems.
609 */
610 batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
611 for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
612 /*
613 * Allocate an skb and a page. Do not use __dev_alloc_skb as
614 * that will allocate page-sized buffers which is not
615 * necessary here.
616 * 16 bytes added as necessary headroom for netif_receive_skb.
617 */
618 skb = alloc_skb(RX_COPY_THRESHOLD + 16,
619 GFP_ATOMIC | __GFP_NOWARN);
620 if (unlikely(!skb))
621 goto no_skb;
623 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
624 if (!page) {
625 kfree_skb(skb);
626 no_skb:
627 /* Any skbuffs queued for refill? Force them out. */
628 if (i != 0)
629 goto refill;
630 /* Could not allocate any skbuffs. Try again later. */
631 mod_timer(&np->rx_refill_timer,
632 jiffies + (HZ/10));
633 break;
634 }
636 skb_reserve(skb, 16); /* mimic dev_alloc_skb() */
637 skb_shinfo(skb)->frags[0].page = page;
638 skb_shinfo(skb)->nr_frags = 1;
639 __skb_queue_tail(&np->rx_batch, skb);
640 }
642 /* Is the batch large enough to be worthwhile? */
643 if (i < (np->rx_target/2)) {
644 if (req_prod > np->rx.sring->req_prod)
645 goto push;
646 return;
647 }
649 /* Adjust our fill target if we risked running out of buffers. */
650 if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
651 ((np->rx_target *= 2) > np->rx_max_target))
652 np->rx_target = np->rx_max_target;
654 refill:
655 for (i = 0; ; i++) {
656 if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
657 break;
659 skb->dev = dev;
661 id = xennet_rxidx(req_prod + i);
663 BUG_ON(np->rx_skbs[id]);
664 np->rx_skbs[id] = skb;
666 RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
667 ref = gnttab_claim_grant_reference(&np->gref_rx_head);
668 BUG_ON((signed short)ref < 0);
669 np->grant_rx_ref[id] = ref;
671 pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
672 vaddr = page_address(skb_shinfo(skb)->frags[0].page);
674 gnttab_grant_foreign_transfer_ref(ref,
675 np->xbdev->otherend_id, pfn);
676 RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
677 np->rx_pfn_array[i] = pfn_to_mfn(pfn);
679 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
680 /* Remove this page before passing back to Xen. */
681 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
682 MULTI_update_va_mapping(np->rx_mcl+i,
683 (unsigned long)vaddr,
684 __pte(0), 0);
685 }
686 }
688 /* Tell the balloon driver what is going on. */
689 balloon_update_driver_allowance(i);
691 set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array);
692 reservation.nr_extents = i;
693 reservation.extent_order = 0;
694 reservation.address_bits = 0;
695 reservation.domid = DOMID_SELF;
697 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
698 /* After all PTEs have been zapped, flush the TLB. */
699 np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
700 UVMF_TLB_FLUSH|UVMF_ALL;
702 /* Give away a batch of pages. */
703 np->rx_mcl[i].op = __HYPERVISOR_memory_op;
704 np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
705 np->rx_mcl[i].args[1] = (unsigned long)&reservation;
707 /* Zap PTEs and give away pages in one big multicall. */
708 (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
710 /* Check return status of HYPERVISOR_memory_op(). */
711 if (unlikely(np->rx_mcl[i].result != i))
712 panic("Unable to reduce memory reservation\n");
713 } else
714 if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
715 &reservation) != i)
716 panic("Unable to reduce memory reservation\n");
718 /* Above is a suitable barrier to ensure backend will see requests. */
719 np->rx.req_prod_pvt = req_prod + i;
720 push:
721 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
722 if (notify)
723 notify_remote_via_irq(np->irq);
724 }
726 static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
727 struct netif_tx_request *tx)
728 {
729 struct netfront_info *np = netdev_priv(dev);
730 char *data = skb->data;
731 unsigned long mfn;
732 RING_IDX prod = np->tx.req_prod_pvt;
733 int frags = skb_shinfo(skb)->nr_frags;
734 unsigned int offset = offset_in_page(data);
735 unsigned int len = skb_headlen(skb);
736 unsigned int id;
737 grant_ref_t ref;
738 int i;
740 while (len > PAGE_SIZE - offset) {
741 tx->size = PAGE_SIZE - offset;
742 tx->flags |= NETTXF_more_data;
743 len -= tx->size;
744 data += tx->size;
745 offset = 0;
747 id = get_id_from_freelist(np->tx_skbs);
748 np->tx_skbs[id] = skb_get(skb);
749 tx = RING_GET_REQUEST(&np->tx, prod++);
750 tx->id = id;
751 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
752 BUG_ON((signed short)ref < 0);
754 mfn = virt_to_mfn(data);
755 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
756 mfn, GNTMAP_readonly);
758 tx->gref = np->grant_tx_ref[id] = ref;
759 tx->offset = offset;
760 tx->size = len;
761 tx->flags = 0;
762 }
764 for (i = 0; i < frags; i++) {
765 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
767 tx->flags |= NETTXF_more_data;
769 id = get_id_from_freelist(np->tx_skbs);
770 np->tx_skbs[id] = skb_get(skb);
771 tx = RING_GET_REQUEST(&np->tx, prod++);
772 tx->id = id;
773 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
774 BUG_ON((signed short)ref < 0);
776 mfn = pfn_to_mfn(page_to_pfn(frag->page));
777 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
778 mfn, GNTMAP_readonly);
780 tx->gref = np->grant_tx_ref[id] = ref;
781 tx->offset = frag->page_offset;
782 tx->size = frag->size;
783 tx->flags = 0;
784 }
786 np->tx.req_prod_pvt = prod;
787 }
789 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
790 {
791 unsigned short id;
792 struct netfront_info *np = netdev_priv(dev);
793 struct netif_tx_request *tx;
794 struct netif_extra_info *extra;
795 char *data = skb->data;
796 RING_IDX i;
797 grant_ref_t ref;
798 unsigned long mfn;
799 int notify;
800 int frags = skb_shinfo(skb)->nr_frags;
801 unsigned int offset = offset_in_page(data);
802 unsigned int len = skb_headlen(skb);
804 frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
805 if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
806 printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
807 frags);
808 dump_stack();
809 goto drop;
810 }
812 spin_lock_irq(&np->tx_lock);
814 if (unlikely(!netif_carrier_ok(dev) ||
815 (frags > 1 && !xennet_can_sg(dev)) ||
816 netif_needs_gso(dev, skb))) {
817 spin_unlock_irq(&np->tx_lock);
818 goto drop;
819 }
821 i = np->tx.req_prod_pvt;
823 id = get_id_from_freelist(np->tx_skbs);
824 np->tx_skbs[id] = skb;
826 tx = RING_GET_REQUEST(&np->tx, i);
828 tx->id = id;
829 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
830 BUG_ON((signed short)ref < 0);
831 mfn = virt_to_mfn(data);
832 gnttab_grant_foreign_access_ref(
833 ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
834 tx->gref = np->grant_tx_ref[id] = ref;
835 tx->offset = offset;
836 tx->size = len;
838 tx->flags = 0;
839 extra = NULL;
841 if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
842 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
843 if (skb->proto_data_valid) /* remote but checksummed? */
844 tx->flags |= NETTXF_data_validated;
846 if (skb_shinfo(skb)->gso_size) {
847 struct netif_extra_info *gso = (struct netif_extra_info *)
848 RING_GET_REQUEST(&np->tx, ++i);
850 if (extra)
851 extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
852 else
853 tx->flags |= NETTXF_extra_info;
855 gso->u.gso.size = skb_shinfo(skb)->gso_size;
856 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
857 gso->u.gso.pad = 0;
858 gso->u.gso.features = 0;
860 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
861 gso->flags = 0;
862 extra = gso;
863 }
865 np->tx.req_prod_pvt = i + 1;
867 xennet_make_frags(skb, dev, tx);
868 tx->size = skb->len;
870 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
871 if (notify)
872 notify_remote_via_irq(np->irq);
874 network_tx_buf_gc(dev);
876 if (!netfront_tx_slot_available(np))
877 netif_stop_queue(dev);
879 spin_unlock_irq(&np->tx_lock);
881 np->stats.tx_bytes += skb->len;
882 np->stats.tx_packets++;
884 return 0;
886 drop:
887 np->stats.tx_dropped++;
888 dev_kfree_skb(skb);
889 return 0;
890 }
892 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
893 {
894 struct net_device *dev = dev_id;
895 struct netfront_info *np = netdev_priv(dev);
896 unsigned long flags;
898 spin_lock_irqsave(&np->tx_lock, flags);
899 network_tx_buf_gc(dev);
900 spin_unlock_irqrestore(&np->tx_lock, flags);
902 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) &&
903 likely(netif_running(dev)))
904 netif_rx_schedule(dev);
906 return IRQ_HANDLED;
907 }
909 static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
910 grant_ref_t ref)
911 {
912 int new = xennet_rxidx(np->rx.req_prod_pvt);
914 BUG_ON(np->rx_skbs[new]);
915 np->rx_skbs[new] = skb;
916 np->grant_rx_ref[new] = ref;
917 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
918 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
919 np->rx.req_prod_pvt++;
920 }
922 int xennet_get_extras(struct netfront_info *np,
923 struct netif_extra_info *extras, RING_IDX rp)
925 {
926 struct netif_extra_info *extra;
927 RING_IDX cons = np->rx.rsp_cons;
928 int err = 0;
930 do {
931 struct sk_buff *skb;
932 grant_ref_t ref;
934 if (unlikely(cons + 1 == rp)) {
935 if (net_ratelimit())
936 WPRINTK("Missing extra info\n");
937 err = -EBADR;
938 break;
939 }
941 extra = (struct netif_extra_info *)
942 RING_GET_RESPONSE(&np->rx, ++cons);
944 if (unlikely(!extra->type ||
945 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
946 if (net_ratelimit())
947 WPRINTK("Invalid extra type: %d\n",
948 extra->type);
949 err = -EINVAL;
950 } else
951 memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
953 skb = xennet_get_rx_skb(np, cons);
954 ref = xennet_get_rx_ref(np, cons);
955 xennet_move_rx_slot(np, skb, ref);
956 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
958 np->rx.rsp_cons = cons;
959 return err;
960 }
962 static int xennet_get_responses(struct netfront_info *np,
963 struct netfront_rx_info *rinfo, RING_IDX rp,
964 struct sk_buff_head *list, int count)
965 {
966 struct mmu_update *mmu = np->rx_mmu + count;
967 struct multicall_entry *mcl = np->rx_mcl + count;
968 struct netif_rx_response *rx = &rinfo->rx;
969 struct netif_extra_info *extras = rinfo->extras;
970 RING_IDX cons = np->rx.rsp_cons;
971 struct sk_buff *skb = xennet_get_rx_skb(np, cons);
972 grant_ref_t ref = xennet_get_rx_ref(np, cons);
973 int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
974 int frags = 1;
975 int err = 0;
977 if (rx->flags & NETRXF_extra_info) {
978 err = xennet_get_extras(np, extras, rp);
979 cons = np->rx.rsp_cons;
980 }
982 for (;;) {
983 unsigned long mfn;
985 if (unlikely(rx->status < 0 ||
986 rx->offset + rx->status > PAGE_SIZE)) {
987 if (net_ratelimit())
988 WPRINTK("rx->offset: %x, size: %u\n",
989 rx->offset, rx->status);
990 err = -EINVAL;
991 }
993 /*
994 * This definitely indicates a bug, either in this driver or in
995 * the backend driver. In future this should flag the bad
996 * situation to the system controller to reboot the backend.
997 */
998 if (ref == GRANT_INVALID_REF) {
999 WPRINTK("Bad rx response id %d.\n", rx->id);
1000 err = -EINVAL;
1001 goto next;
1002 }
1004 /* Memory pressure, insufficient buffer headroom, ... */
1005 if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
1006 if (net_ratelimit())
1007 WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
1008 rx->id, rx->status);
1009 xennet_move_rx_slot(np, skb, ref);
1010 err = -ENOMEM;
1011 goto next;
1012 }
1014 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1016 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1017 /* Remap the page. */
1018 struct page *page = skb_shinfo(skb)->frags[0].page;
1019 unsigned long pfn = page_to_pfn(page);
1020 void *vaddr = page_address(page);
1022 MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
1023 pfn_pte_ma(mfn, PAGE_KERNEL),
1024 0);
1025 mcl++;
1026 mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
1027 | MMU_MACHPHYS_UPDATE;
1028 mmu->val = pfn;
1029 mmu++;
1031 set_phys_to_machine(pfn, mfn);
1032 }
1034 __skb_queue_tail(list, skb);
1036 next:
1037 if (!(rx->flags & NETRXF_more_data))
1038 break;
1040 if (cons + frags == rp) {
1041 if (net_ratelimit())
1042 WPRINTK("Need more frags\n");
1043 err = -ENOENT;
1044 break;
1045 }
1047 rx = RING_GET_RESPONSE(&np->rx, cons + frags);
1048 skb = xennet_get_rx_skb(np, cons + frags);
1049 ref = xennet_get_rx_ref(np, cons + frags);
1050 frags++;
1051 }
1053 if (unlikely(frags > max)) {
1054 if (net_ratelimit())
1055 WPRINTK("Too many frags\n");
1056 err = -E2BIG;
1057 }
1059 return err;
1060 }
1062 static RING_IDX xennet_fill_frags(struct netfront_info *np,
1063 struct sk_buff *skb,
1064 struct sk_buff_head *list)
1066 struct skb_shared_info *shinfo = skb_shinfo(skb);
1067 int nr_frags = shinfo->nr_frags;
1068 RING_IDX cons = np->rx.rsp_cons;
1069 skb_frag_t *frag = shinfo->frags + nr_frags;
1070 struct sk_buff *nskb;
1072 while ((nskb = __skb_dequeue(list))) {
1073 struct netif_rx_response *rx =
1074 RING_GET_RESPONSE(&np->rx, ++cons);
1076 frag->page = skb_shinfo(nskb)->frags[0].page;
1077 frag->page_offset = rx->offset;
1078 frag->size = rx->status;
1080 skb->data_len += rx->status;
1082 skb_shinfo(nskb)->nr_frags = 0;
1083 kfree_skb(nskb);
1085 frag++;
1086 nr_frags++;
1089 shinfo->nr_frags = nr_frags;
1090 return cons;
1093 static int xennet_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
1095 if (!gso->u.gso.size) {
1096 if (net_ratelimit())
1097 WPRINTK("GSO size must not be zero.\n");
1098 return -EINVAL;
1101 /* Currently only TCPv4 S.O. is supported. */
1102 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1103 if (net_ratelimit())
1104 WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1105 return -EINVAL;
1108 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1109 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1111 /* Header must be checked, and gso_segs computed. */
1112 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1113 skb_shinfo(skb)->gso_segs = 0;
1115 return 0;
1118 static int netif_poll(struct net_device *dev, int *pbudget)
1120 struct netfront_info *np = netdev_priv(dev);
1121 struct sk_buff *skb;
1122 struct netfront_rx_info rinfo;
1123 struct netif_rx_response *rx = &rinfo.rx;
1124 struct netif_extra_info *extras = rinfo.extras;
1125 RING_IDX i, rp;
1126 struct multicall_entry *mcl;
1127 int work_done, budget, more_to_do = 1;
1128 struct sk_buff_head rxq;
1129 struct sk_buff_head errq;
1130 struct sk_buff_head tmpq;
1131 unsigned long flags;
1132 unsigned int len;
1133 int pages_done;
1134 int err;
1136 spin_lock(&np->rx_lock);
1138 if (unlikely(!netif_carrier_ok(dev))) {
1139 spin_unlock(&np->rx_lock);
1140 return 0;
1143 skb_queue_head_init(&rxq);
1144 skb_queue_head_init(&errq);
1145 skb_queue_head_init(&tmpq);
1147 if ((budget = *pbudget) > dev->quota)
1148 budget = dev->quota;
1149 rp = np->rx.sring->rsp_prod;
1150 rmb(); /* Ensure we see queued responses up to 'rp'. */
1152 for (i = np->rx.rsp_cons, work_done = 0, pages_done = 0;
1153 (i != rp) && (work_done < budget);
1154 np->rx.rsp_cons = ++i, work_done++) {
1155 memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
1156 memset(extras, 0, sizeof(extras));
1158 err = xennet_get_responses(np, &rinfo, rp, &tmpq, pages_done);
1159 pages_done += skb_queue_len(&tmpq);
1161 if (unlikely(err)) {
1162 err:
1163 i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1;
1164 work_done--;
1165 while ((skb = __skb_dequeue(&tmpq)))
1166 __skb_queue_tail(&errq, skb);
1167 np->stats.rx_errors++;
1168 continue;
1171 skb = __skb_dequeue(&tmpq);
1173 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1174 struct netif_extra_info *gso;
1175 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1177 if (unlikely(xennet_set_skb_gso(skb, gso))) {
1178 __skb_queue_head(&tmpq, skb);
1179 goto err;
1183 skb->nh.raw = (void *)skb_shinfo(skb)->frags[0].page;
1184 skb->h.raw = skb->nh.raw + rx->offset;
1186 len = rx->status;
1187 if (len > RX_COPY_THRESHOLD)
1188 len = RX_COPY_THRESHOLD;
1189 skb_put(skb, len);
1191 if (rx->status > len) {
1192 skb_shinfo(skb)->frags[0].page_offset =
1193 rx->offset + len;
1194 skb_shinfo(skb)->frags[0].size = rx->status - len;
1195 skb->data_len = rx->status - len;
1196 } else {
1197 skb_shinfo(skb)->frags[0].page = NULL;
1198 skb_shinfo(skb)->nr_frags = 0;
1201 i = xennet_fill_frags(np, skb, &tmpq);
1203 /*
1204 * Truesize must approximate the size of true data plus
1205 * any supervisor overheads. Adding hypervisor overheads
1206 * has been shown to significantly reduce achievable
1207 * bandwidth with the default receive buffer size. It is
1208 * therefore not wise to account for it here.
1210 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
1211 * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
1212 * add the size of the data pulled in xennet_fill_frags().
1214 * We also adjust for any unused space in the main data
1215 * area by subtracting (RX_COPY_THRESHOLD - len). This is
1216 * especially important with drivers which split incoming
1217 * packets into header and data, using only 66 bytes of
1218 * the main data area (see the e1000 driver for example.)
1219 * On such systems, without this last adjustment, our
1220 * achievable receive throughput using the standard receive
1221 * buffer size was cut by 25%(!!!).
1222 */
1223 skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
1224 skb->len += skb->data_len;
1226 /*
1227 * Old backends do not assert data_validated but we
1228 * can infer it from csum_blank so test both flags.
1229 */
1230 if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) {
1231 skb->ip_summed = CHECKSUM_UNNECESSARY;
1232 skb->proto_data_valid = 1;
1233 } else {
1234 skb->ip_summed = CHECKSUM_NONE;
1235 skb->proto_data_valid = 0;
1237 skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
1239 np->stats.rx_packets++;
1240 np->stats.rx_bytes += skb->len;
1242 __skb_queue_tail(&rxq, skb);
1245 /* Some pages are no longer absent... */
1246 balloon_update_driver_allowance(-pages_done);
1248 /* Do all the remapping work, and M2P updates, in one big hypercall. */
1249 if (likely(pages_done)) {
1250 mcl = np->rx_mcl + pages_done;
1251 mcl->op = __HYPERVISOR_mmu_update;
1252 mcl->args[0] = (unsigned long)np->rx_mmu;
1253 mcl->args[1] = pages_done;
1254 mcl->args[2] = 0;
1255 mcl->args[3] = DOMID_SELF;
1256 (void)HYPERVISOR_multicall(np->rx_mcl, pages_done + 1);
1259 while ((skb = __skb_dequeue(&errq)))
1260 kfree_skb(skb);
1262 while ((skb = __skb_dequeue(&rxq)) != NULL) {
1263 struct page *page = (struct page *)skb->nh.raw;
1264 void *vaddr = page_address(page);
1266 memcpy(skb->data, vaddr + (skb->h.raw - skb->nh.raw),
1267 skb_headlen(skb));
1269 if (page != skb_shinfo(skb)->frags[0].page)
1270 __free_page(page);
1272 /* Ethernet work: Delayed to here as it peeks the header. */
1273 skb->protocol = eth_type_trans(skb, dev);
1275 /* Pass it up. */
1276 netif_receive_skb(skb);
1277 dev->last_rx = jiffies;
1280 /* If we get a callback with very few responses, reduce fill target. */
1281 /* NB. Note exponential increase, linear decrease. */
1282 if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
1283 ((3*np->rx_target) / 4)) &&
1284 (--np->rx_target < np->rx_min_target))
1285 np->rx_target = np->rx_min_target;
1287 network_alloc_rx_buffers(dev);
1289 *pbudget -= work_done;
1290 dev->quota -= work_done;
1292 if (work_done < budget) {
1293 local_irq_save(flags);
1295 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
1296 if (!more_to_do)
1297 __netif_rx_complete(dev);
1299 local_irq_restore(flags);
1302 spin_unlock(&np->rx_lock);
1304 return more_to_do;
1308 static int network_close(struct net_device *dev)
1310 struct netfront_info *np = netdev_priv(dev);
1311 netif_stop_queue(np->netdev);
1312 return 0;
1316 static struct net_device_stats *network_get_stats(struct net_device *dev)
1318 struct netfront_info *np = netdev_priv(dev);
1319 return &np->stats;
1322 static int xennet_change_mtu(struct net_device *dev, int mtu)
1324 int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
1326 if (mtu > max)
1327 return -EINVAL;
1328 dev->mtu = mtu;
1329 return 0;
1332 static int xennet_set_sg(struct net_device *dev, u32 data)
1334 if (data) {
1335 struct netfront_info *np = netdev_priv(dev);
1336 int val;
1338 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1339 "%d", &val) < 0)
1340 val = 0;
1341 if (!val)
1342 return -ENOSYS;
1343 } else if (dev->mtu > ETH_DATA_LEN)
1344 dev->mtu = ETH_DATA_LEN;
1346 return ethtool_op_set_sg(dev, data);
1349 static int xennet_set_tso(struct net_device *dev, u32 data)
1351 if (data) {
1352 struct netfront_info *np = netdev_priv(dev);
1353 int val;
1355 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1356 "feature-gso-tcpv4", "%d", &val) < 0)
1357 val = 0;
1358 if (!val)
1359 return -ENOSYS;
1362 return ethtool_op_set_tso(dev, data);
1365 static void xennet_set_features(struct net_device *dev)
1367 /* Turn off all GSO bits except ROBUST. */
1368 dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
1369 dev->features |= NETIF_F_GSO_ROBUST;
1370 xennet_set_sg(dev, 0);
1372 /* We need checksum offload to enable scatter/gather and TSO. */
1373 if (!(dev->features & NETIF_F_ALL_CSUM))
1374 return;
1376 if (!xennet_set_sg(dev, 1))
1377 xennet_set_tso(dev, 1);
1380 static void network_connect(struct net_device *dev)
1382 struct netfront_info *np = netdev_priv(dev);
1383 int i, requeue_idx;
1384 struct sk_buff *skb;
1385 grant_ref_t ref;
1387 xennet_set_features(dev);
1389 spin_lock_irq(&np->tx_lock);
1390 spin_lock(&np->rx_lock);
1392 /*
1393 * Recovery procedure:
1394 * NB. Freelist index entries are always going to be less than
1395 * PAGE_OFFSET, whereas pointers to skbs will always be equal or
1396 * greater than PAGE_OFFSET: we use this property to distinguish
1397 * them.
1398 */
1400 /* Step 1: Discard all pending TX packet fragments. */
1401 for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
1402 if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
1403 continue;
1405 skb = np->tx_skbs[i];
1406 gnttab_end_foreign_access_ref(
1407 np->grant_tx_ref[i], GNTMAP_readonly);
1408 gnttab_release_grant_reference(
1409 &np->gref_tx_head, np->grant_tx_ref[i]);
1410 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1411 add_id_to_freelist(np->tx_skbs, i);
1412 dev_kfree_skb_irq(skb);
1415 /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1416 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1417 if (!np->rx_skbs[i])
1418 continue;
1420 skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
1421 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1423 gnttab_grant_foreign_transfer_ref(
1424 ref, np->xbdev->otherend_id,
1425 page_to_pfn(skb_shinfo(skb)->frags->page));
1427 RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref;
1428 RING_GET_REQUEST(&np->rx, requeue_idx)->id = requeue_idx;
1430 requeue_idx++;
1433 np->rx.req_prod_pvt = requeue_idx;
1435 /*
1436 * Step 3: All public and private state should now be sane. Get
1437 * ready to start sending and receiving packets and give the driver
1438 * domain a kick because we've probably just requeued some
1439 * packets.
1440 */
1441 netif_carrier_on(dev);
1442 notify_remote_via_irq(np->irq);
1443 network_tx_buf_gc(dev);
1444 network_alloc_rx_buffers(dev);
1446 spin_unlock(&np->rx_lock);
1447 spin_unlock_irq(&np->tx_lock);
1450 static void netif_uninit(struct net_device *dev)
1452 struct netfront_info *np = netdev_priv(dev);
1453 gnttab_free_grant_references(np->gref_tx_head);
1454 gnttab_free_grant_references(np->gref_rx_head);
1457 static struct ethtool_ops network_ethtool_ops =
1459 .get_tx_csum = ethtool_op_get_tx_csum,
1460 .set_tx_csum = ethtool_op_set_tx_csum,
1461 .get_sg = ethtool_op_get_sg,
1462 .set_sg = xennet_set_sg,
1463 .get_tso = ethtool_op_get_tso,
1464 .set_tso = xennet_set_tso,
1465 .get_link = ethtool_op_get_link,
1466 };
1468 #ifdef CONFIG_SYSFS
1469 static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
1471 struct net_device *netdev = container_of(cd, struct net_device,
1472 class_dev);
1473 struct netfront_info *info = netdev_priv(netdev);
1475 return sprintf(buf, "%u\n", info->rx_min_target);
1478 static ssize_t store_rxbuf_min(struct class_device *cd,
1479 const char *buf, size_t len)
1481 struct net_device *netdev = container_of(cd, struct net_device,
1482 class_dev);
1483 struct netfront_info *np = netdev_priv(netdev);
1484 char *endp;
1485 unsigned long target;
1487 if (!capable(CAP_NET_ADMIN))
1488 return -EPERM;
1490 target = simple_strtoul(buf, &endp, 0);
1491 if (endp == buf)
1492 return -EBADMSG;
1494 if (target < RX_MIN_TARGET)
1495 target = RX_MIN_TARGET;
1496 if (target > RX_MAX_TARGET)
1497 target = RX_MAX_TARGET;
1499 spin_lock(&np->rx_lock);
1500 if (target > np->rx_max_target)
1501 np->rx_max_target = target;
1502 np->rx_min_target = target;
1503 if (target > np->rx_target)
1504 np->rx_target = target;
1506 network_alloc_rx_buffers(netdev);
1508 spin_unlock(&np->rx_lock);
1509 return len;
1512 static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
1514 struct net_device *netdev = container_of(cd, struct net_device,
1515 class_dev);
1516 struct netfront_info *info = netdev_priv(netdev);
1518 return sprintf(buf, "%u\n", info->rx_max_target);
1521 static ssize_t store_rxbuf_max(struct class_device *cd,
1522 const char *buf, size_t len)
1524 struct net_device *netdev = container_of(cd, struct net_device,
1525 class_dev);
1526 struct netfront_info *np = netdev_priv(netdev);
1527 char *endp;
1528 unsigned long target;
1530 if (!capable(CAP_NET_ADMIN))
1531 return -EPERM;
1533 target = simple_strtoul(buf, &endp, 0);
1534 if (endp == buf)
1535 return -EBADMSG;
1537 if (target < RX_MIN_TARGET)
1538 target = RX_MIN_TARGET;
1539 if (target > RX_MAX_TARGET)
1540 target = RX_MAX_TARGET;
1542 spin_lock(&np->rx_lock);
1543 if (target < np->rx_min_target)
1544 np->rx_min_target = target;
1545 np->rx_max_target = target;
1546 if (target < np->rx_target)
1547 np->rx_target = target;
1549 network_alloc_rx_buffers(netdev);
1551 spin_unlock(&np->rx_lock);
1552 return len;
1555 static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
1557 struct net_device *netdev = container_of(cd, struct net_device,
1558 class_dev);
1559 struct netfront_info *info = netdev_priv(netdev);
1561 return sprintf(buf, "%u\n", info->rx_target);
1564 static const struct class_device_attribute xennet_attrs[] = {
1565 __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
1566 __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
1567 __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
1568 };
1570 static int xennet_sysfs_addif(struct net_device *netdev)
1572 int i;
1573 int error = 0;
1575 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1576 error = class_device_create_file(&netdev->class_dev,
1577 &xennet_attrs[i]);
1578 if (error)
1579 goto fail;
1581 return 0;
1583 fail:
1584 while (--i >= 0)
1585 class_device_remove_file(&netdev->class_dev,
1586 &xennet_attrs[i]);
1587 return error;
1590 static void xennet_sysfs_delif(struct net_device *netdev)
1592 int i;
1594 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1595 class_device_remove_file(&netdev->class_dev,
1596 &xennet_attrs[i]);
1600 #endif /* CONFIG_SYSFS */
1603 /*
1604 * Nothing to do here. Virtual interface is point-to-point and the
1605 * physical interface is probably promiscuous anyway.
1606 */
1607 static void network_set_multicast_list(struct net_device *dev)
1611 /** Create a network device.
1612 * @param handle device handle
1613 * @param val return parameter for created device
1614 * @return 0 on success, error code otherwise
1615 */
1616 static struct net_device * __devinit create_netdev(int handle,
1617 struct xenbus_device *dev)
1619 int i, err = 0;
1620 struct net_device *netdev = NULL;
1621 struct netfront_info *np = NULL;
1623 netdev = alloc_etherdev(sizeof(struct netfront_info));
1624 if (!netdev) {
1625 printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
1626 __FUNCTION__);
1627 return ERR_PTR(-ENOMEM);
1630 np = netdev_priv(netdev);
1631 np->handle = handle;
1632 np->xbdev = dev;
1634 netif_carrier_off(netdev);
1636 spin_lock_init(&np->tx_lock);
1637 spin_lock_init(&np->rx_lock);
1639 skb_queue_head_init(&np->rx_batch);
1640 np->rx_target = RX_DFL_MIN_TARGET;
1641 np->rx_min_target = RX_DFL_MIN_TARGET;
1642 np->rx_max_target = RX_MAX_TARGET;
1644 init_timer(&np->rx_refill_timer);
1645 np->rx_refill_timer.data = (unsigned long)netdev;
1646 np->rx_refill_timer.function = rx_refill_timeout;
1648 /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
1649 for (i = 0; i <= NET_TX_RING_SIZE; i++) {
1650 np->tx_skbs[i] = (void *)((unsigned long) i+1);
1651 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1654 for (i = 0; i < NET_RX_RING_SIZE; i++) {
1655 np->rx_skbs[i] = NULL;
1656 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1659 /* A grant for every tx ring slot */
1660 if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1661 &np->gref_tx_head) < 0) {
1662 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
1663 err = -ENOMEM;
1664 goto exit;
1666 /* A grant for every rx ring slot */
1667 if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1668 &np->gref_rx_head) < 0) {
1669 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
1670 err = -ENOMEM;
1671 goto exit_free_tx;
1674 netdev->open = network_open;
1675 netdev->hard_start_xmit = network_start_xmit;
1676 netdev->stop = network_close;
1677 netdev->get_stats = network_get_stats;
1678 netdev->poll = netif_poll;
1679 netdev->set_multicast_list = network_set_multicast_list;
1680 netdev->uninit = netif_uninit;
1681 netdev->change_mtu = xennet_change_mtu;
1682 netdev->weight = 64;
1683 netdev->features = NETIF_F_IP_CSUM;
1685 SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
1686 SET_MODULE_OWNER(netdev);
1687 SET_NETDEV_DEV(netdev, &dev->dev);
1689 err = register_netdev(netdev);
1690 if (err) {
1691 printk(KERN_WARNING "%s> register_netdev err=%d\n",
1692 __FUNCTION__, err);
1693 goto exit_free_rx;
1696 err = xennet_sysfs_addif(netdev);
1697 if (err) {
1698 /* This can be non-fatal: it only means no tuning parameters */
1699 printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
1700 __FUNCTION__, err);
1703 np->netdev = netdev;
1705 return netdev;
1708 exit_free_rx:
1709 gnttab_free_grant_references(np->gref_rx_head);
1710 exit_free_tx:
1711 gnttab_free_grant_references(np->gref_tx_head);
1712 exit:
1713 free_netdev(netdev);
1714 return ERR_PTR(err);
1717 /*
1718 * We use this notifier to send out a fake ARP reply to reset switches and
1719 * router ARP caches when an IP interface is brought up on a VIF.
1720 */
1721 static int
1722 inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
1724 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
1725 struct net_device *dev = ifa->ifa_dev->dev;
1727 /* UP event and is it one of our devices? */
1728 if (event == NETDEV_UP && dev->open == network_open)
1729 (void)send_fake_arp(dev);
1731 return NOTIFY_DONE;
1735 /* ** Close down ** */
1738 /**
1739 * Handle the change of state of the backend to Closing. We must delete our
1740 * device-layer structures now, to ensure that writes are flushed through to
1741 * the backend. Once this is done, we can switch to Closed in
1742 * acknowledgement.
1743 */
1744 static void netfront_closing(struct xenbus_device *dev)
1746 struct netfront_info *info = dev->dev.driver_data;
1748 DPRINTK("netfront_closing: %s removed\n", dev->nodename);
1750 close_netdev(info);
1752 xenbus_switch_state(dev, XenbusStateClosed);
1756 static int __devexit netfront_remove(struct xenbus_device *dev)
1758 struct netfront_info *info = dev->dev.driver_data;
1760 DPRINTK("%s\n", dev->nodename);
1762 netif_disconnect_backend(info);
1763 free_netdev(info->netdev);
1765 return 0;
1769 static void close_netdev(struct netfront_info *info)
1771 del_timer_sync(&info->rx_refill_timer);
1773 xennet_sysfs_delif(info->netdev);
1774 unregister_netdev(info->netdev);
1778 static void netif_disconnect_backend(struct netfront_info *info)
1780 /* Stop old i/f to prevent errors whilst we rebuild the state. */
1781 spin_lock_irq(&info->tx_lock);
1782 spin_lock(&info->rx_lock);
1783 netif_carrier_off(info->netdev);
1784 spin_unlock(&info->rx_lock);
1785 spin_unlock_irq(&info->tx_lock);
1787 if (info->irq)
1788 unbind_from_irqhandler(info->irq, info->netdev);
1789 info->evtchn = info->irq = 0;
1791 end_access(info->tx_ring_ref, info->tx.sring);
1792 end_access(info->rx_ring_ref, info->rx.sring);
1793 info->tx_ring_ref = GRANT_INVALID_REF;
1794 info->rx_ring_ref = GRANT_INVALID_REF;
1795 info->tx.sring = NULL;
1796 info->rx.sring = NULL;
1800 static void netif_free(struct netfront_info *info)
1802 close_netdev(info);
1803 netif_disconnect_backend(info);
1804 free_netdev(info->netdev);
1808 static void end_access(int ref, void *page)
1810 if (ref != GRANT_INVALID_REF)
1811 gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1815 /* ** Driver registration ** */
1818 static struct xenbus_device_id netfront_ids[] = {
1819 { "vif" },
1820 { "" }
1821 };
1824 static struct xenbus_driver netfront = {
1825 .name = "vif",
1826 .owner = THIS_MODULE,
1827 .ids = netfront_ids,
1828 .probe = netfront_probe,
1829 .remove = __devexit_p(netfront_remove),
1830 .resume = netfront_resume,
1831 .otherend_changed = backend_changed,
1832 };
1835 static struct notifier_block notifier_inetdev = {
1836 .notifier_call = inetdev_notify,
1837 .next = NULL,
1838 .priority = 0
1839 };
1841 static int __init netif_init(void)
1843 if (!is_running_on_xen())
1844 return -ENODEV;
1846 if (is_initial_xendomain())
1847 return 0;
1849 IPRINTK("Initialising virtual ethernet driver.\n");
1851 (void)register_inetaddr_notifier(&notifier_inetdev);
1853 return xenbus_register_frontend(&netfront);
1855 module_init(netif_init);
1858 static void __exit netif_exit(void)
1860 unregister_inetaddr_notifier(&notifier_inetdev);
1862 return xenbus_unregister_driver(&netfront);
1864 module_exit(netif_exit);
1866 MODULE_LICENSE("Dual BSD/GPL");