ia64/linux-2.6.18-xen.hg

view drivers/xen/netfront/netfront.c @ 803:86e119bc82c5

netfront: Unregister inetdev notifiers on failure
If you attempt to modprobe the pv-on-hvm netfront driver on a machine
not running under Xen (say, bare-metal, or under another hypervisor), the
netfront code correctly returns an ENODEV and fails to load. However, if you
then shut down that machine, you will oops while tearing down the network.
This is because we forget to unregister the inetaddr_notifier on failure,
and so the kernel takes a fatal page fault. The attached patch just unregisters
the notifier on failure, and solves the problem for me.

Signed-off-by: Chris Lalancette <clalance@redhat.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Mar 02 11:06:52 2009 +0000 (2009-03-02)
parents 8081d19dce89
children ab1d4fbbe4bf
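
The pattern applied by this patch is visible in netif_init() near the end of the listing: the inetaddr notifier registered early in module init must be unregistered on every later failure path, otherwise the notifier chain keeps pointing at module text that has been freed. Below is a minimal stand-alone sketch of that pattern; the demo_* names and the stub registration function are hypothetical and merely stand in for the real xenbus registration.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>

/* Hypothetical placeholder for whatever the driver registers after the
 * notifier; in netfront this is xenbus_register_frontend(), which fails
 * with -ENODEV when not running on Xen. */
static int demo_register_frontend(void)
{
        return -ENODEV;
}

static int demo_inetdev_event(struct notifier_block *this,
                              unsigned long event, void *ptr)
{
        /* The real driver sends a fake ARP on NETDEV_UP here. */
        return NOTIFY_DONE;
}

static struct notifier_block demo_notifier = {
        .notifier_call = demo_inetdev_event,
};

static int __init demo_init(void)
{
        int err;

        register_inetaddr_notifier(&demo_notifier);

        err = demo_register_frontend();
        if (err) {
                /* Without this, the notifier would outlive the module and
                 * the next inetdev event would call into unloaded code. */
                unregister_inetaddr_notifier(&demo_notifier);
                return err;
        }
        return 0;
}

static void __exit demo_exit(void)
{
        unregister_inetaddr_notifier(&demo_notifier);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("Dual BSD/GPL");

Loading this sketch reproduces the shape of the failure path: the notifier is registered, the later registration step fails, and the error path removes the notifier again before returning the error, exactly as the patched netif_init() below does.
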
line source
1 /******************************************************************************
2 * Virtual network driver for conversing with remote driver backends.
3 *
4 * Copyright (c) 2002-2005, K A Fraser
5 * Copyright (c) 2005, XenSource Ltd
6 * Copyright (C) 2007 Solarflare Communications, Inc.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version 2
10 * as published by the Free Software Foundation; or, when distributed
11 * separately from the Linux kernel or incorporated into other
12 * software packages, subject to the following license:
13 *
14 * Permission is hereby granted, free of charge, to any person obtaining a copy
15 * of this source file (the "Software"), to deal in the Software without
16 * restriction, including without limitation the rights to use, copy, modify,
17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
18 * and to permit persons to whom the Software is furnished to do so, subject to
19 * the following conditions:
20 *
21 * The above copyright notice and this permission notice shall be included in
22 * all copies or substantial portions of the Software.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 * IN THE SOFTWARE.
31 */
33 #include <linux/module.h>
34 #include <linux/version.h>
35 #include <linux/kernel.h>
36 #include <linux/sched.h>
37 #include <linux/slab.h>
38 #include <linux/string.h>
39 #include <linux/errno.h>
40 #include <linux/netdevice.h>
41 #include <linux/inetdevice.h>
42 #include <linux/etherdevice.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
45 #include <linux/bitops.h>
46 #include <linux/ethtool.h>
47 #include <linux/in.h>
48 #include <linux/if_ether.h>
49 #include <linux/io.h>
50 #include <linux/moduleparam.h>
51 #include <net/sock.h>
52 #include <net/pkt_sched.h>
53 #include <net/arp.h>
54 #include <net/route.h>
55 #include <asm/uaccess.h>
56 #include <xen/evtchn.h>
57 #include <xen/xenbus.h>
58 #include <xen/interface/io/netif.h>
59 #include <xen/interface/memory.h>
60 #include <xen/balloon.h>
61 #include <asm/page.h>
62 #include <asm/maddr.h>
63 #include <asm/uaccess.h>
64 #include <xen/interface/grant_table.h>
65 #include <xen/gnttab.h>
67 struct netfront_cb {
68 struct page *page;
69 unsigned offset;
70 };
72 #define NETFRONT_SKB_CB(skb) ((struct netfront_cb *)((skb)->cb))
74 #include "netfront.h"
76 /*
77 * Mutually-exclusive module options to select receive data path:
78 * rx_copy : Packets are copied by network backend into local memory
79 * rx_flip : Page containing packet data is transferred to our ownership
80 * For fully-virtualised guests there is no option - copying must be used.
81 * For paravirtualised guests, flipping is the default.
82 */
83 #ifdef CONFIG_XEN
84 static int MODPARM_rx_copy = 0;
85 module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
86 MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
87 static int MODPARM_rx_flip = 0;
88 module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
89 MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
90 #else
91 static const int MODPARM_rx_copy = 1;
92 static const int MODPARM_rx_flip = 0;
93 #endif
95 #define RX_COPY_THRESHOLD 256
97 /* If we don't have GSO, fake things up so that we never try to use it. */
98 #if defined(NETIF_F_GSO)
99 #define HAVE_GSO 1
100 #define HAVE_TSO 1 /* TSO is a subset of GSO */
101 #define HAVE_CSUM_OFFLOAD 1
102 static inline void dev_disable_gso_features(struct net_device *dev)
103 {
104 /* Turn off all GSO bits except ROBUST. */
105 dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
106 dev->features |= NETIF_F_GSO_ROBUST;
107 }
108 #elif defined(NETIF_F_TSO)
109 #define HAVE_GSO 0
110 #define HAVE_TSO 1
112 /* Some older kernels cannot cope with incorrect checksums,
113 * particularly in netfilter. I'm not sure there is 100% correlation
114 * with the presence of NETIF_F_TSO but it appears to be a good first
115 * approximation.
116 */
117 #define HAVE_CSUM_OFFLOAD 0
119 #define gso_size tso_size
120 #define gso_segs tso_segs
121 static inline void dev_disable_gso_features(struct net_device *dev)
122 {
123 /* Turn off all TSO bits. */
124 dev->features &= ~NETIF_F_TSO;
125 }
126 static inline int skb_is_gso(const struct sk_buff *skb)
127 {
128 return skb_shinfo(skb)->tso_size;
129 }
130 static inline int skb_gso_ok(struct sk_buff *skb, int features)
131 {
132 return (features & NETIF_F_TSO);
133 }
135 static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
136 {
137 return skb_is_gso(skb) &&
138 (!skb_gso_ok(skb, dev->features) ||
139 unlikely(skb->ip_summed != CHECKSUM_HW));
140 }
141 #else
142 #define HAVE_GSO 0
143 #define HAVE_TSO 0
144 #define HAVE_CSUM_OFFLOAD 0
145 #define netif_needs_gso(dev, skb) 0
146 #define dev_disable_gso_features(dev) ((void)0)
147 #define ethtool_op_set_tso(dev, data) (-ENOSYS)
148 #endif
150 #define GRANT_INVALID_REF 0
152 struct netfront_rx_info {
153 struct netif_rx_response rx;
154 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
155 };
157 /*
158 * Implement our own carrier flag: the network stack's version causes delays
159 * when the carrier is re-enabled (in particular, dev_activate() may not
160 * immediately be called, which can cause packet loss).
161 */
162 #define netfront_carrier_on(netif) ((netif)->carrier = 1)
163 #define netfront_carrier_off(netif) ((netif)->carrier = 0)
164 #define netfront_carrier_ok(netif) ((netif)->carrier)
166 /*
167 * Access macros for acquiring/freeing slots in tx_skbs[].
168 */
170 static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
171 {
172 list[id] = list[0];
173 list[0] = (void *)(unsigned long)id;
174 }
176 static inline unsigned short get_id_from_freelist(struct sk_buff **list)
177 {
178 unsigned int id = (unsigned int)(unsigned long)list[0];
179 list[0] = list[id];
180 return id;
181 }
183 static inline int xennet_rxidx(RING_IDX idx)
184 {
185 return idx & (NET_RX_RING_SIZE - 1);
186 }
188 static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
189 RING_IDX ri)
190 {
191 int i = xennet_rxidx(ri);
192 struct sk_buff *skb = np->rx_skbs[i];
193 np->rx_skbs[i] = NULL;
194 return skb;
195 }
197 static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
198 RING_IDX ri)
199 {
200 int i = xennet_rxidx(ri);
201 grant_ref_t ref = np->grant_rx_ref[i];
202 np->grant_rx_ref[i] = GRANT_INVALID_REF;
203 return ref;
204 }
206 #define DPRINTK(fmt, args...) \
207 pr_debug("netfront (%s:%d) " fmt, \
208 __FUNCTION__, __LINE__, ##args)
209 #define IPRINTK(fmt, args...) \
210 printk(KERN_INFO "netfront: " fmt, ##args)
211 #define WPRINTK(fmt, args...) \
212 printk(KERN_WARNING "netfront: " fmt, ##args)
214 static int setup_device(struct xenbus_device *, struct netfront_info *);
215 static struct net_device *create_netdev(struct xenbus_device *);
217 static void end_access(int, void *);
218 static void netif_disconnect_backend(struct netfront_info *);
220 static int network_connect(struct net_device *);
221 static void network_tx_buf_gc(struct net_device *);
222 static void network_alloc_rx_buffers(struct net_device *);
223 static void send_fake_arp(struct net_device *);
225 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
227 #ifdef CONFIG_SYSFS
228 static int xennet_sysfs_addif(struct net_device *netdev);
229 static void xennet_sysfs_delif(struct net_device *netdev);
230 #else /* !CONFIG_SYSFS */
231 #define xennet_sysfs_addif(dev) (0)
232 #define xennet_sysfs_delif(dev) do { } while(0)
233 #endif
235 static inline int xennet_can_sg(struct net_device *dev)
236 {
237 return dev->features & NETIF_F_SG;
238 }
240 /**
241 * Entry point to this code when a new device is created. Allocate the basic
242 * structures and the ring buffers for communication with the backend, and
243 * inform the backend of the appropriate details for those.
244 */
245 static int __devinit netfront_probe(struct xenbus_device *dev,
246 const struct xenbus_device_id *id)
247 {
248 int err;
249 struct net_device *netdev;
250 struct netfront_info *info;
252 netdev = create_netdev(dev);
253 if (IS_ERR(netdev)) {
254 err = PTR_ERR(netdev);
255 xenbus_dev_fatal(dev, err, "creating netdev");
256 return err;
257 }
259 info = netdev_priv(netdev);
260 dev->dev.driver_data = info;
262 err = register_netdev(info->netdev);
263 if (err) {
264 printk(KERN_WARNING "%s: register_netdev err=%d\n",
265 __FUNCTION__, err);
266 goto fail;
267 }
269 err = xennet_sysfs_addif(info->netdev);
270 if (err) {
271 unregister_netdev(info->netdev);
272 printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
273 __FUNCTION__, err);
274 goto fail;
275 }
277 return 0;
279 fail:
280 free_netdev(netdev);
281 dev->dev.driver_data = NULL;
282 return err;
283 }
285 static int __devexit netfront_remove(struct xenbus_device *dev)
286 {
287 struct netfront_info *info = dev->dev.driver_data;
289 DPRINTK("%s\n", dev->nodename);
291 netfront_accelerator_call_remove(info, dev);
293 netif_disconnect_backend(info);
295 del_timer_sync(&info->rx_refill_timer);
297 xennet_sysfs_delif(info->netdev);
299 unregister_netdev(info->netdev);
301 free_netdev(info->netdev);
303 return 0;
304 }
307 static int netfront_suspend(struct xenbus_device *dev)
308 {
309 struct netfront_info *info = dev->dev.driver_data;
310 return netfront_accelerator_suspend(info, dev);
311 }
314 static int netfront_suspend_cancel(struct xenbus_device *dev)
315 {
316 struct netfront_info *info = dev->dev.driver_data;
317 return netfront_accelerator_suspend_cancel(info, dev);
318 }
321 /**
322 * We are reconnecting to the backend, due to a suspend/resume, or a backend
323 * driver restart. We tear down our netif structure and recreate it, but
324 * leave the device-layer structures intact so that this is transparent to the
325 * rest of the kernel.
326 */
327 static int netfront_resume(struct xenbus_device *dev)
328 {
329 struct netfront_info *info = dev->dev.driver_data;
331 DPRINTK("%s\n", dev->nodename);
333 netfront_accelerator_resume(info, dev);
335 netif_disconnect_backend(info);
336 return 0;
337 }
339 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
340 {
341 char *s, *e, *macstr;
342 int i;
344 macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
345 if (IS_ERR(macstr))
346 return PTR_ERR(macstr);
348 for (i = 0; i < ETH_ALEN; i++) {
349 mac[i] = simple_strtoul(s, &e, 16);
350 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
351 kfree(macstr);
352 return -ENOENT;
353 }
354 s = e+1;
355 }
357 kfree(macstr);
358 return 0;
359 }
361 /* Common code used when first setting up, and when resuming. */
362 static int talk_to_backend(struct xenbus_device *dev,
363 struct netfront_info *info)
364 {
365 const char *message;
366 struct xenbus_transaction xbt;
367 int err;
369 /* Read mac only in the first setup. */
370 if (!is_valid_ether_addr(info->mac)) {
371 err = xen_net_read_mac(dev, info->mac);
372 if (err) {
373 xenbus_dev_fatal(dev, err, "parsing %s/mac",
374 dev->nodename);
375 goto out;
376 }
377 }
379 /* Create shared ring, alloc event channel. */
380 err = setup_device(dev, info);
381 if (err)
382 goto out;
384 /* This will load an accelerator if one is configured when the
385 * watch fires */
386 netfront_accelerator_add_watch(info);
388 again:
389 err = xenbus_transaction_start(&xbt);
390 if (err) {
391 xenbus_dev_fatal(dev, err, "starting transaction");
392 goto destroy_ring;
393 }
395 err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
396 info->tx_ring_ref);
397 if (err) {
398 message = "writing tx ring-ref";
399 goto abort_transaction;
400 }
401 err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
402 info->rx_ring_ref);
403 if (err) {
404 message = "writing rx ring-ref";
405 goto abort_transaction;
406 }
407 err = xenbus_printf(xbt, dev->nodename,
408 "event-channel", "%u",
409 irq_to_evtchn_port(info->irq));
410 if (err) {
411 message = "writing event-channel";
412 goto abort_transaction;
413 }
415 err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
416 info->copying_receiver);
417 if (err) {
418 message = "writing request-rx-copy";
419 goto abort_transaction;
420 }
422 err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
423 if (err) {
424 message = "writing feature-rx-notify";
425 goto abort_transaction;
426 }
428 err = xenbus_printf(xbt, dev->nodename, "feature-no-csum-offload",
429 "%d", !HAVE_CSUM_OFFLOAD);
430 if (err) {
431 message = "writing feature-no-csum-offload";
432 goto abort_transaction;
433 }
435 err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
436 if (err) {
437 message = "writing feature-sg";
438 goto abort_transaction;
439 }
441 err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d",
442 HAVE_TSO);
443 if (err) {
444 message = "writing feature-gso-tcpv4";
445 goto abort_transaction;
446 }
448 err = xenbus_transaction_end(xbt, 0);
449 if (err) {
450 if (err == -EAGAIN)
451 goto again;
452 xenbus_dev_fatal(dev, err, "completing transaction");
453 goto destroy_ring;
454 }
456 return 0;
458 abort_transaction:
459 xenbus_transaction_end(xbt, 1);
460 xenbus_dev_fatal(dev, err, "%s", message);
461 destroy_ring:
462 netfront_accelerator_call_remove(info, dev);
463 netif_disconnect_backend(info);
464 out:
465 return err;
466 }
468 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
469 {
470 struct netif_tx_sring *txs;
471 struct netif_rx_sring *rxs;
472 int err;
473 struct net_device *netdev = info->netdev;
475 info->tx_ring_ref = GRANT_INVALID_REF;
476 info->rx_ring_ref = GRANT_INVALID_REF;
477 info->rx.sring = NULL;
478 info->tx.sring = NULL;
479 info->irq = 0;
481 txs = (struct netif_tx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
482 if (!txs) {
483 err = -ENOMEM;
484 xenbus_dev_fatal(dev, err, "allocating tx ring page");
485 goto fail;
486 }
487 SHARED_RING_INIT(txs);
488 FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
490 err = xenbus_grant_ring(dev, virt_to_mfn(txs));
491 if (err < 0) {
492 free_page((unsigned long)txs);
493 goto fail;
494 }
495 info->tx_ring_ref = err;
497 rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH);
498 if (!rxs) {
499 err = -ENOMEM;
500 xenbus_dev_fatal(dev, err, "allocating rx ring page");
501 goto fail;
502 }
503 SHARED_RING_INIT(rxs);
504 FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
506 err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
507 if (err < 0) {
508 free_page((unsigned long)rxs);
509 goto fail;
510 }
511 info->rx_ring_ref = err;
513 memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
515 err = bind_listening_port_to_irqhandler(
516 dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
517 netdev);
518 if (err < 0)
519 goto fail;
520 info->irq = err;
522 return 0;
524 fail:
525 return err;
526 }
528 /**
529 * Callback received when the backend's state changes.
530 */
531 static void backend_changed(struct xenbus_device *dev,
532 enum xenbus_state backend_state)
533 {
534 struct netfront_info *np = dev->dev.driver_data;
535 struct net_device *netdev = np->netdev;
537 DPRINTK("%s\n", xenbus_strstate(backend_state));
539 switch (backend_state) {
540 case XenbusStateInitialising:
541 case XenbusStateInitialised:
542 case XenbusStateConnected:
543 case XenbusStateReconfiguring:
544 case XenbusStateReconfigured:
545 case XenbusStateUnknown:
546 case XenbusStateClosed:
547 break;
549 case XenbusStateInitWait:
550 if (dev->state != XenbusStateInitialising)
551 break;
552 if (network_connect(netdev) != 0)
553 break;
554 xenbus_switch_state(dev, XenbusStateConnected);
555 send_fake_arp(netdev);
556 break;
558 case XenbusStateClosing:
559 xenbus_frontend_closed(dev);
560 break;
561 }
562 }
564 /** Send a packet on a net device to encourage switches to learn the
565 * MAC. We send a fake ARP reply.
566 *
567 * @param dev device
568 * @return nothing (errors are silently ignored)
569 */
570 static void send_fake_arp(struct net_device *dev)
571 {
572 #ifdef CONFIG_INET
573 struct sk_buff *skb;
574 u32 src_ip, dst_ip;
576 dst_ip = INADDR_BROADCAST;
577 src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
579 /* No IP? Then nothing to do. */
580 if (src_ip == 0)
581 return;
583 skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
584 dst_ip, dev, src_ip,
585 /*dst_hw*/ NULL, /*src_hw*/ NULL,
586 /*target_hw*/ dev->dev_addr);
587 if (skb == NULL)
588 return;
590 dev_queue_xmit(skb);
591 #endif
592 }
594 static inline int netfront_tx_slot_available(struct netfront_info *np)
595 {
596 return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
597 (TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
598 }
601 static inline void network_maybe_wake_tx(struct net_device *dev)
602 {
603 struct netfront_info *np = netdev_priv(dev);
605 if (unlikely(netif_queue_stopped(dev)) &&
606 netfront_tx_slot_available(np) &&
607 likely(netif_running(dev)) &&
608 netfront_check_accelerator_queue_ready(dev, np))
609 netif_wake_queue(dev);
610 }
613 int netfront_check_queue_ready(struct net_device *dev)
614 {
615 struct netfront_info *np = netdev_priv(dev);
617 return unlikely(netif_queue_stopped(dev)) &&
618 netfront_tx_slot_available(np) &&
619 likely(netif_running(dev));
620 }
621 EXPORT_SYMBOL(netfront_check_queue_ready);
624 static int network_open(struct net_device *dev)
625 {
626 struct netfront_info *np = netdev_priv(dev);
628 memset(&np->stats, 0, sizeof(np->stats));
630 spin_lock_bh(&np->rx_lock);
631 if (netfront_carrier_ok(np)) {
632 network_alloc_rx_buffers(dev);
633 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
634 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){
635 netfront_accelerator_call_stop_napi_irq(np, dev);
637 netif_rx_schedule(dev);
638 }
639 }
640 spin_unlock_bh(&np->rx_lock);
642 network_maybe_wake_tx(dev);
644 return 0;
645 }
647 static void network_tx_buf_gc(struct net_device *dev)
648 {
649 RING_IDX cons, prod;
650 unsigned short id;
651 struct netfront_info *np = netdev_priv(dev);
652 struct sk_buff *skb;
654 BUG_ON(!netfront_carrier_ok(np));
656 do {
657 prod = np->tx.sring->rsp_prod;
658 rmb(); /* Ensure we see responses up to 'rp'. */
660 for (cons = np->tx.rsp_cons; cons != prod; cons++) {
661 struct netif_tx_response *txrsp;
663 txrsp = RING_GET_RESPONSE(&np->tx, cons);
664 if (txrsp->status == NETIF_RSP_NULL)
665 continue;
667 id = txrsp->id;
668 skb = np->tx_skbs[id];
669 if (unlikely(gnttab_query_foreign_access(
670 np->grant_tx_ref[id]) != 0)) {
671 printk(KERN_ALERT "network_tx_buf_gc: warning "
672 "-- grant still in use by backend "
673 "domain.\n");
674 BUG();
675 }
676 gnttab_end_foreign_access_ref(np->grant_tx_ref[id]);
677 gnttab_release_grant_reference(
678 &np->gref_tx_head, np->grant_tx_ref[id]);
679 np->grant_tx_ref[id] = GRANT_INVALID_REF;
680 add_id_to_freelist(np->tx_skbs, id);
681 dev_kfree_skb_irq(skb);
682 }
684 np->tx.rsp_cons = prod;
686 /*
687 * Set a new event, then check for race with update of tx_cons.
688 * Note that it is essential to schedule a callback, no matter
689 * how few buffers are pending. Even if there is space in the
690 * transmit ring, higher layers may be blocked because too much
691 * data is outstanding: in such cases notification from Xen is
692 * likely to be the only kick that we'll get.
693 */
694 np->tx.sring->rsp_event =
695 prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
696 mb();
697 } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
699 network_maybe_wake_tx(dev);
700 }
702 static void rx_refill_timeout(unsigned long data)
703 {
704 struct net_device *dev = (struct net_device *)data;
705 struct netfront_info *np = netdev_priv(dev);
707 netfront_accelerator_call_stop_napi_irq(np, dev);
709 netif_rx_schedule(dev);
710 }
712 static void network_alloc_rx_buffers(struct net_device *dev)
713 {
714 unsigned short id;
715 struct netfront_info *np = netdev_priv(dev);
716 struct sk_buff *skb;
717 struct page *page;
718 int i, batch_target, notify;
719 RING_IDX req_prod = np->rx.req_prod_pvt;
720 struct xen_memory_reservation reservation;
721 grant_ref_t ref;
722 unsigned long pfn;
723 void *vaddr;
724 int nr_flips;
725 netif_rx_request_t *req;
727 if (unlikely(!netfront_carrier_ok(np)))
728 return;
730 /*
731 * Allocate skbuffs greedily, even though we batch updates to the
732 * receive ring. This creates a less bursty demand on the memory
733 * allocator, so should reduce the chance of failed allocation requests
734 * both for ourself and for other kernel subsystems.
735 */
736 batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
737 for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
738 /*
739 * Allocate an skb and a page. Do not use __dev_alloc_skb as
740 * that will allocate page-sized buffers which is not
741 * necessary here.
742 * 16 bytes added as necessary headroom for netif_receive_skb.
743 */
744 skb = alloc_skb(RX_COPY_THRESHOLD + 16 + NET_IP_ALIGN,
745 GFP_ATOMIC | __GFP_NOWARN);
746 if (unlikely(!skb))
747 goto no_skb;
749 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
750 if (!page) {
751 kfree_skb(skb);
752 no_skb:
753 /* Any skbuffs queued for refill? Force them out. */
754 if (i != 0)
755 goto refill;
756 /* Could not allocate any skbuffs. Try again later. */
757 mod_timer(&np->rx_refill_timer,
758 jiffies + (HZ/10));
759 break;
760 }
762 skb_reserve(skb, 16 + NET_IP_ALIGN); /* mimic dev_alloc_skb() */
763 skb_shinfo(skb)->frags[0].page = page;
764 skb_shinfo(skb)->nr_frags = 1;
765 __skb_queue_tail(&np->rx_batch, skb);
766 }
768 /* Is the batch large enough to be worthwhile? */
769 if (i < (np->rx_target/2)) {
770 if (req_prod > np->rx.sring->req_prod)
771 goto push;
772 return;
773 }
775 /* Adjust our fill target if we risked running out of buffers. */
776 if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
777 ((np->rx_target *= 2) > np->rx_max_target))
778 np->rx_target = np->rx_max_target;
780 refill:
781 for (nr_flips = i = 0; ; i++) {
782 if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
783 break;
785 skb->dev = dev;
787 id = xennet_rxidx(req_prod + i);
789 BUG_ON(np->rx_skbs[id]);
790 np->rx_skbs[id] = skb;
792 ref = gnttab_claim_grant_reference(&np->gref_rx_head);
793 BUG_ON((signed short)ref < 0);
794 np->grant_rx_ref[id] = ref;
796 pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
797 vaddr = page_address(skb_shinfo(skb)->frags[0].page);
799 req = RING_GET_REQUEST(&np->rx, req_prod + i);
800 if (!np->copying_receiver) {
801 gnttab_grant_foreign_transfer_ref(ref,
802 np->xbdev->otherend_id,
803 pfn);
804 np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
805 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
806 /* Remove this page before passing
807 * back to Xen. */
808 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
809 MULTI_update_va_mapping(np->rx_mcl+i,
810 (unsigned long)vaddr,
811 __pte(0), 0);
812 }
813 nr_flips++;
814 } else {
815 gnttab_grant_foreign_access_ref(ref,
816 np->xbdev->otherend_id,
817 pfn_to_mfn(pfn),
818 0);
819 }
821 req->id = id;
822 req->gref = ref;
823 }
825 if ( nr_flips != 0 ) {
826 /* Tell the balloon driver what is going on. */
827 balloon_update_driver_allowance(i);
829 set_xen_guest_handle(reservation.extent_start,
830 np->rx_pfn_array);
831 reservation.nr_extents = nr_flips;
832 reservation.extent_order = 0;
833 reservation.address_bits = 0;
834 reservation.domid = DOMID_SELF;
836 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
837 /* After all PTEs have been zapped, flush the TLB. */
838 np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
839 UVMF_TLB_FLUSH|UVMF_ALL;
841 /* Give away a batch of pages. */
842 np->rx_mcl[i].op = __HYPERVISOR_memory_op;
843 np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
844 np->rx_mcl[i].args[1] = (unsigned long)&reservation;
846 /* Zap PTEs and give away pages in one big
847 * multicall. */
848 if (unlikely(HYPERVISOR_multicall(np->rx_mcl, i+1)))
849 BUG();
851 /* Check return status of HYPERVISOR_memory_op(). */
852 if (unlikely(np->rx_mcl[i].result != i))
853 panic("Unable to reduce memory reservation\n");
854 while (nr_flips--)
855 BUG_ON(np->rx_mcl[nr_flips].result);
856 } else {
857 if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
858 &reservation) != i)
859 panic("Unable to reduce memory reservation\n");
860 }
861 } else {
862 wmb();
863 }
865 /* Above is a suitable barrier to ensure backend will see requests. */
866 np->rx.req_prod_pvt = req_prod + i;
867 push:
868 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
869 if (notify)
870 notify_remote_via_irq(np->irq);
871 }
873 static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
874 struct netif_tx_request *tx)
875 {
876 struct netfront_info *np = netdev_priv(dev);
877 char *data = skb->data;
878 unsigned long mfn;
879 RING_IDX prod = np->tx.req_prod_pvt;
880 int frags = skb_shinfo(skb)->nr_frags;
881 unsigned int offset = offset_in_page(data);
882 unsigned int len = skb_headlen(skb);
883 unsigned int id;
884 grant_ref_t ref;
885 int i;
887 while (len > PAGE_SIZE - offset) {
888 tx->size = PAGE_SIZE - offset;
889 tx->flags |= NETTXF_more_data;
890 len -= tx->size;
891 data += tx->size;
892 offset = 0;
894 id = get_id_from_freelist(np->tx_skbs);
895 np->tx_skbs[id] = skb_get(skb);
896 tx = RING_GET_REQUEST(&np->tx, prod++);
897 tx->id = id;
898 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
899 BUG_ON((signed short)ref < 0);
901 mfn = virt_to_mfn(data);
902 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
903 mfn, GTF_readonly);
905 tx->gref = np->grant_tx_ref[id] = ref;
906 tx->offset = offset;
907 tx->size = len;
908 tx->flags = 0;
909 }
911 for (i = 0; i < frags; i++) {
912 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
914 tx->flags |= NETTXF_more_data;
916 id = get_id_from_freelist(np->tx_skbs);
917 np->tx_skbs[id] = skb_get(skb);
918 tx = RING_GET_REQUEST(&np->tx, prod++);
919 tx->id = id;
920 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
921 BUG_ON((signed short)ref < 0);
923 mfn = pfn_to_mfn(page_to_pfn(frag->page));
924 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
925 mfn, GTF_readonly);
927 tx->gref = np->grant_tx_ref[id] = ref;
928 tx->offset = frag->page_offset;
929 tx->size = frag->size;
930 tx->flags = 0;
931 }
933 np->tx.req_prod_pvt = prod;
934 }
936 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
937 {
938 unsigned short id;
939 struct netfront_info *np = netdev_priv(dev);
940 struct netif_tx_request *tx;
941 struct netif_extra_info *extra;
942 char *data = skb->data;
943 RING_IDX i;
944 grant_ref_t ref;
945 unsigned long mfn;
946 int notify;
947 int frags = skb_shinfo(skb)->nr_frags;
948 unsigned int offset = offset_in_page(data);
949 unsigned int len = skb_headlen(skb);
951 /* Check the fast path, if hooks are available */
952 if (np->accel_vif_state.hooks &&
953 np->accel_vif_state.hooks->start_xmit(skb, dev)) {
954 /* Fast path has sent this packet */
955 return 0;
956 }
958 frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
959 if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
960 printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
961 frags);
962 dump_stack();
963 goto drop;
964 }
966 spin_lock_irq(&np->tx_lock);
968 if (unlikely(!netfront_carrier_ok(np) ||
969 (frags > 1 && !xennet_can_sg(dev)) ||
970 netif_needs_gso(dev, skb))) {
971 spin_unlock_irq(&np->tx_lock);
972 goto drop;
973 }
975 i = np->tx.req_prod_pvt;
977 id = get_id_from_freelist(np->tx_skbs);
978 np->tx_skbs[id] = skb;
980 tx = RING_GET_REQUEST(&np->tx, i);
982 tx->id = id;
983 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
984 BUG_ON((signed short)ref < 0);
985 mfn = virt_to_mfn(data);
986 gnttab_grant_foreign_access_ref(
987 ref, np->xbdev->otherend_id, mfn, GTF_readonly);
988 tx->gref = np->grant_tx_ref[id] = ref;
989 tx->offset = offset;
990 tx->size = len;
992 tx->flags = 0;
993 extra = NULL;
995 if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
996 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
997 #ifdef CONFIG_XEN
998 if (skb->proto_data_valid) /* remote but checksummed? */
999 tx->flags |= NETTXF_data_validated;
1000 #endif
1002 #if HAVE_TSO
1003 if (skb_shinfo(skb)->gso_size) {
1004 struct netif_extra_info *gso = (struct netif_extra_info *)
1005 RING_GET_REQUEST(&np->tx, ++i);
1007 if (extra)
1008 extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
1009 else
1010 tx->flags |= NETTXF_extra_info;
1012 gso->u.gso.size = skb_shinfo(skb)->gso_size;
1013 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
1014 gso->u.gso.pad = 0;
1015 gso->u.gso.features = 0;
1017 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
1018 gso->flags = 0;
1019 extra = gso;
1021 #endif
1023 np->tx.req_prod_pvt = i + 1;
1025 xennet_make_frags(skb, dev, tx);
1026 tx->size = skb->len;
1028 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
1029 if (notify)
1030 notify_remote_via_irq(np->irq);
1032 np->stats.tx_bytes += skb->len;
1033 np->stats.tx_packets++;
1034 dev->trans_start = jiffies;
1036 /* Note: It is not safe to access skb after network_tx_buf_gc()! */
1037 network_tx_buf_gc(dev);
1039 if (!netfront_tx_slot_available(np))
1040 netif_stop_queue(dev);
1042 spin_unlock_irq(&np->tx_lock);
1044 return 0;
1046 drop:
1047 np->stats.tx_dropped++;
1048 dev_kfree_skb(skb);
1049 return 0;
1052 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
1054 struct net_device *dev = dev_id;
1055 struct netfront_info *np = netdev_priv(dev);
1056 unsigned long flags;
1058 spin_lock_irqsave(&np->tx_lock, flags);
1060 if (likely(netfront_carrier_ok(np))) {
1061 network_tx_buf_gc(dev);
1062 /* Under tx_lock: protects access to rx shared-ring indexes. */
1063 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
1064 netfront_accelerator_call_stop_napi_irq(np, dev);
1066 netif_rx_schedule(dev);
1067 dev->last_rx = jiffies;
1071 spin_unlock_irqrestore(&np->tx_lock, flags);
1073 return IRQ_HANDLED;
1076 static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
1077 grant_ref_t ref)
1079 int new = xennet_rxidx(np->rx.req_prod_pvt);
1081 BUG_ON(np->rx_skbs[new]);
1082 np->rx_skbs[new] = skb;
1083 np->grant_rx_ref[new] = ref;
1084 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
1085 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
1086 np->rx.req_prod_pvt++;
1089 int xennet_get_extras(struct netfront_info *np,
1090 struct netif_extra_info *extras, RING_IDX rp)
1093 struct netif_extra_info *extra;
1094 RING_IDX cons = np->rx.rsp_cons;
1095 int err = 0;
1097 do {
1098 struct sk_buff *skb;
1099 grant_ref_t ref;
1101 if (unlikely(cons + 1 == rp)) {
1102 if (net_ratelimit())
1103 WPRINTK("Missing extra info\n");
1104 err = -EBADR;
1105 break;
1108 extra = (struct netif_extra_info *)
1109 RING_GET_RESPONSE(&np->rx, ++cons);
1111 if (unlikely(!extra->type ||
1112 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1113 if (net_ratelimit())
1114 WPRINTK("Invalid extra type: %d\n",
1115 extra->type);
1116 err = -EINVAL;
1117 } else {
1118 memcpy(&extras[extra->type - 1], extra,
1119 sizeof(*extra));
1122 skb = xennet_get_rx_skb(np, cons);
1123 ref = xennet_get_rx_ref(np, cons);
1124 xennet_move_rx_slot(np, skb, ref);
1125 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
1127 np->rx.rsp_cons = cons;
1128 return err;
1131 static int xennet_get_responses(struct netfront_info *np,
1132 struct netfront_rx_info *rinfo, RING_IDX rp,
1133 struct sk_buff_head *list,
1134 int *pages_flipped_p)
1136 int pages_flipped = *pages_flipped_p;
1137 struct mmu_update *mmu;
1138 struct multicall_entry *mcl;
1139 struct netif_rx_response *rx = &rinfo->rx;
1140 struct netif_extra_info *extras = rinfo->extras;
1141 RING_IDX cons = np->rx.rsp_cons;
1142 struct sk_buff *skb = xennet_get_rx_skb(np, cons);
1143 grant_ref_t ref = xennet_get_rx_ref(np, cons);
1144 int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
1145 int frags = 1;
1146 int err = 0;
1147 unsigned long ret;
1149 if (rx->flags & NETRXF_extra_info) {
1150 err = xennet_get_extras(np, extras, rp);
1151 cons = np->rx.rsp_cons;
1154 for (;;) {
1155 unsigned long mfn;
1157 if (unlikely(rx->status < 0 ||
1158 rx->offset + rx->status > PAGE_SIZE)) {
1159 if (net_ratelimit())
1160 WPRINTK("rx->offset: %x, size: %u\n",
1161 rx->offset, rx->status);
1162 xennet_move_rx_slot(np, skb, ref);
1163 err = -EINVAL;
1164 goto next;
1167 /*
1168 * This definitely indicates a bug, either in this driver or in
1169 * the backend driver. In future this should flag the bad
1170 * situation to the system controller to reboot the backend.
1171 */
1172 if (ref == GRANT_INVALID_REF) {
1173 if (net_ratelimit())
1174 WPRINTK("Bad rx response id %d.\n", rx->id);
1175 err = -EINVAL;
1176 goto next;
1179 if (!np->copying_receiver) {
1180 /* Memory pressure, insufficient buffer
1181 * headroom, ... */
1182 if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
1183 if (net_ratelimit())
1184 WPRINTK("Unfulfilled rx req "
1185 "(id=%d, st=%d).\n",
1186 rx->id, rx->status);
1187 xennet_move_rx_slot(np, skb, ref);
1188 err = -ENOMEM;
1189 goto next;
1192 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1193 /* Remap the page. */
1194 struct page *page =
1195 skb_shinfo(skb)->frags[0].page;
1196 unsigned long pfn = page_to_pfn(page);
1197 void *vaddr = page_address(page);
1199 mcl = np->rx_mcl + pages_flipped;
1200 mmu = np->rx_mmu + pages_flipped;
1202 MULTI_update_va_mapping(mcl,
1203 (unsigned long)vaddr,
1204 pfn_pte_ma(mfn,
1205 PAGE_KERNEL),
1206 0);
1207 mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
1208 | MMU_MACHPHYS_UPDATE;
1209 mmu->val = pfn;
1211 set_phys_to_machine(pfn, mfn);
1213 pages_flipped++;
1214 } else {
1215 ret = gnttab_end_foreign_access_ref(ref);
1216 BUG_ON(!ret);
1219 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1221 __skb_queue_tail(list, skb);
1223 next:
1224 if (!(rx->flags & NETRXF_more_data))
1225 break;
1227 if (cons + frags == rp) {
1228 if (net_ratelimit())
1229 WPRINTK("Need more frags\n");
1230 err = -ENOENT;
1231 break;
1234 rx = RING_GET_RESPONSE(&np->rx, cons + frags);
1235 skb = xennet_get_rx_skb(np, cons + frags);
1236 ref = xennet_get_rx_ref(np, cons + frags);
1237 frags++;
1240 if (unlikely(frags > max)) {
1241 if (net_ratelimit())
1242 WPRINTK("Too many frags\n");
1243 err = -E2BIG;
1246 if (unlikely(err))
1247 np->rx.rsp_cons = cons + frags;
1249 *pages_flipped_p = pages_flipped;
1251 return err;
1254 static RING_IDX xennet_fill_frags(struct netfront_info *np,
1255 struct sk_buff *skb,
1256 struct sk_buff_head *list)
1258 struct skb_shared_info *shinfo = skb_shinfo(skb);
1259 int nr_frags = shinfo->nr_frags;
1260 RING_IDX cons = np->rx.rsp_cons;
1261 skb_frag_t *frag = shinfo->frags + nr_frags;
1262 struct sk_buff *nskb;
1264 while ((nskb = __skb_dequeue(list))) {
1265 struct netif_rx_response *rx =
1266 RING_GET_RESPONSE(&np->rx, ++cons);
1268 frag->page = skb_shinfo(nskb)->frags[0].page;
1269 frag->page_offset = rx->offset;
1270 frag->size = rx->status;
1272 skb->data_len += rx->status;
1274 skb_shinfo(nskb)->nr_frags = 0;
1275 kfree_skb(nskb);
1277 frag++;
1278 nr_frags++;
1281 shinfo->nr_frags = nr_frags;
1282 return cons;
1285 static int xennet_set_skb_gso(struct sk_buff *skb,
1286 struct netif_extra_info *gso)
1288 if (!gso->u.gso.size) {
1289 if (net_ratelimit())
1290 WPRINTK("GSO size must not be zero.\n");
1291 return -EINVAL;
1294 /* Currently only TCPv4 S.O. is supported. */
1295 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1296 if (net_ratelimit())
1297 WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1298 return -EINVAL;
1301 #if HAVE_TSO
1302 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1303 #if HAVE_GSO
1304 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1306 /* Header must be checked, and gso_segs computed. */
1307 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1308 #endif
1309 skb_shinfo(skb)->gso_segs = 0;
1311 return 0;
1312 #else
1313 if (net_ratelimit())
1314 WPRINTK("GSO unsupported by this kernel.\n");
1315 return -EINVAL;
1316 #endif
1319 static int netif_poll(struct net_device *dev, int *pbudget)
1321 struct netfront_info *np = netdev_priv(dev);
1322 struct sk_buff *skb;
1323 struct netfront_rx_info rinfo;
1324 struct netif_rx_response *rx = &rinfo.rx;
1325 struct netif_extra_info *extras = rinfo.extras;
1326 RING_IDX i, rp;
1327 struct multicall_entry *mcl;
1328 int work_done, budget, more_to_do = 1, accel_more_to_do = 1;
1329 struct sk_buff_head rxq;
1330 struct sk_buff_head errq;
1331 struct sk_buff_head tmpq;
1332 unsigned long flags;
1333 unsigned int len;
1334 int pages_flipped = 0;
1335 int err;
1337 spin_lock(&np->rx_lock); /* no need for spin_lock_bh() in ->poll() */
1339 if (unlikely(!netfront_carrier_ok(np))) {
1340 spin_unlock(&np->rx_lock);
1341 return 0;
1344 skb_queue_head_init(&rxq);
1345 skb_queue_head_init(&errq);
1346 skb_queue_head_init(&tmpq);
1348 if ((budget = *pbudget) > dev->quota)
1349 budget = dev->quota;
1350 rp = np->rx.sring->rsp_prod;
1351 rmb(); /* Ensure we see queued responses up to 'rp'. */
1353 i = np->rx.rsp_cons;
1354 work_done = 0;
1355 while ((i != rp) && (work_done < budget)) {
1356 memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
1357 memset(extras, 0, sizeof(rinfo.extras));
1359 err = xennet_get_responses(np, &rinfo, rp, &tmpq,
1360 &pages_flipped);
1362 if (unlikely(err)) {
1363 err:
1364 while ((skb = __skb_dequeue(&tmpq)))
1365 __skb_queue_tail(&errq, skb);
1366 np->stats.rx_errors++;
1367 i = np->rx.rsp_cons;
1368 continue;
1371 skb = __skb_dequeue(&tmpq);
1373 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1374 struct netif_extra_info *gso;
1375 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1377 if (unlikely(xennet_set_skb_gso(skb, gso))) {
1378 __skb_queue_head(&tmpq, skb);
1379 np->rx.rsp_cons += skb_queue_len(&tmpq);
1380 goto err;
1384 NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
1385 NETFRONT_SKB_CB(skb)->offset = rx->offset;
1387 len = rx->status;
1388 if (len > RX_COPY_THRESHOLD)
1389 len = RX_COPY_THRESHOLD;
1390 skb_put(skb, len);
1392 if (rx->status > len) {
1393 skb_shinfo(skb)->frags[0].page_offset =
1394 rx->offset + len;
1395 skb_shinfo(skb)->frags[0].size = rx->status - len;
1396 skb->data_len = rx->status - len;
1397 } else {
1398 skb_shinfo(skb)->frags[0].page = NULL;
1399 skb_shinfo(skb)->nr_frags = 0;
1402 i = xennet_fill_frags(np, skb, &tmpq);
1404 /*
1405 * Truesize must approximate the size of true data plus
1406 * any supervisor overheads. Adding hypervisor overheads
1407 * has been shown to significantly reduce achievable
1408 * bandwidth with the default receive buffer size. It is
1409 * therefore not wise to account for it here.
1411 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set to
1412 * RX_COPY_THRESHOLD + the supervisor overheads. Here, we
1413 * add the size of the data pulled in xennet_fill_frags().
1415 * We also adjust for any unused space in the main data
1416 * area by subtracting (RX_COPY_THRESHOLD - len). This is
1417 * especially important with drivers which split incoming
1418 * packets into header and data, using only 66 bytes of
1419 * the main data area (see the e1000 driver for example.)
1420 * On such systems, without this last adjustment, our
1421 * achievable receive throughput using the standard receive
1422 * buffer size was cut by 25%(!!!).
1423 */
1424 skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
1425 skb->len += skb->data_len;
1427 /*
1428 * Old backends do not assert data_validated but we
1429 * can infer it from csum_blank so test both flags.
1430 */
1431 if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank))
1432 skb->ip_summed = CHECKSUM_UNNECESSARY;
1433 else
1434 skb->ip_summed = CHECKSUM_NONE;
1435 #ifdef CONFIG_XEN
1436 skb->proto_data_valid = (skb->ip_summed != CHECKSUM_NONE);
1437 skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
1438 #endif
1439 np->stats.rx_packets++;
1440 np->stats.rx_bytes += skb->len;
1442 __skb_queue_tail(&rxq, skb);
1444 np->rx.rsp_cons = ++i;
1445 work_done++;
1448 if (pages_flipped) {
1449 /* Some pages are no longer absent... */
1450 balloon_update_driver_allowance(-pages_flipped);
1452 /* Do all the remapping work and M2P updates. */
1453 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1454 mcl = np->rx_mcl + pages_flipped;
1455 mcl->op = __HYPERVISOR_mmu_update;
1456 mcl->args[0] = (unsigned long)np->rx_mmu;
1457 mcl->args[1] = pages_flipped;
1458 mcl->args[2] = 0;
1459 mcl->args[3] = DOMID_SELF;
1460 err = HYPERVISOR_multicall_check(np->rx_mcl,
1461 pages_flipped + 1,
1462 NULL);
1463 BUG_ON(err);
1467 while ((skb = __skb_dequeue(&errq)))
1468 kfree_skb(skb);
1470 while ((skb = __skb_dequeue(&rxq)) != NULL) {
1471 struct page *page = NETFRONT_SKB_CB(skb)->page;
1472 void *vaddr = page_address(page);
1473 unsigned offset = NETFRONT_SKB_CB(skb)->offset;
1475 memcpy(skb->data, vaddr + offset, skb_headlen(skb));
1477 if (page != skb_shinfo(skb)->frags[0].page)
1478 __free_page(page);
1480 /* Ethernet work: Delayed to here as it peeks the header. */
1481 skb->protocol = eth_type_trans(skb, dev);
1483 /* Pass it up. */
1484 netif_receive_skb(skb);
1485 dev->last_rx = jiffies;
1488 /* If we get a callback with very few responses, reduce fill target. */
1489 /* NB. Note exponential increase, linear decrease. */
1490 if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
1491 ((3*np->rx_target) / 4)) &&
1492 (--np->rx_target < np->rx_min_target))
1493 np->rx_target = np->rx_min_target;
1495 network_alloc_rx_buffers(dev);
1497 if (work_done < budget) {
1498 /* there's some spare capacity, try the accelerated path */
1499 int accel_budget = budget - work_done;
1500 int accel_budget_start = accel_budget;
1502 if (np->accel_vif_state.hooks) {
1503 accel_more_to_do =
1504 np->accel_vif_state.hooks->netdev_poll
1505 (dev, &accel_budget);
1506 work_done += (accel_budget_start - accel_budget);
1507 } else
1508 accel_more_to_do = 0;
1511 *pbudget -= work_done;
1512 dev->quota -= work_done;
1514 if (work_done < budget) {
1515 local_irq_save(flags);
1517 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
1519 if (!more_to_do && !accel_more_to_do &&
1520 np->accel_vif_state.hooks) {
1521 /*
1522 * Slow path has nothing more to do, see if
1523 * fast path is likewise
1524 */
1525 accel_more_to_do =
1526 np->accel_vif_state.hooks->start_napi_irq(dev);
1529 if (!more_to_do && !accel_more_to_do)
1530 __netif_rx_complete(dev);
1532 local_irq_restore(flags);
1535 spin_unlock(&np->rx_lock);
1537 return more_to_do | accel_more_to_do;
1540 static void netif_release_tx_bufs(struct netfront_info *np)
1542 struct sk_buff *skb;
1543 int i;
1545 for (i = 1; i <= NET_TX_RING_SIZE; i++) {
1546 if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
1547 continue;
1549 skb = np->tx_skbs[i];
1550 gnttab_end_foreign_access_ref(np->grant_tx_ref[i]);
1551 gnttab_release_grant_reference(
1552 &np->gref_tx_head, np->grant_tx_ref[i]);
1553 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1554 add_id_to_freelist(np->tx_skbs, i);
1555 dev_kfree_skb_irq(skb);
1559 static void netif_release_rx_bufs_flip(struct netfront_info *np)
1561 struct mmu_update *mmu = np->rx_mmu;
1562 struct multicall_entry *mcl = np->rx_mcl;
1563 struct sk_buff_head free_list;
1564 struct sk_buff *skb;
1565 unsigned long mfn;
1566 int xfer = 0, noxfer = 0, unused = 0;
1567 int id, ref, rc;
1569 skb_queue_head_init(&free_list);
1571 spin_lock_bh(&np->rx_lock);
1573 for (id = 0; id < NET_RX_RING_SIZE; id++) {
1574 if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
1575 unused++;
1576 continue;
1579 skb = np->rx_skbs[id];
1580 mfn = gnttab_end_foreign_transfer_ref(ref);
1581 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1582 np->grant_rx_ref[id] = GRANT_INVALID_REF;
1583 add_id_to_freelist(np->rx_skbs, id);
1585 if (0 == mfn) {
1586 struct page *page = skb_shinfo(skb)->frags[0].page;
1587 balloon_release_driver_page(page);
1588 skb_shinfo(skb)->nr_frags = 0;
1589 dev_kfree_skb(skb);
1590 noxfer++;
1591 continue;
1594 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1595 /* Remap the page. */
1596 struct page *page = skb_shinfo(skb)->frags[0].page;
1597 unsigned long pfn = page_to_pfn(page);
1598 void *vaddr = page_address(page);
1600 MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
1601 pfn_pte_ma(mfn, PAGE_KERNEL),
1602 0);
1603 mcl++;
1604 mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
1605 | MMU_MACHPHYS_UPDATE;
1606 mmu->val = pfn;
1607 mmu++;
1609 set_phys_to_machine(pfn, mfn);
1611 __skb_queue_tail(&free_list, skb);
1612 xfer++;
1615 DPRINTK("%s: %d xfer, %d noxfer, %d unused\n",
1616 __FUNCTION__, xfer, noxfer, unused);
1618 if (xfer) {
1619 /* Some pages are no longer absent... */
1620 balloon_update_driver_allowance(-xfer);
1622 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1623 /* Do all the remapping work and M2P updates. */
1624 mcl->op = __HYPERVISOR_mmu_update;
1625 mcl->args[0] = (unsigned long)np->rx_mmu;
1626 mcl->args[1] = mmu - np->rx_mmu;
1627 mcl->args[2] = 0;
1628 mcl->args[3] = DOMID_SELF;
1629 mcl++;
1630 rc = HYPERVISOR_multicall_check(
1631 np->rx_mcl, mcl - np->rx_mcl, NULL);
1632 BUG_ON(rc);
1636 while ((skb = __skb_dequeue(&free_list)) != NULL)
1637 dev_kfree_skb(skb);
1639 spin_unlock_bh(&np->rx_lock);
1642 static void netif_release_rx_bufs_copy(struct netfront_info *np)
1644 struct sk_buff *skb;
1645 int i, ref;
1646 int busy = 0, inuse = 0;
1648 spin_lock_bh(&np->rx_lock);
1650 for (i = 0; i < NET_RX_RING_SIZE; i++) {
1651 ref = np->grant_rx_ref[i];
1653 if (ref == GRANT_INVALID_REF)
1654 continue;
1656 inuse++;
1658 skb = np->rx_skbs[i];
1660 if (!gnttab_end_foreign_access_ref(ref))
1662 busy++;
1663 continue;
1666 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1667 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1668 add_id_to_freelist(np->rx_skbs, i);
1670 dev_kfree_skb(skb);
1673 if (busy)
1674 DPRINTK("%s: Unable to release %d of %d inuse grant references out of %ld total.\n",
1675 __FUNCTION__, busy, inuse, NET_RX_RING_SIZE);
1677 spin_unlock_bh(&np->rx_lock);
1680 static int network_close(struct net_device *dev)
1682 struct netfront_info *np = netdev_priv(dev);
1683 netif_stop_queue(np->netdev);
1684 return 0;
1688 static struct net_device_stats *network_get_stats(struct net_device *dev)
1690 struct netfront_info *np = netdev_priv(dev);
1692 netfront_accelerator_call_get_stats(np, dev);
1693 return &np->stats;
1696 static int xennet_set_mac_address(struct net_device *dev, void *p)
1698 struct netfront_info *np = netdev_priv(dev);
1699 struct sockaddr *addr = p;
1701 if (netif_running(dev))
1702 return -EBUSY;
1704 if (!is_valid_ether_addr(addr->sa_data))
1705 return -EADDRNOTAVAIL;
1707 memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
1708 memcpy(np->mac, addr->sa_data, ETH_ALEN);
1710 return 0;
1713 static int xennet_change_mtu(struct net_device *dev, int mtu)
1715 int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
1717 if (mtu > max)
1718 return -EINVAL;
1719 dev->mtu = mtu;
1720 return 0;
1723 static int xennet_set_sg(struct net_device *dev, u32 data)
1725 if (data) {
1726 struct netfront_info *np = netdev_priv(dev);
1727 int val;
1729 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1730 "%d", &val) < 0)
1731 val = 0;
1732 if (!val)
1733 return -ENOSYS;
1734 } else if (dev->mtu > ETH_DATA_LEN)
1735 dev->mtu = ETH_DATA_LEN;
1737 return ethtool_op_set_sg(dev, data);
1740 static int xennet_set_tso(struct net_device *dev, u32 data)
1742 if (data) {
1743 struct netfront_info *np = netdev_priv(dev);
1744 int val;
1746 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1747 "feature-gso-tcpv4", "%d", &val) < 0)
1748 val = 0;
1749 if (!val)
1750 return -ENOSYS;
1753 return ethtool_op_set_tso(dev, data);
1756 static void xennet_set_features(struct net_device *dev)
1758 dev_disable_gso_features(dev);
1759 xennet_set_sg(dev, 0);
1761 /* We need checksum offload to enable scatter/gather and TSO. */
1762 if (!(dev->features & NETIF_F_IP_CSUM))
1763 return;
1765 if (xennet_set_sg(dev, 1))
1766 return;
1768 /* Before 2.6.9 TSO seems to be unreliable so do not enable it
1769 * on older kernels.
1770 */
1771 if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9))
1772 xennet_set_tso(dev, 1);
1775 static int network_connect(struct net_device *dev)
1777 struct netfront_info *np = netdev_priv(dev);
1778 int i, requeue_idx, err;
1779 struct sk_buff *skb;
1780 grant_ref_t ref;
1781 netif_rx_request_t *req;
1782 unsigned int feature_rx_copy, feature_rx_flip;
1784 err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1785 "feature-rx-copy", "%u", &feature_rx_copy);
1786 if (err != 1)
1787 feature_rx_copy = 0;
1788 err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1789 "feature-rx-flip", "%u", &feature_rx_flip);
1790 if (err != 1)
1791 feature_rx_flip = 1;
1793 /*
1794 * Copy packets on receive path if:
1795 * (a) This was requested by user, and the backend supports it; or
1796 * (b) Flipping was requested, but this is unsupported by the backend.
1797 */
1798 np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
1799 (MODPARM_rx_flip && !feature_rx_flip));
1801 err = talk_to_backend(np->xbdev, np);
1802 if (err)
1803 return err;
1805 xennet_set_features(dev);
1807 DPRINTK("device %s has %sing receive path.\n",
1808 dev->name, np->copying_receiver ? "copy" : "flipp");
1810 spin_lock_bh(&np->rx_lock);
1811 spin_lock_irq(&np->tx_lock);
1813 /*
1814 * Recovery procedure:
1815 * NB. Freelist index entries are always going to be less than
1816 * PAGE_OFFSET, whereas pointers to skbs will always be equal or
1817 * greater than PAGE_OFFSET: we use this property to distinguish
1818 * them.
1819 */
1821 /* Step 1: Discard all pending TX packet fragments. */
1822 netif_release_tx_bufs(np);
1824 /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1825 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1826 if (!np->rx_skbs[i])
1827 continue;
1829 skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
1830 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1831 req = RING_GET_REQUEST(&np->rx, requeue_idx);
1833 if (!np->copying_receiver) {
1834 gnttab_grant_foreign_transfer_ref(
1835 ref, np->xbdev->otherend_id,
1836 page_to_pfn(skb_shinfo(skb)->frags->page));
1837 } else {
1838 gnttab_grant_foreign_access_ref(
1839 ref, np->xbdev->otherend_id,
1840 pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
1841 frags->page)),
1842 0);
1844 req->gref = ref;
1845 req->id = requeue_idx;
1847 requeue_idx++;
1850 np->rx.req_prod_pvt = requeue_idx;
1852 /*
1853 * Step 3: All public and private state should now be sane. Get
1854 * ready to start sending and receiving packets and give the driver
1855 * domain a kick because we've probably just requeued some
1856 * packets.
1857 */
1858 netfront_carrier_on(np);
1859 notify_remote_via_irq(np->irq);
1860 network_tx_buf_gc(dev);
1861 network_alloc_rx_buffers(dev);
1863 spin_unlock_irq(&np->tx_lock);
1864 spin_unlock_bh(&np->rx_lock);
1866 return 0;
1869 static void netif_uninit(struct net_device *dev)
1871 struct netfront_info *np = netdev_priv(dev);
1872 netif_release_tx_bufs(np);
1873 if (np->copying_receiver)
1874 netif_release_rx_bufs_copy(np);
1875 else
1876 netif_release_rx_bufs_flip(np);
1877 gnttab_free_grant_references(np->gref_tx_head);
1878 gnttab_free_grant_references(np->gref_rx_head);
1881 static struct ethtool_ops network_ethtool_ops =
1883 .get_tx_csum = ethtool_op_get_tx_csum,
1884 .set_tx_csum = ethtool_op_set_tx_csum,
1885 .get_sg = ethtool_op_get_sg,
1886 .set_sg = xennet_set_sg,
1887 #if HAVE_TSO
1888 .get_tso = ethtool_op_get_tso,
1889 .set_tso = xennet_set_tso,
1890 #endif
1891 .get_link = ethtool_op_get_link,
1892 };
1894 #ifdef CONFIG_SYSFS
1895 static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
1897 struct net_device *netdev = container_of(cd, struct net_device,
1898 class_dev);
1899 struct netfront_info *info = netdev_priv(netdev);
1901 return sprintf(buf, "%u\n", info->rx_min_target);
1904 static ssize_t store_rxbuf_min(struct class_device *cd,
1905 const char *buf, size_t len)
1907 struct net_device *netdev = container_of(cd, struct net_device,
1908 class_dev);
1909 struct netfront_info *np = netdev_priv(netdev);
1910 char *endp;
1911 unsigned long target;
1913 if (!capable(CAP_NET_ADMIN))
1914 return -EPERM;
1916 target = simple_strtoul(buf, &endp, 0);
1917 if (endp == buf)
1918 return -EBADMSG;
1920 if (target < RX_MIN_TARGET)
1921 target = RX_MIN_TARGET;
1922 if (target > RX_MAX_TARGET)
1923 target = RX_MAX_TARGET;
1925 spin_lock_bh(&np->rx_lock);
1926 if (target > np->rx_max_target)
1927 np->rx_max_target = target;
1928 np->rx_min_target = target;
1929 if (target > np->rx_target)
1930 np->rx_target = target;
1932 network_alloc_rx_buffers(netdev);
1934 spin_unlock_bh(&np->rx_lock);
1935 return len;
1938 static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
1940 struct net_device *netdev = container_of(cd, struct net_device,
1941 class_dev);
1942 struct netfront_info *info = netdev_priv(netdev);
1944 return sprintf(buf, "%u\n", info->rx_max_target);
1947 static ssize_t store_rxbuf_max(struct class_device *cd,
1948 const char *buf, size_t len)
1950 struct net_device *netdev = container_of(cd, struct net_device,
1951 class_dev);
1952 struct netfront_info *np = netdev_priv(netdev);
1953 char *endp;
1954 unsigned long target;
1956 if (!capable(CAP_NET_ADMIN))
1957 return -EPERM;
1959 target = simple_strtoul(buf, &endp, 0);
1960 if (endp == buf)
1961 return -EBADMSG;
1963 if (target < RX_MIN_TARGET)
1964 target = RX_MIN_TARGET;
1965 if (target > RX_MAX_TARGET)
1966 target = RX_MAX_TARGET;
1968 spin_lock_bh(&np->rx_lock);
1969 if (target < np->rx_min_target)
1970 np->rx_min_target = target;
1971 np->rx_max_target = target;
1972 if (target < np->rx_target)
1973 np->rx_target = target;
1975 network_alloc_rx_buffers(netdev);
1977 spin_unlock_bh(&np->rx_lock);
1978 return len;
1981 static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
1983 struct net_device *netdev = container_of(cd, struct net_device,
1984 class_dev);
1985 struct netfront_info *info = netdev_priv(netdev);
1987 return sprintf(buf, "%u\n", info->rx_target);
1990 static const struct class_device_attribute xennet_attrs[] = {
1991 __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
1992 __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
1993 __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
1994 };
1996 static int xennet_sysfs_addif(struct net_device *netdev)
1998 int i;
1999 int error = 0;
2001 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
2002 error = class_device_create_file(&netdev->class_dev,
2003 &xennet_attrs[i]);
2004 if (error)
2005 goto fail;
2007 return 0;
2009 fail:
2010 while (--i >= 0)
2011 class_device_remove_file(&netdev->class_dev,
2012 &xennet_attrs[i]);
2013 return error;
2016 static void xennet_sysfs_delif(struct net_device *netdev)
2018 int i;
2020 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
2021 class_device_remove_file(&netdev->class_dev,
2022 &xennet_attrs[i]);
2026 #endif /* CONFIG_SYSFS */
2029 /*
2030 * Nothing to do here. Virtual interface is point-to-point and the
2031 * physical interface is probably promiscuous anyway.
2032 */
2033 static void network_set_multicast_list(struct net_device *dev)
2037 static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
2039 int i, err = 0;
2040 struct net_device *netdev = NULL;
2041 struct netfront_info *np = NULL;
2043 netdev = alloc_etherdev(sizeof(struct netfront_info));
2044 if (!netdev) {
2045 printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
2046 __FUNCTION__);
2047 return ERR_PTR(-ENOMEM);
2050 np = netdev_priv(netdev);
2051 np->xbdev = dev;
2053 spin_lock_init(&np->tx_lock);
2054 spin_lock_init(&np->rx_lock);
2056 init_accelerator_vif(np, dev);
2058 skb_queue_head_init(&np->rx_batch);
2059 np->rx_target = RX_DFL_MIN_TARGET;
2060 np->rx_min_target = RX_DFL_MIN_TARGET;
2061 np->rx_max_target = RX_MAX_TARGET;
2063 init_timer(&np->rx_refill_timer);
2064 np->rx_refill_timer.data = (unsigned long)netdev;
2065 np->rx_refill_timer.function = rx_refill_timeout;
2067 /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
2068 for (i = 0; i <= NET_TX_RING_SIZE; i++) {
2069 np->tx_skbs[i] = (void *)((unsigned long) i+1);
2070 np->grant_tx_ref[i] = GRANT_INVALID_REF;
2073 for (i = 0; i < NET_RX_RING_SIZE; i++) {
2074 np->rx_skbs[i] = NULL;
2075 np->grant_rx_ref[i] = GRANT_INVALID_REF;
2078 /* A grant for every tx ring slot */
2079 if (gnttab_alloc_grant_references(TX_MAX_TARGET,
2080 &np->gref_tx_head) < 0) {
2081 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
2082 err = -ENOMEM;
2083 goto exit;
2085 /* A grant for every rx ring slot */
2086 if (gnttab_alloc_grant_references(RX_MAX_TARGET,
2087 &np->gref_rx_head) < 0) {
2088 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
2089 err = -ENOMEM;
2090 goto exit_free_tx;
2093 netdev->open = network_open;
2094 netdev->hard_start_xmit = network_start_xmit;
2095 netdev->stop = network_close;
2096 netdev->get_stats = network_get_stats;
2097 netdev->poll = netif_poll;
2098 netdev->set_multicast_list = network_set_multicast_list;
2099 netdev->uninit = netif_uninit;
2100 netdev->set_mac_address = xennet_set_mac_address;
2101 netdev->change_mtu = xennet_change_mtu;
2102 netdev->weight = 64;
2103 netdev->features = NETIF_F_IP_CSUM;
2105 SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
2106 SET_MODULE_OWNER(netdev);
2107 SET_NETDEV_DEV(netdev, &dev->dev);
2109 np->netdev = netdev;
2111 netfront_carrier_off(np);
2113 return netdev;
2115 exit_free_tx:
2116 gnttab_free_grant_references(np->gref_tx_head);
2117 exit:
2118 free_netdev(netdev);
2119 return ERR_PTR(err);
2122 #ifdef CONFIG_INET
2123 /*
2124 * We use this notifier to send out a fake ARP reply to reset switches and
2125 * router ARP caches when an IP interface is brought up on a VIF.
2126 */
2127 static int
2128 inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
2130 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
2131 struct net_device *dev = ifa->ifa_dev->dev;
2133 /* UP event and is it one of our devices? */
2134 if (event == NETDEV_UP && dev->open == network_open)
2135 send_fake_arp(dev);
2137 return NOTIFY_DONE;
2140 static struct notifier_block notifier_inetdev = {
2141 .notifier_call = inetdev_notify,
2142 .next = NULL,
2143 .priority = 0
2144 };
2145 #endif
2148 static void netif_disconnect_backend(struct netfront_info *info)
2150 /* Stop old i/f to prevent errors whilst we rebuild the state. */
2151 spin_lock_bh(&info->rx_lock);
2152 spin_lock_irq(&info->tx_lock);
2153 netfront_carrier_off(info);
2154 spin_unlock_irq(&info->tx_lock);
2155 spin_unlock_bh(&info->rx_lock);
2157 if (info->irq)
2158 unbind_from_irqhandler(info->irq, info->netdev);
2159 info->irq = 0;
2161 end_access(info->tx_ring_ref, info->tx.sring);
2162 end_access(info->rx_ring_ref, info->rx.sring);
2163 info->tx_ring_ref = GRANT_INVALID_REF;
2164 info->rx_ring_ref = GRANT_INVALID_REF;
2165 info->tx.sring = NULL;
2166 info->rx.sring = NULL;
2170 static void end_access(int ref, void *page)
2172 if (ref != GRANT_INVALID_REF)
2173 gnttab_end_foreign_access(ref, (unsigned long)page);
2177 /* ** Driver registration ** */
2180 static const struct xenbus_device_id netfront_ids[] = {
2181 { "vif" },
2182 { "" }
2183 };
2184 MODULE_ALIAS("xen:vif");
2187 static struct xenbus_driver netfront_driver = {
2188 .name = "vif",
2189 .owner = THIS_MODULE,
2190 .ids = netfront_ids,
2191 .probe = netfront_probe,
2192 .remove = __devexit_p(netfront_remove),
2193 .suspend = netfront_suspend,
2194 .suspend_cancel = netfront_suspend_cancel,
2195 .resume = netfront_resume,
2196 .otherend_changed = backend_changed,
2197 };
2200 static int __init netif_init(void)
2202 int err;
2204 if (!is_running_on_xen())
2205 return -ENODEV;
2207 #ifdef CONFIG_XEN
2208 if (MODPARM_rx_flip && MODPARM_rx_copy) {
2209 WPRINTK("Cannot specify both rx_copy and rx_flip.\n");
2210 return -EINVAL;
2213 if (!MODPARM_rx_flip && !MODPARM_rx_copy)
2214 MODPARM_rx_flip = 1; /* Default is to flip. */
2215 #endif
2217 netif_init_accel();
2219 IPRINTK("Initialising virtual ethernet driver.\n");
2221 #ifdef CONFIG_INET
2222 (void)register_inetaddr_notifier(&notifier_inetdev);
2223 #endif
2225 err = xenbus_register_frontend(&netfront_driver);
2226 if (err) {
2227 #ifdef CONFIG_INET
2228 unregister_inetaddr_notifier(&notifier_inetdev);
2229 #endif
2231 return err;
2233 module_init(netif_init);
2236 static void __exit netif_exit(void)
2238 #ifdef CONFIG_INET
2239 unregister_inetaddr_notifier(&notifier_inetdev);
2240 #endif
2242 netif_exit_accel();
2244 return xenbus_unregister_driver(&netfront_driver);
2246 module_exit(netif_exit);
2248 MODULE_LICENSE("Dual BSD/GPL");