ia64/xen-unstable

linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c @ 10403:1e49997c8146

[NET] front: Discard packets in tx ring rather than attempting retransmit
when reconnecting to backend driver (e.g., after save/restore or migrate).

Two main reasons for this:
1. The retransmit code is broken for fragmented packets. It would need
a rewrite to cope with the new scatter-gather format.
2. We will drop packets anyway, in both directions (e.g., it takes some
time for received packets to be redirected to the new virtual interface;
also, further packets transmitted by the network stack are dropped
after we call netif_carrier_off(), so even if we retransmit what's
already in the ring it is likely that some subsequent packets will
already have been lost).

If this causes downtimes that are too long (particularly for live
relocation) then the whole strategy for buffering packets while the
frontend-backend connection is severed needs to be reconsidered.
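
For reference, the reconnect path (network_connect(), "Step 1" in the source
below) now just walks tx_skbs[], revokes each outstanding grant and frees the
skb instead of requeuing it; the discard loop is essentially:

	/* Excerpt of the discard loop in network_connect() (np, skb, i as there). */
	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
		if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
			continue;	/* slot holds a freelist index, not an skb */
		skb = np->tx_skbs[i];
		gnttab_end_foreign_access_ref(np->grant_tx_ref[i], GNTMAP_readonly);
		gnttab_release_grant_reference(&np->gref_tx_head, np->grant_tx_ref[i]);
		np->grant_tx_ref[i] = GRANT_INVALID_REF;
		add_id_to_freelist(np->tx_skbs, i);
		dev_kfree_skb_irq(skb);	/* discarded, not retransmitted */
	}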

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed Jun 14 13:11:44 2006 +0100 (2006-06-14)
parents cede50ca1704
children 5e30ef3e541d
line source
1 /******************************************************************************
2 * Virtual network driver for conversing with remote driver backends.
3 *
4 * Copyright (c) 2002-2005, K A Fraser
5 * Copyright (c) 2005, XenSource Ltd
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation; or, when distributed
10 * separately from the Linux kernel or incorporated into other
11 * software packages, subject to the following license:
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this source file (the "Software"), to deal in the Software without
15 * restriction, including without limitation the rights to use, copy, modify,
16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
17 * and to permit persons to whom the Software is furnished to do so, subject to
18 * the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 * IN THE SOFTWARE.
30 */
32 #include <linux/config.h>
33 #include <linux/module.h>
34 #include <linux/version.h>
35 #include <linux/kernel.h>
36 #include <linux/sched.h>
37 #include <linux/slab.h>
38 #include <linux/string.h>
39 #include <linux/errno.h>
40 #include <linux/netdevice.h>
41 #include <linux/inetdevice.h>
42 #include <linux/etherdevice.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
45 #include <linux/bitops.h>
46 #include <linux/ethtool.h>
47 #include <linux/in.h>
48 #include <linux/if_ether.h>
49 #include <net/sock.h>
50 #include <net/pkt_sched.h>
51 #include <net/arp.h>
52 #include <net/route.h>
53 #include <asm/io.h>
54 #include <asm/uaccess.h>
55 #include <xen/evtchn.h>
56 #include <xen/xenbus.h>
57 #include <xen/interface/io/netif.h>
58 #include <xen/interface/memory.h>
59 #include <xen/balloon.h>
60 #include <asm/page.h>
61 #include <asm/uaccess.h>
62 #include <xen/interface/grant_table.h>
63 #include <xen/gnttab.h>
65 #define GRANT_INVALID_REF 0
67 #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
68 #define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
70 static inline void init_skb_shinfo(struct sk_buff *skb)
71 {
72 atomic_set(&(skb_shinfo(skb)->dataref), 1);
73 skb_shinfo(skb)->nr_frags = 0;
74 skb_shinfo(skb)->frag_list = NULL;
75 }
77 struct netfront_info {
78 struct list_head list;
79 struct net_device *netdev;
81 struct net_device_stats stats;
83 struct netif_tx_front_ring tx;
84 struct netif_rx_front_ring rx;
86 spinlock_t tx_lock;
87 spinlock_t rx_lock;
89 unsigned int handle;
90 unsigned int evtchn, irq;
92 /* Receive-ring batched refills. */
93 #define RX_MIN_TARGET 8
94 #define RX_DFL_MIN_TARGET 64
95 #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
96 unsigned rx_min_target, rx_max_target, rx_target;
97 struct sk_buff_head rx_batch;
99 struct timer_list rx_refill_timer;
101 /*
102 * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
103 * array is an index into a chain of free entries.
104 */
105 struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
106 struct sk_buff *rx_skbs[NET_RX_RING_SIZE+1];
108 #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
109 grant_ref_t gref_tx_head;
110 grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
111 grant_ref_t gref_rx_head;
112 grant_ref_t grant_rx_ref[NET_RX_RING_SIZE + 1];
114 struct xenbus_device *xbdev;
115 int tx_ring_ref;
116 int rx_ring_ref;
117 u8 mac[ETH_ALEN];
119 unsigned long rx_pfn_array[NET_RX_RING_SIZE];
120 struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
121 struct mmu_update rx_mmu[NET_RX_RING_SIZE];
122 };
124 /*
125 * Access macros for acquiring and freeing slots in {tx,rx}_skbs[].
126 */
128 static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
129 {
130 list[id] = list[0];
131 list[0] = (void *)(unsigned long)id;
132 }
134 static inline unsigned short get_id_from_freelist(struct sk_buff **list)
135 {
136 unsigned int id = (unsigned int)(unsigned long)list[0];
137 list[0] = list[id];
138 return id;
139 }
141 #define DPRINTK(fmt, args...) \
142 pr_debug("netfront (%s:%d) " fmt, \
143 __FUNCTION__, __LINE__, ##args)
144 #define IPRINTK(fmt, args...) \
145 printk(KERN_INFO "netfront: " fmt, ##args)
146 #define WPRINTK(fmt, args...) \
147 printk(KERN_WARNING "netfront: " fmt, ##args)
149 static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
150 static int setup_device(struct xenbus_device *, struct netfront_info *);
151 static struct net_device *create_netdev(int, struct xenbus_device *);
153 static void netfront_closing(struct xenbus_device *);
155 static void end_access(int, void *);
156 static void netif_disconnect_backend(struct netfront_info *);
157 static void close_netdev(struct netfront_info *);
158 static void netif_free(struct netfront_info *);
160 static void network_connect(struct net_device *);
161 static void network_tx_buf_gc(struct net_device *);
162 static void network_alloc_rx_buffers(struct net_device *);
163 static int send_fake_arp(struct net_device *);
165 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
167 #ifdef CONFIG_SYSFS
168 static int xennet_sysfs_addif(struct net_device *netdev);
169 static void xennet_sysfs_delif(struct net_device *netdev);
170 #else /* !CONFIG_SYSFS */
171 #define xennet_sysfs_addif(dev) (0)
172 #define xennet_sysfs_delif(dev) do { } while(0)
173 #endif
175 static inline int xennet_can_sg(struct net_device *dev)
176 {
177 return dev->features & NETIF_F_SG;
178 }
180 /**
181 * Entry point to this code when a new device is created. Allocate the basic
182 * structures and the ring buffers for communication with the backend, and
183 * inform the backend of the appropriate details for those. Switch to
184 * Connected state.
185 */
186 static int __devinit netfront_probe(struct xenbus_device *dev,
187 const struct xenbus_device_id *id)
188 {
189 int err;
190 struct net_device *netdev;
191 struct netfront_info *info;
192 unsigned int handle;
194 err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
195 if (err != 1) {
196 xenbus_dev_fatal(dev, err, "reading handle");
197 return err;
198 }
200 netdev = create_netdev(handle, dev);
201 if (IS_ERR(netdev)) {
202 err = PTR_ERR(netdev);
203 xenbus_dev_fatal(dev, err, "creating netdev");
204 return err;
205 }
207 info = netdev_priv(netdev);
208 dev->dev.driver_data = info;
210 err = talk_to_backend(dev, info);
211 if (err) {
212 xennet_sysfs_delif(info->netdev);
213 unregister_netdev(netdev);
214 free_netdev(netdev);
215 dev->dev.driver_data = NULL;
216 return err;
217 }
219 return 0;
220 }
223 /**
224 * We are reconnecting to the backend, due to a suspend/resume, or a backend
225 * driver restart. We tear down our netif structure and recreate it, but
226 * leave the device-layer structures intact so that this is transparent to the
227 * rest of the kernel.
228 */
229 static int netfront_resume(struct xenbus_device *dev)
230 {
231 struct netfront_info *info = dev->dev.driver_data;
233 DPRINTK("%s\n", dev->nodename);
235 netif_disconnect_backend(info);
236 return talk_to_backend(dev, info);
237 }
239 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
240 {
241 char *s, *e, *macstr;
242 int i;
244 macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
245 if (IS_ERR(macstr))
246 return PTR_ERR(macstr);
248 for (i = 0; i < ETH_ALEN; i++) {
249 mac[i] = simple_strtoul(s, &e, 16);
250 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
251 kfree(macstr);
252 return -ENOENT;
253 }
254 s = e+1;
255 }
257 kfree(macstr);
258 return 0;
259 }
261 /* Common code used when first setting up, and when resuming. */
262 static int talk_to_backend(struct xenbus_device *dev,
263 struct netfront_info *info)
264 {
265 const char *message;
266 struct xenbus_transaction xbt;
267 int err;
269 err = xen_net_read_mac(dev, info->mac);
270 if (err) {
271 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
272 goto out;
273 }
275 /* Create shared ring, alloc event channel. */
276 err = setup_device(dev, info);
277 if (err)
278 goto out;
280 again:
281 err = xenbus_transaction_start(&xbt);
282 if (err) {
283 xenbus_dev_fatal(dev, err, "starting transaction");
284 goto destroy_ring;
285 }
287 err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
288 info->tx_ring_ref);
289 if (err) {
290 message = "writing tx ring-ref";
291 goto abort_transaction;
292 }
293 err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
294 info->rx_ring_ref);
295 if (err) {
296 message = "writing rx ring-ref";
297 goto abort_transaction;
298 }
299 err = xenbus_printf(xbt, dev->nodename,
300 "event-channel", "%u", info->evtchn);
301 if (err) {
302 message = "writing event-channel";
303 goto abort_transaction;
304 }
306 err = xenbus_transaction_end(xbt, 0);
307 if (err) {
308 if (err == -EAGAIN)
309 goto again;
310 xenbus_dev_fatal(dev, err, "completing transaction");
311 goto destroy_ring;
312 }
314 return 0;
316 abort_transaction:
317 xenbus_transaction_end(xbt, 1);
318 xenbus_dev_fatal(dev, err, "%s", message);
319 destroy_ring:
320 netif_free(info);
321 out:
322 return err;
323 }
326 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
327 {
328 struct netif_tx_sring *txs;
329 struct netif_rx_sring *rxs;
330 int err;
331 struct net_device *netdev = info->netdev;
333 info->tx_ring_ref = GRANT_INVALID_REF;
334 info->rx_ring_ref = GRANT_INVALID_REF;
335 info->rx.sring = NULL;
336 info->tx.sring = NULL;
337 info->irq = 0;
339 txs = (struct netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
340 if (!txs) {
341 err = -ENOMEM;
342 xenbus_dev_fatal(dev, err, "allocating tx ring page");
343 goto fail;
344 }
345 SHARED_RING_INIT(txs);
346 FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
348 err = xenbus_grant_ring(dev, virt_to_mfn(txs));
349 if (err < 0) {
350 free_page((unsigned long)txs);
351 goto fail;
352 }
353 info->tx_ring_ref = err;
355 rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
356 if (!rxs) {
357 err = -ENOMEM;
358 xenbus_dev_fatal(dev, err, "allocating rx ring page");
359 goto fail;
360 }
361 SHARED_RING_INIT(rxs);
362 FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
364 err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
365 if (err < 0) {
366 free_page((unsigned long)rxs);
367 goto fail;
368 }
369 info->rx_ring_ref = err;
371 err = xenbus_alloc_evtchn(dev, &info->evtchn);
372 if (err)
373 goto fail;
375 memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
376 err = bind_evtchn_to_irqhandler(info->evtchn, netif_int,
377 SA_SAMPLE_RANDOM, netdev->name, netdev);
378 if (err < 0)
379 goto fail;
380 info->irq = err;
381 return 0;
383 fail:
384 netif_free(info);
385 return err;
386 }
389 /**
390 * Callback received when the backend's state changes.
391 */
392 static void backend_changed(struct xenbus_device *dev,
393 enum xenbus_state backend_state)
394 {
395 struct netfront_info *np = dev->dev.driver_data;
396 struct net_device *netdev = np->netdev;
398 DPRINTK("\n");
400 switch (backend_state) {
401 case XenbusStateInitialising:
402 case XenbusStateInitialised:
403 case XenbusStateConnected:
404 case XenbusStateUnknown:
405 case XenbusStateClosed:
406 break;
408 case XenbusStateInitWait:
409 network_connect(netdev);
410 xenbus_switch_state(dev, XenbusStateConnected);
411 (void)send_fake_arp(netdev);
412 break;
414 case XenbusStateClosing:
415 netfront_closing(dev);
416 break;
417 }
418 }
421 /** Send a packet on a net device to encourage switches to learn the
422 * MAC. We send a fake ARP reply.
423 *
424 * @param dev device
425 * @return 0 on success, error code otherwise
426 */
427 static int send_fake_arp(struct net_device *dev)
428 {
429 struct sk_buff *skb;
430 u32 src_ip, dst_ip;
432 dst_ip = INADDR_BROADCAST;
433 src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
435 /* No IP? Then nothing to do. */
436 if (src_ip == 0)
437 return 0;
439 skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
440 dst_ip, dev, src_ip,
441 /*dst_hw*/ NULL, /*src_hw*/ NULL,
442 /*target_hw*/ dev->dev_addr);
443 if (skb == NULL)
444 return -ENOMEM;
446 return dev_queue_xmit(skb);
447 }
450 static int network_open(struct net_device *dev)
451 {
452 struct netfront_info *np = netdev_priv(dev);
454 memset(&np->stats, 0, sizeof(np->stats));
456 network_alloc_rx_buffers(dev);
457 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
459 netif_start_queue(dev);
461 return 0;
462 }
464 static inline int netfront_tx_slot_available(struct netfront_info *np)
465 {
466 return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 1;
467 }
469 static inline void network_maybe_wake_tx(struct net_device *dev)
470 {
471 struct netfront_info *np = netdev_priv(dev);
473 if (unlikely(netif_queue_stopped(dev)) &&
474 netfront_tx_slot_available(np) &&
475 likely(netif_running(dev)))
476 netif_wake_queue(dev);
477 }
479 static void network_tx_buf_gc(struct net_device *dev)
480 {
481 RING_IDX cons, prod;
482 unsigned short id;
483 struct netfront_info *np = netdev_priv(dev);
484 struct sk_buff *skb;
486 if (unlikely(!netif_carrier_ok(dev)))
487 return;
489 do {
490 prod = np->tx.sring->rsp_prod;
491 rmb(); /* Ensure we see responses up to 'prod'. */
493 for (cons = np->tx.rsp_cons; cons != prod; cons++) {
494 id = RING_GET_RESPONSE(&np->tx, cons)->id;
495 skb = np->tx_skbs[id];
496 if (unlikely(gnttab_query_foreign_access(
497 np->grant_tx_ref[id]) != 0)) {
498 printk(KERN_ALERT "network_tx_buf_gc: warning "
499 "-- grant still in use by backend "
500 "domain.\n");
501 BUG();
502 }
503 gnttab_end_foreign_access_ref(
504 np->grant_tx_ref[id], GNTMAP_readonly);
505 gnttab_release_grant_reference(
506 &np->gref_tx_head, np->grant_tx_ref[id]);
507 np->grant_tx_ref[id] = GRANT_INVALID_REF;
508 add_id_to_freelist(np->tx_skbs, id);
509 dev_kfree_skb_irq(skb);
510 }
512 np->tx.rsp_cons = prod;
514 /*
515 * Set a new event, then check for race with update of tx_cons.
516 * Note that it is essential to schedule a callback, no matter
517 * how few buffers are pending. Even if there is space in the
518 * transmit ring, higher layers may be blocked because too much
519 * data is outstanding: in such cases notification from Xen is
520 * likely to be the only kick that we'll get.
521 */
522 np->tx.sring->rsp_event =
523 prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
524 mb();
525 } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
527 network_maybe_wake_tx(dev);
528 }
531 static void rx_refill_timeout(unsigned long data)
532 {
533 struct net_device *dev = (struct net_device *)data;
534 netif_rx_schedule(dev);
535 }
538 static void network_alloc_rx_buffers(struct net_device *dev)
539 {
540 unsigned short id;
541 struct netfront_info *np = netdev_priv(dev);
542 struct sk_buff *skb;
543 int i, batch_target;
544 RING_IDX req_prod = np->rx.req_prod_pvt;
545 struct xen_memory_reservation reservation;
546 grant_ref_t ref;
548 if (unlikely(!netif_carrier_ok(dev)))
549 return;
551 /*
552 * Allocate skbuffs greedily, even though we batch updates to the
553 * receive ring. This creates a less bursty demand on the memory
554 * allocator, so should reduce the chance of failed allocation requests
555 * both for ourself and for other kernel subsystems.
556 */
557 batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
558 for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
559 /*
560 * Subtract dev_alloc_skb headroom (16 bytes) and shared info
561 * tailroom then round down to SKB_DATA_ALIGN boundary.
562 */
563 skb = __dev_alloc_skb(
564 ((PAGE_SIZE - sizeof(struct skb_shared_info)) &
565 (-SKB_DATA_ALIGN(1))) - 16,
566 GFP_ATOMIC|__GFP_NOWARN);
567 if (skb == NULL) {
568 /* Any skbuffs queued for refill? Force them out. */
569 if (i != 0)
570 goto refill;
571 /* Could not allocate any skbuffs. Try again later. */
572 mod_timer(&np->rx_refill_timer,
573 jiffies + (HZ/10));
574 return;
575 }
576 __skb_queue_tail(&np->rx_batch, skb);
577 }
579 /* Is the batch large enough to be worthwhile? */
580 if (i < (np->rx_target/2))
581 return;
583 /* Adjust our fill target if we risked running out of buffers. */
584 if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
585 ((np->rx_target *= 2) > np->rx_max_target))
586 np->rx_target = np->rx_max_target;
588 refill:
589 for (i = 0; ; i++) {
590 if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
591 break;
593 skb->dev = dev;
595 id = get_id_from_freelist(np->rx_skbs);
597 np->rx_skbs[id] = skb;
599 RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
600 ref = gnttab_claim_grant_reference(&np->gref_rx_head);
601 BUG_ON((signed short)ref < 0);
602 np->grant_rx_ref[id] = ref;
603 gnttab_grant_foreign_transfer_ref(ref,
604 np->xbdev->otherend_id,
605 __pa(skb->head)>>PAGE_SHIFT);
606 RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
607 np->rx_pfn_array[i] = virt_to_mfn(skb->head);
609 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
610 /* Remove this page before passing back to Xen. */
611 set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
612 INVALID_P2M_ENTRY);
613 MULTI_update_va_mapping(np->rx_mcl+i,
614 (unsigned long)skb->head,
615 __pte(0), 0);
616 }
617 }
619 /* Tell the balloon driver what is going on. */
620 balloon_update_driver_allowance(i);
622 set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array);
623 reservation.nr_extents = i;
624 reservation.extent_order = 0;
625 reservation.address_bits = 0;
626 reservation.domid = DOMID_SELF;
628 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
629 /* After all PTEs have been zapped, flush the TLB. */
630 np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
631 UVMF_TLB_FLUSH|UVMF_ALL;
633 /* Give away a batch of pages. */
634 np->rx_mcl[i].op = __HYPERVISOR_memory_op;
635 np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
636 np->rx_mcl[i].args[1] = (unsigned long)&reservation;
638 /* Zap PTEs and give away pages in one big multicall. */
639 (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
641 /* Check return status of HYPERVISOR_memory_op(). */
642 if (unlikely(np->rx_mcl[i].result != i))
643 panic("Unable to reduce memory reservation\n");
644 } else
645 if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
646 &reservation) != i)
647 panic("Unable to reduce memory reservation\n");
649 /* Above is a suitable barrier to ensure backend will see requests. */
650 np->rx.req_prod_pvt = req_prod + i;
651 RING_PUSH_REQUESTS(&np->rx);
652 }
654 static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
655 struct netif_tx_request *tx)
656 {
657 struct netfront_info *np = netdev_priv(dev);
658 char *data = skb->data;
659 unsigned long mfn;
660 RING_IDX prod = np->tx.req_prod_pvt;
661 int frags = skb_shinfo(skb)->nr_frags;
662 unsigned int offset = offset_in_page(data);
663 unsigned int len = skb_headlen(skb);
664 unsigned int id;
665 grant_ref_t ref;
666 int i;
668 while (len > PAGE_SIZE - offset) {
669 tx->size = PAGE_SIZE - offset;
670 tx->flags |= NETTXF_more_data;
671 len -= tx->size;
672 data += tx->size;
673 offset = 0;
675 id = get_id_from_freelist(np->tx_skbs);
676 np->tx_skbs[id] = skb_get(skb);
677 tx = RING_GET_REQUEST(&np->tx, prod++);
678 tx->id = id;
679 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
680 BUG_ON((signed short)ref < 0);
682 mfn = virt_to_mfn(data);
683 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
684 mfn, GNTMAP_readonly);
686 tx->gref = np->grant_tx_ref[id] = ref;
687 tx->offset = offset;
688 tx->size = len;
689 tx->flags = 0;
690 }
692 for (i = 0; i < frags; i++) {
693 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
695 tx->flags |= NETTXF_more_data;
697 id = get_id_from_freelist(np->tx_skbs);
698 np->tx_skbs[id] = skb_get(skb);
699 tx = RING_GET_REQUEST(&np->tx, prod++);
700 tx->id = id;
701 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
702 BUG_ON((signed short)ref < 0);
704 mfn = pfn_to_mfn(page_to_pfn(frag->page));
705 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
706 mfn, GNTMAP_readonly);
708 tx->gref = np->grant_tx_ref[id] = ref;
709 tx->offset = frag->page_offset;
710 tx->size = frag->size;
711 tx->flags = 0;
712 }
714 np->tx.req_prod_pvt = prod;
715 }
717 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
718 {
719 unsigned short id;
720 struct netfront_info *np = netdev_priv(dev);
721 struct netif_tx_request *tx;
722 char *data = skb->data;
723 RING_IDX i;
724 grant_ref_t ref;
725 unsigned long mfn;
726 int notify;
727 int frags = skb_shinfo(skb)->nr_frags;
728 unsigned int offset = offset_in_page(data);
729 unsigned int len = skb_headlen(skb);
731 frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
732 if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
733 printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
734 frags);
735 dump_stack();
736 goto drop;
737 }
739 spin_lock_irq(&np->tx_lock);
741 if (unlikely(!netif_carrier_ok(dev) ||
742 (frags > 1 && !xennet_can_sg(dev)))) {
743 spin_unlock_irq(&np->tx_lock);
744 goto drop;
745 }
747 i = np->tx.req_prod_pvt;
749 id = get_id_from_freelist(np->tx_skbs);
750 np->tx_skbs[id] = skb;
752 tx = RING_GET_REQUEST(&np->tx, i);
754 tx->id = id;
755 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
756 BUG_ON((signed short)ref < 0);
757 mfn = virt_to_mfn(data);
758 gnttab_grant_foreign_access_ref(
759 ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
760 tx->gref = np->grant_tx_ref[id] = ref;
761 tx->offset = offset;
762 tx->size = len;
764 tx->flags = 0;
765 if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
766 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
767 if (skb->proto_data_valid) /* remote but checksummed? */
768 tx->flags |= NETTXF_data_validated;
770 np->tx.req_prod_pvt = i + 1;
772 xennet_make_frags(skb, dev, tx);
773 tx->size = skb->len;
775 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
776 if (notify)
777 notify_remote_via_irq(np->irq);
779 network_tx_buf_gc(dev);
781 if (!netfront_tx_slot_available(np))
782 netif_stop_queue(dev);
784 spin_unlock_irq(&np->tx_lock);
786 np->stats.tx_bytes += skb->len;
787 np->stats.tx_packets++;
789 return 0;
791 drop:
792 np->stats.tx_dropped++;
793 dev_kfree_skb(skb);
794 return 0;
795 }
797 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
798 {
799 struct net_device *dev = dev_id;
800 struct netfront_info *np = netdev_priv(dev);
801 unsigned long flags;
803 spin_lock_irqsave(&np->tx_lock, flags);
804 network_tx_buf_gc(dev);
805 spin_unlock_irqrestore(&np->tx_lock, flags);
807 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) &&
808 likely(netif_running(dev)))
809 netif_rx_schedule(dev);
811 return IRQ_HANDLED;
812 }
815 static int netif_poll(struct net_device *dev, int *pbudget)
816 {
817 struct netfront_info *np = netdev_priv(dev);
818 struct sk_buff *skb, *nskb;
819 struct netif_rx_response *rx;
820 RING_IDX i, rp;
821 struct mmu_update *mmu = np->rx_mmu;
822 struct multicall_entry *mcl = np->rx_mcl;
823 int work_done, budget, more_to_do = 1;
824 struct sk_buff_head rxq;
825 unsigned long flags;
826 unsigned long mfn;
827 grant_ref_t ref;
829 spin_lock(&np->rx_lock);
831 if (unlikely(!netif_carrier_ok(dev))) {
832 spin_unlock(&np->rx_lock);
833 return 0;
834 }
836 skb_queue_head_init(&rxq);
838 if ((budget = *pbudget) > dev->quota)
839 budget = dev->quota;
840 rp = np->rx.sring->rsp_prod;
841 rmb(); /* Ensure we see queued responses up to 'rp'. */
843 for (i = np->rx.rsp_cons, work_done = 0;
844 (i != rp) && (work_done < budget);
845 i++, work_done++) {
846 rx = RING_GET_RESPONSE(&np->rx, i);
848 /*
849 * This definitely indicates a bug, either in this driver or in
850 * the backend driver. In future this should flag the bad
851 * situation to the system controller to reboot the backed.
852 */
853 if ((ref = np->grant_rx_ref[rx->id]) == GRANT_INVALID_REF) {
854 WPRINTK("Bad rx response id %d.\n", rx->id);
855 work_done--;
856 continue;
857 }
859 /* Memory pressure, insufficient buffer headroom, ... */
860 if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
861 if (net_ratelimit())
862 WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
863 rx->id, rx->status);
864 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id =
865 rx->id;
866 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref =
867 ref;
868 np->rx.req_prod_pvt++;
869 RING_PUSH_REQUESTS(&np->rx);
870 work_done--;
871 continue;
872 }
874 gnttab_release_grant_reference(&np->gref_rx_head, ref);
875 np->grant_rx_ref[rx->id] = GRANT_INVALID_REF;
877 skb = np->rx_skbs[rx->id];
878 add_id_to_freelist(np->rx_skbs, rx->id);
880 /* NB. We handle skb overflow later. */
881 skb->data = skb->head + rx->offset;
882 skb->len = rx->status;
883 skb->tail = skb->data + skb->len;
885 /*
886 * Old backends do not assert data_validated but we
887 * can infer it from csum_blank so test both flags.
888 */
889 if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) {
890 skb->ip_summed = CHECKSUM_UNNECESSARY;
891 skb->proto_data_valid = 1;
892 } else {
893 skb->ip_summed = CHECKSUM_NONE;
894 skb->proto_data_valid = 0;
895 }
896 skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
898 np->stats.rx_packets++;
899 np->stats.rx_bytes += rx->status;
901 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
902 /* Remap the page. */
903 MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
904 pfn_pte_ma(mfn, PAGE_KERNEL),
905 0);
906 mcl++;
907 mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
908 | MMU_MACHPHYS_UPDATE;
909 mmu->val = __pa(skb->head) >> PAGE_SHIFT;
910 mmu++;
912 set_phys_to_machine(__pa(skb->head) >> PAGE_SHIFT,
913 mfn);
914 }
916 __skb_queue_tail(&rxq, skb);
917 }
919 /* Some pages are no longer absent... */
920 balloon_update_driver_allowance(-work_done);
922 /* Do all the remapping work, and M2P updates, in one big hypercall. */
923 if (likely((mcl - np->rx_mcl) != 0)) {
924 mcl->op = __HYPERVISOR_mmu_update;
925 mcl->args[0] = (unsigned long)np->rx_mmu;
926 mcl->args[1] = mmu - np->rx_mmu;
927 mcl->args[2] = 0;
928 mcl->args[3] = DOMID_SELF;
929 mcl++;
930 (void)HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
931 }
933 while ((skb = __skb_dequeue(&rxq)) != NULL) {
934 if (skb->len > (dev->mtu + ETH_HLEN + 4)) {
935 if (net_ratelimit())
936 printk(KERN_INFO "Received packet too big for "
937 "MTU (%d > %d)\n",
938 skb->len - ETH_HLEN - 4, dev->mtu);
939 skb->len = 0;
940 skb->tail = skb->data;
941 init_skb_shinfo(skb);
942 dev_kfree_skb(skb);
943 continue;
944 }
946 /*
947 * Enough room in skbuff for the data we were passed? Also,
948 * Linux expects at least 16 bytes headroom in each rx buffer.
949 */
950 if (unlikely(skb->tail > skb->end) ||
951 unlikely((skb->data - skb->head) < 16)) {
952 if (net_ratelimit()) {
953 if (skb->tail > skb->end)
954 printk(KERN_INFO "Received packet "
955 "is %zd bytes beyond tail.\n",
956 skb->tail - skb->end);
957 else
958 printk(KERN_INFO "Received packet "
959 "is %zd bytes before head.\n",
960 16 - (skb->data - skb->head));
961 }
963 nskb = __dev_alloc_skb(skb->len + 2,
964 GFP_ATOMIC|__GFP_NOWARN);
965 if (nskb != NULL) {
966 skb_reserve(nskb, 2);
967 skb_put(nskb, skb->len);
968 memcpy(nskb->data, skb->data, skb->len);
969 /* Copy any other fields we already set up. */
970 nskb->dev = skb->dev;
971 nskb->ip_summed = skb->ip_summed;
972 nskb->proto_data_valid = skb->proto_data_valid;
973 nskb->proto_csum_blank = skb->proto_csum_blank;
974 }
976 /* Reinitialise and then destroy the old skbuff. */
977 skb->len = 0;
978 skb->tail = skb->data;
979 init_skb_shinfo(skb);
980 dev_kfree_skb(skb);
982 /* Switch old for new, if we copied the buffer. */
983 if ((skb = nskb) == NULL)
984 continue;
985 }
987 /* Set the shinfo area, which is hidden behind the data. */
988 init_skb_shinfo(skb);
989 /* Ethernet work: Delayed to here as it peeks the header. */
990 skb->protocol = eth_type_trans(skb, dev);
992 /* Pass it up. */
993 netif_receive_skb(skb);
994 dev->last_rx = jiffies;
995 }
997 np->rx.rsp_cons = i;
999 /* If we get a callback with very few responses, reduce fill target. */
1000 /* NB. Note exponential increase, linear decrease. */
1001 if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
1002 ((3*np->rx_target) / 4)) &&
1003 (--np->rx_target < np->rx_min_target))
1004 np->rx_target = np->rx_min_target;
1006 network_alloc_rx_buffers(dev);
1008 *pbudget -= work_done;
1009 dev->quota -= work_done;
1011 if (work_done < budget) {
1012 local_irq_save(flags);
1014 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
1015 if (!more_to_do)
1016 __netif_rx_complete(dev);
1018 local_irq_restore(flags);
1019 }
1021 spin_unlock(&np->rx_lock);
1023 return more_to_do;
1024 }
1027 static int network_close(struct net_device *dev)
1028 {
1029 struct netfront_info *np = netdev_priv(dev);
1030 netif_stop_queue(np->netdev);
1031 return 0;
1032 }
1035 static struct net_device_stats *network_get_stats(struct net_device *dev)
1036 {
1037 struct netfront_info *np = netdev_priv(dev);
1038 return &np->stats;
1039 }
1041 static int xennet_change_mtu(struct net_device *dev, int mtu)
1042 {
1043 int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
1045 if (mtu > max)
1046 return -EINVAL;
1047 dev->mtu = mtu;
1048 return 0;
1049 }
1051 static int xennet_set_sg(struct net_device *dev, u32 data)
1052 {
1053 if (data) {
1054 struct netfront_info *np = netdev_priv(dev);
1055 int val;
1057 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1058 "%d", &val) < 0)
1059 val = 0;
1060 if (!val)
1061 return -ENOSYS;
1062 } else if (dev->mtu > ETH_DATA_LEN)
1063 dev->mtu = ETH_DATA_LEN;
1065 return ethtool_op_set_sg(dev, data);
1066 }
1068 static void xennet_set_features(struct net_device *dev)
1069 {
1070 xennet_set_sg(dev, 1);
1071 }
1073 static void network_connect(struct net_device *dev)
1074 {
1075 struct netfront_info *np = netdev_priv(dev);
1076 int i, requeue_idx;
1077 struct sk_buff *skb;
1079 xennet_set_features(dev);
1081 spin_lock_irq(&np->tx_lock);
1082 spin_lock(&np->rx_lock);
1084 /*
1085 * Recovery procedure:
1086 * NB. Freelist index entries are always going to be less than
1087 * PAGE_OFFSET, whereas pointers to skbs will always be equal or
1088 * greater than PAGE_OFFSET: we use this property to distinguish
1089 * them.
1090 */
1092 /* Step 1: Discard all pending TX packet fragments. */
1093 for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
1094 if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
1095 continue;
1097 skb = np->tx_skbs[i];
1098 gnttab_end_foreign_access_ref(
1099 np->grant_tx_ref[i], GNTMAP_readonly);
1100 gnttab_release_grant_reference(
1101 &np->gref_tx_head, np->grant_tx_ref[i]);
1102 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1103 add_id_to_freelist(np->tx_skbs, i);
1104 dev_kfree_skb_irq(skb);
1105 }
1107 /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1108 for (requeue_idx = 0, i = 1; i <= NET_RX_RING_SIZE; i++) {
1109 if ((unsigned long)np->rx_skbs[i] < PAGE_OFFSET)
1110 continue;
1111 gnttab_grant_foreign_transfer_ref(
1112 np->grant_rx_ref[i], np->xbdev->otherend_id,
1113 __pa(np->rx_skbs[i]->data) >> PAGE_SHIFT);
1114 RING_GET_REQUEST(&np->rx, requeue_idx)->gref =
1115 np->grant_rx_ref[i];
1116 RING_GET_REQUEST(&np->rx, requeue_idx)->id = i;
1117 requeue_idx++;
1118 }
1120 np->rx.req_prod_pvt = requeue_idx;
1121 RING_PUSH_REQUESTS(&np->rx);
1123 /*
1124 * Step 3: All public and private state should now be sane. Get
1125 * ready to start sending and receiving packets and give the driver
1126 * domain a kick because we've probably just requeued some
1127 * packets.
1128 */
1129 netif_carrier_on(dev);
1130 notify_remote_via_irq(np->irq);
1131 network_tx_buf_gc(dev);
1132 network_alloc_rx_buffers(dev);
1134 spin_unlock(&np->rx_lock);
1135 spin_unlock_irq(&np->tx_lock);
1136 }
1138 static void netif_uninit(struct net_device *dev)
1139 {
1140 struct netfront_info *np = netdev_priv(dev);
1141 gnttab_free_grant_references(np->gref_tx_head);
1142 gnttab_free_grant_references(np->gref_rx_head);
1143 }
1145 static struct ethtool_ops network_ethtool_ops =
1146 {
1147 .get_tx_csum = ethtool_op_get_tx_csum,
1148 .set_tx_csum = ethtool_op_set_tx_csum,
1149 .get_sg = ethtool_op_get_sg,
1150 .set_sg = xennet_set_sg,
1151 };
1153 #ifdef CONFIG_SYSFS
1154 static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
1155 {
1156 struct net_device *netdev = container_of(cd, struct net_device,
1157 class_dev);
1158 struct netfront_info *info = netdev_priv(netdev);
1160 return sprintf(buf, "%u\n", info->rx_min_target);
1161 }
1163 static ssize_t store_rxbuf_min(struct class_device *cd,
1164 const char *buf, size_t len)
1165 {
1166 struct net_device *netdev = container_of(cd, struct net_device,
1167 class_dev);
1168 struct netfront_info *np = netdev_priv(netdev);
1169 char *endp;
1170 unsigned long target;
1172 if (!capable(CAP_NET_ADMIN))
1173 return -EPERM;
1175 target = simple_strtoul(buf, &endp, 0);
1176 if (endp == buf)
1177 return -EBADMSG;
1179 if (target < RX_MIN_TARGET)
1180 target = RX_MIN_TARGET;
1181 if (target > RX_MAX_TARGET)
1182 target = RX_MAX_TARGET;
1184 spin_lock(&np->rx_lock);
1185 if (target > np->rx_max_target)
1186 np->rx_max_target = target;
1187 np->rx_min_target = target;
1188 if (target > np->rx_target)
1189 np->rx_target = target;
1191 network_alloc_rx_buffers(netdev);
1193 spin_unlock(&np->rx_lock);
1194 return len;
1195 }
1197 static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
1198 {
1199 struct net_device *netdev = container_of(cd, struct net_device,
1200 class_dev);
1201 struct netfront_info *info = netdev_priv(netdev);
1203 return sprintf(buf, "%u\n", info->rx_max_target);
1204 }
1206 static ssize_t store_rxbuf_max(struct class_device *cd,
1207 const char *buf, size_t len)
1208 {
1209 struct net_device *netdev = container_of(cd, struct net_device,
1210 class_dev);
1211 struct netfront_info *np = netdev_priv(netdev);
1212 char *endp;
1213 unsigned long target;
1215 if (!capable(CAP_NET_ADMIN))
1216 return -EPERM;
1218 target = simple_strtoul(buf, &endp, 0);
1219 if (endp == buf)
1220 return -EBADMSG;
1222 if (target < RX_MIN_TARGET)
1223 target = RX_MIN_TARGET;
1224 if (target > RX_MAX_TARGET)
1225 target = RX_MAX_TARGET;
1227 spin_lock(&np->rx_lock);
1228 if (target < np->rx_min_target)
1229 np->rx_min_target = target;
1230 np->rx_max_target = target;
1231 if (target < np->rx_target)
1232 np->rx_target = target;
1234 network_alloc_rx_buffers(netdev);
1236 spin_unlock(&np->rx_lock);
1237 return len;
1238 }
1240 static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
1241 {
1242 struct net_device *netdev = container_of(cd, struct net_device,
1243 class_dev);
1244 struct netfront_info *info = netdev_priv(netdev);
1246 return sprintf(buf, "%u\n", info->rx_target);
1247 }
1249 static const struct class_device_attribute xennet_attrs[] = {
1250 __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
1251 __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
1252 __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
1253 };
1255 static int xennet_sysfs_addif(struct net_device *netdev)
1256 {
1257 int i;
1258 int error = 0;
1260 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1261 error = class_device_create_file(&netdev->class_dev,
1262 &xennet_attrs[i]);
1263 if (error)
1264 goto fail;
1265 }
1266 return 0;
1268 fail:
1269 while (--i >= 0)
1270 class_device_remove_file(&netdev->class_dev,
1271 &xennet_attrs[i]);
1272 return error;
1273 }
1275 static void xennet_sysfs_delif(struct net_device *netdev)
1276 {
1277 int i;
1279 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1280 class_device_remove_file(&netdev->class_dev,
1281 &xennet_attrs[i]);
1282 }
1283 }
1285 #endif /* CONFIG_SYSFS */
1288 /*
1289 * Nothing to do here. Virtual interface is point-to-point and the
1290 * physical interface is probably promiscuous anyway.
1291 */
1292 static void network_set_multicast_list(struct net_device *dev)
1293 {
1294 }
1296 /** Create a network device.
1297 * @param handle device handle
1298 * @param val return parameter for created device
1299 * @return 0 on success, error code otherwise
1300 */
1301 static struct net_device * __devinit create_netdev(int handle,
1302 struct xenbus_device *dev)
1303 {
1304 int i, err = 0;
1305 struct net_device *netdev = NULL;
1306 struct netfront_info *np = NULL;
1308 netdev = alloc_etherdev(sizeof(struct netfront_info));
1309 if (!netdev) {
1310 printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
1311 __FUNCTION__);
1312 return ERR_PTR(-ENOMEM);
1313 }
1315 np = netdev_priv(netdev);
1316 np->handle = handle;
1317 np->xbdev = dev;
1319 netif_carrier_off(netdev);
1321 spin_lock_init(&np->tx_lock);
1322 spin_lock_init(&np->rx_lock);
1324 skb_queue_head_init(&np->rx_batch);
1325 np->rx_target = RX_DFL_MIN_TARGET;
1326 np->rx_min_target = RX_DFL_MIN_TARGET;
1327 np->rx_max_target = RX_MAX_TARGET;
1329 init_timer(&np->rx_refill_timer);
1330 np->rx_refill_timer.data = (unsigned long)netdev;
1331 np->rx_refill_timer.function = rx_refill_timeout;
1333 /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
1334 for (i = 0; i <= NET_TX_RING_SIZE; i++) {
1335 np->tx_skbs[i] = (void *)((unsigned long) i+1);
1336 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1337 }
1339 for (i = 0; i <= NET_RX_RING_SIZE; i++) {
1340 np->rx_skbs[i] = (void *)((unsigned long) i+1);
1341 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1342 }
1344 /* A grant for every tx ring slot */
1345 if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1346 &np->gref_tx_head) < 0) {
1347 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
1348 err = -ENOMEM;
1349 goto exit;
1350 }
1351 /* A grant for every rx ring slot */
1352 if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1353 &np->gref_rx_head) < 0) {
1354 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
1355 err = -ENOMEM;
1356 goto exit_free_tx;
1357 }
1359 netdev->open = network_open;
1360 netdev->hard_start_xmit = network_start_xmit;
1361 netdev->stop = network_close;
1362 netdev->get_stats = network_get_stats;
1363 netdev->poll = netif_poll;
1364 netdev->set_multicast_list = network_set_multicast_list;
1365 netdev->uninit = netif_uninit;
1366 netdev->change_mtu = xennet_change_mtu;
1367 netdev->weight = 64;
1368 netdev->features = NETIF_F_IP_CSUM;
1370 SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
1371 SET_MODULE_OWNER(netdev);
1372 SET_NETDEV_DEV(netdev, &dev->dev);
1374 err = register_netdev(netdev);
1375 if (err) {
1376 printk(KERN_WARNING "%s> register_netdev err=%d\n",
1377 __FUNCTION__, err);
1378 goto exit_free_rx;
1379 }
1381 err = xennet_sysfs_addif(netdev);
1382 if (err) {
1383 /* This can be non-fatal: it only means no tuning parameters */
1384 printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
1385 __FUNCTION__, err);
1386 }
1388 np->netdev = netdev;
1390 return netdev;
1393 exit_free_rx:
1394 gnttab_free_grant_references(np->gref_rx_head);
1395 exit_free_tx:
1396 gnttab_free_grant_references(np->gref_tx_head);
1397 exit:
1398 free_netdev(netdev);
1399 return ERR_PTR(err);
1400 }
1402 /*
1403 * We use this notifier to send out a fake ARP reply to reset switches and
1404 * router ARP caches when an IP interface is brought up on a VIF.
1405 */
1406 static int
1407 inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
1408 {
1409 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
1410 struct net_device *dev = ifa->ifa_dev->dev;
1412 /* UP event and is it one of our devices? */
1413 if (event == NETDEV_UP && dev->open == network_open)
1414 (void)send_fake_arp(dev);
1416 return NOTIFY_DONE;
1417 }
1420 /* ** Close down ** */
1423 /**
1424 * Handle the change of state of the backend to Closing. We must delete our
1425 * device-layer structures now, to ensure that writes are flushed through to
1426 * the backend. Once this is done, we can switch to Closed in
1427 * acknowledgement.
1428 */
1429 static void netfront_closing(struct xenbus_device *dev)
1430 {
1431 struct netfront_info *info = dev->dev.driver_data;
1433 DPRINTK("netfront_closing: %s removed\n", dev->nodename);
1435 close_netdev(info);
1437 xenbus_switch_state(dev, XenbusStateClosed);
1438 }
1441 static int __devexit netfront_remove(struct xenbus_device *dev)
1442 {
1443 struct netfront_info *info = dev->dev.driver_data;
1445 DPRINTK("%s\n", dev->nodename);
1447 netif_disconnect_backend(info);
1448 free_netdev(info->netdev);
1450 return 0;
1451 }
1454 static void close_netdev(struct netfront_info *info)
1455 {
1456 del_timer_sync(&info->rx_refill_timer);
1458 xennet_sysfs_delif(info->netdev);
1459 unregister_netdev(info->netdev);
1460 }
1463 static void netif_disconnect_backend(struct netfront_info *info)
1464 {
1465 /* Stop old i/f to prevent errors whilst we rebuild the state. */
1466 spin_lock_irq(&info->tx_lock);
1467 spin_lock(&info->rx_lock);
1468 netif_carrier_off(info->netdev);
1469 spin_unlock(&info->rx_lock);
1470 spin_unlock_irq(&info->tx_lock);
1472 if (info->irq)
1473 unbind_from_irqhandler(info->irq, info->netdev);
1474 info->evtchn = info->irq = 0;
1476 end_access(info->tx_ring_ref, info->tx.sring);
1477 end_access(info->rx_ring_ref, info->rx.sring);
1478 info->tx_ring_ref = GRANT_INVALID_REF;
1479 info->rx_ring_ref = GRANT_INVALID_REF;
1480 info->tx.sring = NULL;
1481 info->rx.sring = NULL;
1482 }
1485 static void netif_free(struct netfront_info *info)
1486 {
1487 close_netdev(info);
1488 netif_disconnect_backend(info);
1489 free_netdev(info->netdev);
1490 }
1493 static void end_access(int ref, void *page)
1494 {
1495 if (ref != GRANT_INVALID_REF)
1496 gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1497 }
1500 /* ** Driver registration ** */
1503 static struct xenbus_device_id netfront_ids[] = {
1504 { "vif" },
1505 { "" }
1506 };
1509 static struct xenbus_driver netfront = {
1510 .name = "vif",
1511 .owner = THIS_MODULE,
1512 .ids = netfront_ids,
1513 .probe = netfront_probe,
1514 .remove = __devexit_p(netfront_remove),
1515 .resume = netfront_resume,
1516 .otherend_changed = backend_changed,
1517 };
1520 static struct notifier_block notifier_inetdev = {
1521 .notifier_call = inetdev_notify,
1522 .next = NULL,
1523 .priority = 0
1524 };
1526 static int __init netif_init(void)
1527 {
1528 if (!is_running_on_xen())
1529 return -ENODEV;
1531 if (xen_start_info->flags & SIF_INITDOMAIN)
1532 return 0;
1534 IPRINTK("Initialising virtual ethernet driver.\n");
1536 (void)register_inetaddr_notifier(&notifier_inetdev);
1538 return xenbus_register_frontend(&netfront);
1539 }
1540 module_init(netif_init);
1543 static void __exit netif_exit(void)
1544 {
1545 unregister_inetaddr_notifier(&notifier_inetdev);
1547 return xenbus_unregister_driver(&netfront);
1548 }
1549 module_exit(netif_exit);
1551 MODULE_LICENSE("Dual BSD/GPL");