ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c @ 10975:7684f9032f9f

[NET] front: Check for received packets in network_open()

Because the backend brings up the interface long before the frontend
has booted up, it is possible that by the time we get here we already
have packets queued up for processing.

If we don't process them here, we may delay them longer than
necessary. Worse yet, it is possible to miss the notification
interrupt from the backend in such a way that we never get another
one until we bring the interface down and up.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author ack@localhost.localdomain
date Fri Aug 04 16:07:58 2006 +0100 (2006-08-04)
parents a5835e6f7ff9
children 1d817bfc5ed9
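
For reference, the relevant code is the unconsumed-response check in
network_open() (lines 491-506 of the listing below), excerpted here; the
inline comment is added for explanation and is not part of the source:

    static int network_open(struct net_device *dev)
    {
            struct netfront_info *np = netdev_priv(dev);

            memset(&np->stats, 0, sizeof(np->stats));

            network_alloc_rx_buffers(dev);
            np->rx.sring->rsp_event = np->rx.rsp_cons + 1;

            /* The backend may already have queued packets; schedule RX polling now. */
            if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
                    netif_rx_schedule(dev);

            netif_start_queue(dev);

            return 0;
    }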
line source
1 /******************************************************************************
2 * Virtual network driver for conversing with remote driver backends.
3 *
4 * Copyright (c) 2002-2005, K A Fraser
5 * Copyright (c) 2005, XenSource Ltd
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version 2
9 * as published by the Free Software Foundation; or, when distributed
10 * separately from the Linux kernel or incorporated into other
11 * software packages, subject to the following license:
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this source file (the "Software"), to deal in the Software without
15 * restriction, including without limitation the rights to use, copy, modify,
16 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
17 * and to permit persons to whom the Software is furnished to do so, subject to
18 * the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 * IN THE SOFTWARE.
30 */
32 #include <linux/config.h>
33 #include <linux/module.h>
34 #include <linux/version.h>
35 #include <linux/kernel.h>
36 #include <linux/sched.h>
37 #include <linux/slab.h>
38 #include <linux/string.h>
39 #include <linux/errno.h>
40 #include <linux/netdevice.h>
41 #include <linux/inetdevice.h>
42 #include <linux/etherdevice.h>
43 #include <linux/skbuff.h>
44 #include <linux/init.h>
45 #include <linux/bitops.h>
46 #include <linux/ethtool.h>
47 #include <linux/in.h>
48 #include <linux/if_ether.h>
49 #include <linux/io.h>
50 #include <net/sock.h>
51 #include <net/pkt_sched.h>
52 #include <net/arp.h>
53 #include <net/route.h>
54 #include <asm/uaccess.h>
55 #include <xen/evtchn.h>
56 #include <xen/xenbus.h>
57 #include <xen/interface/io/netif.h>
58 #include <xen/interface/memory.h>
59 #include <xen/balloon.h>
60 #include <asm/page.h>
61 #include <asm/uaccess.h>
62 #include <xen/interface/grant_table.h>
63 #include <xen/gnttab.h>
65 #define RX_COPY_THRESHOLD 256
67 #define GRANT_INVALID_REF 0
69 #define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
70 #define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
72 struct netfront_info {
73 struct list_head list;
74 struct net_device *netdev;
76 struct net_device_stats stats;
78 struct netif_tx_front_ring tx;
79 struct netif_rx_front_ring rx;
81 spinlock_t tx_lock;
82 spinlock_t rx_lock;
84 unsigned int handle;
85 unsigned int evtchn, irq;
87 /* Receive-ring batched refills. */
88 #define RX_MIN_TARGET 8
89 #define RX_DFL_MIN_TARGET 64
90 #define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
91 unsigned rx_min_target, rx_max_target, rx_target;
92 struct sk_buff_head rx_batch;
94 struct timer_list rx_refill_timer;
96 /*
97 * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
98 * is an index into a chain of free entries.
99 */
100 struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
101 struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
103 #define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
104 grant_ref_t gref_tx_head;
105 grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
106 grant_ref_t gref_rx_head;
107 grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
109 struct xenbus_device *xbdev;
110 int tx_ring_ref;
111 int rx_ring_ref;
112 u8 mac[ETH_ALEN];
114 unsigned long rx_pfn_array[NET_RX_RING_SIZE];
115 struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
116 struct mmu_update rx_mmu[NET_RX_RING_SIZE];
117 };
119 struct netfront_rx_info {
120 struct netif_rx_response rx;
121 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
122 };
124 /*
125 * Access macros for acquiring/freeing slots in tx_skbs[].
126 */
128 static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
129 {
130 list[id] = list[0];
131 list[0] = (void *)(unsigned long)id;
132 }
134 static inline unsigned short get_id_from_freelist(struct sk_buff **list)
135 {
136 unsigned int id = (unsigned int)(unsigned long)list[0];
137 list[0] = list[id];
138 return id;
139 }
141 static inline int xennet_rxidx(RING_IDX idx)
142 {
143 return idx & (NET_RX_RING_SIZE - 1);
144 }
146 static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
147 RING_IDX ri)
148 {
149 int i = xennet_rxidx(ri);
150 struct sk_buff *skb = np->rx_skbs[i];
151 np->rx_skbs[i] = NULL;
152 return skb;
153 }
155 static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
156 RING_IDX ri)
157 {
158 int i = xennet_rxidx(ri);
159 grant_ref_t ref = np->grant_rx_ref[i];
160 np->grant_rx_ref[i] = GRANT_INVALID_REF;
161 return ref;
162 }
164 #define DPRINTK(fmt, args...) \
165 pr_debug("netfront (%s:%d) " fmt, \
166 __FUNCTION__, __LINE__, ##args)
167 #define IPRINTK(fmt, args...) \
168 printk(KERN_INFO "netfront: " fmt, ##args)
169 #define WPRINTK(fmt, args...) \
170 printk(KERN_WARNING "netfront: " fmt, ##args)
172 static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
173 static int setup_device(struct xenbus_device *, struct netfront_info *);
174 static struct net_device *create_netdev(int, struct xenbus_device *);
176 static void netfront_closing(struct xenbus_device *);
178 static void end_access(int, void *);
179 static void netif_disconnect_backend(struct netfront_info *);
180 static void close_netdev(struct netfront_info *);
181 static void netif_free(struct netfront_info *);
183 static void network_connect(struct net_device *);
184 static void network_tx_buf_gc(struct net_device *);
185 static void network_alloc_rx_buffers(struct net_device *);
186 static int send_fake_arp(struct net_device *);
188 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
190 #ifdef CONFIG_SYSFS
191 static int xennet_sysfs_addif(struct net_device *netdev);
192 static void xennet_sysfs_delif(struct net_device *netdev);
193 #else /* !CONFIG_SYSFS */
194 #define xennet_sysfs_addif(dev) (0)
195 #define xennet_sysfs_delif(dev) do { } while(0)
196 #endif
198 static inline int xennet_can_sg(struct net_device *dev)
199 {
200 return dev->features & NETIF_F_SG;
201 }
203 /**
204 * Entry point to this code when a new device is created. Allocate the basic
205 * structures and the ring buffers for communication with the backend, and
206 * inform the backend of the appropriate details for those. Switch to
207 * Connected state.
208 */
209 static int __devinit netfront_probe(struct xenbus_device *dev,
210 const struct xenbus_device_id *id)
211 {
212 int err;
213 struct net_device *netdev;
214 struct netfront_info *info;
215 unsigned int handle;
217 err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
218 if (err != 1) {
219 xenbus_dev_fatal(dev, err, "reading handle");
220 return err;
221 }
223 netdev = create_netdev(handle, dev);
224 if (IS_ERR(netdev)) {
225 err = PTR_ERR(netdev);
226 xenbus_dev_fatal(dev, err, "creating netdev");
227 return err;
228 }
230 info = netdev_priv(netdev);
231 dev->dev.driver_data = info;
233 err = talk_to_backend(dev, info);
234 if (err) {
235 xennet_sysfs_delif(info->netdev);
236 unregister_netdev(netdev);
237 free_netdev(netdev);
238 dev->dev.driver_data = NULL;
239 return err;
240 }
242 return 0;
243 }
246 /**
247 * We are reconnecting to the backend, due to a suspend/resume, or a backend
248 * driver restart. We tear down our netif structure and recreate it, but
249 * leave the device-layer structures intact so that this is transparent to the
250 * rest of the kernel.
251 */
252 static int netfront_resume(struct xenbus_device *dev)
253 {
254 struct netfront_info *info = dev->dev.driver_data;
256 DPRINTK("%s\n", dev->nodename);
258 netif_disconnect_backend(info);
259 return talk_to_backend(dev, info);
260 }
262 static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
263 {
264 char *s, *e, *macstr;
265 int i;
267 macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
268 if (IS_ERR(macstr))
269 return PTR_ERR(macstr);
271 for (i = 0; i < ETH_ALEN; i++) {
272 mac[i] = simple_strtoul(s, &e, 16);
273 if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
274 kfree(macstr);
275 return -ENOENT;
276 }
277 s = e+1;
278 }
280 kfree(macstr);
281 return 0;
282 }
284 /* Common code used when first setting up, and when resuming. */
285 static int talk_to_backend(struct xenbus_device *dev,
286 struct netfront_info *info)
287 {
288 const char *message;
289 struct xenbus_transaction xbt;
290 int err;
292 err = xen_net_read_mac(dev, info->mac);
293 if (err) {
294 xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
295 goto out;
296 }
298 /* Create shared ring, alloc event channel. */
299 err = setup_device(dev, info);
300 if (err)
301 goto out;
303 again:
304 err = xenbus_transaction_start(&xbt);
305 if (err) {
306 xenbus_dev_fatal(dev, err, "starting transaction");
307 goto destroy_ring;
308 }
310 err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
311 info->tx_ring_ref);
312 if (err) {
313 message = "writing tx ring-ref";
314 goto abort_transaction;
315 }
316 err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
317 info->rx_ring_ref);
318 if (err) {
319 message = "writing rx ring-ref";
320 goto abort_transaction;
321 }
322 err = xenbus_printf(xbt, dev->nodename,
323 "event-channel", "%u", info->evtchn);
324 if (err) {
325 message = "writing event-channel";
326 goto abort_transaction;
327 }
329 err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
330 if (err) {
331 message = "writing feature-rx-notify";
332 goto abort_transaction;
333 }
335 err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
336 if (err) {
337 message = "writing feature-sg";
338 goto abort_transaction;
339 }
341 err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
342 if (err) {
343 message = "writing feature-gso-tcpv4";
344 goto abort_transaction;
345 }
347 err = xenbus_transaction_end(xbt, 0);
348 if (err) {
349 if (err == -EAGAIN)
350 goto again;
351 xenbus_dev_fatal(dev, err, "completing transaction");
352 goto destroy_ring;
353 }
355 return 0;
357 abort_transaction:
358 xenbus_transaction_end(xbt, 1);
359 xenbus_dev_fatal(dev, err, "%s", message);
360 destroy_ring:
361 netif_free(info);
362 out:
363 return err;
364 }
367 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
368 {
369 struct netif_tx_sring *txs;
370 struct netif_rx_sring *rxs;
371 int err;
372 struct net_device *netdev = info->netdev;
374 info->tx_ring_ref = GRANT_INVALID_REF;
375 info->rx_ring_ref = GRANT_INVALID_REF;
376 info->rx.sring = NULL;
377 info->tx.sring = NULL;
378 info->irq = 0;
380 txs = (struct netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
381 if (!txs) {
382 err = -ENOMEM;
383 xenbus_dev_fatal(dev, err, "allocating tx ring page");
384 goto fail;
385 }
386 SHARED_RING_INIT(txs);
387 FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
389 err = xenbus_grant_ring(dev, virt_to_mfn(txs));
390 if (err < 0) {
391 free_page((unsigned long)txs);
392 goto fail;
393 }
394 info->tx_ring_ref = err;
396 rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
397 if (!rxs) {
398 err = -ENOMEM;
399 xenbus_dev_fatal(dev, err, "allocating rx ring page");
400 goto fail;
401 }
402 SHARED_RING_INIT(rxs);
403 FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
405 err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
406 if (err < 0) {
407 free_page((unsigned long)rxs);
408 goto fail;
409 }
410 info->rx_ring_ref = err;
412 err = xenbus_alloc_evtchn(dev, &info->evtchn);
413 if (err)
414 goto fail;
416 memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
417 err = bind_evtchn_to_irqhandler(info->evtchn, netif_int,
418 SA_SAMPLE_RANDOM, netdev->name, netdev);
419 if (err < 0)
420 goto fail;
421 info->irq = err;
422 return 0;
424 fail:
425 netif_free(info);
426 return err;
427 }
430 /**
431 * Callback received when the backend's state changes.
432 */
433 static void backend_changed(struct xenbus_device *dev,
434 enum xenbus_state backend_state)
435 {
436 struct netfront_info *np = dev->dev.driver_data;
437 struct net_device *netdev = np->netdev;
439 DPRINTK("\n");
441 switch (backend_state) {
442 case XenbusStateInitialising:
443 case XenbusStateInitialised:
444 case XenbusStateConnected:
445 case XenbusStateUnknown:
446 case XenbusStateClosed:
447 break;
449 case XenbusStateInitWait:
450 network_connect(netdev);
451 xenbus_switch_state(dev, XenbusStateConnected);
452 (void)send_fake_arp(netdev);
453 break;
455 case XenbusStateClosing:
456 netfront_closing(dev);
457 break;
458 }
459 }
462 /** Send a packet on a net device to encourage switches to learn the
463 * MAC. We send a fake ARP reply.
464 *
465 * @param dev device
466 * @return 0 on success, error code otherwise
467 */
468 static int send_fake_arp(struct net_device *dev)
469 {
470 struct sk_buff *skb;
471 u32 src_ip, dst_ip;
473 dst_ip = INADDR_BROADCAST;
474 src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
476 /* No IP? Then nothing to do. */
477 if (src_ip == 0)
478 return 0;
480 skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
481 dst_ip, dev, src_ip,
482 /*dst_hw*/ NULL, /*src_hw*/ NULL,
483 /*target_hw*/ dev->dev_addr);
484 if (skb == NULL)
485 return -ENOMEM;
487 return dev_queue_xmit(skb);
488 }
491 static int network_open(struct net_device *dev)
492 {
493 struct netfront_info *np = netdev_priv(dev);
495 memset(&np->stats, 0, sizeof(np->stats));
497 network_alloc_rx_buffers(dev);
498 np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
500 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
501 netif_rx_schedule(dev);
503 netif_start_queue(dev);
505 return 0;
506 }
508 static inline int netfront_tx_slot_available(struct netfront_info *np)
509 {
510 return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 2;
511 }
513 static inline void network_maybe_wake_tx(struct net_device *dev)
514 {
515 struct netfront_info *np = netdev_priv(dev);
517 if (unlikely(netif_queue_stopped(dev)) &&
518 netfront_tx_slot_available(np) &&
519 likely(netif_running(dev)))
520 netif_wake_queue(dev);
521 }
523 static void network_tx_buf_gc(struct net_device *dev)
524 {
525 RING_IDX cons, prod;
526 unsigned short id;
527 struct netfront_info *np = netdev_priv(dev);
528 struct sk_buff *skb;
530 if (unlikely(!netif_carrier_ok(dev)))
531 return;
533 do {
534 prod = np->tx.sring->rsp_prod;
535 rmb(); /* Ensure we see responses up to 'rp'. */
537 for (cons = np->tx.rsp_cons; cons != prod; cons++) {
538 struct netif_tx_response *txrsp;
540 txrsp = RING_GET_RESPONSE(&np->tx, cons);
541 if (txrsp->status == NETIF_RSP_NULL)
542 continue;
544 id = txrsp->id;
545 skb = np->tx_skbs[id];
546 if (unlikely(gnttab_query_foreign_access(
547 np->grant_tx_ref[id]) != 0)) {
548 printk(KERN_ALERT "network_tx_buf_gc: warning "
549 "-- grant still in use by backend "
550 "domain.\n");
551 BUG();
552 }
553 gnttab_end_foreign_access_ref(
554 np->grant_tx_ref[id], GNTMAP_readonly);
555 gnttab_release_grant_reference(
556 &np->gref_tx_head, np->grant_tx_ref[id]);
557 np->grant_tx_ref[id] = GRANT_INVALID_REF;
558 add_id_to_freelist(np->tx_skbs, id);
559 dev_kfree_skb_irq(skb);
560 }
562 np->tx.rsp_cons = prod;
564 /*
565 * Set a new event, then check for race with update of tx_cons.
566 * Note that it is essential to schedule a callback, no matter
567 * how few buffers are pending. Even if there is space in the
568 * transmit ring, higher layers may be blocked because too much
569 * data is outstanding: in such cases notification from Xen is
570 * likely to be the only kick that we'll get.
571 */
572 np->tx.sring->rsp_event =
573 prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
574 mb();
575 } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
577 network_maybe_wake_tx(dev);
578 }
581 static void rx_refill_timeout(unsigned long data)
582 {
583 struct net_device *dev = (struct net_device *)data;
584 netif_rx_schedule(dev);
585 }
588 static void network_alloc_rx_buffers(struct net_device *dev)
589 {
590 unsigned short id;
591 struct netfront_info *np = netdev_priv(dev);
592 struct sk_buff *skb;
593 struct page *page;
594 int i, batch_target, notify;
595 RING_IDX req_prod = np->rx.req_prod_pvt;
596 struct xen_memory_reservation reservation;
597 grant_ref_t ref;
598 unsigned long pfn;
599 void *vaddr;
601 if (unlikely(!netif_carrier_ok(dev)))
602 return;
604 /*
605 * Allocate skbuffs greedily, even though we batch updates to the
606 * receive ring. This creates a less bursty demand on the memory
607 * allocator, so should reduce the chance of failed allocation requests
608 * both for ourself and for other kernel subsystems.
609 */
610 batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
611 for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
612 /* Allocate an skb and a page. */
613 skb = __dev_alloc_skb(RX_COPY_THRESHOLD,
614 GFP_ATOMIC | __GFP_NOWARN);
615 if (unlikely(!skb))
616 goto no_skb;
618 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
619 if (!page) {
620 kfree_skb(skb);
621 no_skb:
622 /* Any skbuffs queued for refill? Force them out. */
623 if (i != 0)
624 goto refill;
625 /* Could not allocate any skbuffs. Try again later. */
626 mod_timer(&np->rx_refill_timer,
627 jiffies + (HZ/10));
628 break;
629 }
631 skb_shinfo(skb)->frags[0].page = page;
632 skb_shinfo(skb)->nr_frags = 1;
633 __skb_queue_tail(&np->rx_batch, skb);
634 }
636 /* Is the batch large enough to be worthwhile? */
637 if (i < (np->rx_target/2)) {
638 if (req_prod > np->rx.sring->req_prod)
639 goto push;
640 return;
641 }
643 /* Adjust our fill target if we risked running out of buffers. */
644 if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
645 ((np->rx_target *= 2) > np->rx_max_target))
646 np->rx_target = np->rx_max_target;
648 refill:
649 for (i = 0; ; i++) {
650 if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
651 break;
653 skb->dev = dev;
655 id = xennet_rxidx(req_prod + i);
657 BUG_ON(np->rx_skbs[id]);
658 np->rx_skbs[id] = skb;
660 RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
661 ref = gnttab_claim_grant_reference(&np->gref_rx_head);
662 BUG_ON((signed short)ref < 0);
663 np->grant_rx_ref[id] = ref;
665 pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
666 vaddr = page_address(skb_shinfo(skb)->frags[0].page);
668 gnttab_grant_foreign_transfer_ref(ref,
669 np->xbdev->otherend_id, pfn);
670 RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
671 np->rx_pfn_array[i] = pfn_to_mfn(pfn);
673 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
674 /* Remove this page before passing back to Xen. */
675 set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
676 MULTI_update_va_mapping(np->rx_mcl+i,
677 (unsigned long)vaddr,
678 __pte(0), 0);
679 }
680 }
682 /* Tell the balloon driver what is going on. */
683 balloon_update_driver_allowance(i);
685 set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array);
686 reservation.nr_extents = i;
687 reservation.extent_order = 0;
688 reservation.address_bits = 0;
689 reservation.domid = DOMID_SELF;
691 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
692 /* After all PTEs have been zapped, flush the TLB. */
693 np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
694 UVMF_TLB_FLUSH|UVMF_ALL;
696 /* Give away a batch of pages. */
697 np->rx_mcl[i].op = __HYPERVISOR_memory_op;
698 np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
699 np->rx_mcl[i].args[1] = (unsigned long)&reservation;
701 /* Zap PTEs and give away pages in one big multicall. */
702 (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
704 /* Check return status of HYPERVISOR_memory_op(). */
705 if (unlikely(np->rx_mcl[i].result != i))
706 panic("Unable to reduce memory reservation\n");
707 } else
708 if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
709 &reservation) != i)
710 panic("Unable to reduce memory reservation\n");
712 /* Above is a suitable barrier to ensure backend will see requests. */
713 np->rx.req_prod_pvt = req_prod + i;
714 push:
715 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
716 if (notify)
717 notify_remote_via_irq(np->irq);
718 }
720 static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
721 struct netif_tx_request *tx)
722 {
723 struct netfront_info *np = netdev_priv(dev);
724 char *data = skb->data;
725 unsigned long mfn;
726 RING_IDX prod = np->tx.req_prod_pvt;
727 int frags = skb_shinfo(skb)->nr_frags;
728 unsigned int offset = offset_in_page(data);
729 unsigned int len = skb_headlen(skb);
730 unsigned int id;
731 grant_ref_t ref;
732 int i;
734 while (len > PAGE_SIZE - offset) {
735 tx->size = PAGE_SIZE - offset;
736 tx->flags |= NETTXF_more_data;
737 len -= tx->size;
738 data += tx->size;
739 offset = 0;
741 id = get_id_from_freelist(np->tx_skbs);
742 np->tx_skbs[id] = skb_get(skb);
743 tx = RING_GET_REQUEST(&np->tx, prod++);
744 tx->id = id;
745 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
746 BUG_ON((signed short)ref < 0);
748 mfn = virt_to_mfn(data);
749 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
750 mfn, GNTMAP_readonly);
752 tx->gref = np->grant_tx_ref[id] = ref;
753 tx->offset = offset;
754 tx->size = len;
755 tx->flags = 0;
756 }
758 for (i = 0; i < frags; i++) {
759 skb_frag_t *frag = skb_shinfo(skb)->frags + i;
761 tx->flags |= NETTXF_more_data;
763 id = get_id_from_freelist(np->tx_skbs);
764 np->tx_skbs[id] = skb_get(skb);
765 tx = RING_GET_REQUEST(&np->tx, prod++);
766 tx->id = id;
767 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
768 BUG_ON((signed short)ref < 0);
770 mfn = pfn_to_mfn(page_to_pfn(frag->page));
771 gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
772 mfn, GNTMAP_readonly);
774 tx->gref = np->grant_tx_ref[id] = ref;
775 tx->offset = frag->page_offset;
776 tx->size = frag->size;
777 tx->flags = 0;
778 }
780 np->tx.req_prod_pvt = prod;
781 }
783 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
784 {
785 unsigned short id;
786 struct netfront_info *np = netdev_priv(dev);
787 struct netif_tx_request *tx;
788 struct netif_extra_info *extra;
789 char *data = skb->data;
790 RING_IDX i;
791 grant_ref_t ref;
792 unsigned long mfn;
793 int notify;
794 int frags = skb_shinfo(skb)->nr_frags;
795 unsigned int offset = offset_in_page(data);
796 unsigned int len = skb_headlen(skb);
798 frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
799 if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
800 printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
801 frags);
802 dump_stack();
803 goto drop;
804 }
806 spin_lock_irq(&np->tx_lock);
808 if (unlikely(!netif_carrier_ok(dev) ||
809 (frags > 1 && !xennet_can_sg(dev)) ||
810 netif_needs_gso(dev, skb))) {
811 spin_unlock_irq(&np->tx_lock);
812 goto drop;
813 }
815 i = np->tx.req_prod_pvt;
817 id = get_id_from_freelist(np->tx_skbs);
818 np->tx_skbs[id] = skb;
820 tx = RING_GET_REQUEST(&np->tx, i);
822 tx->id = id;
823 ref = gnttab_claim_grant_reference(&np->gref_tx_head);
824 BUG_ON((signed short)ref < 0);
825 mfn = virt_to_mfn(data);
826 gnttab_grant_foreign_access_ref(
827 ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
828 tx->gref = np->grant_tx_ref[id] = ref;
829 tx->offset = offset;
830 tx->size = len;
832 tx->flags = 0;
833 extra = NULL;
835 if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
836 tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
837 if (skb->proto_data_valid) /* remote but checksummed? */
838 tx->flags |= NETTXF_data_validated;
840 if (skb_shinfo(skb)->gso_size) {
841 struct netif_extra_info *gso = (struct netif_extra_info *)
842 RING_GET_REQUEST(&np->tx, ++i);
844 if (extra)
845 extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
846 else
847 tx->flags |= NETTXF_extra_info;
849 gso->u.gso.size = skb_shinfo(skb)->gso_size;
850 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
851 gso->u.gso.pad = 0;
852 gso->u.gso.features = 0;
854 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
855 gso->flags = 0;
856 extra = gso;
857 }
859 np->tx.req_prod_pvt = i + 1;
861 xennet_make_frags(skb, dev, tx);
862 tx->size = skb->len;
864 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
865 if (notify)
866 notify_remote_via_irq(np->irq);
868 network_tx_buf_gc(dev);
870 if (!netfront_tx_slot_available(np))
871 netif_stop_queue(dev);
873 spin_unlock_irq(&np->tx_lock);
875 np->stats.tx_bytes += skb->len;
876 np->stats.tx_packets++;
878 return 0;
880 drop:
881 np->stats.tx_dropped++;
882 dev_kfree_skb(skb);
883 return 0;
884 }
886 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
887 {
888 struct net_device *dev = dev_id;
889 struct netfront_info *np = netdev_priv(dev);
890 unsigned long flags;
892 spin_lock_irqsave(&np->tx_lock, flags);
893 network_tx_buf_gc(dev);
894 spin_unlock_irqrestore(&np->tx_lock, flags);
896 if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) &&
897 likely(netif_running(dev)))
898 netif_rx_schedule(dev);
900 return IRQ_HANDLED;
901 }
903 static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
904 grant_ref_t ref)
905 {
906 int new = xennet_rxidx(np->rx.req_prod_pvt);
908 BUG_ON(np->rx_skbs[new]);
909 np->rx_skbs[new] = skb;
910 np->grant_rx_ref[new] = ref;
911 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
912 RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
913 np->rx.req_prod_pvt++;
914 }
916 int xennet_get_extras(struct netfront_info *np,
917 struct netif_extra_info *extras, RING_IDX rp)
919 {
920 struct netif_extra_info *extra;
921 RING_IDX cons = np->rx.rsp_cons;
922 int err = 0;
924 do {
925 struct sk_buff *skb;
926 grant_ref_t ref;
928 if (unlikely(cons + 1 == rp)) {
929 if (net_ratelimit())
930 WPRINTK("Missing extra info\n");
931 err = -EBADR;
932 break;
933 }
935 extra = (struct netif_extra_info *)
936 RING_GET_RESPONSE(&np->rx, ++cons);
938 if (unlikely(!extra->type ||
939 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
940 if (net_ratelimit())
941 WPRINTK("Invalid extra type: %d\n",
942 extra->type);
943 err = -EINVAL;
944 } else
945 memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
947 skb = xennet_get_rx_skb(np, cons);
948 ref = xennet_get_rx_ref(np, cons);
949 xennet_move_rx_slot(np, skb, ref);
950 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
952 np->rx.rsp_cons = cons;
953 return err;
954 }
956 static int xennet_get_responses(struct netfront_info *np,
957 struct netfront_rx_info *rinfo, RING_IDX rp,
958 struct sk_buff_head *list, int count)
959 {
960 struct mmu_update *mmu = np->rx_mmu + count;
961 struct multicall_entry *mcl = np->rx_mcl + count;
962 struct netif_rx_response *rx = &rinfo->rx;
963 struct netif_extra_info *extras = rinfo->extras;
964 RING_IDX cons = np->rx.rsp_cons;
965 struct sk_buff *skb = xennet_get_rx_skb(np, cons);
966 grant_ref_t ref = xennet_get_rx_ref(np, cons);
967 int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
968 int frags = 1;
969 int err = 0;
971 if (rx->flags & NETRXF_extra_info) {
972 err = xennet_get_extras(np, extras, rp);
973 cons = np->rx.rsp_cons;
974 }
976 for (;;) {
977 unsigned long mfn;
979 if (unlikely(rx->status < 0 ||
980 rx->offset + rx->status > PAGE_SIZE)) {
981 if (net_ratelimit())
982 WPRINTK("rx->offset: %x, size: %u\n",
983 rx->offset, rx->status);
984 err = -EINVAL;
985 }
987 /*
988 * This definitely indicates a bug, either in this driver or in
989 * the backend driver. In future this should flag the bad
990 * situation to the system controller to reboot the backend.
991 */
992 if (ref == GRANT_INVALID_REF) {
993 WPRINTK("Bad rx response id %d.\n", rx->id);
994 err = -EINVAL;
995 goto next;
996 }
998 /* Memory pressure, insufficient buffer headroom, ... */
999 if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
1000 if (net_ratelimit())
1001 WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
1002 rx->id, rx->status);
1003 xennet_move_rx_slot(np, skb, ref);
1004 err = -ENOMEM;
1005 goto next;
1008 gnttab_release_grant_reference(&np->gref_rx_head, ref);
1010 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1011 /* Remap the page. */
1012 struct page *page = skb_shinfo(skb)->frags[0].page;
1013 unsigned long pfn = page_to_pfn(page);
1014 void *vaddr = page_address(page);
1016 MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
1017 pfn_pte_ma(mfn, PAGE_KERNEL),
1018 0);
1019 mcl++;
1020 mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
1021 | MMU_MACHPHYS_UPDATE;
1022 mmu->val = pfn;
1023 mmu++;
1025 set_phys_to_machine(pfn, mfn);
1028 __skb_queue_tail(list, skb);
1030 next:
1031 if (!(rx->flags & NETRXF_more_data))
1032 break;
1034 if (cons + frags == rp) {
1035 if (net_ratelimit())
1036 WPRINTK("Need more frags\n");
1037 err = -ENOENT;
1038 break;
1041 rx = RING_GET_RESPONSE(&np->rx, cons + frags);
1042 skb = xennet_get_rx_skb(np, cons + frags);
1043 ref = xennet_get_rx_ref(np, cons + frags);
1044 frags++;
1047 if (unlikely(frags > max)) {
1048 if (net_ratelimit())
1049 WPRINTK("Too many frags\n");
1050 err = -E2BIG;
1053 return err;
1056 static RING_IDX xennet_fill_frags(struct netfront_info *np,
1057 struct sk_buff *skb,
1058 struct sk_buff_head *list)
1060 struct skb_shared_info *shinfo = skb_shinfo(skb);
1061 int nr_frags = shinfo->nr_frags;
1062 RING_IDX cons = np->rx.rsp_cons;
1063 skb_frag_t *frag = shinfo->frags + nr_frags;
1064 struct sk_buff *nskb;
1066 while ((nskb = __skb_dequeue(list))) {
1067 struct netif_rx_response *rx =
1068 RING_GET_RESPONSE(&np->rx, ++cons);
1070 frag->page = skb_shinfo(nskb)->frags[0].page;
1071 frag->page_offset = rx->offset;
1072 frag->size = rx->status;
1074 skb->data_len += rx->status;
1076 skb_shinfo(nskb)->nr_frags = 0;
1077 kfree_skb(nskb);
1079 frag++;
1080 nr_frags++;
1083 shinfo->nr_frags = nr_frags;
1084 return cons;
1087 static int xennet_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
1089 if (!gso->u.gso.size) {
1090 if (net_ratelimit())
1091 WPRINTK("GSO size must not be zero.\n");
1092 return -EINVAL;
1095 /* Currently only TCPv4 S.O. is supported. */
1096 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1097 if (net_ratelimit())
1098 WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1099 return -EINVAL;
1102 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1103 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1105 /* Header must be checked, and gso_segs computed. */
1106 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1107 skb_shinfo(skb)->gso_segs = 0;
1109 return 0;
1112 static int netif_poll(struct net_device *dev, int *pbudget)
1114 struct netfront_info *np = netdev_priv(dev);
1115 struct sk_buff *skb;
1116 struct netfront_rx_info rinfo;
1117 struct netif_rx_response *rx = &rinfo.rx;
1118 struct netif_extra_info *extras = rinfo.extras;
1119 RING_IDX i, rp;
1120 struct multicall_entry *mcl;
1121 int work_done, budget, more_to_do = 1;
1122 struct sk_buff_head rxq;
1123 struct sk_buff_head errq;
1124 struct sk_buff_head tmpq;
1125 unsigned long flags;
1126 unsigned int len;
1127 int pages_done;
1128 int err;
1130 spin_lock(&np->rx_lock);
1132 if (unlikely(!netif_carrier_ok(dev))) {
1133 spin_unlock(&np->rx_lock);
1134 return 0;
1137 skb_queue_head_init(&rxq);
1138 skb_queue_head_init(&errq);
1139 skb_queue_head_init(&tmpq);
1141 if ((budget = *pbudget) > dev->quota)
1142 budget = dev->quota;
1143 rp = np->rx.sring->rsp_prod;
1144 rmb(); /* Ensure we see queued responses up to 'rp'. */
1146 for (i = np->rx.rsp_cons, work_done = 0, pages_done = 0;
1147 (i != rp) && (work_done < budget);
1148 np->rx.rsp_cons = ++i, work_done++) {
1149 memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
1150 memset(extras, 0, sizeof(extras));
1152 err = xennet_get_responses(np, &rinfo, rp, &tmpq, pages_done);
1153 pages_done += skb_queue_len(&tmpq);
1155 if (unlikely(err)) {
1156 err:
1157 i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1;
1158 work_done--;
1159 while ((skb = __skb_dequeue(&tmpq)))
1160 __skb_queue_tail(&errq, skb);
1161 np->stats.rx_errors++;
1162 continue;
1165 skb = __skb_dequeue(&tmpq);
1167 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1168 struct netif_extra_info *gso;
1169 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1171 if (unlikely(xennet_set_skb_gso(skb, gso))) {
1172 __skb_queue_head(&tmpq, skb);
1173 goto err;
1177 skb->nh.raw = (void *)skb_shinfo(skb)->frags[0].page;
1178 skb->h.raw = skb->nh.raw + rx->offset;
1180 len = rx->status;
1181 if (len > RX_COPY_THRESHOLD)
1182 len = RX_COPY_THRESHOLD;
1183 skb_put(skb, len);
1185 if (rx->status > len) {
1186 skb_shinfo(skb)->frags[0].page_offset =
1187 rx->offset + len;
1188 skb_shinfo(skb)->frags[0].size = rx->status - len;
1189 skb->data_len = rx->status - len;
1190 } else {
1191 skb_shinfo(skb)->frags[0].page = NULL;
1192 skb_shinfo(skb)->nr_frags = 0;
1195 i = xennet_fill_frags(np, skb, &tmpq);
1196 skb->truesize += skb->data_len;
1197 skb->len += skb->data_len;
1199 /*
1200 * Old backends do not assert data_validated but we
1201 * can infer it from csum_blank so test both flags.
1202 */
1203 if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) {
1204 skb->ip_summed = CHECKSUM_UNNECESSARY;
1205 skb->proto_data_valid = 1;
1206 } else {
1207 skb->ip_summed = CHECKSUM_NONE;
1208 skb->proto_data_valid = 0;
1210 skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
1212 np->stats.rx_packets++;
1213 np->stats.rx_bytes += skb->len;
1215 __skb_queue_tail(&rxq, skb);
1218 /* Some pages are no longer absent... */
1219 balloon_update_driver_allowance(-pages_done);
1221 /* Do all the remapping work, and M2P updates, in one big hypercall. */
1222 if (likely(pages_done)) {
1223 mcl = np->rx_mcl + pages_done;
1224 mcl->op = __HYPERVISOR_mmu_update;
1225 mcl->args[0] = (unsigned long)np->rx_mmu;
1226 mcl->args[1] = pages_done;
1227 mcl->args[2] = 0;
1228 mcl->args[3] = DOMID_SELF;
1229 (void)HYPERVISOR_multicall(np->rx_mcl, pages_done + 1);
1232 while ((skb = __skb_dequeue(&errq)))
1233 kfree_skb(skb);
1235 while ((skb = __skb_dequeue(&rxq)) != NULL) {
1236 struct page *page = (struct page *)skb->nh.raw;
1237 void *vaddr = page_address(page);
1239 memcpy(skb->data, vaddr + (skb->h.raw - skb->nh.raw),
1240 skb_headlen(skb));
1242 if (page != skb_shinfo(skb)->frags[0].page)
1243 __free_page(page);
1245 /* Ethernet work: Delayed to here as it peeks the header. */
1246 skb->protocol = eth_type_trans(skb, dev);
1248 /* Pass it up. */
1249 netif_receive_skb(skb);
1250 dev->last_rx = jiffies;
1253 /* If we get a callback with very few responses, reduce fill target. */
1254 /* NB. Note exponential increase, linear decrease. */
1255 if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
1256 ((3*np->rx_target) / 4)) &&
1257 (--np->rx_target < np->rx_min_target))
1258 np->rx_target = np->rx_min_target;
1260 network_alloc_rx_buffers(dev);
1262 *pbudget -= work_done;
1263 dev->quota -= work_done;
1265 if (work_done < budget) {
1266 local_irq_save(flags);
1268 RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
1269 if (!more_to_do)
1270 __netif_rx_complete(dev);
1272 local_irq_restore(flags);
1275 spin_unlock(&np->rx_lock);
1277 return more_to_do;
1281 static int network_close(struct net_device *dev)
1283 struct netfront_info *np = netdev_priv(dev);
1284 netif_stop_queue(np->netdev);
1285 return 0;
1289 static struct net_device_stats *network_get_stats(struct net_device *dev)
1291 struct netfront_info *np = netdev_priv(dev);
1292 return &np->stats;
1295 static int xennet_change_mtu(struct net_device *dev, int mtu)
1297 int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
1299 if (mtu > max)
1300 return -EINVAL;
1301 dev->mtu = mtu;
1302 return 0;
1305 static int xennet_set_sg(struct net_device *dev, u32 data)
1307 if (data) {
1308 struct netfront_info *np = netdev_priv(dev);
1309 int val;
1311 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
1312 "%d", &val) < 0)
1313 val = 0;
1314 if (!val)
1315 return -ENOSYS;
1316 } else if (dev->mtu > ETH_DATA_LEN)
1317 dev->mtu = ETH_DATA_LEN;
1319 return ethtool_op_set_sg(dev, data);
1322 static int xennet_set_tso(struct net_device *dev, u32 data)
1324 if (data) {
1325 struct netfront_info *np = netdev_priv(dev);
1326 int val;
1328 if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
1329 "feature-gso-tcpv4", "%d", &val) < 0)
1330 val = 0;
1331 if (!val)
1332 return -ENOSYS;
1335 return ethtool_op_set_tso(dev, data);
1338 static void xennet_set_features(struct net_device *dev)
1340 /* Turn off all GSO bits except ROBUST. */
1341 dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
1342 dev->features |= NETIF_F_GSO_ROBUST;
1343 xennet_set_sg(dev, 0);
1345 if (!xennet_set_sg(dev, 1))
1346 xennet_set_tso(dev, 1);
1349 static void network_connect(struct net_device *dev)
1351 struct netfront_info *np = netdev_priv(dev);
1352 int i, requeue_idx;
1353 struct sk_buff *skb;
1354 grant_ref_t ref;
1356 xennet_set_features(dev);
1358 spin_lock_irq(&np->tx_lock);
1359 spin_lock(&np->rx_lock);
1361 /*
1362 * Recovery procedure:
1363 * NB. Freelist index entries are always going to be less than
1364 * PAGE_OFFSET, whereas pointers to skbs will always be equal or
1365 * greater than PAGE_OFFSET: we use this property to distinguish
1366 * them.
1367 */
1369 /* Step 1: Discard all pending TX packet fragments. */
1370 for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
1371 if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
1372 continue;
1374 skb = np->tx_skbs[i];
1375 gnttab_end_foreign_access_ref(
1376 np->grant_tx_ref[i], GNTMAP_readonly);
1377 gnttab_release_grant_reference(
1378 &np->gref_tx_head, np->grant_tx_ref[i]);
1379 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1380 add_id_to_freelist(np->tx_skbs, i);
1381 dev_kfree_skb_irq(skb);
1384 /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
1385 for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
1386 if (!np->rx_skbs[i])
1387 continue;
1389 skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
1390 ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
1392 gnttab_grant_foreign_transfer_ref(
1393 ref, np->xbdev->otherend_id,
1394 page_to_pfn(skb_shinfo(skb)->frags->page));
1396 RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref;
1397 RING_GET_REQUEST(&np->rx, requeue_idx)->id = requeue_idx;
1399 requeue_idx++;
1402 np->rx.req_prod_pvt = requeue_idx;
1404 /*
1405 * Step 3: All public and private state should now be sane. Get
1406 * ready to start sending and receiving packets and give the driver
1407 * domain a kick because we've probably just requeued some
1408 * packets.
1409 */
1410 netif_carrier_on(dev);
1411 notify_remote_via_irq(np->irq);
1412 network_tx_buf_gc(dev);
1413 network_alloc_rx_buffers(dev);
1415 spin_unlock(&np->rx_lock);
1416 spin_unlock_irq(&np->tx_lock);
1419 static void netif_uninit(struct net_device *dev)
1421 struct netfront_info *np = netdev_priv(dev);
1422 gnttab_free_grant_references(np->gref_tx_head);
1423 gnttab_free_grant_references(np->gref_rx_head);
1426 static struct ethtool_ops network_ethtool_ops =
1428 .get_tx_csum = ethtool_op_get_tx_csum,
1429 .set_tx_csum = ethtool_op_set_tx_csum,
1430 .get_sg = ethtool_op_get_sg,
1431 .set_sg = xennet_set_sg,
1432 .get_tso = ethtool_op_get_tso,
1433 .set_tso = xennet_set_tso,
1434 .get_link = ethtool_op_get_link,
1435 };
1437 #ifdef CONFIG_SYSFS
1438 static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
1440 struct net_device *netdev = container_of(cd, struct net_device,
1441 class_dev);
1442 struct netfront_info *info = netdev_priv(netdev);
1444 return sprintf(buf, "%u\n", info->rx_min_target);
1447 static ssize_t store_rxbuf_min(struct class_device *cd,
1448 const char *buf, size_t len)
1450 struct net_device *netdev = container_of(cd, struct net_device,
1451 class_dev);
1452 struct netfront_info *np = netdev_priv(netdev);
1453 char *endp;
1454 unsigned long target;
1456 if (!capable(CAP_NET_ADMIN))
1457 return -EPERM;
1459 target = simple_strtoul(buf, &endp, 0);
1460 if (endp == buf)
1461 return -EBADMSG;
1463 if (target < RX_MIN_TARGET)
1464 target = RX_MIN_TARGET;
1465 if (target > RX_MAX_TARGET)
1466 target = RX_MAX_TARGET;
1468 spin_lock(&np->rx_lock);
1469 if (target > np->rx_max_target)
1470 np->rx_max_target = target;
1471 np->rx_min_target = target;
1472 if (target > np->rx_target)
1473 np->rx_target = target;
1475 network_alloc_rx_buffers(netdev);
1477 spin_unlock(&np->rx_lock);
1478 return len;
1481 static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
1483 struct net_device *netdev = container_of(cd, struct net_device,
1484 class_dev);
1485 struct netfront_info *info = netdev_priv(netdev);
1487 return sprintf(buf, "%u\n", info->rx_max_target);
1490 static ssize_t store_rxbuf_max(struct class_device *cd,
1491 const char *buf, size_t len)
1493 struct net_device *netdev = container_of(cd, struct net_device,
1494 class_dev);
1495 struct netfront_info *np = netdev_priv(netdev);
1496 char *endp;
1497 unsigned long target;
1499 if (!capable(CAP_NET_ADMIN))
1500 return -EPERM;
1502 target = simple_strtoul(buf, &endp, 0);
1503 if (endp == buf)
1504 return -EBADMSG;
1506 if (target < RX_MIN_TARGET)
1507 target = RX_MIN_TARGET;
1508 if (target > RX_MAX_TARGET)
1509 target = RX_MAX_TARGET;
1511 spin_lock(&np->rx_lock);
1512 if (target < np->rx_min_target)
1513 np->rx_min_target = target;
1514 np->rx_max_target = target;
1515 if (target < np->rx_target)
1516 np->rx_target = target;
1518 network_alloc_rx_buffers(netdev);
1520 spin_unlock(&np->rx_lock);
1521 return len;
1524 static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
1526 struct net_device *netdev = container_of(cd, struct net_device,
1527 class_dev);
1528 struct netfront_info *info = netdev_priv(netdev);
1530 return sprintf(buf, "%u\n", info->rx_target);
1533 static const struct class_device_attribute xennet_attrs[] = {
1534 __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
1535 __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
1536 __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
1537 };
1539 static int xennet_sysfs_addif(struct net_device *netdev)
1541 int i;
1542 int error = 0;
1544 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1545 error = class_device_create_file(&netdev->class_dev,
1546 &xennet_attrs[i]);
1547 if (error)
1548 goto fail;
1550 return 0;
1552 fail:
1553 while (--i >= 0)
1554 class_device_remove_file(&netdev->class_dev,
1555 &xennet_attrs[i]);
1556 return error;
1559 static void xennet_sysfs_delif(struct net_device *netdev)
1561 int i;
1563 for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
1564 class_device_remove_file(&netdev->class_dev,
1565 &xennet_attrs[i]);
1569 #endif /* CONFIG_SYSFS */
1572 /*
1573 * Nothing to do here. Virtual interface is point-to-point and the
1574 * physical interface is probably promiscuous anyway.
1575 */
1576 static void network_set_multicast_list(struct net_device *dev)
1580 /** Create a network device.
1581 * @param handle device handle
1582 * @param val return parameter for created device
1583 * @return 0 on success, error code otherwise
1584 */
1585 static struct net_device * __devinit create_netdev(int handle,
1586 struct xenbus_device *dev)
1588 int i, err = 0;
1589 struct net_device *netdev = NULL;
1590 struct netfront_info *np = NULL;
1592 netdev = alloc_etherdev(sizeof(struct netfront_info));
1593 if (!netdev) {
1594 printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
1595 __FUNCTION__);
1596 return ERR_PTR(-ENOMEM);
1599 np = netdev_priv(netdev);
1600 np->handle = handle;
1601 np->xbdev = dev;
1603 netif_carrier_off(netdev);
1605 spin_lock_init(&np->tx_lock);
1606 spin_lock_init(&np->rx_lock);
1608 skb_queue_head_init(&np->rx_batch);
1609 np->rx_target = RX_DFL_MIN_TARGET;
1610 np->rx_min_target = RX_DFL_MIN_TARGET;
1611 np->rx_max_target = RX_MAX_TARGET;
1613 init_timer(&np->rx_refill_timer);
1614 np->rx_refill_timer.data = (unsigned long)netdev;
1615 np->rx_refill_timer.function = rx_refill_timeout;
1617 /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
1618 for (i = 0; i <= NET_TX_RING_SIZE; i++) {
1619 np->tx_skbs[i] = (void *)((unsigned long) i+1);
1620 np->grant_tx_ref[i] = GRANT_INVALID_REF;
1623 for (i = 0; i < NET_RX_RING_SIZE; i++) {
1624 np->rx_skbs[i] = NULL;
1625 np->grant_rx_ref[i] = GRANT_INVALID_REF;
1628 /* A grant for every tx ring slot */
1629 if (gnttab_alloc_grant_references(TX_MAX_TARGET,
1630 &np->gref_tx_head) < 0) {
1631 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
1632 err = -ENOMEM;
1633 goto exit;
1635 /* A grant for every rx ring slot */
1636 if (gnttab_alloc_grant_references(RX_MAX_TARGET,
1637 &np->gref_rx_head) < 0) {
1638 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
1639 err = -ENOMEM;
1640 goto exit_free_tx;
1643 netdev->open = network_open;
1644 netdev->hard_start_xmit = network_start_xmit;
1645 netdev->stop = network_close;
1646 netdev->get_stats = network_get_stats;
1647 netdev->poll = netif_poll;
1648 netdev->set_multicast_list = network_set_multicast_list;
1649 netdev->uninit = netif_uninit;
1650 netdev->change_mtu = xennet_change_mtu;
1651 netdev->weight = 64;
1652 netdev->features = NETIF_F_IP_CSUM;
1654 SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
1655 SET_MODULE_OWNER(netdev);
1656 SET_NETDEV_DEV(netdev, &dev->dev);
1658 err = register_netdev(netdev);
1659 if (err) {
1660 printk(KERN_WARNING "%s> register_netdev err=%d\n",
1661 __FUNCTION__, err);
1662 goto exit_free_rx;
1665 err = xennet_sysfs_addif(netdev);
1666 if (err) {
1667 /* This can be non-fatal: it only means no tuning parameters */
1668 printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
1669 __FUNCTION__, err);
1672 np->netdev = netdev;
1674 return netdev;
1677 exit_free_rx:
1678 gnttab_free_grant_references(np->gref_rx_head);
1679 exit_free_tx:
1680 gnttab_free_grant_references(np->gref_tx_head);
1681 exit:
1682 free_netdev(netdev);
1683 return ERR_PTR(err);
1686 /*
1687 * We use this notifier to send out a fake ARP reply to reset switches and
1688 * router ARP caches when an IP interface is brought up on a VIF.
1689 */
1690 static int
1691 inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
1693 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
1694 struct net_device *dev = ifa->ifa_dev->dev;
1696 /* UP event and is it one of our devices? */
1697 if (event == NETDEV_UP && dev->open == network_open)
1698 (void)send_fake_arp(dev);
1700 return NOTIFY_DONE;
1704 /* ** Close down ** */
1707 /**
1708 * Handle the change of state of the backend to Closing. We must delete our
1709 * device-layer structures now, to ensure that writes are flushed through to
1710 * the backend. Once this is done, we can switch to Closed in
1711 * acknowledgement.
1712 */
1713 static void netfront_closing(struct xenbus_device *dev)
1715 struct netfront_info *info = dev->dev.driver_data;
1717 DPRINTK("netfront_closing: %s removed\n", dev->nodename);
1719 close_netdev(info);
1721 xenbus_switch_state(dev, XenbusStateClosed);
1725 static int __devexit netfront_remove(struct xenbus_device *dev)
1727 struct netfront_info *info = dev->dev.driver_data;
1729 DPRINTK("%s\n", dev->nodename);
1731 netif_disconnect_backend(info);
1732 free_netdev(info->netdev);
1734 return 0;
1738 static void close_netdev(struct netfront_info *info)
1740 del_timer_sync(&info->rx_refill_timer);
1742 xennet_sysfs_delif(info->netdev);
1743 unregister_netdev(info->netdev);
1747 static void netif_disconnect_backend(struct netfront_info *info)
1749 /* Stop old i/f to prevent errors whilst we rebuild the state. */
1750 spin_lock_irq(&info->tx_lock);
1751 spin_lock(&info->rx_lock);
1752 netif_carrier_off(info->netdev);
1753 spin_unlock(&info->rx_lock);
1754 spin_unlock_irq(&info->tx_lock);
1756 if (info->irq)
1757 unbind_from_irqhandler(info->irq, info->netdev);
1758 info->evtchn = info->irq = 0;
1760 end_access(info->tx_ring_ref, info->tx.sring);
1761 end_access(info->rx_ring_ref, info->rx.sring);
1762 info->tx_ring_ref = GRANT_INVALID_REF;
1763 info->rx_ring_ref = GRANT_INVALID_REF;
1764 info->tx.sring = NULL;
1765 info->rx.sring = NULL;
1769 static void netif_free(struct netfront_info *info)
1771 close_netdev(info);
1772 netif_disconnect_backend(info);
1773 free_netdev(info->netdev);
1777 static void end_access(int ref, void *page)
1779 if (ref != GRANT_INVALID_REF)
1780 gnttab_end_foreign_access(ref, 0, (unsigned long)page);
1784 /* ** Driver registration ** */
1787 static struct xenbus_device_id netfront_ids[] = {
1788 { "vif" },
1789 { "" }
1790 };
1793 static struct xenbus_driver netfront = {
1794 .name = "vif",
1795 .owner = THIS_MODULE,
1796 .ids = netfront_ids,
1797 .probe = netfront_probe,
1798 .remove = __devexit_p(netfront_remove),
1799 .resume = netfront_resume,
1800 .otherend_changed = backend_changed,
1801 };
1804 static struct notifier_block notifier_inetdev = {
1805 .notifier_call = inetdev_notify,
1806 .next = NULL,
1807 .priority = 0
1808 };
1810 static int __init netif_init(void)
1812 if (!is_running_on_xen())
1813 return -ENODEV;
1815 if (xen_start_info->flags & SIF_INITDOMAIN)
1816 return 0;
1818 IPRINTK("Initialising virtual ethernet driver.\n");
1820 (void)register_inetaddr_notifier(&notifier_inetdev);
1822 return xenbus_register_frontend(&netfront);
1824 module_init(netif_init);
1827 static void __exit netif_exit(void)
1829 unregister_inetaddr_notifier(&notifier_inetdev);
1831 return xenbus_unregister_driver(&netfront);
1833 module_exit(netif_exit);
1835 MODULE_LICENSE("Dual BSD/GPL");