ia64/xen-unstable: linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c

changeset:   6427:3428d58a85e1 ("merge?")
author:      cl349@firebug.cl.cam.ac.uk
date:        Thu Aug 25 14:41:52 2005 +0000 (2005-08-25)
parents:     4abd299ef2f6 56a2232377f1
children:    4f73a7bde7b1
1 /******************************************************************************
2 * Virtual network driver for conversing with remote driver backends.
3 *
4 * Copyright (c) 2002-2004, K A Fraser
5 *
6 * This file may be distributed separately from the Linux kernel, or
7 * incorporated into other software packages, subject to the following license:
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a copy
10 * of this source file (the "Software"), to deal in the Software without
11 * restriction, including without limitation the rights to use, copy, modify,
12 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
13 * and to permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
28 #include <linux/config.h>
29 #include <linux/module.h>
30 #include <linux/version.h>
31 #include <linux/kernel.h>
32 #include <linux/sched.h>
33 #include <linux/slab.h>
34 #include <linux/string.h>
35 #include <linux/errno.h>
36 #include <linux/netdevice.h>
37 #include <linux/inetdevice.h>
38 #include <linux/etherdevice.h>
39 #include <linux/skbuff.h>
40 #include <linux/init.h>
41 #include <linux/bitops.h>
42 #include <linux/proc_fs.h>
43 #include <linux/ethtool.h>
44 #include <net/sock.h>
45 #include <net/pkt_sched.h>
46 #include <net/arp.h>
47 #include <net/route.h>
48 #include <asm/io.h>
49 #include <asm/uaccess.h>
50 #include <asm-xen/evtchn.h>
51 #include <asm-xen/xenbus.h>
52 #include <asm-xen/xen-public/io/netif.h>
53 #include <asm-xen/balloon.h>
54 #include <asm/page.h>
55 #include <asm/uaccess.h>
57 #if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
58 #include <asm-xen/xen-public/grant_table.h>
59 #include <asm-xen/gnttab.h>
60 #ifdef GRANT_DEBUG
61 static void
62 dump_packet(int tag, void *addr, u32 ap)
63 {
64 unsigned char *p = (unsigned char *)ap;
65 int i;
67 printk(KERN_ALERT "#### rx_poll %c %p ", tag & 0xff, addr);
68 for (i = 0; i < 20; i++) {
69 printk("%02x", p[i]);
70 }
71 printk("\n");
72 }
73 #endif
74 #endif
76 #ifndef __GFP_NOWARN
77 #define __GFP_NOWARN 0
78 #endif
79 #define alloc_xen_skb(_l) __dev_alloc_skb((_l), GFP_ATOMIC|__GFP_NOWARN)
81 #define init_skb_shinfo(_skb) \
82 do { \
83 atomic_set(&(skb_shinfo(_skb)->dataref), 1); \
84 skb_shinfo(_skb)->nr_frags = 0; \
85 skb_shinfo(_skb)->frag_list = NULL; \
86 } while (0)
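/*
 * init_skb_shinfo() is used on the receive path: the page behind an skb may
 * just have been flipped in from the backend, so the shared-info area at the
 * end of the buffer is garbage and must be reinitialised before the skb is
 * handed to the network stack.
 */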
88 /* Allow headroom on each rx pkt for Ethernet header, alignment padding, ... */
89 #define RX_HEADROOM 200
91 /*
92 * If the backend driver is pipelining transmit requests then we can be very
93 * aggressive in avoiding new-packet notifications -- only need to send a
94 * notification if there are no outstanding unreceived responses.
95 * If the backend may be buffering our transmit buffers for any reason then we
96 * are rather more conservative.
97 */
98 #ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
99 #define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */
100 #else
101 #define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
102 #endif
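/*
 * TX_TEST_IDX is consulted in network_start_xmit(): having queued a request
 * at ring index 'i', we only notify the backend if tx->TX_TEST_IDX == i,
 * i.e. if the backend appears to have nothing else outstanding and so will
 * not discover the new request by itself.
 */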
104 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
105 static grant_ref_t gref_tx_head;
106 static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
107 #endif
109 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
110 static grant_ref_t gref_rx_head;
111 static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
112 #endif
114 #if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
115 #define GRANT_INVALID_REF (0xFFFF)
116 #endif
118 static void network_tx_buf_gc(struct net_device *dev);
119 static void network_alloc_rx_buffers(struct net_device *dev);
121 static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
122 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
123 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
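/*
 * Scratch arrays for batched page transfers: rx_pfn_array collects the frames
 * handed to Xen by network_alloc_rx_buffers(), rx_mcl batches the PTE-zapping
 * and reservation-decrease multicalls, and rx_mmu holds the machine-to-physical
 * updates applied when pages arrive back in netif_poll().
 */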
125 #ifdef CONFIG_PROC_FS
126 static int xennet_proc_init(void);
127 static int xennet_proc_addif(struct net_device *dev);
128 static void xennet_proc_delif(struct net_device *dev);
129 #else
130 #define xennet_proc_init() (0)
131 #define xennet_proc_addif(d) (0)
132 #define xennet_proc_delif(d) ((void)0)
133 #endif
135 static struct list_head dev_list;
137 #define netfront_info net_private
138 struct net_private
139 {
140 struct list_head list;
141 struct net_device *netdev;
143 struct net_device_stats stats;
144 NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
145 unsigned int tx_full;
147 netif_tx_interface_t *tx;
148 netif_rx_interface_t *rx;
150 spinlock_t tx_lock;
151 spinlock_t rx_lock;
153 unsigned int handle;
154 unsigned int evtchn;
156 /* What is the status of our connection to the remote backend? */
157 #define BEST_CLOSED 0
158 #define BEST_DISCONNECTED 1
159 #define BEST_CONNECTED 2
160 unsigned int backend_state;
162 /* Is this interface open or closed (down or up)? */
163 #define UST_CLOSED 0
164 #define UST_OPEN 1
165 unsigned int user_state;
167 /* Receive-ring batched refills. */
168 #define RX_MIN_TARGET 8
169 #define RX_MAX_TARGET NETIF_RX_RING_SIZE
170 int rx_min_target, rx_max_target, rx_target;
171 struct sk_buff_head rx_batch;
173 /*
174 * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
175 * array is an index into a chain of free entries.
176 */
177 struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
178 struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
180 struct xenbus_device *xbdev;
181 char *backend;
182 int backend_id;
183 struct xenbus_watch watch;
184 int tx_ring_ref;
185 int rx_ring_ref;
186 u8 mac[ETH_ALEN];
187 };
189 /* Access macros for acquiring/freeing slots in {tx,rx}_skbs[]. */
190 #define ADD_ID_TO_FREELIST(_list, _id) \
191 (_list)[(_id)] = (_list)[0]; \
192 (_list)[0] = (void *)(unsigned long)(_id);
193 #define GET_ID_FROM_FREELIST(_list) \
194 ({ unsigned long _id = (unsigned long)(_list)[0]; \
195 (_list)[0] = (_list)[_id]; \
196 (unsigned short)_id; })
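/*
 * Example: if slots 3 and 7 are free then _list[0] == 3, _list[3] == 7 and
 * _list[7] is the end of the chain. GET_ID_FROM_FREELIST pops 3 and leaves
 * _list[0] == 7; ADD_ID_TO_FREELIST(_list, 3) pushes it back on the front.
 */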
198 static char *be_state_name[] = {
199 [BEST_CLOSED] = "closed",
200 [BEST_DISCONNECTED] = "disconnected",
201 [BEST_CONNECTED] = "connected",
202 };
204 #ifdef DEBUG
205 #define DPRINTK(fmt, args...) \
206 printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
207 #else
208 #define DPRINTK(fmt, args...) ((void)0)
209 #endif
210 #define IPRINTK(fmt, args...) \
211 printk(KERN_INFO "xen_net: " fmt, ##args)
212 #define WPRINTK(fmt, args...) \
213 printk(KERN_WARNING "xen_net: " fmt, ##args)
215 static struct net_device *find_dev_by_handle(unsigned int handle)
216 {
217 struct list_head *ent;
218 struct net_private *np;
219 list_for_each (ent, &dev_list) {
220 np = list_entry(ent, struct net_private, list);
221 if (np->handle == handle)
222 return np->netdev;
223 }
224 return NULL;
225 }
227 /** Network interface info. */
228 struct netif_ctrl {
229 /** Number of interfaces. */
230 int interface_n;
231 /** Number of connected interfaces. */
232 int connected_n;
233 /** Error code. */
234 int err;
235 int up;
236 };
238 static struct netif_ctrl netctrl;
240 static void netctrl_init(void)
241 {
242 memset(&netctrl, 0, sizeof(netctrl));
243 netctrl.up = NETIF_DRIVER_STATUS_DOWN;
244 }
246 /** Get or set a network interface error.
247 */
248 static int netctrl_err(int err)
249 {
250 if ((err < 0) && !netctrl.err)
251 netctrl.err = err;
252 return netctrl.err;
253 }
255 /** Test if all network interfaces are connected.
256 *
257 * @return 1 if all connected, 0 if not, negative error code otherwise
258 */
259 static int netctrl_connected(void)
260 {
261 int ok;
263 if (netctrl.err)
264 ok = netctrl.err;
265 else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
266 ok = (netctrl.connected_n == netctrl.interface_n);
267 else
268 ok = 0;
270 return ok;
271 }
273 /** Count the connected network interfaces.
274 *
275 * @return connected count
276 */
277 static int netctrl_connected_count(void)
278 {
280 struct list_head *ent;
281 struct net_private *np;
282 unsigned int connected;
284 connected = 0;
286 list_for_each(ent, &dev_list) {
287 np = list_entry(ent, struct net_private, list);
288 if (np->backend_state == BEST_CONNECTED)
289 connected++;
290 }
292 netctrl.connected_n = connected;
293 DPRINTK("> connected_n=%d interface_n=%d\n",
294 netctrl.connected_n, netctrl.interface_n);
295 return connected;
296 }
298 /** Send a packet on a net device to encourage switches to learn the
299 * MAC. We send a fake ARP reply.
300 *
301 * @param dev device
302 * @return 0 on success, error code otherwise
303 */
304 static int send_fake_arp(struct net_device *dev)
305 {
306 struct sk_buff *skb;
307 u32 src_ip, dst_ip;
309 dst_ip = INADDR_BROADCAST;
310 src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
312 /* No IP? Then nothing to do. */
313 if (src_ip == 0)
314 return 0;
316 skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
317 dst_ip, dev, src_ip,
318 /*dst_hw*/ NULL, /*src_hw*/ NULL,
319 /*target_hw*/ dev->dev_addr);
320 if (skb == NULL)
321 return -ENOMEM;
323 return dev_queue_xmit(skb);
324 }
326 static int network_open(struct net_device *dev)
327 {
328 struct net_private *np = netdev_priv(dev);
330 memset(&np->stats, 0, sizeof(np->stats));
332 np->user_state = UST_OPEN;
334 network_alloc_rx_buffers(dev);
335 np->rx->event = np->rx_resp_cons + 1;
337 netif_start_queue(dev);
339 return 0;
340 }
342 static void network_tx_buf_gc(struct net_device *dev)
343 {
344 NETIF_RING_IDX i, prod;
345 unsigned short id;
346 struct net_private *np = netdev_priv(dev);
347 struct sk_buff *skb;
349 if (np->backend_state != BEST_CONNECTED)
350 return;
352 do {
353 prod = np->tx->resp_prod;
354 rmb(); /* Ensure we see responses up to 'prod'. */
356 for (i = np->tx_resp_cons; i != prod; i++) {
357 id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
358 skb = np->tx_skbs[id];
359 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
360 if (unlikely(gnttab_query_foreign_access(grant_tx_ref[id]) != 0)) {
361 /* other domain is still using this grant - shouldn't happen
362 but if it does, we'll try to reclaim the grant later */
363 printk(KERN_ALERT "network_tx_buf_gc: warning -- grant "
364 "still in use by backend domain.\n");
365 goto out;
366 }
367 gnttab_end_foreign_access_ref(grant_tx_ref[id], GNTMAP_readonly);
368 gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
369 grant_tx_ref[id] = GRANT_INVALID_REF;
370 #endif
371 ADD_ID_TO_FREELIST(np->tx_skbs, id);
372 dev_kfree_skb_irq(skb);
373 }
375 np->tx_resp_cons = prod;
377 /*
378 * Set a new event, then check for race with update of tx_cons. Note
379 * that it is essential to schedule a callback, no matter how few
380 * buffers are pending. Even if there is space in the transmit ring,
381 * higher layers may be blocked because too much data is outstanding:
382 * in such cases notification from Xen is likely to be the only kick
383 * that we'll get.
384 */
385 np->tx->event =
386 prod + ((np->tx->req_prod - prod) >> 1) + 1;
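/* i.e. request an event once roughly half of the currently
 * outstanding requests have been turned into responses. */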
387 mb();
388 } while (prod != np->tx->resp_prod);
390 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
391 out:
392 #endif
394 if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
395 np->tx_full = 0;
396 if (np->user_state == UST_OPEN)
397 netif_wake_queue(dev);
398 }
399 }
402 static void network_alloc_rx_buffers(struct net_device *dev)
403 {
404 unsigned short id;
405 struct net_private *np = netdev_priv(dev);
406 struct sk_buff *skb;
407 int i, batch_target;
408 NETIF_RING_IDX req_prod = np->rx->req_prod;
409 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
410 int ref;
411 #endif
413 if (unlikely(np->backend_state != BEST_CONNECTED))
414 return;
416 /*
417 * Allocate skbuffs greedily, even though we batch updates to the
418 * receive ring. This creates a less bursty demand on the memory allocator,
419 * so should reduce the chance of failed allocation requests both for
420 * ourself and for other kernel subsystems.
421 */
422 batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
423 for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
424 if (unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL))
425 break;
426 __skb_queue_tail(&np->rx_batch, skb);
427 }
429 /* Is the batch large enough to be worthwhile? */
430 if (i < (np->rx_target/2))
431 return;
433 for (i = 0; ; i++) {
434 if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
435 break;
437 skb->dev = dev;
439 id = GET_ID_FROM_FREELIST(np->rx_skbs);
441 np->rx_skbs[id] = skb;
443 np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
444 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
445 ref = gnttab_claim_grant_reference(&gref_rx_head);
446 if (unlikely(ref < 0)) {
447 printk(KERN_ALERT "#### netfront can't claim rx reference\n");
448 BUG();
449 }
450 grant_rx_ref[id] = ref;
451 gnttab_grant_foreign_transfer_ref(ref, np->backend_id,
452 virt_to_mfn(skb->head));
453 np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
454 #endif
455 rx_pfn_array[i] = virt_to_mfn(skb->head);
457 /* Remove this page from pseudo phys map before passing back to Xen. */
458 phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT]
459 = INVALID_P2M_ENTRY;
461 MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head,
462 __pte(0), 0);
463 }
465 /* After all PTEs have been zapped we blow away stale TLB entries. */
466 rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
468 /* Give away a batch of pages. */
469 rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
470 rx_mcl[i].args[0] = MEMOP_decrease_reservation;
471 rx_mcl[i].args[1] = (unsigned long)rx_pfn_array;
472 rx_mcl[i].args[2] = (unsigned long)i;
473 rx_mcl[i].args[3] = 0;
474 rx_mcl[i].args[4] = DOMID_SELF;
476 /* Tell the balloon driver what is going on. */
477 balloon_update_driver_allowance(i);
479 /* Zap PTEs and give away pages in one big multicall. */
480 (void)HYPERVISOR_multicall(rx_mcl, i+1);
482 /* Check return status of HYPERVISOR_dom_mem_op(). */
483 if (unlikely(rx_mcl[i].result != i))
484 panic("Unable to reduce memory reservation\n");
486 /* Above is a suitable barrier to ensure backend will see requests. */
487 np->rx->req_prod = req_prod + i;
489 /* Adjust our floating fill target if we risked running out of buffers. */
490 if (((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
491 ((np->rx_target *= 2) > np->rx_max_target))
492 np->rx_target = np->rx_max_target;
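/* The fill target therefore doubles (capped at rx_max_target) whenever fewer
 * than a quarter of the posted buffers were still unconsumed; netif_poll()
 * decays it again, one buffer at a time, while the ring stays well stocked. */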
493 }
496 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
497 {
498 unsigned short id;
499 struct net_private *np = netdev_priv(dev);
500 netif_tx_request_t *tx;
501 NETIF_RING_IDX i;
502 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
503 int ref; /* may be negative on failure, so must be signed */
504 unsigned long mfn;
505 #endif
507 if (unlikely(np->tx_full)) {
508 printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
509 netif_stop_queue(dev);
510 goto drop;
511 }
513 if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
514 PAGE_SIZE)) {
515 struct sk_buff *nskb;
516 if (unlikely((nskb = alloc_xen_skb(skb->len)) == NULL))
517 goto drop;
518 skb_put(nskb, skb->len);
519 memcpy(nskb->data, skb->data, skb->len);
520 nskb->dev = skb->dev;
521 dev_kfree_skb(skb);
522 skb = nskb;
523 }
525 spin_lock_irq(&np->tx_lock);
527 if (np->backend_state != BEST_CONNECTED) {
528 spin_unlock_irq(&np->tx_lock);
529 goto drop;
530 }
532 i = np->tx->req_prod;
534 id = GET_ID_FROM_FREELIST(np->tx_skbs);
535 np->tx_skbs[id] = skb;
537 tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
539 tx->id = id;
540 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
541 ref = gnttab_claim_grant_reference(&gref_tx_head);
542 if (unlikely(ref < 0)) {
543 printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
544 BUG();
545 }
546 mfn = virt_to_mfn(skb->data);
547 gnttab_grant_foreign_access_ref(ref, np->backend_id, mfn, GNTMAP_readonly);
548 tx->addr = ref << PAGE_SHIFT;
549 grant_tx_ref[id] = ref;
550 #else
551 tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
552 #endif
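/* In either case the page-aligned part of tx->addr names the frame (grant
 * reference or raw MFN) and the low-order bits added below carry the offset
 * of the packet data within that page. */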
553 tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
554 tx->size = skb->len;
555 tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
557 wmb(); /* Ensure that backend will see the request. */
558 np->tx->req_prod = i + 1;
560 network_tx_buf_gc(dev);
562 if ((i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1)) {
563 np->tx_full = 1;
564 netif_stop_queue(dev);
565 }
567 spin_unlock_irq(&np->tx_lock);
569 np->stats.tx_bytes += skb->len;
570 np->stats.tx_packets++;
572 /* Only notify Xen if we really have to. */
573 mb();
574 if (np->tx->TX_TEST_IDX == i)
575 notify_via_evtchn(np->evtchn);
577 return 0;
579 drop:
580 np->stats.tx_dropped++;
581 dev_kfree_skb(skb);
582 return 0;
583 }
585 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
586 {
587 struct net_device *dev = dev_id;
588 struct net_private *np = netdev_priv(dev);
589 unsigned long flags;
591 spin_lock_irqsave(&np->tx_lock, flags);
592 network_tx_buf_gc(dev);
593 spin_unlock_irqrestore(&np->tx_lock, flags);
595 if ((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == UST_OPEN))
596 netif_rx_schedule(dev);
598 return IRQ_HANDLED;
599 }
602 static int netif_poll(struct net_device *dev, int *pbudget)
603 {
604 struct net_private *np = netdev_priv(dev);
605 struct sk_buff *skb, *nskb;
606 netif_rx_response_t *rx;
607 NETIF_RING_IDX i, rp;
608 mmu_update_t *mmu = rx_mmu;
609 multicall_entry_t *mcl = rx_mcl;
610 int work_done, budget, more_to_do = 1;
611 struct sk_buff_head rxq;
612 unsigned long flags;
613 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
614 unsigned long mfn;
615 grant_ref_t ref;
616 #endif
618 spin_lock(&np->rx_lock);
620 if (np->backend_state != BEST_CONNECTED) {
621 spin_unlock(&np->rx_lock);
622 return 0;
623 }
625 skb_queue_head_init(&rxq);
627 if ((budget = *pbudget) > dev->quota)
628 budget = dev->quota;
629 rp = np->rx->resp_prod;
630 rmb(); /* Ensure we see queued responses up to 'rp'. */
632 for (i = np->rx_resp_cons, work_done = 0;
633 (i != rp) && (work_done < budget);
634 i++, work_done++) {
635 rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
636 /*
637 * An error here is very odd. Usually indicates a backend bug,
638 * low-memory condition, or that we didn't have reservation headroom.
639 */
640 if (unlikely(rx->status <= 0)) {
641 if (net_ratelimit())
642 printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n");
643 np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
644 wmb();
645 np->rx->req_prod++;
646 work_done--;
647 continue;
648 }
650 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
651 ref = grant_rx_ref[rx->id];
652 grant_rx_ref[rx->id] = GRANT_INVALID_REF;
653 mfn = gnttab_end_foreign_transfer_ref(ref);
654 gnttab_release_grant_reference(&gref_rx_head, ref);
655 #endif
657 skb = np->rx_skbs[rx->id];
658 ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
660 /* NB. We handle skb overflow later. */
661 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
662 skb->data = skb->head + rx->addr;
663 #else
664 skb->data = skb->head + (rx->addr & ~PAGE_MASK);
665 #endif
666 skb->len = rx->status;
667 skb->tail = skb->data + skb->len;
669 if ( rx->csum_valid )
670 skb->ip_summed = CHECKSUM_UNNECESSARY;
672 np->stats.rx_packets++;
673 np->stats.rx_bytes += rx->status;
675 /* Remap the page. */
676 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
677 mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
678 #else
679 mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
680 #endif
681 mmu->val = __pa(skb->head) >> PAGE_SHIFT;
682 mmu++;
683 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
684 MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
685 pfn_pte_ma(mfn, PAGE_KERNEL), 0);
686 #else
687 MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
688 pfn_pte_ma(rx->addr >> PAGE_SHIFT,
689 PAGE_KERNEL), 0);
690 #endif
691 mcl++;
693 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
694 phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
695 #else
696 phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
697 rx->addr >> PAGE_SHIFT;
698 #endif
700 #ifdef GRANT_DEBUG
701 printk(KERN_ALERT "#### rx_poll enqueue vdata=%p mfn=%lu ref=%x\n",
702 skb->data, mfn, ref);
703 #endif
704 __skb_queue_tail(&rxq, skb);
705 }
707 /* Some pages are no longer absent... */
708 balloon_update_driver_allowance(-work_done);
710 /* Do all the remapping work, and M->P updates, in one big hypercall. */
711 if (likely((mcl - rx_mcl) != 0)) {
712 mcl->op = __HYPERVISOR_mmu_update;
713 mcl->args[0] = (unsigned long)rx_mmu;
714 mcl->args[1] = mmu - rx_mmu;
715 mcl->args[2] = 0;
716 mcl->args[3] = DOMID_SELF;
717 mcl++;
718 (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
719 }
721 while ((skb = __skb_dequeue(&rxq)) != NULL) {
722 #ifdef GRANT_DEBUG
723 printk(KERN_ALERT "#### rx_poll dequeue vdata=%p mfn=%lu\n",
724 skb->data, virt_to_mfn(skb->data));
725 dump_packet('d', skb->data, (unsigned long)skb->data);
726 #endif
727 /*
728 * Enough room in skbuff for the data we were passed? Also, Linux
729 * expects at least 16 bytes headroom in each receive buffer.
730 */
731 if (unlikely(skb->tail > skb->end) ||
732 unlikely((skb->data - skb->head) < 16)) {
733 nskb = NULL;
736 /* Only copy the packet if it fits in the current MTU. */
737 if (skb->len <= (dev->mtu + ETH_HLEN)) {
738 if ((skb->tail > skb->end) && net_ratelimit())
739 printk(KERN_INFO "Received packet needs %zd bytes more "
740 "headroom.\n", skb->tail - skb->end);
742 if ((nskb = alloc_xen_skb(skb->len + 2)) != NULL) {
743 skb_reserve(nskb, 2);
744 skb_put(nskb, skb->len);
745 memcpy(nskb->data, skb->data, skb->len);
746 nskb->dev = skb->dev;
747 }
748 }
749 else if (net_ratelimit())
750 printk(KERN_INFO "Received packet too big for MTU "
751 "(%d > %d)\n", skb->len - ETH_HLEN, dev->mtu);
753 /* Reinitialise and then destroy the old skbuff. */
754 skb->len = 0;
755 skb->tail = skb->data;
756 init_skb_shinfo(skb);
757 dev_kfree_skb(skb);
759 /* Switch old for new, if we copied the buffer. */
760 if ((skb = nskb) == NULL)
761 continue;
762 }
764 /* Set the shared-info area, which is hidden behind the real data. */
765 init_skb_shinfo(skb);
766 /* Ethernet-specific work. Delayed to here as it peeks the header. */
767 skb->protocol = eth_type_trans(skb, dev);
769 /* Pass it up. */
770 netif_receive_skb(skb);
771 dev->last_rx = jiffies;
772 }
774 np->rx_resp_cons = i;
776 /* If we get a callback with very few responses, reduce fill target. */
777 /* NB. Note exponential increase, linear decrease. */
778 if (((np->rx->req_prod - np->rx->resp_prod) > ((3*np->rx_target) / 4)) &&
779 (--np->rx_target < np->rx_min_target))
780 np->rx_target = np->rx_min_target;
782 network_alloc_rx_buffers(dev);
784 *pbudget -= work_done;
785 dev->quota -= work_done;
787 if (work_done < budget) {
788 local_irq_save(flags);
790 np->rx->event = i + 1;
792 /* Deal with hypervisor racing our resetting of rx_event. */
793 mb();
794 if (np->rx->resp_prod == i) {
795 __netif_rx_complete(dev);
796 more_to_do = 0;
797 }
799 local_irq_restore(flags);
800 }
802 spin_unlock(&np->rx_lock);
804 return more_to_do;
805 }
808 static int network_close(struct net_device *dev)
809 {
810 struct net_private *np = netdev_priv(dev);
811 np->user_state = UST_CLOSED;
812 netif_stop_queue(np->netdev);
813 return 0;
814 }
817 static struct net_device_stats *network_get_stats(struct net_device *dev)
818 {
819 struct net_private *np = netdev_priv(dev);
820 return &np->stats;
821 }
824 static void network_connect(struct net_device *dev)
825 {
826 struct net_private *np;
827 int i, requeue_idx;
828 netif_tx_request_t *tx;
830 np = netdev_priv(dev);
831 spin_lock_irq(&np->tx_lock);
832 spin_lock(&np->rx_lock);
834 /* Recovery procedure: */
836 /* Step 1: Reinitialise variables. */
837 np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
838 np->rx->event = np->tx->event = 1;
840 /* Step 2: Rebuild the RX and TX ring contents.
841 * NB. We could just free the queued TX packets now but we hope
842 * that sending them out might do some good. We have to rebuild
843 * the RX ring because some of our pages are currently flipped out
844 * so we can't just free the RX skbs.
845 * NB2. Freelist index entries are always going to be less than
846 * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
847 * greater than __PAGE_OFFSET: we use this property to distinguish
848 * them.
849 */
851 /* Rebuild the TX buffer freelist and the TX ring itself.
852 * NB. This reorders packets. We could keep more private state
853 * to avoid this but maybe it doesn't matter so much given the
854 * interface has been down.
855 */
856 for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
857 if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
858 struct sk_buff *skb = np->tx_skbs[i];
860 tx = &np->tx->ring[requeue_idx++].req;
862 tx->id = i;
863 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
864 tx->addr = 0; /*(ref << PAGE_SHIFT) |*/
865 #else
866 tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
867 #endif
868 tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
869 tx->size = skb->len;
871 np->stats.tx_bytes += skb->len;
872 np->stats.tx_packets++;
873 }
874 }
875 wmb();
876 np->tx->req_prod = requeue_idx;
878 /* Rebuild the RX buffer freelist and the RX ring itself. */
879 for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++)
880 if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET)
881 np->rx->ring[requeue_idx++].req.id = i;
882 wmb();
883 np->rx->req_prod = requeue_idx;
885 /* Step 3: All public and private state should now be sane. Get
886 * ready to start sending and receiving packets and give the driver
887 * domain a kick because we've probably just requeued some
888 * packets.
889 */
890 np->backend_state = BEST_CONNECTED;
891 wmb();
892 notify_via_evtchn(np->evtchn);
893 network_tx_buf_gc(dev);
895 if (np->user_state == UST_OPEN)
896 netif_start_queue(dev);
898 spin_unlock(&np->rx_lock);
899 spin_unlock_irq(&np->tx_lock);
900 }
902 static void show_device(struct net_private *np)
903 {
904 #ifdef DEBUG
905 if (np) {
906 IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
907 np->handle,
908 be_state_name[np->backend_state],
909 np->user_state ? "open" : "closed",
910 np->evtchn,
911 np->tx,
912 np->rx);
913 } else {
914 IPRINTK("<vif NULL>\n");
915 }
916 #endif
917 }
919 /* Stop network device and free tx/rx queues and irq.
920 */
921 static void shutdown_device(struct net_private *np)
922 {
923 /* Stop old i/f to prevent errors whilst we rebuild the state. */
924 spin_lock_irq(&np->tx_lock);
925 spin_lock(&np->rx_lock);
926 netif_stop_queue(np->netdev);
927 /* np->backend_state = BEST_DISCONNECTED; */
928 spin_unlock(&np->rx_lock);
929 spin_unlock_irq(&np->tx_lock);
931 /* Free resources. */
932 if (np->tx) {
933 unbind_evtchn_from_irqhandler(np->evtchn, np->netdev);
934 np->evtchn = 0;
935 free_page((unsigned long)np->tx);
936 free_page((unsigned long)np->rx);
937 np->tx = NULL;
938 np->rx = NULL;
939 }
940 }
942 /* Release vif resources and close it down completely.
943 */
944 static void vif_close(struct net_private *np)
945 {
946 BUG();
947 WPRINTK("Unexpected netif-CLOSED message in state %s\n",
948 be_state_name[np->backend_state]);
949 shutdown_device(np);
950 np->backend_state = BEST_CLOSED;
951 /* todo: take dev down and free. */
952 show_device(np);
953 }
955 /* Move the vif into disconnected state.
956 * Allocates tx/rx pages.
957 * Sends connect message to xend.
958 */
959 static void vif_disconnect(struct net_private *np)
960 {
961 BUG();
962 if(np->tx) free_page((unsigned long)np->tx);
963 if(np->rx) free_page((unsigned long)np->rx);
964 // Before this np->tx and np->rx had better be null.
965 np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
966 np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
967 memset(np->tx, 0, PAGE_SIZE);
968 memset(np->rx, 0, PAGE_SIZE);
969 np->backend_state = BEST_DISCONNECTED;
970 // send_interface_connect(np);
971 show_device(np);
972 }
974 /* Begin interface recovery.
975 *
976 * NB. Whilst we're recovering, we turn the carrier state off. We
977 * take measures to ensure that this device isn't used for
978 * anything. We also stop the queue for this device. Various
979 * different approaches (e.g. continuing to buffer packets) have
980 * been tested but don't appear to improve the overall impact on
981 * TCP connections.
982 *
983 * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
984 * is initiated by a special "RESET" message - disconnect could
985 * just mean we're not allowed to use this interface any more.
986 */
987 static void vif_reset(struct net_private *np)
988 {
989 BUG();
990 IPRINTK("Attempting to reconnect network interface: handle=%u\n",
991 np->handle);
992 shutdown_device(np);
993 vif_disconnect(np);
994 show_device(np);
995 }
997 /* Move the vif into connected state.
998 * Sets the mac and event channel from the message.
999 * Binds the irq to the event channel.
1000 */
1001 static void
1002 connect_device(struct net_private *np, unsigned int evtchn)
1003 {
1004 struct net_device *dev = np->netdev;
1005 memcpy(dev->dev_addr, np->mac, ETH_ALEN);
1006 np->evtchn = evtchn;
1007 network_connect(dev);
1008 (void)bind_evtchn_to_irqhandler(
1009 np->evtchn, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
1010 netctrl_connected_count();
1011 (void)send_fake_arp(dev);
1012 show_device(np);
1013 }
1015 static struct ethtool_ops network_ethtool_ops =
1016 {
1017 .get_tx_csum = ethtool_op_get_tx_csum,
1018 .set_tx_csum = ethtool_op_set_tx_csum,
1019 };
1021 /** Create a network device.
1022 * @param handle device handle
1023 * @param val return parameter for created device
1024 * @return 0 on success, error code otherwise
1025 */
1026 static int create_netdev(int handle, struct xenbus_device *dev,
1027 struct net_device **val)
1028 {
1029 int i, err = 0;
1030 struct net_device *netdev = NULL;
1031 struct net_private *np = NULL;
1033 if ((netdev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
1034 printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
1035 err = -ENOMEM;
1036 goto exit;
1037 }
1039 np = netdev_priv(netdev);
1040 np->backend_state = BEST_CLOSED;
1041 np->user_state = UST_CLOSED;
1042 np->handle = handle;
1043 np->xbdev = dev;
1045 spin_lock_init(&np->tx_lock);
1046 spin_lock_init(&np->rx_lock);
1048 skb_queue_head_init(&np->rx_batch);
1049 np->rx_target = RX_MIN_TARGET;
1050 np->rx_min_target = RX_MIN_TARGET;
1051 np->rx_max_target = RX_MAX_TARGET;
1053 /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
1054 for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
1055 np->tx_skbs[i] = (void *)((unsigned long) i+1);
1056 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1057 grant_tx_ref[i] = GRANT_INVALID_REF;
1058 #endif
1059 }
1060 for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
1061 np->rx_skbs[i] = (void *)((unsigned long) i+1);
1062 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1063 grant_rx_ref[i] = GRANT_INVALID_REF;
1064 #endif
1065 }
1067 netdev->open = network_open;
1068 netdev->hard_start_xmit = network_start_xmit;
1069 netdev->stop = network_close;
1070 netdev->get_stats = network_get_stats;
1071 netdev->poll = netif_poll;
1072 netdev->weight = 64;
1073 netdev->features = NETIF_F_IP_CSUM;
1075 SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
1077 if ((err = register_netdev(netdev)) != 0) {
1078 printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
1079 goto exit;
1080 }
1082 if ((err = xennet_proc_addif(netdev)) != 0) {
1083 unregister_netdev(netdev);
1084 goto exit;
1085 }
1087 np->netdev = netdev;
1088 list_add(&np->list, &dev_list);
1090 exit:
1091 if ((err != 0) && (netdev != NULL))
1092 kfree(netdev);
1093 else if (val != NULL)
1094 *val = netdev;
1095 return err;
1096 }
1098 static int destroy_netdev(struct net_device *netdev)
1099 {
1100 struct net_private *np = NULL;
1102 #ifdef CONFIG_PROC_FS
1103 xennet_proc_delif(netdev);
1104 #endif
1106 unregister_netdev(netdev);
1108 np = netdev_priv(netdev);
1109 list_del(&np->list);
1111 kfree(netdev);
1113 return 0;
1114 }
1116 /*
1117 * Initialize the network control interface.
1118 */
1119 static void netif_driver_status(netif_fe_driver_status_t *status)
1120 {
1121 netctrl.up = status->status;
1122 netctrl_connected_count();
1123 }
1125 /*
1126 * We use this notifier to send out a fake ARP reply to reset switches and
1127 * router ARP caches when an IP interface is brought up on a VIF.
1128 */
1129 static int
1130 inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
1131 {
1132 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
1133 struct net_device *dev = ifa->ifa_dev->dev;
1134 struct list_head *ent;
1135 struct net_private *np;
1137 if (event != NETDEV_UP)
1138 goto out;
1140 list_for_each (ent, &dev_list) {
1141 np = list_entry(ent, struct net_private, list);
1142 if (np->netdev == dev)
1143 (void)send_fake_arp(dev);
1144 }
1146 out:
1147 return NOTIFY_DONE;
1148 }
1150 static struct notifier_block notifier_inetdev = {
1151 .notifier_call = inetdev_notify,
1152 .next = NULL,
1153 .priority = 0
1154 };
1156 static struct xenbus_device_id netfront_ids[] = {
1157 { "vif" },
1158 { "" }
1159 };
1161 static void watch_for_status(struct xenbus_watch *watch, const char *node)
1162 {
1163 #if 0
1164 struct netfront_info *info;
1165 unsigned int binfo;
1166 unsigned long sectors, sector_size;
1167 int err;
1169 info = container_of(watch, struct netfront_info, watch);
1170 node += strlen(watch->node);
1172 /* FIXME: clean up when error on the other end. */
1173 if (info->connected == BLKIF_STATE_CONNECTED)
1174 return;
1176 err = xenbus_gather(watch->node,
1177 "sectors", "%lu", &sectors,
1178 "info", "%u", &binfo,
1179 "sector-size", "%lu", &sector_size,
1180 NULL);
1181 if (err) {
1182 xenbus_dev_error(info->xbdev, err, "reading backend fields");
1183 return;
1184 }
1186 xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
1187 info->connected = BLKIF_STATE_CONNECTED;
1189 blkif_state = BLKIF_STATE_CONNECTED;
1191 xenbus_dev_ok(info->xbdev);
1193 /* Kick pending requests. */
1194 spin_lock_irq(&blkif_io_lock);
1195 kick_pending_request_queues(info);
1196 spin_unlock_irq(&blkif_io_lock);
1197 #endif
1198 }
1200 static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
1201 {
1202 evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
1203 int err;
1205 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1206 info->tx_ring_ref = GRANT_INVALID_REF;
1207 #endif
1208 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1209 info->rx_ring_ref = GRANT_INVALID_REF;
1210 #endif
1212 info->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
1213 if (info->tx == 0) {
1214 err = -ENOMEM;
1215 xenbus_dev_error(dev, err, "allocating tx ring page");
1216 goto out;
1217 }
1218 info->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
1219 if (info->rx == 0) {
1220 err = -ENOMEM;
1221 xenbus_dev_error(dev, err, "allocating rx ring page");
1222 goto out;
1223 }
1224 memset(info->tx, 0, PAGE_SIZE);
1225 memset(info->rx, 0, PAGE_SIZE);
1226 info->backend_state = BEST_DISCONNECTED;
1228 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1229 err = gnttab_grant_foreign_access(info->backend_id,
1230 virt_to_mfn(info->tx), 0);
1231 if (err < 0) {
1232 xenbus_dev_error(dev, err, "granting access to tx ring page");
1233 goto out;
1234 }
1235 info->tx_ring_ref = err;
1236 #else
1237 info->tx_ring_ref = virt_to_mfn(info->tx);
1238 #endif
1240 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1241 err = gnttab_grant_foreign_access(info->backend_id,
1242 virt_to_mfn(info->rx), 0);
1243 if (err < 0) {
1244 xenbus_dev_error(dev, err, "granting access to rx ring page");
1245 goto out;
1246 }
1247 info->rx_ring_ref = err;
1248 #else
1249 info->rx_ring_ref = virt_to_mfn(info->rx);
1250 #endif
1252 op.u.alloc_unbound.dom = info->backend_id;
1253 err = HYPERVISOR_event_channel_op(&op);
1254 if (err) {
1255 xenbus_dev_error(dev, err, "allocating event channel");
1256 goto out;
1257 }
1258 connect_device(info, op.u.alloc_unbound.port);
1259 return 0;
1261 out:
1262 if (info->tx)
1263 free_page((unsigned long)info->tx);
1264 info->tx = 0;
1265 if (info->rx)
1266 free_page((unsigned long)info->rx);
1267 info->rx = 0;
1268 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1269 if (info->tx_ring_ref != GRANT_INVALID_REF)
1270 gnttab_end_foreign_access(info->tx_ring_ref, 0);
1271 info->tx_ring_ref = GRANT_INVALID_REF;
1272 #endif
1273 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1274 if (info->rx_ring_ref != GRANT_INVALID_REF)
1275 gnttab_end_foreign_access(info->rx_ring_ref, 0);
1276 info->rx_ring_ref = GRANT_INVALID_REF;
1277 #endif
1278 return err;
1279 }
1281 /* Common code used when first setting up, and when resuming. */
1282 static int talk_to_backend(struct xenbus_device *dev,
1283 struct netfront_info *info)
1284 {
1285 char *backend, *mac, *e, *s;
1286 const char *message;
1287 int err, i;
1289 backend = NULL;
1290 err = xenbus_gather(dev->nodename,
1291 "backend-id", "%i", &info->backend_id,
1292 "backend", NULL, &backend,
1293 NULL);
1294 if (XENBUS_EXIST_ERR(err))
1295 goto out;
1296 if (backend && strlen(backend) == 0) {
1297 err = -ENOENT;
1298 goto out;
1299 }
1300 if (err < 0) {
1301 xenbus_dev_error(dev, err, "reading %s/backend or backend-id",
1302 dev->nodename);
1303 goto out;
1304 }
1306 mac = xenbus_read(dev->nodename, "mac", NULL);
1307 if (IS_ERR(mac)) {
1308 err = PTR_ERR(mac);
1309 xenbus_dev_error(dev, err, "reading %s/mac",
1310 dev->nodename);
1311 goto out;
1312 }
1313 s = mac;
1314 for (i = 0; i < ETH_ALEN; i++) {
1315 info->mac[i] = simple_strtoul(s, &e, 16);
1316 if (s == e || (e[0] != ':' && e[0] != 0)) {
1317 kfree(mac);
1318 err = -ENOENT;
1319 xenbus_dev_error(dev, err, "parsing %s/mac",
1320 dev->nodename);
1321 goto out;
1322 }
1323 s = &e[1];
1324 }
1325 kfree(mac);
1327 /* Create shared ring, alloc event channel. */
1328 err = setup_device(dev, info);
1329 if (err) {
1330 xenbus_dev_error(dev, err, "setting up ring");
1331 goto out;
1332 }
1334 err = xenbus_transaction_start(dev->nodename);
1335 if (err) {
1336 xenbus_dev_error(dev, err, "starting transaction");
1337 goto destroy_ring;
1338 }
1340 err = xenbus_printf(dev->nodename, "tx-ring-ref","%u",
1341 info->tx_ring_ref);
1342 if (err) {
1343 message = "writing tx ring-ref";
1344 goto abort_transaction;
1345 }
1346 err = xenbus_printf(dev->nodename, "rx-ring-ref","%u",
1347 info->rx_ring_ref);
1348 if (err) {
1349 message = "writing rx ring-ref";
1350 goto abort_transaction;
1351 }
1352 err = xenbus_printf(dev->nodename,
1353 "event-channel", "%u", info->evtchn);
1354 if (err) {
1355 message = "writing event-channel";
1356 goto abort_transaction;
1357 }
1359 info->backend = backend;
1360 backend = NULL;
1362 info->watch.node = info->backend;
1363 info->watch.callback = watch_for_status;
1364 err = register_xenbus_watch(&info->watch);
1365 if (err) {
1366 message = "registering watch on backend";
1367 goto abort_transaction;
1368 }
1370 err = xenbus_transaction_end(0);
1371 if (err) {
1372 xenbus_dev_error(dev, err, "completing transaction");
1373 goto destroy_ring;
1374 }
1376 out:
1377 if (backend)
1378 kfree(backend);
1379 return err;
1381 abort_transaction:
1382 xenbus_transaction_end(1);
1383 /* Have to do this *outside* transaction. */
1384 xenbus_dev_error(dev, err, "%s", message);
1385 destroy_ring:
1386 shutdown_device(info);
1387 goto out;
1388 }
1390 /* Setup supplies the backend dir, virtual device.
1392 We place an event channel and shared frame entries.
1393 We watch backend to wait if it's ok. */
1394 static int netfront_probe(struct xenbus_device *dev,
1395 const struct xenbus_device_id *id)
1396 {
1397 int err;
1398 struct net_device *netdev;
1399 unsigned int handle;
1401 printk("netfront_probe %p\n", dev);
1402 err = xenbus_scanf(dev->nodename, "handle", "%u", &handle);
1403 if (XENBUS_EXIST_ERR(err))
1404 return err;
1405 if (err < 0) {
1406 xenbus_dev_error(dev, err, "reading handle");
1407 return err;
1408 }
1410 printk("netfront_probe handle %d\n", handle);
1411 netdev = find_dev_by_handle(handle);
1412 printk("netfront_probe found netdev %p\n", netdev);
1413 if (netdev)
1414 return 0;
1416 err = create_netdev(handle, dev, &netdev);
1417 if (err) {
1418 xenbus_dev_error(dev, err, "creating netdev");
1419 return err;
1420 }
1422 printk("netfront_probe netdev %p\n", netdev);
1423 err = talk_to_backend(dev, netdev_priv(netdev));
1424 if (err) {
1425 destroy_netdev(netdev);
1426 return err;
1427 }
1429 #if 0
1430 /* Call once in case entries already there. */
1431 watch_for_status(&info->watch, info->watch.node);
1432 #endif
1433 return 0;
1434 }
1436 static int netfront_remove(struct xenbus_device *dev)
1437 {
1438 struct netfront_info *info = dev->data;
1440 #if 0
1441 if (info->backend)
1442 unregister_xenbus_watch(&info->watch);
1444 if (info->mi)
1445 xlvbd_del(info);
1447 blkif_free(info);
1449 kfree(info->backend);
1450 #endif
1451 kfree(info);
1453 return 0;
1454 }
1456 static int netfront_suspend(struct xenbus_device *dev)
1457 {
1458 struct net_private *np = dev->data;
1459 /* Avoid having tx/rx stuff happen until we're ready. */
1460 unbind_evtchn_from_irqhandler(np->evtchn, np->netdev);
1461 return 0;
1462 }
1464 static int netfront_resume(struct xenbus_device *dev)
1465 {
1466 struct net_private *np = dev->data;
1467 /*
1468 * Connect regardless of whether IFF_UP flag set.
1469 * Stop bad things from happening until we're back up.
1470 */
1471 np->backend_state = BEST_DISCONNECTED;
1472 memset(np->tx, 0, PAGE_SIZE);
1473 memset(np->rx, 0, PAGE_SIZE);
1475 // send_interface_connect(np);
1476 return 0;
1477 }
1479 static struct xenbus_driver netfront = {
1480 .name = "vif",
1481 .owner = THIS_MODULE,
1482 .ids = netfront_ids,
1483 .probe = netfront_probe,
1484 .remove = netfront_remove,
1485 .resume = netfront_resume,
1486 .suspend = netfront_suspend,
1487 };
1489 static void __init init_net_xenbus(void)
1490 {
1491 xenbus_register_device(&netfront);
1492 }
1494 static int wait_for_netif(void)
1495 {
1496 int err = 0, conn = 0;
1497 int i;
1499 /*
1500 * We should figure out how many and which devices we need to
1501 * proceed and only wait for those. For now, continue once the
1502 * first device is around.
1503 */
1504 for ( i=0; i < 10*HZ; i++ )
1505 {
1506 conn = netctrl_connected();
1507 if (conn)
1508 break;
1509 set_current_state(TASK_INTERRUPTIBLE);
1510 schedule_timeout(1);
1511 }
1513 if (conn <= 0) {
1514 err = netctrl_err(-ENETDOWN);
1515 WPRINTK("Timeout connecting to device!\n");
1516 }
1517 return err;
1518 }
1520 static int __init netif_init(void)
1521 {
1522 int err = 0;
1524 if (xen_start_info.flags & SIF_INITDOMAIN)
1525 return 0;
1527 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1528 /* A grant for every ring slot */
1529 if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE,
1530 &gref_tx_head) < 0) {
1531 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
1532 return 1;
1533 }
1534 printk(KERN_ALERT "Netdev frontend (TX) is using grant tables.\n");
1535 #endif
1536 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1537 /* A grant for every ring slot */
1538 if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE,
1539 &gref_rx_head) < 0) {
1540 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
1541 return 1;
1542 }
1543 printk(KERN_ALERT "Netdev frontend (RX) is using grant tables.\n");
1544 #endif
1546 if ((err = xennet_proc_init()) != 0)
1547 return err;
1549 IPRINTK("Initialising virtual ethernet driver.\n");
1551 INIT_LIST_HEAD(&dev_list);
1552 (void)register_inetaddr_notifier(&notifier_inetdev);
1553 netctrl_init();
1555 init_net_xenbus();
1557 wait_for_netif();
1559 return err;
1560 }
1562 static void netif_exit(void)
1563 {
1564 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1565 gnttab_free_grant_references(gref_tx_head);
1566 #endif
1567 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1568 gnttab_free_grant_references(gref_rx_head);
1569 #endif
1570 }
1572 #ifdef CONFIG_PROC_FS
1574 #define TARGET_MIN 0UL
1575 #define TARGET_MAX 1UL
1576 #define TARGET_CUR 2UL
1578 static int xennet_proc_read(
1579 char *page, char **start, off_t off, int count, int *eof, void *data)
1580 {
1581 struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
1582 struct net_private *np = netdev_priv(dev);
1583 int len = 0, which_target = (long)data & 3;
1585 switch (which_target)
1586 {
1587 case TARGET_MIN:
1588 len = sprintf(page, "%d\n", np->rx_min_target);
1589 break;
1590 case TARGET_MAX:
1591 len = sprintf(page, "%d\n", np->rx_max_target);
1592 break;
1593 case TARGET_CUR:
1594 len = sprintf(page, "%d\n", np->rx_target);
1595 break;
1596 }
1598 *eof = 1;
1599 return len;
1600 }
1602 static int xennet_proc_write(
1603 struct file *file, const char __user *buffer,
1604 unsigned long count, void *data)
1605 {
1606 struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
1607 struct net_private *np = netdev_priv(dev);
1608 int which_target = (long)data & 3;
1609 char string[64];
1610 long target;
1612 if (!capable(CAP_SYS_ADMIN))
1613 return -EPERM;
1615 if (count <= 1)
1616 return -EBADMSG; /* runt */
1617 if (count >= sizeof(string))
1618 return -EFBIG; /* too long */
1620 if (copy_from_user(string, buffer, count))
1621 return -EFAULT;
1622 string[count] = '\0';
1624 target = simple_strtol(string, NULL, 10);
1625 if (target < RX_MIN_TARGET)
1626 target = RX_MIN_TARGET;
1627 if (target > RX_MAX_TARGET)
1628 target = RX_MAX_TARGET;
1630 spin_lock(&np->rx_lock);
1632 switch (which_target)
1633 {
1634 case TARGET_MIN:
1635 if (target > np->rx_max_target)
1636 np->rx_max_target = target;
1637 np->rx_min_target = target;
1638 if (target > np->rx_target)
1639 np->rx_target = target;
1640 break;
1641 case TARGET_MAX:
1642 if (target < np->rx_min_target)
1643 np->rx_min_target = target;
1644 np->rx_max_target = target;
1645 if (target < np->rx_target)
1646 np->rx_target = target;
1647 break;
1648 case TARGET_CUR:
1649 break;
1650 }
1652 network_alloc_rx_buffers(dev);
1654 spin_unlock(&np->rx_lock);
1656 return count;
1657 }
1659 static int xennet_proc_init(void)
1660 {
1661 if (proc_mkdir("xen/net", NULL) == NULL)
1662 return -ENOMEM;
1663 return 0;
1664 }
1666 static int xennet_proc_addif(struct net_device *dev)
1667 {
1668 struct proc_dir_entry *dir, *min, *max, *cur;
1669 char name[30];
1671 sprintf(name, "xen/net/%s", dev->name);
1673 dir = proc_mkdir(name, NULL);
1674 if (!dir)
1675 goto nomem;
1677 min = create_proc_entry("rxbuf_min", 0644, dir);
1678 max = create_proc_entry("rxbuf_max", 0644, dir);
1679 cur = create_proc_entry("rxbuf_cur", 0444, dir);
1680 if (!min || !max || !cur)
1681 goto nomem;
1683 min->read_proc = xennet_proc_read;
1684 min->write_proc = xennet_proc_write;
1685 min->data = (void *)((unsigned long)dev | TARGET_MIN);
1687 max->read_proc = xennet_proc_read;
1688 max->write_proc = xennet_proc_write;
1689 max->data = (void *)((unsigned long)dev | TARGET_MAX);
1691 cur->read_proc = xennet_proc_read;
1692 cur->write_proc = xennet_proc_write;
1693 cur->data = (void *)((unsigned long)dev | TARGET_CUR);
1695 return 0;
1697 nomem:
1698 xennet_proc_delif(dev);
1699 return -ENOMEM;
1700 }
1702 static void xennet_proc_delif(struct net_device *dev)
1703 {
1704 char name[30];
1706 sprintf(name, "xen/net/%s/rxbuf_min", dev->name);
1707 remove_proc_entry(name, NULL);
1709 sprintf(name, "xen/net/%s/rxbuf_max", dev->name);
1710 remove_proc_entry(name, NULL);
1712 sprintf(name, "xen/net/%s/rxbuf_cur", dev->name);
1713 remove_proc_entry(name, NULL);
1715 sprintf(name, "xen/net/%s", dev->name);
1716 remove_proc_entry(name, NULL);
1717 }
1719 #endif
1721 module_init(netif_init);
1722 module_exit(netif_exit);