ia64/xen-unstable

linux-2.6-xen-sparse/drivers/xen/netfront/netfront.c @ 6355:6078dc5f7ea1

Update for grant table interface changes.
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Mon Aug 22 20:57:26 2005 +0000 (2005-08-22)
parents 1a0723cd37f1
children 5f4724c13040
1 /******************************************************************************
2 * Virtual network driver for conversing with remote driver backends.
3 *
4 * Copyright (c) 2002-2004, K A Fraser
5 *
6 * This file may be distributed separately from the Linux kernel, or
7 * incorporated into other software packages, subject to the following license:
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a copy
10 * of this source file (the "Software"), to deal in the Software without
11 * restriction, including without limitation the rights to use, copy, modify,
12 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
13 * and to permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included in
17 * all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
28 #include <linux/config.h>
29 #include <linux/module.h>
30 #include <linux/version.h>
31 #include <linux/kernel.h>
32 #include <linux/sched.h>
33 #include <linux/slab.h>
34 #include <linux/string.h>
35 #include <linux/errno.h>
36 #include <linux/netdevice.h>
37 #include <linux/inetdevice.h>
38 #include <linux/etherdevice.h>
39 #include <linux/skbuff.h>
40 #include <linux/init.h>
41 #include <linux/bitops.h>
42 #include <linux/proc_fs.h>
43 #include <linux/ethtool.h>
44 #include <net/sock.h>
45 #include <net/pkt_sched.h>
46 #include <net/arp.h>
47 #include <net/route.h>
48 #include <asm/io.h>
49 #include <asm/uaccess.h>
50 #include <asm-xen/evtchn.h>
51 #include <asm-xen/ctrl_if.h>
52 #include <asm-xen/xen-public/io/netif.h>
53 #include <asm-xen/balloon.h>
54 #include <asm/page.h>
55 #include <asm/uaccess.h>
57 #if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
58 #include <asm-xen/xen-public/grant_table.h>
59 #include <asm-xen/gnttab.h>
60 #ifdef GRANT_DEBUG
61 static void
62 dump_packet(int tag, void *addr, u32 ap)
63 {
64 unsigned char *p = (unsigned char *)ap;
65 int i;
67 printk(KERN_ALERT "#### rx_poll %c %p ", tag & 0xff, addr);
68 for (i = 0; i < 20; i++) {
69 printk("%02x", p[i]);
70 }
71 printk("\n");
72 }
73 #endif
74 #endif
76 #ifndef __GFP_NOWARN
77 #define __GFP_NOWARN 0
78 #endif
79 #define alloc_xen_skb(_l) __dev_alloc_skb((_l), GFP_ATOMIC|__GFP_NOWARN)
81 #define init_skb_shinfo(_skb) \
82 do { \
83 atomic_set(&(skb_shinfo(_skb)->dataref), 1); \
84 skb_shinfo(_skb)->nr_frags = 0; \
85 skb_shinfo(_skb)->frag_list = NULL; \
86 } while (0)
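/* Reset the skb's shared-info area after its backing page has been replaced
 * or its data re-laid-out (used on the receive path below). */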
88 /* Allow headroom on each rx pkt for Ethernet header, alignment padding, ... */
89 #define RX_HEADROOM 200
91 /*
92 * If the backend driver is pipelining transmit requests then we can be very
93 * aggressive in avoiding new-packet notifications -- only need to send a
94 * notification if there are no outstanding unreceived responses.
95 * If the backend may be buffering our transmit buffers for any reason then we
96 * are rather more conservative.
97 */
98 #ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
99 #define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */
100 #else
101 #define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
102 #endif
104 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
105 static grant_ref_t gref_tx_head, gref_tx_terminal;
106 static grant_ref_t grant_tx_ref[NETIF_TX_RING_SIZE + 1];
107 #endif
109 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
110 static grant_ref_t gref_rx_head, gref_rx_terminal;
111 static grant_ref_t grant_rx_ref[NETIF_RX_RING_SIZE + 1];
112 #endif
114 #if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
115 static domid_t rdomid = 0;
116 #define GRANT_INVALID_REF (0xFFFF)
117 #endif
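/* rdomid is the backend (driver) domain to which grant references are issued;
 * it is learned from the CONNECTED status message in vif_connect() below. */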
119 static void network_tx_buf_gc(struct net_device *dev);
120 static void network_alloc_rx_buffers(struct net_device *dev);
122 static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
123 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
124 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
126 #ifdef CONFIG_PROC_FS
127 static int xennet_proc_init(void);
128 static int xennet_proc_addif(struct net_device *dev);
129 static void xennet_proc_delif(struct net_device *dev);
130 #else
131 #define xennet_proc_init() (0)
132 #define xennet_proc_addif(d) (0)
133 #define xennet_proc_delif(d) ((void)0)
134 #endif
136 static struct list_head dev_list;
138 struct net_private
139 {
140 struct list_head list;
141 struct net_device *dev;
143 struct net_device_stats stats;
144 NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
145 unsigned int tx_full;
147 netif_tx_interface_t *tx;
148 netif_rx_interface_t *rx;
150 spinlock_t tx_lock;
151 spinlock_t rx_lock;
153 unsigned int handle;
154 unsigned int evtchn;
156 /* What is the status of our connection to the remote backend? */
157 #define BEST_CLOSED 0
158 #define BEST_DISCONNECTED 1
159 #define BEST_CONNECTED 2
160 unsigned int backend_state;
162 /* Is this interface open or closed (down or up)? */
163 #define UST_CLOSED 0
164 #define UST_OPEN 1
165 unsigned int user_state;
167 /* Receive-ring batched refills. */
168 #define RX_MIN_TARGET 8
169 #define RX_MAX_TARGET NETIF_RX_RING_SIZE
170 int rx_min_target, rx_max_target, rx_target;
171 struct sk_buff_head rx_batch;
173 /*
174 * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
175 * array is an index into a chain of free entries.
176 */
177 struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
178 struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
179 };
181 /* Access macros for acquiring freeing slots in {tx,rx}_skbs[]. */
182 #define ADD_ID_TO_FREELIST(_list, _id) \
183 (_list)[(_id)] = (_list)[0]; \
184 (_list)[0] = (void *)(unsigned long)(_id);
185 #define GET_ID_FROM_FREELIST(_list) \
186 ({ unsigned long _id = (unsigned long)(_list)[0]; \
187 (_list)[0] = (_list)[_id]; \
188 (unsigned short)_id; })
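/* Entry 0 of each skb array holds the index of the first free slot, and free
 * slots chain through the values stored in them; a slot's index doubles as
 * the 'id' carried in ring requests and responses. */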
190 static char *status_name[] = {
191 [NETIF_INTERFACE_STATUS_CLOSED] = "closed",
192 [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
193 [NETIF_INTERFACE_STATUS_CONNECTED] = "connected",
194 [NETIF_INTERFACE_STATUS_CHANGED] = "changed",
195 };
197 static char *be_state_name[] = {
198 [BEST_CLOSED] = "closed",
199 [BEST_DISCONNECTED] = "disconnected",
200 [BEST_CONNECTED] = "connected",
201 };
203 #ifdef DEBUG
204 #define DPRINTK(fmt, args...) \
205 printk(KERN_ALERT "xen_net (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
206 #else
207 #define DPRINTK(fmt, args...) ((void)0)
208 #endif
209 #define IPRINTK(fmt, args...) \
210 printk(KERN_INFO "xen_net: " fmt, ##args)
211 #define WPRINTK(fmt, args...) \
212 printk(KERN_WARNING "xen_net: " fmt, ##args)
214 static struct net_device *find_dev_by_handle(unsigned int handle)
215 {
216 struct list_head *ent;
217 struct net_private *np;
218 list_for_each (ent, &dev_list) {
219 np = list_entry(ent, struct net_private, list);
220 if (np->handle == handle)
221 return np->dev;
222 }
223 return NULL;
224 }
226 /** Network interface info. */
227 struct netif_ctrl {
228 /** Number of interfaces. */
229 int interface_n;
230 /** Number of connected interfaces. */
231 int connected_n;
232 /** Error code. */
233 int err;
234 int up;
235 };
237 static struct netif_ctrl netctrl;
239 static void netctrl_init(void)
240 {
241 memset(&netctrl, 0, sizeof(netctrl));
242 netctrl.up = NETIF_DRIVER_STATUS_DOWN;
243 }
245 /** Get or set a network interface error.
246 */
247 static int netctrl_err(int err)
248 {
249 if ((err < 0) && !netctrl.err)
250 netctrl.err = err;
251 return netctrl.err;
252 }
254 /** Test if all network interfaces are connected.
255 *
256 * @return 1 if all connected, 0 if not, negative error code otherwise
257 */
258 static int netctrl_connected(void)
259 {
260 int ok;
262 if (netctrl.err)
263 ok = netctrl.err;
264 else if (netctrl.up == NETIF_DRIVER_STATUS_UP)
265 ok = (netctrl.connected_n == netctrl.interface_n);
266 else
267 ok = 0;
269 return ok;
270 }
272 /** Count the connected network interfaces.
273 *
274 * @return connected count
275 */
276 static int netctrl_connected_count(void)
277 {
277 {
279 struct list_head *ent;
280 struct net_private *np;
281 unsigned int connected;
283 connected = 0;
285 list_for_each(ent, &dev_list) {
286 np = list_entry(ent, struct net_private, list);
287 if (np->backend_state == BEST_CONNECTED)
288 connected++;
289 }
291 netctrl.connected_n = connected;
292 DPRINTK("> connected_n=%d interface_n=%d\n",
293 netctrl.connected_n, netctrl.interface_n);
294 return connected;
295 }
297 /** Send a packet on a net device to encourage switches to learn the
298 * MAC. We send a fake ARP reply.
299 *
300 * @param dev device
301 * @return 0 on success, error code otherwise
302 */
303 static int send_fake_arp(struct net_device *dev)
304 {
305 struct sk_buff *skb;
306 u32 src_ip, dst_ip;
308 dst_ip = INADDR_BROADCAST;
309 src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
311 /* No IP? Then nothing to do. */
312 if (src_ip == 0)
313 return 0;
315 skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
316 dst_ip, dev, src_ip,
317 /*dst_hw*/ NULL, /*src_hw*/ NULL,
318 /*target_hw*/ dev->dev_addr);
319 if (skb == NULL)
320 return -ENOMEM;
322 return dev_queue_xmit(skb);
323 }
325 static int network_open(struct net_device *dev)
326 {
327 struct net_private *np = netdev_priv(dev);
329 memset(&np->stats, 0, sizeof(np->stats));
331 np->user_state = UST_OPEN;
333 network_alloc_rx_buffers(dev);
334 np->rx->event = np->rx_resp_cons + 1;
336 netif_start_queue(dev);
338 return 0;
339 }
341 static void network_tx_buf_gc(struct net_device *dev)
342 {
343 NETIF_RING_IDX i, prod;
344 unsigned short id;
345 struct net_private *np = netdev_priv(dev);
346 struct sk_buff *skb;
348 if (np->backend_state != BEST_CONNECTED)
349 return;
351 do {
352 prod = np->tx->resp_prod;
353 rmb(); /* Ensure we see responses up to 'rp'. */
355 for (i = np->tx_resp_cons; i != prod; i++) {
356 id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
357 skb = np->tx_skbs[id];
358 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
359 if (unlikely(gnttab_query_foreign_access(grant_tx_ref[id]) != 0)) {
360 /* other domain is still using this grant - shouldn't happen
361 but if it does, we'll try to reclaim the grant later */
362 printk(KERN_ALERT "network_tx_buf_gc: warning -- grant "
363 "still in use by backend domain.\n");
364 goto out;
365 }
366 gnttab_end_foreign_access(grant_tx_ref[id], GNTMAP_readonly);
367 gnttab_release_grant_reference(&gref_tx_head, grant_tx_ref[id]);
368 grant_tx_ref[id] = GRANT_INVALID_REF;
369 #endif
370 ADD_ID_TO_FREELIST(np->tx_skbs, id);
371 dev_kfree_skb_irq(skb);
372 }
374 np->tx_resp_cons = prod;
376 /*
377 * Set a new event, then check for race with update of tx_cons. Note
378 * that it is essential to schedule a callback, no matter how few
379 * buffers are pending. Even if there is space in the transmit ring,
380 * higher layers may be blocked because too much data is outstanding:
381 * in such cases notification from Xen is likely to be the only kick
382 * that we'll get.
383 */
384 np->tx->event =
385 prod + ((np->tx->req_prod - prod) >> 1) + 1;
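/* i.e. ask for the next completion notification roughly half-way through the
 * requests that are still outstanding. */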
386 mb();
387 } while (prod != np->tx->resp_prod);
389 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
390 out:
391 #endif
393 if (np->tx_full && ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE)) {
394 np->tx_full = 0;
395 if (np->user_state == UST_OPEN)
396 netif_wake_queue(dev);
397 }
398 }
401 static void network_alloc_rx_buffers(struct net_device *dev)
402 {
403 unsigned short id;
404 struct net_private *np = netdev_priv(dev);
405 struct sk_buff *skb;
406 int i, batch_target;
407 NETIF_RING_IDX req_prod = np->rx->req_prod;
408 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
409 int ref;
410 #endif
412 if (unlikely(np->backend_state != BEST_CONNECTED))
413 return;
415 /*
416 * Allocate skbuffs greedily, even though we batch updates to the
417 * receive ring. This creates a less bursty demand on the memory allocator,
418 * so should reduce the chance of failed allocation requests both for
419 * ourself and for other kernel subsystems.
420 */
421 batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
422 for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
423 if (unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL))
424 break;
425 __skb_queue_tail(&np->rx_batch, skb);
426 }
428 /* Is the batch large enough to be worthwhile? */
429 if (i < (np->rx_target/2))
430 return;
432 for (i = 0; ; i++) {
433 if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
434 break;
436 skb->dev = dev;
438 id = GET_ID_FROM_FREELIST(np->rx_skbs);
440 np->rx_skbs[id] = skb;
442 np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
443 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
444 if (unlikely((ref = gnttab_claim_grant_reference(&gref_rx_head,
445 gref_rx_terminal)) < 0)) {
446 printk(KERN_ALERT "#### netfront can't claim rx reference\n");
447 BUG();
448 }
449 grant_rx_ref[id] = ref;
450 gnttab_grant_foreign_transfer_ref(ref, rdomid,
451 virt_to_mfn(skb->head));
452 np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.gref = ref;
453 #endif
454 rx_pfn_array[i] = virt_to_mfn(skb->head);
456 /* Remove this page from pseudo phys map before passing back to Xen. */
457 phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT]
458 = INVALID_P2M_ENTRY;
460 MULTI_update_va_mapping(rx_mcl+i, (unsigned long)skb->head,
461 __pte(0), 0);
462 }
464 /* After all PTEs have been zapped we blow away stale TLB entries. */
465 rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
467 /* Give away a batch of pages. */
468 rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
469 rx_mcl[i].args[0] = MEMOP_decrease_reservation;
470 rx_mcl[i].args[1] = (unsigned long)rx_pfn_array;
471 rx_mcl[i].args[2] = (unsigned long)i;
472 rx_mcl[i].args[3] = 0;
473 rx_mcl[i].args[4] = DOMID_SELF;
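/* The backend returns received data by transferring its own pages to us
 * (page flipping), so the pages handed out here are given away rather than
 * merely shared. */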
475 /* Tell the balloon driver what is going on. */
476 balloon_update_driver_allowance(i);
478 /* Zap PTEs and give away pages in one big multicall. */
479 (void)HYPERVISOR_multicall(rx_mcl, i+1);
481 /* Check return status of HYPERVISOR_dom_mem_op(). */
482 if (unlikely(rx_mcl[i].result != i))
483 panic("Unable to reduce memory reservation\n");
485 /* Above is a suitable barrier to ensure backend will see requests. */
486 np->rx->req_prod = req_prod + i;
488 /* Adjust our floating fill target if we risked running out of buffers. */
489 if (((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
490 ((np->rx_target *= 2) > np->rx_max_target))
491 np->rx_target = np->rx_max_target;
492 }
495 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
496 {
497 unsigned short id;
498 struct net_private *np = netdev_priv(dev);
499 netif_tx_request_t *tx;
500 NETIF_RING_IDX i;
501 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
502 unsigned int ref;
503 unsigned long mfn;
504 #endif
506 if (unlikely(np->tx_full)) {
507 printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
508 netif_stop_queue(dev);
509 goto drop;
510 }
512 if (unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
513 PAGE_SIZE)) {
514 struct sk_buff *nskb;
515 if (unlikely((nskb = alloc_xen_skb(skb->len)) == NULL))
516 goto drop;
517 skb_put(nskb, skb->len);
518 memcpy(nskb->data, skb->data, skb->len);
519 nskb->dev = skb->dev;
520 dev_kfree_skb(skb);
521 skb = nskb;
522 }
524 spin_lock_irq(&np->tx_lock);
526 if (np->backend_state != BEST_CONNECTED) {
527 spin_unlock_irq(&np->tx_lock);
528 goto drop;
529 }
531 i = np->tx->req_prod;
533 id = GET_ID_FROM_FREELIST(np->tx_skbs);
534 np->tx_skbs[id] = skb;
536 tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
538 tx->id = id;
539 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
540 if (unlikely((ref = gnttab_claim_grant_reference(&gref_tx_head,
541 gref_tx_terminal)) < 0)) {
542 printk(KERN_ALERT "#### netfront can't claim tx grant reference\n");
543 BUG();
544 }
545 mfn = virt_to_mfn(skb->data);
546 gnttab_grant_foreign_access_ref(ref, rdomid, mfn, GNTMAP_readonly);
547 tx->addr = ref << PAGE_SHIFT;
548 grant_tx_ref[id] = ref;
549 #else
550 tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
551 #endif
552 tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
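/* With grants, the upper bits of 'addr' carry the grant reference rather than
 * a machine frame number; the low bits are always the offset of the data
 * within the page. */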
553 tx->size = skb->len;
554 tx->csum_blank = (skb->ip_summed == CHECKSUM_HW);
556 wmb(); /* Ensure that backend will see the request. */
557 np->tx->req_prod = i + 1;
559 network_tx_buf_gc(dev);
561 if ((i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1)) {
562 np->tx_full = 1;
563 netif_stop_queue(dev);
564 }
566 spin_unlock_irq(&np->tx_lock);
568 np->stats.tx_bytes += skb->len;
569 np->stats.tx_packets++;
571 /* Only notify Xen if we really have to. */
572 mb();
573 if (np->tx->TX_TEST_IDX == i)
574 notify_via_evtchn(np->evtchn);
576 return 0;
578 drop:
579 np->stats.tx_dropped++;
580 dev_kfree_skb(skb);
581 return 0;
582 }
584 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
585 {
586 struct net_device *dev = dev_id;
587 struct net_private *np = netdev_priv(dev);
588 unsigned long flags;
590 spin_lock_irqsave(&np->tx_lock, flags);
591 network_tx_buf_gc(dev);
592 spin_unlock_irqrestore(&np->tx_lock, flags);
594 if ((np->rx_resp_cons != np->rx->resp_prod) && (np->user_state == UST_OPEN))
595 netif_rx_schedule(dev);
597 return IRQ_HANDLED;
598 }
601 static int netif_poll(struct net_device *dev, int *pbudget)
602 {
603 struct net_private *np = netdev_priv(dev);
604 struct sk_buff *skb, *nskb;
605 netif_rx_response_t *rx;
606 NETIF_RING_IDX i, rp;
607 mmu_update_t *mmu = rx_mmu;
608 multicall_entry_t *mcl = rx_mcl;
609 int work_done, budget, more_to_do = 1;
610 struct sk_buff_head rxq;
611 unsigned long flags;
612 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
613 unsigned long mfn;
614 grant_ref_t ref;
615 #endif
617 spin_lock(&np->rx_lock);
619 if (np->backend_state != BEST_CONNECTED) {
620 spin_unlock(&np->rx_lock);
621 return 0;
622 }
624 skb_queue_head_init(&rxq);
626 if ((budget = *pbudget) > dev->quota)
627 budget = dev->quota;
628 rp = np->rx->resp_prod;
629 rmb(); /* Ensure we see queued responses up to 'rp'. */
631 for (i = np->rx_resp_cons, work_done = 0;
632 (i != rp) && (work_done < budget);
633 i++, work_done++) {
634 rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
635 /*
636 * An error here is very odd. Usually indicates a backend bug,
637 * low-memory condition, or that we didn't have reservation headroom.
638 */
639 if (unlikely(rx->status <= 0)) {
640 if (net_ratelimit())
641 printk(KERN_WARNING "Bad rx buffer (memory squeeze?).\n");
642 np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
643 wmb();
644 np->rx->req_prod++;
645 work_done--;
646 continue;
647 }
649 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
650 ref = grant_rx_ref[rx->id];
651 grant_rx_ref[rx->id] = GRANT_INVALID_REF;
652 mfn = gnttab_end_foreign_transfer(ref);
653 gnttab_release_grant_reference(&gref_rx_head, ref);
654 #endif
656 skb = np->rx_skbs[rx->id];
657 ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
659 /* NB. We handle skb overflow later. */
660 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
661 skb->data = skb->head + rx->addr;
662 #else
663 skb->data = skb->head + (rx->addr & ~PAGE_MASK);
664 #endif
665 skb->len = rx->status;
666 skb->tail = skb->data + skb->len;
668 if ( rx->csum_valid )
669 skb->ip_summed = CHECKSUM_UNNECESSARY;
671 np->stats.rx_packets++;
672 np->stats.rx_bytes += rx->status;
674 /* Remap the page. */
675 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
676 mmu->ptr = mfn << PAGE_SHIFT | MMU_MACHPHYS_UPDATE;
677 #else
678 mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
679 #endif
680 mmu->val = __pa(skb->head) >> PAGE_SHIFT;
681 mmu++;
682 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
683 MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
684 pfn_pte_ma(mfn, PAGE_KERNEL), 0);
685 #else
686 MULTI_update_va_mapping(mcl, (unsigned long)skb->head,
687 pfn_pte_ma(rx->addr >> PAGE_SHIFT,
688 PAGE_KERNEL), 0);
689 #endif
690 mcl++;
692 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
693 phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] = mfn;
694 #else
695 phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
696 rx->addr >> PAGE_SHIFT;
697 #endif
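/* The new mapping of the flipped frame and its M2P update are only queued
 * here; they are issued in one batched multicall after this loop. */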
699 #ifdef GRANT_DEBUG
700 printk(KERN_ALERT "#### rx_poll enqueue vdata=%p mfn=%lu ref=%x\n",
701 skb->data, mfn, ref);
702 #endif
703 __skb_queue_tail(&rxq, skb);
704 }
706 /* Some pages are no longer absent... */
707 balloon_update_driver_allowance(-work_done);
709 /* Do all the remapping work, and M->P updates, in one big hypercall. */
710 if (likely((mcl - rx_mcl) != 0)) {
711 mcl->op = __HYPERVISOR_mmu_update;
712 mcl->args[0] = (unsigned long)rx_mmu;
713 mcl->args[1] = mmu - rx_mmu;
714 mcl->args[2] = 0;
715 mcl->args[3] = DOMID_SELF;
716 mcl++;
717 (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
718 }
720 while ((skb = __skb_dequeue(&rxq)) != NULL) {
721 #ifdef GRANT_DEBUG
722 printk(KERN_ALERT "#### rx_poll dequeue vdata=%p mfn=%lu\n",
723 skb->data, virt_to_mfn(skb->data));
724 dump_packet('d', skb->data, (unsigned long)skb->data);
725 #endif
726 /*
727 * Enough room in skbuff for the data we were passed? Also, Linux
728 * expects at least 16 bytes headroom in each receive buffer.
729 */
730 if (unlikely(skb->tail > skb->end) ||
731 unlikely((skb->data - skb->head) < 16)) {
732 nskb = NULL;
735 /* Only copy the packet if it fits in the current MTU. */
736 if (skb->len <= (dev->mtu + ETH_HLEN)) {
737 if ((skb->tail > skb->end) && net_ratelimit())
738 printk(KERN_INFO "Received packet needs %zd bytes more "
739 "headroom.\n", skb->tail - skb->end);
741 if ((nskb = alloc_xen_skb(skb->len + 2)) != NULL) {
742 skb_reserve(nskb, 2);
743 skb_put(nskb, skb->len);
744 memcpy(nskb->data, skb->data, skb->len);
745 nskb->dev = skb->dev;
746 }
747 }
748 else if (net_ratelimit())
749 printk(KERN_INFO "Received packet too big for MTU "
750 "(%d > %d)\n", skb->len - ETH_HLEN, dev->mtu);
752 /* Reinitialise and then destroy the old skbuff. */
753 skb->len = 0;
754 skb->tail = skb->data;
755 init_skb_shinfo(skb);
756 dev_kfree_skb(skb);
758 /* Switch old for new, if we copied the buffer. */
759 if ((skb = nskb) == NULL)
760 continue;
761 }
763 /* Set the shared-info area, which is hidden behind the real data. */
764 init_skb_shinfo(skb);
765 /* Ethernet-specific work. Delayed to here as it peeks the header. */
766 skb->protocol = eth_type_trans(skb, dev);
768 /* Pass it up. */
769 netif_receive_skb(skb);
770 dev->last_rx = jiffies;
771 }
773 np->rx_resp_cons = i;
775 /* If we get a callback with very few responses, reduce fill target. */
776 /* NB. Note exponential increase, linear decrease. */
777 if (((np->rx->req_prod - np->rx->resp_prod) > ((3*np->rx_target) / 4)) &&
778 (--np->rx_target < np->rx_min_target))
779 np->rx_target = np->rx_min_target;
781 network_alloc_rx_buffers(dev);
783 *pbudget -= work_done;
784 dev->quota -= work_done;
786 if (work_done < budget) {
787 local_irq_save(flags);
789 np->rx->event = i + 1;
791 /* Deal with hypervisor racing our resetting of rx_event. */
792 mb();
793 if (np->rx->resp_prod == i) {
794 __netif_rx_complete(dev);
795 more_to_do = 0;
796 }
798 local_irq_restore(flags);
799 }
801 spin_unlock(&np->rx_lock);
803 return more_to_do;
804 }
807 static int network_close(struct net_device *dev)
808 {
809 struct net_private *np = netdev_priv(dev);
810 np->user_state = UST_CLOSED;
811 netif_stop_queue(np->dev);
812 return 0;
813 }
816 static struct net_device_stats *network_get_stats(struct net_device *dev)
817 {
818 struct net_private *np = netdev_priv(dev);
819 return &np->stats;
820 }
823 static void network_connect(struct net_device *dev,
824 netif_fe_interface_status_t *status)
825 {
826 struct net_private *np;
827 int i, requeue_idx;
828 netif_tx_request_t *tx;
830 np = netdev_priv(dev);
831 spin_lock_irq(&np->tx_lock);
832 spin_lock(&np->rx_lock);
834 /* Recovery procedure: */
836 /* Step 1: Reinitialise variables. */
837 np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
838 np->rx->event = np->tx->event = 1;
840 /* Step 2: Rebuild the RX and TX ring contents.
841 * NB. We could just free the queued TX packets now but we hope
842 * that sending them out might do some good. We have to rebuild
843 * the RX ring because some of our pages are currently flipped out
844 * so we can't just free the RX skbs.
845 * NB2. Freelist index entries are always going to be less than
846 * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
847 * greater than __PAGE_OFFSET: we use this property to distinguish
848 * them.
849 */
851 /* Rebuild the TX buffer freelist and the TX ring itself.
852 * NB. This reorders packets. We could keep more private state
853 * to avoid this but maybe it doesn't matter so much given the
854 * interface has been down.
855 */
856 for (requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++) {
857 if ((unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET) {
858 struct sk_buff *skb = np->tx_skbs[i];
860 tx = &np->tx->ring[requeue_idx++].req;
862 tx->id = i;
863 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
864 tx->addr = 0; /*(ref << PAGE_SHIFT) |*/
865 #else
866 tx->addr = virt_to_mfn(skb->data) << PAGE_SHIFT;
867 #endif
868 tx->addr |= (unsigned long)skb->data & ~PAGE_MASK;
869 tx->size = skb->len;
871 np->stats.tx_bytes += skb->len;
872 np->stats.tx_packets++;
873 }
874 }
875 wmb();
876 np->tx->req_prod = requeue_idx;
878 /* Rebuild the RX buffer freelist and the RX ring itself. */
879 for (requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++)
880 if ((unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET)
881 np->rx->ring[requeue_idx++].req.id = i;
882 wmb();
883 np->rx->req_prod = requeue_idx;
885 /* Step 3: All public and private state should now be sane. Get
886 * ready to start sending and receiving packets and give the driver
887 * domain a kick because we've probably just requeued some
888 * packets.
889 */
890 np->backend_state = BEST_CONNECTED;
891 wmb();
892 notify_via_evtchn(status->evtchn);
893 network_tx_buf_gc(dev);
895 if (np->user_state == UST_OPEN)
896 netif_start_queue(dev);
898 spin_unlock(&np->rx_lock);
899 spin_unlock_irq(&np->tx_lock);
900 }
902 static void vif_show(struct net_private *np)
903 {
904 #ifdef DEBUG
905 if (np) {
906 IPRINTK("<vif handle=%u %s(%s) evtchn=%u tx=%p rx=%p>\n",
907 np->handle,
908 be_state_name[np->backend_state],
909 np->user_state ? "open" : "closed",
910 np->evtchn,
911 np->tx,
912 np->rx);
913 } else {
914 IPRINTK("<vif NULL>\n");
915 }
916 #endif
917 }
919 /* Send a connect message to xend to tell it to bring up the interface. */
920 static void send_interface_connect(struct net_private *np)
921 {
921 {
922 ctrl_msg_t cmsg = {
923 .type = CMSG_NETIF_FE,
924 .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
925 .length = sizeof(netif_fe_interface_connect_t),
926 };
927 netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
929 msg->handle = np->handle;
930 msg->tx_shmem_frame = virt_to_mfn(np->tx);
931 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
932 msg->tx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_tx_head,
933 gref_tx_terminal);
934 if(msg->tx_shmem_ref < 0) {
935 printk(KERN_ALERT "#### netfront can't claim tx_shmem reference\n");
936 BUG();
937 }
938 gnttab_grant_foreign_access_ref (msg->tx_shmem_ref, rdomid,
939 msg->tx_shmem_frame, 0);
940 #endif
942 msg->rx_shmem_frame = virt_to_mfn(np->rx);
943 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
944 msg->rx_shmem_ref = (u32)gnttab_claim_grant_reference(&gref_rx_head,
945 gref_rx_terminal);
946 if(msg->rx_shmem_ref < 0) {
947 printk(KERN_ALERT "#### netfront can't claim rx_shmem reference\n");
948 BUG();
949 }
950 gnttab_grant_foreign_access_ref (msg->rx_shmem_ref, rdomid,
951 msg->rx_shmem_frame, 0);
952 #endif
954 ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
955 }
957 /* Send a driver status notification to the domain controller. */
958 static int send_driver_status(int ok)
959 {
959 {
960 int err = 0;
961 ctrl_msg_t cmsg = {
962 .type = CMSG_NETIF_FE,
963 .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
964 .length = sizeof(netif_fe_driver_status_t),
965 };
966 netif_fe_driver_status_t *msg = (void*)cmsg.msg;
968 msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
969 err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
970 return err;
971 }
973 /* Stop network device and free tx/rx queues and irq.
974 */
975 static void vif_release(struct net_private *np)
976 {
976 {
977 /* Stop old i/f to prevent errors whilst we rebuild the state. */
978 spin_lock_irq(&np->tx_lock);
979 spin_lock(&np->rx_lock);
980 netif_stop_queue(np->dev);
981 /* np->backend_state = BEST_DISCONNECTED; */
982 spin_unlock(&np->rx_lock);
983 spin_unlock_irq(&np->tx_lock);
985 /* Free resources. */
986 if ( np->tx != NULL )
987 {
988 unbind_evtchn_from_irqhandler(np->evtchn, np->dev);
989 free_page((unsigned long)np->tx);
990 free_page((unsigned long)np->rx);
991 np->evtchn = 0;
992 np->tx = NULL;
993 np->rx = NULL;
994 }
995 }
997 /* Release vif resources and close it down completely.
998 */
999 static void vif_close(struct net_private *np)
1000 {
1001 WPRINTK("Unexpected netif-CLOSED message in state %s\n",
1002 be_state_name[np->backend_state]);
1003 vif_release(np);
1004 np->backend_state = BEST_CLOSED;
1005 /* todo: take dev down and free. */
1006 vif_show(np);
1007 }
1009 /* Move the vif into disconnected state.
1010 * Allocates tx/rx pages.
1011 * Sends connect message to xend.
1012 */
1013 static void vif_disconnect(struct net_private *np)
1014 {
1015 if(np->tx) free_page((unsigned long)np->tx);
1016 if(np->rx) free_page((unsigned long)np->rx);
1017 // Before this np->tx and np->rx had better be null.
1018 np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
1019 np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
1020 memset(np->tx, 0, PAGE_SIZE);
1021 memset(np->rx, 0, PAGE_SIZE);
1022 np->backend_state = BEST_DISCONNECTED;
1023 send_interface_connect(np);
1024 vif_show(np);
1025 }
1027 /* Begin interface recovery.
1029 * NB. Whilst we're recovering, we turn the carrier state off. We
1030 * take measures to ensure that this device isn't used for
1031 * anything. We also stop the queue for this device. Various
1032 * different approaches (e.g. continuing to buffer packets) have
1033 * been tested but don't appear to improve the overall impact on
1034 * TCP connections.
1036 * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
1037 * is initiated by a special "RESET" message - disconnect could
1038 * just mean we're not allowed to use this interface any more.
1039 */
1040 static void vif_reset(struct net_private *np)
1041 {
1042 IPRINTK("Attempting to reconnect network interface: handle=%u\n",
1043 np->handle);
1044 vif_release(np);
1045 vif_disconnect(np);
1046 vif_show(np);
1047 }
1049 /* Move the vif into connected state.
1050 * Sets the mac and event channel from the message.
1051 * Binds the irq to the event channel.
1052 */
1053 static void
1054 vif_connect(struct net_private *np, netif_fe_interface_status_t *status)
1055 {
1056 struct net_device *dev = np->dev;
1057 memcpy(dev->dev_addr, status->mac, ETH_ALEN);
1058 network_connect(dev, status);
1059 np->evtchn = status->evtchn;
1060 #if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
1061 rdomid = status->domid;
1062 #endif
1063 (void)bind_evtchn_to_irqhandler(
1064 np->evtchn, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
1065 netctrl_connected_count();
1066 (void)send_fake_arp(dev);
1067 vif_show(np);
1068 }
1070 static struct ethtool_ops network_ethtool_ops =
1071 {
1072 .get_tx_csum = ethtool_op_get_tx_csum,
1073 .set_tx_csum = ethtool_op_set_tx_csum,
1074 };
1076 /** Create a network device.
1077 * @param handle device handle
1078 * @param val return parameter for created device
1079 * @return 0 on success, error code otherwise
1080 */
1081 static int create_netdev(int handle, struct net_device **val)
1082 {
1083 int i, err = 0;
1084 struct net_device *dev = NULL;
1085 struct net_private *np = NULL;
1087 if ((dev = alloc_etherdev(sizeof(struct net_private))) == NULL) {
1088 printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
1089 err = -ENOMEM;
1090 goto exit;
1091 }
1093 np = netdev_priv(dev);
1094 np->backend_state = BEST_CLOSED;
1095 np->user_state = UST_CLOSED;
1096 np->handle = handle;
1098 spin_lock_init(&np->tx_lock);
1099 spin_lock_init(&np->rx_lock);
1101 skb_queue_head_init(&np->rx_batch);
1102 np->rx_target = RX_MIN_TARGET;
1103 np->rx_min_target = RX_MIN_TARGET;
1104 np->rx_max_target = RX_MAX_TARGET;
1106 /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
1107 for (i = 0; i <= NETIF_TX_RING_SIZE; i++) {
1108 np->tx_skbs[i] = (void *)((unsigned long) i+1);
1109 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1110 grant_tx_ref[i] = GRANT_INVALID_REF;
1111 #endif
1112 }
1113 for (i = 0; i <= NETIF_RX_RING_SIZE; i++) {
1114 np->rx_skbs[i] = (void *)((unsigned long) i+1);
1115 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1116 grant_rx_ref[i] = GRANT_INVALID_REF;
1117 #endif
1118 }
1120 dev->open = network_open;
1121 dev->hard_start_xmit = network_start_xmit;
1122 dev->stop = network_close;
1123 dev->get_stats = network_get_stats;
1124 dev->poll = netif_poll;
1125 dev->weight = 64;
1126 dev->features = NETIF_F_IP_CSUM;
1128 SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
1130 if ((err = register_netdev(dev)) != 0) {
1131 printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
1132 goto exit;
1133 }
1135 if ((err = xennet_proc_addif(dev)) != 0) {
1136 unregister_netdev(dev);
1137 goto exit;
1138 }
1140 np->dev = dev;
1141 list_add(&np->list, &dev_list);
1143 exit:
1144 if ((err != 0) && (dev != NULL))
1145 kfree(dev);
1146 else if (val != NULL)
1147 *val = dev;
1148 return err;
1149 }
1151 /* Get the target interface for a status message.
1152 * Creates the interface when it makes sense.
1153 * The returned interface may be null when there is no error.
1155 * @param status status message
1156 * @param np return parameter for interface state
1157 * @return 0 on success, error code otherwise
1158 */
1159 static int
1160 target_vif(netif_fe_interface_status_t *status, struct net_private **np)
1161 {
1162 int err = 0;
1163 struct net_device *dev;
1165 DPRINTK("> handle=%d\n", status->handle);
1166 if (status->handle < 0) {
1167 err = -EINVAL;
1168 goto exit;
1169 }
1171 if ((dev = find_dev_by_handle(status->handle)) != NULL)
1172 goto exit;
1174 if (status->status == NETIF_INTERFACE_STATUS_CLOSED)
1175 goto exit;
1176 if (status->status == NETIF_INTERFACE_STATUS_CHANGED)
1177 goto exit;
1179 /* It's a new interface in a good state - create it. */
1180 DPRINTK("> create device...\n");
1181 if ((err = create_netdev(status->handle, &dev)) != 0)
1182 goto exit;
1184 netctrl.interface_n++;
1186 exit:
1187 if (np != NULL)
1188 *np = ((dev && !err) ? netdev_priv(dev) : NULL);
1189 DPRINTK("< err=%d\n", err);
1190 return err;
1191 }
1193 /* Handle an interface status message. */
1194 static void netif_interface_status(netif_fe_interface_status_t *status)
1195 {
1196 int err = 0;
1197 struct net_private *np = NULL;
1199 DPRINTK("> status=%s handle=%d\n",
1200 status_name[status->status], status->handle);
1202 if ((err = target_vif(status, &np)) != 0) {
1203 WPRINTK("Invalid netif: handle=%u\n", status->handle);
1204 return;
1205 }
1207 if (np == NULL) {
1208 DPRINTK("> no vif\n");
1209 return;
1210 }
1212 switch (status->status) {
1213 case NETIF_INTERFACE_STATUS_CLOSED:
1214 switch (np->backend_state) {
1215 case BEST_CLOSED:
1216 case BEST_DISCONNECTED:
1217 case BEST_CONNECTED:
1218 vif_close(np);
1219 break;
1220 }
1221 break;
1223 case NETIF_INTERFACE_STATUS_DISCONNECTED:
1224 switch (np->backend_state) {
1225 case BEST_CLOSED:
1226 vif_disconnect(np);
1227 break;
1228 case BEST_DISCONNECTED:
1229 case BEST_CONNECTED:
1230 vif_reset(np);
1231 break;
1232 }
1233 break;
1235 case NETIF_INTERFACE_STATUS_CONNECTED:
1236 switch (np->backend_state) {
1237 case BEST_CLOSED:
1238 WPRINTK("Unexpected netif status %s in state %s\n",
1239 status_name[status->status],
1240 be_state_name[np->backend_state]);
1241 vif_disconnect(np);
1242 vif_connect(np, status);
1243 break;
1244 case BEST_DISCONNECTED:
1245 vif_connect(np, status);
1246 break;
1247 }
1248 break;
1250 case NETIF_INTERFACE_STATUS_CHANGED:
1251 /*
1252 * The domain controller is notifying us that a device has been
1253 * added or removed.
1254 */
1255 break;
1257 default:
1258 WPRINTK("Invalid netif status code %d\n", status->status);
1259 break;
1260 }
1262 vif_show(np);
1263 }
1265 /*
1266 * Initialize the network control interface.
1267 */
1268 static void netif_driver_status(netif_fe_driver_status_t *status)
1269 {
1270 netctrl.up = status->status;
1271 netctrl_connected_count();
1272 }
1274 /* Receive handler for control messages. */
1275 static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
1276 {
1278 switch (msg->subtype) {
1279 case CMSG_NETIF_FE_INTERFACE_STATUS:
1280 netif_interface_status((netif_fe_interface_status_t *) &msg->msg[0]);
1281 break;
1283 case CMSG_NETIF_FE_DRIVER_STATUS:
1284 netif_driver_status((netif_fe_driver_status_t *) &msg->msg[0]);
1285 break;
1287 default:
1288 msg->length = 0;
1289 break;
1290 }
1292 ctrl_if_send_response(msg);
1293 }
1296 #if 1
1297 /* Wait for all interfaces to be connected.
1299 * This works OK, but we'd like to use the probing mode (see below).
1300 */
1301 static int probe_interfaces(void)
1302 {
1303 int err = 0, conn = 0;
1304 int wait_i, wait_n = 100;
1306 DPRINTK(">\n");
1308 for (wait_i = 0; wait_i < wait_n; wait_i++) {
1309 DPRINTK("> wait_i=%d\n", wait_i);
1310 conn = netctrl_connected();
1311 if(conn) break;
1312 DPRINTK("> schedule_timeout...\n");
1313 set_current_state(TASK_INTERRUPTIBLE);
1314 schedule_timeout(10);
1315 }
1317 DPRINTK("> wait finished...\n");
1318 if (conn <= 0) {
1319 err = netctrl_err(-ENETDOWN);
1320 WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err);
1321 }
1323 DPRINTK("< err=%d\n", err);
1325 return err;
1326 }
1327 #else
1328 /* Probe for interfaces until no more are found.
1330 * This is the mode we'd like to use, but at the moment it panics the kernel.
1331 */
1332 static int probe_interfaces(void)
1333 {
1334 int err = 0;
1335 int wait_i, wait_n = 100;
1336 ctrl_msg_t cmsg = {
1337 .type = CMSG_NETIF_FE,
1338 .subtype = CMSG_NETIF_FE_INTERFACE_STATUS,
1339 .length = sizeof(netif_fe_interface_status_t),
1340 };
1341 netif_fe_interface_status_t msg = {};
1342 ctrl_msg_t rmsg = {};
1343 netif_fe_interface_status_t *reply = (void*)rmsg.msg;
1344 int state = TASK_UNINTERRUPTIBLE;
1345 u32 query = -1;
1347 DPRINTK(">\n");
1349 netctrl.interface_n = 0;
1350 for (wait_i = 0; wait_i < wait_n; wait_i++) {
1351 DPRINTK("> wait_i=%d query=%d\n", wait_i, query);
1352 msg.handle = query;
1353 memcpy(cmsg.msg, &msg, sizeof(msg));
1354 DPRINTK("> set_current_state...\n");
1355 set_current_state(state);
1356 DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
1357 DPRINTK("> sending...\n");
1358 err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state);
1359 DPRINTK("> err=%d\n", err);
1360 if(err) goto exit;
1361 DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
1362 if((int)reply->handle < 0) {
1363 // No more interfaces.
1364 break;
1365 }
1366 query = -reply->handle - 2;
1367 DPRINTK(">netif_interface_status ...\n");
1368 netif_interface_status(reply);
1369 }
1371 exit:
1372 if (err) {
1373 err = netctrl_err(-ENETDOWN);
1374 WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err);
1375 }
1377 DPRINTK("< err=%d\n", err);
1378 return err;
1379 }
1381 #endif
1383 /*
1384 * We use this notifier to send out a fake ARP reply to reset switches and
1385 * router ARP caches when an IP interface is brought up on a VIF.
1386 */
1387 static int
1388 inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
1389 {
1390 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
1391 struct net_device *dev = ifa->ifa_dev->dev;
1392 struct list_head *ent;
1393 struct net_private *np;
1395 if (event != NETDEV_UP)
1396 goto out;
1398 list_for_each (ent, &dev_list) {
1399 np = list_entry(ent, struct net_private, list);
1400 if (np->dev == dev)
1401 (void)send_fake_arp(dev);
1402 }
1404 out:
1405 return NOTIFY_DONE;
1406 }
1408 static struct notifier_block notifier_inetdev = {
1409 .notifier_call = inetdev_notify,
1410 .next = NULL,
1411 .priority = 0
1412 };
1414 static int __init netif_init(void)
1415 {
1416 int err = 0;
1418 if (xen_start_info.flags & SIF_INITDOMAIN)
1419 return 0;
1420 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1421 /* A grant for every ring slot, plus one for the ring itself */
1422 if (gnttab_alloc_grant_references(NETIF_TX_RING_SIZE + 1,
1423 &gref_tx_head, &gref_tx_terminal) < 0) {
1424 printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
1425 return 1;
1426 }
1427 printk(KERN_ALERT "Netdev frontend (TX) is using grant tables.\n");
1428 #endif
1429 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1430 /* A grant for every ring slot, plus one for the ring itself */
1431 if (gnttab_alloc_grant_references(NETIF_RX_RING_SIZE + 1,
1432 &gref_rx_head, &gref_rx_terminal) < 0) {
1433 printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
1434 return 1;
1435 }
1436 printk(KERN_ALERT "Netdev frontend (RX) is using grant tables.\n");
1437 #endif
1439 if ((err = xennet_proc_init()) != 0)
1440 return err;
1442 IPRINTK("Initialising virtual ethernet driver.\n");
1443 INIT_LIST_HEAD(&dev_list);
1444 (void)register_inetaddr_notifier(&notifier_inetdev);
1445 netctrl_init();
1446 (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
1447 CALLBACK_IN_BLOCKING_CONTEXT);
1448 send_driver_status(1);
1449 err = probe_interfaces();
1450 if (err)
1451 ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
1453 DPRINTK("< err=%d\n", err);
1454 return err;
1455 }
1457 static void netif_exit(void)
1458 {
1459 #ifdef CONFIG_XEN_NETDEV_GRANT_TX
1460 gnttab_free_grant_references(gref_tx_head, gref_tx_terminal);
1461 #endif
1462 #ifdef CONFIG_XEN_NETDEV_GRANT_RX
1463 gnttab_free_grant_references(gref_rx_head, gref_rx_terminal);
1464 #endif
1465 }
1467 static void vif_suspend(struct net_private *np)
1468 {
1469 /* Avoid having tx/rx stuff happen until we're ready. */
1470 unbind_evtchn_from_irqhandler(np->evtchn, np->dev);
1471 }
1473 static void vif_resume(struct net_private *np)
1474 {
1475 /*
1476 * Connect regardless of whether IFF_UP flag set.
1477 * Stop bad things from happening until we're back up.
1478 */
1479 np->backend_state = BEST_DISCONNECTED;
1480 memset(np->tx, 0, PAGE_SIZE);
1481 memset(np->rx, 0, PAGE_SIZE);
1483 send_interface_connect(np);
1484 }
1486 void netif_suspend(void)
1487 {
1488 struct list_head *ent;
1489 struct net_private *np;
1491 list_for_each (ent, &dev_list) {
1492 np = list_entry(ent, struct net_private, list);
1493 vif_suspend(np);
1494 }
1495 }
1497 void netif_resume(void)
1498 {
1499 struct list_head *ent;
1500 struct net_private *np;
1502 list_for_each (ent, &dev_list) {
1503 np = list_entry(ent, struct net_private, list);
1504 vif_resume(np);
1505 }
1506 }
1508 #ifdef CONFIG_PROC_FS
1510 #define TARGET_MIN 0UL
1511 #define TARGET_MAX 1UL
1512 #define TARGET_CUR 2UL
1514 static int xennet_proc_read(
1515 char *page, char **start, off_t off, int count, int *eof, void *data)
1516 {
1517 struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
1518 struct net_private *np = netdev_priv(dev);
1519 int len = 0, which_target = (long)data & 3;
1521 switch (which_target)
1522 {
1523 case TARGET_MIN:
1524 len = sprintf(page, "%d\n", np->rx_min_target);
1525 break;
1526 case TARGET_MAX:
1527 len = sprintf(page, "%d\n", np->rx_max_target);
1528 break;
1529 case TARGET_CUR:
1530 len = sprintf(page, "%d\n", np->rx_target);
1531 break;
1532 }
1534 *eof = 1;
1535 return len;
1536 }
1538 static int xennet_proc_write(
1539 struct file *file, const char __user *buffer,
1540 unsigned long count, void *data)
1541 {
1542 struct net_device *dev = (struct net_device *)((unsigned long)data & ~3UL);
1543 struct net_private *np = netdev_priv(dev);
1544 int which_target = (long)data & 3;
1545 char string[64];
1546 long target;
1548 if (!capable(CAP_SYS_ADMIN))
1549 return -EPERM;
1551 if (count <= 1)
1552 return -EBADMSG; /* runt */
1553 if (count > sizeof(string))
1554 return -EFBIG; /* too long */
1556 if (copy_from_user(string, buffer, count))
1557 return -EFAULT;
1558 string[sizeof(string)-1] = '\0';
1560 target = simple_strtol(string, NULL, 10);
1561 if (target < RX_MIN_TARGET)
1562 target = RX_MIN_TARGET;
1563 if (target > RX_MAX_TARGET)
1564 target = RX_MAX_TARGET;
1566 spin_lock(&np->rx_lock);
1568 switch (which_target)
1569 {
1570 case TARGET_MIN:
1571 if (target > np->rx_max_target)
1572 np->rx_max_target = target;
1573 np->rx_min_target = target;
1574 if (target > np->rx_target)
1575 np->rx_target = target;
1576 break;
1577 case TARGET_MAX:
1578 if (target < np->rx_min_target)
1579 np->rx_min_target = target;
1580 np->rx_max_target = target;
1581 if (target < np->rx_target)
1582 np->rx_target = target;
1583 break;
1584 case TARGET_CUR:
1585 break;
1586 }
1588 network_alloc_rx_buffers(dev);
1590 spin_unlock(&np->rx_lock);
1592 return count;
1593 }
1595 static int xennet_proc_init(void)
1596 {
1597 if (proc_mkdir("xen/net", NULL) == NULL)
1598 return -ENOMEM;
1599 return 0;
1600 }
1602 static int xennet_proc_addif(struct net_device *dev)
1603 {
1604 struct proc_dir_entry *dir, *min, *max, *cur;
1605 char name[30];
1607 sprintf(name, "xen/net/%s", dev->name);
1609 dir = proc_mkdir(name, NULL);
1610 if (!dir)
1611 goto nomem;
1613 min = create_proc_entry("rxbuf_min", 0644, dir);
1614 max = create_proc_entry("rxbuf_max", 0644, dir);
1615 cur = create_proc_entry("rxbuf_cur", 0444, dir);
1616 if (!min || !max || !cur)
1617 goto nomem;
1619 min->read_proc = xennet_proc_read;
1620 min->write_proc = xennet_proc_write;
1621 min->data = (void *)((unsigned long)dev | TARGET_MIN);
1623 max->read_proc = xennet_proc_read;
1624 max->write_proc = xennet_proc_write;
1625 max->data = (void *)((unsigned long)dev | TARGET_MAX);
1627 cur->read_proc = xennet_proc_read;
1628 cur->write_proc = xennet_proc_write;
1629 cur->data = (void *)((unsigned long)dev | TARGET_CUR);
1631 return 0;
1633 nomem:
1634 xennet_proc_delif(dev);
1635 return -ENOMEM;
1636 }
1638 static void xennet_proc_delif(struct net_device *dev)
1639 {
1640 char name[30];
1642 sprintf(name, "xen/net/%s/rxbuf_min", dev->name);
1643 remove_proc_entry(name, NULL);
1645 sprintf(name, "xen/net/%s/rxbuf_max", dev->name);
1646 remove_proc_entry(name, NULL);
1648 sprintf(name, "xen/net/%s/rxbuf_cur", dev->name);
1649 remove_proc_entry(name, NULL);
1651 sprintf(name, "xen/net/%s", dev->name);
1652 remove_proc_entry(name, NULL);
1653 }
1655 #endif
1657 module_init(netif_init);
1658 module_exit(netif_exit);