ia64/xen-unstable: linux-2.6-xen-sparse/drivers/xen/netback/netback.c @ 6432:b54144915ae6

summary:  merge?
author:   cl349@firebug.cl.cam.ac.uk
date:     Thu Aug 25 16:26:30 2005 +0000 (2005-08-25)
parents:  3428d58a85e1 522bc50588ed
children: 0610add7c3fe
/******************************************************************************
 * drivers/xen/netback/netback.c
 *
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/xen/netfront/netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 */

#include "common.h"
#include <asm-xen/balloon.h>

#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
#include <asm-xen/xen-public/grant_table.h>
#include <asm-xen/gnttab.h>
#ifdef GRANT_DEBUG
static void
dump_packet(int tag, u32 addr, unsigned char *p)
{
    int i;

    printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
    for (i = 0; i < 20; i++) {
        printk("%02x", p[i]);
    }
    printk("\n");
}
#endif
#endif

static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
                             u16      id,
                             s8       st);
static int  make_rx_response(netif_t *netif,
                             u16      id,
                             s8       st,
                             unsigned long addr,
                             u16      size,
                             u16      csum_valid);

static void net_tx_action(unsigned long unused);
static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);

static void net_rx_action(unsigned long unused);
static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
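
/*
 * net_timer backs off RX processing when no machine frames are available:
 * net_rx_action() arms it for one second on a memory squeeze and net_alarm()
 * simply re-schedules the RX tasklet when it fires.
 */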
static struct timer_list net_timer;

#define MAX_PENDING_REQS 256

static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
#else
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
#endif
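/*
 * rx_notify[] marks event channels that still need a notification at the end
 * of net_rx_action(); notify_list[] batches them so that each channel is
 * notified at most once per pass.
 */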
static unsigned char rx_notify[NR_EVENT_CHANNELS];

/* Don't currently gate addition of an interface to the tx scheduling list. */
#define tx_work_exists(_if) (1)

static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
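
/*
 * net_tx_action() copies at most PKT_PROT_LEN bytes of each packet into the
 * skb's linear data area (enough for the protocol headers); any remainder is
 * attached as a page fragment referencing the mapped guest page.
 */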
#define PKT_PROT_LEN 64

static struct {
    netif_tx_request_t req;
    netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
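
/*
 * pending_ring[] holds the indices of free pending_tx_info[] slots:
 * net_tx_action() takes a slot at pending_cons and completed slots are
 * returned at pending_prod, so NR_PENDING_REQS is the number of slots
 * currently in use.  MASK_PEND_IDX relies on MAX_PENDING_REQS being a
 * power of two.
 */
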
/* Freed TX SKBs get batched on this ring before return to pending_ring. */
static u16 dealloc_ring[MAX_PENDING_REQS];
static PEND_RING_IDX dealloc_prod, dealloc_cons;

static struct sk_buff_head tx_queue;

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
static u16 grant_tx_ref[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
#else
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
#endif

#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
#define GRANT_INVALID_REF (0xFFFF)
#endif

static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;

#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
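
/*
 * alloc_mfn() hands out replacement machine frames for the pages given to
 * guests in net_rx_action().  It refills mfn_list[] in batches of
 * MAX_MFN_ALLOC via MEMOP_increase_reservation and returns 0 if the
 * reservation cannot be extended.
 */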
static unsigned long alloc_mfn(void)
{
    unsigned long mfn = 0, flags;
    spin_lock_irqsave(&mfn_lock, flags);
    if ( unlikely(alloc_index == 0) )
        alloc_index = HYPERVISOR_dom_mem_op(
            MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0);
    if ( alloc_index != 0 )
        mfn = mfn_list[--alloc_index];
    spin_unlock_irqrestore(&mfn_lock, flags);
    return mfn;
}

#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static void free_mfn(unsigned long mfn)
{
    unsigned long flags;
    spin_lock_irqsave(&mfn_lock, flags);
    if ( alloc_index != MAX_MFN_ALLOC )
        mfn_list[alloc_index++] = mfn;
    else if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                    &mfn, 1, 0) != 1 )
        BUG();
    spin_unlock_irqrestore(&mfn_lock, flags);
}
#endif

static inline void maybe_schedule_tx_action(void)
{
    smp_mb();
    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
         !list_empty(&net_schedule_list) )
        tasklet_schedule(&net_tx_tasklet);
}

/*
 * A gross way of confirming the origin of an skb data page. The slab
 * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
 */
static inline int is_xen_skb(struct sk_buff *skb)
{
    extern kmem_cache_t *skbuff_cachep;
    kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
    return (cp == skbuff_cachep);
}

int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    netif_t *netif = netdev_priv(dev);

    ASSERT(skb->dev == dev);

    /* Drop the packet if the target domain has no receive buffers. */
    if ( !netif->active ||
         (netif->rx_req_cons == netif->rx->req_prod) ||
         ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE) )
        goto drop;

    /*
     * We do not copy the packet unless:
     *  1. The data is shared; or
     *  2. The data is not allocated from our special cache.
     * NB. We also couldn't cope with fragmented packets, but we won't get
     *     any because we do not advertise the NETIF_F_SG feature.
     */
    if ( skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb) )
    {
        int hlen = skb->data - skb->head;
        struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
        if ( unlikely(nskb == NULL) )
            goto drop;
        skb_reserve(nskb, hlen);
        __skb_put(nskb, skb->len);
        if (skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen))
            BUG();
        nskb->dev = skb->dev;
        nskb->proto_csum_valid = skb->proto_csum_valid;
        dev_kfree_skb(skb);
        skb = nskb;
    }
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
#ifdef DEBUG_GRANT
    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
           netif->rx->req_prod,
           netif->rx_req_cons,
           netif->rx->ring[
               MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
           netif->rx->ring[
               MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
#endif
#endif
    netif->rx_req_cons++;
    netif_get(netif);

    skb_queue_tail(&rx_queue, skb);
    tasklet_schedule(&net_rx_tasklet);

    return 0;

 drop:
    netif->stats.tx_dropped++;
    dev_kfree_skb(skb);
    return 0;
}

#if 0
static void xen_network_done_notify(void)
{
    static struct net_device *eth0_dev = NULL;
    if ( unlikely(eth0_dev == NULL) )
        eth0_dev = __dev_get_by_name("eth0");
    netif_rx_schedule(eth0_dev);
}
/*
 * Add following to poll() function in NAPI driver (Tigon3 is example):
 *  if ( xen_network_done() )
 *      tg3_enable_ints(tp);
 */
int xen_network_done(void)
{
    return skb_queue_empty(&rx_queue);
}
#endif

static void net_rx_action(unsigned long unused)
{
    netif_t *netif;
    s8 status;
    u16 size, id, evtchn;
    multicall_entry_t *mcl;
    mmu_update_t *mmu;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    gnttab_donate_t *gop;
#else
    struct mmuext_op *mmuext;
#endif
    unsigned long vdata, old_mfn, new_mfn;
    struct sk_buff_head rxq;
    struct sk_buff *skb;
    u16 notify_list[NETIF_RX_RING_SIZE];
    int notify_nr = 0;

    skb_queue_head_init(&rxq);

    mcl = rx_mcl;
    mmu = rx_mmu;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    gop = grant_rx_op;
#else
    mmuext = rx_mmuext;
#endif

    while ( (skb = skb_dequeue(&rx_queue)) != NULL )
    {
        netif = netdev_priv(skb->dev);
        vdata = (unsigned long)skb->data;
        old_mfn = virt_to_mfn(vdata);

        /* Memory squeeze? Back off for an arbitrary while. */
        if ( (new_mfn = alloc_mfn()) == 0 )
        {
            if ( net_ratelimit() )
                printk(KERN_WARNING "Memory squeeze in netback driver.\n");
            mod_timer(&net_timer, jiffies + HZ);
            skb_queue_head(&rx_queue, skb);
            break;
        }
        /*
         * Set the new P2M table entry before reassigning the old data page.
         * Heed the comment in pgtable-2level.h:pte_page(). :-)
         */
        phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;

        MULTI_update_va_mapping(mcl, vdata,
                                pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
        mcl++;

#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        gop->mfn = old_mfn;
        gop->domid = netif->domid;
        gop->handle = netif->rx->ring[
            MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
        netif->rx_resp_prod_copy++;
        gop++;
#else
        mcl->op = __HYPERVISOR_mmuext_op;
        mcl->args[0] = (unsigned long)mmuext;
        mcl->args[1] = 1;
        mcl->args[2] = 0;
        mcl->args[3] = netif->domid;
        mcl++;

        mmuext->cmd = MMUEXT_REASSIGN_PAGE;
        mmuext->mfn = old_mfn;
        mmuext++;
#endif
        mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        mmu->val = __pa(vdata) >> PAGE_SHIFT;
        mmu++;

        __skb_queue_tail(&rxq, skb);

#ifdef DEBUG_GRANT
        dump_packet('a', old_mfn, vdata);
#endif
        /* Filled the batch queue? */
        if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
            break;
    }

    if ( mcl == rx_mcl )
        return;

    mcl->op = __HYPERVISOR_mmu_update;
    mcl->args[0] = (unsigned long)rx_mmu;
    mcl->args[1] = mmu - rx_mmu;
    mcl->args[2] = 0;
    mcl->args[3] = DOMID_SELF;
    mcl++;

#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
#else
    mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
#endif
    if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
        BUG();

    mcl = rx_mcl;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    BUG_ON(HYPERVISOR_grant_table_op(
        GNTTABOP_donate, grant_rx_op, gop - grant_rx_op));
    gop = grant_rx_op;
#else
    mmuext = rx_mmuext;
#endif
    while ( (skb = __skb_dequeue(&rxq)) != NULL )
    {
        netif = netdev_priv(skb->dev);
        size = skb->tail - skb->data;

        /* Rederive the machine addresses. */
        new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
#else
        old_mfn = mmuext[0].mfn;
#endif
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;

        netif->stats.tx_bytes += size;
        netif->stats.tx_packets++;

        /* The update_va_mapping() must not fail. */
        BUG_ON(mcl[0].result != 0);

        /* Check the reassignment error code. */
        status = NETIF_RSP_OKAY;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        BUG_ON(gop->status != 0); /* XXX */
#else
        if ( unlikely(mcl[1].result != 0) )
        {
            DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
            free_mfn(old_mfn);
            status = NETIF_RSP_ERROR;
        }
#endif
        evtchn = netif->evtchn;
        id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
        if ( make_rx_response(netif, id, status,
                              (old_mfn << PAGE_SHIFT) | /* XXX */
                              ((unsigned long)skb->data & ~PAGE_MASK),
                              size, skb->proto_csum_valid) &&
             (rx_notify[evtchn] == 0) )
        {
            rx_notify[evtchn] = 1;
            notify_list[notify_nr++] = evtchn;
        }

        netif_put(netif);
        dev_kfree_skb(skb);
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        mcl++;
        gop++;
#else
        mcl += 2;
        mmuext += 1;
#endif
    }

    while ( notify_nr != 0 )
    {
        evtchn = notify_list[--notify_nr];
        rx_notify[evtchn] = 0;
        notify_via_evtchn(evtchn);
    }

    /* More work to do? */
    if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
        tasklet_schedule(&net_rx_tasklet);
#if 0
    else
        xen_network_done_notify();
#endif
}

static void net_alarm(unsigned long unused)
{
    tasklet_schedule(&net_rx_tasklet);
}

struct net_device_stats *netif_be_get_stats(struct net_device *dev)
{
    netif_t *netif = netdev_priv(dev);
    return &netif->stats;
}

static int __on_net_schedule_list(netif_t *netif)
{
    return netif->list.next != NULL;
}

static void remove_from_net_schedule_list(netif_t *netif)
{
    spin_lock_irq(&net_schedule_list_lock);
    if ( likely(__on_net_schedule_list(netif)) )
    {
        list_del(&netif->list);
        netif->list.next = NULL;
        netif_put(netif);
    }
    spin_unlock_irq(&net_schedule_list_lock);
}

static void add_to_net_schedule_list_tail(netif_t *netif)
{
    if ( __on_net_schedule_list(netif) )
        return;

    spin_lock_irq(&net_schedule_list_lock);
    if ( !__on_net_schedule_list(netif) && netif->active )
    {
        list_add_tail(&netif->list, &net_schedule_list);
        netif_get(netif);
    }
    spin_unlock_irq(&net_schedule_list_lock);
}

void netif_schedule_work(netif_t *netif)
{
    if ( (netif->tx_req_cons != netif->tx->req_prod) &&
         ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
    {
        add_to_net_schedule_list_tail(netif);
        maybe_schedule_tx_action();
    }
}

void netif_deschedule_work(netif_t *netif)
{
    remove_from_net_schedule_list(netif);
}
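
/*
 * Credit-based TX scheduling: an interface may consume up to credit_bytes of
 * ring traffic per credit_usec interval.  When the credit is exhausted,
 * net_tx_action() arms credit_timeout and tx_credit_callback() replenishes
 * the credit and re-runs netif_schedule_work().
 */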
static void tx_credit_callback(unsigned long data)
{
    netif_t *netif = (netif_t *)data;
    netif->remaining_credit = netif->credit_bytes;
    netif_schedule_work(netif);
}

inline static void net_tx_action_dealloc(void)
{
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    gnttab_unmap_grant_ref_t *gop;
#else
    multicall_entry_t *mcl;
#endif
    u16 pending_idx;
    PEND_RING_IDX dc, dp;
    netif_t *netif;

    dc = dealloc_cons;
    dp = dealloc_prod;

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    /*
     * Free up any grants we have finished using
     */
    gop = tx_unmap_ops;
    while ( dc != dp )
    {
        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
        gop->host_addr = MMAP_VADDR(pending_idx);
        gop->dev_bus_addr = 0;
        gop->handle = grant_tx_ref[pending_idx];
        grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
        gop++;
    }
    BUG_ON(HYPERVISOR_grant_table_op(
        GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
#else
    mcl = tx_mcl;
    while ( dc != dp )
    {
        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
        MULTI_update_va_mapping(mcl, MMAP_VADDR(pending_idx),
                                __pte(0), 0);
        mcl++;
    }

    mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
    if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
        BUG();

    mcl = tx_mcl;
#endif
    while ( dealloc_cons != dp )
    {
#ifndef CONFIG_XEN_NETDEV_GRANT_TX
        /* The update_va_mapping() must not fail. */
        BUG_ON(mcl[0].result != 0);
#endif

        pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];

        netif = pending_tx_info[pending_idx].netif;

        make_tx_response(netif, pending_tx_info[pending_idx].req.id,
                         NETIF_RSP_OKAY);

        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;

        /*
         * Scheduling checks must happen after the above response is posted.
         * This avoids a possible race with a guest OS on another CPU if that
         * guest is testing against 'resp_prod' when deciding whether to notify
         * us when it queues additional packets.
         */
        mb();
        if ( (netif->tx_req_cons != netif->tx->req_prod) &&
             ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
            add_to_net_schedule_list_tail(netif);

        netif_put(netif);

#ifndef CONFIG_XEN_NETDEV_GRANT_TX
        mcl++;
#endif
    }

}

/* Called after netfront has transmitted */
static void net_tx_action(unsigned long unused)
{
    struct list_head *ent;
    struct sk_buff *skb;
    netif_t *netif;
    netif_tx_request_t txreq;
    u16 pending_idx;
    NETIF_RING_IDX i;
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    gnttab_map_grant_ref_t *mop;
#else
    multicall_entry_t *mcl;
#endif
    unsigned int data_len;

    if ( dealloc_cons != dealloc_prod )
        net_tx_action_dealloc();

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    mop = tx_map_ops;
#else
    mcl = tx_mcl;
#endif
    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
            !list_empty(&net_schedule_list) )
    {
        /* Get a netif from the list with work to do. */
        ent = net_schedule_list.next;
        netif = list_entry(ent, netif_t, list);
        netif_get(netif);
        remove_from_net_schedule_list(netif);

        /* Work to do? */
        i = netif->tx_req_cons;
        if ( (i == netif->tx->req_prod) ||
             ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
        {
            netif_put(netif);
            continue;
        }

        rmb(); /* Ensure that we see the request before we copy it. */
        memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
               sizeof(txreq));
        /* Credit-based scheduling. */
        if ( txreq.size > netif->remaining_credit )
        {
            unsigned long now = jiffies;
            unsigned long next_credit =
                netif->credit_timeout.expires +
                msecs_to_jiffies(netif->credit_usec / 1000);

            /* Timer could already be pending in some rare cases. */
            if ( timer_pending(&netif->credit_timeout) )
                break;

            /* Already passed the point at which we can replenish credit? */
            if ( time_after_eq(now, next_credit) )
            {
                netif->credit_timeout.expires = now;
                netif->remaining_credit = netif->credit_bytes;
            }

            /* Still too big to send right now? Then set a timer callback. */
            if ( txreq.size > netif->remaining_credit )
            {
                netif->remaining_credit = 0;
                netif->credit_timeout.expires = next_credit;
                netif->credit_timeout.data = (unsigned long)netif;
                netif->credit_timeout.function = tx_credit_callback;
                add_timer_on(&netif->credit_timeout, smp_processor_id());
                break;
            }
        }
        netif->remaining_credit -= txreq.size;

        /*
         * Why the barrier? It ensures that the frontend sees updated req_cons
         * before we check for more work to schedule.
         */
        netif->tx->req_cons = ++netif->tx_req_cons;
        mb();

        netif_schedule_work(netif);

        if ( unlikely(txreq.size < ETH_HLEN) ||
             unlikely(txreq.size > ETH_FRAME_LEN) )
        {
            DPRINTK("Bad packet size: %d\n", txreq.size);
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            continue;
        }

        /* No crossing a page boundary as the payload mustn't fragment. */
        if ( unlikely(((txreq.addr & ~PAGE_MASK) + txreq.size) >= PAGE_SIZE) )
        {
            DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n",
                    txreq.addr, txreq.size,
                    (txreq.addr &~PAGE_MASK) + txreq.size);
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            continue;
        }

        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

        data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;

        if ( unlikely((skb = alloc_skb(data_len+16, GFP_ATOMIC)) == NULL) )
        {
            DPRINTK("Can't allocate a skb in start_xmit.\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            break;
        }

        /* Packets passed to netif_rx() must have some headroom. */
        skb_reserve(skb, 16);
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
        mop->host_addr = MMAP_VADDR(pending_idx);
        mop->dom = netif->domid;
        mop->ref = txreq.addr >> PAGE_SHIFT;
        mop->flags = GNTMAP_host_map | GNTMAP_readonly;
        mop++;
#else
        MULTI_update_va_mapping_otherdomain(
            mcl, MMAP_VADDR(pending_idx),
            pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
            0, netif->domid);

        mcl++;
#endif

        memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
        pending_tx_info[pending_idx].netif = netif;
        *((u16 *)skb->data) = pending_idx;

        __skb_queue_tail(&tx_queue, skb);

        pending_cons++;

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
        if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) )
            break;
#else
        /* Filled the batch queue? */
        if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
            break;
#endif
    }

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    if ( mop == tx_map_ops )
        return;

    BUG_ON(HYPERVISOR_grant_table_op(
        GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));

    mop = tx_map_ops;
#else
    if ( mcl == tx_mcl )
        return;

    BUG_ON(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0);

    mcl = tx_mcl;
#endif
    while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
    {
        pending_idx = *((u16 *)skb->data);
        netif = pending_tx_info[pending_idx].netif;
        memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));

        /* Check the remap error code. */
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
        /*
         * XXX SMH: error returns from grant operations are pretty poorly
         * specified/thought out, but the below at least conforms with
         * what the rest of the code uses.
         */
        if ( unlikely(mop->handle < 0) )
        {
            printk(KERN_ALERT "#### netback grant fails\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            kfree_skb(skb);
            mop++;
            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
            continue;
        }
        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
            FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
        grant_tx_ref[pending_idx] = mop->handle;
#else
        if ( unlikely(mcl[0].result != 0) )
        {
            DPRINTK("Bad page frame\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            kfree_skb(skb);
            mcl++;
            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
            continue;
        }

        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
            FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
#endif

        data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;

        __skb_put(skb, data_len);
        memcpy(skb->data,
               (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
               data_len);
        if ( data_len < txreq.size )
        {
            /* Append the packet payload as a fragment. */
            skb_shinfo(skb)->frags[0].page =
                virt_to_page(MMAP_VADDR(pending_idx));
            skb_shinfo(skb)->frags[0].size = txreq.size - data_len;
            skb_shinfo(skb)->frags[0].page_offset =
                (txreq.addr + data_len) & ~PAGE_MASK;
            skb_shinfo(skb)->nr_frags = 1;
        }
        else
        {
            /* Schedule a response immediately. */
            netif_idx_release(pending_idx);
        }

        skb->data_len = txreq.size - data_len;
        skb->len += skb->data_len;

        skb->dev = netif->dev;
        skb->protocol = eth_type_trans(skb, skb->dev);

        /* No checking needed on localhost, but remember the field is blank. */
        skb->ip_summed = CHECKSUM_UNNECESSARY;
        skb->proto_csum_valid = 1;
        skb->proto_csum_blank = txreq.csum_blank;

        netif->stats.rx_bytes += txreq.size;
        netif->stats.rx_packets++;

        netif_rx(skb);
        netif->dev->last_rx = jiffies;

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
        mop++;
#else
        mcl++;
#endif
    }
}
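
/*
 * netif_page_release() is installed as the PageForeign destructor for the TX
 * mmap pages (see netback_init()).  Releasing such a page queues its pending
 * slot on dealloc_ring[] via netif_idx_release(); net_tx_action_dealloc()
 * later tears down the mapping of the guest frame and sends the TX response.
 */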
static void netif_idx_release(u16 pending_idx)
{
    static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
    unsigned long flags;

    spin_lock_irqsave(&_lock, flags);
    dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
    spin_unlock_irqrestore(&_lock, flags);

    tasklet_schedule(&net_tx_tasklet);
}

static void netif_page_release(struct page *page)
{
    u16 pending_idx = page - virt_to_page(mmap_vstart);

    /* Ready for next use. */
    set_page_count(page, 1);

    netif_idx_release(pending_idx);
}

irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
    netif_t *netif = dev_id;
    if ( tx_work_exists(netif) )
    {
        add_to_net_schedule_list_tail(netif);
        maybe_schedule_tx_action();
    }
    return IRQ_HANDLED;
}

static void make_tx_response(netif_t *netif,
                             u16      id,
                             s8       st)
{
    NETIF_RING_IDX i = netif->tx_resp_prod;
    netif_tx_response_t *resp;

    resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
    resp->id = id;
    resp->status = st;
    wmb();
    netif->tx->resp_prod = netif->tx_resp_prod = ++i;

    mb(); /* Update producer before checking event threshold. */
    if ( i == netif->tx->event )
        notify_via_evtchn(netif->evtchn);
}

static int make_rx_response(netif_t *netif,
                            u16      id,
                            s8       st,
                            unsigned long addr,
                            u16      size,
                            u16      csum_valid)
{
    NETIF_RING_IDX i = netif->rx_resp_prod;
    netif_rx_response_t *resp;

    resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
    resp->addr = addr;
    resp->csum_valid = csum_valid;
    resp->id = id;
    resp->status = (s16)size;
    if ( st < 0 )
        resp->status = (s16)st;
    wmb();
    netif->rx->resp_prod = netif->rx_resp_prod = ++i;

    mb(); /* Update producer before checking event threshold. */
    return (i == netif->rx->event);
}

static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
    struct list_head *ent;
    netif_t *netif;
    int i = 0;

    printk(KERN_ALERT "netif_schedule_list:\n");
    spin_lock_irq(&net_schedule_list_lock);

    list_for_each ( ent, &net_schedule_list )
    {
        netif = list_entry(ent, netif_t, list);
        printk(KERN_ALERT " %d: private(rx_req_cons=%08x rx_resp_prod=%08x\n",
               i, netif->rx_req_cons, netif->rx_resp_prod);
        printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
               netif->tx_req_cons, netif->tx_resp_prod);
        printk(KERN_ALERT " shared(rx_req_prod=%08x rx_resp_prod=%08x\n",
               netif->rx->req_prod, netif->rx->resp_prod);
        printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
               netif->rx->event, netif->tx->req_prod);
        printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
               netif->tx->resp_prod, netif->tx->event);
        i++;
    }

    spin_unlock_irq(&net_schedule_list_lock);
    printk(KERN_ALERT " ** End of netif_schedule_list **\n");

    return IRQ_HANDLED;
}

static int __init netback_init(void)
{
    int i;
    struct page *page;

    if ( !(xen_start_info.flags & SIF_NET_BE_DOMAIN) &&
         !(xen_start_info.flags & SIF_INITDOMAIN) )
        return 0;

    printk("Initialising Xen netif backend\n");
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    printk("#### netback tx using grant tables\n");
#endif
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    printk("#### netback rx using grant tables\n");
#endif

    /* We can increase reservation by this much in net_rx_action(). */
    balloon_update_driver_allowance(NETIF_RX_RING_SIZE);

    skb_queue_head_init(&rx_queue);
    skb_queue_head_init(&tx_queue);

    init_timer(&net_timer);
    net_timer.data = 0;
    net_timer.function = net_alarm;

    page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
    BUG_ON(page == NULL);
    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));

    for ( i = 0; i < MAX_PENDING_REQS; i++ )
    {
        page = virt_to_page(MMAP_VADDR(i));
        set_page_count(page, 1);
        SetPageForeign(page, netif_page_release);
    }

    pending_cons = 0;
    pending_prod = MAX_PENDING_REQS;
    for ( i = 0; i < MAX_PENDING_REQS; i++ )
        pending_ring[i] = i;

    spin_lock_init(&net_schedule_list_lock);
    INIT_LIST_HEAD(&net_schedule_list);

    netif_xenbus_init();

    (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
                      netif_be_dbg, SA_SHIRQ,
                      "net-be-dbg", &netif_be_dbg);

    return 0;
}

static void netback_cleanup(void)
{
    BUG();
}

module_init(netback_init);
module_exit(netback_cleanup);