ia64/xen-unstable: linux-2.6-xen-sparse/drivers/xen/netback/netback.c
changeset 6422:e24fd7012ffb
author: cl349@firebug.cl.cam.ac.uk
date:   Thu Aug 25 10:09:39 2005 +0000 (2005-08-25)
/******************************************************************************
 * drivers/xen/netback/netback.c
 *
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/xen/netfront/netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 */

#include "common.h"
#include <asm-xen/balloon.h>

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
#include <linux/delay.h>
#endif

#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
#include <asm-xen/xen-public/grant_table.h>
#include <asm-xen/gnttab.h>
#ifdef GRANT_DEBUG
static void
dump_packet(int tag, u32 addr, unsigned char *p)
{
    int i;

    printk(KERN_ALERT "#### rx_action %c %08x ", tag & 0xff, addr);
    for (i = 0; i < 20; i++) {
        printk("%02x", p[i]);
    }
    printk("\n");
}
#endif
#endif

static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
                             u16      id,
                             s8       st);
static int  make_rx_response(netif_t *netif,
                             u16      id,
                             s8       st,
                             unsigned long addr,
                             u16      size,
                             u16      csum_valid);

static void net_tx_action(unsigned long unused);
static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);

static void net_rx_action(unsigned long unused);
static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);

static struct timer_list net_timer;

#define MAX_PENDING_REQS 256

static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
#else
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
#endif
static unsigned char rx_notify[NR_EVENT_CHANNELS];

/* Don't currently gate addition of an interface to the tx scheduling list. */
#define tx_work_exists(_if) (1)

static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))

#define PKT_PROT_LEN 64
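
/*
 * Note: only the first PKT_PROT_LEN bytes of each transmitted packet are
 * copied into the locally allocated skb (enough for the protocol headers to
 * be inspected in the linear area); any remainder is attached as a page
 * fragment in net_tx_action() below.
 */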
static struct {
    netif_tx_request_t req;
    netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
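
/*
 * The pending ring is a free list of request slots driven by free-running
 * producer/consumer indices: a slot is taken at 'pending_cons' and returned
 * at 'pending_prod'.  MASK_PEND_IDX() relies on MAX_PENDING_REQS being a
 * power of two, and NR_PENDING_REQS is the number of slots currently in use
 * (e.g. pending_prod == 260, pending_cons == 7 gives 256 - 260 + 7 == 3);
 * unsigned wrap-around keeps the arithmetic correct after overflow.
 */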
/* Freed TX SKBs get batched on this ring before return to pending_ring. */
static u16 dealloc_ring[MAX_PENDING_REQS];
static PEND_RING_IDX dealloc_prod, dealloc_cons;

static struct sk_buff_head tx_queue;

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
static u16 grant_tx_ref[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
#else
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
#endif

#if defined(CONFIG_XEN_NETDEV_GRANT_TX) || defined(CONFIG_XEN_NETDEV_GRANT_RX)
#define GRANT_INVALID_REF (0xFFFF)
#endif

static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;

#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
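
/*
 * Small pool of machine frames used to re-back packet data pages handed to
 * guests.  alloc_mfn() refills the pool from the hypervisor in batches of up
 * to MAX_MFN_ALLOC frames rather than making one hypercall per packet;
 * free_mfn() returns a frame to the pool, or straight to the hypervisor if
 * the pool is already full.
 */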
static unsigned long alloc_mfn(void)
{
    unsigned long mfn = 0, flags;
    spin_lock_irqsave(&mfn_lock, flags);
    if ( unlikely(alloc_index == 0) )
        alloc_index = HYPERVISOR_dom_mem_op(
            MEMOP_increase_reservation, mfn_list, MAX_MFN_ALLOC, 0);
    if ( alloc_index != 0 )
        mfn = mfn_list[--alloc_index];
    spin_unlock_irqrestore(&mfn_lock, flags);
    return mfn;
}

#ifndef CONFIG_XEN_NETDEV_GRANT_RX
static void free_mfn(unsigned long mfn)
{
    unsigned long flags;
    spin_lock_irqsave(&mfn_lock, flags);
    if ( alloc_index != MAX_MFN_ALLOC )
        mfn_list[alloc_index++] = mfn;
    else if ( HYPERVISOR_dom_mem_op(MEMOP_decrease_reservation,
                                    &mfn, 1, 0) != 1 )
        BUG();
    spin_unlock_irqrestore(&mfn_lock, flags);
}
#endif
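
/*
 * Only kick the TX tasklet when fewer than half of the pending-request slots
 * are in use and at least one interface is queued for service; scheduling
 * below the half-full mark avoids re-arming the tasklet for every individual
 * completion.
 */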
static inline void maybe_schedule_tx_action(void)
{
    smp_mb();
    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
         !list_empty(&net_schedule_list) )
        tasklet_schedule(&net_tx_tasklet);
}

/*
 * A gross way of confirming the origin of an skb data page. The slab
 * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
 */
static inline int is_xen_skb(struct sk_buff *skb)
{
    extern kmem_cache_t *skbuff_cachep;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
    kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
#else
    kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->list.next;
#endif
    return (cp == skbuff_cachep);
}
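
/*
 * netif_be_start_xmit() is the backend's transmit hook: a packet sent here by
 * the backend domain's network stack is destined for the guest, i.e. it
 * travels the guest's receive path.  The skb is only queued on rx_queue here;
 * the actual page transfer (or grant donation) to the guest happens later in
 * net_rx_action().
 */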
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    netif_t *netif = netdev_priv(dev);

    ASSERT(skb->dev == dev);

    /* Drop the packet if the target domain has no receive buffers. */
    if ( !netif->active ||
         (netif->rx_req_cons == netif->rx->req_prod) ||
         ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE) )
        goto drop;

    /*
     * We do not copy the packet unless:
     *  1. The data is shared; or
     *  2. The data is not allocated from our special cache.
     * NB. We also couldn't cope with fragmented packets, but we won't get
     *     any because we do not advertise the NETIF_F_SG feature.
     */
    if ( skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb) )
    {
        int hlen = skb->data - skb->head;
        struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
        if ( unlikely(nskb == NULL) )
            goto drop;
        skb_reserve(nskb, hlen);
        __skb_put(nskb, skb->len);
        if (skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen))
            BUG();
        nskb->dev = skb->dev;
        nskb->proto_csum_valid = skb->proto_csum_valid;
        dev_kfree_skb(skb);
        skb = nskb;
    }
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
#ifdef DEBUG_GRANT
    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
           netif->rx->req_prod,
           netif->rx_req_cons,
           netif->rx->ring[
               MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
           netif->rx->ring[
               MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
#endif
#endif
    netif->rx_req_cons++;
    netif_get(netif);

    skb_queue_tail(&rx_queue, skb);
    tasklet_schedule(&net_rx_tasklet);

    return 0;

 drop:
    netif->stats.tx_dropped++;
    dev_kfree_skb(skb);
    return 0;
}

#if 0
static void xen_network_done_notify(void)
{
    static struct net_device *eth0_dev = NULL;
    if ( unlikely(eth0_dev == NULL) )
        eth0_dev = __dev_get_by_name("eth0");
    netif_rx_schedule(eth0_dev);
}
/*
 * Add following to poll() function in NAPI driver (Tigon3 is example):
 *  if ( xen_network_done() )
 *      tg3_enable_ints(tp);
 */
int xen_network_done(void)
{
    return skb_queue_empty(&rx_queue);
}
#endif
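
/*
 * net_rx_action() drains rx_queue in two passes.  First pass: for each skb,
 * allocate a fresh machine frame to re-back the local data page, add a
 * batched multicall entry that remaps the backend's virtual address onto the
 * new frame, and queue either a grant-table donate operation or an
 * MMUEXT_REASSIGN_PAGE op so the old frame can be handed to the guest.
 * Second pass: once the hypercalls have completed, fix up skb state, post a
 * response on each interface's RX ring, and raise any required event-channel
 * notifications (coalesced via rx_notify[] so each channel fires only once).
 */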
static void net_rx_action(unsigned long unused)
{
    netif_t *netif;
    s8 status;
    u16 size, id, evtchn;
    multicall_entry_t *mcl;
    mmu_update_t *mmu;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    gnttab_donate_t *gop;
#else
    struct mmuext_op *mmuext;
#endif
    unsigned long vdata, old_mfn, new_mfn;
    struct sk_buff_head rxq;
    struct sk_buff *skb;
    u16 notify_list[NETIF_RX_RING_SIZE];
    int notify_nr = 0;

    skb_queue_head_init(&rxq);

    mcl = rx_mcl;
    mmu = rx_mmu;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    gop = grant_rx_op;
#else
    mmuext = rx_mmuext;
#endif

    while ( (skb = skb_dequeue(&rx_queue)) != NULL )
    {
        netif   = netdev_priv(skb->dev);
        vdata   = (unsigned long)skb->data;
        old_mfn = virt_to_mfn(vdata);

        /* Memory squeeze? Back off for an arbitrary while. */
        if ( (new_mfn = alloc_mfn()) == 0 )
        {
            if ( net_ratelimit() )
                printk(KERN_WARNING "Memory squeeze in netback driver.\n");
            mod_timer(&net_timer, jiffies + HZ);
            skb_queue_head(&rx_queue, skb);
            break;
        }

        /*
         * Set the new P2M table entry before reassigning the old data page.
         * Heed the comment in pgtable-2level.h:pte_page(). :-)
         */
        phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;

        MULTI_update_va_mapping(mcl, vdata,
                                pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
        mcl++;

#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        gop->mfn = old_mfn;
        gop->domid = netif->domid;
        gop->handle = netif->rx->ring[
            MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
        netif->rx_resp_prod_copy++;
        gop++;
#else
        mcl->op = __HYPERVISOR_mmuext_op;
        mcl->args[0] = (unsigned long)mmuext;
        mcl->args[1] = 1;
        mcl->args[2] = 0;
        mcl->args[3] = netif->domid;
        mcl++;

        mmuext->cmd = MMUEXT_REASSIGN_PAGE;
        mmuext->mfn = old_mfn;
        mmuext++;
#endif
        mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
        mmu->val = __pa(vdata) >> PAGE_SHIFT;
        mmu++;

        __skb_queue_tail(&rxq, skb);

#ifdef DEBUG_GRANT
        dump_packet('a', old_mfn, vdata);
#endif
        /* Filled the batch queue? */
        if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
            break;
    }

    if ( mcl == rx_mcl )
        return;

    mcl->op = __HYPERVISOR_mmu_update;
    mcl->args[0] = (unsigned long)rx_mmu;
    mcl->args[1] = mmu - rx_mmu;
    mcl->args[2] = 0;
    mcl->args[3] = DOMID_SELF;
    mcl++;

#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
#else
    mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
#endif
    if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
        BUG();

    mcl = rx_mcl;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    BUG_ON(HYPERVISOR_grant_table_op(
        GNTTABOP_donate, grant_rx_op, gop - grant_rx_op));
    gop = grant_rx_op;
#else
    mmuext = rx_mmuext;
#endif
    while ( (skb = __skb_dequeue(&rxq)) != NULL )
    {
        netif = netdev_priv(skb->dev);
        size  = skb->tail - skb->data;

        /* Rederive the machine addresses. */
        new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
#else
        old_mfn = mmuext[0].mfn;
#endif
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;

        netif->stats.tx_bytes += size;
        netif->stats.tx_packets++;

        /* The update_va_mapping() must not fail. */
        BUG_ON(mcl[0].result != 0);

        /* Check the reassignment error code. */
        status = NETIF_RSP_OKAY;
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        BUG_ON(gop->status != 0); /* XXX */
#else
        if ( unlikely(mcl[1].result != 0) )
        {
            DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
            free_mfn(old_mfn);
            status = NETIF_RSP_ERROR;
        }
#endif
        evtchn = netif->evtchn;
        id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
        if ( make_rx_response(netif, id, status,
                              (old_mfn << PAGE_SHIFT) | /* XXX */
                              ((unsigned long)skb->data & ~PAGE_MASK),
                              size, skb->proto_csum_valid) &&
             (rx_notify[evtchn] == 0) )
        {
            rx_notify[evtchn] = 1;
            notify_list[notify_nr++] = evtchn;
        }

        netif_put(netif);
        dev_kfree_skb(skb);
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
        mcl++;
        gop++;
#else
        mcl += 2;
        mmuext += 1;
#endif
    }

    while ( notify_nr != 0 )
    {
        evtchn = notify_list[--notify_nr];
        rx_notify[evtchn] = 0;
        notify_via_evtchn(evtchn);
    }

    /* More work to do? */
    if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
        tasklet_schedule(&net_rx_tasklet);
#if 0
    else
        xen_network_done_notify();
#endif
}
static void net_alarm(unsigned long unused)
{
    tasklet_schedule(&net_rx_tasklet);
}

struct net_device_stats *netif_be_get_stats(struct net_device *dev)
{
    netif_t *netif = netdev_priv(dev);
    return &netif->stats;
}

static int __on_net_schedule_list(netif_t *netif)
{
    return netif->list.next != NULL;
}

static void remove_from_net_schedule_list(netif_t *netif)
{
    spin_lock_irq(&net_schedule_list_lock);
    if ( likely(__on_net_schedule_list(netif)) )
    {
        list_del(&netif->list);
        netif->list.next = NULL;
        netif_put(netif);
    }
    spin_unlock_irq(&net_schedule_list_lock);
}

static void add_to_net_schedule_list_tail(netif_t *netif)
{
    if ( __on_net_schedule_list(netif) )
        return;

    spin_lock_irq(&net_schedule_list_lock);
    if ( !__on_net_schedule_list(netif) && netif->active )
    {
        list_add_tail(&netif->list, &net_schedule_list);
        netif_get(netif);
    }
    spin_unlock_irq(&net_schedule_list_lock);
}

void netif_schedule_work(netif_t *netif)
{
    if ( (netif->tx_req_cons != netif->tx->req_prod) &&
         ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
    {
        add_to_net_schedule_list_tail(netif);
        maybe_schedule_tx_action();
    }
}

void netif_deschedule_work(netif_t *netif)
{
    remove_from_net_schedule_list(netif);
}
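
/*
 * Credit-based transmit scheduling: each interface may send up to
 * 'credit_bytes' of data per 'credit_usec' period.  When the credit is
 * exhausted, net_tx_action() parks the interface and arms 'credit_timeout';
 * this callback refills the credit once the period expires and puts the
 * interface back on the schedule list.
 */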
static void tx_credit_callback(unsigned long data)
{
    netif_t *netif = (netif_t *)data;
    netif->remaining_credit = netif->credit_bytes;
    netif_schedule_work(netif);
}
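
/*
 * Retire transmit buffers whose pages have been released by the local
 * network stack: for each entry batched on dealloc_ring, unmap the foreign
 * grant (or zap the PTE in the non-grant case), post a NETIF_RSP_OKAY
 * response to the owning frontend, and return the slot to pending_ring for
 * reuse.
 */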
inline static void net_tx_action_dealloc(void)
{
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    gnttab_unmap_grant_ref_t *gop;
#else
    multicall_entry_t *mcl;
#endif
    u16 pending_idx;
    PEND_RING_IDX dc, dp;
    netif_t *netif;

    dc = dealloc_cons;
    dp = dealloc_prod;

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    /*
     * Free up any grants we have finished using
     */
    gop = tx_unmap_ops;
    while ( dc != dp )
    {
        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
        gop->host_addr    = MMAP_VADDR(pending_idx);
        gop->dev_bus_addr = 0;
        gop->handle       = grant_tx_ref[pending_idx];
        grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
        gop++;
    }
    BUG_ON(HYPERVISOR_grant_table_op(
        GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
#else
    mcl = tx_mcl;
    while ( dc != dp )
    {
        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
        MULTI_update_va_mapping(mcl, MMAP_VADDR(pending_idx),
                                __pte(0), 0);
        mcl++;
    }

    mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
    if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
        BUG();

    mcl = tx_mcl;
#endif
    while ( dealloc_cons != dp )
    {
#ifndef CONFIG_XEN_NETDEV_GRANT_TX
        /* The update_va_mapping() must not fail. */
        BUG_ON(mcl[0].result != 0);
#endif

        pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];

        netif = pending_tx_info[pending_idx].netif;

        make_tx_response(netif, pending_tx_info[pending_idx].req.id,
                         NETIF_RSP_OKAY);

        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;

        /*
         * Scheduling checks must happen after the above response is posted.
         * This avoids a possible race with a guest OS on another CPU if that
         * guest is testing against 'resp_prod' when deciding whether to notify
         * us when it queues additional packets.
         */
        mb();
        if ( (netif->tx_req_cons != netif->tx->req_prod) &&
             ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
            add_to_net_schedule_list_tail(netif);

        netif_put(netif);

#ifndef CONFIG_XEN_NETDEV_GRANT_TX
        mcl++;
#endif
    }
}

/* Called after netfront has transmitted */
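/*
 * Pull requests off each scheduled interface's TX ring (subject to the
 * credit limit), map or remap the frontend's data page at MMAP_VADDR(),
 * copy the first PKT_PROT_LEN bytes into a fresh skb, attach any remainder
 * as a page fragment, and feed the result to netif_rx().
 */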
static void net_tx_action(unsigned long unused)
{
    struct list_head *ent;
    struct sk_buff *skb;
    netif_t *netif;
    netif_tx_request_t txreq;
    u16 pending_idx;
    NETIF_RING_IDX i;
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    gnttab_map_grant_ref_t *mop;
#else
    multicall_entry_t *mcl;
#endif
    unsigned int data_len;

    if ( dealloc_cons != dealloc_prod )
        net_tx_action_dealloc();

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    mop = tx_map_ops;
#else
    mcl = tx_mcl;
#endif
    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
            !list_empty(&net_schedule_list) )
    {
        /* Get a netif from the list with work to do. */
        ent = net_schedule_list.next;
        netif = list_entry(ent, netif_t, list);
        netif_get(netif);
        remove_from_net_schedule_list(netif);

        /* Work to do? */
        i = netif->tx_req_cons;
        if ( (i == netif->tx->req_prod) ||
             ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
        {
            netif_put(netif);
            continue;
        }

        rmb(); /* Ensure that we see the request before we copy it. */
        memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
               sizeof(txreq));
        /* Credit-based scheduling. */
        if ( txreq.size > netif->remaining_credit )
        {
            unsigned long now = jiffies;
            unsigned long next_credit =
                netif->credit_timeout.expires +
                msecs_to_jiffies(netif->credit_usec / 1000);

            /* Timer could already be pending in some rare cases. */
            if ( timer_pending(&netif->credit_timeout) )
                break;

            /* Already passed the point at which we can replenish credit? */
            if ( time_after_eq(now, next_credit) )
            {
                netif->credit_timeout.expires = now;
                netif->remaining_credit = netif->credit_bytes;
            }

            /* Still too big to send right now? Then set a timer callback. */
            if ( txreq.size > netif->remaining_credit )
            {
                netif->remaining_credit = 0;
                netif->credit_timeout.expires  = next_credit;
                netif->credit_timeout.data     = (unsigned long)netif;
                netif->credit_timeout.function = tx_credit_callback;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
                add_timer_on(&netif->credit_timeout, smp_processor_id());
#else
                add_timer(&netif->credit_timeout);
#endif
                break;
            }
        }
        netif->remaining_credit -= txreq.size;

        /*
         * Why the barrier? It ensures that the frontend sees updated req_cons
         * before we check for more work to schedule.
         */
        netif->tx->req_cons = ++netif->tx_req_cons;
        mb();

        netif_schedule_work(netif);

        if ( unlikely(txreq.size < ETH_HLEN) ||
             unlikely(txreq.size > ETH_FRAME_LEN) )
        {
            DPRINTK("Bad packet size: %d\n", txreq.size);
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            continue;
        }

        /* No crossing a page boundary as the payload mustn't fragment. */
        if ( unlikely(((txreq.addr & ~PAGE_MASK) + txreq.size) >= PAGE_SIZE) )
        {
            DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n",
                    txreq.addr, txreq.size,
                    (txreq.addr &~PAGE_MASK) + txreq.size);
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            continue;
        }

        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

        data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;

        if ( unlikely((skb = alloc_skb(data_len+16, GFP_ATOMIC)) == NULL) )
        {
            DPRINTK("Can't allocate a skb in start_xmit.\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            break;
        }

        /* Packets passed to netif_rx() must have some headroom. */
        skb_reserve(skb, 16);
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
        mop->host_addr = MMAP_VADDR(pending_idx);
        mop->dom       = netif->domid;
        mop->ref       = txreq.addr >> PAGE_SHIFT;
        mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
        mop++;
#else
        MULTI_update_va_mapping_otherdomain(
            mcl, MMAP_VADDR(pending_idx),
            pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
            0, netif->domid);

        mcl++;
#endif

        memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
        pending_tx_info[pending_idx].netif = netif;
        *((u16 *)skb->data) = pending_idx;

        __skb_queue_tail(&tx_queue, skb);

        pending_cons++;

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
        if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) )
            break;
#else
        /* Filled the batch queue? */
        if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
            break;
#endif
    }

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    if ( mop == tx_map_ops )
        return;

    BUG_ON(HYPERVISOR_grant_table_op(
        GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));

    mop = tx_map_ops;
#else
    if ( mcl == tx_mcl )
        return;

    BUG_ON(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0);

    mcl = tx_mcl;
#endif
    while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
    {
        pending_idx = *((u16 *)skb->data);
        netif       = pending_tx_info[pending_idx].netif;
        memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));

        /* Check the remap error code. */
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
        /*
         * XXX SMH: error returns from grant operations are pretty poorly
         * specified/thought out, but the below at least conforms with
         * what the rest of the code uses.
         */
        if ( unlikely(mop->handle < 0) )
        {
            printk(KERN_ALERT "#### netback grant fails\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            kfree_skb(skb);
            mop++;
            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
            continue;
        }
        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
            FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
        grant_tx_ref[pending_idx] = mop->handle;
#else
        if ( unlikely(mcl[0].result != 0) )
        {
            DPRINTK("Bad page frame\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            kfree_skb(skb);
            mcl++;
            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
            continue;
        }

        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
            FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
#endif

        data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;

        __skb_put(skb, data_len);
        memcpy(skb->data,
               (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
               data_len);
        if ( data_len < txreq.size )
        {
            /* Append the packet payload as a fragment. */
            skb_shinfo(skb)->frags[0].page        =
                virt_to_page(MMAP_VADDR(pending_idx));
            skb_shinfo(skb)->frags[0].size        = txreq.size - data_len;
            skb_shinfo(skb)->frags[0].page_offset =
                (txreq.addr + data_len) & ~PAGE_MASK;
            skb_shinfo(skb)->nr_frags = 1;
        }
        else
        {
            /* Schedule a response immediately. */
            netif_idx_release(pending_idx);
        }

        skb->data_len  = txreq.size - data_len;
        skb->len      += skb->data_len;

        skb->dev      = netif->dev;
        skb->protocol = eth_type_trans(skb, skb->dev);

        /* No checking needed on localhost, but remember the field is blank. */
        skb->ip_summed        = CHECKSUM_UNNECESSARY;
        skb->proto_csum_valid = 1;
        skb->proto_csum_blank = txreq.csum_blank;

        netif->stats.rx_bytes += txreq.size;
        netif->stats.rx_packets++;

        netif_rx(skb);
        netif->dev->last_rx = jiffies;

#ifdef CONFIG_XEN_NETDEV_GRANT_TX
        mop++;
#else
        mcl++;
#endif
    }
}
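
/*
 * Buffer release path: netif_page_release() is installed as the foreign-page
 * destructor for the mapped TX pages (see netback_init() below), so when the
 * local network stack drops its last reference the slot index is batched onto
 * dealloc_ring and the TX tasklet is kicked to unmap the page and send the
 * final response.
 */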
static void netif_idx_release(u16 pending_idx)
{
    static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
    unsigned long flags;

    spin_lock_irqsave(&_lock, flags);
    dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
    spin_unlock_irqrestore(&_lock, flags);

    tasklet_schedule(&net_tx_tasklet);
}

static void netif_page_release(struct page *page)
{
    u16 pending_idx = page - virt_to_page(mmap_vstart);

    /* Ready for next use. */
    set_page_count(page, 1);

    netif_idx_release(pending_idx);
}

irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
    netif_t *netif = dev_id;
    if ( tx_work_exists(netif) )
    {
        add_to_net_schedule_list_tail(netif);
        maybe_schedule_tx_action();
    }
    return IRQ_HANDLED;
}
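
/*
 * Response helpers.  Responses are written into the shared ring and the
 * private *_resp_prod copy is published to the frontend; an event-channel
 * notification is raised only when the new producer index matches the
 * 'event' threshold set by the frontend, so the frontend is not interrupted
 * for every single response.
 */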
static void make_tx_response(netif_t *netif,
                             u16      id,
                             s8       st)
{
    NETIF_RING_IDX i = netif->tx_resp_prod;
    netif_tx_response_t *resp;

    resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
    resp->id     = id;
    resp->status = st;
    wmb();
    netif->tx->resp_prod = netif->tx_resp_prod = ++i;

    mb(); /* Update producer before checking event threshold. */
    if ( i == netif->tx->event )
        notify_via_evtchn(netif->evtchn);
}

static int make_rx_response(netif_t *netif,
                            u16      id,
                            s8       st,
                            unsigned long addr,
                            u16      size,
                            u16      csum_valid)
{
    NETIF_RING_IDX i = netif->rx_resp_prod;
    netif_rx_response_t *resp;

    resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
    resp->addr       = addr;
    resp->csum_valid = csum_valid;
    resp->id         = id;
    resp->status     = (s16)size;
    if ( st < 0 )
        resp->status = (s16)st;
    wmb();
    netif->rx->resp_prod = netif->rx_resp_prod = ++i;

    mb(); /* Update producer before checking event threshold. */
    return (i == netif->rx->event);
}
static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
    struct list_head *ent;
    netif_t *netif;
    int i = 0;

    printk(KERN_ALERT "netif_schedule_list:\n");
    spin_lock_irq(&net_schedule_list_lock);

    list_for_each ( ent, &net_schedule_list )
    {
        netif = list_entry(ent, netif_t, list);
        printk(KERN_ALERT " %d: private(rx_req_cons=%08x rx_resp_prod=%08x\n",
               i, netif->rx_req_cons, netif->rx_resp_prod);
        printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
               netif->tx_req_cons, netif->tx_resp_prod);
        printk(KERN_ALERT "   shared(rx_req_prod=%08x rx_resp_prod=%08x\n",
               netif->rx->req_prod, netif->rx->resp_prod);
        printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
               netif->rx->event, netif->tx->req_prod);
        printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
               netif->tx->resp_prod, netif->tx->event);
        i++;
    }

    spin_unlock_irq(&net_schedule_list_lock);
    printk(KERN_ALERT " ** End of netif_schedule_list **\n");

    return IRQ_HANDLED;
}
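
/*
 * Module initialisation: runs only in a domain flagged as a network backend
 * (or the initial domain).  It reserves an empty page range at mmap_vstart
 * for mapping frontend TX buffers, marks those pages as foreign with
 * netif_page_release() as their destructor, seeds the pending ring, and then
 * brings up the interface, xenbus and debug-IRQ machinery.
 */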
static int __init netback_init(void)
{
    int i;
    struct page *page;

    if ( !(xen_start_info.flags & SIF_NET_BE_DOMAIN) &&
         !(xen_start_info.flags & SIF_INITDOMAIN) )
        return 0;

    printk("Initialising Xen netif backend\n");
#ifdef CONFIG_XEN_NETDEV_GRANT_TX
    printk("#### netback tx using grant tables\n");
#endif
#ifdef CONFIG_XEN_NETDEV_GRANT_RX
    printk("#### netback rx using grant tables\n");
#endif

    /* We can increase reservation by this much in net_rx_action(). */
    balloon_update_driver_allowance(NETIF_RX_RING_SIZE);

    skb_queue_head_init(&rx_queue);
    skb_queue_head_init(&tx_queue);

    init_timer(&net_timer);
    net_timer.data = 0;
    net_timer.function = net_alarm;

    netif_interface_init();

    page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
    BUG_ON(page == NULL);
    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));

    for ( i = 0; i < MAX_PENDING_REQS; i++ )
    {
        page = virt_to_page(MMAP_VADDR(i));
        set_page_count(page, 1);
        SetPageForeign(page, netif_page_release);
    }

    pending_cons = 0;
    pending_prod = MAX_PENDING_REQS;
    for ( i = 0; i < MAX_PENDING_REQS; i++ )
        pending_ring[i] = i;

    spin_lock_init(&net_schedule_list_lock);
    INIT_LIST_HEAD(&net_schedule_list);

    netif_xenbus_init();

    (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
                      netif_be_dbg, SA_SHIRQ,
                      "net-be-dbg", &netif_be_dbg);

    return 0;
}
static void netback_cleanup(void)
{
    BUG();
}

module_init(netback_init);
module_exit(netback_cleanup);