ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/netback/netback.c @ 6699:006efe128837

Yet more PAE fixes, this time in the paravirt drivers.
Signed-off-by: ian@xensource.com
author iap10@freefall.cl.cam.ac.uk
date Thu Sep 08 01:00:04 2005 +0000 (2005-09-08)
parents 1f460d0fd6c6
children 2704a88c3295 cdfa7dd00c44
/******************************************************************************
 * drivers/xen/netback/netback.c
 *
 * Back-end of the driver for virtual network devices. This portion of the
 * driver exports a 'unified' network-device interface that can be accessed
 * by any operating system that implements a compatible front end. A
 * reference front-end implementation can be found in:
 *  drivers/xen/netfront/netfront.c
 *
 * Copyright (c) 2002-2005, K A Fraser
 */

#include "common.h"
#include <asm-xen/balloon.h>
#include <asm-xen/xen-public/memory.h>

static void netif_idx_release(u16 pending_idx);
static void netif_page_release(struct page *page);
static void make_tx_response(netif_t *netif,
                             u16      id,
                             s8       st);
static int  make_rx_response(netif_t *netif,
                             u16      id,
                             s8       st,
                             unsigned long addr,
                             u16      size,
                             u16      csum_valid);

static void net_tx_action(unsigned long unused);
static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);

static void net_rx_action(unsigned long unused);
static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);

static struct timer_list net_timer;

#define MAX_PENDING_REQS 256

static struct sk_buff_head rx_queue;
static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE*2+1];
static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
#ifdef CONFIG_XEN_NETDEV_GRANT
static gnttab_donate_t grant_rx_op[MAX_PENDING_REQS];
#else
static struct mmuext_op rx_mmuext[NETIF_RX_RING_SIZE];
#endif
static unsigned char rx_notify[NR_EVENT_CHANNELS];

/* Don't currently gate addition of an interface to the tx scheduling list. */
#define tx_work_exists(_if) (1)

static unsigned long mmap_vstart;
#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))

#define PKT_PROT_LEN 64

static struct {
    netif_tx_request_t req;
    netif_t *netif;
} pending_tx_info[MAX_PENDING_REQS];
static u16 pending_ring[MAX_PENDING_REQS];
typedef unsigned int PEND_RING_IDX;
#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
static PEND_RING_IDX pending_prod, pending_cons;
#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

/* Freed TX SKBs get batched on this ring before return to pending_ring. */
static u16 dealloc_ring[MAX_PENDING_REQS];
static PEND_RING_IDX dealloc_prod, dealloc_cons;

static struct sk_buff_head tx_queue;

#ifdef CONFIG_XEN_NETDEV_GRANT
static u16 grant_tx_ref[MAX_PENDING_REQS];
static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
#else
static multicall_entry_t tx_mcl[MAX_PENDING_REQS];
#endif

static struct list_head net_schedule_list;
static spinlock_t net_schedule_list_lock;

#define MAX_MFN_ALLOC 64
static unsigned long mfn_list[MAX_MFN_ALLOC];
static unsigned int alloc_index = 0;
static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;

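/*
 * Pop a machine frame from mfn_list, refilling the pool from Xen with a
 * single XENMEM_increase_reservation batch when it runs dry. Returns 0 if
 * no frame could be obtained.
 */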
static unsigned long alloc_mfn(void)
{
    unsigned long mfn = 0, flags;
    struct xen_memory_reservation reservation = {
        .extent_start = mfn_list,
        .nr_extents   = MAX_MFN_ALLOC,
        .extent_order = 0,
        .domid        = DOMID_SELF
    };
    spin_lock_irqsave(&mfn_lock, flags);
    if ( unlikely(alloc_index == 0) )
        alloc_index = HYPERVISOR_memory_op(
            XENMEM_increase_reservation, &reservation);
    if ( alloc_index != 0 )
        mfn = mfn_list[--alloc_index];
    spin_unlock_irqrestore(&mfn_lock, flags);
    return mfn;
}

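/*
 * Return a frame to the local pool, or hand it back to Xen with
 * XENMEM_decrease_reservation if the pool is already full. Only needed in
 * the non-grant-table build.
 */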
#ifndef CONFIG_XEN_NETDEV_GRANT
static void free_mfn(unsigned long mfn)
{
    unsigned long flags;
    struct xen_memory_reservation reservation = {
        .extent_start = &mfn,
        .nr_extents   = 1,
        .extent_order = 0,
        .domid        = DOMID_SELF
    };
    spin_lock_irqsave(&mfn_lock, flags);
    if ( alloc_index != MAX_MFN_ALLOC )
        mfn_list[alloc_index++] = mfn;
    else if ( HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation)
              != 1 )
        BUG();
    spin_unlock_irqrestore(&mfn_lock, flags);
}
#endif

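/*
 * Kick the transmit tasklet, but only when more than half of the pending
 * slots are free and some interface actually has work queued.
 */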
static inline void maybe_schedule_tx_action(void)
{
    smp_mb();
    if ( (NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
         !list_empty(&net_schedule_list) )
        tasklet_schedule(&net_tx_tasklet);
}

/*
 * A gross way of confirming the origin of an skb data page. The slab
 * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
 */
static inline int is_xen_skb(struct sk_buff *skb)
{
    extern kmem_cache_t *skbuff_cachep;
    kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
    return (cp == skbuff_cachep);
}

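/*
 * Packets destined for a frontend arrive here from the local stack. We queue
 * them on rx_queue and let net_rx_action() transfer the backing page to the
 * target domain.
 */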
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    netif_t *netif = netdev_priv(dev);

    ASSERT(skb->dev == dev);

    /* Drop the packet if the target domain has no receive buffers. */
    if ( !netif->active ||
         (netif->rx_req_cons == netif->rx->req_prod) ||
         ((netif->rx_req_cons-netif->rx_resp_prod) == NETIF_RX_RING_SIZE) )
        goto drop;

    /*
     * We do not copy the packet unless:
     *  1. The data is shared; or
     *  2. The data is not allocated from our special cache.
     * NB. We also cannot cope with fragmented packets, but we won't get
     * any because we do not advertise the NETIF_F_SG feature.
     */
    if ( skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb) )
    {
        int hlen = skb->data - skb->head;
        struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
        if ( unlikely(nskb == NULL) )
            goto drop;
        skb_reserve(nskb, hlen);
        __skb_put(nskb, skb->len);
        if (skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen))
            BUG();
        nskb->dev = skb->dev;
        nskb->proto_csum_valid = skb->proto_csum_valid;
        dev_kfree_skb(skb);
        skb = nskb;
    }
#ifdef CONFIG_XEN_NETDEV_GRANT
#ifdef DEBUG_GRANT
    printk(KERN_ALERT "#### be_xmit: req_prod=%d req_cons=%d id=%04x gr=%04x\n",
           netif->rx->req_prod,
           netif->rx_req_cons,
           netif->rx->ring[
               MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.id,
           netif->rx->ring[
               MASK_NETIF_RX_IDX(netif->rx_req_cons)].req.gref);
#endif
#endif
    netif->rx_req_cons++;
    netif_get(netif);

    skb_queue_tail(&rx_queue, skb);
    tasklet_schedule(&net_rx_tasklet);

    return 0;

 drop:
    netif->stats.tx_dropped++;
    dev_kfree_skb(skb);
    return 0;
}

#if 0
static void xen_network_done_notify(void)
{
    static struct net_device *eth0_dev = NULL;
    if ( unlikely(eth0_dev == NULL) )
        eth0_dev = __dev_get_by_name("eth0");
    netif_rx_schedule(eth0_dev);
}
/*
 * Add following to poll() function in NAPI driver (Tigon3 is example):
 *  if ( xen_network_done() )
 *      tg3_enable_ints(tp);
 */
int xen_network_done(void)
{
    return skb_queue_empty(&rx_queue);
}
#endif

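/*
 * Receive tasklet: for each queued skb, allocate a replacement frame for
 * ourselves, transfer the skb's data page to the target domain (by grant
 * donation or MMUEXT_REASSIGN_PAGE), and post a response on the frontend's
 * rx ring, notifying it via the event channel if required.
 */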
static void net_rx_action(unsigned long unused)
{
    netif_t *netif = NULL;
    s8 status;
    u16 size, id, evtchn;
    multicall_entry_t *mcl;
    mmu_update_t *mmu;
#ifdef CONFIG_XEN_NETDEV_GRANT
    gnttab_donate_t *gop;
#else
    struct mmuext_op *mmuext;
#endif
    unsigned long vdata, old_mfn, new_mfn;
    struct sk_buff_head rxq;
    struct sk_buff *skb;
    u16 notify_list[NETIF_RX_RING_SIZE];
    int notify_nr = 0;

    skb_queue_head_init(&rxq);

    mcl = rx_mcl;
    mmu = rx_mmu;
#ifdef CONFIG_XEN_NETDEV_GRANT
    gop = grant_rx_op;
#else
    mmuext = rx_mmuext;
#endif

    while ( (skb = skb_dequeue(&rx_queue)) != NULL )
    {
        netif   = netdev_priv(skb->dev);
        vdata   = (unsigned long)skb->data;
        old_mfn = virt_to_mfn(vdata);

        /* Memory squeeze? Back off for an arbitrary while. */
        if ( (new_mfn = alloc_mfn()) == 0 )
        {
            if ( net_ratelimit() )
                WPRINTK("Memory squeeze in netback driver.\n");
            mod_timer(&net_timer, jiffies + HZ);
            skb_queue_head(&rx_queue, skb);
            break;
        }
        /*
         * Set the new P2M table entry before reassigning the old data page.
         * Heed the comment in pgtable-2level.h:pte_page(). :-)
         */
        phys_to_machine_mapping[__pa(skb->data) >> PAGE_SHIFT] = new_mfn;

        MULTI_update_va_mapping(mcl, vdata,
                                pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
        mcl++;

#ifdef CONFIG_XEN_NETDEV_GRANT
        gop->mfn = old_mfn;
        gop->domid = netif->domid;
        gop->handle = netif->rx->ring[
            MASK_NETIF_RX_IDX(netif->rx_resp_prod_copy)].req.gref;
        netif->rx_resp_prod_copy++;
        gop++;
#else
        mcl->op = __HYPERVISOR_mmuext_op;
        mcl->args[0] = (unsigned long)mmuext;
        mcl->args[1] = 1;
        mcl->args[2] = 0;
        mcl->args[3] = netif->domid;
        mcl++;

        mmuext->cmd = MMUEXT_REASSIGN_PAGE;
        mmuext->mfn = old_mfn;
        mmuext++;
#endif
        mmu->ptr = ((unsigned long long)new_mfn << PAGE_SHIFT) |
                   MMU_MACHPHYS_UPDATE;
        mmu->val = __pa(vdata) >> PAGE_SHIFT;
        mmu++;

        __skb_queue_tail(&rxq, skb);

#ifdef DEBUG_GRANT
        dump_packet('a', old_mfn, vdata);
#endif
        /* Filled the batch queue? */
        if ( (mcl - rx_mcl) == ARRAY_SIZE(rx_mcl) )
            break;
    }

    if ( mcl == rx_mcl )
        return;

    mcl->op = __HYPERVISOR_mmu_update;
    mcl->args[0] = (unsigned long)rx_mmu;
    mcl->args[1] = mmu - rx_mmu;
    mcl->args[2] = 0;
    mcl->args[3] = DOMID_SELF;
    mcl++;

#ifdef CONFIG_XEN_NETDEV_GRANT
    mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
#else
    mcl[-3].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
#endif
    if ( unlikely(HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl) != 0) )
        BUG();

    mcl = rx_mcl;
#ifdef CONFIG_XEN_NETDEV_GRANT
    if ( HYPERVISOR_grant_table_op(GNTTABOP_donate, grant_rx_op,
                                   gop - grant_rx_op) )
    {
        /*
         * The other side has given us a bad grant ref, or has no headroom,
         * or has gone away. Unfortunately the current grant table code
         * doesn't inform us which is the case, so there is not much we can do.
         */
        DPRINTK("net_rx: donate to DOM%u failed; dropping (up to) %d "
                "packets.\n", grant_rx_op[0].domid, gop - grant_rx_op);
    }
    gop = grant_rx_op;
#else
    mmuext = rx_mmuext;
#endif
    while ( (skb = __skb_dequeue(&rxq)) != NULL )
    {
        netif = netdev_priv(skb->dev);
        size  = skb->tail - skb->data;

        /* Rederive the machine addresses. */
        new_mfn = mcl[0].args[1] >> PAGE_SHIFT;
#ifdef CONFIG_XEN_NETDEV_GRANT
        old_mfn = 0; /* XXX Fix this so we can free_mfn() on error! */
#else
        old_mfn = mmuext[0].mfn;
#endif
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;

        netif->stats.tx_bytes += size;
        netif->stats.tx_packets++;

        /* The update_va_mapping() must not fail. */
        BUG_ON(mcl[0].result != 0);

        /* Check the reassignment error code. */
        status = NETIF_RSP_OKAY;
#ifdef CONFIG_XEN_NETDEV_GRANT
        if ( gop->status != 0 )
        {
            DPRINTK("Bad status %d from grant donate to DOM%u\n",
                    gop->status, netif->domid);
            /* XXX SMH: should free 'old_mfn' here */
            status = NETIF_RSP_ERROR;
        }
#else
        if ( unlikely(mcl[1].result != 0) )
        {
            DPRINTK("Failed MMU update transferring to DOM%u\n", netif->domid);
            free_mfn(old_mfn);
            status = NETIF_RSP_ERROR;
        }
#endif
        evtchn = netif->evtchn;
        id = netif->rx->ring[MASK_NETIF_RX_IDX(netif->rx_resp_prod)].req.id;
        if ( make_rx_response(netif, id, status,
                              (old_mfn << PAGE_SHIFT) | /* XXX */
                              ((unsigned long)skb->data & ~PAGE_MASK),
                              size, skb->proto_csum_valid) &&
             (rx_notify[evtchn] == 0) )
        {
            rx_notify[evtchn] = 1;
            notify_list[notify_nr++] = evtchn;
        }

        netif_put(netif);
        dev_kfree_skb(skb);
#ifdef CONFIG_XEN_NETDEV_GRANT
        mcl++;
        gop++;
#else
        mcl += 2;
        mmuext += 1;
#endif
    }

    while ( notify_nr != 0 )
    {
        evtchn = notify_list[--notify_nr];
        rx_notify[evtchn] = 0;
        notify_via_evtchn(evtchn);
    }

 out:
    /* More work to do? */
    if ( !skb_queue_empty(&rx_queue) && !timer_pending(&net_timer) )
        tasklet_schedule(&net_rx_tasklet);
#if 0
    else
        xen_network_done_notify();
#endif
}

static void net_alarm(unsigned long unused)
{
    tasklet_schedule(&net_rx_tasklet);
}

struct net_device_stats *netif_be_get_stats(struct net_device *dev)
{
    netif_t *netif = netdev_priv(dev);
    return &netif->stats;
}

static int __on_net_schedule_list(netif_t *netif)
{
    return netif->list.next != NULL;
}

static void remove_from_net_schedule_list(netif_t *netif)
{
    spin_lock_irq(&net_schedule_list_lock);
    if ( likely(__on_net_schedule_list(netif)) )
    {
        list_del(&netif->list);
        netif->list.next = NULL;
        netif_put(netif);
    }
    spin_unlock_irq(&net_schedule_list_lock);
}

static void add_to_net_schedule_list_tail(netif_t *netif)
{
    if ( __on_net_schedule_list(netif) )
        return;

    spin_lock_irq(&net_schedule_list_lock);
    if ( !__on_net_schedule_list(netif) && netif->active )
    {
        list_add_tail(&netif->list, &net_schedule_list);
        netif_get(netif);
    }
    spin_unlock_irq(&net_schedule_list_lock);
}

void netif_schedule_work(netif_t *netif)
{
    if ( (netif->tx_req_cons != netif->tx->req_prod) &&
         ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
    {
        add_to_net_schedule_list_tail(netif);
        maybe_schedule_tx_action();
    }
}

void netif_deschedule_work(netif_t *netif)
{
    remove_from_net_schedule_list(netif);
}

static void tx_credit_callback(unsigned long data)
{
    netif_t *netif = (netif_t *)data;
    netif->remaining_credit = netif->credit_bytes;
    netif_schedule_work(netif);
}

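/*
 * Release transmit buffers that the local stack has finished with: unmap the
 * foreign pages (or clear the PTEs), post NETIF_RSP_OKAY responses, and
 * recycle the pending-ring slots.
 */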
inline static void net_tx_action_dealloc(void)
{
#ifdef CONFIG_XEN_NETDEV_GRANT
    gnttab_unmap_grant_ref_t *gop;
#else
    multicall_entry_t *mcl;
#endif
    u16 pending_idx;
    PEND_RING_IDX dc, dp;
    netif_t *netif;

    dc = dealloc_cons;
    dp = dealloc_prod;

#ifdef CONFIG_XEN_NETDEV_GRANT
    /*
     * Free up any grants we have finished using.
     */
    gop = tx_unmap_ops;
    while ( dc != dp )
    {
        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
        gop->host_addr    = MMAP_VADDR(pending_idx);
        gop->dev_bus_addr = 0;
        gop->handle       = grant_tx_ref[pending_idx];
        grant_tx_ref[pending_idx] = GRANT_INVALID_REF;
        gop++;
    }
    BUG_ON(HYPERVISOR_grant_table_op(
        GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops));
#else
    mcl = tx_mcl;
    while ( dc != dp )
    {
        pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
        MULTI_update_va_mapping(mcl, MMAP_VADDR(pending_idx),
                                __pte(0), 0);
        mcl++;
    }

    mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
    if ( unlikely(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0) )
        BUG();

    mcl = tx_mcl;
#endif
    while ( dealloc_cons != dp )
    {
#ifndef CONFIG_XEN_NETDEV_GRANT
        /* The update_va_mapping() must not fail. */
        BUG_ON(mcl[0].result != 0);
#endif

        pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];

        netif = pending_tx_info[pending_idx].netif;

        make_tx_response(netif, pending_tx_info[pending_idx].req.id,
                         NETIF_RSP_OKAY);

        pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;

        /*
         * Scheduling checks must happen after the above response is posted.
         * This avoids a possible race with a guest OS on another CPU if that
         * guest is testing against 'resp_prod' when deciding whether to
         * notify us when it queues additional packets.
         */
        mb();
        if ( (netif->tx_req_cons != netif->tx->req_prod) &&
             ((netif->tx_req_cons-netif->tx_resp_prod) != NETIF_TX_RING_SIZE) )
            add_to_net_schedule_list_tail(netif);

        netif_put(netif);

#ifndef CONFIG_XEN_NETDEV_GRANT
        mcl++;
#endif
    }
}

/* Called after netfront has transmitted */
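/*
 * Pulls transmit requests off the schedule list, maps each frontend data
 * page into MMAP_VADDR() space, copies up to PKT_PROT_LEN bytes of header
 * into a fresh skb (attaching any remainder as a page fragment), and passes
 * the packet to the local stack via netif_rx().
 */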
static void net_tx_action(unsigned long unused)
{
    struct list_head *ent;
    struct sk_buff *skb;
    netif_t *netif;
    netif_tx_request_t txreq;
    u16 pending_idx;
    NETIF_RING_IDX i;
#ifdef CONFIG_XEN_NETDEV_GRANT
    gnttab_map_grant_ref_t *mop;
#else
    multicall_entry_t *mcl;
#endif
    unsigned int data_len;

    if ( dealloc_cons != dealloc_prod )
        net_tx_action_dealloc();

#ifdef CONFIG_XEN_NETDEV_GRANT
    mop = tx_map_ops;
#else
    mcl = tx_mcl;
#endif
    while ( (NR_PENDING_REQS < MAX_PENDING_REQS) &&
            !list_empty(&net_schedule_list) )
    {
        /* Get a netif from the list with work to do. */
        ent = net_schedule_list.next;
        netif = list_entry(ent, netif_t, list);
        netif_get(netif);
        remove_from_net_schedule_list(netif);

        /* Work to do? */
        i = netif->tx_req_cons;
        if ( (i == netif->tx->req_prod) ||
             ((i-netif->tx_resp_prod) == NETIF_TX_RING_SIZE) )
        {
            netif_put(netif);
            continue;
        }

        rmb(); /* Ensure that we see the request before we copy it. */
        memcpy(&txreq, &netif->tx->ring[MASK_NETIF_TX_IDX(i)].req,
               sizeof(txreq));
        /* Credit-based scheduling. */
        if ( txreq.size > netif->remaining_credit )
        {
            unsigned long now = jiffies;
            unsigned long next_credit =
                netif->credit_timeout.expires +
                msecs_to_jiffies(netif->credit_usec / 1000);

            /* Timer could already be pending in some rare cases. */
            if ( timer_pending(&netif->credit_timeout) )
                break;

            /* Already passed the point at which we can replenish credit? */
            if ( time_after_eq(now, next_credit) )
            {
                netif->credit_timeout.expires = now;
                netif->remaining_credit = netif->credit_bytes;
            }

            /* Still too big to send right now? Then set a timer callback. */
            if ( txreq.size > netif->remaining_credit )
            {
                netif->remaining_credit = 0;
                netif->credit_timeout.expires  = next_credit;
                netif->credit_timeout.data     = (unsigned long)netif;
                netif->credit_timeout.function = tx_credit_callback;
                add_timer_on(&netif->credit_timeout, smp_processor_id());
                break;
            }
        }
        netif->remaining_credit -= txreq.size;

        /*
         * Why the barrier? It ensures that the frontend sees updated req_cons
         * before we check for more work to schedule.
         */
        netif->tx->req_cons = ++netif->tx_req_cons;
        mb();

        netif_schedule_work(netif);

        if ( unlikely(txreq.size < ETH_HLEN) ||
             unlikely(txreq.size > ETH_FRAME_LEN) )
        {
            DPRINTK("Bad packet size: %d\n", txreq.size);
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            continue;
        }

        /* No crossing a page boundary as the payload mustn't fragment. */
        if ( unlikely(((txreq.addr & ~PAGE_MASK) + txreq.size) >= PAGE_SIZE) )
        {
            DPRINTK("txreq.addr: %lx, size: %u, end: %lu\n",
                    txreq.addr, txreq.size,
                    (txreq.addr &~PAGE_MASK) + txreq.size);
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            continue;
        }

        pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];

        data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;

        if ( unlikely((skb = alloc_skb(data_len+16, GFP_ATOMIC)) == NULL) )
        {
            DPRINTK("Can't allocate a skb in start_xmit.\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            break;
        }

        /* Packets passed to netif_rx() must have some headroom. */
        skb_reserve(skb, 16);
#ifdef CONFIG_XEN_NETDEV_GRANT
        mop->host_addr = MMAP_VADDR(pending_idx);
        mop->dom       = netif->domid;
        mop->ref       = txreq.addr >> PAGE_SHIFT;
        mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
        mop++;
#else
        MULTI_update_va_mapping_otherdomain(
            mcl, MMAP_VADDR(pending_idx),
            pfn_pte_ma(txreq.addr >> PAGE_SHIFT, PAGE_KERNEL),
            0, netif->domid);

        mcl++;
#endif

        memcpy(&pending_tx_info[pending_idx].req, &txreq, sizeof(txreq));
        pending_tx_info[pending_idx].netif = netif;
        *((u16 *)skb->data) = pending_idx;

        __skb_queue_tail(&tx_queue, skb);

        pending_cons++;

#ifdef CONFIG_XEN_NETDEV_GRANT
        if ( (mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops) )
            break;
#else
        /* Filled the batch queue? */
        if ( (mcl - tx_mcl) == ARRAY_SIZE(tx_mcl) )
            break;
#endif
    }

#ifdef CONFIG_XEN_NETDEV_GRANT
    if ( mop == tx_map_ops )
        return;

    BUG_ON(HYPERVISOR_grant_table_op(
        GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops));

    mop = tx_map_ops;
#else
    if ( mcl == tx_mcl )
        return;

    BUG_ON(HYPERVISOR_multicall(tx_mcl, mcl - tx_mcl) != 0);

    mcl = tx_mcl;
#endif
    while ( (skb = __skb_dequeue(&tx_queue)) != NULL )
    {
        pending_idx = *((u16 *)skb->data);
        netif       = pending_tx_info[pending_idx].netif;
        memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));

        /* Check the remap error code. */
#ifdef CONFIG_XEN_NETDEV_GRANT
        /*
         * XXX SMH: error returns from grant operations are pretty poorly
         * specified/thought out, but the below at least conforms with
         * what the rest of the code uses.
         */
        if ( unlikely(mop->handle < 0) )
        {
            printk(KERN_ALERT "#### netback grant fails\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            kfree_skb(skb);
            mop++;
            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
            continue;
        }
        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
            FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT);
        grant_tx_ref[pending_idx] = mop->handle;
#else
        if ( unlikely(mcl[0].result != 0) )
        {
            DPRINTK("Bad page frame\n");
            make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
            netif_put(netif);
            kfree_skb(skb);
            mcl++;
            pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
            continue;
        }

        phys_to_machine_mapping[__pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT] =
            FOREIGN_FRAME(txreq.addr >> PAGE_SHIFT);
#endif

        data_len = (txreq.size > PKT_PROT_LEN) ? PKT_PROT_LEN : txreq.size;

        __skb_put(skb, data_len);
        memcpy(skb->data,
               (void *)(MMAP_VADDR(pending_idx)|(txreq.addr&~PAGE_MASK)),
               data_len);
        if ( data_len < txreq.size )
        {
            /* Append the packet payload as a fragment. */
            skb_shinfo(skb)->frags[0].page        =
                virt_to_page(MMAP_VADDR(pending_idx));
            skb_shinfo(skb)->frags[0].size        = txreq.size - data_len;
            skb_shinfo(skb)->frags[0].page_offset =
                (txreq.addr + data_len) & ~PAGE_MASK;
            skb_shinfo(skb)->nr_frags = 1;
        }
        else
        {
            /* Schedule a response immediately. */
            netif_idx_release(pending_idx);
        }

        skb->data_len  = txreq.size - data_len;
        skb->len      += skb->data_len;

        skb->dev      = netif->dev;
        skb->protocol = eth_type_trans(skb, skb->dev);

        /* No checking needed on localhost, but remember the field is blank. */
        skb->ip_summed        = CHECKSUM_UNNECESSARY;
        skb->proto_csum_valid = 1;
        skb->proto_csum_blank = txreq.csum_blank;

        netif->stats.rx_bytes += txreq.size;
        netif->stats.rx_packets++;

        netif_rx(skb);
        netif->dev->last_rx = jiffies;

#ifdef CONFIG_XEN_NETDEV_GRANT
        mop++;
#else
        mcl++;
#endif
    }
}

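/*
 * A transmit buffer's last reference has gone away; queue its pending index
 * for the deallocation pass in net_tx_action_dealloc().
 */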
static void netif_idx_release(u16 pending_idx)
{
    static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
    unsigned long flags;

    spin_lock_irqsave(&_lock, flags);
    dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
    spin_unlock_irqrestore(&_lock, flags);

    tasklet_schedule(&net_tx_tasklet);
}

static void netif_page_release(struct page *page)
{
    u16 pending_idx = page - virt_to_page(mmap_vstart);

    /* Ready for next use. */
    set_page_count(page, 1);

    netif_idx_release(pending_idx);
}

irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
{
    netif_t *netif = dev_id;
    if ( tx_work_exists(netif) )
    {
        add_to_net_schedule_list_tail(netif);
        maybe_schedule_tx_action();
    }
    return IRQ_HANDLED;
}

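/*
 * Post a response on the frontend's tx ring, notifying it via the event
 * channel if the new producer index hits the event threshold it requested.
 */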
static void make_tx_response(netif_t *netif,
                             u16      id,
                             s8       st)
{
    NETIF_RING_IDX i = netif->tx_resp_prod;
    netif_tx_response_t *resp;

    resp = &netif->tx->ring[MASK_NETIF_TX_IDX(i)].resp;
    resp->id     = id;
    resp->status = st;
    wmb();
    netif->tx->resp_prod = netif->tx_resp_prod = ++i;

    mb(); /* Update producer before checking event threshold. */
    if ( i == netif->tx->event )
        notify_via_evtchn(netif->evtchn);
}

static int make_rx_response(netif_t *netif,
                            u16      id,
                            s8       st,
                            unsigned long addr,
                            u16      size,
                            u16      csum_valid)
{
    NETIF_RING_IDX i = netif->rx_resp_prod;
    netif_rx_response_t *resp;

    resp = &netif->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
    resp->addr       = addr;
    resp->csum_valid = csum_valid;
    resp->id         = id;
    resp->status     = (s16)size;
    if ( st < 0 )
        resp->status = (s16)st;
    wmb();
    netif->rx->resp_prod = netif->rx_resp_prod = ++i;

    mb(); /* Update producer before checking event threshold. */
    return (i == netif->rx->event);
}

static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
{
    struct list_head *ent;
    netif_t *netif;
    int i = 0;

    printk(KERN_ALERT "netif_schedule_list:\n");
    spin_lock_irq(&net_schedule_list_lock);

    list_for_each ( ent, &net_schedule_list )
    {
        netif = list_entry(ent, netif_t, list);
        printk(KERN_ALERT " %d: private(rx_req_cons=%08x rx_resp_prod=%08x\n",
               i, netif->rx_req_cons, netif->rx_resp_prod);
        printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
               netif->tx_req_cons, netif->tx_resp_prod);
        printk(KERN_ALERT "   shared(rx_req_prod=%08x rx_resp_prod=%08x\n",
               netif->rx->req_prod, netif->rx->resp_prod);
        printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
               netif->rx->event, netif->tx->req_prod);
        printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
               netif->tx->resp_prod, netif->tx->event);
        i++;
    }

    spin_unlock_irq(&net_schedule_list_lock);
    printk(KERN_ALERT " ** End of netif_schedule_list **\n");

    return IRQ_HANDLED;
}

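/*
 * Module initialisation: reserve a virtual address range for mapping
 * frontend pages and mark those pages as foreign, set up the pending ring
 * and scheduling list, then register the xenbus backend and a VIRQ_DEBUG
 * handler that dumps the schedule list.
 */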
static int __init netback_init(void)
{
    int i;
    struct page *page;

    if ( !(xen_start_info->flags & SIF_NET_BE_DOMAIN) &&
         !(xen_start_info->flags & SIF_INITDOMAIN) )
        return 0;

    IPRINTK("Initialising Xen netif backend.\n");
#ifdef CONFIG_XEN_NETDEV_GRANT
    IPRINTK("Using grant tables.\n");
#endif

    /* We can increase reservation by this much in net_rx_action(). */
    balloon_update_driver_allowance(NETIF_RX_RING_SIZE);

    skb_queue_head_init(&rx_queue);
    skb_queue_head_init(&tx_queue);

    init_timer(&net_timer);
    net_timer.data = 0;
    net_timer.function = net_alarm;

    page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
    BUG_ON(page == NULL);
    mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));

    for ( i = 0; i < MAX_PENDING_REQS; i++ )
    {
        page = virt_to_page(MMAP_VADDR(i));
        set_page_count(page, 1);
        SetPageForeign(page, netif_page_release);
    }

    pending_cons = 0;
    pending_prod = MAX_PENDING_REQS;
    for ( i = 0; i < MAX_PENDING_REQS; i++ )
        pending_ring[i] = i;

    spin_lock_init(&net_schedule_list_lock);
    INIT_LIST_HEAD(&net_schedule_list);

    netif_xenbus_init();

    (void)request_irq(bind_virq_to_irq(VIRQ_DEBUG),
                      netif_be_dbg, SA_SHIRQ,
                      "net-be-dbg", &netif_be_dbg);

    return 0;
}

static void netback_cleanup(void)
{
    BUG();
}

module_init(netback_init);
module_exit(netback_cleanup);