ia64/linux-2.6.18-xen.hg

drivers/xen/netback/netback.c @ 792:db9857bb0320

netback: add ethtool stat to track copied skbs.

Copied skbs should be rare but we have no way of verifying that.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Author: Keir Fraser <keir.fraser@citrix.com>
Date: Thu Feb 05 15:24:46 2009 +0000 (2009-02-05)
Parents: 8081d19dce89
Children: 2ab54cc40761
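The counter this patch adds is incremented in this file (net_tx_action_dealloc() increments netif->nr_copied_skbs when a pending transmit request has been held too long and must be copied), while the ethtool plumbing that reports it lives in the companion interface.c, which is not shown here. As a rough, illustrative sketch of how such a statistic could be exported on a 2.6.18-era kernel -- the names netback_stats[], netback_get_stats_count(), netback_get_ethtool_stats() and netback_get_strings() are assumptions made for the example, not code taken from this changeset:

/*
 * Sketch only, not part of netback.c.  Assumes netif_t (from common.h)
 * carries an unsigned long nr_copied_skbs field; adjust the cast below
 * to match the real field type.
 */
#include <linux/kernel.h>
#include <linux/ethtool.h>
#include <linux/netdevice.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include "common.h"

static const struct {
	char name[ETH_GSTRING_LEN];
	u16 offset;
} netback_stats[] = {
	{ "copied_skbs", offsetof(netif_t, nr_copied_skbs) },
};

/* Number of values reported via "ethtool -S". */
static int netback_get_stats_count(struct net_device *dev)
{
	return ARRAY_SIZE(netback_stats);
}

/* Copy each counter out of the vif's private area into the u64 array. */
static void netback_get_ethtool_stats(struct net_device *dev,
				      struct ethtool_stats *stats, u64 *data)
{
	char *netif = netdev_priv(dev);	/* netif_t lives in the netdev private area */
	int i;

	for (i = 0; i < ARRAY_SIZE(netback_stats); i++)
		data[i] = *(unsigned long *)(netif + netback_stats[i].offset);
}

/* Report the counter names in the same order as the values above. */
static void netback_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
	int i;

	if (stringset != ETH_SS_STATS)
		return;

	for (i = 0; i < ARRAY_SIZE(netback_stats); i++)
		memcpy(data + i * ETH_GSTRING_LEN,
		       netback_stats[i].name, ETH_GSTRING_LEN);
}

Wired into the vif's ethtool_ops (.get_strings, .get_stats_count, .get_ethtool_stats), the count would then appear in "ethtool -S <vif>" output, which is what lets an administrator confirm that copied skbs really are rare.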
line source
1 /******************************************************************************
2 * drivers/xen/netback/netback.c
3 *
4 * Back-end of the driver for virtual network devices. This portion of the
5 * driver exports a 'unified' network-device interface that can be accessed
6 * by any operating system that implements a compatible front end. A
7 * reference front-end implementation can be found in:
8 * drivers/xen/netfront/netfront.c
9 *
10 * Copyright (c) 2002-2005, K A Fraser
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
17 *
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
24 *
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
34 * IN THE SOFTWARE.
35 */
37 #include "common.h"
38 #include <xen/balloon.h>
39 #include <xen/interface/memory.h>
41 /*define NETBE_DEBUG_INTERRUPT*/
43 struct netbk_rx_meta {
44 skb_frag_t frag;
45 int id;
46 u8 copy:1;
47 };
49 struct netbk_tx_pending_inuse {
50 struct list_head list;
51 unsigned long alloc_time;
52 };
54 static void netif_idx_release(u16 pending_idx);
55 static void make_tx_response(netif_t *netif,
56 netif_tx_request_t *txp,
57 s8 st);
58 static netif_rx_response_t *make_rx_response(netif_t *netif,
59 u16 id,
60 s8 st,
61 u16 offset,
62 u16 size,
63 u16 flags);
65 static void net_tx_action(unsigned long unused);
66 static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
68 static void net_rx_action(unsigned long unused);
69 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
71 static struct timer_list net_timer;
72 static struct timer_list netbk_tx_pending_timer;
74 #define MAX_PENDING_REQS 256
76 static struct sk_buff_head rx_queue;
78 static struct page **mmap_pages;
79 static inline unsigned long idx_to_pfn(unsigned int idx)
80 {
81 return page_to_pfn(mmap_pages[idx]);
82 }
84 static inline unsigned long idx_to_kaddr(unsigned int idx)
85 {
86 return (unsigned long)pfn_to_kaddr(idx_to_pfn(idx));
87 }
89 /* extra field used in struct page */
90 static inline void netif_set_page_index(struct page *pg, unsigned int index)
91 {
92 *(unsigned long *)&pg->mapping = index;
93 }
95 static inline int netif_page_index(struct page *pg)
96 {
97 unsigned long idx = (unsigned long)pg->mapping;
99 if (!PageForeign(pg))
100 return -1;
102 if ((idx >= MAX_PENDING_REQS) || (mmap_pages[idx] != pg))
103 return -1;
105 return idx;
106 }
108 #define PKT_PROT_LEN 64
110 static struct pending_tx_info {
111 netif_tx_request_t req;
112 netif_t *netif;
113 } pending_tx_info[MAX_PENDING_REQS];
114 static u16 pending_ring[MAX_PENDING_REQS];
115 typedef unsigned int PEND_RING_IDX;
116 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
117 static PEND_RING_IDX pending_prod, pending_cons;
118 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
120 /* Freed TX SKBs get batched on this ring before return to pending_ring. */
121 static u16 dealloc_ring[MAX_PENDING_REQS];
122 static PEND_RING_IDX dealloc_prod, dealloc_cons;
124 /* Doubly-linked list of in-use pending entries. */
125 static struct netbk_tx_pending_inuse pending_inuse[MAX_PENDING_REQS];
126 static LIST_HEAD(pending_inuse_head);
128 static struct sk_buff_head tx_queue;
130 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
131 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
132 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
134 static struct list_head net_schedule_list;
135 static spinlock_t net_schedule_list_lock;
137 #define MAX_MFN_ALLOC 64
138 static unsigned long mfn_list[MAX_MFN_ALLOC];
139 static unsigned int alloc_index = 0;
141 /* Setting this allows the safe use of this driver without netloop. */
142 static int MODPARM_copy_skb = 1;
143 module_param_named(copy_skb, MODPARM_copy_skb, bool, 0);
144 MODULE_PARM_DESC(copy_skb, "Copy data received from netfront without netloop");
146 int netbk_copy_skb_mode;
148 static inline unsigned long alloc_mfn(void)
149 {
150 BUG_ON(alloc_index == 0);
151 return mfn_list[--alloc_index];
152 }
154 static int check_mfn(int nr)
155 {
156 struct xen_memory_reservation reservation = {
157 .extent_order = 0,
158 .domid = DOMID_SELF
159 };
160 int rc;
162 if (likely(alloc_index >= nr))
163 return 0;
165 set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
166 reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
167 rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
168 if (likely(rc > 0))
169 alloc_index += rc;
171 return alloc_index >= nr ? 0 : -ENOMEM;
172 }
174 static inline void maybe_schedule_tx_action(void)
175 {
176 smp_mb();
177 if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
178 !list_empty(&net_schedule_list))
179 tasklet_schedule(&net_tx_tasklet);
180 }
182 static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
183 {
184 struct skb_shared_info *ninfo;
185 struct sk_buff *nskb;
186 unsigned long offset;
187 int ret;
188 int len;
189 int headlen;
191 BUG_ON(skb_shinfo(skb)->frag_list != NULL);
193 nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC | __GFP_NOWARN);
194 if (unlikely(!nskb))
195 goto err;
197 skb_reserve(nskb, 16 + NET_IP_ALIGN);
198 headlen = nskb->end - nskb->data;
199 if (headlen > skb_headlen(skb))
200 headlen = skb_headlen(skb);
201 ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
202 BUG_ON(ret);
204 ninfo = skb_shinfo(nskb);
205 ninfo->gso_size = skb_shinfo(skb)->gso_size;
206 ninfo->gso_type = skb_shinfo(skb)->gso_type;
208 offset = headlen;
209 len = skb->len - headlen;
211 nskb->len = skb->len;
212 nskb->data_len = len;
213 nskb->truesize += len;
215 while (len) {
216 struct page *page;
217 int copy;
218 int zero;
220 if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
221 dump_stack();
222 goto err_free;
223 }
225 copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
226 zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
228 page = alloc_page(GFP_ATOMIC | __GFP_NOWARN | zero);
229 if (unlikely(!page))
230 goto err_free;
232 ret = skb_copy_bits(skb, offset, page_address(page), copy);
233 BUG_ON(ret);
235 ninfo->frags[ninfo->nr_frags].page = page;
236 ninfo->frags[ninfo->nr_frags].page_offset = 0;
237 ninfo->frags[ninfo->nr_frags].size = copy;
238 ninfo->nr_frags++;
240 offset += copy;
241 len -= copy;
242 }
244 offset = nskb->data - skb->data;
246 nskb->h.raw = skb->h.raw + offset;
247 nskb->nh.raw = skb->nh.raw + offset;
248 nskb->mac.raw = skb->mac.raw + offset;
250 return nskb;
252 err_free:
253 kfree_skb(nskb);
254 err:
255 return NULL;
256 }
258 static inline int netbk_max_required_rx_slots(netif_t *netif)
259 {
260 if (netif->features & (NETIF_F_SG|NETIF_F_TSO))
261 return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
262 return 1; /* all in one */
263 }
265 static inline int netbk_queue_full(netif_t *netif)
266 {
267 RING_IDX peek = netif->rx_req_cons_peek;
268 RING_IDX needed = netbk_max_required_rx_slots(netif);
270 return ((netif->rx.sring->req_prod - peek) < needed) ||
271 ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
272 }
274 static void tx_queue_callback(unsigned long data)
275 {
276 netif_t *netif = (netif_t *)data;
277 if (netif_schedulable(netif))
278 netif_wake_queue(netif->dev);
279 }
281 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
282 {
283 netif_t *netif = netdev_priv(dev);
285 BUG_ON(skb->dev != dev);
287 /* Drop the packet if the target domain has no receive buffers. */
288 if (unlikely(!netif_schedulable(netif) || netbk_queue_full(netif)))
289 goto drop;
291 /*
292 * Copy the packet here if it's destined for a flipping interface
293 * but isn't flippable (e.g. extra references to data).
294 * XXX For now we also copy skbuffs whose head crosses a page
295 * boundary, because netbk_gop_skb can't handle them.
296 */
297 if (!netif->copying_receiver ||
298 ((skb_headlen(skb) + offset_in_page(skb->data)) >= PAGE_SIZE)) {
299 struct sk_buff *nskb = netbk_copy_skb(skb);
300 if ( unlikely(nskb == NULL) )
301 goto drop;
302 /* Copy only the header fields we use in this driver. */
303 nskb->dev = skb->dev;
304 nskb->ip_summed = skb->ip_summed;
305 nskb->proto_data_valid = skb->proto_data_valid;
306 dev_kfree_skb(skb);
307 skb = nskb;
308 }
310 netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
311 !!skb_shinfo(skb)->gso_size;
312 netif_get(netif);
314 if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
315 netif->rx.sring->req_event = netif->rx_req_cons_peek +
316 netbk_max_required_rx_slots(netif);
317 mb(); /* request notification /then/ check & stop the queue */
318 if (netbk_queue_full(netif)) {
319 netif_stop_queue(dev);
320 /*
321 * Schedule 500ms timeout to restart the queue, thus
322 * ensuring that an inactive queue will be drained.
323 * Packets will be dropped immediately until more
324 * receive buffers become available (see
325 * netbk_queue_full() check above).
326 */
327 netif->tx_queue_timeout.data = (unsigned long)netif;
328 netif->tx_queue_timeout.function = tx_queue_callback;
329 __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
330 }
331 }
333 skb_queue_tail(&rx_queue, skb);
334 tasklet_schedule(&net_rx_tasklet);
336 return 0;
338 drop:
339 netif->stats.tx_dropped++;
340 dev_kfree_skb(skb);
341 return 0;
342 }
344 #if 0
345 static void xen_network_done_notify(void)
346 {
347 static struct net_device *eth0_dev = NULL;
348 if (unlikely(eth0_dev == NULL))
349 eth0_dev = __dev_get_by_name("eth0");
350 netif_rx_schedule(eth0_dev);
351 }
352 /*
353 * Add following to poll() function in NAPI driver (Tigon3 is example):
354 * if ( xen_network_done() )
355 * tg3_enable_ints(tp);
356 */
357 int xen_network_done(void)
358 {
359 return skb_queue_empty(&rx_queue);
360 }
361 #endif
363 struct netrx_pending_operations {
364 unsigned trans_prod, trans_cons;
365 unsigned mmu_prod, mmu_mcl;
366 unsigned mcl_prod, mcl_cons;
367 unsigned copy_prod, copy_cons;
368 unsigned meta_prod, meta_cons;
369 mmu_update_t *mmu;
370 gnttab_transfer_t *trans;
371 gnttab_copy_t *copy;
372 multicall_entry_t *mcl;
373 struct netbk_rx_meta *meta;
374 };
376 /* Set up the grant operations for this fragment. If it's a flipping
377 interface, we also set up the unmap request from here. */
378 static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
379 int i, struct netrx_pending_operations *npo,
380 struct page *page, unsigned long size,
381 unsigned long offset)
382 {
383 mmu_update_t *mmu;
384 gnttab_transfer_t *gop;
385 gnttab_copy_t *copy_gop;
386 multicall_entry_t *mcl;
387 netif_rx_request_t *req;
388 unsigned long old_mfn, new_mfn;
389 int idx = netif_page_index(page);
391 old_mfn = virt_to_mfn(page_address(page));
393 req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
394 if (netif->copying_receiver) {
395 /* The fragment needs to be copied rather than
396 flipped. */
397 meta->copy = 1;
398 copy_gop = npo->copy + npo->copy_prod++;
399 copy_gop->flags = GNTCOPY_dest_gref;
400 if (idx > -1) {
401 struct pending_tx_info *src_pend = &pending_tx_info[idx];
402 copy_gop->source.domid = src_pend->netif->domid;
403 copy_gop->source.u.ref = src_pend->req.gref;
404 copy_gop->flags |= GNTCOPY_source_gref;
405 } else {
406 copy_gop->source.domid = DOMID_SELF;
407 copy_gop->source.u.gmfn = old_mfn;
408 }
409 copy_gop->source.offset = offset;
410 copy_gop->dest.domid = netif->domid;
411 copy_gop->dest.offset = 0;
412 copy_gop->dest.u.ref = req->gref;
413 copy_gop->len = size;
414 } else {
415 meta->copy = 0;
416 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
417 new_mfn = alloc_mfn();
419 /*
420 * Set the new P2M table entry before
421 * reassigning the old data page. Heed the
422 * comment in pgtable-2level.h:pte_page(). :-)
423 */
424 set_phys_to_machine(page_to_pfn(page), new_mfn);
426 mcl = npo->mcl + npo->mcl_prod++;
427 MULTI_update_va_mapping(mcl,
428 (unsigned long)page_address(page),
429 pfn_pte_ma(new_mfn, PAGE_KERNEL),
430 0);
432 mmu = npo->mmu + npo->mmu_prod++;
433 mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
434 MMU_MACHPHYS_UPDATE;
435 mmu->val = page_to_pfn(page);
436 }
438 gop = npo->trans + npo->trans_prod++;
439 gop->mfn = old_mfn;
440 gop->domid = netif->domid;
441 gop->ref = req->gref;
442 }
443 return req->id;
444 }
446 static void netbk_gop_skb(struct sk_buff *skb,
447 struct netrx_pending_operations *npo)
448 {
449 netif_t *netif = netdev_priv(skb->dev);
450 int nr_frags = skb_shinfo(skb)->nr_frags;
451 int i;
452 int extra;
453 struct netbk_rx_meta *head_meta, *meta;
455 head_meta = npo->meta + npo->meta_prod++;
456 head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
457 head_meta->frag.size = skb_shinfo(skb)->gso_size;
458 extra = !!head_meta->frag.size + 1;
460 for (i = 0; i < nr_frags; i++) {
461 meta = npo->meta + npo->meta_prod++;
462 meta->frag = skb_shinfo(skb)->frags[i];
463 meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
464 meta->frag.page,
465 meta->frag.size,
466 meta->frag.page_offset);
467 }
469 /*
470 * This must occur at the end to ensure that we don't trash skb_shinfo
471 * until we're done. We know that the head doesn't cross a page
472 * boundary because such packets get copied in netif_be_start_xmit.
473 */
474 head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
475 virt_to_page(skb->data),
476 skb_headlen(skb),
477 offset_in_page(skb->data));
479 netif->rx.req_cons += nr_frags + extra;
480 }
482 static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
483 {
484 int i;
486 for (i = 0; i < nr_frags; i++)
487 put_page(meta[i].frag.page);
488 }
490 /* This is a twin to netbk_gop_skb. Assume that netbk_gop_skb was
491 used to set up the operations on the top of
492 netrx_pending_operations, which have since been done. Check that
493 they didn't give any errors and advance over them. */
494 static int netbk_check_gop(int nr_frags, domid_t domid,
495 struct netrx_pending_operations *npo)
496 {
497 multicall_entry_t *mcl;
498 gnttab_transfer_t *gop;
499 gnttab_copy_t *copy_op;
500 int status = NETIF_RSP_OKAY;
501 int i;
503 for (i = 0; i <= nr_frags; i++) {
504 if (npo->meta[npo->meta_cons + i].copy) {
505 copy_op = npo->copy + npo->copy_cons++;
506 if (copy_op->status != GNTST_okay) {
507 DPRINTK("Bad status %d from copy to DOM%d.\n",
508 copy_op->status, domid);
509 status = NETIF_RSP_ERROR;
510 }
511 } else {
512 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
513 mcl = npo->mcl + npo->mcl_cons++;
514 /* The update_va_mapping() must not fail. */
515 BUG_ON(mcl->result != 0);
516 }
518 gop = npo->trans + npo->trans_cons++;
519 /* Check the reassignment error code. */
520 if (gop->status != 0) {
521 DPRINTK("Bad status %d from grant transfer to DOM%u\n",
522 gop->status, domid);
523 /*
524 * Page no longer belongs to us unless
525 * GNTST_bad_page, but that should be
526 * a fatal error anyway.
527 */
528 BUG_ON(gop->status == GNTST_bad_page);
529 status = NETIF_RSP_ERROR;
530 }
531 }
532 }
534 return status;
535 }
537 static void netbk_add_frag_responses(netif_t *netif, int status,
538 struct netbk_rx_meta *meta, int nr_frags)
539 {
540 int i;
541 unsigned long offset;
543 for (i = 0; i < nr_frags; i++) {
544 int id = meta[i].id;
545 int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
547 if (meta[i].copy)
548 offset = 0;
549 else
550 offset = meta[i].frag.page_offset;
551 make_rx_response(netif, id, status, offset,
552 meta[i].frag.size, flags);
553 }
554 }
556 static void net_rx_action(unsigned long unused)
557 {
558 netif_t *netif = NULL;
559 s8 status;
560 u16 id, irq, flags;
561 netif_rx_response_t *resp;
562 multicall_entry_t *mcl;
563 struct sk_buff_head rxq;
564 struct sk_buff *skb;
565 int notify_nr = 0;
566 int ret;
567 int nr_frags;
568 int count;
569 unsigned long offset;
571 /*
572 * Putting hundreds of bytes on the stack is considered rude.
573 * Static works because a tasklet can only be on one CPU at any time.
574 */
575 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+3];
576 static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
577 static gnttab_transfer_t grant_trans_op[NET_RX_RING_SIZE];
578 static gnttab_copy_t grant_copy_op[NET_RX_RING_SIZE];
579 static unsigned char rx_notify[NR_IRQS];
580 static u16 notify_list[NET_RX_RING_SIZE];
581 static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
583 struct netrx_pending_operations npo = {
584 mmu: rx_mmu,
585 trans: grant_trans_op,
586 copy: grant_copy_op,
587 mcl: rx_mcl,
588 meta: meta};
590 skb_queue_head_init(&rxq);
592 count = 0;
594 while ((skb = skb_dequeue(&rx_queue)) != NULL) {
595 nr_frags = skb_shinfo(skb)->nr_frags;
596 *(int *)skb->cb = nr_frags;
598 if (!xen_feature(XENFEAT_auto_translated_physmap) &&
599 !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
600 check_mfn(nr_frags + 1)) {
601 /* Memory squeeze? Back off for an arbitrary while. */
602 if ( net_ratelimit() )
603 WPRINTK("Memory squeeze in netback "
604 "driver.\n");
605 mod_timer(&net_timer, jiffies + HZ);
606 skb_queue_head(&rx_queue, skb);
607 break;
608 }
610 netbk_gop_skb(skb, &npo);
612 count += nr_frags + 1;
614 __skb_queue_tail(&rxq, skb);
616 /* Filled the batch queue? */
617 if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
618 break;
619 }
621 BUG_ON(npo.meta_prod > ARRAY_SIZE(meta));
623 npo.mmu_mcl = npo.mcl_prod;
624 if (npo.mcl_prod) {
625 BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
626 BUG_ON(npo.mmu_prod > ARRAY_SIZE(rx_mmu));
627 mcl = npo.mcl + npo.mcl_prod++;
629 BUG_ON(mcl[-1].op != __HYPERVISOR_update_va_mapping);
630 mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
632 mcl->op = __HYPERVISOR_mmu_update;
633 mcl->args[0] = (unsigned long)rx_mmu;
634 mcl->args[1] = npo.mmu_prod;
635 mcl->args[2] = 0;
636 mcl->args[3] = DOMID_SELF;
637 }
639 if (npo.trans_prod) {
640 BUG_ON(npo.trans_prod > ARRAY_SIZE(grant_trans_op));
641 mcl = npo.mcl + npo.mcl_prod++;
642 mcl->op = __HYPERVISOR_grant_table_op;
643 mcl->args[0] = GNTTABOP_transfer;
644 mcl->args[1] = (unsigned long)grant_trans_op;
645 mcl->args[2] = npo.trans_prod;
646 }
648 if (npo.copy_prod) {
649 BUG_ON(npo.copy_prod > ARRAY_SIZE(grant_copy_op));
650 mcl = npo.mcl + npo.mcl_prod++;
651 mcl->op = __HYPERVISOR_grant_table_op;
652 mcl->args[0] = GNTTABOP_copy;
653 mcl->args[1] = (unsigned long)grant_copy_op;
654 mcl->args[2] = npo.copy_prod;
655 }
657 /* Nothing to do? */
658 if (!npo.mcl_prod)
659 return;
661 BUG_ON(npo.mcl_prod > ARRAY_SIZE(rx_mcl));
663 ret = HYPERVISOR_multicall(npo.mcl, npo.mcl_prod);
664 BUG_ON(ret != 0);
665 /* The mmu_machphys_update() must not fail. */
666 BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
668 while ((skb = __skb_dequeue(&rxq)) != NULL) {
669 nr_frags = *(int *)skb->cb;
671 netif = netdev_priv(skb->dev);
672 /* We can't rely on skb_release_data to release the
673 pages used by fragments for us, since it tries to
674 touch the pages in the fraglist. If we're in
675 flipping mode, that doesn't work. In copying mode,
676 we still have access to all of the pages, and so
677 it's safe to let release_data deal with it. */
678 /* (Freeing the fragments is safe since we copy
679 non-linear skbs destined for flipping interfaces) */
680 if (!netif->copying_receiver) {
681 atomic_set(&(skb_shinfo(skb)->dataref), 1);
682 skb_shinfo(skb)->frag_list = NULL;
683 skb_shinfo(skb)->nr_frags = 0;
684 netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
685 }
687 netif->stats.tx_bytes += skb->len;
688 netif->stats.tx_packets++;
690 status = netbk_check_gop(nr_frags, netif->domid, &npo);
692 id = meta[npo.meta_cons].id;
693 flags = nr_frags ? NETRXF_more_data : 0;
695 if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
696 flags |= NETRXF_csum_blank | NETRXF_data_validated;
697 else if (skb->proto_data_valid) /* remote but checksummed? */
698 flags |= NETRXF_data_validated;
700 if (meta[npo.meta_cons].copy)
701 offset = 0;
702 else
703 offset = offset_in_page(skb->data);
704 resp = make_rx_response(netif, id, status, offset,
705 skb_headlen(skb), flags);
707 if (meta[npo.meta_cons].frag.size) {
708 struct netif_extra_info *gso =
709 (struct netif_extra_info *)
710 RING_GET_RESPONSE(&netif->rx,
711 netif->rx.rsp_prod_pvt++);
713 resp->flags |= NETRXF_extra_info;
715 gso->u.gso.size = meta[npo.meta_cons].frag.size;
716 gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
717 gso->u.gso.pad = 0;
718 gso->u.gso.features = 0;
720 gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
721 gso->flags = 0;
722 }
724 netbk_add_frag_responses(netif, status,
725 meta + npo.meta_cons + 1,
726 nr_frags);
728 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
729 irq = netif->irq;
730 if (ret && !rx_notify[irq]) {
731 rx_notify[irq] = 1;
732 notify_list[notify_nr++] = irq;
733 }
735 if (netif_queue_stopped(netif->dev) &&
736 netif_schedulable(netif) &&
737 !netbk_queue_full(netif))
738 netif_wake_queue(netif->dev);
740 netif_put(netif);
741 dev_kfree_skb(skb);
742 npo.meta_cons += nr_frags + 1;
743 }
745 while (notify_nr != 0) {
746 irq = notify_list[--notify_nr];
747 rx_notify[irq] = 0;
748 notify_remote_via_irq(irq);
749 }
751 /* More work to do? */
752 if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
753 tasklet_schedule(&net_rx_tasklet);
754 #if 0
755 else
756 xen_network_done_notify();
757 #endif
758 }
760 static void net_alarm(unsigned long unused)
761 {
762 tasklet_schedule(&net_rx_tasklet);
763 }
765 static void netbk_tx_pending_timeout(unsigned long unused)
766 {
767 tasklet_schedule(&net_tx_tasklet);
768 }
770 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
771 {
772 netif_t *netif = netdev_priv(dev);
773 return &netif->stats;
774 }
776 static int __on_net_schedule_list(netif_t *netif)
777 {
778 return netif->list.next != NULL;
779 }
781 static void remove_from_net_schedule_list(netif_t *netif)
782 {
783 spin_lock_irq(&net_schedule_list_lock);
784 if (likely(__on_net_schedule_list(netif))) {
785 list_del(&netif->list);
786 netif->list.next = NULL;
787 netif_put(netif);
788 }
789 spin_unlock_irq(&net_schedule_list_lock);
790 }
792 static void add_to_net_schedule_list_tail(netif_t *netif)
793 {
794 if (__on_net_schedule_list(netif))
795 return;
797 spin_lock_irq(&net_schedule_list_lock);
798 if (!__on_net_schedule_list(netif) &&
799 likely(netif_schedulable(netif))) {
800 list_add_tail(&netif->list, &net_schedule_list);
801 netif_get(netif);
802 }
803 spin_unlock_irq(&net_schedule_list_lock);
804 }
806 /*
807 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
808 * If this driver is pipelining transmit requests then we can be very
809 * aggressive in avoiding new-packet notifications -- frontend only needs to
810 * send a notification if there are no outstanding unreceived responses.
811 * If we may be buffering transmit buffers for any reason then we must be rather
812 * more conservative and treat this as the final check for pending work.
813 */
814 void netif_schedule_work(netif_t *netif)
815 {
816 int more_to_do;
818 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
819 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
820 #else
821 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
822 #endif
824 if (more_to_do) {
825 add_to_net_schedule_list_tail(netif);
826 maybe_schedule_tx_action();
827 }
828 }
830 void netif_deschedule_work(netif_t *netif)
831 {
832 remove_from_net_schedule_list(netif);
833 }
836 static void tx_add_credit(netif_t *netif)
837 {
838 unsigned long max_burst, max_credit;
840 /*
841 * Allow a burst big enough to transmit a jumbo packet of up to 128kB.
842 * Otherwise the interface can seize up due to insufficient credit.
843 */
844 max_burst = RING_GET_REQUEST(&netif->tx, netif->tx.req_cons)->size;
845 max_burst = min(max_burst, 131072UL);
846 max_burst = max(max_burst, netif->credit_bytes);
848 /* Take care that adding a new chunk of credit doesn't wrap to zero. */
849 max_credit = netif->remaining_credit + netif->credit_bytes;
850 if (max_credit < netif->remaining_credit)
851 max_credit = ULONG_MAX; /* wrapped: clamp to ULONG_MAX */
853 netif->remaining_credit = min(max_credit, max_burst);
854 }
856 static void tx_credit_callback(unsigned long data)
857 {
858 netif_t *netif = (netif_t *)data;
859 tx_add_credit(netif);
860 netif_schedule_work(netif);
861 }
863 static inline int copy_pending_req(PEND_RING_IDX pending_idx)
864 {
865 return gnttab_copy_grant_page(grant_tx_handle[pending_idx],
866 &mmap_pages[pending_idx]);
867 }
869 inline static void net_tx_action_dealloc(void)
870 {
871 struct netbk_tx_pending_inuse *inuse, *n;
872 gnttab_unmap_grant_ref_t *gop;
873 u16 pending_idx;
874 PEND_RING_IDX dc, dp;
875 netif_t *netif;
876 int ret;
877 LIST_HEAD(list);
879 dc = dealloc_cons;
880 gop = tx_unmap_ops;
882 /*
883 * Free up any grants we have finished using
884 */
885 do {
886 dp = dealloc_prod;
888 /* Ensure we see all indices enqueued by netif_idx_release(). */
889 smp_rmb();
891 while (dc != dp) {
892 unsigned long pfn;
894 pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
895 list_move_tail(&pending_inuse[pending_idx].list, &list);
897 pfn = idx_to_pfn(pending_idx);
898 /* Already unmapped? */
899 if (!phys_to_machine_mapping_valid(pfn))
900 continue;
902 gnttab_set_unmap_op(gop, idx_to_kaddr(pending_idx),
903 GNTMAP_host_map,
904 grant_tx_handle[pending_idx]);
905 gop++;
906 }
908 if (netbk_copy_skb_mode != NETBK_DELAYED_COPY_SKB ||
909 list_empty(&pending_inuse_head))
910 break;
912 /* Copy any entries that have been pending for too long. */
913 list_for_each_entry_safe(inuse, n, &pending_inuse_head, list) {
914 if (time_after(inuse->alloc_time + HZ / 2, jiffies))
915 break;
917 pending_idx = inuse - pending_inuse;
919 pending_tx_info[pending_idx].netif->nr_copied_skbs++;
921 switch (copy_pending_req(pending_idx)) {
922 case 0:
923 list_move_tail(&inuse->list, &list);
924 continue;
925 case -EBUSY:
926 list_del_init(&inuse->list);
927 continue;
928 case -ENOENT:
929 continue;
930 }
932 break;
933 }
934 } while (dp != dealloc_prod);
936 dealloc_cons = dc;
938 ret = HYPERVISOR_grant_table_op(
939 GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
940 BUG_ON(ret);
942 list_for_each_entry_safe(inuse, n, &list, list) {
943 pending_idx = inuse - pending_inuse;
945 netif = pending_tx_info[pending_idx].netif;
947 make_tx_response(netif, &pending_tx_info[pending_idx].req,
948 NETIF_RSP_OKAY);
950 /* Ready for next use. */
951 gnttab_reset_grant_page(mmap_pages[pending_idx]);
953 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
955 netif_put(netif);
957 list_del_init(&inuse->list);
958 }
959 }
961 static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
962 {
963 RING_IDX cons = netif->tx.req_cons;
965 do {
966 make_tx_response(netif, txp, NETIF_RSP_ERROR);
967 if (cons >= end)
968 break;
969 txp = RING_GET_REQUEST(&netif->tx, cons++);
970 } while (1);
971 netif->tx.req_cons = cons;
972 netif_schedule_work(netif);
973 netif_put(netif);
974 }
976 static int netbk_count_requests(netif_t *netif, netif_tx_request_t *first,
977 netif_tx_request_t *txp, int work_to_do)
978 {
979 RING_IDX cons = netif->tx.req_cons;
980 int frags = 0;
982 if (!(first->flags & NETTXF_more_data))
983 return 0;
985 do {
986 if (frags >= work_to_do) {
987 DPRINTK("Need more frags\n");
988 return -frags;
989 }
991 if (unlikely(frags >= MAX_SKB_FRAGS)) {
992 DPRINTK("Too many frags\n");
993 return -frags;
994 }
996 memcpy(txp, RING_GET_REQUEST(&netif->tx, cons + frags),
997 sizeof(*txp));
998 if (txp->size > first->size) {
999 DPRINTK("Frags galore\n");
1000 return -frags;
1001 }
1003 first->size -= txp->size;
1004 frags++;
1006 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
1007 DPRINTK("txp->offset: %x, size: %u\n",
1008 txp->offset, txp->size);
1009 return -frags;
1010 }
1011 } while ((txp++)->flags & NETTXF_more_data);
1013 return frags;
1014 }
1016 static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
1017 struct sk_buff *skb,
1018 netif_tx_request_t *txp,
1019 gnttab_map_grant_ref_t *mop)
1020 {
1021 struct skb_shared_info *shinfo = skb_shinfo(skb);
1022 skb_frag_t *frags = shinfo->frags;
1023 unsigned long pending_idx = *((u16 *)skb->data);
1024 int i, start;
1026 /* Skip first skb fragment if it is on same page as header fragment. */
1027 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
1029 for (i = start; i < shinfo->nr_frags; i++, txp++) {
1030 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
1032 gnttab_set_map_op(mop++, idx_to_kaddr(pending_idx),
1033 GNTMAP_host_map | GNTMAP_readonly,
1034 txp->gref, netif->domid);
1036 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
1037 netif_get(netif);
1038 pending_tx_info[pending_idx].netif = netif;
1039 frags[i].page = (void *)pending_idx;
1040 }
1042 return mop;
1043 }
1045 static int netbk_tx_check_mop(struct sk_buff *skb,
1046 gnttab_map_grant_ref_t **mopp)
1047 {
1048 gnttab_map_grant_ref_t *mop = *mopp;
1049 int pending_idx = *((u16 *)skb->data);
1050 netif_t *netif = pending_tx_info[pending_idx].netif;
1051 netif_tx_request_t *txp;
1052 struct skb_shared_info *shinfo = skb_shinfo(skb);
1053 int nr_frags = shinfo->nr_frags;
1054 int i, err, start;
1056 /* Check status of header. */
1057 err = mop->status;
1058 if (unlikely(err)) {
1059 txp = &pending_tx_info[pending_idx].req;
1060 make_tx_response(netif, txp, NETIF_RSP_ERROR);
1061 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
1062 netif_put(netif);
1063 } else {
1064 set_phys_to_machine(
1065 __pa(idx_to_kaddr(pending_idx)) >> PAGE_SHIFT,
1066 FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
1067 grant_tx_handle[pending_idx] = mop->handle;
1068 }
1070 /* Skip first skb fragment if it is on same page as header fragment. */
1071 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
1073 for (i = start; i < nr_frags; i++) {
1074 int j, newerr;
1076 pending_idx = (unsigned long)shinfo->frags[i].page;
1078 /* Check error status: if okay then remember grant handle. */
1079 newerr = (++mop)->status;
1080 if (likely(!newerr)) {
1081 set_phys_to_machine(
1082 __pa(idx_to_kaddr(pending_idx))>>PAGE_SHIFT,
1083 FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
1084 grant_tx_handle[pending_idx] = mop->handle;
1085 /* Had a previous error? Invalidate this fragment. */
1086 if (unlikely(err))
1087 netif_idx_release(pending_idx);
1088 continue;
1089 }
1091 /* Error on this fragment: respond to client with an error. */
1092 txp = &pending_tx_info[pending_idx].req;
1093 make_tx_response(netif, txp, NETIF_RSP_ERROR);
1094 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
1095 netif_put(netif);
1097 /* Not the first error? Preceding frags already invalidated. */
1098 if (err)
1099 continue;
1101 /* First error: invalidate header and preceding fragments. */
1102 pending_idx = *((u16 *)skb->data);
1103 netif_idx_release(pending_idx);
1104 for (j = start; j < i; j++) {
1105 pending_idx = (unsigned long)shinfo->frags[i].page;
1106 netif_idx_release(pending_idx);
1107 }
1109 /* Remember the error: invalidate all subsequent fragments. */
1110 err = newerr;
1111 }
1113 *mopp = mop + 1;
1114 return err;
1115 }
1117 static void netbk_fill_frags(struct sk_buff *skb)
1118 {
1119 struct skb_shared_info *shinfo = skb_shinfo(skb);
1120 int nr_frags = shinfo->nr_frags;
1121 int i;
1123 for (i = 0; i < nr_frags; i++) {
1124 skb_frag_t *frag = shinfo->frags + i;
1125 netif_tx_request_t *txp;
1126 unsigned long pending_idx;
1128 pending_idx = (unsigned long)frag->page;
1130 pending_inuse[pending_idx].alloc_time = jiffies;
1131 list_add_tail(&pending_inuse[pending_idx].list,
1132 &pending_inuse_head);
1134 txp = &pending_tx_info[pending_idx].req;
1135 frag->page = virt_to_page(idx_to_kaddr(pending_idx));
1136 frag->size = txp->size;
1137 frag->page_offset = txp->offset;
1139 skb->len += txp->size;
1140 skb->data_len += txp->size;
1141 skb->truesize += txp->size;
1142 }
1143 }
1145 int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
1146 int work_to_do)
1147 {
1148 struct netif_extra_info extra;
1149 RING_IDX cons = netif->tx.req_cons;
1151 do {
1152 if (unlikely(work_to_do-- <= 0)) {
1153 DPRINTK("Missing extra info\n");
1154 return -EBADR;
1155 }
1157 memcpy(&extra, RING_GET_REQUEST(&netif->tx, cons),
1158 sizeof(extra));
1159 if (unlikely(!extra.type ||
1160 extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
1161 netif->tx.req_cons = ++cons;
1162 DPRINTK("Invalid extra type: %d\n", extra.type);
1163 return -EINVAL;
1164 }
1166 memcpy(&extras[extra.type - 1], &extra, sizeof(extra));
1167 netif->tx.req_cons = ++cons;
1168 } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE);
1170 return work_to_do;
1171 }
1173 static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
1174 {
1175 if (!gso->u.gso.size) {
1176 DPRINTK("GSO size must not be zero.\n");
1177 return -EINVAL;
1178 }
1180 /* Currently only TCPv4 S.O. is supported. */
1181 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
1182 DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
1183 return -EINVAL;
1184 }
1186 skb_shinfo(skb)->gso_size = gso->u.gso.size;
1187 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1189 /* Header must be checked, and gso_segs computed. */
1190 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1191 skb_shinfo(skb)->gso_segs = 0;
1193 return 0;
1194 }
1196 /* Called after netfront has transmitted */
1197 static void net_tx_action(unsigned long unused)
1198 {
1199 struct list_head *ent;
1200 struct sk_buff *skb;
1201 netif_t *netif;
1202 netif_tx_request_t txreq;
1203 netif_tx_request_t txfrags[MAX_SKB_FRAGS];
1204 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
1205 u16 pending_idx;
1206 RING_IDX i;
1207 gnttab_map_grant_ref_t *mop;
1208 unsigned int data_len;
1209 int ret, work_to_do;
1211 if (dealloc_cons != dealloc_prod)
1212 net_tx_action_dealloc();
1214 mop = tx_map_ops;
1215 while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
1216 !list_empty(&net_schedule_list)) {
1217 /* Get a netif from the list with work to do. */
1218 ent = net_schedule_list.next;
1219 netif = list_entry(ent, netif_t, list);
1220 netif_get(netif);
1221 remove_from_net_schedule_list(netif);
1223 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
1224 if (!work_to_do) {
1225 netif_put(netif);
1226 continue;
1227 }
1229 i = netif->tx.req_cons;
1230 rmb(); /* Ensure that we see the request before we copy it. */
1231 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
1233 /* Credit-based scheduling. */
1234 if (txreq.size > netif->remaining_credit) {
1235 unsigned long now = jiffies;
1236 unsigned long next_credit =
1237 netif->credit_timeout.expires +
1238 msecs_to_jiffies(netif->credit_usec / 1000);
1240 /* Timer could already be pending in rare cases. */
1241 if (timer_pending(&netif->credit_timeout)) {
1242 netif_put(netif);
1243 continue;
1244 }
1246 /* Passed the point where we can replenish credit? */
1247 if (time_after_eq(now, next_credit)) {
1248 netif->credit_timeout.expires = now;
1249 tx_add_credit(netif);
1250 }
1252 /* Still too big to send right now? Set a callback. */
1253 if (txreq.size > netif->remaining_credit) {
1254 netif->credit_timeout.data =
1255 (unsigned long)netif;
1256 netif->credit_timeout.function =
1257 tx_credit_callback;
1258 __mod_timer(&netif->credit_timeout,
1259 next_credit);
1260 netif_put(netif);
1261 continue;
1262 }
1263 }
1264 netif->remaining_credit -= txreq.size;
1266 work_to_do--;
1267 netif->tx.req_cons = ++i;
1269 memset(extras, 0, sizeof(extras));
1270 if (txreq.flags & NETTXF_extra_info) {
1271 work_to_do = netbk_get_extras(netif, extras,
1272 work_to_do);
1273 i = netif->tx.req_cons;
1274 if (unlikely(work_to_do < 0)) {
1275 netbk_tx_err(netif, &txreq, i);
1276 continue;
1277 }
1278 }
1280 ret = netbk_count_requests(netif, &txreq, txfrags, work_to_do);
1281 if (unlikely(ret < 0)) {
1282 netbk_tx_err(netif, &txreq, i - ret);
1283 continue;
1284 }
1285 i += ret;
1287 if (unlikely(txreq.size < ETH_HLEN)) {
1288 DPRINTK("Bad packet size: %d\n", txreq.size);
1289 netbk_tx_err(netif, &txreq, i);
1290 continue;
1291 }
1293 /* No crossing a page as the payload mustn't fragment. */
1294 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
1295 DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
1296 txreq.offset, txreq.size,
1297 (txreq.offset &~PAGE_MASK) + txreq.size);
1298 netbk_tx_err(netif, &txreq, i);
1299 continue;
1300 }
1302 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
1304 data_len = (txreq.size > PKT_PROT_LEN &&
1305 ret < MAX_SKB_FRAGS) ?
1306 PKT_PROT_LEN : txreq.size;
1308 skb = alloc_skb(data_len + 16 + NET_IP_ALIGN,
1309 GFP_ATOMIC | __GFP_NOWARN);
1310 if (unlikely(skb == NULL)) {
1311 DPRINTK("Can't allocate a skb in start_xmit.\n");
1312 netbk_tx_err(netif, &txreq, i);
1313 break;
1314 }
1316 /* Packets passed to netif_rx() must have some headroom. */
1317 skb_reserve(skb, 16 + NET_IP_ALIGN);
1319 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1320 struct netif_extra_info *gso;
1321 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1323 if (netbk_set_skb_gso(skb, gso)) {
1324 kfree_skb(skb);
1325 netbk_tx_err(netif, &txreq, i);
1326 continue;
1327 }
1328 }
1330 gnttab_set_map_op(mop, idx_to_kaddr(pending_idx),
1331 GNTMAP_host_map | GNTMAP_readonly,
1332 txreq.gref, netif->domid);
1333 mop++;
1335 memcpy(&pending_tx_info[pending_idx].req,
1336 &txreq, sizeof(txreq));
1337 pending_tx_info[pending_idx].netif = netif;
1338 *((u16 *)skb->data) = pending_idx;
1340 __skb_put(skb, data_len);
1342 skb_shinfo(skb)->nr_frags = ret;
1343 if (data_len < txreq.size) {
1344 skb_shinfo(skb)->nr_frags++;
1345 skb_shinfo(skb)->frags[0].page =
1346 (void *)(unsigned long)pending_idx;
1347 } else {
1348 /* Discriminate from any valid pending_idx value. */
1349 skb_shinfo(skb)->frags[0].page = (void *)~0UL;
1350 }
1352 __skb_queue_tail(&tx_queue, skb);
1354 pending_cons++;
1356 mop = netbk_get_requests(netif, skb, txfrags, mop);
1358 netif->tx.req_cons = i;
1359 netif_schedule_work(netif);
1361 if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
1362 break;
1363 }
1365 if (mop == tx_map_ops)
1366 return;
1368 ret = HYPERVISOR_grant_table_op(
1369 GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
1370 BUG_ON(ret);
1372 mop = tx_map_ops;
1373 while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
1374 netif_tx_request_t *txp;
1376 pending_idx = *((u16 *)skb->data);
1377 netif = pending_tx_info[pending_idx].netif;
1378 txp = &pending_tx_info[pending_idx].req;
1380 /* Check the remap error code. */
1381 if (unlikely(netbk_tx_check_mop(skb, &mop))) {
1382 DPRINTK("netback grant failed.\n");
1383 skb_shinfo(skb)->nr_frags = 0;
1384 kfree_skb(skb);
1385 continue;
1386 }
1388 data_len = skb->len;
1389 memcpy(skb->data,
1390 (void *)(idx_to_kaddr(pending_idx)|txp->offset),
1391 data_len);
1392 if (data_len < txp->size) {
1393 /* Append the packet payload as a fragment. */
1394 txp->offset += data_len;
1395 txp->size -= data_len;
1396 } else {
1397 /* Schedule a response immediately. */
1398 netif_idx_release(pending_idx);
1399 }
1401 /*
1402 * Old frontends do not assert data_validated but we
1403 * can infer it from csum_blank so test both flags.
1404 */
1405 if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
1406 skb->ip_summed = CHECKSUM_UNNECESSARY;
1407 skb->proto_data_valid = 1;
1408 } else {
1409 skb->ip_summed = CHECKSUM_NONE;
1410 skb->proto_data_valid = 0;
1411 }
1412 skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
1414 netbk_fill_frags(skb);
1416 skb->dev = netif->dev;
1417 skb->protocol = eth_type_trans(skb, skb->dev);
1419 netif->stats.rx_bytes += skb->len;
1420 netif->stats.rx_packets++;
1422 if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
1423 unlikely(skb_linearize(skb))) {
1424 DPRINTK("Can't linearize skb in net_tx_action.\n");
1425 kfree_skb(skb);
1426 continue;
1427 }
1429 netif_rx(skb);
1430 netif->dev->last_rx = jiffies;
1431 }
1433 if (netbk_copy_skb_mode == NETBK_DELAYED_COPY_SKB &&
1434 !list_empty(&pending_inuse_head)) {
1435 struct netbk_tx_pending_inuse *oldest;
1437 oldest = list_entry(pending_inuse_head.next,
1438 struct netbk_tx_pending_inuse, list);
1439 mod_timer(&netbk_tx_pending_timer, oldest->alloc_time + HZ);
1440 }
1441 }
1443 static void netif_idx_release(u16 pending_idx)
1444 {
1445 static DEFINE_SPINLOCK(_lock);
1446 unsigned long flags;
1448 spin_lock_irqsave(&_lock, flags);
1449 dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
1450 /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
1451 smp_wmb();
1452 dealloc_prod++;
1453 spin_unlock_irqrestore(&_lock, flags);
1455 tasklet_schedule(&net_tx_tasklet);
1456 }
1458 static void netif_page_release(struct page *page, unsigned int order)
1459 {
1460 int idx = netif_page_index(page);
1461 BUG_ON(order);
1462 BUG_ON(idx < 0);
1463 netif_idx_release(idx);
1464 }
1466 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
1467 {
1468 netif_t *netif = dev_id;
1470 add_to_net_schedule_list_tail(netif);
1471 maybe_schedule_tx_action();
1473 if (netif_schedulable(netif) && !netbk_queue_full(netif))
1474 netif_wake_queue(netif->dev);
1476 return IRQ_HANDLED;
1477 }
1479 static void make_tx_response(netif_t *netif,
1480 netif_tx_request_t *txp,
1481 s8 st)
1482 {
1483 RING_IDX i = netif->tx.rsp_prod_pvt;
1484 netif_tx_response_t *resp;
1485 int notify;
1487 resp = RING_GET_RESPONSE(&netif->tx, i);
1488 resp->id = txp->id;
1489 resp->status = st;
1491 if (txp->flags & NETTXF_extra_info)
1492 RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
1494 netif->tx.rsp_prod_pvt = ++i;
1495 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
1496 if (notify)
1497 notify_remote_via_irq(netif->irq);
1499 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
1500 if (i == netif->tx.req_cons) {
1501 int more_to_do;
1502 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
1503 if (more_to_do)
1504 add_to_net_schedule_list_tail(netif);
1505 }
1506 #endif
1507 }
1509 static netif_rx_response_t *make_rx_response(netif_t *netif,
1510 u16 id,
1511 s8 st,
1512 u16 offset,
1513 u16 size,
1514 u16 flags)
1515 {
1516 RING_IDX i = netif->rx.rsp_prod_pvt;
1517 netif_rx_response_t *resp;
1519 resp = RING_GET_RESPONSE(&netif->rx, i);
1520 resp->offset = offset;
1521 resp->flags = flags;
1522 resp->id = id;
1523 resp->status = (s16)size;
1524 if (st < 0)
1525 resp->status = (s16)st;
1527 netif->rx.rsp_prod_pvt = ++i;
1529 return resp;
1530 }
1532 #ifdef NETBE_DEBUG_INTERRUPT
1533 static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
1534 {
1535 struct list_head *ent;
1536 netif_t *netif;
1537 int i = 0;
1539 printk(KERN_ALERT "netif_schedule_list:\n");
1540 spin_lock_irq(&net_schedule_list_lock);
1542 list_for_each (ent, &net_schedule_list) {
1543 netif = list_entry(ent, netif_t, list);
1544 printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
1545 "rx_resp_prod=%08x\n",
1546 i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
1547 printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
1548 netif->tx.req_cons, netif->tx.rsp_prod_pvt);
1549 printk(KERN_ALERT " shared(rx_req_prod=%08x "
1550 "rx_resp_prod=%08x\n",
1551 netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
1552 printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
1553 netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
1554 printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
1555 netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
1556 i++;
1557 }
1559 spin_unlock_irq(&net_schedule_list_lock);
1560 printk(KERN_ALERT " ** End of netif_schedule_list **\n");
1562 return IRQ_HANDLED;
1563 }
1564 #endif
1566 static int __init netback_init(void)
1567 {
1568 int i;
1569 struct page *page;
1571 if (!is_running_on_xen())
1572 return -ENODEV;
1574 /* We can increase reservation by this much in net_rx_action(). */
1575 balloon_update_driver_allowance(NET_RX_RING_SIZE);
1577 skb_queue_head_init(&rx_queue);
1578 skb_queue_head_init(&tx_queue);
1580 init_timer(&net_timer);
1581 net_timer.data = 0;
1582 net_timer.function = net_alarm;
1584 init_timer(&netbk_tx_pending_timer);
1585 netbk_tx_pending_timer.data = 0;
1586 netbk_tx_pending_timer.function = netbk_tx_pending_timeout;
1588 mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS);
1589 if (mmap_pages == NULL) {
1590 printk("%s: out of memory\n", __FUNCTION__);
1591 return -ENOMEM;
1592 }
1594 for (i = 0; i < MAX_PENDING_REQS; i++) {
1595 page = mmap_pages[i];
1596 SetPageForeign(page, netif_page_release);
1597 netif_set_page_index(page, i);
1598 INIT_LIST_HEAD(&pending_inuse[i].list);
1599 }
1601 pending_cons = 0;
1602 pending_prod = MAX_PENDING_REQS;
1603 for (i = 0; i < MAX_PENDING_REQS; i++)
1604 pending_ring[i] = i;
1606 spin_lock_init(&net_schedule_list_lock);
1607 INIT_LIST_HEAD(&net_schedule_list);
1609 netbk_copy_skb_mode = NETBK_DONT_COPY_SKB;
1610 if (MODPARM_copy_skb) {
1611 if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_and_replace,
1612 NULL, 0))
1613 netbk_copy_skb_mode = NETBK_ALWAYS_COPY_SKB;
1614 else
1615 netbk_copy_skb_mode = NETBK_DELAYED_COPY_SKB;
1616 }
1618 netif_accel_init();
1620 netif_xenbus_init();
1622 #ifdef NETBE_DEBUG_INTERRUPT
1623 (void)bind_virq_to_irqhandler(VIRQ_DEBUG,
1624 0,
1625 netif_be_dbg,
1626 SA_SHIRQ,
1627 "net-be-dbg",
1628 &netif_be_dbg);
1629 #endif
1631 return 0;
1632 }
1634 module_init(netback_init);
1636 MODULE_LICENSE("Dual BSD/GPL");