ia64/xen-unstable

view linux-2.6-xen-sparse/drivers/xen/netback/netback.c @ 10642:4b9876fe2f1f

[NET] back: Add GSO features field and check gso_size

This patch adds the as-yet-unused GSO features field, which will contain
protocol-independent bits such as the ECN marker.

It also makes the backend check gso_size to ensure that it is non-zero.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author kaf24@firebug.cl.cam.ac.uk
date Mon Jul 03 09:05:18 2006 +0100 (2006-07-03)
parents 18abc9eb9a31
children 4782114d0713
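
For illustration, the gso_size check described above corresponds to the validation done in netbk_set_skb_gso() in the listing below. The stand-alone sketch that follows shows the shape of that check; the struct and the GSO_TYPE_TCPV4 constant are simplified stand-ins (not the real netif_extra_info definitions from xen/interface/io/netif.h), included only so the snippet compiles on its own.

/*
 * Stand-alone sketch of the gso_size/type validation this changeset adds.
 * The struct below is a simplified stand-in for the GSO member of
 * netif_extra_info; it exists only so the example builds by itself.
 */
#include <stdio.h>

#define GSO_TYPE_TCPV4 1		/* stand-in for XEN_NETIF_GSO_TYPE_TCPV4 */

struct gso_extra {
	unsigned short size;		/* MSS requested by the frontend */
	unsigned char  type;		/* segmentation type */
	unsigned short features;	/* as-yet unused, e.g. ECN marker */
};

/*
 * Mirrors the checks in netbk_set_skb_gso(): reject a zero MSS and any
 * segmentation type other than TCPv4. Returns 0 on success, -1 on error.
 */
static int validate_gso(const struct gso_extra *gso)
{
	if (gso->size == 0)
		return -1;
	if (gso->type != GSO_TYPE_TCPV4)
		return -1;
	return 0;
}

int main(void)
{
	struct gso_extra ok  = { .size = 1448, .type = GSO_TYPE_TCPV4, .features = 0 };
	struct gso_extra bad = { .size = 0,    .type = GSO_TYPE_TCPV4, .features = 0 };

	printf("valid request: %d\n", validate_gso(&ok));	/* prints 0 */
	printf("zero gso_size: %d\n", validate_gso(&bad));	/* prints -1 */
	return 0;
}

Building this with any C compiler (e.g. gcc) shows the accepting and rejecting cases; in the driver itself the rejection path responds to the frontend with NETIF_RSP_ERROR.
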
line source
1 /******************************************************************************
2 * drivers/xen/netback/netback.c
3 *
4 * Back-end of the driver for virtual network devices. This portion of the
5 * driver exports a 'unified' network-device interface that can be accessed
6 * by any operating system that implements a compatible front end. A
7 * reference front-end implementation can be found in:
8 * drivers/xen/netfront/netfront.c
9 *
10 * Copyright (c) 2002-2005, K A Fraser
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License version 2
14 * as published by the Free Software Foundation; or, when distributed
15 * separately from the Linux kernel or incorporated into other
16 * software packages, subject to the following license:
17 *
18 * Permission is hereby granted, free of charge, to any person obtaining a copy
19 * of this source file (the "Software"), to deal in the Software without
20 * restriction, including without limitation the rights to use, copy, modify,
21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
22 * and to permit persons to whom the Software is furnished to do so, subject to
23 * the following conditions:
24 *
25 * The above copyright notice and this permission notice shall be included in
26 * all copies or substantial portions of the Software.
27 *
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
34 * IN THE SOFTWARE.
35 */
37 #include "common.h"
38 #include <xen/balloon.h>
39 #include <xen/interface/memory.h>
41 /*#define NETBE_DEBUG_INTERRUPT*/
43 static void netif_idx_release(u16 pending_idx);
44 static void netif_page_release(struct page *page);
45 static void make_tx_response(netif_t *netif,
46 netif_tx_request_t *txp,
47 s8 st);
48 static int make_rx_response(netif_t *netif,
49 u16 id,
50 s8 st,
51 u16 offset,
52 u16 size,
53 u16 flags);
55 static void net_tx_action(unsigned long unused);
56 static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
58 static void net_rx_action(unsigned long unused);
59 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
61 static struct timer_list net_timer;
63 #define MAX_PENDING_REQS 256
65 static struct sk_buff_head rx_queue;
66 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
67 static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
68 static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
69 static unsigned char rx_notify[NR_IRQS];
71 static unsigned long mmap_vstart;
72 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
74 #define PKT_PROT_LEN 64
76 static struct {
77 netif_tx_request_t req;
78 netif_t *netif;
79 } pending_tx_info[MAX_PENDING_REQS];
80 static u16 pending_ring[MAX_PENDING_REQS];
81 typedef unsigned int PEND_RING_IDX;
82 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
83 static PEND_RING_IDX pending_prod, pending_cons;
84 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
86 /* Freed TX SKBs get batched on this ring before return to pending_ring. */
87 static u16 dealloc_ring[MAX_PENDING_REQS];
88 static PEND_RING_IDX dealloc_prod, dealloc_cons;
90 static struct sk_buff_head tx_queue;
92 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
93 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
94 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
96 static struct list_head net_schedule_list;
97 static spinlock_t net_schedule_list_lock;
99 #define MAX_MFN_ALLOC 64
100 static unsigned long mfn_list[MAX_MFN_ALLOC];
101 static unsigned int alloc_index = 0;
102 static DEFINE_SPINLOCK(mfn_lock);
104 static unsigned long alloc_mfn(void)
105 {
106 unsigned long mfn = 0, flags;
107 struct xen_memory_reservation reservation = {
108 .nr_extents = MAX_MFN_ALLOC,
109 .extent_order = 0,
110 .domid = DOMID_SELF
111 };
112 set_xen_guest_handle(reservation.extent_start, mfn_list);
113 spin_lock_irqsave(&mfn_lock, flags);
114 if ( unlikely(alloc_index == 0) )
115 alloc_index = HYPERVISOR_memory_op(
116 XENMEM_increase_reservation, &reservation);
117 if ( alloc_index != 0 )
118 mfn = mfn_list[--alloc_index];
119 spin_unlock_irqrestore(&mfn_lock, flags);
120 return mfn;
121 }
123 static inline void maybe_schedule_tx_action(void)
124 {
125 smp_mb();
126 if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
127 !list_empty(&net_schedule_list))
128 tasklet_schedule(&net_tx_tasklet);
129 }
131 /*
132 * A gross way of confirming the origin of an skb data page. The slab
133 * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
134 */
135 static inline int is_xen_skb(struct sk_buff *skb)
136 {
137 extern kmem_cache_t *skbuff_cachep;
138 kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
139 return (cp == skbuff_cachep);
140 }
142 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
143 {
144 netif_t *netif = netdev_priv(dev);
146 BUG_ON(skb->dev != dev);
148 /* Drop the packet if the target domain has no receive buffers. */
149 if (!netif->active ||
150 (netif->rx_req_cons_peek == netif->rx.sring->req_prod) ||
151 ((netif->rx_req_cons_peek - netif->rx.rsp_prod_pvt) ==
152 NET_RX_RING_SIZE))
153 goto drop;
155 /*
156 * We do not copy the packet unless:
157 * 1. The data is shared or cloned; or
158 * 2. The data is not allocated from our special cache.
159 * NB. We also couldn't cope with fragmented packets, but we won't get
160 * any because we do not advertise the NETIF_F_SG feature.
161 */
162 if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
163 int hlen = skb->data - skb->head;
164 int ret;
165 struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
166 if ( unlikely(nskb == NULL) )
167 goto drop;
168 skb_reserve(nskb, hlen);
169 __skb_put(nskb, skb->len);
170 ret = skb_copy_bits(skb, -hlen, nskb->data - hlen,
171 skb->len + hlen);
172 BUG_ON(ret);
173 /* Copy only the header fields we use in this driver. */
174 nskb->dev = skb->dev;
175 nskb->ip_summed = skb->ip_summed;
176 nskb->proto_data_valid = skb->proto_data_valid;
177 dev_kfree_skb(skb);
178 skb = nskb;
179 }
181 netif->rx_req_cons_peek++;
182 netif_get(netif);
184 skb_queue_tail(&rx_queue, skb);
185 tasklet_schedule(&net_rx_tasklet);
187 return 0;
189 drop:
190 netif->stats.tx_dropped++;
191 dev_kfree_skb(skb);
192 return 0;
193 }
195 #if 0
196 static void xen_network_done_notify(void)
197 {
198 static struct net_device *eth0_dev = NULL;
199 if (unlikely(eth0_dev == NULL))
200 eth0_dev = __dev_get_by_name("eth0");
201 netif_rx_schedule(eth0_dev);
202 }
203 /*
204 * Add the following to the poll() function in a NAPI driver (Tigon3 is an example):
205 * if ( xen_network_done() )
206 * tg3_enable_ints(tp);
207 */
208 int xen_network_done(void)
209 {
210 return skb_queue_empty(&rx_queue);
211 }
212 #endif
214 static void net_rx_action(unsigned long unused)
215 {
216 netif_t *netif = NULL;
217 s8 status;
218 u16 size, id, irq, flags;
219 multicall_entry_t *mcl;
220 mmu_update_t *mmu;
221 gnttab_transfer_t *gop;
222 unsigned long vdata, old_mfn, new_mfn;
223 struct sk_buff_head rxq;
224 struct sk_buff *skb;
225 u16 notify_list[NET_RX_RING_SIZE];
226 int notify_nr = 0;
227 int ret;
229 skb_queue_head_init(&rxq);
231 mcl = rx_mcl;
232 mmu = rx_mmu;
233 gop = grant_rx_op;
235 while ((skb = skb_dequeue(&rx_queue)) != NULL) {
236 netif = netdev_priv(skb->dev);
237 vdata = (unsigned long)skb->data;
238 old_mfn = virt_to_mfn(vdata);
240 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
241 /* Memory squeeze? Back off for an arbitrary while. */
242 if ((new_mfn = alloc_mfn()) == 0) {
243 if ( net_ratelimit() )
244 WPRINTK("Memory squeeze in netback "
245 "driver.\n");
246 mod_timer(&net_timer, jiffies + HZ);
247 skb_queue_head(&rx_queue, skb);
248 break;
249 }
250 /*
251 * Set the new P2M table entry before reassigning
252 * the old data page. Heed the comment in
253 * pgtable-2level.h:pte_page(). :-)
254 */
255 set_phys_to_machine(
256 __pa(skb->data) >> PAGE_SHIFT,
257 new_mfn);
259 MULTI_update_va_mapping(mcl, vdata,
260 pfn_pte_ma(new_mfn,
261 PAGE_KERNEL), 0);
262 mcl++;
264 mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
265 MMU_MACHPHYS_UPDATE;
266 mmu->val = __pa(vdata) >> PAGE_SHIFT;
267 mmu++;
268 }
270 gop->mfn = old_mfn;
271 gop->domid = netif->domid;
272 gop->ref = RING_GET_REQUEST(
273 &netif->rx, netif->rx.req_cons)->gref;
274 netif->rx.req_cons++;
275 gop++;
277 __skb_queue_tail(&rxq, skb);
279 /* Filled the batch queue? */
280 if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
281 break;
282 }
284 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
285 if (mcl == rx_mcl)
286 return;
288 mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
290 if (mmu - rx_mmu) {
291 mcl->op = __HYPERVISOR_mmu_update;
292 mcl->args[0] = (unsigned long)rx_mmu;
293 mcl->args[1] = mmu - rx_mmu;
294 mcl->args[2] = 0;
295 mcl->args[3] = DOMID_SELF;
296 mcl++;
297 }
299 ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
300 BUG_ON(ret != 0);
301 }
303 ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op,
304 gop - grant_rx_op);
305 BUG_ON(ret != 0);
307 mcl = rx_mcl;
308 gop = grant_rx_op;
309 while ((skb = __skb_dequeue(&rxq)) != NULL) {
310 netif = netdev_priv(skb->dev);
311 size = skb->tail - skb->data;
313 atomic_set(&(skb_shinfo(skb)->dataref), 1);
314 skb_shinfo(skb)->nr_frags = 0;
315 skb_shinfo(skb)->frag_list = NULL;
317 netif->stats.tx_bytes += size;
318 netif->stats.tx_packets++;
320 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
321 /* The update_va_mapping() must not fail. */
322 BUG_ON(mcl->result != 0);
323 mcl++;
324 }
326 /* Check the reassignment error code. */
327 status = NETIF_RSP_OKAY;
328 if (gop->status != 0) {
329 DPRINTK("Bad status %d from grant transfer to DOM%u\n",
330 gop->status, netif->domid);
331 /*
332 * Page no longer belongs to us unless GNTST_bad_page,
333 * but that should be a fatal error anyway.
334 */
335 BUG_ON(gop->status == GNTST_bad_page);
336 status = NETIF_RSP_ERROR;
337 }
338 irq = netif->irq;
339 id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
340 flags = 0;
341 if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
342 flags |= NETRXF_csum_blank | NETRXF_data_validated;
343 else if (skb->proto_data_valid) /* remote but checksummed? */
344 flags |= NETRXF_data_validated;
345 if (make_rx_response(netif, id, status,
346 (unsigned long)skb->data & ~PAGE_MASK,
347 size, flags) &&
348 (rx_notify[irq] == 0)) {
349 rx_notify[irq] = 1;
350 notify_list[notify_nr++] = irq;
351 }
353 netif_put(netif);
354 dev_kfree_skb(skb);
355 gop++;
356 }
358 while (notify_nr != 0) {
359 irq = notify_list[--notify_nr];
360 rx_notify[irq] = 0;
361 notify_remote_via_irq(irq);
362 }
364 /* More work to do? */
365 if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
366 tasklet_schedule(&net_rx_tasklet);
367 #if 0
368 else
369 xen_network_done_notify();
370 #endif
371 }
373 static void net_alarm(unsigned long unused)
374 {
375 tasklet_schedule(&net_rx_tasklet);
376 }
378 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
379 {
380 netif_t *netif = netdev_priv(dev);
381 return &netif->stats;
382 }
384 static int __on_net_schedule_list(netif_t *netif)
385 {
386 return netif->list.next != NULL;
387 }
389 static void remove_from_net_schedule_list(netif_t *netif)
390 {
391 spin_lock_irq(&net_schedule_list_lock);
392 if (likely(__on_net_schedule_list(netif))) {
393 list_del(&netif->list);
394 netif->list.next = NULL;
395 netif_put(netif);
396 }
397 spin_unlock_irq(&net_schedule_list_lock);
398 }
400 static void add_to_net_schedule_list_tail(netif_t *netif)
401 {
402 if (__on_net_schedule_list(netif))
403 return;
405 spin_lock_irq(&net_schedule_list_lock);
406 if (!__on_net_schedule_list(netif) && netif->active) {
407 list_add_tail(&netif->list, &net_schedule_list);
408 netif_get(netif);
409 }
410 spin_unlock_irq(&net_schedule_list_lock);
411 }
413 /*
414 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
415 * If this driver is pipelining transmit requests then we can be very
416 * aggressive in avoiding new-packet notifications -- frontend only needs to
417 * send a notification if there are no outstanding unreceived responses.
418 * If we may be buffering transmit buffers for any reason then we must be rather
419 * more conservative and treat this as the final check for pending work.
420 */
421 void netif_schedule_work(netif_t *netif)
422 {
423 int more_to_do;
425 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
426 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
427 #else
428 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
429 #endif
431 if (more_to_do) {
432 add_to_net_schedule_list_tail(netif);
433 maybe_schedule_tx_action();
434 }
435 }
437 void netif_deschedule_work(netif_t *netif)
438 {
439 remove_from_net_schedule_list(netif);
440 }
443 static void tx_credit_callback(unsigned long data)
444 {
445 netif_t *netif = (netif_t *)data;
446 netif->remaining_credit = netif->credit_bytes;
447 netif_schedule_work(netif);
448 }
450 static inline void net_tx_action_dealloc(void)
451 {
452 gnttab_unmap_grant_ref_t *gop;
453 u16 pending_idx;
454 PEND_RING_IDX dc, dp;
455 netif_t *netif;
456 int ret;
458 dc = dealloc_cons;
459 dp = dealloc_prod;
461 /* Ensure we see all indexes enqueued by netif_idx_release(). */
462 smp_rmb();
464 /*
465 * Free up any grants we have finished using
466 */
467 gop = tx_unmap_ops;
468 while (dc != dp) {
469 pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
470 gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
471 GNTMAP_host_map,
472 grant_tx_handle[pending_idx]);
473 gop++;
474 }
475 ret = HYPERVISOR_grant_table_op(
476 GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
477 BUG_ON(ret);
479 while (dealloc_cons != dp) {
480 pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
482 netif = pending_tx_info[pending_idx].netif;
484 make_tx_response(netif, &pending_tx_info[pending_idx].req,
485 NETIF_RSP_OKAY);
487 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
489 netif_put(netif);
490 }
491 }
493 static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
494 {
495 RING_IDX cons = netif->tx.req_cons;
497 do {
498 make_tx_response(netif, txp, NETIF_RSP_ERROR);
499 if (cons >= end)
500 break;
501 txp = RING_GET_REQUEST(&netif->tx, cons++);
502 } while (1);
503 netif->tx.req_cons = cons;
504 netif_schedule_work(netif);
505 netif_put(netif);
506 }
508 static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
509 int work_to_do)
510 {
511 netif_tx_request_t *first = txp;
512 RING_IDX cons = netif->tx.req_cons;
513 int frags = 0;
515 while (txp->flags & NETTXF_more_data) {
516 if (frags >= work_to_do) {
517 DPRINTK("Need more frags\n");
518 return -frags;
519 }
521 txp = RING_GET_REQUEST(&netif->tx, cons + frags);
522 if (txp->size > first->size) {
523 DPRINTK("Frags galore\n");
524 return -frags;
525 }
527 first->size -= txp->size;
528 frags++;
530 if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
531 DPRINTK("txp->offset: %x, size: %u\n",
532 txp->offset, txp->size);
533 return -frags;
534 }
535 }
537 return frags;
538 }
540 static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
541 struct sk_buff *skb,
542 gnttab_map_grant_ref_t *mop)
543 {
544 struct skb_shared_info *shinfo = skb_shinfo(skb);
545 skb_frag_t *frags = shinfo->frags;
546 netif_tx_request_t *txp;
547 unsigned long pending_idx = *((u16 *)skb->data);
548 RING_IDX cons = netif->tx.req_cons;
549 int i, start;
551 /* Skip first skb fragment if it is on same page as header fragment. */
552 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
554 for (i = start; i < shinfo->nr_frags; i++) {
555 txp = RING_GET_REQUEST(&netif->tx, cons++);
556 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
558 gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
559 GNTMAP_host_map | GNTMAP_readonly,
560 txp->gref, netif->domid);
562 memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
563 netif_get(netif);
564 pending_tx_info[pending_idx].netif = netif;
565 frags[i].page = (void *)pending_idx;
566 }
568 return mop;
569 }
571 static int netbk_tx_check_mop(struct sk_buff *skb,
572 gnttab_map_grant_ref_t **mopp)
573 {
574 gnttab_map_grant_ref_t *mop = *mopp;
575 int pending_idx = *((u16 *)skb->data);
576 netif_t *netif = pending_tx_info[pending_idx].netif;
577 netif_tx_request_t *txp;
578 struct skb_shared_info *shinfo = skb_shinfo(skb);
579 int nr_frags = shinfo->nr_frags;
580 int i, err, start;
582 /* Check status of header. */
583 err = mop->status;
584 if (unlikely(err)) {
585 txp = &pending_tx_info[pending_idx].req;
586 make_tx_response(netif, txp, NETIF_RSP_ERROR);
587 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
588 netif_put(netif);
589 } else {
590 set_phys_to_machine(
591 __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
592 FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
593 grant_tx_handle[pending_idx] = mop->handle;
594 }
596 /* Skip first skb fragment if it is on same page as header fragment. */
597 start = ((unsigned long)shinfo->frags[0].page == pending_idx);
599 for (i = start; i < nr_frags; i++) {
600 int j, newerr;
602 pending_idx = (unsigned long)shinfo->frags[i].page;
604 /* Check error status: if okay then remember grant handle. */
605 newerr = (++mop)->status;
606 if (likely(!newerr)) {
607 set_phys_to_machine(
608 __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
609 FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
610 grant_tx_handle[pending_idx] = mop->handle;
611 /* Had a previous error? Invalidate this fragment. */
612 if (unlikely(err))
613 netif_idx_release(pending_idx);
614 continue;
615 }
617 /* Error on this fragment: respond to client with an error. */
618 txp = &pending_tx_info[pending_idx].req;
619 make_tx_response(netif, txp, NETIF_RSP_ERROR);
620 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
621 netif_put(netif);
623 /* Not the first error? Preceding frags already invalidated. */
624 if (err)
625 continue;
627 /* First error: invalidate header and preceding fragments. */
628 pending_idx = *((u16 *)skb->data);
629 netif_idx_release(pending_idx);
630 for (j = start; j < i; j++) {
631 pending_idx = (unsigned long)shinfo->frags[j].page;
632 netif_idx_release(pending_idx);
633 }
635 /* Remember the error: invalidate all subsequent fragments. */
636 err = newerr;
637 }
639 *mopp = mop + 1;
640 return err;
641 }
643 static void netbk_fill_frags(struct sk_buff *skb)
644 {
645 struct skb_shared_info *shinfo = skb_shinfo(skb);
646 int nr_frags = shinfo->nr_frags;
647 int i;
649 for (i = 0; i < nr_frags; i++) {
650 skb_frag_t *frag = shinfo->frags + i;
651 netif_tx_request_t *txp;
652 unsigned long pending_idx;
654 pending_idx = (unsigned long)frag->page;
655 txp = &pending_tx_info[pending_idx].req;
656 frag->page = virt_to_page(MMAP_VADDR(pending_idx));
657 frag->size = txp->size;
658 frag->page_offset = txp->offset;
660 skb->len += txp->size;
661 skb->data_len += txp->size;
662 skb->truesize += txp->size;
663 }
664 }
666 int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
667 int work_to_do)
668 {
669 struct netif_extra_info *extra;
670 RING_IDX cons = netif->tx.req_cons;
672 do {
673 if (unlikely(work_to_do-- <= 0)) {
674 DPRINTK("Missing extra info\n");
675 return -EBADR;
676 }
678 extra = (struct netif_extra_info *)
679 RING_GET_REQUEST(&netif->tx, cons);
680 if (unlikely(!extra->type ||
681 extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
682 netif->tx.req_cons = ++cons;
683 DPRINTK("Invalid extra type: %d\n", extra->type);
684 return -EINVAL;
685 }
687 memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
688 netif->tx.req_cons = ++cons;
689 } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
691 return work_to_do;
692 }
694 static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
695 {
696 if (!gso->u.gso.size) {
697 DPRINTK("GSO size must not be zero.\n");
698 return -EINVAL;
699 }
701 /* Currently only TCPv4 S.O. is supported. */
702 if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
703 DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
704 return -EINVAL;
705 }
707 skb_shinfo(skb)->gso_size = gso->u.gso.size;
708 skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
710 /* Header must be checked, and gso_segs computed. */
711 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
712 skb_shinfo(skb)->gso_segs = 0;
714 return 0;
715 }
717 /* Called after netfront has transmitted */
718 static void net_tx_action(unsigned long unused)
719 {
720 struct list_head *ent;
721 struct sk_buff *skb;
722 netif_t *netif;
723 netif_tx_request_t txreq;
724 struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
725 u16 pending_idx;
726 RING_IDX i;
727 gnttab_map_grant_ref_t *mop;
728 unsigned int data_len;
729 int ret, work_to_do;
731 if (dealloc_cons != dealloc_prod)
732 net_tx_action_dealloc();
734 mop = tx_map_ops;
735 while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
736 !list_empty(&net_schedule_list)) {
737 /* Get a netif from the list with work to do. */
738 ent = net_schedule_list.next;
739 netif = list_entry(ent, netif_t, list);
740 netif_get(netif);
741 remove_from_net_schedule_list(netif);
743 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
744 if (!work_to_do) {
745 netif_put(netif);
746 continue;
747 }
749 i = netif->tx.req_cons;
750 rmb(); /* Ensure that we see the request before we copy it. */
751 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
752 /* Credit-based scheduling. */
753 if (txreq.size > netif->remaining_credit) {
754 unsigned long now = jiffies;
755 unsigned long next_credit =
756 netif->credit_timeout.expires +
757 msecs_to_jiffies(netif->credit_usec / 1000);
759 /* Timer could already be pending in rare cases. */
760 if (timer_pending(&netif->credit_timeout))
761 break;
763 /* Passed the point where we can replenish credit? */
764 if (time_after_eq(now, next_credit)) {
765 netif->credit_timeout.expires = now;
766 netif->remaining_credit = netif->credit_bytes;
767 }
769 /* Still too big to send right now? Set a callback. */
770 if (txreq.size > netif->remaining_credit) {
771 netif->remaining_credit = 0;
772 netif->credit_timeout.data =
773 (unsigned long)netif;
774 netif->credit_timeout.function =
775 tx_credit_callback;
776 __mod_timer(&netif->credit_timeout,
777 next_credit);
778 break;
779 }
780 }
781 netif->remaining_credit -= txreq.size;
783 work_to_do--;
784 netif->tx.req_cons = ++i;
786 memset(extras, 0, sizeof(extras));
787 if (txreq.flags & NETTXF_extra_info) {
788 work_to_do = netbk_get_extras(netif, extras,
789 work_to_do);
790 i = netif->tx.req_cons;
791 if (unlikely(work_to_do < 0)) {
792 netbk_tx_err(netif, &txreq, i);
793 continue;
794 }
795 }
797 ret = netbk_count_requests(netif, &txreq, work_to_do);
798 if (unlikely(ret < 0)) {
799 netbk_tx_err(netif, &txreq, i - ret);
800 continue;
801 }
802 i += ret;
804 if (unlikely(ret > MAX_SKB_FRAGS)) {
805 DPRINTK("Too many frags\n");
806 netbk_tx_err(netif, &txreq, i);
807 continue;
808 }
810 if (unlikely(txreq.size < ETH_HLEN)) {
811 DPRINTK("Bad packet size: %d\n", txreq.size);
812 netbk_tx_err(netif, &txreq, i);
813 continue;
814 }
816 /* The payload must not cross a page boundary, as it cannot be fragmented. */
817 if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
818 DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
819 txreq.offset, txreq.size,
820 (txreq.offset &~PAGE_MASK) + txreq.size);
821 netbk_tx_err(netif, &txreq, i);
822 continue;
823 }
825 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
827 data_len = (txreq.size > PKT_PROT_LEN &&
828 ret < MAX_SKB_FRAGS) ?
829 PKT_PROT_LEN : txreq.size;
831 skb = alloc_skb(data_len+16, GFP_ATOMIC);
832 if (unlikely(skb == NULL)) {
833 DPRINTK("Can't allocate a skb in start_xmit.\n");
834 netbk_tx_err(netif, &txreq, i);
835 break;
836 }
838 /* Packets passed to netif_rx() must have some headroom. */
839 skb_reserve(skb, 16);
841 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
842 struct netif_extra_info *gso;
843 gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
845 if (netbk_set_skb_gso(skb, gso)) {
846 kfree_skb(skb);
847 netbk_tx_err(netif, &txreq, i);
848 continue;
849 }
850 }
852 gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
853 GNTMAP_host_map | GNTMAP_readonly,
854 txreq.gref, netif->domid);
855 mop++;
857 memcpy(&pending_tx_info[pending_idx].req,
858 &txreq, sizeof(txreq));
859 pending_tx_info[pending_idx].netif = netif;
860 *((u16 *)skb->data) = pending_idx;
862 __skb_put(skb, data_len);
864 skb_shinfo(skb)->nr_frags = ret;
865 if (data_len < txreq.size) {
866 skb_shinfo(skb)->nr_frags++;
867 skb_shinfo(skb)->frags[0].page =
868 (void *)(unsigned long)pending_idx;
869 }
871 __skb_queue_tail(&tx_queue, skb);
873 pending_cons++;
875 mop = netbk_get_requests(netif, skb, mop);
877 netif->tx.req_cons = i;
878 netif_schedule_work(netif);
880 if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
881 break;
882 }
884 if (mop == tx_map_ops)
885 return;
887 ret = HYPERVISOR_grant_table_op(
888 GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
889 BUG_ON(ret);
891 mop = tx_map_ops;
892 while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
893 netif_tx_request_t *txp;
895 pending_idx = *((u16 *)skb->data);
896 netif = pending_tx_info[pending_idx].netif;
897 txp = &pending_tx_info[pending_idx].req;
899 /* Check the remap error code. */
900 if (unlikely(netbk_tx_check_mop(skb, &mop))) {
901 printk(KERN_ALERT "#### netback grant fails\n");
902 skb_shinfo(skb)->nr_frags = 0;
903 kfree_skb(skb);
904 continue;
905 }
907 data_len = skb->len;
908 memcpy(skb->data,
909 (void *)(MMAP_VADDR(pending_idx)|txp->offset),
910 data_len);
911 if (data_len < txp->size) {
912 /* Append the packet payload as a fragment. */
913 txp->offset += data_len;
914 txp->size -= data_len;
915 } else {
916 /* Schedule a response immediately. */
917 netif_idx_release(pending_idx);
918 }
920 /*
921 * Old frontends do not assert data_validated but we
922 * can infer it from csum_blank so test both flags.
923 */
924 if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
925 skb->ip_summed = CHECKSUM_UNNECESSARY;
926 skb->proto_data_valid = 1;
927 } else {
928 skb->ip_summed = CHECKSUM_NONE;
929 skb->proto_data_valid = 0;
930 }
931 skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
933 netbk_fill_frags(skb);
935 skb->dev = netif->dev;
936 skb->protocol = eth_type_trans(skb, skb->dev);
938 netif->stats.rx_bytes += skb->len;
939 netif->stats.rx_packets++;
941 netif_rx(skb);
942 netif->dev->last_rx = jiffies;
943 }
944 }
946 static void netif_idx_release(u16 pending_idx)
947 {
948 static DEFINE_SPINLOCK(_lock);
949 unsigned long flags;
951 spin_lock_irqsave(&_lock, flags);
952 dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
953 /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
954 smp_wmb();
955 dealloc_prod++;
956 spin_unlock_irqrestore(&_lock, flags);
958 tasklet_schedule(&net_tx_tasklet);
959 }
961 static void netif_page_release(struct page *page)
962 {
963 u16 pending_idx = page - virt_to_page(mmap_vstart);
965 /* Ready for next use. */
966 set_page_count(page, 1);
968 netif_idx_release(pending_idx);
969 }
971 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
972 {
973 netif_t *netif = dev_id;
974 add_to_net_schedule_list_tail(netif);
975 maybe_schedule_tx_action();
976 return IRQ_HANDLED;
977 }
979 static void make_tx_response(netif_t *netif,
980 netif_tx_request_t *txp,
981 s8 st)
982 {
983 RING_IDX i = netif->tx.rsp_prod_pvt;
984 netif_tx_response_t *resp;
985 int notify;
987 resp = RING_GET_RESPONSE(&netif->tx, i);
988 resp->id = txp->id;
989 resp->status = st;
991 if (txp->flags & NETTXF_extra_info)
992 RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
994 netif->tx.rsp_prod_pvt = ++i;
995 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
996 if (notify)
997 notify_remote_via_irq(netif->irq);
999 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
1000 if (i == netif->tx.req_cons) {
1001 int more_to_do;
1002 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
1003 if (more_to_do)
1004 add_to_net_schedule_list_tail(netif);
1005 }
1006 #endif
1007 }
1009 static int make_rx_response(netif_t *netif,
1010 u16 id,
1011 s8 st,
1012 u16 offset,
1013 u16 size,
1014 u16 flags)
1015 {
1016 RING_IDX i = netif->rx.rsp_prod_pvt;
1017 netif_rx_response_t *resp;
1018 int notify;
1020 resp = RING_GET_RESPONSE(&netif->rx, i);
1021 resp->offset = offset;
1022 resp->flags = flags;
1023 resp->id = id;
1024 resp->status = (s16)size;
1025 if (st < 0)
1026 resp->status = (s16)st;
1028 netif->rx.rsp_prod_pvt = ++i;
1029 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
1031 return notify;
1032 }
1034 #ifdef NETBE_DEBUG_INTERRUPT
1035 static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
1036 {
1037 struct list_head *ent;
1038 netif_t *netif;
1039 int i = 0;
1041 printk(KERN_ALERT "netif_schedule_list:\n");
1042 spin_lock_irq(&net_schedule_list_lock);
1044 list_for_each (ent, &net_schedule_list) {
1045 netif = list_entry(ent, netif_t, list);
1046 printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
1047 "rx_resp_prod=%08x\n",
1048 i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
1049 printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
1050 netif->tx.req_cons, netif->tx.rsp_prod_pvt);
1051 printk(KERN_ALERT " shared(rx_req_prod=%08x "
1052 "rx_resp_prod=%08x\n",
1053 netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
1054 printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
1055 netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
1056 printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
1057 netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
1058 i++;
1059 }
1061 spin_unlock_irq(&net_schedule_list_lock);
1062 printk(KERN_ALERT " ** End of netif_schedule_list **\n");
1064 return IRQ_HANDLED;
1065 }
1066 #endif
1068 static int __init netback_init(void)
1069 {
1070 int i;
1071 struct page *page;
1073 if (!is_running_on_xen())
1074 return -ENODEV;
1076 /* We can increase reservation by this much in net_rx_action(). */
1077 balloon_update_driver_allowance(NET_RX_RING_SIZE);
1079 skb_queue_head_init(&rx_queue);
1080 skb_queue_head_init(&tx_queue);
1082 init_timer(&net_timer);
1083 net_timer.data = 0;
1084 net_timer.function = net_alarm;
1086 page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
1087 BUG_ON(page == NULL);
1088 mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
1090 for (i = 0; i < MAX_PENDING_REQS; i++) {
1091 page = virt_to_page(MMAP_VADDR(i));
1092 set_page_count(page, 1);
1093 SetPageForeign(page, netif_page_release);
1094 }
1096 pending_cons = 0;
1097 pending_prod = MAX_PENDING_REQS;
1098 for (i = 0; i < MAX_PENDING_REQS; i++)
1099 pending_ring[i] = i;
1101 spin_lock_init(&net_schedule_list_lock);
1102 INIT_LIST_HEAD(&net_schedule_list);
1104 netif_xenbus_init();
1106 #ifdef NETBE_DEBUG_INTERRUPT
1107 (void)bind_virq_to_irqhandler(
1108 VIRQ_DEBUG,
1109 0,
1110 netif_be_dbg,
1111 SA_SHIRQ,
1112 "net-be-dbg",
1113 &netif_be_dbg);
1114 #endif
1116 return 0;
1117 }
1119 module_init(netback_init);
1121 MODULE_LICENSE("Dual BSD/GPL");