ia64/xen-unstable

annotate linux-2.6-xen-sparse/drivers/xen/netback/netback.c @ 10040:91c77df11b43

When we copy packets in netback/netfront, make sure the new skb has
all the necessary fields initialised. In particular, we were previously
not copying ip_summed, and that screws up checksum offload.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Wed May 10 17:30:42 2006 +0100 (2006-05-10)
parents 60f7b567bb2b
children 48c0f5489d44
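
In essence, the change is in the packet-copy path of netif_be_start_xmit():
when netback has to clone a packet into a fresh skb, the new skb now also
carries over the checksum-related header fields. A simplified sketch of that
path, condensed from the listing below (error handling omitted):

	if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
		int hlen = skb->data - skb->head;
		struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);

		skb_reserve(nskb, hlen);
		__skb_put(nskb, skb->len);
		skb_copy_bits(skb, -hlen, nskb->data - hlen, skb->len + hlen);

		/* Copy only the header fields we use in this driver. */
		nskb->dev              = skb->dev;
		nskb->ip_summed        = skb->ip_summed;        /* previously not copied */
		nskb->proto_data_valid = skb->proto_data_valid;

		dev_kfree_skb(skb);
		skb = nskb;
	}
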
rev   line source
cl349@4087 1 /******************************************************************************
cl349@4087 2 * drivers/xen/netback/netback.c
cl349@4087 3 *
cl349@4087 4 * Back-end of the driver for virtual network devices. This portion of the
cl349@4087 5 * driver exports a 'unified' network-device interface that can be accessed
cl349@4087 6 * by any operating system that implements a compatible front end. A
cl349@4087 7 * reference front-end implementation can be found in:
cl349@4087 8 * drivers/xen/netfront/netfront.c
cl349@4087 9 *
cl349@4112 10 * Copyright (c) 2002-2005, K A Fraser
kaf24@9386 11 *
kaf24@9386 12 * This program is free software; you can redistribute it and/or
kaf24@9386 13 * modify it under the terms of the GNU General Public License version 2
kaf24@9386 14 * as published by the Free Software Foundation; or, when distributed
kaf24@9386 15 * separately from the Linux kernel or incorporated into other
kaf24@9386 16 * software packages, subject to the following license:
kaf24@9386 17 *
kaf24@9386 18 * Permission is hereby granted, free of charge, to any person obtaining a copy
kaf24@9386 19 * of this source file (the "Software"), to deal in the Software without
kaf24@9386 20 * restriction, including without limitation the rights to use, copy, modify,
kaf24@9386 21 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
kaf24@9386 22 * and to permit persons to whom the Software is furnished to do so, subject to
kaf24@9386 23 * the following conditions:
kaf24@9386 24 *
kaf24@9386 25 * The above copyright notice and this permission notice shall be included in
kaf24@9386 26 * all copies or substantial portions of the Software.
kaf24@9386 27 *
kaf24@9386 28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
kaf24@9386 29 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
kaf24@9386 30 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
kaf24@9386 31 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
kaf24@9386 32 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
kaf24@9386 33 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
kaf24@9386 34 * IN THE SOFTWARE.
cl349@4087 35 */
cl349@4087 36
cl349@4087 37 #include "common.h"
cl349@8706 38 #include <xen/balloon.h>
cl349@8706 39 #include <xen/interface/memory.h>
cl349@4112 40
kaf24@8519 41 /*#define NETBE_DEBUG_INTERRUPT*/
vh249@5844 42
cl349@4087 43 static void netif_idx_release(u16 pending_idx);
cl349@4087 44 static void netif_page_release(struct page *page);
cl349@4087 45 static void make_tx_response(netif_t *netif,
cl349@4087 46 u16 id,
cl349@4087 47 s8 st);
cl349@4087 48 static int make_rx_response(netif_t *netif,
cl349@4087 49 u16 id,
cl349@4087 50 s8 st,
kaf24@7019 51 u16 offset,
kaf24@5077 52 u16 size,
kaf24@8179 53 u16 flags);
cl349@4087 54
cl349@4087 55 static void net_tx_action(unsigned long unused);
cl349@4087 56 static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
cl349@4087 57
cl349@4087 58 static void net_rx_action(unsigned long unused);
cl349@4087 59 static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
cl349@4087 60
cl349@4087 61 static struct timer_list net_timer;
cl349@4087 62
smh22@6175 63 #define MAX_PENDING_REQS 256
smh22@6175 64
cl349@4087 65 static struct sk_buff_head rx_queue;
kaf24@8659 66 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
kaf24@8164 67 static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
kaf24@8659 68 static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
kaf24@7129 69 static unsigned char rx_notify[NR_IRQS];
cl349@4087 70
cl349@4087 71 static unsigned long mmap_vstart;
cl349@4087 72 #define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
cl349@4087 73
cl349@4087 74 #define PKT_PROT_LEN 64
cl349@4087 75
cl349@4087 76 static struct {
kaf24@6910 77 netif_tx_request_t req;
kaf24@6910 78 netif_t *netif;
cl349@4087 79 } pending_tx_info[MAX_PENDING_REQS];
cl349@4087 80 static u16 pending_ring[MAX_PENDING_REQS];
cl349@4087 81 typedef unsigned int PEND_RING_IDX;
cl349@4087 82 #define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
cl349@4087 83 static PEND_RING_IDX pending_prod, pending_cons;
cl349@4087 84 #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
cl349@4087 85
cl349@4087 86 /* Freed TX SKBs get batched on this ring before return to pending_ring. */
cl349@4087 87 static u16 dealloc_ring[MAX_PENDING_REQS];
cl349@4087 88 static PEND_RING_IDX dealloc_prod, dealloc_cons;
cl349@4087 89
cl349@4087 90 static struct sk_buff_head tx_queue;
vh249@5844 91
kaf24@8137 92 static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
kaf24@6163 93 static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
kaf24@6163 94 static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
shand@6593 95
cl349@4087 96 static struct list_head net_schedule_list;
cl349@4087 97 static spinlock_t net_schedule_list_lock;
cl349@4087 98
cl349@4087 99 #define MAX_MFN_ALLOC 64
cl349@4087 100 static unsigned long mfn_list[MAX_MFN_ALLOC];
cl349@4087 101 static unsigned int alloc_index = 0;
cl349@4087 102 static spinlock_t mfn_lock = SPIN_LOCK_UNLOCKED;
cl349@4087 103
cl349@4087 104 static unsigned long alloc_mfn(void)
cl349@4087 105 {
kaf24@6910 106 unsigned long mfn = 0, flags;
kaf24@6910 107 struct xen_memory_reservation reservation = {
kaf24@6910 108 .nr_extents = MAX_MFN_ALLOC,
kaf24@6910 109 .extent_order = 0,
kaf24@6910 110 .domid = DOMID_SELF
kaf24@6910 111 };
kaf24@9881 112 set_xen_guest_handle(reservation.extent_start, mfn_list);
kaf24@6910 113 spin_lock_irqsave(&mfn_lock, flags);
kaf24@6910 114 if ( unlikely(alloc_index == 0) )
kaf24@6910 115 alloc_index = HYPERVISOR_memory_op(
kaf24@6910 116 XENMEM_increase_reservation, &reservation);
kaf24@6910 117 if ( alloc_index != 0 )
kaf24@6910 118 mfn = mfn_list[--alloc_index];
kaf24@6910 119 spin_unlock_irqrestore(&mfn_lock, flags);
kaf24@6910 120 return mfn;
cl349@4087 121 }
cl349@4087 122
cl349@4087 123 static inline void maybe_schedule_tx_action(void)
cl349@4087 124 {
kaf24@6910 125 smp_mb();
kaf24@6910 126 if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) &&
kaf24@6910 127 !list_empty(&net_schedule_list))
kaf24@6910 128 tasklet_schedule(&net_tx_tasklet);
cl349@4087 129 }
cl349@4087 130
cl349@4087 131 /*
cl349@4087 132 * A gross way of confirming the origin of an skb data page. The slab
cl349@4087 133 * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
cl349@4087 134 */
cl349@4087 135 static inline int is_xen_skb(struct sk_buff *skb)
cl349@4087 136 {
kaf24@6910 137 extern kmem_cache_t *skbuff_cachep;
kaf24@6910 138 kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
kaf24@6910 139 return (cp == skbuff_cachep);
cl349@4087 140 }
cl349@4087 141
cl349@4087 142 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
cl349@4087 143 {
kaf24@6910 144 netif_t *netif = netdev_priv(dev);
cl349@4087 145
vhanquez@8619 146 BUG_ON(skb->dev != dev);
cl349@4087 147
kaf24@6910 148 /* Drop the packet if the target domain has no receive buffers. */
kaf24@6910 149 if (!netif->active ||
kaf24@8164 150 (netif->rx_req_cons_peek == netif->rx.sring->req_prod) ||
kaf24@8164 151 ((netif->rx_req_cons_peek - netif->rx.rsp_prod_pvt) ==
kaf24@8164 152 NET_RX_RING_SIZE))
kaf24@6910 153 goto drop;
cl349@4087 154
kaf24@6910 155 /*
kaf24@6910 156 * We do not copy the packet unless:
kaf24@6910 157 * 1. The data is shared; or
kaf24@6910 158 * 2. The data is not allocated from our special cache.
kaf24@6910 159 * NB. We also couldn't cope with fragmented packets, but we won't get
kaf24@6910 160 * any because we do not advertise the NETIF_F_SG feature.
kaf24@6910 161 */
kaf24@6910 162 if (skb_shared(skb) || skb_cloned(skb) || !is_xen_skb(skb)) {
kaf24@6910 163 int hlen = skb->data - skb->head;
kaf24@7535 164 int ret;
kaf24@6910 165 struct sk_buff *nskb = dev_alloc_skb(hlen + skb->len);
kaf24@6910 166 if ( unlikely(nskb == NULL) )
kaf24@6910 167 goto drop;
kaf24@6910 168 skb_reserve(nskb, hlen);
kaf24@6910 169 __skb_put(nskb, skb->len);
kaf24@7535 170 ret = skb_copy_bits(skb, -hlen, nskb->data - hlen,
kaf24@7535 171 skb->len + hlen);
kaf24@7535 172 BUG_ON(ret);
kaf24@10040 173 /* Copy only the header fields we use in this driver. */
kaf24@6910 174 nskb->dev = skb->dev;
kaf24@10040 175 nskb->ip_summed = skb->ip_summed;
kaf24@9567 176 nskb->proto_data_valid = skb->proto_data_valid;
kaf24@6910 177 dev_kfree_skb(skb);
kaf24@6910 178 skb = nskb;
kaf24@6910 179 }
kaf24@7019 180
kaf24@8164 181 netif->rx_req_cons_peek++;
kaf24@6910 182 netif_get(netif);
cl349@4087 183
kaf24@6910 184 skb_queue_tail(&rx_queue, skb);
kaf24@6910 185 tasklet_schedule(&net_rx_tasklet);
cl349@4087 186
kaf24@6910 187 return 0;
cl349@4087 188
cl349@4087 189 drop:
kaf24@6910 190 netif->stats.tx_dropped++;
kaf24@6910 191 dev_kfree_skb(skb);
kaf24@6910 192 return 0;
cl349@4087 193 }
cl349@4087 194
cl349@4087 195 #if 0
cl349@4087 196 static void xen_network_done_notify(void)
cl349@4087 197 {
kaf24@6910 198 static struct net_device *eth0_dev = NULL;
kaf24@6910 199 if (unlikely(eth0_dev == NULL))
kaf24@6910 200 eth0_dev = __dev_get_by_name("eth0");
kaf24@6910 201 netif_rx_schedule(eth0_dev);
cl349@4087 202 }
cl349@4087 203 /*
cl349@4087 204 * Add following to poll() function in NAPI driver (Tigon3 is example):
cl349@4087 205 * if ( xen_network_done() )
cl349@4087 206 * tg3_enable_ints(tp);
cl349@4087 207 */
cl349@4087 208 int xen_network_done(void)
cl349@4087 209 {
kaf24@6910 210 return skb_queue_empty(&rx_queue);
cl349@4087 211 }
cl349@4087 212 #endif
cl349@4087 213
cl349@4087 214 static void net_rx_action(unsigned long unused)
cl349@4087 215 {
kaf24@6910 216 netif_t *netif = NULL;
kaf24@6910 217 s8 status;
kaf24@9567 218 u16 size, id, irq, flags;
kaf24@6910 219 multicall_entry_t *mcl;
kaf24@6910 220 mmu_update_t *mmu;
kaf24@6910 221 gnttab_transfer_t *gop;
kaf24@6910 222 unsigned long vdata, old_mfn, new_mfn;
kaf24@6910 223 struct sk_buff_head rxq;
kaf24@6910 224 struct sk_buff *skb;
kaf24@8164 225 u16 notify_list[NET_RX_RING_SIZE];
kaf24@6910 226 int notify_nr = 0;
kaf24@7535 227 int ret;
cl349@4087 228
kaf24@6910 229 skb_queue_head_init(&rxq);
cl349@4087 230
kaf24@6910 231 mcl = rx_mcl;
kaf24@6910 232 mmu = rx_mmu;
kaf24@6910 233 gop = grant_rx_op;
vh249@5844 234
kaf24@6910 235 while ((skb = skb_dequeue(&rx_queue)) != NULL) {
kaf24@6910 236 netif = netdev_priv(skb->dev);
kaf24@6910 237 vdata = (unsigned long)skb->data;
kaf24@6910 238 old_mfn = virt_to_mfn(vdata);
cl349@4087 239
kaf24@9904 240 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
kaf24@9904 241 /* Memory squeeze? Back off for an arbitrary while. */
kaf24@9904 242 if ((new_mfn = alloc_mfn()) == 0) {
kaf24@9904 243 if ( net_ratelimit() )
kaf24@9904 244 WPRINTK("Memory squeeze in netback "
kaf24@9904 245 "driver.\n");
kaf24@9904 246 mod_timer(&net_timer, jiffies + HZ);
kaf24@9904 247 skb_queue_head(&rx_queue, skb);
kaf24@9904 248 break;
kaf24@9904 249 }
kaf24@9904 250 /*
kaf24@9904 251 * Set the new P2M table entry before reassigning
kaf24@9904 252 * the old data page. Heed the comment in
kaf24@9904 253 * pgtable-2level.h:pte_page(). :-)
kaf24@9904 254 */
kaf24@9904 255 set_phys_to_machine(
kaf24@9904 256 __pa(skb->data) >> PAGE_SHIFT,
kaf24@9904 257 new_mfn);
kaf24@9904 258
kaf24@9904 259 MULTI_update_va_mapping(mcl, vdata,
kaf24@9904 260 pfn_pte_ma(new_mfn,
kaf24@9904 261 PAGE_KERNEL), 0);
kaf24@9904 262 mcl++;
kaf24@9904 263
kaf24@9904 264 mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
kaf24@9904 265 MMU_MACHPHYS_UPDATE;
kaf24@9904 266 mmu->val = __pa(vdata) >> PAGE_SHIFT;
kaf24@9904 267 mmu++;
kaf24@6910 268 }
cl349@4087 269
kaf24@6910 270 gop->mfn = old_mfn;
kaf24@6910 271 gop->domid = netif->domid;
kaf24@8164 272 gop->ref = RING_GET_REQUEST(
kaf24@8164 273 &netif->rx, netif->rx.req_cons)->gref;
kaf24@8164 274 netif->rx.req_cons++;
kaf24@6910 275 gop++;
cl349@4087 276
kaf24@6910 277 __skb_queue_tail(&rxq, skb);
cl349@4087 278
kaf24@6910 279 /* Filled the batch queue? */
kaf24@8659 280 if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
kaf24@6910 281 break;
kaf24@6910 282 }
cl349@4087 283
kaf24@9904 284 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
kaf24@9904 285 if (mcl == rx_mcl)
kaf24@9904 286 return;
cl349@4087 287
kaf24@9904 288 mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
kaf24@4388 289
kaf24@9904 290 if (mmu - rx_mmu) {
kaf24@9904 291 mcl->op = __HYPERVISOR_mmu_update;
kaf24@9904 292 mcl->args[0] = (unsigned long)rx_mmu;
kaf24@9904 293 mcl->args[1] = mmu - rx_mmu;
kaf24@9904 294 mcl->args[2] = 0;
kaf24@9904 295 mcl->args[3] = DOMID_SELF;
kaf24@9904 296 mcl++;
kaf24@9904 297 }
kaf24@9904 298
kaf24@9904 299 ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
kaf24@9904 300 BUG_ON(ret != 0);
Ian@8732 301 }
Ian@8732 302
kaf24@7953 303 ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op,
kaf24@7953 304 gop - grant_rx_op);
kaf24@7953 305 BUG_ON(ret != 0);
kaf24@7953 306
kaf24@6910 307 mcl = rx_mcl;
kaf24@6910 308 gop = grant_rx_op;
kaf24@6910 309 while ((skb = __skb_dequeue(&rxq)) != NULL) {
kaf24@6910 310 netif = netdev_priv(skb->dev);
kaf24@6910 311 size = skb->tail - skb->data;
cl349@4087 312
kaf24@6910 313 atomic_set(&(skb_shinfo(skb)->dataref), 1);
kaf24@6910 314 skb_shinfo(skb)->nr_frags = 0;
kaf24@6910 315 skb_shinfo(skb)->frag_list = NULL;
cl349@4087 316
kaf24@6910 317 netif->stats.tx_bytes += size;
kaf24@6910 318 netif->stats.tx_packets++;
cl349@4087 319
kaf24@9904 320 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
kaf24@9904 321 /* The update_va_mapping() must not fail. */
kaf24@9904 322 BUG_ON(mcl->result != 0);
kaf24@9904 323 mcl++;
kaf24@9904 324 }
cl349@4087 325
kaf24@6910 326 /* Check the reassignment error code. */
kaf24@6910 327 status = NETIF_RSP_OKAY;
kaf24@7953 328 if (gop->status != 0) {
kaf24@6910 329 DPRINTK("Bad status %d from grant transfer to DOM%u\n",
kaf24@6910 330 gop->status, netif->domid);
kaf24@7967 331 /*
kaf24@7967 332 * Page no longer belongs to us unless GNTST_bad_page,
kaf24@7967 333 * but that should be a fatal error anyway.
kaf24@7967 334 */
kaf24@7967 335 BUG_ON(gop->status == GNTST_bad_page);
kaf24@6910 336 status = NETIF_RSP_ERROR;
kaf24@7129 337 }
kaf24@7129 338 irq = netif->irq;
kaf24@8164 339 id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
kaf24@9567 340 flags = 0;
kaf24@9586 341 if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
kaf24@9586 342 flags |= NETRXF_csum_blank | NETRXF_data_validated;
kaf24@9586 343 else if (skb->proto_data_valid) /* remote but checksummed? */
kaf24@9567 344 flags |= NETRXF_data_validated;
kaf24@6910 345 if (make_rx_response(netif, id, status,
kaf24@7019 346 (unsigned long)skb->data & ~PAGE_MASK,
kaf24@9567 347 size, flags) &&
kaf24@7129 348 (rx_notify[irq] == 0)) {
kaf24@7129 349 rx_notify[irq] = 1;
kaf24@7129 350 notify_list[notify_nr++] = irq;
kaf24@6910 351 }
cl349@4087 352
kaf24@6910 353 netif_put(netif);
kaf24@6910 354 dev_kfree_skb(skb);
kaf24@6910 355 gop++;
kaf24@6910 356 }
cl349@4087 357
kaf24@6910 358 while (notify_nr != 0) {
kaf24@7129 359 irq = notify_list[--notify_nr];
kaf24@7129 360 rx_notify[irq] = 0;
kaf24@7129 361 notify_remote_via_irq(irq);
kaf24@6910 362 }
cl349@4087 363
kaf24@6910 364 /* More work to do? */
kaf24@6910 365 if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
kaf24@6910 366 tasklet_schedule(&net_rx_tasklet);
cl349@4087 367 #if 0
kaf24@6910 368 else
kaf24@6910 369 xen_network_done_notify();
cl349@4087 370 #endif
cl349@4087 371 }
cl349@4087 372
cl349@4087 373 static void net_alarm(unsigned long unused)
cl349@4087 374 {
kaf24@6910 375 tasklet_schedule(&net_rx_tasklet);
cl349@4087 376 }
cl349@4087 377
cl349@4087 378 struct net_device_stats *netif_be_get_stats(struct net_device *dev)
cl349@4087 379 {
kaf24@6910 380 netif_t *netif = netdev_priv(dev);
kaf24@6910 381 return &netif->stats;
cl349@4087 382 }
cl349@4087 383
cl349@4087 384 static int __on_net_schedule_list(netif_t *netif)
cl349@4087 385 {
kaf24@6910 386 return netif->list.next != NULL;
cl349@4087 387 }
cl349@4087 388
cl349@4087 389 static void remove_from_net_schedule_list(netif_t *netif)
cl349@4087 390 {
kaf24@6910 391 spin_lock_irq(&net_schedule_list_lock);
kaf24@6910 392 if (likely(__on_net_schedule_list(netif))) {
kaf24@6910 393 list_del(&netif->list);
kaf24@6910 394 netif->list.next = NULL;
kaf24@6910 395 netif_put(netif);
kaf24@6910 396 }
kaf24@6910 397 spin_unlock_irq(&net_schedule_list_lock);
cl349@4087 398 }
cl349@4087 399
cl349@4087 400 static void add_to_net_schedule_list_tail(netif_t *netif)
cl349@4087 401 {
kaf24@6910 402 if (__on_net_schedule_list(netif))
kaf24@6910 403 return;
cl349@4087 404
kaf24@6910 405 spin_lock_irq(&net_schedule_list_lock);
kaf24@6910 406 if (!__on_net_schedule_list(netif) && netif->active) {
kaf24@6910 407 list_add_tail(&netif->list, &net_schedule_list);
kaf24@6910 408 netif_get(netif);
kaf24@6910 409 }
kaf24@6910 410 spin_unlock_irq(&net_schedule_list_lock);
cl349@4087 411 }
cl349@4087 412
kaf24@8164 413 /*
kaf24@8164 414 * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
kaf24@8164 415 * If this driver is pipelining transmit requests then we can be very
kaf24@8164 416 * aggressive in avoiding new-packet notifications -- the frontend only needs
kaf24@8164 417 * to send a notification if there are no outstanding unreceived responses.
kaf24@8164 418 * If we may be buffering transmit requests for any reason then we must be rather
kaf24@8169 419 * more conservative and treat this as the final check for pending work.
kaf24@8164 420 */
cl349@4087 421 void netif_schedule_work(netif_t *netif)
cl349@4087 422 {
kaf24@8169 423 int more_to_do;
kaf24@8169 424
kaf24@8169 425 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
kaf24@8169 426 more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
kaf24@8169 427 #else
kaf24@8169 428 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
kaf24@8169 429 #endif
kaf24@8169 430
kaf24@8169 431 if (more_to_do) {
kaf24@6910 432 add_to_net_schedule_list_tail(netif);
kaf24@6910 433 maybe_schedule_tx_action();
kaf24@6910 434 }
cl349@4087 435 }
cl349@4087 436
cl349@4087 437 void netif_deschedule_work(netif_t *netif)
cl349@4087 438 {
kaf24@6910 439 remove_from_net_schedule_list(netif);
cl349@4087 440 }
cl349@4087 441
cl349@4112 442
cl349@4087 443 static void tx_credit_callback(unsigned long data)
cl349@4087 444 {
kaf24@6910 445 netif_t *netif = (netif_t *)data;
kaf24@6910 446 netif->remaining_credit = netif->credit_bytes;
kaf24@6910 447 netif_schedule_work(netif);
cl349@4087 448 }
cl349@4087 449
vh249@5845 450 inline static void net_tx_action_dealloc(void)
cl349@4087 451 {
kaf24@6910 452 gnttab_unmap_grant_ref_t *gop;
kaf24@6910 453 u16 pending_idx;
kaf24@6910 454 PEND_RING_IDX dc, dp;
kaf24@6910 455 netif_t *netif;
kaf24@7535 456 int ret;
cl349@4087 457
kaf24@6910 458 dc = dealloc_cons;
kaf24@6910 459 dp = dealloc_prod;
cl349@4087 460
kaf24@6910 461 /*
kaf24@6910 462 * Free up any grants we have finished using
kaf24@6910 463 */
kaf24@6910 464 gop = tx_unmap_ops;
kaf24@6910 465 while (dc != dp) {
kaf24@6910 466 pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
kaf24@9695 467 gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
kaf24@9695 468 GNTMAP_host_map,
kaf24@9695 469 grant_tx_handle[pending_idx]);
kaf24@6910 470 gop++;
kaf24@6910 471 }
kaf24@7535 472 ret = HYPERVISOR_grant_table_op(
kaf24@7535 473 GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
kaf24@7535 474 BUG_ON(ret);
cl349@4087 475
kaf24@6910 476 while (dealloc_cons != dp) {
kaf24@6910 477 pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
cl349@4087 478
kaf24@6910 479 netif = pending_tx_info[pending_idx].netif;
cl349@4087 480
kaf24@6910 481 make_tx_response(netif, pending_tx_info[pending_idx].req.id,
kaf24@6910 482 NETIF_RSP_OKAY);
cl349@4087 483
kaf24@6910 484 pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
cl349@4087 485
kaf24@6910 486 netif_put(netif);
kaf24@6910 487 }
vh249@5845 488 }
vh249@5845 489
vh249@5845 490 /* Called after netfront has transmitted */
vh249@5845 491 static void net_tx_action(unsigned long unused)
vh249@5845 492 {
kaf24@6910 493 struct list_head *ent;
kaf24@6910 494 struct sk_buff *skb;
kaf24@6910 495 netif_t *netif;
kaf24@6910 496 netif_tx_request_t txreq;
kaf24@6910 497 u16 pending_idx;
kaf24@8164 498 RING_IDX i;
kaf24@6910 499 gnttab_map_grant_ref_t *mop;
kaf24@6910 500 unsigned int data_len;
kaf24@8169 501 int ret, work_to_do;
vh249@5845 502
kaf24@6910 503 if (dealloc_cons != dealloc_prod)
kaf24@6910 504 net_tx_action_dealloc();
vh249@5845 505
kaf24@6910 506 mop = tx_map_ops;
kaf24@6910 507 while ((NR_PENDING_REQS < MAX_PENDING_REQS) &&
kaf24@6910 508 !list_empty(&net_schedule_list)) {
kaf24@6910 509 /* Get a netif from the list with work to do. */
kaf24@6910 510 ent = net_schedule_list.next;
kaf24@6910 511 netif = list_entry(ent, netif_t, list);
kaf24@6910 512 netif_get(netif);
kaf24@6910 513 remove_from_net_schedule_list(netif);
cl349@4087 514
kaf24@8169 515 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
kaf24@8169 516 if (!work_to_do) {
kaf24@6910 517 netif_put(netif);
kaf24@6910 518 continue;
kaf24@6910 519 }
cl349@4087 520
kaf24@8164 521 i = netif->tx.req_cons;
kaf24@6910 522 rmb(); /* Ensure that we see the request before we copy it. */
kaf24@8164 523 memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
kaf24@6910 524 /* Credit-based scheduling. */
kaf24@6910 525 if (txreq.size > netif->remaining_credit) {
kaf24@6910 526 unsigned long now = jiffies;
kaf24@6910 527 unsigned long next_credit =
kaf24@6910 528 netif->credit_timeout.expires +
kaf24@6910 529 msecs_to_jiffies(netif->credit_usec / 1000);
cl349@4112 530
kaf24@6910 531 /* Timer could already be pending in rare cases. */
kaf24@6910 532 if (timer_pending(&netif->credit_timeout))
kaf24@6910 533 break;
cl349@4112 534
kaf24@6910 535 /* Passed the point where we can replenish credit? */
kaf24@6910 536 if (time_after_eq(now, next_credit)) {
kaf24@6910 537 netif->credit_timeout.expires = now;
kaf24@6910 538 netif->remaining_credit = netif->credit_bytes;
kaf24@6910 539 }
cl349@4112 540
kaf24@6910 541 /* Still too big to send right now? Set a callback. */
kaf24@6910 542 if (txreq.size > netif->remaining_credit) {
kaf24@6910 543 netif->remaining_credit = 0;
kaf24@6910 544 netif->credit_timeout.data =
kaf24@6910 545 (unsigned long)netif;
kaf24@6910 546 netif->credit_timeout.function =
kaf24@6910 547 tx_credit_callback;
kaf24@9069 548 __mod_timer(&netif->credit_timeout,
kaf24@9069 549 next_credit);
kaf24@6910 550 break;
kaf24@6910 551 }
kaf24@6910 552 }
kaf24@6910 553 netif->remaining_credit -= txreq.size;
cl349@4112 554
kaf24@8164 555 netif->tx.req_cons++;
cl349@4087 556
kaf24@6910 557 netif_schedule_work(netif);
cl349@4087 558
kaf24@6910 559 if (unlikely(txreq.size < ETH_HLEN) ||
kaf24@6910 560 unlikely(txreq.size > ETH_FRAME_LEN)) {
kaf24@6910 561 DPRINTK("Bad packet size: %d\n", txreq.size);
kaf24@6910 562 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
kaf24@6910 563 netif_put(netif);
kaf24@6910 564 continue;
kaf24@6910 565 }
kaf24@6910 566
kaf24@6910 567 /* The payload must not cross a page boundary, as it must not be fragmented. */
kaf24@7019 568 if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) {
emellor@7187 569 DPRINTK("txreq.offset: %x, size: %u, end: %lu\n",
emellor@7187 570 txreq.offset, txreq.size,
emellor@7187 571 (txreq.offset &~PAGE_MASK) + txreq.size);
kaf24@6910 572 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
kaf24@6910 573 netif_put(netif);
kaf24@6910 574 continue;
kaf24@6910 575 }
cl349@4087 576
kaf24@6910 577 pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
kaf24@6910 578
kaf24@6910 579 data_len = (txreq.size > PKT_PROT_LEN) ?
kaf24@6910 580 PKT_PROT_LEN : txreq.size;
cl349@4087 581
kaf24@6910 582 skb = alloc_skb(data_len+16, GFP_ATOMIC);
kaf24@6910 583 if (unlikely(skb == NULL)) {
kaf24@6910 584 DPRINTK("Can't allocate a skb in start_xmit.\n");
kaf24@6910 585 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
kaf24@6910 586 netif_put(netif);
kaf24@6910 587 break;
kaf24@6910 588 }
cl349@4087 589
kaf24@6910 590 /* Packets passed to netif_rx() must have some headroom. */
kaf24@6910 591 skb_reserve(skb, 16);
kaf24@7019 592
kaf24@9695 593 gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
kaf24@9695 594 GNTMAP_host_map | GNTMAP_readonly,
kaf24@9695 595 txreq.gref, netif->domid);
kaf24@6910 596 mop++;
cl349@4087 597
kaf24@6910 598 memcpy(&pending_tx_info[pending_idx].req,
kaf24@6910 599 &txreq, sizeof(txreq));
kaf24@6910 600 pending_tx_info[pending_idx].netif = netif;
kaf24@6910 601 *((u16 *)skb->data) = pending_idx;
cl349@4087 602
kaf24@6910 603 __skb_queue_tail(&tx_queue, skb);
cl349@4087 604
kaf24@6910 605 pending_cons++;
cl349@4087 606
kaf24@6910 607 if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
kaf24@6910 608 break;
kaf24@6910 609 }
cl349@4087 610
kaf24@6910 611 if (mop == tx_map_ops)
kaf24@6910 612 return;
kaf24@6163 613
kaf24@7535 614 ret = HYPERVISOR_grant_table_op(
kaf24@7535 615 GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
kaf24@7535 616 BUG_ON(ret);
kaf24@6163 617
kaf24@6910 618 mop = tx_map_ops;
kaf24@6910 619 while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
kaf24@6910 620 pending_idx = *((u16 *)skb->data);
kaf24@6910 621 netif = pending_tx_info[pending_idx].netif;
kaf24@6910 622 memcpy(&txreq, &pending_tx_info[pending_idx].req,
kaf24@6910 623 sizeof(txreq));
cl349@4087 624
kaf24@6910 625 /* Check the remap error code. */
kaf24@8137 626 if (unlikely(mop->status)) {
kaf24@6910 627 printk(KERN_ALERT "#### netback grant fails\n");
kaf24@6910 628 make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
kaf24@6910 629 netif_put(netif);
kaf24@6910 630 kfree_skb(skb);
kaf24@6910 631 mop++;
kaf24@6910 632 pending_ring[MASK_PEND_IDX(pending_prod++)] =
kaf24@6910 633 pending_idx;
kaf24@6910 634 continue;
kaf24@6910 635 }
kaf24@7767 636 set_phys_to_machine(
kaf24@7767 637 __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
kaf24@7767 638 FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
kaf24@8137 639 grant_tx_handle[pending_idx] = mop->handle;
cl349@4087 640
kaf24@6910 641 data_len = (txreq.size > PKT_PROT_LEN) ?
kaf24@6910 642 PKT_PROT_LEN : txreq.size;
cl349@4087 643
kaf24@6910 644 __skb_put(skb, data_len);
kaf24@6910 645 memcpy(skb->data,
kaf24@7019 646 (void *)(MMAP_VADDR(pending_idx)|txreq.offset),
kaf24@6910 647 data_len);
kaf24@6910 648 if (data_len < txreq.size) {
kaf24@6910 649 /* Append the packet payload as a fragment. */
kaf24@6910 650 skb_shinfo(skb)->frags[0].page =
kaf24@6910 651 virt_to_page(MMAP_VADDR(pending_idx));
kaf24@6910 652 skb_shinfo(skb)->frags[0].size =
kaf24@6910 653 txreq.size - data_len;
kaf24@6910 654 skb_shinfo(skb)->frags[0].page_offset =
kaf24@7019 655 txreq.offset + data_len;
kaf24@6910 656 skb_shinfo(skb)->nr_frags = 1;
kaf24@6910 657 } else {
kaf24@6910 658 /* Schedule a response immediately. */
kaf24@6910 659 netif_idx_release(pending_idx);
kaf24@6910 660 }
cl349@4087 661
kaf24@6910 662 skb->data_len = txreq.size - data_len;
kaf24@6910 663 skb->len += skb->data_len;
kaf24@9946 664 skb->truesize += skb->data_len;
cl349@4087 665
kaf24@6910 666 skb->dev = netif->dev;
kaf24@6910 667 skb->protocol = eth_type_trans(skb, skb->dev);
cl349@4087 668
kaf24@9586 669 /*
kaf24@9586 670 * Old frontends do not assert data_validated but we
kaf24@9586 671 * can infer it from csum_blank so test both flags.
kaf24@9586 672 */
kaf24@9586 673 if (txreq.flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
kaf24@9567 674 skb->ip_summed = CHECKSUM_UNNECESSARY;
kaf24@9567 675 skb->proto_data_valid = 1;
kaf24@9567 676 } else {
kaf24@9567 677 skb->ip_summed = CHECKSUM_NONE;
kaf24@9567 678 skb->proto_data_valid = 0;
kaf24@9567 679 }
kaf24@8179 680 skb->proto_csum_blank = !!(txreq.flags & NETTXF_csum_blank);
kaf24@5077 681
kaf24@6910 682 netif->stats.rx_bytes += txreq.size;
kaf24@6910 683 netif->stats.rx_packets++;
cl349@4087 684
kaf24@6910 685 netif_rx(skb);
kaf24@6910 686 netif->dev->last_rx = jiffies;
cl349@4087 687
kaf24@6910 688 mop++;
kaf24@6910 689 }
cl349@4087 690 }
cl349@4087 691
cl349@4087 692 static void netif_idx_release(u16 pending_idx)
cl349@4087 693 {
kaf24@6910 694 static spinlock_t _lock = SPIN_LOCK_UNLOCKED;
kaf24@6910 695 unsigned long flags;
cl349@4087 696
kaf24@6910 697 spin_lock_irqsave(&_lock, flags);
kaf24@6910 698 dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;
kaf24@6910 699 spin_unlock_irqrestore(&_lock, flags);
cl349@4087 700
kaf24@6910 701 tasklet_schedule(&net_tx_tasklet);
cl349@4087 702 }
cl349@4087 703
cl349@4087 704 static void netif_page_release(struct page *page)
cl349@4087 705 {
kaf24@6910 706 u16 pending_idx = page - virt_to_page(mmap_vstart);
cl349@4087 707
kaf24@6910 708 /* Ready for next use. */
kaf24@6910 709 set_page_count(page, 1);
cl349@4087 710
kaf24@6910 711 netif_idx_release(pending_idx);
cl349@4087 712 }
cl349@4087 713
cl349@4087 714 irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
cl349@4087 715 {
kaf24@6910 716 netif_t *netif = dev_id;
kaf24@8169 717 add_to_net_schedule_list_tail(netif);
kaf24@8169 718 maybe_schedule_tx_action();
kaf24@6910 719 return IRQ_HANDLED;
cl349@4087 720 }
cl349@4087 721
cl349@4087 722 static void make_tx_response(netif_t *netif,
cl349@4087 723 u16 id,
cl349@4087 724 s8 st)
cl349@4087 725 {
kaf24@8164 726 RING_IDX i = netif->tx.rsp_prod_pvt;
kaf24@6910 727 netif_tx_response_t *resp;
kaf24@8169 728 int notify;
cl349@4087 729
kaf24@8164 730 resp = RING_GET_RESPONSE(&netif->tx, i);
kaf24@6910 731 resp->id = id;
kaf24@6910 732 resp->status = st;
kaf24@8169 733
kaf24@8164 734 netif->tx.rsp_prod_pvt = ++i;
kaf24@8169 735 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
kaf24@8169 736 if (notify)
kaf24@8169 737 notify_remote_via_irq(netif->irq);
cl349@4087 738
kaf24@8169 739 #ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
kaf24@8169 740 if (i == netif->tx.req_cons) {
kaf24@8169 741 int more_to_do;
kaf24@8169 742 RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
kaf24@8169 743 if (more_to_do)
kaf24@8169 744 add_to_net_schedule_list_tail(netif);
kaf24@8169 745 }
kaf24@8169 746 #endif
cl349@4087 747 }
cl349@4087 748
cl349@4087 749 static int make_rx_response(netif_t *netif,
cl349@4087 750 u16 id,
cl349@4087 751 s8 st,
kaf24@7019 752 u16 offset,
kaf24@5077 753 u16 size,
kaf24@8179 754 u16 flags)
cl349@4087 755 {
kaf24@8164 756 RING_IDX i = netif->rx.rsp_prod_pvt;
kaf24@6910 757 netif_rx_response_t *resp;
kaf24@8169 758 int notify;
cl349@4087 759
kaf24@8164 760 resp = RING_GET_RESPONSE(&netif->rx, i);
kaf24@7019 761 resp->offset = offset;
kaf24@8179 762 resp->flags = flags;
kaf24@6910 763 resp->id = id;
kaf24@6910 764 resp->status = (s16)size;
kaf24@6910 765 if (st < 0)
kaf24@6910 766 resp->status = (s16)st;
kaf24@8169 767
kaf24@8164 768 netif->rx.rsp_prod_pvt = ++i;
kaf24@8169 769 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
cl349@4087 770
kaf24@8169 771 return notify;
cl349@4087 772 }
cl349@4087 773
kaf24@8519 774 #ifdef NETBE_DEBUG_INTERRUPT
cl349@4087 775 static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
cl349@4087 776 {
kaf24@6910 777 struct list_head *ent;
kaf24@6910 778 netif_t *netif;
kaf24@6910 779 int i = 0;
cl349@4087 780
kaf24@6910 781 printk(KERN_ALERT "netif_schedule_list:\n");
kaf24@6910 782 spin_lock_irq(&net_schedule_list_lock);
cl349@4087 783
kaf24@6910 784 list_for_each (ent, &net_schedule_list) {
kaf24@6910 785 netif = list_entry(ent, netif_t, list);
kaf24@6910 786 printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
kaf24@6910 787 "rx_resp_prod=%08x\n",
kaf24@8164 788 i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
kaf24@6910 789 printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n",
kaf24@8164 790 netif->tx.req_cons, netif->tx.rsp_prod_pvt);
kaf24@6910 791 printk(KERN_ALERT " shared(rx_req_prod=%08x "
kaf24@6910 792 "rx_resp_prod=%08x\n",
kaf24@8164 793 netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
kaf24@6910 794 printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n",
kaf24@8164 795 netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
kaf24@6910 796 printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n",
kaf24@8164 797 netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
kaf24@6910 798 i++;
kaf24@6910 799 }
cl349@4087 800
kaf24@6910 801 spin_unlock_irq(&net_schedule_list_lock);
kaf24@6910 802 printk(KERN_ALERT " ** End of netif_schedule_list **\n");
cl349@4087 803
kaf24@6910 804 return IRQ_HANDLED;
cl349@4087 805 }
kaf24@8519 806 #endif
cl349@4087 807
cl349@4087 808 static int __init netback_init(void)
cl349@4087 809 {
kaf24@6910 810 int i;
kaf24@6910 811 struct page *page;
cl349@4087 812
kaf24@6910 813 /* We can increase reservation by this much in net_rx_action(). */
kaf24@8164 814 balloon_update_driver_allowance(NET_RX_RING_SIZE);
cl349@4087 815
kaf24@6910 816 skb_queue_head_init(&rx_queue);
kaf24@6910 817 skb_queue_head_init(&tx_queue);
cl349@4087 818
kaf24@6910 819 init_timer(&net_timer);
kaf24@6910 820 net_timer.data = 0;
kaf24@6910 821 net_timer.function = net_alarm;
cl349@4087 822
kaf24@6910 823 page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
kaf24@6910 824 BUG_ON(page == NULL);
kaf24@6910 825 mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
cl349@4087 826
kaf24@6910 827 for (i = 0; i < MAX_PENDING_REQS; i++) {
kaf24@6910 828 page = virt_to_page(MMAP_VADDR(i));
kaf24@6910 829 set_page_count(page, 1);
kaf24@6910 830 SetPageForeign(page, netif_page_release);
kaf24@6910 831 }
kaf24@6910 832
kaf24@6910 833 pending_cons = 0;
kaf24@6910 834 pending_prod = MAX_PENDING_REQS;
kaf24@6910 835 for (i = 0; i < MAX_PENDING_REQS; i++)
kaf24@6910 836 pending_ring[i] = i;
cl349@4087 837
kaf24@6910 838 spin_lock_init(&net_schedule_list_lock);
kaf24@6910 839 INIT_LIST_HEAD(&net_schedule_list);
cl349@4087 840
kaf24@6910 841 netif_xenbus_init();
cl349@4087 842
kaf24@8519 843 #ifdef NETBE_DEBUG_INTERRUPT
kaf24@7707 844 (void)bind_virq_to_irqhandler(
kaf24@7707 845 VIRQ_DEBUG,
kaf24@7707 846 0,
kaf24@7707 847 netif_be_dbg,
kaf24@7707 848 SA_SHIRQ,
kaf24@7707 849 "net-be-dbg",
kaf24@7707 850 &netif_be_dbg);
kaf24@8519 851 #endif
cl349@4087 852
kaf24@6910 853 return 0;
cl349@4087 854 }
cl349@4087 855
cl349@4087 856 module_init(netback_init);
kaf24@6910 857
kaf24@9071 858 MODULE_LICENSE("Dual BSD/GPL");
kaf24@9071 859
kaf24@6910 860 /*
kaf24@6910 861 * Local variables:
kaf24@6910 862 * c-file-style: "linux"
kaf24@6910 863 * indent-tabs-mode: t
kaf24@6910 864 * c-indent-level: 8
kaf24@6910 865 * c-basic-offset: 8
kaf24@6910 866 * tab-width: 8
kaf24@6910 867 * End:
kaf24@6910 868 */