xenbits.xensource.com Git - xenclient/kernel.git/commitdiff

author     t_jeang <devnull@localhost>
           Tue, 6 Jan 2009 12:06:02 +0000 (12:06 +0000)
committer  t_jeang <devnull@localhost>
           Tue, 6 Jan 2009 12:06:02 +0000 (12:06 +0000)

There's no point in sending lots of little packets to a copying
receiver if we can instead arrange to copy them all into a single RX
buffer.  We need to copy anyway, so there's no overhead here, and this
is a little bit easier on the receiving domain's network stack.

drivers/xen/netback/common.h
drivers/xen/netback/netback.c
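
For context on the accounting the patch adds: each RX ring slot only has its
first 2048 bytes used (some guests put data at the end of the page), so the
skb head and each fragment are packed into the current slot until a piece
would cross that 2 KB boundary, at which point a fresh slot is consumed; a
GSO packet reserves one extra slot for its extra-info descriptor.  The
following is a minimal user-space sketch of that packing arithmetic,
mirroring count_skb_slots() below; the names count_slots and RX_SLOT_BYTES
are illustrative and not part of the patch.

/* Hypothetical stand-alone sketch of the per-skb slot accounting
 * introduced by this patch (not kernel code). */
#include <stdio.h>

#define RX_SLOT_BYTES 2048u

static unsigned count_slots(unsigned start_off, unsigned head_len,
                            const unsigned *frag_len, unsigned nr_frags,
                            int is_gso)
{
        unsigned count = 1;              /* the slot we start filling */
        unsigned copy_off = start_off;   /* offset within current slot */
        unsigned i;

        if (copy_off + head_len > RX_SLOT_BYTES) {
                count++;                 /* head overflows: open a new slot */
                copy_off = 0;
        } else {
                copy_off = head_len;
        }
        if (is_gso)
                count++;                 /* extra slot for the GSO descriptor */
        for (i = 0; i < nr_frags; i++) {
                if (copy_off + frag_len[i] > RX_SLOT_BYTES) {
                        copy_off = 0;    /* fragment overflows: new slot */
                        count++;
                }
                copy_off += frag_len[i];
        }
        return count;
}

int main(void)
{
        /* e.g. a 1500-byte head plus two 1024-byte fragments, no GSO */
        unsigned frags[] = { 1024, 1024 };
        printf("slots needed: %u\n", count_slots(0, 1500, frags, 2, 0));
        return 0;
}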

diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
index 869c032d11633bf432c99b3d2c91058037036990..de9be717c8ec99331e8da38629a85b2d50fb563e 100644
--- a/drivers/xen/netback/common.h
+++ b/drivers/xen/netback/common.h
@@ -85,7 +85,9 @@ typedef struct netif_st {
        u8 copying_receiver:1;  /* copy packets to receiver?       */
        unsigned copying_rx_offset;
 
-       /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
+       /* Allow netif_be_start_xmit() to peek ahead in the rx request
+        * ring.  This is a prediction of what rx_req_cons will be once
+        * all queued skbs are put on the ring. */
        RING_IDX rx_req_cons_peek;
 
        /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
index 7fcca69ed245afc808b36ab6218e16420b1b9065..c853fd40991ec93380dd4acb2f1d4ba1103f8a94 100644
--- a/drivers/xen/netback/netback.c
+++ b/drivers/xen/netback/netback.c
 /*define NETBE_DEBUG_INTERRUPT*/
 
 struct netbk_rx_meta {
-       skb_frag_t frag;
+       skb_frag_t frag; /* Only if copy == 0 */
        int id;
+       int size;
+       int gso_size;
        u8 copy:1;
 };
 
@@ -279,6 +281,34 @@ static void tx_queue_callback(unsigned long data)
                netif_wake_queue(netif->dev);
 }
 
+/* Figure out how many ring slots we're going to need to send @skb to
+   the guest. */
+static unsigned count_skb_slots(struct sk_buff *skb, netif_t *netif)
+{
+        unsigned count;
+        unsigned copy_off;
+        unsigned i;
+
+        copy_off = netif->copying_rx_offset;
+        count = 1;
+        if (copy_off + skb_headlen(skb) > 2048) {
+                count++;
+                copy_off = 0;
+        } else {
+                copy_off = skb_headlen(skb);
+        }
+        if (skb_shinfo(skb)->gso_size)
+                count++;
+        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                if (copy_off + skb_shinfo(skb)->frags[i].size > 2048) {
+                        copy_off = 0;
+                        count++;
+                }
+                copy_off += skb_shinfo(skb)->frags[i].size;
+        }
+        return count;
+}
+
 int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        netif_t *netif = netdev_priv(dev);
@@ -308,8 +338,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
                skb = nskb;
        }
 
-       netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
-                                  !!skb_shinfo(skb)->gso_size;
+       /* Reserve ring slots for the worst-case number of
+        * fragments. */
+       netif->rx_req_cons_peek += count_skb_slots(skb, netif);
        netif_get(netif);
 
        if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
@@ -372,117 +403,180 @@ struct netrx_pending_operations {
        gnttab_copy_t *copy;
        multicall_entry_t *mcl;
        struct netbk_rx_meta *meta;
+       int copy_off;
+       grant_ref_t copy_gref;
 };
 
 /* Set up the grant operations for this fragment.  If it's a flipping
    interface, we also set up the unmap request from here. */
-static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta,
-                         int i, struct netrx_pending_operations *npo,
-                         struct page *page, unsigned long size,
-                         unsigned long offset)
+
+static void netbk_gop_frag_copy(netif_t *netif,
+                               struct netrx_pending_operations *npo,
+                               struct page *page, unsigned long size,
+                               unsigned long offset)
+{
+       gnttab_copy_t *copy_gop;
+       struct netbk_rx_meta *meta;
+       int idx = netif_page_index(page);
+
+       meta = npo->meta + npo->meta_prod - 1;
+
+       copy_gop = npo->copy + npo->copy_prod++;
+       copy_gop->flags = GNTCOPY_dest_gref;
+       if (idx > -1) {
+               struct pending_tx_info *src_pend = &pending_tx_info[idx];
+               copy_gop->source.domid = src_pend->netif->domid;
+               copy_gop->source.u.ref = src_pend->req.gref;
+               copy_gop->flags |= GNTCOPY_source_gref;
+       } else {
+               copy_gop->source.domid = DOMID_SELF;
+               copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
+       }
+       copy_gop->source.offset = offset;
+       copy_gop->dest.domid = netif->domid;
+
+       /* We only use the first 2K of the buffer, because some guests
+          put stuff at the end of the page. */
+       if (npo->copy_off + size > 2048) {
+               netif_rx_request_t *req;
+               /* Overflowed this request, go to the next one */
+               req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
+               meta = npo->meta + npo->meta_prod++;
+               meta->copy = 1;
+               meta->size = 0;
+               meta->id = req->id;
+               npo->copy_off = 0;
+               npo->copy_gref = req->gref;
+       }
+       copy_gop->dest.offset = npo->copy_off;
+       copy_gop->dest.u.ref = npo->copy_gref;
+       copy_gop->len = size;
+
+       npo->copy_off += size;
+       meta->size += size;
+}
+
+static u16 netbk_gop_frag_flip(netif_t *netif, struct netbk_rx_meta *meta,
+                              int i, struct netrx_pending_operations *npo,
+                              struct page *page, unsigned long size,
+                              unsigned long offset)
 {
        mmu_update_t *mmu;
        gnttab_transfer_t *gop;
-       gnttab_copy_t *copy_gop;
        multicall_entry_t *mcl;
        netif_rx_request_t *req;
        unsigned long old_mfn, new_mfn;
-       int idx = netif_page_index(page);
 
        old_mfn = virt_to_mfn(page_address(page));
 
        req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
-       if (netif->copying_receiver) {
-               /* The fragment needs to be copied rather than
-                  flipped. */
-               meta->copy = 1;
-               copy_gop = npo->copy + npo->copy_prod++;
-               copy_gop->flags = GNTCOPY_dest_gref;
-               if (idx > -1) {
-                       struct pending_tx_info *src_pend = &pending_tx_info[idx];
-                       copy_gop->source.domid = src_pend->netif->domid;
-                       copy_gop->source.u.ref = src_pend->req.gref;
-                       copy_gop->flags |= GNTCOPY_source_gref;
-               } else {
-                       copy_gop->source.domid = DOMID_SELF;
-                       copy_gop->source.u.gmfn = old_mfn;
-               }
-               copy_gop->source.offset = offset;
-               copy_gop->dest.domid = netif->domid;
-               if (i == 0)
-                       copy_gop->dest.offset = netif->copying_rx_offset;
-               else
-                       copy_gop->dest.offset = 0;
-               copy_gop->dest.u.ref = req->gref;
-               /* We rely on Xen to enforce that offset + size <=
-                * PAGE_SIZE */
-               copy_gop->len = size;
-       } else {
-               meta->copy = 0;
-               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-                       new_mfn = alloc_mfn();
-
-                       /*
-                        * Set the new P2M table entry before
-                        * reassigning the old data page. Heed the
-                        * comment in pgtable-2level.h:pte_page(). :-)
-                        */
-                       set_phys_to_machine(page_to_pfn(page), new_mfn);
-
-                       mcl = npo->mcl + npo->mcl_prod++;
-                       MULTI_update_va_mapping(mcl,
-                                            (unsigned long)page_address(page),
-                                            pfn_pte_ma(new_mfn, PAGE_KERNEL),
-                                            0);
-
-                       mmu = npo->mmu + npo->mmu_prod++;
-                       mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
-                               MMU_MACHPHYS_UPDATE;
-                       mmu->val = page_to_pfn(page);
-               }
+       meta->copy = 0;
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               new_mfn = alloc_mfn();
 
-               gop = npo->trans + npo->trans_prod++;
-               gop->mfn = old_mfn;
-               gop->domid = netif->domid;
-               gop->ref = req->gref;
+               /*
+                * Set the new P2M table entry before
+                * reassigning the old data page. Heed the
+                * comment in pgtable-2level.h:pte_page(). :-)
+                */
+               set_phys_to_machine(page_to_pfn(page), new_mfn);
+
+               mcl = npo->mcl + npo->mcl_prod++;
+               MULTI_update_va_mapping(mcl,
+                                       (unsigned long)page_address(page),
+                                       pfn_pte_ma(new_mfn, PAGE_KERNEL),
+                                       0);
+
+               mmu = npo->mmu + npo->mmu_prod++;
+               mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+                       MMU_MACHPHYS_UPDATE;
+               mmu->val = page_to_pfn(page);
        }
+
+       gop = npo->trans + npo->trans_prod++;
+       gop->mfn = old_mfn;
+       gop->domid = netif->domid;
+       gop->ref = req->gref;
+
        return req->id;
 }
 
-static void netbk_gop_skb(struct sk_buff *skb,
-                         struct netrx_pending_operations *npo)
+/* Prepare an SKB to be transmitted to the frontend.  This is
+   responsible for allocating grant operations, meta structures, etc.
+   It returns the number of meta structures consumed.  The number of
+   ring slots used is always equal to the number of meta slots used
+   plus the number of GSO descriptors used.  Currently, we use either
+   zero GSO descriptors (for non-GSO packets) or one descriptor (for
+   frontend-side LRO). */
+static int netbk_gop_skb(struct sk_buff *skb,
+                        struct netrx_pending_operations *npo)
 {
        netif_t *netif = netdev_priv(skb->dev);
        int nr_frags = skb_shinfo(skb)->nr_frags;
        int i;
        int extra;
        struct netbk_rx_meta *head_meta, *meta;
+       int old_meta_prod;
 
-       head_meta = npo->meta + npo->meta_prod++;
-       head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
-       head_meta->frag.size = skb_shinfo(skb)->gso_size;
-       extra = !!head_meta->frag.size + 1;
+       old_meta_prod = npo->meta_prod;
 
-       for (i = 0; i < nr_frags; i++) {
+       if (netif->copying_receiver) {
+               netif_rx_request_t *req;
+
+               req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++);
                meta = npo->meta + npo->meta_prod++;
-               meta->frag = skb_shinfo(skb)->frags[i];
-               meta->id = netbk_gop_frag(netif, meta, i + extra, npo,
-                                         meta->frag.page,
-                                         meta->frag.size,
-                                         meta->frag.page_offset);
-       }
+               meta->copy = 1;
+               meta->gso_size = skb_shinfo(skb)->gso_size;
+               meta->size = 0;
+               meta->id = req->id;
+               npo->copy_off = netif->copying_rx_offset;
+               npo->copy_gref = req->gref;
+
+               netbk_gop_frag_copy(netif,
+                                   npo, virt_to_page(skb->data),
+                                   skb_headlen(skb),
+                                   offset_in_page(skb->data));
+               if (skb_shinfo(skb)->gso_size) {
+                       /* Leave a gap for the GSO descriptor. */
+                       netif->rx.req_cons++;
+               }
+               for (i = 0; i < nr_frags; i++) {
+                       netbk_gop_frag_copy(netif, npo,
+                                           skb_shinfo(skb)->frags[i].page,
+                                           skb_shinfo(skb)->frags[i].size,
+                                           skb_shinfo(skb)->frags[i].page_offset);
+               }
+       } else {
+               head_meta = npo->meta + npo->meta_prod++;
+               head_meta->frag.page_offset = skb_shinfo(skb)->gso_type;
+               head_meta->frag.size = skb_shinfo(skb)->gso_size;
+               head_meta->gso_size = skb_shinfo(skb)->gso_size;
+               head_meta->size = skb_headlen(skb);
+               extra = !!skb_shinfo(skb)->gso_size + 1;
+
+               for (i = 0; i < nr_frags; i++) {
+                       meta = npo->meta + npo->meta_prod++;
+                       meta->frag = skb_shinfo(skb)->frags[i];
+                       meta->id = netbk_gop_frag_flip(netif, meta, i + extra, npo,
+                                                      skb_shinfo(skb)->frags[i].page,
+                                                      skb_shinfo(skb)->frags[i].size,
+                                                      skb_shinfo(skb)->frags[i].page_offset);
+               }
 
-       /*
-        * This must occur at the end to ensure that we don't trash skb_shinfo
-        * until we're done. We know that the head doesn't cross a page
-        * boundary because such packets get copied in netif_be_start_xmit.
-        */
-       head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo,
-                                      virt_to_page(skb->data),
-                                      skb_headlen(skb),
-                                      offset_in_page(skb->data));
+               /*
+                * This must occur at the end to ensure that we don't
+                * trash skb_shinfo until we're done. We know that the
+                * head doesn't cross a page boundary because such
+                * packets get copied in netif_be_start_xmit.
+                */
+               head_meta->id = netbk_gop_frag_flip(netif, head_meta, 0, npo,
+                                                   virt_to_page(skb->data),
+                                                   skb_headlen(skb),
+                                                   offset_in_page(skb->data));
 
-       netif->rx.req_cons += nr_frags + extra;
+               netif->rx.req_cons += nr_frags + extra;
+       }
+       return npo->meta_prod - old_meta_prod;
 }
 
 static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
@@ -497,7 +591,7 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
    used to set up the operations on the top of
    netrx_pending_operations, which have since been done.  Check that
    they didn't give any errors and advance over them. */
-static int netbk_check_gop(int nr_frags, domid_t domid,
+static int netbk_check_gop(int nr_meta_slots, domid_t domid,
                           struct netrx_pending_operations *npo)
 {
        multicall_entry_t *mcl;
@@ -506,7 +600,7 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
        int status = NETIF_RSP_OKAY;
        int i;
 
-       for (i = 0; i <= nr_frags; i++) {
+       for (i = 0; i < nr_meta_slots; i++) {
                if (npo->meta[npo->meta_cons + i].copy) {
                        copy_op = npo->copy + npo->copy_cons++;
                        if (copy_op->status != GNTST_okay) {
@@ -541,29 +635,37 @@ static int netbk_check_gop(int nr_frags, domid_t domid,
 }
 
 static void netbk_add_frag_responses(netif_t *netif, int status,
-                                    struct netbk_rx_meta *meta, int nr_frags)
+                                    struct netbk_rx_meta *meta,
+                                    int nr_meta_slots)
 {
        int i;
        unsigned long offset;
 
-       for (i = 0; i < nr_frags; i++) {
-               int id = meta[i].id;
-               int flags = (i == nr_frags - 1) ? 0 : NETRXF_more_data;
+       for (i = 0; i < nr_meta_slots; i++) {
+               int flags;
+               if (i == nr_meta_slots - 1)
+                       flags = 0;
+               else
+                       flags = NETRXF_more_data;
 
                if (meta[i].copy)
                        offset = 0;
                else
                        offset = meta[i].frag.page_offset;
-               make_rx_response(netif, id, status, offset,
-                                meta[i].frag.size, flags);
+               make_rx_response(netif, meta[i].id, status, offset,
+                                meta[i].size, flags);
        }
 }
 
+struct skb_cb_overlay {
+       int meta_slots_used;
+};
+
 static void net_rx_action(unsigned long unused)
 {
        netif_t *netif = NULL;
        s8 status;
-       u16 id, irq, flags;
+       u16 irq, flags;
        netif_rx_response_t *resp;
        multicall_entry_t *mcl;
        struct sk_buff_head rxq;
@@ -573,6 +675,7 @@ static void net_rx_action(unsigned long unused)
        int nr_frags;
        int count;
        unsigned long offset;
+       struct skb_cb_overlay *sco;
 
        /*
         * Putting hundreds of bytes on the stack is considered rude.
@@ -598,11 +701,11 @@ static void net_rx_action(unsigned long unused)
        count = 0;
 
        while ((skb = skb_dequeue(&rx_queue)) != NULL) {
+               netif = netdev_priv(skb->dev);
                nr_frags = skb_shinfo(skb)->nr_frags;
-               *(int *)skb->cb = nr_frags;
 
-               if (!xen_feature(XENFEAT_auto_translated_physmap) &&
-                   !((netif_t *)netdev_priv(skb->dev))->copying_receiver &&
+               if (!netif->copying_receiver &&
+                   !xen_feature(XENFEAT_auto_translated_physmap) &&
                    check_mfn(nr_frags + 1)) {
                        /* Memory squeeze? Back off for an arbitrary while. */
                        if ( net_ratelimit() )
@@ -613,7 +716,8 @@ static void net_rx_action(unsigned long unused)
                        break;
                }
 
-               netbk_gop_skb(skb, &npo);
+               sco = (struct skb_cb_overlay *)skb->cb;
+               sco->meta_slots_used = netbk_gop_skb(skb, &npo);
 
                count += nr_frags + 1;
 
@@ -672,7 +776,7 @@ static void net_rx_action(unsigned long unused)
        BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0);
 
        while ((skb = __skb_dequeue(&rxq)) != NULL) {
-               nr_frags = *(int *)skb->cb;
+               sco = (struct skb_cb_overlay *)skb->cb;
 
                netif = netdev_priv(skb->dev);
                /* We can't rely on skb_release_data to release the
@@ -687,17 +791,20 @@ static void net_rx_action(unsigned long unused)
                        atomic_set(&(skb_shinfo(skb)->dataref), 1);
                        skb_shinfo(skb)->frag_list = NULL;
                        skb_shinfo(skb)->nr_frags = 0;
-                       netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
+                       netbk_free_pages(sco->meta_slots_used - 1,
+                                        meta + npo.meta_cons + 1);
                }
 
                netif->stats.tx_bytes += skb->len;
                netif->stats.tx_packets++;
 
-               status = netbk_check_gop(nr_frags, netif->domid, &npo);
-
-               id = meta[npo.meta_cons].id;
-               flags = nr_frags ? NETRXF_more_data : 0;
+               status = netbk_check_gop(sco->meta_slots_used,
+                                        netif->domid, &npo);
 
+               if (sco->meta_slots_used == 1)
+                       flags = 0;
+               else
+                       flags = NETRXF_more_data;
                if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
                        flags |= NETRXF_csum_blank | NETRXF_data_validated;
                else if (skb->proto_data_valid) /* remote but checksummed? */
@@ -707,10 +814,12 @@ static void net_rx_action(unsigned long unused)
                        offset = 0;
                else
                        offset = offset_in_page(skb->data);
-               resp = make_rx_response(netif, id, status, offset,
-                                       skb_headlen(skb), flags);
+               resp = make_rx_response(netif, meta[npo.meta_cons].id,
+                                       status, offset,
+                                       meta[npo.meta_cons].size,
+                                       flags);
 
-               if (meta[npo.meta_cons].frag.size) {
+               if (meta[npo.meta_cons].gso_size) {
                        struct netif_extra_info *gso =
                                (struct netif_extra_info *)
                                RING_GET_RESPONSE(&netif->rx,
@@ -718,7 +827,7 @@ static void net_rx_action(unsigned long unused)
 
                        resp->flags |= NETRXF_extra_info;
 
-                       gso->u.gso.size = meta[npo.meta_cons].frag.size;
+                       gso->u.gso.size = meta[npo.meta_cons].gso_size;
                        gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
                        gso->u.gso.pad = 0;
                        gso->u.gso.features = 0;
@@ -727,9 +836,11 @@ static void net_rx_action(unsigned long unused)
                        gso->flags = 0;
                }
 
-               netbk_add_frag_responses(netif, status,
-                                        meta + npo.meta_cons + 1,
-                                        nr_frags);
+               if (sco->meta_slots_used > 1) {
+                       netbk_add_frag_responses(netif, status,
+                                                meta + npo.meta_cons + 1,
+                                                sco->meta_slots_used - 1);
+               }
 
                RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
                irq = netif->irq;
@@ -744,8 +855,8 @@ static void net_rx_action(unsigned long unused)
                        netif_wake_queue(netif->dev);
 
                netif_put(netif);
+               npo.meta_cons += sco->meta_slots_used;
                dev_kfree_skb(skb);
-               npo.meta_cons += nr_frags + 1;
        }
 
        while (notify_nr != 0) {