xenbits.xensource.com Git - people/ssmith/nc2-2.6.27.git/commitdiff
Fix bug in netchannel2 receiver copy mode which computes
the wrong packet size when the number of fragments
exceeds MAX_SKB_FRAGS.

author    Steven Smith <ssmith@weybridge.uk.xensource.com>
          Tue, 7 Jul 2009 15:59:24 +0000 (16:59 +0100)
committer Steven Smith <ssmith@weybridge.uk.xensource.com>
          Tue, 7 Jul 2009 15:59:24 +0000 (16:59 +0100)
The previous code seems more complex than it actually needs
to be.  Using a simpler version fixes the problem.
It may be possible to fix the original code if we
understood exactly where the bug is, but I am
not convinced we want to preserve the original complexity.
Another problem with the previous code is that it chains
multiple skb's using frag_list when we should use skb->next
for all skb's after the first in the chain.
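
For reference, the chaining layout the stack expects, and which the
fixed code builds, looks like this (a minimal sketch; head, extra1
and extra2 are illustrative names, not identifiers from the patch):

        /* Only the first overflow skb hangs off the head's frag_list;
           every skb after that is linked through ->next. */
        skb_shinfo(head)->frag_list = extra1;
        extra1->next = extra2;
        extra2->next = NULL;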

Using frag_list to store fragments works
for packets received from the external network, but it
breaks guest-to-guest communication.  It is not clear why the
stack treats the two types of packets differently.
The current fix is to use skb_shinfo(skb)->frags when
the number of fragments does not exceed MAX_SKB_FRAGS,
and skb_shinfo(skb)->frag_list otherwise.
This is ugly but it works for now. We probably need
a better fix.
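
In outline, the receive path now dispatches like this (a simplified
sketch; the real logic is in handle_receiver_copy_packet() in the
diff below):

        if (nr_frags - first_frag <= MAX_SKB_FRAGS) {
                /* Common case: one alloc_page() and one grant copy
                   per fragment, stored in the head skb's
                   skb_shinfo(skb)->frags[] array. */
        } else {
                /* Overflow case: one skb per fragment; the first is
                   attached via skb_shinfo(skb)->frag_list and later
                   ones are chained through prev_skb->next. */
        }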

Signed-off-by: Jose Renato Santos <jsantos@hpl.hp.com>
Minor fixup: remove the assumption that a kmalloc() of less than a
page returns a buffer which doesn't cross a page boundary.
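
Grant copies cannot cross a page boundary on the destination, so a
head which straddles a page must be filled with two copies.  This is
the shape of the split nc2_grant_copy() performs (mirroring the code
in the diff below):

        if (unlikely(offset_in_page(tail) + size > PAGE_SIZE)) {
                /* First copy: up to the end of the current page. */
                unsigned f1 = PAGE_SIZE - offset_in_page(tail);
                if (!nc2_grant_copy(ncrp, skb, offset, f1, gref, domid))
                        return 0;
                /* Then advance and copy the rest from the next page. */
                offset += f1;
                size -= f1;
                tail += f1;
        }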

Signed-off-by: Steven Smith <steven.smith@citrix.com>
drivers/xen/netchannel2/rscb.c

index 42c11c308cc989ca55222dd2bbf0bde606e98f57..20b13803a8b6df3ff60eab7b4cd974c02202969c 100644 (file)
@@ -26,30 +26,22 @@ void nc2_rscb_on_gntcopy_fail(void *ctxt, gnttab_copy_t *gop)
 
 
 /* Copy @size bytes from @offset in grant ref @gref against domain
-   @domid and shove them on the end of @cur_skb.  This can extend the
-   skb chain, in which case *@cur_skb becomes the new tail.  On
-   failure, skb_overlay(head_skb)->failed is set to 1. */
-/* There are a lot of recursive tail calls here.  Trust that the
-   compiler will do the right thing. */
-static void batched_grant_copy(struct sk_buff **cur_skb_p,
-                               struct sk_buff *head_skb,
-                               unsigned offset,
-                               unsigned size,
-                               grant_ref_t gref,
-                               domid_t domid)
+   @domid and shove them on the end of @skb. Fails if the head
+   does not have enough space or if the copy would span multiple
+   pages. */
+static int nc2_grant_copy(struct netchannel2_ring_pair *ncrp,
+                          struct sk_buff *skb,
+                          unsigned offset,
+                          unsigned size,
+                          grant_ref_t gref,
+                          domid_t domid)
 {
-        struct sk_buff *skb = *cur_skb_p;
-        struct sk_buff *new_skb;
         gnttab_copy_t *gop;
-        unsigned first;
-        unsigned frag_nr;
-        struct skb_shared_info *shinfo;
-        struct page *new_page;
         void *tail;
         void *end;
 
         if (size > PAGE_SIZE)
-                goto fail;
+                return 0;
 
 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,20)
         tail = skb_tail_pointer(skb);
@@ -59,101 +51,35 @@ static void batched_grant_copy(struct sk_buff **cur_skb_p,
         end = skb->end;
 #endif
 
-        /* Is there any space left in the SKB head? */
-        if (end != tail) {
-                /* Yes.  How much? */
-                first = end - tail;
-                /* Limit ourselves to this fragment. */
-                if (first > size)
-                        first = size;
-                /* And don't cross page boundaries. */
-                if (unlikely(offset_in_page(tail) + first > PAGE_SIZE))
-                        first = PAGE_SIZE - offset_in_page(tail);
-
-                /* Copy this fragment to the header. */
-                gop = hypercall_batcher_grant_copy(&pending_rx_hypercalls,
-                                                   skb,
-                                                   nc2_rscb_on_gntcopy_fail);
-                gop->flags = GNTCOPY_source_gref;
-                gop->source.domid = domid;
-                gop->source.offset = offset;
-                gop->source.u.ref = gref;
-                gop->dest.domid = DOMID_SELF;
-                gop->dest.offset = offset_in_page(tail);
-                gop->dest.u.gmfn = virt_to_mfn(tail);
-                gop->len = first;
-
-                if (skb != head_skb) {
-                        head_skb->truesize += size;
-                        head_skb->data_len += size;
-                        head_skb->len += size;
-                }
-                skb_put(skb, first);
-
-                if (size != first)
-                        batched_grant_copy(cur_skb_p, head_skb,
-                                           offset + first, size - first,
-                                           gref, domid);
-                return;
-        }
-
-        /* Okay, we're in fragment space. */
-        shinfo = skb_shinfo(skb);
-        frag_nr = shinfo->nr_frags;
-        if (frag_nr == MAX_SKB_FRAGS) {
-                /* Advance to a new skb */
-                /* size is probably PAGE_SIZE, in which case we'll end
-                   up kmalloc()ing PAGE_SIZE plus about 200 bytes,
-                   which is pretty inefficient.  This should be rare,
-                   though, so just let it be. */
-                new_skb = dev_alloc_skb(size);
-                if (!new_skb) {
-                        /* Uh oh, no memory.  Give up. */
-                        /* (We'll keep trying to tranfer fragments to
-                           this skb until we hit the end of the
-                           packet, which isn't immensely efficient,
-                           but this should be rare enough that it
-                           doesn't matter). */
-                        goto fail;
-                }
-                skb_shinfo(skb)->frag_list = new_skb;
-                *cur_skb_p = new_skb;
-                batched_grant_copy(cur_skb_p, head_skb, offset,
-                                   size, gref, domid);
-                return;
+        if (unlikely(size > (end-tail)))
+                return 0;
+
+        if (unlikely(offset_in_page(tail) + size > PAGE_SIZE)) {
+                unsigned f1 = PAGE_SIZE - offset_in_page(tail);
+                /* Recursive, but only ever to depth 1, so okay */
+                if (!nc2_grant_copy(ncrp, skb, offset, f1, gref, domid))
+                        return 0;
+                offset += f1;
+                size -= f1;
+                tail += f1;
         }
 
-        /* Allocate a new page for the fragment */
-        new_page = alloc_page(GFP_ATOMIC);
-        if (!new_page)
-                goto fail;
-
+        /* Copy this fragment into the header. */
         gop = hypercall_batcher_grant_copy(&pending_rx_hypercalls,
-                                           head_skb,
+                                           skb,
                                            nc2_rscb_on_gntcopy_fail);
         gop->flags = GNTCOPY_source_gref;
         gop->source.domid = domid;
         gop->source.offset = offset;
         gop->source.u.ref = gref;
         gop->dest.domid = DOMID_SELF;
-        gop->dest.offset = 0;
-        gop->dest.u.gmfn = pfn_to_mfn(page_to_pfn(new_page));
+        gop->dest.offset = offset_in_page(tail);
+        gop->dest.u.gmfn = virt_to_mfn(tail);
         gop->len = size;
 
-        shinfo->frags[frag_nr].page = new_page;
-        shinfo->frags[frag_nr].page_offset = 0;
-        shinfo->frags[frag_nr].size = size;
-        shinfo->nr_frags++;
-
-        head_skb->truesize += size;
-        head_skb->data_len += size;
-        head_skb->len += size;
-
-        return;
+        skb_put(skb, size);
 
-fail:
-        get_skb_overlay(head_skb)->failed = 1;
-        return;
+        return 1;
 }
 
 /* We've received a receiver-copy packet message from the remote.
@@ -169,8 +95,12 @@ struct sk_buff *handle_receiver_copy_packet(struct netchannel2 *nc,
         struct netchannel2_fragment frag;
         unsigned nr_bytes;
         unsigned x;
-        struct sk_buff *skb, *cur_skb;
+        struct sk_buff *skb, *cur_skb, *prev_skb;
         unsigned skb_headsize;
+        int first_frag, first_frag_size;
+        gnttab_copy_t *gop;
+        struct skb_shared_info *shinfo;
+        struct page *new_page;
 
         if (msg->prefix_size > NETCHANNEL2_MAX_INLINE_BYTES) {
                 pr_debug("Inline prefix too big! (%d > %d)\n",
@@ -197,21 +127,24 @@ struct sk_buff *handle_receiver_copy_packet(struct netchannel2 *nc,
                 nr_bytes = 64;
         }
 
-        /* We prefer to put the packet in the head if possible,
-           provided that won't cause the head to be allocated with a
-           multi-page kmalloc().  If we can't manage that, we fall
-           back to just putting the inline part in the head, with the
-           rest of the packet attached as fragments. */
-        /* We could also consider having a maximally-sized head and
-           put the rest in fragments, but that would mean that the
-           Linux-side fragments wouldn't match up with the NC2-side
-           fragments, which would mean we'd need twice as many
-           hypercalls. */
-        skb_headsize = nr_bytes + NET_IP_ALIGN;
+        first_frag = 0;
+        if (nr_frags > 0) {
+                fetch_fragment(ncrp, 0, &frag, frags_off);
+                first_frag_size = frag.size;
+                first_frag = 1;
+        } else {
+                first_frag_size = 0;
+                first_frag = 0;
+        }
+
+        /* We try to have both the prefix and the first frag in the skb
+           head if together they do not exceed the page size. */
+        skb_headsize = msg->prefix_size + first_frag_size + NET_IP_ALIGN;
         if (skb_headsize >
             ((PAGE_SIZE - sizeof(struct skb_shared_info) - NET_SKB_PAD) & 
-            ~(SMP_CACHE_BYTES - 1))) {
+             ~(SMP_CACHE_BYTES - 1))) {
                 skb_headsize = msg->prefix_size + NET_IP_ALIGN;
+                first_frag = 0;
         }
 
         skb = dev_alloc_skb(skb_headsize);
@@ -231,12 +164,88 @@ struct sk_buff *handle_receiver_copy_packet(struct netchannel2 *nc,
                                msg->prefix_size,
                                frags_off + nr_frags * sizeof(frag));
 
-        cur_skb = skb;
-        for (x = 0; x < nr_frags; x++) {
+        /* Copy the first frag into the skb head if it does not cross a
+           page boundary. */
+        if (first_frag == 1) {
+                fetch_fragment(ncrp, 0, &frag, frags_off);
+                if (!nc2_grant_copy(ncrp, skb, frag.off, frag.size,
+                                    frag.receiver_copy.gref,
+                                    ncrp->otherend_id)) {
+                        get_skb_overlay(skb)->failed = 1;
+                        return skb;
+                }
+        }
+
+        /* If everything will fit in the shinfo frag space, use that.
+           Otherwise, use a frag list. */
+        /* XXX we should arguably be using a frag list *in addition to*
+           ordinary shinfo frags, but that doesn't work, for reasons
+           which aren't immensely clear. */
+        if ((nr_frags - first_frag) <= MAX_SKB_FRAGS) {
+                shinfo = skb_shinfo(skb);
+                for (x = first_frag; x < nr_frags; x++) {
+                        fetch_fragment(ncrp, x, &frag, frags_off);
+
+                        /* Allocate a new page for the fragment */
+                        new_page = alloc_page(GFP_ATOMIC);
+                        if (!new_page) {
+                                get_skb_overlay(skb)->failed = 1;
+                                return skb;
+                        }
+
+                        gop = hypercall_batcher_grant_copy(&pending_rx_hypercalls,
+                                                           skb,
+                                                           nc2_rscb_on_gntcopy_fail);
+                        gop->flags = GNTCOPY_source_gref;
+                        gop->source.domid = ncrp->otherend_id;
+                        gop->source.offset = frag.off;
+                        gop->source.u.ref = frag.receiver_copy.gref;
+                        gop->dest.domid = DOMID_SELF;
+                        gop->dest.offset = 0;
+                        gop->dest.u.gmfn = pfn_to_mfn(page_to_pfn(new_page));
+                        gop->len = frag.size;
+
+                        shinfo->frags[x-first_frag].page = new_page;
+                        shinfo->frags[x-first_frag].page_offset = 0;
+                        shinfo->frags[x-first_frag].size = frag.size;
+                        shinfo->nr_frags++;
+
+                        skb->truesize += frag.size;
+                        skb->data_len += frag.size;
+                        skb->len += frag.size;
+                }
+                return skb;
+        }
+
+        if (first_frag < nr_frags) {
+                fetch_fragment(ncrp, first_frag, &frag, frags_off);
+                cur_skb = dev_alloc_skb(frag.size);
+                skb_shinfo(skb)->frag_list = cur_skb;
+                if (!nc2_grant_copy(ncrp, cur_skb, frag.off, frag.size,
+                                    frag.receiver_copy.gref,
+                                    ncrp->otherend_id)) {
+                        get_skb_overlay(skb)->failed = 1;
+                        return skb;
+                }
+                skb->truesize += frag.size;
+                skb->data_len += frag.size;
+                skb->len += frag.size;
+        } else {
+                /* Shut the compiler up */
+                cur_skb = NULL;
+        }
+
+        for (x = first_frag+1; x < nr_frags; x++) {
+                prev_skb = cur_skb;
                 fetch_fragment(ncrp, x, &frag, frags_off);
-                batched_grant_copy(&cur_skb, skb, frag.off, frag.size,
-                                   frag.receiver_copy.gref,
-                                   ncrp->otherend_id);
+                cur_skb = dev_alloc_skb(frag.size);
+                prev_skb->next = cur_skb;
+                nc2_grant_copy(ncrp, cur_skb, frag.off, frag.size,
+                               frag.receiver_copy.gref,
+                               ncrp->otherend_id);
+                skb->truesize += frag.size;
+                skb->data_len += frag.size;
+                skb->len += frag.size;
         }
         return skb;
 }
@@ -268,7 +277,7 @@ static inline int nfrags_skb(struct sk_buff *skb, int prefix_size)
        start_grant = ((unsigned long)skb->data + prefix_size) &
                ~(PAGE_SIZE-1);
        end_grant = ((unsigned long)skb->data +
-                    skb_headlen(skb) +  PAGE_SIZE - 1) &
+                    skb_headlen(skb) +  PAGE_SIZE - 1) &
                ~(PAGE_SIZE-1);
        return ( ((end_grant - start_grant) >> PAGE_SHIFT)
                 + skb_shinfo(skb)->nr_frags );