From e036c395d7ca2df494b0949d94f2ad127380727f Mon Sep 17 00:00:00 2001 From: t_jeang Date: Tue, 6 Jan 2009 12:06:02 +0000 Subject: [PATCH] There's no point in sending lots of little packets to a copying receiver if we can instead arrange to copy them all into a single RX buffer. We need to copy anyway, so there's no overhead here, and this is a little bit easier on the receiving domain's network stack. --- drivers/xen/netback/common.h | 4 +- drivers/xen/netback/netback.c | 333 ++++++++++++++++++++++------------ 2 files changed, 225 insertions(+), 112 deletions(-) diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h index 869c032d..de9be717 100644 --- a/drivers/xen/netback/common.h +++ b/drivers/xen/netback/common.h @@ -85,7 +85,9 @@ typedef struct netif_st { u8 copying_receiver:1; /* copy packets to receiver? */ unsigned copying_rx_offset; - /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */ + /* Allow netif_be_start_xmit() to peek ahead in the rx request + * ring. This is a prediction of what rx_req_cons will be once + * all queued skbs are put on the ring. */ RING_IDX rx_req_cons_peek; /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */ diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c index 7fcca69e..c853fd40 100644 --- a/drivers/xen/netback/netback.c +++ b/drivers/xen/netback/netback.c @@ -41,8 +41,10 @@ /*define NETBE_DEBUG_INTERRUPT*/ struct netbk_rx_meta { - skb_frag_t frag; + skb_frag_t frag; /* Only if copy == 0 */ int id; + int size; + int gso_size; u8 copy:1; }; @@ -279,6 +281,34 @@ static void tx_queue_callback(unsigned long data) netif_wake_queue(netif->dev); } +/* Figure out how many ring slots we're going to need to send @skb to + the guest. */ +static unsigned count_skb_slots(struct sk_buff *skb, netif_t *netif) +{ + unsigned count; + unsigned copy_off; + unsigned i; + + copy_off = netif->copying_rx_offset; + count = 1; + if (copy_off + skb_headlen(skb) > 2048) { + count++; + copy_off = 0; + } else { + copy_off = skb_headlen(skb); + } + if (skb_shinfo(skb)->gso_size) + count++; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + if (copy_off + skb_shinfo(skb)->frags[i].size > 2048) { + copy_off = 0; + count++; + } + copy_off += skb_shinfo(skb)->frags[i].size; + } + return count; +} + int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) { netif_t *netif = netdev_priv(dev); @@ -308,8 +338,9 @@ int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev) skb = nskb; } - netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 + - !!skb_shinfo(skb)->gso_size; + /* Reserve ring slots for the worst-case number of + * fragments. */ + netif->rx_req_cons_peek += count_skb_slots(skb, netif); netif_get(netif); if (netbk_can_queue(dev) && netbk_queue_full(netif)) { @@ -372,117 +403,180 @@ struct netrx_pending_operations { gnttab_copy_t *copy; multicall_entry_t *mcl; struct netbk_rx_meta *meta; + int copy_off; + grant_ref_t copy_gref; }; /* Set up the grant operations for this fragment. If it's a flipping interface, we also set up the unmap request from here. 
*/ -static u16 netbk_gop_frag(netif_t *netif, struct netbk_rx_meta *meta, - int i, struct netrx_pending_operations *npo, - struct page *page, unsigned long size, - unsigned long offset) + +static void netbk_gop_frag_copy(netif_t *netif, + struct netrx_pending_operations *npo, + struct page *page, unsigned long size, + unsigned long offset) +{ + gnttab_copy_t *copy_gop; + struct netbk_rx_meta *meta; + int idx = netif_page_index(page); + + meta = npo->meta + npo->meta_prod - 1; + + copy_gop = npo->copy + npo->copy_prod++; + copy_gop->flags = GNTCOPY_dest_gref; + if (idx > -1) { + struct pending_tx_info *src_pend = &pending_tx_info[idx]; + copy_gop->source.domid = src_pend->netif->domid; + copy_gop->source.u.ref = src_pend->req.gref; + copy_gop->flags |= GNTCOPY_source_gref; + } else { + copy_gop->source.domid = DOMID_SELF; + copy_gop->source.u.gmfn = virt_to_mfn(page_address(page)); + } + copy_gop->source.offset = offset; + copy_gop->dest.domid = netif->domid; + + /* We only use the first 2K of the buffer, because some guests + put stuff at the end of the page. */ + if (npo->copy_off + size > 2048) { + netif_rx_request_t *req; + /* Overflowed this request, go to the next one */ + req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); + meta = npo->meta + npo->meta_prod++; + meta->copy = 1; + meta->size = 0; + meta->id = req->id; + npo->copy_off = 0; + npo->copy_gref = req->gref; + } + copy_gop->dest.offset = npo->copy_off; + copy_gop->dest.u.ref = npo->copy_gref; + copy_gop->len = size; + + npo->copy_off += size; + meta->size += size; +} + +static u16 netbk_gop_frag_flip(netif_t *netif, struct netbk_rx_meta *meta, + int i, struct netrx_pending_operations *npo, + struct page *page, unsigned long size, + unsigned long offset) { mmu_update_t *mmu; gnttab_transfer_t *gop; - gnttab_copy_t *copy_gop; multicall_entry_t *mcl; netif_rx_request_t *req; unsigned long old_mfn, new_mfn; - int idx = netif_page_index(page); old_mfn = virt_to_mfn(page_address(page)); req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i); - if (netif->copying_receiver) { - /* The fragment needs to be copied rather than - flipped. */ - meta->copy = 1; - copy_gop = npo->copy + npo->copy_prod++; - copy_gop->flags = GNTCOPY_dest_gref; - if (idx > -1) { - struct pending_tx_info *src_pend = &pending_tx_info[idx]; - copy_gop->source.domid = src_pend->netif->domid; - copy_gop->source.u.ref = src_pend->req.gref; - copy_gop->flags |= GNTCOPY_source_gref; - } else { - copy_gop->source.domid = DOMID_SELF; - copy_gop->source.u.gmfn = old_mfn; - } - copy_gop->source.offset = offset; - copy_gop->dest.domid = netif->domid; - if (i == 0) - copy_gop->dest.offset = netif->copying_rx_offset; - else - copy_gop->dest.offset = 0; - copy_gop->dest.u.ref = req->gref; - /* We rely on Xen to enforce that offset + size <= - * PAGE_SIZE */ - copy_gop->len = size; - } else { - meta->copy = 0; - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - new_mfn = alloc_mfn(); - - /* - * Set the new P2M table entry before - * reassigning the old data page. Heed the - * comment in pgtable-2level.h:pte_page(). 
:-) - */ - set_phys_to_machine(page_to_pfn(page), new_mfn); - - mcl = npo->mcl + npo->mcl_prod++; - MULTI_update_va_mapping(mcl, - (unsigned long)page_address(page), - pfn_pte_ma(new_mfn, PAGE_KERNEL), - 0); - - mmu = npo->mmu + npo->mmu_prod++; - mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | - MMU_MACHPHYS_UPDATE; - mmu->val = page_to_pfn(page); - } + meta->copy = 0; + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + new_mfn = alloc_mfn(); - gop = npo->trans + npo->trans_prod++; - gop->mfn = old_mfn; - gop->domid = netif->domid; - gop->ref = req->gref; + /* + * Set the new P2M table entry before + * reassigning the old data page. Heed the + * comment in pgtable-2level.h:pte_page(). :-) + */ + set_phys_to_machine(page_to_pfn(page), new_mfn); + + mcl = npo->mcl + npo->mcl_prod++; + MULTI_update_va_mapping(mcl, + (unsigned long)page_address(page), + pfn_pte_ma(new_mfn, PAGE_KERNEL), + 0); + + mmu = npo->mmu + npo->mmu_prod++; + mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE; + mmu->val = page_to_pfn(page); } + + gop = npo->trans + npo->trans_prod++; + gop->mfn = old_mfn; + gop->domid = netif->domid; + gop->ref = req->gref; + return req->id; } -static void netbk_gop_skb(struct sk_buff *skb, - struct netrx_pending_operations *npo) +/* Prepare an SKB to be transmitted to the frontend. This is + responsible for allocating grant operations, meta structures, etc. + It returns the number of meta structures consumed. The number of + ring slots used is always equal to the number of meta slots used + plus the number of GSO descriptors used. Currently, we use either + zero GSO descriptors (for non-GSO packets) or one descriptor (for + frontend-side LRO). */ +static int netbk_gop_skb(struct sk_buff *skb, + struct netrx_pending_operations *npo) { netif_t *netif = netdev_priv(skb->dev); int nr_frags = skb_shinfo(skb)->nr_frags; int i; int extra; struct netbk_rx_meta *head_meta, *meta; + int old_meta_prod; - head_meta = npo->meta + npo->meta_prod++; - head_meta->frag.page_offset = skb_shinfo(skb)->gso_type; - head_meta->frag.size = skb_shinfo(skb)->gso_size; - extra = !!head_meta->frag.size + 1; + old_meta_prod = npo->meta_prod; - for (i = 0; i < nr_frags; i++) { + if (netif->copying_receiver) { + netif_rx_request_t *req; + + req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons++); meta = npo->meta + npo->meta_prod++; - meta->frag = skb_shinfo(skb)->frags[i]; - meta->id = netbk_gop_frag(netif, meta, i + extra, npo, - meta->frag.page, - meta->frag.size, - meta->frag.page_offset); - } + meta->copy = 1; + meta->gso_size = skb_shinfo(skb)->gso_size; + meta->size = 0; + meta->id = req->id; + npo->copy_off = netif->copying_rx_offset; + npo->copy_gref = req->gref; + + netbk_gop_frag_copy(netif, + npo, virt_to_page(skb->data), + skb_headlen(skb), + offset_in_page(skb->data)); + if (skb_shinfo(skb)->gso_size) { + /* Leave a gap for the GSO descriptor. 
*/ + netif->rx.req_cons++; + } + for (i = 0; i < nr_frags; i++) { + netbk_gop_frag_copy(netif, npo, + skb_shinfo(skb)->frags[i].page, + skb_shinfo(skb)->frags[i].size, + skb_shinfo(skb)->frags[i].page_offset); + } + } else { + head_meta = npo->meta + npo->meta_prod++; + head_meta->frag.page_offset = skb_shinfo(skb)->gso_type; + head_meta->frag.size = skb_shinfo(skb)->gso_size; + head_meta->gso_size = skb_shinfo(skb)->gso_size; + head_meta->size = skb_headlen(skb); + extra = !!skb_shinfo(skb)->gso_size + 1; + + for (i = 0; i < nr_frags; i++) { + meta = npo->meta + npo->meta_prod++; + meta->frag = skb_shinfo(skb)->frags[i]; + meta->id = netbk_gop_frag_flip(netif, meta, i + extra, npo, + skb_shinfo(skb)->frags[i].page, + skb_shinfo(skb)->frags[i].size, + skb_shinfo(skb)->frags[i].page_offset); + } - /* - * This must occur at the end to ensure that we don't trash skb_shinfo - * until we're done. We know that the head doesn't cross a page - * boundary because such packets get copied in netif_be_start_xmit. - */ - head_meta->id = netbk_gop_frag(netif, head_meta, 0, npo, - virt_to_page(skb->data), - skb_headlen(skb), - offset_in_page(skb->data)); + /* + * This must occur at the end to ensure that we don't + * trash skb_shinfo until we're done. We know that the + * head doesn't cross a page boundary because such + * packets get copied in netif_be_start_xmit. + */ + head_meta->id = netbk_gop_frag_flip(netif, head_meta, 0, npo, + virt_to_page(skb->data), + skb_headlen(skb), + offset_in_page(skb->data)); - netif->rx.req_cons += nr_frags + extra; + netif->rx.req_cons += nr_frags + extra; + } + return npo->meta_prod - old_meta_prod; } static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta) @@ -497,7 +591,7 @@ static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta) used to set up the operations on the top of netrx_pending_operations, which have since been done. Check that they didn't give any errors and advance over them. */ -static int netbk_check_gop(int nr_frags, domid_t domid, +static int netbk_check_gop(int nr_meta_slots, domid_t domid, struct netrx_pending_operations *npo) { multicall_entry_t *mcl; @@ -506,7 +600,7 @@ static int netbk_check_gop(int nr_frags, domid_t domid, int status = NETIF_RSP_OKAY; int i; - for (i = 0; i <= nr_frags; i++) { + for (i = 0; i < nr_meta_slots; i++) { if (npo->meta[npo->meta_cons + i].copy) { copy_op = npo->copy + npo->copy_cons++; if (copy_op->status != GNTST_okay) { @@ -541,29 +635,37 @@ static int netbk_check_gop(int nr_frags, domid_t domid, } static void netbk_add_frag_responses(netif_t *netif, int status, - struct netbk_rx_meta *meta, int nr_frags) + struct netbk_rx_meta *meta, + int nr_meta_slots) { int i; unsigned long offset; - for (i = 0; i < nr_frags; i++) { - int id = meta[i].id; - int flags = (i == nr_frags - 1) ? 
0 : NETRXF_more_data; + for (i = 0; i < nr_meta_slots; i++) { + int flags; + if (i == nr_meta_slots - 1) + flags = 0; + else + flags = NETRXF_more_data; if (meta[i].copy) offset = 0; else offset = meta[i].frag.page_offset; - make_rx_response(netif, id, status, offset, - meta[i].frag.size, flags); + make_rx_response(netif, meta[i].id, status, offset, + meta[i].size, flags); } } +struct skb_cb_overlay { + int meta_slots_used; +}; + static void net_rx_action(unsigned long unused) { netif_t *netif = NULL; s8 status; - u16 id, irq, flags; + u16 irq, flags; netif_rx_response_t *resp; multicall_entry_t *mcl; struct sk_buff_head rxq; @@ -573,6 +675,7 @@ static void net_rx_action(unsigned long unused) int nr_frags; int count; unsigned long offset; + struct skb_cb_overlay *sco; /* * Putting hundreds of bytes on the stack is considered rude. @@ -598,11 +701,11 @@ static void net_rx_action(unsigned long unused) count = 0; while ((skb = skb_dequeue(&rx_queue)) != NULL) { + netif = netdev_priv(skb->dev); nr_frags = skb_shinfo(skb)->nr_frags; - *(int *)skb->cb = nr_frags; - if (!xen_feature(XENFEAT_auto_translated_physmap) && - !((netif_t *)netdev_priv(skb->dev))->copying_receiver && + if (!netif->copying_receiver && + !xen_feature(XENFEAT_auto_translated_physmap) && check_mfn(nr_frags + 1)) { /* Memory squeeze? Back off for an arbitrary while. */ if ( net_ratelimit() ) @@ -613,7 +716,8 @@ static void net_rx_action(unsigned long unused) break; } - netbk_gop_skb(skb, &npo); + sco = (struct skb_cb_overlay *)skb->cb; + sco->meta_slots_used = netbk_gop_skb(skb, &npo); count += nr_frags + 1; @@ -672,7 +776,7 @@ static void net_rx_action(unsigned long unused) BUG_ON(npo.mmu_mcl && npo.mcl[npo.mmu_mcl].result != 0); while ((skb = __skb_dequeue(&rxq)) != NULL) { - nr_frags = *(int *)skb->cb; + sco = (struct skb_cb_overlay *)skb->cb; netif = netdev_priv(skb->dev); /* We can't rely on skb_release_data to release the @@ -687,17 +791,20 @@ static void net_rx_action(unsigned long unused) atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->frag_list = NULL; skb_shinfo(skb)->nr_frags = 0; - netbk_free_pages(nr_frags, meta + npo.meta_cons + 1); + netbk_free_pages(sco->meta_slots_used - 1, + meta + npo.meta_cons + 1); } netif->stats.tx_bytes += skb->len; netif->stats.tx_packets++; - status = netbk_check_gop(nr_frags, netif->domid, &npo); - - id = meta[npo.meta_cons].id; - flags = nr_frags ? NETRXF_more_data : 0; + status = netbk_check_gop(sco->meta_slots_used, + netif->domid, &npo); + if (sco->meta_slots_used == 1) + flags = 0; + else + flags = NETRXF_more_data; if (skb->ip_summed == CHECKSUM_HW) /* local packet? */ flags |= NETRXF_csum_blank | NETRXF_data_validated; else if (skb->proto_data_valid) /* remote but checksummed? 
*/ @@ -707,10 +814,12 @@ static void net_rx_action(unsigned long unused) offset = 0; else offset = offset_in_page(skb->data); - resp = make_rx_response(netif, id, status, offset, - skb_headlen(skb), flags); + resp = make_rx_response(netif, meta[npo.meta_cons].id, + status, offset, + meta[npo.meta_cons].size, + flags); - if (meta[npo.meta_cons].frag.size) { + if (meta[npo.meta_cons].gso_size) { struct netif_extra_info *gso = (struct netif_extra_info *) RING_GET_RESPONSE(&netif->rx, @@ -718,7 +827,7 @@ static void net_rx_action(unsigned long unused) resp->flags |= NETRXF_extra_info; - gso->u.gso.size = meta[npo.meta_cons].frag.size; + gso->u.gso.size = meta[npo.meta_cons].gso_size; gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4; gso->u.gso.pad = 0; gso->u.gso.features = 0; @@ -727,9 +836,11 @@ static void net_rx_action(unsigned long unused) gso->flags = 0; } - netbk_add_frag_responses(netif, status, - meta + npo.meta_cons + 1, - nr_frags); + if (sco->meta_slots_used > 1) { + netbk_add_frag_responses(netif, status, + meta + npo.meta_cons + 1, + sco->meta_slots_used - 1); + } RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret); irq = netif->irq; @@ -744,8 +855,8 @@ static void net_rx_action(unsigned long unused) netif_wake_queue(netif->dev); netif_put(netif); + npo.meta_cons += sco->meta_slots_used; dev_kfree_skb(skb); - npo.meta_cons += nr_frags + 1; } while (notify_nr != 0) { -- 2.39.5
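
For reference, here is a self-contained sketch (not part of the patch itself) of the 2K packing arithmetic that count_skb_slots() and netbk_gop_frag_copy() share: data is copied back-to-back into the first 2048 bytes of each RX buffer, a new buffer is started whenever the next chunk would overflow that window, and GSO packets reserve one extra slot for the extra-info descriptor. The helper name rx_slots_needed and the MAX_BUFFER_OFFSET constant below are illustrative only; the real code walks the skb's linear head and fragment list and starts at netif->copying_rx_offset.

/* Illustrative only -- mirrors the arithmetic of count_skb_slots() above. */
#include <stdio.h>

#define MAX_BUFFER_OFFSET 2048	/* only the first 2K of each RX buffer is used */

static unsigned rx_slots_needed(unsigned start_off, unsigned head_len,
				const unsigned *frag_len, unsigned nr_frags,
				int gso)
{
	unsigned copy_off = start_off;
	unsigned count = 1;		/* every packet needs at least one slot */
	unsigned i;

	if (copy_off + head_len > MAX_BUFFER_OFFSET) {
		/* Head won't fit after start_off; charge another slot. */
		count++;
		copy_off = 0;
	} else {
		copy_off = head_len;
	}

	if (gso)
		count++;		/* slot for the GSO extra-info descriptor */

	for (i = 0; i < nr_frags; i++) {
		if (copy_off + frag_len[i] > MAX_BUFFER_OFFSET) {
			/* This fragment starts a fresh RX buffer. */
			copy_off = 0;
			count++;
		}
		copy_off += frag_len[i];
	}
	return count;
}

int main(void)
{
	/* A GSO skb with a 66-byte linear header and three fragments. */
	unsigned frags[] = { 1460, 1460, 800 };

	printf("slots: %u\n", rx_slots_needed(0, 66, frags, 3, 1));
	return 0;
}

Packing the first fragment in behind the 66-byte header, this example needs four slots (head plus first fragment in one buffer, one buffer for each remaining fragment, and one GSO slot), which is what netif_be_start_xmit() now reserves through rx_req_cons_peek instead of the old nr_frags + 1 + !!gso_size estimate of five.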