ia64/xen-unstable

changeset 671:15a6d4d70e86

bitkeeper revision 1.386 (3f282cb1_39eb4QUC1O0T4BEwSH9Zg)

network.c, dev.c:
Reduce hypercalls required for network transmission.
author kaf24@scramble.cl.cam.ac.uk
date Wed Jul 30 20:38:09 2003 +0000 (2003-07-30)
parents 0e5f8fd98576
children 48a3323d8b93
files xen/net/dev.c xenolinux-2.4.21-sparse/arch/xeno/drivers/network/network.c
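
In brief: collection of guest transmit requests moves out of do_net_update() into a new get_tx_bufs(), which Xen also calls from net_tx_action() and from the skb destructor tx_skb_release(). Because the backend now re-polls the shared ring each time it produces a response, the guest can skip the HYPERVISOR_net_update() hypercall while responses are still outstanding. A minimal sketch of the resulting guest-side test, in which struct net_tx_ring and notify_backend() are hypothetical stand-ins for the real shared np->net_idx fields and the hypercall:

    /*
     * Sketch only; the real shared ring interleaves requests and
     * responses in one array of unions.
     */
    struct net_tx_ring {
        tx_req_entry_t req[TX_RING_SIZE];
        unsigned int   req_prod;    /* written by the guest   */
        unsigned int   resp_prod;   /* written by the backend */
    };

    static void queue_tx(struct net_tx_ring *r, tx_req_entry_t req)
    {
        unsigned int i = r->req_prod;   /* slot being filled          */
        r->req[i] = req;
        r->req_prod = TX_RING_INC(i);
        smp_wmb();                      /* publish the request before
                                           deciding whether to notify */
        if ( r->resp_prod == i )        /* backend has caught up with
                                           everything queued earlier  */
            notify_backend(r);          /* it may be idle: kick it    */
    }

If the test fails, some earlier request is still in flight, and the backend is guaranteed to revisit the ring when it responds to that request.
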
line diff
     1.1 --- a/xen/net/dev.c	Wed Jul 30 18:57:39 2003 +0000
     1.2 +++ b/xen/net/dev.c	Wed Jul 30 20:38:09 2003 +0000
     1.3 @@ -53,9 +53,11 @@
     1.4  
     1.5  static struct sk_buff_head rx_skb_queue[NR_CPUS] __cacheline_aligned;
     1.6  
     1.7 -static void make_tx_response(net_vif_t *vif, 
     1.8 -                             unsigned short id, 
     1.9 -                             unsigned char  st);
    1.10 +static int get_tx_bufs(net_vif_t *vif);
    1.11 +
    1.12 +static void __make_tx_response(net_vif_t *vif, 
    1.13 +                               unsigned short id, 
    1.14 +                               unsigned char  st);
    1.15  static void make_rx_response(net_vif_t     *vif, 
    1.16                               unsigned short id, 
    1.17                               unsigned short size,
    1.18 @@ -722,28 +724,7 @@ static void add_to_net_schedule_list_tai
    1.19  }
    1.20  
    1.21  
    1.22 -/* Destructor function for tx skbs. */
    1.23 -static void tx_skb_release(struct sk_buff *skb)
    1.24 -{
    1.25 -    int i;
    1.26 -    net_vif_t *vif = skb->src_vif;
    1.27 -    unsigned long flags;
    1.28 -    
    1.29 -    spin_lock_irqsave(&vif->domain->page_lock, flags);
    1.30 -    for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
    1.31 -        put_page_tot(skb_shinfo(skb)->frags[i].page);
    1.32 -    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
    1.33 -
    1.34 -    if ( skb->skb_type == SKB_NODATA )
    1.35 -        kmem_cache_free(net_header_cachep, skb->head);
    1.36 -
    1.37 -    skb_shinfo(skb)->nr_frags = 0; 
    1.38 -
    1.39 -    make_tx_response(vif, skb->guest_id, RING_STATUS_OK);
    1.40 -
    1.41 -    put_vif(vif);
    1.42 -}
    1.43 -
    1.44 +static void tx_skb_release(struct sk_buff *skb);
    1.45      
    1.46  static void net_tx_action(unsigned long unused)
    1.47  {
    1.48 @@ -762,12 +743,16 @@ static void net_tx_action(unsigned long 
    1.49          vif = list_entry(ent, net_vif_t, list);
    1.50          get_vif(vif);
    1.51          remove_from_net_schedule_list(vif);
    1.52 -        if ( vif->tx_cons == vif->tx_prod )
    1.53 +
    1.54 +        /* Check whether there are packets to be transmitted. */
    1.55 +        if ( (vif->tx_cons == vif->tx_prod) && !get_tx_bufs(vif) )
    1.56          {
    1.57              put_vif(vif);
    1.58              continue;
    1.59          }
    1.60  
    1.61 +        add_to_net_schedule_list_tail(vif);
    1.62 +
    1.63          if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
    1.64          {
    1.65              printk("Out of memory in net_tx_action()!\n");
    1.66 @@ -779,8 +764,6 @@ static void net_tx_action(unsigned long 
    1.67          /* Pick an entry from the transmit queue. */
    1.68          tx = &vif->tx_shadow_ring[vif->tx_cons];
    1.69          vif->tx_cons = TX_RING_INC(vif->tx_cons);
    1.70 -        if ( vif->tx_cons != vif->tx_prod )
    1.71 -            add_to_net_schedule_list_tail(vif);
    1.72  
    1.73          skb->destructor = tx_skb_release;
    1.74  
    1.75 @@ -832,6 +815,37 @@ static inline void maybe_schedule_tx_act
    1.76  }
    1.77  
    1.78  
    1.79 +/* Destructor function for tx skbs. */
    1.80 +static void tx_skb_release(struct sk_buff *skb)
    1.81 +{
    1.82 +    int i;
    1.83 +    net_vif_t *vif = skb->src_vif;
    1.84 +    unsigned long flags;
    1.85 +    
    1.86 +    spin_lock_irqsave(&vif->domain->page_lock, flags);
    1.87 +    for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
    1.88 +        put_page_tot(skb_shinfo(skb)->frags[i].page);
    1.89 +    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
    1.90 +
    1.91 +    if ( skb->skb_type == SKB_NODATA )
    1.92 +        kmem_cache_free(net_header_cachep, skb->head);
    1.93 +
    1.94 +    skb_shinfo(skb)->nr_frags = 0; 
    1.95 +
    1.96 +    spin_lock_irqsave(&vif->tx_lock, flags);
    1.97 +    __make_tx_response(vif, skb->guest_id, RING_STATUS_OK);
    1.98 +    spin_unlock_irqrestore(&vif->tx_lock, flags);
    1.99 +
   1.100 +    if ( (vif->tx_cons == vif->tx_prod) && get_tx_bufs(vif) )
   1.101 +    {
   1.102 +        add_to_net_schedule_list_tail(vif);
   1.103 +        maybe_schedule_tx_action();        
   1.104 +    }
   1.105 +
   1.106 +    put_vif(vif);
   1.107 +}
   1.108 +
   1.109 +
   1.110  /*
   1.111   *	We need this ioctl for efficient implementation of the
   1.112   *	if_indextoname() function required by the IPv6 API.  Without
   1.113 @@ -1788,6 +1802,159 @@ inline int init_tx_header(u8 *data, unsi
   1.114  }
   1.115  
   1.116  
   1.117 +static int get_tx_bufs(net_vif_t *vif)
   1.118 +{
   1.119 +    struct task_struct *p = vif->domain;
   1.120 +    net_idx_t          *shared_idxs  = vif->shared_idxs;
   1.121 +    net_ring_t         *shared_rings = vif->shared_rings;
   1.122 +    net_vif_t          *target;
   1.123 +    unsigned long       buf_pfn;
   1.124 +    struct pfn_info    *buf_page;
   1.125 +    u8                 *g_data;
   1.126 +    unsigned short      protocol;
   1.127 +    struct sk_buff     *skb;
   1.128 +    tx_req_entry_t      tx;
   1.129 +    int                 i, j, ret;
   1.130 +    unsigned long       flags;
   1.131 +
   1.132 +    if ( vif->tx_req_cons == shared_idxs->tx_req_prod )
   1.133 +        return 0;
   1.134 +
   1.135 +    spin_lock_irqsave(&vif->tx_lock, flags);
   1.136 +
   1.137 +    j = vif->tx_prod;
   1.138 +
   1.139 +    /*
   1.140 +     * Collect up new transmit buffers. We collect up to the guest OS's new 
   1.141 +     * producer index, but take care not to catch up with our own consumer 
   1.142 +     * index.
   1.143 +     */
   1.144 + again:
   1.145 +    for ( i = vif->tx_req_cons; 
   1.146 +          (i != shared_idxs->tx_req_prod) && 
   1.147 +              (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1); 
   1.148 +          i = TX_RING_INC(i) )
   1.149 +    {
   1.150 +        tx     = shared_rings->tx_ring[i].req;
   1.151 +        target = VIF_DROP;
   1.152 +
   1.153 +        if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
   1.154 +        {
   1.155 +            DPRINTK("Bad packet size: %d\n", tx.size);
   1.156 +            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.157 +            continue; 
   1.158 +        }
   1.159 +
   1.160 +        /* No crossing a page boundary as the payload mustn't fragment. */
   1.161 +        if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE ) 
   1.162 +        {
   1.163 +            DPRINTK("tx.addr: %lx, size: %u, end: %lu\n", 
   1.164 +                    tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
   1.165 +            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.166 +            continue;
   1.167 +        }
   1.168 +
   1.169 +        buf_pfn  = tx.addr >> PAGE_SHIFT;
   1.170 +        buf_page = frame_table + buf_pfn;
   1.171 +        spin_lock(&p->page_lock);
   1.172 +        if ( (buf_pfn >= max_page) || 
   1.173 +             ((buf_page->flags & PG_domain_mask) != p->domain) ) 
   1.174 +        {
   1.175 +            DPRINTK("Bad page frame\n");
   1.176 +            spin_unlock(&p->page_lock);
   1.177 +            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.178 +            continue;
   1.179 +        }
   1.180 +            
   1.181 +        g_data = map_domain_mem(tx.addr);
   1.182 +
   1.183 +        protocol = __constant_htons(
   1.184 +            init_tx_header(g_data, tx.size, the_dev));
   1.185 +        if ( protocol == 0 )
   1.186 +        {
   1.187 +            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.188 +            goto tx_unmap_and_continue;
   1.189 +        }
   1.190 +
   1.191 +        target = net_get_target_vif(g_data, tx.size, vif);
   1.192 +
   1.193 +        if ( VIF_LOCAL(target) )
   1.194 +        {
   1.195 +            /* Local delivery */
   1.196 +            if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
   1.197 +            {
   1.198 +                __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.199 +                put_vif(target);
   1.200 +                goto tx_unmap_and_continue;
   1.201 +            }
   1.202 +
   1.203 +            skb->src_vif = vif;
   1.204 +            skb->dst_vif = target;
   1.205 +            skb->protocol = protocol;                
   1.206 +
   1.207 +            /*
   1.208 +             * We don't need a well-formed skb as netif_rx will fill these
   1.209 +             * fields in as necessary. All we actually need is the right
   1.210 +             * page offset in skb->data, and the right length in skb->len.
   1.211 +             * Note that the correct address/length *excludes* link header.
   1.212 +             */
   1.213 +            skb->head = (u8 *)map_domain_mem(
   1.214 +                ((skb->pf - frame_table) << PAGE_SHIFT));
   1.215 +            skb->data = skb->head + 18;
   1.216 +            memcpy(skb->data, g_data, tx.size);
   1.217 +            skb->data += ETH_HLEN;
   1.218 +            skb->len = tx.size - ETH_HLEN;
   1.219 +            unmap_domain_mem(skb->head);
   1.220 +
   1.221 +            netif_rx(skb);
   1.222 +
   1.223 +            __make_tx_response(vif, tx.id, RING_STATUS_OK);
   1.224 +        }
   1.225 +        else if ( (target == VIF_PHYS) || IS_PRIV(p) )
   1.226 +        {
   1.227 +            vif->tx_shadow_ring[j].id     = tx.id;
   1.228 +            vif->tx_shadow_ring[j].size   = tx.size;
   1.229 +            vif->tx_shadow_ring[j].header = 
   1.230 +                kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
   1.231 +            if ( vif->tx_shadow_ring[j].header == NULL )
   1.232 +            { 
   1.233 +                __make_tx_response(vif, tx.id, RING_STATUS_OK);
   1.234 +                goto tx_unmap_and_continue;
   1.235 +            }
   1.236 +
   1.237 +            memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
   1.238 +            vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
   1.239 +            get_page_tot(buf_page);
   1.240 +            j = TX_RING_INC(j);
   1.241 +        }
   1.242 +        else
   1.243 +        {
   1.244 +            __make_tx_response(vif, tx.id, RING_STATUS_DROPPED);
   1.245 +        }
   1.246 +
   1.247 +    tx_unmap_and_continue:
   1.248 +        unmap_domain_mem(g_data);
   1.249 +        spin_unlock(&p->page_lock);
   1.250 +    }
   1.251 +
   1.252 +    /*
   1.253 +     * Needed as a final check for req_prod updates on another CPU.
   1.254 +     * Also ensures that other CPUs see shadow ring updates.
   1.255 +     */
   1.256 +    smp_mb();
   1.257 +
   1.258 +    if ( (vif->tx_req_cons = i) != shared_idxs->tx_req_prod )
   1.259 +        goto again;
   1.260 +
   1.261 +    if ( (ret = (vif->tx_prod != j)) )
   1.262 +        vif->tx_prod = j;
   1.263 +
   1.264 +    spin_unlock_irqrestore(&vif->tx_lock, flags);
   1.265 +
   1.266 +    return ret;
   1.267 +}
   1.268 +
   1.269 +
   1.270  /*
   1.271   * do_net_update:
   1.272   * 
   1.273 @@ -1801,15 +1968,10 @@ long do_net_update(void)
   1.274      net_vif_t *vif;
   1.275      net_idx_t *shared_idxs;
   1.276      unsigned int i, j, idx;
   1.277 -    struct sk_buff *skb;
   1.278 -    tx_req_entry_t tx;
   1.279      rx_req_entry_t rx;
   1.280 -    unsigned long pte_pfn, buf_pfn;
   1.281 +    unsigned long  pte_pfn, buf_pfn;
   1.282      struct pfn_info *pte_page, *buf_page;
   1.283      unsigned long *ptep;    
   1.284 -    net_vif_t *target;
   1.285 -    u8 *g_data;
   1.286 -    unsigned short protocol;
   1.287  
   1.288      perfc_incr(net_hypercalls);
   1.289  
   1.290 @@ -1825,125 +1987,8 @@ long do_net_update(void)
   1.291           * PHASE 1 -- TRANSMIT RING
   1.292           */
   1.293  
   1.294 -        /*
   1.295 -         * Collect up new transmit buffers. We collect up to the guest OS's
   1.296 -         * new producer index, but take care not to catch up with our own
   1.297 -         * consumer index.
   1.298 -         */
   1.299 -        j = vif->tx_prod;
   1.300 -        for ( i = vif->tx_req_cons; 
   1.301 -              (i != shared_idxs->tx_req_prod) && 
   1.302 -                  (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1); 
   1.303 -              i = TX_RING_INC(i) )
   1.304 +        if ( get_tx_bufs(vif) )
   1.305          {
   1.306 -            tx     = shared_rings->tx_ring[i].req;
   1.307 -            target = VIF_DROP;
   1.308 -
   1.309 -            if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
   1.310 -            {
   1.311 -                DPRINTK("Bad packet size: %d\n", tx.size);
   1.312 -                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.313 -                continue; 
   1.314 -            }
   1.315 -
   1.316 -            /* No crossing a page boundary as the payload mustn't fragment. */
   1.317 -            if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE ) 
   1.318 -            {
   1.319 -                DPRINTK("tx.addr: %lx, size: %u, end: %lu\n", 
   1.320 -                        tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
   1.321 -                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.322 -                continue;
   1.323 -            }
   1.324 -
   1.325 -            buf_pfn  = tx.addr >> PAGE_SHIFT;
   1.326 -            buf_page = frame_table + buf_pfn;
   1.327 -            spin_lock_irq(&current->page_lock);
   1.328 -            if ( (buf_pfn >= max_page) || 
   1.329 -                 ((buf_page->flags & PG_domain_mask) != current->domain) ) 
   1.330 -            {
   1.331 -                DPRINTK("Bad page frame\n");
   1.332 -                spin_unlock_irq(&current->page_lock);
   1.333 -                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.334 -                continue;
   1.335 -            }
   1.336 -            
   1.337 -            g_data = map_domain_mem(tx.addr);
   1.338 -
   1.339 -            protocol = __constant_htons(
   1.340 -                init_tx_header(g_data, tx.size, the_dev));
   1.341 -            if ( protocol == 0 )
   1.342 -            {
   1.343 -                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.344 -                goto tx_unmap_and_continue;
   1.345 -            }
   1.346 -
   1.347 -            target = net_get_target_vif(g_data, tx.size, vif);
   1.348 -
   1.349 -            if ( VIF_LOCAL(target) )
   1.350 -            {
   1.351 -                /* Local delivery */
   1.352 -                if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
   1.353 -                {
   1.354 -                    make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
   1.355 -                    put_vif(target);
   1.356 -                    goto tx_unmap_and_continue;
   1.357 -                }
   1.358 -
   1.359 -                skb->src_vif = vif;
   1.360 -                skb->dst_vif = target;
   1.361 -                skb->protocol = protocol;                
   1.362 -
   1.363 -                /*
   1.364 -                 * We don't need a well-formed skb as netif_rx will fill these
   1.365 -                 * fields in as necessary. All we actually need is the right
   1.366 -                 * page offset in skb->data, and the right length in skb->len.
   1.367 -                 * Note that the correct address/length *excludes* link header.
   1.368 -                 */
   1.369 -                skb->head = (u8 *)map_domain_mem(
   1.370 -                    ((skb->pf - frame_table) << PAGE_SHIFT));
   1.371 -                skb->data = skb->head + 18;
   1.372 -                memcpy(skb->data, g_data, tx.size);
   1.373 -                skb->data += ETH_HLEN;
   1.374 -                skb->len = tx.size - ETH_HLEN;
   1.375 -                unmap_domain_mem(skb->head);
   1.376 -
   1.377 -                netif_rx(skb);
   1.378 -
   1.379 -                make_tx_response(vif, tx.id, RING_STATUS_OK);
   1.380 -            }
   1.381 -            else if ( (target == VIF_PHYS) || IS_PRIV(current) )
   1.382 -            {
   1.383 -                vif->tx_shadow_ring[j].id     = tx.id;
   1.384 -                vif->tx_shadow_ring[j].size   = tx.size;
   1.385 -                vif->tx_shadow_ring[j].header = 
   1.386 -                    kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
   1.387 -                if ( vif->tx_shadow_ring[j].header == NULL )
   1.388 -                { 
   1.389 -                    make_tx_response(vif, tx.id, RING_STATUS_OK);
   1.390 -                    goto tx_unmap_and_continue;
   1.391 -                }
   1.392 -
   1.393 -                memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
   1.394 -                vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
   1.395 -                get_page_tot(buf_page);
   1.396 -                j = TX_RING_INC(j);
   1.397 -            }
   1.398 -            else
   1.399 -            {
   1.400 -                make_tx_response(vif, tx.id, RING_STATUS_DROPPED);
   1.401 -            }
   1.402 -
   1.403 -        tx_unmap_and_continue:
   1.404 -            unmap_domain_mem(g_data);
   1.405 -            spin_unlock_irq(&current->page_lock);
   1.406 -        }
   1.407 -
   1.408 -        vif->tx_req_cons = i;
   1.409 -
   1.410 -        if ( vif->tx_prod != j )
   1.411 -        {
   1.412 -            smp_mb(); /* Let other CPUs see new descriptors first. */
   1.413 -            vif->tx_prod = j;
   1.414              add_to_net_schedule_list_tail(vif);
   1.415              maybe_schedule_tx_action();
   1.416          }
   1.417 @@ -2037,16 +2082,14 @@ long do_net_update(void)
   1.418  }
   1.419  
   1.420  
   1.421 -static void make_tx_response(net_vif_t     *vif, 
   1.422 -                             unsigned short id, 
   1.423 -                             unsigned char  st)
   1.424 +static void __make_tx_response(net_vif_t     *vif, 
   1.425 +                               unsigned short id, 
   1.426 +                               unsigned char  st)
   1.427  {
   1.428 -    unsigned long flags;
   1.429      unsigned int pos;
   1.430      tx_resp_entry_t *resp;
   1.431  
   1.432      /* Place on the response ring for the relevant domain. */ 
   1.433 -    spin_lock_irqsave(&vif->tx_lock, flags);
   1.434      pos  = vif->tx_resp_prod;
   1.435      resp = &vif->shared_rings->tx_ring[pos].resp;
   1.436      resp->id     = id;
   1.437 @@ -2058,7 +2101,6 @@ static void make_tx_response(net_vif_t  
   1.438          unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET);
   1.439          guest_event_notify(cpu_mask);    
   1.440      }
   1.441 -    spin_unlock_irqrestore(&vif->tx_lock, flags);
   1.442  }
   1.443  
   1.444  
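Two details of the dev.c side are worth spelling out. First, get_tx_bufs() walks the shared ring from tx_req_cons toward the guest's tx_req_prod, but requests and responses occupy the same slots, so it must stop before the set of outstanding entries fills the ring; that is what the ((vif->tx_resp_prod - i) & (TX_RING_SIZE-1)) != 1 guard enforces. A worked sketch, assuming TX_RING_SIZE is a power of two as the masking requires:

    /*
     * Sketch of the ring-full guard. The masked subtraction is the
     * forward distance from consumer slot i to the response producer.
     * When it falls to 1, consuming one more request would make the
     * consumer catch up with the response producer, leaving full and
     * empty indistinguishable; so collection stops with at most
     * TX_RING_SIZE-1 entries in flight.
     */
    #define TX_RING_SIZE 256            /* power of two (assumed)     */
    static int may_consume_slot(unsigned int resp_prod, unsigned int i)
    {
        return ((resp_prod - i) & (TX_RING_SIZE - 1)) != 1;
    }
    /* Example: resp_prod == 5, i == 4 -> distance 1 -> stop; at that
     * point 255 requests are outstanding, one slot shy of a full lap. */

Second, make_tx_response() becomes __make_tx_response() and sheds its internal tx_lock acquisition: the callers, get_tx_bufs() and tx_skb_release(), now take vif->tx_lock themselves, so response production and the producer-index update share one critical section. The inner page_lock acquisition likewise drops from spin_lock_irq() to plain spin_lock(), since interrupts are already disabled by the enclosing spin_lock_irqsave() on tx_lock.
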
     2.1 --- a/xenolinux-2.4.21-sparse/arch/xeno/drivers/network/network.c	Wed Jul 30 18:57:39 2003 +0000
     2.2 +++ b/xenolinux-2.4.21-sparse/arch/xeno/drivers/network/network.c	Wed Jul 30 20:38:09 2003 +0000
     2.3 @@ -317,7 +317,10 @@ static int network_start_xmit(struct sk_
     2.4      np->stats.tx_bytes += skb->len;
     2.5      np->stats.tx_packets++;
     2.6  
     2.7 -    HYPERVISOR_net_update();
     2.8 +    /* Only notify Xen if there are no outstanding responses. */
     2.9 +    smp_wmb();
    2.10 +    if ( np->net_idx->tx_resp_prod == i )
    2.11 +        HYPERVISOR_net_update();
    2.12  
    2.13      return 0;
    2.14  }
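
The final smp_mb()/goto again re-check in get_tx_bufs() is what makes the suppressed notification above safe: a guest on another CPU may publish a request, see an outstanding response, and skip the hypercall in the window just after the collection loop last read tx_req_prod. A condensed sketch of the backend-side pattern, in which collect_one_pass() is a hypothetical stand-in for the collection loop (which snapshots the guest's producer index once per pass):

    static void backend_after_response(net_vif_t *vif, net_idx_t *idx)
    {
     again:
        collect_one_pass(vif);      /* consume up to a snapshot of
                                       idx->tx_req_prod               */
        smp_mb();                   /* order our ring updates against
                                       a fresh read of tx_req_prod    */
        if ( vif->tx_req_cons != idx->tx_req_prod )
            goto again;             /* a request slipped in late:
                                       it must not be stranded        */
    }

Without the barrier and re-check, such a late request would sit unserviced until the next unrelated hypercall, since the guest deliberately did not issue one.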