ia64/xen-unstable
changeset 671:15a6d4d70e86
bitkeeper revision 1.386 (3f282cb1_39eb4QUC1O0T4BEwSH9Zg)
network.c, dev.c:
    Reduce hypercalls required for network transmission.
author    kaf24@scramble.cl.cam.ac.uk
date      Wed Jul 30 20:38:09 2003 +0000 (2003-07-30)
parents   0e5f8fd98576
children  48a3323d8b93
files     xen/net/dev.c xenolinux-2.4.21-sparse/arch/xeno/drivers/network/network.c
line diff
--- a/xen/net/dev.c	Wed Jul 30 18:57:39 2003 +0000
+++ b/xen/net/dev.c	Wed Jul 30 20:38:09 2003 +0000
@@ -53,9 +53,11 @@
 
 static struct sk_buff_head rx_skb_queue[NR_CPUS] __cacheline_aligned;
 
-static void make_tx_response(net_vif_t *vif,
-                             unsigned short id,
-                             unsigned char st);
+static int get_tx_bufs(net_vif_t *vif);
+
+static void __make_tx_response(net_vif_t *vif,
+                               unsigned short id,
+                               unsigned char st);
 static void make_rx_response(net_vif_t *vif,
                              unsigned short id,
                              unsigned short size,
@@ -722,28 +724,7 @@ static void add_to_net_schedule_list_tai
 }
 
 
-/* Destructor function for tx skbs. */
-static void tx_skb_release(struct sk_buff *skb)
-{
-    int i;
-    net_vif_t *vif = skb->src_vif;
-    unsigned long flags;
-
-    spin_lock_irqsave(&vif->domain->page_lock, flags);
-    for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
-        put_page_tot(skb_shinfo(skb)->frags[i].page);
-    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
-
-    if ( skb->skb_type == SKB_NODATA )
-        kmem_cache_free(net_header_cachep, skb->head);
-
-    skb_shinfo(skb)->nr_frags = 0;
-
-    make_tx_response(vif, skb->guest_id, RING_STATUS_OK);
-
-    put_vif(vif);
-}
-
+static void tx_skb_release(struct sk_buff *skb);
 
 static void net_tx_action(unsigned long unused)
 {
@@ -762,12 +743,16 @@ static void net_tx_action(unsigned long
         vif = list_entry(ent, net_vif_t, list);
         get_vif(vif);
         remove_from_net_schedule_list(vif);
-        if ( vif->tx_cons == vif->tx_prod )
+
+        /* Check whether there are packets to be transmitted. */
+        if ( (vif->tx_cons == vif->tx_prod) && !get_tx_bufs(vif) )
         {
             put_vif(vif);
             continue;
         }
 
+        add_to_net_schedule_list_tail(vif);
+
         if ( (skb = alloc_skb_nodata(GFP_ATOMIC)) == NULL )
         {
             printk("Out of memory in net_tx_action()!\n");
@@ -779,8 +764,6 @@ static void net_tx_action(unsigned long
         /* Pick an entry from the transmit queue. */
         tx = &vif->tx_shadow_ring[vif->tx_cons];
         vif->tx_cons = TX_RING_INC(vif->tx_cons);
-        if ( vif->tx_cons != vif->tx_prod )
-            add_to_net_schedule_list_tail(vif);
 
         skb->destructor = tx_skb_release;
 
@@ -832,6 +815,37 @@ static inline void maybe_schedule_tx_act
 }
 
 
+/* Destructor function for tx skbs. */
+static void tx_skb_release(struct sk_buff *skb)
+{
+    int i;
+    net_vif_t *vif = skb->src_vif;
+    unsigned long flags;
+
+    spin_lock_irqsave(&vif->domain->page_lock, flags);
+    for ( i = 0; i < skb_shinfo(skb)->nr_frags; i++ )
+        put_page_tot(skb_shinfo(skb)->frags[i].page);
+    spin_unlock_irqrestore(&vif->domain->page_lock, flags);
+
+    if ( skb->skb_type == SKB_NODATA )
+        kmem_cache_free(net_header_cachep, skb->head);
+
+    skb_shinfo(skb)->nr_frags = 0;
+
+    spin_lock_irqsave(&vif->tx_lock, flags);
+    __make_tx_response(vif, skb->guest_id, RING_STATUS_OK);
+    spin_unlock_irqrestore(&vif->tx_lock, flags);
+
+    if ( (vif->tx_cons == vif->tx_prod) && get_tx_bufs(vif) )
+    {
+        add_to_net_schedule_list_tail(vif);
+        maybe_schedule_tx_action();
+    }
+
+    put_vif(vif);
+}
+
+
 /*
  * We need this ioctl for efficient implementation of the
  * if_indextoname() function required by the IPv6 API. Without
@@ -1788,6 +1802,159 @@ inline int init_tx_header(u8 *data, unsi
 }
 
 
+static int get_tx_bufs(net_vif_t *vif)
+{
+    struct task_struct *p = vif->domain;
+    net_idx_t  *shared_idxs  = vif->shared_idxs;
+    net_ring_t *shared_rings = vif->shared_rings;
+    net_vif_t  *target;
+    unsigned long buf_pfn;
+    struct pfn_info *buf_page;
+    u8 *g_data;
+    unsigned short protocol;
+    struct sk_buff *skb;
+    tx_req_entry_t tx;
+    int i, j, ret;
+    unsigned long flags;
+
+    if ( vif->tx_req_cons == shared_idxs->tx_req_prod )
+        return 0;
+
+    spin_lock_irqsave(&vif->tx_lock, flags);
+
+    j = vif->tx_prod;
+
+    /*
+     * Collect up new transmit buffers. We collect up to the guest OS's new
+     * producer index, but take care not to catch up with our own consumer
+     * index.
+     */
+ again:
+    for ( i = vif->tx_req_cons;
+          (i != shared_idxs->tx_req_prod) &&
+              (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
+          i = TX_RING_INC(i) )
+    {
+        tx     = shared_rings->tx_ring[i].req;
+        target = VIF_DROP;
+
+        if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
+        {
+            DPRINTK("Bad packet size: %d\n", tx.size);
+            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+            continue;
+        }
+
+        /* No crossing a page boundary as the payload mustn't fragment. */
+        if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE )
+        {
+            DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
+                    tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
+            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+            continue;
+        }
+
+        buf_pfn  = tx.addr >> PAGE_SHIFT;
+        buf_page = frame_table + buf_pfn;
+        spin_lock(&p->page_lock);
+        if ( (buf_pfn >= max_page) ||
+             ((buf_page->flags & PG_domain_mask) != p->domain) )
+        {
+            DPRINTK("Bad page frame\n");
+            spin_unlock(&p->page_lock);
+            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+            continue;
+        }
+
+        g_data = map_domain_mem(tx.addr);
+
+        protocol = __constant_htons(
+            init_tx_header(g_data, tx.size, the_dev));
+        if ( protocol == 0 )
+        {
+            __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+            goto tx_unmap_and_continue;
+        }
+
+        target = net_get_target_vif(g_data, tx.size, vif);
+
+        if ( VIF_LOCAL(target) )
+        {
+            /* Local delivery */
+            if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
+            {
+                __make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
+                put_vif(target);
+                goto tx_unmap_and_continue;
+            }
+
+            skb->src_vif  = vif;
+            skb->dst_vif  = target;
+            skb->protocol = protocol;
+
+            /*
+             * We don't need a well-formed skb as netif_rx will fill these
+             * fields in as necessary. All we actually need is the right
+             * page offset in skb->data, and the right length in skb->len.
+             * Note that the correct address/length *excludes* link header.
+             */
+            skb->head = (u8 *)map_domain_mem(
+                ((skb->pf - frame_table) << PAGE_SHIFT));
+            skb->data = skb->head + 18;
+            memcpy(skb->data, g_data, tx.size);
+            skb->data += ETH_HLEN;
+            skb->len = tx.size - ETH_HLEN;
+            unmap_domain_mem(skb->head);
+
+            netif_rx(skb);
+
+            __make_tx_response(vif, tx.id, RING_STATUS_OK);
+        }
+        else if ( (target == VIF_PHYS) || IS_PRIV(p) )
+        {
+            vif->tx_shadow_ring[j].id     = tx.id;
+            vif->tx_shadow_ring[j].size   = tx.size;
+            vif->tx_shadow_ring[j].header =
+                kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
+            if ( vif->tx_shadow_ring[j].header == NULL )
+            {
+                __make_tx_response(vif, tx.id, RING_STATUS_OK);
+                goto tx_unmap_and_continue;
+            }
+
+            memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
+            vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
+            get_page_tot(buf_page);
+            j = TX_RING_INC(j);
+        }
+        else
+        {
+            __make_tx_response(vif, tx.id, RING_STATUS_DROPPED);
+        }
+
+    tx_unmap_and_continue:
+        unmap_domain_mem(g_data);
+        spin_unlock(&p->page_lock);
+    }
+
+    /*
+     * Needed as a final check for req_prod updates on another CPU.
+     * Also ensures that other CPUs see shadow ring updates.
+     */
+    smp_mb();
+
+    if ( (vif->tx_req_cons = i) != shared_idxs->tx_req_prod )
+        goto again;
+
+    if ( (ret = (vif->tx_prod != j)) )
+        vif->tx_prod = j;
+
+    spin_unlock_irqrestore(&vif->tx_lock, flags);
+
+    return ret;
+}
+
+
 /*
  * do_net_update:
  *
@@ -1801,15 +1968,10 @@ long do_net_update(void)
     net_vif_t *vif;
     net_idx_t *shared_idxs;
     unsigned int i, j, idx;
-    struct sk_buff *skb;
-    tx_req_entry_t tx;
     rx_req_entry_t rx;
-    unsigned long  pte_pfn, buf_pfn;
+    unsigned long pte_pfn, buf_pfn;
     struct pfn_info *pte_page, *buf_page;
     unsigned long *ptep;
-    net_vif_t *target;
-    u8 *g_data;
-    unsigned short protocol;
 
     perfc_incr(net_hypercalls);
 
@@ -1825,125 +1987,8 @@ long do_net_update(void)
      * PHASE 1 -- TRANSMIT RING
      */
 
-    /*
-     * Collect up new transmit buffers. We collect up to the guest OS's
-     * new producer index, but take care not to catch up with our own
-     * consumer index.
-     */
-    j = vif->tx_prod;
-    for ( i = vif->tx_req_cons;
-          (i != shared_idxs->tx_req_prod) &&
-              (((vif->tx_resp_prod-i) & (TX_RING_SIZE-1)) != 1);
-          i = TX_RING_INC(i) )
+    if ( get_tx_bufs(vif) )
     {
-        tx     = shared_rings->tx_ring[i].req;
-        target = VIF_DROP;
-
-        if ( (tx.size < PKT_PROT_LEN) || (tx.size > ETH_FRAME_LEN) )
-        {
-            DPRINTK("Bad packet size: %d\n", tx.size);
-            make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-            continue;
-        }
-
-        /* No crossing a page boundary as the payload mustn't fragment. */
-        if ( ((tx.addr & ~PAGE_MASK) + tx.size) >= PAGE_SIZE )
-        {
-            DPRINTK("tx.addr: %lx, size: %u, end: %lu\n",
-                    tx.addr, tx.size, (tx.addr &~PAGE_MASK) + tx.size);
-            make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-            continue;
-        }
-
-        buf_pfn  = tx.addr >> PAGE_SHIFT;
-        buf_page = frame_table + buf_pfn;
-        spin_lock_irq(&current->page_lock);
-        if ( (buf_pfn >= max_page) ||
-             ((buf_page->flags & PG_domain_mask) != current->domain) )
-        {
-            DPRINTK("Bad page frame\n");
-            spin_unlock_irq(&current->page_lock);
-            make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-            continue;
-        }
-
-        g_data = map_domain_mem(tx.addr);
-
-        protocol = __constant_htons(
-            init_tx_header(g_data, tx.size, the_dev));
-        if ( protocol == 0 )
-        {
-            make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-            goto tx_unmap_and_continue;
-        }
-
-        target = net_get_target_vif(g_data, tx.size, vif);
-
-        if ( VIF_LOCAL(target) )
-        {
-            /* Local delivery */
-            if ( (skb = dev_alloc_skb(ETH_FRAME_LEN + 32)) == NULL )
-            {
-                make_tx_response(vif, tx.id, RING_STATUS_BAD_PAGE);
-                put_vif(target);
-                goto tx_unmap_and_continue;
-            }
-
-            skb->src_vif  = vif;
-            skb->dst_vif  = target;
-            skb->protocol = protocol;
-
-            /*
-             * We don't need a well-formed skb as netif_rx will fill these
-             * fields in as necessary. All we actually need is the right
-             * page offset in skb->data, and the right length in skb->len.
-             * Note that the correct address/length *excludes* link header.
-             */
-            skb->head = (u8 *)map_domain_mem(
-                ((skb->pf - frame_table) << PAGE_SHIFT));
-            skb->data = skb->head + 18;
-            memcpy(skb->data, g_data, tx.size);
-            skb->data += ETH_HLEN;
-            skb->len = tx.size - ETH_HLEN;
-            unmap_domain_mem(skb->head);
-
-            netif_rx(skb);
-
-            make_tx_response(vif, tx.id, RING_STATUS_OK);
-        }
-        else if ( (target == VIF_PHYS) || IS_PRIV(current) )
-        {
-            vif->tx_shadow_ring[j].id     = tx.id;
-            vif->tx_shadow_ring[j].size   = tx.size;
-            vif->tx_shadow_ring[j].header =
-                kmem_cache_alloc(net_header_cachep, GFP_KERNEL);
-            if ( vif->tx_shadow_ring[j].header == NULL )
-            {
-                make_tx_response(vif, tx.id, RING_STATUS_OK);
-                goto tx_unmap_and_continue;
-            }
-
-            memcpy(vif->tx_shadow_ring[j].header, g_data, PKT_PROT_LEN);
-            vif->tx_shadow_ring[j].payload = tx.addr + PKT_PROT_LEN;
-            get_page_tot(buf_page);
-            j = TX_RING_INC(j);
-        }
-        else
-        {
-            make_tx_response(vif, tx.id, RING_STATUS_DROPPED);
-        }
-
-    tx_unmap_and_continue:
-        unmap_domain_mem(g_data);
-        spin_unlock_irq(&current->page_lock);
-    }
-
-    vif->tx_req_cons = i;
-
-    if ( vif->tx_prod != j )
-    {
-        smp_mb(); /* Let other CPUs see new descriptors first. */
-        vif->tx_prod = j;
         add_to_net_schedule_list_tail(vif);
         maybe_schedule_tx_action();
     }
@@ -2037,16 +2082,14 @@ long do_net_update(void)
 }
 
 
-static void make_tx_response(net_vif_t *vif,
-                             unsigned short id,
-                             unsigned char st)
+static void __make_tx_response(net_vif_t *vif,
+                               unsigned short id,
+                               unsigned char st)
 {
-    unsigned long flags;
     unsigned int pos;
     tx_resp_entry_t *resp;
 
     /* Place on the response ring for the relevant domain. */
-    spin_lock_irqsave(&vif->tx_lock, flags);
     pos  = vif->tx_resp_prod;
     resp = &vif->shared_rings->tx_ring[pos].resp;
     resp->id     = id;
@@ -2058,7 +2101,6 @@ static void make_tx_response(net_vif_t
         unsigned long cpu_mask = mark_guest_event(vif->domain, _EVENT_NET);
         guest_event_notify(cpu_mask);
     }
-    spin_unlock_irqrestore(&vif->tx_lock, flags);
}
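
The hypervisor-side change above moves request collection into get_tx_bufs(), which both do_net_update() and the skb destructor tx_skb_release() can call, so further guest requests are picked up on transmit completion without another hypercall. The following is a minimal, self-contained sketch (not taken from the patch) of the ring arithmetic that collection loop relies on: consume up to the guest's request producer, but stop one slot short of our own response producer. TX_RING_SIZE, TX_RING_INC and the index values are assumptions chosen for the example.

#include <stdio.h>

#define TX_RING_SIZE 256                                /* assumed power of two */
#define TX_RING_INC(i) (((i) + 1) & (TX_RING_SIZE - 1)) /* wrap-around increment */

/* May we consume the request at req_cons? Stop at the guest's producer,
 * or when only one free slot remains ahead of our response producer. */
static int may_consume(unsigned int req_cons,
                       unsigned int req_prod,
                       unsigned int resp_prod)
{
    return (req_cons != req_prod) &&
           (((resp_prod - req_cons) & (TX_RING_SIZE - 1)) != 1);
}

int main(void)
{
    unsigned int req_cons = 0, req_prod = 5, resp_prod = 0;

    /* Walk the ring exactly as far as the guard condition allows. */
    while (may_consume(req_cons, req_prod, resp_prod))
        req_cons = TX_RING_INC(req_cons);

    printf("consumed up to index %u\n", req_cons);  /* prints 5 */
    return 0;
}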
--- a/xenolinux-2.4.21-sparse/arch/xeno/drivers/network/network.c	Wed Jul 30 18:57:39 2003 +0000
+++ b/xenolinux-2.4.21-sparse/arch/xeno/drivers/network/network.c	Wed Jul 30 20:38:09 2003 +0000
@@ -317,7 +317,10 @@ static int network_start_xmit(struct sk_
     np->stats.tx_bytes += skb->len;
     np->stats.tx_packets++;
 
-    HYPERVISOR_net_update();
+    /* Only notify Xen if there are no outstanding responses. */
+    smp_wmb();
+    if ( np->net_idx->tx_resp_prod == i )
+        HYPERVISOR_net_update();
 
     return 0;
 }
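
On the guest side, the driver now suppresses the hypercall whenever earlier requests are still unanswered, since the backend will re-scan the request ring when it completes that outstanding work. A rough, self-contained sketch of the notification-suppression pattern follows; the net_idx structure, hypervisor_net_update() stub and the main() driver are illustrative stand-ins, not the real xenolinux definitions.

#include <stdio.h>

/* Illustrative stand-ins for the shared ring indices. */
struct net_idx {
    unsigned int tx_req_prod;   /* written by the guest   */
    unsigned int tx_resp_prod;  /* written by the backend */
};

static int hypercalls;

static void hypervisor_net_update(void)   /* stands in for HYPERVISOR_net_update() */
{
    hypercalls++;
}

static void queue_tx_request(struct net_idx *idx)
{
    unsigned int i = idx->tx_req_prod;    /* slot this request occupies */

    /* ... fill in the request descriptor at slot i ... */
    idx->tx_req_prod = i + 1;

    /* A write barrier (smp_wmb() in the real driver) would go here so the
     * backend sees the request before we decide whether to notify it. */

    /* Kick the backend only if it has answered every earlier request and
     * may therefore have gone idle; otherwise it will pick this request
     * up while finishing the work already in flight. */
    if (idx->tx_resp_prod == i)
        hypervisor_net_update();
}

int main(void)
{
    struct net_idx idx = { 0, 0 };

    queue_tx_request(&idx);   /* ring empty: hypercall issued     */
    queue_tx_request(&idx);   /* response outstanding: suppressed */

    printf("hypercalls issued: %d of 2 requests\n", hypercalls);
    return 0;
}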