ia64/xen-unstable

view xenolinux-2.4.22-sparse/arch/xeno/drivers/network/network.c @ 779:30c521db4c71

bitkeeper revision 1.475 (3f796d3bblNbmAFIYOUxNwNgsLILNg)

network.c, dev.c, keyhandler.c:
Stronger SMP memory barriers in the network code.
author kaf24@scramble.cl.cam.ac.uk
date Tue Sep 30 11:47:07 2003 +0000 (2003-09-30)
parents 5e39ada9640f
children 02306208d767
/******************************************************************************
 * network.c
 *
 * Virtual network driver for XenoLinux.
 *
 * Copyright (c) 2002-2003, K A Fraser
 */

#include <linux/config.h>
#include <linux/module.h>

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>

#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>

#include <asm/io.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

#define NET_IRQ _EVENT_NET

#define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
#define RX_MAX_ENTRIES (RX_RING_SIZE - 2)

#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))
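
/*
 * These index macros rely on TX_RING_SIZE and RX_RING_SIZE being powers
 * of two, so that masking with (SIZE-1) performs the modulo wrap-around.
 * For example, assuming TX_RING_SIZE == 256: TX_RING_INC(255) == 0 and
 * TX_RING_ADD(250, 10) == 4.
 */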

#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */

static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
static void network_free_rx_buffers(struct net_device *dev);
static void cleanup_module(void);

static struct list_head dev_list;

/*
 * Needed because network_close() is not properly implemented yet. So
 * an open after a close needs to do much less than the initial open.
 */
static int opened_once_already = 0;

struct net_private
{
    struct list_head list;
    struct net_device *dev;

    struct net_device_stats stats;
    atomic_t tx_entries;
    unsigned int rx_resp_cons, tx_resp_cons, tx_full;
    net_ring_t *net_ring;
    net_idx_t  *net_idx;
    spinlock_t tx_lock;
    unsigned int idx; /* Domain-specific index of this VIF. */

    unsigned int rx_bufs_to_notify;

    /*
     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
     * array is an index into a chain of free entries.
     */
    struct sk_buff *tx_skbs[TX_RING_SIZE];
    struct sk_buff *rx_skbs[RX_RING_SIZE];
};

/* Access macros for acquiring/freeing slots in {tx,rx}_skbs[]. */
#define ADD_ID_TO_FREELIST(_list, _id)             \
    (_list)[(_id)] = (_list)[0];                   \
    (_list)[0]     = (void *)(unsigned long)(_id);
#define GET_ID_FROM_FREELIST(_list)                \
 ({ unsigned long _id = (unsigned long)(_list)[0]; \
    (_list)[0] = (_list)[_id];                     \
    _id; })
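
/*
 * The free chain is threaded through the array itself: entry 0 holds the
 * index of the first free slot, and each free slot holds (as a cast
 * integer) the index of the next. Illustrative use, mirroring the
 * transmit path:
 *
 *     id = GET_ID_FROM_FREELIST(np->tx_skbs);   -- claim a free slot
 *     np->tx_skbs[id] = skb;                    -- record outstanding skb
 *     ...
 *     ADD_ID_TO_FREELIST(np->tx_skbs, id);      -- release the slot later
 */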

static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    struct net_device *dev = (struct net_device *)dev_id;
    struct net_private *np = dev->priv;
    printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
           " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
           np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons,
           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
           np->net_idx->tx_event,
           test_bit(__LINK_STATE_XOFF, &dev->state));
    printk(KERN_ALERT "rx_resp_cons = %d,"
           " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n",
           np->rx_resp_cons, np->net_idx->rx_req_prod,
           np->net_idx->rx_resp_prod, np->net_idx->rx_event);

    show_registers(ptregs);
}

static int network_open(struct net_device *dev)
{
    struct net_private *np = dev->priv;
    int i, error = 0;

    if ( opened_once_already )
    {
        memset(&np->stats, 0, sizeof(np->stats));
        netif_start_queue(dev);
        return 0;
    }

    np->rx_bufs_to_notify = 0;
    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
    memset(&np->stats, 0, sizeof(np->stats));
    spin_lock_init(&np->tx_lock);
    atomic_set(&np->tx_entries, 0);
    memset(np->net_ring, 0, sizeof(*np->net_ring));
    memset(np->net_idx, 0, sizeof(*np->net_idx));

    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
    for ( i = 0; i < TX_RING_SIZE; i++ )
        np->tx_skbs[i] = (void *)(i+1);
    for ( i = 0; i < RX_RING_SIZE; i++ )
        np->rx_skbs[i] = (void *)(i+1);

    network_alloc_rx_buffers(dev);

    error = request_irq(NET_IRQ, network_interrupt,
                        SA_SAMPLE_RANDOM, "network", dev);
    if ( error )
    {
        printk(KERN_WARNING "%s: Could not allocate network interrupt\n",
               dev->name);
        network_free_rx_buffers(dev);
        goto fail;
    }

    error = request_irq(_EVENT_DEBUG, dbg_network_int, SA_SHIRQ,
                        "debug", dev);
    if ( error )
    {
        printk(KERN_WARNING "%s: Non-fatal error -- no debug interrupt\n",
               dev->name);
    }

    printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name);

    netif_start_queue(dev);

    MOD_INC_USE_COUNT;

    opened_once_already = 1;

    return 0;

 fail:
    kfree(np);
    return error;
}
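
/*
 * Free the skbuffs for any transmit requests that Xen has responded to,
 * and advance tx_event so the next notification arrives when roughly half
 * of the currently outstanding packets have completed. The mb() and the
 * surrounding do/while loop guard against responses being produced
 * between reading tx_resp_prod and publishing the new tx_event.
 */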
static void network_tx_buf_gc(struct net_device *dev)
{
    unsigned int i;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;
    unsigned int prod;
    tx_entry_t *tx_ring = np->net_ring->tx_ring;

    do {
        prod = np->net_idx->tx_resp_prod;

        for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
        {
            skb = np->tx_skbs[tx_ring[i].resp.id];
            ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id);
            dev_kfree_skb_any(skb);
            atomic_dec(&np->tx_entries);
        }

        np->tx_resp_cons = prod;

        /* Set a new event, then check for race with update of tx_cons. */
        np->net_idx->tx_event =
            TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
        mb();
    }
    while ( prod != np->net_idx->tx_resp_prod );

    if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
    {
        np->tx_full = 0;
        netif_wake_queue(dev);
    }
}
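
/*
 * Walk the kernel page tables to find the pte mapping 'addr'. The receive
 * path hands the machine address of this pte to Xen, apparently so that
 * the hypervisor can flip a freshly received page straight into the
 * buffer's mapping -- see network_alloc_rx_buffers() and the fix-ups in
 * network_interrupt().
 */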
static inline pte_t *get_ppte(void *addr)
{
    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
    pgd = pgd_offset_k(   (unsigned long)addr);
    pmd = pmd_offset(pgd, (unsigned long)addr);
    pte = pte_offset(pmd, (unsigned long)addr);
    return pte;
}
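
/*
 * Refill the receive ring with up to RX_MAX_ENTRIES outstanding buffers.
 * Newly allocated buffers may still have page-table updates queued, so
 * the page-update queue is flushed before rx_req_prod is published to
 * Xen, and hypervisor notifications are batched until at least a quarter
 * of a ring's worth of new buffers has accumulated.
 */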
static void network_alloc_rx_buffers(struct net_device *dev)
{
    unsigned int i, id;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;
    unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);

    if ( (i = np->net_idx->rx_req_prod) == end )
        return;

    do {
        skb = dev_alloc_skb(RX_BUF_SIZE);
        if ( skb == NULL ) break;
        skb->dev = dev;

        id = GET_ID_FROM_FREELIST(np->rx_skbs);
        np->rx_skbs[id] = skb;

        np->net_ring->rx_ring[i].req.id   = (unsigned short)id;
        np->net_ring->rx_ring[i].req.addr =
            virt_to_machine(get_ppte(skb->head));

        np->rx_bufs_to_notify++;
    }
    while ( (i = RX_RING_INC(i)) != end );

    /*
     * We may have allocated buffers which have entries outstanding in
     * the page update queue -- make sure we flush those first!
     */
    flush_page_update_queue();

    np->net_idx->rx_req_prod = i;
    np->net_idx->rx_event    = RX_RING_INC(np->rx_resp_cons);

    /* Batch Xen notifications. */
    if ( np->rx_bufs_to_notify > (RX_MAX_ENTRIES/4) )
    {
        HYPERVISOR_net_update();
        np->rx_bufs_to_notify = 0;
    }
}

static void network_free_rx_buffers(struct net_device *dev)
{
    unsigned int i;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;

    for ( i = np->rx_resp_cons;
          i != np->net_idx->rx_req_prod;
          i = RX_RING_INC(i) )
    {
        skb = np->rx_skbs[np->net_ring->rx_ring[i].req.id];
        dev_kfree_skb_any(skb);
    }
}
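
/*
 * A transmit request carries a single machine address and length, so a
 * packet's data must not cross a page boundary (presumably because
 * machine-frame contiguity cannot be assumed). Packets that do cross a
 * boundary are first copied into a freshly allocated skb whose data fits
 * within one page.
 */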
static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    unsigned int i, id;
    struct net_private *np = (struct net_private *)dev->priv;

    if ( np->tx_full )
    {
        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
        netif_stop_queue(dev);
        return -ENOBUFS;
    }

    if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
    {
        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
        if ( new_skb == NULL ) return 1;
        skb_put(new_skb, skb->len);
        memcpy(new_skb->data, skb->data, skb->len);
        dev_kfree_skb(skb);
        skb = new_skb;
    }

    spin_lock_irq(&np->tx_lock);

    i = np->net_idx->tx_req_prod;

    id = GET_ID_FROM_FREELIST(np->tx_skbs);
    np->tx_skbs[id] = skb;

    np->net_ring->tx_ring[i].req.id   = (unsigned short)id;
    np->net_ring->tx_ring[i].req.addr =
        phys_to_machine(virt_to_phys(skb->data));
    np->net_ring->tx_ring[i].req.size = skb->len;
    np->net_idx->tx_req_prod = TX_RING_INC(i);
    atomic_inc(&np->tx_entries);

    network_tx_buf_gc(dev);

    if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES )
    {
        np->tx_full = 1;
        netif_stop_queue(dev);
    }

    spin_unlock_irq(&np->tx_lock);

    np->stats.tx_bytes += skb->len;
    np->stats.tx_packets++;

    /* Only notify Xen if there are no outstanding responses. */
    mb();
    if ( np->net_idx->tx_resp_prod == i )
        HYPERVISOR_net_update();

    return 0;
}
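
/*
 * Interrupt handler: garbage-collect completed transmits, then consume
 * receive responses. Xen appears to deliver each packet by replacing the
 * page behind the skb's data area ("page flip"), so the skb's shared info
 * and the phys-to-machine mapping for that page are repaired before the
 * skb is handed to netif_rx(). The final mb()/recheck catches responses
 * that arrive while rx_event is being reset.
 */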
static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs)
{
    unsigned int i;
    unsigned long flags;
    struct net_device *dev = (struct net_device *)dev_id;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;
    rx_resp_entry_t *rx;

    spin_lock_irqsave(&np->tx_lock, flags);
    network_tx_buf_gc(dev);
    spin_unlock_irqrestore(&np->tx_lock, flags);

 again:
    for ( i = np->rx_resp_cons;
          i != np->net_idx->rx_resp_prod;
          i = RX_RING_INC(i) )
    {
        rx = &np->net_ring->rx_ring[i].resp;

        skb = np->rx_skbs[rx->id];
        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);

        if ( rx->status != RING_STATUS_OK )
        {
            printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
            dev_kfree_skb_any(skb);
            continue;
        }

        /*
         * Set up shinfo -- from alloc_skb. This was particularly nasty: the
         * shared info is hidden at the back of the data area (presumably so
         * it can be shared), but on page flip it gets clobbered.
         */
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;

        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;

        if ( rx->offset < 16 )
        {
            printk(KERN_ALERT "need pkt offset >= 16 (got %d)\n", rx->offset);
            dev_kfree_skb_any(skb);
            continue;
        }

        skb_reserve(skb, rx->offset - 16);

        skb_put(skb, rx->size);
        skb->protocol = eth_type_trans(skb, dev);

        np->stats.rx_packets++;

        np->stats.rx_bytes += rx->size;
        netif_rx(skb);
        dev->last_rx = jiffies;
    }

    np->rx_resp_cons = i;

    network_alloc_rx_buffers(dev);

    /* Deal with hypervisor racing our resetting of rx_event. */
    mb();
    if ( np->net_idx->rx_resp_prod != i ) goto again;
}

int network_close(struct net_device *dev)
{
    netif_stop_queue(dev);

    /*
     * XXXX This cannot be done safely until we have a proper interface
     * for setting up and tearing down virtual interfaces on the fly.
     * Currently the receive buffers are locked down by Xen and we have
     * no sensible way of retrieving them.
     */
#if 0
    free_irq(NET_IRQ, dev);

    network_free_rx_buffers(dev);
    kfree(np->net_ring->rx_ring);
    kfree(np->net_ring->tx_ring);

    MOD_DEC_USE_COUNT;
#endif

    return 0;
}

static struct net_device_stats *network_get_stats(struct net_device *dev)
{
    struct net_private *np = (struct net_private *)dev->priv;
    return &np->stats;
}

/*
 * This notifier is installed for domain 0 only.
 * All other domains have VFR rules installed on their behalf by domain 0
 * when they are created. For bootstrap, Xen creates wildcard rules for
 * domain 0 -- this notifier is used to detect when we find our proper
 * IP address, so we can poke down proper rules and remove the wildcards.
 */
static int inetdev_notify(struct notifier_block *this,
                          unsigned long event,
                          void *ptr)
{
    struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
    struct net_device *dev = ifa->ifa_dev->dev;
    struct list_head *ent;
    struct net_private *np;
    int idx = -1;
    network_op_t op;

    list_for_each ( ent, &dev_list )
    {
        np = list_entry(ent, struct net_private, list);
        if ( np->dev == dev )
            idx = np->idx;
    }

    if ( idx == -1 )
        goto out;

    memset(&op, 0, sizeof(op));
    op.u.net_rule.proto  = NETWORK_PROTO_ANY;
    op.u.net_rule.action = NETWORK_ACTION_ACCEPT;

    if ( event == NETDEV_UP )
        op.cmd = NETWORK_OP_ADDRULE;
    else if ( event == NETDEV_DOWN )
        op.cmd = NETWORK_OP_DELETERULE;
    else
        goto out;

    op.u.net_rule.src_vif       = idx;
    op.u.net_rule.dst_vif       = VIF_PHYSICAL_INTERFACE;
    op.u.net_rule.src_addr      = ntohl(ifa->ifa_address);
    op.u.net_rule.src_addr_mask = ~0UL;
    op.u.net_rule.dst_addr      = 0;
    op.u.net_rule.dst_addr_mask = 0;
    (void)HYPERVISOR_network_op(&op);

    op.u.net_rule.src_vif       = VIF_ANY_INTERFACE;
    op.u.net_rule.dst_vif       = idx;
    op.u.net_rule.src_addr      = 0;
    op.u.net_rule.src_addr_mask = 0;
    op.u.net_rule.dst_addr      = ntohl(ifa->ifa_address);
    op.u.net_rule.dst_addr_mask = ~0UL;
    (void)HYPERVISOR_network_op(&op);

 out:
    return NOTIFY_DONE;
}

static struct notifier_block notifier_inetdev = {
    .notifier_call = inetdev_notify,
    .next          = NULL,
    .priority      = 0
};
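
/*
 * One net_device is created for each VIF advertised in start_info. Each
 * shared ring is mapped through its own fixmap slot (FIX_NETRING0_BASE +
 * n); only four such slots appear to be reserved, hence the current cap
 * of four VIFs.
 */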
int __init init_module(void)
{
    int i, fixmap_idx=-1, err;
    struct net_device *dev;
    struct net_private *np;

    INIT_LIST_HEAD(&dev_list);

    /*
     * Domain 0 must poke its own network rules as it discovers its IP
     * addresses. All other domains have a privileged "parent" to do this
     * for them at start of day.
     */
    if ( start_info.dom_id == 0 )
        (void)register_inetaddr_notifier(&notifier_inetdev);

    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
    {
        if ( start_info.net_rings[i] == 0 )
            continue;

        /* We actually only support up to 4 vifs right now. */
        if ( ++fixmap_idx == 4 )
            break;

        dev = alloc_etherdev(sizeof(struct net_private));
        if ( dev == NULL )
        {
            err = -ENOMEM;
            goto fail;
        }

        set_fixmap(FIX_NETRING0_BASE+fixmap_idx, start_info.net_rings[i]);

        np = dev->priv;
        np->net_ring = (net_ring_t *)fix_to_virt(FIX_NETRING0_BASE+fixmap_idx);
        np->net_idx  = &HYPERVISOR_shared_info->net_idx[i];
        np->idx      = i;

        SET_MODULE_OWNER(dev);
        dev->open            = network_open;
        dev->hard_start_xmit = network_start_xmit;
        dev->stop            = network_close;
        dev->get_stats       = network_get_stats;

        memcpy(dev->dev_addr, start_info.net_vmac[i], ETH_ALEN);

        if ( (err = register_netdev(dev)) != 0 )
        {
            kfree(dev);
            goto fail;
        }

        np->dev = dev;
        list_add(&np->list, &dev_list);
    }

    return 0;

 fail:
    cleanup_module();
    return err;
}

static void cleanup_module(void)
{
    struct net_private *np;
    struct net_device *dev;

    while ( !list_empty(&dev_list) )
    {
        np = list_entry(dev_list.next, struct net_private, list);
        list_del(&np->list);
        dev = np->dev;
        unregister_netdev(dev);
        kfree(dev);
    }

    if ( start_info.dom_id == 0 )
        (void)unregister_inetaddr_notifier(&notifier_inetdev);
}

module_init(init_module);
module_exit(cleanup_module);