xenolinux-2.4.21-sparse/arch/xeno/drivers/network/network.c @ 670:0e5f8fd98576

bitkeeper revision 1.385 (3f281523ZcjTKYBHYR4Oynisduu7hA)

network.c: Network bug fix.

author   kaf24@scramble.cl.cam.ac.uk
date     Wed Jul 30 18:57:39 2003 +0000
parents  734baea3a436
children 15a6d4d70e86

/******************************************************************************
 * network.c
 *
 * Virtual network driver for XenoLinux.
 *
 * Copyright (c) 2002-2003, K A Fraser
 */

#include <linux/config.h>
#include <linux/module.h>

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>

#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>

#include <asm/io.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

#define NET_IRQ _EVENT_NET

#define TX_MAX_ENTRIES (TX_RING_SIZE - 2)
#define RX_MAX_ENTRIES (RX_RING_SIZE - 2)

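/*
 * NB. The _INC/_ADD macros below wrap ring indices with a bitmask, which
 * relies on TX_RING_SIZE and RX_RING_SIZE being powers of two.
 */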
#define TX_RING_INC(_i)    (((_i)+1) & (TX_RING_SIZE-1))
#define RX_RING_INC(_i)    (((_i)+1) & (RX_RING_SIZE-1))
#define TX_RING_ADD(_i,_j) (((_i)+(_j)) & (TX_RING_SIZE-1))
#define RX_RING_ADD(_i,_j) (((_i)+(_j)) & (RX_RING_SIZE-1))

#define RX_BUF_SIZE ((PAGE_SIZE/2)+1) /* Fool the slab allocator :-) */

static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs);
static void network_tx_buf_gc(struct net_device *dev);
static void network_alloc_rx_buffers(struct net_device *dev);
static void network_free_rx_buffers(struct net_device *dev);
static void cleanup_module(void);

static struct list_head dev_list;

/*
 * Needed because network_close() is not properly implemented yet. So
 * an open after a close needs to do much less than the initial open.
 */
static int opened_once_already = 0;

struct net_private
{
    struct list_head list;
    struct net_device *dev;

    struct net_device_stats stats;
    atomic_t tx_entries;
    unsigned int rx_resp_cons, tx_resp_cons, tx_full;
    net_ring_t *net_ring;
    net_idx_t  *net_idx;
    spinlock_t tx_lock;
    unsigned int idx; /* Domain-specific index of this VIF. */

    unsigned int rx_bufs_to_notify;

    /*
     * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
     * array is an index into a chain of free entries.
     */
    struct sk_buff *tx_skbs[TX_RING_SIZE];
    struct sk_buff *rx_skbs[RX_RING_SIZE];
};

/* Access macros for acquiring/freeing slots in {tx,rx}_skbs[]. */
#define ADD_ID_TO_FREELIST(_list, _id)             \
    (_list)[(_id)] = (_list)[0];                   \
    (_list)[0]     = (void *)(unsigned long)(_id);
#define GET_ID_FROM_FREELIST(_list)                \
 ({ unsigned long _id = (unsigned long)(_list)[0]; \
    (_list)[0]  = (_list)[_id];                    \
    _id; })

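/*
 * Example: network_open() initialises tx_skbs[] so that slot 0 contains 1,
 * slot 1 contains 2, and so on -- slot 0 always names the first free slot
 * and each free slot names the next. GET_ID_FROM_FREELIST() pops the head
 * of that chain; ADD_ID_TO_FREELIST() pushes an id back onto it.
 */


/* Debug-event handler: dump the shared-ring indices to the console. */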
static void dbg_network_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    struct net_device *dev = (struct net_device *)dev_id;
    struct net_private *np = dev->priv;
    printk(KERN_ALERT "tx_full = %d, tx_entries = %d, tx_resp_cons = %d,"
           " tx_req_prod = %d, tx_resp_prod = %d, tx_event = %d, state=%d\n",
           np->tx_full, atomic_read(&np->tx_entries), np->tx_resp_cons,
           np->net_idx->tx_req_prod, np->net_idx->tx_resp_prod,
           np->net_idx->tx_event,
           test_bit(__LINK_STATE_XOFF, &dev->state));
    printk(KERN_ALERT "rx_resp_cons = %d,"
           " rx_req_prod = %d, rx_resp_prod = %d, rx_event = %d\n",
           np->rx_resp_cons, np->net_idx->rx_req_prod,
           np->net_idx->rx_resp_prod, np->net_idx->rx_event);
}


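/*
 * network_open() performs the full initialisation only on the first open
 * (see opened_once_already above): it resets the ring state, builds the
 * skb freelists, posts receive buffers, and binds the network and debug
 * event handlers.
 */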
static int network_open(struct net_device *dev)
{
    struct net_private *np = dev->priv;
    int i, error = 0;

    if ( opened_once_already )
    {
        memset(&np->stats, 0, sizeof(np->stats));
        netif_start_queue(dev);
        return 0;
    }

    np->rx_bufs_to_notify = 0;
    np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
    memset(&np->stats, 0, sizeof(np->stats));
    spin_lock_init(&np->tx_lock);
    atomic_set(&np->tx_entries, 0);
    memset(np->net_ring, 0, sizeof(*np->net_ring));
    memset(np->net_idx, 0, sizeof(*np->net_idx));

    /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
    for ( i = 0; i < TX_RING_SIZE; i++ )
        np->tx_skbs[i] = (void *)(i+1);
    for ( i = 0; i < RX_RING_SIZE; i++ )
        np->rx_skbs[i] = (void *)(i+1);

    network_alloc_rx_buffers(dev);

    error = request_irq(NET_IRQ, network_interrupt,
                        SA_SAMPLE_RANDOM, "network", dev);
    if ( error )
    {
        printk(KERN_WARNING "%s: Could not allocate network interrupt\n",
               dev->name);
        network_free_rx_buffers(dev);
        goto fail;
    }

    error = request_irq(_EVENT_DEBUG, dbg_network_int, SA_SHIRQ,
                        "debug", dev);
    if ( error )
    {
        printk(KERN_WARNING "%s: Non-fatal error -- no debug interrupt\n",
               dev->name);
    }

    printk("XenoLinux Virtual Network Driver installed as %s\n", dev->name);

    netif_start_queue(dev);

    MOD_INC_USE_COUNT;

    opened_once_already = 1;

    return 0;

 fail:
    kfree(np);
    return error;
}


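/*
 * Reclaim skbuffs for transmit requests that the hypervisor has responded
 * to, then set tx_event so that we are next notified roughly halfway
 * through the packets still in flight.
 */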
static void network_tx_buf_gc(struct net_device *dev)
{
    unsigned int i;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;
    unsigned int prod;
    tx_entry_t *tx_ring = np->net_ring->tx_ring;

    do {
        prod = np->net_idx->tx_resp_prod;

        for ( i = np->tx_resp_cons; i != prod; i = TX_RING_INC(i) )
        {
            skb = np->tx_skbs[tx_ring[i].resp.id];
            ADD_ID_TO_FREELIST(np->tx_skbs, tx_ring[i].resp.id);
            dev_kfree_skb_any(skb);
            atomic_dec(&np->tx_entries);
        }

        np->tx_resp_cons = prod;

        /* Set a new event, then check for race with update of tx_cons. */
        np->net_idx->tx_event =
            TX_RING_ADD(prod, (atomic_read(&np->tx_entries)>>1) + 1);
        smp_mb();
    }
    while ( prod != np->net_idx->tx_resp_prod );

    if ( np->tx_full && (atomic_read(&np->tx_entries) < TX_MAX_ENTRIES) )
    {
        np->tx_full = 0;
        netif_wake_queue(dev);
    }
}


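/* Walk the kernel page tables and return the PTE that maps 'addr'. */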
static inline pte_t *get_ppte(void *addr)
{
    pgd_t *pgd; pmd_t *pmd; pte_t *pte;
    pgd = pgd_offset_k(   (unsigned long)addr);
    pmd = pmd_offset(pgd, (unsigned long)addr);
    pte = pte_offset(pmd, (unsigned long)addr);
    return pte;
}


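/*
 * Refill the receive ring with freshly allocated skbuffs. Each request
 * carries the machine address of the PTE mapping the buffer so that Xen
 * can flip received pages directly into our address space; notifications
 * to Xen are batched rather than issued per buffer.
 */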
static void network_alloc_rx_buffers(struct net_device *dev)
{
    unsigned int i, id;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;
    unsigned int end = RX_RING_ADD(np->rx_resp_cons, RX_MAX_ENTRIES);

    if ( (i = np->net_idx->rx_req_prod) == end )
        return;

    do {
        skb = dev_alloc_skb(RX_BUF_SIZE);
        if ( skb == NULL ) break;
        skb->dev = dev;

        id = GET_ID_FROM_FREELIST(np->rx_skbs);
        np->rx_skbs[id] = skb;

        np->net_ring->rx_ring[i].req.id   = (unsigned short)id;
        np->net_ring->rx_ring[i].req.addr =
            virt_to_machine(get_ppte(skb->head));

        np->rx_bufs_to_notify++;
    }
    while ( (i = RX_RING_INC(i)) != end );

    /*
     * We may have allocated buffers which have entries outstanding in the page
     * update queue -- make sure we flush those first!
     */
    flush_page_update_queue();

    np->net_idx->rx_req_prod = i;
    np->net_idx->rx_event    = RX_RING_INC(np->rx_resp_cons);

    /* Batch Xen notifications. */
    if ( np->rx_bufs_to_notify > (RX_MAX_ENTRIES/4) )
    {
        HYPERVISOR_net_update();
        np->rx_bufs_to_notify = 0;
    }
}


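/* Release any skbuffs still posted on the receive ring. */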
static void network_free_rx_buffers(struct net_device *dev)
{
    unsigned int i;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;

    for ( i = np->rx_resp_cons;
          i != np->net_idx->rx_req_prod;
          i = RX_RING_INC(i) )
    {
        skb = np->rx_skbs[np->net_ring->rx_ring[i].req.id];
        dev_kfree_skb_any(skb);
    }
}

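/*
 * Queue a packet for transmission. Each ring entry describes a single
 * machine-contiguous buffer, so a payload that straddles a page boundary
 * is first copied into a fresh skbuff.
 */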
static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    unsigned int i, id;
    struct net_private *np = (struct net_private *)dev->priv;

    if ( np->tx_full )
    {
        printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
        netif_stop_queue(dev);
        return -ENOBUFS;
    }

    if ( (((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >= PAGE_SIZE )
    {
        struct sk_buff *new_skb = dev_alloc_skb(RX_BUF_SIZE);
        if ( new_skb == NULL ) return 1;
        skb_put(new_skb, skb->len);
        memcpy(new_skb->data, skb->data, skb->len);
        dev_kfree_skb(skb);
        skb = new_skb;
    }

    spin_lock_irq(&np->tx_lock);

    i = np->net_idx->tx_req_prod;

    id = GET_ID_FROM_FREELIST(np->tx_skbs);
    np->tx_skbs[id] = skb;

    np->net_ring->tx_ring[i].req.id   = (unsigned short)id;
    np->net_ring->tx_ring[i].req.addr =
        phys_to_machine(virt_to_phys(skb->data));
    np->net_ring->tx_ring[i].req.size = skb->len;
    np->net_idx->tx_req_prod = TX_RING_INC(i);
    atomic_inc(&np->tx_entries);

    network_tx_buf_gc(dev);

    if ( atomic_read(&np->tx_entries) >= TX_MAX_ENTRIES )
    {
        np->tx_full = 1;
        netif_stop_queue(dev);
    }

    spin_unlock_irq(&np->tx_lock);

    np->stats.tx_bytes += skb->len;
    np->stats.tx_packets++;

    HYPERVISOR_net_update();

    return 0;
}


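/*
 * Event handler: reclaim completed transmit buffers, pass received packets
 * up the stack, and replenish the receive ring.
 */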
static void network_interrupt(int irq, void *dev_id, struct pt_regs *ptregs)
{
    unsigned int i;
    unsigned long flags;
    struct net_device *dev = (struct net_device *)dev_id;
    struct net_private *np = dev->priv;
    struct sk_buff *skb;
    rx_resp_entry_t *rx;

    spin_lock_irqsave(&np->tx_lock, flags);
    network_tx_buf_gc(dev);
    spin_unlock_irqrestore(&np->tx_lock, flags);

 again:
    for ( i = np->rx_resp_cons;
          i != np->net_idx->rx_resp_prod;
          i = RX_RING_INC(i) )
    {
        rx = &np->net_ring->rx_ring[i].resp;

        skb = np->rx_skbs[rx->id];
        ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);

        if ( rx->status != RING_STATUS_OK )
        {
            printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
            dev_kfree_skb_any(skb);
            continue;
        }

        /*
         * Set up shinfo -- as in alloc_skb(). This was particularly nasty:
         * the shared info is hidden at the back of the data area (presumably
         * so it can be shared), but it is clobbered when the page is flipped
         * in underneath us.
         */
        atomic_set(&(skb_shinfo(skb)->dataref), 1);
        skb_shinfo(skb)->nr_frags = 0;
        skb_shinfo(skb)->frag_list = NULL;

        phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] =
            (*(unsigned long *)get_ppte(skb->head)) >> PAGE_SHIFT;

        if ( rx->offset < 16 )
        {
            printk(KERN_ALERT "need pkt offset >= 16 (got %d)\n", rx->offset);
            dev_kfree_skb_any(skb);
            continue;
        }

        skb_reserve(skb, rx->offset - 16);

        skb_put(skb, rx->size);
        skb->protocol = eth_type_trans(skb, dev);

        np->stats.rx_packets++;

        np->stats.rx_bytes += rx->size;
        netif_rx(skb);
        dev->last_rx = jiffies;
    }

    np->rx_resp_cons = i;

    network_alloc_rx_buffers(dev);

    /* Deal with hypervisor racing our resetting of rx_event. */
    smp_mb();
    if ( np->net_idx->rx_resp_prod != i ) goto again;
}


int network_close(struct net_device *dev)
{
    netif_stop_queue(dev);

    /*
     * XXXX This cannot be done safely until we have a proper interface
     * for setting up and tearing down virtual interfaces on the fly.
     * Currently the receive buffers are locked down by Xen and we have
     * no sensible way of retrieving them.
     */
#if 0
    free_irq(NET_IRQ, dev);

    network_free_rx_buffers(dev);
    kfree(np->net_ring->rx_ring);
    kfree(np->net_ring->tx_ring);

    MOD_DEC_USE_COUNT;
#endif

    return 0;
}


static struct net_device_stats *network_get_stats(struct net_device *dev)
{
    struct net_private *np = (struct net_private *)dev->priv;
    return &np->stats;
}


/*
 * This notifier is installed for domain 0 only.
 * All other domains have VFR rules installed on their behalf by domain 0
 * when they are created. For bootstrap, Xen creates wildcard rules for
 * domain 0 -- this notifier is used to detect when we find our proper
 * IP address, so we can poke down proper rules and remove the wildcards.
 */
static int inetdev_notify(struct notifier_block *this,
                          unsigned long event,
                          void *ptr)
{
    struct in_ifaddr   *ifa = (struct in_ifaddr *)ptr;
    struct net_device  *dev = ifa->ifa_dev->dev;
    struct list_head   *ent;
    struct net_private *np;
    int idx = -1;
    network_op_t op;

    list_for_each ( ent, &dev_list )
    {
        np = list_entry(ent, struct net_private, list);
        if ( np->dev == dev )
            idx = np->idx;
    }

    if ( idx == -1 )
        goto out;

    memset(&op, 0, sizeof(op));
    op.u.net_rule.proto  = NETWORK_PROTO_ANY;
    op.u.net_rule.action = NETWORK_ACTION_ACCEPT;

    if ( event == NETDEV_UP )
        op.cmd = NETWORK_OP_ADDRULE;
    else if ( event == NETDEV_DOWN )
        op.cmd = NETWORK_OP_DELETERULE;
    else
        goto out;

    op.u.net_rule.src_vif       = idx;
    op.u.net_rule.dst_vif       = VIF_PHYSICAL_INTERFACE;
    op.u.net_rule.src_addr      = ntohl(ifa->ifa_address);
    op.u.net_rule.src_addr_mask = ~0UL;
    op.u.net_rule.dst_addr      = 0;
    op.u.net_rule.dst_addr_mask = 0;
    (void)HYPERVISOR_network_op(&op);

    op.u.net_rule.src_vif       = VIF_ANY_INTERFACE;
    op.u.net_rule.dst_vif       = idx;
    op.u.net_rule.src_addr      = 0;
    op.u.net_rule.src_addr_mask = 0;
    op.u.net_rule.dst_addr      = ntohl(ifa->ifa_address);
    op.u.net_rule.dst_addr_mask = ~0UL;
    (void)HYPERVISOR_network_op(&op);

 out:
    return NOTIFY_DONE;
}

static struct notifier_block notifier_inetdev = {
    .notifier_call = inetdev_notify,
    .next          = NULL,
    .priority      = 0
};


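/*
 * For each shared ring advertised in start_info, map it through a fixmap
 * entry and register a net_device for the corresponding VIF.
 */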
int __init init_module(void)
{
    int i, fixmap_idx=-1, err;
    struct net_device *dev;
    struct net_private *np;

    INIT_LIST_HEAD(&dev_list);

    /*
     * Domain 0 must poke its own network rules as it discovers its IP
     * addresses. All other domains have a privileged "parent" to do this for
     * them at start of day.
     */
    if ( start_info.dom_id == 0 )
        (void)register_inetaddr_notifier(&notifier_inetdev);

    for ( i = 0; i < MAX_DOMAIN_VIFS; i++ )
    {
        if ( start_info.net_rings[i] == 0 )
            continue;

        /* We actually only support up to 4 vifs right now. */
        if ( ++fixmap_idx == 4 )
            break;

        dev = alloc_etherdev(sizeof(struct net_private));
        if ( dev == NULL )
        {
            err = -ENOMEM;
            goto fail;
        }

        set_fixmap(FIX_NETRING0_BASE+fixmap_idx, start_info.net_rings[i]);

        np = dev->priv;
        np->net_ring = (net_ring_t *)fix_to_virt(FIX_NETRING0_BASE+fixmap_idx);
        np->net_idx  = &HYPERVISOR_shared_info->net_idx[i];
        np->idx      = i;

        SET_MODULE_OWNER(dev);
        dev->open            = network_open;
        dev->hard_start_xmit = network_start_xmit;
        dev->stop            = network_close;
        dev->get_stats       = network_get_stats;

        memcpy(dev->dev_addr, start_info.net_vmac[i], ETH_ALEN);

        if ( (err = register_netdev(dev)) != 0 )
        {
            kfree(dev);
            goto fail;
        }

        np->dev = dev;
        list_add(&np->list, &dev_list);
    }

    return 0;

 fail:
    cleanup_module();
    return err;
}


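/*
 * Unregister and free every device we created; for domain 0, also remove
 * the inetaddr notifier installed by init_module().
 */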
static void cleanup_module(void)
{
    struct net_private *np;
    struct net_device *dev;

    while ( !list_empty(&dev_list) )
    {
        np  = list_entry(dev_list.next, struct net_private, list);
        list_del(&np->list);
        dev = np->dev;
        unregister_netdev(dev);
        kfree(dev);
    }

    if ( start_info.dom_id == 0 )
        (void)unregister_inetaddr_notifier(&notifier_inetdev);
}


module_init(init_module);
module_exit(cleanup_module);