ia64/xen-unstable

view linux-2.6-xen-sparse/net/core/dev.c @ 8612:d783bdd14f2e

Remove the free_vcpu() interface I added in the preceding
changeset. It makes no sense, since an allocated VCPU
cannot be freed at any arbitrary point because individual
VCPUs are not refcounted.

Instead extend free_domain() slightly so it really does do
the reverse of alloc_vcpu() for every allocated VCPU.

Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Sat Jan 14 23:40:09 2006 +0100 (2006-01-14)
parents 9b7649651f49
children fd9b2c1bb577
line source
1 /*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/config.h>
79 #include <linux/cpu.h>
80 #include <linux/types.h>
81 #include <linux/kernel.h>
82 #include <linux/sched.h>
83 #include <linux/string.h>
84 #include <linux/mm.h>
85 #include <linux/socket.h>
86 #include <linux/sockios.h>
87 #include <linux/errno.h>
88 #include <linux/interrupt.h>
89 #include <linux/if_ether.h>
90 #include <linux/netdevice.h>
91 #include <linux/etherdevice.h>
92 #include <linux/notifier.h>
93 #include <linux/skbuff.h>
94 #include <net/sock.h>
95 #include <linux/rtnetlink.h>
96 #include <linux/proc_fs.h>
97 #include <linux/seq_file.h>
98 #include <linux/stat.h>
99 #include <linux/if_bridge.h>
100 #include <linux/divert.h>
101 #include <net/dst.h>
102 #include <net/pkt_sched.h>
103 #include <net/checksum.h>
104 #include <linux/highmem.h>
105 #include <linux/init.h>
106 #include <linux/kmod.h>
107 #include <linux/module.h>
108 #include <linux/kallsyms.h>
109 #include <linux/netpoll.h>
110 #include <linux/rcupdate.h>
111 #include <linux/delay.h>
112 #ifdef CONFIG_NET_RADIO
113 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
114 #include <net/iw_handler.h>
115 #endif /* CONFIG_NET_RADIO */
116 #include <asm/current.h>
118 #include <net/ip.h>
119 #include <linux/tcp.h>
120 #include <linux/udp.h>
123 /* This define, if set, will randomly drop a packet when congestion
124 * is more than moderate. It helps fairness in the multi-interface
125 * case when one of them is a hog, but it kills performance for the
126 * single interface case so it is off now by default.
127 */
128 #undef RAND_LIE
130 /* Setting this will sample the queue lengths and thus congestion
131 * via a timer instead of as each packet is received.
132 */
133 #undef OFFLINE_SAMPLE
135 /*
136 * The list of packet types we will receive (as opposed to discard)
137 * and the routines to invoke.
138 *
139 * Why 16. Because with 16 the only overlap we get on a hash of the
140 * low nibble of the protocol value is RARP/SNAP/X.25.
141 *
142 * NOTE: That is no longer true with the addition of VLAN tags. Not
143 * sure which should go first, but I bet it won't make much
144 * difference if we are running VLANs. The good news is that
145 * this protocol won't be in the list unless compiled in, so
146 * the average user (w/out VLANs) will not be adversely affected.
147 * --BLG
148 *
149 * 0800 IP
150 * 8100 802.1Q VLAN
151 * 0001 802.3
152 * 0002 AX.25
153 * 0004 802.2
154 * 8035 RARP
155 * 0005 SNAP
156 * 0805 X.25
157 * 0806 ARP
158 * 8137 IPX
159 * 0009 Localtalk
160 * 86DD IPv6
161 */
163 static DEFINE_SPINLOCK(ptype_lock);
164 static struct list_head ptype_base[16]; /* 16 way hashed list */
165 static struct list_head ptype_all; /* Taps */
167 #ifdef OFFLINE_SAMPLE
168 static void sample_queue(unsigned long dummy);
169 static struct timer_list samp_timer = TIMER_INITIALIZER(sample_queue, 0, 0);
170 #endif
172 /*
173 * The @dev_base list is protected by @dev_base_lock and the rtnl
174 * semaphore.
175 *
176 * Pure readers hold dev_base_lock for reading.
177 *
178 * Writers must hold the rtnl semaphore while they loop through the
179 * dev_base list, and hold dev_base_lock for writing when they do the
180 * actual updates. This allows pure readers to access the list even
181 * while a writer is preparing to update it.
182 *
183 * To put it another way, dev_base_lock is held for writing only to
184 * protect against pure readers; the rtnl semaphore provides the
185 * protection against other writers.
186 *
187 * See, for example usages, register_netdevice() and
188 * unregister_netdevice(), which must be called with the rtnl
189 * semaphore held.
190 */
191 struct net_device *dev_base;
192 static struct net_device **dev_tail = &dev_base;
193 DEFINE_RWLOCK(dev_base_lock);
195 EXPORT_SYMBOL(dev_base);
196 EXPORT_SYMBOL(dev_base_lock);
198 #define NETDEV_HASHBITS 8
199 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
200 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
202 static inline struct hlist_head *dev_name_hash(const char *name)
203 {
204 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
205 return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
206 }
208 static inline struct hlist_head *dev_index_hash(int ifindex)
209 {
210 return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
211 }
213 /*
214 * Our notifier list
215 */
217 static struct notifier_block *netdev_chain;
219 /*
220 * Device drivers call our routines to queue packets here. We empty the
221 * queue in the local softnet handler.
222 */
223 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { 0, };
225 #ifdef CONFIG_SYSFS
226 extern int netdev_sysfs_init(void);
227 extern int netdev_register_sysfs(struct net_device *);
228 extern void netdev_unregister_sysfs(struct net_device *);
229 #else
230 #define netdev_sysfs_init() (0)
231 #define netdev_register_sysfs(dev) (0)
232 #define netdev_unregister_sysfs(dev) do { } while(0)
233 #endif
236 /*******************************************************************************
238 Protocol management and registration routines
240 *******************************************************************************/
242 /*
243 * For efficiency
244 */
246 int netdev_nit;
248 /*
249 * Add a protocol ID to the list. Now that the input handler is
250 * smarter we can dispense with all the messy stuff that used to be
251 * here.
252 *
253 * BEWARE!!! Protocol handlers, mangling input packets,
254 * MUST BE last in hash buckets and checking protocol handlers
255 * MUST start from promiscuous ptype_all chain in net_bh.
256 * It is true now, do not change it.
257 * Explanation follows: if protocol handler, mangling packet, will
258 * be the first on list, it is not able to sense, that packet
259 * is cloned and should be copied-on-write, so that it will
260 * change it and subsequent readers will get broken packet.
261 * --ANK (980803)
262 */
264 /**
265 * dev_add_pack - add packet handler
266 * @pt: packet type declaration
267 *
268 * Add a protocol handler to the networking stack. The passed &packet_type
269 * is linked into kernel lists and may not be freed until it has been
270 * removed from the kernel lists.
271 *
272 * This call does not sleep therefore it can not
273 * guarantee all CPU's that are in middle of receiving packets
274 * will see the new packet type (until the next received packet).
275 */
277 void dev_add_pack(struct packet_type *pt)
278 {
279 int hash;
281 spin_lock_bh(&ptype_lock);
282 if (pt->type == htons(ETH_P_ALL)) {
283 netdev_nit++;
284 list_add_rcu(&pt->list, &ptype_all);
285 } else {
286 hash = ntohs(pt->type) & 15;
287 list_add_rcu(&pt->list, &ptype_base[hash]);
288 }
289 spin_unlock_bh(&ptype_lock);
290 }
292 extern void linkwatch_run_queue(void);
296 /**
297 * __dev_remove_pack - remove packet handler
298 * @pt: packet type declaration
299 *
300 * Remove a protocol handler that was previously added to the kernel
301 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
302 * from the kernel lists and can be freed or reused once this function
303 * returns.
304 *
305 * The packet type might still be in use by receivers
306 * and must not be freed until after all the CPU's have gone
307 * through a quiescent state.
308 */
309 void __dev_remove_pack(struct packet_type *pt)
310 {
311 struct list_head *head;
312 struct packet_type *pt1;
314 spin_lock_bh(&ptype_lock);
316 if (pt->type == htons(ETH_P_ALL)) {
317 netdev_nit--;
318 head = &ptype_all;
319 } else
320 head = &ptype_base[ntohs(pt->type) & 15];
322 list_for_each_entry(pt1, head, list) {
323 if (pt == pt1) {
324 list_del_rcu(&pt->list);
325 goto out;
326 }
327 }
329 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
330 out:
331 spin_unlock_bh(&ptype_lock);
332 }
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler that was previously added with
 *	dev_add_pack() and then wait in synchronize_net(), so that the
 *	&packet_type can be freed or reused once this function returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the
 *	packet type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Unlink first ... */
	__dev_remove_pack(pt);

	/* ... then wait for every receiver that may still hold a pointer. */
	synchronize_net();
}
352 /******************************************************************************
354 Device Boot-time Settings Routines
356 *******************************************************************************/
358 /* Boot time configuration table */
359 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
361 /**
362 * netdev_boot_setup_add - add new setup entry
363 * @name: name of the device
364 * @map: configured settings for the device
365 *
366 * Adds new setup entry to the dev_boot_setup list. The function
367 * returns 0 on error and 1 on success. This is a generic routine to
368 * all netdevices.
369 */
370 static int netdev_boot_setup_add(char *name, struct ifmap *map)
371 {
372 struct netdev_boot_setup *s;
373 int i;
375 s = dev_boot_setup;
376 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
377 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
378 memset(s[i].name, 0, sizeof(s[i].name));
379 strcpy(s[i].name, name);
380 memcpy(&s[i].map, map, sizeof(s[i].map));
381 break;
382 }
383 }
385 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
386 }
388 /**
389 * netdev_boot_setup_check - check boot time settings
390 * @dev: the netdevice
391 *
392 * Check boot time settings for the device.
393 * The found settings are set for the device to be used
394 * later in the device probing.
395 * Returns 0 if no settings found, 1 if they are.
396 */
397 int netdev_boot_setup_check(struct net_device *dev)
398 {
399 struct netdev_boot_setup *s = dev_boot_setup;
400 int i;
402 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
403 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
404 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
405 dev->irq = s[i].map.irq;
406 dev->base_addr = s[i].map.base_addr;
407 dev->mem_start = s[i].map.mem_start;
408 dev->mem_end = s[i].map.mem_end;
409 return 1;
410 }
411 }
412 return 0;
413 }
416 /**
417 * netdev_boot_base - get address from boot time settings
418 * @prefix: prefix for network device
419 * @unit: id for network device
420 *
421 * Check boot time settings for the base address of device.
422 * The found settings are set for the device to be used
423 * later in the device probing.
424 * Returns 0 if no settings found.
425 */
426 unsigned long netdev_boot_base(const char *prefix, int unit)
427 {
428 const struct netdev_boot_setup *s = dev_boot_setup;
429 char name[IFNAMSIZ];
430 int i;
432 sprintf(name, "%s%d", prefix, unit);
434 /*
435 * If device already registered then return base of 1
436 * to indicate not to probe for this interface
437 */
438 if (__dev_get_by_name(name))
439 return 1;
441 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
442 if (!strcmp(name, s[i].name))
443 return s[i].map.base_addr;
444 return 0;
445 }
447 /*
448 * Saves at boot time configured settings for any netdevice.
449 */
450 int __init netdev_boot_setup(char *str)
451 {
452 int ints[5];
453 struct ifmap map;
455 str = get_options(str, ARRAY_SIZE(ints), ints);
456 if (!str || !*str)
457 return 0;
459 /* Save settings */
460 memset(&map, 0, sizeof(map));
461 if (ints[0] > 0)
462 map.irq = ints[1];
463 if (ints[0] > 1)
464 map.base_addr = ints[2];
465 if (ints[0] > 2)
466 map.mem_start = ints[3];
467 if (ints[0] > 3)
468 map.mem_end = ints[4];
470 /* Add new entry to the list */
471 return netdev_boot_setup_add(str, &map);
472 }
474 __setup("netdev=", netdev_boot_setup);
476 /*******************************************************************************
478 Device Interface Subroutines
480 *******************************************************************************/
482 /**
483 * __dev_get_by_name - find a device by its name
484 * @name: name to find
485 *
486 * Find an interface by name. Must be called under RTNL semaphore
487 * or @dev_base_lock. If the name is found a pointer to the device
488 * is returned. If the name is not found then %NULL is returned. The
489 * reference counters are not incremented so the caller must be
490 * careful with locks.
491 */
493 struct net_device *__dev_get_by_name(const char *name)
494 {
495 struct hlist_node *p;
497 hlist_for_each(p, dev_name_hash(name)) {
498 struct net_device *dev
499 = hlist_entry(p, struct net_device, name_hlist);
500 if (!strncmp(dev->name, name, IFNAMSIZ))
501 return dev;
502 }
503 return NULL;
504 }
506 /**
507 * dev_get_by_name - find a device by its name
508 * @name: name to find
509 *
510 * Find an interface by name. This can be called from any
511 * context and does its own locking. The returned handle has
512 * the usage count incremented and the caller must use dev_put() to
513 * release it when it is no longer needed. %NULL is returned if no
514 * matching device is found.
515 */
517 struct net_device *dev_get_by_name(const char *name)
518 {
519 struct net_device *dev;
521 read_lock(&dev_base_lock);
522 dev = __dev_get_by_name(name);
523 if (dev)
524 dev_hold(dev);
525 read_unlock(&dev_base_lock);
526 return dev;
527 }
529 /**
530 * __dev_get_by_index - find a device by its ifindex
531 * @ifindex: index of device
532 *
533 * Search for an interface by index. Returns %NULL if the device
534 * is not found or a pointer to the device. The device has not
535 * had its reference counter increased so the caller must be careful
536 * about locking. The caller must hold either the RTNL semaphore
537 * or @dev_base_lock.
538 */
540 struct net_device *__dev_get_by_index(int ifindex)
541 {
542 struct hlist_node *p;
544 hlist_for_each(p, dev_index_hash(ifindex)) {
545 struct net_device *dev
546 = hlist_entry(p, struct net_device, index_hlist);
547 if (dev->ifindex == ifindex)
548 return dev;
549 }
550 return NULL;
551 }
554 /**
555 * dev_get_by_index - find a device by its ifindex
556 * @ifindex: index of device
557 *
558 * Search for an interface by index. Returns NULL if the device
559 * is not found or a pointer to the device. The device returned has
560 * had a reference added and the pointer is safe until the user calls
561 * dev_put to indicate they have finished with it.
562 */
564 struct net_device *dev_get_by_index(int ifindex)
565 {
566 struct net_device *dev;
568 read_lock(&dev_base_lock);
569 dev = __dev_get_by_index(ifindex);
570 if (dev)
571 dev_hold(dev);
572 read_unlock(&dev_base_lock);
573 return dev;
574 }
576 /**
577 * dev_getbyhwaddr - find a device by its hardware address
578 * @type: media type of device
579 * @ha: hardware address
580 *
581 * Search for an interface by MAC address. Returns NULL if the device
582 * is not found or a pointer to the device. The caller must hold the
583 * rtnl semaphore. The returned device has not had its ref count increased
584 * and the caller must therefore be careful about locking
585 *
586 * BUGS:
587 * If the API was consistent this would be __dev_get_by_hwaddr
588 */
590 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
591 {
592 struct net_device *dev;
594 ASSERT_RTNL();
596 for (dev = dev_base; dev; dev = dev->next)
597 if (dev->type == type &&
598 !memcmp(dev->dev_addr, ha, dev->addr_len))
599 break;
600 return dev;
601 }
603 struct net_device *dev_getfirstbyhwtype(unsigned short type)
604 {
605 struct net_device *dev;
607 rtnl_lock();
608 for (dev = dev_base; dev; dev = dev->next) {
609 if (dev->type == type) {
610 dev_hold(dev);
611 break;
612 }
613 }
614 rtnl_unlock();
615 return dev;
616 }
618 EXPORT_SYMBOL(dev_getfirstbyhwtype);
620 /**
621 * dev_get_by_flags - find any device with given flags
622 * @if_flags: IFF_* values
623 * @mask: bitmask of bits in if_flags to check
624 *
625 * Search for any interface with the given flags. Returns NULL if a device
626 * is not found or a pointer to the device. The device returned has
627 * had a reference added and the pointer is safe until the user calls
628 * dev_put to indicate they have finished with it.
629 */
631 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
632 {
633 struct net_device *dev;
635 read_lock(&dev_base_lock);
636 for (dev = dev_base; dev != NULL; dev = dev->next) {
637 if (((dev->flags ^ if_flags) & mask) == 0) {
638 dev_hold(dev);
639 break;
640 }
641 }
642 read_unlock(&dev_base_lock);
643 return dev;
644 }
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to allow
 *	sysfs to work: the empty string, "." and "..", and any name
 *	containing a '/' are rejected. Returns 1 if @name is
 *	acceptable and 0 otherwise.
 */
static int dev_valid_name(const char *name)
{
	if (name[0] == '\0')
		return 0;
	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;
	if (strchr(name, '/') != NULL)
		return 0;
	return 1;
}
661 /**
662 * dev_alloc_name - allocate a name for a device
663 * @dev: device
664 * @name: name format string
665 *
666 * Passed a format string - eg "lt%d" it will try and find a suitable
667 * id. Not efficient for many devices, not called a lot. The caller
668 * must hold the dev_base or rtnl lock while allocating the name and
669 * adding the device in order to avoid duplicates. Returns the number
670 * of the unit assigned or a negative errno code.
671 */
673 int dev_alloc_name(struct net_device *dev, const char *name)
674 {
675 int i = 0;
676 char buf[IFNAMSIZ];
677 const char *p;
678 const int max_netdevices = 8*PAGE_SIZE;
679 long *inuse;
680 struct net_device *d;
682 p = strnchr(name, IFNAMSIZ-1, '%');
683 if (p) {
684 /*
685 * Verify the string as this thing may have come from
686 * the user. There must be either one "%d" and no other "%"
687 * characters.
688 */
689 if (p[1] != 'd' || strchr(p + 2, '%'))
690 return -EINVAL;
692 /* Use one page as a bit array of possible slots */
693 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
694 if (!inuse)
695 return -ENOMEM;
697 for (d = dev_base; d; d = d->next) {
698 if (!sscanf(d->name, name, &i))
699 continue;
700 if (i < 0 || i >= max_netdevices)
701 continue;
703 /* avoid cases where sscanf is not exact inverse of printf */
704 snprintf(buf, sizeof(buf), name, i);
705 if (!strncmp(buf, d->name, IFNAMSIZ))
706 set_bit(i, inuse);
707 }
709 i = find_first_zero_bit(inuse, max_netdevices);
710 free_page((unsigned long) inuse);
711 }
713 snprintf(buf, sizeof(buf), name, i);
714 if (!__dev_get_by_name(buf)) {
715 strlcpy(dev->name, buf, IFNAMSIZ);
716 return i;
717 }
719 /* It is possible to run out of possible slots
720 * when the name is long and there isn't enough space left
721 * for the digits, or if all bits are used.
722 */
723 return -ENFILE;
724 }
727 /**
728 * dev_change_name - change name of a device
729 * @dev: device
730 * @newname: name (or format string) must be at least IFNAMSIZ
731 *
732 * Change name of a device, can pass format strings "eth%d".
733 * for wildcarding.
734 */
735 int dev_change_name(struct net_device *dev, char *newname)
736 {
737 int err = 0;
739 ASSERT_RTNL();
741 if (dev->flags & IFF_UP)
742 return -EBUSY;
744 if (!dev_valid_name(newname))
745 return -EINVAL;
747 if (strchr(newname, '%')) {
748 err = dev_alloc_name(dev, newname);
749 if (err < 0)
750 return err;
751 strcpy(newname, dev->name);
752 }
753 else if (__dev_get_by_name(newname))
754 return -EEXIST;
755 else
756 strlcpy(dev->name, newname, IFNAMSIZ);
758 err = class_device_rename(&dev->class_dev, dev->name);
759 if (!err) {
760 hlist_del(&dev->name_hlist);
761 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
762 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
763 }
765 return err;
766 }
768 /**
769 * netdev_features_change - device changes fatures
770 * @dev: device to cause notification
771 *
772 * Called to indicate a device has changed features.
773 */
774 void netdev_features_change(struct net_device *dev)
775 {
776 notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
777 }
778 EXPORT_SYMBOL(netdev_features_change);
780 /**
781 * netdev_state_change - device changes state
782 * @dev: device to cause notification
783 *
784 * Called to indicate a device has changed state. This function calls
785 * the notifier chains for netdev_chain and sends a NEWLINK message
786 * to the routing socket.
787 */
788 void netdev_state_change(struct net_device *dev)
789 {
790 if (dev->flags & IFF_UP) {
791 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
792 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
793 }
794 }
796 /**
797 * dev_load - load a network module
798 * @name: name of interface
799 *
800 * If a network interface is not present and the process has suitable
801 * privileges this function loads the module. If module loading is not
802 * available in this kernel then it becomes a nop.
803 */
805 void dev_load(const char *name)
806 {
807 struct net_device *dev;
809 read_lock(&dev_base_lock);
810 dev = __dev_get_by_name(name);
811 read_unlock(&dev_base_lock);
813 if (!dev && capable(CAP_SYS_MODULE))
814 request_module("%s", name);
815 }
817 static int default_rebuild_header(struct sk_buff *skb)
818 {
819 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
820 skb->dev ? skb->dev->name : "NULL!!!");
821 kfree_skb(skb);
822 return 1;
823 }
826 /**
827 * dev_open - prepare an interface for use.
828 * @dev: device to open
829 *
830 * Takes a device from down to up state. The device's private open
831 * function is invoked and then the multicast lists are loaded. Finally
832 * the device is moved into the up state and a %NETDEV_UP message is
833 * sent to the netdev notifier chain.
834 *
835 * Calling this function on an active interface is a nop. On a failure
836 * a negative errno code is returned.
837 */
838 int dev_open(struct net_device *dev)
839 {
840 int ret = 0;
842 /*
843 * Is it already up?
844 */
846 if (dev->flags & IFF_UP)
847 return 0;
849 /*
850 * Is it even present?
851 */
852 if (!netif_device_present(dev))
853 return -ENODEV;
855 /*
856 * Call device private open method
857 */
858 set_bit(__LINK_STATE_START, &dev->state);
859 if (dev->open) {
860 ret = dev->open(dev);
861 if (ret)
862 clear_bit(__LINK_STATE_START, &dev->state);
863 }
865 /*
866 * If it went open OK then:
867 */
869 if (!ret) {
870 /*
871 * Set the flags.
872 */
873 dev->flags |= IFF_UP;
875 /*
876 * Initialize multicasting status
877 */
878 dev_mc_upload(dev);
880 /*
881 * Wakeup transmit queue engine
882 */
883 dev_activate(dev);
885 /*
886 * ... and announce new interface.
887 */
888 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
889 }
890 return ret;
891 }
893 /**
894 * dev_close - shutdown an interface.
895 * @dev: device to shutdown
896 *
897 * This function moves an active device into down state. A
898 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
899 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
900 * chain.
901 */
902 int dev_close(struct net_device *dev)
903 {
904 if (!(dev->flags & IFF_UP))
905 return 0;
907 /*
908 * Tell people we are going down, so that they can
909 * prepare to death, when device is still operating.
910 */
911 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
913 dev_deactivate(dev);
915 clear_bit(__LINK_STATE_START, &dev->state);
917 /* Synchronize to scheduled poll. We cannot touch poll list,
918 * it can be even on different cpu. So just clear netif_running(),
919 * and wait when poll really will happen. Actually, the best place
920 * for this is inside dev->stop() after device stopped its irq
921 * engine, but this requires more changes in devices. */
923 smp_mb__after_clear_bit(); /* Commit netif_running(). */
924 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
925 /* No hurry. */
926 current->state = TASK_INTERRUPTIBLE;
927 schedule_timeout(1);
928 }
930 /*
931 * Call the device specific close. This cannot fail.
932 * Only if device is UP
933 *
934 * We allow it to be called even after a DETACH hot-plug
935 * event.
936 */
937 if (dev->stop)
938 dev->stop(dev);
940 /*
941 * Device is now down.
942 */
944 dev->flags &= ~IFF_UP;
946 /*
947 * Tell people we are down
948 */
949 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
951 return 0;
952 }
955 /*
956 * Device change register/unregister. These are not inline or static
957 * as we export them to the world.
958 */
960 /**
961 * register_netdevice_notifier - register a network notifier block
962 * @nb: notifier
963 *
964 * Register a notifier to be called when network device events occur.
965 * The notifier passed is linked into the kernel structures and must
966 * not be reused until it has been unregistered. A negative errno code
967 * is returned on a failure.
968 *
969 * When registered all registration and up events are replayed
970 * to the new notifier to allow device to have a race free
971 * view of the network device list.
972 */
974 int register_netdevice_notifier(struct notifier_block *nb)
975 {
976 struct net_device *dev;
977 int err;
979 rtnl_lock();
980 err = notifier_chain_register(&netdev_chain, nb);
981 if (!err) {
982 for (dev = dev_base; dev; dev = dev->next) {
983 nb->notifier_call(nb, NETDEV_REGISTER, dev);
985 if (dev->flags & IFF_UP)
986 nb->notifier_call(nb, NETDEV_UP, dev);
987 }
988 }
989 rtnl_unlock();
990 return err;
991 }
993 /**
994 * unregister_netdevice_notifier - unregister a network notifier block
995 * @nb: notifier
996 *
997 * Unregister a notifier previously registered by
998 * register_netdevice_notifier(). The notifier is unlinked into the
999 * kernel structures and may then be reused. A negative errno code
1000 * is returned on a failure.
1001 */
1003 int unregister_netdevice_notifier(struct notifier_block *nb)
1005 return notifier_chain_unregister(&netdev_chain, nb);
1008 /**
1009 * call_netdevice_notifiers - call all network notifier blocks
1010 * @val: value passed unmodified to notifier function
1011 * @v: pointer passed unmodified to notifier function
1013 * Call all network notifier blocks. Parameters and return value
1014 * are as for notifier_call_chain().
1015 */
1017 int call_netdevice_notifiers(unsigned long val, void *v)
1019 return notifier_call_chain(&netdev_chain, val, v);
1022 /* When > 0 there are consumers of rx skb time stamps */
1023 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1025 void net_enable_timestamp(void)
1027 atomic_inc(&netstamp_needed);
1030 void net_disable_timestamp(void)
1032 atomic_dec(&netstamp_needed);
1035 static inline void net_timestamp(struct timeval *stamp)
1037 if (atomic_read(&netstamp_needed))
1038 do_gettimeofday(stamp);
1039 else {
1040 stamp->tv_sec = 0;
1041 stamp->tv_usec = 0;
1045 /*
1046 * Support routine. Sends outgoing frames to any network
1047 * taps currently in use.
1048 */
1050 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1052 struct packet_type *ptype;
1053 net_timestamp(&skb->stamp);
1055 rcu_read_lock();
1056 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1057 /* Never send packets back to the socket
1058 * they originated from - MvS (miquels@drinkel.ow.org)
1059 */
1060 if ((ptype->dev == dev || !ptype->dev) &&
1061 (ptype->af_packet_priv == NULL ||
1062 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1063 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1064 if (!skb2)
1065 break;
1067 /* skb->nh should be correctly
1068 set by sender, so that the second statement is
1069 just protection against buggy protocols.
1070 */
1071 skb2->mac.raw = skb2->data;
1073 if (skb2->nh.raw < skb2->data ||
1074 skb2->nh.raw > skb2->tail) {
1075 if (net_ratelimit())
1076 printk(KERN_CRIT "protocol %04x is "
1077 "buggy, dev %s\n",
1078 skb2->protocol, dev->name);
1079 skb2->nh.raw = skb2->data;
1082 skb2->h.raw = skb2->nh.raw;
1083 skb2->pkt_type = PACKET_OUTGOING;
1084 ptype->func(skb2, skb->dev, ptype);
1087 rcu_read_unlock();
1090 /*
1091 * Invalidate hardware checksum when packet is to be mangled, and
1092 * complete checksum manually on outgoing path.
1093 */
1094 int skb_checksum_help(struct sk_buff *skb, int inward)
1096 unsigned int csum;
1097 int ret = 0, offset = skb->h.raw - skb->data;
1099 if (inward) {
1100 skb->ip_summed = CHECKSUM_NONE;
1101 goto out;
1104 if (skb_cloned(skb)) {
1105 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1106 if (ret)
1107 goto out;
1110 if (offset > (int)skb->len)
1111 BUG();
1112 csum = skb_checksum(skb, offset, skb->len-offset, 0);
1114 offset = skb->tail - skb->h.raw;
1115 if (offset <= 0)
1116 BUG();
1117 if (skb->csum + 2 > offset)
1118 BUG();
1120 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1121 skb->ip_summed = CHECKSUM_NONE;
1122 out:
1123 return ret;
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/*
 * Returns 1 when @dev lacks NETIF_F_HIGHDMA and at least one page
 * fragment of @skb lives in high memory, 0 otherwise.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int frag;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
		}
	}

	return 0;
}
#else
#define illegal_highdma(dev, skb)	(0)
#endif
1149 extern void skb_release_data(struct sk_buff *);
1151 /* Keep head the same: replace data */
/*
 * Copies the headroom plus skb->len bytes of @skb into one freshly
 * kmalloc'ed area, releases the old data and rebases the skb pointers
 * onto the new area.
 *
 * Returns 0 on success or -ENOMEM when the allocation fails.  Hits
 * BUG() if the skb is shared or if skb_copy_bits() reports a failure.
 */
1152 int __skb_linearize(struct sk_buff *skb, int gfp_mask)
1154 unsigned int size;
1155 u8 *data;
1156 long offset;
1157 struct skb_shared_info *ninfo;
/* Bytes between the start of the buffer and the start of the data. */
1158 int headerlen = skb->data - skb->head;
/* How far tail + data_len would overshoot the current end; clamped to 0 below. */
1159 int expand = (skb->tail + skb->data_len) - skb->end;
1161 if (skb_shared(skb))
1162 BUG();
1164 if (expand <= 0)
1165 expand = 0;
/* New area: old head..end span plus the overshoot, aligned, with the shared info placed right behind it. */
1167 size = skb->end - skb->head + expand;
1168 size = SKB_DATA_ALIGN(size);
1169 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1170 if (!data)
1171 return -ENOMEM;
1173 /* Copy entire thing */
/* A negative start offset makes the copy begin at skb->head, so the headroom is copied too. */
1174 if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1175 BUG();
1177 /* Set up shinfo */
1178 ninfo = (struct skb_shared_info*)(data + size);
1179 atomic_set(&ninfo->dataref, 1);
1180 ninfo->tso_size = skb_shinfo(skb)->tso_size;
1181 ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
/* The new buffer carries no page fragments and no fragment list. */
1182 ninfo->nr_frags = 0;
1183 ninfo->frag_list = NULL;
1185 /* Offset between the two in bytes */
/* Computed while skb->head still refers to the old buffer. */
1186 offset = data - skb->head;
1188 /* Free old data. */
1189 skb_release_data(skb);
1191 skb->head = data;
1192 skb->end = data + size;
1194 /* Set up new pointers */
1195 skb->h.raw += offset;
1196 skb->nh.raw += offset;
1197 skb->mac.raw += offset;
1198 skb->tail += offset;
1199 skb->data += offset;
1201 /* We are no longer a clone, even if we were. */
1202 skb->cloned = 0;
/* The data_len bytes were copied into the new area, so extend tail over them and zero data_len. */
1204 skb->tail += skb->data_len;
1205 skb->data_len = 0;
1206 return 0;
/*
 * Take / release dev->xmit_lock around a hard_start_xmit() call and
 * record the owning CPU in dev->xmit_lock_owner.  Devices that set
 * NETIF_F_LLTX are left untouched by both macros.
 *
 * The bodies are wrapped in do { } while (0) so that "MACRO(...);"
 * expands to exactly one statement and stays valid as the unbraced arm
 * of an if/else; the arguments are parenthesized so any pointer or
 * integer expression can be passed.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		spin_lock(&(dev)->xmit_lock);			\
		(dev)->xmit_lock_owner = (cpu);			\
	}							\
} while (0)

#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		(dev)->xmit_lock_owner = -1;			\
		spin_unlock(&(dev)->xmit_lock);			\
	}							\
} while (0)
1223 /**
1224 * dev_queue_xmit - transmit a buffer
1225 * @skb: buffer to transmit
1227 * Queue a buffer for transmission to a network device. The caller must
1228 * have set the device and priority and built the buffer before calling
1229 * this function. The function can be called from an interrupt.
1231 * A negative errno code is returned on a failure. A success does not
1232 * guarantee the frame will be transmitted as it may be dropped due
1233 * to congestion or traffic shaping.
1235 * -----------------------------------------------------------------------------------
1236 * I notice this method can also return errors from the queue disciplines,
1237 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1238 * be positive.
1240 * Regardless of the return value, the skb is consumed, so it is currently
1241 * difficult to retry a send to this method. (You can bump the ref count
1242 * before sending to hold a reference for retry if you are careful.)
1244 * When calling this method, interrupts MUST be enabled. This is because
1245 * the BH enable code must have IRQs enabled so that it will not deadlock.
1246 * --BLG
1247 */
1249 int dev_queue_xmit(struct sk_buff *skb)
1251 struct net_device *dev = skb->dev;
1252 struct Qdisc *q;
1253 int rc = -ENOMEM;
1255 if (skb_shinfo(skb)->frag_list &&
1256 !(dev->features & NETIF_F_FRAGLIST) &&
1257 __skb_linearize(skb, GFP_ATOMIC))
1258 goto out_kfree_skb;
1260 /* Fragmented skb is linearized if device does not support SG,
1261 * or if at least one of fragments is in highmem and device
1262 * does not support DMA from it.
1263 */
1264 if (skb_shinfo(skb)->nr_frags &&
1265 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1266 __skb_linearize(skb, GFP_ATOMIC))
1267 goto out_kfree_skb;
1269 /* If a checksum-deferred packet is forwarded to a device that needs a
1270 * checksum, correct the pointers and force checksumming.
1271 */
1272 if (skb->proto_csum_blank) {
1273 if (skb->protocol != htons(ETH_P_IP))
1274 goto out_kfree_skb;
1275 skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
1276 if (skb->h.raw >= skb->tail)
1277 goto out_kfree_skb;
1278 switch (skb->nh.iph->protocol) {
1279 case IPPROTO_TCP:
1280 skb->csum = offsetof(struct tcphdr, check);
1281 break;
1282 case IPPROTO_UDP:
1283 skb->csum = offsetof(struct udphdr, check);
1284 break;
1285 default:
1286 if (net_ratelimit())
1287 printk(KERN_ERR "Attempting to checksum a non-"
1288 "TCP/UDP packet, dropping a protocol"
1289 " %d packet", skb->nh.iph->protocol);
1290 rc = -EPROTO;
1291 goto out_kfree_skb;
1293 if ((skb->h.raw + skb->csum + 2) > skb->tail)
1294 goto out_kfree_skb;
1295 skb->ip_summed = CHECKSUM_HW;
1298 /* If packet is not checksummed and device does not support
1299 * checksumming for this protocol, complete checksumming here.
1300 */
1301 if (skb->ip_summed == CHECKSUM_HW &&
1302 (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1303 (!(dev->features & NETIF_F_IP_CSUM) ||
1304 skb->protocol != htons(ETH_P_IP))))
1305 if (skb_checksum_help(skb, 0))
1306 goto out_kfree_skb;
1308 /* Disable soft irqs for various locks below. Also
1309 * stops preemption for RCU.
1310 */
1311 local_bh_disable();
1313 /* Updates of qdisc are serialized by queue_lock.
1314 * The struct Qdisc which is pointed to by qdisc is now a
1315 * rcu structure - it may be accessed without acquiring
1316 * a lock (but the structure may be stale.) The freeing of the
1317 * qdisc will be deferred until it's known that there are no
1318 * more references to it.
1320 * If the qdisc has an enqueue function, we still need to
1321 * hold the queue_lock before calling it, since queue_lock
1322 * also serializes access to the device queue.
1323 */
1325 q = rcu_dereference(dev->qdisc);
1326 #ifdef CONFIG_NET_CLS_ACT
1327 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1328 #endif
1329 if (q->enqueue) {
1330 /* Grab device queue */
1331 spin_lock(&dev->queue_lock);
1333 rc = q->enqueue(skb, q);
1335 qdisc_run(dev);
1337 spin_unlock(&dev->queue_lock);
1338 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1339 goto out;
1342 /* The device has no queue. Common case for software devices:
1343 loopback, all the sorts of tunnels...
1345 Really, it is unlikely that xmit_lock protection is necessary here.
1346 (f.e. loopback and IP tunnels are clean ignoring statistics
1347 counters.)
1348 However, it is possible, that they rely on protection
1349 made by us here.
1351 Check this and shot the lock. It is not prone from deadlocks.
1352 Either shot noqueue qdisc, it is even simpler 8)
1353 */
1354 if (dev->flags & IFF_UP) {
1355 int cpu = smp_processor_id(); /* ok because BHs are off */
1357 if (dev->xmit_lock_owner != cpu) {
1359 HARD_TX_LOCK(dev, cpu);
1361 if (!netif_queue_stopped(dev)) {
1362 if (netdev_nit)
1363 dev_queue_xmit_nit(skb, dev);
1365 rc = 0;
1366 if (!dev->hard_start_xmit(skb, dev)) {
1367 HARD_TX_UNLOCK(dev);
1368 goto out;
1371 HARD_TX_UNLOCK(dev);
1372 if (net_ratelimit())
1373 printk(KERN_CRIT "Virtual device %s asks to "
1374 "queue packet!\n", dev->name);
1375 } else {
1376 /* Recursion is detected! It is possible,
1377 * unfortunately */
1378 if (net_ratelimit())
1379 printk(KERN_CRIT "Dead loop on virtual device "
1380 "%s, fix it urgently!\n", dev->name);
1384 rc = -ENETDOWN;
1385 local_bh_enable();
1387 out_kfree_skb:
1388 kfree_skb(skb);
1389 return rc;
1390 out:
1391 local_bh_enable();
1392 return rc;
1396 /*=======================================================================
1397 Receiver routines
1398 =======================================================================*/
/* Upper bound on the per-CPU input queue length checked by netif_rx(); also the starting budget of net_rx_action(). */
1400 int netdev_max_backlog = 300;
1401 int weight_p = 64; /* old backlog weight */
1402 /* These numbers are selected based on intuition and some
1403 * experimentation, if you have more scientific way of doing this
1404 * please go ahead and fix things.
1405 */
/* Not referenced in the part of the file visible here. */
1406 int no_cong_thresh = 10;
/* Average-backlog thresholds compared against in get_sample_stats(): above no_cong -> NET_RX_CN_LOW, above lo_cong -> NET_RX_CN_MOD, above mod_cong -> NET_RX_CN_HIGH. */
1407 int no_cong = 20;
1408 int lo_cong = 100;
1409 int mod_cong = 290;
/* Per-CPU receive counters (total, dropped, throttled, time_squeeze, ...). */
1411 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1414 static void get_sample_stats(int cpu)
1416 #ifdef RAND_LIE
1417 unsigned long rd;
1418 int rq;
1419 #endif
1420 struct softnet_data *sd = &per_cpu(softnet_data, cpu);
1421 int blog = sd->input_pkt_queue.qlen;
1422 int avg_blog = sd->avg_blog;
1424 avg_blog = (avg_blog >> 1) + (blog >> 1);
1426 if (avg_blog > mod_cong) {
1427 /* Above moderate congestion levels. */
1428 sd->cng_level = NET_RX_CN_HIGH;
1429 #ifdef RAND_LIE
1430 rd = net_random();
1431 rq = rd % netdev_max_backlog;
1432 if (rq < avg_blog) /* unlucky bastard */
1433 sd->cng_level = NET_RX_DROP;
1434 #endif
1435 } else if (avg_blog > lo_cong) {
1436 sd->cng_level = NET_RX_CN_MOD;
1437 #ifdef RAND_LIE
1438 rd = net_random();
1439 rq = rd % netdev_max_backlog;
1440 if (rq < avg_blog) /* unlucky bastard */
1441 sd->cng_level = NET_RX_CN_HIGH;
1442 #endif
1443 } else if (avg_blog > no_cong)
1444 sd->cng_level = NET_RX_CN_LOW;
1445 else /* no congestion */
1446 sd->cng_level = NET_RX_SUCCESS;
1448 sd->avg_blog = avg_blog;
#ifdef OFFLINE_SAMPLE
/*
 *	Timer callback: sample the backlog of the CPU it runs on and re-arm
 *	samp_timer one jiffy ahead.  @dummy is unused.
 */
static void sample_queue(unsigned long dummy)
{
	/* 10 ms or 1 ms -- i don't care -- JHS */
	int expires = 1;

	get_sample_stats(smp_processor_id());

	expires += jiffies;
	mod_timer(&samp_timer, expires);
}
#endif
1465 /**
1466 * netif_rx - post buffer to the network code
1467 * @skb: buffer to post
1469 * This function receives a packet from a device driver and queues it for
1470 * the upper (protocol) levels to process. It always succeeds. The buffer
1471 * may be dropped during processing for congestion control or by the
1472 * protocol layers.
1474 * return values:
1475 * NET_RX_SUCCESS (no congestion)
1476 * NET_RX_CN_LOW (low congestion)
1477 * NET_RX_CN_MOD (moderate congestion)
1478 * NET_RX_CN_HIGH (high congestion)
1479 * NET_RX_DROP (packet was dropped)
1481 */
1483 int netif_rx(struct sk_buff *skb)
1485 int this_cpu;
1486 struct softnet_data *queue;
1487 unsigned long flags;
1489 /* if netpoll wants it, pretend we never saw it */
1490 if (netpoll_rx(skb))
1491 return NET_RX_DROP;
/* Only stamp packets that do not carry a timestamp yet. */
1493 if (!skb->stamp.tv_sec)
1494 net_timestamp(&skb->stamp);
1496 /*
1497 * The code is rearranged so that the path is the most
1498 * short when CPU is congested, but is still operating.
1499 */
/* Everything up to the matching local_irq_restore() runs with local interrupts off and touches only this CPU's softnet_data. */
1500 local_irq_save(flags);
1501 this_cpu = smp_processor_id();
1502 queue = &__get_cpu_var(softnet_data);
1504 __get_cpu_var(netdev_rx_stat).total++;
/* NOTE(review): brace-only lines are missing from this listing; the nesting described below is inferred from the returns and gotos. */
/* Backlog within netdev_max_backlog: the packet may be queued. */
1505 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
/* Queue already non-empty: no scheduling call on this path, and a throttled queue drops the packet. */
1506 if (queue->input_pkt_queue.qlen) {
1507 if (queue->throttle)
1508 goto drop;
1510 enqueue:
/* Take a device reference for the queued skb; process_backlog() does the dev_put() after delivery. */
1511 dev_hold(skb->dev);
1512 __skb_queue_tail(&queue->input_pkt_queue, skb);
1513 #ifndef OFFLINE_SAMPLE
1514 get_sample_stats(this_cpu);
1515 #endif
1516 local_irq_restore(flags);
/* cng_level is the value maintained by get_sample_stats(). */
1517 return queue->cng_level;
/* Reached when the queue was empty: clear any throttle, schedule the backlog device, then queue via the label above. */
1520 if (queue->throttle)
1521 queue->throttle = 0;
1523 netif_rx_schedule(&queue->backlog_dev);
1524 goto enqueue;
/* Backlog above the limit: enter the throttled state (counted once per transition) and fall through to drop. */
1527 if (!queue->throttle) {
1528 queue->throttle = 1;
1529 __get_cpu_var(netdev_rx_stat).throttled++;
1532 drop:
1533 __get_cpu_var(netdev_rx_stat).dropped++;
1534 local_irq_restore(flags);
1536 kfree_skb(skb);
1537 return NET_RX_DROP;
/*
 *	Wrapper around netif_rx() that, with preemption disabled, runs any
 *	softirq left pending by the call before returning.  Returns whatever
 *	netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int verdict;

	preempt_disable();

	verdict = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();

	preempt_enable();

	return verdict;
}

EXPORT_SYMBOL(netif_rx_ni);
1555 static __inline__ void skb_bond(struct sk_buff *skb)
1557 struct net_device *dev = skb->dev;
1559 if (dev->master) {
1560 skb->real_dev = skb->dev;
1561 skb->dev = dev->master;
1565 static void net_tx_action(struct softirq_action *h)
1567 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1569 if (sd->completion_queue) {
1570 struct sk_buff *clist;
1572 local_irq_disable();
1573 clist = sd->completion_queue;
1574 sd->completion_queue = NULL;
1575 local_irq_enable();
1577 while (clist) {
1578 struct sk_buff *skb = clist;
1579 clist = clist->next;
1581 BUG_TRAP(!atomic_read(&skb->users));
1582 __kfree_skb(skb);
1586 if (sd->output_queue) {
1587 struct net_device *head;
1589 local_irq_disable();
1590 head = sd->output_queue;
1591 sd->output_queue = NULL;
1592 local_irq_enable();
1594 while (head) {
1595 struct net_device *dev = head;
1596 head = head->next_sched;
1598 smp_mb__before_clear_bit();
1599 clear_bit(__LINK_STATE_SCHED, &dev->state);
1601 if (spin_trylock(&dev->queue_lock)) {
1602 qdisc_run(dev);
1603 spin_unlock(&dev->queue_lock);
1604 } else {
1605 netif_schedule(dev);
1611 static __inline__ int deliver_skb(struct sk_buff *skb,
1612 struct packet_type *pt_prev)
1614 atomic_inc(&skb->users);
1615 return pt_prev->func(skb, skb->dev, pt_prev);
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* Hooks the bridge code is expected to fill in. */
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 *	Pass *@pskb to the bridge hook when its device is a bridge port.
 *	A handler still pending in *@pt_prev is delivered first (result in
 *	*@ret) and cleared.  Returns 0 for PACKET_LOOPBACK frames and for
 *	devices without a bridge port, otherwise the hook's return value.
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK)
		return 0;

	port = rcu_dereference((*pskb)->dev->br_port);
	if (port == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret)	(0)
#endif
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 */

/*
 *	Run @skb through the ingress qdisc of its device, if there is one.
 *	Returns TC_ACT_OK when the device has no ingress qdisc, TC_ACT_SHOT
 *	when the redirect TTL in tc_verd exceeds MAX_RED_LOOP, otherwise
 *	the verdict of the qdisc's enqueue function.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *ingress;
	int verdict = TC_ACT_OK;
	__u32 ttl;

	if (!dev->qdisc_ingress)
		return verdict;

	ttl = (__u32) G_TC_RTTL(skb->tc_verd);
	if (MAX_RED_LOOP < ttl++) {
		printk("Redir loop detected Dropping packet (%s->%s)\n",
		       skb->input_dev?skb->input_dev->name:"??",skb->dev->name);
		return TC_ACT_SHOT;
	}

	/* Store the incremented TTL and mark the packet as ingress. */
	skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);

	if (NULL == skb->input_dev) {
		skb->input_dev = skb->dev;
		printk("ing_filter: fixed %s out %s\n",skb->input_dev->name,skb->dev->name);
	}

	/* Re-read the qdisc pointer under the ingress lock. */
	spin_lock(&dev->ingress_lock);
	ingress = dev->qdisc_ingress;
	if (ingress != NULL)
		verdict = ingress->enqueue(skb, ingress);
	spin_unlock(&dev->ingress_lock);

	return verdict;
}
#endif
/*
 * Deliver one received skb to the registered packet handlers: first
 * every matching entry on ptype_all, then the entries in the ptype_base
 * bucket whose type equals skb->protocol.  The return value is the
 * result of the last handler called, or NET_RX_DROP when netpoll
 * consumed the packet or no handler matched.
 *
 * Delivery is deferred by one handler through pt_prev: each earlier
 * match receives the skb via deliver_skb(), which takes an extra
 * reference, while the final match is called directly.
 */
1686 int netif_receive_skb(struct sk_buff *skb)
1688 struct packet_type *ptype, *pt_prev;
1689 int ret = NET_RX_DROP;
1690 unsigned short type;
1692 /* if we've gotten here through NAPI, check netpoll */
1693 if (skb->dev->poll && netpoll_rx(skb))
1694 return NET_RX_DROP;
1696 if (!skb->stamp.tv_sec)
1697 net_timestamp(&skb->stamp);
/* May redirect skb->dev to the master device (see skb_bond()). */
1699 skb_bond(skb);
1701 __get_cpu_var(netdev_rx_stat).total++;
/* Network and transport header pointers both start at skb->data; mac_len is the distance back to the MAC header. */
1703 skb->h.raw = skb->nh.raw = skb->data;
1704 skb->mac_len = skb->nh.raw - skb->mac.raw;
1706 pt_prev = NULL;
1708 rcu_read_lock();
1710 #ifdef CONFIG_NET_CLS_ACT
/* TC_NCLS set: clear it and jump to ncls, skipping the checksum flagging, the ptype_all taps and the ingress filter. */
1711 if (skb->tc_verd & TC_NCLS) {
1712 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1713 goto ncls;
1715 #endif
/* Record in proto_csum_valid whether the driver reported the checksum as already verified. */
1717 switch (skb->ip_summed) {
1718 case CHECKSUM_UNNECESSARY:
1719 skb->proto_csum_valid = 1;
1720 break;
1721 case CHECKSUM_HW:
1722 /* XXX Implement me. */
1723 default:
1724 skb->proto_csum_valid = 0;
1725 break;
/* Handlers registered for all protocols, optionally bound to this device. */
1728 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1729 if (!ptype->dev || ptype->dev == skb->dev) {
1730 if (pt_prev)
1731 ret = deliver_skb(skb, pt_prev);
1732 pt_prev = ptype;
1736 #ifdef CONFIG_NET_CLS_ACT
/* Flush the pending handler before the ingress filter runs. */
1737 if (pt_prev) {
1738 ret = deliver_skb(skb, pt_prev);
1739 pt_prev = NULL; /* no one else should process this after*/
1740 } else {
1741 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1744 ret = ing_filter(skb);
/* Shot or stolen by the ingress filter: free the skb and stop. */
1746 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1747 kfree_skb(skb);
1748 goto out;
1751 skb->tc_verd = 0;
1752 ncls:
1753 #endif
1755 handle_diverter(skb);
/* A non-zero result from handle_bridge() ends processing here. */
1757 if (handle_bridge(&skb, &pt_prev, &ret))
1758 goto out;
/* Protocol-specific handlers: bucket index is the low four bits of the host-order protocol number. */
1760 type = skb->protocol;
1761 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1762 if (ptype->type == type &&
1763 (!ptype->dev || ptype->dev == skb->dev)) {
1764 if (pt_prev)
1765 ret = deliver_skb(skb, pt_prev);
1766 pt_prev = ptype;
/* Last matching handler is called directly, without the extra reference deliver_skb() would take. */
1770 if (pt_prev) {
1771 ret = pt_prev->func(skb, skb->dev, pt_prev);
1772 } else {
/* No handler matched: free the skb. */
1773 kfree_skb(skb);
1774 /* Jamal, now you will not able to escape explaining
1775 * me how you were going to use this. :-)
1776 */
1777 ret = NET_RX_DROP;
1780 out:
1781 rcu_read_unlock();
1782 return ret;
/*
 * Poll routine of the per-CPU backlog device: dequeues skbs from this
 * CPU's input_pkt_queue and feeds them to netif_receive_skb().
 *
 * Returns -1 when it stopped because the quota was used up or more
 * than one jiffy elapsed, and 0 when the queue was found empty.  In
 * both cases the number of packets handled is subtracted from
 * backlog_dev->quota and from *budget.
 */
1785 static int process_backlog(struct net_device *backlog_dev, int *budget)
1787 int work = 0;
/* At most the smaller of the device quota and the caller's budget. */
1788 int quota = min(backlog_dev->quota, *budget);
1789 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1790 unsigned long start_time = jiffies;
1792 backlog_dev->weight = weight_p;
1793 for (;;) {
1794 struct sk_buff *skb;
1795 struct net_device *dev;
/* The dequeue runs with local interrupts off. */
1797 local_irq_disable();
1798 skb = __skb_dequeue(&queue->input_pkt_queue);
/* Empty queue: leave the loop with interrupts still disabled; job_done re-enables them. */
1799 if (!skb)
1800 goto job_done;
1801 local_irq_enable();
/* Remember the device before the skb is handed on, so the reference taken in netif_rx() can be dropped afterwards. */
1803 dev = skb->dev;
1805 netif_receive_skb(skb);
1807 dev_put(dev);
1809 work++;
1811 if (work >= quota || jiffies - start_time > 1)
1812 break;
/* Quota or time slice exhausted (interrupts are enabled on this path). */
1816 backlog_dev->quota -= work;
1817 *budget -= work;
1818 return -1;
1820 job_done:
1821 backlog_dev->quota -= work;
1822 *budget -= work;
/* Remove the backlog device from the poll list and re-enable polling for it. */
1824 list_del(&backlog_dev->poll_list);
1825 smp_mb__before_clear_bit();
1826 netif_poll_enable(backlog_dev);
/* The queue has been drained, so lift any throttle set by netif_rx(). */
1828 if (queue->throttle)
1829 queue->throttle = 0;
1830 local_irq_enable();
1831 return 0;
/*
 * Receive softirq handler: calls the poll routine of the devices on
 * this CPU's poll_list until the list is empty, the budget (initially
 * netdev_max_backlog) is used up, or more than one jiffy has elapsed.
 * In the latter two cases time_squeeze is counted and the softirq is
 * raised again.  @h is unused.
 */
1834 static void net_rx_action(struct softirq_action *h)
1836 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1837 unsigned long start_time = jiffies;
1838 int budget = netdev_max_backlog;
/* The poll list is examined with local interrupts off; they are enabled only around the poll call itself. */
1841 local_irq_disable();
1843 while (!list_empty(&queue->poll_list)) {
1844 struct net_device *dev;
1846 if (budget <= 0 || jiffies - start_time > 1)
1847 goto softnet_break;
1849 local_irq_enable();
1851 dev = list_entry(queue->poll_list.next,
1852 struct net_device, poll_list);
1853 netpoll_poll_lock(dev);
/* Quota exhausted, or the poll routine returned non-zero: move the device to the tail of the list and replenish its quota from its weight. */
1855 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1856 netpoll_poll_unlock(dev);
1857 local_irq_disable();
1858 list_del(&dev->poll_list);
1859 list_add_tail(&dev->poll_list, &queue->poll_list);
/* A negative quota is carried over as a deficit. */
1860 if (dev->quota < 0)
1861 dev->quota += dev->weight;
1862 else
1863 dev->quota = dev->weight;
1864 } else {
/* Poll routine returned zero: drop the device reference. The list removal is not done here (process_backlog() in this file removes itself before returning 0). */
1865 netpoll_poll_unlock(dev);
1866 dev_put(dev);
1867 local_irq_disable();
1870 out:
1871 local_irq_enable();
1872 return;
/* Reached with interrupts disabled, as __raise_softirq_irqoff()'s name indicates it expects. */
1874 softnet_break:
1875 __get_cpu_var(netdev_rx_stat).time_squeeze++;
1876 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1877 goto out;
1880 static gifconf_func_t * gifconf_list [NPROTO];
1882 /**
1883 * register_gifconf - register a SIOCGIF handler
1884 * @family: Address family
1885 * @gifconf: Function handler
1887 * Register protocol dependent address dumping routines. The handler
1888 * that is passed must not be freed or reused until it has been replaced
1889 * by another handler.
1890 */
1891 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1893 if (family >= NPROTO)
1894 return -EINVAL;
1895 gifconf_list[family] = gifconf;
1896 return 0;
1900 /*
1901 * Map an interface index to its name (SIOCGIFNAME)
1902 */
1904 /*
1905 * We need this ioctl for efficient implementation of the
1906 * if_indextoname() function required by the IPv6 API. Without
1907 * it, we would have to search all the interfaces to find a
1908 * match. --pb
1909 */
1911 static int dev_ifname(struct ifreq __user *arg)
1913 struct net_device *dev;
1914 struct ifreq ifr;
1916 /*
1917 * Fetch the caller's info block.
1918 */
1920 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1921 return -EFAULT;
1923 read_lock(&dev_base_lock);
1924 dev = __dev_get_by_index(ifr.ifr_ifindex);
1925 if (!dev) {
1926 read_unlock(&dev_base_lock);
1927 return -ENODEV;
1930 strcpy(ifr.ifr_name, dev->name);
1931 read_unlock(&dev_base_lock);
1933 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1934 return -EFAULT;
1935 return 0;
1938 /*
1939 * Perform a SIOCGIFCONF call. This structure will change
1940 * size eventually, and there is nothing I can do about it.
1941 * Thus we will need a 'compatibility mode'.
1942 */
1944 static int dev_ifconf(char __user *arg)
1946 struct ifconf ifc;
1947 struct net_device *dev;
1948 char __user *pos;
1949 int len;
1950 int total;
1951 int i;
1953 /*
1954 * Fetch the caller's info block.
1955 */
1957 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1958 return -EFAULT;
1960 pos = ifc.ifc_buf;
1961 len = ifc.ifc_len;
1963 /*
1964 * Loop over the interfaces, and write an info block for each.
1965 */
1967 total = 0;
1968 for (dev = dev_base; dev; dev = dev->next) {
1969 for (i = 0; i < NPROTO; i++) {
1970 if (gifconf_list[i]) {
1971 int done;
1972 if (!pos)
1973 done = gifconf_list[i](dev, NULL, 0);
1974 else
1975 done = gifconf_list[i](dev, pos + total,
1976 len - total);
1977 if (done < 0)
1978 return -EFAULT;
1979 total += done;
1984 /*
1985 * All done. Write the updated control block back to the caller.
1986 */
1987 ifc.ifc_len = total;
1989 /*
1990 * Both BSD and Solaris return 0 here, so we do too.
1991 */
1992 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1995 #ifdef CONFIG_PROC_FS
1996 /*
1997 * This is invoked by the /proc filesystem handler to display a device
1998 * in detail.
1999 */
2000 static __inline__ struct net_device *dev_get_idx(loff_t pos)
2002 struct net_device *dev;
2003 loff_t i;
2005 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2007 return i == pos ? dev : NULL;
2010 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2012 read_lock(&dev_base_lock);
2013 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2016 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2018 ++*pos;
2019 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2022 void dev_seq_stop(struct seq_file *seq, void *v)
2024 read_unlock(&dev_base_lock);
2027 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2029 if (dev->get_stats) {
2030 struct net_device_stats *stats = dev->get_stats(dev);
2032 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2033 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2034 dev->name, stats->rx_bytes, stats->rx_packets,
2035 stats->rx_errors,
2036 stats->rx_dropped + stats->rx_missed_errors,
2037 stats->rx_fifo_errors,
2038 stats->rx_length_errors + stats->rx_over_errors +
2039 stats->rx_crc_errors + stats->rx_frame_errors,
2040 stats->rx_compressed, stats->multicast,
2041 stats->tx_bytes, stats->tx_packets,
2042 stats->tx_errors, stats->tx_dropped,
2043 stats->tx_fifo_errors, stats->collisions,
2044 stats->tx_carrier_errors +
2045 stats->tx_aborted_errors +
2046 stats->tx_window_errors +
2047 stats->tx_heartbeat_errors,
2048 stats->tx_compressed);
2049 } else
2050 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2053 /*
2054 * Called from the PROCfs module. This now uses the new arbitrary sized
2055 * /proc/net interface to create /proc/net/dev
2056 */
2057 static int dev_seq_show(struct seq_file *seq, void *v)
2059 if (v == SEQ_START_TOKEN)
2060 seq_puts(seq, "Inter-| Receive "
2061 " | Transmit\n"
2062 " face |bytes packets errs drop fifo frame "
2063 "compressed multicast|bytes packets errs "
2064 "drop fifo colls carrier compressed\n");
2065 else
2066 dev_seq_printf_stats(seq, v);
2067 return 0;
2070 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2072 struct netif_rx_stats *rc = NULL;
2074 while (*pos < NR_CPUS)
2075 if (cpu_online(*pos)) {
2076 rc = &per_cpu(netdev_rx_stat, *pos);
2077 break;
2078 } else
2079 ++*pos;
2080 return rc;
2083 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2085 return softnet_get_online(pos);
2088 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2090 ++*pos;
2091 return softnet_get_online(pos);
/* seq_file stop: nothing to do, softnet_seq_start() acquires nothing. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2098 static int softnet_seq_show(struct seq_file *seq, void *v)
2100 struct netif_rx_stats *s = v;
2102 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2103 s->total, s->dropped, s->time_squeeze, s->throttled,
2104 s->fastroute_hit, s->fastroute_success, s->fastroute_defer,
2105 s->fastroute_deferred_out,
2106 #if 0
2107 s->fastroute_latency_reduction
2108 #else
2109 s->cpu_collision
2110 #endif
2111 );
2112 return 0;
2115 static struct seq_operations dev_seq_ops = {
2116 .start = dev_seq_start,
2117 .next = dev_seq_next,
2118 .stop = dev_seq_stop,
2119 .show = dev_seq_show,
2120 };
2122 static int dev_seq_open(struct inode *inode, struct file *file)
2124 return seq_open(file, &dev_seq_ops);
2127 static struct file_operations dev_seq_fops = {
2128 .owner = THIS_MODULE,
2129 .open = dev_seq_open,
2130 .read = seq_read,
2131 .llseek = seq_lseek,
2132 .release = seq_release,
2133 };
2135 static struct seq_operations softnet_seq_ops = {
2136 .start = softnet_seq_start,
2137 .next = softnet_seq_next,
2138 .stop = softnet_seq_stop,
2139 .show = softnet_seq_show,
2140 };
2142 static int softnet_seq_open(struct inode *inode, struct file *file)
2144 return seq_open(file, &softnet_seq_ops);
2147 static struct file_operations softnet_seq_fops = {
2148 .owner = THIS_MODULE,
2149 .open = softnet_seq_open,
2150 .read = seq_read,
2151 .llseek = seq_lseek,
2152 .release = seq_release,
2153 };
2155 #ifdef WIRELESS_EXT
2156 extern int wireless_proc_init(void);
2157 #else
2158 #define wireless_proc_init() 0
2159 #endif
2161 static int __init dev_proc_init(void)
2163 int rc = -ENOMEM;
2165 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2166 goto out;
2167 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2168 goto out_dev;
2169 if (wireless_proc_init())
2170 goto out_softnet;
2171 rc = 0;
2172 out:
2173 return rc;
2174 out_softnet:
2175 proc_net_remove("softnet_stat");
2176 out_dev:
2177 proc_net_remove("dev");
2178 goto out;
2180 #else
2181 #define dev_proc_init() 0
2182 #endif /* CONFIG_PROC_FS */
2185 /**
2186 * netdev_set_master - set up master/slave pair
2187 * @slave: slave device
2188 * @master: new master device
2190 * Changes the master device of the slave. Pass %NULL to break the
2191 * bonding. The caller must hold the RTNL semaphore. On a failure
2192 * a negative errno code is returned. On success the reference counts
2193 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2194 * function returns zero.
2195 */
2196 int netdev_set_master(struct net_device *slave, struct net_device *master)
2198 struct net_device *old = slave->master;
2200 ASSERT_RTNL();
2202 if (master) {
2203 if (old)
2204 return -EBUSY;
2205 dev_hold(master);
2208 slave->master = master;
2210 synchronize_net();
2212 if (old)
2213 dev_put(old);
2215 if (master)
2216 slave->flags |= IFF_SLAVE;
2217 else
2218 slave->flags &= ~IFF_SLAVE;
2220 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2221 return 0;
2224 /**
2225 * dev_set_promiscuity - update promiscuity count on a device
2226 * @dev: device
2227 * @inc: modifier
2229 * Add or remove promsicuity from a device. While the count in the device
2230 * remains above zero the interface remains promiscuous. Once it hits zero
2231 * the device reverts back to normal filtering operation. A negative inc
2232 * value is used to drop promiscuity on the device.
2233 */
2234 void dev_set_promiscuity(struct net_device *dev, int inc)
2236 unsigned short old_flags = dev->flags;
2238 dev->flags |= IFF_PROMISC;
2239 if ((dev->promiscuity += inc) == 0)
2240 dev->flags &= ~IFF_PROMISC;
2241 if (dev->flags ^ old_flags) {
2242 dev_mc_upload(dev);
2243 printk(KERN_INFO "device %s %s promiscuous mode\n",
2244 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2245 "left");
2249 /**
2250 * dev_set_allmulti - update allmulti count on a device
2251 * @dev: device
2252 * @inc: modifier
2254 * Add or remove reception of all multicast frames to a device. While the
2255 * count in the device remains above zero the interface remains listening
2256 * to all interfaces. Once it hits zero the device reverts back to normal
2257 * filtering operation. A negative @inc value is used to drop the counter
2258 * when releasing a resource needing all multicasts.
2259 */
2261 void dev_set_allmulti(struct net_device *dev, int inc)
2263 unsigned short old_flags = dev->flags;
2265 dev->flags |= IFF_ALLMULTI;
2266 if ((dev->allmulti += inc) == 0)
2267 dev->flags &= ~IFF_ALLMULTI;
2268 if (dev->flags ^ old_flags)
2269 dev_mc_upload(dev);
2272 unsigned dev_get_flags(const struct net_device *dev)
2274 unsigned flags;
2276 flags = (dev->flags & ~(IFF_PROMISC |
2277 IFF_ALLMULTI |
2278 IFF_RUNNING)) |
2279 (dev->gflags & (IFF_PROMISC |
2280 IFF_ALLMULTI));
2282 if (netif_running(dev) && netif_carrier_ok(dev))
2283 flags |= IFF_RUNNING;
2285 return flags;
/*
 * Apply a new set of interface flags to @dev.
 *
 * The directly settable bits are copied from @flags, an IFF_UP change is
 * carried out through dev_open()/dev_close(), and IFF_PROMISC /
 * IFF_ALLMULTI requests are tracked in dev->gflags and applied through
 * the counting helpers.  Returns 0, or the result of
 * dev_open()/dev_close() when the IFF_UP bit was changed.
 */
2288 int dev_change_flags(struct net_device *dev, unsigned flags)
2290 int ret;
2291 int old_flags = dev->flags;
2293 /*
2294 * Set the flags on our device.
2295 */
/* First group: bits taken from the request; second group: bits kept from the device's current flags. */
2297 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2298 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2299 IFF_AUTOMEDIA)) |
2300 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2301 IFF_ALLMULTI));
2303 /*
2304 * Load in the correct multicast list now the flags have changed.
2305 */
2307 dev_mc_upload(dev);
2309 /*
2310 * Have we downed the interface. We handle IFF_UP ourselves
2311 * according to user attempts to set it, rather than blindly
2312 * setting it.
2313 */
2315 ret = 0;
2316 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
/* Device was up -> close it; device was down -> open it. */
2317 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2319 if (!ret)
2320 dev_mc_upload(dev);
/* Notify NETDEV_CHANGE only for an up device, and only when a bit outside the ones masked out here changed. */
2323 if (dev->flags & IFF_UP &&
2324 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2325 IFF_VOLATILE)))
2326 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
/* dev->gflags remembers what was requested; a difference toggles the gflags bit and adjusts the promiscuity count by one. */
2328 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2329 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2330 dev->gflags ^= IFF_PROMISC;
2331 dev_set_promiscuity(dev, inc);
2334 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2335 is important. Some (broken) drivers set IFF_PROMISC, when
2336 IFF_ALLMULTI is requested not asking us and not reporting.
2337 */
2338 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2339 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2340 dev->gflags ^= IFF_ALLMULTI;
2341 dev_set_allmulti(dev, inc);
/* Report the set of changed bits over rtnetlink if anything changed. */
2344 if (old_flags ^ dev->flags)
2345 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2347 return ret;
2350 int dev_set_mtu(struct net_device *dev, int new_mtu)
2352 int err;
2354 if (new_mtu == dev->mtu)
2355 return 0;
2357 /* MTU must be positive. */
2358 if (new_mtu < 0)
2359 return -EINVAL;
2361 if (!netif_device_present(dev))
2362 return -ENODEV;
2364 err = 0;
2365 if (dev->change_mtu)
2366 err = dev->change_mtu(dev, new_mtu);
2367 else
2368 dev->mtu = new_mtu;
2369 if (!err && dev->flags & IFF_UP)
2370 notifier_call_chain(&netdev_chain,
2371 NETDEV_CHANGEMTU, dev);
2372 return err;
2375 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2377 int err;
2379 if (!dev->set_mac_address)
2380 return -EOPNOTSUPP;
2381 if (sa->sa_family != dev->type)
2382 return -EINVAL;
2383 if (!netif_device_present(dev))
2384 return -ENODEV;
2385 err = dev->set_mac_address(dev, sa);
2386 if (!err)
2387 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2388 return err;
2391 /*
2392 * Perform the SIOCxIFxxx calls.
2393 */
2394 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2396 int err;
2397 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2399 if (!dev)
2400 return -ENODEV;
2402 switch (cmd) {
2403 case SIOCGIFFLAGS: /* Get interface flags */
2404 ifr->ifr_flags = dev_get_flags(dev);
2405 return 0;
2407 case SIOCSIFFLAGS: /* Set interface flags */
2408 return dev_change_flags(dev, ifr->ifr_flags);
2410 case SIOCGIFMETRIC: /* Get the metric on the interface
2411 (currently unused) */
2412 ifr->ifr_metric = 0;
2413 return 0;
2415 case SIOCSIFMETRIC: /* Set the metric on the interface
2416 (currently unused) */
2417 return -EOPNOTSUPP;
2419 case SIOCGIFMTU: /* Get the MTU of a device */
2420 ifr->ifr_mtu = dev->mtu;
2421 return 0;
2423 case SIOCSIFMTU: /* Set the MTU of a device */
2424 return dev_set_mtu(dev, ifr->ifr_mtu);
2426 case SIOCGIFHWADDR:
2427 if (!dev->addr_len)
2428 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2429 else
2430 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2431 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2432 ifr->ifr_hwaddr.sa_family = dev->type;
2433 return 0;
2435 case SIOCSIFHWADDR:
2436 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2438 case SIOCSIFHWBROADCAST:
2439 if (ifr->ifr_hwaddr.sa_family != dev->type)
2440 return -EINVAL;
2441 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2442 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2443 notifier_call_chain(&netdev_chain,
2444 NETDEV_CHANGEADDR, dev);
2445 return 0;
2447 case SIOCGIFMAP:
2448 ifr->ifr_map.mem_start = dev->mem_start;
2449 ifr->ifr_map.mem_end = dev->mem_end;
2450 ifr->ifr_map.base_addr = dev->base_addr;
2451 ifr->ifr_map.irq = dev->irq;
2452 ifr->ifr_map.dma = dev->dma;
2453 ifr->ifr_map.port = dev->if_port;
2454 return 0;
2456 case SIOCSIFMAP:
2457 if (dev->set_config) {
2458 if (!netif_device_present(dev))
2459 return -ENODEV;
2460 return dev->set_config(dev, &ifr->ifr_map);
2462 return -EOPNOTSUPP;
2464 case SIOCADDMULTI:
2465 if (!dev->set_multicast_list ||
2466 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2467 return -EINVAL;
2468 if (!netif_device_present(dev))
2469 return -ENODEV;
2470 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2471 dev->addr_len, 1);
2473 case SIOCDELMULTI:
2474 if (!dev->set_multicast_list ||
2475 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2476 return -EINVAL;
2477 if (!netif_device_present(dev))
2478 return -ENODEV;
2479 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2480 dev->addr_len, 1);
2482 case SIOCGIFINDEX:
2483 ifr->ifr_ifindex = dev->ifindex;
2484 return 0;
2486 case SIOCGIFTXQLEN:
2487 ifr->ifr_qlen = dev->tx_queue_len;
2488 return 0;
2490 case SIOCSIFTXQLEN:
2491 if (ifr->ifr_qlen < 0)
2492 return -EINVAL;
2493 dev->tx_queue_len = ifr->ifr_qlen;
2494 return 0;
2496 case SIOCSIFNAME:
2497 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2498 return dev_change_name(dev, ifr->ifr_newname);
2500 /*
2501 * Unknown or private ioctl
2502 */
2504 default:
2505 if ((cmd >= SIOCDEVPRIVATE &&
2506 cmd <= SIOCDEVPRIVATE + 15) ||
2507 cmd == SIOCBONDENSLAVE ||
2508 cmd == SIOCBONDRELEASE ||
2509 cmd == SIOCBONDSETHWADDR ||
2510 cmd == SIOCBONDSLAVEINFOQUERY ||
2511 cmd == SIOCBONDINFOQUERY ||
2512 cmd == SIOCBONDCHANGEACTIVE ||
2513 cmd == SIOCGMIIPHY ||
2514 cmd == SIOCGMIIREG ||
2515 cmd == SIOCSMIIREG ||
2516 cmd == SIOCBRADDIF ||
2517 cmd == SIOCBRDELIF ||
2518 cmd == SIOCWANDEV) {
2519 err = -EOPNOTSUPP;
2520 if (dev->do_ioctl) {
2521 if (netif_device_present(dev))
2522 err = dev->do_ioctl(dev, ifr,
2523 cmd);
2524 else
2525 err = -ENODEV;
2527 } else
2528 err = -EINVAL;
2531 return err;
2534 /*
2535 * This function handles all "interface"-type I/O control requests. The actual
2536 * 'doing' part of this is dev_ifsioc above.
2537 */
2539 /**
2540 * dev_ioctl - network device ioctl
2541 * @cmd: command to issue
2542 * @arg: pointer to a struct ifreq in user space
2544 * Issue ioctl functions to devices. This is normally called by the
2545 * user space syscall interfaces but can sometimes be useful for
2546 * other purposes. The return value is the return from the syscall if
2547 * positive or a negative errno code on error.
2548 */
2550 int dev_ioctl(unsigned int cmd, void __user *arg)
2552 struct ifreq ifr;
2553 int ret;
2554 char *colon;
2556 /* One special case: SIOCGIFCONF takes ifconf argument
2557 and requires shared lock, because it sleeps writing
2558 to user space.
2559 */
2561 if (cmd == SIOCGIFCONF) {
2562 rtnl_shlock();
2563 ret = dev_ifconf((char __user *) arg);
2564 rtnl_shunlock();
2565 return ret;
2567 if (cmd == SIOCGIFNAME)
2568 return dev_ifname((struct ifreq __user *)arg);
2570 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2571 return -EFAULT;
2573 ifr.ifr_name[IFNAMSIZ-1] = 0;
2575 colon = strchr(ifr.ifr_name, ':');
2576 if (colon)
2577 *colon = 0;
2579 /*
2580 * See which interface the caller is talking about.
2581 */
2583 switch (cmd) {
2584 /*
2585 * These ioctl calls:
2586 * - can be done by all.
2587 * - atomic and do not require locking.
2588 * - return a value
2589 */
2590 case SIOCGIFFLAGS:
2591 case SIOCGIFMETRIC:
2592 case SIOCGIFMTU:
2593 case SIOCGIFHWADDR:
2594 case SIOCGIFSLAVE:
2595 case SIOCGIFMAP:
2596 case SIOCGIFINDEX:
2597 case SIOCGIFTXQLEN:
2598 dev_load(ifr.ifr_name);
2599 read_lock(&dev_base_lock);
2600 ret = dev_ifsioc(&ifr, cmd);
2601 read_unlock(&dev_base_lock);
2602 if (!ret) {
2603 if (colon)
2604 *colon = ':';
2605 if (copy_to_user(arg, &ifr,
2606 sizeof(struct ifreq)))
2607 ret = -EFAULT;
2609 return ret;
2611 case SIOCETHTOOL:
2612 dev_load(ifr.ifr_name);
2613 rtnl_lock();
2614 ret = dev_ethtool(&ifr);
2615 rtnl_unlock();
2616 if (!ret) {
2617 if (colon)
2618 *colon = ':';
2619 if (copy_to_user(arg, &ifr,
2620 sizeof(struct ifreq)))
2621 ret = -EFAULT;
2623 return ret;
2625 /*
2626 * These ioctl calls:
2627 * - require superuser power.
2628 * - require strict serialization.
2629 * - return a value
2630 */
2631 case SIOCGMIIPHY:
2632 case SIOCGMIIREG:
2633 case SIOCSIFNAME:
2634 if (!capable(CAP_NET_ADMIN))
2635 return -EPERM;
2636 dev_load(ifr.ifr_name);
2637 rtnl_lock();
2638 ret = dev_ifsioc(&ifr, cmd);
2639 rtnl_unlock();
2640 if (!ret) {
2641 if (colon)
2642 *colon = ':';
2643 if (copy_to_user(arg, &ifr,
2644 sizeof(struct ifreq)))
2645 ret = -EFAULT;
2647 return ret;
2649 /*
2650 * These ioctl calls:
2651 * - require superuser power.
2652 * - require strict serialization.
2653 * - do not return a value
2654 */
2655 case SIOCSIFFLAGS:
2656 case SIOCSIFMETRIC:
2657 case SIOCSIFMTU:
2658 case SIOCSIFMAP:
2659 case SIOCSIFHWADDR:
2660 case SIOCSIFSLAVE:
2661 case SIOCADDMULTI:
2662 case SIOCDELMULTI:
2663 case SIOCSIFHWBROADCAST:
2664 case SIOCSIFTXQLEN:
2665 case SIOCSMIIREG:
2666 case SIOCBONDENSLAVE:
2667 case SIOCBONDRELEASE:
2668 case SIOCBONDSETHWADDR:
2669 case SIOCBONDSLAVEINFOQUERY:
2670 case SIOCBONDINFOQUERY:
2671 case SIOCBONDCHANGEACTIVE:
2672 case SIOCBRADDIF:
2673 case SIOCBRDELIF:
2674 if (!capable(CAP_NET_ADMIN))
2675 return -EPERM;
2676 dev_load(ifr.ifr_name);
2677 rtnl_lock();
2678 ret = dev_ifsioc(&ifr, cmd);
2679 rtnl_unlock();
2680 return ret;
2682 case SIOCGIFMEM:
2683 /* Get the per device memory space. We can add this but
2684 * currently do not support it */
2685 case SIOCSIFMEM:
2686 /* Set the per device memory buffer space.
2687 * Not applicable in our case */
2688 case SIOCSIFLINK:
2689 return -EINVAL;
2691 /*
2692 * Unknown or private ioctl.
2693 */
2694 default:
2695 if (cmd == SIOCWANDEV ||
2696 (cmd >= SIOCDEVPRIVATE &&
2697 cmd <= SIOCDEVPRIVATE + 15)) {
2698 dev_load(ifr.ifr_name);
2699 rtnl_lock();
2700 ret = dev_ifsioc(&ifr, cmd);
2701 rtnl_unlock();
2702 if (!ret && copy_to_user(arg, &ifr,
2703 sizeof(struct ifreq)))
2704 ret = -EFAULT;
2705 return ret;
2707 #ifdef WIRELESS_EXT
2708 /* Take care of Wireless Extensions */
2709 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2710 /* If command is `set a parameter', or
2711 * `get the encoding parameters', check if
2712 * the user has the right to do it */
2713 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2714 if (!capable(CAP_NET_ADMIN))
2715 return -EPERM;
2717 dev_load(ifr.ifr_name);
2718 rtnl_lock();
2719 /* Follow me in net/core/wireless.c */
2720 ret = wireless_process_ioctl(&ifr, cmd);
2721 rtnl_unlock();
2722 if (IW_IS_GET(cmd) &&
2723 copy_to_user(arg, &ifr,
2724 sizeof(struct ifreq)))
2725 ret = -EFAULT;
2726 return ret;
2728 #endif /* WIRELESS_EXT */
2729 return -EINVAL;
/**
 *	dev_new_index	-	allocate an ifindex
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 *
 *	A static counter is advanced (restarting at 1 whenever it is no
 *	longer positive) until __dev_get_by_index() finds no device that
 *	already uses the value.
 */
static int dev_new_index(void)
{
	static int ifindex;

	do {
		if (++ifindex <= 0)
			ifindex = 1;
	} while (__dev_get_by_index(ifindex));

	return ifindex;
}
/* Starts out as 1; net_dev_init() sets it to 0 once it has run. */
static int dev_boot_phase = 1;

/* Delayed registration/unregistration */
static DEFINE_SPINLOCK(net_todo_list_lock);
static LIST_HEAD(net_todo_list);
2758 static inline void net_set_todo(struct net_device *dev)
2760 spin_lock(&net_todo_list_lock);
2761 list_add_tail(&dev->todo_list, &net_todo_list);
2762 spin_unlock(&net_todo_list_lock);
2765 /**
2766 * register_netdevice - register a network device
2767 * @dev: device to register
2769 * Take a completed network device structure and add it to the kernel
2770 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2771 * chain. 0 is returned on success. A negative errno code is returned
2772 * on a failure to set up the device, or if the name is a duplicate.
2774 * Callers must hold the rtnl semaphore. You may want
2775 * register_netdev() instead of this.
2777 * BUGS:
2778 * The locking appears insufficient to guarantee two parallel registers
2779 * will not get the same name.
2780 */
2782 int register_netdevice(struct net_device *dev)
2784 struct hlist_head *head;
2785 struct hlist_node *p;
2786 int ret;
2788 BUG_ON(dev_boot_phase);
2789 ASSERT_RTNL();
2791 /* When net_device's are persistent, this will be fatal. */
2792 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2794 spin_lock_init(&dev->queue_lock);
2795 spin_lock_init(&dev->xmit_lock);
2796 dev->xmit_lock_owner = -1;
2797 #ifdef CONFIG_NET_CLS_ACT
2798 spin_lock_init(&dev->ingress_lock);
2799 #endif
2801 ret = alloc_divert_blk(dev);
2802 if (ret)
2803 goto out;
2805 dev->iflink = -1;
2807 /* Init, if this function is available */
2808 if (dev->init) {
2809 ret = dev->init(dev);
2810 if (ret) {
2811 if (ret > 0)
2812 ret = -EIO;
2813 goto out_err;
2817 if (!dev_valid_name(dev->name)) {
2818 ret = -EINVAL;
2819 goto out_err;
2822 dev->ifindex = dev_new_index();
2823 if (dev->iflink == -1)
2824 dev->iflink = dev->ifindex;
2826 /* Check for existence of name */
2827 head = dev_name_hash(dev->name);
2828 hlist_for_each(p, head) {
2829 struct net_device *d
2830 = hlist_entry(p, struct net_device, name_hlist);
2831 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2832 ret = -EEXIST;
2833 goto out_err;
2837 /* Fix illegal SG+CSUM combinations. */
2838 if ((dev->features & NETIF_F_SG) &&
2839 !(dev->features & (NETIF_F_IP_CSUM |
2840 NETIF_F_NO_CSUM |
2841 NETIF_F_HW_CSUM))) {
2842 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2843 dev->name);
2844 dev->features &= ~NETIF_F_SG;
2847 /* TSO requires that SG is present as well. */
2848 if ((dev->features & NETIF_F_TSO) &&
2849 !(dev->features & NETIF_F_SG)) {
2850 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2851 dev->name);
2852 dev->features &= ~NETIF_F_TSO;
2855 /*
2856 * nil rebuild_header routine,
2857 * that should be never called and used as just bug trap.
2858 */
2860 if (!dev->rebuild_header)
2861 dev->rebuild_header = default_rebuild_header;
2863 /*
2864 * Default initial state at registry is that the
2865 * device is present.
2866 */
2868 set_bit(__LINK_STATE_PRESENT, &dev->state);
2870 dev->next = NULL;
2871 dev_init_scheduler(dev);
2872 write_lock_bh(&dev_base_lock);
2873 *dev_tail = dev;
2874 dev_tail = &dev->next;
2875 hlist_add_head(&dev->name_hlist, head);
2876 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2877 dev_hold(dev);
2878 dev->reg_state = NETREG_REGISTERING;
2879 write_unlock_bh(&dev_base_lock);
2881 /* Notify protocols, that a new device appeared. */
2882 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2884 /* Finish registration after unlock */
2885 net_set_todo(dev);
2886 ret = 0;
2888 out:
2889 return ret;
2890 out_err:
2891 free_divert_blk(dev);
2892 goto out;
2895 /**
2896 * register_netdev - register a network device
2897 * @dev: device to register
2899 * Take a completed network device structure and add it to the kernel
2900 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2901 * chain. 0 is returned on success. A negative errno code is returned
2902 * on a failure to set up the device, or if the name is a duplicate.
2904 * This is a wrapper around register_netdev that takes the rtnl semaphore
2905 * and expands the device name if you passed a format string to
2906 * alloc_netdev.
2907 */
2908 int register_netdev(struct net_device *dev)
2910 int err;
2912 rtnl_lock();
2914 /*
2915 * If the name is a format string the caller wants us to do a
2916 * name allocation.
2917 */
2918 if (strchr(dev->name, '%')) {
2919 err = dev_alloc_name(dev, dev->name);
2920 if (err < 0)
2921 goto out;
2924 /*
2925 * Back compatibility hook. Kill this one in 2.5
2926 */
2927 if (dev->name[0] == 0 || dev->name[0] == ' ') {
2928 err = dev_alloc_name(dev, "eth%d");
2929 if (err < 0)
2930 goto out;
2933 err = register_netdevice(dev);
2934 out:
2935 rtnl_unlock();
2936 return err;
2938 EXPORT_SYMBOL(register_netdev);
2940 /*
2941 * netdev_wait_allrefs - wait until all references are gone.
2943 * This is called when unregistering network devices.
2945 * Any protocol or device that holds a reference should register
2946 * for netdevice notification, and cleanup and put back the
2947 * reference if they receive an UNREGISTER event.
2948 * We can get stuck here if buggy protocols don't correctly
2949 * call dev_put.
2950 */
2951 static void netdev_wait_allrefs(struct net_device *dev)
2953 unsigned long rebroadcast_time, warning_time;
2955 rebroadcast_time = warning_time = jiffies;
2956 while (atomic_read(&dev->refcnt) != 0) {
2957 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2958 rtnl_shlock();
2960 /* Rebroadcast unregister notification */
2961 notifier_call_chain(&netdev_chain,
2962 NETDEV_UNREGISTER, dev);
2964 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2965 &dev->state)) {
2966 /* We must not have linkwatch events
2967 * pending on unregister. If this
2968 * happens, we simply run the queue
2969 * unscheduled, resulting in a noop
2970 * for this device.
2971 */
2972 linkwatch_run_queue();
2975 rtnl_shunlock();
2977 rebroadcast_time = jiffies;
2980 msleep(250);
2982 if (time_after(jiffies, warning_time + 10 * HZ)) {
2983 printk(KERN_EMERG "unregister_netdevice: "
2984 "waiting for %s to become free. Usage "
2985 "count = %d\n",
2986 dev->name, atomic_read(&dev->refcnt));
2987 warning_time = jiffies;
2992 /* The sequence is:
2994 * rtnl_lock();
2995 * ...
2996 * register_netdevice(x1);
2997 * register_netdevice(x2);
2998 * ...
2999 * unregister_netdevice(y1);
3000 * unregister_netdevice(y2);
3001 * ...
3002 * rtnl_unlock();
3003 * free_netdev(y1);
3004 * free_netdev(y2);
3006 * We are invoked by rtnl_unlock() after it drops the semaphore.
3007 * This allows us to deal with problems:
3008 * 1) We can create/delete sysfs objects which invoke hotplug
3009 * without deadlocking with linkwatch via keventd.
3010 * 2) Since we run with the RTNL semaphore not held, we can sleep
3011 * safely in order to wait for the netdev refcnt to drop to zero.
3012 */
3013 static DECLARE_MUTEX(net_todo_run_mutex);
3014 void netdev_run_todo(void)
3016 struct list_head list = LIST_HEAD_INIT(list);
3017 int err;
3020 /* Need to guard against multiple cpu's getting out of order. */
3021 down(&net_todo_run_mutex);
3023 /* Not safe to do outside the semaphore. We must not return
3024 * until all unregister events invoked by the local processor
3025 * have been completed (either by this todo run, or one on
3026 * another cpu).
3027 */
3028 if (list_empty(&net_todo_list))
3029 goto out;
3031 /* Snapshot list, allow later requests */
3032 spin_lock(&net_todo_list_lock);
3033 list_splice_init(&net_todo_list, &list);
3034 spin_unlock(&net_todo_list_lock);
3036 while (!list_empty(&list)) {
3037 struct net_device *dev
3038 = list_entry(list.next, struct net_device, todo_list);
3039 list_del(&dev->todo_list);
3041 switch(dev->reg_state) {
3042 case NETREG_REGISTERING:
3043 err = netdev_register_sysfs(dev);
3044 if (err)
3045 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
3046 dev->name, err);
3047 dev->reg_state = NETREG_REGISTERED;
3048 break;
3050 case NETREG_UNREGISTERING:
3051 netdev_unregister_sysfs(dev);
3052 dev->reg_state = NETREG_UNREGISTERED;
3054 netdev_wait_allrefs(dev);
3056 /* paranoia */
3057 BUG_ON(atomic_read(&dev->refcnt));
3058 BUG_TRAP(!dev->ip_ptr);
3059 BUG_TRAP(!dev->ip6_ptr);
3060 BUG_TRAP(!dev->dn_ptr);
3063 /* It must be the very last action,
3064 * after this 'dev' may point to freed up memory.
3065 */
3066 if (dev->destructor)
3067 dev->destructor(dev);
3068 break;
3070 default:
3071 printk(KERN_ERR "network todo '%s' but state %d\n",
3072 dev->name, dev->reg_state);
3073 break;
3077 out:
3078 up(&net_todo_run_mutex);
3081 /**
3082 * alloc_netdev - allocate network device
3083 * @sizeof_priv: size of private data to allocate space for
3084 * @name: device name format string
3085 * @setup: callback to initialize device
3087 * Allocates a struct net_device with private data area for driver use
3088 * and performs basic initialization.
3089 */
3090 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3091 void (*setup)(struct net_device *))
3093 void *p;
3094 struct net_device *dev;
3095 int alloc_size;
3097 /* ensure 32-byte alignment of both the device and private area */
3098 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3099 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3101 p = kmalloc(alloc_size, GFP_KERNEL);
3102 if (!p) {
3103 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3104 return NULL;
3106 memset(p, 0, alloc_size);
3108 dev = (struct net_device *)
3109 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3110 dev->padded = (char *)dev - (char *)p;
3112 if (sizeof_priv)
3113 dev->priv = netdev_priv(dev);
3115 setup(dev);
3116 strcpy(dev->name, name);
3117 return dev;
3119 EXPORT_SYMBOL(alloc_netdev);
3121 /**
3122 * free_netdev - free network device
3123 * @dev: device
3125 * This function does the last stage of destroying an allocated device
3126 * interface. The reference to the device object is released.
3127 * If this is the last reference then it will be freed.
3128 */
3129 void free_netdev(struct net_device *dev)
3131 #ifdef CONFIG_SYSFS
3132 /* Compatiablity with error handling in drivers */
3133 if (dev->reg_state == NETREG_UNINITIALIZED) {
3134 kfree((char *)dev - dev->padded);
3135 return;
3138 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3139 dev->reg_state = NETREG_RELEASED;
3141 /* will free via class release */
3142 class_device_put(&dev->class_dev);
3143 #else
3144 kfree((char *)dev - dev->padded);
3145 #endif
/*
 *	Synchronize with packet receive processing.
 *
 *	Annotated with might_sleep() and implemented as synchronize_rcu().
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
3155 /**
3156 * unregister_netdevice - remove device from the kernel
3157 * @dev: device
3159 * This function shuts down a device interface and removes it
3160 * from the kernel tables. On success 0 is returned, on a failure
3161 * a negative errno code is returned.
3163 * Callers must hold the rtnl semaphore. You may want
3164 * unregister_netdev() instead of this.
3165 */
3167 int unregister_netdevice(struct net_device *dev)
3169 struct net_device *d, **dp;
3171 BUG_ON(dev_boot_phase);
3172 ASSERT_RTNL();
3174 /* Some devices call without registering for initialization unwind. */
3175 if (dev->reg_state == NETREG_UNINITIALIZED) {
3176 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3177 "was registered\n", dev->name, dev);
3178 return -ENODEV;
3181 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3183 /* If device is running, close it first. */
3184 if (dev->flags & IFF_UP)
3185 dev_close(dev);
3187 /* And unlink it from device chain. */
3188 for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3189 if (d == dev) {
3190 write_lock_bh(&dev_base_lock);
3191 hlist_del(&dev->name_hlist);
3192 hlist_del(&dev->index_hlist);
3193 if (dev_tail == &dev->next)
3194 dev_tail = dp;
3195 *dp = d->next;
3196 write_unlock_bh(&dev_base_lock);
3197 break;
3200 if (!d) {
3201 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3202 dev->name);
3203 return -ENODEV;
3206 dev->reg_state = NETREG_UNREGISTERING;
3208 synchronize_net();
3210 /* Shutdown queueing discipline. */
3211 dev_shutdown(dev);
3214 /* Notify protocols, that we are about to destroy
3215 this device. They should clean all the things.
3216 */
3217 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3219 /*
3220 * Flush the multicast chain
3221 */
3222 dev_mc_discard(dev);
3224 if (dev->uninit)
3225 dev->uninit(dev);
3227 /* Notifier chain MUST detach us from master device. */
3228 BUG_TRAP(!dev->master);
3230 free_divert_blk(dev);
3232 /* Finish processing unregister after unlock */
3233 net_set_todo(dev);
3235 synchronize_net();
3237 dev_put(dev);
3238 return 0;
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.  Nothing is returned: the result of
 *	unregister_netdevice() is discarded.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
#ifdef CONFIG_HOTPLUG_CPU
/*
 *	CPU notifier callback.  Only CPU_DEAD is acted upon: the softnet
 *	queues of the CPU given by @ocpu are moved over to the CPU this
 *	callback runs on.  The completion and output queues are appended
 *	with interrupts disabled and NET_TX_SOFTIRQ is raised; afterwards
 *	the packets left on the input queue are passed to netif_rx().
 *	Always returns NOTIFY_OK.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	unsigned int dead_cpu = (unsigned long)ocpu;
	unsigned int this_cpu;
	struct softnet_data *mine, *dead;
	struct sk_buff **skb_tail;
	struct net_device **dev_tail_p;
	struct sk_buff *skb;

	if (action != CPU_DEAD)
		return NOTIFY_OK;

	local_irq_disable();
	this_cpu = smp_processor_id();
	mine = &per_cpu(softnet_data, this_cpu);
	dead = &per_cpu(softnet_data, dead_cpu);

	/* Find end of our completion_queue. */
	for (skb_tail = &mine->completion_queue; *skb_tail;
	     skb_tail = &(*skb_tail)->next)
		;
	/* Append completion queue from offline CPU. */
	*skb_tail = dead->completion_queue;
	dead->completion_queue = NULL;

	/* Find end of our output_queue. */
	for (dev_tail_p = &mine->output_queue; *dev_tail_p;
	     dev_tail_p = &(*dev_tail_p)->next_sched)
		;
	/* Append output queue from offline CPU. */
	*dev_tail_p = dead->output_queue;
	dead->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	for (skb = __skb_dequeue(&dead->input_pkt_queue); skb;
	     skb = __skb_dequeue(&dead->input_pkt_queue))
		netif_rx(skb);

	return NOTIFY_OK;
}
#endif /* CONFIG_HOTPLUG_CPU */
3309 /*
3310 * Initialize the DEV module. At boot time this walks the device list and
3311 * unhooks any devices that fail to initialise (normally hardware not
3312 * present) and leaves us with a valid list of present and active devices.
3314 */
3316 /*
3317 * This is called single threaded during boot, so no need
3318 * to take the rtnl semaphore.
3319 */
3320 static int __init net_dev_init(void)
3322 int i, rc = -ENOMEM;
3324 BUG_ON(!dev_boot_phase);
3326 net_random_init();
3328 if (dev_proc_init())
3329 goto out;
3331 if (netdev_sysfs_init())
3332 goto out;
3334 INIT_LIST_HEAD(&ptype_all);
3335 for (i = 0; i < 16; i++)
3336 INIT_LIST_HEAD(&ptype_base[i]);
3338 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3339 INIT_HLIST_HEAD(&dev_name_head[i]);
3341 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3342 INIT_HLIST_HEAD(&dev_index_head[i]);
3344 /*
3345 * Initialise the packet receive queues.
3346 */
3348 for (i = 0; i < NR_CPUS; i++) {
3349 struct softnet_data *queue;
3351 queue = &per_cpu(softnet_data, i);
3352 skb_queue_head_init(&queue->input_pkt_queue);
3353 queue->throttle = 0;
3354 queue->cng_level = 0;
3355 queue->avg_blog = 10; /* arbitrary non-zero */
3356 queue->completion_queue = NULL;
3357 INIT_LIST_HEAD(&queue->poll_list);
3358 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3359 queue->backlog_dev.weight = weight_p;
3360 queue->backlog_dev.poll = process_backlog;
3361 atomic_set(&queue->backlog_dev.refcnt, 1);
3364 #ifdef OFFLINE_SAMPLE
3365 samp_timer.expires = jiffies + (10 * HZ);
3366 add_timer(&samp_timer);
3367 #endif
3369 dev_boot_phase = 0;
3371 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3372 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3374 hotcpu_notifier(dev_cpu_callback, 0);
3375 dst_init();
3376 dev_mcast_init();
3377 rc = 0;
3378 out:
3379 return rc;
3382 subsys_initcall(net_dev_init);
/* Symbols made available to the rest of the kernel and to modules. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(__skb_linearize);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_ioctl);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

/* Bridge hooks are exported only when bridging is built in or modular. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);