ia64/xen-unstable

linux-2.6-xen-sparse/net/core/dev.c @ 9092:849723752858

Silence the messages that are emitted when removing nodes that have already
been removed. This is fine in a cleanup script.

Signed-off-by: Ewan Mellor <ewan@xensource.com>
author emellor@leeni.uk.xensource.com
date Thu Mar 02 02:01:17 2006 +0100 (2006-03-02)
parents 1ca3d63e7008
children 4ad317429111
1 /*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/config.h>
80 #include <linux/cpu.h>
81 #include <linux/types.h>
82 #include <linux/kernel.h>
83 #include <linux/sched.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/sock.h>
96 #include <linux/rtnetlink.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/stat.h>
100 #include <linux/if_bridge.h>
101 #include <linux/divert.h>
102 #include <net/dst.h>
103 #include <net/pkt_sched.h>
104 #include <net/checksum.h>
105 #include <linux/highmem.h>
106 #include <linux/init.h>
107 #include <linux/kmod.h>
108 #include <linux/module.h>
109 #include <linux/kallsyms.h>
110 #include <linux/netpoll.h>
111 #include <linux/rcupdate.h>
112 #include <linux/delay.h>
113 #ifdef CONFIG_NET_RADIO
114 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
115 #include <net/iw_handler.h>
116 #endif /* CONFIG_NET_RADIO */
117 #include <asm/current.h>
119 #ifdef CONFIG_XEN
120 #include <net/ip.h>
121 #include <linux/tcp.h>
122 #include <linux/udp.h>
123 #endif
125 /*
126 * The list of packet types we will receive (as opposed to discard)
127 * and the routines to invoke.
128 *
129 * Why 16? Because with 16 the only overlap we get on a hash of the
130 * low nibble of the protocol value is RARP/SNAP/X.25.
131 *
132 * NOTE: That is no longer true with the addition of VLAN tags. Not
133 * sure which should go first, but I bet it won't make much
134 * difference if we are running VLANs. The good news is that
135 * this protocol won't be in the list unless compiled in, so
136 * the average user (w/out VLANs) will not be adversely affected.
137 * --BLG
138 *
139 * 0800 IP
140 * 8100 802.1Q VLAN
141 * 0001 802.3
142 * 0002 AX.25
143 * 0004 802.2
144 * 8035 RARP
145 * 0005 SNAP
146 * 0805 X.25
147 * 0806 ARP
148 * 8137 IPX
149 * 0009 Localtalk
150 * 86DD IPv6
151 */
153 static DEFINE_SPINLOCK(ptype_lock);
154 static struct list_head ptype_base[16]; /* 16 way hashed list */
155 static struct list_head ptype_all; /* Taps */
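/*
 * A minimal user-space sketch (not part of this file) of the bucket
 * calculation described above: the low nibble of the host-order
 * protocol value indexes ptype_base[]. Running it shows RARP (0x8035),
 * SNAP (0x0005) and X.25 (0x0805) sharing bucket 5, and 802.1Q VLAN
 * (0x8100) landing in bucket 0 together with IP (0x0800).
 */
#include <stdio.h>

int main(void)
{
	static const struct { unsigned short type; const char *name; } p[] = {
		{ 0x0800, "IP" }, { 0x8100, "802.1Q VLAN" },
		{ 0x8035, "RARP" }, { 0x0005, "SNAP" },
		{ 0x0805, "X.25" }, { 0x0806, "ARP" },
	};
	unsigned int i;

	for (i = 0; i < sizeof(p) / sizeof(p[0]); i++)
		printf("%04x (%s) -> bucket %u\n",
		       p[i].type, p[i].name, p[i].type & 15);
	return 0;
}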
157 /*
158 * The @dev_base list is protected by @dev_base_lock and the rtnl
159 * semaphore.
160 *
161 * Pure readers hold dev_base_lock for reading.
162 *
163 * Writers must hold the rtnl semaphore while they loop through the
164 * dev_base list, and hold dev_base_lock for writing when they do the
165 * actual updates. This allows pure readers to access the list even
166 * while a writer is preparing to update it.
167 *
168 * To put it another way, dev_base_lock is held for writing only to
169 * protect against pure readers; the rtnl semaphore provides the
170 * protection against other writers.
171 *
172 * See, for example usages, register_netdevice() and
173 * unregister_netdevice(), which must be called with the rtnl
174 * semaphore held.
175 */
176 struct net_device *dev_base;
177 static struct net_device **dev_tail = &dev_base;
178 DEFINE_RWLOCK(dev_base_lock);
180 EXPORT_SYMBOL(dev_base);
181 EXPORT_SYMBOL(dev_base_lock);
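/*
 * A hedged sketch (not part of this file) of the reader-side rule just
 * described: a pure reader takes dev_base_lock only, and may walk the
 * list even while a writer, holding the rtnl semaphore, prepares an
 * update. The helper name is hypothetical.
 */
#include <linux/netdevice.h>

static int example_count_netdevs(void)
{
	struct net_device *d;
	int n = 0;

	read_lock(&dev_base_lock);	/* pure reader */
	for (d = dev_base; d; d = d->next)
		n++;
	read_unlock(&dev_base_lock);
	return n;
}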
183 #define NETDEV_HASHBITS 8
184 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
185 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
187 static inline struct hlist_head *dev_name_hash(const char *name)
188 {
189 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
190 return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
191 }
193 static inline struct hlist_head *dev_index_hash(int ifindex)
194 {
195 return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
196 }
198 /*
199 * Our notifier list
200 */
202 static struct notifier_block *netdev_chain;
204 /*
205 * Device drivers call our routines to queue packets here. We empty the
206 * queue in the local softnet handler.
207 */
208 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
210 #ifdef CONFIG_SYSFS
211 extern int netdev_sysfs_init(void);
212 extern int netdev_register_sysfs(struct net_device *);
213 extern void netdev_unregister_sysfs(struct net_device *);
214 #else
215 #define netdev_sysfs_init() (0)
216 #define netdev_register_sysfs(dev) (0)
217 #define netdev_unregister_sysfs(dev) do { } while(0)
218 #endif
221 /*******************************************************************************
223 Protocol management and registration routines
225 *******************************************************************************/
227 /*
228 * For efficiency
229 */
231 int netdev_nit;
233 /*
234 * Add a protocol ID to the list. Now that the input handler is
235 * smarter we can dispense with all the messy stuff that used to be
236 * here.
237 *
238 * BEWARE!!! Protocol handlers, mangling input packets,
239 * MUST BE last in hash buckets and checking protocol handlers
240 * MUST start from promiscuous ptype_all chain in net_bh.
241 * It is true now, do not change it.
242 * Explanation follows: if protocol handler, mangling packet, will
243 * be the first on list, it is not able to sense, that packet
244 * is cloned and should be copied-on-write, so that it will
245 * change it and subsequent readers will get broken packet.
246 * --ANK (980803)
247 */
249 /**
250 * dev_add_pack - add packet handler
251 * @pt: packet type declaration
252 *
253 * Add a protocol handler to the networking stack. The passed &packet_type
254 * is linked into kernel lists and may not be freed until it has been
255 * removed from the kernel lists.
256 *
257 * This call does not sleep, therefore it cannot
258 * guarantee that all CPUs that are in the middle of receiving packets
259 * will see the new packet type (until the next received packet).
260 */
262 void dev_add_pack(struct packet_type *pt)
263 {
264 int hash;
266 spin_lock_bh(&ptype_lock);
267 if (pt->type == htons(ETH_P_ALL)) {
268 netdev_nit++;
269 list_add_rcu(&pt->list, &ptype_all);
270 } else {
271 hash = ntohs(pt->type) & 15;
272 list_add_rcu(&pt->list, &ptype_base[hash]);
273 }
274 spin_unlock_bh(&ptype_lock);
275 }
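/*
 * Illustrative sketch (not from this file): registering a tap for every
 * packet (ETH_P_ALL), which lands on the ptype_all list handled above.
 * The handler name and its consume-everything body are hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

static int example_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	kfree_skb(skb);		/* taps must free the clone they are given */
	return 0;
}

static struct packet_type example_pt = {
	.type = __constant_htons(ETH_P_ALL),
	.func = example_rcv,
};

/* dev_add_pack(&example_pt) to attach; dev_remove_pack(&example_pt) to detach. */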
277 /**
278 * __dev_remove_pack - remove packet handler
279 * @pt: packet type declaration
280 *
281 * Remove a protocol handler that was previously added to the kernel
282 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
283 * from the kernel lists and can be freed or reused once this function
284 * returns.
285 *
286 * The packet type might still be in use by receivers
287 * and must not be freed until after all CPUs have gone
288 * through a quiescent state.
289 */
290 void __dev_remove_pack(struct packet_type *pt)
291 {
292 struct list_head *head;
293 struct packet_type *pt1;
295 spin_lock_bh(&ptype_lock);
297 if (pt->type == htons(ETH_P_ALL)) {
298 netdev_nit--;
299 head = &ptype_all;
300 } else
301 head = &ptype_base[ntohs(pt->type) & 15];
303 list_for_each_entry(pt1, head, list) {
304 if (pt == pt1) {
305 list_del_rcu(&pt->list);
306 goto out;
307 }
308 }
310 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
311 out:
312 spin_unlock_bh(&ptype_lock);
313 }
314 /**
315 * dev_remove_pack - remove packet handler
316 * @pt: packet type declaration
317 *
318 * Remove a protocol handler that was previously added to the kernel
319 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
320 * from the kernel lists and can be freed or reused once this function
321 * returns.
322 *
323 * This call sleeps to guarantee that no CPU is looking at the packet
324 * type after return.
325 */
326 void dev_remove_pack(struct packet_type *pt)
327 {
328 __dev_remove_pack(pt);
330 synchronize_net();
331 }
333 /******************************************************************************
335 Device Boot-time Settings Routines
337 *******************************************************************************/
339 /* Boot time configuration table */
340 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
342 /**
343 * netdev_boot_setup_add - add new setup entry
344 * @name: name of the device
345 * @map: configured settings for the device
346 *
347 * Adds a new setup entry to the dev_boot_setup list. The function
348 * returns 0 on error and 1 on success. This is a generic routine
349 * for all netdevices.
350 */
351 static int netdev_boot_setup_add(char *name, struct ifmap *map)
352 {
353 struct netdev_boot_setup *s;
354 int i;
356 s = dev_boot_setup;
357 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
358 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
359 memset(s[i].name, 0, sizeof(s[i].name));
360 strcpy(s[i].name, name);
361 memcpy(&s[i].map, map, sizeof(s[i].map));
362 break;
363 }
364 }
366 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
367 }
369 /**
370 * netdev_boot_setup_check - check boot time settings
371 * @dev: the netdevice
372 *
373 * Check boot time settings for the device.
374 * The found settings are set for the device to be used
375 * later in the device probing.
377 * Returns 0 if no settings are found, 1 if they are.
377 */
378 int netdev_boot_setup_check(struct net_device *dev)
379 {
380 struct netdev_boot_setup *s = dev_boot_setup;
381 int i;
383 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
384 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
385 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
386 dev->irq = s[i].map.irq;
387 dev->base_addr = s[i].map.base_addr;
388 dev->mem_start = s[i].map.mem_start;
389 dev->mem_end = s[i].map.mem_end;
390 return 1;
391 }
392 }
393 return 0;
394 }
397 /**
398 * netdev_boot_base - get address from boot time settings
399 * @prefix: prefix for network device
400 * @unit: id for network device
401 *
402 * Check boot time settings for the base address of device.
403 * The found settings are set for the device to be used
404 * later in the device probing.
405 * Returns 0 if no settings found.
406 */
407 unsigned long netdev_boot_base(const char *prefix, int unit)
408 {
409 const struct netdev_boot_setup *s = dev_boot_setup;
410 char name[IFNAMSIZ];
411 int i;
413 sprintf(name, "%s%d", prefix, unit);
415 /*
416 * If the device is already registered, return a base of 1 to
417 * indicate that this interface should not be probed.
418 */
419 if (__dev_get_by_name(name))
420 return 1;
422 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
423 if (!strcmp(name, s[i].name))
424 return s[i].map.base_addr;
425 return 0;
426 }
428 /*
429 * Saves the settings configured at boot time for any netdevice.
430 */
431 int __init netdev_boot_setup(char *str)
432 {
433 int ints[5];
434 struct ifmap map;
436 str = get_options(str, ARRAY_SIZE(ints), ints);
437 if (!str || !*str)
438 return 0;
440 /* Save settings */
441 memset(&map, 0, sizeof(map));
442 if (ints[0] > 0)
443 map.irq = ints[1];
444 if (ints[0] > 1)
445 map.base_addr = ints[2];
446 if (ints[0] > 2)
447 map.mem_start = ints[3];
448 if (ints[0] > 3)
449 map.mem_end = ints[4];
451 /* Add new entry to the list */
452 return netdev_boot_setup_add(str, &map);
453 }
455 __setup("netdev=", netdev_boot_setup);
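/*
 * Worked example (hypothetical values): booting with
 *
 *     netdev=5,0x340,0xd0000,0xd4000,eth1
 *
 * makes get_options() fill ints[] = { 4, 5, 0x340, 0xd0000, 0xd4000 } and
 * leaves str pointing at "eth1", so the code above records irq=5,
 * base_addr=0x340, mem_start=0xd0000, mem_end=0xd4000 for eth1. A minimal
 * user-space re-implementation of that parse:
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	char str[] = "5,0x340,0xd0000,0xd4000,eth1";
	long ints[5] = { 0 };
	char *s = str, *end;
	int n = 0;

	while (n < 4) {
		long v = strtol(s, &end, 0);
		if (end == s)
			break;
		ints[++n] = v;
		s = (*end == ',') ? end + 1 : end;
	}
	ints[0] = n;	/* get_options() stores the count in ints[0] */
	printf("irq=%ld base=%#lx mem_start=%#lx mem_end=%#lx name=%s\n",
	       ints[1], ints[2], ints[3], ints[4], s);
	return 0;
}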
457 /*******************************************************************************
459 Device Interface Subroutines
461 *******************************************************************************/
463 /**
464 * __dev_get_by_name - find a device by its name
465 * @name: name to find
466 *
467 * Find an interface by name. Must be called under RTNL semaphore
468 * or @dev_base_lock. If the name is found a pointer to the device
469 * is returned. If the name is not found then %NULL is returned. The
470 * reference counters are not incremented so the caller must be
471 * careful with locks.
472 */
474 struct net_device *__dev_get_by_name(const char *name)
475 {
476 struct hlist_node *p;
478 hlist_for_each(p, dev_name_hash(name)) {
479 struct net_device *dev
480 = hlist_entry(p, struct net_device, name_hlist);
481 if (!strncmp(dev->name, name, IFNAMSIZ))
482 return dev;
483 }
484 return NULL;
485 }
487 /**
488 * dev_get_by_name - find a device by its name
489 * @name: name to find
490 *
491 * Find an interface by name. This can be called from any
492 * context and does its own locking. The returned handle has
493 * the usage count incremented and the caller must use dev_put() to
494 * release it when it is no longer needed. %NULL is returned if no
495 * matching device is found.
496 */
498 struct net_device *dev_get_by_name(const char *name)
499 {
500 struct net_device *dev;
502 read_lock(&dev_base_lock);
503 dev = __dev_get_by_name(name);
504 if (dev)
505 dev_hold(dev);
506 read_unlock(&dev_base_lock);
507 return dev;
508 }
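/*
 * Illustrative sketch (not from this file) of the hold/put discipline
 * required by the comment above. The device name and helper are
 * hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/errno.h>

static int example_mtu_of_eth0(void)
{
	struct net_device *dev = dev_get_by_name("eth0");
	int mtu;

	if (!dev)
		return -ENODEV;
	mtu = dev->mtu;		/* safe: we hold a reference */
	dev_put(dev);		/* release it when finished */
	return mtu;
}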
510 /**
511 * __dev_get_by_index - find a device by its ifindex
512 * @ifindex: index of device
513 *
514 * Search for an interface by index. Returns %NULL if the device
515 * is not found or a pointer to the device. The device has not
516 * had its reference counter increased so the caller must be careful
517 * about locking. The caller must hold either the RTNL semaphore
518 * or @dev_base_lock.
519 */
521 struct net_device *__dev_get_by_index(int ifindex)
522 {
523 struct hlist_node *p;
525 hlist_for_each(p, dev_index_hash(ifindex)) {
526 struct net_device *dev
527 = hlist_entry(p, struct net_device, index_hlist);
528 if (dev->ifindex == ifindex)
529 return dev;
530 }
531 return NULL;
532 }
535 /**
536 * dev_get_by_index - find a device by its ifindex
537 * @ifindex: index of device
538 *
539 * Search for an interface by index. Returns NULL if the device
540 * is not found or a pointer to the device. The device returned has
541 * had a reference added and the pointer is safe until the user calls
542 * dev_put to indicate they have finished with it.
543 */
545 struct net_device *dev_get_by_index(int ifindex)
546 {
547 struct net_device *dev;
549 read_lock(&dev_base_lock);
550 dev = __dev_get_by_index(ifindex);
551 if (dev)
552 dev_hold(dev);
553 read_unlock(&dev_base_lock);
554 return dev;
555 }
557 /**
558 * dev_getbyhwaddr - find a device by its hardware address
559 * @type: media type of device
560 * @ha: hardware address
561 *
562 * Search for an interface by MAC address. Returns NULL if the device
563 * is not found or a pointer to the device. The caller must hold the
564 * rtnl semaphore. The returned device has not had its ref count increased
565 * and the caller must therefore be careful about locking
566 *
567 * BUGS:
568 * If the API was consistent this would be __dev_get_by_hwaddr
569 */
571 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
572 {
573 struct net_device *dev;
575 ASSERT_RTNL();
577 for (dev = dev_base; dev; dev = dev->next)
578 if (dev->type == type &&
579 !memcmp(dev->dev_addr, ha, dev->addr_len))
580 break;
581 return dev;
582 }
584 EXPORT_SYMBOL(dev_getbyhwaddr);
586 struct net_device *dev_getfirstbyhwtype(unsigned short type)
587 {
588 struct net_device *dev;
590 rtnl_lock();
591 for (dev = dev_base; dev; dev = dev->next) {
592 if (dev->type == type) {
593 dev_hold(dev);
594 break;
595 }
596 }
597 rtnl_unlock();
598 return dev;
599 }
601 EXPORT_SYMBOL(dev_getfirstbyhwtype);
603 /**
604 * dev_get_by_flags - find any device with given flags
605 * @if_flags: IFF_* values
606 * @mask: bitmask of bits in if_flags to check
607 *
608 * Search for any interface with the given flags. Returns NULL if a device
609 * is not found or a pointer to the device. The device returned has
610 * had a reference added and the pointer is safe until the user calls
611 * dev_put to indicate they have finished with it.
612 */
614 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
615 {
616 struct net_device *dev;
618 read_lock(&dev_base_lock);
619 for (dev = dev_base; dev != NULL; dev = dev->next) {
620 if (((dev->flags ^ if_flags) & mask) == 0) {
621 dev_hold(dev);
622 break;
623 }
624 }
625 read_unlock(&dev_base_lock);
626 return dev;
627 }
629 /**
630 * dev_valid_name - check if name is okay for network device
631 * @name: name string
632 *
633 * Network device names need to be valid file names to
634 * allow sysfs to work.
635 */
636 int dev_valid_name(const char *name)
637 {
638 return !(*name == '\0'
639 || !strcmp(name, ".")
640 || !strcmp(name, "..")
641 || strchr(name, '/'));
642 }
644 /**
645 * dev_alloc_name - allocate a name for a device
646 * @dev: device
647 * @name: name format string
648 *
649 * Passed a format string - e.g. "lt%d" - it will try to find a suitable
650 * id. Not efficient for many devices, not called a lot. The caller
651 * must hold the dev_base or rtnl lock while allocating the name and
652 * adding the device in order to avoid duplicates. Returns the number
653 * of the unit assigned or a negative errno code.
654 */
656 int dev_alloc_name(struct net_device *dev, const char *name)
657 {
658 int i = 0;
659 char buf[IFNAMSIZ];
660 const char *p;
661 const int max_netdevices = 8*PAGE_SIZE;
662 long *inuse;
663 struct net_device *d;
665 p = strnchr(name, IFNAMSIZ-1, '%');
666 if (p) {
667 /*
668 * Verify the string as this thing may have come from
669 * the user. There must be either one "%d" and no other "%"
670 * characters.
671 */
672 if (p[1] != 'd' || strchr(p + 2, '%'))
673 return -EINVAL;
675 /* Use one page as a bit array of possible slots */
676 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
677 if (!inuse)
678 return -ENOMEM;
680 for (d = dev_base; d; d = d->next) {
681 if (!sscanf(d->name, name, &i))
682 continue;
683 if (i < 0 || i >= max_netdevices)
684 continue;
686 /* avoid cases where sscanf is not exact inverse of printf */
687 snprintf(buf, sizeof(buf), name, i);
688 if (!strncmp(buf, d->name, IFNAMSIZ))
689 set_bit(i, inuse);
690 }
692 i = find_first_zero_bit(inuse, max_netdevices);
693 free_page((unsigned long) inuse);
694 }
696 snprintf(buf, sizeof(buf), name, i);
697 if (!__dev_get_by_name(buf)) {
698 strlcpy(dev->name, buf, IFNAMSIZ);
699 return i;
700 }
702 /* It is possible to run out of possible slots
703 * when the name is long and there isn't enough space left
704 * for the digits, or if all bits are used.
705 */
706 return -ENFILE;
707 }
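/*
 * Usage sketch (hypothetical, not part of this file): with eth0 and eth1
 * already registered, asking for "eth%d" scans the in-use bit array
 * built above and returns the first free unit. The caller holds the
 * rtnl lock, as the comment above requires.
 */
#include <linux/netdevice.h>
#include <linux/kernel.h>

static void example_pick_name(struct net_device *dev)
{
	int unit = dev_alloc_name(dev, "eth%d");	/* e.g. 2 -> "eth2" */

	if (unit < 0)
		printk(KERN_ERR "no free unit: %d\n", unit);
	else
		printk(KERN_INFO "allocated %s (unit %d)\n", dev->name, unit);
}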
710 /**
711 * dev_change_name - change name of a device
712 * @dev: device
713 * @newname: name (or format string) must be at least IFNAMSIZ
714 *
715 * Change the name of a device. Format strings such as "eth%d"
716 * may be passed for wildcarding.
717 */
718 int dev_change_name(struct net_device *dev, char *newname)
719 {
720 int err = 0;
722 ASSERT_RTNL();
724 if (dev->flags & IFF_UP)
725 return -EBUSY;
727 if (!dev_valid_name(newname))
728 return -EINVAL;
730 if (strchr(newname, '%')) {
731 err = dev_alloc_name(dev, newname);
732 if (err < 0)
733 return err;
734 strcpy(newname, dev->name);
735 }
736 else if (__dev_get_by_name(newname))
737 return -EEXIST;
738 else
739 strlcpy(dev->name, newname, IFNAMSIZ);
741 err = class_device_rename(&dev->class_dev, dev->name);
742 if (!err) {
743 hlist_del(&dev->name_hlist);
744 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
745 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
746 }
748 return err;
749 }
751 /**
752 * netdev_features_change - device changes features
753 * @dev: device to cause notification
754 *
755 * Called to indicate a device has changed features.
756 */
757 void netdev_features_change(struct net_device *dev)
758 {
759 notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
760 }
761 EXPORT_SYMBOL(netdev_features_change);
763 /**
764 * netdev_state_change - device changes state
765 * @dev: device to cause notification
766 *
767 * Called to indicate a device has changed state. This function calls
768 * the notifier chains for netdev_chain and sends a NEWLINK message
769 * to the routing socket.
770 */
771 void netdev_state_change(struct net_device *dev)
772 {
773 if (dev->flags & IFF_UP) {
774 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
775 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
776 }
777 }
779 /**
780 * dev_load - load a network module
781 * @name: name of interface
782 *
783 * If a network interface is not present and the process has suitable
784 * privileges this function loads the module. If module loading is not
785 * available in this kernel then it becomes a nop.
786 */
788 void dev_load(const char *name)
789 {
790 struct net_device *dev;
792 read_lock(&dev_base_lock);
793 dev = __dev_get_by_name(name);
794 read_unlock(&dev_base_lock);
796 if (!dev && capable(CAP_SYS_MODULE))
797 request_module("%s", name);
798 }
800 static int default_rebuild_header(struct sk_buff *skb)
801 {
802 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
803 skb->dev ? skb->dev->name : "NULL!!!");
804 kfree_skb(skb);
805 return 1;
806 }
809 /**
810 * dev_open - prepare an interface for use.
811 * @dev: device to open
812 *
813 * Takes a device from down to up state. The device's private open
814 * function is invoked and then the multicast lists are loaded. Finally
815 * the device is moved into the up state and a %NETDEV_UP message is
816 * sent to the netdev notifier chain.
817 *
818 * Calling this function on an active interface is a nop. On a failure
819 * a negative errno code is returned.
820 */
821 int dev_open(struct net_device *dev)
822 {
823 int ret = 0;
825 /*
826 * Is it already up?
827 */
829 if (dev->flags & IFF_UP)
830 return 0;
832 /*
833 * Is it even present?
834 */
835 if (!netif_device_present(dev))
836 return -ENODEV;
838 /*
839 * Call device private open method
840 */
841 set_bit(__LINK_STATE_START, &dev->state);
842 if (dev->open) {
843 ret = dev->open(dev);
844 if (ret)
845 clear_bit(__LINK_STATE_START, &dev->state);
846 }
848 /*
849 * If it went open OK then:
850 */
852 if (!ret) {
853 /*
854 * Set the flags.
855 */
856 dev->flags |= IFF_UP;
858 /*
859 * Initialize multicasting status
860 */
861 dev_mc_upload(dev);
863 /*
864 * Wakeup transmit queue engine
865 */
866 dev_activate(dev);
868 /*
869 * ... and announce new interface.
870 */
871 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
872 }
873 return ret;
874 }
876 /**
877 * dev_close - shutdown an interface.
878 * @dev: device to shutdown
879 *
880 * This function moves an active device into down state. A
881 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
882 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
883 * chain.
884 */
885 int dev_close(struct net_device *dev)
886 {
887 if (!(dev->flags & IFF_UP))
888 return 0;
890 /*
891 * Tell people we are going down, so that they can
892 * prepare for its death while the device is still operating.
893 */
894 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
896 dev_deactivate(dev);
898 clear_bit(__LINK_STATE_START, &dev->state);
900 /* Synchronize to scheduled poll. We cannot touch poll list,
901 * it can be even on different cpu. So just clear netif_running(),
902 * and wait until the poll really happens. Actually, the best place
903 * for this is inside dev->stop() after device stopped its irq
904 * engine, but this requires more changes in devices. */
906 smp_mb__after_clear_bit(); /* Commit netif_running(). */
907 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
908 /* No hurry. */
909 msleep(1);
910 }
912 /*
913 * Call the device specific close. This cannot fail.
914 * Only if device is UP
915 *
916 * We allow it to be called even after a DETACH hot-plug
917 * event.
918 */
919 if (dev->stop)
920 dev->stop(dev);
922 /*
923 * Device is now down.
924 */
926 dev->flags &= ~IFF_UP;
928 /*
929 * Tell people we are down
930 */
931 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
933 return 0;
934 }
937 /*
938 * Device change register/unregister. These are not inline or static
939 * as we export them to the world.
940 */
942 /**
943 * register_netdevice_notifier - register a network notifier block
944 * @nb: notifier
945 *
946 * Register a notifier to be called when network device events occur.
947 * The notifier passed is linked into the kernel structures and must
948 * not be reused until it has been unregistered. A negative errno code
949 * is returned on a failure.
950 *
951 * When registered, all registration and up events are replayed
952 * to the new notifier to allow it to have a race-free view of the
953 * network device list.
954 */
956 int register_netdevice_notifier(struct notifier_block *nb)
957 {
958 struct net_device *dev;
959 int err;
961 rtnl_lock();
962 err = notifier_chain_register(&netdev_chain, nb);
963 if (!err) {
964 for (dev = dev_base; dev; dev = dev->next) {
965 nb->notifier_call(nb, NETDEV_REGISTER, dev);
967 if (dev->flags & IFF_UP)
968 nb->notifier_call(nb, NETDEV_UP, dev);
969 }
970 }
971 rtnl_unlock();
972 return err;
973 }
975 /**
976 * unregister_netdevice_notifier - unregister a network notifier block
977 * @nb: notifier
978 *
979 * Unregister a notifier previously registered by
980 * register_netdevice_notifier(). The notifier is unlinked from the
981 * kernel structures and may then be reused. A negative errno code
982 * is returned on a failure.
983 */
985 int unregister_netdevice_notifier(struct notifier_block *nb)
986 {
987 return notifier_chain_unregister(&netdev_chain, nb);
988 }
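/*
 * Illustrative sketch (not from this file) of a notifier block as this
 * kernel's API expects it: the void * argument is the struct net_device
 * itself. All names here are hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/kernel.h>

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		printk(KERN_INFO "%s is up\n", dev->name);
		break;
	case NETDEV_DOWN:
		printk(KERN_INFO "%s is down\n", dev->name);
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_nb) also replays NETDEV_REGISTER
 * and NETDEV_UP for already-present devices, as described above. */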
990 /**
991 * call_netdevice_notifiers - call all network notifier blocks
992 * @val: value passed unmodified to notifier function
993 * @v: pointer passed unmodified to notifier function
994 *
995 * Call all network notifier blocks. Parameters and return value
996 * are as for notifier_call_chain().
997 */
999 int call_netdevice_notifiers(unsigned long val, void *v)
1000 {
1001 return notifier_call_chain(&netdev_chain, val, v);
1002 }
1004 /* When > 0 there are consumers of rx skb time stamps */
1005 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1007 void net_enable_timestamp(void)
1008 {
1009 atomic_inc(&netstamp_needed);
1010 }
1012 void net_disable_timestamp(void)
1013 {
1014 atomic_dec(&netstamp_needed);
1015 }
1017 void __net_timestamp(struct sk_buff *skb)
1018 {
1019 struct timeval tv;
1021 do_gettimeofday(&tv);
1022 skb_set_timestamp(skb, &tv);
1023 }
1024 EXPORT_SYMBOL(__net_timestamp);
1026 static inline void net_timestamp(struct sk_buff *skb)
1027 {
1028 if (atomic_read(&netstamp_needed))
1029 __net_timestamp(skb);
1030 else {
1031 skb->tstamp.off_sec = 0;
1032 skb->tstamp.off_usec = 0;
1033 }
1034 }
1036 /*
1037 * Support routine. Sends outgoing frames to any network
1038 * taps currently in use.
1039 */
1041 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1042 {
1043 struct packet_type *ptype;
1045 net_timestamp(skb);
1047 rcu_read_lock();
1048 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1049 /* Never send packets back to the socket
1050 * they originated from - MvS (miquels@drinkel.ow.org)
1051 */
1052 if ((ptype->dev == dev || !ptype->dev) &&
1053 (ptype->af_packet_priv == NULL ||
1054 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1055 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1056 if (!skb2)
1057 break;
1059 /* skb->nh should be correctly
1060 set by sender, so that the second statement is
1061 just protection against buggy protocols.
1062 */
1063 skb2->mac.raw = skb2->data;
1065 if (skb2->nh.raw < skb2->data ||
1066 skb2->nh.raw > skb2->tail) {
1067 if (net_ratelimit())
1068 printk(KERN_CRIT "protocol %04x is "
1069 "buggy, dev %s\n",
1070 skb2->protocol, dev->name);
1071 skb2->nh.raw = skb2->data;
1072 }
1074 skb2->h.raw = skb2->nh.raw;
1075 skb2->pkt_type = PACKET_OUTGOING;
1076 ptype->func(skb2, skb->dev, ptype, skb->dev);
1077 }
1078 }
1079 rcu_read_unlock();
1080 }
1082 /*
1083 * Invalidate hardware checksum when packet is to be mangled, and
1084 * complete checksum manually on outgoing path.
1085 */
1086 int skb_checksum_help(struct sk_buff *skb, int inward)
1087 {
1088 unsigned int csum;
1089 int ret = 0, offset = skb->h.raw - skb->data;
1091 if (inward) {
1092 skb->ip_summed = CHECKSUM_NONE;
1093 goto out;
1094 }
1096 if (skb_cloned(skb)) {
1097 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1098 if (ret)
1099 goto out;
1100 }
1102 BUG_ON(offset > (int)skb->len);
1103 csum = skb_checksum(skb, offset, skb->len-offset, 0);
1105 offset = skb->tail - skb->h.raw;
1106 BUG_ON(offset <= 0);
1107 BUG_ON(skb->csum + 2 > offset);
1109 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1110 skb->ip_summed = CHECKSUM_NONE;
1111 out:
1112 return ret;
1113 }
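/*
 * A user-space sketch (not part of this file) of the arithmetic that
 * skb_checksum() and csum_fold() perform above: sum 16-bit words with
 * end-around carry, then store the one's complement. The sample bytes
 * are arbitrary.
 */
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static uint16_t fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);	/* end-around carry */
	return (uint16_t)~sum;
}

int main(void)
{
	const uint8_t data[] = { 0x45, 0x00, 0x00, 0x1c, 0x00, 0x01 };
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < sizeof(data); i += 2)
		sum += (uint32_t)(data[i] << 8 | data[i + 1]);
	if (i < sizeof(data))
		sum += (uint32_t)(data[i] << 8);	/* odd trailing byte */
	printf("checksum = %04x\n", fold(sum));
	return 0;
}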
1115 /* Take action when hardware reception checksum errors are detected. */
1116 #ifdef CONFIG_BUG
1117 void netdev_rx_csum_fault(struct net_device *dev)
1118 {
1119 if (net_ratelimit()) {
1120 printk(KERN_ERR "%s: hw csum failure.\n",
1121 dev ? dev->name : "<unknown>");
1122 dump_stack();
1123 }
1124 }
1125 EXPORT_SYMBOL(netdev_rx_csum_fault);
1126 #endif
1128 #ifdef CONFIG_HIGHMEM
1129 /* Actually, we should eliminate this check as soon as we know, that:
1130 * 1. IOMMU is present and allows to map all the memory.
1131 * 2. No high memory really exists on this machine.
1132 */
1134 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1135 {
1136 int i;
1138 if (dev->features & NETIF_F_HIGHDMA)
1139 return 0;
1141 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1142 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1143 return 1;
1145 return 0;
1146 }
1147 #else
1148 #define illegal_highdma(dev, skb) (0)
1149 #endif
1151 /* Keep head the same: replace data */
1152 int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
1153 {
1154 unsigned int size;
1155 u8 *data;
1156 long offset;
1157 struct skb_shared_info *ninfo;
1158 int headerlen = skb->data - skb->head;
1159 int expand = (skb->tail + skb->data_len) - skb->end;
1161 if (skb_shared(skb))
1162 BUG();
1164 if (expand <= 0)
1165 expand = 0;
1167 size = skb->end - skb->head + expand;
1168 size = SKB_DATA_ALIGN(size);
1169 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1170 if (!data)
1171 return -ENOMEM;
1173 /* Copy entire thing */
1174 if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1175 BUG();
1177 /* Set up shinfo */
1178 ninfo = (struct skb_shared_info*)(data + size);
1179 atomic_set(&ninfo->dataref, 1);
1180 ninfo->tso_size = skb_shinfo(skb)->tso_size;
1181 ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1182 ninfo->nr_frags = 0;
1183 ninfo->frag_list = NULL;
1185 /* Offset between the two in bytes */
1186 offset = data - skb->head;
1188 /* Free old data. */
1189 skb_release_data(skb);
1191 skb->head = data;
1192 skb->end = data + size;
1194 /* Set up new pointers */
1195 skb->h.raw += offset;
1196 skb->nh.raw += offset;
1197 skb->mac.raw += offset;
1198 skb->tail += offset;
1199 skb->data += offset;
1201 /* We are no longer a clone, even if we were. */
1202 skb->cloned = 0;
1204 skb->tail += skb->data_len;
1205 skb->data_len = 0;
1206 return 0;
1207 }
1209 #define HARD_TX_LOCK(dev, cpu) { \
1210 if ((dev->features & NETIF_F_LLTX) == 0) { \
1211 spin_lock(&dev->xmit_lock); \
1212 dev->xmit_lock_owner = cpu; \
1213 } \
1214 }
1216 #define HARD_TX_UNLOCK(dev) { \
1217 if ((dev->features & NETIF_F_LLTX) == 0) { \
1218 dev->xmit_lock_owner = -1; \
1219 spin_unlock(&dev->xmit_lock); \
1220 } \
1221 }
1223 /**
1224 * dev_queue_xmit - transmit a buffer
1225 * @skb: buffer to transmit
1226 *
1227 * Queue a buffer for transmission to a network device. The caller must
1228 * have set the device and priority and built the buffer before calling
1229 * this function. The function can be called from an interrupt.
1230 *
1231 * A negative errno code is returned on a failure. A success does not
1232 * guarantee the frame will be transmitted as it may be dropped due
1233 * to congestion or traffic shaping.
1234 *
1235 * -----------------------------------------------------------------------------------
1236 * I notice this method can also return errors from the queue disciplines,
1237 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1238 * be positive.
1239 *
1240 * Regardless of the return value, the skb is consumed, so it is currently
1241 * difficult to retry a send to this method. (You can bump the ref count
1242 * before sending to hold a reference for retry if you are careful.)
1243 *
1244 * When calling this method, interrupts MUST be enabled. This is because
1245 * the BH enable code must have IRQs enabled so that it will not deadlock.
1246 * --BLG
1247 */
1249 int dev_queue_xmit(struct sk_buff *skb)
1250 {
1251 struct net_device *dev = skb->dev;
1252 struct Qdisc *q;
1253 int rc = -ENOMEM;
1255 if (skb_shinfo(skb)->frag_list &&
1256 !(dev->features & NETIF_F_FRAGLIST) &&
1257 __skb_linearize(skb, GFP_ATOMIC))
1258 goto out_kfree_skb;
1260 /* Fragmented skb is linearized if device does not support SG,
1261 * or if at least one of fragments is in highmem and device
1262 * does not support DMA from it.
1263 */
1264 if (skb_shinfo(skb)->nr_frags &&
1265 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1266 __skb_linearize(skb, GFP_ATOMIC))
1267 goto out_kfree_skb;
1269 #ifdef CONFIG_XEN
1270 /* If a checksum-deferred packet is forwarded to a device that needs a
1271 * checksum, correct the pointers and force checksumming.
1272 */
1273 if (skb->proto_csum_blank) {
1274 if (skb->protocol != htons(ETH_P_IP))
1275 goto out_kfree_skb;
1276 skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
1277 if (skb->h.raw >= skb->tail)
1278 goto out_kfree_skb;
1279 switch (skb->nh.iph->protocol) {
1280 case IPPROTO_TCP:
1281 skb->csum = offsetof(struct tcphdr, check);
1282 break;
1283 case IPPROTO_UDP:
1284 skb->csum = offsetof(struct udphdr, check);
1285 break;
1286 default:
1287 if (net_ratelimit())
1288 printk(KERN_ERR "Attempting to checksum a non-"
1289 "TCP/UDP packet, dropping a protocol"
1290 " %d packet", skb->nh.iph->protocol);
1291 rc = -EPROTO;
1292 goto out_kfree_skb;
1293 }
1294 if ((skb->h.raw + skb->csum + 2) > skb->tail)
1295 goto out_kfree_skb;
1296 skb->ip_summed = CHECKSUM_HW;
1297 }
1298 #endif
1300 /* If packet is not checksummed and device does not support
1301 * checksumming for this protocol, complete checksumming here.
1302 */
1303 if (skb->ip_summed == CHECKSUM_HW &&
1304 (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1305 (!(dev->features & NETIF_F_IP_CSUM) ||
1306 skb->protocol != htons(ETH_P_IP))))
1307 if (skb_checksum_help(skb, 0))
1308 goto out_kfree_skb;
1310 spin_lock_prefetch(&dev->queue_lock);
1312 /* Disable soft irqs for various locks below. Also
1313 * stops preemption for RCU.
1314 */
1315 local_bh_disable();
1317 /* Updates of qdisc are serialized by queue_lock.
1318 * The struct Qdisc which is pointed to by qdisc is now a
1319 * rcu structure - it may be accessed without acquiring
1320 * a lock (but the structure may be stale.) The freeing of the
1321 * qdisc will be deferred until it's known that there are no
1322 * more references to it.
1324 * If the qdisc has an enqueue function, we still need to
1325 * hold the queue_lock before calling it, since queue_lock
1326 * also serializes access to the device queue.
1327 */
1329 q = rcu_dereference(dev->qdisc);
1330 #ifdef CONFIG_NET_CLS_ACT
1331 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1332 #endif
1333 if (q->enqueue) {
1334 /* Grab device queue */
1335 spin_lock(&dev->queue_lock);
1337 rc = q->enqueue(skb, q);
1339 qdisc_run(dev);
1341 spin_unlock(&dev->queue_lock);
1342 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1343 goto out;
1344 }
1346 /* The device has no queue. Common case for software devices:
1347 loopback, all the sorts of tunnels...
1349 Really, it is unlikely that xmit_lock protection is necessary here.
1350 (f.e. loopback and IP tunnels are clean ignoring statistics
1351 counters.)
1352 However, it is possible, that they rely on protection
1353 made by us here.
1355 Check this and take the lock. It is not prone to deadlocks.
1356 Or just shoot the noqueue qdisc - that is even simpler 8)
1357 */
1358 if (dev->flags & IFF_UP) {
1359 int cpu = smp_processor_id(); /* ok because BHs are off */
1361 if (dev->xmit_lock_owner != cpu) {
1363 HARD_TX_LOCK(dev, cpu);
1365 if (!netif_queue_stopped(dev)) {
1366 if (netdev_nit)
1367 dev_queue_xmit_nit(skb, dev);
1369 rc = 0;
1370 if (!dev->hard_start_xmit(skb, dev)) {
1371 HARD_TX_UNLOCK(dev);
1372 goto out;
1373 }
1374 }
1375 HARD_TX_UNLOCK(dev);
1376 if (net_ratelimit())
1377 printk(KERN_CRIT "Virtual device %s asks to "
1378 "queue packet!\n", dev->name);
1379 } else {
1380 /* Recursion is detected! It is possible,
1381 * unfortunately */
1382 if (net_ratelimit())
1383 printk(KERN_CRIT "Dead loop on virtual device "
1384 "%s, fix it urgently!\n", dev->name);
1388 rc = -ENETDOWN;
1389 local_bh_enable();
1391 out_kfree_skb:
1392 kfree_skb(skb);
1393 return rc;
1394 out:
1395 local_bh_enable();
1396 return rc;
1397 }
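/*
 * Illustrative sketch (not from this file): a minimal sender built on
 * dev_queue_xmit() above. The payload, destination and helper name are
 * hypothetical; a real protocol would build proper headers and handle
 * the positive NET_XMIT_* return codes discussed above.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>
#include <linux/string.h>
#include <linux/errno.h>

static int example_send(struct net_device *dev, const void *payload, int len)
{
	unsigned char dest[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	struct sk_buff *skb = alloc_skb(LL_RESERVED_SPACE(dev) + len, GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;
	skb_reserve(skb, LL_RESERVED_SPACE(dev));	/* room for link headers */
	memcpy(skb_put(skb, len), payload, len);
	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);		/* hypothetical */
	if (dev->hard_header)
		dev->hard_header(skb, dev, ETH_P_IP, dest, NULL, len);
	return dev_queue_xmit(skb);			/* consumes skb either way */
}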
1400 /*=======================================================================
1401 Receiver routines
1402 =======================================================================*/
1404 int netdev_max_backlog = 1000;
1405 int netdev_budget = 300;
1406 int weight_p = 64; /* old backlog weight */
1408 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1411 /**
1412 * netif_rx - post buffer to the network code
1413 * @skb: buffer to post
1414 *
1415 * This function receives a packet from a device driver and queues it for
1416 * the upper (protocol) levels to process. It always succeeds. The buffer
1417 * may be dropped during processing for congestion control or by the
1418 * protocol layers.
1419 *
1420 * return values:
1421 * NET_RX_SUCCESS (no congestion)
1422 * NET_RX_CN_LOW (low congestion)
1423 * NET_RX_CN_MOD (moderate congestion)
1424 * NET_RX_CN_HIGH (high congestion)
1425 * NET_RX_DROP (packet was dropped)
1426 *
1427 */
1429 int netif_rx(struct sk_buff *skb)
1430 {
1431 struct softnet_data *queue;
1432 unsigned long flags;
1434 /* if netpoll wants it, pretend we never saw it */
1435 if (netpoll_rx(skb))
1436 return NET_RX_DROP;
1438 if (!skb->tstamp.off_sec)
1439 net_timestamp(skb);
1441 /*
1442 * The code is rearranged so that the path is shortest when
1443 * the CPU is congested but still operating.
1444 */
1445 local_irq_save(flags);
1446 queue = &__get_cpu_var(softnet_data);
1448 __get_cpu_var(netdev_rx_stat).total++;
1449 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1450 if (queue->input_pkt_queue.qlen) {
1451 enqueue:
1452 dev_hold(skb->dev);
1453 __skb_queue_tail(&queue->input_pkt_queue, skb);
1454 local_irq_restore(flags);
1455 return NET_RX_SUCCESS;
1456 }
1458 netif_rx_schedule(&queue->backlog_dev);
1459 goto enqueue;
1460 }
1462 __get_cpu_var(netdev_rx_stat).dropped++;
1463 local_irq_restore(flags);
1465 kfree_skb(skb);
1466 return NET_RX_DROP;
1467 }
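/*
 * Illustrative sketch (not from this file): the classic way a driver's
 * receive interrupt hands a frame to netif_rx() above. Everything here
 * is hypothetical boilerplate for a 2.6-era Ethernet driver.
 */
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/string.h>

static void example_rx(struct net_device *dev, const void *buf, int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);

	if (!skb)
		return;			/* drop on allocation failure */
	skb_reserve(skb, 2);		/* align the IP header */
	memcpy(skb_put(skb, len), buf, len);
	skb->dev = dev;
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);			/* queue for the backlog above */
}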
1469 int netif_rx_ni(struct sk_buff *skb)
1470 {
1471 int err;
1473 preempt_disable();
1474 err = netif_rx(skb);
1475 if (local_softirq_pending())
1476 do_softirq();
1477 preempt_enable();
1479 return err;
1480 }
1482 EXPORT_SYMBOL(netif_rx_ni);
1484 static inline struct net_device *skb_bond(struct sk_buff *skb)
1485 {
1486 struct net_device *dev = skb->dev;
1488 if (dev->master)
1489 skb->dev = dev->master;
1491 return dev;
1492 }
1494 static void net_tx_action(struct softirq_action *h)
1495 {
1496 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1498 if (sd->completion_queue) {
1499 struct sk_buff *clist;
1501 local_irq_disable();
1502 clist = sd->completion_queue;
1503 sd->completion_queue = NULL;
1504 local_irq_enable();
1506 while (clist) {
1507 struct sk_buff *skb = clist;
1508 clist = clist->next;
1510 BUG_TRAP(!atomic_read(&skb->users));
1511 __kfree_skb(skb);
1512 }
1513 }
1515 if (sd->output_queue) {
1516 struct net_device *head;
1518 local_irq_disable();
1519 head = sd->output_queue;
1520 sd->output_queue = NULL;
1521 local_irq_enable();
1523 while (head) {
1524 struct net_device *dev = head;
1525 head = head->next_sched;
1527 smp_mb__before_clear_bit();
1528 clear_bit(__LINK_STATE_SCHED, &dev->state);
1530 if (spin_trylock(&dev->queue_lock)) {
1531 qdisc_run(dev);
1532 spin_unlock(&dev->queue_lock);
1533 } else {
1534 netif_schedule(dev);
1535 }
1536 }
1537 }
1538 }
1540 static __inline__ int deliver_skb(struct sk_buff *skb,
1541 struct packet_type *pt_prev,
1542 struct net_device *orig_dev)
1543 {
1544 atomic_inc(&skb->users);
1545 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1546 }
1548 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1549 int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
1550 struct net_bridge;
1551 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1552 unsigned char *addr);
1553 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
1555 static __inline__ int handle_bridge(struct sk_buff **pskb,
1556 struct packet_type **pt_prev, int *ret,
1557 struct net_device *orig_dev)
1558 {
1559 struct net_bridge_port *port;
1561 if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
1562 (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
1563 return 0;
1565 if (*pt_prev) {
1566 *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
1567 *pt_prev = NULL;
1568 }
1570 return br_handle_frame_hook(port, pskb);
1571 }
1572 #else
1573 #define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
1574 #endif
1576 #ifdef CONFIG_NET_CLS_ACT
1577 /* TODO: Maybe we should just force sch_ingress to be compiled in
1578 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
1579 * instructions (a compare and 2 extra stores) right now if we don't
1580 * have it on but do have CONFIG_NET_CLS_ACT.
1581 * NOTE: This doesn't stop any functionality; if you don't have
1582 * the ingress scheduler, you just can't add policies on ingress.
1583 *
1584 */
1585 static int ing_filter(struct sk_buff *skb)
1586 {
1587 struct Qdisc *q;
1588 struct net_device *dev = skb->dev;
1589 int result = TC_ACT_OK;
1591 if (dev->qdisc_ingress) {
1592 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1593 if (MAX_RED_LOOP < ttl++) {
1594 printk("Redir loop detected Dropping packet (%s->%s)\n",
1595 skb->input_dev->name, skb->dev->name);
1596 return TC_ACT_SHOT;
1597 }
1599 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1601 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1603 spin_lock(&dev->ingress_lock);
1604 if ((q = dev->qdisc_ingress) != NULL)
1605 result = q->enqueue(skb, q);
1606 spin_unlock(&dev->ingress_lock);
1608 }
1610 return result;
1611 }
1612 #endif
1614 int netif_receive_skb(struct sk_buff *skb)
1615 {
1616 struct packet_type *ptype, *pt_prev;
1617 struct net_device *orig_dev;
1618 int ret = NET_RX_DROP;
1619 unsigned short type;
1621 /* if we've gotten here through NAPI, check netpoll */
1622 if (skb->dev->poll && netpoll_rx(skb))
1623 return NET_RX_DROP;
1625 if (!skb->tstamp.off_sec)
1626 net_timestamp(skb);
1628 if (!skb->input_dev)
1629 skb->input_dev = skb->dev;
1631 orig_dev = skb_bond(skb);
1633 __get_cpu_var(netdev_rx_stat).total++;
1635 skb->h.raw = skb->nh.raw = skb->data;
1636 skb->mac_len = skb->nh.raw - skb->mac.raw;
1638 pt_prev = NULL;
1640 rcu_read_lock();
1642 #ifdef CONFIG_NET_CLS_ACT
1643 if (skb->tc_verd & TC_NCLS) {
1644 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1645 goto ncls;
1646 }
1647 #endif
1649 #ifdef CONFIG_XEN
1650 switch (skb->ip_summed) {
1651 case CHECKSUM_UNNECESSARY:
1652 skb->proto_csum_valid = 1;
1653 break;
1654 case CHECKSUM_HW:
1655 /* XXX Implement me. */
1656 default:
1657 skb->proto_csum_valid = 0;
1658 break;
1659 }
1660 #endif
1662 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1663 if (!ptype->dev || ptype->dev == skb->dev) {
1664 if (pt_prev)
1665 ret = deliver_skb(skb, pt_prev, orig_dev);
1666 pt_prev = ptype;
1667 }
1668 }
1670 #ifdef CONFIG_NET_CLS_ACT
1671 if (pt_prev) {
1672 ret = deliver_skb(skb, pt_prev, orig_dev);
1673 pt_prev = NULL; /* no one else should process this after */
1674 } else {
1675 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1676 }
1678 ret = ing_filter(skb);
1680 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1681 kfree_skb(skb);
1682 goto out;
1683 }
1685 skb->tc_verd = 0;
1686 ncls:
1687 #endif
1689 handle_diverter(skb);
1691 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
1692 goto out;
1694 type = skb->protocol;
1695 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1696 if (ptype->type == type &&
1697 (!ptype->dev || ptype->dev == skb->dev)) {
1698 if (pt_prev)
1699 ret = deliver_skb(skb, pt_prev, orig_dev);
1700 pt_prev = ptype;
1701 }
1702 }
1704 if (pt_prev) {
1705 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1706 } else {
1707 kfree_skb(skb);
1708 /* Jamal, now you will not be able to escape explaining
1709 * to me how you were going to use this. :-)
1710 */
1711 ret = NET_RX_DROP;
1712 }
1714 out:
1715 rcu_read_unlock();
1716 return ret;
1717 }
1719 static int process_backlog(struct net_device *backlog_dev, int *budget)
1720 {
1721 int work = 0;
1722 int quota = min(backlog_dev->quota, *budget);
1723 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1724 unsigned long start_time = jiffies;
1726 backlog_dev->weight = weight_p;
1727 for (;;) {
1728 struct sk_buff *skb;
1729 struct net_device *dev;
1731 local_irq_disable();
1732 skb = __skb_dequeue(&queue->input_pkt_queue);
1733 if (!skb)
1734 goto job_done;
1735 local_irq_enable();
1737 dev = skb->dev;
1739 netif_receive_skb(skb);
1741 dev_put(dev);
1743 work++;
1745 if (work >= quota || jiffies - start_time > 1)
1746 break;
1747 }
1750 backlog_dev->quota -= work;
1751 *budget -= work;
1752 return -1;
1754 job_done:
1755 backlog_dev->quota -= work;
1756 *budget -= work;
1758 list_del(&backlog_dev->poll_list);
1759 smp_mb__before_clear_bit();
1760 netif_poll_enable(backlog_dev);
1762 local_irq_enable();
1763 return 0;
1764 }
1766 static void net_rx_action(struct softirq_action *h)
1767 {
1768 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1769 unsigned long start_time = jiffies;
1770 int budget = netdev_budget;
1771 void *have;
1773 local_irq_disable();
1775 while (!list_empty(&queue->poll_list)) {
1776 struct net_device *dev;
1778 if (budget <= 0 || jiffies - start_time > 1)
1779 goto softnet_break;
1781 local_irq_enable();
1783 dev = list_entry(queue->poll_list.next,
1784 struct net_device, poll_list);
1785 have = netpoll_poll_lock(dev);
1787 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1788 netpoll_poll_unlock(have);
1789 local_irq_disable();
1790 list_del(&dev->poll_list);
1791 list_add_tail(&dev->poll_list, &queue->poll_list);
1792 if (dev->quota < 0)
1793 dev->quota += dev->weight;
1794 else
1795 dev->quota = dev->weight;
1796 } else {
1797 netpoll_poll_unlock(have);
1798 dev_put(dev);
1799 local_irq_disable();
1800 }
1801 }
1802 out:
1803 local_irq_enable();
1804 return;
1806 softnet_break:
1807 __get_cpu_var(netdev_rx_stat).time_squeeze++;
1808 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1809 goto out;
1810 }
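/*
 * Illustrative sketch (not from this file) of the contract that
 * net_rx_action() above expects from a 2.6-era dev->poll() method.
 * The ring-buffer helpers are hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/kernel.h>

static struct sk_buff *example_rx_ring_next(struct net_device *dev); /* hypothetical */
static void example_enable_rx_irq(struct net_device *dev);           /* hypothetical */

static int example_poll(struct net_device *dev, int *budget)
{
	int done = 0, quota = min(*budget, dev->quota);

	while (done < quota) {
		struct sk_buff *skb = example_rx_ring_next(dev);
		if (!skb)
			break;
		netif_receive_skb(skb);
		done++;
	}
	*budget -= done;
	dev->quota -= done;

	if (done < quota) {		/* ring drained: leave the poll list */
		netif_rx_complete(dev);
		example_enable_rx_irq(dev);
		return 0;
	}
	return 1;			/* more work: stay scheduled */
}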
1812 static gifconf_func_t * gifconf_list [NPROTO];
1814 /**
1815 * register_gifconf - register a SIOCGIF handler
1816 * @family: Address family
1817 * @gifconf: Function handler
1818 *
1819 * Register protocol dependent address dumping routines. The handler
1820 * that is passed must not be freed or reused until it has been replaced
1821 * by another handler.
1822 */
1823 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1824 {
1825 if (family >= NPROTO)
1826 return -EINVAL;
1827 gifconf_list[family] = gifconf;
1828 return 0;
1829 }
1832 /*
1833 * Map an interface index to its name (SIOCGIFNAME)
1834 */
1836 /*
1837 * We need this ioctl for efficient implementation of the
1838 * if_indextoname() function required by the IPv6 API. Without
1839 * it, we would have to search all the interfaces to find a
1840 * match. --pb
1841 */
1843 static int dev_ifname(struct ifreq __user *arg)
1844 {
1845 struct net_device *dev;
1846 struct ifreq ifr;
1848 /*
1849 * Fetch the caller's info block.
1850 */
1852 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1853 return -EFAULT;
1855 read_lock(&dev_base_lock);
1856 dev = __dev_get_by_index(ifr.ifr_ifindex);
1857 if (!dev) {
1858 read_unlock(&dev_base_lock);
1859 return -ENODEV;
1860 }
1862 strcpy(ifr.ifr_name, dev->name);
1863 read_unlock(&dev_base_lock);
1865 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1866 return -EFAULT;
1867 return 0;
1868 }
1870 /*
1871 * Perform a SIOCGIFCONF call. This structure will change
1872 * size eventually, and there is nothing I can do about it.
1873 * Thus we will need a 'compatibility mode'.
1874 */
1876 static int dev_ifconf(char __user *arg)
1877 {
1878 struct ifconf ifc;
1879 struct net_device *dev;
1880 char __user *pos;
1881 int len;
1882 int total;
1883 int i;
1885 /*
1886 * Fetch the caller's info block.
1887 */
1889 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1890 return -EFAULT;
1892 pos = ifc.ifc_buf;
1893 len = ifc.ifc_len;
1895 /*
1896 * Loop over the interfaces, and write an info block for each.
1897 */
1899 total = 0;
1900 for (dev = dev_base; dev; dev = dev->next) {
1901 for (i = 0; i < NPROTO; i++) {
1902 if (gifconf_list[i]) {
1903 int done;
1904 if (!pos)
1905 done = gifconf_list[i](dev, NULL, 0);
1906 else
1907 done = gifconf_list[i](dev, pos + total,
1908 len - total);
1909 if (done < 0)
1910 return -EFAULT;
1911 total += done;
1912 }
1913 }
1914 }
1916 /*
1917 * All done. Write the updated control block back to the caller.
1918 */
1919 ifc.ifc_len = total;
1921 /*
1922 * Both BSD and Solaris return 0 here, so we do too.
1923 */
1924 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1925 }
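/*
 * A user-space counterpart (hypothetical sketch, not part of this file)
 * of the SIOCGIFCONF handler above: ask for the needed length first by
 * passing a NULL buffer, exactly the !pos case handled in the loop.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifconf ifc = { 0 };
	struct ifreq *ifr;
	int i, fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0 || ioctl(fd, SIOCGIFCONF, &ifc) < 0)	/* NULL buffer: size probe */
		return 1;
	ifc.ifc_buf = malloc(ifc.ifc_len);
	if (!ifc.ifc_buf || ioctl(fd, SIOCGIFCONF, &ifc) < 0)
		return 1;
	ifr = ifc.ifc_req;
	for (i = 0; i < (int)(ifc.ifc_len / sizeof(struct ifreq)); i++)
		printf("%s\n", ifr[i].ifr_name);
	free(ifc.ifc_buf);
	close(fd);
	return 0;
}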
1927 #ifdef CONFIG_PROC_FS
1928 /*
1929 * This is invoked by the /proc filesystem handler to display a device
1930 * in detail.
1931 */
1932 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1933 {
1934 struct net_device *dev;
1935 loff_t i;
1937 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1939 return i == pos ? dev : NULL;
1940 }
1942 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1943 {
1944 read_lock(&dev_base_lock);
1945 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1946 }
1948 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1949 {
1950 ++*pos;
1951 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1952 }
1954 void dev_seq_stop(struct seq_file *seq, void *v)
1955 {
1956 read_unlock(&dev_base_lock);
1957 }
1959 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1960 {
1961 if (dev->get_stats) {
1962 struct net_device_stats *stats = dev->get_stats(dev);
1964 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1965 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1966 dev->name, stats->rx_bytes, stats->rx_packets,
1967 stats->rx_errors,
1968 stats->rx_dropped + stats->rx_missed_errors,
1969 stats->rx_fifo_errors,
1970 stats->rx_length_errors + stats->rx_over_errors +
1971 stats->rx_crc_errors + stats->rx_frame_errors,
1972 stats->rx_compressed, stats->multicast,
1973 stats->tx_bytes, stats->tx_packets,
1974 stats->tx_errors, stats->tx_dropped,
1975 stats->tx_fifo_errors, stats->collisions,
1976 stats->tx_carrier_errors +
1977 stats->tx_aborted_errors +
1978 stats->tx_window_errors +
1979 stats->tx_heartbeat_errors,
1980 stats->tx_compressed);
1981 } else
1982 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1983 }
1985 /*
1986 * Called from the PROCfs module. This now uses the new arbitrary sized
1987 * /proc/net interface to create /proc/net/dev
1988 */
1989 static int dev_seq_show(struct seq_file *seq, void *v)
1990 {
1991 if (v == SEQ_START_TOKEN)
1992 seq_puts(seq, "Inter-| Receive "
1993 " | Transmit\n"
1994 " face |bytes packets errs drop fifo frame "
1995 "compressed multicast|bytes packets errs "
1996 "drop fifo colls carrier compressed\n");
1997 else
1998 dev_seq_printf_stats(seq, v);
1999 return 0;
2000 }
2002 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2003 {
2004 struct netif_rx_stats *rc = NULL;
2006 while (*pos < NR_CPUS)
2007 if (cpu_online(*pos)) {
2008 rc = &per_cpu(netdev_rx_stat, *pos);
2009 break;
2010 } else
2011 ++*pos;
2012 return rc;
2013 }
2015 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2016 {
2017 return softnet_get_online(pos);
2018 }
2020 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2021 {
2022 ++*pos;
2023 return softnet_get_online(pos);
2024 }
2026 static void softnet_seq_stop(struct seq_file *seq, void *v)
2027 {
2028 }
2030 static int softnet_seq_show(struct seq_file *seq, void *v)
2031 {
2032 struct netif_rx_stats *s = v;
2034 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2035 s->total, s->dropped, s->time_squeeze, 0,
2036 0, 0, 0, 0, /* was fastroute */
2037 s->cpu_collision);
2038 return 0;
2039 }
2041 static struct seq_operations dev_seq_ops = {
2042 .start = dev_seq_start,
2043 .next = dev_seq_next,
2044 .stop = dev_seq_stop,
2045 .show = dev_seq_show,
2046 };
2048 static int dev_seq_open(struct inode *inode, struct file *file)
2049 {
2050 return seq_open(file, &dev_seq_ops);
2051 }
2053 static struct file_operations dev_seq_fops = {
2054 .owner = THIS_MODULE,
2055 .open = dev_seq_open,
2056 .read = seq_read,
2057 .llseek = seq_lseek,
2058 .release = seq_release,
2059 };
2061 static struct seq_operations softnet_seq_ops = {
2062 .start = softnet_seq_start,
2063 .next = softnet_seq_next,
2064 .stop = softnet_seq_stop,
2065 .show = softnet_seq_show,
2066 };
2068 static int softnet_seq_open(struct inode *inode, struct file *file)
2069 {
2070 return seq_open(file, &softnet_seq_ops);
2071 }
2073 static struct file_operations softnet_seq_fops = {
2074 .owner = THIS_MODULE,
2075 .open = softnet_seq_open,
2076 .read = seq_read,
2077 .llseek = seq_lseek,
2078 .release = seq_release,
2079 };
2081 #ifdef WIRELESS_EXT
2082 extern int wireless_proc_init(void);
2083 #else
2084 #define wireless_proc_init() 0
2085 #endif
2087 static int __init dev_proc_init(void)
2088 {
2089 int rc = -ENOMEM;
2091 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2092 goto out;
2093 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2094 goto out_dev;
2095 if (wireless_proc_init())
2096 goto out_softnet;
2097 rc = 0;
2098 out:
2099 return rc;
2100 out_softnet:
2101 proc_net_remove("softnet_stat");
2102 out_dev:
2103 proc_net_remove("dev");
2104 goto out;
2105 }
2106 #else
2107 #define dev_proc_init() 0
2108 #endif /* CONFIG_PROC_FS */
2111 /**
2112 * netdev_set_master - set up master/slave pair
2113 * @slave: slave device
2114 * @master: new master device
2115 *
2116 * Changes the master device of the slave. Pass %NULL to break the
2117 * bonding. The caller must hold the RTNL semaphore. On a failure
2118 * a negative errno code is returned. On success the reference counts
2119 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2120 * function returns zero.
2121 */
2122 int netdev_set_master(struct net_device *slave, struct net_device *master)
2124 struct net_device *old = slave->master;
2126 ASSERT_RTNL();
2128 if (master) {
2129 if (old)
2130 return -EBUSY;
2131 dev_hold(master);
2134 slave->master = master;
2136 synchronize_net();
2138 if (old)
2139 dev_put(old);
2141 if (master)
2142 slave->flags |= IFF_SLAVE;
2143 else
2144 slave->flags &= ~IFF_SLAVE;
2146 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2147 return 0;
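/*
 * Illustrative sketch (not part of dev.c): how a bonding-style driver
 * might use netdev_set_master(). The function names are hypothetical
 * and error handling is abbreviated.
 */
static int example_enslave(struct net_device *bond, struct net_device *slave)
{
	int err;

	ASSERT_RTNL();				/* caller holds the RTNL semaphore */
	err = netdev_set_master(slave, bond);	/* -EBUSY if already enslaved */
	if (err)
		return err;
	/* ... bonding-specific setup would follow here ... */
	return 0;
}

static void example_release(struct net_device *slave)
{
	ASSERT_RTNL();
	netdev_set_master(slave, NULL);		/* pass NULL to break the pairing */
}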
2150 /**
2151 * dev_set_promiscuity - update promiscuity count on a device
2152 * @dev: device
2153 * @inc: modifier
2155 * Add or remove promiscuity from a device. While the count in the device
2156 * remains above zero the interface remains promiscuous. Once it hits zero
2157 * the device reverts back to normal filtering operation. A negative inc
2158 * value is used to drop promiscuity on the device.
2159 */
2160 void dev_set_promiscuity(struct net_device *dev, int inc)
2162 unsigned short old_flags = dev->flags;
2164 if ((dev->promiscuity += inc) == 0)
2165 dev->flags &= ~IFF_PROMISC;
2166 else
2167 dev->flags |= IFF_PROMISC;
2168 if (dev->flags != old_flags) {
2169 dev_mc_upload(dev);
2170 printk(KERN_INFO "device %s %s promiscuous mode\n",
2171 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2172 "left");
2176 /**
2177 * dev_set_allmulti - update allmulti count on a device
2178 * @dev: device
2179 * @inc: modifier
2181 * Add or remove reception of all multicast frames to a device. While the
2182 * count in the device remains above zero the interface remains listening
2183 * to all multicast frames. Once it hits zero the device reverts back to normal
2184 * filtering operation. A negative @inc value is used to drop the counter
2185 * when releasing a resource needing all multicasts.
2186 */
2188 void dev_set_allmulti(struct net_device *dev, int inc)
2190 unsigned short old_flags = dev->flags;
2192 dev->flags |= IFF_ALLMULTI;
2193 if ((dev->allmulti += inc) == 0)
2194 dev->flags &= ~IFF_ALLMULTI;
2195 if (dev->flags ^ old_flags)
2196 dev_mc_upload(dev);
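/*
 * Illustrative sketch: dev_set_allmulti() follows the same counting
 * discipline. A hypothetical multicast routing user holds an allmulti
 * reference only while it needs every multicast frame.
 */
static void example_mroute_start(struct net_device *dev)
{
	dev_set_allmulti(dev, 1);
}

static void example_mroute_stop(struct net_device *dev)
{
	dev_set_allmulti(dev, -1);
}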
2199 unsigned dev_get_flags(const struct net_device *dev)
2201 unsigned flags;
2203 flags = (dev->flags & ~(IFF_PROMISC |
2204 IFF_ALLMULTI |
2205 IFF_RUNNING)) |
2206 (dev->gflags & (IFF_PROMISC |
2207 IFF_ALLMULTI));
2209 if (netif_running(dev) && netif_carrier_ok(dev))
2210 flags |= IFF_RUNNING;
2212 return flags;
2215 int dev_change_flags(struct net_device *dev, unsigned flags)
2217 int ret;
2218 int old_flags = dev->flags;
2220 /*
2221 * Set the flags on our device.
2222 */
2224 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2225 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2226 IFF_AUTOMEDIA)) |
2227 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2228 IFF_ALLMULTI));
2230 /*
2231 * Load in the correct multicast list now the flags have changed.
2232 */
2234 dev_mc_upload(dev);
2236 /*
2237 * Have we downed the interface? We handle IFF_UP ourselves
2238 * according to user attempts to set it, rather than blindly
2239 * setting it.
2240 */
2242 ret = 0;
2243 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
2244 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2246 if (!ret)
2247 dev_mc_upload(dev);
2250 if (dev->flags & IFF_UP &&
2251 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2252 IFF_VOLATILE)))
2253 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2255 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2256 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2257 dev->gflags ^= IFF_PROMISC;
2258 dev_set_promiscuity(dev, inc);
2261 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2262 is important. Some (broken) drivers set IFF_PROMISC when
2263 IFF_ALLMULTI is requested, without asking us and without reporting it.
2264 */
2265 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2266 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2267 dev->gflags ^= IFF_ALLMULTI;
2268 dev_set_allmulti(dev, inc);
2271 if (old_flags ^ dev->flags)
2272 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2274 return ret;
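/*
 * Illustrative sketch: bringing an interface up from kernel code with
 * dev_change_flags(), under the RTNL semaphore. The lookup by name is
 * hypothetical.
 */
static int example_bring_up(const char *name)
{
	struct net_device *dev;
	int err = -ENODEV;

	rtnl_lock();
	dev = __dev_get_by_name(name);
	if (dev)
		err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return err;
}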
2277 int dev_set_mtu(struct net_device *dev, int new_mtu)
2279 int err;
2281 if (new_mtu == dev->mtu)
2282 return 0;
2284 /* MTU must not be negative. */
2285 if (new_mtu < 0)
2286 return -EINVAL;
2288 if (!netif_device_present(dev))
2289 return -ENODEV;
2291 err = 0;
2292 if (dev->change_mtu)
2293 err = dev->change_mtu(dev, new_mtu);
2294 else
2295 dev->mtu = new_mtu;
2296 if (!err && dev->flags & IFF_UP)
2297 notifier_call_chain(&netdev_chain,
2298 NETDEV_CHANGEMTU, dev);
2299 return err;
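/*
 * Illustrative sketch: requesting a jumbo MTU. The value is made up;
 * the driver's change_mtu method may still reject it.
 */
static int example_set_jumbo_mtu(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(dev, 9000);
	rtnl_unlock();
	return err;
}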
2302 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2304 int err;
2306 if (!dev->set_mac_address)
2307 return -EOPNOTSUPP;
2308 if (sa->sa_family != dev->type)
2309 return -EINVAL;
2310 if (!netif_device_present(dev))
2311 return -ENODEV;
2312 err = dev->set_mac_address(dev, sa);
2313 if (!err)
2314 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2315 return err;
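/*
 * Illustrative sketch: programming a new Ethernet MAC address. The
 * address bytes are made up (locally administered); sa_family must
 * match dev->type or dev_set_mac_address() returns -EINVAL.
 */
static int example_set_mac(struct net_device *dev)
{
	static const unsigned char mac[ETH_ALEN] = {
		0x02, 0x00, 0x00, 0x12, 0x34, 0x56
	};
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;		/* e.g. ARPHRD_ETHER */
	memcpy(sa.sa_data, mac, ETH_ALEN);
	rtnl_lock();
	err = dev_set_mac_address(dev, &sa);
	rtnl_unlock();
	return err;
}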
2318 /*
2319 * Perform the SIOCxIFxxx calls.
2320 */
2321 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2323 int err;
2324 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2326 if (!dev)
2327 return -ENODEV;
2329 switch (cmd) {
2330 case SIOCGIFFLAGS: /* Get interface flags */
2331 ifr->ifr_flags = dev_get_flags(dev);
2332 return 0;
2334 case SIOCSIFFLAGS: /* Set interface flags */
2335 return dev_change_flags(dev, ifr->ifr_flags);
2337 case SIOCGIFMETRIC: /* Get the metric on the interface
2338 (currently unused) */
2339 ifr->ifr_metric = 0;
2340 return 0;
2342 case SIOCSIFMETRIC: /* Set the metric on the interface
2343 (currently unused) */
2344 return -EOPNOTSUPP;
2346 case SIOCGIFMTU: /* Get the MTU of a device */
2347 ifr->ifr_mtu = dev->mtu;
2348 return 0;
2350 case SIOCSIFMTU: /* Set the MTU of a device */
2351 return dev_set_mtu(dev, ifr->ifr_mtu);
2353 case SIOCGIFHWADDR:
2354 if (!dev->addr_len)
2355 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2356 else
2357 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2358 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2359 ifr->ifr_hwaddr.sa_family = dev->type;
2360 return 0;
2362 case SIOCSIFHWADDR:
2363 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2365 case SIOCSIFHWBROADCAST:
2366 if (ifr->ifr_hwaddr.sa_family != dev->type)
2367 return -EINVAL;
2368 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2369 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2370 notifier_call_chain(&netdev_chain,
2371 NETDEV_CHANGEADDR, dev);
2372 return 0;
2374 case SIOCGIFMAP:
2375 ifr->ifr_map.mem_start = dev->mem_start;
2376 ifr->ifr_map.mem_end = dev->mem_end;
2377 ifr->ifr_map.base_addr = dev->base_addr;
2378 ifr->ifr_map.irq = dev->irq;
2379 ifr->ifr_map.dma = dev->dma;
2380 ifr->ifr_map.port = dev->if_port;
2381 return 0;
2383 case SIOCSIFMAP:
2384 if (dev->set_config) {
2385 if (!netif_device_present(dev))
2386 return -ENODEV;
2387 return dev->set_config(dev, &ifr->ifr_map);
2389 return -EOPNOTSUPP;
2391 case SIOCADDMULTI:
2392 if (!dev->set_multicast_list ||
2393 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2394 return -EINVAL;
2395 if (!netif_device_present(dev))
2396 return -ENODEV;
2397 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2398 dev->addr_len, 1);
2400 case SIOCDELMULTI:
2401 if (!dev->set_multicast_list ||
2402 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2403 return -EINVAL;
2404 if (!netif_device_present(dev))
2405 return -ENODEV;
2406 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2407 dev->addr_len, 1);
2409 case SIOCGIFINDEX:
2410 ifr->ifr_ifindex = dev->ifindex;
2411 return 0;
2413 case SIOCGIFTXQLEN:
2414 ifr->ifr_qlen = dev->tx_queue_len;
2415 return 0;
2417 case SIOCSIFTXQLEN:
2418 if (ifr->ifr_qlen < 0)
2419 return -EINVAL;
2420 dev->tx_queue_len = ifr->ifr_qlen;
2421 return 0;
2423 case SIOCSIFNAME:
2424 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2425 return dev_change_name(dev, ifr->ifr_newname);
2427 /*
2428 * Unknown or private ioctl
2429 */
2431 default:
2432 if ((cmd >= SIOCDEVPRIVATE &&
2433 cmd <= SIOCDEVPRIVATE + 15) ||
2434 cmd == SIOCBONDENSLAVE ||
2435 cmd == SIOCBONDRELEASE ||
2436 cmd == SIOCBONDSETHWADDR ||
2437 cmd == SIOCBONDSLAVEINFOQUERY ||
2438 cmd == SIOCBONDINFOQUERY ||
2439 cmd == SIOCBONDCHANGEACTIVE ||
2440 cmd == SIOCGMIIPHY ||
2441 cmd == SIOCGMIIREG ||
2442 cmd == SIOCSMIIREG ||
2443 cmd == SIOCBRADDIF ||
2444 cmd == SIOCBRDELIF ||
2445 cmd == SIOCWANDEV) {
2446 err = -EOPNOTSUPP;
2447 if (dev->do_ioctl) {
2448 if (netif_device_present(dev))
2449 err = dev->do_ioctl(dev, ifr,
2450 cmd);
2451 else
2452 err = -ENODEV;
2454 } else
2455 err = -EINVAL;
2458 return err;
2461 /*
2462 * This function handles all "interface"-type I/O control requests. The actual
2463 * 'doing' part of this is dev_ifsioc above.
2464 */
2466 /**
2467 * dev_ioctl - network device ioctl
2468 * @cmd: command to issue
2469 * @arg: pointer to a struct ifreq in user space
2471 * Issue ioctl functions to devices. This is normally called by the
2472 * user space syscall interfaces but can sometimes be useful for
2473 * other purposes. The return value is the return from the syscall if
2474 * positive or a negative errno code on error.
2475 */
2477 int dev_ioctl(unsigned int cmd, void __user *arg)
2479 struct ifreq ifr;
2480 int ret;
2481 char *colon;
2483 /* One special case: SIOCGIFCONF takes an ifconf argument
2484 and requires a shared lock, because it sleeps while writing
2485 to user space.
2486 */
2488 if (cmd == SIOCGIFCONF) {
2489 rtnl_shlock();
2490 ret = dev_ifconf((char __user *) arg);
2491 rtnl_shunlock();
2492 return ret;
2494 if (cmd == SIOCGIFNAME)
2495 return dev_ifname((struct ifreq __user *)arg);
2497 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2498 return -EFAULT;
2500 ifr.ifr_name[IFNAMSIZ-1] = 0;
2502 colon = strchr(ifr.ifr_name, ':');
2503 if (colon)
2504 *colon = 0;
2506 /*
2507 * See which interface the caller is talking about.
2508 */
2510 switch (cmd) {
2511 /*
2512 * These ioctl calls:
2513 * - can be done by all.
2514 * - atomic and do not require locking.
2515 * - return a value
2516 */
2517 case SIOCGIFFLAGS:
2518 case SIOCGIFMETRIC:
2519 case SIOCGIFMTU:
2520 case SIOCGIFHWADDR:
2521 case SIOCGIFSLAVE:
2522 case SIOCGIFMAP:
2523 case SIOCGIFINDEX:
2524 case SIOCGIFTXQLEN:
2525 dev_load(ifr.ifr_name);
2526 read_lock(&dev_base_lock);
2527 ret = dev_ifsioc(&ifr, cmd);
2528 read_unlock(&dev_base_lock);
2529 if (!ret) {
2530 if (colon)
2531 *colon = ':';
2532 if (copy_to_user(arg, &ifr,
2533 sizeof(struct ifreq)))
2534 ret = -EFAULT;
2536 return ret;
2538 case SIOCETHTOOL:
2539 dev_load(ifr.ifr_name);
2540 rtnl_lock();
2541 ret = dev_ethtool(&ifr);
2542 rtnl_unlock();
2543 if (!ret) {
2544 if (colon)
2545 *colon = ':';
2546 if (copy_to_user(arg, &ifr,
2547 sizeof(struct ifreq)))
2548 ret = -EFAULT;
2550 return ret;
2552 /*
2553 * These ioctl calls:
2554 * - require superuser power.
2555 * - require strict serialization.
2556 * - return a value
2557 */
2558 case SIOCGMIIPHY:
2559 case SIOCGMIIREG:
2560 case SIOCSIFNAME:
2561 if (!capable(CAP_NET_ADMIN))
2562 return -EPERM;
2563 dev_load(ifr.ifr_name);
2564 rtnl_lock();
2565 ret = dev_ifsioc(&ifr, cmd);
2566 rtnl_unlock();
2567 if (!ret) {
2568 if (colon)
2569 *colon = ':';
2570 if (copy_to_user(arg, &ifr,
2571 sizeof(struct ifreq)))
2572 ret = -EFAULT;
2574 return ret;
2576 /*
2577 * These ioctl calls:
2578 * - require superuser power.
2579 * - require strict serialization.
2580 * - do not return a value
2581 */
2582 case SIOCSIFFLAGS:
2583 case SIOCSIFMETRIC:
2584 case SIOCSIFMTU:
2585 case SIOCSIFMAP:
2586 case SIOCSIFHWADDR:
2587 case SIOCSIFSLAVE:
2588 case SIOCADDMULTI:
2589 case SIOCDELMULTI:
2590 case SIOCSIFHWBROADCAST:
2591 case SIOCSIFTXQLEN:
2592 case SIOCSMIIREG:
2593 case SIOCBONDENSLAVE:
2594 case SIOCBONDRELEASE:
2595 case SIOCBONDSETHWADDR:
2596 case SIOCBONDCHANGEACTIVE:
2597 case SIOCBRADDIF:
2598 case SIOCBRDELIF:
2599 if (!capable(CAP_NET_ADMIN))
2600 return -EPERM;
2601 /* fall through */
2602 case SIOCBONDSLAVEINFOQUERY:
2603 case SIOCBONDINFOQUERY:
2604 dev_load(ifr.ifr_name);
2605 rtnl_lock();
2606 ret = dev_ifsioc(&ifr, cmd);
2607 rtnl_unlock();
2608 return ret;
2610 case SIOCGIFMEM:
2611 /* Get the per device memory space. We can add this but
2612 * currently do not support it */
2613 case SIOCSIFMEM:
2614 /* Set the per device memory buffer space.
2615 * Not applicable in our case */
2616 case SIOCSIFLINK:
2617 return -EINVAL;
2619 /*
2620 * Unknown or private ioctl.
2621 */
2622 default:
2623 if (cmd == SIOCWANDEV ||
2624 (cmd >= SIOCDEVPRIVATE &&
2625 cmd <= SIOCDEVPRIVATE + 15)) {
2626 dev_load(ifr.ifr_name);
2627 rtnl_lock();
2628 ret = dev_ifsioc(&ifr, cmd);
2629 rtnl_unlock();
2630 if (!ret && copy_to_user(arg, &ifr,
2631 sizeof(struct ifreq)))
2632 ret = -EFAULT;
2633 return ret;
2635 #ifdef WIRELESS_EXT
2636 /* Take care of Wireless Extensions */
2637 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2638 /* If command is `set a parameter', or
2639 * `get the encoding parameters', check if
2640 * the user has the right to do it */
2641 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2642 if (!capable(CAP_NET_ADMIN))
2643 return -EPERM;
2645 dev_load(ifr.ifr_name);
2646 rtnl_lock();
2647 /* Follow me in net/core/wireless.c */
2648 ret = wireless_process_ioctl(&ifr, cmd);
2649 rtnl_unlock();
2650 if (IW_IS_GET(cmd) &&
2651 copy_to_user(arg, &ifr,
2652 sizeof(struct ifreq)))
2653 ret = -EFAULT;
2654 return ret;
2656 #endif /* WIRELESS_EXT */
2657 return -EINVAL;
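/*
 * Illustrative sketch (user space, excluded from the kernel build):
 * the syscall path that lands in dev_ioctl()/dev_ifsioc() above.
 * Reads the MTU of a hypothetical interface "eth0" via SIOCGIFMTU.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
		printf("eth0 mtu %d\n", ifr.ifr_mtu);
	close(fd);
	return 0;
}
#endif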
2662 /**
2663 * dev_new_index - allocate an ifindex
2665 * Returns a suitable unique value for a new device interface
2666 * number. The caller must hold the rtnl semaphore or the
2667 * dev_base_lock to be sure it remains unique.
2668 */
2669 static int dev_new_index(void)
2671 static int ifindex;
2672 for (;;) {
2673 if (++ifindex <= 0)
2674 ifindex = 1;
2675 if (!__dev_get_by_index(ifindex))
2676 return ifindex;
2680 static int dev_boot_phase = 1;
2682 /* Delayed registration/unregistration */
2683 static DEFINE_SPINLOCK(net_todo_list_lock);
2684 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2686 static inline void net_set_todo(struct net_device *dev)
2688 spin_lock(&net_todo_list_lock);
2689 list_add_tail(&dev->todo_list, &net_todo_list);
2690 spin_unlock(&net_todo_list_lock);
2693 /**
2694 * register_netdevice - register a network device
2695 * @dev: device to register
2697 * Take a completed network device structure and add it to the kernel
2698 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2699 * chain. 0 is returned on success. A negative errno code is returned
2700 * on a failure to set up the device, or if the name is a duplicate.
2702 * Callers must hold the rtnl semaphore. You may want
2703 * register_netdev() instead of this.
2705 * BUGS:
2706 * The locking appears insufficient to guarantee two parallel registers
2707 * will not get the same name.
2708 */
2710 int register_netdevice(struct net_device *dev)
2712 struct hlist_head *head;
2713 struct hlist_node *p;
2714 int ret;
2716 BUG_ON(dev_boot_phase);
2717 ASSERT_RTNL();
2719 /* When net_device's are persistent, this will be fatal. */
2720 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2722 spin_lock_init(&dev->queue_lock);
2723 spin_lock_init(&dev->xmit_lock);
2724 dev->xmit_lock_owner = -1;
2725 #ifdef CONFIG_NET_CLS_ACT
2726 spin_lock_init(&dev->ingress_lock);
2727 #endif
2729 ret = alloc_divert_blk(dev);
2730 if (ret)
2731 goto out;
2733 dev->iflink = -1;
2735 /* Init, if this function is available */
2736 if (dev->init) {
2737 ret = dev->init(dev);
2738 if (ret) {
2739 if (ret > 0)
2740 ret = -EIO;
2741 goto out_err;
2745 if (!dev_valid_name(dev->name)) {
2746 ret = -EINVAL;
2747 goto out_err;
2750 dev->ifindex = dev_new_index();
2751 if (dev->iflink == -1)
2752 dev->iflink = dev->ifindex;
2754 /* Check for existence of name */
2755 head = dev_name_hash(dev->name);
2756 hlist_for_each(p, head) {
2757 struct net_device *d
2758 = hlist_entry(p, struct net_device, name_hlist);
2759 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2760 ret = -EEXIST;
2761 goto out_err;
2765 /* Fix illegal SG+CSUM combinations. */
2766 if ((dev->features & NETIF_F_SG) &&
2767 !(dev->features & (NETIF_F_IP_CSUM |
2768 NETIF_F_NO_CSUM |
2769 NETIF_F_HW_CSUM))) {
2770 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
2771 dev->name);
2772 dev->features &= ~NETIF_F_SG;
2775 /* TSO requires that SG is present as well. */
2776 if ((dev->features & NETIF_F_TSO) &&
2777 !(dev->features & NETIF_F_SG)) {
2778 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
2779 dev->name);
2780 dev->features &= ~NETIF_F_TSO;
2782 if (dev->features & NETIF_F_UFO) {
2783 if (!(dev->features & NETIF_F_HW_CSUM)) {
2784 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2785 "NETIF_F_HW_CSUM feature.\n",
2786 dev->name);
2787 dev->features &= ~NETIF_F_UFO;
2789 if (!(dev->features & NETIF_F_SG)) {
2790 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2791 "NETIF_F_SG feature.\n",
2792 dev->name);
2793 dev->features &= ~NETIF_F_UFO;
2797 /*
2798 * Nil rebuild_header routine: it should never be called,
2799 * and serves just as a bug trap.
2800 */
2802 if (!dev->rebuild_header)
2803 dev->rebuild_header = default_rebuild_header;
2805 /*
2806 * Default initial state at registration is that the
2807 * device is present.
2808 */
2810 set_bit(__LINK_STATE_PRESENT, &dev->state);
2812 dev->next = NULL;
2813 dev_init_scheduler(dev);
2814 write_lock_bh(&dev_base_lock);
2815 *dev_tail = dev;
2816 dev_tail = &dev->next;
2817 hlist_add_head(&dev->name_hlist, head);
2818 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2819 dev_hold(dev);
2820 dev->reg_state = NETREG_REGISTERING;
2821 write_unlock_bh(&dev_base_lock);
2823 /* Notify protocols, that a new device appeared. */
2824 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2826 /* Finish registration after unlock */
2827 net_set_todo(dev);
2828 ret = 0;
2830 out:
2831 return ret;
2832 out_err:
2833 free_divert_blk(dev);
2834 goto out;
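/*
 * Illustrative sketch: a driver that wants scatter-gather and TSO must
 * also advertise a checksum feature, or the fixups above strip
 * NETIF_F_SG (and therefore NETIF_F_TSO) again. The helper name is
 * hypothetical.
 */
static void example_set_features(struct net_device *dev)
{
	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
}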
2837 /**
2838 * register_netdev - register a network device
2839 * @dev: device to register
2841 * Take a completed network device structure and add it to the kernel
2842 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2843 * chain. 0 is returned on success. A negative errno code is returned
2844 * on a failure to set up the device, or if the name is a duplicate.
2846 * This is a wrapper around register_netdevice that takes the rtnl semaphore
2847 * and expands the device name if you passed a format string to
2848 * alloc_netdev.
2849 */
2850 int register_netdev(struct net_device *dev)
2852 int err;
2854 rtnl_lock();
2856 /*
2857 * If the name is a format string the caller wants us to do a
2858 * name allocation.
2859 */
2860 if (strchr(dev->name, '%')) {
2861 err = dev_alloc_name(dev, dev->name);
2862 if (err < 0)
2863 goto out;
2866 /*
2867 * Backward compatibility hook. To be removed eventually.
2868 */
2869 if (dev->name[0] == 0 || dev->name[0] == ' ') {
2870 err = dev_alloc_name(dev, "eth%d");
2871 if (err < 0)
2872 goto out;
2875 err = register_netdevice(dev);
2876 out:
2877 rtnl_unlock();
2878 return err;
2880 EXPORT_SYMBOL(register_netdev);
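/*
 * Illustrative sketch: a minimal probe path using a '%d' name format,
 * which register_netdev() expands to the next free name. All names
 * here are hypothetical.
 */
static void example_setup(struct net_device *dev)
{
	ether_setup(dev);		/* fill in Ethernet defaults */
}

static int example_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(0, "exdev%d", example_setup);
	if (!dev)
		return -ENOMEM;
	err = register_netdev(dev);	/* takes RTNL, expands the name */
	if (err)
		free_netdev(dev);	/* safe while still unregistered */
	return err;
}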
2882 /*
2883 * netdev_wait_allrefs - wait until all references are gone.
2885 * This is called when unregistering network devices.
2887 * Any protocol or device that holds a reference should register
2888 * for netdevice notification, and clean up and put back the
2889 * reference if they receive an UNREGISTER event.
2890 * We can get stuck here if buggy protocols don't correctly
2891 * call dev_put.
2892 */
2893 static void netdev_wait_allrefs(struct net_device *dev)
2895 unsigned long rebroadcast_time, warning_time;
2897 rebroadcast_time = warning_time = jiffies;
2898 while (atomic_read(&dev->refcnt) != 0) {
2899 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2900 rtnl_shlock();
2902 /* Rebroadcast unregister notification */
2903 notifier_call_chain(&netdev_chain,
2904 NETDEV_UNREGISTER, dev);
2906 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2907 &dev->state)) {
2908 /* We must not have linkwatch events
2909 * pending on unregister. If this
2910 * happens, we simply run the queue
2911 * unscheduled, resulting in a noop
2912 * for this device.
2913 */
2914 linkwatch_run_queue();
2917 rtnl_shunlock();
2919 rebroadcast_time = jiffies;
2922 msleep(250);
2924 if (time_after(jiffies, warning_time + 10 * HZ)) {
2925 printk(KERN_EMERG "unregister_netdevice: "
2926 "waiting for %s to become free. Usage "
2927 "count = %d\n",
2928 dev->name, atomic_read(&dev->refcnt));
2929 warning_time = jiffies;
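/*
 * Illustrative sketch: a protocol that caches a device reference should
 * drop it from a notifier like this one, so netdev_wait_allrefs() does
 * not stall on unregister. example_cached_dev is hypothetical state;
 * the block would be registered with
 * register_netdevice_notifier(&example_notifier).
 */
static struct net_device *example_cached_dev;

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER && dev == example_cached_dev) {
		dev_put(example_cached_dev);	/* release our reference */
		example_cached_dev = NULL;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_notifier = {
	.notifier_call = example_netdev_event,
};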
2934 /* The sequence is:
2936 * rtnl_lock();
2937 * ...
2938 * register_netdevice(x1);
2939 * register_netdevice(x2);
2940 * ...
2941 * unregister_netdevice(y1);
2942 * unregister_netdevice(y2);
2943 * ...
2944 * rtnl_unlock();
2945 * free_netdev(y1);
2946 * free_netdev(y2);
2948 * We are invoked by rtnl_unlock() after it drops the semaphore.
2949 * This allows us to deal with problems:
2950 * 1) We can create/delete sysfs objects which invoke hotplug
2951 * without deadlocking with linkwatch via keventd.
2952 * 2) Since we run with the RTNL semaphore not held, we can sleep
2953 * safely in order to wait for the netdev refcnt to drop to zero.
2954 */
2955 static DECLARE_MUTEX(net_todo_run_mutex);
2956 void netdev_run_todo(void)
2958 struct list_head list = LIST_HEAD_INIT(list);
2959 int err;
2962 /* Need to guard against multiple CPUs getting out of order. */
2963 down(&net_todo_run_mutex);
2965 /* Not safe to do outside the semaphore. We must not return
2966 * until all unregister events invoked by the local processor
2967 * have been completed (either by this todo run, or one on
2968 * another cpu).
2969 */
2970 if (list_empty(&net_todo_list))
2971 goto out;
2973 /* Snapshot list, allow later requests */
2974 spin_lock(&net_todo_list_lock);
2975 list_splice_init(&net_todo_list, &list);
2976 spin_unlock(&net_todo_list_lock);
2978 while (!list_empty(&list)) {
2979 struct net_device *dev
2980 = list_entry(list.next, struct net_device, todo_list);
2981 list_del(&dev->todo_list);
2983 switch(dev->reg_state) {
2984 case NETREG_REGISTERING:
2985 err = netdev_register_sysfs(dev);
2986 if (err)
2987 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
2988 dev->name, err);
2989 dev->reg_state = NETREG_REGISTERED;
2990 break;
2992 case NETREG_UNREGISTERING:
2993 netdev_unregister_sysfs(dev);
2994 dev->reg_state = NETREG_UNREGISTERED;
2996 netdev_wait_allrefs(dev);
2998 /* paranoia */
2999 BUG_ON(atomic_read(&dev->refcnt));
3000 BUG_TRAP(!dev->ip_ptr);
3001 BUG_TRAP(!dev->ip6_ptr);
3002 BUG_TRAP(!dev->dn_ptr);
3005 /* This must be the very last action;
3006 * after this, 'dev' may point to freed memory.
3007 */
3008 if (dev->destructor)
3009 dev->destructor(dev);
3010 break;
3012 default:
3013 printk(KERN_ERR "network todo '%s' but state %d\n",
3014 dev->name, dev->reg_state);
3015 break;
3019 out:
3020 up(&net_todo_run_mutex);
3023 /**
3024 * alloc_netdev - allocate network device
3025 * @sizeof_priv: size of private data to allocate space for
3026 * @name: device name format string
3027 * @setup: callback to initialize device
3029 * Allocates a struct net_device with private data area for driver use
3030 * and performs basic initialization.
3031 */
3032 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3033 void (*setup)(struct net_device *))
3035 void *p;
3036 struct net_device *dev;
3037 int alloc_size;
3039 /* ensure 32-byte alignment of both the device and private area */
3040 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3041 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3043 p = kmalloc(alloc_size, GFP_KERNEL);
3044 if (!p) {
3045 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3046 return NULL;
3048 memset(p, 0, alloc_size);
3050 dev = (struct net_device *)
3051 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3052 dev->padded = (char *)dev - (char *)p;
3054 if (sizeof_priv)
3055 dev->priv = netdev_priv(dev);
3057 setup(dev);
3058 strcpy(dev->name, name);
3059 return dev;
3061 EXPORT_SYMBOL(alloc_netdev);
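/*
 * Illustrative sketch: per-driver private data. alloc_netdev() rounds
 * sizeof(struct net_device) up to a 32-byte boundary (NETDEV_ALIGN_CONST
 * is the alignment minus one), so netdev_priv() returns an aligned area
 * directly after the device structure. The private struct and
 * example_setup() (sketched earlier) are hypothetical.
 */
struct example_priv {
	spinlock_t lock;
	unsigned long tx_count;
};

static struct net_device *example_alloc(void)
{
	return alloc_netdev(sizeof(struct example_priv), "exdev%d",
			    example_setup);
}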
3063 /**
3064 * free_netdev - free network device
3065 * @dev: device
3067 * This function does the last stage of destroying an allocated device
3068 * interface. The reference to the device object is released.
3069 * If this is the last reference then it will be freed.
3070 */
3071 void free_netdev(struct net_device *dev)
3073 #ifdef CONFIG_SYSFS
3074 /* Compatibility with error handling in drivers */
3075 if (dev->reg_state == NETREG_UNINITIALIZED) {
3076 kfree((char *)dev - dev->padded);
3077 return;
3080 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3081 dev->reg_state = NETREG_RELEASED;
3083 /* will free via class release */
3084 class_device_put(&dev->class_dev);
3085 #else
3086 kfree((char *)dev - dev->padded);
3087 #endif
3090 /* Synchronize with packet receive processing. */
3091 void synchronize_net(void)
3093 might_sleep();
3094 synchronize_rcu();
3097 /**
3098 * unregister_netdevice - remove device from the kernel
3099 * @dev: device
3101 * This function shuts down a device interface and removes it
3102 * from the kernel tables. On success 0 is returned, on a failure
3103 * a negative errno code is returned.
3105 * Callers must hold the rtnl semaphore. You may want
3106 * unregister_netdev() instead of this.
3107 */
3109 int unregister_netdevice(struct net_device *dev)
3111 struct net_device *d, **dp;
3113 BUG_ON(dev_boot_phase);
3114 ASSERT_RTNL();
3116 /* Some devices call without registering for initialization unwind. */
3117 if (dev->reg_state == NETREG_UNINITIALIZED) {
3118 printk(KERN_DEBUG "unregister_netdevice: device %s/%p was "
3119 "never registered\n", dev->name, dev);
3120 return -ENODEV;
3123 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3125 /* If device is running, close it first. */
3126 if (dev->flags & IFF_UP)
3127 dev_close(dev);
3129 /* And unlink it from device chain. */
3130 for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3131 if (d == dev) {
3132 write_lock_bh(&dev_base_lock);
3133 hlist_del(&dev->name_hlist);
3134 hlist_del(&dev->index_hlist);
3135 if (dev_tail == &dev->next)
3136 dev_tail = dp;
3137 *dp = d->next;
3138 write_unlock_bh(&dev_base_lock);
3139 break;
3142 if (!d) {
3143 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3144 dev->name);
3145 return -ENODEV;
3148 dev->reg_state = NETREG_UNREGISTERING;
3150 synchronize_net();
3152 /* Shutdown queueing discipline. */
3153 dev_shutdown(dev);
3156 /* Notify protocols, that we are about to destroy
3157 this device. They should clean all the things.
3158 */
3159 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3161 /*
3162 * Flush the multicast chain
3163 */
3164 dev_mc_discard(dev);
3166 if (dev->uninit)
3167 dev->uninit(dev);
3169 /* Notifier chain MUST detach us from master device. */
3170 BUG_TRAP(!dev->master);
3172 free_divert_blk(dev);
3174 /* Finish processing unregister after unlock */
3175 net_set_todo(dev);
3177 synchronize_net();
3179 dev_put(dev);
3180 return 0;
3183 /**
3184 * unregister_netdev - remove device from the kernel
3185 * @dev: device
3187 * This function shuts down a device interface and removes it
3188 * from the kernel tables. On success 0 is returned, on a failure
3189 * a negative errno code is returned.
3191 * This is just a wrapper for unregister_netdevice that takes
3192 * the rtnl semaphore. In general you want to use this and not
3193 * unregister_netdevice.
3194 */
3195 void unregister_netdev(struct net_device *dev)
3197 rtnl_lock();
3198 unregister_netdevice(dev);
3199 rtnl_unlock();
3202 EXPORT_SYMBOL(unregister_netdev);
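/*
 * Illustrative sketch: module teardown mirrors the probe path sketched
 * earlier. free_netdev() must come after unregister_netdev(), never
 * before it.
 */
static void example_remove(struct net_device *dev)
{
	unregister_netdev(dev);	/* takes RTNL, waits via the todo list */
	free_netdev(dev);	/* releases the device; memory freed here */
}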
3204 #ifdef CONFIG_HOTPLUG_CPU
3205 static int dev_cpu_callback(struct notifier_block *nfb,
3206 unsigned long action,
3207 void *ocpu)
3209 struct sk_buff **list_skb;
3210 struct net_device **list_net;
3211 struct sk_buff *skb;
3212 unsigned int cpu, oldcpu = (unsigned long)ocpu;
3213 struct softnet_data *sd, *oldsd;
3215 if (action != CPU_DEAD)
3216 return NOTIFY_OK;
3218 local_irq_disable();
3219 cpu = smp_processor_id();
3220 sd = &per_cpu(softnet_data, cpu);
3221 oldsd = &per_cpu(softnet_data, oldcpu);
3223 /* Find end of our completion_queue. */
3224 list_skb = &sd->completion_queue;
3225 while (*list_skb)
3226 list_skb = &(*list_skb)->next;
3227 /* Append completion queue from offline CPU. */
3228 *list_skb = oldsd->completion_queue;
3229 oldsd->completion_queue = NULL;
3231 /* Find end of our output_queue. */
3232 list_net = &sd->output_queue;
3233 while (*list_net)
3234 list_net = &(*list_net)->next_sched;
3235 /* Append output queue from offline CPU. */
3236 *list_net = oldsd->output_queue;
3237 oldsd->output_queue = NULL;
3239 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3240 local_irq_enable();
3242 /* Process offline CPU's input_pkt_queue */
3243 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3244 netif_rx(skb);
3246 return NOTIFY_OK;
3248 #endif /* CONFIG_HOTPLUG_CPU */
3251 /*
3252 * Initialize the DEV module. At boot time this walks the device list and
3253 * unhooks any devices that fail to initialise (normally hardware not
3254 * present) and leaves us with a valid list of present and active devices.
3256 */
3258 /*
3259 * This is called single-threaded during boot, so no need
3260 * to take the rtnl semaphore.
3261 */
3262 static int __init net_dev_init(void)
3264 int i, rc = -ENOMEM;
3266 BUG_ON(!dev_boot_phase);
3268 net_random_init();
3270 if (dev_proc_init())
3271 goto out;
3273 if (netdev_sysfs_init())
3274 goto out;
3276 INIT_LIST_HEAD(&ptype_all);
3277 for (i = 0; i < 16; i++)
3278 INIT_LIST_HEAD(&ptype_base[i]);
3280 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3281 INIT_HLIST_HEAD(&dev_name_head[i]);
3283 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3284 INIT_HLIST_HEAD(&dev_index_head[i]);
3286 /*
3287 * Initialise the packet receive queues.
3288 */
3290 for_each_cpu(i) {
3291 struct softnet_data *queue;
3293 queue = &per_cpu(softnet_data, i);
3294 skb_queue_head_init(&queue->input_pkt_queue);
3295 queue->completion_queue = NULL;
3296 INIT_LIST_HEAD(&queue->poll_list);
3297 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3298 queue->backlog_dev.weight = weight_p;
3299 queue->backlog_dev.poll = process_backlog;
3300 atomic_set(&queue->backlog_dev.refcnt, 1);
3303 dev_boot_phase = 0;
3305 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3306 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3308 hotcpu_notifier(dev_cpu_callback, 0);
3309 dst_init();
3310 dev_mcast_init();
3311 rc = 0;
3312 out:
3313 return rc;
3316 subsys_initcall(net_dev_init);
3318 EXPORT_SYMBOL(__dev_get_by_index);
3319 EXPORT_SYMBOL(__dev_get_by_name);
3320 EXPORT_SYMBOL(__dev_remove_pack);
3321 EXPORT_SYMBOL(__skb_linearize);
3322 EXPORT_SYMBOL(dev_valid_name);
3323 EXPORT_SYMBOL(dev_add_pack);
3324 EXPORT_SYMBOL(dev_alloc_name);
3325 EXPORT_SYMBOL(dev_close);
3326 EXPORT_SYMBOL(dev_get_by_flags);
3327 EXPORT_SYMBOL(dev_get_by_index);
3328 EXPORT_SYMBOL(dev_get_by_name);
3329 EXPORT_SYMBOL(dev_open);
3330 EXPORT_SYMBOL(dev_queue_xmit);
3331 EXPORT_SYMBOL(dev_remove_pack);
3332 EXPORT_SYMBOL(dev_set_allmulti);
3333 EXPORT_SYMBOL(dev_set_promiscuity);
3334 EXPORT_SYMBOL(dev_change_flags);
3335 EXPORT_SYMBOL(dev_set_mtu);
3336 EXPORT_SYMBOL(dev_set_mac_address);
3337 EXPORT_SYMBOL(free_netdev);
3338 EXPORT_SYMBOL(netdev_boot_setup_check);
3339 EXPORT_SYMBOL(netdev_set_master);
3340 EXPORT_SYMBOL(netdev_state_change);
3341 EXPORT_SYMBOL(netif_receive_skb);
3342 EXPORT_SYMBOL(netif_rx);
3343 EXPORT_SYMBOL(register_gifconf);
3344 EXPORT_SYMBOL(register_netdevice);
3345 EXPORT_SYMBOL(register_netdevice_notifier);
3346 EXPORT_SYMBOL(skb_checksum_help);
3347 EXPORT_SYMBOL(synchronize_net);
3348 EXPORT_SYMBOL(unregister_netdevice);
3349 EXPORT_SYMBOL(unregister_netdevice_notifier);
3350 EXPORT_SYMBOL(net_enable_timestamp);
3351 EXPORT_SYMBOL(net_disable_timestamp);
3352 EXPORT_SYMBOL(dev_get_flags);
3354 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3355 EXPORT_SYMBOL(br_handle_frame_hook);
3356 EXPORT_SYMBOL(br_fdb_get_hook);
3357 EXPORT_SYMBOL(br_fdb_put_hook);
3358 #endif
3360 #ifdef CONFIG_KMOD
3361 EXPORT_SYMBOL(dev_load);
3362 #endif
3364 EXPORT_PER_CPU_SYMBOL(softnet_data);