ia64/xen-unstable

view linux-2.6-xen-sparse/net/core/dev.c @ 9843:16bdb9ecb329

Add a return statement to the non-Xen version of skb_checksum_setup.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Apr 25 09:02:53 2006 +0100 (2006-04-25)
parents 1020c52c58c1
children 44e5abbf333b
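The change itself is a one-liner: the stub compiled when CONFIG_XEN is not set is declared to return int, so it needs an explicit return value. After this change the non-Xen definition (line 1256 in the listing below) reads:

    inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }

Presumably the body was previously empty, which would let a non-void function fall off its end and draw a compiler warning.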
line source
1 /*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
75 #include <asm/uaccess.h>
76 #include <asm/system.h>
77 #include <linux/bitops.h>
78 #include <linux/capability.h>
79 #include <linux/config.h>
80 #include <linux/cpu.h>
81 #include <linux/types.h>
82 #include <linux/kernel.h>
83 #include <linux/sched.h>
84 #include <linux/string.h>
85 #include <linux/mm.h>
86 #include <linux/socket.h>
87 #include <linux/sockios.h>
88 #include <linux/errno.h>
89 #include <linux/interrupt.h>
90 #include <linux/if_ether.h>
91 #include <linux/netdevice.h>
92 #include <linux/etherdevice.h>
93 #include <linux/notifier.h>
94 #include <linux/skbuff.h>
95 #include <net/sock.h>
96 #include <linux/rtnetlink.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/stat.h>
100 #include <linux/if_bridge.h>
101 #include <linux/divert.h>
102 #include <net/dst.h>
103 #include <net/pkt_sched.h>
104 #include <net/checksum.h>
105 #include <linux/highmem.h>
106 #include <linux/init.h>
107 #include <linux/kmod.h>
108 #include <linux/module.h>
109 #include <linux/kallsyms.h>
110 #include <linux/netpoll.h>
111 #include <linux/rcupdate.h>
112 #include <linux/delay.h>
113 #ifdef CONFIG_NET_RADIO
114 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
115 #include <net/iw_handler.h>
116 #endif /* CONFIG_NET_RADIO */
117 #include <asm/current.h>
119 #ifdef CONFIG_XEN
120 #include <net/ip.h>
121 #include <linux/tcp.h>
122 #include <linux/udp.h>
123 #endif
125 /*
126 * The list of packet types we will receive (as opposed to discard)
127 * and the routines to invoke.
128 *
129 * Why 16? Because with 16 the only overlap we get on a hash of the
130 * low nibble of the protocol value is RARP/SNAP/X.25.
131 *
132 * NOTE: That is no longer true with the addition of VLAN tags. Not
133 * sure which should go first, but I bet it won't make much
134 * difference if we are running VLANs. The good news is that
135 * this protocol won't be in the list unless compiled in, so
136 * the average user (w/out VLANs) will not be adversely affected.
137 * --BLG
138 *
139 * 0800 IP
140 * 8100 802.1Q VLAN
141 * 0001 802.3
142 * 0002 AX.25
143 * 0004 802.2
144 * 8035 RARP
145 * 0005 SNAP
146 * 0805 X.25
147 * 0806 ARP
148 * 8137 IPX
149 * 0009 Localtalk
150 * 86DD IPv6
151 */
153 static DEFINE_SPINLOCK(ptype_lock);
154 static struct list_head ptype_base[16]; /* 16 way hashed list */
155 static struct list_head ptype_all; /* Taps */
157 /*
158 * The @dev_base list is protected by @dev_base_lock and the rtnl
159 * semaphore.
160 *
161 * Pure readers hold dev_base_lock for reading.
162 *
163 * Writers must hold the rtnl semaphore while they loop through the
164 * dev_base list, and hold dev_base_lock for writing when they do the
165 * actual updates. This allows pure readers to access the list even
166 * while a writer is preparing to update it.
167 *
168 * To put it another way, dev_base_lock is held for writing only to
169 * protect against pure readers; the rtnl semaphore provides the
170 * protection against other writers.
171 *
172 * See, for example usages, register_netdevice() and
173 * unregister_netdevice(), which must be called with the rtnl
174 * semaphore held.
175 */
176 struct net_device *dev_base;
177 static struct net_device **dev_tail = &dev_base;
178 DEFINE_RWLOCK(dev_base_lock);
180 EXPORT_SYMBOL(dev_base);
181 EXPORT_SYMBOL(dev_base_lock);
183 #define NETDEV_HASHBITS 8
184 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
185 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
187 static inline struct hlist_head *dev_name_hash(const char *name)
188 {
189 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
190 return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
191 }
193 static inline struct hlist_head *dev_index_hash(int ifindex)
194 {
195 return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
196 }
198 /*
199 * Our notifier list
200 */
202 static struct notifier_block *netdev_chain;
204 /*
205 * Device drivers call our routines to queue packets here. We empty the
206 * queue in the local softnet handler.
207 */
208 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
210 #ifdef CONFIG_SYSFS
211 extern int netdev_sysfs_init(void);
212 extern int netdev_register_sysfs(struct net_device *);
213 extern void netdev_unregister_sysfs(struct net_device *);
214 #else
215 #define netdev_sysfs_init() (0)
216 #define netdev_register_sysfs(dev) (0)
217 #define netdev_unregister_sysfs(dev) do { } while(0)
218 #endif
221 /*******************************************************************************
223 Protocol management and registration routines
225 *******************************************************************************/
227 /*
228 * For efficiency
229 */
231 int netdev_nit;
233 /*
234 * Add a protocol ID to the list. Now that the input handler is
235 * smarter we can dispense with all the messy stuff that used to be
236 * here.
237 *
238 * BEWARE!!! Protocol handlers, mangling input packets,
239 * MUST BE last in hash buckets and checking protocol handlers
240 * MUST start from promiscuous ptype_all chain in net_bh.
241 * It is true now, do not change it.
242 * Explanation follows: if protocol handler, mangling packet, will
243 * be the first on list, it is not able to sense, that packet
244 * is cloned and should be copied-on-write, so that it will
245 * change it and subsequent readers will get broken packet.
246 * --ANK (980803)
247 */
249 /**
250 * dev_add_pack - add packet handler
251 * @pt: packet type declaration
252 *
253 * Add a protocol handler to the networking stack. The passed &packet_type
254 * is linked into kernel lists and may not be freed until it has been
255 * removed from the kernel lists.
256 *
257 * This call does not sleep, therefore it cannot
258 * guarantee that all CPUs that are in the middle of receiving packets
259 * will see the new packet type (until the next received packet).
260 */
262 void dev_add_pack(struct packet_type *pt)
263 {
264 int hash;
266 spin_lock_bh(&ptype_lock);
267 if (pt->type == htons(ETH_P_ALL)) {
268 netdev_nit++;
269 list_add_rcu(&pt->list, &ptype_all);
270 } else {
271 hash = ntohs(pt->type) & 15;
272 list_add_rcu(&pt->list, &ptype_base[hash]);
273 }
274 spin_unlock_bh(&ptype_lock);
275 }
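/*
 * A minimal usage sketch (hypothetical module code, not part of this
 * file), assuming a handler my_rcv() with the packet_type func signature
 * used by deliver_skb() below:
 *
 *	static int my_rcv(struct sk_buff *skb, struct net_device *dev,
 *			  struct packet_type *pt, struct net_device *orig_dev);
 *
 *	static struct packet_type my_ptype = {
 *		.type = __constant_htons(ETH_P_IP),
 *		.func = my_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);
 *	...
 *	dev_remove_pack(&my_ptype);
 */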
277 /**
278 * __dev_remove_pack - remove packet handler
279 * @pt: packet type declaration
280 *
281 * Remove a protocol handler that was previously added to the kernel
282 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
283 * from the kernel lists and can be freed or reused once this function
284 * returns.
285 *
286 * The packet type might still be in use by receivers
287 * and must not be freed until after all the CPUs have gone
288 * through a quiescent state.
289 */
290 void __dev_remove_pack(struct packet_type *pt)
291 {
292 struct list_head *head;
293 struct packet_type *pt1;
295 spin_lock_bh(&ptype_lock);
297 if (pt->type == htons(ETH_P_ALL)) {
298 netdev_nit--;
299 head = &ptype_all;
300 } else
301 head = &ptype_base[ntohs(pt->type) & 15];
303 list_for_each_entry(pt1, head, list) {
304 if (pt == pt1) {
305 list_del_rcu(&pt->list);
306 goto out;
307 }
308 }
310 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
311 out:
312 spin_unlock_bh(&ptype_lock);
313 }
314 /**
315 * dev_remove_pack - remove packet handler
316 * @pt: packet type declaration
317 *
318 * Remove a protocol handler that was previously added to the kernel
319 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
320 * from the kernel lists and can be freed or reused once this function
321 * returns.
322 *
323 * This call sleeps to guarantee that no CPU is looking at the packet
324 * type after return.
325 */
326 void dev_remove_pack(struct packet_type *pt)
327 {
328 __dev_remove_pack(pt);
330 synchronize_net();
331 }
333 /******************************************************************************
335 Device Boot-time Settings Routines
337 *******************************************************************************/
339 /* Boot time configuration table */
340 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
342 /**
343 * netdev_boot_setup_add - add new setup entry
344 * @name: name of the device
345 * @map: configured settings for the device
346 *
347 * Adds a new setup entry to the dev_boot_setup list. The function
348 * returns 0 on error and 1 on success. This is a generic routine for
349 * all netdevices.
350 */
351 static int netdev_boot_setup_add(char *name, struct ifmap *map)
352 {
353 struct netdev_boot_setup *s;
354 int i;
356 s = dev_boot_setup;
357 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
358 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
359 memset(s[i].name, 0, sizeof(s[i].name));
360 strcpy(s[i].name, name);
361 memcpy(&s[i].map, map, sizeof(s[i].map));
362 break;
363 }
364 }
366 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
367 }
369 /**
370 * netdev_boot_setup_check - check boot time settings
371 * @dev: the netdevice
372 *
373 * Check boot time settings for the device.
374 * The found settings are set for the device to be used
375 * later in the device probing.
376 * Returns 0 if no settings found, 1 if they are.
377 */
378 int netdev_boot_setup_check(struct net_device *dev)
379 {
380 struct netdev_boot_setup *s = dev_boot_setup;
381 int i;
383 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
384 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
385 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
386 dev->irq = s[i].map.irq;
387 dev->base_addr = s[i].map.base_addr;
388 dev->mem_start = s[i].map.mem_start;
389 dev->mem_end = s[i].map.mem_end;
390 return 1;
391 }
392 }
393 return 0;
394 }
397 /**
398 * netdev_boot_base - get address from boot time settings
399 * @prefix: prefix for network device
400 * @unit: id for network device
401 *
402 * Check boot time settings for the base address of device.
403 * The found settings are set for the device to be used
404 * later in the device probing.
405 * Returns 0 if no settings found.
406 */
407 unsigned long netdev_boot_base(const char *prefix, int unit)
408 {
409 const struct netdev_boot_setup *s = dev_boot_setup;
410 char name[IFNAMSIZ];
411 int i;
413 sprintf(name, "%s%d", prefix, unit);
415 /*
416 * If device already registered then return base of 1
417 * to indicate not to probe for this interface
418 */
419 if (__dev_get_by_name(name))
420 return 1;
422 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
423 if (!strcmp(name, s[i].name))
424 return s[i].map.base_addr;
425 return 0;
426 }
428 /*
429 * Saves at boot time configured settings for any netdevice.
430 */
431 int __init netdev_boot_setup(char *str)
432 {
433 int ints[5];
434 struct ifmap map;
436 str = get_options(str, ARRAY_SIZE(ints), ints);
437 if (!str || !*str)
438 return 0;
440 /* Save settings */
441 memset(&map, 0, sizeof(map));
442 if (ints[0] > 0)
443 map.irq = ints[1];
444 if (ints[0] > 1)
445 map.base_addr = ints[2];
446 if (ints[0] > 2)
447 map.mem_start = ints[3];
448 if (ints[0] > 3)
449 map.mem_end = ints[4];
451 /* Add new entry to the list */
452 return netdev_boot_setup_add(str, &map);
453 }
455 __setup("netdev=", netdev_boot_setup);
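/*
 * For example, a boot line such as (hypothetical values)
 *
 *	netdev=5,0x300,0,0,eth0
 *
 * is parsed by get_options() above into irq=5, base_addr=0x300,
 * mem_start=0, mem_end=0, and the remaining string "eth0" becomes the
 * name passed to netdev_boot_setup_add().
 */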
457 /*******************************************************************************
459 Device Interface Subroutines
461 *******************************************************************************/
463 /**
464 * __dev_get_by_name - find a device by its name
465 * @name: name to find
466 *
467 * Find an interface by name. Must be called under RTNL semaphore
468 * or @dev_base_lock. If the name is found a pointer to the device
469 * is returned. If the name is not found then %NULL is returned. The
470 * reference counters are not incremented so the caller must be
471 * careful with locks.
472 */
474 struct net_device *__dev_get_by_name(const char *name)
475 {
476 struct hlist_node *p;
478 hlist_for_each(p, dev_name_hash(name)) {
479 struct net_device *dev
480 = hlist_entry(p, struct net_device, name_hlist);
481 if (!strncmp(dev->name, name, IFNAMSIZ))
482 return dev;
483 }
484 return NULL;
485 }
487 /**
488 * dev_get_by_name - find a device by its name
489 * @name: name to find
490 *
491 * Find an interface by name. This can be called from any
492 * context and does its own locking. The returned handle has
493 * the usage count incremented and the caller must use dev_put() to
494 * release it when it is no longer needed. %NULL is returned if no
495 * matching device is found.
496 */
498 struct net_device *dev_get_by_name(const char *name)
499 {
500 struct net_device *dev;
502 read_lock(&dev_base_lock);
503 dev = __dev_get_by_name(name);
504 if (dev)
505 dev_hold(dev);
506 read_unlock(&dev_base_lock);
507 return dev;
508 }
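/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 *
 *	struct net_device *dev = dev_get_by_name("eth0");
 *	if (dev) {
 *		...			use the device
 *		dev_put(dev);		drop the reference taken above
 *	}
 */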
510 /**
511 * __dev_get_by_index - find a device by its ifindex
512 * @ifindex: index of device
513 *
514 * Search for an interface by index. Returns %NULL if the device
515 * is not found or a pointer to the device. The device has not
516 * had its reference counter increased so the caller must be careful
517 * about locking. The caller must hold either the RTNL semaphore
518 * or @dev_base_lock.
519 */
521 struct net_device *__dev_get_by_index(int ifindex)
522 {
523 struct hlist_node *p;
525 hlist_for_each(p, dev_index_hash(ifindex)) {
526 struct net_device *dev
527 = hlist_entry(p, struct net_device, index_hlist);
528 if (dev->ifindex == ifindex)
529 return dev;
530 }
531 return NULL;
532 }
535 /**
536 * dev_get_by_index - find a device by its ifindex
537 * @ifindex: index of device
538 *
539 * Search for an interface by index. Returns NULL if the device
540 * is not found or a pointer to the device. The device returned has
541 * had a reference added and the pointer is safe until the user calls
542 * dev_put to indicate they have finished with it.
543 */
545 struct net_device *dev_get_by_index(int ifindex)
546 {
547 struct net_device *dev;
549 read_lock(&dev_base_lock);
550 dev = __dev_get_by_index(ifindex);
551 if (dev)
552 dev_hold(dev);
553 read_unlock(&dev_base_lock);
554 return dev;
555 }
557 /**
558 * dev_getbyhwaddr - find a device by its hardware address
559 * @type: media type of device
560 * @ha: hardware address
561 *
562 * Search for an interface by MAC address. Returns NULL if the device
563 * is not found or a pointer to the device. The caller must hold the
564 * rtnl semaphore. The returned device has not had its ref count increased
565 * and the caller must therefore be careful about locking
566 *
567 * BUGS:
568 * If the API was consistent this would be __dev_get_by_hwaddr
569 */
571 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
572 {
573 struct net_device *dev;
575 ASSERT_RTNL();
577 for (dev = dev_base; dev; dev = dev->next)
578 if (dev->type == type &&
579 !memcmp(dev->dev_addr, ha, dev->addr_len))
580 break;
581 return dev;
582 }
584 EXPORT_SYMBOL(dev_getbyhwaddr);
586 struct net_device *dev_getfirstbyhwtype(unsigned short type)
587 {
588 struct net_device *dev;
590 rtnl_lock();
591 for (dev = dev_base; dev; dev = dev->next) {
592 if (dev->type == type) {
593 dev_hold(dev);
594 break;
595 }
596 }
597 rtnl_unlock();
598 return dev;
599 }
601 EXPORT_SYMBOL(dev_getfirstbyhwtype);
603 /**
604 * dev_get_by_flags - find any device with given flags
605 * @if_flags: IFF_* values
606 * @mask: bitmask of bits in if_flags to check
607 *
608 * Search for any interface with the given flags. Returns NULL if a device
609 * is not found or a pointer to the device. The device returned has
610 * had a reference added and the pointer is safe until the user calls
611 * dev_put to indicate they have finished with it.
612 */
614 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
615 {
616 struct net_device *dev;
618 read_lock(&dev_base_lock);
619 for (dev = dev_base; dev != NULL; dev = dev->next) {
620 if (((dev->flags ^ if_flags) & mask) == 0) {
621 dev_hold(dev);
622 break;
623 }
624 }
625 read_unlock(&dev_base_lock);
626 return dev;
627 }
629 /**
630 * dev_valid_name - check if name is okay for network device
631 * @name: name string
632 *
633 * Network device names need to be valid file names
634 * to allow sysfs to work.
635 */
636 int dev_valid_name(const char *name)
637 {
638 return !(*name == '\0'
639 || !strcmp(name, ".")
640 || !strcmp(name, "..")
641 || strchr(name, '/'));
642 }
644 /**
645 * dev_alloc_name - allocate a name for a device
646 * @dev: device
647 * @name: name format string
648 *
649 * Passed a format string, e.g. "lt%d", it will try to find a suitable
650 * id. Not efficient for many devices, not called a lot. The caller
651 * must hold the dev_base or rtnl lock while allocating the name and
652 * adding the device in order to avoid duplicates. Returns the number
653 * of the unit assigned or a negative errno code.
654 */
656 int dev_alloc_name(struct net_device *dev, const char *name)
657 {
658 int i = 0;
659 char buf[IFNAMSIZ];
660 const char *p;
661 const int max_netdevices = 8*PAGE_SIZE;
662 long *inuse;
663 struct net_device *d;
665 p = strnchr(name, IFNAMSIZ-1, '%');
666 if (p) {
667 /*
668 * Verify the string as this thing may have come from
669 * the user. There must be either one "%d" and no other "%"
670 * characters.
671 */
672 if (p[1] != 'd' || strchr(p + 2, '%'))
673 return -EINVAL;
675 /* Use one page as a bit array of possible slots */
676 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
677 if (!inuse)
678 return -ENOMEM;
680 for (d = dev_base; d; d = d->next) {
681 if (!sscanf(d->name, name, &i))
682 continue;
683 if (i < 0 || i >= max_netdevices)
684 continue;
686 /* avoid cases where sscanf is not exact inverse of printf */
687 snprintf(buf, sizeof(buf), name, i);
688 if (!strncmp(buf, d->name, IFNAMSIZ))
689 set_bit(i, inuse);
690 }
692 i = find_first_zero_bit(inuse, max_netdevices);
693 free_page((unsigned long) inuse);
694 }
696 snprintf(buf, sizeof(buf), name, i);
697 if (!__dev_get_by_name(buf)) {
698 strlcpy(dev->name, buf, IFNAMSIZ);
699 return i;
700 }
702 /* It is possible to run out of possible slots
703 * when the name is long and there isn't enough space left
704 * for the digits, or if all bits are used.
705 */
706 return -ENFILE;
707 }
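/*
 * For example (hypothetical caller): dev_alloc_name(dev, "eth%d") scans
 * the existing "eth0", "eth1", ... names, writes the first free name into
 * dev->name, and returns the unit number chosen (e.g. 2 for "eth2").
 */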
710 /**
711 * dev_change_name - change name of a device
712 * @dev: device
713 * @newname: name (or format string) must be at least IFNAMSIZ
714 *
715 * Change the name of a device; format strings such as "eth%d"
716 * can be passed for wildcarding.
717 */
718 int dev_change_name(struct net_device *dev, char *newname)
719 {
720 int err = 0;
722 ASSERT_RTNL();
724 if (dev->flags & IFF_UP)
725 return -EBUSY;
727 if (!dev_valid_name(newname))
728 return -EINVAL;
730 if (strchr(newname, '%')) {
731 err = dev_alloc_name(dev, newname);
732 if (err < 0)
733 return err;
734 strcpy(newname, dev->name);
735 }
736 else if (__dev_get_by_name(newname))
737 return -EEXIST;
738 else
739 strlcpy(dev->name, newname, IFNAMSIZ);
741 err = class_device_rename(&dev->class_dev, dev->name);
742 if (!err) {
743 hlist_del(&dev->name_hlist);
744 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
745 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
746 }
748 return err;
749 }
751 /**
752 * netdev_features_change - device changes features
753 * @dev: device to cause notification
754 *
755 * Called to indicate a device has changed features.
756 */
757 void netdev_features_change(struct net_device *dev)
758 {
759 notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
760 }
761 EXPORT_SYMBOL(netdev_features_change);
763 /**
764 * netdev_state_change - device changes state
765 * @dev: device to cause notification
766 *
767 * Called to indicate a device has changed state. This function calls
768 * the notifier chains for netdev_chain and sends a NEWLINK message
769 * to the routing socket.
770 */
771 void netdev_state_change(struct net_device *dev)
772 {
773 if (dev->flags & IFF_UP) {
774 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
775 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
776 }
777 }
779 /**
780 * dev_load - load a network module
781 * @name: name of interface
782 *
783 * If a network interface is not present and the process has suitable
784 * privileges this function loads the module. If module loading is not
785 * available in this kernel then it becomes a nop.
786 */
788 void dev_load(const char *name)
789 {
790 struct net_device *dev;
792 read_lock(&dev_base_lock);
793 dev = __dev_get_by_name(name);
794 read_unlock(&dev_base_lock);
796 if (!dev && capable(CAP_SYS_MODULE))
797 request_module("%s", name);
798 }
800 static int default_rebuild_header(struct sk_buff *skb)
801 {
802 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
802 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
803 skb->dev ? skb->dev->name : "NULL!!!");
804 kfree_skb(skb);
805 return 1;
806 }
809 /**
810 * dev_open - prepare an interface for use.
811 * @dev: device to open
812 *
813 * Takes a device from down to up state. The device's private open
814 * function is invoked and then the multicast lists are loaded. Finally
815 * the device is moved into the up state and a %NETDEV_UP message is
816 * sent to the netdev notifier chain.
817 *
818 * Calling this function on an active interface is a nop. On a failure
819 * a negative errno code is returned.
820 */
821 int dev_open(struct net_device *dev)
822 {
823 int ret = 0;
825 /*
826 * Is it already up?
827 */
829 if (dev->flags & IFF_UP)
830 return 0;
832 /*
833 * Is it even present?
834 */
835 if (!netif_device_present(dev))
836 return -ENODEV;
838 /*
839 * Call device private open method
840 */
841 set_bit(__LINK_STATE_START, &dev->state);
842 if (dev->open) {
843 ret = dev->open(dev);
844 if (ret)
845 clear_bit(__LINK_STATE_START, &dev->state);
846 }
848 /*
849 * If it went open OK then:
850 */
852 if (!ret) {
853 /*
854 * Set the flags.
855 */
856 dev->flags |= IFF_UP;
858 /*
859 * Initialize multicasting status
860 */
861 dev_mc_upload(dev);
863 /*
864 * Wakeup transmit queue engine
865 */
866 dev_activate(dev);
868 /*
869 * ... and announce new interface.
870 */
871 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
872 }
873 return ret;
874 }
876 /**
877 * dev_close - shutdown an interface.
878 * @dev: device to shutdown
879 *
880 * This function moves an active device into down state. A
881 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
882 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
883 * chain.
884 */
885 int dev_close(struct net_device *dev)
886 {
887 if (!(dev->flags & IFF_UP))
888 return 0;
890 /*
891 * Tell people we are going down, so that they can
892 * prepare for it while the device is still operating.
893 */
894 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
896 dev_deactivate(dev);
898 clear_bit(__LINK_STATE_START, &dev->state);
900 /* Synchronize to scheduled poll. We cannot touch the poll list;
901 * it can even be on a different cpu. So just clear netif_running(),
902 * and wait until the poll really happens. Actually, the best place
903 * for this is inside dev->stop() after the device has stopped its irq
904 * engine, but this requires more changes in devices. */
906 smp_mb__after_clear_bit(); /* Commit netif_running(). */
907 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
908 /* No hurry. */
909 msleep(1);
910 }
912 /*
913 * Call the device specific close. This cannot fail.
914 * Only if device is UP
915 *
916 * We allow it to be called even after a DETACH hot-plug
917 * event.
918 */
919 if (dev->stop)
920 dev->stop(dev);
922 /*
923 * Device is now down.
924 */
926 dev->flags &= ~IFF_UP;
928 /*
929 * Tell people we are down
930 */
931 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
933 return 0;
934 }
937 /*
938 * Device change register/unregister. These are not inline or static
939 * as we export them to the world.
940 */
942 /**
943 * register_netdevice_notifier - register a network notifier block
944 * @nb: notifier
945 *
946 * Register a notifier to be called when network device events occur.
947 * The notifier passed is linked into the kernel structures and must
948 * not be reused until it has been unregistered. A negative errno code
949 * is returned on a failure.
950 *
951 * When registered, all registration and up events are replayed
952 * to the new notifier so that it has a race-free
953 * view of the network device list.
954 */
956 int register_netdevice_notifier(struct notifier_block *nb)
957 {
958 struct net_device *dev;
959 int err;
961 rtnl_lock();
962 err = notifier_chain_register(&netdev_chain, nb);
963 if (!err) {
964 for (dev = dev_base; dev; dev = dev->next) {
965 nb->notifier_call(nb, NETDEV_REGISTER, dev);
967 if (dev->flags & IFF_UP)
968 nb->notifier_call(nb, NETDEV_UP, dev);
969 }
970 }
971 rtnl_unlock();
972 return err;
973 }
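/*
 * A minimal usage sketch (hypothetical module code, not part of this file):
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 */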
975 /**
976 * unregister_netdevice_notifier - unregister a network notifier block
977 * @nb: notifier
978 *
979 * Unregister a notifier previously registered by
980 * register_netdevice_notifier(). The notifier is unlinked from the
981 * kernel structures and may then be reused. A negative errno code
982 * is returned on a failure.
983 */
985 int unregister_netdevice_notifier(struct notifier_block *nb)
986 {
987 return notifier_chain_unregister(&netdev_chain, nb);
988 }
990 /**
991 * call_netdevice_notifiers - call all network notifier blocks
992 * @val: value passed unmodified to notifier function
993 * @v: pointer passed unmodified to notifier function
994 *
995 * Call all network notifier blocks. Parameters and return value
996 * are as for notifier_call_chain().
997 */
999 int call_netdevice_notifiers(unsigned long val, void *v)
1000 {
1001 return notifier_call_chain(&netdev_chain, val, v);
1002 }
1004 /* When > 0 there are consumers of rx skb time stamps */
1005 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1007 void net_enable_timestamp(void)
1008 {
1009 atomic_inc(&netstamp_needed);
1010 }
1012 void net_disable_timestamp(void)
1013 {
1014 atomic_dec(&netstamp_needed);
1015 }
1017 void __net_timestamp(struct sk_buff *skb)
1018 {
1019 struct timeval tv;
1021 do_gettimeofday(&tv);
1022 skb_set_timestamp(skb, &tv);
1023 }
1024 EXPORT_SYMBOL(__net_timestamp);
1026 static inline void net_timestamp(struct sk_buff *skb)
1027 {
1028 if (atomic_read(&netstamp_needed))
1029 __net_timestamp(skb);
1030 else {
1031 skb->tstamp.off_sec = 0;
1032 skb->tstamp.off_usec = 0;
1033 }
1034 }
1036 /*
1037 * Support routine. Sends outgoing frames to any network
1038 * taps currently in use.
1039 */
1041 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1042 {
1043 struct packet_type *ptype;
1045 net_timestamp(skb);
1047 rcu_read_lock();
1048 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1049 /* Never send packets back to the socket
1050 * they originated from - MvS (miquels@drinkel.ow.org)
1051 */
1052 if ((ptype->dev == dev || !ptype->dev) &&
1053 (ptype->af_packet_priv == NULL ||
1054 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1055 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1056 if (!skb2)
1057 break;
1059 /* skb->nh should be correctly
1060 set by sender, so that the second statement is
1061 just protection against buggy protocols.
1062 */
1063 skb2->mac.raw = skb2->data;
1065 if (skb2->nh.raw < skb2->data ||
1066 skb2->nh.raw > skb2->tail) {
1067 if (net_ratelimit())
1068 printk(KERN_CRIT "protocol %04x is "
1069 "buggy, dev %s\n",
1070 skb2->protocol, dev->name);
1071 skb2->nh.raw = skb2->data;
1072 }
1074 skb2->h.raw = skb2->nh.raw;
1075 skb2->pkt_type = PACKET_OUTGOING;
1076 ptype->func(skb2, skb->dev, ptype, skb->dev);
1077 }
1078 }
1079 rcu_read_unlock();
1080 }
1082 /*
1083 * Invalidate hardware checksum when packet is to be mangled, and
1084 * complete checksum manually on outgoing path.
1085 */
1086 int skb_checksum_help(struct sk_buff *skb, int inward)
1087 {
1088 unsigned int csum;
1089 int ret = 0, offset = skb->h.raw - skb->data;
1091 if (inward) {
1092 skb->ip_summed = CHECKSUM_NONE;
1093 goto out;
1094 }
1096 if (skb_cloned(skb)) {
1097 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1098 if (ret)
1099 goto out;
1100 }
1102 BUG_ON(offset > (int)skb->len);
1103 csum = skb_checksum(skb, offset, skb->len-offset, 0);
1105 offset = skb->tail - skb->h.raw;
1106 BUG_ON(offset <= 0);
1107 BUG_ON(skb->csum + 2 > offset);
1109 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
1110 skb->ip_summed = CHECKSUM_NONE;
1111 out:
1112 return ret;
1113 }
1115 /* Take action when hardware reception checksum errors are detected. */
1116 #ifdef CONFIG_BUG
1117 void netdev_rx_csum_fault(struct net_device *dev)
1118 {
1119 if (net_ratelimit()) {
1120 printk(KERN_ERR "%s: hw csum failure.\n",
1121 dev ? dev->name : "<unknown>");
1122 dump_stack();
1123 }
1124 }
1125 EXPORT_SYMBOL(netdev_rx_csum_fault);
1126 #endif
1128 #ifdef CONFIG_HIGHMEM
1129 /* Actually, we should eliminate this check as soon as we know, that:
1130 * 1. IOMMU is present and allows to map all the memory.
1131 * 2. No high memory really exists on this machine.
1132 */
1134 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1135 {
1136 int i;
1138 if (dev->features & NETIF_F_HIGHDMA)
1139 return 0;
1141 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1142 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1143 return 1;
1145 return 0;
1146 }
1147 #else
1148 #define illegal_highdma(dev, skb) (0)
1149 #endif
1151 /* Keep head the same: replace data */
1152 int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
1153 {
1154 unsigned int size;
1155 u8 *data;
1156 long offset;
1157 struct skb_shared_info *ninfo;
1158 int headerlen = skb->data - skb->head;
1159 int expand = (skb->tail + skb->data_len) - skb->end;
1161 if (skb_shared(skb))
1162 BUG();
1164 if (expand <= 0)
1165 expand = 0;
1167 size = skb->end - skb->head + expand;
1168 size = SKB_DATA_ALIGN(size);
1169 data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
1170 if (!data)
1171 return -ENOMEM;
1173 /* Copy entire thing */
1174 if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
1175 BUG();
1177 /* Set up shinfo */
1178 ninfo = (struct skb_shared_info*)(data + size);
1179 atomic_set(&ninfo->dataref, 1);
1180 ninfo->tso_size = skb_shinfo(skb)->tso_size;
1181 ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
1182 ninfo->nr_frags = 0;
1183 ninfo->frag_list = NULL;
1185 /* Offset between the two in bytes */
1186 offset = data - skb->head;
1188 /* Free old data. */
1189 skb_release_data(skb);
1191 skb->head = data;
1192 skb->end = data + size;
1194 /* Set up new pointers */
1195 skb->h.raw += offset;
1196 skb->nh.raw += offset;
1197 skb->mac.raw += offset;
1198 skb->tail += offset;
1199 skb->data += offset;
1201 /* We are no longer a clone, even if we were. */
1202 skb->cloned = 0;
1204 skb->tail += skb->data_len;
1205 skb->data_len = 0;
1206 return 0;
1207 }
1209 #define HARD_TX_LOCK(dev, cpu) { \
1210 if ((dev->features & NETIF_F_LLTX) == 0) { \
1211 spin_lock(&dev->xmit_lock); \
1212 dev->xmit_lock_owner = cpu; \
1213 } \
1214 }
1216 #define HARD_TX_UNLOCK(dev) { \
1217 if ((dev->features & NETIF_F_LLTX) == 0) { \
1218 dev->xmit_lock_owner = -1; \
1219 spin_unlock(&dev->xmit_lock); \
1220 } \
1221 }
1223 #ifdef CONFIG_XEN
1224 inline int skb_checksum_setup(struct sk_buff *skb)
1225 {
1226 if (skb->proto_csum_blank) {
1227 if (skb->protocol != htons(ETH_P_IP))
1228 goto out;
1229 skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
1230 if (skb->h.raw >= skb->tail)
1231 goto out;
1232 switch (skb->nh.iph->protocol) {
1233 case IPPROTO_TCP:
1234 skb->csum = offsetof(struct tcphdr, check);
1235 break;
1236 case IPPROTO_UDP:
1237 skb->csum = offsetof(struct udphdr, check);
1238 break;
1239 default:
1240 if (net_ratelimit())
1241 printk(KERN_ERR "Attempting to checksum a non-"
1242 "TCP/UDP packet, dropping a protocol"
1243 " %d packet", skb->nh.iph->protocol);
1244 goto out;
1245 }
1246 if ((skb->h.raw + skb->csum + 2) > skb->tail)
1247 goto out;
1248 skb->ip_summed = CHECKSUM_HW;
1249 skb->proto_csum_blank = 0;
1250 }
1251 return 0;
1252 out:
1253 return -EPROTO;
1254 }
1255 #else
1256 inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
1257 #endif
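/*
 * After skb_checksum_setup() above, skb->csum holds the offset of the
 * checksum field within the transport header, so a backend can locate
 * the field as (a sketch based on the code above):
 *
 *	u16 *check = (u16 *)(skb->h.raw + skb->csum);
 *
 * which is exactly the location skb_checksum_help() fills in with
 * csum_fold() on the transmit path.
 */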
1260 /**
1261 * dev_queue_xmit - transmit a buffer
1262 * @skb: buffer to transmit
1264 * Queue a buffer for transmission to a network device. The caller must
1265 * have set the device and priority and built the buffer before calling
1266 * this function. The function can be called from an interrupt.
1268 * A negative errno code is returned on a failure. A success does not
1269 * guarantee the frame will be transmitted as it may be dropped due
1270 * to congestion or traffic shaping.
1272 * -----------------------------------------------------------------------------------
1273 * I notice this method can also return errors from the queue disciplines,
1274 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1275 * be positive.
1277 * Regardless of the return value, the skb is consumed, so it is currently
1278 * difficult to retry a send to this method. (You can bump the ref count
1279 * before sending to hold a reference for retry if you are careful.)
1281 * When calling this method, interrupts MUST be enabled. This is because
1282 * the BH enable code must have IRQs enabled so that it will not deadlock.
1283 * --BLG
1284 */
1286 int dev_queue_xmit(struct sk_buff *skb)
1287 {
1288 struct net_device *dev = skb->dev;
1289 struct Qdisc *q;
1290 int rc = -ENOMEM;
1292 if (skb_shinfo(skb)->frag_list &&
1293 !(dev->features & NETIF_F_FRAGLIST) &&
1294 __skb_linearize(skb, GFP_ATOMIC))
1295 goto out_kfree_skb;
1297 /* Fragmented skb is linearized if device does not support SG,
1298 * or if at least one of fragments is in highmem and device
1299 * does not support DMA from it.
1300 */
1301 if (skb_shinfo(skb)->nr_frags &&
1302 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1303 __skb_linearize(skb, GFP_ATOMIC))
1304 goto out_kfree_skb;
1306 /* If a checksum-deferred packet is forwarded to a device that needs a
1307 * checksum, correct the pointers and force checksumming.
1308 */
1309 if(skb_checksum_setup(skb))
1310 goto out_kfree_skb;
1312 /* If packet is not checksummed and device does not support
1313 * checksumming for this protocol, complete checksumming here.
1314 */
1315 if (skb->ip_summed == CHECKSUM_HW &&
1316 (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
1317 (!(dev->features & NETIF_F_IP_CSUM) ||
1318 skb->protocol != htons(ETH_P_IP))))
1319 if (skb_checksum_help(skb, 0))
1320 goto out_kfree_skb;
1322 spin_lock_prefetch(&dev->queue_lock);
1324 /* Disable soft irqs for various locks below. Also
1325 * stops preemption for RCU.
1326 */
1327 local_bh_disable();
1329 /* Updates of qdisc are serialized by queue_lock.
1330 * The struct Qdisc which is pointed to by qdisc is now a
1331 * rcu structure - it may be accessed without acquiring
1332 * a lock (but the structure may be stale.) The freeing of the
1333 * qdisc will be deferred until it's known that there are no
1334 * more references to it.
1336 * If the qdisc has an enqueue function, we still need to
1337 * hold the queue_lock before calling it, since queue_lock
1338 * also serializes access to the device queue.
1339 */
1341 q = rcu_dereference(dev->qdisc);
1342 #ifdef CONFIG_NET_CLS_ACT
1343 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1344 #endif
1345 if (q->enqueue) {
1346 /* Grab device queue */
1347 spin_lock(&dev->queue_lock);
1349 rc = q->enqueue(skb, q);
1351 qdisc_run(dev);
1353 spin_unlock(&dev->queue_lock);
1354 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1355 goto out;
1356 }
1358 /* The device has no queue. Common case for software devices:
1359 loopback, all the sorts of tunnels...
1361 Really, it is unlikely that xmit_lock protection is necessary here.
1362 (f.e. loopback and IP tunnels are clean ignoring statistics
1363 counters.)
1364 However, it is possible, that they rely on protection
1365 made by us here.
1367 Check this and take the lock. It is not prone to deadlocks.
1368 Or shoot the noqueue qdisc; that is even simpler 8)
1369 */
1370 if (dev->flags & IFF_UP) {
1371 int cpu = smp_processor_id(); /* ok because BHs are off */
1373 if (dev->xmit_lock_owner != cpu) {
1375 HARD_TX_LOCK(dev, cpu);
1377 if (!netif_queue_stopped(dev)) {
1378 if (netdev_nit)
1379 dev_queue_xmit_nit(skb, dev);
1381 rc = 0;
1382 if (!dev->hard_start_xmit(skb, dev)) {
1383 HARD_TX_UNLOCK(dev);
1384 goto out;
1385 }
1386 }
1387 HARD_TX_UNLOCK(dev);
1388 if (net_ratelimit())
1389 printk(KERN_CRIT "Virtual device %s asks to "
1390 "queue packet!\n", dev->name);
1391 } else {
1392 /* Recursion is detected! It is possible,
1393 * unfortunately */
1394 if (net_ratelimit())
1395 printk(KERN_CRIT "Dead loop on virtual device "
1396 "%s, fix it urgently!\n", dev->name);
1400 rc = -ENETDOWN;
1401 local_bh_enable();
1403 out_kfree_skb:
1404 kfree_skb(skb);
1405 return rc;
1406 out:
1407 local_bh_enable();
1408 return rc;
1409 }
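/*
 * A minimal transmit sketch (hypothetical caller, not part of this file):
 *
 *	skb->dev = dev;			caller sets the device ...
 *	skb->priority = 0;		... and the priority beforehand
 *	rc = dev_queue_xmit(skb);	skb is consumed even on error
 */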
1412 /*=======================================================================
1413 Receiver routines
1414 =======================================================================*/
1416 int netdev_max_backlog = 1000;
1417 int netdev_budget = 300;
1418 int weight_p = 64; /* old backlog weight */
1420 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1423 /**
1424 * netif_rx - post buffer to the network code
1425 * @skb: buffer to post
1427 * This function receives a packet from a device driver and queues it for
1428 * the upper (protocol) levels to process. It always succeeds. The buffer
1429 * may be dropped during processing for congestion control or by the
1430 * protocol layers.
1432 * return values:
1433 * NET_RX_SUCCESS (no congestion)
1434 * NET_RX_CN_LOW (low congestion)
1435 * NET_RX_CN_MOD (moderate congestion)
1436 * NET_RX_CN_HIGH (high congestion)
1437 * NET_RX_DROP (packet was dropped)
1439 */
1441 int netif_rx(struct sk_buff *skb)
1442 {
1443 struct softnet_data *queue;
1444 unsigned long flags;
1446 /* if netpoll wants it, pretend we never saw it */
1447 if (netpoll_rx(skb))
1448 return NET_RX_DROP;
1450 if (!skb->tstamp.off_sec)
1451 net_timestamp(skb);
1453 /*
1454 * The code is rearranged so that the path is the
1455 * shortest when the CPU is congested, but it still operates.
1456 */
1457 local_irq_save(flags);
1458 queue = &__get_cpu_var(softnet_data);
1460 __get_cpu_var(netdev_rx_stat).total++;
1461 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1462 if (queue->input_pkt_queue.qlen) {
1463 enqueue:
1464 dev_hold(skb->dev);
1465 __skb_queue_tail(&queue->input_pkt_queue, skb);
1466 local_irq_restore(flags);
1467 return NET_RX_SUCCESS;
1468 }
1470 netif_rx_schedule(&queue->backlog_dev);
1471 goto enqueue;
1472 }
1474 __get_cpu_var(netdev_rx_stat).dropped++;
1475 local_irq_restore(flags);
1477 kfree_skb(skb);
1478 return NET_RX_DROP;
1479 }
1481 int netif_rx_ni(struct sk_buff *skb)
1482 {
1483 int err;
1485 preempt_disable();
1486 err = netif_rx(skb);
1487 if (local_softirq_pending())
1488 do_softirq();
1489 preempt_enable();
1491 return err;
1492 }
1494 EXPORT_SYMBOL(netif_rx_ni);
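/*
 * A minimal receive sketch (hypothetical driver code, not part of this
 * file); from its interrupt handler a driver typically does:
 *
 *	skb->dev = dev;
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx(skb);
 *
 * netif_rx_ni() is the process-context variant, which runs pending
 * softirqs by hand as shown above.
 */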
1496 static inline struct net_device *skb_bond(struct sk_buff *skb)
1497 {
1498 struct net_device *dev = skb->dev;
1500 if (dev->master)
1501 skb->dev = dev->master;
1503 return dev;
1504 }
1506 static void net_tx_action(struct softirq_action *h)
1507 {
1508 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1510 if (sd->completion_queue) {
1511 struct sk_buff *clist;
1513 local_irq_disable();
1514 clist = sd->completion_queue;
1515 sd->completion_queue = NULL;
1516 local_irq_enable();
1518 while (clist) {
1519 struct sk_buff *skb = clist;
1520 clist = clist->next;
1522 BUG_TRAP(!atomic_read(&skb->users));
1523 __kfree_skb(skb);
1524 }
1525 }
1527 if (sd->output_queue) {
1528 struct net_device *head;
1530 local_irq_disable();
1531 head = sd->output_queue;
1532 sd->output_queue = NULL;
1533 local_irq_enable();
1535 while (head) {
1536 struct net_device *dev = head;
1537 head = head->next_sched;
1539 smp_mb__before_clear_bit();
1540 clear_bit(__LINK_STATE_SCHED, &dev->state);
1542 if (spin_trylock(&dev->queue_lock)) {
1543 qdisc_run(dev);
1544 spin_unlock(&dev->queue_lock);
1545 } else {
1546 netif_schedule(dev);
1547 }
1548 }
1549 }
1550 }
1552 static __inline__ int deliver_skb(struct sk_buff *skb,
1553 struct packet_type *pt_prev,
1554 struct net_device *orig_dev)
1555 {
1556 atomic_inc(&skb->users);
1557 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1558 }
1560 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1561 int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
1562 struct net_bridge;
1563 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1564 unsigned char *addr);
1565 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
1567 static __inline__ int handle_bridge(struct sk_buff **pskb,
1568 struct packet_type **pt_prev, int *ret,
1569 struct net_device *orig_dev)
1570 {
1571 struct net_bridge_port *port;
1573 if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
1574 (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
1575 return 0;
1577 if (*pt_prev) {
1578 *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
1579 *pt_prev = NULL;
1580 }
1582 return br_handle_frame_hook(port, pskb);
1583 }
1584 #else
1585 #define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
1586 #endif
1588 #ifdef CONFIG_NET_CLS_ACT
1589 /* TODO: Maybe we should just force sch_ingress to be compiled in
1590 * when CONFIG_NET_CLS_ACT is? Otherwise we pay some useless instructions
1591 * (a compare and 2 extra stores) right now if we don't have it on
1592 * but do have CONFIG_NET_CLS_ACT.
1593 * NOTE: This doesn't stop any functionality; if you don't have
1594 * the ingress scheduler, you just can't add policies on ingress.
1596 */
1597 static int ing_filter(struct sk_buff *skb)
1598 {
1599 struct Qdisc *q;
1600 struct net_device *dev = skb->dev;
1601 int result = TC_ACT_OK;
1603 if (dev->qdisc_ingress) {
1604 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1605 if (MAX_RED_LOOP < ttl++) {
1606 printk("Redir loop detected Dropping packet (%s->%s)\n",
1607 skb->input_dev->name, skb->dev->name);
1608 return TC_ACT_SHOT;
1609 }
1611 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1613 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1615 spin_lock(&dev->ingress_lock);
1616 if ((q = dev->qdisc_ingress) != NULL)
1617 result = q->enqueue(skb, q);
1618 spin_unlock(&dev->ingress_lock);
1620 }
1622 return result;
1623 }
1624 #endif
1626 int netif_receive_skb(struct sk_buff *skb)
1627 {
1628 struct packet_type *ptype, *pt_prev;
1629 struct net_device *orig_dev;
1630 int ret = NET_RX_DROP;
1631 unsigned short type;
1633 /* if we've gotten here through NAPI, check netpoll */
1634 if (skb->dev->poll && netpoll_rx(skb))
1635 return NET_RX_DROP;
1637 if (!skb->tstamp.off_sec)
1638 net_timestamp(skb);
1640 if (!skb->input_dev)
1641 skb->input_dev = skb->dev;
1643 orig_dev = skb_bond(skb);
1645 __get_cpu_var(netdev_rx_stat).total++;
1647 skb->h.raw = skb->nh.raw = skb->data;
1648 skb->mac_len = skb->nh.raw - skb->mac.raw;
1650 pt_prev = NULL;
1652 rcu_read_lock();
1654 #ifdef CONFIG_NET_CLS_ACT
1655 if (skb->tc_verd & TC_NCLS) {
1656 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1657 goto ncls;
1658 }
1659 #endif
1661 #ifdef CONFIG_XEN
1662 switch (skb->ip_summed) {
1663 case CHECKSUM_UNNECESSARY:
1664 skb->proto_data_valid = 1;
1665 break;
1666 case CHECKSUM_HW:
1667 /* XXX Implement me. */
1668 default:
1669 skb->proto_data_valid = 0;
1670 break;
1671 }
1672 #endif
1674 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1675 if (!ptype->dev || ptype->dev == skb->dev) {
1676 if (pt_prev)
1677 ret = deliver_skb(skb, pt_prev, orig_dev);
1678 pt_prev = ptype;
1679 }
1680 }
1682 #ifdef CONFIG_NET_CLS_ACT
1683 if (pt_prev) {
1684 ret = deliver_skb(skb, pt_prev, orig_dev);
1685 pt_prev = NULL; /* no one else should process this after */
1686 } else {
1687 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1688 }
1690 ret = ing_filter(skb);
1692 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1693 kfree_skb(skb);
1694 goto out;
1695 }
1697 skb->tc_verd = 0;
1698 ncls:
1699 #endif
1701 handle_diverter(skb);
1703 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
1704 goto out;
1706 type = skb->protocol;
1707 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1708 if (ptype->type == type &&
1709 (!ptype->dev || ptype->dev == skb->dev)) {
1710 if (pt_prev)
1711 ret = deliver_skb(skb, pt_prev, orig_dev);
1712 pt_prev = ptype;
1713 }
1714 }
1716 if (pt_prev) {
1717 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1718 } else {
1719 kfree_skb(skb);
1720 /* Jamal, now you will not be able to escape explaining
1721 * to me how you were going to use this. :-)
1722 */
1723 ret = NET_RX_DROP;
1724 }
1726 out:
1727 rcu_read_unlock();
1728 return ret;
1729 }
1731 static int process_backlog(struct net_device *backlog_dev, int *budget)
1732 {
1733 int work = 0;
1734 int quota = min(backlog_dev->quota, *budget);
1735 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1736 unsigned long start_time = jiffies;
1738 backlog_dev->weight = weight_p;
1739 for (;;) {
1740 struct sk_buff *skb;
1741 struct net_device *dev;
1743 local_irq_disable();
1744 skb = __skb_dequeue(&queue->input_pkt_queue);
1745 if (!skb)
1746 goto job_done;
1747 local_irq_enable();
1749 dev = skb->dev;
1751 netif_receive_skb(skb);
1753 dev_put(dev);
1755 work++;
1757 if (work >= quota || jiffies - start_time > 1)
1758 break;
1760 }
1762 backlog_dev->quota -= work;
1763 *budget -= work;
1764 return -1;
1766 job_done:
1767 backlog_dev->quota -= work;
1768 *budget -= work;
1770 list_del(&backlog_dev->poll_list);
1771 smp_mb__before_clear_bit();
1772 netif_poll_enable(backlog_dev);
1774 local_irq_enable();
1775 return 0;
1776 }
1778 static void net_rx_action(struct softirq_action *h)
1779 {
1780 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1781 unsigned long start_time = jiffies;
1782 int budget = netdev_budget;
1783 void *have;
1785 local_irq_disable();
1787 while (!list_empty(&queue->poll_list)) {
1788 struct net_device *dev;
1790 if (budget <= 0 || jiffies - start_time > 1)
1791 goto softnet_break;
1793 local_irq_enable();
1795 dev = list_entry(queue->poll_list.next,
1796 struct net_device, poll_list);
1797 have = netpoll_poll_lock(dev);
1799 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
1800 netpoll_poll_unlock(have);
1801 local_irq_disable();
1802 list_del(&dev->poll_list);
1803 list_add_tail(&dev->poll_list, &queue->poll_list);
1804 if (dev->quota < 0)
1805 dev->quota += dev->weight;
1806 else
1807 dev->quota = dev->weight;
1808 } else {
1809 netpoll_poll_unlock(have);
1810 dev_put(dev);
1811 local_irq_disable();
1812 }
1813 }
1814 out:
1815 local_irq_enable();
1816 return;
1818 softnet_break:
1819 __get_cpu_var(netdev_rx_stat).time_squeeze++;
1820 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1821 goto out;
1822 }
1824 static gifconf_func_t * gifconf_list [NPROTO];
1826 /**
1827 * register_gifconf - register a SIOCGIF handler
1828 * @family: Address family
1829 * @gifconf: Function handler
1831 * Register protocol dependent address dumping routines. The handler
1832 * that is passed must not be freed or reused until it has been replaced
1833 * by another handler.
1834 */
1835 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1836 {
1837 if (family >= NPROTO)
1838 return -EINVAL;
1839 gifconf_list[family] = gifconf;
1840 return 0;
1841 }
1844 /*
1845 * Map an interface index to its name (SIOCGIFNAME)
1846 */
1848 /*
1849 * We need this ioctl for efficient implementation of the
1850 * if_indextoname() function required by the IPv6 API. Without
1851 * it, we would have to search all the interfaces to find a
1852 * match. --pb
1853 */
1855 static int dev_ifname(struct ifreq __user *arg)
1856 {
1857 struct net_device *dev;
1858 struct ifreq ifr;
1860 /*
1861 * Fetch the caller's info block.
1862 */
1864 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1865 return -EFAULT;
1867 read_lock(&dev_base_lock);
1868 dev = __dev_get_by_index(ifr.ifr_ifindex);
1869 if (!dev) {
1870 read_unlock(&dev_base_lock);
1871 return -ENODEV;
1872 }
1874 strcpy(ifr.ifr_name, dev->name);
1875 read_unlock(&dev_base_lock);
1877 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
1878 return -EFAULT;
1879 return 0;
1880 }
1882 /*
1883 * Perform a SIOCGIFCONF call. This structure will change
1884 * size eventually, and there is nothing I can do about it.
1885 * Thus we will need a 'compatibility mode'.
1886 */
1888 static int dev_ifconf(char __user *arg)
1889 {
1890 struct ifconf ifc;
1891 struct net_device *dev;
1892 char __user *pos;
1893 int len;
1894 int total;
1895 int i;
1897 /*
1898 * Fetch the caller's info block.
1899 */
1901 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
1902 return -EFAULT;
1904 pos = ifc.ifc_buf;
1905 len = ifc.ifc_len;
1907 /*
1908 * Loop over the interfaces, and write an info block for each.
1909 */
1911 total = 0;
1912 for (dev = dev_base; dev; dev = dev->next) {
1913 for (i = 0; i < NPROTO; i++) {
1914 if (gifconf_list[i]) {
1915 int done;
1916 if (!pos)
1917 done = gifconf_list[i](dev, NULL, 0);
1918 else
1919 done = gifconf_list[i](dev, pos + total,
1920 len - total);
1921 if (done < 0)
1922 return -EFAULT;
1923 total += done;
1924 }
1925 }
1926 }
1928 /*
1929 * All done. Write the updated control block back to the caller.
1930 */
1931 ifc.ifc_len = total;
1933 /*
1934 * Both BSD and Solaris return 0 here, so we do too.
1935 */
1936 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
1937 }
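/*
 * A minimal userspace sketch of the SIOCGIFCONF call handled above
 * (hypothetical code, not part of this file):
 *
 *	struct ifconf ifc;
 *	char buf[4096];
 *	ifc.ifc_len = sizeof(buf);
 *	ifc.ifc_buf = buf;
 *	ioctl(sock, SIOCGIFCONF, &ifc);
 *
 * On return ifc.ifc_len holds the number of bytes written.
 */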
1939 #ifdef CONFIG_PROC_FS
1940 /*
1941 * This is invoked by the /proc filesystem handler to display a device
1942 * in detail.
1943 */
1944 static __inline__ struct net_device *dev_get_idx(loff_t pos)
1945 {
1946 struct net_device *dev;
1947 loff_t i;
1949 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1951 return i == pos ? dev : NULL;
1952 }
1954 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1955 {
1956 read_lock(&dev_base_lock);
1957 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1958 }
1960 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1961 {
1962 ++*pos;
1963 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1964 }
1966 void dev_seq_stop(struct seq_file *seq, void *v)
1967 {
1968 read_unlock(&dev_base_lock);
1969 }
1971 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
1972 {
1973 if (dev->get_stats) {
1974 struct net_device_stats *stats = dev->get_stats(dev);
1976 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
1977 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
1978 dev->name, stats->rx_bytes, stats->rx_packets,
1979 stats->rx_errors,
1980 stats->rx_dropped + stats->rx_missed_errors,
1981 stats->rx_fifo_errors,
1982 stats->rx_length_errors + stats->rx_over_errors +
1983 stats->rx_crc_errors + stats->rx_frame_errors,
1984 stats->rx_compressed, stats->multicast,
1985 stats->tx_bytes, stats->tx_packets,
1986 stats->tx_errors, stats->tx_dropped,
1987 stats->tx_fifo_errors, stats->collisions,
1988 stats->tx_carrier_errors +
1989 stats->tx_aborted_errors +
1990 stats->tx_window_errors +
1991 stats->tx_heartbeat_errors,
1992 stats->tx_compressed);
1993 } else
1994 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
1995 }
1997 /*
1998 * Called from the PROCfs module. This now uses the new arbitrary sized
1999 * /proc/net interface to create /proc/net/dev
2000 */
2001 static int dev_seq_show(struct seq_file *seq, void *v)
2002 {
2003 if (v == SEQ_START_TOKEN)
2004 seq_puts(seq, "Inter-| Receive "
2005 " | Transmit\n"
2006 " face |bytes packets errs drop fifo frame "
2007 "compressed multicast|bytes packets errs "
2008 "drop fifo colls carrier compressed\n");
2009 else
2010 dev_seq_printf_stats(seq, v);
2011 return 0;
2012 }
2014 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2015 {
2016 struct netif_rx_stats *rc = NULL;
2018 while (*pos < NR_CPUS)
2019 if (cpu_online(*pos)) {
2020 rc = &per_cpu(netdev_rx_stat, *pos);
2021 break;
2022 } else
2023 ++*pos;
2024 return rc;
2025 }
2027 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2028 {
2029 return softnet_get_online(pos);
2030 }
2032 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2033 {
2034 ++*pos;
2035 return softnet_get_online(pos);
2036 }
2038 static void softnet_seq_stop(struct seq_file *seq, void *v)
2039 {
2040 }
2042 static int softnet_seq_show(struct seq_file *seq, void *v)
2043 {
2044 struct netif_rx_stats *s = v;
2046 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2047 s->total, s->dropped, s->time_squeeze, 0,
2048 0, 0, 0, 0, /* was fastroute */
2049 s->cpu_collision );
2050 return 0;
2051 }
2053 static struct seq_operations dev_seq_ops = {
2054 .start = dev_seq_start,
2055 .next = dev_seq_next,
2056 .stop = dev_seq_stop,
2057 .show = dev_seq_show,
2058 };
2060 static int dev_seq_open(struct inode *inode, struct file *file)
2061 {
2062 return seq_open(file, &dev_seq_ops);
2063 }
2065 static struct file_operations dev_seq_fops = {
2066 .owner = THIS_MODULE,
2067 .open = dev_seq_open,
2068 .read = seq_read,
2069 .llseek = seq_lseek,
2070 .release = seq_release,
2071 };
2073 static struct seq_operations softnet_seq_ops = {
2074 .start = softnet_seq_start,
2075 .next = softnet_seq_next,
2076 .stop = softnet_seq_stop,
2077 .show = softnet_seq_show,
2078 };
2080 static int softnet_seq_open(struct inode *inode, struct file *file)
2081 {
2082 return seq_open(file, &softnet_seq_ops);
2083 }
2085 static struct file_operations softnet_seq_fops = {
2086 .owner = THIS_MODULE,
2087 .open = softnet_seq_open,
2088 .read = seq_read,
2089 .llseek = seq_lseek,
2090 .release = seq_release,
2091 };
2093 #ifdef WIRELESS_EXT
2094 extern int wireless_proc_init(void);
2095 #else
2096 #define wireless_proc_init() 0
2097 #endif
2099 static int __init dev_proc_init(void)
2100 {
2101 int rc = -ENOMEM;
2103 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2104 goto out;
2105 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2106 goto out_dev;
2107 if (wireless_proc_init())
2108 goto out_softnet;
2109 rc = 0;
2110 out:
2111 return rc;
2112 out_softnet:
2113 proc_net_remove("softnet_stat");
2114 out_dev:
2115 proc_net_remove("dev");
2116 goto out;
2117 }
2118 #else
2119 #define dev_proc_init() 0
2120 #endif /* CONFIG_PROC_FS */
2123 /**
2124 * netdev_set_master - set up master/slave pair
2125 * @slave: slave device
2126 * @master: new master device
2128 * Changes the master device of the slave. Pass %NULL to break the
2129 * bonding. The caller must hold the RTNL semaphore. On a failure
2130 * a negative errno code is returned. On success the reference counts
2131 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2132 * function returns zero.
2133 */
2134 int netdev_set_master(struct net_device *slave, struct net_device *master)
2136 struct net_device *old = slave->master;
2138 ASSERT_RTNL();
2140 if (master) {
2141 if (old)
2142 return -EBUSY;
2143 dev_hold(master);
2146 slave->master = master;
2148 synchronize_net();
2150 if (old)
2151 dev_put(old);
2153 if (master)
2154 slave->flags |= IFF_SLAVE;
2155 else
2156 slave->flags &= ~IFF_SLAVE;
2158 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2159 return 0;
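/*
 * Example (illustrative sketch; bond_dev and slave_dev are hypothetical):
 * a bonding-style driver enslaves a device under the RTNL semaphore, and
 * later passes %NULL to break the bond again.
 *
 *	int err;
 *
 *	rtnl_lock();
 *	err = netdev_set_master(slave_dev, bond_dev);
 *	rtnl_unlock();
 */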
2162 /**
2163 * dev_set_promiscuity - update promiscuity count on a device
2164 * @dev: device
2165 * @inc: modifier
2167 * Add or remove promiscuity from a device. While the count in the device
2168 * remains above zero the interface remains promiscuous. Once it hits zero
2169 * the device reverts to normal filtering operation. A negative @inc
2170 * value is used to drop promiscuity on the device.
2171 */
2172 void dev_set_promiscuity(struct net_device *dev, int inc)
2174 unsigned short old_flags = dev->flags;
2176 if ((dev->promiscuity += inc) == 0)
2177 dev->flags &= ~IFF_PROMISC;
2178 else
2179 dev->flags |= IFF_PROMISC;
2180 if (dev->flags != old_flags) {
2181 dev_mc_upload(dev);
2182 printk(KERN_INFO "device %s %s promiscuous mode\n",
2183 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2184 "left");
2188 /**
2189 * dev_set_allmulti - update allmulti count on a device
2190 * @dev: device
2191 * @inc: modifier
2193 * Add or remove reception of all multicast frames to a device. While the
2194 * count in the device remains above zero the interface remains listening
2195 * to all multicast frames. Once it hits zero the device reverts to normal
2196 * filtering operation. A negative @inc value is used to drop the counter
2197 * when releasing a resource needing all multicasts.
2198 */
2200 void dev_set_allmulti(struct net_device *dev, int inc)
2202 unsigned short old_flags = dev->flags;
2204 dev->flags |= IFF_ALLMULTI;
2205 if ((dev->allmulti += inc) == 0)
2206 dev->flags &= ~IFF_ALLMULTI;
2207 if (dev->flags ^ old_flags)
2208 dev_mc_upload(dev);
2211 unsigned dev_get_flags(const struct net_device *dev)
2213 unsigned flags;
2215 flags = (dev->flags & ~(IFF_PROMISC |
2216 IFF_ALLMULTI |
2217 IFF_RUNNING)) |
2218 (dev->gflags & (IFF_PROMISC |
2219 IFF_ALLMULTI));
2221 if (netif_running(dev) && netif_carrier_ok(dev))
2222 flags |= IFF_RUNNING;
2224 return flags;
2227 int dev_change_flags(struct net_device *dev, unsigned flags)
2229 int ret;
2230 int old_flags = dev->flags;
2232 /*
2233 * Set the flags on our device.
2234 */
2236 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2237 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2238 IFF_AUTOMEDIA)) |
2239 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2240 IFF_ALLMULTI));
2242 /*
2243 * Load in the correct multicast list now the flags have changed.
2244 */
2246 dev_mc_upload(dev);
2248 /*
2249 * Have we downed the interface? We handle IFF_UP ourselves
2250 * according to user attempts to set it, rather than blindly
2251 * setting it.
2252 */
2254 ret = 0;
2255 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
2256 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2258 if (!ret)
2259 dev_mc_upload(dev);
2262 if (dev->flags & IFF_UP &&
2263 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2264 IFF_VOLATILE)))
2265 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
2267 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2268 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2269 dev->gflags ^= IFF_PROMISC;
2270 dev_set_promiscuity(dev, inc);
2273 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2274 is important. Some (broken) drivers set IFF_PROMISC when
2275 IFF_ALLMULTI is requested, without asking us and without reporting it.
2276 */
2277 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2278 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2279 dev->gflags ^= IFF_ALLMULTI;
2280 dev_set_allmulti(dev, inc);
2283 if (old_flags ^ dev->flags)
2284 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2286 return ret;
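/*
 * Example (illustrative sketch; my_dev is hypothetical): bringing an
 * interface administratively up via the flags word; flipping the IFF_UP
 * bit takes the dev_open()/dev_close() path above.
 *
 *	int err;
 *
 *	rtnl_lock();
 *	err = dev_change_flags(my_dev, my_dev->flags | IFF_UP);
 *	rtnl_unlock();
 */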
2289 int dev_set_mtu(struct net_device *dev, int new_mtu)
2291 int err;
2293 if (new_mtu == dev->mtu)
2294 return 0;
2296 /* MTU must not be negative. */
2297 if (new_mtu < 0)
2298 return -EINVAL;
2300 if (!netif_device_present(dev))
2301 return -ENODEV;
2303 err = 0;
2304 if (dev->change_mtu)
2305 err = dev->change_mtu(dev, new_mtu);
2306 else
2307 dev->mtu = new_mtu;
2308 if (!err && dev->flags & IFF_UP)
2309 notifier_call_chain(&netdev_chain,
2310 NETDEV_CHANGEMTU, dev);
2311 return err;
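/*
 * Example (illustrative sketch; my_dev is hypothetical): requesting a
 * jumbo MTU; the driver's change_mtu hook may reject sizes the hardware
 * cannot handle, so the result must be checked.
 *
 *	int err;
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(my_dev, 9000);
 *	rtnl_unlock();
 *	if (err)
 *		printk(KERN_WARNING "MTU change failed: %d\n", err);
 */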
2314 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2316 int err;
2318 if (!dev->set_mac_address)
2319 return -EOPNOTSUPP;
2320 if (sa->sa_family != dev->type)
2321 return -EINVAL;
2322 if (!netif_device_present(dev))
2323 return -ENODEV;
2324 err = dev->set_mac_address(dev, sa);
2325 if (!err)
2326 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
2327 return err;
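/*
 * Example (illustrative sketch; my_dev and new_mac are hypothetical):
 * the address travels in a struct sockaddr whose sa_family must match
 * the device type, e.g. %ARPHRD_ETHER for Ethernet.
 *
 *	struct sockaddr sa;
 *	int err;
 *
 *	sa.sa_family = my_dev->type;
 *	memcpy(sa.sa_data, new_mac, my_dev->addr_len);
 *	rtnl_lock();
 *	err = dev_set_mac_address(my_dev, &sa);
 *	rtnl_unlock();
 */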
2330 /*
2331 * Perform the SIOCxIFxxx calls.
2332 */
2333 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2335 int err;
2336 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2338 if (!dev)
2339 return -ENODEV;
2341 switch (cmd) {
2342 case SIOCGIFFLAGS: /* Get interface flags */
2343 ifr->ifr_flags = dev_get_flags(dev);
2344 return 0;
2346 case SIOCSIFFLAGS: /* Set interface flags */
2347 return dev_change_flags(dev, ifr->ifr_flags);
2349 case SIOCGIFMETRIC: /* Get the metric on the interface
2350 (currently unused) */
2351 ifr->ifr_metric = 0;
2352 return 0;
2354 case SIOCSIFMETRIC: /* Set the metric on the interface
2355 (currently unused) */
2356 return -EOPNOTSUPP;
2358 case SIOCGIFMTU: /* Get the MTU of a device */
2359 ifr->ifr_mtu = dev->mtu;
2360 return 0;
2362 case SIOCSIFMTU: /* Set the MTU of a device */
2363 return dev_set_mtu(dev, ifr->ifr_mtu);
2365 case SIOCGIFHWADDR:
2366 if (!dev->addr_len)
2367 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2368 else
2369 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2370 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2371 ifr->ifr_hwaddr.sa_family = dev->type;
2372 return 0;
2374 case SIOCSIFHWADDR:
2375 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2377 case SIOCSIFHWBROADCAST:
2378 if (ifr->ifr_hwaddr.sa_family != dev->type)
2379 return -EINVAL;
2380 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2381 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2382 notifier_call_chain(&netdev_chain,
2383 NETDEV_CHANGEADDR, dev);
2384 return 0;
2386 case SIOCGIFMAP:
2387 ifr->ifr_map.mem_start = dev->mem_start;
2388 ifr->ifr_map.mem_end = dev->mem_end;
2389 ifr->ifr_map.base_addr = dev->base_addr;
2390 ifr->ifr_map.irq = dev->irq;
2391 ifr->ifr_map.dma = dev->dma;
2392 ifr->ifr_map.port = dev->if_port;
2393 return 0;
2395 case SIOCSIFMAP:
2396 if (dev->set_config) {
2397 if (!netif_device_present(dev))
2398 return -ENODEV;
2399 return dev->set_config(dev, &ifr->ifr_map);
2401 return -EOPNOTSUPP;
2403 case SIOCADDMULTI:
2404 if (!dev->set_multicast_list ||
2405 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2406 return -EINVAL;
2407 if (!netif_device_present(dev))
2408 return -ENODEV;
2409 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2410 dev->addr_len, 1);
2412 case SIOCDELMULTI:
2413 if (!dev->set_multicast_list ||
2414 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2415 return -EINVAL;
2416 if (!netif_device_present(dev))
2417 return -ENODEV;
2418 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2419 dev->addr_len, 1);
2421 case SIOCGIFINDEX:
2422 ifr->ifr_ifindex = dev->ifindex;
2423 return 0;
2425 case SIOCGIFTXQLEN:
2426 ifr->ifr_qlen = dev->tx_queue_len;
2427 return 0;
2429 case SIOCSIFTXQLEN:
2430 if (ifr->ifr_qlen < 0)
2431 return -EINVAL;
2432 dev->tx_queue_len = ifr->ifr_qlen;
2433 return 0;
2435 case SIOCSIFNAME:
2436 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2437 return dev_change_name(dev, ifr->ifr_newname);
2439 /*
2440 * Unknown or private ioctl
2441 */
2443 default:
2444 if ((cmd >= SIOCDEVPRIVATE &&
2445 cmd <= SIOCDEVPRIVATE + 15) ||
2446 cmd == SIOCBONDENSLAVE ||
2447 cmd == SIOCBONDRELEASE ||
2448 cmd == SIOCBONDSETHWADDR ||
2449 cmd == SIOCBONDSLAVEINFOQUERY ||
2450 cmd == SIOCBONDINFOQUERY ||
2451 cmd == SIOCBONDCHANGEACTIVE ||
2452 cmd == SIOCGMIIPHY ||
2453 cmd == SIOCGMIIREG ||
2454 cmd == SIOCSMIIREG ||
2455 cmd == SIOCBRADDIF ||
2456 cmd == SIOCBRDELIF ||
2457 cmd == SIOCWANDEV) {
2458 err = -EOPNOTSUPP;
2459 if (dev->do_ioctl) {
2460 if (netif_device_present(dev))
2461 err = dev->do_ioctl(dev, ifr,
2462 cmd);
2463 else
2464 err = -ENODEV;
2466 } else
2467 err = -EINVAL;
2470 return err;
2473 /*
2474 * This function handles all "interface"-type I/O control requests. The actual
2475 * 'doing' part of this is dev_ifsioc above.
2476 */
2478 /**
2479 * dev_ioctl - network device ioctl
2480 * @cmd: command to issue
2481 * @arg: pointer to a struct ifreq in user space
2483 * Issue ioctl functions to devices. This is normally called by the
2484 * user space syscall interfaces but can sometimes be useful for
2485 * other purposes. The return value is the return from the syscall if
2486 * positive or a negative errno code on error.
2487 */
2489 int dev_ioctl(unsigned int cmd, void __user *arg)
2491 struct ifreq ifr;
2492 int ret;
2493 char *colon;
2495 /* One special case: SIOCGIFCONF takes an ifconf argument
2496 and requires a shared lock, because it sleeps while writing
2497 to user space.
2498 */
2500 if (cmd == SIOCGIFCONF) {
2501 rtnl_shlock();
2502 ret = dev_ifconf((char __user *) arg);
2503 rtnl_shunlock();
2504 return ret;
2506 if (cmd == SIOCGIFNAME)
2507 return dev_ifname((struct ifreq __user *)arg);
2509 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2510 return -EFAULT;
2512 ifr.ifr_name[IFNAMSIZ-1] = 0;
2514 colon = strchr(ifr.ifr_name, ':');
2515 if (colon)
2516 *colon = 0;
2518 /*
2519 * See which interface the caller is talking about.
2520 */
2522 switch (cmd) {
2523 /*
2524 * These ioctl calls:
2525 * - can be done by all.
2526 * - atomic and do not require locking.
2527 * - return a value
2528 */
2529 case SIOCGIFFLAGS:
2530 case SIOCGIFMETRIC:
2531 case SIOCGIFMTU:
2532 case SIOCGIFHWADDR:
2533 case SIOCGIFSLAVE:
2534 case SIOCGIFMAP:
2535 case SIOCGIFINDEX:
2536 case SIOCGIFTXQLEN:
2537 dev_load(ifr.ifr_name);
2538 read_lock(&dev_base_lock);
2539 ret = dev_ifsioc(&ifr, cmd);
2540 read_unlock(&dev_base_lock);
2541 if (!ret) {
2542 if (colon)
2543 *colon = ':';
2544 if (copy_to_user(arg, &ifr,
2545 sizeof(struct ifreq)))
2546 ret = -EFAULT;
2548 return ret;
2550 case SIOCETHTOOL:
2551 dev_load(ifr.ifr_name);
2552 rtnl_lock();
2553 ret = dev_ethtool(&ifr);
2554 rtnl_unlock();
2555 if (!ret) {
2556 if (colon)
2557 *colon = ':';
2558 if (copy_to_user(arg, &ifr,
2559 sizeof(struct ifreq)))
2560 ret = -EFAULT;
2562 return ret;
2564 /*
2565 * These ioctl calls:
2566 * - require superuser power.
2567 * - require strict serialization.
2568 * - return a value
2569 */
2570 case SIOCGMIIPHY:
2571 case SIOCGMIIREG:
2572 case SIOCSIFNAME:
2573 if (!capable(CAP_NET_ADMIN))
2574 return -EPERM;
2575 dev_load(ifr.ifr_name);
2576 rtnl_lock();
2577 ret = dev_ifsioc(&ifr, cmd);
2578 rtnl_unlock();
2579 if (!ret) {
2580 if (colon)
2581 *colon = ':';
2582 if (copy_to_user(arg, &ifr,
2583 sizeof(struct ifreq)))
2584 ret = -EFAULT;
2586 return ret;
2588 /*
2589 * These ioctl calls:
2590 * - require superuser power.
2591 * - require strict serialization.
2592 * - do not return a value
2593 */
2594 case SIOCSIFFLAGS:
2595 case SIOCSIFMETRIC:
2596 case SIOCSIFMTU:
2597 case SIOCSIFMAP:
2598 case SIOCSIFHWADDR:
2599 case SIOCSIFSLAVE:
2600 case SIOCADDMULTI:
2601 case SIOCDELMULTI:
2602 case SIOCSIFHWBROADCAST:
2603 case SIOCSIFTXQLEN:
2604 case SIOCSMIIREG:
2605 case SIOCBONDENSLAVE:
2606 case SIOCBONDRELEASE:
2607 case SIOCBONDSETHWADDR:
2608 case SIOCBONDCHANGEACTIVE:
2609 case SIOCBRADDIF:
2610 case SIOCBRDELIF:
2611 if (!capable(CAP_NET_ADMIN))
2612 return -EPERM;
2613 /* fall through */
2614 case SIOCBONDSLAVEINFOQUERY:
2615 case SIOCBONDINFOQUERY:
2616 dev_load(ifr.ifr_name);
2617 rtnl_lock();
2618 ret = dev_ifsioc(&ifr, cmd);
2619 rtnl_unlock();
2620 return ret;
2622 case SIOCGIFMEM:
2623 /* Get the per-device memory space. We can add this but
2624 * currently do not support it */
2625 case SIOCSIFMEM:
2626 /* Set the per-device memory buffer space.
2627 * Not applicable in our case */
2628 case SIOCSIFLINK:
2629 return -EINVAL;
2631 /*
2632 * Unknown or private ioctl.
2633 */
2634 default:
2635 if (cmd == SIOCWANDEV ||
2636 (cmd >= SIOCDEVPRIVATE &&
2637 cmd <= SIOCDEVPRIVATE + 15)) {
2638 dev_load(ifr.ifr_name);
2639 rtnl_lock();
2640 ret = dev_ifsioc(&ifr, cmd);
2641 rtnl_unlock();
2642 if (!ret && copy_to_user(arg, &ifr,
2643 sizeof(struct ifreq)))
2644 ret = -EFAULT;
2645 return ret;
2647 #ifdef WIRELESS_EXT
2648 /* Take care of Wireless Extensions */
2649 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2650 /* If command is `set a parameter', or
2651 * `get the encoding parameters', check if
2652 * the user has the right to do it */
2653 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
2654 if (!capable(CAP_NET_ADMIN))
2655 return -EPERM;
2657 dev_load(ifr.ifr_name);
2658 rtnl_lock();
2659 /* Follow me in net/core/wireless.c */
2660 ret = wireless_process_ioctl(&ifr, cmd);
2661 rtnl_unlock();
2662 if (IW_IS_GET(cmd) &&
2663 copy_to_user(arg, &ifr,
2664 sizeof(struct ifreq)))
2665 ret = -EFAULT;
2666 return ret;
2668 #endif /* WIRELESS_EXT */
2669 return -EINVAL;
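/*
 * Example (illustrative sketch, user-space side; "eth0" is just a
 * placeholder name): the syscall interface reaches dev_ioctl() through
 * an ioctl on any socket, e.g. fetching interface flags with
 * SIOCGIFFLAGS.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	if (fd >= 0 && ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
 *		printf("eth0 flags: %#x\n", ifr.ifr_flags);
 */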
2674 /**
2675 * dev_new_index - allocate an ifindex
2677 * Returns a suitable unique value for a new device interface
2678 * number. The caller must hold the rtnl semaphore or the
2679 * dev_base_lock to be sure it remains unique.
2680 */
2681 static int dev_new_index(void)
2683 static int ifindex;
2684 for (;;) {
2685 if (++ifindex <= 0)
2686 ifindex = 1;
2687 if (!__dev_get_by_index(ifindex))
2688 return ifindex;
2692 static int dev_boot_phase = 1;
2694 /* Delayed registration/unregistration */
2695 static DEFINE_SPINLOCK(net_todo_list_lock);
2696 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2698 static inline void net_set_todo(struct net_device *dev)
2700 spin_lock(&net_todo_list_lock);
2701 list_add_tail(&dev->todo_list, &net_todo_list);
2702 spin_unlock(&net_todo_list_lock);
2705 /**
2706 * register_netdevice - register a network device
2707 * @dev: device to register
2709 * Take a completed network device structure and add it to the kernel
2710 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2711 * chain. 0 is returned on success. A negative errno code is returned
2712 * on a failure to set up the device, or if the name is a duplicate.
2714 * Callers must hold the rtnl semaphore. You may want
2715 * register_netdev() instead of this.
2717 * BUGS:
2718 * The locking appears insufficient to guarantee two parallel registers
2719 * will not get the same name.
2720 */
2722 int register_netdevice(struct net_device *dev)
2724 struct hlist_head *head;
2725 struct hlist_node *p;
2726 int ret;
2728 BUG_ON(dev_boot_phase);
2729 ASSERT_RTNL();
2731 /* When net_devices are persistent, this will be fatal. */
2732 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2734 spin_lock_init(&dev->queue_lock);
2735 spin_lock_init(&dev->xmit_lock);
2736 dev->xmit_lock_owner = -1;
2737 #ifdef CONFIG_NET_CLS_ACT
2738 spin_lock_init(&dev->ingress_lock);
2739 #endif
2741 ret = alloc_divert_blk(dev);
2742 if (ret)
2743 goto out;
2745 dev->iflink = -1;
2747 /* Init, if this function is available */
2748 if (dev->init) {
2749 ret = dev->init(dev);
2750 if (ret) {
2751 if (ret > 0)
2752 ret = -EIO;
2753 goto out_err;
2757 if (!dev_valid_name(dev->name)) {
2758 ret = -EINVAL;
2759 goto out_err;
2762 dev->ifindex = dev_new_index();
2763 if (dev->iflink == -1)
2764 dev->iflink = dev->ifindex;
2766 /* Check for existence of name */
2767 head = dev_name_hash(dev->name);
2768 hlist_for_each(p, head) {
2769 struct net_device *d
2770 = hlist_entry(p, struct net_device, name_hlist);
2771 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2772 ret = -EEXIST;
2773 goto out_err;
2777 /* Fix illegal SG+CSUM combinations. */
2778 if ((dev->features & NETIF_F_SG) &&
2779 !(dev->features & (NETIF_F_IP_CSUM |
2780 NETIF_F_NO_CSUM |
2781 NETIF_F_HW_CSUM))) {
2782 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2783 dev->name);
2784 dev->features &= ~NETIF_F_SG;
2787 /* TSO requires that SG is present as well. */
2788 if ((dev->features & NETIF_F_TSO) &&
2789 !(dev->features & NETIF_F_SG)) {
2790 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2791 dev->name);
2792 dev->features &= ~NETIF_F_TSO;
2794 if (dev->features & NETIF_F_UFO) {
2795 if (!(dev->features & NETIF_F_HW_CSUM)) {
2796 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2797 "NETIF_F_HW_CSUM feature.\n",
2798 dev->name);
2799 dev->features &= ~NETIF_F_UFO;
2801 if (!(dev->features & NETIF_F_SG)) {
2802 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2803 "NETIF_F_SG feature.\n",
2804 dev->name);
2805 dev->features &= ~NETIF_F_UFO;
2809 /*
2810 * Install a nil rebuild_header routine; it should never be
2811 * called and serves only as a bug trap.
2812 */
2814 if (!dev->rebuild_header)
2815 dev->rebuild_header = default_rebuild_header;
2817 /*
2818 * Default initial state at registration is that the
2819 * device is present.
2820 */
2822 set_bit(__LINK_STATE_PRESENT, &dev->state);
2824 dev->next = NULL;
2825 dev_init_scheduler(dev);
2826 write_lock_bh(&dev_base_lock);
2827 *dev_tail = dev;
2828 dev_tail = &dev->next;
2829 hlist_add_head(&dev->name_hlist, head);
2830 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2831 dev_hold(dev);
2832 dev->reg_state = NETREG_REGISTERING;
2833 write_unlock_bh(&dev_base_lock);
2835 /* Notify protocols that a new device appeared. */
2836 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2838 /* Finish registration after unlock */
2839 net_set_todo(dev);
2840 ret = 0;
2842 out:
2843 return ret;
2844 out_err:
2845 free_divert_blk(dev);
2846 goto out;
2849 /**
2850 * register_netdev - register a network device
2851 * @dev: device to register
2853 * Take a completed network device structure and add it to the kernel
2854 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2855 * chain. 0 is returned on success. A negative errno code is returned
2856 * on a failure to set up the device, or if the name is a duplicate.
2858 * This is a wrapper around register_netdevice() that takes the rtnl semaphore
2859 * and expands the device name if you passed a format string to
2860 * alloc_netdev.
2861 */
2862 int register_netdev(struct net_device *dev)
2864 int err;
2866 rtnl_lock();
2868 /*
2869 * If the name is a format string the caller wants us to do a
2870 * name allocation.
2871 */
2872 if (strchr(dev->name, '%')) {
2873 err = dev_alloc_name(dev, dev->name);
2874 if (err < 0)
2875 goto out;
2878 /*
2879 * Back compatibility hook. Kill this one in 2.5
2880 */
2881 if (dev->name[0] == 0 || dev->name[0] == ' ') {
2882 err = dev_alloc_name(dev, "eth%d");
2883 if (err < 0)
2884 goto out;
2887 err = register_netdevice(dev);
2888 out:
2889 rtnl_unlock();
2890 return err;
2892 EXPORT_SYMBOL(register_netdev);
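/*
 * Example (illustrative sketch; my_setup and struct my_priv are
 * hypothetical): the usual driver pattern allocates with a "%d" format
 * name and lets register_netdev() pick the unit number.
 *
 *	struct net_device *dev;
 *	int err;
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "mynet%d", my_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */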
2894 /*
2895 * netdev_wait_allrefs - wait until all references are gone.
2897 * This is called when unregistering network devices.
2899 * Any protocol or device that holds a reference should register
2900 * for netdevice notification, and cleanup and put back the
2901 * reference if they receive an UNREGISTER event.
2902 * We can get stuck here if buggy protocols don't correctly
2903 * call dev_put.
2904 */
2905 static void netdev_wait_allrefs(struct net_device *dev)
2907 unsigned long rebroadcast_time, warning_time;
2909 rebroadcast_time = warning_time = jiffies;
2910 while (atomic_read(&dev->refcnt) != 0) {
2911 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
2912 rtnl_shlock();
2914 /* Rebroadcast unregister notification */
2915 notifier_call_chain(&netdev_chain,
2916 NETDEV_UNREGISTER, dev);
2918 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2919 &dev->state)) {
2920 /* We must not have linkwatch events
2921 * pending on unregister. If this
2922 * happens, we simply run the queue
2923 * unscheduled, resulting in a noop
2924 * for this device.
2925 */
2926 linkwatch_run_queue();
2929 rtnl_shunlock();
2931 rebroadcast_time = jiffies;
2934 msleep(250);
2936 if (time_after(jiffies, warning_time + 10 * HZ)) {
2937 printk(KERN_EMERG "unregister_netdevice: "
2938 "waiting for %s to become free. Usage "
2939 "count = %d\n",
2940 dev->name, atomic_read(&dev->refcnt));
2941 warning_time = jiffies;
2946 /* The sequence is:
2948 * rtnl_lock();
2949 * ...
2950 * register_netdevice(x1);
2951 * register_netdevice(x2);
2952 * ...
2953 * unregister_netdevice(y1);
2954 * unregister_netdevice(y2);
2955 * ...
2956 * rtnl_unlock();
2957 * free_netdev(y1);
2958 * free_netdev(y2);
2960 * We are invoked by rtnl_unlock() after it drops the semaphore.
2961 * This allows us to deal with problems:
2962 * 1) We can create/delete sysfs objects which invoke hotplug
2963 * without deadlocking with linkwatch via keventd.
2964 * 2) Since we run with the RTNL semaphore not held, we can sleep
2965 * safely in order to wait for the netdev refcnt to drop to zero.
2966 */
2967 static DECLARE_MUTEX(net_todo_run_mutex);
2968 void netdev_run_todo(void)
2970 struct list_head list = LIST_HEAD_INIT(list);
2971 int err;
2974 /* Need to guard against multiple CPUs getting out of order. */
2975 down(&net_todo_run_mutex);
2977 /* Not safe to do outside the semaphore. We must not return
2978 * until all unregister events invoked by the local processor
2979 * have been completed (either by this todo run, or one on
2980 * another cpu).
2981 */
2982 if (list_empty(&net_todo_list))
2983 goto out;
2985 /* Snapshot list, allow later requests */
2986 spin_lock(&net_todo_list_lock);
2987 list_splice_init(&net_todo_list, &list);
2988 spin_unlock(&net_todo_list_lock);
2990 while (!list_empty(&list)) {
2991 struct net_device *dev
2992 = list_entry(list.next, struct net_device, todo_list);
2993 list_del(&dev->todo_list);
2995 switch(dev->reg_state) {
2996 case NETREG_REGISTERING:
2997 err = netdev_register_sysfs(dev);
2998 if (err)
2999 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
3000 dev->name, err);
3001 dev->reg_state = NETREG_REGISTERED;
3002 break;
3004 case NETREG_UNREGISTERING:
3005 netdev_unregister_sysfs(dev);
3006 dev->reg_state = NETREG_UNREGISTERED;
3008 netdev_wait_allrefs(dev);
3010 /* paranoia */
3011 BUG_ON(atomic_read(&dev->refcnt));
3012 BUG_TRAP(!dev->ip_ptr);
3013 BUG_TRAP(!dev->ip6_ptr);
3014 BUG_TRAP(!dev->dn_ptr);
3017 /* It must be the very last action,
3018 * after this 'dev' may point to freed up memory.
3019 */
3020 if (dev->destructor)
3021 dev->destructor(dev);
3022 break;
3024 default:
3025 printk(KERN_ERR "network todo '%s' but state %d\n",
3026 dev->name, dev->reg_state);
3027 break;
3031 out:
3032 up(&net_todo_run_mutex);
3035 /**
3036 * alloc_netdev - allocate network device
3037 * @sizeof_priv: size of private data to allocate space for
3038 * @name: device name format string
3039 * @setup: callback to initialize device
3041 * Allocates a struct net_device with private data area for driver use
3042 * and performs basic initialization.
3043 */
3044 struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3045 void (*setup)(struct net_device *))
3047 void *p;
3048 struct net_device *dev;
3049 int alloc_size;
3051 /* ensure 32-byte alignment of both the device and private area */
3052 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3053 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3055 p = kmalloc(alloc_size, GFP_KERNEL);
3056 if (!p) {
3057 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3058 return NULL;
3060 memset(p, 0, alloc_size);
3062 dev = (struct net_device *)
3063 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3064 dev->padded = (char *)dev - (char *)p;
3066 if (sizeof_priv)
3067 dev->priv = netdev_priv(dev);
3069 setup(dev);
3070 strcpy(dev->name, name);
3071 return dev;
3073 EXPORT_SYMBOL(alloc_netdev);
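/*
 * Example (illustrative sketch; struct my_priv and my_setup are
 * hypothetical): because the private area is carved out of the same
 * aligned allocation, drivers reach it with netdev_priv() instead of a
 * separate kmalloc().
 *
 *	struct my_priv {
 *		spinlock_t lock;
 *		unsigned long tx_dropped;
 *	};
 *
 *	struct net_device *dev = alloc_netdev(sizeof(struct my_priv),
 *					      "mynet%d", my_setup);
 *	struct my_priv *priv = dev ? netdev_priv(dev) : NULL;
 */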
3075 /**
3076 * free_netdev - free network device
3077 * @dev: device
3079 * This function does the last stage of destroying an allocated device
3080 * interface. The reference to the device object is released.
3081 * If this is the last reference then it will be freed.
3082 */
3083 void free_netdev(struct net_device *dev)
3085 #ifdef CONFIG_SYSFS
3086 /* Compatibility with error handling in drivers */
3087 if (dev->reg_state == NETREG_UNINITIALIZED) {
3088 kfree((char *)dev - dev->padded);
3089 return;
3092 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3093 dev->reg_state = NETREG_RELEASED;
3095 /* will free via class release */
3096 class_device_put(&dev->class_dev);
3097 #else
3098 kfree((char *)dev - dev->padded);
3099 #endif
3102 /* Synchronize with packet receive processing. */
3103 void synchronize_net(void)
3105 might_sleep();
3106 synchronize_rcu();
3109 /**
3110 * unregister_netdevice - remove device from the kernel
3111 * @dev: device
3113 * This function shuts down a device interface and removes it
3114 * from the kernel tables. On success 0 is returned, on a failure
3115 * a negative errno code is returned.
3117 * Callers must hold the rtnl semaphore. You may want
3118 * unregister_netdev() instead of this.
3119 */
3121 int unregister_netdevice(struct net_device *dev)
3123 struct net_device *d, **dp;
3125 BUG_ON(dev_boot_phase);
3126 ASSERT_RTNL();
3128 /* Some devices call this without ever registering, to unwind a failed initialization. */
3129 if (dev->reg_state == NETREG_UNINITIALIZED) {
3130 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3131 "was registered\n", dev->name, dev);
3132 return -ENODEV;
3135 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3137 /* If device is running, close it first. */
3138 if (dev->flags & IFF_UP)
3139 dev_close(dev);
3141 /* And unlink it from device chain. */
3142 for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3143 if (d == dev) {
3144 write_lock_bh(&dev_base_lock);
3145 hlist_del(&dev->name_hlist);
3146 hlist_del(&dev->index_hlist);
3147 if (dev_tail == &dev->next)
3148 dev_tail = dp;
3149 *dp = d->next;
3150 write_unlock_bh(&dev_base_lock);
3151 break;
3154 if (!d) {
3155 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3156 dev->name);
3157 return -ENODEV;
3160 dev->reg_state = NETREG_UNREGISTERING;
3162 synchronize_net();
3164 /* Shutdown queueing discipline. */
3165 dev_shutdown(dev);
3168 /* Notify protocols that we are about to destroy
3169 this device; they should clean up all their state.
3170 */
3171 notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3173 /*
3174 * Flush the multicast chain
3175 */
3176 dev_mc_discard(dev);
3178 if (dev->uninit)
3179 dev->uninit(dev);
3181 /* Notifier chain MUST detach us from master device. */
3182 BUG_TRAP(!dev->master);
3184 free_divert_blk(dev);
3186 /* Finish processing unregister after unlock */
3187 net_set_todo(dev);
3189 synchronize_net();
3191 dev_put(dev);
3192 return 0;
3195 /**
3196 * unregister_netdev - remove device from the kernel
3197 * @dev: device
3199 * This function shuts down a device interface and removes it
3200 * from the kernel tables. On success 0 is returned, on a failure
3201 * a negative errno code is returned.
3203 * This is just a wrapper for unregister_netdevice that takes
3204 * the rtnl semaphore. In general you want to use this and not
3205 * unregister_netdevice.
3206 */
3207 void unregister_netdev(struct net_device *dev)
3209 rtnl_lock();
3210 unregister_netdevice(dev);
3211 rtnl_unlock();
3214 EXPORT_SYMBOL(unregister_netdev);
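/*
 * Example (illustrative sketch; my_dev is hypothetical): teardown
 * mirrors registration; unregister first, then release the memory.
 * free_netdev() defers the actual kfree() until the last reference to
 * the device has been dropped.
 *
 *	unregister_netdev(my_dev);
 *	free_netdev(my_dev);
 */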
3216 #ifdef CONFIG_HOTPLUG_CPU
3217 static int dev_cpu_callback(struct notifier_block *nfb,
3218 unsigned long action,
3219 void *ocpu)
3221 struct sk_buff **list_skb;
3222 struct net_device **list_net;
3223 struct sk_buff *skb;
3224 unsigned int cpu, oldcpu = (unsigned long)ocpu;
3225 struct softnet_data *sd, *oldsd;
3227 if (action != CPU_DEAD)
3228 return NOTIFY_OK;
3230 local_irq_disable();
3231 cpu = smp_processor_id();
3232 sd = &per_cpu(softnet_data, cpu);
3233 oldsd = &per_cpu(softnet_data, oldcpu);
3235 /* Find end of our completion_queue. */
3236 list_skb = &sd->completion_queue;
3237 while (*list_skb)
3238 list_skb = &(*list_skb)->next;
3239 /* Append completion queue from offline CPU. */
3240 *list_skb = oldsd->completion_queue;
3241 oldsd->completion_queue = NULL;
3243 /* Find end of our output_queue. */
3244 list_net = &sd->output_queue;
3245 while (*list_net)
3246 list_net = &(*list_net)->next_sched;
3247 /* Append output queue from offline CPU. */
3248 *list_net = oldsd->output_queue;
3249 oldsd->output_queue = NULL;
3251 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3252 local_irq_enable();
3254 /* Process offline CPU's input_pkt_queue */
3255 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3256 netif_rx(skb);
3258 return NOTIFY_OK;
3260 #endif /* CONFIG_HOTPLUG_CPU */
3263 /*
3264 * Initialize the DEV module. At boot time this walks the device list and
3265 * unhooks any devices that fail to initialise (normally hardware not
3266 * present) and leaves us with a valid list of present and active devices.
3268 */
3270 /*
3271 * This is called single threaded during boot, so no need
3272 * to take the rtnl semaphore.
3273 */
3274 static int __init net_dev_init(void)
3276 int i, rc = -ENOMEM;
3278 BUG_ON(!dev_boot_phase);
3280 net_random_init();
3282 if (dev_proc_init())
3283 goto out;
3285 if (netdev_sysfs_init())
3286 goto out;
3288 INIT_LIST_HEAD(&ptype_all);
3289 for (i = 0; i < 16; i++)
3290 INIT_LIST_HEAD(&ptype_base[i]);
3292 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3293 INIT_HLIST_HEAD(&dev_name_head[i]);
3295 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3296 INIT_HLIST_HEAD(&dev_index_head[i]);
3298 /*
3299 * Initialise the packet receive queues.
3300 */
3302 for_each_cpu(i) {
3303 struct softnet_data *queue;
3305 queue = &per_cpu(softnet_data, i);
3306 skb_queue_head_init(&queue->input_pkt_queue);
3307 queue->completion_queue = NULL;
3308 INIT_LIST_HEAD(&queue->poll_list);
3309 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3310 queue->backlog_dev.weight = weight_p;
3311 queue->backlog_dev.poll = process_backlog;
3312 atomic_set(&queue->backlog_dev.refcnt, 1);
3315 dev_boot_phase = 0;
3317 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3318 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3320 hotcpu_notifier(dev_cpu_callback, 0);
3321 dst_init();
3322 dev_mcast_init();
3323 rc = 0;
3324 out:
3325 return rc;
3328 subsys_initcall(net_dev_init);
3330 EXPORT_SYMBOL(__dev_get_by_index);
3331 EXPORT_SYMBOL(__dev_get_by_name);
3332 EXPORT_SYMBOL(__dev_remove_pack);
3333 EXPORT_SYMBOL(__skb_linearize);
3334 EXPORT_SYMBOL(dev_valid_name);
3335 EXPORT_SYMBOL(dev_add_pack);
3336 EXPORT_SYMBOL(dev_alloc_name);
3337 EXPORT_SYMBOL(dev_close);
3338 EXPORT_SYMBOL(dev_get_by_flags);
3339 EXPORT_SYMBOL(dev_get_by_index);
3340 EXPORT_SYMBOL(dev_get_by_name);
3341 EXPORT_SYMBOL(dev_open);
3342 EXPORT_SYMBOL(dev_queue_xmit);
3343 EXPORT_SYMBOL(dev_remove_pack);
3344 EXPORT_SYMBOL(dev_set_allmulti);
3345 EXPORT_SYMBOL(dev_set_promiscuity);
3346 EXPORT_SYMBOL(dev_change_flags);
3347 EXPORT_SYMBOL(dev_set_mtu);
3348 EXPORT_SYMBOL(dev_set_mac_address);
3349 EXPORT_SYMBOL(free_netdev);
3350 EXPORT_SYMBOL(netdev_boot_setup_check);
3351 EXPORT_SYMBOL(netdev_set_master);
3352 EXPORT_SYMBOL(netdev_state_change);
3353 EXPORT_SYMBOL(netif_receive_skb);
3354 EXPORT_SYMBOL(netif_rx);
3355 EXPORT_SYMBOL(register_gifconf);
3356 EXPORT_SYMBOL(register_netdevice);
3357 EXPORT_SYMBOL(register_netdevice_notifier);
3358 EXPORT_SYMBOL(skb_checksum_help);
3359 EXPORT_SYMBOL(synchronize_net);
3360 EXPORT_SYMBOL(unregister_netdevice);
3361 EXPORT_SYMBOL(unregister_netdevice_notifier);
3362 EXPORT_SYMBOL(net_enable_timestamp);
3363 EXPORT_SYMBOL(net_disable_timestamp);
3364 EXPORT_SYMBOL(dev_get_flags);
3365 EXPORT_SYMBOL(skb_checksum_setup);
3367 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3368 EXPORT_SYMBOL(br_handle_frame_hook);
3369 EXPORT_SYMBOL(br_fdb_get_hook);
3370 EXPORT_SYMBOL(br_fdb_put_hook);
3371 #endif
3373 #ifdef CONFIG_KMOD
3374 EXPORT_SYMBOL(dev_load);
3375 #endif
3377 EXPORT_PER_CPU_SYMBOL(softnet_data);