ia64/xen-unstable

annotate linux-2.6-xen-sparse/net/core/dev.c @ 10714:a4041ac6f152

[NET] net-gso.patch: Fix up GSO packets with broken checksums

Here is the original changelog:

[NET] gso: Fix up GSO packets with broken checksums

Certain subsystems in the stack (e.g., netfilter) can break the
partial
checksum on GSO packets. Until they're fixed, this patch allows
this to
work by recomputing the partial checksums through the GSO
mechanism.

Once they've all been converted to update the partial checksum
instead of
clearing it, this workaround can be removed.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
author kfraser@localhost.localdomain
date Mon Jul 10 15:36:04 2006 +0100 (2006-07-10)
parents 6e7027a2abca
children 9519445d9e9d
rev   line source
kaf24@5077 1 /*
kaf24@5077 2 * NET3 Protocol independent device support routines.
kaf24@5077 3 *
kaf24@5077 4 * This program is free software; you can redistribute it and/or
kaf24@5077 5 * modify it under the terms of the GNU General Public License
kaf24@5077 6 * as published by the Free Software Foundation; either version
kaf24@5077 7 * 2 of the License, or (at your option) any later version.
kaf24@5077 8 *
kaf24@5077 9 * Derived from the non IP parts of dev.c 1.0.19
vh249@5730 10 * Authors: Ross Biro
kaf24@5077 11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
kaf24@5077 12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
kaf24@5077 13 *
kaf24@5077 14 * Additional Authors:
kaf24@5077 15 * Florian la Roche <rzsfl@rz.uni-sb.de>
kaf24@5077 16 * Alan Cox <gw4pts@gw4pts.ampr.org>
kaf24@5077 17 * David Hinds <dahinds@users.sourceforge.net>
kaf24@5077 18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
kaf24@5077 19 * Adam Sulmicki <adam@cfar.umd.edu>
kaf24@5077 20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
kaf24@5077 21 *
kaf24@5077 22 * Changes:
kaf24@5077 23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
kaf24@5077 24 * to 2 if register_netdev gets called
kaf24@5077 25 * before net_dev_init & also removed a
kaf24@5077 26 * few lines of code in the process.
kaf24@5077 27 * Alan Cox : device private ioctl copies fields back.
kaf24@5077 28 * Alan Cox : Transmit queue code does relevant
kaf24@5077 29 * stunts to keep the queue safe.
kaf24@5077 30 * Alan Cox : Fixed double lock.
kaf24@5077 31 * Alan Cox : Fixed promisc NULL pointer trap
kaf24@5077 32 * ???????? : Support the full private ioctl range
kaf24@5077 33 * Alan Cox : Moved ioctl permission check into
kaf24@5077 34 * drivers
kaf24@5077 35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
kaf24@5077 36 * Alan Cox : 100 backlog just doesn't cut it when
kaf24@5077 37 * you start doing multicast video 8)
kaf24@5077 38 * Alan Cox : Rewrote net_bh and list manager.
kaf24@5077 39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
kaf24@5077 40 * Alan Cox : Took out transmit every packet pass
kaf24@5077 41 * Saved a few bytes in the ioctl handler
kaf24@5077 42 * Alan Cox : Network driver sets packet type before
kaf24@5077 43 * calling netif_rx. Saves a function
kaf24@5077 44 * call a packet.
kaf24@5077 45 * Alan Cox : Hashed net_bh()
kaf24@5077 46 * Richard Kooijman: Timestamp fixes.
kaf24@5077 47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
kaf24@5077 48 * Alan Cox : Device lock protection.
kaf24@5077 49 * Alan Cox : Fixed nasty side effect of device close
kaf24@5077 50 * changes.
kaf24@5077 51 * Rudi Cilibrasi : Pass the right thing to
kaf24@5077 52 * set_mac_address()
kaf24@5077 53 * Dave Miller : 32bit quantity for the device lock to
kaf24@5077 54 * make it work out on a Sparc.
kaf24@5077 55 * Bjorn Ekwall : Added KERNELD hack.
kaf24@5077 56 * Alan Cox : Cleaned up the backlog initialise.
kaf24@5077 57 * Craig Metz : SIOCGIFCONF fix if space for under
kaf24@5077 58 * 1 device.
kaf24@5077 59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
kaf24@5077 60 * is no device open function.
kaf24@5077 61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
kaf24@5077 62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
kaf24@5077 63 * Cyrus Durgin : Cleaned for KMOD
kaf24@5077 64 * Adam Sulmicki : Bug Fix : Network Device Unload
kaf24@5077 65 * A network device unload needs to purge
kaf24@5077 66 * the backlog queue.
kaf24@5077 67 * Paul Rusty Russell : SIOCSIFNAME
kaf24@5077 68 * Pekka Riikonen : Netdev boot-time settings code
kaf24@5077 69 * Andrew Morton : Make unregister_netdevice wait
kaf24@5077 70 * indefinitely on dev->refcnt
kaf24@5077 71 * J Hadi Salim : - Backlog queue sampling
kaf24@5077 72 * - netif_rx() feedback
kaf24@5077 73 */
kaf24@5077 74
kaf24@5077 75 #include <asm/uaccess.h>
kaf24@5077 76 #include <asm/system.h>
kaf24@5077 77 #include <linux/bitops.h>
cl349@8742 78 #include <linux/capability.h>
kaf24@5077 79 #include <linux/config.h>
kaf24@5077 80 #include <linux/cpu.h>
kaf24@5077 81 #include <linux/types.h>
kaf24@5077 82 #include <linux/kernel.h>
kaf24@5077 83 #include <linux/sched.h>
kaf24@5077 84 #include <linux/string.h>
kaf24@5077 85 #include <linux/mm.h>
kaf24@5077 86 #include <linux/socket.h>
kaf24@5077 87 #include <linux/sockios.h>
kaf24@5077 88 #include <linux/errno.h>
kaf24@5077 89 #include <linux/interrupt.h>
kaf24@5077 90 #include <linux/if_ether.h>
kaf24@5077 91 #include <linux/netdevice.h>
kaf24@5077 92 #include <linux/etherdevice.h>
kaf24@5077 93 #include <linux/notifier.h>
kaf24@5077 94 #include <linux/skbuff.h>
kaf24@5077 95 #include <net/sock.h>
kaf24@5077 96 #include <linux/rtnetlink.h>
kaf24@5077 97 #include <linux/proc_fs.h>
kaf24@5077 98 #include <linux/seq_file.h>
kaf24@5077 99 #include <linux/stat.h>
kaf24@5077 100 #include <linux/if_bridge.h>
kaf24@5077 101 #include <linux/divert.h>
kaf24@5077 102 #include <net/dst.h>
kaf24@5077 103 #include <net/pkt_sched.h>
kaf24@5077 104 #include <net/checksum.h>
kaf24@5077 105 #include <linux/highmem.h>
kaf24@5077 106 #include <linux/init.h>
kaf24@5077 107 #include <linux/kmod.h>
kaf24@5077 108 #include <linux/module.h>
kaf24@5077 109 #include <linux/kallsyms.h>
kaf24@5077 110 #include <linux/netpoll.h>
kaf24@5077 111 #include <linux/rcupdate.h>
kaf24@5077 112 #include <linux/delay.h>
kaf24@5077 113 #ifdef CONFIG_NET_RADIO
kaf24@5077 114 #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
kaf24@5077 115 #include <net/iw_handler.h>
kaf24@5077 116 #endif /* CONFIG_NET_RADIO */
kaf24@5077 117 #include <asm/current.h>
kaf24@10555 118 #include <linux/err.h>
kaf24@5077 119
cl349@8706 120 #ifdef CONFIG_XEN
kaf24@5077 121 #include <net/ip.h>
kaf24@5077 122 #include <linux/tcp.h>
kaf24@5077 123 #include <linux/udp.h>
cl349@8706 124 #endif
kaf24@5077 125
kaf24@5077 126 /*
kaf24@5077 127 * The list of packet types we will receive (as opposed to discard)
kaf24@5077 128 * and the routines to invoke.
kaf24@5077 129 *
kaf24@5077 130 * Why 16. Because with 16 the only overlap we get on a hash of the
kaf24@5077 131 * low nibble of the protocol value is RARP/SNAP/X.25.
kaf24@5077 132 *
kaf24@5077 133 * NOTE: That is no longer true with the addition of VLAN tags. Not
kaf24@5077 134 * sure which should go first, but I bet it won't make much
kaf24@5077 135 * difference if we are running VLANs. The good news is that
kaf24@5077 136 * this protocol won't be in the list unless compiled in, so
kaf24@5077 137 * the average user (w/out VLANs) will not be adversely affected.
kaf24@5077 138 * --BLG
kaf24@5077 139 *
kaf24@5077 140 * 0800 IP
kaf24@5077 141 * 8100 802.1Q VLAN
kaf24@5077 142 * 0001 802.3
kaf24@5077 143 * 0002 AX.25
kaf24@5077 144 * 0004 802.2
kaf24@5077 145 * 8035 RARP
kaf24@5077 146 * 0005 SNAP
kaf24@5077 147 * 0805 X.25
kaf24@5077 148 * 0806 ARP
kaf24@5077 149 * 8137 IPX
kaf24@5077 150 * 0009 Localtalk
kaf24@5077 151 * 86DD IPv6
kaf24@5077 152 */
kaf24@5077 153
/*
 * ptype_lock is taken (with bottom halves disabled) by dev_add_pack() and
 * __dev_remove_pack() around every modification of the two lists below.
 */
kaf24@5077 154 static DEFINE_SPINLOCK(ptype_lock);
/* Handlers for one specific protocol, bucketed by ntohs(type) & 15. */
kaf24@5077 155 static struct list_head ptype_base[16]; /* 16 way hashed list */
/* Handlers registered with type ETH_P_ALL. */
kaf24@5077 156 static struct list_head ptype_all; /* Taps */
kaf24@5077 157
kaf24@5077 158 /*
kaf24@5077 159 * The @dev_base list is protected by @dev_base_lock and the rtnl
kaf24@5077 160 * semaphore.
kaf24@5077 161 *
kaf24@5077 162 * Pure readers hold dev_base_lock for reading.
kaf24@5077 163 *
kaf24@5077 164 * Writers must hold the rtnl semaphore while they loop through the
kaf24@5077 165 * dev_base list, and hold dev_base_lock for writing when they do the
kaf24@5077 166 * actual updates. This allows pure readers to access the list even
kaf24@5077 167 * while a writer is preparing to update it.
kaf24@5077 168 *
kaf24@5077 169 * To put it another way, dev_base_lock is held for writing only to
kaf24@5077 170 * protect against pure readers; the rtnl semaphore provides the
kaf24@5077 171 * protection against other writers.
kaf24@5077 172 *
kaf24@5077 173 * See, for example usages, register_netdevice() and
kaf24@5077 174 * unregister_netdevice(), which must be called with the rtnl
kaf24@5077 175 * semaphore held.
kaf24@5077 176 */
/* Head of the list of network devices; entries are chained through ->next. */
kaf24@5077 177 struct net_device *dev_base;
/*
 * Starts out pointing at dev_base itself.
 * NOTE(review): presumably tracks the link where the next device is
 * appended -- its users are not visible in this part of the file.
 */
kaf24@5077 178 static struct net_device **dev_tail = &dev_base;
kaf24@5077 179 DEFINE_RWLOCK(dev_base_lock);
kaf24@5077 180
kaf24@5077 181 EXPORT_SYMBOL(dev_base);
kaf24@5077 182 EXPORT_SYMBOL(dev_base_lock);
kaf24@5077 183
/* Both hash tables below have 1 << NETDEV_HASHBITS buckets. */
kaf24@5077 184 #define NETDEV_HASHBITS 8
/* Devices hashed by name (see dev_name_hash()). */
kaf24@5077 185 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
/* Devices hashed by ifindex (see dev_index_hash()). */
kaf24@5077 186 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
kaf24@5077 187
kaf24@5077 188 static inline struct hlist_head *dev_name_hash(const char *name)
kaf24@5077 189 {
kaf24@5077 190 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
kaf24@5077 191 return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
kaf24@5077 192 }
kaf24@5077 193
kaf24@5077 194 static inline struct hlist_head *dev_index_hash(int ifindex)
kaf24@5077 195 {
kaf24@5077 196 return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
kaf24@5077 197 }
kaf24@5077 198
kaf24@5077 199 /*
kaf24@5077 200 * Our notifier list
kaf24@5077 201 */
kaf24@5077 202
/*
 * Head of the chain passed to notifier_call_chain() for NETDEV_* events
 * (e.g. NETDEV_CHANGENAME, NETDEV_FEAT_CHANGE, NETDEV_CHANGE).
 */
kaf24@5077 203 static struct notifier_block *netdev_chain;
kaf24@5077 204
kaf24@5077 205 /*
kaf24@5077 206 * Device drivers call our routines to queue packets here. We empty the
kaf24@5077 207 * queue in the local softnet handler.
kaf24@5077 208 */
cl349@8718 209 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
kaf24@5077 210
/*
 * sysfs glue: external functions when CONFIG_SYSFS is set, otherwise
 * no-op stubs (the two value-producing ones evaluate to 0).
 */
kaf24@5077 211 #ifdef CONFIG_SYSFS
kaf24@5077 212 extern int netdev_sysfs_init(void);
kaf24@5077 213 extern int netdev_register_sysfs(struct net_device *);
kaf24@5077 214 extern void netdev_unregister_sysfs(struct net_device *);
kaf24@5077 215 #else
kaf24@5077 216 #define netdev_sysfs_init() (0)
kaf24@5077 217 #define netdev_register_sysfs(dev) (0)
kaf24@5077 218 #define netdev_unregister_sysfs(dev) do { } while(0)
kaf24@5077 219 #endif
kaf24@5077 220
kaf24@5077 221
kaf24@5077 222 /*******************************************************************************
kaf24@5077 223
kaf24@5077 224 Protocol management and registration routines
kaf24@5077 225
kaf24@5077 226 *******************************************************************************/
kaf24@5077 227
kaf24@5077 228 /*
kaf24@5077 229 * For efficiency
 *
 * netdev_nit is incremented by dev_add_pack() and decremented by
 * __dev_remove_pack() whenever the handler's type is ETH_P_ALL.
kaf24@5077 230 */
kaf24@5077 231
kaf24@5077 232 int netdev_nit;
kaf24@5077 233
kaf24@5077 234 /*
kaf24@5077 235 * Add a protocol ID to the list. Now that the input handler is
kaf24@5077 236 * smarter we can dispense with all the messy stuff that used to be
kaf24@5077 237 * here.
kaf24@5077 238 *
kaf24@5077 239 * BEWARE!!! Protocol handlers, mangling input packets,
kaf24@5077 240 * MUST BE last in hash buckets and checking protocol handlers
kaf24@5077 241 * MUST start from promiscuous ptype_all chain in net_bh.
kaf24@5077 242 * It is true now, do not change it.
kaf24@5077 243 * Explanation follows: if protocol handler, mangling packet, will
kaf24@5077 244 * be the first on list, it is not able to sense, that packet
kaf24@5077 245 * is cloned and should be copied-on-write, so that it will
kaf24@5077 246 * change it and subsequent readers will get broken packet.
kaf24@5077 247 * --ANK (980803)
kaf24@5077 248 */
kaf24@5077 249
kaf24@5077 250 /**
kaf24@5077 251 * dev_add_pack - add packet handler
kaf24@5077 252 * @pt: packet type declaration
kaf24@5077 253 *
kaf24@5077 254 * Add a protocol handler to the networking stack. The passed &packet_type
kaf24@5077 255 * is linked into kernel lists and may not be freed until it has been
kaf24@5077 256 * removed from the kernel lists.
kaf24@5077 257 *
kaf24@5077 258 * This call does not sleep therefore it can not
kaf24@5077 259 * guarantee all CPU's that are in middle of receiving packets
kaf24@5077 260 * will see the new packet type (until the next received packet).
kaf24@5077 261 */
kaf24@5077 262
kaf24@5077 263 void dev_add_pack(struct packet_type *pt)
kaf24@5077 264 {
kaf24@5077 265 int hash;
kaf24@5077 266
kaf24@5077 267 spin_lock_bh(&ptype_lock);
kaf24@5077 268 if (pt->type == htons(ETH_P_ALL)) {
kaf24@5077 269 netdev_nit++;
kaf24@5077 270 list_add_rcu(&pt->list, &ptype_all);
kaf24@5077 271 } else {
kaf24@5077 272 hash = ntohs(pt->type) & 15;
kaf24@5077 273 list_add_rcu(&pt->list, &ptype_base[hash]);
kaf24@5077 274 }
kaf24@5077 275 spin_unlock_bh(&ptype_lock);
kaf24@5077 276 }
kaf24@5077 277
kaf24@5077 278 /**
kaf24@5077 279 * __dev_remove_pack - remove packet handler
kaf24@5077 280 * @pt: packet type declaration
kaf24@5077 281 *
kaf24@5077 282 * Remove a protocol handler that was previously added to the kernel
kaf24@5077 283 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
kaf24@5077 284 * from the kernel lists and can be freed or reused once this function
kaf24@5077 285 * returns.
kaf24@5077 286 *
kaf24@5077 287 * The packet type might still be in use by receivers
kaf24@5077 288 * and must not be freed until after all the CPU's have gone
kaf24@5077 289 * through a quiescent state.
kaf24@5077 290 */
kaf24@5077 291 void __dev_remove_pack(struct packet_type *pt)
kaf24@5077 292 {
kaf24@5077 293 struct list_head *head;
kaf24@5077 294 struct packet_type *pt1;
kaf24@5077 295
kaf24@5077 296 spin_lock_bh(&ptype_lock);
kaf24@5077 297
kaf24@5077 298 if (pt->type == htons(ETH_P_ALL)) {
kaf24@5077 299 netdev_nit--;
kaf24@5077 300 head = &ptype_all;
kaf24@5077 301 } else
kaf24@5077 302 head = &ptype_base[ntohs(pt->type) & 15];
kaf24@5077 303
kaf24@5077 304 list_for_each_entry(pt1, head, list) {
kaf24@5077 305 if (pt == pt1) {
kaf24@5077 306 list_del_rcu(&pt->list);
kaf24@5077 307 goto out;
kaf24@5077 308 }
kaf24@5077 309 }
kaf24@5077 310
kaf24@5077 311 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
kaf24@5077 312 out:
kaf24@5077 313 spin_unlock_bh(&ptype_lock);
kaf24@5077 314 }
/**
 *	dev_remove_pack - unlink a packet handler and wait for readers
 *	@pt: packet type declaration
 *
 *	Unlinks a handler previously registered with dev_add_pack() via
 *	__dev_remove_pack() and then calls synchronize_net().  Once this
 *	function returns, @pt can be freed or reused.
 *
 *	This call sleeps to guarantee that no CPU is still looking at the
 *	packet type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* Step 1: take it off the list. */
	__dev_remove_pack(pt);

	/* Step 2: wait until nobody can still be walking over it. */
	synchronize_net();
}
kaf24@5077 333
kaf24@5077 334 /******************************************************************************
kaf24@5077 335
kaf24@5077 336 Device Boot-time Settings Routines
kaf24@5077 337
kaf24@5077 338 *******************************************************************************/
kaf24@5077 339
kaf24@5077 340 /* Boot time configuration table */
/*
 * Filled by netdev_boot_setup_add(); a slot whose name starts with '\0'
 * or ' ' is treated as free.
 */
kaf24@5077 341 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
kaf24@5077 342
kaf24@5077 343 /**
kaf24@5077 344 * netdev_boot_setup_add - add new setup entry
kaf24@5077 345 * @name: name of the device
kaf24@5077 346 * @map: configured settings for the device
kaf24@5077 347 *
kaf24@5077 348 * Adds new setup entry to the dev_boot_setup list. The function
kaf24@5077 349 * returns 0 on error and 1 on success. This is a generic routine to
kaf24@5077 350 * all netdevices.
kaf24@5077 351 */
kaf24@5077 352 static int netdev_boot_setup_add(char *name, struct ifmap *map)
kaf24@5077 353 {
kaf24@5077 354 struct netdev_boot_setup *s;
kaf24@5077 355 int i;
kaf24@5077 356
kaf24@5077 357 s = dev_boot_setup;
kaf24@5077 358 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
kaf24@5077 359 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
kaf24@5077 360 memset(s[i].name, 0, sizeof(s[i].name));
kaf24@5077 361 strcpy(s[i].name, name);
kaf24@5077 362 memcpy(&s[i].map, map, sizeof(s[i].map));
kaf24@5077 363 break;
kaf24@5077 364 }
kaf24@5077 365 }
kaf24@5077 366
kaf24@5077 367 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
kaf24@5077 368 }
kaf24@5077 369
kaf24@5077 370 /**
kaf24@5077 371 * netdev_boot_setup_check - check boot time settings
kaf24@5077 372 * @dev: the netdevice
kaf24@5077 373 *
kaf24@5077 374 * Check boot time settings for the device.
kaf24@5077 375 * The found settings are set for the device to be used
kaf24@5077 376 * later in the device probing.
kaf24@5077 377 * Returns 0 if no settings found, 1 if they are.
kaf24@5077 378 */
kaf24@5077 379 int netdev_boot_setup_check(struct net_device *dev)
kaf24@5077 380 {
kaf24@5077 381 struct netdev_boot_setup *s = dev_boot_setup;
kaf24@5077 382 int i;
kaf24@5077 383
kaf24@5077 384 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
kaf24@5077 385 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
kaf24@5077 386 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
kaf24@5077 387 dev->irq = s[i].map.irq;
kaf24@5077 388 dev->base_addr = s[i].map.base_addr;
kaf24@5077 389 dev->mem_start = s[i].map.mem_start;
kaf24@5077 390 dev->mem_end = s[i].map.mem_end;
kaf24@5077 391 return 1;
kaf24@5077 392 }
kaf24@5077 393 }
kaf24@5077 394 return 0;
kaf24@5077 395 }
kaf24@5077 396
kaf24@5077 397
kaf24@5077 398 /**
kaf24@5077 399 * netdev_boot_base - get address from boot time settings
kaf24@5077 400 * @prefix: prefix for network device
kaf24@5077 401 * @unit: id for network device
kaf24@5077 402 *
kaf24@5077 403 * Check boot time settings for the base address of device.
kaf24@5077 404 * The found settings are set for the device to be used
kaf24@5077 405 * later in the device probing.
kaf24@5077 406 * Returns 0 if no settings found.
kaf24@5077 407 */
kaf24@5077 408 unsigned long netdev_boot_base(const char *prefix, int unit)
kaf24@5077 409 {
kaf24@5077 410 const struct netdev_boot_setup *s = dev_boot_setup;
kaf24@5077 411 char name[IFNAMSIZ];
kaf24@5077 412 int i;
kaf24@5077 413
kaf24@5077 414 sprintf(name, "%s%d", prefix, unit);
kaf24@5077 415
kaf24@5077 416 /*
kaf24@5077 417 * If device already registered then return base of 1
kaf24@5077 418 * to indicate not to probe for this interface
kaf24@5077 419 */
kaf24@5077 420 if (__dev_get_by_name(name))
kaf24@5077 421 return 1;
kaf24@5077 422
kaf24@5077 423 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
kaf24@5077 424 if (!strcmp(name, s[i].name))
kaf24@5077 425 return s[i].map.base_addr;
kaf24@5077 426 return 0;
kaf24@5077 427 }
kaf24@5077 428
kaf24@5077 429 /*
kaf24@5077 430 * Saves at boot time configured settings for any netdevice.
kaf24@5077 431 */
kaf24@5077 432 int __init netdev_boot_setup(char *str)
kaf24@5077 433 {
kaf24@5077 434 int ints[5];
kaf24@5077 435 struct ifmap map;
kaf24@5077 436
kaf24@5077 437 str = get_options(str, ARRAY_SIZE(ints), ints);
kaf24@5077 438 if (!str || !*str)
kaf24@5077 439 return 0;
kaf24@5077 440
kaf24@5077 441 /* Save settings */
kaf24@5077 442 memset(&map, 0, sizeof(map));
kaf24@5077 443 if (ints[0] > 0)
kaf24@5077 444 map.irq = ints[1];
kaf24@5077 445 if (ints[0] > 1)
kaf24@5077 446 map.base_addr = ints[2];
kaf24@5077 447 if (ints[0] > 2)
kaf24@5077 448 map.mem_start = ints[3];
kaf24@5077 449 if (ints[0] > 3)
kaf24@5077 450 map.mem_end = ints[4];
kaf24@5077 451
kaf24@5077 452 /* Add new entry to the list */
kaf24@5077 453 return netdev_boot_setup_add(str, &map);
kaf24@5077 454 }
kaf24@5077 455
kaf24@5077 456 __setup("netdev=", netdev_boot_setup);
kaf24@5077 457
kaf24@5077 458 /*******************************************************************************
kaf24@5077 459
kaf24@5077 460 Device Interface Subroutines
kaf24@5077 461
kaf24@5077 462 *******************************************************************************/
kaf24@5077 463
kaf24@5077 464 /**
kaf24@5077 465 * __dev_get_by_name - find a device by its name
kaf24@5077 466 * @name: name to find
kaf24@5077 467 *
kaf24@5077 468 * Find an interface by name. Must be called under RTNL semaphore
kaf24@5077 469 * or @dev_base_lock. If the name is found a pointer to the device
kaf24@5077 470 * is returned. If the name is not found then %NULL is returned. The
kaf24@5077 471 * reference counters are not incremented so the caller must be
kaf24@5077 472 * careful with locks.
kaf24@5077 473 */
kaf24@5077 474
kaf24@5077 475 struct net_device *__dev_get_by_name(const char *name)
kaf24@5077 476 {
kaf24@5077 477 struct hlist_node *p;
kaf24@5077 478
kaf24@5077 479 hlist_for_each(p, dev_name_hash(name)) {
kaf24@5077 480 struct net_device *dev
kaf24@5077 481 = hlist_entry(p, struct net_device, name_hlist);
kaf24@5077 482 if (!strncmp(dev->name, name, IFNAMSIZ))
kaf24@5077 483 return dev;
kaf24@5077 484 }
kaf24@5077 485 return NULL;
kaf24@5077 486 }
kaf24@5077 487
kaf24@5077 488 /**
kaf24@5077 489 * dev_get_by_name - find a device by its name
kaf24@5077 490 * @name: name to find
kaf24@5077 491 *
kaf24@5077 492 * Find an interface by name. This can be called from any
kaf24@5077 493 * context and does its own locking. The returned handle has
kaf24@5077 494 * the usage count incremented and the caller must use dev_put() to
kaf24@5077 495 * release it when it is no longer needed. %NULL is returned if no
kaf24@5077 496 * matching device is found.
kaf24@5077 497 */
kaf24@5077 498
kaf24@5077 499 struct net_device *dev_get_by_name(const char *name)
kaf24@5077 500 {
kaf24@5077 501 struct net_device *dev;
kaf24@5077 502
kaf24@5077 503 read_lock(&dev_base_lock);
kaf24@5077 504 dev = __dev_get_by_name(name);
kaf24@5077 505 if (dev)
kaf24@5077 506 dev_hold(dev);
kaf24@5077 507 read_unlock(&dev_base_lock);
kaf24@5077 508 return dev;
kaf24@5077 509 }
kaf24@5077 510
kaf24@5077 511 /**
kaf24@5077 512 * __dev_get_by_index - find a device by its ifindex
kaf24@5077 513 * @ifindex: index of device
kaf24@5077 514 *
kaf24@5077 515 * Search for an interface by index. Returns %NULL if the device
kaf24@5077 516 * is not found or a pointer to the device. The device has not
kaf24@5077 517 * had its reference counter increased so the caller must be careful
kaf24@5077 518 * about locking. The caller must hold either the RTNL semaphore
kaf24@5077 519 * or @dev_base_lock.
kaf24@5077 520 */
kaf24@5077 521
kaf24@5077 522 struct net_device *__dev_get_by_index(int ifindex)
kaf24@5077 523 {
kaf24@5077 524 struct hlist_node *p;
kaf24@5077 525
kaf24@5077 526 hlist_for_each(p, dev_index_hash(ifindex)) {
kaf24@5077 527 struct net_device *dev
kaf24@5077 528 = hlist_entry(p, struct net_device, index_hlist);
kaf24@5077 529 if (dev->ifindex == ifindex)
kaf24@5077 530 return dev;
kaf24@5077 531 }
kaf24@5077 532 return NULL;
kaf24@5077 533 }
kaf24@5077 534
kaf24@5077 535
kaf24@5077 536 /**
kaf24@5077 537 * dev_get_by_index - find a device by its ifindex
kaf24@5077 538 * @ifindex: index of device
kaf24@5077 539 *
kaf24@5077 540 * Search for an interface by index. Returns NULL if the device
kaf24@5077 541 * is not found or a pointer to the device. The device returned has
kaf24@5077 542 * had a reference added and the pointer is safe until the user calls
kaf24@5077 543 * dev_put to indicate they have finished with it.
kaf24@5077 544 */
kaf24@5077 545
kaf24@5077 546 struct net_device *dev_get_by_index(int ifindex)
kaf24@5077 547 {
kaf24@5077 548 struct net_device *dev;
kaf24@5077 549
kaf24@5077 550 read_lock(&dev_base_lock);
kaf24@5077 551 dev = __dev_get_by_index(ifindex);
kaf24@5077 552 if (dev)
kaf24@5077 553 dev_hold(dev);
kaf24@5077 554 read_unlock(&dev_base_lock);
kaf24@5077 555 return dev;
kaf24@5077 556 }
kaf24@5077 557
kaf24@5077 558 /**
kaf24@5077 559 * dev_getbyhwaddr - find a device by its hardware address
kaf24@5077 560 * @type: media type of device
kaf24@5077 561 * @ha: hardware address
kaf24@5077 562 *
kaf24@5077 563 * Search for an interface by MAC address. Returns NULL if the device
kaf24@5077 564 * is not found or a pointer to the device. The caller must hold the
kaf24@5077 565 * rtnl semaphore. The returned device has not had its ref count increased
kaf24@5077 566 * and the caller must therefore be careful about locking
kaf24@5077 567 *
kaf24@5077 568 * BUGS:
kaf24@5077 569 * If the API was consistent this would be __dev_get_by_hwaddr
kaf24@5077 570 */
kaf24@5077 571
kaf24@5077 572 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
kaf24@5077 573 {
kaf24@5077 574 struct net_device *dev;
kaf24@5077 575
kaf24@5077 576 ASSERT_RTNL();
kaf24@5077 577
kaf24@5077 578 for (dev = dev_base; dev; dev = dev->next)
kaf24@5077 579 if (dev->type == type &&
kaf24@5077 580 !memcmp(dev->dev_addr, ha, dev->addr_len))
kaf24@5077 581 break;
kaf24@5077 582 return dev;
kaf24@5077 583 }
kaf24@5077 584
cl349@8718 585 EXPORT_SYMBOL(dev_getbyhwaddr);
cl349@8718 586
kaf24@5077 587 struct net_device *dev_getfirstbyhwtype(unsigned short type)
kaf24@5077 588 {
kaf24@5077 589 struct net_device *dev;
kaf24@5077 590
kaf24@5077 591 rtnl_lock();
kaf24@5077 592 for (dev = dev_base; dev; dev = dev->next) {
kaf24@5077 593 if (dev->type == type) {
kaf24@5077 594 dev_hold(dev);
kaf24@5077 595 break;
kaf24@5077 596 }
kaf24@5077 597 }
kaf24@5077 598 rtnl_unlock();
kaf24@5077 599 return dev;
kaf24@5077 600 }
kaf24@5077 601
kaf24@5077 602 EXPORT_SYMBOL(dev_getfirstbyhwtype);
kaf24@5077 603
kaf24@5077 604 /**
kaf24@5077 605 * dev_get_by_flags - find any device with given flags
kaf24@5077 606 * @if_flags: IFF_* values
kaf24@5077 607 * @mask: bitmask of bits in if_flags to check
kaf24@5077 608 *
kaf24@5077 609 * Search for any interface with the given flags. Returns NULL if a device
kaf24@5077 610 * is not found or a pointer to the device. The device returned has
kaf24@5077 611 * had a reference added and the pointer is safe until the user calls
kaf24@5077 612 * dev_put to indicate they have finished with it.
kaf24@5077 613 */
kaf24@5077 614
kaf24@5077 615 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
kaf24@5077 616 {
kaf24@5077 617 struct net_device *dev;
kaf24@5077 618
kaf24@5077 619 read_lock(&dev_base_lock);
kaf24@5077 620 for (dev = dev_base; dev != NULL; dev = dev->next) {
kaf24@5077 621 if (((dev->flags ^ if_flags) & mask) == 0) {
kaf24@5077 622 dev_hold(dev);
kaf24@5077 623 break;
kaf24@5077 624 }
kaf24@5077 625 }
kaf24@5077 626 read_unlock(&dev_base_lock);
kaf24@5077 627 return dev;
kaf24@5077 628 }
kaf24@5077 629
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to allow sysfs to
 *	work.  A name is rejected when it is empty, is "." or "..", or
 *	contains a '/'.
 *
 *	Returns 1 if @name is acceptable, 0 otherwise.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;
	if (strchr(name, '/') != NULL)
		return 0;
	return 1;
}
kaf24@5077 644
kaf24@5077 645 /**
kaf24@5077 646 * dev_alloc_name - allocate a name for a device
kaf24@5077 647 * @dev: device
kaf24@5077 648 * @name: name format string
kaf24@5077 649 *
kaf24@5077 650 * Passed a format string - eg "lt%d" it will try and find a suitable
kaf24@5077 651 * id. Not efficient for many devices, not called a lot. The caller
kaf24@5077 652 * must hold the dev_base or rtnl lock while allocating the name and
kaf24@5077 653 * adding the device in order to avoid duplicates. Returns the number
kaf24@5077 654 * of the unit assigned or a negative errno code.
kaf24@5077 655 */
kaf24@5077 656
kaf24@5077 657 int dev_alloc_name(struct net_device *dev, const char *name)
kaf24@5077 658 {
kaf24@5077 659 int i = 0;
kaf24@5077 660 char buf[IFNAMSIZ];
kaf24@5077 661 const char *p;
kaf24@5077 662 const int max_netdevices = 8*PAGE_SIZE;
kaf24@5077 663 long *inuse;
kaf24@5077 664 struct net_device *d;
kaf24@5077 665
kaf24@5077 666 p = strnchr(name, IFNAMSIZ-1, '%');
kaf24@5077 667 if (p) {
kaf24@5077 668 /*
kaf24@5077 669 * Verify the string as this thing may have come from
kaf24@5077 670 * the user. There must be either one "%d" and no other "%"
kaf24@5077 671 * characters.
kaf24@5077 672 */
kaf24@5077 673 if (p[1] != 'd' || strchr(p + 2, '%'))
kaf24@5077 674 return -EINVAL;
kaf24@5077 675
kaf24@5077 676 /* Use one page as a bit array of possible slots */
kaf24@5077 677 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
kaf24@5077 678 if (!inuse)
kaf24@5077 679 return -ENOMEM;
kaf24@5077 680
kaf24@5077 681 for (d = dev_base; d; d = d->next) {
kaf24@5077 682 if (!sscanf(d->name, name, &i))
kaf24@5077 683 continue;
kaf24@5077 684 if (i < 0 || i >= max_netdevices)
kaf24@5077 685 continue;
kaf24@5077 686
kaf24@5077 687 /* avoid cases where sscanf is not exact inverse of printf */
kaf24@5077 688 snprintf(buf, sizeof(buf), name, i);
kaf24@5077 689 if (!strncmp(buf, d->name, IFNAMSIZ))
kaf24@5077 690 set_bit(i, inuse);
kaf24@5077 691 }
kaf24@5077 692
kaf24@5077 693 i = find_first_zero_bit(inuse, max_netdevices);
kaf24@5077 694 free_page((unsigned long) inuse);
kaf24@5077 695 }
kaf24@5077 696
kaf24@5077 697 snprintf(buf, sizeof(buf), name, i);
kaf24@5077 698 if (!__dev_get_by_name(buf)) {
kaf24@5077 699 strlcpy(dev->name, buf, IFNAMSIZ);
kaf24@5077 700 return i;
kaf24@5077 701 }
kaf24@5077 702
kaf24@5077 703 /* It is possible to run out of possible slots
kaf24@5077 704 * when the name is long and there isn't enough space left
kaf24@5077 705 * for the digits, or if all bits are used.
kaf24@5077 706 */
kaf24@5077 707 return -ENFILE;
kaf24@5077 708 }
kaf24@5077 709
kaf24@5077 710
kaf24@5077 711 /**
kaf24@5077 712 * dev_change_name - change name of a device
kaf24@5077 713 * @dev: device
kaf24@5077 714 * @newname: name (or format string) must be at least IFNAMSIZ
kaf24@5077 715 *
kaf24@5077 716 * Change name of a device, can pass format strings "eth%d".
kaf24@5077 717 * for wildcarding.
kaf24@5077 718 */
kaf24@5077 719 int dev_change_name(struct net_device *dev, char *newname)
kaf24@5077 720 {
kaf24@5077 721 int err = 0;
kaf24@5077 722
kaf24@5077 723 ASSERT_RTNL();
kaf24@5077 724
kaf24@5077 725 if (dev->flags & IFF_UP)
kaf24@5077 726 return -EBUSY;
kaf24@5077 727
kaf24@5077 728 if (!dev_valid_name(newname))
kaf24@5077 729 return -EINVAL;
kaf24@5077 730
kaf24@5077 731 if (strchr(newname, '%')) {
kaf24@5077 732 err = dev_alloc_name(dev, newname);
kaf24@5077 733 if (err < 0)
kaf24@5077 734 return err;
kaf24@5077 735 strcpy(newname, dev->name);
kaf24@5077 736 }
kaf24@5077 737 else if (__dev_get_by_name(newname))
kaf24@5077 738 return -EEXIST;
kaf24@5077 739 else
kaf24@5077 740 strlcpy(dev->name, newname, IFNAMSIZ);
kaf24@5077 741
kaf24@5077 742 err = class_device_rename(&dev->class_dev, dev->name);
kaf24@5077 743 if (!err) {
kaf24@5077 744 hlist_del(&dev->name_hlist);
kaf24@5077 745 hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
kaf24@5077 746 notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
kaf24@5077 747 }
kaf24@5077 748
kaf24@5077 749 return err;
kaf24@5077 750 }
kaf24@5077 751
kaf24@5077 752 /**
vh249@5730 753 * netdev_features_change - device changes fatures
vh249@5730 754 * @dev: device to cause notification
vh249@5730 755 *
vh249@5730 756 * Called to indicate a device has changed features.
vh249@5730 757 */
vh249@5730 758 void netdev_features_change(struct net_device *dev)
vh249@5730 759 {
vh249@5730 760 notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
vh249@5730 761 }
vh249@5730 762 EXPORT_SYMBOL(netdev_features_change);
vh249@5730 763
vh249@5730 764 /**
kaf24@5077 765 * netdev_state_change - device changes state
kaf24@5077 766 * @dev: device to cause notification
kaf24@5077 767 *
kaf24@5077 768 * Called to indicate a device has changed state. This function calls
kaf24@5077 769 * the notifier chains for netdev_chain and sends a NEWLINK message
kaf24@5077 770 * to the routing socket.
kaf24@5077 771 */
kaf24@5077 772 void netdev_state_change(struct net_device *dev)
kaf24@5077 773 {
kaf24@5077 774 if (dev->flags & IFF_UP) {
kaf24@5077 775 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
kaf24@5077 776 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
kaf24@5077 777 }
kaf24@5077 778 }
kaf24@5077 779
kaf24@5077 780 /**
kaf24@5077 781 * dev_load - load a network module
kaf24@5077 782 * @name: name of interface
kaf24@5077 783 *
kaf24@5077 784 * If a network interface is not present and the process has suitable
kaf24@5077 785 * privileges this function loads the module. If module loading is not
kaf24@5077 786 * available in this kernel then it becomes a nop.
kaf24@5077 787 */
kaf24@5077 788
kaf24@5077 789 void dev_load(const char *name)
kaf24@5077 790 {
kaf24@5077 791 struct net_device *dev;
kaf24@5077 792
kaf24@5077 793 read_lock(&dev_base_lock);
kaf24@5077 794 dev = __dev_get_by_name(name);
kaf24@5077 795 read_unlock(&dev_base_lock);
kaf24@5077 796
kaf24@5077 797 if (!dev && capable(CAP_SYS_MODULE))
kaf24@5077 798 request_module("%s", name);
kaf24@5077 799 }
kaf24@5077 800
kaf24@5077 801 static int default_rebuild_header(struct sk_buff *skb)
kaf24@5077 802 {
kaf24@5077 803 printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
kaf24@5077 804 skb->dev ? skb->dev->name : "NULL!!!");
kaf24@5077 805 kfree_skb(skb);
kaf24@5077 806 return 1;
kaf24@5077 807 }
kaf24@5077 808
kaf24@5077 809
kaf24@5077 810 /**
kaf24@5077 811 * dev_open - prepare an interface for use.
kaf24@5077 812 * @dev: device to open
kaf24@5077 813 *
kaf24@5077 814 * Takes a device from down to up state. The device's private open
kaf24@5077 815 * function is invoked and then the multicast lists are loaded. Finally
kaf24@5077 816 * the device is moved into the up state and a %NETDEV_UP message is
kaf24@5077 817 * sent to the netdev notifier chain.
kaf24@5077 818 *
kaf24@5077 819 * Calling this function on an active interface is a nop. On a failure
kaf24@5077 820 * a negative errno code is returned.
kaf24@5077 821 */
kaf24@5077 822 int dev_open(struct net_device *dev)
kaf24@5077 823 {
kaf24@5077 824 int ret = 0;
kaf24@5077 825
kaf24@5077 826 /*
kaf24@5077 827 * Is it already up?
kaf24@5077 828 */
kaf24@5077 829
kaf24@5077 830 if (dev->flags & IFF_UP)
kaf24@5077 831 return 0;
kaf24@5077 832
kaf24@5077 833 /*
kaf24@5077 834 * Is it even present?
kaf24@5077 835 */
kaf24@5077 836 if (!netif_device_present(dev))
kaf24@5077 837 return -ENODEV;
kaf24@5077 838
kaf24@5077 839 /*
kaf24@5077 840 * Call device private open method
kaf24@5077 841 */
kaf24@5077 842 set_bit(__LINK_STATE_START, &dev->state);
kaf24@5077 843 if (dev->open) {
kaf24@5077 844 ret = dev->open(dev);
kaf24@5077 845 if (ret)
kaf24@5077 846 clear_bit(__LINK_STATE_START, &dev->state);
kaf24@5077 847 }
kaf24@5077 848
kaf24@5077 849 /*
kaf24@5077 850 * If it went open OK then:
kaf24@5077 851 */
kaf24@5077 852
kaf24@5077 853 if (!ret) {
kaf24@5077 854 /*
kaf24@5077 855 * Set the flags.
kaf24@5077 856 */
kaf24@5077 857 dev->flags |= IFF_UP;
kaf24@5077 858
kaf24@5077 859 /*
kaf24@5077 860 * Initialize multicasting status
kaf24@5077 861 */
kaf24@5077 862 dev_mc_upload(dev);
kaf24@5077 863
kaf24@5077 864 /*
kaf24@5077 865 * Wakeup transmit queue engine
kaf24@5077 866 */
kaf24@5077 867 dev_activate(dev);
kaf24@5077 868
kaf24@5077 869 /*
kaf24@5077 870 * ... and announce new interface.
kaf24@5077 871 */
kaf24@5077 872 notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
kaf24@5077 873 }
kaf24@5077 874 return ret;
kaf24@5077 875 }
kaf24@5077 876
kaf24@5077 877 /**
kaf24@5077 878 * dev_close - shutdown an interface.
kaf24@5077 879 * @dev: device to shutdown
kaf24@5077 880 *
kaf24@5077 881 * This function moves an active device into down state. A
kaf24@5077 882 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
kaf24@5077 883 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
kaf24@5077 884 * chain.
kaf24@5077 885 */
kaf24@5077 886 int dev_close(struct net_device *dev)
kaf24@5077 887 {
kaf24@5077 888 if (!(dev->flags & IFF_UP))
kaf24@5077 889 return 0;
kaf24@5077 890
kaf24@5077 891 /*
kaf24@5077 892 * Tell people we are going down, so that they can
kaf24@5077 893 * prepare to death, when device is still operating.
kaf24@5077 894 */
kaf24@5077 895 notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
kaf24@5077 896
kaf24@5077 897 dev_deactivate(dev);
kaf24@5077 898
kaf24@5077 899 clear_bit(__LINK_STATE_START, &dev->state);
kaf24@5077 900
kaf24@5077 901 /* Synchronize to scheduled poll. We cannot touch poll list,
kaf24@5077 902 * it can be even on different cpu. So just clear netif_running(),
kaf24@5077 903 * and wait when poll really will happen. Actually, the best place
kaf24@5077 904 * for this is inside dev->stop() after device stopped its irq
kaf24@5077 905 * engine, but this requires more changes in devices. */
kaf24@5077 906
kaf24@5077 907 smp_mb__after_clear_bit(); /* Commit netif_running(). */
kaf24@5077 908 while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
kaf24@5077 909 /* No hurry. */
cl349@8718 910 msleep(1);
kaf24@5077 911 }
kaf24@5077 912
kaf24@5077 913 /*
kaf24@5077 914 * Call the device specific close. This cannot fail.
kaf24@5077 915 * Only if device is UP
kaf24@5077 916 *
kaf24@5077 917 * We allow it to be called even after a DETACH hot-plug
kaf24@5077 918 * event.
kaf24@5077 919 */
kaf24@5077 920 if (dev->stop)
kaf24@5077 921 dev->stop(dev);
kaf24@5077 922
kaf24@5077 923 /*
kaf24@5077 924 * Device is now down.
kaf24@5077 925 */
kaf24@5077 926
kaf24@5077 927 dev->flags &= ~IFF_UP;
kaf24@5077 928
kaf24@5077 929 /*
kaf24@5077 930 * Tell people we are down
kaf24@5077 931 */
kaf24@5077 932 notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
kaf24@5077 933
kaf24@5077 934 return 0;
kaf24@5077 935 }
kaf24@5077 936
kaf24@5077 937
kaf24@5077 938 /*
kaf24@5077 939 * Device change register/unregister. These are not inline or static
kaf24@5077 940 * as we export them to the world.
kaf24@5077 941 */
kaf24@5077 942
kaf24@5077 943 /**
kaf24@5077 944 * register_netdevice_notifier - register a network notifier block
kaf24@5077 945 * @nb: notifier
kaf24@5077 946 *
kaf24@5077 947 * Register a notifier to be called when network device events occur.
kaf24@5077 948 * The notifier passed is linked into the kernel structures and must
kaf24@5077 949 * not be reused until it has been unregistered. A negative errno code
kaf24@5077 950 * is returned on a failure.
kaf24@5077 951 *
kaf24@5077 952 * When registered all registration and up events are replayed
kaf24@5077 953 * to the new notifier to allow device to have a race free
kaf24@5077 954 * view of the network device list.
kaf24@5077 955 */
kaf24@5077 956
kaf24@5077 957 int register_netdevice_notifier(struct notifier_block *nb)
kaf24@5077 958 {
kaf24@5077 959 struct net_device *dev;
kaf24@5077 960 int err;
kaf24@5077 961
kaf24@5077 962 rtnl_lock();
kaf24@5077 963 err = notifier_chain_register(&netdev_chain, nb);
kaf24@5077 964 if (!err) {
kaf24@5077 965 for (dev = dev_base; dev; dev = dev->next) {
kaf24@5077 966 nb->notifier_call(nb, NETDEV_REGISTER, dev);
kaf24@5077 967
kaf24@5077 968 if (dev->flags & IFF_UP)
kaf24@5077 969 nb->notifier_call(nb, NETDEV_UP, dev);
kaf24@5077 970 }
kaf24@5077 971 }
kaf24@5077 972 rtnl_unlock();
kaf24@5077 973 return err;
kaf24@5077 974 }
kaf24@5077 975
kaf24@5077 976 /**
kaf24@5077 977 * unregister_netdevice_notifier - unregister a network notifier block
kaf24@5077 978 * @nb: notifier
kaf24@5077 979 *
kaf24@5077 980 * Unregister a notifier previously registered by
kaf24@5077 981 * register_netdevice_notifier(). The notifier is unlinked into the
kaf24@5077 982 * kernel structures and may then be reused. A negative errno code
kaf24@5077 983 * is returned on a failure.
kaf24@5077 984 */
kaf24@5077 985
kaf24@5077 986 int unregister_netdevice_notifier(struct notifier_block *nb)
kaf24@5077 987 {
kaf24@5077 988 return notifier_chain_unregister(&netdev_chain, nb);
kaf24@5077 989 }
kaf24@5077 990
kaf24@5077 991 /**
kaf24@5077 992 * call_netdevice_notifiers - call all network notifier blocks
kaf24@5077 993 * @val: value passed unmodified to notifier function
kaf24@5077 994 * @v: pointer passed unmodified to notifier function
kaf24@5077 995 *
kaf24@5077 996 * Call all network notifier blocks. Parameters and return value
kaf24@5077 997 * are as for notifier_call_chain().
kaf24@5077 998 */
kaf24@5077 999
kaf24@5077 1000 int call_netdevice_notifiers(unsigned long val, void *v)
kaf24@5077 1001 {
kaf24@5077 1002 return notifier_call_chain(&netdev_chain, val, v);
kaf24@5077 1003 }
kaf24@5077 1004
kaf24@5077 1005 /* When > 0 there are consumers of rx skb time stamps */
kaf24@5077 1006 static atomic_t netstamp_needed = ATOMIC_INIT(0);
kaf24@5077 1007
kaf24@5077 1008 void net_enable_timestamp(void)
kaf24@5077 1009 {
kaf24@5077 1010 atomic_inc(&netstamp_needed);
kaf24@5077 1011 }
kaf24@5077 1012
kaf24@5077 1013 void net_disable_timestamp(void)
kaf24@5077 1014 {
kaf24@5077 1015 atomic_dec(&netstamp_needed);
kaf24@5077 1016 }
kaf24@5077 1017
cl349@8718 1018 void __net_timestamp(struct sk_buff *skb)
cl349@8718 1019 {
cl349@8718 1020 struct timeval tv;
cl349@8718 1021
cl349@8718 1022 do_gettimeofday(&tv);
cl349@8718 1023 skb_set_timestamp(skb, &tv);
cl349@8718 1024 }
cl349@8718 1025 EXPORT_SYMBOL(__net_timestamp);
cl349@8718 1026
cl349@8718 1027 static inline void net_timestamp(struct sk_buff *skb)
kaf24@5077 1028 {
kaf24@5077 1029 if (atomic_read(&netstamp_needed))
cl349@8718 1030 __net_timestamp(skb);
kaf24@5077 1031 else {
cl349@8718 1032 skb->tstamp.off_sec = 0;
cl349@8718 1033 skb->tstamp.off_usec = 0;
kaf24@5077 1034 }
kaf24@5077 1035 }
kaf24@5077 1036
kaf24@5077 1037 /*
kaf24@5077 1038 * Support routine. Sends outgoing frames to any network
kaf24@5077 1039 * taps currently in use.
kaf24@5077 1040 */
kaf24@5077 1041
kaf24@10555 1042 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
kaf24@5077 1043 {
kaf24@5077 1044 struct packet_type *ptype;
cl349@8718 1045
cl349@8718 1046 net_timestamp(skb);
kaf24@5077 1047
kaf24@5077 1048 rcu_read_lock();
kaf24@5077 1049 list_for_each_entry_rcu(ptype, &ptype_all, list) {
kaf24@5077 1050 /* Never send packets back to the socket
kaf24@5077 1051 * they originated from - MvS (miquels@drinkel.ow.org)
kaf24@5077 1052 */
kaf24@5077 1053 if ((ptype->dev == dev || !ptype->dev) &&
kaf24@5077 1054 (ptype->af_packet_priv == NULL ||
kaf24@5077 1055 (struct sock *)ptype->af_packet_priv != skb->sk)) {
kaf24@5077 1056 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
kaf24@5077 1057 if (!skb2)
kaf24@5077 1058 break;
kaf24@5077 1059
kaf24@5077 1060 /* skb->nh should be correctly
kaf24@5077 1061 set by sender, so that the second statement is
kaf24@5077 1062 just protection against buggy protocols.
kaf24@5077 1063 */
kaf24@5077 1064 skb2->mac.raw = skb2->data;
kaf24@5077 1065
kaf24@5077 1066 if (skb2->nh.raw < skb2->data ||
kaf24@5077 1067 skb2->nh.raw > skb2->tail) {
kaf24@5077 1068 if (net_ratelimit())
kaf24@5077 1069 printk(KERN_CRIT "protocol %04x is "
kaf24@5077 1070 "buggy, dev %s\n",
kaf24@5077 1071 skb2->protocol, dev->name);
kaf24@5077 1072 skb2->nh.raw = skb2->data;
kaf24@5077 1073 }
kaf24@5077 1074
kaf24@5077 1075 skb2->h.raw = skb2->nh.raw;
kaf24@5077 1076 skb2->pkt_type = PACKET_OUTGOING;
cl349@8718 1077 ptype->func(skb2, skb->dev, ptype, skb->dev);
kaf24@5077 1078 }
kaf24@5077 1079 }
kaf24@5077 1080 rcu_read_unlock();
kaf24@5077 1081 }
kaf24@5077 1082
kaf24@5077 1083 /*
kaf24@5077 1084 * Invalidate hardware checksum when packet is to be mangled, and
kaf24@5077 1085 * complete checksum manually on outgoing path.
kaf24@5077 1086 */
kaf24@5077 1087 int skb_checksum_help(struct sk_buff *skb, int inward)
kaf24@5077 1088 {
kaf24@5077 1089 unsigned int csum;
kaf24@5077 1090 int ret = 0, offset = skb->h.raw - skb->data;
kaf24@5077 1091
kfraser@10714 1092 if (inward)
kfraser@10714 1093 goto out_set_summed;
kfraser@10714 1094
kfraser@10714 1095 if (unlikely(skb_shinfo(skb)->gso_size)) {
kfraser@10714 1096 static int warned;
kfraser@10714 1097
kfraser@10714 1098 WARN_ON(!warned);
kfraser@10714 1099 warned = 1;
kfraser@10714 1100
kfraser@10714 1101 /* Let GSO fix up the checksum. */
kfraser@10714 1102 goto out_set_summed;
kaf24@5077 1103 }
kaf24@5077 1104
kaf24@5077 1105 if (skb_cloned(skb)) {
kaf24@5077 1106 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
kaf24@5077 1107 if (ret)
kaf24@5077 1108 goto out;
kaf24@5077 1109 }
kaf24@5077 1110
cl349@8742 1111 BUG_ON(offset > (int)skb->len);
kaf24@5077 1112 csum = skb_checksum(skb, offset, skb->len-offset, 0);
kaf24@5077 1113
kaf24@5077 1114 offset = skb->tail - skb->h.raw;
cl349@8742 1115 BUG_ON(offset <= 0);
cl349@8742 1116 BUG_ON(skb->csum + 2 > offset);
kaf24@5077 1117
kaf24@5077 1118 *(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
kfraser@10714 1119
kfraser@10714 1120 out_set_summed:
kaf24@5077 1121 skb->ip_summed = CHECKSUM_NONE;
kaf24@5077 1122 out:
kaf24@5077 1123 return ret;
kaf24@5077 1124 }
kaf24@5077 1125
kaf24@10555 1126 /**
kaf24@10555 1127 * skb_gso_segment - Perform segmentation on skb.
kaf24@10555 1128 * @skb: buffer to segment
kaf24@10555 1129 * @features: features for the output path (see dev->features)
kaf24@10555 1130 *
kaf24@10555 1131 * This function segments the given skb and returns a list of segments.
kaf24@10555 1132 *
kaf24@10555 1133 * It may return NULL if the skb requires no segmentation. This is
kaf24@10555 1134 * only possible when GSO is used for verifying header integrity.
kaf24@10555 1135 */
kaf24@10555 1136 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
kaf24@10555 1137 {
kaf24@10555 1138 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
kaf24@10555 1139 struct packet_type *ptype;
kaf24@10555 1140 int type = skb->protocol;
kfraser@10714 1141 int err;
kaf24@10555 1142
kaf24@10555 1143 BUG_ON(skb_shinfo(skb)->frag_list);
kaf24@10555 1144
kaf24@10555 1145 skb->mac.raw = skb->data;
kaf24@10555 1146 skb->mac_len = skb->nh.raw - skb->data;
kaf24@10555 1147 __skb_pull(skb, skb->mac_len);
kaf24@10555 1148
kfraser@10714 1149 if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
kfraser@10714 1150 static int warned;
kfraser@10714 1151
kfraser@10714 1152 WARN_ON(!warned);
kfraser@10714 1153 warned = 1;
kfraser@10714 1154
kfraser@10714 1155 if (skb_header_cloned(skb) &&
kfraser@10714 1156 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
kfraser@10714 1157 return ERR_PTR(err);
kfraser@10714 1158 }
kfraser@10714 1159
kaf24@10555 1160 rcu_read_lock();
kaf24@10555 1161 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
kaf24@10555 1162 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
kfraser@10714 1163 if (unlikely(skb->ip_summed != CHECKSUM_HW)) {
kfraser@10714 1164 err = ptype->gso_send_check(skb);
kfraser@10714 1165 segs = ERR_PTR(err);
kfraser@10714 1166 if (err || skb_gso_ok(skb, features))
kfraser@10714 1167 break;
kfraser@10714 1168 __skb_push(skb, skb->data - skb->nh.raw);
kfraser@10714 1169 }
kaf24@10555 1170 segs = ptype->gso_segment(skb, features);
kaf24@10555 1171 break;
kaf24@10555 1172 }
kaf24@10555 1173 }
kaf24@10555 1174 rcu_read_unlock();
kaf24@10555 1175
kaf24@10555 1176 __skb_push(skb, skb->data - skb->mac.raw);
kaf24@10555 1177
kaf24@10555 1178 return segs;
kaf24@10555 1179 }
kaf24@10555 1180
kaf24@10555 1181 EXPORT_SYMBOL(skb_gso_segment);
kaf24@10555 1182
/*
 * Take action when hardware reception checksum errors are detected:
 * log a rate-limited error naming the device and dump the stack.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
	       dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
cl349@8729 1195
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

/*
 * Returns 1 if @skb has a page fragment in high memory while @dev does
 * not advertise NETIF_F_HIGHDMA, 0 otherwise.
 */
static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int idx = 0;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		while (idx < skb_shinfo(skb)->nr_frags) {
			if (PageHighMem(skb_shinfo(skb)->frags[idx].page))
				return 1;
			idx++;
		}
	}

	return 0;
}
#else
/* Without CONFIG_HIGHMEM the check is compiled out. */
#define illegal_highdma(dev, skb)	(0)
#endif
kaf24@5077 1218
/*
 * Per-skb state kept in skb->cb while a software-segmented skb carries
 * its segment list: the skb's original destructor, saved by
 * dev_gso_segment().
 */
struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

/* View the control buffer of @skb as a struct dev_gso_cb. */
#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
kaf24@10555 1224
kaf24@10555 1225 static void dev_gso_skb_destructor(struct sk_buff *skb)
kaf24@5077 1226 {
kaf24@10555 1227 struct dev_gso_cb *cb;
kaf24@10555 1228
kaf24@10555 1229 do {
kaf24@10555 1230 struct sk_buff *nskb = skb->next;
kaf24@10555 1231
kaf24@10555 1232 skb->next = nskb->next;
kaf24@10555 1233 nskb->next = NULL;
kaf24@10555 1234 kfree_skb(nskb);
kaf24@10555 1235 } while (skb->next);
kaf24@10555 1236
kaf24@10555 1237 cb = DEV_GSO_CB(skb);
kaf24@10555 1238 if (cb->destructor)
kaf24@10555 1239 cb->destructor(skb);
kaf24@10555 1240 }
kaf24@10555 1241
kaf24@10555 1242 /**
kaf24@10555 1243 * dev_gso_segment - Perform emulated hardware segmentation on skb.
kaf24@10555 1244 * @skb: buffer to segment
kaf24@10555 1245 *
kaf24@10555 1246 * This function segments the given skb and stores the list of segments
kaf24@10555 1247 * in skb->next.
kaf24@10555 1248 */
kaf24@10555 1249 static int dev_gso_segment(struct sk_buff *skb)
kaf24@10555 1250 {
kaf24@10555 1251 struct net_device *dev = skb->dev;
kaf24@10555 1252 struct sk_buff *segs;
kaf24@10555 1253 int features = dev->features & ~(illegal_highdma(dev, skb) ?
kaf24@10555 1254 NETIF_F_SG : 0);
kaf24@10555 1255
kaf24@10555 1256 segs = skb_gso_segment(skb, features);
kaf24@10555 1257
kaf24@10555 1258 /* Verifying header integrity only. */
kaf24@10555 1259 if (!segs)
kaf24@10555 1260 return 0;
kaf24@10555 1261
kaf24@10555 1262 if (unlikely(IS_ERR(segs)))
kaf24@10555 1263 return PTR_ERR(segs);
kaf24@10555 1264
kaf24@10555 1265 skb->next = segs;
kaf24@10555 1266 DEV_GSO_CB(skb)->destructor = skb->destructor;
kaf24@10555 1267 skb->destructor = dev_gso_skb_destructor;
kaf24@10555 1268
kaf24@10555 1269 return 0;
kaf24@10555 1270 }
kaf24@10555 1271
kaf24@10555 1272 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
kaf24@10555 1273 {
kaf24@10555 1274 if (likely(!skb->next)) {
kaf24@10555 1275 if (netdev_nit)
kaf24@10555 1276 dev_queue_xmit_nit(skb, dev);
kaf24@10555 1277
kaf24@10555 1278 if (netif_needs_gso(dev, skb)) {
kaf24@10555 1279 if (unlikely(dev_gso_segment(skb)))
kaf24@10555 1280 goto out_kfree_skb;
kaf24@10555 1281 if (skb->next)
kaf24@10555 1282 goto gso;
kaf24@10555 1283 }
kaf24@10555 1284
kaf24@10555 1285 return dev->hard_start_xmit(skb, dev);
kaf24@10555 1286 }
kaf24@10555 1287
kaf24@10555 1288 gso:
kaf24@10555 1289 do {
kaf24@10555 1290 struct sk_buff *nskb = skb->next;
kaf24@10555 1291 int rc;
kaf24@10555 1292
kaf24@10555 1293 skb->next = nskb->next;
kaf24@10555 1294 nskb->next = NULL;
kaf24@10555 1295 rc = dev->hard_start_xmit(nskb, dev);
kaf24@10555 1296 if (unlikely(rc)) {
kaf24@10555 1297 nskb->next = skb->next;
kaf24@10555 1298 skb->next = nskb;
kaf24@10555 1299 return rc;
kaf24@10555 1300 }
kaf24@10555 1301 if (unlikely(netif_queue_stopped(dev) && skb->next))
kaf24@10555 1302 return NETDEV_TX_BUSY;
kaf24@10555 1303 } while (skb->next);
kaf24@10555 1304
kaf24@10555 1305 skb->destructor = DEV_GSO_CB(skb)->destructor;
kaf24@10555 1306
kaf24@10555 1307 out_kfree_skb:
kaf24@10555 1308 kfree_skb(skb);
kaf24@5077 1309 return 0;
kaf24@5077 1310 }
kaf24@5077 1311
/*
 * Take / release the device transmit lock unless the driver declared
 * NETIF_F_LLTX.  The @cpu argument of HARD_TX_LOCK is accepted but not
 * used.
 *
 * Both macros expand to a single statement (do { } while (0)) so that
 * "HARD_TX_LOCK(dev, cpu);" stays correct inside an unbraced if/else,
 * and the @dev argument is parenthesised so any expression can be passed.
 */
#define HARD_TX_LOCK(dev, cpu) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		netif_tx_lock(dev);				\
	}							\
} while (0)

#define HARD_TX_UNLOCK(dev) do {				\
	if (((dev)->features & NETIF_F_LLTX) == 0) {		\
		netif_tx_unlock(dev);				\
	}							\
} while (0)
kaf24@5077 1323
#ifdef CONFIG_XEN
/*
 *	Prepare a checksum-deferred packet (skb->proto_csum_blank set) for
 *	checksumming: point skb->h.raw past the IP header, set skb->csum to
 *	the offset of the TCP or UDP checksum field and switch the packet
 *	to CHECKSUM_HW.
 *
 *	Returns 0 if nothing had to be done or the setup succeeded, and
 *	-EPROTO if the packet is not IPv4 TCP/UDP or the transport header
 *	or its checksum field would lie beyond skb->tail.
 */
inline int skb_checksum_setup(struct sk_buff *skb)
{
	if (skb->proto_csum_blank) {
		if (skb->protocol != htons(ETH_P_IP))
			goto out;
		skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
		if (skb->h.raw >= skb->tail)
			goto out;
		switch (skb->nh.iph->protocol) {
		case IPPROTO_TCP:
			skb->csum = offsetof(struct tcphdr, check);
			break;
		case IPPROTO_UDP:
			skb->csum = offsetof(struct udphdr, check);
			break;
		default:
			/* Terminate the message so log lines do not merge. */
			if (net_ratelimit())
				printk(KERN_ERR "Attempting to checksum a non-"
				       "TCP/UDP packet, dropping a protocol"
				       " %d packet\n", skb->nh.iph->protocol);
			goto out;
		}
		/* The two-byte checksum field must fit before skb->tail. */
		if ((skb->h.raw + skb->csum + 2) > skb->tail)
			goto out;
		skb->ip_summed = CHECKSUM_HW;
		skb->proto_csum_blank = 0;
	}
	return 0;
out:
	return -EPROTO;
}
#else
/* Without CONFIG_XEN there is nothing to set up. */
inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
#endif
kaf24@9828 1359
kaf24@9828 1360
kaf24@5077 1361 /**
kaf24@5077 1362 * dev_queue_xmit - transmit a buffer
kaf24@5077 1363 * @skb: buffer to transmit
kaf24@5077 1364 *
kaf24@5077 1365 * Queue a buffer for transmission to a network device. The caller must
kaf24@5077 1366 * have set the device and priority and built the buffer before calling
kaf24@5077 1367 * this function. The function can be called from an interrupt.
kaf24@5077 1368 *
kaf24@5077 1369 * A negative errno code is returned on a failure. A success does not
kaf24@5077 1370 * guarantee the frame will be transmitted as it may be dropped due
kaf24@5077 1371 * to congestion or traffic shaping.
vh249@5730 1372 *
vh249@5730 1373 * -----------------------------------------------------------------------------------
vh249@5730 1374 * I notice this method can also return errors from the queue disciplines,
vh249@5730 1375 * including NET_XMIT_DROP, which is a positive value. So, errors can also
vh249@5730 1376 * be positive.
vh249@5730 1377 *
vh249@5730 1378 * Regardless of the return value, the skb is consumed, so it is currently
vh249@5730 1379 * difficult to retry a send to this method. (You can bump the ref count
vh249@5730 1380 * before sending to hold a reference for retry if you are careful.)
vh249@5730 1381 *
vh249@5730 1382 * When calling this method, interrupts MUST be enabled. This is because
vh249@5730 1383 * the BH enable code must have IRQs enabled so that it will not deadlock.
vh249@5730 1384 * --BLG
kaf24@5077 1385 */
kaf24@5077 1386
kaf24@5077 1387 int dev_queue_xmit(struct sk_buff *skb)
kaf24@5077 1388 {
kaf24@5077 1389 struct net_device *dev = skb->dev;
kaf24@5077 1390 struct Qdisc *q;
kaf24@5077 1391 int rc = -ENOMEM;
kaf24@5077 1392
kaf24@10555 1393 /* If a checksum-deferred packet is forwarded to a device that needs a
kaf24@10555 1394 * checksum, correct the pointers and force checksumming.
kaf24@10555 1395 */
kaf24@10555 1396 if (skb_checksum_setup(skb))
kaf24@10555 1397 goto out_kfree_skb;
kaf24@10555 1398
kaf24@10555 1399 /* GSO will handle the following emulations directly. */
kaf24@10555 1400 if (netif_needs_gso(dev, skb))
kaf24@10555 1401 goto gso;
kaf24@10555 1402
kaf24@5077 1403 if (skb_shinfo(skb)->frag_list &&
kaf24@5077 1404 !(dev->features & NETIF_F_FRAGLIST) &&
kaf24@10555 1405 __skb_linearize(skb))
kaf24@5077 1406 goto out_kfree_skb;
kaf24@5077 1407
kaf24@5077 1408 /* Fragmented skb is linearized if device does not support SG,
kaf24@5077 1409 * or if at least one of fragments is in highmem and device
kaf24@5077 1410 * does not support DMA from it.
kaf24@5077 1411 */
kaf24@5077 1412 if (skb_shinfo(skb)->nr_frags &&
kaf24@5077 1413 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
kaf24@10555 1414 __skb_linearize(skb))
kaf24@5077 1415 goto out_kfree_skb;
kaf24@5077 1416
kaf24@5077 1417 /* If packet is not checksummed and device does not support
kaf24@5077 1418 * checksumming for this protocol, complete checksumming here.
kaf24@5077 1419 */
kaf24@5077 1420 if (skb->ip_summed == CHECKSUM_HW &&
kaf24@10555 1421 (!(dev->features & NETIF_F_GEN_CSUM) &&
kaf24@5077 1422 (!(dev->features & NETIF_F_IP_CSUM) ||
kaf24@5077 1423 skb->protocol != htons(ETH_P_IP))))
kaf24@5077 1424 if (skb_checksum_help(skb, 0))
kaf24@5077 1425 goto out_kfree_skb;
kaf24@5077 1426
kaf24@10555 1427 gso:
cl349@8718 1428 spin_lock_prefetch(&dev->queue_lock);
cl349@8718 1429
kaf24@5077 1430 /* Disable soft irqs for various locks below. Also
kaf24@5077 1431 * stops preemption for RCU.
kaf24@5077 1432 */
kaf24@10555 1433 rcu_read_lock_bh();
kaf24@5077 1434
kaf24@5077 1435 /* Updates of qdisc are serialized by queue_lock.
kaf24@5077 1436 * The struct Qdisc which is pointed to by qdisc is now a
kaf24@5077 1437 * rcu structure - it may be accessed without acquiring
kaf24@5077 1438 * a lock (but the structure may be stale.) The freeing of the
kaf24@5077 1439 * qdisc will be deferred until it's known that there are no
kaf24@5077 1440 * more references to it.
kaf24@5077 1441 *
kaf24@5077 1442 * If the qdisc has an enqueue function, we still need to
kaf24@5077 1443 * hold the queue_lock before calling it, since queue_lock
kaf24@5077 1444 * also serializes access to the device queue.
kaf24@5077 1445 */
kaf24@5077 1446
kaf24@5077 1447 q = rcu_dereference(dev->qdisc);
kaf24@5077 1448 #ifdef CONFIG_NET_CLS_ACT
kaf24@5077 1449 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
kaf24@5077 1450 #endif
kaf24@5077 1451 if (q->enqueue) {
kaf24@5077 1452 /* Grab device queue */
kaf24@5077 1453 spin_lock(&dev->queue_lock);
kaf24@5077 1454
kaf24@5077 1455 rc = q->enqueue(skb, q);
kaf24@5077 1456
kaf24@5077 1457 qdisc_run(dev);
kaf24@5077 1458
kaf24@5077 1459 spin_unlock(&dev->queue_lock);
kaf24@5077 1460 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
kaf24@5077 1461 goto out;
kaf24@5077 1462 }
kaf24@5077 1463
kaf24@5077 1464 /* The device has no queue. Common case for software devices:
kaf24@5077 1465 loopback, all the sorts of tunnels...
kaf24@5077 1466
kaf24@10555 1467 Really, it is unlikely that netif_tx_lock protection is necessary
kaf24@10555 1468 here. (f.e. loopback and IP tunnels are clean ignoring statistics
kaf24@5077 1469 counters.)
kaf24@5077 1470 However, it is possible, that they rely on protection
kaf24@5077 1471 made by us here.
kaf24@5077 1472
kaf24@5077 1473 Check this and shot the lock. It is not prone from deadlocks.
kaf24@5077 1474 Either shot noqueue qdisc, it is even simpler 8)
kaf24@5077 1475 */
kaf24@5077 1476 if (dev->flags & IFF_UP) {
kaf24@5077 1477 int cpu = smp_processor_id(); /* ok because BHs are off */
kaf24@5077 1478
kaf24@5077 1479 if (dev->xmit_lock_owner != cpu) {
kaf24@5077 1480
kaf24@5077 1481 HARD_TX_LOCK(dev, cpu);
kaf24@5077 1482
kaf24@5077 1483 if (!netif_queue_stopped(dev)) {
kaf24@5077 1484 rc = 0;
kaf24@10555 1485 if (!dev_hard_start_xmit(skb, dev)) {
kaf24@5077 1486 HARD_TX_UNLOCK(dev);
kaf24@5077 1487 goto out;
kaf24@5077 1488 }
kaf24@5077 1489 }
kaf24@5077 1490 HARD_TX_UNLOCK(dev);
kaf24@5077 1491 if (net_ratelimit())
kaf24@5077 1492 printk(KERN_CRIT "Virtual device %s asks to "
kaf24@5077 1493 "queue packet!\n", dev->name);
kaf24@5077 1494 } else {
kaf24@5077 1495 /* Recursion is detected! It is possible,
kaf24@5077 1496 * unfortunately */
kaf24@5077 1497 if (net_ratelimit())
kaf24@5077 1498 printk(KERN_CRIT "Dead loop on virtual device "
kaf24@5077 1499 "%s, fix it urgently!\n", dev->name);
kaf24@5077 1500 }
kaf24@5077 1501 }
kaf24@5077 1502
kaf24@5077 1503 rc = -ENETDOWN;
kaf24@10555 1504 rcu_read_unlock_bh();
kaf24@5077 1505
kaf24@5077 1506 out_kfree_skb:
kaf24@5077 1507 kfree_skb(skb);
kaf24@5077 1508 return rc;
kaf24@5077 1509 out:
kaf24@10555 1510 rcu_read_unlock_bh();
kaf24@5077 1511 return rc;
kaf24@5077 1512 }
kaf24@5077 1513
kaf24@5077 1514
kaf24@5077 1515 /*=======================================================================
kaf24@5077 1516 Receiver routines
kaf24@5077 1517 =======================================================================*/
kaf24@5077 1518
cl349@8718 1519 int netdev_max_backlog = 1000;
cl349@8718 1520 int netdev_budget = 300;
kaf24@5077 1521 int weight_p = 64; /* old backlog weight */
kaf24@5077 1522
kaf24@5077 1523 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
kaf24@5077 1524
kaf24@5077 1525
kaf24@5077 1526 /**
kaf24@5077 1527 * netif_rx - post buffer to the network code
kaf24@5077 1528 * @skb: buffer to post
kaf24@5077 1529 *
kaf24@5077 1530 * This function receives a packet from a device driver and queues it for
kaf24@5077 1531 * the upper (protocol) levels to process. It always succeeds. The buffer
kaf24@5077 1532 * may be dropped during processing for congestion control or by the
kaf24@5077 1533 * protocol layers.
kaf24@5077 1534 *
kaf24@5077 1535 * return values:
kaf24@5077 1536 * NET_RX_SUCCESS (no congestion)
kaf24@5077 1537 * NET_RX_CN_LOW (low congestion)
kaf24@5077 1538 * NET_RX_CN_MOD (moderate congestion)
kaf24@5077 1539 * NET_RX_CN_HIGH (high congestion)
kaf24@5077 1540 * NET_RX_DROP (packet was dropped)
kaf24@5077 1541 *
kaf24@5077 1542 */
kaf24@5077 1543
kaf24@5077 1544 int netif_rx(struct sk_buff *skb)
kaf24@5077 1545 {
kaf24@5077 1546 struct softnet_data *queue;
kaf24@5077 1547 unsigned long flags;
kaf24@5077 1548
vh249@5730 1549 /* if netpoll wants it, pretend we never saw it */
vh249@5730 1550 if (netpoll_rx(skb))
kaf24@5077 1551 return NET_RX_DROP;
vh249@5730 1552
cl349@8718 1553 if (!skb->tstamp.off_sec)
cl349@8718 1554 net_timestamp(skb);
kaf24@5077 1555
kaf24@5077 1556 /*
kaf24@5077 1557 * The code is rearranged so that the path is the most
kaf24@5077 1558 * short when CPU is congested, but is still operating.
kaf24@5077 1559 */
kaf24@5077 1560 local_irq_save(flags);
kaf24@5077 1561 queue = &__get_cpu_var(softnet_data);
kaf24@5077 1562
kaf24@5077 1563 __get_cpu_var(netdev_rx_stat).total++;
kaf24@5077 1564 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
kaf24@5077 1565 if (queue->input_pkt_queue.qlen) {
kaf24@5077 1566 enqueue:
kaf24@5077 1567 dev_hold(skb->dev);
kaf24@5077 1568 __skb_queue_tail(&queue->input_pkt_queue, skb);
kaf24@5077 1569 local_irq_restore(flags);
cl349@8718 1570 return NET_RX_SUCCESS;
kaf24@5077 1571 }
kaf24@5077 1572
kaf24@5077 1573 netif_rx_schedule(&queue->backlog_dev);
kaf24@5077 1574 goto enqueue;
kaf24@5077 1575 }
kaf24@5077 1576
kaf24@5077 1577 __get_cpu_var(netdev_rx_stat).dropped++;
kaf24@5077 1578 local_irq_restore(flags);
kaf24@5077 1579
kaf24@5077 1580 kfree_skb(skb);
kaf24@5077 1581 return NET_RX_DROP;
kaf24@5077 1582 }
kaf24@5077 1583
/*
 *	Variant of netif_rx() that, with preemption disabled, also runs
 *	any softirq left pending after the packet has been posted.
 *	Returns the result of netif_rx().
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int rc;

	preempt_disable();
	rc = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return rc;
}

EXPORT_SYMBOL(netif_rx_ni);
kaf24@5077 1598
cl349@8718 1599 static inline struct net_device *skb_bond(struct sk_buff *skb)
kaf24@5077 1600 {
kaf24@5077 1601 struct net_device *dev = skb->dev;
kaf24@5077 1602
cl349@8718 1603 if (dev->master)
kaf24@5077 1604 skb->dev = dev->master;
cl349@8718 1605
cl349@8718 1606 return dev;
kaf24@5077 1607 }
kaf24@5077 1608
kaf24@5077 1609 static void net_tx_action(struct softirq_action *h)
kaf24@5077 1610 {
kaf24@5077 1611 struct softnet_data *sd = &__get_cpu_var(softnet_data);
kaf24@5077 1612
kaf24@5077 1613 if (sd->completion_queue) {
kaf24@5077 1614 struct sk_buff *clist;
kaf24@5077 1615
kaf24@5077 1616 local_irq_disable();
kaf24@5077 1617 clist = sd->completion_queue;
kaf24@5077 1618 sd->completion_queue = NULL;
kaf24@5077 1619 local_irq_enable();
kaf24@5077 1620
kaf24@5077 1621 while (clist) {
kaf24@5077 1622 struct sk_buff *skb = clist;
kaf24@5077 1623 clist = clist->next;
kaf24@5077 1624
kaf24@5077 1625 BUG_TRAP(!atomic_read(&skb->users));
kaf24@5077 1626 __kfree_skb(skb);
kaf24@5077 1627 }
kaf24@5077 1628 }
kaf24@5077 1629
kaf24@5077 1630 if (sd->output_queue) {
kaf24@5077 1631 struct net_device *head;
kaf24@5077 1632
kaf24@5077 1633 local_irq_disable();
kaf24@5077 1634 head = sd->output_queue;
kaf24@5077 1635 sd->output_queue = NULL;
kaf24@5077 1636 local_irq_enable();
kaf24@5077 1637
kaf24@5077 1638 while (head) {
kaf24@5077 1639 struct net_device *dev = head;
kaf24@5077 1640 head = head->next_sched;
kaf24@5077 1641
kaf24@5077 1642 smp_mb__before_clear_bit();
kaf24@5077 1643 clear_bit(__LINK_STATE_SCHED, &dev->state);
kaf24@5077 1644
kaf24@5077 1645 if (spin_trylock(&dev->queue_lock)) {
kaf24@5077 1646 qdisc_run(dev);
kaf24@5077 1647 spin_unlock(&dev->queue_lock);
kaf24@5077 1648 } else {
kaf24@5077 1649 netif_schedule(dev);
kaf24@5077 1650 }
kaf24@5077 1651 }
kaf24@5077 1652 }
kaf24@5077 1653 }
kaf24@5077 1654
kaf24@5077 1655 static __inline__ int deliver_skb(struct sk_buff *skb,
cl349@8718 1656 struct packet_type *pt_prev,
cl349@8718 1657 struct net_device *orig_dev)
kaf24@5077 1658 {
kaf24@5077 1659 atomic_inc(&skb->users);
cl349@8718 1660 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
kaf24@5077 1661 }
kaf24@5077 1662
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/*
 * Bridge entry points, defined here as plain function pointers.
 * br_handle_frame_hook is called from handle_bridge() below.
 * NOTE(review): presumably filled in by the bridge code - confirm.
 */
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

/*
 * Offer the packet to the bridge when its device has a bridge port.
 *
 * Returns 0 without touching anything for loopback packets and for
 * devices with no br_port.  Otherwise a handler pending in *pt_prev is
 * delivered first (result stored in *ret, *pt_prev cleared) and the
 * value of br_handle_frame_hook() is returned.
 */
static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct sk_buff *skb = *pskb;
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return 0;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
/* No bridge support configured: the packet is never taken. */
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
#endif
kaf24@5077 1690
kaf24@5077 1691 #ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is?  Otherwise we pay for some useless
 * instructions (a compare and 2 stores extra) right now if we don't
 * have it on but do have CONFIG_NET_CLS_ACT.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 *
 */
kaf24@5077 1700 static int ing_filter(struct sk_buff *skb)
kaf24@5077 1701 {
kaf24@5077 1702 struct Qdisc *q;
kaf24@5077 1703 struct net_device *dev = skb->dev;
kaf24@5077 1704 int result = TC_ACT_OK;
kaf24@5077 1705
kaf24@5077 1706 if (dev->qdisc_ingress) {
kaf24@5077 1707 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
kaf24@5077 1708 if (MAX_RED_LOOP < ttl++) {
kaf24@5077 1709 printk("Redir loop detected Dropping packet (%s->%s)\n",
cl349@8718 1710 skb->input_dev->name, skb->dev->name);
kaf24@5077 1711 return TC_ACT_SHOT;
kaf24@5077 1712 }
kaf24@5077 1713
kaf24@5077 1714 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
kaf24@5077 1715
kaf24@5077 1716 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
cl349@8718 1717
kaf24@5077 1718 spin_lock(&dev->ingress_lock);
kaf24@5077 1719 if ((q = dev->qdisc_ingress) != NULL)
kaf24@5077 1720 result = q->enqueue(skb, q);
kaf24@5077 1721 spin_unlock(&dev->ingress_lock);
kaf24@5077 1722
kaf24@5077 1723 }
kaf24@5077 1724
kaf24@5077 1725 return result;
kaf24@5077 1726 }
kaf24@5077 1727 #endif
kaf24@5077 1728
kaf24@5077 1729 int netif_receive_skb(struct sk_buff *skb)
kaf24@5077 1730 {
kaf24@5077 1731 struct packet_type *ptype, *pt_prev;
cl349@8718 1732 struct net_device *orig_dev;
kaf24@5077 1733 int ret = NET_RX_DROP;
kaf24@5077 1734 unsigned short type;
kaf24@5077 1735
vh249@5730 1736 /* if we've gotten here through NAPI, check netpoll */
vh249@5730 1737 if (skb->dev->poll && netpoll_rx(skb))
kaf24@5077 1738 return NET_RX_DROP;
kaf24@5077 1739
cl349@8718 1740 if (!skb->tstamp.off_sec)
cl349@8718 1741 net_timestamp(skb);
cl349@8718 1742
cl349@8718 1743 if (!skb->input_dev)
cl349@8718 1744 skb->input_dev = skb->dev;
cl349@8718 1745
cl349@8718 1746 orig_dev = skb_bond(skb);
kaf24@5077 1747
kaf24@5077 1748 __get_cpu_var(netdev_rx_stat).total++;
kaf24@5077 1749
kaf24@5077 1750 skb->h.raw = skb->nh.raw = skb->data;
kaf24@5077 1751 skb->mac_len = skb->nh.raw - skb->mac.raw;
kaf24@5077 1752
kaf24@5077 1753 pt_prev = NULL;
kaf24@5077 1754
kaf24@5077 1755 rcu_read_lock();
kaf24@5077 1756
kaf24@5077 1757 #ifdef CONFIG_NET_CLS_ACT
kaf24@5077 1758 if (skb->tc_verd & TC_NCLS) {
kaf24@5077 1759 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
kaf24@5077 1760 goto ncls;
kaf24@5077 1761 }
kaf24@5077 1762 #endif
kaf24@5077 1763
cl349@8706 1764 #ifdef CONFIG_XEN
kaf24@5077 1765 switch (skb->ip_summed) {
kaf24@5077 1766 case CHECKSUM_UNNECESSARY:
kaf24@9567 1767 skb->proto_data_valid = 1;
kaf24@5077 1768 break;
kaf24@5077 1769 case CHECKSUM_HW:
kaf24@5077 1770 /* XXX Implement me. */
kaf24@5077 1771 default:
kaf24@9567 1772 skb->proto_data_valid = 0;
kaf24@5077 1773 break;
kaf24@5077 1774 }
cl349@8706 1775 #endif
kaf24@5077 1776
kaf24@5077 1777 list_for_each_entry_rcu(ptype, &ptype_all, list) {
kaf24@5077 1778 if (!ptype->dev || ptype->dev == skb->dev) {
kaf24@5077 1779 if (pt_prev)
cl349@8718 1780 ret = deliver_skb(skb, pt_prev, orig_dev);
kaf24@5077 1781 pt_prev = ptype;
kaf24@5077 1782 }
kaf24@5077 1783 }
kaf24@5077 1784
kaf24@5077 1785 #ifdef CONFIG_NET_CLS_ACT
kaf24@5077 1786 if (pt_prev) {
cl349@8718 1787 ret = deliver_skb(skb, pt_prev, orig_dev);
kaf24@5077 1788 pt_prev = NULL; /* noone else should process this after*/
kaf24@5077 1789 } else {
kaf24@5077 1790 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
kaf24@5077 1791 }
kaf24@5077 1792
kaf24@5077 1793 ret = ing_filter(skb);
kaf24@5077 1794
kaf24@5077 1795 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
kaf24@5077 1796 kfree_skb(skb);
kaf24@5077 1797 goto out;
kaf24@5077 1798 }
kaf24@5077 1799
kaf24@5077 1800 skb->tc_verd = 0;
kaf24@5077 1801 ncls:
kaf24@5077 1802 #endif
kaf24@5077 1803
kaf24@5077 1804 handle_diverter(skb);
kaf24@5077 1805
cl349@8718 1806 if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
kaf24@5077 1807 goto out;
kaf24@5077 1808
kaf24@5077 1809 type = skb->protocol;
kaf24@5077 1810 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
kaf24@5077 1811 if (ptype->type == type &&
kaf24@5077 1812 (!ptype->dev || ptype->dev == skb->dev)) {
kaf24@5077 1813 if (pt_prev)
cl349@8718 1814 ret = deliver_skb(skb, pt_prev, orig_dev);
kaf24@5077 1815 pt_prev = ptype;
kaf24@5077 1816 }
kaf24@5077 1817 }
kaf24@5077 1818
kaf24@5077 1819 if (pt_prev) {
cl349@8718 1820 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
kaf24@5077 1821 } else {
kaf24@5077 1822 kfree_skb(skb);
kaf24@5077 1823 /* Jamal, now you will not able to escape explaining
kaf24@5077 1824 * me how you were going to use this. :-)
kaf24@5077 1825 */
kaf24@5077 1826 ret = NET_RX_DROP;
kaf24@5077 1827 }
kaf24@5077 1828
kaf24@5077 1829 out:
kaf24@5077 1830 rcu_read_unlock();
kaf24@5077 1831 return ret;
kaf24@5077 1832 }
kaf24@5077 1833
kaf24@5077 1834 static int process_backlog(struct net_device *backlog_dev, int *budget)
kaf24@5077 1835 {
kaf24@5077 1836 int work = 0;
kaf24@5077 1837 int quota = min(backlog_dev->quota, *budget);
kaf24@5077 1838 struct softnet_data *queue = &__get_cpu_var(softnet_data);
kaf24@5077 1839 unsigned long start_time = jiffies;
kaf24@5077 1840
vh249@5730 1841 backlog_dev->weight = weight_p;
kaf24@5077 1842 for (;;) {
kaf24@5077 1843 struct sk_buff *skb;
kaf24@5077 1844 struct net_device *dev;
kaf24@5077 1845
kaf24@5077 1846 local_irq_disable();
kaf24@5077 1847 skb = __skb_dequeue(&queue->input_pkt_queue);
kaf24@5077 1848 if (!skb)
kaf24@5077 1849 goto job_done;
kaf24@5077 1850 local_irq_enable();
kaf24@5077 1851
kaf24@5077 1852 dev = skb->dev;
kaf24@5077 1853
kaf24@5077 1854 netif_receive_skb(skb);
kaf24@5077 1855
kaf24@5077 1856 dev_put(dev);
kaf24@5077 1857
kaf24@5077 1858 work++;
kaf24@5077 1859
kaf24@5077 1860 if (work >= quota || jiffies - start_time > 1)
kaf24@5077 1861 break;
kaf24@5077 1862
kaf24@5077 1863 }
kaf24@5077 1864
kaf24@5077 1865 backlog_dev->quota -= work;
kaf24@5077 1866 *budget -= work;
kaf24@5077 1867 return -1;
kaf24@5077 1868
kaf24@5077 1869 job_done:
kaf24@5077 1870 backlog_dev->quota -= work;
kaf24@5077 1871 *budget -= work;
kaf24@5077 1872
kaf24@5077 1873 list_del(&backlog_dev->poll_list);
kaf24@5077 1874 smp_mb__before_clear_bit();
kaf24@5077 1875 netif_poll_enable(backlog_dev);
kaf24@5077 1876
kaf24@5077 1877 local_irq_enable();
kaf24@5077 1878 return 0;
kaf24@5077 1879 }
kaf24@5077 1880
kaf24@5077 1881 static void net_rx_action(struct softirq_action *h)
kaf24@5077 1882 {
kaf24@5077 1883 struct softnet_data *queue = &__get_cpu_var(softnet_data);
kaf24@5077 1884 unsigned long start_time = jiffies;
cl349@8718 1885 int budget = netdev_budget;
cl349@8718 1886 void *have;
cl349@8718 1887
kaf24@5077 1888 local_irq_disable();
kaf24@5077 1889
kaf24@5077 1890 while (!list_empty(&queue->poll_list)) {
kaf24@5077 1891 struct net_device *dev;
kaf24@5077 1892
kaf24@5077 1893 if (budget <= 0 || jiffies - start_time > 1)
kaf24@5077 1894 goto softnet_break;
kaf24@5077 1895
kaf24@5077 1896 local_irq_enable();
kaf24@5077 1897
kaf24@5077 1898 dev = list_entry(queue->poll_list.next,
kaf24@5077 1899 struct net_device, poll_list);
cl349@8718 1900 have = netpoll_poll_lock(dev);
kaf24@5077 1901
kaf24@5077 1902 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
cl349@8718 1903 netpoll_poll_unlock(have);
kaf24@5077 1904 local_irq_disable();
kaf24@5077 1905 list_del(&dev->poll_list);
kaf24@5077 1906 list_add_tail(&dev->poll_list, &queue->poll_list);
kaf24@5077 1907 if (dev->quota < 0)
kaf24@5077 1908 dev->quota += dev->weight;
kaf24@5077 1909 else
kaf24@5077 1910 dev->quota = dev->weight;
kaf24@5077 1911 } else {
cl349@8718 1912 netpoll_poll_unlock(have);
kaf24@5077 1913 dev_put(dev);
kaf24@5077 1914 local_irq_disable();
kaf24@5077 1915 }
kaf24@5077 1916 }
kaf24@5077 1917 out:
kaf24@5077 1918 local_irq_enable();
kaf24@5077 1919 return;
kaf24@5077 1920
kaf24@5077 1921 softnet_break:
kaf24@5077 1922 __get_cpu_var(netdev_rx_stat).time_squeeze++;
kaf24@5077 1923 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
kaf24@5077 1924 goto out;
kaf24@5077 1925 }
kaf24@5077 1926
kaf24@5077 1927 static gifconf_func_t * gifconf_list [NPROTO];
kaf24@5077 1928
kaf24@5077 1929 /**
kaf24@5077 1930 * register_gifconf - register a SIOCGIF handler
kaf24@5077 1931 * @family: Address family
kaf24@5077 1932 * @gifconf: Function handler
kaf24@5077 1933 *
kaf24@5077 1934 * Register protocol dependent address dumping routines. The handler
kaf24@5077 1935 * that is passed must not be freed or reused until it has been replaced
kaf24@5077 1936 * by another handler.
kaf24@5077 1937 */
kaf24@5077 1938 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
kaf24@5077 1939 {
kaf24@5077 1940 if (family >= NPROTO)
kaf24@5077 1941 return -EINVAL;
kaf24@5077 1942 gifconf_list[family] = gifconf;
kaf24@5077 1943 return 0;
kaf24@5077 1944 }
kaf24@5077 1945
kaf24@5077 1946
kaf24@5077 1947 /*
kaf24@5077 1948 * Map an interface index to its name (SIOCGIFNAME)
kaf24@5077 1949 */
kaf24@5077 1950
kaf24@5077 1951 /*
kaf24@5077 1952 * We need this ioctl for efficient implementation of the
kaf24@5077 1953 * if_indextoname() function required by the IPv6 API. Without
kaf24@5077 1954 * it, we would have to search all the interfaces to find a
kaf24@5077 1955 * match. --pb
kaf24@5077 1956 */
kaf24@5077 1957
kaf24@5077 1958 static int dev_ifname(struct ifreq __user *arg)
kaf24@5077 1959 {
kaf24@5077 1960 struct net_device *dev;
kaf24@5077 1961 struct ifreq ifr;
kaf24@5077 1962
kaf24@5077 1963 /*
kaf24@5077 1964 * Fetch the caller's info block.
kaf24@5077 1965 */
kaf24@5077 1966
kaf24@5077 1967 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
kaf24@5077 1968 return -EFAULT;
kaf24@5077 1969
kaf24@5077 1970 read_lock(&dev_base_lock);
kaf24@5077 1971 dev = __dev_get_by_index(ifr.ifr_ifindex);
kaf24@5077 1972 if (!dev) {
kaf24@5077 1973 read_unlock(&dev_base_lock);
kaf24@5077 1974 return -ENODEV;
kaf24@5077 1975 }
kaf24@5077 1976
kaf24@5077 1977 strcpy(ifr.ifr_name, dev->name);
kaf24@5077 1978 read_unlock(&dev_base_lock);
kaf24@5077 1979
kaf24@5077 1980 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
kaf24@5077 1981 return -EFAULT;
kaf24@5077 1982 return 0;
kaf24@5077 1983 }
kaf24@5077 1984
kaf24@5077 1985 /*
kaf24@5077 1986 * Perform a SIOCGIFCONF call. This structure will change
kaf24@5077 1987 * size eventually, and there is nothing I can do about it.
kaf24@5077 1988 * Thus we will need a 'compatibility mode'.
kaf24@5077 1989 */
kaf24@5077 1990
kaf24@5077 1991 static int dev_ifconf(char __user *arg)
kaf24@5077 1992 {
kaf24@5077 1993 struct ifconf ifc;
kaf24@5077 1994 struct net_device *dev;
kaf24@5077 1995 char __user *pos;
kaf24@5077 1996 int len;
kaf24@5077 1997 int total;
kaf24@5077 1998 int i;
kaf24@5077 1999
kaf24@5077 2000 /*
kaf24@5077 2001 * Fetch the caller's info block.
kaf24@5077 2002 */
kaf24@5077 2003
kaf24@5077 2004 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
kaf24@5077 2005 return -EFAULT;
kaf24@5077 2006
kaf24@5077 2007 pos = ifc.ifc_buf;
kaf24@5077 2008 len = ifc.ifc_len;
kaf24@5077 2009
kaf24@5077 2010 /*
kaf24@5077 2011 * Loop over the interfaces, and write an info block for each.
kaf24@5077 2012 */
kaf24@5077 2013
kaf24@5077 2014 total = 0;
kaf24@5077 2015 for (dev = dev_base; dev; dev = dev->next) {
kaf24@5077 2016 for (i = 0; i < NPROTO; i++) {
kaf24@5077 2017 if (gifconf_list[i]) {
kaf24@5077 2018 int done;
kaf24@5077 2019 if (!pos)
kaf24@5077 2020 done = gifconf_list[i](dev, NULL, 0);
kaf24@5077 2021 else
kaf24@5077 2022 done = gifconf_list[i](dev, pos + total,
kaf24@5077 2023 len - total);
kaf24@5077 2024 if (done < 0)
kaf24@5077 2025 return -EFAULT;
kaf24@5077 2026 total += done;
kaf24@5077 2027 }
kaf24@5077 2028 }
kaf24@5077 2029 }
kaf24@5077 2030
kaf24@5077 2031 /*
kaf24@5077 2032 * All done. Write the updated control block back to the caller.
kaf24@5077 2033 */
kaf24@5077 2034 ifc.ifc_len = total;
kaf24@5077 2035
kaf24@5077 2036 /*
kaf24@5077 2037 * Both BSD and Solaris return 0 here, so we do too.
kaf24@5077 2038 */
kaf24@5077 2039 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
kaf24@5077 2040 }
kaf24@5077 2041
kaf24@5077 2042 #ifdef CONFIG_PROC_FS
kaf24@5077 2043 /*
kaf24@5077 2044 * This is invoked by the /proc filesystem handler to display a device
kaf24@5077 2045 * in detail.
kaf24@5077 2046 */
kaf24@5077 2047 static __inline__ struct net_device *dev_get_idx(loff_t pos)
kaf24@5077 2048 {
kaf24@5077 2049 struct net_device *dev;
kaf24@5077 2050 loff_t i;
kaf24@5077 2051
kaf24@5077 2052 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
kaf24@5077 2053
kaf24@5077 2054 return i == pos ? dev : NULL;
kaf24@5077 2055 }
kaf24@5077 2056
kaf24@5077 2057 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
kaf24@5077 2058 {
kaf24@5077 2059 read_lock(&dev_base_lock);
kaf24@5077 2060 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
kaf24@5077 2061 }
kaf24@5077 2062
kaf24@5077 2063 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
kaf24@5077 2064 {
kaf24@5077 2065 ++*pos;
kaf24@5077 2066 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
kaf24@5077 2067 }
kaf24@5077 2068
kaf24@5077 2069 void dev_seq_stop(struct seq_file *seq, void *v)
kaf24@5077 2070 {
kaf24@5077 2071 read_unlock(&dev_base_lock);
kaf24@5077 2072 }
kaf24@5077 2073
kaf24@5077 2074 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
kaf24@5077 2075 {
kaf24@5077 2076 if (dev->get_stats) {
kaf24@5077 2077 struct net_device_stats *stats = dev->get_stats(dev);
kaf24@5077 2078
kaf24@5077 2079 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
kaf24@5077 2080 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
kaf24@5077 2081 dev->name, stats->rx_bytes, stats->rx_packets,
kaf24@5077 2082 stats->rx_errors,
kaf24@5077 2083 stats->rx_dropped + stats->rx_missed_errors,
kaf24@5077 2084 stats->rx_fifo_errors,
kaf24@5077 2085 stats->rx_length_errors + stats->rx_over_errors +
kaf24@5077 2086 stats->rx_crc_errors + stats->rx_frame_errors,
kaf24@5077 2087 stats->rx_compressed, stats->multicast,
kaf24@5077 2088 stats->tx_bytes, stats->tx_packets,
kaf24@5077 2089 stats->tx_errors, stats->tx_dropped,
kaf24@5077 2090 stats->tx_fifo_errors, stats->collisions,
kaf24@5077 2091 stats->tx_carrier_errors +
kaf24@5077 2092 stats->tx_aborted_errors +
kaf24@5077 2093 stats->tx_window_errors +
kaf24@5077 2094 stats->tx_heartbeat_errors,
kaf24@5077 2095 stats->tx_compressed);
kaf24@5077 2096 } else
kaf24@5077 2097 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
kaf24@5077 2098 }
kaf24@5077 2099
kaf24@5077 2100 /*
kaf24@5077 2101 * Called from the PROCfs module. This now uses the new arbitrary sized
kaf24@5077 2102 * /proc/net interface to create /proc/net/dev
kaf24@5077 2103 */
kaf24@5077 2104 static int dev_seq_show(struct seq_file *seq, void *v)
kaf24@5077 2105 {
kaf24@5077 2106 if (v == SEQ_START_TOKEN)
kaf24@5077 2107 seq_puts(seq, "Inter-| Receive "
kaf24@5077 2108 " | Transmit\n"
kaf24@5077 2109 " face |bytes packets errs drop fifo frame "
kaf24@5077 2110 "compressed multicast|bytes packets errs "
kaf24@5077 2111 "drop fifo colls carrier compressed\n");
kaf24@5077 2112 else
kaf24@5077 2113 dev_seq_printf_stats(seq, v);
kaf24@5077 2114 return 0;
kaf24@5077 2115 }
kaf24@5077 2116
kaf24@5077 2117 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
kaf24@5077 2118 {
kaf24@5077 2119 struct netif_rx_stats *rc = NULL;
kaf24@5077 2120
kaf24@5077 2121 while (*pos < NR_CPUS)
kaf24@5077 2122 if (cpu_online(*pos)) {
kaf24@5077 2123 rc = &per_cpu(netdev_rx_stat, *pos);
kaf24@5077 2124 break;
kaf24@5077 2125 } else
kaf24@5077 2126 ++*pos;
kaf24@5077 2127 return rc;
kaf24@5077 2128 }
kaf24@5077 2129
kaf24@5077 2130 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
kaf24@5077 2131 {
kaf24@5077 2132 return softnet_get_online(pos);
kaf24@5077 2133 }
kaf24@5077 2134
kaf24@5077 2135 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
kaf24@5077 2136 {
kaf24@5077 2137 ++*pos;
kaf24@5077 2138 return softnet_get_online(pos);
kaf24@5077 2139 }
kaf24@5077 2140
/* seq_file stop: softnet_seq_start() acquires nothing, so nothing to undo. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
	return;
}
kaf24@5077 2144
kaf24@5077 2145 static int softnet_seq_show(struct seq_file *seq, void *v)
kaf24@5077 2146 {
kaf24@5077 2147 struct netif_rx_stats *s = v;
kaf24@5077 2148
kaf24@5077 2149 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
cl349@8718 2150 s->total, s->dropped, s->time_squeeze, 0,
cl349@8718 2151 0, 0, 0, 0, /* was fastroute */
cl349@8718 2152 s->cpu_collision );
kaf24@5077 2153 return 0;
kaf24@5077 2154 }
kaf24@5077 2155
kaf24@5077 2156 static struct seq_operations dev_seq_ops = {
kaf24@5077 2157 .start = dev_seq_start,
kaf24@5077 2158 .next = dev_seq_next,
kaf24@5077 2159 .stop = dev_seq_stop,
kaf24@5077 2160 .show = dev_seq_show,
kaf24@5077 2161 };
kaf24@5077 2162
kaf24@5077 2163 static int dev_seq_open(struct inode *inode, struct file *file)
kaf24@5077 2164 {
kaf24@5077 2165 return seq_open(file, &dev_seq_ops);
kaf24@5077 2166 }
kaf24@5077 2167
kaf24@5077 2168 static struct file_operations dev_seq_fops = {
kaf24@5077 2169 .owner = THIS_MODULE,
kaf24@5077 2170 .open = dev_seq_open,
kaf24@5077 2171 .read = seq_read,
kaf24@5077 2172 .llseek = seq_lseek,
kaf24@5077 2173 .release = seq_release,
kaf24@5077 2174 };
kaf24@5077 2175
kaf24@5077 2176 static struct seq_operations softnet_seq_ops = {
kaf24@5077 2177 .start = softnet_seq_start,
kaf24@5077 2178 .next = softnet_seq_next,
kaf24@5077 2179 .stop = softnet_seq_stop,
kaf24@5077 2180 .show = softnet_seq_show,
kaf24@5077 2181 };
kaf24@5077 2182
kaf24@5077 2183 static int softnet_seq_open(struct inode *inode, struct file *file)
kaf24@5077 2184 {
kaf24@5077 2185 return seq_open(file, &softnet_seq_ops);
kaf24@5077 2186 }
kaf24@5077 2187
kaf24@5077 2188 static struct file_operations softnet_seq_fops = {
kaf24@5077 2189 .owner = THIS_MODULE,
kaf24@5077 2190 .open = softnet_seq_open,
kaf24@5077 2191 .read = seq_read,
kaf24@5077 2192 .llseek = seq_lseek,
kaf24@5077 2193 .release = seq_release,
kaf24@5077 2194 };
kaf24@5077 2195
kaf24@5077 2196 #ifdef WIRELESS_EXT
kaf24@5077 2197 extern int wireless_proc_init(void);
kaf24@5077 2198 #else
kaf24@5077 2199 #define wireless_proc_init() 0
kaf24@5077 2200 #endif
kaf24@5077 2201
kaf24@5077 2202 static int __init dev_proc_init(void)
kaf24@5077 2203 {
kaf24@5077 2204 int rc = -ENOMEM;
kaf24@5077 2205
kaf24@5077 2206 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
kaf24@5077 2207 goto out;
kaf24@5077 2208 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
kaf24@5077 2209 goto out_dev;
kaf24@5077 2210 if (wireless_proc_init())
kaf24@5077 2211 goto out_softnet;
kaf24@5077 2212 rc = 0;
kaf24@5077 2213 out:
kaf24@5077 2214 return rc;
kaf24@5077 2215 out_softnet:
kaf24@5077 2216 proc_net_remove("softnet_stat");
kaf24@5077 2217 out_dev:
kaf24@5077 2218 proc_net_remove("dev");
kaf24@5077 2219 goto out;
kaf24@5077 2220 }
kaf24@5077 2221 #else
kaf24@5077 2222 #define dev_proc_init() 0
kaf24@5077 2223 #endif /* CONFIG_PROC_FS */
kaf24@5077 2224
kaf24@5077 2225
kaf24@5077 2226 /**
kaf24@5077 2227 * netdev_set_master - set up master/slave pair
kaf24@5077 2228 * @slave: slave device
kaf24@5077 2229 * @master: new master device
kaf24@5077 2230 *
kaf24@5077 2231 * Changes the master device of the slave. Pass %NULL to break the
kaf24@5077 2232 * bonding. The caller must hold the RTNL semaphore. On a failure
kaf24@5077 2233 * a negative errno code is returned. On success the reference counts
kaf24@5077 2234 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
kaf24@5077 2235 * function returns zero.
kaf24@5077 2236 */
kaf24@5077 2237 int netdev_set_master(struct net_device *slave, struct net_device *master)
kaf24@5077 2238 {
kaf24@5077 2239 struct net_device *old = slave->master;
kaf24@5077 2240
kaf24@5077 2241 ASSERT_RTNL();
kaf24@5077 2242
kaf24@5077 2243 if (master) {
kaf24@5077 2244 if (old)
kaf24@5077 2245 return -EBUSY;
kaf24@5077 2246 dev_hold(master);
kaf24@5077 2247 }
kaf24@5077 2248
kaf24@5077 2249 slave->master = master;
kaf24@5077 2250
kaf24@5077 2251 synchronize_net();
kaf24@5077 2252
kaf24@5077 2253 if (old)
kaf24@5077 2254 dev_put(old);
kaf24@5077 2255
kaf24@5077 2256 if (master)
kaf24@5077 2257 slave->flags |= IFF_SLAVE;
kaf24@5077 2258 else
kaf24@5077 2259 slave->flags &= ~IFF_SLAVE;
kaf24@5077 2260
kaf24@5077 2261 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
kaf24@5077 2262 return 0;
kaf24@5077 2263 }
kaf24@5077 2264
kaf24@5077 2265 /**
kaf24@5077 2266 * dev_set_promiscuity - update promiscuity count on a device
kaf24@5077 2267 * @dev: device
kaf24@5077 2268 * @inc: modifier
kaf24@5077 2269 *
kaf24@5077 2270 * Add or remove promsicuity from a device. While the count in the device
kaf24@5077 2271 * remains above zero the interface remains promiscuous. Once it hits zero
kaf24@5077 2272 * the device reverts back to normal filtering operation. A negative inc
kaf24@5077 2273 * value is used to drop promiscuity on the device.
kaf24@5077 2274 */
kaf24@5077 2275 void dev_set_promiscuity(struct net_device *dev, int inc)
kaf24@5077 2276 {
kaf24@5077 2277 unsigned short old_flags = dev->flags;
kaf24@5077 2278
kaf24@5077 2279 if ((dev->promiscuity += inc) == 0)
kaf24@5077 2280 dev->flags &= ~IFF_PROMISC;
cl349@8718 2281 else
cl349@8718 2282 dev->flags |= IFF_PROMISC;
cl349@8718 2283 if (dev->flags != old_flags) {
kaf24@5077 2284 dev_mc_upload(dev);
kaf24@5077 2285 printk(KERN_INFO "device %s %s promiscuous mode\n",
kaf24@5077 2286 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
kaf24@5077 2287 "left");
kaf24@5077 2288 }
kaf24@5077 2289 }
kaf24@5077 2290
kaf24@5077 2291 /**
kaf24@5077 2292 * dev_set_allmulti - update allmulti count on a device
kaf24@5077 2293 * @dev: device
kaf24@5077 2294 * @inc: modifier
kaf24@5077 2295 *
kaf24@5077 2296 * Add or remove reception of all multicast frames to a device. While the
kaf24@5077 2297 * count in the device remains above zero the interface remains listening
kaf24@5077 2298 * to all interfaces. Once it hits zero the device reverts back to normal
kaf24@5077 2299 * filtering operation. A negative @inc value is used to drop the counter
kaf24@5077 2300 * when releasing a resource needing all multicasts.
kaf24@5077 2301 */
kaf24@5077 2302
kaf24@5077 2303 void dev_set_allmulti(struct net_device *dev, int inc)
kaf24@5077 2304 {
kaf24@5077 2305 unsigned short old_flags = dev->flags;
kaf24@5077 2306
kaf24@5077 2307 dev->flags |= IFF_ALLMULTI;
kaf24@5077 2308 if ((dev->allmulti += inc) == 0)
kaf24@5077 2309 dev->flags &= ~IFF_ALLMULTI;
kaf24@5077 2310 if (dev->flags ^ old_flags)
kaf24@5077 2311 dev_mc_upload(dev);
kaf24@5077 2312 }
kaf24@5077 2313
kaf24@5077 2314 unsigned dev_get_flags(const struct net_device *dev)
kaf24@5077 2315 {
kaf24@5077 2316 unsigned flags;
kaf24@5077 2317
kaf24@5077 2318 flags = (dev->flags & ~(IFF_PROMISC |
kaf24@5077 2319 IFF_ALLMULTI |
kaf24@5077 2320 IFF_RUNNING)) |
kaf24@5077 2321 (dev->gflags & (IFF_PROMISC |
kaf24@5077 2322 IFF_ALLMULTI));
kaf24@5077 2323
kaf24@5077 2324 if (netif_running(dev) && netif_carrier_ok(dev))
kaf24@5077 2325 flags |= IFF_RUNNING;
kaf24@5077 2326
kaf24@5077 2327 return flags;
kaf24@5077 2328 }
kaf24@5077 2329
kaf24@5077 2330 int dev_change_flags(struct net_device *dev, unsigned flags)
kaf24@5077 2331 {
kaf24@5077 2332 int ret;
kaf24@5077 2333 int old_flags = dev->flags;
kaf24@5077 2334
kaf24@5077 2335 /*
kaf24@5077 2336 * Set the flags on our device.
kaf24@5077 2337 */
kaf24@5077 2338
kaf24@5077 2339 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
kaf24@5077 2340 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
kaf24@5077 2341 IFF_AUTOMEDIA)) |
kaf24@5077 2342 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
kaf24@5077 2343 IFF_ALLMULTI));
kaf24@5077 2344
kaf24@5077 2345 /*
kaf24@5077 2346 * Load in the correct multicast list now the flags have changed.
kaf24@5077 2347 */
kaf24@5077 2348
kaf24@5077 2349 dev_mc_upload(dev);
kaf24@5077 2350
kaf24@5077 2351 /*
kaf24@5077 2352 * Have we downed the interface. We handle IFF_UP ourselves
kaf24@5077 2353 * according to user attempts to set it, rather than blindly
kaf24@5077 2354 * setting it.
kaf24@5077 2355 */
kaf24@5077 2356
kaf24@5077 2357 ret = 0;
kaf24@5077 2358 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
kaf24@5077 2359 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
kaf24@5077 2360
kaf24@5077 2361 if (!ret)
kaf24@5077 2362 dev_mc_upload(dev);
kaf24@5077 2363 }
kaf24@5077 2364
kaf24@5077 2365 if (dev->flags & IFF_UP &&
kaf24@5077 2366 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
kaf24@5077 2367 IFF_VOLATILE)))
kaf24@5077 2368 notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev);
kaf24@5077 2369
kaf24@5077 2370 if ((flags ^ dev->gflags) & IFF_PROMISC) {
kaf24@5077 2371 int inc = (flags & IFF_PROMISC) ? +1 : -1;
kaf24@5077 2372 dev->gflags ^= IFF_PROMISC;
kaf24@5077 2373 dev_set_promiscuity(dev, inc);
kaf24@5077 2374 }
kaf24@5077 2375
kaf24@5077 2376 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
kaf24@5077 2377 is important. Some (broken) drivers set IFF_PROMISC, when
kaf24@5077 2378 IFF_ALLMULTI is requested not asking us and not reporting.
kaf24@5077 2379 */
kaf24@5077 2380 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
kaf24@5077 2381 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
kaf24@5077 2382 dev->gflags ^= IFF_ALLMULTI;
kaf24@5077 2383 dev_set_allmulti(dev, inc);
kaf24@5077 2384 }
kaf24@5077 2385
kaf24@5077 2386 if (old_flags ^ dev->flags)
kaf24@5077 2387 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
kaf24@5077 2388
kaf24@5077 2389 return ret;
kaf24@5077 2390 }
kaf24@5077 2391
kaf24@5077 2392 int dev_set_mtu(struct net_device *dev, int new_mtu)
kaf24@5077 2393 {
kaf24@5077 2394 int err;
kaf24@5077 2395
kaf24@5077 2396 if (new_mtu == dev->mtu)
kaf24@5077 2397 return 0;
kaf24@5077 2398
kaf24@5077 2399 /* MTU must be positive. */
kaf24@5077 2400 if (new_mtu < 0)
kaf24@5077 2401 return -EINVAL;
kaf24@5077 2402
kaf24@5077 2403 if (!netif_device_present(dev))
kaf24@5077 2404 return -ENODEV;
kaf24@5077 2405
kaf24@5077 2406 err = 0;
kaf24@5077 2407 if (dev->change_mtu)
kaf24@5077 2408 err = dev->change_mtu(dev, new_mtu);
kaf24@5077 2409 else
kaf24@5077 2410 dev->mtu = new_mtu;
kaf24@5077 2411 if (!err && dev->flags & IFF_UP)
kaf24@5077 2412 notifier_call_chain(&netdev_chain,
kaf24@5077 2413 NETDEV_CHANGEMTU, dev);
kaf24@5077 2414 return err;
kaf24@5077 2415 }
kaf24@5077 2416
vh249@5730 2417 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
vh249@5730 2418 {
vh249@5730 2419 int err;
vh249@5730 2420
vh249@5730 2421 if (!dev->set_mac_address)
vh249@5730 2422 return -EOPNOTSUPP;
vh249@5730 2423 if (sa->sa_family != dev->type)
vh249@5730 2424 return -EINVAL;
vh249@5730 2425 if (!netif_device_present(dev))
vh249@5730 2426 return -ENODEV;
vh249@5730 2427 err = dev->set_mac_address(dev, sa);
vh249@5730 2428 if (!err)
vh249@5730 2429 notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev);
vh249@5730 2430 return err;
vh249@5730 2431 }
kaf24@5077 2432
kaf24@5077 2433 /*
kaf24@5077 2434 * Perform the SIOCxIFxxx calls on the device named by ifr->ifr_name; "get" commands store their result in *ifr.
kaf24@5077 2435 * Returns 0 or a negative errno (-ENODEV if the name matches no device). This function takes no lock itself. */
kaf24@5077 2436 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
kaf24@5077 2437 {
kaf24@5077 2438 int err;
kaf24@5077 2439 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
kaf24@5077 2440
kaf24@5077 2441 if (!dev)
kaf24@5077 2442 return -ENODEV;
kaf24@5077 2443
kaf24@5077 2444 switch (cmd) {
kaf24@5077 2445 case SIOCGIFFLAGS: /* Get interface flags */
kaf24@5077 2446 ifr->ifr_flags = dev_get_flags(dev);
kaf24@5077 2447 return 0;
kaf24@5077 2448
kaf24@5077 2449 case SIOCSIFFLAGS: /* Set interface flags */
kaf24@5077 2450 return dev_change_flags(dev, ifr->ifr_flags);
kaf24@5077 2451
kaf24@5077 2452 case SIOCGIFMETRIC: /* Get the metric on the interface
kaf24@5077 2453 (currently unused) */
kaf24@5077 2454 ifr->ifr_metric = 0;
kaf24@5077 2455 return 0;
kaf24@5077 2456
kaf24@5077 2457 case SIOCSIFMETRIC: /* Set the metric on the interface
kaf24@5077 2458 (currently unused) */
kaf24@5077 2459 return -EOPNOTSUPP;
kaf24@5077 2460
kaf24@5077 2461 case SIOCGIFMTU: /* Get the MTU of a device */
kaf24@5077 2462 ifr->ifr_mtu = dev->mtu;
kaf24@5077 2463 return 0;
kaf24@5077 2464
kaf24@5077 2465 case SIOCSIFMTU: /* Set the MTU of a device */
kaf24@5077 2466 return dev_set_mtu(dev, ifr->ifr_mtu);
kaf24@5077 2467
kaf24@5077 2468 case SIOCGIFHWADDR: /* Get the hardware address and device type */
kaf24@5077 2469 if (!dev->addr_len) /* zero-length address: report all zeroes */
kaf24@5077 2470 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
kaf24@5077 2471 else
kaf24@5077 2472 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
kaf24@5077 2473 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); /* never copy more than sa_data holds */
kaf24@5077 2474 ifr->ifr_hwaddr.sa_family = dev->type;
kaf24@5077 2475 return 0;
kaf24@5077 2476
kaf24@5077 2477 case SIOCSIFHWADDR: /* Set the hardware address */
vh249@5730 2478 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
kaf24@5077 2479
kaf24@5077 2480 case SIOCSIFHWBROADCAST: /* Set the broadcast hardware address */
kaf24@5077 2481 if (ifr->ifr_hwaddr.sa_family != dev->type)
kaf24@5077 2482 return -EINVAL;
kaf24@5077 2483 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
kaf24@5077 2484 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); /* bounded by the size of sa_data */
kaf24@5077 2485 notifier_call_chain(&netdev_chain,
kaf24@5077 2486 NETDEV_CHANGEADDR, dev);
kaf24@5077 2487 return 0;
kaf24@5077 2488
kaf24@5077 2489 case SIOCGIFMAP: /* Report the device's memory range, base address, irq, dma and port */
kaf24@5077 2490 ifr->ifr_map.mem_start = dev->mem_start;
kaf24@5077 2491 ifr->ifr_map.mem_end = dev->mem_end;
kaf24@5077 2492 ifr->ifr_map.base_addr = dev->base_addr;
kaf24@5077 2493 ifr->ifr_map.irq = dev->irq;
kaf24@5077 2494 ifr->ifr_map.dma = dev->dma;
kaf24@5077 2495 ifr->ifr_map.port = dev->if_port;
kaf24@5077 2496 return 0;
kaf24@5077 2497
kaf24@5077 2498 case SIOCSIFMAP: /* Hand the requested map to the set_config hook, if there is one */
kaf24@5077 2499 if (dev->set_config) {
kaf24@5077 2500 if (!netif_device_present(dev))
kaf24@5077 2501 return -ENODEV;
kaf24@5077 2502 return dev->set_config(dev, &ifr->ifr_map);
kaf24@5077 2503 }
kaf24@5077 2504 return -EOPNOTSUPP;
kaf24@5077 2505
kaf24@5077 2506 case SIOCADDMULTI: /* Add a multicast address; sa_family must be AF_UNSPEC */
kaf24@5077 2507 if (!dev->set_multicast_list ||
kaf24@5077 2508 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
kaf24@5077 2509 return -EINVAL;
kaf24@5077 2510 if (!netif_device_present(dev))
kaf24@5077 2511 return -ENODEV;
kaf24@5077 2512 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
kaf24@5077 2513 dev->addr_len, 1);
kaf24@5077 2514
kaf24@5077 2515 case SIOCDELMULTI: /* Delete a multicast address; same checks as SIOCADDMULTI */
kaf24@5077 2516 if (!dev->set_multicast_list ||
kaf24@5077 2517 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
kaf24@5077 2518 return -EINVAL;
kaf24@5077 2519 if (!netif_device_present(dev))
kaf24@5077 2520 return -ENODEV;
kaf24@5077 2521 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
kaf24@5077 2522 dev->addr_len, 1);
kaf24@5077 2523
kaf24@5077 2524 case SIOCGIFINDEX: /* Get the interface index */
kaf24@5077 2525 ifr->ifr_ifindex = dev->ifindex;
kaf24@5077 2526 return 0;
kaf24@5077 2527
kaf24@5077 2528 case SIOCGIFTXQLEN: /* Get the transmit queue length */
kaf24@5077 2529 ifr->ifr_qlen = dev->tx_queue_len;
kaf24@5077 2530 return 0;
kaf24@5077 2531
kaf24@5077 2532 case SIOCSIFTXQLEN: /* Set the transmit queue length */
kaf24@5077 2533 if (ifr->ifr_qlen < 0) /* negative lengths are rejected */
kaf24@5077 2534 return -EINVAL;
kaf24@5077 2535 dev->tx_queue_len = ifr->ifr_qlen;
kaf24@5077 2536 return 0;
kaf24@5077 2537
kaf24@5077 2538 case SIOCSIFNAME: /* Rename the device */
kaf24@5077 2539 ifr->ifr_newname[IFNAMSIZ-1] = '\0'; /* force termination of the supplied name */
kaf24@5077 2540 return dev_change_name(dev, ifr->ifr_newname);
kaf24@5077 2541
kaf24@5077 2542 /*
kaf24@5077 2543 * Unknown or private ioctl: the commands listed below go to the device's do_ioctl hook; anything else is -EINVAL
kaf24@5077 2544 */
kaf24@5077 2545
kaf24@5077 2546 default:
kaf24@5077 2547 if ((cmd >= SIOCDEVPRIVATE &&
kaf24@5077 2548 cmd <= SIOCDEVPRIVATE + 15) ||
kaf24@5077 2549 cmd == SIOCBONDENSLAVE ||
kaf24@5077 2550 cmd == SIOCBONDRELEASE ||
kaf24@5077 2551 cmd == SIOCBONDSETHWADDR ||
kaf24@5077 2552 cmd == SIOCBONDSLAVEINFOQUERY ||
kaf24@5077 2553 cmd == SIOCBONDINFOQUERY ||
kaf24@5077 2554 cmd == SIOCBONDCHANGEACTIVE ||
kaf24@5077 2555 cmd == SIOCGMIIPHY ||
kaf24@5077 2556 cmd == SIOCGMIIREG ||
kaf24@5077 2557 cmd == SIOCSMIIREG ||
kaf24@5077 2558 cmd == SIOCBRADDIF ||
kaf24@5077 2559 cmd == SIOCBRDELIF ||
kaf24@5077 2560 cmd == SIOCWANDEV) {
kaf24@5077 2561 err = -EOPNOTSUPP; /* result when the device has no do_ioctl hook */
kaf24@5077 2562 if (dev->do_ioctl) {
kaf24@5077 2563 if (netif_device_present(dev))
kaf24@5077 2564 err = dev->do_ioctl(dev, ifr,
kaf24@5077 2565 cmd);
kaf24@5077 2566 else
kaf24@5077 2567 err = -ENODEV;
kaf24@5077 2568 }
kaf24@5077 2569 } else
kaf24@5077 2570 err = -EINVAL; /* not a command handled by this function */
kaf24@5077 2571
kaf24@5077 2572 }
kaf24@5077 2573 return err; /* reached only from the default case; every other case returns directly */
kaf24@5077 2574 }
kaf24@5077 2575
kaf24@5077 2576 /*
kaf24@5077 2577 * This function handles all "interface"-type I/O control requests. The actual
kaf24@5077 2578 * 'doing' part of this is dev_ifsioc above.
kaf24@5077 2579 */
kaf24@5077 2580
kaf24@5077 2581 /**
kaf24@5077 2582 * dev_ioctl - network device ioctl
kaf24@5077 2583 * @cmd: command to issue
kaf24@5077 2584 * @arg: pointer to a struct ifreq in user space (SIOCGIFCONF passes its ifconf argument here instead)
kaf24@5077 2585 *
kaf24@5077 2586 * Issue ioctl functions to devices. This is normally called by the
kaf24@5077 2587 * user space syscall interfaces but can sometimes be useful for
kaf24@5077 2588 * other purposes. The return value is the return from the syscall if
kaf24@5077 2589 * positive or a negative errno code on error. Privileged commands return -EPERM without CAP_NET_ADMIN; unhandled commands return -EINVAL.
kaf24@5077 2590 */
kaf24@5077 2591
kaf24@5077 2592 int dev_ioctl(unsigned int cmd, void __user *arg)
kaf24@5077 2593 {
kaf24@5077 2594 struct ifreq ifr;
kaf24@5077 2595 int ret;
kaf24@5077 2596 char *colon;
kaf24@5077 2597
kaf24@5077 2598 /* One special case: SIOCGIFCONF takes ifconf argument
kaf24@5077 2599 and requires shared lock, because it sleeps writing
kaf24@5077 2600 to user space.
kaf24@5077 2601 */
kaf24@5077 2602
kaf24@5077 2603 if (cmd == SIOCGIFCONF) {
kaf24@5077 2604 rtnl_shlock();
kaf24@5077 2605 ret = dev_ifconf((char __user *) arg);
kaf24@5077 2606 rtnl_shunlock();
kaf24@5077 2607 return ret;
kaf24@5077 2608 }
kaf24@5077 2609 if (cmd == SIOCGIFNAME) /* handled by dev_ifname() directly on the user pointer */
kaf24@5077 2610 return dev_ifname((struct ifreq __user *)arg);
kaf24@5077 2611
kaf24@5077 2612 if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) /* every remaining command works on a kernel copy of the request */
kaf24@5077 2613 return -EFAULT;
kaf24@5077 2614
kaf24@5077 2615 ifr.ifr_name[IFNAMSIZ-1] = 0; /* make sure the user-supplied name is terminated */
kaf24@5077 2616
kaf24@5077 2617 colon = strchr(ifr.ifr_name, ':'); /* the name is cut at the first ':' while the command runs */
kaf24@5077 2618 if (colon)
kaf24@5077 2619 *colon = 0;
kaf24@5077 2620
kaf24@5077 2621 /*
kaf24@5077 2622 * See which interface the caller is talking about.
kaf24@5077 2623 */
kaf24@5077 2624
kaf24@5077 2625 switch (cmd) {
kaf24@5077 2626 /*
kaf24@5077 2627 * These ioctl calls:
kaf24@5077 2628 * - can be done by all.
kaf24@5077 2629 * - atomic and do not require locking.
kaf24@5077 2630 * - return a value
kaf24@5077 2631 */
kaf24@5077 2632 case SIOCGIFFLAGS:
kaf24@5077 2633 case SIOCGIFMETRIC:
kaf24@5077 2634 case SIOCGIFMTU:
kaf24@5077 2635 case SIOCGIFHWADDR:
kaf24@5077 2636 case SIOCGIFSLAVE:
kaf24@5077 2637 case SIOCGIFMAP:
kaf24@5077 2638 case SIOCGIFINDEX:
kaf24@5077 2639 case SIOCGIFTXQLEN:
kaf24@5077 2640 dev_load(ifr.ifr_name);
kaf24@5077 2641 read_lock(&dev_base_lock); /* these run under the dev_base_lock read lock rather than rtnl */
kaf24@5077 2642 ret = dev_ifsioc(&ifr, cmd);
kaf24@5077 2643 read_unlock(&dev_base_lock);
kaf24@5077 2644 if (!ret) {
kaf24@5077 2645 if (colon)
kaf24@5077 2646 *colon = ':'; /* put the ':' back before the request is copied out */
kaf24@5077 2647 if (copy_to_user(arg, &ifr,
kaf24@5077 2648 sizeof(struct ifreq)))
kaf24@5077 2649 ret = -EFAULT;
kaf24@5077 2650 }
kaf24@5077 2651 return ret;
kaf24@5077 2652
kaf24@5077 2653 case SIOCETHTOOL: /* dispatched to dev_ethtool() under rtnl; result copied back on success */
kaf24@5077 2654 dev_load(ifr.ifr_name);
kaf24@5077 2655 rtnl_lock();
kaf24@5077 2656 ret = dev_ethtool(&ifr);
kaf24@5077 2657 rtnl_unlock();
kaf24@5077 2658 if (!ret) {
kaf24@5077 2659 if (colon)
kaf24@5077 2660 *colon = ':';
kaf24@5077 2661 if (copy_to_user(arg, &ifr,
kaf24@5077 2662 sizeof(struct ifreq)))
kaf24@5077 2663 ret = -EFAULT;
kaf24@5077 2664 }
kaf24@5077 2665 return ret;
kaf24@5077 2666
kaf24@5077 2667 /*
kaf24@5077 2668 * These ioctl calls:
kaf24@5077 2669 * - require superuser power.
kaf24@5077 2670 * - require strict serialization.
kaf24@5077 2671 * - return a value
kaf24@5077 2672 */
kaf24@5077 2673 case SIOCGMIIPHY:
kaf24@5077 2674 case SIOCGMIIREG:
kaf24@5077 2675 case SIOCSIFNAME:
kaf24@5077 2676 if (!capable(CAP_NET_ADMIN))
kaf24@5077 2677 return -EPERM;
kaf24@5077 2678 dev_load(ifr.ifr_name);
kaf24@5077 2679 rtnl_lock();
kaf24@5077 2680 ret = dev_ifsioc(&ifr, cmd);
kaf24@5077 2681 rtnl_unlock();
kaf24@5077 2682 if (!ret) {
kaf24@5077 2683 if (colon)
kaf24@5077 2684 *colon = ':';
kaf24@5077 2685 if (copy_to_user(arg, &ifr,
kaf24@5077 2686 sizeof(struct ifreq)))
kaf24@5077 2687 ret = -EFAULT;
kaf24@5077 2688 }
kaf24@5077 2689 return ret;
kaf24@5077 2690
kaf24@5077 2691 /*
kaf24@5077 2692 * These ioctl calls:
kaf24@5077 2693 * - require superuser power.
kaf24@5077 2694 * - require strict serialization.
kaf24@5077 2695 * - do not return a value
kaf24@5077 2696 */
kaf24@5077 2697 case SIOCSIFFLAGS:
kaf24@5077 2698 case SIOCSIFMETRIC:
kaf24@5077 2699 case SIOCSIFMTU:
kaf24@5077 2700 case SIOCSIFMAP:
kaf24@5077 2701 case SIOCSIFHWADDR:
kaf24@5077 2702 case SIOCSIFSLAVE:
kaf24@5077 2703 case SIOCADDMULTI:
kaf24@5077 2704 case SIOCDELMULTI:
kaf24@5077 2705 case SIOCSIFHWBROADCAST:
kaf24@5077 2706 case SIOCSIFTXQLEN:
kaf24@5077 2707 case SIOCSMIIREG:
kaf24@5077 2708 case SIOCBONDENSLAVE:
kaf24@5077 2709 case SIOCBONDRELEASE:
kaf24@5077 2710 case SIOCBONDSETHWADDR:
kaf24@5077 2711 case SIOCBONDCHANGEACTIVE:
kaf24@5077 2712 case SIOCBRADDIF:
kaf24@5077 2713 case SIOCBRDELIF:
kaf24@5077 2714 if (!capable(CAP_NET_ADMIN))
kaf24@5077 2715 return -EPERM;
cl349@8755 2716 /* fall through: the two query commands below skip the CAP_NET_ADMIN check */
cl349@8755 2717 case SIOCBONDSLAVEINFOQUERY:
cl349@8755 2718 case SIOCBONDINFOQUERY:
kaf24@5077 2719 dev_load(ifr.ifr_name);
kaf24@5077 2720 rtnl_lock();
kaf24@5077 2721 ret = dev_ifsioc(&ifr, cmd);
kaf24@5077 2722 rtnl_unlock();
kaf24@5077 2723 return ret; /* nothing is copied back to user space on this path */
kaf24@5077 2724
kaf24@5077 2725 case SIOCGIFMEM:
kaf24@5077 2726 /* Get the per device memory space. We can add this but
kaf24@5077 2727 * currently do not support it */
kaf24@5077 2728 case SIOCSIFMEM:
kaf24@5077 2729 /* Set the per device memory buffer space.
kaf24@5077 2730 * Not applicable in our case */
kaf24@5077 2731 case SIOCSIFLINK:
kaf24@5077 2732 return -EINVAL;
kaf24@5077 2733
kaf24@5077 2734 /*
kaf24@5077 2735 * Unknown or private ioctl.
kaf24@5077 2736 */
kaf24@5077 2737 default:
kaf24@5077 2738 if (cmd == SIOCWANDEV ||
kaf24@5077 2739 (cmd >= SIOCDEVPRIVATE &&
kaf24@5077 2740 cmd <= SIOCDEVPRIVATE + 15)) {
kaf24@5077 2741 dev_load(ifr.ifr_name);
kaf24@5077 2742 rtnl_lock();
kaf24@5077 2743 ret = dev_ifsioc(&ifr, cmd);
kaf24@5077 2744 rtnl_unlock();
kaf24@5077 2745 if (!ret && copy_to_user(arg, &ifr, /* NOTE(review): unlike the cases above, the ':' cut from the name is not restored here - confirm intended */
kaf24@5077 2746 sizeof(struct ifreq)))
kaf24@5077 2747 ret = -EFAULT;
kaf24@5077 2748 return ret;
kaf24@5077 2749 }
kaf24@5077 2750 #ifdef WIRELESS_EXT
kaf24@5077 2751 /* Take care of Wireless Extensions */
kaf24@5077 2752 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
kaf24@5077 2753 /* If command is `set a parameter', or
kaf24@5077 2754 * `get the encoding parameters', check if
kaf24@5077 2755 * the user has the right to do it */
kaf24@5077 2756 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE) {
kaf24@5077 2757 if (!capable(CAP_NET_ADMIN))
kaf24@5077 2758 return -EPERM;
kaf24@5077 2759 }
kaf24@5077 2760 dev_load(ifr.ifr_name);
kaf24@5077 2761 rtnl_lock();
kaf24@5077 2762 /* Follow me in net/core/wireless.c */
kaf24@5077 2763 ret = wireless_process_ioctl(&ifr, cmd);
kaf24@5077 2764 rtnl_unlock();
kaf24@5077 2765 if (IW_IS_GET(cmd) && /* NOTE(review): the copy-back does not depend on ret, so it also happens after a failure - confirm intended */
kaf24@5077 2766 copy_to_user(arg, &ifr,
kaf24@5077 2767 sizeof(struct ifreq)))
kaf24@5077 2768 ret = -EFAULT;
kaf24@5077 2769 return ret;
kaf24@5077 2770 }
kaf24@5077 2771 #endif /* WIRELESS_EXT */
kaf24@5077 2772 return -EINVAL;
kaf24@5077 2773 }
kaf24@5077 2774 }
kaf24@5077 2775
kaf24@5077 2776
kaf24@5077 2777 /**
kaf24@5077 2778 * dev_new_index - allocate an ifindex
kaf24@5077 2779 *
kaf24@5077 2780 * Returns a suitable unique value for a new device interface
kaf24@5077 2781 * number. The caller must hold the rtnl semaphore or the
kaf24@5077 2782 * dev_base_lock to be sure it remains unique. The value returned is always greater than zero.
kaf24@5077 2783 */
kaf24@5077 2784 static int dev_new_index(void)
kaf24@5077 2785 {
kaf24@5077 2786 static int ifindex; /* last candidate tried; kept across calls */
kaf24@5077 2787 for (;;) {
kaf24@5077 2788 if (++ifindex <= 0) /* keep candidates strictly positive */
kaf24@5077 2789 ifindex = 1;
kaf24@5077 2790 if (!__dev_get_by_index(ifindex)) /* presumably NULL means no device uses this index yet */
kaf24@5077 2791 return ifindex;
kaf24@5077 2792 }
kaf24@5077 2793 }
kaf24@5077 2794
kaf24@5077 2795 static int dev_boot_phase = 1; /* register_netdevice() hits BUG_ON() while this is non-zero */
kaf24@5077 2796
kaf24@5077 2797 /* Delayed registration/unregistration: devices queued on net_todo_list are handled later by netdev_run_todo() */
kaf24@5077 2798 static DEFINE_SPINLOCK(net_todo_list_lock);
kaf24@5077 2799 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
kaf24@5077 2800
kaf24@5077 2801 static inline void net_set_todo(struct net_device *dev) /* Append dev to the tail of net_todo_list while holding net_todo_list_lock. */
kaf24@5077 2802 {
kaf24@5077 2803 spin_lock(&net_todo_list_lock);
kaf24@5077 2804 list_add_tail(&dev->todo_list, &net_todo_list);
kaf24@5077 2805 spin_unlock(&net_todo_list_lock);
kaf24@5077 2806 }
kaf24@5077 2807
kaf24@5077 2808 /**
kaf24@5077 2809 * register_netdevice - register a network device
kaf24@5077 2810 * @dev: device to register
kaf24@5077 2811 *
kaf24@5077 2812 * Take a completed network device structure and add it to the kernel
kaf24@5077 2813 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
kaf24@5077 2814 * chain. 0 is returned on success. A negative errno code is returned
kaf24@5077 2815 * on a failure to set up the device, or if the name is a duplicate.
kaf24@5077 2816 *
kaf24@5077 2817 * Callers must hold the rtnl semaphore. You may want
kaf24@5077 2818 * register_netdev() instead of this.
kaf24@5077 2819 *
kaf24@5077 2820 * BUGS:
kaf24@5077 2821 * The locking appears insufficient to guarantee two parallel registers
kaf24@5077 2822 * will not get the same name.
kaf24@5077 2823 */
kaf24@5077 2824
kaf24@5077 2825 int register_netdevice(struct net_device *dev)
kaf24@5077 2826 {
kaf24@5077 2827 struct hlist_head *head;
kaf24@5077 2828 struct hlist_node *p;
kaf24@5077 2829 int ret;
kaf24@5077 2830
kaf24@5077 2831 BUG_ON(dev_boot_phase);
kaf24@5077 2832 ASSERT_RTNL();
kaf24@5077 2833
kaf24@5077 2834 /* When net_device's are persistent, this will be fatal. */
kaf24@5077 2835 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
kaf24@5077 2836
kaf24@5077 2837 spin_lock_init(&dev->queue_lock);
kaf24@10555 2838 spin_lock_init(&dev->_xmit_lock);
kaf24@5077 2839 dev->xmit_lock_owner = -1;
kaf24@5077 2840 #ifdef CONFIG_NET_CLS_ACT
kaf24@5077 2841 spin_lock_init(&dev->ingress_lock);
kaf24@5077 2842 #endif
kaf24@5077 2843
kaf24@5077 2844 ret = alloc_divert_blk(dev);
kaf24@5077 2845 if (ret)
kaf24@5077 2846 goto out; /* allocation failed, so there is no divert block to free */
kaf24@5077 2847
kaf24@5077 2848 dev->iflink = -1; /* sentinel: replaced by ifindex below if still -1 after dev->init() */
kaf24@5077 2849
kaf24@5077 2850 /* Init, if this function is available */
kaf24@5077 2851 if (dev->init) {
kaf24@5077 2852 ret = dev->init(dev);
kaf24@5077 2853 if (ret) {
kaf24@5077 2854 if (ret > 0) /* positive results from init are reported as -EIO */
kaf24@5077 2855 ret = -EIO;
kaf24@5077 2856 goto out_err;
kaf24@5077 2857 }
kaf24@5077 2858 }
kaf24@5077 2859
kaf24@5077 2860 if (!dev_valid_name(dev->name)) {
kaf24@5077 2861 ret = -EINVAL;
kaf24@5077 2862 goto out_err;
kaf24@5077 2863 }
kaf24@5077 2864
kaf24@5077 2865 dev->ifindex = dev_new_index();
kaf24@5077 2866 if (dev->iflink == -1)
kaf24@5077 2867 dev->iflink = dev->ifindex;
kaf24@5077 2868
kaf24@5077 2869 /* Check for existence of name */
kaf24@5077 2870 head = dev_name_hash(dev->name);
kaf24@5077 2871 hlist_for_each(p, head) {
kaf24@5077 2872 struct net_device *d
kaf24@5077 2873 = hlist_entry(p, struct net_device, name_hlist);
kaf24@5077 2874 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
kaf24@5077 2875 ret = -EEXIST;
kaf24@5077 2876 goto out_err;
kaf24@5077 2877 }
kaf24@5077 2878 }
kaf24@5077 2879
kaf24@5077 2880 /* Fix illegal SG+CSUM combinations. */
kaf24@5077 2881 if ((dev->features & NETIF_F_SG) &&
kaf24@10555 2882 !(dev->features & NETIF_F_ALL_CSUM)) {
kaf24@5077 2883 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
kaf24@5077 2884 dev->name); /* NOTE(review): no KERN_ level here, unlike the UFO messages below */
kaf24@5077 2885 dev->features &= ~NETIF_F_SG;
kaf24@5077 2886 }
kaf24@5077 2887
kaf24@5077 2888 /* TSO requires that SG is present as well. */
kaf24@5077 2889 if ((dev->features & NETIF_F_TSO) &&
kaf24@5077 2890 !(dev->features & NETIF_F_SG)) {
kaf24@5077 2891 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
kaf24@5077 2892 dev->name); /* NOTE(review): no KERN_ level here either */
kaf24@5077 2893 dev->features &= ~NETIF_F_TSO;
kaf24@5077 2894 }
cl349@8729 2895 if (dev->features & NETIF_F_UFO) { /* UFO is dropped unless both HW_CSUM and SG are set */
cl349@8729 2896 if (!(dev->features & NETIF_F_HW_CSUM)) {
cl349@8729 2897 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
cl349@8729 2898 "NETIF_F_HW_CSUM feature.\n",
cl349@8729 2899 dev->name);
cl349@8729 2900 dev->features &= ~NETIF_F_UFO;
cl349@8729 2901 }
cl349@8729 2902 if (!(dev->features & NETIF_F_SG)) {
cl349@8729 2903 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
cl349@8729 2904 "NETIF_F_SG feature.\n",
cl349@8729 2905 dev->name);
cl349@8729 2906 dev->features &= ~NETIF_F_UFO;
cl349@8729 2907 }
cl349@8729 2908 }
kaf24@5077 2909
kaf24@5077 2910 /*
kaf24@5077 2911 * nil rebuild_header routine,
kaf24@5077 2912 * that should be never called and used as just bug trap.
kaf24@5077 2913 */
kaf24@5077 2914
kaf24@5077 2915 if (!dev->rebuild_header)
kaf24@5077 2916 dev->rebuild_header = default_rebuild_header;
kaf24@5077 2917
kaf24@5077 2918 /*
kaf24@5077 2919 * Default initial state at registry is that the
kaf24@5077 2920 * device is present.
kaf24@5077 2921 */
kaf24@5077 2922
kaf24@5077 2923 set_bit(__LINK_STATE_PRESENT, &dev->state);
kaf24@5077 2924
kaf24@5077 2925 dev->next = NULL;
kaf24@5077 2926 dev_init_scheduler(dev);
kaf24@5077 2927 write_lock_bh(&dev_base_lock); /* device list and both hash chains are updated under the write lock */
kaf24@5077 2928 *dev_tail = dev;
kaf24@5077 2929 dev_tail = &dev->next;
kaf24@5077 2930 hlist_add_head(&dev->name_hlist, head);
kaf24@5077 2931 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
kaf24@5077 2932 dev_hold(dev);
kaf24@5077 2933 dev->reg_state = NETREG_REGISTERING;
kaf24@5077 2934 write_unlock_bh(&dev_base_lock);
kaf24@5077 2935
kaf24@5077 2936 /* Notify protocols, that a new device appeared. */
kaf24@5077 2937 notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
kaf24@5077 2938
kaf24@5077 2939 /* Finish registration after unlock */
kaf24@5077 2940 net_set_todo(dev);
kaf24@5077 2941 ret = 0;
kaf24@5077 2942
kaf24@5077 2943 out:
kaf24@5077 2944 return ret;
kaf24@5077 2945 out_err:
kaf24@5077 2946 free_divert_blk(dev); /* NOTE(review): the only cleanup on failure, even when dev->init() already succeeded - confirm nothing else needs undoing */
kaf24@5077 2947 goto out;
kaf24@5077 2948 }
kaf24@5077 2949
kaf24@5077 2950 /**
kaf24@5077 2951 * register_netdev - register a network device
kaf24@5077 2952 * @dev: device to register
kaf24@5077 2953 *
kaf24@5077 2954 * Take a completed network device structure and add it to the kernel
kaf24@5077 2955 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
kaf24@5077 2956 * chain. 0 is returned on success. A negative errno code is returned
kaf24@5077 2957 * on a failure to set up the device, or if the name is a duplicate.
kaf24@5077 2958 *
kaf24@5077 2959 * This is a wrapper around register_netdevice that takes the rtnl semaphore
kaf24@5077 2960 * and expands the device name if you passed a format string to
kaf24@5077 2961 * alloc_netdev.
kaf24@5077 2962 */
kaf24@5077 2963 int register_netdev(struct net_device *dev)
kaf24@5077 2964 {
kaf24@5077 2965 int err;
kaf24@5077 2966
kaf24@5077 2967 rtnl_lock();
kaf24@5077 2968
kaf24@5077 2969 /*
kaf24@5077 2970 * If the name is a format string the caller wants us to do a
kaf24@5077 2971 * name allocation.
kaf24@5077 2972 */
kaf24@5077 2973 if (strchr(dev->name, '%')) {
kaf24@5077 2974 err = dev_alloc_name(dev, dev->name);
kaf24@5077 2975 if (err < 0)
kaf24@5077 2976 goto out;
kaf24@5077 2977 }
kaf24@5077 2978
kaf24@5077 2979 /*
kaf24@5077 2980 * Back compatibility hook. Kill this one in 2.5
kaf24@5077 2981 */
kaf24@5077 2982 if (dev->name[0] == 0 || dev->name[0] == ' ') { /* empty name, or one starting with a space: allocate an "eth%d" name */
kaf24@5077 2983 err = dev_alloc_name(dev, "eth%d");
kaf24@5077 2984 if (err < 0)
kaf24@5077 2985 goto out;
kaf24@5077 2986 }
kaf24@5077 2987
kaf24@5077 2988 err = register_netdevice(dev);
kaf24@5077 2989 out:
kaf24@5077 2990 rtnl_unlock(); /* released on success and on every failure path */
kaf24@5077 2991 return err;
kaf24@5077 2992 }
kaf24@5077 2993 EXPORT_SYMBOL(register_netdev);
kaf24@5077 2994
kaf24@5077 2995 /*
kaf24@5077 2996 * netdev_wait_allrefs - wait until all references are gone.
kaf24@5077 2997 *
kaf24@5077 2998 * This is called when unregistering network devices.
kaf24@5077 2999 *
kaf24@5077 3000 * Any protocol or device that holds a reference should register
kaf24@5077 3001 * for netdevice notification, and cleanup and put back the
kaf24@5077 3002 * reference if they receive an UNREGISTER event.
kaf24@5077 3003 * We can get stuck here if buggy protocols don't correctly
kaf24@5077 3004 * call dev_put. The loop sleeps between checks and only ends once dev->refcnt reads zero.
kaf24@5077 3005 */
kaf24@5077 3006 static void netdev_wait_allrefs(struct net_device *dev)
kaf24@5077 3007 {
kaf24@5077 3008 unsigned long rebroadcast_time, warning_time;
kaf24@5077 3009
kaf24@5077 3010 rebroadcast_time = warning_time = jiffies;
kaf24@5077 3011 while (atomic_read(&dev->refcnt) != 0) {
kaf24@5077 3012 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { /* rebroadcast at most once per second */
kaf24@5077 3013 rtnl_shlock();
kaf24@5077 3014
kaf24@5077 3015 /* Rebroadcast unregister notification */
kaf24@5077 3016 notifier_call_chain(&netdev_chain,
kaf24@5077 3017 NETDEV_UNREGISTER, dev);
kaf24@5077 3018
kaf24@5077 3019 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
kaf24@5077 3020 &dev->state)) {
kaf24@5077 3021 /* We must not have linkwatch events
kaf24@5077 3022 * pending on unregister. If this
kaf24@5077 3023 * happens, we simply run the queue
kaf24@5077 3024 * unscheduled, resulting in a noop
kaf24@5077 3025 * for this device.
kaf24@5077 3026 */
kaf24@5077 3027 linkwatch_run_queue();
kaf24@5077 3028 }
kaf24@5077 3029
kaf24@5077 3030 rtnl_shunlock();
kaf24@5077 3031
kaf24@5077 3032 rebroadcast_time = jiffies;
kaf24@5077 3033 }
kaf24@5077 3034
kaf24@5077 3035 msleep(250); /* sleep 250 ms between refcount checks */
kaf24@5077 3036
kaf24@5077 3037 if (time_after(jiffies, warning_time + 10 * HZ)) { /* log a warning every ten seconds while still waiting */
kaf24@5077 3038 printk(KERN_EMERG "unregister_netdevice: "
kaf24@5077 3039 "waiting for %s to become free. Usage "
kaf24@5077 3040 "count = %d\n",
kaf24@5077 3041 dev->name, atomic_read(&dev->refcnt));
kaf24@5077 3042 warning_time = jiffies;
kaf24@5077 3043 }
kaf24@5077 3044 }
kaf24@5077 3045 }
kaf24@5077 3046
kaf24@5077 3047 /* The sequence is:
kaf24@5077 3048 *
kaf24@5077 3049 * rtnl_lock();
kaf24@5077 3050 * ...
kaf24@5077 3051 * register_netdevice(x1);
kaf24@5077 3052 * register_netdevice(x2);
kaf24@5077 3053 * ...
kaf24@5077 3054 * unregister_netdevice(y1);
kaf24@5077 3055 * unregister_netdevice(y2);
kaf24@5077 3056 * ...
kaf24@5077 3057 * rtnl_unlock();
kaf24@5077 3058 * free_netdev(y1);
kaf24@5077 3059 * free_netdev(y2);
kaf24@5077 3060 *
kaf24@5077 3061 * We are invoked by rtnl_unlock() after it drops the semaphore.
kaf24@5077 3062 * This allows us to deal with problems:
kaf24@5077 3063 * 1) We can create/delete sysfs objects which invoke hotplug
kaf24@5077 3064 * without deadlocking with linkwatch via keventd.
kaf24@5077 3065 * 2) Since we run with the RTNL semaphore not held, we can sleep
kaf24@5077 3066 * safely in order to wait for the netdev refcnt to drop to zero.
kaf24@5077 3067 */
kaf24@5077 3068 static DECLARE_MUTEX(net_todo_run_mutex);
kaf24@5077 3069 void netdev_run_todo(void)
kaf24@5077 3070 {
kaf24@5077 3071 struct list_head list = LIST_HEAD_INIT(list);
kaf24@5077 3072 int err;
kaf24@5077 3073
kaf24@5077 3074
kaf24@5077 3075 /* Need to guard against multiple cpu's getting out of order. */
kaf24@5077 3076 down(&net_todo_run_mutex);
kaf24@5077 3077
kaf24@5077 3078 /* Not safe to do outside the semaphore. We must not return
kaf24@5077 3079 * until all unregister events invoked by the local processor
kaf24@5077 3080 * have been completed (either by this todo run, or one on
kaf24@5077 3081 * another cpu).
kaf24@5077 3082 */
kaf24@5077 3083 if (list_empty(&net_todo_list))
kaf24@5077 3084 goto out;
kaf24@5077 3085
kaf24@5077 3086 /* Snapshot list, allow later requests */
kaf24@5077 3087 spin_lock(&net_todo_list_lock);
kaf24@5077 3088 list_splice_init(&net_todo_list, &list);
kaf24@5077 3089 spin_unlock(&net_todo_list_lock);
kaf24@5077 3090
kaf24@5077 3091 while (!list_empty(&list)) {
kaf24@5077 3092 struct net_device *dev
kaf24@5077 3093 = list_entry(list.next, struct net_device, todo_list);
kaf24@5077 3094 list_del(&dev->todo_list);
kaf24@5077 3095
kaf24@5077 3096 switch(dev->reg_state) {
kaf24@5077 3097 case NETREG_REGISTERING:
cl349@9940 3098 dev->reg_state = NETREG_REGISTERED;
kaf24@5077 3099 err = netdev_register_sysfs(dev);
kaf24@5077 3100 if (err)
kaf24@5077 3101 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
kaf24@5077 3102 dev->name, err);
kaf24@5077 3103 break;
kaf24@5077 3104
kaf24@5077 3105 case NETREG_UNREGISTERING:
kaf24@5077 3106 netdev_unregister_sysfs(dev);
kaf24@5077 3107 dev->reg_state = NETREG_UNREGISTERED;
kaf24@5077 3108
kaf24@5077 3109 netdev_wait_allrefs(dev);
kaf24@5077 3110
kaf24@5077 3111 /* paranoia */
kaf24@5077 3112 BUG_ON(atomic_read(&dev->refcnt));
kaf24@5077 3113 BUG_TRAP(!dev->ip_ptr);
kaf24@5077 3114 BUG_TRAP(!dev->ip6_ptr);
kaf24@5077 3115 BUG_TRAP(!dev->dn_ptr);