/* net-next-2.6: net/core/dev.c */
/*
 *	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain :	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>

#include "net-sysfs.h"

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

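/*
 * Illustrative note (not part of the original file): the bucket for a
 * handler is simply the low nibble of the host-order protocol value, e.g.
 *
 *	0x0800 (IP)  & PTYPE_HASH_MASK = 0
 *	0x0806 (ARP) & PTYPE_HASH_MASK = 6
 *
 * while RARP (0x8035), SNAP (0x0005) and X.25 (0x0805) all land in
 * bucket 5, which is the overlap the comment above refers to.
 */
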
#ifdef CONFIG_NET_DMA
struct net_dma {
	struct dma_client	client;
	spinlock_t		lock;
	cpumask_t		channel_mask;
	struct dma_chan		**channels;
};

static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
		 enum dma_state state);

static struct net_dma net_dma = {
	.client = {
		.event_callback = netdev_dma_event,
	},
};
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

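/*
 * Illustrative sketch (not part of the original file): a pure reader of
 * the device list needs only dev_base_lock, as described above; the
 * do_something_readonly() helper is hypothetical.
 *
 *	read_lock(&dev_base_lock);
 *	for_each_netdev(net, dev)
 *		do_something_readonly(dev);
 *	read_unlock(&dev_base_lock);
 *
 * Writers (see list_netdevice()/unlist_netdevice() below) additionally
 * hold the rtnl semaphore and take dev_base_lock for writing.
 */
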
#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
	 ARPHRD_NONE};

static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
	 "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

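/*
 * Illustrative sketch (not part of the original file): a module that wants
 * to see every IPv4 frame would register a handler roughly like this;
 * my_rcv() and my_pt are hypothetical names.
 *
 *	static int my_rcv(struct sk_buff *skb, struct net_device *dev,
 *			  struct packet_type *pt, struct net_device *orig_dev)
 *	{
 *		// inspect skb here, then drop our reference
 *		kfree_skb(skb);
 *		return 0;
 *	}
 *
 *	static struct packet_type my_pt = {
 *		.type = __constant_htons(ETH_P_IP),
 *		.func = my_rcv,
 *	};
 *
 *	dev_add_pack(&my_pt);
 *	...
 *	dev_remove_pack(&my_pt);	// sleeps; see dev_remove_pack() below
 */
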
/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine to
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

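/*
 * Illustrative note (not part of the original file): with the parser above,
 * a kernel command line entry such as
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * stores irq 9 and I/O base 0x300 (and zero mem_start/mem_end) under the
 * name "eth0"; a driver that later probes eth0 picks these values up via
 * netdev_boot_setup_check().
 */
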
/*******************************************************************************

			Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

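/*
 * Illustrative sketch (not part of the original file): the reference taken
 * by dev_get_by_name() must be dropped with dev_put() once the caller is
 * done with the device.
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *
 *	if (dev) {
 *		// use dev, e.g. read dev->mtu or dev->dev_addr
 *		dev_put(dev);
 *	}
 */
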
/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

766/**
767 * dev_valid_name - check if name is okay for network device
768 * @name: name string
769 *
770 * Network device names need to be valid file names to
771 * allow sysfs to work. We also disallow any kind of
772 * whitespace.
1da177e4 773 */
c2373ee9 774int dev_valid_name(const char *name)
1da177e4 775{
c7fa9d18
DM
776 if (*name == '\0')
777 return 0;
b6fe17d6
SH
778 if (strlen(name) >= IFNAMSIZ)
779 return 0;
c7fa9d18
DM
780 if (!strcmp(name, ".") || !strcmp(name, ".."))
781 return 0;
782
783 while (*name) {
784 if (*name == '/' || isspace(*name))
785 return 0;
786 name++;
787 }
788 return 1;
1da177e4
LT
789}
790
791/**
b267b179
EB
792 * __dev_alloc_name - allocate a name for a device
793 * @net: network namespace to allocate the device name in
1da177e4 794 * @name: name format string
b267b179 795 * @buf: scratch buffer and result name string
1da177e4
LT
796 *
797 * Passed a format string - eg "lt%d" it will try and find a suitable
3041a069
SH
798 * id. It scans list of devices to build up a free map, then chooses
799 * the first empty slot. The caller must hold the dev_base or rtnl lock
800 * while allocating the name and adding the device in order to avoid
801 * duplicates.
802 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
803 * Returns the number of the unit assigned or a negative errno code.
1da177e4
LT
804 */
805
b267b179 806static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1da177e4
LT
807{
808 int i = 0;
1da177e4
LT
809 const char *p;
810 const int max_netdevices = 8*PAGE_SIZE;
cfcabdcc 811 unsigned long *inuse;
1da177e4
LT
812 struct net_device *d;
813
814 p = strnchr(name, IFNAMSIZ-1, '%');
815 if (p) {
816 /*
817 * Verify the string as this thing may have come from
818 * the user. There must be either one "%d" and no other "%"
819 * characters.
820 */
821 if (p[1] != 'd' || strchr(p + 2, '%'))
822 return -EINVAL;
823
824 /* Use one page as a bit array of possible slots */
cfcabdcc 825 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1da177e4
LT
826 if (!inuse)
827 return -ENOMEM;
828
881d966b 829 for_each_netdev(net, d) {
1da177e4
LT
830 if (!sscanf(d->name, name, &i))
831 continue;
832 if (i < 0 || i >= max_netdevices)
833 continue;
834
835 /* avoid cases where sscanf is not exact inverse of printf */
b267b179 836 snprintf(buf, IFNAMSIZ, name, i);
1da177e4
LT
837 if (!strncmp(buf, d->name, IFNAMSIZ))
838 set_bit(i, inuse);
839 }
840
841 i = find_first_zero_bit(inuse, max_netdevices);
842 free_page((unsigned long) inuse);
843 }
844
b267b179
EB
845 snprintf(buf, IFNAMSIZ, name, i);
846 if (!__dev_get_by_name(net, buf))
1da177e4 847 return i;
1da177e4
LT
848
849 /* It is possible to run out of possible slots
850 * when the name is long and there isn't enough space left
851 * for the digits, or if all bits are used.
852 */
853 return -ENFILE;
854}
855
b267b179
EB
856/**
857 * dev_alloc_name - allocate a name for a device
858 * @dev: device
859 * @name: name format string
860 *
861 * Passed a format string - eg "lt%d" it will try and find a suitable
862 * id. It scans list of devices to build up a free map, then chooses
863 * the first empty slot. The caller must hold the dev_base or rtnl lock
864 * while allocating the name and adding the device in order to avoid
865 * duplicates.
866 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
867 * Returns the number of the unit assigned or a negative errno code.
868 */
869
870int dev_alloc_name(struct net_device *dev, const char *name)
871{
872 char buf[IFNAMSIZ];
873 struct net *net;
874 int ret;
875
c346dca1
YH
876 BUG_ON(!dev_net(dev));
877 net = dev_net(dev);
b267b179
EB
878 ret = __dev_alloc_name(net, name, buf);
879 if (ret >= 0)
880 strlcpy(dev->name, buf, IFNAMSIZ);
881 return ret;
882}
883
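/*
 * Illustrative sketch (not part of the original file): a driver that
 * allocates its own net_device typically lets the core pick the unit
 * number by using a "%d" format name before registration.
 *
 *	strcpy(dev->name, "eth%d");
 *	...
 *	err = dev_alloc_name(dev, dev->name);	// e.g. resolves to "eth2"
 *	if (err < 0)
 *		goto fail;
 *
 * register_netdev() performs the same expansion for names containing '%',
 * so most drivers simply pass the format name straight to registration.
 */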
1da177e4
LT
884
885/**
886 * dev_change_name - change name of a device
887 * @dev: device
888 * @newname: name (or format string) must be at least IFNAMSIZ
889 *
 890 * Change name of a device, can pass format strings "eth%d"
 891 * for wildcarding.
892 */
cf04a4c7 893int dev_change_name(struct net_device *dev, const char *newname)
1da177e4 894{
fcc5a03a 895 char oldname[IFNAMSIZ];
1da177e4 896 int err = 0;
fcc5a03a 897 int ret;
881d966b 898 struct net *net;
1da177e4
LT
899
900 ASSERT_RTNL();
c346dca1 901 BUG_ON(!dev_net(dev));
1da177e4 902
c346dca1 903 net = dev_net(dev);
1da177e4
LT
904 if (dev->flags & IFF_UP)
905 return -EBUSY;
906
907 if (!dev_valid_name(newname))
908 return -EINVAL;
909
c8d90dca
SH
910 if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
911 return 0;
912
fcc5a03a
HX
913 memcpy(oldname, dev->name, IFNAMSIZ);
914
1da177e4
LT
915 if (strchr(newname, '%')) {
916 err = dev_alloc_name(dev, newname);
917 if (err < 0)
918 return err;
1da177e4 919 }
881d966b 920 else if (__dev_get_by_name(net, newname))
1da177e4
LT
921 return -EEXIST;
922 else
923 strlcpy(dev->name, newname, IFNAMSIZ);
924
fcc5a03a 925rollback:
3891845e
EB
926 /* For now only devices in the initial network namespace
927 * are in sysfs.
928 */
929 if (net == &init_net) {
930 ret = device_rename(&dev->dev, dev->name);
931 if (ret) {
932 memcpy(dev->name, oldname, IFNAMSIZ);
933 return ret;
934 }
dcc99773 935 }
7f988eab
HX
936
937 write_lock_bh(&dev_base_lock);
92749821 938 hlist_del(&dev->name_hlist);
881d966b 939 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
7f988eab
HX
940 write_unlock_bh(&dev_base_lock);
941
056925ab 942 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
fcc5a03a
HX
943 ret = notifier_to_errno(ret);
944
945 if (ret) {
946 if (err) {
947 printk(KERN_ERR
948 "%s: name change rollback failed: %d.\n",
949 dev->name, ret);
950 } else {
951 err = ret;
952 memcpy(dev->name, oldname, IFNAMSIZ);
953 goto rollback;
954 }
955 }
1da177e4
LT
956
957 return err;
958}
959
0b815a1a
SH
960/**
961 * dev_set_alias - change ifalias of a device
962 * @dev: device
963 * @alias: name up to IFALIASZ
f0db275a 964 * @len: limit of bytes to copy from info
0b815a1a
SH
965 *
 966 * Set ifalias for a device.
967 */
968int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
969{
970 ASSERT_RTNL();
971
972 if (len >= IFALIASZ)
973 return -EINVAL;
974
96ca4a2c
OH
975 if (!len) {
976 if (dev->ifalias) {
977 kfree(dev->ifalias);
978 dev->ifalias = NULL;
979 }
980 return 0;
981 }
982
0b815a1a
SH
983 dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
984 if (!dev->ifalias)
985 return -ENOMEM;
986
987 strlcpy(dev->ifalias, alias, len+1);
988 return len;
989}
990
991
d8a33ac4 992/**
3041a069 993 * netdev_features_change - device changes features
d8a33ac4
SH
994 * @dev: device to cause notification
995 *
996 * Called to indicate a device has changed features.
997 */
998void netdev_features_change(struct net_device *dev)
999{
056925ab 1000 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
d8a33ac4
SH
1001}
1002EXPORT_SYMBOL(netdev_features_change);
1003
1da177e4
LT
1004/**
1005 * netdev_state_change - device changes state
1006 * @dev: device to cause notification
1007 *
1008 * Called to indicate a device has changed state. This function calls
1009 * the notifier chains for netdev_chain and sends a NEWLINK message
1010 * to the routing socket.
1011 */
1012void netdev_state_change(struct net_device *dev)
1013{
1014 if (dev->flags & IFF_UP) {
056925ab 1015 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1da177e4
LT
1016 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1017 }
1018}
1019
c1da4ac7
OG
1020void netdev_bonding_change(struct net_device *dev)
1021{
1022 call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1023}
1024EXPORT_SYMBOL(netdev_bonding_change);
1025
1da177e4
LT
1026/**
1027 * dev_load - load a network module
c4ea43c5 1028 * @net: the applicable net namespace
1da177e4
LT
1029 * @name: name of interface
1030 *
1031 * If a network interface is not present and the process has suitable
1032 * privileges this function loads the module. If module loading is not
1033 * available in this kernel then it becomes a nop.
1034 */
1035
881d966b 1036void dev_load(struct net *net, const char *name)
1da177e4 1037{
4ec93edb 1038 struct net_device *dev;
1da177e4
LT
1039
1040 read_lock(&dev_base_lock);
881d966b 1041 dev = __dev_get_by_name(net, name);
1da177e4
LT
1042 read_unlock(&dev_base_lock);
1043
1044 if (!dev && capable(CAP_SYS_MODULE))
1045 request_module("%s", name);
1046}
1047
1da177e4
LT
1048/**
1049 * dev_open - prepare an interface for use.
1050 * @dev: device to open
1051 *
1052 * Takes a device from down to up state. The device's private open
1053 * function is invoked and then the multicast lists are loaded. Finally
1054 * the device is moved into the up state and a %NETDEV_UP message is
1055 * sent to the netdev notifier chain.
1056 *
1057 * Calling this function on an active interface is a nop. On a failure
1058 * a negative errno code is returned.
1059 */
1060int dev_open(struct net_device *dev)
1061{
d314774c 1062 const struct net_device_ops *ops = dev->netdev_ops;
1da177e4
LT
1063 int ret = 0;
1064
e46b66bc
BH
1065 ASSERT_RTNL();
1066
1da177e4
LT
1067 /*
1068 * Is it already up?
1069 */
1070
1071 if (dev->flags & IFF_UP)
1072 return 0;
1073
1074 /*
1075 * Is it even present?
1076 */
1077 if (!netif_device_present(dev))
1078 return -ENODEV;
1079
1080 /*
1081 * Call device private open method
1082 */
1083 set_bit(__LINK_STATE_START, &dev->state);
bada339b 1084
d314774c
SH
1085 if (ops->ndo_validate_addr)
1086 ret = ops->ndo_validate_addr(dev);
bada339b 1087
d314774c
SH
1088 if (!ret && ops->ndo_open)
1089 ret = ops->ndo_open(dev);
1da177e4 1090
4ec93edb 1091 /*
1da177e4
LT
1092 * If it went open OK then:
1093 */
1094
bada339b
JG
1095 if (ret)
1096 clear_bit(__LINK_STATE_START, &dev->state);
1097 else {
1da177e4
LT
1098 /*
1099 * Set the flags.
1100 */
1101 dev->flags |= IFF_UP;
1102
1103 /*
1104 * Initialize multicasting status
1105 */
4417da66 1106 dev_set_rx_mode(dev);
1da177e4
LT
1107
1108 /*
1109 * Wakeup transmit queue engine
1110 */
1111 dev_activate(dev);
1112
1113 /*
1114 * ... and announce new interface.
1115 */
056925ab 1116 call_netdevice_notifiers(NETDEV_UP, dev);
1da177e4 1117 }
bada339b 1118
1da177e4
LT
1119 return ret;
1120}
1121
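/*
 * Illustrative sketch (not part of the original file): bringing an
 * interface up from kernel code follows the same rule as the ioctl path,
 * i.e. dev_open() is called with the rtnl semaphore held.
 *
 *	rtnl_lock();
 *	err = dev_open(dev);	// no-op (returns 0) if IFF_UP is already set
 *	rtnl_unlock();
 */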
1122/**
1123 * dev_close - shutdown an interface.
1124 * @dev: device to shutdown
1125 *
1126 * This function moves an active device into down state. A
1127 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1128 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1129 * chain.
1130 */
1131int dev_close(struct net_device *dev)
1132{
d314774c 1133 const struct net_device_ops *ops = dev->netdev_ops;
e46b66bc
BH
1134 ASSERT_RTNL();
1135
9d5010db
DM
1136 might_sleep();
1137
1da177e4
LT
1138 if (!(dev->flags & IFF_UP))
1139 return 0;
1140
1141 /*
1142 * Tell people we are going down, so that they can
 1143 * prepare for it while the device is still operating.
1144 */
056925ab 1145 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1da177e4 1146
1da177e4
LT
1147 clear_bit(__LINK_STATE_START, &dev->state);
1148
1149 /* Synchronize to scheduled poll. We cannot touch poll list,
bea3348e
SH
1150 * it can be even on different cpu. So just clear netif_running().
1151 *
 1152 * dev->stop() will invoke napi_disable() on all of its
1153 * napi_struct instances on this device.
1154 */
1da177e4 1155 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1da177e4 1156
d8b2a4d2
ML
1157 dev_deactivate(dev);
1158
1da177e4
LT
1159 /*
1160 * Call the device specific close. This cannot fail.
1161 * Only if device is UP
1162 *
1163 * We allow it to be called even after a DETACH hot-plug
1164 * event.
1165 */
d314774c
SH
1166 if (ops->ndo_stop)
1167 ops->ndo_stop(dev);
1da177e4
LT
1168
1169 /*
1170 * Device is now down.
1171 */
1172
1173 dev->flags &= ~IFF_UP;
1174
1175 /*
1176 * Tell people we are down
1177 */
056925ab 1178 call_netdevice_notifiers(NETDEV_DOWN, dev);
1da177e4
LT
1179
1180 return 0;
1181}
1182
1183
0187bdfb
BH
1184/**
1185 * dev_disable_lro - disable Large Receive Offload on a device
1186 * @dev: device
1187 *
1188 * Disable Large Receive Offload (LRO) on a net device. Must be
1189 * called under RTNL. This is needed if received packets may be
1190 * forwarded to another interface.
1191 */
1192void dev_disable_lro(struct net_device *dev)
1193{
1194 if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
1195 dev->ethtool_ops->set_flags) {
1196 u32 flags = dev->ethtool_ops->get_flags(dev);
1197 if (flags & ETH_FLAG_LRO) {
1198 flags &= ~ETH_FLAG_LRO;
1199 dev->ethtool_ops->set_flags(dev, flags);
1200 }
1201 }
1202 WARN_ON(dev->features & NETIF_F_LRO);
1203}
1204EXPORT_SYMBOL(dev_disable_lro);
1205
1206
881d966b
EB
1207static int dev_boot_phase = 1;
1208
1da177e4
LT
1209/*
1210 * Device change register/unregister. These are not inline or static
1211 * as we export them to the world.
1212 */
1213
1214/**
1215 * register_netdevice_notifier - register a network notifier block
1216 * @nb: notifier
1217 *
1218 * Register a notifier to be called when network device events occur.
1219 * The notifier passed is linked into the kernel structures and must
1220 * not be reused until it has been unregistered. A negative errno code
1221 * is returned on a failure.
1222 *
1223 * When registered all registration and up events are replayed
4ec93edb 1224 * to the new notifier to allow device to have a race free
1da177e4
LT
1225 * view of the network device list.
1226 */
1227
1228int register_netdevice_notifier(struct notifier_block *nb)
1229{
1230 struct net_device *dev;
fcc5a03a 1231 struct net_device *last;
881d966b 1232 struct net *net;
1da177e4
LT
1233 int err;
1234
1235 rtnl_lock();
f07d5b94 1236 err = raw_notifier_chain_register(&netdev_chain, nb);
fcc5a03a
HX
1237 if (err)
1238 goto unlock;
881d966b
EB
1239 if (dev_boot_phase)
1240 goto unlock;
1241 for_each_net(net) {
1242 for_each_netdev(net, dev) {
1243 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1244 err = notifier_to_errno(err);
1245 if (err)
1246 goto rollback;
1247
1248 if (!(dev->flags & IFF_UP))
1249 continue;
1da177e4 1250
881d966b
EB
1251 nb->notifier_call(nb, NETDEV_UP, dev);
1252 }
1da177e4 1253 }
fcc5a03a
HX
1254
1255unlock:
1da177e4
LT
1256 rtnl_unlock();
1257 return err;
fcc5a03a
HX
1258
1259rollback:
1260 last = dev;
881d966b
EB
1261 for_each_net(net) {
1262 for_each_netdev(net, dev) {
1263 if (dev == last)
1264 break;
fcc5a03a 1265
881d966b
EB
1266 if (dev->flags & IFF_UP) {
1267 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1268 nb->notifier_call(nb, NETDEV_DOWN, dev);
1269 }
1270 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
fcc5a03a 1271 }
fcc5a03a 1272 }
c67625a1
PE
1273
1274 raw_notifier_chain_unregister(&netdev_chain, nb);
fcc5a03a 1275 goto unlock;
1da177e4
LT
1276}
1277
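/*
 * Illustrative sketch (not part of the original file): a subsystem that
 * wants to track interfaces registers a notifier block; my_netdev_event()
 * is a hypothetical callback.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 *
 * As documented above, REGISTER and UP events for devices that already
 * exist are replayed to the new notifier at registration time.
 */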
1278/**
1279 * unregister_netdevice_notifier - unregister a network notifier block
1280 * @nb: notifier
1281 *
1282 * Unregister a notifier previously registered by
1283 * register_netdevice_notifier(). The notifier is unlinked into the
1284 * kernel structures and may then be reused. A negative errno code
1285 * is returned on a failure.
1286 */
1287
1288int unregister_netdevice_notifier(struct notifier_block *nb)
1289{
9f514950
HX
1290 int err;
1291
1292 rtnl_lock();
f07d5b94 1293 err = raw_notifier_chain_unregister(&netdev_chain, nb);
9f514950
HX
1294 rtnl_unlock();
1295 return err;
1da177e4
LT
1296}
1297
1298/**
1299 * call_netdevice_notifiers - call all network notifier blocks
1300 * @val: value passed unmodified to notifier function
c4ea43c5 1301 * @dev: net_device pointer passed unmodified to notifier function
1da177e4
LT
1302 *
1303 * Call all network notifier blocks. Parameters and return value
f07d5b94 1304 * are as for raw_notifier_call_chain().
1da177e4
LT
1305 */
1306
ad7379d4 1307int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1da177e4 1308{
ad7379d4 1309 return raw_notifier_call_chain(&netdev_chain, val, dev);
1da177e4
LT
1310}
1311
1312/* When > 0 there are consumers of rx skb time stamps */
1313static atomic_t netstamp_needed = ATOMIC_INIT(0);
1314
1315void net_enable_timestamp(void)
1316{
1317 atomic_inc(&netstamp_needed);
1318}
1319
1320void net_disable_timestamp(void)
1321{
1322 atomic_dec(&netstamp_needed);
1323}
1324
a61bbcf2 1325static inline void net_timestamp(struct sk_buff *skb)
1da177e4
LT
1326{
1327 if (atomic_read(&netstamp_needed))
a61bbcf2 1328 __net_timestamp(skb);
b7aa0bf7
ED
1329 else
1330 skb->tstamp.tv64 = 0;
1da177e4
LT
1331}
1332
1333/*
1334 * Support routine. Sends outgoing frames to any network
1335 * taps currently in use.
1336 */
1337
f6a78bfc 1338static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1da177e4
LT
1339{
1340 struct packet_type *ptype;
a61bbcf2
PM
1341
1342 net_timestamp(skb);
1da177e4
LT
1343
1344 rcu_read_lock();
1345 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1346 /* Never send packets back to the socket
1347 * they originated from - MvS (miquels@drinkel.ow.org)
1348 */
1349 if ((ptype->dev == dev || !ptype->dev) &&
1350 (ptype->af_packet_priv == NULL ||
1351 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1352 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1353 if (!skb2)
1354 break;
1355
1356 /* skb->nh should be correctly
1357 set by sender, so that the second statement is
1358 just protection against buggy protocols.
1359 */
459a98ed 1360 skb_reset_mac_header(skb2);
1da177e4 1361
d56f90a7 1362 if (skb_network_header(skb2) < skb2->data ||
27a884dc 1363 skb2->network_header > skb2->tail) {
1da177e4
LT
1364 if (net_ratelimit())
1365 printk(KERN_CRIT "protocol %04x is "
1366 "buggy, dev %s\n",
1367 skb2->protocol, dev->name);
c1d2bbe1 1368 skb_reset_network_header(skb2);
1da177e4
LT
1369 }
1370
b0e380b1 1371 skb2->transport_header = skb2->network_header;
1da177e4 1372 skb2->pkt_type = PACKET_OUTGOING;
f2ccd8fa 1373 ptype->func(skb2, skb->dev, ptype, skb->dev);
1da177e4
LT
1374 }
1375 }
1376 rcu_read_unlock();
1377}
1378
56079431 1379
def82a1d 1380static inline void __netif_reschedule(struct Qdisc *q)
56079431 1381{
def82a1d
JP
1382 struct softnet_data *sd;
1383 unsigned long flags;
56079431 1384
def82a1d
JP
1385 local_irq_save(flags);
1386 sd = &__get_cpu_var(softnet_data);
1387 q->next_sched = sd->output_queue;
1388 sd->output_queue = q;
1389 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1390 local_irq_restore(flags);
1391}
1392
1393void __netif_schedule(struct Qdisc *q)
1394{
1395 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1396 __netif_reschedule(q);
56079431
DV
1397}
1398EXPORT_SYMBOL(__netif_schedule);
1399
bea3348e 1400void dev_kfree_skb_irq(struct sk_buff *skb)
56079431 1401{
bea3348e
SH
1402 if (atomic_dec_and_test(&skb->users)) {
1403 struct softnet_data *sd;
1404 unsigned long flags;
56079431 1405
bea3348e
SH
1406 local_irq_save(flags);
1407 sd = &__get_cpu_var(softnet_data);
1408 skb->next = sd->completion_queue;
1409 sd->completion_queue = skb;
1410 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1411 local_irq_restore(flags);
1412 }
56079431 1413}
bea3348e 1414EXPORT_SYMBOL(dev_kfree_skb_irq);
56079431
DV
1415
1416void dev_kfree_skb_any(struct sk_buff *skb)
1417{
1418 if (in_irq() || irqs_disabled())
1419 dev_kfree_skb_irq(skb);
1420 else
1421 dev_kfree_skb(skb);
1422}
1423EXPORT_SYMBOL(dev_kfree_skb_any);
1424
1425
bea3348e
SH
1426/**
1427 * netif_device_detach - mark device as removed
1428 * @dev: network device
1429 *
1430 * Mark device as removed from system and therefore no longer available.
1431 */
56079431
DV
1432void netif_device_detach(struct net_device *dev)
1433{
1434 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1435 netif_running(dev)) {
1436 netif_stop_queue(dev);
1437 }
1438}
1439EXPORT_SYMBOL(netif_device_detach);
1440
bea3348e
SH
1441/**
1442 * netif_device_attach - mark device as attached
1443 * @dev: network device
1444 *
 1445 * Mark device as attached to the system and restart it if needed.
1446 */
56079431
DV
1447void netif_device_attach(struct net_device *dev)
1448{
1449 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1450 netif_running(dev)) {
1451 netif_wake_queue(dev);
4ec93edb 1452 __netdev_watchdog_up(dev);
56079431
DV
1453 }
1454}
1455EXPORT_SYMBOL(netif_device_attach);
1456
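/*
 * Illustrative sketch (not part of the original file): a typical PCI
 * driver pairs these helpers in its power-management callbacks so the
 * stack stops handing it packets while the hardware is away; my_suspend()
 * and my_resume() are hypothetical.
 *
 *	static int my_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(dev);
 *		// ... stop hardware, save state ...
 *		return 0;
 *	}
 *
 *	static int my_resume(struct pci_dev *pdev)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		// ... restore state, restart hardware ...
 *		netif_device_attach(dev);
 *		return 0;
 *	}
 */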
6de329e2
BH
1457static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1458{
1459 return ((features & NETIF_F_GEN_CSUM) ||
1460 ((features & NETIF_F_IP_CSUM) &&
1461 protocol == htons(ETH_P_IP)) ||
1462 ((features & NETIF_F_IPV6_CSUM) &&
1463 protocol == htons(ETH_P_IPV6)));
1464}
1465
1466static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1467{
1468 if (can_checksum_protocol(dev->features, skb->protocol))
1469 return true;
1470
1471 if (skb->protocol == htons(ETH_P_8021Q)) {
1472 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1473 if (can_checksum_protocol(dev->features & dev->vlan_features,
1474 veh->h_vlan_encapsulated_proto))
1475 return true;
1476 }
1477
1478 return false;
1479}
56079431 1480
1da177e4
LT
1481/*
1482 * Invalidate hardware checksum when packet is to be mangled, and
1483 * complete checksum manually on outgoing path.
1484 */
84fa7933 1485int skb_checksum_help(struct sk_buff *skb)
1da177e4 1486{
d3bc23e7 1487 __wsum csum;
663ead3b 1488 int ret = 0, offset;
1da177e4 1489
84fa7933 1490 if (skb->ip_summed == CHECKSUM_COMPLETE)
a430a43d
HX
1491 goto out_set_summed;
1492
1493 if (unlikely(skb_shinfo(skb)->gso_size)) {
a430a43d
HX
1494 /* Let GSO fix up the checksum. */
1495 goto out_set_summed;
1da177e4
LT
1496 }
1497
a030847e
HX
1498 offset = skb->csum_start - skb_headroom(skb);
1499 BUG_ON(offset >= skb_headlen(skb));
1500 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1501
1502 offset += skb->csum_offset;
1503 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1504
1505 if (skb_cloned(skb) &&
1506 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1da177e4
LT
1507 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1508 if (ret)
1509 goto out;
1510 }
1511
a030847e 1512 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
a430a43d 1513out_set_summed:
1da177e4 1514 skb->ip_summed = CHECKSUM_NONE;
4ec93edb 1515out:
1da177e4
LT
1516 return ret;
1517}
1518
f6a78bfc
HX
1519/**
1520 * skb_gso_segment - Perform segmentation on skb.
1521 * @skb: buffer to segment
576a30eb 1522 * @features: features for the output path (see dev->features)
f6a78bfc
HX
1523 *
1524 * This function segments the given skb and returns a list of segments.
576a30eb
HX
1525 *
1526 * It may return NULL if the skb requires no segmentation. This is
1527 * only possible when GSO is used for verifying header integrity.
f6a78bfc 1528 */
576a30eb 1529struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
f6a78bfc
HX
1530{
1531 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1532 struct packet_type *ptype;
252e3346 1533 __be16 type = skb->protocol;
a430a43d 1534 int err;
f6a78bfc
HX
1535
1536 BUG_ON(skb_shinfo(skb)->frag_list);
f6a78bfc 1537
459a98ed 1538 skb_reset_mac_header(skb);
b0e380b1 1539 skb->mac_len = skb->network_header - skb->mac_header;
f6a78bfc
HX
1540 __skb_pull(skb, skb->mac_len);
1541
f9d106a6 1542 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
a430a43d
HX
1543 if (skb_header_cloned(skb) &&
1544 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1545 return ERR_PTR(err);
1546 }
1547
f6a78bfc 1548 rcu_read_lock();
82d8a867
PE
1549 list_for_each_entry_rcu(ptype,
1550 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
f6a78bfc 1551 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
84fa7933 1552 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
a430a43d
HX
1553 err = ptype->gso_send_check(skb);
1554 segs = ERR_PTR(err);
1555 if (err || skb_gso_ok(skb, features))
1556 break;
d56f90a7
ACM
1557 __skb_push(skb, (skb->data -
1558 skb_network_header(skb)));
a430a43d 1559 }
576a30eb 1560 segs = ptype->gso_segment(skb, features);
f6a78bfc
HX
1561 break;
1562 }
1563 }
1564 rcu_read_unlock();
1565
98e399f8 1566 __skb_push(skb, skb->data - skb_mac_header(skb));
576a30eb 1567
f6a78bfc
HX
1568 return segs;
1569}
1570
1571EXPORT_SYMBOL(skb_gso_segment);
1572
fb286bb2
HX
1573/* Take action when hardware reception checksum errors are detected. */
1574#ifdef CONFIG_BUG
1575void netdev_rx_csum_fault(struct net_device *dev)
1576{
1577 if (net_ratelimit()) {
4ec93edb 1578 printk(KERN_ERR "%s: hw csum failure.\n",
246a4212 1579 dev ? dev->name : "<unknown>");
fb286bb2
HX
1580 dump_stack();
1581 }
1582}
1583EXPORT_SYMBOL(netdev_rx_csum_fault);
1584#endif
1585
1da177e4
LT
1586/* Actually, we should eliminate this check as soon as we know, that:
1587 * 1. IOMMU is present and allows to map all the memory.
1588 * 2. No high memory really exists on this machine.
1589 */
1590
1591static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1592{
3d3a8533 1593#ifdef CONFIG_HIGHMEM
1da177e4
LT
1594 int i;
1595
1596 if (dev->features & NETIF_F_HIGHDMA)
1597 return 0;
1598
1599 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1600 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1601 return 1;
1602
3d3a8533 1603#endif
1da177e4
LT
1604 return 0;
1605}
1da177e4 1606
f6a78bfc
HX
1607struct dev_gso_cb {
1608 void (*destructor)(struct sk_buff *skb);
1609};
1610
1611#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1612
1613static void dev_gso_skb_destructor(struct sk_buff *skb)
1614{
1615 struct dev_gso_cb *cb;
1616
1617 do {
1618 struct sk_buff *nskb = skb->next;
1619
1620 skb->next = nskb->next;
1621 nskb->next = NULL;
1622 kfree_skb(nskb);
1623 } while (skb->next);
1624
1625 cb = DEV_GSO_CB(skb);
1626 if (cb->destructor)
1627 cb->destructor(skb);
1628}
1629
1630/**
1631 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1632 * @skb: buffer to segment
1633 *
1634 * This function segments the given skb and stores the list of segments
1635 * in skb->next.
1636 */
1637static int dev_gso_segment(struct sk_buff *skb)
1638{
1639 struct net_device *dev = skb->dev;
1640 struct sk_buff *segs;
576a30eb
HX
1641 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1642 NETIF_F_SG : 0);
1643
1644 segs = skb_gso_segment(skb, features);
1645
1646 /* Verifying header integrity only. */
1647 if (!segs)
1648 return 0;
f6a78bfc 1649
801678c5 1650 if (IS_ERR(segs))
f6a78bfc
HX
1651 return PTR_ERR(segs);
1652
1653 skb->next = segs;
1654 DEV_GSO_CB(skb)->destructor = skb->destructor;
1655 skb->destructor = dev_gso_skb_destructor;
1656
1657 return 0;
1658}
1659
fd2ea0a7
DM
1660int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1661 struct netdev_queue *txq)
f6a78bfc
HX
1662{
1663 if (likely(!skb->next)) {
9be9a6b9 1664 if (!list_empty(&ptype_all))
f6a78bfc
HX
1665 dev_queue_xmit_nit(skb, dev);
1666
576a30eb
HX
1667 if (netif_needs_gso(dev, skb)) {
1668 if (unlikely(dev_gso_segment(skb)))
1669 goto out_kfree_skb;
1670 if (skb->next)
1671 goto gso;
1672 }
f6a78bfc 1673
576a30eb 1674 return dev->hard_start_xmit(skb, dev);
f6a78bfc
HX
1675 }
1676
576a30eb 1677gso:
f6a78bfc
HX
1678 do {
1679 struct sk_buff *nskb = skb->next;
1680 int rc;
1681
1682 skb->next = nskb->next;
1683 nskb->next = NULL;
1684 rc = dev->hard_start_xmit(nskb, dev);
1685 if (unlikely(rc)) {
f54d9e8d 1686 nskb->next = skb->next;
f6a78bfc
HX
1687 skb->next = nskb;
1688 return rc;
1689 }
fd2ea0a7 1690 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
f54d9e8d 1691 return NETDEV_TX_BUSY;
f6a78bfc 1692 } while (skb->next);
4ec93edb 1693
f6a78bfc
HX
1694 skb->destructor = DEV_GSO_CB(skb)->destructor;
1695
1696out_kfree_skb:
1697 kfree_skb(skb);
1698 return 0;
1699}
1700
b6b2fed1
DM
1701static u32 simple_tx_hashrnd;
1702static int simple_tx_hashrnd_initialized = 0;
1703
8f0f2223
DM
1704static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1705{
b6b2fed1
DM
1706 u32 addr1, addr2, ports;
1707 u32 hash, ihl;
ad55dcaf 1708 u8 ip_proto = 0;
b6b2fed1
DM
1709
1710 if (unlikely(!simple_tx_hashrnd_initialized)) {
1711 get_random_bytes(&simple_tx_hashrnd, 4);
1712 simple_tx_hashrnd_initialized = 1;
1713 }
8f0f2223
DM
1714
1715 switch (skb->protocol) {
60678040 1716 case htons(ETH_P_IP):
ad55dcaf
AD
1717 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1718 ip_proto = ip_hdr(skb)->protocol;
b6b2fed1
DM
1719 addr1 = ip_hdr(skb)->saddr;
1720 addr2 = ip_hdr(skb)->daddr;
8f0f2223 1721 ihl = ip_hdr(skb)->ihl;
8f0f2223 1722 break;
60678040 1723 case htons(ETH_P_IPV6):
8f0f2223 1724 ip_proto = ipv6_hdr(skb)->nexthdr;
b6b2fed1
DM
1725 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1726 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
8f0f2223 1727 ihl = (40 >> 2);
8f0f2223
DM
1728 break;
1729 default:
1730 return 0;
1731 }
1732
8f0f2223
DM
1733
1734 switch (ip_proto) {
1735 case IPPROTO_TCP:
1736 case IPPROTO_UDP:
1737 case IPPROTO_DCCP:
1738 case IPPROTO_ESP:
1739 case IPPROTO_AH:
1740 case IPPROTO_SCTP:
1741 case IPPROTO_UDPLITE:
b6b2fed1 1742 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
8f0f2223
DM
1743 break;
1744
1745 default:
b6b2fed1 1746 ports = 0;
8f0f2223
DM
1747 break;
1748 }
1749
b6b2fed1
DM
1750 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1751
1752 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
8f0f2223
DM
1753}
1754
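/*
 * Illustrative note (not part of the original file): the final statement
 * above maps the 32-bit jhash value uniformly onto the available queues
 * without a modulo, e.g. with real_num_tx_queues == 8:
 *
 *	hash = 0x40000000  ->  ((u64)0x40000000 * 8) >> 32 == 2
 *	hash = 0xffffffff  ->  ((u64)0xffffffff * 8) >> 32 == 7
 *
 * so packets of one flow (same addresses and ports) always pick the same
 * tx queue.
 */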
e8a0464c
DM
1755static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1756 struct sk_buff *skb)
1757{
fd2ea0a7
DM
1758 u16 queue_index = 0;
1759
eae792b7
DM
1760 if (dev->select_queue)
1761 queue_index = dev->select_queue(dev, skb);
8f0f2223
DM
1762 else if (dev->real_num_tx_queues > 1)
1763 queue_index = simple_tx_hash(dev, skb);
eae792b7 1764
fd2ea0a7
DM
1765 skb_set_queue_mapping(skb, queue_index);
1766 return netdev_get_tx_queue(dev, queue_index);
e8a0464c
DM
1767}
1768
d29f749e
DJ
1769/**
1770 * dev_queue_xmit - transmit a buffer
1771 * @skb: buffer to transmit
1772 *
1773 * Queue a buffer for transmission to a network device. The caller must
1774 * have set the device and priority and built the buffer before calling
1775 * this function. The function can be called from an interrupt.
1776 *
1777 * A negative errno code is returned on a failure. A success does not
1778 * guarantee the frame will be transmitted as it may be dropped due
1779 * to congestion or traffic shaping.
1780 *
1781 * -----------------------------------------------------------------------------------
1782 * I notice this method can also return errors from the queue disciplines,
1783 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1784 * be positive.
1785 *
1786 * Regardless of the return value, the skb is consumed, so it is currently
1787 * difficult to retry a send to this method. (You can bump the ref count
1788 * before sending to hold a reference for retry if you are careful.)
1789 *
1790 * When calling this method, interrupts MUST be enabled. This is because
1791 * the BH enable code must have IRQs enabled so that it will not deadlock.
1792 * --BLG
1793 */
1da177e4
LT
1794int dev_queue_xmit(struct sk_buff *skb)
1795{
1796 struct net_device *dev = skb->dev;
dc2b4847 1797 struct netdev_queue *txq;
1da177e4
LT
1798 struct Qdisc *q;
1799 int rc = -ENOMEM;
1800
f6a78bfc
HX
1801 /* GSO will handle the following emulations directly. */
1802 if (netif_needs_gso(dev, skb))
1803 goto gso;
1804
1da177e4
LT
1805 if (skb_shinfo(skb)->frag_list &&
1806 !(dev->features & NETIF_F_FRAGLIST) &&
364c6bad 1807 __skb_linearize(skb))
1da177e4
LT
1808 goto out_kfree_skb;
1809
1810 /* Fragmented skb is linearized if device does not support SG,
1811 * or if at least one of fragments is in highmem and device
1812 * does not support DMA from it.
1813 */
1814 if (skb_shinfo(skb)->nr_frags &&
1815 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
364c6bad 1816 __skb_linearize(skb))
1da177e4
LT
1817 goto out_kfree_skb;
1818
1819 /* If packet is not checksummed and device does not support
1820 * checksumming for this protocol, complete checksumming here.
1821 */
663ead3b
HX
1822 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1823 skb_set_transport_header(skb, skb->csum_start -
1824 skb_headroom(skb));
6de329e2
BH
1825 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1826 goto out_kfree_skb;
663ead3b 1827 }
1da177e4 1828
f6a78bfc 1829gso:
4ec93edb
YH
1830 /* Disable soft irqs for various locks below. Also
1831 * stops preemption for RCU.
1da177e4 1832 */
4ec93edb 1833 rcu_read_lock_bh();
1da177e4 1834
eae792b7 1835 txq = dev_pick_tx(dev, skb);
b0e1e646 1836 q = rcu_dereference(txq->qdisc);
37437bb2 1837
1da177e4
LT
1838#ifdef CONFIG_NET_CLS_ACT
1839 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1840#endif
1841 if (q->enqueue) {
5fb66229 1842 spinlock_t *root_lock = qdisc_lock(q);
37437bb2
DM
1843
1844 spin_lock(root_lock);
1845
a9312ae8 1846 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
96d20316 1847 kfree_skb(skb);
a9312ae8 1848 rc = NET_XMIT_DROP;
96d20316
DM
1849 } else {
1850 rc = qdisc_enqueue_root(skb, q);
1851 qdisc_run(q);
a9312ae8 1852 }
37437bb2
DM
1853 spin_unlock(root_lock);
1854
37437bb2 1855 goto out;
1da177e4
LT
1856 }
1857
1858 /* The device has no queue. Common case for software devices:
1859 loopback, all the sorts of tunnels...
1860
932ff279
HX
1861 Really, it is unlikely that netif_tx_lock protection is necessary
1862 here. (e.g. loopback and IP tunnels are clean, ignoring statistics
1da177e4
LT
1863 counters.)
1864 However, it is possible that they rely on the protection
1865 we provide here.
1866
1867 Check this and take the lock. It is not prone to deadlocks.
1868 Or use the noqueue qdisc, which is even simpler 8)
1869 */
1870 if (dev->flags & IFF_UP) {
1871 int cpu = smp_processor_id(); /* ok because BHs are off */
1872
c773e847 1873 if (txq->xmit_lock_owner != cpu) {
1da177e4 1874
c773e847 1875 HARD_TX_LOCK(dev, txq, cpu);
1da177e4 1876
fd2ea0a7 1877 if (!netif_tx_queue_stopped(txq)) {
1da177e4 1878 rc = 0;
fd2ea0a7 1879 if (!dev_hard_start_xmit(skb, dev, txq)) {
c773e847 1880 HARD_TX_UNLOCK(dev, txq);
1da177e4
LT
1881 goto out;
1882 }
1883 }
c773e847 1884 HARD_TX_UNLOCK(dev, txq);
1da177e4
LT
1885 if (net_ratelimit())
1886 printk(KERN_CRIT "Virtual device %s asks to "
1887 "queue packet!\n", dev->name);
1888 } else {
1889 /* Recursion is detected! It is possible,
1890 * unfortunately */
1891 if (net_ratelimit())
1892 printk(KERN_CRIT "Dead loop on virtual device "
1893 "%s, fix it urgently!\n", dev->name);
1894 }
1895 }
1896
1897 rc = -ENETDOWN;
d4828d85 1898 rcu_read_unlock_bh();
1da177e4
LT
1899
1900out_kfree_skb:
1901 kfree_skb(skb);
1902 return rc;
1903out:
d4828d85 1904 rcu_read_unlock_bh();
1da177e4
LT
1905 return rc;
1906}
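/*
 * Illustrative sketch (not from dev.c), following the contract in the
 * dev_queue_xmit() comment above: the caller builds the skb, sets skb->dev
 * (and protocol), then hands it over; the skb is consumed regardless of the
 * return value. The function name and payload source are hypothetical, and
 * a real caller would also build the link-layer header (dev_hard_header()).
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
#include <linux/string.h>

static int example_xmit_raw(struct net_device *dev,
			    const void *payload, unsigned int len)
{
	struct sk_buff *skb;

	skb = alloc_skb(LL_RESERVED_SPACE(dev) + len, GFP_ATOMIC);
	if (!skb)
		return -ENOMEM;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));	/* room for L2 header */
	memcpy(skb_put(skb, len), payload, len);

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);		/* assumed payload type */

	/* skb is consumed here; do not touch it afterwards. */
	return dev_queue_xmit(skb);
}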
1907
1908
1909/*=======================================================================
1910 Receiver routines
1911 =======================================================================*/
1912
6b2bedc3
SH
1913int netdev_max_backlog __read_mostly = 1000;
1914int netdev_budget __read_mostly = 300;
1915int weight_p __read_mostly = 64; /* old backlog weight */
1da177e4
LT
1916
1917DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1918
1919
1da177e4
LT
1920/**
1921 * netif_rx - post buffer to the network code
1922 * @skb: buffer to post
1923 *
1924 * This function receives a packet from a device driver and queues it for
1925 * the upper (protocol) levels to process. It always succeeds. The buffer
1926 * may be dropped during processing for congestion control or by the
1927 * protocol layers.
1928 *
1929 * return values:
1930 * NET_RX_SUCCESS (no congestion)
1da177e4
LT
1931 * NET_RX_DROP (packet was dropped)
1932 *
1933 */
1934
1935int netif_rx(struct sk_buff *skb)
1936{
1da177e4
LT
1937 struct softnet_data *queue;
1938 unsigned long flags;
1939
1940 /* if netpoll wants it, pretend we never saw it */
1941 if (netpoll_rx(skb))
1942 return NET_RX_DROP;
1943
b7aa0bf7 1944 if (!skb->tstamp.tv64)
a61bbcf2 1945 net_timestamp(skb);
1da177e4
LT
1946
1947 /*
1948 * The code is rearranged so that the path is shortest
1949 * when the CPU is congested but still operating.
1950 */
1951 local_irq_save(flags);
1da177e4
LT
1952 queue = &__get_cpu_var(softnet_data);
1953
1954 __get_cpu_var(netdev_rx_stat).total++;
1955 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1956 if (queue->input_pkt_queue.qlen) {
1da177e4 1957enqueue:
1da177e4 1958 __skb_queue_tail(&queue->input_pkt_queue, skb);
1da177e4 1959 local_irq_restore(flags);
34008d8c 1960 return NET_RX_SUCCESS;
1da177e4
LT
1961 }
1962
bea3348e 1963 napi_schedule(&queue->backlog);
1da177e4
LT
1964 goto enqueue;
1965 }
1966
1da177e4
LT
1967 __get_cpu_var(netdev_rx_stat).dropped++;
1968 local_irq_restore(flags);
1969
1970 kfree_skb(skb);
1971 return NET_RX_DROP;
1972}
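/*
 * Illustrative driver-side sketch (not from dev.c), showing the usual
 * pairing with netif_rx() described above: copy the frame into a fresh skb,
 * let eth_type_trans() set skb->protocol, then post it. The function name
 * and the data/len source are hypothetical; a process-context caller would
 * use netif_rx_ni() instead.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>

static void example_rx_frame(struct net_device *dev,
			     const void *data, unsigned int len)
{
	struct sk_buff *skb;

	skb = dev_alloc_skb(len + NET_IP_ALIGN);
	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}

	skb_reserve(skb, NET_IP_ALIGN);		/* align the IP header */
	memcpy(skb_put(skb, len), data, len);

	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);				/* may drop under congestion */

	dev->stats.rx_packets++;
	dev->stats.rx_bytes += len;
}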
1973
1974int netif_rx_ni(struct sk_buff *skb)
1975{
1976 int err;
1977
1978 preempt_disable();
1979 err = netif_rx(skb);
1980 if (local_softirq_pending())
1981 do_softirq();
1982 preempt_enable();
1983
1984 return err;
1985}
1986
1987EXPORT_SYMBOL(netif_rx_ni);
1988
1da177e4
LT
1989static void net_tx_action(struct softirq_action *h)
1990{
1991 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1992
1993 if (sd->completion_queue) {
1994 struct sk_buff *clist;
1995
1996 local_irq_disable();
1997 clist = sd->completion_queue;
1998 sd->completion_queue = NULL;
1999 local_irq_enable();
2000
2001 while (clist) {
2002 struct sk_buff *skb = clist;
2003 clist = clist->next;
2004
547b792c 2005 WARN_ON(atomic_read(&skb->users));
1da177e4
LT
2006 __kfree_skb(skb);
2007 }
2008 }
2009
2010 if (sd->output_queue) {
37437bb2 2011 struct Qdisc *head;
1da177e4
LT
2012
2013 local_irq_disable();
2014 head = sd->output_queue;
2015 sd->output_queue = NULL;
2016 local_irq_enable();
2017
2018 while (head) {
37437bb2
DM
2019 struct Qdisc *q = head;
2020 spinlock_t *root_lock;
2021
1da177e4
LT
2022 head = head->next_sched;
2023
5fb66229 2024 root_lock = qdisc_lock(q);
37437bb2 2025 if (spin_trylock(root_lock)) {
def82a1d
JP
2026 smp_mb__before_clear_bit();
2027 clear_bit(__QDISC_STATE_SCHED,
2028 &q->state);
37437bb2
DM
2029 qdisc_run(q);
2030 spin_unlock(root_lock);
1da177e4 2031 } else {
195648bb 2032 if (!test_bit(__QDISC_STATE_DEACTIVATED,
e8a83e10 2033 &q->state)) {
195648bb 2034 __netif_reschedule(q);
e8a83e10
JP
2035 } else {
2036 smp_mb__before_clear_bit();
2037 clear_bit(__QDISC_STATE_SCHED,
2038 &q->state);
2039 }
1da177e4
LT
2040 }
2041 }
2042 }
2043}
2044
6f05f629
SH
2045static inline int deliver_skb(struct sk_buff *skb,
2046 struct packet_type *pt_prev,
2047 struct net_device *orig_dev)
1da177e4
LT
2048{
2049 atomic_inc(&skb->users);
f2ccd8fa 2050 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1da177e4
LT
2051}
2052
2053#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
6229e362 2054/* These hooks are defined here for ATM */
1da177e4
LT
2055struct net_bridge;
2056struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2057 unsigned char *addr);
6229e362 2058void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1da177e4 2059
6229e362
SH
2060/*
2061 * If bridge module is loaded call bridging hook.
2062 * returns NULL if packet was consumed.
2063 */
2064struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2065 struct sk_buff *skb) __read_mostly;
2066static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2067 struct packet_type **pt_prev, int *ret,
2068 struct net_device *orig_dev)
1da177e4
LT
2069{
2070 struct net_bridge_port *port;
2071
6229e362
SH
2072 if (skb->pkt_type == PACKET_LOOPBACK ||
2073 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2074 return skb;
1da177e4
LT
2075
2076 if (*pt_prev) {
6229e362 2077 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1da177e4 2078 *pt_prev = NULL;
4ec93edb
YH
2079 }
2080
6229e362 2081 return br_handle_frame_hook(port, skb);
1da177e4
LT
2082}
2083#else
6229e362 2084#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
1da177e4
LT
2085#endif
2086
b863ceb7
PM
2087#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2088struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2089EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2090
2091static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2092 struct packet_type **pt_prev,
2093 int *ret,
2094 struct net_device *orig_dev)
2095{
2096 if (skb->dev->macvlan_port == NULL)
2097 return skb;
2098
2099 if (*pt_prev) {
2100 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2101 *pt_prev = NULL;
2102 }
2103 return macvlan_handle_frame_hook(skb);
2104}
2105#else
2106#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2107#endif
2108
1da177e4
LT
2109#ifdef CONFIG_NET_CLS_ACT
2110/* TODO: Maybe we should just force sch_ingress to be compiled in
2111 * when CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
2112 * instructions (a compare and two extra stores) when CONFIG_NET_CLS_ACT
2113 * is set but the ingress scheduler is not built in.
4ec93edb 2114 * NOTE: This doesn't stop any functionality; if you don't have
1da177e4
LT
2115 * the ingress scheduler, you just can't add policies on ingress.
2116 *
2117 */
4ec93edb 2118static int ing_filter(struct sk_buff *skb)
1da177e4 2119{
1da177e4 2120 struct net_device *dev = skb->dev;
f697c3e8 2121 u32 ttl = G_TC_RTTL(skb->tc_verd);
555353cf
DM
2122 struct netdev_queue *rxq;
2123 int result = TC_ACT_OK;
2124 struct Qdisc *q;
4ec93edb 2125
f697c3e8
HX
2126 if (MAX_RED_LOOP < ttl++) {
2127 printk(KERN_WARNING
2128 "Redir loop detected Dropping packet (%d->%d)\n",
2129 skb->iif, dev->ifindex);
2130 return TC_ACT_SHOT;
2131 }
1da177e4 2132
f697c3e8
HX
2133 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2134 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
1da177e4 2135
555353cf
DM
2136 rxq = &dev->rx_queue;
2137
83874000 2138 q = rxq->qdisc;
8d50b53d 2139 if (q != &noop_qdisc) {
83874000 2140 spin_lock(qdisc_lock(q));
a9312ae8
DM
2141 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2142 result = qdisc_enqueue_root(skb, q);
83874000
DM
2143 spin_unlock(qdisc_lock(q));
2144 }
f697c3e8
HX
2145
2146 return result;
2147}
86e65da9 2148
f697c3e8
HX
2149static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2150 struct packet_type **pt_prev,
2151 int *ret, struct net_device *orig_dev)
2152{
8d50b53d 2153 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
f697c3e8 2154 goto out;
1da177e4 2155
f697c3e8
HX
2156 if (*pt_prev) {
2157 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2158 *pt_prev = NULL;
2159 } else {
2160 /* Huh? Why does turning on AF_PACKET affect this? */
2161 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1da177e4
LT
2162 }
2163
f697c3e8
HX
2164 switch (ing_filter(skb)) {
2165 case TC_ACT_SHOT:
2166 case TC_ACT_STOLEN:
2167 kfree_skb(skb);
2168 return NULL;
2169 }
2170
2171out:
2172 skb->tc_verd = 0;
2173 return skb;
1da177e4
LT
2174}
2175#endif
2176
bc1d0411
PM
2177/*
2178 * netif_nit_deliver - deliver received packets to network taps
2179 * @skb: buffer
2180 *
2181 * This function is used to deliver incoming packets to network
2182 * taps. It should be used when the normal netif_receive_skb path
2183 * is bypassed, for example because of VLAN acceleration.
2184 */
2185void netif_nit_deliver(struct sk_buff *skb)
2186{
2187 struct packet_type *ptype;
2188
2189 if (list_empty(&ptype_all))
2190 return;
2191
2192 skb_reset_network_header(skb);
2193 skb_reset_transport_header(skb);
2194 skb->mac_len = skb->network_header - skb->mac_header;
2195
2196 rcu_read_lock();
2197 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2198 if (!ptype->dev || ptype->dev == skb->dev)
2199 deliver_skb(skb, ptype, skb->dev);
2200 }
2201 rcu_read_unlock();
2202}
2203
3b582cc1
SH
2204/**
2205 * netif_receive_skb - process receive buffer from network
2206 * @skb: buffer to process
2207 *
2208 * netif_receive_skb() is the main receive data processing function.
2209 * It always succeeds. The buffer may be dropped during processing
2210 * for congestion control or by the protocol layers.
2211 *
2212 * This function may only be called from softirq context and interrupts
2213 * should be enabled.
2214 *
2215 * Return values (usually ignored):
2216 * NET_RX_SUCCESS: no congestion
2217 * NET_RX_DROP: packet was dropped
2218 */
1da177e4
LT
2219int netif_receive_skb(struct sk_buff *skb)
2220{
2221 struct packet_type *ptype, *pt_prev;
f2ccd8fa 2222 struct net_device *orig_dev;
0d7a3681 2223 struct net_device *null_or_orig;
1da177e4 2224 int ret = NET_RX_DROP;
252e3346 2225 __be16 type;
1da177e4 2226
9b22ea56
PM
2227 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2228 return NET_RX_SUCCESS;
2229
1da177e4 2230 /* if we've gotten here through NAPI, check netpoll */
bea3348e 2231 if (netpoll_receive_skb(skb))
1da177e4
LT
2232 return NET_RX_DROP;
2233
b7aa0bf7 2234 if (!skb->tstamp.tv64)
a61bbcf2 2235 net_timestamp(skb);
1da177e4 2236
c01003c2
PM
2237 if (!skb->iif)
2238 skb->iif = skb->dev->ifindex;
86e65da9 2239
0d7a3681 2240 null_or_orig = NULL;
cc9bd5ce
JE
2241 orig_dev = skb->dev;
2242 if (orig_dev->master) {
0d7a3681
JE
2243 if (skb_bond_should_drop(skb))
2244 null_or_orig = orig_dev; /* deliver only exact match */
2245 else
2246 skb->dev = orig_dev->master;
cc9bd5ce 2247 }
8f903c70 2248
1da177e4
LT
2249 __get_cpu_var(netdev_rx_stat).total++;
2250
c1d2bbe1 2251 skb_reset_network_header(skb);
badff6d0 2252 skb_reset_transport_header(skb);
b0e380b1 2253 skb->mac_len = skb->network_header - skb->mac_header;
1da177e4
LT
2254
2255 pt_prev = NULL;
2256
2257 rcu_read_lock();
2258
b9f75f45 2259 /* Don't receive packets in an exiting network namespace */
0a36b345
EB
2260 if (!net_alive(dev_net(skb->dev))) {
2261 kfree_skb(skb);
b9f75f45 2262 goto out;
0a36b345 2263 }
b9f75f45 2264
1da177e4
LT
2265#ifdef CONFIG_NET_CLS_ACT
2266 if (skb->tc_verd & TC_NCLS) {
2267 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2268 goto ncls;
2269 }
2270#endif
2271
2272 list_for_each_entry_rcu(ptype, &ptype_all, list) {
f982307f
JE
2273 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2274 ptype->dev == orig_dev) {
4ec93edb 2275 if (pt_prev)
f2ccd8fa 2276 ret = deliver_skb(skb, pt_prev, orig_dev);
1da177e4
LT
2277 pt_prev = ptype;
2278 }
2279 }
2280
2281#ifdef CONFIG_NET_CLS_ACT
f697c3e8
HX
2282 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2283 if (!skb)
1da177e4 2284 goto out;
1da177e4
LT
2285ncls:
2286#endif
2287
6229e362 2288 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
b863ceb7
PM
2289 if (!skb)
2290 goto out;
2291 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
6229e362 2292 if (!skb)
1da177e4
LT
2293 goto out;
2294
2295 type = skb->protocol;
82d8a867
PE
2296 list_for_each_entry_rcu(ptype,
2297 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
1da177e4 2298 if (ptype->type == type &&
f982307f
JE
2299 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2300 ptype->dev == orig_dev)) {
4ec93edb 2301 if (pt_prev)
f2ccd8fa 2302 ret = deliver_skb(skb, pt_prev, orig_dev);
1da177e4
LT
2303 pt_prev = ptype;
2304 }
2305 }
2306
2307 if (pt_prev) {
f2ccd8fa 2308 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1da177e4
LT
2309 } else {
2310 kfree_skb(skb);
2311 /* Jamal, now you will not be able to escape explaining
2312 * to me how you were going to use this. :-)
2313 */
2314 ret = NET_RX_DROP;
2315 }
2316
2317out:
2318 rcu_read_unlock();
2319 return ret;
2320}
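/*
 * Illustrative sketch (not from dev.c). The ptype_all / ptype_base loops
 * above are what deliver frames to handlers registered with dev_add_pack().
 * A minimal tap for every protocol might look like this; the names are
 * hypothetical. Handlers receive a shared reference (deliver_skb() bumps
 * skb->users), so they should treat the data as read-only and free what
 * they are given.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

static int example_tap_rcv(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	/* Inspect the frame here (read-only), then drop our reference. */
	kfree_skb(skb);
	return 0;
}

static struct packet_type example_tap __read_mostly = {
	.type = __constant_htons(ETH_P_ALL),	/* tap every protocol */
	.func = example_tap_rcv,
};

static void example_tap_register(void)
{
	dev_add_pack(&example_tap);	/* undo with dev_remove_pack() */
}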
2321
6e583ce5
SH
2322/* Network device is going away, flush any packets still pending */
2323static void flush_backlog(void *arg)
2324{
2325 struct net_device *dev = arg;
2326 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2327 struct sk_buff *skb, *tmp;
2328
2329 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2330 if (skb->dev == dev) {
2331 __skb_unlink(skb, &queue->input_pkt_queue);
2332 kfree_skb(skb);
2333 }
2334}
2335
bea3348e 2336static int process_backlog(struct napi_struct *napi, int quota)
1da177e4
LT
2337{
2338 int work = 0;
1da177e4
LT
2339 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2340 unsigned long start_time = jiffies;
2341
bea3348e
SH
2342 napi->weight = weight_p;
2343 do {
1da177e4 2344 struct sk_buff *skb;
1da177e4
LT
2345
2346 local_irq_disable();
2347 skb = __skb_dequeue(&queue->input_pkt_queue);
bea3348e
SH
2348 if (!skb) {
2349 __napi_complete(napi);
2350 local_irq_enable();
2351 break;
2352 }
1da177e4
LT
2353 local_irq_enable();
2354
1da177e4 2355 netif_receive_skb(skb);
bea3348e 2356 } while (++work < quota && jiffies == start_time);
1da177e4 2357
bea3348e
SH
2358 return work;
2359}
1da177e4 2360
bea3348e
SH
2361/**
2362 * __napi_schedule - schedule for receive
c4ea43c5 2363 * @n: entry to schedule
bea3348e
SH
2364 *
2365 * The entry's receive function will be scheduled to run
2366 */
b5606c2d 2367void __napi_schedule(struct napi_struct *n)
bea3348e
SH
2368{
2369 unsigned long flags;
1da177e4 2370
bea3348e
SH
2371 local_irq_save(flags);
2372 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2373 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2374 local_irq_restore(flags);
1da177e4 2375}
bea3348e
SH
2376EXPORT_SYMBOL(__napi_schedule);
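/*
 * Illustrative driver-side sketch (not from dev.c), showing the NAPI
 * contract that the net_rx_action() loop below relies on: the interrupt
 * handler schedules the napi_struct, and the poll callback either consumes
 * its whole budget (and is polled again) or calls napi_complete() and
 * re-arms the device interrupt. All "example_" names are hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/interrupt.h>

struct example_priv {
	struct napi_struct napi;
	struct net_device *dev;
};

static int example_poll(struct napi_struct *napi, int budget)
{
	struct example_priv *priv = container_of(napi, struct example_priv, napi);
	int work = 0;

	/* Hypothetical: pull up to "budget" frames from the RX ring and
	 * feed each one to netif_receive_skb(), counting them in "work". */

	if (work < budget) {
		napi_complete(napi);
		/* re-enable the device RX interrupt here */
	}
	return work;
}

static irqreturn_t example_isr(int irq, void *dev_id)
{
	struct example_priv *priv = dev_id;

	/* disable the device RX interrupt, then defer the work to softirq */
	if (napi_schedule_prep(&priv->napi))
		__napi_schedule(&priv->napi);
	return IRQ_HANDLED;
}

static void example_setup_napi(struct net_device *dev, struct example_priv *priv)
{
	netif_napi_add(dev, &priv->napi, example_poll, 64);
	napi_enable(&priv->napi);
}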
2377
1da177e4
LT
2378
2379static void net_rx_action(struct softirq_action *h)
2380{
bea3348e 2381 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
24f8b238 2382 unsigned long time_limit = jiffies + 2;
51b0bded 2383 int budget = netdev_budget;
53fb95d3
MM
2384 void *have;
2385
1da177e4
LT
2386 local_irq_disable();
2387
bea3348e
SH
2388 while (!list_empty(list)) {
2389 struct napi_struct *n;
2390 int work, weight;
1da177e4 2391
bea3348e 2392 /* If the softirq window is exhausted then punt.
24f8b238
SH
2393 * Allow this to run for 2 jiffies, which allows
2394 * an average latency of 1.5/HZ.
bea3348e 2395 */
24f8b238 2396 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
1da177e4
LT
2397 goto softnet_break;
2398
2399 local_irq_enable();
2400
bea3348e
SH
2401 /* Even though interrupts have been re-enabled, this
2402 * access is safe because interrupts can only add new
2403 * entries to the tail of this list, and only ->poll()
2404 * calls can remove this head entry from the list.
2405 */
2406 n = list_entry(list->next, struct napi_struct, poll_list);
1da177e4 2407
bea3348e
SH
2408 have = netpoll_poll_lock(n);
2409
2410 weight = n->weight;
2411
0a7606c1
DM
2412 /* This NAPI_STATE_SCHED test is for avoiding a race
2413 * with netpoll's poll_napi(). Only the entity which
2414 * obtains the lock and sees NAPI_STATE_SCHED set will
2415 * actually make the ->poll() call. Therefore we avoid
2416 * accidentally calling ->poll() when NAPI is not scheduled.
2417 */
2418 work = 0;
2419 if (test_bit(NAPI_STATE_SCHED, &n->state))
2420 work = n->poll(n, weight);
bea3348e
SH
2421
2422 WARN_ON_ONCE(work > weight);
2423
2424 budget -= work;
2425
2426 local_irq_disable();
2427
2428 /* Drivers must not modify the NAPI state if they
2429 * consume the entire weight. In such cases this code
2430 * still "owns" the NAPI instance and therefore can
2431 * move the instance around on the list at-will.
2432 */
fed17f30
DM
2433 if (unlikely(work == weight)) {
2434 if (unlikely(napi_disable_pending(n)))
2435 __napi_complete(n);
2436 else
2437 list_move_tail(&n->poll_list, list);
2438 }
bea3348e
SH
2439
2440 netpoll_poll_unlock(have);
1da177e4
LT
2441 }
2442out:
515e06c4 2443 local_irq_enable();
bea3348e 2444
db217334
CL
2445#ifdef CONFIG_NET_DMA
2446 /*
2447 * There may not be any more sk_buffs coming right now, so push
2448 * any pending DMA copies to hardware
2449 */
d379b01e
DW
2450 if (!cpus_empty(net_dma.channel_mask)) {
2451 int chan_idx;
0e12f848 2452 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
d379b01e
DW
2453 struct dma_chan *chan = net_dma.channels[chan_idx];
2454 if (chan)
2455 dma_async_memcpy_issue_pending(chan);
2456 }
db217334
CL
2457 }
2458#endif
bea3348e 2459
1da177e4
LT
2460 return;
2461
2462softnet_break:
2463 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2464 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2465 goto out;
2466}
2467
2468static gifconf_func_t * gifconf_list [NPROTO];
2469
2470/**
2471 * register_gifconf - register a SIOCGIF handler
2472 * @family: Address family
2473 * @gifconf: Function handler
2474 *
2475 * Register protocol dependent address dumping routines. The handler
2476 * that is passed must not be freed or reused until it has been replaced
2477 * by another handler.
2478 */
2479int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2480{
2481 if (family >= NPROTO)
2482 return -EINVAL;
2483 gifconf_list[family] = gifconf;
2484 return 0;
2485}
2486
2487
2488/*
2489 * Map an interface index to its name (SIOCGIFNAME)
2490 */
2491
2492/*
2493 * We need this ioctl for efficient implementation of the
2494 * if_indextoname() function required by the IPv6 API. Without
2495 * it, we would have to search all the interfaces to find a
2496 * match. --pb
2497 */
2498
881d966b 2499static int dev_ifname(struct net *net, struct ifreq __user *arg)
1da177e4
LT
2500{
2501 struct net_device *dev;
2502 struct ifreq ifr;
2503
2504 /*
2505 * Fetch the caller's info block.
2506 */
2507
2508 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2509 return -EFAULT;
2510
2511 read_lock(&dev_base_lock);
881d966b 2512 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
1da177e4
LT
2513 if (!dev) {
2514 read_unlock(&dev_base_lock);
2515 return -ENODEV;
2516 }
2517
2518 strcpy(ifr.ifr_name, dev->name);
2519 read_unlock(&dev_base_lock);
2520
2521 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2522 return -EFAULT;
2523 return 0;
2524}
2525
2526/*
2527 * Perform a SIOCGIFCONF call. This structure will change
2528 * size eventually, and there is nothing I can do about it.
2529 * Thus we will need a 'compatibility mode'.
2530 */
2531
881d966b 2532static int dev_ifconf(struct net *net, char __user *arg)
1da177e4
LT
2533{
2534 struct ifconf ifc;
2535 struct net_device *dev;
2536 char __user *pos;
2537 int len;
2538 int total;
2539 int i;
2540
2541 /*
2542 * Fetch the caller's info block.
2543 */
2544
2545 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2546 return -EFAULT;
2547
2548 pos = ifc.ifc_buf;
2549 len = ifc.ifc_len;
2550
2551 /*
2552 * Loop over the interfaces, and write an info block for each.
2553 */
2554
2555 total = 0;
881d966b 2556 for_each_netdev(net, dev) {
1da177e4
LT
2557 for (i = 0; i < NPROTO; i++) {
2558 if (gifconf_list[i]) {
2559 int done;
2560 if (!pos)
2561 done = gifconf_list[i](dev, NULL, 0);
2562 else
2563 done = gifconf_list[i](dev, pos + total,
2564 len - total);
2565 if (done < 0)
2566 return -EFAULT;
2567 total += done;
2568 }
2569 }
4ec93edb 2570 }
1da177e4
LT
2571
2572 /*
2573 * All done. Write the updated control block back to the caller.
2574 */
2575 ifc.ifc_len = total;
2576
2577 /*
2578 * Both BSD and Solaris return 0 here, so we do too.
2579 */
2580 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2581}
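/*
 * Illustrative userspace sketch (not from dev.c), showing how the
 * SIOCGIFCONF path implemented above is typically driven: the caller
 * supplies a buffer in struct ifconf and the kernel fills it with one
 * struct ifreq per configured IPv4 address. The fixed array size is an
 * assumption for the sketch.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

int main(void)
{
	struct ifreq reqs[32];
	struct ifconf ifc;
	int fd, i, n;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0)
		return 1;

	memset(&ifc, 0, sizeof(ifc));
	ifc.ifc_req = reqs;
	ifc.ifc_len = sizeof(reqs);

	if (ioctl(fd, SIOCGIFCONF, &ifc) < 0) {
		close(fd);
		return 1;
	}

	n = ifc.ifc_len / sizeof(struct ifreq);
	for (i = 0; i < n; i++)
		printf("%s\n", reqs[i].ifr_name);

	close(fd);
	return 0;
}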
2582
2583#ifdef CONFIG_PROC_FS
2584/*
2585 * This is invoked by the /proc filesystem handler to display a device
2586 * in detail.
2587 */
7562f876 2588void *dev_seq_start(struct seq_file *seq, loff_t *pos)
9a429c49 2589 __acquires(dev_base_lock)
1da177e4 2590{
e372c414 2591 struct net *net = seq_file_net(seq);
7562f876 2592 loff_t off;
1da177e4 2593 struct net_device *dev;
1da177e4 2594
7562f876
PE
2595 read_lock(&dev_base_lock);
2596 if (!*pos)
2597 return SEQ_START_TOKEN;
1da177e4 2598
7562f876 2599 off = 1;
881d966b 2600 for_each_netdev(net, dev)
7562f876
PE
2601 if (off++ == *pos)
2602 return dev;
1da177e4 2603
7562f876 2604 return NULL;
1da177e4
LT
2605}
2606
2607void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2608{
e372c414 2609 struct net *net = seq_file_net(seq);
1da177e4 2610 ++*pos;
7562f876 2611 return v == SEQ_START_TOKEN ?
881d966b 2612 first_net_device(net) : next_net_device((struct net_device *)v);
1da177e4
LT
2613}
2614
2615void dev_seq_stop(struct seq_file *seq, void *v)
9a429c49 2616 __releases(dev_base_lock)
1da177e4
LT
2617{
2618 read_unlock(&dev_base_lock);
2619}
2620
2621static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2622{
eeda3fd6 2623 const struct net_device_stats *stats = dev_get_stats(dev);
1da177e4 2624
5a1b5898
RR
2625 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2626 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2627 dev->name, stats->rx_bytes, stats->rx_packets,
2628 stats->rx_errors,
2629 stats->rx_dropped + stats->rx_missed_errors,
2630 stats->rx_fifo_errors,
2631 stats->rx_length_errors + stats->rx_over_errors +
2632 stats->rx_crc_errors + stats->rx_frame_errors,
2633 stats->rx_compressed, stats->multicast,
2634 stats->tx_bytes, stats->tx_packets,
2635 stats->tx_errors, stats->tx_dropped,
2636 stats->tx_fifo_errors, stats->collisions,
2637 stats->tx_carrier_errors +
2638 stats->tx_aborted_errors +
2639 stats->tx_window_errors +
2640 stats->tx_heartbeat_errors,
2641 stats->tx_compressed);
1da177e4
LT
2642}
2643
2644/*
2645 * Called from the PROCfs module. This now uses the new arbitrary sized
2646 * /proc/net interface to create /proc/net/dev
2647 */
2648static int dev_seq_show(struct seq_file *seq, void *v)
2649{
2650 if (v == SEQ_START_TOKEN)
2651 seq_puts(seq, "Inter-| Receive "
2652 " | Transmit\n"
2653 " face |bytes packets errs drop fifo frame "
2654 "compressed multicast|bytes packets errs "
2655 "drop fifo colls carrier compressed\n");
2656 else
2657 dev_seq_printf_stats(seq, v);
2658 return 0;
2659}
2660
2661static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2662{
2663 struct netif_rx_stats *rc = NULL;
2664
0c0b0aca 2665 while (*pos < nr_cpu_ids)
4ec93edb 2666 if (cpu_online(*pos)) {
1da177e4
LT
2667 rc = &per_cpu(netdev_rx_stat, *pos);
2668 break;
2669 } else
2670 ++*pos;
2671 return rc;
2672}
2673
2674static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2675{
2676 return softnet_get_online(pos);
2677}
2678
2679static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2680{
2681 ++*pos;
2682 return softnet_get_online(pos);
2683}
2684
2685static void softnet_seq_stop(struct seq_file *seq, void *v)
2686{
2687}
2688
2689static int softnet_seq_show(struct seq_file *seq, void *v)
2690{
2691 struct netif_rx_stats *s = v;
2692
2693 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
31aa02c5 2694 s->total, s->dropped, s->time_squeeze, 0,
c1ebcdb8
SH
2695 0, 0, 0, 0, /* was fastroute */
2696 s->cpu_collision );
1da177e4
LT
2697 return 0;
2698}
2699
f690808e 2700static const struct seq_operations dev_seq_ops = {
1da177e4
LT
2701 .start = dev_seq_start,
2702 .next = dev_seq_next,
2703 .stop = dev_seq_stop,
2704 .show = dev_seq_show,
2705};
2706
2707static int dev_seq_open(struct inode *inode, struct file *file)
2708{
e372c414
DL
2709 return seq_open_net(inode, file, &dev_seq_ops,
2710 sizeof(struct seq_net_private));
1da177e4
LT
2711}
2712
9a32144e 2713static const struct file_operations dev_seq_fops = {
1da177e4
LT
2714 .owner = THIS_MODULE,
2715 .open = dev_seq_open,
2716 .read = seq_read,
2717 .llseek = seq_lseek,
e372c414 2718 .release = seq_release_net,
1da177e4
LT
2719};
2720
f690808e 2721static const struct seq_operations softnet_seq_ops = {
1da177e4
LT
2722 .start = softnet_seq_start,
2723 .next = softnet_seq_next,
2724 .stop = softnet_seq_stop,
2725 .show = softnet_seq_show,
2726};
2727
2728static int softnet_seq_open(struct inode *inode, struct file *file)
2729{
2730 return seq_open(file, &softnet_seq_ops);
2731}
2732
9a32144e 2733static const struct file_operations softnet_seq_fops = {
1da177e4
LT
2734 .owner = THIS_MODULE,
2735 .open = softnet_seq_open,
2736 .read = seq_read,
2737 .llseek = seq_lseek,
2738 .release = seq_release,
2739};
2740
0e1256ff
SH
2741static void *ptype_get_idx(loff_t pos)
2742{
2743 struct packet_type *pt = NULL;
2744 loff_t i = 0;
2745 int t;
2746
2747 list_for_each_entry_rcu(pt, &ptype_all, list) {
2748 if (i == pos)
2749 return pt;
2750 ++i;
2751 }
2752
82d8a867 2753 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
0e1256ff
SH
2754 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2755 if (i == pos)
2756 return pt;
2757 ++i;
2758 }
2759 }
2760 return NULL;
2761}
2762
2763static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
72348a42 2764 __acquires(RCU)
0e1256ff
SH
2765{
2766 rcu_read_lock();
2767 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2768}
2769
2770static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2771{
2772 struct packet_type *pt;
2773 struct list_head *nxt;
2774 int hash;
2775
2776 ++*pos;
2777 if (v == SEQ_START_TOKEN)
2778 return ptype_get_idx(0);
2779
2780 pt = v;
2781 nxt = pt->list.next;
2782 if (pt->type == htons(ETH_P_ALL)) {
2783 if (nxt != &ptype_all)
2784 goto found;
2785 hash = 0;
2786 nxt = ptype_base[0].next;
2787 } else
82d8a867 2788 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
0e1256ff
SH
2789
2790 while (nxt == &ptype_base[hash]) {
82d8a867 2791 if (++hash >= PTYPE_HASH_SIZE)
0e1256ff
SH
2792 return NULL;
2793 nxt = ptype_base[hash].next;
2794 }
2795found:
2796 return list_entry(nxt, struct packet_type, list);
2797}
2798
2799static void ptype_seq_stop(struct seq_file *seq, void *v)
72348a42 2800 __releases(RCU)
0e1256ff
SH
2801{
2802 rcu_read_unlock();
2803}
2804
0e1256ff
SH
2805static int ptype_seq_show(struct seq_file *seq, void *v)
2806{
2807 struct packet_type *pt = v;
2808
2809 if (v == SEQ_START_TOKEN)
2810 seq_puts(seq, "Type Device Function\n");
c346dca1 2811 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
0e1256ff
SH
2812 if (pt->type == htons(ETH_P_ALL))
2813 seq_puts(seq, "ALL ");
2814 else
2815 seq_printf(seq, "%04x", ntohs(pt->type));
2816
908cd2da
AD
2817 seq_printf(seq, " %-8s %pF\n",
2818 pt->dev ? pt->dev->name : "", pt->func);
0e1256ff
SH
2819 }
2820
2821 return 0;
2822}
2823
2824static const struct seq_operations ptype_seq_ops = {
2825 .start = ptype_seq_start,
2826 .next = ptype_seq_next,
2827 .stop = ptype_seq_stop,
2828 .show = ptype_seq_show,
2829};
2830
2831static int ptype_seq_open(struct inode *inode, struct file *file)
2832{
2feb27db
PE
2833 return seq_open_net(inode, file, &ptype_seq_ops,
2834 sizeof(struct seq_net_private));
0e1256ff
SH
2835}
2836
2837static const struct file_operations ptype_seq_fops = {
2838 .owner = THIS_MODULE,
2839 .open = ptype_seq_open,
2840 .read = seq_read,
2841 .llseek = seq_lseek,
2feb27db 2842 .release = seq_release_net,
0e1256ff
SH
2843};
2844
2845
4665079c 2846static int __net_init dev_proc_net_init(struct net *net)
1da177e4
LT
2847{
2848 int rc = -ENOMEM;
2849
881d966b 2850 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
1da177e4 2851 goto out;
881d966b 2852 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
1da177e4 2853 goto out_dev;
881d966b 2854 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
457c4cbc 2855 goto out_softnet;
0e1256ff 2856
881d966b 2857 if (wext_proc_init(net))
457c4cbc 2858 goto out_ptype;
1da177e4
LT
2859 rc = 0;
2860out:
2861 return rc;
457c4cbc 2862out_ptype:
881d966b 2863 proc_net_remove(net, "ptype");
1da177e4 2864out_softnet:
881d966b 2865 proc_net_remove(net, "softnet_stat");
1da177e4 2866out_dev:
881d966b 2867 proc_net_remove(net, "dev");
1da177e4
LT
2868 goto out;
2869}
881d966b 2870
4665079c 2871static void __net_exit dev_proc_net_exit(struct net *net)
881d966b
EB
2872{
2873 wext_proc_exit(net);
2874
2875 proc_net_remove(net, "ptype");
2876 proc_net_remove(net, "softnet_stat");
2877 proc_net_remove(net, "dev");
2878}
2879
022cbae6 2880static struct pernet_operations __net_initdata dev_proc_ops = {
881d966b
EB
2881 .init = dev_proc_net_init,
2882 .exit = dev_proc_net_exit,
2883};
2884
2885static int __init dev_proc_init(void)
2886{
2887 return register_pernet_subsys(&dev_proc_ops);
2888}
1da177e4
LT
2889#else
2890#define dev_proc_init() 0
2891#endif /* CONFIG_PROC_FS */
2892
2893
2894/**
2895 * netdev_set_master - set up master/slave pair
2896 * @slave: slave device
2897 * @master: new master device
2898 *
2899 * Changes the master device of the slave. Pass %NULL to break the
2900 * bonding. The caller must hold the RTNL semaphore. On a failure
2901 * a negative errno code is returned. On success the reference counts
2902 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2903 * function returns zero.
2904 */
2905int netdev_set_master(struct net_device *slave, struct net_device *master)
2906{
2907 struct net_device *old = slave->master;
2908
2909 ASSERT_RTNL();
2910
2911 if (master) {
2912 if (old)
2913 return -EBUSY;
2914 dev_hold(master);
2915 }
2916
2917 slave->master = master;
4ec93edb 2918
1da177e4
LT
2919 synchronize_net();
2920
2921 if (old)
2922 dev_put(old);
2923
2924 if (master)
2925 slave->flags |= IFF_SLAVE;
2926 else
2927 slave->flags &= ~IFF_SLAVE;
2928
2929 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2930 return 0;
2931}
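/*
 * Illustrative sketch (not from dev.c). A bonding-style driver enslaving a
 * device would call netdev_set_master() roughly like this, under the RTNL
 * lock as required by the comment above; the function name is hypothetical
 * and real enslave paths do considerably more work.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_enslave(struct net_device *bond_dev, struct net_device *slave)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave, bond_dev);	/* takes a ref on bond_dev */
	rtnl_unlock();
	return err;
}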
2932
b6c40d68
PM
2933static void dev_change_rx_flags(struct net_device *dev, int flags)
2934{
d314774c
SH
2935 const struct net_device_ops *ops = dev->netdev_ops;
2936
2937 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
2938 ops->ndo_change_rx_flags(dev, flags);
b6c40d68
PM
2939}
2940
dad9b335 2941static int __dev_set_promiscuity(struct net_device *dev, int inc)
1da177e4
LT
2942{
2943 unsigned short old_flags = dev->flags;
2944
24023451
PM
2945 ASSERT_RTNL();
2946
dad9b335
WC
2947 dev->flags |= IFF_PROMISC;
2948 dev->promiscuity += inc;
2949 if (dev->promiscuity == 0) {
2950 /*
2951 * Avoid overflow.
2952 * If inc causes overflow, untouch promisc and return error.
2953 */
2954 if (inc < 0)
2955 dev->flags &= ~IFF_PROMISC;
2956 else {
2957 dev->promiscuity -= inc;
2958 printk(KERN_WARNING "%s: promiscuity touches roof, "
2959 "set promiscuity failed, promiscuity feature "
2960 "of device might be broken.\n", dev->name);
2961 return -EOVERFLOW;
2962 }
2963 }
52609c0b 2964 if (dev->flags != old_flags) {
1da177e4
LT
2965 printk(KERN_INFO "device %s %s promiscuous mode\n",
2966 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
4ec93edb 2967 "left");
7759db82
KHK
2968 if (audit_enabled)
2969 audit_log(current->audit_context, GFP_ATOMIC,
2970 AUDIT_ANOM_PROMISCUOUS,
2971 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2972 dev->name, (dev->flags & IFF_PROMISC),
2973 (old_flags & IFF_PROMISC),
2974 audit_get_loginuid(current),
2975 current->uid, current->gid,
2976 audit_get_sessionid(current));
24023451 2977
b6c40d68 2978 dev_change_rx_flags(dev, IFF_PROMISC);
1da177e4 2979 }
dad9b335 2980 return 0;
1da177e4
LT
2981}
2982
4417da66
PM
2983/**
2984 * dev_set_promiscuity - update promiscuity count on a device
2985 * @dev: device
2986 * @inc: modifier
2987 *
2988 * Add or remove promiscuity from a device. While the count in the device
2989 * remains above zero the interface remains promiscuous. Once it hits zero
2990 * the device reverts back to normal filtering operation. A negative inc
2991 * value is used to drop promiscuity on the device.
dad9b335 2992 * Return 0 if successful or a negative errno code on error.
4417da66 2993 */
dad9b335 2994int dev_set_promiscuity(struct net_device *dev, int inc)
4417da66
PM
2995{
2996 unsigned short old_flags = dev->flags;
dad9b335 2997 int err;
4417da66 2998
dad9b335 2999 err = __dev_set_promiscuity(dev, inc);
4b5a698e 3000 if (err < 0)
dad9b335 3001 return err;
4417da66
PM
3002 if (dev->flags != old_flags)
3003 dev_set_rx_mode(dev);
dad9b335 3004 return err;
4417da66
PM
3005}
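/*
 * Illustrative sketch (not from dev.c). As the comment above explains,
 * promiscuity is a counter, not a flag: users add +1 while they need to see
 * all traffic and give back -1 when done, under RTNL. The capture-style
 * function names are hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_capture_start(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_promiscuity(dev, 1);	/* one more promiscuous user */
	rtnl_unlock();
	return err;
}

static void example_capture_stop(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);		/* drop our reference */
	rtnl_unlock();
}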
3006
1da177e4
LT
3007/**
3008 * dev_set_allmulti - update allmulti count on a device
3009 * @dev: device
3010 * @inc: modifier
3011 *
3012 * Add or remove reception of all multicast frames to a device. While the
3013 * count in the device remains above zero the interface remains listening
3014 * to all interfaces. Once it hits zero the device reverts back to normal
3015 * filtering operation. A negative @inc value is used to drop the counter
3016 * when releasing a resource needing all multicasts.
dad9b335 3017 * Return 0 if successful or a negative errno code on error.
1da177e4
LT
3018 */
3019
dad9b335 3020int dev_set_allmulti(struct net_device *dev, int inc)
1da177e4
LT
3021{
3022 unsigned short old_flags = dev->flags;
3023
24023451
PM
3024 ASSERT_RTNL();
3025
1da177e4 3026 dev->flags |= IFF_ALLMULTI;
dad9b335
WC
3027 dev->allmulti += inc;
3028 if (dev->allmulti == 0) {
3029 /*
3030 * Avoid overflow.
3031 * If inc causes overflow, untouch allmulti and return error.
3032 */
3033 if (inc < 0)
3034 dev->flags &= ~IFF_ALLMULTI;
3035 else {
3036 dev->allmulti -= inc;
3037 printk(KERN_WARNING "%s: allmulti touches roof, "
3038 "set allmulti failed, allmulti feature of "
3039 "device might be broken.\n", dev->name);
3040 return -EOVERFLOW;
3041 }
3042 }
24023451 3043 if (dev->flags ^ old_flags) {
b6c40d68 3044 dev_change_rx_flags(dev, IFF_ALLMULTI);
4417da66 3045 dev_set_rx_mode(dev);
24023451 3046 }
dad9b335 3047 return 0;
4417da66
PM
3048}
3049
3050/*
3051 * Upload unicast and multicast address lists to device and
3052 * configure RX filtering. When the device doesn't support unicast
53ccaae1 3053 * filtering it is put in promiscuous mode while unicast addresses
4417da66
PM
3054 * are present.
3055 */
3056void __dev_set_rx_mode(struct net_device *dev)
3057{
d314774c
SH
3058 const struct net_device_ops *ops = dev->netdev_ops;
3059
4417da66
PM
3060 /* dev_open will call this function so the list will stay sane. */
3061 if (!(dev->flags&IFF_UP))
3062 return;
3063
3064 if (!netif_device_present(dev))
40b77c94 3065 return;
4417da66 3066
d314774c
SH
3067 if (ops->ndo_set_rx_mode)
3068 ops->ndo_set_rx_mode(dev);
4417da66
PM
3069 else {
3070 /* Unicast addresses changes may only happen under the rtnl,
3071 * therefore calling __dev_set_promiscuity here is safe.
3072 */
3073 if (dev->uc_count > 0 && !dev->uc_promisc) {
3074 __dev_set_promiscuity(dev, 1);
3075 dev->uc_promisc = 1;
3076 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3077 __dev_set_promiscuity(dev, -1);
3078 dev->uc_promisc = 0;
3079 }
3080
d314774c
SH
3081 if (ops->ndo_set_multicast_list)
3082 ops->ndo_set_multicast_list(dev);
4417da66
PM
3083 }
3084}
3085
3086void dev_set_rx_mode(struct net_device *dev)
3087{
b9e40857 3088 netif_addr_lock_bh(dev);
4417da66 3089 __dev_set_rx_mode(dev);
b9e40857 3090 netif_addr_unlock_bh(dev);
1da177e4
LT
3091}
3092
61cbc2fc
PM
3093int __dev_addr_delete(struct dev_addr_list **list, int *count,
3094 void *addr, int alen, int glbl)
bf742482
PM
3095{
3096 struct dev_addr_list *da;
3097
3098 for (; (da = *list) != NULL; list = &da->next) {
3099 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3100 alen == da->da_addrlen) {
3101 if (glbl) {
3102 int old_glbl = da->da_gusers;
3103 da->da_gusers = 0;
3104 if (old_glbl == 0)
3105 break;
3106 }
3107 if (--da->da_users)
3108 return 0;
3109
3110 *list = da->next;
3111 kfree(da);
61cbc2fc 3112 (*count)--;
bf742482
PM
3113 return 0;
3114 }
3115 }
3116 return -ENOENT;
3117}
3118
61cbc2fc
PM
3119int __dev_addr_add(struct dev_addr_list **list, int *count,
3120 void *addr, int alen, int glbl)
bf742482
PM
3121{
3122 struct dev_addr_list *da;
3123
3124 for (da = *list; da != NULL; da = da->next) {
3125 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3126 da->da_addrlen == alen) {
3127 if (glbl) {
3128 int old_glbl = da->da_gusers;
3129 da->da_gusers = 1;
3130 if (old_glbl)
3131 return 0;
3132 }
3133 da->da_users++;
3134 return 0;
3135 }
3136 }
3137
12aa343a 3138 da = kzalloc(sizeof(*da), GFP_ATOMIC);
bf742482
PM
3139 if (da == NULL)
3140 return -ENOMEM;
3141 memcpy(da->da_addr, addr, alen);
3142 da->da_addrlen = alen;
3143 da->da_users = 1;
3144 da->da_gusers = glbl ? 1 : 0;
3145 da->next = *list;
3146 *list = da;
61cbc2fc 3147 (*count)++;
bf742482
PM
3148 return 0;
3149}
3150
4417da66
PM
3151/**
3152 * dev_unicast_delete - Release secondary unicast address.
3153 * @dev: device
0ed72ec4
RD
3154 * @addr: address to delete
3155 * @alen: length of @addr
4417da66
PM
3156 *
3157 * Release reference to a secondary unicast address and remove it
0ed72ec4 3158 * from the device if the reference count drops to zero.
4417da66
PM
3159 *
3160 * The caller must hold the rtnl_mutex.
3161 */
3162int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3163{
3164 int err;
3165
3166 ASSERT_RTNL();
3167
b9e40857 3168 netif_addr_lock_bh(dev);
61cbc2fc
PM
3169 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3170 if (!err)
4417da66 3171 __dev_set_rx_mode(dev);
b9e40857 3172 netif_addr_unlock_bh(dev);
4417da66
PM
3173 return err;
3174}
3175EXPORT_SYMBOL(dev_unicast_delete);
3176
3177/**
3178 * dev_unicast_add - add a secondary unicast address
3179 * @dev: device
5dbaec5d 3180 * @addr: address to add
0ed72ec4 3181 * @alen: length of @addr
4417da66
PM
3182 *
3183 * Add a secondary unicast address to the device or increase
3184 * the reference count if it already exists.
3185 *
3186 * The caller must hold the rtnl_mutex.
3187 */
3188int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3189{
3190 int err;
3191
3192 ASSERT_RTNL();
3193
b9e40857 3194 netif_addr_lock_bh(dev);
61cbc2fc
PM
3195 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3196 if (!err)
4417da66 3197 __dev_set_rx_mode(dev);
b9e40857 3198 netif_addr_unlock_bh(dev);
4417da66
PM
3199 return err;
3200}
3201EXPORT_SYMBOL(dev_unicast_add);
3202
e83a2ea8
CL
3203int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3204 struct dev_addr_list **from, int *from_count)
3205{
3206 struct dev_addr_list *da, *next;
3207 int err = 0;
3208
3209 da = *from;
3210 while (da != NULL) {
3211 next = da->next;
3212 if (!da->da_synced) {
3213 err = __dev_addr_add(to, to_count,
3214 da->da_addr, da->da_addrlen, 0);
3215 if (err < 0)
3216 break;
3217 da->da_synced = 1;
3218 da->da_users++;
3219 } else if (da->da_users == 1) {
3220 __dev_addr_delete(to, to_count,
3221 da->da_addr, da->da_addrlen, 0);
3222 __dev_addr_delete(from, from_count,
3223 da->da_addr, da->da_addrlen, 0);
3224 }
3225 da = next;
3226 }
3227 return err;
3228}
3229
3230void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3231 struct dev_addr_list **from, int *from_count)
3232{
3233 struct dev_addr_list *da, *next;
3234
3235 da = *from;
3236 while (da != NULL) {
3237 next = da->next;
3238 if (da->da_synced) {
3239 __dev_addr_delete(to, to_count,
3240 da->da_addr, da->da_addrlen, 0);
3241 da->da_synced = 0;
3242 __dev_addr_delete(from, from_count,
3243 da->da_addr, da->da_addrlen, 0);
3244 }
3245 da = next;
3246 }
3247}
3248
3249/**
3250 * dev_unicast_sync - Synchronize device's unicast list to another device
3251 * @to: destination device
3252 * @from: source device
3253 *
3254 * Add newly added addresses to the destination device and release
3255 * addresses that have no users left. The source device must be
3256 * locked by netif_tx_lock_bh.
3257 *
3258 * This function is intended to be called from the dev->set_rx_mode
3259 * function of layered software devices.
3260 */
3261int dev_unicast_sync(struct net_device *to, struct net_device *from)
3262{
3263 int err = 0;
3264
b9e40857 3265 netif_addr_lock_bh(to);
e83a2ea8
CL
3266 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3267 &from->uc_list, &from->uc_count);
3268 if (!err)
3269 __dev_set_rx_mode(to);
b9e40857 3270 netif_addr_unlock_bh(to);
e83a2ea8
CL
3271 return err;
3272}
3273EXPORT_SYMBOL(dev_unicast_sync);
3274
3275/**
bc2cda1e 3276 * dev_unicast_unsync - Remove synchronized addresses from the destination device
e83a2ea8
CL
3277 * @to: destination device
3278 * @from: source device
3279 *
3280 * Remove all addresses that were added to the destination device by
3281 * dev_unicast_sync(). This function is intended to be called from the
3282 * dev->stop function of layered software devices.
3283 */
3284void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3285{
b9e40857 3286 netif_addr_lock_bh(from);
e308a5d8 3287 netif_addr_lock(to);
e83a2ea8
CL
3288
3289 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3290 &from->uc_list, &from->uc_count);
3291 __dev_set_rx_mode(to);
3292
e308a5d8 3293 netif_addr_unlock(to);
b9e40857 3294 netif_addr_unlock_bh(from);
e83a2ea8
CL
3295}
3296EXPORT_SYMBOL(dev_unicast_unsync);
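/*
 * Illustrative sketch (not from dev.c). As the comments above say,
 * dev_unicast_sync()/dev_unicast_unsync() are meant for layered devices
 * (VLAN, macvlan, ...) that mirror their secondary unicast addresses onto
 * the lower device. The "example_vdev" callbacks and the use of
 * vdev->master as the lower device are assumptions for the sketch.
 */
#include <linux/netdevice.h>

/* Called as the virtual device's set_rx_mode handler. */
static void example_vdev_set_rx_mode(struct net_device *vdev)
{
	struct net_device *lower = vdev->master;	/* assumed lower device */

	dev_unicast_sync(lower, vdev);
}

/* Called from the virtual device's stop routine. */
static int example_vdev_stop(struct net_device *vdev)
{
	struct net_device *lower = vdev->master;

	dev_unicast_unsync(lower, vdev);
	return 0;
}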
3297
12972621
DC
3298static void __dev_addr_discard(struct dev_addr_list **list)
3299{
3300 struct dev_addr_list *tmp;
3301
3302 while (*list != NULL) {
3303 tmp = *list;
3304 *list = tmp->next;
3305 if (tmp->da_users > tmp->da_gusers)
3306 printk("__dev_addr_discard: address leakage! "
3307 "da_users=%d\n", tmp->da_users);
3308 kfree(tmp);
3309 }
3310}
3311
26cc2522 3312static void dev_addr_discard(struct net_device *dev)
4417da66 3313{
b9e40857 3314 netif_addr_lock_bh(dev);
26cc2522 3315
4417da66
PM
3316 __dev_addr_discard(&dev->uc_list);
3317 dev->uc_count = 0;
4417da66 3318
456ad75c
DC
3319 __dev_addr_discard(&dev->mc_list);
3320 dev->mc_count = 0;
26cc2522 3321
b9e40857 3322 netif_addr_unlock_bh(dev);
456ad75c
DC
3323}
3324
f0db275a
SH
3325/**
3326 * dev_get_flags - get flags reported to userspace
3327 * @dev: device
3328 *
3329 * Get the combination of flag bits exported through APIs to userspace.
3330 */
1da177e4
LT
3331unsigned dev_get_flags(const struct net_device *dev)
3332{
3333 unsigned flags;
3334
3335 flags = (dev->flags & ~(IFF_PROMISC |
3336 IFF_ALLMULTI |
b00055aa
SR
3337 IFF_RUNNING |
3338 IFF_LOWER_UP |
3339 IFF_DORMANT)) |
1da177e4
LT
3340 (dev->gflags & (IFF_PROMISC |
3341 IFF_ALLMULTI));
3342
b00055aa
SR
3343 if (netif_running(dev)) {
3344 if (netif_oper_up(dev))
3345 flags |= IFF_RUNNING;
3346 if (netif_carrier_ok(dev))
3347 flags |= IFF_LOWER_UP;
3348 if (netif_dormant(dev))
3349 flags |= IFF_DORMANT;
3350 }
1da177e4
LT
3351
3352 return flags;
3353}
3354
f0db275a
SH
3355/**
3356 * dev_change_flags - change device settings
3357 * @dev: device
3358 * @flags: device state flags
3359 *
3360 * Change settings on device based state flags. The flags are
3361 * in the userspace exported format.
3362 */
1da177e4
LT
3363int dev_change_flags(struct net_device *dev, unsigned flags)
3364{
7c355f53 3365 int ret, changes;
1da177e4
LT
3366 int old_flags = dev->flags;
3367
24023451
PM
3368 ASSERT_RTNL();
3369
1da177e4
LT
3370 /*
3371 * Set the flags on our device.
3372 */
3373
3374 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3375 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3376 IFF_AUTOMEDIA)) |
3377 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3378 IFF_ALLMULTI));
3379
3380 /*
3381 * Load in the correct multicast list now the flags have changed.
3382 */
3383
b6c40d68
PM
3384 if ((old_flags ^ flags) & IFF_MULTICAST)
3385 dev_change_rx_flags(dev, IFF_MULTICAST);
24023451 3386
4417da66 3387 dev_set_rx_mode(dev);
1da177e4
LT
3388
3389 /*
3390 * Have we downed the interface? We handle IFF_UP ourselves
3391 * according to user attempts to set it, rather than blindly
3392 * setting it.
3393 */
3394
3395 ret = 0;
3396 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3397 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3398
3399 if (!ret)
4417da66 3400 dev_set_rx_mode(dev);
1da177e4
LT
3401 }
3402
3403 if (dev->flags & IFF_UP &&
3404 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3405 IFF_VOLATILE)))
056925ab 3406 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1da177e4
LT
3407
3408 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3409 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3410 dev->gflags ^= IFF_PROMISC;
3411 dev_set_promiscuity(dev, inc);
3412 }
3413
3414 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3415 is important. Some (broken) drivers set IFF_PROMISC when
3416 IFF_ALLMULTI is requested, without asking us and without reporting.
3417 */
3418 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3419 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3420 dev->gflags ^= IFF_ALLMULTI;
3421 dev_set_allmulti(dev, inc);
3422 }
3423
7c355f53
TG
3424 /* Exclude state transition flags, already notified */
3425 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3426 if (changes)
3427 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
1da177e4
LT
3428
3429 return ret;
3430}
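/*
 * Illustrative sketch (not from dev.c). Bringing an interface
 * administratively up from kernel code usually means setting IFF_UP through
 * dev_change_flags() under RTNL, i.e. the same path the function above
 * implements for the SIOCSIFFLAGS ioctl. The function name is hypothetical.
 */
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_bring_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return err;
}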
3431
f0db275a
SH
3432/**
3433 * dev_set_mtu - Change maximum transfer unit
3434 * @dev: device
3435 * @new_mtu: new transfer unit
3436 *
3437 * Change the maximum transfer size of the network device.
3438 */
1da177e4
LT
3439int dev_set_mtu(struct net_device *dev, int new_mtu)
3440{
d314774c 3441 const struct net_device_ops *ops = dev->netdev_ops;
1da177e4
LT
3442 int err;
3443
3444 if (new_mtu == dev->mtu)
3445 return 0;
3446
3447 /* MTU must be positive. */
3448 if (new_mtu < 0)
3449 return -EINVAL;
3450
3451 if (!netif_device_present(dev))
3452 return -ENODEV;
3453
3454 err = 0;
d314774c
SH
3455 if (ops->ndo_change_mtu)
3456 err = ops->ndo_change_mtu(dev, new_mtu);
1da177e4
LT
3457 else
3458 dev->mtu = new_mtu;
d314774c 3459
1da177e4 3460 if (!err && dev->flags & IFF_UP)
056925ab 3461 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
1da177e4
LT
3462 return err;
3463}
3464
f0db275a
SH
3465/**
3466 * dev_set_mac_address - Change Media Access Control Address
3467 * @dev: device
3468 * @sa: new address
3469 *
3470 * Change the hardware (MAC) address of the device
3471 */
1da177e4
LT
3472int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3473{
d314774c 3474 const struct net_device_ops *ops = dev->netdev_ops;
1da177e4
LT
3475 int err;
3476
d314774c 3477 if (!ops->ndo_set_mac_address)
1da177e4
LT
3478 return -EOPNOTSUPP;
3479 if (sa->sa_family != dev->type)
3480 return -EINVAL;
3481 if (!netif_device_present(dev))
3482 return -ENODEV;
d314774c 3483 err = ops->ndo_set_mac_address(dev, sa);
1da177e4 3484 if (!err)
056925ab 3485 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1da177e4
LT
3486 return err;
3487}
3488
3489/*
14e3e079 3490 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
1da177e4 3491 */
14e3e079 3492static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
1da177e4
LT
3493{
3494 int err;
881d966b 3495 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
1da177e4
LT
3496
3497 if (!dev)
3498 return -ENODEV;
3499
3500 switch (cmd) {
3501 case SIOCGIFFLAGS: /* Get interface flags */
3502 ifr->ifr_flags = dev_get_flags(dev);
3503 return 0;
3504
1da177e4
LT
3505 case SIOCGIFMETRIC: /* Get the metric on the interface
3506 (currently unused) */
3507 ifr->ifr_metric = 0;
3508 return 0;
3509
1da177e4
LT
3510 case SIOCGIFMTU: /* Get the MTU of a device */
3511 ifr->ifr_mtu = dev->mtu;
3512 return 0;
3513
1da177e4
LT
3514 case SIOCGIFHWADDR:
3515 if (!dev->addr_len)
3516 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3517 else
3518 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3519 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3520 ifr->ifr_hwaddr.sa_family = dev->type;
3521 return 0;
3522
14e3e079
JG
3523 case SIOCGIFSLAVE:
3524 err = -EINVAL;
3525 break;
3526
3527 case SIOCGIFMAP:
3528 ifr->ifr_map.mem_start = dev->mem_start;
3529 ifr->ifr_map.mem_end = dev->mem_end;
3530 ifr->ifr_map.base_addr = dev->base_addr;
3531 ifr->ifr_map.irq = dev->irq;
3532 ifr->ifr_map.dma = dev->dma;
3533 ifr->ifr_map.port = dev->if_port;
3534 return 0;
3535
3536 case SIOCGIFINDEX:
3537 ifr->ifr_ifindex = dev->ifindex;
3538 return 0;
3539
3540 case SIOCGIFTXQLEN:
3541 ifr->ifr_qlen = dev->tx_queue_len;
3542 return 0;
3543
3544 default:
3545 /* dev_ioctl() should ensure this case
3546 * is never reached
3547 */
3548 WARN_ON(1);
3549 err = -EINVAL;
3550 break;
3551
3552 }
3553 return err;
3554}
3555
3556/*
3557 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3558 */
3559static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3560{
3561 int err;
3562 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
d314774c 3563 const struct net_device_ops *ops = dev->netdev_ops;
14e3e079
JG
3564
3565 if (!dev)
3566 return -ENODEV;
3567
3568 switch (cmd) {
3569 case SIOCSIFFLAGS: /* Set interface flags */
3570 return dev_change_flags(dev, ifr->ifr_flags);
3571
3572 case SIOCSIFMETRIC: /* Set the metric on the interface
3573 (currently unused) */
3574 return -EOPNOTSUPP;
3575
3576 case SIOCSIFMTU: /* Set the MTU of a device */
3577 return dev_set_mtu(dev, ifr->ifr_mtu);
3578
1da177e4
LT
3579 case SIOCSIFHWADDR:
3580 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3581
3582 case SIOCSIFHWBROADCAST:
3583 if (ifr->ifr_hwaddr.sa_family != dev->type)
3584 return -EINVAL;
3585 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3586 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
056925ab 3587 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1da177e4
LT
3588 return 0;
3589
1da177e4 3590 case SIOCSIFMAP:
d314774c 3591 if (ops->ndo_set_config) {
1da177e4
LT
3592 if (!netif_device_present(dev))
3593 return -ENODEV;
d314774c 3594 return ops->ndo_set_config(dev, &ifr->ifr_map);
1da177e4
LT
3595 }
3596 return -EOPNOTSUPP;
3597
3598 case SIOCADDMULTI:
d314774c 3599 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
1da177e4
LT
3600 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3601 return -EINVAL;
3602 if (!netif_device_present(dev))
3603 return -ENODEV;
3604 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3605 dev->addr_len, 1);
3606
3607 case SIOCDELMULTI:
d314774c 3608 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
1da177e4
LT
3609 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3610 return -EINVAL;
3611 if (!netif_device_present(dev))
3612 return -ENODEV;
3613 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3614 dev->addr_len, 1);
3615
1da177e4
LT
3616 case SIOCSIFTXQLEN:
3617 if (ifr->ifr_qlen < 0)
3618 return -EINVAL;
3619 dev->tx_queue_len = ifr->ifr_qlen;
3620 return 0;
3621
3622 case SIOCSIFNAME:
3623 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3624 return dev_change_name(dev, ifr->ifr_newname);
3625
3626 /*
3627 * Unknown or private ioctl
3628 */
3629
3630 default:
3631 if ((cmd >= SIOCDEVPRIVATE &&
3632 cmd <= SIOCDEVPRIVATE + 15) ||
3633 cmd == SIOCBONDENSLAVE ||
3634 cmd == SIOCBONDRELEASE ||
3635 cmd == SIOCBONDSETHWADDR ||
3636 cmd == SIOCBONDSLAVEINFOQUERY ||
3637 cmd == SIOCBONDINFOQUERY ||
3638 cmd == SIOCBONDCHANGEACTIVE ||
3639 cmd == SIOCGMIIPHY ||
3640 cmd == SIOCGMIIREG ||
3641 cmd == SIOCSMIIREG ||
3642 cmd == SIOCBRADDIF ||
3643 cmd == SIOCBRDELIF ||
3644 cmd == SIOCWANDEV) {
3645 err = -EOPNOTSUPP;
d314774c 3646 if (ops->ndo_do_ioctl) {
1da177e4 3647 if (netif_device_present(dev))
d314774c 3648 err = ops->ndo_do_ioctl(dev, ifr, cmd);
1da177e4
LT
3649 else
3650 err = -ENODEV;
3651 }
3652 } else
3653 err = -EINVAL;
3654
3655 }
3656 return err;
3657}
3658
3659/*
3660 * This function handles all "interface"-type I/O control requests. The actual
3661 * 'doing' part of this is dev_ifsioc above.
3662 */
3663
3664/**
3665 * dev_ioctl - network device ioctl
c4ea43c5 3666 * @net: the applicable net namespace
1da177e4
LT
3667 * @cmd: command to issue
3668 * @arg: pointer to a struct ifreq in user space
3669 *
3670 * Issue ioctl functions to devices. This is normally called by the
3671 * user space syscall interfaces but can sometimes be useful for
3672 * other purposes. The return value is the return from the syscall if
3673 * positive or a negative errno code on error.
3674 */
3675
881d966b 3676int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4
LT
3677{
3678 struct ifreq ifr;
3679 int ret;
3680 char *colon;
3681
3682 /* One special case: SIOCGIFCONF takes ifconf argument
3683 and requires shared lock, because it sleeps writing
3684 to user space.
3685 */
3686
3687 if (cmd == SIOCGIFCONF) {
6756ae4b 3688 rtnl_lock();
881d966b 3689 ret = dev_ifconf(net, (char __user *) arg);
6756ae4b 3690 rtnl_unlock();
1da177e4
LT
3691 return ret;
3692 }
3693 if (cmd == SIOCGIFNAME)
881d966b 3694 return dev_ifname(net, (struct ifreq __user *)arg);
1da177e4
LT
3695
3696 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3697 return -EFAULT;
3698
3699 ifr.ifr_name[IFNAMSIZ-1] = 0;
3700
3701 colon = strchr(ifr.ifr_name, ':');
3702 if (colon)
3703 *colon = 0;
3704
3705 /*
3706 * See which interface the caller is talking about.
3707 */
3708
3709 switch (cmd) {
3710 /*
3711 * These ioctl calls:
3712 * - can be done by all.
3713 * - atomic and do not require locking.
3714 * - return a value
3715 */
3716 case SIOCGIFFLAGS:
3717 case SIOCGIFMETRIC:
3718 case SIOCGIFMTU:
3719 case SIOCGIFHWADDR:
3720 case SIOCGIFSLAVE:
3721 case SIOCGIFMAP:
3722 case SIOCGIFINDEX:
3723 case SIOCGIFTXQLEN:
881d966b 3724 dev_load(net, ifr.ifr_name);
1da177e4 3725 read_lock(&dev_base_lock);
14e3e079 3726 ret = dev_ifsioc_locked(net, &ifr, cmd);
1da177e4
LT
3727 read_unlock(&dev_base_lock);
3728 if (!ret) {
3729 if (colon)
3730 *colon = ':';
3731 if (copy_to_user(arg, &ifr,
3732 sizeof(struct ifreq)))
3733 ret = -EFAULT;
3734 }
3735 return ret;
3736
3737 case SIOCETHTOOL:
881d966b 3738 dev_load(net, ifr.ifr_name);
1da177e4 3739 rtnl_lock();
881d966b 3740 ret = dev_ethtool(net, &ifr);
1da177e4
LT
3741 rtnl_unlock();
3742 if (!ret) {
3743 if (colon)
3744 *colon = ':';
3745 if (copy_to_user(arg, &ifr,
3746 sizeof(struct ifreq)))
3747 ret = -EFAULT;
3748 }
3749 return ret;
3750
3751 /*
3752 * These ioctl calls:
3753 * - require superuser power.
3754 * - require strict serialization.
3755 * - return a value
3756 */
3757 case SIOCGMIIPHY:
3758 case SIOCGMIIREG:
3759 case SIOCSIFNAME:
3760 if (!capable(CAP_NET_ADMIN))
3761 return -EPERM;
881d966b 3762 dev_load(net, ifr.ifr_name);
1da177e4 3763 rtnl_lock();
881d966b 3764 ret = dev_ifsioc(net, &ifr, cmd);
1da177e4
LT
3765 rtnl_unlock();
3766 if (!ret) {
3767 if (colon)
3768 *colon = ':';
3769 if (copy_to_user(arg, &ifr,
3770 sizeof(struct ifreq)))
3771 ret = -EFAULT;
3772 }
3773 return ret;
3774
3775 /*
3776 * These ioctl calls:
3777 * - require superuser power.
3778 * - require strict serialization.
3779 * - do not return a value
3780 */
3781 case SIOCSIFFLAGS:
3782 case SIOCSIFMETRIC:
3783 case SIOCSIFMTU:
3784 case SIOCSIFMAP:
3785 case SIOCSIFHWADDR:
3786 case SIOCSIFSLAVE:
3787 case SIOCADDMULTI:
3788 case SIOCDELMULTI:
3789 case SIOCSIFHWBROADCAST:
3790 case SIOCSIFTXQLEN:
3791 case SIOCSMIIREG:
3792 case SIOCBONDENSLAVE:
3793 case SIOCBONDRELEASE:
3794 case SIOCBONDSETHWADDR:
1da177e4
LT
3795 case SIOCBONDCHANGEACTIVE:
3796 case SIOCBRADDIF:
3797 case SIOCBRDELIF:
3798 if (!capable(CAP_NET_ADMIN))
3799 return -EPERM;
cabcac0b
TG
3800 /* fall through */
3801 case SIOCBONDSLAVEINFOQUERY:
3802 case SIOCBONDINFOQUERY:
881d966b 3803 dev_load(net, ifr.ifr_name);
1da177e4 3804 rtnl_lock();
881d966b 3805 ret = dev_ifsioc(net, &ifr, cmd);
1da177e4
LT
3806 rtnl_unlock();
3807 return ret;
3808
3809 case SIOCGIFMEM:
3810 /* Get the per device memory space. We can add this but
3811 * currently do not support it */
3812 case SIOCSIFMEM:
3813 /* Set the per device memory buffer space.
3814 * Not applicable in our case */
3815 case SIOCSIFLINK:
3816 return -EINVAL;
3817
3818 /*
3819 * Unknown or private ioctl.
3820 */
3821 default:
3822 if (cmd == SIOCWANDEV ||
3823 (cmd >= SIOCDEVPRIVATE &&
3824 cmd <= SIOCDEVPRIVATE + 15)) {
881d966b 3825 dev_load(net, ifr.ifr_name);
1da177e4 3826 rtnl_lock();
881d966b 3827 ret = dev_ifsioc(net, &ifr, cmd);
1da177e4
LT
3828 rtnl_unlock();
3829 if (!ret && copy_to_user(arg, &ifr,
3830 sizeof(struct ifreq)))
3831 ret = -EFAULT;
3832 return ret;
3833 }
1da177e4 3834 /* Take care of Wireless Extensions */
295f4a1f 3835 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
881d966b 3836 return wext_handle_ioctl(net, &ifr, cmd, arg);
1da177e4
LT
3837 return -EINVAL;
3838 }
3839}
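A minimal user-space sketch of the unprivileged GET path above (an illustrative helper, not from this file). dev_ioctl() strips a ':' alias suffix such as "eth0:1" before the device lookup and restores it in the ifreq copied back to user space:

#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>

static int print_mtu(const char *name)
{
	struct ifreq ifr;
	int ret, sock = socket(AF_INET, SOCK_DGRAM, 0);

	if (sock < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);	/* "eth0" or "eth0:1" */
	ret = ioctl(sock, SIOCGIFMTU, &ifr);
	if (!ret)
		printf("%s mtu %d\n", ifr.ifr_name, ifr.ifr_mtu);
	close(sock);
	return ret;
}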
3840
3841
3842/**
3843 * dev_new_index - allocate an ifindex
c4ea43c5 3844 * @net: the applicable net namespace
1da177e4
LT
3845 *
3846 * Returns a suitable unique value for a new device interface
3847 * number. The caller must hold the rtnl semaphore or the
3848 * dev_base_lock to be sure it remains unique.
3849 */
881d966b 3850static int dev_new_index(struct net *net)
1da177e4
LT
3851{
3852 static int ifindex;
3853 for (;;) {
3854 if (++ifindex <= 0)
3855 ifindex = 1;
881d966b 3856 if (!__dev_get_by_index(net, ifindex))
1da177e4
LT
3857 return ifindex;
3858 }
3859}
3860
1da177e4 3861/* Delayed registration/unregistration */
3b5b34fd 3862static LIST_HEAD(net_todo_list);
1da177e4 3863
6f05f629 3864static void net_set_todo(struct net_device *dev)
1da177e4 3865{
1da177e4 3866 list_add_tail(&dev->todo_list, &net_todo_list);
1da177e4
LT
3867}
3868
93ee31f1
DL
3869static void rollback_registered(struct net_device *dev)
3870{
3871 BUG_ON(dev_boot_phase);
3872 ASSERT_RTNL();
3873
3874 /* Some devices call this without ever registering, to unwind a failed initialization. */
3875 if (dev->reg_state == NETREG_UNINITIALIZED) {
3876 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3877 "was registered\n", dev->name, dev);
3878
3879 WARN_ON(1);
3880 return;
3881 }
3882
3883 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3884
3885 /* If device is running, close it first. */
3886 dev_close(dev);
3887
3888 /* And unlink it from device chain. */
3889 unlist_netdevice(dev);
3890
3891 dev->reg_state = NETREG_UNREGISTERING;
3892
3893 synchronize_net();
3894
3895 /* Shutdown queueing discipline. */
3896 dev_shutdown(dev);
3897
3898
3899 /* Notify protocols that we are about to destroy
3900 this device. They should clean all the things.
3901 */
3902 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3903
3904 /*
3905 * Flush the unicast and multicast chains
3906 */
3907 dev_addr_discard(dev);
3908
d314774c
SH
3909 if (dev->netdev_ops->ndo_uninit)
3910 dev->netdev_ops->ndo_uninit(dev);
93ee31f1
DL
3911
3912 /* Notifier chain MUST detach us from master device. */
547b792c 3913 WARN_ON(dev->master);
93ee31f1
DL
3914
3915 /* Remove entries from kobject tree */
3916 netdev_unregister_kobject(dev);
3917
3918 synchronize_net();
3919
3920 dev_put(dev);
3921}
3922
e8a0464c
DM
3923static void __netdev_init_queue_locks_one(struct net_device *dev,
3924 struct netdev_queue *dev_queue,
3925 void *_unused)
c773e847
DM
3926{
3927 spin_lock_init(&dev_queue->_xmit_lock);
cf508b12 3928 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
c773e847
DM
3929 dev_queue->xmit_lock_owner = -1;
3930}
3931
3932static void netdev_init_queue_locks(struct net_device *dev)
3933{
e8a0464c
DM
3934 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3935 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
c773e847
DM
3936}
3937
b63365a2
HX
3938unsigned long netdev_fix_features(unsigned long features, const char *name)
3939{
3940 /* Fix illegal SG+CSUM combinations. */
3941 if ((features & NETIF_F_SG) &&
3942 !(features & NETIF_F_ALL_CSUM)) {
3943 if (name)
3944 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
3945 "checksum feature.\n", name);
3946 features &= ~NETIF_F_SG;
3947 }
3948
3949 /* TSO requires that SG is present as well. */
3950 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
3951 if (name)
3952 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
3953 "SG feature.\n", name);
3954 features &= ~NETIF_F_TSO;
3955 }
3956
3957 if (features & NETIF_F_UFO) {
3958 if (!(features & NETIF_F_GEN_CSUM)) {
3959 if (name)
3960 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3961 "since no NETIF_F_HW_CSUM feature.\n",
3962 name);
3963 features &= ~NETIF_F_UFO;
3964 }
3965
3966 if (!(features & NETIF_F_SG)) {
3967 if (name)
3968 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3969 "since no NETIF_F_SG feature.\n", name);
3970 features &= ~NETIF_F_UFO;
3971 }
3972 }
3973
3974 return features;
3975}
3976EXPORT_SYMBOL(netdev_fix_features);
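A hedged driver-side sketch of the intended use (hypothetical probe fragment): request an optimistic feature set and let netdev_fix_features() drop whatever lacks the matching checksum or SG support:

	/* hypothetical probe fragment, before register_netdevice() */
	dev->features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_IP_CSUM;
	dev->features = netdev_fix_features(dev->features, dev->name);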
3977
1da177e4
LT
3978/**
3979 * register_netdevice - register a network device
3980 * @dev: device to register
3981 *
3982 * Take a completed network device structure and add it to the kernel
3983 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3984 * chain. 0 is returned on success. A negative errno code is returned
3985 * on a failure to set up the device, or if the name is a duplicate.
3986 *
3987 * Callers must hold the rtnl semaphore. You may want
3988 * register_netdev() instead of this.
3989 *
3990 * BUGS:
3991 * The locking appears insufficient to guarantee two parallel registers
3992 * will not get the same name.
3993 */
3994
3995int register_netdevice(struct net_device *dev)
3996{
3997 struct hlist_head *head;
3998 struct hlist_node *p;
3999 int ret;
d314774c 4000 struct net *net = dev_net(dev);
1da177e4
LT
4001
4002 BUG_ON(dev_boot_phase);
4003 ASSERT_RTNL();
4004
b17a7c17
SH
4005 might_sleep();
4006
1da177e4
LT
4007 /* When net_device's are persistent, this will be fatal. */
4008 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
d314774c 4009 BUG_ON(!net);
1da177e4 4010
f1f28aa3 4011 spin_lock_init(&dev->addr_list_lock);
cf508b12 4012 netdev_set_addr_lockdep_class(dev);
c773e847 4013 netdev_init_queue_locks(dev);
1da177e4 4014
1da177e4
LT
4015 dev->iflink = -1;
4016
d314774c
SH
4017#ifdef CONFIG_COMPAT_NET_DEV_OPS
4018 /* Netdevice_ops API compatibility support.
4019 * This is temporary until all network devices are converted.
4020 */
4021 if (dev->netdev_ops) {
4022 const struct net_device_ops *ops = dev->netdev_ops;
4023
4024 dev->init = ops->ndo_init;
4025 dev->uninit = ops->ndo_uninit;
4026 dev->open = ops->ndo_open;
4027 dev->change_rx_flags = ops->ndo_change_rx_flags;
4028 dev->set_rx_mode = ops->ndo_set_rx_mode;
4029 dev->set_multicast_list = ops->ndo_set_multicast_list;
4030 dev->set_mac_address = ops->ndo_set_mac_address;
4031 dev->validate_addr = ops->ndo_validate_addr;
4032 dev->do_ioctl = ops->ndo_do_ioctl;
4033 dev->set_config = ops->ndo_set_config;
4034 dev->change_mtu = ops->ndo_change_mtu;
4035 dev->tx_timeout = ops->ndo_tx_timeout;
4036 dev->get_stats = ops->ndo_get_stats;
4037 dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4038 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4039 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4040#ifdef CONFIG_NET_POLL_CONTROLLER
4041 dev->poll_controller = ops->ndo_poll_controller;
4042#endif
4043 } else {
4044 char drivername[64];
4045 pr_info("%s (%s): not using net_device_ops yet\n",
4046 dev->name, netdev_drivername(dev, drivername, 64));
4047
4048 /* This works only because net_device_ops and the
4049 compatibility structure are the same. */
4050 dev->netdev_ops = (void *) &(dev->init);
4051 }
4052#endif
4053
1da177e4 4054 /* Init, if this function is available */
d314774c
SH
4055 if (dev->netdev_ops->ndo_init) {
4056 ret = dev->netdev_ops->ndo_init(dev);
1da177e4
LT
4057 if (ret) {
4058 if (ret > 0)
4059 ret = -EIO;
90833aa4 4060 goto out;
1da177e4
LT
4061 }
4062 }
4ec93edb 4063
1da177e4
LT
4064 if (!dev_valid_name(dev->name)) {
4065 ret = -EINVAL;
7ce1b0ed 4066 goto err_uninit;
1da177e4
LT
4067 }
4068
881d966b 4069 dev->ifindex = dev_new_index(net);
1da177e4
LT
4070 if (dev->iflink == -1)
4071 dev->iflink = dev->ifindex;
4072
4073 /* Check for existence of name */
881d966b 4074 head = dev_name_hash(net, dev->name);
1da177e4
LT
4075 hlist_for_each(p, head) {
4076 struct net_device *d
4077 = hlist_entry(p, struct net_device, name_hlist);
4078 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4079 ret = -EEXIST;
7ce1b0ed 4080 goto err_uninit;
1da177e4 4081 }
4ec93edb 4082 }
1da177e4 4083
d212f87b
SH
4084 /* Fix illegal checksum combinations */
4085 if ((dev->features & NETIF_F_HW_CSUM) &&
4086 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4087 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4088 dev->name);
4089 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4090 }
4091
4092 if ((dev->features & NETIF_F_NO_CSUM) &&
4093 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4094 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4095 dev->name);
4096 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4097 }
4098
b63365a2 4099 dev->features = netdev_fix_features(dev->features, dev->name);
1da177e4 4100
e5a4a72d
LB
4101 /* Enable software GSO if SG is supported. */
4102 if (dev->features & NETIF_F_SG)
4103 dev->features |= NETIF_F_GSO;
4104
aaf8cdc3 4105 netdev_initialize_kobject(dev);
8b41d188 4106 ret = netdev_register_kobject(dev);
b17a7c17 4107 if (ret)
7ce1b0ed 4108 goto err_uninit;
b17a7c17
SH
4109 dev->reg_state = NETREG_REGISTERED;
4110
1da177e4
LT
4111 /*
4112 * Default initial state at registration is that the
4113 * device is present.
4114 */
4115
4116 set_bit(__LINK_STATE_PRESENT, &dev->state);
4117
1da177e4 4118 dev_init_scheduler(dev);
1da177e4 4119 dev_hold(dev);
ce286d32 4120 list_netdevice(dev);
1da177e4
LT
4121
4122 /* Notify protocols that a new device appeared. */
056925ab 4123 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
fcc5a03a 4124 ret = notifier_to_errno(ret);
93ee31f1
DL
4125 if (ret) {
4126 rollback_registered(dev);
4127 dev->reg_state = NETREG_UNREGISTERED;
4128 }
1da177e4
LT
4129
4130out:
4131 return ret;
7ce1b0ed
HX
4132
4133err_uninit:
d314774c
SH
4134 if (dev->netdev_ops->ndo_uninit)
4135 dev->netdev_ops->ndo_uninit(dev);
7ce1b0ed 4136 goto out;
1da177e4
LT
4137}
4138
4139/**
4140 * register_netdev - register a network device
4141 * @dev: device to register
4142 *
4143 * Take a completed network device structure and add it to the kernel
4144 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4145 * chain. 0 is returned on success. A negative errno code is returned
4146 * on a failure to set up the device, or if the name is a duplicate.
4147 *
38b4da38 4148 * This is a wrapper around register_netdevice that takes the rtnl semaphore
1da177e4
LT
4149 * and expands the device name if you passed a format string to
4150 * alloc_netdev.
4151 */
4152int register_netdev(struct net_device *dev)
4153{
4154 int err;
4155
4156 rtnl_lock();
4157
4158 /*
4159 * If the name is a format string the caller wants us to do a
4160 * name allocation.
4161 */
4162 if (strchr(dev->name, '%')) {
4163 err = dev_alloc_name(dev, dev->name);
4164 if (err < 0)
4165 goto out;
4166 }
4ec93edb 4167
1da177e4
LT
4168 err = register_netdevice(dev);
4169out:
4170 rtnl_unlock();
4171 return err;
4172}
4173EXPORT_SYMBOL(register_netdev);
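A minimal sketch of the registration sequence this wrapper is meant for (a hypothetical "foo" driver; foo_priv and the use of ether_setup() are assumptions). Passing a "foo%d" format string lets register_netdev() pick the first free name:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

struct foo_priv {
	int id;				/* hypothetical driver state */
};

static int foo_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(sizeof(struct foo_priv), "foo%d", ether_setup);
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);	/* takes rtnl, expands "foo%d" */
	if (err) {
		free_netdev(dev);	/* never registered: plain free */
		return err;
	}
	return 0;
}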
4174
4175/*
4176 * netdev_wait_allrefs - wait until all references are gone.
4177 *
4178 * This is called when unregistering network devices.
4179 *
4180 * Any protocol or device that holds a reference should register
4181 * for netdevice notification, and cleanup and put back the
4182 * reference if they receive an UNREGISTER event.
4183 * We can get stuck here if buggy protocols don't correctly
4ec93edb 4184 * call dev_put.
1da177e4
LT
4185 */
4186static void netdev_wait_allrefs(struct net_device *dev)
4187{
4188 unsigned long rebroadcast_time, warning_time;
4189
4190 rebroadcast_time = warning_time = jiffies;
4191 while (atomic_read(&dev->refcnt) != 0) {
4192 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6756ae4b 4193 rtnl_lock();
1da177e4
LT
4194
4195 /* Rebroadcast unregister notification */
056925ab 4196 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
1da177e4
LT
4197
4198 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4199 &dev->state)) {
4200 /* We must not have linkwatch events
4201 * pending on unregister. If this
4202 * happens, we simply run the queue
4203 * unscheduled, resulting in a noop
4204 * for this device.
4205 */
4206 linkwatch_run_queue();
4207 }
4208
6756ae4b 4209 __rtnl_unlock();
1da177e4
LT
4210
4211 rebroadcast_time = jiffies;
4212 }
4213
4214 msleep(250);
4215
4216 if (time_after(jiffies, warning_time + 10 * HZ)) {
4217 printk(KERN_EMERG "unregister_netdevice: "
4218 "waiting for %s to become free. Usage "
4219 "count = %d\n",
4220 dev->name, atomic_read(&dev->refcnt));
4221 warning_time = jiffies;
4222 }
4223 }
4224}
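A hedged sketch of the rule spelled out above: a subsystem that keeps a long-lived dev_hold() reference registers a netdevice notifier and drops that reference on NETDEV_UNREGISTER, so netdev_wait_allrefs() can complete (foo_tracked_dev and the notifier are illustrative, not from this file):

#include <linux/netdevice.h>
#include <linux/notifier.h>

static struct net_device *foo_tracked_dev;	/* holds a dev_hold() reference */

static int foo_netdev_event(struct notifier_block *nb,
			    unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;	/* in this kernel, ptr is the device */

	if (event == NETDEV_UNREGISTER && dev == foo_tracked_dev) {
		dev_put(foo_tracked_dev);	/* give our reference back */
		foo_tracked_dev = NULL;
	}
	return NOTIFY_DONE;
}

static struct notifier_block foo_netdev_notifier = {
	.notifier_call	= foo_netdev_event,
};
/* registered once at module init with
 * register_netdevice_notifier(&foo_netdev_notifier) */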
4225
4226/* The sequence is:
4227 *
4228 * rtnl_lock();
4229 * ...
4230 * register_netdevice(x1);
4231 * register_netdevice(x2);
4232 * ...
4233 * unregister_netdevice(y1);
4234 * unregister_netdevice(y2);
4235 * ...
4236 * rtnl_unlock();
4237 * free_netdev(y1);
4238 * free_netdev(y2);
4239 *
58ec3b4d 4240 * We are invoked by rtnl_unlock().
1da177e4 4241 * This allows us to deal with problems:
b17a7c17 4242 * 1) We can delete sysfs objects which invoke hotplug
1da177e4
LT
4243 * without deadlocking with linkwatch via keventd.
4244 * 2) Since we run with the RTNL semaphore not held, we can sleep
4245 * safely in order to wait for the netdev refcnt to drop to zero.
58ec3b4d
HX
4246 *
4247 * We must not return until all unregister events added during
4248 * the interval the lock was held have been completed.
1da177e4 4249 */
1da177e4
LT
4250void netdev_run_todo(void)
4251{
626ab0e6 4252 struct list_head list;
1da177e4 4253
1da177e4 4254 /* Snapshot list, allow later requests */
626ab0e6 4255 list_replace_init(&net_todo_list, &list);
58ec3b4d
HX
4256
4257 __rtnl_unlock();
626ab0e6 4258
1da177e4
LT
4259 while (!list_empty(&list)) {
4260 struct net_device *dev
4261 = list_entry(list.next, struct net_device, todo_list);
4262 list_del(&dev->todo_list);
4263
b17a7c17
SH
4264 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
4265 printk(KERN_ERR "network todo '%s' but state %d\n",
4266 dev->name, dev->reg_state);
4267 dump_stack();
4268 continue;
4269 }
1da177e4 4270
b17a7c17 4271 dev->reg_state = NETREG_UNREGISTERED;
1da177e4 4272
6e583ce5
SH
4273 on_each_cpu(flush_backlog, dev, 1);
4274
b17a7c17 4275 netdev_wait_allrefs(dev);
1da177e4 4276
b17a7c17
SH
4277 /* paranoia */
4278 BUG_ON(atomic_read(&dev->refcnt));
547b792c
IJ
4279 WARN_ON(dev->ip_ptr);
4280 WARN_ON(dev->ip6_ptr);
4281 WARN_ON(dev->dn_ptr);
1da177e4 4282
b17a7c17
SH
4283 if (dev->destructor)
4284 dev->destructor(dev);
9093bbb2
SH
4285
4286 /* Free network device */
4287 kobject_put(&dev->dev.kobj);
1da177e4 4288 }
1da177e4
LT
4289}
4290
eeda3fd6
SH
4291/**
4292 * dev_get_stats - get network device statistics
4293 * @dev: device to get statistics from
4294 *
4295 * Get network statistics from device. The device driver may provide
4296 * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
4297 * the internal statistics structure is used.
4298 */
4299const struct net_device_stats *dev_get_stats(struct net_device *dev)
4300 {
4301 const struct net_device_ops *ops = dev->netdev_ops;
4302
4303 if (ops->ndo_get_stats)
4304 return ops->ndo_get_stats(dev);
4305 else
4306 return &dev->stats;
c45d286e 4307}
eeda3fd6 4308EXPORT_SYMBOL(dev_get_stats);
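A hedged driver-side sketch of the override path: a hypothetical driver folds a hardware counter into dev->stats from its own ndo_get_stats (foo_priv and foo_read_hw_drops() are illustrative stubs, not real APIs):

#include <linux/netdevice.h>

struct foo_priv {
	void __iomem *regs;		/* hypothetical MMIO window */
};

static unsigned long foo_read_hw_drops(struct foo_priv *priv)
{
	return 0;			/* stub for the sketch */
}

static struct net_device_stats *foo_get_stats(struct net_device *dev)
{
	struct foo_priv *priv = netdev_priv(dev);

	/* fold a hypothetical hardware drop counter into the soft stats */
	dev->stats.rx_dropped = foo_read_hw_drops(priv);
	return &dev->stats;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_get_stats	= foo_get_stats,
};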
c45d286e 4309
dc2b4847 4310static void netdev_init_one_queue(struct net_device *dev,
e8a0464c
DM
4311 struct netdev_queue *queue,
4312 void *_unused)
dc2b4847 4313{
dc2b4847
DM
4314 queue->dev = dev;
4315}
4316
bb949fbd
DM
4317static void netdev_init_queues(struct net_device *dev)
4318{
e8a0464c
DM
4319 netdev_init_one_queue(dev, &dev->rx_queue, NULL);
4320 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
c3f26a26 4321 spin_lock_init(&dev->tx_global_lock);
bb949fbd
DM
4322}
4323
1da177e4 4324/**
f25f4e44 4325 * alloc_netdev_mq - allocate network device
1da177e4
LT
4326 * @sizeof_priv: size of private data to allocate space for
4327 * @name: device name format string
4328 * @setup: callback to initialize device
f25f4e44 4329 * @queue_count: the number of subqueues to allocate
1da177e4
LT
4330 *
4331 * Allocates a struct net_device with private data area for driver use
f25f4e44
PWJ
4332 * and performs basic initialization. Also allocates subqueue structs
4333 * for each queue on the device at the end of the netdevice.
1da177e4 4334 */
f25f4e44
PWJ
4335struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
4336 void (*setup)(struct net_device *), unsigned int queue_count)
1da177e4 4337{
e8a0464c 4338 struct netdev_queue *tx;
1da177e4 4339 struct net_device *dev;
7943986c 4340 size_t alloc_size;
e8a0464c 4341 void *p;
1da177e4 4342
b6fe17d6
SH
4343 BUG_ON(strlen(name) >= sizeof(dev->name));
4344
fd2ea0a7 4345 alloc_size = sizeof(struct net_device);
d1643d24
AD
4346 if (sizeof_priv) {
4347 /* ensure 32-byte alignment of private area */
4348 alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
4349 alloc_size += sizeof_priv;
4350 }
4351 /* ensure 32-byte alignment of whole construct */
4352 alloc_size += NETDEV_ALIGN_CONST;
1da177e4 4353
31380de9 4354 p = kzalloc(alloc_size, GFP_KERNEL);
1da177e4 4355 if (!p) {
b6fe17d6 4356 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
1da177e4
LT
4357 return NULL;
4358 }
1da177e4 4359
7943986c 4360 tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
e8a0464c
DM
4361 if (!tx) {
4362 printk(KERN_ERR "alloc_netdev: Unable to allocate "
4363 "tx qdiscs.\n");
4364 kfree(p);
4365 return NULL;
4366 }
4367
1da177e4
LT
4368 dev = (struct net_device *)
4369 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
4370 dev->padded = (char *)dev - (char *)p;
c346dca1 4371 dev_net_set(dev, &init_net);
1da177e4 4372
e8a0464c
DM
4373 dev->_tx = tx;
4374 dev->num_tx_queues = queue_count;
fd2ea0a7 4375 dev->real_num_tx_queues = queue_count;
e8a0464c 4376
f25f4e44
PWJ
4377 if (sizeof_priv) {
4378 dev->priv = ((char *)dev +
fd2ea0a7 4379 ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
f25f4e44
PWJ
4380 & ~NETDEV_ALIGN_CONST));
4381 }
4382
82cc1a7a 4383 dev->gso_max_size = GSO_MAX_SIZE;
1da177e4 4384
bb949fbd
DM
4385 netdev_init_queues(dev);
4386
bea3348e 4387 netpoll_netdev_init(dev);
1da177e4
LT
4388 setup(dev);
4389 strcpy(dev->name, name);
4390 return dev;
4391}
f25f4e44 4392EXPORT_SYMBOL(alloc_netdev_mq);
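A hedged fragment showing the multiqueue variant in use (hypothetical values; the "foo" names and ether_setup() are assumptions): a device with four transmit queues whose private area is reachable through netdev_priv():

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

struct foo_priv { int id; };		/* hypothetical driver state */

static struct net_device *foo_alloc(void)
{
	/* four real TX queues; private area sized for struct foo_priv */
	return alloc_netdev_mq(sizeof(struct foo_priv), "foo%d",
			       ether_setup, 4);
}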
1da177e4
LT
4393
4394/**
4395 * free_netdev - free network device
4396 * @dev: device
4397 *
4ec93edb
YH
4398 * This function does the last stage of destroying an allocated device
4399 * interface. The reference to the device object is released.
1da177e4
LT
4400 * If this is the last reference then it will be freed.
4401 */
4402void free_netdev(struct net_device *dev)
4403{
f3005d7f
DL
4404 release_net(dev_net(dev));
4405
e8a0464c
DM
4406 kfree(dev->_tx);
4407
3041a069 4408 /* Compatibility with error handling in drivers */
1da177e4
LT
4409 if (dev->reg_state == NETREG_UNINITIALIZED) {
4410 kfree((char *)dev - dev->padded);
4411 return;
4412 }
4413
4414 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4415 dev->reg_state = NETREG_RELEASED;
4416
43cb76d9
GKH
4417 /* will free via device release */
4418 put_device(&dev->dev);
1da177e4 4419}
4ec93edb 4420
f0db275a
SH
4421/**
4422 * synchronize_net - Synchronize with packet receive processing
4423 *
4424 * Wait for packets currently being received to be done.
4425 * Does not block later packets from starting.
4426 */
4ec93edb 4427void synchronize_net(void)
1da177e4
LT
4428{
4429 might_sleep();
fbd568a3 4430 synchronize_rcu();
1da177e4
LT
4431}
4432
4433/**
4434 * unregister_netdevice - remove device from the kernel
4435 * @dev: device
4436 *
4437 * This function shuts down a device interface and removes it
d59b54b1 4438 * from the kernel tables.
1da177e4
LT
4439 *
4440 * Callers must hold the rtnl semaphore. You may want
4441 * unregister_netdev() instead of this.
4442 */
4443
22f8cde5 4444void unregister_netdevice(struct net_device *dev)
1da177e4 4445{
a6620712
HX
4446 ASSERT_RTNL();
4447
93ee31f1 4448 rollback_registered(dev);
1da177e4
LT
4449 /* Finish processing unregister after unlock */
4450 net_set_todo(dev);
1da177e4
LT
4451}
4452
4453/**
4454 * unregister_netdev - remove device from the kernel
4455 * @dev: device
4456 *
4457 * This function shuts down a device interface and removes it
d59b54b1 4458 * from the kernel tables.
1da177e4
LT
4459 *
4460 * This is just a wrapper for unregister_netdevice that takes
4461 * the rtnl semaphore. In general you want to use this and not
4462 * unregister_netdevice.
4463 */
4464void unregister_netdev(struct net_device *dev)
4465{
4466 rtnl_lock();
4467 unregister_netdevice(dev);
4468 rtnl_unlock();
4469}
4470
4471EXPORT_SYMBOL(unregister_netdev);
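A minimal sketch of the matching teardown order (hypothetical remove hook): unregister first, then free once the todo processing has released the device:

static void foo_remove(struct net_device *dev)
{
	unregister_netdev(dev);		/* takes rtnl, runs the todo list */
	free_netdev(dev);		/* final release of the kobject */
}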
4472
ce286d32
EB
4473/**
4474 * dev_change_net_namespace - move device to a different network namespace
4475 * @dev: device
4476 * @net: network namespace
4477 * @pat: if not NULL, name pattern to try if the current device name
4478 * is already taken in the destination network namespace.
4479 *
4480 * This function shuts down a device interface and moves it
4481 * to a new network namespace. On success 0 is returned, on
4482 * a failure a negative errno code is returned.
4483 *
4484 * Callers must hold the rtnl semaphore.
4485 */
4486
4487int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4488{
4489 char buf[IFNAMSIZ];
4490 const char *destname;
4491 int err;
4492
4493 ASSERT_RTNL();
4494
4495 /* Don't allow namespace local devices to be moved. */
4496 err = -EINVAL;
4497 if (dev->features & NETIF_F_NETNS_LOCAL)
4498 goto out;
4499
3891845e
EB
4500#ifdef CONFIG_SYSFS
4501 /* Don't allow real devices to be moved when sysfs
4502 * is enabled.
4503 */
4504 err = -EINVAL;
4505 if (dev->dev.parent)
4506 goto out;
4507#endif
4508
ce286d32
EB
4509 /* Ensure the device has been registered */
4510 err = -EINVAL;
4511 if (dev->reg_state != NETREG_REGISTERED)
4512 goto out;
4513
4514 /* Get out if there is nothing to do */
4515 err = 0;
878628fb 4516 if (net_eq(dev_net(dev), net))
ce286d32
EB
4517 goto out;
4518
4519 /* Pick the destination device name, and ensure
4520 * we can use it in the destination network namespace.
4521 */
4522 err = -EEXIST;
4523 destname = dev->name;
4524 if (__dev_get_by_name(net, destname)) {
4525 /* We get here if we can't use the current device name */
4526 if (!pat)
4527 goto out;
4528 if (!dev_valid_name(pat))
4529 goto out;
4530 if (strchr(pat, '%')) {
4531 if (__dev_alloc_name(net, pat, buf) < 0)
4532 goto out;
4533 destname = buf;
4534 } else
4535 destname = pat;
4536 if (__dev_get_by_name(net, destname))
4537 goto out;
4538 }
4539
4540 /*
4541 * And now a mini version of register_netdevice unregister_netdevice.
4542 */
4543
4544 /* If device is running close it first. */
9b772652 4545 dev_close(dev);
ce286d32
EB
4546
4547 /* And unlink it from device chain */
4548 err = -ENODEV;
4549 unlist_netdevice(dev);
4550
4551 synchronize_net();
4552
4553 /* Shutdown queueing discipline. */
4554 dev_shutdown(dev);
4555
4556 /* Notify protocols that we are about to destroy
4557 this device. They should clean all the things.
4558 */
4559 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4560
4561 /*
4562 * Flush the unicast and multicast chains
4563 */
4564 dev_addr_discard(dev);
4565
3891845e
EB
4566 netdev_unregister_kobject(dev);
4567
ce286d32 4568 /* Actually switch the network namespace */
c346dca1 4569 dev_net_set(dev, net);
ce286d32
EB
4570
4571 /* Assign the new device name */
4572 if (destname != dev->name)
4573 strcpy(dev->name, destname);
4574
4575 /* If there is an ifindex conflict assign a new one */
4576 if (__dev_get_by_index(net, dev->ifindex)) {
4577 int iflink = (dev->iflink == dev->ifindex);
4578 dev->ifindex = dev_new_index(net);
4579 if (iflink)
4580 dev->iflink = dev->ifindex;
4581 }
4582
8b41d188 4583 /* Fixup kobjects */
aaf8cdc3 4584 err = netdev_register_kobject(dev);
8b41d188 4585 WARN_ON(err);
ce286d32
EB
4586
4587 /* Add the device back in the hashes */
4588 list_netdevice(dev);
4589
4590 /* Notify protocols, that a new device appeared. */
4591 call_netdevice_notifiers(NETDEV_REGISTER, dev);
4592
4593 synchronize_net();
4594 err = 0;
4595out:
4596 return err;
4597}
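A hedged caller-side fragment (in practice this is driven from the rtnetlink code): move dev into the namespace net, falling back to an "eth%d" pattern if its current name is already taken there; the rtnl semaphore must be held, as the kernel-doc above requires:

	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, net, "eth%d");
	rtnl_unlock();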
4598
1da177e4
LT
4599static int dev_cpu_callback(struct notifier_block *nfb,
4600 unsigned long action,
4601 void *ocpu)
4602{
4603 struct sk_buff **list_skb;
37437bb2 4604 struct Qdisc **list_net;
1da177e4
LT
4605 struct sk_buff *skb;
4606 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4607 struct softnet_data *sd, *oldsd;
4608
8bb78442 4609 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
1da177e4
LT
4610 return NOTIFY_OK;
4611
4612 local_irq_disable();
4613 cpu = smp_processor_id();
4614 sd = &per_cpu(softnet_data, cpu);
4615 oldsd = &per_cpu(softnet_data, oldcpu);
4616
4617 /* Find end of our completion_queue. */
4618 list_skb = &sd->completion_queue;
4619 while (*list_skb)
4620 list_skb = &(*list_skb)->next;
4621 /* Append completion queue from offline CPU. */
4622 *list_skb = oldsd->completion_queue;
4623 oldsd->completion_queue = NULL;
4624
4625 /* Find end of our output_queue. */
4626 list_net = &sd->output_queue;
4627 while (*list_net)
4628 list_net = &(*list_net)->next_sched;
4629 /* Append output queue from offline CPU. */
4630 *list_net = oldsd->output_queue;
4631 oldsd->output_queue = NULL;
4632
4633 raise_softirq_irqoff(NET_TX_SOFTIRQ);
4634 local_irq_enable();
4635
4636 /* Process offline CPU's input_pkt_queue */
4637 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4638 netif_rx(skb);
4639
4640 return NOTIFY_OK;
4641}
1da177e4 4642
db217334
CL
4643#ifdef CONFIG_NET_DMA
4644/**
0ed72ec4
RD
4645 * net_dma_rebalance - try to maintain one DMA channel per CPU
4646 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4647 *
4648 * This is called when the number of channels allocated to the net_dma client
4649 * changes. The net_dma client tries to have one DMA channel per CPU.
db217334 4650 */
d379b01e
DW
4651
4652static void net_dma_rebalance(struct net_dma *net_dma)
db217334 4653{
d379b01e 4654 unsigned int cpu, i, n, chan_idx;
db217334
CL
4655 struct dma_chan *chan;
4656
d379b01e 4657 if (cpus_empty(net_dma->channel_mask)) {
db217334 4658 for_each_online_cpu(cpu)
29bbd72d 4659 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
db217334
CL
4660 return;
4661 }
4662
4663 i = 0;
4664 cpu = first_cpu(cpu_online_map);
4665
0e12f848 4666 for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
d379b01e
DW
4667 chan = net_dma->channels[chan_idx];
4668
4669 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4670 + (i < (num_online_cpus() %
4671 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
db217334
CL
4672
4673 while(n) {
29bbd72d 4674 per_cpu(softnet_data, cpu).net_dma = chan;
db217334
CL
4675 cpu = next_cpu(cpu, cpu_online_map);
4676 n--;
4677 }
4678 i++;
4679 }
db217334
CL
4680}
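A worked example of the split computed above (a plain reading of the loop, not taken from the source): with 8 online CPUs and 3 channels set in channel_mask, num_online_cpus() / cpus_weight() is 2 with a remainder of 2, so the first two channels each get n = 3 CPUs and the last gets n = 2, i.e. 3 + 3 + 2 = 8.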
4681
4682/**
4683 * netdev_dma_event - event callback for the net_dma_client
4684 * @client: should always be net_dma_client
f4b8ea78 4685 * @chan: DMA channel for the event
0ed72ec4 4686 * @state: DMA state to be handled
db217334 4687 */
d379b01e
DW
4688static enum dma_state_client
4689netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4690 enum dma_state state)
4691{
4692 int i, found = 0, pos = -1;
4693 struct net_dma *net_dma =
4694 container_of(client, struct net_dma, client);
4695 enum dma_state_client ack = DMA_DUP; /* default: take no action */
4696
4697 spin_lock(&net_dma->lock);
4698 switch (state) {
4699 case DMA_RESOURCE_AVAILABLE:
0c0b0aca 4700 for (i = 0; i < nr_cpu_ids; i++)
d379b01e
DW
4701 if (net_dma->channels[i] == chan) {
4702 found = 1;
4703 break;
4704 } else if (net_dma->channels[i] == NULL && pos < 0)
4705 pos = i;
4706
4707 if (!found && pos >= 0) {
4708 ack = DMA_ACK;
4709 net_dma->channels[pos] = chan;
4710 cpu_set(pos, net_dma->channel_mask);
4711 net_dma_rebalance(net_dma);
4712 }
db217334
CL
4713 break;
4714 case DMA_RESOURCE_REMOVED:
0c0b0aca 4715 for (i = 0; i < nr_cpu_ids; i++)
d379b01e
DW
4716 if (net_dma->channels[i] == chan) {
4717 found = 1;
4718 pos = i;
4719 break;
4720 }
4721
4722 if (found) {
4723 ack = DMA_ACK;
4724 cpu_clear(pos, net_dma->channel_mask);
4725 net_dma->channels[i] = NULL;
4726 net_dma_rebalance(net_dma);
4727 }
db217334
CL
4728 break;
4729 default:
4730 break;
4731 }
d379b01e
DW
4732 spin_unlock(&net_dma->lock);
4733
4734 return ack;
db217334
CL
4735}
4736
4737/**
f0db275a 4738 * netdev_dma_register - register the networking subsystem as a DMA client
db217334
CL
4739 */
4740static int __init netdev_dma_register(void)
4741{
0c0b0aca
MT
4742 net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
4743 GFP_KERNEL);
4744 if (unlikely(!net_dma.channels)) {
4745 printk(KERN_NOTICE
4746 "netdev_dma: no memory for net_dma.channels\n");
4747 return -ENOMEM;
4748 }
d379b01e
DW
4749 spin_lock_init(&net_dma.lock);
4750 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4751 dma_async_client_register(&net_dma.client);
4752 dma_async_client_chan_request(&net_dma.client);
db217334
CL
4753 return 0;
4754}
4755
4756#else
4757static int __init netdev_dma_register(void) { return -ENODEV; }
4758#endif /* CONFIG_NET_DMA */
1da177e4 4759
7f353bf2 4760/**
b63365a2
HX
4761 * netdev_increment_features - increment feature set by one
4762 * @all: current feature set
4763 * @one: new feature set
4764 * @mask: mask feature set
7f353bf2
HX
4765 *
4766 * Computes a new feature set after adding a device with feature set
b63365a2
HX
4767 * @one to the master device with current feature set @all. Will not
4768 * enable anything that is off in @mask. Returns the new feature set.
7f353bf2 4769 */
b63365a2
HX
4770unsigned long netdev_increment_features(unsigned long all, unsigned long one,
4771 unsigned long mask)
4772{
4773 /* If device needs checksumming, downgrade to it. */
4774 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4775 all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
4776 else if (mask & NETIF_F_ALL_CSUM) {
4777 /* If one device supports v4/v6 checksumming, set for all. */
4778 if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
4779 !(all & NETIF_F_GEN_CSUM)) {
4780 all &= ~NETIF_F_ALL_CSUM;
4781 all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
4782 }
e2a6b852 4783
b63365a2
HX
4784 /* If one device supports hw checksumming, set for all. */
4785 if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
4786 all &= ~NETIF_F_ALL_CSUM;
4787 all |= NETIF_F_HW_CSUM;
4788 }
4789 }
7f353bf2 4790
b63365a2 4791 one |= NETIF_F_ALL_CSUM;
7f353bf2 4792
b63365a2
HX
4793 one |= all & NETIF_F_ONE_FOR_ALL;
4794 all &= one | NETIF_F_LLTX | NETIF_F_GSO;
4795 all |= one & mask & NETIF_F_ONE_FOR_ALL;
7f353bf2
HX
4796
4797 return all;
4798}
b63365a2 4799EXPORT_SYMBOL(netdev_increment_features);
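A hedged sketch of the intended caller, e.g. a bridge- or bonding-style master recomputing its feature set from its slave devices (master, mask, slaves[] and nslaves are illustrative assumptions, not names from this file):

	unsigned long features = mask & ~NETIF_F_ONE_FOR_ALL;
	int i;

	for (i = 0; i < nslaves; i++)
		features = netdev_increment_features(features,
						     slaves[i]->features, mask);
	master->features = netdev_fix_features(features, master->name);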
7f353bf2 4800
30d97d35
PE
4801static struct hlist_head *netdev_create_hash(void)
4802{
4803 int i;
4804 struct hlist_head *hash;
4805
4806 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4807 if (hash != NULL)
4808 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4809 INIT_HLIST_HEAD(&hash[i]);
4810
4811 return hash;
4812}
4813
881d966b 4814/* Initialize per network namespace state */
4665079c 4815static int __net_init netdev_init(struct net *net)
881d966b 4816{
881d966b 4817 INIT_LIST_HEAD(&net->dev_base_head);
881d966b 4818
30d97d35
PE
4819 net->dev_name_head = netdev_create_hash();
4820 if (net->dev_name_head == NULL)
4821 goto err_name;
881d966b 4822
30d97d35
PE
4823 net->dev_index_head = netdev_create_hash();
4824 if (net->dev_index_head == NULL)
4825 goto err_idx;
881d966b
EB
4826
4827 return 0;
30d97d35
PE
4828
4829err_idx:
4830 kfree(net->dev_name_head);
4831err_name:
4832 return -ENOMEM;
881d966b
EB
4833}
4834
f0db275a
SH
4835/**
4836 * netdev_drivername - network driver for the device
4837 * @dev: network device
4838 * @buffer: buffer for resulting name
4839 * @len: size of buffer
4840 *
4841 * Determine network driver for device.
4842 */
cf04a4c7 4843char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
6579e57b 4844{
cf04a4c7
SH
4845 const struct device_driver *driver;
4846 const struct device *parent;
6579e57b
AV
4847
4848 if (len <= 0 || !buffer)
4849 return buffer;
4850 buffer[0] = 0;
4851
4852 parent = dev->dev.parent;
4853
4854 if (!parent)
4855 return buffer;
4856
4857 driver = parent->driver;
4858 if (driver && driver->name)
4859 strlcpy(buffer, driver->name, len);
4860 return buffer;
4861}
4862
4665079c 4863static void __net_exit netdev_exit(struct net *net)
881d966b
EB
4864{
4865 kfree(net->dev_name_head);
4866 kfree(net->dev_index_head);
4867}
4868
022cbae6 4869static struct pernet_operations __net_initdata netdev_net_ops = {
881d966b
EB
4870 .init = netdev_init,
4871 .exit = netdev_exit,
4872};
4873
4665079c 4874static void __net_exit default_device_exit(struct net *net)
ce286d32
EB
4875{
4876 struct net_device *dev, *next;
4877 /*
4878 * Push all migratable network devices back to the
4879 * initial network namespace
4880 */
4881 rtnl_lock();
4882 for_each_netdev_safe(net, dev, next) {
4883 int err;
aca51397 4884 char fb_name[IFNAMSIZ];
ce286d32
EB
4885
4886 /* Ignore unmovable devices (e.g. loopback) */
4887 if (dev->features & NETIF_F_NETNS_LOCAL)
4888 continue;
4889
d0c082ce
EB
4890 /* Delete virtual devices */
4891 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
4892 dev->rtnl_link_ops->dellink(dev);
4893 continue;
4894 }
4895
ce286d32 4896 /* Push remaining network devices to init_net */
aca51397
PE
4897 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4898 err = dev_change_net_namespace(dev, &init_net, fb_name);
ce286d32 4899 if (err) {
aca51397 4900 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
ce286d32 4901 __func__, dev->name, err);
aca51397 4902 BUG();
ce286d32
EB
4903 }
4904 }
4905 rtnl_unlock();
4906}
4907
022cbae6 4908static struct pernet_operations __net_initdata default_device_ops = {
ce286d32
EB
4909 .exit = default_device_exit,
4910};
4911
1da177e4
LT
4912/*
4913 * Initialize the DEV module. At boot time this walks the device list and
4914 * unhooks any devices that fail to initialise (normally hardware not
4915 * present) and leaves us with a valid list of present and active devices.
4916 *
4917 */
4918
4919/*
4920 * This is called single threaded during boot, so no need
4921 * to take the rtnl semaphore.
4922 */
4923static int __init net_dev_init(void)
4924{
4925 int i, rc = -ENOMEM;
4926
4927 BUG_ON(!dev_boot_phase);
4928
1da177e4
LT
4929 if (dev_proc_init())
4930 goto out;
4931
8b41d188 4932 if (netdev_kobject_init())
1da177e4
LT
4933 goto out;
4934
4935 INIT_LIST_HEAD(&ptype_all);
82d8a867 4936 for (i = 0; i < PTYPE_HASH_SIZE; i++)
1da177e4
LT
4937 INIT_LIST_HEAD(&ptype_base[i]);
4938
881d966b
EB
4939 if (register_pernet_subsys(&netdev_net_ops))
4940 goto out;
1da177e4
LT
4941
4942 /*
4943 * Initialise the packet receive queues.
4944 */
4945
6f912042 4946 for_each_possible_cpu(i) {
1da177e4
LT
4947 struct softnet_data *queue;
4948
4949 queue = &per_cpu(softnet_data, i);
4950 skb_queue_head_init(&queue->input_pkt_queue);
1da177e4
LT
4951 queue->completion_queue = NULL;
4952 INIT_LIST_HEAD(&queue->poll_list);
bea3348e
SH
4953
4954 queue->backlog.poll = process_backlog;
4955 queue->backlog.weight = weight_p;
1da177e4
LT
4956 }
4957
1da177e4
LT
4958 dev_boot_phase = 0;
4959
505d4f73
EB
4960 /* The loopback device is special: if any other network device
4961 * is present in a network namespace, the loopback device must
4962 * be present too. Since we now dynamically allocate and free the
4963 * loopback device, ensure this invariant is maintained by
4964 * keeping the loopback device as the first device on the
4965 * list of network devices. This guarantees the loopback device
4966 * is the first device that appears and the last network device
4967 * that disappears.
4968 */
4969 if (register_pernet_device(&loopback_net_ops))
4970 goto out;
4971
4972 if (register_pernet_device(&default_device_ops))
4973 goto out;
4974
4975 netdev_dma_register();
4976
962cf36c
CM
4977 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4978 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
1da177e4
LT
4979
4980 hotcpu_notifier(dev_cpu_callback, 0);
4981 dst_init();
4982 dev_mcast_init();
4983 rc = 0;
4984out:
4985 return rc;
4986}
4987
4988subsys_initcall(net_dev_init);
4989
4990EXPORT_SYMBOL(__dev_get_by_index);
4991EXPORT_SYMBOL(__dev_get_by_name);
4992EXPORT_SYMBOL(__dev_remove_pack);
c2373ee9 4993EXPORT_SYMBOL(dev_valid_name);
1da177e4
LT
4994EXPORT_SYMBOL(dev_add_pack);
4995EXPORT_SYMBOL(dev_alloc_name);
4996EXPORT_SYMBOL(dev_close);
4997EXPORT_SYMBOL(dev_get_by_flags);
4998EXPORT_SYMBOL(dev_get_by_index);
4999EXPORT_SYMBOL(dev_get_by_name);
1da177e4
LT
5000EXPORT_SYMBOL(dev_open);
5001EXPORT_SYMBOL(dev_queue_xmit);
5002EXPORT_SYMBOL(dev_remove_pack);
5003EXPORT_SYMBOL(dev_set_allmulti);
5004EXPORT_SYMBOL(dev_set_promiscuity);
5005EXPORT_SYMBOL(dev_change_flags);
5006EXPORT_SYMBOL(dev_set_mtu);
5007EXPORT_SYMBOL(dev_set_mac_address);
5008EXPORT_SYMBOL(free_netdev);
5009EXPORT_SYMBOL(netdev_boot_setup_check);
5010EXPORT_SYMBOL(netdev_set_master);
5011EXPORT_SYMBOL(netdev_state_change);
5012EXPORT_SYMBOL(netif_receive_skb);
5013EXPORT_SYMBOL(netif_rx);
5014EXPORT_SYMBOL(register_gifconf);
5015EXPORT_SYMBOL(register_netdevice);
5016EXPORT_SYMBOL(register_netdevice_notifier);
5017EXPORT_SYMBOL(skb_checksum_help);
5018EXPORT_SYMBOL(synchronize_net);
5019EXPORT_SYMBOL(unregister_netdevice);
5020EXPORT_SYMBOL(unregister_netdevice_notifier);
5021EXPORT_SYMBOL(net_enable_timestamp);
5022EXPORT_SYMBOL(net_disable_timestamp);
5023EXPORT_SYMBOL(dev_get_flags);
5024
5025#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
5026EXPORT_SYMBOL(br_handle_frame_hook);
5027EXPORT_SYMBOL(br_fdb_get_hook);
5028EXPORT_SYMBOL(br_fdb_put_hook);
5029#endif
5030
1da177e4 5031EXPORT_SYMBOL(dev_load);
1da177e4
LT
5032
5033EXPORT_PER_CPU_SYMBOL(softnet_data);