]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/core/dev.c
Merge master.kernel.org:/pub/scm/linux/kernel/git/acme/net-2.6
[net-next-2.6.git] / net / core / dev.c
CommitLineData
1da177e4
LT
1/*
2 * NET3 Protocol independent device support routines.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Derived from the non IP parts of dev.c 1.0.19
02c30a84 10 * Authors: Ross Biro
1da177e4
LT
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Mark Evans, <evansmp@uhura.aston.ac.uk>
13 *
14 * Additional Authors:
15 * Florian la Roche <rzsfl@rz.uni-sb.de>
16 * Alan Cox <gw4pts@gw4pts.ampr.org>
17 * David Hinds <dahinds@users.sourceforge.net>
18 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
19 * Adam Sulmicki <adam@cfar.umd.edu>
20 * Pekka Riikonen <priikone@poesidon.pspt.fi>
21 *
22 * Changes:
23 * D.J. Barrow : Fixed bug where dev->refcnt gets set
24 * to 2 if register_netdev gets called
25 * before net_dev_init & also removed a
26 * few lines of code in the process.
27 * Alan Cox : device private ioctl copies fields back.
28 * Alan Cox : Transmit queue code does relevant
29 * stunts to keep the queue safe.
30 * Alan Cox : Fixed double lock.
31 * Alan Cox : Fixed promisc NULL pointer trap
32 * ???????? : Support the full private ioctl range
33 * Alan Cox : Moved ioctl permission check into
34 * drivers
35 * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
36 * Alan Cox : 100 backlog just doesn't cut it when
37 * you start doing multicast video 8)
38 * Alan Cox : Rewrote net_bh and list manager.
39 * Alan Cox : Fix ETH_P_ALL echoback lengths.
40 * Alan Cox : Took out transmit every packet pass
41 * Saved a few bytes in the ioctl handler
42 * Alan Cox : Network driver sets packet type before
43 * calling netif_rx. Saves a function
44 * call a packet.
45 * Alan Cox : Hashed net_bh()
46 * Richard Kooijman: Timestamp fixes.
47 * Alan Cox : Wrong field in SIOCGIFDSTADDR
48 * Alan Cox : Device lock protection.
49 * Alan Cox : Fixed nasty side effect of device close
50 * changes.
51 * Rudi Cilibrasi : Pass the right thing to
52 * set_mac_address()
53 * Dave Miller : 32bit quantity for the device lock to
54 * make it work out on a Sparc.
55 * Bjorn Ekwall : Added KERNELD hack.
56 * Alan Cox : Cleaned up the backlog initialise.
57 * Craig Metz : SIOCGIFCONF fix if space for under
58 * 1 device.
59 * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
60 * is no device open function.
61 * Andi Kleen : Fix error reporting for SIOCGIFCONF
62 * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
63 * Cyrus Durgin : Cleaned for KMOD
64 * Adam Sulmicki : Bug Fix : Network Device Unload
65 * A network device unload needs to purge
66 * the backlog queue.
67 * Paul Rusty Russell : SIOCSIFNAME
68 * Pekka Riikonen : Netdev boot-time settings code
69 * Andrew Morton : Make unregister_netdevice wait
70 * indefinitely on dev->refcnt
71 * J Hadi Salim : - Backlog queue sampling
72 * - netif_rx() feedback
73 */
74
75#include <asm/uaccess.h>
76#include <asm/system.h>
77#include <linux/bitops.h>
4fc268d2 78#include <linux/capability.h>
1da177e4
LT
79#include <linux/cpu.h>
80#include <linux/types.h>
81#include <linux/kernel.h>
82#include <linux/sched.h>
4a3e2f71 83#include <linux/mutex.h>
1da177e4
LT
84#include <linux/string.h>
85#include <linux/mm.h>
86#include <linux/socket.h>
87#include <linux/sockios.h>
88#include <linux/errno.h>
89#include <linux/interrupt.h>
90#include <linux/if_ether.h>
91#include <linux/netdevice.h>
92#include <linux/etherdevice.h>
93#include <linux/notifier.h>
94#include <linux/skbuff.h>
457c4cbc 95#include <net/net_namespace.h>
1da177e4
LT
96#include <net/sock.h>
97#include <linux/rtnetlink.h>
98#include <linux/proc_fs.h>
99#include <linux/seq_file.h>
100#include <linux/stat.h>
101#include <linux/if_bridge.h>
b863ceb7 102#include <linux/if_macvlan.h>
1da177e4
LT
103#include <net/dst.h>
104#include <net/pkt_sched.h>
105#include <net/checksum.h>
106#include <linux/highmem.h>
107#include <linux/init.h>
108#include <linux/kmod.h>
109#include <linux/module.h>
110#include <linux/kallsyms.h>
111#include <linux/netpoll.h>
112#include <linux/rcupdate.h>
113#include <linux/delay.h>
295f4a1f 114#include <net/wext.h>
1da177e4 115#include <net/iw_handler.h>
1da177e4 116#include <asm/current.h>
5bdb9886 117#include <linux/audit.h>
db217334 118#include <linux/dmaengine.h>
f6a78bfc 119#include <linux/err.h>
c7fa9d18 120#include <linux/ctype.h>
723e98b7 121#include <linux/if_arp.h>
1da177e4 122
342709ef
PE
123#include "net-sysfs.h"
124
1da177e4
LT
125/*
126 * The list of packet types we will receive (as opposed to discard)
127 * and the routines to invoke.
128 *
129 * Why 16? Because with 16 the only overlap we get on a hash of the
130 * low nibble of the protocol value is RARP/SNAP/X.25.
131 *
132 * NOTE: That is no longer true with the addition of VLAN tags. Not
133 * sure which should go first, but I bet it won't make much
134 * difference if we are running VLANs. The good news is that
135 * this protocol won't be in the list unless compiled in, so
3041a069 136 * the average user (w/out VLANs) will not be adversely affected.
1da177e4
LT
137 * --BLG
138 *
139 * 0800 IP
140 * 8100 802.1Q VLAN
141 * 0001 802.3
142 * 0002 AX.25
143 * 0004 802.2
144 * 8035 RARP
145 * 0005 SNAP
146 * 0805 X.25
147 * 0806 ARP
148 * 8137 IPX
149 * 0009 Localtalk
150 * 86DD IPv6
151 */
152
153static DEFINE_SPINLOCK(ptype_lock);
6b2bedc3
SH
154static struct list_head ptype_base[16] __read_mostly; /* 16 way hashed list */
155static struct list_head ptype_all __read_mostly; /* Taps */
1da177e4 156
db217334 157#ifdef CONFIG_NET_DMA
d379b01e
DW
158struct net_dma {
159 struct dma_client client;
160 spinlock_t lock;
161 cpumask_t channel_mask;
162 struct dma_chan *channels[NR_CPUS];
163};
164
165static enum dma_state_client
166netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
167 enum dma_state state);
168
169static struct net_dma net_dma = {
170 .client = {
171 .event_callback = netdev_dma_event,
172 },
173};
db217334
CL
174#endif
175
1da177e4 176/*
7562f876 177 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
1da177e4
LT
178 * semaphore.
179 *
180 * Pure readers hold dev_base_lock for reading.
181 *
182 * Writers must hold the rtnl semaphore while they loop through the
7562f876 183 * dev_base_head list, and hold dev_base_lock for writing when they do the
1da177e4
LT
184 * actual updates. This allows pure readers to access the list even
185 * while a writer is preparing to update it.
186 *
187 * To put it another way, dev_base_lock is held for writing only to
188 * protect against pure readers; the rtnl semaphore provides the
189 * protection against other writers.
190 *
191 * See, for example usages, register_netdevice() and
192 * unregister_netdevice(), which must be called with the rtnl
193 * semaphore held.
194 */
1da177e4
LT
195DEFINE_RWLOCK(dev_base_lock);
196
1da177e4
LT
197EXPORT_SYMBOL(dev_base_lock);
198
199#define NETDEV_HASHBITS 8
881d966b 200#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
1da177e4 201
881d966b 202static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
1da177e4
LT
203{
204 unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
881d966b 205 return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
1da177e4
LT
206}
207
881d966b 208static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
1da177e4 209{
881d966b 210 return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
1da177e4
LT
211}
212
ce286d32
EB
213/* Device list insertion */
214static int list_netdevice(struct net_device *dev)
215{
216 struct net *net = dev->nd_net;
217
218 ASSERT_RTNL();
219
220 write_lock_bh(&dev_base_lock);
221 list_add_tail(&dev->dev_list, &net->dev_base_head);
222 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
223 hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
224 write_unlock_bh(&dev_base_lock);
225 return 0;
226}
227
228/* Device list removal */
229static void unlist_netdevice(struct net_device *dev)
230{
231 ASSERT_RTNL();
232
233 /* Unlink dev from the device chain */
234 write_lock_bh(&dev_base_lock);
235 list_del(&dev->dev_list);
236 hlist_del(&dev->name_hlist);
237 hlist_del(&dev->index_hlist);
238 write_unlock_bh(&dev_base_lock);
239}
240
1da177e4
LT
241/*
242 * Our notifier list
243 */
244
f07d5b94 245static RAW_NOTIFIER_HEAD(netdev_chain);
1da177e4
LT
246
247/*
248 * Device drivers call our routines to queue packets here. We empty the
249 * queue in the local softnet handler.
250 */
bea3348e
SH
251
252DEFINE_PER_CPU(struct softnet_data, softnet_data);
1da177e4 253
723e98b7
JP
254#ifdef CONFIG_DEBUG_LOCK_ALLOC
255/*
256 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
257 * according to dev->type
258 */
259static const unsigned short netdev_lock_type[] =
260 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
261 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
262 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
263 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
264 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
265 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
266 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
267 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
268 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
269 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
270 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
271 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
272 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
273 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
274 ARPHRD_NONE};
275
276static const char *netdev_lock_name[] =
277 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
278 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
279 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
280 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
281 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
282 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
283 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
284 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
285 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
286 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
287 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
288 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
289 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
290 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
291 "_xmit_NONE"};
292
293static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
294
295static inline unsigned short netdev_lock_pos(unsigned short dev_type)
296{
297 int i;
298
299 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
300 if (netdev_lock_type[i] == dev_type)
301 return i;
302 /* the last key is used by default */
303 return ARRAY_SIZE(netdev_lock_type) - 1;
304}
305
306static inline void netdev_set_lockdep_class(spinlock_t *lock,
307 unsigned short dev_type)
308{
309 int i;
310
311 i = netdev_lock_pos(dev_type);
312 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
313 netdev_lock_name[i]);
314}
315#else
316static inline void netdev_set_lockdep_class(spinlock_t *lock,
317 unsigned short dev_type)
318{
319}
320#endif
1da177e4
LT
321
322/*******************************************************************************
323
324 Protocol management and registration routines
325
326*******************************************************************************/
327
1da177e4
LT
328/*
329 * Add a protocol ID to the list. Now that the input handler is
330 * smarter we can dispense with all the messy stuff that used to be
331 * here.
332 *
333 * BEWARE!!! Protocol handlers, mangling input packets,
334 * MUST BE last in hash buckets and checking protocol handlers
335 * MUST start from promiscuous ptype_all chain in net_bh.
336 * It is true now, do not change it.
337 * Explanation follows: if protocol handler, mangling packet, will
338 * be the first on list, it is not able to sense, that packet
339 * is cloned and should be copied-on-write, so that it will
340 * change it and subsequent readers will get broken packet.
341 * --ANK (980803)
342 */
343
344/**
345 * dev_add_pack - add packet handler
346 * @pt: packet type declaration
347 *
348 * Add a protocol handler to the networking stack. The passed &packet_type
349 * is linked into kernel lists and may not be freed until it has been
350 * removed from the kernel lists.
351 *
4ec93edb 352 * This call does not sleep therefore it can not
1da177e4
LT
353 * guarantee all CPU's that are in middle of receiving packets
354 * will see the new packet type (until the next received packet).
355 */
356
357void dev_add_pack(struct packet_type *pt)
358{
359 int hash;
360
361 spin_lock_bh(&ptype_lock);
9be9a6b9 362 if (pt->type == htons(ETH_P_ALL))
1da177e4 363 list_add_rcu(&pt->list, &ptype_all);
9be9a6b9 364 else {
1da177e4
LT
365 hash = ntohs(pt->type) & 15;
366 list_add_rcu(&pt->list, &ptype_base[hash]);
367 }
368 spin_unlock_bh(&ptype_lock);
369}
370
1da177e4
LT
371/**
372 * __dev_remove_pack - remove packet handler
373 * @pt: packet type declaration
374 *
375 * Remove a protocol handler that was previously added to the kernel
376 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
377 * from the kernel lists and can be freed or reused once this function
4ec93edb 378 * returns.
1da177e4
LT
379 *
380 * The packet type might still be in use by receivers
381 * and must not be freed until after all the CPU's have gone
382 * through a quiescent state.
383 */
384void __dev_remove_pack(struct packet_type *pt)
385{
386 struct list_head *head;
387 struct packet_type *pt1;
388
389 spin_lock_bh(&ptype_lock);
390
9be9a6b9 391 if (pt->type == htons(ETH_P_ALL))
1da177e4 392 head = &ptype_all;
9be9a6b9 393 else
1da177e4
LT
394 head = &ptype_base[ntohs(pt->type) & 15];
395
396 list_for_each_entry(pt1, head, list) {
397 if (pt == pt1) {
398 list_del_rcu(&pt->list);
399 goto out;
400 }
401 }
402
403 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
404out:
405 spin_unlock_bh(&ptype_lock);
406}
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Unlink a protocol handler that was previously registered with
 *	dev_add_pack() and wait for the network stack to quiesce.  Once
 *	this function returns, @pt can be freed or reused.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	/* take it off the lists first ... */
	__dev_remove_pack(pt);

	/* ... then wait until no receiver can still hold a pointer to it */
	synchronize_net();
}
425
426/******************************************************************************
427
428 Device Boot-time Settings Routines
429
430*******************************************************************************/
431
432/* Boot time configuration table */
433static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
434
435/**
436 * netdev_boot_setup_add - add new setup entry
437 * @name: name of the device
438 * @map: configured settings for the device
439 *
440 * Adds new setup entry to the dev_boot_setup list. The function
441 * returns 0 on error and 1 on success. This is a generic routine to
442 * all netdevices.
443 */
444static int netdev_boot_setup_add(char *name, struct ifmap *map)
445{
446 struct netdev_boot_setup *s;
447 int i;
448
449 s = dev_boot_setup;
450 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
451 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
452 memset(s[i].name, 0, sizeof(s[i].name));
453 strcpy(s[i].name, name);
454 memcpy(&s[i].map, map, sizeof(s[i].map));
455 break;
456 }
457 }
458
459 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
460}
461
462/**
463 * netdev_boot_setup_check - check boot time settings
464 * @dev: the netdevice
465 *
466 * Check boot time settings for the device.
467 * The found settings are set for the device to be used
468 * later in the device probing.
469 * Returns 0 if no settings found, 1 if they are.
470 */
471int netdev_boot_setup_check(struct net_device *dev)
472{
473 struct netdev_boot_setup *s = dev_boot_setup;
474 int i;
475
476 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
477 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
478 !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
479 dev->irq = s[i].map.irq;
480 dev->base_addr = s[i].map.base_addr;
481 dev->mem_start = s[i].map.mem_start;
482 dev->mem_end = s[i].map.mem_end;
483 return 1;
484 }
485 }
486 return 0;
487}
488
489
490/**
491 * netdev_boot_base - get address from boot time settings
492 * @prefix: prefix for network device
493 * @unit: id for network device
494 *
495 * Check boot time settings for the base address of device.
496 * The found settings are set for the device to be used
497 * later in the device probing.
498 * Returns 0 if no settings found.
499 */
500unsigned long netdev_boot_base(const char *prefix, int unit)
501{
502 const struct netdev_boot_setup *s = dev_boot_setup;
503 char name[IFNAMSIZ];
504 int i;
505
506 sprintf(name, "%s%d", prefix, unit);
507
508 /*
509 * If device already registered then return base of 1
510 * to indicate not to probe for this interface
511 */
881d966b 512 if (__dev_get_by_name(&init_net, name))
1da177e4
LT
513 return 1;
514
515 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
516 if (!strcmp(name, s[i].name))
517 return s[i].map.base_addr;
518 return 0;
519}
520
521/*
522 * Saves at boot time configured settings for any netdevice.
523 */
524int __init netdev_boot_setup(char *str)
525{
526 int ints[5];
527 struct ifmap map;
528
529 str = get_options(str, ARRAY_SIZE(ints), ints);
530 if (!str || !*str)
531 return 0;
532
533 /* Save settings */
534 memset(&map, 0, sizeof(map));
535 if (ints[0] > 0)
536 map.irq = ints[1];
537 if (ints[0] > 1)
538 map.base_addr = ints[2];
539 if (ints[0] > 2)
540 map.mem_start = ints[3];
541 if (ints[0] > 3)
542 map.mem_end = ints[4];
543
544 /* Add new entry to the list */
545 return netdev_boot_setup_add(str, &map);
546}
547
548__setup("netdev=", netdev_boot_setup);
549
550/*******************************************************************************
551
552 Device Interface Subroutines
553
554*******************************************************************************/
555
556/**
557 * __dev_get_by_name - find a device by its name
c4ea43c5 558 * @net: the applicable net namespace
1da177e4
LT
559 * @name: name to find
560 *
561 * Find an interface by name. Must be called under RTNL semaphore
562 * or @dev_base_lock. If the name is found a pointer to the device
563 * is returned. If the name is not found then %NULL is returned. The
564 * reference counters are not incremented so the caller must be
565 * careful with locks.
566 */
567
881d966b 568struct net_device *__dev_get_by_name(struct net *net, const char *name)
1da177e4
LT
569{
570 struct hlist_node *p;
571
881d966b 572 hlist_for_each(p, dev_name_hash(net, name)) {
1da177e4
LT
573 struct net_device *dev
574 = hlist_entry(p, struct net_device, name_hlist);
575 if (!strncmp(dev->name, name, IFNAMSIZ))
576 return dev;
577 }
578 return NULL;
579}
580
581/**
582 * dev_get_by_name - find a device by its name
c4ea43c5 583 * @net: the applicable net namespace
1da177e4
LT
584 * @name: name to find
585 *
586 * Find an interface by name. This can be called from any
587 * context and does its own locking. The returned handle has
588 * the usage count incremented and the caller must use dev_put() to
589 * release it when it is no longer needed. %NULL is returned if no
590 * matching device is found.
591 */
592
881d966b 593struct net_device *dev_get_by_name(struct net *net, const char *name)
1da177e4
LT
594{
595 struct net_device *dev;
596
597 read_lock(&dev_base_lock);
881d966b 598 dev = __dev_get_by_name(net, name);
1da177e4
LT
599 if (dev)
600 dev_hold(dev);
601 read_unlock(&dev_base_lock);
602 return dev;
603}
604
605/**
606 * __dev_get_by_index - find a device by its ifindex
c4ea43c5 607 * @net: the applicable net namespace
1da177e4
LT
608 * @ifindex: index of device
609 *
610 * Search for an interface by index. Returns %NULL if the device
611 * is not found or a pointer to the device. The device has not
612 * had its reference counter increased so the caller must be careful
613 * about locking. The caller must hold either the RTNL semaphore
614 * or @dev_base_lock.
615 */
616
881d966b 617struct net_device *__dev_get_by_index(struct net *net, int ifindex)
1da177e4
LT
618{
619 struct hlist_node *p;
620
881d966b 621 hlist_for_each(p, dev_index_hash(net, ifindex)) {
1da177e4
LT
622 struct net_device *dev
623 = hlist_entry(p, struct net_device, index_hlist);
624 if (dev->ifindex == ifindex)
625 return dev;
626 }
627 return NULL;
628}
629
630
631/**
632 * dev_get_by_index - find a device by its ifindex
c4ea43c5 633 * @net: the applicable net namespace
1da177e4
LT
634 * @ifindex: index of device
635 *
636 * Search for an interface by index. Returns NULL if the device
637 * is not found or a pointer to the device. The device returned has
638 * had a reference added and the pointer is safe until the user calls
639 * dev_put to indicate they have finished with it.
640 */
641
881d966b 642struct net_device *dev_get_by_index(struct net *net, int ifindex)
1da177e4
LT
643{
644 struct net_device *dev;
645
646 read_lock(&dev_base_lock);
881d966b 647 dev = __dev_get_by_index(net, ifindex);
1da177e4
LT
648 if (dev)
649 dev_hold(dev);
650 read_unlock(&dev_base_lock);
651 return dev;
652}
653
654/**
655 * dev_getbyhwaddr - find a device by its hardware address
c4ea43c5 656 * @net: the applicable net namespace
1da177e4
LT
657 * @type: media type of device
658 * @ha: hardware address
659 *
660 * Search for an interface by MAC address. Returns NULL if the device
661 * is not found or a pointer to the device. The caller must hold the
662 * rtnl semaphore. The returned device has not had its ref count increased
663 * and the caller must therefore be careful about locking
664 *
665 * BUGS:
666 * If the API was consistent this would be __dev_get_by_hwaddr
667 */
668
881d966b 669struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
1da177e4
LT
670{
671 struct net_device *dev;
672
673 ASSERT_RTNL();
674
881d966b 675 for_each_netdev(&init_net, dev)
1da177e4
LT
676 if (dev->type == type &&
677 !memcmp(dev->dev_addr, ha, dev->addr_len))
7562f876
PE
678 return dev;
679
680 return NULL;
1da177e4
LT
681}
682
cf309e3f
JF
683EXPORT_SYMBOL(dev_getbyhwaddr);
684
881d966b 685struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
1da177e4
LT
686{
687 struct net_device *dev;
688
4e9cac2b 689 ASSERT_RTNL();
881d966b 690 for_each_netdev(net, dev)
4e9cac2b 691 if (dev->type == type)
7562f876
PE
692 return dev;
693
694 return NULL;
4e9cac2b
PM
695}
696
697EXPORT_SYMBOL(__dev_getfirstbyhwtype);
698
881d966b 699struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
4e9cac2b
PM
700{
701 struct net_device *dev;
702
703 rtnl_lock();
881d966b 704 dev = __dev_getfirstbyhwtype(net, type);
4e9cac2b
PM
705 if (dev)
706 dev_hold(dev);
1da177e4
LT
707 rtnl_unlock();
708 return dev;
709}
710
711EXPORT_SYMBOL(dev_getfirstbyhwtype);
712
713/**
714 * dev_get_by_flags - find any device with given flags
c4ea43c5 715 * @net: the applicable net namespace
1da177e4
LT
716 * @if_flags: IFF_* values
717 * @mask: bitmask of bits in if_flags to check
718 *
719 * Search for any interface with the given flags. Returns NULL if a device
4ec93edb 720 * is not found or a pointer to the device. The device returned has
1da177e4
LT
721 * had a reference added and the pointer is safe until the user calls
722 * dev_put to indicate they have finished with it.
723 */
724
881d966b 725struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
1da177e4 726{
7562f876 727 struct net_device *dev, *ret;
1da177e4 728
7562f876 729 ret = NULL;
1da177e4 730 read_lock(&dev_base_lock);
881d966b 731 for_each_netdev(net, dev) {
1da177e4
LT
732 if (((dev->flags ^ if_flags) & mask) == 0) {
733 dev_hold(dev);
7562f876 734 ret = dev;
1da177e4
LT
735 break;
736 }
737 }
738 read_unlock(&dev_base_lock);
7562f876 739 return ret;
1da177e4
LT
740}
741
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to allow sysfs
 *	to work.  A name is rejected if it is empty, is IFNAMSIZ or more
 *	characters long, is "." or "..", or contains a '/' or any kind
 *	of whitespace.
 *
 *	Returns 1 if @name is acceptable, 0 otherwise.
 */
int dev_valid_name(const char *name)
{
	const char *c;

	if (name[0] == '\0' || strlen(name) >= IFNAMSIZ)
		return 0;

	if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)
		return 0;

	for (c = name; *c != '\0'; c++) {
		if (*c == '/' || isspace(*c))
			return 0;
	}

	return 1;
}
766
767/**
b267b179
EB
768 * __dev_alloc_name - allocate a name for a device
769 * @net: network namespace to allocate the device name in
1da177e4 770 * @name: name format string
b267b179 771 * @buf: scratch buffer and result name string
1da177e4
LT
772 *
773 * Passed a format string - eg "lt%d" it will try and find a suitable
3041a069
SH
774 * id. It scans list of devices to build up a free map, then chooses
775 * the first empty slot. The caller must hold the dev_base or rtnl lock
776 * while allocating the name and adding the device in order to avoid
777 * duplicates.
778 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
779 * Returns the number of the unit assigned or a negative errno code.
1da177e4
LT
780 */
781
b267b179 782static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1da177e4
LT
783{
784 int i = 0;
1da177e4
LT
785 const char *p;
786 const int max_netdevices = 8*PAGE_SIZE;
cfcabdcc 787 unsigned long *inuse;
1da177e4
LT
788 struct net_device *d;
789
790 p = strnchr(name, IFNAMSIZ-1, '%');
791 if (p) {
792 /*
793 * Verify the string as this thing may have come from
794 * the user. There must be either one "%d" and no other "%"
795 * characters.
796 */
797 if (p[1] != 'd' || strchr(p + 2, '%'))
798 return -EINVAL;
799
800 /* Use one page as a bit array of possible slots */
cfcabdcc 801 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1da177e4
LT
802 if (!inuse)
803 return -ENOMEM;
804
881d966b 805 for_each_netdev(net, d) {
1da177e4
LT
806 if (!sscanf(d->name, name, &i))
807 continue;
808 if (i < 0 || i >= max_netdevices)
809 continue;
810
811 /* avoid cases where sscanf is not exact inverse of printf */
b267b179 812 snprintf(buf, IFNAMSIZ, name, i);
1da177e4
LT
813 if (!strncmp(buf, d->name, IFNAMSIZ))
814 set_bit(i, inuse);
815 }
816
817 i = find_first_zero_bit(inuse, max_netdevices);
818 free_page((unsigned long) inuse);
819 }
820
b267b179
EB
821 snprintf(buf, IFNAMSIZ, name, i);
822 if (!__dev_get_by_name(net, buf))
1da177e4 823 return i;
1da177e4
LT
824
825 /* It is possible to run out of possible slots
826 * when the name is long and there isn't enough space left
827 * for the digits, or if all bits are used.
828 */
829 return -ENFILE;
830}
831
b267b179
EB
832/**
833 * dev_alloc_name - allocate a name for a device
834 * @dev: device
835 * @name: name format string
836 *
837 * Passed a format string - eg "lt%d" it will try and find a suitable
838 * id. It scans list of devices to build up a free map, then chooses
839 * the first empty slot. The caller must hold the dev_base or rtnl lock
840 * while allocating the name and adding the device in order to avoid
841 * duplicates.
842 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
843 * Returns the number of the unit assigned or a negative errno code.
844 */
845
846int dev_alloc_name(struct net_device *dev, const char *name)
847{
848 char buf[IFNAMSIZ];
849 struct net *net;
850 int ret;
851
852 BUG_ON(!dev->nd_net);
853 net = dev->nd_net;
854 ret = __dev_alloc_name(net, name, buf);
855 if (ret >= 0)
856 strlcpy(dev->name, buf, IFNAMSIZ);
857 return ret;
858}
859
1da177e4
LT
860
861/**
862 * dev_change_name - change name of a device
863 * @dev: device
864 * @newname: name (or format string) must be at least IFNAMSIZ
865 *
866 * Change name of a device, can pass format strings "eth%d".
867 * for wildcarding.
868 */
869int dev_change_name(struct net_device *dev, char *newname)
870{
fcc5a03a 871 char oldname[IFNAMSIZ];
1da177e4 872 int err = 0;
fcc5a03a 873 int ret;
881d966b 874 struct net *net;
1da177e4
LT
875
876 ASSERT_RTNL();
881d966b 877 BUG_ON(!dev->nd_net);
1da177e4 878
881d966b 879 net = dev->nd_net;
1da177e4
LT
880 if (dev->flags & IFF_UP)
881 return -EBUSY;
882
883 if (!dev_valid_name(newname))
884 return -EINVAL;
885
fcc5a03a
HX
886 memcpy(oldname, dev->name, IFNAMSIZ);
887
1da177e4
LT
888 if (strchr(newname, '%')) {
889 err = dev_alloc_name(dev, newname);
890 if (err < 0)
891 return err;
892 strcpy(newname, dev->name);
893 }
881d966b 894 else if (__dev_get_by_name(net, newname))
1da177e4
LT
895 return -EEXIST;
896 else
897 strlcpy(dev->name, newname, IFNAMSIZ);
898
fcc5a03a 899rollback:
92749821 900 device_rename(&dev->dev, dev->name);
7f988eab
HX
901
902 write_lock_bh(&dev_base_lock);
92749821 903 hlist_del(&dev->name_hlist);
881d966b 904 hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
7f988eab
HX
905 write_unlock_bh(&dev_base_lock);
906
056925ab 907 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
fcc5a03a
HX
908 ret = notifier_to_errno(ret);
909
910 if (ret) {
911 if (err) {
912 printk(KERN_ERR
913 "%s: name change rollback failed: %d.\n",
914 dev->name, ret);
915 } else {
916 err = ret;
917 memcpy(dev->name, oldname, IFNAMSIZ);
918 goto rollback;
919 }
920 }
1da177e4
LT
921
922 return err;
923}
924
d8a33ac4 925/**
3041a069 926 * netdev_features_change - device changes features
d8a33ac4
SH
927 * @dev: device to cause notification
928 *
929 * Called to indicate a device has changed features.
930 */
931void netdev_features_change(struct net_device *dev)
932{
056925ab 933 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
d8a33ac4
SH
934}
935EXPORT_SYMBOL(netdev_features_change);
936
1da177e4
LT
937/**
938 * netdev_state_change - device changes state
939 * @dev: device to cause notification
940 *
941 * Called to indicate a device has changed state. This function calls
942 * the notifier chains for netdev_chain and sends a NEWLINK message
943 * to the routing socket.
944 */
945void netdev_state_change(struct net_device *dev)
946{
947 if (dev->flags & IFF_UP) {
056925ab 948 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1da177e4
LT
949 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
950 }
951}
952
953/**
954 * dev_load - load a network module
c4ea43c5 955 * @net: the applicable net namespace
1da177e4
LT
956 * @name: name of interface
957 *
958 * If a network interface is not present and the process has suitable
959 * privileges this function loads the module. If module loading is not
960 * available in this kernel then it becomes a nop.
961 */
962
881d966b 963void dev_load(struct net *net, const char *name)
1da177e4 964{
4ec93edb 965 struct net_device *dev;
1da177e4
LT
966
967 read_lock(&dev_base_lock);
881d966b 968 dev = __dev_get_by_name(net, name);
1da177e4
LT
969 read_unlock(&dev_base_lock);
970
971 if (!dev && capable(CAP_SYS_MODULE))
972 request_module("%s", name);
973}
974
1da177e4
LT
975/**
976 * dev_open - prepare an interface for use.
977 * @dev: device to open
978 *
979 * Takes a device from down to up state. The device's private open
980 * function is invoked and then the multicast lists are loaded. Finally
981 * the device is moved into the up state and a %NETDEV_UP message is
982 * sent to the netdev notifier chain.
983 *
984 * Calling this function on an active interface is a nop. On a failure
985 * a negative errno code is returned.
986 */
987int dev_open(struct net_device *dev)
988{
989 int ret = 0;
990
991 /*
992 * Is it already up?
993 */
994
995 if (dev->flags & IFF_UP)
996 return 0;
997
998 /*
999 * Is it even present?
1000 */
1001 if (!netif_device_present(dev))
1002 return -ENODEV;
1003
1004 /*
1005 * Call device private open method
1006 */
1007 set_bit(__LINK_STATE_START, &dev->state);
bada339b
JG
1008
1009 if (dev->validate_addr)
1010 ret = dev->validate_addr(dev);
1011
1012 if (!ret && dev->open)
1da177e4 1013 ret = dev->open(dev);
1da177e4 1014
4ec93edb 1015 /*
1da177e4
LT
1016 * If it went open OK then:
1017 */
1018
bada339b
JG
1019 if (ret)
1020 clear_bit(__LINK_STATE_START, &dev->state);
1021 else {
1da177e4
LT
1022 /*
1023 * Set the flags.
1024 */
1025 dev->flags |= IFF_UP;
1026
1027 /*
1028 * Initialize multicasting status
1029 */
4417da66 1030 dev_set_rx_mode(dev);
1da177e4
LT
1031
1032 /*
1033 * Wakeup transmit queue engine
1034 */
1035 dev_activate(dev);
1036
1037 /*
1038 * ... and announce new interface.
1039 */
056925ab 1040 call_netdevice_notifiers(NETDEV_UP, dev);
1da177e4 1041 }
bada339b 1042
1da177e4
LT
1043 return ret;
1044}
1045
1046/**
1047 * dev_close - shutdown an interface.
1048 * @dev: device to shutdown
1049 *
1050 * This function moves an active device into down state. A
1051 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1052 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1053 * chain.
1054 */
1055int dev_close(struct net_device *dev)
1056{
9d5010db
DM
1057 might_sleep();
1058
1da177e4
LT
1059 if (!(dev->flags & IFF_UP))
1060 return 0;
1061
1062 /*
1063 * Tell people we are going down, so that they can
1064 * prepare to death, when device is still operating.
1065 */
056925ab 1066 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1da177e4
LT
1067
1068 dev_deactivate(dev);
1069
1070 clear_bit(__LINK_STATE_START, &dev->state);
1071
1072 /* Synchronize to scheduled poll. We cannot touch poll list,
bea3348e
SH
1073 * it can be even on different cpu. So just clear netif_running().
1074 *
1075 * dev->stop() will invoke napi_disable() on all of it's
1076 * napi_struct instances on this device.
1077 */
1da177e4 1078 smp_mb__after_clear_bit(); /* Commit netif_running(). */
1da177e4
LT
1079
1080 /*
1081 * Call the device specific close. This cannot fail.
1082 * Only if device is UP
1083 *
1084 * We allow it to be called even after a DETACH hot-plug
1085 * event.
1086 */
1087 if (dev->stop)
1088 dev->stop(dev);
1089
1090 /*
1091 * Device is now down.
1092 */
1093
1094 dev->flags &= ~IFF_UP;
1095
1096 /*
1097 * Tell people we are down
1098 */
056925ab 1099 call_netdevice_notifiers(NETDEV_DOWN, dev);
1da177e4
LT
1100
1101 return 0;
1102}
1103
1104
881d966b
EB
/*
 * While non-zero, register_netdevice_notifier() adds the notifier to the
 * chain but does not replay REGISTER/UP events for existing devices.
 * Starts out as 1.
 */
1105static int dev_boot_phase = 1;
1106
1da177e4
LT
1107/*
1108 * Device change register/unregister. These are not inline or static
1109 * as we export them to the world.
1110 */
1111
1112/**
1113 * register_netdevice_notifier - register a network notifier block
1114 * @nb: notifier
1115 *
1116 * Register a notifier to be called when network device events occur.
1117 * The notifier passed is linked into the kernel structures and must
1118 * not be reused until it has been unregistered. A negative errno code
1119 * is returned on a failure.
1120 *
1121 * When registered all registration and up events are replayed
4ec93edb 1122 * to the new notifier to allow device to have a race free
1da177e4
LT
1123 * view of the network device list.
1124 */
1125
1126int register_netdevice_notifier(struct notifier_block *nb)
1127{
1128 struct net_device *dev;
fcc5a03a 1129 struct net_device *last;
881d966b 1130 struct net *net;
1da177e4
LT
1131 int err;
1132
1133 rtnl_lock();
f07d5b94 1134 err = raw_notifier_chain_register(&netdev_chain, nb);
fcc5a03a
HX
1135 if (err)
1136 goto unlock;
881d966b
EB
1137 if (dev_boot_phase)
1138 goto unlock;
1139 for_each_net(net) {
1140 for_each_netdev(net, dev) {
1141 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1142 err = notifier_to_errno(err);
1143 if (err)
1144 goto rollback;
1145
1146 if (!(dev->flags & IFF_UP))
1147 continue;
1da177e4 1148
881d966b
EB
1149 nb->notifier_call(nb, NETDEV_UP, dev);
1150 }
1da177e4 1151 }
fcc5a03a
HX
1152
1153unlock:
1da177e4
LT
1154 rtnl_unlock();
1155 return err;
fcc5a03a
HX
1156
1157rollback:
1158 last = dev;
881d966b
EB
1159 for_each_net(net) {
1160 for_each_netdev(net, dev) {
1161 if (dev == last)
1162 break;
fcc5a03a 1163
881d966b
EB
1164 if (dev->flags & IFF_UP) {
1165 nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1166 nb->notifier_call(nb, NETDEV_DOWN, dev);
1167 }
1168 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
fcc5a03a 1169 }
fcc5a03a
HX
1170 }
1171 goto unlock;
1da177e4
LT
1172}
1173
1174/**
1175 * unregister_netdevice_notifier - unregister a network notifier block
1176 * @nb: notifier
1177 *
1178 * Unregister a notifier previously registered by
1179 * register_netdevice_notifier(). The notifier is unlinked into the
1180 * kernel structures and may then be reused. A negative errno code
1181 * is returned on a failure.
1182 */
1183
1184int unregister_netdevice_notifier(struct notifier_block *nb)
1185{
9f514950
HX
1186 int err;
1187
1188 rtnl_lock();
f07d5b94 1189 err = raw_notifier_chain_unregister(&netdev_chain, nb);
9f514950
HX
1190 rtnl_unlock();
1191 return err;
1da177e4
LT
1192}
1193
1194/**
1195 * call_netdevice_notifiers - call all network notifier blocks
1196 * @val: value passed unmodified to notifier function
c4ea43c5 1197 * @dev: net_device pointer passed unmodified to notifier function
1da177e4
LT
1198 *
1199 * Call all network notifier blocks. Parameters and return value
f07d5b94 1200 * are as for raw_notifier_call_chain().
1da177e4
LT
1201 */
1202
ad7379d4 1203int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1da177e4 1204{
ad7379d4 1205 return raw_notifier_call_chain(&netdev_chain, val, dev);
1da177e4
LT
1206}
1207
1208/* When > 0 there are consumers of rx skb time stamps */
1209static atomic_t netstamp_needed = ATOMIC_INIT(0);
1210
1211void net_enable_timestamp(void)
1212{
1213 atomic_inc(&netstamp_needed);
1214}
1215
1216void net_disable_timestamp(void)
1217{
1218 atomic_dec(&netstamp_needed);
1219}
1220
a61bbcf2 1221static inline void net_timestamp(struct sk_buff *skb)
1da177e4
LT
1222{
1223 if (atomic_read(&netstamp_needed))
a61bbcf2 1224 __net_timestamp(skb);
b7aa0bf7
ED
1225 else
1226 skb->tstamp.tv64 = 0;
1da177e4
LT
1227}
1228
1229/*
1230 * Support routine. Sends outgoing frames to any network
1231 * taps currently in use.
1232 */
1233
f6a78bfc 1234static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1da177e4
LT
1235{
1236 struct packet_type *ptype;
a61bbcf2
PM
1237
1238 net_timestamp(skb);
1da177e4
LT
1239
1240 rcu_read_lock();
1241 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1242 /* Never send packets back to the socket
1243 * they originated from - MvS (miquels@drinkel.ow.org)
1244 */
1245 if ((ptype->dev == dev || !ptype->dev) &&
1246 (ptype->af_packet_priv == NULL ||
1247 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1248 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1249 if (!skb2)
1250 break;
1251
1252 /* skb->nh should be correctly
1253 set by sender, so that the second statement is
1254 just protection against buggy protocols.
1255 */
459a98ed 1256 skb_reset_mac_header(skb2);
1da177e4 1257
d56f90a7 1258 if (skb_network_header(skb2) < skb2->data ||
27a884dc 1259 skb2->network_header > skb2->tail) {
1da177e4
LT
1260 if (net_ratelimit())
1261 printk(KERN_CRIT "protocol %04x is "
1262 "buggy, dev %s\n",
1263 skb2->protocol, dev->name);
c1d2bbe1 1264 skb_reset_network_header(skb2);
1da177e4
LT
1265 }
1266
b0e380b1 1267 skb2->transport_header = skb2->network_header;
1da177e4 1268 skb2->pkt_type = PACKET_OUTGOING;
f2ccd8fa 1269 ptype->func(skb2, skb->dev, ptype, skb->dev);
1da177e4
LT
1270 }
1271 }
1272 rcu_read_unlock();
1273}
1274
56079431
DV
1275
1276void __netif_schedule(struct net_device *dev)
1277{
1278 if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1279 unsigned long flags;
1280 struct softnet_data *sd;
1281
1282 local_irq_save(flags);
1283 sd = &__get_cpu_var(softnet_data);
1284 dev->next_sched = sd->output_queue;
1285 sd->output_queue = dev;
1286 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1287 local_irq_restore(flags);
1288 }
1289}
1290EXPORT_SYMBOL(__netif_schedule);
1291
bea3348e 1292void dev_kfree_skb_irq(struct sk_buff *skb)
56079431 1293{
bea3348e
SH
1294 if (atomic_dec_and_test(&skb->users)) {
1295 struct softnet_data *sd;
1296 unsigned long flags;
56079431 1297
bea3348e
SH
1298 local_irq_save(flags);
1299 sd = &__get_cpu_var(softnet_data);
1300 skb->next = sd->completion_queue;
1301 sd->completion_queue = skb;
1302 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1303 local_irq_restore(flags);
1304 }
56079431 1305}
bea3348e 1306EXPORT_SYMBOL(dev_kfree_skb_irq);
56079431
DV
1307
/*
 * Free @skb from any context: uses dev_kfree_skb_irq() when in hard irq
 * context or with interrupts disabled, dev_kfree_skb() otherwise.
 */
void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled()) {
		dev_kfree_skb_irq(skb);
		return;
	}

	dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
1316
1317
bea3348e
SH
1318/**
1319 * netif_device_detach - mark device as removed
1320 * @dev: network device
1321 *
1322 * Mark device as removed from system and therefore no longer available.
1323 */
56079431
DV
1324void netif_device_detach(struct net_device *dev)
1325{
1326 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1327 netif_running(dev)) {
1328 netif_stop_queue(dev);
1329 }
1330}
1331EXPORT_SYMBOL(netif_device_detach);
1332
bea3348e
SH
1333/**
1334 * netif_device_attach - mark device as attached
1335 * @dev: network device
1336 *
1337 * Mark device as attached from system and restart if needed.
1338 */
56079431
DV
1339void netif_device_attach(struct net_device *dev)
1340{
1341 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1342 netif_running(dev)) {
1343 netif_wake_queue(dev);
4ec93edb 1344 __netdev_watchdog_up(dev);
56079431
DV
1345 }
1346}
1347EXPORT_SYMBOL(netif_device_attach);
1348
1349
1da177e4
LT
1350/*
1351 * Invalidate hardware checksum when packet is to be mangled, and
1352 * complete checksum manually on outgoing path.
1353 */
84fa7933 1354int skb_checksum_help(struct sk_buff *skb)
1da177e4 1355{
d3bc23e7 1356 __wsum csum;
663ead3b 1357 int ret = 0, offset;
1da177e4 1358
84fa7933 1359 if (skb->ip_summed == CHECKSUM_COMPLETE)
a430a43d
HX
1360 goto out_set_summed;
1361
1362 if (unlikely(skb_shinfo(skb)->gso_size)) {
a430a43d
HX
1363 /* Let GSO fix up the checksum. */
1364 goto out_set_summed;
1da177e4
LT
1365 }
1366
a030847e
HX
1367 offset = skb->csum_start - skb_headroom(skb);
1368 BUG_ON(offset >= skb_headlen(skb));
1369 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1370
1371 offset += skb->csum_offset;
1372 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1373
1374 if (skb_cloned(skb) &&
1375 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
1da177e4
LT
1376 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1377 if (ret)
1378 goto out;
1379 }
1380
a030847e 1381 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
a430a43d 1382out_set_summed:
1da177e4 1383 skb->ip_summed = CHECKSUM_NONE;
4ec93edb 1384out:
1da177e4
LT
1385 return ret;
1386}
1387
f6a78bfc
HX
1388/**
1389 * skb_gso_segment - Perform segmentation on skb.
1390 * @skb: buffer to segment
576a30eb 1391 * @features: features for the output path (see dev->features)
f6a78bfc
HX
1392 *
1393 * This function segments the given skb and returns a list of segments.
576a30eb
HX
1394 *
1395 * It may return NULL if the skb requires no segmentation. This is
1396 * only possible when GSO is used for verifying header integrity.
f6a78bfc 1397 */
576a30eb 1398struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
f6a78bfc
HX
1399{
1400 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1401 struct packet_type *ptype;
252e3346 1402 __be16 type = skb->protocol;
a430a43d 1403 int err;
f6a78bfc
HX
1404
1405 BUG_ON(skb_shinfo(skb)->frag_list);
f6a78bfc 1406
459a98ed 1407 skb_reset_mac_header(skb);
b0e380b1 1408 skb->mac_len = skb->network_header - skb->mac_header;
f6a78bfc
HX
1409 __skb_pull(skb, skb->mac_len);
1410
f9d106a6 1411 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
a430a43d
HX
1412 if (skb_header_cloned(skb) &&
1413 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1414 return ERR_PTR(err);
1415 }
1416
f6a78bfc
HX
1417 rcu_read_lock();
1418 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1419 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
84fa7933 1420 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
a430a43d
HX
1421 err = ptype->gso_send_check(skb);
1422 segs = ERR_PTR(err);
1423 if (err || skb_gso_ok(skb, features))
1424 break;
d56f90a7
ACM
1425 __skb_push(skb, (skb->data -
1426 skb_network_header(skb)));
a430a43d 1427 }
576a30eb 1428 segs = ptype->gso_segment(skb, features);
f6a78bfc
HX
1429 break;
1430 }
1431 }
1432 rcu_read_unlock();
1433
98e399f8 1434 __skb_push(skb, skb->data - skb_mac_header(skb));
576a30eb 1435
f6a78bfc
HX
1436 return segs;
1437}
1438
1439EXPORT_SYMBOL(skb_gso_segment);
1440
fb286bb2
HX
/*
 * Take action when hardware reception checksum errors are detected:
 * log a rate-limited error naming the device (or "<unknown>" when @dev
 * is NULL) and dump the stack.
 */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (!net_ratelimit())
		return;

	printk(KERN_ERR "%s: hw csum failure.\n",
	       dev ? dev->name : "<unknown>");
	dump_stack();
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
1453
1da177e4
LT
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 *
 * Returns 1 when CONFIG_HIGHMEM is enabled, @dev lacks NETIF_F_HIGHDMA
 * and at least one page fragment of @skb lives in high memory;
 * returns 0 otherwise.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int frag;

	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) {
			if (PageHighMem(skb_shinfo(skb)->frags[frag].page))
				return 1;
		}
	}

#endif
	return 0;
}
1da177e4 1474
f6a78bfc
HX
/*
 * Overlay on skb->cb used while an skb carries a list of GSO segments:
 * dev_gso_segment() saves the skb's original destructor here, and
 * dev_gso_skb_destructor() / dev_hard_start_xmit() read it back.
 */
1475struct dev_gso_cb {
1476 void (*destructor)(struct sk_buff *skb);
1477};
1478
/* View the cb area of @skb as a struct dev_gso_cb. */
1479#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1480
1481static void dev_gso_skb_destructor(struct sk_buff *skb)
1482{
1483 struct dev_gso_cb *cb;
1484
1485 do {
1486 struct sk_buff *nskb = skb->next;
1487
1488 skb->next = nskb->next;
1489 nskb->next = NULL;
1490 kfree_skb(nskb);
1491 } while (skb->next);
1492
1493 cb = DEV_GSO_CB(skb);
1494 if (cb->destructor)
1495 cb->destructor(skb);
1496}
1497
1498/**
1499 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1500 * @skb: buffer to segment
1501 *
1502 * This function segments the given skb and stores the list of segments
1503 * in skb->next.
1504 */
1505static int dev_gso_segment(struct sk_buff *skb)
1506{
1507 struct net_device *dev = skb->dev;
1508 struct sk_buff *segs;
576a30eb
HX
1509 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1510 NETIF_F_SG : 0);
1511
1512 segs = skb_gso_segment(skb, features);
1513
1514 /* Verifying header integrity only. */
1515 if (!segs)
1516 return 0;
f6a78bfc 1517
f6a78bfc
HX
1518 if (unlikely(IS_ERR(segs)))
1519 return PTR_ERR(segs);
1520
1521 skb->next = segs;
1522 DEV_GSO_CB(skb)->destructor = skb->destructor;
1523 skb->destructor = dev_gso_skb_destructor;
1524
1525 return 0;
1526}
1527
1528int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1529{
1530 if (likely(!skb->next)) {
9be9a6b9 1531 if (!list_empty(&ptype_all))
f6a78bfc
HX
1532 dev_queue_xmit_nit(skb, dev);
1533
576a30eb
HX
1534 if (netif_needs_gso(dev, skb)) {
1535 if (unlikely(dev_gso_segment(skb)))
1536 goto out_kfree_skb;
1537 if (skb->next)
1538 goto gso;
1539 }
f6a78bfc 1540
576a30eb 1541 return dev->hard_start_xmit(skb, dev);
f6a78bfc
HX
1542 }
1543
576a30eb 1544gso:
f6a78bfc
HX
1545 do {
1546 struct sk_buff *nskb = skb->next;
1547 int rc;
1548
1549 skb->next = nskb->next;
1550 nskb->next = NULL;
1551 rc = dev->hard_start_xmit(nskb, dev);
1552 if (unlikely(rc)) {
f54d9e8d 1553 nskb->next = skb->next;
f6a78bfc
HX
1554 skb->next = nskb;
1555 return rc;
1556 }
f25f4e44 1557 if (unlikely((netif_queue_stopped(dev) ||
668f895a 1558 netif_subqueue_stopped(dev, skb)) &&
f25f4e44 1559 skb->next))
f54d9e8d 1560 return NETDEV_TX_BUSY;
f6a78bfc 1561 } while (skb->next);
4ec93edb 1562
f6a78bfc
HX
1563 skb->destructor = DEV_GSO_CB(skb)->destructor;
1564
1565out_kfree_skb:
1566 kfree_skb(skb);
1567 return 0;
1568}
1569
1da177e4
LT
1570/**
1571 * dev_queue_xmit - transmit a buffer
1572 * @skb: buffer to transmit
1573 *
1574 * Queue a buffer for transmission to a network device. The caller must
1575 * have set the device and priority and built the buffer before calling
1576 * this function. The function can be called from an interrupt.
1577 *
1578 * A negative errno code is returned on a failure. A success does not
1579 * guarantee the frame will be transmitted as it may be dropped due
1580 * to congestion or traffic shaping.
af191367
BG
1581 *
1582 * -----------------------------------------------------------------------------------
1583 * I notice this method can also return errors from the queue disciplines,
1584 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1585 * be positive.
1586 *
1587 * Regardless of the return value, the skb is consumed, so it is currently
1588 * difficult to retry a send to this method. (You can bump the ref count
1589 * before sending to hold a reference for retry if you are careful.)
1590 *
1591 * When calling this method, interrupts MUST be enabled. This is because
1592 * the BH enable code must have IRQs enabled so that it will not deadlock.
1593 * --BLG
1da177e4
LT
1594 */
1595
1596int dev_queue_xmit(struct sk_buff *skb)
1597{
1598 struct net_device *dev = skb->dev;
1599 struct Qdisc *q;
1600 int rc = -ENOMEM;
1601
f6a78bfc
HX
1602 /* GSO will handle the following emulations directly. */
1603 if (netif_needs_gso(dev, skb))
1604 goto gso;
1605
1da177e4
LT
1606 if (skb_shinfo(skb)->frag_list &&
1607 !(dev->features & NETIF_F_FRAGLIST) &&
364c6bad 1608 __skb_linearize(skb))
1da177e4
LT
1609 goto out_kfree_skb;
1610
1611 /* Fragmented skb is linearized if device does not support SG,
1612 * or if at least one of fragments is in highmem and device
1613 * does not support DMA from it.
1614 */
1615 if (skb_shinfo(skb)->nr_frags &&
1616 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
364c6bad 1617 __skb_linearize(skb))
1da177e4
LT
1618 goto out_kfree_skb;
1619
1620 /* If packet is not checksummed and device does not support
1621 * checksumming for this protocol, complete checksumming here.
1622 */
663ead3b
HX
1623 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1624 skb_set_transport_header(skb, skb->csum_start -
1625 skb_headroom(skb));
1626
a298830c
HX
1627 if (!(dev->features & NETIF_F_GEN_CSUM) &&
1628 !((dev->features & NETIF_F_IP_CSUM) &&
1629 skb->protocol == htons(ETH_P_IP)) &&
1630 !((dev->features & NETIF_F_IPV6_CSUM) &&
1631 skb->protocol == htons(ETH_P_IPV6)))
663ead3b
HX
1632 if (skb_checksum_help(skb))
1633 goto out_kfree_skb;
1634 }
1da177e4 1635
f6a78bfc 1636gso:
2d7ceece
ED
1637 spin_lock_prefetch(&dev->queue_lock);
1638
4ec93edb
YH
1639 /* Disable soft irqs for various locks below. Also
1640 * stops preemption for RCU.
1da177e4 1641 */
4ec93edb 1642 rcu_read_lock_bh();
1da177e4 1643
4ec93edb
YH
1644 /* Updates of qdisc are serialized by queue_lock.
1645 * The struct Qdisc which is pointed to by qdisc is now a
1646 * rcu structure - it may be accessed without acquiring
1da177e4 1647 * a lock (but the structure may be stale.) The freeing of the
4ec93edb 1648 * qdisc will be deferred until it's known that there are no
1da177e4 1649 * more references to it.
4ec93edb
YH
1650 *
1651 * If the qdisc has an enqueue function, we still need to
1da177e4
LT
1652 * hold the queue_lock before calling it, since queue_lock
1653 * also serializes access to the device queue.
1654 */
1655
1656 q = rcu_dereference(dev->qdisc);
1657#ifdef CONFIG_NET_CLS_ACT
1658 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1659#endif
1660 if (q->enqueue) {
1661 /* Grab device queue */
1662 spin_lock(&dev->queue_lock);
85670cc1
PM
1663 q = dev->qdisc;
1664 if (q->enqueue) {
f25f4e44 1665 /* reset queue_mapping to zero */
dfa40911 1666 skb_set_queue_mapping(skb, 0);
85670cc1
PM
1667 rc = q->enqueue(skb, q);
1668 qdisc_run(dev);
1669 spin_unlock(&dev->queue_lock);
1da177e4 1670
85670cc1
PM
1671 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1672 goto out;
1673 }
1da177e4 1674 spin_unlock(&dev->queue_lock);
1da177e4
LT
1675 }
1676
1677 /* The device has no queue. Common case for software devices:
1678 loopback, all the sorts of tunnels...
1679
932ff279
HX
1680 Really, it is unlikely that netif_tx_lock protection is necessary
1681 here. (f.e. loopback and IP tunnels are clean ignoring statistics
1da177e4
LT
1682 counters.)
1683 However, it is possible, that they rely on protection
1684 made by us here.
1685
1686 Check this and shot the lock. It is not prone from deadlocks.
1687 Either shot noqueue qdisc, it is even simpler 8)
1688 */
1689 if (dev->flags & IFF_UP) {
1690 int cpu = smp_processor_id(); /* ok because BHs are off */
1691
1692 if (dev->xmit_lock_owner != cpu) {
1693
1694 HARD_TX_LOCK(dev, cpu);
1695
f25f4e44 1696 if (!netif_queue_stopped(dev) &&
668f895a 1697 !netif_subqueue_stopped(dev, skb)) {
1da177e4 1698 rc = 0;
f6a78bfc 1699 if (!dev_hard_start_xmit(skb, dev)) {
1da177e4
LT
1700 HARD_TX_UNLOCK(dev);
1701 goto out;
1702 }
1703 }
1704 HARD_TX_UNLOCK(dev);
1705 if (net_ratelimit())
1706 printk(KERN_CRIT "Virtual device %s asks to "
1707 "queue packet!\n", dev->name);
1708 } else {
1709 /* Recursion is detected! It is possible,
1710 * unfortunately */
1711 if (net_ratelimit())
1712 printk(KERN_CRIT "Dead loop on virtual device "
1713 "%s, fix it urgently!\n", dev->name);
1714 }
1715 }
1716
1717 rc = -ENETDOWN;
d4828d85 1718 rcu_read_unlock_bh();
1da177e4
LT
1719
1720out_kfree_skb:
1721 kfree_skb(skb);
1722 return rc;
1723out:
d4828d85 1724 rcu_read_unlock_bh();
1da177e4
LT
1725 return rc;
1726}
1727
1728
1729/*=======================================================================
1730 Receiver routines
1731 =======================================================================*/
1732
6b2bedc3
SH
/* netif_rx() drops a packet once the per-CPU input_pkt_queue holds more than this many. */
1733int netdev_max_backlog __read_mostly = 1000;
/* NOTE(review): not referenced in this part of the file; presumably the softirq receive budget - confirm. */
1734int netdev_budget __read_mostly = 300;
/* process_backlog() copies this into napi->weight on every run. */
1735int weight_p __read_mostly = 64; /* old backlog weight */
1da177e4
LT
1736
/*
 * Per-CPU receive counters: netif_rx() and netif_receive_skb() bump
 * .total, netif_rx() bumps .dropped when the backlog is full.
 */
1737DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1738
1739
1da177e4
LT
1740/**
1741 * netif_rx - post buffer to the network code
1742 * @skb: buffer to post
1743 *
1744 * This function receives a packet from a device driver and queues it for
1745 * the upper (protocol) levels to process. It always succeeds. The buffer
1746 * may be dropped during processing for congestion control or by the
1747 * protocol layers.
1748 *
1749 * return values:
1750 * NET_RX_SUCCESS (no congestion)
1751 * NET_RX_CN_LOW (low congestion)
1752 * NET_RX_CN_MOD (moderate congestion)
1753 * NET_RX_CN_HIGH (high congestion)
1754 * NET_RX_DROP (packet was dropped)
1755 *
1756 */
1757
1758int netif_rx(struct sk_buff *skb)
1759{
1da177e4
LT
1760 struct softnet_data *queue;
1761 unsigned long flags;
1762
1763 /* if netpoll wants it, pretend we never saw it */
1764 if (netpoll_rx(skb))
1765 return NET_RX_DROP;
1766
b7aa0bf7 1767 if (!skb->tstamp.tv64)
a61bbcf2 1768 net_timestamp(skb);
1da177e4
LT
1769
1770 /*
1771 * The code is rearranged so that the path is the most
1772 * short when CPU is congested, but is still operating.
1773 */
1774 local_irq_save(flags);
1da177e4
LT
1775 queue = &__get_cpu_var(softnet_data);
1776
1777 __get_cpu_var(netdev_rx_stat).total++;
1778 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1779 if (queue->input_pkt_queue.qlen) {
1da177e4
LT
1780enqueue:
1781 dev_hold(skb->dev);
1782 __skb_queue_tail(&queue->input_pkt_queue, skb);
1da177e4 1783 local_irq_restore(flags);
34008d8c 1784 return NET_RX_SUCCESS;
1da177e4
LT
1785 }
1786
bea3348e 1787 napi_schedule(&queue->backlog);
1da177e4
LT
1788 goto enqueue;
1789 }
1790
1da177e4
LT
1791 __get_cpu_var(netdev_rx_stat).dropped++;
1792 local_irq_restore(flags);
1793
1794 kfree_skb(skb);
1795 return NET_RX_DROP;
1796}
1797
/*
 * Call netif_rx() with preemption disabled and run any softirqs that
 * are pending afterwards. Returns what netif_rx() returned.
 */
int netif_rx_ni(struct sk_buff *skb)
{
	int ret;

	preempt_disable();
	ret = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return ret;
}

EXPORT_SYMBOL(netif_rx_ni);
1812
f2ccd8fa 1813static inline struct net_device *skb_bond(struct sk_buff *skb)
1da177e4
LT
1814{
1815 struct net_device *dev = skb->dev;
1816
8f903c70 1817 if (dev->master) {
7ea49ed7 1818 if (skb_bond_should_drop(skb)) {
8f903c70
JV
1819 kfree_skb(skb);
1820 return NULL;
1821 }
1da177e4 1822 skb->dev = dev->master;
8f903c70 1823 }
f2ccd8fa
DM
1824
1825 return dev;
1da177e4
LT
1826}
1827
bea3348e 1828
1da177e4
LT
1829static void net_tx_action(struct softirq_action *h)
1830{
1831 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1832
1833 if (sd->completion_queue) {
1834 struct sk_buff *clist;
1835
1836 local_irq_disable();
1837 clist = sd->completion_queue;
1838 sd->completion_queue = NULL;
1839 local_irq_enable();
1840
1841 while (clist) {
1842 struct sk_buff *skb = clist;
1843 clist = clist->next;
1844
1845 BUG_TRAP(!atomic_read(&skb->users));
1846 __kfree_skb(skb);
1847 }
1848 }
1849
1850 if (sd->output_queue) {
1851 struct net_device *head;
1852
1853 local_irq_disable();
1854 head = sd->output_queue;
1855 sd->output_queue = NULL;
1856 local_irq_enable();
1857
1858 while (head) {
1859 struct net_device *dev = head;
1860 head = head->next_sched;
1861
1862 smp_mb__before_clear_bit();
1863 clear_bit(__LINK_STATE_SCHED, &dev->state);
1864
1865 if (spin_trylock(&dev->queue_lock)) {
1866 qdisc_run(dev);
1867 spin_unlock(&dev->queue_lock);
1868 } else {
1869 netif_schedule(dev);
1870 }
1871 }
1872 }
1873}
1874
6f05f629
SH
1875static inline int deliver_skb(struct sk_buff *skb,
1876 struct packet_type *pt_prev,
1877 struct net_device *orig_dev)
1da177e4
LT
1878{
1879 atomic_inc(&skb->users);
f2ccd8fa 1880 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1da177e4
LT
1881}
1882
#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
/* These hooks defined here for ATM */
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;

/*
 * If bridge module is loaded call bridging hook.
 *  returns NULL if packet was consumed.
 */
struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
					struct sk_buff *skb) __read_mostly;

/*
 * Packets of type PACKET_LOOPBACK, and packets arriving on a device
 * that has no br_port, are returned untouched. Otherwise a pending
 * handler in *pt_prev is delivered first and the packet is given to
 * br_handle_frame_hook().
 */
static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
					    struct packet_type **pt_prev, int *ret,
					    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if (skb->pkt_type == PACKET_LOOPBACK)
		return skb;

	port = rcu_dereference(skb->dev->br_port);
	if (port == NULL)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, skb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1916
b863ceb7
PM
#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);

/*
 * Packets arriving on a device without a macvlan_port are returned
 * untouched. Otherwise a pending handler in *pt_prev is delivered
 * first and the packet is given to macvlan_handle_frame_hook().
 */
static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
					     struct packet_type **pt_prev,
					     int *ret,
					     struct net_device *orig_dev)
{
	if (!skb->dev->macvlan_port)
		return skb;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return macvlan_handle_frame_hook(skb);
}
#else
#define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
#endif
1938
1da177e4
LT
#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we dont have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesnt stop any functionality; if you dont have
 * the ingress scheduler, you just cant add policies on ingress.
 *
 */

/*
 * Run @skb through the ingress qdisc of its device, if there is one.
 * Returns the qdisc's verdict, TC_ACT_OK when no ingress qdisc is
 * attached, or TC_ACT_SHOT when the redirect ttl recorded in tc_verd
 * already exceeds MAX_RED_LOOP.
 */
static int ing_filter(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	u32 ttl = G_TC_RTTL(skb->tc_verd);
	struct Qdisc *ingress;
	int verdict = TC_ACT_OK;

	if (ttl > MAX_RED_LOOP) {
		printk(KERN_WARNING
		       "Redir loop detected Dropping packet (%d->%d)\n",
		       skb->iif, dev->ifindex);
		return TC_ACT_SHOT;
	}
	ttl++;

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

	spin_lock(&dev->ingress_lock);
	ingress = dev->qdisc_ingress;
	if (ingress != NULL)
		verdict = ingress->enqueue(skb, ingress);
	spin_unlock(&dev->ingress_lock);

	return verdict;
}

/*
 * Ingress classification step of the receive path. With an ingress
 * qdisc attached, a pending handler in *pt_prev is delivered first and
 * the packet is filtered; on TC_ACT_SHOT or TC_ACT_STOLEN the skb is
 * freed and NULL returned. In every other case tc_verd is cleared and
 * the skb returned.
 */
static inline struct sk_buff *handle_ing(struct sk_buff *skb,
					 struct packet_type **pt_prev,
					 int *ret, struct net_device *orig_dev)
{
	if (skb->dev->qdisc_ingress) {
		int verdict;

		if (*pt_prev) {
			*ret = deliver_skb(skb, *pt_prev, orig_dev);
			*pt_prev = NULL;
		} else {
			/* Huh? Why does turning on AF_PACKET affect this? */
			skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
		}

		verdict = ing_filter(skb);
		if (verdict == TC_ACT_SHOT || verdict == TC_ACT_STOLEN) {
			kfree_skb(skb);
			return NULL;
		}
	}

	skb->tc_verd = 0;
	return skb;
}
#endif
2000
2001int netif_receive_skb(struct sk_buff *skb)
2002{
2003 struct packet_type *ptype, *pt_prev;
f2ccd8fa 2004 struct net_device *orig_dev;
1da177e4 2005 int ret = NET_RX_DROP;
252e3346 2006 __be16 type;
1da177e4
LT
2007
2008 /* if we've gotten here through NAPI, check netpoll */
bea3348e 2009 if (netpoll_receive_skb(skb))
1da177e4
LT
2010 return NET_RX_DROP;
2011
b7aa0bf7 2012 if (!skb->tstamp.tv64)
a61bbcf2 2013 net_timestamp(skb);
1da177e4 2014
c01003c2
PM
2015 if (!skb->iif)
2016 skb->iif = skb->dev->ifindex;
86e65da9 2017
f2ccd8fa 2018 orig_dev = skb_bond(skb);
1da177e4 2019
8f903c70
JV
2020 if (!orig_dev)
2021 return NET_RX_DROP;
2022
1da177e4
LT
2023 __get_cpu_var(netdev_rx_stat).total++;
2024
c1d2bbe1 2025 skb_reset_network_header(skb);
badff6d0 2026 skb_reset_transport_header(skb);
b0e380b1 2027 skb->mac_len = skb->network_header - skb->mac_header;
1da177e4
LT
2028
2029 pt_prev = NULL;
2030
2031 rcu_read_lock();
2032
2033#ifdef CONFIG_NET_CLS_ACT
2034 if (skb->tc_verd & TC_NCLS) {
2035 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2036 goto ncls;
2037 }
2038#endif
2039
2040 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2041 if (!ptype->dev || ptype->dev == skb->dev) {
4ec93edb 2042 if (pt_prev)
f2ccd8fa 2043 ret = deliver_skb(skb, pt_prev, orig_dev);
1da177e4
LT
2044 pt_prev = ptype;
2045 }
2046 }
2047
2048#ifdef CONFIG_NET_CLS_ACT
f697c3e8
HX
2049 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2050 if (!skb)
1da177e4 2051 goto out;
1da177e4
LT
2052ncls:
2053#endif
2054
6229e362 2055 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
b863ceb7
PM
2056 if (!skb)
2057 goto out;
2058 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
6229e362 2059 if (!skb)
1da177e4
LT
2060 goto out;
2061
2062 type = skb->protocol;
2063 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
2064 if (ptype->type == type &&
2065 (!ptype->dev || ptype->dev == skb->dev)) {
4ec93edb 2066 if (pt_prev)
f2ccd8fa 2067 ret = deliver_skb(skb, pt_prev, orig_dev);
1da177e4
LT
2068 pt_prev = ptype;
2069 }
2070 }
2071
2072 if (pt_prev) {
f2ccd8fa 2073 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1da177e4
LT
2074 } else {
2075 kfree_skb(skb);
2076 /* Jamal, now you will not able to escape explaining
2077 * me how you were going to use this. :-)
2078 */
2079 ret = NET_RX_DROP;
2080 }
2081
2082out:
2083 rcu_read_unlock();
2084 return ret;
2085}
2086
bea3348e 2087static int process_backlog(struct napi_struct *napi, int quota)
1da177e4
LT
2088{
2089 int work = 0;
1da177e4
LT
2090 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2091 unsigned long start_time = jiffies;
2092
bea3348e
SH
2093 napi->weight = weight_p;
2094 do {
1da177e4
LT
2095 struct sk_buff *skb;
2096 struct net_device *dev;
2097
2098 local_irq_disable();
2099 skb = __skb_dequeue(&queue->input_pkt_queue);
bea3348e
SH
2100 if (!skb) {
2101 __napi_complete(napi);
2102 local_irq_enable();
2103 break;
2104 }
2105
1da177e4
LT
2106 local_irq_enable();
2107
2108 dev = skb->dev;
2109
2110 netif_receive_skb(skb);
2111
2112 dev_put(dev);
bea3348e 2113 } while (++work < quota && jiffies == start_time);
1da177e4 2114
bea3348e
SH
2115 return work;
2116}
1da177e4 2117
bea3348e
SH
2118/**
2119 * __napi_schedule - schedule for receive
c4ea43c5 2120 * @n: entry to schedule
bea3348e
SH
2121 *
2122 * The entry's receive function will be scheduled to run
2123 */
2124void fastcall __napi_schedule(struct napi_struct *n)
2125{
2126 unsigned long flags;
1da177e4 2127
bea3348e
SH
2128 local_irq_save(flags);
2129 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2130 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2131 local_irq_restore(flags);
1da177e4 2132}
bea3348e
SH
2133EXPORT_SYMBOL(__napi_schedule);
2134
1da177e4
LT
2135
2136static void net_rx_action(struct softirq_action *h)
2137{
bea3348e 2138 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
1da177e4 2139 unsigned long start_time = jiffies;
51b0bded 2140 int budget = netdev_budget;
53fb95d3
MM
2141 void *have;
2142
1da177e4
LT
2143 local_irq_disable();
2144
bea3348e
SH
2145 while (!list_empty(list)) {
2146 struct napi_struct *n;
2147 int work, weight;
1da177e4 2148
bea3348e
SH
2149 /* If softirq window is exhuasted then punt.
2150 *
2151 * Note that this is a slight policy change from the
2152 * previous NAPI code, which would allow up to 2
2153 * jiffies to pass before breaking out. The test
2154 * used to be "jiffies - start_time > 1".
2155 */
2156 if (unlikely(budget <= 0 || jiffies != start_time))
1da177e4
LT
2157 goto softnet_break;
2158
2159 local_irq_enable();
2160
bea3348e
SH
2161 /* Even though interrupts have been re-enabled, this
2162 * access is safe because interrupts can only add new
2163 * entries to the tail of this list, and only ->poll()
2164 * calls can remove this head entry from the list.
2165 */
2166 n = list_entry(list->next, struct napi_struct, poll_list);
1da177e4 2167
bea3348e
SH
2168 have = netpoll_poll_lock(n);
2169
2170 weight = n->weight;
2171
2172 work = n->poll(n, weight);
2173
2174 WARN_ON_ONCE(work > weight);
2175
2176 budget -= work;
2177
2178 local_irq_disable();
2179
2180 /* Drivers must not modify the NAPI state if they
2181 * consume the entire weight. In such cases this code
2182 * still "owns" the NAPI instance and therefore can
2183 * move the instance around on the list at-will.
2184 */
2185 if (unlikely(work == weight))
2186 list_move_tail(&n->poll_list, list);
2187
2188 netpoll_poll_unlock(have);
1da177e4
LT
2189 }
2190out:
515e06c4 2191 local_irq_enable();
bea3348e 2192
db217334
CL
2193#ifdef CONFIG_NET_DMA
2194 /*
2195 * There may not be any more sk_buffs coming right now, so push
2196 * any pending DMA copies to hardware
2197 */
d379b01e
DW
2198 if (!cpus_empty(net_dma.channel_mask)) {
2199 int chan_idx;
2200 for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2201 struct dma_chan *chan = net_dma.channels[chan_idx];
2202 if (chan)
2203 dma_async_memcpy_issue_pending(chan);
2204 }
db217334
CL
2205 }
2206#endif
bea3348e 2207
1da177e4
LT
2208 return;
2209
2210softnet_break:
2211 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2212 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2213 goto out;
2214}
2215
2216static gifconf_func_t * gifconf_list [NPROTO];
2217
2218/**
2219 * register_gifconf - register a SIOCGIF handler
2220 * @family: Address family
2221 * @gifconf: Function handler
2222 *
2223 * Register protocol dependent address dumping routines. The handler
2224 * that is passed must not be freed or reused until it has been replaced
2225 * by another handler.
2226 */
2227int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2228{
2229 if (family >= NPROTO)
2230 return -EINVAL;
2231 gifconf_list[family] = gifconf;
2232 return 0;
2233}
2234
2235
2236/*
2237 * Map an interface index to its name (SIOCGIFNAME)
2238 */
2239
2240/*
2241 * We need this ioctl for efficient implementation of the
2242 * if_indextoname() function required by the IPv6 API. Without
2243 * it, we would have to search all the interfaces to find a
2244 * match. --pb
2245 */
2246
881d966b 2247static int dev_ifname(struct net *net, struct ifreq __user *arg)
1da177e4
LT
2248{
2249 struct net_device *dev;
2250 struct ifreq ifr;
2251
2252 /*
2253 * Fetch the caller's info block.
2254 */
2255
2256 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2257 return -EFAULT;
2258
2259 read_lock(&dev_base_lock);
881d966b 2260 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
1da177e4
LT
2261 if (!dev) {
2262 read_unlock(&dev_base_lock);
2263 return -ENODEV;
2264 }
2265
2266 strcpy(ifr.ifr_name, dev->name);
2267 read_unlock(&dev_base_lock);
2268
2269 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2270 return -EFAULT;
2271 return 0;
2272}
2273
2274/*
2275 * Perform a SIOCGIFCONF call. This structure will change
2276 * size eventually, and there is nothing I can do about it.
2277 * Thus we will need a 'compatibility mode'.
2278 */
2279
881d966b 2280static int dev_ifconf(struct net *net, char __user *arg)
1da177e4
LT
2281{
2282 struct ifconf ifc;
2283 struct net_device *dev;
2284 char __user *pos;
2285 int len;
2286 int total;
2287 int i;
2288
2289 /*
2290 * Fetch the caller's info block.
2291 */
2292
2293 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2294 return -EFAULT;
2295
2296 pos = ifc.ifc_buf;
2297 len = ifc.ifc_len;
2298
2299 /*
2300 * Loop over the interfaces, and write an info block for each.
2301 */
2302
2303 total = 0;
881d966b 2304 for_each_netdev(net, dev) {
1da177e4
LT
2305 for (i = 0; i < NPROTO; i++) {
2306 if (gifconf_list[i]) {
2307 int done;
2308 if (!pos)
2309 done = gifconf_list[i](dev, NULL, 0);
2310 else
2311 done = gifconf_list[i](dev, pos + total,
2312 len - total);
2313 if (done < 0)
2314 return -EFAULT;
2315 total += done;
2316 }
2317 }
4ec93edb 2318 }
1da177e4
LT
2319
2320 /*
2321 * All done. Write the updated control block back to the caller.
2322 */
2323 ifc.ifc_len = total;
2324
2325 /*
2326 * Both BSD and Solaris return 0 here, so we do too.
2327 */
2328 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2329}
2330
2331#ifdef CONFIG_PROC_FS
2332/*
2333 * This is invoked by the /proc filesystem handler to display a device
2334 * in detail.
2335 */
7562f876 2336void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1da177e4 2337{
881d966b 2338 struct net *net = seq->private;
7562f876 2339 loff_t off;
1da177e4 2340 struct net_device *dev;
1da177e4 2341
7562f876
PE
2342 read_lock(&dev_base_lock);
2343 if (!*pos)
2344 return SEQ_START_TOKEN;
1da177e4 2345
7562f876 2346 off = 1;
881d966b 2347 for_each_netdev(net, dev)
7562f876
PE
2348 if (off++ == *pos)
2349 return dev;
1da177e4 2350
7562f876 2351 return NULL;
1da177e4
LT
2352}
2353
2354void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2355{
881d966b 2356 struct net *net = seq->private;
1da177e4 2357 ++*pos;
7562f876 2358 return v == SEQ_START_TOKEN ?
881d966b 2359 first_net_device(net) : next_net_device((struct net_device *)v);
1da177e4
LT
2360}
2361
2362void dev_seq_stop(struct seq_file *seq, void *v)
2363{
2364 read_unlock(&dev_base_lock);
2365}
2366
2367static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2368{
c45d286e 2369 struct net_device_stats *stats = dev->get_stats(dev);
1da177e4 2370
5a1b5898
RR
2371 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2372 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2373 dev->name, stats->rx_bytes, stats->rx_packets,
2374 stats->rx_errors,
2375 stats->rx_dropped + stats->rx_missed_errors,
2376 stats->rx_fifo_errors,
2377 stats->rx_length_errors + stats->rx_over_errors +
2378 stats->rx_crc_errors + stats->rx_frame_errors,
2379 stats->rx_compressed, stats->multicast,
2380 stats->tx_bytes, stats->tx_packets,
2381 stats->tx_errors, stats->tx_dropped,
2382 stats->tx_fifo_errors, stats->collisions,
2383 stats->tx_carrier_errors +
2384 stats->tx_aborted_errors +
2385 stats->tx_window_errors +
2386 stats->tx_heartbeat_errors,
2387 stats->tx_compressed);
1da177e4
LT
2388}
2389
2390/*
2391 * Called from the PROCfs module. This now uses the new arbitrary sized
2392 * /proc/net interface to create /proc/net/dev
2393 */
2394static int dev_seq_show(struct seq_file *seq, void *v)
2395{
2396 if (v == SEQ_START_TOKEN)
2397 seq_puts(seq, "Inter-| Receive "
2398 " | Transmit\n"
2399 " face |bytes packets errs drop fifo frame "
2400 "compressed multicast|bytes packets errs "
2401 "drop fifo colls carrier compressed\n");
2402 else
2403 dev_seq_printf_stats(seq, v);
2404 return 0;
2405}
2406
2407static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2408{
2409 struct netif_rx_stats *rc = NULL;
2410
2411 while (*pos < NR_CPUS)
4ec93edb 2412 if (cpu_online(*pos)) {
1da177e4
LT
2413 rc = &per_cpu(netdev_rx_stat, *pos);
2414 break;
2415 } else
2416 ++*pos;
2417 return rc;
2418}
2419
2420static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2421{
2422 return softnet_get_online(pos);
2423}
2424
2425static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2426{
2427 ++*pos;
2428 return softnet_get_online(pos);
2429}
2430
/* seq_file ->stop: nothing to release, the iterator takes no locks. */
static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}
2434
2435static int softnet_seq_show(struct seq_file *seq, void *v)
2436{
2437 struct netif_rx_stats *s = v;
2438
2439 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
31aa02c5 2440 s->total, s->dropped, s->time_squeeze, 0,
c1ebcdb8
SH
2441 0, 0, 0, 0, /* was fastroute */
2442 s->cpu_collision );
1da177e4
LT
2443 return 0;
2444}
2445
f690808e 2446static const struct seq_operations dev_seq_ops = {
1da177e4
LT
2447 .start = dev_seq_start,
2448 .next = dev_seq_next,
2449 .stop = dev_seq_stop,
2450 .show = dev_seq_show,
2451};
2452
2453static int dev_seq_open(struct inode *inode, struct file *file)
2454{
881d966b
EB
2455 struct seq_file *seq;
2456 int res;
2457 res = seq_open(file, &dev_seq_ops);
2458 if (!res) {
2459 seq = file->private_data;
077130c0
EB
2460 seq->private = get_proc_net(inode);
2461 if (!seq->private) {
2462 seq_release(inode, file);
2463 res = -ENXIO;
2464 }
881d966b
EB
2465 }
2466 return res;
2467}
2468
2469static int dev_seq_release(struct inode *inode, struct file *file)
2470{
2471 struct seq_file *seq = file->private_data;
2472 struct net *net = seq->private;
2473 put_net(net);
2474 return seq_release(inode, file);
1da177e4
LT
2475}
2476
9a32144e 2477static const struct file_operations dev_seq_fops = {
1da177e4
LT
2478 .owner = THIS_MODULE,
2479 .open = dev_seq_open,
2480 .read = seq_read,
2481 .llseek = seq_lseek,
881d966b 2482 .release = dev_seq_release,
1da177e4
LT
2483};
2484
f690808e 2485static const struct seq_operations softnet_seq_ops = {
1da177e4
LT
2486 .start = softnet_seq_start,
2487 .next = softnet_seq_next,
2488 .stop = softnet_seq_stop,
2489 .show = softnet_seq_show,
2490};
2491
2492static int softnet_seq_open(struct inode *inode, struct file *file)
2493{
2494 return seq_open(file, &softnet_seq_ops);
2495}
2496
9a32144e 2497static const struct file_operations softnet_seq_fops = {
1da177e4
LT
2498 .owner = THIS_MODULE,
2499 .open = softnet_seq_open,
2500 .read = seq_read,
2501 .llseek = seq_lseek,
2502 .release = seq_release,
2503};
2504
0e1256ff
SH
2505static void *ptype_get_idx(loff_t pos)
2506{
2507 struct packet_type *pt = NULL;
2508 loff_t i = 0;
2509 int t;
2510
2511 list_for_each_entry_rcu(pt, &ptype_all, list) {
2512 if (i == pos)
2513 return pt;
2514 ++i;
2515 }
2516
2517 for (t = 0; t < 16; t++) {
2518 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2519 if (i == pos)
2520 return pt;
2521 ++i;
2522 }
2523 }
2524 return NULL;
2525}
2526
2527static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2528{
2529 rcu_read_lock();
2530 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2531}
2532
2533static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2534{
2535 struct packet_type *pt;
2536 struct list_head *nxt;
2537 int hash;
2538
2539 ++*pos;
2540 if (v == SEQ_START_TOKEN)
2541 return ptype_get_idx(0);
2542
2543 pt = v;
2544 nxt = pt->list.next;
2545 if (pt->type == htons(ETH_P_ALL)) {
2546 if (nxt != &ptype_all)
2547 goto found;
2548 hash = 0;
2549 nxt = ptype_base[0].next;
2550 } else
2551 hash = ntohs(pt->type) & 15;
2552
2553 while (nxt == &ptype_base[hash]) {
2554 if (++hash >= 16)
2555 return NULL;
2556 nxt = ptype_base[hash].next;
2557 }
2558found:
2559 return list_entry(nxt, struct packet_type, list);
2560}
2561
/* seq_file ->stop: leave the RCU section entered by ptype_seq_start(). */
static void ptype_seq_stop(struct seq_file *seq, void *v)
{
	rcu_read_unlock();
}
2566
/*
 * Print the handler address @sym.  With CONFIG_KALLSYMS and a successful
 * lookup the output is "[:module:]symbol+0xoffset"; otherwise the raw
 * pointer is printed in brackets.
 */
static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);
	if (symname) {
		char *delim = ":";

		/* No module name: print neither the name nor the colons. */
		if (!modname)
			modname = delim = "";

		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}
2591
2592static int ptype_seq_show(struct seq_file *seq, void *v)
2593{
2594 struct packet_type *pt = v;
2595
2596 if (v == SEQ_START_TOKEN)
2597 seq_puts(seq, "Type Device Function\n");
2598 else {
2599 if (pt->type == htons(ETH_P_ALL))
2600 seq_puts(seq, "ALL ");
2601 else
2602 seq_printf(seq, "%04x", ntohs(pt->type));
2603
2604 seq_printf(seq, " %-8s ",
2605 pt->dev ? pt->dev->name : "");
2606 ptype_seq_decode(seq, pt->func);
2607 seq_putc(seq, '\n');
2608 }
2609
2610 return 0;
2611}
2612
2613static const struct seq_operations ptype_seq_ops = {
2614 .start = ptype_seq_start,
2615 .next = ptype_seq_next,
2616 .stop = ptype_seq_stop,
2617 .show = ptype_seq_show,
2618};
2619
2620static int ptype_seq_open(struct inode *inode, struct file *file)
2621{
2622 return seq_open(file, &ptype_seq_ops);
2623}
2624
2625static const struct file_operations ptype_seq_fops = {
2626 .owner = THIS_MODULE,
2627 .open = ptype_seq_open,
2628 .read = seq_read,
2629 .llseek = seq_lseek,
2630 .release = seq_release,
2631};
2632
2633
4665079c 2634static int __net_init dev_proc_net_init(struct net *net)
1da177e4
LT
2635{
2636 int rc = -ENOMEM;
2637
881d966b 2638 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
1da177e4 2639 goto out;
881d966b 2640 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
1da177e4 2641 goto out_dev;
881d966b 2642 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
457c4cbc 2643 goto out_softnet;
0e1256ff 2644
881d966b 2645 if (wext_proc_init(net))
457c4cbc 2646 goto out_ptype;
1da177e4
LT
2647 rc = 0;
2648out:
2649 return rc;
457c4cbc 2650out_ptype:
881d966b 2651 proc_net_remove(net, "ptype");
1da177e4 2652out_softnet:
881d966b 2653 proc_net_remove(net, "softnet_stat");
1da177e4 2654out_dev:
881d966b 2655 proc_net_remove(net, "dev");
1da177e4
LT
2656 goto out;
2657}
881d966b 2658
4665079c 2659static void __net_exit dev_proc_net_exit(struct net *net)
881d966b
EB
2660{
2661 wext_proc_exit(net);
2662
2663 proc_net_remove(net, "ptype");
2664 proc_net_remove(net, "softnet_stat");
2665 proc_net_remove(net, "dev");
2666}
2667
4665079c 2668static struct pernet_operations __net_initdata dev_proc_ops = {
881d966b
EB
2669 .init = dev_proc_net_init,
2670 .exit = dev_proc_net_exit,
2671};
2672
2673static int __init dev_proc_init(void)
2674{
2675 return register_pernet_subsys(&dev_proc_ops);
2676}
1da177e4
LT
2677#else
2678#define dev_proc_init() 0
2679#endif /* CONFIG_PROC_FS */
2680
2681
2682/**
2683 * netdev_set_master - set up master/slave pair
2684 * @slave: slave device
2685 * @master: new master device
2686 *
2687 * Changes the master device of the slave. Pass %NULL to break the
2688 * bonding. The caller must hold the RTNL semaphore. On a failure
2689 * a negative errno code is returned. On success the reference counts
2690 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2691 * function returns zero.
2692 */
2693int netdev_set_master(struct net_device *slave, struct net_device *master)
2694{
2695 struct net_device *old = slave->master;
2696
2697 ASSERT_RTNL();
2698
2699 if (master) {
2700 if (old)
2701 return -EBUSY;
2702 dev_hold(master);
2703 }
2704
2705 slave->master = master;
4ec93edb 2706
1da177e4
LT
2707 synchronize_net();
2708
2709 if (old)
2710 dev_put(old);
2711
2712 if (master)
2713 slave->flags |= IFF_SLAVE;
2714 else
2715 slave->flags &= ~IFF_SLAVE;
2716
2717 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2718 return 0;
2719}
2720
4417da66 2721static void __dev_set_promiscuity(struct net_device *dev, int inc)
1da177e4
LT
2722{
2723 unsigned short old_flags = dev->flags;
2724
24023451
PM
2725 ASSERT_RTNL();
2726
1da177e4
LT
2727 if ((dev->promiscuity += inc) == 0)
2728 dev->flags &= ~IFF_PROMISC;
52609c0b
DC
2729 else
2730 dev->flags |= IFF_PROMISC;
2731 if (dev->flags != old_flags) {
1da177e4
LT
2732 printk(KERN_INFO "device %s %s promiscuous mode\n",
2733 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
4ec93edb 2734 "left");
5bdb9886
SG
2735 audit_log(current->audit_context, GFP_ATOMIC,
2736 AUDIT_ANOM_PROMISCUOUS,
2737 "dev=%s prom=%d old_prom=%d auid=%u",
2738 dev->name, (dev->flags & IFF_PROMISC),
2739 (old_flags & IFF_PROMISC),
4ec93edb 2740 audit_get_loginuid(current->audit_context));
24023451
PM
2741
2742 if (dev->change_rx_flags)
2743 dev->change_rx_flags(dev, IFF_PROMISC);
1da177e4
LT
2744 }
2745}
2746
4417da66
PM
2747/**
2748 * dev_set_promiscuity - update promiscuity count on a device
2749 * @dev: device
2750 * @inc: modifier
2751 *
2752 * Add or remove promiscuity from a device. While the count in the device
2753 * remains above zero the interface remains promiscuous. Once it hits zero
2754 * the device reverts back to normal filtering operation. A negative inc
2755 * value is used to drop promiscuity on the device.
2756 */
2757void dev_set_promiscuity(struct net_device *dev, int inc)
2758{
2759 unsigned short old_flags = dev->flags;
2760
2761 __dev_set_promiscuity(dev, inc);
2762 if (dev->flags != old_flags)
2763 dev_set_rx_mode(dev);
2764}
2765
1da177e4
LT
2766/**
2767 * dev_set_allmulti - update allmulti count on a device
2768 * @dev: device
2769 * @inc: modifier
2770 *
2771 * Add or remove reception of all multicast frames to a device. While the
2772 * count in the device remains above zero the interface remains listening
2773 * to all interfaces. Once it hits zero the device reverts back to normal
2774 * filtering operation. A negative @inc value is used to drop the counter
2775 * when releasing a resource needing all multicasts.
2776 */
2777
2778void dev_set_allmulti(struct net_device *dev, int inc)
2779{
2780 unsigned short old_flags = dev->flags;
2781
24023451
PM
2782 ASSERT_RTNL();
2783
1da177e4
LT
2784 dev->flags |= IFF_ALLMULTI;
2785 if ((dev->allmulti += inc) == 0)
2786 dev->flags &= ~IFF_ALLMULTI;
24023451
PM
2787 if (dev->flags ^ old_flags) {
2788 if (dev->change_rx_flags)
2789 dev->change_rx_flags(dev, IFF_ALLMULTI);
4417da66 2790 dev_set_rx_mode(dev);
24023451 2791 }
4417da66
PM
2792}
2793
2794/*
2795 * Upload unicast and multicast address lists to device and
2796 * configure RX filtering. When the device doesn't support unicast
2797 * filtering it is put in promiscous mode while unicast addresses
2798 * are present.
2799 */
2800void __dev_set_rx_mode(struct net_device *dev)
2801{
2802 /* dev_open will call this function so the list will stay sane. */
2803 if (!(dev->flags&IFF_UP))
2804 return;
2805
2806 if (!netif_device_present(dev))
40b77c94 2807 return;
4417da66
PM
2808
2809 if (dev->set_rx_mode)
2810 dev->set_rx_mode(dev);
2811 else {
2812 /* Unicast addresses changes may only happen under the rtnl,
2813 * therefore calling __dev_set_promiscuity here is safe.
2814 */
2815 if (dev->uc_count > 0 && !dev->uc_promisc) {
2816 __dev_set_promiscuity(dev, 1);
2817 dev->uc_promisc = 1;
2818 } else if (dev->uc_count == 0 && dev->uc_promisc) {
2819 __dev_set_promiscuity(dev, -1);
2820 dev->uc_promisc = 0;
2821 }
2822
2823 if (dev->set_multicast_list)
2824 dev->set_multicast_list(dev);
2825 }
2826}
2827
/* Locked wrapper: run __dev_set_rx_mode() under netif_tx_lock_bh(). */
void dev_set_rx_mode(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_tx_unlock_bh(dev);
}
2834
61cbc2fc
PM
2835int __dev_addr_delete(struct dev_addr_list **list, int *count,
2836 void *addr, int alen, int glbl)
bf742482
PM
2837{
2838 struct dev_addr_list *da;
2839
2840 for (; (da = *list) != NULL; list = &da->next) {
2841 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2842 alen == da->da_addrlen) {
2843 if (glbl) {
2844 int old_glbl = da->da_gusers;
2845 da->da_gusers = 0;
2846 if (old_glbl == 0)
2847 break;
2848 }
2849 if (--da->da_users)
2850 return 0;
2851
2852 *list = da->next;
2853 kfree(da);
61cbc2fc 2854 (*count)--;
bf742482
PM
2855 return 0;
2856 }
2857 }
2858 return -ENOENT;
2859}
2860
61cbc2fc
PM
2861int __dev_addr_add(struct dev_addr_list **list, int *count,
2862 void *addr, int alen, int glbl)
bf742482
PM
2863{
2864 struct dev_addr_list *da;
2865
2866 for (da = *list; da != NULL; da = da->next) {
2867 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2868 da->da_addrlen == alen) {
2869 if (glbl) {
2870 int old_glbl = da->da_gusers;
2871 da->da_gusers = 1;
2872 if (old_glbl)
2873 return 0;
2874 }
2875 da->da_users++;
2876 return 0;
2877 }
2878 }
2879
2880 da = kmalloc(sizeof(*da), GFP_ATOMIC);
2881 if (da == NULL)
2882 return -ENOMEM;
2883 memcpy(da->da_addr, addr, alen);
2884 da->da_addrlen = alen;
2885 da->da_users = 1;
2886 da->da_gusers = glbl ? 1 : 0;
2887 da->next = *list;
2888 *list = da;
61cbc2fc 2889 (*count)++;
bf742482
PM
2890 return 0;
2891}
2892
4417da66
PM
2893/**
2894 * dev_unicast_delete - Release secondary unicast address.
2895 * @dev: device
0ed72ec4
RD
2896 * @addr: address to delete
2897 * @alen: length of @addr
4417da66
PM
2898 *
2899 * Release reference to a secondary unicast address and remove it
0ed72ec4 2900 * from the device if the reference count drops to zero.
4417da66
PM
2901 *
2902 * The caller must hold the rtnl_mutex.
2903 */
2904int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2905{
2906 int err;
2907
2908 ASSERT_RTNL();
2909
2910 netif_tx_lock_bh(dev);
61cbc2fc
PM
2911 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2912 if (!err)
4417da66 2913 __dev_set_rx_mode(dev);
4417da66
PM
2914 netif_tx_unlock_bh(dev);
2915 return err;
2916}
2917EXPORT_SYMBOL(dev_unicast_delete);
2918
2919/**
2920 * dev_unicast_add - add a secondary unicast address
2921 * @dev: device
0ed72ec4
RD
2922 * @addr: address to delete
2923 * @alen: length of @addr
4417da66
PM
2924 *
2925 * Add a secondary unicast address to the device or increase
2926 * the reference count if it already exists.
2927 *
2928 * The caller must hold the rtnl_mutex.
2929 */
2930int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2931{
2932 int err;
2933
2934 ASSERT_RTNL();
2935
2936 netif_tx_lock_bh(dev);
61cbc2fc
PM
2937 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2938 if (!err)
4417da66 2939 __dev_set_rx_mode(dev);
4417da66
PM
2940 netif_tx_unlock_bh(dev);
2941 return err;
2942}
2943EXPORT_SYMBOL(dev_unicast_add);
2944
12972621
DC
2945static void __dev_addr_discard(struct dev_addr_list **list)
2946{
2947 struct dev_addr_list *tmp;
2948
2949 while (*list != NULL) {
2950 tmp = *list;
2951 *list = tmp->next;
2952 if (tmp->da_users > tmp->da_gusers)
2953 printk("__dev_addr_discard: address leakage! "
2954 "da_users=%d\n", tmp->da_users);
2955 kfree(tmp);
2956 }
2957}
2958
26cc2522 2959static void dev_addr_discard(struct net_device *dev)
4417da66
PM
2960{
2961 netif_tx_lock_bh(dev);
26cc2522 2962
4417da66
PM
2963 __dev_addr_discard(&dev->uc_list);
2964 dev->uc_count = 0;
4417da66 2965
456ad75c
DC
2966 __dev_addr_discard(&dev->mc_list);
2967 dev->mc_count = 0;
26cc2522 2968
456ad75c
DC
2969 netif_tx_unlock_bh(dev);
2970}
2971
1da177e4
LT
2972unsigned dev_get_flags(const struct net_device *dev)
2973{
2974 unsigned flags;
2975
2976 flags = (dev->flags & ~(IFF_PROMISC |
2977 IFF_ALLMULTI |
b00055aa
SR
2978 IFF_RUNNING |
2979 IFF_LOWER_UP |
2980 IFF_DORMANT)) |
1da177e4
LT
2981 (dev->gflags & (IFF_PROMISC |
2982 IFF_ALLMULTI));
2983
b00055aa
SR
2984 if (netif_running(dev)) {
2985 if (netif_oper_up(dev))
2986 flags |= IFF_RUNNING;
2987 if (netif_carrier_ok(dev))
2988 flags |= IFF_LOWER_UP;
2989 if (netif_dormant(dev))
2990 flags |= IFF_DORMANT;
2991 }
1da177e4
LT
2992
2993 return flags;
2994}
2995
2996int dev_change_flags(struct net_device *dev, unsigned flags)
2997{
7c355f53 2998 int ret, changes;
1da177e4
LT
2999 int old_flags = dev->flags;
3000
24023451
PM
3001 ASSERT_RTNL();
3002
1da177e4
LT
3003 /*
3004 * Set the flags on our device.
3005 */
3006
3007 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3008 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3009 IFF_AUTOMEDIA)) |
3010 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3011 IFF_ALLMULTI));
3012
3013 /*
3014 * Load in the correct multicast list now the flags have changed.
3015 */
3016
24023451
PM
3017 if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
3018 dev->change_rx_flags(dev, IFF_MULTICAST);
3019
4417da66 3020 dev_set_rx_mode(dev);
1da177e4
LT
3021
3022 /*
3023 * Have we downed the interface. We handle IFF_UP ourselves
3024 * according to user attempts to set it, rather than blindly
3025 * setting it.
3026 */
3027
3028 ret = 0;
3029 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3030 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3031
3032 if (!ret)
4417da66 3033 dev_set_rx_mode(dev);
1da177e4
LT
3034 }
3035
3036 if (dev->flags & IFF_UP &&
3037 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3038 IFF_VOLATILE)))
056925ab 3039 call_netdevice_notifiers(NETDEV_CHANGE, dev);
1da177e4
LT
3040
3041 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3042 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3043 dev->gflags ^= IFF_PROMISC;
3044 dev_set_promiscuity(dev, inc);
3045 }
3046
3047 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3048 is important. Some (broken) drivers set IFF_PROMISC, when
3049 IFF_ALLMULTI is requested not asking us and not reporting.
3050 */
3051 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3052 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3053 dev->gflags ^= IFF_ALLMULTI;
3054 dev_set_allmulti(dev, inc);
3055 }
3056
7c355f53
TG
3057 /* Exclude state transition flags, already notified */
3058 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3059 if (changes)
3060 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
1da177e4
LT
3061
3062 return ret;
3063}
3064
3065int dev_set_mtu(struct net_device *dev, int new_mtu)
3066{
3067 int err;
3068
3069 if (new_mtu == dev->mtu)
3070 return 0;
3071
3072 /* MTU must be positive. */
3073 if (new_mtu < 0)
3074 return -EINVAL;
3075
3076 if (!netif_device_present(dev))
3077 return -ENODEV;
3078
3079 err = 0;
3080 if (dev->change_mtu)
3081 err = dev->change_mtu(dev, new_mtu);
3082 else
3083 dev->mtu = new_mtu;
3084 if (!err && dev->flags & IFF_UP)
056925ab 3085 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
1da177e4
LT
3086 return err;
3087}
3088
3089int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3090{
3091 int err;
3092
3093 if (!dev->set_mac_address)
3094 return -EOPNOTSUPP;
3095 if (sa->sa_family != dev->type)
3096 return -EINVAL;
3097 if (!netif_device_present(dev))
3098 return -ENODEV;
3099 err = dev->set_mac_address(dev, sa);
3100 if (!err)
056925ab 3101 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1da177e4
LT
3102 return err;
3103}
3104
3105/*
14e3e079 3106 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
1da177e4 3107 */
14e3e079 3108static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
1da177e4
LT
3109{
3110 int err;
881d966b 3111 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
1da177e4
LT
3112
3113 if (!dev)
3114 return -ENODEV;
3115
3116 switch (cmd) {
3117 case SIOCGIFFLAGS: /* Get interface flags */
3118 ifr->ifr_flags = dev_get_flags(dev);
3119 return 0;
3120
1da177e4
LT
3121 case SIOCGIFMETRIC: /* Get the metric on the interface
3122 (currently unused) */
3123 ifr->ifr_metric = 0;
3124 return 0;
3125
1da177e4
LT
3126 case SIOCGIFMTU: /* Get the MTU of a device */
3127 ifr->ifr_mtu = dev->mtu;
3128 return 0;
3129
1da177e4
LT
3130 case SIOCGIFHWADDR:
3131 if (!dev->addr_len)
3132 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3133 else
3134 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3135 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3136 ifr->ifr_hwaddr.sa_family = dev->type;
3137 return 0;
3138
14e3e079
JG
3139 case SIOCGIFSLAVE:
3140 err = -EINVAL;
3141 break;
3142
3143 case SIOCGIFMAP:
3144 ifr->ifr_map.mem_start = dev->mem_start;
3145 ifr->ifr_map.mem_end = dev->mem_end;
3146 ifr->ifr_map.base_addr = dev->base_addr;
3147 ifr->ifr_map.irq = dev->irq;
3148 ifr->ifr_map.dma = dev->dma;
3149 ifr->ifr_map.port = dev->if_port;
3150 return 0;
3151
3152 case SIOCGIFINDEX:
3153 ifr->ifr_ifindex = dev->ifindex;
3154 return 0;
3155
3156 case SIOCGIFTXQLEN:
3157 ifr->ifr_qlen = dev->tx_queue_len;
3158 return 0;
3159
3160 default:
3161 /* dev_ioctl() should ensure this case
3162 * is never reached
3163 */
3164 WARN_ON(1);
3165 err = -EINVAL;
3166 break;
3167
3168 }
3169 return err;
3170}
3171
3172/*
3173 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3174 */
3175static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3176{
3177 int err;
3178 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3179
3180 if (!dev)
3181 return -ENODEV;
3182
3183 switch (cmd) {
3184 case SIOCSIFFLAGS: /* Set interface flags */
3185 return dev_change_flags(dev, ifr->ifr_flags);
3186
3187 case SIOCSIFMETRIC: /* Set the metric on the interface
3188 (currently unused) */
3189 return -EOPNOTSUPP;
3190
3191 case SIOCSIFMTU: /* Set the MTU of a device */
3192 return dev_set_mtu(dev, ifr->ifr_mtu);
3193
1da177e4
LT
3194 case SIOCSIFHWADDR:
3195 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3196
3197 case SIOCSIFHWBROADCAST:
3198 if (ifr->ifr_hwaddr.sa_family != dev->type)
3199 return -EINVAL;
3200 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3201 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
056925ab 3202 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
1da177e4
LT
3203 return 0;
3204
1da177e4
LT
3205 case SIOCSIFMAP:
3206 if (dev->set_config) {
3207 if (!netif_device_present(dev))
3208 return -ENODEV;
3209 return dev->set_config(dev, &ifr->ifr_map);
3210 }
3211 return -EOPNOTSUPP;
3212
3213 case SIOCADDMULTI:
3214 if (!dev->set_multicast_list ||
3215 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3216 return -EINVAL;
3217 if (!netif_device_present(dev))
3218 return -ENODEV;
3219 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3220 dev->addr_len, 1);
3221
3222 case SIOCDELMULTI:
3223 if (!dev->set_multicast_list ||
3224 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3225 return -EINVAL;
3226 if (!netif_device_present(dev))
3227 return -ENODEV;
3228 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3229 dev->addr_len, 1);
3230
1da177e4
LT
3231 case SIOCSIFTXQLEN:
3232 if (ifr->ifr_qlen < 0)
3233 return -EINVAL;
3234 dev->tx_queue_len = ifr->ifr_qlen;
3235 return 0;
3236
3237 case SIOCSIFNAME:
3238 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3239 return dev_change_name(dev, ifr->ifr_newname);
3240
3241 /*
3242 * Unknown or private ioctl
3243 */
3244
3245 default:
3246 if ((cmd >= SIOCDEVPRIVATE &&
3247 cmd <= SIOCDEVPRIVATE + 15) ||
3248 cmd == SIOCBONDENSLAVE ||
3249 cmd == SIOCBONDRELEASE ||
3250 cmd == SIOCBONDSETHWADDR ||
3251 cmd == SIOCBONDSLAVEINFOQUERY ||
3252 cmd == SIOCBONDINFOQUERY ||
3253 cmd == SIOCBONDCHANGEACTIVE ||
3254 cmd == SIOCGMIIPHY ||
3255 cmd == SIOCGMIIREG ||
3256 cmd == SIOCSMIIREG ||
3257 cmd == SIOCBRADDIF ||
3258 cmd == SIOCBRDELIF ||
3259 cmd == SIOCWANDEV) {
3260 err = -EOPNOTSUPP;
3261 if (dev->do_ioctl) {
3262 if (netif_device_present(dev))
3263 err = dev->do_ioctl(dev, ifr,
3264 cmd);
3265 else
3266 err = -ENODEV;
3267 }
3268 } else
3269 err = -EINVAL;
3270
3271 }
3272 return err;
3273}
3274
3275/*
3276 * This function handles all "interface"-type I/O control requests. The actual
3277 * 'doing' part of this is dev_ifsioc above.
3278 */
3279
3280/**
3281 * dev_ioctl - network device ioctl
c4ea43c5 3282 * @net: the applicable net namespace
1da177e4
LT
3283 * @cmd: command to issue
3284 * @arg: pointer to a struct ifreq in user space
3285 *
3286 * Issue ioctl functions to devices. This is normally called by the
3287 * user space syscall interfaces but can sometimes be useful for
3288 * other purposes. The return value is the return from the syscall if
3289 * positive or a negative errno code on error.
3290 */
3291
881d966b 3292int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4
LT
3293{
3294 struct ifreq ifr;
3295 int ret;
3296 char *colon;
3297
3298 /* One special case: SIOCGIFCONF takes ifconf argument
3299 and requires shared lock, because it sleeps writing
3300 to user space.
3301 */
3302
3303 if (cmd == SIOCGIFCONF) {
6756ae4b 3304 rtnl_lock();
881d966b 3305 ret = dev_ifconf(net, (char __user *) arg);
6756ae4b 3306 rtnl_unlock();
1da177e4
LT
3307 return ret;
3308 }
3309 if (cmd == SIOCGIFNAME)
881d966b 3310 return dev_ifname(net, (struct ifreq __user *)arg);
1da177e4
LT
3311
3312 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3313 return -EFAULT;
3314
3315 ifr.ifr_name[IFNAMSIZ-1] = 0;
3316
3317 colon = strchr(ifr.ifr_name, ':');
3318 if (colon)
3319 *colon = 0;
3320
3321 /*
3322 * See which interface the caller is talking about.
3323 */
3324
3325 switch (cmd) {
3326 /*
3327 * These ioctl calls:
3328 * - can be done by all.
3329 * - atomic and do not require locking.
3330 * - return a value
3331 */
3332 case SIOCGIFFLAGS:
3333 case SIOCGIFMETRIC:
3334 case SIOCGIFMTU:
3335 case SIOCGIFHWADDR:
3336 case SIOCGIFSLAVE:
3337 case SIOCGIFMAP:
3338 case SIOCGIFINDEX:
3339 case SIOCGIFTXQLEN:
881d966b 3340 dev_load(net, ifr.ifr_name);
1da177e4 3341 read_lock(&dev_base_lock);
14e3e079 3342 ret = dev_ifsioc_locked(net, &ifr, cmd);
1da177e4
LT
3343 read_unlock(&dev_base_lock);
3344 if (!ret) {
3345 if (colon)
3346 *colon = ':';
3347 if (copy_to_user(arg, &ifr,
3348 sizeof(struct ifreq)))
3349 ret = -EFAULT;
3350 }
3351 return ret;
3352
3353 case SIOCETHTOOL:
881d966b 3354 dev_load(net, ifr.ifr_name);
1da177e4 3355 rtnl_lock();
881d966b 3356 ret = dev_ethtool(net, &ifr);
1da177e4
LT
3357 rtnl_unlock();
3358 if (!ret) {
3359 if (colon)
3360 *colon = ':';
3361 if (copy_to_user(arg, &ifr,
3362 sizeof(struct ifreq)))
3363 ret = -EFAULT;
3364 }
3365 return ret;
3366
3367 /*
3368 * These ioctl calls:
3369 * - require superuser power.
3370 * - require strict serialization.
3371 * - return a value
3372 */
3373 case SIOCGMIIPHY:
3374 case SIOCGMIIREG:
3375 case SIOCSIFNAME:
3376 if (!capable(CAP_NET_ADMIN))
3377 return -EPERM;
881d966b 3378 dev_load(net, ifr.ifr_name);
1da177e4 3379 rtnl_lock();
881d966b 3380 ret = dev_ifsioc(net, &ifr, cmd);
1da177e4
LT
3381 rtnl_unlock();
3382 if (!ret) {
3383 if (colon)
3384 *colon = ':';
3385 if (copy_to_user(arg, &ifr,
3386 sizeof(struct ifreq)))
3387 ret = -EFAULT;
3388 }
3389 return ret;
3390
3391 /*
3392 * These ioctl calls:
3393 * - require superuser power.
3394 * - require strict serialization.
3395 * - do not return a value
3396 */
3397 case SIOCSIFFLAGS:
3398 case SIOCSIFMETRIC:
3399 case SIOCSIFMTU:
3400 case SIOCSIFMAP:
3401 case SIOCSIFHWADDR:
3402 case SIOCSIFSLAVE:
3403 case SIOCADDMULTI:
3404 case SIOCDELMULTI:
3405 case SIOCSIFHWBROADCAST:
3406 case SIOCSIFTXQLEN:
3407 case SIOCSMIIREG:
3408 case SIOCBONDENSLAVE:
3409 case SIOCBONDRELEASE:
3410 case SIOCBONDSETHWADDR:
1da177e4
LT
3411 case SIOCBONDCHANGEACTIVE:
3412 case SIOCBRADDIF:
3413 case SIOCBRDELIF:
3414 if (!capable(CAP_NET_ADMIN))
3415 return -EPERM;
cabcac0b
TG
3416 /* fall through */
3417 case SIOCBONDSLAVEINFOQUERY:
3418 case SIOCBONDINFOQUERY:
881d966b 3419 dev_load(net, ifr.ifr_name);
1da177e4 3420 rtnl_lock();
881d966b 3421 ret = dev_ifsioc(net, &ifr, cmd);
1da177e4
LT
3422 rtnl_unlock();
3423 return ret;
3424
3425 case SIOCGIFMEM:
3426 /* Get the per device memory space. We can add this but
3427 * currently do not support it */
3428 case SIOCSIFMEM:
3429 /* Set the per device memory buffer space.
3430 * Not applicable in our case */
3431 case SIOCSIFLINK:
3432 return -EINVAL;
3433
3434 /*
3435 * Unknown or private ioctl.
3436 */
3437 default:
3438 if (cmd == SIOCWANDEV ||
3439 (cmd >= SIOCDEVPRIVATE &&
3440 cmd <= SIOCDEVPRIVATE + 15)) {
881d966b 3441 dev_load(net, ifr.ifr_name);
1da177e4 3442 rtnl_lock();
881d966b 3443 ret = dev_ifsioc(net, &ifr, cmd);
1da177e4
LT
3444 rtnl_unlock();
3445 if (!ret && copy_to_user(arg, &ifr,
3446 sizeof(struct ifreq)))
3447 ret = -EFAULT;
3448 return ret;
3449 }
1da177e4 3450 /* Take care of Wireless Extensions */
295f4a1f 3451 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
881d966b 3452 return wext_handle_ioctl(net, &ifr, cmd, arg);
1da177e4
LT
3453 return -EINVAL;
3454 }
3455}
3456
3457
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 *
 *	Candidates are taken from a function-static counter that is
 *	advanced until __dev_get_by_index() finds no device in @net with
 *	that index.  The counter stays in the range 1..INT_MAX.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;

	for (;;) {
		/*
		 * Advance in unsigned arithmetic and mask off the sign bit,
		 * so that stepping past the largest positive int yields 0
		 * instead of depending on signed overflow, which is
		 * undefined in C.
		 */
		unsigned int next = ((unsigned int)ifindex + 1) & (~0U >> 1);

		/* 0 is never handed out; restart at 1 after wrapping. */
		ifindex = next ? (int)next : 1;

		if (!__dev_get_by_index(net, ifindex))
			return ifindex;
	}
}
3476
1da177e4
LT
3477/* Delayed registration/unregisteration */
3478static DEFINE_SPINLOCK(net_todo_list_lock);
3479static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3480
6f05f629 3481static void net_set_todo(struct net_device *dev)
1da177e4
LT
3482{
3483 spin_lock(&net_todo_list_lock);
3484 list_add_tail(&dev->todo_list, &net_todo_list);
3485 spin_unlock(&net_todo_list_lock);
3486}
3487
3488/**
3489 * register_netdevice - register a network device
3490 * @dev: device to register
3491 *
3492 * Take a completed network device structure and add it to the kernel
3493 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3494 * chain. 0 is returned on success. A negative errno code is returned
3495 * on a failure to set up the device, or if the name is a duplicate.
3496 *
3497 * Callers must hold the rtnl semaphore. You may want
3498 * register_netdev() instead of this.
3499 *
3500 * BUGS:
3501 * The locking appears insufficient to guarantee two parallel registers
3502 * will not get the same name.
3503 */
3504
3505int register_netdevice(struct net_device *dev)
3506{
3507 struct hlist_head *head;
3508 struct hlist_node *p;
3509 int ret;
881d966b 3510 struct net *net;
1da177e4
LT
3511
3512 BUG_ON(dev_boot_phase);
3513 ASSERT_RTNL();
3514
b17a7c17
SH
3515 might_sleep();
3516
1da177e4
LT
3517 /* When net_device's are persistent, this will be fatal. */
3518 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
881d966b
EB
3519 BUG_ON(!dev->nd_net);
3520 net = dev->nd_net;
1da177e4
LT
3521
3522 spin_lock_init(&dev->queue_lock);
932ff279 3523 spin_lock_init(&dev->_xmit_lock);
723e98b7 3524 netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
1da177e4 3525 dev->xmit_lock_owner = -1;
1da177e4 3526 spin_lock_init(&dev->ingress_lock);
1da177e4 3527
1da177e4
LT
3528 dev->iflink = -1;
3529
3530 /* Init, if this function is available */
3531 if (dev->init) {
3532 ret = dev->init(dev);
3533 if (ret) {
3534 if (ret > 0)
3535 ret = -EIO;
90833aa4 3536 goto out;
1da177e4
LT
3537 }
3538 }
4ec93edb 3539
1da177e4
LT
3540 if (!dev_valid_name(dev->name)) {
3541 ret = -EINVAL;
7ce1b0ed 3542 goto err_uninit;
1da177e4
LT
3543 }
3544
881d966b 3545 dev->ifindex = dev_new_index(net);
1da177e4
LT
3546 if (dev->iflink == -1)
3547 dev->iflink = dev->ifindex;
3548
3549 /* Check for existence of name */
881d966b 3550 head = dev_name_hash(net, dev->name);
1da177e4
LT
3551 hlist_for_each(p, head) {
3552 struct net_device *d
3553 = hlist_entry(p, struct net_device, name_hlist);
3554 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3555 ret = -EEXIST;
7ce1b0ed 3556 goto err_uninit;
1da177e4 3557 }
4ec93edb 3558 }
1da177e4 3559
d212f87b
SH
3560 /* Fix illegal checksum combinations */
3561 if ((dev->features & NETIF_F_HW_CSUM) &&
3562 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3563 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3564 dev->name);
3565 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3566 }
3567
3568 if ((dev->features & NETIF_F_NO_CSUM) &&
3569 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3570 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3571 dev->name);
3572 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3573 }
3574
3575
1da177e4
LT
3576 /* Fix illegal SG+CSUM combinations. */
3577 if ((dev->features & NETIF_F_SG) &&
8648b305 3578 !(dev->features & NETIF_F_ALL_CSUM)) {
5a8da02b 3579 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
1da177e4
LT
3580 dev->name);
3581 dev->features &= ~NETIF_F_SG;
3582 }
3583
3584 /* TSO requires that SG is present as well. */
3585 if ((dev->features & NETIF_F_TSO) &&
3586 !(dev->features & NETIF_F_SG)) {
5a8da02b 3587 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
1da177e4
LT
3588 dev->name);
3589 dev->features &= ~NETIF_F_TSO;
3590 }
e89e9cf5
AR
3591 if (dev->features & NETIF_F_UFO) {
3592 if (!(dev->features & NETIF_F_HW_CSUM)) {
3593 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3594 "NETIF_F_HW_CSUM feature.\n",
3595 dev->name);
3596 dev->features &= ~NETIF_F_UFO;
3597 }
3598 if (!(dev->features & NETIF_F_SG)) {
3599 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3600 "NETIF_F_SG feature.\n",
3601 dev->name);
3602 dev->features &= ~NETIF_F_UFO;
3603 }
3604 }
1da177e4 3605
8b41d188 3606 ret = netdev_register_kobject(dev);
b17a7c17 3607 if (ret)
7ce1b0ed 3608 goto err_uninit;
b17a7c17
SH
3609 dev->reg_state = NETREG_REGISTERED;
3610
1da177e4
LT
3611 /*
3612 * Default initial state at registry is that the
3613 * device is present.
3614 */
3615
3616 set_bit(__LINK_STATE_PRESENT, &dev->state);
3617
1da177e4 3618 dev_init_scheduler(dev);
1da177e4 3619 dev_hold(dev);
ce286d32 3620 list_netdevice(dev);
1da177e4
LT
3621
3622 /* Notify protocols, that a new device appeared. */
056925ab 3623 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
fcc5a03a
HX
3624 ret = notifier_to_errno(ret);
3625 if (ret)
3626 unregister_netdevice(dev);
1da177e4
LT
3627
3628out:
3629 return ret;
7ce1b0ed
HX
3630
3631err_uninit:
3632 if (dev->uninit)
3633 dev->uninit(dev);
3634 goto out;
1da177e4
LT
3635}
3636
3637/**
3638 * register_netdev - register a network device
3639 * @dev: device to register
3640 *
3641 * Take a completed network device structure and add it to the kernel
3642 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3643 * chain. 0 is returned on success. A negative errno code is returned
3644 * on a failure to set up the device, or if the name is a duplicate.
3645 *
38b4da38 3646 * This is a wrapper around register_netdevice that takes the rtnl semaphore
1da177e4
LT
3647 * and expands the device name if you passed a format string to
3648 * alloc_netdev.
3649 */
3650int register_netdev(struct net_device *dev)
3651{
3652 int err;
3653
3654 rtnl_lock();
3655
3656 /*
3657 * If the name is a format string the caller wants us to do a
3658 * name allocation.
3659 */
3660 if (strchr(dev->name, '%')) {
3661 err = dev_alloc_name(dev, dev->name);
3662 if (err < 0)
3663 goto out;
3664 }
4ec93edb 3665
1da177e4
LT
3666 err = register_netdevice(dev);
3667out:
3668 rtnl_unlock();
3669 return err;
3670}
3671EXPORT_SYMBOL(register_netdev);
3672
3673/*
3674 * netdev_wait_allrefs - wait until all references are gone.
3675 *
3676 * This is called when unregistering network devices.
3677 *
3678 * Any protocol or device that holds a reference should register
3679 * for netdevice notification, and cleanup and put back the
3680 * reference if they receive an UNREGISTER event.
3681 * We can get stuck here if buggy protocols don't correctly
4ec93edb 3682 * call dev_put.
1da177e4
LT
3683 */
3684static void netdev_wait_allrefs(struct net_device *dev)
3685{
3686 unsigned long rebroadcast_time, warning_time;
3687
3688 rebroadcast_time = warning_time = jiffies;
3689 while (atomic_read(&dev->refcnt) != 0) {
3690 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6756ae4b 3691 rtnl_lock();
1da177e4
LT
3692
3693 /* Rebroadcast unregister notification */
056925ab 3694 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
1da177e4
LT
3695
3696 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3697 &dev->state)) {
3698 /* We must not have linkwatch events
3699 * pending on unregister. If this
3700 * happens, we simply run the queue
3701 * unscheduled, resulting in a noop
3702 * for this device.
3703 */
3704 linkwatch_run_queue();
3705 }
3706
6756ae4b 3707 __rtnl_unlock();
1da177e4
LT
3708
3709 rebroadcast_time = jiffies;
3710 }
3711
3712 msleep(250);
3713
3714 if (time_after(jiffies, warning_time + 10 * HZ)) {
3715 printk(KERN_EMERG "unregister_netdevice: "
3716 "waiting for %s to become free. Usage "
3717 "count = %d\n",
3718 dev->name, atomic_read(&dev->refcnt));
3719 warning_time = jiffies;
3720 }
3721 }
3722}
3723
3724/* The sequence is:
3725 *
3726 * rtnl_lock();
3727 * ...
3728 * register_netdevice(x1);
3729 * register_netdevice(x2);
3730 * ...
3731 * unregister_netdevice(y1);
3732 * unregister_netdevice(y2);
3733 * ...
3734 * rtnl_unlock();
3735 * free_netdev(y1);
3736 * free_netdev(y2);
3737 *
3738 * We are invoked by rtnl_unlock() after it drops the semaphore.
3739 * This allows us to deal with problems:
b17a7c17 3740 * 1) We can delete sysfs objects which invoke hotplug
1da177e4
LT
3741 * without deadlocking with linkwatch via keventd.
3742 * 2) Since we run with the RTNL semaphore not held, we can sleep
3743 * safely in order to wait for the netdev refcnt to drop to zero.
3744 */
4a3e2f71 3745static DEFINE_MUTEX(net_todo_run_mutex);
1da177e4
LT
3746void netdev_run_todo(void)
3747{
626ab0e6 3748 struct list_head list;
1da177e4
LT
3749
3750 /* Need to guard against multiple cpu's getting out of order. */
4a3e2f71 3751 mutex_lock(&net_todo_run_mutex);
1da177e4
LT
3752
3753 /* Not safe to do outside the semaphore. We must not return
3754 * until all unregister events invoked by the local processor
3755 * have been completed (either by this todo run, or one on
3756 * another cpu).
3757 */
3758 if (list_empty(&net_todo_list))
3759 goto out;
3760
3761 /* Snapshot list, allow later requests */
3762 spin_lock(&net_todo_list_lock);
626ab0e6 3763 list_replace_init(&net_todo_list, &list);
1da177e4 3764 spin_unlock(&net_todo_list_lock);
626ab0e6 3765
1da177e4
LT
3766 while (!list_empty(&list)) {
3767 struct net_device *dev
3768 = list_entry(list.next, struct net_device, todo_list);
3769 list_del(&dev->todo_list);
3770
b17a7c17
SH
3771 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3772 printk(KERN_ERR "network todo '%s' but state %d\n",
3773 dev->name, dev->reg_state);
3774 dump_stack();
3775 continue;
3776 }
1da177e4 3777
b17a7c17 3778 dev->reg_state = NETREG_UNREGISTERED;
1da177e4 3779
b17a7c17 3780 netdev_wait_allrefs(dev);
1da177e4 3781
b17a7c17
SH
3782 /* paranoia */
3783 BUG_ON(atomic_read(&dev->refcnt));
3784 BUG_TRAP(!dev->ip_ptr);
3785 BUG_TRAP(!dev->ip6_ptr);
3786 BUG_TRAP(!dev->dn_ptr);
1da177e4 3787
b17a7c17
SH
3788 if (dev->destructor)
3789 dev->destructor(dev);
9093bbb2
SH
3790
3791 /* Free network device */
3792 kobject_put(&dev->dev.kobj);
1da177e4
LT
3793 }
3794
3795out:
4a3e2f71 3796 mutex_unlock(&net_todo_run_mutex);
1da177e4
LT
3797}
3798
5a1b5898 3799static struct net_device_stats *internal_stats(struct net_device *dev)
c45d286e 3800{
5a1b5898 3801 return &dev->stats;
c45d286e
RR
3802}
3803
1da177e4 3804/**
f25f4e44 3805 * alloc_netdev_mq - allocate network device
1da177e4
LT
3806 * @sizeof_priv: size of private data to allocate space for
3807 * @name: device name format string
3808 * @setup: callback to initialize device
f25f4e44 3809 * @queue_count: the number of subqueues to allocate
1da177e4
LT
3810 *
3811 * Allocates a struct net_device with private data area for driver use
f25f4e44
PWJ
3812 * and performs basic initialization. Also allocates subquue structs
3813 * for each queue on the device at the end of the netdevice.
1da177e4 3814 */
f25f4e44
PWJ
3815struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3816 void (*setup)(struct net_device *), unsigned int queue_count)
1da177e4
LT
3817{
3818 void *p;
3819 struct net_device *dev;
3820 int alloc_size;
3821
b6fe17d6
SH
3822 BUG_ON(strlen(name) >= sizeof(dev->name));
3823
1da177e4 3824 /* ensure 32-byte alignment of both the device and private area */
f25f4e44 3825 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
31ce72a6 3826 (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
f25f4e44 3827 ~NETDEV_ALIGN_CONST;
1da177e4
LT
3828 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3829
31380de9 3830 p = kzalloc(alloc_size, GFP_KERNEL);
1da177e4 3831 if (!p) {
b6fe17d6 3832 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
1da177e4
LT
3833 return NULL;
3834 }
1da177e4
LT
3835
3836 dev = (struct net_device *)
3837 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3838 dev->padded = (char *)dev - (char *)p;
6d34b1c2 3839 dev->nd_net = &init_net;
1da177e4 3840
f25f4e44
PWJ
3841 if (sizeof_priv) {
3842 dev->priv = ((char *)dev +
3843 ((sizeof(struct net_device) +
3844 (sizeof(struct net_device_subqueue) *
31ce72a6 3845 (queue_count - 1)) + NETDEV_ALIGN_CONST)
f25f4e44
PWJ
3846 & ~NETDEV_ALIGN_CONST));
3847 }
3848
3849 dev->egress_subqueue_count = queue_count;
1da177e4 3850
5a1b5898 3851 dev->get_stats = internal_stats;
bea3348e 3852 netpoll_netdev_init(dev);
1da177e4
LT
3853 setup(dev);
3854 strcpy(dev->name, name);
3855 return dev;
3856}
f25f4e44 3857EXPORT_SYMBOL(alloc_netdev_mq);
1da177e4
LT
3858
3859/**
3860 * free_netdev - free network device
3861 * @dev: device
3862 *
4ec93edb
YH
3863 * This function does the last stage of destroying an allocated device
3864 * interface. The reference to the device object is released.
1da177e4
LT
3865 * If this is the last reference then it will be freed.
3866 */
3867void free_netdev(struct net_device *dev)
3868{
3041a069 3869 /* Compatibility with error handling in drivers */
1da177e4
LT
3870 if (dev->reg_state == NETREG_UNINITIALIZED) {
3871 kfree((char *)dev - dev->padded);
3872 return;
3873 }
3874
3875 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3876 dev->reg_state = NETREG_RELEASED;
3877
43cb76d9
GKH
3878 /* will free via device release */
3879 put_device(&dev->dev);
1da177e4 3880}
4ec93edb 3881
/*
 * synchronize_net - synchronize with packet receive processing.
 *
 * May sleep: calls synchronize_rcu() after a might_sleep() check.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}
3888
3889/**
3890 * unregister_netdevice - remove device from the kernel
3891 * @dev: device
3892 *
3893 * This function shuts down a device interface and removes it
3894 * from the kernel tables. On success 0 is returned, on a failure
3895 * a negative errno code is returned.
3896 *
3897 * Callers must hold the rtnl semaphore. You may want
3898 * unregister_netdev() instead of this.
3899 */
3900
22f8cde5 3901void unregister_netdevice(struct net_device *dev)
1da177e4 3902{
1da177e4
LT
3903 BUG_ON(dev_boot_phase);
3904 ASSERT_RTNL();
3905
3906 /* Some devices call without registering for initialization unwind. */
3907 if (dev->reg_state == NETREG_UNINITIALIZED) {
3908 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3909 "was registered\n", dev->name, dev);
22f8cde5
SH
3910
3911 WARN_ON(1);
3912 return;
1da177e4
LT
3913 }
3914
3915 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3916
3917 /* If device is running, close it first. */
9b772652 3918 dev_close(dev);
1da177e4
LT
3919
3920 /* And unlink it from device chain. */
ce286d32 3921 unlist_netdevice(dev);
1da177e4
LT
3922
3923 dev->reg_state = NETREG_UNREGISTERING;
3924
3925 synchronize_net();
3926
3927 /* Shutdown queueing discipline. */
3928 dev_shutdown(dev);
3929
4ec93edb 3930
1da177e4
LT
3931 /* Notify protocols, that we are about to destroy
3932 this device. They should clean all the things.
3933 */
056925ab 3934 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4ec93edb 3935
1da177e4 3936 /*
4417da66 3937 * Flush the unicast and multicast chains
1da177e4 3938 */
26cc2522 3939 dev_addr_discard(dev);
1da177e4
LT
3940
3941 if (dev->uninit)
3942 dev->uninit(dev);
3943
3944 /* Notifier chain MUST detach us from master device. */
3945 BUG_TRAP(!dev->master);
3946
8b41d188
EB
3947 /* Remove entries from kobject tree */
3948 netdev_unregister_kobject(dev);
9093bbb2 3949
1da177e4
LT
3950 /* Finish processing unregister after unlock */
3951 net_set_todo(dev);
3952
3953 synchronize_net();
3954
3955 dev_put(dev);
1da177e4
LT
3956}
3957
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.  It returns nothing.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
3978
ce286d32
EB
3979/**
3980 * dev_change_net_namespace - move device to different nethost namespace
3981 * @dev: device
3982 * @net: network namespace
3983 * @pat: If not NULL name pattern to try if the current device name
3984 * is already taken in the destination network namespace.
3985 *
3986 * This function shuts down a device interface and moves it
3987 * to a new network namespace. On success 0 is returned, on
3988 * a failure a netagive errno code is returned.
3989 *
3990 * Callers must hold the rtnl semaphore.
3991 */
3992
3993int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
3994{
3995 char buf[IFNAMSIZ];
3996 const char *destname;
3997 int err;
3998
3999 ASSERT_RTNL();
4000
4001 /* Don't allow namespace local devices to be moved. */
4002 err = -EINVAL;
4003 if (dev->features & NETIF_F_NETNS_LOCAL)
4004 goto out;
4005
4006 /* Ensure the device has been registrered */
4007 err = -EINVAL;
4008 if (dev->reg_state != NETREG_REGISTERED)
4009 goto out;
4010
4011 /* Get out if there is nothing todo */
4012 err = 0;
4013 if (dev->nd_net == net)
4014 goto out;
4015
4016 /* Pick the destination device name, and ensure
4017 * we can use it in the destination network namespace.
4018 */
4019 err = -EEXIST;
4020 destname = dev->name;
4021 if (__dev_get_by_name(net, destname)) {
4022 /* We get here if we can't use the current device name */
4023 if (!pat)
4024 goto out;
4025 if (!dev_valid_name(pat))
4026 goto out;
4027 if (strchr(pat, '%')) {
4028 if (__dev_alloc_name(net, pat, buf) < 0)
4029 goto out;
4030 destname = buf;
4031 } else
4032 destname = pat;
4033 if (__dev_get_by_name(net, destname))
4034 goto out;
4035 }
4036
4037 /*
4038 * And now a mini version of register_netdevice unregister_netdevice.
4039 */
4040
4041 /* If device is running close it first. */
9b772652 4042 dev_close(dev);
ce286d32
EB
4043
4044 /* And unlink it from device chain */
4045 err = -ENODEV;
4046 unlist_netdevice(dev);
4047
4048 synchronize_net();
4049
4050 /* Shutdown queueing discipline. */
4051 dev_shutdown(dev);
4052
4053 /* Notify protocols, that we are about to destroy
4054 this device. They should clean all the things.
4055 */
4056 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4057
4058 /*
4059 * Flush the unicast and multicast chains
4060 */
4061 dev_addr_discard(dev);
4062
4063 /* Actually switch the network namespace */
4064 dev->nd_net = net;
4065
4066 /* Assign the new device name */
4067 if (destname != dev->name)
4068 strcpy(dev->name, destname);
4069
4070 /* If there is an ifindex conflict assign a new one */
4071 if (__dev_get_by_index(net, dev->ifindex)) {
4072 int iflink = (dev->iflink == dev->ifindex);
4073 dev->ifindex = dev_new_index(net);
4074 if (iflink)
4075 dev->iflink = dev->ifindex;
4076 }
4077
8b41d188 4078 /* Fixup kobjects */
ce286d32 4079 err = device_rename(&dev->dev, dev->name);
8b41d188 4080 WARN_ON(err);
ce286d32
EB
4081
4082 /* Add the device back in the hashes */
4083 list_netdevice(dev);
4084
4085 /* Notify protocols, that a new device appeared. */
4086 call_netdevice_notifiers(NETDEV_REGISTER, dev);
4087
4088 synchronize_net();
4089 err = 0;
4090out:
4091 return err;
4092}
4093
1da177e4
LT
4094static int dev_cpu_callback(struct notifier_block *nfb,
4095 unsigned long action,
4096 void *ocpu)
4097{
4098 struct sk_buff **list_skb;
4099 struct net_device **list_net;
4100 struct sk_buff *skb;
4101 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4102 struct softnet_data *sd, *oldsd;
4103
8bb78442 4104 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
1da177e4
LT
4105 return NOTIFY_OK;
4106
4107 local_irq_disable();
4108 cpu = smp_processor_id();
4109 sd = &per_cpu(softnet_data, cpu);
4110 oldsd = &per_cpu(softnet_data, oldcpu);
4111
4112 /* Find end of our completion_queue. */
4113 list_skb = &sd->completion_queue;
4114 while (*list_skb)
4115 list_skb = &(*list_skb)->next;
4116 /* Append completion queue from offline CPU. */
4117 *list_skb = oldsd->completion_queue;
4118 oldsd->completion_queue = NULL;
4119
4120 /* Find end of our output_queue. */
4121 list_net = &sd->output_queue;
4122 while (*list_net)
4123 list_net = &(*list_net)->next_sched;
4124 /* Append output queue from offline CPU. */
4125 *list_net = oldsd->output_queue;
4126 oldsd->output_queue = NULL;
4127
4128 raise_softirq_irqoff(NET_TX_SOFTIRQ);
4129 local_irq_enable();
4130
4131 /* Process offline CPU's input_pkt_queue */
4132 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4133 netif_rx(skb);
4134
4135 return NOTIFY_OK;
4136}
1da177e4 4137
db217334
CL
4138#ifdef CONFIG_NET_DMA
4139/**
0ed72ec4
RD
4140 * net_dma_rebalance - try to maintain one DMA channel per CPU
4141 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4142 *
4143 * This is called when the number of channels allocated to the net_dma client
4144 * changes. The net_dma client tries to have one DMA channel per CPU.
db217334 4145 */
d379b01e
DW
4146
4147static void net_dma_rebalance(struct net_dma *net_dma)
db217334 4148{
d379b01e 4149 unsigned int cpu, i, n, chan_idx;
db217334
CL
4150 struct dma_chan *chan;
4151
d379b01e 4152 if (cpus_empty(net_dma->channel_mask)) {
db217334 4153 for_each_online_cpu(cpu)
29bbd72d 4154 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
db217334
CL
4155 return;
4156 }
4157
4158 i = 0;
4159 cpu = first_cpu(cpu_online_map);
4160
d379b01e
DW
4161 for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
4162 chan = net_dma->channels[chan_idx];
4163
4164 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4165 + (i < (num_online_cpus() %
4166 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
db217334
CL
4167
4168 while(n) {
29bbd72d 4169 per_cpu(softnet_data, cpu).net_dma = chan;
db217334
CL
4170 cpu = next_cpu(cpu, cpu_online_map);
4171 n--;
4172 }
4173 i++;
4174 }
db217334
CL
4175}
4176
4177/**
4178 * netdev_dma_event - event callback for the net_dma_client
4179 * @client: should always be net_dma_client
f4b8ea78 4180 * @chan: DMA channel for the event
0ed72ec4 4181 * @state: DMA state to be handled
db217334 4182 */
d379b01e
DW
4183static enum dma_state_client
4184netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4185 enum dma_state state)
4186{
4187 int i, found = 0, pos = -1;
4188 struct net_dma *net_dma =
4189 container_of(client, struct net_dma, client);
4190 enum dma_state_client ack = DMA_DUP; /* default: take no action */
4191
4192 spin_lock(&net_dma->lock);
4193 switch (state) {
4194 case DMA_RESOURCE_AVAILABLE:
4195 for (i = 0; i < NR_CPUS; i++)
4196 if (net_dma->channels[i] == chan) {
4197 found = 1;
4198 break;
4199 } else if (net_dma->channels[i] == NULL && pos < 0)
4200 pos = i;
4201
4202 if (!found && pos >= 0) {
4203 ack = DMA_ACK;
4204 net_dma->channels[pos] = chan;
4205 cpu_set(pos, net_dma->channel_mask);
4206 net_dma_rebalance(net_dma);
4207 }
db217334
CL
4208 break;
4209 case DMA_RESOURCE_REMOVED:
d379b01e
DW
4210 for (i = 0; i < NR_CPUS; i++)
4211 if (net_dma->channels[i] == chan) {
4212 found = 1;
4213 pos = i;
4214 break;
4215 }
4216
4217 if (found) {
4218 ack = DMA_ACK;
4219 cpu_clear(pos, net_dma->channel_mask);
4220 net_dma->channels[i] = NULL;
4221 net_dma_rebalance(net_dma);
4222 }
db217334
CL
4223 break;
4224 default:
4225 break;
4226 }
d379b01e
DW
4227 spin_unlock(&net_dma->lock);
4228
4229 return ack;
db217334
CL
4230}
4231
4232/**
4233 * netdev_dma_regiser - register the networking subsystem as a DMA client
4234 */
4235static int __init netdev_dma_register(void)
4236{
d379b01e
DW
4237 spin_lock_init(&net_dma.lock);
4238 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4239 dma_async_client_register(&net_dma.client);
4240 dma_async_client_chan_request(&net_dma.client);
db217334
CL
4241 return 0;
4242}
4243
4244#else
4245static int __init netdev_dma_register(void) { return -ENODEV; }
4246#endif /* CONFIG_NET_DMA */
1da177e4 4247
7f353bf2
HX
4248/**
4249 * netdev_compute_feature - compute conjunction of two feature sets
4250 * @all: first feature set
4251 * @one: second feature set
4252 *
4253 * Computes a new feature set after adding a device with feature set
4254 * @one to the master device with current feature set @all. Returns
4255 * the new feature set.
4256 */
4257int netdev_compute_features(unsigned long all, unsigned long one)
4258{
4259 /* if device needs checksumming, downgrade to hw checksumming */
4260 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4261 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4262
4263 /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4264 if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4265 all ^= NETIF_F_HW_CSUM
4266 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4267
4268 if (one & NETIF_F_GSO)
4269 one |= NETIF_F_GSO_SOFTWARE;
4270 one |= NETIF_F_GSO;
4271
4272 /* If even one device supports robust GSO, enable it for all. */
4273 if (one & NETIF_F_GSO_ROBUST)
4274 all |= NETIF_F_GSO_ROBUST;
4275
4276 all &= one | NETIF_F_LLTX;
4277
4278 if (!(all & NETIF_F_ALL_CSUM))
4279 all &= ~NETIF_F_SG;
4280 if (!(all & NETIF_F_SG))
4281 all &= ~NETIF_F_GSO_MASK;
4282
4283 return all;
4284}
4285EXPORT_SYMBOL(netdev_compute_features);
4286
30d97d35
PE
4287static struct hlist_head *netdev_create_hash(void)
4288{
4289 int i;
4290 struct hlist_head *hash;
4291
4292 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4293 if (hash != NULL)
4294 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4295 INIT_HLIST_HEAD(&hash[i]);
4296
4297 return hash;
4298}
4299
881d966b 4300/* Initialize per network namespace state */
4665079c 4301static int __net_init netdev_init(struct net *net)
881d966b 4302{
881d966b
EB
4303 INIT_LIST_HEAD(&net->dev_base_head);
4304 rwlock_init(&dev_base_lock);
4305
30d97d35
PE
4306 net->dev_name_head = netdev_create_hash();
4307 if (net->dev_name_head == NULL)
4308 goto err_name;
881d966b 4309
30d97d35
PE
4310 net->dev_index_head = netdev_create_hash();
4311 if (net->dev_index_head == NULL)
4312 goto err_idx;
881d966b
EB
4313
4314 return 0;
30d97d35
PE
4315
4316err_idx:
4317 kfree(net->dev_name_head);
4318err_name:
4319 return -ENOMEM;
881d966b
EB
4320}
4321
4665079c 4322static void __net_exit netdev_exit(struct net *net)
881d966b
EB
4323{
4324 kfree(net->dev_name_head);
4325 kfree(net->dev_index_head);
4326}
4327
4665079c 4328static struct pernet_operations __net_initdata netdev_net_ops = {
881d966b
EB
4329 .init = netdev_init,
4330 .exit = netdev_exit,
4331};
4332
4665079c 4333static void __net_exit default_device_exit(struct net *net)
ce286d32
EB
4334{
4335 struct net_device *dev, *next;
4336 /*
4337 * Push all migratable of the network devices back to the
4338 * initial network namespace
4339 */
4340 rtnl_lock();
4341 for_each_netdev_safe(net, dev, next) {
4342 int err;
4343
4344 /* Ignore unmoveable devices (i.e. loopback) */
4345 if (dev->features & NETIF_F_NETNS_LOCAL)
4346 continue;
4347
4348 /* Push remaing network devices to init_net */
4349 err = dev_change_net_namespace(dev, &init_net, "dev%d");
4350 if (err) {
4351 printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
4352 __func__, dev->name, err);
4353 unregister_netdevice(dev);
4354 }
4355 }
4356 rtnl_unlock();
4357}
4358
4665079c 4359static struct pernet_operations __net_initdata default_device_ops = {
ce286d32
EB
4360 .exit = default_device_exit,
4361};
4362
1da177e4
LT
4363/*
4364 * Initialize the DEV module. At boot time this walks the device list and
4365 * unhooks any devices that fail to initialise (normally hardware not
4366 * present) and leaves us with a valid list of present and active devices.
4367 *
4368 */
4369
4370/*
4371 * This is called single threaded during boot, so no need
4372 * to take the rtnl semaphore.
4373 */
4374static int __init net_dev_init(void)
4375{
4376 int i, rc = -ENOMEM;
4377
4378 BUG_ON(!dev_boot_phase);
4379
1da177e4
LT
4380 if (dev_proc_init())
4381 goto out;
4382
8b41d188 4383 if (netdev_kobject_init())
1da177e4
LT
4384 goto out;
4385
4386 INIT_LIST_HEAD(&ptype_all);
4ec93edb 4387 for (i = 0; i < 16; i++)
1da177e4
LT
4388 INIT_LIST_HEAD(&ptype_base[i]);
4389
881d966b
EB
4390 if (register_pernet_subsys(&netdev_net_ops))
4391 goto out;
1da177e4 4392
ce286d32
EB
4393 if (register_pernet_device(&default_device_ops))
4394 goto out;
4395
1da177e4
LT
4396 /*
4397 * Initialise the packet receive queues.
4398 */
4399
6f912042 4400 for_each_possible_cpu(i) {
1da177e4
LT
4401 struct softnet_data *queue;
4402
4403 queue = &per_cpu(softnet_data, i);
4404 skb_queue_head_init(&queue->input_pkt_queue);
1da177e4
LT
4405 queue->completion_queue = NULL;
4406 INIT_LIST_HEAD(&queue->poll_list);
bea3348e
SH
4407
4408 queue->backlog.poll = process_backlog;
4409 queue->backlog.weight = weight_p;
1da177e4
LT
4410 }
4411
db217334
CL
4412 netdev_dma_register();
4413
1da177e4
LT
4414 dev_boot_phase = 0;
4415
4416 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4417 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4418
4419 hotcpu_notifier(dev_cpu_callback, 0);
4420 dst_init();
4421 dev_mcast_init();
4422 rc = 0;
4423out:
4424 return rc;
4425}
4426
4427subsys_initcall(net_dev_init);
4428
/* Symbols exported by the core device layer. */
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

/* Bridge hooks, exported only when bridging is built in or modular. */
#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);