/*
 * NET3 Protocol independent device support routines.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Derived from the non IP parts of dev.c 1.0.19
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 * Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 * Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman :	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain :	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <linux/wireless.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>

/*
 * The list of packet types we will receive (as opposed to discard)
 * and the routines to invoke.
 *
 * Why 16. Because with 16 the only overlap we get on a hash of the
 * low nibble of the protocol value is RARP/SNAP/X.25.
 *
 * NOTE: That is no longer true with the addition of VLAN tags. Not
 * sure which should go first, but I bet it won't make much
 * difference if we are running VLANs. The good news is that
 * this protocol won't be in the list unless compiled in, so
 * the average user (w/out VLANs) will not be adversely affected.
 * --BLG
 *
 * 0800	IP
 * 8100	802.1Q VLAN
 * 0001	802.3
 * 0002	AX.25
 * 0004	802.2
 * 8035	RARP
 * 0005	SNAP
 * 0805	X.25
 * 0806	ARP
 * 8137	IPX
 * 0009	Localtalk
 * 86DD	IPv6
 */

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
static struct list_head ptype_all __read_mostly;	/* Taps */
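
/*
 * Illustrative sketch (added commentary, not part of the original file):
 * a non-ETH_P_ALL handler's bucket is picked from the low nibble of the
 * host-order protocol value, exactly as dev_add_pack() does below; e.g.
 * ETH_P_IP (0x0800) lands in bucket 0, ETH_P_ARP (0x0806) in bucket 6.
 */
#if 0
static inline struct list_head *ptype_bucket(__be16 protocol)
{
	return &ptype_base[ntohs(protocol) & 15];
}
#endif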

#ifdef CONFIG_NET_DMA
static struct dma_client *net_dma_client;
static unsigned int net_dma_count;
static spinlock_t net_dma_event_lock;
#endif

/*
 * The @dev_base list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base list, and hold dev_base_lock for writing when they do the
 * actual updates. This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
struct net_device *dev_base;
static struct net_device **dev_tail = &dev_base;
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base);
EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS	8
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];

static inline struct hlist_head *dev_name_hash(const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
}

static inline struct hlist_head *dev_index_hash(int ifindex)
{
	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
}

/*
 * Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 * Device drivers call our routines to queue packets here. We empty the
 * queue in the local softnet handler.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };

#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
#define netdev_sysfs_init()		(0)
#define netdev_register_sysfs(dev)	(0)
#define netdev_unregister_sysfs(dev)	do { } while(0)
#endif


/*******************************************************************************

	Protocol management and registration routines

*******************************************************************************/

/*
 * For efficiency
 */

static int netdev_nit;

/*
 * Add a protocol ID to the list. Now that the input handler is
 * smarter we can dispense with all the messy stuff that used to be
 * here.
 *
 * BEWARE!!! Protocol handlers, mangling input packets,
 * MUST BE last in hash buckets and checking protocol handlers
 * MUST start from promiscuous ptype_all chain in net_bh.
 * It is true now, do not change it.
 * Explanation follows: if protocol handler, mangling packet, will
 * be the first on list, it is not able to sense, that packet
 * is cloned and should be copied-on-write, so that it will
 * change it and subsequent readers will get broken packet.
 * --ANK (980803)
 */

/**
 * dev_add_pack - add packet handler
 * @pt: packet type declaration
 *
 * Add a protocol handler to the networking stack. The passed &packet_type
 * is linked into kernel lists and may not be freed until it has been
 * removed from the kernel lists.
 *
 * This call does not sleep therefore it can not
 * guarantee all CPU's that are in middle of receiving packets
 * will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL)) {
		netdev_nit++;
		list_add_rcu(&pt->list, &ptype_all);
	} else {
		hash = ntohs(pt->type) & 15;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
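
/*
 * Hypothetical usage sketch (not part of the original file): a protocol
 * module registering a handler for IPv4 frames.  The handler name and
 * the packet_type instance below are illustrative only.
 */
#if 0
static int example_ip_rcv(struct sk_buff *skb, struct net_device *dev,
			  struct packet_type *pt, struct net_device *orig_dev)
{
	/* ... process the frame ... */
	kfree_skb(skb);
	return 0;
}

static struct packet_type example_ip_packet_type = {
	.type = __constant_htons(ETH_P_IP),
	.func = example_ip_rcv,
};

/* dev_add_pack(&example_ip_packet_type); */
#endif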

/**
 * __dev_remove_pack - remove packet handler
 * @pt: packet type declaration
 *
 * Remove a protocol handler that was previously added to the kernel
 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
 * from the kernel lists and can be freed or reused once this function
 * returns.
 *
 * The packet type might still be in use by receivers
 * and must not be freed until after all the CPU's have gone
 * through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL)) {
		netdev_nit--;
		head = &ptype_all;
	} else
		head = &ptype_base[ntohs(pt->type) & 15];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}

/**
 * dev_remove_pack - remove packet handler
 * @pt: packet type declaration
 *
 * Remove a protocol handler that was previously added to the kernel
 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
 * from the kernel lists and can be freed or reused once this function
 * returns.
 *
 * This call sleeps to guarantee that no CPU is looking at the packet
 * type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}

/*******************************************************************************

	Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 * netdev_boot_setup_add - add new setup entry
 * @name: name of the device
 * @map: configured settings for the device
 *
 * Adds new setup entry to the dev_boot_setup list. The function
 * returns 0 on error and 1 on success. This is a generic routine for
 * all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strcpy(s[i].name, name);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 * netdev_boot_setup_check - check boot time settings
 * @dev: the netdevice
 *
 * Check boot time settings for the device.
 * The found settings are set for the device to be used
 * later in the device probing.
 * Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
			dev->irq = s[i].map.irq;
			dev->base_addr = s[i].map.base_addr;
			dev->mem_start = s[i].map.mem_start;
			dev->mem_end = s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 * netdev_boot_base - get address from boot time settings
 * @prefix: prefix for network device
 * @unit: id for network device
 *
 * Check boot time settings for the base address of device.
 * The found settings are set for the device to be used
 * later in the device probing.
 * Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
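
/*
 * Example (derived from the parsing above, not part of the original
 * file): up to four integers are taken as irq, base_addr, mem_start and
 * mem_end, and the rest of the string names the device, e.g.
 *
 *	netdev=9,0x300,0xd0000,0xd4000,eth0
 */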

/*******************************************************************************

	Device Interface Subroutines

*******************************************************************************/

/**
 * __dev_get_by_name - find a device by its name
 * @name: name to find
 *
 * Find an interface by name. Must be called under RTNL semaphore
 * or @dev_base_lock. If the name is found a pointer to the device
 * is returned. If the name is not found then %NULL is returned. The
 * reference counters are not incremented so the caller must be
 * careful with locks.
 */

struct net_device *__dev_get_by_name(const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 * dev_get_by_name - find a device by its name
 * @name: name to find
 *
 * Find an interface by name. This can be called from any
 * context and does its own locking. The returned handle has
 * the usage count incremented and the caller must use dev_put() to
 * release it when it is no longer needed. %NULL is returned if no
 * matching device is found.
 */

struct net_device *dev_get_by_name(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}
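
/*
 * Hypothetical usage sketch (not part of the original file): the
 * reference taken by dev_get_by_name() must be balanced by dev_put().
 */
#if 0
{
	struct net_device *dev = dev_get_by_name("eth0");

	if (dev) {
		/* ... dev cannot be freed while we hold the reference ... */
		dev_put(dev);
	}
}
#endif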

/**
 * __dev_get_by_index - find a device by its ifindex
 * @ifindex: index of device
 *
 * Search for an interface by index. Returns %NULL if the device
 * is not found or a pointer to the device. The device has not
 * had its reference counter increased so the caller must be careful
 * about locking. The caller must hold either the RTNL semaphore
 * or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 * dev_get_by_index - find a device by its ifindex
 * @ifindex: index of device
 *
 * Search for an interface by index. Returns %NULL if the device
 * is not found or a pointer to the device. The device returned has
 * had a reference added and the pointer is safe until the user calls
 * dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 * dev_getbyhwaddr - find a device by its hardware address
 * @type: media type of device
 * @ha: hardware address
 *
 * Search for an interface by MAC address. Returns %NULL if the device
 * is not found or a pointer to the device. The caller must hold the
 * rtnl semaphore. The returned device has not had its ref count increased
 * and the caller must therefore be careful about locking.
 *
 * BUGS:
 * If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for (dev = dev_base; dev; dev = dev->next)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			break;
	return dev;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *dev_getfirstbyhwtype(unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	for (dev = dev_base; dev; dev = dev->next) {
		if (dev->type == type) {
			dev_hold(dev);
			break;
		}
	}
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 * dev_get_by_flags - find any device with given flags
 * @if_flags: IFF_* values
 * @mask: bitmask of bits in if_flags to check
 *
 * Search for any interface with the given flags. Returns %NULL if a device
 * is not found or a pointer to the device. The device returned has
 * had a reference added and the pointer is safe until the user calls
 * dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	for (dev = dev_base; dev != NULL; dev = dev->next) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 * dev_valid_name - check if name is okay for network device
 * @name: name string
 *
 * Network device names need to be valid file names to
 * allow sysfs to work. We also disallow any kind of
 * whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
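
/*
 * Examples for the rules above (added commentary): "eth0" and "wlan%d"
 * pass, while "", ".", "..", "a/b", any name containing whitespace, and
 * any name of IFNAMSIZ or more characters are rejected.
 */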

/**
 * dev_alloc_name - allocate a name for a device
 * @dev: device
 * @name: name format string
 *
 * Passed a format string - eg "lt%d" it will try and find a suitable
 * id. It scans list of devices to build up a free map, then chooses
 * the first empty slot. The caller must hold the dev_base or rtnl lock
 * while allocating the name and adding the device in order to avoid
 * duplicates.
 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 * Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	int i = 0;
	char buf[IFNAMSIZ];
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user. There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for (d = dev_base; d; d = d->next) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, sizeof(buf), name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, sizeof(buf), name, i);
	if (!__dev_get_by_name(buf)) {
		strlcpy(dev->name, buf, IFNAMSIZ);
		return i;
	}

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}
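
/*
 * Hypothetical usage sketch (not part of the original file): asking for
 * the first free "eth%d" slot.
 */
#if 0
{
	int unit = dev_alloc_name(dev, "eth%d");

	if (unit < 0)
		return unit;	/* -EINVAL, -ENOMEM or -ENFILE */
	/* dev->name now holds e.g. "eth0" and unit is the number chosen */
}
#endif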

/**
 * dev_change_name - change name of a device
 * @dev: device
 * @newname: name (or format string) must be at least IFNAMSIZ
 *
 * Change name of a device, can pass format strings "eth%d"
 * for wildcarding.
 */
int dev_change_name(struct net_device *dev, char *newname)
{
	int err = 0;

	ASSERT_RTNL();

	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
		strcpy(newname, dev->name);
	}
	else if (__dev_get_by_name(newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

	device_rename(&dev->dev, dev->name);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);

	return err;
}

/**
 * netdev_features_change - device changes features
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 * netdev_state_change - device changes state
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed state. This function calls
 * the notifier chains for netdev_chain and sends a NEWLINK message
 * to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		raw_notifier_call_chain(&netdev_chain,
					NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

/**
 * dev_load - load a network module
 * @name: name of interface
 *
 * If a network interface is not present and the process has suitable
 * privileges this function loads the module. If module loading is not
 * available in this kernel then it becomes a nop.
 */

void dev_load(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

static int default_rebuild_header(struct sk_buff *skb)
{
	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
	       skb->dev ? skb->dev->name : "NULL!!!");
	kfree_skb(skb);
	return 1;
}

/**
 * dev_open - prepare an interface for use.
 * @dev: device to open
 *
 * Takes a device from down to up state. The device's private open
 * function is invoked and then the multicast lists are loaded. Finally
 * the device is moved into the up state and a %NETDEV_UP message is
 * sent to the netdev notifier chain.
 *
 * Calling this function on an active interface is a nop. On a failure
 * a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret = 0;

	/*
	 * Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 * Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 * Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);
	if (dev->open) {
		ret = dev->open(dev);
		if (ret)
			clear_bit(__LINK_STATE_START, &dev->state);
	}

	/*
	 * If it went open OK then:
	 */

	if (!ret) {
		/*
		 * Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 * Initialize multicasting status
		 */
		dev_mc_upload(dev);

		/*
		 * Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 * ... and announce new interface.
		 */
		raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
	}
	return ret;
}

/**
 * dev_close - shutdown an interface.
 * @dev: device to shutdown
 *
 * This function moves an active device into down state. A
 * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 * chain.
 */
int dev_close(struct net_device *dev)
{
	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 * Tell people we are going down, so that they can
	 * prepare for death while the device is still operating.
	 */
	raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);

	dev_deactivate(dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running(),
	 * and wait when poll really will happen. Actually, the best place
	 * for this is inside dev->stop() after device stopped its irq
	 * engine, but this requires more changes in devices. */

	smp_mb__after_clear_bit(); /* Commit netif_running(). */
	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
		/* No hurry. */
		msleep(1);
	}

	/*
	 * Call the device specific close. This cannot fail.
	 * Only if device is UP
	 *
	 * We allow it to be called even after a DETACH hot-plug
	 * event.
	 */
	if (dev->stop)
		dev->stop(dev);

	/*
	 * Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 * Tell people we are down
	 */
	raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);

	return 0;
}
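
/*
 * Hypothetical usage sketch (not part of the original file): callers
 * such as the flag-changing ioctl paths invoke both under the rtnl
 * semaphore.
 */
#if 0
rtnl_lock();
err = dev_open(dev);	/* nop if the interface is already IFF_UP */
/* ... */
err = dev_close(dev);	/* nop if the interface is already down */
rtnl_unlock();
#endif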

/*
 * Device change register/unregister. These are not inline or static
 * as we export them to the world.
 */

/**
 * register_netdevice_notifier - register a network notifier block
 * @nb: notifier
 *
 * Register a notifier to be called when network device events occur.
 * The notifier passed is linked into the kernel structures and must
 * not be reused until it has been unregistered. A negative errno code
 * is returned on a failure.
 *
 * When registered all registration and up events are replayed
 * to the new notifier to allow device to have a race free
 * view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (!err) {
		for (dev = dev_base; dev; dev = dev->next) {
			nb->notifier_call(nb, NETDEV_REGISTER, dev);

			if (dev->flags & IFF_UP)
				nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}
	rtnl_unlock();
	return err;
}
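
/*
 * Hypothetical usage sketch (not part of the original file): a module
 * watching device state; the names below are illustrative only.
 */
#if 0
static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	switch (event) {
	case NETDEV_UP:
		/* ... dev came up; NETDEV_REGISTER/NETDEV_UP are also
		 * replayed for devices that existed before we registered */
		break;
	case NETDEV_DOWN:
		/* ... dev went down ... */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_netdev_notifier = {
	.notifier_call = example_netdev_event,
};

/* register_netdevice_notifier(&example_netdev_notifier); */
#endif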

/**
 * unregister_netdevice_notifier - unregister a network notifier block
 * @nb: notifier
 *
 * Unregister a notifier previously registered by
 * register_netdevice_notifier(). The notifier is unlinked from the
 * kernel structures and may then be reused. A negative errno code
 * is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}

/**
 * call_netdevice_notifiers - call all network notifier blocks
 * @val: value passed unmodified to notifier function
 * @v: pointer passed unmodified to notifier function
 *
 * Call all network notifier blocks. Parameters and return value
 * are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, void *v)
{
	return raw_notifier_call_chain(&netdev_chain, val, v);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}

void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}

static inline void net_timestamp(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else
		skb->tstamp.tv64 = 0;
}

/*
 * Support routine. Sends outgoing frames to any network
 * taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}


void __netif_schedule(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
		unsigned long flags;
		struct softnet_data *sd;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		dev->next_sched = sd->output_queue;
		sd->output_queue = dev;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__netif_schedule);

void __netif_rx_schedule(struct net_device *dev)
{
	unsigned long flags;

	local_irq_save(flags);
	dev_hold(dev);
	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
	if (dev->quota < 0)
		dev->quota += dev->weight;
	else
		dev->quota = dev->weight;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__netif_rx_schedule);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);


/* Hot-plugging. */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_stop_queue(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_wake_queue(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);


/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset = skb_transport_offset(skb);

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	if (skb_cloned(skb)) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	BUG_ON(offset > (int)skb->len);
	csum = skb_checksum(skb, offset, skb->len-offset, 0);

	offset = skb->tail - skb->transport_header;
	BUG_ON(offset <= 0);
	BUG_ON(skb->csum_offset + 2 > offset);

	*(__sum16 *)(skb_transport_header(skb) +
		     skb->csum_offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}

/**
 * skb_gso_segment - Perform segmentation on skb.
 * @skb: buffer to segment
 * @features: features for the output path (see dev->features)
 *
 * This function segments the given skb and returns a list of segments.
 *
 * It may return NULL if the skb requires no segmentation. This is
 * only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	BUG_ON(skb_shinfo(skb)->frag_list);

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
		       dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

#endif
	return 0;
}

struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/**
 * dev_gso_segment - Perform emulated hardware segmentation on skb.
 * @skb: buffer to segment
 *
 * This function segments the given skb and stores the list of segments
 * in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct sk_buff *segs;
	int features = dev->features & ~(illegal_highdma(dev, skb) ?
					 NETIF_F_SG : 0);

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (unlikely(IS_ERR(segs)))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (likely(!skb->next)) {
		if (netdev_nit)
			dev_queue_xmit_nit(skb, dev);

		if (netif_needs_gso(dev, skb)) {
			if (unlikely(dev_gso_segment(skb)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		}

		return dev->hard_start_xmit(skb, dev);
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;
		int rc;

		skb->next = nskb->next;
		nskb->next = NULL;
		rc = dev->hard_start_xmit(nskb, dev);
		if (unlikely(rc)) {
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		if (unlikely(netif_queue_stopped(dev) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

	skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
	kfree_skb(skb);
	return 0;
}

#define HARD_TX_LOCK(dev, cpu) {			\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		netif_tx_lock(dev);			\
	}						\
}

#define HARD_TX_UNLOCK(dev) {				\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		netif_tx_unlock(dev);			\
	}						\
}

/**
 * dev_queue_xmit - transmit a buffer
 * @skb: buffer to transmit
 *
 * Queue a buffer for transmission to a network device. The caller must
 * have set the device and priority and built the buffer before calling
 * this function. The function can be called from an interrupt.
 *
 * A negative errno code is returned on a failure. A success does not
 * guarantee the frame will be transmitted as it may be dropped due
 * to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 * I notice this method can also return errors from the queue disciplines,
 * including NET_XMIT_DROP, which is a positive value. So, errors can also
 * be positive.
 *
 * Regardless of the return value, the skb is consumed, so it is currently
 * difficult to retry a send to this method. (You can bump the ref count
 * before sending to hold a reference for retry if you are careful.)
 *
 * When calling this method, interrupts MUST be enabled. This is because
 * the BH enable code must have IRQs enabled so that it will not deadlock.
 * --BLG
 */

int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* GSO will handle the following emulations directly. */
	if (netif_needs_gso(dev, skb))
		goto gso;

	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (!(dev->features & NETIF_F_GEN_CSUM) &&
	     (!(dev->features & NETIF_F_IP_CSUM) ||
	      skb->protocol != htons(ETH_P_IP))))
		if (skb_checksum_help(skb))
			goto out_kfree_skb;

gso:
	spin_lock_prefetch(&dev->queue_lock);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);
		q = dev->qdisc;
		if (q->enqueue) {
			rc = q->enqueue(skb, q);
			qdisc_run(dev);
			spin_unlock(&dev->queue_lock);

			rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
			goto out;
		}
		spin_unlock(&dev->queue_lock);
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that netif_tx_lock protection is necessary
	   here. (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shoot the lock. It is not prone to deadlocks.
	   Either shoot the noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev)) {
				rc = 0;
				if (!dev_hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
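
/*
 * Hypothetical usage sketch (not part of the original file): the caller
 * fills in the device and priority, then hands the buffer off; the skb
 * is consumed whatever the outcome.
 */
#if 0
skb->dev = dev;
skb->priority = TC_PRIO_CONTROL;	/* illustrative priority */
rc = dev_queue_xmit(skb);		/* skb already freed on error */
#endif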


/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog __read_mostly = 1000;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;	/* old backlog weight */

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };


/**
 * netif_rx - post buffer to the network code
 * @skb: buffer to post
 *
 * This function receives a packet from a device driver and queues it for
 * the upper (protocol) levels to process. It always succeeds. The buffer
 * may be dropped during processing for congestion control or by the
 * protocol layers.
 *
 * return values:
 * NET_RX_SUCCESS (no congestion)
 * NET_RX_CN_LOW (low congestion)
 * NET_RX_CN_MOD (moderate congestion)
 * NET_RX_CN_HIGH (high congestion)
 * NET_RX_DROP (packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	/*
	 * The code is rearranged so that the path is the most
	 * short when CPU is congested, but is still operating.
	 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
enqueue:
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		netif_rx_schedule(&queue->backlog_dev);
		goto enqueue;
	}

	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}
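
/*
 * Hypothetical usage sketch (not part of the original file): a driver's
 * receive interrupt pushing a frame up the stack.
 */
#if 0
skb->dev = dev;
skb->protocol = eth_type_trans(skb, dev);
netif_rx(skb);		/* queues the frame on this CPU's backlog */
#endif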

int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}

EXPORT_SYMBOL(netif_rx_ni);

static inline struct net_device *skb_bond(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	if (dev->master) {
		if (skb_bond_should_drop(skb)) {
			kfree_skb(skb);
			return NULL;
		}
		skb->dev = dev->master;
	}

	return dev;
}

static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			BUG_TRAP(!atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct net_device *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct net_device *dev = head;
			head = head->next_sched;

			smp_mb__before_clear_bit();
			clear_bit(__LINK_STATE_SCHED, &dev->state);

			if (spin_trylock(&dev->queue_lock)) {
				qdisc_run(dev);
				spin_unlock(&dev->queue_lock);
			} else {
				netif_schedule(dev);
			}
		}
	}
}

static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
#endif

#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
 * a compare and 2 stores extra right now if we don't have it on
 * but have CONFIG_NET_CLS_ACT
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 *
 */
static int ing_filter(struct sk_buff *skb)
{
	struct Qdisc *q;
	struct net_device *dev = skb->dev;
	int result = TC_ACT_OK;

	if (dev->qdisc_ingress) {
		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
		if (MAX_RED_LOOP < ttl++) {
			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
			       skb->iif, skb->dev->ifindex);
			return TC_ACT_SHOT;
		}

		skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);

		skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

		spin_lock(&dev->queue_lock);
		if ((q = dev->qdisc_ingress) != NULL)
			result = q->enqueue(skb, q);
		spin_unlock(&dev->queue_lock);

	}

	return result;
}
#endif

int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	int ret = NET_RX_DROP;
	__be16 type;

	/* if we've gotten here through NAPI, check netpoll */
	if (skb->dev->poll && netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.tv64)
		net_timestamp(skb);

	if (!skb->iif)
		skb->iif = skb->dev->ifindex;

	orig_dev = skb_bond(skb);

	if (!orig_dev)
		return NET_RX_DROP;

	__get_cpu_var(netdev_rx_stat).total++;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;

	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	if (pt_prev) {
		ret = deliver_skb(skb, pt_prev, orig_dev);
		pt_prev = NULL; /* no one else should process this after */
	} else {
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	ret = ing_filter(skb);

	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
		kfree_skb(skb);
		goto out;
	}

	skb->tc_verd = 0;
ncls:
#endif

	if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
		goto out;

	type = skb->protocol;
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}
1844static int process_backlog(struct net_device *backlog_dev, int *budget)
1845{
1846 int work = 0;
1847 int quota = min(backlog_dev->quota, *budget);
1848 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1849 unsigned long start_time = jiffies;
1850
e3876605 1851 backlog_dev->weight = weight_p;
1da177e4
LT
1852 for (;;) {
1853 struct sk_buff *skb;
1854 struct net_device *dev;
1855
1856 local_irq_disable();
1857 skb = __skb_dequeue(&queue->input_pkt_queue);
1858 if (!skb)
1859 goto job_done;
1860 local_irq_enable();
1861
1862 dev = skb->dev;
1863
1864 netif_receive_skb(skb);
1865
1866 dev_put(dev);
1867
1868 work++;
1869
1870 if (work >= quota || jiffies - start_time > 1)
1871 break;
1872
1873 }
1874
1875 backlog_dev->quota -= work;
1876 *budget -= work;
1877 return -1;
1878
1879job_done:
1880 backlog_dev->quota -= work;
1881 *budget -= work;
1882
1883 list_del(&backlog_dev->poll_list);
1884 smp_mb__before_clear_bit();
1885 netif_poll_enable(backlog_dev);
1886
1da177e4
LT
1887 local_irq_enable();
1888 return 0;
1889}
1890
1891static void net_rx_action(struct softirq_action *h)
1892{
1893 struct softnet_data *queue = &__get_cpu_var(softnet_data);
1894 unsigned long start_time = jiffies;
51b0bded 1895 int budget = netdev_budget;
53fb95d3
MM
1896 void *have;
1897
1da177e4
LT
1898 local_irq_disable();
1899
1900 while (!list_empty(&queue->poll_list)) {
1901 struct net_device *dev;
1902
1903 if (budget <= 0 || jiffies - start_time > 1)
1904 goto softnet_break;
1905
1906 local_irq_enable();
1907
1908 dev = list_entry(queue->poll_list.next,
1909 struct net_device, poll_list);
53fb95d3 1910 have = netpoll_poll_lock(dev);
1da177e4
LT
1911
1912 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
53fb95d3 1913 netpoll_poll_unlock(have);
1da177e4 1914 local_irq_disable();
8aca8a27 1915 list_move_tail(&dev->poll_list, &queue->poll_list);
1916 if (dev->quota < 0)
1917 dev->quota += dev->weight;
1918 else
1919 dev->quota = dev->weight;
1920 } else {
53fb95d3 1921 netpoll_poll_unlock(have);
1922 dev_put(dev);
1923 local_irq_disable();
1924 }
1925 }
1926out:
1927#ifdef CONFIG_NET_DMA
1928 /*
1929 * There may not be any more sk_buffs coming right now, so push
1930 * any pending DMA copies to hardware
1931 */
1932 if (net_dma_client) {
1933 struct dma_chan *chan;
1934 rcu_read_lock();
1935 list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
1936 dma_async_memcpy_issue_pending(chan);
1937 rcu_read_unlock();
1938 }
1939#endif
1940 local_irq_enable();
1941 return;
1942
1943softnet_break:
1944 __get_cpu_var(netdev_rx_stat).time_squeeze++;
1945 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1946 goto out;
1947}
1948
1949static gifconf_func_t * gifconf_list [NPROTO];
1950
1951/**
1952 * register_gifconf - register a SIOCGIF handler
1953 * @family: Address family
1954 * @gifconf: Function handler
1955 *
 1956 *	Register protocol-dependent address dumping routines. The handler
1957 * that is passed must not be freed or reused until it has been replaced
1958 * by another handler.
1959 */
1960int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
1961{
1962 if (family >= NPROTO)
1963 return -EINVAL;
1964 gifconf_list[family] = gifconf;
1965 return 0;
1966}
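/*
 * Handler sketch (hypothetical: the inet_gifconf name, the PF_INET
 * family and the one-ifreq answer are illustrative, not taken from this
 * file). dev_ifconf() below calls handlers in two modes: a NULL buffer
 * is a pure size probe, otherwise the remaining space is passed in:
 *
 *	static int inet_gifconf(struct net_device *dev,
 *				char __user *buf, int len)
 *	{
 *		struct ifreq ifr;
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strcpy(ifr.ifr_name, dev->name);
 *		if (!buf)
 *			return sizeof(ifr);
 *		if (len < (int)sizeof(ifr) ||
 *		    copy_to_user(buf, &ifr, sizeof(ifr)))
 *			return -EFAULT;
 *		return sizeof(ifr);
 *	}
 *
 *	register_gifconf(PF_INET, inet_gifconf);
 */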
1967
1968
1969/*
1970 * Map an interface index to its name (SIOCGIFNAME)
1971 */
1972
1973/*
1974 * We need this ioctl for efficient implementation of the
1975 * if_indextoname() function required by the IPv6 API. Without
1976 * it, we would have to search all the interfaces to find a
1977 * match. --pb
1978 */
1979
1980static int dev_ifname(struct ifreq __user *arg)
1981{
1982 struct net_device *dev;
1983 struct ifreq ifr;
1984
1985 /*
1986 * Fetch the caller's info block.
1987 */
1988
1989 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
1990 return -EFAULT;
1991
1992 read_lock(&dev_base_lock);
1993 dev = __dev_get_by_index(ifr.ifr_ifindex);
1994 if (!dev) {
1995 read_unlock(&dev_base_lock);
1996 return -ENODEV;
1997 }
1998
1999 strcpy(ifr.ifr_name, dev->name);
2000 read_unlock(&dev_base_lock);
2001
2002 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2003 return -EFAULT;
2004 return 0;
2005}
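/*
 * From user space the same lookup is a single ioctl; the
 * if_indextoname() mentioned above ultimately relies on it. A
 * stand-alone sketch (hypothetical, error handling trimmed):
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *
 *	int main(void)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		ifr.ifr_ifindex = 1;
 *		if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
 *			printf("ifindex 1 is %s\n", ifr.ifr_name);
 *		return 0;
 *	}
 */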
2006
2007/*
2008 * Perform a SIOCGIFCONF call. This structure will change
2009 * size eventually, and there is nothing I can do about it.
2010 * Thus we will need a 'compatibility mode'.
2011 */
2012
2013static int dev_ifconf(char __user *arg)
2014{
2015 struct ifconf ifc;
2016 struct net_device *dev;
2017 char __user *pos;
2018 int len;
2019 int total;
2020 int i;
2021
2022 /*
2023 * Fetch the caller's info block.
2024 */
2025
2026 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2027 return -EFAULT;
2028
2029 pos = ifc.ifc_buf;
2030 len = ifc.ifc_len;
2031
2032 /*
2033 * Loop over the interfaces, and write an info block for each.
2034 */
2035
2036 total = 0;
2037 for (dev = dev_base; dev; dev = dev->next) {
2038 for (i = 0; i < NPROTO; i++) {
2039 if (gifconf_list[i]) {
2040 int done;
2041 if (!pos)
2042 done = gifconf_list[i](dev, NULL, 0);
2043 else
2044 done = gifconf_list[i](dev, pos + total,
2045 len - total);
2046 if (done < 0)
2047 return -EFAULT;
2048 total += done;
2049 }
2050 }
4ec93edb 2051 }
2052
2053 /*
2054 * All done. Write the updated control block back to the caller.
2055 */
2056 ifc.ifc_len = total;
2057
2058 /*
2059 * Both BSD and Solaris return 0 here, so we do too.
2060 */
2061 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2062}
2063
2064#ifdef CONFIG_PROC_FS
2065/*
2066 * This is invoked by the /proc filesystem handler to display a device
2067 * in detail.
2068 */
6f05f629 2069static struct net_device *dev_get_idx(loff_t pos)
2070{
2071 struct net_device *dev;
2072 loff_t i;
2073
2074 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2075
2076 return i == pos ? dev : NULL;
2077}
2078
2079void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2080{
2081 read_lock(&dev_base_lock);
2082 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2083}
2084
2085void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2086{
2087 ++*pos;
2088 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2089}
2090
2091void dev_seq_stop(struct seq_file *seq, void *v)
2092{
2093 read_unlock(&dev_base_lock);
2094}
2095
2096static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2097{
c45d286e 2098 struct net_device_stats *stats = dev->get_stats(dev);
1da177e4 2099
c45d286e 2100 if (stats) {
2101 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2102 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2103 dev->name, stats->rx_bytes, stats->rx_packets,
2104 stats->rx_errors,
2105 stats->rx_dropped + stats->rx_missed_errors,
2106 stats->rx_fifo_errors,
2107 stats->rx_length_errors + stats->rx_over_errors +
2108 stats->rx_crc_errors + stats->rx_frame_errors,
2109 stats->rx_compressed, stats->multicast,
2110 stats->tx_bytes, stats->tx_packets,
2111 stats->tx_errors, stats->tx_dropped,
2112 stats->tx_fifo_errors, stats->collisions,
2113 stats->tx_carrier_errors +
2114 stats->tx_aborted_errors +
2115 stats->tx_window_errors +
2116 stats->tx_heartbeat_errors,
2117 stats->tx_compressed);
2118 } else
2119 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2120}
2121
2122/*
 2123 *	Called from the PROCfs module. This now uses the new arbitrary-sized
 2124 *	/proc/net interface to create /proc/net/dev.
2125 */
2126static int dev_seq_show(struct seq_file *seq, void *v)
2127{
2128 if (v == SEQ_START_TOKEN)
2129 seq_puts(seq, "Inter-| Receive "
2130 " | Transmit\n"
2131 " face |bytes packets errs drop fifo frame "
2132 "compressed multicast|bytes packets errs "
2133 "drop fifo colls carrier compressed\n");
2134 else
2135 dev_seq_printf_stats(seq, v);
2136 return 0;
2137}
2138
2139static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2140{
2141 struct netif_rx_stats *rc = NULL;
2142
2143 while (*pos < NR_CPUS)
4ec93edb 2144 if (cpu_online(*pos)) {
2145 rc = &per_cpu(netdev_rx_stat, *pos);
2146 break;
2147 } else
2148 ++*pos;
2149 return rc;
2150}
2151
2152static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2153{
2154 return softnet_get_online(pos);
2155}
2156
2157static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2158{
2159 ++*pos;
2160 return softnet_get_online(pos);
2161}
2162
2163static void softnet_seq_stop(struct seq_file *seq, void *v)
2164{
2165}
2166
2167static int softnet_seq_show(struct seq_file *seq, void *v)
2168{
2169 struct netif_rx_stats *s = v;
2170
2171 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
31aa02c5 2172 s->total, s->dropped, s->time_squeeze, 0,
2173 0, 0, 0, 0, /* was fastroute */
2174 s->cpu_collision );
2175 return 0;
2176}
2177
f690808e 2178static const struct seq_operations dev_seq_ops = {
2179 .start = dev_seq_start,
2180 .next = dev_seq_next,
2181 .stop = dev_seq_stop,
2182 .show = dev_seq_show,
2183};
2184
2185static int dev_seq_open(struct inode *inode, struct file *file)
2186{
2187 return seq_open(file, &dev_seq_ops);
2188}
2189
9a32144e 2190static const struct file_operations dev_seq_fops = {
2191 .owner = THIS_MODULE,
2192 .open = dev_seq_open,
2193 .read = seq_read,
2194 .llseek = seq_lseek,
2195 .release = seq_release,
2196};
2197
f690808e 2198static const struct seq_operations softnet_seq_ops = {
2199 .start = softnet_seq_start,
2200 .next = softnet_seq_next,
2201 .stop = softnet_seq_stop,
2202 .show = softnet_seq_show,
2203};
2204
2205static int softnet_seq_open(struct inode *inode, struct file *file)
2206{
2207 return seq_open(file, &softnet_seq_ops);
2208}
2209
9a32144e 2210static const struct file_operations softnet_seq_fops = {
2211 .owner = THIS_MODULE,
2212 .open = softnet_seq_open,
2213 .read = seq_read,
2214 .llseek = seq_lseek,
2215 .release = seq_release,
2216};
2217
2218static void *ptype_get_idx(loff_t pos)
2219{
2220 struct packet_type *pt = NULL;
2221 loff_t i = 0;
2222 int t;
2223
2224 list_for_each_entry_rcu(pt, &ptype_all, list) {
2225 if (i == pos)
2226 return pt;
2227 ++i;
2228 }
2229
2230 for (t = 0; t < 16; t++) {
2231 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2232 if (i == pos)
2233 return pt;
2234 ++i;
2235 }
2236 }
2237 return NULL;
2238}
2239
2240static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2241{
2242 rcu_read_lock();
2243 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2244}
2245
2246static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2247{
2248 struct packet_type *pt;
2249 struct list_head *nxt;
2250 int hash;
2251
2252 ++*pos;
2253 if (v == SEQ_START_TOKEN)
2254 return ptype_get_idx(0);
2255
2256 pt = v;
2257 nxt = pt->list.next;
2258 if (pt->type == htons(ETH_P_ALL)) {
2259 if (nxt != &ptype_all)
2260 goto found;
2261 hash = 0;
2262 nxt = ptype_base[0].next;
2263 } else
2264 hash = ntohs(pt->type) & 15;
2265
2266 while (nxt == &ptype_base[hash]) {
2267 if (++hash >= 16)
2268 return NULL;
2269 nxt = ptype_base[hash].next;
2270 }
2271found:
2272 return list_entry(nxt, struct packet_type, list);
2273}
2274
2275static void ptype_seq_stop(struct seq_file *seq, void *v)
2276{
2277 rcu_read_unlock();
2278}
2279
2280static void ptype_seq_decode(struct seq_file *seq, void *sym)
2281{
2282#ifdef CONFIG_KALLSYMS
2283 unsigned long offset = 0, symsize;
2284 const char *symname;
2285 char *modname;
2286 char namebuf[128];
2287
2288 symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2289 &modname, namebuf);
2290
2291 if (symname) {
2292 char *delim = ":";
2293
2294 if (!modname)
2295 modname = delim = "";
2296 seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2297 symname, offset);
2298 return;
2299 }
2300#endif
2301
2302 seq_printf(seq, "[%p]", sym);
2303}
2304
2305static int ptype_seq_show(struct seq_file *seq, void *v)
2306{
2307 struct packet_type *pt = v;
2308
2309 if (v == SEQ_START_TOKEN)
2310 seq_puts(seq, "Type Device Function\n");
2311 else {
2312 if (pt->type == htons(ETH_P_ALL))
2313 seq_puts(seq, "ALL ");
2314 else
2315 seq_printf(seq, "%04x", ntohs(pt->type));
2316
2317 seq_printf(seq, " %-8s ",
2318 pt->dev ? pt->dev->name : "");
2319 ptype_seq_decode(seq, pt->func);
2320 seq_putc(seq, '\n');
2321 }
2322
2323 return 0;
2324}
2325
2326static const struct seq_operations ptype_seq_ops = {
2327 .start = ptype_seq_start,
2328 .next = ptype_seq_next,
2329 .stop = ptype_seq_stop,
2330 .show = ptype_seq_show,
2331};
2332
2333static int ptype_seq_open(struct inode *inode, struct file *file)
2334{
2335 return seq_open(file, &ptype_seq_ops);
2336}
2337
2338static const struct file_operations ptype_seq_fops = {
2339 .owner = THIS_MODULE,
2340 .open = ptype_seq_open,
2341 .read = seq_read,
2342 .llseek = seq_lseek,
2343 .release = seq_release,
2344};
2345
2346
d86b5e0e 2347#ifdef CONFIG_WIRELESS_EXT
2348extern int wireless_proc_init(void);
2349#else
2350#define wireless_proc_init() 0
2351#endif
2352
2353static int __init dev_proc_init(void)
2354{
2355 int rc = -ENOMEM;
2356
2357 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2358 goto out;
2359 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2360 goto out_dev;
2361 if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
2362 goto out_dev2;
2363
2364 if (wireless_proc_init())
2365 goto out_softnet;
2366 rc = 0;
2367out:
2368 return rc;
2369out_softnet:
2370 proc_net_remove("softnet_stat");
2371out_dev2:
2372 proc_net_remove("ptype");
2373out_dev:
2374 proc_net_remove("dev");
2375 goto out;
2376}
2377#else
2378#define dev_proc_init() 0
2379#endif /* CONFIG_PROC_FS */
2380
2381
2382/**
2383 * netdev_set_master - set up master/slave pair
2384 * @slave: slave device
2385 * @master: new master device
2386 *
2387 * Changes the master device of the slave. Pass %NULL to break the
2388 * bonding. The caller must hold the RTNL semaphore. On a failure
2389 * a negative errno code is returned. On success the reference counts
2390 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2391 * function returns zero.
2392 */
2393int netdev_set_master(struct net_device *slave, struct net_device *master)
2394{
2395 struct net_device *old = slave->master;
2396
2397 ASSERT_RTNL();
2398
2399 if (master) {
2400 if (old)
2401 return -EBUSY;
2402 dev_hold(master);
2403 }
2404
2405 slave->master = master;
4ec93edb 2406
2407 synchronize_net();
2408
2409 if (old)
2410 dev_put(old);
2411
2412 if (master)
2413 slave->flags |= IFF_SLAVE;
2414 else
2415 slave->flags &= ~IFF_SLAVE;
2416
2417 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2418 return 0;
2419}
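/*
 * Enslave/release sketch (hypothetical bonding-style caller; the
 * bond_dev and slave_dev names are illustrative). The caller must
 * already hold the RTNL semaphore, as the ASSERT_RTNL() above enforces,
 * and enslaving a device that already has a master fails with -EBUSY:
 *
 *	err = netdev_set_master(slave_dev, bond_dev);
 *	...
 *	err = netdev_set_master(slave_dev, NULL);
 */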
2420
2421/**
2422 * dev_set_promiscuity - update promiscuity count on a device
2423 * @dev: device
2424 * @inc: modifier
2425 *
3041a069 2426 * Add or remove promiscuity from a device. While the count in the device
2427 * remains above zero the interface remains promiscuous. Once it hits zero
2428 * the device reverts back to normal filtering operation. A negative inc
2429 * value is used to drop promiscuity on the device.
2430 */
2431void dev_set_promiscuity(struct net_device *dev, int inc)
2432{
2433 unsigned short old_flags = dev->flags;
2434
2435 if ((dev->promiscuity += inc) == 0)
2436 dev->flags &= ~IFF_PROMISC;
2437 else
2438 dev->flags |= IFF_PROMISC;
2439 if (dev->flags != old_flags) {
2440 dev_mc_upload(dev);
2441 printk(KERN_INFO "device %s %s promiscuous mode\n",
2442 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
4ec93edb 2443 "left");
2444 audit_log(current->audit_context, GFP_ATOMIC,
2445 AUDIT_ANOM_PROMISCUOUS,
2446 "dev=%s prom=%d old_prom=%d auid=%u",
2447 dev->name, (dev->flags & IFF_PROMISC),
2448 (old_flags & IFF_PROMISC),
4ec93edb 2449 audit_get_loginuid(current->audit_context));
2450 }
2451}
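/*
 * The promiscuity field is a reference count, not a flag: every caller
 * that needs promiscuous mode adds +1 and must later balance it with
 * -1, rather than writing IFF_PROMISC itself. Sketch of a hypothetical
 * packet-capture style user, typically run under the RTNL semaphore
 * (dev_set_allmulti() below uses the same counting convention):
 *
 *	dev_set_promiscuity(dev, 1);	(capture socket opened)
 *	...
 *	dev_set_promiscuity(dev, -1);	(capture socket closed)
 */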
2452
2453/**
2454 * dev_set_allmulti - update allmulti count on a device
2455 * @dev: device
2456 * @inc: modifier
2457 *
 2458 *	Add or remove reception of all multicast frames on a device. While the
 2459 *	count in the device remains above zero the interface keeps receiving
 2460 *	all multicast frames. Once it hits zero the device reverts back to normal
2461 * filtering operation. A negative @inc value is used to drop the counter
2462 * when releasing a resource needing all multicasts.
2463 */
2464
2465void dev_set_allmulti(struct net_device *dev, int inc)
2466{
2467 unsigned short old_flags = dev->flags;
2468
2469 dev->flags |= IFF_ALLMULTI;
2470 if ((dev->allmulti += inc) == 0)
2471 dev->flags &= ~IFF_ALLMULTI;
2472 if (dev->flags ^ old_flags)
2473 dev_mc_upload(dev);
2474}
2475
2476unsigned dev_get_flags(const struct net_device *dev)
2477{
2478 unsigned flags;
2479
2480 flags = (dev->flags & ~(IFF_PROMISC |
2481 IFF_ALLMULTI |
2482 IFF_RUNNING |
2483 IFF_LOWER_UP |
2484 IFF_DORMANT)) |
2485 (dev->gflags & (IFF_PROMISC |
2486 IFF_ALLMULTI));
2487
2488 if (netif_running(dev)) {
2489 if (netif_oper_up(dev))
2490 flags |= IFF_RUNNING;
2491 if (netif_carrier_ok(dev))
2492 flags |= IFF_LOWER_UP;
2493 if (netif_dormant(dev))
2494 flags |= IFF_DORMANT;
2495 }
2496
2497 return flags;
2498}
2499
2500int dev_change_flags(struct net_device *dev, unsigned flags)
2501{
2502 int ret;
2503 int old_flags = dev->flags;
2504
2505 /*
2506 * Set the flags on our device.
2507 */
2508
2509 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2510 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2511 IFF_AUTOMEDIA)) |
2512 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2513 IFF_ALLMULTI));
2514
2515 /*
2516 * Load in the correct multicast list now the flags have changed.
2517 */
2518
2519 dev_mc_upload(dev);
2520
2521 /*
 2522	 *	Have we downed the interface? We handle IFF_UP ourselves
2523 * according to user attempts to set it, rather than blindly
2524 * setting it.
2525 */
2526
2527 ret = 0;
2528 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
2529 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2530
2531 if (!ret)
2532 dev_mc_upload(dev);
2533 }
2534
2535 if (dev->flags & IFF_UP &&
2536 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2537 IFF_VOLATILE)))
f07d5b94 2538 raw_notifier_call_chain(&netdev_chain,
e041c683 2539 NETDEV_CHANGE, dev);
2540
2541 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2542 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2543 dev->gflags ^= IFF_PROMISC;
2544 dev_set_promiscuity(dev, inc);
2545 }
2546
2547 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2548 is important. Some (broken) drivers set IFF_PROMISC, when
2549 IFF_ALLMULTI is requested not asking us and not reporting.
2550 */
2551 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2552 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2553 dev->gflags ^= IFF_ALLMULTI;
2554 dev_set_allmulti(dev, inc);
2555 }
2556
2557 if (old_flags ^ dev->flags)
2558 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2559
2560 return ret;
2561}
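/*
 * Sketch of bringing an interface up through this path (hypothetical
 * in-kernel caller holding the RTNL semaphore); this is effectively
 * what the SIOCSIFFLAGS case in dev_ifsioc() above does for
 * "ifconfig eth0 up":
 *
 *	err = dev_change_flags(dev, dev->flags | IFF_UP);
 */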
2562
2563int dev_set_mtu(struct net_device *dev, int new_mtu)
2564{
2565 int err;
2566
2567 if (new_mtu == dev->mtu)
2568 return 0;
2569
2570 /* MTU must be positive. */
2571 if (new_mtu < 0)
2572 return -EINVAL;
2573
2574 if (!netif_device_present(dev))
2575 return -ENODEV;
2576
2577 err = 0;
2578 if (dev->change_mtu)
2579 err = dev->change_mtu(dev, new_mtu);
2580 else
2581 dev->mtu = new_mtu;
2582 if (!err && dev->flags & IFF_UP)
f07d5b94 2583 raw_notifier_call_chain(&netdev_chain,
e041c683 2584 NETDEV_CHANGEMTU, dev);
2585 return err;
2586}
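/*
 * Usage sketch (hypothetical caller under the RTNL semaphore; the
 * 9000-byte jumbo-frame value is illustrative). A driver with a
 * change_mtu hook may veto the new size; without one, dev->mtu is
 * simply rewritten:
 *
 *	err = dev_set_mtu(dev, 9000);
 *	if (err)
 *		return err;	(driver rejected the new MTU)
 */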
2587
2588int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2589{
2590 int err;
2591
2592 if (!dev->set_mac_address)
2593 return -EOPNOTSUPP;
2594 if (sa->sa_family != dev->type)
2595 return -EINVAL;
2596 if (!netif_device_present(dev))
2597 return -ENODEV;
2598 err = dev->set_mac_address(dev, sa);
2599 if (!err)
f07d5b94 2600 raw_notifier_call_chain(&netdev_chain,
e041c683 2601 NETDEV_CHANGEADDR, dev);
2602 return err;
2603}
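/*
 * Usage sketch (hypothetical caller under the RTNL semaphore; new_mac
 * is an illustrative array of dev->addr_len bytes). The sockaddr family
 * must match dev->type, e.g. ARPHRD_ETHER for an ethernet device:
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, dev->addr_len);
 *	err = dev_set_mac_address(dev, &sa);
 */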
2604
2605/*
2606 * Perform the SIOCxIFxxx calls.
2607 */
2608static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2609{
2610 int err;
2611 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2612
2613 if (!dev)
2614 return -ENODEV;
2615
2616 switch (cmd) {
2617 case SIOCGIFFLAGS: /* Get interface flags */
2618 ifr->ifr_flags = dev_get_flags(dev);
2619 return 0;
2620
2621 case SIOCSIFFLAGS: /* Set interface flags */
2622 return dev_change_flags(dev, ifr->ifr_flags);
2623
2624 case SIOCGIFMETRIC: /* Get the metric on the interface
2625 (currently unused) */
2626 ifr->ifr_metric = 0;
2627 return 0;
2628
2629 case SIOCSIFMETRIC: /* Set the metric on the interface
2630 (currently unused) */
2631 return -EOPNOTSUPP;
2632
2633 case SIOCGIFMTU: /* Get the MTU of a device */
2634 ifr->ifr_mtu = dev->mtu;
2635 return 0;
2636
2637 case SIOCSIFMTU: /* Set the MTU of a device */
2638 return dev_set_mtu(dev, ifr->ifr_mtu);
2639
2640 case SIOCGIFHWADDR:
2641 if (!dev->addr_len)
2642 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2643 else
2644 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2645 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2646 ifr->ifr_hwaddr.sa_family = dev->type;
2647 return 0;
2648
2649 case SIOCSIFHWADDR:
2650 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2651
2652 case SIOCSIFHWBROADCAST:
2653 if (ifr->ifr_hwaddr.sa_family != dev->type)
2654 return -EINVAL;
2655 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2656 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
f07d5b94 2657 raw_notifier_call_chain(&netdev_chain,
2658 NETDEV_CHANGEADDR, dev);
2659 return 0;
2660
2661 case SIOCGIFMAP:
2662 ifr->ifr_map.mem_start = dev->mem_start;
2663 ifr->ifr_map.mem_end = dev->mem_end;
2664 ifr->ifr_map.base_addr = dev->base_addr;
2665 ifr->ifr_map.irq = dev->irq;
2666 ifr->ifr_map.dma = dev->dma;
2667 ifr->ifr_map.port = dev->if_port;
2668 return 0;
2669
2670 case SIOCSIFMAP:
2671 if (dev->set_config) {
2672 if (!netif_device_present(dev))
2673 return -ENODEV;
2674 return dev->set_config(dev, &ifr->ifr_map);
2675 }
2676 return -EOPNOTSUPP;
2677
2678 case SIOCADDMULTI:
2679 if (!dev->set_multicast_list ||
2680 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2681 return -EINVAL;
2682 if (!netif_device_present(dev))
2683 return -ENODEV;
2684 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2685 dev->addr_len, 1);
2686
2687 case SIOCDELMULTI:
2688 if (!dev->set_multicast_list ||
2689 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2690 return -EINVAL;
2691 if (!netif_device_present(dev))
2692 return -ENODEV;
2693 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2694 dev->addr_len, 1);
2695
2696 case SIOCGIFINDEX:
2697 ifr->ifr_ifindex = dev->ifindex;
2698 return 0;
2699
2700 case SIOCGIFTXQLEN:
2701 ifr->ifr_qlen = dev->tx_queue_len;
2702 return 0;
2703
2704 case SIOCSIFTXQLEN:
2705 if (ifr->ifr_qlen < 0)
2706 return -EINVAL;
2707 dev->tx_queue_len = ifr->ifr_qlen;
2708 return 0;
2709
2710 case SIOCSIFNAME:
2711 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2712 return dev_change_name(dev, ifr->ifr_newname);
2713
2714 /*
2715 * Unknown or private ioctl
2716 */
2717
2718 default:
2719 if ((cmd >= SIOCDEVPRIVATE &&
2720 cmd <= SIOCDEVPRIVATE + 15) ||
2721 cmd == SIOCBONDENSLAVE ||
2722 cmd == SIOCBONDRELEASE ||
2723 cmd == SIOCBONDSETHWADDR ||
2724 cmd == SIOCBONDSLAVEINFOQUERY ||
2725 cmd == SIOCBONDINFOQUERY ||
2726 cmd == SIOCBONDCHANGEACTIVE ||
2727 cmd == SIOCGMIIPHY ||
2728 cmd == SIOCGMIIREG ||
2729 cmd == SIOCSMIIREG ||
2730 cmd == SIOCBRADDIF ||
2731 cmd == SIOCBRDELIF ||
2732 cmd == SIOCWANDEV) {
2733 err = -EOPNOTSUPP;
2734 if (dev->do_ioctl) {
2735 if (netif_device_present(dev))
2736 err = dev->do_ioctl(dev, ifr,
2737 cmd);
2738 else
2739 err = -ENODEV;
2740 }
2741 } else
2742 err = -EINVAL;
2743
2744 }
2745 return err;
2746}
2747
2748/*
2749 * This function handles all "interface"-type I/O control requests. The actual
2750 * 'doing' part of this is dev_ifsioc above.
2751 */
2752
2753/**
2754 * dev_ioctl - network device ioctl
2755 * @cmd: command to issue
2756 * @arg: pointer to a struct ifreq in user space
2757 *
2758 * Issue ioctl functions to devices. This is normally called by the
2759 * user space syscall interfaces but can sometimes be useful for
2760 * other purposes. The return value is the return from the syscall if
2761 * positive or a negative errno code on error.
2762 */
2763
2764int dev_ioctl(unsigned int cmd, void __user *arg)
2765{
2766 struct ifreq ifr;
2767 int ret;
2768 char *colon;
2769
 2770	/* One special case: SIOCGIFCONF takes an ifconf argument
 2771	   and requires a shared lock, because it sleeps writing
2772 to user space.
2773 */
2774
2775 if (cmd == SIOCGIFCONF) {
6756ae4b 2776 rtnl_lock();
1da177e4 2777 ret = dev_ifconf((char __user *) arg);
6756ae4b 2778 rtnl_unlock();
2779 return ret;
2780 }
2781 if (cmd == SIOCGIFNAME)
2782 return dev_ifname((struct ifreq __user *)arg);
2783
2784 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2785 return -EFAULT;
2786
2787 ifr.ifr_name[IFNAMSIZ-1] = 0;
2788
2789 colon = strchr(ifr.ifr_name, ':');
2790 if (colon)
2791 *colon = 0;
2792
2793 /*
2794 * See which interface the caller is talking about.
2795 */
2796
2797 switch (cmd) {
2798 /*
2799 * These ioctl calls:
2800 * - can be done by all.
2801 * - atomic and do not require locking.
2802 * - return a value
2803 */
2804 case SIOCGIFFLAGS:
2805 case SIOCGIFMETRIC:
2806 case SIOCGIFMTU:
2807 case SIOCGIFHWADDR:
2808 case SIOCGIFSLAVE:
2809 case SIOCGIFMAP:
2810 case SIOCGIFINDEX:
2811 case SIOCGIFTXQLEN:
2812 dev_load(ifr.ifr_name);
2813 read_lock(&dev_base_lock);
2814 ret = dev_ifsioc(&ifr, cmd);
2815 read_unlock(&dev_base_lock);
2816 if (!ret) {
2817 if (colon)
2818 *colon = ':';
2819 if (copy_to_user(arg, &ifr,
2820 sizeof(struct ifreq)))
2821 ret = -EFAULT;
2822 }
2823 return ret;
2824
2825 case SIOCETHTOOL:
2826 dev_load(ifr.ifr_name);
2827 rtnl_lock();
2828 ret = dev_ethtool(&ifr);
2829 rtnl_unlock();
2830 if (!ret) {
2831 if (colon)
2832 *colon = ':';
2833 if (copy_to_user(arg, &ifr,
2834 sizeof(struct ifreq)))
2835 ret = -EFAULT;
2836 }
2837 return ret;
2838
2839 /*
2840 * These ioctl calls:
2841 * - require superuser power.
2842 * - require strict serialization.
2843 * - return a value
2844 */
2845 case SIOCGMIIPHY:
2846 case SIOCGMIIREG:
2847 case SIOCSIFNAME:
2848 if (!capable(CAP_NET_ADMIN))
2849 return -EPERM;
2850 dev_load(ifr.ifr_name);
2851 rtnl_lock();
2852 ret = dev_ifsioc(&ifr, cmd);
2853 rtnl_unlock();
2854 if (!ret) {
2855 if (colon)
2856 *colon = ':';
2857 if (copy_to_user(arg, &ifr,
2858 sizeof(struct ifreq)))
2859 ret = -EFAULT;
2860 }
2861 return ret;
2862
2863 /*
2864 * These ioctl calls:
2865 * - require superuser power.
2866 * - require strict serialization.
2867 * - do not return a value
2868 */
2869 case SIOCSIFFLAGS:
2870 case SIOCSIFMETRIC:
2871 case SIOCSIFMTU:
2872 case SIOCSIFMAP:
2873 case SIOCSIFHWADDR:
2874 case SIOCSIFSLAVE:
2875 case SIOCADDMULTI:
2876 case SIOCDELMULTI:
2877 case SIOCSIFHWBROADCAST:
2878 case SIOCSIFTXQLEN:
2879 case SIOCSMIIREG:
2880 case SIOCBONDENSLAVE:
2881 case SIOCBONDRELEASE:
2882 case SIOCBONDSETHWADDR:
2883 case SIOCBONDCHANGEACTIVE:
2884 case SIOCBRADDIF:
2885 case SIOCBRDELIF:
2886 if (!capable(CAP_NET_ADMIN))
2887 return -EPERM;
2888 /* fall through */
2889 case SIOCBONDSLAVEINFOQUERY:
2890 case SIOCBONDINFOQUERY:
2891 dev_load(ifr.ifr_name);
2892 rtnl_lock();
2893 ret = dev_ifsioc(&ifr, cmd);
2894 rtnl_unlock();
2895 return ret;
2896
2897 case SIOCGIFMEM:
2898 /* Get the per device memory space. We can add this but
2899 * currently do not support it */
2900 case SIOCSIFMEM:
2901 /* Set the per device memory buffer space.
2902 * Not applicable in our case */
2903 case SIOCSIFLINK:
2904 return -EINVAL;
2905
2906 /*
2907 * Unknown or private ioctl.
2908 */
2909 default:
2910 if (cmd == SIOCWANDEV ||
2911 (cmd >= SIOCDEVPRIVATE &&
2912 cmd <= SIOCDEVPRIVATE + 15)) {
2913 dev_load(ifr.ifr_name);
2914 rtnl_lock();
2915 ret = dev_ifsioc(&ifr, cmd);
2916 rtnl_unlock();
2917 if (!ret && copy_to_user(arg, &ifr,
2918 sizeof(struct ifreq)))
2919 ret = -EFAULT;
2920 return ret;
2921 }
d86b5e0e 2922#ifdef CONFIG_WIRELESS_EXT
2923 /* Take care of Wireless Extensions */
2924 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2925 /* If command is `set a parameter', or
2926 * `get the encoding parameters', check if
2927 * the user has the right to do it */
2928 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
2929 || cmd == SIOCGIWENCODEEXT) {
2930 if (!capable(CAP_NET_ADMIN))
2931 return -EPERM;
2932 }
2933 dev_load(ifr.ifr_name);
2934 rtnl_lock();
2935 /* Follow me in net/core/wireless.c */
2936 ret = wireless_process_ioctl(&ifr, cmd);
2937 rtnl_unlock();
2938 if (IW_IS_GET(cmd) &&
2939 copy_to_user(arg, &ifr,
4ec93edb 2940 sizeof(struct ifreq)))
2941 ret = -EFAULT;
2942 return ret;
2943 }
d86b5e0e 2944#endif /* CONFIG_WIRELESS_EXT */
2945 return -EINVAL;
2946 }
2947}
2948
2949
2950/**
2951 * dev_new_index - allocate an ifindex
2952 *
2953 * Returns a suitable unique value for a new device interface
2954 * number. The caller must hold the rtnl semaphore or the
2955 * dev_base_lock to be sure it remains unique.
2956 */
2957static int dev_new_index(void)
2958{
2959 static int ifindex;
2960 for (;;) {
2961 if (++ifindex <= 0)
2962 ifindex = 1;
2963 if (!__dev_get_by_index(ifindex))
2964 return ifindex;
2965 }
2966}
2967
2968static int dev_boot_phase = 1;
2969
 2970/* Delayed registration/unregistration */
2971static DEFINE_SPINLOCK(net_todo_list_lock);
2972static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2973
6f05f629 2974static void net_set_todo(struct net_device *dev)
2975{
2976 spin_lock(&net_todo_list_lock);
2977 list_add_tail(&dev->todo_list, &net_todo_list);
2978 spin_unlock(&net_todo_list_lock);
2979}
2980
2981/**
2982 * register_netdevice - register a network device
2983 * @dev: device to register
2984 *
2985 * Take a completed network device structure and add it to the kernel
2986 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2987 * chain. 0 is returned on success. A negative errno code is returned
2988 * on a failure to set up the device, or if the name is a duplicate.
2989 *
2990 * Callers must hold the rtnl semaphore. You may want
2991 * register_netdev() instead of this.
2992 *
2993 * BUGS:
2994 * The locking appears insufficient to guarantee two parallel registers
2995 * will not get the same name.
2996 */
2997
2998int register_netdevice(struct net_device *dev)
2999{
3000 struct hlist_head *head;
3001 struct hlist_node *p;
3002 int ret;
3003
3004 BUG_ON(dev_boot_phase);
3005 ASSERT_RTNL();
3006
3007 might_sleep();
3008
3009 /* When net_device's are persistent, this will be fatal. */
3010 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3011
3012 spin_lock_init(&dev->queue_lock);
932ff279 3013 spin_lock_init(&dev->_xmit_lock);
3014 dev->xmit_lock_owner = -1;
3015#ifdef CONFIG_NET_CLS_ACT
3016 spin_lock_init(&dev->ingress_lock);
3017#endif
3018
3019 dev->iflink = -1;
3020
3021 /* Init, if this function is available */
3022 if (dev->init) {
3023 ret = dev->init(dev);
3024 if (ret) {
3025 if (ret > 0)
3026 ret = -EIO;
90833aa4 3027 goto out;
3028 }
3029 }
4ec93edb 3030
3031 if (!dev_valid_name(dev->name)) {
3032 ret = -EINVAL;
90833aa4 3033 goto out;
3034 }
3035
3036 dev->ifindex = dev_new_index();
3037 if (dev->iflink == -1)
3038 dev->iflink = dev->ifindex;
3039
3040 /* Check for existence of name */
3041 head = dev_name_hash(dev->name);
3042 hlist_for_each(p, head) {
3043 struct net_device *d
3044 = hlist_entry(p, struct net_device, name_hlist);
3045 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3046 ret = -EEXIST;
4ec93edb 3047 goto out;
1da177e4 3048 }
4ec93edb 3049 }
3050
3051 /* Fix illegal SG+CSUM combinations. */
3052 if ((dev->features & NETIF_F_SG) &&
8648b305 3053 !(dev->features & NETIF_F_ALL_CSUM)) {
5a8da02b 3054 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3055 dev->name);
3056 dev->features &= ~NETIF_F_SG;
3057 }
3058
3059 /* TSO requires that SG is present as well. */
3060 if ((dev->features & NETIF_F_TSO) &&
3061 !(dev->features & NETIF_F_SG)) {
5a8da02b 3062 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3063 dev->name);
3064 dev->features &= ~NETIF_F_TSO;
3065 }
3066 if (dev->features & NETIF_F_UFO) {
3067 if (!(dev->features & NETIF_F_HW_CSUM)) {
3068 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3069 "NETIF_F_HW_CSUM feature.\n",
3070 dev->name);
3071 dev->features &= ~NETIF_F_UFO;
3072 }
3073 if (!(dev->features & NETIF_F_SG)) {
3074 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3075 "NETIF_F_SG feature.\n",
3076 dev->name);
3077 dev->features &= ~NETIF_F_UFO;
3078 }
3079 }
3080
3081 /*
3082 * nil rebuild_header routine,
3083 * that should be never called and used as just bug trap.
3084 */
3085
3086 if (!dev->rebuild_header)
3087 dev->rebuild_header = default_rebuild_header;
3088
3089 ret = netdev_register_sysfs(dev);
3090 if (ret)
90833aa4 3091 goto out;
3092 dev->reg_state = NETREG_REGISTERED;
3093
3094 /*
3095 * Default initial state at registry is that the
3096 * device is present.
3097 */
3098
3099 set_bit(__LINK_STATE_PRESENT, &dev->state);
3100
3101 dev->next = NULL;
3102 dev_init_scheduler(dev);
3103 write_lock_bh(&dev_base_lock);
3104 *dev_tail = dev;
3105 dev_tail = &dev->next;
3106 hlist_add_head(&dev->name_hlist, head);
3107 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3108 dev_hold(dev);
3109 write_unlock_bh(&dev_base_lock);
3110
 3111	/* Notify protocols that a new device appeared. */
f07d5b94 3112 raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
1da177e4 3113
3114 ret = 0;
3115
3116out:
3117 return ret;
3118}
3119
3120/**
3121 * register_netdev - register a network device
3122 * @dev: device to register
3123 *
3124 * Take a completed network device structure and add it to the kernel
3125 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3126 * chain. 0 is returned on success. A negative errno code is returned
3127 * on a failure to set up the device, or if the name is a duplicate.
3128 *
 3129 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
3130 * and expands the device name if you passed a format string to
3131 * alloc_netdev.
3132 */
3133int register_netdev(struct net_device *dev)
3134{
3135 int err;
3136
3137 rtnl_lock();
3138
3139 /*
3140 * If the name is a format string the caller wants us to do a
3141 * name allocation.
3142 */
3143 if (strchr(dev->name, '%')) {
3144 err = dev_alloc_name(dev, dev->name);
3145 if (err < 0)
3146 goto out;
3147 }
4ec93edb 3148
3149 err = register_netdevice(dev);
3150out:
3151 rtnl_unlock();
3152 return err;
3153}
3154EXPORT_SYMBOL(register_netdev);
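/*
 * Typical driver lifecycle sketch (hypothetical: struct my_priv,
 * my_setup and the "mydev%d" template are illustrative). The '%d'
 * makes register_netdev() substitute the first free unit number via
 * dev_alloc_name(); alloc_netdev() and free_netdev() appear later in
 * this file:
 *
 *	struct net_device *dev;
 *	int err;
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "mydev%d", my_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 *	...
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */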
3155
3156/*
3157 * netdev_wait_allrefs - wait until all references are gone.
3158 *
3159 * This is called when unregistering network devices.
3160 *
3161 * Any protocol or device that holds a reference should register
3162 * for netdevice notification, and cleanup and put back the
3163 * reference if they receive an UNREGISTER event.
3164 * We can get stuck here if buggy protocols don't correctly
4ec93edb 3165 * call dev_put.
3166 */
3167static void netdev_wait_allrefs(struct net_device *dev)
3168{
3169 unsigned long rebroadcast_time, warning_time;
3170
3171 rebroadcast_time = warning_time = jiffies;
3172 while (atomic_read(&dev->refcnt) != 0) {
3173 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6756ae4b 3174 rtnl_lock();
3175
3176 /* Rebroadcast unregister notification */
f07d5b94 3177 raw_notifier_call_chain(&netdev_chain,
3178 NETDEV_UNREGISTER, dev);
3179
3180 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3181 &dev->state)) {
3182 /* We must not have linkwatch events
3183 * pending on unregister. If this
3184 * happens, we simply run the queue
3185 * unscheduled, resulting in a noop
3186 * for this device.
3187 */
3188 linkwatch_run_queue();
3189 }
3190
6756ae4b 3191 __rtnl_unlock();
3192
3193 rebroadcast_time = jiffies;
3194 }
3195
3196 msleep(250);
3197
3198 if (time_after(jiffies, warning_time + 10 * HZ)) {
3199 printk(KERN_EMERG "unregister_netdevice: "
3200 "waiting for %s to become free. Usage "
3201 "count = %d\n",
3202 dev->name, atomic_read(&dev->refcnt));
3203 warning_time = jiffies;
3204 }
3205 }
3206}
3207
3208/* The sequence is:
3209 *
3210 * rtnl_lock();
3211 * ...
3212 * register_netdevice(x1);
3213 * register_netdevice(x2);
3214 * ...
3215 * unregister_netdevice(y1);
3216 * unregister_netdevice(y2);
3217 * ...
3218 * rtnl_unlock();
3219 * free_netdev(y1);
3220 * free_netdev(y2);
3221 *
3222 * We are invoked by rtnl_unlock() after it drops the semaphore.
3223 * This allows us to deal with problems:
b17a7c17 3224 * 1) We can delete sysfs objects which invoke hotplug
3225 * without deadlocking with linkwatch via keventd.
3226 * 2) Since we run with the RTNL semaphore not held, we can sleep
3227 * safely in order to wait for the netdev refcnt to drop to zero.
3228 */
4a3e2f71 3229static DEFINE_MUTEX(net_todo_run_mutex);
3230void netdev_run_todo(void)
3231{
626ab0e6 3232 struct list_head list;
3233
 3234	/* Need to guard against multiple CPUs getting out of order. */
4a3e2f71 3235 mutex_lock(&net_todo_run_mutex);
3236
3237 /* Not safe to do outside the semaphore. We must not return
3238 * until all unregister events invoked by the local processor
3239 * have been completed (either by this todo run, or one on
3240 * another cpu).
3241 */
3242 if (list_empty(&net_todo_list))
3243 goto out;
3244
3245 /* Snapshot list, allow later requests */
3246 spin_lock(&net_todo_list_lock);
626ab0e6 3247 list_replace_init(&net_todo_list, &list);
1da177e4 3248 spin_unlock(&net_todo_list_lock);
626ab0e6 3249
3250 while (!list_empty(&list)) {
3251 struct net_device *dev
3252 = list_entry(list.next, struct net_device, todo_list);
3253 list_del(&dev->todo_list);
3254
3255 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3256 printk(KERN_ERR "network todo '%s' but state %d\n",
3257 dev->name, dev->reg_state);
3258 dump_stack();
3259 continue;
3260 }
1da177e4 3261
3262 netdev_unregister_sysfs(dev);
3263 dev->reg_state = NETREG_UNREGISTERED;
1da177e4 3264
b17a7c17 3265 netdev_wait_allrefs(dev);
1da177e4 3266
3267 /* paranoia */
3268 BUG_ON(atomic_read(&dev->refcnt));
3269 BUG_TRAP(!dev->ip_ptr);
3270 BUG_TRAP(!dev->ip6_ptr);
3271 BUG_TRAP(!dev->dn_ptr);
1da177e4 3272
3273 /* It must be the very last action,
3274 * after this 'dev' may point to freed up memory.
3275 */
3276 if (dev->destructor)
3277 dev->destructor(dev);
3278 }
3279
3280out:
4a3e2f71 3281 mutex_unlock(&net_todo_run_mutex);
3282}
3283
3284static struct net_device_stats *maybe_internal_stats(struct net_device *dev)
3285{
3286 if (dev->features & NETIF_F_INTERNAL_STATS)
3287 return &dev->stats;
3288 return NULL;
3289}
3290
3291/**
3292 * alloc_netdev - allocate network device
3293 * @sizeof_priv: size of private data to allocate space for
3294 * @name: device name format string
3295 * @setup: callback to initialize device
3296 *
3297 * Allocates a struct net_device with private data area for driver use
3298 * and performs basic initialization.
3299 */
3300struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3301 void (*setup)(struct net_device *))
3302{
3303 void *p;
3304 struct net_device *dev;
3305 int alloc_size;
3306
3307 BUG_ON(strlen(name) >= sizeof(dev->name));
3308
3309 /* ensure 32-byte alignment of both the device and private area */
3310 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3311 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3312
31380de9 3313 p = kzalloc(alloc_size, GFP_KERNEL);
1da177e4 3314 if (!p) {
b6fe17d6 3315 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3316 return NULL;
3317 }
3318
3319 dev = (struct net_device *)
3320 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3321 dev->padded = (char *)dev - (char *)p;
3322
3323 if (sizeof_priv)
3324 dev->priv = netdev_priv(dev);
3325
c45d286e 3326 dev->get_stats = maybe_internal_stats;
3327 setup(dev);
3328 strcpy(dev->name, name);
3329 return dev;
3330}
3331EXPORT_SYMBOL(alloc_netdev);
3332
3333/**
3334 * free_netdev - free network device
3335 * @dev: device
3336 *
3337 * This function does the last stage of destroying an allocated device
3338 * interface. The reference to the device object is released.
3339 * If this is the last reference then it will be freed.
3340 */
3341void free_netdev(struct net_device *dev)
3342{
3343#ifdef CONFIG_SYSFS
3041a069 3344 /* Compatibility with error handling in drivers */
3345 if (dev->reg_state == NETREG_UNINITIALIZED) {
3346 kfree((char *)dev - dev->padded);
3347 return;
3348 }
3349
3350 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3351 dev->reg_state = NETREG_RELEASED;
3352
3353 /* will free via device release */
3354 put_device(&dev->dev);
3355#else
3356 kfree((char *)dev - dev->padded);
3357#endif
3358}
4ec93edb 3359
1da177e4 3360/* Synchronize with packet receive processing. */
4ec93edb 3361void synchronize_net(void)
3362{
3363 might_sleep();
fbd568a3 3364 synchronize_rcu();
3365}
3366
3367/**
3368 * unregister_netdevice - remove device from the kernel
3369 * @dev: device
3370 *
3371 * This function shuts down a device interface and removes it
 3372 *	from the kernel tables. The device must currently be registered;
 3373 *	misuse is trapped with a WARN_ON rather than an error return.
3374 *
3375 * Callers must hold the rtnl semaphore. You may want
3376 * unregister_netdev() instead of this.
3377 */
3378
22f8cde5 3379void unregister_netdevice(struct net_device *dev)
3380{
3381 struct net_device *d, **dp;
3382
3383 BUG_ON(dev_boot_phase);
3384 ASSERT_RTNL();
3385
3386 /* Some devices call without registering for initialization unwind. */
3387 if (dev->reg_state == NETREG_UNINITIALIZED) {
3388 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3389 "was registered\n", dev->name, dev);
3390
3391 WARN_ON(1);
3392 return;
3393 }
3394
3395 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3396
3397 /* If device is running, close it first. */
3398 if (dev->flags & IFF_UP)
3399 dev_close(dev);
3400
3401 /* And unlink it from device chain. */
3402 for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3403 if (d == dev) {
3404 write_lock_bh(&dev_base_lock);
3405 hlist_del(&dev->name_hlist);
3406 hlist_del(&dev->index_hlist);
3407 if (dev_tail == &dev->next)
3408 dev_tail = dp;
3409 *dp = d->next;
3410 write_unlock_bh(&dev_base_lock);
3411 break;
3412 }
3413 }
22f8cde5 3414 BUG_ON(!d);
3415
3416 dev->reg_state = NETREG_UNREGISTERING;
3417
3418 synchronize_net();
3419
3420 /* Shutdown queueing discipline. */
3421 dev_shutdown(dev);
3422
4ec93edb 3423
 3424	/* Notify protocols that we are about to destroy
 3425	   this device. They should clean up all of their state.
3426 */
f07d5b94 3427 raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
4ec93edb 3428
3429 /*
3430 * Flush the multicast chain
3431 */
3432 dev_mc_discard(dev);
3433
3434 if (dev->uninit)
3435 dev->uninit(dev);
3436
3437 /* Notifier chain MUST detach us from master device. */
3438 BUG_TRAP(!dev->master);
3439
3440 /* Finish processing unregister after unlock */
3441 net_set_todo(dev);
3442
3443 synchronize_net();
3444
3445 dev_put(dev);
3446}
3447
3448/**
3449 * unregister_netdev - remove device from the kernel
3450 * @dev: device
3451 *
3452 * This function shuts down a device interface and removes it
 3453 *	from the kernel tables. Like unregister_netdevice() it returns
 3454 *	no value.
3455 *
3456 * This is just a wrapper for unregister_netdevice that takes
3457 * the rtnl semaphore. In general you want to use this and not
3458 * unregister_netdevice.
3459 */
3460void unregister_netdev(struct net_device *dev)
3461{
3462 rtnl_lock();
3463 unregister_netdevice(dev);
3464 rtnl_unlock();
3465}
3466
3467EXPORT_SYMBOL(unregister_netdev);
3468
3469static int dev_cpu_callback(struct notifier_block *nfb,
3470 unsigned long action,
3471 void *ocpu)
3472{
3473 struct sk_buff **list_skb;
3474 struct net_device **list_net;
3475 struct sk_buff *skb;
3476 unsigned int cpu, oldcpu = (unsigned long)ocpu;
3477 struct softnet_data *sd, *oldsd;
3478
3479 if (action != CPU_DEAD)
3480 return NOTIFY_OK;
3481
3482 local_irq_disable();
3483 cpu = smp_processor_id();
3484 sd = &per_cpu(softnet_data, cpu);
3485 oldsd = &per_cpu(softnet_data, oldcpu);
3486
3487 /* Find end of our completion_queue. */
3488 list_skb = &sd->completion_queue;
3489 while (*list_skb)
3490 list_skb = &(*list_skb)->next;
3491 /* Append completion queue from offline CPU. */
3492 *list_skb = oldsd->completion_queue;
3493 oldsd->completion_queue = NULL;
3494
3495 /* Find end of our output_queue. */
3496 list_net = &sd->output_queue;
3497 while (*list_net)
3498 list_net = &(*list_net)->next_sched;
3499 /* Append output queue from offline CPU. */
3500 *list_net = oldsd->output_queue;
3501 oldsd->output_queue = NULL;
3502
3503 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3504 local_irq_enable();
3505
3506 /* Process offline CPU's input_pkt_queue */
3507 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3508 netif_rx(skb);
3509
3510 return NOTIFY_OK;
3511}
1da177e4 3512
3513#ifdef CONFIG_NET_DMA
3514/**
 3515 * net_dma_rebalance - rebalance DMA channels across the online CPUs
3516 * This is called when the number of channels allocated to the net_dma_client
3517 * changes. The net_dma_client tries to have one DMA channel per CPU.
3518 */
3519static void net_dma_rebalance(void)
3520{
3521 unsigned int cpu, i, n;
3522 struct dma_chan *chan;
3523
3524 if (net_dma_count == 0) {
3525 for_each_online_cpu(cpu)
29bbd72d 3526 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3527 return;
3528 }
3529
3530 i = 0;
3531 cpu = first_cpu(cpu_online_map);
3532
3533 rcu_read_lock();
3534 list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3535 n = ((num_online_cpus() / net_dma_count)
3536 + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3537
3538 while(n) {
29bbd72d 3539 per_cpu(softnet_data, cpu).net_dma = chan;
3540 cpu = next_cpu(cpu, cpu_online_map);
3541 n--;
3542 }
3543 i++;
3544 }
3545 rcu_read_unlock();
3546}
3547
3548/**
3549 * netdev_dma_event - event callback for the net_dma_client
3550 * @client: should always be net_dma_client
3551 * @chan: DMA channel for the event
3552 * @event: event type
3553 */
3554static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3555 enum dma_event event)
3556{
3557 spin_lock(&net_dma_event_lock);
3558 switch (event) {
3559 case DMA_RESOURCE_ADDED:
3560 net_dma_count++;
3561 net_dma_rebalance();
3562 break;
3563 case DMA_RESOURCE_REMOVED:
3564 net_dma_count--;
3565 net_dma_rebalance();
3566 break;
3567 default:
3568 break;
3569 }
3570 spin_unlock(&net_dma_event_lock);
3571}
3572
3573/**
 3574 * netdev_dma_register - register the networking subsystem as a DMA client
3575 */
3576static int __init netdev_dma_register(void)
3577{
3578 spin_lock_init(&net_dma_event_lock);
3579 net_dma_client = dma_async_client_register(netdev_dma_event);
3580 if (net_dma_client == NULL)
3581 return -ENOMEM;
3582
3583 dma_async_client_chan_request(net_dma_client, num_online_cpus());
3584 return 0;
3585}
3586
3587#else
3588static int __init netdev_dma_register(void) { return -ENODEV; }
3589#endif /* CONFIG_NET_DMA */
3590
3591/*
3592 * Initialize the DEV module. At boot time this walks the device list and
3593 * unhooks any devices that fail to initialise (normally hardware not
3594 * present) and leaves us with a valid list of present and active devices.
3595 *
3596 */
3597
3598/*
3599 * This is called single threaded during boot, so no need
3600 * to take the rtnl semaphore.
3601 */
3602static int __init net_dev_init(void)
3603{
3604 int i, rc = -ENOMEM;
3605
3606 BUG_ON(!dev_boot_phase);
3607
3608 if (dev_proc_init())
3609 goto out;
3610
3611 if (netdev_sysfs_init())
3612 goto out;
3613
3614 INIT_LIST_HEAD(&ptype_all);
4ec93edb 3615 for (i = 0; i < 16; i++)
3616 INIT_LIST_HEAD(&ptype_base[i]);
3617
3618 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3619 INIT_HLIST_HEAD(&dev_name_head[i]);
3620
3621 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3622 INIT_HLIST_HEAD(&dev_index_head[i]);
3623
3624 /*
3625 * Initialise the packet receive queues.
3626 */
3627
6f912042 3628 for_each_possible_cpu(i) {
3629 struct softnet_data *queue;
3630
3631 queue = &per_cpu(softnet_data, i);
3632 skb_queue_head_init(&queue->input_pkt_queue);
3633 queue->completion_queue = NULL;
3634 INIT_LIST_HEAD(&queue->poll_list);
3635 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3636 queue->backlog_dev.weight = weight_p;
3637 queue->backlog_dev.poll = process_backlog;
3638 atomic_set(&queue->backlog_dev.refcnt, 1);
3639 }
3640
3641 netdev_dma_register();
3642
3643 dev_boot_phase = 0;
3644
3645 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3646 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3647
3648 hotcpu_notifier(dev_cpu_callback, 0);
3649 dst_init();
3650 dev_mcast_init();
3651 rc = 0;
3652out:
3653 return rc;
3654}
3655
3656subsys_initcall(net_dev_init);
3657
3658EXPORT_SYMBOL(__dev_get_by_index);
3659EXPORT_SYMBOL(__dev_get_by_name);
3660EXPORT_SYMBOL(__dev_remove_pack);
c2373ee9 3661EXPORT_SYMBOL(dev_valid_name);
3662EXPORT_SYMBOL(dev_add_pack);
3663EXPORT_SYMBOL(dev_alloc_name);
3664EXPORT_SYMBOL(dev_close);
3665EXPORT_SYMBOL(dev_get_by_flags);
3666EXPORT_SYMBOL(dev_get_by_index);
3667EXPORT_SYMBOL(dev_get_by_name);
3668EXPORT_SYMBOL(dev_open);
3669EXPORT_SYMBOL(dev_queue_xmit);
3670EXPORT_SYMBOL(dev_remove_pack);
3671EXPORT_SYMBOL(dev_set_allmulti);
3672EXPORT_SYMBOL(dev_set_promiscuity);
3673EXPORT_SYMBOL(dev_change_flags);
3674EXPORT_SYMBOL(dev_set_mtu);
3675EXPORT_SYMBOL(dev_set_mac_address);
3676EXPORT_SYMBOL(free_netdev);
3677EXPORT_SYMBOL(netdev_boot_setup_check);
3678EXPORT_SYMBOL(netdev_set_master);
3679EXPORT_SYMBOL(netdev_state_change);
3680EXPORT_SYMBOL(netif_receive_skb);
3681EXPORT_SYMBOL(netif_rx);
3682EXPORT_SYMBOL(register_gifconf);
3683EXPORT_SYMBOL(register_netdevice);
3684EXPORT_SYMBOL(register_netdevice_notifier);
3685EXPORT_SYMBOL(skb_checksum_help);
3686EXPORT_SYMBOL(synchronize_net);
3687EXPORT_SYMBOL(unregister_netdevice);
3688EXPORT_SYMBOL(unregister_netdevice_notifier);
3689EXPORT_SYMBOL(net_enable_timestamp);
3690EXPORT_SYMBOL(net_disable_timestamp);
3691EXPORT_SYMBOL(dev_get_flags);
3692
3693#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3694EXPORT_SYMBOL(br_handle_frame_hook);
3695EXPORT_SYMBOL(br_fdb_get_hook);
3696EXPORT_SYMBOL(br_fdb_put_hook);
3697#endif
3698
3699#ifdef CONFIG_KMOD
3700EXPORT_SYMBOL(dev_load);
3701#endif
3702
3703EXPORT_PER_CPU_SYMBOL(softnet_data);