/*
 * 	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain :	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/config.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/divert.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <linux/wireless.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16? Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16];	/* 16 way hashed list */
static struct list_head ptype_all;	/* Taps */

/*
 * The @dev_base list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
struct net_device *dev_base;
static struct net_device **dev_tail = &dev_base;
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base);
EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS	8
static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];

static inline struct hlist_head *dev_name_hash(const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
}

static inline struct hlist_head *dev_index_hash(int ifindex)
{
	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
}

/*
 *	Our notifier list
 */

static BLOCKING_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */
DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };

#ifdef CONFIG_SYSFS
extern int netdev_sysfs_init(void);
extern int netdev_register_sysfs(struct net_device *);
extern void netdev_unregister_sysfs(struct net_device *);
#else
#define netdev_sysfs_init()		(0)
#define netdev_register_sysfs(dev)	(0)
#define netdev_unregister_sysfs(dev)	do { } while(0)
#endif


/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	For efficiency
 */

int netdev_nit;

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if a protocol handler that mangles packets
 *	were first on the list, it would have no way to sense that the
 *	packet is cloned and should be copied-on-write, so it would
 *	change it and subsequent readers would get a broken packet.
 *	--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep and therefore cannot guarantee that
 *	every CPU currently in the middle of receiving a packet
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL)) {
		netdev_nit++;
		list_add_rcu(&pt->list, &ptype_all);
	} else {
		hash = ntohs(pt->type) & 15;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL)) {
		netdev_nit--;
		head = &ptype_all;
	} else
		head = &ptype_base[ntohs(pt->type) & 15];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}

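/*
 * Example (editor's sketch, not part of the original file): the minimal
 * life cycle of a packet handler.  The handler name and the ethertype
 * 0x88b5 (an IEEE "local experimental" value) are illustrative only.
 */
static int example_pt_rcv(struct sk_buff *skb, struct net_device *dev,
			  struct packet_type *pt, struct net_device *orig_dev)
{
	/* We hold one reference to the (possibly shared) skb; release it. */
	kfree_skb(skb);
	return 0;
}

static struct packet_type example_pt = {
	.type	= __constant_htons(0x88b5),
	.dev	= NULL,			/* NULL means: match every device */
	.func	= example_pt_rcv,
};

/*
 * dev_add_pack(&example_pt) attaches the handler; dev_remove_pack()
 * must run (and its synchronize_net() must return) before example_pt
 * may be freed.
 */
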
/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add - add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine
 *	for all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strcpy(s[i].name, name);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check - check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found, 1 if they are found.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 *	netdev_boot_base - get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

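/*
 * Example (editor's note, not in the original file): with the parser
 * above, booting with
 *
 *	netdev=9,0x300,0,0,eth0
 *
 * stores irq 9 and base_addr 0x300 for "eth0"; a probing driver later
 * retrieves the values through netdev_boot_setup_check().  The name
 * comes last because get_options() stops at the first non-integer.
 */
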
/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name - find a device by its name
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name - find a device by its name
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

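/*
 * Example (editor's sketch, not part of the original file): a reference
 * counted lookup from process context.  The name "eth0" is arbitrary.
 */
static void example_lookup(void)
{
	struct net_device *dev = dev_get_by_name("eth0");

	if (!dev)
		return;
	/* The held reference keeps the device from being freed. */
	printk(KERN_DEBUG "%s has ifindex %d\n", dev->name, dev->ifindex);
	dev_put(dev);	/* pair every successful lookup with dev_put() */
}
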
/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for (dev = dev_base; dev; dev = dev->next)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			break;
	return dev;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *dev_getfirstbyhwtype(unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	for (dev = dev_base; dev; dev = dev->next) {
		if (dev->type == type) {
			dev_hold(dev);
			break;
		}
	}
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	for (dev = dev_base; dev != NULL; dev = dev->next) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names
 *	to allow sysfs to work.
 */
int dev_valid_name(const char *name)
{
	return !(*name == '\0'
		 || !strcmp(name, ".")
		 || !strcmp(name, "..")
		 || strchr(name, '/'));
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg. "lt%d" - it will try to find a
 *	suitable id. It is not efficient for many devices and is not
 *	called a lot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to
 *	avoid duplicates. Returns the number of the unit assigned or
 *	a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	int i = 0;
	char buf[IFNAMSIZ];
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be exactly one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for (d = dev_base; d; d = d->next) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not an exact inverse of printf */
			snprintf(buf, sizeof(buf), name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, sizeof(buf), name, i);
	if (!__dev_get_by_name(buf)) {
		strlcpy(dev->name, buf, IFNAMSIZ);
		return i;
	}

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

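/*
 * Example (editor's sketch, not part of the original file): claiming
 * the first free "foo%d" slot while holding the rtnl semaphore, as the
 * comment above requires.  The "foo%d" template is made up.
 */
static int example_name_device(struct net_device *dev)
{
	int unit;

	ASSERT_RTNL();
	unit = dev_alloc_name(dev, "foo%d");
	if (unit < 0)
		return unit;	/* -EINVAL, -ENOMEM or -ENFILE */
	/* dev->name is now e.g. "foo0" and unit is the number chosen. */
	return 0;
}
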

/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change the name of a device; a format string such as "eth%d"
 *	may be passed for wildcarding.
 */
int dev_change_name(struct net_device *dev, char *newname)
{
	int err = 0;

	ASSERT_RTNL();

	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
		strcpy(newname, dev->name);
	}
	else if (__dev_get_by_name(newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

	err = class_device_rename(&dev->class_dev, dev->name);
	if (!err) {
		hlist_del(&dev->name_hlist);
		hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
		blocking_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGENAME, dev);
	}

	return err;
}

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	blocking_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		blocking_notifier_call_chain(&netdev_chain,
				NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

/**
 *	dev_load - load a network module
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

static int default_rebuild_header(struct sk_buff *skb)
{
	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
	       skb->dev ? skb->dev->name : "NULL!!!");
	kfree_skb(skb);
	return 1;
}


/**
 *	dev_open - prepare an interface for use.
 *	@dev: device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret = 0;

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);
	if (dev->open) {
		ret = dev->open(dev);
		if (ret)
			clear_bit(__LINK_STATE_START, &dev->state);
	}

	/*
	 *	If it went open OK then:
	 */

	if (!ret) {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_mc_upload(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		blocking_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
	}
	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for it while the device is still operating.
	 */
	blocking_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);

	dev_deactivate(dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running(),
	 * and wait until the poll really happens. Actually, the best place
	 * for this is inside dev->stop() after the device has stopped its
	 * irq engine, but this requires more changes in devices. */

	smp_mb__after_clear_bit(); /* Commit netif_running(). */
	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
		/* No hurry. */
		msleep(1);
	}

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (dev->stop)
		dev->stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	blocking_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);

	return 0;
}

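/*
 * Example (editor's sketch, not part of the original file): bouncing an
 * interface from process context.  Both calls expect the rtnl semaphore
 * and a device reference held by the caller.
 */
static int example_bounce(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_close(dev);		/* a nop if the device is down */
	if (!err)
		err = dev_open(dev);	/* a nop if the device is up */
	rtnl_unlock();
	return err;
}
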

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	int err;

	rtnl_lock();
	err = blocking_notifier_chain_register(&netdev_chain, nb);
	if (!err) {
		for (dev = dev_base; dev; dev = dev->next) {
			nb->notifier_call(nb, NETDEV_REGISTER, dev);

			if (dev->flags & IFF_UP)
				nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}
	rtnl_unlock();
	return err;
}

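/*
 * Example (editor's sketch, not part of the original file): a notifier
 * that logs interfaces coming up.  Because registration replays
 * NETDEV_REGISTER/NETDEV_UP, already-running devices are reported too.
 */
static int example_event(struct notifier_block *nb, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UP)
		printk(KERN_INFO "%s is up\n", dev->name);
	return NOTIFY_DONE;
}

static struct notifier_block example_notifier = {
	.notifier_call = example_event,
};

/* register_netdevice_notifier(&example_notifier) from module init,
 * unregister_netdevice_notifier(&example_notifier) from module exit. */
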
/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = blocking_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@v:   pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for blocking_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, void *v)
{
	return blocking_notifier_call_chain(&netdev_chain, val, v);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}

void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}

void __net_timestamp(struct sk_buff *skb)
{
	struct timeval tv;

	do_gettimeofday(&tv);
	skb_set_timestamp(skb, &tv);
}
EXPORT_SYMBOL(__net_timestamp);

static inline void net_timestamp(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else {
		skb->tstamp.off_sec = 0;
		skb->tstamp.off_usec = 0;
	}
}

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by the sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb2->mac.raw = skb2->data;

			if (skb2->nh.raw < skb2->data ||
			    skb2->nh.raw > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb2->nh.raw = skb2->data;
			}

			skb2->h.raw = skb2->nh.raw;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}


void __netif_schedule(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
		unsigned long flags;
		struct softnet_data *sd;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		dev->next_sched = sd->output_queue;
		sd->output_queue = dev;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__netif_schedule);

void __netif_rx_schedule(struct net_device *dev)
{
	unsigned long flags;

	local_irq_save(flags);
	dev_hold(dev);
	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
	if (dev->quota < 0)
		dev->quota += dev->weight;
	else
		dev->quota = dev->weight;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__netif_rx_schedule);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);


/* Hot-plugging. */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_stop_queue(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_wake_queue(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);

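/*
 * Example (editor's sketch, not part of the original file): how a PCI
 * driver typically pairs these calls in its power management hooks.
 * The pci_get_drvdata() mapping and <linux/pci.h> are assumptions
 * about the driver, not about this file.
 */
static int example_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	netif_device_detach(dev);	/* stop the queue, clear "present" */
	return 0;
}

static int example_resume(struct pci_dev *pdev)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	netif_device_attach(dev);	/* set "present", wake the queue */
	return 0;
}
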
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb, int inward)
{
	unsigned int csum;
	int ret = 0, offset = skb->h.raw - skb->data;

	if (inward) {
		skb->ip_summed = CHECKSUM_NONE;
		goto out;
	}

	if (skb_cloned(skb)) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	BUG_ON(offset > (int)skb->len);
	csum = skb_checksum(skb, offset, skb->len-offset, 0);

	offset = skb->tail - skb->h.raw;
	BUG_ON(offset <= 0);
	BUG_ON(skb->csum + 2 > offset);

	*(u16*)(skb->h.raw + skb->csum) = csum_fold(csum);
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
		       dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know that:
 * 1. An IOMMU is present and allows mapping all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
	int i;

	if (dev->features & NETIF_F_HIGHDMA)
		return 0;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
			return 1;

	return 0;
}
#else
#define illegal_highdma(dev, skb)	(0)
#endif

/* Keep head the same: replace data */
int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
{
	unsigned int size;
	u8 *data;
	long offset;
	struct skb_shared_info *ninfo;
	int headerlen = skb->data - skb->head;
	int expand = (skb->tail + skb->data_len) - skb->end;

	if (skb_shared(skb))
		BUG();

	if (expand <= 0)
		expand = 0;

	size = skb->end - skb->head + expand;
	size = SKB_DATA_ALIGN(size);
	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
	if (!data)
		return -ENOMEM;

	/* Copy entire thing */
	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
		BUG();

	/* Set up shinfo */
	ninfo = (struct skb_shared_info*)(data + size);
	atomic_set(&ninfo->dataref, 1);
	ninfo->tso_size = skb_shinfo(skb)->tso_size;
	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
	ninfo->nr_frags = 0;
	ninfo->frag_list = NULL;

	/* Offset between the two in bytes */
	offset = data - skb->head;

	/* Free old data. */
	skb_release_data(skb);

	skb->head = data;
	skb->end  = data + size;

	/* Set up new pointers */
	skb->h.raw   += offset;
	skb->nh.raw  += offset;
	skb->mac.raw += offset;
	skb->tail    += offset;
	skb->data    += offset;

	/* We are no longer a clone, even if we were. */
	skb->cloned = 0;

	skb->tail += skb->data_len;
	skb->data_len = 0;
	return 0;
}

#define HARD_TX_LOCK(dev, cpu) {			\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		spin_lock(&dev->xmit_lock);		\
		dev->xmit_lock_owner = cpu;		\
	}						\
}

#define HARD_TX_UNLOCK(dev) {				\
	if ((dev->features & NETIF_F_LLTX) == 0) {	\
		dev->xmit_lock_owner = -1;		\
		spin_unlock(&dev->xmit_lock);		\
	}						\
}

/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 * -----------------------------------------------------------------------------------
 *      I notice this method can also return errors from the queue disciplines,
 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
 *      be positive.
 *
 *      Regardless of the return value, the skb is consumed, so it is currently
 *      difficult to retry a send to this method.  (You can bump the ref count
 *      before sending to hold a reference for retry if you are careful.)
 *
 *      When calling this method, interrupts MUST be enabled.  This is because
 *      the BH enable code must have IRQs enabled so that it will not deadlock.
 *          --BLG
 */

int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct Qdisc *q;
	int rc = -ENOMEM;

	if (skb_shinfo(skb)->frag_list &&
	    !(dev->features & NETIF_F_FRAGLIST) &&
	    __skb_linearize(skb, GFP_ATOMIC))
		goto out_kfree_skb;

	/* Fragmented skb is linearized if device does not support SG,
	 * or if at least one of fragments is in highmem and device
	 * does not support DMA from it.
	 */
	if (skb_shinfo(skb)->nr_frags &&
	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
	    __skb_linearize(skb, GFP_ATOMIC))
		goto out_kfree_skb;

	/* If packet is not checksummed and device does not support
	 * checksumming for this protocol, complete checksumming here.
	 */
	if (skb->ip_summed == CHECKSUM_HW &&
	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
	     (!(dev->features & NETIF_F_IP_CSUM) ||
	      skb->protocol != htons(ETH_P_IP))))
		if (skb_checksum_help(skb, 0))
			goto out_kfree_skb;

	spin_lock_prefetch(&dev->queue_lock);

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	local_bh_disable();

	/* Updates of qdisc are serialized by queue_lock.
	 * The struct Qdisc which is pointed to by qdisc is now a
	 * rcu structure - it may be accessed without acquiring
	 * a lock (but the structure may be stale.) The freeing of the
	 * qdisc will be deferred until it's known that there are no
	 * more references to it.
	 *
	 * If the qdisc has an enqueue function, we still need to
	 * hold the queue_lock before calling it, since queue_lock
	 * also serializes access to the device queue.
	 */

	q = rcu_dereference(dev->qdisc);
#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	if (q->enqueue) {
		/* Grab device queue */
		spin_lock(&dev->queue_lock);

		rc = q->enqueue(skb, q);

		qdisc_run(dev);

		spin_unlock(&dev->queue_lock);
		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
		goto out;
	}

	/* The device has no queue. Common case for software devices:
	   loopback, all the sorts of tunnels...

	   Really, it is unlikely that xmit_lock protection is necessary here.
	   (f.e. loopback and IP tunnels are clean ignoring statistics
	   counters.)
	   However, it is possible, that they rely on protection
	   made by us here.

	   Check this and shoot the lock. It is not prone to deadlocks.
	   Either shoot the noqueue qdisc, it is even simpler 8)
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (dev->xmit_lock_owner != cpu) {

			HARD_TX_LOCK(dev, cpu);

			if (!netif_queue_stopped(dev)) {
				if (netdev_nit)
					dev_queue_xmit_nit(skb, dev);

				rc = 0;
				if (!dev->hard_start_xmit(skb, dev)) {
					HARD_TX_UNLOCK(dev);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately */
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	local_bh_enable();

out_kfree_skb:
	kfree_skb(skb);
	return rc;
out:
	local_bh_enable();
	return rc;
}

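/*
 * Example (editor's sketch, not part of the original file): building and
 * sending a minimal raw frame.  Real callers normally arrive through the
 * protocol layers; the zero-filled payload is only for illustration.
 */
static int example_xmit(struct net_device *dev)
{
	struct sk_buff *skb = alloc_skb(ETH_ZLEN + LL_RESERVED_SPACE(dev),
					GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	memset(skb_put(skb, ETH_ZLEN), 0, ETH_ZLEN);
	skb->dev = dev;
	skb->protocol = htons(ETH_P_802_3);
	/* The skb is consumed whatever the return value; never reuse it. */
	return dev_queue_xmit(skb);
}
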

/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog = 1000;
int netdev_budget = 300;
int weight_p = 64;		/* old backlog weight */

DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };


/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_CN_LOW	(low congestion)
 *	NET_RX_CN_MOD	(moderate congestion)
 *	NET_RX_CN_HIGH	(high congestion)
 *	NET_RX_DROP	(packet was dropped)
 *
 */

int netif_rx(struct sk_buff *skb)
{
	struct softnet_data *queue;
	unsigned long flags;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.off_sec)
		net_timestamp(skb);

	/*
	 * The code is rearranged so that the path is shortest
	 * when the CPU is congested but still operating.
	 */
	local_irq_save(flags);
	queue = &__get_cpu_var(softnet_data);

	__get_cpu_var(netdev_rx_stat).total++;
	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
		if (queue->input_pkt_queue.qlen) {
enqueue:
			dev_hold(skb->dev);
			__skb_queue_tail(&queue->input_pkt_queue, skb);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		netif_rx_schedule(&queue->backlog_dev);
		goto enqueue;
	}

	__get_cpu_var(netdev_rx_stat).dropped++;
	local_irq_restore(flags);

	kfree_skb(skb);
	return NET_RX_DROP;
}

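/*
 * Example (editor's sketch, not part of the original file): the classic
 * non-NAPI receive path of a driver interrupt handler.  The data/len
 * parameters stand in for whatever the hardware ring provides.
 */
static void example_rx(struct net_device *dev, void *data, unsigned int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + 2);

	if (!skb)
		return;			/* drop on allocation failure */
	skb_reserve(skb, 2);		/* align the IP header */
	memcpy(skb_put(skb, len), data, len);
	skb->dev = dev;
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);			/* hand off to the softirq */
}
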
int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}

EXPORT_SYMBOL(netif_rx_ni);

static inline struct net_device *skb_bond(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	if (dev->master) {
		/*
		 * On bonding slaves other than the currently active
		 * slave, suppress duplicates except for 802.3ad
		 * ETH_P_SLOW and alb non-mcast/bcast.
		 */
		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
			if (dev->master->priv_flags & IFF_MASTER_ALB) {
				if (skb->pkt_type != PACKET_BROADCAST &&
				    skb->pkt_type != PACKET_MULTICAST)
					goto keep;
			}

			if (dev->master->priv_flags & IFF_MASTER_8023AD &&
			    skb->protocol == __constant_htons(ETH_P_SLOW))
				goto keep;

			kfree_skb(skb);
			return NULL;
		}
keep:
		skb->dev = dev->master;
	}

	return dev;
}

static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			BUG_TRAP(!atomic_read(&skb->users));
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct net_device *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		local_irq_enable();

		while (head) {
			struct net_device *dev = head;
			head = head->next_sched;

			smp_mb__before_clear_bit();
			clear_bit(__LINK_STATE_SCHED, &dev->state);

			if (spin_trylock(&dev->queue_lock)) {
				qdisc_run(dev);
				spin_unlock(&dev->queue_lock);
			} else {
				netif_schedule(dev);
			}
		}
	}
}

static __inline__ int deliver_skb(struct sk_buff *skb,
				  struct packet_type *pt_prev,
				  struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
struct net_bridge;
struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
						unsigned char *addr);
void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);

static __inline__ int handle_bridge(struct sk_buff **pskb,
				    struct packet_type **pt_prev, int *ret,
				    struct net_device *orig_dev)
{
	struct net_bridge_port *port;

	if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
	    (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
		return 0;

	if (*pt_prev) {
		*ret = deliver_skb(*pskb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	return br_handle_frame_hook(port, pskb);
}
#else
#define handle_bridge(skb, pt_prev, ret, orig_dev)	(0)
#endif

#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is? Otherwise we pay the cost of a compare
 * and 2 extra stores right now if we don't have it on
 * but have CONFIG_NET_CLS_ACT.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 *
 */
static int ing_filter(struct sk_buff *skb)
{
	struct Qdisc *q;
	struct net_device *dev = skb->dev;
	int result = TC_ACT_OK;

	if (dev->qdisc_ingress) {
		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
		if (MAX_RED_LOOP < ttl++) {
			printk("Redir loop detected Dropping packet (%s->%s)\n",
			       skb->input_dev->name, skb->dev->name);
			return TC_ACT_SHOT;
		}

		skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);

		skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

		spin_lock(&dev->ingress_lock);
		if ((q = dev->qdisc_ingress) != NULL)
			result = q->enqueue(skb, q);
		spin_unlock(&dev->ingress_lock);

	}

	return result;
}
#endif

int netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	struct net_device *orig_dev;
	int ret = NET_RX_DROP;
	unsigned short type;

	/* if we've gotten here through NAPI, check netpoll */
	if (skb->dev->poll && netpoll_rx(skb))
		return NET_RX_DROP;

	if (!skb->tstamp.off_sec)
		net_timestamp(skb);

	if (!skb->input_dev)
		skb->input_dev = skb->dev;

	orig_dev = skb_bond(skb);

	if (!orig_dev)
		return NET_RX_DROP;

	__get_cpu_var(netdev_rx_stat).total++;

	skb->h.raw = skb->nh.raw = skb->data;
	skb->mac_len = skb->nh.raw - skb->mac.raw;

	pt_prev = NULL;

	rcu_read_lock();

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

#ifdef CONFIG_NET_CLS_ACT
	if (pt_prev) {
		ret = deliver_skb(skb, pt_prev, orig_dev);
		pt_prev = NULL; /* no one else should process this after */
	} else {
		skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
	}

	ret = ing_filter(skb);

	if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
		kfree_skb(skb);
		goto out;
	}

	skb->tc_verd = 0;
ncls:
#endif

	handle_diverter(skb);

	if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
		goto out;

	type = skb->protocol;
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
		if (ptype->type == type &&
		    (!ptype->dev || ptype->dev == skb->dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
		kfree_skb(skb);
		/* Jamal, now you will not be able to escape explaining
		 * to me how you were going to use this. :-)
		 */
		ret = NET_RX_DROP;
	}

out:
	rcu_read_unlock();
	return ret;
}

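/*
 * Example (editor's sketch, not part of the original file): the receive
 * half of a NAPI ->poll() routine feeding frames directly to
 * netif_receive_skb().  example_ring_next() is a hypothetical helper
 * that pops the next completed buffer off the device's RX ring.
 */
static struct sk_buff *example_ring_next(struct net_device *dev);

static int example_poll(struct net_device *dev, int *budget)
{
	int work = 0, quota = min(dev->quota, *budget);
	struct sk_buff *skb;

	while (work < quota && (skb = example_ring_next(dev)) != NULL) {
		skb->protocol = eth_type_trans(skb, dev);
		netif_receive_skb(skb);	/* runs in softirq context */
		work++;
	}
	dev->quota -= work;
	*budget -= work;
	if (work == quota)
		return 1;		/* more work; stay on the poll list */
	netif_rx_complete(dev);		/* done; leave the poll list */
	return 0;
}
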
static int process_backlog(struct net_device *backlog_dev, int *budget)
{
	int work = 0;
	int quota = min(backlog_dev->quota, *budget);
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;

	backlog_dev->weight = weight_p;
	for (;;) {
		struct sk_buff *skb;
		struct net_device *dev;

		local_irq_disable();
		skb = __skb_dequeue(&queue->input_pkt_queue);
		if (!skb)
			goto job_done;
		local_irq_enable();

		dev = skb->dev;

		netif_receive_skb(skb);

		dev_put(dev);

		work++;

		if (work >= quota || jiffies - start_time > 1)
			break;

	}

	backlog_dev->quota -= work;
	*budget -= work;
	return -1;

job_done:
	backlog_dev->quota -= work;
	*budget -= work;

	list_del(&backlog_dev->poll_list);
	smp_mb__before_clear_bit();
	netif_poll_enable(backlog_dev);

	local_irq_enable();
	return 0;
}

static void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *queue = &__get_cpu_var(softnet_data);
	unsigned long start_time = jiffies;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(&queue->poll_list)) {
		struct net_device *dev;

		if (budget <= 0 || jiffies - start_time > 1)
			goto softnet_break;

		local_irq_enable();

		dev = list_entry(queue->poll_list.next,
				 struct net_device, poll_list);
		have = netpoll_poll_lock(dev);

		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
			netpoll_poll_unlock(have);
			local_irq_disable();
			list_move_tail(&dev->poll_list, &queue->poll_list);
			if (dev->quota < 0)
				dev->quota += dev->weight;
			else
				dev->quota = dev->weight;
		} else {
			netpoll_poll_unlock(have);
			dev_put(dev);
			local_irq_disable();
		}
	}
out:
	local_irq_enable();
	return;

softnet_break:
	__get_cpu_var(netdev_rx_stat).time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}

static gifconf_func_t * gifconf_list [NPROTO];

/**
 *	register_gifconf	-	register a SIOCGIF handler
 *	@family: Address family
 *	@gifconf: Function handler
 *
 *	Register protocol dependent address dumping routines. The handler
 *	that is passed must not be freed or reused until it has been replaced
 *	by another handler.
 */
int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
{
	if (family >= NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}


/*
 *	Map an interface index to its name (SIOCGIFNAME)
 */

/*
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.  --pb
 */

static int dev_ifname(struct ifreq __user *arg)
{
	struct net_device *dev;
	struct ifreq ifr;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(ifr.ifr_ifindex);
	if (!dev) {
		read_unlock(&dev_base_lock);
		return -ENODEV;
	}

	strcpy(ifr.ifr_name, dev->name);
	read_unlock(&dev_base_lock);

	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
		return -EFAULT;
	return 0;
}

/*
 *	Perform a SIOCGIFCONF call. This structure will change
 *	size eventually, and there is nothing I can do about it.
 *	Thus we will need a 'compatibility mode'.
 */

static int dev_ifconf(char __user *arg)
{
	struct ifconf ifc;
	struct net_device *dev;
	char __user *pos;
	int len;
	int total;
	int i;

	/*
	 *	Fetch the caller's info block.
	 */

	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
		return -EFAULT;

	pos = ifc.ifc_buf;
	len = ifc.ifc_len;

	/*
	 *	Loop over the interfaces, and write an info block for each.
	 */

	total = 0;
	for (dev = dev_base; dev; dev = dev->next) {
		for (i = 0; i < NPROTO; i++) {
			if (gifconf_list[i]) {
				int done;
				if (!pos)
					done = gifconf_list[i](dev, NULL, 0);
				else
					done = gifconf_list[i](dev, pos + total,
							       len - total);
				if (done < 0)
					return -EFAULT;
				total += done;
			}
		}
	}

	/*
	 *	All done.  Write the updated control block back to the caller.
	 */
	ifc.ifc_len = total;

	/*
	 *	Both BSD and Solaris return 0 here, so we do too.
	 */
	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
}

1971#ifdef CONFIG_PROC_FS
1972/*
1973 * This is invoked by the /proc filesystem handler to display a device
1974 * in detail.
1975 */
1976static __inline__ struct net_device *dev_get_idx(loff_t pos)
1977{
1978 struct net_device *dev;
1979 loff_t i;
1980
1981 for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
1982
1983 return i == pos ? dev : NULL;
1984}
1985
1986void *dev_seq_start(struct seq_file *seq, loff_t *pos)
1987{
1988 read_lock(&dev_base_lock);
1989 return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
1990}
1991
1992void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1993{
1994 ++*pos;
1995 return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
1996}
1997
1998void dev_seq_stop(struct seq_file *seq, void *v)
1999{
2000 read_unlock(&dev_base_lock);
2001}
2002
2003static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2004{
2005 if (dev->get_stats) {
2006 struct net_device_stats *stats = dev->get_stats(dev);
2007
2008 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2009 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2010 dev->name, stats->rx_bytes, stats->rx_packets,
2011 stats->rx_errors,
2012 stats->rx_dropped + stats->rx_missed_errors,
2013 stats->rx_fifo_errors,
2014 stats->rx_length_errors + stats->rx_over_errors +
2015 stats->rx_crc_errors + stats->rx_frame_errors,
2016 stats->rx_compressed, stats->multicast,
2017 stats->tx_bytes, stats->tx_packets,
2018 stats->tx_errors, stats->tx_dropped,
2019 stats->tx_fifo_errors, stats->collisions,
2020 stats->tx_carrier_errors +
2021 stats->tx_aborted_errors +
2022 stats->tx_window_errors +
2023 stats->tx_heartbeat_errors,
2024 stats->tx_compressed);
2025 } else
2026 seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2027}
2028
2029/*
 2030 * Called from the PROCfs module. This now uses the new arbitrary-sized
 2031 * /proc/net interface to create /proc/net/dev.
2032 */
2033static int dev_seq_show(struct seq_file *seq, void *v)
2034{
2035 if (v == SEQ_START_TOKEN)
2036 seq_puts(seq, "Inter-| Receive "
2037 " | Transmit\n"
2038 " face |bytes packets errs drop fifo frame "
2039 "compressed multicast|bytes packets errs "
2040 "drop fifo colls carrier compressed\n");
2041 else
2042 dev_seq_printf_stats(seq, v);
2043 return 0;
2044}
2045
2046static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2047{
2048 struct netif_rx_stats *rc = NULL;
2049
2050 while (*pos < NR_CPUS)
2051 if (cpu_online(*pos)) {
2052 rc = &per_cpu(netdev_rx_stat, *pos);
2053 break;
2054 } else
2055 ++*pos;
2056 return rc;
2057}
2058
2059static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2060{
2061 return softnet_get_online(pos);
2062}
2063
2064static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2065{
2066 ++*pos;
2067 return softnet_get_online(pos);
2068}
2069
2070static void softnet_seq_stop(struct seq_file *seq, void *v)
2071{
2072}
2073
2074static int softnet_seq_show(struct seq_file *seq, void *v)
2075{
2076 struct netif_rx_stats *s = v;
2077
2078 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
31aa02c5 2079 s->total, s->dropped, s->time_squeeze, 0,
2080 0, 0, 0, 0, /* was fastroute */
2081 s->cpu_collision );
2082 return 0;
2083}
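/*
 * Example (editor's sketch): each line of /proc/net/softnet_stat is one
 * online CPU, printed as the nine hex words above (total, dropped,
 * time_squeeze, a zero, four zeros where the fastroute stats used to
 * be, cpu_collision). A minimal user-space reader, illustration only:
 */
#if 0
#include <stdio.h>

static void dump_softnet_stat(void)
{
	unsigned int total, dropped, squeezed, collision;
	FILE *f = fopen("/proc/net/softnet_stat", "r");

	if (!f)
		return;
	while (fscanf(f, "%x %x %x %*x %*x %*x %*x %*x %x",
		      &total, &dropped, &squeezed, &collision) == 4)
		printf("processed=%u dropped=%u squeezed=%u collisions=%u\n",
		       total, dropped, squeezed, collision);
	fclose(f);
}
#endif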
2084
2085static struct seq_operations dev_seq_ops = {
2086 .start = dev_seq_start,
2087 .next = dev_seq_next,
2088 .stop = dev_seq_stop,
2089 .show = dev_seq_show,
2090};
2091
2092static int dev_seq_open(struct inode *inode, struct file *file)
2093{
2094 return seq_open(file, &dev_seq_ops);
2095}
2096
2097static struct file_operations dev_seq_fops = {
2098 .owner = THIS_MODULE,
2099 .open = dev_seq_open,
2100 .read = seq_read,
2101 .llseek = seq_lseek,
2102 .release = seq_release,
2103};
2104
2105static struct seq_operations softnet_seq_ops = {
2106 .start = softnet_seq_start,
2107 .next = softnet_seq_next,
2108 .stop = softnet_seq_stop,
2109 .show = softnet_seq_show,
2110};
2111
2112static int softnet_seq_open(struct inode *inode, struct file *file)
2113{
2114 return seq_open(file, &softnet_seq_ops);
2115}
2116
2117static struct file_operations softnet_seq_fops = {
2118 .owner = THIS_MODULE,
2119 .open = softnet_seq_open,
2120 .read = seq_read,
2121 .llseek = seq_lseek,
2122 .release = seq_release,
2123};
2124
d86b5e0e 2125#ifdef CONFIG_WIRELESS_EXT
2126extern int wireless_proc_init(void);
2127#else
2128#define wireless_proc_init() 0
2129#endif
2130
2131static int __init dev_proc_init(void)
2132{
2133 int rc = -ENOMEM;
2134
2135 if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2136 goto out;
2137 if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2138 goto out_dev;
2139 if (wireless_proc_init())
2140 goto out_softnet;
2141 rc = 0;
2142out:
2143 return rc;
2144out_softnet:
2145 proc_net_remove("softnet_stat");
2146out_dev:
2147 proc_net_remove("dev");
2148 goto out;
2149}
2150#else
2151#define dev_proc_init() 0
2152#endif /* CONFIG_PROC_FS */
2153
2154
2155/**
2156 * netdev_set_master - set up master/slave pair
2157 * @slave: slave device
2158 * @master: new master device
2159 *
2160 * Changes the master device of the slave. Pass %NULL to break the
2161 * bonding. The caller must hold the RTNL semaphore. On a failure
2162 * a negative errno code is returned. On success the reference counts
2163 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2164 * function returns zero.
2165 */
2166int netdev_set_master(struct net_device *slave, struct net_device *master)
2167{
2168 struct net_device *old = slave->master;
2169
2170 ASSERT_RTNL();
2171
2172 if (master) {
2173 if (old)
2174 return -EBUSY;
2175 dev_hold(master);
2176 }
2177
2178 slave->master = master;
2179
2180 synchronize_net();
2181
2182 if (old)
2183 dev_put(old);
2184
2185 if (master)
2186 slave->flags |= IFF_SLAVE;
2187 else
2188 slave->flags &= ~IFF_SLAVE;
2189
2190 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2191 return 0;
2192}
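/*
 * Example (editor's sketch): how a bonding-style driver might enslave a
 * device, honouring the RTNL requirement documented above; passing
 * %NULL later breaks the pair. bond_dev/slave_dev are illustrative
 * names.
 */
static int example_enslave(struct net_device *bond_dev,
			   struct net_device *slave_dev)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave_dev, bond_dev);
	rtnl_unlock();
	return err;	/* -EBUSY if the slave already had a master */
}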
2193
2194/**
2195 * dev_set_promiscuity - update promiscuity count on a device
2196 * @dev: device
2197 * @inc: modifier
2198 *
 2199 * Add or remove promiscuity from a device. While the count in the device
 2200 * remains above zero the interface remains promiscuous. Once it hits zero
 2201 * the device reverts to normal filtering operation. A negative @inc
 2202 * value is used to drop promiscuity on the device.
2203 */
2204void dev_set_promiscuity(struct net_device *dev, int inc)
2205{
2206 unsigned short old_flags = dev->flags;
2207
2208 if ((dev->promiscuity += inc) == 0)
2209 dev->flags &= ~IFF_PROMISC;
2210 else
2211 dev->flags |= IFF_PROMISC;
2212 if (dev->flags != old_flags) {
2213 dev_mc_upload(dev);
2214 printk(KERN_INFO "device %s %s promiscuous mode\n",
2215 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2216 "left");
2217 audit_log(current->audit_context, GFP_ATOMIC,
2218 AUDIT_ANOM_PROMISCUOUS,
2219 "dev=%s prom=%d old_prom=%d auid=%u",
2220 dev->name, (dev->flags & IFF_PROMISC),
2221 (old_flags & IFF_PROMISC),
2222 audit_get_loginuid(current->audit_context));
2223 }
2224}
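/*
 * Example (editor's sketch): a capture-style user takes one promiscuity
 * reference while active and drops it symmetrically, so independent
 * users compose through the counter rather than fighting over the flag.
 */
static void example_capture_start(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, 1);	/* count++, sets IFF_PROMISC */
	rtnl_unlock();
}

static void example_capture_stop(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);	/* count--, clears flag at zero */
	rtnl_unlock();
}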
2225
2226/**
2227 * dev_set_allmulti - update allmulti count on a device
2228 * @dev: device
2229 * @inc: modifier
2230 *
 2231 * Add or remove reception of all multicast frames from a device. While
 2232 * the count in the device remains above zero the interface continues
 2233 * to receive all multicast frames. Once it hits zero the device
 2234 * reverts to normal filtering operation. A negative @inc value is used
 2235 * to drop the counter when releasing a resource needing all multicasts.
2236 */
2237
2238void dev_set_allmulti(struct net_device *dev, int inc)
2239{
2240 unsigned short old_flags = dev->flags;
2241
2242 dev->flags |= IFF_ALLMULTI;
2243 if ((dev->allmulti += inc) == 0)
2244 dev->flags &= ~IFF_ALLMULTI;
2245 if (dev->flags ^ old_flags)
2246 dev_mc_upload(dev);
2247}
2248
2249unsigned dev_get_flags(const struct net_device *dev)
2250{
2251 unsigned flags;
2252
2253 flags = (dev->flags & ~(IFF_PROMISC |
2254 IFF_ALLMULTI |
2255 IFF_RUNNING |
2256 IFF_LOWER_UP |
2257 IFF_DORMANT)) |
2258 (dev->gflags & (IFF_PROMISC |
2259 IFF_ALLMULTI));
2260
2261 if (netif_running(dev)) {
2262 if (netif_oper_up(dev))
2263 flags |= IFF_RUNNING;
2264 if (netif_carrier_ok(dev))
2265 flags |= IFF_LOWER_UP;
2266 if (netif_dormant(dev))
2267 flags |= IFF_DORMANT;
2268 }
2269
2270 return flags;
2271}
2272
2273int dev_change_flags(struct net_device *dev, unsigned flags)
2274{
2275 int ret;
2276 int old_flags = dev->flags;
2277
2278 /*
2279 * Set the flags on our device.
2280 */
2281
2282 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2283 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2284 IFF_AUTOMEDIA)) |
2285 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2286 IFF_ALLMULTI));
2287
2288 /*
2289 * Load in the correct multicast list now the flags have changed.
2290 */
2291
2292 dev_mc_upload(dev);
2293
2294 /*
 2295 * Have we downed the interface? We handle IFF_UP ourselves
2296 * according to user attempts to set it, rather than blindly
2297 * setting it.
2298 */
2299
2300 ret = 0;
2301 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
2302 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2303
2304 if (!ret)
2305 dev_mc_upload(dev);
2306 }
2307
2308 if (dev->flags & IFF_UP &&
2309 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2310 IFF_VOLATILE)))
2311 blocking_notifier_call_chain(&netdev_chain,
2312 NETDEV_CHANGE, dev);
2313
2314 if ((flags ^ dev->gflags) & IFF_PROMISC) {
2315 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2316 dev->gflags ^= IFF_PROMISC;
2317 dev_set_promiscuity(dev, inc);
2318 }
2319
 2320 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
 2321 is important. Some (broken) drivers set IFF_PROMISC when
 2322 IFF_ALLMULTI is requested, without asking us and without reporting it.
 2323 */
2324 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2325 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2326 dev->gflags ^= IFF_ALLMULTI;
2327 dev_set_allmulti(dev, inc);
2328 }
2329
2330 if (old_flags ^ dev->flags)
2331 rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2332
2333 return ret;
2334}
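/*
 * Example (editor's sketch): bringing an interface up from kernel code
 * by toggling IFF_UP through dev_change_flags(), the same path the
 * SIOCSIFFLAGS ioctl takes on behalf of user space.
 */
static int example_if_up(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev->flags | IFF_UP);
	rtnl_unlock();
	return err;	/* dev_open()'s result when the bit changed */
}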
2335
2336int dev_set_mtu(struct net_device *dev, int new_mtu)
2337{
2338 int err;
2339
2340 if (new_mtu == dev->mtu)
2341 return 0;
2342
2343 /* MTU must be positive. */
2344 if (new_mtu < 0)
2345 return -EINVAL;
2346
2347 if (!netif_device_present(dev))
2348 return -ENODEV;
2349
2350 err = 0;
2351 if (dev->change_mtu)
2352 err = dev->change_mtu(dev, new_mtu);
2353 else
2354 dev->mtu = new_mtu;
2355 if (!err && dev->flags & IFF_UP)
2356 blocking_notifier_call_chain(&netdev_chain,
2357 NETDEV_CHANGEMTU, dev);
2358 return err;
2359}
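/*
 * Example (editor's sketch): a tunnel-style driver shrinking the MTU of
 * an underlying device to leave room for its own encapsulation header.
 * EXAMPLE_HDR_LEN is an illustrative constant.
 */
#define EXAMPLE_HDR_LEN 8

static int example_fit_mtu(struct net_device *lower)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(lower, lower->mtu - EXAMPLE_HDR_LEN);
	rtnl_unlock();
	return err;	/* 0, or whatever the driver's change_mtu returned */
}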
2360
2361int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2362{
2363 int err;
2364
2365 if (!dev->set_mac_address)
2366 return -EOPNOTSUPP;
2367 if (sa->sa_family != dev->type)
2368 return -EINVAL;
2369 if (!netif_device_present(dev))
2370 return -ENODEV;
2371 err = dev->set_mac_address(dev, sa);
2372 if (!err)
2373 blocking_notifier_call_chain(&netdev_chain,
2374 NETDEV_CHANGEADDR, dev);
2375 return err;
2376}
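/*
 * Example (editor's sketch): setting a hardware address from kernel
 * code. The sockaddr carries the raw bytes and must be typed with the
 * device's ARP hardware type, exactly as checked above.
 */
static int example_set_mac(struct net_device *dev, const unsigned char *mac)
{
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;		/* e.g. ARPHRD_ETHER */
	memcpy(sa.sa_data, mac, dev->addr_len);
	rtnl_lock();
	err = dev_set_mac_address(dev, &sa);
	rtnl_unlock();
	return err;
}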
2377
2378/*
2379 * Perform the SIOCxIFxxx calls.
2380 */
2381static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2382{
2383 int err;
2384 struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2385
2386 if (!dev)
2387 return -ENODEV;
2388
2389 switch (cmd) {
2390 case SIOCGIFFLAGS: /* Get interface flags */
2391 ifr->ifr_flags = dev_get_flags(dev);
2392 return 0;
2393
2394 case SIOCSIFFLAGS: /* Set interface flags */
2395 return dev_change_flags(dev, ifr->ifr_flags);
2396
2397 case SIOCGIFMETRIC: /* Get the metric on the interface
2398 (currently unused) */
2399 ifr->ifr_metric = 0;
2400 return 0;
2401
2402 case SIOCSIFMETRIC: /* Set the metric on the interface
2403 (currently unused) */
2404 return -EOPNOTSUPP;
2405
2406 case SIOCGIFMTU: /* Get the MTU of a device */
2407 ifr->ifr_mtu = dev->mtu;
2408 return 0;
2409
2410 case SIOCSIFMTU: /* Set the MTU of a device */
2411 return dev_set_mtu(dev, ifr->ifr_mtu);
2412
2413 case SIOCGIFHWADDR:
2414 if (!dev->addr_len)
2415 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2416 else
2417 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2418 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2419 ifr->ifr_hwaddr.sa_family = dev->type;
2420 return 0;
2421
2422 case SIOCSIFHWADDR:
2423 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2424
2425 case SIOCSIFHWBROADCAST:
2426 if (ifr->ifr_hwaddr.sa_family != dev->type)
2427 return -EINVAL;
2428 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2429 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
e041c683 2430 blocking_notifier_call_chain(&netdev_chain,
2431 NETDEV_CHANGEADDR, dev);
2432 return 0;
2433
2434 case SIOCGIFMAP:
2435 ifr->ifr_map.mem_start = dev->mem_start;
2436 ifr->ifr_map.mem_end = dev->mem_end;
2437 ifr->ifr_map.base_addr = dev->base_addr;
2438 ifr->ifr_map.irq = dev->irq;
2439 ifr->ifr_map.dma = dev->dma;
2440 ifr->ifr_map.port = dev->if_port;
2441 return 0;
2442
2443 case SIOCSIFMAP:
2444 if (dev->set_config) {
2445 if (!netif_device_present(dev))
2446 return -ENODEV;
2447 return dev->set_config(dev, &ifr->ifr_map);
2448 }
2449 return -EOPNOTSUPP;
2450
2451 case SIOCADDMULTI:
2452 if (!dev->set_multicast_list ||
2453 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2454 return -EINVAL;
2455 if (!netif_device_present(dev))
2456 return -ENODEV;
2457 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2458 dev->addr_len, 1);
2459
2460 case SIOCDELMULTI:
2461 if (!dev->set_multicast_list ||
2462 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2463 return -EINVAL;
2464 if (!netif_device_present(dev))
2465 return -ENODEV;
2466 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2467 dev->addr_len, 1);
2468
2469 case SIOCGIFINDEX:
2470 ifr->ifr_ifindex = dev->ifindex;
2471 return 0;
2472
2473 case SIOCGIFTXQLEN:
2474 ifr->ifr_qlen = dev->tx_queue_len;
2475 return 0;
2476
2477 case SIOCSIFTXQLEN:
2478 if (ifr->ifr_qlen < 0)
2479 return -EINVAL;
2480 dev->tx_queue_len = ifr->ifr_qlen;
2481 return 0;
2482
2483 case SIOCSIFNAME:
2484 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
2485 return dev_change_name(dev, ifr->ifr_newname);
2486
2487 /*
2488 * Unknown or private ioctl
2489 */
2490
2491 default:
2492 if ((cmd >= SIOCDEVPRIVATE &&
2493 cmd <= SIOCDEVPRIVATE + 15) ||
2494 cmd == SIOCBONDENSLAVE ||
2495 cmd == SIOCBONDRELEASE ||
2496 cmd == SIOCBONDSETHWADDR ||
2497 cmd == SIOCBONDSLAVEINFOQUERY ||
2498 cmd == SIOCBONDINFOQUERY ||
2499 cmd == SIOCBONDCHANGEACTIVE ||
2500 cmd == SIOCGMIIPHY ||
2501 cmd == SIOCGMIIREG ||
2502 cmd == SIOCSMIIREG ||
2503 cmd == SIOCBRADDIF ||
2504 cmd == SIOCBRDELIF ||
2505 cmd == SIOCWANDEV) {
2506 err = -EOPNOTSUPP;
2507 if (dev->do_ioctl) {
2508 if (netif_device_present(dev))
2509 err = dev->do_ioctl(dev, ifr,
2510 cmd);
2511 else
2512 err = -ENODEV;
2513 }
2514 } else
2515 err = -EINVAL;
2516
2517 }
2518 return err;
2519}
2520
2521/*
2522 * This function handles all "interface"-type I/O control requests. The actual
2523 * 'doing' part of this is dev_ifsioc above.
2524 */
2525
2526/**
2527 * dev_ioctl - network device ioctl
2528 * @cmd: command to issue
2529 * @arg: pointer to a struct ifreq in user space
2530 *
2531 * Issue ioctl functions to devices. This is normally called by the
2532 * user space syscall interfaces but can sometimes be useful for
2533 * other purposes. The return value is the return from the syscall if
2534 * positive or a negative errno code on error.
2535 */
2536
2537int dev_ioctl(unsigned int cmd, void __user *arg)
2538{
2539 struct ifreq ifr;
2540 int ret;
2541 char *colon;
2542
 2543 /* One special case: SIOCGIFCONF takes an ifconf argument
 2544 and requires a shared lock, because it sleeps while
 2545 writing to user space.
2546 */
2547
2548 if (cmd == SIOCGIFCONF) {
6756ae4b 2549 rtnl_lock();
1da177e4 2550 ret = dev_ifconf((char __user *) arg);
6756ae4b 2551 rtnl_unlock();
2552 return ret;
2553 }
2554 if (cmd == SIOCGIFNAME)
2555 return dev_ifname((struct ifreq __user *)arg);
2556
2557 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2558 return -EFAULT;
2559
2560 ifr.ifr_name[IFNAMSIZ-1] = 0;
2561
2562 colon = strchr(ifr.ifr_name, ':');
2563 if (colon)
2564 *colon = 0;
2565
2566 /*
2567 * See which interface the caller is talking about.
2568 */
2569
2570 switch (cmd) {
2571 /*
2572 * These ioctl calls:
2573 * - can be done by all.
2574 * - atomic and do not require locking.
2575 * - return a value
2576 */
2577 case SIOCGIFFLAGS:
2578 case SIOCGIFMETRIC:
2579 case SIOCGIFMTU:
2580 case SIOCGIFHWADDR:
2581 case SIOCGIFSLAVE:
2582 case SIOCGIFMAP:
2583 case SIOCGIFINDEX:
2584 case SIOCGIFTXQLEN:
2585 dev_load(ifr.ifr_name);
2586 read_lock(&dev_base_lock);
2587 ret = dev_ifsioc(&ifr, cmd);
2588 read_unlock(&dev_base_lock);
2589 if (!ret) {
2590 if (colon)
2591 *colon = ':';
2592 if (copy_to_user(arg, &ifr,
2593 sizeof(struct ifreq)))
2594 ret = -EFAULT;
2595 }
2596 return ret;
2597
2598 case SIOCETHTOOL:
2599 dev_load(ifr.ifr_name);
2600 rtnl_lock();
2601 ret = dev_ethtool(&ifr);
2602 rtnl_unlock();
2603 if (!ret) {
2604 if (colon)
2605 *colon = ':';
2606 if (copy_to_user(arg, &ifr,
2607 sizeof(struct ifreq)))
2608 ret = -EFAULT;
2609 }
2610 return ret;
2611
2612 /*
2613 * These ioctl calls:
2614 * - require superuser power.
2615 * - require strict serialization.
2616 * - return a value
2617 */
2618 case SIOCGMIIPHY:
2619 case SIOCGMIIREG:
2620 case SIOCSIFNAME:
2621 if (!capable(CAP_NET_ADMIN))
2622 return -EPERM;
2623 dev_load(ifr.ifr_name);
2624 rtnl_lock();
2625 ret = dev_ifsioc(&ifr, cmd);
2626 rtnl_unlock();
2627 if (!ret) {
2628 if (colon)
2629 *colon = ':';
2630 if (copy_to_user(arg, &ifr,
2631 sizeof(struct ifreq)))
2632 ret = -EFAULT;
2633 }
2634 return ret;
2635
2636 /*
2637 * These ioctl calls:
2638 * - require superuser power.
2639 * - require strict serialization.
2640 * - do not return a value
2641 */
2642 case SIOCSIFFLAGS:
2643 case SIOCSIFMETRIC:
2644 case SIOCSIFMTU:
2645 case SIOCSIFMAP:
2646 case SIOCSIFHWADDR:
2647 case SIOCSIFSLAVE:
2648 case SIOCADDMULTI:
2649 case SIOCDELMULTI:
2650 case SIOCSIFHWBROADCAST:
2651 case SIOCSIFTXQLEN:
2652 case SIOCSMIIREG:
2653 case SIOCBONDENSLAVE:
2654 case SIOCBONDRELEASE:
2655 case SIOCBONDSETHWADDR:
2656 case SIOCBONDCHANGEACTIVE:
2657 case SIOCBRADDIF:
2658 case SIOCBRDELIF:
2659 if (!capable(CAP_NET_ADMIN))
2660 return -EPERM;
2661 /* fall through */
2662 case SIOCBONDSLAVEINFOQUERY:
2663 case SIOCBONDINFOQUERY:
2664 dev_load(ifr.ifr_name);
2665 rtnl_lock();
2666 ret = dev_ifsioc(&ifr, cmd);
2667 rtnl_unlock();
2668 return ret;
2669
2670 case SIOCGIFMEM:
2671 /* Get the per device memory space. We can add this but
2672 * currently do not support it */
2673 case SIOCSIFMEM:
2674 /* Set the per device memory buffer space.
2675 * Not applicable in our case */
2676 case SIOCSIFLINK:
2677 return -EINVAL;
2678
2679 /*
2680 * Unknown or private ioctl.
2681 */
2682 default:
2683 if (cmd == SIOCWANDEV ||
2684 (cmd >= SIOCDEVPRIVATE &&
2685 cmd <= SIOCDEVPRIVATE + 15)) {
2686 dev_load(ifr.ifr_name);
2687 rtnl_lock();
2688 ret = dev_ifsioc(&ifr, cmd);
2689 rtnl_unlock();
2690 if (!ret && copy_to_user(arg, &ifr,
2691 sizeof(struct ifreq)))
2692 ret = -EFAULT;
2693 return ret;
2694 }
d86b5e0e 2695#ifdef CONFIG_WIRELESS_EXT
2696 /* Take care of Wireless Extensions */
2697 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
2698 /* If command is `set a parameter', or
2699 * `get the encoding parameters', check if
2700 * the user has the right to do it */
2701 if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
2702 || cmd == SIOCGIWENCODEEXT) {
2703 if (!capable(CAP_NET_ADMIN))
2704 return -EPERM;
2705 }
2706 dev_load(ifr.ifr_name);
2707 rtnl_lock();
2708 /* Follow me in net/core/wireless.c */
2709 ret = wireless_process_ioctl(&ifr, cmd);
2710 rtnl_unlock();
2711 if (IW_IS_GET(cmd) &&
2712 copy_to_user(arg, &ifr,
2713 sizeof(struct ifreq)))
2714 ret = -EFAULT;
2715 return ret;
2716 }
d86b5e0e 2717#endif /* CONFIG_WIRELESS_EXT */
2718 return -EINVAL;
2719 }
2720}
2721
2722
2723/**
2724 * dev_new_index - allocate an ifindex
2725 *
2726 * Returns a suitable unique value for a new device interface
2727 * number. The caller must hold the rtnl semaphore or the
2728 * dev_base_lock to be sure it remains unique.
2729 */
2730static int dev_new_index(void)
2731{
2732 static int ifindex;
2733 for (;;) {
2734 if (++ifindex <= 0)
2735 ifindex = 1;
2736 if (!__dev_get_by_index(ifindex))
2737 return ifindex;
2738 }
2739}
2740
2741static int dev_boot_phase = 1;
2742
 2743/* Delayed registration/unregistration */
2744static DEFINE_SPINLOCK(net_todo_list_lock);
2745static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
2746
2747static inline void net_set_todo(struct net_device *dev)
2748{
2749 spin_lock(&net_todo_list_lock);
2750 list_add_tail(&dev->todo_list, &net_todo_list);
2751 spin_unlock(&net_todo_list_lock);
2752}
2753
2754/**
2755 * register_netdevice - register a network device
2756 * @dev: device to register
2757 *
2758 * Take a completed network device structure and add it to the kernel
2759 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2760 * chain. 0 is returned on success. A negative errno code is returned
2761 * on a failure to set up the device, or if the name is a duplicate.
2762 *
2763 * Callers must hold the rtnl semaphore. You may want
2764 * register_netdev() instead of this.
2765 *
2766 * BUGS:
2767 * The locking appears insufficient to guarantee two parallel registers
2768 * will not get the same name.
2769 */
2770
2771int register_netdevice(struct net_device *dev)
2772{
2773 struct hlist_head *head;
2774 struct hlist_node *p;
2775 int ret;
2776
2777 BUG_ON(dev_boot_phase);
2778 ASSERT_RTNL();
2779
 2780 /* When net_devices are persistent, this will be fatal. */
2781 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
2782
2783 spin_lock_init(&dev->queue_lock);
2784 spin_lock_init(&dev->xmit_lock);
2785 dev->xmit_lock_owner = -1;
2786#ifdef CONFIG_NET_CLS_ACT
2787 spin_lock_init(&dev->ingress_lock);
2788#endif
2789
2790 ret = alloc_divert_blk(dev);
2791 if (ret)
2792 goto out;
2793
2794 dev->iflink = -1;
2795
2796 /* Init, if this function is available */
2797 if (dev->init) {
2798 ret = dev->init(dev);
2799 if (ret) {
2800 if (ret > 0)
2801 ret = -EIO;
2802 goto out_err;
2803 }
2804 }
2805
2806 if (!dev_valid_name(dev->name)) {
2807 ret = -EINVAL;
2808 goto out_err;
2809 }
2810
2811 dev->ifindex = dev_new_index();
2812 if (dev->iflink == -1)
2813 dev->iflink = dev->ifindex;
2814
2815 /* Check for existence of name */
2816 head = dev_name_hash(dev->name);
2817 hlist_for_each(p, head) {
2818 struct net_device *d
2819 = hlist_entry(p, struct net_device, name_hlist);
2820 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
2821 ret = -EEXIST;
2822 goto out_err;
2823 }
2824 }
2825
2826 /* Fix illegal SG+CSUM combinations. */
2827 if ((dev->features & NETIF_F_SG) &&
2828 !(dev->features & (NETIF_F_IP_CSUM |
2829 NETIF_F_NO_CSUM |
2830 NETIF_F_HW_CSUM))) {
2831 printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
2832 dev->name);
2833 dev->features &= ~NETIF_F_SG;
2834 }
2835
2836 /* TSO requires that SG is present as well. */
2837 if ((dev->features & NETIF_F_TSO) &&
2838 !(dev->features & NETIF_F_SG)) {
2839 printk("%s: Dropping NETIF_F_TSO since no SG feature.\n",
2840 dev->name);
2841 dev->features &= ~NETIF_F_TSO;
2842 }
2843 if (dev->features & NETIF_F_UFO) {
2844 if (!(dev->features & NETIF_F_HW_CSUM)) {
2845 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2846 "NETIF_F_HW_CSUM feature.\n",
2847 dev->name);
2848 dev->features &= ~NETIF_F_UFO;
2849 }
2850 if (!(dev->features & NETIF_F_SG)) {
2851 printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
2852 "NETIF_F_SG feature.\n",
2853 dev->name);
2854 dev->features &= ~NETIF_F_UFO;
2855 }
2856 }
2857
2858 /*
 2859 * Nil rebuild_header routine that should never be called;
 2860 * it exists only as a bug trap.
2861 */
2862
2863 if (!dev->rebuild_header)
2864 dev->rebuild_header = default_rebuild_header;
2865
2866 /*
 2867 * Default initial state at registration is that the
 2868 * device is present.
2869 */
2870
2871 set_bit(__LINK_STATE_PRESENT, &dev->state);
2872
2873 dev->next = NULL;
2874 dev_init_scheduler(dev);
2875 write_lock_bh(&dev_base_lock);
2876 *dev_tail = dev;
2877 dev_tail = &dev->next;
2878 hlist_add_head(&dev->name_hlist, head);
2879 hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
2880 dev_hold(dev);
2881 dev->reg_state = NETREG_REGISTERING;
2882 write_unlock_bh(&dev_base_lock);
2883
 2884 /* Notify protocols that a new device has appeared. */
e041c683 2885 blocking_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
2886
2887 /* Finish registration after unlock */
2888 net_set_todo(dev);
2889 ret = 0;
2890
2891out:
2892 return ret;
2893out_err:
2894 free_divert_blk(dev);
2895 goto out;
2896}
2897
2898/**
2899 * register_netdev - register a network device
2900 * @dev: device to register
2901 *
2902 * Take a completed network device structure and add it to the kernel
2903 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
2904 * chain. 0 is returned on success. A negative errno code is returned
2905 * on a failure to set up the device, or if the name is a duplicate.
2906 *
 2907 * This is a wrapper around register_netdevice() that takes the rtnl
 2908 * semaphore and expands the device name if you passed a format string
 2909 * to alloc_netdev().
2910 */
2911int register_netdev(struct net_device *dev)
2912{
2913 int err;
2914
2915 rtnl_lock();
2916
2917 /*
2918 * If the name is a format string the caller wants us to do a
2919 * name allocation.
2920 */
2921 if (strchr(dev->name, '%')) {
2922 err = dev_alloc_name(dev, dev->name);
2923 if (err < 0)
2924 goto out;
2925 }
2926
2927 /*
2928 * Back compatibility hook. Kill this one in 2.5
2929 */
2930 if (dev->name[0] == 0 || dev->name[0] == ' ') {
2931 err = dev_alloc_name(dev, "eth%d");
2932 if (err < 0)
2933 goto out;
2934 }
2935
2936 err = register_netdevice(dev);
2937out:
2938 rtnl_unlock();
2939 return err;
2940}
2941EXPORT_SYMBOL(register_netdev);
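/*
 * Example (editor's sketch): the usual driver probe sequence built on
 * the helpers in this file -- allocate with a format-string name, let
 * register_netdev() pick a free "eth%d", unwind with free_netdev() on
 * failure. struct my_priv and my_setup() are illustrative.
 */
struct my_priv {
	int stats;			/* driver-private state */
};

static void my_setup(struct net_device *dev)
{
	ether_setup(dev);	/* illustrative: a typical Ethernet setup */
}

static int example_probe(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(sizeof(struct my_priv), "eth%d", my_setup);
	if (!dev)
		return -ENOMEM;

	err = register_netdev(dev);
	if (err)
		free_netdev(dev);	/* safe: device never registered */
	return err;
}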
2942
2943/*
2944 * netdev_wait_allrefs - wait until all references are gone.
2945 *
2946 * This is called when unregistering network devices.
2947 *
2948 * Any protocol or device that holds a reference should register
2949 * for netdevice notification, and cleanup and put back the
2950 * reference if they receive an UNREGISTER event.
2951 * We can get stuck here if buggy protocols don't correctly
2952 * call dev_put.
2953 */
2954static void netdev_wait_allrefs(struct net_device *dev)
2955{
2956 unsigned long rebroadcast_time, warning_time;
2957
2958 rebroadcast_time = warning_time = jiffies;
2959 while (atomic_read(&dev->refcnt) != 0) {
2960 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6756ae4b 2961 rtnl_lock();
2962
2963 /* Rebroadcast unregister notification */
e041c683 2964 blocking_notifier_call_chain(&netdev_chain,
2965 NETDEV_UNREGISTER, dev);
2966
2967 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
2968 &dev->state)) {
2969 /* We must not have linkwatch events
2970 * pending on unregister. If this
2971 * happens, we simply run the queue
2972 * unscheduled, resulting in a noop
2973 * for this device.
2974 */
2975 linkwatch_run_queue();
2976 }
2977
6756ae4b 2978 __rtnl_unlock();
2979
2980 rebroadcast_time = jiffies;
2981 }
2982
2983 msleep(250);
2984
2985 if (time_after(jiffies, warning_time + 10 * HZ)) {
2986 printk(KERN_EMERG "unregister_netdevice: "
2987 "waiting for %s to become free. Usage "
2988 "count = %d\n",
2989 dev->name, atomic_read(&dev->refcnt));
2990 warning_time = jiffies;
2991 }
2992 }
2993}
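/*
 * Example (editor's sketch): the discipline the comment above asks of
 * reference holders -- drop the reference from a NETDEV_UNREGISTER
 * notifier so this wait loop can terminate. my_held_dev is illustrative
 * module state.
 */
static struct net_device *my_held_dev;

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER && dev == my_held_dev) {
		dev_put(my_held_dev);	/* give our refcount back */
		my_held_dev = NULL;
	}
	return NOTIFY_DONE;
}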
2994
2995/* The sequence is:
2996 *
2997 * rtnl_lock();
2998 * ...
2999 * register_netdevice(x1);
3000 * register_netdevice(x2);
3001 * ...
3002 * unregister_netdevice(y1);
3003 * unregister_netdevice(y2);
3004 * ...
3005 * rtnl_unlock();
3006 * free_netdev(y1);
3007 * free_netdev(y2);
3008 *
3009 * We are invoked by rtnl_unlock() after it drops the semaphore.
3010 * This allows us to deal with problems:
3011 * 1) We can create/delete sysfs objects which invoke hotplug
3012 * without deadlocking with linkwatch via keventd.
3013 * 2) Since we run with the RTNL semaphore not held, we can sleep
3014 * safely in order to wait for the netdev refcnt to drop to zero.
3015 */
4a3e2f71 3016static DEFINE_MUTEX(net_todo_run_mutex);
3017void netdev_run_todo(void)
3018{
3019 struct list_head list = LIST_HEAD_INIT(list);
3020 int err;
3021
3022
 3023 /* Need to guard against multiple CPUs getting out of order. */
4a3e2f71 3024 mutex_lock(&net_todo_run_mutex);
3025
3026 /* Not safe to do outside the semaphore. We must not return
3027 * until all unregister events invoked by the local processor
3028 * have been completed (either by this todo run, or one on
3029 * another cpu).
3030 */
3031 if (list_empty(&net_todo_list))
3032 goto out;
3033
3034 /* Snapshot list, allow later requests */
3035 spin_lock(&net_todo_list_lock);
3036 list_splice_init(&net_todo_list, &list);
3037 spin_unlock(&net_todo_list_lock);
3038
3039 while (!list_empty(&list)) {
3040 struct net_device *dev
3041 = list_entry(list.next, struct net_device, todo_list);
3042 list_del(&dev->todo_list);
3043
3044 switch(dev->reg_state) {
3045 case NETREG_REGISTERING:
3046 err = netdev_register_sysfs(dev);
3047 if (err)
3048 printk(KERN_ERR "%s: failed sysfs registration (%d)\n",
3049 dev->name, err);
fe9925b5 3050 dev->reg_state = NETREG_REGISTERED;
3051 break;
3052
3053 case NETREG_UNREGISTERING:
3054 netdev_unregister_sysfs(dev);
3055 dev->reg_state = NETREG_UNREGISTERED;
3056
3057 netdev_wait_allrefs(dev);
3058
3059 /* paranoia */
3060 BUG_ON(atomic_read(&dev->refcnt));
3061 BUG_TRAP(!dev->ip_ptr);
3062 BUG_TRAP(!dev->ip6_ptr);
3063 BUG_TRAP(!dev->dn_ptr);
3064
3065
3066 /* It must be the very last action,
3067 * after this 'dev' may point to freed up memory.
3068 */
3069 if (dev->destructor)
3070 dev->destructor(dev);
3071 break;
3072
3073 default:
3074 printk(KERN_ERR "network todo '%s' but state %d\n",
3075 dev->name, dev->reg_state);
3076 break;
3077 }
3078 }
3079
3080out:
4a3e2f71 3081 mutex_unlock(&net_todo_run_mutex);
3082}
3083
3084/**
3085 * alloc_netdev - allocate network device
3086 * @sizeof_priv: size of private data to allocate space for
3087 * @name: device name format string
3088 * @setup: callback to initialize device
3089 *
3090 * Allocates a struct net_device with private data area for driver use
3091 * and performs basic initialization.
3092 */
3093struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3094 void (*setup)(struct net_device *))
3095{
3096 void *p;
3097 struct net_device *dev;
3098 int alloc_size;
3099
3100 /* ensure 32-byte alignment of both the device and private area */
3101 alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3102 alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3103
31380de9 3104 p = kzalloc(alloc_size, GFP_KERNEL);
3105 if (!p) {
3106 printk(KERN_ERR "alloc_dev: Unable to allocate device.\n");
3107 return NULL;
3108 }
3109
3110 dev = (struct net_device *)
3111 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3112 dev->padded = (char *)dev - (char *)p;
3113
3114 if (sizeof_priv)
3115 dev->priv = netdev_priv(dev);
3116
3117 setup(dev);
3118 strcpy(dev->name, name);
3119 return dev;
3120}
3121EXPORT_SYMBOL(alloc_netdev);
3122
3123/**
3124 * free_netdev - free network device
3125 * @dev: device
3126 *
3127 * This function does the last stage of destroying an allocated device
3128 * interface. The reference to the device object is released.
3129 * If this is the last reference then it will be freed.
3130 */
3131void free_netdev(struct net_device *dev)
3132{
3133#ifdef CONFIG_SYSFS
 3134 /* Compatibility with error handling in drivers */
3135 if (dev->reg_state == NETREG_UNINITIALIZED) {
3136 kfree((char *)dev - dev->padded);
3137 return;
3138 }
3139
3140 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3141 dev->reg_state = NETREG_RELEASED;
3142
3143 /* will free via class release */
3144 class_device_put(&dev->class_dev);
3145#else
3146 kfree((char *)dev - dev->padded);
3147#endif
3148}
3149
3150/* Synchronize with packet receive processing. */
3151void synchronize_net(void)
3152{
3153 might_sleep();
fbd568a3 3154 synchronize_rcu();
3155}
3156
3157/**
3158 * unregister_netdevice - remove device from the kernel
3159 * @dev: device
3160 *
3161 * This function shuts down a device interface and removes it
3162 * from the kernel tables. On success 0 is returned, on a failure
3163 * a negative errno code is returned.
3164 *
3165 * Callers must hold the rtnl semaphore. You may want
3166 * unregister_netdev() instead of this.
3167 */
3168
3169int unregister_netdevice(struct net_device *dev)
3170{
3171 struct net_device *d, **dp;
3172
3173 BUG_ON(dev_boot_phase);
3174 ASSERT_RTNL();
3175
 3176 /* Some devices call this without ever registering, to unwind a failed initialization. */
3177 if (dev->reg_state == NETREG_UNINITIALIZED) {
3178 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3179 "was registered\n", dev->name, dev);
3180 return -ENODEV;
3181 }
3182
3183 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3184
3185 /* If device is running, close it first. */
3186 if (dev->flags & IFF_UP)
3187 dev_close(dev);
3188
3189 /* And unlink it from device chain. */
3190 for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3191 if (d == dev) {
3192 write_lock_bh(&dev_base_lock);
3193 hlist_del(&dev->name_hlist);
3194 hlist_del(&dev->index_hlist);
3195 if (dev_tail == &dev->next)
3196 dev_tail = dp;
3197 *dp = d->next;
3198 write_unlock_bh(&dev_base_lock);
3199 break;
3200 }
3201 }
3202 if (!d) {
3203 printk(KERN_ERR "unregister net_device: '%s' not found\n",
3204 dev->name);
3205 return -ENODEV;
3206 }
3207
3208 dev->reg_state = NETREG_UNREGISTERING;
3209
3210 synchronize_net();
3211
3212 /* Shutdown queueing discipline. */
3213 dev_shutdown(dev);
3214
3215
 3216 /* Notify protocols that we are about to destroy
 3217 this device. They should clean up all their state.
3218 */
e041c683 3219 blocking_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3220
3221 /*
3222 * Flush the multicast chain
3223 */
3224 dev_mc_discard(dev);
3225
3226 if (dev->uninit)
3227 dev->uninit(dev);
3228
3229 /* Notifier chain MUST detach us from master device. */
3230 BUG_TRAP(!dev->master);
3231
3232 free_divert_blk(dev);
3233
3234 /* Finish processing unregister after unlock */
3235 net_set_todo(dev);
3236
3237 synchronize_net();
3238
3239 dev_put(dev);
3240 return 0;
3241}
3242
3243/**
3244 * unregister_netdev - remove device from the kernel
3245 * @dev: device
3246 *
3247 * This function shuts down a device interface and removes it
3248 * from the kernel tables. On success 0 is returned, on a failure
3249 * a negative errno code is returned.
3250 *
3251 * This is just a wrapper for unregister_netdevice that takes
3252 * the rtnl semaphore. In general you want to use this and not
3253 * unregister_netdevice.
3254 */
3255void unregister_netdev(struct net_device *dev)
3256{
3257 rtnl_lock();
3258 unregister_netdevice(dev);
3259 rtnl_unlock();
3260}
3261
3262EXPORT_SYMBOL(unregister_netdev);
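/*
 * Example (editor's sketch): the matching teardown for the probe sketch
 * after register_netdev() above -- unregister first, then release the
 * last reference once the todo machinery is done with the device.
 */
static void example_remove(struct net_device *dev)
{
	unregister_netdev(dev);		/* takes and drops RTNL itself */
	free_netdev(dev);		/* final put; the memory goes away */
}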
3263
3264#ifdef CONFIG_HOTPLUG_CPU
3265static int dev_cpu_callback(struct notifier_block *nfb,
3266 unsigned long action,
3267 void *ocpu)
3268{
3269 struct sk_buff **list_skb;
3270 struct net_device **list_net;
3271 struct sk_buff *skb;
3272 unsigned int cpu, oldcpu = (unsigned long)ocpu;
3273 struct softnet_data *sd, *oldsd;
3274
3275 if (action != CPU_DEAD)
3276 return NOTIFY_OK;
3277
3278 local_irq_disable();
3279 cpu = smp_processor_id();
3280 sd = &per_cpu(softnet_data, cpu);
3281 oldsd = &per_cpu(softnet_data, oldcpu);
3282
3283 /* Find end of our completion_queue. */
3284 list_skb = &sd->completion_queue;
3285 while (*list_skb)
3286 list_skb = &(*list_skb)->next;
3287 /* Append completion queue from offline CPU. */
3288 *list_skb = oldsd->completion_queue;
3289 oldsd->completion_queue = NULL;
3290
3291 /* Find end of our output_queue. */
3292 list_net = &sd->output_queue;
3293 while (*list_net)
3294 list_net = &(*list_net)->next_sched;
3295 /* Append output queue from offline CPU. */
3296 *list_net = oldsd->output_queue;
3297 oldsd->output_queue = NULL;
3298
3299 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3300 local_irq_enable();
3301
3302 /* Process offline CPU's input_pkt_queue */
3303 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3304 netif_rx(skb);
3305
3306 return NOTIFY_OK;
3307}
3308#endif /* CONFIG_HOTPLUG_CPU */
3309
3310
3311/*
3312 * Initialize the DEV module. At boot time this walks the device list and
3313 * unhooks any devices that fail to initialise (normally hardware not
3314 * present) and leaves us with a valid list of present and active devices.
3315 *
3316 */
3317
3318/*
 3319 * This is called single-threaded during boot, so there is no need
 3320 * to take the rtnl semaphore.
3321 */
3322static int __init net_dev_init(void)
3323{
3324 int i, rc = -ENOMEM;
3325
3326 BUG_ON(!dev_boot_phase);
3327
3328 net_random_init();
3329
3330 if (dev_proc_init())
3331 goto out;
3332
3333 if (netdev_sysfs_init())
3334 goto out;
3335
3336 INIT_LIST_HEAD(&ptype_all);
3337 for (i = 0; i < 16; i++)
3338 INIT_LIST_HEAD(&ptype_base[i]);
3339
3340 for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3341 INIT_HLIST_HEAD(&dev_name_head[i]);
3342
3343 for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3344 INIT_HLIST_HEAD(&dev_index_head[i]);
3345
3346 /*
3347 * Initialise the packet receive queues.
3348 */
3349
6f912042 3350 for_each_possible_cpu(i) {
3351 struct softnet_data *queue;
3352
3353 queue = &per_cpu(softnet_data, i);
3354 skb_queue_head_init(&queue->input_pkt_queue);
3355 queue->completion_queue = NULL;
3356 INIT_LIST_HEAD(&queue->poll_list);
3357 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3358 queue->backlog_dev.weight = weight_p;
3359 queue->backlog_dev.poll = process_backlog;
3360 atomic_set(&queue->backlog_dev.refcnt, 1);
3361 }
3362
3363 dev_boot_phase = 0;
3364
3365 open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3366 open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3367
3368 hotcpu_notifier(dev_cpu_callback, 0);
3369 dst_init();
3370 dev_mcast_init();
3371 rc = 0;
3372out:
3373 return rc;
3374}
3375
3376subsys_initcall(net_dev_init);
3377
3378EXPORT_SYMBOL(__dev_get_by_index);
3379EXPORT_SYMBOL(__dev_get_by_name);
3380EXPORT_SYMBOL(__dev_remove_pack);
3381EXPORT_SYMBOL(__skb_linearize);
c2373ee9 3382EXPORT_SYMBOL(dev_valid_name);
3383EXPORT_SYMBOL(dev_add_pack);
3384EXPORT_SYMBOL(dev_alloc_name);
3385EXPORT_SYMBOL(dev_close);
3386EXPORT_SYMBOL(dev_get_by_flags);
3387EXPORT_SYMBOL(dev_get_by_index);
3388EXPORT_SYMBOL(dev_get_by_name);
3389EXPORT_SYMBOL(dev_open);
3390EXPORT_SYMBOL(dev_queue_xmit);
3391EXPORT_SYMBOL(dev_remove_pack);
3392EXPORT_SYMBOL(dev_set_allmulti);
3393EXPORT_SYMBOL(dev_set_promiscuity);
3394EXPORT_SYMBOL(dev_change_flags);
3395EXPORT_SYMBOL(dev_set_mtu);
3396EXPORT_SYMBOL(dev_set_mac_address);
3397EXPORT_SYMBOL(free_netdev);
3398EXPORT_SYMBOL(netdev_boot_setup_check);
3399EXPORT_SYMBOL(netdev_set_master);
3400EXPORT_SYMBOL(netdev_state_change);
3401EXPORT_SYMBOL(netif_receive_skb);
3402EXPORT_SYMBOL(netif_rx);
3403EXPORT_SYMBOL(register_gifconf);
3404EXPORT_SYMBOL(register_netdevice);
3405EXPORT_SYMBOL(register_netdevice_notifier);
3406EXPORT_SYMBOL(skb_checksum_help);
3407EXPORT_SYMBOL(synchronize_net);
3408EXPORT_SYMBOL(unregister_netdevice);
3409EXPORT_SYMBOL(unregister_netdevice_notifier);
3410EXPORT_SYMBOL(net_enable_timestamp);
3411EXPORT_SYMBOL(net_disable_timestamp);
3412EXPORT_SYMBOL(dev_get_flags);
3413
3414#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3415EXPORT_SYMBOL(br_handle_frame_hook);
3416EXPORT_SYMBOL(br_fdb_get_hook);
3417EXPORT_SYMBOL(br_fdb_put_hook);
3418#endif
3419
3420#ifdef CONFIG_KMOD
3421EXPORT_SYMBOL(dev_load);
3422#endif
3423
3424EXPORT_PER_CPU_SYMBOL(softnet_data);