]> bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - net/core/dev.c
[NET]: dev_mcast: switch to generic net_device address lists
[net-next-2.6.git] / net / core / dev.c
index eb999003bbb7646634acac8c69eeb0c5663cca8d..18759ccdf219b998d0c5ca95c6edf9e62004ebc2 100644 (file)
 #include <linux/dmaengine.h>
 #include <linux/err.h>
 #include <linux/ctype.h>
+#include <linux/if_arp.h>
 
 /*
  *     The list of packet types we will receive (as opposed to discard)
@@ -156,13 +157,13 @@ static spinlock_t net_dma_event_lock;
 #endif
 
 /*
- * The @dev_base list is protected by @dev_base_lock and the rtnl
+ * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  * semaphore.
  *
  * Pure readers hold dev_base_lock for reading.
  *
  * Writers must hold the rtnl semaphore while they loop through the
- * dev_base list, and hold dev_base_lock for writing when they do the
+ * dev_base_head list, and hold dev_base_lock for writing when they do the
  * actual updates.  This allows pure readers to access the list even
  * while a writer is preparing to update it.
  *
@@ -174,11 +175,10 @@ static spinlock_t net_dma_event_lock;
  * unregister_netdevice(), which must be called with the rtnl
  * semaphore held.
  */
-struct net_device *dev_base;
-static struct net_device **dev_tail = &dev_base;
+LIST_HEAD(dev_base_head);
 DEFINE_RWLOCK(dev_base_lock);
 
-EXPORT_SYMBOL(dev_base);
+EXPORT_SYMBOL(dev_base_head);
 EXPORT_SYMBOL(dev_base_lock);
 
 #define NETDEV_HASHBITS        8
@@ -218,6 +218,73 @@ extern void netdev_unregister_sysfs(struct net_device *);
 #define        netdev_unregister_sysfs(dev)    do { } while(0)
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+/*
+ * register_netdevice() inits dev->_xmit_lock and sets lockdep class
+ * according to dev->type
+ */
+static const unsigned short netdev_lock_type[] =
+       {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
+        ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
+        ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
+        ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
+        ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
+        ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
+        ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
+        ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
+        ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
+        ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
+        ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
+        ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
+        ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
+        ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
+        ARPHRD_NONE};
+
+static const char *netdev_lock_name[] =
+       {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
+        "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
+        "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
+        "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
+        "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
+        "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
+        "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
+        "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
+        "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
+        "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
+        "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
+        "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
+        "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
+        "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
+        "_xmit_NONE"};
+
+static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+
+static inline unsigned short netdev_lock_pos(unsigned short dev_type)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
+               if (netdev_lock_type[i] == dev_type)
+                       return i;
+       /* the last key is used by default */
+       return ARRAY_SIZE(netdev_lock_type) - 1;
+}
+
+static inline void netdev_set_lockdep_class(spinlock_t *lock,
+                                           unsigned short dev_type)
+{
+       int i;
+
+       i = netdev_lock_pos(dev_type);
+       lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
+                                  netdev_lock_name[i]);
+}
+#else
+static inline void netdev_set_lockdep_class(spinlock_t *lock,
+                                           unsigned short dev_type)
+{
+}
+#endif
 
 /*******************************************************************************
 
@@ -567,26 +634,38 @@ struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 
        ASSERT_RTNL();
 
-       for (dev = dev_base; dev; dev = dev->next)
+       for_each_netdev(dev)
                if (dev->type == type &&
                    !memcmp(dev->dev_addr, ha, dev->addr_len))
-                       break;
-       return dev;
+                       return dev;
+
+       return NULL;
 }
 
 EXPORT_SYMBOL(dev_getbyhwaddr);
 
+struct net_device *__dev_getfirstbyhwtype(unsigned short type)
+{
+       struct net_device *dev;
+
+       ASSERT_RTNL();
+       for_each_netdev(dev)
+               if (dev->type == type)
+                       return dev;
+
+       return NULL;
+}
+
+EXPORT_SYMBOL(__dev_getfirstbyhwtype);
+
 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 {
        struct net_device *dev;
 
        rtnl_lock();
-       for (dev = dev_base; dev; dev = dev->next) {
-               if (dev->type == type) {
-                       dev_hold(dev);
-                       break;
-               }
-       }
+       dev = __dev_getfirstbyhwtype(type);
+       if (dev)
+               dev_hold(dev);
        rtnl_unlock();
        return dev;
 }
@@ -606,17 +685,19 @@ EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 {
-       struct net_device *dev;
+       struct net_device *dev, *ret;
 
+       ret = NULL;
        read_lock(&dev_base_lock);
-       for (dev = dev_base; dev != NULL; dev = dev->next) {
+       for_each_netdev(dev) {
                if (((dev->flags ^ if_flags) & mask) == 0) {
                        dev_hold(dev);
+                       ret = dev;
                        break;
                }
        }
        read_unlock(&dev_base_lock);
-       return dev;
+       return ret;
 }
 
 /**
@@ -682,7 +763,7 @@ int dev_alloc_name(struct net_device *dev, const char *name)
                if (!inuse)
                        return -ENOMEM;
 
-               for (d = dev_base; d; d = d->next) {
+               for_each_netdev(d) {
                        if (!sscanf(d->name, name, &i))
                                continue;
                        if (i < 0 || i >= max_netdevices)
@@ -964,7 +1045,7 @@ int register_netdevice_notifier(struct notifier_block *nb)
        rtnl_lock();
        err = raw_notifier_chain_register(&netdev_chain, nb);
        if (!err) {
-               for (dev = dev_base; dev; dev = dev->next) {
+               for_each_netdev(dev) {
                        nb->notifier_call(nb, NETDEV_REGISTER, dev);
 
                        if (dev->flags & IFF_UP)
@@ -1428,9 +1509,11 @@ int dev_queue_xmit(struct sk_buff *skb)
                skb_set_transport_header(skb, skb->csum_start -
                                              skb_headroom(skb));
 
-               if (!(dev->features & NETIF_F_GEN_CSUM) &&
-                   (!(dev->features & NETIF_F_IP_CSUM) ||
-                    skb->protocol != htons(ETH_P_IP)))
+               if (!(dev->features & NETIF_F_GEN_CSUM)
+                   || ((dev->features & NETIF_F_IP_CSUM)
+                       && skb->protocol == htons(ETH_P_IP))
+                   || ((dev->features & NETIF_F_IPV6_CSUM)
+                       && skb->protocol == htons(ETH_P_IPV6)))
                        if (skb_checksum_help(skb))
                                goto out_kfree_skb;
        }
@@ -1928,6 +2011,7 @@ static void net_rx_action(struct softirq_action *h)
                }
        }
 out:
+       local_irq_enable();
 #ifdef CONFIG_NET_DMA
        /*
         * There may not be any more sk_buffs coming right now, so push
@@ -1941,7 +2025,6 @@ out:
                rcu_read_unlock();
        }
 #endif
-       local_irq_enable();
        return;
 
 softnet_break:
@@ -2038,7 +2121,7 @@ static int dev_ifconf(char __user *arg)
         */
 
        total = 0;
-       for (dev = dev_base; dev; dev = dev->next) {
+       for_each_netdev(dev) {
                for (i = 0; i < NPROTO; i++) {
                        if (gifconf_list[i]) {
                                int done;
@@ -2070,26 +2153,28 @@ static int dev_ifconf(char __user *arg)
  *     This is invoked by the /proc filesystem handler to display a device
  *     in detail.
  */
-static struct net_device *dev_get_idx(loff_t pos)
+void *dev_seq_start(struct seq_file *seq, loff_t *pos)
 {
+       loff_t off;
        struct net_device *dev;
-       loff_t i;
 
-       for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
+       read_lock(&dev_base_lock);
+       if (!*pos)
+               return SEQ_START_TOKEN;
 
-       return i == pos ? dev : NULL;
-}
+       off = 1;
+       for_each_netdev(dev)
+               if (off++ == *pos)
+                       return dev;
 
-void *dev_seq_start(struct seq_file *seq, loff_t *pos)
-{
-       read_lock(&dev_base_lock);
-       return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
+       return NULL;
 }
 
 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
        ++*pos;
-       return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
+       return v == SEQ_START_TOKEN ?
+               first_net_device() : next_net_device((struct net_device *)v);
 }
 
 void dev_seq_stop(struct seq_file *seq, void *v)
@@ -2362,9 +2447,9 @@ static int __init dev_proc_init(void)
 out:
        return rc;
 out_softnet:
-       proc_net_remove("softnet_stat");
-out_dev2:
        proc_net_remove("ptype");
+out_dev2:
+       proc_net_remove("softnet_stat");
 out_dev:
        proc_net_remove("dev");
        goto out;
@@ -2468,6 +2553,75 @@ void dev_set_allmulti(struct net_device *dev, int inc)
                dev_mc_upload(dev);
 }
 
+int __dev_addr_delete(struct dev_addr_list **list, void *addr, int alen,
+                     int glbl)
+{
+       struct dev_addr_list *da;
+
+       for (; (da = *list) != NULL; list = &da->next) {
+               if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
+                   alen == da->da_addrlen) {
+                       if (glbl) {
+                               int old_glbl = da->da_gusers;
+                               da->da_gusers = 0;
+                               if (old_glbl == 0)
+                                       break;
+                       }
+                       if (--da->da_users)
+                               return 0;
+
+                       *list = da->next;
+                       kfree(da);
+                       return 0;
+               }
+       }
+       return -ENOENT;
+}
+
+int __dev_addr_add(struct dev_addr_list **list, void *addr, int alen, int glbl)
+{
+       struct dev_addr_list *da;
+
+       for (da = *list; da != NULL; da = da->next) {
+               if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
+                   da->da_addrlen == alen) {
+                       if (glbl) {
+                               int old_glbl = da->da_gusers;
+                               da->da_gusers = 1;
+                               if (old_glbl)
+                                       return 0;
+                       }
+                       da->da_users++;
+                       return 0;
+               }
+       }
+
+       da = kmalloc(sizeof(*da), GFP_ATOMIC);
+       if (da == NULL)
+               return -ENOMEM;
+       memcpy(da->da_addr, addr, alen);
+       da->da_addrlen = alen;
+       da->da_users = 1;
+       da->da_gusers = glbl ? 1 : 0;
+       da->next = *list;
+       *list = da;
+       return 0;
+}
+
+void __dev_addr_discard(struct dev_addr_list **list)
+{
+       struct dev_addr_list *tmp;
+
+       while (*list != NULL) {
+               tmp = *list;
+               *list = tmp->next;
+               if (tmp->da_users > tmp->da_gusers)
+                       printk("__dev_addr_discard: address leakage! "
+                              "da_users=%d\n", tmp->da_users);
+               kfree(tmp);
+       }
+}
+
 unsigned dev_get_flags(const struct net_device *dev)
 {
        unsigned flags;
@@ -2494,7 +2648,7 @@ unsigned dev_get_flags(const struct net_device *dev)
 
 int dev_change_flags(struct net_device *dev, unsigned flags)
 {
-       int ret;
+       int ret, changes;
        int old_flags = dev->flags;
 
        /*
@@ -2549,8 +2703,10 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
                dev_set_allmulti(dev, inc);
        }
 
-       if (old_flags ^ dev->flags)
-               rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
+       /* Exclude state transition flags, already notified */
+       changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
+       if (changes)
+               rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
 
        return ret;
 }
@@ -2986,6 +3142,7 @@ int register_netdevice(struct net_device *dev)
 
        spin_lock_init(&dev->queue_lock);
        spin_lock_init(&dev->_xmit_lock);
+       netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
        dev->xmit_lock_owner = -1;
        spin_lock_init(&dev->ingress_lock);
 
@@ -3021,6 +3178,22 @@ int register_netdevice(struct net_device *dev)
                }
        }
 
+       /* Fix illegal checksum combinations */
+       if ((dev->features & NETIF_F_HW_CSUM) &&
+           (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+               printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
+                      dev->name);
+               dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
+       }
+
+       if ((dev->features & NETIF_F_NO_CSUM) &&
+           (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
+               printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
+                      dev->name);
+               dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
+       }
+
+
        /* Fix illegal SG+CSUM combinations. */
        if ((dev->features & NETIF_F_SG) &&
            !(dev->features & NETIF_F_ALL_CSUM)) {
@@ -3071,11 +3244,9 @@ int register_netdevice(struct net_device *dev)
 
        set_bit(__LINK_STATE_PRESENT, &dev->state);
 
-       dev->next = NULL;
        dev_init_scheduler(dev);
        write_lock_bh(&dev_base_lock);
-       *dev_tail = dev;
-       dev_tail = &dev->next;
+       list_add_tail(&dev->dev_list, &dev_base_head);
        hlist_add_head(&dev->name_hlist, head);
        hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
        dev_hold(dev);
@@ -3232,7 +3403,6 @@ void netdev_run_todo(void)
                        continue;
                }
 
-               netdev_unregister_sysfs(dev);
                dev->reg_state = NETREG_UNREGISTERED;
 
                netdev_wait_allrefs(dev);
@@ -3243,11 +3413,11 @@ void netdev_run_todo(void)
                BUG_TRAP(!dev->ip6_ptr);
                BUG_TRAP(!dev->dn_ptr);
 
-               /* It must be the very last action,
-                * after this 'dev' may point to freed up memory.
-                */
                if (dev->destructor)
                        dev->destructor(dev);
+
+               /* Free network device */
+               kobject_put(&dev->dev.kobj);
        }
 
 out:
@@ -3349,8 +3519,6 @@ void synchronize_net(void)
 
 void unregister_netdevice(struct net_device *dev)
 {
-       struct net_device *d, **dp;
-
        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();
 
@@ -3370,19 +3538,11 @@ void unregister_netdevice(struct net_device *dev)
                dev_close(dev);
 
        /* And unlink it from device chain. */
-       for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
-               if (d == dev) {
-                       write_lock_bh(&dev_base_lock);
-                       hlist_del(&dev->name_hlist);
-                       hlist_del(&dev->index_hlist);
-                       if (dev_tail == &dev->next)
-                               dev_tail = dp;
-                       *dp = d->next;
-                       write_unlock_bh(&dev_base_lock);
-                       break;
-               }
-       }
-       BUG_ON(!d);
+       write_lock_bh(&dev_base_lock);
+       list_del(&dev->dev_list);
+       hlist_del(&dev->name_hlist);
+       hlist_del(&dev->index_hlist);
+       write_unlock_bh(&dev_base_lock);
 
        dev->reg_state = NETREG_UNREGISTERING;
 
@@ -3408,6 +3568,9 @@ void unregister_netdevice(struct net_device *dev)
        /* Notifier chain MUST detach us from master device. */
        BUG_TRAP(!dev->master);
 
+       /* Remove entries from sysfs */
+       netdev_unregister_sysfs(dev);
+
        /* Finish processing unregister after unlock */
        net_set_todo(dev);
 
@@ -3447,7 +3610,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
        unsigned int cpu, oldcpu = (unsigned long)ocpu;
        struct softnet_data *sd, *oldsd;
 
-       if (action != CPU_DEAD)
+       if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
                return NOTIFY_OK;
 
        local_irq_disable();