bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - net/ipv4/ipmr.c
net: use the macros defined for the members of flowi
[net-next-2.6.git] / net / ipv4 / ipmr.c
index 179fcab866fc5f550d580f3ca06b5abd24f0f58a..3f3a9afd73e02f1d5a4f89eb85d1bf7ba47b4ddc 100644 (file)
@@ -75,7 +75,7 @@ struct mr_table {
        struct net              *net;
 #endif
        u32                     id;
-       struct sock             *mroute_sk;
+       struct sock __rcu       *mroute_sk;
        struct timer_list       ipmr_expire_timer;
        struct list_head        mfc_unres_queue;
        struct list_head        mfc_cache_array[MFC_LINES];
@@ -98,7 +98,7 @@ struct ipmr_result {
 };
 
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
-   Note that the changes are semaphored via rtnl_lock.
+ * Note that the changes are semaphored via rtnl_lock.
  */
 
 static DEFINE_RWLOCK(mrt_lock);
@@ -113,11 +113,11 @@ static DEFINE_RWLOCK(mrt_lock);
 static DEFINE_SPINLOCK(mfc_unres_lock);
 
 /* We return to original Alan's scheme. Hash table of resolved
-   entries is changed only in process context and protected
-   with weak lock mrt_lock. Queue of unresolved entries is protected
-   with strong spinlock mfc_unres_lock.
-
-   In this case data path is free of exclusive locks at all.
+ * entries is changed only in process context and protected
+ * with weak lock mrt_lock. Queue of unresolved entries is protected
+ * with strong spinlock mfc_unres_lock.
+ *
+ * In this case data path is free of exclusive locks at all.
  */
 
 static struct kmem_cache *mrt_cachep __read_mostly;
@@ -396,9 +396,9 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
                        set_fs(KERNEL_DS);
                        err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                        set_fs(oldfs);
-               } else
+               } else {
                        err = -EOPNOTSUPP;
-
+               }
                dev = NULL;
 
                if (err == 0 &&
@@ -495,7 +495,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
        dev->iflink = 0;
 
        rcu_read_lock();
-       if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {
+       in_dev = __in_dev_get_rcu(dev);
+       if (!in_dev) {
                rcu_read_unlock();
                goto failure;
        }
@@ -552,9 +553,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
                mrt->mroute_reg_vif_num = -1;
 #endif
 
-       if (vifi+1 == mrt->maxvif) {
+       if (vifi + 1 == mrt->maxvif) {
                int tmp;
-               for (tmp=vifi-1; tmp>=0; tmp--) {
+
+               for (tmp = vifi - 1; tmp >= 0; tmp--) {
                        if (VIF_EXISTS(mrt, tmp))
                                break;
                }
@@ -565,25 +567,33 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 
        dev_set_allmulti(dev, -1);
 
-       if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
+       in_dev = __in_dev_get_rtnl(dev);
+       if (in_dev) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
                ip_rt_multicast_event(in_dev);
        }
 
-       if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify)
+       if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
                unregister_netdevice_queue(dev, head);
 
        dev_put(dev);
        return 0;
 }
 
-static inline void ipmr_cache_free(struct mfc_cache *c)
+static void ipmr_cache_free_rcu(struct rcu_head *head)
 {
+       struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
+
        kmem_cache_free(mrt_cachep, c);
 }
 
+static inline void ipmr_cache_free(struct mfc_cache *c)
+{
+       call_rcu(&c->rcu, ipmr_cache_free_rcu);
+}
+
 /* Destroy an unresolved cache entry, killing queued skbs
-   and reporting error to netlink readers.
+ * and reporting error to netlink readers.
  */
 
 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
@@ -605,8 +615,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
                        memset(&e->msg, 0, sizeof(e->msg));
 
                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
-               } else
+               } else {
                        kfree_skb(skb);
+               }
        }
 
        ipmr_cache_free(c);
@@ -724,13 +735,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        case 0:
                if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
                        dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
-                       if (dev && dev->ip_ptr == NULL) {
+                       if (dev && __in_dev_get_rtnl(dev) == NULL) {
                                dev_put(dev);
                                return -EADDRNOTAVAIL;
                        }
-               } else
+               } else {
                        dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
-
+               }
                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
@@ -743,16 +754,16 @@ static int vif_add(struct net *net, struct mr_table *mrt,
                return -EINVAL;
        }
 
-       if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
+       in_dev = __in_dev_get_rtnl(dev);
+       if (!in_dev) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
        ip_rt_multicast_event(in_dev);
 
-       /*
-        *      Fill in the VIF structures
-        */
+       /* Fill in the VIF structures */
+
        v->rate_limit = vifc->vifc_rate_limit;
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -765,14 +776,14 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        v->pkt_in = 0;
        v->pkt_out = 0;
        v->link = dev->ifindex;
-       if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+       if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
                v->link = dev->iflink;
 
        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        v->dev = dev;
 #ifdef CONFIG_IP_PIMSM
-       if (v->flags&VIFF_REGISTER)
+       if (v->flags & VIFF_REGISTER)
                mrt->mroute_reg_vif_num = vifi;
 #endif
        if (vifi+1 > mrt->maxvif)
@@ -781,6 +792,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
        return 0;
 }
 
+/* called with rcu_read_lock() */
 static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
                                         __be32 origin,
                                         __be32 mcastgrp)
@@ -788,7 +800,7 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
        int line = MFC_HASH(mcastgrp, origin);
        struct mfc_cache *c;
 
-       list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
+       list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
                if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
                        return c;
        }
@@ -801,19 +813,20 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
 static struct mfc_cache *ipmr_cache_alloc(void)
 {
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
-       if (c == NULL)
-               return NULL;
-       c->mfc_un.res.minvif = MAXVIFS;
+
+       if (c)
+               c->mfc_un.res.minvif = MAXVIFS;
        return c;
 }
 
 static struct mfc_cache *ipmr_cache_alloc_unres(void)
 {
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
-       if (c == NULL)
-               return NULL;
-       skb_queue_head_init(&c->mfc_un.unres.unresolved);
-       c->mfc_un.unres.expires = jiffies + 10*HZ;
+
+       if (c) {
+               skb_queue_head_init(&c->mfc_un.unres.unresolved);
+               c->mfc_un.unres.expires = jiffies + 10*HZ;
+       }
        return c;
 }
 
@@ -827,17 +840,15 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
        struct sk_buff *skb;
        struct nlmsgerr *e;
 
-       /*
-        *      Play the pending entries through our router
-        */
+       /* Play the pending entries through our router */
 
        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
                        if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
-                               nlh->nlmsg_len = (skb_tail_pointer(skb) -
-                                                 (u8 *)nlh);
+                               nlh->nlmsg_len = skb_tail_pointer(skb) -
+                                                (u8 *)nlh;
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -848,8 +859,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
                        }
 
                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
-               } else
+               } else {
                        ip_mr_forward(net, mrt, skb, c, 0);
+               }
        }
 }
 
@@ -867,6 +879,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
        const int ihl = ip_hdrlen(pkt);
        struct igmphdr *igmp;
        struct igmpmsg *msg;
+       struct sock *mroute_sk;
        int ret;
 
 #ifdef CONFIG_IP_PIMSM
@@ -882,9 +895,9 @@ static int ipmr_cache_report(struct mr_table *mrt,
 #ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
-                  Duplicate old header, fix ihl, length etc.
-                  And all this only to mangle msg->im_msgtype and
-                  to set msg->im_mbz to "mbz" :-)
+                * Duplicate old header, fix ihl, length etc.
+                * And all this only to mangle msg->im_msgtype and
+                * to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
@@ -901,39 +914,38 @@ static int ipmr_cache_report(struct mr_table *mrt,
 #endif
        {
 
-       /*
-        *      Copy the IP header
-        */
+       /* Copy the IP header */
 
        skb->network_header = skb->tail;
        skb_put(skb, ihl);
        skb_copy_to_linear_data(skb, pkt->data, ihl);
-       ip_hdr(skb)->protocol = 0;                      /* Flag to the kernel this is a route add */
+       ip_hdr(skb)->protocol = 0;      /* Flag to the kernel this is a route add */
        msg = (struct igmpmsg *)skb_network_header(skb);
        msg->im_vif = vifi;
        skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 
-       /*
-        *      Add our header
-        */
+       /* Add our header */
 
-       igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
+       igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
        igmp->type      =
        msg->im_msgtype = assert;
-       igmp->code      =       0;
-       ip_hdr(skb)->tot_len = htons(skb->len);                 /* Fix the length */
+       igmp->code      = 0;
+       ip_hdr(skb)->tot_len = htons(skb->len);         /* Fix the length */
        skb->transport_header = skb->network_header;
        }
 
-       if (mrt->mroute_sk == NULL) {
+       rcu_read_lock();
+       mroute_sk = rcu_dereference(mrt->mroute_sk);
+       if (mroute_sk == NULL) {
+               rcu_read_unlock();
                kfree_skb(skb);
                return -EINVAL;
        }
 
-       /*
-        *      Deliver to mrouted
-        */
-       ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
+       /* Deliver to mrouted */
+
+       ret = sock_queue_rcv_skb(mroute_sk, skb);
+       rcu_read_unlock();
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
@@ -965,9 +977,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
        }
 
        if (!found) {
-               /*
-                *      Create a new entry if allowable
-                */
+               /* Create a new entry if allowable */
 
                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
                    (c = ipmr_cache_alloc_unres()) == NULL) {
@@ -977,16 +987,14 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
                        return -ENOBUFS;
                }
 
-               /*
-                *      Fill in the new cache entry
-                */
+               /* Fill in the new cache entry */
+
                c->mfc_parent   = -1;
                c->mfc_origin   = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;
 
-               /*
-                *      Reflect first query at mrouted.
-                */
+               /* Reflect first query at mrouted. */
+
                err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed throw the cache entry
@@ -1006,10 +1014,9 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
                        mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
        }
 
-       /*
-        *      See if we can append the packet
-        */
-       if (c->mfc_un.unres.unresolved.qlen>3) {
+       /* See if we can append the packet */
+
+       if (c->mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
@@ -1035,9 +1042,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
        list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
-                       write_lock_bh(&mrt_lock);
-                       list_del(&c->list);
-                       write_unlock_bh(&mrt_lock);
+                       list_del_rcu(&c->list);
 
                        ipmr_cache_free(c);
                        return 0;
@@ -1090,9 +1095,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;
 
-       write_lock_bh(&mrt_lock);
-       list_add(&c->list, &mrt->mfc_cache_array[line]);
-       write_unlock_bh(&mrt_lock);
+       list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
 
        /*
         *      Check to see if we resolved a queued list. If so we
@@ -1130,26 +1133,21 @@ static void mroute_clean_tables(struct mr_table *mrt)
        LIST_HEAD(list);
        struct mfc_cache *c, *next;
 
-       /*
-        *      Shut down all active vif entries
-        */
+       /* Shut down all active vif entries */
+
        for (i = 0; i < mrt->maxvif; i++) {
-               if (!(mrt->vif_table[i].flags&VIFF_STATIC))
+               if (!(mrt->vif_table[i].flags & VIFF_STATIC))
                        vif_delete(mrt, i, 0, &list);
        }
        unregister_netdevice_many(&list);
 
-       /*
-        *      Wipe the cache
-        */
+       /* Wipe the cache */
+
        for (i = 0; i < MFC_LINES; i++) {
                list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
-                       if (c->mfc_flags&MFC_STATIC)
+                       if (c->mfc_flags & MFC_STATIC)
                                continue;
-                       write_lock_bh(&mrt_lock);
-                       list_del(&c->list);
-                       write_unlock_bh(&mrt_lock);
-
+                       list_del_rcu(&c->list);
                        ipmr_cache_free(c);
                }
        }
@@ -1164,6 +1162,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
        }
 }
 
+/* called from ip_ra_control(), before an RCU grace period,
+ * we dont need to call synchronize_rcu() here
+ */
 static void mrtsock_destruct(struct sock *sk)
 {
        struct net *net = sock_net(sk);
@@ -1171,13 +1172,9 @@ static void mrtsock_destruct(struct sock *sk)
 
        rtnl_lock();
        ipmr_for_each_table(mrt, net) {
-               if (sk == mrt->mroute_sk) {
+               if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
-
-                       write_lock_bh(&mrt_lock);
-                       mrt->mroute_sk = NULL;
-                       write_unlock_bh(&mrt_lock);
-
+                       rcu_assign_pointer(mrt->mroute_sk, NULL);
                        mroute_clean_tables(mrt);
                }
        }
@@ -1204,7 +1201,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
                return -ENOENT;
 
        if (optname != MRT_INIT) {
-               if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
+               if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
+                   !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }
 
@@ -1217,23 +1215,20 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
                        return -ENOPROTOOPT;
 
                rtnl_lock();
-               if (mrt->mroute_sk) {
+               if (rtnl_dereference(mrt->mroute_sk)) {
                        rtnl_unlock();
                        return -EADDRINUSE;
                }
 
                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
-                       write_lock_bh(&mrt_lock);
-                       mrt->mroute_sk = sk;
-                       write_unlock_bh(&mrt_lock);
-
+                       rcu_assign_pointer(mrt->mroute_sk, sk);
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
                }
                rtnl_unlock();
                return ret;
        case MRT_DONE:
-               if (sk != mrt->mroute_sk)
+               if (sk != rcu_dereference_raw(mrt->mroute_sk))
                        return -EACCES;
                return ip_ra_control(sk, 0, NULL);
        case MRT_ADD_VIF:
@@ -1246,7 +1241,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
                        return -ENFILE;
                rtnl_lock();
                if (optname == MRT_ADD_VIF) {
-                       ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
+                       ret = vif_add(net, mrt, &vif,
+                                     sk == rtnl_dereference(mrt->mroute_sk));
                } else {
                        ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
                }
@@ -1267,7 +1263,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
                if (optname == MRT_DEL_MFC)
                        ret = ipmr_mfc_delete(mrt, &mfc);
                else
-                       ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
+                       ret = ipmr_mfc_add(net, mrt, &mfc,
+                                          sk == rtnl_dereference(mrt->mroute_sk));
                rtnl_unlock();
                return ret;
                /*
@@ -1276,7 +1273,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
        case MRT_ASSERT:
        {
                int v;
-               if (get_user(v,(int __user *)optval))
+               if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                mrt->mroute_do_assert = (v) ? 1 : 0;
                return 0;
@@ -1286,7 +1283,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
        {
                int v;
 
-               if (get_user(v,(int __user *)optval))
+               if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                v = (v) ? 1 : 0;
 
@@ -1309,14 +1306,16 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
                        return -EINVAL;
                if (get_user(v, (u32 __user *)optval))
                        return -EFAULT;
-               if (sk == mrt->mroute_sk)
-                       return -EBUSY;
 
                rtnl_lock();
                ret = 0;
-               if (!ipmr_new_table(net, v))
-                       ret = -ENOMEM;
-               raw_sk(sk)->ipmr_table = v;
+               if (sk == rtnl_dereference(mrt->mroute_sk)) {
+                       ret = -EBUSY;
+               } else {
+                       if (!ipmr_new_table(net, v))
+                               ret = -ENOMEM;
+                       raw_sk(sk)->ipmr_table = v;
+               }
                rtnl_unlock();
                return ret;
        }
@@ -1347,9 +1346,9 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
 
        if (optname != MRT_VERSION &&
 #ifdef CONFIG_IP_PIMSM
-          optname!=MRT_PIM &&
+          optname != MRT_PIM &&
 #endif
-          optname!=MRT_ASSERT)
+          optname != MRT_ASSERT)
                return -ENOPROTOOPT;
 
        if (get_user(olr, optlen))
@@ -1416,19 +1415,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;
 
-               read_lock(&mrt_lock);
+               rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
-                       read_unlock(&mrt_lock);
+                       rcu_read_unlock();
 
                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
-               read_unlock(&mrt_lock);
+               rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
@@ -1465,7 +1464,7 @@ static struct notifier_block ip_mr_notifier = {
 };
 
 /*
- *     Encapsulate a packet by attaching a valid IPIP header to it.
+ *     Encapsulate a packet by attaching a valid IPIP header to it.
  *     This avoids tunnel drivers and other mess and gives us the speed so
  *     important for multicast video.
  */
@@ -1480,7 +1479,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);
 
-       iph->version    =       4;
+       iph->version    =       4;
        iph->tos        =       old_iph->tos;
        iph->ttl        =       old_iph->ttl;
        iph->frag_off   =       0;
@@ -1498,7 +1497,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
 
 static inline int ipmr_forward_finish(struct sk_buff *skb)
 {
-       struct ip_options * opt = &(IPCB(skb)->opt);
+       struct ip_options *opt = &(IPCB(skb)->opt);
 
        IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
@@ -1535,22 +1534,26 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
        }
 #endif
 
-       if (vif->flags&VIFF_TUNNEL) {
-               struct flowi fl = { .oif = vif->link,
-                                   .nl_u = { .ip4_u =
-                                             { .daddr = vif->remote,
-                                               .saddr = vif->local,
-                                               .tos = RT_TOS(iph->tos) } },
-                                   .proto = IPPROTO_IPIP };
+       if (vif->flags & VIFF_TUNNEL) {
+               struct flowi fl = {
+                       .oif = vif->link,
+                       .fl4_dst = vif->remote,
+                       .fl4_src = vif->local,
+                       .fl4_tos = RT_TOS(iph->tos),
+                       .proto = IPPROTO_IPIP
+               };
+
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
-               struct flowi fl = { .oif = vif->link,
-                                   .nl_u = { .ip4_u =
-                                             { .daddr = iph->daddr,
-                                               .tos = RT_TOS(iph->tos) } },
-                                   .proto = IPPROTO_IPIP };
+               struct flowi fl = {
+                       .oif = vif->link,
+                       .fl4_dst = iph->daddr,
+                       .fl4_tos = RT_TOS(iph->tos),
+                       .proto = IPPROTO_IPIP
+               };
+
                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }
@@ -1559,8 +1562,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 
        if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
-                  allow to send ICMP, so that packets will disappear
-                  to blackhole.
+                * allow to send ICMP, so that packets will disappear
+                * to blackhole.
                 */
 
                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -1583,7 +1586,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
        ip_decrease_ttl(ip_hdr(skb));
 
        /* FIXME: forward and output firewalls used to be called here.
-        * What do we do with netfilter? -- RR */
+        * What do we do with netfilter? -- RR
+        */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
@@ -1642,17 +1646,17 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
        if (mrt->vif_table[vif].dev != skb->dev) {
                int true_vifi;
 
-               if (skb_rtable(skb)->fl.iif == 0) {
+               if (rt_is_output_route(skb_rtable(skb))) {
                        /* It is our own packet, looped back.
-                          Very complicated situation...
-
-                          The best workaround until routing daemons will be
-                          fixed is not to redistribute packet, if it was
-                          send through wrong interface. It means, that
-                          multicast applications WILL NOT work for
-                          (S,G), which have default multicast route pointing
-                          to wrong oif. In any case, it is not a good
-                          idea to use multicasting applications on router.
+                        * Very complicated situation...
+                        *
+                        * The best workaround until routing daemons will be
+                        * fixed is not to redistribute packet, if it was
+                        * send through wrong interface. It means, that
+                        * multicast applications WILL NOT work for
+                        * (S,G), which have default multicast route pointing
+                        * to wrong oif. In any case, it is not a good
+                        * idea to use multicasting applications on router.
                         */
                        goto dont_forward;
                }
@@ -1662,9 +1666,9 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 
                if (true_vifi >= 0 && mrt->mroute_do_assert &&
                    /* pimsm uses asserts, when switching from RPT to SPT,
-                      so that we cannot check that packet arrived on an oif.
-                      It is bad, but otherwise we would need to move pretty
-                      large chunk of pimd to kernel. Ough... --ANK
+                    * so that we cannot check that packet arrived on an oif.
+                    * It is bad, but otherwise we would need to move pretty
+                    * large chunk of pimd to kernel. Ough... --ANK
                     */
                    (mrt->mroute_do_pim ||
                     cache->mfc_un.res.ttls[true_vifi] < 255) &&
@@ -1682,10 +1686,12 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
        /*
         *      Forward the frame
         */
-       for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
+       for (ct = cache->mfc_un.res.maxvif - 1;
+            ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
                                if (skb2)
                                        ipmr_queue_xmit(net, mrt, skb2, cache,
                                                        psend);
@@ -1696,6 +1702,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+
                        if (skb2)
                                ipmr_queue_xmit(net, mrt, skb2, cache, psend);
                } else {
@@ -1713,6 +1720,7 @@ dont_forward:
 
 /*
  *     Multicast packets for forwarding arrive here
+ *     Called with rcu_read_lock();
  */
 
 int ip_mr_input(struct sk_buff *skb)
@@ -1724,9 +1732,9 @@ int ip_mr_input(struct sk_buff *skb)
        int err;
 
        /* Packet is looped back after forward, it should not be
-          forwarded second time, but still can be delivered locally.
+        * forwarded second time, but still can be delivered locally.
         */
-       if (IPCB(skb)->flags&IPSKB_FORWARDED)
+       if (IPCB(skb)->flags & IPSKB_FORWARDED)
                goto dont_forward;
 
        err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
@@ -1736,28 +1744,28 @@ int ip_mr_input(struct sk_buff *skb)
        }
 
        if (!local) {
-                   if (IPCB(skb)->opt.router_alert) {
-                           if (ip_call_ra_chain(skb))
-                                   return 0;
-                   } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){
-                           /* IGMPv1 (and broken IGMPv2 implementations sort of
-                              Cisco IOS <= 11.2(8)) do not put router alert
-                              option to IGMP packets destined to routable
-                              groups. It is very bad, because it means
-                              that we can forward NO IGMP messages.
-                            */
-                           read_lock(&mrt_lock);
-                           if (mrt->mroute_sk) {
-                                   nf_reset(skb);
-                                   raw_rcv(mrt->mroute_sk, skb);
-                                   read_unlock(&mrt_lock);
-                                   return 0;
-                           }
-                           read_unlock(&mrt_lock);
+               if (IPCB(skb)->opt.router_alert) {
+                       if (ip_call_ra_chain(skb))
+                               return 0;
+               } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
+                       /* IGMPv1 (and broken IGMPv2 implementations sort of
+                        * Cisco IOS <= 11.2(8)) do not put router alert
+                        * option to IGMP packets destined to routable
+                        * groups. It is very bad, because it means
+                        * that we can forward NO IGMP messages.
+                        */
+                       struct sock *mroute_sk;
+
+                       mroute_sk = rcu_dereference(mrt->mroute_sk);
+                       if (mroute_sk) {
+                               nf_reset(skb);
+                               raw_rcv(mroute_sk, skb);
+                               return 0;
+                       }
                    }
        }
 
-       read_lock(&mrt_lock);
+       /* already under rcu_read_lock() */
        cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 
        /*
@@ -1769,13 +1777,12 @@ int ip_mr_input(struct sk_buff *skb)
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
-                       if (skb2 == NULL) {
-                               read_unlock(&mrt_lock);
+                       if (skb2 == NULL)
                                return -ENOBUFS;
-                       }
                        skb = skb2;
                }
 
+               read_lock(&mrt_lock);
                vif = ipmr_find_vif(mrt, skb->dev);
                if (vif >= 0) {
                        int err2 = ipmr_cache_unresolved(mrt, vif, skb);
@@ -1788,8 +1795,8 @@ int ip_mr_input(struct sk_buff *skb)
                return -ENODEV;
        }
 
+       read_lock(&mrt_lock);
        ip_mr_forward(net, mrt, skb, cache, local);
-
        read_unlock(&mrt_lock);
 
        if (local)
@@ -1805,6 +1812,7 @@ dont_forward:
 }
 
 #ifdef CONFIG_IP_PIMSM
+/* called with rcu_read_lock() */
 static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
                     unsigned int pimlen)
 {
@@ -1813,10 +1821,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
 
        encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
        /*
-          Check that:
-          a. packet is really destinted to a multicast group
-          b. packet is not a NULL-REGISTER
-          c. packet is not truncated
+        * Check that:
+        * a. packet is really sent to a multicast group
+        * b. packet is not a NULL-REGISTER
+        * c. packet is not truncated
         */
        if (!ipv4_is_multicast(encap->daddr) ||
            encap->tot_len == 0 ||
@@ -1826,26 +1834,23 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
        read_lock(&mrt_lock);
        if (mrt->mroute_reg_vif_num >= 0)
                reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
-       if (reg_dev)
-               dev_hold(reg_dev);
        read_unlock(&mrt_lock);
 
        if (reg_dev == NULL)
                return 1;
 
        skb->mac_header = skb->network_header;
-       skb_pull(skb, (u8*)encap - skb->data);
+       skb_pull(skb, (u8 *)encap - skb->data);
        skb_reset_network_header(skb);
        skb->protocol = htons(ETH_P_IP);
-       skb->ip_summed = 0;
+       skb->ip_summed = CHECKSUM_NONE;
        skb->pkt_type = PACKET_HOST;
 
        skb_tunnel_rx(skb, reg_dev);
 
        netif_rx(skb);
-       dev_put(reg_dev);
 
-       return 0;
+       return NET_RX_SUCCESS;
 }
 #endif
 
@@ -1854,7 +1859,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
  * Handle IGMP messages of PIMv1
  */
 
-int pim_rcv_v1(struct sk_buff * skb)
+int pim_rcv_v1(struct sk_buff *skb)
 {
        struct igmphdr *pim;
        struct net *net = dev_net(skb->dev);
@@ -1881,7 +1886,7 @@ drop:
 #endif
 
 #ifdef CONFIG_IP_PIMSM_V2
-static int pim_rcv(struct sk_buff * skb)
+static int pim_rcv(struct sk_buff *skb)
 {
        struct pimreghdr *pim;
        struct net *net = dev_net(skb->dev);
@@ -1891,8 +1896,8 @@ static int pim_rcv(struct sk_buff * skb)
                goto drop;
 
        pim = (struct pimreghdr *)skb_transport_header(skb);
-       if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
-           (pim->flags&PIM_NULL_REGISTER) ||
+       if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
+           (pim->flags & PIM_NULL_REGISTER) ||
            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
                goto drop;
@@ -1958,28 +1963,33 @@ int ipmr_get_route(struct net *net,
        if (mrt == NULL)
                return -ENOENT;
 
-       read_lock(&mrt_lock);
+       rcu_read_lock();
        cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
 
        if (cache == NULL) {
                struct sk_buff *skb2;
                struct iphdr *iph;
                struct net_device *dev;
-               int vif;
+               int vif = -1;
 
                if (nowait) {
-                       read_unlock(&mrt_lock);
+                       rcu_read_unlock();
                        return -EAGAIN;
                }
 
                dev = skb->dev;
-               if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
+               read_lock(&mrt_lock);
+               if (dev)
+                       vif = ipmr_find_vif(mrt, dev);
+               if (vif < 0) {
                        read_unlock(&mrt_lock);
+                       rcu_read_unlock();
                        return -ENODEV;
                }
                skb2 = skb_clone(skb, GFP_ATOMIC);
                if (!skb2) {
                        read_unlock(&mrt_lock);
+                       rcu_read_unlock();
                        return -ENOMEM;
                }
 
@@ -1992,13 +2002,16 @@ int ipmr_get_route(struct net *net,
                iph->version = 0;
                err = ipmr_cache_unresolved(mrt, vif, skb2);
                read_unlock(&mrt_lock);
+               rcu_read_unlock();
                return err;
        }
 
-       if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
+       read_lock(&mrt_lock);
+       if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
                cache->mfc_flags |= MFC_NOTIFY;
        err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
        read_unlock(&mrt_lock);
+       rcu_read_unlock();
        return err;
 }
 
@@ -2050,14 +2063,14 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
        s_h = cb->args[1];
        s_e = cb->args[2];
 
-       read_lock(&mrt_lock);
+       rcu_read_lock();
        ipmr_for_each_table(mrt, net) {
                if (t < s_t)
                        goto next_table;
                if (t > s_t)
                        s_h = 0;
                for (h = s_h; h < MFC_LINES; h++) {
-                       list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
+                       list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
                                if (e < s_e)
                                        goto next_entry;
                                if (ipmr_fill_mroute(mrt, skb,
@@ -2075,7 +2088,7 @@ next_table:
                t++;
        }
 done:
-       read_unlock(&mrt_lock);
+       rcu_read_unlock();
 
        cb->args[2] = e;
        cb->args[1] = h;
@@ -2086,7 +2099,8 @@ done:
 
 #ifdef CONFIG_PROC_FS
 /*
- *     The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
+ *     The /proc interfaces to multicast routing :
+ *     /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
  */
 struct ipmr_vif_iter {
        struct seq_net_private p;
@@ -2208,14 +2222,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
        struct mr_table *mrt = it->mrt;
        struct mfc_cache *mfc;
 
-       read_lock(&mrt_lock);
+       rcu_read_lock();
        for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
                it->cache = &mrt->mfc_cache_array[it->ct];
-               list_for_each_entry(mfc, it->cache, list)
+               list_for_each_entry_rcu(mfc, it->cache, list)
                        if (pos-- == 0)
                                return mfc;
        }
-       read_unlock(&mrt_lock);
+       rcu_read_unlock();
 
        spin_lock_bh(&mfc_unres_lock);
        it->cache = &mrt->mfc_unres_queue;
@@ -2274,7 +2288,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        }
 
        /* exhausted cache_array, show unresolved */
-       read_unlock(&mrt_lock);
+       rcu_read_unlock();
        it->cache = &mrt->mfc_unres_queue;
        it->ct = 0;
 
@@ -2282,7 +2296,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
        if (!list_empty(it->cache))
                return list_first_entry(it->cache, struct mfc_cache, list);
 
- end_of_list:
+end_of_list:
        spin_unlock_bh(&mfc_unres_lock);
        it->cache = NULL;
 
@@ -2297,7 +2311,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
        if (it->cache == &mrt->mfc_unres_queue)
                spin_unlock_bh(&mfc_unres_lock);
        else if (it->cache == &mrt->mfc_cache_array[it->ct])
-               read_unlock(&mrt_lock);
+               rcu_read_unlock();
 }
 
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -2323,7 +2337,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
                                   mfc->mfc_un.res.bytes,
                                   mfc->mfc_un.res.wrong_if);
                        for (n = mfc->mfc_un.res.minvif;
-                            n < mfc->mfc_un.res.maxvif; n++ ) {
+                            n < mfc->mfc_un.res.maxvif; n++) {
                                if (VIF_EXISTS(mrt, n) &&
                                    mfc->mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq,
@@ -2421,7 +2435,7 @@ int __init ip_mr_init(void)
 
        mrt_cachep = kmem_cache_create("ip_mrt_cache",
                                       sizeof(struct mfc_cache),
-                                      0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+                                      0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                                       NULL);
        if (!mrt_cachep)
                return -ENOMEM;