]> bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - net/ipv4/route.c
ipv4: Remove leftover rcu_read_unlock calls from __mkroute_output()
[net-next-2.6.git] / net / ipv4 / route.c
index 3f56b6e6c6aab583d65902e7190bbf1a6eaa60b6..3888f6ba0a5c559a9ce6437a059c91831f08ba2b 100644 (file)
@@ -1107,6 +1107,7 @@ restart:
                 * on the route gc list.
                 */
 
+               rt->dst.flags |= DST_NOCACHE;
                if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
                        int err = arp_bind_neighbour(&rt->dst);
                        if (err) {
@@ -1231,7 +1232,7 @@ restart:
                        }
 
                        if (net_ratelimit())
-                               printk(KERN_WARNING "Neighbour table overflow.\n");
+                               printk(KERN_WARNING "ipv4: Neighbour table overflow.\n");
                        rt_drop(rt);
                        return -ENOBUFS;
                }
@@ -1268,18 +1269,11 @@ skip_hashing:
 
 void rt_bind_peer(struct rtable *rt, int create)
 {
-       static DEFINE_SPINLOCK(rt_peer_lock);
        struct inet_peer *peer;
 
        peer = inet_getpeer(rt->rt_dst, create);
 
-       spin_lock_bh(&rt_peer_lock);
-       if (rt->peer == NULL) {
-               rt->peer = peer;
-               peer = NULL;
-       }
-       spin_unlock_bh(&rt_peer_lock);
-       if (peer)
+       if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
                inet_putpeer(peer);
 }
 
@@ -1779,12 +1773,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
 
        if (rt->fl.iif == 0)
                src = rt->rt_src;
-       else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
-               src = FIB_RES_PREFSRC(res);
-               fib_res_put(&res);
-       } else
-               src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
+       else {
+               rcu_read_lock();
+               if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
+                       src = FIB_RES_PREFSRC(res);
+               else
+                       src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
                                        RT_SCOPE_UNIVERSE);
+               rcu_read_unlock();
+       }
        memcpy(addr, &src, 4);
 }
 
@@ -2087,6 +2084,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
  *     Such approach solves two big problems:
  *     1. Not simplex devices are handled properly.
  *     2. IP spoofing attempts are filtered with 100% of guarantee.
+ *     called with rcu_read_lock()
  */
 
 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2108,7 +2106,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        unsigned        hash;
        __be32          spec_dst;
        int             err = -EINVAL;
-       int             free_res = 0;
        struct net    * net = dev_net(dev);
 
        /* IP on this device is disabled. */
@@ -2140,12 +2137,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        /*
         *      Now we are ready to route packet.
         */
-       if ((err = fib_lookup(net, &fl, &res)) != 0) {
+       err = fib_lookup(net, &fl, &res);
+       if (err != 0) {
                if (!IN_DEV_FORWARD(in_dev))
                        goto e_hostunreach;
                goto no_route;
        }
-       free_res = 1;
 
        RT_CACHE_STAT_INC(in_slow_tot);
 
@@ -2154,8 +2151,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
        if (res.type == RTN_LOCAL) {
                err = fib_validate_source(saddr, daddr, tos,
-                                            net->loopback_dev->ifindex,
-                                            dev, &spec_dst, &itag, skb->mark);
+                                         net->loopback_dev->ifindex,
+                                         dev, &spec_dst, &itag, skb->mark);
                if (err < 0)
                        goto martian_source_keep_err;
                if (err)
@@ -2170,9 +2167,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                goto martian_destination;
 
        err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
-done:
-       if (free_res)
-               fib_res_put(&res);
 out:   return err;
 
 brd_input:
@@ -2232,7 +2226,7 @@ local_input:
        rth->rt_type    = res.type;
        hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
        err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
-       goto done;
+       goto out;
 
 no_route:
        RT_CACHE_STAT_INC(in_no_route);
@@ -2255,21 +2249,21 @@ martian_destination:
 
 e_hostunreach:
        err = -EHOSTUNREACH;
-       goto done;
+       goto out;
 
 e_inval:
        err = -EINVAL;
-       goto done;
+       goto out;
 
 e_nobufs:
        err = -ENOBUFS;
-       goto done;
+       goto out;
 
 martian_source:
        err = -EINVAL;
 martian_source_keep_err:
        ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
-       goto done;
+       goto out;
 }
 
 int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2355,6 +2349,7 @@ skip_cache:
 }
 EXPORT_SYMBOL(ip_route_input_common);
 
+/* called with rcu_read_lock() */
 static int __mkroute_output(struct rtable **result,
                            struct fib_result *res,
                            const struct flowi *fl,
@@ -2365,9 +2360,8 @@ static int __mkroute_output(struct rtable **result,
        struct rtable *rth;
        struct in_device *in_dev;
        u32 tos = RT_FL_TOS(oldflp);
-       int err = 0;
 
-       if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
+       if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
                return -EINVAL;
 
        if (fl->fl4_dst == htonl(0xFFFFFFFF))
@@ -2380,38 +2374,33 @@ static int __mkroute_output(struct rtable **result,
        if (dev_out->flags & IFF_LOOPBACK)
                flags |= RTCF_LOCAL;
 
-       /* get work reference to inet device */
-       in_dev = in_dev_get(dev_out);
+       in_dev = __in_dev_get_rcu(dev_out);
        if (!in_dev)
                return -EINVAL;
 
        if (res->type == RTN_BROADCAST) {
                flags |= RTCF_BROADCAST | RTCF_LOCAL;
-               if (res->fi) {
-                       fib_info_put(res->fi);
-                       res->fi = NULL;
-               }
+               res->fi = NULL;
        } else if (res->type == RTN_MULTICAST) {
-               flags |= RTCF_MULTICAST|RTCF_LOCAL;
+               flags |= RTCF_MULTICAST | RTCF_LOCAL;
                if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
                                 oldflp->proto))
                        flags &= ~RTCF_LOCAL;
                /* If multicast route do not exist use
-                  default one, but do not gateway in this case.
-                  Yes, it is hack.
+                * default one, but do not gateway in this case.
+                * Yes, it is hack.
                 */
-               if (res->fi && res->prefixlen < 4) {
-                       fib_info_put(res->fi);
+               if (res->fi && res->prefixlen < 4)
                        res->fi = NULL;
-               }
        }
 
 
        rth = dst_alloc(&ipv4_dst_ops);
-       if (!rth) {
-               err = -ENOBUFS;
-               goto cleanup;
-       }
+       if (!rth)
+               return -ENOBUFS;
+
+       in_dev_hold(in_dev);
+       rth->idev = in_dev;
 
        atomic_set(&rth->dst.__refcnt, 1);
        rth->dst.flags= DST_HOST;
@@ -2432,7 +2421,6 @@ static int __mkroute_output(struct rtable **result,
           cache entry */
        rth->dst.dev    = dev_out;
        dev_hold(dev_out);
-       rth->idev       = in_dev_get(dev_out);
        rth->rt_gateway = fl->fl4_dst;
        rth->rt_spec_dst= fl->fl4_src;
 
@@ -2467,15 +2455,11 @@ static int __mkroute_output(struct rtable **result,
        rt_set_nexthop(rth, res, 0);
 
        rth->rt_flags = flags;
-
        *result = rth;
- cleanup:
-       /* release work reference to inet device */
-       in_dev_put(in_dev);
-
-       return err;
+       return 0;
 }
 
+/* called with rcu_read_lock() */
 static int ip_mkroute_output(struct rtable **rp,
                             struct fib_result *res,
                             const struct flowi *fl,
@@ -2497,6 +2481,7 @@ static int ip_mkroute_output(struct rtable **rp,
 
 /*
  * Major route resolver routine.
+ * called with rcu_read_lock();
  */
 
 static int ip_route_output_slow(struct net *net, struct rtable **rp,
@@ -2515,9 +2500,8 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                            .iif = net->loopback_dev->ifindex,
                            .oif = oldflp->oif };
        struct fib_result res;
-       unsigned flags = 0;
+       unsigned int flags = 0;
        struct net_device *dev_out = NULL;
-       int free_res = 0;
        int err;
 
 
@@ -2545,7 +2529,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                    (ipv4_is_multicast(oldflp->fl4_dst) ||
                     oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
                        /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-                       dev_out = ip_dev_find(net, oldflp->fl4_src);
+                       dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
                        if (dev_out == NULL)
                                goto out;
 
@@ -2570,26 +2554,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 
                if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
                        /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-                       dev_out = ip_dev_find(net, oldflp->fl4_src);
-                       if (dev_out == NULL)
+                       if (!__ip_dev_find(net, oldflp->fl4_src, false))
                                goto out;
-                       dev_put(dev_out);
-                       dev_out = NULL;
                }
        }
 
 
        if (oldflp->oif) {
-               dev_out = dev_get_by_index(net, oldflp->oif);
+               dev_out = dev_get_by_index_rcu(net, oldflp->oif);
                err = -ENODEV;
                if (dev_out == NULL)
                        goto out;
 
                /* RACE: Check return value of inet_select_addr instead. */
-               if (__in_dev_get_rtnl(dev_out) == NULL) {
-                       dev_put(dev_out);
+               if (rcu_dereference(dev_out->ip_ptr) == NULL)
                        goto out;       /* Wrong error code */
-               }
 
                if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
                    oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
@@ -2612,10 +2591,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                fl.fl4_dst = fl.fl4_src;
                if (!fl.fl4_dst)
                        fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
-               if (dev_out)
-                       dev_put(dev_out);
                dev_out = net->loopback_dev;
-               dev_hold(dev_out);
                fl.oif = net->loopback_dev->ifindex;
                res.type = RTN_LOCAL;
                flags |= RTCF_LOCAL;
@@ -2649,23 +2625,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                        res.type = RTN_UNICAST;
                        goto make_route;
                }
-               if (dev_out)
-                       dev_put(dev_out);
                err = -ENETUNREACH;
                goto out;
        }
-       free_res = 1;
 
        if (res.type == RTN_LOCAL) {
                if (!fl.fl4_src)
                        fl.fl4_src = fl.fl4_dst;
-               if (dev_out)
-                       dev_put(dev_out);
                dev_out = net->loopback_dev;
-               dev_hold(dev_out);
                fl.oif = dev_out->ifindex;
-               if (res.fi)
-                       fib_info_put(res.fi);
                res.fi = NULL;
                flags |= RTCF_LOCAL;
                goto make_route;
@@ -2682,28 +2650,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
        if (!fl.fl4_src)
                fl.fl4_src = FIB_RES_PREFSRC(res);
 
-       if (dev_out)
-               dev_put(dev_out);
        dev_out = FIB_RES_DEV(res);
-       dev_hold(dev_out);
        fl.oif = dev_out->ifindex;
 
 
 make_route:
        err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);
 
-
-       if (free_res)
-               fib_res_put(&res);
-       if (dev_out)
-               dev_put(dev_out);
 out:   return err;
 }
 
 int __ip_route_output_key(struct net *net, struct rtable **rp,
                          const struct flowi *flp)
 {
-       unsigned hash;
+       unsigned int hash;
+       int res;
        struct rtable *rth;
 
        if (!rt_caching(net))
@@ -2734,10 +2695,18 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
        rcu_read_unlock_bh();
 
 slow_output:
-       return ip_route_output_slow(net, rp, flp);
+       rcu_read_lock();
+       res = ip_route_output_slow(net, rp, flp);
+       rcu_read_unlock();
+       return res;
 }
 EXPORT_SYMBOL_GPL(__ip_route_output_key);
 
+static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
+{
+       return NULL;
+}
+
 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
@@ -2746,7 +2715,7 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
        .family                 =       AF_INET,
        .protocol               =       cpu_to_be16(ETH_P_IP),
        .destroy                =       ipv4_dst_destroy,
-       .check                  =       ipv4_dst_check,
+       .check                  =       ipv4_blackhole_dst_check,
        .update_pmtu            =       ipv4_rt_blackhole_update_pmtu,
        .entries                =       ATOMIC_INIT(0),
 };
@@ -2793,7 +2762,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 
        dst_release(&(*rp)->dst);
        *rp = rt;
-       return (rt ? 0 : -ENOMEM);
+       return rt ? 0 : -ENOMEM;
 }
 
 int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,