bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - net/ipv4/route.c
net: use the macros defined for the members of flowi
[net-next-2.6.git] / net / ipv4 / route.c
index d6cb2bfcd8e1baf7495e55ce83b534b2b0955b2f..ec2333fb637e4e80c9a0cb94356efdf96f740046 100644 (file)
@@ -140,13 +140,15 @@ static unsigned long expires_ljiffies;
 
 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
 static void             ipv4_dst_destroy(struct dst_entry *dst);
-static void             ipv4_dst_ifdown(struct dst_entry *dst,
-                                        struct net_device *dev, int how);
 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
 static void             ipv4_link_failure(struct sk_buff *skb);
 static void             ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 static int rt_garbage_collect(struct dst_ops *ops);
 
+static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+                           int how)
+{
+}
 
 static struct dst_ops ipv4_dst_ops = {
        .family =               AF_INET,
@@ -198,7 +200,7 @@ const __u8 ip_tos2prio[16] = {
  */
 
 struct rt_hash_bucket {
-       struct rtable   *chain;
+       struct rtable __rcu     *chain;
 };
 
 #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
@@ -280,7 +282,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq)
        struct rtable *r = NULL;
 
        for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
-               if (!rt_hash_table[st->bucket].chain)
+               if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
                        continue;
                rcu_read_lock_bh();
                r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
@@ -300,17 +302,17 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq,
 {
        struct rt_cache_iter_state *st = seq->private;
 
-       r = r->dst.rt_next;
+       r = rcu_dereference_bh(r->dst.rt_next);
        while (!r) {
                rcu_read_unlock_bh();
                do {
                        if (--st->bucket < 0)
                                return NULL;
-               } while (!rt_hash_table[st->bucket].chain);
+               } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
                rcu_read_lock_bh();
-               r = rt_hash_table[st->bucket].chain;
+               r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
        }
-       return rcu_dereference_bh(r);
+       return r;
 }
 
 static struct rtable *rt_cache_get_next(struct seq_file *seq,
@@ -621,7 +623,7 @@ static inline int rt_fast_clean(struct rtable *rth)
        /* Kill broadcast/multicast entries very aggresively, if they
           collide in hash table with more useful entries */
        return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
-               rth->fl.iif && rth->dst.rt_next;
+               rt_is_input_route(rth) && rth->dst.rt_next;
 }
 
 static inline int rt_valuable(struct rtable *rth)
@@ -666,7 +668,7 @@ static inline u32 rt_score(struct rtable *rt)
        if (rt_valuable(rt))
                score |= (1<<31);
 
-       if (!rt->fl.iif ||
+       if (rt_is_output_route(rt) ||
            !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
                score |= (1<<30);
 
@@ -682,17 +684,17 @@ static inline bool rt_caching(const struct net *net)
 static inline bool compare_hash_inputs(const struct flowi *fl1,
                                        const struct flowi *fl2)
 {
-       return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-               ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+       return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+               ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
                (fl1->iif ^ fl2->iif)) == 0);
 }
 
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
-       return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
-               ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
+       return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) |
+               ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) |
                (fl1->mark ^ fl2->mark) |
-               (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
+               (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) |
                (fl1->oif ^ fl2->oif) |
                (fl1->iif ^ fl2->iif)) == 0;
 }
@@ -721,19 +723,23 @@ static void rt_do_flush(int process_context)
        for (i = 0; i <= rt_hash_mask; i++) {
                if (process_context && need_resched())
                        cond_resched();
-               rth = rt_hash_table[i].chain;
+               rth = rcu_dereference_raw(rt_hash_table[i].chain);
                if (!rth)
                        continue;
 
                spin_lock_bh(rt_hash_lock_addr(i));
 #ifdef CONFIG_NET_NS
                {
-               struct rtable ** prev, * p;
+               struct rtable __rcu **prev;
+               struct rtable *p;
 
-               rth = rt_hash_table[i].chain;
+               rth = rcu_dereference_protected(rt_hash_table[i].chain,
+                       lockdep_is_held(rt_hash_lock_addr(i)));
 
                /* defer releasing the head of the list after spin_unlock */
-               for (tail = rth; tail; tail = tail->dst.rt_next)
+               for (tail = rth; tail;
+                    tail = rcu_dereference_protected(tail->dst.rt_next,
+                               lockdep_is_held(rt_hash_lock_addr(i))))
                        if (!rt_is_expired(tail))
                                break;
                if (rth != tail)
@@ -741,8 +747,12 @@ static void rt_do_flush(int process_context)
 
                /* call rt_free on entries after the tail requiring flush */
                prev = &rt_hash_table[i].chain;
-               for (p = *prev; p; p = next) {
-                       next = p->dst.rt_next;
+               for (p = rcu_dereference_protected(*prev,
+                               lockdep_is_held(rt_hash_lock_addr(i)));
+                    p != NULL;
+                    p = next) {
+                       next = rcu_dereference_protected(p->dst.rt_next,
+                               lockdep_is_held(rt_hash_lock_addr(i)));
                        if (!rt_is_expired(p)) {
                                prev = &p->dst.rt_next;
                        } else {
@@ -752,14 +762,15 @@ static void rt_do_flush(int process_context)
                }
                }
 #else
-               rth = rt_hash_table[i].chain;
-               rt_hash_table[i].chain = NULL;
+               rth = rcu_dereference_protected(rt_hash_table[i].chain,
+                       lockdep_is_held(rt_hash_lock_addr(i)));
+               rcu_assign_pointer(rt_hash_table[i].chain, NULL);
                tail = NULL;
 #endif
                spin_unlock_bh(rt_hash_lock_addr(i));
 
                for (; rth != tail; rth = next) {
-                       next = rth->dst.rt_next;
+                       next = rcu_dereference_protected(rth->dst.rt_next, 1);
                        rt_free(rth);
                }
        }
@@ -790,7 +801,7 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
        while (aux != rth) {
                if (compare_hash_inputs(&aux->fl, &rth->fl))
                        return 0;
-               aux = aux->dst.rt_next;
+               aux = rcu_dereference_protected(aux->dst.rt_next, 1);
        }
        return ONE;
 }
@@ -799,7 +810,8 @@ static void rt_check_expire(void)
 {
        static unsigned int rover;
        unsigned int i = rover, goal;
-       struct rtable *rth, **rthp;
+       struct rtable *rth;
+       struct rtable __rcu **rthp;
        unsigned long samples = 0;
        unsigned long sum = 0, sum2 = 0;
        unsigned long delta;
@@ -825,11 +837,12 @@ static void rt_check_expire(void)
 
                samples++;
 
-               if (*rthp == NULL)
+               if (rcu_dereference_raw(*rthp) == NULL)
                        continue;
                length = 0;
                spin_lock_bh(rt_hash_lock_addr(i));
-               while ((rth = *rthp) != NULL) {
+               while ((rth = rcu_dereference_protected(*rthp,
+                                       lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
                        prefetch(rth->dst.rt_next);
                        if (rt_is_expired(rth)) {
                                *rthp = rth->dst.rt_next;
@@ -941,7 +954,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
        static unsigned long last_gc;
        static int rover;
        static int equilibrium;
-       struct rtable *rth, **rthp;
+       struct rtable *rth;
+       struct rtable __rcu **rthp;
        unsigned long now = jiffies;
        int goal;
        int entries = dst_entries_get_fast(&ipv4_dst_ops);
@@ -995,7 +1009,8 @@ static int rt_garbage_collect(struct dst_ops *ops)
                        k = (k + 1) & rt_hash_mask;
                        rthp = &rt_hash_table[k].chain;
                        spin_lock_bh(rt_hash_lock_addr(k));
-                       while ((rth = *rthp) != NULL) {
+                       while ((rth = rcu_dereference_protected(*rthp,
+                                       lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
                                if (!rt_is_expired(rth) &&
                                        !rt_may_expire(rth, tmo, expire)) {
                                        tmo >>= 1;
@@ -1071,7 +1086,7 @@ static int slow_chain_length(const struct rtable *head)
 
        while (rth) {
                length += has_noalias(head, rth);
-               rth = rth->dst.rt_next;
+               rth = rcu_dereference_protected(rth->dst.rt_next, 1);
        }
        return length >> FRACT_BITS;
 }
@@ -1079,9 +1094,9 @@ static int slow_chain_length(const struct rtable *head)
 static int rt_intern_hash(unsigned hash, struct rtable *rt,
                          struct rtable **rp, struct sk_buff *skb, int ifindex)
 {
-       struct rtable   *rth, **rthp;
+       struct rtable   *rth, *cand;
+       struct rtable __rcu **rthp, **candp;
        unsigned long   now;
-       struct rtable *cand, **candp;
        u32             min_score;
        int             chain_length;
        int attempts = !in_softirq();
@@ -1111,7 +1126,7 @@ restart:
                 */
 
                rt->dst.flags |= DST_NOCACHE;
-               if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+               if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
                        int err = arp_bind_neighbour(&rt->dst);
                        if (err) {
                                if (net_ratelimit())
@@ -1128,7 +1143,8 @@ restart:
        rthp = &rt_hash_table[hash].chain;
 
        spin_lock_bh(rt_hash_lock_addr(hash));
-       while ((rth = *rthp) != NULL) {
+       while ((rth = rcu_dereference_protected(*rthp,
+                       lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
                if (rt_is_expired(rth)) {
                        *rthp = rth->dst.rt_next;
                        rt_free(rth);
@@ -1208,7 +1224,7 @@ restart:
        /* Try to bind route to arp only if it is output
           route or unicast forwarding path.
         */
-       if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+       if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
                int err = arp_bind_neighbour(&rt->dst);
                if (err) {
                        spin_unlock_bh(rt_hash_lock_addr(hash));
@@ -1324,12 +1340,14 @@ EXPORT_SYMBOL(__ip_select_ident);
 
 static void rt_del(unsigned hash, struct rtable *rt)
 {
-       struct rtable **rthp, *aux;
+       struct rtable __rcu **rthp;
+       struct rtable *aux;
 
        rthp = &rt_hash_table[hash].chain;
        spin_lock_bh(rt_hash_lock_addr(hash));
        ip_rt_put(rt);
-       while ((aux = *rthp) != NULL) {
+       while ((aux = rcu_dereference_protected(*rthp,
+                       lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
                if (aux == rt || rt_is_expired(aux)) {
                        *rthp = aux->dst.rt_next;
                        rt_free(aux);
@@ -1346,7 +1364,8 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
 {
        int i, k;
        struct in_device *in_dev = __in_dev_get_rcu(dev);
-       struct rtable *rth, **rthp;
+       struct rtable *rth;
+       struct rtable __rcu **rthp;
        __be32  skeys[2] = { saddr, 0 };
        int  ikeys[2] = { dev->ifindex, 0 };
        struct netevent_redirect netevent;
@@ -1379,7 +1398,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
                        unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
                                                rt_genid(net));
 
-                       rthp=&rt_hash_table[hash].chain;
+                       rthp = &rt_hash_table[hash].chain;
 
                        while ((rth = rcu_dereference(*rthp)) != NULL) {
                                struct rtable *rt;
@@ -1387,7 +1406,7 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
                                if (rth->fl.fl4_dst != daddr ||
                                    rth->fl.fl4_src != skeys[i] ||
                                    rth->fl.oif != ikeys[k] ||
-                                   rth->fl.iif != 0 ||
+                                   rt_is_input_route(rth) ||
                                    rt_is_expired(rth) ||
                                    !net_eq(dev_net(rth->dst.dev), net)) {
                                        rthp = &rth->dst.rt_next;
@@ -1416,8 +1435,6 @@ void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw,
                                rt->dst.child           = NULL;
                                if (rt->dst.dev)
                                        dev_hold(rt->dst.dev);
-                               if (rt->idev)
-                                       in_dev_hold(rt->idev);
                                rt->dst.obsolete        = -1;
                                rt->dst.lastuse = jiffies;
                                rt->dst.path            = &rt->dst;
@@ -1649,7 +1666,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
                                    rth->rt_dst != daddr ||
                                    rth->rt_src != iph->saddr ||
                                    rth->fl.oif != ikeys[k] ||
-                                   rth->fl.iif != 0 ||
+                                   rt_is_input_route(rth) ||
                                    dst_metric_locked(&rth->dst, RTAX_MTU) ||
                                    !net_eq(dev_net(rth->dst.dev), net) ||
                                    rt_is_expired(rth))
@@ -1711,33 +1728,13 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 {
        struct rtable *rt = (struct rtable *) dst;
        struct inet_peer *peer = rt->peer;
-       struct in_device *idev = rt->idev;
 
        if (peer) {
                rt->peer = NULL;
                inet_putpeer(peer);
        }
-
-       if (idev) {
-               rt->idev = NULL;
-               in_dev_put(idev);
-       }
 }
 
-static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-                           int how)
-{
-       struct rtable *rt = (struct rtable *) dst;
-       struct in_device *idev = rt->idev;
-       if (dev != dev_net(dev)->loopback_dev && idev && idev->dev == dev) {
-               struct in_device *loopback_idev =
-                       in_dev_get(dev_net(dev)->loopback_dev);
-               if (loopback_idev) {
-                       rt->idev = loopback_idev;
-                       in_dev_put(idev);
-               }
-       }
-}
 
 static void ipv4_link_failure(struct sk_buff *skb)
 {
@@ -1773,7 +1770,7 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)
        __be32 src;
        struct fib_result res;
 
-       if (rt->fl.iif == 0)
+       if (rt_is_output_route(rt))
                src = rt->rt_src;
        else {
                rcu_read_lock();
@@ -1893,7 +1890,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        rth->fl.iif     = dev->ifindex;
        rth->dst.dev    = init_net.loopback_dev;
        dev_hold(rth->dst.dev);
-       rth->idev       = in_dev_get(rth->dst.dev);
        rth->fl.oif     = 0;
        rth->rt_gateway = daddr;
        rth->rt_spec_dst= spec_dst;
@@ -2033,7 +2029,6 @@ static int __mkroute_input(struct sk_buff *skb,
                rth->fl.iif     = in_dev->dev->ifindex;
        rth->dst.dev    = (out_dev)->dev;
        dev_hold(rth->dst.dev);
-       rth->idev       = in_dev_get(rth->dst.dev);
        rth->fl.oif     = 0;
        rth->rt_spec_dst= spec_dst;
 
@@ -2094,12 +2089,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
        struct fib_result res;
        struct in_device *in_dev = __in_dev_get_rcu(dev);
-       struct flowi fl = { .nl_u = { .ip4_u =
-                                     { .daddr = daddr,
-                                       .saddr = saddr,
-                                       .tos = tos,
-                                       .scope = RT_SCOPE_UNIVERSE,
-                                     } },
+       struct flowi fl = { .fl4_dst    = daddr,
+                           .fl4_src    = saddr,
+                           .fl4_tos    = tos,
+                           .fl4_scope  = RT_SCOPE_UNIVERSE,
                            .mark = skb->mark,
                            .iif = dev->ifindex };
        unsigned        flags = 0;
@@ -2214,7 +2207,6 @@ local_input:
        rth->fl.iif     = dev->ifindex;
        rth->dst.dev    = net->loopback_dev;
        dev_hold(rth->dst.dev);
-       rth->idev       = in_dev_get(rth->dst.dev);
        rth->rt_gateway = daddr;
        rth->rt_spec_dst= spec_dst;
        rth->dst.input= ip_local_deliver;
@@ -2400,9 +2392,6 @@ static int __mkroute_output(struct rtable **result,
        if (!rth)
                return -ENOBUFS;
 
-       in_dev_hold(in_dev);
-       rth->idev = in_dev;
-
        atomic_set(&rth->dst.__refcnt, 1);
        rth->dst.flags= DST_HOST;
        if (IN_DEV_CONF_GET(in_dev, NOXFRM))
@@ -2489,14 +2478,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
                                const struct flowi *oldflp)
 {
        u32 tos = RT_FL_TOS(oldflp);
-       struct flowi fl = { .nl_u = { .ip4_u =
-                                     { .daddr = oldflp->fl4_dst,
-                                       .saddr = oldflp->fl4_src,
-                                       .tos = tos & IPTOS_RT_MASK,
-                                       .scope = ((tos & RTO_ONLINK) ?
-                                                 RT_SCOPE_LINK :
-                                                 RT_SCOPE_UNIVERSE),
-                                     } },
+       struct flowi fl = { .fl4_dst = oldflp->fl4_dst,
+                           .fl4_src = oldflp->fl4_src,
+                           .fl4_tos = tos & IPTOS_RT_MASK,
+                           .fl4_scope = ((tos & RTO_ONLINK) ?
+                                         RT_SCOPE_LINK : RT_SCOPE_UNIVERSE),
                            .mark = oldflp->mark,
                            .iif = net->loopback_dev->ifindex,
                            .oif = oldflp->oif };
@@ -2678,7 +2664,7 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
                rth = rcu_dereference_bh(rth->dst.rt_next)) {
                if (rth->fl.fl4_dst == flp->fl4_dst &&
                    rth->fl.fl4_src == flp->fl4_src &&
-                   rth->fl.iif == 0 &&
+                   rt_is_output_route(rth) &&
                    rth->fl.oif == flp->oif &&
                    rth->fl.mark == flp->mark &&
                    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
@@ -2742,9 +2728,6 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi
 
                rt->fl = ort->fl;
 
-               rt->idev = ort->idev;
-               if (rt->idev)
-                       in_dev_hold(rt->idev);
                rt->rt_genid = rt_genid(net);
                rt->rt_flags = ort->rt_flags;
                rt->rt_type = ort->rt_type;
@@ -2836,7 +2819,7 @@ static int rt_fill_info(struct net *net,
        if (rt->dst.tclassid)
                NLA_PUT_U32(skb, RTA_FLOW, rt->dst.tclassid);
 #endif
-       if (rt->fl.iif)
+       if (rt_is_input_route(rt))
                NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
        else if (rt->rt_src != rt->fl.fl4_src)
                NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_src);
@@ -2861,7 +2844,7 @@ static int rt_fill_info(struct net *net,
                }
        }
 
-       if (rt->fl.iif) {
+       if (rt_is_input_route(rt)) {
 #ifdef CONFIG_IP_MROUTE
                __be32 dst = rt->rt_dst;
 
@@ -2956,13 +2939,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
                        err = -rt->dst.error;
        } else {
                struct flowi fl = {
-                       .nl_u = {
-                               .ip4_u = {
-                                       .daddr = dst,
-                                       .saddr = src,
-                                       .tos = rtm->rtm_tos,
-                               },
-                       },
+                       .fl4_dst = dst,
+                       .fl4_src = src,
+                       .fl4_tos = rtm->rtm_tos,
                        .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0,
                        .mark = mark,
                };