]> bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - net/ipv4/route.c
[NET]: kaweth was forgotten in msec switchover of usb_start_wait_urb
[net-next-2.6.git] / net / ipv4 / route.c
index c7ca94bd152cc945c99c1ab63fcf7d3985d5f639..d2bc6148a7378452bde89de27cd7a0876a18c78d 100644 (file)
@@ -81,6 +81,7 @@
 #include <linux/netdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
+#include <linux/workqueue.h>
 #include <linux/skbuff.h>
 #include <linux/inetdevice.h>
 #include <linux/igmp.h>
@@ -91,6 +92,7 @@
 #include <linux/jhash.h>
 #include <linux/rcupdate.h>
 #include <linux/times.h>
+#include <net/net_namespace.h>
 #include <net/protocol.h>
 #include <net/ip.h>
 #include <net/route.h>
@@ -135,7 +137,8 @@ static unsigned long rt_deadline;
 #define RTprint(a...)  printk(KERN_DEBUG a)
 
 static struct timer_list rt_flush_timer;
-static struct timer_list rt_periodic_timer;
+static void rt_check_expire(struct work_struct *work);
+static DECLARE_DELAYED_WORK(expires_work, rt_check_expire);
 static struct timer_list rt_secret_timer;
 
 /*
@@ -243,7 +246,7 @@ static spinlock_t   *rt_hash_locks;
 
 static struct rt_hash_bucket   *rt_hash_table;
 static unsigned                        rt_hash_mask;
-static int                     rt_hash_log;
+static unsigned int            rt_hash_log;
 static unsigned int            rt_hash_rnd;
 
 static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
@@ -372,23 +375,8 @@ static const struct seq_operations rt_cache_seq_ops = {
 
 static int rt_cache_seq_open(struct inode *inode, struct file *file)
 {
-       struct seq_file *seq;
-       int rc = -ENOMEM;
-       struct rt_cache_iter_state *s;
-
-       s = kzalloc(sizeof(*s), GFP_KERNEL);
-       if (!s)
-               goto out;
-       rc = seq_open(file, &rt_cache_seq_ops);
-       if (rc)
-               goto out_kfree;
-       seq          = file->private_data;
-       seq->private = s;
-out:
-       return rc;
-out_kfree:
-       kfree(s);
-       goto out;
+       return seq_open_private(file, &rt_cache_seq_ops,
+                       sizeof(struct rt_cache_iter_state));
 }
 
 static const struct file_operations rt_cache_seq_fops = {
@@ -571,33 +559,35 @@ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
                (fl1->iif ^ fl2->iif)) == 0;
 }
 
-/* This runs via a timer and thus is always in BH context. */
-static void rt_check_expire(unsigned long dummy)
+static void rt_check_expire(struct work_struct *work)
 {
        static unsigned int rover;
        unsigned int i = rover, goal;
        struct rtable *rth, **rthp;
-       unsigned long now = jiffies;
        u64 mult;
 
        mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
        if (ip_rt_gc_timeout > 1)
                do_div(mult, ip_rt_gc_timeout);
        goal = (unsigned int)mult;
-       if (goal > rt_hash_mask) goal = rt_hash_mask + 1;
+       if (goal > rt_hash_mask)
+               goal = rt_hash_mask + 1;
        for (; goal > 0; goal--) {
                unsigned long tmo = ip_rt_gc_timeout;
 
                i = (i + 1) & rt_hash_mask;
                rthp = &rt_hash_table[i].chain;
 
-               if (*rthp == 0)
+               if (need_resched())
+                       cond_resched();
+
+               if (*rthp == NULL)
                        continue;
-               spin_lock(rt_hash_lock_addr(i));
+               spin_lock_bh(rt_hash_lock_addr(i));
                while ((rth = *rthp) != NULL) {
                        if (rth->u.dst.expires) {
                                /* Entry is expired even if it is in use */
-                               if (time_before_eq(now, rth->u.dst.expires)) {
+                               if (time_before_eq(jiffies, rth->u.dst.expires)) {
                                        tmo >>= 1;
                                        rthp = &rth->u.dst.rt_next;
                                        continue;
@@ -612,14 +602,10 @@ static void rt_check_expire(unsigned long dummy)
                        *rthp = rth->u.dst.rt_next;
                        rt_free(rth);
                }
-               spin_unlock(rt_hash_lock_addr(i));
-
-               /* Fallback loop breaker. */
-               if (time_after(jiffies, now))
-                       break;
+               spin_unlock_bh(rt_hash_lock_addr(i));
        }
        rover = i;
-       mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
+       schedule_delayed_work(&expires_work, ip_rt_gc_interval);
 }
 
 /* This can run from both BH and non-BH contexts, the latter
@@ -868,9 +854,7 @@ restart:
                         */
                        rcu_assign_pointer(rt_hash_table[hash].chain, rth);
 
-                       rth->u.dst.__use++;
-                       dst_hold(&rth->u.dst);
-                       rth->u.dst.lastuse = now;
+                       dst_use(&rth->u.dst, now);
                        spin_unlock_bh(rt_hash_lock_addr(hash));
 
                        rt_drop(rt);
@@ -1177,7 +1161,7 @@ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
                        unsigned hash = rt_hash(rt->fl.fl4_dst, rt->fl.fl4_src,
                                                rt->fl.oif);
 #if RT_CACHE_DEBUG >= 1
-                       printk(KERN_DEBUG "ip_rt_advice: redirect to "
+                       printk(KERN_DEBUG "ipv4_negative_advice: redirect to "
                                          "%u.%u.%u.%u/%02x dropped\n",
                                NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
 #endif
@@ -1268,6 +1252,7 @@ static int ip_error(struct sk_buff *skb)
                        break;
                case ENETUNREACH:
                        code = ICMP_NET_UNREACH;
+                       IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES);
                        break;
                case EACCES:
                        code = ICMP_PKT_FILTERED;
@@ -1404,8 +1389,8 @@ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
        struct rtable *rt = (struct rtable *) dst;
        struct in_device *idev = rt->idev;
-       if (dev != &loopback_dev && idev && idev->dev == dev) {
-               struct in_device *loopback_idev = in_dev_get(&loopback_dev);
+       if (dev != init_net.loopback_dev && idev && idev->dev == dev) {
+               struct in_device *loopback_idev = in_dev_get(init_net.loopback_dev);
                if (loopback_idev) {
                        rt->idev = loopback_idev;
                        in_dev_put(idev);
@@ -1557,7 +1542,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 #endif
        rth->rt_iif     =
        rth->fl.iif     = dev->ifindex;
-       rth->u.dst.dev  = &loopback_dev;
+       rth->u.dst.dev  = init_net.loopback_dev;
        dev_hold(rth->u.dst.dev);
        rth->idev       = in_dev_get(rth->u.dst.dev);
        rth->fl.oif     = 0;
@@ -1814,7 +1799,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (res.type == RTN_LOCAL) {
                int result;
                result = fib_validate_source(saddr, daddr, tos,
-                                            loopback_dev.ifindex,
+                                            init_net.loopback_dev->ifindex,
                                             dev, &spec_dst, &itag);
                if (result < 0)
                        goto martian_source;
@@ -1830,11 +1815,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                goto martian_destination;
 
        err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
-       if (err == -ENOBUFS)
-               goto e_nobufs;
-       if (err == -EINVAL)
-               goto e_inval;
-
 done:
        in_dev_put(in_dev);
        if (free_res)
@@ -1881,7 +1861,7 @@ local_input:
 #endif
        rth->rt_iif     =
        rth->fl.iif     = dev->ifindex;
-       rth->u.dst.dev  = &loopback_dev;
+       rth->u.dst.dev  = init_net.loopback_dev;
        dev_hold(rth->u.dst.dev);
        rth->idev       = in_dev_get(rth->u.dst.dev);
        rth->rt_gateway = daddr;
@@ -1902,6 +1882,8 @@ no_route:
        RT_CACHE_STAT_INC(in_no_route);
        spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
        res.type = RTN_UNREACHABLE;
+       if (err == -ESRCH)
+               err = -ENETUNREACH;
        goto local_input;
 
        /*
@@ -1952,9 +1934,7 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
                    rth->fl.oif == 0 &&
                    rth->fl.mark == skb->mark &&
                    rth->fl.fl4_tos == tos) {
-                       rth->u.dst.lastuse = jiffies;
-                       dst_hold(&rth->u.dst);
-                       rth->u.dst.__use++;
+                       dst_use(&rth->u.dst, jiffies);
                        RT_CACHE_STAT_INC(in_hit);
                        rcu_read_unlock();
                        skb->dst = (struct dst_entry*)rth;
@@ -2151,7 +2131,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
                                                  RT_SCOPE_UNIVERSE),
                                      } },
                            .mark = oldflp->mark,
-                           .iif = loopback_dev.ifindex,
+                           .iif = init_net.loopback_dev->ifindex,
                            .oif = oldflp->oif };
        struct fib_result res;
        unsigned flags = 0;
@@ -2212,7 +2192,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
 
 
        if (oldflp->oif) {
-               dev_out = dev_get_by_index(oldflp->oif);
+               dev_out = dev_get_by_index(&init_net, oldflp->oif);
                err = -ENODEV;
                if (dev_out == NULL)
                        goto out;
@@ -2245,9 +2225,9 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
                        fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
                if (dev_out)
                        dev_put(dev_out);
-               dev_out = &loopback_dev;
+               dev_out = init_net.loopback_dev;
                dev_hold(dev_out);
-               fl.oif = loopback_dev.ifindex;
+               fl.oif = init_net.loopback_dev->ifindex;
                res.type = RTN_LOCAL;
                flags |= RTCF_LOCAL;
                goto make_route;
@@ -2292,7 +2272,7 @@ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
                        fl.fl4_src = fl.fl4_dst;
                if (dev_out)
                        dev_put(dev_out);
-               dev_out = &loopback_dev;
+               dev_out = init_net.loopback_dev;
                dev_hold(dev_out);
                fl.oif = dev_out->ifindex;
                if (res.fi)
@@ -2348,9 +2328,7 @@ int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
                    rth->fl.mark == flp->mark &&
                    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
                            (IPTOS_RT_MASK | RTO_ONLINK))) {
-                       rth->u.dst.lastuse = jiffies;
-                       dst_hold(&rth->u.dst);
-                       rth->u.dst.__use++;
+                       dst_use(&rth->u.dst, jiffies);
                        RT_CACHE_STAT_INC(out_hit);
                        rcu_read_unlock_bh();
                        *rp = rth;
@@ -2591,7 +2569,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
        if (iif) {
                struct net_device *dev;
 
-               dev = __dev_get_by_index(iif);
+               dev = __dev_get_by_index(&init_net, iif);
                if (dev == NULL) {
                        err = -ENODEV;
                        goto errout_free;
@@ -2913,18 +2891,14 @@ static int ip_rt_acct_read(char *buffer, char **start, off_t offset,
        offset /= sizeof(u32);
 
        if (length > 0) {
-               u32 *src = ((u32 *) IP_RT_ACCT_CPU(0)) + offset;
                u32 *dst = (u32 *) buffer;
 
-               /* Copy first cpu. */
                *start = buffer;
-               memcpy(dst, src, length);
+               memset(dst, 0, length);
 
-               /* Add the other cpus in, one int at a time */
                for_each_possible_cpu(i) {
                        unsigned int j;
-
-                       src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
+                       u32 *src = ((u32 *) IP_RT_ACCT_CPU(i)) + offset;
 
                        for (j = 0; j < length/4; j++)
                                dst[j] += src[j];
@@ -2992,17 +2966,14 @@ int __init ip_rt_init(void)
 
        init_timer(&rt_flush_timer);
        rt_flush_timer.function = rt_run_flush;
-       init_timer(&rt_periodic_timer);
-       rt_periodic_timer.function = rt_check_expire;
        init_timer(&rt_secret_timer);
        rt_secret_timer.function = rt_secret_rebuild;
 
        /* All the timers, started at system startup tend
           to synchronize. Perturb it a bit.
         */
-       rt_periodic_timer.expires = jiffies + net_random() % ip_rt_gc_interval +
-                                       ip_rt_gc_interval;
-       add_timer(&rt_periodic_timer);
+       schedule_delayed_work(&expires_work,
+               net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 
        rt_secret_timer.expires = jiffies + net_random() % ip_rt_secret_interval +
                ip_rt_secret_interval;
@@ -3011,15 +2982,15 @@ int __init ip_rt_init(void)
 #ifdef CONFIG_PROC_FS
        {
        struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
-       if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
+       if (!proc_net_fops_create(&init_net, "rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
            !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
-                                            proc_net_stat))) {
+                                            init_net.proc_net_stat))) {
                return -ENOMEM;
        }
        rtstat_pde->proc_fops = &rt_cpu_seq_fops;
        }
 #ifdef CONFIG_NET_CLS_ROUTE
-       create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
+       create_proc_read_entry("rt_acct", 0, init_net.proc_net, ip_rt_acct_read, NULL);
 #endif
 #endif
 #ifdef CONFIG_XFRM