.link_failure = ipv4_link_failure,
.update_pmtu = ip_rt_update_pmtu,
.local_out = __ip_local_out,
- .entries = ATOMIC_INIT(0),
};
#define ECN_OR_COST(class) TC_PRIO_##class
*/
struct rt_hash_bucket {
- struct rtable *chain;
+ struct rtable __rcu *chain;
};
#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \
struct rtable *r = NULL;
for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) {
- if (!rt_hash_table[st->bucket].chain)
+ if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain))
continue;
rcu_read_lock_bh();
r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
{
struct rt_cache_iter_state *st = seq->private;
- r = r->dst.rt_next;
+ r = rcu_dereference_bh(r->dst.rt_next);
while (!r) {
rcu_read_unlock_bh();
do {
if (--st->bucket < 0)
return NULL;
- } while (!rt_hash_table[st->bucket].chain);
+ } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain));
rcu_read_lock_bh();
- r = rt_hash_table[st->bucket].chain;
+ r = rcu_dereference_bh(rt_hash_table[st->bucket].chain);
}
- return rcu_dereference_bh(r);
+ return r;
}
static struct rtable *rt_cache_get_next(struct seq_file *seq,
seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
" %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
- atomic_read(&ipv4_dst_ops.entries),
+ dst_entries_get_slow(&ipv4_dst_ops),
st->in_hit,
st->in_slow_tot,
st->in_slow_mc,
for (i = 0; i <= rt_hash_mask; i++) {
if (process_context && need_resched())
cond_resched();
- rth = rt_hash_table[i].chain;
+ rth = rcu_dereference_raw(rt_hash_table[i].chain);
if (!rth)
continue;
spin_lock_bh(rt_hash_lock_addr(i));
#ifdef CONFIG_NET_NS
{
- struct rtable ** prev, * p;
+ struct rtable __rcu **prev;
+ struct rtable *p;
- rth = rt_hash_table[i].chain;
+ rth = rcu_dereference_protected(rt_hash_table[i].chain,
+ lockdep_is_held(rt_hash_lock_addr(i)));
/* defer releasing the head of the list after spin_unlock */
- for (tail = rth; tail; tail = tail->dst.rt_next)
+ for (tail = rth; tail;
+ tail = rcu_dereference_protected(tail->dst.rt_next,
+ lockdep_is_held(rt_hash_lock_addr(i))))
if (!rt_is_expired(tail))
break;
if (rth != tail)
/* call rt_free on entries after the tail requiring flush */
prev = &rt_hash_table[i].chain;
- for (p = *prev; p; p = next) {
- next = p->dst.rt_next;
+ for (p = rcu_dereference_protected(*prev,
+ lockdep_is_held(rt_hash_lock_addr(i)));
+ p != NULL;
+ p = next) {
+ next = rcu_dereference_protected(p->dst.rt_next,
+ lockdep_is_held(rt_hash_lock_addr(i)));
if (!rt_is_expired(p)) {
prev = &p->dst.rt_next;
} else {
}
}
#else
- rth = rt_hash_table[i].chain;
- rt_hash_table[i].chain = NULL;
+ rth = rcu_dereference_protected(rt_hash_table[i].chain,
+ lockdep_is_held(rt_hash_lock_addr(i)));
+ rcu_assign_pointer(rt_hash_table[i].chain, NULL);
tail = NULL;
#endif
spin_unlock_bh(rt_hash_lock_addr(i));
for (; rth != tail; rth = next) {
- next = rth->dst.rt_next;
+ next = rcu_dereference_protected(rth->dst.rt_next, 1);
rt_free(rth);
}
}
while (aux != rth) {
if (compare_hash_inputs(&aux->fl, &rth->fl))
return 0;
- aux = aux->dst.rt_next;
+ aux = rcu_dereference_protected(aux->dst.rt_next, 1);
}
return ONE;
}
{
static unsigned int rover;
unsigned int i = rover, goal;
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
unsigned long samples = 0;
unsigned long sum = 0, sum2 = 0;
unsigned long delta;
samples++;
- if (*rthp == NULL)
+ if (rcu_dereference_raw(*rthp) == NULL)
continue;
length = 0;
spin_lock_bh(rt_hash_lock_addr(i));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
prefetch(rth->dst.rt_next);
if (rt_is_expired(rth)) {
*rthp = rth->dst.rt_next;
static unsigned long last_gc;
static int rover;
static int equilibrium;
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
unsigned long now = jiffies;
int goal;
+ int entries = dst_entries_get_fast(&ipv4_dst_ops);
/*
* Garbage collection is pretty expensive,
RT_CACHE_STAT_INC(gc_total);
if (now - last_gc < ip_rt_gc_min_interval &&
- atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) {
+ entries < ip_rt_max_size) {
RT_CACHE_STAT_INC(gc_ignored);
goto out;
}
+ entries = dst_entries_get_slow(&ipv4_dst_ops);
/* Calculate number of entries, which we want to expire now. */
- goal = atomic_read(&ipv4_dst_ops.entries) -
- (ip_rt_gc_elasticity << rt_hash_log);
+ goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
if (goal <= 0) {
if (equilibrium < ipv4_dst_ops.gc_thresh)
equilibrium = ipv4_dst_ops.gc_thresh;
- goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+ goal = entries - equilibrium;
if (goal > 0) {
equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
- goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium;
+ goal = entries - equilibrium;
}
} else {
/* We are in dangerous area. Try to reduce cache really
* aggressively.
*/
goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
- equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal;
+ equilibrium = entries - goal;
}
if (now - last_gc >= ip_rt_gc_min_interval)
k = (k + 1) & rt_hash_mask;
rthp = &rt_hash_table[k].chain;
spin_lock_bh(rt_hash_lock_addr(k));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(k)))) != NULL) {
if (!rt_is_expired(rth) &&
!rt_may_expire(rth, tmo, expire)) {
tmo >>= 1;
expire >>= 1;
#if RT_CACHE_DEBUG >= 2
printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
- atomic_read(&ipv4_dst_ops.entries), goal, i);
+ dst_entries_get_fast(&ipv4_dst_ops), goal, i);
#endif
- if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+ if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
goto out;
} while (!in_softirq() && time_before_eq(jiffies, now));
- if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size)
+ if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
+ goto out;
+ if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
goto out;
if (net_ratelimit())
printk(KERN_WARNING "dst cache overflow\n");
work_done:
expire += ip_rt_gc_min_interval;
if (expire > ip_rt_gc_timeout ||
- atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh)
+ dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
+ dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
expire = ip_rt_gc_timeout;
#if RT_CACHE_DEBUG >= 2
printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
- atomic_read(&ipv4_dst_ops.entries), goal, rover);
+ dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
#endif
out: return 0;
}
while (rth) {
length += has_noalias(head, rth);
- rth = rth->dst.rt_next;
+ rth = rcu_dereference_protected(rth->dst.rt_next, 1);
}
return length >> FRACT_BITS;
}
static int rt_intern_hash(unsigned hash, struct rtable *rt,
struct rtable **rp, struct sk_buff *skb, int ifindex)
{
- struct rtable *rth, **rthp;
+ struct rtable *rth, *cand;
+ struct rtable __rcu **rthp, **candp;
unsigned long now;
- struct rtable *cand, **candp;
u32 min_score;
int chain_length;
int attempts = !in_softirq();
* Note that we do rt_free on this new route entry, so that
* once its refcount hits zero, we are still able to reap it
* (Thanks Alexey)
- * Note also the rt_free uses call_rcu. We don't actually
- * need rcu protection here, this is just our path to get
- * on the route gc list.
+ * Note: To avoid expensive rcu stuff for this uncached dst,
+ * we set DST_NOCACHE so that dst_release() can free dst without
+ * waiting a grace period.
*/
rt->dst.flags |= DST_NOCACHE;
if (net_ratelimit())
printk(KERN_WARNING
"Neighbour table failure & not caching routes.\n");
- rt_drop(rt);
+ ip_rt_put(rt);
return err;
}
}
- rt_free(rt);
goto skip_hashing;
}
rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
- while ((rth = *rthp) != NULL) {
+ while ((rth = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
if (rt_is_expired(rth)) {
*rthp = rth->dst.rt_next;
rt_free(rth);
static void rt_del(unsigned hash, struct rtable *rt)
{
- struct rtable **rthp, *aux;
+ struct rtable __rcu **rthp;
+ struct rtable *aux;
rthp = &rt_hash_table[hash].chain;
spin_lock_bh(rt_hash_lock_addr(hash));
ip_rt_put(rt);
- while ((aux = *rthp) != NULL) {
+ while ((aux = rcu_dereference_protected(*rthp,
+ lockdep_is_held(rt_hash_lock_addr(hash)))) != NULL) {
if (aux == rt || rt_is_expired(aux)) {
*rthp = aux->dst.rt_next;
rt_free(aux);
{
int i, k;
struct in_device *in_dev = __in_dev_get_rcu(dev);
- struct rtable *rth, **rthp;
+ struct rtable *rth;
+ struct rtable __rcu **rthp;
__be32 skeys[2] = { saddr, 0 };
int ikeys[2] = { dev->ifindex, 0 };
struct netevent_redirect netevent;
unsigned hash = rt_hash(daddr, skeys[i], ikeys[k],
rt_genid(net));
- rthp=&rt_hash_table[hash].chain;
+ rthp = &rt_hash_table[hash].chain;
while ((rth = rcu_dereference(*rthp)) != NULL) {
struct rtable *rt;
ipv4_is_loopback(saddr))
goto martian_source;
- if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
+ if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
goto brd_input;
/* Accept zero addresses only to limited broadcast;
if (ipv4_is_zeronet(saddr))
goto martian_source;
- if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
- ipv4_is_loopback(daddr))
+ if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
goto martian_destination;
/*
if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
return -EINVAL;
- if (fl->fl4_dst == htonl(0xFFFFFFFF))
+ if (ipv4_is_lbcast(fl->fl4_dst))
res->type = RTN_BROADCAST;
else if (ipv4_is_multicast(fl->fl4_dst))
res->type = RTN_MULTICAST;
- else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
+ else if (ipv4_is_zeronet(fl->fl4_dst))
return -EINVAL;
if (dev_out->flags & IFF_LOOPBACK)
if (oldflp->oif == 0 &&
(ipv4_is_multicast(oldflp->fl4_dst) ||
- oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
+ ipv4_is_lbcast(oldflp->fl4_dst))) {
/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
if (dev_out == NULL)
goto out; /* Wrong error code */
if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
- oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
+ ipv4_is_lbcast(oldflp->fl4_dst)) {
if (!fl.fl4_src)
fl.fl4_src = inet_select_addr(dev_out, 0,
RT_SCOPE_LINK);
.destroy = ipv4_dst_destroy,
.check = ipv4_blackhole_dst_check,
.update_pmtu = ipv4_rt_blackhole_update_pmtu,
- .entries = ATOMIC_INIT(0),
};
ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;
+ if (dst_entries_init(&ipv4_dst_ops) < 0)
+ panic("IP: failed to allocate ipv4_dst_ops counter\n");
+
+ if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
+ panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
+
rt_hash_table = (struct rt_hash_bucket *)
alloc_large_system_hash("IP route cache",
sizeof(struct rt_hash_bucket),