* routers in REACHABLE, STALE, DELAY or PROBE states).
* - always select the same router if it is (probably)
* reachable. otherwise, round-robin the list.
+ * Ville Nuorvala
+ * Fixed routing subtrees.
*/
#include <linux/capability.h>
#define CLONE_OFFLINK_ROUTE 0
-#define RT6_SELECT_F_IFACE 0x1
-#define RT6_SELECT_F_REACHABLE 0x2
-
static int ip6_rt_max_size = 4096;
static int ip6_rt_gc_min_interval = HZ / 2;
static int ip6_rt_gc_timeout = 60*HZ;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+static int ip6_pkt_prohibit(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sk_buff *skb);
+static int ip6_pkt_blk_hole(struct sk_buff *skb);
+
struct rt6_info ip6_prohibit_entry = {
.u = {
.dst = {
.obsolete = -1,
.error = -EACCES,
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
- .input = ip6_pkt_discard,
- .output = ip6_pkt_discard_out,
+ .input = ip6_pkt_prohibit,
+ .output = ip6_pkt_prohibit_out,
.ops = &ip6_dst_ops,
.path = (struct dst_entry*)&ip6_prohibit_entry,
}
.obsolete = -1,
.error = -EINVAL,
.metrics = { [RTAX_HOPLIMIT - 1] = 255, },
- .input = ip6_pkt_discard,
- .output = ip6_pkt_discard_out,
+ .input = ip6_pkt_blk_hole,
+ .output = ip6_pkt_blk_hole,
.ops = &ip6_dst_ops,
.path = (struct dst_entry*)&ip6_blk_hole_entry,
}
read_lock_bh(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
m = 2;
+ else if (!(neigh->nud_state & NUD_FAILED))
+ m = 1;
read_unlock_bh(&neigh->lock);
}
return m;
int m, n;
m = rt6_check_dev(rt, oif);
- if (!m && (strict & RT6_SELECT_F_IFACE))
+ if (!m && (strict & RT6_LOOKUP_F_IFACE))
return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
n = rt6_check_neigh(rt);
- if (n > 1)
- m |= 16;
- else if (!n && strict & RT6_SELECT_F_REACHABLE)
+ if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
return -1;
return m;
}
continue;
if (m > mpri) {
- rt6_probe(match);
+ if (strict & RT6_LOOKUP_F_REACHABLE)
+ rt6_probe(match);
match = rt;
mpri = m;
- } else {
+ } else if (strict & RT6_LOOKUP_F_REACHABLE) {
rt6_probe(rt);
}
}
if (!match &&
- (strict & RT6_SELECT_F_REACHABLE) &&
+ (strict & RT6_LOOKUP_F_REACHABLE) &&
last && last != rt0) {
/* no entries matched; do round-robin */
static DEFINE_SPINLOCK(lock);
}
#endif
-#define BACKTRACK() \
-if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
- while ((fn = fn->parent) != NULL) { \
- if (fn->fn_flags & RTN_TL_ROOT) { \
- dst_hold(&rt->u.dst); \
- goto out; \
+#define BACKTRACK(saddr) \
+do { \
+ if (rt == &ip6_null_entry) { \
+ struct fib6_node *pn; \
+ while (1) { \
+ if (fn->fn_flags & RTN_TL_ROOT) \
+ goto out; \
+ pn = fn->parent; \
+ if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
+ fn = fib6_lookup(pn->subtree, NULL, saddr); \
+ else \
+ fn = pn; \
+ if (fn->fn_flags & RTN_RTINFO) \
+ goto restart; \
} \
- if (fn->fn_flags & RTN_RTINFO) \
- goto restart; \
} \
-}
+} while(0)
static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
struct flowi *fl, int flags)
fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
rt = fn->leaf;
- rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
- BACKTRACK();
- dst_hold(&rt->u.dst);
+ rt = rt6_device_match(rt, fl->oif, flags);
+ BACKTRACK(&fl->fl6_src);
out:
+ dst_hold(&rt->u.dst);
read_unlock_bh(&table->tb6_lock);
rt->u.dst.lastuse = jiffies;
.nl_u = {
.ip6_u = {
.daddr = *daddr,
- /* TODO: saddr */
},
},
};
struct dst_entry *dst;
- int flags = strict ? RT6_F_STRICT : 0;
+ int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
+
+ if (saddr) {
+ memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
+ }
dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
if (dst->error == 0)
ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
rt->rt6i_dst.plen = 128;
rt->rt6i_flags |= RTF_CACHE;
- if (rt->rt6i_flags & RTF_REJECT)
- rt->u.dst.error = ort->u.dst.error;
rt->u.dst.flags |= DST_HOST;
rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
}
int strict = 0;
int attempts = 3;
int err;
- int reachable = RT6_SELECT_F_REACHABLE;
+ int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
- if (flags & RT6_F_STRICT)
- strict = RT6_SELECT_F_IFACE;
+ strict |= flags & RT6_LOOKUP_F_IFACE;
relookup:
read_lock_bh(&table->tb6_lock);
restart:
rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
- BACKTRACK();
+ BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
void ip6_route_input(struct sk_buff *skb)
{
struct ipv6hdr *iph = skb->nh.ipv6h;
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
struct flowi fl = {
.iif = skb->dev->ifindex,
.nl_u = {
.ip6_u = {
.daddr = iph->daddr,
.saddr = iph->saddr,
- .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
+ .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
},
},
+ .mark = skb->mark,
.proto = iph->nexthdr,
};
- int flags = 0;
if (rt6_need_strict(&iph->daddr))
- flags |= RT6_F_STRICT;
+ flags |= RT6_LOOKUP_F_IFACE;
skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
}
int strict = 0;
int attempts = 3;
int err;
- int reachable = RT6_SELECT_F_REACHABLE;
+ int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
- if (flags & RT6_F_STRICT)
- strict = RT6_SELECT_F_IFACE;
+ strict |= flags & RT6_LOOKUP_F_IFACE;
relookup:
read_lock_bh(&table->tb6_lock);
restart:
rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
- BACKTRACK();
+ BACKTRACK(&fl->fl6_src);
if (rt == &ip6_null_entry ||
rt->rt6i_flags & RTF_CACHE)
goto out;
int flags = 0;
if (rt6_need_strict(&fl->fl6_dst))
- flags |= RT6_F_STRICT;
+ flags |= RT6_LOOKUP_F_IFACE;
+
+ if (!ipv6_addr_any(&fl->fl6_src))
+ flags |= RT6_LOOKUP_F_HAS_SADDR;
return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
}
fib6_force_start_gc();
out:
- return (struct dst_entry *)rt;
+ return &rt->u.dst;
}
int ndisc_dst_gc(int *more)
if (idev)
in6_dev_put(idev);
if (rt)
- dst_free((struct dst_entry *) rt);
+ dst_free(&rt->u.dst);
return err;
}
/*
* Handle redirects
*/
-void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
- struct neighbour *neigh, u8 *lladdr, int on_link)
+struct ip6rd_flowi {
+ struct flowi fl;
+ struct in6_addr gateway;
+};
+
+static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
+ struct flowi *fl,
+ int flags)
{
- struct rt6_info *rt, *nrt = NULL;
+ struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
+ struct rt6_info *rt;
struct fib6_node *fn;
- struct fib6_table *table;
- struct netevent_redirect netevent;
-
- /* TODO: Very lazy, might need to check all tables */
- table = fib6_get_table(RT6_TABLE_MAIN);
- if (table == NULL)
- return;
/*
* Get the "current" route for this destination and
*/
read_lock_bh(&table->tb6_lock);
- fn = fib6_lookup(&table->tb6_root, dest, NULL);
+ fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
for (rt = fn->leaf; rt; rt = rt->u.next) {
/*
continue;
if (!(rt->rt6i_flags & RTF_GATEWAY))
continue;
- if (neigh->dev != rt->rt6i_dev)
+ if (fl->oif != rt->rt6i_dev->ifindex)
continue;
- if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
continue;
break;
}
- if (rt)
- dst_hold(&rt->u.dst);
- else if (rt6_need_strict(dest)) {
- while ((fn = fn->parent) != NULL) {
- if (fn->fn_flags & RTN_ROOT)
- break;
- if (fn->fn_flags & RTN_RTINFO)
- goto restart;
- }
- }
+
+ if (!rt)
+ rt = &ip6_null_entry;
+ BACKTRACK(&fl->fl6_src);
+out:
+ dst_hold(&rt->u.dst);
+
read_unlock_bh(&table->tb6_lock);
- if (!rt) {
+ return rt;
+};
+
+static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
+ struct in6_addr *src,
+ struct in6_addr *gateway,
+ struct net_device *dev)
+{
+ int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip6rd_flowi rdfl = {
+ .fl = {
+ .oif = dev->ifindex,
+ .nl_u = {
+ .ip6_u = {
+ .daddr = *dest,
+ .saddr = *src,
+ },
+ },
+ },
+ .gateway = *gateway,
+ };
+
+ if (rt6_need_strict(dest))
+ flags |= RT6_LOOKUP_F_IFACE;
+
+ return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
+}
+
+void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
+ struct in6_addr *saddr,
+ struct neighbour *neigh, u8 *lladdr, int on_link)
+{
+ struct rt6_info *rt, *nrt = NULL;
+ struct netevent_redirect netevent;
+
+ rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
+
+ if (rt == &ip6_null_entry) {
if (net_ratelimit())
printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
"for redirect target\n");
- return;
+ goto out;
}
/*
rt->u.dst.output = ort->u.dst.output;
memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+ rt->u.dst.error = ort->u.dst.error;
rt->u.dst.dev = ort->u.dst.dev;
if (rt->u.dst.dev)
dev_hold(rt->u.dst.dev);
* Drop the packet on the floor
*/
-static int ip6_pkt_discard(struct sk_buff *skb)
+static inline int ip6_pkt_drop(struct sk_buff *skb, int code)
{
int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
- IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
- IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
- icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
+ IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTNOROUTES);
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
kfree_skb(skb);
return 0;
}
+static int ip6_pkt_discard(struct sk_buff *skb)
+{
+ return ip6_pkt_drop(skb, ICMPV6_NOROUTE);
+}
+
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
skb->dev = skb->dst->dev;
return ip6_pkt_discard(skb);
}
+#ifdef CONFIG_IPV6_MULTIPLE_TABLES
+
+static int ip6_pkt_prohibit(struct sk_buff *skb)
+{
+ return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED);
+}
+
+static int ip6_pkt_prohibit_out(struct sk_buff *skb)
+{
+ skb->dev = skb->dst->dev;
+ return ip6_pkt_prohibit(skb);
+}
+
+static int ip6_pkt_blk_hole(struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return 0;
+}
+
+#endif
+
/*
* Allocate a dst for local (unicast / anycast) address.
*/
rt->rt6i_flags |= RTF_LOCAL;
rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
if (rt->rt6i_nexthop == NULL) {
- dst_free((struct dst_entry *) rt);
+ dst_free(&rt->u.dst);
return ERR_PTR(-ENOMEM);
}
}
static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
- [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
+ [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
[RTA_OIF] = { .type = NLA_U32 },
+ [RTA_IIF] = { .type = NLA_U32 },
[RTA_PRIORITY] = { .type = NLA_U32 },
[RTA_METRICS] = { .type = NLA_NESTED },
};
return ip6_route_add(&cfg);
}
+static inline size_t rt6_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct rtmsg))
+ + nla_total_size(16) /* RTA_SRC */
+ + nla_total_size(16) /* RTA_DST */
+ + nla_total_size(16) /* RTA_GATEWAY */
+ + nla_total_size(16) /* RTA_PREFSRC */
+ + nla_total_size(4) /* RTA_TABLE */
+ + nla_total_size(4) /* RTA_IIF */
+ + nla_total_size(4) /* RTA_OIF */
+ + nla_total_size(4) /* RTA_PRIORITY */
+ + nla_total_size(sizeof(struct rta_cacheinfo));
+}
+
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst, struct in6_addr *src,
int iif, int type, u32 pid, u32 seq,
int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
- struct rtattr **rta = arg;
- int iif = 0;
- int err = -ENOBUFS;
+ struct nlattr *tb[RTA_MAX+1];
+ struct rt6_info *rt;
struct sk_buff *skb;
+ struct rtmsg *rtm;
struct flowi fl;
- struct rt6_info *rt;
+ int err, iif = 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (skb == NULL)
- goto out;
-
- /* Reserve room for dummy headers, this skb can pass
- through good chunk of routing engine.
- */
- skb->mac.raw = skb->data;
- skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+ err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ if (err < 0)
+ goto errout;
+ err = -EINVAL;
memset(&fl, 0, sizeof(fl));
- if (rta[RTA_SRC-1])
- ipv6_addr_copy(&fl.fl6_src,
- (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
- if (rta[RTA_DST-1])
- ipv6_addr_copy(&fl.fl6_dst,
- (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
- if (rta[RTA_IIF-1])
- memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+ if (tb[RTA_SRC]) {
+ if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+ goto errout;
+
+ ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
+ }
+
+ if (tb[RTA_DST]) {
+ if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
+ goto errout;
+
+ ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+ }
+
+ if (tb[RTA_IIF])
+ iif = nla_get_u32(tb[RTA_IIF]);
+
+ if (tb[RTA_OIF])
+ fl.oif = nla_get_u32(tb[RTA_OIF]);
if (iif) {
struct net_device *dev;
dev = __dev_get_by_index(iif);
if (!dev) {
err = -ENODEV;
- goto out_free;
+ goto errout;
}
}
- fl.oif = 0;
- if (rta[RTA_OIF-1])
- memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (skb == NULL) {
+ err = -ENOBUFS;
+ goto errout;
+ }
- rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
+ /* Reserve room for dummy headers, this skb can pass
+ through good chunk of routing engine.
+ */
+ skb->mac.raw = skb->data;
+ skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+ rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
skb->dst = &rt->u.dst;
- NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
- err = rt6_fill_node(skb, rt,
- &fl.fl6_dst, &fl.fl6_src,
- iif,
+ err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, 0, 0);
if (err < 0) {
- err = -EMSGSIZE;
- goto out_free;
+ kfree_skb(skb);
+ goto errout;
}
err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
-out:
+errout:
return err;
-out_free:
- kfree_skb(skb);
- goto out;
}
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
struct sk_buff *skb;
u32 pid = 0, seq = 0;
struct nlmsghdr *nlh = NULL;
- int payload = sizeof(struct rtmsg) + 256;
int err = -ENOBUFS;
if (info) {
seq = nlh->nlmsg_seq;
}
- skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
+ skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
if (skb == NULL)
goto errout;
err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
- if (err < 0) {
- kfree_skb(skb);
- goto errout;
- }
+ /* failure implies BUG in rt6_nlmsg_size() */
+ BUG_ON(err < 0);
err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
errout:
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
- int i;
if (arg->skip < arg->offset / RT6_INFO_LEN) {
arg->skip++;
if (arg->len >= arg->length)
return 0;
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_dst.addr.s6_addr[i]);
- arg->len += 2;
- }
- arg->len += sprintf(arg->buffer + arg->len, " %02x ",
+ arg->len += sprintf(arg->buffer + arg->len,
+ NIP6_SEQFMT " %02x ",
+ NIP6(rt->rt6i_dst.addr),
rt->rt6i_dst.plen);
#ifdef CONFIG_IPV6_SUBTREES
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_src.addr.s6_addr[i]);
- arg->len += 2;
- }
- arg->len += sprintf(arg->buffer + arg->len, " %02x ",
+ arg->len += sprintf(arg->buffer + arg->len,
+ NIP6_SEQFMT " %02x ",
+ NIP6(rt->rt6i_src.addr),
rt->rt6i_src.plen);
#else
- sprintf(arg->buffer + arg->len,
- "00000000000000000000000000000000 00 ");
- arg->len += 36;
+ arg->len += sprintf(arg->buffer + arg->len,
+ "00000000000000000000000000000000 00 ");
#endif
if (rt->rt6i_nexthop) {
- for (i=0; i<16; i++) {
- sprintf(arg->buffer + arg->len, "%02x",
- rt->rt6i_nexthop->primary_key[i]);
- arg->len += 2;
- }
+ arg->len += sprintf(arg->buffer + arg->len,
+ NIP6_SEQFMT,
+ NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
} else {
- sprintf(arg->buffer + arg->len,
- "00000000000000000000000000000000");
- arg->len += 32;
+ arg->len += sprintf(arg->buffer + arg->len,
+ "00000000000000000000000000000000");
}
arg->len += sprintf(arg->buffer + arg->len,
" %08x %08x %08x %08x %8s\n",
{
struct proc_dir_entry *p;
- ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
- sizeof(struct rt6_info),
- 0, SLAB_HWCACHE_ALIGN,
- NULL, NULL);
- if (!ip6_dst_ops.kmem_cachep)
- panic("cannot create ip6_dst_cache");
-
+ ip6_dst_ops.kmem_cachep =
+ kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
fib6_init();
#ifdef CONFIG_PROC_FS
p = proc_net_create("ipv6_route", 0, rt6_proc_info);