2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
75 #define CLONE_OFFLINK_ROUTE 0
77 #define RT6_SELECT_F_IFACE 0x1
78 #define RT6_SELECT_F_REACHABLE 0x2
/*
 * Initial values for the route-cache size limit, the GC pacing parameters,
 * the PMTU entry lifetime and the minimum advertised MSS.  The time-valued
 * entries are written in terms of HZ.
 */
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
/* Not static, unlike the other values in this group. */
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
/* IPV6_MIN_MTU less 20 and 40 bytes -- presumably TCP and IPv6 header sizes; TODO confirm. */
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations of file-local (static) helpers, including the callbacks wired into ip6_dst_ops. */
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void ip6_dst_destroy(struct dst_entry *);
92 static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94 static int ip6_dst_gc(void);
96 static int ip6_pkt_discard(struct sk_buff *skb);
97 static int ip6_pkt_discard_out(struct sk_buff *skb);
98 static void ip6_link_failure(struct sk_buff *skb);
99 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* rt6_*_route_info helpers are declared only under CONFIG_IPV6_ROUTE_INFO. */
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
/*
 * dst_ops instance used for every rt6_info allocated in this file (see
 * ip6_dst_alloc).  Sets the protocol to ETH_P_IPV6, installs the ip6_*
 * callbacks and sizes entries as struct rt6_info.
 */
109 static struct dst_ops ip6_dst_ops = {
111 .protocol = __constant_htons(ETH_P_IPV6),
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
/*
 * Statically initialised sentinel route.  Lookup helpers in this file
 * (rt6_device_match, rt6_select) return its address when nothing matches.
 * It is bound to loopback_dev, carries error -ENETUNREACH, is flagged
 * RTF_REJECT|RTF_NONEXTHOP with the largest u32 metric, and discards
 * packets on both input and output.
 */
123 struct rt6_info ip6_null_entry = {
126 .__refcnt = ATOMIC_INIT(1),
128 .dev = &loopback_dev,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
/* .path points back at the entry itself. */
135 .path = (struct dst_entry*)&ip6_null_entry,
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Second static reject entry, introduced under CONFIG_IPV6_MULTIPLE_TABLES.
 * The visible fields mirror ip6_null_entry.
 * NOTE(review): the .error initialiser is not visible in this view.
 */
143 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
145 struct rt6_info ip6_prohibit_entry = {
148 .__refcnt = ATOMIC_INIT(1),
150 .dev = &loopback_dev,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_discard,
155 .output = ip6_pkt_discard_out,
157 .path = (struct dst_entry*)&ip6_prohibit_entry,
160 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
161 .rt6i_metric = ~(u32) 0,
162 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Third static reject entry; the visible fields mirror ip6_null_entry.
 * NOTE(review): the .error initialiser is not visible in this view, and the
 * entry presumably sits inside the CONFIG_IPV6_MULTIPLE_TABLES region -- confirm.
 */
165 struct rt6_info ip6_blk_hole_entry = {
168 .__refcnt = ATOMIC_INIT(1),
170 .dev = &loopback_dev,
173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
174 .input = ip6_pkt_discard,
175 .output = ip6_pkt_discard_out,
177 .path = (struct dst_entry*)&ip6_blk_hole_entry,
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Thin wrapper: calls dst_alloc() with ip6_dst_ops and returns the result
 * cast to struct rt6_info *.  Callers in this file check the result for NULL.
 */
187 /* allocate dst with ip6_dst_ops */
188 static __inline__ struct rt6_info *ip6_dst_alloc(void)
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * dst_ops .destroy callback.  Reads the inet6_dev cached in the route and
 * clears rt->rt6i_idev.
 * NOTE(review): the reference on idev is presumably dropped on lines that
 * are not visible in this view -- confirm.
 */
193 static void ip6_dst_destroy(struct dst_entry *dst)
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
199 rt->rt6i_idev = NULL;
/*
 * dst_ops .ifdown callback.  When the device going down is not loopback_dev
 * and is the device behind the route's cached inet6_dev, the route is
 * re-pointed at the loopback inet6_dev (if in6_dev_get() returns one).
 */
204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
/*
 * Returns non-zero when the route has RTF_EXPIRES set and jiffies is after
 * rt->rt6i_expires according to time_after(); zero otherwise.
 */
219 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
/*
 * Returns non-zero when ipv6_addr_type(daddr) has the multicast or
 * link-local bit set.  Callers use this to add RT6_F_STRICT to the
 * lookup flags.
 */
225 static inline int rt6_need_strict(struct in6_addr *daddr)
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
232 * Route lookup. Any table->tb6_lock is implied.
/*
 * Walks the list starting at rt (linked through u.next) looking for an
 * entry whose device ifindex equals oif.  Entries on a loopback device are
 * examined through their rt6i_idev, and a candidate is tracked in `local`.
 * One visible exit path returns &ip6_null_entry.
 * NOTE(review): the remaining parameters, the bodies of the tests and the
 * other return statements are not visible in this view.
 */
235 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
264 return &ip6_null_entry;
/*
 * Router reachability probe.  With CONFIG_IPV6_ROUTER_PREF: if the route
 * has a next-hop neighbour that is not NUD_VALID, and more than
 * rt6i_idev->cnf.rtr_probe_interval has passed since neigh->updated, the
 * timestamp is refreshed and a neighbour solicitation for the neighbour's
 * primary key is sent to the address computed by
 * addrconf_addr_solict_mult().  A second definition with the same name
 * follows for the other configuration; its body is not visible here.
 */
269 #ifdef CONFIG_IPV6_ROUTER_PREF
270 static void rt6_probe(struct rt6_info *rt)
272 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
274 * Okay, this does not seem to be appropriate
275 * for now, however, we need to check if it
276 * is really so; aka Router Reachability Probing.
278 * Router Reachability Probe MUST be rate-limited
279 * to no more than one per minute.
/* Unlocked fast path: nothing to do without a neighbour or when it is already valid. */
281 if (!neigh || (neigh->nud_state & NUD_VALID))
283 read_lock_bh(&neigh->lock);
/* State is re-tested under the lock before the rate-limit check. */
284 if (!(neigh->nud_state & NUD_VALID) &&
285 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
286 struct in6_addr mcaddr;
287 struct in6_addr *target;
/* NOTE(review): neigh->updated is written while only the read side of neigh->lock is held. */
289 neigh->updated = jiffies;
290 read_unlock_bh(&neigh->lock);
292 target = (struct in6_addr *)&neigh->primary_key;
293 addrconf_addr_solict_mult(target, &mcaddr);
294 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
296 read_unlock_bh(&neigh->lock);
299 static inline void rt6_probe(struct rt6_info *rt)
306 * Default Router Selection (RFC 2461 6.3.6)
/*
 * Device test used when scoring a route.  Two conditions are checked:
 *  - oif is 0 or equals the route device's ifindex;
 *  - the route device is a loopback device and the device behind
 *    rt6i_idev has ifindex oif.
 * NOTE(review): the values returned for each case are not visible here.
 */
308 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * Neighbour test used when scoring a route.  Routes flagged RTF_NONEXTHOP,
 * or lacking RTF_GATEWAY, are decided by the first test alone.  Otherwise
 * the next-hop neighbour's nud_state is tested for NUD_VALID with the read
 * side of neigh->lock held.
 * NOTE(review): the result values, and any NULL check on neigh before the
 * lock is taken, are not visible in this view.
 */
319 static int inline rt6_check_neigh(struct rt6_info *rt)
321 struct neighbour *neigh = rt->rt6i_nexthop;
323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
330 read_unlock_bh(&neigh->lock);
/*
 * Computes a score for one candidate route.  Starts from rt6_check_dev();
 * under CONFIG_IPV6_ROUTER_PREF the decoded router preference from
 * rt6i_flags is OR-ed in, shifted left by 2.  rt6_check_neigh() is then
 * consulted.  A zero device score with RT6_SELECT_F_IFACE set, and a zero
 * neighbour result with RT6_SELECT_F_REACHABLE set, each take a dedicated
 * branch.
 * NOTE(review): the branch bodies and the return statement are not visible.
 */
335 static int rt6_score_route(struct rt6_info *rt, int oif,
340 m = rt6_check_dev(rt, oif);
341 if (!m && (strict & RT6_SELECT_F_IFACE))
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
346 n = rt6_check_neigh(rt);
349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
/*
 * Chooses a route from the list at *head.  Only entries sharing the first
 * entry's metric are considered; each is tested with rt6_check_expired()
 * and scored with rt6_score_route().  When RT6_SELECT_F_REACHABLE is set
 * and the loop ended on an entry other than the first, the list is rotated
 * (rt0->u.next is rewritten) under a function-local spinlock.
 * Returns the chosen entry, or &ip6_null_entry when none was chosen.
 * NOTE(review): the loop step, the best-score bookkeeping and the rest of
 * the rotation are not visible in this view.
 */
354 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
357 struct rt6_info *match = NULL, *last = NULL;
/* NOTE(review): *head is dereferenced here, so the `head ?` test in the trace call below cannot protect against a NULL head. */
358 struct rt6_info *rt, *rt0 = *head;
362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
365 for (rt = rt0, metric = rt0->rt6i_metric;
366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
370 if (rt6_check_expired(rt))
375 m = rt6_score_route(rt, oif, strict);
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
392 static DEFINE_SPINLOCK(lock);
395 rt0->u.next = last->u.next;
400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
403 return (match ? match : &ip6_null_entry);
/*
 * Processes a received route-information option (built only with
 * CONFIG_IPV6_ROUTE_INFO).
 *
 * opt/len  - the option bytes, interpreted as struct route_info
 * gwaddr   - gateway address recorded on the resulting route
 * dev      - receiving device; its ifindex keys the route
 *
 * Steps visible here: validate len, rinfo->length and rinfo->prefix_len;
 * normalise an invalid preference to MEDIUM; clamp the lifetime so that
 * HZ * lifetime cannot overflow; take the prefix straight from the option
 * when length == 3, otherwise mask it into a local buffer with
 * ipv6_addr_prefix(); look up an existing route, add one with
 * rt6_add_route_info(), or update the preference bits in rt6i_flags;
 * finally set or clear RTF_EXPIRES and drop the reference from the lookup.
 * NOTE(review): the error returns and some branch structure are not
 * visible in this view.
 */
406 #ifdef CONFIG_IPV6_ROUTE_INFO
407 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
408 struct in6_addr *gwaddr)
410 struct route_info *rinfo = (struct route_info *) opt;
411 struct in6_addr prefix_buf, *prefix;
416 if (len < sizeof(struct route_info)) {
420 /* Sanity check for prefix_len and length */
421 if (rinfo->length > 3) {
423 } else if (rinfo->prefix_len > 128) {
425 } else if (rinfo->prefix_len > 64) {
426 if (rinfo->length < 2) {
429 } else if (rinfo->prefix_len > 0) {
430 if (rinfo->length < 1) {
435 pref = rinfo->route_pref;
436 if (pref == ICMPV6_ROUTER_PREF_INVALID)
437 pref = ICMPV6_ROUTER_PREF_MEDIUM;
/* NOTE(review): the field comes from a received option, so ntohl() would be the conventional spelling; htonl() performs the same byte swap. */
439 lifetime = htonl(rinfo->lifetime);
/* 0xffffffff is handled separately from the overflow clamp; it later clears RTF_EXPIRES. */
440 if (lifetime == 0xffffffff) {
442 } else if (lifetime > 0x7fffffff/HZ) {
443 /* Avoid arithmetic overflow */
444 lifetime = 0x7fffffff/HZ - 1;
447 if (rinfo->length == 3)
448 prefix = (struct in6_addr *)rinfo->prefix;
450 /* this function is safe */
451 ipv6_addr_prefix(&prefix_buf,
452 (struct in6_addr *)rinfo->prefix,
454 prefix = &prefix_buf;
457 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
459 if (rt && !lifetime) {
465 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
468 rt->rt6i_flags = RTF_ROUTEINFO |
469 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
472 if (lifetime == 0xffffffff) {
473 rt->rt6i_flags &= ~RTF_EXPIRES;
475 rt->rt6i_expires = jiffies + HZ * lifetime;
476 rt->rt6i_flags |= RTF_EXPIRES;
478 dst_release(&rt->u.dst);
/*
 * Statement macro used inside lookup functions that have `rt`, `fn` and
 * `flags` in scope.  When the lookup produced ip6_null_entry and
 * RT6_F_STRICT is set, it climbs fn->parent; reaching a node flagged
 * RTN_TL_ROOT takes a reference on rt, and a node flagged RTN_RTINFO is
 * tested separately.
 * NOTE(review): the jump targets and closing lines are not visible here.
 */
484 #define BACKTRACK() \
485 if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
486 while ((fn = fn->parent) != NULL) { \
487 if (fn->fn_flags & RTN_TL_ROOT) { \
488 dst_hold(&rt->u.dst); \
491 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Per-table lookup callback passed to fib6_rule_lookup() by rt6_lookup().
 * Under the read side of table->tb6_lock it finds the fib6 node for the
 * flow's destination/source, narrows the result with rt6_device_match()
 * using fl->oif, and takes a reference on the route.  After the lock is
 * dropped, lastuse is stamped with jiffies.
 */
496 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497 struct flowi *fl, int flags)
499 struct fib6_node *fn;
502 read_lock_bh(&table->tb6_lock);
503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
508 dst_hold(&rt->u.dst);
510 read_unlock_bh(&table->tb6_lock);
512 rt->u.dst.lastuse = jiffies;
/*
 * Looks up a route through fib6_rule_lookup() with ip6_pol_route_lookup as
 * the per-table callback.  A non-zero `strict` becomes RT6_F_STRICT.
 * Returns the resulting dst cast to struct rt6_info *.
 * NOTE(review): the remaining parameters and the construction of `fl` are
 * not visible in this view.  ip6_route_add() in this file releases the
 * result with dst_release(), so the caller owns a reference.
 */
519 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
531 struct dst_entry *dst;
532 int flags = strict ? RT6_F_STRICT : 0;
534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
536 return (struct rt6_info *) dst;
543 /* ip6_ins_rt is called with FREE table->tb6_lock.
544 It takes a new route entry; if the addition fails for any reason the
545 route is freed. In any case, if caller does not hold it, it may
/*
 * Inserts rt into the table recorded in rt->rt6i_table by calling
 * fib6_add() with the write side of table->tb6_lock held.  `info` is
 * passed through to fib6_add(); ip6_ins_rt() passes NULL.
 */
549 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
552 struct fib6_table *table;
554 table = rt->rt6i_table;
555 write_lock_bh(&table->tb6_lock);
556 err = fib6_add(&table->tb6_root, rt, info);
557 write_unlock_bh(&table->tb6_lock);
/* Inserts rt via __ip6_ins_rt() with no nl_info, returning its result. */
562 int ip6_ins_rt(struct rt6_info *rt)
564 return __ip6_ins_rt(rt, NULL);
/*
 * Builds a host (/128) cache entry for daddr from ort.
 *
 * The copy comes from ip6_rt_copy().  For a route without RTF_GATEWAY the
 * destination itself becomes the gateway, and RTF_ANYCAST is set when the
 * original prefix is shorter than 128 bits and its address equals daddr.
 * The copy is then marked RTF_CACHE / DST_HOST.  With CONFIG_IPV6_SUBTREES,
 * a non-empty source prefix is narrowed to saddr/128.  Finally the
 * next-hop neighbour is obtained with ndisc_get_neigh().
 * NOTE(review): the NULL check on the copy and the return are not visible.
 */
567 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
568 struct in6_addr *saddr)
576 rt = ip6_rt_copy(ort);
579 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
580 if (rt->rt6i_dst.plen != 128 &&
581 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
582 rt->rt6i_flags |= RTF_ANYCAST;
583 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
586 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
587 rt->rt6i_dst.plen = 128;
588 rt->rt6i_flags |= RTF_CACHE;
589 rt->u.dst.flags |= DST_HOST;
591 #ifdef CONFIG_IPV6_SUBTREES
592 if (rt->rt6i_src.plen && saddr) {
593 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
594 rt->rt6i_src.plen = 128;
598 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/*
 * Builds a host (/128) cache entry for daddr from ort, reusing ort's
 * next-hop neighbour through neigh_clone().  For RTF_REJECT routes the
 * dst error code is carried over from the original.
 * NOTE(review): the NULL check on the copy and the return are not visible.
 */
605 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
607 struct rt6_info *rt = ip6_rt_copy(ort);
609 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
610 rt->rt6i_dst.plen = 128;
611 rt->rt6i_flags |= RTF_CACHE;
612 if (rt->rt6i_flags & RTF_REJECT)
613 rt->u.dst.error = ort->u.dst.error;
614 rt->u.dst.flags |= DST_HOST;
615 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/*
 * Per-table lookup callback for received packets (see ip6_route_input).
 *
 * Under the read side of table->tb6_lock: finds the fib6 node, selects a
 * route with rt6_select() keyed on fl->iif.  The null entry and RTF_CACHE
 * entries take a separate branch.  Otherwise a reference is taken, the
 * lock is dropped, and a host cache entry is created -- rt6_alloc_cow()
 * for routes with no next hop that are not RTF_NONEXTHOP,
 * rt6_alloc_clone() when CLONE_OFFLINK_ROUTE is enabled -- and inserted
 * with ip6_ins_rt().  The existing comment describes a relookup after a
 * lost insertion race.  lastuse is stamped with jiffies at the end.
 * NOTE(review): labels, gotos, the retry logic and the return are not
 * visible in this view.  The visible body parallels ip6_pol_route_output,
 * which selects on fl->oif instead.
 */
620 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
621 struct flowi *fl, int flags)
623 struct fib6_node *fn;
624 struct rt6_info *rt, *nrt;
628 int reachable = RT6_SELECT_F_REACHABLE;
630 if (flags & RT6_F_STRICT)
631 strict = RT6_SELECT_F_IFACE;
634 read_lock_bh(&table->tb6_lock);
637 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
640 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
642 if (rt == &ip6_null_entry ||
643 rt->rt6i_flags & RTF_CACHE)
646 dst_hold(&rt->u.dst);
647 read_unlock_bh(&table->tb6_lock);
649 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
650 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
652 #if CLONE_OFFLINK_ROUTE
653 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
659 dst_release(&rt->u.dst);
/* Allocation failure falls back to the null entry. */
660 rt = nrt ? : &ip6_null_entry;
662 dst_hold(&rt->u.dst);
664 err = ip6_ins_rt(nrt);
673 * Race condition! In the gap, when table->tb6_lock was
674 * released someone could insert this route. Relookup.
676 dst_release(&rt->u.dst);
684 dst_hold(&rt->u.dst);
685 read_unlock_bh(&table->tb6_lock);
687 rt->u.dst.lastuse = jiffies;
/*
 * Routes a received packet.  A flow is built from the IPv6 header (input
 * interface from skb->dev, flow label masked out of the first 32 bits of
 * the header, next-header as protocol).  RT6_F_STRICT is added when the
 * destination is multicast or link-local per rt6_need_strict().  The
 * result of fib6_rule_lookup() with ip6_pol_route_input is stored in
 * skb->dst.
 * NOTE(review): the remaining flow fields are not visible in this view.
 */
693 void ip6_route_input(struct sk_buff *skb)
695 struct ipv6hdr *iph = skb->nh.ipv6h;
697 .iif = skb->dev->ifindex,
702 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
705 .proto = iph->nexthdr,
709 if (rt6_need_strict(&iph->daddr))
710 flags |= RT6_F_STRICT;
712 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
/*
 * Per-table lookup callback for locally generated packets (see
 * ip6_route_output).
 *
 * Under the read side of table->tb6_lock: finds the fib6 node, selects a
 * route with rt6_select() keyed on fl->oif.  The null entry and RTF_CACHE
 * entries take a separate branch.  Otherwise a reference is taken, the
 * lock is dropped, and a host cache entry is created -- rt6_alloc_cow()
 * for routes with no next hop that are not RTF_NONEXTHOP,
 * rt6_alloc_clone() when CLONE_OFFLINK_ROUTE is enabled -- and inserted
 * with ip6_ins_rt().  The existing comment describes a relookup after a
 * lost insertion race.  lastuse is stamped with jiffies at the end.
 * NOTE(review): labels, gotos, the retry logic and the return are not
 * visible in this view.  The visible body parallels ip6_pol_route_input,
 * which selects on fl->iif instead.
 */
715 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
716 struct flowi *fl, int flags)
718 struct fib6_node *fn;
719 struct rt6_info *rt, *nrt;
723 int reachable = RT6_SELECT_F_REACHABLE;
725 if (flags & RT6_F_STRICT)
726 strict = RT6_SELECT_F_IFACE;
729 read_lock_bh(&table->tb6_lock);
732 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
735 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
737 if (rt == &ip6_null_entry ||
738 rt->rt6i_flags & RTF_CACHE)
741 dst_hold(&rt->u.dst);
742 read_unlock_bh(&table->tb6_lock);
744 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
745 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
747 #if CLONE_OFFLINK_ROUTE
748 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
754 dst_release(&rt->u.dst);
/* Allocation failure falls back to the null entry. */
755 rt = nrt ? : &ip6_null_entry;
757 dst_hold(&rt->u.dst);
759 err = ip6_ins_rt(nrt);
768 * Race condition! In the gap, when table->tb6_lock was
769 * released someone could insert this route. Relookup.
771 dst_release(&rt->u.dst);
779 dst_hold(&rt->u.dst);
780 read_unlock_bh(&table->tb6_lock);
782 rt->u.dst.lastuse = jiffies;
/*
 * Output-path route lookup for the flow fl.  RT6_F_STRICT is added when
 * the destination is multicast or link-local per rt6_need_strict();
 * the result of fib6_rule_lookup() with ip6_pol_route_output is returned.
 * The sk parameter is not referenced on the visible lines.
 */
787 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
791 if (rt6_need_strict(&fl->fl6_dst))
792 flags |= RT6_F_STRICT;
794 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
799 * Destination cache support functions
/*
 * dst_ops .check callback.  Compares the serial number of the fib6 node
 * the route hangs off (rt6i_node->fn_sernum) against the caller's cookie;
 * the test also requires rt and rt->rt6i_node to be non-NULL.
 * NOTE(review): what is returned on match and on mismatch is not visible.
 */
802 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
806 rt = (struct rt6_info *) dst;
808 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops .negative_advice callback.  Treats RTF_CACHE entries specially.
 * NOTE(review): the action taken and the return value are not visible.
 */
814 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
816 struct rt6_info *rt = (struct rt6_info *) dst;
819 if (rt->rt6i_flags & RTF_CACHE)
/*
 * dst_ops .link_failure callback.  Sends an ICMPv6 destination-unreachable
 * (address unreachable) for the packet, then penalises the route attached
 * to the skb: a cache entry is expired immediately (expires set to 0 and
 * RTF_EXPIRES added); for a default route attached to a fib6 node, the
 * node's serial number is set to -1.
 * NOTE(review): any NULL check on skb->dst is not visible in this view.
 */
827 static void ip6_link_failure(struct sk_buff *skb)
831 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
833 rt = (struct rt6_info *) skb->dst;
835 if (rt->rt6i_flags&RTF_CACHE) {
836 dst_set_expires(&rt->u.dst, 0);
837 rt->rt6i_flags |= RTF_EXPIRES;
838 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
839 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops .update_pmtu callback.  Acts only when the new mtu is smaller
 * than the current dst_mtu() and the route is a /128 entry.  The route is
 * marked RTF_MODIFIED; an mtu below IPV6_MIN_MTU additionally sets
 * RTAX_FEATURE_ALLFRAG.  The MTU metric is then stored and
 * NETEVENT_PMTU_UPDATE listeners are notified.
 * NOTE(review): one statement inside the IPV6_MIN_MTU branch is not visible.
 */
843 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
845 struct rt6_info *rt6 = (struct rt6_info*)dst;
847 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
848 rt6->rt6i_flags |= RTF_MODIFIED;
849 if (mtu < IPV6_MIN_MTU) {
851 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
853 dst->metrics[RTAX_MTU-1] = mtu;
854 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
858 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derives an advertised MSS from a link MTU: subtracts the IPv6 and TCP
 * header sizes, raises the result to ip6_rt_min_advmss if it is smaller,
 * and applies an upper bound related to IPV6_MAXPLEN.
 * NOTE(review): mtu is unsigned, so an mtu smaller than the two headers
 * wraps to a large value and bypasses the minimum clamp.  The statement
 * under the final test and the return are not visible in this view.
 */
860 static inline unsigned int ipv6_advmss(unsigned int mtu)
862 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
864 if (mtu < ip6_rt_min_advmss)
865 mtu = ip6_rt_min_advmss;
868 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
869 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
870 * IPV6_MAXPLEN is also valid and means: "any MSS,
871 * rely only on pmtu discovery"
873 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Singly linked list (through dst.next) of entries created by ndisc_dst_alloc(), guarded by ndisc_lock. */
878 static struct dst_entry *ndisc_dst_gc_list;
879 static DEFINE_SPINLOCK(ndisc_lock);
/*
 * Allocates a standalone dst for neighbour-discovery traffic towards addr
 * on dev.
 *
 * Takes the inet6_dev for dev, allocates an rt6_info, resolves the
 * neighbour with ndisc_get_neigh() on one visible path, and fills in the
 * entry: refcount 1, hop limit 255, MTU from ipv6_get_mtu(), advmss
 * derived from that MTU, and the caller-supplied output function.  The
 * entry is pushed onto ndisc_dst_gc_list under ndisc_lock and
 * fib6_force_start_gc() is called.  Returns the entry as a dst_entry *.
 * NOTE(review): the failure paths and the condition guarding the
 * neighbour lookup are not visible in this view.
 */
881 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
882 struct neighbour *neigh,
883 struct in6_addr *addr,
884 int (*output)(struct sk_buff *))
887 struct inet6_dev *idev = in6_dev_get(dev);
889 if (unlikely(idev == NULL))
892 rt = ip6_dst_alloc();
893 if (unlikely(rt == NULL)) {
902 neigh = ndisc_get_neigh(dev, addr);
905 rt->rt6i_idev = idev;
906 rt->rt6i_nexthop = neigh;
907 atomic_set(&rt->u.dst.__refcnt, 1);
908 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
909 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
910 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
911 rt->u.dst.output = output;
913 #if 0 /* there's no chance to use these for ndisc */
914 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
917 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
918 rt->rt6i_dst.plen = 128;
921 spin_lock_bh(&ndisc_lock);
922 rt->u.dst.next = ndisc_dst_gc_list;
923 ndisc_dst_gc_list = &rt->u.dst;
924 spin_unlock_bh(&ndisc_lock);
926 fib6_force_start_gc();
929 return (struct dst_entry *)rt;
/*
 * Walks ndisc_dst_gc_list under ndisc_lock using a pointer-to-link cursor
 * and singles out entries whose reference count has dropped to zero.
 * NOTE(review): what happens to those entries, how *more is updated and
 * the return value are not visible in this view.
 */
932 int ndisc_dst_gc(int *more)
934 struct dst_entry *dst, *next, **pprev;
940 spin_lock_bh(&ndisc_lock);
941 pprev = &ndisc_dst_gc_list;
943 while ((dst = *pprev) != NULL) {
944 if (!atomic_read(&dst->__refcnt)) {
954 spin_unlock_bh(&ndisc_lock);
/*
 * Garbage-collection hook for IPv6 dst entries.
 *
 * A run is skipped-or-shortened when the last run was less than
 * ip6_rt_gc_min_interval ago and the entry count is within
 * ip6_rt_max_size.  The `expire` value persists across calls: it is reset
 * to half of ip6_rt_gc_timeout when the entry count is below gc_thresh and
 * otherwise decays by expire >> ip6_rt_gc_elasticity on each call.
 * Returns non-zero while the entry count still exceeds ip6_rt_max_size.
 * NOTE(review): the early-exit target and the call that performs the
 * collection are not visible in this view.
 */
959 static int ip6_dst_gc(void)
961 static unsigned expire = 30*HZ;
962 static unsigned long last_gc;
963 unsigned long now = jiffies;
965 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
966 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
972 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
973 expire = ip6_rt_gc_timeout>>1;
976 expire -= expire>>ip6_rt_gc_elasticity;
977 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
980 /* Clean host part of a prefix. Not necessary in radix tree,
981 but results in cleaner routing tables.
983 Remove it only when all the things will work!
/*
 * Returns the IPv6 MTU configured on dev (idev->cnf.mtu6), starting from
 * a default of IPV6_MIN_MTU.
 * NOTE(review): the NULL test on idev and the release of the reference
 * taken by in6_dev_get() are presumably on lines not visible here.
 */
986 static int ipv6_get_mtu(struct net_device *dev)
988 int mtu = IPV6_MIN_MTU;
989 struct inet6_dev *idev;
991 idev = in6_dev_get(dev);
993 mtu = idev->cnf.mtu6;
/*
 * Returns the hop limit configured on dev (idev->cnf.hop_limit), starting
 * from the global default ipv6_devconf.hop_limit.
 * NOTE(review): the NULL test on idev and the release of the reference
 * taken by in6_dev_get() are presumably on lines not visible here.
 */
999 int ipv6_get_hoplimit(struct net_device *dev)
1001 int hoplimit = ipv6_devconf.hop_limit;
1002 struct inet6_dev *idev;
1004 idev = in6_dev_get(dev);
1006 hoplimit = idev->cnf.hop_limit;
1016 int ip6_route_add(struct fib6_config *cfg)
1019 struct rt6_info *rt = NULL;
1020 struct net_device *dev = NULL;
1021 struct inet6_dev *idev = NULL;
1022 struct fib6_table *table;
1025 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1027 #ifndef CONFIG_IPV6_SUBTREES
1028 if (cfg->fc_src_len)
1031 if (cfg->fc_ifindex) {
1033 dev = dev_get_by_index(cfg->fc_ifindex);
1036 idev = in6_dev_get(dev);
1041 if (cfg->fc_metric == 0)
1042 cfg->fc_metric = IP6_RT_PRIO_USER;
1044 table = fib6_new_table(cfg->fc_table);
1045 if (table == NULL) {
1050 rt = ip6_dst_alloc();
1057 rt->u.dst.obsolete = -1;
1058 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1060 if (cfg->fc_protocol == RTPROT_UNSPEC)
1061 cfg->fc_protocol = RTPROT_BOOT;
1062 rt->rt6i_protocol = cfg->fc_protocol;
1064 addr_type = ipv6_addr_type(&cfg->fc_dst);
1066 if (addr_type & IPV6_ADDR_MULTICAST)
1067 rt->u.dst.input = ip6_mc_input;
1069 rt->u.dst.input = ip6_forward;
1071 rt->u.dst.output = ip6_output;
1073 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1074 rt->rt6i_dst.plen = cfg->fc_dst_len;
1075 if (rt->rt6i_dst.plen == 128)
1076 rt->u.dst.flags = DST_HOST;
1078 #ifdef CONFIG_IPV6_SUBTREES
1079 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1080 rt->rt6i_src.plen = cfg->fc_src_len;
1083 rt->rt6i_metric = cfg->fc_metric;
1085 /* We cannot add true routes via loopback here,
1086 they would result in kernel looping; promote them to reject routes
1088 if ((cfg->fc_flags & RTF_REJECT) ||
1089 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1090 /* hold loopback dev/idev if we haven't done so. */
1091 if (dev != &loopback_dev) {
1096 dev = &loopback_dev;
1098 idev = in6_dev_get(dev);
1104 rt->u.dst.output = ip6_pkt_discard_out;
1105 rt->u.dst.input = ip6_pkt_discard;
1106 rt->u.dst.error = -ENETUNREACH;
1107 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1111 if (cfg->fc_flags & RTF_GATEWAY) {
1112 struct in6_addr *gw_addr;
1115 gw_addr = &cfg->fc_gateway;
1116 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1117 gwa_type = ipv6_addr_type(gw_addr);
1119 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1120 struct rt6_info *grt;
1122 /* IPv6 strictly inhibits using not link-local
1123 addresses as nexthop address.
1124 Otherwise, the router will not be able to send redirects.
1125 It is very good, but in some (rare!) circumstances
1126 (SIT, PtP, NBMA NOARP links) it is handy to allow
1127 some exceptions. --ANK
1130 if (!(gwa_type&IPV6_ADDR_UNICAST))
1133 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1135 err = -EHOSTUNREACH;
1139 if (dev != grt->rt6i_dev) {
1140 dst_release(&grt->u.dst);
1144 dev = grt->rt6i_dev;
1145 idev = grt->rt6i_idev;
1147 in6_dev_hold(grt->rt6i_idev);
1149 if (!(grt->rt6i_flags&RTF_GATEWAY))
1151 dst_release(&grt->u.dst);
1157 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1165 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1166 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1167 if (IS_ERR(rt->rt6i_nexthop)) {
1168 err = PTR_ERR(rt->rt6i_nexthop);
1169 rt->rt6i_nexthop = NULL;
1174 rt->rt6i_flags = cfg->fc_flags;
1181 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1182 int type = nla->nla_type;
1185 if (type > RTAX_MAX) {
1190 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1195 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1196 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1197 if (!rt->u.dst.metrics[RTAX_MTU-1])
1198 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1199 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1200 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1201 rt->u.dst.dev = dev;
1202 rt->rt6i_idev = idev;
1203 rt->rt6i_table = table;
1204 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1212 dst_free((struct dst_entry *) rt);
/*
 * Removes rt from its table.  The null entry is rejected up front.
 * Otherwise fib6_del() is called with the write side of the table lock
 * held, and one reference on rt is dropped afterwards -- callers such as
 * ip6_route_del() take that reference before calling.
 * NOTE(review): the error value for the null entry and the return are not
 * visible in this view.
 */
1216 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1219 struct fib6_table *table;
1221 if (rt == &ip6_null_entry)
1224 table = rt->rt6i_table;
1225 write_lock_bh(&table->tb6_lock);
1227 err = fib6_del(rt, info);
1228 dst_release(&rt->u.dst);
1230 write_unlock_bh(&table->tb6_lock);
/* Deletes rt via __ip6_del_rt() with no nl_info, returning its result. */
1235 int ip6_del_rt(struct rt6_info *rt)
1237 return __ip6_del_rt(rt, NULL);
/*
 * Deletes the first route matching cfg.
 *
 * The table is obtained with fib6_get_table(); the exact node for the
 * destination/source prefixes is located under the read lock.  Each route
 * on the node is filtered by interface index (when cfg->fc_ifindex is
 * set), by gateway (when RTF_GATEWAY is requested) and by metric (when
 * cfg->fc_metric is non-zero).  The first survivor is referenced, the
 * read lock is dropped, and __ip6_del_rt() performs the removal.
 * NOTE(review): the return value when nothing matches is not visible.
 */
1240 static int ip6_route_del(struct fib6_config *cfg)
1242 struct fib6_table *table;
1243 struct fib6_node *fn;
1244 struct rt6_info *rt;
1247 table = fib6_get_table(cfg->fc_table);
1251 read_lock_bh(&table->tb6_lock);
1253 fn = fib6_locate(&table->tb6_root,
1254 &cfg->fc_dst, cfg->fc_dst_len,
1255 &cfg->fc_src, cfg->fc_src_len);
1258 for (rt = fn->leaf; rt; rt = rt->u.next) {
1259 if (cfg->fc_ifindex &&
1260 (rt->rt6i_dev == NULL ||
1261 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1263 if (cfg->fc_flags & RTF_GATEWAY &&
1264 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1266 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
/* Reference handed to __ip6_del_rt(), which releases it. */
1268 dst_hold(&rt->u.dst);
1269 read_unlock_bh(&table->tb6_lock);
1271 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1274 read_unlock_bh(&table->tb6_lock);
1282 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1283 struct in6_addr *saddr,
1284 struct neighbour *neigh, u8 *lladdr, int on_link)
1286 struct rt6_info *rt, *nrt = NULL;
1287 struct fib6_node *fn;
1288 struct fib6_table *table;
1289 struct netevent_redirect netevent;
1291 /* TODO: Very lazy, might need to check all tables */
1292 table = fib6_get_table(RT6_TABLE_MAIN);
1297 * Get the "current" route for this destination and
1298 * check if the redirect has come from an appropriate router.
1300 * RFC 2461 specifies that redirects should only be
1301 * accepted if they come from the nexthop to the target.
1302 * Due to the way the routes are chosen, this notion
1303 * is a bit fuzzy and one might need to check all possible
1307 read_lock_bh(&table->tb6_lock);
1308 fn = fib6_lookup(&table->tb6_root, dest, src);
1310 for (rt = fn->leaf; rt; rt = rt->u.next) {
1312 * Current route is on-link; redirect is always invalid.
1314 * Seems, previous statement is not true. It could
1315 * be node, which looks for us as on-link (f.e. proxy ndisc)
1316 * But then router serving it might decide, that we should
1317 * know truth 8)8) --ANK (980726).
1319 if (rt6_check_expired(rt))
1321 if (!(rt->rt6i_flags & RTF_GATEWAY))
1323 if (neigh->dev != rt->rt6i_dev)
1325 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1330 dst_hold(&rt->u.dst);
1331 else if (rt6_need_strict(dest)) {
1332 while ((fn = fn->parent) != NULL) {
1333 if (fn->fn_flags & RTN_ROOT)
1335 if (fn->fn_flags & RTN_RTINFO)
1339 read_unlock_bh(&table->tb6_lock);
1342 if (net_ratelimit())
1343 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1344 "for redirect target\n");
1349 * We have finally decided to accept it.
1352 neigh_update(neigh, lladdr, NUD_STALE,
1353 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1354 NEIGH_UPDATE_F_OVERRIDE|
1355 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1356 NEIGH_UPDATE_F_ISROUTER))
1360 * Redirect received -> path was valid.
1361 * Look, redirects are sent only in response to data packets,
1362 * so that this nexthop apparently is reachable. --ANK
1364 dst_confirm(&rt->u.dst);
1366 /* Duplicate redirect: silently ignore. */
1367 if (neigh == rt->u.dst.neighbour)
1370 nrt = ip6_rt_copy(rt);
1374 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1376 nrt->rt6i_flags &= ~RTF_GATEWAY;
1378 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1379 nrt->rt6i_dst.plen = 128;
1380 nrt->u.dst.flags |= DST_HOST;
1382 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1383 nrt->rt6i_nexthop = neigh_clone(neigh);
1384 /* Reset pmtu, it may be better */
1385 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1386 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1388 if (ip6_ins_rt(nrt))
1391 netevent.old = &rt->u.dst;
1392 netevent.new = &nrt->u.dst;
1393 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1395 if (rt->rt6i_flags&RTF_CACHE) {
1401 dst_release(&rt->u.dst);
1406 * Handle ICMP "packet too big" messages
1407 * i.e. Path MTU discovery
1410 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1411 struct net_device *dev, u32 pmtu)
1413 struct rt6_info *rt, *nrt;
1416 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1420 if (pmtu >= dst_mtu(&rt->u.dst))
1423 if (pmtu < IPV6_MIN_MTU) {
1425 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1426 * MTU (1280) and a fragment header should always be included
1427 * after a node receiving Too Big message reporting PMTU is
1428 * less than the IPv6 Minimum Link MTU.
1430 pmtu = IPV6_MIN_MTU;
1434 /* New mtu received -> path was valid.
1435 They are sent only in response to data packets,
1436 so that this nexthop apparently is reachable. --ANK
1438 dst_confirm(&rt->u.dst);
1440 /* Host route. If it is static, it would be better
1441 not to override it, but add new one, so that
1442 when cache entry will expire old pmtu
1443 would return automatically.
1445 if (rt->rt6i_flags & RTF_CACHE) {
1446 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1448 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1449 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1450 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1455 Two cases are possible:
1456 1. It is connected route. Action: COW
1457 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1459 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1460 nrt = rt6_alloc_cow(rt, daddr, saddr);
1462 nrt = rt6_alloc_clone(rt, daddr);
1465 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1467 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1469 /* According to RFC 1981, detection of a PMTU increase shouldn't
1470 * happen within 5 mins; the recommended timer is 10 mins.
1471 * Here this route expiration time is set to ip6_rt_mtu_expires
1472 * which is 10 mins. After 10 mins the decreased pmtu expires
1473 * and detection of a PMTU increase happens automatically.
1475 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1476 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1481 dst_release(&rt->u.dst);
1485 * Misc support functions
/*
 * Allocates a new rt6_info and copies the routing state of ort into it:
 * input/output handlers, metrics, device and inet6_dev (taking a
 * reference on each), gateway, destination key (and source key with
 * CONFIG_IPV6_SUBTREES) and owning table.  The copy differs from the
 * original in that RTF_EXPIRES is cleared, rt6i_expires and rt6i_metric
 * are zeroed, and lastuse is set to the current jiffies.
 * NOTE(review): the NULL check on the allocation and the return are not
 * visible in this view; callers in this file test the result for NULL.
 */
1488 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1490 struct rt6_info *rt = ip6_dst_alloc();
1493 rt->u.dst.input = ort->u.dst.input;
1494 rt->u.dst.output = ort->u.dst.output;
1496 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1497 rt->u.dst.dev = ort->u.dst.dev;
1499 dev_hold(rt->u.dst.dev);
1500 rt->rt6i_idev = ort->rt6i_idev;
1502 in6_dev_hold(rt->rt6i_idev);
1503 rt->u.dst.lastuse = jiffies;
1504 rt->rt6i_expires = 0;
1506 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1507 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1508 rt->rt6i_metric = 0;
1510 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1511 #ifdef CONFIG_IPV6_SUBTREES
1512 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1514 rt->rt6i_table = ort->rt6i_table;
/*
 * Finds a route in RT6_TABLE_INFO (built only with CONFIG_IPV6_ROUTE_INFO)
 * for the exact prefix/prefixlen whose device has the given ifindex, which
 * carries both RTF_ROUTEINFO and RTF_GATEWAY, and whose gateway equals
 * gwaddr.  A reference is taken on the match.
 * NOTE(review): the walk only reads the table, yet the write side of
 * tb6_lock is taken.  The NULL-table and not-found paths and the return
 * are not visible in this view.
 */
1519 #ifdef CONFIG_IPV6_ROUTE_INFO
1520 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1521 struct in6_addr *gwaddr, int ifindex)
1523 struct fib6_node *fn;
1524 struct rt6_info *rt = NULL;
1525 struct fib6_table *table;
1527 table = fib6_get_table(RT6_TABLE_INFO);
1531 write_lock_bh(&table->tb6_lock);
1532 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1536 for (rt = fn->leaf; rt; rt = rt->u.next) {
1537 if (rt->rt6i_dev->ifindex != ifindex)
1539 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1541 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1543 dst_hold(&rt->u.dst);
1547 write_unlock_bh(&table->tb6_lock);
/*
 * Adds a gateway route learned from a route-information option to
 * RT6_TABLE_INFO and returns it.
 *
 * A fib6_config is filled with the prefix, gateway, interface and the
 * flags RTF_GATEWAY|RTF_ADDRCONF|RTF_ROUTEINFO|RTF_UP plus the encoded
 * preference; RTF_DEFAULT is added on a condition the existing comment
 * describes as prefix length 0.  After ip6_route_add(), the route is
 * fetched back with rt6_get_route_info(), whose result is returned.
 * NOTE(review): the result of ip6_route_add() is ignored, so a failed
 * insertion only shows up as whatever the lookup returns.
 */
1551 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1552 struct in6_addr *gwaddr, int ifindex,
1555 struct fib6_config cfg = {
1556 .fc_table = RT6_TABLE_INFO,
1558 .fc_ifindex = ifindex,
1559 .fc_dst_len = prefixlen,
1560 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1561 RTF_UP | RTF_PREF(pref),
1564 ipv6_addr_copy(&cfg.fc_dst, prefix);
1565 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1567 /* We should treat it as a default route if prefix length is 0. */
1569 cfg.fc_flags |= RTF_DEFAULT;
1571 ip6_route_add(&cfg);
1573 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
/*
 * Searches the root leaf list of RT6_TABLE_DFLT for an autoconfigured
 * default route (both RTF_ADDRCONF and RTF_DEFAULT set) on dev whose
 * gateway equals addr.  A reference is taken before the lock is dropped.
 * NOTE(review): the walk only reads the table, yet the write side of
 * tb6_lock is taken.  Whether dst_hold() is guarded against a NULL rt when
 * nothing matches is not visible in this view.
 */
1577 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1579 struct rt6_info *rt;
1580 struct fib6_table *table;
1582 table = fib6_get_table(RT6_TABLE_DFLT);
1586 write_lock_bh(&table->tb6_lock);
1587 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1588 if (dev == rt->rt6i_dev &&
1589 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1590 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1594 dst_hold(&rt->u.dst);
1595 write_unlock_bh(&table->tb6_lock);
/*
 * Install a default route via gwaddr on dev into RT6_TABLE_DFLT, then
 * return whatever rt6_get_dflt_router() finds for that gateway/device.
 *
 * The route is flagged RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP |
 * RTF_EXPIRES plus the RTF_PREF() encoding of pref.  As in
 * rt6_add_route_info(), the value returned by ip6_route_add() is not
 * consulted on the visible call; success is observable only through the
 * follow-up lookup.
 */
1599 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1600 struct net_device *dev,
1603 struct fib6_config cfg = {
1604 .fc_table = RT6_TABLE_DFLT,
1606 .fc_ifindex = dev->ifindex,
1607 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1608 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1611 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1613 ip6_route_add(&cfg);
1615 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Scan RT6_TABLE_DFLT for default-router entries to purge.
 *
 * The leaf list hanging off the table root is walked under the read lock
 * (BH disabled).  An entry qualifies when RTF_DEFAULT or RTF_ADDRCONF is
 * set; the test is a plain mask, so either bit alone is enough.  For a
 * qualifying entry a reference is taken and the lock is dropped before the
 * entry is dealt with.
 *
 * NOTE(review): rt6_get_dflt_router() requires both flags to be set, while
 * the test here accepts either one.  Confirm that this difference is
 * intended, given the "keep consistent" note below.
 */
1618 void rt6_purge_dflt_routers(void)
1620 struct rt6_info *rt;
1621 struct fib6_table *table;
1623 /* NOTE: Keep consistent with rt6_get_dflt_router */
1624 table = fib6_get_table(RT6_TABLE_DFLT);
1629 read_lock_bh(&table->tb6_lock);
1630 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1631 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1632 dst_hold(&rt->u.dst);
1633 read_unlock_bh(&table->tb6_lock);
1638 read_unlock_bh(&table->tb6_lock);
1641 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1642 struct fib6_config *cfg)
1644 memset(cfg, 0, sizeof(*cfg));
1646 cfg->fc_table = RT6_TABLE_MAIN;
1647 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1648 cfg->fc_metric = rtmsg->rtmsg_metric;
1649 cfg->fc_expires = rtmsg->rtmsg_info;
1650 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1651 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1652 cfg->fc_flags = rtmsg->rtmsg_flags;
1654 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1655 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1656 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
/*
 * Handle the legacy route ioctls.
 *
 * SIOCADDRT and SIOCDELRT share one path: the caller must have
 * CAP_NET_ADMIN, the user's struct in6_rtmsg is copied in from arg,
 * converted with rtmsg_to_fib6_config(), and the resulting config is handed
 * to ip6_route_add() or ip6_route_del(), whose result is stored in err.
 */
1659 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1661 struct fib6_config cfg;
1662 struct in6_rtmsg rtmsg;
1666 case SIOCADDRT: /* Add a route */
1667 case SIOCDELRT: /* Delete a route */
1668 if (!capable(CAP_NET_ADMIN))
1670 err = copy_from_user(&rtmsg, arg,
1671 sizeof(struct in6_rtmsg));
1675 rtmsg_to_fib6_config(&rtmsg, &cfg);
1680 err = ip6_route_add(&cfg);
1683 err = ip6_route_del(&cfg);
1697 * Drop the packet on the floor
/*
 * Account for, and reject, a packet that cannot be routed.
 *
 * The type of the destination address in the IPv6 header selects the
 * statistic: IPSTATS_MIB_INADDRERRORS goes with the
 * IPV6_ADDR_ANY / IPV6_ADDR_RESERVED test, IPSTATS_MIB_OUTNOROUTES is the
 * other counter.  An ICMPv6 Destination Unreachable (no route) error is
 * generated for the packet, with skb->dev passed as the device.
 */
1700 static int ip6_pkt_discard(struct sk_buff *skb)
1702 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1703 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1704 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1706 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1707 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/*
 * Variant of ip6_pkt_discard() that first points skb->dev at the device of
 * the packet's dst entry, so the ICMPv6 error sent by ip6_pkt_discard() is
 * passed that device.  Returns whatever ip6_pkt_discard() returns.
 */
1712 static int ip6_pkt_discard_out(struct sk_buff *skb)
1714 skb->dev = skb->dst->dev;
1715 return ip6_pkt_discard(skb);
1719 * Allocate a dst for local (unicast / anycast) address.
/*
 * Build the rt6_info used for one of this node's own addresses.
 *
 * The entry is a host route (DST_HOST, prefix length 128) for addr, bound
 * to loopback_dev, with ip6_input / ip6_output as handlers and idev stored
 * in rt6i_idev.  The MTU and advertised-MSS metrics are derived from the
 * loopback device and the hop-limit metric is set to -1.  Flags start as
 * RTF_UP | RTF_NONEXTHOP and then gain RTF_ANYCAST or RTF_LOCAL.  The route
 * is tagged with RT6_TABLE_LOCAL and its reference count is set to 1.
 *
 * Returns ERR_PTR(-ENOMEM) on the failure paths visible here, including
 * when no neighbour entry can be obtained; the dst is freed first in that
 * case.
 */
1722 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1723 const struct in6_addr *addr,
1726 struct rt6_info *rt = ip6_dst_alloc();
1729 return ERR_PTR(-ENOMEM);
/* The route holds a reference on the loopback device it points at. */
1731 dev_hold(&loopback_dev);
1734 rt->u.dst.flags = DST_HOST;
1735 rt->u.dst.input = ip6_input;
1736 rt->u.dst.output = ip6_output;
1737 rt->rt6i_dev = &loopback_dev;
1738 rt->rt6i_idev = idev;
1739 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1740 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1741 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1742 rt->u.dst.obsolete = -1;
1744 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1746 rt->rt6i_flags |= RTF_ANYCAST;
1748 rt->rt6i_flags |= RTF_LOCAL;
1749 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1750 if (rt->rt6i_nexthop == NULL) {
1751 dst_free((struct dst_entry *) rt);
1752 return ERR_PTR(-ENOMEM);
1755 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1756 rt->rt6i_dst.plen = 128;
1757 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1759 atomic_set(&rt->u.dst.__refcnt, 1);
/*
 * Per-route callback that rt6_ifdown() passes to fib6_clean_all().
 *
 * A route is selected when its device equals arg, or arg is NULL (meaning
 * any device), and it is not ip6_null_entry.  Selected routes are traced
 * as "deleted by ifdown".
 */
1764 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1766 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1767 rt != &ip6_null_entry) {
1768 RT6_TRACE("deleted by ifdown %p\n", rt);
/*
 * Invoke fib6_clean_all() with fib6_ifdown() as the callback and dev as its
 * argument.  fib6_ifdown() treats a NULL argument as "any device".
 */
1774 void rt6_ifdown(struct net_device *dev)
1776 fib6_clean_all(fib6_ifdown, 0, dev);
/*
 * Argument bundle passed through fib6_clean_all() to
 * rt6_mtu_change_route(), which reads its ->dev and ->mtu members.
 */
1779 struct rt6_mtu_change_arg
/* Device whose MTU changed; routes on other devices are left alone. */
1781 struct net_device *dev;
1785 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1787 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1788 struct inet6_dev *idev;
1790 /* In IPv6 pmtu discovery is not optional,
1791 so that RTAX_MTU lock cannot disable it.
1792 We still use this lock to block changes
1793 caused by addrconf/ndisc.
1796 idev = __in6_dev_get(arg->dev);
1800 /* For administrative MTU increase, there is no way to discover
1801 IPv6 PMTU increase, so PMTU increase should be updated here.
1802 Since RFC 1981 doesn't include administrative MTU increase
1803 update PMTU increase is a MUST. (i.e. jumbo frame)
1806 If new MTU is less than route PMTU, this new MTU will be the
1807 lowest MTU in the path, update the route PMTU to reflect PMTU
1808 decreases; if new MTU is greater than route PMTU, and the
1809 old MTU is the lowest MTU in the path, update the route PMTU
1810 to reflect the increase. In this case if the other nodes' MTU
1811 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1814 if (rt->rt6i_dev == arg->dev &&
1815 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1816 (dst_mtu(&rt->u.dst) > arg->mtu ||
1817 (dst_mtu(&rt->u.dst) < arg->mtu &&
1818 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1819 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1820 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/*
 * Apply a device MTU change to the routes: a struct rt6_mtu_change_arg is
 * set up (presumably from dev and mtu - TODO confirm) and
 * rt6_mtu_change_route() is run through fib6_clean_all() with it.
 */
1824 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1826 struct rt6_mtu_change_arg arg = {
1831 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
/*
 * Netlink attribute policy for IPv6 route messages; passed to nlmsg_parse()
 * by rtm_to_fib6_config() and inet6_rtm_getroute().  RTA_GATEWAY must be at
 * least sizeof(struct in6_addr) long, RTA_OIF / RTA_IIF / RTA_PRIORITY are
 * u32 values and RTA_METRICS is a nested attribute.  Attributes without an
 * entry here (e.g. RTA_DST, RTA_SRC) are length-checked by their users.
 */
1834 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1835 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
1836 [RTA_OIF] = { .type = NLA_U32 },
1837 [RTA_IIF] = { .type = NLA_U32 },
1838 [RTA_PRIORITY] = { .type = NLA_U32 },
1839 [RTA_METRICS] = { .type = NLA_NESTED },
/*
 * Convert an rtnetlink route request into a fib6_config.
 *
 * The message is parsed with nlmsg_parse() against rtm_ipv6_policy, cfg is
 * zeroed, and the rtmsg header fields (table, prefix lengths, protocol) are
 * copied over.  RTF_UP is always set; RTN_UNREACHABLE adds RTF_REJECT and a
 * gateway attribute adds RTF_GATEWAY.  RTA_DST / RTA_SRC are length-checked
 * against the byte count implied by the prefix length before being copied.
 *
 * cfg->fc_mx is set to the payload of RTA_METRICS inside the message and
 * cfg->fc_nlinfo.nlh to nlh itself; nothing is duplicated, so cfg should
 * not be used after the request buffer is gone.
 */
1842 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1843 struct fib6_config *cfg)
1846 struct nlattr *tb[RTA_MAX+1];
1849 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1854 rtm = nlmsg_data(nlh);
1855 memset(cfg, 0, sizeof(*cfg));
1857 cfg->fc_table = rtm->rtm_table;
1858 cfg->fc_dst_len = rtm->rtm_dst_len;
1859 cfg->fc_src_len = rtm->rtm_src_len;
1860 cfg->fc_flags = RTF_UP;
1861 cfg->fc_protocol = rtm->rtm_protocol;
1863 if (rtm->rtm_type == RTN_UNREACHABLE)
1864 cfg->fc_flags |= RTF_REJECT;
/* Remember who asked, for notifications sent on behalf of this request. */
1866 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1867 cfg->fc_nlinfo.nlh = nlh;
1869 if (tb[RTA_GATEWAY]) {
1870 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1871 cfg->fc_flags |= RTF_GATEWAY;
/* Number of whole bytes needed to hold rtm_dst_len bits. */
1875 int plen = (rtm->rtm_dst_len + 7) >> 3;
1877 if (nla_len(tb[RTA_DST]) < plen)
1880 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
/* Same rounding for the source prefix. */
1884 int plen = (rtm->rtm_src_len + 7) >> 3;
1886 if (nla_len(tb[RTA_SRC]) < plen)
1889 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1893 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1895 if (tb[RTA_PRIORITY])
1896 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1898 if (tb[RTA_METRICS]) {
1899 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1900 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
/* The 32-bit RTA_TABLE value replaces the rtm_table value stored above. */
1904 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
/*
 * rtnetlink handler for route deletion: translate the request with
 * rtm_to_fib6_config() and pass the resulting config to ip6_route_del(),
 * whose result is returned.  The arg parameter is not used on the visible
 * lines.
 */
1911 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1913 struct fib6_config cfg;
1916 err = rtm_to_fib6_config(skb, nlh, &cfg);
1920 return ip6_route_del(&cfg);
/*
 * rtnetlink handler for route creation: translate the request with
 * rtm_to_fib6_config() and pass the resulting config to ip6_route_add(),
 * whose result is returned.  The arg parameter is not used on the visible
 * lines.
 */
1923 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1925 struct fib6_config cfg;
1928 err = rtm_to_fib6_config(skb, nlh, &cfg);
1932 return ip6_route_add(&cfg);
/*
 * Append one route message describing rt to skb.
 *
 * @dst, @src: optional explicit addresses; when one is put into the
 *             message the corresponding prefix length is reported as 128,
 *             otherwise the route's own prefix is used (if non-empty)
 * @iif:       input interface index, reported as RTA_IIF
 * @type, @pid, @seq, @flags: netlink header values for nlmsg_put()
 * @prefix:    non-zero when the requester only wants RTF_PREFIX_RT routes
 *
 * The message carries the table id, route type (unreachable / local /
 * unicast), protocol, destination and source prefixes, metrics, gateway,
 * output interface, priority and an RTA_CACHEINFO block.
 *
 * Returns the value of nlmsg_end() on success.  If an attribute does not
 * fit, control reaches nla_put_failure and nlmsg_cancel() is returned.
 */
1935 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1936 struct in6_addr *dst, struct in6_addr *src,
1937 int iif, int type, u32 pid, u32 seq,
1938 int prefix, unsigned int flags)
1941 struct nlmsghdr *nlh;
1942 struct rta_cacheinfo ci;
1945 if (prefix) { /* user wants prefix routes only */
1946 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1947 /* success since this is not a prefix route */
1952 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1956 rtm = nlmsg_data(nlh);
1957 rtm->rtm_family = AF_INET6;
1958 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1959 rtm->rtm_src_len = rt->rt6i_src.plen;
/* Table id is reported both in the header and as a 32-bit attribute. */
1962 table = rt->rt6i_table->tb6_id;
1964 table = RT6_TABLE_UNSPEC;
1965 rtm->rtm_table = table;
1966 NLA_PUT_U32(skb, RTA_TABLE, table);
1967 if (rt->rt6i_flags&RTF_REJECT)
1968 rtm->rtm_type = RTN_UNREACHABLE;
1969 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1970 rtm->rtm_type = RTN_LOCAL;
1972 rtm->rtm_type = RTN_UNICAST;
1974 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/*
 * Start from the protocol stored in the route, then let the flags
 * override it.  Order matters: RTF_DYNAMIC wins over RTF_ADDRCONF, which
 * wins over RTF_DEFAULT.
 */
1975 rtm->rtm_protocol = rt->rt6i_protocol;
1976 if (rt->rt6i_flags&RTF_DYNAMIC)
1977 rtm->rtm_protocol = RTPROT_REDIRECT;
1978 else if (rt->rt6i_flags & RTF_ADDRCONF)
1979 rtm->rtm_protocol = RTPROT_KERNEL;
1980 else if (rt->rt6i_flags&RTF_DEFAULT)
1981 rtm->rtm_protocol = RTPROT_RA;
1983 if (rt->rt6i_flags&RTF_CACHE)
1984 rtm->rtm_flags |= RTM_F_CLONED;
1987 NLA_PUT(skb, RTA_DST, 16, dst);
1988 rtm->rtm_dst_len = 128;
1989 } else if (rtm->rtm_dst_len)
1990 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1991 #ifdef CONFIG_IPV6_SUBTREES
1993 NLA_PUT(skb, RTA_SRC, 16, src);
1994 rtm->rtm_src_len = 128;
1995 } else if (rtm->rtm_src_len)
1996 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1999 NLA_PUT_U32(skb, RTA_IIF, iif);
/* Preferred source is only reported when source selection succeeds. */
2001 struct in6_addr saddr_buf;
2002 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2003 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2006 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2007 goto nla_put_failure;
/* The gateway is taken from the neighbour entry attached to the dst. */
2009 if (rt->u.dst.neighbour)
2010 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2013 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2015 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
/* Cache info times are converted from jiffies to clock_t units. */
2016 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2017 if (rt->rt6i_expires)
2018 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2021 ci.rta_used = rt->u.dst.__use;
2022 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2023 ci.rta_error = rt->u.dst.error;
2027 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2029 return nlmsg_end(skb, nlh);
2032 return nlmsg_cancel(skb, nlh);
/*
 * Dump callback: emit rt as one RTM_NEWROUTE message flagged NLM_F_MULTI
 * into arg->skb, addressed to the requester recorded in arg->cb.
 *
 * If the dump request is large enough to contain a struct rtmsg, its
 * RTM_F_PREFIX flag decides whether rt6_fill_node() is asked for prefix
 * routes only.
 */
2035 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2037 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2040 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2041 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2042 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2046 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2047 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2048 prefix, NLM_F_MULTI);
/*
 * rtnetlink handler for a single route lookup.
 *
 * The request is parsed against rtm_ipv6_policy and turned into a flow:
 * RTA_SRC / RTA_DST (each must be at least a full struct in6_addr) give the
 * addresses, RTA_IIF the input interface and RTA_OIF the output interface.
 * The flow is resolved with ip6_route_output(); the resulting route is
 * attached to a freshly allocated skb, described with rt6_fill_node() as an
 * RTM_NEWROUTE message and unicast back to the requester.
 */
2051 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2053 struct nlattr *tb[RTA_MAX+1];
2054 struct rt6_info *rt;
2055 struct sk_buff *skb;
2060 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2065 memset(&fl, 0, sizeof(fl));
2068 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2071 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2075 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2078 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2082 iif = nla_get_u32(tb[RTA_IIF]);
2085 fl.oif = nla_get_u32(tb[RTA_OIF]);
/* The input interface index is resolved to a device by index lookup. */
2088 struct net_device *dev;
2089 dev = __dev_get_by_index(iif);
2096 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2102 /* Reserve room for dummy headers, this skb can pass
2103 through good chunk of routing engine.
2105 skb->mac.raw = skb->data;
2106 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2108 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
/* Attaching the route to the skb ties the route reference to the skb. */
2109 skb->dst = &rt->u.dst;
2111 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2112 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2113 nlh->nlmsg_seq, 0, 0);
2119 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
/*
 * Announce a route event to RTNLGRP_IPV6_ROUTE listeners.
 *
 * @event: netlink message type to use for the notification
 * @rt:    route the event is about
 * @info:  originating request, if any; supplies the sequence number
 *
 * A message sized for a struct rtmsg plus 256 bytes of attributes is
 * allocated with gfp_any(), filled by rt6_fill_node() and sent through
 * rtnl_notify().  Errors are recorded on the group with rtnl_set_sk_err().
 */
2124 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2126 struct sk_buff *skb;
2127 u32 pid = 0, seq = 0;
2128 struct nlmsghdr *nlh = NULL;
2129 int payload = sizeof(struct rtmsg) + 256;
2136 seq = nlh->nlmsg_seq;
2139 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2143 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2149 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2152 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2159 #ifdef CONFIG_PROC_FS
/*
 * Fixed size, in bytes, of one text record produced by rt6_info_route().
 * rt6_info_route() and rt6_proc_info() divide and reduce the read offset by
 * this value to find the first record to emit and the starting byte within
 * it.
 */
2161 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * fib6_clean_all() callback that formats one route as a text record at
 * arg->buffer + arg->len.
 *
 * Record layout, in order: the destination address as 32 hex digits
 * followed by a two-digit hex field; the same pair for the source (under
 * CONFIG_IPV6_SUBTREES, with an all-zero placeholder string otherwise,
 * apparently - TODO confirm); the next hop's primary key as 32 hex digits,
 * or zeros when the route has no next hop; then metric, reference count,
 * use count and flags as 8-digit hex numbers and the device name (empty
 * when the route has no device).
 *
 * arg->skip, arg->offset and arg->length position the output window; see
 * the two checks at the top.
 */
2172 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2174 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
/* Records that lie wholly before the requested offset are not emitted. */
2177 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2182 if (arg->len >= arg->length)
2185 for (i=0; i<16; i++) {
2186 sprintf(arg->buffer + arg->len, "%02x",
2187 rt->rt6i_dst.addr.s6_addr[i]);
2190 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2193 #ifdef CONFIG_IPV6_SUBTREES
2194 for (i=0; i<16; i++) {
2195 sprintf(arg->buffer + arg->len, "%02x",
2196 rt->rt6i_src.addr.s6_addr[i]);
2199 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2202 sprintf(arg->buffer + arg->len,
2203 "00000000000000000000000000000000 00 ");
2207 if (rt->rt6i_nexthop) {
2208 for (i=0; i<16; i++) {
2209 sprintf(arg->buffer + arg->len, "%02x",
2210 rt->rt6i_nexthop->primary_key[i]);
2214 sprintf(arg->buffer + arg->len,
2215 "00000000000000000000000000000000");
2218 arg->len += sprintf(arg->buffer + arg->len,
2219 " %08x %08x %08x %08x %8s\n",
2220 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2221 rt->u.dst.__use, rt->rt6i_flags,
2222 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * Read handler registered for the "ipv6_route" /proc/net entry in
 * ip6_route_init().
 *
 * rt6_info_route() is run through fib6_clean_all() to format the records.
 * Afterwards *start and the produced length are both adjusted by
 * offset % RT6_INFO_LEN, so a read that begins in the middle of a record
 * starts at the right byte, and the length is compared with the caller's
 * limit.
 */
2226 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2228 struct rt6_proc_arg arg = {
2234 fib6_clean_all(rt6_info_route, 0, &arg);
2238 *start += offset % RT6_INFO_LEN;
2240 arg.len -= offset % RT6_INFO_LEN;
2242 if (arg.len > length)
/*
 * seq_file show routine for the routing statistics: prints one line of
 * seven 4-digit hex fields - fib nodes, fib route nodes, allocated routes,
 * route entries, cached routes, the current ip6_dst_ops entry count and
 * discarded routes, in that order.
 */
2250 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2252 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2253 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2254 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2255 rt6_stats.fib_rt_cache,
2256 atomic_read(&ip6_dst_ops.entries),
2257 rt6_stats.fib_discarded_routes);
/*
 * open() handler for the statistics file: binds rt6_stats_seq_show() via
 * single_open(), with no private data.
 */
2262 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2264 return single_open(file, rt6_stats_seq_show, NULL);
/*
 * File operations for the "rt6_stats" /proc/net entry (registered in
 * ip6_route_init()): opened through rt6_stats_seq_open(), seekable with
 * seq_lseek() and released with single_release() to match single_open().
 */
2267 static struct file_operations rt6_stats_seq_fops = {
2268 .owner = THIS_MODULE,
2269 .open = rt6_stats_seq_open,
2271 .llseek = seq_lseek,
2272 .release = single_release,
2274 #endif /* CONFIG_PROC_FS */
2276 #ifdef CONFIG_SYSCTL
/* Backing store for the "flush" sysctl; read by the handler below. */
2278 static int flush_delay;
/*
 * proc handler for the "flush" sysctl entry.
 *
 * The written integer is stored into flush_delay by proc_dointvec() and
 * then passed to fib6_run_gc(); a value <= 0 is turned into ~0UL.
 * NOTE(review): the return value of proc_dointvec() is not checked on the
 * visible call, so a malformed write would still trigger a gc run with the
 * previous flush_delay - confirm whether that is intended.
 */
2281 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2282 void __user *buffer, size_t *lenp, loff_t *ppos)
2285 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2286 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2292 ctl_table ipv6_route_table[] = {
2294 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2295 .procname = "flush",
2296 .data = &flush_delay,
2297 .maxlen = sizeof(int),
2299 .proc_handler = &ipv6_sysctl_rtcache_flush
2302 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2303 .procname = "gc_thresh",
2304 .data = &ip6_dst_ops.gc_thresh,
2305 .maxlen = sizeof(int),
2307 .proc_handler = &proc_dointvec,
2310 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2311 .procname = "max_size",
2312 .data = &ip6_rt_max_size,
2313 .maxlen = sizeof(int),
2315 .proc_handler = &proc_dointvec,
2318 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2319 .procname = "gc_min_interval",
2320 .data = &ip6_rt_gc_min_interval,
2321 .maxlen = sizeof(int),
2323 .proc_handler = &proc_dointvec_jiffies,
2324 .strategy = &sysctl_jiffies,
2327 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2328 .procname = "gc_timeout",
2329 .data = &ip6_rt_gc_timeout,
2330 .maxlen = sizeof(int),
2332 .proc_handler = &proc_dointvec_jiffies,
2333 .strategy = &sysctl_jiffies,
2336 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2337 .procname = "gc_interval",
2338 .data = &ip6_rt_gc_interval,
2339 .maxlen = sizeof(int),
2341 .proc_handler = &proc_dointvec_jiffies,
2342 .strategy = &sysctl_jiffies,
2345 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2346 .procname = "gc_elasticity",
2347 .data = &ip6_rt_gc_elasticity,
2348 .maxlen = sizeof(int),
2350 .proc_handler = &proc_dointvec_jiffies,
2351 .strategy = &sysctl_jiffies,
2354 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2355 .procname = "mtu_expires",
2356 .data = &ip6_rt_mtu_expires,
2357 .maxlen = sizeof(int),
2359 .proc_handler = &proc_dointvec_jiffies,
2360 .strategy = &sysctl_jiffies,
2363 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2364 .procname = "min_adv_mss",
2365 .data = &ip6_rt_min_advmss,
2366 .maxlen = sizeof(int),
2368 .proc_handler = &proc_dointvec_jiffies,
2369 .strategy = &sysctl_jiffies,
2372 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2373 .procname = "gc_min_interval_ms",
2374 .data = &ip6_rt_gc_min_interval,
2375 .maxlen = sizeof(int),
2377 .proc_handler = &proc_dointvec_ms_jiffies,
2378 .strategy = &sysctl_ms_jiffies,
/*
 * Boot-time initialisation of IPv6 routing.
 *
 * Creates the "ip6_dst_cache" slab for struct rt6_info (hardware-cache
 * aligned) and panics if that fails.  With CONFIG_PROC_FS it registers the
 * "ipv6_route" entry (served by rt6_proc_info(), owned by this module) and
 * the world-readable "rt6_stats" entry under /proc/net.
 */
2385 void __init ip6_route_init(void)
2387 struct proc_dir_entry *p;
2389 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2390 sizeof(struct rt6_info),
2391 0, SLAB_HWCACHE_ALIGN,
2393 if (!ip6_dst_ops.kmem_cachep)
2394 panic("cannot create ip6_dst_cache");
2397 #ifdef CONFIG_PROC_FS
2398 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2400 p->owner = THIS_MODULE;
2402 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2407 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/*
 * Undo ip6_route_init(): tear down the fib6 rules (with
 * CONFIG_IPV6_MULTIPLE_TABLES), remove the "ipv6_route" and "rt6_stats"
 * /proc/net entries (with CONFIG_PROC_FS) and finally destroy the dst slab
 * cache.
 */
2412 void ip6_route_cleanup(void)
2414 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2415 fib6_rules_cleanup();
2417 #ifdef CONFIG_PROC_FS
2418 proc_net_remove("ipv6_route");
2419 proc_net_remove("rt6_stats");
2426 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);