2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
54 #include <linux/rtnetlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
/*
 * Routing-cache / garbage-collector tunables (defaults).
 * NOTE(review): presumably exported via sysctl since <linux/sysctl.h>
 * is included above -- confirm against the full file.
 */
76 static int ip6_rt_max_size = 4096;
77 static int ip6_rt_gc_min_interval = HZ / 2;
78 static int ip6_rt_gc_timeout = 60*HZ;
79 int ip6_rt_gc_interval = 30*HZ;
80 static int ip6_rt_gc_elasticity = 9;
81 static int ip6_rt_mtu_expires = 10*60*HZ;
/* minimum advertised MSS: minimum link MTU minus TCP (20) and IPv6 (40) headers */
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and helpers defined below. */
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void ip6_dst_destroy(struct dst_entry *);
88 static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90 static int ip6_dst_gc(void);
92 static int ip6_pkt_discard(struct sk_buff *skb);
93 static int ip6_pkt_discard_out(struct sk_buff *skb);
94 static void ip6_link_failure(struct sk_buff *skb);
95 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/*
 * dst_ops vtable wiring the generic destination-cache layer to the
 * IPv6-specific handlers declared above.
 * NOTE(review): some initializer lines (e.g. .family, .gc) appear to be
 * missing from this extraction -- confirm against the full file.
 */
97 static struct dst_ops ip6_dst_ops = {
99 .protocol = __constant_htons(ETH_P_IPV6),
102 .check = ip6_dst_check,
103 .destroy = ip6_dst_destroy,
104 .ifdown = ip6_dst_ifdown,
105 .negative_advice = ip6_negative_advice,
106 .link_failure = ip6_link_failure,
107 .update_pmtu = ip6_rt_update_pmtu,
108 .entry_size = sizeof(struct rt6_info),
/*
 * The "null route": returned when no route matches. Rejects traffic
 * (-ENETUNREACH) via ip6_pkt_discard{,_out} and is never freed
 * (static refcount of 1).
 */
111 struct rt6_info ip6_null_entry = {
114 .__refcnt = ATOMIC_INIT(1),
116 .dev = &loopback_dev,
118 .error = -ENETUNREACH,
119 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
120 .input = ip6_pkt_discard,
121 .output = ip6_pkt_discard_out,
123 .path = (struct dst_entry*)&ip6_null_entry,
/* worst possible metric so any real route wins */
126 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
127 .rt6i_metric = ~(u32) 0,
128 .rt6i_ref = ATOMIC_INIT(1),
/* Root of the IPv6 FIB radix tree; its leaf is the null route above. */
131 struct fib6_node ip6_routing_table = {
132 .leaf = &ip6_null_entry,
133 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
136 /* Protects all the ip6 fib */
/* Read-mostly: lookups take the read side, tree modification the write side. */
138 DEFINE_RWLOCK(rt6_lock);
141 /* allocate dst with ip6_dst_ops */
/* Allocate a zeroed rt6_info from the generic dst cache using ip6_dst_ops. */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
144 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * dst_ops.destroy callback: drop the route's reference on its inet6_dev.
 * NOTE(review): the in6_dev_put(idev) call appears to be in a missing line
 * of this extraction -- confirm against the full file.
 */
147 static void ip6_dst_destroy(struct dst_entry *dst)
149 struct rt6_info *rt = (struct rt6_info *)dst;
150 struct inet6_dev *idev = rt->rt6i_idev;
153 rt->rt6i_idev = NULL;
/*
 * dst_ops.ifdown callback: when the route's device goes away, repoint
 * rt6i_idev at the loopback device so the dst stays usable until freed.
 */
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
164 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 if (loopback_idev != NULL) {
/* NOTE(review): the in6_dev_put on the old idev is in a missing line */
167 rt->rt6i_idev = loopback_idev;
/* True if the route carries RTF_EXPIRES and its expiry time has passed. */
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
175 return (rt->rt6i_flags & RTF_EXPIRES &&
176 time_after(jiffies, rt->rt6i_expires));
180 * Route lookup. Any rt6_lock is implied.
/*
 * Walk the sibling list starting at @rt and pick the route whose device
 * matches @oif; loopback routes match via their idev's ifindex instead.
 * Falls back to a local route or ip6_null_entry when strict matching fails.
 * Caller must hold rt6_lock (see comment above).
 * NOTE(review): several branch bodies are in missing lines of this
 * extraction -- confirm against the full file.
 */
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
187 struct rt6_info *local = NULL;
188 struct rt6_info *sprt;
191 for (sprt = rt; sprt; sprt = sprt->u.next) {
192 struct net_device *dev = sprt->rt6i_dev;
193 if (dev->ifindex == oif)
195 if (dev->flags & IFF_LOOPBACK) {
196 if (sprt->rt6i_idev == NULL ||
197 sprt->rt6i_idev->dev->ifindex != oif) {
200 if (local && (!oif ||
201 local->rt6i_idev->dev->ifindex == oif))
212 return &ip6_null_entry;
218 * pointer to the last default router chosen. BH is disabled locally.
/* Round-robin state for default-router selection; guarded by rt6_dflt_lock. */
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
/*
 * Forget the cached default router. Called with @rt == NULL to force a
 * reset, or with the router being removed so a stale pointer is cleared.
 */
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
225 spin_lock_bh(&rt6_dflt_lock);
226 if (rt == NULL || rt == rt6_dflt_pointer) {
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 rt6_dflt_pointer = NULL;
230 spin_unlock_bh(&rt6_dflt_lock);
233 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Default Router Selection (RFC 2461 6.3.6): score each candidate default
 * router by neighbour reachability state, prefer the previously chosen
 * router when still (probably) reachable, otherwise round-robin starting
 * after rt6_dflt_pointer; last resort is any non-expired RTF_DEFAULT
 * addrconf route, and failing that ip6_null_entry.
 * NOTE(review): the scoring switch body and several loop bodies are in
 * missing lines of this extraction -- do not rely on this view for the
 * exact metric values.
 */
234 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
236 struct rt6_info *match = NULL;
237 struct rt6_info *sprt;
240 for (sprt = rt; sprt; sprt = sprt->u.next) {
241 struct neighbour *neigh;
246 sprt->rt6i_dev->ifindex == oif))
249 if (rt6_check_expired(sprt))
252 if (sprt == rt6_dflt_pointer)
255 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256 read_lock_bh(&neigh->lock);
257 switch (neigh->nud_state) {
275 read_unlock_bh(&neigh->lock);
278 read_unlock_bh(&neigh->lock);
283 if (m > mpri || m >= 12) {
287 /* we choose the last default router if it
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
297 spin_lock(&rt6_dflt_lock);
300 * No default routers are known to be reachable.
303 if (rt6_dflt_pointer) {
/* resume round-robin after the last router we handed out */
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
322 if (sprt == rt6_dflt_pointer)
329 if (rt6_dflt_pointer != match)
330 RT6_TRACE("changed default router: %p->%p\n",
331 rt6_dflt_pointer, match);
332 rt6_dflt_pointer = match;
334 spin_unlock(&rt6_dflt_lock);
338 * Last Resort: if no default routers found,
339 * use addrconf default route.
340 * We don't record this route.
342 for (sprt = ip6_routing_table.leaf;
343 sprt; sprt = sprt->u.next) {
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
348 sprt->rt6i_dev->ifindex == oif))) {
354 /* no default route. give up. */
355 match = &ip6_null_entry;
/*
 * Public lookup helper: find the best route to @daddr/@saddr constrained
 * to @oif, taking a reference on the result. Returns NULL-or-route
 * semantics are handled by the dst.error check (ip6_null_entry carries
 * -ENETUNREACH and is released here).
 */
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
365 struct fib6_node *fn;
368 read_lock_bh(&rt6_lock);
369 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370 rt = rt6_device_match(fn->leaf, oif, strict);
371 dst_hold(&rt->u.dst);
373 read_unlock_bh(&rt6_lock);
375 rt->u.dst.lastuse = jiffies;
376 if (rt->u.dst.error == 0)
378 dst_release(&rt->u.dst);
382 /* ip6_ins_rt is called with FREE rt6_lock.
383 It takes new route entry, the addition fails by any reason the
384 route is freed. In any case, if caller does not hold it, it may
/*
 * Insert @rt into the FIB under the write lock (see the comment above:
 * called with rt6_lock FREE; on failure fib6_add frees the route).
 * Returns 0 or a negative errno from fib6_add.
 */
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389 void *_rtattr, struct netlink_skb_parms *req)
393 write_lock_bh(&rt6_lock);
394 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395 write_unlock_bh(&rt6_lock);
400 /* No rt6_lock! If COW failed, the function returns dead route entry
401 with dst->error set to errno value.
/*
 * Clone @ort into a host (/128) RTF_CACHE route for @daddr. For
 * non-gateway routes the destination itself becomes the gateway
 * (and RTF_ANYCAST is set when @daddr is the anycast prefix address).
 * A neighbour entry for the gateway is attached at the end.
 * NOTE(review): the allocation-failure path and return statement are in
 * missing lines of this extraction.
 */
404 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
405 struct in6_addr *saddr)
413 rt = ip6_rt_copy(ort);
416 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417 if (rt->rt6i_dst.plen != 128 &&
418 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419 rt->rt6i_flags |= RTF_ANYCAST;
420 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
423 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424 rt->rt6i_dst.plen = 128;
425 rt->rt6i_flags |= RTF_CACHE;
426 rt->u.dst.flags |= DST_HOST;
428 #ifdef CONFIG_IPV6_SUBTREES
429 if (rt->rt6i_src.plen && saddr) {
430 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431 rt->rt6i_src.plen = 128;
435 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/*
 * COW wrapper: clone @ort via rt6_alloc_cow and insert the clone into the
 * FIB. On clone failure returns the (held) null entry; on insert failure
 * the error is recorded in the returned dst (see comment above the
 * function: caller checks dst->error).
 */
442 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
443 struct in6_addr *saddr, struct netlink_skb_parms *req)
445 struct rt6_info *rt = rt6_alloc_cow(ort, daddr, saddr);
449 dst_hold(&ip6_null_entry.u.dst);
450 return &ip6_null_entry;
453 dst_hold(&rt->u.dst);
455 err = ip6_ins_rt(rt, NULL, NULL, req);
457 rt->u.dst.error = err;
/*
 * Backtrack up the FIB tree when a strict lookup landed on the null
 * entry: retry the search from each parent node that carries route info
 * (RTN_RTINFO), bailing out with the held null route at the tree root.
 * Relies on fn/rt/strict being in scope at the expansion site.
 */
462 #define BACKTRACK() \
463 if (rt == &ip6_null_entry && strict) { \
464 while ((fn = fn->parent) != NULL) { \
465 if (fn->fn_flags & RTN_ROOT) { \
466 dst_hold(&rt->u.dst); \
469 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Route an incoming packet: look up skb's daddr/saddr in the FIB,
 * prefer a cached (RTF_CACHE) entry, otherwise COW a host route via
 * rt6_cow, retrying on the -EEXIST insert race a bounded number of
 * times. The chosen dst is attached to skb->dst.
 * NOTE(review): the relookup label and loop framing are in missing
 * lines of this extraction.
 */
475 void ip6_route_input(struct sk_buff *skb)
477 struct fib6_node *fn;
/* strict device matching for multicast / link-local destinations */
482 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
485 read_lock_bh(&rt6_lock);
487 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
488 &skb->nh.ipv6h->saddr);
493 if ((rt->rt6i_flags & RTF_CACHE)) {
494 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
496 dst_hold(&rt->u.dst);
500 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
503 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
504 struct rt6_info *nrt;
505 dst_hold(&rt->u.dst);
506 read_unlock_bh(&rt6_lock);
508 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
509 &skb->nh.ipv6h->saddr,
512 dst_release(&rt->u.dst);
515 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
518 /* Race condition! In the gap, when rt6_lock was
519 released someone could insert this route. Relookup.
521 dst_release(&rt->u.dst);
524 dst_hold(&rt->u.dst);
527 read_unlock_bh(&rt6_lock);
529 rt->u.dst.lastuse = jiffies;
531 skb->dst = (struct dst_entry *) rt;
/*
 * Route an outgoing flow: mirror of ip6_route_input keyed on the flowi.
 * Default routes additionally go through rt6_best_dflt for RFC 2461
 * router selection. Returns a held dst_entry.
 * NOTE(review): the relookup label, loop framing and return statement
 * are in missing lines of this extraction.
 */
534 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
536 struct fib6_node *fn;
/* strict device matching for multicast / link-local destinations */
541 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
544 read_lock_bh(&rt6_lock);
546 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
551 if ((rt->rt6i_flags & RTF_CACHE)) {
552 rt = rt6_device_match(rt, fl->oif, strict);
554 dst_hold(&rt->u.dst);
557 if (rt->rt6i_flags & RTF_DEFAULT) {
/* only addrconf-priority defaults are subject to router selection */
558 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
559 rt = rt6_best_dflt(rt, fl->oif);
561 rt = rt6_device_match(rt, fl->oif, strict);
565 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
566 struct rt6_info *nrt;
567 dst_hold(&rt->u.dst);
568 read_unlock_bh(&rt6_lock);
570 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
572 dst_release(&rt->u.dst);
575 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
578 /* Race condition! In the gap, when rt6_lock was
579 released someone could insert this route. Relookup.
581 dst_release(&rt->u.dst);
584 dst_hold(&rt->u.dst);
587 read_unlock_bh(&rt6_lock);
589 rt->u.dst.lastuse = jiffies;
596 * Destination cache support functions
/*
 * dst_ops.check callback: a cached dst is still valid only while its FIB
 * node's serial number matches the cookie taken at lookup time.
 */
599 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
603 rt = (struct rt6_info *) dst;
605 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops.negative_advice callback: on negative feedback, evict cached
 * clone routes from the FIB.
 */
611 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
613 struct rt6_info *rt = (struct rt6_info *) dst;
616 if (rt->rt6i_flags & RTF_CACHE)
617 ip6_del_rt(rt, NULL, NULL, NULL);
/*
 * dst_ops.link_failure callback: report address unreachability to the
 * sender, then expire the cached route immediately (or invalidate the
 * FIB node's serial number for default routes so lookups re-run).
 */
624 static void ip6_link_failure(struct sk_buff *skb)
628 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
630 rt = (struct rt6_info *) skb->dst;
632 if (rt->rt6i_flags&RTF_CACHE) {
633 dst_set_expires(&rt->u.dst, 0);
634 rt->rt6i_flags |= RTF_EXPIRES;
635 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
636 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops.update_pmtu callback: shrink the MTU metric of a host route.
 * Below the IPv6 minimum MTU (1280) the route keeps the old MTU value
 * semantics but gains ALLFRAG so a fragment header is always emitted.
 */
640 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
642 struct rt6_info *rt6 = (struct rt6_info*)dst;
644 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
645 rt6->rt6i_flags |= RTF_MODIFIED;
646 if (mtu < IPV6_MIN_MTU) {
648 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
650 dst->metrics[RTAX_MTU-1] = mtu;
654 /* Protected by rt6_lock. */
/* Singly-linked list of ndisc-allocated dsts awaiting garbage collection. */
655 static struct dst_entry *ndisc_dst_gc_list;
656 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derive the advertised MSS from a link MTU: subtract IPv6 + TCP headers,
 * clamp below at ip6_rt_min_advmss and above at the non-jumbo maximum.
 */
658 static inline unsigned int ipv6_advmss(unsigned int mtu)
660 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
662 if (mtu < ip6_rt_min_advmss)
663 mtu = ip6_rt_min_advmss;
666 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
667 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
668 * IPV6_MAXPLEN is also valid and means: "any MSS,
669 * rely only on pmtu discovery"
671 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * Allocate a standalone dst for neighbour-discovery traffic to @addr.
 * The dst is not inserted into the FIB; instead it is chained onto
 * ndisc_dst_gc_list (under the write lock) and reaped by ndisc_dst_gc.
 * When @neigh is NULL one is looked up via ndisc_get_neigh.
 * NOTE(review): error-path handling after ip6_dst_alloc failure is in
 * missing lines of this extraction.
 */
676 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
677 struct neighbour *neigh,
678 struct in6_addr *addr,
679 int (*output)(struct sk_buff *))
682 struct inet6_dev *idev = in6_dev_get(dev);
684 if (unlikely(idev == NULL))
687 rt = ip6_dst_alloc();
688 if (unlikely(rt == NULL)) {
697 neigh = ndisc_get_neigh(dev, addr);
700 rt->rt6i_idev = idev;
701 rt->rt6i_nexthop = neigh;
702 atomic_set(&rt->u.dst.__refcnt, 1);
703 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
704 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
705 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
706 rt->u.dst.output = output;
708 #if 0 /* there's no chance to use these for ndisc */
709 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
712 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
713 rt->rt6i_dst.plen = 128;
/* push onto the gc list under the FIB write lock */
716 write_lock_bh(&rt6_lock);
717 rt->u.dst.next = ndisc_dst_gc_list;
718 ndisc_dst_gc_list = &rt->u.dst;
719 write_unlock_bh(&rt6_lock);
721 fib6_force_start_gc();
724 return (struct dst_entry *)rt;
/*
 * Reap unreferenced dsts from ndisc_dst_gc_list, unlinking and freeing
 * entries whose refcount dropped to zero.
 * NOTE(review): the unlink/free body and the @more accounting are in
 * missing lines of this extraction.
 */
727 int ndisc_dst_gc(int *more)
729 struct dst_entry *dst, *next, **pprev;
733 pprev = &ndisc_dst_gc_list;
735 while ((dst = *pprev) != NULL) {
736 if (!atomic_read(&dst->__refcnt)) {
/*
 * dst_ops.gc callback: rate-limited garbage collection of the IPv6 dst
 * cache. The adaptive 'expire' age shrinks geometrically (by the
 * elasticity shift) while pressure persists and resets toward
 * ip6_rt_gc_timeout/2 once the cache is below the threshold.
 * Returns nonzero when the cache is still over ip6_rt_max_size.
 */
749 static int ip6_dst_gc(void)
751 static unsigned expire = 30*HZ;
752 static unsigned long last_gc;
753 unsigned long now = jiffies;
/* skip the expensive scan if we ran recently and are under the size cap */
755 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
756 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
762 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
763 expire = ip6_rt_gc_timeout>>1;
766 expire -= expire>>ip6_rt_gc_elasticity;
767 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
770 /* Clean host part of a prefix. Not necessary in radix tree,
771 but results in cleaner routing tables.
773 Remove it only when all the things will work!
/*
 * Return the device's IPv6 MTU (cnf.mtu6), falling back to IPV6_MIN_MTU
 * when the device has no inet6_dev.
 * NOTE(review): the in6_dev_put and return lines are missing from this
 * extraction.
 */
776 static int ipv6_get_mtu(struct net_device *dev)
778 int mtu = IPV6_MIN_MTU;
779 struct inet6_dev *idev;
781 idev = in6_dev_get(dev);
783 mtu = idev->cnf.mtu6;
/*
 * Return the device's configured hop limit, defaulting to the global
 * ipv6_devconf.hop_limit when the device has no inet6_dev.
 */
789 int ipv6_get_hoplimit(struct net_device *dev)
791 int hoplimit = ipv6_devconf.hop_limit;
792 struct inet6_dev *idev;
794 idev = in6_dev_get(dev);
796 hoplimit = idev->cnf.hop_limit;
/*
 * Add a route described by @rtmsg (plus optional netlink attributes) to
 * the FIB. Validates prefix lengths, resolves the output device/idev,
 * promotes loopback routes to reject routes, validates and resolves a
 * gateway (including the non-link-local exception path), binds a
 * neighbour for gatewayed routes, applies RTA_METRICS, fills metric
 * defaults, and inserts via ip6_ins_rt.
 * Returns 0 or a negative errno; on error the allocated dst is freed.
 * NOTE(review): many error-branch bodies and 'goto out' lines are in
 * missing lines of this extraction.
 */
806 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
807 void *_rtattr, struct netlink_skb_parms *req)
812 struct rt6_info *rt = NULL;
813 struct net_device *dev = NULL;
814 struct inet6_dev *idev = NULL;
817 rta = (struct rtattr **) _rtattr;
/* prefix lengths cannot exceed 128 bits */
819 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
821 #ifndef CONFIG_IPV6_SUBTREES
822 if (rtmsg->rtmsg_src_len)
825 if (rtmsg->rtmsg_ifindex) {
827 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
830 idev = in6_dev_get(dev);
835 if (rtmsg->rtmsg_metric == 0)
836 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
838 rt = ip6_dst_alloc();
845 rt->u.dst.obsolete = -1;
846 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
847 if (nlh && (r = NLMSG_DATA(nlh))) {
848 rt->rt6i_protocol = r->rtm_protocol;
850 rt->rt6i_protocol = RTPROT_BOOT;
853 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
855 if (addr_type & IPV6_ADDR_MULTICAST)
856 rt->u.dst.input = ip6_mc_input;
858 rt->u.dst.input = ip6_forward;
860 rt->u.dst.output = ip6_output;
862 ipv6_addr_prefix(&rt->rt6i_dst.addr,
863 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
864 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
865 if (rt->rt6i_dst.plen == 128)
866 rt->u.dst.flags = DST_HOST;
868 #ifdef CONFIG_IPV6_SUBTREES
869 ipv6_addr_prefix(&rt->rt6i_src.addr,
870 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
871 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
874 rt->rt6i_metric = rtmsg->rtmsg_metric;
876 /* We cannot add true routes via loopback here,
877 they would result in kernel looping; promote them to reject routes
879 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
880 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
881 /* hold loopback dev/idev if we haven't done so. */
882 if (dev != &loopback_dev) {
889 idev = in6_dev_get(dev);
895 rt->u.dst.output = ip6_pkt_discard_out;
896 rt->u.dst.input = ip6_pkt_discard;
897 rt->u.dst.error = -ENETUNREACH;
898 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
902 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
903 struct in6_addr *gw_addr;
906 gw_addr = &rtmsg->rtmsg_gateway;
907 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
908 gwa_type = ipv6_addr_type(gw_addr);
910 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
911 struct rt6_info *grt;
913 /* IPv6 strictly inhibits using not link-local
914 addresses as nexthop address.
915 Otherwise, router will not able to send redirects.
916 It is very good, but in some (rare!) circumstances
917 (SIT, PtP, NBMA NOARP links) it is handy to allow
918 some exceptions. --ANK
921 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* the non-link-local gateway must itself be reachable by a direct route */
924 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
930 if (dev != grt->rt6i_dev) {
931 dst_release(&grt->u.dst);
936 idev = grt->rt6i_idev;
938 in6_dev_hold(grt->rt6i_idev);
940 if (!(grt->rt6i_flags&RTF_GATEWAY))
942 dst_release(&grt->u.dst);
948 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
956 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
957 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
958 if (IS_ERR(rt->rt6i_nexthop)) {
959 err = PTR_ERR(rt->rt6i_nexthop);
960 rt->rt6i_nexthop = NULL;
965 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* copy user-supplied metrics from the RTA_METRICS attribute, if any */
968 if (rta && rta[RTA_METRICS-1]) {
969 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
970 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
972 while (RTA_OK(attr, attrlen)) {
973 unsigned flavor = attr->rta_type;
975 if (flavor > RTAX_MAX) {
979 rt->u.dst.metrics[flavor-1] =
980 *(u32 *)RTA_DATA(attr);
982 attr = RTA_NEXT(attr, attrlen);
/* fill defaults for any metric the user did not set */
986 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
987 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
988 if (!rt->u.dst.metrics[RTAX_MTU-1])
989 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
990 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
991 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
993 rt->rt6i_idev = idev;
994 return ip6_ins_rt(rt, nlh, _rtattr, req);
/* error path: release the partially-initialized dst */
1002 dst_free((struct dst_entry *) rt);
/*
 * Remove @rt from the FIB under the write lock, clearing the cached
 * default-router pointer first so it cannot dangle; the caller's
 * reference is dropped here.
 */
1006 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1010 write_lock_bh(&rt6_lock);
1012 rt6_reset_dflt_pointer(NULL);
1014 err = fib6_del(rt, nlh, _rtattr, req);
1015 dst_release(&rt->u.dst);
1017 write_unlock_bh(&rt6_lock);
/*
 * Delete the route matching @rtmsg: locate the exact FIB node for the
 * dst/src prefixes, then scan its leaf list for a route that also
 * matches the requested ifindex, gateway and metric (each only when
 * specified). The matched route is held, the read lock dropped, and
 * deletion delegated to ip6_del_rt.
 */
1022 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1024 struct fib6_node *fn;
1025 struct rt6_info *rt;
1028 read_lock_bh(&rt6_lock);
1030 fn = fib6_locate(&ip6_routing_table,
1031 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1032 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1035 for (rt = fn->leaf; rt; rt = rt->u.next) {
1036 if (rtmsg->rtmsg_ifindex &&
1037 (rt->rt6i_dev == NULL ||
1038 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1040 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1041 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1043 if (rtmsg->rtmsg_metric &&
1044 rtmsg->rtmsg_metric != rt->rt6i_metric)
1046 dst_hold(&rt->u.dst);
1047 read_unlock_bh(&rt6_lock);
1049 return ip6_del_rt(rt, nlh, _rtattr, req);
1052 read_unlock_bh(&rt6_lock);
/*
 * Process an ICMPv6 redirect from @saddr for destination @dest: verify
 * the redirect came from the current nexthop (searching default routers
 * when needed), update the neighbour cache, then install a cloned
 * RTF_DYNAMIC|RTF_CACHE host route via the new gateway, replacing any
 * previous cache entry for the destination.
 * NOTE(review): several 'goto out' lines and branch bodies are in
 * missing lines of this extraction.
 */
1060 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1061 struct neighbour *neigh, u8 *lladdr, int on_link)
1063 struct rt6_info *rt, *nrt;
1065 /* Locate old route to this destination. */
1066 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1071 if (neigh->dev != rt->rt6i_dev)
1075 * Current route is on-link; redirect is always invalid.
1077 * Seems, previous statement is not true. It could
1078 * be node, which looks for us as on-link (f.e. proxy ndisc)
1079 * But then router serving it might decide, that we should
1080 * know truth 8)8) --ANK (980726).
1082 if (!(rt->rt6i_flags&RTF_GATEWAY))
1086 * RFC 2461 specifies that redirects should only be
1087 * accepted if they come from the nexthop to the target.
1088 * Due to the way default routers are chosen, this notion
1089 * is a bit fuzzy and one might need to check all default
1092 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1093 if (rt->rt6i_flags & RTF_DEFAULT) {
1094 struct rt6_info *rt1;
1096 read_lock(&rt6_lock);
1097 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1098 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1099 dst_hold(&rt1->u.dst);
1100 dst_release(&rt->u.dst);
1101 read_unlock(&rt6_lock);
1106 read_unlock(&rt6_lock);
1108 if (net_ratelimit())
1109 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1110 "for redirect target\n");
1117 * We have finally decided to accept it.
/* record the new link-layer address; keep router flag unless on-link */
1120 neigh_update(neigh, lladdr, NUD_STALE,
1121 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1122 NEIGH_UPDATE_F_OVERRIDE|
1123 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1124 NEIGH_UPDATE_F_ISROUTER))
1128 * Redirect received -> path was valid.
1129 * Look, redirects are sent only in response to data packets,
1130 * so that this nexthop apparently is reachable. --ANK
1132 dst_confirm(&rt->u.dst);
1134 /* Duplicate redirect: silently ignore. */
1135 if (neigh == rt->u.dst.neighbour)
1138 nrt = ip6_rt_copy(rt);
1142 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1144 nrt->rt6i_flags &= ~RTF_GATEWAY;
1146 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1147 nrt->rt6i_dst.plen = 128;
1148 nrt->u.dst.flags |= DST_HOST;
1150 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1151 nrt->rt6i_nexthop = neigh_clone(neigh);
1152 /* Reset pmtu, it may be better */
1153 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1154 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1156 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1159 if (rt->rt6i_flags&RTF_CACHE) {
1160 ip6_del_rt(rt, NULL, NULL, NULL);
1165 dst_release(&rt->u.dst);
1170 * Handle ICMP "packet too big" messages
1171 * i.e. Path MTU discovery
/*
 * Process an ICMPv6 "packet too big" for @daddr: clamp @pmtu to
 * IPV6_MIN_MTU (setting ALLFRAG below it), then record the new PMTU
 * either directly on an existing cache entry, or on a fresh COW'd /
 * cloned host route that expires after ip6_rt_mtu_expires so PMTU
 * increases can be rediscovered (RFC 1981).
 * NOTE(review): ALLFRAG condition lines and some 'goto out' framing are
 * in missing lines of this extraction.
 */
1174 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1175 struct net_device *dev, u32 pmtu)
1177 struct rt6_info *rt, *nrt;
1180 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
/* larger-or-equal reported MTU conveys no new information */
1184 if (pmtu >= dst_mtu(&rt->u.dst))
1187 if (pmtu < IPV6_MIN_MTU) {
1189 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1190 * MTU (1280) and a fragment header should always be included
1191 * after a node receiving Too Big message reporting PMTU is
1192 * less than the IPv6 Minimum Link MTU.
1194 pmtu = IPV6_MIN_MTU;
1198 /* New mtu received -> path was valid.
1199 They are sent only in response to data packets,
1200 so that this nexthop apparently is reachable. --ANK
1202 dst_confirm(&rt->u.dst);
1204 /* Host route. If it is static, it would be better
1205 not to override it, but add new one, so that
1206 when cache entry will expire old pmtu
1207 would return automatically.
1209 if (rt->rt6i_flags & RTF_CACHE) {
1210 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1212 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1213 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1214 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1219 Two cases are possible:
1220 1. It is connected route. Action: COW
1221 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1223 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1224 nrt = rt6_alloc_cow(rt, daddr, saddr);
1228 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1230 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1232 /* According to RFC 1981, detecting PMTU increase shouldn't be
1233 * happened within 5 mins, the recommended timer is 10 mins.
1234 * Here this route expiration time is set to ip6_rt_mtu_expires
1235 * which is 10 mins. After 10 mins the decreased pmtu is expired
1236 * and detecting PMTU increase will be automatically happened.
1238 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1239 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1241 ip6_ins_rt(nrt, NULL, NULL, NULL);
1243 nrt = ip6_rt_copy(rt);
1246 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1247 nrt->rt6i_dst.plen = 128;
1248 nrt->u.dst.flags |= DST_HOST;
1249 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1250 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1251 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1252 if (nrt->rt6i_flags & RTF_REJECT)
1253 nrt->u.dst.error = rt->u.dst.error;
1254 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1256 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1257 ip6_ins_rt(nrt, NULL, NULL, NULL);
1261 dst_release(&rt->u.dst);
1265 * Misc support functions
/*
 * Allocate a new rt6_info initialized as a copy of @ort: metrics, device
 * (with references taken), idev, gateway and destination key are copied;
 * expiry state is cleared (RTF_EXPIRES stripped, metric reset to 0) so
 * the clone starts fresh.
 * NOTE(review): the NULL check after ip6_dst_alloc and the return
 * statement are in missing lines of this extraction.
 */
1268 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1270 struct rt6_info *rt = ip6_dst_alloc();
1273 rt->u.dst.input = ort->u.dst.input;
1274 rt->u.dst.output = ort->u.dst.output;
1276 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1277 rt->u.dst.dev = ort->u.dst.dev;
1279 dev_hold(rt->u.dst.dev);
1280 rt->rt6i_idev = ort->rt6i_idev;
1282 in6_dev_hold(rt->rt6i_idev);
1283 rt->u.dst.lastuse = jiffies;
1284 rt->rt6i_expires = 0;
1286 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1287 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1288 rt->rt6i_metric = 0;
1290 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1291 #ifdef CONFIG_IPV6_SUBTREES
1292 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/*
 * Find the default-router route through @dev with gateway @addr in the
 * root node's leaf list; returns it with a reference held (or NULL when
 * absent -- the hold is conditional on a line missing from this view).
 */
1298 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1300 struct rt6_info *rt;
1301 struct fib6_node *fn;
1303 fn = &ip6_routing_table;
1305 write_lock_bh(&rt6_lock);
1306 for (rt = fn->leaf; rt; rt=rt->u.next) {
1307 if (dev == rt->rt6i_dev &&
1308 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1312 dst_hold(&rt->u.dst);
1313 write_unlock_bh(&rt6_lock);
/*
 * Install a default route learned from a router advertisement: build an
 * in6_rtmsg for an expiring gateway/default route with the standard RA
 * metric (1024), add it, then look it back up to return a held pointer.
 */
1317 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1318 struct net_device *dev)
1320 struct in6_rtmsg rtmsg;
1322 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1323 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1324 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1325 rtmsg.rtmsg_metric = 1024;
1326 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1328 rtmsg.rtmsg_ifindex = dev->ifindex;
1330 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1331 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Remove every autoconfigured default route from the FIB. Each match is
 * held, the read lock dropped, the route deleted, and the scan restarted
 * from the top (deletion invalidates the list walk).
 * NOTE(review): the restart 'goto' is in a missing line of this
 * extraction.
 */
1334 void rt6_purge_dflt_routers(void)
1336 struct rt6_info *rt;
1339 read_lock_bh(&rt6_lock);
1340 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1341 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1342 dst_hold(&rt->u.dst);
1344 rt6_reset_dflt_pointer(NULL);
1346 read_unlock_bh(&rt6_lock);
1348 ip6_del_rt(rt, NULL, NULL, NULL);
1353 read_unlock_bh(&rt6_lock);
/*
 * ioctl entry point for SIOCADDRT/SIOCDELRT: requires CAP_NET_ADMIN,
 * copies the in6_rtmsg from userspace, and dispatches to
 * ip6_route_add / ip6_route_del.
 * NOTE(review): the copy_from_user error check, rtnl locking and return
 * paths are in missing lines of this extraction.
 */
1356 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1358 struct in6_rtmsg rtmsg;
1362 case SIOCADDRT: /* Add a route */
1363 case SIOCDELRT: /* Delete a route */
1364 if (!capable(CAP_NET_ADMIN))
1366 err = copy_from_user(&rtmsg, arg,
1367 sizeof(struct in6_rtmsg));
1374 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1377 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1391 * Drop the packet on the floor
/*
 * Input handler for reject/null routes: count the no-route event, send
 * ICMPv6 destination-unreachable (no route), and drop the packet.
 */
1394 static int ip6_pkt_discard(struct sk_buff *skb)
1396 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1397 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-path variant: fix up skb->dev from the dst, then discard. */
1402 static int ip6_pkt_discard_out(struct sk_buff *skb)
1404 skb->dev = skb->dst->dev;
1405 return ip6_pkt_discard(skb);
1409 * Allocate a dst for local (unicast / anycast) address.
/*
 * Build a host route for a local unicast or anycast address: a /128
 * RTF_UP|RTF_NONEXTHOP (plus RTF_LOCAL or RTF_ANYCAST) route through the
 * loopback device, delivered via ip6_input. Returns the route or an
 * ERR_PTR on allocation/neighbour failure.
 * NOTE(review): the anycast-vs-local branch condition and the return of
 * rt are in missing lines of this extraction.
 */
1412 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1413 const struct in6_addr *addr,
1416 struct rt6_info *rt = ip6_dst_alloc();
1419 return ERR_PTR(-ENOMEM);
1421 dev_hold(&loopback_dev);
1424 rt->u.dst.flags = DST_HOST;
1425 rt->u.dst.input = ip6_input;
1426 rt->u.dst.output = ip6_output;
1427 rt->rt6i_dev = &loopback_dev;
1428 rt->rt6i_idev = idev;
1429 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1430 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1431 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1432 rt->u.dst.obsolete = -1;
1434 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1436 rt->rt6i_flags |= RTF_ANYCAST;
1438 rt->rt6i_flags |= RTF_LOCAL;
1439 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1440 if (rt->rt6i_nexthop == NULL) {
1441 dst_free((struct dst_entry *) rt);
1442 return ERR_PTR(-ENOMEM);
1445 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1446 rt->rt6i_dst.plen = 128;
1448 atomic_set(&rt->u.dst.__refcnt, 1);
/*
 * fib6_clean_tree callback: select for deletion every route on the given
 * device (arg == NULL matches all devices), excluding the null entry.
 * NOTE(review): the nonzero "delete me" return is in a missing line.
 */
1453 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1455 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1456 rt != &ip6_null_entry) {
1457 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes through @dev from the FIB when the device goes down. */
1463 void rt6_ifdown(struct net_device *dev)
1465 write_lock_bh(&rt6_lock);
1466 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1467 write_unlock_bh(&rt6_lock);
/*
 * Argument bundle for the rt6_mtu_change_route tree-walk callback.
 * NOTE(review): the 'mtu' member referenced by the callback below is in
 * a missing line of this extraction.
 */
1470 struct rt6_mtu_change_arg
1472 struct net_device *dev;
/*
 * fib6_clean_tree callback for an administrative MTU change on a device:
 * update each affected route's MTU metric (unless locked) when the new
 * MTU is a decrease, or when it is an increase and the route's PMTU
 * equalled the device MTU (i.e. this link was the path bottleneck);
 * ADVMSS is recomputed from the new MTU.
 */
1476 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1478 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1479 struct inet6_dev *idev;
1481 /* In IPv6 pmtu discovery is not optional,
1482 so that RTAX_MTU lock cannot disable it.
1483 We still use this lock to block changes
1484 caused by addrconf/ndisc.
1487 idev = __in6_dev_get(arg->dev);
1491 /* For administrative MTU increase, there is no way to discover
1492 IPv6 PMTU increase, so PMTU increase should be updated here.
1493 Since RFC 1981 doesn't include administrative MTU increase
1494 update PMTU increase is a MUST. (i.e. jumbo frame)
1497 If new MTU is less than route PMTU, this new MTU will be the
1498 lowest MTU in the path, update the route PMTU to reflect PMTU
1499 decreases; if new MTU is greater than route PMTU, and the
1500 old MTU is the lowest MTU in the path, update the route PMTU
1501 to reflect the increase. In this case if the other nodes' MTU
1502 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1505 if (rt->rt6i_dev == arg->dev &&
1506 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1507 (dst_mtu(&rt->u.dst) > arg->mtu ||
1508 (dst_mtu(&rt->u.dst) < arg->mtu &&
1509 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1510 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1511 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/*
 * Apply a device MTU change to every route in the FIB via the
 * rt6_mtu_change_route callback.
 * NOTE(review): arg.dev/arg.mtu assignments are in missing lines of
 * this extraction.
 */
1515 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1517 struct rt6_mtu_change_arg arg;
1521 read_lock_bh(&rt6_lock);
1522 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1523 read_unlock_bh(&rt6_lock);
/*
 * Translate a netlink rtmsg + attribute array into the legacy in6_rtmsg
 * used by ip6_route_add/del: copies prefix lengths, gateway, dst/src
 * prefixes, output ifindex and priority, validating each attribute's
 * length first. RTN_UNREACHABLE maps to RTF_REJECT.
 * NOTE(review): the -EINVAL returns on bad attribute lengths and the
 * final return are in missing lines of this extraction.
 */
1526 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1527 struct in6_rtmsg *rtmsg)
1529 memset(rtmsg, 0, sizeof(*rtmsg));
1531 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1532 rtmsg->rtmsg_src_len = r->rtm_src_len;
1533 rtmsg->rtmsg_flags = RTF_UP;
1534 if (r->rtm_type == RTN_UNREACHABLE)
1535 rtmsg->rtmsg_flags |= RTF_REJECT;
1537 if (rta[RTA_GATEWAY-1]) {
1538 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1540 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1541 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1543 if (rta[RTA_DST-1]) {
1544 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1546 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1548 if (rta[RTA_SRC-1]) {
1549 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1551 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1553 if (rta[RTA_OIF-1]) {
1554 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1556 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1558 if (rta[RTA_PRIORITY-1]) {
1559 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1561 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1566 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1568 struct rtmsg *r = NLMSG_DATA(nlh);
1569 struct in6_rtmsg rtmsg;
1571 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1573 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1576 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1578 struct rtmsg *r = NLMSG_DATA(nlh);
1579 struct in6_rtmsg rtmsg;
1581 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1583 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/* Carries the skb being filled and the dump callback state through
 * the fib walker into rt6_dump_route(). */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;
	struct netlink_callback *cb;
};
1592 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1593 struct in6_addr *dst, struct in6_addr *src,
1594 int iif, int type, u32 pid, u32 seq,
1595 int prefix, unsigned int flags)
1598 struct nlmsghdr *nlh;
1599 unsigned char *b = skb->tail;
1600 struct rta_cacheinfo ci;
1602 if (prefix) { /* user wants prefix routes only */
1603 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1604 /* success since this is not a prefix route */
1609 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1610 rtm = NLMSG_DATA(nlh);
1611 rtm->rtm_family = AF_INET6;
1612 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1613 rtm->rtm_src_len = rt->rt6i_src.plen;
1615 rtm->rtm_table = RT_TABLE_MAIN;
1616 if (rt->rt6i_flags&RTF_REJECT)
1617 rtm->rtm_type = RTN_UNREACHABLE;
1618 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1619 rtm->rtm_type = RTN_LOCAL;
1621 rtm->rtm_type = RTN_UNICAST;
1623 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1624 rtm->rtm_protocol = rt->rt6i_protocol;
1625 if (rt->rt6i_flags&RTF_DYNAMIC)
1626 rtm->rtm_protocol = RTPROT_REDIRECT;
1627 else if (rt->rt6i_flags & RTF_ADDRCONF)
1628 rtm->rtm_protocol = RTPROT_KERNEL;
1629 else if (rt->rt6i_flags&RTF_DEFAULT)
1630 rtm->rtm_protocol = RTPROT_RA;
1632 if (rt->rt6i_flags&RTF_CACHE)
1633 rtm->rtm_flags |= RTM_F_CLONED;
1636 RTA_PUT(skb, RTA_DST, 16, dst);
1637 rtm->rtm_dst_len = 128;
1638 } else if (rtm->rtm_dst_len)
1639 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1640 #ifdef CONFIG_IPV6_SUBTREES
1642 RTA_PUT(skb, RTA_SRC, 16, src);
1643 rtm->rtm_src_len = 128;
1644 } else if (rtm->rtm_src_len)
1645 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1648 RTA_PUT(skb, RTA_IIF, 4, &iif);
1650 struct in6_addr saddr_buf;
1651 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1652 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1654 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1655 goto rtattr_failure;
1656 if (rt->u.dst.neighbour)
1657 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1659 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1660 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1661 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1662 if (rt->rt6i_expires)
1663 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1666 ci.rta_used = rt->u.dst.__use;
1667 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1668 ci.rta_error = rt->u.dst.error;
1672 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1673 nlh->nlmsg_len = skb->tail - b;
1678 skb_trim(skb, b - skb->data);
1682 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1684 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1687 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1688 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1689 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1693 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1694 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1695 prefix, NLM_F_MULTI);
1698 static int fib6_dump_node(struct fib6_walker_t *w)
1701 struct rt6_info *rt;
1703 for (rt = w->leaf; rt; rt = rt->u.next) {
1704 res = rt6_dump_route(rt, w->args);
1706 /* Frame is full, suspend walking */
1716 static void fib6_dump_end(struct netlink_callback *cb)
1718 struct fib6_walker_t *w = (void*)cb->args[0];
1722 fib6_walker_unlink(w);
1725 cb->done = (void*)cb->args[1];
1729 static int fib6_dump_done(struct netlink_callback *cb)
1732 return cb->done ? cb->done(cb) : 0;
1735 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1737 struct rt6_rtnl_dump_arg arg;
1738 struct fib6_walker_t *w;
1744 w = (void*)cb->args[0];
1748 * 1. hook callback destructor.
1750 cb->args[1] = (long)cb->done;
1751 cb->done = fib6_dump_done;
1754 * 2. allocate and initialize walker.
1756 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1759 RT6_TRACE("dump<%p", w);
1760 memset(w, 0, sizeof(*w));
1761 w->root = &ip6_routing_table;
1762 w->func = fib6_dump_node;
1764 cb->args[0] = (long)w;
1765 read_lock_bh(&rt6_lock);
1767 read_unlock_bh(&rt6_lock);
1770 read_lock_bh(&rt6_lock);
1771 res = fib6_walk_continue(w);
1772 read_unlock_bh(&rt6_lock);
1775 if (res <= 0 && skb->len == 0)
1776 RT6_TRACE("%p>dump end\n", w);
1778 res = res < 0 ? res : skb->len;
1779 /* res < 0 is an error. (really, impossible)
1780 res == 0 means that dump is complete, but skb still can contain data.
1781 res > 0 dump is not complete, but frame is full.
1783 /* Destroy walker, if dump of this table is complete. */
1789 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1791 struct rtattr **rta = arg;
1794 struct sk_buff *skb;
1796 struct rt6_info *rt;
1798 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1802 /* Reserve room for dummy headers, this skb can pass
1803 through good chunk of routing engine.
1805 skb->mac.raw = skb->data;
1806 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1808 memset(&fl, 0, sizeof(fl));
1810 ipv6_addr_copy(&fl.fl6_src,
1811 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1813 ipv6_addr_copy(&fl.fl6_dst,
1814 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1817 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1820 struct net_device *dev;
1821 dev = __dev_get_by_index(iif);
1830 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1832 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1834 skb->dst = &rt->u.dst;
1836 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1837 err = rt6_fill_node(skb, rt,
1838 &fl.fl6_dst, &fl.fl6_src,
1840 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1841 nlh->nlmsg_seq, 0, 0);
1847 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1857 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1858 struct netlink_skb_parms *req)
1860 struct sk_buff *skb;
1861 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1862 u32 pid = current->pid;
1868 seq = nlh->nlmsg_seq;
1870 skb = alloc_skb(size, gfp_any());
1872 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1875 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1877 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1880 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1881 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1888 #ifdef CONFIG_PROC_FS
1890 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1901 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1903 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1906 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1911 if (arg->len >= arg->length)
1914 for (i=0; i<16; i++) {
1915 sprintf(arg->buffer + arg->len, "%02x",
1916 rt->rt6i_dst.addr.s6_addr[i]);
1919 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1922 #ifdef CONFIG_IPV6_SUBTREES
1923 for (i=0; i<16; i++) {
1924 sprintf(arg->buffer + arg->len, "%02x",
1925 rt->rt6i_src.addr.s6_addr[i]);
1928 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1931 sprintf(arg->buffer + arg->len,
1932 "00000000000000000000000000000000 00 ");
1936 if (rt->rt6i_nexthop) {
1937 for (i=0; i<16; i++) {
1938 sprintf(arg->buffer + arg->len, "%02x",
1939 rt->rt6i_nexthop->primary_key[i]);
1943 sprintf(arg->buffer + arg->len,
1944 "00000000000000000000000000000000");
1947 arg->len += sprintf(arg->buffer + arg->len,
1948 " %08x %08x %08x %08x %8s\n",
1949 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1950 rt->u.dst.__use, rt->rt6i_flags,
1951 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1955 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1957 struct rt6_proc_arg arg;
1958 arg.buffer = buffer;
1959 arg.offset = offset;
1960 arg.length = length;
1964 read_lock_bh(&rt6_lock);
1965 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1966 read_unlock_bh(&rt6_lock);
1970 *start += offset % RT6_INFO_LEN;
1972 arg.len -= offset % RT6_INFO_LEN;
1974 if (arg.len > length)
1982 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1984 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1985 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1986 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1987 rt6_stats.fib_rt_cache,
1988 atomic_read(&ip6_dst_ops.entries),
1989 rt6_stats.fib_discarded_routes);
1994 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1996 return single_open(file, rt6_stats_seq_show, NULL);
1999 static struct file_operations rt6_stats_seq_fops = {
2000 .owner = THIS_MODULE,
2001 .open = rt6_stats_seq_open,
2003 .llseek = seq_lseek,
2004 .release = single_release,
2006 #endif /* CONFIG_PROC_FS */
2008 #ifdef CONFIG_SYSCTL
2010 static int flush_delay;
2013 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2014 void __user *buffer, size_t *lenp, loff_t *ppos)
2017 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2018 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2024 ctl_table ipv6_route_table[] = {
2026 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2027 .procname = "flush",
2028 .data = &flush_delay,
2029 .maxlen = sizeof(int),
2031 .proc_handler = &ipv6_sysctl_rtcache_flush
2034 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2035 .procname = "gc_thresh",
2036 .data = &ip6_dst_ops.gc_thresh,
2037 .maxlen = sizeof(int),
2039 .proc_handler = &proc_dointvec,
2042 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2043 .procname = "max_size",
2044 .data = &ip6_rt_max_size,
2045 .maxlen = sizeof(int),
2047 .proc_handler = &proc_dointvec,
2050 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2051 .procname = "gc_min_interval",
2052 .data = &ip6_rt_gc_min_interval,
2053 .maxlen = sizeof(int),
2055 .proc_handler = &proc_dointvec_jiffies,
2056 .strategy = &sysctl_jiffies,
2059 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2060 .procname = "gc_timeout",
2061 .data = &ip6_rt_gc_timeout,
2062 .maxlen = sizeof(int),
2064 .proc_handler = &proc_dointvec_jiffies,
2065 .strategy = &sysctl_jiffies,
2068 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2069 .procname = "gc_interval",
2070 .data = &ip6_rt_gc_interval,
2071 .maxlen = sizeof(int),
2073 .proc_handler = &proc_dointvec_jiffies,
2074 .strategy = &sysctl_jiffies,
2077 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2078 .procname = "gc_elasticity",
2079 .data = &ip6_rt_gc_elasticity,
2080 .maxlen = sizeof(int),
2082 .proc_handler = &proc_dointvec_jiffies,
2083 .strategy = &sysctl_jiffies,
2086 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2087 .procname = "mtu_expires",
2088 .data = &ip6_rt_mtu_expires,
2089 .maxlen = sizeof(int),
2091 .proc_handler = &proc_dointvec_jiffies,
2092 .strategy = &sysctl_jiffies,
2095 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2096 .procname = "min_adv_mss",
2097 .data = &ip6_rt_min_advmss,
2098 .maxlen = sizeof(int),
2100 .proc_handler = &proc_dointvec_jiffies,
2101 .strategy = &sysctl_jiffies,
2104 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2105 .procname = "gc_min_interval_ms",
2106 .data = &ip6_rt_gc_min_interval,
2107 .maxlen = sizeof(int),
2109 .proc_handler = &proc_dointvec_ms_jiffies,
2110 .strategy = &sysctl_ms_jiffies,
2117 void __init ip6_route_init(void)
2119 struct proc_dir_entry *p;
2121 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2122 sizeof(struct rt6_info),
2123 0, SLAB_HWCACHE_ALIGN,
2125 if (!ip6_dst_ops.kmem_cachep)
2126 panic("cannot create ip6_dst_cache");
2129 #ifdef CONFIG_PROC_FS
2130 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2132 p->owner = THIS_MODULE;
2134 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2141 void ip6_route_cleanup(void)
2143 #ifdef CONFIG_PROC_FS
2144 proc_net_remove("ipv6_route");
2145 proc_net_remove("rt6_stats");
2152 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);