2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
54 #include <linux/rtnetlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
/*
 * Debug helpers.  RDBG/RT6_TRACE expand to printk under a debug #if
 * branch (conditional lines elided in this view) and RT6_TRACE becomes
 * a no-op otherwise.
 */
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
/*
 * Routing-cache tunables (presumably exposed through sysctl — the
 * sysctl table itself is not visible in this chunk).  Only
 * ip6_rt_gc_interval has external linkage.
 */
76 static int ip6_rt_max_size = 4096;
77 static int ip6_rt_gc_min_interval = HZ / 2;
78 static int ip6_rt_gc_timeout = 60*HZ;
79 int ip6_rt_gc_interval = 30*HZ;
80 static int ip6_rt_gc_elasticity = 9;
81 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Minimum advertised MSS: minimum MTU minus TCP (20) and IPv6 (40) headers. */
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and helpers defined below. */
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void ip6_dst_destroy(struct dst_entry *);
88 static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90 static int ip6_dst_gc(void);
92 static int ip6_pkt_discard(struct sk_buff *skb);
93 static int ip6_pkt_discard_out(struct sk_buff *skb);
94 static void ip6_link_failure(struct sk_buff *skb);
95 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/*
 * dst_ops vtable: wires the generic destination-cache machinery to the
 * IPv6-specific routines in this file.
 */
97 static struct dst_ops ip6_dst_ops = {
99 .protocol = __constant_htons(ETH_P_IPV6),
102 .check = ip6_dst_check,
103 .destroy = ip6_dst_destroy,
104 .ifdown = ip6_dst_ifdown,
105 .negative_advice = ip6_negative_advice,
106 .link_failure = ip6_link_failure,
107 .update_pmtu = ip6_rt_update_pmtu,
108 .entry_size = sizeof(struct rt6_info),
/*
 * Permanent "no route" sentinel.  Held with an initial refcount of 1 so
 * it is never freed; all traffic through it is discarded with
 * -ENETUNREACH via ip6_pkt_discard{,_out}.  RTF_REJECT|RTF_NONEXTHOP
 * and the maximal metric keep it at the bottom of any route list.
 */
111 struct rt6_info ip6_null_entry = {
114 .__refcnt = ATOMIC_INIT(1),
116 .dev = &loopback_dev,
118 .error = -ENETUNREACH,
119 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
120 .input = ip6_pkt_discard,
121 .output = ip6_pkt_discard_out,
123 .path = (struct dst_entry*)&ip6_null_entry,
126 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
127 .rt6i_metric = ~(u32) 0,
128 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Root of the single IPv6 FIB tree; its leaf starts out as the null
 * (unreachable) route.  rt6_lock serializes all access to the fib.
 */
131 struct fib6_node ip6_routing_table = {
132 .leaf = &ip6_null_entry,
133 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
136 /* Protects all the ip6 fib */
138 DEFINE_RWLOCK(rt6_lock);
141 /* allocate dst with ip6_dst_ops */
/* Returns a zero-initialized rt6_info, or NULL on allocation failure. */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
144 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * dst_ops.destroy callback: detach the route's inet6_dev reference.
 * NOTE(review): the in6_dev_put() that balances the hold is on an
 * elided line — confirm against the full source.
 */
147 static void ip6_dst_destroy(struct dst_entry *dst)
149 struct rt6_info *rt = (struct rt6_info *)dst;
150 struct inet6_dev *idev = rt->rt6i_idev;
153 rt->rt6i_idev = NULL;
/*
 * dst_ops.ifdown callback: when the device backing a cached route goes
 * away, re-parent the route's idev onto the loopback device so the
 * dst remains usable until it is garbage collected.
 */
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
164 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 if (loopback_idev != NULL) {
167 rt->rt6i_idev = loopback_idev;
/* True if the route carries an expiry (RTF_EXPIRES) that has passed. */
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
175 return (rt->rt6i_flags & RTF_EXPIRES &&
176 time_after(jiffies, rt->rt6i_expires));
180 * Route lookup. Any rt6_lock is implied.
/*
 * Walk the sibling chain starting at rt and pick the entry matching
 * the requested output interface (oif).  Loopback entries may match
 * via their idev's ifindex.  Falls back to &ip6_null_entry when a
 * strict match is required but none is found.  (Several branch lines
 * are elided in this view — the exact strict/local fallback ordering
 * should be confirmed against the full source.)
 */
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
187 struct rt6_info *local = NULL;
188 struct rt6_info *sprt;
191 for (sprt = rt; sprt; sprt = sprt->u.next) {
192 struct net_device *dev = sprt->rt6i_dev;
193 if (dev->ifindex == oif)
195 if (dev->flags & IFF_LOOPBACK) {
196 if (sprt->rt6i_idev == NULL ||
197 sprt->rt6i_idev->dev->ifindex != oif) {
200 if (local && (!oif ||
201 local->rt6i_idev->dev->ifindex == oif))
212 return &ip6_null_entry;
218 * pointer to the last default router chosen. BH is disabled locally.
/* rt6_dflt_lock guards rt6_dflt_pointer (the sticky default-router choice). */
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
/*
 * Forget the cached default-router choice.  With rt == NULL the reset
 * is unconditional; otherwise it only happens if rt is the router
 * currently remembered (e.g. because it is being deleted).
 */
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
225 spin_lock_bh(&rt6_dflt_lock);
226 if (rt == NULL || rt == rt6_dflt_pointer) {
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 rt6_dflt_pointer = NULL;
230 spin_unlock_bh(&rt6_dflt_lock);
233 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Choose a default router from the list headed by rt, preferring (per
 * RFC 2461 6.3.6) routers whose neighbour entry indicates reachability
 * and respecting the requested output interface.  The previously used
 * router (rt6_dflt_pointer) is kept sticky while it remains probably
 * reachable; otherwise the list is round-robined.  Returns
 * &ip6_null_entry if no default route exists at all.
 * NOTE(review): the scoring (m/mpri) and several loop bodies are
 * elided in this view; the exact preference order must be confirmed
 * against the full source.
 */
234 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
236 struct rt6_info *match = NULL;
237 struct rt6_info *sprt;
240 for (sprt = rt; sprt; sprt = sprt->u.next) {
241 struct neighbour *neigh;
246 sprt->rt6i_dev->ifindex == oif))
249 if (rt6_check_expired(sprt))
252 if (sprt == rt6_dflt_pointer)
255 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256 read_lock_bh(&neigh->lock);
257 switch (neigh->nud_state) {
275 read_unlock_bh(&neigh->lock);
278 read_unlock_bh(&neigh->lock);
283 if (m > mpri || m >= 12) {
287 /* we choose the last default router if it
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
297 spin_lock(&rt6_dflt_lock);
300 * No default routers are known to be reachable.
/* Round-robin: resume scanning after the last chosen router. */
303 if (rt6_dflt_pointer) {
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
322 if (sprt == rt6_dflt_pointer)
329 if (rt6_dflt_pointer != match)
330 RT6_TRACE("changed default router: %p->%p\n",
331 rt6_dflt_pointer, match);
332 rt6_dflt_pointer = match;
334 spin_unlock(&rt6_dflt_lock);
338 * Last Resort: if no default routers found,
339 * use addrconf default route.
340 * We don't record this route.
342 for (sprt = ip6_routing_table.leaf;
343 sprt; sprt = sprt->u.next) {
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
348 sprt->rt6i_dev->ifindex == oif))) {
354 /* no default route. give up. */
355 match = &ip6_null_entry;
/*
 * Public route lookup: find the fib node for (daddr, saddr), pick the
 * device-matching entry, and return it with a reference held.
 * NOTE(review): lines handling the error/retry path between the hold
 * and the release are elided here.
 */
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
365 struct fib6_node *fn;
368 read_lock_bh(&rt6_lock);
369 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370 rt = rt6_device_match(fn->leaf, oif, strict);
371 dst_hold(&rt->u.dst);
373 read_unlock_bh(&rt6_lock);
375 rt->u.dst.lastuse = jiffies;
376 if (rt->u.dst.error == 0)
378 dst_release(&rt->u.dst);
382 /* ip6_ins_rt is called with FREE rt6_lock.
383 It takes new route entry, the addition fails by any reason the
384 route is freed. In any case, if caller does not hold it, it may
/* Insert rt into the fib under the write lock; returns fib6_add()'s result. */
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389 void *_rtattr, struct netlink_skb_parms *req)
393 write_lock_bh(&rt6_lock);
394 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395 write_unlock_bh(&rt6_lock);
400 /* No rt6_lock! If COW failed, the function returns dead route entry
401 with dst->error set to errno value.
/*
 * Copy-on-write clone of ort specialized to a single destination:
 * the clone becomes a /128 RTF_CACHE host route.  For non-gateway
 * routes the destination itself becomes the gateway (and anycast
 * destinations are flagged RTF_ANYCAST).  A neighbour entry for the
 * gateway is resolved at the end.
 */
404 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
405 struct in6_addr *saddr)
413 rt = ip6_rt_copy(ort);
416 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417 if (rt->rt6i_dst.plen != 128 &&
418 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419 rt->rt6i_flags |= RTF_ANYCAST;
420 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
423 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424 rt->rt6i_dst.plen = 128;
425 rt->rt6i_flags |= RTF_CACHE;
426 rt->u.dst.flags |= DST_HOST;
428 #ifdef CONFIG_IPV6_SUBTREES
429 if (rt->rt6i_src.plen && saddr) {
430 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431 rt->rt6i_src.plen = 128;
435 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/*
 * COW wrapper: clone ort for this destination and insert the clone
 * into the fib.  On allocation failure returns a held ip6_null_entry;
 * on insertion failure the error is recorded in dst.error (per the
 * comment above rt6_alloc_cow).
 */
442 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
443 struct in6_addr *saddr, struct netlink_skb_parms *req)
445 struct rt6_info *rt = rt6_alloc_cow(ort, daddr, saddr);
449 dst_hold(&ip6_null_entry.u.dst);
450 return &ip6_null_entry;
453 dst_hold(&rt->u.dst);
455 err = ip6_ins_rt(rt, NULL, NULL, req);
457 rt->u.dst.error = err;
/*
 * Plain clone (no gateway rewrite): specialize ort to a /128 cached
 * host route for daddr, preserving the reject error if any, and
 * sharing ort's neighbour entry via neigh_clone().
 */
462 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
464 struct rt6_info *rt = ip6_rt_copy(ort);
466 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
467 rt->rt6i_dst.plen = 128;
468 rt->rt6i_flags |= RTF_CACHE;
469 if (rt->rt6i_flags & RTF_REJECT)
470 rt->u.dst.error = ort->u.dst.error;
471 rt->u.dst.flags |= DST_HOST;
472 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/*
 * BACKTRACK(): used by the lookup paths below.  When a strict lookup
 * landed on ip6_null_entry, walk back up the fib tree (fn->parent)
 * until the root or the first node carrying route info (RTN_RTINFO)
 * and retry from there.  (Macro tail is elided in this view.)
 */
477 #define BACKTRACK() \
478 if (rt == &ip6_null_entry && strict) { \
479 while ((fn = fn->parent) != NULL) { \
480 if (fn->fn_flags & RTN_ROOT) { \
481 dst_hold(&rt->u.dst); \
484 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Input-path routing: resolve skb's destination to a dst and attach it
 * to skb->dst.  Strict interface matching is forced for multicast and
 * link-local destinations.  If the chosen route has no resolved
 * nexthop and is not RTF_NONEXTHOP, a cached clone is created via
 * rt6_cow(); an -EEXIST from the insert means another CPU raced us, so
 * the lookup is retried (bounded by the elided `attempts` counter).
 */
490 void ip6_route_input(struct sk_buff *skb)
492 struct fib6_node *fn;
497 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
500 read_lock_bh(&rt6_lock);
502 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
503 &skb->nh.ipv6h->saddr);
/* Cached host routes can be used directly after a device match. */
508 if ((rt->rt6i_flags & RTF_CACHE)) {
509 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
511 dst_hold(&rt->u.dst);
515 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
518 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
519 struct rt6_info *nrt;
520 dst_hold(&rt->u.dst);
521 read_unlock_bh(&rt6_lock);
523 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
524 &skb->nh.ipv6h->saddr,
527 dst_release(&rt->u.dst);
530 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
533 /* Race condition! In the gap, when rt6_lock was
534 released someone could insert this route. Relookup.
536 dst_release(&rt->u.dst);
539 dst_hold(&rt->u.dst);
542 read_unlock_bh(&rt6_lock);
544 rt->u.dst.lastuse = jiffies;
546 skb->dst = (struct dst_entry *) rt;
/*
 * Output-path routing: like ip6_route_input() but keyed by the flow
 * (fl6_dst/fl6_src/oif) and with default-router selection: when the
 * match is an RTF_DEFAULT route at addrconf priority or below,
 * rt6_best_dflt() picks the router per RFC 2461 6.3.6.  Same COW +
 * -EEXIST retry dance as the input path.  Returns a held dst.
 */
549 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
551 struct fib6_node *fn;
556 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
559 read_lock_bh(&rt6_lock);
561 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
566 if ((rt->rt6i_flags & RTF_CACHE)) {
567 rt = rt6_device_match(rt, fl->oif, strict);
569 dst_hold(&rt->u.dst);
572 if (rt->rt6i_flags & RTF_DEFAULT) {
573 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
574 rt = rt6_best_dflt(rt, fl->oif);
576 rt = rt6_device_match(rt, fl->oif, strict);
580 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
581 struct rt6_info *nrt;
582 dst_hold(&rt->u.dst);
583 read_unlock_bh(&rt6_lock);
585 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
587 dst_release(&rt->u.dst);
590 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
593 /* Race condition! In the gap, when rt6_lock was
594 released someone could insert this route. Relookup.
596 dst_release(&rt->u.dst);
599 dst_hold(&rt->u.dst);
602 read_unlock_bh(&rt6_lock);
604 rt->u.dst.lastuse = jiffies;
611 * Destination cache support functions
/*
 * dst_ops.check: a cached dst is still valid only while its fib node's
 * serial number matches the cookie recorded when it was handed out.
 */
614 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
618 rt = (struct rt6_info *) dst;
620 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops.negative_advice: on a hint that the dst misbehaves, delete
 * it if it is merely a cached clone (RTF_CACHE).
 */
626 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
628 struct rt6_info *rt = (struct rt6_info *) dst;
631 if (rt->rt6i_flags & RTF_CACHE)
632 ip6_del_rt(rt, NULL, NULL, NULL);
/*
 * dst_ops.link_failure: report address unreachable to the sender, then
 * expire the offending route — immediately for cached clones, or by
 * invalidating the fib node's serial number for default routes (which
 * forces relookup via ip6_dst_check).
 */
639 static void ip6_link_failure(struct sk_buff *skb)
643 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
645 rt = (struct rt6_info *) skb->dst;
647 if (rt->rt6i_flags&RTF_CACHE) {
648 dst_set_expires(&rt->u.dst, 0);
649 rt->rt6i_flags |= RTF_EXPIRES;
650 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
651 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops.update_pmtu: lower the cached MTU of a host (/128) route.
 * An MTU below the IPv6 minimum (1280) is handled by keeping the
 * minimum and setting ALLFRAG instead (fragment header always added).
 */
655 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
657 struct rt6_info *rt6 = (struct rt6_info*)dst;
659 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
660 rt6->rt6i_flags |= RTF_MODIFIED;
661 if (mtu < IPV6_MIN_MTU) {
663 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
665 dst->metrics[RTAX_MTU-1] = mtu;
669 /* Protected by rt6_lock. */
/* Singly linked list of ndisc-allocated dsts awaiting garbage collection. */
670 static struct dst_entry *ndisc_dst_gc_list;
671 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derive the advertised MSS from a link MTU: subtract IPv6 + TCP
 * headers, clamp below at ip6_rt_min_advmss, and cap so that only
 * jumbo-capable links advertise "any MSS" (IPV6_MAXPLEN sentinel).
 */
673 static inline unsigned int ipv6_advmss(unsigned int mtu)
675 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
677 if (mtu < ip6_rt_min_advmss)
678 mtu = ip6_rt_min_advmss;
681 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
682 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
683 * IPV6_MAXPLEN is also valid and means: "any MSS,
684 * rely only on pmtu discovery"
686 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * Allocate a dst used by neighbour discovery for transmitting to addr
 * on dev.  The entry is not inserted into the fib; instead it is
 * chained on ndisc_dst_gc_list and reaped by ndisc_dst_gc() once its
 * refcount drops.  If no neighbour is supplied one is resolved via
 * ndisc_get_neigh().  Returns NULL-equivalent on failure (elided).
 */
691 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
692 struct neighbour *neigh,
693 struct in6_addr *addr,
694 int (*output)(struct sk_buff *))
697 struct inet6_dev *idev = in6_dev_get(dev);
699 if (unlikely(idev == NULL))
702 rt = ip6_dst_alloc();
703 if (unlikely(rt == NULL)) {
712 neigh = ndisc_get_neigh(dev, addr);
715 rt->rt6i_idev = idev;
716 rt->rt6i_nexthop = neigh;
717 atomic_set(&rt->u.dst.__refcnt, 1);
718 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
719 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
720 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
721 rt->u.dst.output = output;
723 #if 0 /* there's no chance to use these for ndisc */
724 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
727 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
728 rt->rt6i_dst.plen = 128;
/* Link onto the gc list (rt6_lock protects it) and kick the fib gc timer. */
731 write_lock_bh(&rt6_lock);
732 rt->u.dst.next = ndisc_dst_gc_list;
733 ndisc_dst_gc_list = &rt->u.dst;
734 write_unlock_bh(&rt6_lock);
736 fib6_force_start_gc();
739 return (struct dst_entry *)rt;
/*
 * Reap unreferenced entries from ndisc_dst_gc_list (loop body largely
 * elided in this view).  *more presumably reports whether entries
 * remain — confirm against the full source.
 */
742 int ndisc_dst_gc(int *more)
744 struct dst_entry *dst, *next, **pprev;
748 pprev = &ndisc_dst_gc_list;
750 while ((dst = *pprev) != NULL) {
751 if (!atomic_read(&dst->__refcnt)) {
/*
 * dst_ops.gc callback.  Rate-limited by ip6_rt_gc_min_interval unless
 * the cache exceeds ip6_rt_max_size.  The adaptive `expire` window
 * shrinks geometrically (by 1/2^elasticity) while pressure persists
 * and resets when the cache is comfortably below gc_thresh.  Returns
 * nonzero if the cache is still over the hard limit after collecting.
 */
764 static int ip6_dst_gc(void)
766 static unsigned expire = 30*HZ;
767 static unsigned long last_gc;
768 unsigned long now = jiffies;
770 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
771 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
777 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
778 expire = ip6_rt_gc_timeout>>1;
781 expire -= expire>>ip6_rt_gc_elasticity;
782 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
785 /* Clean host part of a prefix. Not necessary in radix tree,
786 but results in cleaner routing tables.
788 Remove it only when all the things will work!
/*
 * Device MTU as seen by IPv6 (idev->cnf.mtu6), falling back to
 * IPV6_MIN_MTU when the device has no inet6_dev.  The balancing
 * in6_dev_put() is on an elided line.
 */
791 static int ipv6_get_mtu(struct net_device *dev)
793 int mtu = IPV6_MIN_MTU;
794 struct inet6_dev *idev;
796 idev = in6_dev_get(dev);
798 mtu = idev->cnf.mtu6;
/*
 * Per-device hop limit (idev->cnf.hop_limit), defaulting to the global
 * ipv6_devconf value when the device has no inet6_dev.
 */
804 int ipv6_get_hoplimit(struct net_device *dev)
806 int hoplimit = ipv6_devconf.hop_limit;
807 struct inet6_dev *idev;
809 idev = in6_dev_get(dev);
811 hoplimit = idev->cnf.hop_limit;
/*
 * Build an rt6_info from a userspace/netlink in6_rtmsg and insert it
 * into the fib.  Handles: prefix validation, device/idev lookup by
 * ifindex, reject routes (loopback-bound routes are demoted to reject
 * routes to avoid kernel loops), gateway validation (next hops must
 * normally be link-local; non-link-local gateways are resolved through
 * an existing gateway route), neighbour resolution, and per-route
 * metrics from RTA_METRICS.  Error paths funnel to dst_free() (label
 * elided).  NOTE(review): many branch/cleanup lines are elided in this
 * view; the error unwinding must be confirmed against the full source.
 */
821 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
822 void *_rtattr, struct netlink_skb_parms *req)
827 struct rt6_info *rt = NULL;
828 struct net_device *dev = NULL;
829 struct inet6_dev *idev = NULL;
832 rta = (struct rtattr **) _rtattr;
834 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
836 #ifndef CONFIG_IPV6_SUBTREES
837 if (rtmsg->rtmsg_src_len)
840 if (rtmsg->rtmsg_ifindex) {
842 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
845 idev = in6_dev_get(dev);
850 if (rtmsg->rtmsg_metric == 0)
851 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
853 rt = ip6_dst_alloc();
/* obsolete = -1 marks a permanent (non-cache) route for dst_check. */
860 rt->u.dst.obsolete = -1;
861 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
862 if (nlh && (r = NLMSG_DATA(nlh))) {
863 rt->rt6i_protocol = r->rtm_protocol;
865 rt->rt6i_protocol = RTPROT_BOOT;
868 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
870 if (addr_type & IPV6_ADDR_MULTICAST)
871 rt->u.dst.input = ip6_mc_input;
873 rt->u.dst.input = ip6_forward;
875 rt->u.dst.output = ip6_output;
877 ipv6_addr_prefix(&rt->rt6i_dst.addr,
878 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
879 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
880 if (rt->rt6i_dst.plen == 128)
881 rt->u.dst.flags = DST_HOST;
883 #ifdef CONFIG_IPV6_SUBTREES
884 ipv6_addr_prefix(&rt->rt6i_src.addr,
885 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
886 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
889 rt->rt6i_metric = rtmsg->rtmsg_metric;
891 /* We cannot add true routes via loopback here,
892 they would result in kernel looping; promote them to reject routes
894 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
895 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
896 /* hold loopback dev/idev if we haven't done so. */
897 if (dev != &loopback_dev) {
904 idev = in6_dev_get(dev);
910 rt->u.dst.output = ip6_pkt_discard_out;
911 rt->u.dst.input = ip6_pkt_discard;
912 rt->u.dst.error = -ENETUNREACH;
913 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
917 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
918 struct in6_addr *gw_addr;
921 gw_addr = &rtmsg->rtmsg_gateway;
922 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
923 gwa_type = ipv6_addr_type(gw_addr);
925 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
926 struct rt6_info *grt;
928 /* IPv6 strictly inhibits using not link-local
929 addresses as nexthop address.
930 Otherwise, router will not able to send redirects.
931 It is very good, but in some (rare!) circumstances
932 (SIT, PtP, NBMA NOARP links) it is handy to allow
933 some exceptions. --ANK
936 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* The non-link-local gateway must itself be reachable via a route. */
939 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
945 if (dev != grt->rt6i_dev) {
946 dst_release(&grt->u.dst);
951 idev = grt->rt6i_idev;
953 in6_dev_hold(grt->rt6i_idev);
955 if (!(grt->rt6i_flags&RTF_GATEWAY))
957 dst_release(&grt->u.dst);
963 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
971 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
972 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
973 if (IS_ERR(rt->rt6i_nexthop)) {
974 err = PTR_ERR(rt->rt6i_nexthop);
975 rt->rt6i_nexthop = NULL;
980 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Copy caller-supplied metrics (RTA_METRICS nest) into the dst. */
983 if (rta && rta[RTA_METRICS-1]) {
984 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
985 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
987 while (RTA_OK(attr, attrlen)) {
988 unsigned flavor = attr->rta_type;
990 if (flavor > RTAX_MAX) {
994 rt->u.dst.metrics[flavor-1] =
995 *(u32 *)RTA_DATA(attr);
997 attr = RTA_NEXT(attr, attrlen);
/* Fill in defaults for any metrics the caller left unset. */
1001 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1002 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1003 if (!rt->u.dst.metrics[RTAX_MTU-1])
1004 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1005 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1006 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1007 rt->u.dst.dev = dev;
1008 rt->rt6i_idev = idev;
1009 return ip6_ins_rt(rt, nlh, _rtattr, req);
1017 dst_free((struct dst_entry *) rt);
/*
 * Remove rt from the fib under the write lock, dropping the caller's
 * reference.  The default-router pointer is reset if it points at rt
 * (rt6_reset_dflt_pointer call site partially elided).
 */
1021 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1025 write_lock_bh(&rt6_lock);
1027 rt6_reset_dflt_pointer(NULL);
1029 err = fib6_del(rt, nlh, _rtattr, req);
1030 dst_release(&rt->u.dst);
1032 write_unlock_bh(&rt6_lock);
/*
 * Delete the route matching the in6_rtmsg selector: exact prefix node
 * (fib6_locate), then first leaf entry agreeing on ifindex, gateway
 * (when RTF_GATEWAY requested) and metric, each selector being
 * optional (zero = wildcard).  Returns -ESRCH-style failure on the
 * elided fallthrough when nothing matches.
 */
1037 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1039 struct fib6_node *fn;
1040 struct rt6_info *rt;
1043 read_lock_bh(&rt6_lock);
1045 fn = fib6_locate(&ip6_routing_table,
1046 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1047 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1050 for (rt = fn->leaf; rt; rt = rt->u.next) {
1051 if (rtmsg->rtmsg_ifindex &&
1052 (rt->rt6i_dev == NULL ||
1053 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1055 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1056 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1058 if (rtmsg->rtmsg_metric &&
1059 rtmsg->rtmsg_metric != rt->rt6i_metric)
/* Hold the entry across the lock drop; ip6_del_rt consumes the ref. */
1061 dst_hold(&rt->u.dst);
1062 read_unlock_bh(&rt6_lock);
1064 return ip6_del_rt(rt, nlh, _rtattr, req);
1067 read_unlock_bh(&rt6_lock);
/*
 * Process an ICMPv6 redirect for dest received from saddr via neigh.
 * Validates that: the old route exits via the redirecting neighbour's
 * device, is a gateway route, and that saddr is (one of) our current
 * nexthop(s) — for default routes every configured gateway is checked.
 * On acceptance the neighbour cache is updated, and a cached /128
 * clone pointing at the new gateway is inserted (replacing any old
 * RTF_CACHE entry).  Several validation/cleanup lines are elided in
 * this view.
 */
1075 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1076 struct neighbour *neigh, u8 *lladdr, int on_link)
1078 struct rt6_info *rt, *nrt;
1080 /* Locate old route to this destination. */
1081 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1086 if (neigh->dev != rt->rt6i_dev)
1090 * Current route is on-link; redirect is always invalid.
1092 * Seems, previous statement is not true. It could
1093 * be node, which looks for us as on-link (f.e. proxy ndisc)
1094 * But then router serving it might decide, that we should
1095 * know truth 8)8) --ANK (980726).
1097 if (!(rt->rt6i_flags&RTF_GATEWAY))
1101 * RFC 2461 specifies that redirects should only be
1102 * accepted if they come from the nexthop to the target.
1103 * Due to the way default routers are chosen, this notion
1104 * is a bit fuzzy and one might need to check all default
1107 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1108 if (rt->rt6i_flags & RTF_DEFAULT) {
1109 struct rt6_info *rt1;
1111 read_lock(&rt6_lock);
1112 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1113 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1114 dst_hold(&rt1->u.dst);
1115 dst_release(&rt->u.dst);
1116 read_unlock(&rt6_lock);
1121 read_unlock(&rt6_lock);
1123 if (net_ratelimit())
1124 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1125 "for redirect target\n");
1132 * We have finally decided to accept it.
1135 neigh_update(neigh, lladdr, NUD_STALE,
1136 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1137 NEIGH_UPDATE_F_OVERRIDE|
1138 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1139 NEIGH_UPDATE_F_ISROUTER))
1143 * Redirect received -> path was valid.
1144 * Look, redirects are sent only in response to data packets,
1145 * so that this nexthop apparently is reachable. --ANK
1147 dst_confirm(&rt->u.dst);
1149 /* Duplicate redirect: silently ignore. */
1150 if (neigh == rt->u.dst.neighbour)
1153 nrt = ip6_rt_copy(rt);
1157 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1159 nrt->rt6i_flags &= ~RTF_GATEWAY;
1161 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1162 nrt->rt6i_dst.plen = 128;
1163 nrt->u.dst.flags |= DST_HOST;
1165 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1166 nrt->rt6i_nexthop = neigh_clone(neigh);
1167 /* Reset pmtu, it may be better */
1168 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1169 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1171 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1174 if (rt->rt6i_flags&RTF_CACHE) {
1175 ip6_del_rt(rt, NULL, NULL, NULL);
1180 dst_release(&rt->u.dst);
1185 * Handle ICMP "packet too big" messages
1186 * i.e. Path MTU discovery
/*
 * Apply a learned path MTU to the route for (daddr, saddr).  A pmtu
 * below the IPv6 minimum keeps the minimum but enables ALLFRAG.
 * Existing RTF_CACHE entries are updated in place with a 10-minute
 * (ip6_rt_mtu_expires) expiry; otherwise a new expiring clone is
 * created — via COW for connected routes, plain clone for gatewayed /
 * NONEXTHOP routes — so the original pmtu returns automatically when
 * the clone expires (RFC 1981 increase detection).
 */
1189 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1190 struct net_device *dev, u32 pmtu)
1192 struct rt6_info *rt, *nrt;
1195 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1199 if (pmtu >= dst_mtu(&rt->u.dst))
1202 if (pmtu < IPV6_MIN_MTU) {
1204 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1205 * MTU (1280) and a fragment header should always be included
1206 * after a node receiving Too Big message reporting PMTU is
1207 * less than the IPv6 Minimum Link MTU.
1209 pmtu = IPV6_MIN_MTU;
1213 /* New mtu received -> path was valid.
1214 They are sent only in response to data packets,
1215 so that this nexthop apparently is reachable. --ANK
1217 dst_confirm(&rt->u.dst);
1219 /* Host route. If it is static, it would be better
1220 not to override it, but add new one, so that
1221 when cache entry will expire old pmtu
1222 would return automatically.
1224 if (rt->rt6i_flags & RTF_CACHE) {
1225 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1227 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1228 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1229 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1234 Two cases are possible:
1235 1. It is connected route. Action: COW
1236 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1238 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1239 nrt = rt6_alloc_cow(rt, daddr, saddr);
1241 nrt = rt6_alloc_clone(rt, daddr);
1244 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1246 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1248 /* According to RFC 1981, detecting PMTU increase shouldn't be
1249 * happened within 5 mins, the recommended timer is 10 mins.
1250 * Here this route expiration time is set to ip6_rt_mtu_expires
1251 * which is 10 mins. After 10 mins the decreased pmtu is expired
1252 * and detecting PMTU increase will be automatically happened.
1254 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1255 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1257 ip6_ins_rt(nrt, NULL, NULL, NULL);
1260 dst_release(&rt->u.dst);
1264 * Misc support functions
/*
 * Shallow copy of a route used as the basis for clones: duplicates
 * I/O hooks, metrics, device (+hold), idev (+hold), gateway, dst key
 * and flags (minus RTF_EXPIRES), and resets metric/lastuse.  The
 * rt6i_nexthop field is NOT copied — callers set it themselves.
 */
1267 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1269 struct rt6_info *rt = ip6_dst_alloc();
1272 rt->u.dst.input = ort->u.dst.input;
1273 rt->u.dst.output = ort->u.dst.output;
1275 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1276 rt->u.dst.dev = ort->u.dst.dev;
1278 dev_hold(rt->u.dst.dev);
1279 rt->rt6i_idev = ort->rt6i_idev;
1281 in6_dev_hold(rt->rt6i_idev);
1282 rt->u.dst.lastuse = jiffies;
1283 rt->rt6i_expires = 0;
1285 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1286 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1287 rt->rt6i_metric = 0;
1289 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1290 #ifdef CONFIG_IPV6_SUBTREES
1291 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/*
 * Find the default-router route through (addr, dev) in the root fib
 * node's leaf chain; returns it with a reference held, or (on the
 * elided miss path) presumably NULL.
 */
1297 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1299 struct rt6_info *rt;
1300 struct fib6_node *fn;
1302 fn = &ip6_routing_table;
1304 write_lock_bh(&rt6_lock);
1305 for (rt = fn->leaf; rt; rt=rt->u.next) {
1306 if (dev == rt->rt6i_dev &&
1307 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1311 dst_hold(&rt->u.dst);
1312 write_unlock_bh(&rt6_lock);
/*
 * Install a default route via gwaddr on dev (as learned from router
 * advertisements: RTF_ADDRCONF|RTF_DEFAULT|RTF_EXPIRES, metric 1024),
 * then look it up again to return a held reference.
 */
1316 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1317 struct net_device *dev)
1319 struct in6_rtmsg rtmsg;
1321 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1322 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1323 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1324 rtmsg.rtmsg_metric = 1024;
1325 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1327 rtmsg.rtmsg_ifindex = dev->ifindex;
1329 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1330 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Delete every RA-learned default route.  Each deletion requires
 * dropping the read lock (ip6_del_rt takes the write lock), so the
 * leaf chain is rescanned from the top after each removal (restart
 * jump elided in this view).
 */
1333 void rt6_purge_dflt_routers(void)
1335 struct rt6_info *rt;
1338 read_lock_bh(&rt6_lock);
1339 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1340 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1341 dst_hold(&rt->u.dst);
1343 rt6_reset_dflt_pointer(NULL);
1345 read_unlock_bh(&rt6_lock);
1347 ip6_del_rt(rt, NULL, NULL, NULL);
1352 read_unlock_bh(&rt6_lock);
/*
 * SIOCADDRT/SIOCDELRT ioctl entry point: CAP_NET_ADMIN only, copies
 * the in6_rtmsg from userspace and dispatches to ip6_route_add/del.
 * (Locking and the remainder of the switch are elided in this view.)
 */
1355 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1357 struct in6_rtmsg rtmsg;
1361 case SIOCADDRT: /* Add a route */
1362 case SIOCDELRT: /* Delete a route */
1363 if (!capable(CAP_NET_ADMIN))
1365 err = copy_from_user(&rtmsg, arg,
1366 sizeof(struct in6_rtmsg));
1373 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1376 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1390 * Drop the packet on the floor
/* dst.input for reject routes: count, send "no route" ICMP, and drop. */
1393 static int ip6_pkt_discard(struct sk_buff *skb)
1395 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1396 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* dst.output variant: fix up skb->dev before discarding. */
1401 static int ip6_pkt_discard_out(struct sk_buff *skb)
1403 skb->dev = skb->dst->dev;
1404 return ip6_pkt_discard(skb);
1408 * Allocate a dst for local (unicast / anycast) address.
/*
 * Build a host route for a local address: bound to the loopback
 * device, delivered via ip6_input, flagged RTF_LOCAL or RTF_ANYCAST
 * (anycast selector elided).  Returns ERR_PTR(-ENOMEM) on allocation
 * or neighbour-lookup failure.  Not inserted into the fib here.
 */
1411 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1412 const struct in6_addr *addr,
1415 struct rt6_info *rt = ip6_dst_alloc();
1418 return ERR_PTR(-ENOMEM);
1420 dev_hold(&loopback_dev);
1423 rt->u.dst.flags = DST_HOST;
1424 rt->u.dst.input = ip6_input;
1425 rt->u.dst.output = ip6_output;
1426 rt->rt6i_dev = &loopback_dev;
1427 rt->rt6i_idev = idev;
1428 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1429 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1430 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1431 rt->u.dst.obsolete = -1;
1433 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1435 rt->rt6i_flags |= RTF_ANYCAST;
1437 rt->rt6i_flags |= RTF_LOCAL;
1438 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1439 if (rt->rt6i_nexthop == NULL) {
1440 dst_free((struct dst_entry *) rt);
1441 return ERR_PTR(-ENOMEM);
1444 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1445 rt->rt6i_dst.plen = 128;
1447 atomic_set(&rt->u.dst.__refcnt, 1);
/*
 * fib6_clean_tree callback for rt6_ifdown(): select every route on the
 * given device (or all routes when arg == NULL), sparing the null entry.
 */
1452 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1454 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1455 rt != &ip6_null_entry) {
1456 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes through dev when the device goes down. */
1462 void rt6_ifdown(struct net_device *dev)
1464 write_lock_bh(&rt6_lock);
1465 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1466 write_unlock_bh(&rt6_lock);
/* Cookie passed through fib6_clean_tree to rt6_mtu_change_route(). */
1469 struct rt6_mtu_change_arg
1471 struct net_device *dev;
/*
 * Per-route callback for an administrative device MTU change: update
 * the route's MTU metric (and derived advmss) when the route uses the
 * changed device, its MTU metric is not locked, and either the new MTU
 * is smaller than the route's PMTU or the old device MTU was the
 * path's limiting MTU (so an increase must propagate — RFC 1981 has
 * no discovery mechanism for increases).
 */
1475 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1477 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1478 struct inet6_dev *idev;
1480 /* In IPv6 pmtu discovery is not optional,
1481 so that RTAX_MTU lock cannot disable it.
1482 We still use this lock to block changes
1483 caused by addrconf/ndisc.
1486 idev = __in6_dev_get(arg->dev);
1490 /* For administrative MTU increase, there is no way to discover
1491 IPv6 PMTU increase, so PMTU increase should be updated here.
1492 Since RFC 1981 doesn't include administrative MTU increase
1493 update PMTU increase is a MUST. (i.e. jumbo frame)
1496 If new MTU is less than route PMTU, this new MTU will be the
1497 lowest MTU in the path, update the route PMTU to reflect PMTU
1498 decreases; if new MTU is greater than route PMTU, and the
1499 old MTU is the lowest MTU in the path, update the route PMTU
1500 to reflect the increase. In this case if the other nodes' MTU
1501 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1504 if (rt->rt6i_dev == arg->dev &&
1505 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1506 (dst_mtu(&rt->u.dst) > arg->mtu ||
1507 (dst_mtu(&rt->u.dst) < arg->mtu &&
1508 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1509 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1510 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change to every affected route in the fib. */
1514 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1516 struct rt6_mtu_change_arg arg;
1520 read_lock_bh(&rt6_lock);
1521 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1522 read_unlock_bh(&rt6_lock);
/*
 * Translate an rtnetlink rtmsg + attribute vector into the legacy
 * in6_rtmsg used by ip6_route_add/del, validating each attribute's
 * length against its expected size.  Returns nonzero on malformed
 * attributes (error returns elided in this view).
 */
1525 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1526 struct in6_rtmsg *rtmsg)
1528 memset(rtmsg, 0, sizeof(*rtmsg));
1530 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1531 rtmsg->rtmsg_src_len = r->rtm_src_len;
1532 rtmsg->rtmsg_flags = RTF_UP;
1533 if (r->rtm_type == RTN_UNREACHABLE)
1534 rtmsg->rtmsg_flags |= RTF_REJECT;
1536 if (rta[RTA_GATEWAY-1]) {
1537 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1539 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1540 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1542 if (rta[RTA_DST-1]) {
1543 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1545 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1547 if (rta[RTA_SRC-1]) {
1548 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1550 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1552 if (rta[RTA_OIF-1]) {
1553 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1555 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1557 if (rta[RTA_PRIORITY-1]) {
1558 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1560 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/*
 * inet6_rtm_delroute() -- RTM_DELROUTE netlink handler: convert the request
 * with inet6_rtm_to_rtmsg() and hand it to ip6_route_del().
 *
 * NOTE(review): elided listing -- the opening brace, the error return taken
 * when inet6_rtm_to_rtmsg() fails (line 1571, presumably `return -EINVAL;`)
 * and the closing brace are missing.
 */
1565 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1567 struct rtmsg *r = NLMSG_DATA(nlh);
1568 struct in6_rtmsg rtmsg;
1570 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1572 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/*
 * inet6_rtm_newroute() -- RTM_NEWROUTE netlink handler: convert the request
 * with inet6_rtm_to_rtmsg() and hand it to ip6_route_add().
 *
 * NOTE(review): elided listing -- the opening brace, the error return taken
 * when inet6_rtm_to_rtmsg() fails (line 1581, presumably `return -EINVAL;`)
 * and the closing brace are missing.
 */
1575 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1577 struct rtmsg *r = NLMSG_DATA(nlh);
1578 struct in6_rtmsg rtmsg;
1580 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1582 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/*
 * Per-dump state threaded through rt6_dump_route(): the skb being filled
 * and the netlink callback describing the in-progress dump.
 * NOTE(review): the closing `};` is elided from this listing.
 */
1585 struct rt6_rtnl_dump_arg
1587 struct sk_buff *skb;
1588 struct netlink_callback *cb;
/*
 * rt6_fill_node() -- serialize one rt6_info into a netlink message on
 * `skb`: rtmsg header, then RTA_DST/RTA_SRC/RTA_IIF/RTA_PREFSRC/metrics/
 * RTA_GATEWAY/RTA_OIF/RTA_PRIORITY/RTA_CACHEINFO attributes as applicable.
 * When `prefix` is set, only RTF_PREFIX_RT routes are emitted (others
 * count as success and are skipped).
 *
 * NOTE(review): elided listing -- missing lines include the opening brace,
 * the `struct rtmsg *rtm;` declaration, `else` keywords, several
 * if-conditions guarding the dst/src/iif branches, parts of the cacheinfo
 * fill (ci.rta_expires else-branch, rta_id/rta_ts/rta_tsage), the
 * `return skb->len;` success path, and the nlmsg_failure/rtattr_failure
 * labels with their `return -1;`.  Verify against pristine source.
 */
1591 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1592 struct in6_addr *dst, struct in6_addr *src,
1593 int iif, int type, u32 pid, u32 seq,
1594 int prefix, unsigned int flags)
1597 struct nlmsghdr *nlh;
1598 unsigned char *b = skb->tail;
1599 struct rta_cacheinfo ci;
1601 if (prefix) { /* user wants prefix routes only */
1602 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1603 /* success since this is not a prefix route */
1608 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1609 rtm = NLMSG_DATA(nlh);
1610 rtm->rtm_family = AF_INET6;
1611 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1612 rtm->rtm_src_len = rt->rt6i_src.plen;
1614 rtm->rtm_table = RT_TABLE_MAIN;
/* Route type: reject routes are unreachable, loopback-device routes are
 * local, everything else unicast. */
1615 if (rt->rt6i_flags&RTF_REJECT)
1616 rtm->rtm_type = RTN_UNREACHABLE;
1617 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1618 rtm->rtm_type = RTN_LOCAL;
1620 rtm->rtm_type = RTN_UNICAST;
1622 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1623 rtm->rtm_protocol = rt->rt6i_protocol;
1624 if (rt->rt6i_flags&RTF_DYNAMIC)
1625 rtm->rtm_protocol = RTPROT_REDIRECT;
1626 else if (rt->rt6i_flags & RTF_ADDRCONF)
1627 rtm->rtm_protocol = RTPROT_KERNEL;
1628 else if (rt->rt6i_flags&RTF_DEFAULT)
1629 rtm->rtm_protocol = RTPROT_RA;
/* Cloned (cache) entries are marked so userspace can filter them. */
1631 if (rt->rt6i_flags&RTF_CACHE)
1632 rtm->rtm_flags |= RTM_F_CLONED;
/* A caller-supplied dst (getroute path) is reported as a /128. */
1635 RTA_PUT(skb, RTA_DST, 16, dst);
1636 rtm->rtm_dst_len = 128;
1637 } else if (rtm->rtm_dst_len)
1638 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1639 #ifdef CONFIG_IPV6_SUBTREES
1641 RTA_PUT(skb, RTA_SRC, 16, src);
1642 rtm->rtm_src_len = 128;
1643 } else if (rtm->rtm_src_len)
1644 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1647 RTA_PUT(skb, RTA_IIF, 4, &iif);
1649 struct in6_addr saddr_buf;
1650 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1651 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1653 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1654 goto rtattr_failure;
1655 if (rt->u.dst.neighbour)
1656 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1658 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1659 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
/* Cache info: ages/uses are reported in clock_t units for userspace. */
1660 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1661 if (rt->rt6i_expires)
1662 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1665 ci.rta_used = rt->u.dst.__use;
1666 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1667 ci.rta_error = rt->u.dst.error;
1671 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1672 nlh->nlmsg_len = skb->tail - b;
/* Failure path: roll the skb back to where this message started. */
1677 skb_trim(skb, b - skb->data);
/*
 * rt6_dump_route() -- per-route callback for route-table dumps.  Reads the
 * RTM_F_PREFIX flag out of the original request (when the request header is
 * large enough to carry a struct rtmsg) and delegates to rt6_fill_node()
 * with NLM_F_MULTI set.
 *
 * NOTE(review): elided listing -- the opening brace, the declaration and
 * default initialization of `prefix` (presumably `int prefix = 0;`) and
 * the block's closing brace are missing.
 */
1681 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1683 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1686 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1687 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1688 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1692 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1693 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1694 prefix, NLM_F_MULTI);
/*
 * fib6_dump_node() -- fib6 walker callback: dump every route hanging off
 * the current leaf.  When the skb fills up the walk is suspended so it can
 * be resumed on the next recvmsg().
 *
 * NOTE(review): elided listing -- the declaration of `res`, the
 * suspend/resume bookkeeping after the "Frame is full" comment (presumably
 * w->leaf = rt; return 1;), the loop's closing brace and the final
 * return are missing.
 */
1697 static int fib6_dump_node(struct fib6_walker_t *w)
1700 struct rt6_info *rt;
1702 for (rt = w->leaf; rt; rt = rt->u.next) {
1703 res = rt6_dump_route(rt, w->args);
1705 /* Frame is full, suspend walking */
/*
 * fib6_dump_end() -- tear down dump state: unlink (and presumably free) the
 * walker stashed in cb->args[0] and restore the caller's original `done`
 * callback saved in cb->args[1].
 *
 * NOTE(review): elided listing -- the opening brace, the NULL check on `w`,
 * the kfree of the walker and the closing brace are missing.
 */
1715 static void fib6_dump_end(struct netlink_callback *cb)
1717 struct fib6_walker_t *w = (void*)cb->args[0];
1721 fib6_walker_unlink(w);
1724 cb->done = (void*)cb->args[1];
/*
 * fib6_dump_done() -- netlink `done` hook installed by inet6_dump_fib():
 * after cleanup (the fib6_dump_end() call is elided from this listing,
 * along with the braces), chain to the original done callback if any.
 */
1728 static int fib6_dump_done(struct netlink_callback *cb)
1731 return cb->done ? cb->done(cb) : 0;
/*
 * inet6_dump_fib() -- entry point for RTM_GETROUTE dumps.  On the first
 * call it hooks fib6_dump_done as the destructor and allocates/initializes
 * a fib6 walker stored in cb->args[0]; subsequent calls resume the walk
 * with fib6_walk_continue().  Returns skb->len while more data remains,
 * 0 when the dump is complete, <0 on error.
 *
 * NOTE(review): heavily elided listing -- missing lines include the opening
 * brace, the declaration of `res`/`arg` setup (arg.skb/arg.cb and
 * w->args = &arg), the "first call vs. resume" branch structure, the
 * kmalloc failure check (-ENOMEM), the initial fib6_walk() call, and the
 * tail that destroys the walker when the dump finishes.  Do not assume
 * control flow beyond what is shown.
 */
1734 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1736 struct rt6_rtnl_dump_arg arg;
1737 struct fib6_walker_t *w;
1743 w = (void*)cb->args[0];
1747 * 1. hook callback destructor.
1749 cb->args[1] = (long)cb->done;
1750 cb->done = fib6_dump_done;
1753 * 2. allocate and initialize walker.
1755 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1758 RT6_TRACE("dump<%p", w);
1759 memset(w, 0, sizeof(*w));
1760 w->root = &ip6_routing_table;
1761 w->func = fib6_dump_node;
1763 cb->args[0] = (long)w;
1764 read_lock_bh(&rt6_lock);
1766 read_unlock_bh(&rt6_lock);
1769 read_lock_bh(&rt6_lock);
1770 res = fib6_walk_continue(w);
1771 read_unlock_bh(&rt6_lock);
1774 if (res <= 0 && skb->len == 0)
1775 RT6_TRACE("%p>dump end\n", w);
1777 res = res < 0 ? res : skb->len;
1778 /* res < 0 is an error. (really, impossible)
1779 res == 0 means that dump is complete, but skb still can contain data.
1780 res > 0 dump is not complete, but frame is full.
1782 /* Destroy walker, if dump of this table is complete. */
/*
 * inet6_rtm_getroute() -- RTM_GETROUTE (single-route query) handler.
 * Builds a flow from the RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF attributes,
 * resolves it with ip6_route_output(), serializes the result with
 * rt6_fill_node() and unicasts the reply back to the requester.
 *
 * NOTE(review): heavily elided listing -- missing lines include the opening
 * brace, declarations of `fl`/`iif`/`err`, attribute-presence checks
 * guarding each copy, the alloc_skb failure return (-ENOBUFS), the
 * "device not found" error path after __dev_get_by_index() (presumably
 * -ENODEV plus a goto to a cleanup label), the rt6_fill_node error
 * handling, and the function tail/closing brace.
 */
1788 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1790 struct rtattr **rta = arg;
1793 struct sk_buff *skb;
1795 struct rt6_info *rt;
1797 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1801 /* Reserve room for dummy headers, this skb can pass
1802 through good chunk of routing engine.
1804 skb->mac.raw = skb->data;
1805 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1807 memset(&fl, 0, sizeof(fl));
1809 ipv6_addr_copy(&fl.fl6_src,
1810 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1812 ipv6_addr_copy(&fl.fl6_dst,
1813 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1816 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
/* Input-interface queries validate that the ifindex exists. */
1819 struct net_device *dev;
1820 dev = __dev_get_by_index(iif);
1829 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1831 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
/* Attach the route so rt6_fill_node/prefsrc lookup can use it; the skb
 * destructor will drop the reference. */
1833 skb->dst = &rt->u.dst;
1835 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1836 err = rt6_fill_node(skb, rt,
1837 &fl.fl6_dst, &fl.fl6_src,
1839 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1840 nlh->nlmsg_seq, 0, 0);
1846 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * inet6_rt_notify() -- broadcast a route add/delete event to the
 * RTNLGRP_IPV6_ROUTE multicast group.  On allocation or fill failure the
 * error is reported to listeners via netlink_set_err() instead.
 *
 * NOTE(review): elided listing -- missing lines include the opening brace,
 * the `u32 seq = 0;` style initialization and the `if (req) pid = ...;
 * if (nlh)` guards around the pid/seq overrides, the `if (!skb)` check
 * before the first netlink_set_err, the kfree_skb on fill failure, and
 * the closing brace.
 */
1856 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1857 struct netlink_skb_parms *req)
1859 struct sk_buff *skb;
1860 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1861 u32 pid = current->pid;
1867 seq = nlh->nlmsg_seq;
1869 skb = alloc_skb(size, gfp_any());
1871 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1874 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1876 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1879 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1880 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1887 #ifdef CONFIG_PROC_FS
1889 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route() -- /proc/net/ipv6_route formatter: prints one
 * fixed-width (RT6_INFO_LEN-byte) line per route -- hex dst addr/plen,
 * src addr/plen (zeros without CONFIG_IPV6_SUBTREES), next hop, then
 * metric, refcnt, use count, flags and device name.  Skips records before
 * the requested file offset and stops once the buffer is full.
 *
 * NOTE(review): elided listing -- the declaration of `i`, the arg->skip
 * increment/return in the offset-skipping branch, the arg->len updates
 * after the address loops, the prefix-length sprintf arguments
 * (rt->rt6i_dst.plen / rt->rt6i_src.plen), `arg->len += 32;` style
 * accounting, else-branches' braces and the trailing `return 0;` are all
 * missing.  The definition of struct rt6_proc_arg (lines ~1891-1898) is
 * also not visible in this chunk.
 */
1900 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1902 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1905 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1910 if (arg->len >= arg->length)
1913 for (i=0; i<16; i++) {
1914 sprintf(arg->buffer + arg->len, "%02x",
1915 rt->rt6i_dst.addr.s6_addr[i]);
1918 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1921 #ifdef CONFIG_IPV6_SUBTREES
1922 for (i=0; i<16; i++) {
1923 sprintf(arg->buffer + arg->len, "%02x",
1924 rt->rt6i_src.addr.s6_addr[i]);
1927 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1930 sprintf(arg->buffer + arg->len,
1931 "00000000000000000000000000000000 00 ");
1935 if (rt->rt6i_nexthop) {
1936 for (i=0; i<16; i++) {
1937 sprintf(arg->buffer + arg->len, "%02x",
1938 rt->rt6i_nexthop->primary_key[i]);
1942 sprintf(arg->buffer + arg->len,
1943 "00000000000000000000000000000000");
1946 arg->len += sprintf(arg->buffer + arg->len,
1947 " %08x %08x %08x %08x %8s\n",
1948 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1949 rt->u.dst.__use, rt->rt6i_flags,
1950 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * rt6_proc_info() -- old-style procfs read handler for /proc/net/ipv6_route.
 * Fills `buffer` by walking the routing table with rt6_info_route() under
 * rt6_lock, then adjusts *start / arg.len for the partial-record offset so
 * sequential reads line up on RT6_INFO_LEN boundaries.
 *
 * NOTE(review): elided listing -- the opening brace, initialization of
 * arg.skip/arg.len, the `*start = buffer;` assignment before the offset
 * fixup, the arg.len clamping branches and the final `return arg.len;`
 * (with closing brace) are missing.
 */
1954 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1956 struct rt6_proc_arg arg;
1957 arg.buffer = buffer;
1958 arg.offset = offset;
1959 arg.length = length;
1963 read_lock_bh(&rt6_lock);
1964 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1965 read_unlock_bh(&rt6_lock);
1969 *start += offset % RT6_INFO_LEN;
1971 arg.len -= offset % RT6_INFO_LEN;
1973 if (arg.len > length)
/*
 * rt6_stats_seq_show() -- seq_file show routine for /proc/net/rt6_stats:
 * one line of hex counters (fib nodes, route nodes, allocs, entries,
 * cache entries, live dst entries, discarded routes).
 * NOTE(review): braces and the `return 0;` are elided from this listing.
 */
1981 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1983 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1984 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1985 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1986 rt6_stats.fib_rt_cache,
1987 atomic_read(&ip6_dst_ops.entries),
1988 rt6_stats.fib_discarded_routes);
/*
 * rt6_stats_seq_open() -- open hook wiring rt6_stats_seq_show into the
 * single_open() seq_file helper.  (Braces elided from this listing.)
 */
1993 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1995 return single_open(file, rt6_stats_seq_show, NULL);
/*
 * File operations for /proc/net/rt6_stats (single_open-based seq_file).
 * NOTE(review): the `.read = seq_read,` line (original line 2001) and the
 * closing `};` are elided from this listing.
 */
1998 static struct file_operations rt6_stats_seq_fops = {
1999 .owner = THIS_MODULE,
2000 .open = rt6_stats_seq_open,
2002 .llseek = seq_lseek,
2003 .release = single_release,
2005 #endif /* CONFIG_PROC_FS */
2007 #ifdef CONFIG_SYSCTL
/* Scratch integer written via the net.ipv6.route.flush sysctl below. */
2009 static int flush_delay;
/*
 * ipv6_sysctl_rtcache_flush() -- sysctl handler: on write, parse the
 * integer with proc_dointvec() and trigger a routing-table GC/flush;
 * flush_delay <= 0 means flush everything (~0UL), otherwise GC with the
 * given delay.
 * NOTE(review): elided listing -- the opening brace, the `if (write)`
 * guard around the flush, the read-path return (presumably -EINVAL) and
 * the closing brace are missing.
 */
2012 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2013 void __user *buffer, size_t *lenp, loff_t *ppos)
2016 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2017 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
/*
 * net.ipv6.route.* sysctl table: flush trigger, dst-cache GC tuning knobs
 * (backed by the ip6_rt_* file-scope variables declared near the top of the
 * file) and PMTU/advmss limits.  Jiffies-valued entries use the
 * proc_dointvec_jiffies / sysctl_jiffies pair so userspace sees seconds;
 * gc_min_interval_ms exposes the same variable in milliseconds.
 * NOTE(review): elided listing -- the per-entry opening/closing braces,
 * every `.mode = 0644` (0200 for "flush") line and the terminating
 * `{ .ctl_name = 0 }` sentinel plus closing `};` are missing.
 */
2023 ctl_table ipv6_route_table[] = {
2025 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2026 .procname = "flush",
2027 .data = &flush_delay,
2028 .maxlen = sizeof(int),
2030 .proc_handler = &ipv6_sysctl_rtcache_flush
2033 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2034 .procname = "gc_thresh",
2035 .data = &ip6_dst_ops.gc_thresh,
2036 .maxlen = sizeof(int),
2038 .proc_handler = &proc_dointvec,
2041 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2042 .procname = "max_size",
2043 .data = &ip6_rt_max_size,
2044 .maxlen = sizeof(int),
2046 .proc_handler = &proc_dointvec,
2049 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2050 .procname = "gc_min_interval",
2051 .data = &ip6_rt_gc_min_interval,
2052 .maxlen = sizeof(int),
2054 .proc_handler = &proc_dointvec_jiffies,
2055 .strategy = &sysctl_jiffies,
2058 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2059 .procname = "gc_timeout",
2060 .data = &ip6_rt_gc_timeout,
2061 .maxlen = sizeof(int),
2063 .proc_handler = &proc_dointvec_jiffies,
2064 .strategy = &sysctl_jiffies,
2067 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2068 .procname = "gc_interval",
2069 .data = &ip6_rt_gc_interval,
2070 .maxlen = sizeof(int),
2072 .proc_handler = &proc_dointvec_jiffies,
2073 .strategy = &sysctl_jiffies,
2076 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2077 .procname = "gc_elasticity",
2078 .data = &ip6_rt_gc_elasticity,
2079 .maxlen = sizeof(int),
2081 .proc_handler = &proc_dointvec_jiffies,
2082 .strategy = &sysctl_jiffies,
2085 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2086 .procname = "mtu_expires",
2087 .data = &ip6_rt_mtu_expires,
2088 .maxlen = sizeof(int),
2090 .proc_handler = &proc_dointvec_jiffies,
2091 .strategy = &sysctl_jiffies,
2094 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2095 .procname = "min_adv_mss",
2096 .data = &ip6_rt_min_advmss,
2097 .maxlen = sizeof(int),
2099 .proc_handler = &proc_dointvec_jiffies,
2100 .strategy = &sysctl_jiffies,
2103 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2104 .procname = "gc_min_interval_ms",
2105 .data = &ip6_rt_gc_min_interval,
2106 .maxlen = sizeof(int),
2108 .proc_handler = &proc_dointvec_ms_jiffies,
2109 .strategy = &sysctl_ms_jiffies,
/*
 * ip6_route_init() -- boot-time initialization: create the rt6_info slab
 * cache (panic on failure, routing cannot work without it) and register
 * the /proc/net/ipv6_route and /proc/net/rt6_stats entries.
 *
 * NOTE(review): elided listing -- the opening brace, the trailing NULL
 * ctor/dtor arguments of kmem_cache_create, the fib6_init() call
 * (original lines ~2126-2127), the NULL check on `p` before setting
 * p->owner, the #endif and closing brace are missing.
 */
2116 void __init ip6_route_init(void)
2118 struct proc_dir_entry *p;
2120 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2121 sizeof(struct rt6_info),
2122 0, SLAB_HWCACHE_ALIGN,
2124 if (!ip6_dst_ops.kmem_cachep)
2125 panic("cannot create ip6_dst_cache");
2128 #ifdef CONFIG_PROC_FS
2129 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2131 p->owner = THIS_MODULE;
2133 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2140 void ip6_route_cleanup(void)
2142 #ifdef CONFIG_PROC_FS
2143 proc_net_remove("ipv6_route");
2144 proc_net_remove("rt6_stats");
2151 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);