2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
26 * Fixed routing subtrees.
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <net/net_namespace.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
/*
 * Debug/trace helpers.  NOTE(review): this excerpt elides the
 * surrounding #if RT6_DEBUG / #else / #endif conditionals, which is
 * why RT6_TRACE appears defined twice — the printk variant and the
 * no-op variant.  Confirm against the full file before editing.
 */
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
/* When 0, off-link (gatewayed) routes are not cloned per destination;
 * see the #if CLONE_OFFLINK_ROUTE use inside ip6_pol_route(). */
75 #define CLONE_OFFLINK_ROUTE 0
/* Forward declarations for the dst_ops callbacks and local helpers
 * defined later in this file. */
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/* RFC 4191 Route Information option support (add/lookup of
 * RTF_ROUTEINFO routes learned from router advertisements). */
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
/* Main dst_ops table for IPv6 routes: wires the generic destination
 * cache callbacks to the ip6_* implementations below. */
100 static struct dst_ops ip6_dst_ops = {
102 .protocol = __constant_htons(ETH_P_IPV6),
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = ip6_local_out,
112 .entry_size = sizeof(struct rt6_info),
113 .entries = ATOMIC_INIT(0),
/* Blackhole dst variant: its update_pmtu must be a no-op body
 * (definition elided in this excerpt) so PMTU events cannot mutate
 * a blackhole entry. */
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
120 static struct dst_ops ip6_dst_blackhole_ops = {
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
127 .entries = ATOMIC_INIT(0),
/* Statically allocated sentinel routes.  All three carry
 * RTF_REJECT|RTF_NONEXTHOP, the worst possible metric (~0), and a
 * permanent refcount so they are never freed.  Lookups that fail
 * return one of these instead of NULL. */
130 struct rt6_info ip6_null_entry = {
133 .__refcnt = ATOMIC_INIT(1),
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
141 .path = (struct dst_entry*)&ip6_null_entry,
144 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
145 .rt6i_metric = ~(u32) 0,
146 .rt6i_ref = ATOMIC_INIT(1),
/* With policy routing, "prohibit" and "blackhole" actions need their
 * own sentinels: prohibit replies with an ICMP error (elided handlers
 * below), blackhole silently discards via dst_discard. */
149 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
151 static int ip6_pkt_prohibit(struct sk_buff *skb);
152 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
154 struct rt6_info ip6_prohibit_entry = {
157 .__refcnt = ATOMIC_INIT(1),
161 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
162 .input = ip6_pkt_prohibit,
163 .output = ip6_pkt_prohibit_out,
165 .path = (struct dst_entry*)&ip6_prohibit_entry,
168 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
169 .rt6i_metric = ~(u32) 0,
170 .rt6i_ref = ATOMIC_INIT(1),
173 struct rt6_info ip6_blk_hole_entry = {
176 .__refcnt = ATOMIC_INIT(1),
180 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
181 .input = dst_discard,
182 .output = dst_discard,
184 .path = (struct dst_entry*)&ip6_blk_hole_entry,
187 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
194 /* allocate dst with ip6_dst_ops */
195 static __inline__ struct rt6_info *ip6_dst_alloc(void)
/* Thin wrapper: every rt6_info in this file is a dst_entry allocated
 * against ip6_dst_ops so the callbacks above apply. */
197 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/* dst_ops->destroy: drop the route's reference on its inet6_dev.
 * NOTE(review): the in6_dev_put(idev) call is elided from this
 * excerpt; only the NULL-ing of rt6i_idev is visible. */
200 static void ip6_dst_destroy(struct dst_entry *dst)
202 struct rt6_info *rt = (struct rt6_info *)dst;
203 struct inet6_dev *idev = rt->rt6i_idev;
206 rt->rt6i_idev = NULL;
/* dst_ops->ifdown: when the route's device goes away, re-point
 * rt6i_idev at the per-namespace loopback device's inet6_dev so the
 * entry stays valid until it is garbage collected.  The matching
 * in6_dev_put() on the old idev is elided from this excerpt. */
211 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
214 struct rt6_info *rt = (struct rt6_info *)dst;
215 struct inet6_dev *idev = rt->rt6i_idev;
216 struct net_device *loopback_dev =
217 dev->nd_net->loopback_dev;
219 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
220 struct inet6_dev *loopback_idev =
221 in6_dev_get(loopback_dev);
222 if (loopback_idev != NULL) {
223 rt->rt6i_idev = loopback_idev;
/* True if the route carries RTF_EXPIRES and its deadline has passed. */
229 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
231 return (rt->rt6i_flags & RTF_EXPIRES &&
232 time_after(jiffies, rt->rt6i_expires));
/* Multicast and link-local destinations require a strict
 * outgoing-interface match (RT6_LOOKUP_F_IFACE) during lookup. */
235 static inline int rt6_need_strict(struct in6_addr *daddr)
237 return (ipv6_addr_type(daddr) &
238 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
242 * Route lookup. Any table->tb6_lock is implied.
/*
 * Walk the sibling routes of @rt looking for one whose device matches
 * @oif.  Loopback routes are tracked separately in @local as a
 * fallback.  Falls back to &ip6_null_entry when nothing matches.
 * NOTE(review): several control-flow lines (returns, the !oif branch)
 * are elided from this excerpt.
 */
245 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
249 struct rt6_info *local = NULL;
250 struct rt6_info *sprt;
253 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
254 struct net_device *dev = sprt->rt6i_dev;
255 if (dev->ifindex == oif)
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
262 if (local && (!oif ||
263 local->rt6i_idev->dev->ifindex == oif))
274 return &ip6_null_entry;
279 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing (RFC 4191 §3.5): if the nexthop
 * neighbour is not in a VALID NUD state and the per-device probe
 * interval has elapsed, send a unicast-solicitation NS to the router.
 * Rate-limited via neigh->updated.
 */
280 static void rt6_probe(struct rt6_info *rt)
282 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
284 * Okay, this does not seem to be appropriate
285 * for now, however, we need to check if it
286 * is really so; aka Router Reachability Probing.
288 * Router Reachability Probe MUST be rate-limited
289 * to no more than one per minute.
291 if (!neigh || (neigh->nud_state & NUD_VALID))
293 read_lock_bh(&neigh->lock);
294 if (!(neigh->nud_state & NUD_VALID) &&
295 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
296 struct in6_addr mcaddr;
297 struct in6_addr *target;
/* Stamp before dropping the lock so concurrent callers are throttled. */
299 neigh->updated = jiffies;
300 read_unlock_bh(&neigh->lock);
302 target = (struct in6_addr *)&neigh->primary_key;
303 addrconf_addr_solict_mult(target, &mcaddr);
304 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
306 read_unlock_bh(&neigh->lock);
/* No-op stub when CONFIG_IPV6_ROUTER_PREF is not set (the #else is
 * elided from this excerpt). */
309 static inline void rt6_probe(struct rt6_info *rt)
316 * Default Router Selection (RFC 2461 6.3.6)
/* Score the route's device against the requested oif: returns
 * non-zero on a match (the precise return values are elided here). */
318 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
320 struct net_device *dev = rt->rt6i_dev;
321 if (!oif || dev->ifindex == oif)
323 if ((dev->flags & IFF_LOOPBACK) &&
324 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/* Score nexthop reachability: routes without a gateway nexthop are
 * trivially acceptable; otherwise inspect the neighbour's NUD state
 * under its read lock.  Return values are elided in this excerpt. */
329 static inline int rt6_check_neigh(struct rt6_info *rt)
331 struct neighbour *neigh = rt->rt6i_nexthop;
333 if (rt->rt6i_flags & RTF_NONEXTHOP ||
334 !(rt->rt6i_flags & RTF_GATEWAY))
337 read_lock_bh(&neigh->lock);
338 if (neigh->nud_state & NUD_VALID)
340 #ifdef CONFIG_IPV6_ROUTER_PREF
341 else if (neigh->nud_state & NUD_FAILED)
346 read_unlock_bh(&neigh->lock);
/* Combine device match, router-preference bits (RFC 4191) and
 * neighbour reachability into a single comparable score.  A strict
 * lookup rejects (elided return) routes failing the required check. */
352 static int rt6_score_route(struct rt6_info *rt, int oif,
357 m = rt6_check_dev(rt, oif);
358 if (!m && (strict & RT6_LOOKUP_F_IFACE))
360 #ifdef CONFIG_IPV6_ROUTER_PREF
361 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
363 n = rt6_check_neigh(rt);
364 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
/* Keep whichever of (match, rt) scores higher in *mpri; expired
 * routes are skipped up front.  Several branches are elided. */
369 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
370 int *mpri, struct rt6_info *match)
374 if (rt6_check_expired(rt))
377 m = rt6_score_route(rt, oif, strict);
382 if (strict & RT6_LOOKUP_F_REACHABLE)
386 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
/* Scan all routes of the same metric in two passes — from the
 * round-robin head to the end, then from the leaf back to the head —
 * so every equal-metric sibling is considered exactly once. */
394 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
395 struct rt6_info *rr_head,
396 u32 metric, int oif, int strict)
398 struct rt6_info *rt, *match;
402 for (rt = rr_head; rt && rt->rt6i_metric == metric;
403 rt = rt->u.dst.rt6_next)
404 match = find_match(rt, oif, strict, &mpri, match);
405 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
406 rt = rt->u.dst.rt6_next)
407 match = find_match(rt, oif, strict, &mpri, match);
/*
 * Default router selection (RFC 2461 6.3.6): pick the best route
 * among equal-metric siblings; when none is reachable, advance the
 * per-node round-robin pointer (fn->rr_ptr) so routers are tried in
 * turn.  Falls back to &ip6_null_entry.
 */
412 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
414 struct rt6_info *match, *rt0;
416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
417 __FUNCTION__, fn->leaf, oif);
421 fn->rr_ptr = rt0 = fn->leaf;
423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
426 (strict & RT6_LOOKUP_F_REACHABLE)) {
427 struct rt6_info *next = rt0->u.dst.rt6_next;
429 /* no entries matched; do round-robin */
430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
437 RT6_TRACE("%s() => %p\n",
438 __FUNCTION__, match);
440 return (match ? match : &ip6_null_entry);
443 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process an RFC 4191 Route Information option received in a Router
 * Advertisement on @dev from @gwaddr: validate length/prefix_len,
 * then add, refresh, or (on zero lifetime) remove the corresponding
 * RTF_ROUTEINFO route.  Error returns are elided in this excerpt.
 */
444 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
445 struct in6_addr *gwaddr)
447 struct net *net = dev->nd_net;
448 struct route_info *rinfo = (struct route_info *) opt;
449 struct in6_addr prefix_buf, *prefix;
454 if (len < sizeof(struct route_info)) {
458 /* Sanity check for prefix_len and length */
459 if (rinfo->length > 3) {
461 } else if (rinfo->prefix_len > 128) {
463 } else if (rinfo->prefix_len > 64) {
464 if (rinfo->length < 2) {
467 } else if (rinfo->prefix_len > 0) {
468 if (rinfo->length < 1) {
473 pref = rinfo->route_pref;
474 if (pref == ICMPV6_ROUTER_PREF_INVALID)
475 pref = ICMPV6_ROUTER_PREF_MEDIUM;
477 lifetime = ntohl(rinfo->lifetime);
/* 0xffffffff means "infinite" per RFC 4191; cap finite lifetimes so
 * the jiffies conversion below cannot overflow. */
478 if (lifetime == 0xffffffff) {
480 } else if (lifetime > 0x7fffffff/HZ) {
481 /* Avoid arithmetic overflow */
482 lifetime = 0x7fffffff/HZ - 1;
485 if (rinfo->length == 3)
486 prefix = (struct in6_addr *)rinfo->prefix;
488 /* this function is safe */
489 ipv6_addr_prefix(&prefix_buf,
490 (struct in6_addr *)rinfo->prefix,
492 prefix = &prefix_buf;
495 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
/* lifetime == 0: withdraw the route (deletion path elided). */
498 if (rt && !lifetime) {
504 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
507 rt->rt6i_flags = RTF_ROUTEINFO |
508 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
511 if (lifetime == 0xffffffff) {
512 rt->rt6i_flags &= ~RTF_EXPIRES;
514 rt->rt6i_expires = jiffies + HZ * lifetime;
515 rt->rt6i_flags |= RTF_EXPIRES;
517 dst_release(&rt->u.dst);
/*
 * Shared lookup-retry macro: when the current candidate is
 * &ip6_null_entry, climb toward the tree root (descending into a
 * source-address subtree where one exists) until a node carrying
 * route info (RTN_RTINFO) or the tree root is reached.  Expects
 * `rt`, `fn` and a `restart` label in the enclosing function; the
 * parent-step and goto lines are elided in this excerpt.
 */
523 #define BACKTRACK(saddr) \
525 if (rt == &ip6_null_entry) { \
526 struct fib6_node *pn; \
528 if (fn->fn_flags & RTN_TL_ROOT) \
531 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
532 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
535 if (fn->fn_flags & RTN_RTINFO) \
/* Simple (non-cloning) table lookup under tb6_lock: locate the fib
 * node, filter siblings by device, backtrack on failure, and bump
 * the use counter of the returned entry. */
541 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
542 struct flowi *fl, int flags)
544 struct fib6_node *fn;
547 read_lock_bh(&table->tb6_lock);
548 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
551 rt = rt6_device_match(rt, fl->oif, flags);
552 BACKTRACK(&fl->fl6_src);
554 dst_use(&rt->u.dst, jiffies);
555 read_unlock_bh(&table->tb6_lock);
/* Public lookup entry point: builds a flowi from daddr/saddr/oif and
 * dispatches through the policy-routing rule engine.  @strict maps to
 * RT6_LOOKUP_F_IFACE.  Caller must dst_release() the result. */
560 struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
561 struct in6_addr *saddr, int oif, int strict)
571 struct dst_entry *dst;
572 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
575 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
576 flags |= RT6_LOOKUP_F_HAS_SADDR;
579 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
581 return (struct rt6_info *) dst;
588 EXPORT_SYMBOL(rt6_lookup);
590 /* ip6_ins_rt is called with FREE table->tb6_lock.
591 It takes new route entry, the addition fails by any reason the
592 route is freed. In any case, if caller does not hold it, it may
/* Insert @rt into its fib6 table under the table write lock;
 * ownership of @rt passes to the tree (freed on failure). */
596 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
599 struct fib6_table *table;
601 table = rt->rt6i_table;
602 write_lock_bh(&table->tb6_lock);
603 err = fib6_add(&table->tb6_root, rt, info);
604 write_unlock_bh(&table->tb6_lock);
/* Convenience wrapper supplying netlink info derived from the
 * route's own device namespace. */
609 int ip6_ins_rt(struct rt6_info *rt)
611 struct nl_info info = {
612 .nl_net = rt->rt6i_dev->nd_net,
614 return __ip6_ins_rt(rt, &info);
/*
 * Clone @ort into a host (plen 128) RTF_CACHE route for @daddr.
 * For non-gateway originals the destination itself becomes the
 * gateway (on-link), with RTF_ANYCAST set when @daddr equals the
 * prefix address of a non-host route.  A fresh neighbour entry is
 * resolved for the nexthop.  Error paths are elided in this excerpt.
 */
617 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618 struct in6_addr *saddr)
626 rt = ip6_rt_copy(ort);
629 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630 if (rt->rt6i_dst.plen != 128 &&
631 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632 rt->rt6i_flags |= RTF_ANYCAST;
633 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
636 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
637 rt->rt6i_dst.plen = 128;
638 rt->rt6i_flags |= RTF_CACHE;
639 rt->u.dst.flags |= DST_HOST;
641 #ifdef CONFIG_IPV6_SUBTREES
642 if (rt->rt6i_src.plen && saddr) {
643 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644 rt->rt6i_src.plen = 128;
648 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/* Lighter clone for gatewayed/NONEXTHOP routes: same host/cache
 * conversion but the original's neighbour entry is shared
 * (neigh_clone) instead of re-resolved. */
655 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
657 struct rt6_info *rt = ip6_rt_copy(ort);
659 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660 rt->rt6i_dst.plen = 128;
661 rt->rt6i_flags |= RTF_CACHE;
662 rt->u.dst.flags |= DST_HOST;
663 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/*
 * Core routing resolver used for both input and output paths:
 * select the best route under tb6_lock, then — outside the lock —
 * clone it into an RTF_CACHE entry (COW for on-link, plain clone for
 * off-link when CLONE_OFFLINK_ROUTE) and insert the clone.  Because
 * the lock is dropped across insertion, a concurrent insert forces a
 * relookup (see comment near line 720).  Several labels/branches are
 * elided in this excerpt.
 */
668 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
669 struct flowi *fl, int flags)
671 struct fib6_node *fn;
672 struct rt6_info *rt, *nrt;
/* Hosts (forwarding disabled) prefer reachable routers, RFC 4191. */
676 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
678 strict |= flags & RT6_LOOKUP_F_IFACE;
681 read_lock_bh(&table->tb6_lock);
684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
687 rt = rt6_select(fn, oif, strict | reachable);
688 BACKTRACK(&fl->fl6_src);
689 if (rt == &ip6_null_entry ||
690 rt->rt6i_flags & RTF_CACHE)
693 dst_hold(&rt->u.dst);
694 read_unlock_bh(&table->tb6_lock);
696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
699 #if CLONE_OFFLINK_ROUTE
700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
706 dst_release(&rt->u.dst);
707 rt = nrt ? : &ip6_null_entry;
709 dst_hold(&rt->u.dst);
711 err = ip6_ins_rt(nrt);
720 * Race condition! In the gap, when table->tb6_lock was
721 * released someone could insert this route. Relookup.
723 dst_release(&rt->u.dst);
731 dst_hold(&rt->u.dst);
732 read_unlock_bh(&table->tb6_lock);
734 rt->u.dst.lastuse = jiffies;
/* Input-path adapter: route on the incoming interface (fl->iif). */
740 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
741 struct flowi *fl, int flags)
743 return ip6_pol_route(table, fl->iif, fl, flags);
/* Attach a route to an incoming skb: build a flowi from the IPv6
 * header and resolve through the policy rules into skb->dst. */
746 void ip6_route_input(struct sk_buff *skb)
748 struct ipv6hdr *iph = ipv6_hdr(skb);
749 struct net *net = skb->dev->nd_net;
750 int flags = RT6_LOOKUP_F_HAS_SADDR;
752 .iif = skb->dev->ifindex,
757 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
761 .proto = iph->nexthdr,
764 if (rt6_need_strict(&iph->daddr))
765 flags |= RT6_LOOKUP_F_IFACE;
767 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
/* Output-path adapter: route on the requested egress interface. */
770 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
771 struct flowi *fl, int flags)
773 return ip6_pol_route(table, fl->oif, fl, flags);
/* Resolve an outgoing flow to a dst_entry.  NOTE(review): this
 * version hard-codes &init_net rather than deriving the namespace
 * from @sk — presumably predates full net-namespace conversion. */
776 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
780 if (rt6_need_strict(&fl->fl6_dst))
781 flags |= RT6_LOOKUP_F_IFACE;
783 if (!ipv6_addr_any(&fl->fl6_src))
784 flags |= RT6_LOOKUP_F_HAS_SADDR;
786 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
789 EXPORT_SYMBOL(ip6_route_output);
/*
 * Replace *dstp with a "blackhole" copy of itself: a dst allocated
 * from ip6_dst_blackhole_ops whose input/output both discard, but
 * which preserves the original's metrics, device, idev, gateway and
 * keys so the socket keeps a coherent (if inert) route.  Returns 0
 * on success, -ENOMEM when allocation failed.
 */
791 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
793 struct rt6_info *ort = (struct rt6_info *) *dstp;
794 struct rt6_info *rt = (struct rt6_info *)
795 dst_alloc(&ip6_dst_blackhole_ops);
796 struct dst_entry *new = NULL;
801 atomic_set(&new->__refcnt, 1);
803 new->input = dst_discard;
804 new->output = dst_discard;
806 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
807 new->dev = ort->u.dst.dev;
810 rt->rt6i_idev = ort->rt6i_idev;
812 in6_dev_hold(rt->rt6i_idev);
813 rt->rt6i_expires = 0;
815 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
/* The copy must never expire on its own. */
816 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
819 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
820 #ifdef CONFIG_IPV6_SUBTREES
821 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
829 return (new ? 0 : -ENOMEM);
831 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
834 * Destination cache support functions
/* dst_ops->check: a cached route stays valid while its fib node's
 * serial number matches the cookie taken at lookup time. */
837 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
841 rt = (struct rt6_info *) dst;
843 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/* dst_ops->negative_advice: drop a misbehaving cached clone
 * (deletion call elided in this excerpt). */
849 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
851 struct rt6_info *rt = (struct rt6_info *) dst;
854 if (rt->rt6i_flags & RTF_CACHE)
/* dst_ops->link_failure: report unreachability to the sender, then
 * expire the cached clone or invalidate the default route's node. */
862 static void ip6_link_failure(struct sk_buff *skb)
866 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
868 rt = (struct rt6_info *) skb->dst;
870 if (rt->rt6i_flags&RTF_CACHE) {
871 dst_set_expires(&rt->u.dst, 0);
872 rt->rt6i_flags |= RTF_EXPIRES;
873 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
874 rt->rt6i_node->fn_sernum = -1;
/* dst_ops->update_pmtu: only host (plen 128) routes are modified;
 * below IPV6_MIN_MTU, keep the minimum but request fragment headers
 * via RTAX_FEATURE_ALLFRAG (RFC 2460 §5). */
878 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
880 struct rt6_info *rt6 = (struct rt6_info*)dst;
882 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
883 rt6->rt6i_flags |= RTF_MODIFIED;
884 if (mtu < IPV6_MIN_MTU) {
886 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
888 dst->metrics[RTAX_MTU-1] = mtu;
889 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
893 static int ipv6_get_mtu(struct net_device *dev);
/* Derive the advertised MSS from a path MTU: subtract fixed IPv6 and
 * TCP header sizes, clamp to the per-namespace minimum, and cap at
 * IPV6_MAXPLEN (the "any MSS, rely on PMTU discovery" sentinel). */
895 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
897 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
899 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
900 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
903 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
904 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
905 * IPV6_MAXPLEN is also valid and means: "any MSS,
906 * rely only on pmtu discovery"
908 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* ICMPv6/ndisc destinations are kept off the fib tree on a private
 * list, reaped by icmp6_dst_gc() below. */
913 static struct dst_entry *icmp6_dst_gc_list;
914 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * Build a standalone host dst for sending ndisc/ICMPv6 packets to
 * @addr via @dev, without inserting it into any routing table.  The
 * entry is chained onto icmp6_dst_gc_list and fib6 GC is kicked so
 * it is reclaimed once its refcount drops.  Error-path lines are
 * elided from this excerpt.
 */
916 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
917 struct neighbour *neigh,
918 struct in6_addr *addr)
921 struct inet6_dev *idev = in6_dev_get(dev);
922 struct net *net = dev->nd_net;
924 if (unlikely(idev == NULL))
927 rt = ip6_dst_alloc();
928 if (unlikely(rt == NULL)) {
/* No neighbour supplied by the caller: resolve one ourselves. */
937 neigh = ndisc_get_neigh(dev, addr);
940 rt->rt6i_idev = idev;
941 rt->rt6i_nexthop = neigh;
942 atomic_set(&rt->u.dst.__refcnt, 1);
943 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
944 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
945 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
946 rt->u.dst.output = ip6_output;
948 #if 0 /* there's no chance to use these for ndisc */
949 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
952 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
953 rt->rt6i_dst.plen = 128;
956 spin_lock_bh(&icmp6_dst_lock);
957 rt->u.dst.next = icmp6_dst_gc_list;
958 icmp6_dst_gc_list = &rt->u.dst;
959 spin_unlock_bh(&icmp6_dst_lock);
961 fib6_force_start_gc(net);
/* Walk icmp6_dst_gc_list, unlinking and freeing entries whose
 * refcount has dropped to zero (unlink/free lines elided). */
967 int icmp6_dst_gc(int *more)
969 struct dst_entry *dst, *next, **pprev;
975 spin_lock_bh(&icmp6_dst_lock);
976 pprev = &icmp6_dst_gc_list;
978 while ((dst = *pprev) != NULL) {
979 if (!atomic_read(&dst->__refcnt)) {
989 spin_unlock_bh(&icmp6_dst_lock);
/* dst_ops->gc: run fib6 GC with an adaptive expiry — backs off while
 * under the entry limit, tightens (halves toward the configured
 * timeout) when over the gc threshold.  `expire`/`last_gc` are
 * function-static, so state persists across calls.  NOTE(review):
 * uses init_net sysctls only; presumably pre-namespace code. */
994 static int ip6_dst_gc(struct dst_ops *ops)
996 static unsigned expire = 30*HZ;
997 static unsigned long last_gc;
998 unsigned long now = jiffies;
1000 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1001 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1005 fib6_run_gc(expire, &init_net);
1007 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1008 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1011 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1012 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1015 /* Clean host part of a prefix. Not necessary in radix tree,
1016 but results in cleaner routing tables.
1018 Remove it only when all the things will work!
/* Device MTU from its inet6_dev, defaulting to IPV6_MIN_MTU (1280)
 * when the device has no IPv6 configuration.  The in6_dev_put()
 * release is elided in this excerpt. */
1021 static int ipv6_get_mtu(struct net_device *dev)
1023 int mtu = IPV6_MIN_MTU;
1024 struct inet6_dev *idev;
1026 idev = in6_dev_get(dev);
1028 mtu = idev->cnf.mtu6;
/* Per-device hop limit, falling back to the global default. */
1034 int ipv6_get_hoplimit(struct net_device *dev)
1036 int hoplimit = ipv6_devconf.hop_limit;
1037 struct inet6_dev *idev;
1039 idev = in6_dev_get(dev);
1041 hoplimit = idev->cnf.hop_limit;
/*
 * Create and insert a route described by @cfg (from netlink/ioctl).
 * Validates prefix lengths, resolves the egress device — directly by
 * ifindex, or indirectly by looking up the gateway — fills in the
 * rt6_info (flags, metrics, nexthop neighbour) and inserts it via
 * __ip6_ins_rt().  On any failure the partially built route and held
 * references are released (cleanup labels elided in this excerpt).
 */
1051 int ip6_route_add(struct fib6_config *cfg)
1054 struct net *net = cfg->fc_nlinfo.nl_net;
1055 struct rt6_info *rt = NULL;
1056 struct net_device *dev = NULL;
1057 struct inet6_dev *idev = NULL;
1058 struct fib6_table *table;
1061 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1063 #ifndef CONFIG_IPV6_SUBTREES
1064 if (cfg->fc_src_len)
1067 if (cfg->fc_ifindex) {
1069 dev = dev_get_by_index(net, cfg->fc_ifindex);
1072 idev = in6_dev_get(dev);
1077 if (cfg->fc_metric == 0)
1078 cfg->fc_metric = IP6_RT_PRIO_USER;
1080 table = fib6_new_table(net, cfg->fc_table);
1081 if (table == NULL) {
1086 rt = ip6_dst_alloc();
1093 rt->u.dst.obsolete = -1;
1094 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1096 if (cfg->fc_protocol == RTPROT_UNSPEC)
1097 cfg->fc_protocol = RTPROT_BOOT;
1098 rt->rt6i_protocol = cfg->fc_protocol;
/* Choose input handler by destination class. */
1100 addr_type = ipv6_addr_type(&cfg->fc_dst);
1102 if (addr_type & IPV6_ADDR_MULTICAST)
1103 rt->u.dst.input = ip6_mc_input;
1105 rt->u.dst.input = ip6_forward;
1107 rt->u.dst.output = ip6_output;
1109 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1110 rt->rt6i_dst.plen = cfg->fc_dst_len;
1111 if (rt->rt6i_dst.plen == 128)
1112 rt->u.dst.flags = DST_HOST;
1114 #ifdef CONFIG_IPV6_SUBTREES
1115 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1116 rt->rt6i_src.plen = cfg->fc_src_len;
1119 rt->rt6i_metric = cfg->fc_metric;
1121 /* We cannot add true routes via loopback here,
1122 they would result in kernel looping; promote them to reject routes
1124 if ((cfg->fc_flags & RTF_REJECT) ||
1125 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1126 /* hold loopback dev/idev if we haven't done so. */
1127 if (dev != net->loopback_dev) {
1132 dev = net->loopback_dev;
1134 idev = in6_dev_get(dev);
1140 rt->u.dst.output = ip6_pkt_discard_out;
1141 rt->u.dst.input = ip6_pkt_discard;
1142 rt->u.dst.error = -ENETUNREACH;
1143 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
/* Gatewayed route: validate the nexthop address and, when no device
 * was given, inherit dev/idev from a lookup of the gateway itself. */
1147 if (cfg->fc_flags & RTF_GATEWAY) {
1148 struct in6_addr *gw_addr;
1151 gw_addr = &cfg->fc_gateway;
1152 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1153 gwa_type = ipv6_addr_type(gw_addr);
1155 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1156 struct rt6_info *grt;
1158 /* IPv6 strictly inhibits using not link-local
1159 addresses as nexthop address.
1160 Otherwise, router will not able to send redirects.
1161 It is very good, but in some (rare!) circumstances
1162 (SIT, PtP, NBMA NOARP links) it is handy to allow
1163 some exceptions. --ANK
1166 if (!(gwa_type&IPV6_ADDR_UNICAST))
1169 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1171 err = -EHOSTUNREACH;
1175 if (dev != grt->rt6i_dev) {
1176 dst_release(&grt->u.dst);
1180 dev = grt->rt6i_dev;
1181 idev = grt->rt6i_idev;
1183 in6_dev_hold(grt->rt6i_idev);
/* The gateway must itself be reached without another gateway. */
1185 if (!(grt->rt6i_flags&RTF_GATEWAY))
1187 dst_release(&grt->u.dst);
1193 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1201 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1202 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1203 if (IS_ERR(rt->rt6i_nexthop)) {
1204 err = PTR_ERR(rt->rt6i_nexthop);
1205 rt->rt6i_nexthop = NULL;
1210 rt->rt6i_flags = cfg->fc_flags;
/* Apply caller-supplied metrics, then fill sensible defaults. */
1217 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1218 int type = nla_type(nla);
1221 if (type > RTAX_MAX) {
1226 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1231 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1232 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1233 if (!rt->u.dst.metrics[RTAX_MTU-1])
1234 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1235 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1236 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1237 rt->u.dst.dev = dev;
1238 rt->rt6i_idev = idev;
1239 rt->rt6i_table = table;
1241 cfg->fc_nlinfo.nl_net = dev->nd_net;
1243 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1251 dst_free(&rt->u.dst);
/* Remove @rt from its table under the write lock; the sentinel
 * ip6_null_entry may never be deleted. */
1255 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1258 struct fib6_table *table;
1260 if (rt == &ip6_null_entry)
1263 table = rt->rt6i_table;
1264 write_lock_bh(&table->tb6_lock);
1266 err = fib6_del(rt, info);
1267 dst_release(&rt->u.dst);
1269 write_unlock_bh(&table->tb6_lock);
/* Wrapper supplying netlink info from the route's own namespace. */
1274 int ip6_del_rt(struct rt6_info *rt)
1276 struct nl_info info = {
1277 .nl_net = rt->rt6i_dev->nd_net,
1279 return __ip6_del_rt(rt, &info);
/* Delete the route matching @cfg: locate the exact prefix node, then
 * filter siblings by ifindex, gateway and metric when specified.
 * Error returns are elided in this excerpt. */
1282 static int ip6_route_del(struct fib6_config *cfg)
1284 struct fib6_table *table;
1285 struct fib6_node *fn;
1286 struct rt6_info *rt;
1289 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1293 read_lock_bh(&table->tb6_lock);
1295 fn = fib6_locate(&table->tb6_root,
1296 &cfg->fc_dst, cfg->fc_dst_len,
1297 &cfg->fc_src, cfg->fc_src_len);
1300 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1301 if (cfg->fc_ifindex &&
1302 (rt->rt6i_dev == NULL ||
1303 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1305 if (cfg->fc_flags & RTF_GATEWAY &&
1306 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1308 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
/* Hold the route across the unlock so deletion is safe. */
1310 dst_hold(&rt->u.dst);
1311 read_unlock_bh(&table->tb6_lock);
1313 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1316 read_unlock_bh(&table->tb6_lock);
/* flowi extended with the redirecting router's address, so the
 * per-table lookup callback can validate the sender. */
1324 struct ip6rd_flowi {
1326 struct in6_addr gateway;
/*
 * Find the route a Redirect applies to: the current gatewayed route
 * for fl6_dst whose device and gateway match the router that sent
 * the redirect (RFC 2461: redirects are only valid from the nexthop
 * actually in use).  Falls back to ip6_null_entry + BACKTRACK.
 */
1329 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1333 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1334 struct rt6_info *rt;
1335 struct fib6_node *fn;
1338 * Get the "current" route for this destination and
1339 * check if the redirect has come from approriate router.
1341 * RFC 2461 specifies that redirects should only be
1342 * accepted if they come from the nexthop to the target.
1343 * Due to the way the routes are chosen, this notion
1344 * is a bit fuzzy and one might need to check all possible
1348 read_lock_bh(&table->tb6_lock);
1349 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1351 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1353 * Current route is on-link; redirect is always invalid.
1355 * Seems, previous statement is not true. It could
1356 * be node, which looks for us as on-link (f.e. proxy ndisc)
1357 * But then router serving it might decide, that we should
1358 * know truth 8)8) --ANK (980726).
1360 if (rt6_check_expired(rt))
1362 if (!(rt->rt6i_flags & RTF_GATEWAY))
1364 if (fl->oif != rt->rt6i_dev->ifindex)
1366 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1372 rt = &ip6_null_entry;
1373 BACKTRACK(&fl->fl6_src);
1375 dst_hold(&rt->u.dst);
1377 read_unlock_bh(&table->tb6_lock);
/* Build the ip6rd_flowi and dispatch the redirect lookup through the
 * policy-routing rules. */
1382 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1383 struct in6_addr *src,
1384 struct in6_addr *gateway,
1385 struct net_device *dev)
1387 int flags = RT6_LOOKUP_F_HAS_SADDR;
1388 struct net *net = dev->nd_net;
1389 struct ip6rd_flowi rdfl = {
1391 .oif = dev->ifindex,
1399 .gateway = *gateway,
1402 if (rt6_need_strict(dest))
1403 flags |= RT6_LOOKUP_F_IFACE;
1405 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1406 flags, __ip6_route_redirect);
/*
 * Handle an accepted ICMPv6 Redirect: update the neighbour cache
 * with the new link-layer address, then install an RTF_CACHE host
 * route for @dest via the redirecting neighbour.  An old cached
 * route for the destination is expired (path elided).  Exit labels
 * and some branches are elided in this excerpt.
 */
1409 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1410 struct in6_addr *saddr,
1411 struct neighbour *neigh, u8 *lladdr, int on_link)
1413 struct rt6_info *rt, *nrt = NULL;
1414 struct netevent_redirect netevent;
1416 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1418 if (rt == &ip6_null_entry) {
1419 if (net_ratelimit())
1420 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1421 "for redirect target\n");
1426 * We have finally decided to accept it.
1429 neigh_update(neigh, lladdr, NUD_STALE,
1430 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1431 NEIGH_UPDATE_F_OVERRIDE|
1432 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1433 NEIGH_UPDATE_F_ISROUTER))
1437 * Redirect received -> path was valid.
1438 * Look, redirects are sent only in response to data packets,
1439 * so that this nexthop apparently is reachable. --ANK
1441 dst_confirm(&rt->u.dst);
1443 /* Duplicate redirect: silently ignore. */
1444 if (neigh == rt->u.dst.neighbour)
1447 nrt = ip6_rt_copy(rt);
1451 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
/* on_link redirect: the target is directly reachable, not gatewayed. */
1453 nrt->rt6i_flags &= ~RTF_GATEWAY;
1455 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1456 nrt->rt6i_dst.plen = 128;
1457 nrt->u.dst.flags |= DST_HOST;
1459 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1460 nrt->rt6i_nexthop = neigh_clone(neigh);
1461 /* Reset pmtu, it may be better */
1462 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1463 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1464 dst_mtu(&nrt->u.dst));
1466 if (ip6_ins_rt(nrt))
1469 netevent.old = &rt->u.dst;
1470 netevent.new = &nrt->u.dst;
1471 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1473 if (rt->rt6i_flags&RTF_CACHE) {
1479 dst_release(&rt->u.dst);
1484 * Handle ICMP "packet too big" messages
1485 * i.e. Path MTU discovery
/*
 * Apply a received PMTU to the route for (@daddr, @saddr) on @dev.
 * An existing RTF_CACHE host route is updated in place with a
 * ip6_rt_mtu_expires deadline; otherwise a cache clone (COW for
 * connected routes, plain clone for gatewayed/NONEXTHOP) is created
 * with the new MTU.  Several branches and labels are elided.
 */
1488 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1489 struct net_device *dev, u32 pmtu)
1491 struct rt6_info *rt, *nrt;
1492 struct net *net = dev->nd_net;
1495 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
/* Ignore reports that would not shrink the current path MTU. */
1499 if (pmtu >= dst_mtu(&rt->u.dst))
1502 if (pmtu < IPV6_MIN_MTU) {
1504 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1505 * MTU (1280) and a fragment header should always be included
1506 * after a node receiving Too Big message reporting PMTU is
1507 * less than the IPv6 Minimum Link MTU.
1509 pmtu = IPV6_MIN_MTU;
1513 /* New mtu received -> path was valid.
1514 They are sent only in response to data packets,
1515 so that this nexthop apparently is reachable. --ANK
1517 dst_confirm(&rt->u.dst);
1519 /* Host route. If it is static, it would be better
1520 not to override it, but add new one, so that
1521 when cache entry will expire old pmtu
1522 would return automatically.
1524 if (rt->rt6i_flags & RTF_CACHE) {
1525 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1527 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1528 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1529 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1534 Two cases are possible:
1535 1. It is connected route. Action: COW
1536 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1538 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1539 nrt = rt6_alloc_cow(rt, daddr, saddr);
1541 nrt = rt6_alloc_clone(rt, daddr);
1544 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1546 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1548 /* According to RFC 1981, detecting PMTU increase shouldn't be
1549 * happened within 5 mins, the recommended timer is 10 mins.
1550 * Here this route expiration time is set to ip6_rt_mtu_expires
1551 * which is 10 mins. After 10 mins the decreased pmtu is expired
1552 * and detecting PMTU increase will be automatically happened.
1554 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1555 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1560 dst_release(&rt->u.dst);
1564 * Misc support functions
/*
 * Duplicate a routing entry: allocate a fresh rt6_info and copy the
 * dst ops, metrics, device, idev and keys from @ort.  Expiry state is
 * deliberately cleared (rt6i_expires = 0, RTF_EXPIRES masked off) so
 * the copy starts with a clean lifetime.
 */
1567 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1569 struct rt6_info *rt = ip6_dst_alloc();
1572 rt->u.dst.input = ort->u.dst.input;
1573 rt->u.dst.output = ort->u.dst.output;
/* Copy the whole metrics array (MTU, ADVMSS, ...) in one shot. */
1575 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1576 rt->u.dst.error = ort->u.dst.error;
1577 rt->u.dst.dev = ort->u.dst.dev;
/* Take our own references on the device and inet6 device. */
1579 dev_hold(rt->u.dst.dev);
1580 rt->rt6i_idev = ort->rt6i_idev;
1582 in6_dev_hold(rt->rt6i_idev);
1583 rt->u.dst.lastuse = jiffies;
/* The copy never inherits the original's expiry. */
1584 rt->rt6i_expires = 0;
1586 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1587 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1588 rt->rt6i_metric = 0;
1590 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1591 #ifdef CONFIG_IPV6_SUBTREES
/* Source key only exists with source-routing subtrees enabled. */
1592 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key))
1594 rt->rt6i_table = ort->rt6i_table;
1599 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Look up an RFC 4191 route-information route for (prefix/prefixlen,
 * gwaddr, ifindex) in the RT6_TABLE_INFO table.  Returns the entry
 * with a dst reference held, or NULL if no match.
 */
1600 static struct rt6_info *rt6_get_route_info(struct net *net,
1601 struct in6_addr *prefix, int prefixlen,
1602 struct in6_addr *gwaddr, int ifindex)
1604 struct fib6_node *fn;
1605 struct rt6_info *rt = NULL;
1606 struct fib6_table *table;
1608 table = fib6_get_table(net, RT6_TABLE_INFO);
1612 write_lock_bh(&table->tb6_lock);
1613 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
/* Walk all routes at this node; match on device, flags and gateway. */
1617 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1618 if (rt->rt6i_dev->ifindex != ifindex)
/* Must be a gatewayed route learnt from a Route Information option. */
1620 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1622 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
/* Found: hand back a referenced entry. */
1624 dst_hold(&rt->u.dst);
1628 write_unlock_bh(&table->tb6_lock);
/*
 * Install a route learnt from an RFC 4191 Route Information option
 * into RT6_TABLE_INFO, then return the freshly-inserted entry via
 * rt6_get_route_info() (with a reference held).
 */
1632 static struct rt6_info *rt6_add_route_info(struct net *net,
1633 struct in6_addr *prefix, int prefixlen,
1634 struct in6_addr *gwaddr, int ifindex,
1637 struct fib6_config cfg = {
1638 .fc_table = RT6_TABLE_INFO,
1639 .fc_metric = IP6_RT_PRIO_USER,
1640 .fc_ifindex = ifindex,
1641 .fc_dst_len = prefixlen,
1642 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1643 RTF_UP | RTF_PREF(pref),
/* No originating netlink message: this comes from NDISC, not userspace. */
1645 .fc_nlinfo.nlh = NULL,
1646 .fc_nlinfo.nl_net = net,
1649 ipv6_addr_copy(&cfg.fc_dst, prefix);
1650 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1652 /* We should treat it as a default route if prefix length is 0. */
1654 cfg.fc_flags |= RTF_DEFAULT;
1656 ip6_route_add(&cfg);
/* Re-look it up so the caller gets a referenced rt6_info. */
1658 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/*
 * Find the addrconf default-router entry for gateway @addr on @dev in
 * RT6_TABLE_DFLT.  Returns the entry with a dst reference held, or
 * NULL when absent.
 */
1662 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1664 struct rt6_info *rt;
1665 struct fib6_table *table;
1667 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
1671 write_lock_bh(&table->tb6_lock);
/* Default routers all hang off the table root leaf list. */
1672 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1673 if (dev == rt->rt6i_dev &&
1674 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1675 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1679 dst_hold(&rt->u.dst);
1680 write_unlock_bh(&table->tb6_lock);
1684 EXPORT_SYMBOL(rt6_get_dflt_router);
/*
 * Add a default router (learnt via Router Advertisement) for gateway
 * @gwaddr on @dev, then return the inserted entry (referenced) via
 * rt6_get_dflt_router().
 */
1686 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1687 struct net_device *dev,
1690 struct fib6_config cfg = {
1691 .fc_table = RT6_TABLE_DFLT,
1692 .fc_metric = IP6_RT_PRIO_USER,
1693 .fc_ifindex = dev->ifindex,
/* RTF_EXPIRES: RA-learnt default routes age out with router lifetime. */
1694 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1695 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1697 .fc_nlinfo.nlh = NULL,
1698 .fc_nlinfo.nl_net = dev->nd_net,
1701 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1703 ip6_route_add(&cfg);
1705 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Remove every addrconf/default route from RT6_TABLE_DFLT.  The lock
 * is dropped before each deletion (the entry is pinned with dst_hold
 * first), so the scan restarts after every removal.
 */
1708 void rt6_purge_dflt_routers(struct net *net)
1710 struct rt6_info *rt;
1711 struct fib6_table *table;
1713 /* NOTE: Keep consistent with rt6_get_dflt_router */
1714 table = fib6_get_table(net, RT6_TABLE_DFLT);
1719 read_lock_bh(&table->tb6_lock);
1720 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1721 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
/* Pin the route, drop the lock, then delete it. */
1722 dst_hold(&rt->u.dst);
1723 read_unlock_bh(&table->tb6_lock);
1728 read_unlock_bh(&table->tb6_lock);
/*
 * Translate a legacy ioctl in6_rtmsg into the internal fib6_config
 * representation.  Routes added this way always target the MAIN table.
 */
1731 static void rtmsg_to_fib6_config(struct net *net,
1732 struct in6_rtmsg *rtmsg,
1733 struct fib6_config *cfg)
1735 memset(cfg, 0, sizeof(*cfg));
/* ioctl interface knows nothing of policy tables: always MAIN. */
1737 cfg->fc_table = RT6_TABLE_MAIN;
1738 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1739 cfg->fc_metric = rtmsg->rtmsg_metric;
1740 cfg->fc_expires = rtmsg->rtmsg_info;
1741 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1742 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1743 cfg->fc_flags = rtmsg->rtmsg_flags;
1745 cfg->fc_nlinfo.nl_net = net;
1747 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1748 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1749 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
/*
 * Handle the legacy SIOCADDRT/SIOCDELRT route ioctls.  Requires
 * CAP_NET_ADMIN; copies the userspace in6_rtmsg, converts it to a
 * fib6_config and dispatches to ip6_route_add()/ip6_route_del().
 */
1752 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1754 struct fib6_config cfg;
1755 struct in6_rtmsg rtmsg;
1759 case SIOCADDRT: /* Add a route */
1760 case SIOCDELRT: /* Delete a route */
/* Route manipulation is privileged. */
1761 if (!capable(CAP_NET_ADMIN))
1763 err = copy_from_user(&rtmsg, arg,
1764 sizeof(struct in6_rtmsg));
1768 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1773 err = ip6_route_add(&cfg);
1776 err = ip6_route_del(&cfg);
1790 * Drop the packet on the floor
/*
 * Common sink for unroutable packets: bump the appropriate MIB
 * counter and send an ICMPv6 Destination Unreachable with @code.
 * @ipstats_mib_noroutes selects the in- vs out-noroutes statistic.
 */
1793 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1796 switch (ipstats_mib_noroutes) {
1797 case IPSTATS_MIB_INNOROUTES:
1798 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
/* Unspecified/reserved destinations count as address errors instead. */
1799 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1800 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1804 case IPSTATS_MIB_OUTNOROUTES:
1805 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1808 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
/* dst input handler for blackhole/no-route entries (inbound path). */
1813 static int ip6_pkt_discard(struct sk_buff *skb)
1815 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* dst output handler for no-route entries; fix up skb->dev first so
 * the ICMP error is emitted from the right device. */
1818 static int ip6_pkt_discard_out(struct sk_buff *skb)
1820 skb->dev = skb->dst->dev;
1821 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1824 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* Input handler for "prohibit" routes: administratively prohibited. */
1826 static int ip6_pkt_prohibit(struct sk_buff *skb)
1828 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/* Output-side counterpart; set skb->dev before reporting. */
1831 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1833 skb->dev = skb->dst->dev;
1834 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1840 * Allocate a dst for local (unicast / anycast) address.
/*
 * Allocate a host route (plen 128) for a local unicast or anycast
 * address, bound to the loopback device and placed in the LOCAL
 * table.  Returns the new entry with one dst reference, or an
 * ERR_PTR(-ENOMEM) on allocation/neighbour failure.
 */
1843 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1844 const struct in6_addr *addr,
1847 struct net *net = idev->dev->nd_net;
1848 struct rt6_info *rt = ip6_dst_alloc();
1851 return ERR_PTR(-ENOMEM);
/* Local deliveries go via loopback; hold a ref on it. */
1853 dev_hold(net->loopback_dev);
1856 rt->u.dst.flags = DST_HOST;
1857 rt->u.dst.input = ip6_input;
1858 rt->u.dst.output = ip6_output;
1859 rt->rt6i_dev = net->loopback_dev;
1860 rt->rt6i_idev = idev;
1861 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1862 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1863 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1864 rt->u.dst.obsolete = -1;
1866 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1868 rt->rt6i_flags |= RTF_ANYCAST;
1870 rt->rt6i_flags |= RTF_LOCAL;
/* Resolve/allocate the neighbour entry up front; bail out on OOM. */
1871 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1872 if (rt->rt6i_nexthop == NULL) {
1873 dst_free(&rt->u.dst);
1874 return ERR_PTR(-ENOMEM);
1877 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
/* Host route: full 128-bit prefix. */
1878 rt->rt6i_dst.plen = 128;
1879 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1881 atomic_set(&rt->u.dst.__refcnt, 1);
1886 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1888 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1889 rt != &ip6_null_entry) {
1890 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Flush all routes through @dev (or every route when dev is NULL). */
1896 void rt6_ifdown(struct net *net, struct net_device *dev)
1898 fib6_clean_all(net, fib6_ifdown, 0, dev);
/* Argument bundle passed through fib6_clean_all() to
 * rt6_mtu_change_route(): the device whose MTU changed (and, per the
 * callback, the new MTU value). */
1901 struct rt6_mtu_change_arg
1903 struct net_device *dev;
/*
 * fib6_clean_all() callback applied on an administrative device MTU
 * change: update a route's cached PMTU/ADVMSS when the route uses the
 * affected device and its metric is not locked.
 */
1907 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1909 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1910 struct inet6_dev *idev;
1911 struct net *net = arg->dev->nd_net;
1913 /* In IPv6 pmtu discovery is not optional,
1914 so that RTAX_MTU lock cannot disable it.
1915 We still use this lock to block changes
1916 caused by addrconf/ndisc.
1919 idev = __in6_dev_get(arg->dev);
1923 /* For administrative MTU increase, there is no way to discover
1924 IPv6 PMTU increase, so PMTU increase should be updated here.
1925 Since RFC 1981 doesn't include administrative MTU increase
1926 update PMTU increase is a MUST. (i.e. jumbo frame)
1929 If new MTU is less than route PMTU, this new MTU will be the
1930 lowest MTU in the path, update the route PMTU to reflect PMTU
1931 decreases; if new MTU is greater than route PMTU, and the
1932 old MTU is the lowest MTU in the path, update the route PMTU
1933 to reflect the increase. In this case if the other nodes' MTU
1934 also have the lowest MTU, TOO BIG MESSAGE will be lead to
/* Apply only when: route is on this device, MTU metric unlocked, and
 * either we are shrinking, or growing from the device's old mtu6. */
1937 if (rt->rt6i_dev == arg->dev &&
1938 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1939 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1940 (dst_mtu(&rt->u.dst) < arg->mtu &&
1941 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1942 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1943 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
/* Propagate a device MTU change to every affected cached route. */
1948 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1950 struct rt6_mtu_change_arg arg = {
1955 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
/* Netlink attribute validation policy for IPv6 RTM_* requests. */
1958 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1959 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
1960 [RTA_OIF] = { .type = NLA_U32 },
1961 [RTA_IIF] = { .type = NLA_U32 },
1962 [RTA_PRIORITY] = { .type = NLA_U32 },
1963 [RTA_METRICS] = { .type = NLA_NESTED },
/*
 * Parse an rtnetlink RTM_NEWROUTE/RTM_DELROUTE message into a
 * fib6_config.  Validates attributes against rtm_ipv6_policy, checks
 * that prefix attributes carry at least ceil(plen/8) bytes, and lets
 * an explicit RTA_TABLE override rtm_table.
 */
1966 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1967 struct fib6_config *cfg)
1970 struct nlattr *tb[RTA_MAX+1];
1973 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1978 rtm = nlmsg_data(nlh);
1979 memset(cfg, 0, sizeof(*cfg));
1981 cfg->fc_table = rtm->rtm_table;
1982 cfg->fc_dst_len = rtm->rtm_dst_len;
1983 cfg->fc_src_len = rtm->rtm_src_len;
1984 cfg->fc_flags = RTF_UP;
1985 cfg->fc_protocol = rtm->rtm_protocol;
1987 if (rtm->rtm_type == RTN_UNREACHABLE)
1988 cfg->fc_flags |= RTF_REJECT;
1990 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1991 cfg->fc_nlinfo.nlh = nlh;
1992 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
1994 if (tb[RTA_GATEWAY]) {
1995 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1996 cfg->fc_flags |= RTF_GATEWAY;
/* Destination prefix: attribute must hold at least plen/8 bytes. */
2000 int plen = (rtm->rtm_dst_len + 7) >> 3;
2002 if (nla_len(tb[RTA_DST]) < plen)
2005 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
/* Same length check for the source prefix. */
2009 int plen = (rtm->rtm_src_len + 7) >> 3;
2011 if (nla_len(tb[RTA_SRC]) < plen)
2014 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2018 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2020 if (tb[RTA_PRIORITY])
2021 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
/* Metrics stay as a raw nested attribute; parsed later by the core. */
2023 if (tb[RTA_METRICS]) {
2024 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2025 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
/* Explicit table attribute wins over the header's rtm_table byte. */
2029 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
/* rtnetlink RTM_DELROUTE handler: parse the message then delete. */
2036 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2038 struct fib6_config cfg;
2041 err = rtm_to_fib6_config(skb, nlh, &cfg);
2045 return ip6_route_del(&cfg);
/* rtnetlink RTM_NEWROUTE handler: parse the message then insert. */
2048 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2050 struct fib6_config cfg;
2053 err = rtm_to_fib6_config(skb, nlh, &cfg);
2057 return ip6_route_add(&cfg);
/*
 * Worst-case netlink message size for one route dump entry; used to
 * size the skb in inet6_rt_notify().  Must stay in sync with what
 * rt6_fill_node() can emit (WARN_ON(-EMSGSIZE) catches drift).
 */
2060 static inline size_t rt6_nlmsg_size(void)
2062 return NLMSG_ALIGN(sizeof(struct rtmsg))
2063 + nla_total_size(16) /* RTA_SRC */
2064 + nla_total_size(16) /* RTA_DST */
2065 + nla_total_size(16) /* RTA_GATEWAY */
2066 + nla_total_size(16) /* RTA_PREFSRC */
2067 + nla_total_size(4) /* RTA_TABLE */
2068 + nla_total_size(4) /* RTA_IIF */
2069 + nla_total_size(4) /* RTA_OIF */
2070 + nla_total_size(4) /* RTA_PRIORITY */
2071 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2072 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * Serialise one rt6_info into a netlink message.  @dst/@src, when
 * non-NULL, are the concrete addresses from a route query and force
 * full /128 prefix lengths in the reply; @prefix restricts output to
 * RTF_PREFIX_RT routes.  Returns the nlmsg_end() result, or an error
 * (e.g. -EMSGSIZE) after cancelling the partial message.
 */
2075 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2076 struct in6_addr *dst, struct in6_addr *src,
2077 int iif, int type, u32 pid, u32 seq,
2078 int prefix, unsigned int flags)
2081 struct nlmsghdr *nlh;
2085 if (prefix) { /* user wants prefix routes only */
2086 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2087 /* success since this is not a prefix route */
2092 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2096 rtm = nlmsg_data(nlh);
2097 rtm->rtm_family = AF_INET6;
2098 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2099 rtm->rtm_src_len = rt->rt6i_src.plen;
2102 table = rt->rt6i_table->tb6_id;
2104 table = RT6_TABLE_UNSPEC;
2105 rtm->rtm_table = table;
2106 NLA_PUT_U32(skb, RTA_TABLE, table);
/* Map internal flags to the userspace route type. */
2107 if (rt->rt6i_flags&RTF_REJECT)
2108 rtm->rtm_type = RTN_UNREACHABLE;
2109 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2110 rtm->rtm_type = RTN_LOCAL;
2112 rtm->rtm_type = RTN_UNICAST;
2114 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2115 rtm->rtm_protocol = rt->rt6i_protocol;
2116 if (rt->rt6i_flags&RTF_DYNAMIC)
2117 rtm->rtm_protocol = RTPROT_REDIRECT;
2118 else if (rt->rt6i_flags & RTF_ADDRCONF)
2119 rtm->rtm_protocol = RTPROT_KERNEL;
2120 else if (rt->rt6i_flags&RTF_DEFAULT)
2121 rtm->rtm_protocol = RTPROT_RA;
2123 if (rt->rt6i_flags&RTF_CACHE)
2124 rtm->rtm_flags |= RTM_F_CLONED;
/* Query replies report the exact queried address as a /128. */
2127 NLA_PUT(skb, RTA_DST, 16, dst);
2128 rtm->rtm_dst_len = 128;
2129 } else if (rtm->rtm_dst_len)
2130 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2131 #ifdef CONFIG_IPV6_SUBTREES
2133 NLA_PUT(skb, RTA_SRC, 16, src);
2134 rtm->rtm_src_len = 128;
2135 } else if (rtm->rtm_src_len)
2136 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2139 NLA_PUT_U32(skb, RTA_IIF, iif);
/* Suggest a preferred source address for the queried destination. */
2141 struct in6_addr saddr_buf;
2142 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2143 dst, &saddr_buf) == 0)
2144 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2147 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2148 goto nla_put_failure;
2150 if (rt->u.dst.neighbour)
2151 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2154 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2156 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
/* Remaining lifetime in jiffies (0 = no expiry). */
2158 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2159 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2160 expires, rt->u.dst.error) < 0)
2161 goto nla_put_failure;
2163 return nlmsg_end(skb, nlh);
/* Attribute did not fit: discard the half-built message. */
2166 nlmsg_cancel(skb, nlh);
/*
 * Per-route callback for RTM_GETROUTE dumps: honour an RTM_F_PREFIX
 * filter in the request header, then emit the route via
 * rt6_fill_node() with NLM_F_MULTI set.
 */
2170 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2172 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
/* Only trust rtm_flags if the request actually carried an rtmsg. */
2175 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2176 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2177 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2181 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2182 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2183 prefix, NLM_F_MULTI);
/*
 * RTM_GETROUTE handler: build a flow from the request attributes,
 * resolve it with ip6_route_output(), serialise the result with
 * rt6_fill_node() and unicast the reply to the requester.
 */
2186 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2188 struct net *net = in_skb->sk->sk_net;
2189 struct nlattr *tb[RTA_MAX+1];
2190 struct rt6_info *rt;
2191 struct sk_buff *skb;
2196 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2201 memset(&fl, 0, sizeof(fl));
2204 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2207 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2211 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2214 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2218 iif = nla_get_u32(tb[RTA_IIF]);
2221 fl.oif = nla_get_u32(tb[RTA_OIF]);
/* An input-interface query must name an existing device. */
2224 struct net_device *dev;
2225 dev = __dev_get_by_index(net, iif);
2232 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2238 /* Reserve room for dummy headers, this skb can pass
2239 through good chunk of routing engine.
2241 skb_reset_mac_header(skb);
2242 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* Resolve the flow; the skb takes ownership of the dst reference. */
2244 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2245 skb->dst = &rt->u.dst;
2247 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2248 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2249 nlh->nlmsg_seq, 0, 0);
2255 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
/*
 * Broadcast a route change (@event = RTM_NEWROUTE/RTM_DELROUTE) to
 * RTNLGRP_IPV6_ROUTE listeners.  The skb is pre-sized by
 * rt6_nlmsg_size(), so -EMSGSIZE from rt6_fill_node() is a bug.
 */
2260 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2262 struct sk_buff *skb;
2263 struct net *net = info->nl_net;
2268 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2270 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2274 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2275 event, info->pid, seq, 0, 0);
2277 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2278 WARN_ON(err == -EMSGSIZE);
2282 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2283 info->nlh, gfp_any());
/* Record the failure so listeners can detect the lost event. */
2286 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2293 #ifdef CONFIG_PROC_FS
2295 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * /proc/net/ipv6_route per-route printer (fib6_clean_all callback):
 * one line of dst, src (zeros if subtrees disabled), nexthop, metric,
 * refcnt, use count, flags and device name.
 */
2306 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2308 struct seq_file *m = p_arg;
2310 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2313 #ifdef CONFIG_IPV6_SUBTREES
2314 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
/* Without subtrees there is no source key: print all-zero field. */
2317 seq_puts(m, "00000000000000000000000000000000 00 ");
2320 if (rt->rt6i_nexthop) {
2321 seq_printf(m, NIP6_SEQFMT,
2322 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2324 seq_puts(m, "00000000000000000000000000000000");
2326 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2327 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2328 rt->u.dst.__use, rt->rt6i_flags,
2329 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/* seq_file show: walk the whole FIB, printing each route. */
2333 static int ipv6_route_show(struct seq_file *m, void *v)
2335 struct net *net = (struct net *)m->private;
2336 fib6_clean_all(net, rt6_info_route, 0, m);
/* Open /proc/net/ipv6_route; stashes the netns as seq private data. */
2340 static int ipv6_route_open(struct inode *inode, struct file *file)
2342 struct net *net = get_proc_net(inode);
2345 return single_open(file, ipv6_route_show, net);
/* Release counterpart of ipv6_route_open(); drops the netns ref taken
 * at open time before tearing down the seq_file. */
2348 static int ipv6_route_release(struct inode *inode, struct file *file)
2350 struct seq_file *seq = file->private_data;
2351 struct net *net = seq->private;
2353 return single_release(inode, file);
/* File operations for /proc/net/ipv6_route. */
2356 static const struct file_operations ipv6_route_proc_fops = {
2357 .owner = THIS_MODULE,
2358 .open = ipv6_route_open,
2360 .llseek = seq_lseek,
2361 .release = ipv6_route_release,
/* /proc/net/rt6_stats: one line of hex FIB/cache counters for this
 * namespace (dst entry count is still global via ip6_dst_ops). */
2364 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2366 struct net *net = (struct net *)seq->private;
2367 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2368 net->ipv6.rt6_stats->fib_nodes,
2369 net->ipv6.rt6_stats->fib_route_nodes,
2370 net->ipv6.rt6_stats->fib_rt_alloc,
2371 net->ipv6.rt6_stats->fib_rt_entries,
2372 net->ipv6.rt6_stats->fib_rt_cache,
2373 atomic_read(&ip6_dst_ops.entries),
2374 net->ipv6.rt6_stats->fib_discarded_routes);
/* Open /proc/net/rt6_stats; netns becomes the seq private data. */
2379 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2381 struct net *net = get_proc_net(inode);
2382 return single_open(file, rt6_stats_seq_show, net);
/* Release counterpart of rt6_stats_seq_open(); drops the netns ref. */
2385 static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2387 struct seq_file *seq = file->private_data;
2388 struct net *net = (struct net *)seq->private;
2390 return single_release(inode, file);
/* File operations for /proc/net/rt6_stats. */
2393 static const struct file_operations rt6_stats_seq_fops = {
2394 .owner = THIS_MODULE,
2395 .open = rt6_stats_seq_open,
2397 .llseek = seq_lseek,
2398 .release = rt6_stats_seq_release,
2400 #endif /* CONFIG_PROC_FS */
2402 #ifdef CONFIG_SYSCTL
/*
 * Handler for net.ipv6.route.flush: writing a delay triggers an
 * immediate garbage collection of the routing cache for the writer's
 * namespace (delay <= 0 means flush everything now).
 */
2405 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2406 void __user *buffer, size_t *lenp, loff_t *ppos)
2408 struct net *net = current->nsproxy->net_ns;
2409 int delay = net->ipv6.sysctl.flush_delay;
2411 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2412 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
/*
 * Template sysctl table for net.ipv6.route.*.  Per-namespace copies
 * are made by ipv6_route_sysctl_init(), which rewrites the .data
 * pointers; keep entry order in sync with the indices used there.
 */
2418 ctl_table ipv6_route_table_template[] = {
2420 .procname = "flush",
2421 .data = &init_net.ipv6.sysctl.flush_delay,
2422 .maxlen = sizeof(int),
2424 .proc_handler = &ipv6_sysctl_rtcache_flush
2427 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2428 .procname = "gc_thresh",
/* Global dst-ops threshold; not namespaced (see sysctl_init note). */
2429 .data = &ip6_dst_ops.gc_thresh,
2430 .maxlen = sizeof(int),
2432 .proc_handler = &proc_dointvec,
2435 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2436 .procname = "max_size",
2437 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2438 .maxlen = sizeof(int),
2440 .proc_handler = &proc_dointvec,
2443 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2444 .procname = "gc_min_interval",
2445 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2446 .maxlen = sizeof(int),
2448 .proc_handler = &proc_dointvec_jiffies,
2449 .strategy = &sysctl_jiffies,
2452 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2453 .procname = "gc_timeout",
2454 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2455 .maxlen = sizeof(int),
2457 .proc_handler = &proc_dointvec_jiffies,
2458 .strategy = &sysctl_jiffies,
2461 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2462 .procname = "gc_interval",
2463 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2464 .maxlen = sizeof(int),
2466 .proc_handler = &proc_dointvec_jiffies,
2467 .strategy = &sysctl_jiffies,
2470 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2471 .procname = "gc_elasticity",
2472 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2473 .maxlen = sizeof(int),
2475 .proc_handler = &proc_dointvec_jiffies,
2476 .strategy = &sysctl_jiffies,
2479 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2480 .procname = "mtu_expires",
2481 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2482 .maxlen = sizeof(int),
2484 .proc_handler = &proc_dointvec_jiffies,
2485 .strategy = &sysctl_jiffies,
2488 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2489 .procname = "min_adv_mss",
2490 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2491 .maxlen = sizeof(int),
2493 .proc_handler = &proc_dointvec_jiffies,
2494 .strategy = &sysctl_jiffies,
2497 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2498 .procname = "gc_min_interval_ms",
/* Same variable as gc_min_interval, exposed in milliseconds. */
2499 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2500 .maxlen = sizeof(int),
2502 .proc_handler = &proc_dointvec_ms_jiffies,
2503 .strategy = &sysctl_ms_jiffies,
/*
 * Duplicate the sysctl template for a namespace and point each
 * entry's .data at that namespace's variables.  Indices must match
 * the order of ipv6_route_table_template above.
 */
2508 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2510 struct ctl_table *table;
2512 table = kmemdup(ipv6_route_table_template,
2513 sizeof(ipv6_route_table_template),
2517 table[0].data = &net->ipv6.sysctl.flush_delay;
2518 /* table[1].data will be handled when we have
2519 routes per namespace */
2520 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2521 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2522 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2523 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2524 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2525 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2526 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
/* Per-namespace init: create the /proc route and stats entries. */
2533 static int ip6_route_net_init(struct net *net)
2535 #ifdef CONFIG_PROC_FS
2536 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2537 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/* Per-namespace teardown: remove /proc entries and flush all routes
 * (rt6_ifdown with NULL matches every device). */
2542 static void ip6_route_net_exit(struct net *net)
2544 #ifdef CONFIG_PROC_FS
2545 proc_net_remove(net, "ipv6_route");
2546 proc_net_remove(net, "rt6_stats");
2548 rt6_ifdown(net, NULL);
/* Network-namespace lifecycle hooks for the IPv6 routing subsystem. */
2551 static struct pernet_operations ip6_route_net_ops = {
2552 .init = ip6_route_net_init,
2553 .exit = ip6_route_net_exit,
/*
 * Subsystem init: create the rt6_info slab, initialise FIB rules,
 * register the rtnetlink route handlers and the pernet operations.
 * Unwinds in reverse order (rules, then slab) on failure.
 */
2556 int __init ip6_route_init(void)
2560 ip6_dst_ops.kmem_cachep =
2561 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2562 SLAB_HWCACHE_ALIGN, NULL);
2563 if (!ip6_dst_ops.kmem_cachep)
/* Blackhole dsts share the same slab cache. */
2566 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2570 goto out_kmem_cache;
2576 ret = fib6_rules_init();
2581 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2582 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2583 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2584 goto fib6_rules_init;
2586 ret = register_pernet_subsys(&ip6_route_net_ops);
2588 goto fib6_rules_init;
/* Error unwind: undo rules init, then free the slab cache. */
2593 fib6_rules_cleanup();
2599 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2603 void ip6_route_cleanup(void)
2605 unregister_pernet_subsys(&ip6_route_net_ops);
2606 fib6_rules_cleanup();
2609 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);