net/ipv6/route.c
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable; otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27#include <linux/capability.h>
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/mroute6.h>
38#include <linux/init.h>
39#include <linux/if_arp.h>
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
42#include <linux/nsproxy.h>
43#include <net/net_namespace.h>
44#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
54#include <net/netevent.h>
55#include <net/netlink.h>
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
63/* Set to 3 to get tracing. */
64#define RT6_DEBUG 2
65
66#if RT6_DEBUG >= 3
67#define RDBG(x) printk x
68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
69#else
70#define RDBG(x)
71#define RT6_TRACE(x...) do { ; } while (0)
72#endif
73
74#define CLONE_OFFLINK_ROUTE 0
75
76static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
82static int ip6_dst_gc(struct dst_ops *ops);
83
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
89#ifdef CONFIG_IPV6_ROUTE_INFO
90static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
94static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
96 struct in6_addr *gwaddr, int ifindex);
97#endif
98
99static struct dst_ops ip6_dst_ops_template = {
100 .family = AF_INET6,
101 .protocol = cpu_to_be16(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
110 .local_out = __ip6_local_out,
111 .entries = ATOMIC_INIT(0),
112};
113
114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = cpu_to_be16(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
124 .entries = ATOMIC_INIT(0),
125};
126
127static struct rt6_info ip6_null_entry_template = {
128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
137 }
138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
140 .rt6i_protocol = RTPROT_KERNEL,
141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
143};
144
145#ifdef CONFIG_IPV6_MULTIPLE_TABLES
146
147static int ip6_pkt_prohibit(struct sk_buff *skb);
148static int ip6_pkt_prohibit_out(struct sk_buff *skb);
149
150static struct rt6_info ip6_prohibit_entry_template = {
151 .u = {
152 .dst = {
153 .__refcnt = ATOMIC_INIT(1),
154 .__use = 1,
155 .obsolete = -1,
156 .error = -EACCES,
157 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
158 .input = ip6_pkt_prohibit,
159 .output = ip6_pkt_prohibit_out,
160 }
161 },
162 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
163 .rt6i_protocol = RTPROT_KERNEL,
164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
166};
167
168static struct rt6_info ip6_blk_hole_entry_template = {
169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
173 .obsolete = -1,
174 .error = -EINVAL,
175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
176 .input = dst_discard,
177 .output = dst_discard,
178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_protocol = RTPROT_KERNEL,
182 .rt6i_metric = ~(u32) 0,
183 .rt6i_ref = ATOMIC_INIT(1),
184};
185
186#endif
187
188/* allocate dst with ip6_dst_ops */
189static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
190{
191 return (struct rt6_info *)dst_alloc(ops);
192}
193
194static void ip6_dst_destroy(struct dst_entry *dst)
195{
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
202 }
203}
204
205static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
207{
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
210 struct net_device *loopback_dev =
211 dev_net(dev)->loopback_dev;
212
213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
216 if (loopback_idev != NULL) {
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
219 }
220 }
221}
222
223static __inline__ int rt6_check_expired(const struct rt6_info *rt)
224{
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
227}
228
229static inline int rt6_need_strict(struct in6_addr *daddr)
230{
231 return (ipv6_addr_type(daddr) &
232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
233}
234
235/*
236 * Route lookup. Any table->tb6_lock is implied.
237 */
238
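/*
 * Walk the routes sharing this prefix and prefer the one whose device
 * matches the requested output interface; a loopback route bound to
 * that interface is remembered as a fallback.  Without an oif, match
 * on the source address instead.
 */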
239static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
241 struct in6_addr *saddr,
242 int oif,
243 int flags)
244{
245 struct rt6_info *local = NULL;
246 struct rt6_info *sprt;
247
248 if (!oif && ipv6_addr_any(saddr))
249 goto out;
250
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
253
254 if (oif) {
255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
260 if (flags & RT6_LOOKUP_F_IFACE && oif)
261 continue;
262 if (local && (!oif ||
263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
268 } else {
269 if (ipv6_chk_addr(net, saddr, dev,
270 flags & RT6_LOOKUP_F_IFACE))
271 return sprt;
272 }
273 }
274
275 if (oif) {
276 if (local)
277 return local;
278
279 if (flags & RT6_LOOKUP_F_IFACE)
280 return net->ipv6.ip6_null_entry;
281 }
282out:
283 return rt;
284}
285
286#ifdef CONFIG_IPV6_ROUTER_PREF
287static void rt6_probe(struct rt6_info *rt)
288{
289 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
290 /*
291 * Okay, this does not seem to be appropriate
292 * for now; however, we need to check whether it
293 * really is, aka Router Reachability Probing.
294 *
295 * Router Reachability Probe MUST be rate-limited
296 * to no more than one per minute.
297 */
298 if (!neigh || (neigh->nud_state & NUD_VALID))
299 return;
300 read_lock_bh(&neigh->lock);
301 if (!(neigh->nud_state & NUD_VALID) &&
302 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
303 struct in6_addr mcaddr;
304 struct in6_addr *target;
305
306 neigh->updated = jiffies;
307 read_unlock_bh(&neigh->lock);
308
309 target = (struct in6_addr *)&neigh->primary_key;
310 addrconf_addr_solict_mult(target, &mcaddr);
311 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
312 } else
313 read_unlock_bh(&neigh->lock);
314}
315#else
316static inline void rt6_probe(struct rt6_info *rt)
317{
318 return;
319}
320#endif
321
322/*
323 * Default Router Selection (RFC 2461 6.3.6)
324 */
325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
326{
327 struct net_device *dev = rt->rt6i_dev;
328 if (!oif || dev->ifindex == oif)
329 return 2;
330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
334}
335
336static inline int rt6_check_neigh(struct rt6_info *rt)
337{
338 struct neighbour *neigh = rt->rt6i_nexthop;
339 int m;
340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
346 m = 2;
347#ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350#endif
351 else
352 m = 1;
353 read_unlock_bh(&neigh->lock);
354 } else
355 m = 0;
356 return m;
357}
358
359static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
361{
362 int m, n;
363
364 m = rt6_check_dev(rt, oif);
365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
366 return -1;
367#ifdef CONFIG_IPV6_ROUTER_PREF
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369#endif
370 n = rt6_check_neigh(rt);
371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
372 return -1;
373 return m;
374}
375
376static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
378{
379 int m;
380
381 if (rt6_check_expired(rt))
382 goto out;
383
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
387
388 if (m > *mpri) {
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
395 }
396
397out:
398 return match;
399}
400
401static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
404{
405 struct rt6_info *rt, *match;
406 int mpri = -1;
407
408 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
415
416 return match;
417}
418
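/*
 * Round-robin default router selection: score every route with the
 * same metric at this node and return the best one.  If nothing
 * (probably) reachable is found, advance fn->rr_ptr so the next
 * lookup starts from a different router.
 */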
419static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
420{
421 struct rt6_info *match, *rt0;
422 struct net *net;
423
424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
425 __func__, fn->leaf, oif);
426
427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
430
431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
432
433 if (!match &&
434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
436
437 /* no entries matched; do round-robin */
438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
440
441 if (next != rt0)
442 fn->rr_ptr = next;
443 }
444
445 RT6_TRACE("%s() => %p\n",
446 __func__, match);
447
448 net = dev_net(rt0->rt6i_dev);
449 return (match ? match : net->ipv6.ip6_null_entry);
450}
451
452#ifdef CONFIG_IPV6_ROUTE_INFO
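/*
 * Process a Route Information option received in a Router
 * Advertisement: validate it, then add, refresh or (on zero lifetime)
 * delete the corresponding RTF_ROUTEINFO route.
 */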
453int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
454 struct in6_addr *gwaddr)
455{
456 struct net *net = dev_net(dev);
457 struct route_info *rinfo = (struct route_info *) opt;
458 struct in6_addr prefix_buf, *prefix;
459 unsigned int pref;
460 unsigned long lifetime;
461 struct rt6_info *rt;
462
463 if (len < sizeof(struct route_info)) {
464 return -EINVAL;
465 }
466
467 /* Sanity check for prefix_len and length */
468 if (rinfo->length > 3) {
469 return -EINVAL;
470 } else if (rinfo->prefix_len > 128) {
471 return -EINVAL;
472 } else if (rinfo->prefix_len > 64) {
473 if (rinfo->length < 2) {
474 return -EINVAL;
475 }
476 } else if (rinfo->prefix_len > 0) {
477 if (rinfo->length < 1) {
478 return -EINVAL;
479 }
480 }
481
482 pref = rinfo->route_pref;
483 if (pref == ICMPV6_ROUTER_PREF_INVALID)
484 return -EINVAL;
485
486 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
487
488 if (rinfo->length == 3)
489 prefix = (struct in6_addr *)rinfo->prefix;
490 else {
491 /* this function is safe */
492 ipv6_addr_prefix(&prefix_buf,
493 (struct in6_addr *)rinfo->prefix,
494 rinfo->prefix_len);
495 prefix = &prefix_buf;
496 }
497
498 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
499 dev->ifindex);
500
501 if (rt && !lifetime) {
502 ip6_del_rt(rt);
503 rt = NULL;
504 }
505
506 if (!rt && lifetime)
507 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
508 pref);
509 else if (rt)
510 rt->rt6i_flags = RTF_ROUTEINFO |
511 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
512
513 if (rt) {
514 if (!addrconf_finite_timeout(lifetime)) {
515 rt->rt6i_flags &= ~RTF_EXPIRES;
516 } else {
517 rt->rt6i_expires = jiffies + HZ * lifetime;
518 rt->rt6i_flags |= RTF_EXPIRES;
519 }
520 dst_release(&rt->u.dst);
521 }
522 return 0;
523}
524#endif
525
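/*
 * If the lookup ended up on the null entry, walk back up the tree
 * (descending into source-routed subtrees where they exist) until a
 * node carrying route information is found, then restart the search
 * from there.
 */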
526#define BACKTRACK(__net, saddr) \
527do { \
528 if (rt == __net->ipv6.ip6_null_entry) { \
529 struct fib6_node *pn; \
530 while (1) { \
531 if (fn->fn_flags & RTN_TL_ROOT) \
532 goto out; \
533 pn = fn->parent; \
534 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
535 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
536 else \
537 fn = pn; \
538 if (fn->fn_flags & RTN_RTINFO) \
539 goto restart; \
540 } \
541 } \
542} while(0)
543
544static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
546 struct flowi *fl, int flags)
547{
548 struct fib6_node *fn;
549 struct rt6_info *rt;
550
551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553restart:
554 rt = fn->leaf;
555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
556 BACKTRACK(net, &fl->fl6_src);
557out:
558 dst_use(&rt->u.dst, jiffies);
559 read_unlock_bh(&table->tb6_lock);
560 return rt;
561
562}
563
564struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
572 },
573 },
574 };
575 struct dst_entry *dst;
576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
577
578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
589 return NULL;
590}
591
592EXPORT_SYMBOL(rt6_lookup);
593
594/* ip6_ins_rt is called with table->tb6_lock NOT held.
595 It takes a new route entry; if the addition fails for any reason,
596 the route is freed. In any case, if the caller does not hold a
597 reference, the route may be destroyed.
598 */
599
600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
601{
602 int err;
603 struct fib6_table *table;
604
605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
607 err = fib6_add(&table->tb6_root, rt, info);
608 write_unlock_bh(&table->tb6_lock);
609
610 return err;
611}
612
613int ip6_ins_rt(struct rt6_info *rt)
614{
615 struct nl_info info = {
616 .nl_net = dev_net(rt->rt6i_dev),
617 };
618 return __ip6_ins_rt(rt, &info);
619}
620
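/*
 * Copy-on-write clone of a route for one destination: the copy becomes
 * a /128 RTF_CACHE entry with a nexthop neighbour bound to it.  If the
 * neighbour table overflows, garbage collection is forced with relaxed
 * limits (at most once, and never from softirq context) before giving up.
 */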
621static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
623{
624 struct rt6_info *rt;
625
626 /*
627 * Clone the route.
628 */
629
630 rt = ip6_rt_copy(ort);
631
632 if (rt) {
633 struct neighbour *neigh;
634 int attempts = !in_softirq();
635
636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
641 }
642
643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
647
648#ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
652 }
653#endif
654
655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
663
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
667
668 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
669
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
675 }
676
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
682 }
683 rt->rt6i_nexthop = neigh;
684
685 }
686
687 return rt;
688}
689
690static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
691{
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
699 }
700 return rt;
701}
702
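/*
 * Core lookup used by both the input and output paths.  Cached and
 * null results are returned directly.  A connected route is copied on
 * write into a /128 RTF_CACHE entry and inserted into the table, while
 * a route that already has a nexthop is returned as-is (since
 * CLONE_OFFLINK_ROUTE is 0).  On insert failure the lookup is retried
 * a few times.
 */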
703static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
705{
706 struct fib6_node *fn;
707 struct rt6_info *rt, *nrt;
708 int strict = 0;
709 int attempts = 3;
710 int err;
711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
712
713 strict |= flags & RT6_LOOKUP_F_IFACE;
714
715relookup:
716 read_lock_bh(&table->tb6_lock);
717
718restart_2:
719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
720
721restart:
722 rt = rt6_select(fn, oif, strict | reachable);
723
724 BACKTRACK(net, &fl->fl6_src);
725 if (rt == net->ipv6.ip6_null_entry ||
726 rt->rt6i_flags & RTF_CACHE)
727 goto out;
728
729 dst_hold(&rt->u.dst);
730 read_unlock_bh(&table->tb6_lock);
731
732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
734 else {
735#if CLONE_OFFLINK_ROUTE
736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
737#else
738 goto out2;
739#endif
740 }
741
742 dst_release(&rt->u.dst);
743 rt = nrt ? : net->ipv6.ip6_null_entry;
744
745 dst_hold(&rt->u.dst);
746 if (nrt) {
747 err = ip6_ins_rt(nrt);
748 if (!err)
749 goto out2;
750 }
751
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
756 * Race condition! In the gap, when table->tb6_lock was
757 * released someone could insert this route. Relookup.
758 */
759 dst_release(&rt->u.dst);
760 goto relookup;
761
762out:
763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
767 dst_hold(&rt->u.dst);
768 read_unlock_bh(&table->tb6_lock);
769out2:
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
772
773 return rt;
774}
775
776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
777 struct flowi *fl, int flags)
778{
779 return ip6_pol_route(net, table, fl->iif, fl, flags);
780}
781
782void ip6_route_input(struct sk_buff *skb)
783{
784 struct ipv6hdr *iph = ipv6_hdr(skb);
785 struct net *net = dev_net(skb->dev);
786 int flags = RT6_LOOKUP_F_HAS_SADDR;
787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
794 },
795 },
796 .mark = skb->mark,
797 .proto = iph->nexthdr,
798 };
799
800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
801 flags |= RT6_LOOKUP_F_IFACE;
802
803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
804}
805
806static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
807 struct flowi *fl, int flags)
808{
809 return ip6_pol_route(net, table, fl->oif, fl, flags);
810}
811
812struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
814{
815 int flags = 0;
816
817 if (rt6_need_strict(&fl->fl6_dst))
818 flags |= RT6_LOOKUP_F_IFACE;
819
820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
822 else if (sk) {
823 unsigned int prefs = inet6_sk(sk)->srcprefs;
824 if (prefs & IPV6_PREFER_SRC_TMP)
825 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
826 if (prefs & IPV6_PREFER_SRC_PUBLIC)
827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
828 if (prefs & IPV6_PREFER_SRC_COA)
829 flags |= RT6_LOOKUP_F_SRCPREF_COA;
830 }
831
832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
833}
834
835EXPORT_SYMBOL(ip6_route_output);
836
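/*
 * Replace *dstp with a "blackhole" copy of the route: metrics, device
 * and addressing are taken from the original, but input and output
 * simply discard packets and the entry never joins the routing table.
 */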
837int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
838{
839 struct rt6_info *ort = (struct rt6_info *) *dstp;
840 struct rt6_info *rt = (struct rt6_info *)
841 dst_alloc(&ip6_dst_blackhole_ops);
842 struct dst_entry *new = NULL;
843
844 if (rt) {
845 new = &rt->u.dst;
846
847 atomic_set(&new->__refcnt, 1);
848 new->__use = 1;
849 new->input = dst_discard;
850 new->output = dst_discard;
851
852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
853 new->dev = ort->u.dst.dev;
854 if (new->dev)
855 dev_hold(new->dev);
856 rt->rt6i_idev = ort->rt6i_idev;
857 if (rt->rt6i_idev)
858 in6_dev_hold(rt->rt6i_idev);
859 rt->rt6i_expires = 0;
860
861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
863 rt->rt6i_metric = 0;
864
865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
866#ifdef CONFIG_IPV6_SUBTREES
867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
868#endif
869
870 dst_free(new);
871 }
872
873 dst_release(*dstp);
874 *dstp = new;
875 return (new ? 0 : -ENOMEM);
876}
877EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
878
879/*
880 * Destination cache support functions
881 */
882
883static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
884{
885 struct rt6_info *rt;
886
887 rt = (struct rt6_info *) dst;
888
889 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
890 return dst;
891
892 return NULL;
893}
894
895static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
896{
897 struct rt6_info *rt = (struct rt6_info *) dst;
898
899 if (rt) {
900 if (rt->rt6i_flags & RTF_CACHE)
901 ip6_del_rt(rt);
902 else
903 dst_release(dst);
904 }
905 return NULL;
906}
907
908static void ip6_link_failure(struct sk_buff *skb)
909{
910 struct rt6_info *rt;
911
912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
913
914 rt = (struct rt6_info *) skb_dst(skb);
915 if (rt) {
916 if (rt->rt6i_flags&RTF_CACHE) {
917 dst_set_expires(&rt->u.dst, 0);
918 rt->rt6i_flags |= RTF_EXPIRES;
919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
920 rt->rt6i_node->fn_sernum = -1;
921 }
922}
923
924static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
925{
926 struct rt6_info *rt6 = (struct rt6_info*)dst;
927
928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
929 rt6->rt6i_flags |= RTF_MODIFIED;
930 if (mtu < IPV6_MIN_MTU) {
931 mtu = IPV6_MIN_MTU;
932 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
933 }
934 dst->metrics[RTAX_MTU-1] = mtu;
935 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
936 }
937}
938
939static int ipv6_get_mtu(struct net_device *dev);
940
941static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
942{
943 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
944
945 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
946 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
947
948 /*
949 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
950 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
951 * IPV6_MAXPLEN is also valid and means: "any MSS,
952 * rely only on pmtu discovery"
953 */
954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
955 mtu = IPV6_MAXPLEN;
956 return mtu;
957}
958
959static struct dst_entry *icmp6_dst_gc_list;
960static DEFINE_SPINLOCK(icmp6_dst_lock);
961
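/*
 * Allocate a dst for ndisc/ICMPv6 use.  These entries are never added
 * to the FIB; they are chained on icmp6_dst_gc_list and reclaimed by
 * icmp6_dst_gc() once their refcount drops to zero.
 */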
962struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
963 struct neighbour *neigh,
964 const struct in6_addr *addr)
965{
966 struct rt6_info *rt;
967 struct inet6_dev *idev = in6_dev_get(dev);
968 struct net *net = dev_net(dev);
969
970 if (unlikely(idev == NULL))
971 return NULL;
972
973 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
974 if (unlikely(rt == NULL)) {
975 in6_dev_put(idev);
976 goto out;
977 }
978
979 dev_hold(dev);
980 if (neigh)
981 neigh_hold(neigh);
982 else {
983 neigh = ndisc_get_neigh(dev, addr);
984 if (IS_ERR(neigh))
985 neigh = NULL;
986 }
987
988 rt->rt6i_dev = dev;
989 rt->rt6i_idev = idev;
990 rt->rt6i_nexthop = neigh;
991 atomic_set(&rt->u.dst.__refcnt, 1);
992 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
993 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
994 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
995 rt->u.dst.output = ip6_output;
996
997#if 0 /* there's no chance to use these for ndisc */
998 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
999 ? DST_HOST
1000 : 0;
1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1002 rt->rt6i_dst.plen = 128;
1003#endif
1004
1005 spin_lock_bh(&icmp6_dst_lock);
1006 rt->u.dst.next = icmp6_dst_gc_list;
1007 icmp6_dst_gc_list = &rt->u.dst;
1008 spin_unlock_bh(&icmp6_dst_lock);
1009
1010 fib6_force_start_gc(net);
1011
1012out:
1013 return &rt->u.dst;
1014}
1015
1016int icmp6_dst_gc(void)
1017{
1018 struct dst_entry *dst, *next, **pprev;
1019 int more = 0;
1020
1021 next = NULL;
1022
1023 spin_lock_bh(&icmp6_dst_lock);
1024 pprev = &icmp6_dst_gc_list;
1025
1026 while ((dst = *pprev) != NULL) {
1027 if (!atomic_read(&dst->__refcnt)) {
1028 *pprev = dst->next;
1029 dst_free(dst);
1030 } else {
1031 pprev = &dst->next;
1032 ++more;
1033 }
1034 }
1035
1036 spin_unlock_bh(&icmp6_dst_lock);
1037
1038 return more;
1039}
1040
1041static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1042 void *arg)
1043{
1044 struct dst_entry *dst, **pprev;
1045
1046 spin_lock_bh(&icmp6_dst_lock);
1047 pprev = &icmp6_dst_gc_list;
1048 while ((dst = *pprev) != NULL) {
1049 struct rt6_info *rt = (struct rt6_info *) dst;
1050 if (func(rt, arg)) {
1051 *pprev = dst->next;
1052 dst_free(dst);
1053 } else {
1054 pprev = &dst->next;
1055 }
1056 }
1057 spin_unlock_bh(&icmp6_dst_lock);
1058}
1059
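/*
 * dst garbage collection hook: invoke fib6_run_gc() unless the minimum
 * interval since the last run has not yet elapsed and the entry count
 * is still within ip6_rt_max_size.  ip6_rt_gc_expire is the age limit
 * handed to fib6_run_gc(); it decays a little on every call, so cache
 * entries are expired more eagerly the more often the allocator has to
 * ask for garbage collection.
 */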
1060static int ip6_dst_gc(struct dst_ops *ops)
1061{
1062 unsigned long now = jiffies;
1063 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1069
1070 if (time_after(rt_last_gc + rt_min_interval, now) &&
1071 atomic_read(&ops->entries) <= rt_max_size)
1072 goto out;
1073
1074 net->ipv6.ip6_rt_gc_expire++;
1075 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1076 net->ipv6.ip6_rt_last_gc = now;
1077 if (atomic_read(&ops->entries) < ops->gc_thresh)
1078 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1079out:
1080 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1081 return (atomic_read(&ops->entries) > rt_max_size);
1082}
1083
1084/* Clean the host part of a prefix. Not necessary in a radix tree,
1085 but it results in cleaner routing tables.
1086
1087 Remove this only when everything is known to work!
1088 */
1089
1090static int ipv6_get_mtu(struct net_device *dev)
1091{
1092 int mtu = IPV6_MIN_MTU;
1093 struct inet6_dev *idev;
1094
1095 idev = in6_dev_get(dev);
1096 if (idev) {
1097 mtu = idev->cnf.mtu6;
1098 in6_dev_put(idev);
1099 }
1100 return mtu;
1101}
1102
1103int ip6_dst_hoplimit(struct dst_entry *dst)
1104{
1105 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1106 if (hoplimit < 0) {
1107 struct net_device *dev = dst->dev;
1108 struct inet6_dev *idev = in6_dev_get(dev);
1109 if (idev) {
1110 hoplimit = idev->cnf.hop_limit;
1111 in6_dev_put(idev);
1112 } else
1113 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1114 }
1115 return hoplimit;
1116}
1117
1118/*
1119 * Add a route described by a fib6_config to the appropriate table.
1120 */
1121
1122int ip6_route_add(struct fib6_config *cfg)
1123{
1124 int err;
1125 struct net *net = cfg->fc_nlinfo.nl_net;
1126 struct rt6_info *rt = NULL;
1127 struct net_device *dev = NULL;
1128 struct inet6_dev *idev = NULL;
1129 struct fib6_table *table;
1130 int addr_type;
1131
1132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1133 return -EINVAL;
1134#ifndef CONFIG_IPV6_SUBTREES
1135 if (cfg->fc_src_len)
1136 return -EINVAL;
1137#endif
1138 if (cfg->fc_ifindex) {
1139 err = -ENODEV;
1140 dev = dev_get_by_index(net, cfg->fc_ifindex);
1141 if (!dev)
1142 goto out;
1143 idev = in6_dev_get(dev);
1144 if (!idev)
1145 goto out;
1146 }
1147
1148 if (cfg->fc_metric == 0)
1149 cfg->fc_metric = IP6_RT_PRIO_USER;
1150
1151 table = fib6_new_table(net, cfg->fc_table);
1152 if (table == NULL) {
1153 err = -ENOBUFS;
1154 goto out;
1155 }
1156
1157 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1158
1159 if (rt == NULL) {
1160 err = -ENOMEM;
1161 goto out;
1162 }
1163
1164 rt->u.dst.obsolete = -1;
1165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1167 0;
1168
1169 if (cfg->fc_protocol == RTPROT_UNSPEC)
1170 cfg->fc_protocol = RTPROT_BOOT;
1171 rt->rt6i_protocol = cfg->fc_protocol;
1172
1173 addr_type = ipv6_addr_type(&cfg->fc_dst);
1174
1175 if (addr_type & IPV6_ADDR_MULTICAST)
1176 rt->u.dst.input = ip6_mc_input;
1177 else
1178 rt->u.dst.input = ip6_forward;
1179
1180 rt->u.dst.output = ip6_output;
1181
1182 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1183 rt->rt6i_dst.plen = cfg->fc_dst_len;
1184 if (rt->rt6i_dst.plen == 128)
1185 rt->u.dst.flags = DST_HOST;
1186
1187#ifdef CONFIG_IPV6_SUBTREES
1188 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1189 rt->rt6i_src.plen = cfg->fc_src_len;
1190#endif
1191
1192 rt->rt6i_metric = cfg->fc_metric;
1193
1194 /* We cannot add true routes via loopback here,
1195 they would result in kernel looping; promote them to reject routes
1196 */
1197 if ((cfg->fc_flags & RTF_REJECT) ||
1198 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1199 /* hold loopback dev/idev if we haven't done so. */
1200 if (dev != net->loopback_dev) {
1201 if (dev) {
1202 dev_put(dev);
1203 in6_dev_put(idev);
1204 }
1205 dev = net->loopback_dev;
1206 dev_hold(dev);
1207 idev = in6_dev_get(dev);
1208 if (!idev) {
1209 err = -ENODEV;
1210 goto out;
1211 }
1212 }
1213 rt->u.dst.output = ip6_pkt_discard_out;
1214 rt->u.dst.input = ip6_pkt_discard;
1215 rt->u.dst.error = -ENETUNREACH;
1216 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1217 goto install_route;
1218 }
1219
1220 if (cfg->fc_flags & RTF_GATEWAY) {
1221 struct in6_addr *gw_addr;
1222 int gwa_type;
1223
1224 gw_addr = &cfg->fc_gateway;
1225 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1226 gwa_type = ipv6_addr_type(gw_addr);
1227
1228 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1229 struct rt6_info *grt;
1230
1231 /* IPv6 strictly inhibits using non-link-local
1232 addresses as nexthop addresses.
1233 Otherwise, a router will not be able to send redirects.
1234 That is very good, but in some (rare!) circumstances
1235 (SIT, PtP, NBMA NOARP links) it is handy to allow
1236 some exceptions. --ANK
1237 */
1238 err = -EINVAL;
1239 if (!(gwa_type&IPV6_ADDR_UNICAST))
1240 goto out;
1241
1242 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1243
1244 err = -EHOSTUNREACH;
1245 if (grt == NULL)
1246 goto out;
1247 if (dev) {
1248 if (dev != grt->rt6i_dev) {
1249 dst_release(&grt->u.dst);
1250 goto out;
1251 }
1252 } else {
1253 dev = grt->rt6i_dev;
1254 idev = grt->rt6i_idev;
1255 dev_hold(dev);
1256 in6_dev_hold(grt->rt6i_idev);
1257 }
1258 if (!(grt->rt6i_flags&RTF_GATEWAY))
1259 err = 0;
1260 dst_release(&grt->u.dst);
1261
1262 if (err)
1263 goto out;
1264 }
1265 err = -EINVAL;
1266 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1267 goto out;
1268 }
1269
1270 err = -ENODEV;
1271 if (dev == NULL)
1272 goto out;
1273
1274 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1275 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1276 if (IS_ERR(rt->rt6i_nexthop)) {
1277 err = PTR_ERR(rt->rt6i_nexthop);
1278 rt->rt6i_nexthop = NULL;
1279 goto out;
1280 }
1281 }
1282
1283 rt->rt6i_flags = cfg->fc_flags;
1284
1285install_route:
1286 if (cfg->fc_mx) {
1287 struct nlattr *nla;
1288 int remaining;
1289
1290 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1291 int type = nla_type(nla);
1292
1293 if (type) {
1294 if (type > RTAX_MAX) {
1295 err = -EINVAL;
1296 goto out;
1297 }
1298
1299 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1300 }
1301 }
1302 }
1303
1304 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1305 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1306 if (!dst_mtu(&rt->u.dst))
1307 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1308 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
1309 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1310 rt->u.dst.dev = dev;
1311 rt->rt6i_idev = idev;
1312 rt->rt6i_table = table;
1313
1314 cfg->fc_nlinfo.nl_net = dev_net(dev);
1315
1316 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1317
1318out:
1319 if (dev)
1320 dev_put(dev);
1321 if (idev)
1322 in6_dev_put(idev);
1323 if (rt)
1324 dst_free(&rt->u.dst);
1325 return err;
1326}
1327
1328static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1329{
1330 int err;
1331 struct fib6_table *table;
1332 struct net *net = dev_net(rt->rt6i_dev);
1333
1334 if (rt == net->ipv6.ip6_null_entry)
1335 return -ENOENT;
1336
1337 table = rt->rt6i_table;
1338 write_lock_bh(&table->tb6_lock);
1339
1340 err = fib6_del(rt, info);
1341 dst_release(&rt->u.dst);
1342
1343 write_unlock_bh(&table->tb6_lock);
1344
1345 return err;
1346}
1347
1348int ip6_del_rt(struct rt6_info *rt)
1349{
1350 struct nl_info info = {
1351 .nl_net = dev_net(rt->rt6i_dev),
1352 };
1353 return __ip6_del_rt(rt, &info);
1354}
1355
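/*
 * Delete the route matching cfg: locate the node for the given
 * destination/source prefix, then require the interface, gateway and
 * metric to match whichever of them the request specified.
 */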
1356static int ip6_route_del(struct fib6_config *cfg)
1357{
1358 struct fib6_table *table;
1359 struct fib6_node *fn;
1360 struct rt6_info *rt;
1361 int err = -ESRCH;
1362
1363 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1364 if (table == NULL)
1365 return err;
1366
1367 read_lock_bh(&table->tb6_lock);
1368
1369 fn = fib6_locate(&table->tb6_root,
1370 &cfg->fc_dst, cfg->fc_dst_len,
1371 &cfg->fc_src, cfg->fc_src_len);
1372
1373 if (fn) {
1374 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1375 if (cfg->fc_ifindex &&
1376 (rt->rt6i_dev == NULL ||
1377 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1378 continue;
1379 if (cfg->fc_flags & RTF_GATEWAY &&
1380 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1381 continue;
1382 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1383 continue;
1384 dst_hold(&rt->u.dst);
1385 read_unlock_bh(&table->tb6_lock);
1386
1387 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1388 }
1389 }
1390 read_unlock_bh(&table->tb6_lock);
1391
1392 return err;
1393}
1394
1395/*
1396 * Handle redirects
1397 */
1398struct ip6rd_flowi {
1399 struct flowi fl;
1400 struct in6_addr gateway;
1401};
1402
1403static struct rt6_info *__ip6_route_redirect(struct net *net,
1404 struct fib6_table *table,
1405 struct flowi *fl,
1406 int flags)
1407{
1408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1409 struct rt6_info *rt;
1410 struct fib6_node *fn;
1411
1412 /*
1413 * Get the "current" route for this destination and
1414 * check if the redirect has come from the appropriate router.
1415 *
1416 * RFC 2461 specifies that redirects should only be
1417 * accepted if they come from the nexthop to the target.
1418 * Due to the way the routes are chosen, this notion
1419 * is a bit fuzzy and one might need to check all possible
1420 * routes.
1421 */
1422
1423 read_lock_bh(&table->tb6_lock);
1424 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1425restart:
1426 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1427 /*
1428 * Current route is on-link; redirect is always invalid.
1429 *
1430 * It seems the previous statement is not quite true: there could
1431 * be a node which sees us as on-link (e.g. proxy ndisc),
1432 * but then the router serving it might decide that we should
1433 * know the truth 8)8) --ANK (980726).
1434 */
1435 if (rt6_check_expired(rt))
1436 continue;
1437 if (!(rt->rt6i_flags & RTF_GATEWAY))
1438 continue;
1439 if (fl->oif != rt->rt6i_dev->ifindex)
1440 continue;
1441 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1442 continue;
1443 break;
1444 }
1445
1446 if (!rt)
1447 rt = net->ipv6.ip6_null_entry;
1448 BACKTRACK(net, &fl->fl6_src);
1449out:
1450 dst_hold(&rt->u.dst);
1451
1452 read_unlock_bh(&table->tb6_lock);
1453
1454 return rt;
1455};
1456
1457static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1458 struct in6_addr *src,
1459 struct in6_addr *gateway,
1460 struct net_device *dev)
1461{
1462 int flags = RT6_LOOKUP_F_HAS_SADDR;
1463 struct net *net = dev_net(dev);
1464 struct ip6rd_flowi rdfl = {
1465 .fl = {
1466 .oif = dev->ifindex,
1467 .nl_u = {
1468 .ip6_u = {
1469 .daddr = *dest,
1470 .saddr = *src,
1471 },
1472 },
1473 },
1474 };
1475
1476 ipv6_addr_copy(&rdfl.gateway, gateway);
1477
1478 if (rt6_need_strict(dest))
1479 flags |= RT6_LOOKUP_F_IFACE;
1480
1481 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1482 flags, __ip6_route_redirect);
1483}
1484
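/*
 * Accept a validated ndisc redirect: update the neighbour cache for
 * the new nexthop and install a /128 RTF_CACHE route to the
 * destination through it, replacing any previously cached route.
 */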
1485void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1486 struct in6_addr *saddr,
1487 struct neighbour *neigh, u8 *lladdr, int on_link)
1488{
1489 struct rt6_info *rt, *nrt = NULL;
1490 struct netevent_redirect netevent;
1491 struct net *net = dev_net(neigh->dev);
1492
1493 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1494
1495 if (rt == net->ipv6.ip6_null_entry) {
1496 if (net_ratelimit())
1497 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1498 "for redirect target\n");
1499 goto out;
1500 }
1501
1502 /*
1503 * We have finally decided to accept it.
1504 */
1505
1506 neigh_update(neigh, lladdr, NUD_STALE,
1507 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1508 NEIGH_UPDATE_F_OVERRIDE|
1509 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1510 NEIGH_UPDATE_F_ISROUTER))
1511 );
1512
1513 /*
1514 * Redirect received -> path was valid.
1515 * Look, redirects are sent only in response to data packets,
1516 * so this nexthop is apparently reachable. --ANK
1517 */
1518 dst_confirm(&rt->u.dst);
1519
1520 /* Duplicate redirect: silently ignore. */
1521 if (neigh == rt->u.dst.neighbour)
1522 goto out;
1523
1524 nrt = ip6_rt_copy(rt);
1525 if (nrt == NULL)
1526 goto out;
1527
1528 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1529 if (on_link)
1530 nrt->rt6i_flags &= ~RTF_GATEWAY;
1531
1532 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1533 nrt->rt6i_dst.plen = 128;
1534 nrt->u.dst.flags |= DST_HOST;
1535
1536 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1537 nrt->rt6i_nexthop = neigh_clone(neigh);
1538 /* Reset pmtu, it may be better */
1539 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1540 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1541 dst_mtu(&nrt->u.dst));
1542
1543 if (ip6_ins_rt(nrt))
1544 goto out;
1545
1546 netevent.old = &rt->u.dst;
1547 netevent.new = &nrt->u.dst;
1548 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1549
1550 if (rt->rt6i_flags&RTF_CACHE) {
1551 ip6_del_rt(rt);
1552 return;
1553 }
1554
1555out:
1556 dst_release(&rt->u.dst);
1557 return;
1558}
1559
1560/*
1561 * Handle ICMP "packet too big" messages
1562 * i.e. Path MTU discovery
1563 */
1564
1565void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1566 struct net_device *dev, u32 pmtu)
1567{
1568 struct rt6_info *rt, *nrt;
1569 struct net *net = dev_net(dev);
1570 int allfrag = 0;
1571
1572 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1573 if (rt == NULL)
1574 return;
1575
1576 if (pmtu >= dst_mtu(&rt->u.dst))
1577 goto out;
1578
1579 if (pmtu < IPV6_MIN_MTU) {
1580 /*
1581 * According to RFC 2460, the PMTU is set to the IPv6 Minimum Link
1582 * MTU (1280), and a fragment header should always be included
1583 * after a node receives a Packet Too Big message reporting a PMTU
1584 * less than the IPv6 Minimum Link MTU.
1585 */
1586 pmtu = IPV6_MIN_MTU;
1587 allfrag = 1;
1588 }
1589
1590 /* New mtu received -> path was valid.
1591 Such messages are sent only in response to data packets,
1592 so this nexthop is apparently reachable. --ANK
1593 */
1594 dst_confirm(&rt->u.dst);
1595
1596 /* Host route. If it is static, it would be better
1597 not to override it but to add a new one, so that
1598 when the cache entry expires the old pmtu
1599 is restored automatically.
1600 */
1601 if (rt->rt6i_flags & RTF_CACHE) {
1602 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1603 if (allfrag)
1604 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1605 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1606 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1607 goto out;
1608 }
1609
1610 /* Network route.
1611 Two cases are possible:
1612 1. It is a connected route. Action: COW.
1613 2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1614 */
1615 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1616 nrt = rt6_alloc_cow(rt, daddr, saddr);
1617 else
1618 nrt = rt6_alloc_clone(rt, daddr);
1619
1620 if (nrt) {
1621 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1622 if (allfrag)
1623 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1624
1625 /* According to RFC 1981, a PMTU increase should not be probed for
1626 * within 5 minutes; the recommended timer is 10 minutes.
1627 * Here the route expiration time is set to ip6_rt_mtu_expires,
1628 * which is 10 minutes. After 10 minutes the decreased pmtu expires
1629 * and PMTU increase detection happens automatically.
1630 */
1631 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1632 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1633
1634 ip6_ins_rt(nrt);
1635 }
1636out:
1637 dst_release(&rt->u.dst);
1638}
1639
1640/*
1641 * Misc support functions
1642 */
1643
1644static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1645{
1646 struct net *net = dev_net(ort->rt6i_dev);
1647 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1648
1649 if (rt) {
1650 rt->u.dst.input = ort->u.dst.input;
1651 rt->u.dst.output = ort->u.dst.output;
1652
1653 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1654 rt->u.dst.error = ort->u.dst.error;
1655 rt->u.dst.dev = ort->u.dst.dev;
1656 if (rt->u.dst.dev)
1657 dev_hold(rt->u.dst.dev);
1658 rt->rt6i_idev = ort->rt6i_idev;
1659 if (rt->rt6i_idev)
1660 in6_dev_hold(rt->rt6i_idev);
1661 rt->u.dst.lastuse = jiffies;
1662 rt->rt6i_expires = 0;
1663
1664 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1665 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1666 rt->rt6i_metric = 0;
1667
1668 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1669#ifdef CONFIG_IPV6_SUBTREES
1670 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1671#endif
1672 rt->rt6i_table = ort->rt6i_table;
1673 }
1674 return rt;
1675}
1676
1677#ifdef CONFIG_IPV6_ROUTE_INFO
1678static struct rt6_info *rt6_get_route_info(struct net *net,
1679 struct in6_addr *prefix, int prefixlen,
1680 struct in6_addr *gwaddr, int ifindex)
1681{
1682 struct fib6_node *fn;
1683 struct rt6_info *rt = NULL;
1684 struct fib6_table *table;
1685
1686 table = fib6_get_table(net, RT6_TABLE_INFO);
1687 if (table == NULL)
1688 return NULL;
1689
1690 write_lock_bh(&table->tb6_lock);
1691 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1692 if (!fn)
1693 goto out;
1694
1695 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1696 if (rt->rt6i_dev->ifindex != ifindex)
1697 continue;
1698 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1699 continue;
1700 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1701 continue;
1702 dst_hold(&rt->u.dst);
1703 break;
1704 }
1705out:
1706 write_unlock_bh(&table->tb6_lock);
1707 return rt;
1708}
1709
1710static struct rt6_info *rt6_add_route_info(struct net *net,
1711 struct in6_addr *prefix, int prefixlen,
1712 struct in6_addr *gwaddr, int ifindex,
1713 unsigned pref)
1714{
1715 struct fib6_config cfg = {
1716 .fc_table = RT6_TABLE_INFO,
1717 .fc_metric = IP6_RT_PRIO_USER,
1718 .fc_ifindex = ifindex,
1719 .fc_dst_len = prefixlen,
1720 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1721 RTF_UP | RTF_PREF(pref),
1722 .fc_nlinfo.pid = 0,
1723 .fc_nlinfo.nlh = NULL,
1724 .fc_nlinfo.nl_net = net,
1725 };
1726
1727 ipv6_addr_copy(&cfg.fc_dst, prefix);
1728 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1729
1730 /* We should treat it as a default route if prefix length is 0. */
1731 if (!prefixlen)
1732 cfg.fc_flags |= RTF_DEFAULT;
1733
1734 ip6_route_add(&cfg);
1735
1736 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1737}
1738#endif
1739
1740struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1741{
1742 struct rt6_info *rt;
1743 struct fib6_table *table;
1744
1745 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1746 if (table == NULL)
1747 return NULL;
1748
1749 write_lock_bh(&table->tb6_lock);
1750 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1751 if (dev == rt->rt6i_dev &&
1752 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1753 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1754 break;
1755 }
1756 if (rt)
1757 dst_hold(&rt->u.dst);
1758 write_unlock_bh(&table->tb6_lock);
1759 return rt;
1760}
1761
1762struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1763 struct net_device *dev,
1764 unsigned int pref)
1765{
1766 struct fib6_config cfg = {
1767 .fc_table = RT6_TABLE_DFLT,
1768 .fc_metric = IP6_RT_PRIO_USER,
1769 .fc_ifindex = dev->ifindex,
1770 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1771 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1772 .fc_nlinfo.pid = 0,
1773 .fc_nlinfo.nlh = NULL,
1774 .fc_nlinfo.nl_net = dev_net(dev),
1775 };
1776
1777 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1778
1779 ip6_route_add(&cfg);
1780
1781 return rt6_get_dflt_router(gwaddr, dev);
1782}
1783
1784void rt6_purge_dflt_routers(struct net *net)
1785{
1786 struct rt6_info *rt;
1787 struct fib6_table *table;
1788
1789 /* NOTE: Keep consistent with rt6_get_dflt_router */
1790 table = fib6_get_table(net, RT6_TABLE_DFLT);
1791 if (table == NULL)
1792 return;
1793
1794restart:
1795 read_lock_bh(&table->tb6_lock);
1796 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1797 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1798 dst_hold(&rt->u.dst);
1799 read_unlock_bh(&table->tb6_lock);
1800 ip6_del_rt(rt);
1801 goto restart;
1802 }
1803 }
1804 read_unlock_bh(&table->tb6_lock);
1805}
1806
1807static void rtmsg_to_fib6_config(struct net *net,
1808 struct in6_rtmsg *rtmsg,
1809 struct fib6_config *cfg)
1810{
1811 memset(cfg, 0, sizeof(*cfg));
1812
1813 cfg->fc_table = RT6_TABLE_MAIN;
1814 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1815 cfg->fc_metric = rtmsg->rtmsg_metric;
1816 cfg->fc_expires = rtmsg->rtmsg_info;
1817 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1818 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1819 cfg->fc_flags = rtmsg->rtmsg_flags;
1820
1821 cfg->fc_nlinfo.nl_net = net;
1822
1823 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1824 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1825 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1826}
1827
1828int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1829{
1830 struct fib6_config cfg;
1831 struct in6_rtmsg rtmsg;
1832 int err;
1833
1834 switch(cmd) {
1835 case SIOCADDRT: /* Add a route */
1836 case SIOCDELRT: /* Delete a route */
1837 if (!capable(CAP_NET_ADMIN))
1838 return -EPERM;
1839 err = copy_from_user(&rtmsg, arg,
1840 sizeof(struct in6_rtmsg));
1841 if (err)
1842 return -EFAULT;
1843
1844 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1845
1846 rtnl_lock();
1847 switch (cmd) {
1848 case SIOCADDRT:
1849 err = ip6_route_add(&cfg);
1850 break;
1851 case SIOCDELRT:
1852 err = ip6_route_del(&cfg);
1853 break;
1854 default:
1855 err = -EINVAL;
1856 }
1857 rtnl_unlock();
1858
1859 return err;
1860 }
1861
1862 return -EINVAL;
1863}
1864
1865/*
1866 * Drop the packet on the floor
1867 */
1868
1869static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1870{
1871 int type;
1872 struct dst_entry *dst = skb_dst(skb);
1873 switch (ipstats_mib_noroutes) {
1874 case IPSTATS_MIB_INNOROUTES:
1875 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1876 if (type == IPV6_ADDR_ANY) {
1877 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1878 IPSTATS_MIB_INADDRERRORS);
1879 break;
1880 }
1881 /* FALLTHROUGH */
1882 case IPSTATS_MIB_OUTNOROUTES:
1883 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1884 ipstats_mib_noroutes);
1885 break;
1886 }
1887 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1888 kfree_skb(skb);
1889 return 0;
1890}
1891
1892static int ip6_pkt_discard(struct sk_buff *skb)
1893{
1894 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1895}
1896
1897static int ip6_pkt_discard_out(struct sk_buff *skb)
1898{
1899 skb->dev = skb_dst(skb)->dev;
1900 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1901}
1902
1903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1904
1905static int ip6_pkt_prohibit(struct sk_buff *skb)
1906{
1907 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1908}
1909
1910static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1911{
1912 skb->dev = skb_dst(skb)->dev;
1913 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1914}
1915
1916#endif
1917
1918/*
1919 * Allocate a dst for local (unicast / anycast) address.
1920 */
1921
1922struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1923 const struct in6_addr *addr,
1924 int anycast)
1925{
1926 struct net *net = dev_net(idev->dev);
1927 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1928 struct neighbour *neigh;
1929
1930 if (rt == NULL)
1931 return ERR_PTR(-ENOMEM);
1932
1933 dev_hold(net->loopback_dev);
1934 in6_dev_hold(idev);
1935
1936 rt->u.dst.flags = DST_HOST;
1937 rt->u.dst.input = ip6_input;
1938 rt->u.dst.output = ip6_output;
1939 rt->rt6i_dev = net->loopback_dev;
1940 rt->rt6i_idev = idev;
1941 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1942 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1943 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1944 rt->u.dst.obsolete = -1;
1945
1946 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1947 if (anycast)
1948 rt->rt6i_flags |= RTF_ANYCAST;
1949 else
1950 rt->rt6i_flags |= RTF_LOCAL;
1951 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1952 if (IS_ERR(neigh)) {
1953 dst_free(&rt->u.dst);
1954
1955 /* We are casting this because that is the return
1956 * value type. But an errno encoded pointer is the
1957 * same regardless of the underlying pointer type,
1958 * and that's what we are returning. So this is OK.
1959 */
1960 return (struct rt6_info *) neigh;
1961 }
1962 rt->rt6i_nexthop = neigh;
1963
1964 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1965 rt->rt6i_dst.plen = 128;
1966 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1967
1968 atomic_set(&rt->u.dst.__refcnt, 1);
1969
1970 return rt;
1971}
1972
1973struct arg_dev_net {
1974 struct net_device *dev;
1975 struct net *net;
1976};
1977
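/*
 * fib6_clean_all()/icmp6_clean_all() callback: flag for deletion every
 * route bound to the departing device (or every device when dev is
 * NULL), sparing only the null entry.
 */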
1978static int fib6_ifdown(struct rt6_info *rt, void *arg)
1979{
1980 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1981 struct net *net = ((struct arg_dev_net *)arg)->net;
1982
1983 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1984 rt != net->ipv6.ip6_null_entry) {
1985 RT6_TRACE("deleted by ifdown %p\n", rt);
1986 return -1;
1987 }
1988 return 0;
1989}
1990
1991void rt6_ifdown(struct net *net, struct net_device *dev)
1992{
1993 struct arg_dev_net adn = {
1994 .dev = dev,
1995 .net = net,
1996 };
1997
1998 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1999 icmp6_clean_all(fib6_ifdown, &adn);
2000}
2001
2002struct rt6_mtu_change_arg
2003{
2004 struct net_device *dev;
2005 unsigned mtu;
2006};
2007
2008static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2009{
2010 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2011 struct inet6_dev *idev;
2012 struct net *net = dev_net(arg->dev);
2013
2014 /* In IPv6, pmtu discovery is not optional,
2015 so the RTAX_MTU lock cannot disable it.
2016 We still use this lock to block changes
2017 caused by addrconf/ndisc.
2018 */
2019
2020 idev = __in6_dev_get(arg->dev);
2021 if (idev == NULL)
2022 return 0;
2023
2024 /* For an administrative MTU increase, there is no way to discover
2025 an IPv6 PMTU increase, so the PMTU must be updated here.
2026 Since RFC 1981 doesn't cover administrative MTU increases,
2027 updating the PMTU on increase is a MUST (e.g. jumbo frames).
2028 */
2029 /*
2030 If the new MTU is less than the route PMTU, this new MTU will be the
2031 lowest MTU in the path; update the route PMTU to reflect the
2032 decrease. If the new MTU is greater than the route PMTU, and the
2033 old MTU was the lowest MTU in the path, update the route PMTU
2034 to reflect the increase. In this case, if the other nodes' MTU
2035 is also the lowest in the path, a Packet Too Big message will
2036 trigger PMTU discovery.
2037 */
2038 if (rt->rt6i_dev == arg->dev &&
2039 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
2040 (dst_mtu(&rt->u.dst) >= arg->mtu ||
2041 (dst_mtu(&rt->u.dst) < arg->mtu &&
2042 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
2043 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
2044 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
2045 }
2046 return 0;
2047}
2048
2049void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2050{
2051 struct rt6_mtu_change_arg arg = {
2052 .dev = dev,
2053 .mtu = mtu,
2054 };
2055
2056 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2057}
2058
2059static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2060 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2061 [RTA_OIF] = { .type = NLA_U32 },
2062 [RTA_IIF] = { .type = NLA_U32 },
2063 [RTA_PRIORITY] = { .type = NLA_U32 },
2064 [RTA_METRICS] = { .type = NLA_NESTED },
2065};
2066
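/*
 * Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * fib6_config, validating attributes against rtm_ipv6_policy.
 */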
2067static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2068 struct fib6_config *cfg)
2069{
2070 struct rtmsg *rtm;
2071 struct nlattr *tb[RTA_MAX+1];
2072 int err;
2073
2074 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2075 if (err < 0)
2076 goto errout;
2077
2078 err = -EINVAL;
2079 rtm = nlmsg_data(nlh);
2080 memset(cfg, 0, sizeof(*cfg));
2081
2082 cfg->fc_table = rtm->rtm_table;
2083 cfg->fc_dst_len = rtm->rtm_dst_len;
2084 cfg->fc_src_len = rtm->rtm_src_len;
2085 cfg->fc_flags = RTF_UP;
2086 cfg->fc_protocol = rtm->rtm_protocol;
2087
2088 if (rtm->rtm_type == RTN_UNREACHABLE)
2089 cfg->fc_flags |= RTF_REJECT;
2090
2091 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2092 cfg->fc_nlinfo.nlh = nlh;
2093 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2094
2095 if (tb[RTA_GATEWAY]) {
2096 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2097 cfg->fc_flags |= RTF_GATEWAY;
2098 }
2099
2100 if (tb[RTA_DST]) {
2101 int plen = (rtm->rtm_dst_len + 7) >> 3;
2102
2103 if (nla_len(tb[RTA_DST]) < plen)
2104 goto errout;
2105
2106 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2107 }
2108
2109 if (tb[RTA_SRC]) {
2110 int plen = (rtm->rtm_src_len + 7) >> 3;
2111
2112 if (nla_len(tb[RTA_SRC]) < plen)
2113 goto errout;
2114
2115 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2116 }
2117
2118 if (tb[RTA_OIF])
2119 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2120
2121 if (tb[RTA_PRIORITY])
2122 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2123
2124 if (tb[RTA_METRICS]) {
2125 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2126 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2127 }
2128
2129 if (tb[RTA_TABLE])
2130 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2131
2132 err = 0;
2133errout:
2134 return err;
2135}
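/*
 * Illustrative sketch (not in the original source): roughly the fib6_config
 * that rtm_to_fib6_config() would produce for a request such as
 * "ip -6 route add 2001:db8::/32 via fe80::1 dev eth2".  The numeric values
 * (ifindex, metric, table) are assumptions for illustration; the 128-bit
 * addresses themselves are copied into fc_dst/fc_gateway from
 * RTA_DST/RTA_GATEWAY with nla_memcpy() and are omitted here.
 */
#if 0	/* example only */
static const struct fib6_config example_cfg = {
	.fc_table	= RT_TABLE_MAIN,	/* rtm_table / RTA_TABLE */
	.fc_dst_len	= 32,			/* rtm_dst_len (/32 prefix) */
	.fc_flags	= RTF_UP | RTF_GATEWAY,	/* RTA_GATEWAY present */
	.fc_protocol	= RTPROT_BOOT,		/* rtm_protocol from userspace */
	.fc_ifindex	= 3,			/* RTA_OIF (assumed ifindex) */
	.fc_metric	= 1024,			/* RTA_PRIORITY (assumed default) */
};
#endif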
2136
2137static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2138{
2139 struct fib6_config cfg;
2140 int err;
2141
2142 err = rtm_to_fib6_config(skb, nlh, &cfg);
2143 if (err < 0)
2144 return err;
2145
2146 return ip6_route_del(&cfg);
2147}
2148
2149static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2150{
2151 struct fib6_config cfg;
2152 int err;
2153
2154 err = rtm_to_fib6_config(skb, nlh, &cfg);
2155 if (err < 0)
2156 return err;
2157
2158 return ip6_route_add(&cfg);
2159}
2160
2161static inline size_t rt6_nlmsg_size(void)
2162{
2163 return NLMSG_ALIGN(sizeof(struct rtmsg))
2164 + nla_total_size(16) /* RTA_SRC */
2165 + nla_total_size(16) /* RTA_DST */
2166 + nla_total_size(16) /* RTA_GATEWAY */
2167 + nla_total_size(16) /* RTA_PREFSRC */
2168 + nla_total_size(4) /* RTA_TABLE */
2169 + nla_total_size(4) /* RTA_IIF */
2170 + nla_total_size(4) /* RTA_OIF */
2171 + nla_total_size(4) /* RTA_PRIORITY */
2172 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2173 + nla_total_size(sizeof(struct rta_cacheinfo));
2174}
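/*
 * Context note (not in the original source): this is the worst-case
 * per-route message size used by inet6_rt_notify() below when allocating the
 * notification skb with nlmsg_new(); if rt6_fill_node() ever needs more than
 * this, the -EMSGSIZE WARN_ON in inet6_rt_notify() fires.
 */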
2175
2176static int rt6_fill_node(struct net *net,
2177 struct sk_buff *skb, struct rt6_info *rt,
2178 struct in6_addr *dst, struct in6_addr *src,
2179 int iif, int type, u32 pid, u32 seq,
2180 int prefix, int nowait, unsigned int flags)
2181{
2182 struct rtmsg *rtm;
2183 struct nlmsghdr *nlh;
2184 long expires;
2185 u32 table;
2186
2187 if (prefix) { /* user wants prefix routes only */
2188 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2189 /* success since this is not a prefix route */
2190 return 1;
2191 }
2192 }
2193
2194 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2195 if (nlh == NULL)
2196 return -EMSGSIZE;
2197
2198 rtm = nlmsg_data(nlh);
2199 rtm->rtm_family = AF_INET6;
2200 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2201 rtm->rtm_src_len = rt->rt6i_src.plen;
2202 rtm->rtm_tos = 0;
2203 if (rt->rt6i_table)
2204 table = rt->rt6i_table->tb6_id;
2205 else
2206 table = RT6_TABLE_UNSPEC;
2207 rtm->rtm_table = table;
2208 NLA_PUT_U32(skb, RTA_TABLE, table);
2209 if (rt->rt6i_flags&RTF_REJECT)
2210 rtm->rtm_type = RTN_UNREACHABLE;
2211 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2212 rtm->rtm_type = RTN_LOCAL;
2213 else
2214 rtm->rtm_type = RTN_UNICAST;
2215 rtm->rtm_flags = 0;
2216 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2217 rtm->rtm_protocol = rt->rt6i_protocol;
2218 if (rt->rt6i_flags&RTF_DYNAMIC)
2219 rtm->rtm_protocol = RTPROT_REDIRECT;
2220 else if (rt->rt6i_flags & RTF_ADDRCONF)
2221 rtm->rtm_protocol = RTPROT_KERNEL;
2222 else if (rt->rt6i_flags&RTF_DEFAULT)
2223 rtm->rtm_protocol = RTPROT_RA;
2224
2225 if (rt->rt6i_flags&RTF_CACHE)
2226 rtm->rtm_flags |= RTM_F_CLONED;
2227
2228 if (dst) {
2229 NLA_PUT(skb, RTA_DST, 16, dst);
2230 rtm->rtm_dst_len = 128;
2231 } else if (rtm->rtm_dst_len)
2232 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2233#ifdef CONFIG_IPV6_SUBTREES
2234 if (src) {
2235 NLA_PUT(skb, RTA_SRC, 16, src);
2236 rtm->rtm_src_len = 128;
2237 } else if (rtm->rtm_src_len)
2238 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2239#endif
2240 if (iif) {
2241#ifdef CONFIG_IPV6_MROUTE
2242 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2243 int err = ip6mr_get_route(net, skb, rtm, nowait);
2244 if (err <= 0) {
2245 if (!nowait) {
2246 if (err == 0)
2247 return 0;
2248 goto nla_put_failure;
2249 } else {
2250 if (err == -EMSGSIZE)
2251 goto nla_put_failure;
2252 }
2253 }
2254 } else
2255#endif
2256 NLA_PUT_U32(skb, RTA_IIF, iif);
2257 } else if (dst) {
2258 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
2259 struct in6_addr saddr_buf;
2260 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2261 dst, 0, &saddr_buf) == 0)
2262 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2263 }
2264
2265 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2266 goto nla_put_failure;
2267
2268 if (rt->u.dst.neighbour)
2269 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2270
2271 if (rt->u.dst.dev)
2272 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2273
2274 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2275
2276 if (!(rt->rt6i_flags & RTF_EXPIRES))
2277 expires = 0;
2278 else if (rt->rt6i_expires - jiffies < INT_MAX)
2279 expires = rt->rt6i_expires - jiffies;
2280 else
2281 expires = INT_MAX;
2282
2283 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2284 expires, rt->u.dst.error) < 0)
2285 goto nla_put_failure;
2286
2287 return nlmsg_end(skb, nlh);
2288
2289nla_put_failure:
2290 nlmsg_cancel(skb, nlh);
2291 return -EMSGSIZE;
2292}
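/*
 * Sketch of the message rt6_fill_node() emits (added for illustration, not
 * in the original source): an nlmsghdr of the requested type, an rtmsg
 * header, then the optional attributes RTA_TABLE, RTA_DST, RTA_SRC (with
 * CONFIG_IPV6_SUBTREES), RTA_IIF or RTA_PREFSRC, RTA_METRICS, RTA_GATEWAY,
 * RTA_OIF, RTA_PRIORITY and the cacheinfo attribute - exactly the set that
 * rt6_nlmsg_size() budgets for above.
 */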
2293
2294int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2295{
2296 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2297 int prefix;
2298
2299 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2300 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2301 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2302 } else
2303 prefix = 0;
2304
2305 return rt6_fill_node(arg->net,
2306 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2307 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2308 prefix, 0, NLM_F_MULTI);
2309}
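/*
 * Note added for context (not in the original source): rt6_dump_route() is
 * the per-route callback used when answering an RTM_GETROUTE dump request
 * (e.g. "ip -6 route show"); the fib6 dump walker in ip6_fib.c invokes it
 * for every rt6_info, and rt6_fill_node() appends one RTM_NEWROUTE message
 * per route to the dump skb.
 */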
2310
2311static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2312{
2313 struct net *net = sock_net(in_skb->sk);
2314 struct nlattr *tb[RTA_MAX+1];
2315 struct rt6_info *rt;
2316 struct sk_buff *skb;
2317 struct rtmsg *rtm;
2318 struct flowi fl;
2319 int err, iif = 0;
2320
2321 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2322 if (err < 0)
2323 goto errout;
2324
2325 err = -EINVAL;
2326 memset(&fl, 0, sizeof(fl));
2327
2328 if (tb[RTA_SRC]) {
2329 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2330 goto errout;
2331
2332 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2333 }
2334
2335 if (tb[RTA_DST]) {
2336 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2337 goto errout;
2338
2339 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2340 }
2341
2342 if (tb[RTA_IIF])
2343 iif = nla_get_u32(tb[RTA_IIF]);
2344
2345 if (tb[RTA_OIF])
2346 fl.oif = nla_get_u32(tb[RTA_OIF]);
2347
2348 if (iif) {
2349 struct net_device *dev;
2350 dev = __dev_get_by_index(net, iif);
2351 if (!dev) {
2352 err = -ENODEV;
2353 goto errout;
2354 }
2355 }
2356
2357 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2358 if (skb == NULL) {
2359 err = -ENOBUFS;
2360 goto errout;
2361 }
2362
2363 /* Reserve room for dummy headers; this skb can pass
2364 through a good chunk of the routing engine.
2365 */
2366 skb_reset_mac_header(skb);
2367 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2368
2369 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2370 skb_dst_set(skb, &rt->u.dst);
2371
2372 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2373 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2374 nlh->nlmsg_seq, 0, 0, 0);
2375 if (err < 0) {
2376 kfree_skb(skb);
2377 goto errout;
2378 }
2379
2380 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2381errout:
2382 return err;
2383}
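/*
 * Illustrative usage (not part of the original source): "ip -6 route get
 * 2001:db8::1" sends an RTM_GETROUTE request carrying an rtmsg with
 * rtm_family = AF_INET6 plus an RTA_DST attribute (and optionally RTA_SRC,
 * RTA_IIF, RTA_OIF).  inet6_rtm_getroute() resolves it with
 * ip6_route_output() and answers with a single RTM_NEWROUTE message built
 * by rt6_fill_node(), unicast back to the requester via rtnl_unicast().
 */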
2384
2385void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2386{
2387 struct sk_buff *skb;
2388 struct net *net = info->nl_net;
2389 u32 seq;
2390 int err;
2391
2392 err = -ENOBUFS;
2393 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2394
2395 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2396 if (skb == NULL)
2397 goto errout;
2398
2399 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2400 event, info->pid, seq, 0, 0, 0);
2401 if (err < 0) {
2402 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2403 WARN_ON(err == -EMSGSIZE);
2404 kfree_skb(skb);
2405 goto errout;
2406 }
2407 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2408 info->nlh, gfp_any());
2409 return;
2410errout:
2411 if (err < 0)
2412 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2413}
2414
2415static int ip6_route_dev_notify(struct notifier_block *this,
2416 unsigned long event, void *data)
2417{
2418 struct net_device *dev = (struct net_device *)data;
2419 struct net *net = dev_net(dev);
2420
2421 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2422 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2423 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2424#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2425 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2426 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2427 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2428 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2429#endif
2430 }
2431
2432 return NOTIFY_OK;
2433}
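/*
 * Context note (not in the original source): this notifier only cares about
 * the loopback device being registered in a new network namespace; it points
 * the namespace's null (and, with multiple tables, prohibit/blackhole)
 * template routes at that loopback device so they always have a valid
 * rt6i_dev/rt6i_idev.  init_net is handled separately in ip6_route_init()
 * further below.
 */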
2434
2435/*
2436 * /proc
2437 */
2438
2439#ifdef CONFIG_PROC_FS
2440
2441#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2442
2443struct rt6_proc_arg
2444{
2445 char *buffer;
2446 int offset;
2447 int length;
2448 int skip;
2449 int len;
2450};
2451
2452static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2453{
2454 struct seq_file *m = p_arg;
2455
2456 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2457
2458#ifdef CONFIG_IPV6_SUBTREES
2459 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2460#else
2461 seq_puts(m, "00000000000000000000000000000000 00 ");
2462#endif
2463
2464 if (rt->rt6i_nexthop) {
2465 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2466 } else {
2467 seq_puts(m, "00000000000000000000000000000000");
2468 }
2469 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2470 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2471 rt->u.dst.__use, rt->rt6i_flags,
2472 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2473 return 0;
2474}
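/*
 * Example of one /proc/net/ipv6_route line produced by the format above
 * (added for illustration; the concrete values are assumptions):
 *
 *   fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001     eth0
 *
 * i.e. destination/plen, source/plen (all-zero without CONFIG_IPV6_SUBTREES),
 * next hop, then metric, refcount, use count and flags in hex, and the
 * device name.
 */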
2475
2476static int ipv6_route_show(struct seq_file *m, void *v)
2477{
2478 struct net *net = (struct net *)m->private;
2479 fib6_clean_all(net, rt6_info_route, 0, m);
2480 return 0;
2481}
2482
2483static int ipv6_route_open(struct inode *inode, struct file *file)
2484{
2485 return single_open_net(inode, file, ipv6_route_show);
2486}
2487
2488static const struct file_operations ipv6_route_proc_fops = {
2489 .owner = THIS_MODULE,
2490 .open = ipv6_route_open,
2491 .read = seq_read,
2492 .llseek = seq_lseek,
2493 .release = single_release_net,
2494};
2495
2496static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2497{
2498 struct net *net = (struct net *)seq->private;
2499 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2500 net->ipv6.rt6_stats->fib_nodes,
2501 net->ipv6.rt6_stats->fib_route_nodes,
2502 net->ipv6.rt6_stats->fib_rt_alloc,
2503 net->ipv6.rt6_stats->fib_rt_entries,
2504 net->ipv6.rt6_stats->fib_rt_cache,
2505 atomic_read(&net->ipv6.ip6_dst_ops.entries),
2506 net->ipv6.rt6_stats->fib_discarded_routes);
2507
2508 return 0;
2509}
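/*
 * Example /proc/net/rt6_stats line (illustrative values only): seven
 * space-separated hex fields, e.g.
 *
 *   0012 0008 0015 0020 0005 001e 0000
 *
 * meaning fib nodes, route nodes, rt_alloc, rt_entries, rt_cache, the number
 * of allocated dst entries, and discarded routes, in that order.
 */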
2510
2511static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2512{
2513 return single_open_net(inode, file, rt6_stats_seq_show);
2514}
2515
2516static const struct file_operations rt6_stats_seq_fops = {
2517 .owner = THIS_MODULE,
2518 .open = rt6_stats_seq_open,
2519 .read = seq_read,
2520 .llseek = seq_lseek,
2521 .release = single_release_net,
2522};
2523#endif /* CONFIG_PROC_FS */
2524
2525#ifdef CONFIG_SYSCTL
2526
2527static
2528int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2529 void __user *buffer, size_t *lenp, loff_t *ppos)
2530{
2531 struct net *net = current->nsproxy->net_ns;
2532 int delay = net->ipv6.sysctl.flush_delay;
2533 if (write) {
2534 proc_dointvec(ctl, write, buffer, lenp, ppos);
2535 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2536 return 0;
2537 } else
2538 return -EINVAL;
2539}
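/*
 * Usage note (not in the original source): this handler backs the write-only
 * /proc/sys/net/ipv6/route/flush file, so e.g.
 * "echo 1 > /proc/sys/net/ipv6/route/flush" forces an immediate fib6_run_gc()
 * pass over the namespace's routing tables; non-write accesses get -EINVAL.
 * Note that the gc is run with the flush_delay value that was in place
 * before proc_dointvec() parses this write.
 */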
2540
2541ctl_table ipv6_route_table_template[] = {
2542 {
2543 .procname = "flush",
2544 .data = &init_net.ipv6.sysctl.flush_delay,
2545 .maxlen = sizeof(int),
2546 .mode = 0200,
2547 .proc_handler = ipv6_sysctl_rtcache_flush
2548 },
2549 {
2550 .procname = "gc_thresh",
2551 .data = &ip6_dst_ops_template.gc_thresh,
2552 .maxlen = sizeof(int),
2553 .mode = 0644,
2554 .proc_handler = proc_dointvec,
2555 },
2556 {
2557 .procname = "max_size",
2558 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2559 .maxlen = sizeof(int),
2560 .mode = 0644,
2561 .proc_handler = proc_dointvec,
2562 },
2563 {
2564 .procname = "gc_min_interval",
2565 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2566 .maxlen = sizeof(int),
2567 .mode = 0644,
2568 .proc_handler = proc_dointvec_jiffies,
2569 },
2570 {
2571 .procname = "gc_timeout",
2572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2573 .maxlen = sizeof(int),
2574 .mode = 0644,
2575 .proc_handler = proc_dointvec_jiffies,
2576 },
2577 {
2578 .procname = "gc_interval",
2579 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2580 .maxlen = sizeof(int),
2581 .mode = 0644,
2582 .proc_handler = proc_dointvec_jiffies,
2583 },
2584 {
2585 .procname = "gc_elasticity",
2586 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2587 .maxlen = sizeof(int),
2588 .mode = 0644,
2589 .proc_handler = proc_dointvec_jiffies,
2590 },
2591 {
2592 .procname = "mtu_expires",
2593 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2594 .maxlen = sizeof(int),
2595 .mode = 0644,
2596 .proc_handler = proc_dointvec_jiffies,
2597 },
2598 {
2599 .procname = "min_adv_mss",
2600 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2601 .maxlen = sizeof(int),
2602 .mode = 0644,
2603 .proc_handler = proc_dointvec_jiffies,
2604 },
2605 {
2606 .procname = "gc_min_interval_ms",
2607 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2608 .maxlen = sizeof(int),
2609 .mode = 0644,
2610 .proc_handler = proc_dointvec_ms_jiffies,
2611 },
2612 { }
2613};
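/*
 * Context note (not in the original source): once ipv6_route_sysctl_init()
 * below has duplicated this template for a namespace, the entries appear
 * under net.ipv6.route.*, e.g. "sysctl net.ipv6.route.gc_thresh" or
 * "sysctl -w net.ipv6.route.mtu_expires=600"; most of the timing values are
 * converted by the jiffies/ms-jiffies proc handlers declared above.
 */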
2614
2615struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2616{
2617 struct ctl_table *table;
2618
2619 table = kmemdup(ipv6_route_table_template,
2620 sizeof(ipv6_route_table_template),
2621 GFP_KERNEL);
2622
2623 if (table) {
2624 table[0].data = &net->ipv6.sysctl.flush_delay;
2625 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2626 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2627 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2628 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2629 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2630 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2631 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2632 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2633 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2634 }
2635
2636 return table;
2637}
2638#endif
2639
2640static int __net_init ip6_route_net_init(struct net *net)
2641{
2642 int ret = -ENOMEM;
2643
2644 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2645 sizeof(net->ipv6.ip6_dst_ops));
2646
2647 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2648 sizeof(*net->ipv6.ip6_null_entry),
2649 GFP_KERNEL);
2650 if (!net->ipv6.ip6_null_entry)
2651 goto out_ip6_dst_ops;
2652 net->ipv6.ip6_null_entry->u.dst.path =
2653 (struct dst_entry *)net->ipv6.ip6_null_entry;
2654 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2655
2656#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2657 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2658 sizeof(*net->ipv6.ip6_prohibit_entry),
2659 GFP_KERNEL);
2660 if (!net->ipv6.ip6_prohibit_entry)
2661 goto out_ip6_null_entry;
2662 net->ipv6.ip6_prohibit_entry->u.dst.path =
2663 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2664 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2665
2666 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2667 sizeof(*net->ipv6.ip6_blk_hole_entry),
2668 GFP_KERNEL);
2669 if (!net->ipv6.ip6_blk_hole_entry)
2670 goto out_ip6_prohibit_entry;
2671 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2672 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2673 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
2674#endif
2675
2676 net->ipv6.sysctl.flush_delay = 0;
2677 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2678 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2679 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2680 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2681 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2682 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2683 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2684
2685#ifdef CONFIG_PROC_FS
2686 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2687 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2688#endif
2689 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2690
2691 ret = 0;
2692out:
2693 return ret;
2694
2695#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2696out_ip6_prohibit_entry:
2697 kfree(net->ipv6.ip6_prohibit_entry);
2698out_ip6_null_entry:
2699 kfree(net->ipv6.ip6_null_entry);
2700#endif
2701out_ip6_dst_ops:
2702 goto out;
2703}
2704
2705static void __net_exit ip6_route_net_exit(struct net *net)
2706{
2707#ifdef CONFIG_PROC_FS
2708 proc_net_remove(net, "ipv6_route");
2709 proc_net_remove(net, "rt6_stats");
2710#endif
2711 kfree(net->ipv6.ip6_null_entry);
2712#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2713 kfree(net->ipv6.ip6_prohibit_entry);
2714 kfree(net->ipv6.ip6_blk_hole_entry);
2715#endif
2716}
2717
2718static struct pernet_operations ip6_route_net_ops = {
2719 .init = ip6_route_net_init,
2720 .exit = ip6_route_net_exit,
2721};
2722
2723static struct notifier_block ip6_route_dev_notifier = {
2724 .notifier_call = ip6_route_dev_notify,
2725 .priority = 0,
2726};
2727
2728int __init ip6_route_init(void)
2729{
2730 int ret;
2731
2732 ret = -ENOMEM;
2733 ip6_dst_ops_template.kmem_cachep =
2734 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2735 SLAB_HWCACHE_ALIGN, NULL);
2736 if (!ip6_dst_ops_template.kmem_cachep)
2737 goto out;
2738
2739 ret = register_pernet_subsys(&ip6_route_net_ops);
2740 if (ret)
2741 goto out_kmem_cache;
2742
2743 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2744
2745 /* The loopback device is registered before this code runs, so the
2746 * loopback reference in rt6_info is not taken automatically; do it
2747 * manually for init_net */
2748 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2749 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2750 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2751 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2752 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2753 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2754 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2755 #endif
2756 ret = fib6_init();
2757 if (ret)
2758 goto out_register_subsys;
2759
2760 ret = xfrm6_init();
2761 if (ret)
2762 goto out_fib6_init;
2763
2764 ret = fib6_rules_init();
2765 if (ret)
2766 goto xfrm6_init;
2767
2768 ret = -ENOBUFS;
2769 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2770 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2771 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2772 goto fib6_rules_init;
2773
2774 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2775 if (ret)
2776 goto fib6_rules_init;
2777
2778out:
2779 return ret;
2780
2781fib6_rules_init:
2782 fib6_rules_cleanup();
2783xfrm6_init:
2784 xfrm6_fini();
2785out_fib6_init:
2786 fib6_gc_cleanup();
2787out_register_subsys:
2788 unregister_pernet_subsys(&ip6_route_net_ops);
2789out_kmem_cache:
2790 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2791 goto out;
2792}
2793
2794void ip6_route_cleanup(void)
2795{
2796 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2797 fib6_rules_cleanup();
2798 xfrm6_fini();
2799 fib6_gc_cleanup();
2800 unregister_pernet_subsys(&ip6_route_net_ops);
2801 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2802}