]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
ipsec: Add missing list_del() in xfrm_state_gc_task().
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
457c4cbc 43#include <net/net_namespace.h>
1da177e4
LT
44#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
8d71740c 54#include <net/netevent.h>
21713ebc 55#include <net/netlink.h>
1da177e4
LT
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
63/* Set to 3 to get tracing. */
64#define RT6_DEBUG 2
65
66#if RT6_DEBUG >= 3
67#define RDBG(x) printk x
68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
69#else
70#define RDBG(x)
71#define RT6_TRACE(x...) do { ; } while (0)
72#endif
73
519fbd87 74#define CLONE_OFFLINK_ROUTE 0
1da177e4 75
1da177e4
LT
76static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
569d3645 82static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
83
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
70ceb4f5 89#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
90static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
efa2cea0
DL
94static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
96 struct in6_addr *gwaddr, int ifindex);
97#endif
98
9a7ec3a9 99static struct dst_ops ip6_dst_ops_template = {
1da177e4
LT
100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 110 .local_out = __ip6_local_out,
1da177e4 111 .entry_size = sizeof(struct rt6_info),
e2422970 112 .entries = ATOMIC_INIT(0),
1da177e4
LT
113};
114
14e50e57
DM
115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116{
117}
118
119static struct dst_ops ip6_dst_blackhole_ops = {
120 .family = AF_INET6,
121 .protocol = __constant_htons(ETH_P_IPV6),
122 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu,
125 .entry_size = sizeof(struct rt6_info),
e2422970 126 .entries = ATOMIC_INIT(0),
14e50e57
DM
127};
128
bdb3289f 129static struct rt6_info ip6_null_entry_template = {
1da177e4
LT
130 .u = {
131 .dst = {
132 .__refcnt = ATOMIC_INIT(1),
133 .__use = 1,
1da177e4
LT
134 .obsolete = -1,
135 .error = -ENETUNREACH,
136 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
137 .input = ip6_pkt_discard,
138 .output = ip6_pkt_discard_out,
1da177e4
LT
139 }
140 },
141 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
142 .rt6i_metric = ~(u32) 0,
143 .rt6i_ref = ATOMIC_INIT(1),
144};
145
101367c2
TG
146#ifdef CONFIG_IPV6_MULTIPLE_TABLES
147
6723ab54
DM
148static int ip6_pkt_prohibit(struct sk_buff *skb);
149static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 150
280a34c8 151static struct rt6_info ip6_prohibit_entry_template = {
101367c2
TG
152 .u = {
153 .dst = {
154 .__refcnt = ATOMIC_INIT(1),
155 .__use = 1,
101367c2
TG
156 .obsolete = -1,
157 .error = -EACCES,
158 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
159 .input = ip6_pkt_prohibit,
160 .output = ip6_pkt_prohibit_out,
101367c2
TG
161 }
162 },
163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
164 .rt6i_metric = ~(u32) 0,
165 .rt6i_ref = ATOMIC_INIT(1),
166};
167
bdb3289f 168static struct rt6_info ip6_blk_hole_entry_template = {
101367c2
TG
169 .u = {
170 .dst = {
171 .__refcnt = ATOMIC_INIT(1),
172 .__use = 1,
101367c2
TG
173 .obsolete = -1,
174 .error = -EINVAL,
175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
352e512c
HX
176 .input = dst_discard,
177 .output = dst_discard,
101367c2
TG
178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
185#endif
186
/*
 * Allocate a dst entry from the given dst_ops and hand it back typed
 * as an rt6_info. Returns whatever dst_alloc() returns (cast only).
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
1ab1457c 201 }
1da177e4
LT
202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 209 struct net_device *loopback_dev =
c346dca1 210 dev_net(dev)->loopback_dev;
1da177e4 211
5a3e55d6
DL
212 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
213 struct inet6_dev *loopback_idev =
214 in6_dev_get(loopback_dev);
1da177e4
LT
215 if (loopback_idev != NULL) {
216 rt->rt6i_idev = loopback_idev;
217 in6_dev_put(idev);
218 }
219 }
220}
221
222static __inline__ int rt6_check_expired(const struct rt6_info *rt)
223{
224 return (rt->rt6i_flags & RTF_EXPIRES &&
225 time_after(jiffies, rt->rt6i_expires));
226}
227
c71099ac
TG
228static inline int rt6_need_strict(struct in6_addr *daddr)
229{
230 return (ipv6_addr_type(daddr) &
5ce83afa 231 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
c71099ac
TG
232}
233
1da177e4 234/*
c71099ac 235 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
236 */
237
8ed67789
DL
238static inline struct rt6_info *rt6_device_match(struct net *net,
239 struct rt6_info *rt,
dd3abc4e 240 struct in6_addr *saddr,
1da177e4 241 int oif,
d420895e 242 int flags)
1da177e4
LT
243{
244 struct rt6_info *local = NULL;
245 struct rt6_info *sprt;
246
dd3abc4e
YH
247 if (!oif && ipv6_addr_any(saddr))
248 goto out;
249
250 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
251 struct net_device *dev = sprt->rt6i_dev;
252
253 if (oif) {
1da177e4
LT
254 if (dev->ifindex == oif)
255 return sprt;
256 if (dev->flags & IFF_LOOPBACK) {
257 if (sprt->rt6i_idev == NULL ||
258 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 259 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 260 continue;
1ab1457c 261 if (local && (!oif ||
1da177e4
LT
262 local->rt6i_idev->dev->ifindex == oif))
263 continue;
264 }
265 local = sprt;
266 }
dd3abc4e
YH
267 } else {
268 if (ipv6_chk_addr(net, saddr, dev,
269 flags & RT6_LOOKUP_F_IFACE))
270 return sprt;
1da177e4 271 }
dd3abc4e 272 }
1da177e4 273
dd3abc4e 274 if (oif) {
1da177e4
LT
275 if (local)
276 return local;
277
d420895e 278 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 279 return net->ipv6.ip6_null_entry;
1da177e4 280 }
dd3abc4e 281out:
1da177e4
LT
282 return rt;
283}
284
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's next-hop neighbour is
 * not in a NUD_VALID state and was last updated more than
 * cnf.rtr_probe_interval ago, send a Neighbour Solicitation to the
 * solicited-node multicast address of the neighbour.
 *
 * The original note on this code says probing "does not seem to be
 * appropriate for now" but must be checked, and that probes MUST be
 * rate-limited to no more than one per minute.
 *
 * A NULL @rt, or a route without a next hop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *nbr = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr solicited;
	struct in6_addr *target;

	/* Cheap unlocked pre-check. */
	if (!nbr || (nbr->nud_state & NUD_VALID))
		return;

	read_lock_bh(&nbr->lock);
	if ((nbr->nud_state & NUD_VALID) ||
	    !time_after(jiffies, nbr->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&nbr->lock);
		return;
	}

	/* Stamp the neighbour so the next probe is rate-limited. */
	nbr->updated = jiffies;
	read_unlock_bh(&nbr->lock);

	target = (struct in6_addr *)&nbr->primary_key;
	addrconf_addr_solict_mult(target, &solicited);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &solicited, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
320
1da177e4 321/*
554cfb7e 322 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 323 */
b6f99a21 324static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
325{
326 struct net_device *dev = rt->rt6i_dev;
161980f4 327 if (!oif || dev->ifindex == oif)
554cfb7e 328 return 2;
161980f4
DM
329 if ((dev->flags & IFF_LOOPBACK) &&
330 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
331 return 1;
332 return 0;
554cfb7e 333}
1da177e4 334
b6f99a21 335static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 336{
554cfb7e 337 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 338 int m;
4d0c5911
YH
339 if (rt->rt6i_flags & RTF_NONEXTHOP ||
340 !(rt->rt6i_flags & RTF_GATEWAY))
341 m = 1;
342 else if (neigh) {
554cfb7e
YH
343 read_lock_bh(&neigh->lock);
344 if (neigh->nud_state & NUD_VALID)
4d0c5911 345 m = 2;
398bcbeb
YH
346#ifdef CONFIG_IPV6_ROUTER_PREF
347 else if (neigh->nud_state & NUD_FAILED)
348 m = 0;
349#endif
350 else
ea73ee23 351 m = 1;
554cfb7e 352 read_unlock_bh(&neigh->lock);
398bcbeb
YH
353 } else
354 m = 0;
554cfb7e 355 return m;
1da177e4
LT
356}
357
554cfb7e
YH
358static int rt6_score_route(struct rt6_info *rt, int oif,
359 int strict)
1da177e4 360{
4d0c5911 361 int m, n;
1ab1457c 362
4d0c5911 363 m = rt6_check_dev(rt, oif);
77d16f45 364 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 365 return -1;
ebacaaa0
YH
366#ifdef CONFIG_IPV6_ROUTER_PREF
367 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
368#endif
4d0c5911 369 n = rt6_check_neigh(rt);
557e92ef 370 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
371 return -1;
372 return m;
373}
374
f11e6659
DM
375static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
376 int *mpri, struct rt6_info *match)
554cfb7e 377{
f11e6659
DM
378 int m;
379
380 if (rt6_check_expired(rt))
381 goto out;
382
383 m = rt6_score_route(rt, oif, strict);
384 if (m < 0)
385 goto out;
386
387 if (m > *mpri) {
388 if (strict & RT6_LOOKUP_F_REACHABLE)
389 rt6_probe(match);
390 *mpri = m;
391 match = rt;
392 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
393 rt6_probe(rt);
394 }
395
396out:
397 return match;
398}
399
400static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
401 struct rt6_info *rr_head,
402 u32 metric, int oif, int strict)
403{
404 struct rt6_info *rt, *match;
554cfb7e 405 int mpri = -1;
1da177e4 406
f11e6659
DM
407 match = NULL;
408 for (rt = rr_head; rt && rt->rt6i_metric == metric;
409 rt = rt->u.dst.rt6_next)
410 match = find_match(rt, oif, strict, &mpri, match);
411 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
412 rt = rt->u.dst.rt6_next)
413 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 414
f11e6659
DM
415 return match;
416}
1da177e4 417
f11e6659
DM
418static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
419{
420 struct rt6_info *match, *rt0;
8ed67789 421 struct net *net;
1da177e4 422
f11e6659 423 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 424 __func__, fn->leaf, oif);
554cfb7e 425
f11e6659
DM
426 rt0 = fn->rr_ptr;
427 if (!rt0)
428 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 429
f11e6659 430 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 431
554cfb7e 432 if (!match &&
f11e6659
DM
433 (strict & RT6_LOOKUP_F_REACHABLE)) {
434 struct rt6_info *next = rt0->u.dst.rt6_next;
435
554cfb7e 436 /* no entries matched; do round-robin */
f11e6659
DM
437 if (!next || next->rt6i_metric != rt0->rt6i_metric)
438 next = fn->leaf;
439
440 if (next != rt0)
441 fn->rr_ptr = next;
1da177e4 442 }
1da177e4 443
f11e6659 444 RT6_TRACE("%s() => %p\n",
0dc47877 445 __func__, match);
1da177e4 446
c346dca1 447 net = dev_net(rt0->rt6i_dev);
8ed67789 448 return (match ? match : net->ipv6.ip6_null_entry);
1da177e4
LT
449}
450
70ceb4f5
YH
451#ifdef CONFIG_IPV6_ROUTE_INFO
452int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
453 struct in6_addr *gwaddr)
454{
c346dca1 455 struct net *net = dev_net(dev);
70ceb4f5
YH
456 struct route_info *rinfo = (struct route_info *) opt;
457 struct in6_addr prefix_buf, *prefix;
458 unsigned int pref;
4bed72e4 459 unsigned long lifetime;
70ceb4f5
YH
460 struct rt6_info *rt;
461
462 if (len < sizeof(struct route_info)) {
463 return -EINVAL;
464 }
465
466 /* Sanity check for prefix_len and length */
467 if (rinfo->length > 3) {
468 return -EINVAL;
469 } else if (rinfo->prefix_len > 128) {
470 return -EINVAL;
471 } else if (rinfo->prefix_len > 64) {
472 if (rinfo->length < 2) {
473 return -EINVAL;
474 }
475 } else if (rinfo->prefix_len > 0) {
476 if (rinfo->length < 1) {
477 return -EINVAL;
478 }
479 }
480
481 pref = rinfo->route_pref;
482 if (pref == ICMPV6_ROUTER_PREF_INVALID)
483 pref = ICMPV6_ROUTER_PREF_MEDIUM;
484
4bed72e4 485 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
486
487 if (rinfo->length == 3)
488 prefix = (struct in6_addr *)rinfo->prefix;
489 else {
490 /* this function is safe */
491 ipv6_addr_prefix(&prefix_buf,
492 (struct in6_addr *)rinfo->prefix,
493 rinfo->prefix_len);
494 prefix = &prefix_buf;
495 }
496
efa2cea0
DL
497 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
498 dev->ifindex);
70ceb4f5
YH
499
500 if (rt && !lifetime) {
e0a1ad73 501 ip6_del_rt(rt);
70ceb4f5
YH
502 rt = NULL;
503 }
504
505 if (!rt && lifetime)
efa2cea0 506 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
507 pref);
508 else if (rt)
509 rt->rt6i_flags = RTF_ROUTEINFO |
510 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
511
512 if (rt) {
4bed72e4 513 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
514 rt->rt6i_flags &= ~RTF_EXPIRES;
515 } else {
516 rt->rt6i_expires = jiffies + HZ * lifetime;
517 rt->rt6i_flags |= RTF_EXPIRES;
518 }
519 dst_release(&rt->u.dst);
520 }
521 return 0;
522}
523#endif
524
/*
 * BACKTRACK(net, saddr): used right after a lookup step inside the
 * policy-route functions. It relies on the caller's locals `rt' and
 * `fn' and on the caller's labels `out' and `restart'.
 *
 * When `rt' is the namespace's null entry, climb from `fn' towards
 * the root: stop with `goto out' at a top-level root node; at each
 * parent, descend into its source subtree (if it has one that is not
 * the node we came from) via fib6_lookup(), otherwise move to the
 * parent; `goto restart' as soon as a node carrying route info is
 * reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 542
8ed67789
DL
543static struct rt6_info *ip6_pol_route_lookup(struct net *net,
544 struct fib6_table *table,
c71099ac 545 struct flowi *fl, int flags)
1da177e4
LT
546{
547 struct fib6_node *fn;
548 struct rt6_info *rt;
549
c71099ac
TG
550 read_lock_bh(&table->tb6_lock);
551 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
552restart:
553 rt = fn->leaf;
dd3abc4e 554 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 555 BACKTRACK(net, &fl->fl6_src);
c71099ac 556out:
03f49f34 557 dst_use(&rt->u.dst, jiffies);
c71099ac 558 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
559 return rt;
560
561}
562
9acd9f3a
YH
563struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
564 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
565{
566 struct flowi fl = {
567 .oif = oif,
568 .nl_u = {
569 .ip6_u = {
570 .daddr = *daddr,
c71099ac
TG
571 },
572 },
573 };
574 struct dst_entry *dst;
77d16f45 575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 576
adaa70bb
TG
577 if (saddr) {
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
580 }
581
606a2b48 582 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
583 if (dst->error == 0)
584 return (struct rt6_info *) dst;
585
586 dst_release(dst);
587
1da177e4
LT
588 return NULL;
589}
590
7159039a
YH
591EXPORT_SYMBOL(rt6_lookup);
592
/* ip6_ins_rt is called with table->tb6_lock NOT held (free).
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
598
86872cb5 599static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
600{
601 int err;
c71099ac 602 struct fib6_table *table;
1da177e4 603
c71099ac
TG
604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
86872cb5 606 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 607 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
608
609 return err;
610}
611
40e22e8f
TG
612int ip6_ins_rt(struct rt6_info *rt)
613{
4d1169c1 614 struct nl_info info = {
c346dca1 615 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 616 };
528c4ceb 617 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
618}
619
95a9a5ba
YH
620static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
621 struct in6_addr *saddr)
1da177e4 622{
1da177e4
LT
623 struct rt6_info *rt;
624
625 /*
626 * Clone the route.
627 */
628
629 rt = ip6_rt_copy(ort);
630
631 if (rt) {
58c4fb86
YH
632 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
633 if (rt->rt6i_dst.plen != 128 &&
634 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
635 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 636 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 637 }
1da177e4 638
58c4fb86 639 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
640 rt->rt6i_dst.plen = 128;
641 rt->rt6i_flags |= RTF_CACHE;
642 rt->u.dst.flags |= DST_HOST;
643
644#ifdef CONFIG_IPV6_SUBTREES
645 if (rt->rt6i_src.plen && saddr) {
646 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
647 rt->rt6i_src.plen = 128;
648 }
649#endif
650
651 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
652
95a9a5ba 653 }
1da177e4 654
95a9a5ba
YH
655 return rt;
656}
1da177e4 657
299d9939
YH
658static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
659{
660 struct rt6_info *rt = ip6_rt_copy(ort);
661 if (rt) {
662 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663 rt->rt6i_dst.plen = 128;
664 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
665 rt->u.dst.flags |= DST_HOST;
666 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
667 }
668 return rt;
669}
670
8ed67789
DL
671static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
672 struct flowi *fl, int flags)
1da177e4
LT
673{
674 struct fib6_node *fn;
519fbd87 675 struct rt6_info *rt, *nrt;
c71099ac 676 int strict = 0;
1da177e4 677 int attempts = 3;
519fbd87 678 int err;
53b7997f 679 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 680
77d16f45 681 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
682
683relookup:
c71099ac 684 read_lock_bh(&table->tb6_lock);
1da177e4 685
8238dd06 686restart_2:
c71099ac 687 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
688
689restart:
4acad72d 690 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
691
692 BACKTRACK(net, &fl->fl6_src);
693 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 694 rt->rt6i_flags & RTF_CACHE)
1ddef044 695 goto out;
1da177e4 696
fb9de91e 697 dst_hold(&rt->u.dst);
c71099ac 698 read_unlock_bh(&table->tb6_lock);
fb9de91e 699
519fbd87 700 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 701 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
702 else {
703#if CLONE_OFFLINK_ROUTE
c71099ac 704 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
705#else
706 goto out2;
707#endif
708 }
e40cf353 709
519fbd87 710 dst_release(&rt->u.dst);
8ed67789 711 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 712
519fbd87
YH
713 dst_hold(&rt->u.dst);
714 if (nrt) {
40e22e8f 715 err = ip6_ins_rt(nrt);
519fbd87 716 if (!err)
1da177e4 717 goto out2;
1da177e4 718 }
1da177e4 719
519fbd87
YH
720 if (--attempts <= 0)
721 goto out2;
722
723 /*
c71099ac 724 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
725 * released someone could insert this route. Relookup.
726 */
727 dst_release(&rt->u.dst);
728 goto relookup;
729
730out:
8238dd06
YH
731 if (reachable) {
732 reachable = 0;
733 goto restart_2;
734 }
519fbd87 735 dst_hold(&rt->u.dst);
c71099ac 736 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
737out2:
738 rt->u.dst.lastuse = jiffies;
739 rt->u.dst.__use++;
c71099ac
TG
740
741 return rt;
1da177e4
LT
742}
743
8ed67789 744static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
745 struct flowi *fl, int flags)
746{
8ed67789 747 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
748}
749
c71099ac
TG
750void ip6_route_input(struct sk_buff *skb)
751{
0660e03f 752 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 753 struct net *net = dev_net(skb->dev);
adaa70bb 754 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
755 struct flowi fl = {
756 .iif = skb->dev->ifindex,
757 .nl_u = {
758 .ip6_u = {
759 .daddr = iph->daddr,
760 .saddr = iph->saddr,
90bcaf7b 761 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
762 },
763 },
1ab1457c 764 .mark = skb->mark,
c71099ac
TG
765 .proto = iph->nexthdr,
766 };
adaa70bb
TG
767
768 if (rt6_need_strict(&iph->daddr))
769 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 770
5578689a 771 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
c71099ac
TG
772}
773
8ed67789 774static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 775 struct flowi *fl, int flags)
1da177e4 776{
8ed67789 777 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
778}
779
4591db4f
DL
780struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
781 struct flowi *fl)
c71099ac
TG
782{
783 int flags = 0;
784
785 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 786 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 787
adaa70bb
TG
788 if (!ipv6_addr_any(&fl->fl6_src))
789 flags |= RT6_LOOKUP_F_HAS_SADDR;
7cbca67c
YH
790 else if (sk) {
791 unsigned int prefs = inet6_sk(sk)->srcprefs;
792 if (prefs & IPV6_PREFER_SRC_TMP)
793 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
794 if (prefs & IPV6_PREFER_SRC_PUBLIC)
795 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
796 if (prefs & IPV6_PREFER_SRC_COA)
797 flags |= RT6_LOOKUP_F_SRCPREF_COA;
798 }
adaa70bb 799
4591db4f 800 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
801}
802
7159039a 803EXPORT_SYMBOL(ip6_route_output);
1da177e4 804
14e50e57
DM
805int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
806{
807 struct rt6_info *ort = (struct rt6_info *) *dstp;
808 struct rt6_info *rt = (struct rt6_info *)
809 dst_alloc(&ip6_dst_blackhole_ops);
810 struct dst_entry *new = NULL;
811
812 if (rt) {
813 new = &rt->u.dst;
814
815 atomic_set(&new->__refcnt, 1);
816 new->__use = 1;
352e512c
HX
817 new->input = dst_discard;
818 new->output = dst_discard;
14e50e57
DM
819
820 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
821 new->dev = ort->u.dst.dev;
822 if (new->dev)
823 dev_hold(new->dev);
824 rt->rt6i_idev = ort->rt6i_idev;
825 if (rt->rt6i_idev)
826 in6_dev_hold(rt->rt6i_idev);
827 rt->rt6i_expires = 0;
828
829 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
830 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
831 rt->rt6i_metric = 0;
832
833 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
834#ifdef CONFIG_IPV6_SUBTREES
835 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
836#endif
837
838 dst_free(new);
839 }
840
841 dst_release(*dstp);
842 *dstp = new;
843 return (new ? 0 : -ENOMEM);
844}
845EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
846
1da177e4
LT
847/*
848 * Destination cache support functions
849 */
850
851static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
852{
853 struct rt6_info *rt;
854
855 rt = (struct rt6_info *) dst;
856
857 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
858 return dst;
859
860 return NULL;
861}
862
863static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
864{
865 struct rt6_info *rt = (struct rt6_info *) dst;
866
867 if (rt) {
868 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 869 ip6_del_rt(rt);
1da177e4
LT
870 else
871 dst_release(dst);
872 }
873 return NULL;
874}
875
876static void ip6_link_failure(struct sk_buff *skb)
877{
878 struct rt6_info *rt;
879
880 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
881
882 rt = (struct rt6_info *) skb->dst;
883 if (rt) {
884 if (rt->rt6i_flags&RTF_CACHE) {
885 dst_set_expires(&rt->u.dst, 0);
886 rt->rt6i_flags |= RTF_EXPIRES;
887 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
888 rt->rt6i_node->fn_sernum = -1;
889 }
890}
891
892static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
893{
894 struct rt6_info *rt6 = (struct rt6_info*)dst;
895
896 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
897 rt6->rt6i_flags |= RTF_MODIFIED;
898 if (mtu < IPV6_MIN_MTU) {
899 mtu = IPV6_MIN_MTU;
900 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
901 }
902 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 903 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
904 }
905}
906
1da177e4
LT
907static int ipv6_get_mtu(struct net_device *dev);
908
5578689a 909static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
910{
911 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
912
5578689a
DL
913 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
914 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
915
916 /*
1ab1457c
YH
917 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
918 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
919 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
920 * rely only on pmtu discovery"
921 */
922 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
923 mtu = IPV6_MAXPLEN;
924 return mtu;
925}
926
3b00944c
YH
927static struct dst_entry *icmp6_dst_gc_list;
928static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 929
3b00944c 930struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 931 struct neighbour *neigh,
9acd9f3a 932 const struct in6_addr *addr)
1da177e4
LT
933{
934 struct rt6_info *rt;
935 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 936 struct net *net = dev_net(dev);
1da177e4
LT
937
938 if (unlikely(idev == NULL))
939 return NULL;
940
f2fc6a54 941 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1da177e4
LT
942 if (unlikely(rt == NULL)) {
943 in6_dev_put(idev);
944 goto out;
945 }
946
947 dev_hold(dev);
948 if (neigh)
949 neigh_hold(neigh);
950 else
951 neigh = ndisc_get_neigh(dev, addr);
952
953 rt->rt6i_dev = dev;
954 rt->rt6i_idev = idev;
955 rt->rt6i_nexthop = neigh;
956 atomic_set(&rt->u.dst.__refcnt, 1);
957 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
958 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 959 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
3b00944c 960 rt->u.dst.output = ip6_output;
1da177e4
LT
961
962#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
963 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
964 ? DST_HOST
1da177e4
LT
965 : 0;
966 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
967 rt->rt6i_dst.plen = 128;
968#endif
969
3b00944c
YH
970 spin_lock_bh(&icmp6_dst_lock);
971 rt->u.dst.next = icmp6_dst_gc_list;
972 icmp6_dst_gc_list = &rt->u.dst;
973 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 974
5578689a 975 fib6_force_start_gc(net);
1da177e4
LT
976
977out:
40aa7b90 978 return &rt->u.dst;
1da177e4
LT
979}
980
3d0f24a7 981int icmp6_dst_gc(void)
1da177e4
LT
982{
983 struct dst_entry *dst, *next, **pprev;
3d0f24a7 984 int more = 0;
1da177e4
LT
985
986 next = NULL;
5d0bbeeb 987
3b00944c
YH
988 spin_lock_bh(&icmp6_dst_lock);
989 pprev = &icmp6_dst_gc_list;
5d0bbeeb 990
1da177e4
LT
991 while ((dst = *pprev) != NULL) {
992 if (!atomic_read(&dst->__refcnt)) {
993 *pprev = dst->next;
994 dst_free(dst);
1da177e4
LT
995 } else {
996 pprev = &dst->next;
3d0f24a7 997 ++more;
1da177e4
LT
998 }
999 }
1000
3b00944c 1001 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1002
3d0f24a7 1003 return more;
1da177e4
LT
1004}
1005
569d3645 1006static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1007{
1da177e4 1008 unsigned long now = jiffies;
7019b78e
DL
1009 struct net *net = ops->dst_net;
1010 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1011 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1012 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1013 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1014 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1015
1016 if (time_after(rt_last_gc + rt_min_interval, now) &&
1017 atomic_read(&ops->entries) <= rt_max_size)
1da177e4
LT
1018 goto out;
1019
6891a346
BT
1020 net->ipv6.ip6_rt_gc_expire++;
1021 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1022 net->ipv6.ip6_rt_last_gc = now;
7019b78e
DL
1023 if (atomic_read(&ops->entries) < ops->gc_thresh)
1024 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1025out:
7019b78e
DL
1026 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1027 return (atomic_read(&ops->entries) > rt_max_size);
1da177e4
LT
1028}
1029
1030/* Clean host part of a prefix. Not necessary in radix tree,
1031 but results in cleaner routing tables.
1032
1033 Remove it only when all the things will work!
1034 */
1035
1036static int ipv6_get_mtu(struct net_device *dev)
1037{
1038 int mtu = IPV6_MIN_MTU;
1039 struct inet6_dev *idev;
1040
1041 idev = in6_dev_get(dev);
1042 if (idev) {
1043 mtu = idev->cnf.mtu6;
1044 in6_dev_put(idev);
1045 }
1046 return mtu;
1047}
1048
6b75d090 1049int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1050{
6b75d090
YH
1051 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1052 if (hoplimit < 0) {
1053 struct net_device *dev = dst->dev;
1054 struct inet6_dev *idev = in6_dev_get(dev);
1055 if (idev) {
1056 hoplimit = idev->cnf.hop_limit;
1057 in6_dev_put(idev);
1058 } else
53b7997f 1059 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1da177e4
LT
1060 }
1061 return hoplimit;
1062}
1063
1064/*
1065 *
1066 */
1067
86872cb5 1068int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1069{
1070 int err;
5578689a 1071 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1072 struct rt6_info *rt = NULL;
1073 struct net_device *dev = NULL;
1074 struct inet6_dev *idev = NULL;
c71099ac 1075 struct fib6_table *table;
1da177e4
LT
1076 int addr_type;
1077
86872cb5 1078 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1079 return -EINVAL;
1080#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1081 if (cfg->fc_src_len)
1da177e4
LT
1082 return -EINVAL;
1083#endif
86872cb5 1084 if (cfg->fc_ifindex) {
1da177e4 1085 err = -ENODEV;
5578689a 1086 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1087 if (!dev)
1088 goto out;
1089 idev = in6_dev_get(dev);
1090 if (!idev)
1091 goto out;
1092 }
1093
86872cb5
TG
1094 if (cfg->fc_metric == 0)
1095 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1096
5578689a 1097 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1098 if (table == NULL) {
1099 err = -ENOBUFS;
1100 goto out;
1101 }
1102
f2fc6a54 1103 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1da177e4
LT
1104
1105 if (rt == NULL) {
1106 err = -ENOMEM;
1107 goto out;
1108 }
1109
1110 rt->u.dst.obsolete = -1;
6f704992
YH
1111 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1112 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1113 0;
1da177e4 1114
86872cb5
TG
1115 if (cfg->fc_protocol == RTPROT_UNSPEC)
1116 cfg->fc_protocol = RTPROT_BOOT;
1117 rt->rt6i_protocol = cfg->fc_protocol;
1118
1119 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1120
1121 if (addr_type & IPV6_ADDR_MULTICAST)
1122 rt->u.dst.input = ip6_mc_input;
1123 else
1124 rt->u.dst.input = ip6_forward;
1125
1126 rt->u.dst.output = ip6_output;
1127
86872cb5
TG
1128 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1129 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1130 if (rt->rt6i_dst.plen == 128)
1131 rt->u.dst.flags = DST_HOST;
1132
1133#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1134 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1135 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1136#endif
1137
86872cb5 1138 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1139
1140 /* We cannot add true routes via loopback here,
1141 they would result in kernel looping; promote them to reject routes
1142 */
86872cb5 1143 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1144 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1145 /* hold loopback dev/idev if we haven't done so. */
5578689a 1146 if (dev != net->loopback_dev) {
1da177e4
LT
1147 if (dev) {
1148 dev_put(dev);
1149 in6_dev_put(idev);
1150 }
5578689a 1151 dev = net->loopback_dev;
1da177e4
LT
1152 dev_hold(dev);
1153 idev = in6_dev_get(dev);
1154 if (!idev) {
1155 err = -ENODEV;
1156 goto out;
1157 }
1158 }
1159 rt->u.dst.output = ip6_pkt_discard_out;
1160 rt->u.dst.input = ip6_pkt_discard;
1161 rt->u.dst.error = -ENETUNREACH;
1162 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1163 goto install_route;
1164 }
1165
86872cb5 1166 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1167 struct in6_addr *gw_addr;
1168 int gwa_type;
1169
86872cb5
TG
1170 gw_addr = &cfg->fc_gateway;
1171 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1172 gwa_type = ipv6_addr_type(gw_addr);
1173
1174 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1175 struct rt6_info *grt;
1176
1177 /* IPv6 strictly inhibits using not link-local
1178 addresses as nexthop address.
1179 Otherwise, router will not able to send redirects.
1180 It is very good, but in some (rare!) circumstances
1181 (SIT, PtP, NBMA NOARP links) it is handy to allow
1182 some exceptions. --ANK
1183 */
1184 err = -EINVAL;
1185 if (!(gwa_type&IPV6_ADDR_UNICAST))
1186 goto out;
1187
5578689a 1188 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1189
1190 err = -EHOSTUNREACH;
1191 if (grt == NULL)
1192 goto out;
1193 if (dev) {
1194 if (dev != grt->rt6i_dev) {
1195 dst_release(&grt->u.dst);
1196 goto out;
1197 }
1198 } else {
1199 dev = grt->rt6i_dev;
1200 idev = grt->rt6i_idev;
1201 dev_hold(dev);
1202 in6_dev_hold(grt->rt6i_idev);
1203 }
1204 if (!(grt->rt6i_flags&RTF_GATEWAY))
1205 err = 0;
1206 dst_release(&grt->u.dst);
1207
1208 if (err)
1209 goto out;
1210 }
1211 err = -EINVAL;
1212 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1213 goto out;
1214 }
1215
1216 err = -ENODEV;
1217 if (dev == NULL)
1218 goto out;
1219
86872cb5 1220 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1221 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1222 if (IS_ERR(rt->rt6i_nexthop)) {
1223 err = PTR_ERR(rt->rt6i_nexthop);
1224 rt->rt6i_nexthop = NULL;
1225 goto out;
1226 }
1227 }
1228
86872cb5 1229 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1230
1231install_route:
86872cb5
TG
1232 if (cfg->fc_mx) {
1233 struct nlattr *nla;
1234 int remaining;
1235
1236 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1237 int type = nla_type(nla);
86872cb5
TG
1238
1239 if (type) {
1240 if (type > RTAX_MAX) {
1da177e4
LT
1241 err = -EINVAL;
1242 goto out;
1243 }
86872cb5
TG
1244
1245 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1246 }
1da177e4
LT
1247 }
1248 }
1249
5ffc02a1 1250 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1da177e4 1251 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1ca615fb 1252 if (!dst_mtu(&rt->u.dst))
1da177e4 1253 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
5ffc02a1 1254 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
5578689a 1255 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1256 rt->u.dst.dev = dev;
1257 rt->rt6i_idev = idev;
c71099ac 1258 rt->rt6i_table = table;
63152fc0 1259
c346dca1 1260 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1261
86872cb5 1262 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1263
1264out:
1265 if (dev)
1266 dev_put(dev);
1267 if (idev)
1268 in6_dev_put(idev);
1269 if (rt)
40aa7b90 1270 dst_free(&rt->u.dst);
1da177e4
LT
1271 return err;
1272}
1273
86872cb5 1274static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1275{
1276 int err;
c71099ac 1277 struct fib6_table *table;
c346dca1 1278 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1279
8ed67789 1280 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1281 return -ENOENT;
1282
c71099ac
TG
1283 table = rt->rt6i_table;
1284 write_lock_bh(&table->tb6_lock);
1da177e4 1285
86872cb5 1286 err = fib6_del(rt, info);
1da177e4
LT
1287 dst_release(&rt->u.dst);
1288
c71099ac 1289 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1290
1291 return err;
1292}
1293
e0a1ad73
TG
1294int ip6_del_rt(struct rt6_info *rt)
1295{
4d1169c1 1296 struct nl_info info = {
c346dca1 1297 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1298 };
528c4ceb 1299 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1300}
1301
86872cb5 1302static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1303{
c71099ac 1304 struct fib6_table *table;
1da177e4
LT
1305 struct fib6_node *fn;
1306 struct rt6_info *rt;
1307 int err = -ESRCH;
1308
5578689a 1309 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1310 if (table == NULL)
1311 return err;
1312
1313 read_lock_bh(&table->tb6_lock);
1da177e4 1314
c71099ac 1315 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1316 &cfg->fc_dst, cfg->fc_dst_len,
1317 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1318
1da177e4 1319 if (fn) {
7cc48263 1320 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1321 if (cfg->fc_ifindex &&
1da177e4 1322 (rt->rt6i_dev == NULL ||
86872cb5 1323 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1324 continue;
86872cb5
TG
1325 if (cfg->fc_flags & RTF_GATEWAY &&
1326 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1327 continue;
86872cb5 1328 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1329 continue;
1330 dst_hold(&rt->u.dst);
c71099ac 1331 read_unlock_bh(&table->tb6_lock);
1da177e4 1332
86872cb5 1333 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1334 }
1335 }
c71099ac 1336 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1337
1338 return err;
1339}
1340
1341/*
1342 * Handle redirects
1343 */
a6279458
YH
1344struct ip6rd_flowi {
1345 struct flowi fl;
1346 struct in6_addr gateway;
1347};
1348
8ed67789
DL
1349static struct rt6_info *__ip6_route_redirect(struct net *net,
1350 struct fib6_table *table,
a6279458
YH
1351 struct flowi *fl,
1352 int flags)
1da177e4 1353{
a6279458
YH
1354 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1355 struct rt6_info *rt;
e843b9e1 1356 struct fib6_node *fn;
c71099ac 1357
1da177e4 1358 /*
e843b9e1
YH
1359 * Get the "current" route for this destination and
1360 * check if the redirect has come from approriate router.
1361 *
1362 * RFC 2461 specifies that redirects should only be
1363 * accepted if they come from the nexthop to the target.
1364 * Due to the way the routes are chosen, this notion
1365 * is a bit fuzzy and one might need to check all possible
1366 * routes.
1da177e4 1367 */
1da177e4 1368
c71099ac 1369 read_lock_bh(&table->tb6_lock);
a6279458 1370 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1371restart:
7cc48263 1372 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1373 /*
1374 * Current route is on-link; redirect is always invalid.
1375 *
1376 * Seems, previous statement is not true. It could
1377 * be node, which looks for us as on-link (f.e. proxy ndisc)
1378 * But then router serving it might decide, that we should
1379 * know truth 8)8) --ANK (980726).
1380 */
1381 if (rt6_check_expired(rt))
1382 continue;
1383 if (!(rt->rt6i_flags & RTF_GATEWAY))
1384 continue;
a6279458 1385 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1386 continue;
a6279458 1387 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1388 continue;
1389 break;
1390 }
a6279458 1391
cb15d9c2 1392 if (!rt)
8ed67789
DL
1393 rt = net->ipv6.ip6_null_entry;
1394 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1395out:
a6279458
YH
1396 dst_hold(&rt->u.dst);
1397
c71099ac 1398 read_unlock_bh(&table->tb6_lock);
e843b9e1 1399
a6279458
YH
1400 return rt;
1401};
1402
1403static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1404 struct in6_addr *src,
1405 struct in6_addr *gateway,
1406 struct net_device *dev)
1407{
adaa70bb 1408 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1409 struct net *net = dev_net(dev);
a6279458
YH
1410 struct ip6rd_flowi rdfl = {
1411 .fl = {
1412 .oif = dev->ifindex,
1413 .nl_u = {
1414 .ip6_u = {
1415 .daddr = *dest,
1416 .saddr = *src,
1417 },
1418 },
1419 },
1420 .gateway = *gateway,
1421 };
adaa70bb
TG
1422
1423 if (rt6_need_strict(dest))
1424 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1425
5578689a 1426 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1427 flags, __ip6_route_redirect);
a6279458
YH
1428}
1429
1430void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1431 struct in6_addr *saddr,
1432 struct neighbour *neigh, u8 *lladdr, int on_link)
1433{
1434 struct rt6_info *rt, *nrt = NULL;
1435 struct netevent_redirect netevent;
c346dca1 1436 struct net *net = dev_net(neigh->dev);
a6279458
YH
1437
1438 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1439
8ed67789 1440 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1441 if (net_ratelimit())
1442 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1443 "for redirect target\n");
a6279458 1444 goto out;
1da177e4
LT
1445 }
1446
1da177e4
LT
1447 /*
1448 * We have finally decided to accept it.
1449 */
1450
1ab1457c 1451 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1452 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1453 NEIGH_UPDATE_F_OVERRIDE|
1454 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1455 NEIGH_UPDATE_F_ISROUTER))
1456 );
1457
1458 /*
1459 * Redirect received -> path was valid.
1460 * Look, redirects are sent only in response to data packets,
1461 * so that this nexthop apparently is reachable. --ANK
1462 */
1463 dst_confirm(&rt->u.dst);
1464
1465 /* Duplicate redirect: silently ignore. */
1466 if (neigh == rt->u.dst.neighbour)
1467 goto out;
1468
1469 nrt = ip6_rt_copy(rt);
1470 if (nrt == NULL)
1471 goto out;
1472
1473 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1474 if (on_link)
1475 nrt->rt6i_flags &= ~RTF_GATEWAY;
1476
1477 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1478 nrt->rt6i_dst.plen = 128;
1479 nrt->u.dst.flags |= DST_HOST;
1480
1481 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1482 nrt->rt6i_nexthop = neigh_clone(neigh);
1483 /* Reset pmtu, it may be better */
1484 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
c346dca1 1485 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
5578689a 1486 dst_mtu(&nrt->u.dst));
1da177e4 1487
40e22e8f 1488 if (ip6_ins_rt(nrt))
1da177e4
LT
1489 goto out;
1490
8d71740c
TT
1491 netevent.old = &rt->u.dst;
1492 netevent.new = &nrt->u.dst;
1493 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1494
1da177e4 1495 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1496 ip6_del_rt(rt);
1da177e4
LT
1497 return;
1498 }
1499
1500out:
1ab1457c 1501 dst_release(&rt->u.dst);
1da177e4
LT
1502 return;
1503}
1504
1505/*
1506 * Handle ICMP "packet too big" messages
1507 * i.e. Path MTU discovery
1508 */
1509
1510void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1511 struct net_device *dev, u32 pmtu)
1512{
1513 struct rt6_info *rt, *nrt;
c346dca1 1514 struct net *net = dev_net(dev);
1da177e4
LT
1515 int allfrag = 0;
1516
5578689a 1517 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1518 if (rt == NULL)
1519 return;
1520
1521 if (pmtu >= dst_mtu(&rt->u.dst))
1522 goto out;
1523
1524 if (pmtu < IPV6_MIN_MTU) {
1525 /*
1ab1457c 1526 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1527 * MTU (1280) and a fragment header should always be included
1528 * after a node receiving Too Big message reporting PMTU is
1529 * less than the IPv6 Minimum Link MTU.
1530 */
1531 pmtu = IPV6_MIN_MTU;
1532 allfrag = 1;
1533 }
1534
1535 /* New mtu received -> path was valid.
1536 They are sent only in response to data packets,
1537 so that this nexthop apparently is reachable. --ANK
1538 */
1539 dst_confirm(&rt->u.dst);
1540
1541 /* Host route. If it is static, it would be better
1542 not to override it, but add new one, so that
1543 when cache entry will expire old pmtu
1544 would return automatically.
1545 */
1546 if (rt->rt6i_flags & RTF_CACHE) {
1547 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1548 if (allfrag)
1549 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
5578689a 1550 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1551 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1552 goto out;
1553 }
1554
1555 /* Network route.
1556 Two cases are possible:
1557 1. It is connected route. Action: COW
1558 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1559 */
d5315b50 1560 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1561 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1562 else
1563 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1564
d5315b50 1565 if (nrt) {
a1e78363
YH
1566 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1567 if (allfrag)
1568 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1569
1570 /* According to RFC 1981, detecting PMTU increase shouldn't be
1571 * happened within 5 mins, the recommended timer is 10 mins.
1572 * Here this route expiration time is set to ip6_rt_mtu_expires
1573 * which is 10 mins. After 10 mins the decreased pmtu is expired
1574 * and detecting PMTU increase will be automatically happened.
1575 */
5578689a 1576 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1577 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1578
40e22e8f 1579 ip6_ins_rt(nrt);
1da177e4 1580 }
1da177e4
LT
1581out:
1582 dst_release(&rt->u.dst);
1583}
1584
1585/*
1586 * Misc support functions
1587 */
1588
1589static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1590{
c346dca1 1591 struct net *net = dev_net(ort->rt6i_dev);
f2fc6a54 1592 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1da177e4
LT
1593
1594 if (rt) {
1595 rt->u.dst.input = ort->u.dst.input;
1596 rt->u.dst.output = ort->u.dst.output;
1597
1598 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1599 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1600 rt->u.dst.dev = ort->u.dst.dev;
1601 if (rt->u.dst.dev)
1602 dev_hold(rt->u.dst.dev);
1603 rt->rt6i_idev = ort->rt6i_idev;
1604 if (rt->rt6i_idev)
1605 in6_dev_hold(rt->rt6i_idev);
1606 rt->u.dst.lastuse = jiffies;
1607 rt->rt6i_expires = 0;
1608
1609 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1610 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1611 rt->rt6i_metric = 0;
1612
1613 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1614#ifdef CONFIG_IPV6_SUBTREES
1615 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1616#endif
c71099ac 1617 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1618 }
1619 return rt;
1620}
1621
70ceb4f5 1622#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1623static struct rt6_info *rt6_get_route_info(struct net *net,
1624 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1625 struct in6_addr *gwaddr, int ifindex)
1626{
1627 struct fib6_node *fn;
1628 struct rt6_info *rt = NULL;
c71099ac
TG
1629 struct fib6_table *table;
1630
efa2cea0 1631 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1632 if (table == NULL)
1633 return NULL;
70ceb4f5 1634
c71099ac
TG
1635 write_lock_bh(&table->tb6_lock);
1636 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1637 if (!fn)
1638 goto out;
1639
7cc48263 1640 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1641 if (rt->rt6i_dev->ifindex != ifindex)
1642 continue;
1643 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1644 continue;
1645 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1646 continue;
1647 dst_hold(&rt->u.dst);
1648 break;
1649 }
1650out:
c71099ac 1651 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1652 return rt;
1653}
1654
efa2cea0
DL
1655static struct rt6_info *rt6_add_route_info(struct net *net,
1656 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1657 struct in6_addr *gwaddr, int ifindex,
1658 unsigned pref)
1659{
86872cb5
TG
1660 struct fib6_config cfg = {
1661 .fc_table = RT6_TABLE_INFO,
238fc7ea 1662 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1663 .fc_ifindex = ifindex,
1664 .fc_dst_len = prefixlen,
1665 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1666 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1667 .fc_nlinfo.pid = 0,
1668 .fc_nlinfo.nlh = NULL,
1669 .fc_nlinfo.nl_net = net,
86872cb5
TG
1670 };
1671
1672 ipv6_addr_copy(&cfg.fc_dst, prefix);
1673 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1674
e317da96
YH
1675 /* We should treat it as a default route if prefix length is 0. */
1676 if (!prefixlen)
86872cb5 1677 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1678
86872cb5 1679 ip6_route_add(&cfg);
70ceb4f5 1680
efa2cea0 1681 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1682}
1683#endif
1684
1da177e4 1685struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1686{
1da177e4 1687 struct rt6_info *rt;
c71099ac 1688 struct fib6_table *table;
1da177e4 1689
c346dca1 1690 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1691 if (table == NULL)
1692 return NULL;
1da177e4 1693
c71099ac 1694 write_lock_bh(&table->tb6_lock);
7cc48263 1695 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1696 if (dev == rt->rt6i_dev &&
045927ff 1697 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1698 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1699 break;
1700 }
1701 if (rt)
1702 dst_hold(&rt->u.dst);
c71099ac 1703 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1704 return rt;
1705}
1706
1707struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1708 struct net_device *dev,
1709 unsigned int pref)
1da177e4 1710{
86872cb5
TG
1711 struct fib6_config cfg = {
1712 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1713 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1714 .fc_ifindex = dev->ifindex,
1715 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1716 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1717 .fc_nlinfo.pid = 0,
1718 .fc_nlinfo.nlh = NULL,
c346dca1 1719 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1720 };
1da177e4 1721
86872cb5 1722 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1723
86872cb5 1724 ip6_route_add(&cfg);
1da177e4 1725
1da177e4
LT
1726 return rt6_get_dflt_router(gwaddr, dev);
1727}
1728
7b4da532 1729void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1730{
1731 struct rt6_info *rt;
c71099ac
TG
1732 struct fib6_table *table;
1733
1734 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1735 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1736 if (table == NULL)
1737 return;
1da177e4
LT
1738
1739restart:
c71099ac 1740 read_lock_bh(&table->tb6_lock);
7cc48263 1741 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1742 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1743 dst_hold(&rt->u.dst);
c71099ac 1744 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1745 ip6_del_rt(rt);
1da177e4
LT
1746 goto restart;
1747 }
1748 }
c71099ac 1749 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1750}
1751
5578689a
DL
1752static void rtmsg_to_fib6_config(struct net *net,
1753 struct in6_rtmsg *rtmsg,
86872cb5
TG
1754 struct fib6_config *cfg)
1755{
1756 memset(cfg, 0, sizeof(*cfg));
1757
1758 cfg->fc_table = RT6_TABLE_MAIN;
1759 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1760 cfg->fc_metric = rtmsg->rtmsg_metric;
1761 cfg->fc_expires = rtmsg->rtmsg_info;
1762 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1763 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1764 cfg->fc_flags = rtmsg->rtmsg_flags;
1765
5578689a 1766 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1767
86872cb5
TG
1768 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1769 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1770 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1771}
1772
5578689a 1773int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1774{
86872cb5 1775 struct fib6_config cfg;
1da177e4
LT
1776 struct in6_rtmsg rtmsg;
1777 int err;
1778
1779 switch(cmd) {
1780 case SIOCADDRT: /* Add a route */
1781 case SIOCDELRT: /* Delete a route */
1782 if (!capable(CAP_NET_ADMIN))
1783 return -EPERM;
1784 err = copy_from_user(&rtmsg, arg,
1785 sizeof(struct in6_rtmsg));
1786 if (err)
1787 return -EFAULT;
86872cb5 1788
5578689a 1789 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1790
1da177e4
LT
1791 rtnl_lock();
1792 switch (cmd) {
1793 case SIOCADDRT:
86872cb5 1794 err = ip6_route_add(&cfg);
1da177e4
LT
1795 break;
1796 case SIOCDELRT:
86872cb5 1797 err = ip6_route_del(&cfg);
1da177e4
LT
1798 break;
1799 default:
1800 err = -EINVAL;
1801 }
1802 rtnl_unlock();
1803
1804 return err;
3ff50b79 1805 }
1da177e4
LT
1806
1807 return -EINVAL;
1808}
1809
1810/*
1811 * Drop the packet on the floor
1812 */
1813
50eb431d 1814static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1da177e4 1815{
612f09e8
YH
1816 int type;
1817 switch (ipstats_mib_noroutes) {
1818 case IPSTATS_MIB_INNOROUTES:
0660e03f 1819 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1820 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1821 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1822 break;
1823 }
1824 /* FALLTHROUGH */
1825 case IPSTATS_MIB_OUTNOROUTES:
1826 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1827 break;
1828 }
9ce8ade0 1829 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1830 kfree_skb(skb);
1831 return 0;
1832}
1833
9ce8ade0
TG
1834static int ip6_pkt_discard(struct sk_buff *skb)
1835{
612f09e8 1836 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1837}
1838
20380731 1839static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1840{
1841 skb->dev = skb->dst->dev;
612f09e8 1842 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1843}
1844
6723ab54
DM
1845#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1846
9ce8ade0
TG
1847static int ip6_pkt_prohibit(struct sk_buff *skb)
1848{
612f09e8 1849 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1850}
1851
1852static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1853{
1854 skb->dev = skb->dst->dev;
612f09e8 1855 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1856}
1857
6723ab54
DM
1858#endif
1859
1da177e4
LT
1860/*
1861 * Allocate a dst for local (unicast / anycast) address.
1862 */
1863
1864struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1865 const struct in6_addr *addr,
1866 int anycast)
1867{
c346dca1 1868 struct net *net = dev_net(idev->dev);
f2fc6a54 1869 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1da177e4
LT
1870
1871 if (rt == NULL)
1872 return ERR_PTR(-ENOMEM);
1873
5578689a 1874 dev_hold(net->loopback_dev);
1da177e4
LT
1875 in6_dev_hold(idev);
1876
1877 rt->u.dst.flags = DST_HOST;
1878 rt->u.dst.input = ip6_input;
1879 rt->u.dst.output = ip6_output;
5578689a 1880 rt->rt6i_dev = net->loopback_dev;
1da177e4
LT
1881 rt->rt6i_idev = idev;
1882 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 1883 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1884 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1885 rt->u.dst.obsolete = -1;
1886
1887 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1888 if (anycast)
1889 rt->rt6i_flags |= RTF_ANYCAST;
1890 else
1da177e4
LT
1891 rt->rt6i_flags |= RTF_LOCAL;
1892 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1893 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1894 dst_free(&rt->u.dst);
1da177e4
LT
1895 return ERR_PTR(-ENOMEM);
1896 }
1897
1898 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1899 rt->rt6i_dst.plen = 128;
5578689a 1900 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4
LT
1901
1902 atomic_set(&rt->u.dst.__refcnt, 1);
1903
1904 return rt;
1905}
1906
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through fib6_clean_all(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down, or NULL for all */
	struct net		*net;	/* namespace being cleaned */
};
1911
1da177e4
LT
1912static int fib6_ifdown(struct rt6_info *rt, void *arg)
1913{
8ed67789
DL
1914 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1915 struct net *net = ((struct arg_dev_net *)arg)->net;
1916
1917 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1918 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
1919 RT6_TRACE("deleted by ifdown %p\n", rt);
1920 return -1;
1921 }
1922 return 0;
1923}
1924
f3db4851 1925void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1926{
8ed67789
DL
1927 struct arg_dev_net adn = {
1928 .dev = dev,
1929 .net = net,
1930 };
1931
1932 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1da177e4
LT
1933}
1934
/* Argument bundle handed to rt6_mtu_change_route() via fib6_clean_all(). */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
1940
1941static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1942{
1943 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1944 struct inet6_dev *idev;
c346dca1 1945 struct net *net = dev_net(arg->dev);
1da177e4
LT
1946
1947 /* In IPv6 pmtu discovery is not optional,
1948 so that RTAX_MTU lock cannot disable it.
1949 We still use this lock to block changes
1950 caused by addrconf/ndisc.
1951 */
1952
1953 idev = __in6_dev_get(arg->dev);
1954 if (idev == NULL)
1955 return 0;
1956
1957 /* For administrative MTU increase, there is no way to discover
1958 IPv6 PMTU increase, so PMTU increase should be updated here.
1959 Since RFC 1981 doesn't include administrative MTU increase
1960 update PMTU increase is a MUST. (i.e. jumbo frame)
1961 */
1962 /*
1963 If new MTU is less than route PMTU, this new MTU will be the
1964 lowest MTU in the path, update the route PMTU to reflect PMTU
1965 decreases; if new MTU is greater than route PMTU, and the
1966 old MTU is the lowest MTU in the path, update the route PMTU
1967 to reflect the increase. In this case if the other nodes' MTU
1968 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1969 PMTU discouvery.
1970 */
1971 if (rt->rt6i_dev == arg->dev &&
1972 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 1973 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 1974 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1975 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1976 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
5578689a 1977 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 1978 }
1da177e4
LT
1979 return 0;
1980}
1981
1982void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1983{
c71099ac
TG
1984 struct rt6_mtu_change_arg arg = {
1985 .dev = dev,
1986 .mtu = mtu,
1987 };
1da177e4 1988
c346dca1 1989 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1990}
1991
ef7c79ed 1992static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 1993 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1994 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1995 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1996 [RTA_PRIORITY] = { .type = NLA_U32 },
1997 [RTA_METRICS] = { .type = NLA_NESTED },
1998};
1999
2000static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2001 struct fib6_config *cfg)
1da177e4 2002{
86872cb5
TG
2003 struct rtmsg *rtm;
2004 struct nlattr *tb[RTA_MAX+1];
2005 int err;
1da177e4 2006
86872cb5
TG
2007 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2008 if (err < 0)
2009 goto errout;
1da177e4 2010
86872cb5
TG
2011 err = -EINVAL;
2012 rtm = nlmsg_data(nlh);
2013 memset(cfg, 0, sizeof(*cfg));
2014
2015 cfg->fc_table = rtm->rtm_table;
2016 cfg->fc_dst_len = rtm->rtm_dst_len;
2017 cfg->fc_src_len = rtm->rtm_src_len;
2018 cfg->fc_flags = RTF_UP;
2019 cfg->fc_protocol = rtm->rtm_protocol;
2020
2021 if (rtm->rtm_type == RTN_UNREACHABLE)
2022 cfg->fc_flags |= RTF_REJECT;
2023
2024 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2025 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2026 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2027
2028 if (tb[RTA_GATEWAY]) {
2029 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2030 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2031 }
86872cb5
TG
2032
2033 if (tb[RTA_DST]) {
2034 int plen = (rtm->rtm_dst_len + 7) >> 3;
2035
2036 if (nla_len(tb[RTA_DST]) < plen)
2037 goto errout;
2038
2039 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2040 }
86872cb5
TG
2041
2042 if (tb[RTA_SRC]) {
2043 int plen = (rtm->rtm_src_len + 7) >> 3;
2044
2045 if (nla_len(tb[RTA_SRC]) < plen)
2046 goto errout;
2047
2048 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2049 }
86872cb5
TG
2050
2051 if (tb[RTA_OIF])
2052 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2053
2054 if (tb[RTA_PRIORITY])
2055 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2056
2057 if (tb[RTA_METRICS]) {
2058 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2059 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2060 }
86872cb5
TG
2061
2062 if (tb[RTA_TABLE])
2063 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2064
2065 err = 0;
2066errout:
2067 return err;
1da177e4
LT
2068}
2069
c127ea2c 2070static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2071{
86872cb5
TG
2072 struct fib6_config cfg;
2073 int err;
1da177e4 2074
86872cb5
TG
2075 err = rtm_to_fib6_config(skb, nlh, &cfg);
2076 if (err < 0)
2077 return err;
2078
2079 return ip6_route_del(&cfg);
1da177e4
LT
2080}
2081
c127ea2c 2082static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2083{
86872cb5
TG
2084 struct fib6_config cfg;
2085 int err;
1da177e4 2086
86872cb5
TG
2087 err = rtm_to_fib6_config(skb, nlh, &cfg);
2088 if (err < 0)
2089 return err;
2090
2091 return ip6_route_add(&cfg);
1da177e4
LT
2092}
2093
339bf98f
TG
2094static inline size_t rt6_nlmsg_size(void)
2095{
2096 return NLMSG_ALIGN(sizeof(struct rtmsg))
2097 + nla_total_size(16) /* RTA_SRC */
2098 + nla_total_size(16) /* RTA_DST */
2099 + nla_total_size(16) /* RTA_GATEWAY */
2100 + nla_total_size(16) /* RTA_PREFSRC */
2101 + nla_total_size(4) /* RTA_TABLE */
2102 + nla_total_size(4) /* RTA_IIF */
2103 + nla_total_size(4) /* RTA_OIF */
2104 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2105 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2106 + nla_total_size(sizeof(struct rta_cacheinfo));
2107}
2108
191cd582
BH
2109static int rt6_fill_node(struct net *net,
2110 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2111 struct in6_addr *dst, struct in6_addr *src,
2112 int iif, int type, u32 pid, u32 seq,
7bc570c8 2113 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2114{
2115 struct rtmsg *rtm;
2d7202bf 2116 struct nlmsghdr *nlh;
e3703b3d 2117 long expires;
9e762a4a 2118 u32 table;
1da177e4
LT
2119
2120 if (prefix) { /* user wants prefix routes only */
2121 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2122 /* success since this is not a prefix route */
2123 return 1;
2124 }
2125 }
2126
2d7202bf
TG
2127 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2128 if (nlh == NULL)
26932566 2129 return -EMSGSIZE;
2d7202bf
TG
2130
2131 rtm = nlmsg_data(nlh);
1da177e4
LT
2132 rtm->rtm_family = AF_INET6;
2133 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2134 rtm->rtm_src_len = rt->rt6i_src.plen;
2135 rtm->rtm_tos = 0;
c71099ac 2136 if (rt->rt6i_table)
9e762a4a 2137 table = rt->rt6i_table->tb6_id;
c71099ac 2138 else
9e762a4a
PM
2139 table = RT6_TABLE_UNSPEC;
2140 rtm->rtm_table = table;
2d7202bf 2141 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2142 if (rt->rt6i_flags&RTF_REJECT)
2143 rtm->rtm_type = RTN_UNREACHABLE;
2144 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2145 rtm->rtm_type = RTN_LOCAL;
2146 else
2147 rtm->rtm_type = RTN_UNICAST;
2148 rtm->rtm_flags = 0;
2149 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2150 rtm->rtm_protocol = rt->rt6i_protocol;
2151 if (rt->rt6i_flags&RTF_DYNAMIC)
2152 rtm->rtm_protocol = RTPROT_REDIRECT;
2153 else if (rt->rt6i_flags & RTF_ADDRCONF)
2154 rtm->rtm_protocol = RTPROT_KERNEL;
2155 else if (rt->rt6i_flags&RTF_DEFAULT)
2156 rtm->rtm_protocol = RTPROT_RA;
2157
2158 if (rt->rt6i_flags&RTF_CACHE)
2159 rtm->rtm_flags |= RTM_F_CLONED;
2160
2161 if (dst) {
2d7202bf 2162 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2163 rtm->rtm_dst_len = 128;
1da177e4 2164 } else if (rtm->rtm_dst_len)
2d7202bf 2165 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2166#ifdef CONFIG_IPV6_SUBTREES
2167 if (src) {
2d7202bf 2168 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2169 rtm->rtm_src_len = 128;
1da177e4 2170 } else if (rtm->rtm_src_len)
2d7202bf 2171 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2172#endif
7bc570c8
YH
2173 if (iif) {
2174#ifdef CONFIG_IPV6_MROUTE
2175 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2176 int err = ip6mr_get_route(skb, rtm, nowait);
2177 if (err <= 0) {
2178 if (!nowait) {
2179 if (err == 0)
2180 return 0;
2181 goto nla_put_failure;
2182 } else {
2183 if (err == -EMSGSIZE)
2184 goto nla_put_failure;
2185 }
2186 }
2187 } else
2188#endif
2189 NLA_PUT_U32(skb, RTA_IIF, iif);
2190 } else if (dst) {
5e0115e5 2191 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
1da177e4 2192 struct in6_addr saddr_buf;
191cd582 2193 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2194 dst, 0, &saddr_buf) == 0)
2d7202bf 2195 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2196 }
2d7202bf 2197
1da177e4 2198 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2199 goto nla_put_failure;
2200
1da177e4 2201 if (rt->u.dst.neighbour)
2d7202bf
TG
2202 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2203
1da177e4 2204 if (rt->u.dst.dev)
2d7202bf
TG
2205 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2206
2207 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2208
36e3deae
YH
2209 if (!(rt->rt6i_flags & RTF_EXPIRES))
2210 expires = 0;
2211 else if (rt->rt6i_expires - jiffies < INT_MAX)
2212 expires = rt->rt6i_expires - jiffies;
2213 else
2214 expires = INT_MAX;
69cdf8f9 2215
e3703b3d
TG
2216 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2217 expires, rt->u.dst.error) < 0)
2218 goto nla_put_failure;
2d7202bf
TG
2219
2220 return nlmsg_end(skb, nlh);
2221
2222nla_put_failure:
26932566
PM
2223 nlmsg_cancel(skb, nlh);
2224 return -EMSGSIZE;
1da177e4
LT
2225}
2226
1b43af54 2227int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2228{
2229 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2230 int prefix;
2231
2d7202bf
TG
2232 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2233 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2234 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2235 } else
2236 prefix = 0;
2237
191cd582
BH
2238 return rt6_fill_node(arg->net,
2239 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2240 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2241 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2242}
2243
c127ea2c 2244static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2245{
3b1e0a65 2246 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2247 struct nlattr *tb[RTA_MAX+1];
2248 struct rt6_info *rt;
1da177e4 2249 struct sk_buff *skb;
ab364a6f 2250 struct rtmsg *rtm;
1da177e4 2251 struct flowi fl;
ab364a6f 2252 int err, iif = 0;
1da177e4 2253
ab364a6f
TG
2254 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2255 if (err < 0)
2256 goto errout;
1da177e4 2257
ab364a6f 2258 err = -EINVAL;
1da177e4 2259 memset(&fl, 0, sizeof(fl));
1da177e4 2260
ab364a6f
TG
2261 if (tb[RTA_SRC]) {
2262 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2263 goto errout;
2264
2265 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2266 }
2267
2268 if (tb[RTA_DST]) {
2269 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2270 goto errout;
2271
2272 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2273 }
2274
2275 if (tb[RTA_IIF])
2276 iif = nla_get_u32(tb[RTA_IIF]);
2277
2278 if (tb[RTA_OIF])
2279 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2280
2281 if (iif) {
2282 struct net_device *dev;
5578689a 2283 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2284 if (!dev) {
2285 err = -ENODEV;
ab364a6f 2286 goto errout;
1da177e4
LT
2287 }
2288 }
2289
ab364a6f
TG
2290 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2291 if (skb == NULL) {
2292 err = -ENOBUFS;
2293 goto errout;
2294 }
1da177e4 2295
ab364a6f
TG
2296 /* Reserve room for dummy headers, this skb can pass
2297 through good chunk of routing engine.
2298 */
459a98ed 2299 skb_reset_mac_header(skb);
ab364a6f 2300 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2301
8a3edd80 2302 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
1da177e4
LT
2303 skb->dst = &rt->u.dst;
2304
191cd582 2305 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2306 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2307 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2308 if (err < 0) {
ab364a6f
TG
2309 kfree_skb(skb);
2310 goto errout;
1da177e4
LT
2311 }
2312
5578689a 2313 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2314errout:
1da177e4 2315 return err;
1da177e4
LT
2316}
2317
86872cb5 2318void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2319{
2320 struct sk_buff *skb;
5578689a 2321 struct net *net = info->nl_net;
528c4ceb
DL
2322 u32 seq;
2323 int err;
2324
2325 err = -ENOBUFS;
2326 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2327
339bf98f 2328 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2329 if (skb == NULL)
2330 goto errout;
2331
191cd582 2332 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2333 event, info->pid, seq, 0, 0, 0);
26932566
PM
2334 if (err < 0) {
2335 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2336 WARN_ON(err == -EMSGSIZE);
2337 kfree_skb(skb);
2338 goto errout;
2339 }
5578689a
DL
2340 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2341 info->nlh, gfp_any());
21713ebc
TG
2342errout:
2343 if (err < 0)
5578689a 2344 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2345}
2346
8ed67789
DL
2347static int ip6_route_dev_notify(struct notifier_block *this,
2348 unsigned long event, void *data)
2349{
2350 struct net_device *dev = (struct net_device *)data;
c346dca1 2351 struct net *net = dev_net(dev);
8ed67789
DL
2352
2353 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2354 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2355 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2356#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2357 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2358 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2359 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2360 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2361#endif
2362 }
2363
2364 return NOTIFY_OK;
2365}
2366
1da177e4
LT
2367/*
2368 * /proc
2369 */
2370
2371#ifdef CONFIG_PROC_FS
2372
/*
 * NOTE(review): neither this constant nor the struct below is referenced
 * in the /proc code visible here, which uses seq_file.  Presumably both
 * are left over from an older buffer-based reader; confirm before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2383
2384static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2385{
33120b30 2386 struct seq_file *m = p_arg;
1da177e4 2387
33120b30
AD
2388 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2389 rt->rt6i_dst.plen);
1da177e4
LT
2390
2391#ifdef CONFIG_IPV6_SUBTREES
33120b30
AD
2392 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2393 rt->rt6i_src.plen);
1da177e4 2394#else
33120b30 2395 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2396#endif
2397
2398 if (rt->rt6i_nexthop) {
33120b30
AD
2399 seq_printf(m, NIP6_SEQFMT,
2400 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2401 } else {
33120b30 2402 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2403 }
33120b30
AD
2404 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2405 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2406 rt->u.dst.__use, rt->rt6i_flags,
2407 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2408 return 0;
2409}
2410
33120b30 2411static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2412{
f3db4851
DL
2413 struct net *net = (struct net *)m->private;
2414 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2415 return 0;
2416}
1da177e4 2417
33120b30
AD
2418static int ipv6_route_open(struct inode *inode, struct file *file)
2419{
de05c557 2420 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2421}
2422
33120b30
AD
2423static const struct file_operations ipv6_route_proc_fops = {
2424 .owner = THIS_MODULE,
2425 .open = ipv6_route_open,
2426 .read = seq_read,
2427 .llseek = seq_lseek,
b6fcbdb4 2428 .release = single_release_net,
33120b30
AD
2429};
2430
1da177e4
LT
2431static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2432{
69ddb805 2433 struct net *net = (struct net *)seq->private;
1da177e4 2434 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2435 net->ipv6.rt6_stats->fib_nodes,
2436 net->ipv6.rt6_stats->fib_route_nodes,
2437 net->ipv6.rt6_stats->fib_rt_alloc,
2438 net->ipv6.rt6_stats->fib_rt_entries,
2439 net->ipv6.rt6_stats->fib_rt_cache,
f2fc6a54 2440 atomic_read(&net->ipv6.ip6_dst_ops->entries),
69ddb805 2441 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2442
2443 return 0;
2444}
2445
2446static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2447{
de05c557 2448 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2449}
2450
9a32144e 2451static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2452 .owner = THIS_MODULE,
2453 .open = rt6_stats_seq_open,
2454 .read = seq_read,
2455 .llseek = seq_lseek,
b6fcbdb4 2456 .release = single_release_net,
1da177e4
LT
2457};
2458#endif /* CONFIG_PROC_FS */
2459
2460#ifdef CONFIG_SYSCTL
2461
1da177e4
LT
2462static
2463int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2464 void __user *buffer, size_t *lenp, loff_t *ppos)
2465{
5b7c931d
DL
2466 struct net *net = current->nsproxy->net_ns;
2467 int delay = net->ipv6.sysctl.flush_delay;
1da177e4
LT
2468 if (write) {
2469 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
5b7c931d 2470 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2471 return 0;
2472 } else
2473 return -EINVAL;
2474}
2475
760f2d01 2476ctl_table ipv6_route_table_template[] = {
1ab1457c 2477 {
1da177e4 2478 .procname = "flush",
4990509f 2479 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2480 .maxlen = sizeof(int),
89c8b3a1 2481 .mode = 0200,
1ab1457c 2482 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2483 },
2484 {
2485 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2486 .procname = "gc_thresh",
9a7ec3a9 2487 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2488 .maxlen = sizeof(int),
2489 .mode = 0644,
1ab1457c 2490 .proc_handler = &proc_dointvec,
1da177e4
LT
2491 },
2492 {
2493 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2494 .procname = "max_size",
4990509f 2495 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2496 .maxlen = sizeof(int),
2497 .mode = 0644,
1ab1457c 2498 .proc_handler = &proc_dointvec,
1da177e4
LT
2499 },
2500 {
2501 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2502 .procname = "gc_min_interval",
4990509f 2503 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2504 .maxlen = sizeof(int),
2505 .mode = 0644,
1ab1457c 2506 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2507 .strategy = &sysctl_jiffies,
2508 },
2509 {
2510 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2511 .procname = "gc_timeout",
4990509f 2512 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2513 .maxlen = sizeof(int),
2514 .mode = 0644,
1ab1457c 2515 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2516 .strategy = &sysctl_jiffies,
2517 },
2518 {
2519 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2520 .procname = "gc_interval",
4990509f 2521 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2522 .maxlen = sizeof(int),
2523 .mode = 0644,
1ab1457c 2524 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2525 .strategy = &sysctl_jiffies,
2526 },
2527 {
2528 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2529 .procname = "gc_elasticity",
4990509f 2530 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2531 .maxlen = sizeof(int),
2532 .mode = 0644,
1ab1457c 2533 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2534 .strategy = &sysctl_jiffies,
2535 },
2536 {
2537 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2538 .procname = "mtu_expires",
4990509f 2539 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2540 .maxlen = sizeof(int),
2541 .mode = 0644,
1ab1457c 2542 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2543 .strategy = &sysctl_jiffies,
2544 },
2545 {
2546 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2547 .procname = "min_adv_mss",
4990509f 2548 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2549 .maxlen = sizeof(int),
2550 .mode = 0644,
1ab1457c 2551 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2552 .strategy = &sysctl_jiffies,
2553 },
2554 {
2555 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2556 .procname = "gc_min_interval_ms",
4990509f 2557 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2558 .maxlen = sizeof(int),
2559 .mode = 0644,
1ab1457c 2560 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2561 .strategy = &sysctl_ms_jiffies,
2562 },
2563 { .ctl_name = 0 }
2564};
2565
760f2d01
DL
2566struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2567{
2568 struct ctl_table *table;
2569
2570 table = kmemdup(ipv6_route_table_template,
2571 sizeof(ipv6_route_table_template),
2572 GFP_KERNEL);
5ee09105
YH
2573
2574 if (table) {
2575 table[0].data = &net->ipv6.sysctl.flush_delay;
f2fc6a54 2576 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
5ee09105
YH
2577 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2578 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2579 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2580 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2581 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2582 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2583 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2584 }
2585
760f2d01
DL
2586 return table;
2587}
1da177e4
LT
2588#endif
2589
cdb18761
DL
2590static int ip6_route_net_init(struct net *net)
2591{
633d424b 2592 int ret = -ENOMEM;
8ed67789 2593
f2fc6a54
BT
2594 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2595 sizeof(*net->ipv6.ip6_dst_ops),
2596 GFP_KERNEL);
2597 if (!net->ipv6.ip6_dst_ops)
2598 goto out;
48115bec 2599 net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
f2fc6a54 2600
8ed67789
DL
2601 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2602 sizeof(*net->ipv6.ip6_null_entry),
2603 GFP_KERNEL);
2604 if (!net->ipv6.ip6_null_entry)
f2fc6a54 2605 goto out_ip6_dst_ops;
8ed67789
DL
2606 net->ipv6.ip6_null_entry->u.dst.path =
2607 (struct dst_entry *)net->ipv6.ip6_null_entry;
f2fc6a54 2608 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
8ed67789
DL
2609
2610#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2611 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2612 sizeof(*net->ipv6.ip6_prohibit_entry),
2613 GFP_KERNEL);
2614 if (!net->ipv6.ip6_prohibit_entry) {
2615 kfree(net->ipv6.ip6_null_entry);
2616 goto out;
2617 }
2618 net->ipv6.ip6_prohibit_entry->u.dst.path =
2619 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
f2fc6a54 2620 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
8ed67789
DL
2621
2622 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2623 sizeof(*net->ipv6.ip6_blk_hole_entry),
2624 GFP_KERNEL);
2625 if (!net->ipv6.ip6_blk_hole_entry) {
2626 kfree(net->ipv6.ip6_null_entry);
2627 kfree(net->ipv6.ip6_prohibit_entry);
2628 goto out;
2629 }
2630 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2631 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
f2fc6a54 2632 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
8ed67789
DL
2633#endif
2634
cdb18761
DL
2635#ifdef CONFIG_PROC_FS
2636 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2637 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2638#endif
6891a346
BT
2639 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2640
8ed67789
DL
2641 ret = 0;
2642out:
2643 return ret;
f2fc6a54
BT
2644
2645out_ip6_dst_ops:
48115bec 2646 release_net(net->ipv6.ip6_dst_ops->dst_net);
f2fc6a54
BT
2647 kfree(net->ipv6.ip6_dst_ops);
2648 goto out;
cdb18761
DL
2649}
2650
2651static void ip6_route_net_exit(struct net *net)
2652{
2653#ifdef CONFIG_PROC_FS
2654 proc_net_remove(net, "ipv6_route");
2655 proc_net_remove(net, "rt6_stats");
2656#endif
8ed67789
DL
2657 kfree(net->ipv6.ip6_null_entry);
2658#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2659 kfree(net->ipv6.ip6_prohibit_entry);
2660 kfree(net->ipv6.ip6_blk_hole_entry);
2661#endif
48115bec 2662 release_net(net->ipv6.ip6_dst_ops->dst_net);
f2fc6a54 2663 kfree(net->ipv6.ip6_dst_ops);
cdb18761
DL
2664}
2665
2666static struct pernet_operations ip6_route_net_ops = {
2667 .init = ip6_route_net_init,
2668 .exit = ip6_route_net_exit,
2669};
2670
8ed67789
DL
2671static struct notifier_block ip6_route_dev_notifier = {
2672 .notifier_call = ip6_route_dev_notify,
2673 .priority = 0,
2674};
2675
433d49c3 2676int __init ip6_route_init(void)
1da177e4 2677{
433d49c3
DL
2678 int ret;
2679
9a7ec3a9
DL
2680 ret = -ENOMEM;
2681 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2682 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2683 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2684 if (!ip6_dst_ops_template.kmem_cachep)
f2fc6a54 2685 goto out;;
14e50e57 2686
8ed67789
DL
2687 ret = register_pernet_subsys(&ip6_route_net_ops);
2688 if (ret)
bdb3289f 2689 goto out_kmem_cache;
bdb3289f 2690
8ed67789
DL
2691 /* Registering of the loopback is done before this portion of code,
2692 * the loopback reference in rt6_info will not be taken, do it
2693 * manually for init_net */
2694 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2695 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2696 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2697 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2698 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2699 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2700 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2701 #endif
433d49c3
DL
2702 ret = fib6_init();
2703 if (ret)
8ed67789 2704 goto out_register_subsys;
433d49c3 2705
433d49c3
DL
2706 ret = xfrm6_init();
2707 if (ret)
cdb18761 2708 goto out_fib6_init;
c35b7e72 2709
433d49c3
DL
2710 ret = fib6_rules_init();
2711 if (ret)
2712 goto xfrm6_init;
7e5449c2 2713
433d49c3
DL
2714 ret = -ENOBUFS;
2715 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2716 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2717 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2718 goto fib6_rules_init;
c127ea2c 2719
8ed67789 2720 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2721 if (ret)
2722 goto fib6_rules_init;
8ed67789 2723
433d49c3
DL
2724out:
2725 return ret;
2726
2727fib6_rules_init:
433d49c3
DL
2728 fib6_rules_cleanup();
2729xfrm6_init:
433d49c3 2730 xfrm6_fini();
433d49c3 2731out_fib6_init:
433d49c3 2732 fib6_gc_cleanup();
8ed67789
DL
2733out_register_subsys:
2734 unregister_pernet_subsys(&ip6_route_net_ops);
433d49c3 2735out_kmem_cache:
f2fc6a54 2736 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2737 goto out;
1da177e4
LT
2738}
2739
2740void ip6_route_cleanup(void)
2741{
8ed67789 2742 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2743 fib6_rules_cleanup();
1da177e4 2744 xfrm6_fini();
1da177e4 2745 fib6_gc_cleanup();
8ed67789 2746 unregister_pernet_subsys(&ip6_route_net_ops);
f2fc6a54 2747 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2748}