]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
net: use the macros defined for the members of flowi
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug tracing level.  RDBG() and RT6_TRACE() expand to printk() calls
 * only when RT6_DEBUG is 3 or higher; otherwise they compile to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 111 .local_out = __ip6_local_out,
1da177e4
LT
112};
113
14e50e57
DM
114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
09640e63 120 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
14e50e57
DM
124};
125
bdb3289f 126static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
127 .dst = {
128 .__refcnt = ATOMIC_INIT(1),
129 .__use = 1,
130 .obsolete = -1,
131 .error = -ENETUNREACH,
132 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
133 .input = ip6_pkt_discard,
134 .output = ip6_pkt_discard_out,
1da177e4
LT
135 },
136 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 137 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" route: reject route with dst error -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: reject route with dst error -EINVAL
 * whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
180
/* Allocate a dst entry from @ops and return it typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
186
187static void ip6_dst_destroy(struct dst_entry *dst)
188{
189 struct rt6_info *rt = (struct rt6_info *)dst;
190 struct inet6_dev *idev = rt->rt6i_idev;
191
192 if (idev != NULL) {
193 rt->rt6i_idev = NULL;
194 in6_dev_put(idev);
1ab1457c 195 }
1da177e4
LT
196}
197
198static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
199 int how)
200{
201 struct rt6_info *rt = (struct rt6_info *)dst;
202 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 203 struct net_device *loopback_dev =
c346dca1 204 dev_net(dev)->loopback_dev;
1da177e4 205
5a3e55d6
DL
206 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
207 struct inet6_dev *loopback_idev =
208 in6_dev_get(loopback_dev);
1da177e4
LT
209 if (loopback_idev != NULL) {
210 rt->rt6i_idev = loopback_idev;
211 in6_dev_put(idev);
212 }
213 }
214}
215
216static __inline__ int rt6_check_expired(const struct rt6_info *rt)
217{
a02cec21
ED
218 return (rt->rt6i_flags & RTF_EXPIRES) &&
219 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
220}
221
c71099ac
TG
222static inline int rt6_need_strict(struct in6_addr *daddr)
223{
a02cec21
ED
224 return ipv6_addr_type(daddr) &
225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
226}
227
1da177e4 228/*
c71099ac 229 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
230 */
231
8ed67789
DL
232static inline struct rt6_info *rt6_device_match(struct net *net,
233 struct rt6_info *rt,
dd3abc4e 234 struct in6_addr *saddr,
1da177e4 235 int oif,
d420895e 236 int flags)
1da177e4
LT
237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
dd3abc4e
YH
241 if (!oif && ipv6_addr_any(saddr))
242 goto out;
243
d8d1f30b 244 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
245 struct net_device *dev = sprt->rt6i_dev;
246
247 if (oif) {
1da177e4
LT
248 if (dev->ifindex == oif)
249 return sprt;
250 if (dev->flags & IFF_LOOPBACK) {
251 if (sprt->rt6i_idev == NULL ||
252 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 253 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 254 continue;
1ab1457c 255 if (local && (!oif ||
1da177e4
LT
256 local->rt6i_idev->dev->ifindex == oif))
257 continue;
258 }
259 local = sprt;
260 }
dd3abc4e
YH
261 } else {
262 if (ipv6_chk_addr(net, saddr, dev,
263 flags & RT6_LOOKUP_F_IFACE))
264 return sprt;
1da177e4 265 }
dd3abc4e 266 }
1da177e4 267
dd3abc4e 268 if (oif) {
1da177e4
LT
269 if (local)
270 return local;
271
d420895e 272 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 273 return net->ipv6.ip6_null_entry;
1da177e4 274 }
dd3abc4e 275out:
1da177e4
LT
276 return rt;
277}
278
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the next hop of @rt is not in a NUD_VALID state and the last
 * update of the neighbour entry is older than the device's
 * rtr_probe_interval, send a Neighbour Solicitation to the solicited-node
 * multicast address of the next hop.  @rt may be NULL, in which case
 * nothing happens.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; repeated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified here, so the write side of the lock
	 * is needed: with only the read side held, two CPUs could both
	 * pass the interval check and both send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies,
			neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent without holding the lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
313
1da177e4 314/*
554cfb7e 315 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 316 */
b6f99a21 317static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
318{
319 struct net_device *dev = rt->rt6i_dev;
161980f4 320 if (!oif || dev->ifindex == oif)
554cfb7e 321 return 2;
161980f4
DM
322 if ((dev->flags & IFF_LOOPBACK) &&
323 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
324 return 1;
325 return 0;
554cfb7e 326}
1da177e4 327
b6f99a21 328static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 329{
554cfb7e 330 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 331 int m;
4d0c5911
YH
332 if (rt->rt6i_flags & RTF_NONEXTHOP ||
333 !(rt->rt6i_flags & RTF_GATEWAY))
334 m = 1;
335 else if (neigh) {
554cfb7e
YH
336 read_lock_bh(&neigh->lock);
337 if (neigh->nud_state & NUD_VALID)
4d0c5911 338 m = 2;
398bcbeb
YH
339#ifdef CONFIG_IPV6_ROUTER_PREF
340 else if (neigh->nud_state & NUD_FAILED)
341 m = 0;
342#endif
343 else
ea73ee23 344 m = 1;
554cfb7e 345 read_unlock_bh(&neigh->lock);
398bcbeb
YH
346 } else
347 m = 0;
554cfb7e 348 return m;
1da177e4
LT
349}
350
554cfb7e
YH
351static int rt6_score_route(struct rt6_info *rt, int oif,
352 int strict)
1da177e4 353{
4d0c5911 354 int m, n;
1ab1457c 355
4d0c5911 356 m = rt6_check_dev(rt, oif);
77d16f45 357 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 358 return -1;
ebacaaa0
YH
359#ifdef CONFIG_IPV6_ROUTER_PREF
360 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361#endif
4d0c5911 362 n = rt6_check_neigh(rt);
557e92ef 363 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
364 return -1;
365 return m;
366}
367
f11e6659
DM
368static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
369 int *mpri, struct rt6_info *match)
554cfb7e 370{
f11e6659
DM
371 int m;
372
373 if (rt6_check_expired(rt))
374 goto out;
375
376 m = rt6_score_route(rt, oif, strict);
377 if (m < 0)
378 goto out;
379
380 if (m > *mpri) {
381 if (strict & RT6_LOOKUP_F_REACHABLE)
382 rt6_probe(match);
383 *mpri = m;
384 match = rt;
385 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
386 rt6_probe(rt);
387 }
388
389out:
390 return match;
391}
392
393static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
394 struct rt6_info *rr_head,
395 u32 metric, int oif, int strict)
396{
397 struct rt6_info *rt, *match;
554cfb7e 398 int mpri = -1;
1da177e4 399
f11e6659
DM
400 match = NULL;
401 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 402 rt = rt->dst.rt6_next)
f11e6659
DM
403 match = find_match(rt, oif, strict, &mpri, match);
404 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 405 rt = rt->dst.rt6_next)
f11e6659 406 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 407
f11e6659
DM
408 return match;
409}
1da177e4 410
f11e6659
DM
411static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412{
413 struct rt6_info *match, *rt0;
8ed67789 414 struct net *net;
1da177e4 415
f11e6659 416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 417 __func__, fn->leaf, oif);
554cfb7e 418
f11e6659
DM
419 rt0 = fn->rr_ptr;
420 if (!rt0)
421 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 422
f11e6659 423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 424
554cfb7e 425 if (!match &&
f11e6659 426 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 427 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 428
554cfb7e 429 /* no entries matched; do round-robin */
f11e6659
DM
430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 next = fn->leaf;
432
433 if (next != rt0)
434 fn->rr_ptr = next;
1da177e4 435 }
1da177e4 436
f11e6659 437 RT6_TRACE("%s() => %p\n",
0dc47877 438 __func__, match);
1da177e4 439
c346dca1 440 net = dev_net(rt0->rt6i_dev);
a02cec21 441 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
442}
443
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev from gateway
 * @gwaddr.  @opt points at the option and @len is its length in bytes.
 *
 * Returns -EINVAL for a malformed option (too short, bad length /
 * prefix_len combination, or invalid preference) and 0 otherwise.  A
 * zero lifetime deletes an existing route; a non-zero lifetime adds the
 * route if missing or refreshes the preference and expiry of an
 * existing one.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix present in the option: use in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
517
/*
 * BACKTRACK(net, saddr): when the current result `rt' is the null entry,
 * climb from the current node `fn' towards the root, descending into a
 * parent's source subtree where one exists, until a node carrying route
 * info is found.
 *
 * The macro is not self-contained: it reads and writes the caller's
 * local variables `rt' and `fn', and jumps to the caller's labels
 * `restart' (retry at the new node) and `out' (top-level root reached).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 535
8ed67789
DL
536static struct rt6_info *ip6_pol_route_lookup(struct net *net,
537 struct fib6_table *table,
c71099ac 538 struct flowi *fl, int flags)
1da177e4
LT
539{
540 struct fib6_node *fn;
541 struct rt6_info *rt;
542
c71099ac
TG
543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
545restart:
546 rt = fn->leaf;
dd3abc4e 547 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 548 BACKTRACK(net, &fl->fl6_src);
c71099ac 549out:
d8d1f30b 550 dst_use(&rt->dst, jiffies);
c71099ac 551 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
552 return rt;
553
554}
555
9acd9f3a
YH
556struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
557 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
558{
559 struct flowi fl = {
560 .oif = oif,
5811662b 561 .fl6_dst = *daddr,
c71099ac
TG
562 };
563 struct dst_entry *dst;
77d16f45 564 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 565
adaa70bb
TG
566 if (saddr) {
567 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
568 flags |= RT6_LOOKUP_F_HAS_SADDR;
569 }
570
606a2b48 571 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
572 if (dst->error == 0)
573 return (struct rt6_info *) dst;
574
575 dst_release(dst);
576
1da177e4
LT
577 return NULL;
578}
579
7159039a
YH
580EXPORT_SYMBOL(rt6_lookup);
581
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In either case, if the caller does not hold its own
   reference, the route may already be destroyed on return.
 */
587
86872cb5 588static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
589{
590 int err;
c71099ac 591 struct fib6_table *table;
1da177e4 592
c71099ac
TG
593 table = rt->rt6i_table;
594 write_lock_bh(&table->tb6_lock);
86872cb5 595 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 596 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
597
598 return err;
599}
600
40e22e8f
TG
601int ip6_ins_rt(struct rt6_info *rt)
602{
4d1169c1 603 struct nl_info info = {
c346dca1 604 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 605 };
528c4ceb 606 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
607}
608
95a9a5ba
YH
609static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
610 struct in6_addr *saddr)
1da177e4 611{
1da177e4
LT
612 struct rt6_info *rt;
613
614 /*
615 * Clone the route.
616 */
617
618 rt = ip6_rt_copy(ort);
619
620 if (rt) {
14deae41
DM
621 struct neighbour *neigh;
622 int attempts = !in_softirq();
623
58c4fb86
YH
624 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
625 if (rt->rt6i_dst.plen != 128 &&
626 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
627 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 628 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 629 }
1da177e4 630
58c4fb86 631 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
632 rt->rt6i_dst.plen = 128;
633 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 634 rt->dst.flags |= DST_HOST;
1da177e4
LT
635
636#ifdef CONFIG_IPV6_SUBTREES
637 if (rt->rt6i_src.plen && saddr) {
638 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
639 rt->rt6i_src.plen = 128;
640 }
641#endif
642
14deae41
DM
643 retry:
644 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
645 if (IS_ERR(neigh)) {
646 struct net *net = dev_net(rt->rt6i_dev);
647 int saved_rt_min_interval =
648 net->ipv6.sysctl.ip6_rt_gc_min_interval;
649 int saved_rt_elasticity =
650 net->ipv6.sysctl.ip6_rt_gc_elasticity;
651
652 if (attempts-- > 0) {
653 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
654 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
655
86393e52 656 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
657
658 net->ipv6.sysctl.ip6_rt_gc_elasticity =
659 saved_rt_elasticity;
660 net->ipv6.sysctl.ip6_rt_gc_min_interval =
661 saved_rt_min_interval;
662 goto retry;
663 }
664
665 if (net_ratelimit())
666 printk(KERN_WARNING
7e1b33e5 667 "ipv6: Neighbour table overflow.\n");
d8d1f30b 668 dst_free(&rt->dst);
14deae41
DM
669 return NULL;
670 }
671 rt->rt6i_nexthop = neigh;
1da177e4 672
95a9a5ba 673 }
1da177e4 674
95a9a5ba
YH
675 return rt;
676}
1da177e4 677
299d9939
YH
678static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
679{
680 struct rt6_info *rt = ip6_rt_copy(ort);
681 if (rt) {
682 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
683 rt->rt6i_dst.plen = 128;
684 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 685 rt->dst.flags |= DST_HOST;
299d9939
YH
686 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
687 }
688 return rt;
689}
690
8ed67789
DL
691static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
692 struct flowi *fl, int flags)
1da177e4
LT
693{
694 struct fib6_node *fn;
519fbd87 695 struct rt6_info *rt, *nrt;
c71099ac 696 int strict = 0;
1da177e4 697 int attempts = 3;
519fbd87 698 int err;
53b7997f 699 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 700
77d16f45 701 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
702
703relookup:
c71099ac 704 read_lock_bh(&table->tb6_lock);
1da177e4 705
8238dd06 706restart_2:
c71099ac 707 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
708
709restart:
4acad72d 710 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
711
712 BACKTRACK(net, &fl->fl6_src);
713 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 714 rt->rt6i_flags & RTF_CACHE)
1ddef044 715 goto out;
1da177e4 716
d8d1f30b 717 dst_hold(&rt->dst);
c71099ac 718 read_unlock_bh(&table->tb6_lock);
fb9de91e 719
519fbd87 720 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 721 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
722 else {
723#if CLONE_OFFLINK_ROUTE
c71099ac 724 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
725#else
726 goto out2;
727#endif
728 }
e40cf353 729
d8d1f30b 730 dst_release(&rt->dst);
8ed67789 731 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 732
d8d1f30b 733 dst_hold(&rt->dst);
519fbd87 734 if (nrt) {
40e22e8f 735 err = ip6_ins_rt(nrt);
519fbd87 736 if (!err)
1da177e4 737 goto out2;
1da177e4 738 }
1da177e4 739
519fbd87
YH
740 if (--attempts <= 0)
741 goto out2;
742
743 /*
c71099ac 744 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
745 * released someone could insert this route. Relookup.
746 */
d8d1f30b 747 dst_release(&rt->dst);
519fbd87
YH
748 goto relookup;
749
750out:
8238dd06
YH
751 if (reachable) {
752 reachable = 0;
753 goto restart_2;
754 }
d8d1f30b 755 dst_hold(&rt->dst);
c71099ac 756 read_unlock_bh(&table->tb6_lock);
1da177e4 757out2:
d8d1f30b
CG
758 rt->dst.lastuse = jiffies;
759 rt->dst.__use++;
c71099ac
TG
760
761 return rt;
1da177e4
LT
762}
763
8ed67789 764static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
765 struct flowi *fl, int flags)
766{
8ed67789 767 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
768}
769
c71099ac
TG
770void ip6_route_input(struct sk_buff *skb)
771{
0660e03f 772 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 773 struct net *net = dev_net(skb->dev);
adaa70bb 774 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
775 struct flowi fl = {
776 .iif = skb->dev->ifindex,
5811662b
CG
777 .fl6_dst = iph->daddr,
778 .fl6_src = iph->saddr,
779 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
1ab1457c 780 .mark = skb->mark,
c71099ac
TG
781 .proto = iph->nexthdr,
782 };
adaa70bb 783
1d6e55f1 784 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 785 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 786
adf30907 787 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
788}
789
8ed67789 790static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 791 struct flowi *fl, int flags)
1da177e4 792{
8ed67789 793 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
794}
795
4591db4f
DL
796struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
797 struct flowi *fl)
c71099ac
TG
798{
799 int flags = 0;
800
6057fd78 801 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 802 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 803
adaa70bb
TG
804 if (!ipv6_addr_any(&fl->fl6_src))
805 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
806 else if (sk)
807 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 808
4591db4f 809 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
810}
811
7159039a 812EXPORT_SYMBOL(ip6_route_output);
1da177e4 813
14e50e57
DM
814int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
815{
816 struct rt6_info *ort = (struct rt6_info *) *dstp;
817 struct rt6_info *rt = (struct rt6_info *)
818 dst_alloc(&ip6_dst_blackhole_ops);
819 struct dst_entry *new = NULL;
820
821 if (rt) {
d8d1f30b 822 new = &rt->dst;
14e50e57
DM
823
824 atomic_set(&new->__refcnt, 1);
825 new->__use = 1;
352e512c
HX
826 new->input = dst_discard;
827 new->output = dst_discard;
14e50e57 828
d8d1f30b
CG
829 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
830 new->dev = ort->dst.dev;
14e50e57
DM
831 if (new->dev)
832 dev_hold(new->dev);
833 rt->rt6i_idev = ort->rt6i_idev;
834 if (rt->rt6i_idev)
835 in6_dev_hold(rt->rt6i_idev);
836 rt->rt6i_expires = 0;
837
838 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
839 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
840 rt->rt6i_metric = 0;
841
842 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
843#ifdef CONFIG_IPV6_SUBTREES
844 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
845#endif
846
847 dst_free(new);
848 }
849
850 dst_release(*dstp);
851 *dstp = new;
a02cec21 852 return new ? 0 : -ENOMEM;
14e50e57
DM
853}
854EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
855
1da177e4
LT
856/*
857 * Destination cache support functions
858 */
859
860static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
861{
862 struct rt6_info *rt;
863
864 rt = (struct rt6_info *) dst;
865
10414444 866 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
867 return dst;
868
869 return NULL;
870}
871
872static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
873{
874 struct rt6_info *rt = (struct rt6_info *) dst;
875
876 if (rt) {
54c1a859
YH
877 if (rt->rt6i_flags & RTF_CACHE) {
878 if (rt6_check_expired(rt)) {
879 ip6_del_rt(rt);
880 dst = NULL;
881 }
882 } else {
1da177e4 883 dst_release(dst);
54c1a859
YH
884 dst = NULL;
885 }
1da177e4 886 }
54c1a859 887 return dst;
1da177e4
LT
888}
889
890static void ip6_link_failure(struct sk_buff *skb)
891{
892 struct rt6_info *rt;
893
3ffe533c 894 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 895
adf30907 896 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
897 if (rt) {
898 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 899 dst_set_expires(&rt->dst, 0);
1da177e4
LT
900 rt->rt6i_flags |= RTF_EXPIRES;
901 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
902 rt->rt6i_node->fn_sernum = -1;
903 }
904}
905
906static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
907{
908 struct rt6_info *rt6 = (struct rt6_info*)dst;
909
910 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
911 rt6->rt6i_flags |= RTF_MODIFIED;
912 if (mtu < IPV6_MIN_MTU) {
913 mtu = IPV6_MIN_MTU;
914 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
915 }
916 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 917 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
918 }
919}
920
1da177e4
LT
921static int ipv6_get_mtu(struct net_device *dev);
922
5578689a 923static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
924{
925 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
926
5578689a
DL
927 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
928 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
929
930 /*
1ab1457c
YH
931 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
932 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
933 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
934 * rely only on pmtu discovery"
935 */
936 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
937 mtu = IPV6_MAXPLEN;
938 return mtu;
939}
940
3b00944c
YH
/* Head of the list (linked through dst->next) of dsts created by icmp6_dst_alloc(). */
static struct dst_entry *icmp6_dst_gc_list;
/* Protects every walk and update of icmp6_dst_gc_list. */
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 943
3b00944c 944struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 945 struct neighbour *neigh,
9acd9f3a 946 const struct in6_addr *addr)
1da177e4
LT
947{
948 struct rt6_info *rt;
949 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 950 struct net *net = dev_net(dev);
1da177e4
LT
951
952 if (unlikely(idev == NULL))
953 return NULL;
954
86393e52 955 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
956 if (unlikely(rt == NULL)) {
957 in6_dev_put(idev);
958 goto out;
959 }
960
961 dev_hold(dev);
962 if (neigh)
963 neigh_hold(neigh);
14deae41 964 else {
1da177e4 965 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
966 if (IS_ERR(neigh))
967 neigh = NULL;
968 }
1da177e4
LT
969
970 rt->rt6i_dev = dev;
971 rt->rt6i_idev = idev;
972 rt->rt6i_nexthop = neigh;
d8d1f30b
CG
973 atomic_set(&rt->dst.__refcnt, 1);
974 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
975 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
976 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
977 rt->dst.output = ip6_output;
1da177e4
LT
978
979#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 980 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 981 ? DST_HOST
1da177e4
LT
982 : 0;
983 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
984 rt->rt6i_dst.plen = 128;
985#endif
986
3b00944c 987 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
988 rt->dst.next = icmp6_dst_gc_list;
989 icmp6_dst_gc_list = &rt->dst;
3b00944c 990 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 991
5578689a 992 fib6_force_start_gc(net);
1da177e4
LT
993
994out:
d8d1f30b 995 return &rt->dst;
1da177e4
LT
996}
997
3d0f24a7 998int icmp6_dst_gc(void)
1da177e4
LT
999{
1000 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1001 int more = 0;
1da177e4
LT
1002
1003 next = NULL;
5d0bbeeb 1004
3b00944c
YH
1005 spin_lock_bh(&icmp6_dst_lock);
1006 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1007
1da177e4
LT
1008 while ((dst = *pprev) != NULL) {
1009 if (!atomic_read(&dst->__refcnt)) {
1010 *pprev = dst->next;
1011 dst_free(dst);
1da177e4
LT
1012 } else {
1013 pprev = &dst->next;
3d0f24a7 1014 ++more;
1da177e4
LT
1015 }
1016 }
1017
3b00944c 1018 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1019
3d0f24a7 1020 return more;
1da177e4
LT
1021}
1022
1e493d19
DM
1023static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1024 void *arg)
1025{
1026 struct dst_entry *dst, **pprev;
1027
1028 spin_lock_bh(&icmp6_dst_lock);
1029 pprev = &icmp6_dst_gc_list;
1030 while ((dst = *pprev) != NULL) {
1031 struct rt6_info *rt = (struct rt6_info *) dst;
1032 if (func(rt, arg)) {
1033 *pprev = dst->next;
1034 dst_free(dst);
1035 } else {
1036 pprev = &dst->next;
1037 }
1038 }
1039 spin_unlock_bh(&icmp6_dst_lock);
1040}
1041
569d3645 1042static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1043{
1da177e4 1044 unsigned long now = jiffies;
86393e52 1045 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1046 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1047 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1048 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1049 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1050 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1051 int entries;
7019b78e 1052
fc66f95c 1053 entries = dst_entries_get_fast(ops);
7019b78e 1054 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1055 entries <= rt_max_size)
1da177e4
LT
1056 goto out;
1057
6891a346
BT
1058 net->ipv6.ip6_rt_gc_expire++;
1059 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1060 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1061 entries = dst_entries_get_slow(ops);
1062 if (entries < ops->gc_thresh)
7019b78e 1063 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1064out:
7019b78e 1065 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1066 return entries > rt_max_size;
1da177e4
LT
1067}
1068
1069/* Clean host part of a prefix. Not necessary in radix tree,
1070 but results in cleaner routing tables.
1071
1072 Remove it only when all the things will work!
1073 */
1074
1075static int ipv6_get_mtu(struct net_device *dev)
1076{
1077 int mtu = IPV6_MIN_MTU;
1078 struct inet6_dev *idev;
1079
c68f24cc
ED
1080 rcu_read_lock();
1081 idev = __in6_dev_get(dev);
1082 if (idev)
1da177e4 1083 mtu = idev->cnf.mtu6;
c68f24cc 1084 rcu_read_unlock();
1da177e4
LT
1085 return mtu;
1086}
1087
6b75d090 1088int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1089{
6b75d090
YH
1090 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1091 if (hoplimit < 0) {
1092 struct net_device *dev = dst->dev;
c68f24cc
ED
1093 struct inet6_dev *idev;
1094
1095 rcu_read_lock();
1096 idev = __in6_dev_get(dev);
1097 if (idev)
6b75d090 1098 hoplimit = idev->cnf.hop_limit;
c68f24cc 1099 else
53b7997f 1100 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1101 rcu_read_unlock();
1da177e4
LT
1102 }
1103 return hoplimit;
1104}
1105
1106/*
1107 *
1108 */
1109
86872cb5 1110int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1111{
1112 int err;
5578689a 1113 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1114 struct rt6_info *rt = NULL;
1115 struct net_device *dev = NULL;
1116 struct inet6_dev *idev = NULL;
c71099ac 1117 struct fib6_table *table;
1da177e4
LT
1118 int addr_type;
1119
86872cb5 1120 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1121 return -EINVAL;
1122#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1123 if (cfg->fc_src_len)
1da177e4
LT
1124 return -EINVAL;
1125#endif
86872cb5 1126 if (cfg->fc_ifindex) {
1da177e4 1127 err = -ENODEV;
5578689a 1128 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1129 if (!dev)
1130 goto out;
1131 idev = in6_dev_get(dev);
1132 if (!idev)
1133 goto out;
1134 }
1135
86872cb5
TG
1136 if (cfg->fc_metric == 0)
1137 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1138
5578689a 1139 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1140 if (table == NULL) {
1141 err = -ENOBUFS;
1142 goto out;
1143 }
1144
86393e52 1145 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1146
1147 if (rt == NULL) {
1148 err = -ENOMEM;
1149 goto out;
1150 }
1151
d8d1f30b 1152 rt->dst.obsolete = -1;
6f704992
YH
1153 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1154 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1155 0;
1da177e4 1156
86872cb5
TG
1157 if (cfg->fc_protocol == RTPROT_UNSPEC)
1158 cfg->fc_protocol = RTPROT_BOOT;
1159 rt->rt6i_protocol = cfg->fc_protocol;
1160
1161 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1162
1163 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1164 rt->dst.input = ip6_mc_input;
ab79ad14
1165 else if (cfg->fc_flags & RTF_LOCAL)
1166 rt->dst.input = ip6_input;
1da177e4 1167 else
d8d1f30b 1168 rt->dst.input = ip6_forward;
1da177e4 1169
d8d1f30b 1170 rt->dst.output = ip6_output;
1da177e4 1171
86872cb5
TG
1172 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1173 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1174 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1175 rt->dst.flags = DST_HOST;
1da177e4
LT
1176
1177#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1178 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1179 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1180#endif
1181
86872cb5 1182 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1183
1184 /* We cannot add true routes via loopback here,
1185 they would result in kernel looping; promote them to reject routes
1186 */
86872cb5 1187 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1188 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1189 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1190 /* hold loopback dev/idev if we haven't done so. */
5578689a 1191 if (dev != net->loopback_dev) {
1da177e4
LT
1192 if (dev) {
1193 dev_put(dev);
1194 in6_dev_put(idev);
1195 }
5578689a 1196 dev = net->loopback_dev;
1da177e4
LT
1197 dev_hold(dev);
1198 idev = in6_dev_get(dev);
1199 if (!idev) {
1200 err = -ENODEV;
1201 goto out;
1202 }
1203 }
d8d1f30b
CG
1204 rt->dst.output = ip6_pkt_discard_out;
1205 rt->dst.input = ip6_pkt_discard;
1206 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1207 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1208 goto install_route;
1209 }
1210
86872cb5 1211 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1212 struct in6_addr *gw_addr;
1213 int gwa_type;
1214
86872cb5
TG
1215 gw_addr = &cfg->fc_gateway;
1216 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1217 gwa_type = ipv6_addr_type(gw_addr);
1218
1219 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1220 struct rt6_info *grt;
1221
1222 /* IPv6 strictly inhibits using not link-local
1223 addresses as nexthop address.
1224 Otherwise, router will not able to send redirects.
1225 It is very good, but in some (rare!) circumstances
1226 (SIT, PtP, NBMA NOARP links) it is handy to allow
1227 some exceptions. --ANK
1228 */
1229 err = -EINVAL;
1230 if (!(gwa_type&IPV6_ADDR_UNICAST))
1231 goto out;
1232
5578689a 1233 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1234
1235 err = -EHOSTUNREACH;
1236 if (grt == NULL)
1237 goto out;
1238 if (dev) {
1239 if (dev != grt->rt6i_dev) {
d8d1f30b 1240 dst_release(&grt->dst);
1da177e4
LT
1241 goto out;
1242 }
1243 } else {
1244 dev = grt->rt6i_dev;
1245 idev = grt->rt6i_idev;
1246 dev_hold(dev);
1247 in6_dev_hold(grt->rt6i_idev);
1248 }
1249 if (!(grt->rt6i_flags&RTF_GATEWAY))
1250 err = 0;
d8d1f30b 1251 dst_release(&grt->dst);
1da177e4
LT
1252
1253 if (err)
1254 goto out;
1255 }
1256 err = -EINVAL;
1257 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1258 goto out;
1259 }
1260
1261 err = -ENODEV;
1262 if (dev == NULL)
1263 goto out;
1264
86872cb5 1265 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1266 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1267 if (IS_ERR(rt->rt6i_nexthop)) {
1268 err = PTR_ERR(rt->rt6i_nexthop);
1269 rt->rt6i_nexthop = NULL;
1270 goto out;
1271 }
1272 }
1273
86872cb5 1274 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1275
1276install_route:
86872cb5
TG
1277 if (cfg->fc_mx) {
1278 struct nlattr *nla;
1279 int remaining;
1280
1281 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1282 int type = nla_type(nla);
86872cb5
TG
1283
1284 if (type) {
1285 if (type > RTAX_MAX) {
1da177e4
LT
1286 err = -EINVAL;
1287 goto out;
1288 }
86872cb5 1289
d8d1f30b 1290 rt->dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1291 }
1da177e4
LT
1292 }
1293 }
1294
d8d1f30b
CG
1295 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1296 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1297 if (!dst_mtu(&rt->dst))
1298 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1299 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1300 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1301 rt->dst.dev = dev;
1da177e4 1302 rt->rt6i_idev = idev;
c71099ac 1303 rt->rt6i_table = table;
63152fc0 1304
c346dca1 1305 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1306
86872cb5 1307 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1308
1309out:
1310 if (dev)
1311 dev_put(dev);
1312 if (idev)
1313 in6_dev_put(idev);
1314 if (rt)
d8d1f30b 1315 dst_free(&rt->dst);
1da177e4
LT
1316 return err;
1317}
1318
86872cb5 1319static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1320{
1321 int err;
c71099ac 1322 struct fib6_table *table;
c346dca1 1323 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1324
8ed67789 1325 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1326 return -ENOENT;
1327
c71099ac
TG
1328 table = rt->rt6i_table;
1329 write_lock_bh(&table->tb6_lock);
1da177e4 1330
86872cb5 1331 err = fib6_del(rt, info);
d8d1f30b 1332 dst_release(&rt->dst);
1da177e4 1333
c71099ac 1334 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1335
1336 return err;
1337}
1338
e0a1ad73
TG
1339int ip6_del_rt(struct rt6_info *rt)
1340{
4d1169c1 1341 struct nl_info info = {
c346dca1 1342 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1343 };
528c4ceb 1344 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1345}
1346
86872cb5 1347static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1348{
c71099ac 1349 struct fib6_table *table;
1da177e4
LT
1350 struct fib6_node *fn;
1351 struct rt6_info *rt;
1352 int err = -ESRCH;
1353
5578689a 1354 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1355 if (table == NULL)
1356 return err;
1357
1358 read_lock_bh(&table->tb6_lock);
1da177e4 1359
c71099ac 1360 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1361 &cfg->fc_dst, cfg->fc_dst_len,
1362 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1363
1da177e4 1364 if (fn) {
d8d1f30b 1365 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1366 if (cfg->fc_ifindex &&
1da177e4 1367 (rt->rt6i_dev == NULL ||
86872cb5 1368 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1369 continue;
86872cb5
TG
1370 if (cfg->fc_flags & RTF_GATEWAY &&
1371 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1372 continue;
86872cb5 1373 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1374 continue;
d8d1f30b 1375 dst_hold(&rt->dst);
c71099ac 1376 read_unlock_bh(&table->tb6_lock);
1da177e4 1377
86872cb5 1378 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1379 }
1380 }
c71099ac 1381 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1382
1383 return err;
1384}
1385
1386/*
1387 * Handle redirects
1388 */
a6279458
YH
1389struct ip6rd_flowi {
1390 struct flowi fl;
1391 struct in6_addr gateway;
1392};
1393
8ed67789
DL
1394static struct rt6_info *__ip6_route_redirect(struct net *net,
1395 struct fib6_table *table,
a6279458
YH
1396 struct flowi *fl,
1397 int flags)
1da177e4 1398{
a6279458
YH
1399 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1400 struct rt6_info *rt;
e843b9e1 1401 struct fib6_node *fn;
c71099ac 1402
1da177e4 1403 /*
e843b9e1
YH
1404 * Get the "current" route for this destination and
1405 * check if the redirect has come from approriate router.
1406 *
1407 * RFC 2461 specifies that redirects should only be
1408 * accepted if they come from the nexthop to the target.
1409 * Due to the way the routes are chosen, this notion
1410 * is a bit fuzzy and one might need to check all possible
1411 * routes.
1da177e4 1412 */
1da177e4 1413
c71099ac 1414 read_lock_bh(&table->tb6_lock);
a6279458 1415 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1416restart:
d8d1f30b 1417 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1418 /*
1419 * Current route is on-link; redirect is always invalid.
1420 *
1421 * Seems, previous statement is not true. It could
1422 * be node, which looks for us as on-link (f.e. proxy ndisc)
1423 * But then router serving it might decide, that we should
1424 * know truth 8)8) --ANK (980726).
1425 */
1426 if (rt6_check_expired(rt))
1427 continue;
1428 if (!(rt->rt6i_flags & RTF_GATEWAY))
1429 continue;
a6279458 1430 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1431 continue;
a6279458 1432 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1433 continue;
1434 break;
1435 }
a6279458 1436
cb15d9c2 1437 if (!rt)
8ed67789
DL
1438 rt = net->ipv6.ip6_null_entry;
1439 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1440out:
d8d1f30b 1441 dst_hold(&rt->dst);
a6279458 1442
c71099ac 1443 read_unlock_bh(&table->tb6_lock);
e843b9e1 1444
a6279458
YH
1445 return rt;
1446};
1447
1448static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1449 struct in6_addr *src,
1450 struct in6_addr *gateway,
1451 struct net_device *dev)
1452{
adaa70bb 1453 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1454 struct net *net = dev_net(dev);
a6279458
YH
1455 struct ip6rd_flowi rdfl = {
1456 .fl = {
1457 .oif = dev->ifindex,
5811662b
CG
1458 .fl6_dst = *dest,
1459 .fl6_src = *src,
a6279458 1460 },
a6279458 1461 };
adaa70bb 1462
86c36ce4
BH
1463 ipv6_addr_copy(&rdfl.gateway, gateway);
1464
adaa70bb
TG
1465 if (rt6_need_strict(dest))
1466 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1467
5578689a 1468 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1469 flags, __ip6_route_redirect);
a6279458
YH
1470}
1471
1472void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1473 struct in6_addr *saddr,
1474 struct neighbour *neigh, u8 *lladdr, int on_link)
1475{
1476 struct rt6_info *rt, *nrt = NULL;
1477 struct netevent_redirect netevent;
c346dca1 1478 struct net *net = dev_net(neigh->dev);
a6279458
YH
1479
1480 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1481
8ed67789 1482 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1483 if (net_ratelimit())
1484 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1485 "for redirect target\n");
a6279458 1486 goto out;
1da177e4
LT
1487 }
1488
1da177e4
LT
1489 /*
1490 * We have finally decided to accept it.
1491 */
1492
1ab1457c 1493 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1494 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1495 NEIGH_UPDATE_F_OVERRIDE|
1496 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1497 NEIGH_UPDATE_F_ISROUTER))
1498 );
1499
1500 /*
1501 * Redirect received -> path was valid.
1502 * Look, redirects are sent only in response to data packets,
1503 * so that this nexthop apparently is reachable. --ANK
1504 */
d8d1f30b 1505 dst_confirm(&rt->dst);
1da177e4
LT
1506
1507 /* Duplicate redirect: silently ignore. */
d8d1f30b 1508 if (neigh == rt->dst.neighbour)
1da177e4
LT
1509 goto out;
1510
1511 nrt = ip6_rt_copy(rt);
1512 if (nrt == NULL)
1513 goto out;
1514
1515 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1516 if (on_link)
1517 nrt->rt6i_flags &= ~RTF_GATEWAY;
1518
1519 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1520 nrt->rt6i_dst.plen = 128;
d8d1f30b 1521 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1522
1523 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1524 nrt->rt6i_nexthop = neigh_clone(neigh);
1525 /* Reset pmtu, it may be better */
d8d1f30b
CG
1526 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1527 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1528 dst_mtu(&nrt->dst));
1da177e4 1529
40e22e8f 1530 if (ip6_ins_rt(nrt))
1da177e4
LT
1531 goto out;
1532
d8d1f30b
CG
1533 netevent.old = &rt->dst;
1534 netevent.new = &nrt->dst;
8d71740c
TT
1535 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1536
1da177e4 1537 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1538 ip6_del_rt(rt);
1da177e4
LT
1539 return;
1540 }
1541
1542out:
d8d1f30b 1543 dst_release(&rt->dst);
1da177e4
LT
1544}
1545
1546/*
1547 * Handle ICMP "packet too big" messages
1548 * i.e. Path MTU discovery
1549 */
1550
ae878ae2
1551static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1552 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1553{
1554 struct rt6_info *rt, *nrt;
1555 int allfrag = 0;
1556
ae878ae2 1557 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1558 if (rt == NULL)
1559 return;
1560
d8d1f30b 1561 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1562 goto out;
1563
1564 if (pmtu < IPV6_MIN_MTU) {
1565 /*
1ab1457c 1566 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1567 * MTU (1280) and a fragment header should always be included
1568 * after a node receiving Too Big message reporting PMTU is
1569 * less than the IPv6 Minimum Link MTU.
1570 */
1571 pmtu = IPV6_MIN_MTU;
1572 allfrag = 1;
1573 }
1574
1575 /* New mtu received -> path was valid.
1576 They are sent only in response to data packets,
1577 so that this nexthop apparently is reachable. --ANK
1578 */
d8d1f30b 1579 dst_confirm(&rt->dst);
1da177e4
LT
1580
1581 /* Host route. If it is static, it would be better
1582 not to override it, but add new one, so that
1583 when cache entry will expire old pmtu
1584 would return automatically.
1585 */
1586 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1587 rt->dst.metrics[RTAX_MTU-1] = pmtu;
1da177e4 1588 if (allfrag)
d8d1f30b
CG
1589 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1590 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1591 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1592 goto out;
1593 }
1594
1595 /* Network route.
1596 Two cases are possible:
1597 1. It is connected route. Action: COW
1598 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1599 */
d5315b50 1600 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1601 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1602 else
1603 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1604
d5315b50 1605 if (nrt) {
d8d1f30b 1606 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
a1e78363 1607 if (allfrag)
d8d1f30b 1608 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
a1e78363
YH
1609
1610 /* According to RFC 1981, detecting PMTU increase shouldn't be
1611 * happened within 5 mins, the recommended timer is 10 mins.
1612 * Here this route expiration time is set to ip6_rt_mtu_expires
1613 * which is 10 mins. After 10 mins the decreased pmtu is expired
1614 * and detecting PMTU increase will be automatically happened.
1615 */
d8d1f30b 1616 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1617 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1618
40e22e8f 1619 ip6_ins_rt(nrt);
1da177e4 1620 }
1da177e4 1621out:
d8d1f30b 1622 dst_release(&rt->dst);
1da177e4
LT
1623}
1624
ae878ae2
1625void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1626 struct net_device *dev, u32 pmtu)
1627{
1628 struct net *net = dev_net(dev);
1629
1630 /*
1631 * RFC 1981 states that a node "MUST reduce the size of the packets it
1632 * is sending along the path" that caused the Packet Too Big message.
1633 * Since it's not possible in the general case to determine which
1634 * interface was used to send the original packet, we update the MTU
1635 * on the interface that will be used to send future packets. We also
1636 * update the MTU on the interface that received the Packet Too Big in
1637 * case the original packet was forced out that interface with
1638 * SO_BINDTODEVICE or similar. This is the next best thing to the
1639 * correct behaviour, which would be to update the MTU on all
1640 * interfaces.
1641 */
1642 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1643 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1644}
1645
1da177e4
LT
1646/*
1647 * Misc support functions
1648 */
1649
1650static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1651{
c346dca1 1652 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1653 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1654
1655 if (rt) {
d8d1f30b
CG
1656 rt->dst.input = ort->dst.input;
1657 rt->dst.output = ort->dst.output;
1658
1659 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1660 rt->dst.error = ort->dst.error;
1661 rt->dst.dev = ort->dst.dev;
1662 if (rt->dst.dev)
1663 dev_hold(rt->dst.dev);
1da177e4
LT
1664 rt->rt6i_idev = ort->rt6i_idev;
1665 if (rt->rt6i_idev)
1666 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1667 rt->dst.lastuse = jiffies;
1da177e4
LT
1668 rt->rt6i_expires = 0;
1669
1670 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1671 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1672 rt->rt6i_metric = 0;
1673
1674 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1675#ifdef CONFIG_IPV6_SUBTREES
1676 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1677#endif
c71099ac 1678 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1679 }
1680 return rt;
1681}
1682
70ceb4f5 1683#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1684static struct rt6_info *rt6_get_route_info(struct net *net,
1685 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1686 struct in6_addr *gwaddr, int ifindex)
1687{
1688 struct fib6_node *fn;
1689 struct rt6_info *rt = NULL;
c71099ac
TG
1690 struct fib6_table *table;
1691
efa2cea0 1692 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1693 if (table == NULL)
1694 return NULL;
70ceb4f5 1695
c71099ac
TG
1696 write_lock_bh(&table->tb6_lock);
1697 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1698 if (!fn)
1699 goto out;
1700
d8d1f30b 1701 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1702 if (rt->rt6i_dev->ifindex != ifindex)
1703 continue;
1704 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1705 continue;
1706 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1707 continue;
d8d1f30b 1708 dst_hold(&rt->dst);
70ceb4f5
YH
1709 break;
1710 }
1711out:
c71099ac 1712 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1713 return rt;
1714}
1715
efa2cea0
DL
1716static struct rt6_info *rt6_add_route_info(struct net *net,
1717 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1718 struct in6_addr *gwaddr, int ifindex,
1719 unsigned pref)
1720{
86872cb5
TG
1721 struct fib6_config cfg = {
1722 .fc_table = RT6_TABLE_INFO,
238fc7ea 1723 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1724 .fc_ifindex = ifindex,
1725 .fc_dst_len = prefixlen,
1726 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1727 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1728 .fc_nlinfo.pid = 0,
1729 .fc_nlinfo.nlh = NULL,
1730 .fc_nlinfo.nl_net = net,
86872cb5
TG
1731 };
1732
1733 ipv6_addr_copy(&cfg.fc_dst, prefix);
1734 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1735
e317da96
YH
1736 /* We should treat it as a default route if prefix length is 0. */
1737 if (!prefixlen)
86872cb5 1738 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1739
86872cb5 1740 ip6_route_add(&cfg);
70ceb4f5 1741
efa2cea0 1742 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1743}
1744#endif
1745
1da177e4 1746struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1747{
1da177e4 1748 struct rt6_info *rt;
c71099ac 1749 struct fib6_table *table;
1da177e4 1750
c346dca1 1751 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1752 if (table == NULL)
1753 return NULL;
1da177e4 1754
c71099ac 1755 write_lock_bh(&table->tb6_lock);
d8d1f30b 1756 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1757 if (dev == rt->rt6i_dev &&
045927ff 1758 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1759 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1760 break;
1761 }
1762 if (rt)
d8d1f30b 1763 dst_hold(&rt->dst);
c71099ac 1764 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1765 return rt;
1766}
1767
1768struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1769 struct net_device *dev,
1770 unsigned int pref)
1da177e4 1771{
86872cb5
TG
1772 struct fib6_config cfg = {
1773 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1774 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1775 .fc_ifindex = dev->ifindex,
1776 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1777 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1778 .fc_nlinfo.pid = 0,
1779 .fc_nlinfo.nlh = NULL,
c346dca1 1780 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1781 };
1da177e4 1782
86872cb5 1783 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1784
86872cb5 1785 ip6_route_add(&cfg);
1da177e4 1786
1da177e4
LT
1787 return rt6_get_dflt_router(gwaddr, dev);
1788}
1789
7b4da532 1790void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1791{
1792 struct rt6_info *rt;
c71099ac
TG
1793 struct fib6_table *table;
1794
1795 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1796 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1797 if (table == NULL)
1798 return;
1da177e4
LT
1799
1800restart:
c71099ac 1801 read_lock_bh(&table->tb6_lock);
d8d1f30b 1802 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1803 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1804 dst_hold(&rt->dst);
c71099ac 1805 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1806 ip6_del_rt(rt);
1da177e4
LT
1807 goto restart;
1808 }
1809 }
c71099ac 1810 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1811}
1812
5578689a
DL
1813static void rtmsg_to_fib6_config(struct net *net,
1814 struct in6_rtmsg *rtmsg,
86872cb5
TG
1815 struct fib6_config *cfg)
1816{
1817 memset(cfg, 0, sizeof(*cfg));
1818
1819 cfg->fc_table = RT6_TABLE_MAIN;
1820 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1821 cfg->fc_metric = rtmsg->rtmsg_metric;
1822 cfg->fc_expires = rtmsg->rtmsg_info;
1823 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1824 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1825 cfg->fc_flags = rtmsg->rtmsg_flags;
1826
5578689a 1827 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1828
86872cb5
TG
1829 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1830 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1831 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1832}
1833
5578689a 1834int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1835{
86872cb5 1836 struct fib6_config cfg;
1da177e4
LT
1837 struct in6_rtmsg rtmsg;
1838 int err;
1839
1840 switch(cmd) {
1841 case SIOCADDRT: /* Add a route */
1842 case SIOCDELRT: /* Delete a route */
1843 if (!capable(CAP_NET_ADMIN))
1844 return -EPERM;
1845 err = copy_from_user(&rtmsg, arg,
1846 sizeof(struct in6_rtmsg));
1847 if (err)
1848 return -EFAULT;
86872cb5 1849
5578689a 1850 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1851
1da177e4
LT
1852 rtnl_lock();
1853 switch (cmd) {
1854 case SIOCADDRT:
86872cb5 1855 err = ip6_route_add(&cfg);
1da177e4
LT
1856 break;
1857 case SIOCDELRT:
86872cb5 1858 err = ip6_route_del(&cfg);
1da177e4
LT
1859 break;
1860 default:
1861 err = -EINVAL;
1862 }
1863 rtnl_unlock();
1864
1865 return err;
3ff50b79 1866 }
1da177e4
LT
1867
1868 return -EINVAL;
1869}
1870
1871/*
1872 * Drop the packet on the floor
1873 */
1874
d5fdd6ba 1875static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1876{
612f09e8 1877 int type;
adf30907 1878 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1879 switch (ipstats_mib_noroutes) {
1880 case IPSTATS_MIB_INNOROUTES:
0660e03f 1881 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1882 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1883 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1884 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1885 break;
1886 }
1887 /* FALLTHROUGH */
1888 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1889 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1890 ipstats_mib_noroutes);
612f09e8
YH
1891 break;
1892 }
3ffe533c 1893 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1894 kfree_skb(skb);
1895 return 0;
1896}
1897
9ce8ade0
TG
1898static int ip6_pkt_discard(struct sk_buff *skb)
1899{
612f09e8 1900 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1901}
1902
20380731 1903static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1904{
adf30907 1905 skb->dev = skb_dst(skb)->dev;
612f09e8 1906 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1907}
1908
6723ab54
DM
1909#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1910
9ce8ade0
TG
1911static int ip6_pkt_prohibit(struct sk_buff *skb)
1912{
612f09e8 1913 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1914}
1915
1916static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1917{
adf30907 1918 skb->dev = skb_dst(skb)->dev;
612f09e8 1919 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1920}
1921
6723ab54
DM
1922#endif
1923
1da177e4
LT
1924/*
1925 * Allocate a dst for local (unicast / anycast) address.
1926 */
1927
1928struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1929 const struct in6_addr *addr,
1930 int anycast)
1931{
c346dca1 1932 struct net *net = dev_net(idev->dev);
86393e52 1933 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1934 struct neighbour *neigh;
1da177e4 1935
40385653
BG
1936 if (rt == NULL) {
1937 if (net_ratelimit())
1938 pr_warning("IPv6: Maximum number of routes reached,"
1939 " consider increasing route/max_size.\n");
1da177e4 1940 return ERR_PTR(-ENOMEM);
40385653 1941 }
1da177e4 1942
5578689a 1943 dev_hold(net->loopback_dev);
1da177e4
LT
1944 in6_dev_hold(idev);
1945
d8d1f30b
CG
1946 rt->dst.flags = DST_HOST;
1947 rt->dst.input = ip6_input;
1948 rt->dst.output = ip6_output;
5578689a 1949 rt->rt6i_dev = net->loopback_dev;
1da177e4 1950 rt->rt6i_idev = idev;
d8d1f30b
CG
1951 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1952 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1953 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1954 rt->dst.obsolete = -1;
1da177e4
LT
1955
1956 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1957 if (anycast)
1958 rt->rt6i_flags |= RTF_ANYCAST;
1959 else
1da177e4 1960 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1961 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1962 if (IS_ERR(neigh)) {
d8d1f30b 1963 dst_free(&rt->dst);
14deae41
DM
1964
1965 /* We are casting this because that is the return
1966 * value type. But an errno encoded pointer is the
1967 * same regardless of the underlying pointer type,
1968 * and that's what we are returning. So this is OK.
1969 */
1970 return (struct rt6_info *) neigh;
1da177e4 1971 }
14deae41 1972 rt->rt6i_nexthop = neigh;
1da177e4
LT
1973
1974 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1975 rt->rt6i_dst.plen = 128;
5578689a 1976 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 1977
d8d1f30b 1978 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
1979
1980 return rt;
1981}
1982
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through a void pointer. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry must be kept */
};
1987
1da177e4
LT
1988static int fib6_ifdown(struct rt6_info *rt, void *arg)
1989{
8ed67789
DL
1990 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1991 struct net *net = ((struct arg_dev_net *)arg)->net;
1992
1993 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1994 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
1995 RT6_TRACE("deleted by ifdown %p\n", rt);
1996 return -1;
1997 }
1998 return 0;
1999}
2000
f3db4851 2001void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2002{
8ed67789
DL
2003 struct arg_dev_net adn = {
2004 .dev = dev,
2005 .net = net,
2006 };
2007
2008 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2009 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2010}
2011
/* Argument bundle handed to rt6_mtu_change_route() through a void pointer. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2017
2018static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2019{
2020 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2021 struct inet6_dev *idev;
c346dca1 2022 struct net *net = dev_net(arg->dev);
1da177e4
LT
2023
2024 /* In IPv6 pmtu discovery is not optional,
2025 so that RTAX_MTU lock cannot disable it.
2026 We still use this lock to block changes
2027 caused by addrconf/ndisc.
2028 */
2029
2030 idev = __in6_dev_get(arg->dev);
2031 if (idev == NULL)
2032 return 0;
2033
2034 /* For administrative MTU increase, there is no way to discover
2035 IPv6 PMTU increase, so PMTU increase should be updated here.
2036 Since RFC 1981 doesn't include administrative MTU increase
2037 update PMTU increase is a MUST. (i.e. jumbo frame)
2038 */
2039 /*
2040 If new MTU is less than route PMTU, this new MTU will be the
2041 lowest MTU in the path, update the route PMTU to reflect PMTU
2042 decreases; if new MTU is greater than route PMTU, and the
2043 old MTU is the lowest MTU in the path, update the route PMTU
2044 to reflect the increase. In this case if the other nodes' MTU
2045 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2046 PMTU discouvery.
2047 */
2048 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2049 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2050 (dst_mtu(&rt->dst) >= arg->mtu ||
2051 (dst_mtu(&rt->dst) < arg->mtu &&
2052 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2053 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2054 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 2055 }
1da177e4
LT
2056 return 0;
2057}
2058
2059void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2060{
c71099ac
TG
2061 struct rt6_mtu_change_arg arg = {
2062 .dev = dev,
2063 .mtu = mtu,
2064 };
1da177e4 2065
c346dca1 2066 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2067}
2068
ef7c79ed 2069static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2070 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2071 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2072 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2073 [RTA_PRIORITY] = { .type = NLA_U32 },
2074 [RTA_METRICS] = { .type = NLA_NESTED },
2075};
2076
2077static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2078 struct fib6_config *cfg)
1da177e4 2079{
86872cb5
TG
2080 struct rtmsg *rtm;
2081 struct nlattr *tb[RTA_MAX+1];
2082 int err;
1da177e4 2083
86872cb5
TG
2084 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2085 if (err < 0)
2086 goto errout;
1da177e4 2087
86872cb5
TG
2088 err = -EINVAL;
2089 rtm = nlmsg_data(nlh);
2090 memset(cfg, 0, sizeof(*cfg));
2091
2092 cfg->fc_table = rtm->rtm_table;
2093 cfg->fc_dst_len = rtm->rtm_dst_len;
2094 cfg->fc_src_len = rtm->rtm_src_len;
2095 cfg->fc_flags = RTF_UP;
2096 cfg->fc_protocol = rtm->rtm_protocol;
2097
2098 if (rtm->rtm_type == RTN_UNREACHABLE)
2099 cfg->fc_flags |= RTF_REJECT;
2100
ab79ad14
2101 if (rtm->rtm_type == RTN_LOCAL)
2102 cfg->fc_flags |= RTF_LOCAL;
2103
86872cb5
TG
2104 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2105 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2106 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2107
2108 if (tb[RTA_GATEWAY]) {
2109 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2110 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2111 }
86872cb5
TG
2112
2113 if (tb[RTA_DST]) {
2114 int plen = (rtm->rtm_dst_len + 7) >> 3;
2115
2116 if (nla_len(tb[RTA_DST]) < plen)
2117 goto errout;
2118
2119 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2120 }
86872cb5
TG
2121
2122 if (tb[RTA_SRC]) {
2123 int plen = (rtm->rtm_src_len + 7) >> 3;
2124
2125 if (nla_len(tb[RTA_SRC]) < plen)
2126 goto errout;
2127
2128 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2129 }
86872cb5
TG
2130
2131 if (tb[RTA_OIF])
2132 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2133
2134 if (tb[RTA_PRIORITY])
2135 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2136
2137 if (tb[RTA_METRICS]) {
2138 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2139 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2140 }
86872cb5
TG
2141
2142 if (tb[RTA_TABLE])
2143 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2144
2145 err = 0;
2146errout:
2147 return err;
1da177e4
LT
2148}
2149
c127ea2c 2150static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2151{
86872cb5
TG
2152 struct fib6_config cfg;
2153 int err;
1da177e4 2154
86872cb5
TG
2155 err = rtm_to_fib6_config(skb, nlh, &cfg);
2156 if (err < 0)
2157 return err;
2158
2159 return ip6_route_del(&cfg);
1da177e4
LT
2160}
2161
c127ea2c 2162static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2163{
86872cb5
TG
2164 struct fib6_config cfg;
2165 int err;
1da177e4 2166
86872cb5
TG
2167 err = rtm_to_fib6_config(skb, nlh, &cfg);
2168 if (err < 0)
2169 return err;
2170
2171 return ip6_route_add(&cfg);
1da177e4
LT
2172}
2173
339bf98f
TG
2174static inline size_t rt6_nlmsg_size(void)
2175{
2176 return NLMSG_ALIGN(sizeof(struct rtmsg))
2177 + nla_total_size(16) /* RTA_SRC */
2178 + nla_total_size(16) /* RTA_DST */
2179 + nla_total_size(16) /* RTA_GATEWAY */
2180 + nla_total_size(16) /* RTA_PREFSRC */
2181 + nla_total_size(4) /* RTA_TABLE */
2182 + nla_total_size(4) /* RTA_IIF */
2183 + nla_total_size(4) /* RTA_OIF */
2184 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2185 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2186 + nla_total_size(sizeof(struct rta_cacheinfo));
2187}
2188
191cd582
BH
2189static int rt6_fill_node(struct net *net,
2190 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2191 struct in6_addr *dst, struct in6_addr *src,
2192 int iif, int type, u32 pid, u32 seq,
7bc570c8 2193 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2194{
2195 struct rtmsg *rtm;
2d7202bf 2196 struct nlmsghdr *nlh;
e3703b3d 2197 long expires;
9e762a4a 2198 u32 table;
1da177e4
LT
2199
2200 if (prefix) { /* user wants prefix routes only */
2201 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2202 /* success since this is not a prefix route */
2203 return 1;
2204 }
2205 }
2206
2d7202bf
TG
2207 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2208 if (nlh == NULL)
26932566 2209 return -EMSGSIZE;
2d7202bf
TG
2210
2211 rtm = nlmsg_data(nlh);
1da177e4
LT
2212 rtm->rtm_family = AF_INET6;
2213 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2214 rtm->rtm_src_len = rt->rt6i_src.plen;
2215 rtm->rtm_tos = 0;
c71099ac 2216 if (rt->rt6i_table)
9e762a4a 2217 table = rt->rt6i_table->tb6_id;
c71099ac 2218 else
9e762a4a
PM
2219 table = RT6_TABLE_UNSPEC;
2220 rtm->rtm_table = table;
2d7202bf 2221 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2222 if (rt->rt6i_flags&RTF_REJECT)
2223 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2224 else if (rt->rt6i_flags&RTF_LOCAL)
2225 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2226 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2227 rtm->rtm_type = RTN_LOCAL;
2228 else
2229 rtm->rtm_type = RTN_UNICAST;
2230 rtm->rtm_flags = 0;
2231 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2232 rtm->rtm_protocol = rt->rt6i_protocol;
2233 if (rt->rt6i_flags&RTF_DYNAMIC)
2234 rtm->rtm_protocol = RTPROT_REDIRECT;
2235 else if (rt->rt6i_flags & RTF_ADDRCONF)
2236 rtm->rtm_protocol = RTPROT_KERNEL;
2237 else if (rt->rt6i_flags&RTF_DEFAULT)
2238 rtm->rtm_protocol = RTPROT_RA;
2239
2240 if (rt->rt6i_flags&RTF_CACHE)
2241 rtm->rtm_flags |= RTM_F_CLONED;
2242
2243 if (dst) {
2d7202bf 2244 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2245 rtm->rtm_dst_len = 128;
1da177e4 2246 } else if (rtm->rtm_dst_len)
2d7202bf 2247 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2248#ifdef CONFIG_IPV6_SUBTREES
2249 if (src) {
2d7202bf 2250 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2251 rtm->rtm_src_len = 128;
1da177e4 2252 } else if (rtm->rtm_src_len)
2d7202bf 2253 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2254#endif
7bc570c8
YH
2255 if (iif) {
2256#ifdef CONFIG_IPV6_MROUTE
2257 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2258 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2259 if (err <= 0) {
2260 if (!nowait) {
2261 if (err == 0)
2262 return 0;
2263 goto nla_put_failure;
2264 } else {
2265 if (err == -EMSGSIZE)
2266 goto nla_put_failure;
2267 }
2268 }
2269 } else
2270#endif
2271 NLA_PUT_U32(skb, RTA_IIF, iif);
2272 } else if (dst) {
d8d1f30b 2273 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2274 struct in6_addr saddr_buf;
191cd582 2275 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2276 dst, 0, &saddr_buf) == 0)
2d7202bf 2277 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2278 }
2d7202bf 2279
d8d1f30b 2280 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2d7202bf
TG
2281 goto nla_put_failure;
2282
d8d1f30b
CG
2283 if (rt->dst.neighbour)
2284 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2285
d8d1f30b 2286 if (rt->dst.dev)
2d7202bf
TG
2287 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2288
2289 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2290
36e3deae
YH
2291 if (!(rt->rt6i_flags & RTF_EXPIRES))
2292 expires = 0;
2293 else if (rt->rt6i_expires - jiffies < INT_MAX)
2294 expires = rt->rt6i_expires - jiffies;
2295 else
2296 expires = INT_MAX;
69cdf8f9 2297
d8d1f30b
CG
2298 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2299 expires, rt->dst.error) < 0)
e3703b3d 2300 goto nla_put_failure;
2d7202bf
TG
2301
2302 return nlmsg_end(skb, nlh);
2303
2304nla_put_failure:
26932566
PM
2305 nlmsg_cancel(skb, nlh);
2306 return -EMSGSIZE;
1da177e4
LT
2307}
2308
1b43af54 2309int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2310{
2311 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2312 int prefix;
2313
2d7202bf
TG
2314 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2315 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2316 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2317 } else
2318 prefix = 0;
2319
191cd582
BH
2320 return rt6_fill_node(arg->net,
2321 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2322 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2323 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2324}
2325
c127ea2c 2326static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2327{
3b1e0a65 2328 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2329 struct nlattr *tb[RTA_MAX+1];
2330 struct rt6_info *rt;
1da177e4 2331 struct sk_buff *skb;
ab364a6f 2332 struct rtmsg *rtm;
1da177e4 2333 struct flowi fl;
ab364a6f 2334 int err, iif = 0;
1da177e4 2335
ab364a6f
TG
2336 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2337 if (err < 0)
2338 goto errout;
1da177e4 2339
ab364a6f 2340 err = -EINVAL;
1da177e4 2341 memset(&fl, 0, sizeof(fl));
1da177e4 2342
ab364a6f
TG
2343 if (tb[RTA_SRC]) {
2344 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2345 goto errout;
2346
2347 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2348 }
2349
2350 if (tb[RTA_DST]) {
2351 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2352 goto errout;
2353
2354 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2355 }
2356
2357 if (tb[RTA_IIF])
2358 iif = nla_get_u32(tb[RTA_IIF]);
2359
2360 if (tb[RTA_OIF])
2361 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2362
2363 if (iif) {
2364 struct net_device *dev;
5578689a 2365 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2366 if (!dev) {
2367 err = -ENODEV;
ab364a6f 2368 goto errout;
1da177e4
LT
2369 }
2370 }
2371
ab364a6f
TG
2372 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2373 if (skb == NULL) {
2374 err = -ENOBUFS;
2375 goto errout;
2376 }
1da177e4 2377
ab364a6f
TG
2378 /* Reserve room for dummy headers, this skb can pass
2379 through good chunk of routing engine.
2380 */
459a98ed 2381 skb_reset_mac_header(skb);
ab364a6f 2382 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2383
8a3edd80 2384 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2385 skb_dst_set(skb, &rt->dst);
1da177e4 2386
191cd582 2387 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2388 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2389 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2390 if (err < 0) {
ab364a6f
TG
2391 kfree_skb(skb);
2392 goto errout;
1da177e4
LT
2393 }
2394
5578689a 2395 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2396errout:
1da177e4 2397 return err;
1da177e4
LT
2398}
2399
86872cb5 2400void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2401{
2402 struct sk_buff *skb;
5578689a 2403 struct net *net = info->nl_net;
528c4ceb
DL
2404 u32 seq;
2405 int err;
2406
2407 err = -ENOBUFS;
2408 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2409
339bf98f 2410 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2411 if (skb == NULL)
2412 goto errout;
2413
191cd582 2414 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2415 event, info->pid, seq, 0, 0, 0);
26932566
PM
2416 if (err < 0) {
2417 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2418 WARN_ON(err == -EMSGSIZE);
2419 kfree_skb(skb);
2420 goto errout;
2421 }
1ce85fe4
PNA
2422 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2423 info->nlh, gfp_any());
2424 return;
21713ebc
TG
2425errout:
2426 if (err < 0)
5578689a 2427 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2428}
2429
8ed67789
DL
2430static int ip6_route_dev_notify(struct notifier_block *this,
2431 unsigned long event, void *data)
2432{
2433 struct net_device *dev = (struct net_device *)data;
c346dca1 2434 struct net *net = dev_net(dev);
8ed67789
DL
2435
2436 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2437 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2438 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2439#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2440 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2441 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2442 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2443 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2444#endif
2445 }
2446
2447 return NOTIFY_OK;
2448}
2449
1da177e4
LT
2450/*
2451 * /proc
2452 */
2453
2454#ifdef CONFIG_PROC_FS
2455
/*
 * NOTE(review): neither RT6_INFO_LEN nor struct rt6_proc_arg is referenced
 * in the visible part of this file; they look like leftovers from a
 * pre-seq_file /proc implementation -- confirm before removing.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2466
2467static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2468{
33120b30 2469 struct seq_file *m = p_arg;
1da177e4 2470
4b7a4274 2471 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2472
2473#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2474 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2475#else
33120b30 2476 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2477#endif
2478
2479 if (rt->rt6i_nexthop) {
4b7a4274 2480 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2481 } else {
33120b30 2482 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2483 }
33120b30 2484 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2485 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2486 rt->dst.__use, rt->rt6i_flags,
33120b30 2487 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2488 return 0;
2489}
2490
33120b30 2491static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2492{
f3db4851
DL
2493 struct net *net = (struct net *)m->private;
2494 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2495 return 0;
2496}
1da177e4 2497
33120b30
AD
2498static int ipv6_route_open(struct inode *inode, struct file *file)
2499{
de05c557 2500 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2501}
2502
33120b30
AD
2503static const struct file_operations ipv6_route_proc_fops = {
2504 .owner = THIS_MODULE,
2505 .open = ipv6_route_open,
2506 .read = seq_read,
2507 .llseek = seq_lseek,
b6fcbdb4 2508 .release = single_release_net,
33120b30
AD
2509};
2510
1da177e4
LT
2511static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2512{
69ddb805 2513 struct net *net = (struct net *)seq->private;
1da177e4 2514 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2515 net->ipv6.rt6_stats->fib_nodes,
2516 net->ipv6.rt6_stats->fib_route_nodes,
2517 net->ipv6.rt6_stats->fib_rt_alloc,
2518 net->ipv6.rt6_stats->fib_rt_entries,
2519 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2520 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2521 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2522
2523 return 0;
2524}
2525
2526static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2527{
de05c557 2528 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2529}
2530
9a32144e 2531static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2532 .owner = THIS_MODULE,
2533 .open = rt6_stats_seq_open,
2534 .read = seq_read,
2535 .llseek = seq_lseek,
b6fcbdb4 2536 .release = single_release_net,
1da177e4
LT
2537};
2538#endif /* CONFIG_PROC_FS */
2539
2540#ifdef CONFIG_SYSCTL
2541
1da177e4 2542static
8d65af78 2543int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2544 void __user *buffer, size_t *lenp, loff_t *ppos)
2545{
5b7c931d
DL
2546 struct net *net = current->nsproxy->net_ns;
2547 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2548 if (write) {
8d65af78 2549 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2550 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2551 return 0;
2552 } else
2553 return -EINVAL;
2554}
2555
760f2d01 2556ctl_table ipv6_route_table_template[] = {
1ab1457c 2557 {
1da177e4 2558 .procname = "flush",
4990509f 2559 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2560 .maxlen = sizeof(int),
89c8b3a1 2561 .mode = 0200,
6d9f239a 2562 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2563 },
2564 {
1da177e4 2565 .procname = "gc_thresh",
9a7ec3a9 2566 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2567 .maxlen = sizeof(int),
2568 .mode = 0644,
6d9f239a 2569 .proc_handler = proc_dointvec,
1da177e4
LT
2570 },
2571 {
1da177e4 2572 .procname = "max_size",
4990509f 2573 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2574 .maxlen = sizeof(int),
2575 .mode = 0644,
6d9f239a 2576 .proc_handler = proc_dointvec,
1da177e4
LT
2577 },
2578 {
1da177e4 2579 .procname = "gc_min_interval",
4990509f 2580 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2581 .maxlen = sizeof(int),
2582 .mode = 0644,
6d9f239a 2583 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2584 },
2585 {
1da177e4 2586 .procname = "gc_timeout",
4990509f 2587 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2588 .maxlen = sizeof(int),
2589 .mode = 0644,
6d9f239a 2590 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2591 },
2592 {
1da177e4 2593 .procname = "gc_interval",
4990509f 2594 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2595 .maxlen = sizeof(int),
2596 .mode = 0644,
6d9f239a 2597 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2598 },
2599 {
1da177e4 2600 .procname = "gc_elasticity",
4990509f 2601 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2602 .maxlen = sizeof(int),
2603 .mode = 0644,
f3d3f616 2604 .proc_handler = proc_dointvec,
1da177e4
LT
2605 },
2606 {
1da177e4 2607 .procname = "mtu_expires",
4990509f 2608 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2609 .maxlen = sizeof(int),
2610 .mode = 0644,
6d9f239a 2611 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2612 },
2613 {
1da177e4 2614 .procname = "min_adv_mss",
4990509f 2615 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2616 .maxlen = sizeof(int),
2617 .mode = 0644,
f3d3f616 2618 .proc_handler = proc_dointvec,
1da177e4
LT
2619 },
2620 {
1da177e4 2621 .procname = "gc_min_interval_ms",
4990509f 2622 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2623 .maxlen = sizeof(int),
2624 .mode = 0644,
6d9f239a 2625 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2626 },
f8572d8f 2627 { }
1da177e4
LT
2628};
2629
2c8c1e72 2630struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2631{
2632 struct ctl_table *table;
2633
2634 table = kmemdup(ipv6_route_table_template,
2635 sizeof(ipv6_route_table_template),
2636 GFP_KERNEL);
5ee09105
YH
2637
2638 if (table) {
2639 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2640 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2641 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2642 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2643 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2644 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2645 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2646 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2647 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2648 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2649 }
2650
760f2d01
DL
2651 return table;
2652}
1da177e4
LT
2653#endif
2654
2c8c1e72 2655static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2656{
633d424b 2657 int ret = -ENOMEM;
8ed67789 2658
86393e52
AD
2659 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2660 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2661
fc66f95c
ED
2662 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2663 goto out_ip6_dst_ops;
2664
8ed67789
DL
2665 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2666 sizeof(*net->ipv6.ip6_null_entry),
2667 GFP_KERNEL);
2668 if (!net->ipv6.ip6_null_entry)
fc66f95c 2669 goto out_ip6_dst_entries;
d8d1f30b 2670 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2671 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2672 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2673
2674#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2675 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2676 sizeof(*net->ipv6.ip6_prohibit_entry),
2677 GFP_KERNEL);
68fffc67
PZ
2678 if (!net->ipv6.ip6_prohibit_entry)
2679 goto out_ip6_null_entry;
d8d1f30b 2680 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2681 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2682 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2683
2684 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2685 sizeof(*net->ipv6.ip6_blk_hole_entry),
2686 GFP_KERNEL);
68fffc67
PZ
2687 if (!net->ipv6.ip6_blk_hole_entry)
2688 goto out_ip6_prohibit_entry;
d8d1f30b 2689 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2690 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2691 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2692#endif
2693
b339a47c
PZ
2694 net->ipv6.sysctl.flush_delay = 0;
2695 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2696 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2697 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2698 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2699 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2700 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2701 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2702
cdb18761
DL
2703#ifdef CONFIG_PROC_FS
2704 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2705 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2706#endif
6891a346
BT
2707 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2708
8ed67789
DL
2709 ret = 0;
2710out:
2711 return ret;
f2fc6a54 2712
68fffc67
PZ
2713#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2714out_ip6_prohibit_entry:
2715 kfree(net->ipv6.ip6_prohibit_entry);
2716out_ip6_null_entry:
2717 kfree(net->ipv6.ip6_null_entry);
2718#endif
fc66f95c
ED
2719out_ip6_dst_entries:
2720 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2721out_ip6_dst_ops:
f2fc6a54 2722 goto out;
cdb18761
DL
2723}
2724
2c8c1e72 2725static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2726{
2727#ifdef CONFIG_PROC_FS
2728 proc_net_remove(net, "ipv6_route");
2729 proc_net_remove(net, "rt6_stats");
2730#endif
8ed67789
DL
2731 kfree(net->ipv6.ip6_null_entry);
2732#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2733 kfree(net->ipv6.ip6_prohibit_entry);
2734 kfree(net->ipv6.ip6_blk_hole_entry);
2735#endif
41bb78b4 2736 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2737}
2738
2739static struct pernet_operations ip6_route_net_ops = {
2740 .init = ip6_route_net_init,
2741 .exit = ip6_route_net_exit,
2742};
2743
8ed67789
DL
2744static struct notifier_block ip6_route_dev_notifier = {
2745 .notifier_call = ip6_route_dev_notify,
2746 .priority = 0,
2747};
2748
433d49c3 2749int __init ip6_route_init(void)
1da177e4 2750{
433d49c3
DL
2751 int ret;
2752
9a7ec3a9
DL
2753 ret = -ENOMEM;
2754 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2755 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2756 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2757 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2758 goto out;
14e50e57 2759
fc66f95c 2760 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2761 if (ret)
bdb3289f 2762 goto out_kmem_cache;
bdb3289f 2763
fc66f95c
ED
2764 ret = register_pernet_subsys(&ip6_route_net_ops);
2765 if (ret)
2766 goto out_dst_entries;
2767
5dc121e9
AE
2768 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2769
8ed67789
DL
2770 /* Registering of the loopback is done before this portion of code,
2771 * the loopback reference in rt6_info will not be taken, do it
2772 * manually for init_net */
d8d1f30b 2773 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2774 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2775 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2776 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2777 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2778 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2779 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2780 #endif
433d49c3
DL
2781 ret = fib6_init();
2782 if (ret)
8ed67789 2783 goto out_register_subsys;
433d49c3 2784
433d49c3
DL
2785 ret = xfrm6_init();
2786 if (ret)
cdb18761 2787 goto out_fib6_init;
c35b7e72 2788
433d49c3
DL
2789 ret = fib6_rules_init();
2790 if (ret)
2791 goto xfrm6_init;
7e5449c2 2792
433d49c3
DL
2793 ret = -ENOBUFS;
2794 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2795 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2796 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2797 goto fib6_rules_init;
c127ea2c 2798
8ed67789 2799 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2800 if (ret)
2801 goto fib6_rules_init;
8ed67789 2802
433d49c3
DL
2803out:
2804 return ret;
2805
2806fib6_rules_init:
433d49c3
DL
2807 fib6_rules_cleanup();
2808xfrm6_init:
433d49c3 2809 xfrm6_fini();
433d49c3 2810out_fib6_init:
433d49c3 2811 fib6_gc_cleanup();
8ed67789
DL
2812out_register_subsys:
2813 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2814out_dst_entries:
2815 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2816out_kmem_cache:
f2fc6a54 2817 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2818 goto out;
1da177e4
LT
2819}
2820
2821void ip6_route_cleanup(void)
2822{
8ed67789 2823 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2824 fib6_rules_cleanup();
1da177e4 2825 xfrm6_fini();
1da177e4 2826 fib6_gc_cleanup();
8ed67789 2827 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2828 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2829 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2830}