]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
neigh: Protect neigh->ha[] with a seqlock
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug verbosity for this file; raise to 3 to enable tracing output. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: both helpers expand to nothing useful. */
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
/* Tracing enabled: forward straight to printk. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/* When non-zero, ip6_pol_route() clones routes via rt6_alloc_clone(). */
#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 111 .local_out = __ip6_local_out,
e2422970 112 .entries = ATOMIC_INIT(0),
1da177e4
LT
113};
114
14e50e57
DM
115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116{
117}
118
119static struct dst_ops ip6_dst_blackhole_ops = {
120 .family = AF_INET6,
09640e63 121 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
122 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu,
e2422970 125 .entries = ATOMIC_INIT(0),
14e50e57
DM
126};
127
bdb3289f 128static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
1da177e4
LT
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 139 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
140 .rt6i_metric = ~(u32) 0,
141 .rt6i_ref = ATOMIC_INIT(1),
142};
143
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" route: reject route with dst error -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
	},
	.rt6i_flags	= RTF_REJECT | RTF_NONEXTHOP,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: reject route with dst error -EINVAL
 * whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
		.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
	},
	.rt6i_flags	= RTF_REJECT | RTF_NONEXTHOP,
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32)0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
182
/*
 * Allocate a dst entry from @ops and return it viewed as an rt6_info.
 * Returns whatever dst_alloc() returned, cast to struct rt6_info *.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops);

	return (struct rt6_info *)dst;
}
188
189static void ip6_dst_destroy(struct dst_entry *dst)
190{
191 struct rt6_info *rt = (struct rt6_info *)dst;
192 struct inet6_dev *idev = rt->rt6i_idev;
193
194 if (idev != NULL) {
195 rt->rt6i_idev = NULL;
196 in6_dev_put(idev);
1ab1457c 197 }
1da177e4
LT
198}
199
200static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
201 int how)
202{
203 struct rt6_info *rt = (struct rt6_info *)dst;
204 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 205 struct net_device *loopback_dev =
c346dca1 206 dev_net(dev)->loopback_dev;
1da177e4 207
5a3e55d6
DL
208 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
209 struct inet6_dev *loopback_idev =
210 in6_dev_get(loopback_dev);
1da177e4
LT
211 if (loopback_idev != NULL) {
212 rt->rt6i_idev = loopback_idev;
213 in6_dev_put(idev);
214 }
215 }
216}
217
218static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{
a02cec21
ED
220 return (rt->rt6i_flags & RTF_EXPIRES) &&
221 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
222}
223
c71099ac
TG
224static inline int rt6_need_strict(struct in6_addr *daddr)
225{
a02cec21
ED
226 return ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
228}
229
1da177e4 230/*
c71099ac 231 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
232 */
233
8ed67789
DL
234static inline struct rt6_info *rt6_device_match(struct net *net,
235 struct rt6_info *rt,
dd3abc4e 236 struct in6_addr *saddr,
1da177e4 237 int oif,
d420895e 238 int flags)
1da177e4
LT
239{
240 struct rt6_info *local = NULL;
241 struct rt6_info *sprt;
242
dd3abc4e
YH
243 if (!oif && ipv6_addr_any(saddr))
244 goto out;
245
d8d1f30b 246 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
247 struct net_device *dev = sprt->rt6i_dev;
248
249 if (oif) {
1da177e4
LT
250 if (dev->ifindex == oif)
251 return sprt;
252 if (dev->flags & IFF_LOOPBACK) {
253 if (sprt->rt6i_idev == NULL ||
254 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 255 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 256 continue;
1ab1457c 257 if (local && (!oif ||
1da177e4
LT
258 local->rt6i_idev->dev->ifindex == oif))
259 continue;
260 }
261 local = sprt;
262 }
dd3abc4e
YH
263 } else {
264 if (ipv6_chk_addr(net, saddr, dev,
265 flags & RT6_LOOKUP_F_IFACE))
266 return sprt;
1da177e4 267 }
dd3abc4e 268 }
1da177e4 269
dd3abc4e 270 if (oif) {
1da177e4
LT
271 if (local)
272 return local;
273
d420895e 274 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 275 return net->ipv6.ip6_null_entry;
1da177e4 276 }
dd3abc4e 277out:
1da177e4
LT
278 return rt;
279}
280
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next-hop neighbour of @rt is not in a
 * NUD_VALID state and more than rtr_probe_interval has elapsed since
 * neigh->updated, send a neighbour solicitation to the neighbour's
 * solicited-node multicast address.  @rt may be NULL.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked check first. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies,
			 neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due) {
		/*
		 * NOTE(review): neigh->updated is written while only the read
		 * side of neigh->lock is held - confirm this is intended.
		 */
		neigh->updated = jiffies;
	}
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	/* The solicitation is sent after the lock has been dropped. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
315
1da177e4 316/*
554cfb7e 317 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 318 */
b6f99a21 319static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
320{
321 struct net_device *dev = rt->rt6i_dev;
161980f4 322 if (!oif || dev->ifindex == oif)
554cfb7e 323 return 2;
161980f4
DM
324 if ((dev->flags & IFF_LOOPBACK) &&
325 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
326 return 1;
327 return 0;
554cfb7e 328}
1da177e4 329
b6f99a21 330static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 331{
554cfb7e 332 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 333 int m;
4d0c5911
YH
334 if (rt->rt6i_flags & RTF_NONEXTHOP ||
335 !(rt->rt6i_flags & RTF_GATEWAY))
336 m = 1;
337 else if (neigh) {
554cfb7e
YH
338 read_lock_bh(&neigh->lock);
339 if (neigh->nud_state & NUD_VALID)
4d0c5911 340 m = 2;
398bcbeb
YH
341#ifdef CONFIG_IPV6_ROUTER_PREF
342 else if (neigh->nud_state & NUD_FAILED)
343 m = 0;
344#endif
345 else
ea73ee23 346 m = 1;
554cfb7e 347 read_unlock_bh(&neigh->lock);
398bcbeb
YH
348 } else
349 m = 0;
554cfb7e 350 return m;
1da177e4
LT
351}
352
554cfb7e
YH
353static int rt6_score_route(struct rt6_info *rt, int oif,
354 int strict)
1da177e4 355{
4d0c5911 356 int m, n;
1ab1457c 357
4d0c5911 358 m = rt6_check_dev(rt, oif);
77d16f45 359 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 360 return -1;
ebacaaa0
YH
361#ifdef CONFIG_IPV6_ROUTER_PREF
362 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
363#endif
4d0c5911 364 n = rt6_check_neigh(rt);
557e92ef 365 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
366 return -1;
367 return m;
368}
369
f11e6659
DM
370static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
371 int *mpri, struct rt6_info *match)
554cfb7e 372{
f11e6659
DM
373 int m;
374
375 if (rt6_check_expired(rt))
376 goto out;
377
378 m = rt6_score_route(rt, oif, strict);
379 if (m < 0)
380 goto out;
381
382 if (m > *mpri) {
383 if (strict & RT6_LOOKUP_F_REACHABLE)
384 rt6_probe(match);
385 *mpri = m;
386 match = rt;
387 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
388 rt6_probe(rt);
389 }
390
391out:
392 return match;
393}
394
395static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
396 struct rt6_info *rr_head,
397 u32 metric, int oif, int strict)
398{
399 struct rt6_info *rt, *match;
554cfb7e 400 int mpri = -1;
1da177e4 401
f11e6659
DM
402 match = NULL;
403 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 404 rt = rt->dst.rt6_next)
f11e6659
DM
405 match = find_match(rt, oif, strict, &mpri, match);
406 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 407 rt = rt->dst.rt6_next)
f11e6659 408 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 409
f11e6659
DM
410 return match;
411}
1da177e4 412
f11e6659
DM
413static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
414{
415 struct rt6_info *match, *rt0;
8ed67789 416 struct net *net;
1da177e4 417
f11e6659 418 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 419 __func__, fn->leaf, oif);
554cfb7e 420
f11e6659
DM
421 rt0 = fn->rr_ptr;
422 if (!rt0)
423 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 424
f11e6659 425 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 426
554cfb7e 427 if (!match &&
f11e6659 428 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 429 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 430
554cfb7e 431 /* no entries matched; do round-robin */
f11e6659
DM
432 if (!next || next->rt6i_metric != rt0->rt6i_metric)
433 next = fn->leaf;
434
435 if (next != rt0)
436 fn->rr_ptr = next;
1da177e4 437 }
1da177e4 438
f11e6659 439 RT6_TRACE("%s() => %p\n",
0dc47877 440 __func__, match);
1da177e4 441
c346dca1 442 net = dev_net(rt0->rt6i_dev);
a02cec21 443 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
444}
445
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle a received route_info option (@opt, @len bytes) that arrived on
 * @dev from gateway @gwaddr.
 *
 * Validates the option, then looks up the matching route-info route:
 *   - lifetime 0 deletes an existing route;
 *   - a non-zero lifetime adds a missing route, or refreshes the preference
 *     bits of an existing one;
 *   - the expiry is (re)armed, or cleared for a non-finite lifetime.
 *
 * Returns 0 on success, -EINVAL if the option is malformed or carries the
 * invalid router preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *)opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length != 3) {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	} else {
		prefix = (struct in6_addr *)rinfo->prefix;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	else if (lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->dst);

	return 0;
}
#endif
519
/*
 * BACKTRACK - climb the fib6 tree when the current node yielded no route.
 *
 * If `rt` is the namespace's null entry, walk from `fn` towards the root.
 * At each step move to the parent, or, when the parent has a subtree that
 * is not the node we came from, to the result of a source-address lookup
 * (@saddr) in that subtree.  The walk stops with `goto out` at a node
 * flagged RTN_TL_ROOT, or with `goto restart` at the first node flagged
 * RTN_RTINFO.
 *
 * The macro is not self-contained: the expanding function must provide the
 * locals `rt` (struct rt6_info *) and `fn` (struct fib6_node *) as well as
 * the labels `out` and `restart`.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
c71099ac 537
8ed67789
DL
538static struct rt6_info *ip6_pol_route_lookup(struct net *net,
539 struct fib6_table *table,
c71099ac 540 struct flowi *fl, int flags)
1da177e4
LT
541{
542 struct fib6_node *fn;
543 struct rt6_info *rt;
544
c71099ac
TG
545 read_lock_bh(&table->tb6_lock);
546 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
547restart:
548 rt = fn->leaf;
dd3abc4e 549 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 550 BACKTRACK(net, &fl->fl6_src);
c71099ac 551out:
d8d1f30b 552 dst_use(&rt->dst, jiffies);
c71099ac 553 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
554 return rt;
555
556}
557
9acd9f3a
YH
558struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
559 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
560{
561 struct flowi fl = {
562 .oif = oif,
563 .nl_u = {
564 .ip6_u = {
565 .daddr = *daddr,
c71099ac
TG
566 },
567 },
568 };
569 struct dst_entry *dst;
77d16f45 570 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 571
adaa70bb
TG
572 if (saddr) {
573 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
574 flags |= RT6_LOOKUP_F_HAS_SADDR;
575 }
576
606a2b48 577 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
578 if (dst->error == 0)
579 return (struct rt6_info *) dst;
580
581 dst_release(dst);
582
1da177e4
LT
583 return NULL;
584}
585
7159039a
YH
586EXPORT_SYMBOL(rt6_lookup);
587
c71099ac 588/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
589 It takes new route entry, the addition fails by any reason the
590 route is freed. In any case, if caller does not hold it, it may
591 be destroyed.
592 */
593
86872cb5 594static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
595{
596 int err;
c71099ac 597 struct fib6_table *table;
1da177e4 598
c71099ac
TG
599 table = rt->rt6i_table;
600 write_lock_bh(&table->tb6_lock);
86872cb5 601 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 602 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
603
604 return err;
605}
606
40e22e8f
TG
607int ip6_ins_rt(struct rt6_info *rt)
608{
4d1169c1 609 struct nl_info info = {
c346dca1 610 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 611 };
528c4ceb 612 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
613}
614
95a9a5ba
YH
615static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616 struct in6_addr *saddr)
1da177e4 617{
1da177e4
LT
618 struct rt6_info *rt;
619
620 /*
621 * Clone the route.
622 */
623
624 rt = ip6_rt_copy(ort);
625
626 if (rt) {
14deae41
DM
627 struct neighbour *neigh;
628 int attempts = !in_softirq();
629
58c4fb86
YH
630 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631 if (rt->rt6i_dst.plen != 128 &&
632 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 634 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 635 }
1da177e4 636
58c4fb86 637 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
638 rt->rt6i_dst.plen = 128;
639 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 640 rt->dst.flags |= DST_HOST;
1da177e4
LT
641
642#ifdef CONFIG_IPV6_SUBTREES
643 if (rt->rt6i_src.plen && saddr) {
644 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645 rt->rt6i_src.plen = 128;
646 }
647#endif
648
14deae41
DM
649 retry:
650 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
651 if (IS_ERR(neigh)) {
652 struct net *net = dev_net(rt->rt6i_dev);
653 int saved_rt_min_interval =
654 net->ipv6.sysctl.ip6_rt_gc_min_interval;
655 int saved_rt_elasticity =
656 net->ipv6.sysctl.ip6_rt_gc_elasticity;
657
658 if (attempts-- > 0) {
659 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
660 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
661
86393e52 662 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
663
664 net->ipv6.sysctl.ip6_rt_gc_elasticity =
665 saved_rt_elasticity;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval =
667 saved_rt_min_interval;
668 goto retry;
669 }
670
671 if (net_ratelimit())
672 printk(KERN_WARNING
7e1b33e5 673 "ipv6: Neighbour table overflow.\n");
d8d1f30b 674 dst_free(&rt->dst);
14deae41
DM
675 return NULL;
676 }
677 rt->rt6i_nexthop = neigh;
1da177e4 678
95a9a5ba 679 }
1da177e4 680
95a9a5ba
YH
681 return rt;
682}
1da177e4 683
299d9939
YH
684static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
685{
686 struct rt6_info *rt = ip6_rt_copy(ort);
687 if (rt) {
688 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
689 rt->rt6i_dst.plen = 128;
690 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 691 rt->dst.flags |= DST_HOST;
299d9939
YH
692 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
693 }
694 return rt;
695}
696
8ed67789
DL
697static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
698 struct flowi *fl, int flags)
1da177e4
LT
699{
700 struct fib6_node *fn;
519fbd87 701 struct rt6_info *rt, *nrt;
c71099ac 702 int strict = 0;
1da177e4 703 int attempts = 3;
519fbd87 704 int err;
53b7997f 705 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 706
77d16f45 707 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
708
709relookup:
c71099ac 710 read_lock_bh(&table->tb6_lock);
1da177e4 711
8238dd06 712restart_2:
c71099ac 713 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
714
715restart:
4acad72d 716 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
717
718 BACKTRACK(net, &fl->fl6_src);
719 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 720 rt->rt6i_flags & RTF_CACHE)
1ddef044 721 goto out;
1da177e4 722
d8d1f30b 723 dst_hold(&rt->dst);
c71099ac 724 read_unlock_bh(&table->tb6_lock);
fb9de91e 725
519fbd87 726 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 727 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
728 else {
729#if CLONE_OFFLINK_ROUTE
c71099ac 730 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
731#else
732 goto out2;
733#endif
734 }
e40cf353 735
d8d1f30b 736 dst_release(&rt->dst);
8ed67789 737 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 738
d8d1f30b 739 dst_hold(&rt->dst);
519fbd87 740 if (nrt) {
40e22e8f 741 err = ip6_ins_rt(nrt);
519fbd87 742 if (!err)
1da177e4 743 goto out2;
1da177e4 744 }
1da177e4 745
519fbd87
YH
746 if (--attempts <= 0)
747 goto out2;
748
749 /*
c71099ac 750 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
751 * released someone could insert this route. Relookup.
752 */
d8d1f30b 753 dst_release(&rt->dst);
519fbd87
YH
754 goto relookup;
755
756out:
8238dd06
YH
757 if (reachable) {
758 reachable = 0;
759 goto restart_2;
760 }
d8d1f30b 761 dst_hold(&rt->dst);
c71099ac 762 read_unlock_bh(&table->tb6_lock);
1da177e4 763out2:
d8d1f30b
CG
764 rt->dst.lastuse = jiffies;
765 rt->dst.__use++;
c71099ac
TG
766
767 return rt;
1da177e4
LT
768}
769
8ed67789 770static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
771 struct flowi *fl, int flags)
772{
8ed67789 773 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
774}
775
c71099ac
TG
776void ip6_route_input(struct sk_buff *skb)
777{
0660e03f 778 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 779 struct net *net = dev_net(skb->dev);
adaa70bb 780 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
781 struct flowi fl = {
782 .iif = skb->dev->ifindex,
783 .nl_u = {
784 .ip6_u = {
785 .daddr = iph->daddr,
786 .saddr = iph->saddr,
90bcaf7b 787 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
788 },
789 },
1ab1457c 790 .mark = skb->mark,
c71099ac
TG
791 .proto = iph->nexthdr,
792 };
adaa70bb 793
1d6e55f1 794 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 795 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 796
adf30907 797 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
798}
799
8ed67789 800static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 801 struct flowi *fl, int flags)
1da177e4 802{
8ed67789 803 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
804}
805
4591db4f
DL
806struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
807 struct flowi *fl)
c71099ac
TG
808{
809 int flags = 0;
810
6057fd78 811 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
77d16f45 812 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 813
adaa70bb
TG
814 if (!ipv6_addr_any(&fl->fl6_src))
815 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
816 else if (sk)
817 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 818
4591db4f 819 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
820}
821
7159039a 822EXPORT_SYMBOL(ip6_route_output);
1da177e4 823
14e50e57
DM
824int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
825{
826 struct rt6_info *ort = (struct rt6_info *) *dstp;
827 struct rt6_info *rt = (struct rt6_info *)
828 dst_alloc(&ip6_dst_blackhole_ops);
829 struct dst_entry *new = NULL;
830
831 if (rt) {
d8d1f30b 832 new = &rt->dst;
14e50e57
DM
833
834 atomic_set(&new->__refcnt, 1);
835 new->__use = 1;
352e512c
HX
836 new->input = dst_discard;
837 new->output = dst_discard;
14e50e57 838
d8d1f30b
CG
839 memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
840 new->dev = ort->dst.dev;
14e50e57
DM
841 if (new->dev)
842 dev_hold(new->dev);
843 rt->rt6i_idev = ort->rt6i_idev;
844 if (rt->rt6i_idev)
845 in6_dev_hold(rt->rt6i_idev);
846 rt->rt6i_expires = 0;
847
848 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
849 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
850 rt->rt6i_metric = 0;
851
852 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
853#ifdef CONFIG_IPV6_SUBTREES
854 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
855#endif
856
857 dst_free(new);
858 }
859
860 dst_release(*dstp);
861 *dstp = new;
a02cec21 862 return new ? 0 : -ENOMEM;
14e50e57
DM
863}
864EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
865
1da177e4
LT
866/*
867 * Destination cache support functions
868 */
869
870static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
871{
872 struct rt6_info *rt;
873
874 rt = (struct rt6_info *) dst;
875
10414444 876 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
877 return dst;
878
879 return NULL;
880}
881
882static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
883{
884 struct rt6_info *rt = (struct rt6_info *) dst;
885
886 if (rt) {
54c1a859
YH
887 if (rt->rt6i_flags & RTF_CACHE) {
888 if (rt6_check_expired(rt)) {
889 ip6_del_rt(rt);
890 dst = NULL;
891 }
892 } else {
1da177e4 893 dst_release(dst);
54c1a859
YH
894 dst = NULL;
895 }
1da177e4 896 }
54c1a859 897 return dst;
1da177e4
LT
898}
899
900static void ip6_link_failure(struct sk_buff *skb)
901{
902 struct rt6_info *rt;
903
3ffe533c 904 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 905
adf30907 906 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
907 if (rt) {
908 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 909 dst_set_expires(&rt->dst, 0);
1da177e4
LT
910 rt->rt6i_flags |= RTF_EXPIRES;
911 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
912 rt->rt6i_node->fn_sernum = -1;
913 }
914}
915
916static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
917{
918 struct rt6_info *rt6 = (struct rt6_info*)dst;
919
920 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
921 rt6->rt6i_flags |= RTF_MODIFIED;
922 if (mtu < IPV6_MIN_MTU) {
923 mtu = IPV6_MIN_MTU;
924 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
925 }
926 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 927 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
928 }
929}
930
1da177e4
LT
931static int ipv6_get_mtu(struct net_device *dev);
932
5578689a 933static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
934{
935 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
936
5578689a
DL
937 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
938 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
939
940 /*
1ab1457c
YH
941 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
942 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
943 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
944 * rely only on pmtu discovery"
945 */
946 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
947 mtu = IPV6_MAXPLEN;
948 return mtu;
949}
950
3b00944c
YH
951static struct dst_entry *icmp6_dst_gc_list;
952static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 953
3b00944c 954struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 955 struct neighbour *neigh,
9acd9f3a 956 const struct in6_addr *addr)
1da177e4
LT
957{
958 struct rt6_info *rt;
959 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 960 struct net *net = dev_net(dev);
1da177e4
LT
961
962 if (unlikely(idev == NULL))
963 return NULL;
964
86393e52 965 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
966 if (unlikely(rt == NULL)) {
967 in6_dev_put(idev);
968 goto out;
969 }
970
971 dev_hold(dev);
972 if (neigh)
973 neigh_hold(neigh);
14deae41 974 else {
1da177e4 975 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
976 if (IS_ERR(neigh))
977 neigh = NULL;
978 }
1da177e4
LT
979
980 rt->rt6i_dev = dev;
981 rt->rt6i_idev = idev;
982 rt->rt6i_nexthop = neigh;
d8d1f30b
CG
983 atomic_set(&rt->dst.__refcnt, 1);
984 rt->dst.metrics[RTAX_HOPLIMIT-1] = 255;
985 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
986 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
987 rt->dst.output = ip6_output;
1da177e4
LT
988
989#if 0 /* there's no chance to use these for ndisc */
d8d1f30b 990 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1ab1457c 991 ? DST_HOST
1da177e4
LT
992 : 0;
993 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
994 rt->rt6i_dst.plen = 128;
995#endif
996
3b00944c 997 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
998 rt->dst.next = icmp6_dst_gc_list;
999 icmp6_dst_gc_list = &rt->dst;
3b00944c 1000 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1001
5578689a 1002 fib6_force_start_gc(net);
1da177e4
LT
1003
1004out:
d8d1f30b 1005 return &rt->dst;
1da177e4
LT
1006}
1007
3d0f24a7 1008int icmp6_dst_gc(void)
1da177e4
LT
1009{
1010 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1011 int more = 0;
1da177e4
LT
1012
1013 next = NULL;
5d0bbeeb 1014
3b00944c
YH
1015 spin_lock_bh(&icmp6_dst_lock);
1016 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1017
1da177e4
LT
1018 while ((dst = *pprev) != NULL) {
1019 if (!atomic_read(&dst->__refcnt)) {
1020 *pprev = dst->next;
1021 dst_free(dst);
1da177e4
LT
1022 } else {
1023 pprev = &dst->next;
3d0f24a7 1024 ++more;
1da177e4
LT
1025 }
1026 }
1027
3b00944c 1028 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1029
3d0f24a7 1030 return more;
1da177e4
LT
1031}
1032
1e493d19
DM
1033static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1034 void *arg)
1035{
1036 struct dst_entry *dst, **pprev;
1037
1038 spin_lock_bh(&icmp6_dst_lock);
1039 pprev = &icmp6_dst_gc_list;
1040 while ((dst = *pprev) != NULL) {
1041 struct rt6_info *rt = (struct rt6_info *) dst;
1042 if (func(rt, arg)) {
1043 *pprev = dst->next;
1044 dst_free(dst);
1045 } else {
1046 pprev = &dst->next;
1047 }
1048 }
1049 spin_unlock_bh(&icmp6_dst_lock);
1050}
1051
569d3645 1052static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1053{
1da177e4 1054 unsigned long now = jiffies;
86393e52 1055 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1056 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1057 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1058 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1059 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1060 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1061
1062 if (time_after(rt_last_gc + rt_min_interval, now) &&
1063 atomic_read(&ops->entries) <= rt_max_size)
1da177e4
LT
1064 goto out;
1065
6891a346
BT
1066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now;
7019b78e
DL
1069 if (atomic_read(&ops->entries) < ops->gc_thresh)
1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1071out:
7019b78e 1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
a02cec21 1073 return atomic_read(&ops->entries) > rt_max_size;
1da177e4
LT
1074}
1075
1076/* Clean host part of a prefix. Not necessary in radix tree,
1077 but results in cleaner routing tables.
1078
1079 Remove it only when all the things will work!
1080 */
1081
1082static int ipv6_get_mtu(struct net_device *dev)
1083{
1084 int mtu = IPV6_MIN_MTU;
1085 struct inet6_dev *idev;
1086
c68f24cc
ED
1087 rcu_read_lock();
1088 idev = __in6_dev_get(dev);
1089 if (idev)
1da177e4 1090 mtu = idev->cnf.mtu6;
c68f24cc 1091 rcu_read_unlock();
1da177e4
LT
1092 return mtu;
1093}
1094
6b75d090 1095int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1096{
6b75d090
YH
1097 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1098 if (hoplimit < 0) {
1099 struct net_device *dev = dst->dev;
c68f24cc
ED
1100 struct inet6_dev *idev;
1101
1102 rcu_read_lock();
1103 idev = __in6_dev_get(dev);
1104 if (idev)
6b75d090 1105 hoplimit = idev->cnf.hop_limit;
c68f24cc 1106 else
53b7997f 1107 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1108 rcu_read_unlock();
1da177e4
LT
1109 }
1110 return hoplimit;
1111}
1112
1113/*
1114 *
1115 */
1116
86872cb5 1117int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1118{
1119 int err;
5578689a 1120 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1121 struct rt6_info *rt = NULL;
1122 struct net_device *dev = NULL;
1123 struct inet6_dev *idev = NULL;
c71099ac 1124 struct fib6_table *table;
1da177e4
LT
1125 int addr_type;
1126
86872cb5 1127 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1128 return -EINVAL;
1129#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1130 if (cfg->fc_src_len)
1da177e4
LT
1131 return -EINVAL;
1132#endif
86872cb5 1133 if (cfg->fc_ifindex) {
1da177e4 1134 err = -ENODEV;
5578689a 1135 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1136 if (!dev)
1137 goto out;
1138 idev = in6_dev_get(dev);
1139 if (!idev)
1140 goto out;
1141 }
1142
86872cb5
TG
1143 if (cfg->fc_metric == 0)
1144 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1145
5578689a 1146 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1147 if (table == NULL) {
1148 err = -ENOBUFS;
1149 goto out;
1150 }
1151
86393e52 1152 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1153
1154 if (rt == NULL) {
1155 err = -ENOMEM;
1156 goto out;
1157 }
1158
d8d1f30b 1159 rt->dst.obsolete = -1;
6f704992
YH
1160 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1161 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1162 0;
1da177e4 1163
86872cb5
TG
1164 if (cfg->fc_protocol == RTPROT_UNSPEC)
1165 cfg->fc_protocol = RTPROT_BOOT;
1166 rt->rt6i_protocol = cfg->fc_protocol;
1167
1168 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1169
1170 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1171 rt->dst.input = ip6_mc_input;
ab79ad14
1172 else if (cfg->fc_flags & RTF_LOCAL)
1173 rt->dst.input = ip6_input;
1da177e4 1174 else
d8d1f30b 1175 rt->dst.input = ip6_forward;
1da177e4 1176
d8d1f30b 1177 rt->dst.output = ip6_output;
1da177e4 1178
86872cb5
TG
1179 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1180 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1181 if (rt->rt6i_dst.plen == 128)
d8d1f30b 1182 rt->dst.flags = DST_HOST;
1da177e4
LT
1183
1184#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1185 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1186 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1187#endif
1188
86872cb5 1189 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1190
1191 /* We cannot add true routes via loopback here,
1192 they would result in kernel looping; promote them to reject routes
1193 */
86872cb5 1194 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1195 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1196 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1197 /* hold loopback dev/idev if we haven't done so. */
5578689a 1198 if (dev != net->loopback_dev) {
1da177e4
LT
1199 if (dev) {
1200 dev_put(dev);
1201 in6_dev_put(idev);
1202 }
5578689a 1203 dev = net->loopback_dev;
1da177e4
LT
1204 dev_hold(dev);
1205 idev = in6_dev_get(dev);
1206 if (!idev) {
1207 err = -ENODEV;
1208 goto out;
1209 }
1210 }
d8d1f30b
CG
1211 rt->dst.output = ip6_pkt_discard_out;
1212 rt->dst.input = ip6_pkt_discard;
1213 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1214 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1215 goto install_route;
1216 }
1217
86872cb5 1218 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1219 struct in6_addr *gw_addr;
1220 int gwa_type;
1221
86872cb5
TG
1222 gw_addr = &cfg->fc_gateway;
1223 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1224 gwa_type = ipv6_addr_type(gw_addr);
1225
1226 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1227 struct rt6_info *grt;
1228
1229 /* IPv6 strictly inhibits using not link-local
1230 addresses as nexthop address.
1231 Otherwise, router will not able to send redirects.
1232 It is very good, but in some (rare!) circumstances
1233 (SIT, PtP, NBMA NOARP links) it is handy to allow
1234 some exceptions. --ANK
1235 */
1236 err = -EINVAL;
1237 if (!(gwa_type&IPV6_ADDR_UNICAST))
1238 goto out;
1239
5578689a 1240 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1241
1242 err = -EHOSTUNREACH;
1243 if (grt == NULL)
1244 goto out;
1245 if (dev) {
1246 if (dev != grt->rt6i_dev) {
d8d1f30b 1247 dst_release(&grt->dst);
1da177e4
LT
1248 goto out;
1249 }
1250 } else {
1251 dev = grt->rt6i_dev;
1252 idev = grt->rt6i_idev;
1253 dev_hold(dev);
1254 in6_dev_hold(grt->rt6i_idev);
1255 }
1256 if (!(grt->rt6i_flags&RTF_GATEWAY))
1257 err = 0;
d8d1f30b 1258 dst_release(&grt->dst);
1da177e4
LT
1259
1260 if (err)
1261 goto out;
1262 }
1263 err = -EINVAL;
1264 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1265 goto out;
1266 }
1267
1268 err = -ENODEV;
1269 if (dev == NULL)
1270 goto out;
1271
86872cb5 1272 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1273 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1274 if (IS_ERR(rt->rt6i_nexthop)) {
1275 err = PTR_ERR(rt->rt6i_nexthop);
1276 rt->rt6i_nexthop = NULL;
1277 goto out;
1278 }
1279 }
1280
86872cb5 1281 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1282
1283install_route:
86872cb5
TG
1284 if (cfg->fc_mx) {
1285 struct nlattr *nla;
1286 int remaining;
1287
1288 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1289 int type = nla_type(nla);
86872cb5
TG
1290
1291 if (type) {
1292 if (type > RTAX_MAX) {
1da177e4
LT
1293 err = -EINVAL;
1294 goto out;
1295 }
86872cb5 1296
d8d1f30b 1297 rt->dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1298 }
1da177e4
LT
1299 }
1300 }
1301
d8d1f30b
CG
1302 if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0)
1303 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1304 if (!dst_mtu(&rt->dst))
1305 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1306 if (!dst_metric(&rt->dst, RTAX_ADVMSS))
1307 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1308 rt->dst.dev = dev;
1da177e4 1309 rt->rt6i_idev = idev;
c71099ac 1310 rt->rt6i_table = table;
63152fc0 1311
c346dca1 1312 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1313
86872cb5 1314 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1315
1316out:
1317 if (dev)
1318 dev_put(dev);
1319 if (idev)
1320 in6_dev_put(idev);
1321 if (rt)
d8d1f30b 1322 dst_free(&rt->dst);
1da177e4
LT
1323 return err;
1324}
1325
86872cb5 1326static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1327{
1328 int err;
c71099ac 1329 struct fib6_table *table;
c346dca1 1330 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1331
8ed67789 1332 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1333 return -ENOENT;
1334
c71099ac
TG
1335 table = rt->rt6i_table;
1336 write_lock_bh(&table->tb6_lock);
1da177e4 1337
86872cb5 1338 err = fib6_del(rt, info);
d8d1f30b 1339 dst_release(&rt->dst);
1da177e4 1340
c71099ac 1341 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1342
1343 return err;
1344}
1345
e0a1ad73
TG
1346int ip6_del_rt(struct rt6_info *rt)
1347{
4d1169c1 1348 struct nl_info info = {
c346dca1 1349 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1350 };
528c4ceb 1351 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1352}
1353
86872cb5 1354static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1355{
c71099ac 1356 struct fib6_table *table;
1da177e4
LT
1357 struct fib6_node *fn;
1358 struct rt6_info *rt;
1359 int err = -ESRCH;
1360
5578689a 1361 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1362 if (table == NULL)
1363 return err;
1364
1365 read_lock_bh(&table->tb6_lock);
1da177e4 1366
c71099ac 1367 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1368 &cfg->fc_dst, cfg->fc_dst_len,
1369 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1370
1da177e4 1371 if (fn) {
d8d1f30b 1372 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1373 if (cfg->fc_ifindex &&
1da177e4 1374 (rt->rt6i_dev == NULL ||
86872cb5 1375 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1376 continue;
86872cb5
TG
1377 if (cfg->fc_flags & RTF_GATEWAY &&
1378 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1379 continue;
86872cb5 1380 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1381 continue;
d8d1f30b 1382 dst_hold(&rt->dst);
c71099ac 1383 read_unlock_bh(&table->tb6_lock);
1da177e4 1384
86872cb5 1385 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1386 }
1387 }
c71099ac 1388 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1389
1390 return err;
1391}
1392
1393/*
1394 * Handle redirects
1395 */
a6279458
YH
1396struct ip6rd_flowi {
1397 struct flowi fl;
1398 struct in6_addr gateway;
1399};
1400
8ed67789
DL
1401static struct rt6_info *__ip6_route_redirect(struct net *net,
1402 struct fib6_table *table,
a6279458
YH
1403 struct flowi *fl,
1404 int flags)
1da177e4 1405{
a6279458
YH
1406 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1407 struct rt6_info *rt;
e843b9e1 1408 struct fib6_node *fn;
c71099ac 1409
1da177e4 1410 /*
e843b9e1
YH
1411 * Get the "current" route for this destination and
1412 * check if the redirect has come from approriate router.
1413 *
1414 * RFC 2461 specifies that redirects should only be
1415 * accepted if they come from the nexthop to the target.
1416 * Due to the way the routes are chosen, this notion
1417 * is a bit fuzzy and one might need to check all possible
1418 * routes.
1da177e4 1419 */
1da177e4 1420
c71099ac 1421 read_lock_bh(&table->tb6_lock);
a6279458 1422 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1423restart:
d8d1f30b 1424 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1425 /*
1426 * Current route is on-link; redirect is always invalid.
1427 *
1428 * Seems, previous statement is not true. It could
1429 * be node, which looks for us as on-link (f.e. proxy ndisc)
1430 * But then router serving it might decide, that we should
1431 * know truth 8)8) --ANK (980726).
1432 */
1433 if (rt6_check_expired(rt))
1434 continue;
1435 if (!(rt->rt6i_flags & RTF_GATEWAY))
1436 continue;
a6279458 1437 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1438 continue;
a6279458 1439 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1440 continue;
1441 break;
1442 }
a6279458 1443
cb15d9c2 1444 if (!rt)
8ed67789
DL
1445 rt = net->ipv6.ip6_null_entry;
1446 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1447out:
d8d1f30b 1448 dst_hold(&rt->dst);
a6279458 1449
c71099ac 1450 read_unlock_bh(&table->tb6_lock);
e843b9e1 1451
a6279458
YH
1452 return rt;
1453};
1454
1455static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1456 struct in6_addr *src,
1457 struct in6_addr *gateway,
1458 struct net_device *dev)
1459{
adaa70bb 1460 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1461 struct net *net = dev_net(dev);
a6279458
YH
1462 struct ip6rd_flowi rdfl = {
1463 .fl = {
1464 .oif = dev->ifindex,
1465 .nl_u = {
1466 .ip6_u = {
1467 .daddr = *dest,
1468 .saddr = *src,
1469 },
1470 },
1471 },
a6279458 1472 };
adaa70bb 1473
86c36ce4
BH
1474 ipv6_addr_copy(&rdfl.gateway, gateway);
1475
adaa70bb
TG
1476 if (rt6_need_strict(dest))
1477 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1478
5578689a 1479 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1480 flags, __ip6_route_redirect);
a6279458
YH
1481}
1482
1483void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1484 struct in6_addr *saddr,
1485 struct neighbour *neigh, u8 *lladdr, int on_link)
1486{
1487 struct rt6_info *rt, *nrt = NULL;
1488 struct netevent_redirect netevent;
c346dca1 1489 struct net *net = dev_net(neigh->dev);
a6279458
YH
1490
1491 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1492
8ed67789 1493 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1494 if (net_ratelimit())
1495 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1496 "for redirect target\n");
a6279458 1497 goto out;
1da177e4
LT
1498 }
1499
1da177e4
LT
1500 /*
1501 * We have finally decided to accept it.
1502 */
1503
1ab1457c 1504 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1505 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1506 NEIGH_UPDATE_F_OVERRIDE|
1507 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1508 NEIGH_UPDATE_F_ISROUTER))
1509 );
1510
1511 /*
1512 * Redirect received -> path was valid.
1513 * Look, redirects are sent only in response to data packets,
1514 * so that this nexthop apparently is reachable. --ANK
1515 */
d8d1f30b 1516 dst_confirm(&rt->dst);
1da177e4
LT
1517
1518 /* Duplicate redirect: silently ignore. */
d8d1f30b 1519 if (neigh == rt->dst.neighbour)
1da177e4
LT
1520 goto out;
1521
1522 nrt = ip6_rt_copy(rt);
1523 if (nrt == NULL)
1524 goto out;
1525
1526 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1527 if (on_link)
1528 nrt->rt6i_flags &= ~RTF_GATEWAY;
1529
1530 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1531 nrt->rt6i_dst.plen = 128;
d8d1f30b 1532 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1533
1534 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1535 nrt->rt6i_nexthop = neigh_clone(neigh);
1536 /* Reset pmtu, it may be better */
d8d1f30b
CG
1537 nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1538 nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
1539 dst_mtu(&nrt->dst));
1da177e4 1540
40e22e8f 1541 if (ip6_ins_rt(nrt))
1da177e4
LT
1542 goto out;
1543
d8d1f30b
CG
1544 netevent.old = &rt->dst;
1545 netevent.new = &nrt->dst;
8d71740c
TT
1546 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1547
1da177e4 1548 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1549 ip6_del_rt(rt);
1da177e4
LT
1550 return;
1551 }
1552
1553out:
d8d1f30b 1554 dst_release(&rt->dst);
1da177e4
LT
1555}
1556
1557/*
1558 * Handle ICMP "packet too big" messages
1559 * i.e. Path MTU discovery
1560 */
1561
ae878ae2
1562static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1563 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1564{
1565 struct rt6_info *rt, *nrt;
1566 int allfrag = 0;
1567
ae878ae2 1568 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1569 if (rt == NULL)
1570 return;
1571
d8d1f30b 1572 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1573 goto out;
1574
1575 if (pmtu < IPV6_MIN_MTU) {
1576 /*
1ab1457c 1577 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1578 * MTU (1280) and a fragment header should always be included
1579 * after a node receiving Too Big message reporting PMTU is
1580 * less than the IPv6 Minimum Link MTU.
1581 */
1582 pmtu = IPV6_MIN_MTU;
1583 allfrag = 1;
1584 }
1585
1586 /* New mtu received -> path was valid.
1587 They are sent only in response to data packets,
1588 so that this nexthop apparently is reachable. --ANK
1589 */
d8d1f30b 1590 dst_confirm(&rt->dst);
1da177e4
LT
1591
1592 /* Host route. If it is static, it would be better
1593 not to override it, but add new one, so that
1594 when cache entry will expire old pmtu
1595 would return automatically.
1596 */
1597 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1598 rt->dst.metrics[RTAX_MTU-1] = pmtu;
1da177e4 1599 if (allfrag)
d8d1f30b
CG
1600 rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1601 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1602 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1603 goto out;
1604 }
1605
1606 /* Network route.
1607 Two cases are possible:
1608 1. It is connected route. Action: COW
1609 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1610 */
d5315b50 1611 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1612 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1613 else
1614 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1615
d5315b50 1616 if (nrt) {
d8d1f30b 1617 nrt->dst.metrics[RTAX_MTU-1] = pmtu;
a1e78363 1618 if (allfrag)
d8d1f30b 1619 nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
a1e78363
YH
1620
1621 /* According to RFC 1981, detecting PMTU increase shouldn't be
1622 * happened within 5 mins, the recommended timer is 10 mins.
1623 * Here this route expiration time is set to ip6_rt_mtu_expires
1624 * which is 10 mins. After 10 mins the decreased pmtu is expired
1625 * and detecting PMTU increase will be automatically happened.
1626 */
d8d1f30b 1627 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1628 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1629
40e22e8f 1630 ip6_ins_rt(nrt);
1da177e4 1631 }
1da177e4 1632out:
d8d1f30b 1633 dst_release(&rt->dst);
1da177e4
LT
1634}
1635
ae878ae2
1636void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1637 struct net_device *dev, u32 pmtu)
1638{
1639 struct net *net = dev_net(dev);
1640
1641 /*
1642 * RFC 1981 states that a node "MUST reduce the size of the packets it
1643 * is sending along the path" that caused the Packet Too Big message.
1644 * Since it's not possible in the general case to determine which
1645 * interface was used to send the original packet, we update the MTU
1646 * on the interface that will be used to send future packets. We also
1647 * update the MTU on the interface that received the Packet Too Big in
1648 * case the original packet was forced out that interface with
1649 * SO_BINDTODEVICE or similar. This is the next best thing to the
1650 * correct behaviour, which would be to update the MTU on all
1651 * interfaces.
1652 */
1653 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1654 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1655}
1656
1da177e4
LT
1657/*
1658 * Misc support functions
1659 */
1660
1661static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1662{
c346dca1 1663 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1664 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1665
1666 if (rt) {
d8d1f30b
CG
1667 rt->dst.input = ort->dst.input;
1668 rt->dst.output = ort->dst.output;
1669
1670 memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32));
1671 rt->dst.error = ort->dst.error;
1672 rt->dst.dev = ort->dst.dev;
1673 if (rt->dst.dev)
1674 dev_hold(rt->dst.dev);
1da177e4
LT
1675 rt->rt6i_idev = ort->rt6i_idev;
1676 if (rt->rt6i_idev)
1677 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1678 rt->dst.lastuse = jiffies;
1da177e4
LT
1679 rt->rt6i_expires = 0;
1680
1681 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1682 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1683 rt->rt6i_metric = 0;
1684
1685 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1686#ifdef CONFIG_IPV6_SUBTREES
1687 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1688#endif
c71099ac 1689 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1690 }
1691 return rt;
1692}
1693
70ceb4f5 1694#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1695static struct rt6_info *rt6_get_route_info(struct net *net,
1696 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1697 struct in6_addr *gwaddr, int ifindex)
1698{
1699 struct fib6_node *fn;
1700 struct rt6_info *rt = NULL;
c71099ac
TG
1701 struct fib6_table *table;
1702
efa2cea0 1703 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1704 if (table == NULL)
1705 return NULL;
70ceb4f5 1706
c71099ac
TG
1707 write_lock_bh(&table->tb6_lock);
1708 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1709 if (!fn)
1710 goto out;
1711
d8d1f30b 1712 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1713 if (rt->rt6i_dev->ifindex != ifindex)
1714 continue;
1715 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1716 continue;
1717 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1718 continue;
d8d1f30b 1719 dst_hold(&rt->dst);
70ceb4f5
YH
1720 break;
1721 }
1722out:
c71099ac 1723 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1724 return rt;
1725}
1726
efa2cea0
DL
1727static struct rt6_info *rt6_add_route_info(struct net *net,
1728 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1729 struct in6_addr *gwaddr, int ifindex,
1730 unsigned pref)
1731{
86872cb5
TG
1732 struct fib6_config cfg = {
1733 .fc_table = RT6_TABLE_INFO,
238fc7ea 1734 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1735 .fc_ifindex = ifindex,
1736 .fc_dst_len = prefixlen,
1737 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1738 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1739 .fc_nlinfo.pid = 0,
1740 .fc_nlinfo.nlh = NULL,
1741 .fc_nlinfo.nl_net = net,
86872cb5
TG
1742 };
1743
1744 ipv6_addr_copy(&cfg.fc_dst, prefix);
1745 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1746
e317da96
YH
1747 /* We should treat it as a default route if prefix length is 0. */
1748 if (!prefixlen)
86872cb5 1749 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1750
86872cb5 1751 ip6_route_add(&cfg);
70ceb4f5 1752
efa2cea0 1753 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1754}
1755#endif
1756
1da177e4 1757struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1758{
1da177e4 1759 struct rt6_info *rt;
c71099ac 1760 struct fib6_table *table;
1da177e4 1761
c346dca1 1762 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1763 if (table == NULL)
1764 return NULL;
1da177e4 1765
c71099ac 1766 write_lock_bh(&table->tb6_lock);
d8d1f30b 1767 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1768 if (dev == rt->rt6i_dev &&
045927ff 1769 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1770 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1771 break;
1772 }
1773 if (rt)
d8d1f30b 1774 dst_hold(&rt->dst);
c71099ac 1775 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1776 return rt;
1777}
1778
1779struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1780 struct net_device *dev,
1781 unsigned int pref)
1da177e4 1782{
86872cb5
TG
1783 struct fib6_config cfg = {
1784 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1785 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1786 .fc_ifindex = dev->ifindex,
1787 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1788 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1789 .fc_nlinfo.pid = 0,
1790 .fc_nlinfo.nlh = NULL,
c346dca1 1791 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1792 };
1da177e4 1793
86872cb5 1794 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1795
86872cb5 1796 ip6_route_add(&cfg);
1da177e4 1797
1da177e4
LT
1798 return rt6_get_dflt_router(gwaddr, dev);
1799}
1800
7b4da532 1801void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1802{
1803 struct rt6_info *rt;
c71099ac
TG
1804 struct fib6_table *table;
1805
1806 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1807 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1808 if (table == NULL)
1809 return;
1da177e4
LT
1810
1811restart:
c71099ac 1812 read_lock_bh(&table->tb6_lock);
d8d1f30b 1813 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1814 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1815 dst_hold(&rt->dst);
c71099ac 1816 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1817 ip6_del_rt(rt);
1da177e4
LT
1818 goto restart;
1819 }
1820 }
c71099ac 1821 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1822}
1823
5578689a
DL
1824static void rtmsg_to_fib6_config(struct net *net,
1825 struct in6_rtmsg *rtmsg,
86872cb5
TG
1826 struct fib6_config *cfg)
1827{
1828 memset(cfg, 0, sizeof(*cfg));
1829
1830 cfg->fc_table = RT6_TABLE_MAIN;
1831 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1832 cfg->fc_metric = rtmsg->rtmsg_metric;
1833 cfg->fc_expires = rtmsg->rtmsg_info;
1834 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1835 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1836 cfg->fc_flags = rtmsg->rtmsg_flags;
1837
5578689a 1838 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1839
86872cb5
TG
1840 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1841 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1842 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1843}
1844
5578689a 1845int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1846{
86872cb5 1847 struct fib6_config cfg;
1da177e4
LT
1848 struct in6_rtmsg rtmsg;
1849 int err;
1850
1851 switch(cmd) {
1852 case SIOCADDRT: /* Add a route */
1853 case SIOCDELRT: /* Delete a route */
1854 if (!capable(CAP_NET_ADMIN))
1855 return -EPERM;
1856 err = copy_from_user(&rtmsg, arg,
1857 sizeof(struct in6_rtmsg));
1858 if (err)
1859 return -EFAULT;
86872cb5 1860
5578689a 1861 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1862
1da177e4
LT
1863 rtnl_lock();
1864 switch (cmd) {
1865 case SIOCADDRT:
86872cb5 1866 err = ip6_route_add(&cfg);
1da177e4
LT
1867 break;
1868 case SIOCDELRT:
86872cb5 1869 err = ip6_route_del(&cfg);
1da177e4
LT
1870 break;
1871 default:
1872 err = -EINVAL;
1873 }
1874 rtnl_unlock();
1875
1876 return err;
3ff50b79 1877 }
1da177e4
LT
1878
1879 return -EINVAL;
1880}
1881
1882/*
1883 * Drop the packet on the floor
1884 */
1885
d5fdd6ba 1886static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1887{
612f09e8 1888 int type;
adf30907 1889 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1890 switch (ipstats_mib_noroutes) {
1891 case IPSTATS_MIB_INNOROUTES:
0660e03f 1892 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1893 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1894 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1895 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1896 break;
1897 }
1898 /* FALLTHROUGH */
1899 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1900 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1901 ipstats_mib_noroutes);
612f09e8
YH
1902 break;
1903 }
3ffe533c 1904 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1905 kfree_skb(skb);
1906 return 0;
1907}
1908
9ce8ade0
TG
1909static int ip6_pkt_discard(struct sk_buff *skb)
1910{
612f09e8 1911 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1912}
1913
20380731 1914static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1915{
adf30907 1916 skb->dev = skb_dst(skb)->dev;
612f09e8 1917 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1918}
1919
6723ab54
DM
1920#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1921
9ce8ade0
TG
1922static int ip6_pkt_prohibit(struct sk_buff *skb)
1923{
612f09e8 1924 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1925}
1926
1927static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1928{
adf30907 1929 skb->dev = skb_dst(skb)->dev;
612f09e8 1930 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1931}
1932
6723ab54
DM
1933#endif
1934
1da177e4
LT
1935/*
1936 * Allocate a dst for local (unicast / anycast) address.
1937 */
1938
1939struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1940 const struct in6_addr *addr,
1941 int anycast)
1942{
c346dca1 1943 struct net *net = dev_net(idev->dev);
86393e52 1944 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1945 struct neighbour *neigh;
1da177e4
LT
1946
1947 if (rt == NULL)
1948 return ERR_PTR(-ENOMEM);
1949
5578689a 1950 dev_hold(net->loopback_dev);
1da177e4
LT
1951 in6_dev_hold(idev);
1952
d8d1f30b
CG
1953 rt->dst.flags = DST_HOST;
1954 rt->dst.input = ip6_input;
1955 rt->dst.output = ip6_output;
5578689a 1956 rt->rt6i_dev = net->loopback_dev;
1da177e4 1957 rt->rt6i_idev = idev;
d8d1f30b
CG
1958 rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1959 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst));
1960 rt->dst.metrics[RTAX_HOPLIMIT-1] = -1;
1961 rt->dst.obsolete = -1;
1da177e4
LT
1962
1963 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1964 if (anycast)
1965 rt->rt6i_flags |= RTF_ANYCAST;
1966 else
1da177e4 1967 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1968 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1969 if (IS_ERR(neigh)) {
d8d1f30b 1970 dst_free(&rt->dst);
14deae41
DM
1971
1972 /* We are casting this because that is the return
1973 * value type. But an errno encoded pointer is the
1974 * same regardless of the underlying pointer type,
1975 * and that's what we are returning. So this is OK.
1976 */
1977 return (struct rt6_info *) neigh;
1da177e4 1978 }
14deae41 1979 rt->rt6i_nexthop = neigh;
1da177e4
LT
1980
1981 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1982 rt->rt6i_dst.plen = 128;
5578689a 1983 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 1984
d8d1f30b 1985 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
1986
1987 return rt;
1988}
1989
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through its void * parameter. */
struct arg_dev_net {
	/* Device whose routes are removed; NULL matches every device. */
	struct net_device *dev;
	/* Namespace whose ip6_null_entry must be left alone. */
	struct net *net;
};
1994
1da177e4
LT
1995static int fib6_ifdown(struct rt6_info *rt, void *arg)
1996{
8ed67789
DL
1997 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1998 struct net *net = ((struct arg_dev_net *)arg)->net;
1999
2000 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2001 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
2002 RT6_TRACE("deleted by ifdown %p\n", rt);
2003 return -1;
2004 }
2005 return 0;
2006}
2007
f3db4851 2008void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2009{
8ed67789
DL
2010 struct arg_dev_net adn = {
2011 .dev = dev,
2012 .net = net,
2013 };
2014
2015 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2016 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2017}
2018
/* Argument bundle handed to rt6_mtu_change_route() via its void * parameter. */
struct rt6_mtu_change_arg {
	/* Device whose MTU changed. */
	struct net_device *dev;
	/* The new MTU. */
	unsigned mtu;
};
2024
2025static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2026{
2027 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2028 struct inet6_dev *idev;
c346dca1 2029 struct net *net = dev_net(arg->dev);
1da177e4
LT
2030
2031 /* In IPv6 pmtu discovery is not optional,
2032 so that RTAX_MTU lock cannot disable it.
2033 We still use this lock to block changes
2034 caused by addrconf/ndisc.
2035 */
2036
2037 idev = __in6_dev_get(arg->dev);
2038 if (idev == NULL)
2039 return 0;
2040
2041 /* For administrative MTU increase, there is no way to discover
2042 IPv6 PMTU increase, so PMTU increase should be updated here.
2043 Since RFC 1981 doesn't include administrative MTU increase
2044 update PMTU increase is a MUST. (i.e. jumbo frame)
2045 */
2046 /*
2047 If new MTU is less than route PMTU, this new MTU will be the
2048 lowest MTU in the path, update the route PMTU to reflect PMTU
2049 decreases; if new MTU is greater than route PMTU, and the
2050 old MTU is the lowest MTU in the path, update the route PMTU
2051 to reflect the increase. In this case if the other nodes' MTU
2052 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2053 PMTU discouvery.
2054 */
2055 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2056 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2057 (dst_mtu(&rt->dst) >= arg->mtu ||
2058 (dst_mtu(&rt->dst) < arg->mtu &&
2059 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2060 rt->dst.metrics[RTAX_MTU-1] = arg->mtu;
2061 rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 2062 }
1da177e4
LT
2063 return 0;
2064}
2065
2066void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2067{
c71099ac
TG
2068 struct rt6_mtu_change_arg arg = {
2069 .dev = dev,
2070 .mtu = mtu,
2071 };
1da177e4 2072
c346dca1 2073 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2074}
2075
ef7c79ed 2076static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2077 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2078 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2079 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2080 [RTA_PRIORITY] = { .type = NLA_U32 },
2081 [RTA_METRICS] = { .type = NLA_NESTED },
2082};
2083
2084static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2085 struct fib6_config *cfg)
1da177e4 2086{
86872cb5
TG
2087 struct rtmsg *rtm;
2088 struct nlattr *tb[RTA_MAX+1];
2089 int err;
1da177e4 2090
86872cb5
TG
2091 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2092 if (err < 0)
2093 goto errout;
1da177e4 2094
86872cb5
TG
2095 err = -EINVAL;
2096 rtm = nlmsg_data(nlh);
2097 memset(cfg, 0, sizeof(*cfg));
2098
2099 cfg->fc_table = rtm->rtm_table;
2100 cfg->fc_dst_len = rtm->rtm_dst_len;
2101 cfg->fc_src_len = rtm->rtm_src_len;
2102 cfg->fc_flags = RTF_UP;
2103 cfg->fc_protocol = rtm->rtm_protocol;
2104
2105 if (rtm->rtm_type == RTN_UNREACHABLE)
2106 cfg->fc_flags |= RTF_REJECT;
2107
ab79ad14
2108 if (rtm->rtm_type == RTN_LOCAL)
2109 cfg->fc_flags |= RTF_LOCAL;
2110
86872cb5
TG
2111 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2112 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2113 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2114
2115 if (tb[RTA_GATEWAY]) {
2116 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2117 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2118 }
86872cb5
TG
2119
2120 if (tb[RTA_DST]) {
2121 int plen = (rtm->rtm_dst_len + 7) >> 3;
2122
2123 if (nla_len(tb[RTA_DST]) < plen)
2124 goto errout;
2125
2126 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2127 }
86872cb5
TG
2128
2129 if (tb[RTA_SRC]) {
2130 int plen = (rtm->rtm_src_len + 7) >> 3;
2131
2132 if (nla_len(tb[RTA_SRC]) < plen)
2133 goto errout;
2134
2135 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2136 }
86872cb5
TG
2137
2138 if (tb[RTA_OIF])
2139 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2140
2141 if (tb[RTA_PRIORITY])
2142 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2143
2144 if (tb[RTA_METRICS]) {
2145 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2146 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2147 }
86872cb5
TG
2148
2149 if (tb[RTA_TABLE])
2150 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2151
2152 err = 0;
2153errout:
2154 return err;
1da177e4
LT
2155}
2156
c127ea2c 2157static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2158{
86872cb5
TG
2159 struct fib6_config cfg;
2160 int err;
1da177e4 2161
86872cb5
TG
2162 err = rtm_to_fib6_config(skb, nlh, &cfg);
2163 if (err < 0)
2164 return err;
2165
2166 return ip6_route_del(&cfg);
1da177e4
LT
2167}
2168
c127ea2c 2169static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2170{
86872cb5
TG
2171 struct fib6_config cfg;
2172 int err;
1da177e4 2173
86872cb5
TG
2174 err = rtm_to_fib6_config(skb, nlh, &cfg);
2175 if (err < 0)
2176 return err;
2177
2178 return ip6_route_add(&cfg);
1da177e4
LT
2179}
2180
339bf98f
TG
2181static inline size_t rt6_nlmsg_size(void)
2182{
2183 return NLMSG_ALIGN(sizeof(struct rtmsg))
2184 + nla_total_size(16) /* RTA_SRC */
2185 + nla_total_size(16) /* RTA_DST */
2186 + nla_total_size(16) /* RTA_GATEWAY */
2187 + nla_total_size(16) /* RTA_PREFSRC */
2188 + nla_total_size(4) /* RTA_TABLE */
2189 + nla_total_size(4) /* RTA_IIF */
2190 + nla_total_size(4) /* RTA_OIF */
2191 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2192 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2193 + nla_total_size(sizeof(struct rta_cacheinfo));
2194}
2195
191cd582
BH
2196static int rt6_fill_node(struct net *net,
2197 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2198 struct in6_addr *dst, struct in6_addr *src,
2199 int iif, int type, u32 pid, u32 seq,
7bc570c8 2200 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2201{
2202 struct rtmsg *rtm;
2d7202bf 2203 struct nlmsghdr *nlh;
e3703b3d 2204 long expires;
9e762a4a 2205 u32 table;
1da177e4
LT
2206
2207 if (prefix) { /* user wants prefix routes only */
2208 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2209 /* success since this is not a prefix route */
2210 return 1;
2211 }
2212 }
2213
2d7202bf
TG
2214 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2215 if (nlh == NULL)
26932566 2216 return -EMSGSIZE;
2d7202bf
TG
2217
2218 rtm = nlmsg_data(nlh);
1da177e4
LT
2219 rtm->rtm_family = AF_INET6;
2220 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2221 rtm->rtm_src_len = rt->rt6i_src.plen;
2222 rtm->rtm_tos = 0;
c71099ac 2223 if (rt->rt6i_table)
9e762a4a 2224 table = rt->rt6i_table->tb6_id;
c71099ac 2225 else
9e762a4a
PM
2226 table = RT6_TABLE_UNSPEC;
2227 rtm->rtm_table = table;
2d7202bf 2228 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2229 if (rt->rt6i_flags&RTF_REJECT)
2230 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2231 else if (rt->rt6i_flags&RTF_LOCAL)
2232 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2233 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2234 rtm->rtm_type = RTN_LOCAL;
2235 else
2236 rtm->rtm_type = RTN_UNICAST;
2237 rtm->rtm_flags = 0;
2238 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2239 rtm->rtm_protocol = rt->rt6i_protocol;
2240 if (rt->rt6i_flags&RTF_DYNAMIC)
2241 rtm->rtm_protocol = RTPROT_REDIRECT;
2242 else if (rt->rt6i_flags & RTF_ADDRCONF)
2243 rtm->rtm_protocol = RTPROT_KERNEL;
2244 else if (rt->rt6i_flags&RTF_DEFAULT)
2245 rtm->rtm_protocol = RTPROT_RA;
2246
2247 if (rt->rt6i_flags&RTF_CACHE)
2248 rtm->rtm_flags |= RTM_F_CLONED;
2249
2250 if (dst) {
2d7202bf 2251 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2252 rtm->rtm_dst_len = 128;
1da177e4 2253 } else if (rtm->rtm_dst_len)
2d7202bf 2254 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2255#ifdef CONFIG_IPV6_SUBTREES
2256 if (src) {
2d7202bf 2257 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2258 rtm->rtm_src_len = 128;
1da177e4 2259 } else if (rtm->rtm_src_len)
2d7202bf 2260 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2261#endif
7bc570c8
YH
2262 if (iif) {
2263#ifdef CONFIG_IPV6_MROUTE
2264 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2265 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2266 if (err <= 0) {
2267 if (!nowait) {
2268 if (err == 0)
2269 return 0;
2270 goto nla_put_failure;
2271 } else {
2272 if (err == -EMSGSIZE)
2273 goto nla_put_failure;
2274 }
2275 }
2276 } else
2277#endif
2278 NLA_PUT_U32(skb, RTA_IIF, iif);
2279 } else if (dst) {
d8d1f30b 2280 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
1da177e4 2281 struct in6_addr saddr_buf;
191cd582 2282 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2283 dst, 0, &saddr_buf) == 0)
2d7202bf 2284 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2285 }
2d7202bf 2286
d8d1f30b 2287 if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0)
2d7202bf
TG
2288 goto nla_put_failure;
2289
d8d1f30b
CG
2290 if (rt->dst.neighbour)
2291 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2d7202bf 2292
d8d1f30b 2293 if (rt->dst.dev)
2d7202bf
TG
2294 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2295
2296 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2297
36e3deae
YH
2298 if (!(rt->rt6i_flags & RTF_EXPIRES))
2299 expires = 0;
2300 else if (rt->rt6i_expires - jiffies < INT_MAX)
2301 expires = rt->rt6i_expires - jiffies;
2302 else
2303 expires = INT_MAX;
69cdf8f9 2304
d8d1f30b
CG
2305 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2306 expires, rt->dst.error) < 0)
e3703b3d 2307 goto nla_put_failure;
2d7202bf
TG
2308
2309 return nlmsg_end(skb, nlh);
2310
2311nla_put_failure:
26932566
PM
2312 nlmsg_cancel(skb, nlh);
2313 return -EMSGSIZE;
1da177e4
LT
2314}
2315
1b43af54 2316int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2317{
2318 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2319 int prefix;
2320
2d7202bf
TG
2321 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2322 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2323 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2324 } else
2325 prefix = 0;
2326
191cd582
BH
2327 return rt6_fill_node(arg->net,
2328 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2329 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2330 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2331}
2332
c127ea2c 2333static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2334{
3b1e0a65 2335 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2336 struct nlattr *tb[RTA_MAX+1];
2337 struct rt6_info *rt;
1da177e4 2338 struct sk_buff *skb;
ab364a6f 2339 struct rtmsg *rtm;
1da177e4 2340 struct flowi fl;
ab364a6f 2341 int err, iif = 0;
1da177e4 2342
ab364a6f
TG
2343 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2344 if (err < 0)
2345 goto errout;
1da177e4 2346
ab364a6f 2347 err = -EINVAL;
1da177e4 2348 memset(&fl, 0, sizeof(fl));
1da177e4 2349
ab364a6f
TG
2350 if (tb[RTA_SRC]) {
2351 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2352 goto errout;
2353
2354 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2355 }
2356
2357 if (tb[RTA_DST]) {
2358 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2359 goto errout;
2360
2361 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2362 }
2363
2364 if (tb[RTA_IIF])
2365 iif = nla_get_u32(tb[RTA_IIF]);
2366
2367 if (tb[RTA_OIF])
2368 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2369
2370 if (iif) {
2371 struct net_device *dev;
5578689a 2372 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2373 if (!dev) {
2374 err = -ENODEV;
ab364a6f 2375 goto errout;
1da177e4
LT
2376 }
2377 }
2378
ab364a6f
TG
2379 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2380 if (skb == NULL) {
2381 err = -ENOBUFS;
2382 goto errout;
2383 }
1da177e4 2384
ab364a6f
TG
2385 /* Reserve room for dummy headers, this skb can pass
2386 through good chunk of routing engine.
2387 */
459a98ed 2388 skb_reset_mac_header(skb);
ab364a6f 2389 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2390
8a3edd80 2391 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
d8d1f30b 2392 skb_dst_set(skb, &rt->dst);
1da177e4 2393
191cd582 2394 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2395 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2396 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2397 if (err < 0) {
ab364a6f
TG
2398 kfree_skb(skb);
2399 goto errout;
1da177e4
LT
2400 }
2401
5578689a 2402 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2403errout:
1da177e4 2404 return err;
1da177e4
LT
2405}
2406
86872cb5 2407void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2408{
2409 struct sk_buff *skb;
5578689a 2410 struct net *net = info->nl_net;
528c4ceb
DL
2411 u32 seq;
2412 int err;
2413
2414 err = -ENOBUFS;
2415 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2416
339bf98f 2417 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2418 if (skb == NULL)
2419 goto errout;
2420
191cd582 2421 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2422 event, info->pid, seq, 0, 0, 0);
26932566
PM
2423 if (err < 0) {
2424 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2425 WARN_ON(err == -EMSGSIZE);
2426 kfree_skb(skb);
2427 goto errout;
2428 }
1ce85fe4
PNA
2429 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2430 info->nlh, gfp_any());
2431 return;
21713ebc
TG
2432errout:
2433 if (err < 0)
5578689a 2434 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2435}
2436
8ed67789
DL
2437static int ip6_route_dev_notify(struct notifier_block *this,
2438 unsigned long event, void *data)
2439{
2440 struct net_device *dev = (struct net_device *)data;
c346dca1 2441 struct net *net = dev_net(dev);
8ed67789
DL
2442
2443 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2444 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2445 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2446#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2447 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2448 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2449 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2450 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2451#endif
2452 }
2453
2454 return NOTIFY_OK;
2455}
2456
1da177e4
LT
2457/*
2458 * /proc
2459 */
2460
2461#ifdef CONFIG_PROC_FS
2462
/*
 * NOTE(review): presumably the character width of one formatted route
 * line; the seq_file code in this part of the file does not use it.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * NOTE(review): looks like buffer bookkeeping for a non-seq_file /proc
 * reader; not referenced by the seq_file code in this part of the file.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2473
2474static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2475{
33120b30 2476 struct seq_file *m = p_arg;
1da177e4 2477
4b7a4274 2478 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2479
2480#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2481 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2482#else
33120b30 2483 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2484#endif
2485
2486 if (rt->rt6i_nexthop) {
4b7a4274 2487 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2488 } else {
33120b30 2489 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2490 }
33120b30 2491 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2492 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2493 rt->dst.__use, rt->rt6i_flags,
33120b30 2494 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2495 return 0;
2496}
2497
33120b30 2498static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2499{
f3db4851
DL
2500 struct net *net = (struct net *)m->private;
2501 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2502 return 0;
2503}
1da177e4 2504
33120b30
AD
2505static int ipv6_route_open(struct inode *inode, struct file *file)
2506{
de05c557 2507 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2508}
2509
33120b30
AD
2510static const struct file_operations ipv6_route_proc_fops = {
2511 .owner = THIS_MODULE,
2512 .open = ipv6_route_open,
2513 .read = seq_read,
2514 .llseek = seq_lseek,
b6fcbdb4 2515 .release = single_release_net,
33120b30
AD
2516};
2517
1da177e4
LT
2518static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2519{
69ddb805 2520 struct net *net = (struct net *)seq->private;
1da177e4 2521 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2522 net->ipv6.rt6_stats->fib_nodes,
2523 net->ipv6.rt6_stats->fib_route_nodes,
2524 net->ipv6.rt6_stats->fib_rt_alloc,
2525 net->ipv6.rt6_stats->fib_rt_entries,
2526 net->ipv6.rt6_stats->fib_rt_cache,
86393e52 2527 atomic_read(&net->ipv6.ip6_dst_ops.entries),
69ddb805 2528 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2529
2530 return 0;
2531}
2532
2533static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2534{
de05c557 2535 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2536}
2537
9a32144e 2538static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2539 .owner = THIS_MODULE,
2540 .open = rt6_stats_seq_open,
2541 .read = seq_read,
2542 .llseek = seq_lseek,
b6fcbdb4 2543 .release = single_release_net,
1da177e4
LT
2544};
2545#endif /* CONFIG_PROC_FS */
2546
2547#ifdef CONFIG_SYSCTL
2548
1da177e4 2549static
8d65af78 2550int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2551 void __user *buffer, size_t *lenp, loff_t *ppos)
2552{
5b7c931d
DL
2553 struct net *net = current->nsproxy->net_ns;
2554 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2555 if (write) {
8d65af78 2556 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2557 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2558 return 0;
2559 } else
2560 return -EINVAL;
2561}
2562
760f2d01 2563ctl_table ipv6_route_table_template[] = {
1ab1457c 2564 {
1da177e4 2565 .procname = "flush",
4990509f 2566 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2567 .maxlen = sizeof(int),
89c8b3a1 2568 .mode = 0200,
6d9f239a 2569 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2570 },
2571 {
1da177e4 2572 .procname = "gc_thresh",
9a7ec3a9 2573 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2574 .maxlen = sizeof(int),
2575 .mode = 0644,
6d9f239a 2576 .proc_handler = proc_dointvec,
1da177e4
LT
2577 },
2578 {
1da177e4 2579 .procname = "max_size",
4990509f 2580 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2581 .maxlen = sizeof(int),
2582 .mode = 0644,
6d9f239a 2583 .proc_handler = proc_dointvec,
1da177e4
LT
2584 },
2585 {
1da177e4 2586 .procname = "gc_min_interval",
4990509f 2587 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2588 .maxlen = sizeof(int),
2589 .mode = 0644,
6d9f239a 2590 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2591 },
2592 {
1da177e4 2593 .procname = "gc_timeout",
4990509f 2594 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2595 .maxlen = sizeof(int),
2596 .mode = 0644,
6d9f239a 2597 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2598 },
2599 {
1da177e4 2600 .procname = "gc_interval",
4990509f 2601 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2602 .maxlen = sizeof(int),
2603 .mode = 0644,
6d9f239a 2604 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2605 },
2606 {
1da177e4 2607 .procname = "gc_elasticity",
4990509f 2608 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2609 .maxlen = sizeof(int),
2610 .mode = 0644,
f3d3f616 2611 .proc_handler = proc_dointvec,
1da177e4
LT
2612 },
2613 {
1da177e4 2614 .procname = "mtu_expires",
4990509f 2615 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2616 .maxlen = sizeof(int),
2617 .mode = 0644,
6d9f239a 2618 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2619 },
2620 {
1da177e4 2621 .procname = "min_adv_mss",
4990509f 2622 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2623 .maxlen = sizeof(int),
2624 .mode = 0644,
f3d3f616 2625 .proc_handler = proc_dointvec,
1da177e4
LT
2626 },
2627 {
1da177e4 2628 .procname = "gc_min_interval_ms",
4990509f 2629 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2630 .maxlen = sizeof(int),
2631 .mode = 0644,
6d9f239a 2632 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2633 },
f8572d8f 2634 { }
1da177e4
LT
2635};
2636
2c8c1e72 2637struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2638{
2639 struct ctl_table *table;
2640
2641 table = kmemdup(ipv6_route_table_template,
2642 sizeof(ipv6_route_table_template),
2643 GFP_KERNEL);
5ee09105
YH
2644
2645 if (table) {
2646 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2647 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2648 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2649 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2650 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2651 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2652 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2653 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2654 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2655 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2656 }
2657
760f2d01
DL
2658 return table;
2659}
1da177e4
LT
2660#endif
2661
2c8c1e72 2662static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2663{
633d424b 2664 int ret = -ENOMEM;
8ed67789 2665
86393e52
AD
2666 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2667 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2668
8ed67789
DL
2669 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2670 sizeof(*net->ipv6.ip6_null_entry),
2671 GFP_KERNEL);
2672 if (!net->ipv6.ip6_null_entry)
f2fc6a54 2673 goto out_ip6_dst_ops;
d8d1f30b 2674 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2675 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2676 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2677
2678#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2679 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2680 sizeof(*net->ipv6.ip6_prohibit_entry),
2681 GFP_KERNEL);
68fffc67
PZ
2682 if (!net->ipv6.ip6_prohibit_entry)
2683 goto out_ip6_null_entry;
d8d1f30b 2684 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2685 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2686 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2687
2688 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2689 sizeof(*net->ipv6.ip6_blk_hole_entry),
2690 GFP_KERNEL);
68fffc67
PZ
2691 if (!net->ipv6.ip6_blk_hole_entry)
2692 goto out_ip6_prohibit_entry;
d8d1f30b 2693 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2694 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2695 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2696#endif
2697
b339a47c
PZ
2698 net->ipv6.sysctl.flush_delay = 0;
2699 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2700 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2701 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2702 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2703 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2704 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2705 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2706
cdb18761
DL
2707#ifdef CONFIG_PROC_FS
2708 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2709 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2710#endif
6891a346
BT
2711 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2712
8ed67789
DL
2713 ret = 0;
2714out:
2715 return ret;
f2fc6a54 2716
68fffc67
PZ
2717#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2718out_ip6_prohibit_entry:
2719 kfree(net->ipv6.ip6_prohibit_entry);
2720out_ip6_null_entry:
2721 kfree(net->ipv6.ip6_null_entry);
2722#endif
f2fc6a54 2723out_ip6_dst_ops:
f2fc6a54 2724 goto out;
cdb18761
DL
2725}
2726
2c8c1e72 2727static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2728{
2729#ifdef CONFIG_PROC_FS
2730 proc_net_remove(net, "ipv6_route");
2731 proc_net_remove(net, "rt6_stats");
2732#endif
8ed67789
DL
2733 kfree(net->ipv6.ip6_null_entry);
2734#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2735 kfree(net->ipv6.ip6_prohibit_entry);
2736 kfree(net->ipv6.ip6_blk_hole_entry);
2737#endif
cdb18761
DL
2738}
2739
2740static struct pernet_operations ip6_route_net_ops = {
2741 .init = ip6_route_net_init,
2742 .exit = ip6_route_net_exit,
2743};
2744
8ed67789
DL
2745static struct notifier_block ip6_route_dev_notifier = {
2746 .notifier_call = ip6_route_dev_notify,
2747 .priority = 0,
2748};
2749
433d49c3 2750int __init ip6_route_init(void)
1da177e4 2751{
433d49c3
DL
2752 int ret;
2753
9a7ec3a9
DL
2754 ret = -ENOMEM;
2755 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2756 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2757 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2758 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2759 goto out;
14e50e57 2760
8ed67789
DL
2761 ret = register_pernet_subsys(&ip6_route_net_ops);
2762 if (ret)
bdb3289f 2763 goto out_kmem_cache;
bdb3289f 2764
5dc121e9
AE
2765 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2766
8ed67789
DL
2767 /* Registering of the loopback is done before this portion of code,
2768 * the loopback reference in rt6_info will not be taken, do it
2769 * manually for init_net */
d8d1f30b 2770 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2771 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2772 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2773 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2774 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2775 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2776 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2777 #endif
433d49c3
DL
2778 ret = fib6_init();
2779 if (ret)
8ed67789 2780 goto out_register_subsys;
433d49c3 2781
433d49c3
DL
2782 ret = xfrm6_init();
2783 if (ret)
cdb18761 2784 goto out_fib6_init;
c35b7e72 2785
433d49c3
DL
2786 ret = fib6_rules_init();
2787 if (ret)
2788 goto xfrm6_init;
7e5449c2 2789
433d49c3
DL
2790 ret = -ENOBUFS;
2791 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2792 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2793 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2794 goto fib6_rules_init;
c127ea2c 2795
8ed67789 2796 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2797 if (ret)
2798 goto fib6_rules_init;
8ed67789 2799
433d49c3
DL
2800out:
2801 return ret;
2802
2803fib6_rules_init:
433d49c3
DL
2804 fib6_rules_cleanup();
2805xfrm6_init:
433d49c3 2806 xfrm6_fini();
433d49c3 2807out_fib6_init:
433d49c3 2808 fib6_gc_cleanup();
8ed67789
DL
2809out_register_subsys:
2810 unregister_pernet_subsys(&ip6_route_net_ops);
433d49c3 2811out_kmem_cache:
f2fc6a54 2812 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2813 goto out;
1da177e4
LT
2814}
2815
2816void ip6_route_cleanup(void)
2817{
8ed67789 2818 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2819 fib6_rules_cleanup();
1da177e4 2820 xfrm6_fini();
1da177e4 2821 fib6_gc_cleanup();
8ed67789 2822 unregister_pernet_subsys(&ip6_route_net_ops);
f2fc6a54 2823 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2824}