bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
cpmac: bump version to 0.5.2
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
457c4cbc 43#include <net/net_namespace.h>
1da177e4
LT
44#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
8d71740c 54#include <net/netevent.h>
21713ebc 55#include <net/netlink.h>
1da177e4
LT
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug verbosity for this file.  Raise RT6_DEBUG to 3 (or more) to make
 * RDBG() and RT6_TRACE() print through printk(); at lower levels both
 * expand to nothing / an empty statement.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, ip6_pol_route() clones routes that do not need the
 * copy-with-neighbour path via rt6_alloc_clone(); when zero it returns
 * the selected route itself.
 */
#define CLONE_OFFLINK_ROUTE 0
1da177e4 75
1da177e4
LT
76static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
569d3645 82static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
83
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
70ceb4f5 89#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
90static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
efa2cea0
DL
94static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
96 struct in6_addr *gwaddr, int ifindex);
97#endif
98
9a7ec3a9 99static struct dst_ops ip6_dst_ops_template = {
1da177e4 100 .family = AF_INET6,
09640e63 101 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 110 .local_out = __ip6_local_out,
e2422970 111 .entries = ATOMIC_INIT(0),
1da177e4
LT
112};
113
14e50e57
DM
114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
09640e63 120 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
e2422970 124 .entries = ATOMIC_INIT(0),
14e50e57
DM
125};
126
bdb3289f 127static struct rt6_info ip6_null_entry_template = {
1da177e4
LT
128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
1da177e4
LT
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
1da177e4
LT
137 }
138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 140 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
143};
144
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Initialiser for the "prohibit" route: a reject entry whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit*() handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.u.dst.__refcnt				= ATOMIC_INIT(1),
	.u.dst.__use				= 1,
	.u.dst.obsolete				= -1,
	.u.dst.error				= -EACCES,
	.u.dst.metrics[RTAX_HOPLIMIT - 1]	= 255,
	.u.dst.input				= ip6_pkt_prohibit,
	.u.dst.output				= ip6_pkt_prohibit_out,
	.rt6i_flags				= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol				= RTPROT_KERNEL,
	.rt6i_metric				= ~(u32) 0,
	.rt6i_ref				= ATOMIC_INIT(1),
};

/*
 * Initialiser for the "blackhole" route: a reject entry whose dst reports
 * -EINVAL and drops packets through dst_discard in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u.dst.__refcnt				= ATOMIC_INIT(1),
	.u.dst.__use				= 1,
	.u.dst.obsolete				= -1,
	.u.dst.error				= -EINVAL,
	.u.dst.metrics[RTAX_HOPLIMIT - 1]	= 255,
	.u.dst.input				= dst_discard,
	.u.dst.output				= dst_discard,
	.rt6i_flags				= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol				= RTPROT_KERNEL,
	.rt6i_metric				= ~(u32) 0,
	.rt6i_ref				= ATOMIC_INIT(1),
};

#endif
187
/*
 * Allocate a dst entry through @ops and return it typed as an rt6_info.
 * The result of dst_alloc() is passed through unchanged, so a failed
 * allocation is reported to the caller exactly as dst_alloc() reports it.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
193
194static void ip6_dst_destroy(struct dst_entry *dst)
195{
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
1ab1457c 202 }
1da177e4
LT
203}
204
205static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
207{
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 210 struct net_device *loopback_dev =
c346dca1 211 dev_net(dev)->loopback_dev;
1da177e4 212
5a3e55d6
DL
213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
1da177e4
LT
216 if (loopback_idev != NULL) {
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
219 }
220 }
221}
222
223static __inline__ int rt6_check_expired(const struct rt6_info *rt)
224{
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
227}
228
c71099ac
TG
229static inline int rt6_need_strict(struct in6_addr *daddr)
230{
231 return (ipv6_addr_type(daddr) &
5ce83afa 232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
c71099ac
TG
233}
234
1da177e4 235/*
c71099ac 236 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
237 */
238
8ed67789
DL
239static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
dd3abc4e 241 struct in6_addr *saddr,
1da177e4 242 int oif,
d420895e 243 int flags)
1da177e4
LT
244{
245 struct rt6_info *local = NULL;
246 struct rt6_info *sprt;
247
dd3abc4e
YH
248 if (!oif && ipv6_addr_any(saddr))
249 goto out;
250
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
253
254 if (oif) {
1da177e4
LT
255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 260 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 261 continue;
1ab1457c 262 if (local && (!oif ||
1da177e4
LT
263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
dd3abc4e
YH
268 } else {
269 if (ipv6_chk_addr(net, saddr, dev,
270 flags & RT6_LOOKUP_F_IFACE))
271 return sprt;
1da177e4 272 }
dd3abc4e 273 }
1da177e4 274
dd3abc4e 275 if (oif) {
1da177e4
LT
276 if (local)
277 return local;
278
d420895e 279 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 280 return net->ipv6.ip6_null_entry;
1da177e4 281 }
dd3abc4e 282out:
1da177e4
LT
283 return rt;
284}
285
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a valid
 * NUD state, send a neighbour solicitation to its solicited-node
 * multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; here a probe is only sent once the idev's rtr_probe_interval
 * has elapsed since neigh->updated.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing; the read side is not sufficient for a store to the entry.
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;

	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation outside the lock. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
321
1da177e4 322/*
554cfb7e 323 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 324 */
b6f99a21 325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
326{
327 struct net_device *dev = rt->rt6i_dev;
161980f4 328 if (!oif || dev->ifindex == oif)
554cfb7e 329 return 2;
161980f4
DM
330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
554cfb7e 334}
1da177e4 335
b6f99a21 336static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 337{
554cfb7e 338 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 339 int m;
4d0c5911
YH
340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
554cfb7e
YH
344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
4d0c5911 346 m = 2;
398bcbeb
YH
347#ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350#endif
351 else
ea73ee23 352 m = 1;
554cfb7e 353 read_unlock_bh(&neigh->lock);
398bcbeb
YH
354 } else
355 m = 0;
554cfb7e 356 return m;
1da177e4
LT
357}
358
554cfb7e
YH
359static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
1da177e4 361{
4d0c5911 362 int m, n;
1ab1457c 363
4d0c5911 364 m = rt6_check_dev(rt, oif);
77d16f45 365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 366 return -1;
ebacaaa0
YH
367#ifdef CONFIG_IPV6_ROUTER_PREF
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369#endif
4d0c5911 370 n = rt6_check_neigh(rt);
557e92ef 371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
372 return -1;
373 return m;
374}
375
f11e6659
DM
376static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
554cfb7e 378{
f11e6659
DM
379 int m;
380
381 if (rt6_check_expired(rt))
382 goto out;
383
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
387
388 if (m > *mpri) {
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
395 }
396
397out:
398 return match;
399}
400
401static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
404{
405 struct rt6_info *rt, *match;
554cfb7e 406 int mpri = -1;
1da177e4 407
f11e6659
DM
408 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 415
f11e6659
DM
416 return match;
417}
1da177e4 418
f11e6659
DM
419static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
420{
421 struct rt6_info *match, *rt0;
8ed67789 422 struct net *net;
1da177e4 423
f11e6659 424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 425 __func__, fn->leaf, oif);
554cfb7e 426
f11e6659
DM
427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 430
f11e6659 431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 432
554cfb7e 433 if (!match &&
f11e6659
DM
434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
436
554cfb7e 437 /* no entries matched; do round-robin */
f11e6659
DM
438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
440
441 if (next != rt0)
442 fn->rr_ptr = next;
1da177e4 443 }
1da177e4 444
f11e6659 445 RT6_TRACE("%s() => %p\n",
0dc47877 446 __func__, match);
1da177e4 447
c346dca1 448 net = dev_net(rt0->rt6i_dev);
8ed67789 449 return (match ? match : net->ipv6.ip6_null_entry);
1da177e4
LT
450}
451
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (@opt, @len bytes) received on @dev
 * from gateway @gwaddr.
 *
 * After validating the option, the matching route-info route is looked
 * up: a zero lifetime deletes it, a non-zero lifetime creates it when
 * missing, and an existing route has its preference bits refreshed.  The
 * route's expiry is then set from the lifetime (or cleared for an
 * infinite one).
 *
 * Returns 0 on success, -EINVAL for a malformed option or an invalid
 * route preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;
	int min_length;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;

	/* Longer prefixes need more 8-byte units to carry them. */
	if (rinfo->prefix_len > 64)
		min_length = 2;
	else if (rinfo->prefix_len > 0)
		min_length = 1;
	else
		min_length = 0;
	if (rinfo->length < min_length)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (rt == NULL)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
525
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed match.
 *
 * Must be expanded in a function that has local variables 'rt' and 'fn'
 * and labels 'out' and 'restart'.  When 'rt' is the namespace's
 * ip6_null_entry, 'fn' is moved towards the root (descending into a
 * parent's source-address subtree via fib6_lookup() where there is one)
 * until either the top-level root is reached (goto out) or a node
 * carrying route information is found (goto restart).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	struct fib6_node *pn;						\
	if (rt != __net->ipv6.ip6_null_entry)				\
		break;							\
	for (;;) {							\
		if (fn->fn_flags & RTN_TL_ROOT)				\
			goto out;					\
		pn = fn->parent;					\
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)		\
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
		else							\
			fn = pn;					\
		if (fn->fn_flags & RTN_RTINFO)				\
			goto restart;					\
	}								\
} while(0)
c71099ac 543
8ed67789
DL
544static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
c71099ac 546 struct flowi *fl, int flags)
1da177e4
LT
547{
548 struct fib6_node *fn;
549 struct rt6_info *rt;
550
c71099ac
TG
551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553restart:
554 rt = fn->leaf;
dd3abc4e 555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 556 BACKTRACK(net, &fl->fl6_src);
c71099ac 557out:
03f49f34 558 dst_use(&rt->u.dst, jiffies);
c71099ac 559 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
560 return rt;
561
562}
563
9acd9f3a
YH
564struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
c71099ac
TG
572 },
573 },
574 };
575 struct dst_entry *dst;
77d16f45 576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 577
adaa70bb
TG
578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
606a2b48 583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
1da177e4
LT
589 return NULL;
590}
591
7159039a
YH
592EXPORT_SYMBOL(rt6_lookup);
593
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to the route, it may be destroyed.
 */
599
86872cb5 600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
601{
602 int err;
c71099ac 603 struct fib6_table *table;
1da177e4 604
c71099ac
TG
605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
86872cb5 607 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 608 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
609
610 return err;
611}
612
40e22e8f
TG
613int ip6_ins_rt(struct rt6_info *rt)
614{
4d1169c1 615 struct nl_info info = {
c346dca1 616 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 617 };
528c4ceb 618 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
619}
620
95a9a5ba
YH
621static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
1da177e4 623{
1da177e4
LT
624 struct rt6_info *rt;
625
626 /*
627 * Clone the route.
628 */
629
630 rt = ip6_rt_copy(ort);
631
632 if (rt) {
14deae41
DM
633 struct neighbour *neigh;
634 int attempts = !in_softirq();
635
58c4fb86
YH
636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 641 }
1da177e4 642
58c4fb86 643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
647
648#ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
652 }
653#endif
654
14deae41
DM
655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
663
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
667
86393e52 668 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
669
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
675 }
676
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
682 }
683 rt->rt6i_nexthop = neigh;
1da177e4 684
95a9a5ba 685 }
1da177e4 686
95a9a5ba
YH
687 return rt;
688}
1da177e4 689
299d9939
YH
690static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
691{
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
699 }
700 return rt;
701}
702
8ed67789
DL
703static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
1da177e4
LT
705{
706 struct fib6_node *fn;
519fbd87 707 struct rt6_info *rt, *nrt;
c71099ac 708 int strict = 0;
1da177e4 709 int attempts = 3;
519fbd87 710 int err;
53b7997f 711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 712
77d16f45 713 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
714
715relookup:
c71099ac 716 read_lock_bh(&table->tb6_lock);
1da177e4 717
8238dd06 718restart_2:
c71099ac 719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
720
721restart:
4acad72d 722 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
723
724 BACKTRACK(net, &fl->fl6_src);
725 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 726 rt->rt6i_flags & RTF_CACHE)
1ddef044 727 goto out;
1da177e4 728
fb9de91e 729 dst_hold(&rt->u.dst);
c71099ac 730 read_unlock_bh(&table->tb6_lock);
fb9de91e 731
519fbd87 732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
734 else {
735#if CLONE_OFFLINK_ROUTE
c71099ac 736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
737#else
738 goto out2;
739#endif
740 }
e40cf353 741
519fbd87 742 dst_release(&rt->u.dst);
8ed67789 743 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 744
519fbd87
YH
745 dst_hold(&rt->u.dst);
746 if (nrt) {
40e22e8f 747 err = ip6_ins_rt(nrt);
519fbd87 748 if (!err)
1da177e4 749 goto out2;
1da177e4 750 }
1da177e4 751
519fbd87
YH
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
c71099ac 756 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
757 * released someone could insert this route. Relookup.
758 */
759 dst_release(&rt->u.dst);
760 goto relookup;
761
762out:
8238dd06
YH
763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
519fbd87 767 dst_hold(&rt->u.dst);
c71099ac 768 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
769out2:
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
c71099ac
TG
772
773 return rt;
1da177e4
LT
774}
775
8ed67789 776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
777 struct flowi *fl, int flags)
778{
8ed67789 779 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
780}
781
c71099ac
TG
782void ip6_route_input(struct sk_buff *skb)
783{
0660e03f 784 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 785 struct net *net = dev_net(skb->dev);
adaa70bb 786 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
90bcaf7b 793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
794 },
795 },
1ab1457c 796 .mark = skb->mark,
c71099ac
TG
797 .proto = iph->nexthdr,
798 };
adaa70bb 799
1d6e55f1 800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 801 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 802
adf30907 803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
804}
805
8ed67789 806static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 807 struct flowi *fl, int flags)
1da177e4 808{
8ed67789 809 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
810}
811
4591db4f
DL
812struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
c71099ac
TG
814{
815 int flags = 0;
816
817 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 818 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 819
adaa70bb
TG
820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
7cbca67c
YH
822 else if (sk) {
823 unsigned int prefs = inet6_sk(sk)->srcprefs;
824 if (prefs & IPV6_PREFER_SRC_TMP)
825 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
826 if (prefs & IPV6_PREFER_SRC_PUBLIC)
827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
828 if (prefs & IPV6_PREFER_SRC_COA)
829 flags |= RT6_LOOKUP_F_SRCPREF_COA;
830 }
adaa70bb 831
4591db4f 832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
833}
834
7159039a 835EXPORT_SYMBOL(ip6_route_output);
1da177e4 836
14e50e57
DM
837int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
838{
839 struct rt6_info *ort = (struct rt6_info *) *dstp;
840 struct rt6_info *rt = (struct rt6_info *)
841 dst_alloc(&ip6_dst_blackhole_ops);
842 struct dst_entry *new = NULL;
843
844 if (rt) {
845 new = &rt->u.dst;
846
847 atomic_set(&new->__refcnt, 1);
848 new->__use = 1;
352e512c
HX
849 new->input = dst_discard;
850 new->output = dst_discard;
14e50e57
DM
851
852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
853 new->dev = ort->u.dst.dev;
854 if (new->dev)
855 dev_hold(new->dev);
856 rt->rt6i_idev = ort->rt6i_idev;
857 if (rt->rt6i_idev)
858 in6_dev_hold(rt->rt6i_idev);
859 rt->rt6i_expires = 0;
860
861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
863 rt->rt6i_metric = 0;
864
865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
866#ifdef CONFIG_IPV6_SUBTREES
867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
868#endif
869
870 dst_free(new);
871 }
872
873 dst_release(*dstp);
874 *dstp = new;
875 return (new ? 0 : -ENOMEM);
876}
877EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
878
1da177e4
LT
879/*
880 * Destination cache support functions
881 */
882
883static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
884{
885 struct rt6_info *rt;
886
887 rt = (struct rt6_info *) dst;
888
889 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
890 return dst;
891
892 return NULL;
893}
894
895static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
896{
897 struct rt6_info *rt = (struct rt6_info *) dst;
898
899 if (rt) {
900 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 901 ip6_del_rt(rt);
1da177e4
LT
902 else
903 dst_release(dst);
904 }
905 return NULL;
906}
907
908static void ip6_link_failure(struct sk_buff *skb)
909{
910 struct rt6_info *rt;
911
3ffe533c 912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 913
adf30907 914 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
915 if (rt) {
916 if (rt->rt6i_flags&RTF_CACHE) {
917 dst_set_expires(&rt->u.dst, 0);
918 rt->rt6i_flags |= RTF_EXPIRES;
919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
920 rt->rt6i_node->fn_sernum = -1;
921 }
922}
923
924static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
925{
926 struct rt6_info *rt6 = (struct rt6_info*)dst;
927
928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
929 rt6->rt6i_flags |= RTF_MODIFIED;
930 if (mtu < IPV6_MIN_MTU) {
931 mtu = IPV6_MIN_MTU;
932 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
933 }
934 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 935 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
936 }
937}
938
1da177e4
LT
939static int ipv6_get_mtu(struct net_device *dev);
940
5578689a 941static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
942{
943 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
944
5578689a
DL
945 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
946 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
947
948 /*
1ab1457c
YH
949 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
950 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
951 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
952 * rely only on pmtu discovery"
953 */
954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
955 mtu = IPV6_MAXPLEN;
956 return mtu;
957}
958
3b00944c
YH
/*
 * Singly linked list (through dst->next) of the dst entries handed out
 * by icmp6_dst_alloc(); icmp6_dst_lock guards every walk and update of
 * the list.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 961
3b00944c 962struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 963 struct neighbour *neigh,
9acd9f3a 964 const struct in6_addr *addr)
1da177e4
LT
965{
966 struct rt6_info *rt;
967 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 968 struct net *net = dev_net(dev);
1da177e4
LT
969
970 if (unlikely(idev == NULL))
971 return NULL;
972
86393e52 973 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
974 if (unlikely(rt == NULL)) {
975 in6_dev_put(idev);
976 goto out;
977 }
978
979 dev_hold(dev);
980 if (neigh)
981 neigh_hold(neigh);
14deae41 982 else {
1da177e4 983 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
984 if (IS_ERR(neigh))
985 neigh = NULL;
986 }
1da177e4
LT
987
988 rt->rt6i_dev = dev;
989 rt->rt6i_idev = idev;
990 rt->rt6i_nexthop = neigh;
991 atomic_set(&rt->u.dst.__refcnt, 1);
992 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
993 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 994 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
3b00944c 995 rt->u.dst.output = ip6_output;
1da177e4
LT
996
997#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
998 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
999 ? DST_HOST
1da177e4
LT
1000 : 0;
1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1002 rt->rt6i_dst.plen = 128;
1003#endif
1004
3b00944c
YH
1005 spin_lock_bh(&icmp6_dst_lock);
1006 rt->u.dst.next = icmp6_dst_gc_list;
1007 icmp6_dst_gc_list = &rt->u.dst;
1008 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1009
5578689a 1010 fib6_force_start_gc(net);
1da177e4
LT
1011
1012out:
40aa7b90 1013 return &rt->u.dst;
1da177e4
LT
1014}
1015
3d0f24a7 1016int icmp6_dst_gc(void)
1da177e4
LT
1017{
1018 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1019 int more = 0;
1da177e4
LT
1020
1021 next = NULL;
5d0bbeeb 1022
3b00944c
YH
1023 spin_lock_bh(&icmp6_dst_lock);
1024 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1025
1da177e4
LT
1026 while ((dst = *pprev) != NULL) {
1027 if (!atomic_read(&dst->__refcnt)) {
1028 *pprev = dst->next;
1029 dst_free(dst);
1da177e4
LT
1030 } else {
1031 pprev = &dst->next;
3d0f24a7 1032 ++more;
1da177e4
LT
1033 }
1034 }
1035
3b00944c 1036 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1037
3d0f24a7 1038 return more;
1da177e4
LT
1039}
1040
1e493d19
DM
1041static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1042 void *arg)
1043{
1044 struct dst_entry *dst, **pprev;
1045
1046 spin_lock_bh(&icmp6_dst_lock);
1047 pprev = &icmp6_dst_gc_list;
1048 while ((dst = *pprev) != NULL) {
1049 struct rt6_info *rt = (struct rt6_info *) dst;
1050 if (func(rt, arg)) {
1051 *pprev = dst->next;
1052 dst_free(dst);
1053 } else {
1054 pprev = &dst->next;
1055 }
1056 }
1057 spin_unlock_bh(&icmp6_dst_lock);
1058}
1059
569d3645 1060static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1061{
1da177e4 1062 unsigned long now = jiffies;
86393e52 1063 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1069
1070 if (time_after(rt_last_gc + rt_min_interval, now) &&
1071 atomic_read(&ops->entries) <= rt_max_size)
1da177e4
LT
1072 goto out;
1073
6891a346
BT
1074 net->ipv6.ip6_rt_gc_expire++;
1075 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1076 net->ipv6.ip6_rt_last_gc = now;
7019b78e
DL
1077 if (atomic_read(&ops->entries) < ops->gc_thresh)
1078 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1079out:
7019b78e
DL
1080 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1081 return (atomic_read(&ops->entries) > rt_max_size);
1da177e4
LT
1082}
1083
1084/* Clean host part of a prefix. Not necessary in radix tree,
1085 but results in cleaner routing tables.
1086
1087 Remove it only when all the things will work!
1088 */
1089
1090static int ipv6_get_mtu(struct net_device *dev)
1091{
1092 int mtu = IPV6_MIN_MTU;
1093 struct inet6_dev *idev;
1094
1095 idev = in6_dev_get(dev);
1096 if (idev) {
1097 mtu = idev->cnf.mtu6;
1098 in6_dev_put(idev);
1099 }
1100 return mtu;
1101}
1102
6b75d090 1103int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1104{
6b75d090
YH
1105 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1106 if (hoplimit < 0) {
1107 struct net_device *dev = dst->dev;
1108 struct inet6_dev *idev = in6_dev_get(dev);
1109 if (idev) {
1110 hoplimit = idev->cnf.hop_limit;
1111 in6_dev_put(idev);
1112 } else
53b7997f 1113 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1da177e4
LT
1114 }
1115 return hoplimit;
1116}
1117
1118/*
1119 *
1120 */
1121
86872cb5 1122int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1123{
1124 int err;
5578689a 1125 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1126 struct rt6_info *rt = NULL;
1127 struct net_device *dev = NULL;
1128 struct inet6_dev *idev = NULL;
c71099ac 1129 struct fib6_table *table;
1da177e4
LT
1130 int addr_type;
1131
86872cb5 1132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1133 return -EINVAL;
1134#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1135 if (cfg->fc_src_len)
1da177e4
LT
1136 return -EINVAL;
1137#endif
86872cb5 1138 if (cfg->fc_ifindex) {
1da177e4 1139 err = -ENODEV;
5578689a 1140 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1141 if (!dev)
1142 goto out;
1143 idev = in6_dev_get(dev);
1144 if (!idev)
1145 goto out;
1146 }
1147
86872cb5
TG
1148 if (cfg->fc_metric == 0)
1149 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1150
5578689a 1151 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1152 if (table == NULL) {
1153 err = -ENOBUFS;
1154 goto out;
1155 }
1156
86393e52 1157 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1158
1159 if (rt == NULL) {
1160 err = -ENOMEM;
1161 goto out;
1162 }
1163
1164 rt->u.dst.obsolete = -1;
6f704992
YH
1165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1167 0;
1da177e4 1168
86872cb5
TG
1169 if (cfg->fc_protocol == RTPROT_UNSPEC)
1170 cfg->fc_protocol = RTPROT_BOOT;
1171 rt->rt6i_protocol = cfg->fc_protocol;
1172
1173 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1174
1175 if (addr_type & IPV6_ADDR_MULTICAST)
1176 rt->u.dst.input = ip6_mc_input;
1177 else
1178 rt->u.dst.input = ip6_forward;
1179
1180 rt->u.dst.output = ip6_output;
1181
86872cb5
TG
1182 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1183 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1184 if (rt->rt6i_dst.plen == 128)
1185 rt->u.dst.flags = DST_HOST;
1186
1187#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1188 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1189 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1190#endif
1191
86872cb5 1192 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1193
1194 /* We cannot add true routes via loopback here,
1195 they would result in kernel looping; promote them to reject routes
1196 */
86872cb5 1197 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1198 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1199 /* hold loopback dev/idev if we haven't done so. */
5578689a 1200 if (dev != net->loopback_dev) {
1da177e4
LT
1201 if (dev) {
1202 dev_put(dev);
1203 in6_dev_put(idev);
1204 }
5578689a 1205 dev = net->loopback_dev;
1da177e4
LT
1206 dev_hold(dev);
1207 idev = in6_dev_get(dev);
1208 if (!idev) {
1209 err = -ENODEV;
1210 goto out;
1211 }
1212 }
1213 rt->u.dst.output = ip6_pkt_discard_out;
1214 rt->u.dst.input = ip6_pkt_discard;
1215 rt->u.dst.error = -ENETUNREACH;
1216 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1217 goto install_route;
1218 }
1219
86872cb5 1220 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1221 struct in6_addr *gw_addr;
1222 int gwa_type;
1223
86872cb5
TG
1224 gw_addr = &cfg->fc_gateway;
1225 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1226 gwa_type = ipv6_addr_type(gw_addr);
1227
1228 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1229 struct rt6_info *grt;
1230
1231 /* IPv6 strictly inhibits using not link-local
1232 addresses as nexthop address.
1233 Otherwise, router will not able to send redirects.
1234 It is very good, but in some (rare!) circumstances
1235 (SIT, PtP, NBMA NOARP links) it is handy to allow
1236 some exceptions. --ANK
1237 */
1238 err = -EINVAL;
1239 if (!(gwa_type&IPV6_ADDR_UNICAST))
1240 goto out;
1241
5578689a 1242 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1243
1244 err = -EHOSTUNREACH;
1245 if (grt == NULL)
1246 goto out;
1247 if (dev) {
1248 if (dev != grt->rt6i_dev) {
1249 dst_release(&grt->u.dst);
1250 goto out;
1251 }
1252 } else {
1253 dev = grt->rt6i_dev;
1254 idev = grt->rt6i_idev;
1255 dev_hold(dev);
1256 in6_dev_hold(grt->rt6i_idev);
1257 }
1258 if (!(grt->rt6i_flags&RTF_GATEWAY))
1259 err = 0;
1260 dst_release(&grt->u.dst);
1261
1262 if (err)
1263 goto out;
1264 }
1265 err = -EINVAL;
1266 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1267 goto out;
1268 }
1269
1270 err = -ENODEV;
1271 if (dev == NULL)
1272 goto out;
1273
86872cb5 1274 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1275 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1276 if (IS_ERR(rt->rt6i_nexthop)) {
1277 err = PTR_ERR(rt->rt6i_nexthop);
1278 rt->rt6i_nexthop = NULL;
1279 goto out;
1280 }
1281 }
1282
86872cb5 1283 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1284
1285install_route:
86872cb5
TG
1286 if (cfg->fc_mx) {
1287 struct nlattr *nla;
1288 int remaining;
1289
1290 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1291 int type = nla_type(nla);
86872cb5
TG
1292
1293 if (type) {
1294 if (type > RTAX_MAX) {
1da177e4
LT
1295 err = -EINVAL;
1296 goto out;
1297 }
86872cb5
TG
1298
1299 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1300 }
1da177e4
LT
1301 }
1302 }
1303
5ffc02a1 1304 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1da177e4 1305 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1ca615fb 1306 if (!dst_mtu(&rt->u.dst))
1da177e4 1307 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
5ffc02a1 1308 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
5578689a 1309 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1310 rt->u.dst.dev = dev;
1311 rt->rt6i_idev = idev;
c71099ac 1312 rt->rt6i_table = table;
63152fc0 1313
c346dca1 1314 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1315
86872cb5 1316 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1317
1318out:
1319 if (dev)
1320 dev_put(dev);
1321 if (idev)
1322 in6_dev_put(idev);
1323 if (rt)
40aa7b90 1324 dst_free(&rt->u.dst);
1da177e4
LT
1325 return err;
1326}
1327
86872cb5 1328static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1329{
1330 int err;
c71099ac 1331 struct fib6_table *table;
c346dca1 1332 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1333
8ed67789 1334 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1335 return -ENOENT;
1336
c71099ac
TG
1337 table = rt->rt6i_table;
1338 write_lock_bh(&table->tb6_lock);
1da177e4 1339
86872cb5 1340 err = fib6_del(rt, info);
1da177e4
LT
1341 dst_release(&rt->u.dst);
1342
c71099ac 1343 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1344
1345 return err;
1346}
1347
e0a1ad73
TG
1348int ip6_del_rt(struct rt6_info *rt)
1349{
4d1169c1 1350 struct nl_info info = {
c346dca1 1351 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1352 };
528c4ceb 1353 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1354}
1355
86872cb5 1356static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1357{
c71099ac 1358 struct fib6_table *table;
1da177e4
LT
1359 struct fib6_node *fn;
1360 struct rt6_info *rt;
1361 int err = -ESRCH;
1362
5578689a 1363 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1364 if (table == NULL)
1365 return err;
1366
1367 read_lock_bh(&table->tb6_lock);
1da177e4 1368
c71099ac 1369 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1370 &cfg->fc_dst, cfg->fc_dst_len,
1371 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1372
1da177e4 1373 if (fn) {
7cc48263 1374 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1375 if (cfg->fc_ifindex &&
1da177e4 1376 (rt->rt6i_dev == NULL ||
86872cb5 1377 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1378 continue;
86872cb5
TG
1379 if (cfg->fc_flags & RTF_GATEWAY &&
1380 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1381 continue;
86872cb5 1382 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1383 continue;
1384 dst_hold(&rt->u.dst);
c71099ac 1385 read_unlock_bh(&table->tb6_lock);
1da177e4 1386
86872cb5 1387 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1388 }
1389 }
c71099ac 1390 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1391
1392 return err;
1393}
1394
1395/*
1396 * Handle redirects
1397 */
a6279458
YH
1398struct ip6rd_flowi {
1399 struct flowi fl;
1400 struct in6_addr gateway;
1401};
1402
8ed67789
DL
1403static struct rt6_info *__ip6_route_redirect(struct net *net,
1404 struct fib6_table *table,
a6279458
YH
1405 struct flowi *fl,
1406 int flags)
1da177e4 1407{
a6279458
YH
1408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1409 struct rt6_info *rt;
e843b9e1 1410 struct fib6_node *fn;
c71099ac 1411
1da177e4 1412 /*
e843b9e1
YH
1413 * Get the "current" route for this destination and
1414 * check if the redirect has come from approriate router.
1415 *
1416 * RFC 2461 specifies that redirects should only be
1417 * accepted if they come from the nexthop to the target.
1418 * Due to the way the routes are chosen, this notion
1419 * is a bit fuzzy and one might need to check all possible
1420 * routes.
1da177e4 1421 */
1da177e4 1422
c71099ac 1423 read_lock_bh(&table->tb6_lock);
a6279458 1424 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1425restart:
7cc48263 1426 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1427 /*
1428 * Current route is on-link; redirect is always invalid.
1429 *
1430 * Seems, previous statement is not true. It could
1431 * be node, which looks for us as on-link (f.e. proxy ndisc)
1432 * But then router serving it might decide, that we should
1433 * know truth 8)8) --ANK (980726).
1434 */
1435 if (rt6_check_expired(rt))
1436 continue;
1437 if (!(rt->rt6i_flags & RTF_GATEWAY))
1438 continue;
a6279458 1439 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1440 continue;
a6279458 1441 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1442 continue;
1443 break;
1444 }
a6279458 1445
cb15d9c2 1446 if (!rt)
8ed67789
DL
1447 rt = net->ipv6.ip6_null_entry;
1448 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1449out:
a6279458
YH
1450 dst_hold(&rt->u.dst);
1451
c71099ac 1452 read_unlock_bh(&table->tb6_lock);
e843b9e1 1453
a6279458
YH
1454 return rt;
1455};
1456
1457static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1458 struct in6_addr *src,
1459 struct in6_addr *gateway,
1460 struct net_device *dev)
1461{
adaa70bb 1462 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1463 struct net *net = dev_net(dev);
a6279458
YH
1464 struct ip6rd_flowi rdfl = {
1465 .fl = {
1466 .oif = dev->ifindex,
1467 .nl_u = {
1468 .ip6_u = {
1469 .daddr = *dest,
1470 .saddr = *src,
1471 },
1472 },
1473 },
a6279458 1474 };
adaa70bb 1475
86c36ce4
BH
1476 ipv6_addr_copy(&rdfl.gateway, gateway);
1477
adaa70bb
TG
1478 if (rt6_need_strict(dest))
1479 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1480
5578689a 1481 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1482 flags, __ip6_route_redirect);
a6279458
YH
1483}
1484
1485void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1486 struct in6_addr *saddr,
1487 struct neighbour *neigh, u8 *lladdr, int on_link)
1488{
1489 struct rt6_info *rt, *nrt = NULL;
1490 struct netevent_redirect netevent;
c346dca1 1491 struct net *net = dev_net(neigh->dev);
a6279458
YH
1492
1493 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1494
8ed67789 1495 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1496 if (net_ratelimit())
1497 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1498 "for redirect target\n");
a6279458 1499 goto out;
1da177e4
LT
1500 }
1501
1da177e4
LT
1502 /*
1503 * We have finally decided to accept it.
1504 */
1505
1ab1457c 1506 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1507 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1508 NEIGH_UPDATE_F_OVERRIDE|
1509 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1510 NEIGH_UPDATE_F_ISROUTER))
1511 );
1512
1513 /*
1514 * Redirect received -> path was valid.
1515 * Look, redirects are sent only in response to data packets,
1516 * so that this nexthop apparently is reachable. --ANK
1517 */
1518 dst_confirm(&rt->u.dst);
1519
1520 /* Duplicate redirect: silently ignore. */
1521 if (neigh == rt->u.dst.neighbour)
1522 goto out;
1523
1524 nrt = ip6_rt_copy(rt);
1525 if (nrt == NULL)
1526 goto out;
1527
1528 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1529 if (on_link)
1530 nrt->rt6i_flags &= ~RTF_GATEWAY;
1531
1532 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1533 nrt->rt6i_dst.plen = 128;
1534 nrt->u.dst.flags |= DST_HOST;
1535
1536 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1537 nrt->rt6i_nexthop = neigh_clone(neigh);
1538 /* Reset pmtu, it may be better */
1539 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
c346dca1 1540 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
5578689a 1541 dst_mtu(&nrt->u.dst));
1da177e4 1542
40e22e8f 1543 if (ip6_ins_rt(nrt))
1da177e4
LT
1544 goto out;
1545
8d71740c
TT
1546 netevent.old = &rt->u.dst;
1547 netevent.new = &nrt->u.dst;
1548 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1549
1da177e4 1550 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1551 ip6_del_rt(rt);
1da177e4
LT
1552 return;
1553 }
1554
1555out:
1ab1457c 1556 dst_release(&rt->u.dst);
1da177e4
LT
1557 return;
1558}
1559
1560/*
1561 * Handle ICMP "packet too big" messages
1562 * i.e. Path MTU discovery
1563 */
1564
1565void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1566 struct net_device *dev, u32 pmtu)
1567{
1568 struct rt6_info *rt, *nrt;
c346dca1 1569 struct net *net = dev_net(dev);
1da177e4
LT
1570 int allfrag = 0;
1571
5578689a 1572 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1573 if (rt == NULL)
1574 return;
1575
1576 if (pmtu >= dst_mtu(&rt->u.dst))
1577 goto out;
1578
1579 if (pmtu < IPV6_MIN_MTU) {
1580 /*
1ab1457c 1581 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1582 * MTU (1280) and a fragment header should always be included
1583 * after a node receiving Too Big message reporting PMTU is
1584 * less than the IPv6 Minimum Link MTU.
1585 */
1586 pmtu = IPV6_MIN_MTU;
1587 allfrag = 1;
1588 }
1589
1590 /* New mtu received -> path was valid.
1591 They are sent only in response to data packets,
1592 so that this nexthop apparently is reachable. --ANK
1593 */
1594 dst_confirm(&rt->u.dst);
1595
1596 /* Host route. If it is static, it would be better
1597 not to override it, but add new one, so that
1598 when cache entry will expire old pmtu
1599 would return automatically.
1600 */
1601 if (rt->rt6i_flags & RTF_CACHE) {
1602 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1603 if (allfrag)
1604 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
5578689a 1605 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1606 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1607 goto out;
1608 }
1609
1610 /* Network route.
1611 Two cases are possible:
1612 1. It is connected route. Action: COW
1613 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1614 */
d5315b50 1615 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1616 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1617 else
1618 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1619
d5315b50 1620 if (nrt) {
a1e78363
YH
1621 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1622 if (allfrag)
1623 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1624
1625 /* According to RFC 1981, detecting PMTU increase shouldn't be
1626 * happened within 5 mins, the recommended timer is 10 mins.
1627 * Here this route expiration time is set to ip6_rt_mtu_expires
1628 * which is 10 mins. After 10 mins the decreased pmtu is expired
1629 * and detecting PMTU increase will be automatically happened.
1630 */
5578689a 1631 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1632 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1633
40e22e8f 1634 ip6_ins_rt(nrt);
1da177e4 1635 }
1da177e4
LT
1636out:
1637 dst_release(&rt->u.dst);
1638}
1639
1640/*
1641 * Misc support functions
1642 */
1643
1644static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1645{
c346dca1 1646 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1647 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1648
1649 if (rt) {
1650 rt->u.dst.input = ort->u.dst.input;
1651 rt->u.dst.output = ort->u.dst.output;
1652
1653 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1654 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1655 rt->u.dst.dev = ort->u.dst.dev;
1656 if (rt->u.dst.dev)
1657 dev_hold(rt->u.dst.dev);
1658 rt->rt6i_idev = ort->rt6i_idev;
1659 if (rt->rt6i_idev)
1660 in6_dev_hold(rt->rt6i_idev);
1661 rt->u.dst.lastuse = jiffies;
1662 rt->rt6i_expires = 0;
1663
1664 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1665 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1666 rt->rt6i_metric = 0;
1667
1668 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1669#ifdef CONFIG_IPV6_SUBTREES
1670 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1671#endif
c71099ac 1672 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1673 }
1674 return rt;
1675}
1676
70ceb4f5 1677#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1678static struct rt6_info *rt6_get_route_info(struct net *net,
1679 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1680 struct in6_addr *gwaddr, int ifindex)
1681{
1682 struct fib6_node *fn;
1683 struct rt6_info *rt = NULL;
c71099ac
TG
1684 struct fib6_table *table;
1685
efa2cea0 1686 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1687 if (table == NULL)
1688 return NULL;
70ceb4f5 1689
c71099ac
TG
1690 write_lock_bh(&table->tb6_lock);
1691 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1692 if (!fn)
1693 goto out;
1694
7cc48263 1695 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1696 if (rt->rt6i_dev->ifindex != ifindex)
1697 continue;
1698 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1699 continue;
1700 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1701 continue;
1702 dst_hold(&rt->u.dst);
1703 break;
1704 }
1705out:
c71099ac 1706 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1707 return rt;
1708}
1709
efa2cea0
DL
1710static struct rt6_info *rt6_add_route_info(struct net *net,
1711 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1712 struct in6_addr *gwaddr, int ifindex,
1713 unsigned pref)
1714{
86872cb5
TG
1715 struct fib6_config cfg = {
1716 .fc_table = RT6_TABLE_INFO,
238fc7ea 1717 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1718 .fc_ifindex = ifindex,
1719 .fc_dst_len = prefixlen,
1720 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1721 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1722 .fc_nlinfo.pid = 0,
1723 .fc_nlinfo.nlh = NULL,
1724 .fc_nlinfo.nl_net = net,
86872cb5
TG
1725 };
1726
1727 ipv6_addr_copy(&cfg.fc_dst, prefix);
1728 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1729
e317da96
YH
1730 /* We should treat it as a default route if prefix length is 0. */
1731 if (!prefixlen)
86872cb5 1732 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1733
86872cb5 1734 ip6_route_add(&cfg);
70ceb4f5 1735
efa2cea0 1736 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1737}
1738#endif
1739
1da177e4 1740struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1741{
1da177e4 1742 struct rt6_info *rt;
c71099ac 1743 struct fib6_table *table;
1da177e4 1744
c346dca1 1745 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1746 if (table == NULL)
1747 return NULL;
1da177e4 1748
c71099ac 1749 write_lock_bh(&table->tb6_lock);
7cc48263 1750 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1751 if (dev == rt->rt6i_dev &&
045927ff 1752 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1753 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1754 break;
1755 }
1756 if (rt)
1757 dst_hold(&rt->u.dst);
c71099ac 1758 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1759 return rt;
1760}
1761
1762struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1763 struct net_device *dev,
1764 unsigned int pref)
1da177e4 1765{
86872cb5
TG
1766 struct fib6_config cfg = {
1767 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1768 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1769 .fc_ifindex = dev->ifindex,
1770 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1771 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1772 .fc_nlinfo.pid = 0,
1773 .fc_nlinfo.nlh = NULL,
c346dca1 1774 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1775 };
1da177e4 1776
86872cb5 1777 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1778
86872cb5 1779 ip6_route_add(&cfg);
1da177e4 1780
1da177e4
LT
1781 return rt6_get_dflt_router(gwaddr, dev);
1782}
1783
7b4da532 1784void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1785{
1786 struct rt6_info *rt;
c71099ac
TG
1787 struct fib6_table *table;
1788
1789 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1790 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1791 if (table == NULL)
1792 return;
1da177e4
LT
1793
1794restart:
c71099ac 1795 read_lock_bh(&table->tb6_lock);
7cc48263 1796 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1797 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1798 dst_hold(&rt->u.dst);
c71099ac 1799 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1800 ip6_del_rt(rt);
1da177e4
LT
1801 goto restart;
1802 }
1803 }
c71099ac 1804 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1805}
1806
5578689a
DL
1807static void rtmsg_to_fib6_config(struct net *net,
1808 struct in6_rtmsg *rtmsg,
86872cb5
TG
1809 struct fib6_config *cfg)
1810{
1811 memset(cfg, 0, sizeof(*cfg));
1812
1813 cfg->fc_table = RT6_TABLE_MAIN;
1814 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1815 cfg->fc_metric = rtmsg->rtmsg_metric;
1816 cfg->fc_expires = rtmsg->rtmsg_info;
1817 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1818 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1819 cfg->fc_flags = rtmsg->rtmsg_flags;
1820
5578689a 1821 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1822
86872cb5
TG
1823 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1824 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1825 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1826}
1827
5578689a 1828int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1829{
86872cb5 1830 struct fib6_config cfg;
1da177e4
LT
1831 struct in6_rtmsg rtmsg;
1832 int err;
1833
1834 switch(cmd) {
1835 case SIOCADDRT: /* Add a route */
1836 case SIOCDELRT: /* Delete a route */
1837 if (!capable(CAP_NET_ADMIN))
1838 return -EPERM;
1839 err = copy_from_user(&rtmsg, arg,
1840 sizeof(struct in6_rtmsg));
1841 if (err)
1842 return -EFAULT;
86872cb5 1843
5578689a 1844 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1845
1da177e4
LT
1846 rtnl_lock();
1847 switch (cmd) {
1848 case SIOCADDRT:
86872cb5 1849 err = ip6_route_add(&cfg);
1da177e4
LT
1850 break;
1851 case SIOCDELRT:
86872cb5 1852 err = ip6_route_del(&cfg);
1da177e4
LT
1853 break;
1854 default:
1855 err = -EINVAL;
1856 }
1857 rtnl_unlock();
1858
1859 return err;
3ff50b79 1860 }
1da177e4
LT
1861
1862 return -EINVAL;
1863}
1864
1865/*
1866 * Drop the packet on the floor
1867 */
1868
d5fdd6ba 1869static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1870{
612f09e8 1871 int type;
adf30907 1872 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1873 switch (ipstats_mib_noroutes) {
1874 case IPSTATS_MIB_INNOROUTES:
0660e03f 1875 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1876 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1877 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1878 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1879 break;
1880 }
1881 /* FALLTHROUGH */
1882 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1883 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1884 ipstats_mib_noroutes);
612f09e8
YH
1885 break;
1886 }
3ffe533c 1887 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1888 kfree_skb(skb);
1889 return 0;
1890}
1891
9ce8ade0
TG
1892static int ip6_pkt_discard(struct sk_buff *skb)
1893{
612f09e8 1894 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1895}
1896
20380731 1897static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1898{
adf30907 1899 skb->dev = skb_dst(skb)->dev;
612f09e8 1900 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1901}
1902
6723ab54
DM
1903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1904
9ce8ade0
TG
1905static int ip6_pkt_prohibit(struct sk_buff *skb)
1906{
612f09e8 1907 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1908}
1909
1910static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1911{
adf30907 1912 skb->dev = skb_dst(skb)->dev;
612f09e8 1913 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1914}
1915
6723ab54
DM
1916#endif
1917
1da177e4
LT
1918/*
1919 * Allocate a dst for local (unicast / anycast) address.
1920 */
1921
1922struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1923 const struct in6_addr *addr,
1924 int anycast)
1925{
c346dca1 1926 struct net *net = dev_net(idev->dev);
86393e52 1927 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1928 struct neighbour *neigh;
1da177e4
LT
1929
1930 if (rt == NULL)
1931 return ERR_PTR(-ENOMEM);
1932
5578689a 1933 dev_hold(net->loopback_dev);
1da177e4
LT
1934 in6_dev_hold(idev);
1935
1936 rt->u.dst.flags = DST_HOST;
1937 rt->u.dst.input = ip6_input;
1938 rt->u.dst.output = ip6_output;
5578689a 1939 rt->rt6i_dev = net->loopback_dev;
1da177e4
LT
1940 rt->rt6i_idev = idev;
1941 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 1942 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1943 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1944 rt->u.dst.obsolete = -1;
1945
1946 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1947 if (anycast)
1948 rt->rt6i_flags |= RTF_ANYCAST;
1949 else
1da177e4 1950 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1951 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1952 if (IS_ERR(neigh)) {
40aa7b90 1953 dst_free(&rt->u.dst);
14deae41
DM
1954
1955 /* We are casting this because that is the return
1956 * value type. But an errno encoded pointer is the
1957 * same regardless of the underlying pointer type,
1958 * and that's what we are returning. So this is OK.
1959 */
1960 return (struct rt6_info *) neigh;
1da177e4 1961 }
14deae41 1962 rt->rt6i_nexthop = neigh;
1da177e4
LT
1963
1964 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1965 rt->rt6i_dst.plen = 128;
5578689a 1966 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4
LT
1967
1968 atomic_set(&rt->u.dst.__refcnt, 1);
1969
1970 return rt;
1971}
1972
8ed67789
DL
/*
 * Argument bundle passed by rt6_ifdown() to fib6_ifdown(): the device
 * going down (NULL matches every device) and its network namespace.
 */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
1977
1da177e4
LT
1978static int fib6_ifdown(struct rt6_info *rt, void *arg)
1979{
8ed67789
DL
1980 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1981 struct net *net = ((struct arg_dev_net *)arg)->net;
1982
1983 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1984 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
1985 RT6_TRACE("deleted by ifdown %p\n", rt);
1986 return -1;
1987 }
1988 return 0;
1989}
1990
f3db4851 1991void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1992{
8ed67789
DL
1993 struct arg_dev_net adn = {
1994 .dev = dev,
1995 .net = net,
1996 };
1997
1998 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 1999 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2000}
2001
/*
 * Argument bundle passed by rt6_mtu_change() to rt6_mtu_change_route():
 * the device whose MTU changed and the new MTU.
 */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};
2007
2008static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2009{
2010 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2011 struct inet6_dev *idev;
c346dca1 2012 struct net *net = dev_net(arg->dev);
1da177e4
LT
2013
2014 /* In IPv6 pmtu discovery is not optional,
2015 so that RTAX_MTU lock cannot disable it.
2016 We still use this lock to block changes
2017 caused by addrconf/ndisc.
2018 */
2019
2020 idev = __in6_dev_get(arg->dev);
2021 if (idev == NULL)
2022 return 0;
2023
2024 /* For administrative MTU increase, there is no way to discover
2025 IPv6 PMTU increase, so PMTU increase should be updated here.
2026 Since RFC 1981 doesn't include administrative MTU increase
2027 update PMTU increase is a MUST. (i.e. jumbo frame)
2028 */
2029 /*
2030 If new MTU is less than route PMTU, this new MTU will be the
2031 lowest MTU in the path, update the route PMTU to reflect PMTU
2032 decreases; if new MTU is greater than route PMTU, and the
2033 old MTU is the lowest MTU in the path, update the route PMTU
2034 to reflect the increase. In this case if the other nodes' MTU
2035 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2036 PMTU discouvery.
2037 */
2038 if (rt->rt6i_dev == arg->dev &&
2039 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 2040 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 2041 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 2042 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 2043 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
5578689a 2044 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 2045 }
1da177e4
LT
2046 return 0;
2047}
2048
2049void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2050{
c71099ac
TG
2051 struct rt6_mtu_change_arg arg = {
2052 .dev = dev,
2053 .mtu = mtu,
2054 };
1da177e4 2055
c346dca1 2056 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2057}
2058
ef7c79ed 2059static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2060 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2061 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2062 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2063 [RTA_PRIORITY] = { .type = NLA_U32 },
2064 [RTA_METRICS] = { .type = NLA_NESTED },
2065};
2066
2067static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2068 struct fib6_config *cfg)
1da177e4 2069{
86872cb5
TG
2070 struct rtmsg *rtm;
2071 struct nlattr *tb[RTA_MAX+1];
2072 int err;
1da177e4 2073
86872cb5
TG
2074 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2075 if (err < 0)
2076 goto errout;
1da177e4 2077
86872cb5
TG
2078 err = -EINVAL;
2079 rtm = nlmsg_data(nlh);
2080 memset(cfg, 0, sizeof(*cfg));
2081
2082 cfg->fc_table = rtm->rtm_table;
2083 cfg->fc_dst_len = rtm->rtm_dst_len;
2084 cfg->fc_src_len = rtm->rtm_src_len;
2085 cfg->fc_flags = RTF_UP;
2086 cfg->fc_protocol = rtm->rtm_protocol;
2087
2088 if (rtm->rtm_type == RTN_UNREACHABLE)
2089 cfg->fc_flags |= RTF_REJECT;
2090
2091 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2092 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2093 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2094
2095 if (tb[RTA_GATEWAY]) {
2096 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2097 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2098 }
86872cb5
TG
2099
2100 if (tb[RTA_DST]) {
2101 int plen = (rtm->rtm_dst_len + 7) >> 3;
2102
2103 if (nla_len(tb[RTA_DST]) < plen)
2104 goto errout;
2105
2106 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2107 }
86872cb5
TG
2108
2109 if (tb[RTA_SRC]) {
2110 int plen = (rtm->rtm_src_len + 7) >> 3;
2111
2112 if (nla_len(tb[RTA_SRC]) < plen)
2113 goto errout;
2114
2115 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2116 }
86872cb5
TG
2117
2118 if (tb[RTA_OIF])
2119 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2120
2121 if (tb[RTA_PRIORITY])
2122 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2123
2124 if (tb[RTA_METRICS]) {
2125 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2126 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2127 }
86872cb5
TG
2128
2129 if (tb[RTA_TABLE])
2130 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2131
2132 err = 0;
2133errout:
2134 return err;
1da177e4
LT
2135}
2136
c127ea2c 2137static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2138{
86872cb5
TG
2139 struct fib6_config cfg;
2140 int err;
1da177e4 2141
86872cb5
TG
2142 err = rtm_to_fib6_config(skb, nlh, &cfg);
2143 if (err < 0)
2144 return err;
2145
2146 return ip6_route_del(&cfg);
1da177e4
LT
2147}
2148
c127ea2c 2149static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2150{
86872cb5
TG
2151 struct fib6_config cfg;
2152 int err;
1da177e4 2153
86872cb5
TG
2154 err = rtm_to_fib6_config(skb, nlh, &cfg);
2155 if (err < 0)
2156 return err;
2157
2158 return ip6_route_add(&cfg);
1da177e4
LT
2159}
2160
339bf98f
TG
2161static inline size_t rt6_nlmsg_size(void)
2162{
2163 return NLMSG_ALIGN(sizeof(struct rtmsg))
2164 + nla_total_size(16) /* RTA_SRC */
2165 + nla_total_size(16) /* RTA_DST */
2166 + nla_total_size(16) /* RTA_GATEWAY */
2167 + nla_total_size(16) /* RTA_PREFSRC */
2168 + nla_total_size(4) /* RTA_TABLE */
2169 + nla_total_size(4) /* RTA_IIF */
2170 + nla_total_size(4) /* RTA_OIF */
2171 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2172 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2173 + nla_total_size(sizeof(struct rta_cacheinfo));
2174}
2175
191cd582
BH
2176static int rt6_fill_node(struct net *net,
2177 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2178 struct in6_addr *dst, struct in6_addr *src,
2179 int iif, int type, u32 pid, u32 seq,
7bc570c8 2180 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2181{
2182 struct rtmsg *rtm;
2d7202bf 2183 struct nlmsghdr *nlh;
e3703b3d 2184 long expires;
9e762a4a 2185 u32 table;
1da177e4
LT
2186
2187 if (prefix) { /* user wants prefix routes only */
2188 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2189 /* success since this is not a prefix route */
2190 return 1;
2191 }
2192 }
2193
2d7202bf
TG
2194 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2195 if (nlh == NULL)
26932566 2196 return -EMSGSIZE;
2d7202bf
TG
2197
2198 rtm = nlmsg_data(nlh);
1da177e4
LT
2199 rtm->rtm_family = AF_INET6;
2200 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2201 rtm->rtm_src_len = rt->rt6i_src.plen;
2202 rtm->rtm_tos = 0;
c71099ac 2203 if (rt->rt6i_table)
9e762a4a 2204 table = rt->rt6i_table->tb6_id;
c71099ac 2205 else
9e762a4a
PM
2206 table = RT6_TABLE_UNSPEC;
2207 rtm->rtm_table = table;
2d7202bf 2208 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2209 if (rt->rt6i_flags&RTF_REJECT)
2210 rtm->rtm_type = RTN_UNREACHABLE;
2211 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2212 rtm->rtm_type = RTN_LOCAL;
2213 else
2214 rtm->rtm_type = RTN_UNICAST;
2215 rtm->rtm_flags = 0;
2216 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2217 rtm->rtm_protocol = rt->rt6i_protocol;
2218 if (rt->rt6i_flags&RTF_DYNAMIC)
2219 rtm->rtm_protocol = RTPROT_REDIRECT;
2220 else if (rt->rt6i_flags & RTF_ADDRCONF)
2221 rtm->rtm_protocol = RTPROT_KERNEL;
2222 else if (rt->rt6i_flags&RTF_DEFAULT)
2223 rtm->rtm_protocol = RTPROT_RA;
2224
2225 if (rt->rt6i_flags&RTF_CACHE)
2226 rtm->rtm_flags |= RTM_F_CLONED;
2227
2228 if (dst) {
2d7202bf 2229 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2230 rtm->rtm_dst_len = 128;
1da177e4 2231 } else if (rtm->rtm_dst_len)
2d7202bf 2232 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2233#ifdef CONFIG_IPV6_SUBTREES
2234 if (src) {
2d7202bf 2235 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2236 rtm->rtm_src_len = 128;
1da177e4 2237 } else if (rtm->rtm_src_len)
2d7202bf 2238 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2239#endif
7bc570c8
YH
2240 if (iif) {
2241#ifdef CONFIG_IPV6_MROUTE
2242 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2243 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2244 if (err <= 0) {
2245 if (!nowait) {
2246 if (err == 0)
2247 return 0;
2248 goto nla_put_failure;
2249 } else {
2250 if (err == -EMSGSIZE)
2251 goto nla_put_failure;
2252 }
2253 }
2254 } else
2255#endif
2256 NLA_PUT_U32(skb, RTA_IIF, iif);
2257 } else if (dst) {
5e0115e5 2258 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
1da177e4 2259 struct in6_addr saddr_buf;
191cd582 2260 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2261 dst, 0, &saddr_buf) == 0)
2d7202bf 2262 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2263 }
2d7202bf 2264
1da177e4 2265 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2266 goto nla_put_failure;
2267
1da177e4 2268 if (rt->u.dst.neighbour)
2d7202bf
TG
2269 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2270
1da177e4 2271 if (rt->u.dst.dev)
2d7202bf
TG
2272 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2273
2274 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2275
36e3deae
YH
2276 if (!(rt->rt6i_flags & RTF_EXPIRES))
2277 expires = 0;
2278 else if (rt->rt6i_expires - jiffies < INT_MAX)
2279 expires = rt->rt6i_expires - jiffies;
2280 else
2281 expires = INT_MAX;
69cdf8f9 2282
e3703b3d
TG
2283 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2284 expires, rt->u.dst.error) < 0)
2285 goto nla_put_failure;
2d7202bf
TG
2286
2287 return nlmsg_end(skb, nlh);
2288
2289nla_put_failure:
26932566
PM
2290 nlmsg_cancel(skb, nlh);
2291 return -EMSGSIZE;
1da177e4
LT
2292}
2293
1b43af54 2294int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2295{
2296 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2297 int prefix;
2298
2d7202bf
TG
2299 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2300 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2301 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2302 } else
2303 prefix = 0;
2304
191cd582
BH
2305 return rt6_fill_node(arg->net,
2306 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2307 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2308 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2309}
2310
c127ea2c 2311static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2312{
3b1e0a65 2313 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2314 struct nlattr *tb[RTA_MAX+1];
2315 struct rt6_info *rt;
1da177e4 2316 struct sk_buff *skb;
ab364a6f 2317 struct rtmsg *rtm;
1da177e4 2318 struct flowi fl;
ab364a6f 2319 int err, iif = 0;
1da177e4 2320
ab364a6f
TG
2321 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2322 if (err < 0)
2323 goto errout;
1da177e4 2324
ab364a6f 2325 err = -EINVAL;
1da177e4 2326 memset(&fl, 0, sizeof(fl));
1da177e4 2327
ab364a6f
TG
2328 if (tb[RTA_SRC]) {
2329 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2330 goto errout;
2331
2332 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2333 }
2334
2335 if (tb[RTA_DST]) {
2336 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2337 goto errout;
2338
2339 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2340 }
2341
2342 if (tb[RTA_IIF])
2343 iif = nla_get_u32(tb[RTA_IIF]);
2344
2345 if (tb[RTA_OIF])
2346 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2347
2348 if (iif) {
2349 struct net_device *dev;
5578689a 2350 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2351 if (!dev) {
2352 err = -ENODEV;
ab364a6f 2353 goto errout;
1da177e4
LT
2354 }
2355 }
2356
ab364a6f
TG
2357 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2358 if (skb == NULL) {
2359 err = -ENOBUFS;
2360 goto errout;
2361 }
1da177e4 2362
ab364a6f
TG
2363 /* Reserve room for dummy headers, this skb can pass
2364 through good chunk of routing engine.
2365 */
459a98ed 2366 skb_reset_mac_header(skb);
ab364a6f 2367 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2368
8a3edd80 2369 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
adf30907 2370 skb_dst_set(skb, &rt->u.dst);
1da177e4 2371
191cd582 2372 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2373 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2374 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2375 if (err < 0) {
ab364a6f
TG
2376 kfree_skb(skb);
2377 goto errout;
1da177e4
LT
2378 }
2379
5578689a 2380 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2381errout:
1da177e4 2382 return err;
1da177e4
LT
2383}
2384
86872cb5 2385void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2386{
2387 struct sk_buff *skb;
5578689a 2388 struct net *net = info->nl_net;
528c4ceb
DL
2389 u32 seq;
2390 int err;
2391
2392 err = -ENOBUFS;
2393 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2394
339bf98f 2395 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2396 if (skb == NULL)
2397 goto errout;
2398
191cd582 2399 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2400 event, info->pid, seq, 0, 0, 0);
26932566
PM
2401 if (err < 0) {
2402 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2403 WARN_ON(err == -EMSGSIZE);
2404 kfree_skb(skb);
2405 goto errout;
2406 }
1ce85fe4
PNA
2407 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2408 info->nlh, gfp_any());
2409 return;
21713ebc
TG
2410errout:
2411 if (err < 0)
5578689a 2412 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2413}
2414
8ed67789
DL
2415static int ip6_route_dev_notify(struct notifier_block *this,
2416 unsigned long event, void *data)
2417{
2418 struct net_device *dev = (struct net_device *)data;
c346dca1 2419 struct net *net = dev_net(dev);
8ed67789
DL
2420
2421 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2422 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2423 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2424#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2425 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2426 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2427 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2428 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2429#endif
2430 }
2431
2432 return NOTIFY_OK;
2433}
2434
1da177e4
LT
2435/*
2436 * /proc
2437 */
2438
2439#ifdef CONFIG_PROC_FS
2440
/* NOTE(review): no user of RT6_INFO_LEN is visible in this part of the file. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file (the handlers below use seq_file) -- confirm before relying on it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2451
2452static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2453{
33120b30 2454 struct seq_file *m = p_arg;
1da177e4 2455
4b7a4274 2456 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2457
2458#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2459 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2460#else
33120b30 2461 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2462#endif
2463
2464 if (rt->rt6i_nexthop) {
4b7a4274 2465 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2466 } else {
33120b30 2467 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2468 }
33120b30
AD
2469 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2470 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2471 rt->u.dst.__use, rt->rt6i_flags,
2472 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2473 return 0;
2474}
2475
33120b30 2476static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2477{
f3db4851
DL
2478 struct net *net = (struct net *)m->private;
2479 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2480 return 0;
2481}
1da177e4 2482
33120b30
AD
2483static int ipv6_route_open(struct inode *inode, struct file *file)
2484{
de05c557 2485 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2486}
2487
33120b30
AD
2488static const struct file_operations ipv6_route_proc_fops = {
2489 .owner = THIS_MODULE,
2490 .open = ipv6_route_open,
2491 .read = seq_read,
2492 .llseek = seq_lseek,
b6fcbdb4 2493 .release = single_release_net,
33120b30
AD
2494};
2495
1da177e4
LT
2496static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2497{
69ddb805 2498 struct net *net = (struct net *)seq->private;
1da177e4 2499 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2500 net->ipv6.rt6_stats->fib_nodes,
2501 net->ipv6.rt6_stats->fib_route_nodes,
2502 net->ipv6.rt6_stats->fib_rt_alloc,
2503 net->ipv6.rt6_stats->fib_rt_entries,
2504 net->ipv6.rt6_stats->fib_rt_cache,
86393e52 2505 atomic_read(&net->ipv6.ip6_dst_ops.entries),
69ddb805 2506 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2507
2508 return 0;
2509}
2510
2511static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2512{
de05c557 2513 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2514}
2515
9a32144e 2516static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2517 .owner = THIS_MODULE,
2518 .open = rt6_stats_seq_open,
2519 .read = seq_read,
2520 .llseek = seq_lseek,
b6fcbdb4 2521 .release = single_release_net,
1da177e4
LT
2522};
2523#endif /* CONFIG_PROC_FS */
2524
2525#ifdef CONFIG_SYSCTL
2526
1da177e4 2527static
8d65af78 2528int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2529 void __user *buffer, size_t *lenp, loff_t *ppos)
2530{
5b7c931d
DL
2531 struct net *net = current->nsproxy->net_ns;
2532 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2533 if (write) {
8d65af78 2534 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2535 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2536 return 0;
2537 } else
2538 return -EINVAL;
2539}
2540
760f2d01 2541ctl_table ipv6_route_table_template[] = {
1ab1457c 2542 {
1da177e4 2543 .procname = "flush",
4990509f 2544 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2545 .maxlen = sizeof(int),
89c8b3a1 2546 .mode = 0200,
6d9f239a 2547 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2548 },
2549 {
1da177e4 2550 .procname = "gc_thresh",
9a7ec3a9 2551 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2552 .maxlen = sizeof(int),
2553 .mode = 0644,
6d9f239a 2554 .proc_handler = proc_dointvec,
1da177e4
LT
2555 },
2556 {
1da177e4 2557 .procname = "max_size",
4990509f 2558 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2559 .maxlen = sizeof(int),
2560 .mode = 0644,
6d9f239a 2561 .proc_handler = proc_dointvec,
1da177e4
LT
2562 },
2563 {
1da177e4 2564 .procname = "gc_min_interval",
4990509f 2565 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2566 .maxlen = sizeof(int),
2567 .mode = 0644,
6d9f239a 2568 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2569 },
2570 {
1da177e4 2571 .procname = "gc_timeout",
4990509f 2572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2573 .maxlen = sizeof(int),
2574 .mode = 0644,
6d9f239a 2575 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2576 },
2577 {
1da177e4 2578 .procname = "gc_interval",
4990509f 2579 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2580 .maxlen = sizeof(int),
2581 .mode = 0644,
6d9f239a 2582 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2583 },
2584 {
1da177e4 2585 .procname = "gc_elasticity",
4990509f 2586 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2587 .maxlen = sizeof(int),
2588 .mode = 0644,
6d9f239a 2589 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2590 },
2591 {
1da177e4 2592 .procname = "mtu_expires",
4990509f 2593 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2594 .maxlen = sizeof(int),
2595 .mode = 0644,
6d9f239a 2596 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2597 },
2598 {
1da177e4 2599 .procname = "min_adv_mss",
4990509f 2600 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2601 .maxlen = sizeof(int),
2602 .mode = 0644,
6d9f239a 2603 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2604 },
2605 {
1da177e4 2606 .procname = "gc_min_interval_ms",
4990509f 2607 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2608 .maxlen = sizeof(int),
2609 .mode = 0644,
6d9f239a 2610 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2611 },
f8572d8f 2612 { }
1da177e4
LT
2613};
2614
2c8c1e72 2615struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2616{
2617 struct ctl_table *table;
2618
2619 table = kmemdup(ipv6_route_table_template,
2620 sizeof(ipv6_route_table_template),
2621 GFP_KERNEL);
5ee09105
YH
2622
2623 if (table) {
2624 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2625 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2626 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2627 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2628 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2629 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2630 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2631 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2632 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2633 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2634 }
2635
760f2d01
DL
2636 return table;
2637}
1da177e4
LT
2638#endif
2639
2c8c1e72 2640static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2641{
633d424b 2642 int ret = -ENOMEM;
8ed67789 2643
86393e52
AD
2644 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2645 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2646
8ed67789
DL
2647 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2648 sizeof(*net->ipv6.ip6_null_entry),
2649 GFP_KERNEL);
2650 if (!net->ipv6.ip6_null_entry)
f2fc6a54 2651 goto out_ip6_dst_ops;
8ed67789
DL
2652 net->ipv6.ip6_null_entry->u.dst.path =
2653 (struct dst_entry *)net->ipv6.ip6_null_entry;
86393e52 2654 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2655
2656#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2657 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2658 sizeof(*net->ipv6.ip6_prohibit_entry),
2659 GFP_KERNEL);
68fffc67
PZ
2660 if (!net->ipv6.ip6_prohibit_entry)
2661 goto out_ip6_null_entry;
8ed67789
DL
2662 net->ipv6.ip6_prohibit_entry->u.dst.path =
2663 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
86393e52 2664 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2665
2666 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2667 sizeof(*net->ipv6.ip6_blk_hole_entry),
2668 GFP_KERNEL);
68fffc67
PZ
2669 if (!net->ipv6.ip6_blk_hole_entry)
2670 goto out_ip6_prohibit_entry;
8ed67789
DL
2671 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2672 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
86393e52 2673 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2674#endif
2675
b339a47c
PZ
2676 net->ipv6.sysctl.flush_delay = 0;
2677 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2678 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2679 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2680 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2681 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2682 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2683 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2684
cdb18761
DL
2685#ifdef CONFIG_PROC_FS
2686 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2687 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2688#endif
6891a346
BT
2689 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2690
8ed67789
DL
2691 ret = 0;
2692out:
2693 return ret;
f2fc6a54 2694
68fffc67
PZ
2695#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2696out_ip6_prohibit_entry:
2697 kfree(net->ipv6.ip6_prohibit_entry);
2698out_ip6_null_entry:
2699 kfree(net->ipv6.ip6_null_entry);
2700#endif
f2fc6a54 2701out_ip6_dst_ops:
f2fc6a54 2702 goto out;
cdb18761
DL
2703}
2704
2c8c1e72 2705static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2706{
2707#ifdef CONFIG_PROC_FS
2708 proc_net_remove(net, "ipv6_route");
2709 proc_net_remove(net, "rt6_stats");
2710#endif
8ed67789
DL
2711 kfree(net->ipv6.ip6_null_entry);
2712#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2713 kfree(net->ipv6.ip6_prohibit_entry);
2714 kfree(net->ipv6.ip6_blk_hole_entry);
2715#endif
cdb18761
DL
2716}
2717
2718static struct pernet_operations ip6_route_net_ops = {
2719 .init = ip6_route_net_init,
2720 .exit = ip6_route_net_exit,
2721};
2722
8ed67789
DL
2723static struct notifier_block ip6_route_dev_notifier = {
2724 .notifier_call = ip6_route_dev_notify,
2725 .priority = 0,
2726};
2727
433d49c3 2728int __init ip6_route_init(void)
1da177e4 2729{
433d49c3
DL
2730 int ret;
2731
9a7ec3a9
DL
2732 ret = -ENOMEM;
2733 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2734 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2735 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2736 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2737 goto out;
14e50e57 2738
8ed67789
DL
2739 ret = register_pernet_subsys(&ip6_route_net_ops);
2740 if (ret)
bdb3289f 2741 goto out_kmem_cache;
bdb3289f 2742
5dc121e9
AE
2743 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2744
8ed67789
DL
2745 /* Registering of the loopback is done before this portion of code,
2746 * the loopback reference in rt6_info will not be taken, do it
2747 * manually for init_net */
2748 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2749 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2750 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2751 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2752 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2753 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2754 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2755 #endif
433d49c3
DL
2756 ret = fib6_init();
2757 if (ret)
8ed67789 2758 goto out_register_subsys;
433d49c3 2759
433d49c3
DL
2760 ret = xfrm6_init();
2761 if (ret)
cdb18761 2762 goto out_fib6_init;
c35b7e72 2763
433d49c3
DL
2764 ret = fib6_rules_init();
2765 if (ret)
2766 goto xfrm6_init;
7e5449c2 2767
433d49c3
DL
2768 ret = -ENOBUFS;
2769 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2770 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2771 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2772 goto fib6_rules_init;
c127ea2c 2773
8ed67789 2774 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2775 if (ret)
2776 goto fib6_rules_init;
8ed67789 2777
433d49c3
DL
2778out:
2779 return ret;
2780
2781fib6_rules_init:
433d49c3
DL
2782 fib6_rules_cleanup();
2783xfrm6_init:
433d49c3 2784 xfrm6_fini();
433d49c3 2785out_fib6_init:
433d49c3 2786 fib6_gc_cleanup();
8ed67789
DL
2787out_register_subsys:
2788 unregister_pernet_subsys(&ip6_route_net_ops);
433d49c3 2789out_kmem_cache:
f2fc6a54 2790 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2791 goto out;
1da177e4
LT
2792}
2793
2794void ip6_route_cleanup(void)
2795{
8ed67789 2796 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2797 fib6_rules_cleanup();
1da177e4 2798 xfrm6_fini();
1da177e4 2799 fib6_gc_cleanup();
8ed67789 2800 unregister_pernet_subsys(&ip6_route_net_ops);
f2fc6a54 2801 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2802}