]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
IPv6: Fix 6RD build error
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
457c4cbc 43#include <net/net_namespace.h>
1da177e4
LT
44#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
8d71740c 54#include <net/netevent.h>
21713ebc 55#include <net/netlink.h>
1da177e4
LT
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
63/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

/*
 * RDBG() takes a fully parenthesised printk() argument list, RT6_TRACE()
 * takes a printk-style format plus arguments.  Both compile away unless
 * RT6_DEBUG is raised to 3 or more.
 */
#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/* Non-zero selects the rt6_alloc_clone() branch in ip6_pol_route(). */
#define CLONE_OFFLINK_ROUTE 0
1da177e4 75
1da177e4
LT
76static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
569d3645 82static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
83
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
70ceb4f5 89#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
90static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
efa2cea0
DL
94static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
96 struct in6_addr *gwaddr, int ifindex);
97#endif
98
9a7ec3a9 99static struct dst_ops ip6_dst_ops_template = {
1da177e4 100 .family = AF_INET6,
09640e63 101 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 110 .local_out = __ip6_local_out,
e2422970 111 .entries = ATOMIC_INIT(0),
1da177e4
LT
112};
113
14e50e57
DM
114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
09640e63 120 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
e2422970 124 .entries = ATOMIC_INIT(0),
14e50e57
DM
125};
126
bdb3289f 127static struct rt6_info ip6_null_entry_template = {
1da177e4
LT
128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
1da177e4
LT
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
1da177e4
LT
137 }
138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 140 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
141 .rt6i_metric = ~(u32) 0,
142 .rt6i_ref = ATOMIC_INIT(1),
143};
144
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" entry: a reject route whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit*() handlers.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		},
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" entry: a reject route whose dst reports
 * -EINVAL and passes packets to dst_discard() in both directions.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= dst_discard,
			.output		= dst_discard,
		},
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
187
1da177e4 188/* allocate dst with ip6_dst_ops */
/* Allocate a dst entry from @ops and return it typed as an rt6_info. */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops);

	return rt;
}
193
194static void ip6_dst_destroy(struct dst_entry *dst)
195{
196 struct rt6_info *rt = (struct rt6_info *)dst;
197 struct inet6_dev *idev = rt->rt6i_idev;
198
199 if (idev != NULL) {
200 rt->rt6i_idev = NULL;
201 in6_dev_put(idev);
1ab1457c 202 }
1da177e4
LT
203}
204
205static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
206 int how)
207{
208 struct rt6_info *rt = (struct rt6_info *)dst;
209 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 210 struct net_device *loopback_dev =
c346dca1 211 dev_net(dev)->loopback_dev;
1da177e4 212
5a3e55d6
DL
213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
214 struct inet6_dev *loopback_idev =
215 in6_dev_get(loopback_dev);
1da177e4
LT
216 if (loopback_idev != NULL) {
217 rt->rt6i_idev = loopback_idev;
218 in6_dev_put(idev);
219 }
220 }
221}
222
223static __inline__ int rt6_check_expired(const struct rt6_info *rt)
224{
225 return (rt->rt6i_flags & RTF_EXPIRES &&
226 time_after(jiffies, rt->rt6i_expires));
227}
228
c71099ac
TG
229static inline int rt6_need_strict(struct in6_addr *daddr)
230{
231 return (ipv6_addr_type(daddr) &
5ce83afa 232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
c71099ac
TG
233}
234
1da177e4 235/*
c71099ac 236 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
237 */
238
8ed67789
DL
239static inline struct rt6_info *rt6_device_match(struct net *net,
240 struct rt6_info *rt,
dd3abc4e 241 struct in6_addr *saddr,
1da177e4 242 int oif,
d420895e 243 int flags)
1da177e4
LT
244{
245 struct rt6_info *local = NULL;
246 struct rt6_info *sprt;
247
dd3abc4e
YH
248 if (!oif && ipv6_addr_any(saddr))
249 goto out;
250
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
253
254 if (oif) {
1da177e4
LT
255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 260 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 261 continue;
1ab1457c 262 if (local && (!oif ||
1da177e4
LT
263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
dd3abc4e
YH
268 } else {
269 if (ipv6_chk_addr(net, saddr, dev,
270 flags & RT6_LOOKUP_F_IFACE))
271 return sprt;
1da177e4 272 }
dd3abc4e 273 }
1da177e4 274
dd3abc4e 275 if (oif) {
1da177e4
LT
276 if (local)
277 return local;
278
d420895e 279 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 280 return net->ipv6.ip6_null_entry;
1da177e4 281 }
dd3abc4e 282out:
1da177e4
LT
283 return rt;
284}
285
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and the last update of the neighbour entry is older
 * than the device's rtr_probe_interval, send a Neighbour Solicitation to
 * the solicited-node multicast address of the next hop.
 *
 * Router Reachability Probe MUST be rate-limited, so neigh->updated is
 * stamped before the probe is sent.  The stamp is a write to the
 * neighbour entry and is therefore done under the write side of
 * neigh->lock; with only the read side held, several CPUs could pass the
 * interval test at once and each send a probe.
 *
 * A NULL @rt or a route without a next hop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked test first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* The solicitation itself is sent without holding the lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
321
1da177e4 322/*
554cfb7e 323 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 324 */
b6f99a21 325static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
326{
327 struct net_device *dev = rt->rt6i_dev;
161980f4 328 if (!oif || dev->ifindex == oif)
554cfb7e 329 return 2;
161980f4
DM
330 if ((dev->flags & IFF_LOOPBACK) &&
331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
332 return 1;
333 return 0;
554cfb7e 334}
1da177e4 335
b6f99a21 336static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 337{
554cfb7e 338 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 339 int m;
4d0c5911
YH
340 if (rt->rt6i_flags & RTF_NONEXTHOP ||
341 !(rt->rt6i_flags & RTF_GATEWAY))
342 m = 1;
343 else if (neigh) {
554cfb7e
YH
344 read_lock_bh(&neigh->lock);
345 if (neigh->nud_state & NUD_VALID)
4d0c5911 346 m = 2;
398bcbeb
YH
347#ifdef CONFIG_IPV6_ROUTER_PREF
348 else if (neigh->nud_state & NUD_FAILED)
349 m = 0;
350#endif
351 else
ea73ee23 352 m = 1;
554cfb7e 353 read_unlock_bh(&neigh->lock);
398bcbeb
YH
354 } else
355 m = 0;
554cfb7e 356 return m;
1da177e4
LT
357}
358
554cfb7e
YH
359static int rt6_score_route(struct rt6_info *rt, int oif,
360 int strict)
1da177e4 361{
4d0c5911 362 int m, n;
1ab1457c 363
4d0c5911 364 m = rt6_check_dev(rt, oif);
77d16f45 365 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 366 return -1;
ebacaaa0
YH
367#ifdef CONFIG_IPV6_ROUTER_PREF
368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
369#endif
4d0c5911 370 n = rt6_check_neigh(rt);
557e92ef 371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
372 return -1;
373 return m;
374}
375
f11e6659
DM
376static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
377 int *mpri, struct rt6_info *match)
554cfb7e 378{
f11e6659
DM
379 int m;
380
381 if (rt6_check_expired(rt))
382 goto out;
383
384 m = rt6_score_route(rt, oif, strict);
385 if (m < 0)
386 goto out;
387
388 if (m > *mpri) {
389 if (strict & RT6_LOOKUP_F_REACHABLE)
390 rt6_probe(match);
391 *mpri = m;
392 match = rt;
393 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
394 rt6_probe(rt);
395 }
396
397out:
398 return match;
399}
400
401static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
402 struct rt6_info *rr_head,
403 u32 metric, int oif, int strict)
404{
405 struct rt6_info *rt, *match;
554cfb7e 406 int mpri = -1;
1da177e4 407
f11e6659
DM
408 match = NULL;
409 for (rt = rr_head; rt && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
413 rt = rt->u.dst.rt6_next)
414 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 415
f11e6659
DM
416 return match;
417}
1da177e4 418
f11e6659
DM
419static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
420{
421 struct rt6_info *match, *rt0;
8ed67789 422 struct net *net;
1da177e4 423
f11e6659 424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 425 __func__, fn->leaf, oif);
554cfb7e 426
f11e6659
DM
427 rt0 = fn->rr_ptr;
428 if (!rt0)
429 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 430
f11e6659 431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 432
554cfb7e 433 if (!match &&
f11e6659
DM
434 (strict & RT6_LOOKUP_F_REACHABLE)) {
435 struct rt6_info *next = rt0->u.dst.rt6_next;
436
554cfb7e 437 /* no entries matched; do round-robin */
f11e6659
DM
438 if (!next || next->rt6i_metric != rt0->rt6i_metric)
439 next = fn->leaf;
440
441 if (next != rt0)
442 fn->rr_ptr = next;
1da177e4 443 }
1da177e4 444
f11e6659 445 RT6_TRACE("%s() => %p\n",
0dc47877 446 __func__, match);
1da177e4 447
c346dca1 448 net = dev_net(rt0->rt6i_dev);
8ed67789 449 return (match ? match : net->ipv6.ip6_null_entry);
1da177e4
LT
450}
451
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option @opt of @len bytes received on @dev
 * from gateway @gwaddr.
 *
 * After validating the option, the matching route-info route is looked
 * up; it is deleted when the advertised lifetime is zero, created when it
 * does not exist and the lifetime is non-zero, and otherwise has its
 * preference bits refreshed.  The expiry of the resulting route is then
 * set from the lifetime.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* The option carries the full address; use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
525
/*
 * BACKTRACK(net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Must be expanded inside a function that has local variables `rt`
 * (struct rt6_info *) and `fn` (struct fib6_node *) and labels `out` and
 * `restart`.  If `rt` is the namespace's null entry, `fn` is moved to its
 * parent (or looked up by @saddr in the parent's source subtree when the
 * parent has one that is not `fn` itself) until either a node flagged
 * RTN_RTINFO is found (jumps to `restart`) or a node flagged RTN_TL_ROOT
 * is reached (jumps to `out`).  Otherwise it does nothing.
 *
 * Both arguments are parenthesised so that arbitrary expressions can be
 * passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 543
8ed67789
DL
544static struct rt6_info *ip6_pol_route_lookup(struct net *net,
545 struct fib6_table *table,
c71099ac 546 struct flowi *fl, int flags)
1da177e4
LT
547{
548 struct fib6_node *fn;
549 struct rt6_info *rt;
550
c71099ac
TG
551 read_lock_bh(&table->tb6_lock);
552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
553restart:
554 rt = fn->leaf;
dd3abc4e 555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 556 BACKTRACK(net, &fl->fl6_src);
c71099ac 557out:
03f49f34 558 dst_use(&rt->u.dst, jiffies);
c71099ac 559 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
560 return rt;
561
562}
563
9acd9f3a
YH
564struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
565 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
566{
567 struct flowi fl = {
568 .oif = oif,
569 .nl_u = {
570 .ip6_u = {
571 .daddr = *daddr,
c71099ac
TG
572 },
573 },
574 };
575 struct dst_entry *dst;
77d16f45 576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 577
adaa70bb
TG
578 if (saddr) {
579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
580 flags |= RT6_LOOKUP_F_HAS_SADDR;
581 }
582
606a2b48 583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
584 if (dst->error == 0)
585 return (struct rt6_info *) dst;
586
587 dst_release(dst);
588
1da177e4
LT
589 return NULL;
590}
591
7159039a
YH
592EXPORT_SYMBOL(rt6_lookup);
593
c71099ac 594/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
595 It takes a new route entry; if the addition fails for any reason, the
596 route is freed. In any case, if the caller does not hold it, it may
597 be destroyed.
598 */
599
86872cb5 600static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
601{
602 int err;
c71099ac 603 struct fib6_table *table;
1da177e4 604
c71099ac
TG
605 table = rt->rt6i_table;
606 write_lock_bh(&table->tb6_lock);
86872cb5 607 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 608 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
609
610 return err;
611}
612
40e22e8f
TG
613int ip6_ins_rt(struct rt6_info *rt)
614{
4d1169c1 615 struct nl_info info = {
c346dca1 616 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 617 };
528c4ceb 618 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
619}
620
95a9a5ba
YH
621static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
622 struct in6_addr *saddr)
1da177e4 623{
1da177e4
LT
624 struct rt6_info *rt;
625
626 /*
627 * Clone the route.
628 */
629
630 rt = ip6_rt_copy(ort);
631
632 if (rt) {
14deae41
DM
633 struct neighbour *neigh;
634 int attempts = !in_softirq();
635
58c4fb86
YH
636 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
637 if (rt->rt6i_dst.plen != 128 &&
638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
639 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 640 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 641 }
1da177e4 642
58c4fb86 643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
644 rt->rt6i_dst.plen = 128;
645 rt->rt6i_flags |= RTF_CACHE;
646 rt->u.dst.flags |= DST_HOST;
647
648#ifdef CONFIG_IPV6_SUBTREES
649 if (rt->rt6i_src.plen && saddr) {
650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
651 rt->rt6i_src.plen = 128;
652 }
653#endif
654
14deae41
DM
655 retry:
656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
657 if (IS_ERR(neigh)) {
658 struct net *net = dev_net(rt->rt6i_dev);
659 int saved_rt_min_interval =
660 net->ipv6.sysctl.ip6_rt_gc_min_interval;
661 int saved_rt_elasticity =
662 net->ipv6.sysctl.ip6_rt_gc_elasticity;
663
664 if (attempts-- > 0) {
665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
667
86393e52 668 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
669
670 net->ipv6.sysctl.ip6_rt_gc_elasticity =
671 saved_rt_elasticity;
672 net->ipv6.sysctl.ip6_rt_gc_min_interval =
673 saved_rt_min_interval;
674 goto retry;
675 }
676
677 if (net_ratelimit())
678 printk(KERN_WARNING
679 "Neighbour table overflow.\n");
680 dst_free(&rt->u.dst);
681 return NULL;
682 }
683 rt->rt6i_nexthop = neigh;
1da177e4 684
95a9a5ba 685 }
1da177e4 686
95a9a5ba
YH
687 return rt;
688}
1da177e4 689
299d9939
YH
690static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
691{
692 struct rt6_info *rt = ip6_rt_copy(ort);
693 if (rt) {
694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
695 rt->rt6i_dst.plen = 128;
696 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
697 rt->u.dst.flags |= DST_HOST;
698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
699 }
700 return rt;
701}
702
8ed67789
DL
703static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
704 struct flowi *fl, int flags)
1da177e4
LT
705{
706 struct fib6_node *fn;
519fbd87 707 struct rt6_info *rt, *nrt;
c71099ac 708 int strict = 0;
1da177e4 709 int attempts = 3;
519fbd87 710 int err;
53b7997f 711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 712
77d16f45 713 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
714
715relookup:
c71099ac 716 read_lock_bh(&table->tb6_lock);
1da177e4 717
8238dd06 718restart_2:
c71099ac 719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
720
721restart:
4acad72d 722 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
723
724 BACKTRACK(net, &fl->fl6_src);
725 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 726 rt->rt6i_flags & RTF_CACHE)
1ddef044 727 goto out;
1da177e4 728
fb9de91e 729 dst_hold(&rt->u.dst);
c71099ac 730 read_unlock_bh(&table->tb6_lock);
fb9de91e 731
519fbd87 732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
734 else {
735#if CLONE_OFFLINK_ROUTE
c71099ac 736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
737#else
738 goto out2;
739#endif
740 }
e40cf353 741
519fbd87 742 dst_release(&rt->u.dst);
8ed67789 743 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 744
519fbd87
YH
745 dst_hold(&rt->u.dst);
746 if (nrt) {
40e22e8f 747 err = ip6_ins_rt(nrt);
519fbd87 748 if (!err)
1da177e4 749 goto out2;
1da177e4 750 }
1da177e4 751
519fbd87
YH
752 if (--attempts <= 0)
753 goto out2;
754
755 /*
c71099ac 756 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
757 * released someone could insert this route. Relookup.
758 */
759 dst_release(&rt->u.dst);
760 goto relookup;
761
762out:
8238dd06
YH
763 if (reachable) {
764 reachable = 0;
765 goto restart_2;
766 }
519fbd87 767 dst_hold(&rt->u.dst);
c71099ac 768 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
769out2:
770 rt->u.dst.lastuse = jiffies;
771 rt->u.dst.__use++;
c71099ac
TG
772
773 return rt;
1da177e4
LT
774}
775
8ed67789 776static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
777 struct flowi *fl, int flags)
778{
8ed67789 779 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
780}
781
c71099ac
TG
782void ip6_route_input(struct sk_buff *skb)
783{
0660e03f 784 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 785 struct net *net = dev_net(skb->dev);
adaa70bb 786 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
787 struct flowi fl = {
788 .iif = skb->dev->ifindex,
789 .nl_u = {
790 .ip6_u = {
791 .daddr = iph->daddr,
792 .saddr = iph->saddr,
90bcaf7b 793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
794 },
795 },
1ab1457c 796 .mark = skb->mark,
c71099ac
TG
797 .proto = iph->nexthdr,
798 };
adaa70bb 799
1d6e55f1 800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 801 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 802
adf30907 803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
804}
805
8ed67789 806static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 807 struct flowi *fl, int flags)
1da177e4 808{
8ed67789 809 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
810}
811
4591db4f
DL
812struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
813 struct flowi *fl)
c71099ac
TG
814{
815 int flags = 0;
816
817 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 818 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 819
adaa70bb
TG
820 if (!ipv6_addr_any(&fl->fl6_src))
821 flags |= RT6_LOOKUP_F_HAS_SADDR;
7cbca67c
YH
822 else if (sk) {
823 unsigned int prefs = inet6_sk(sk)->srcprefs;
824 if (prefs & IPV6_PREFER_SRC_TMP)
825 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
826 if (prefs & IPV6_PREFER_SRC_PUBLIC)
827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
828 if (prefs & IPV6_PREFER_SRC_COA)
829 flags |= RT6_LOOKUP_F_SRCPREF_COA;
830 }
adaa70bb 831
4591db4f 832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
833}
834
7159039a 835EXPORT_SYMBOL(ip6_route_output);
1da177e4 836
14e50e57
DM
837int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
838{
839 struct rt6_info *ort = (struct rt6_info *) *dstp;
840 struct rt6_info *rt = (struct rt6_info *)
841 dst_alloc(&ip6_dst_blackhole_ops);
842 struct dst_entry *new = NULL;
843
844 if (rt) {
845 new = &rt->u.dst;
846
847 atomic_set(&new->__refcnt, 1);
848 new->__use = 1;
352e512c
HX
849 new->input = dst_discard;
850 new->output = dst_discard;
14e50e57
DM
851
852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
853 new->dev = ort->u.dst.dev;
854 if (new->dev)
855 dev_hold(new->dev);
856 rt->rt6i_idev = ort->rt6i_idev;
857 if (rt->rt6i_idev)
858 in6_dev_hold(rt->rt6i_idev);
859 rt->rt6i_expires = 0;
860
861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
863 rt->rt6i_metric = 0;
864
865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
866#ifdef CONFIG_IPV6_SUBTREES
867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
868#endif
869
870 dst_free(new);
871 }
872
873 dst_release(*dstp);
874 *dstp = new;
875 return (new ? 0 : -ENOMEM);
876}
877EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
878
1da177e4
LT
879/*
880 * Destination cache support functions
881 */
882
883static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
884{
885 struct rt6_info *rt;
886
887 rt = (struct rt6_info *) dst;
888
889 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
890 return dst;
891
892 return NULL;
893}
894
895static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
896{
897 struct rt6_info *rt = (struct rt6_info *) dst;
898
899 if (rt) {
900 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 901 ip6_del_rt(rt);
1da177e4
LT
902 else
903 dst_release(dst);
904 }
905 return NULL;
906}
907
908static void ip6_link_failure(struct sk_buff *skb)
909{
910 struct rt6_info *rt;
911
912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
913
adf30907 914 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
915 if (rt) {
916 if (rt->rt6i_flags&RTF_CACHE) {
917 dst_set_expires(&rt->u.dst, 0);
918 rt->rt6i_flags |= RTF_EXPIRES;
919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
920 rt->rt6i_node->fn_sernum = -1;
921 }
922}
923
924static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
925{
926 struct rt6_info *rt6 = (struct rt6_info*)dst;
927
928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
929 rt6->rt6i_flags |= RTF_MODIFIED;
930 if (mtu < IPV6_MIN_MTU) {
931 mtu = IPV6_MIN_MTU;
932 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
933 }
934 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 935 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
936 }
937}
938
1da177e4
LT
939static int ipv6_get_mtu(struct net_device *dev);
940
5578689a 941static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
942{
943 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
944
5578689a
DL
945 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
946 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
947
948 /*
1ab1457c
YH
949 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
950 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
951 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
952 * rely only on pmtu discovery"
953 */
954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
955 mtu = IPV6_MAXPLEN;
956 return mtu;
957}
958
3b00944c
YH
959static struct dst_entry *icmp6_dst_gc_list;
960static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 961
3b00944c 962struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 963 struct neighbour *neigh,
9acd9f3a 964 const struct in6_addr *addr)
1da177e4
LT
965{
966 struct rt6_info *rt;
967 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 968 struct net *net = dev_net(dev);
1da177e4
LT
969
970 if (unlikely(idev == NULL))
971 return NULL;
972
86393e52 973 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
974 if (unlikely(rt == NULL)) {
975 in6_dev_put(idev);
976 goto out;
977 }
978
979 dev_hold(dev);
980 if (neigh)
981 neigh_hold(neigh);
14deae41 982 else {
1da177e4 983 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
984 if (IS_ERR(neigh))
985 neigh = NULL;
986 }
1da177e4
LT
987
988 rt->rt6i_dev = dev;
989 rt->rt6i_idev = idev;
990 rt->rt6i_nexthop = neigh;
991 atomic_set(&rt->u.dst.__refcnt, 1);
992 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
993 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 994 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
3b00944c 995 rt->u.dst.output = ip6_output;
1da177e4
LT
996
997#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
998 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
999 ? DST_HOST
1da177e4
LT
1000 : 0;
1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1002 rt->rt6i_dst.plen = 128;
1003#endif
1004
3b00944c
YH
1005 spin_lock_bh(&icmp6_dst_lock);
1006 rt->u.dst.next = icmp6_dst_gc_list;
1007 icmp6_dst_gc_list = &rt->u.dst;
1008 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1009
5578689a 1010 fib6_force_start_gc(net);
1da177e4
LT
1011
1012out:
40aa7b90 1013 return &rt->u.dst;
1da177e4
LT
1014}
1015
3d0f24a7 1016int icmp6_dst_gc(void)
1da177e4
LT
1017{
1018 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1019 int more = 0;
1da177e4
LT
1020
1021 next = NULL;
5d0bbeeb 1022
3b00944c
YH
1023 spin_lock_bh(&icmp6_dst_lock);
1024 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1025
1da177e4
LT
1026 while ((dst = *pprev) != NULL) {
1027 if (!atomic_read(&dst->__refcnt)) {
1028 *pprev = dst->next;
1029 dst_free(dst);
1da177e4
LT
1030 } else {
1031 pprev = &dst->next;
3d0f24a7 1032 ++more;
1da177e4
LT
1033 }
1034 }
1035
3b00944c 1036 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1037
3d0f24a7 1038 return more;
1da177e4
LT
1039}
1040
1e493d19
DM
1041static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1042 void *arg)
1043{
1044 struct dst_entry *dst, **pprev;
1045
1046 spin_lock_bh(&icmp6_dst_lock);
1047 pprev = &icmp6_dst_gc_list;
1048 while ((dst = *pprev) != NULL) {
1049 struct rt6_info *rt = (struct rt6_info *) dst;
1050 if (func(rt, arg)) {
1051 *pprev = dst->next;
1052 dst_free(dst);
1053 } else {
1054 pprev = &dst->next;
1055 }
1056 }
1057 spin_unlock_bh(&icmp6_dst_lock);
1058}
1059
569d3645 1060static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1061{
1da177e4 1062 unsigned long now = jiffies;
86393e52 1063 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1069
1070 if (time_after(rt_last_gc + rt_min_interval, now) &&
1071 atomic_read(&ops->entries) <= rt_max_size)
1da177e4
LT
1072 goto out;
1073
6891a346
BT
1074 net->ipv6.ip6_rt_gc_expire++;
1075 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1076 net->ipv6.ip6_rt_last_gc = now;
7019b78e
DL
1077 if (atomic_read(&ops->entries) < ops->gc_thresh)
1078 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1079out:
7019b78e
DL
1080 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1081 return (atomic_read(&ops->entries) > rt_max_size);
1da177e4
LT
1082}
1083
1084/* Clean host part of a prefix. Not necessary in radix tree,
1085 but results in cleaner routing tables.
1086
1087 Remove it only when all the things will work!
1088 */
1089
1090static int ipv6_get_mtu(struct net_device *dev)
1091{
1092 int mtu = IPV6_MIN_MTU;
1093 struct inet6_dev *idev;
1094
1095 idev = in6_dev_get(dev);
1096 if (idev) {
1097 mtu = idev->cnf.mtu6;
1098 in6_dev_put(idev);
1099 }
1100 return mtu;
1101}
1102
6b75d090 1103int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1104{
6b75d090
YH
1105 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1106 if (hoplimit < 0) {
1107 struct net_device *dev = dst->dev;
1108 struct inet6_dev *idev = in6_dev_get(dev);
1109 if (idev) {
1110 hoplimit = idev->cnf.hop_limit;
1111 in6_dev_put(idev);
1112 } else
53b7997f 1113 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1da177e4
LT
1114 }
1115 return hoplimit;
1116}
1117
1118/*
1119 *
1120 */
1121
86872cb5 1122int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1123{
1124 int err;
5578689a 1125 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1126 struct rt6_info *rt = NULL;
1127 struct net_device *dev = NULL;
1128 struct inet6_dev *idev = NULL;
c71099ac 1129 struct fib6_table *table;
1da177e4
LT
1130 int addr_type;
1131
86872cb5 1132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1133 return -EINVAL;
1134#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1135 if (cfg->fc_src_len)
1da177e4
LT
1136 return -EINVAL;
1137#endif
86872cb5 1138 if (cfg->fc_ifindex) {
1da177e4 1139 err = -ENODEV;
5578689a 1140 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1141 if (!dev)
1142 goto out;
1143 idev = in6_dev_get(dev);
1144 if (!idev)
1145 goto out;
1146 }
1147
86872cb5
TG
1148 if (cfg->fc_metric == 0)
1149 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1150
5578689a 1151 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1152 if (table == NULL) {
1153 err = -ENOBUFS;
1154 goto out;
1155 }
1156
86393e52 1157 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1158
1159 if (rt == NULL) {
1160 err = -ENOMEM;
1161 goto out;
1162 }
1163
1164 rt->u.dst.obsolete = -1;
6f704992
YH
1165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1167 0;
1da177e4 1168
86872cb5
TG
1169 if (cfg->fc_protocol == RTPROT_UNSPEC)
1170 cfg->fc_protocol = RTPROT_BOOT;
1171 rt->rt6i_protocol = cfg->fc_protocol;
1172
1173 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1174
1175 if (addr_type & IPV6_ADDR_MULTICAST)
1176 rt->u.dst.input = ip6_mc_input;
1177 else
1178 rt->u.dst.input = ip6_forward;
1179
1180 rt->u.dst.output = ip6_output;
1181
86872cb5
TG
1182 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1183 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1184 if (rt->rt6i_dst.plen == 128)
1185 rt->u.dst.flags = DST_HOST;
1186
1187#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1188 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1189 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1190#endif
1191
86872cb5 1192 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1193
1194 /* We cannot add true routes via loopback here,
1195 they would result in kernel looping; promote them to reject routes
1196 */
86872cb5 1197 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1198 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1199 /* hold loopback dev/idev if we haven't done so. */
5578689a 1200 if (dev != net->loopback_dev) {
1da177e4
LT
1201 if (dev) {
1202 dev_put(dev);
1203 in6_dev_put(idev);
1204 }
5578689a 1205 dev = net->loopback_dev;
1da177e4
LT
1206 dev_hold(dev);
1207 idev = in6_dev_get(dev);
1208 if (!idev) {
1209 err = -ENODEV;
1210 goto out;
1211 }
1212 }
1213 rt->u.dst.output = ip6_pkt_discard_out;
1214 rt->u.dst.input = ip6_pkt_discard;
1215 rt->u.dst.error = -ENETUNREACH;
1216 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1217 goto install_route;
1218 }
1219
86872cb5 1220 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1221 struct in6_addr *gw_addr;
1222 int gwa_type;
1223
86872cb5
TG
1224 gw_addr = &cfg->fc_gateway;
1225 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1226 gwa_type = ipv6_addr_type(gw_addr);
1227
1228 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1229 struct rt6_info *grt;
1230
1231 /* IPv6 strictly inhibits using not link-local
1232 addresses as nexthop address.
1233 Otherwise, router will not able to send redirects.
1234 It is very good, but in some (rare!) circumstances
1235 (SIT, PtP, NBMA NOARP links) it is handy to allow
1236 some exceptions. --ANK
1237 */
1238 err = -EINVAL;
1239 if (!(gwa_type&IPV6_ADDR_UNICAST))
1240 goto out;
1241
5578689a 1242 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1243
1244 err = -EHOSTUNREACH;
1245 if (grt == NULL)
1246 goto out;
1247 if (dev) {
1248 if (dev != grt->rt6i_dev) {
1249 dst_release(&grt->u.dst);
1250 goto out;
1251 }
1252 } else {
1253 dev = grt->rt6i_dev;
1254 idev = grt->rt6i_idev;
1255 dev_hold(dev);
1256 in6_dev_hold(grt->rt6i_idev);
1257 }
1258 if (!(grt->rt6i_flags&RTF_GATEWAY))
1259 err = 0;
1260 dst_release(&grt->u.dst);
1261
1262 if (err)
1263 goto out;
1264 }
1265 err = -EINVAL;
1266 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1267 goto out;
1268 }
1269
1270 err = -ENODEV;
1271 if (dev == NULL)
1272 goto out;
1273
86872cb5 1274 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1275 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1276 if (IS_ERR(rt->rt6i_nexthop)) {
1277 err = PTR_ERR(rt->rt6i_nexthop);
1278 rt->rt6i_nexthop = NULL;
1279 goto out;
1280 }
1281 }
1282
86872cb5 1283 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1284
1285install_route:
86872cb5
TG
1286 if (cfg->fc_mx) {
1287 struct nlattr *nla;
1288 int remaining;
1289
1290 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1291 int type = nla_type(nla);
86872cb5
TG
1292
1293 if (type) {
1294 if (type > RTAX_MAX) {
1da177e4
LT
1295 err = -EINVAL;
1296 goto out;
1297 }
86872cb5
TG
1298
1299 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1300 }
1da177e4
LT
1301 }
1302 }
1303
5ffc02a1 1304 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1da177e4 1305 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1ca615fb 1306 if (!dst_mtu(&rt->u.dst))
1da177e4 1307 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
5ffc02a1 1308 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
5578689a 1309 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1310 rt->u.dst.dev = dev;
1311 rt->rt6i_idev = idev;
c71099ac 1312 rt->rt6i_table = table;
63152fc0 1313
c346dca1 1314 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1315
86872cb5 1316 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1317
1318out:
1319 if (dev)
1320 dev_put(dev);
1321 if (idev)
1322 in6_dev_put(idev);
1323 if (rt)
40aa7b90 1324 dst_free(&rt->u.dst);
1da177e4
LT
1325 return err;
1326}
1327
86872cb5 1328static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1329{
1330 int err;
c71099ac 1331 struct fib6_table *table;
c346dca1 1332 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1333
8ed67789 1334 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1335 return -ENOENT;
1336
c71099ac
TG
1337 table = rt->rt6i_table;
1338 write_lock_bh(&table->tb6_lock);
1da177e4 1339
86872cb5 1340 err = fib6_del(rt, info);
1da177e4
LT
1341 dst_release(&rt->u.dst);
1342
c71099ac 1343 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1344
1345 return err;
1346}
1347
e0a1ad73
TG
1348int ip6_del_rt(struct rt6_info *rt)
1349{
4d1169c1 1350 struct nl_info info = {
c346dca1 1351 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1352 };
528c4ceb 1353 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1354}
1355
86872cb5 1356static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1357{
c71099ac 1358 struct fib6_table *table;
1da177e4
LT
1359 struct fib6_node *fn;
1360 struct rt6_info *rt;
1361 int err = -ESRCH;
1362
5578689a 1363 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1364 if (table == NULL)
1365 return err;
1366
1367 read_lock_bh(&table->tb6_lock);
1da177e4 1368
c71099ac 1369 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1370 &cfg->fc_dst, cfg->fc_dst_len,
1371 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1372
1da177e4 1373 if (fn) {
7cc48263 1374 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1375 if (cfg->fc_ifindex &&
1da177e4 1376 (rt->rt6i_dev == NULL ||
86872cb5 1377 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1378 continue;
86872cb5
TG
1379 if (cfg->fc_flags & RTF_GATEWAY &&
1380 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1381 continue;
86872cb5 1382 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1383 continue;
1384 dst_hold(&rt->u.dst);
c71099ac 1385 read_unlock_bh(&table->tb6_lock);
1da177e4 1386
86872cb5 1387 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1388 }
1389 }
c71099ac 1390 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1391
1392 return err;
1393}
1394
1395/*
1396 * Handle redirects
1397 */
a6279458
YH
1398struct ip6rd_flowi {
1399 struct flowi fl;
1400 struct in6_addr gateway;
1401};
1402
8ed67789
DL
1403static struct rt6_info *__ip6_route_redirect(struct net *net,
1404 struct fib6_table *table,
a6279458
YH
1405 struct flowi *fl,
1406 int flags)
1da177e4 1407{
a6279458
YH
1408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1409 struct rt6_info *rt;
e843b9e1 1410 struct fib6_node *fn;
c71099ac 1411
1da177e4 1412 /*
e843b9e1
YH
1413 * Get the "current" route for this destination and
1414 * check if the redirect has come from approriate router.
1415 *
1416 * RFC 2461 specifies that redirects should only be
1417 * accepted if they come from the nexthop to the target.
1418 * Due to the way the routes are chosen, this notion
1419 * is a bit fuzzy and one might need to check all possible
1420 * routes.
1da177e4 1421 */
1da177e4 1422
c71099ac 1423 read_lock_bh(&table->tb6_lock);
a6279458 1424 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1425restart:
7cc48263 1426 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1427 /*
1428 * Current route is on-link; redirect is always invalid.
1429 *
1430 * Seems, previous statement is not true. It could
1431 * be node, which looks for us as on-link (f.e. proxy ndisc)
1432 * But then router serving it might decide, that we should
1433 * know truth 8)8) --ANK (980726).
1434 */
1435 if (rt6_check_expired(rt))
1436 continue;
1437 if (!(rt->rt6i_flags & RTF_GATEWAY))
1438 continue;
a6279458 1439 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1440 continue;
a6279458 1441 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1442 continue;
1443 break;
1444 }
a6279458 1445
cb15d9c2 1446 if (!rt)
8ed67789
DL
1447 rt = net->ipv6.ip6_null_entry;
1448 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1449out:
a6279458
YH
1450 dst_hold(&rt->u.dst);
1451
c71099ac 1452 read_unlock_bh(&table->tb6_lock);
e843b9e1 1453
a6279458
YH
1454 return rt;
1455};
1456
1457static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1458 struct in6_addr *src,
1459 struct in6_addr *gateway,
1460 struct net_device *dev)
1461{
adaa70bb 1462 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1463 struct net *net = dev_net(dev);
a6279458
YH
1464 struct ip6rd_flowi rdfl = {
1465 .fl = {
1466 .oif = dev->ifindex,
1467 .nl_u = {
1468 .ip6_u = {
1469 .daddr = *dest,
1470 .saddr = *src,
1471 },
1472 },
1473 },
1474 .gateway = *gateway,
1475 };
adaa70bb
TG
1476
1477 if (rt6_need_strict(dest))
1478 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1479
5578689a 1480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1481 flags, __ip6_route_redirect);
a6279458
YH
1482}
1483
1484void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485 struct in6_addr *saddr,
1486 struct neighbour *neigh, u8 *lladdr, int on_link)
1487{
1488 struct rt6_info *rt, *nrt = NULL;
1489 struct netevent_redirect netevent;
c346dca1 1490 struct net *net = dev_net(neigh->dev);
a6279458
YH
1491
1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1493
8ed67789 1494 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1495 if (net_ratelimit())
1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497 "for redirect target\n");
a6279458 1498 goto out;
1da177e4
LT
1499 }
1500
1da177e4
LT
1501 /*
1502 * We have finally decided to accept it.
1503 */
1504
1ab1457c 1505 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1506 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507 NEIGH_UPDATE_F_OVERRIDE|
1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509 NEIGH_UPDATE_F_ISROUTER))
1510 );
1511
1512 /*
1513 * Redirect received -> path was valid.
1514 * Look, redirects are sent only in response to data packets,
1515 * so that this nexthop apparently is reachable. --ANK
1516 */
1517 dst_confirm(&rt->u.dst);
1518
1519 /* Duplicate redirect: silently ignore. */
1520 if (neigh == rt->u.dst.neighbour)
1521 goto out;
1522
1523 nrt = ip6_rt_copy(rt);
1524 if (nrt == NULL)
1525 goto out;
1526
1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528 if (on_link)
1529 nrt->rt6i_flags &= ~RTF_GATEWAY;
1530
1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532 nrt->rt6i_dst.plen = 128;
1533 nrt->u.dst.flags |= DST_HOST;
1534
1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536 nrt->rt6i_nexthop = neigh_clone(neigh);
1537 /* Reset pmtu, it may be better */
1538 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
c346dca1 1539 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
5578689a 1540 dst_mtu(&nrt->u.dst));
1da177e4 1541
40e22e8f 1542 if (ip6_ins_rt(nrt))
1da177e4
LT
1543 goto out;
1544
8d71740c
TT
1545 netevent.old = &rt->u.dst;
1546 netevent.new = &nrt->u.dst;
1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1548
1da177e4 1549 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1550 ip6_del_rt(rt);
1da177e4
LT
1551 return;
1552 }
1553
1554out:
1ab1457c 1555 dst_release(&rt->u.dst);
1da177e4
LT
1556 return;
1557}
1558
1559/*
1560 * Handle ICMP "packet too big" messages
1561 * i.e. Path MTU discovery
1562 */
1563
1564void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1565 struct net_device *dev, u32 pmtu)
1566{
1567 struct rt6_info *rt, *nrt;
c346dca1 1568 struct net *net = dev_net(dev);
1da177e4
LT
1569 int allfrag = 0;
1570
5578689a 1571 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1572 if (rt == NULL)
1573 return;
1574
1575 if (pmtu >= dst_mtu(&rt->u.dst))
1576 goto out;
1577
1578 if (pmtu < IPV6_MIN_MTU) {
1579 /*
1ab1457c 1580 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1581 * MTU (1280) and a fragment header should always be included
1582 * after a node receiving Too Big message reporting PMTU is
1583 * less than the IPv6 Minimum Link MTU.
1584 */
1585 pmtu = IPV6_MIN_MTU;
1586 allfrag = 1;
1587 }
1588
1589 /* New mtu received -> path was valid.
1590 They are sent only in response to data packets,
1591 so that this nexthop apparently is reachable. --ANK
1592 */
1593 dst_confirm(&rt->u.dst);
1594
1595 /* Host route. If it is static, it would be better
1596 not to override it, but add new one, so that
1597 when cache entry will expire old pmtu
1598 would return automatically.
1599 */
1600 if (rt->rt6i_flags & RTF_CACHE) {
1601 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1602 if (allfrag)
1603 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
5578689a 1604 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1605 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1606 goto out;
1607 }
1608
1609 /* Network route.
1610 Two cases are possible:
1611 1. It is connected route. Action: COW
1612 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1613 */
d5315b50 1614 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1615 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1616 else
1617 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1618
d5315b50 1619 if (nrt) {
a1e78363
YH
1620 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1621 if (allfrag)
1622 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1623
1624 /* According to RFC 1981, detecting PMTU increase shouldn't be
1625 * happened within 5 mins, the recommended timer is 10 mins.
1626 * Here this route expiration time is set to ip6_rt_mtu_expires
1627 * which is 10 mins. After 10 mins the decreased pmtu is expired
1628 * and detecting PMTU increase will be automatically happened.
1629 */
5578689a 1630 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1631 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1632
40e22e8f 1633 ip6_ins_rt(nrt);
1da177e4 1634 }
1da177e4
LT
1635out:
1636 dst_release(&rt->u.dst);
1637}
1638
1639/*
1640 * Misc support functions
1641 */
1642
1643static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1644{
c346dca1 1645 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1646 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1647
1648 if (rt) {
1649 rt->u.dst.input = ort->u.dst.input;
1650 rt->u.dst.output = ort->u.dst.output;
1651
1652 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1653 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1654 rt->u.dst.dev = ort->u.dst.dev;
1655 if (rt->u.dst.dev)
1656 dev_hold(rt->u.dst.dev);
1657 rt->rt6i_idev = ort->rt6i_idev;
1658 if (rt->rt6i_idev)
1659 in6_dev_hold(rt->rt6i_idev);
1660 rt->u.dst.lastuse = jiffies;
1661 rt->rt6i_expires = 0;
1662
1663 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1664 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1665 rt->rt6i_metric = 0;
1666
1667 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1668#ifdef CONFIG_IPV6_SUBTREES
1669 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1670#endif
c71099ac 1671 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1672 }
1673 return rt;
1674}
1675
70ceb4f5 1676#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1677static struct rt6_info *rt6_get_route_info(struct net *net,
1678 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1679 struct in6_addr *gwaddr, int ifindex)
1680{
1681 struct fib6_node *fn;
1682 struct rt6_info *rt = NULL;
c71099ac
TG
1683 struct fib6_table *table;
1684
efa2cea0 1685 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1686 if (table == NULL)
1687 return NULL;
70ceb4f5 1688
c71099ac
TG
1689 write_lock_bh(&table->tb6_lock);
1690 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1691 if (!fn)
1692 goto out;
1693
7cc48263 1694 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1695 if (rt->rt6i_dev->ifindex != ifindex)
1696 continue;
1697 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1698 continue;
1699 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1700 continue;
1701 dst_hold(&rt->u.dst);
1702 break;
1703 }
1704out:
c71099ac 1705 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1706 return rt;
1707}
1708
efa2cea0
DL
1709static struct rt6_info *rt6_add_route_info(struct net *net,
1710 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1711 struct in6_addr *gwaddr, int ifindex,
1712 unsigned pref)
1713{
86872cb5
TG
1714 struct fib6_config cfg = {
1715 .fc_table = RT6_TABLE_INFO,
238fc7ea 1716 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1717 .fc_ifindex = ifindex,
1718 .fc_dst_len = prefixlen,
1719 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1720 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1721 .fc_nlinfo.pid = 0,
1722 .fc_nlinfo.nlh = NULL,
1723 .fc_nlinfo.nl_net = net,
86872cb5
TG
1724 };
1725
1726 ipv6_addr_copy(&cfg.fc_dst, prefix);
1727 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1728
e317da96
YH
1729 /* We should treat it as a default route if prefix length is 0. */
1730 if (!prefixlen)
86872cb5 1731 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1732
86872cb5 1733 ip6_route_add(&cfg);
70ceb4f5 1734
efa2cea0 1735 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1736}
1737#endif
1738
1da177e4 1739struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1740{
1da177e4 1741 struct rt6_info *rt;
c71099ac 1742 struct fib6_table *table;
1da177e4 1743
c346dca1 1744 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1745 if (table == NULL)
1746 return NULL;
1da177e4 1747
c71099ac 1748 write_lock_bh(&table->tb6_lock);
7cc48263 1749 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1750 if (dev == rt->rt6i_dev &&
045927ff 1751 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1752 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1753 break;
1754 }
1755 if (rt)
1756 dst_hold(&rt->u.dst);
c71099ac 1757 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1758 return rt;
1759}
1760
1761struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1762 struct net_device *dev,
1763 unsigned int pref)
1da177e4 1764{
86872cb5
TG
1765 struct fib6_config cfg = {
1766 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1767 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1768 .fc_ifindex = dev->ifindex,
1769 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1770 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1771 .fc_nlinfo.pid = 0,
1772 .fc_nlinfo.nlh = NULL,
c346dca1 1773 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1774 };
1da177e4 1775
86872cb5 1776 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1777
86872cb5 1778 ip6_route_add(&cfg);
1da177e4 1779
1da177e4
LT
1780 return rt6_get_dflt_router(gwaddr, dev);
1781}
1782
7b4da532 1783void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1784{
1785 struct rt6_info *rt;
c71099ac
TG
1786 struct fib6_table *table;
1787
1788 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1789 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1790 if (table == NULL)
1791 return;
1da177e4
LT
1792
1793restart:
c71099ac 1794 read_lock_bh(&table->tb6_lock);
7cc48263 1795 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1796 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1797 dst_hold(&rt->u.dst);
c71099ac 1798 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1799 ip6_del_rt(rt);
1da177e4
LT
1800 goto restart;
1801 }
1802 }
c71099ac 1803 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1804}
1805
5578689a
DL
1806static void rtmsg_to_fib6_config(struct net *net,
1807 struct in6_rtmsg *rtmsg,
86872cb5
TG
1808 struct fib6_config *cfg)
1809{
1810 memset(cfg, 0, sizeof(*cfg));
1811
1812 cfg->fc_table = RT6_TABLE_MAIN;
1813 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1814 cfg->fc_metric = rtmsg->rtmsg_metric;
1815 cfg->fc_expires = rtmsg->rtmsg_info;
1816 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1817 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1818 cfg->fc_flags = rtmsg->rtmsg_flags;
1819
5578689a 1820 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1821
86872cb5
TG
1822 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1823 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1824 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1825}
1826
5578689a 1827int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1828{
86872cb5 1829 struct fib6_config cfg;
1da177e4
LT
1830 struct in6_rtmsg rtmsg;
1831 int err;
1832
1833 switch(cmd) {
1834 case SIOCADDRT: /* Add a route */
1835 case SIOCDELRT: /* Delete a route */
1836 if (!capable(CAP_NET_ADMIN))
1837 return -EPERM;
1838 err = copy_from_user(&rtmsg, arg,
1839 sizeof(struct in6_rtmsg));
1840 if (err)
1841 return -EFAULT;
86872cb5 1842
5578689a 1843 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1844
1da177e4
LT
1845 rtnl_lock();
1846 switch (cmd) {
1847 case SIOCADDRT:
86872cb5 1848 err = ip6_route_add(&cfg);
1da177e4
LT
1849 break;
1850 case SIOCDELRT:
86872cb5 1851 err = ip6_route_del(&cfg);
1da177e4
LT
1852 break;
1853 default:
1854 err = -EINVAL;
1855 }
1856 rtnl_unlock();
1857
1858 return err;
3ff50b79 1859 }
1da177e4
LT
1860
1861 return -EINVAL;
1862}
1863
1864/*
1865 * Drop the packet on the floor
1866 */
1867
d5fdd6ba 1868static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1869{
612f09e8 1870 int type;
adf30907 1871 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1872 switch (ipstats_mib_noroutes) {
1873 case IPSTATS_MIB_INNOROUTES:
0660e03f 1874 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8 1875 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
3bd653c8
DL
1876 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1877 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1878 break;
1879 }
1880 /* FALLTHROUGH */
1881 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1882 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1883 ipstats_mib_noroutes);
612f09e8
YH
1884 break;
1885 }
9ce8ade0 1886 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1887 kfree_skb(skb);
1888 return 0;
1889}
1890
9ce8ade0
TG
1891static int ip6_pkt_discard(struct sk_buff *skb)
1892{
612f09e8 1893 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1894}
1895
20380731 1896static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1897{
adf30907 1898 skb->dev = skb_dst(skb)->dev;
612f09e8 1899 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1900}
1901
6723ab54
DM
1902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1903
9ce8ade0
TG
1904static int ip6_pkt_prohibit(struct sk_buff *skb)
1905{
612f09e8 1906 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1907}
1908
1909static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1910{
adf30907 1911 skb->dev = skb_dst(skb)->dev;
612f09e8 1912 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1913}
1914
6723ab54
DM
1915#endif
1916
1da177e4
LT
1917/*
1918 * Allocate a dst for local (unicast / anycast) address.
1919 */
1920
1921struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1922 const struct in6_addr *addr,
1923 int anycast)
1924{
c346dca1 1925 struct net *net = dev_net(idev->dev);
86393e52 1926 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1927 struct neighbour *neigh;
1da177e4
LT
1928
1929 if (rt == NULL)
1930 return ERR_PTR(-ENOMEM);
1931
5578689a 1932 dev_hold(net->loopback_dev);
1da177e4
LT
1933 in6_dev_hold(idev);
1934
1935 rt->u.dst.flags = DST_HOST;
1936 rt->u.dst.input = ip6_input;
1937 rt->u.dst.output = ip6_output;
5578689a 1938 rt->rt6i_dev = net->loopback_dev;
1da177e4
LT
1939 rt->rt6i_idev = idev;
1940 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 1941 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1942 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1943 rt->u.dst.obsolete = -1;
1944
1945 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1946 if (anycast)
1947 rt->rt6i_flags |= RTF_ANYCAST;
1948 else
1da177e4 1949 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1950 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1951 if (IS_ERR(neigh)) {
40aa7b90 1952 dst_free(&rt->u.dst);
14deae41
DM
1953
1954 /* We are casting this because that is the return
1955 * value type. But an errno encoded pointer is the
1956 * same regardless of the underlying pointer type,
1957 * and that's what we are returning. So this is OK.
1958 */
1959 return (struct rt6_info *) neigh;
1da177e4 1960 }
14deae41 1961 rt->rt6i_nexthop = neigh;
1da177e4
LT
1962
1963 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1964 rt->rt6i_dst.plen = 128;
5578689a 1965 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4
LT
1966
1967 atomic_set(&rt->u.dst.__refcnt, 1);
1968
1969 return rt;
1970}
1971
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace owning the null entry */
};
1976
1da177e4
LT
1977static int fib6_ifdown(struct rt6_info *rt, void *arg)
1978{
8ed67789
DL
1979 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1980 struct net *net = ((struct arg_dev_net *)arg)->net;
1981
1982 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1983 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
1984 RT6_TRACE("deleted by ifdown %p\n", rt);
1985 return -1;
1986 }
1987 return 0;
1988}
1989
f3db4851 1990void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1991{
8ed67789
DL
1992 struct arg_dev_net adn = {
1993 .dev = dev,
1994 .net = net,
1995 };
1996
1997 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 1998 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
1999}
2000
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
2006
2007static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2008{
2009 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2010 struct inet6_dev *idev;
c346dca1 2011 struct net *net = dev_net(arg->dev);
1da177e4
LT
2012
2013 /* In IPv6 pmtu discovery is not optional,
2014 so that RTAX_MTU lock cannot disable it.
2015 We still use this lock to block changes
2016 caused by addrconf/ndisc.
2017 */
2018
2019 idev = __in6_dev_get(arg->dev);
2020 if (idev == NULL)
2021 return 0;
2022
2023 /* For administrative MTU increase, there is no way to discover
2024 IPv6 PMTU increase, so PMTU increase should be updated here.
2025 Since RFC 1981 doesn't include administrative MTU increase
2026 update PMTU increase is a MUST. (i.e. jumbo frame)
2027 */
2028 /*
2029 If new MTU is less than route PMTU, this new MTU will be the
2030 lowest MTU in the path, update the route PMTU to reflect PMTU
2031 decreases; if new MTU is greater than route PMTU, and the
2032 old MTU is the lowest MTU in the path, update the route PMTU
2033 to reflect the increase. In this case if the other nodes' MTU
2034 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2035 PMTU discouvery.
2036 */
2037 if (rt->rt6i_dev == arg->dev &&
2038 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 2039 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 2040 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 2041 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 2042 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
5578689a 2043 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 2044 }
1da177e4
LT
2045 return 0;
2046}
2047
2048void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2049{
c71099ac
TG
2050 struct rt6_mtu_change_arg arg = {
2051 .dev = dev,
2052 .mtu = mtu,
2053 };
1da177e4 2054
c346dca1 2055 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2056}
2057
ef7c79ed 2058static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2059 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2060 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2061 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2062 [RTA_PRIORITY] = { .type = NLA_U32 },
2063 [RTA_METRICS] = { .type = NLA_NESTED },
2064};
2065
2066static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2067 struct fib6_config *cfg)
1da177e4 2068{
86872cb5
TG
2069 struct rtmsg *rtm;
2070 struct nlattr *tb[RTA_MAX+1];
2071 int err;
1da177e4 2072
86872cb5
TG
2073 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2074 if (err < 0)
2075 goto errout;
1da177e4 2076
86872cb5
TG
2077 err = -EINVAL;
2078 rtm = nlmsg_data(nlh);
2079 memset(cfg, 0, sizeof(*cfg));
2080
2081 cfg->fc_table = rtm->rtm_table;
2082 cfg->fc_dst_len = rtm->rtm_dst_len;
2083 cfg->fc_src_len = rtm->rtm_src_len;
2084 cfg->fc_flags = RTF_UP;
2085 cfg->fc_protocol = rtm->rtm_protocol;
2086
2087 if (rtm->rtm_type == RTN_UNREACHABLE)
2088 cfg->fc_flags |= RTF_REJECT;
2089
2090 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2091 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2092 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2093
2094 if (tb[RTA_GATEWAY]) {
2095 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2096 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2097 }
86872cb5
TG
2098
2099 if (tb[RTA_DST]) {
2100 int plen = (rtm->rtm_dst_len + 7) >> 3;
2101
2102 if (nla_len(tb[RTA_DST]) < plen)
2103 goto errout;
2104
2105 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2106 }
86872cb5
TG
2107
2108 if (tb[RTA_SRC]) {
2109 int plen = (rtm->rtm_src_len + 7) >> 3;
2110
2111 if (nla_len(tb[RTA_SRC]) < plen)
2112 goto errout;
2113
2114 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2115 }
86872cb5
TG
2116
2117 if (tb[RTA_OIF])
2118 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2119
2120 if (tb[RTA_PRIORITY])
2121 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2122
2123 if (tb[RTA_METRICS]) {
2124 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2125 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2126 }
86872cb5
TG
2127
2128 if (tb[RTA_TABLE])
2129 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2130
2131 err = 0;
2132errout:
2133 return err;
1da177e4
LT
2134}
2135
c127ea2c 2136static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2137{
86872cb5
TG
2138 struct fib6_config cfg;
2139 int err;
1da177e4 2140
86872cb5
TG
2141 err = rtm_to_fib6_config(skb, nlh, &cfg);
2142 if (err < 0)
2143 return err;
2144
2145 return ip6_route_del(&cfg);
1da177e4
LT
2146}
2147
c127ea2c 2148static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2149{
86872cb5
TG
2150 struct fib6_config cfg;
2151 int err;
1da177e4 2152
86872cb5
TG
2153 err = rtm_to_fib6_config(skb, nlh, &cfg);
2154 if (err < 0)
2155 return err;
2156
2157 return ip6_route_add(&cfg);
1da177e4
LT
2158}
2159
339bf98f
TG
2160static inline size_t rt6_nlmsg_size(void)
2161{
2162 return NLMSG_ALIGN(sizeof(struct rtmsg))
2163 + nla_total_size(16) /* RTA_SRC */
2164 + nla_total_size(16) /* RTA_DST */
2165 + nla_total_size(16) /* RTA_GATEWAY */
2166 + nla_total_size(16) /* RTA_PREFSRC */
2167 + nla_total_size(4) /* RTA_TABLE */
2168 + nla_total_size(4) /* RTA_IIF */
2169 + nla_total_size(4) /* RTA_OIF */
2170 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2171 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2172 + nla_total_size(sizeof(struct rta_cacheinfo));
2173}
2174
191cd582
BH
/*
 * rt6_fill_node - serialise one IPv6 route into a netlink route message
 * @net:    network namespace, passed on to ip6mr_get_route() and
 *          ipv6_dev_get_saddr()
 * @skb:    message buffer being filled
 * @rt:     route to describe
 * @dst:    if non-NULL, emitted as RTA_DST and rtm_dst_len is forced to 128;
 *          otherwise the route's own destination prefix is emitted (if any)
 * @src:    same as @dst for RTA_SRC, only under CONFIG_IPV6_SUBTREES
 * @iif:    if non-zero, emitted as RTA_IIF (for a multicast destination under
 *          CONFIG_IPV6_MROUTE, ip6mr_get_route() is called instead)
 * @type:   netlink message type for the header
 * @pid:    netlink pid for the header
 * @seq:    netlink sequence number for the header
 * @prefix: if non-zero, only routes flagged RTF_PREFIX_RT are reported
 * @nowait: passed through to ip6mr_get_route()
 * @flags:  netlink header flags
 *
 * Returns 1 (nothing written) when @prefix is set and @rt is not a prefix
 * route, 0 when ip6mr_get_route() returned 0 with @nowait clear, -EMSGSIZE
 * when the header or an attribute did not fit (the partial message is
 * cancelled), and otherwise the value of nlmsg_end().
 *
 * NOTE(review): NLA_PUT()/NLA_PUT_U32() are macros declared outside this
 * file; they are presumed to jump to the nla_put_failure label when @skb
 * runs out of room - confirm against <net/netlink.h>.
 */
2175static int rt6_fill_node(struct net *net,
2176 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2177 struct in6_addr *dst, struct in6_addr *src,
2178 int iif, int type, u32 pid, u32 seq,
7bc570c8 2179 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2180{
2181 struct rtmsg *rtm;
2d7202bf 2182 struct nlmsghdr *nlh;
e3703b3d 2183 long expires;
9e762a4a 2184 u32 table;
1da177e4
LT
2185
2186 if (prefix) { /* user wants prefix routes only */
2187 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2188 /* success since this is not a prefix route */
2189 return 1;
2190 }
2191 }
2192
2d7202bf
TG
2193 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2194 if (nlh == NULL)
26932566 2195 return -EMSGSIZE;
2d7202bf
TG
2196
2197 rtm = nlmsg_data(nlh);
1da177e4
LT
2198 rtm->rtm_family = AF_INET6;
2199 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2200 rtm->rtm_src_len = rt->rt6i_src.plen;
2201 rtm->rtm_tos = 0;
c71099ac 2202 if (rt->rt6i_table)
9e762a4a 2203 table = rt->rt6i_table->tb6_id;
c71099ac 2204 else
9e762a4a
PM
2205 table = RT6_TABLE_UNSPEC;
/* The table id is reported twice: in the rtm_table header field and as a u32 RTA_TABLE attribute. */
2206 rtm->rtm_table = table;
2d7202bf 2207 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2208 if (rt->rt6i_flags&RTF_REJECT)
2209 rtm->rtm_type = RTN_UNREACHABLE;
2210 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2211 rtm->rtm_type = RTN_LOCAL;
2212 else
2213 rtm->rtm_type = RTN_UNICAST;
2214 rtm->rtm_flags = 0;
2215 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
/* Start from the stored protocol, then override it from the route flags (first match wins). */
2216 rtm->rtm_protocol = rt->rt6i_protocol;
2217 if (rt->rt6i_flags&RTF_DYNAMIC)
2218 rtm->rtm_protocol = RTPROT_REDIRECT;
2219 else if (rt->rt6i_flags & RTF_ADDRCONF)
2220 rtm->rtm_protocol = RTPROT_KERNEL;
2221 else if (rt->rt6i_flags&RTF_DEFAULT)
2222 rtm->rtm_protocol = RTPROT_RA;
2223
2224 if (rt->rt6i_flags&RTF_CACHE)
2225 rtm->rtm_flags |= RTM_F_CLONED;
2226
2227 if (dst) {
2d7202bf 2228 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2229 rtm->rtm_dst_len = 128;
1da177e4 2230 } else if (rtm->rtm_dst_len)
2d7202bf 2231 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2232#ifdef CONFIG_IPV6_SUBTREES
2233 if (src) {
2d7202bf 2234 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2235 rtm->rtm_src_len = 128;
1da177e4 2236 } else if (rtm->rtm_src_len)
2d7202bf 2237 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2238#endif
7bc570c8
YH
/*
 * With CONFIG_IPV6_MROUTE the "} else" below binds to the NLA_PUT_U32()
 * after the #endif, so RTA_IIF is emitted only for non-multicast
 * destinations; without it RTA_IIF is emitted whenever iif is set.
 */
2239 if (iif) {
2240#ifdef CONFIG_IPV6_MROUTE
2241 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2242 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2243 if (err <= 0) {
2244 if (!nowait) {
2245 if (err == 0)
2246 return 0;
2247 goto nla_put_failure;
2248 } else {
2249 if (err == -EMSGSIZE)
2250 goto nla_put_failure;
2251 }
2252 }
2253 } else
2254#endif
2255 NLA_PUT_U32(skb, RTA_IIF, iif);
2256 } else if (dst) {
5e0115e5 2257 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
1da177e4 2258 struct in6_addr saddr_buf;
191cd582 2259 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2260 dst, 0, &saddr_buf) == 0)
2d7202bf 2261 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2262 }
2d7202bf 2263
1da177e4 2264 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2265 goto nla_put_failure;
2266
1da177e4 2267 if (rt->u.dst.neighbour)
2d7202bf
TG
2268 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2269
1da177e4 2270 if (rt->u.dst.dev)
2d7202bf
TG
2271 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2272
2273 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2274
36e3deae
YH
/* Remaining lifetime in jiffies: 0 when the route has no RTF_EXPIRES, otherwise capped at INT_MAX. */
2275 if (!(rt->rt6i_flags & RTF_EXPIRES))
2276 expires = 0;
2277 else if (rt->rt6i_expires - jiffies < INT_MAX)
2278 expires = rt->rt6i_expires - jiffies;
2279 else
2280 expires = INT_MAX;
69cdf8f9 2281
e3703b3d
TG
2282 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2283 expires, rt->u.dst.error) < 0)
2284 goto nla_put_failure;
2d7202bf
TG
2285
2286 return nlmsg_end(skb, nlh);
2287
2288nla_put_failure:
26932566
PM
2289 nlmsg_cancel(skb, nlh);
2290 return -EMSGSIZE;
1da177e4
LT
2291}
2292
1b43af54 2293int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2294{
2295 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2296 int prefix;
2297
2d7202bf
TG
2298 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2299 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2300 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2301 } else
2302 prefix = 0;
2303
191cd582
BH
2304 return rt6_fill_node(arg->net,
2305 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2306 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2307 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2308}
2309
c127ea2c 2310static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2311{
3b1e0a65 2312 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2313 struct nlattr *tb[RTA_MAX+1];
2314 struct rt6_info *rt;
1da177e4 2315 struct sk_buff *skb;
ab364a6f 2316 struct rtmsg *rtm;
1da177e4 2317 struct flowi fl;
ab364a6f 2318 int err, iif = 0;
1da177e4 2319
ab364a6f
TG
2320 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2321 if (err < 0)
2322 goto errout;
1da177e4 2323
ab364a6f 2324 err = -EINVAL;
1da177e4 2325 memset(&fl, 0, sizeof(fl));
1da177e4 2326
ab364a6f
TG
2327 if (tb[RTA_SRC]) {
2328 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2329 goto errout;
2330
2331 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2332 }
2333
2334 if (tb[RTA_DST]) {
2335 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2336 goto errout;
2337
2338 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2339 }
2340
2341 if (tb[RTA_IIF])
2342 iif = nla_get_u32(tb[RTA_IIF]);
2343
2344 if (tb[RTA_OIF])
2345 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2346
2347 if (iif) {
2348 struct net_device *dev;
5578689a 2349 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2350 if (!dev) {
2351 err = -ENODEV;
ab364a6f 2352 goto errout;
1da177e4
LT
2353 }
2354 }
2355
ab364a6f
TG
2356 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2357 if (skb == NULL) {
2358 err = -ENOBUFS;
2359 goto errout;
2360 }
1da177e4 2361
ab364a6f
TG
2362 /* Reserve room for dummy headers, this skb can pass
2363 through good chunk of routing engine.
2364 */
459a98ed 2365 skb_reset_mac_header(skb);
ab364a6f 2366 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2367
8a3edd80 2368 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
adf30907 2369 skb_dst_set(skb, &rt->u.dst);
1da177e4 2370
191cd582 2371 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2372 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2373 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2374 if (err < 0) {
ab364a6f
TG
2375 kfree_skb(skb);
2376 goto errout;
1da177e4
LT
2377 }
2378
5578689a 2379 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2380errout:
1da177e4 2381 return err;
1da177e4
LT
2382}
2383
86872cb5 2384void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2385{
2386 struct sk_buff *skb;
5578689a 2387 struct net *net = info->nl_net;
528c4ceb
DL
2388 u32 seq;
2389 int err;
2390
2391 err = -ENOBUFS;
2392 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2393
339bf98f 2394 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2395 if (skb == NULL)
2396 goto errout;
2397
191cd582 2398 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2399 event, info->pid, seq, 0, 0, 0);
26932566
PM
2400 if (err < 0) {
2401 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2402 WARN_ON(err == -EMSGSIZE);
2403 kfree_skb(skb);
2404 goto errout;
2405 }
1ce85fe4
PNA
2406 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2407 info->nlh, gfp_any());
2408 return;
21713ebc
TG
2409errout:
2410 if (err < 0)
5578689a 2411 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2412}
2413
8ed67789
DL
2414static int ip6_route_dev_notify(struct notifier_block *this,
2415 unsigned long event, void *data)
2416{
2417 struct net_device *dev = (struct net_device *)data;
c346dca1 2418 struct net *net = dev_net(dev);
8ed67789
DL
2419
2420 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2421 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2422 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2423#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2424 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2425 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2426 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2427 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2428#endif
2429 }
2430
2431 return NOTIFY_OK;
2432}
2433
1da177e4
LT
2434/*
2435 * /proc
2436 */
2437
2438#ifdef CONFIG_PROC_FS
2439
/*
 * NOTE(review): neither RT6_INFO_LEN nor struct rt6_proc_arg is referenced
 * by the seq_file based /proc code that follows in this file; they look like
 * leftovers from an older buffer-based implementation - confirm no other
 * user exists before removing.
 */
2440#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2441
2442struct rt6_proc_arg
2443{
2444 char *buffer;
2445 int offset;
2446 int length;
2447 int skip;
2448 int len;
2449};
2450
2451static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2452{
33120b30 2453 struct seq_file *m = p_arg;
1da177e4 2454
4b7a4274 2455 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2456
2457#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2458 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2459#else
33120b30 2460 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2461#endif
2462
2463 if (rt->rt6i_nexthop) {
4b7a4274 2464 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2465 } else {
33120b30 2466 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2467 }
33120b30
AD
2468 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2469 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2470 rt->u.dst.__use, rt->rt6i_flags,
2471 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2472 return 0;
2473}
2474
33120b30 2475static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2476{
f3db4851
DL
2477 struct net *net = (struct net *)m->private;
2478 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2479 return 0;
2480}
1da177e4 2481
33120b30
AD
2482static int ipv6_route_open(struct inode *inode, struct file *file)
2483{
de05c557 2484 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2485}
2486
33120b30
AD
2487static const struct file_operations ipv6_route_proc_fops = {
2488 .owner = THIS_MODULE,
2489 .open = ipv6_route_open,
2490 .read = seq_read,
2491 .llseek = seq_lseek,
b6fcbdb4 2492 .release = single_release_net,
33120b30
AD
2493};
2494
1da177e4
LT
2495static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2496{
69ddb805 2497 struct net *net = (struct net *)seq->private;
1da177e4 2498 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2499 net->ipv6.rt6_stats->fib_nodes,
2500 net->ipv6.rt6_stats->fib_route_nodes,
2501 net->ipv6.rt6_stats->fib_rt_alloc,
2502 net->ipv6.rt6_stats->fib_rt_entries,
2503 net->ipv6.rt6_stats->fib_rt_cache,
86393e52 2504 atomic_read(&net->ipv6.ip6_dst_ops.entries),
69ddb805 2505 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2506
2507 return 0;
2508}
2509
2510static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2511{
de05c557 2512 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2513}
2514
9a32144e 2515static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2516 .owner = THIS_MODULE,
2517 .open = rt6_stats_seq_open,
2518 .read = seq_read,
2519 .llseek = seq_lseek,
b6fcbdb4 2520 .release = single_release_net,
1da177e4
LT
2521};
2522#endif /* CONFIG_PROC_FS */
2523
2524#ifdef CONFIG_SYSCTL
2525
1da177e4 2526static
8d65af78 2527int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2528 void __user *buffer, size_t *lenp, loff_t *ppos)
2529{
5b7c931d
DL
2530 struct net *net = current->nsproxy->net_ns;
2531 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2532 if (write) {
8d65af78 2533 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2534 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2535 return 0;
2536 } else
2537 return -EINVAL;
2538}
2539
760f2d01 2540ctl_table ipv6_route_table_template[] = {
1ab1457c 2541 {
1da177e4 2542 .procname = "flush",
4990509f 2543 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2544 .maxlen = sizeof(int),
89c8b3a1 2545 .mode = 0200,
6d9f239a 2546 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2547 },
2548 {
2549 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2550 .procname = "gc_thresh",
9a7ec3a9 2551 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2552 .maxlen = sizeof(int),
2553 .mode = 0644,
6d9f239a 2554 .proc_handler = proc_dointvec,
1da177e4
LT
2555 },
2556 {
2557 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2558 .procname = "max_size",
4990509f 2559 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2560 .maxlen = sizeof(int),
2561 .mode = 0644,
6d9f239a 2562 .proc_handler = proc_dointvec,
1da177e4
LT
2563 },
2564 {
2565 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2566 .procname = "gc_min_interval",
4990509f 2567 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2568 .maxlen = sizeof(int),
2569 .mode = 0644,
6d9f239a
AD
2570 .proc_handler = proc_dointvec_jiffies,
2571 .strategy = sysctl_jiffies,
1da177e4
LT
2572 },
2573 {
2574 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2575 .procname = "gc_timeout",
4990509f 2576 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2577 .maxlen = sizeof(int),
2578 .mode = 0644,
6d9f239a
AD
2579 .proc_handler = proc_dointvec_jiffies,
2580 .strategy = sysctl_jiffies,
1da177e4
LT
2581 },
2582 {
2583 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2584 .procname = "gc_interval",
4990509f 2585 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2586 .maxlen = sizeof(int),
2587 .mode = 0644,
6d9f239a
AD
2588 .proc_handler = proc_dointvec_jiffies,
2589 .strategy = sysctl_jiffies,
1da177e4
LT
2590 },
2591 {
2592 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2593 .procname = "gc_elasticity",
4990509f 2594 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2595 .maxlen = sizeof(int),
2596 .mode = 0644,
6d9f239a
AD
2597 .proc_handler = proc_dointvec_jiffies,
2598 .strategy = sysctl_jiffies,
1da177e4
LT
2599 },
2600 {
2601 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2602 .procname = "mtu_expires",
4990509f 2603 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2604 .maxlen = sizeof(int),
2605 .mode = 0644,
6d9f239a
AD
2606 .proc_handler = proc_dointvec_jiffies,
2607 .strategy = sysctl_jiffies,
1da177e4
LT
2608 },
2609 {
2610 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2611 .procname = "min_adv_mss",
4990509f 2612 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2613 .maxlen = sizeof(int),
2614 .mode = 0644,
6d9f239a
AD
2615 .proc_handler = proc_dointvec_jiffies,
2616 .strategy = sysctl_jiffies,
1da177e4
LT
2617 },
2618 {
2619 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2620 .procname = "gc_min_interval_ms",
4990509f 2621 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2622 .maxlen = sizeof(int),
2623 .mode = 0644,
6d9f239a
AD
2624 .proc_handler = proc_dointvec_ms_jiffies,
2625 .strategy = sysctl_ms_jiffies,
1da177e4
LT
2626 },
2627 { .ctl_name = 0 }
2628};
2629
760f2d01
DL
2630struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2631{
2632 struct ctl_table *table;
2633
2634 table = kmemdup(ipv6_route_table_template,
2635 sizeof(ipv6_route_table_template),
2636 GFP_KERNEL);
5ee09105
YH
2637
2638 if (table) {
2639 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2640 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2641 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2642 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2643 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2644 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2645 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2646 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2647 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2648 }
2649
760f2d01
DL
2650 return table;
2651}
1da177e4
LT
2652#endif
2653
cdb18761
DL
2654static int ip6_route_net_init(struct net *net)
2655{
633d424b 2656 int ret = -ENOMEM;
8ed67789 2657
86393e52
AD
2658 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2659 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2660
8ed67789
DL
2661 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2662 sizeof(*net->ipv6.ip6_null_entry),
2663 GFP_KERNEL);
2664 if (!net->ipv6.ip6_null_entry)
f2fc6a54 2665 goto out_ip6_dst_ops;
8ed67789
DL
2666 net->ipv6.ip6_null_entry->u.dst.path =
2667 (struct dst_entry *)net->ipv6.ip6_null_entry;
86393e52 2668 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2669
2670#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2671 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2672 sizeof(*net->ipv6.ip6_prohibit_entry),
2673 GFP_KERNEL);
68fffc67
PZ
2674 if (!net->ipv6.ip6_prohibit_entry)
2675 goto out_ip6_null_entry;
8ed67789
DL
2676 net->ipv6.ip6_prohibit_entry->u.dst.path =
2677 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
86393e52 2678 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2679
2680 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2681 sizeof(*net->ipv6.ip6_blk_hole_entry),
2682 GFP_KERNEL);
68fffc67
PZ
2683 if (!net->ipv6.ip6_blk_hole_entry)
2684 goto out_ip6_prohibit_entry;
8ed67789
DL
2685 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2686 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
86393e52 2687 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2688#endif
2689
b339a47c
PZ
2690 net->ipv6.sysctl.flush_delay = 0;
2691 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2692 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2693 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2694 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2695 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2696 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2697 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2698
cdb18761
DL
2699#ifdef CONFIG_PROC_FS
2700 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2701 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2702#endif
6891a346
BT
2703 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2704
8ed67789
DL
2705 ret = 0;
2706out:
2707 return ret;
f2fc6a54 2708
68fffc67
PZ
2709#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2710out_ip6_prohibit_entry:
2711 kfree(net->ipv6.ip6_prohibit_entry);
2712out_ip6_null_entry:
2713 kfree(net->ipv6.ip6_null_entry);
2714#endif
f2fc6a54 2715out_ip6_dst_ops:
f2fc6a54 2716 goto out;
cdb18761
DL
2717}
2718
2719static void ip6_route_net_exit(struct net *net)
2720{
2721#ifdef CONFIG_PROC_FS
2722 proc_net_remove(net, "ipv6_route");
2723 proc_net_remove(net, "rt6_stats");
2724#endif
8ed67789
DL
2725 kfree(net->ipv6.ip6_null_entry);
2726#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2727 kfree(net->ipv6.ip6_prohibit_entry);
2728 kfree(net->ipv6.ip6_blk_hole_entry);
2729#endif
cdb18761
DL
2730}
2731
2732static struct pernet_operations ip6_route_net_ops = {
2733 .init = ip6_route_net_init,
2734 .exit = ip6_route_net_exit,
2735};
2736
8ed67789
DL
2737static struct notifier_block ip6_route_dev_notifier = {
2738 .notifier_call = ip6_route_dev_notify,
2739 .priority = 0,
2740};
2741
433d49c3 2742int __init ip6_route_init(void)
1da177e4 2743{
433d49c3
DL
2744 int ret;
2745
9a7ec3a9
DL
2746 ret = -ENOMEM;
2747 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2748 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2749 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2750 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2751 goto out;
14e50e57 2752
8ed67789
DL
2753 ret = register_pernet_subsys(&ip6_route_net_ops);
2754 if (ret)
bdb3289f 2755 goto out_kmem_cache;
bdb3289f 2756
5dc121e9
AE
2757 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2758
8ed67789
DL
2759 /* Registering of the loopback is done before this portion of code,
2760 * the loopback reference in rt6_info will not be taken, do it
2761 * manually for init_net */
2762 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2763 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2764 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2765 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2766 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2767 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2768 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2769 #endif
433d49c3
DL
2770 ret = fib6_init();
2771 if (ret)
8ed67789 2772 goto out_register_subsys;
433d49c3 2773
433d49c3
DL
2774 ret = xfrm6_init();
2775 if (ret)
cdb18761 2776 goto out_fib6_init;
c35b7e72 2777
433d49c3
DL
2778 ret = fib6_rules_init();
2779 if (ret)
2780 goto xfrm6_init;
7e5449c2 2781
433d49c3
DL
2782 ret = -ENOBUFS;
2783 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2784 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2785 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2786 goto fib6_rules_init;
c127ea2c 2787
8ed67789 2788 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2789 if (ret)
2790 goto fib6_rules_init;
8ed67789 2791
433d49c3
DL
2792out:
2793 return ret;
2794
2795fib6_rules_init:
433d49c3
DL
2796 fib6_rules_cleanup();
2797xfrm6_init:
433d49c3 2798 xfrm6_fini();
433d49c3 2799out_fib6_init:
433d49c3 2800 fib6_gc_cleanup();
8ed67789
DL
2801out_register_subsys:
2802 unregister_pernet_subsys(&ip6_route_net_ops);
433d49c3 2803out_kmem_cache:
f2fc6a54 2804 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2805 goto out;
1da177e4
LT
2806}
2807
2808void ip6_route_cleanup(void)
2809{
8ed67789 2810 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2811 fib6_rules_cleanup();
1da177e4 2812 xfrm6_fini();
1da177e4 2813 fib6_gc_cleanup();
8ed67789 2814 unregister_pernet_subsys(&ip6_route_net_ops);
f2fc6a54 2815 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2816}