]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
romfs: romfs_iget() - unsigned ino >= 0 is always true
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
457c4cbc 43#include <net/net_namespace.h>
1da177e4
LT
44#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
8d71740c 54#include <net/netevent.h>
21713ebc 55#include <net/netlink.h>
1da177e4
LT
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
63/* Set to 3 to get tracing. */
64#define RT6_DEBUG 2
65
66#if RT6_DEBUG >= 3
67#define RDBG(x) printk x
68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
69#else
70#define RDBG(x)
71#define RT6_TRACE(x...) do { ; } while (0)
72#endif
73
519fbd87 74#define CLONE_OFFLINK_ROUTE 0
1da177e4 75
1da177e4
LT
76static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78static struct dst_entry *ip6_negative_advice(struct dst_entry *);
79static void ip6_dst_destroy(struct dst_entry *);
80static void ip6_dst_ifdown(struct dst_entry *,
81 struct net_device *dev, int how);
569d3645 82static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
83
84static int ip6_pkt_discard(struct sk_buff *skb);
85static int ip6_pkt_discard_out(struct sk_buff *skb);
86static void ip6_link_failure(struct sk_buff *skb);
87static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
88
70ceb4f5 89#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
90static struct rt6_info *rt6_add_route_info(struct net *net,
91 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
efa2cea0
DL
94static struct rt6_info *rt6_get_route_info(struct net *net,
95 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
96 struct in6_addr *gwaddr, int ifindex);
97#endif
98
9a7ec3a9 99static struct dst_ops ip6_dst_ops_template = {
1da177e4
LT
100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6),
102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024,
104 .check = ip6_dst_check,
105 .destroy = ip6_dst_destroy,
106 .ifdown = ip6_dst_ifdown,
107 .negative_advice = ip6_negative_advice,
108 .link_failure = ip6_link_failure,
109 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 110 .local_out = __ip6_local_out,
e2422970 111 .entries = ATOMIC_INIT(0),
1da177e4
LT
112};
113
14e50e57
DM
114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = __constant_htons(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
e2422970 124 .entries = ATOMIC_INIT(0),
14e50e57
DM
125};
126
bdb3289f 127static struct rt6_info ip6_null_entry_template = {
1da177e4
LT
128 .u = {
129 .dst = {
130 .__refcnt = ATOMIC_INIT(1),
131 .__use = 1,
1da177e4
LT
132 .obsolete = -1,
133 .error = -ENETUNREACH,
134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
135 .input = ip6_pkt_discard,
136 .output = ip6_pkt_discard_out,
1da177e4
LT
137 }
138 },
139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
140 .rt6i_metric = ~(u32) 0,
141 .rt6i_ref = ATOMIC_INIT(1),
142};
143
101367c2
TG
144#ifdef CONFIG_IPV6_MULTIPLE_TABLES
145
6723ab54
DM
146static int ip6_pkt_prohibit(struct sk_buff *skb);
147static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 148
280a34c8 149static struct rt6_info ip6_prohibit_entry_template = {
101367c2
TG
150 .u = {
151 .dst = {
152 .__refcnt = ATOMIC_INIT(1),
153 .__use = 1,
101367c2
TG
154 .obsolete = -1,
155 .error = -EACCES,
156 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
157 .input = ip6_pkt_prohibit,
158 .output = ip6_pkt_prohibit_out,
101367c2
TG
159 }
160 },
161 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
162 .rt6i_metric = ~(u32) 0,
163 .rt6i_ref = ATOMIC_INIT(1),
164};
165
bdb3289f 166static struct rt6_info ip6_blk_hole_entry_template = {
101367c2
TG
167 .u = {
168 .dst = {
169 .__refcnt = ATOMIC_INIT(1),
170 .__use = 1,
101367c2
TG
171 .obsolete = -1,
172 .error = -EINVAL,
173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
352e512c
HX
174 .input = dst_discard,
175 .output = dst_discard,
101367c2
TG
176 }
177 },
178 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
179 .rt6i_metric = ~(u32) 0,
180 .rt6i_ref = ATOMIC_INIT(1),
181};
182
183#endif
184
1da177e4 185/* allocate dst with ip6_dst_ops */
/*
 * Allocate a dst entry from @ops via dst_alloc() and hand it back
 * typed as an rt6_info.  Returns whatever dst_alloc() returned
 * (callers in this file treat NULL as allocation failure).
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	void *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
190
191static void ip6_dst_destroy(struct dst_entry *dst)
192{
193 struct rt6_info *rt = (struct rt6_info *)dst;
194 struct inet6_dev *idev = rt->rt6i_idev;
195
196 if (idev != NULL) {
197 rt->rt6i_idev = NULL;
198 in6_dev_put(idev);
1ab1457c 199 }
1da177e4
LT
200}
201
202static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
203 int how)
204{
205 struct rt6_info *rt = (struct rt6_info *)dst;
206 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 207 struct net_device *loopback_dev =
c346dca1 208 dev_net(dev)->loopback_dev;
1da177e4 209
5a3e55d6
DL
210 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev =
212 in6_dev_get(loopback_dev);
1da177e4
LT
213 if (loopback_idev != NULL) {
214 rt->rt6i_idev = loopback_idev;
215 in6_dev_put(idev);
216 }
217 }
218}
219
220static __inline__ int rt6_check_expired(const struct rt6_info *rt)
221{
222 return (rt->rt6i_flags & RTF_EXPIRES &&
223 time_after(jiffies, rt->rt6i_expires));
224}
225
c71099ac
TG
226static inline int rt6_need_strict(struct in6_addr *daddr)
227{
228 return (ipv6_addr_type(daddr) &
5ce83afa 229 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
c71099ac
TG
230}
231
1da177e4 232/*
c71099ac 233 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
234 */
235
8ed67789
DL
236static inline struct rt6_info *rt6_device_match(struct net *net,
237 struct rt6_info *rt,
dd3abc4e 238 struct in6_addr *saddr,
1da177e4 239 int oif,
d420895e 240 int flags)
1da177e4
LT
241{
242 struct rt6_info *local = NULL;
243 struct rt6_info *sprt;
244
dd3abc4e
YH
245 if (!oif && ipv6_addr_any(saddr))
246 goto out;
247
248 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
249 struct net_device *dev = sprt->rt6i_dev;
250
251 if (oif) {
1da177e4
LT
252 if (dev->ifindex == oif)
253 return sprt;
254 if (dev->flags & IFF_LOOPBACK) {
255 if (sprt->rt6i_idev == NULL ||
256 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 257 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 258 continue;
1ab1457c 259 if (local && (!oif ||
1da177e4
LT
260 local->rt6i_idev->dev->ifindex == oif))
261 continue;
262 }
263 local = sprt;
264 }
dd3abc4e
YH
265 } else {
266 if (ipv6_chk_addr(net, saddr, dev,
267 flags & RT6_LOOKUP_F_IFACE))
268 return sprt;
1da177e4 269 }
dd3abc4e 270 }
1da177e4 271
dd3abc4e 272 if (oif) {
1da177e4
LT
273 if (local)
274 return local;
275
d420895e 276 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 277 return net->ipv6.ip6_null_entry;
1da177e4 278 }
dd3abc4e 279out:
1da177e4
LT
280 return rt;
281}
282
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a next-hop neighbour that is not in a NUD_VALID state and
 * whose ->updated stamp is older than the interface's
 * rtr_probe_interval, send a neighbour solicitation for it to the
 * solicited-node multicast address and refresh ->updated so that
 * probes stay rate-limited.  A NULL @rt or a route without a next hop
 * is ignored.
 *
 * neigh->updated is written here, so the neighbour lock is taken for
 * writing; holding it only for reading would let concurrent callers
 * race on the update.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	write_lock_bh(&neigh->lock);
	/* re-check the state now that the lock is held */
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
318
1da177e4 319/*
554cfb7e 320 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 321 */
b6f99a21 322static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
323{
324 struct net_device *dev = rt->rt6i_dev;
161980f4 325 if (!oif || dev->ifindex == oif)
554cfb7e 326 return 2;
161980f4
DM
327 if ((dev->flags & IFF_LOOPBACK) &&
328 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
329 return 1;
330 return 0;
554cfb7e 331}
1da177e4 332
b6f99a21 333static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 334{
554cfb7e 335 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 336 int m;
4d0c5911
YH
337 if (rt->rt6i_flags & RTF_NONEXTHOP ||
338 !(rt->rt6i_flags & RTF_GATEWAY))
339 m = 1;
340 else if (neigh) {
554cfb7e
YH
341 read_lock_bh(&neigh->lock);
342 if (neigh->nud_state & NUD_VALID)
4d0c5911 343 m = 2;
398bcbeb
YH
344#ifdef CONFIG_IPV6_ROUTER_PREF
345 else if (neigh->nud_state & NUD_FAILED)
346 m = 0;
347#endif
348 else
ea73ee23 349 m = 1;
554cfb7e 350 read_unlock_bh(&neigh->lock);
398bcbeb
YH
351 } else
352 m = 0;
554cfb7e 353 return m;
1da177e4
LT
354}
355
554cfb7e
YH
356static int rt6_score_route(struct rt6_info *rt, int oif,
357 int strict)
1da177e4 358{
4d0c5911 359 int m, n;
1ab1457c 360
4d0c5911 361 m = rt6_check_dev(rt, oif);
77d16f45 362 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 363 return -1;
ebacaaa0
YH
364#ifdef CONFIG_IPV6_ROUTER_PREF
365 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
366#endif
4d0c5911 367 n = rt6_check_neigh(rt);
557e92ef 368 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
369 return -1;
370 return m;
371}
372
f11e6659
DM
373static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
374 int *mpri, struct rt6_info *match)
554cfb7e 375{
f11e6659
DM
376 int m;
377
378 if (rt6_check_expired(rt))
379 goto out;
380
381 m = rt6_score_route(rt, oif, strict);
382 if (m < 0)
383 goto out;
384
385 if (m > *mpri) {
386 if (strict & RT6_LOOKUP_F_REACHABLE)
387 rt6_probe(match);
388 *mpri = m;
389 match = rt;
390 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
391 rt6_probe(rt);
392 }
393
394out:
395 return match;
396}
397
398static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
399 struct rt6_info *rr_head,
400 u32 metric, int oif, int strict)
401{
402 struct rt6_info *rt, *match;
554cfb7e 403 int mpri = -1;
1da177e4 404
f11e6659
DM
405 match = NULL;
406 for (rt = rr_head; rt && rt->rt6i_metric == metric;
407 rt = rt->u.dst.rt6_next)
408 match = find_match(rt, oif, strict, &mpri, match);
409 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
410 rt = rt->u.dst.rt6_next)
411 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 412
f11e6659
DM
413 return match;
414}
1da177e4 415
f11e6659
DM
416static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
417{
418 struct rt6_info *match, *rt0;
8ed67789 419 struct net *net;
1da177e4 420
f11e6659 421 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 422 __func__, fn->leaf, oif);
554cfb7e 423
f11e6659
DM
424 rt0 = fn->rr_ptr;
425 if (!rt0)
426 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 427
f11e6659 428 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 429
554cfb7e 430 if (!match &&
f11e6659
DM
431 (strict & RT6_LOOKUP_F_REACHABLE)) {
432 struct rt6_info *next = rt0->u.dst.rt6_next;
433
554cfb7e 434 /* no entries matched; do round-robin */
f11e6659
DM
435 if (!next || next->rt6i_metric != rt0->rt6i_metric)
436 next = fn->leaf;
437
438 if (next != rt0)
439 fn->rr_ptr = next;
1da177e4 440 }
1da177e4 441
f11e6659 442 RT6_TRACE("%s() => %p\n",
0dc47877 443 __func__, match);
1da177e4 444
c346dca1 445 net = dev_net(rt0->rt6i_dev);
8ed67789 446 return (match ? match : net->ipv6.ip6_null_entry);
1da177e4
LT
447}
448
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address to associate with the route
 *
 * Validates the option, then looks up the matching route-info route.
 * A zero lifetime deletes an existing route; a non-zero lifetime adds
 * the route when missing or refreshes the preference bits of an
 * existing one.  The expiry is then set from the lifetime, or cleared
 * when addrconf_finite_timeout() reports the lifetime as not finite.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* a zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime)) {
		rt->rt6i_expires = jiffies + HZ * lifetime;
		rt->rt6i_flags |= RTF_EXPIRES;
	} else {
		rt->rt6i_flags &= ~RTF_EXPIRES;
	}
	dst_release(&rt->u.dst);

	return 0;
}
#endif
522
/*
 * BACKTRACK(__net, saddr) - retry a failed lookup higher up the tree.
 *
 * Expands inside a lookup function that provides local variables
 * `rt` and `fn` and the labels `out` and `restart`.  When `rt` is the
 * namespace's null entry, walk towards the root: at each step move to
 * the parent, or into the parent's subtree (looked up by @saddr) when
 * it has one that we did not come from.  Jumps to `restart` at the
 * first node flagged RTN_RTINFO, or to `out` once a node flagged
 * RTN_TL_ROOT is reached.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			fn = (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) ? \
				fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr) : \
				pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 540
8ed67789
DL
541static struct rt6_info *ip6_pol_route_lookup(struct net *net,
542 struct fib6_table *table,
c71099ac 543 struct flowi *fl, int flags)
1da177e4
LT
544{
545 struct fib6_node *fn;
546 struct rt6_info *rt;
547
c71099ac
TG
548 read_lock_bh(&table->tb6_lock);
549 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
550restart:
551 rt = fn->leaf;
dd3abc4e 552 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 553 BACKTRACK(net, &fl->fl6_src);
c71099ac 554out:
03f49f34 555 dst_use(&rt->u.dst, jiffies);
c71099ac 556 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
557 return rt;
558
559}
560
9acd9f3a
YH
561struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
562 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
563{
564 struct flowi fl = {
565 .oif = oif,
566 .nl_u = {
567 .ip6_u = {
568 .daddr = *daddr,
c71099ac
TG
569 },
570 },
571 };
572 struct dst_entry *dst;
77d16f45 573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 574
adaa70bb
TG
575 if (saddr) {
576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 flags |= RT6_LOOKUP_F_HAS_SADDR;
578 }
579
606a2b48 580 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
581 if (dst->error == 0)
582 return (struct rt6_info *) dst;
583
584 dst_release(dst);
585
1da177e4
LT
586 return NULL;
587}
588
7159039a
YH
589EXPORT_SYMBOL(rt6_lookup);
590
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
596
86872cb5 597static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
598{
599 int err;
c71099ac 600 struct fib6_table *table;
1da177e4 601
c71099ac
TG
602 table = rt->rt6i_table;
603 write_lock_bh(&table->tb6_lock);
86872cb5 604 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 605 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
606
607 return err;
608}
609
40e22e8f
TG
610int ip6_ins_rt(struct rt6_info *rt)
611{
4d1169c1 612 struct nl_info info = {
c346dca1 613 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 614 };
528c4ceb 615 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
616}
617
95a9a5ba
YH
618static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619 struct in6_addr *saddr)
1da177e4 620{
1da177e4
LT
621 struct rt6_info *rt;
622
623 /*
624 * Clone the route.
625 */
626
627 rt = ip6_rt_copy(ort);
628
629 if (rt) {
14deae41
DM
630 struct neighbour *neigh;
631 int attempts = !in_softirq();
632
58c4fb86
YH
633 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
634 if (rt->rt6i_dst.plen != 128 &&
635 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
636 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 637 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 638 }
1da177e4 639
58c4fb86 640 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
641 rt->rt6i_dst.plen = 128;
642 rt->rt6i_flags |= RTF_CACHE;
643 rt->u.dst.flags |= DST_HOST;
644
645#ifdef CONFIG_IPV6_SUBTREES
646 if (rt->rt6i_src.plen && saddr) {
647 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
648 rt->rt6i_src.plen = 128;
649 }
650#endif
651
14deae41
DM
652 retry:
653 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
654 if (IS_ERR(neigh)) {
655 struct net *net = dev_net(rt->rt6i_dev);
656 int saved_rt_min_interval =
657 net->ipv6.sysctl.ip6_rt_gc_min_interval;
658 int saved_rt_elasticity =
659 net->ipv6.sysctl.ip6_rt_gc_elasticity;
660
661 if (attempts-- > 0) {
662 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
663 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
664
665 ip6_dst_gc(net->ipv6.ip6_dst_ops);
666
667 net->ipv6.sysctl.ip6_rt_gc_elasticity =
668 saved_rt_elasticity;
669 net->ipv6.sysctl.ip6_rt_gc_min_interval =
670 saved_rt_min_interval;
671 goto retry;
672 }
673
674 if (net_ratelimit())
675 printk(KERN_WARNING
676 "Neighbour table overflow.\n");
677 dst_free(&rt->u.dst);
678 return NULL;
679 }
680 rt->rt6i_nexthop = neigh;
1da177e4 681
95a9a5ba 682 }
1da177e4 683
95a9a5ba
YH
684 return rt;
685}
1da177e4 686
299d9939
YH
687static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
688{
689 struct rt6_info *rt = ip6_rt_copy(ort);
690 if (rt) {
691 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
692 rt->rt6i_dst.plen = 128;
693 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
694 rt->u.dst.flags |= DST_HOST;
695 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
696 }
697 return rt;
698}
699
8ed67789
DL
700static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
701 struct flowi *fl, int flags)
1da177e4
LT
702{
703 struct fib6_node *fn;
519fbd87 704 struct rt6_info *rt, *nrt;
c71099ac 705 int strict = 0;
1da177e4 706 int attempts = 3;
519fbd87 707 int err;
53b7997f 708 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 709
77d16f45 710 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
711
712relookup:
c71099ac 713 read_lock_bh(&table->tb6_lock);
1da177e4 714
8238dd06 715restart_2:
c71099ac 716 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
717
718restart:
4acad72d 719 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
720
721 BACKTRACK(net, &fl->fl6_src);
722 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 723 rt->rt6i_flags & RTF_CACHE)
1ddef044 724 goto out;
1da177e4 725
fb9de91e 726 dst_hold(&rt->u.dst);
c71099ac 727 read_unlock_bh(&table->tb6_lock);
fb9de91e 728
519fbd87 729 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 730 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
731 else {
732#if CLONE_OFFLINK_ROUTE
c71099ac 733 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
734#else
735 goto out2;
736#endif
737 }
e40cf353 738
519fbd87 739 dst_release(&rt->u.dst);
8ed67789 740 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 741
519fbd87
YH
742 dst_hold(&rt->u.dst);
743 if (nrt) {
40e22e8f 744 err = ip6_ins_rt(nrt);
519fbd87 745 if (!err)
1da177e4 746 goto out2;
1da177e4 747 }
1da177e4 748
519fbd87
YH
749 if (--attempts <= 0)
750 goto out2;
751
752 /*
c71099ac 753 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
754 * released someone could insert this route. Relookup.
755 */
756 dst_release(&rt->u.dst);
757 goto relookup;
758
759out:
8238dd06
YH
760 if (reachable) {
761 reachable = 0;
762 goto restart_2;
763 }
519fbd87 764 dst_hold(&rt->u.dst);
c71099ac 765 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
766out2:
767 rt->u.dst.lastuse = jiffies;
768 rt->u.dst.__use++;
c71099ac
TG
769
770 return rt;
1da177e4
LT
771}
772
8ed67789 773static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
774 struct flowi *fl, int flags)
775{
8ed67789 776 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
777}
778
c71099ac
TG
779void ip6_route_input(struct sk_buff *skb)
780{
0660e03f 781 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 782 struct net *net = dev_net(skb->dev);
adaa70bb 783 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
784 struct flowi fl = {
785 .iif = skb->dev->ifindex,
786 .nl_u = {
787 .ip6_u = {
788 .daddr = iph->daddr,
789 .saddr = iph->saddr,
90bcaf7b 790 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
791 },
792 },
1ab1457c 793 .mark = skb->mark,
c71099ac
TG
794 .proto = iph->nexthdr,
795 };
adaa70bb
TG
796
797 if (rt6_need_strict(&iph->daddr))
798 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 799
5578689a 800 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
c71099ac
TG
801}
802
8ed67789 803static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 804 struct flowi *fl, int flags)
1da177e4 805{
8ed67789 806 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
807}
808
4591db4f
DL
809struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
810 struct flowi *fl)
c71099ac
TG
811{
812 int flags = 0;
813
814 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 815 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 816
adaa70bb
TG
817 if (!ipv6_addr_any(&fl->fl6_src))
818 flags |= RT6_LOOKUP_F_HAS_SADDR;
7cbca67c
YH
819 else if (sk) {
820 unsigned int prefs = inet6_sk(sk)->srcprefs;
821 if (prefs & IPV6_PREFER_SRC_TMP)
822 flags |= RT6_LOOKUP_F_SRCPREF_TMP;
823 if (prefs & IPV6_PREFER_SRC_PUBLIC)
824 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC;
825 if (prefs & IPV6_PREFER_SRC_COA)
826 flags |= RT6_LOOKUP_F_SRCPREF_COA;
827 }
adaa70bb 828
4591db4f 829 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
830}
831
7159039a 832EXPORT_SYMBOL(ip6_route_output);
1da177e4 833
14e50e57
DM
834int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
835{
836 struct rt6_info *ort = (struct rt6_info *) *dstp;
837 struct rt6_info *rt = (struct rt6_info *)
838 dst_alloc(&ip6_dst_blackhole_ops);
839 struct dst_entry *new = NULL;
840
841 if (rt) {
842 new = &rt->u.dst;
843
844 atomic_set(&new->__refcnt, 1);
845 new->__use = 1;
352e512c
HX
846 new->input = dst_discard;
847 new->output = dst_discard;
14e50e57
DM
848
849 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
850 new->dev = ort->u.dst.dev;
851 if (new->dev)
852 dev_hold(new->dev);
853 rt->rt6i_idev = ort->rt6i_idev;
854 if (rt->rt6i_idev)
855 in6_dev_hold(rt->rt6i_idev);
856 rt->rt6i_expires = 0;
857
858 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
859 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
860 rt->rt6i_metric = 0;
861
862 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
863#ifdef CONFIG_IPV6_SUBTREES
864 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
865#endif
866
867 dst_free(new);
868 }
869
870 dst_release(*dstp);
871 *dstp = new;
872 return (new ? 0 : -ENOMEM);
873}
874EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
875
1da177e4
LT
876/*
877 * Destination cache support functions
878 */
879
880static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
881{
882 struct rt6_info *rt;
883
884 rt = (struct rt6_info *) dst;
885
886 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
887 return dst;
888
889 return NULL;
890}
891
892static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
893{
894 struct rt6_info *rt = (struct rt6_info *) dst;
895
896 if (rt) {
897 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 898 ip6_del_rt(rt);
1da177e4
LT
899 else
900 dst_release(dst);
901 }
902 return NULL;
903}
904
905static void ip6_link_failure(struct sk_buff *skb)
906{
907 struct rt6_info *rt;
908
909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
910
911 rt = (struct rt6_info *) skb->dst;
912 if (rt) {
913 if (rt->rt6i_flags&RTF_CACHE) {
914 dst_set_expires(&rt->u.dst, 0);
915 rt->rt6i_flags |= RTF_EXPIRES;
916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
917 rt->rt6i_node->fn_sernum = -1;
918 }
919}
920
921static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
922{
923 struct rt6_info *rt6 = (struct rt6_info*)dst;
924
925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
926 rt6->rt6i_flags |= RTF_MODIFIED;
927 if (mtu < IPV6_MIN_MTU) {
928 mtu = IPV6_MIN_MTU;
929 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
930 }
931 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 932 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
933 }
934}
935
1da177e4
LT
936static int ipv6_get_mtu(struct net_device *dev);
937
5578689a 938static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
939{
940 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
941
5578689a
DL
942 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
943 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
944
945 /*
1ab1457c
YH
946 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
947 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
948 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
949 * rely only on pmtu discovery"
950 */
951 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
952 mtu = IPV6_MAXPLEN;
953 return mtu;
954}
955
3b00944c
YH
956static struct dst_entry *icmp6_dst_gc_list;
957static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 958
3b00944c 959struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 960 struct neighbour *neigh,
9acd9f3a 961 const struct in6_addr *addr)
1da177e4
LT
962{
963 struct rt6_info *rt;
964 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 965 struct net *net = dev_net(dev);
1da177e4
LT
966
967 if (unlikely(idev == NULL))
968 return NULL;
969
f2fc6a54 970 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1da177e4
LT
971 if (unlikely(rt == NULL)) {
972 in6_dev_put(idev);
973 goto out;
974 }
975
976 dev_hold(dev);
977 if (neigh)
978 neigh_hold(neigh);
14deae41 979 else {
1da177e4 980 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
981 if (IS_ERR(neigh))
982 neigh = NULL;
983 }
1da177e4
LT
984
985 rt->rt6i_dev = dev;
986 rt->rt6i_idev = idev;
987 rt->rt6i_nexthop = neigh;
988 atomic_set(&rt->u.dst.__refcnt, 1);
989 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
990 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 991 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
3b00944c 992 rt->u.dst.output = ip6_output;
1da177e4
LT
993
994#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
995 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
996 ? DST_HOST
1da177e4
LT
997 : 0;
998 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
999 rt->rt6i_dst.plen = 128;
1000#endif
1001
3b00944c
YH
1002 spin_lock_bh(&icmp6_dst_lock);
1003 rt->u.dst.next = icmp6_dst_gc_list;
1004 icmp6_dst_gc_list = &rt->u.dst;
1005 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1006
5578689a 1007 fib6_force_start_gc(net);
1da177e4
LT
1008
1009out:
40aa7b90 1010 return &rt->u.dst;
1da177e4
LT
1011}
1012
3d0f24a7 1013int icmp6_dst_gc(void)
1da177e4
LT
1014{
1015 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1016 int more = 0;
1da177e4
LT
1017
1018 next = NULL;
5d0bbeeb 1019
3b00944c
YH
1020 spin_lock_bh(&icmp6_dst_lock);
1021 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1022
1da177e4
LT
1023 while ((dst = *pprev) != NULL) {
1024 if (!atomic_read(&dst->__refcnt)) {
1025 *pprev = dst->next;
1026 dst_free(dst);
1da177e4
LT
1027 } else {
1028 pprev = &dst->next;
3d0f24a7 1029 ++more;
1da177e4
LT
1030 }
1031 }
1032
3b00944c 1033 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1034
3d0f24a7 1035 return more;
1da177e4
LT
1036}
1037
1e493d19
DM
1038static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1039 void *arg)
1040{
1041 struct dst_entry *dst, **pprev;
1042
1043 spin_lock_bh(&icmp6_dst_lock);
1044 pprev = &icmp6_dst_gc_list;
1045 while ((dst = *pprev) != NULL) {
1046 struct rt6_info *rt = (struct rt6_info *) dst;
1047 if (func(rt, arg)) {
1048 *pprev = dst->next;
1049 dst_free(dst);
1050 } else {
1051 pprev = &dst->next;
1052 }
1053 }
1054 spin_unlock_bh(&icmp6_dst_lock);
1055}
1056
569d3645 1057static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1058{
1da177e4 1059 unsigned long now = jiffies;
7019b78e
DL
1060 struct net *net = ops->dst_net;
1061 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1062 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1063 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1064 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1065 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1066
1067 if (time_after(rt_last_gc + rt_min_interval, now) &&
1068 atomic_read(&ops->entries) <= rt_max_size)
1da177e4
LT
1069 goto out;
1070
6891a346
BT
1071 net->ipv6.ip6_rt_gc_expire++;
1072 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1073 net->ipv6.ip6_rt_last_gc = now;
7019b78e
DL
1074 if (atomic_read(&ops->entries) < ops->gc_thresh)
1075 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1076out:
7019b78e
DL
1077 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1078 return (atomic_read(&ops->entries) > rt_max_size);
1da177e4
LT
1079}
1080
1081/* Clean host part of a prefix. Not necessary in radix tree,
1082 but results in cleaner routing tables.
1083
1084 Remove it only when all the things will work!
1085 */
1086
1087static int ipv6_get_mtu(struct net_device *dev)
1088{
1089 int mtu = IPV6_MIN_MTU;
1090 struct inet6_dev *idev;
1091
1092 idev = in6_dev_get(dev);
1093 if (idev) {
1094 mtu = idev->cnf.mtu6;
1095 in6_dev_put(idev);
1096 }
1097 return mtu;
1098}
1099
6b75d090 1100int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1101{
6b75d090
YH
1102 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1103 if (hoplimit < 0) {
1104 struct net_device *dev = dst->dev;
1105 struct inet6_dev *idev = in6_dev_get(dev);
1106 if (idev) {
1107 hoplimit = idev->cnf.hop_limit;
1108 in6_dev_put(idev);
1109 } else
53b7997f 1110 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1da177e4
LT
1111 }
1112 return hoplimit;
1113}
1114
1115/*
1116 *
1117 */
1118
86872cb5 1119int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1120{
1121 int err;
5578689a 1122 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1123 struct rt6_info *rt = NULL;
1124 struct net_device *dev = NULL;
1125 struct inet6_dev *idev = NULL;
c71099ac 1126 struct fib6_table *table;
1da177e4
LT
1127 int addr_type;
1128
86872cb5 1129 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1130 return -EINVAL;
1131#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1132 if (cfg->fc_src_len)
1da177e4
LT
1133 return -EINVAL;
1134#endif
86872cb5 1135 if (cfg->fc_ifindex) {
1da177e4 1136 err = -ENODEV;
5578689a 1137 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1138 if (!dev)
1139 goto out;
1140 idev = in6_dev_get(dev);
1141 if (!idev)
1142 goto out;
1143 }
1144
86872cb5
TG
1145 if (cfg->fc_metric == 0)
1146 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1147
5578689a 1148 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1149 if (table == NULL) {
1150 err = -ENOBUFS;
1151 goto out;
1152 }
1153
f2fc6a54 1154 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1da177e4
LT
1155
1156 if (rt == NULL) {
1157 err = -ENOMEM;
1158 goto out;
1159 }
1160
1161 rt->u.dst.obsolete = -1;
6f704992
YH
1162 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1163 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1164 0;
1da177e4 1165
86872cb5
TG
1166 if (cfg->fc_protocol == RTPROT_UNSPEC)
1167 cfg->fc_protocol = RTPROT_BOOT;
1168 rt->rt6i_protocol = cfg->fc_protocol;
1169
1170 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1171
1172 if (addr_type & IPV6_ADDR_MULTICAST)
1173 rt->u.dst.input = ip6_mc_input;
1174 else
1175 rt->u.dst.input = ip6_forward;
1176
1177 rt->u.dst.output = ip6_output;
1178
86872cb5
TG
1179 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1180 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1181 if (rt->rt6i_dst.plen == 128)
1182 rt->u.dst.flags = DST_HOST;
1183
1184#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1185 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1186 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1187#endif
1188
86872cb5 1189 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1190
1191 /* We cannot add true routes via loopback here,
1192 they would result in kernel looping; promote them to reject routes
1193 */
86872cb5 1194 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1195 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1196 /* hold loopback dev/idev if we haven't done so. */
5578689a 1197 if (dev != net->loopback_dev) {
1da177e4
LT
1198 if (dev) {
1199 dev_put(dev);
1200 in6_dev_put(idev);
1201 }
5578689a 1202 dev = net->loopback_dev;
1da177e4
LT
1203 dev_hold(dev);
1204 idev = in6_dev_get(dev);
1205 if (!idev) {
1206 err = -ENODEV;
1207 goto out;
1208 }
1209 }
1210 rt->u.dst.output = ip6_pkt_discard_out;
1211 rt->u.dst.input = ip6_pkt_discard;
1212 rt->u.dst.error = -ENETUNREACH;
1213 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1214 goto install_route;
1215 }
1216
86872cb5 1217 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1218 struct in6_addr *gw_addr;
1219 int gwa_type;
1220
86872cb5
TG
1221 gw_addr = &cfg->fc_gateway;
1222 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1223 gwa_type = ipv6_addr_type(gw_addr);
1224
1225 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1226 struct rt6_info *grt;
1227
1228 /* IPv6 strictly inhibits using not link-local
1229 addresses as nexthop address.
1230 Otherwise, router will not able to send redirects.
1231 It is very good, but in some (rare!) circumstances
1232 (SIT, PtP, NBMA NOARP links) it is handy to allow
1233 some exceptions. --ANK
1234 */
1235 err = -EINVAL;
1236 if (!(gwa_type&IPV6_ADDR_UNICAST))
1237 goto out;
1238
5578689a 1239 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1240
1241 err = -EHOSTUNREACH;
1242 if (grt == NULL)
1243 goto out;
1244 if (dev) {
1245 if (dev != grt->rt6i_dev) {
1246 dst_release(&grt->u.dst);
1247 goto out;
1248 }
1249 } else {
1250 dev = grt->rt6i_dev;
1251 idev = grt->rt6i_idev;
1252 dev_hold(dev);
1253 in6_dev_hold(grt->rt6i_idev);
1254 }
1255 if (!(grt->rt6i_flags&RTF_GATEWAY))
1256 err = 0;
1257 dst_release(&grt->u.dst);
1258
1259 if (err)
1260 goto out;
1261 }
1262 err = -EINVAL;
1263 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1264 goto out;
1265 }
1266
1267 err = -ENODEV;
1268 if (dev == NULL)
1269 goto out;
1270
86872cb5 1271 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1272 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1273 if (IS_ERR(rt->rt6i_nexthop)) {
1274 err = PTR_ERR(rt->rt6i_nexthop);
1275 rt->rt6i_nexthop = NULL;
1276 goto out;
1277 }
1278 }
1279
86872cb5 1280 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1281
1282install_route:
86872cb5
TG
1283 if (cfg->fc_mx) {
1284 struct nlattr *nla;
1285 int remaining;
1286
1287 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1288 int type = nla_type(nla);
86872cb5
TG
1289
1290 if (type) {
1291 if (type > RTAX_MAX) {
1da177e4
LT
1292 err = -EINVAL;
1293 goto out;
1294 }
86872cb5
TG
1295
1296 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1297 }
1da177e4
LT
1298 }
1299 }
1300
5ffc02a1 1301 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1da177e4 1302 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1ca615fb 1303 if (!dst_mtu(&rt->u.dst))
1da177e4 1304 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
5ffc02a1 1305 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
5578689a 1306 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1307 rt->u.dst.dev = dev;
1308 rt->rt6i_idev = idev;
c71099ac 1309 rt->rt6i_table = table;
63152fc0 1310
c346dca1 1311 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1312
86872cb5 1313 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1314
1315out:
1316 if (dev)
1317 dev_put(dev);
1318 if (idev)
1319 in6_dev_put(idev);
1320 if (rt)
40aa7b90 1321 dst_free(&rt->u.dst);
1da177e4
LT
1322 return err;
1323}
1324
86872cb5 1325static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1326{
1327 int err;
c71099ac 1328 struct fib6_table *table;
c346dca1 1329 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1330
8ed67789 1331 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1332 return -ENOENT;
1333
c71099ac
TG
1334 table = rt->rt6i_table;
1335 write_lock_bh(&table->tb6_lock);
1da177e4 1336
86872cb5 1337 err = fib6_del(rt, info);
1da177e4
LT
1338 dst_release(&rt->u.dst);
1339
c71099ac 1340 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1341
1342 return err;
1343}
1344
e0a1ad73
TG
1345int ip6_del_rt(struct rt6_info *rt)
1346{
4d1169c1 1347 struct nl_info info = {
c346dca1 1348 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1349 };
528c4ceb 1350 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1351}
1352
86872cb5 1353static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1354{
c71099ac 1355 struct fib6_table *table;
1da177e4
LT
1356 struct fib6_node *fn;
1357 struct rt6_info *rt;
1358 int err = -ESRCH;
1359
5578689a 1360 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1361 if (table == NULL)
1362 return err;
1363
1364 read_lock_bh(&table->tb6_lock);
1da177e4 1365
c71099ac 1366 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1367 &cfg->fc_dst, cfg->fc_dst_len,
1368 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1369
1da177e4 1370 if (fn) {
7cc48263 1371 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1372 if (cfg->fc_ifindex &&
1da177e4 1373 (rt->rt6i_dev == NULL ||
86872cb5 1374 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1375 continue;
86872cb5
TG
1376 if (cfg->fc_flags & RTF_GATEWAY &&
1377 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1378 continue;
86872cb5 1379 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1380 continue;
1381 dst_hold(&rt->u.dst);
c71099ac 1382 read_unlock_bh(&table->tb6_lock);
1da177e4 1383
86872cb5 1384 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1385 }
1386 }
c71099ac 1387 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1388
1389 return err;
1390}
1391
1392/*
1393 * Handle redirects
1394 */
a6279458
YH
1395struct ip6rd_flowi {
1396 struct flowi fl;
1397 struct in6_addr gateway;
1398};
1399
8ed67789
DL
1400static struct rt6_info *__ip6_route_redirect(struct net *net,
1401 struct fib6_table *table,
a6279458
YH
1402 struct flowi *fl,
1403 int flags)
1da177e4 1404{
a6279458
YH
1405 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1406 struct rt6_info *rt;
e843b9e1 1407 struct fib6_node *fn;
c71099ac 1408
1da177e4 1409 /*
e843b9e1
YH
1410 * Get the "current" route for this destination and
1411 * check if the redirect has come from approriate router.
1412 *
1413 * RFC 2461 specifies that redirects should only be
1414 * accepted if they come from the nexthop to the target.
1415 * Due to the way the routes are chosen, this notion
1416 * is a bit fuzzy and one might need to check all possible
1417 * routes.
1da177e4 1418 */
1da177e4 1419
c71099ac 1420 read_lock_bh(&table->tb6_lock);
a6279458 1421 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1422restart:
7cc48263 1423 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1424 /*
1425 * Current route is on-link; redirect is always invalid.
1426 *
1427 * Seems, previous statement is not true. It could
1428 * be node, which looks for us as on-link (f.e. proxy ndisc)
1429 * But then router serving it might decide, that we should
1430 * know truth 8)8) --ANK (980726).
1431 */
1432 if (rt6_check_expired(rt))
1433 continue;
1434 if (!(rt->rt6i_flags & RTF_GATEWAY))
1435 continue;
a6279458 1436 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1437 continue;
a6279458 1438 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1439 continue;
1440 break;
1441 }
a6279458 1442
cb15d9c2 1443 if (!rt)
8ed67789
DL
1444 rt = net->ipv6.ip6_null_entry;
1445 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1446out:
a6279458
YH
1447 dst_hold(&rt->u.dst);
1448
c71099ac 1449 read_unlock_bh(&table->tb6_lock);
e843b9e1 1450
a6279458
YH
1451 return rt;
1452};
1453
1454static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1455 struct in6_addr *src,
1456 struct in6_addr *gateway,
1457 struct net_device *dev)
1458{
adaa70bb 1459 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1460 struct net *net = dev_net(dev);
a6279458
YH
1461 struct ip6rd_flowi rdfl = {
1462 .fl = {
1463 .oif = dev->ifindex,
1464 .nl_u = {
1465 .ip6_u = {
1466 .daddr = *dest,
1467 .saddr = *src,
1468 },
1469 },
1470 },
1471 .gateway = *gateway,
1472 };
adaa70bb
TG
1473
1474 if (rt6_need_strict(dest))
1475 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1476
5578689a 1477 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1478 flags, __ip6_route_redirect);
a6279458
YH
1479}
1480
1481void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1482 struct in6_addr *saddr,
1483 struct neighbour *neigh, u8 *lladdr, int on_link)
1484{
1485 struct rt6_info *rt, *nrt = NULL;
1486 struct netevent_redirect netevent;
c346dca1 1487 struct net *net = dev_net(neigh->dev);
a6279458
YH
1488
1489 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1490
8ed67789 1491 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1492 if (net_ratelimit())
1493 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1494 "for redirect target\n");
a6279458 1495 goto out;
1da177e4
LT
1496 }
1497
1da177e4
LT
1498 /*
1499 * We have finally decided to accept it.
1500 */
1501
1ab1457c 1502 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1503 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1504 NEIGH_UPDATE_F_OVERRIDE|
1505 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1506 NEIGH_UPDATE_F_ISROUTER))
1507 );
1508
1509 /*
1510 * Redirect received -> path was valid.
1511 * Look, redirects are sent only in response to data packets,
1512 * so that this nexthop apparently is reachable. --ANK
1513 */
1514 dst_confirm(&rt->u.dst);
1515
1516 /* Duplicate redirect: silently ignore. */
1517 if (neigh == rt->u.dst.neighbour)
1518 goto out;
1519
1520 nrt = ip6_rt_copy(rt);
1521 if (nrt == NULL)
1522 goto out;
1523
1524 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1525 if (on_link)
1526 nrt->rt6i_flags &= ~RTF_GATEWAY;
1527
1528 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1529 nrt->rt6i_dst.plen = 128;
1530 nrt->u.dst.flags |= DST_HOST;
1531
1532 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1533 nrt->rt6i_nexthop = neigh_clone(neigh);
1534 /* Reset pmtu, it may be better */
1535 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
c346dca1 1536 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
5578689a 1537 dst_mtu(&nrt->u.dst));
1da177e4 1538
40e22e8f 1539 if (ip6_ins_rt(nrt))
1da177e4
LT
1540 goto out;
1541
8d71740c
TT
1542 netevent.old = &rt->u.dst;
1543 netevent.new = &nrt->u.dst;
1544 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1545
1da177e4 1546 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1547 ip6_del_rt(rt);
1da177e4
LT
1548 return;
1549 }
1550
1551out:
1ab1457c 1552 dst_release(&rt->u.dst);
1da177e4
LT
1553 return;
1554}
1555
1556/*
1557 * Handle ICMP "packet too big" messages
1558 * i.e. Path MTU discovery
1559 */
1560
1561void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1562 struct net_device *dev, u32 pmtu)
1563{
1564 struct rt6_info *rt, *nrt;
c346dca1 1565 struct net *net = dev_net(dev);
1da177e4
LT
1566 int allfrag = 0;
1567
5578689a 1568 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1569 if (rt == NULL)
1570 return;
1571
1572 if (pmtu >= dst_mtu(&rt->u.dst))
1573 goto out;
1574
1575 if (pmtu < IPV6_MIN_MTU) {
1576 /*
1ab1457c 1577 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1578 * MTU (1280) and a fragment header should always be included
1579 * after a node receiving Too Big message reporting PMTU is
1580 * less than the IPv6 Minimum Link MTU.
1581 */
1582 pmtu = IPV6_MIN_MTU;
1583 allfrag = 1;
1584 }
1585
1586 /* New mtu received -> path was valid.
1587 They are sent only in response to data packets,
1588 so that this nexthop apparently is reachable. --ANK
1589 */
1590 dst_confirm(&rt->u.dst);
1591
1592 /* Host route. If it is static, it would be better
1593 not to override it, but add new one, so that
1594 when cache entry will expire old pmtu
1595 would return automatically.
1596 */
1597 if (rt->rt6i_flags & RTF_CACHE) {
1598 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1599 if (allfrag)
1600 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
5578689a 1601 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1602 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1603 goto out;
1604 }
1605
1606 /* Network route.
1607 Two cases are possible:
1608 1. It is connected route. Action: COW
1609 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1610 */
d5315b50 1611 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1612 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1613 else
1614 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1615
d5315b50 1616 if (nrt) {
a1e78363
YH
1617 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1618 if (allfrag)
1619 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1620
1621 /* According to RFC 1981, detecting PMTU increase shouldn't be
1622 * happened within 5 mins, the recommended timer is 10 mins.
1623 * Here this route expiration time is set to ip6_rt_mtu_expires
1624 * which is 10 mins. After 10 mins the decreased pmtu is expired
1625 * and detecting PMTU increase will be automatically happened.
1626 */
5578689a 1627 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1628 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1629
40e22e8f 1630 ip6_ins_rt(nrt);
1da177e4 1631 }
1da177e4
LT
1632out:
1633 dst_release(&rt->u.dst);
1634}
1635
1636/*
1637 * Misc support functions
1638 */
1639
1640static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1641{
c346dca1 1642 struct net *net = dev_net(ort->rt6i_dev);
f2fc6a54 1643 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
1da177e4
LT
1644
1645 if (rt) {
1646 rt->u.dst.input = ort->u.dst.input;
1647 rt->u.dst.output = ort->u.dst.output;
1648
1649 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1650 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1651 rt->u.dst.dev = ort->u.dst.dev;
1652 if (rt->u.dst.dev)
1653 dev_hold(rt->u.dst.dev);
1654 rt->rt6i_idev = ort->rt6i_idev;
1655 if (rt->rt6i_idev)
1656 in6_dev_hold(rt->rt6i_idev);
1657 rt->u.dst.lastuse = jiffies;
1658 rt->rt6i_expires = 0;
1659
1660 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1661 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1662 rt->rt6i_metric = 0;
1663
1664 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1665#ifdef CONFIG_IPV6_SUBTREES
1666 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1667#endif
c71099ac 1668 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1669 }
1670 return rt;
1671}
1672
70ceb4f5 1673#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1674static struct rt6_info *rt6_get_route_info(struct net *net,
1675 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1676 struct in6_addr *gwaddr, int ifindex)
1677{
1678 struct fib6_node *fn;
1679 struct rt6_info *rt = NULL;
c71099ac
TG
1680 struct fib6_table *table;
1681
efa2cea0 1682 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1683 if (table == NULL)
1684 return NULL;
70ceb4f5 1685
c71099ac
TG
1686 write_lock_bh(&table->tb6_lock);
1687 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1688 if (!fn)
1689 goto out;
1690
7cc48263 1691 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1692 if (rt->rt6i_dev->ifindex != ifindex)
1693 continue;
1694 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1695 continue;
1696 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1697 continue;
1698 dst_hold(&rt->u.dst);
1699 break;
1700 }
1701out:
c71099ac 1702 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1703 return rt;
1704}
1705
efa2cea0
DL
1706static struct rt6_info *rt6_add_route_info(struct net *net,
1707 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1708 struct in6_addr *gwaddr, int ifindex,
1709 unsigned pref)
1710{
86872cb5
TG
1711 struct fib6_config cfg = {
1712 .fc_table = RT6_TABLE_INFO,
238fc7ea 1713 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1714 .fc_ifindex = ifindex,
1715 .fc_dst_len = prefixlen,
1716 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1717 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1718 .fc_nlinfo.pid = 0,
1719 .fc_nlinfo.nlh = NULL,
1720 .fc_nlinfo.nl_net = net,
86872cb5
TG
1721 };
1722
1723 ipv6_addr_copy(&cfg.fc_dst, prefix);
1724 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1725
e317da96
YH
1726 /* We should treat it as a default route if prefix length is 0. */
1727 if (!prefixlen)
86872cb5 1728 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1729
86872cb5 1730 ip6_route_add(&cfg);
70ceb4f5 1731
efa2cea0 1732 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1733}
1734#endif
1735
1da177e4 1736struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1737{
1da177e4 1738 struct rt6_info *rt;
c71099ac 1739 struct fib6_table *table;
1da177e4 1740
c346dca1 1741 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1742 if (table == NULL)
1743 return NULL;
1da177e4 1744
c71099ac 1745 write_lock_bh(&table->tb6_lock);
7cc48263 1746 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1747 if (dev == rt->rt6i_dev &&
045927ff 1748 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1749 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1750 break;
1751 }
1752 if (rt)
1753 dst_hold(&rt->u.dst);
c71099ac 1754 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1755 return rt;
1756}
1757
1758struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1759 struct net_device *dev,
1760 unsigned int pref)
1da177e4 1761{
86872cb5
TG
1762 struct fib6_config cfg = {
1763 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1764 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1765 .fc_ifindex = dev->ifindex,
1766 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1767 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1768 .fc_nlinfo.pid = 0,
1769 .fc_nlinfo.nlh = NULL,
c346dca1 1770 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1771 };
1da177e4 1772
86872cb5 1773 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1774
86872cb5 1775 ip6_route_add(&cfg);
1da177e4 1776
1da177e4
LT
1777 return rt6_get_dflt_router(gwaddr, dev);
1778}
1779
7b4da532 1780void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1781{
1782 struct rt6_info *rt;
c71099ac
TG
1783 struct fib6_table *table;
1784
1785 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1786 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1787 if (table == NULL)
1788 return;
1da177e4
LT
1789
1790restart:
c71099ac 1791 read_lock_bh(&table->tb6_lock);
7cc48263 1792 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1793 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1794 dst_hold(&rt->u.dst);
c71099ac 1795 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1796 ip6_del_rt(rt);
1da177e4
LT
1797 goto restart;
1798 }
1799 }
c71099ac 1800 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1801}
1802
5578689a
DL
1803static void rtmsg_to_fib6_config(struct net *net,
1804 struct in6_rtmsg *rtmsg,
86872cb5
TG
1805 struct fib6_config *cfg)
1806{
1807 memset(cfg, 0, sizeof(*cfg));
1808
1809 cfg->fc_table = RT6_TABLE_MAIN;
1810 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1811 cfg->fc_metric = rtmsg->rtmsg_metric;
1812 cfg->fc_expires = rtmsg->rtmsg_info;
1813 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1814 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1815 cfg->fc_flags = rtmsg->rtmsg_flags;
1816
5578689a 1817 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1818
86872cb5
TG
1819 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1820 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1821 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1822}
1823
5578689a 1824int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1825{
86872cb5 1826 struct fib6_config cfg;
1da177e4
LT
1827 struct in6_rtmsg rtmsg;
1828 int err;
1829
1830 switch(cmd) {
1831 case SIOCADDRT: /* Add a route */
1832 case SIOCDELRT: /* Delete a route */
1833 if (!capable(CAP_NET_ADMIN))
1834 return -EPERM;
1835 err = copy_from_user(&rtmsg, arg,
1836 sizeof(struct in6_rtmsg));
1837 if (err)
1838 return -EFAULT;
86872cb5 1839
5578689a 1840 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1841
1da177e4
LT
1842 rtnl_lock();
1843 switch (cmd) {
1844 case SIOCADDRT:
86872cb5 1845 err = ip6_route_add(&cfg);
1da177e4
LT
1846 break;
1847 case SIOCDELRT:
86872cb5 1848 err = ip6_route_del(&cfg);
1da177e4
LT
1849 break;
1850 default:
1851 err = -EINVAL;
1852 }
1853 rtnl_unlock();
1854
1855 return err;
3ff50b79 1856 }
1da177e4
LT
1857
1858 return -EINVAL;
1859}
1860
1861/*
1862 * Drop the packet on the floor
1863 */
1864
50eb431d 1865static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1da177e4 1866{
612f09e8 1867 int type;
3bd653c8 1868 struct dst_entry *dst = skb->dst;
612f09e8
YH
1869 switch (ipstats_mib_noroutes) {
1870 case IPSTATS_MIB_INNOROUTES:
0660e03f 1871 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8 1872 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
3bd653c8
DL
1873 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1874 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1875 break;
1876 }
1877 /* FALLTHROUGH */
1878 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1879 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1880 ipstats_mib_noroutes);
612f09e8
YH
1881 break;
1882 }
9ce8ade0 1883 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1884 kfree_skb(skb);
1885 return 0;
1886}
1887
9ce8ade0
TG
1888static int ip6_pkt_discard(struct sk_buff *skb)
1889{
612f09e8 1890 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1891}
1892
20380731 1893static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1894{
1895 skb->dev = skb->dst->dev;
612f09e8 1896 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1897}
1898
6723ab54
DM
1899#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1900
9ce8ade0
TG
1901static int ip6_pkt_prohibit(struct sk_buff *skb)
1902{
612f09e8 1903 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1904}
1905
1906static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1907{
1908 skb->dev = skb->dst->dev;
612f09e8 1909 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1910}
1911
6723ab54
DM
1912#endif
1913
1da177e4
LT
1914/*
1915 * Allocate a dst for local (unicast / anycast) address.
1916 */
1917
1918struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1919 const struct in6_addr *addr,
1920 int anycast)
1921{
c346dca1 1922 struct net *net = dev_net(idev->dev);
f2fc6a54 1923 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops);
14deae41 1924 struct neighbour *neigh;
1da177e4
LT
1925
1926 if (rt == NULL)
1927 return ERR_PTR(-ENOMEM);
1928
5578689a 1929 dev_hold(net->loopback_dev);
1da177e4
LT
1930 in6_dev_hold(idev);
1931
1932 rt->u.dst.flags = DST_HOST;
1933 rt->u.dst.input = ip6_input;
1934 rt->u.dst.output = ip6_output;
5578689a 1935 rt->rt6i_dev = net->loopback_dev;
1da177e4
LT
1936 rt->rt6i_idev = idev;
1937 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 1938 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1939 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1940 rt->u.dst.obsolete = -1;
1941
1942 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1943 if (anycast)
1944 rt->rt6i_flags |= RTF_ANYCAST;
1945 else
1da177e4 1946 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1947 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1948 if (IS_ERR(neigh)) {
40aa7b90 1949 dst_free(&rt->u.dst);
14deae41
DM
1950
1951 /* We are casting this because that is the return
1952 * value type. But an errno encoded pointer is the
1953 * same regardless of the underlying pointer type,
1954 * and that's what we are returning. So this is OK.
1955 */
1956 return (struct rt6_info *) neigh;
1da177e4 1957 }
14deae41 1958 rt->rt6i_nexthop = neigh;
1da177e4
LT
1959
1960 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1961 rt->rt6i_dst.plen = 128;
5578689a 1962 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4
LT
1963
1964 atomic_set(&rt->u.dst.__refcnt, 1);
1965
1966 return rt;
1967}
1968
8ed67789
DL
/*
 * Argument bundle handed to fib6_ifdown() by rt6_ifdown().
 */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every device */
	struct net *net;	/* namespace whose ip6_null_entry is spared */
};
1973
1da177e4
LT
1974static int fib6_ifdown(struct rt6_info *rt, void *arg)
1975{
8ed67789
DL
1976 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1977 struct net *net = ((struct arg_dev_net *)arg)->net;
1978
1979 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1980 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
1981 RT6_TRACE("deleted by ifdown %p\n", rt);
1982 return -1;
1983 }
1984 return 0;
1985}
1986
f3db4851 1987void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1988{
8ed67789
DL
1989 struct arg_dev_net adn = {
1990 .dev = dev,
1991 .net = net,
1992 };
1993
1994 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 1995 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
1996}
1997
/*
 * Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change().
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2003
2004static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2005{
2006 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2007 struct inet6_dev *idev;
c346dca1 2008 struct net *net = dev_net(arg->dev);
1da177e4
LT
2009
2010 /* In IPv6 pmtu discovery is not optional,
2011 so that RTAX_MTU lock cannot disable it.
2012 We still use this lock to block changes
2013 caused by addrconf/ndisc.
2014 */
2015
2016 idev = __in6_dev_get(arg->dev);
2017 if (idev == NULL)
2018 return 0;
2019
2020 /* For administrative MTU increase, there is no way to discover
2021 IPv6 PMTU increase, so PMTU increase should be updated here.
2022 Since RFC 1981 doesn't include administrative MTU increase
2023 update PMTU increase is a MUST. (i.e. jumbo frame)
2024 */
2025 /*
2026 If new MTU is less than route PMTU, this new MTU will be the
2027 lowest MTU in the path, update the route PMTU to reflect PMTU
2028 decreases; if new MTU is greater than route PMTU, and the
2029 old MTU is the lowest MTU in the path, update the route PMTU
2030 to reflect the increase. In this case if the other nodes' MTU
2031 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2032 PMTU discouvery.
2033 */
2034 if (rt->rt6i_dev == arg->dev &&
2035 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 2036 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 2037 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 2038 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 2039 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
5578689a 2040 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 2041 }
1da177e4
LT
2042 return 0;
2043}
2044
2045void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2046{
c71099ac
TG
2047 struct rt6_mtu_change_arg arg = {
2048 .dev = dev,
2049 .mtu = mtu,
2050 };
1da177e4 2051
c346dca1 2052 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2053}
2054
ef7c79ed 2055static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2056 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2057 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2058 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2059 [RTA_PRIORITY] = { .type = NLA_U32 },
2060 [RTA_METRICS] = { .type = NLA_NESTED },
2061};
2062
2063static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2064 struct fib6_config *cfg)
1da177e4 2065{
86872cb5
TG
2066 struct rtmsg *rtm;
2067 struct nlattr *tb[RTA_MAX+1];
2068 int err;
1da177e4 2069
86872cb5
TG
2070 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2071 if (err < 0)
2072 goto errout;
1da177e4 2073
86872cb5
TG
2074 err = -EINVAL;
2075 rtm = nlmsg_data(nlh);
2076 memset(cfg, 0, sizeof(*cfg));
2077
2078 cfg->fc_table = rtm->rtm_table;
2079 cfg->fc_dst_len = rtm->rtm_dst_len;
2080 cfg->fc_src_len = rtm->rtm_src_len;
2081 cfg->fc_flags = RTF_UP;
2082 cfg->fc_protocol = rtm->rtm_protocol;
2083
2084 if (rtm->rtm_type == RTN_UNREACHABLE)
2085 cfg->fc_flags |= RTF_REJECT;
2086
2087 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2088 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2089 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2090
2091 if (tb[RTA_GATEWAY]) {
2092 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2093 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2094 }
86872cb5
TG
2095
2096 if (tb[RTA_DST]) {
2097 int plen = (rtm->rtm_dst_len + 7) >> 3;
2098
2099 if (nla_len(tb[RTA_DST]) < plen)
2100 goto errout;
2101
2102 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2103 }
86872cb5
TG
2104
2105 if (tb[RTA_SRC]) {
2106 int plen = (rtm->rtm_src_len + 7) >> 3;
2107
2108 if (nla_len(tb[RTA_SRC]) < plen)
2109 goto errout;
2110
2111 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2112 }
86872cb5
TG
2113
2114 if (tb[RTA_OIF])
2115 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2116
2117 if (tb[RTA_PRIORITY])
2118 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2119
2120 if (tb[RTA_METRICS]) {
2121 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2122 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2123 }
86872cb5
TG
2124
2125 if (tb[RTA_TABLE])
2126 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2127
2128 err = 0;
2129errout:
2130 return err;
1da177e4
LT
2131}
2132
c127ea2c 2133static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2134{
86872cb5
TG
2135 struct fib6_config cfg;
2136 int err;
1da177e4 2137
86872cb5
TG
2138 err = rtm_to_fib6_config(skb, nlh, &cfg);
2139 if (err < 0)
2140 return err;
2141
2142 return ip6_route_del(&cfg);
1da177e4
LT
2143}
2144
c127ea2c 2145static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2146{
86872cb5
TG
2147 struct fib6_config cfg;
2148 int err;
1da177e4 2149
86872cb5
TG
2150 err = rtm_to_fib6_config(skb, nlh, &cfg);
2151 if (err < 0)
2152 return err;
2153
2154 return ip6_route_add(&cfg);
1da177e4
LT
2155}
2156
339bf98f
TG
2157static inline size_t rt6_nlmsg_size(void)
2158{
2159 return NLMSG_ALIGN(sizeof(struct rtmsg))
2160 + nla_total_size(16) /* RTA_SRC */
2161 + nla_total_size(16) /* RTA_DST */
2162 + nla_total_size(16) /* RTA_GATEWAY */
2163 + nla_total_size(16) /* RTA_PREFSRC */
2164 + nla_total_size(4) /* RTA_TABLE */
2165 + nla_total_size(4) /* RTA_IIF */
2166 + nla_total_size(4) /* RTA_OIF */
2167 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2168 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2169 + nla_total_size(sizeof(struct rta_cacheinfo));
2170}
2171
191cd582
BH
2172static int rt6_fill_node(struct net *net,
2173 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2174 struct in6_addr *dst, struct in6_addr *src,
2175 int iif, int type, u32 pid, u32 seq,
7bc570c8 2176 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2177{
2178 struct rtmsg *rtm;
2d7202bf 2179 struct nlmsghdr *nlh;
e3703b3d 2180 long expires;
9e762a4a 2181 u32 table;
1da177e4
LT
2182
2183 if (prefix) { /* user wants prefix routes only */
2184 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2185 /* success since this is not a prefix route */
2186 return 1;
2187 }
2188 }
2189
2d7202bf
TG
2190 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2191 if (nlh == NULL)
26932566 2192 return -EMSGSIZE;
2d7202bf
TG
2193
2194 rtm = nlmsg_data(nlh);
1da177e4
LT
2195 rtm->rtm_family = AF_INET6;
2196 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2197 rtm->rtm_src_len = rt->rt6i_src.plen;
2198 rtm->rtm_tos = 0;
c71099ac 2199 if (rt->rt6i_table)
9e762a4a 2200 table = rt->rt6i_table->tb6_id;
c71099ac 2201 else
9e762a4a
PM
2202 table = RT6_TABLE_UNSPEC;
2203 rtm->rtm_table = table;
2d7202bf 2204 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2205 if (rt->rt6i_flags&RTF_REJECT)
2206 rtm->rtm_type = RTN_UNREACHABLE;
2207 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2208 rtm->rtm_type = RTN_LOCAL;
2209 else
2210 rtm->rtm_type = RTN_UNICAST;
2211 rtm->rtm_flags = 0;
2212 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2213 rtm->rtm_protocol = rt->rt6i_protocol;
2214 if (rt->rt6i_flags&RTF_DYNAMIC)
2215 rtm->rtm_protocol = RTPROT_REDIRECT;
2216 else if (rt->rt6i_flags & RTF_ADDRCONF)
2217 rtm->rtm_protocol = RTPROT_KERNEL;
2218 else if (rt->rt6i_flags&RTF_DEFAULT)
2219 rtm->rtm_protocol = RTPROT_RA;
2220
2221 if (rt->rt6i_flags&RTF_CACHE)
2222 rtm->rtm_flags |= RTM_F_CLONED;
2223
2224 if (dst) {
2d7202bf 2225 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2226 rtm->rtm_dst_len = 128;
1da177e4 2227 } else if (rtm->rtm_dst_len)
2d7202bf 2228 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2229#ifdef CONFIG_IPV6_SUBTREES
2230 if (src) {
2d7202bf 2231 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2232 rtm->rtm_src_len = 128;
1da177e4 2233 } else if (rtm->rtm_src_len)
2d7202bf 2234 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2235#endif
7bc570c8
YH
2236 if (iif) {
2237#ifdef CONFIG_IPV6_MROUTE
2238 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2239 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2240 if (err <= 0) {
2241 if (!nowait) {
2242 if (err == 0)
2243 return 0;
2244 goto nla_put_failure;
2245 } else {
2246 if (err == -EMSGSIZE)
2247 goto nla_put_failure;
2248 }
2249 }
2250 } else
2251#endif
2252 NLA_PUT_U32(skb, RTA_IIF, iif);
2253 } else if (dst) {
5e0115e5 2254 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
1da177e4 2255 struct in6_addr saddr_buf;
191cd582 2256 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2257 dst, 0, &saddr_buf) == 0)
2d7202bf 2258 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2259 }
2d7202bf 2260
1da177e4 2261 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2262 goto nla_put_failure;
2263
1da177e4 2264 if (rt->u.dst.neighbour)
2d7202bf
TG
2265 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2266
1da177e4 2267 if (rt->u.dst.dev)
2d7202bf
TG
2268 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2269
2270 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2271
36e3deae
YH
2272 if (!(rt->rt6i_flags & RTF_EXPIRES))
2273 expires = 0;
2274 else if (rt->rt6i_expires - jiffies < INT_MAX)
2275 expires = rt->rt6i_expires - jiffies;
2276 else
2277 expires = INT_MAX;
69cdf8f9 2278
e3703b3d
TG
2279 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2280 expires, rt->u.dst.error) < 0)
2281 goto nla_put_failure;
2d7202bf
TG
2282
2283 return nlmsg_end(skb, nlh);
2284
2285nla_put_failure:
26932566
PM
2286 nlmsg_cancel(skb, nlh);
2287 return -EMSGSIZE;
1da177e4
LT
2288}
2289
1b43af54 2290int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2291{
2292 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2293 int prefix;
2294
2d7202bf
TG
2295 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2296 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2297 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2298 } else
2299 prefix = 0;
2300
191cd582
BH
2301 return rt6_fill_node(arg->net,
2302 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2303 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2304 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2305}
2306
c127ea2c 2307static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2308{
3b1e0a65 2309 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2310 struct nlattr *tb[RTA_MAX+1];
2311 struct rt6_info *rt;
1da177e4 2312 struct sk_buff *skb;
ab364a6f 2313 struct rtmsg *rtm;
1da177e4 2314 struct flowi fl;
ab364a6f 2315 int err, iif = 0;
1da177e4 2316
ab364a6f
TG
2317 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2318 if (err < 0)
2319 goto errout;
1da177e4 2320
ab364a6f 2321 err = -EINVAL;
1da177e4 2322 memset(&fl, 0, sizeof(fl));
1da177e4 2323
ab364a6f
TG
2324 if (tb[RTA_SRC]) {
2325 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2326 goto errout;
2327
2328 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2329 }
2330
2331 if (tb[RTA_DST]) {
2332 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2333 goto errout;
2334
2335 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2336 }
2337
2338 if (tb[RTA_IIF])
2339 iif = nla_get_u32(tb[RTA_IIF]);
2340
2341 if (tb[RTA_OIF])
2342 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2343
2344 if (iif) {
2345 struct net_device *dev;
5578689a 2346 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2347 if (!dev) {
2348 err = -ENODEV;
ab364a6f 2349 goto errout;
1da177e4
LT
2350 }
2351 }
2352
ab364a6f
TG
2353 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2354 if (skb == NULL) {
2355 err = -ENOBUFS;
2356 goto errout;
2357 }
1da177e4 2358
ab364a6f
TG
2359 /* Reserve room for dummy headers, this skb can pass
2360 through good chunk of routing engine.
2361 */
459a98ed 2362 skb_reset_mac_header(skb);
ab364a6f 2363 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2364
8a3edd80 2365 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
1da177e4
LT
2366 skb->dst = &rt->u.dst;
2367
191cd582 2368 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2369 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2370 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2371 if (err < 0) {
ab364a6f
TG
2372 kfree_skb(skb);
2373 goto errout;
1da177e4
LT
2374 }
2375
5578689a 2376 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2377errout:
1da177e4 2378 return err;
1da177e4
LT
2379}
2380
86872cb5 2381void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2382{
2383 struct sk_buff *skb;
5578689a 2384 struct net *net = info->nl_net;
528c4ceb
DL
2385 u32 seq;
2386 int err;
2387
2388 err = -ENOBUFS;
2389 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2390
339bf98f 2391 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2392 if (skb == NULL)
2393 goto errout;
2394
191cd582 2395 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2396 event, info->pid, seq, 0, 0, 0);
26932566
PM
2397 if (err < 0) {
2398 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2399 WARN_ON(err == -EMSGSIZE);
2400 kfree_skb(skb);
2401 goto errout;
2402 }
5578689a
DL
2403 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2404 info->nlh, gfp_any());
21713ebc
TG
2405errout:
2406 if (err < 0)
5578689a 2407 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2408}
2409
8ed67789
DL
2410static int ip6_route_dev_notify(struct notifier_block *this,
2411 unsigned long event, void *data)
2412{
2413 struct net_device *dev = (struct net_device *)data;
c346dca1 2414 struct net *net = dev_net(dev);
8ed67789
DL
2415
2416 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2417 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2418 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2419#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2420 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2421 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2422 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2423 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2424#endif
2425 }
2426
2427 return NOTIFY_OK;
2428}
2429
1da177e4
LT
2430/*
2431 * /proc
2432 */
2433
2434#ifdef CONFIG_PROC_FS
2435
/*
 * Width of one /proc route line; the terms presumably correspond to the
 * columns printed by rt6_info_route() -- TODO confirm.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * NOTE(review): not referenced by the seq_file based /proc code visible
 * in this part of the file; looks like a leftover buffer cursor.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2446
2447static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2448{
33120b30 2449 struct seq_file *m = p_arg;
1da177e4 2450
4b7a4274 2451 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2452
2453#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2454 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2455#else
33120b30 2456 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2457#endif
2458
2459 if (rt->rt6i_nexthop) {
4b7a4274 2460 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2461 } else {
33120b30 2462 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2463 }
33120b30
AD
2464 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2465 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2466 rt->u.dst.__use, rt->rt6i_flags,
2467 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2468 return 0;
2469}
2470
33120b30 2471static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2472{
f3db4851
DL
2473 struct net *net = (struct net *)m->private;
2474 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2475 return 0;
2476}
1da177e4 2477
33120b30
AD
2478static int ipv6_route_open(struct inode *inode, struct file *file)
2479{
de05c557 2480 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2481}
2482
33120b30
AD
2483static const struct file_operations ipv6_route_proc_fops = {
2484 .owner = THIS_MODULE,
2485 .open = ipv6_route_open,
2486 .read = seq_read,
2487 .llseek = seq_lseek,
b6fcbdb4 2488 .release = single_release_net,
33120b30
AD
2489};
2490
1da177e4
LT
2491static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2492{
69ddb805 2493 struct net *net = (struct net *)seq->private;
1da177e4 2494 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2495 net->ipv6.rt6_stats->fib_nodes,
2496 net->ipv6.rt6_stats->fib_route_nodes,
2497 net->ipv6.rt6_stats->fib_rt_alloc,
2498 net->ipv6.rt6_stats->fib_rt_entries,
2499 net->ipv6.rt6_stats->fib_rt_cache,
f2fc6a54 2500 atomic_read(&net->ipv6.ip6_dst_ops->entries),
69ddb805 2501 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2502
2503 return 0;
2504}
2505
2506static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2507{
de05c557 2508 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2509}
2510
9a32144e 2511static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2512 .owner = THIS_MODULE,
2513 .open = rt6_stats_seq_open,
2514 .read = seq_read,
2515 .llseek = seq_lseek,
b6fcbdb4 2516 .release = single_release_net,
1da177e4
LT
2517};
2518#endif /* CONFIG_PROC_FS */
2519
2520#ifdef CONFIG_SYSCTL
2521
1da177e4
LT
2522static
2523int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2524 void __user *buffer, size_t *lenp, loff_t *ppos)
2525{
5b7c931d
DL
2526 struct net *net = current->nsproxy->net_ns;
2527 int delay = net->ipv6.sysctl.flush_delay;
1da177e4
LT
2528 if (write) {
2529 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
5b7c931d 2530 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2531 return 0;
2532 } else
2533 return -EINVAL;
2534}
2535
760f2d01 2536ctl_table ipv6_route_table_template[] = {
1ab1457c 2537 {
1da177e4 2538 .procname = "flush",
4990509f 2539 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2540 .maxlen = sizeof(int),
89c8b3a1 2541 .mode = 0200,
6d9f239a 2542 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2543 },
2544 {
2545 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2546 .procname = "gc_thresh",
9a7ec3a9 2547 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2548 .maxlen = sizeof(int),
2549 .mode = 0644,
6d9f239a 2550 .proc_handler = proc_dointvec,
1da177e4
LT
2551 },
2552 {
2553 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2554 .procname = "max_size",
4990509f 2555 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2556 .maxlen = sizeof(int),
2557 .mode = 0644,
6d9f239a 2558 .proc_handler = proc_dointvec,
1da177e4
LT
2559 },
2560 {
2561 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2562 .procname = "gc_min_interval",
4990509f 2563 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2564 .maxlen = sizeof(int),
2565 .mode = 0644,
6d9f239a
AD
2566 .proc_handler = proc_dointvec_jiffies,
2567 .strategy = sysctl_jiffies,
1da177e4
LT
2568 },
2569 {
2570 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2571 .procname = "gc_timeout",
4990509f 2572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2573 .maxlen = sizeof(int),
2574 .mode = 0644,
6d9f239a
AD
2575 .proc_handler = proc_dointvec_jiffies,
2576 .strategy = sysctl_jiffies,
1da177e4
LT
2577 },
2578 {
2579 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2580 .procname = "gc_interval",
4990509f 2581 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2582 .maxlen = sizeof(int),
2583 .mode = 0644,
6d9f239a
AD
2584 .proc_handler = proc_dointvec_jiffies,
2585 .strategy = sysctl_jiffies,
1da177e4
LT
2586 },
2587 {
2588 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2589 .procname = "gc_elasticity",
4990509f 2590 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
6d9f239a
AD
2593 .proc_handler = proc_dointvec_jiffies,
2594 .strategy = sysctl_jiffies,
1da177e4
LT
2595 },
2596 {
2597 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2598 .procname = "mtu_expires",
4990509f 2599 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2600 .maxlen = sizeof(int),
2601 .mode = 0644,
6d9f239a
AD
2602 .proc_handler = proc_dointvec_jiffies,
2603 .strategy = sysctl_jiffies,
1da177e4
LT
2604 },
2605 {
2606 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2607 .procname = "min_adv_mss",
4990509f 2608 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2609 .maxlen = sizeof(int),
2610 .mode = 0644,
6d9f239a
AD
2611 .proc_handler = proc_dointvec_jiffies,
2612 .strategy = sysctl_jiffies,
1da177e4
LT
2613 },
2614 {
2615 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2616 .procname = "gc_min_interval_ms",
4990509f 2617 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2618 .maxlen = sizeof(int),
2619 .mode = 0644,
6d9f239a
AD
2620 .proc_handler = proc_dointvec_ms_jiffies,
2621 .strategy = sysctl_ms_jiffies,
1da177e4
LT
2622 },
2623 { .ctl_name = 0 }
2624};
2625
760f2d01
DL
2626struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2627{
2628 struct ctl_table *table;
2629
2630 table = kmemdup(ipv6_route_table_template,
2631 sizeof(ipv6_route_table_template),
2632 GFP_KERNEL);
5ee09105
YH
2633
2634 if (table) {
2635 table[0].data = &net->ipv6.sysctl.flush_delay;
f2fc6a54 2636 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh;
5ee09105
YH
2637 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2638 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2639 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2640 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2641 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2642 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2643 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2644 }
2645
760f2d01
DL
2646 return table;
2647}
1da177e4
LT
2648#endif
2649
cdb18761
DL
2650static int ip6_route_net_init(struct net *net)
2651{
633d424b 2652 int ret = -ENOMEM;
8ed67789 2653
f2fc6a54
BT
2654 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template,
2655 sizeof(*net->ipv6.ip6_dst_ops),
2656 GFP_KERNEL);
2657 if (!net->ipv6.ip6_dst_ops)
2658 goto out;
48115bec 2659 net->ipv6.ip6_dst_ops->dst_net = hold_net(net);
f2fc6a54 2660
8ed67789
DL
2661 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2662 sizeof(*net->ipv6.ip6_null_entry),
2663 GFP_KERNEL);
2664 if (!net->ipv6.ip6_null_entry)
f2fc6a54 2665 goto out_ip6_dst_ops;
8ed67789
DL
2666 net->ipv6.ip6_null_entry->u.dst.path =
2667 (struct dst_entry *)net->ipv6.ip6_null_entry;
f2fc6a54 2668 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
8ed67789
DL
2669
2670#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2671 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2672 sizeof(*net->ipv6.ip6_prohibit_entry),
2673 GFP_KERNEL);
68fffc67
PZ
2674 if (!net->ipv6.ip6_prohibit_entry)
2675 goto out_ip6_null_entry;
8ed67789
DL
2676 net->ipv6.ip6_prohibit_entry->u.dst.path =
2677 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
f2fc6a54 2678 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
8ed67789
DL
2679
2680 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2681 sizeof(*net->ipv6.ip6_blk_hole_entry),
2682 GFP_KERNEL);
68fffc67
PZ
2683 if (!net->ipv6.ip6_blk_hole_entry)
2684 goto out_ip6_prohibit_entry;
8ed67789
DL
2685 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2686 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
f2fc6a54 2687 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops;
8ed67789
DL
2688#endif
2689
b339a47c
PZ
2690 net->ipv6.sysctl.flush_delay = 0;
2691 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2692 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2693 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2694 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2695 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2696 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2697 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2698
cdb18761
DL
2699#ifdef CONFIG_PROC_FS
2700 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2701 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2702#endif
6891a346
BT
2703 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2704
8ed67789
DL
2705 ret = 0;
2706out:
2707 return ret;
f2fc6a54 2708
68fffc67
PZ
2709#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2710out_ip6_prohibit_entry:
2711 kfree(net->ipv6.ip6_prohibit_entry);
2712out_ip6_null_entry:
2713 kfree(net->ipv6.ip6_null_entry);
2714#endif
f2fc6a54 2715out_ip6_dst_ops:
48115bec 2716 release_net(net->ipv6.ip6_dst_ops->dst_net);
f2fc6a54
BT
2717 kfree(net->ipv6.ip6_dst_ops);
2718 goto out;
cdb18761
DL
2719}
2720
2721static void ip6_route_net_exit(struct net *net)
2722{
2723#ifdef CONFIG_PROC_FS
2724 proc_net_remove(net, "ipv6_route");
2725 proc_net_remove(net, "rt6_stats");
2726#endif
8ed67789
DL
2727 kfree(net->ipv6.ip6_null_entry);
2728#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2729 kfree(net->ipv6.ip6_prohibit_entry);
2730 kfree(net->ipv6.ip6_blk_hole_entry);
2731#endif
48115bec 2732 release_net(net->ipv6.ip6_dst_ops->dst_net);
f2fc6a54 2733 kfree(net->ipv6.ip6_dst_ops);
cdb18761
DL
2734}
2735
2736static struct pernet_operations ip6_route_net_ops = {
2737 .init = ip6_route_net_init,
2738 .exit = ip6_route_net_exit,
2739};
2740
8ed67789
DL
2741static struct notifier_block ip6_route_dev_notifier = {
2742 .notifier_call = ip6_route_dev_notify,
2743 .priority = 0,
2744};
2745
433d49c3 2746int __init ip6_route_init(void)
1da177e4 2747{
433d49c3
DL
2748 int ret;
2749
9a7ec3a9
DL
2750 ret = -ENOMEM;
2751 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2752 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2753 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2754 if (!ip6_dst_ops_template.kmem_cachep)
f2fc6a54 2755 goto out;;
14e50e57 2756
8ed67789
DL
2757 ret = register_pernet_subsys(&ip6_route_net_ops);
2758 if (ret)
bdb3289f 2759 goto out_kmem_cache;
bdb3289f 2760
5dc121e9
AE
2761 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2762
8ed67789
DL
2763 /* Registering of the loopback is done before this portion of code,
2764 * the loopback reference in rt6_info will not be taken, do it
2765 * manually for init_net */
2766 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2767 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2768 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2769 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2770 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2771 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2772 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2773 #endif
433d49c3
DL
2774 ret = fib6_init();
2775 if (ret)
8ed67789 2776 goto out_register_subsys;
433d49c3 2777
433d49c3
DL
2778 ret = xfrm6_init();
2779 if (ret)
cdb18761 2780 goto out_fib6_init;
c35b7e72 2781
433d49c3
DL
2782 ret = fib6_rules_init();
2783 if (ret)
2784 goto xfrm6_init;
7e5449c2 2785
433d49c3
DL
2786 ret = -ENOBUFS;
2787 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2788 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2789 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2790 goto fib6_rules_init;
c127ea2c 2791
8ed67789 2792 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2793 if (ret)
2794 goto fib6_rules_init;
8ed67789 2795
433d49c3
DL
2796out:
2797 return ret;
2798
2799fib6_rules_init:
433d49c3
DL
2800 fib6_rules_cleanup();
2801xfrm6_init:
433d49c3 2802 xfrm6_fini();
433d49c3 2803out_fib6_init:
433d49c3 2804 fib6_gc_cleanup();
8ed67789
DL
2805out_register_subsys:
2806 unregister_pernet_subsys(&ip6_route_net_ops);
433d49c3 2807out_kmem_cache:
f2fc6a54 2808 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2809 goto out;
1da177e4
LT
2810}
2811
2812void ip6_route_cleanup(void)
2813{
8ed67789 2814 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2815 fib6_rules_cleanup();
1da177e4 2816 xfrm6_fini();
1da177e4 2817 fib6_gc_cleanup();
8ed67789 2818 unregister_pernet_subsys(&ip6_route_net_ops);
f2fc6a54 2819 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2820}