]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NETNS][IPV6] rt6_info - move rt6_info structure inside the namespace
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
1da177e4
LT
100static struct dst_ops ip6_dst_ops = {
101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
862b82c6 111 .local_out = ip6_local_out,
1da177e4 112 .entry_size = sizeof(struct rt6_info),
e2422970 113 .entries = ATOMIC_INIT(0),
1da177e4
LT
114};
115
14e50e57
DM
116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
e2422970 127 .entries = ATOMIC_INIT(0),
14e50e57
DM
128};
129
bdb3289f 130static struct rt6_info ip6_null_entry_template = {
1da177e4
LT
131 .u = {
132 .dst = {
133 .__refcnt = ATOMIC_INIT(1),
134 .__use = 1,
1da177e4
LT
135 .obsolete = -1,
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
140 .ops = &ip6_dst_ops,
1da177e4
LT
141 }
142 },
143 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
144 .rt6i_metric = ~(u32) 0,
145 .rt6i_ref = ATOMIC_INIT(1),
146};
147
101367c2
TG
148#ifdef CONFIG_IPV6_MULTIPLE_TABLES
149
6723ab54
DM
150static int ip6_pkt_prohibit(struct sk_buff *skb);
151static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 152
bdb3289f 153struct rt6_info ip6_prohibit_entry_template = {
101367c2
TG
154 .u = {
155 .dst = {
156 .__refcnt = ATOMIC_INIT(1),
157 .__use = 1,
101367c2
TG
158 .obsolete = -1,
159 .error = -EACCES,
160 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
161 .input = ip6_pkt_prohibit,
162 .output = ip6_pkt_prohibit_out,
101367c2 163 .ops = &ip6_dst_ops,
101367c2
TG
164 }
165 },
166 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
167 .rt6i_metric = ~(u32) 0,
168 .rt6i_ref = ATOMIC_INIT(1),
169};
170
bdb3289f 171static struct rt6_info ip6_blk_hole_entry_template = {
101367c2
TG
172 .u = {
173 .dst = {
174 .__refcnt = ATOMIC_INIT(1),
175 .__use = 1,
101367c2
TG
176 .obsolete = -1,
177 .error = -EINVAL,
178 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
352e512c
HX
179 .input = dst_discard,
180 .output = dst_discard,
101367c2 181 .ops = &ip6_dst_ops,
101367c2
TG
182 }
183 },
184 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
185 .rt6i_metric = ~(u32) 0,
186 .rt6i_ref = ATOMIC_INIT(1),
187};
188
189#endif
190
1da177e4
LT
191/* allocate dst with ip6_dst_ops */
192static __inline__ struct rt6_info *ip6_dst_alloc(void)
193{
194 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
195}
196
197static void ip6_dst_destroy(struct dst_entry *dst)
198{
199 struct rt6_info *rt = (struct rt6_info *)dst;
200 struct inet6_dev *idev = rt->rt6i_idev;
201
202 if (idev != NULL) {
203 rt->rt6i_idev = NULL;
204 in6_dev_put(idev);
1ab1457c 205 }
1da177e4
LT
206}
207
208static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
209 int how)
210{
211 struct rt6_info *rt = (struct rt6_info *)dst;
212 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6
DL
213 struct net_device *loopback_dev =
214 dev->nd_net->loopback_dev;
1da177e4 215
5a3e55d6
DL
216 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
217 struct inet6_dev *loopback_idev =
218 in6_dev_get(loopback_dev);
1da177e4
LT
219 if (loopback_idev != NULL) {
220 rt->rt6i_idev = loopback_idev;
221 in6_dev_put(idev);
222 }
223 }
224}
225
226static __inline__ int rt6_check_expired(const struct rt6_info *rt)
227{
228 return (rt->rt6i_flags & RTF_EXPIRES &&
229 time_after(jiffies, rt->rt6i_expires));
230}
231
c71099ac
TG
232static inline int rt6_need_strict(struct in6_addr *daddr)
233{
234 return (ipv6_addr_type(daddr) &
235 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
236}
237
1da177e4 238/*
c71099ac 239 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
240 */
241
8ed67789
DL
242static inline struct rt6_info *rt6_device_match(struct net *net,
243 struct rt6_info *rt,
1da177e4
LT
244 int oif,
245 int strict)
246{
247 struct rt6_info *local = NULL;
248 struct rt6_info *sprt;
249
250 if (oif) {
7cc48263 251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
252 struct net_device *dev = sprt->rt6i_dev;
253 if (dev->ifindex == oif)
254 return sprt;
255 if (dev->flags & IFF_LOOPBACK) {
256 if (sprt->rt6i_idev == NULL ||
257 sprt->rt6i_idev->dev->ifindex != oif) {
258 if (strict && oif)
259 continue;
1ab1457c 260 if (local && (!oif ||
1da177e4
LT
261 local->rt6i_idev->dev->ifindex == oif))
262 continue;
263 }
264 local = sprt;
265 }
266 }
267
268 if (local)
269 return local;
270
271 if (strict)
8ed67789 272 return net->ipv6.ip6_null_entry;
1da177e4
LT
273 }
274 return rt;
275}
276
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the next hop of @rt is not in a NUD_VALID state and more than
 * rtr_probe_interval jiffies have passed since the neighbour entry was
 * last updated, stamp the entry and send a neighbour solicitation to the
 * solicited-node multicast address of the next hop.
 *
 * Original note kept from the source: this does not seem to be
 * appropriate for now, however we need to check if it is really so.
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe_due;

	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	probe_due = !(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (probe_due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!probe_due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
312
1da177e4 313/*
554cfb7e 314 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 315 */
b6f99a21 316static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
317{
318 struct net_device *dev = rt->rt6i_dev;
161980f4 319 if (!oif || dev->ifindex == oif)
554cfb7e 320 return 2;
161980f4
DM
321 if ((dev->flags & IFF_LOOPBACK) &&
322 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
323 return 1;
324 return 0;
554cfb7e 325}
1da177e4 326
b6f99a21 327static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 328{
554cfb7e 329 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 330 int m;
4d0c5911
YH
331 if (rt->rt6i_flags & RTF_NONEXTHOP ||
332 !(rt->rt6i_flags & RTF_GATEWAY))
333 m = 1;
334 else if (neigh) {
554cfb7e
YH
335 read_lock_bh(&neigh->lock);
336 if (neigh->nud_state & NUD_VALID)
4d0c5911 337 m = 2;
398bcbeb
YH
338#ifdef CONFIG_IPV6_ROUTER_PREF
339 else if (neigh->nud_state & NUD_FAILED)
340 m = 0;
341#endif
342 else
ea73ee23 343 m = 1;
554cfb7e 344 read_unlock_bh(&neigh->lock);
398bcbeb
YH
345 } else
346 m = 0;
554cfb7e 347 return m;
1da177e4
LT
348}
349
554cfb7e
YH
350static int rt6_score_route(struct rt6_info *rt, int oif,
351 int strict)
1da177e4 352{
4d0c5911 353 int m, n;
1ab1457c 354
4d0c5911 355 m = rt6_check_dev(rt, oif);
77d16f45 356 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 357 return -1;
ebacaaa0
YH
358#ifdef CONFIG_IPV6_ROUTER_PREF
359 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
360#endif
4d0c5911 361 n = rt6_check_neigh(rt);
557e92ef 362 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
363 return -1;
364 return m;
365}
366
f11e6659
DM
367static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
368 int *mpri, struct rt6_info *match)
554cfb7e 369{
f11e6659
DM
370 int m;
371
372 if (rt6_check_expired(rt))
373 goto out;
374
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
377 goto out;
378
379 if (m > *mpri) {
380 if (strict & RT6_LOOKUP_F_REACHABLE)
381 rt6_probe(match);
382 *mpri = m;
383 match = rt;
384 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
385 rt6_probe(rt);
386 }
387
388out:
389 return match;
390}
391
392static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
393 struct rt6_info *rr_head,
394 u32 metric, int oif, int strict)
395{
396 struct rt6_info *rt, *match;
554cfb7e 397 int mpri = -1;
1da177e4 398
f11e6659
DM
399 match = NULL;
400 for (rt = rr_head; rt && rt->rt6i_metric == metric;
401 rt = rt->u.dst.rt6_next)
402 match = find_match(rt, oif, strict, &mpri, match);
403 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
404 rt = rt->u.dst.rt6_next)
405 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 406
f11e6659
DM
407 return match;
408}
1da177e4 409
f11e6659
DM
410static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
411{
412 struct rt6_info *match, *rt0;
8ed67789 413 struct net *net;
1da177e4 414
f11e6659
DM
415 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
416 __FUNCTION__, fn->leaf, oif);
554cfb7e 417
f11e6659
DM
418 rt0 = fn->rr_ptr;
419 if (!rt0)
420 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 421
f11e6659 422 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 423
554cfb7e 424 if (!match &&
f11e6659
DM
425 (strict & RT6_LOOKUP_F_REACHABLE)) {
426 struct rt6_info *next = rt0->u.dst.rt6_next;
427
554cfb7e 428 /* no entries matched; do round-robin */
f11e6659
DM
429 if (!next || next->rt6i_metric != rt0->rt6i_metric)
430 next = fn->leaf;
431
432 if (next != rt0)
433 fn->rr_ptr = next;
1da177e4 434 }
1da177e4 435
f11e6659
DM
436 RT6_TRACE("%s() => %p\n",
437 __FUNCTION__, match);
1da177e4 438
8ed67789
DL
439 net = rt0->rt6i_dev->nd_net;
440 return (match ? match : net->ipv6.ip6_null_entry);
1da177e4
LT
441}
442
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process one Route Information option.
 *
 * @dev:    device the option was received on
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the router that sent the option
 *
 * Looks up the route-info route for the advertised prefix via @gwaddr on
 * @dev and then
 *   - deletes it when the advertised lifetime is zero,
 *   - creates it when it does not exist and the lifetime is non-zero,
 *   - otherwise refreshes its preference flags,
 * and finally (re)arms or clears the expiry according to the lifetime.
 *
 * Returns 0 on success, -EINVAL when the option is malformed or must be
 * ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev->nd_net;
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/*
	 * Check the sign explicitly: comparing a negative int against
	 * sizeof() would promote it to a huge unsigned value and pass.
	 */
	if (len < 0 || len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/*
	 * RFC 4191, section 2.3: if the reserved preference value is
	 * received, the Route Information Option MUST be ignored.  (Only
	 * the preference in the RA header itself falls back to medium.)
	 */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
522
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Must be expanded inside a function that provides:
 *   - local variables `rt` (current result) and `fn` (current node),
 *   - a label `out`     (taken when the top-level root is reached),
 *   - a label `restart` (taken when a node carrying route info is found).
 *
 * Does nothing unless `rt` is the namespace's null entry.  Otherwise it
 * walks towards the root; when the parent has a source subtree other than
 * the current node, that subtree is searched with @saddr.
 *
 * Both macro arguments are parenthesised so that arbitrary expressions can
 * be passed safely.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == (__net)->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac 540
8ed67789
DL
541static struct rt6_info *ip6_pol_route_lookup(struct net *net,
542 struct fib6_table *table,
c71099ac 543 struct flowi *fl, int flags)
1da177e4
LT
544{
545 struct fib6_node *fn;
546 struct rt6_info *rt;
547
c71099ac
TG
548 read_lock_bh(&table->tb6_lock);
549 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
550restart:
551 rt = fn->leaf;
8ed67789
DL
552 rt = rt6_device_match(net, rt, fl->oif, flags);
553 BACKTRACK(net, &fl->fl6_src);
c71099ac 554out:
03f49f34 555 dst_use(&rt->u.dst, jiffies);
c71099ac 556 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
557 return rt;
558
559}
560
606a2b48
DL
561struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
562 struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
563{
564 struct flowi fl = {
565 .oif = oif,
566 .nl_u = {
567 .ip6_u = {
568 .daddr = *daddr,
c71099ac
TG
569 },
570 },
571 };
572 struct dst_entry *dst;
77d16f45 573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 574
adaa70bb
TG
575 if (saddr) {
576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 flags |= RT6_LOOKUP_F_HAS_SADDR;
578 }
579
606a2b48 580 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
581 if (dst->error == 0)
582 return (struct rt6_info *) dst;
583
584 dst_release(dst);
585
1da177e4
LT
586 return NULL;
587}
588
7159039a
YH
589EXPORT_SYMBOL(rt6_lookup);
590
c71099ac 591/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
592 It takes new route entry, the addition fails by any reason the
593 route is freed. In any case, if caller does not hold it, it may
594 be destroyed.
595 */
596
86872cb5 597static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
598{
599 int err;
c71099ac 600 struct fib6_table *table;
1da177e4 601
c71099ac
TG
602 table = rt->rt6i_table;
603 write_lock_bh(&table->tb6_lock);
86872cb5 604 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 605 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
606
607 return err;
608}
609
40e22e8f
TG
610int ip6_ins_rt(struct rt6_info *rt)
611{
4d1169c1 612 struct nl_info info = {
5578689a 613 .nl_net = rt->rt6i_dev->nd_net,
4d1169c1 614 };
528c4ceb 615 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
616}
617
95a9a5ba
YH
618static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
619 struct in6_addr *saddr)
1da177e4 620{
1da177e4
LT
621 struct rt6_info *rt;
622
623 /*
624 * Clone the route.
625 */
626
627 rt = ip6_rt_copy(ort);
628
629 if (rt) {
58c4fb86
YH
630 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
631 if (rt->rt6i_dst.plen != 128 &&
632 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
633 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 634 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 635 }
1da177e4 636
58c4fb86 637 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
638 rt->rt6i_dst.plen = 128;
639 rt->rt6i_flags |= RTF_CACHE;
640 rt->u.dst.flags |= DST_HOST;
641
642#ifdef CONFIG_IPV6_SUBTREES
643 if (rt->rt6i_src.plen && saddr) {
644 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
645 rt->rt6i_src.plen = 128;
646 }
647#endif
648
649 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
650
95a9a5ba 651 }
1da177e4 652
95a9a5ba
YH
653 return rt;
654}
1da177e4 655
299d9939
YH
656static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
657{
658 struct rt6_info *rt = ip6_rt_copy(ort);
659 if (rt) {
660 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
661 rt->rt6i_dst.plen = 128;
662 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
663 rt->u.dst.flags |= DST_HOST;
664 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
665 }
666 return rt;
667}
668
8ed67789
DL
669static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
670 struct flowi *fl, int flags)
1da177e4
LT
671{
672 struct fib6_node *fn;
519fbd87 673 struct rt6_info *rt, *nrt;
c71099ac 674 int strict = 0;
1da177e4 675 int attempts = 3;
519fbd87 676 int err;
ea659e07 677 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 678
77d16f45 679 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
680
681relookup:
c71099ac 682 read_lock_bh(&table->tb6_lock);
1da177e4 683
8238dd06 684restart_2:
c71099ac 685 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
686
687restart:
4acad72d 688 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
689
690 BACKTRACK(net, &fl->fl6_src);
691 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 692 rt->rt6i_flags & RTF_CACHE)
1ddef044 693 goto out;
1da177e4 694
fb9de91e 695 dst_hold(&rt->u.dst);
c71099ac 696 read_unlock_bh(&table->tb6_lock);
fb9de91e 697
519fbd87 698 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 699 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
700 else {
701#if CLONE_OFFLINK_ROUTE
c71099ac 702 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
703#else
704 goto out2;
705#endif
706 }
e40cf353 707
519fbd87 708 dst_release(&rt->u.dst);
8ed67789 709 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 710
519fbd87
YH
711 dst_hold(&rt->u.dst);
712 if (nrt) {
40e22e8f 713 err = ip6_ins_rt(nrt);
519fbd87 714 if (!err)
1da177e4 715 goto out2;
1da177e4 716 }
1da177e4 717
519fbd87
YH
718 if (--attempts <= 0)
719 goto out2;
720
721 /*
c71099ac 722 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
723 * released someone could insert this route. Relookup.
724 */
725 dst_release(&rt->u.dst);
726 goto relookup;
727
728out:
8238dd06
YH
729 if (reachable) {
730 reachable = 0;
731 goto restart_2;
732 }
519fbd87 733 dst_hold(&rt->u.dst);
c71099ac 734 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
735out2:
736 rt->u.dst.lastuse = jiffies;
737 rt->u.dst.__use++;
c71099ac
TG
738
739 return rt;
1da177e4
LT
740}
741
8ed67789 742static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
743 struct flowi *fl, int flags)
744{
8ed67789 745 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
746}
747
c71099ac
TG
748void ip6_route_input(struct sk_buff *skb)
749{
0660e03f 750 struct ipv6hdr *iph = ipv6_hdr(skb);
5578689a 751 struct net *net = skb->dev->nd_net;
adaa70bb 752 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
753 struct flowi fl = {
754 .iif = skb->dev->ifindex,
755 .nl_u = {
756 .ip6_u = {
757 .daddr = iph->daddr,
758 .saddr = iph->saddr,
90bcaf7b 759 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
760 },
761 },
1ab1457c 762 .mark = skb->mark,
c71099ac
TG
763 .proto = iph->nexthdr,
764 };
adaa70bb
TG
765
766 if (rt6_need_strict(&iph->daddr))
767 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 768
5578689a 769 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
c71099ac
TG
770}
771
8ed67789 772static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 773 struct flowi *fl, int flags)
1da177e4 774{
8ed67789 775 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
776}
777
778struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
779{
780 int flags = 0;
781
782 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 783 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 784
adaa70bb
TG
785 if (!ipv6_addr_any(&fl->fl6_src))
786 flags |= RT6_LOOKUP_F_HAS_SADDR;
787
58f09b78 788 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
1da177e4
LT
789}
790
7159039a 791EXPORT_SYMBOL(ip6_route_output);
1da177e4 792
14e50e57
DM
793int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
794{
795 struct rt6_info *ort = (struct rt6_info *) *dstp;
796 struct rt6_info *rt = (struct rt6_info *)
797 dst_alloc(&ip6_dst_blackhole_ops);
798 struct dst_entry *new = NULL;
799
800 if (rt) {
801 new = &rt->u.dst;
802
803 atomic_set(&new->__refcnt, 1);
804 new->__use = 1;
352e512c
HX
805 new->input = dst_discard;
806 new->output = dst_discard;
14e50e57
DM
807
808 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
809 new->dev = ort->u.dst.dev;
810 if (new->dev)
811 dev_hold(new->dev);
812 rt->rt6i_idev = ort->rt6i_idev;
813 if (rt->rt6i_idev)
814 in6_dev_hold(rt->rt6i_idev);
815 rt->rt6i_expires = 0;
816
817 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
818 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
819 rt->rt6i_metric = 0;
820
821 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
822#ifdef CONFIG_IPV6_SUBTREES
823 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
824#endif
825
826 dst_free(new);
827 }
828
829 dst_release(*dstp);
830 *dstp = new;
831 return (new ? 0 : -ENOMEM);
832}
833EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
834
1da177e4
LT
835/*
836 * Destination cache support functions
837 */
838
839static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
840{
841 struct rt6_info *rt;
842
843 rt = (struct rt6_info *) dst;
844
845 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
846 return dst;
847
848 return NULL;
849}
850
851static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
852{
853 struct rt6_info *rt = (struct rt6_info *) dst;
854
855 if (rt) {
856 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 857 ip6_del_rt(rt);
1da177e4
LT
858 else
859 dst_release(dst);
860 }
861 return NULL;
862}
863
864static void ip6_link_failure(struct sk_buff *skb)
865{
866 struct rt6_info *rt;
867
868 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
869
870 rt = (struct rt6_info *) skb->dst;
871 if (rt) {
872 if (rt->rt6i_flags&RTF_CACHE) {
873 dst_set_expires(&rt->u.dst, 0);
874 rt->rt6i_flags |= RTF_EXPIRES;
875 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
876 rt->rt6i_node->fn_sernum = -1;
877 }
878}
879
880static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
881{
882 struct rt6_info *rt6 = (struct rt6_info*)dst;
883
884 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
885 rt6->rt6i_flags |= RTF_MODIFIED;
886 if (mtu < IPV6_MIN_MTU) {
887 mtu = IPV6_MIN_MTU;
888 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
889 }
890 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 891 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
892 }
893}
894
1da177e4
LT
895static int ipv6_get_mtu(struct net_device *dev);
896
5578689a 897static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
898{
899 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
900
5578689a
DL
901 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
902 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
903
904 /*
1ab1457c
YH
905 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
906 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
907 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
908 * rely only on pmtu discovery"
909 */
910 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
911 mtu = IPV6_MAXPLEN;
912 return mtu;
913}
914
3b00944c
YH
915static struct dst_entry *icmp6_dst_gc_list;
916static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 917
3b00944c 918struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 919 struct neighbour *neigh,
3b00944c 920 struct in6_addr *addr)
1da177e4
LT
921{
922 struct rt6_info *rt;
923 struct inet6_dev *idev = in6_dev_get(dev);
5578689a 924 struct net *net = dev->nd_net;
1da177e4
LT
925
926 if (unlikely(idev == NULL))
927 return NULL;
928
929 rt = ip6_dst_alloc();
930 if (unlikely(rt == NULL)) {
931 in6_dev_put(idev);
932 goto out;
933 }
934
935 dev_hold(dev);
936 if (neigh)
937 neigh_hold(neigh);
938 else
939 neigh = ndisc_get_neigh(dev, addr);
940
941 rt->rt6i_dev = dev;
942 rt->rt6i_idev = idev;
943 rt->rt6i_nexthop = neigh;
944 atomic_set(&rt->u.dst.__refcnt, 1);
945 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
946 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 947 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
3b00944c 948 rt->u.dst.output = ip6_output;
1da177e4
LT
949
950#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
951 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
952 ? DST_HOST
1da177e4
LT
953 : 0;
954 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
955 rt->rt6i_dst.plen = 128;
956#endif
957
3b00944c
YH
958 spin_lock_bh(&icmp6_dst_lock);
959 rt->u.dst.next = icmp6_dst_gc_list;
960 icmp6_dst_gc_list = &rt->u.dst;
961 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 962
5578689a 963 fib6_force_start_gc(net);
1da177e4
LT
964
965out:
40aa7b90 966 return &rt->u.dst;
1da177e4
LT
967}
968
3b00944c 969int icmp6_dst_gc(int *more)
1da177e4
LT
970{
971 struct dst_entry *dst, *next, **pprev;
972 int freed;
973
974 next = NULL;
1ab1457c 975 freed = 0;
5d0bbeeb 976
3b00944c
YH
977 spin_lock_bh(&icmp6_dst_lock);
978 pprev = &icmp6_dst_gc_list;
5d0bbeeb 979
1da177e4
LT
980 while ((dst = *pprev) != NULL) {
981 if (!atomic_read(&dst->__refcnt)) {
982 *pprev = dst->next;
983 dst_free(dst);
984 freed++;
985 } else {
986 pprev = &dst->next;
987 (*more)++;
988 }
989 }
990
3b00944c 991 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 992
1da177e4
LT
993 return freed;
994}
995
569d3645 996static int ip6_dst_gc(struct dst_ops *ops)
1da177e4
LT
997{
998 static unsigned expire = 30*HZ;
999 static unsigned long last_gc;
1000 unsigned long now = jiffies;
1001
4990509f
DL
1002 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1003 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1da177e4
LT
1004 goto out;
1005
1006 expire++;
5b7c931d 1007 fib6_run_gc(expire, &init_net);
1da177e4
LT
1008 last_gc = now;
1009 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
4990509f 1010 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1da177e4
LT
1011
1012out:
4990509f
DL
1013 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1014 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1da177e4
LT
1015}
1016
1017/* Clean host part of a prefix. Not necessary in radix tree,
1018 but results in cleaner routing tables.
1019
1020 Remove it only when all the things will work!
1021 */
1022
1023static int ipv6_get_mtu(struct net_device *dev)
1024{
1025 int mtu = IPV6_MIN_MTU;
1026 struct inet6_dev *idev;
1027
1028 idev = in6_dev_get(dev);
1029 if (idev) {
1030 mtu = idev->cnf.mtu6;
1031 in6_dev_put(idev);
1032 }
1033 return mtu;
1034}
1035
1036int ipv6_get_hoplimit(struct net_device *dev)
1037{
1038 int hoplimit = ipv6_devconf.hop_limit;
1039 struct inet6_dev *idev;
1040
1041 idev = in6_dev_get(dev);
1042 if (idev) {
1043 hoplimit = idev->cnf.hop_limit;
1044 in6_dev_put(idev);
1045 }
1046 return hoplimit;
1047}
1048
1049/*
1050 *
1051 */
1052
86872cb5 1053int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1054{
1055 int err;
5578689a 1056 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1057 struct rt6_info *rt = NULL;
1058 struct net_device *dev = NULL;
1059 struct inet6_dev *idev = NULL;
c71099ac 1060 struct fib6_table *table;
1da177e4
LT
1061 int addr_type;
1062
86872cb5 1063 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1064 return -EINVAL;
1065#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1066 if (cfg->fc_src_len)
1da177e4
LT
1067 return -EINVAL;
1068#endif
86872cb5 1069 if (cfg->fc_ifindex) {
1da177e4 1070 err = -ENODEV;
5578689a 1071 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1072 if (!dev)
1073 goto out;
1074 idev = in6_dev_get(dev);
1075 if (!idev)
1076 goto out;
1077 }
1078
86872cb5
TG
1079 if (cfg->fc_metric == 0)
1080 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1081
5578689a 1082 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1083 if (table == NULL) {
1084 err = -ENOBUFS;
1085 goto out;
1086 }
1087
1da177e4
LT
1088 rt = ip6_dst_alloc();
1089
1090 if (rt == NULL) {
1091 err = -ENOMEM;
1092 goto out;
1093 }
1094
1095 rt->u.dst.obsolete = -1;
86872cb5 1096 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1097
86872cb5
TG
1098 if (cfg->fc_protocol == RTPROT_UNSPEC)
1099 cfg->fc_protocol = RTPROT_BOOT;
1100 rt->rt6i_protocol = cfg->fc_protocol;
1101
1102 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1103
1104 if (addr_type & IPV6_ADDR_MULTICAST)
1105 rt->u.dst.input = ip6_mc_input;
1106 else
1107 rt->u.dst.input = ip6_forward;
1108
1109 rt->u.dst.output = ip6_output;
1110
86872cb5
TG
1111 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1112 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1113 if (rt->rt6i_dst.plen == 128)
1114 rt->u.dst.flags = DST_HOST;
1115
1116#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1117 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1118 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1119#endif
1120
86872cb5 1121 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1122
1123 /* We cannot add true routes via loopback here,
1124 they would result in kernel looping; promote them to reject routes
1125 */
86872cb5 1126 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1127 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1128 /* hold loopback dev/idev if we haven't done so. */
5578689a 1129 if (dev != net->loopback_dev) {
1da177e4
LT
1130 if (dev) {
1131 dev_put(dev);
1132 in6_dev_put(idev);
1133 }
5578689a 1134 dev = net->loopback_dev;
1da177e4
LT
1135 dev_hold(dev);
1136 idev = in6_dev_get(dev);
1137 if (!idev) {
1138 err = -ENODEV;
1139 goto out;
1140 }
1141 }
1142 rt->u.dst.output = ip6_pkt_discard_out;
1143 rt->u.dst.input = ip6_pkt_discard;
1144 rt->u.dst.error = -ENETUNREACH;
1145 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1146 goto install_route;
1147 }
1148
86872cb5 1149 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1150 struct in6_addr *gw_addr;
1151 int gwa_type;
1152
86872cb5
TG
1153 gw_addr = &cfg->fc_gateway;
1154 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1155 gwa_type = ipv6_addr_type(gw_addr);
1156
1157 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1158 struct rt6_info *grt;
1159
1160 /* IPv6 strictly inhibits using not link-local
1161 addresses as nexthop address.
1162 Otherwise, router will not able to send redirects.
1163 It is very good, but in some (rare!) circumstances
1164 (SIT, PtP, NBMA NOARP links) it is handy to allow
1165 some exceptions. --ANK
1166 */
1167 err = -EINVAL;
1168 if (!(gwa_type&IPV6_ADDR_UNICAST))
1169 goto out;
1170
5578689a 1171 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1172
1173 err = -EHOSTUNREACH;
1174 if (grt == NULL)
1175 goto out;
1176 if (dev) {
1177 if (dev != grt->rt6i_dev) {
1178 dst_release(&grt->u.dst);
1179 goto out;
1180 }
1181 } else {
1182 dev = grt->rt6i_dev;
1183 idev = grt->rt6i_idev;
1184 dev_hold(dev);
1185 in6_dev_hold(grt->rt6i_idev);
1186 }
1187 if (!(grt->rt6i_flags&RTF_GATEWAY))
1188 err = 0;
1189 dst_release(&grt->u.dst);
1190
1191 if (err)
1192 goto out;
1193 }
1194 err = -EINVAL;
1195 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1196 goto out;
1197 }
1198
1199 err = -ENODEV;
1200 if (dev == NULL)
1201 goto out;
1202
86872cb5 1203 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1204 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1205 if (IS_ERR(rt->rt6i_nexthop)) {
1206 err = PTR_ERR(rt->rt6i_nexthop);
1207 rt->rt6i_nexthop = NULL;
1208 goto out;
1209 }
1210 }
1211
86872cb5 1212 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1213
1214install_route:
86872cb5
TG
1215 if (cfg->fc_mx) {
1216 struct nlattr *nla;
1217 int remaining;
1218
1219 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1220 int type = nla_type(nla);
86872cb5
TG
1221
1222 if (type) {
1223 if (type > RTAX_MAX) {
1da177e4
LT
1224 err = -EINVAL;
1225 goto out;
1226 }
86872cb5
TG
1227
1228 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1229 }
1da177e4
LT
1230 }
1231 }
1232
1233 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1234 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1235 if (!rt->u.dst.metrics[RTAX_MTU-1])
1236 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1237 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
5578689a 1238 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1239 rt->u.dst.dev = dev;
1240 rt->rt6i_idev = idev;
c71099ac 1241 rt->rt6i_table = table;
63152fc0
DL
1242
1243 cfg->fc_nlinfo.nl_net = dev->nd_net;
1244
86872cb5 1245 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1246
1247out:
1248 if (dev)
1249 dev_put(dev);
1250 if (idev)
1251 in6_dev_put(idev);
1252 if (rt)
40aa7b90 1253 dst_free(&rt->u.dst);
1da177e4
LT
1254 return err;
1255}
1256
86872cb5 1257static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1258{
1259 int err;
c71099ac 1260 struct fib6_table *table;
8ed67789 1261 struct net *net = rt->rt6i_dev->nd_net;
1da177e4 1262
8ed67789 1263 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1264 return -ENOENT;
1265
c71099ac
TG
1266 table = rt->rt6i_table;
1267 write_lock_bh(&table->tb6_lock);
1da177e4 1268
86872cb5 1269 err = fib6_del(rt, info);
1da177e4
LT
1270 dst_release(&rt->u.dst);
1271
c71099ac 1272 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1273
1274 return err;
1275}
1276
e0a1ad73
TG
1277int ip6_del_rt(struct rt6_info *rt)
1278{
4d1169c1 1279 struct nl_info info = {
5578689a 1280 .nl_net = rt->rt6i_dev->nd_net,
4d1169c1 1281 };
528c4ceb 1282 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1283}
1284
86872cb5 1285static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1286{
c71099ac 1287 struct fib6_table *table;
1da177e4
LT
1288 struct fib6_node *fn;
1289 struct rt6_info *rt;
1290 int err = -ESRCH;
1291
5578689a 1292 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1293 if (table == NULL)
1294 return err;
1295
1296 read_lock_bh(&table->tb6_lock);
1da177e4 1297
c71099ac 1298 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1299 &cfg->fc_dst, cfg->fc_dst_len,
1300 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1301
1da177e4 1302 if (fn) {
7cc48263 1303 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1304 if (cfg->fc_ifindex &&
1da177e4 1305 (rt->rt6i_dev == NULL ||
86872cb5 1306 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1307 continue;
86872cb5
TG
1308 if (cfg->fc_flags & RTF_GATEWAY &&
1309 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1310 continue;
86872cb5 1311 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1312 continue;
1313 dst_hold(&rt->u.dst);
c71099ac 1314 read_unlock_bh(&table->tb6_lock);
1da177e4 1315
86872cb5 1316 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1317 }
1318 }
c71099ac 1319 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1320
1321 return err;
1322}
1323
1324/*
1325 * Handle redirects
1326 */
a6279458
YH
1327struct ip6rd_flowi {
1328 struct flowi fl;
1329 struct in6_addr gateway;
1330};
1331
8ed67789
DL
1332static struct rt6_info *__ip6_route_redirect(struct net *net,
1333 struct fib6_table *table,
a6279458
YH
1334 struct flowi *fl,
1335 int flags)
1da177e4 1336{
a6279458
YH
1337 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1338 struct rt6_info *rt;
e843b9e1 1339 struct fib6_node *fn;
c71099ac 1340
1da177e4 1341 /*
e843b9e1
YH
1342 * Get the "current" route for this destination and
1343 * check if the redirect has come from approriate router.
1344 *
1345 * RFC 2461 specifies that redirects should only be
1346 * accepted if they come from the nexthop to the target.
1347 * Due to the way the routes are chosen, this notion
1348 * is a bit fuzzy and one might need to check all possible
1349 * routes.
1da177e4 1350 */
1da177e4 1351
c71099ac 1352 read_lock_bh(&table->tb6_lock);
a6279458 1353 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1354restart:
7cc48263 1355 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1356 /*
1357 * Current route is on-link; redirect is always invalid.
1358 *
1359 * Seems, previous statement is not true. It could
1360 * be node, which looks for us as on-link (f.e. proxy ndisc)
1361 * But then router serving it might decide, that we should
1362 * know truth 8)8) --ANK (980726).
1363 */
1364 if (rt6_check_expired(rt))
1365 continue;
1366 if (!(rt->rt6i_flags & RTF_GATEWAY))
1367 continue;
a6279458 1368 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1369 continue;
a6279458 1370 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1371 continue;
1372 break;
1373 }
a6279458 1374
cb15d9c2 1375 if (!rt)
8ed67789
DL
1376 rt = net->ipv6.ip6_null_entry;
1377 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1378out:
a6279458
YH
1379 dst_hold(&rt->u.dst);
1380
c71099ac 1381 read_unlock_bh(&table->tb6_lock);
e843b9e1 1382
a6279458
YH
1383 return rt;
1384};
1385
1386static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1387 struct in6_addr *src,
1388 struct in6_addr *gateway,
1389 struct net_device *dev)
1390{
adaa70bb 1391 int flags = RT6_LOOKUP_F_HAS_SADDR;
5578689a 1392 struct net *net = dev->nd_net;
a6279458
YH
1393 struct ip6rd_flowi rdfl = {
1394 .fl = {
1395 .oif = dev->ifindex,
1396 .nl_u = {
1397 .ip6_u = {
1398 .daddr = *dest,
1399 .saddr = *src,
1400 },
1401 },
1402 },
1403 .gateway = *gateway,
1404 };
adaa70bb
TG
1405
1406 if (rt6_need_strict(dest))
1407 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1408
5578689a 1409 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1410 flags, __ip6_route_redirect);
a6279458
YH
1411}
1412
1413void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1414 struct in6_addr *saddr,
1415 struct neighbour *neigh, u8 *lladdr, int on_link)
1416{
1417 struct rt6_info *rt, *nrt = NULL;
1418 struct netevent_redirect netevent;
8ed67789 1419 struct net *net = neigh->dev->nd_net;
a6279458
YH
1420
1421 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1422
8ed67789 1423 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1424 if (net_ratelimit())
1425 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1426 "for redirect target\n");
a6279458 1427 goto out;
1da177e4
LT
1428 }
1429
1da177e4
LT
1430 /*
1431 * We have finally decided to accept it.
1432 */
1433
1ab1457c 1434 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1435 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1436 NEIGH_UPDATE_F_OVERRIDE|
1437 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1438 NEIGH_UPDATE_F_ISROUTER))
1439 );
1440
1441 /*
1442 * Redirect received -> path was valid.
1443 * Look, redirects are sent only in response to data packets,
1444 * so that this nexthop apparently is reachable. --ANK
1445 */
1446 dst_confirm(&rt->u.dst);
1447
1448 /* Duplicate redirect: silently ignore. */
1449 if (neigh == rt->u.dst.neighbour)
1450 goto out;
1451
1452 nrt = ip6_rt_copy(rt);
1453 if (nrt == NULL)
1454 goto out;
1455
1456 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1457 if (on_link)
1458 nrt->rt6i_flags &= ~RTF_GATEWAY;
1459
1460 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1461 nrt->rt6i_dst.plen = 128;
1462 nrt->u.dst.flags |= DST_HOST;
1463
1464 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1465 nrt->rt6i_nexthop = neigh_clone(neigh);
1466 /* Reset pmtu, it may be better */
1467 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
5578689a
DL
1468 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1469 dst_mtu(&nrt->u.dst));
1da177e4 1470
40e22e8f 1471 if (ip6_ins_rt(nrt))
1da177e4
LT
1472 goto out;
1473
8d71740c
TT
1474 netevent.old = &rt->u.dst;
1475 netevent.new = &nrt->u.dst;
1476 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1477
1da177e4 1478 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1479 ip6_del_rt(rt);
1da177e4
LT
1480 return;
1481 }
1482
1483out:
1ab1457c 1484 dst_release(&rt->u.dst);
1da177e4
LT
1485 return;
1486}
1487
1488/*
1489 * Handle ICMP "packet too big" messages
1490 * i.e. Path MTU discovery
1491 */
1492
1493void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1494 struct net_device *dev, u32 pmtu)
1495{
1496 struct rt6_info *rt, *nrt;
5578689a 1497 struct net *net = dev->nd_net;
1da177e4
LT
1498 int allfrag = 0;
1499
5578689a 1500 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1501 if (rt == NULL)
1502 return;
1503
1504 if (pmtu >= dst_mtu(&rt->u.dst))
1505 goto out;
1506
1507 if (pmtu < IPV6_MIN_MTU) {
1508 /*
1ab1457c 1509 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1510 * MTU (1280) and a fragment header should always be included
1511 * after a node receiving Too Big message reporting PMTU is
1512 * less than the IPv6 Minimum Link MTU.
1513 */
1514 pmtu = IPV6_MIN_MTU;
1515 allfrag = 1;
1516 }
1517
1518 /* New mtu received -> path was valid.
1519 They are sent only in response to data packets,
1520 so that this nexthop apparently is reachable. --ANK
1521 */
1522 dst_confirm(&rt->u.dst);
1523
1524 /* Host route. If it is static, it would be better
1525 not to override it, but add new one, so that
1526 when cache entry will expire old pmtu
1527 would return automatically.
1528 */
1529 if (rt->rt6i_flags & RTF_CACHE) {
1530 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1531 if (allfrag)
1532 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
5578689a 1533 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1534 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1535 goto out;
1536 }
1537
1538 /* Network route.
1539 Two cases are possible:
1540 1. It is connected route. Action: COW
1541 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1542 */
d5315b50 1543 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1544 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1545 else
1546 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1547
d5315b50 1548 if (nrt) {
a1e78363
YH
1549 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1550 if (allfrag)
1551 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1552
1553 /* According to RFC 1981, detecting PMTU increase shouldn't be
1554 * happened within 5 mins, the recommended timer is 10 mins.
1555 * Here this route expiration time is set to ip6_rt_mtu_expires
1556 * which is 10 mins. After 10 mins the decreased pmtu is expired
1557 * and detecting PMTU increase will be automatically happened.
1558 */
5578689a 1559 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1560 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1561
40e22e8f 1562 ip6_ins_rt(nrt);
1da177e4 1563 }
1da177e4
LT
1564out:
1565 dst_release(&rt->u.dst);
1566}
1567
1568/*
1569 * Misc support functions
1570 */
1571
1572static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1573{
1574 struct rt6_info *rt = ip6_dst_alloc();
1575
1576 if (rt) {
1577 rt->u.dst.input = ort->u.dst.input;
1578 rt->u.dst.output = ort->u.dst.output;
1579
1580 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1581 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1582 rt->u.dst.dev = ort->u.dst.dev;
1583 if (rt->u.dst.dev)
1584 dev_hold(rt->u.dst.dev);
1585 rt->rt6i_idev = ort->rt6i_idev;
1586 if (rt->rt6i_idev)
1587 in6_dev_hold(rt->rt6i_idev);
1588 rt->u.dst.lastuse = jiffies;
1589 rt->rt6i_expires = 0;
1590
1591 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1592 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1593 rt->rt6i_metric = 0;
1594
1595 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1596#ifdef CONFIG_IPV6_SUBTREES
1597 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1598#endif
c71099ac 1599 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1600 }
1601 return rt;
1602}
1603
70ceb4f5 1604#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1605static struct rt6_info *rt6_get_route_info(struct net *net,
1606 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1607 struct in6_addr *gwaddr, int ifindex)
1608{
1609 struct fib6_node *fn;
1610 struct rt6_info *rt = NULL;
c71099ac
TG
1611 struct fib6_table *table;
1612
efa2cea0 1613 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1614 if (table == NULL)
1615 return NULL;
70ceb4f5 1616
c71099ac
TG
1617 write_lock_bh(&table->tb6_lock);
1618 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1619 if (!fn)
1620 goto out;
1621
7cc48263 1622 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1623 if (rt->rt6i_dev->ifindex != ifindex)
1624 continue;
1625 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1626 continue;
1627 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1628 continue;
1629 dst_hold(&rt->u.dst);
1630 break;
1631 }
1632out:
c71099ac 1633 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1634 return rt;
1635}
1636
efa2cea0
DL
1637static struct rt6_info *rt6_add_route_info(struct net *net,
1638 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1639 struct in6_addr *gwaddr, int ifindex,
1640 unsigned pref)
1641{
86872cb5
TG
1642 struct fib6_config cfg = {
1643 .fc_table = RT6_TABLE_INFO,
238fc7ea 1644 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1645 .fc_ifindex = ifindex,
1646 .fc_dst_len = prefixlen,
1647 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1648 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1649 .fc_nlinfo.pid = 0,
1650 .fc_nlinfo.nlh = NULL,
1651 .fc_nlinfo.nl_net = net,
86872cb5
TG
1652 };
1653
1654 ipv6_addr_copy(&cfg.fc_dst, prefix);
1655 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1656
e317da96
YH
1657 /* We should treat it as a default route if prefix length is 0. */
1658 if (!prefixlen)
86872cb5 1659 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1660
86872cb5 1661 ip6_route_add(&cfg);
70ceb4f5 1662
efa2cea0 1663 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1664}
1665#endif
1666
1da177e4 1667struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1668{
1da177e4 1669 struct rt6_info *rt;
c71099ac 1670 struct fib6_table *table;
1da177e4 1671
5578689a 1672 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
c71099ac
TG
1673 if (table == NULL)
1674 return NULL;
1da177e4 1675
c71099ac 1676 write_lock_bh(&table->tb6_lock);
7cc48263 1677 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1678 if (dev == rt->rt6i_dev &&
045927ff 1679 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1680 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1681 break;
1682 }
1683 if (rt)
1684 dst_hold(&rt->u.dst);
c71099ac 1685 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1686 return rt;
1687}
1688
c7dc89c0
FT
1689EXPORT_SYMBOL(rt6_get_dflt_router);
1690
1da177e4 1691struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1692 struct net_device *dev,
1693 unsigned int pref)
1da177e4 1694{
86872cb5
TG
1695 struct fib6_config cfg = {
1696 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1697 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1698 .fc_ifindex = dev->ifindex,
1699 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1700 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1701 .fc_nlinfo.pid = 0,
1702 .fc_nlinfo.nlh = NULL,
1703 .fc_nlinfo.nl_net = dev->nd_net,
86872cb5 1704 };
1da177e4 1705
86872cb5 1706 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1707
86872cb5 1708 ip6_route_add(&cfg);
1da177e4 1709
1da177e4
LT
1710 return rt6_get_dflt_router(gwaddr, dev);
1711}
1712
7b4da532 1713void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1714{
1715 struct rt6_info *rt;
c71099ac
TG
1716 struct fib6_table *table;
1717
1718 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1719 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1720 if (table == NULL)
1721 return;
1da177e4
LT
1722
1723restart:
c71099ac 1724 read_lock_bh(&table->tb6_lock);
7cc48263 1725 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1726 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1727 dst_hold(&rt->u.dst);
c71099ac 1728 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1729 ip6_del_rt(rt);
1da177e4
LT
1730 goto restart;
1731 }
1732 }
c71099ac 1733 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1734}
1735
5578689a
DL
1736static void rtmsg_to_fib6_config(struct net *net,
1737 struct in6_rtmsg *rtmsg,
86872cb5
TG
1738 struct fib6_config *cfg)
1739{
1740 memset(cfg, 0, sizeof(*cfg));
1741
1742 cfg->fc_table = RT6_TABLE_MAIN;
1743 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1744 cfg->fc_metric = rtmsg->rtmsg_metric;
1745 cfg->fc_expires = rtmsg->rtmsg_info;
1746 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1747 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1748 cfg->fc_flags = rtmsg->rtmsg_flags;
1749
5578689a 1750 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1751
86872cb5
TG
1752 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1753 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1754 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1755}
1756
5578689a 1757int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1758{
86872cb5 1759 struct fib6_config cfg;
1da177e4
LT
1760 struct in6_rtmsg rtmsg;
1761 int err;
1762
1763 switch(cmd) {
1764 case SIOCADDRT: /* Add a route */
1765 case SIOCDELRT: /* Delete a route */
1766 if (!capable(CAP_NET_ADMIN))
1767 return -EPERM;
1768 err = copy_from_user(&rtmsg, arg,
1769 sizeof(struct in6_rtmsg));
1770 if (err)
1771 return -EFAULT;
86872cb5 1772
5578689a 1773 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1774
1da177e4
LT
1775 rtnl_lock();
1776 switch (cmd) {
1777 case SIOCADDRT:
86872cb5 1778 err = ip6_route_add(&cfg);
1da177e4
LT
1779 break;
1780 case SIOCDELRT:
86872cb5 1781 err = ip6_route_del(&cfg);
1da177e4
LT
1782 break;
1783 default:
1784 err = -EINVAL;
1785 }
1786 rtnl_unlock();
1787
1788 return err;
3ff50b79 1789 }
1da177e4
LT
1790
1791 return -EINVAL;
1792}
1793
1794/*
1795 * Drop the packet on the floor
1796 */
1797
50eb431d 1798static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1da177e4 1799{
612f09e8
YH
1800 int type;
1801 switch (ipstats_mib_noroutes) {
1802 case IPSTATS_MIB_INNOROUTES:
0660e03f 1803 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1804 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1805 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1806 break;
1807 }
1808 /* FALLTHROUGH */
1809 case IPSTATS_MIB_OUTNOROUTES:
1810 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1811 break;
1812 }
9ce8ade0 1813 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1814 kfree_skb(skb);
1815 return 0;
1816}
1817
9ce8ade0
TG
1818static int ip6_pkt_discard(struct sk_buff *skb)
1819{
612f09e8 1820 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1821}
1822
20380731 1823static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1824{
1825 skb->dev = skb->dst->dev;
612f09e8 1826 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1827}
1828
6723ab54
DM
1829#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1830
9ce8ade0
TG
1831static int ip6_pkt_prohibit(struct sk_buff *skb)
1832{
612f09e8 1833 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1834}
1835
1836static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1837{
1838 skb->dev = skb->dst->dev;
612f09e8 1839 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1840}
1841
6723ab54
DM
1842#endif
1843
1da177e4
LT
1844/*
1845 * Allocate a dst for local (unicast / anycast) address.
1846 */
1847
1848struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1849 const struct in6_addr *addr,
1850 int anycast)
1851{
5578689a 1852 struct net *net = idev->dev->nd_net;
1da177e4
LT
1853 struct rt6_info *rt = ip6_dst_alloc();
1854
1855 if (rt == NULL)
1856 return ERR_PTR(-ENOMEM);
1857
5578689a 1858 dev_hold(net->loopback_dev);
1da177e4
LT
1859 in6_dev_hold(idev);
1860
1861 rt->u.dst.flags = DST_HOST;
1862 rt->u.dst.input = ip6_input;
1863 rt->u.dst.output = ip6_output;
5578689a 1864 rt->rt6i_dev = net->loopback_dev;
1da177e4
LT
1865 rt->rt6i_idev = idev;
1866 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 1867 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1868 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1869 rt->u.dst.obsolete = -1;
1870
1871 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1872 if (anycast)
1873 rt->rt6i_flags |= RTF_ANYCAST;
1874 else
1da177e4
LT
1875 rt->rt6i_flags |= RTF_LOCAL;
1876 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1877 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1878 dst_free(&rt->u.dst);
1da177e4
LT
1879 return ERR_PTR(-ENOMEM);
1880 }
1881
1882 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1883 rt->rt6i_dst.plen = 128;
5578689a 1884 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4
LT
1885
1886 atomic_set(&rt->u.dst.__refcnt, 1);
1887
1888 return rt;
1889}
1890
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() through fib6_clean_all(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device to match; NULL matches every route */
	struct net		*net;	/* namespace whose null entry is spared */
};
1895
1da177e4
LT
1896static int fib6_ifdown(struct rt6_info *rt, void *arg)
1897{
8ed67789
DL
1898 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1899 struct net *net = ((struct arg_dev_net *)arg)->net;
1900
1901 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1902 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
1903 RT6_TRACE("deleted by ifdown %p\n", rt);
1904 return -1;
1905 }
1906 return 0;
1907}
1908
f3db4851 1909void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1910{
8ed67789
DL
1911 struct arg_dev_net adn = {
1912 .dev = dev,
1913 .net = net,
1914 };
1915
1916 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1da177e4
LT
1917}
1918
/* Argument bundle handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* its new MTU */
};
1924
1925static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1926{
1927 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1928 struct inet6_dev *idev;
5578689a 1929 struct net *net = arg->dev->nd_net;
1da177e4
LT
1930
1931 /* In IPv6 pmtu discovery is not optional,
1932 so that RTAX_MTU lock cannot disable it.
1933 We still use this lock to block changes
1934 caused by addrconf/ndisc.
1935 */
1936
1937 idev = __in6_dev_get(arg->dev);
1938 if (idev == NULL)
1939 return 0;
1940
1941 /* For administrative MTU increase, there is no way to discover
1942 IPv6 PMTU increase, so PMTU increase should be updated here.
1943 Since RFC 1981 doesn't include administrative MTU increase
1944 update PMTU increase is a MUST. (i.e. jumbo frame)
1945 */
1946 /*
1947 If new MTU is less than route PMTU, this new MTU will be the
1948 lowest MTU in the path, update the route PMTU to reflect PMTU
1949 decreases; if new MTU is greater than route PMTU, and the
1950 old MTU is the lowest MTU in the path, update the route PMTU
1951 to reflect the increase. In this case if the other nodes' MTU
1952 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1953 PMTU discouvery.
1954 */
1955 if (rt->rt6i_dev == arg->dev &&
1956 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 1957 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 1958 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1959 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1960 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
5578689a 1961 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 1962 }
1da177e4
LT
1963 return 0;
1964}
1965
1966void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1967{
c71099ac
TG
1968 struct rt6_mtu_change_arg arg = {
1969 .dev = dev,
1970 .mtu = mtu,
1971 };
1da177e4 1972
f3db4851 1973 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1974}
1975
ef7c79ed 1976static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 1977 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1978 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1979 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1980 [RTA_PRIORITY] = { .type = NLA_U32 },
1981 [RTA_METRICS] = { .type = NLA_NESTED },
1982};
1983
1984static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1985 struct fib6_config *cfg)
1da177e4 1986{
86872cb5
TG
1987 struct rtmsg *rtm;
1988 struct nlattr *tb[RTA_MAX+1];
1989 int err;
1da177e4 1990
86872cb5
TG
1991 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1992 if (err < 0)
1993 goto errout;
1da177e4 1994
86872cb5
TG
1995 err = -EINVAL;
1996 rtm = nlmsg_data(nlh);
1997 memset(cfg, 0, sizeof(*cfg));
1998
1999 cfg->fc_table = rtm->rtm_table;
2000 cfg->fc_dst_len = rtm->rtm_dst_len;
2001 cfg->fc_src_len = rtm->rtm_src_len;
2002 cfg->fc_flags = RTF_UP;
2003 cfg->fc_protocol = rtm->rtm_protocol;
2004
2005 if (rtm->rtm_type == RTN_UNREACHABLE)
2006 cfg->fc_flags |= RTF_REJECT;
2007
2008 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2009 cfg->fc_nlinfo.nlh = nlh;
2216b483 2010 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
86872cb5
TG
2011
2012 if (tb[RTA_GATEWAY]) {
2013 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2014 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2015 }
86872cb5
TG
2016
2017 if (tb[RTA_DST]) {
2018 int plen = (rtm->rtm_dst_len + 7) >> 3;
2019
2020 if (nla_len(tb[RTA_DST]) < plen)
2021 goto errout;
2022
2023 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2024 }
86872cb5
TG
2025
2026 if (tb[RTA_SRC]) {
2027 int plen = (rtm->rtm_src_len + 7) >> 3;
2028
2029 if (nla_len(tb[RTA_SRC]) < plen)
2030 goto errout;
2031
2032 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2033 }
86872cb5
TG
2034
2035 if (tb[RTA_OIF])
2036 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2037
2038 if (tb[RTA_PRIORITY])
2039 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2040
2041 if (tb[RTA_METRICS]) {
2042 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2043 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2044 }
86872cb5
TG
2045
2046 if (tb[RTA_TABLE])
2047 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2048
2049 err = 0;
2050errout:
2051 return err;
1da177e4
LT
2052}
2053
c127ea2c 2054static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2055{
86872cb5
TG
2056 struct fib6_config cfg;
2057 int err;
1da177e4 2058
86872cb5
TG
2059 err = rtm_to_fib6_config(skb, nlh, &cfg);
2060 if (err < 0)
2061 return err;
2062
2063 return ip6_route_del(&cfg);
1da177e4
LT
2064}
2065
c127ea2c 2066static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2067{
86872cb5
TG
2068 struct fib6_config cfg;
2069 int err;
1da177e4 2070
86872cb5
TG
2071 err = rtm_to_fib6_config(skb, nlh, &cfg);
2072 if (err < 0)
2073 return err;
2074
2075 return ip6_route_add(&cfg);
1da177e4
LT
2076}
2077
339bf98f
TG
2078static inline size_t rt6_nlmsg_size(void)
2079{
2080 return NLMSG_ALIGN(sizeof(struct rtmsg))
2081 + nla_total_size(16) /* RTA_SRC */
2082 + nla_total_size(16) /* RTA_DST */
2083 + nla_total_size(16) /* RTA_GATEWAY */
2084 + nla_total_size(16) /* RTA_PREFSRC */
2085 + nla_total_size(4) /* RTA_TABLE */
2086 + nla_total_size(4) /* RTA_IIF */
2087 + nla_total_size(4) /* RTA_OIF */
2088 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2089 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2090 + nla_total_size(sizeof(struct rta_cacheinfo));
2091}
2092
1da177e4 2093static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2094 struct in6_addr *dst, struct in6_addr *src,
2095 int iif, int type, u32 pid, u32 seq,
2096 int prefix, unsigned int flags)
1da177e4
LT
2097{
2098 struct rtmsg *rtm;
2d7202bf 2099 struct nlmsghdr *nlh;
e3703b3d 2100 long expires;
9e762a4a 2101 u32 table;
1da177e4
LT
2102
2103 if (prefix) { /* user wants prefix routes only */
2104 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2105 /* success since this is not a prefix route */
2106 return 1;
2107 }
2108 }
2109
2d7202bf
TG
2110 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2111 if (nlh == NULL)
26932566 2112 return -EMSGSIZE;
2d7202bf
TG
2113
2114 rtm = nlmsg_data(nlh);
1da177e4
LT
2115 rtm->rtm_family = AF_INET6;
2116 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2117 rtm->rtm_src_len = rt->rt6i_src.plen;
2118 rtm->rtm_tos = 0;
c71099ac 2119 if (rt->rt6i_table)
9e762a4a 2120 table = rt->rt6i_table->tb6_id;
c71099ac 2121 else
9e762a4a
PM
2122 table = RT6_TABLE_UNSPEC;
2123 rtm->rtm_table = table;
2d7202bf 2124 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2125 if (rt->rt6i_flags&RTF_REJECT)
2126 rtm->rtm_type = RTN_UNREACHABLE;
2127 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2128 rtm->rtm_type = RTN_LOCAL;
2129 else
2130 rtm->rtm_type = RTN_UNICAST;
2131 rtm->rtm_flags = 0;
2132 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2133 rtm->rtm_protocol = rt->rt6i_protocol;
2134 if (rt->rt6i_flags&RTF_DYNAMIC)
2135 rtm->rtm_protocol = RTPROT_REDIRECT;
2136 else if (rt->rt6i_flags & RTF_ADDRCONF)
2137 rtm->rtm_protocol = RTPROT_KERNEL;
2138 else if (rt->rt6i_flags&RTF_DEFAULT)
2139 rtm->rtm_protocol = RTPROT_RA;
2140
2141 if (rt->rt6i_flags&RTF_CACHE)
2142 rtm->rtm_flags |= RTM_F_CLONED;
2143
2144 if (dst) {
2d7202bf 2145 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2146 rtm->rtm_dst_len = 128;
1da177e4 2147 } else if (rtm->rtm_dst_len)
2d7202bf 2148 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2149#ifdef CONFIG_IPV6_SUBTREES
2150 if (src) {
2d7202bf 2151 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2152 rtm->rtm_src_len = 128;
1da177e4 2153 } else if (rtm->rtm_src_len)
2d7202bf 2154 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2155#endif
2156 if (iif)
2d7202bf 2157 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2158 else if (dst) {
2159 struct in6_addr saddr_buf;
5e5f3f0f
YH
2160 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2161 dst, &saddr_buf) == 0)
2d7202bf 2162 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2163 }
2d7202bf 2164
1da177e4 2165 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2166 goto nla_put_failure;
2167
1da177e4 2168 if (rt->u.dst.neighbour)
2d7202bf
TG
2169 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2170
1da177e4 2171 if (rt->u.dst.dev)
2d7202bf
TG
2172 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2173
2174 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2175
2176 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2177 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2178 expires, rt->u.dst.error) < 0)
2179 goto nla_put_failure;
2d7202bf
TG
2180
2181 return nlmsg_end(skb, nlh);
2182
2183nla_put_failure:
26932566
PM
2184 nlmsg_cancel(skb, nlh);
2185 return -EMSGSIZE;
1da177e4
LT
2186}
2187
1b43af54 2188int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2189{
2190 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2191 int prefix;
2192
2d7202bf
TG
2193 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2194 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2195 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2196 } else
2197 prefix = 0;
2198
2199 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2200 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2201 prefix, NLM_F_MULTI);
1da177e4
LT
2202}
2203
c127ea2c 2204static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2205{
b854272b 2206 struct net *net = in_skb->sk->sk_net;
ab364a6f
TG
2207 struct nlattr *tb[RTA_MAX+1];
2208 struct rt6_info *rt;
1da177e4 2209 struct sk_buff *skb;
ab364a6f 2210 struct rtmsg *rtm;
1da177e4 2211 struct flowi fl;
ab364a6f 2212 int err, iif = 0;
1da177e4 2213
ab364a6f
TG
2214 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2215 if (err < 0)
2216 goto errout;
1da177e4 2217
ab364a6f 2218 err = -EINVAL;
1da177e4 2219 memset(&fl, 0, sizeof(fl));
1da177e4 2220
ab364a6f
TG
2221 if (tb[RTA_SRC]) {
2222 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2223 goto errout;
2224
2225 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2226 }
2227
2228 if (tb[RTA_DST]) {
2229 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2230 goto errout;
2231
2232 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2233 }
2234
2235 if (tb[RTA_IIF])
2236 iif = nla_get_u32(tb[RTA_IIF]);
2237
2238 if (tb[RTA_OIF])
2239 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2240
2241 if (iif) {
2242 struct net_device *dev;
5578689a 2243 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2244 if (!dev) {
2245 err = -ENODEV;
ab364a6f 2246 goto errout;
1da177e4
LT
2247 }
2248 }
2249
ab364a6f
TG
2250 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2251 if (skb == NULL) {
2252 err = -ENOBUFS;
2253 goto errout;
2254 }
1da177e4 2255
ab364a6f
TG
2256 /* Reserve room for dummy headers, this skb can pass
2257 through good chunk of routing engine.
2258 */
459a98ed 2259 skb_reset_mac_header(skb);
ab364a6f 2260 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2261
ab364a6f 2262 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2263 skb->dst = &rt->u.dst;
2264
ab364a6f 2265 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2266 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2267 nlh->nlmsg_seq, 0, 0);
1da177e4 2268 if (err < 0) {
ab364a6f
TG
2269 kfree_skb(skb);
2270 goto errout;
1da177e4
LT
2271 }
2272
5578689a 2273 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2274errout:
1da177e4 2275 return err;
1da177e4
LT
2276}
2277
86872cb5 2278void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2279{
2280 struct sk_buff *skb;
5578689a 2281 struct net *net = info->nl_net;
528c4ceb
DL
2282 u32 seq;
2283 int err;
2284
2285 err = -ENOBUFS;
2286 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2287
339bf98f 2288 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2289 if (skb == NULL)
2290 goto errout;
2291
528c4ceb
DL
2292 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2293 event, info->pid, seq, 0, 0);
26932566
PM
2294 if (err < 0) {
2295 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2296 WARN_ON(err == -EMSGSIZE);
2297 kfree_skb(skb);
2298 goto errout;
2299 }
5578689a
DL
2300 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2301 info->nlh, gfp_any());
21713ebc
TG
2302errout:
2303 if (err < 0)
5578689a 2304 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2305}
2306
8ed67789
DL
2307static int ip6_route_dev_notify(struct notifier_block *this,
2308 unsigned long event, void *data)
2309{
2310 struct net_device *dev = (struct net_device *)data;
2311 struct net *net = dev->nd_net;
2312
2313 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2314 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2315 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2316#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2317 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2318 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2319 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2320 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2321#endif
2322 }
2323
2324 return NOTIFY_OK;
2325}
2326
1da177e4
LT
2327/*
2328 * /proc
2329 */
2330
2331#ifdef CONFIG_PROC_FS
2332
/* Sum of the per-field widths of one route line. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Buffer bookkeeping for a /proc route dump. */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2343
2344static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2345{
33120b30 2346 struct seq_file *m = p_arg;
1da177e4 2347
33120b30
AD
2348 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2349 rt->rt6i_dst.plen);
1da177e4
LT
2350
2351#ifdef CONFIG_IPV6_SUBTREES
33120b30
AD
2352 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2353 rt->rt6i_src.plen);
1da177e4 2354#else
33120b30 2355 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2356#endif
2357
2358 if (rt->rt6i_nexthop) {
33120b30
AD
2359 seq_printf(m, NIP6_SEQFMT,
2360 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2361 } else {
33120b30 2362 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2363 }
33120b30
AD
2364 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2365 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2366 rt->u.dst.__use, rt->rt6i_flags,
2367 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2368 return 0;
2369}
2370
33120b30 2371static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2372{
f3db4851
DL
2373 struct net *net = (struct net *)m->private;
2374 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2375 return 0;
2376}
1da177e4 2377
33120b30
AD
2378static int ipv6_route_open(struct inode *inode, struct file *file)
2379{
f3db4851
DL
2380 struct net *net = get_proc_net(inode);
2381 if (!net)
2382 return -ENXIO;
2383 return single_open(file, ipv6_route_show, net);
2384}
2385
2386static int ipv6_route_release(struct inode *inode, struct file *file)
2387{
2388 struct seq_file *seq = file->private_data;
2389 struct net *net = seq->private;
2390 put_net(net);
2391 return single_release(inode, file);
1da177e4
LT
2392}
2393
33120b30
AD
2394static const struct file_operations ipv6_route_proc_fops = {
2395 .owner = THIS_MODULE,
2396 .open = ipv6_route_open,
2397 .read = seq_read,
2398 .llseek = seq_lseek,
f3db4851 2399 .release = ipv6_route_release,
33120b30
AD
2400};
2401
1da177e4
LT
2402static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2403{
69ddb805 2404 struct net *net = (struct net *)seq->private;
1da177e4 2405 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2406 net->ipv6.rt6_stats->fib_nodes,
2407 net->ipv6.rt6_stats->fib_route_nodes,
2408 net->ipv6.rt6_stats->fib_rt_alloc,
2409 net->ipv6.rt6_stats->fib_rt_entries,
2410 net->ipv6.rt6_stats->fib_rt_cache,
c572872f 2411 atomic_read(&ip6_dst_ops.entries),
69ddb805 2412 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2413
2414 return 0;
2415}
2416
2417static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2418{
69ddb805
DL
2419 struct net *net = get_proc_net(inode);
2420 return single_open(file, rt6_stats_seq_show, net);
2421}
2422
2423static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2424{
2425 struct seq_file *seq = file->private_data;
2426 struct net *net = (struct net *)seq->private;
2427 put_net(net);
2428 return single_release(inode, file);
1da177e4
LT
2429}
2430
9a32144e 2431static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2432 .owner = THIS_MODULE,
2433 .open = rt6_stats_seq_open,
2434 .read = seq_read,
2435 .llseek = seq_lseek,
69ddb805 2436 .release = rt6_stats_seq_release,
1da177e4
LT
2437};
2438#endif /* CONFIG_PROC_FS */
2439
2440#ifdef CONFIG_SYSCTL
2441
1da177e4
LT
2442static
2443int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2444 void __user *buffer, size_t *lenp, loff_t *ppos)
2445{
5b7c931d
DL
2446 struct net *net = current->nsproxy->net_ns;
2447 int delay = net->ipv6.sysctl.flush_delay;
1da177e4
LT
2448 if (write) {
2449 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
5b7c931d 2450 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2451 return 0;
2452 } else
2453 return -EINVAL;
2454}
2455
760f2d01 2456ctl_table ipv6_route_table_template[] = {
1ab1457c 2457 {
1da177e4 2458 .procname = "flush",
4990509f 2459 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2460 .maxlen = sizeof(int),
89c8b3a1 2461 .mode = 0200,
1ab1457c 2462 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2463 },
2464 {
2465 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2466 .procname = "gc_thresh",
1ab1457c 2467 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2468 .maxlen = sizeof(int),
2469 .mode = 0644,
1ab1457c 2470 .proc_handler = &proc_dointvec,
1da177e4
LT
2471 },
2472 {
2473 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2474 .procname = "max_size",
4990509f 2475 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2476 .maxlen = sizeof(int),
2477 .mode = 0644,
1ab1457c 2478 .proc_handler = &proc_dointvec,
1da177e4
LT
2479 },
2480 {
2481 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2482 .procname = "gc_min_interval",
4990509f 2483 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2484 .maxlen = sizeof(int),
2485 .mode = 0644,
1ab1457c 2486 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2487 .strategy = &sysctl_jiffies,
2488 },
2489 {
2490 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2491 .procname = "gc_timeout",
4990509f 2492 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2493 .maxlen = sizeof(int),
2494 .mode = 0644,
1ab1457c 2495 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2496 .strategy = &sysctl_jiffies,
2497 },
2498 {
2499 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2500 .procname = "gc_interval",
4990509f 2501 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2502 .maxlen = sizeof(int),
2503 .mode = 0644,
1ab1457c 2504 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2505 .strategy = &sysctl_jiffies,
2506 },
2507 {
2508 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2509 .procname = "gc_elasticity",
4990509f 2510 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2511 .maxlen = sizeof(int),
2512 .mode = 0644,
1ab1457c 2513 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2514 .strategy = &sysctl_jiffies,
2515 },
2516 {
2517 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2518 .procname = "mtu_expires",
4990509f 2519 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2520 .maxlen = sizeof(int),
2521 .mode = 0644,
1ab1457c 2522 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2523 .strategy = &sysctl_jiffies,
2524 },
2525 {
2526 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2527 .procname = "min_adv_mss",
4990509f 2528 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2529 .maxlen = sizeof(int),
2530 .mode = 0644,
1ab1457c 2531 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2532 .strategy = &sysctl_jiffies,
2533 },
2534 {
2535 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2536 .procname = "gc_min_interval_ms",
4990509f 2537 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2538 .maxlen = sizeof(int),
2539 .mode = 0644,
1ab1457c 2540 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2541 .strategy = &sysctl_ms_jiffies,
2542 },
2543 { .ctl_name = 0 }
2544};
2545
760f2d01
DL
2546struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2547{
2548 struct ctl_table *table;
2549
2550 table = kmemdup(ipv6_route_table_template,
2551 sizeof(ipv6_route_table_template),
2552 GFP_KERNEL);
5ee09105
YH
2553
2554 if (table) {
2555 table[0].data = &net->ipv6.sysctl.flush_delay;
2556 /* table[1].data will be handled when we have
2557 routes per namespace */
2558 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2559 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2560 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2561 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2562 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2563 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2564 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2565 }
2566
760f2d01
DL
2567 return table;
2568}
1da177e4
LT
2569#endif
2570
cdb18761
DL
2571static int ip6_route_net_init(struct net *net)
2572{
8ed67789
DL
2573 int ret = 0;
2574
2575 ret = -ENOMEM;
2576 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2577 sizeof(*net->ipv6.ip6_null_entry),
2578 GFP_KERNEL);
2579 if (!net->ipv6.ip6_null_entry)
2580 goto out;
2581 net->ipv6.ip6_null_entry->u.dst.path =
2582 (struct dst_entry *)net->ipv6.ip6_null_entry;
2583
2584#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2585 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2586 sizeof(*net->ipv6.ip6_prohibit_entry),
2587 GFP_KERNEL);
2588 if (!net->ipv6.ip6_prohibit_entry) {
2589 kfree(net->ipv6.ip6_null_entry);
2590 goto out;
2591 }
2592 net->ipv6.ip6_prohibit_entry->u.dst.path =
2593 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2594
2595 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2596 sizeof(*net->ipv6.ip6_blk_hole_entry),
2597 GFP_KERNEL);
2598 if (!net->ipv6.ip6_blk_hole_entry) {
2599 kfree(net->ipv6.ip6_null_entry);
2600 kfree(net->ipv6.ip6_prohibit_entry);
2601 goto out;
2602 }
2603 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2604 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2605#endif
2606
cdb18761
DL
2607#ifdef CONFIG_PROC_FS
2608 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2609 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2610#endif
8ed67789
DL
2611 ret = 0;
2612out:
2613 return ret;
cdb18761
DL
2614}
2615
2616static void ip6_route_net_exit(struct net *net)
2617{
2618#ifdef CONFIG_PROC_FS
2619 proc_net_remove(net, "ipv6_route");
2620 proc_net_remove(net, "rt6_stats");
2621#endif
8ed67789
DL
2622 kfree(net->ipv6.ip6_null_entry);
2623#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2624 kfree(net->ipv6.ip6_prohibit_entry);
2625 kfree(net->ipv6.ip6_blk_hole_entry);
2626#endif
cdb18761
DL
2627}
2628
2629static struct pernet_operations ip6_route_net_ops = {
2630 .init = ip6_route_net_init,
2631 .exit = ip6_route_net_exit,
2632};
2633
8ed67789
DL
2634static struct notifier_block ip6_route_dev_notifier = {
2635 .notifier_call = ip6_route_dev_notify,
2636 .priority = 0,
2637};
2638
433d49c3 2639int __init ip6_route_init(void)
1da177e4 2640{
433d49c3
DL
2641 int ret;
2642
e5d679f3
AD
2643 ip6_dst_ops.kmem_cachep =
2644 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b
DL
2645 SLAB_HWCACHE_ALIGN, NULL);
2646 if (!ip6_dst_ops.kmem_cachep)
2647 return -ENOMEM;
2648
14e50e57
DM
2649 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2650
8ed67789
DL
2651 ret = register_pernet_subsys(&ip6_route_net_ops);
2652 if (ret)
bdb3289f 2653 goto out_kmem_cache;
bdb3289f 2654
8ed67789
DL
2655 /* Registering of the loopback is done before this portion of code,
2656 * the loopback reference in rt6_info will not be taken, do it
2657 * manually for init_net */
2658 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2659 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2660 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2661 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2662 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2663 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2664 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2665 #endif
433d49c3
DL
2666 ret = fib6_init();
2667 if (ret)
8ed67789 2668 goto out_register_subsys;
433d49c3 2669
433d49c3
DL
2670 ret = xfrm6_init();
2671 if (ret)
cdb18761 2672 goto out_fib6_init;
c35b7e72 2673
433d49c3
DL
2674 ret = fib6_rules_init();
2675 if (ret)
2676 goto xfrm6_init;
7e5449c2 2677
433d49c3
DL
2678 ret = -ENOBUFS;
2679 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2680 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2681 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2682 goto fib6_rules_init;
c127ea2c 2683
8ed67789 2684 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2685 if (ret)
2686 goto fib6_rules_init;
8ed67789 2687
433d49c3
DL
2688out:
2689 return ret;
2690
2691fib6_rules_init:
433d49c3
DL
2692 fib6_rules_cleanup();
2693xfrm6_init:
433d49c3 2694 xfrm6_fini();
433d49c3 2695out_fib6_init:
433d49c3 2696 fib6_gc_cleanup();
8ed67789
DL
2697out_register_subsys:
2698 unregister_pernet_subsys(&ip6_route_net_ops);
433d49c3
DL
2699out_kmem_cache:
2700 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2701 goto out;
1da177e4
LT
2702}
2703
2704void ip6_route_cleanup(void)
2705{
8ed67789 2706 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2707 fib6_rules_cleanup();
1da177e4 2708 xfrm6_fini();
1da177e4 2709 fib6_gc_cleanup();
8ed67789 2710 unregister_pernet_subsys(&ip6_route_net_ops);
1da177e4
LT
2711 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2712}