]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NETNS][IPV6] route6 - make route6 per namespace
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
1da177e4
LT
100static struct dst_ops ip6_dst_ops = {
101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
862b82c6 111 .local_out = ip6_local_out,
1da177e4 112 .entry_size = sizeof(struct rt6_info),
e2422970 113 .entries = ATOMIC_INIT(0),
1da177e4
LT
114};
115
14e50e57
DM
116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
e2422970 127 .entries = ATOMIC_INIT(0),
14e50e57
DM
128};
129
1da177e4
LT
130struct rt6_info ip6_null_entry = {
131 .u = {
132 .dst = {
133 .__refcnt = ATOMIC_INIT(1),
134 .__use = 1,
1da177e4
LT
135 .obsolete = -1,
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
140 .ops = &ip6_dst_ops,
141 .path = (struct dst_entry*)&ip6_null_entry,
142 }
143 },
144 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
145 .rt6i_metric = ~(u32) 0,
146 .rt6i_ref = ATOMIC_INIT(1),
147};
148
101367c2
TG
149#ifdef CONFIG_IPV6_MULTIPLE_TABLES
150
6723ab54
DM
151static int ip6_pkt_prohibit(struct sk_buff *skb);
152static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 153
101367c2
TG
154struct rt6_info ip6_prohibit_entry = {
155 .u = {
156 .dst = {
157 .__refcnt = ATOMIC_INIT(1),
158 .__use = 1,
101367c2
TG
159 .obsolete = -1,
160 .error = -EACCES,
161 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
162 .input = ip6_pkt_prohibit,
163 .output = ip6_pkt_prohibit_out,
101367c2
TG
164 .ops = &ip6_dst_ops,
165 .path = (struct dst_entry*)&ip6_prohibit_entry,
166 }
167 },
168 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
169 .rt6i_metric = ~(u32) 0,
170 .rt6i_ref = ATOMIC_INIT(1),
171};
172
173struct rt6_info ip6_blk_hole_entry = {
174 .u = {
175 .dst = {
176 .__refcnt = ATOMIC_INIT(1),
177 .__use = 1,
101367c2
TG
178 .obsolete = -1,
179 .error = -EINVAL,
180 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
352e512c
HX
181 .input = dst_discard,
182 .output = dst_discard,
101367c2
TG
183 .ops = &ip6_dst_ops,
184 .path = (struct dst_entry*)&ip6_blk_hole_entry,
185 }
186 },
187 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
190};
191
192#endif
193
1da177e4
LT
194/* allocate dst with ip6_dst_ops */
195static __inline__ struct rt6_info *ip6_dst_alloc(void)
196{
197 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
198}
199
200static void ip6_dst_destroy(struct dst_entry *dst)
201{
202 struct rt6_info *rt = (struct rt6_info *)dst;
203 struct inet6_dev *idev = rt->rt6i_idev;
204
205 if (idev != NULL) {
206 rt->rt6i_idev = NULL;
207 in6_dev_put(idev);
1ab1457c 208 }
1da177e4
LT
209}
210
211static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
212 int how)
213{
214 struct rt6_info *rt = (struct rt6_info *)dst;
215 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6
DL
216 struct net_device *loopback_dev =
217 dev->nd_net->loopback_dev;
1da177e4 218
5a3e55d6
DL
219 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
220 struct inet6_dev *loopback_idev =
221 in6_dev_get(loopback_dev);
1da177e4
LT
222 if (loopback_idev != NULL) {
223 rt->rt6i_idev = loopback_idev;
224 in6_dev_put(idev);
225 }
226 }
227}
228
229static __inline__ int rt6_check_expired(const struct rt6_info *rt)
230{
231 return (rt->rt6i_flags & RTF_EXPIRES &&
232 time_after(jiffies, rt->rt6i_expires));
233}
234
c71099ac
TG
235static inline int rt6_need_strict(struct in6_addr *daddr)
236{
237 return (ipv6_addr_type(daddr) &
238 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
239}
240
1da177e4 241/*
c71099ac 242 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
243 */
244
245static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
246 int oif,
247 int strict)
248{
249 struct rt6_info *local = NULL;
250 struct rt6_info *sprt;
251
252 if (oif) {
7cc48263 253 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
254 struct net_device *dev = sprt->rt6i_dev;
255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
260 if (strict && oif)
261 continue;
1ab1457c 262 if (local && (!oif ||
1da177e4
LT
263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
268 }
269
270 if (local)
271 return local;
272
273 if (strict)
274 return &ip6_null_entry;
275 }
276 return rt;
277}
278
27097255
YH
279#ifdef CONFIG_IPV6_ROUTER_PREF
280static void rt6_probe(struct rt6_info *rt)
281{
282 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
283 /*
284 * Okay, this does not seem to be appropriate
285 * for now, however, we need to check if it
286 * is really so; aka Router Reachability Probing.
287 *
288 * Router Reachability Probe MUST be rate-limited
289 * to no more than one per minute.
290 */
291 if (!neigh || (neigh->nud_state & NUD_VALID))
292 return;
293 read_lock_bh(&neigh->lock);
294 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 295 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
296 struct in6_addr mcaddr;
297 struct in6_addr *target;
298
299 neigh->updated = jiffies;
300 read_unlock_bh(&neigh->lock);
301
302 target = (struct in6_addr *)&neigh->primary_key;
303 addrconf_addr_solict_mult(target, &mcaddr);
304 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
305 } else
306 read_unlock_bh(&neigh->lock);
307}
308#else
/* Stub used when CONFIG_IPV6_ROUTER_PREF is not set: no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
313#endif
314
1da177e4 315/*
554cfb7e 316 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 317 */
b6f99a21 318static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
319{
320 struct net_device *dev = rt->rt6i_dev;
161980f4 321 if (!oif || dev->ifindex == oif)
554cfb7e 322 return 2;
161980f4
DM
323 if ((dev->flags & IFF_LOOPBACK) &&
324 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
325 return 1;
326 return 0;
554cfb7e 327}
1da177e4 328
b6f99a21 329static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 330{
554cfb7e 331 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 332 int m;
4d0c5911
YH
333 if (rt->rt6i_flags & RTF_NONEXTHOP ||
334 !(rt->rt6i_flags & RTF_GATEWAY))
335 m = 1;
336 else if (neigh) {
554cfb7e
YH
337 read_lock_bh(&neigh->lock);
338 if (neigh->nud_state & NUD_VALID)
4d0c5911 339 m = 2;
398bcbeb
YH
340#ifdef CONFIG_IPV6_ROUTER_PREF
341 else if (neigh->nud_state & NUD_FAILED)
342 m = 0;
343#endif
344 else
ea73ee23 345 m = 1;
554cfb7e 346 read_unlock_bh(&neigh->lock);
398bcbeb
YH
347 } else
348 m = 0;
554cfb7e 349 return m;
1da177e4
LT
350}
351
554cfb7e
YH
352static int rt6_score_route(struct rt6_info *rt, int oif,
353 int strict)
1da177e4 354{
4d0c5911 355 int m, n;
1ab1457c 356
4d0c5911 357 m = rt6_check_dev(rt, oif);
77d16f45 358 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 359 return -1;
ebacaaa0
YH
360#ifdef CONFIG_IPV6_ROUTER_PREF
361 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
362#endif
4d0c5911 363 n = rt6_check_neigh(rt);
557e92ef 364 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
365 return -1;
366 return m;
367}
368
f11e6659
DM
369static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
370 int *mpri, struct rt6_info *match)
554cfb7e 371{
f11e6659
DM
372 int m;
373
374 if (rt6_check_expired(rt))
375 goto out;
376
377 m = rt6_score_route(rt, oif, strict);
378 if (m < 0)
379 goto out;
380
381 if (m > *mpri) {
382 if (strict & RT6_LOOKUP_F_REACHABLE)
383 rt6_probe(match);
384 *mpri = m;
385 match = rt;
386 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
387 rt6_probe(rt);
388 }
389
390out:
391 return match;
392}
393
394static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
395 struct rt6_info *rr_head,
396 u32 metric, int oif, int strict)
397{
398 struct rt6_info *rt, *match;
554cfb7e 399 int mpri = -1;
1da177e4 400
f11e6659
DM
401 match = NULL;
402 for (rt = rr_head; rt && rt->rt6i_metric == metric;
403 rt = rt->u.dst.rt6_next)
404 match = find_match(rt, oif, strict, &mpri, match);
405 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
406 rt = rt->u.dst.rt6_next)
407 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 408
f11e6659
DM
409 return match;
410}
1da177e4 411
f11e6659
DM
412static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
413{
414 struct rt6_info *match, *rt0;
1da177e4 415
f11e6659
DM
416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
417 __FUNCTION__, fn->leaf, oif);
554cfb7e 418
f11e6659
DM
419 rt0 = fn->rr_ptr;
420 if (!rt0)
421 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 422
f11e6659 423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 424
554cfb7e 425 if (!match &&
f11e6659
DM
426 (strict & RT6_LOOKUP_F_REACHABLE)) {
427 struct rt6_info *next = rt0->u.dst.rt6_next;
428
554cfb7e 429 /* no entries matched; do round-robin */
f11e6659
DM
430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 next = fn->leaf;
432
433 if (next != rt0)
434 fn->rr_ptr = next;
1da177e4 435 }
1da177e4 436
f11e6659
DM
437 RT6_TRACE("%s() => %p\n",
438 __FUNCTION__, match);
1da177e4 439
554cfb7e 440 return (match ? match : &ip6_null_entry);
1da177e4
LT
441}
442
70ceb4f5
YH
443#ifdef CONFIG_IPV6_ROUTE_INFO
444int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
445 struct in6_addr *gwaddr)
446{
efa2cea0 447 struct net *net = dev->nd_net;
70ceb4f5
YH
448 struct route_info *rinfo = (struct route_info *) opt;
449 struct in6_addr prefix_buf, *prefix;
450 unsigned int pref;
451 u32 lifetime;
452 struct rt6_info *rt;
453
454 if (len < sizeof(struct route_info)) {
455 return -EINVAL;
456 }
457
458 /* Sanity check for prefix_len and length */
459 if (rinfo->length > 3) {
460 return -EINVAL;
461 } else if (rinfo->prefix_len > 128) {
462 return -EINVAL;
463 } else if (rinfo->prefix_len > 64) {
464 if (rinfo->length < 2) {
465 return -EINVAL;
466 }
467 } else if (rinfo->prefix_len > 0) {
468 if (rinfo->length < 1) {
469 return -EINVAL;
470 }
471 }
472
473 pref = rinfo->route_pref;
474 if (pref == ICMPV6_ROUTER_PREF_INVALID)
475 pref = ICMPV6_ROUTER_PREF_MEDIUM;
476
e69a4adc 477 lifetime = ntohl(rinfo->lifetime);
70ceb4f5
YH
478 if (lifetime == 0xffffffff) {
479 /* infinity */
480 } else if (lifetime > 0x7fffffff/HZ) {
481 /* Avoid arithmetic overflow */
482 lifetime = 0x7fffffff/HZ - 1;
483 }
484
485 if (rinfo->length == 3)
486 prefix = (struct in6_addr *)rinfo->prefix;
487 else {
488 /* this function is safe */
489 ipv6_addr_prefix(&prefix_buf,
490 (struct in6_addr *)rinfo->prefix,
491 rinfo->prefix_len);
492 prefix = &prefix_buf;
493 }
494
efa2cea0
DL
495 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
496 dev->ifindex);
70ceb4f5
YH
497
498 if (rt && !lifetime) {
e0a1ad73 499 ip6_del_rt(rt);
70ceb4f5
YH
500 rt = NULL;
501 }
502
503 if (!rt && lifetime)
efa2cea0 504 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
505 pref);
506 else if (rt)
507 rt->rt6i_flags = RTF_ROUTEINFO |
508 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
509
510 if (rt) {
511 if (lifetime == 0xffffffff) {
512 rt->rt6i_flags &= ~RTF_EXPIRES;
513 } else {
514 rt->rt6i_expires = jiffies + HZ * lifetime;
515 rt->rt6i_flags |= RTF_EXPIRES;
516 }
517 dst_release(&rt->u.dst);
518 }
519 return 0;
520}
521#endif
522
982f56f3
YH
/*
 * BACKTRACK(saddr): when the current lookup result `rt` is
 * &ip6_null_entry, climb from `fn` towards the tree root (descending
 * into a parent's source subtree via fib6_lookup() where one exists)
 * until a node flagged RTN_RTINFO is found.
 *
 * Relies on the caller providing variables `rt` and `fn` and the labels
 * `restart` (retry selection at the new fn) and `out` (give up at the
 * top-level root).
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
540
541static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
542 struct flowi *fl, int flags)
1da177e4
LT
543{
544 struct fib6_node *fn;
545 struct rt6_info *rt;
546
c71099ac
TG
547 read_lock_bh(&table->tb6_lock);
548 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
549restart:
550 rt = fn->leaf;
77d16f45 551 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 552 BACKTRACK(&fl->fl6_src);
c71099ac 553out:
03f49f34 554 dst_use(&rt->u.dst, jiffies);
c71099ac 555 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
556 return rt;
557
558}
559
606a2b48
DL
560struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
561 struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
562{
563 struct flowi fl = {
564 .oif = oif,
565 .nl_u = {
566 .ip6_u = {
567 .daddr = *daddr,
c71099ac
TG
568 },
569 },
570 };
571 struct dst_entry *dst;
77d16f45 572 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 573
adaa70bb
TG
574 if (saddr) {
575 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
576 flags |= RT6_LOOKUP_F_HAS_SADDR;
577 }
578
606a2b48 579 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
580 if (dst->error == 0)
581 return (struct rt6_info *) dst;
582
583 dst_release(dst);
584
1da177e4
LT
585 return NULL;
586}
587
7159039a
YH
588EXPORT_SYMBOL(rt6_lookup);
589
/* ip6_ins_rt() is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
595
86872cb5 596static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
597{
598 int err;
c71099ac 599 struct fib6_table *table;
1da177e4 600
c71099ac
TG
601 table = rt->rt6i_table;
602 write_lock_bh(&table->tb6_lock);
86872cb5 603 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 604 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
605
606 return err;
607}
608
40e22e8f
TG
609int ip6_ins_rt(struct rt6_info *rt)
610{
4d1169c1 611 struct nl_info info = {
5578689a 612 .nl_net = rt->rt6i_dev->nd_net,
4d1169c1 613 };
528c4ceb 614 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
615}
616
95a9a5ba
YH
617static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618 struct in6_addr *saddr)
1da177e4 619{
1da177e4
LT
620 struct rt6_info *rt;
621
622 /*
623 * Clone the route.
624 */
625
626 rt = ip6_rt_copy(ort);
627
628 if (rt) {
58c4fb86
YH
629 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630 if (rt->rt6i_dst.plen != 128 &&
631 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 633 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 634 }
1da177e4 635
58c4fb86 636 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
637 rt->rt6i_dst.plen = 128;
638 rt->rt6i_flags |= RTF_CACHE;
639 rt->u.dst.flags |= DST_HOST;
640
641#ifdef CONFIG_IPV6_SUBTREES
642 if (rt->rt6i_src.plen && saddr) {
643 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644 rt->rt6i_src.plen = 128;
645 }
646#endif
647
648 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649
95a9a5ba 650 }
1da177e4 651
95a9a5ba
YH
652 return rt;
653}
1da177e4 654
299d9939
YH
655static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656{
657 struct rt6_info *rt = ip6_rt_copy(ort);
658 if (rt) {
659 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660 rt->rt6i_dst.plen = 128;
661 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
662 rt->u.dst.flags |= DST_HOST;
663 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664 }
665 return rt;
666}
667
4acad72d 668static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
8ce11e6a 669 struct flowi *fl, int flags)
1da177e4
LT
670{
671 struct fib6_node *fn;
519fbd87 672 struct rt6_info *rt, *nrt;
c71099ac 673 int strict = 0;
1da177e4 674 int attempts = 3;
519fbd87 675 int err;
ea659e07 676 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 677
77d16f45 678 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
679
680relookup:
c71099ac 681 read_lock_bh(&table->tb6_lock);
1da177e4 682
8238dd06 683restart_2:
c71099ac 684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
685
686restart:
4acad72d 687 rt = rt6_select(fn, oif, strict | reachable);
982f56f3 688 BACKTRACK(&fl->fl6_src);
8238dd06
YH
689 if (rt == &ip6_null_entry ||
690 rt->rt6i_flags & RTF_CACHE)
1ddef044 691 goto out;
1da177e4 692
fb9de91e 693 dst_hold(&rt->u.dst);
c71099ac 694 read_unlock_bh(&table->tb6_lock);
fb9de91e 695
519fbd87 696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
698 else {
699#if CLONE_OFFLINK_ROUTE
c71099ac 700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
701#else
702 goto out2;
703#endif
704 }
e40cf353 705
519fbd87
YH
706 dst_release(&rt->u.dst);
707 rt = nrt ? : &ip6_null_entry;
1da177e4 708
519fbd87
YH
709 dst_hold(&rt->u.dst);
710 if (nrt) {
40e22e8f 711 err = ip6_ins_rt(nrt);
519fbd87 712 if (!err)
1da177e4 713 goto out2;
1da177e4 714 }
1da177e4 715
519fbd87
YH
716 if (--attempts <= 0)
717 goto out2;
718
719 /*
c71099ac 720 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
721 * released someone could insert this route. Relookup.
722 */
723 dst_release(&rt->u.dst);
724 goto relookup;
725
726out:
8238dd06
YH
727 if (reachable) {
728 reachable = 0;
729 goto restart_2;
730 }
519fbd87 731 dst_hold(&rt->u.dst);
c71099ac 732 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
733out2:
734 rt->u.dst.lastuse = jiffies;
735 rt->u.dst.__use++;
c71099ac
TG
736
737 return rt;
1da177e4
LT
738}
739
4acad72d
PE
740static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
741 struct flowi *fl, int flags)
742{
743 return ip6_pol_route(table, fl->iif, fl, flags);
744}
745
c71099ac
TG
746void ip6_route_input(struct sk_buff *skb)
747{
0660e03f 748 struct ipv6hdr *iph = ipv6_hdr(skb);
5578689a 749 struct net *net = skb->dev->nd_net;
adaa70bb 750 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
751 struct flowi fl = {
752 .iif = skb->dev->ifindex,
753 .nl_u = {
754 .ip6_u = {
755 .daddr = iph->daddr,
756 .saddr = iph->saddr,
90bcaf7b 757 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
758 },
759 },
1ab1457c 760 .mark = skb->mark,
c71099ac
TG
761 .proto = iph->nexthdr,
762 };
adaa70bb
TG
763
764 if (rt6_need_strict(&iph->daddr))
765 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 766
5578689a 767 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
c71099ac
TG
768}
769
770static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
771 struct flowi *fl, int flags)
1da177e4 772{
4acad72d 773 return ip6_pol_route(table, fl->oif, fl, flags);
c71099ac
TG
774}
775
776struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
777{
778 int flags = 0;
779
780 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 781 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 782
adaa70bb
TG
783 if (!ipv6_addr_any(&fl->fl6_src))
784 flags |= RT6_LOOKUP_F_HAS_SADDR;
785
58f09b78 786 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
1da177e4
LT
787}
788
7159039a 789EXPORT_SYMBOL(ip6_route_output);
1da177e4 790
14e50e57
DM
791int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
792{
793 struct rt6_info *ort = (struct rt6_info *) *dstp;
794 struct rt6_info *rt = (struct rt6_info *)
795 dst_alloc(&ip6_dst_blackhole_ops);
796 struct dst_entry *new = NULL;
797
798 if (rt) {
799 new = &rt->u.dst;
800
801 atomic_set(&new->__refcnt, 1);
802 new->__use = 1;
352e512c
HX
803 new->input = dst_discard;
804 new->output = dst_discard;
14e50e57
DM
805
806 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
807 new->dev = ort->u.dst.dev;
808 if (new->dev)
809 dev_hold(new->dev);
810 rt->rt6i_idev = ort->rt6i_idev;
811 if (rt->rt6i_idev)
812 in6_dev_hold(rt->rt6i_idev);
813 rt->rt6i_expires = 0;
814
815 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
816 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
817 rt->rt6i_metric = 0;
818
819 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
820#ifdef CONFIG_IPV6_SUBTREES
821 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
822#endif
823
824 dst_free(new);
825 }
826
827 dst_release(*dstp);
828 *dstp = new;
829 return (new ? 0 : -ENOMEM);
830}
831EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
832
1da177e4
LT
833/*
834 * Destination cache support functions
835 */
836
837static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
838{
839 struct rt6_info *rt;
840
841 rt = (struct rt6_info *) dst;
842
843 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
844 return dst;
845
846 return NULL;
847}
848
849static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
850{
851 struct rt6_info *rt = (struct rt6_info *) dst;
852
853 if (rt) {
854 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 855 ip6_del_rt(rt);
1da177e4
LT
856 else
857 dst_release(dst);
858 }
859 return NULL;
860}
861
862static void ip6_link_failure(struct sk_buff *skb)
863{
864 struct rt6_info *rt;
865
866 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
867
868 rt = (struct rt6_info *) skb->dst;
869 if (rt) {
870 if (rt->rt6i_flags&RTF_CACHE) {
871 dst_set_expires(&rt->u.dst, 0);
872 rt->rt6i_flags |= RTF_EXPIRES;
873 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
874 rt->rt6i_node->fn_sernum = -1;
875 }
876}
877
878static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
879{
880 struct rt6_info *rt6 = (struct rt6_info*)dst;
881
882 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
883 rt6->rt6i_flags |= RTF_MODIFIED;
884 if (mtu < IPV6_MIN_MTU) {
885 mtu = IPV6_MIN_MTU;
886 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
887 }
888 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 889 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
890 }
891}
892
1da177e4
LT
893static int ipv6_get_mtu(struct net_device *dev);
894
5578689a 895static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
896{
897 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
898
5578689a
DL
899 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
900 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
901
902 /*
1ab1457c
YH
903 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
904 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
905 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
906 * rely only on pmtu discovery"
907 */
908 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
909 mtu = IPV6_MAXPLEN;
910 return mtu;
911}
912
3b00944c
YH
913static struct dst_entry *icmp6_dst_gc_list;
914static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 915
3b00944c 916struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 917 struct neighbour *neigh,
3b00944c 918 struct in6_addr *addr)
1da177e4
LT
919{
920 struct rt6_info *rt;
921 struct inet6_dev *idev = in6_dev_get(dev);
5578689a 922 struct net *net = dev->nd_net;
1da177e4
LT
923
924 if (unlikely(idev == NULL))
925 return NULL;
926
927 rt = ip6_dst_alloc();
928 if (unlikely(rt == NULL)) {
929 in6_dev_put(idev);
930 goto out;
931 }
932
933 dev_hold(dev);
934 if (neigh)
935 neigh_hold(neigh);
936 else
937 neigh = ndisc_get_neigh(dev, addr);
938
939 rt->rt6i_dev = dev;
940 rt->rt6i_idev = idev;
941 rt->rt6i_nexthop = neigh;
942 atomic_set(&rt->u.dst.__refcnt, 1);
943 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
944 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 945 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
3b00944c 946 rt->u.dst.output = ip6_output;
1da177e4
LT
947
948#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
949 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
950 ? DST_HOST
1da177e4
LT
951 : 0;
952 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
953 rt->rt6i_dst.plen = 128;
954#endif
955
3b00944c
YH
956 spin_lock_bh(&icmp6_dst_lock);
957 rt->u.dst.next = icmp6_dst_gc_list;
958 icmp6_dst_gc_list = &rt->u.dst;
959 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 960
5578689a 961 fib6_force_start_gc(net);
1da177e4
LT
962
963out:
40aa7b90 964 return &rt->u.dst;
1da177e4
LT
965}
966
3b00944c 967int icmp6_dst_gc(int *more)
1da177e4
LT
968{
969 struct dst_entry *dst, *next, **pprev;
970 int freed;
971
972 next = NULL;
1ab1457c 973 freed = 0;
5d0bbeeb 974
3b00944c
YH
975 spin_lock_bh(&icmp6_dst_lock);
976 pprev = &icmp6_dst_gc_list;
5d0bbeeb 977
1da177e4
LT
978 while ((dst = *pprev) != NULL) {
979 if (!atomic_read(&dst->__refcnt)) {
980 *pprev = dst->next;
981 dst_free(dst);
982 freed++;
983 } else {
984 pprev = &dst->next;
985 (*more)++;
986 }
987 }
988
3b00944c 989 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 990
1da177e4
LT
991 return freed;
992}
993
569d3645 994static int ip6_dst_gc(struct dst_ops *ops)
1da177e4
LT
995{
996 static unsigned expire = 30*HZ;
997 static unsigned long last_gc;
998 unsigned long now = jiffies;
999
4990509f
DL
1000 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1001 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1da177e4
LT
1002 goto out;
1003
1004 expire++;
5b7c931d 1005 fib6_run_gc(expire, &init_net);
1da177e4
LT
1006 last_gc = now;
1007 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
4990509f 1008 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1da177e4
LT
1009
1010out:
4990509f
DL
1011 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1012 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1da177e4
LT
1013}
1014
1015/* Clean host part of a prefix. Not necessary in radix tree,
1016 but results in cleaner routing tables.
1017
1018 Remove it only when all the things will work!
1019 */
1020
1021static int ipv6_get_mtu(struct net_device *dev)
1022{
1023 int mtu = IPV6_MIN_MTU;
1024 struct inet6_dev *idev;
1025
1026 idev = in6_dev_get(dev);
1027 if (idev) {
1028 mtu = idev->cnf.mtu6;
1029 in6_dev_put(idev);
1030 }
1031 return mtu;
1032}
1033
1034int ipv6_get_hoplimit(struct net_device *dev)
1035{
1036 int hoplimit = ipv6_devconf.hop_limit;
1037 struct inet6_dev *idev;
1038
1039 idev = in6_dev_get(dev);
1040 if (idev) {
1041 hoplimit = idev->cnf.hop_limit;
1042 in6_dev_put(idev);
1043 }
1044 return hoplimit;
1045}
1046
1047/*
1048 *
1049 */
1050
86872cb5 1051int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1052{
1053 int err;
5578689a 1054 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1055 struct rt6_info *rt = NULL;
1056 struct net_device *dev = NULL;
1057 struct inet6_dev *idev = NULL;
c71099ac 1058 struct fib6_table *table;
1da177e4
LT
1059 int addr_type;
1060
86872cb5 1061 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1062 return -EINVAL;
1063#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1064 if (cfg->fc_src_len)
1da177e4
LT
1065 return -EINVAL;
1066#endif
86872cb5 1067 if (cfg->fc_ifindex) {
1da177e4 1068 err = -ENODEV;
5578689a 1069 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1070 if (!dev)
1071 goto out;
1072 idev = in6_dev_get(dev);
1073 if (!idev)
1074 goto out;
1075 }
1076
86872cb5
TG
1077 if (cfg->fc_metric == 0)
1078 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1079
5578689a 1080 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1081 if (table == NULL) {
1082 err = -ENOBUFS;
1083 goto out;
1084 }
1085
1da177e4
LT
1086 rt = ip6_dst_alloc();
1087
1088 if (rt == NULL) {
1089 err = -ENOMEM;
1090 goto out;
1091 }
1092
1093 rt->u.dst.obsolete = -1;
86872cb5 1094 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1095
86872cb5
TG
1096 if (cfg->fc_protocol == RTPROT_UNSPEC)
1097 cfg->fc_protocol = RTPROT_BOOT;
1098 rt->rt6i_protocol = cfg->fc_protocol;
1099
1100 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1101
1102 if (addr_type & IPV6_ADDR_MULTICAST)
1103 rt->u.dst.input = ip6_mc_input;
1104 else
1105 rt->u.dst.input = ip6_forward;
1106
1107 rt->u.dst.output = ip6_output;
1108
86872cb5
TG
1109 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1110 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1111 if (rt->rt6i_dst.plen == 128)
1112 rt->u.dst.flags = DST_HOST;
1113
1114#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1115 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1116 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1117#endif
1118
86872cb5 1119 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1120
1121 /* We cannot add true routes via loopback here,
1122 they would result in kernel looping; promote them to reject routes
1123 */
86872cb5 1124 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1125 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1126 /* hold loopback dev/idev if we haven't done so. */
5578689a 1127 if (dev != net->loopback_dev) {
1da177e4
LT
1128 if (dev) {
1129 dev_put(dev);
1130 in6_dev_put(idev);
1131 }
5578689a 1132 dev = net->loopback_dev;
1da177e4
LT
1133 dev_hold(dev);
1134 idev = in6_dev_get(dev);
1135 if (!idev) {
1136 err = -ENODEV;
1137 goto out;
1138 }
1139 }
1140 rt->u.dst.output = ip6_pkt_discard_out;
1141 rt->u.dst.input = ip6_pkt_discard;
1142 rt->u.dst.error = -ENETUNREACH;
1143 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1144 goto install_route;
1145 }
1146
86872cb5 1147 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1148 struct in6_addr *gw_addr;
1149 int gwa_type;
1150
86872cb5
TG
1151 gw_addr = &cfg->fc_gateway;
1152 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1153 gwa_type = ipv6_addr_type(gw_addr);
1154
1155 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1156 struct rt6_info *grt;
1157
1158 /* IPv6 strictly inhibits using not link-local
1159 addresses as nexthop address.
1160 Otherwise, router will not able to send redirects.
1161 It is very good, but in some (rare!) circumstances
1162 (SIT, PtP, NBMA NOARP links) it is handy to allow
1163 some exceptions. --ANK
1164 */
1165 err = -EINVAL;
1166 if (!(gwa_type&IPV6_ADDR_UNICAST))
1167 goto out;
1168
5578689a 1169 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1170
1171 err = -EHOSTUNREACH;
1172 if (grt == NULL)
1173 goto out;
1174 if (dev) {
1175 if (dev != grt->rt6i_dev) {
1176 dst_release(&grt->u.dst);
1177 goto out;
1178 }
1179 } else {
1180 dev = grt->rt6i_dev;
1181 idev = grt->rt6i_idev;
1182 dev_hold(dev);
1183 in6_dev_hold(grt->rt6i_idev);
1184 }
1185 if (!(grt->rt6i_flags&RTF_GATEWAY))
1186 err = 0;
1187 dst_release(&grt->u.dst);
1188
1189 if (err)
1190 goto out;
1191 }
1192 err = -EINVAL;
1193 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1194 goto out;
1195 }
1196
1197 err = -ENODEV;
1198 if (dev == NULL)
1199 goto out;
1200
86872cb5 1201 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1202 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1203 if (IS_ERR(rt->rt6i_nexthop)) {
1204 err = PTR_ERR(rt->rt6i_nexthop);
1205 rt->rt6i_nexthop = NULL;
1206 goto out;
1207 }
1208 }
1209
86872cb5 1210 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1211
1212install_route:
86872cb5
TG
1213 if (cfg->fc_mx) {
1214 struct nlattr *nla;
1215 int remaining;
1216
1217 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1218 int type = nla_type(nla);
86872cb5
TG
1219
1220 if (type) {
1221 if (type > RTAX_MAX) {
1da177e4
LT
1222 err = -EINVAL;
1223 goto out;
1224 }
86872cb5
TG
1225
1226 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1227 }
1da177e4
LT
1228 }
1229 }
1230
1231 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1232 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1233 if (!rt->u.dst.metrics[RTAX_MTU-1])
1234 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1235 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
5578689a 1236 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1237 rt->u.dst.dev = dev;
1238 rt->rt6i_idev = idev;
c71099ac 1239 rt->rt6i_table = table;
63152fc0
DL
1240
1241 cfg->fc_nlinfo.nl_net = dev->nd_net;
1242
86872cb5 1243 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1244
1245out:
1246 if (dev)
1247 dev_put(dev);
1248 if (idev)
1249 in6_dev_put(idev);
1250 if (rt)
40aa7b90 1251 dst_free(&rt->u.dst);
1da177e4
LT
1252 return err;
1253}
1254
86872cb5 1255static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1256{
1257 int err;
c71099ac 1258 struct fib6_table *table;
1da177e4 1259
6c813a72
PM
1260 if (rt == &ip6_null_entry)
1261 return -ENOENT;
1262
c71099ac
TG
1263 table = rt->rt6i_table;
1264 write_lock_bh(&table->tb6_lock);
1da177e4 1265
86872cb5 1266 err = fib6_del(rt, info);
1da177e4
LT
1267 dst_release(&rt->u.dst);
1268
c71099ac 1269 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1270
1271 return err;
1272}
1273
e0a1ad73
TG
1274int ip6_del_rt(struct rt6_info *rt)
1275{
4d1169c1 1276 struct nl_info info = {
5578689a 1277 .nl_net = rt->rt6i_dev->nd_net,
4d1169c1 1278 };
528c4ceb 1279 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1280}
1281
86872cb5 1282static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1283{
c71099ac 1284 struct fib6_table *table;
1da177e4
LT
1285 struct fib6_node *fn;
1286 struct rt6_info *rt;
1287 int err = -ESRCH;
1288
5578689a 1289 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1290 if (table == NULL)
1291 return err;
1292
1293 read_lock_bh(&table->tb6_lock);
1da177e4 1294
c71099ac 1295 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1296 &cfg->fc_dst, cfg->fc_dst_len,
1297 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1298
1da177e4 1299 if (fn) {
7cc48263 1300 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1301 if (cfg->fc_ifindex &&
1da177e4 1302 (rt->rt6i_dev == NULL ||
86872cb5 1303 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1304 continue;
86872cb5
TG
1305 if (cfg->fc_flags & RTF_GATEWAY &&
1306 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1307 continue;
86872cb5 1308 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1309 continue;
1310 dst_hold(&rt->u.dst);
c71099ac 1311 read_unlock_bh(&table->tb6_lock);
1da177e4 1312
86872cb5 1313 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1314 }
1315 }
c71099ac 1316 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1317
1318 return err;
1319}
1320
1321/*
1322 * Handle redirects
1323 */
a6279458
YH
1324struct ip6rd_flowi {
1325 struct flowi fl;
1326 struct in6_addr gateway;
1327};
1328
1329static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1330 struct flowi *fl,
1331 int flags)
1da177e4 1332{
a6279458
YH
1333 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1334 struct rt6_info *rt;
e843b9e1 1335 struct fib6_node *fn;
c71099ac 1336
1da177e4 1337 /*
e843b9e1
YH
1338 * Get the "current" route for this destination and
1339 * check if the redirect has come from approriate router.
1340 *
1341 * RFC 2461 specifies that redirects should only be
1342 * accepted if they come from the nexthop to the target.
1343 * Due to the way the routes are chosen, this notion
1344 * is a bit fuzzy and one might need to check all possible
1345 * routes.
1da177e4 1346 */
1da177e4 1347
c71099ac 1348 read_lock_bh(&table->tb6_lock);
a6279458 1349 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1350restart:
7cc48263 1351 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1352 /*
1353 * Current route is on-link; redirect is always invalid.
1354 *
1355 * Seems, previous statement is not true. It could
1356 * be node, which looks for us as on-link (f.e. proxy ndisc)
1357 * But then router serving it might decide, that we should
1358 * know truth 8)8) --ANK (980726).
1359 */
1360 if (rt6_check_expired(rt))
1361 continue;
1362 if (!(rt->rt6i_flags & RTF_GATEWAY))
1363 continue;
a6279458 1364 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1365 continue;
a6279458 1366 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1367 continue;
1368 break;
1369 }
a6279458 1370
cb15d9c2 1371 if (!rt)
a6279458 1372 rt = &ip6_null_entry;
cb15d9c2
YH
1373 BACKTRACK(&fl->fl6_src);
1374out:
a6279458
YH
1375 dst_hold(&rt->u.dst);
1376
c71099ac 1377 read_unlock_bh(&table->tb6_lock);
e843b9e1 1378
a6279458
YH
1379 return rt;
1380};
1381
1382static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1383 struct in6_addr *src,
1384 struct in6_addr *gateway,
1385 struct net_device *dev)
1386{
adaa70bb 1387 int flags = RT6_LOOKUP_F_HAS_SADDR;
5578689a 1388 struct net *net = dev->nd_net;
a6279458
YH
1389 struct ip6rd_flowi rdfl = {
1390 .fl = {
1391 .oif = dev->ifindex,
1392 .nl_u = {
1393 .ip6_u = {
1394 .daddr = *dest,
1395 .saddr = *src,
1396 },
1397 },
1398 },
1399 .gateway = *gateway,
1400 };
adaa70bb
TG
1401
1402 if (rt6_need_strict(dest))
1403 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1404
5578689a 1405 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1406 flags, __ip6_route_redirect);
a6279458
YH
1407}
1408
1409void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1410 struct in6_addr *saddr,
1411 struct neighbour *neigh, u8 *lladdr, int on_link)
1412{
1413 struct rt6_info *rt, *nrt = NULL;
1414 struct netevent_redirect netevent;
1415
1416 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1417
1418 if (rt == &ip6_null_entry) {
1da177e4
LT
1419 if (net_ratelimit())
1420 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1421 "for redirect target\n");
a6279458 1422 goto out;
1da177e4
LT
1423 }
1424
1da177e4
LT
1425 /*
1426 * We have finally decided to accept it.
1427 */
1428
1ab1457c 1429 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1430 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1431 NEIGH_UPDATE_F_OVERRIDE|
1432 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1433 NEIGH_UPDATE_F_ISROUTER))
1434 );
1435
1436 /*
1437 * Redirect received -> path was valid.
1438 * Look, redirects are sent only in response to data packets,
1439 * so that this nexthop apparently is reachable. --ANK
1440 */
1441 dst_confirm(&rt->u.dst);
1442
1443 /* Duplicate redirect: silently ignore. */
1444 if (neigh == rt->u.dst.neighbour)
1445 goto out;
1446
1447 nrt = ip6_rt_copy(rt);
1448 if (nrt == NULL)
1449 goto out;
1450
1451 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1452 if (on_link)
1453 nrt->rt6i_flags &= ~RTF_GATEWAY;
1454
1455 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1456 nrt->rt6i_dst.plen = 128;
1457 nrt->u.dst.flags |= DST_HOST;
1458
1459 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1460 nrt->rt6i_nexthop = neigh_clone(neigh);
1461 /* Reset pmtu, it may be better */
1462 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
5578689a
DL
1463 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1464 dst_mtu(&nrt->u.dst));
1da177e4 1465
40e22e8f 1466 if (ip6_ins_rt(nrt))
1da177e4
LT
1467 goto out;
1468
8d71740c
TT
1469 netevent.old = &rt->u.dst;
1470 netevent.new = &nrt->u.dst;
1471 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1472
1da177e4 1473 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1474 ip6_del_rt(rt);
1da177e4
LT
1475 return;
1476 }
1477
1478out:
1ab1457c 1479 dst_release(&rt->u.dst);
1da177e4
LT
1480 return;
1481}
1482
1483/*
1484 * Handle ICMP "packet too big" messages
1485 * i.e. Path MTU discovery
1486 */
1487
1488void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1489 struct net_device *dev, u32 pmtu)
1490{
1491 struct rt6_info *rt, *nrt;
5578689a 1492 struct net *net = dev->nd_net;
1da177e4
LT
1493 int allfrag = 0;
1494
5578689a 1495 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1496 if (rt == NULL)
1497 return;
1498
1499 if (pmtu >= dst_mtu(&rt->u.dst))
1500 goto out;
1501
1502 if (pmtu < IPV6_MIN_MTU) {
1503 /*
1ab1457c 1504 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1505 * MTU (1280) and a fragment header should always be included
1506 * after a node receiving Too Big message reporting PMTU is
1507 * less than the IPv6 Minimum Link MTU.
1508 */
1509 pmtu = IPV6_MIN_MTU;
1510 allfrag = 1;
1511 }
1512
1513 /* New mtu received -> path was valid.
1514 They are sent only in response to data packets,
1515 so that this nexthop apparently is reachable. --ANK
1516 */
1517 dst_confirm(&rt->u.dst);
1518
1519 /* Host route. If it is static, it would be better
1520 not to override it, but add new one, so that
1521 when cache entry will expire old pmtu
1522 would return automatically.
1523 */
1524 if (rt->rt6i_flags & RTF_CACHE) {
1525 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1526 if (allfrag)
1527 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
5578689a 1528 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1529 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1530 goto out;
1531 }
1532
1533 /* Network route.
1534 Two cases are possible:
1535 1. It is connected route. Action: COW
1536 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1537 */
d5315b50 1538 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1539 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1540 else
1541 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1542
d5315b50 1543 if (nrt) {
a1e78363
YH
1544 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1545 if (allfrag)
1546 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1547
1548 /* According to RFC 1981, detecting PMTU increase shouldn't be
1549 * happened within 5 mins, the recommended timer is 10 mins.
1550 * Here this route expiration time is set to ip6_rt_mtu_expires
1551 * which is 10 mins. After 10 mins the decreased pmtu is expired
1552 * and detecting PMTU increase will be automatically happened.
1553 */
5578689a 1554 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1555 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1556
40e22e8f 1557 ip6_ins_rt(nrt);
1da177e4 1558 }
1da177e4
LT
1559out:
1560 dst_release(&rt->u.dst);
1561}
1562
1563/*
1564 * Misc support functions
1565 */
1566
1567static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1568{
1569 struct rt6_info *rt = ip6_dst_alloc();
1570
1571 if (rt) {
1572 rt->u.dst.input = ort->u.dst.input;
1573 rt->u.dst.output = ort->u.dst.output;
1574
1575 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1576 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1577 rt->u.dst.dev = ort->u.dst.dev;
1578 if (rt->u.dst.dev)
1579 dev_hold(rt->u.dst.dev);
1580 rt->rt6i_idev = ort->rt6i_idev;
1581 if (rt->rt6i_idev)
1582 in6_dev_hold(rt->rt6i_idev);
1583 rt->u.dst.lastuse = jiffies;
1584 rt->rt6i_expires = 0;
1585
1586 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1587 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1588 rt->rt6i_metric = 0;
1589
1590 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1591#ifdef CONFIG_IPV6_SUBTREES
1592 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1593#endif
c71099ac 1594 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1595 }
1596 return rt;
1597}
1598
70ceb4f5 1599#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1600static struct rt6_info *rt6_get_route_info(struct net *net,
1601 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1602 struct in6_addr *gwaddr, int ifindex)
1603{
1604 struct fib6_node *fn;
1605 struct rt6_info *rt = NULL;
c71099ac
TG
1606 struct fib6_table *table;
1607
efa2cea0 1608 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1609 if (table == NULL)
1610 return NULL;
70ceb4f5 1611
c71099ac
TG
1612 write_lock_bh(&table->tb6_lock);
1613 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1614 if (!fn)
1615 goto out;
1616
7cc48263 1617 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1618 if (rt->rt6i_dev->ifindex != ifindex)
1619 continue;
1620 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1621 continue;
1622 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1623 continue;
1624 dst_hold(&rt->u.dst);
1625 break;
1626 }
1627out:
c71099ac 1628 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1629 return rt;
1630}
1631
efa2cea0
DL
1632static struct rt6_info *rt6_add_route_info(struct net *net,
1633 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1634 struct in6_addr *gwaddr, int ifindex,
1635 unsigned pref)
1636{
86872cb5
TG
1637 struct fib6_config cfg = {
1638 .fc_table = RT6_TABLE_INFO,
238fc7ea 1639 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1640 .fc_ifindex = ifindex,
1641 .fc_dst_len = prefixlen,
1642 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1643 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1644 .fc_nlinfo.pid = 0,
1645 .fc_nlinfo.nlh = NULL,
1646 .fc_nlinfo.nl_net = net,
86872cb5
TG
1647 };
1648
1649 ipv6_addr_copy(&cfg.fc_dst, prefix);
1650 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1651
e317da96
YH
1652 /* We should treat it as a default route if prefix length is 0. */
1653 if (!prefixlen)
86872cb5 1654 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1655
86872cb5 1656 ip6_route_add(&cfg);
70ceb4f5 1657
efa2cea0 1658 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1659}
1660#endif
1661
1da177e4 1662struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1663{
1da177e4 1664 struct rt6_info *rt;
c71099ac 1665 struct fib6_table *table;
1da177e4 1666
5578689a 1667 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
c71099ac
TG
1668 if (table == NULL)
1669 return NULL;
1da177e4 1670
c71099ac 1671 write_lock_bh(&table->tb6_lock);
7cc48263 1672 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1673 if (dev == rt->rt6i_dev &&
045927ff 1674 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1675 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1676 break;
1677 }
1678 if (rt)
1679 dst_hold(&rt->u.dst);
c71099ac 1680 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1681 return rt;
1682}
1683
c7dc89c0
FT
1684EXPORT_SYMBOL(rt6_get_dflt_router);
1685
1da177e4 1686struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1687 struct net_device *dev,
1688 unsigned int pref)
1da177e4 1689{
86872cb5
TG
1690 struct fib6_config cfg = {
1691 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1692 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1693 .fc_ifindex = dev->ifindex,
1694 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1695 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1696 .fc_nlinfo.pid = 0,
1697 .fc_nlinfo.nlh = NULL,
1698 .fc_nlinfo.nl_net = dev->nd_net,
86872cb5 1699 };
1da177e4 1700
86872cb5 1701 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1702
86872cb5 1703 ip6_route_add(&cfg);
1da177e4 1704
1da177e4
LT
1705 return rt6_get_dflt_router(gwaddr, dev);
1706}
1707
7b4da532 1708void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1709{
1710 struct rt6_info *rt;
c71099ac
TG
1711 struct fib6_table *table;
1712
1713 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1714 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1715 if (table == NULL)
1716 return;
1da177e4
LT
1717
1718restart:
c71099ac 1719 read_lock_bh(&table->tb6_lock);
7cc48263 1720 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1721 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1722 dst_hold(&rt->u.dst);
c71099ac 1723 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1724 ip6_del_rt(rt);
1da177e4
LT
1725 goto restart;
1726 }
1727 }
c71099ac 1728 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1729}
1730
5578689a
DL
1731static void rtmsg_to_fib6_config(struct net *net,
1732 struct in6_rtmsg *rtmsg,
86872cb5
TG
1733 struct fib6_config *cfg)
1734{
1735 memset(cfg, 0, sizeof(*cfg));
1736
1737 cfg->fc_table = RT6_TABLE_MAIN;
1738 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1739 cfg->fc_metric = rtmsg->rtmsg_metric;
1740 cfg->fc_expires = rtmsg->rtmsg_info;
1741 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1742 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1743 cfg->fc_flags = rtmsg->rtmsg_flags;
1744
5578689a 1745 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1746
86872cb5
TG
1747 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1748 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1749 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1750}
1751
5578689a 1752int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1753{
86872cb5 1754 struct fib6_config cfg;
1da177e4
LT
1755 struct in6_rtmsg rtmsg;
1756 int err;
1757
1758 switch(cmd) {
1759 case SIOCADDRT: /* Add a route */
1760 case SIOCDELRT: /* Delete a route */
1761 if (!capable(CAP_NET_ADMIN))
1762 return -EPERM;
1763 err = copy_from_user(&rtmsg, arg,
1764 sizeof(struct in6_rtmsg));
1765 if (err)
1766 return -EFAULT;
86872cb5 1767
5578689a 1768 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1769
1da177e4
LT
1770 rtnl_lock();
1771 switch (cmd) {
1772 case SIOCADDRT:
86872cb5 1773 err = ip6_route_add(&cfg);
1da177e4
LT
1774 break;
1775 case SIOCDELRT:
86872cb5 1776 err = ip6_route_del(&cfg);
1da177e4
LT
1777 break;
1778 default:
1779 err = -EINVAL;
1780 }
1781 rtnl_unlock();
1782
1783 return err;
3ff50b79 1784 }
1da177e4
LT
1785
1786 return -EINVAL;
1787}
1788
1789/*
1790 * Drop the packet on the floor
1791 */
1792
50eb431d 1793static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1da177e4 1794{
612f09e8
YH
1795 int type;
1796 switch (ipstats_mib_noroutes) {
1797 case IPSTATS_MIB_INNOROUTES:
0660e03f 1798 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1799 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1800 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1801 break;
1802 }
1803 /* FALLTHROUGH */
1804 case IPSTATS_MIB_OUTNOROUTES:
1805 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1806 break;
1807 }
9ce8ade0 1808 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1809 kfree_skb(skb);
1810 return 0;
1811}
1812
9ce8ade0
TG
1813static int ip6_pkt_discard(struct sk_buff *skb)
1814{
612f09e8 1815 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1816}
1817
20380731 1818static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1819{
1820 skb->dev = skb->dst->dev;
612f09e8 1821 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1822}
1823
6723ab54
DM
1824#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1825
9ce8ade0
TG
1826static int ip6_pkt_prohibit(struct sk_buff *skb)
1827{
612f09e8 1828 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1829}
1830
1831static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1832{
1833 skb->dev = skb->dst->dev;
612f09e8 1834 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1835}
1836
6723ab54
DM
1837#endif
1838
1da177e4
LT
1839/*
1840 * Allocate a dst for local (unicast / anycast) address.
1841 */
1842
1843struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1844 const struct in6_addr *addr,
1845 int anycast)
1846{
5578689a 1847 struct net *net = idev->dev->nd_net;
1da177e4
LT
1848 struct rt6_info *rt = ip6_dst_alloc();
1849
1850 if (rt == NULL)
1851 return ERR_PTR(-ENOMEM);
1852
5578689a 1853 dev_hold(net->loopback_dev);
1da177e4
LT
1854 in6_dev_hold(idev);
1855
1856 rt->u.dst.flags = DST_HOST;
1857 rt->u.dst.input = ip6_input;
1858 rt->u.dst.output = ip6_output;
5578689a 1859 rt->rt6i_dev = net->loopback_dev;
1da177e4
LT
1860 rt->rt6i_idev = idev;
1861 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 1862 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1863 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1864 rt->u.dst.obsolete = -1;
1865
1866 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1867 if (anycast)
1868 rt->rt6i_flags |= RTF_ANYCAST;
1869 else
1da177e4
LT
1870 rt->rt6i_flags |= RTF_LOCAL;
1871 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1872 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1873 dst_free(&rt->u.dst);
1da177e4
LT
1874 return ERR_PTR(-ENOMEM);
1875 }
1876
1877 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1878 rt->rt6i_dst.plen = 128;
5578689a 1879 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4
LT
1880
1881 atomic_set(&rt->u.dst.__refcnt, 1);
1882
1883 return rt;
1884}
1885
1886static int fib6_ifdown(struct rt6_info *rt, void *arg)
1887{
1888 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1889 rt != &ip6_null_entry) {
1890 RT6_TRACE("deleted by ifdown %p\n", rt);
1891 return -1;
1892 }
1893 return 0;
1894}
1895
f3db4851 1896void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1897{
f3db4851 1898 fib6_clean_all(net, fib6_ifdown, 0, dev);
1da177e4
LT
1899}
1900
/*
 *	Argument bundle handed by rt6_mtu_change() to rt6_mtu_change_route()
 *	through fib6_clean_all().
 */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1906
1907static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1908{
1909 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1910 struct inet6_dev *idev;
5578689a 1911 struct net *net = arg->dev->nd_net;
1da177e4
LT
1912
1913 /* In IPv6 pmtu discovery is not optional,
1914 so that RTAX_MTU lock cannot disable it.
1915 We still use this lock to block changes
1916 caused by addrconf/ndisc.
1917 */
1918
1919 idev = __in6_dev_get(arg->dev);
1920 if (idev == NULL)
1921 return 0;
1922
1923 /* For administrative MTU increase, there is no way to discover
1924 IPv6 PMTU increase, so PMTU increase should be updated here.
1925 Since RFC 1981 doesn't include administrative MTU increase
1926 update PMTU increase is a MUST. (i.e. jumbo frame)
1927 */
1928 /*
1929 If new MTU is less than route PMTU, this new MTU will be the
1930 lowest MTU in the path, update the route PMTU to reflect PMTU
1931 decreases; if new MTU is greater than route PMTU, and the
1932 old MTU is the lowest MTU in the path, update the route PMTU
1933 to reflect the increase. In this case if the other nodes' MTU
1934 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1935 PMTU discouvery.
1936 */
1937 if (rt->rt6i_dev == arg->dev &&
1938 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 1939 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 1940 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1941 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1942 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
5578689a 1943 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 1944 }
1da177e4
LT
1945 return 0;
1946}
1947
1948void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1949{
c71099ac
TG
1950 struct rt6_mtu_change_arg arg = {
1951 .dev = dev,
1952 .mtu = mtu,
1953 };
1da177e4 1954
f3db4851 1955 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1956}
1957
ef7c79ed 1958static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 1959 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1960 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1961 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1962 [RTA_PRIORITY] = { .type = NLA_U32 },
1963 [RTA_METRICS] = { .type = NLA_NESTED },
1964};
1965
1966static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1967 struct fib6_config *cfg)
1da177e4 1968{
86872cb5
TG
1969 struct rtmsg *rtm;
1970 struct nlattr *tb[RTA_MAX+1];
1971 int err;
1da177e4 1972
86872cb5
TG
1973 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1974 if (err < 0)
1975 goto errout;
1da177e4 1976
86872cb5
TG
1977 err = -EINVAL;
1978 rtm = nlmsg_data(nlh);
1979 memset(cfg, 0, sizeof(*cfg));
1980
1981 cfg->fc_table = rtm->rtm_table;
1982 cfg->fc_dst_len = rtm->rtm_dst_len;
1983 cfg->fc_src_len = rtm->rtm_src_len;
1984 cfg->fc_flags = RTF_UP;
1985 cfg->fc_protocol = rtm->rtm_protocol;
1986
1987 if (rtm->rtm_type == RTN_UNREACHABLE)
1988 cfg->fc_flags |= RTF_REJECT;
1989
1990 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1991 cfg->fc_nlinfo.nlh = nlh;
2216b483 1992 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
86872cb5
TG
1993
1994 if (tb[RTA_GATEWAY]) {
1995 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1996 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1997 }
86872cb5
TG
1998
1999 if (tb[RTA_DST]) {
2000 int plen = (rtm->rtm_dst_len + 7) >> 3;
2001
2002 if (nla_len(tb[RTA_DST]) < plen)
2003 goto errout;
2004
2005 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2006 }
86872cb5
TG
2007
2008 if (tb[RTA_SRC]) {
2009 int plen = (rtm->rtm_src_len + 7) >> 3;
2010
2011 if (nla_len(tb[RTA_SRC]) < plen)
2012 goto errout;
2013
2014 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2015 }
86872cb5
TG
2016
2017 if (tb[RTA_OIF])
2018 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2019
2020 if (tb[RTA_PRIORITY])
2021 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2022
2023 if (tb[RTA_METRICS]) {
2024 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2025 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2026 }
86872cb5
TG
2027
2028 if (tb[RTA_TABLE])
2029 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2030
2031 err = 0;
2032errout:
2033 return err;
1da177e4
LT
2034}
2035
c127ea2c 2036static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2037{
86872cb5
TG
2038 struct fib6_config cfg;
2039 int err;
1da177e4 2040
86872cb5
TG
2041 err = rtm_to_fib6_config(skb, nlh, &cfg);
2042 if (err < 0)
2043 return err;
2044
2045 return ip6_route_del(&cfg);
1da177e4
LT
2046}
2047
c127ea2c 2048static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2049{
86872cb5
TG
2050 struct fib6_config cfg;
2051 int err;
1da177e4 2052
86872cb5
TG
2053 err = rtm_to_fib6_config(skb, nlh, &cfg);
2054 if (err < 0)
2055 return err;
2056
2057 return ip6_route_add(&cfg);
1da177e4
LT
2058}
2059
339bf98f
TG
2060static inline size_t rt6_nlmsg_size(void)
2061{
2062 return NLMSG_ALIGN(sizeof(struct rtmsg))
2063 + nla_total_size(16) /* RTA_SRC */
2064 + nla_total_size(16) /* RTA_DST */
2065 + nla_total_size(16) /* RTA_GATEWAY */
2066 + nla_total_size(16) /* RTA_PREFSRC */
2067 + nla_total_size(4) /* RTA_TABLE */
2068 + nla_total_size(4) /* RTA_IIF */
2069 + nla_total_size(4) /* RTA_OIF */
2070 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2071 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2072 + nla_total_size(sizeof(struct rta_cacheinfo));
2073}
2074
1da177e4 2075static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2076 struct in6_addr *dst, struct in6_addr *src,
2077 int iif, int type, u32 pid, u32 seq,
2078 int prefix, unsigned int flags)
1da177e4
LT
2079{
2080 struct rtmsg *rtm;
2d7202bf 2081 struct nlmsghdr *nlh;
e3703b3d 2082 long expires;
9e762a4a 2083 u32 table;
1da177e4
LT
2084
2085 if (prefix) { /* user wants prefix routes only */
2086 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2087 /* success since this is not a prefix route */
2088 return 1;
2089 }
2090 }
2091
2d7202bf
TG
2092 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2093 if (nlh == NULL)
26932566 2094 return -EMSGSIZE;
2d7202bf
TG
2095
2096 rtm = nlmsg_data(nlh);
1da177e4
LT
2097 rtm->rtm_family = AF_INET6;
2098 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2099 rtm->rtm_src_len = rt->rt6i_src.plen;
2100 rtm->rtm_tos = 0;
c71099ac 2101 if (rt->rt6i_table)
9e762a4a 2102 table = rt->rt6i_table->tb6_id;
c71099ac 2103 else
9e762a4a
PM
2104 table = RT6_TABLE_UNSPEC;
2105 rtm->rtm_table = table;
2d7202bf 2106 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2107 if (rt->rt6i_flags&RTF_REJECT)
2108 rtm->rtm_type = RTN_UNREACHABLE;
2109 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2110 rtm->rtm_type = RTN_LOCAL;
2111 else
2112 rtm->rtm_type = RTN_UNICAST;
2113 rtm->rtm_flags = 0;
2114 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2115 rtm->rtm_protocol = rt->rt6i_protocol;
2116 if (rt->rt6i_flags&RTF_DYNAMIC)
2117 rtm->rtm_protocol = RTPROT_REDIRECT;
2118 else if (rt->rt6i_flags & RTF_ADDRCONF)
2119 rtm->rtm_protocol = RTPROT_KERNEL;
2120 else if (rt->rt6i_flags&RTF_DEFAULT)
2121 rtm->rtm_protocol = RTPROT_RA;
2122
2123 if (rt->rt6i_flags&RTF_CACHE)
2124 rtm->rtm_flags |= RTM_F_CLONED;
2125
2126 if (dst) {
2d7202bf 2127 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2128 rtm->rtm_dst_len = 128;
1da177e4 2129 } else if (rtm->rtm_dst_len)
2d7202bf 2130 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2131#ifdef CONFIG_IPV6_SUBTREES
2132 if (src) {
2d7202bf 2133 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2134 rtm->rtm_src_len = 128;
1da177e4 2135 } else if (rtm->rtm_src_len)
2d7202bf 2136 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2137#endif
2138 if (iif)
2d7202bf 2139 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2140 else if (dst) {
2141 struct in6_addr saddr_buf;
5e5f3f0f
YH
2142 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2143 dst, &saddr_buf) == 0)
2d7202bf 2144 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2145 }
2d7202bf 2146
1da177e4 2147 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2148 goto nla_put_failure;
2149
1da177e4 2150 if (rt->u.dst.neighbour)
2d7202bf
TG
2151 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2152
1da177e4 2153 if (rt->u.dst.dev)
2d7202bf
TG
2154 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2155
2156 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2157
2158 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2159 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2160 expires, rt->u.dst.error) < 0)
2161 goto nla_put_failure;
2d7202bf
TG
2162
2163 return nlmsg_end(skb, nlh);
2164
2165nla_put_failure:
26932566
PM
2166 nlmsg_cancel(skb, nlh);
2167 return -EMSGSIZE;
1da177e4
LT
2168}
2169
1b43af54 2170int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2171{
2172 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2173 int prefix;
2174
2d7202bf
TG
2175 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2176 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2177 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2178 } else
2179 prefix = 0;
2180
2181 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2182 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2183 prefix, NLM_F_MULTI);
1da177e4
LT
2184}
2185
c127ea2c 2186static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2187{
b854272b 2188 struct net *net = in_skb->sk->sk_net;
ab364a6f
TG
2189 struct nlattr *tb[RTA_MAX+1];
2190 struct rt6_info *rt;
1da177e4 2191 struct sk_buff *skb;
ab364a6f 2192 struct rtmsg *rtm;
1da177e4 2193 struct flowi fl;
ab364a6f 2194 int err, iif = 0;
1da177e4 2195
ab364a6f
TG
2196 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2197 if (err < 0)
2198 goto errout;
1da177e4 2199
ab364a6f 2200 err = -EINVAL;
1da177e4 2201 memset(&fl, 0, sizeof(fl));
1da177e4 2202
ab364a6f
TG
2203 if (tb[RTA_SRC]) {
2204 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2205 goto errout;
2206
2207 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2208 }
2209
2210 if (tb[RTA_DST]) {
2211 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2212 goto errout;
2213
2214 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2215 }
2216
2217 if (tb[RTA_IIF])
2218 iif = nla_get_u32(tb[RTA_IIF]);
2219
2220 if (tb[RTA_OIF])
2221 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2222
2223 if (iif) {
2224 struct net_device *dev;
5578689a 2225 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2226 if (!dev) {
2227 err = -ENODEV;
ab364a6f 2228 goto errout;
1da177e4
LT
2229 }
2230 }
2231
ab364a6f
TG
2232 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2233 if (skb == NULL) {
2234 err = -ENOBUFS;
2235 goto errout;
2236 }
1da177e4 2237
ab364a6f
TG
2238 /* Reserve room for dummy headers, this skb can pass
2239 through good chunk of routing engine.
2240 */
459a98ed 2241 skb_reset_mac_header(skb);
ab364a6f 2242 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2243
ab364a6f 2244 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2245 skb->dst = &rt->u.dst;
2246
ab364a6f 2247 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2248 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2249 nlh->nlmsg_seq, 0, 0);
1da177e4 2250 if (err < 0) {
ab364a6f
TG
2251 kfree_skb(skb);
2252 goto errout;
1da177e4
LT
2253 }
2254
5578689a 2255 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2256errout:
1da177e4 2257 return err;
1da177e4
LT
2258}
2259
86872cb5 2260void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2261{
2262 struct sk_buff *skb;
5578689a 2263 struct net *net = info->nl_net;
528c4ceb
DL
2264 u32 seq;
2265 int err;
2266
2267 err = -ENOBUFS;
2268 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2269
339bf98f 2270 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2271 if (skb == NULL)
2272 goto errout;
2273
528c4ceb
DL
2274 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2275 event, info->pid, seq, 0, 0);
26932566
PM
2276 if (err < 0) {
2277 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2278 WARN_ON(err == -EMSGSIZE);
2279 kfree_skb(skb);
2280 goto errout;
2281 }
5578689a
DL
2282 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2283 info->nlh, gfp_any());
21713ebc
TG
2284errout:
2285 if (err < 0)
5578689a 2286 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2287}
2288
2289/*
2290 * /proc
2291 */
2292
2293#ifdef CONFIG_PROC_FS
2294
2295#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2296
/*
 * Buffer-walking state for /proc output.
 * NOTE(review): nothing in the seq_file based /proc code visible in this
 * section references this structure -- check whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2305
2306static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2307{
33120b30 2308 struct seq_file *m = p_arg;
1da177e4 2309
33120b30
AD
2310 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2311 rt->rt6i_dst.plen);
1da177e4
LT
2312
2313#ifdef CONFIG_IPV6_SUBTREES
33120b30
AD
2314 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2315 rt->rt6i_src.plen);
1da177e4 2316#else
33120b30 2317 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2318#endif
2319
2320 if (rt->rt6i_nexthop) {
33120b30
AD
2321 seq_printf(m, NIP6_SEQFMT,
2322 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2323 } else {
33120b30 2324 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2325 }
33120b30
AD
2326 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2327 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2328 rt->u.dst.__use, rt->rt6i_flags,
2329 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2330 return 0;
2331}
2332
33120b30 2333static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2334{
f3db4851
DL
2335 struct net *net = (struct net *)m->private;
2336 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2337 return 0;
2338}
1da177e4 2339
33120b30
AD
2340static int ipv6_route_open(struct inode *inode, struct file *file)
2341{
f3db4851
DL
2342 struct net *net = get_proc_net(inode);
2343 if (!net)
2344 return -ENXIO;
2345 return single_open(file, ipv6_route_show, net);
2346}
2347
2348static int ipv6_route_release(struct inode *inode, struct file *file)
2349{
2350 struct seq_file *seq = file->private_data;
2351 struct net *net = seq->private;
2352 put_net(net);
2353 return single_release(inode, file);
1da177e4
LT
2354}
2355
33120b30
AD
2356static const struct file_operations ipv6_route_proc_fops = {
2357 .owner = THIS_MODULE,
2358 .open = ipv6_route_open,
2359 .read = seq_read,
2360 .llseek = seq_lseek,
f3db4851 2361 .release = ipv6_route_release,
33120b30
AD
2362};
2363
1da177e4
LT
2364static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2365{
69ddb805 2366 struct net *net = (struct net *)seq->private;
1da177e4 2367 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2368 net->ipv6.rt6_stats->fib_nodes,
2369 net->ipv6.rt6_stats->fib_route_nodes,
2370 net->ipv6.rt6_stats->fib_rt_alloc,
2371 net->ipv6.rt6_stats->fib_rt_entries,
2372 net->ipv6.rt6_stats->fib_rt_cache,
c572872f 2373 atomic_read(&ip6_dst_ops.entries),
69ddb805 2374 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2375
2376 return 0;
2377}
2378
2379static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2380{
69ddb805
DL
2381 struct net *net = get_proc_net(inode);
2382 return single_open(file, rt6_stats_seq_show, net);
2383}
2384
2385static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2386{
2387 struct seq_file *seq = file->private_data;
2388 struct net *net = (struct net *)seq->private;
2389 put_net(net);
2390 return single_release(inode, file);
1da177e4
LT
2391}
2392
9a32144e 2393static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2394 .owner = THIS_MODULE,
2395 .open = rt6_stats_seq_open,
2396 .read = seq_read,
2397 .llseek = seq_lseek,
69ddb805 2398 .release = rt6_stats_seq_release,
1da177e4
LT
2399};
2400#endif /* CONFIG_PROC_FS */
2401
2402#ifdef CONFIG_SYSCTL
2403
1da177e4
LT
2404static
2405int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2406 void __user *buffer, size_t *lenp, loff_t *ppos)
2407{
5b7c931d
DL
2408 struct net *net = current->nsproxy->net_ns;
2409 int delay = net->ipv6.sysctl.flush_delay;
1da177e4
LT
2410 if (write) {
2411 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
5b7c931d 2412 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2413 return 0;
2414 } else
2415 return -EINVAL;
2416}
2417
760f2d01 2418ctl_table ipv6_route_table_template[] = {
1ab1457c 2419 {
1da177e4 2420 .procname = "flush",
4990509f 2421 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2422 .maxlen = sizeof(int),
89c8b3a1 2423 .mode = 0200,
1ab1457c 2424 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2425 },
2426 {
2427 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2428 .procname = "gc_thresh",
1ab1457c 2429 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2430 .maxlen = sizeof(int),
2431 .mode = 0644,
1ab1457c 2432 .proc_handler = &proc_dointvec,
1da177e4
LT
2433 },
2434 {
2435 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2436 .procname = "max_size",
4990509f 2437 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2438 .maxlen = sizeof(int),
2439 .mode = 0644,
1ab1457c 2440 .proc_handler = &proc_dointvec,
1da177e4
LT
2441 },
2442 {
2443 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2444 .procname = "gc_min_interval",
4990509f 2445 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2446 .maxlen = sizeof(int),
2447 .mode = 0644,
1ab1457c 2448 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2449 .strategy = &sysctl_jiffies,
2450 },
2451 {
2452 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2453 .procname = "gc_timeout",
4990509f 2454 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2455 .maxlen = sizeof(int),
2456 .mode = 0644,
1ab1457c 2457 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2458 .strategy = &sysctl_jiffies,
2459 },
2460 {
2461 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2462 .procname = "gc_interval",
4990509f 2463 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2464 .maxlen = sizeof(int),
2465 .mode = 0644,
1ab1457c 2466 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2467 .strategy = &sysctl_jiffies,
2468 },
2469 {
2470 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2471 .procname = "gc_elasticity",
4990509f 2472 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2473 .maxlen = sizeof(int),
2474 .mode = 0644,
1ab1457c 2475 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2476 .strategy = &sysctl_jiffies,
2477 },
2478 {
2479 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2480 .procname = "mtu_expires",
4990509f 2481 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2482 .maxlen = sizeof(int),
2483 .mode = 0644,
1ab1457c 2484 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2485 .strategy = &sysctl_jiffies,
2486 },
2487 {
2488 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2489 .procname = "min_adv_mss",
4990509f 2490 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2491 .maxlen = sizeof(int),
2492 .mode = 0644,
1ab1457c 2493 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2494 .strategy = &sysctl_jiffies,
2495 },
2496 {
2497 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2498 .procname = "gc_min_interval_ms",
4990509f 2499 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2500 .maxlen = sizeof(int),
2501 .mode = 0644,
1ab1457c 2502 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2503 .strategy = &sysctl_ms_jiffies,
2504 },
2505 { .ctl_name = 0 }
2506};
2507
760f2d01
DL
2508struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2509{
2510 struct ctl_table *table;
2511
2512 table = kmemdup(ipv6_route_table_template,
2513 sizeof(ipv6_route_table_template),
2514 GFP_KERNEL);
5ee09105
YH
2515
2516 if (table) {
2517 table[0].data = &net->ipv6.sysctl.flush_delay;
2518 /* table[1].data will be handled when we have
2519 routes per namespace */
2520 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2521 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2522 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2523 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2524 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2525 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2526 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2527 }
2528
760f2d01
DL
2529 return table;
2530}
1da177e4
LT
2531#endif
2532
cdb18761
DL
/*
 * Per-namespace init: with CONFIG_PROC_FS, create the "ipv6_route" and
 * "rt6_stats" proc entries for @net. The results of the create calls are
 * not checked; the function always returns 0.
 */
static int ip6_route_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0,
			     &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO,
			     &rt6_stats_seq_fops);
#endif
	return 0;
}
2541
2542static void ip6_route_net_exit(struct net *net)
2543{
2544#ifdef CONFIG_PROC_FS
2545 proc_net_remove(net, "ipv6_route");
2546 proc_net_remove(net, "rt6_stats");
2547#endif
5578689a 2548 rt6_ifdown(net, NULL);
cdb18761
DL
2549}
2550
2551static struct pernet_operations ip6_route_net_ops = {
2552 .init = ip6_route_net_init,
2553 .exit = ip6_route_net_exit,
2554};
2555
433d49c3 2556int __init ip6_route_init(void)
1da177e4 2557{
433d49c3
DL
2558 int ret;
2559
e5d679f3
AD
2560 ip6_dst_ops.kmem_cachep =
2561 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b
DL
2562 SLAB_HWCACHE_ALIGN, NULL);
2563 if (!ip6_dst_ops.kmem_cachep)
2564 return -ENOMEM;
2565
14e50e57
DM
2566 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2567
433d49c3
DL
2568 ret = fib6_init();
2569 if (ret)
2570 goto out_kmem_cache;
2571
433d49c3
DL
2572 ret = xfrm6_init();
2573 if (ret)
cdb18761 2574 goto out_fib6_init;
c35b7e72 2575
433d49c3
DL
2576 ret = fib6_rules_init();
2577 if (ret)
2578 goto xfrm6_init;
7e5449c2 2579
433d49c3
DL
2580 ret = -ENOBUFS;
2581 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2582 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2583 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2584 goto fib6_rules_init;
c127ea2c 2585
cdb18761
DL
2586 ret = register_pernet_subsys(&ip6_route_net_ops);
2587 if (ret)
2588 goto fib6_rules_init;
433d49c3
DL
2589out:
2590 return ret;
2591
2592fib6_rules_init:
433d49c3
DL
2593 fib6_rules_cleanup();
2594xfrm6_init:
433d49c3 2595 xfrm6_fini();
433d49c3 2596out_fib6_init:
433d49c3
DL
2597 fib6_gc_cleanup();
2598out_kmem_cache:
2599 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2600 goto out;
1da177e4
LT
2601}
2602
2603void ip6_route_cleanup(void)
2604{
cdb18761 2605 unregister_pernet_subsys(&ip6_route_net_ops);
101367c2 2606 fib6_rules_cleanup();
1da177e4 2607 xfrm6_fini();
1da177e4
LT
2608 fib6_gc_cleanup();
2609 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2610}