]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NETNS][IPV6] route6 - Pass the network namespace parameter to rt6_purge_dflt_routers
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
1da177e4
LT
100static struct dst_ops ip6_dst_ops = {
101 .family = AF_INET6,
102 .protocol = __constant_htons(ETH_P_IPV6),
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
862b82c6 111 .local_out = ip6_local_out,
1da177e4 112 .entry_size = sizeof(struct rt6_info),
e2422970 113 .entries = ATOMIC_INIT(0),
1da177e4
LT
114};
115
14e50e57
DM
116static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117{
118}
119
120static struct dst_ops ip6_dst_blackhole_ops = {
121 .family = AF_INET6,
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
e2422970 127 .entries = ATOMIC_INIT(0),
14e50e57
DM
128};
129
1da177e4
LT
130struct rt6_info ip6_null_entry = {
131 .u = {
132 .dst = {
133 .__refcnt = ATOMIC_INIT(1),
134 .__use = 1,
1da177e4
LT
135 .obsolete = -1,
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
140 .ops = &ip6_dst_ops,
141 .path = (struct dst_entry*)&ip6_null_entry,
142 }
143 },
144 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
145 .rt6i_metric = ~(u32) 0,
146 .rt6i_ref = ATOMIC_INIT(1),
147};
148
101367c2
TG
149#ifdef CONFIG_IPV6_MULTIPLE_TABLES
150
6723ab54
DM
151static int ip6_pkt_prohibit(struct sk_buff *skb);
152static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 153
101367c2
TG
154struct rt6_info ip6_prohibit_entry = {
155 .u = {
156 .dst = {
157 .__refcnt = ATOMIC_INIT(1),
158 .__use = 1,
101367c2
TG
159 .obsolete = -1,
160 .error = -EACCES,
161 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
162 .input = ip6_pkt_prohibit,
163 .output = ip6_pkt_prohibit_out,
101367c2
TG
164 .ops = &ip6_dst_ops,
165 .path = (struct dst_entry*)&ip6_prohibit_entry,
166 }
167 },
168 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
169 .rt6i_metric = ~(u32) 0,
170 .rt6i_ref = ATOMIC_INIT(1),
171};
172
173struct rt6_info ip6_blk_hole_entry = {
174 .u = {
175 .dst = {
176 .__refcnt = ATOMIC_INIT(1),
177 .__use = 1,
101367c2
TG
178 .obsolete = -1,
179 .error = -EINVAL,
180 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
352e512c
HX
181 .input = dst_discard,
182 .output = dst_discard,
101367c2
TG
183 .ops = &ip6_dst_ops,
184 .path = (struct dst_entry*)&ip6_blk_hole_entry,
185 }
186 },
187 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
188 .rt6i_metric = ~(u32) 0,
189 .rt6i_ref = ATOMIC_INIT(1),
190};
191
192#endif
193
1da177e4
LT
194/* allocate dst with ip6_dst_ops */
195static __inline__ struct rt6_info *ip6_dst_alloc(void)
196{
197 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
198}
199
200static void ip6_dst_destroy(struct dst_entry *dst)
201{
202 struct rt6_info *rt = (struct rt6_info *)dst;
203 struct inet6_dev *idev = rt->rt6i_idev;
204
205 if (idev != NULL) {
206 rt->rt6i_idev = NULL;
207 in6_dev_put(idev);
1ab1457c 208 }
1da177e4
LT
209}
210
211static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
212 int how)
213{
214 struct rt6_info *rt = (struct rt6_info *)dst;
215 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6
DL
216 struct net_device *loopback_dev =
217 dev->nd_net->loopback_dev;
1da177e4 218
5a3e55d6
DL
219 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
220 struct inet6_dev *loopback_idev =
221 in6_dev_get(loopback_dev);
1da177e4
LT
222 if (loopback_idev != NULL) {
223 rt->rt6i_idev = loopback_idev;
224 in6_dev_put(idev);
225 }
226 }
227}
228
229static __inline__ int rt6_check_expired(const struct rt6_info *rt)
230{
231 return (rt->rt6i_flags & RTF_EXPIRES &&
232 time_after(jiffies, rt->rt6i_expires));
233}
234
c71099ac
TG
235static inline int rt6_need_strict(struct in6_addr *daddr)
236{
237 return (ipv6_addr_type(daddr) &
238 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
239}
240
1da177e4 241/*
c71099ac 242 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
243 */
244
245static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
246 int oif,
247 int strict)
248{
249 struct rt6_info *local = NULL;
250 struct rt6_info *sprt;
251
252 if (oif) {
7cc48263 253 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
254 struct net_device *dev = sprt->rt6i_dev;
255 if (dev->ifindex == oif)
256 return sprt;
257 if (dev->flags & IFF_LOOPBACK) {
258 if (sprt->rt6i_idev == NULL ||
259 sprt->rt6i_idev->dev->ifindex != oif) {
260 if (strict && oif)
261 continue;
1ab1457c 262 if (local && (!oif ||
1da177e4
LT
263 local->rt6i_idev->dev->ifindex == oif))
264 continue;
265 }
266 local = sprt;
267 }
268 }
269
270 if (local)
271 return local;
272
273 if (strict)
274 return &ip6_null_entry;
275 }
276 return rt;
277}
278
27097255
YH
279#ifdef CONFIG_IPV6_ROUTER_PREF
280static void rt6_probe(struct rt6_info *rt)
281{
282 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
283 /*
284 * Okay, this does not seem to be appropriate
285 * for now, however, we need to check if it
286 * is really so; aka Router Reachability Probing.
287 *
288 * Router Reachability Probe MUST be rate-limited
289 * to no more than one per minute.
290 */
291 if (!neigh || (neigh->nud_state & NUD_VALID))
292 return;
293 read_lock_bh(&neigh->lock);
294 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 295 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
296 struct in6_addr mcaddr;
297 struct in6_addr *target;
298
299 neigh->updated = jiffies;
300 read_unlock_bh(&neigh->lock);
301
302 target = (struct in6_addr *)&neigh->primary_key;
303 addrconf_addr_solict_mult(target, &mcaddr);
304 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
305 } else
306 read_unlock_bh(&neigh->lock);
307}
308#else
/* Stub used when CONFIG_IPV6_ROUTER_PREF is not set: no probing. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
313#endif
314
1da177e4 315/*
554cfb7e 316 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 317 */
b6f99a21 318static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
319{
320 struct net_device *dev = rt->rt6i_dev;
161980f4 321 if (!oif || dev->ifindex == oif)
554cfb7e 322 return 2;
161980f4
DM
323 if ((dev->flags & IFF_LOOPBACK) &&
324 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
325 return 1;
326 return 0;
554cfb7e 327}
1da177e4 328
b6f99a21 329static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 330{
554cfb7e 331 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 332 int m;
4d0c5911
YH
333 if (rt->rt6i_flags & RTF_NONEXTHOP ||
334 !(rt->rt6i_flags & RTF_GATEWAY))
335 m = 1;
336 else if (neigh) {
554cfb7e
YH
337 read_lock_bh(&neigh->lock);
338 if (neigh->nud_state & NUD_VALID)
4d0c5911 339 m = 2;
398bcbeb
YH
340#ifdef CONFIG_IPV6_ROUTER_PREF
341 else if (neigh->nud_state & NUD_FAILED)
342 m = 0;
343#endif
344 else
ea73ee23 345 m = 1;
554cfb7e 346 read_unlock_bh(&neigh->lock);
398bcbeb
YH
347 } else
348 m = 0;
554cfb7e 349 return m;
1da177e4
LT
350}
351
554cfb7e
YH
352static int rt6_score_route(struct rt6_info *rt, int oif,
353 int strict)
1da177e4 354{
4d0c5911 355 int m, n;
1ab1457c 356
4d0c5911 357 m = rt6_check_dev(rt, oif);
77d16f45 358 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 359 return -1;
ebacaaa0
YH
360#ifdef CONFIG_IPV6_ROUTER_PREF
361 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
362#endif
4d0c5911 363 n = rt6_check_neigh(rt);
557e92ef 364 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
365 return -1;
366 return m;
367}
368
f11e6659
DM
369static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
370 int *mpri, struct rt6_info *match)
554cfb7e 371{
f11e6659
DM
372 int m;
373
374 if (rt6_check_expired(rt))
375 goto out;
376
377 m = rt6_score_route(rt, oif, strict);
378 if (m < 0)
379 goto out;
380
381 if (m > *mpri) {
382 if (strict & RT6_LOOKUP_F_REACHABLE)
383 rt6_probe(match);
384 *mpri = m;
385 match = rt;
386 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
387 rt6_probe(rt);
388 }
389
390out:
391 return match;
392}
393
394static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
395 struct rt6_info *rr_head,
396 u32 metric, int oif, int strict)
397{
398 struct rt6_info *rt, *match;
554cfb7e 399 int mpri = -1;
1da177e4 400
f11e6659
DM
401 match = NULL;
402 for (rt = rr_head; rt && rt->rt6i_metric == metric;
403 rt = rt->u.dst.rt6_next)
404 match = find_match(rt, oif, strict, &mpri, match);
405 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
406 rt = rt->u.dst.rt6_next)
407 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 408
f11e6659
DM
409 return match;
410}
1da177e4 411
f11e6659
DM
412static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
413{
414 struct rt6_info *match, *rt0;
1da177e4 415
f11e6659
DM
416 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
417 __FUNCTION__, fn->leaf, oif);
554cfb7e 418
f11e6659
DM
419 rt0 = fn->rr_ptr;
420 if (!rt0)
421 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 422
f11e6659 423 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 424
554cfb7e 425 if (!match &&
f11e6659
DM
426 (strict & RT6_LOOKUP_F_REACHABLE)) {
427 struct rt6_info *next = rt0->u.dst.rt6_next;
428
554cfb7e 429 /* no entries matched; do round-robin */
f11e6659
DM
430 if (!next || next->rt6i_metric != rt0->rt6i_metric)
431 next = fn->leaf;
432
433 if (next != rt0)
434 fn->rr_ptr = next;
1da177e4 435 }
1da177e4 436
f11e6659
DM
437 RT6_TRACE("%s() => %p\n",
438 __FUNCTION__, match);
1da177e4 439
554cfb7e 440 return (match ? match : &ip6_null_entry);
1da177e4
LT
441}
442
70ceb4f5
YH
443#ifdef CONFIG_IPV6_ROUTE_INFO
444int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
445 struct in6_addr *gwaddr)
446{
efa2cea0 447 struct net *net = dev->nd_net;
70ceb4f5
YH
448 struct route_info *rinfo = (struct route_info *) opt;
449 struct in6_addr prefix_buf, *prefix;
450 unsigned int pref;
451 u32 lifetime;
452 struct rt6_info *rt;
453
454 if (len < sizeof(struct route_info)) {
455 return -EINVAL;
456 }
457
458 /* Sanity check for prefix_len and length */
459 if (rinfo->length > 3) {
460 return -EINVAL;
461 } else if (rinfo->prefix_len > 128) {
462 return -EINVAL;
463 } else if (rinfo->prefix_len > 64) {
464 if (rinfo->length < 2) {
465 return -EINVAL;
466 }
467 } else if (rinfo->prefix_len > 0) {
468 if (rinfo->length < 1) {
469 return -EINVAL;
470 }
471 }
472
473 pref = rinfo->route_pref;
474 if (pref == ICMPV6_ROUTER_PREF_INVALID)
475 pref = ICMPV6_ROUTER_PREF_MEDIUM;
476
e69a4adc 477 lifetime = ntohl(rinfo->lifetime);
70ceb4f5
YH
478 if (lifetime == 0xffffffff) {
479 /* infinity */
480 } else if (lifetime > 0x7fffffff/HZ) {
481 /* Avoid arithmetic overflow */
482 lifetime = 0x7fffffff/HZ - 1;
483 }
484
485 if (rinfo->length == 3)
486 prefix = (struct in6_addr *)rinfo->prefix;
487 else {
488 /* this function is safe */
489 ipv6_addr_prefix(&prefix_buf,
490 (struct in6_addr *)rinfo->prefix,
491 rinfo->prefix_len);
492 prefix = &prefix_buf;
493 }
494
efa2cea0
DL
495 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
496 dev->ifindex);
70ceb4f5
YH
497
498 if (rt && !lifetime) {
e0a1ad73 499 ip6_del_rt(rt);
70ceb4f5
YH
500 rt = NULL;
501 }
502
503 if (!rt && lifetime)
efa2cea0 504 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
505 pref);
506 else if (rt)
507 rt->rt6i_flags = RTF_ROUTEINFO |
508 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
509
510 if (rt) {
511 if (lifetime == 0xffffffff) {
512 rt->rt6i_flags &= ~RTF_EXPIRES;
513 } else {
514 rt->rt6i_expires = jiffies + HZ * lifetime;
515 rt->rt6i_flags |= RTF_EXPIRES;
516 }
517 dst_release(&rt->u.dst);
518 }
519 return 0;
520}
521#endif
522
982f56f3
YH
/*
 * BACKTRACK(saddr) - climb the fib6 tree after a failed selection.
 *
 * Expands inside a lookup function that provides the variables 'rt'
 * and 'fn' and the labels 'out' and 'restart'.  Does nothing unless
 * rt is &ip6_null_entry.  Otherwise, repeatedly:
 *   - jumps to 'out' if fn is flagged RTN_TL_ROOT;
 *   - moves fn to its parent, or, when the parent has a subtree that
 *     is not fn itself, to the result of fib6_lookup() in that subtree
 *     using saddr;
 *   - jumps to 'restart' once fn is flagged RTN_RTINFO.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) { \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			} else { \
				fn = pn; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
540
541static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
542 struct flowi *fl, int flags)
1da177e4
LT
543{
544 struct fib6_node *fn;
545 struct rt6_info *rt;
546
c71099ac
TG
547 read_lock_bh(&table->tb6_lock);
548 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
549restart:
550 rt = fn->leaf;
77d16f45 551 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 552 BACKTRACK(&fl->fl6_src);
c71099ac 553out:
03f49f34 554 dst_use(&rt->u.dst, jiffies);
c71099ac 555 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
556 return rt;
557
558}
559
606a2b48
DL
560struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
561 struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
562{
563 struct flowi fl = {
564 .oif = oif,
565 .nl_u = {
566 .ip6_u = {
567 .daddr = *daddr,
c71099ac
TG
568 },
569 },
570 };
571 struct dst_entry *dst;
77d16f45 572 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 573
adaa70bb
TG
574 if (saddr) {
575 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
576 flags |= RT6_LOOKUP_F_HAS_SADDR;
577 }
578
606a2b48 579 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
580 if (dst->error == 0)
581 return (struct rt6_info *) dst;
582
583 dst_release(dst);
584
1da177e4
LT
585 return NULL;
586}
587
7159039a
YH
588EXPORT_SYMBOL(rt6_lookup);
589
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
595
86872cb5 596static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
597{
598 int err;
c71099ac 599 struct fib6_table *table;
1da177e4 600
c71099ac
TG
601 table = rt->rt6i_table;
602 write_lock_bh(&table->tb6_lock);
86872cb5 603 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 604 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
605
606 return err;
607}
608
40e22e8f
TG
609int ip6_ins_rt(struct rt6_info *rt)
610{
4d1169c1
DL
611 struct nl_info info = {
612 .nl_net = &init_net,
613 };
528c4ceb 614 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
615}
616
95a9a5ba
YH
617static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
618 struct in6_addr *saddr)
1da177e4 619{
1da177e4
LT
620 struct rt6_info *rt;
621
622 /*
623 * Clone the route.
624 */
625
626 rt = ip6_rt_copy(ort);
627
628 if (rt) {
58c4fb86
YH
629 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
630 if (rt->rt6i_dst.plen != 128 &&
631 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
632 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 633 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 634 }
1da177e4 635
58c4fb86 636 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
637 rt->rt6i_dst.plen = 128;
638 rt->rt6i_flags |= RTF_CACHE;
639 rt->u.dst.flags |= DST_HOST;
640
641#ifdef CONFIG_IPV6_SUBTREES
642 if (rt->rt6i_src.plen && saddr) {
643 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
644 rt->rt6i_src.plen = 128;
645 }
646#endif
647
648 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
649
95a9a5ba 650 }
1da177e4 651
95a9a5ba
YH
652 return rt;
653}
1da177e4 654
299d9939
YH
655static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
656{
657 struct rt6_info *rt = ip6_rt_copy(ort);
658 if (rt) {
659 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
660 rt->rt6i_dst.plen = 128;
661 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
662 rt->u.dst.flags |= DST_HOST;
663 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
664 }
665 return rt;
666}
667
4acad72d 668static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
8ce11e6a 669 struct flowi *fl, int flags)
1da177e4
LT
670{
671 struct fib6_node *fn;
519fbd87 672 struct rt6_info *rt, *nrt;
c71099ac 673 int strict = 0;
1da177e4 674 int attempts = 3;
519fbd87 675 int err;
ea659e07 676 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 677
77d16f45 678 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
679
680relookup:
c71099ac 681 read_lock_bh(&table->tb6_lock);
1da177e4 682
8238dd06 683restart_2:
c71099ac 684 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
685
686restart:
4acad72d 687 rt = rt6_select(fn, oif, strict | reachable);
982f56f3 688 BACKTRACK(&fl->fl6_src);
8238dd06
YH
689 if (rt == &ip6_null_entry ||
690 rt->rt6i_flags & RTF_CACHE)
1ddef044 691 goto out;
1da177e4 692
fb9de91e 693 dst_hold(&rt->u.dst);
c71099ac 694 read_unlock_bh(&table->tb6_lock);
fb9de91e 695
519fbd87 696 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 697 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
698 else {
699#if CLONE_OFFLINK_ROUTE
c71099ac 700 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
701#else
702 goto out2;
703#endif
704 }
e40cf353 705
519fbd87
YH
706 dst_release(&rt->u.dst);
707 rt = nrt ? : &ip6_null_entry;
1da177e4 708
519fbd87
YH
709 dst_hold(&rt->u.dst);
710 if (nrt) {
40e22e8f 711 err = ip6_ins_rt(nrt);
519fbd87 712 if (!err)
1da177e4 713 goto out2;
1da177e4 714 }
1da177e4 715
519fbd87
YH
716 if (--attempts <= 0)
717 goto out2;
718
719 /*
c71099ac 720 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
721 * released someone could insert this route. Relookup.
722 */
723 dst_release(&rt->u.dst);
724 goto relookup;
725
726out:
8238dd06
YH
727 if (reachable) {
728 reachable = 0;
729 goto restart_2;
730 }
519fbd87 731 dst_hold(&rt->u.dst);
c71099ac 732 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
733out2:
734 rt->u.dst.lastuse = jiffies;
735 rt->u.dst.__use++;
c71099ac
TG
736
737 return rt;
1da177e4
LT
738}
739
4acad72d
PE
740static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
741 struct flowi *fl, int flags)
742{
743 return ip6_pol_route(table, fl->iif, fl, flags);
744}
745
c71099ac
TG
746void ip6_route_input(struct sk_buff *skb)
747{
0660e03f 748 struct ipv6hdr *iph = ipv6_hdr(skb);
adaa70bb 749 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
750 struct flowi fl = {
751 .iif = skb->dev->ifindex,
752 .nl_u = {
753 .ip6_u = {
754 .daddr = iph->daddr,
755 .saddr = iph->saddr,
90bcaf7b 756 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
757 },
758 },
1ab1457c 759 .mark = skb->mark,
c71099ac
TG
760 .proto = iph->nexthdr,
761 };
adaa70bb
TG
762
763 if (rt6_need_strict(&iph->daddr))
764 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 765
58f09b78 766 skb->dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_input);
c71099ac
TG
767}
768
769static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
770 struct flowi *fl, int flags)
1da177e4 771{
4acad72d 772 return ip6_pol_route(table, fl->oif, fl, flags);
c71099ac
TG
773}
774
775struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
776{
777 int flags = 0;
778
779 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 780 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 781
adaa70bb
TG
782 if (!ipv6_addr_any(&fl->fl6_src))
783 flags |= RT6_LOOKUP_F_HAS_SADDR;
784
58f09b78 785 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
1da177e4
LT
786}
787
7159039a 788EXPORT_SYMBOL(ip6_route_output);
1da177e4 789
14e50e57
DM
790int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
791{
792 struct rt6_info *ort = (struct rt6_info *) *dstp;
793 struct rt6_info *rt = (struct rt6_info *)
794 dst_alloc(&ip6_dst_blackhole_ops);
795 struct dst_entry *new = NULL;
796
797 if (rt) {
798 new = &rt->u.dst;
799
800 atomic_set(&new->__refcnt, 1);
801 new->__use = 1;
352e512c
HX
802 new->input = dst_discard;
803 new->output = dst_discard;
14e50e57
DM
804
805 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
806 new->dev = ort->u.dst.dev;
807 if (new->dev)
808 dev_hold(new->dev);
809 rt->rt6i_idev = ort->rt6i_idev;
810 if (rt->rt6i_idev)
811 in6_dev_hold(rt->rt6i_idev);
812 rt->rt6i_expires = 0;
813
814 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
815 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
816 rt->rt6i_metric = 0;
817
818 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
819#ifdef CONFIG_IPV6_SUBTREES
820 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
821#endif
822
823 dst_free(new);
824 }
825
826 dst_release(*dstp);
827 *dstp = new;
828 return (new ? 0 : -ENOMEM);
829}
830EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
831
1da177e4
LT
832/*
833 * Destination cache support functions
834 */
835
836static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
837{
838 struct rt6_info *rt;
839
840 rt = (struct rt6_info *) dst;
841
842 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
843 return dst;
844
845 return NULL;
846}
847
848static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
849{
850 struct rt6_info *rt = (struct rt6_info *) dst;
851
852 if (rt) {
853 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 854 ip6_del_rt(rt);
1da177e4
LT
855 else
856 dst_release(dst);
857 }
858 return NULL;
859}
860
861static void ip6_link_failure(struct sk_buff *skb)
862{
863 struct rt6_info *rt;
864
865 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
866
867 rt = (struct rt6_info *) skb->dst;
868 if (rt) {
869 if (rt->rt6i_flags&RTF_CACHE) {
870 dst_set_expires(&rt->u.dst, 0);
871 rt->rt6i_flags |= RTF_EXPIRES;
872 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
873 rt->rt6i_node->fn_sernum = -1;
874 }
875}
876
877static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
878{
879 struct rt6_info *rt6 = (struct rt6_info*)dst;
880
881 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
882 rt6->rt6i_flags |= RTF_MODIFIED;
883 if (mtu < IPV6_MIN_MTU) {
884 mtu = IPV6_MIN_MTU;
885 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
886 }
887 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 888 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
889 }
890}
891
1da177e4
LT
892static int ipv6_get_mtu(struct net_device *dev);
893
894static inline unsigned int ipv6_advmss(unsigned int mtu)
895{
896 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
897
4990509f
DL
898 if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
899 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
900
901 /*
1ab1457c
YH
902 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
903 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
904 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
905 * rely only on pmtu discovery"
906 */
907 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
908 mtu = IPV6_MAXPLEN;
909 return mtu;
910}
911
3b00944c
YH
912static struct dst_entry *icmp6_dst_gc_list;
913static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 914
3b00944c 915struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 916 struct neighbour *neigh,
3b00944c 917 struct in6_addr *addr)
1da177e4
LT
918{
919 struct rt6_info *rt;
920 struct inet6_dev *idev = in6_dev_get(dev);
921
922 if (unlikely(idev == NULL))
923 return NULL;
924
925 rt = ip6_dst_alloc();
926 if (unlikely(rt == NULL)) {
927 in6_dev_put(idev);
928 goto out;
929 }
930
931 dev_hold(dev);
932 if (neigh)
933 neigh_hold(neigh);
934 else
935 neigh = ndisc_get_neigh(dev, addr);
936
937 rt->rt6i_dev = dev;
938 rt->rt6i_idev = idev;
939 rt->rt6i_nexthop = neigh;
940 atomic_set(&rt->u.dst.__refcnt, 1);
941 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
942 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
943 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
3b00944c 944 rt->u.dst.output = ip6_output;
1da177e4
LT
945
946#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
947 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
948 ? DST_HOST
1da177e4
LT
949 : 0;
950 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
951 rt->rt6i_dst.plen = 128;
952#endif
953
3b00944c
YH
954 spin_lock_bh(&icmp6_dst_lock);
955 rt->u.dst.next = icmp6_dst_gc_list;
956 icmp6_dst_gc_list = &rt->u.dst;
957 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 958
63152fc0 959 fib6_force_start_gc(dev->nd_net);
1da177e4
LT
960
961out:
40aa7b90 962 return &rt->u.dst;
1da177e4
LT
963}
964
3b00944c 965int icmp6_dst_gc(int *more)
1da177e4
LT
966{
967 struct dst_entry *dst, *next, **pprev;
968 int freed;
969
970 next = NULL;
1ab1457c 971 freed = 0;
5d0bbeeb 972
3b00944c
YH
973 spin_lock_bh(&icmp6_dst_lock);
974 pprev = &icmp6_dst_gc_list;
5d0bbeeb 975
1da177e4
LT
976 while ((dst = *pprev) != NULL) {
977 if (!atomic_read(&dst->__refcnt)) {
978 *pprev = dst->next;
979 dst_free(dst);
980 freed++;
981 } else {
982 pprev = &dst->next;
983 (*more)++;
984 }
985 }
986
3b00944c 987 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 988
1da177e4
LT
989 return freed;
990}
991
569d3645 992static int ip6_dst_gc(struct dst_ops *ops)
1da177e4
LT
993{
994 static unsigned expire = 30*HZ;
995 static unsigned long last_gc;
996 unsigned long now = jiffies;
997
4990509f
DL
998 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
999 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1da177e4
LT
1000 goto out;
1001
1002 expire++;
5b7c931d 1003 fib6_run_gc(expire, &init_net);
1da177e4
LT
1004 last_gc = now;
1005 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
4990509f 1006 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1da177e4
LT
1007
1008out:
4990509f
DL
1009 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1010 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1da177e4
LT
1011}
1012
1013/* Clean host part of a prefix. Not necessary in radix tree,
1014 but results in cleaner routing tables.
1015
1016 Remove it only when all the things will work!
1017 */
1018
1019static int ipv6_get_mtu(struct net_device *dev)
1020{
1021 int mtu = IPV6_MIN_MTU;
1022 struct inet6_dev *idev;
1023
1024 idev = in6_dev_get(dev);
1025 if (idev) {
1026 mtu = idev->cnf.mtu6;
1027 in6_dev_put(idev);
1028 }
1029 return mtu;
1030}
1031
1032int ipv6_get_hoplimit(struct net_device *dev)
1033{
1034 int hoplimit = ipv6_devconf.hop_limit;
1035 struct inet6_dev *idev;
1036
1037 idev = in6_dev_get(dev);
1038 if (idev) {
1039 hoplimit = idev->cnf.hop_limit;
1040 in6_dev_put(idev);
1041 }
1042 return hoplimit;
1043}
1044
1045/*
1046 *
1047 */
1048
86872cb5 1049int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1050{
1051 int err;
1da177e4
LT
1052 struct rt6_info *rt = NULL;
1053 struct net_device *dev = NULL;
1054 struct inet6_dev *idev = NULL;
c71099ac 1055 struct fib6_table *table;
1da177e4
LT
1056 int addr_type;
1057
86872cb5 1058 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1059 return -EINVAL;
1060#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1061 if (cfg->fc_src_len)
1da177e4
LT
1062 return -EINVAL;
1063#endif
86872cb5 1064 if (cfg->fc_ifindex) {
1da177e4 1065 err = -ENODEV;
881d966b 1066 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1da177e4
LT
1067 if (!dev)
1068 goto out;
1069 idev = in6_dev_get(dev);
1070 if (!idev)
1071 goto out;
1072 }
1073
86872cb5
TG
1074 if (cfg->fc_metric == 0)
1075 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1076
58f09b78 1077 table = fib6_new_table(&init_net, cfg->fc_table);
c71099ac
TG
1078 if (table == NULL) {
1079 err = -ENOBUFS;
1080 goto out;
1081 }
1082
1da177e4
LT
1083 rt = ip6_dst_alloc();
1084
1085 if (rt == NULL) {
1086 err = -ENOMEM;
1087 goto out;
1088 }
1089
1090 rt->u.dst.obsolete = -1;
86872cb5 1091 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1092
86872cb5
TG
1093 if (cfg->fc_protocol == RTPROT_UNSPEC)
1094 cfg->fc_protocol = RTPROT_BOOT;
1095 rt->rt6i_protocol = cfg->fc_protocol;
1096
1097 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1098
1099 if (addr_type & IPV6_ADDR_MULTICAST)
1100 rt->u.dst.input = ip6_mc_input;
1101 else
1102 rt->u.dst.input = ip6_forward;
1103
1104 rt->u.dst.output = ip6_output;
1105
86872cb5
TG
1106 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1107 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1108 if (rt->rt6i_dst.plen == 128)
1109 rt->u.dst.flags = DST_HOST;
1110
1111#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1112 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1113 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1114#endif
1115
86872cb5 1116 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1117
1118 /* We cannot add true routes via loopback here,
1119 they would result in kernel looping; promote them to reject routes
1120 */
86872cb5 1121 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1122 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1123 /* hold loopback dev/idev if we haven't done so. */
2774c7ab 1124 if (dev != init_net.loopback_dev) {
1da177e4
LT
1125 if (dev) {
1126 dev_put(dev);
1127 in6_dev_put(idev);
1128 }
2774c7ab 1129 dev = init_net.loopback_dev;
1da177e4
LT
1130 dev_hold(dev);
1131 idev = in6_dev_get(dev);
1132 if (!idev) {
1133 err = -ENODEV;
1134 goto out;
1135 }
1136 }
1137 rt->u.dst.output = ip6_pkt_discard_out;
1138 rt->u.dst.input = ip6_pkt_discard;
1139 rt->u.dst.error = -ENETUNREACH;
1140 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1141 goto install_route;
1142 }
1143
86872cb5 1144 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1145 struct in6_addr *gw_addr;
1146 int gwa_type;
1147
86872cb5
TG
1148 gw_addr = &cfg->fc_gateway;
1149 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1150 gwa_type = ipv6_addr_type(gw_addr);
1151
1152 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1153 struct rt6_info *grt;
1154
1155 /* IPv6 strictly inhibits using not link-local
1156 addresses as nexthop address.
1157 Otherwise, router will not able to send redirects.
1158 It is very good, but in some (rare!) circumstances
1159 (SIT, PtP, NBMA NOARP links) it is handy to allow
1160 some exceptions. --ANK
1161 */
1162 err = -EINVAL;
1163 if (!(gwa_type&IPV6_ADDR_UNICAST))
1164 goto out;
1165
606a2b48 1166 grt = rt6_lookup(&init_net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1167
1168 err = -EHOSTUNREACH;
1169 if (grt == NULL)
1170 goto out;
1171 if (dev) {
1172 if (dev != grt->rt6i_dev) {
1173 dst_release(&grt->u.dst);
1174 goto out;
1175 }
1176 } else {
1177 dev = grt->rt6i_dev;
1178 idev = grt->rt6i_idev;
1179 dev_hold(dev);
1180 in6_dev_hold(grt->rt6i_idev);
1181 }
1182 if (!(grt->rt6i_flags&RTF_GATEWAY))
1183 err = 0;
1184 dst_release(&grt->u.dst);
1185
1186 if (err)
1187 goto out;
1188 }
1189 err = -EINVAL;
1190 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1191 goto out;
1192 }
1193
1194 err = -ENODEV;
1195 if (dev == NULL)
1196 goto out;
1197
86872cb5 1198 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1199 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1200 if (IS_ERR(rt->rt6i_nexthop)) {
1201 err = PTR_ERR(rt->rt6i_nexthop);
1202 rt->rt6i_nexthop = NULL;
1203 goto out;
1204 }
1205 }
1206
86872cb5 1207 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1208
1209install_route:
86872cb5
TG
1210 if (cfg->fc_mx) {
1211 struct nlattr *nla;
1212 int remaining;
1213
1214 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1215 int type = nla_type(nla);
86872cb5
TG
1216
1217 if (type) {
1218 if (type > RTAX_MAX) {
1da177e4
LT
1219 err = -EINVAL;
1220 goto out;
1221 }
86872cb5
TG
1222
1223 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1224 }
1da177e4
LT
1225 }
1226 }
1227
1228 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1229 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1230 if (!rt->u.dst.metrics[RTAX_MTU-1])
1231 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1232 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1233 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1234 rt->u.dst.dev = dev;
1235 rt->rt6i_idev = idev;
c71099ac 1236 rt->rt6i_table = table;
63152fc0
DL
1237
1238 cfg->fc_nlinfo.nl_net = dev->nd_net;
1239
86872cb5 1240 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1241
1242out:
1243 if (dev)
1244 dev_put(dev);
1245 if (idev)
1246 in6_dev_put(idev);
1247 if (rt)
40aa7b90 1248 dst_free(&rt->u.dst);
1da177e4
LT
1249 return err;
1250}
1251
86872cb5 1252static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1253{
1254 int err;
c71099ac 1255 struct fib6_table *table;
1da177e4 1256
6c813a72
PM
1257 if (rt == &ip6_null_entry)
1258 return -ENOENT;
1259
c71099ac
TG
1260 table = rt->rt6i_table;
1261 write_lock_bh(&table->tb6_lock);
1da177e4 1262
86872cb5 1263 err = fib6_del(rt, info);
1da177e4
LT
1264 dst_release(&rt->u.dst);
1265
c71099ac 1266 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1267
1268 return err;
1269}
1270
e0a1ad73
TG
1271int ip6_del_rt(struct rt6_info *rt)
1272{
4d1169c1
DL
1273 struct nl_info info = {
1274 .nl_net = &init_net,
1275 };
528c4ceb 1276 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1277}
1278
86872cb5 1279static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1280{
c71099ac 1281 struct fib6_table *table;
1da177e4
LT
1282 struct fib6_node *fn;
1283 struct rt6_info *rt;
1284 int err = -ESRCH;
1285
58f09b78 1286 table = fib6_get_table(&init_net, cfg->fc_table);
c71099ac
TG
1287 if (table == NULL)
1288 return err;
1289
1290 read_lock_bh(&table->tb6_lock);
1da177e4 1291
c71099ac 1292 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1293 &cfg->fc_dst, cfg->fc_dst_len,
1294 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1295
1da177e4 1296 if (fn) {
7cc48263 1297 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1298 if (cfg->fc_ifindex &&
1da177e4 1299 (rt->rt6i_dev == NULL ||
86872cb5 1300 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1301 continue;
86872cb5
TG
1302 if (cfg->fc_flags & RTF_GATEWAY &&
1303 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1304 continue;
86872cb5 1305 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1306 continue;
1307 dst_hold(&rt->u.dst);
c71099ac 1308 read_unlock_bh(&table->tb6_lock);
1da177e4 1309
86872cb5 1310 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1311 }
1312 }
c71099ac 1313 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1314
1315 return err;
1316}
1317
1318/*
1319 * Handle redirects
1320 */
a6279458
YH
1321struct ip6rd_flowi {
1322 struct flowi fl;
1323 struct in6_addr gateway;
1324};
1325
1326static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1327 struct flowi *fl,
1328 int flags)
1da177e4 1329{
a6279458
YH
1330 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1331 struct rt6_info *rt;
e843b9e1 1332 struct fib6_node *fn;
c71099ac 1333
1da177e4 1334 /*
e843b9e1
YH
1335 * Get the "current" route for this destination and
1336 * check if the redirect has come from approriate router.
1337 *
1338 * RFC 2461 specifies that redirects should only be
1339 * accepted if they come from the nexthop to the target.
1340 * Due to the way the routes are chosen, this notion
1341 * is a bit fuzzy and one might need to check all possible
1342 * routes.
1da177e4 1343 */
1da177e4 1344
c71099ac 1345 read_lock_bh(&table->tb6_lock);
a6279458 1346 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1347restart:
7cc48263 1348 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1349 /*
1350 * Current route is on-link; redirect is always invalid.
1351 *
1352 * Seems, previous statement is not true. It could
1353 * be node, which looks for us as on-link (f.e. proxy ndisc)
1354 * But then router serving it might decide, that we should
1355 * know truth 8)8) --ANK (980726).
1356 */
1357 if (rt6_check_expired(rt))
1358 continue;
1359 if (!(rt->rt6i_flags & RTF_GATEWAY))
1360 continue;
a6279458 1361 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1362 continue;
a6279458 1363 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1364 continue;
1365 break;
1366 }
a6279458 1367
cb15d9c2 1368 if (!rt)
a6279458 1369 rt = &ip6_null_entry;
cb15d9c2
YH
1370 BACKTRACK(&fl->fl6_src);
1371out:
a6279458
YH
1372 dst_hold(&rt->u.dst);
1373
c71099ac 1374 read_unlock_bh(&table->tb6_lock);
e843b9e1 1375
a6279458
YH
1376 return rt;
1377};
1378
1379static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1380 struct in6_addr *src,
1381 struct in6_addr *gateway,
1382 struct net_device *dev)
1383{
adaa70bb 1384 int flags = RT6_LOOKUP_F_HAS_SADDR;
a6279458
YH
1385 struct ip6rd_flowi rdfl = {
1386 .fl = {
1387 .oif = dev->ifindex,
1388 .nl_u = {
1389 .ip6_u = {
1390 .daddr = *dest,
1391 .saddr = *src,
1392 },
1393 },
1394 },
1395 .gateway = *gateway,
1396 };
adaa70bb
TG
1397
1398 if (rt6_need_strict(dest))
1399 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1400
58f09b78
DL
1401 return (struct rt6_info *)fib6_rule_lookup(&init_net,
1402 (struct flowi *)&rdfl,
1403 flags, __ip6_route_redirect);
a6279458
YH
1404}
1405
1406void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1407 struct in6_addr *saddr,
1408 struct neighbour *neigh, u8 *lladdr, int on_link)
1409{
1410 struct rt6_info *rt, *nrt = NULL;
1411 struct netevent_redirect netevent;
1412
1413 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1414
1415 if (rt == &ip6_null_entry) {
1da177e4
LT
1416 if (net_ratelimit())
1417 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1418 "for redirect target\n");
a6279458 1419 goto out;
1da177e4
LT
1420 }
1421
1da177e4
LT
1422 /*
1423 * We have finally decided to accept it.
1424 */
1425
1ab1457c 1426 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1427 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1428 NEIGH_UPDATE_F_OVERRIDE|
1429 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1430 NEIGH_UPDATE_F_ISROUTER))
1431 );
1432
1433 /*
1434 * Redirect received -> path was valid.
1435 * Look, redirects are sent only in response to data packets,
1436 * so that this nexthop apparently is reachable. --ANK
1437 */
1438 dst_confirm(&rt->u.dst);
1439
1440 /* Duplicate redirect: silently ignore. */
1441 if (neigh == rt->u.dst.neighbour)
1442 goto out;
1443
1444 nrt = ip6_rt_copy(rt);
1445 if (nrt == NULL)
1446 goto out;
1447
1448 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1449 if (on_link)
1450 nrt->rt6i_flags &= ~RTF_GATEWAY;
1451
1452 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1453 nrt->rt6i_dst.plen = 128;
1454 nrt->u.dst.flags |= DST_HOST;
1455
1456 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1457 nrt->rt6i_nexthop = neigh_clone(neigh);
1458 /* Reset pmtu, it may be better */
1459 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1460 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1461
40e22e8f 1462 if (ip6_ins_rt(nrt))
1da177e4
LT
1463 goto out;
1464
8d71740c
TT
1465 netevent.old = &rt->u.dst;
1466 netevent.new = &nrt->u.dst;
1467 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1468
1da177e4 1469 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1470 ip6_del_rt(rt);
1da177e4
LT
1471 return;
1472 }
1473
1474out:
1ab1457c 1475 dst_release(&rt->u.dst);
1da177e4
LT
1476 return;
1477}
1478
1479/*
1480 * Handle ICMP "packet too big" messages
1481 * i.e. Path MTU discovery
1482 */
1483
1484void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1485 struct net_device *dev, u32 pmtu)
1486{
1487 struct rt6_info *rt, *nrt;
1488 int allfrag = 0;
1489
606a2b48 1490 rt = rt6_lookup(dev->nd_net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1491 if (rt == NULL)
1492 return;
1493
1494 if (pmtu >= dst_mtu(&rt->u.dst))
1495 goto out;
1496
1497 if (pmtu < IPV6_MIN_MTU) {
1498 /*
1ab1457c 1499 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1500 * MTU (1280) and a fragment header should always be included
1501 * after a node receiving Too Big message reporting PMTU is
1502 * less than the IPv6 Minimum Link MTU.
1503 */
1504 pmtu = IPV6_MIN_MTU;
1505 allfrag = 1;
1506 }
1507
1508 /* New mtu received -> path was valid.
1509 They are sent only in response to data packets,
1510 so that this nexthop apparently is reachable. --ANK
1511 */
1512 dst_confirm(&rt->u.dst);
1513
1514 /* Host route. If it is static, it would be better
1515 not to override it, but add new one, so that
1516 when cache entry will expire old pmtu
1517 would return automatically.
1518 */
1519 if (rt->rt6i_flags & RTF_CACHE) {
1520 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1521 if (allfrag)
1522 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
4990509f 1523 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1524 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1525 goto out;
1526 }
1527
1528 /* Network route.
1529 Two cases are possible:
1530 1. It is connected route. Action: COW
1531 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1532 */
d5315b50 1533 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1534 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1535 else
1536 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1537
d5315b50 1538 if (nrt) {
a1e78363
YH
1539 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1540 if (allfrag)
1541 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1542
1543 /* According to RFC 1981, detecting PMTU increase shouldn't be
1544 * happened within 5 mins, the recommended timer is 10 mins.
1545 * Here this route expiration time is set to ip6_rt_mtu_expires
1546 * which is 10 mins. After 10 mins the decreased pmtu is expired
1547 * and detecting PMTU increase will be automatically happened.
1548 */
4990509f 1549 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1550 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1551
40e22e8f 1552 ip6_ins_rt(nrt);
1da177e4 1553 }
1da177e4
LT
1554out:
1555 dst_release(&rt->u.dst);
1556}
1557
1558/*
1559 * Misc support functions
1560 */
1561
1562static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1563{
1564 struct rt6_info *rt = ip6_dst_alloc();
1565
1566 if (rt) {
1567 rt->u.dst.input = ort->u.dst.input;
1568 rt->u.dst.output = ort->u.dst.output;
1569
1570 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1571 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1572 rt->u.dst.dev = ort->u.dst.dev;
1573 if (rt->u.dst.dev)
1574 dev_hold(rt->u.dst.dev);
1575 rt->rt6i_idev = ort->rt6i_idev;
1576 if (rt->rt6i_idev)
1577 in6_dev_hold(rt->rt6i_idev);
1578 rt->u.dst.lastuse = jiffies;
1579 rt->rt6i_expires = 0;
1580
1581 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1582 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1583 rt->rt6i_metric = 0;
1584
1585 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1586#ifdef CONFIG_IPV6_SUBTREES
1587 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1588#endif
c71099ac 1589 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1590 }
1591 return rt;
1592}
1593
70ceb4f5 1594#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1595static struct rt6_info *rt6_get_route_info(struct net *net,
1596 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1597 struct in6_addr *gwaddr, int ifindex)
1598{
1599 struct fib6_node *fn;
1600 struct rt6_info *rt = NULL;
c71099ac
TG
1601 struct fib6_table *table;
1602
efa2cea0 1603 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1604 if (table == NULL)
1605 return NULL;
70ceb4f5 1606
c71099ac
TG
1607 write_lock_bh(&table->tb6_lock);
1608 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1609 if (!fn)
1610 goto out;
1611
7cc48263 1612 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1613 if (rt->rt6i_dev->ifindex != ifindex)
1614 continue;
1615 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1616 continue;
1617 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1618 continue;
1619 dst_hold(&rt->u.dst);
1620 break;
1621 }
1622out:
c71099ac 1623 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1624 return rt;
1625}
1626
efa2cea0
DL
1627static struct rt6_info *rt6_add_route_info(struct net *net,
1628 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1629 struct in6_addr *gwaddr, int ifindex,
1630 unsigned pref)
1631{
86872cb5
TG
1632 struct fib6_config cfg = {
1633 .fc_table = RT6_TABLE_INFO,
238fc7ea 1634 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1635 .fc_ifindex = ifindex,
1636 .fc_dst_len = prefixlen,
1637 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1638 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1639 .fc_nlinfo.pid = 0,
1640 .fc_nlinfo.nlh = NULL,
1641 .fc_nlinfo.nl_net = net,
86872cb5
TG
1642 };
1643
1644 ipv6_addr_copy(&cfg.fc_dst, prefix);
1645 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1646
e317da96
YH
1647 /* We should treat it as a default route if prefix length is 0. */
1648 if (!prefixlen)
86872cb5 1649 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1650
86872cb5 1651 ip6_route_add(&cfg);
70ceb4f5 1652
efa2cea0 1653 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1654}
1655#endif
1656
1da177e4 1657struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1658{
1da177e4 1659 struct rt6_info *rt;
c71099ac 1660 struct fib6_table *table;
1da177e4 1661
58f09b78 1662 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
c71099ac
TG
1663 if (table == NULL)
1664 return NULL;
1da177e4 1665
c71099ac 1666 write_lock_bh(&table->tb6_lock);
7cc48263 1667 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1668 if (dev == rt->rt6i_dev &&
045927ff 1669 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1670 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1671 break;
1672 }
1673 if (rt)
1674 dst_hold(&rt->u.dst);
c71099ac 1675 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1676 return rt;
1677}
1678
c7dc89c0
FT
1679EXPORT_SYMBOL(rt6_get_dflt_router);
1680
1da177e4 1681struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1682 struct net_device *dev,
1683 unsigned int pref)
1da177e4 1684{
86872cb5
TG
1685 struct fib6_config cfg = {
1686 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1687 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1688 .fc_ifindex = dev->ifindex,
1689 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1690 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1691 };
1da177e4 1692
86872cb5 1693 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1694
86872cb5 1695 ip6_route_add(&cfg);
1da177e4 1696
1da177e4
LT
1697 return rt6_get_dflt_router(gwaddr, dev);
1698}
1699
7b4da532 1700void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1701{
1702 struct rt6_info *rt;
c71099ac
TG
1703 struct fib6_table *table;
1704
1705 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1706 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1707 if (table == NULL)
1708 return;
1da177e4
LT
1709
1710restart:
c71099ac 1711 read_lock_bh(&table->tb6_lock);
7cc48263 1712 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1713 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1714 dst_hold(&rt->u.dst);
c71099ac 1715 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1716 ip6_del_rt(rt);
1da177e4
LT
1717 goto restart;
1718 }
1719 }
c71099ac 1720 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1721}
1722
86872cb5
TG
1723static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1724 struct fib6_config *cfg)
1725{
1726 memset(cfg, 0, sizeof(*cfg));
1727
1728 cfg->fc_table = RT6_TABLE_MAIN;
1729 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1730 cfg->fc_metric = rtmsg->rtmsg_metric;
1731 cfg->fc_expires = rtmsg->rtmsg_info;
1732 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1733 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1734 cfg->fc_flags = rtmsg->rtmsg_flags;
1735
f1243c2d
BT
1736 cfg->fc_nlinfo.nl_net = &init_net;
1737
86872cb5
TG
1738 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1739 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1740 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1741}
1742
1da177e4
LT
1743int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1744{
86872cb5 1745 struct fib6_config cfg;
1da177e4
LT
1746 struct in6_rtmsg rtmsg;
1747 int err;
1748
1749 switch(cmd) {
1750 case SIOCADDRT: /* Add a route */
1751 case SIOCDELRT: /* Delete a route */
1752 if (!capable(CAP_NET_ADMIN))
1753 return -EPERM;
1754 err = copy_from_user(&rtmsg, arg,
1755 sizeof(struct in6_rtmsg));
1756 if (err)
1757 return -EFAULT;
86872cb5
TG
1758
1759 rtmsg_to_fib6_config(&rtmsg, &cfg);
1760
1da177e4
LT
1761 rtnl_lock();
1762 switch (cmd) {
1763 case SIOCADDRT:
86872cb5 1764 err = ip6_route_add(&cfg);
1da177e4
LT
1765 break;
1766 case SIOCDELRT:
86872cb5 1767 err = ip6_route_del(&cfg);
1da177e4
LT
1768 break;
1769 default:
1770 err = -EINVAL;
1771 }
1772 rtnl_unlock();
1773
1774 return err;
3ff50b79 1775 }
1da177e4
LT
1776
1777 return -EINVAL;
1778}
1779
1780/*
1781 * Drop the packet on the floor
1782 */
1783
50eb431d 1784static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1da177e4 1785{
612f09e8
YH
1786 int type;
1787 switch (ipstats_mib_noroutes) {
1788 case IPSTATS_MIB_INNOROUTES:
0660e03f 1789 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1790 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1791 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1792 break;
1793 }
1794 /* FALLTHROUGH */
1795 case IPSTATS_MIB_OUTNOROUTES:
1796 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1797 break;
1798 }
9ce8ade0 1799 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1800 kfree_skb(skb);
1801 return 0;
1802}
1803
9ce8ade0
TG
1804static int ip6_pkt_discard(struct sk_buff *skb)
1805{
612f09e8 1806 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1807}
1808
20380731 1809static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1810{
1811 skb->dev = skb->dst->dev;
612f09e8 1812 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1813}
1814
6723ab54
DM
1815#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1816
9ce8ade0
TG
1817static int ip6_pkt_prohibit(struct sk_buff *skb)
1818{
612f09e8 1819 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1820}
1821
1822static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1823{
1824 skb->dev = skb->dst->dev;
612f09e8 1825 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1826}
1827
6723ab54
DM
1828#endif
1829
1da177e4
LT
1830/*
1831 * Allocate a dst for local (unicast / anycast) address.
1832 */
1833
1834struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1835 const struct in6_addr *addr,
1836 int anycast)
1837{
1838 struct rt6_info *rt = ip6_dst_alloc();
1839
1840 if (rt == NULL)
1841 return ERR_PTR(-ENOMEM);
1842
2774c7ab 1843 dev_hold(init_net.loopback_dev);
1da177e4
LT
1844 in6_dev_hold(idev);
1845
1846 rt->u.dst.flags = DST_HOST;
1847 rt->u.dst.input = ip6_input;
1848 rt->u.dst.output = ip6_output;
2774c7ab 1849 rt->rt6i_dev = init_net.loopback_dev;
1da177e4
LT
1850 rt->rt6i_idev = idev;
1851 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1852 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1853 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1854 rt->u.dst.obsolete = -1;
1855
1856 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1857 if (anycast)
1858 rt->rt6i_flags |= RTF_ANYCAST;
1859 else
1da177e4
LT
1860 rt->rt6i_flags |= RTF_LOCAL;
1861 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1862 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1863 dst_free(&rt->u.dst);
1da177e4
LT
1864 return ERR_PTR(-ENOMEM);
1865 }
1866
1867 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1868 rt->rt6i_dst.plen = 128;
58f09b78 1869 rt->rt6i_table = fib6_get_table(&init_net, RT6_TABLE_LOCAL);
1da177e4
LT
1870
1871 atomic_set(&rt->u.dst.__refcnt, 1);
1872
1873 return rt;
1874}
1875
1876static int fib6_ifdown(struct rt6_info *rt, void *arg)
1877{
1878 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1879 rt != &ip6_null_entry) {
1880 RT6_TRACE("deleted by ifdown %p\n", rt);
1881 return -1;
1882 }
1883 return 0;
1884}
1885
f3db4851 1886void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1887{
f3db4851 1888 fib6_clean_all(net, fib6_ifdown, 0, dev);
1da177e4
LT
1889}
1890
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1896
1897static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1898{
1899 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1900 struct inet6_dev *idev;
1901
1902 /* In IPv6 pmtu discovery is not optional,
1903 so that RTAX_MTU lock cannot disable it.
1904 We still use this lock to block changes
1905 caused by addrconf/ndisc.
1906 */
1907
1908 idev = __in6_dev_get(arg->dev);
1909 if (idev == NULL)
1910 return 0;
1911
1912 /* For administrative MTU increase, there is no way to discover
1913 IPv6 PMTU increase, so PMTU increase should be updated here.
1914 Since RFC 1981 doesn't include administrative MTU increase
1915 update PMTU increase is a MUST. (i.e. jumbo frame)
1916 */
1917 /*
1918 If new MTU is less than route PMTU, this new MTU will be the
1919 lowest MTU in the path, update the route PMTU to reflect PMTU
1920 decreases; if new MTU is greater than route PMTU, and the
1921 old MTU is the lowest MTU in the path, update the route PMTU
1922 to reflect the increase. In this case if the other nodes' MTU
1923 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1924 PMTU discouvery.
1925 */
1926 if (rt->rt6i_dev == arg->dev &&
1927 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 1928 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 1929 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1930 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1931 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
566cfd8f
SA
1932 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1933 }
1da177e4
LT
1934 return 0;
1935}
1936
1937void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1938{
c71099ac
TG
1939 struct rt6_mtu_change_arg arg = {
1940 .dev = dev,
1941 .mtu = mtu,
1942 };
1da177e4 1943
f3db4851 1944 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1945}
1946
ef7c79ed 1947static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 1948 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1949 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1950 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1951 [RTA_PRIORITY] = { .type = NLA_U32 },
1952 [RTA_METRICS] = { .type = NLA_NESTED },
1953};
1954
1955static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1956 struct fib6_config *cfg)
1da177e4 1957{
86872cb5
TG
1958 struct rtmsg *rtm;
1959 struct nlattr *tb[RTA_MAX+1];
1960 int err;
1da177e4 1961
86872cb5
TG
1962 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1963 if (err < 0)
1964 goto errout;
1da177e4 1965
86872cb5
TG
1966 err = -EINVAL;
1967 rtm = nlmsg_data(nlh);
1968 memset(cfg, 0, sizeof(*cfg));
1969
1970 cfg->fc_table = rtm->rtm_table;
1971 cfg->fc_dst_len = rtm->rtm_dst_len;
1972 cfg->fc_src_len = rtm->rtm_src_len;
1973 cfg->fc_flags = RTF_UP;
1974 cfg->fc_protocol = rtm->rtm_protocol;
1975
1976 if (rtm->rtm_type == RTN_UNREACHABLE)
1977 cfg->fc_flags |= RTF_REJECT;
1978
1979 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1980 cfg->fc_nlinfo.nlh = nlh;
2216b483 1981 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
86872cb5
TG
1982
1983 if (tb[RTA_GATEWAY]) {
1984 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1985 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1986 }
86872cb5
TG
1987
1988 if (tb[RTA_DST]) {
1989 int plen = (rtm->rtm_dst_len + 7) >> 3;
1990
1991 if (nla_len(tb[RTA_DST]) < plen)
1992 goto errout;
1993
1994 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1995 }
86872cb5
TG
1996
1997 if (tb[RTA_SRC]) {
1998 int plen = (rtm->rtm_src_len + 7) >> 3;
1999
2000 if (nla_len(tb[RTA_SRC]) < plen)
2001 goto errout;
2002
2003 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2004 }
86872cb5
TG
2005
2006 if (tb[RTA_OIF])
2007 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2008
2009 if (tb[RTA_PRIORITY])
2010 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2011
2012 if (tb[RTA_METRICS]) {
2013 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2014 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2015 }
86872cb5
TG
2016
2017 if (tb[RTA_TABLE])
2018 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2019
2020 err = 0;
2021errout:
2022 return err;
1da177e4
LT
2023}
2024
c127ea2c 2025static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2026{
b854272b 2027 struct net *net = skb->sk->sk_net;
86872cb5
TG
2028 struct fib6_config cfg;
2029 int err;
1da177e4 2030
b854272b
DL
2031 if (net != &init_net)
2032 return -EINVAL;
2033
86872cb5
TG
2034 err = rtm_to_fib6_config(skb, nlh, &cfg);
2035 if (err < 0)
2036 return err;
2037
2038 return ip6_route_del(&cfg);
1da177e4
LT
2039}
2040
c127ea2c 2041static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2042{
b854272b 2043 struct net *net = skb->sk->sk_net;
86872cb5
TG
2044 struct fib6_config cfg;
2045 int err;
1da177e4 2046
b854272b
DL
2047 if (net != &init_net)
2048 return -EINVAL;
2049
86872cb5
TG
2050 err = rtm_to_fib6_config(skb, nlh, &cfg);
2051 if (err < 0)
2052 return err;
2053
2054 return ip6_route_add(&cfg);
1da177e4
LT
2055}
2056
339bf98f
TG
2057static inline size_t rt6_nlmsg_size(void)
2058{
2059 return NLMSG_ALIGN(sizeof(struct rtmsg))
2060 + nla_total_size(16) /* RTA_SRC */
2061 + nla_total_size(16) /* RTA_DST */
2062 + nla_total_size(16) /* RTA_GATEWAY */
2063 + nla_total_size(16) /* RTA_PREFSRC */
2064 + nla_total_size(4) /* RTA_TABLE */
2065 + nla_total_size(4) /* RTA_IIF */
2066 + nla_total_size(4) /* RTA_OIF */
2067 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2068 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2069 + nla_total_size(sizeof(struct rta_cacheinfo));
2070}
2071
1da177e4 2072static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2073 struct in6_addr *dst, struct in6_addr *src,
2074 int iif, int type, u32 pid, u32 seq,
2075 int prefix, unsigned int flags)
1da177e4
LT
2076{
2077 struct rtmsg *rtm;
2d7202bf 2078 struct nlmsghdr *nlh;
e3703b3d 2079 long expires;
9e762a4a 2080 u32 table;
1da177e4
LT
2081
2082 if (prefix) { /* user wants prefix routes only */
2083 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2084 /* success since this is not a prefix route */
2085 return 1;
2086 }
2087 }
2088
2d7202bf
TG
2089 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2090 if (nlh == NULL)
26932566 2091 return -EMSGSIZE;
2d7202bf
TG
2092
2093 rtm = nlmsg_data(nlh);
1da177e4
LT
2094 rtm->rtm_family = AF_INET6;
2095 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2096 rtm->rtm_src_len = rt->rt6i_src.plen;
2097 rtm->rtm_tos = 0;
c71099ac 2098 if (rt->rt6i_table)
9e762a4a 2099 table = rt->rt6i_table->tb6_id;
c71099ac 2100 else
9e762a4a
PM
2101 table = RT6_TABLE_UNSPEC;
2102 rtm->rtm_table = table;
2d7202bf 2103 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2104 if (rt->rt6i_flags&RTF_REJECT)
2105 rtm->rtm_type = RTN_UNREACHABLE;
2106 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2107 rtm->rtm_type = RTN_LOCAL;
2108 else
2109 rtm->rtm_type = RTN_UNICAST;
2110 rtm->rtm_flags = 0;
2111 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2112 rtm->rtm_protocol = rt->rt6i_protocol;
2113 if (rt->rt6i_flags&RTF_DYNAMIC)
2114 rtm->rtm_protocol = RTPROT_REDIRECT;
2115 else if (rt->rt6i_flags & RTF_ADDRCONF)
2116 rtm->rtm_protocol = RTPROT_KERNEL;
2117 else if (rt->rt6i_flags&RTF_DEFAULT)
2118 rtm->rtm_protocol = RTPROT_RA;
2119
2120 if (rt->rt6i_flags&RTF_CACHE)
2121 rtm->rtm_flags |= RTM_F_CLONED;
2122
2123 if (dst) {
2d7202bf 2124 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2125 rtm->rtm_dst_len = 128;
1da177e4 2126 } else if (rtm->rtm_dst_len)
2d7202bf 2127 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2128#ifdef CONFIG_IPV6_SUBTREES
2129 if (src) {
2d7202bf 2130 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2131 rtm->rtm_src_len = 128;
1da177e4 2132 } else if (rtm->rtm_src_len)
2d7202bf 2133 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2134#endif
2135 if (iif)
2d7202bf 2136 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2137 else if (dst) {
2138 struct in6_addr saddr_buf;
5e5f3f0f
YH
2139 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2140 dst, &saddr_buf) == 0)
2d7202bf 2141 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2142 }
2d7202bf 2143
1da177e4 2144 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2145 goto nla_put_failure;
2146
1da177e4 2147 if (rt->u.dst.neighbour)
2d7202bf
TG
2148 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2149
1da177e4 2150 if (rt->u.dst.dev)
2d7202bf
TG
2151 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2152
2153 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2154
2155 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2156 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2157 expires, rt->u.dst.error) < 0)
2158 goto nla_put_failure;
2d7202bf
TG
2159
2160 return nlmsg_end(skb, nlh);
2161
2162nla_put_failure:
26932566
PM
2163 nlmsg_cancel(skb, nlh);
2164 return -EMSGSIZE;
1da177e4
LT
2165}
2166
1b43af54 2167int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2168{
2169 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2170 int prefix;
2171
2d7202bf
TG
2172 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2173 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2174 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2175 } else
2176 prefix = 0;
2177
2178 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2179 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2180 prefix, NLM_F_MULTI);
1da177e4
LT
2181}
2182
c127ea2c 2183static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2184{
b854272b 2185 struct net *net = in_skb->sk->sk_net;
ab364a6f
TG
2186 struct nlattr *tb[RTA_MAX+1];
2187 struct rt6_info *rt;
1da177e4 2188 struct sk_buff *skb;
ab364a6f 2189 struct rtmsg *rtm;
1da177e4 2190 struct flowi fl;
ab364a6f 2191 int err, iif = 0;
1da177e4 2192
b854272b
DL
2193 if (net != &init_net)
2194 return -EINVAL;
2195
ab364a6f
TG
2196 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2197 if (err < 0)
2198 goto errout;
1da177e4 2199
ab364a6f 2200 err = -EINVAL;
1da177e4 2201 memset(&fl, 0, sizeof(fl));
1da177e4 2202
ab364a6f
TG
2203 if (tb[RTA_SRC]) {
2204 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2205 goto errout;
2206
2207 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2208 }
2209
2210 if (tb[RTA_DST]) {
2211 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2212 goto errout;
2213
2214 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2215 }
2216
2217 if (tb[RTA_IIF])
2218 iif = nla_get_u32(tb[RTA_IIF]);
2219
2220 if (tb[RTA_OIF])
2221 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2222
2223 if (iif) {
2224 struct net_device *dev;
881d966b 2225 dev = __dev_get_by_index(&init_net, iif);
1da177e4
LT
2226 if (!dev) {
2227 err = -ENODEV;
ab364a6f 2228 goto errout;
1da177e4
LT
2229 }
2230 }
2231
ab364a6f
TG
2232 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2233 if (skb == NULL) {
2234 err = -ENOBUFS;
2235 goto errout;
2236 }
1da177e4 2237
ab364a6f
TG
2238 /* Reserve room for dummy headers, this skb can pass
2239 through good chunk of routing engine.
2240 */
459a98ed 2241 skb_reset_mac_header(skb);
ab364a6f 2242 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2243
ab364a6f 2244 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2245 skb->dst = &rt->u.dst;
2246
ab364a6f 2247 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2248 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2249 nlh->nlmsg_seq, 0, 0);
1da177e4 2250 if (err < 0) {
ab364a6f
TG
2251 kfree_skb(skb);
2252 goto errout;
1da177e4
LT
2253 }
2254
97c53cac 2255 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
ab364a6f 2256errout:
1da177e4 2257 return err;
1da177e4
LT
2258}
2259
86872cb5 2260void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2261{
2262 struct sk_buff *skb;
528c4ceb
DL
2263 u32 seq;
2264 int err;
2265
2266 err = -ENOBUFS;
2267 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2268
339bf98f 2269 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2270 if (skb == NULL)
2271 goto errout;
2272
528c4ceb
DL
2273 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2274 event, info->pid, seq, 0, 0);
26932566
PM
2275 if (err < 0) {
2276 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2277 WARN_ON(err == -EMSGSIZE);
2278 kfree_skb(skb);
2279 goto errout;
2280 }
528c4ceb
DL
2281 err = rtnl_notify(skb, &init_net, info->pid,
2282 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
21713ebc
TG
2283errout:
2284 if (err < 0)
97c53cac 2285 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2286}
2287
2288/*
2289 * /proc
2290 */
2291
2292#ifdef CONFIG_PROC_FS
2293
/*
 * NOTE(review): presumably the fixed width of one /proc/net/ipv6_route
 * line from the pre-seq_file implementation; confirm before relying on it.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/*
 * Buffer-cursor state for a /proc read.
 * NOTE(review): the seq_file based handlers in this section do not
 * reference this structure; check the rest of the file for users.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2304
2305static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2306{
33120b30 2307 struct seq_file *m = p_arg;
1da177e4 2308
33120b30
AD
2309 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2310 rt->rt6i_dst.plen);
1da177e4
LT
2311
2312#ifdef CONFIG_IPV6_SUBTREES
33120b30
AD
2313 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2314 rt->rt6i_src.plen);
1da177e4 2315#else
33120b30 2316 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2317#endif
2318
2319 if (rt->rt6i_nexthop) {
33120b30
AD
2320 seq_printf(m, NIP6_SEQFMT,
2321 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2322 } else {
33120b30 2323 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2324 }
33120b30
AD
2325 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2326 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2327 rt->u.dst.__use, rt->rt6i_flags,
2328 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2329 return 0;
2330}
2331
33120b30 2332static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2333{
f3db4851
DL
2334 struct net *net = (struct net *)m->private;
2335 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2336 return 0;
2337}
1da177e4 2338
33120b30
AD
2339static int ipv6_route_open(struct inode *inode, struct file *file)
2340{
f3db4851
DL
2341 struct net *net = get_proc_net(inode);
2342 if (!net)
2343 return -ENXIO;
2344 return single_open(file, ipv6_route_show, net);
2345}
2346
2347static int ipv6_route_release(struct inode *inode, struct file *file)
2348{
2349 struct seq_file *seq = file->private_data;
2350 struct net *net = seq->private;
2351 put_net(net);
2352 return single_release(inode, file);
1da177e4
LT
2353}
2354
33120b30
AD
2355static const struct file_operations ipv6_route_proc_fops = {
2356 .owner = THIS_MODULE,
2357 .open = ipv6_route_open,
2358 .read = seq_read,
2359 .llseek = seq_lseek,
f3db4851 2360 .release = ipv6_route_release,
33120b30
AD
2361};
2362
1da177e4
LT
2363static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2364{
69ddb805 2365 struct net *net = (struct net *)seq->private;
1da177e4 2366 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2367 net->ipv6.rt6_stats->fib_nodes,
2368 net->ipv6.rt6_stats->fib_route_nodes,
2369 net->ipv6.rt6_stats->fib_rt_alloc,
2370 net->ipv6.rt6_stats->fib_rt_entries,
2371 net->ipv6.rt6_stats->fib_rt_cache,
c572872f 2372 atomic_read(&ip6_dst_ops.entries),
69ddb805 2373 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2374
2375 return 0;
2376}
2377
2378static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2379{
69ddb805
DL
2380 struct net *net = get_proc_net(inode);
2381 return single_open(file, rt6_stats_seq_show, net);
2382}
2383
2384static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2385{
2386 struct seq_file *seq = file->private_data;
2387 struct net *net = (struct net *)seq->private;
2388 put_net(net);
2389 return single_release(inode, file);
1da177e4
LT
2390}
2391
9a32144e 2392static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2393 .owner = THIS_MODULE,
2394 .open = rt6_stats_seq_open,
2395 .read = seq_read,
2396 .llseek = seq_lseek,
69ddb805 2397 .release = rt6_stats_seq_release,
1da177e4
LT
2398};
2399#endif /* CONFIG_PROC_FS */
2400
2401#ifdef CONFIG_SYSCTL
2402
1da177e4
LT
2403static
2404int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2405 void __user *buffer, size_t *lenp, loff_t *ppos)
2406{
5b7c931d
DL
2407 struct net *net = current->nsproxy->net_ns;
2408 int delay = net->ipv6.sysctl.flush_delay;
1da177e4
LT
2409 if (write) {
2410 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
5b7c931d 2411 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2412 return 0;
2413 } else
2414 return -EINVAL;
2415}
2416
760f2d01 2417ctl_table ipv6_route_table_template[] = {
1ab1457c 2418 {
1da177e4 2419 .procname = "flush",
4990509f 2420 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2421 .maxlen = sizeof(int),
89c8b3a1 2422 .mode = 0200,
1ab1457c 2423 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2424 },
2425 {
2426 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2427 .procname = "gc_thresh",
1ab1457c 2428 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2429 .maxlen = sizeof(int),
2430 .mode = 0644,
1ab1457c 2431 .proc_handler = &proc_dointvec,
1da177e4
LT
2432 },
2433 {
2434 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2435 .procname = "max_size",
4990509f 2436 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2437 .maxlen = sizeof(int),
2438 .mode = 0644,
1ab1457c 2439 .proc_handler = &proc_dointvec,
1da177e4
LT
2440 },
2441 {
2442 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2443 .procname = "gc_min_interval",
4990509f 2444 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2445 .maxlen = sizeof(int),
2446 .mode = 0644,
1ab1457c 2447 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2448 .strategy = &sysctl_jiffies,
2449 },
2450 {
2451 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2452 .procname = "gc_timeout",
4990509f 2453 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2454 .maxlen = sizeof(int),
2455 .mode = 0644,
1ab1457c 2456 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2457 .strategy = &sysctl_jiffies,
2458 },
2459 {
2460 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2461 .procname = "gc_interval",
4990509f 2462 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2463 .maxlen = sizeof(int),
2464 .mode = 0644,
1ab1457c 2465 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2466 .strategy = &sysctl_jiffies,
2467 },
2468 {
2469 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2470 .procname = "gc_elasticity",
4990509f 2471 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2472 .maxlen = sizeof(int),
2473 .mode = 0644,
1ab1457c 2474 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2475 .strategy = &sysctl_jiffies,
2476 },
2477 {
2478 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2479 .procname = "mtu_expires",
4990509f 2480 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2481 .maxlen = sizeof(int),
2482 .mode = 0644,
1ab1457c 2483 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2484 .strategy = &sysctl_jiffies,
2485 },
2486 {
2487 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2488 .procname = "min_adv_mss",
4990509f 2489 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2490 .maxlen = sizeof(int),
2491 .mode = 0644,
1ab1457c 2492 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2493 .strategy = &sysctl_jiffies,
2494 },
2495 {
2496 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2497 .procname = "gc_min_interval_ms",
4990509f 2498 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2499 .maxlen = sizeof(int),
2500 .mode = 0644,
1ab1457c 2501 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2502 .strategy = &sysctl_ms_jiffies,
2503 },
2504 { .ctl_name = 0 }
2505};
2506
760f2d01
DL
2507struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2508{
2509 struct ctl_table *table;
2510
2511 table = kmemdup(ipv6_route_table_template,
2512 sizeof(ipv6_route_table_template),
2513 GFP_KERNEL);
5ee09105
YH
2514
2515 if (table) {
2516 table[0].data = &net->ipv6.sysctl.flush_delay;
2517 /* table[1].data will be handled when we have
2518 routes per namespace */
2519 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2520 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2521 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2522 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2523 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2524 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2525 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2526 }
2527
760f2d01
DL
2528 return table;
2529}
1da177e4
LT
2530#endif
2531
cdb18761
DL
/*
 * Per-namespace setup: with CONFIG_PROC_FS, create the "ipv6_route" and
 * "rt6_stats" entries under the namespace's /proc/net.
 *
 * The results of proc_net_fops_create() are not checked; this always
 * returns 0.
 */
static int ip6_route_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
	proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
#endif

	return 0;
}
2540
/*
 * Per-namespace teardown: with CONFIG_PROC_FS, remove the /proc/net
 * entries created by ip6_route_net_init().
 */
static void ip6_route_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "ipv6_route");
	proc_net_remove(net, "rt6_stats");
#endif
}
2548
2549static struct pernet_operations ip6_route_net_ops = {
2550 .init = ip6_route_net_init,
2551 .exit = ip6_route_net_exit,
2552};
2553
433d49c3 2554int __init ip6_route_init(void)
1da177e4 2555{
433d49c3
DL
2556 int ret;
2557
e5d679f3
AD
2558 ip6_dst_ops.kmem_cachep =
2559 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b
DL
2560 SLAB_HWCACHE_ALIGN, NULL);
2561 if (!ip6_dst_ops.kmem_cachep)
2562 return -ENOMEM;
2563
14e50e57
DM
2564 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2565
433d49c3
DL
2566 ret = fib6_init();
2567 if (ret)
2568 goto out_kmem_cache;
2569
433d49c3
DL
2570 ret = xfrm6_init();
2571 if (ret)
cdb18761 2572 goto out_fib6_init;
c35b7e72 2573
433d49c3
DL
2574 ret = fib6_rules_init();
2575 if (ret)
2576 goto xfrm6_init;
7e5449c2 2577
433d49c3
DL
2578 ret = -ENOBUFS;
2579 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2580 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2581 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2582 goto fib6_rules_init;
c127ea2c 2583
cdb18761
DL
2584 ret = register_pernet_subsys(&ip6_route_net_ops);
2585 if (ret)
2586 goto fib6_rules_init;
433d49c3
DL
2587out:
2588 return ret;
2589
2590fib6_rules_init:
433d49c3
DL
2591 fib6_rules_cleanup();
2592xfrm6_init:
433d49c3 2593 xfrm6_fini();
433d49c3 2594out_fib6_init:
f3db4851 2595 rt6_ifdown(&init_net, NULL);
433d49c3
DL
2596 fib6_gc_cleanup();
2597out_kmem_cache:
2598 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2599 goto out;
1da177e4
LT
2600}
2601
2602void ip6_route_cleanup(void)
2603{
cdb18761 2604 unregister_pernet_subsys(&ip6_route_net_ops);
101367c2 2605 fib6_rules_cleanup();
1da177e4 2606 xfrm6_fini();
f3db4851 2607 rt6_ifdown(&init_net, NULL);
1da177e4
LT
2608 fib6_gc_cleanup();
2609 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2610}