]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NETNS][IPV6] ip6_fib - dynamically allocate gc-timer
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug level for this file.  Raising RT6_DEBUG to 3 (or more) turns
 * RDBG() and RT6_TRACE() into printk() calls; below that they expand
 * to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/* Compile-time switch read by ip6_pol_route() via #if CLONE_OFFLINK_ROUTE. */
#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5
YH
90#ifdef CONFIG_IPV6_ROUTE_INFO
91static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
92 struct in6_addr *gwaddr, int ifindex,
93 unsigned pref);
94static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
95 struct in6_addr *gwaddr, int ifindex);
96#endif
97
1da177e4
LT
98static struct dst_ops ip6_dst_ops = {
99 .family = AF_INET6,
100 .protocol = __constant_htons(ETH_P_IPV6),
101 .gc = ip6_dst_gc,
102 .gc_thresh = 1024,
103 .check = ip6_dst_check,
104 .destroy = ip6_dst_destroy,
105 .ifdown = ip6_dst_ifdown,
106 .negative_advice = ip6_negative_advice,
107 .link_failure = ip6_link_failure,
108 .update_pmtu = ip6_rt_update_pmtu,
862b82c6 109 .local_out = ip6_local_out,
1da177e4 110 .entry_size = sizeof(struct rt6_info),
e2422970 111 .entries = ATOMIC_INIT(0),
1da177e4
LT
112};
113
14e50e57
DM
114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
115{
116}
117
118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6,
120 .protocol = __constant_htons(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
124 .entry_size = sizeof(struct rt6_info),
e2422970 125 .entries = ATOMIC_INIT(0),
14e50e57
DM
126};
127
1da177e4
LT
128struct rt6_info ip6_null_entry = {
129 .u = {
130 .dst = {
131 .__refcnt = ATOMIC_INIT(1),
132 .__use = 1,
1da177e4
LT
133 .obsolete = -1,
134 .error = -ENETUNREACH,
135 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
136 .input = ip6_pkt_discard,
137 .output = ip6_pkt_discard_out,
138 .ops = &ip6_dst_ops,
139 .path = (struct dst_entry*)&ip6_null_entry,
140 }
141 },
142 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
143 .rt6i_metric = ~(u32) 0,
144 .rt6i_ref = ATOMIC_INIT(1),
145};
146
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Static reject entry whose dst error is -EACCES; packets are handed
 * to the ip6_pkt_prohibit*() handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
		}
	},
};

/*
 * Static reject entry whose dst error is -EINVAL; both directions go
 * to dst_discard.
 */
struct rt6_info ip6_blk_hole_entry = {
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= dst_discard,
			.output		= dst_discard,
		}
	},
};

#endif
191
1da177e4
LT
192/* allocate dst with ip6_dst_ops */
193static __inline__ struct rt6_info *ip6_dst_alloc(void)
194{
195 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
196}
197
198static void ip6_dst_destroy(struct dst_entry *dst)
199{
200 struct rt6_info *rt = (struct rt6_info *)dst;
201 struct inet6_dev *idev = rt->rt6i_idev;
202
203 if (idev != NULL) {
204 rt->rt6i_idev = NULL;
205 in6_dev_put(idev);
1ab1457c 206 }
1da177e4
LT
207}
208
209static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
210 int how)
211{
212 struct rt6_info *rt = (struct rt6_info *)dst;
213 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6
DL
214 struct net_device *loopback_dev =
215 dev->nd_net->loopback_dev;
1da177e4 216
5a3e55d6
DL
217 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
218 struct inet6_dev *loopback_idev =
219 in6_dev_get(loopback_dev);
1da177e4
LT
220 if (loopback_idev != NULL) {
221 rt->rt6i_idev = loopback_idev;
222 in6_dev_put(idev);
223 }
224 }
225}
226
227static __inline__ int rt6_check_expired(const struct rt6_info *rt)
228{
229 return (rt->rt6i_flags & RTF_EXPIRES &&
230 time_after(jiffies, rt->rt6i_expires));
231}
232
c71099ac
TG
233static inline int rt6_need_strict(struct in6_addr *daddr)
234{
235 return (ipv6_addr_type(daddr) &
236 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
237}
238
1da177e4 239/*
c71099ac 240 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
241 */
242
243static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
244 int oif,
245 int strict)
246{
247 struct rt6_info *local = NULL;
248 struct rt6_info *sprt;
249
250 if (oif) {
7cc48263 251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
252 struct net_device *dev = sprt->rt6i_dev;
253 if (dev->ifindex == oif)
254 return sprt;
255 if (dev->flags & IFF_LOOPBACK) {
256 if (sprt->rt6i_idev == NULL ||
257 sprt->rt6i_idev->dev->ifindex != oif) {
258 if (strict && oif)
259 continue;
1ab1457c 260 if (local && (!oif ||
1da177e4
LT
261 local->rt6i_idev->dev->ifindex == oif))
262 continue;
263 }
264 local = sprt;
265 }
266 }
267
268 if (local)
269 return local;
270
271 if (strict)
272 return &ip6_null_entry;
273 }
274 return rt;
275}
276
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards
 * the next hop of @rt when its neighbour entry is not in a NUD_VALID
 * state and the per-interface rtr_probe_interval has elapsed since the
 * entry was last updated.  A NULL @rt or a route without a next hop is
 * ignored.
 *
 * The rate-limit timestamp (neigh->updated) is modified here, so the
 * neighbour lock is taken for writing: under the read side two
 * concurrent callers could both pass the time check and both probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	/* cheap unlocked pre-check; re-validated under the lock below */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* drop the lock before transmitting */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
312
1da177e4 313/*
554cfb7e 314 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 315 */
b6f99a21 316static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
317{
318 struct net_device *dev = rt->rt6i_dev;
161980f4 319 if (!oif || dev->ifindex == oif)
554cfb7e 320 return 2;
161980f4
DM
321 if ((dev->flags & IFF_LOOPBACK) &&
322 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
323 return 1;
324 return 0;
554cfb7e 325}
1da177e4 326
b6f99a21 327static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 328{
554cfb7e 329 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 330 int m;
4d0c5911
YH
331 if (rt->rt6i_flags & RTF_NONEXTHOP ||
332 !(rt->rt6i_flags & RTF_GATEWAY))
333 m = 1;
334 else if (neigh) {
554cfb7e
YH
335 read_lock_bh(&neigh->lock);
336 if (neigh->nud_state & NUD_VALID)
4d0c5911 337 m = 2;
398bcbeb
YH
338#ifdef CONFIG_IPV6_ROUTER_PREF
339 else if (neigh->nud_state & NUD_FAILED)
340 m = 0;
341#endif
342 else
ea73ee23 343 m = 1;
554cfb7e 344 read_unlock_bh(&neigh->lock);
398bcbeb
YH
345 } else
346 m = 0;
554cfb7e 347 return m;
1da177e4
LT
348}
349
554cfb7e
YH
350static int rt6_score_route(struct rt6_info *rt, int oif,
351 int strict)
1da177e4 352{
4d0c5911 353 int m, n;
1ab1457c 354
4d0c5911 355 m = rt6_check_dev(rt, oif);
77d16f45 356 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 357 return -1;
ebacaaa0
YH
358#ifdef CONFIG_IPV6_ROUTER_PREF
359 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
360#endif
4d0c5911 361 n = rt6_check_neigh(rt);
557e92ef 362 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
363 return -1;
364 return m;
365}
366
f11e6659
DM
367static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
368 int *mpri, struct rt6_info *match)
554cfb7e 369{
f11e6659
DM
370 int m;
371
372 if (rt6_check_expired(rt))
373 goto out;
374
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
377 goto out;
378
379 if (m > *mpri) {
380 if (strict & RT6_LOOKUP_F_REACHABLE)
381 rt6_probe(match);
382 *mpri = m;
383 match = rt;
384 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
385 rt6_probe(rt);
386 }
387
388out:
389 return match;
390}
391
392static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
393 struct rt6_info *rr_head,
394 u32 metric, int oif, int strict)
395{
396 struct rt6_info *rt, *match;
554cfb7e 397 int mpri = -1;
1da177e4 398
f11e6659
DM
399 match = NULL;
400 for (rt = rr_head; rt && rt->rt6i_metric == metric;
401 rt = rt->u.dst.rt6_next)
402 match = find_match(rt, oif, strict, &mpri, match);
403 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
404 rt = rt->u.dst.rt6_next)
405 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 406
f11e6659
DM
407 return match;
408}
1da177e4 409
f11e6659
DM
410static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
411{
412 struct rt6_info *match, *rt0;
1da177e4 413
f11e6659
DM
414 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
415 __FUNCTION__, fn->leaf, oif);
554cfb7e 416
f11e6659
DM
417 rt0 = fn->rr_ptr;
418 if (!rt0)
419 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 420
f11e6659 421 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 422
554cfb7e 423 if (!match &&
f11e6659
DM
424 (strict & RT6_LOOKUP_F_REACHABLE)) {
425 struct rt6_info *next = rt0->u.dst.rt6_next;
426
554cfb7e 427 /* no entries matched; do round-robin */
f11e6659
DM
428 if (!next || next->rt6i_metric != rt0->rt6i_metric)
429 next = fn->leaf;
430
431 if (next != rt0)
432 fn->rr_ptr = next;
1da177e4 433 }
1da177e4 434
f11e6659
DM
435 RT6_TRACE("%s() => %p\n",
436 __FUNCTION__, match);
1da177e4 437
554cfb7e 438 return (match ? match : &ip6_null_entry);
1da177e4
LT
439}
440
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option.
 *
 * @dev:    device the option was received on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Looks up the matching route-info route and deletes it (lifetime 0),
 * creates it (none exists, lifetime non-zero) or refreshes its
 * preference and expiry.
 *
 * Returns 0 on success, or -EINVAL when the option is malformed or
 * carries the reserved preference value.  RFC 4191 requires such an
 * option to be ignored rather than treated as medium preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* a negative len must not slip through the unsigned comparison */
	if (len < 0 || (size_t)len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* reserved preference value: ignore the whole option */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
518
982f56f3
YH
/*
 * BACKTRACK(saddr) - climb towards the tree root after a failed match.
 *
 * Must be expanded inside a function that has local variables `rt`
 * (struct rt6_info *) and `fn` (struct fib6_node *) and the labels
 * `out` and `restart`.  It acts only when rt == &ip6_null_entry:
 * starting at fn it walks to the parent (or looks @saddr up in the
 * parent's subtree when there is one and fn is not already it) until
 * it reaches a node with RTN_RTINFO (jumps to `restart`) or the
 * top-level root (jumps to `out`).
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		for (;;) { \
			struct fib6_node *pn; \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
536
537static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
538 struct flowi *fl, int flags)
1da177e4
LT
539{
540 struct fib6_node *fn;
541 struct rt6_info *rt;
542
c71099ac
TG
543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
545restart:
546 rt = fn->leaf;
77d16f45 547 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 548 BACKTRACK(&fl->fl6_src);
c71099ac 549out:
03f49f34 550 dst_use(&rt->u.dst, jiffies);
c71099ac 551 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
552 return rt;
553
554}
555
556struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
557 int oif, int strict)
558{
559 struct flowi fl = {
560 .oif = oif,
561 .nl_u = {
562 .ip6_u = {
563 .daddr = *daddr,
c71099ac
TG
564 },
565 },
566 };
567 struct dst_entry *dst;
77d16f45 568 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 569
adaa70bb
TG
570 if (saddr) {
571 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
572 flags |= RT6_LOOKUP_F_HAS_SADDR;
573 }
574
58f09b78 575 dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
576 if (dst->error == 0)
577 return (struct rt6_info *) dst;
578
579 dst_release(dst);
580
1da177e4
LT
581 return NULL;
582}
583
7159039a
YH
584EXPORT_SYMBOL(rt6_lookup);
585
c71099ac 586/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
587 It takes new route entry, the addition fails by any reason the
588 route is freed. In any case, if caller does not hold it, it may
589 be destroyed.
590 */
591
86872cb5 592static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
593{
594 int err;
c71099ac 595 struct fib6_table *table;
1da177e4 596
c71099ac
TG
597 table = rt->rt6i_table;
598 write_lock_bh(&table->tb6_lock);
86872cb5 599 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 600 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
601
602 return err;
603}
604
40e22e8f
TG
605int ip6_ins_rt(struct rt6_info *rt)
606{
4d1169c1
DL
607 struct nl_info info = {
608 .nl_net = &init_net,
609 };
528c4ceb 610 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
611}
612
95a9a5ba
YH
613static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
614 struct in6_addr *saddr)
1da177e4 615{
1da177e4
LT
616 struct rt6_info *rt;
617
618 /*
619 * Clone the route.
620 */
621
622 rt = ip6_rt_copy(ort);
623
624 if (rt) {
58c4fb86
YH
625 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
626 if (rt->rt6i_dst.plen != 128 &&
627 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
628 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 629 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 630 }
1da177e4 631
58c4fb86 632 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
633 rt->rt6i_dst.plen = 128;
634 rt->rt6i_flags |= RTF_CACHE;
635 rt->u.dst.flags |= DST_HOST;
636
637#ifdef CONFIG_IPV6_SUBTREES
638 if (rt->rt6i_src.plen && saddr) {
639 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
640 rt->rt6i_src.plen = 128;
641 }
642#endif
643
644 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
645
95a9a5ba 646 }
1da177e4 647
95a9a5ba
YH
648 return rt;
649}
1da177e4 650
299d9939
YH
651static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
652{
653 struct rt6_info *rt = ip6_rt_copy(ort);
654 if (rt) {
655 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
656 rt->rt6i_dst.plen = 128;
657 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
658 rt->u.dst.flags |= DST_HOST;
659 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
660 }
661 return rt;
662}
663
4acad72d 664static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
8ce11e6a 665 struct flowi *fl, int flags)
1da177e4
LT
666{
667 struct fib6_node *fn;
519fbd87 668 struct rt6_info *rt, *nrt;
c71099ac 669 int strict = 0;
1da177e4 670 int attempts = 3;
519fbd87 671 int err;
ea659e07 672 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 673
77d16f45 674 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
675
676relookup:
c71099ac 677 read_lock_bh(&table->tb6_lock);
1da177e4 678
8238dd06 679restart_2:
c71099ac 680 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
681
682restart:
4acad72d 683 rt = rt6_select(fn, oif, strict | reachable);
982f56f3 684 BACKTRACK(&fl->fl6_src);
8238dd06
YH
685 if (rt == &ip6_null_entry ||
686 rt->rt6i_flags & RTF_CACHE)
1ddef044 687 goto out;
1da177e4 688
fb9de91e 689 dst_hold(&rt->u.dst);
c71099ac 690 read_unlock_bh(&table->tb6_lock);
fb9de91e 691
519fbd87 692 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 693 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
694 else {
695#if CLONE_OFFLINK_ROUTE
c71099ac 696 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
697#else
698 goto out2;
699#endif
700 }
e40cf353 701
519fbd87
YH
702 dst_release(&rt->u.dst);
703 rt = nrt ? : &ip6_null_entry;
1da177e4 704
519fbd87
YH
705 dst_hold(&rt->u.dst);
706 if (nrt) {
40e22e8f 707 err = ip6_ins_rt(nrt);
519fbd87 708 if (!err)
1da177e4 709 goto out2;
1da177e4 710 }
1da177e4 711
519fbd87
YH
712 if (--attempts <= 0)
713 goto out2;
714
715 /*
c71099ac 716 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
717 * released someone could insert this route. Relookup.
718 */
719 dst_release(&rt->u.dst);
720 goto relookup;
721
722out:
8238dd06
YH
723 if (reachable) {
724 reachable = 0;
725 goto restart_2;
726 }
519fbd87 727 dst_hold(&rt->u.dst);
c71099ac 728 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
729out2:
730 rt->u.dst.lastuse = jiffies;
731 rt->u.dst.__use++;
c71099ac
TG
732
733 return rt;
1da177e4
LT
734}
735
4acad72d
PE
736static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
737 struct flowi *fl, int flags)
738{
739 return ip6_pol_route(table, fl->iif, fl, flags);
740}
741
c71099ac
TG
742void ip6_route_input(struct sk_buff *skb)
743{
0660e03f 744 struct ipv6hdr *iph = ipv6_hdr(skb);
adaa70bb 745 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
746 struct flowi fl = {
747 .iif = skb->dev->ifindex,
748 .nl_u = {
749 .ip6_u = {
750 .daddr = iph->daddr,
751 .saddr = iph->saddr,
90bcaf7b 752 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
753 },
754 },
1ab1457c 755 .mark = skb->mark,
c71099ac
TG
756 .proto = iph->nexthdr,
757 };
adaa70bb
TG
758
759 if (rt6_need_strict(&iph->daddr))
760 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 761
58f09b78 762 skb->dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_input);
c71099ac
TG
763}
764
765static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
766 struct flowi *fl, int flags)
1da177e4 767{
4acad72d 768 return ip6_pol_route(table, fl->oif, fl, flags);
c71099ac
TG
769}
770
771struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
772{
773 int flags = 0;
774
775 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 776 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 777
adaa70bb
TG
778 if (!ipv6_addr_any(&fl->fl6_src))
779 flags |= RT6_LOOKUP_F_HAS_SADDR;
780
58f09b78 781 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
1da177e4
LT
782}
783
7159039a 784EXPORT_SYMBOL(ip6_route_output);
1da177e4 785
14e50e57
DM
786int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
787{
788 struct rt6_info *ort = (struct rt6_info *) *dstp;
789 struct rt6_info *rt = (struct rt6_info *)
790 dst_alloc(&ip6_dst_blackhole_ops);
791 struct dst_entry *new = NULL;
792
793 if (rt) {
794 new = &rt->u.dst;
795
796 atomic_set(&new->__refcnt, 1);
797 new->__use = 1;
352e512c
HX
798 new->input = dst_discard;
799 new->output = dst_discard;
14e50e57
DM
800
801 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
802 new->dev = ort->u.dst.dev;
803 if (new->dev)
804 dev_hold(new->dev);
805 rt->rt6i_idev = ort->rt6i_idev;
806 if (rt->rt6i_idev)
807 in6_dev_hold(rt->rt6i_idev);
808 rt->rt6i_expires = 0;
809
810 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
811 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
812 rt->rt6i_metric = 0;
813
814 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
815#ifdef CONFIG_IPV6_SUBTREES
816 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
817#endif
818
819 dst_free(new);
820 }
821
822 dst_release(*dstp);
823 *dstp = new;
824 return (new ? 0 : -ENOMEM);
825}
826EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
827
1da177e4
LT
828/*
829 * Destination cache support functions
830 */
831
832static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
833{
834 struct rt6_info *rt;
835
836 rt = (struct rt6_info *) dst;
837
838 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
839 return dst;
840
841 return NULL;
842}
843
844static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
845{
846 struct rt6_info *rt = (struct rt6_info *) dst;
847
848 if (rt) {
849 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 850 ip6_del_rt(rt);
1da177e4
LT
851 else
852 dst_release(dst);
853 }
854 return NULL;
855}
856
857static void ip6_link_failure(struct sk_buff *skb)
858{
859 struct rt6_info *rt;
860
861 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
862
863 rt = (struct rt6_info *) skb->dst;
864 if (rt) {
865 if (rt->rt6i_flags&RTF_CACHE) {
866 dst_set_expires(&rt->u.dst, 0);
867 rt->rt6i_flags |= RTF_EXPIRES;
868 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
869 rt->rt6i_node->fn_sernum = -1;
870 }
871}
872
873static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
874{
875 struct rt6_info *rt6 = (struct rt6_info*)dst;
876
877 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
878 rt6->rt6i_flags |= RTF_MODIFIED;
879 if (mtu < IPV6_MIN_MTU) {
880 mtu = IPV6_MIN_MTU;
881 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
882 }
883 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 884 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
885 }
886}
887
1da177e4
LT
888static int ipv6_get_mtu(struct net_device *dev);
889
890static inline unsigned int ipv6_advmss(unsigned int mtu)
891{
892 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
893
4990509f
DL
894 if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
895 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
896
897 /*
1ab1457c
YH
898 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
899 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
900 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
901 * rely only on pmtu discovery"
902 */
903 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
904 mtu = IPV6_MAXPLEN;
905 return mtu;
906}
907
3b00944c
YH
908static struct dst_entry *icmp6_dst_gc_list;
909static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 910
3b00944c 911struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 912 struct neighbour *neigh,
3b00944c 913 struct in6_addr *addr)
1da177e4
LT
914{
915 struct rt6_info *rt;
916 struct inet6_dev *idev = in6_dev_get(dev);
917
918 if (unlikely(idev == NULL))
919 return NULL;
920
921 rt = ip6_dst_alloc();
922 if (unlikely(rt == NULL)) {
923 in6_dev_put(idev);
924 goto out;
925 }
926
927 dev_hold(dev);
928 if (neigh)
929 neigh_hold(neigh);
930 else
931 neigh = ndisc_get_neigh(dev, addr);
932
933 rt->rt6i_dev = dev;
934 rt->rt6i_idev = idev;
935 rt->rt6i_nexthop = neigh;
936 atomic_set(&rt->u.dst.__refcnt, 1);
937 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
938 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
939 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
3b00944c 940 rt->u.dst.output = ip6_output;
1da177e4
LT
941
942#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
943 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
944 ? DST_HOST
1da177e4
LT
945 : 0;
946 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
947 rt->rt6i_dst.plen = 128;
948#endif
949
3b00944c
YH
950 spin_lock_bh(&icmp6_dst_lock);
951 rt->u.dst.next = icmp6_dst_gc_list;
952 icmp6_dst_gc_list = &rt->u.dst;
953 spin_unlock_bh(&icmp6_dst_lock);
1da177e4
LT
954
955 fib6_force_start_gc();
956
957out:
40aa7b90 958 return &rt->u.dst;
1da177e4
LT
959}
960
3b00944c 961int icmp6_dst_gc(int *more)
1da177e4
LT
962{
963 struct dst_entry *dst, *next, **pprev;
964 int freed;
965
966 next = NULL;
1ab1457c 967 freed = 0;
5d0bbeeb 968
3b00944c
YH
969 spin_lock_bh(&icmp6_dst_lock);
970 pprev = &icmp6_dst_gc_list;
5d0bbeeb 971
1da177e4
LT
972 while ((dst = *pprev) != NULL) {
973 if (!atomic_read(&dst->__refcnt)) {
974 *pprev = dst->next;
975 dst_free(dst);
976 freed++;
977 } else {
978 pprev = &dst->next;
979 (*more)++;
980 }
981 }
982
3b00944c 983 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 984
1da177e4
LT
985 return freed;
986}
987
569d3645 988static int ip6_dst_gc(struct dst_ops *ops)
1da177e4
LT
989{
990 static unsigned expire = 30*HZ;
991 static unsigned long last_gc;
992 unsigned long now = jiffies;
993
4990509f
DL
994 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
995 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1da177e4
LT
996 goto out;
997
998 expire++;
5b7c931d 999 fib6_run_gc(expire, &init_net);
1da177e4
LT
1000 last_gc = now;
1001 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
4990509f 1002 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1da177e4
LT
1003
1004out:
4990509f
DL
1005 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1006 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1da177e4
LT
1007}
1008
1009/* Clean host part of a prefix. Not necessary in radix tree,
1010 but results in cleaner routing tables.
1011
1012 Remove it only when all the things will work!
1013 */
1014
1015static int ipv6_get_mtu(struct net_device *dev)
1016{
1017 int mtu = IPV6_MIN_MTU;
1018 struct inet6_dev *idev;
1019
1020 idev = in6_dev_get(dev);
1021 if (idev) {
1022 mtu = idev->cnf.mtu6;
1023 in6_dev_put(idev);
1024 }
1025 return mtu;
1026}
1027
1028int ipv6_get_hoplimit(struct net_device *dev)
1029{
1030 int hoplimit = ipv6_devconf.hop_limit;
1031 struct inet6_dev *idev;
1032
1033 idev = in6_dev_get(dev);
1034 if (idev) {
1035 hoplimit = idev->cnf.hop_limit;
1036 in6_dev_put(idev);
1037 }
1038 return hoplimit;
1039}
1040
1041/*
1042 *
1043 */
1044
86872cb5 1045int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1046{
1047 int err;
1da177e4
LT
1048 struct rt6_info *rt = NULL;
1049 struct net_device *dev = NULL;
1050 struct inet6_dev *idev = NULL;
c71099ac 1051 struct fib6_table *table;
1da177e4
LT
1052 int addr_type;
1053
86872cb5 1054 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1055 return -EINVAL;
1056#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1057 if (cfg->fc_src_len)
1da177e4
LT
1058 return -EINVAL;
1059#endif
86872cb5 1060 if (cfg->fc_ifindex) {
1da177e4 1061 err = -ENODEV;
881d966b 1062 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1da177e4
LT
1063 if (!dev)
1064 goto out;
1065 idev = in6_dev_get(dev);
1066 if (!idev)
1067 goto out;
1068 }
1069
86872cb5
TG
1070 if (cfg->fc_metric == 0)
1071 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1072
58f09b78 1073 table = fib6_new_table(&init_net, cfg->fc_table);
c71099ac
TG
1074 if (table == NULL) {
1075 err = -ENOBUFS;
1076 goto out;
1077 }
1078
1da177e4
LT
1079 rt = ip6_dst_alloc();
1080
1081 if (rt == NULL) {
1082 err = -ENOMEM;
1083 goto out;
1084 }
1085
1086 rt->u.dst.obsolete = -1;
86872cb5 1087 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1088
86872cb5
TG
1089 if (cfg->fc_protocol == RTPROT_UNSPEC)
1090 cfg->fc_protocol = RTPROT_BOOT;
1091 rt->rt6i_protocol = cfg->fc_protocol;
1092
1093 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1094
1095 if (addr_type & IPV6_ADDR_MULTICAST)
1096 rt->u.dst.input = ip6_mc_input;
1097 else
1098 rt->u.dst.input = ip6_forward;
1099
1100 rt->u.dst.output = ip6_output;
1101
86872cb5
TG
1102 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1103 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1104 if (rt->rt6i_dst.plen == 128)
1105 rt->u.dst.flags = DST_HOST;
1106
1107#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1108 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1109 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1110#endif
1111
86872cb5 1112 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1113
1114 /* We cannot add true routes via loopback here,
1115 they would result in kernel looping; promote them to reject routes
1116 */
86872cb5 1117 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1118 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1119 /* hold loopback dev/idev if we haven't done so. */
2774c7ab 1120 if (dev != init_net.loopback_dev) {
1da177e4
LT
1121 if (dev) {
1122 dev_put(dev);
1123 in6_dev_put(idev);
1124 }
2774c7ab 1125 dev = init_net.loopback_dev;
1da177e4
LT
1126 dev_hold(dev);
1127 idev = in6_dev_get(dev);
1128 if (!idev) {
1129 err = -ENODEV;
1130 goto out;
1131 }
1132 }
1133 rt->u.dst.output = ip6_pkt_discard_out;
1134 rt->u.dst.input = ip6_pkt_discard;
1135 rt->u.dst.error = -ENETUNREACH;
1136 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1137 goto install_route;
1138 }
1139
86872cb5 1140 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1141 struct in6_addr *gw_addr;
1142 int gwa_type;
1143
86872cb5
TG
1144 gw_addr = &cfg->fc_gateway;
1145 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1146 gwa_type = ipv6_addr_type(gw_addr);
1147
1148 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1149 struct rt6_info *grt;
1150
1151 /* IPv6 strictly inhibits using not link-local
1152 addresses as nexthop address.
1153 Otherwise, router will not able to send redirects.
1154 It is very good, but in some (rare!) circumstances
1155 (SIT, PtP, NBMA NOARP links) it is handy to allow
1156 some exceptions. --ANK
1157 */
1158 err = -EINVAL;
1159 if (!(gwa_type&IPV6_ADDR_UNICAST))
1160 goto out;
1161
86872cb5 1162 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1163
1164 err = -EHOSTUNREACH;
1165 if (grt == NULL)
1166 goto out;
1167 if (dev) {
1168 if (dev != grt->rt6i_dev) {
1169 dst_release(&grt->u.dst);
1170 goto out;
1171 }
1172 } else {
1173 dev = grt->rt6i_dev;
1174 idev = grt->rt6i_idev;
1175 dev_hold(dev);
1176 in6_dev_hold(grt->rt6i_idev);
1177 }
1178 if (!(grt->rt6i_flags&RTF_GATEWAY))
1179 err = 0;
1180 dst_release(&grt->u.dst);
1181
1182 if (err)
1183 goto out;
1184 }
1185 err = -EINVAL;
1186 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1187 goto out;
1188 }
1189
1190 err = -ENODEV;
1191 if (dev == NULL)
1192 goto out;
1193
86872cb5 1194 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1195 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1196 if (IS_ERR(rt->rt6i_nexthop)) {
1197 err = PTR_ERR(rt->rt6i_nexthop);
1198 rt->rt6i_nexthop = NULL;
1199 goto out;
1200 }
1201 }
1202
86872cb5 1203 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1204
1205install_route:
86872cb5
TG
1206 if (cfg->fc_mx) {
1207 struct nlattr *nla;
1208 int remaining;
1209
1210 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1211 int type = nla_type(nla);
86872cb5
TG
1212
1213 if (type) {
1214 if (type > RTAX_MAX) {
1da177e4
LT
1215 err = -EINVAL;
1216 goto out;
1217 }
86872cb5
TG
1218
1219 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1220 }
1da177e4
LT
1221 }
1222 }
1223
1224 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1225 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1226 if (!rt->u.dst.metrics[RTAX_MTU-1])
1227 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1228 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1229 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1230 rt->u.dst.dev = dev;
1231 rt->rt6i_idev = idev;
c71099ac 1232 rt->rt6i_table = table;
86872cb5 1233 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1234
1235out:
1236 if (dev)
1237 dev_put(dev);
1238 if (idev)
1239 in6_dev_put(idev);
1240 if (rt)
40aa7b90 1241 dst_free(&rt->u.dst);
1da177e4
LT
1242 return err;
1243}
1244
86872cb5 1245static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1246{
1247 int err;
c71099ac 1248 struct fib6_table *table;
1da177e4 1249
6c813a72
PM
1250 if (rt == &ip6_null_entry)
1251 return -ENOENT;
1252
c71099ac
TG
1253 table = rt->rt6i_table;
1254 write_lock_bh(&table->tb6_lock);
1da177e4 1255
86872cb5 1256 err = fib6_del(rt, info);
1da177e4
LT
1257 dst_release(&rt->u.dst);
1258
c71099ac 1259 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1260
1261 return err;
1262}
1263
e0a1ad73
TG
1264int ip6_del_rt(struct rt6_info *rt)
1265{
4d1169c1
DL
1266 struct nl_info info = {
1267 .nl_net = &init_net,
1268 };
528c4ceb 1269 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1270}
1271
86872cb5 1272static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1273{
c71099ac 1274 struct fib6_table *table;
1da177e4
LT
1275 struct fib6_node *fn;
1276 struct rt6_info *rt;
1277 int err = -ESRCH;
1278
58f09b78 1279 table = fib6_get_table(&init_net, cfg->fc_table);
c71099ac
TG
1280 if (table == NULL)
1281 return err;
1282
1283 read_lock_bh(&table->tb6_lock);
1da177e4 1284
c71099ac 1285 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1286 &cfg->fc_dst, cfg->fc_dst_len,
1287 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1288
1da177e4 1289 if (fn) {
7cc48263 1290 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1291 if (cfg->fc_ifindex &&
1da177e4 1292 (rt->rt6i_dev == NULL ||
86872cb5 1293 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1294 continue;
86872cb5
TG
1295 if (cfg->fc_flags & RTF_GATEWAY &&
1296 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1297 continue;
86872cb5 1298 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1299 continue;
1300 dst_hold(&rt->u.dst);
c71099ac 1301 read_unlock_bh(&table->tb6_lock);
1da177e4 1302
86872cb5 1303 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1304 }
1305 }
c71099ac 1306 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1307
1308 return err;
1309}
1310
1311/*
1312 * Handle redirects
1313 */
a6279458
YH
1314struct ip6rd_flowi {
1315 struct flowi fl;
1316 struct in6_addr gateway;
1317};
1318
1319static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1320 struct flowi *fl,
1321 int flags)
1da177e4 1322{
a6279458
YH
1323 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1324 struct rt6_info *rt;
e843b9e1 1325 struct fib6_node *fn;
c71099ac 1326
1da177e4 1327 /*
e843b9e1
YH
1328 * Get the "current" route for this destination and
1329 * check if the redirect has come from approriate router.
1330 *
1331 * RFC 2461 specifies that redirects should only be
1332 * accepted if they come from the nexthop to the target.
1333 * Due to the way the routes are chosen, this notion
1334 * is a bit fuzzy and one might need to check all possible
1335 * routes.
1da177e4 1336 */
1da177e4 1337
c71099ac 1338 read_lock_bh(&table->tb6_lock);
a6279458 1339 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1340restart:
7cc48263 1341 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1342 /*
1343 * Current route is on-link; redirect is always invalid.
1344 *
1345 * Seems, previous statement is not true. It could
1346 * be node, which looks for us as on-link (f.e. proxy ndisc)
1347 * But then router serving it might decide, that we should
1348 * know truth 8)8) --ANK (980726).
1349 */
1350 if (rt6_check_expired(rt))
1351 continue;
1352 if (!(rt->rt6i_flags & RTF_GATEWAY))
1353 continue;
a6279458 1354 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1355 continue;
a6279458 1356 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1357 continue;
1358 break;
1359 }
a6279458 1360
cb15d9c2 1361 if (!rt)
a6279458 1362 rt = &ip6_null_entry;
cb15d9c2
YH
1363 BACKTRACK(&fl->fl6_src);
1364out:
a6279458
YH
1365 dst_hold(&rt->u.dst);
1366
c71099ac 1367 read_unlock_bh(&table->tb6_lock);
e843b9e1 1368
a6279458
YH
1369 return rt;
1370};
1371
1372static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1373 struct in6_addr *src,
1374 struct in6_addr *gateway,
1375 struct net_device *dev)
1376{
adaa70bb 1377 int flags = RT6_LOOKUP_F_HAS_SADDR;
a6279458
YH
1378 struct ip6rd_flowi rdfl = {
1379 .fl = {
1380 .oif = dev->ifindex,
1381 .nl_u = {
1382 .ip6_u = {
1383 .daddr = *dest,
1384 .saddr = *src,
1385 },
1386 },
1387 },
1388 .gateway = *gateway,
1389 };
adaa70bb
TG
1390
1391 if (rt6_need_strict(dest))
1392 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1393
58f09b78
DL
1394 return (struct rt6_info *)fib6_rule_lookup(&init_net,
1395 (struct flowi *)&rdfl,
1396 flags, __ip6_route_redirect);
a6279458
YH
1397}
1398
1399void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1400 struct in6_addr *saddr,
1401 struct neighbour *neigh, u8 *lladdr, int on_link)
1402{
1403 struct rt6_info *rt, *nrt = NULL;
1404 struct netevent_redirect netevent;
1405
1406 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1407
1408 if (rt == &ip6_null_entry) {
1da177e4
LT
1409 if (net_ratelimit())
1410 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1411 "for redirect target\n");
a6279458 1412 goto out;
1da177e4
LT
1413 }
1414
1da177e4
LT
1415 /*
1416 * We have finally decided to accept it.
1417 */
1418
1ab1457c 1419 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1420 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1421 NEIGH_UPDATE_F_OVERRIDE|
1422 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1423 NEIGH_UPDATE_F_ISROUTER))
1424 );
1425
1426 /*
1427 * Redirect received -> path was valid.
1428 * Look, redirects are sent only in response to data packets,
1429 * so that this nexthop apparently is reachable. --ANK
1430 */
1431 dst_confirm(&rt->u.dst);
1432
1433 /* Duplicate redirect: silently ignore. */
1434 if (neigh == rt->u.dst.neighbour)
1435 goto out;
1436
1437 nrt = ip6_rt_copy(rt);
1438 if (nrt == NULL)
1439 goto out;
1440
1441 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1442 if (on_link)
1443 nrt->rt6i_flags &= ~RTF_GATEWAY;
1444
1445 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1446 nrt->rt6i_dst.plen = 128;
1447 nrt->u.dst.flags |= DST_HOST;
1448
1449 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1450 nrt->rt6i_nexthop = neigh_clone(neigh);
1451 /* Reset pmtu, it may be better */
1452 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1453 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1454
40e22e8f 1455 if (ip6_ins_rt(nrt))
1da177e4
LT
1456 goto out;
1457
8d71740c
TT
1458 netevent.old = &rt->u.dst;
1459 netevent.new = &nrt->u.dst;
1460 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1461
1da177e4 1462 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1463 ip6_del_rt(rt);
1da177e4
LT
1464 return;
1465 }
1466
1467out:
1ab1457c 1468 dst_release(&rt->u.dst);
1da177e4
LT
1469 return;
1470}
1471
1472/*
1473 * Handle ICMP "packet too big" messages
1474 * i.e. Path MTU discovery
1475 */
1476
1477void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1478 struct net_device *dev, u32 pmtu)
1479{
1480 struct rt6_info *rt, *nrt;
1481 int allfrag = 0;
1482
1483 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1484 if (rt == NULL)
1485 return;
1486
1487 if (pmtu >= dst_mtu(&rt->u.dst))
1488 goto out;
1489
1490 if (pmtu < IPV6_MIN_MTU) {
1491 /*
1ab1457c 1492 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1493 * MTU (1280) and a fragment header should always be included
1494 * after a node receiving Too Big message reporting PMTU is
1495 * less than the IPv6 Minimum Link MTU.
1496 */
1497 pmtu = IPV6_MIN_MTU;
1498 allfrag = 1;
1499 }
1500
1501 /* New mtu received -> path was valid.
1502 They are sent only in response to data packets,
1503 so that this nexthop apparently is reachable. --ANK
1504 */
1505 dst_confirm(&rt->u.dst);
1506
1507 /* Host route. If it is static, it would be better
1508 not to override it, but add new one, so that
1509 when cache entry will expire old pmtu
1510 would return automatically.
1511 */
1512 if (rt->rt6i_flags & RTF_CACHE) {
1513 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1514 if (allfrag)
1515 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
4990509f 1516 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1517 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1518 goto out;
1519 }
1520
1521 /* Network route.
1522 Two cases are possible:
1523 1. It is connected route. Action: COW
1524 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1525 */
d5315b50 1526 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1527 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1528 else
1529 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1530
d5315b50 1531 if (nrt) {
a1e78363
YH
1532 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1533 if (allfrag)
1534 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1535
1536 /* According to RFC 1981, detecting PMTU increase shouldn't be
1537 * happened within 5 mins, the recommended timer is 10 mins.
1538 * Here this route expiration time is set to ip6_rt_mtu_expires
1539 * which is 10 mins. After 10 mins the decreased pmtu is expired
1540 * and detecting PMTU increase will be automatically happened.
1541 */
4990509f 1542 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1543 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1544
40e22e8f 1545 ip6_ins_rt(nrt);
1da177e4 1546 }
1da177e4
LT
1547out:
1548 dst_release(&rt->u.dst);
1549}
1550
1551/*
1552 * Misc support functions
1553 */
1554
1555static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1556{
1557 struct rt6_info *rt = ip6_dst_alloc();
1558
1559 if (rt) {
1560 rt->u.dst.input = ort->u.dst.input;
1561 rt->u.dst.output = ort->u.dst.output;
1562
1563 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1564 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1565 rt->u.dst.dev = ort->u.dst.dev;
1566 if (rt->u.dst.dev)
1567 dev_hold(rt->u.dst.dev);
1568 rt->rt6i_idev = ort->rt6i_idev;
1569 if (rt->rt6i_idev)
1570 in6_dev_hold(rt->rt6i_idev);
1571 rt->u.dst.lastuse = jiffies;
1572 rt->rt6i_expires = 0;
1573
1574 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1575 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1576 rt->rt6i_metric = 0;
1577
1578 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1579#ifdef CONFIG_IPV6_SUBTREES
1580 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1581#endif
c71099ac 1582 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1583 }
1584 return rt;
1585}
1586
70ceb4f5
YH
1587#ifdef CONFIG_IPV6_ROUTE_INFO
1588static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1589 struct in6_addr *gwaddr, int ifindex)
1590{
1591 struct fib6_node *fn;
1592 struct rt6_info *rt = NULL;
c71099ac
TG
1593 struct fib6_table *table;
1594
58f09b78 1595 table = fib6_get_table(&init_net, RT6_TABLE_INFO);
c71099ac
TG
1596 if (table == NULL)
1597 return NULL;
70ceb4f5 1598
c71099ac
TG
1599 write_lock_bh(&table->tb6_lock);
1600 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1601 if (!fn)
1602 goto out;
1603
7cc48263 1604 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1605 if (rt->rt6i_dev->ifindex != ifindex)
1606 continue;
1607 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1608 continue;
1609 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1610 continue;
1611 dst_hold(&rt->u.dst);
1612 break;
1613 }
1614out:
c71099ac 1615 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1616 return rt;
1617}
1618
1619static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1620 struct in6_addr *gwaddr, int ifindex,
1621 unsigned pref)
1622{
86872cb5
TG
1623 struct fib6_config cfg = {
1624 .fc_table = RT6_TABLE_INFO,
238fc7ea 1625 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1626 .fc_ifindex = ifindex,
1627 .fc_dst_len = prefixlen,
1628 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1629 RTF_UP | RTF_PREF(pref),
1630 };
1631
1632 ipv6_addr_copy(&cfg.fc_dst, prefix);
1633 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1634
e317da96
YH
1635 /* We should treat it as a default route if prefix length is 0. */
1636 if (!prefixlen)
86872cb5 1637 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1638
86872cb5 1639 ip6_route_add(&cfg);
70ceb4f5
YH
1640
1641 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1642}
1643#endif
1644
1da177e4 1645struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1646{
1da177e4 1647 struct rt6_info *rt;
c71099ac 1648 struct fib6_table *table;
1da177e4 1649
58f09b78 1650 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
c71099ac
TG
1651 if (table == NULL)
1652 return NULL;
1da177e4 1653
c71099ac 1654 write_lock_bh(&table->tb6_lock);
7cc48263 1655 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1656 if (dev == rt->rt6i_dev &&
045927ff 1657 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1658 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1659 break;
1660 }
1661 if (rt)
1662 dst_hold(&rt->u.dst);
c71099ac 1663 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1664 return rt;
1665}
1666
c7dc89c0
FT
1667EXPORT_SYMBOL(rt6_get_dflt_router);
1668
1da177e4 1669struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1670 struct net_device *dev,
1671 unsigned int pref)
1da177e4 1672{
86872cb5
TG
1673 struct fib6_config cfg = {
1674 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1675 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1676 .fc_ifindex = dev->ifindex,
1677 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1678 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1679 };
1da177e4 1680
86872cb5 1681 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1682
86872cb5 1683 ip6_route_add(&cfg);
1da177e4 1684
1da177e4
LT
1685 return rt6_get_dflt_router(gwaddr, dev);
1686}
1687
1688void rt6_purge_dflt_routers(void)
1689{
1690 struct rt6_info *rt;
c71099ac
TG
1691 struct fib6_table *table;
1692
1693 /* NOTE: Keep consistent with rt6_get_dflt_router */
58f09b78 1694 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
c71099ac
TG
1695 if (table == NULL)
1696 return;
1da177e4
LT
1697
1698restart:
c71099ac 1699 read_lock_bh(&table->tb6_lock);
7cc48263 1700 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1701 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1702 dst_hold(&rt->u.dst);
c71099ac 1703 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1704 ip6_del_rt(rt);
1da177e4
LT
1705 goto restart;
1706 }
1707 }
c71099ac 1708 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1709}
1710
86872cb5
TG
1711static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1712 struct fib6_config *cfg)
1713{
1714 memset(cfg, 0, sizeof(*cfg));
1715
1716 cfg->fc_table = RT6_TABLE_MAIN;
1717 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1718 cfg->fc_metric = rtmsg->rtmsg_metric;
1719 cfg->fc_expires = rtmsg->rtmsg_info;
1720 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1721 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1722 cfg->fc_flags = rtmsg->rtmsg_flags;
1723
f1243c2d
BT
1724 cfg->fc_nlinfo.nl_net = &init_net;
1725
86872cb5
TG
1726 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1727 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1728 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1729}
1730
1da177e4
LT
1731int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1732{
86872cb5 1733 struct fib6_config cfg;
1da177e4
LT
1734 struct in6_rtmsg rtmsg;
1735 int err;
1736
1737 switch(cmd) {
1738 case SIOCADDRT: /* Add a route */
1739 case SIOCDELRT: /* Delete a route */
1740 if (!capable(CAP_NET_ADMIN))
1741 return -EPERM;
1742 err = copy_from_user(&rtmsg, arg,
1743 sizeof(struct in6_rtmsg));
1744 if (err)
1745 return -EFAULT;
86872cb5
TG
1746
1747 rtmsg_to_fib6_config(&rtmsg, &cfg);
1748
1da177e4
LT
1749 rtnl_lock();
1750 switch (cmd) {
1751 case SIOCADDRT:
86872cb5 1752 err = ip6_route_add(&cfg);
1da177e4
LT
1753 break;
1754 case SIOCDELRT:
86872cb5 1755 err = ip6_route_del(&cfg);
1da177e4
LT
1756 break;
1757 default:
1758 err = -EINVAL;
1759 }
1760 rtnl_unlock();
1761
1762 return err;
3ff50b79 1763 }
1da177e4
LT
1764
1765 return -EINVAL;
1766}
1767
1768/*
1769 * Drop the packet on the floor
1770 */
1771
50eb431d 1772static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1da177e4 1773{
612f09e8
YH
1774 int type;
1775 switch (ipstats_mib_noroutes) {
1776 case IPSTATS_MIB_INNOROUTES:
0660e03f 1777 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1778 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1779 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1780 break;
1781 }
1782 /* FALLTHROUGH */
1783 case IPSTATS_MIB_OUTNOROUTES:
1784 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1785 break;
1786 }
9ce8ade0 1787 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1788 kfree_skb(skb);
1789 return 0;
1790}
1791
9ce8ade0
TG
1792static int ip6_pkt_discard(struct sk_buff *skb)
1793{
612f09e8 1794 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1795}
1796
20380731 1797static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1798{
1799 skb->dev = skb->dst->dev;
612f09e8 1800 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1801}
1802
6723ab54
DM
1803#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1804
9ce8ade0
TG
1805static int ip6_pkt_prohibit(struct sk_buff *skb)
1806{
612f09e8 1807 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1808}
1809
1810static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1811{
1812 skb->dev = skb->dst->dev;
612f09e8 1813 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1814}
1815
6723ab54
DM
1816#endif
1817
1da177e4
LT
1818/*
1819 * Allocate a dst for local (unicast / anycast) address.
1820 */
1821
1822struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1823 const struct in6_addr *addr,
1824 int anycast)
1825{
1826 struct rt6_info *rt = ip6_dst_alloc();
1827
1828 if (rt == NULL)
1829 return ERR_PTR(-ENOMEM);
1830
2774c7ab 1831 dev_hold(init_net.loopback_dev);
1da177e4
LT
1832 in6_dev_hold(idev);
1833
1834 rt->u.dst.flags = DST_HOST;
1835 rt->u.dst.input = ip6_input;
1836 rt->u.dst.output = ip6_output;
2774c7ab 1837 rt->rt6i_dev = init_net.loopback_dev;
1da177e4
LT
1838 rt->rt6i_idev = idev;
1839 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1840 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1841 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1842 rt->u.dst.obsolete = -1;
1843
1844 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1845 if (anycast)
1846 rt->rt6i_flags |= RTF_ANYCAST;
1847 else
1da177e4
LT
1848 rt->rt6i_flags |= RTF_LOCAL;
1849 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1850 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1851 dst_free(&rt->u.dst);
1da177e4
LT
1852 return ERR_PTR(-ENOMEM);
1853 }
1854
1855 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1856 rt->rt6i_dst.plen = 128;
58f09b78 1857 rt->rt6i_table = fib6_get_table(&init_net, RT6_TABLE_LOCAL);
1da177e4
LT
1858
1859 atomic_set(&rt->u.dst.__refcnt, 1);
1860
1861 return rt;
1862}
1863
1864static int fib6_ifdown(struct rt6_info *rt, void *arg)
1865{
1866 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1867 rt != &ip6_null_entry) {
1868 RT6_TRACE("deleted by ifdown %p\n", rt);
1869 return -1;
1870 }
1871 return 0;
1872}
1873
f3db4851 1874void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1875{
f3db4851 1876 fib6_clean_all(net, fib6_ifdown, 0, dev);
1da177e4
LT
1877}
1878
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* its new MTU */
};
1884
1885static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1886{
1887 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1888 struct inet6_dev *idev;
1889
1890 /* In IPv6 pmtu discovery is not optional,
1891 so that RTAX_MTU lock cannot disable it.
1892 We still use this lock to block changes
1893 caused by addrconf/ndisc.
1894 */
1895
1896 idev = __in6_dev_get(arg->dev);
1897 if (idev == NULL)
1898 return 0;
1899
1900 /* For administrative MTU increase, there is no way to discover
1901 IPv6 PMTU increase, so PMTU increase should be updated here.
1902 Since RFC 1981 doesn't include administrative MTU increase
1903 update PMTU increase is a MUST. (i.e. jumbo frame)
1904 */
1905 /*
1906 If new MTU is less than route PMTU, this new MTU will be the
1907 lowest MTU in the path, update the route PMTU to reflect PMTU
1908 decreases; if new MTU is greater than route PMTU, and the
1909 old MTU is the lowest MTU in the path, update the route PMTU
1910 to reflect the increase. In this case if the other nodes' MTU
1911 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1912 PMTU discouvery.
1913 */
1914 if (rt->rt6i_dev == arg->dev &&
1915 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 1916 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 1917 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1918 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1919 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
566cfd8f
SA
1920 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1921 }
1da177e4
LT
1922 return 0;
1923}
1924
1925void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1926{
c71099ac
TG
1927 struct rt6_mtu_change_arg arg = {
1928 .dev = dev,
1929 .mtu = mtu,
1930 };
1da177e4 1931
f3db4851 1932 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1933}
1934
ef7c79ed 1935static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 1936 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1937 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1938 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1939 [RTA_PRIORITY] = { .type = NLA_U32 },
1940 [RTA_METRICS] = { .type = NLA_NESTED },
1941};
1942
1943static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1944 struct fib6_config *cfg)
1da177e4 1945{
86872cb5
TG
1946 struct rtmsg *rtm;
1947 struct nlattr *tb[RTA_MAX+1];
1948 int err;
1da177e4 1949
86872cb5
TG
1950 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1951 if (err < 0)
1952 goto errout;
1da177e4 1953
86872cb5
TG
1954 err = -EINVAL;
1955 rtm = nlmsg_data(nlh);
1956 memset(cfg, 0, sizeof(*cfg));
1957
1958 cfg->fc_table = rtm->rtm_table;
1959 cfg->fc_dst_len = rtm->rtm_dst_len;
1960 cfg->fc_src_len = rtm->rtm_src_len;
1961 cfg->fc_flags = RTF_UP;
1962 cfg->fc_protocol = rtm->rtm_protocol;
1963
1964 if (rtm->rtm_type == RTN_UNREACHABLE)
1965 cfg->fc_flags |= RTF_REJECT;
1966
1967 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1968 cfg->fc_nlinfo.nlh = nlh;
2216b483 1969 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
86872cb5
TG
1970
1971 if (tb[RTA_GATEWAY]) {
1972 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1973 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1974 }
86872cb5
TG
1975
1976 if (tb[RTA_DST]) {
1977 int plen = (rtm->rtm_dst_len + 7) >> 3;
1978
1979 if (nla_len(tb[RTA_DST]) < plen)
1980 goto errout;
1981
1982 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1983 }
86872cb5
TG
1984
1985 if (tb[RTA_SRC]) {
1986 int plen = (rtm->rtm_src_len + 7) >> 3;
1987
1988 if (nla_len(tb[RTA_SRC]) < plen)
1989 goto errout;
1990
1991 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1992 }
86872cb5
TG
1993
1994 if (tb[RTA_OIF])
1995 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1996
1997 if (tb[RTA_PRIORITY])
1998 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1999
2000 if (tb[RTA_METRICS]) {
2001 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2002 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2003 }
86872cb5
TG
2004
2005 if (tb[RTA_TABLE])
2006 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2007
2008 err = 0;
2009errout:
2010 return err;
1da177e4
LT
2011}
2012
c127ea2c 2013static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2014{
b854272b 2015 struct net *net = skb->sk->sk_net;
86872cb5
TG
2016 struct fib6_config cfg;
2017 int err;
1da177e4 2018
b854272b
DL
2019 if (net != &init_net)
2020 return -EINVAL;
2021
86872cb5
TG
2022 err = rtm_to_fib6_config(skb, nlh, &cfg);
2023 if (err < 0)
2024 return err;
2025
2026 return ip6_route_del(&cfg);
1da177e4
LT
2027}
2028
c127ea2c 2029static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2030{
b854272b 2031 struct net *net = skb->sk->sk_net;
86872cb5
TG
2032 struct fib6_config cfg;
2033 int err;
1da177e4 2034
b854272b
DL
2035 if (net != &init_net)
2036 return -EINVAL;
2037
86872cb5
TG
2038 err = rtm_to_fib6_config(skb, nlh, &cfg);
2039 if (err < 0)
2040 return err;
2041
2042 return ip6_route_add(&cfg);
1da177e4
LT
2043}
2044
339bf98f
TG
2045static inline size_t rt6_nlmsg_size(void)
2046{
2047 return NLMSG_ALIGN(sizeof(struct rtmsg))
2048 + nla_total_size(16) /* RTA_SRC */
2049 + nla_total_size(16) /* RTA_DST */
2050 + nla_total_size(16) /* RTA_GATEWAY */
2051 + nla_total_size(16) /* RTA_PREFSRC */
2052 + nla_total_size(4) /* RTA_TABLE */
2053 + nla_total_size(4) /* RTA_IIF */
2054 + nla_total_size(4) /* RTA_OIF */
2055 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2056 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2057 + nla_total_size(sizeof(struct rta_cacheinfo));
2058}
2059
1da177e4 2060static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2061 struct in6_addr *dst, struct in6_addr *src,
2062 int iif, int type, u32 pid, u32 seq,
2063 int prefix, unsigned int flags)
1da177e4
LT
2064{
2065 struct rtmsg *rtm;
2d7202bf 2066 struct nlmsghdr *nlh;
e3703b3d 2067 long expires;
9e762a4a 2068 u32 table;
1da177e4
LT
2069
2070 if (prefix) { /* user wants prefix routes only */
2071 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2072 /* success since this is not a prefix route */
2073 return 1;
2074 }
2075 }
2076
2d7202bf
TG
2077 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2078 if (nlh == NULL)
26932566 2079 return -EMSGSIZE;
2d7202bf
TG
2080
2081 rtm = nlmsg_data(nlh);
1da177e4
LT
2082 rtm->rtm_family = AF_INET6;
2083 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2084 rtm->rtm_src_len = rt->rt6i_src.plen;
2085 rtm->rtm_tos = 0;
c71099ac 2086 if (rt->rt6i_table)
9e762a4a 2087 table = rt->rt6i_table->tb6_id;
c71099ac 2088 else
9e762a4a
PM
2089 table = RT6_TABLE_UNSPEC;
2090 rtm->rtm_table = table;
2d7202bf 2091 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2092 if (rt->rt6i_flags&RTF_REJECT)
2093 rtm->rtm_type = RTN_UNREACHABLE;
2094 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2095 rtm->rtm_type = RTN_LOCAL;
2096 else
2097 rtm->rtm_type = RTN_UNICAST;
2098 rtm->rtm_flags = 0;
2099 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2100 rtm->rtm_protocol = rt->rt6i_protocol;
2101 if (rt->rt6i_flags&RTF_DYNAMIC)
2102 rtm->rtm_protocol = RTPROT_REDIRECT;
2103 else if (rt->rt6i_flags & RTF_ADDRCONF)
2104 rtm->rtm_protocol = RTPROT_KERNEL;
2105 else if (rt->rt6i_flags&RTF_DEFAULT)
2106 rtm->rtm_protocol = RTPROT_RA;
2107
2108 if (rt->rt6i_flags&RTF_CACHE)
2109 rtm->rtm_flags |= RTM_F_CLONED;
2110
2111 if (dst) {
2d7202bf 2112 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2113 rtm->rtm_dst_len = 128;
1da177e4 2114 } else if (rtm->rtm_dst_len)
2d7202bf 2115 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2116#ifdef CONFIG_IPV6_SUBTREES
2117 if (src) {
2d7202bf 2118 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2119 rtm->rtm_src_len = 128;
1da177e4 2120 } else if (rtm->rtm_src_len)
2d7202bf 2121 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2122#endif
2123 if (iif)
2d7202bf 2124 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2125 else if (dst) {
2126 struct in6_addr saddr_buf;
5e5f3f0f
YH
2127 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2128 dst, &saddr_buf) == 0)
2d7202bf 2129 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2130 }
2d7202bf 2131
1da177e4 2132 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2133 goto nla_put_failure;
2134
1da177e4 2135 if (rt->u.dst.neighbour)
2d7202bf
TG
2136 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2137
1da177e4 2138 if (rt->u.dst.dev)
2d7202bf
TG
2139 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2140
2141 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2142
2143 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2144 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2145 expires, rt->u.dst.error) < 0)
2146 goto nla_put_failure;
2d7202bf
TG
2147
2148 return nlmsg_end(skb, nlh);
2149
2150nla_put_failure:
26932566
PM
2151 nlmsg_cancel(skb, nlh);
2152 return -EMSGSIZE;
1da177e4
LT
2153}
2154
1b43af54 2155int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2156{
2157 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2158 int prefix;
2159
2d7202bf
TG
2160 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2161 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2162 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2163 } else
2164 prefix = 0;
2165
2166 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2167 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2168 prefix, NLM_F_MULTI);
1da177e4
LT
2169}
2170
c127ea2c 2171static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2172{
b854272b 2173 struct net *net = in_skb->sk->sk_net;
ab364a6f
TG
2174 struct nlattr *tb[RTA_MAX+1];
2175 struct rt6_info *rt;
1da177e4 2176 struct sk_buff *skb;
ab364a6f 2177 struct rtmsg *rtm;
1da177e4 2178 struct flowi fl;
ab364a6f 2179 int err, iif = 0;
1da177e4 2180
b854272b
DL
2181 if (net != &init_net)
2182 return -EINVAL;
2183
ab364a6f
TG
2184 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2185 if (err < 0)
2186 goto errout;
1da177e4 2187
ab364a6f 2188 err = -EINVAL;
1da177e4 2189 memset(&fl, 0, sizeof(fl));
1da177e4 2190
ab364a6f
TG
2191 if (tb[RTA_SRC]) {
2192 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2193 goto errout;
2194
2195 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2196 }
2197
2198 if (tb[RTA_DST]) {
2199 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2200 goto errout;
2201
2202 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2203 }
2204
2205 if (tb[RTA_IIF])
2206 iif = nla_get_u32(tb[RTA_IIF]);
2207
2208 if (tb[RTA_OIF])
2209 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2210
2211 if (iif) {
2212 struct net_device *dev;
881d966b 2213 dev = __dev_get_by_index(&init_net, iif);
1da177e4
LT
2214 if (!dev) {
2215 err = -ENODEV;
ab364a6f 2216 goto errout;
1da177e4
LT
2217 }
2218 }
2219
ab364a6f
TG
2220 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2221 if (skb == NULL) {
2222 err = -ENOBUFS;
2223 goto errout;
2224 }
1da177e4 2225
ab364a6f
TG
2226 /* Reserve room for dummy headers, this skb can pass
2227 through good chunk of routing engine.
2228 */
459a98ed 2229 skb_reset_mac_header(skb);
ab364a6f 2230 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2231
ab364a6f 2232 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2233 skb->dst = &rt->u.dst;
2234
ab364a6f 2235 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2236 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2237 nlh->nlmsg_seq, 0, 0);
1da177e4 2238 if (err < 0) {
ab364a6f
TG
2239 kfree_skb(skb);
2240 goto errout;
1da177e4
LT
2241 }
2242
97c53cac 2243 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
ab364a6f 2244errout:
1da177e4 2245 return err;
1da177e4
LT
2246}
2247
86872cb5 2248void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2249{
2250 struct sk_buff *skb;
528c4ceb
DL
2251 u32 seq;
2252 int err;
2253
2254 err = -ENOBUFS;
2255 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2256
339bf98f 2257 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2258 if (skb == NULL)
2259 goto errout;
2260
528c4ceb
DL
2261 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2262 event, info->pid, seq, 0, 0);
26932566
PM
2263 if (err < 0) {
2264 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2265 WARN_ON(err == -EMSGSIZE);
2266 kfree_skb(skb);
2267 goto errout;
2268 }
528c4ceb
DL
2269 err = rtnl_notify(skb, &init_net, info->pid,
2270 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
21713ebc
TG
2271errout:
2272 if (err < 0)
97c53cac 2273 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2274}
2275
2276/*
2277 * /proc
2278 */
2279
2280#ifdef CONFIG_PROC_FS
2281
2282#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2283
/*
 * Presumably the cursor state of a buffer-based /proc route listing
 * (output buffer plus offset/length/skip counters) -- TODO confirm.
 * NOTE(review): the seq_file based handlers visible in this part of the
 * file do not reference this structure; check for remaining users before
 * touching it.
 */
struct rt6_proc_arg {
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2292
2293static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2294{
33120b30 2295 struct seq_file *m = p_arg;
1da177e4 2296
33120b30
AD
2297 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2298 rt->rt6i_dst.plen);
1da177e4
LT
2299
2300#ifdef CONFIG_IPV6_SUBTREES
33120b30
AD
2301 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2302 rt->rt6i_src.plen);
1da177e4 2303#else
33120b30 2304 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2305#endif
2306
2307 if (rt->rt6i_nexthop) {
33120b30
AD
2308 seq_printf(m, NIP6_SEQFMT,
2309 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2310 } else {
33120b30 2311 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2312 }
33120b30
AD
2313 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2314 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2315 rt->u.dst.__use, rt->rt6i_flags,
2316 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2317 return 0;
2318}
2319
33120b30 2320static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2321{
f3db4851
DL
2322 struct net *net = (struct net *)m->private;
2323 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2324 return 0;
2325}
1da177e4 2326
33120b30
AD
2327static int ipv6_route_open(struct inode *inode, struct file *file)
2328{
f3db4851
DL
2329 struct net *net = get_proc_net(inode);
2330 if (!net)
2331 return -ENXIO;
2332 return single_open(file, ipv6_route_show, net);
2333}
2334
2335static int ipv6_route_release(struct inode *inode, struct file *file)
2336{
2337 struct seq_file *seq = file->private_data;
2338 struct net *net = seq->private;
2339 put_net(net);
2340 return single_release(inode, file);
1da177e4
LT
2341}
2342
33120b30
AD
2343static const struct file_operations ipv6_route_proc_fops = {
2344 .owner = THIS_MODULE,
2345 .open = ipv6_route_open,
2346 .read = seq_read,
2347 .llseek = seq_lseek,
f3db4851 2348 .release = ipv6_route_release,
33120b30
AD
2349};
2350
1da177e4
LT
2351static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2352{
2353 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2354 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2355 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2356 rt6_stats.fib_rt_cache,
2357 atomic_read(&ip6_dst_ops.entries),
2358 rt6_stats.fib_discarded_routes);
2359
2360 return 0;
2361}
2362
2363static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2364{
2365 return single_open(file, rt6_stats_seq_show, NULL);
2366}
2367
9a32144e 2368static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2369 .owner = THIS_MODULE,
2370 .open = rt6_stats_seq_open,
2371 .read = seq_read,
2372 .llseek = seq_lseek,
2373 .release = single_release,
2374};
75314fb3
DL
2375
2376static int ipv6_route_proc_init(struct net *net)
2377{
2378 int ret = -ENOMEM;
2379 if (!proc_net_fops_create(net, "ipv6_route",
2380 0, &ipv6_route_proc_fops))
2381 goto out;
2382
2383 if (!proc_net_fops_create(net, "rt6_stats",
2384 S_IRUGO, &rt6_stats_seq_fops))
2385 goto out_ipv6_route;
2386
2387 ret = 0;
2388out:
2389 return ret;
2390out_ipv6_route:
2391 proc_net_remove(net, "ipv6_route");
2392 goto out;
2393}
2394
/* Remove the proc entries created by ipv6_route_proc_init() from @net. */
static void ipv6_route_proc_fini(struct net *net)
{
	proc_net_remove(net, "ipv6_route");

	proc_net_remove(net, "rt6_stats");
}
2400#else
/* Without CONFIG_PROC_FS there is nothing to create; report success. */
static inline int ipv6_route_proc_init(struct net *net)
{
	return 0;
}
/* Without CONFIG_PROC_FS there is nothing to remove. */
static inline void ipv6_route_proc_fini(struct net *net)
{
}
1da177e4
LT
2409#endif /* CONFIG_PROC_FS */
2410
2411#ifdef CONFIG_SYSCTL
2412
1da177e4
LT
2413static
2414int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2415 void __user *buffer, size_t *lenp, loff_t *ppos)
2416{
5b7c931d
DL
2417 struct net *net = current->nsproxy->net_ns;
2418 int delay = net->ipv6.sysctl.flush_delay;
1da177e4
LT
2419 if (write) {
2420 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
5b7c931d 2421 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2422 return 0;
2423 } else
2424 return -EINVAL;
2425}
2426
760f2d01 2427ctl_table ipv6_route_table_template[] = {
1ab1457c 2428 {
1da177e4 2429 .procname = "flush",
4990509f 2430 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2431 .maxlen = sizeof(int),
89c8b3a1 2432 .mode = 0200,
1ab1457c 2433 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2434 },
2435 {
2436 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2437 .procname = "gc_thresh",
1ab1457c 2438 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2439 .maxlen = sizeof(int),
2440 .mode = 0644,
1ab1457c 2441 .proc_handler = &proc_dointvec,
1da177e4
LT
2442 },
2443 {
2444 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2445 .procname = "max_size",
4990509f 2446 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2447 .maxlen = sizeof(int),
2448 .mode = 0644,
1ab1457c 2449 .proc_handler = &proc_dointvec,
1da177e4
LT
2450 },
2451 {
2452 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2453 .procname = "gc_min_interval",
4990509f 2454 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2455 .maxlen = sizeof(int),
2456 .mode = 0644,
1ab1457c 2457 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2458 .strategy = &sysctl_jiffies,
2459 },
2460 {
2461 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2462 .procname = "gc_timeout",
4990509f 2463 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2464 .maxlen = sizeof(int),
2465 .mode = 0644,
1ab1457c 2466 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2467 .strategy = &sysctl_jiffies,
2468 },
2469 {
2470 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2471 .procname = "gc_interval",
4990509f 2472 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2473 .maxlen = sizeof(int),
2474 .mode = 0644,
1ab1457c 2475 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2476 .strategy = &sysctl_jiffies,
2477 },
2478 {
2479 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2480 .procname = "gc_elasticity",
4990509f 2481 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2482 .maxlen = sizeof(int),
2483 .mode = 0644,
1ab1457c 2484 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2485 .strategy = &sysctl_jiffies,
2486 },
2487 {
2488 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2489 .procname = "mtu_expires",
4990509f 2490 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2491 .maxlen = sizeof(int),
2492 .mode = 0644,
1ab1457c 2493 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2494 .strategy = &sysctl_jiffies,
2495 },
2496 {
2497 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2498 .procname = "min_adv_mss",
4990509f 2499 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2500 .maxlen = sizeof(int),
2501 .mode = 0644,
1ab1457c 2502 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2503 .strategy = &sysctl_jiffies,
2504 },
2505 {
2506 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2507 .procname = "gc_min_interval_ms",
4990509f 2508 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2509 .maxlen = sizeof(int),
2510 .mode = 0644,
1ab1457c 2511 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2512 .strategy = &sysctl_ms_jiffies,
2513 },
2514 { .ctl_name = 0 }
2515};
2516
760f2d01
DL
2517struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2518{
2519 struct ctl_table *table;
2520
2521 table = kmemdup(ipv6_route_table_template,
2522 sizeof(ipv6_route_table_template),
2523 GFP_KERNEL);
5ee09105
YH
2524
2525 if (table) {
2526 table[0].data = &net->ipv6.sysctl.flush_delay;
2527 /* table[1].data will be handled when we have
2528 routes per namespace */
2529 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2530 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2531 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2532 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2533 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2534 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2535 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2536 }
2537
760f2d01
DL
2538 return table;
2539}
1da177e4
LT
2540#endif
2541
433d49c3 2542int __init ip6_route_init(void)
1da177e4 2543{
433d49c3
DL
2544 int ret;
2545
e5d679f3
AD
2546 ip6_dst_ops.kmem_cachep =
2547 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b
DL
2548 SLAB_HWCACHE_ALIGN, NULL);
2549 if (!ip6_dst_ops.kmem_cachep)
2550 return -ENOMEM;
2551
14e50e57
DM
2552 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2553
433d49c3
DL
2554 ret = fib6_init();
2555 if (ret)
2556 goto out_kmem_cache;
2557
75314fb3
DL
2558 ret = ipv6_route_proc_init(&init_net);
2559 if (ret)
433d49c3
DL
2560 goto out_fib6_init;
2561
433d49c3
DL
2562 ret = xfrm6_init();
2563 if (ret)
75314fb3 2564 goto out_proc_init;
c35b7e72 2565
433d49c3
DL
2566 ret = fib6_rules_init();
2567 if (ret)
2568 goto xfrm6_init;
7e5449c2 2569
433d49c3
DL
2570 ret = -ENOBUFS;
2571 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2572 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2573 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2574 goto fib6_rules_init;
c127ea2c 2575
433d49c3
DL
2576 ret = 0;
2577out:
2578 return ret;
2579
2580fib6_rules_init:
433d49c3
DL
2581 fib6_rules_cleanup();
2582xfrm6_init:
433d49c3 2583 xfrm6_fini();
75314fb3
DL
2584out_proc_init:
2585 ipv6_route_proc_fini(&init_net);
433d49c3 2586out_fib6_init:
f3db4851 2587 rt6_ifdown(&init_net, NULL);
433d49c3
DL
2588 fib6_gc_cleanup();
2589out_kmem_cache:
2590 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2591 goto out;
1da177e4
LT
2592}
2593
2594void ip6_route_cleanup(void)
2595{
101367c2 2596 fib6_rules_cleanup();
75314fb3 2597 ipv6_route_proc_fini(&init_net);
1da177e4 2598 xfrm6_fini();
f3db4851 2599 rt6_ifdown(&init_net, NULL);
1da177e4
LT
2600 fib6_gc_cleanup();
2601 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2602}