]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[IPV6]: Make af_inet6 to check ip6_route_init return value.
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4
LT
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
457c4cbc 43#include <net/net_namespace.h>
1da177e4
LT
44#include <net/snmp.h>
45#include <net/ipv6.h>
46#include <net/ip6_fib.h>
47#include <net/ip6_route.h>
48#include <net/ndisc.h>
49#include <net/addrconf.h>
50#include <net/tcp.h>
51#include <linux/rtnetlink.h>
52#include <net/dst.h>
53#include <net/xfrm.h>
8d71740c 54#include <net/netevent.h>
21713ebc 55#include <net/netlink.h>
1da177e4
LT
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
63/* Set to 3 to get tracing. */
64#define RT6_DEBUG 2
65
66#if RT6_DEBUG >= 3
67#define RDBG(x) printk x
68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
69#else
70#define RDBG(x)
71#define RT6_TRACE(x...) do { ; } while (0)
72#endif
73
519fbd87 74#define CLONE_OFFLINK_ROUTE 0
1da177e4
LT
75
76static int ip6_rt_max_size = 4096;
77static int ip6_rt_gc_min_interval = HZ / 2;
78static int ip6_rt_gc_timeout = 60*HZ;
79int ip6_rt_gc_interval = 30*HZ;
80static int ip6_rt_gc_elasticity = 9;
81static int ip6_rt_mtu_expires = 10*60*HZ;
82static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90static int ip6_dst_gc(void);
91
92static int ip6_pkt_discard(struct sk_buff *skb);
93static int ip6_pkt_discard_out(struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
70ceb4f5
YH
97#ifdef CONFIG_IPV6_ROUTE_INFO
98static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
99 struct in6_addr *gwaddr, int ifindex,
100 unsigned pref);
101static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex);
103#endif
104
1da177e4
LT
/*
 * Operations table attached to every regular IPv6 route allocated through
 * ip6_dst_alloc().  Each hook points at the matching ip6_* handler declared
 * above; entry_size is the size of the rt6_info wrapper, not of the bare
 * dst_entry.
 */
static struct dst_ops ip6_dst_ops = {
	.family			=	AF_INET6,
	.protocol		=	__constant_htons(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	ip6_local_out,
	.entry_size		=	sizeof(struct rt6_info),
};
119
14e50e57
DM
120static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
121{
122}
123
/*
 * Operations table used for the dst entries built by ip6_dst_blackhole().
 * It shares the destroy and check handlers with ip6_dst_ops but installs
 * the no-op PMTU hook and sets no gc, ifdown, negative_advice,
 * link_failure or local_out hooks.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	__constant_htons(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.entry_size		=	sizeof(struct rt6_info),
};
132
1da177e4
LT
/*
 * Statically allocated "no route" entry.  Lookups in this file return a
 * pointer to it when nothing matches.  It carries error -ENETUNREACH,
 * discards packets in both directions via ip6_pkt_discard{,_out}, is
 * flagged as a reject route without next hop, uses the largest possible
 * metric, and its dst.path points back at itself.
 */
struct rt6_info ip6_null_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -ENETUNREACH,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_null_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};
151
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Static "prohibit" entry, only built with multiple routing tables.
 * Same layout as ip6_null_entry but with error -EACCES and the
 * ip6_pkt_prohibit{,_out} handlers.
 */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_prohibit,
			.output		= ip6_pkt_prohibit_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Static "blackhole" entry, only built with multiple routing tables.
 * Error is -EINVAL and both input and output use the generic dst_discard
 * handler.
 */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= dst_discard,
			.output		= dst_discard,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
196
1da177e4
LT
197/* allocate dst with ip6_dst_ops */
198static __inline__ struct rt6_info *ip6_dst_alloc(void)
199{
200 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
201}
202
203static void ip6_dst_destroy(struct dst_entry *dst)
204{
205 struct rt6_info *rt = (struct rt6_info *)dst;
206 struct inet6_dev *idev = rt->rt6i_idev;
207
208 if (idev != NULL) {
209 rt->rt6i_idev = NULL;
210 in6_dev_put(idev);
1ab1457c 211 }
1da177e4
LT
212}
213
214static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
215 int how)
216{
217 struct rt6_info *rt = (struct rt6_info *)dst;
218 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6
DL
219 struct net_device *loopback_dev =
220 dev->nd_net->loopback_dev;
1da177e4 221
5a3e55d6
DL
222 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
223 struct inet6_dev *loopback_idev =
224 in6_dev_get(loopback_dev);
1da177e4
LT
225 if (loopback_idev != NULL) {
226 rt->rt6i_idev = loopback_idev;
227 in6_dev_put(idev);
228 }
229 }
230}
231
232static __inline__ int rt6_check_expired(const struct rt6_info *rt)
233{
234 return (rt->rt6i_flags & RTF_EXPIRES &&
235 time_after(jiffies, rt->rt6i_expires));
236}
237
c71099ac
TG
238static inline int rt6_need_strict(struct in6_addr *daddr)
239{
240 return (ipv6_addr_type(daddr) &
241 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
242}
243
1da177e4 244/*
c71099ac 245 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
246 */
247
248static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
249 int oif,
250 int strict)
251{
252 struct rt6_info *local = NULL;
253 struct rt6_info *sprt;
254
255 if (oif) {
7cc48263 256 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
1da177e4
LT
257 struct net_device *dev = sprt->rt6i_dev;
258 if (dev->ifindex == oif)
259 return sprt;
260 if (dev->flags & IFF_LOOPBACK) {
261 if (sprt->rt6i_idev == NULL ||
262 sprt->rt6i_idev->dev->ifindex != oif) {
263 if (strict && oif)
264 continue;
1ab1457c 265 if (local && (!oif ||
1da177e4
LT
266 local->rt6i_idev->dev->ifindex == oif))
267 continue;
268 }
269 local = sprt;
270 }
271 }
272
273 if (local)
274 return local;
275
276 if (strict)
277 return &ip6_null_entry;
278 }
279 return rt;
280}
281
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing for @rt's next hop.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute; here the limit is the
 * per-interface cnf.rtr_probe_interval measured from neigh->updated.
 *
 * @rt may be NULL (find_match() passes its current best match, which
 * starts out NULL); in that case, or when the route has no neighbour or
 * the neighbour is already NUD_VALID, nothing is done.
 *
 * The rate-limit test and the update of neigh->updated are done under the
 * writer side of neigh->lock: the field is modified, and doing test+set
 * exclusively ensures only one of several concurrent callers sends the
 * solicitation for a given interval.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* cheap unlocked pre-check; re-tested under the lock below */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* solicit the neighbour via its solicited-node multicast address */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
317
1da177e4 318/*
554cfb7e 319 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 320 */
b6f99a21 321static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
322{
323 struct net_device *dev = rt->rt6i_dev;
161980f4 324 if (!oif || dev->ifindex == oif)
554cfb7e 325 return 2;
161980f4
DM
326 if ((dev->flags & IFF_LOOPBACK) &&
327 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
328 return 1;
329 return 0;
554cfb7e 330}
1da177e4 331
b6f99a21 332static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 333{
554cfb7e 334 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 335 int m;
4d0c5911
YH
336 if (rt->rt6i_flags & RTF_NONEXTHOP ||
337 !(rt->rt6i_flags & RTF_GATEWAY))
338 m = 1;
339 else if (neigh) {
554cfb7e
YH
340 read_lock_bh(&neigh->lock);
341 if (neigh->nud_state & NUD_VALID)
4d0c5911 342 m = 2;
398bcbeb
YH
343#ifdef CONFIG_IPV6_ROUTER_PREF
344 else if (neigh->nud_state & NUD_FAILED)
345 m = 0;
346#endif
347 else
ea73ee23 348 m = 1;
554cfb7e 349 read_unlock_bh(&neigh->lock);
398bcbeb
YH
350 } else
351 m = 0;
554cfb7e 352 return m;
1da177e4
LT
353}
354
554cfb7e
YH
355static int rt6_score_route(struct rt6_info *rt, int oif,
356 int strict)
1da177e4 357{
4d0c5911 358 int m, n;
1ab1457c 359
4d0c5911 360 m = rt6_check_dev(rt, oif);
77d16f45 361 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 362 return -1;
ebacaaa0
YH
363#ifdef CONFIG_IPV6_ROUTER_PREF
364 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
365#endif
4d0c5911 366 n = rt6_check_neigh(rt);
557e92ef 367 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
368 return -1;
369 return m;
370}
371
f11e6659
DM
372static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
373 int *mpri, struct rt6_info *match)
554cfb7e 374{
f11e6659
DM
375 int m;
376
377 if (rt6_check_expired(rt))
378 goto out;
379
380 m = rt6_score_route(rt, oif, strict);
381 if (m < 0)
382 goto out;
383
384 if (m > *mpri) {
385 if (strict & RT6_LOOKUP_F_REACHABLE)
386 rt6_probe(match);
387 *mpri = m;
388 match = rt;
389 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
390 rt6_probe(rt);
391 }
392
393out:
394 return match;
395}
396
397static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
398 struct rt6_info *rr_head,
399 u32 metric, int oif, int strict)
400{
401 struct rt6_info *rt, *match;
554cfb7e 402 int mpri = -1;
1da177e4 403
f11e6659
DM
404 match = NULL;
405 for (rt = rr_head; rt && rt->rt6i_metric == metric;
406 rt = rt->u.dst.rt6_next)
407 match = find_match(rt, oif, strict, &mpri, match);
408 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
409 rt = rt->u.dst.rt6_next)
410 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 411
f11e6659
DM
412 return match;
413}
1da177e4 414
f11e6659
DM
415static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
416{
417 struct rt6_info *match, *rt0;
1da177e4 418
f11e6659
DM
419 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
420 __FUNCTION__, fn->leaf, oif);
554cfb7e 421
f11e6659
DM
422 rt0 = fn->rr_ptr;
423 if (!rt0)
424 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 425
f11e6659 426 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 427
554cfb7e 428 if (!match &&
f11e6659
DM
429 (strict & RT6_LOOKUP_F_REACHABLE)) {
430 struct rt6_info *next = rt0->u.dst.rt6_next;
431
554cfb7e 432 /* no entries matched; do round-robin */
f11e6659
DM
433 if (!next || next->rt6i_metric != rt0->rt6i_metric)
434 next = fn->leaf;
435
436 if (next != rt0)
437 fn->rr_ptr = next;
1da177e4 438 }
1da177e4 439
f11e6659
DM
440 RT6_TRACE("%s() => %p\n",
441 __FUNCTION__, match);
1da177e4 442
554cfb7e 443 return (match ? match : &ip6_null_entry);
1da177e4
LT
444}
445
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received from router @gwaddr on @dev.
 *
 * @opt points at the option and @len is the number of bytes available
 * there.  Depending on the advertised lifetime the matching route is
 * deleted (lifetime 0), created, or refreshed with the new preference and
 * expiry.
 *
 * Returns 0 on success (including when route creation yields no route)
 * and -EINVAL for a malformed option.
 *
 * Validation follows RFC 4191 section 2.3: Length (in units of 8 octets)
 * must be 3 when Prefix Length > 64 and at least 2 when Prefix Length > 0.
 * A shorter option does not contain the prefix bytes that are read below.
 * The option's own Length must also fit inside @len.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* len is signed: reject negatives before comparing with sizeof */
	if (len < 0 || (size_t)len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (((int)rinfo->length << 3) > len)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow in HZ * lifetime below */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* copy only prefix_len bits into a zero-padded buffer */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* lifetime 0 withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
523
982f56f3
YH
/*
 * BACKTRACK(saddr): helper for the lookup functions below.  It expects the
 * expanding function to have locals named `rt` and `fn` and labels named
 * `out` and `restart`.
 *
 * When the current result `rt` is &ip6_null_entry, walk up from `fn`:
 * stop with `goto out` at a node flagged RTN_TL_ROOT; otherwise move to
 * the parent, or, if the parent has a subtree that is not the node we came
 * from, to the fib6_lookup() result for `saddr` in that subtree.  As soon
 * as the new node is flagged RTN_RTINFO, `goto restart` re-runs route
 * selection on it.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
541
542static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
543 struct flowi *fl, int flags)
1da177e4
LT
544{
545 struct fib6_node *fn;
546 struct rt6_info *rt;
547
c71099ac
TG
548 read_lock_bh(&table->tb6_lock);
549 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
550restart:
551 rt = fn->leaf;
77d16f45 552 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 553 BACKTRACK(&fl->fl6_src);
c71099ac 554out:
03f49f34 555 dst_use(&rt->u.dst, jiffies);
c71099ac 556 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
557 return rt;
558
559}
560
561struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
562 int oif, int strict)
563{
564 struct flowi fl = {
565 .oif = oif,
566 .nl_u = {
567 .ip6_u = {
568 .daddr = *daddr,
c71099ac
TG
569 },
570 },
571 };
572 struct dst_entry *dst;
77d16f45 573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 574
adaa70bb
TG
575 if (saddr) {
576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
577 flags |= RT6_LOOKUP_F_HAS_SADDR;
578 }
579
c71099ac
TG
580 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
581 if (dst->error == 0)
582 return (struct rt6_info *) dst;
583
584 dst_release(dst);
585
1da177e4
LT
586 return NULL;
587}
588
7159039a
YH
589EXPORT_SYMBOL(rt6_lookup);
590
c71099ac 591/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
592 It takes new route entry, the addition fails by any reason the
593 route is freed. In any case, if caller does not hold it, it may
594 be destroyed.
595 */
596
86872cb5 597static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
598{
599 int err;
c71099ac 600 struct fib6_table *table;
1da177e4 601
c71099ac
TG
602 table = rt->rt6i_table;
603 write_lock_bh(&table->tb6_lock);
86872cb5 604 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 605 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
606
607 return err;
608}
609
40e22e8f
TG
610int ip6_ins_rt(struct rt6_info *rt)
611{
86872cb5 612 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
613}
614
95a9a5ba
YH
615static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
616 struct in6_addr *saddr)
1da177e4 617{
1da177e4
LT
618 struct rt6_info *rt;
619
620 /*
621 * Clone the route.
622 */
623
624 rt = ip6_rt_copy(ort);
625
626 if (rt) {
58c4fb86
YH
627 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
628 if (rt->rt6i_dst.plen != 128 &&
629 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
630 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 631 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 632 }
1da177e4 633
58c4fb86 634 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
635 rt->rt6i_dst.plen = 128;
636 rt->rt6i_flags |= RTF_CACHE;
637 rt->u.dst.flags |= DST_HOST;
638
639#ifdef CONFIG_IPV6_SUBTREES
640 if (rt->rt6i_src.plen && saddr) {
641 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
642 rt->rt6i_src.plen = 128;
643 }
644#endif
645
646 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
647
95a9a5ba 648 }
1da177e4 649
95a9a5ba
YH
650 return rt;
651}
1da177e4 652
299d9939
YH
653static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
654{
655 struct rt6_info *rt = ip6_rt_copy(ort);
656 if (rt) {
657 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
658 rt->rt6i_dst.plen = 128;
659 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
660 rt->u.dst.flags |= DST_HOST;
661 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
662 }
663 return rt;
664}
665
/*
 * Common input/output route resolution for @fl in @table, restricted to
 * interface @oif when RT6_LOOKUP_F_IFACE is set in @flags.
 *
 * The first pass asks rt6_select() for reachable routers only, unless
 * ipv6_devconf.forwarding is set; whenever a pass ends at "out" with the
 * reachable flag still set, the flag is cleared and the lookup is repeated
 * without that restriction.
 *
 * A non-cached route without a next-hop neighbour (and without
 * RTF_NONEXTHOP) is copied with rt6_alloc_cow() and the copy is inserted
 * into the table.  Because the table lock is dropped for that, a failed
 * insertion triggers a fresh lookup, at most `attempts` times in total.
 *
 * Returns a route with a reference held for the caller, with lastuse and
 * __use updated; &ip6_null_entry when nothing was found or the copy could
 * not be allocated.
 */
static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
				      struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);

restart:
	rt = rt6_select(fn, oif, strict | reachable);
	/* may jump to "out" or back to "restart" */
	BACKTRACK(&fl->fl6_src);
	if (rt == &ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	/* pin the route before the table lock is released */
	dst_hold(&rt->u.dst);
	read_unlock_bh(&table->tb6_lock);

	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
	else {
#if CLONE_OFFLINK_ROUTE
		nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
#else
		/* use the table route directly; reference already held */
		goto out2;
#endif
	}

	/* switch from the original to the copy (or to the null entry) */
	dst_release(&rt->u.dst);
	rt = nrt ? : &ip6_null_entry;

	dst_hold(&rt->u.dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route. Relookup.
	 */
	dst_release(&rt->u.dst);
	goto relookup;

out:
	/* reached with table->tb6_lock still held */
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->u.dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->u.dst.lastuse = jiffies;
	rt->u.dst.__use++;

	return rt;
}
737
4acad72d
PE
738static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
739 struct flowi *fl, int flags)
740{
741 return ip6_pol_route(table, fl->iif, fl, flags);
742}
743
c71099ac
TG
744void ip6_route_input(struct sk_buff *skb)
745{
0660e03f 746 struct ipv6hdr *iph = ipv6_hdr(skb);
adaa70bb 747 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
748 struct flowi fl = {
749 .iif = skb->dev->ifindex,
750 .nl_u = {
751 .ip6_u = {
752 .daddr = iph->daddr,
753 .saddr = iph->saddr,
90bcaf7b 754 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
755 },
756 },
1ab1457c 757 .mark = skb->mark,
c71099ac
TG
758 .proto = iph->nexthdr,
759 };
adaa70bb
TG
760
761 if (rt6_need_strict(&iph->daddr))
762 flags |= RT6_LOOKUP_F_IFACE;
c71099ac
TG
763
764 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
765}
766
767static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
768 struct flowi *fl, int flags)
1da177e4 769{
4acad72d 770 return ip6_pol_route(table, fl->oif, fl, flags);
c71099ac
TG
771}
772
773struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
774{
775 int flags = 0;
776
777 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 778 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 779
adaa70bb
TG
780 if (!ipv6_addr_any(&fl->fl6_src))
781 flags |= RT6_LOOKUP_F_HAS_SADDR;
782
c71099ac 783 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
784}
785
7159039a 786EXPORT_SYMBOL(ip6_route_output);
1da177e4 787
14e50e57
DM
788int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
789{
790 struct rt6_info *ort = (struct rt6_info *) *dstp;
791 struct rt6_info *rt = (struct rt6_info *)
792 dst_alloc(&ip6_dst_blackhole_ops);
793 struct dst_entry *new = NULL;
794
795 if (rt) {
796 new = &rt->u.dst;
797
798 atomic_set(&new->__refcnt, 1);
799 new->__use = 1;
352e512c
HX
800 new->input = dst_discard;
801 new->output = dst_discard;
14e50e57
DM
802
803 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
804 new->dev = ort->u.dst.dev;
805 if (new->dev)
806 dev_hold(new->dev);
807 rt->rt6i_idev = ort->rt6i_idev;
808 if (rt->rt6i_idev)
809 in6_dev_hold(rt->rt6i_idev);
810 rt->rt6i_expires = 0;
811
812 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
813 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
814 rt->rt6i_metric = 0;
815
816 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
817#ifdef CONFIG_IPV6_SUBTREES
818 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
819#endif
820
821 dst_free(new);
822 }
823
824 dst_release(*dstp);
825 *dstp = new;
826 return (new ? 0 : -ENOMEM);
827}
828EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
829
1da177e4
LT
830/*
831 * Destination cache support functions
832 */
833
834static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
835{
836 struct rt6_info *rt;
837
838 rt = (struct rt6_info *) dst;
839
840 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
841 return dst;
842
843 return NULL;
844}
845
846static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
847{
848 struct rt6_info *rt = (struct rt6_info *) dst;
849
850 if (rt) {
851 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 852 ip6_del_rt(rt);
1da177e4
LT
853 else
854 dst_release(dst);
855 }
856 return NULL;
857}
858
859static void ip6_link_failure(struct sk_buff *skb)
860{
861 struct rt6_info *rt;
862
863 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
864
865 rt = (struct rt6_info *) skb->dst;
866 if (rt) {
867 if (rt->rt6i_flags&RTF_CACHE) {
868 dst_set_expires(&rt->u.dst, 0);
869 rt->rt6i_flags |= RTF_EXPIRES;
870 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
871 rt->rt6i_node->fn_sernum = -1;
872 }
873}
874
875static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
876{
877 struct rt6_info *rt6 = (struct rt6_info*)dst;
878
879 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
880 rt6->rt6i_flags |= RTF_MODIFIED;
881 if (mtu < IPV6_MIN_MTU) {
882 mtu = IPV6_MIN_MTU;
883 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
884 }
885 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 886 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
887 }
888}
889
1da177e4
LT
890static int ipv6_get_mtu(struct net_device *dev);
891
892static inline unsigned int ipv6_advmss(unsigned int mtu)
893{
894 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
895
896 if (mtu < ip6_rt_min_advmss)
897 mtu = ip6_rt_min_advmss;
898
899 /*
1ab1457c
YH
900 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
901 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
902 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
903 * rely only on pmtu discovery"
904 */
905 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
906 mtu = IPV6_MAXPLEN;
907 return mtu;
908}
909
5d0bbeeb 910static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 911static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 912
1ab1457c 913struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
1da177e4
LT
914 struct neighbour *neigh,
915 struct in6_addr *addr,
916 int (*output)(struct sk_buff *))
917{
918 struct rt6_info *rt;
919 struct inet6_dev *idev = in6_dev_get(dev);
920
921 if (unlikely(idev == NULL))
922 return NULL;
923
924 rt = ip6_dst_alloc();
925 if (unlikely(rt == NULL)) {
926 in6_dev_put(idev);
927 goto out;
928 }
929
930 dev_hold(dev);
931 if (neigh)
932 neigh_hold(neigh);
933 else
934 neigh = ndisc_get_neigh(dev, addr);
935
936 rt->rt6i_dev = dev;
937 rt->rt6i_idev = idev;
938 rt->rt6i_nexthop = neigh;
939 atomic_set(&rt->u.dst.__refcnt, 1);
940 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
941 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
942 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
943 rt->u.dst.output = output;
944
945#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
946 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
947 ? DST_HOST
1da177e4
LT
948 : 0;
949 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
950 rt->rt6i_dst.plen = 128;
951#endif
952
5d0bbeeb 953 spin_lock_bh(&ndisc_lock);
1da177e4
LT
954 rt->u.dst.next = ndisc_dst_gc_list;
955 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 956 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
957
958 fib6_force_start_gc();
959
960out:
40aa7b90 961 return &rt->u.dst;
1da177e4
LT
962}
963
964int ndisc_dst_gc(int *more)
965{
966 struct dst_entry *dst, *next, **pprev;
967 int freed;
968
969 next = NULL;
1ab1457c 970 freed = 0;
5d0bbeeb
TG
971
972 spin_lock_bh(&ndisc_lock);
1da177e4 973 pprev = &ndisc_dst_gc_list;
5d0bbeeb 974
1da177e4
LT
975 while ((dst = *pprev) != NULL) {
976 if (!atomic_read(&dst->__refcnt)) {
977 *pprev = dst->next;
978 dst_free(dst);
979 freed++;
980 } else {
981 pprev = &dst->next;
982 (*more)++;
983 }
984 }
985
5d0bbeeb
TG
986 spin_unlock_bh(&ndisc_lock);
987
1da177e4
LT
988 return freed;
989}
990
991static int ip6_dst_gc(void)
992{
993 static unsigned expire = 30*HZ;
994 static unsigned long last_gc;
995 unsigned long now = jiffies;
996
997 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
998 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
999 goto out;
1000
1001 expire++;
1002 fib6_run_gc(expire);
1003 last_gc = now;
1004 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1005 expire = ip6_rt_gc_timeout>>1;
1006
1007out:
1008 expire -= expire>>ip6_rt_gc_elasticity;
1009 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
1010}
1011
1012/* Clean host part of a prefix. Not necessary in radix tree,
1013 but results in cleaner routing tables.
1014
1015 Remove it only when all the things will work!
1016 */
1017
1018static int ipv6_get_mtu(struct net_device *dev)
1019{
1020 int mtu = IPV6_MIN_MTU;
1021 struct inet6_dev *idev;
1022
1023 idev = in6_dev_get(dev);
1024 if (idev) {
1025 mtu = idev->cnf.mtu6;
1026 in6_dev_put(idev);
1027 }
1028 return mtu;
1029}
1030
1031int ipv6_get_hoplimit(struct net_device *dev)
1032{
1033 int hoplimit = ipv6_devconf.hop_limit;
1034 struct inet6_dev *idev;
1035
1036 idev = in6_dev_get(dev);
1037 if (idev) {
1038 hoplimit = idev->cnf.hop_limit;
1039 in6_dev_put(idev);
1040 }
1041 return hoplimit;
1042}
1043
1044/*
1045 *
1046 */
1047
86872cb5 1048int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1049{
1050 int err;
1da177e4
LT
1051 struct rt6_info *rt = NULL;
1052 struct net_device *dev = NULL;
1053 struct inet6_dev *idev = NULL;
c71099ac 1054 struct fib6_table *table;
1da177e4
LT
1055 int addr_type;
1056
86872cb5 1057 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1058 return -EINVAL;
1059#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1060 if (cfg->fc_src_len)
1da177e4
LT
1061 return -EINVAL;
1062#endif
86872cb5 1063 if (cfg->fc_ifindex) {
1da177e4 1064 err = -ENODEV;
881d966b 1065 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1da177e4
LT
1066 if (!dev)
1067 goto out;
1068 idev = in6_dev_get(dev);
1069 if (!idev)
1070 goto out;
1071 }
1072
86872cb5
TG
1073 if (cfg->fc_metric == 0)
1074 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1075
86872cb5 1076 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1077 if (table == NULL) {
1078 err = -ENOBUFS;
1079 goto out;
1080 }
1081
1da177e4
LT
1082 rt = ip6_dst_alloc();
1083
1084 if (rt == NULL) {
1085 err = -ENOMEM;
1086 goto out;
1087 }
1088
1089 rt->u.dst.obsolete = -1;
86872cb5 1090 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1091
86872cb5
TG
1092 if (cfg->fc_protocol == RTPROT_UNSPEC)
1093 cfg->fc_protocol = RTPROT_BOOT;
1094 rt->rt6i_protocol = cfg->fc_protocol;
1095
1096 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1097
1098 if (addr_type & IPV6_ADDR_MULTICAST)
1099 rt->u.dst.input = ip6_mc_input;
1100 else
1101 rt->u.dst.input = ip6_forward;
1102
1103 rt->u.dst.output = ip6_output;
1104
86872cb5
TG
1105 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1106 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1107 if (rt->rt6i_dst.plen == 128)
1108 rt->u.dst.flags = DST_HOST;
1109
1110#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1111 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1112 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1113#endif
1114
86872cb5 1115 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1116
1117 /* We cannot add true routes via loopback here,
1118 they would result in kernel looping; promote them to reject routes
1119 */
86872cb5 1120 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1121 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1122 /* hold loopback dev/idev if we haven't done so. */
2774c7ab 1123 if (dev != init_net.loopback_dev) {
1da177e4
LT
1124 if (dev) {
1125 dev_put(dev);
1126 in6_dev_put(idev);
1127 }
2774c7ab 1128 dev = init_net.loopback_dev;
1da177e4
LT
1129 dev_hold(dev);
1130 idev = in6_dev_get(dev);
1131 if (!idev) {
1132 err = -ENODEV;
1133 goto out;
1134 }
1135 }
1136 rt->u.dst.output = ip6_pkt_discard_out;
1137 rt->u.dst.input = ip6_pkt_discard;
1138 rt->u.dst.error = -ENETUNREACH;
1139 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1140 goto install_route;
1141 }
1142
86872cb5 1143 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1144 struct in6_addr *gw_addr;
1145 int gwa_type;
1146
86872cb5
TG
1147 gw_addr = &cfg->fc_gateway;
1148 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1149 gwa_type = ipv6_addr_type(gw_addr);
1150
1151 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1152 struct rt6_info *grt;
1153
1154 /* IPv6 strictly inhibits using not link-local
1155 addresses as nexthop address.
1156 Otherwise, router will not able to send redirects.
1157 It is very good, but in some (rare!) circumstances
1158 (SIT, PtP, NBMA NOARP links) it is handy to allow
1159 some exceptions. --ANK
1160 */
1161 err = -EINVAL;
1162 if (!(gwa_type&IPV6_ADDR_UNICAST))
1163 goto out;
1164
86872cb5 1165 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1166
1167 err = -EHOSTUNREACH;
1168 if (grt == NULL)
1169 goto out;
1170 if (dev) {
1171 if (dev != grt->rt6i_dev) {
1172 dst_release(&grt->u.dst);
1173 goto out;
1174 }
1175 } else {
1176 dev = grt->rt6i_dev;
1177 idev = grt->rt6i_idev;
1178 dev_hold(dev);
1179 in6_dev_hold(grt->rt6i_idev);
1180 }
1181 if (!(grt->rt6i_flags&RTF_GATEWAY))
1182 err = 0;
1183 dst_release(&grt->u.dst);
1184
1185 if (err)
1186 goto out;
1187 }
1188 err = -EINVAL;
1189 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1190 goto out;
1191 }
1192
1193 err = -ENODEV;
1194 if (dev == NULL)
1195 goto out;
1196
86872cb5 1197 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1198 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1199 if (IS_ERR(rt->rt6i_nexthop)) {
1200 err = PTR_ERR(rt->rt6i_nexthop);
1201 rt->rt6i_nexthop = NULL;
1202 goto out;
1203 }
1204 }
1205
86872cb5 1206 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1207
1208install_route:
86872cb5
TG
1209 if (cfg->fc_mx) {
1210 struct nlattr *nla;
1211 int remaining;
1212
1213 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1214 int type = nla_type(nla);
86872cb5
TG
1215
1216 if (type) {
1217 if (type > RTAX_MAX) {
1da177e4
LT
1218 err = -EINVAL;
1219 goto out;
1220 }
86872cb5
TG
1221
1222 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1223 }
1da177e4
LT
1224 }
1225 }
1226
1227 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1228 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1229 if (!rt->u.dst.metrics[RTAX_MTU-1])
1230 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1231 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1232 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1233 rt->u.dst.dev = dev;
1234 rt->rt6i_idev = idev;
c71099ac 1235 rt->rt6i_table = table;
86872cb5 1236 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1237
1238out:
1239 if (dev)
1240 dev_put(dev);
1241 if (idev)
1242 in6_dev_put(idev);
1243 if (rt)
40aa7b90 1244 dst_free(&rt->u.dst);
1da177e4
LT
1245 return err;
1246}
1247
86872cb5 1248static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1249{
1250 int err;
c71099ac 1251 struct fib6_table *table;
1da177e4 1252
6c813a72
PM
1253 if (rt == &ip6_null_entry)
1254 return -ENOENT;
1255
c71099ac
TG
1256 table = rt->rt6i_table;
1257 write_lock_bh(&table->tb6_lock);
1da177e4 1258
86872cb5 1259 err = fib6_del(rt, info);
1da177e4
LT
1260 dst_release(&rt->u.dst);
1261
c71099ac 1262 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1263
1264 return err;
1265}
1266
e0a1ad73
TG
1267int ip6_del_rt(struct rt6_info *rt)
1268{
86872cb5 1269 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1270}
1271
86872cb5 1272static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1273{
c71099ac 1274 struct fib6_table *table;
1da177e4
LT
1275 struct fib6_node *fn;
1276 struct rt6_info *rt;
1277 int err = -ESRCH;
1278
86872cb5 1279 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1280 if (table == NULL)
1281 return err;
1282
1283 read_lock_bh(&table->tb6_lock);
1da177e4 1284
c71099ac 1285 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1286 &cfg->fc_dst, cfg->fc_dst_len,
1287 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1288
1da177e4 1289 if (fn) {
7cc48263 1290 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1291 if (cfg->fc_ifindex &&
1da177e4 1292 (rt->rt6i_dev == NULL ||
86872cb5 1293 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1294 continue;
86872cb5
TG
1295 if (cfg->fc_flags & RTF_GATEWAY &&
1296 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1297 continue;
86872cb5 1298 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1299 continue;
1300 dst_hold(&rt->u.dst);
c71099ac 1301 read_unlock_bh(&table->tb6_lock);
1da177e4 1302
86872cb5 1303 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1304 }
1305 }
c71099ac 1306 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1307
1308 return err;
1309}
1310
1311/*
1312 * Handle redirects
1313 */
a6279458
YH
1314struct ip6rd_flowi {
1315 struct flowi fl;
1316 struct in6_addr gateway;
1317};
1318
1319static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1320 struct flowi *fl,
1321 int flags)
1da177e4 1322{
a6279458
YH
1323 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1324 struct rt6_info *rt;
e843b9e1 1325 struct fib6_node *fn;
c71099ac 1326
1da177e4 1327 /*
e843b9e1
YH
1328 * Get the "current" route for this destination and
1329 * check if the redirect has come from approriate router.
1330 *
1331 * RFC 2461 specifies that redirects should only be
1332 * accepted if they come from the nexthop to the target.
1333 * Due to the way the routes are chosen, this notion
1334 * is a bit fuzzy and one might need to check all possible
1335 * routes.
1da177e4 1336 */
1da177e4 1337
c71099ac 1338 read_lock_bh(&table->tb6_lock);
a6279458 1339 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1340restart:
7cc48263 1341 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1342 /*
1343 * Current route is on-link; redirect is always invalid.
1344 *
1345 * Seems, previous statement is not true. It could
1346 * be node, which looks for us as on-link (f.e. proxy ndisc)
1347 * But then router serving it might decide, that we should
1348 * know truth 8)8) --ANK (980726).
1349 */
1350 if (rt6_check_expired(rt))
1351 continue;
1352 if (!(rt->rt6i_flags & RTF_GATEWAY))
1353 continue;
a6279458 1354 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1355 continue;
a6279458 1356 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1357 continue;
1358 break;
1359 }
a6279458 1360
cb15d9c2 1361 if (!rt)
a6279458 1362 rt = &ip6_null_entry;
cb15d9c2
YH
1363 BACKTRACK(&fl->fl6_src);
1364out:
a6279458
YH
1365 dst_hold(&rt->u.dst);
1366
c71099ac 1367 read_unlock_bh(&table->tb6_lock);
e843b9e1 1368
a6279458
YH
1369 return rt;
1370};
1371
1372static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1373 struct in6_addr *src,
1374 struct in6_addr *gateway,
1375 struct net_device *dev)
1376{
adaa70bb 1377 int flags = RT6_LOOKUP_F_HAS_SADDR;
a6279458
YH
1378 struct ip6rd_flowi rdfl = {
1379 .fl = {
1380 .oif = dev->ifindex,
1381 .nl_u = {
1382 .ip6_u = {
1383 .daddr = *dest,
1384 .saddr = *src,
1385 },
1386 },
1387 },
1388 .gateway = *gateway,
1389 };
adaa70bb
TG
1390
1391 if (rt6_need_strict(dest))
1392 flags |= RT6_LOOKUP_F_IFACE;
a6279458
YH
1393
1394 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1395}
1396
1397void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1398 struct in6_addr *saddr,
1399 struct neighbour *neigh, u8 *lladdr, int on_link)
1400{
1401 struct rt6_info *rt, *nrt = NULL;
1402 struct netevent_redirect netevent;
1403
1404 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1405
1406 if (rt == &ip6_null_entry) {
1da177e4
LT
1407 if (net_ratelimit())
1408 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1409 "for redirect target\n");
a6279458 1410 goto out;
1da177e4
LT
1411 }
1412
1da177e4
LT
1413 /*
1414 * We have finally decided to accept it.
1415 */
1416
1ab1457c 1417 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1418 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1419 NEIGH_UPDATE_F_OVERRIDE|
1420 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1421 NEIGH_UPDATE_F_ISROUTER))
1422 );
1423
1424 /*
1425 * Redirect received -> path was valid.
1426 * Look, redirects are sent only in response to data packets,
1427 * so that this nexthop apparently is reachable. --ANK
1428 */
1429 dst_confirm(&rt->u.dst);
1430
1431 /* Duplicate redirect: silently ignore. */
1432 if (neigh == rt->u.dst.neighbour)
1433 goto out;
1434
1435 nrt = ip6_rt_copy(rt);
1436 if (nrt == NULL)
1437 goto out;
1438
1439 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1440 if (on_link)
1441 nrt->rt6i_flags &= ~RTF_GATEWAY;
1442
1443 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1444 nrt->rt6i_dst.plen = 128;
1445 nrt->u.dst.flags |= DST_HOST;
1446
1447 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1448 nrt->rt6i_nexthop = neigh_clone(neigh);
1449 /* Reset pmtu, it may be better */
1450 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1451 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1452
40e22e8f 1453 if (ip6_ins_rt(nrt))
1da177e4
LT
1454 goto out;
1455
8d71740c
TT
1456 netevent.old = &rt->u.dst;
1457 netevent.new = &nrt->u.dst;
1458 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1459
1da177e4 1460 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1461 ip6_del_rt(rt);
1da177e4
LT
1462 return;
1463 }
1464
1465out:
1ab1457c 1466 dst_release(&rt->u.dst);
1da177e4
LT
1467 return;
1468}
1469
1470/*
1471 * Handle ICMP "packet too big" messages
1472 * i.e. Path MTU discovery
1473 */
1474
1475void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1476 struct net_device *dev, u32 pmtu)
1477{
1478 struct rt6_info *rt, *nrt;
1479 int allfrag = 0;
1480
1481 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1482 if (rt == NULL)
1483 return;
1484
1485 if (pmtu >= dst_mtu(&rt->u.dst))
1486 goto out;
1487
1488 if (pmtu < IPV6_MIN_MTU) {
1489 /*
1ab1457c 1490 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1491 * MTU (1280) and a fragment header should always be included
1492 * after a node receiving Too Big message reporting PMTU is
1493 * less than the IPv6 Minimum Link MTU.
1494 */
1495 pmtu = IPV6_MIN_MTU;
1496 allfrag = 1;
1497 }
1498
1499 /* New mtu received -> path was valid.
1500 They are sent only in response to data packets,
1501 so that this nexthop apparently is reachable. --ANK
1502 */
1503 dst_confirm(&rt->u.dst);
1504
1505 /* Host route. If it is static, it would be better
1506 not to override it, but add new one, so that
1507 when cache entry will expire old pmtu
1508 would return automatically.
1509 */
1510 if (rt->rt6i_flags & RTF_CACHE) {
1511 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1512 if (allfrag)
1513 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1514 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1515 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1516 goto out;
1517 }
1518
1519 /* Network route.
1520 Two cases are possible:
1521 1. It is connected route. Action: COW
1522 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1523 */
d5315b50 1524 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1525 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1526 else
1527 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1528
d5315b50 1529 if (nrt) {
a1e78363
YH
1530 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1531 if (allfrag)
1532 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1533
1534 /* According to RFC 1981, detecting PMTU increase shouldn't be
1535 * happened within 5 mins, the recommended timer is 10 mins.
1536 * Here this route expiration time is set to ip6_rt_mtu_expires
1537 * which is 10 mins. After 10 mins the decreased pmtu is expired
1538 * and detecting PMTU increase will be automatically happened.
1539 */
1540 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1541 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1542
40e22e8f 1543 ip6_ins_rt(nrt);
1da177e4 1544 }
1da177e4
LT
1545out:
1546 dst_release(&rt->u.dst);
1547}
1548
1549/*
1550 * Misc support functions
1551 */
1552
1553static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1554{
1555 struct rt6_info *rt = ip6_dst_alloc();
1556
1557 if (rt) {
1558 rt->u.dst.input = ort->u.dst.input;
1559 rt->u.dst.output = ort->u.dst.output;
1560
1561 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1562 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1563 rt->u.dst.dev = ort->u.dst.dev;
1564 if (rt->u.dst.dev)
1565 dev_hold(rt->u.dst.dev);
1566 rt->rt6i_idev = ort->rt6i_idev;
1567 if (rt->rt6i_idev)
1568 in6_dev_hold(rt->rt6i_idev);
1569 rt->u.dst.lastuse = jiffies;
1570 rt->rt6i_expires = 0;
1571
1572 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1573 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1574 rt->rt6i_metric = 0;
1575
1576 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1577#ifdef CONFIG_IPV6_SUBTREES
1578 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1579#endif
c71099ac 1580 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1581 }
1582 return rt;
1583}
1584
70ceb4f5
YH
1585#ifdef CONFIG_IPV6_ROUTE_INFO
1586static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1587 struct in6_addr *gwaddr, int ifindex)
1588{
1589 struct fib6_node *fn;
1590 struct rt6_info *rt = NULL;
c71099ac
TG
1591 struct fib6_table *table;
1592
1593 table = fib6_get_table(RT6_TABLE_INFO);
1594 if (table == NULL)
1595 return NULL;
70ceb4f5 1596
c71099ac
TG
1597 write_lock_bh(&table->tb6_lock);
1598 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1599 if (!fn)
1600 goto out;
1601
7cc48263 1602 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1603 if (rt->rt6i_dev->ifindex != ifindex)
1604 continue;
1605 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1606 continue;
1607 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1608 continue;
1609 dst_hold(&rt->u.dst);
1610 break;
1611 }
1612out:
c71099ac 1613 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1614 return rt;
1615}
1616
1617static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1618 struct in6_addr *gwaddr, int ifindex,
1619 unsigned pref)
1620{
86872cb5
TG
1621 struct fib6_config cfg = {
1622 .fc_table = RT6_TABLE_INFO,
1623 .fc_metric = 1024,
1624 .fc_ifindex = ifindex,
1625 .fc_dst_len = prefixlen,
1626 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1627 RTF_UP | RTF_PREF(pref),
1628 };
1629
1630 ipv6_addr_copy(&cfg.fc_dst, prefix);
1631 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1632
e317da96
YH
1633 /* We should treat it as a default route if prefix length is 0. */
1634 if (!prefixlen)
86872cb5 1635 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1636
86872cb5 1637 ip6_route_add(&cfg);
70ceb4f5
YH
1638
1639 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1640}
1641#endif
1642
1da177e4 1643struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1644{
1da177e4 1645 struct rt6_info *rt;
c71099ac 1646 struct fib6_table *table;
1da177e4 1647
c71099ac
TG
1648 table = fib6_get_table(RT6_TABLE_DFLT);
1649 if (table == NULL)
1650 return NULL;
1da177e4 1651
c71099ac 1652 write_lock_bh(&table->tb6_lock);
7cc48263 1653 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1654 if (dev == rt->rt6i_dev &&
045927ff 1655 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1656 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1657 break;
1658 }
1659 if (rt)
1660 dst_hold(&rt->u.dst);
c71099ac 1661 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1662 return rt;
1663}
1664
c7dc89c0
FT
1665EXPORT_SYMBOL(rt6_get_dflt_router);
1666
1da177e4 1667struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1668 struct net_device *dev,
1669 unsigned int pref)
1da177e4 1670{
86872cb5
TG
1671 struct fib6_config cfg = {
1672 .fc_table = RT6_TABLE_DFLT,
1673 .fc_metric = 1024,
1674 .fc_ifindex = dev->ifindex,
1675 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1676 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1677 };
1da177e4 1678
86872cb5 1679 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1680
86872cb5 1681 ip6_route_add(&cfg);
1da177e4 1682
1da177e4
LT
1683 return rt6_get_dflt_router(gwaddr, dev);
1684}
1685
1686void rt6_purge_dflt_routers(void)
1687{
1688 struct rt6_info *rt;
c71099ac
TG
1689 struct fib6_table *table;
1690
1691 /* NOTE: Keep consistent with rt6_get_dflt_router */
1692 table = fib6_get_table(RT6_TABLE_DFLT);
1693 if (table == NULL)
1694 return;
1da177e4
LT
1695
1696restart:
c71099ac 1697 read_lock_bh(&table->tb6_lock);
7cc48263 1698 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1699 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1700 dst_hold(&rt->u.dst);
c71099ac 1701 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1702 ip6_del_rt(rt);
1da177e4
LT
1703 goto restart;
1704 }
1705 }
c71099ac 1706 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1707}
1708
86872cb5
TG
1709static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1710 struct fib6_config *cfg)
1711{
1712 memset(cfg, 0, sizeof(*cfg));
1713
1714 cfg->fc_table = RT6_TABLE_MAIN;
1715 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1716 cfg->fc_metric = rtmsg->rtmsg_metric;
1717 cfg->fc_expires = rtmsg->rtmsg_info;
1718 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1719 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1720 cfg->fc_flags = rtmsg->rtmsg_flags;
1721
1722 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1723 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1724 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1725}
1726
1da177e4
LT
1727int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1728{
86872cb5 1729 struct fib6_config cfg;
1da177e4
LT
1730 struct in6_rtmsg rtmsg;
1731 int err;
1732
1733 switch(cmd) {
1734 case SIOCADDRT: /* Add a route */
1735 case SIOCDELRT: /* Delete a route */
1736 if (!capable(CAP_NET_ADMIN))
1737 return -EPERM;
1738 err = copy_from_user(&rtmsg, arg,
1739 sizeof(struct in6_rtmsg));
1740 if (err)
1741 return -EFAULT;
86872cb5
TG
1742
1743 rtmsg_to_fib6_config(&rtmsg, &cfg);
1744
1da177e4
LT
1745 rtnl_lock();
1746 switch (cmd) {
1747 case SIOCADDRT:
86872cb5 1748 err = ip6_route_add(&cfg);
1da177e4
LT
1749 break;
1750 case SIOCDELRT:
86872cb5 1751 err = ip6_route_del(&cfg);
1da177e4
LT
1752 break;
1753 default:
1754 err = -EINVAL;
1755 }
1756 rtnl_unlock();
1757
1758 return err;
3ff50b79 1759 }
1da177e4
LT
1760
1761 return -EINVAL;
1762}
1763
1764/*
1765 * Drop the packet on the floor
1766 */
1767
612f09e8
YH
1768static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
1769 int ipstats_mib_noroutes)
1da177e4 1770{
612f09e8
YH
1771 int type;
1772 switch (ipstats_mib_noroutes) {
1773 case IPSTATS_MIB_INNOROUTES:
0660e03f 1774 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
612f09e8
YH
1775 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1776 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1777 break;
1778 }
1779 /* FALLTHROUGH */
1780 case IPSTATS_MIB_OUTNOROUTES:
1781 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1782 break;
1783 }
9ce8ade0 1784 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
1da177e4
LT
1785 kfree_skb(skb);
1786 return 0;
1787}
1788
9ce8ade0
TG
1789static int ip6_pkt_discard(struct sk_buff *skb)
1790{
612f09e8 1791 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1792}
1793
20380731 1794static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1795{
1796 skb->dev = skb->dst->dev;
612f09e8 1797 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1798}
1799
6723ab54
DM
1800#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1801
9ce8ade0
TG
1802static int ip6_pkt_prohibit(struct sk_buff *skb)
1803{
612f09e8 1804 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1805}
1806
1807static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1808{
1809 skb->dev = skb->dst->dev;
612f09e8 1810 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1811}
1812
6723ab54
DM
1813#endif
1814
1da177e4
LT
1815/*
1816 * Allocate a dst for local (unicast / anycast) address.
1817 */
1818
1819struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1820 const struct in6_addr *addr,
1821 int anycast)
1822{
1823 struct rt6_info *rt = ip6_dst_alloc();
1824
1825 if (rt == NULL)
1826 return ERR_PTR(-ENOMEM);
1827
2774c7ab 1828 dev_hold(init_net.loopback_dev);
1da177e4
LT
1829 in6_dev_hold(idev);
1830
1831 rt->u.dst.flags = DST_HOST;
1832 rt->u.dst.input = ip6_input;
1833 rt->u.dst.output = ip6_output;
2774c7ab 1834 rt->rt6i_dev = init_net.loopback_dev;
1da177e4
LT
1835 rt->rt6i_idev = idev;
1836 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1837 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1838 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1839 rt->u.dst.obsolete = -1;
1840
1841 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1842 if (anycast)
1843 rt->rt6i_flags |= RTF_ANYCAST;
1844 else
1da177e4
LT
1845 rt->rt6i_flags |= RTF_LOCAL;
1846 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1847 if (rt->rt6i_nexthop == NULL) {
40aa7b90 1848 dst_free(&rt->u.dst);
1da177e4
LT
1849 return ERR_PTR(-ENOMEM);
1850 }
1851
1852 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1853 rt->rt6i_dst.plen = 128;
c71099ac 1854 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1855
1856 atomic_set(&rt->u.dst.__refcnt, 1);
1857
1858 return rt;
1859}
1860
1861static int fib6_ifdown(struct rt6_info *rt, void *arg)
1862{
1863 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1864 rt != &ip6_null_entry) {
1865 RT6_TRACE("deleted by ifdown %p\n", rt);
1866 return -1;
1867 }
1868 return 0;
1869}
1870
1871void rt6_ifdown(struct net_device *dev)
1872{
c71099ac 1873 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1874}
1875
/*
 *	Argument handed to rt6_mtu_change_route() for every route.
 */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
1881
1882static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1883{
1884 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1885 struct inet6_dev *idev;
1886
1887 /* In IPv6 pmtu discovery is not optional,
1888 so that RTAX_MTU lock cannot disable it.
1889 We still use this lock to block changes
1890 caused by addrconf/ndisc.
1891 */
1892
1893 idev = __in6_dev_get(arg->dev);
1894 if (idev == NULL)
1895 return 0;
1896
1897 /* For administrative MTU increase, there is no way to discover
1898 IPv6 PMTU increase, so PMTU increase should be updated here.
1899 Since RFC 1981 doesn't include administrative MTU increase
1900 update PMTU increase is a MUST. (i.e. jumbo frame)
1901 */
1902 /*
1903 If new MTU is less than route PMTU, this new MTU will be the
1904 lowest MTU in the path, update the route PMTU to reflect PMTU
1905 decreases; if new MTU is greater than route PMTU, and the
1906 old MTU is the lowest MTU in the path, update the route PMTU
1907 to reflect the increase. In this case if the other nodes' MTU
1908 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1909 PMTU discouvery.
1910 */
1911 if (rt->rt6i_dev == arg->dev &&
1912 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1ab1457c
YH
1913 (dst_mtu(&rt->u.dst) > arg->mtu ||
1914 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 1915 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 1916 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
566cfd8f
SA
1917 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1918 }
1da177e4
LT
1919 return 0;
1920}
1921
1922void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1923{
c71099ac
TG
1924 struct rt6_mtu_change_arg arg = {
1925 .dev = dev,
1926 .mtu = mtu,
1927 };
1da177e4 1928
c71099ac 1929 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1930}
1931
ef7c79ed 1932static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 1933 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1934 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1935 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1936 [RTA_PRIORITY] = { .type = NLA_U32 },
1937 [RTA_METRICS] = { .type = NLA_NESTED },
1938};
1939
1940static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1941 struct fib6_config *cfg)
1da177e4 1942{
86872cb5
TG
1943 struct rtmsg *rtm;
1944 struct nlattr *tb[RTA_MAX+1];
1945 int err;
1da177e4 1946
86872cb5
TG
1947 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1948 if (err < 0)
1949 goto errout;
1da177e4 1950
86872cb5
TG
1951 err = -EINVAL;
1952 rtm = nlmsg_data(nlh);
1953 memset(cfg, 0, sizeof(*cfg));
1954
1955 cfg->fc_table = rtm->rtm_table;
1956 cfg->fc_dst_len = rtm->rtm_dst_len;
1957 cfg->fc_src_len = rtm->rtm_src_len;
1958 cfg->fc_flags = RTF_UP;
1959 cfg->fc_protocol = rtm->rtm_protocol;
1960
1961 if (rtm->rtm_type == RTN_UNREACHABLE)
1962 cfg->fc_flags |= RTF_REJECT;
1963
1964 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1965 cfg->fc_nlinfo.nlh = nlh;
1966
1967 if (tb[RTA_GATEWAY]) {
1968 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1969 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1970 }
86872cb5
TG
1971
1972 if (tb[RTA_DST]) {
1973 int plen = (rtm->rtm_dst_len + 7) >> 3;
1974
1975 if (nla_len(tb[RTA_DST]) < plen)
1976 goto errout;
1977
1978 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1979 }
86872cb5
TG
1980
1981 if (tb[RTA_SRC]) {
1982 int plen = (rtm->rtm_src_len + 7) >> 3;
1983
1984 if (nla_len(tb[RTA_SRC]) < plen)
1985 goto errout;
1986
1987 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1988 }
86872cb5
TG
1989
1990 if (tb[RTA_OIF])
1991 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1992
1993 if (tb[RTA_PRIORITY])
1994 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1995
1996 if (tb[RTA_METRICS]) {
1997 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1998 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 1999 }
86872cb5
TG
2000
2001 if (tb[RTA_TABLE])
2002 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2003
2004 err = 0;
2005errout:
2006 return err;
1da177e4
LT
2007}
2008
c127ea2c 2009static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2010{
b854272b 2011 struct net *net = skb->sk->sk_net;
86872cb5
TG
2012 struct fib6_config cfg;
2013 int err;
1da177e4 2014
b854272b
DL
2015 if (net != &init_net)
2016 return -EINVAL;
2017
86872cb5
TG
2018 err = rtm_to_fib6_config(skb, nlh, &cfg);
2019 if (err < 0)
2020 return err;
2021
2022 return ip6_route_del(&cfg);
1da177e4
LT
2023}
2024
c127ea2c 2025static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2026{
b854272b 2027 struct net *net = skb->sk->sk_net;
86872cb5
TG
2028 struct fib6_config cfg;
2029 int err;
1da177e4 2030
b854272b
DL
2031 if (net != &init_net)
2032 return -EINVAL;
2033
86872cb5
TG
2034 err = rtm_to_fib6_config(skb, nlh, &cfg);
2035 if (err < 0)
2036 return err;
2037
2038 return ip6_route_add(&cfg);
1da177e4
LT
2039}
2040
339bf98f
TG
2041static inline size_t rt6_nlmsg_size(void)
2042{
2043 return NLMSG_ALIGN(sizeof(struct rtmsg))
2044 + nla_total_size(16) /* RTA_SRC */
2045 + nla_total_size(16) /* RTA_DST */
2046 + nla_total_size(16) /* RTA_GATEWAY */
2047 + nla_total_size(16) /* RTA_PREFSRC */
2048 + nla_total_size(4) /* RTA_TABLE */
2049 + nla_total_size(4) /* RTA_IIF */
2050 + nla_total_size(4) /* RTA_OIF */
2051 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2052 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2053 + nla_total_size(sizeof(struct rta_cacheinfo));
2054}
2055
1da177e4 2056static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2057 struct in6_addr *dst, struct in6_addr *src,
2058 int iif, int type, u32 pid, u32 seq,
2059 int prefix, unsigned int flags)
1da177e4
LT
2060{
2061 struct rtmsg *rtm;
2d7202bf 2062 struct nlmsghdr *nlh;
e3703b3d 2063 long expires;
9e762a4a 2064 u32 table;
1da177e4
LT
2065
2066 if (prefix) { /* user wants prefix routes only */
2067 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2068 /* success since this is not a prefix route */
2069 return 1;
2070 }
2071 }
2072
2d7202bf
TG
2073 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2074 if (nlh == NULL)
26932566 2075 return -EMSGSIZE;
2d7202bf
TG
2076
2077 rtm = nlmsg_data(nlh);
1da177e4
LT
2078 rtm->rtm_family = AF_INET6;
2079 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2080 rtm->rtm_src_len = rt->rt6i_src.plen;
2081 rtm->rtm_tos = 0;
c71099ac 2082 if (rt->rt6i_table)
9e762a4a 2083 table = rt->rt6i_table->tb6_id;
c71099ac 2084 else
9e762a4a
PM
2085 table = RT6_TABLE_UNSPEC;
2086 rtm->rtm_table = table;
2d7202bf 2087 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2088 if (rt->rt6i_flags&RTF_REJECT)
2089 rtm->rtm_type = RTN_UNREACHABLE;
2090 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2091 rtm->rtm_type = RTN_LOCAL;
2092 else
2093 rtm->rtm_type = RTN_UNICAST;
2094 rtm->rtm_flags = 0;
2095 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2096 rtm->rtm_protocol = rt->rt6i_protocol;
2097 if (rt->rt6i_flags&RTF_DYNAMIC)
2098 rtm->rtm_protocol = RTPROT_REDIRECT;
2099 else if (rt->rt6i_flags & RTF_ADDRCONF)
2100 rtm->rtm_protocol = RTPROT_KERNEL;
2101 else if (rt->rt6i_flags&RTF_DEFAULT)
2102 rtm->rtm_protocol = RTPROT_RA;
2103
2104 if (rt->rt6i_flags&RTF_CACHE)
2105 rtm->rtm_flags |= RTM_F_CLONED;
2106
2107 if (dst) {
2d7202bf 2108 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2109 rtm->rtm_dst_len = 128;
1da177e4 2110 } else if (rtm->rtm_dst_len)
2d7202bf 2111 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2112#ifdef CONFIG_IPV6_SUBTREES
2113 if (src) {
2d7202bf 2114 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2115 rtm->rtm_src_len = 128;
1da177e4 2116 } else if (rtm->rtm_src_len)
2d7202bf 2117 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2118#endif
2119 if (iif)
2d7202bf 2120 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2121 else if (dst) {
2122 struct in6_addr saddr_buf;
2123 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2124 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2125 }
2d7202bf 2126
1da177e4 2127 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2128 goto nla_put_failure;
2129
1da177e4 2130 if (rt->u.dst.neighbour)
2d7202bf
TG
2131 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2132
1da177e4 2133 if (rt->u.dst.dev)
2d7202bf
TG
2134 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2135
2136 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d
TG
2137
2138 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2139 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2140 expires, rt->u.dst.error) < 0)
2141 goto nla_put_failure;
2d7202bf
TG
2142
2143 return nlmsg_end(skb, nlh);
2144
2145nla_put_failure:
26932566
PM
2146 nlmsg_cancel(skb, nlh);
2147 return -EMSGSIZE;
1da177e4
LT
2148}
2149
1b43af54 2150int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2151{
2152 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2153 int prefix;
2154
2d7202bf
TG
2155 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2156 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2157 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2158 } else
2159 prefix = 0;
2160
2161 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2162 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2163 prefix, NLM_F_MULTI);
1da177e4
LT
2164}
2165
c127ea2c 2166static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2167{
b854272b 2168 struct net *net = in_skb->sk->sk_net;
ab364a6f
TG
2169 struct nlattr *tb[RTA_MAX+1];
2170 struct rt6_info *rt;
1da177e4 2171 struct sk_buff *skb;
ab364a6f 2172 struct rtmsg *rtm;
1da177e4 2173 struct flowi fl;
ab364a6f 2174 int err, iif = 0;
1da177e4 2175
b854272b
DL
2176 if (net != &init_net)
2177 return -EINVAL;
2178
ab364a6f
TG
2179 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2180 if (err < 0)
2181 goto errout;
1da177e4 2182
ab364a6f 2183 err = -EINVAL;
1da177e4 2184 memset(&fl, 0, sizeof(fl));
1da177e4 2185
ab364a6f
TG
2186 if (tb[RTA_SRC]) {
2187 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2188 goto errout;
2189
2190 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2191 }
2192
2193 if (tb[RTA_DST]) {
2194 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2195 goto errout;
2196
2197 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2198 }
2199
2200 if (tb[RTA_IIF])
2201 iif = nla_get_u32(tb[RTA_IIF]);
2202
2203 if (tb[RTA_OIF])
2204 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2205
2206 if (iif) {
2207 struct net_device *dev;
881d966b 2208 dev = __dev_get_by_index(&init_net, iif);
1da177e4
LT
2209 if (!dev) {
2210 err = -ENODEV;
ab364a6f 2211 goto errout;
1da177e4
LT
2212 }
2213 }
2214
ab364a6f
TG
2215 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2216 if (skb == NULL) {
2217 err = -ENOBUFS;
2218 goto errout;
2219 }
1da177e4 2220
ab364a6f
TG
2221 /* Reserve room for dummy headers, this skb can pass
2222 through good chunk of routing engine.
2223 */
459a98ed 2224 skb_reset_mac_header(skb);
ab364a6f 2225 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2226
ab364a6f 2227 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2228 skb->dst = &rt->u.dst;
2229
ab364a6f 2230 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2231 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2232 nlh->nlmsg_seq, 0, 0);
1da177e4 2233 if (err < 0) {
ab364a6f
TG
2234 kfree_skb(skb);
2235 goto errout;
1da177e4
LT
2236 }
2237
97c53cac 2238 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
ab364a6f 2239errout:
1da177e4 2240 return err;
1da177e4
LT
2241}
2242
86872cb5 2243void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2244{
2245 struct sk_buff *skb;
86872cb5
TG
2246 u32 pid = 0, seq = 0;
2247 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2248 int err = -ENOBUFS;
2249
86872cb5
TG
2250 if (info) {
2251 pid = info->pid;
2252 nlh = info->nlh;
2253 if (nlh)
2254 seq = nlh->nlmsg_seq;
2255 }
2256
339bf98f 2257 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2258 if (skb == NULL)
2259 goto errout;
2260
2261 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
26932566
PM
2262 if (err < 0) {
2263 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2264 WARN_ON(err == -EMSGSIZE);
2265 kfree_skb(skb);
2266 goto errout;
2267 }
97c53cac 2268 err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
21713ebc
TG
2269errout:
2270 if (err < 0)
97c53cac 2271 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2272}
2273
2274/*
2275 * /proc
2276 */
2277
2278#ifdef CONFIG_PROC_FS
2279
2280#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2281
/*
 * Buffer-walking state for a /proc route dump.
 * NOTE(review): nothing in the visible part of this file uses this struct
 * any more; presumably left over from before the seq_file conversion --
 * confirm against the rest of the tree before removing it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2290
2291static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2292{
33120b30 2293 struct seq_file *m = p_arg;
1da177e4 2294
33120b30
AD
2295 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2296 rt->rt6i_dst.plen);
1da177e4
LT
2297
2298#ifdef CONFIG_IPV6_SUBTREES
33120b30
AD
2299 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
2300 rt->rt6i_src.plen);
1da177e4 2301#else
33120b30 2302 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2303#endif
2304
2305 if (rt->rt6i_nexthop) {
33120b30
AD
2306 seq_printf(m, NIP6_SEQFMT,
2307 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
1da177e4 2308 } else {
33120b30 2309 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2310 }
33120b30
AD
2311 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2312 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2313 rt->u.dst.__use, rt->rt6i_flags,
2314 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2315 return 0;
2316}
2317
33120b30 2318static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2319{
33120b30
AD
2320 fib6_clean_all(rt6_info_route, 0, m);
2321 return 0;
2322}
1da177e4 2323
33120b30
AD
2324static int ipv6_route_open(struct inode *inode, struct file *file)
2325{
2326 return single_open(file, ipv6_route_show, NULL);
1da177e4
LT
2327}
2328
33120b30
AD
2329static const struct file_operations ipv6_route_proc_fops = {
2330 .owner = THIS_MODULE,
2331 .open = ipv6_route_open,
2332 .read = seq_read,
2333 .llseek = seq_lseek,
2334 .release = single_release,
2335};
2336
1da177e4
LT
2337static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2338{
2339 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2340 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2341 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2342 rt6_stats.fib_rt_cache,
2343 atomic_read(&ip6_dst_ops.entries),
2344 rt6_stats.fib_discarded_routes);
2345
2346 return 0;
2347}
2348
2349static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2350{
2351 return single_open(file, rt6_stats_seq_show, NULL);
2352}
2353
9a32144e 2354static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2355 .owner = THIS_MODULE,
2356 .open = rt6_stats_seq_open,
2357 .read = seq_read,
2358 .llseek = seq_lseek,
2359 .release = single_release,
2360};
2361#endif /* CONFIG_PROC_FS */
2362
2363#ifdef CONFIG_SYSCTL
2364
2365static int flush_delay;
2366
2367static
2368int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2369 void __user *buffer, size_t *lenp, loff_t *ppos)
2370{
2371 if (write) {
2372 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2373 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2374 return 0;
2375 } else
2376 return -EINVAL;
2377}
2378
2379ctl_table ipv6_route_table[] = {
1ab1457c 2380 {
1da177e4 2381 .procname = "flush",
1ab1457c 2382 .data = &flush_delay,
1da177e4 2383 .maxlen = sizeof(int),
89c8b3a1 2384 .mode = 0200,
1ab1457c 2385 .proc_handler = &ipv6_sysctl_rtcache_flush
1da177e4
LT
2386 },
2387 {
2388 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2389 .procname = "gc_thresh",
1ab1457c 2390 .data = &ip6_dst_ops.gc_thresh,
1da177e4
LT
2391 .maxlen = sizeof(int),
2392 .mode = 0644,
1ab1457c 2393 .proc_handler = &proc_dointvec,
1da177e4
LT
2394 },
2395 {
2396 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2397 .procname = "max_size",
1ab1457c 2398 .data = &ip6_rt_max_size,
1da177e4
LT
2399 .maxlen = sizeof(int),
2400 .mode = 0644,
1ab1457c 2401 .proc_handler = &proc_dointvec,
1da177e4
LT
2402 },
2403 {
2404 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2405 .procname = "gc_min_interval",
1ab1457c 2406 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2407 .maxlen = sizeof(int),
2408 .mode = 0644,
1ab1457c 2409 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2410 .strategy = &sysctl_jiffies,
2411 },
2412 {
2413 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2414 .procname = "gc_timeout",
1ab1457c 2415 .data = &ip6_rt_gc_timeout,
1da177e4
LT
2416 .maxlen = sizeof(int),
2417 .mode = 0644,
1ab1457c 2418 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2419 .strategy = &sysctl_jiffies,
2420 },
2421 {
2422 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2423 .procname = "gc_interval",
1ab1457c 2424 .data = &ip6_rt_gc_interval,
1da177e4
LT
2425 .maxlen = sizeof(int),
2426 .mode = 0644,
1ab1457c 2427 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2428 .strategy = &sysctl_jiffies,
2429 },
2430 {
2431 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2432 .procname = "gc_elasticity",
1ab1457c 2433 .data = &ip6_rt_gc_elasticity,
1da177e4
LT
2434 .maxlen = sizeof(int),
2435 .mode = 0644,
1ab1457c 2436 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2437 .strategy = &sysctl_jiffies,
2438 },
2439 {
2440 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2441 .procname = "mtu_expires",
1ab1457c 2442 .data = &ip6_rt_mtu_expires,
1da177e4
LT
2443 .maxlen = sizeof(int),
2444 .mode = 0644,
1ab1457c 2445 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2446 .strategy = &sysctl_jiffies,
2447 },
2448 {
2449 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2450 .procname = "min_adv_mss",
1ab1457c 2451 .data = &ip6_rt_min_advmss,
1da177e4
LT
2452 .maxlen = sizeof(int),
2453 .mode = 0644,
1ab1457c 2454 .proc_handler = &proc_dointvec_jiffies,
1da177e4
LT
2455 .strategy = &sysctl_jiffies,
2456 },
2457 {
2458 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2459 .procname = "gc_min_interval_ms",
1ab1457c 2460 .data = &ip6_rt_gc_min_interval,
1da177e4
LT
2461 .maxlen = sizeof(int),
2462 .mode = 0644,
1ab1457c 2463 .proc_handler = &proc_dointvec_ms_jiffies,
1da177e4
LT
2464 .strategy = &sysctl_ms_jiffies,
2465 },
2466 { .ctl_name = 0 }
2467};
2468
2469#endif
2470
433d49c3 2471int __init ip6_route_init(void)
1da177e4 2472{
433d49c3
DL
2473 int ret;
2474
e5d679f3
AD
2475 ip6_dst_ops.kmem_cachep =
2476 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
20c2df83 2477 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
14e50e57
DM
2478 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2479
433d49c3
DL
2480 ret = fib6_init();
2481 if (ret)
2482 goto out_kmem_cache;
2483
2484#ifdef CONFIG_PROC_FS
2485 ret = -ENOMEM;
2486 if (!proc_net_fops_create(&init_net, "ipv6_route",
2487 0, &ipv6_route_proc_fops))
2488 goto out_fib6_init;
2489
2490 if (!proc_net_fops_create(&init_net, "rt6_stats",
2491 S_IRUGO, &rt6_stats_seq_fops))
2492 goto out_proc_ipv6_route;
2493#endif
2494
1da177e4 2495#ifdef CONFIG_XFRM
433d49c3
DL
2496 ret = xfrm6_init();
2497 if (ret)
2498 goto out_proc_rt6_stats;
1da177e4 2499#endif
101367c2 2500#ifdef CONFIG_IPV6_MULTIPLE_TABLES
433d49c3
DL
2501 ret = fib6_rules_init();
2502 if (ret)
2503 goto xfrm6_init;
101367c2 2504#endif
433d49c3
DL
2505 ret = -ENOBUFS;
2506 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2507 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2508 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2509 goto fib6_rules_init;
c127ea2c 2510
433d49c3
DL
2511 ret = 0;
2512out:
2513 return ret;
2514
2515fib6_rules_init:
2516#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2517 fib6_rules_cleanup();
2518xfrm6_init:
2519#endif
2520#ifdef CONFIG_XFRM
2521 xfrm6_fini();
2522out_proc_rt6_stats:
2523#endif
2524#ifdef CONFIG_PROC_FS
2525 proc_net_remove(&init_net, "rt6_stats");
2526out_proc_ipv6_route:
2527 proc_net_remove(&init_net, "ipv6_route");
2528out_fib6_init:
2529#endif
2530 rt6_ifdown(NULL);
2531 fib6_gc_cleanup();
2532out_kmem_cache:
2533 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2534 goto out;
1da177e4
LT
2535}
2536
2537void ip6_route_cleanup(void)
2538{
101367c2
TG
2539#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2540 fib6_rules_cleanup();
2541#endif
457c4cbc
EB
2542 proc_net_remove(&init_net, "ipv6_route");
2543 proc_net_remove(&init_net, "rt6_stats");
1da177e4
LT
2544#ifdef CONFIG_XFRM
2545 xfrm6_fini();
2546#endif
2547 rt6_ifdown(NULL);
2548 fib6_gc_cleanup();
2549 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2550}