]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[DECNET]: Fix input routing bug
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4
LT
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
8d71740c 57#include <net/netevent.h>
21713ebc 58#include <net/netlink.h>
1da177e4
LT
59
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
66/* Set to 3 to get tracing. */
67#define RT6_DEBUG 2
68
69#if RT6_DEBUG >= 3
70#define RDBG(x) printk x
71#define RT6_TRACE(x...) printk(KERN_DEBUG x)
72#else
73#define RDBG(x)
74#define RT6_TRACE(x...) do { ; } while (0)
75#endif
76
519fbd87 77#define CLONE_OFFLINK_ROUTE 0
1da177e4
LT
78
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
70ceb4f5
YH
100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
1da177e4
LT
108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
122struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
101367c2
TG
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Sentinel route carrying the error -EACCES.  Like ip6_null_entry it is
 * bound to the loopback device and discards packets in both directions.
 */
struct rt6_info ip6_prohibit_entry = {
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.error		= -EACCES,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
};

/*
 * Sentinel route carrying the error -EINVAL; otherwise identical in shape
 * to ip6_prohibit_entry.
 */
struct rt6_info ip6_blk_hole_entry = {
	.rt6i_ref	= ATOMIC_INIT(1),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.u = {
		.dst = {
			.ops		= &ip6_dst_ops,
			.dev		= &loopback_dev,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.error		= -EINVAL,
			.obsolete	= -1,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
		}
	},
};

#endif
185
1da177e4
LT
186/* allocate dst with ip6_dst_ops */
187static __inline__ struct rt6_info *ip6_dst_alloc(void)
188{
189 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
190}
191
192static void ip6_dst_destroy(struct dst_entry *dst)
193{
194 struct rt6_info *rt = (struct rt6_info *)dst;
195 struct inet6_dev *idev = rt->rt6i_idev;
196
197 if (idev != NULL) {
198 rt->rt6i_idev = NULL;
199 in6_dev_put(idev);
200 }
201}
202
203static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
204 int how)
205{
206 struct rt6_info *rt = (struct rt6_info *)dst;
207 struct inet6_dev *idev = rt->rt6i_idev;
208
209 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
210 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
211 if (loopback_idev != NULL) {
212 rt->rt6i_idev = loopback_idev;
213 in6_dev_put(idev);
214 }
215 }
216}
217
218static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{
220 return (rt->rt6i_flags & RTF_EXPIRES &&
221 time_after(jiffies, rt->rt6i_expires));
222}
223
c71099ac
TG
224static inline int rt6_need_strict(struct in6_addr *daddr)
225{
226 return (ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
228}
229
1da177e4 230/*
c71099ac 231 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
232 */
233
234static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
235 int oif,
236 int strict)
237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
241 if (oif) {
242 for (sprt = rt; sprt; sprt = sprt->u.next) {
243 struct net_device *dev = sprt->rt6i_dev;
244 if (dev->ifindex == oif)
245 return sprt;
246 if (dev->flags & IFF_LOOPBACK) {
247 if (sprt->rt6i_idev == NULL ||
248 sprt->rt6i_idev->dev->ifindex != oif) {
249 if (strict && oif)
250 continue;
251 if (local && (!oif ||
252 local->rt6i_idev->dev->ifindex == oif))
253 continue;
254 }
255 local = sprt;
256 }
257 }
258
259 if (local)
260 return local;
261
262 if (strict)
263 return &ip6_null_entry;
264 }
265 return rt;
266}
267
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's next-hop neighbour is not in
 * a NUD_VALID state and was last updated more than rtr_probe_interval ago,
 * send a neighbour solicitation for it to its solicited-node multicast
 * address.  A NULL route, or a route without a next hop, is ignored.
 *
 * Okay, this does not seem to be appropriate for now, however, we need to
 * check if it is really so.  Router Reachability Probe MUST be rate-limited
 * to no more than one per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; repeated below under the lock. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		return;
	}

	/* NOTE(review): 'updated' is written while only the read side of
	 * neigh->lock is held - confirm this is intended. */
	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
303
1da177e4 304/*
554cfb7e 305 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 306 */
554cfb7e
YH
307static int inline rt6_check_dev(struct rt6_info *rt, int oif)
308{
309 struct net_device *dev = rt->rt6i_dev;
310 if (!oif || dev->ifindex == oif)
311 return 2;
312 if ((dev->flags & IFF_LOOPBACK) &&
313 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
314 return 1;
315 return 0;
316}
1da177e4 317
554cfb7e 318static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 319{
554cfb7e
YH
320 struct neighbour *neigh = rt->rt6i_nexthop;
321 int m = 0;
4d0c5911
YH
322 if (rt->rt6i_flags & RTF_NONEXTHOP ||
323 !(rt->rt6i_flags & RTF_GATEWAY))
324 m = 1;
325 else if (neigh) {
554cfb7e
YH
326 read_lock_bh(&neigh->lock);
327 if (neigh->nud_state & NUD_VALID)
4d0c5911 328 m = 2;
554cfb7e 329 read_unlock_bh(&neigh->lock);
1da177e4 330 }
554cfb7e 331 return m;
1da177e4
LT
332}
333
554cfb7e
YH
334static int rt6_score_route(struct rt6_info *rt, int oif,
335 int strict)
1da177e4 336{
4d0c5911
YH
337 int m, n;
338
339 m = rt6_check_dev(rt, oif);
77d16f45 340 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 341 return -1;
ebacaaa0
YH
342#ifdef CONFIG_IPV6_ROUTER_PREF
343 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
344#endif
4d0c5911
YH
345 n = rt6_check_neigh(rt);
346 if (n > 1)
ebacaaa0 347 m |= 16;
77d16f45 348 else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
554cfb7e
YH
349 return -1;
350 return m;
351}
352
353static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
354 int strict)
355{
356 struct rt6_info *match = NULL, *last = NULL;
357 struct rt6_info *rt, *rt0 = *head;
358 u32 metric;
359 int mpri = -1;
1da177e4 360
554cfb7e
YH
361 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
362 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 363
554cfb7e 364 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 365 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
554cfb7e
YH
366 rt = rt->u.next) {
367 int m;
1da177e4 368
554cfb7e 369 if (rt6_check_expired(rt))
1da177e4
LT
370 continue;
371
554cfb7e
YH
372 last = rt;
373
374 m = rt6_score_route(rt, oif, strict);
375 if (m < 0)
1da177e4 376 continue;
1da177e4 377
554cfb7e 378 if (m > mpri) {
27097255 379 rt6_probe(match);
554cfb7e 380 match = rt;
1da177e4 381 mpri = m;
27097255
YH
382 } else {
383 rt6_probe(rt);
1da177e4
LT
384 }
385 }
386
554cfb7e 387 if (!match &&
77d16f45 388 (strict & RT6_LOOKUP_F_REACHABLE) &&
554cfb7e
YH
389 last && last != rt0) {
390 /* no entries matched; do round-robin */
34af946a 391 static DEFINE_SPINLOCK(lock);
c302e6d5 392 spin_lock(&lock);
554cfb7e
YH
393 *head = rt0->u.next;
394 rt0->u.next = last->u.next;
395 last->u.next = rt0;
c302e6d5 396 spin_unlock(&lock);
1da177e4 397 }
1da177e4 398
554cfb7e
YH
399 RT6_TRACE("%s() => %p, score=%d\n",
400 __FUNCTION__, match, mpri);
1da177e4 401
554cfb7e 402 return (match ? match : &ip6_null_entry);
1da177e4
LT
403}
404
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Handle one received route information option.
 *
 * @dev:    device the option arrived on; its ifindex keys the route.
 * @opt:    start of the option, interpreted as a struct route_info.
 * @len:    number of bytes available at @opt.
 * @gwaddr: gateway address the resulting route points at.
 *
 * The option is validated first: it must be at least
 * sizeof(struct route_info) long, its length field must not exceed 3 and
 * must be large enough for the announced prefix length.  A lifetime of
 * zero deletes an existing matching route, a non-zero lifetime creates the
 * route or refreshes its preference and expiry, and 0xffffffff means the
 * route never expires.
 *
 * Returns 0 on success (including when nothing had to be done) and
 * -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	/* An invalid preference value is treated as medium. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/*
	 * The lifetime is read from the received option, so the conversion
	 * is network-to-host.  ntohl() and htonl() perform the same byte
	 * swap, so the resulting value is unchanged; only the stated
	 * direction is corrected.
	 */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* Shorter options do not carry a full address: build the
		 * prefix in a local buffer instead.  this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
482
982f56f3
YH
/*
 * BACKTRACK(saddr) - climb the FIB tree after a failed selection.
 *
 * Only acts when the enclosing function's 'rt' is &ip6_null_entry.  It then
 * moves 'fn' upwards: at each step it jumps to the label 'out' if 'fn' is a
 * top-level root, otherwise it goes to the parent, or into the parent's
 * subtree (looked up by 'saddr') when the parent has one that is not the
 * node we came from.  As soon as a node carrying RTN_RTINFO is reached it
 * jumps to the label 'restart'.
 *
 * The macro relies on the caller providing variables named 'rt' and 'fn'
 * and labels named 'out' and 'restart'.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			fn = (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) ? \
				fib6_lookup(pn->subtree, NULL, saddr) : pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
500
501static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502 struct flowi *fl, int flags)
1da177e4
LT
503{
504 struct fib6_node *fn;
505 struct rt6_info *rt;
506
c71099ac
TG
507 read_lock_bh(&table->tb6_lock);
508 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
509restart:
510 rt = fn->leaf;
77d16f45 511 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 512 BACKTRACK(&fl->fl6_src);
c71099ac 513out:
33cc4896 514 dst_hold(&rt->u.dst);
c71099ac 515 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
516
517 rt->u.dst.lastuse = jiffies;
c71099ac
TG
518 rt->u.dst.__use++;
519
520 return rt;
521
522}
523
524struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525 int oif, int strict)
526{
527 struct flowi fl = {
528 .oif = oif,
529 .nl_u = {
530 .ip6_u = {
531 .daddr = *daddr,
c71099ac
TG
532 },
533 },
534 };
535 struct dst_entry *dst;
77d16f45 536 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 537
adaa70bb
TG
538 if (saddr) {
539 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
540 flags |= RT6_LOOKUP_F_HAS_SADDR;
541 }
542
c71099ac
TG
543 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
544 if (dst->error == 0)
545 return (struct rt6_info *) dst;
546
547 dst_release(dst);
548
1da177e4
LT
549 return NULL;
550}
551
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
557
86872cb5 558static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
559{
560 int err;
c71099ac 561 struct fib6_table *table;
1da177e4 562
c71099ac
TG
563 table = rt->rt6i_table;
564 write_lock_bh(&table->tb6_lock);
86872cb5 565 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 566 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
567
568 return err;
569}
570
40e22e8f
TG
571int ip6_ins_rt(struct rt6_info *rt)
572{
86872cb5 573 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
574}
575
95a9a5ba
YH
576static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
577 struct in6_addr *saddr)
1da177e4 578{
1da177e4
LT
579 struct rt6_info *rt;
580
581 /*
582 * Clone the route.
583 */
584
585 rt = ip6_rt_copy(ort);
586
587 if (rt) {
58c4fb86
YH
588 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
589 if (rt->rt6i_dst.plen != 128 &&
590 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
591 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 592 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 593 }
1da177e4 594
58c4fb86 595 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
596 rt->rt6i_dst.plen = 128;
597 rt->rt6i_flags |= RTF_CACHE;
598 rt->u.dst.flags |= DST_HOST;
599
600#ifdef CONFIG_IPV6_SUBTREES
601 if (rt->rt6i_src.plen && saddr) {
602 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
603 rt->rt6i_src.plen = 128;
604 }
605#endif
606
607 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
608
95a9a5ba 609 }
1da177e4 610
95a9a5ba
YH
611 return rt;
612}
1da177e4 613
299d9939
YH
614static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
615{
616 struct rt6_info *rt = ip6_rt_copy(ort);
617 if (rt) {
618 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
619 rt->rt6i_dst.plen = 128;
620 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
621 rt->u.dst.flags |= DST_HOST;
622 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
623 }
624 return rt;
625}
626
8ce11e6a
AB
627static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
628 struct flowi *fl, int flags)
1da177e4
LT
629{
630 struct fib6_node *fn;
519fbd87 631 struct rt6_info *rt, *nrt;
c71099ac 632 int strict = 0;
1da177e4 633 int attempts = 3;
519fbd87 634 int err;
77d16f45 635 int reachable = RT6_LOOKUP_F_REACHABLE;
1da177e4 636
77d16f45 637 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
638
639relookup:
c71099ac 640 read_lock_bh(&table->tb6_lock);
1da177e4 641
8238dd06 642restart_2:
c71099ac 643 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
644
645restart:
c71099ac 646 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
982f56f3 647 BACKTRACK(&fl->fl6_src);
8238dd06
YH
648 if (rt == &ip6_null_entry ||
649 rt->rt6i_flags & RTF_CACHE)
1ddef044 650 goto out;
1da177e4 651
fb9de91e 652 dst_hold(&rt->u.dst);
c71099ac 653 read_unlock_bh(&table->tb6_lock);
fb9de91e 654
519fbd87 655 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 656 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
657 else {
658#if CLONE_OFFLINK_ROUTE
c71099ac 659 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
660#else
661 goto out2;
662#endif
663 }
e40cf353 664
519fbd87
YH
665 dst_release(&rt->u.dst);
666 rt = nrt ? : &ip6_null_entry;
1da177e4 667
519fbd87
YH
668 dst_hold(&rt->u.dst);
669 if (nrt) {
40e22e8f 670 err = ip6_ins_rt(nrt);
519fbd87 671 if (!err)
1da177e4 672 goto out2;
1da177e4 673 }
1da177e4 674
519fbd87
YH
675 if (--attempts <= 0)
676 goto out2;
677
678 /*
c71099ac 679 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
680 * released someone could insert this route. Relookup.
681 */
682 dst_release(&rt->u.dst);
683 goto relookup;
684
685out:
8238dd06
YH
686 if (reachable) {
687 reachable = 0;
688 goto restart_2;
689 }
519fbd87 690 dst_hold(&rt->u.dst);
c71099ac 691 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
692out2:
693 rt->u.dst.lastuse = jiffies;
694 rt->u.dst.__use++;
c71099ac
TG
695
696 return rt;
1da177e4
LT
697}
698
c71099ac
TG
699void ip6_route_input(struct sk_buff *skb)
700{
701 struct ipv6hdr *iph = skb->nh.ipv6h;
adaa70bb 702 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
703 struct flowi fl = {
704 .iif = skb->dev->ifindex,
705 .nl_u = {
706 .ip6_u = {
707 .daddr = iph->daddr,
708 .saddr = iph->saddr,
267935b1 709#ifdef CONFIG_IPV6_ROUTE_FWMARK
75bff8f0 710 .fwmark = skb->nfmark,
267935b1 711#endif
c71099ac
TG
712 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
713 },
714 },
715 .proto = iph->nexthdr,
716 };
adaa70bb
TG
717
718 if (rt6_need_strict(&iph->daddr))
719 flags |= RT6_LOOKUP_F_IFACE;
c71099ac
TG
720
721 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
722}
723
724static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
725 struct flowi *fl, int flags)
1da177e4
LT
726{
727 struct fib6_node *fn;
519fbd87 728 struct rt6_info *rt, *nrt;
c71099ac 729 int strict = 0;
1da177e4 730 int attempts = 3;
519fbd87 731 int err;
77d16f45 732 int reachable = RT6_LOOKUP_F_REACHABLE;
1da177e4 733
77d16f45 734 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
735
736relookup:
c71099ac 737 read_lock_bh(&table->tb6_lock);
1da177e4 738
8238dd06 739restart_2:
c71099ac 740 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
741
742restart:
8238dd06 743 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
982f56f3 744 BACKTRACK(&fl->fl6_src);
8238dd06
YH
745 if (rt == &ip6_null_entry ||
746 rt->rt6i_flags & RTF_CACHE)
1da177e4 747 goto out;
1da177e4 748
fb9de91e 749 dst_hold(&rt->u.dst);
c71099ac 750 read_unlock_bh(&table->tb6_lock);
fb9de91e 751
519fbd87 752 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 753 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
754 else {
755#if CLONE_OFFLINK_ROUTE
756 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
757#else
758 goto out2;
759#endif
760 }
1da177e4 761
519fbd87
YH
762 dst_release(&rt->u.dst);
763 rt = nrt ? : &ip6_null_entry;
1da177e4 764
519fbd87
YH
765 dst_hold(&rt->u.dst);
766 if (nrt) {
40e22e8f 767 err = ip6_ins_rt(nrt);
519fbd87 768 if (!err)
1da177e4 769 goto out2;
1da177e4 770 }
e40cf353 771
519fbd87
YH
772 if (--attempts <= 0)
773 goto out2;
774
775 /*
c71099ac 776 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
777 * released someone could insert this route. Relookup.
778 */
779 dst_release(&rt->u.dst);
780 goto relookup;
781
782out:
8238dd06
YH
783 if (reachable) {
784 reachable = 0;
785 goto restart_2;
786 }
519fbd87 787 dst_hold(&rt->u.dst);
c71099ac 788 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
789out2:
790 rt->u.dst.lastuse = jiffies;
791 rt->u.dst.__use++;
c71099ac
TG
792 return rt;
793}
794
795struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
796{
797 int flags = 0;
798
799 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 800 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 801
adaa70bb
TG
802 if (!ipv6_addr_any(&fl->fl6_src))
803 flags |= RT6_LOOKUP_F_HAS_SADDR;
804
c71099ac 805 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
806}
807
808
809/*
810 * Destination cache support functions
811 */
812
813static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
814{
815 struct rt6_info *rt;
816
817 rt = (struct rt6_info *) dst;
818
819 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
820 return dst;
821
822 return NULL;
823}
824
825static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
826{
827 struct rt6_info *rt = (struct rt6_info *) dst;
828
829 if (rt) {
830 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 831 ip6_del_rt(rt);
1da177e4
LT
832 else
833 dst_release(dst);
834 }
835 return NULL;
836}
837
838static void ip6_link_failure(struct sk_buff *skb)
839{
840 struct rt6_info *rt;
841
842 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
843
844 rt = (struct rt6_info *) skb->dst;
845 if (rt) {
846 if (rt->rt6i_flags&RTF_CACHE) {
847 dst_set_expires(&rt->u.dst, 0);
848 rt->rt6i_flags |= RTF_EXPIRES;
849 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
850 rt->rt6i_node->fn_sernum = -1;
851 }
852}
853
854static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
855{
856 struct rt6_info *rt6 = (struct rt6_info*)dst;
857
858 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
859 rt6->rt6i_flags |= RTF_MODIFIED;
860 if (mtu < IPV6_MIN_MTU) {
861 mtu = IPV6_MIN_MTU;
862 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
863 }
864 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 865 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
866 }
867}
868
1da177e4
LT
869static int ipv6_get_mtu(struct net_device *dev);
870
871static inline unsigned int ipv6_advmss(unsigned int mtu)
872{
873 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
874
875 if (mtu < ip6_rt_min_advmss)
876 mtu = ip6_rt_min_advmss;
877
878 /*
879 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
880 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
881 * IPV6_MAXPLEN is also valid and means: "any MSS,
882 * rely only on pmtu discovery"
883 */
884 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
885 mtu = IPV6_MAXPLEN;
886 return mtu;
887}
888
5d0bbeeb 889static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 890static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 891
1da177e4
LT
892struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
893 struct neighbour *neigh,
894 struct in6_addr *addr,
895 int (*output)(struct sk_buff *))
896{
897 struct rt6_info *rt;
898 struct inet6_dev *idev = in6_dev_get(dev);
899
900 if (unlikely(idev == NULL))
901 return NULL;
902
903 rt = ip6_dst_alloc();
904 if (unlikely(rt == NULL)) {
905 in6_dev_put(idev);
906 goto out;
907 }
908
909 dev_hold(dev);
910 if (neigh)
911 neigh_hold(neigh);
912 else
913 neigh = ndisc_get_neigh(dev, addr);
914
915 rt->rt6i_dev = dev;
916 rt->rt6i_idev = idev;
917 rt->rt6i_nexthop = neigh;
918 atomic_set(&rt->u.dst.__refcnt, 1);
919 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
920 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
921 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
922 rt->u.dst.output = output;
923
924#if 0 /* there's no chance to use these for ndisc */
925 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
926 ? DST_HOST
927 : 0;
928 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
929 rt->rt6i_dst.plen = 128;
930#endif
931
5d0bbeeb 932 spin_lock_bh(&ndisc_lock);
1da177e4
LT
933 rt->u.dst.next = ndisc_dst_gc_list;
934 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 935 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
936
937 fib6_force_start_gc();
938
939out:
940 return (struct dst_entry *)rt;
941}
942
943int ndisc_dst_gc(int *more)
944{
945 struct dst_entry *dst, *next, **pprev;
946 int freed;
947
948 next = NULL;
5d0bbeeb
TG
949 freed = 0;
950
951 spin_lock_bh(&ndisc_lock);
1da177e4 952 pprev = &ndisc_dst_gc_list;
5d0bbeeb 953
1da177e4
LT
954 while ((dst = *pprev) != NULL) {
955 if (!atomic_read(&dst->__refcnt)) {
956 *pprev = dst->next;
957 dst_free(dst);
958 freed++;
959 } else {
960 pprev = &dst->next;
961 (*more)++;
962 }
963 }
964
5d0bbeeb
TG
965 spin_unlock_bh(&ndisc_lock);
966
1da177e4
LT
967 return freed;
968}
969
970static int ip6_dst_gc(void)
971{
972 static unsigned expire = 30*HZ;
973 static unsigned long last_gc;
974 unsigned long now = jiffies;
975
976 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
977 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
978 goto out;
979
980 expire++;
981 fib6_run_gc(expire);
982 last_gc = now;
983 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
984 expire = ip6_rt_gc_timeout>>1;
985
986out:
987 expire -= expire>>ip6_rt_gc_elasticity;
988 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
989}
990
991/* Clean host part of a prefix. Not necessary in radix tree,
992 but results in cleaner routing tables.
993
994 Remove it only when all the things will work!
995 */
996
997static int ipv6_get_mtu(struct net_device *dev)
998{
999 int mtu = IPV6_MIN_MTU;
1000 struct inet6_dev *idev;
1001
1002 idev = in6_dev_get(dev);
1003 if (idev) {
1004 mtu = idev->cnf.mtu6;
1005 in6_dev_put(idev);
1006 }
1007 return mtu;
1008}
1009
1010int ipv6_get_hoplimit(struct net_device *dev)
1011{
1012 int hoplimit = ipv6_devconf.hop_limit;
1013 struct inet6_dev *idev;
1014
1015 idev = in6_dev_get(dev);
1016 if (idev) {
1017 hoplimit = idev->cnf.hop_limit;
1018 in6_dev_put(idev);
1019 }
1020 return hoplimit;
1021}
1022
1023/*
1024 *
1025 */
1026
86872cb5 1027int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1028{
1029 int err;
1da177e4
LT
1030 struct rt6_info *rt = NULL;
1031 struct net_device *dev = NULL;
1032 struct inet6_dev *idev = NULL;
c71099ac 1033 struct fib6_table *table;
1da177e4
LT
1034 int addr_type;
1035
86872cb5 1036 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1037 return -EINVAL;
1038#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1039 if (cfg->fc_src_len)
1da177e4
LT
1040 return -EINVAL;
1041#endif
86872cb5 1042 if (cfg->fc_ifindex) {
1da177e4 1043 err = -ENODEV;
86872cb5 1044 dev = dev_get_by_index(cfg->fc_ifindex);
1da177e4
LT
1045 if (!dev)
1046 goto out;
1047 idev = in6_dev_get(dev);
1048 if (!idev)
1049 goto out;
1050 }
1051
86872cb5
TG
1052 if (cfg->fc_metric == 0)
1053 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1054
86872cb5 1055 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1056 if (table == NULL) {
1057 err = -ENOBUFS;
1058 goto out;
1059 }
1060
1da177e4
LT
1061 rt = ip6_dst_alloc();
1062
1063 if (rt == NULL) {
1064 err = -ENOMEM;
1065 goto out;
1066 }
1067
1068 rt->u.dst.obsolete = -1;
86872cb5 1069 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1070
86872cb5
TG
1071 if (cfg->fc_protocol == RTPROT_UNSPEC)
1072 cfg->fc_protocol = RTPROT_BOOT;
1073 rt->rt6i_protocol = cfg->fc_protocol;
1074
1075 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1076
1077 if (addr_type & IPV6_ADDR_MULTICAST)
1078 rt->u.dst.input = ip6_mc_input;
1079 else
1080 rt->u.dst.input = ip6_forward;
1081
1082 rt->u.dst.output = ip6_output;
1083
86872cb5
TG
1084 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1085 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1086 if (rt->rt6i_dst.plen == 128)
1087 rt->u.dst.flags = DST_HOST;
1088
1089#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1090 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1091 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1092#endif
1093
86872cb5 1094 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1095
1096 /* We cannot add true routes via loopback here,
1097 they would result in kernel looping; promote them to reject routes
1098 */
86872cb5 1099 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1100 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1101 /* hold loopback dev/idev if we haven't done so. */
1102 if (dev != &loopback_dev) {
1103 if (dev) {
1104 dev_put(dev);
1105 in6_dev_put(idev);
1106 }
1107 dev = &loopback_dev;
1108 dev_hold(dev);
1109 idev = in6_dev_get(dev);
1110 if (!idev) {
1111 err = -ENODEV;
1112 goto out;
1113 }
1114 }
1115 rt->u.dst.output = ip6_pkt_discard_out;
1116 rt->u.dst.input = ip6_pkt_discard;
1117 rt->u.dst.error = -ENETUNREACH;
1118 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1119 goto install_route;
1120 }
1121
86872cb5 1122 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1123 struct in6_addr *gw_addr;
1124 int gwa_type;
1125
86872cb5
TG
1126 gw_addr = &cfg->fc_gateway;
1127 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1128 gwa_type = ipv6_addr_type(gw_addr);
1129
1130 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1131 struct rt6_info *grt;
1132
1133 /* IPv6 strictly inhibits using not link-local
1134 addresses as nexthop address.
1135 Otherwise, router will not able to send redirects.
1136 It is very good, but in some (rare!) circumstances
1137 (SIT, PtP, NBMA NOARP links) it is handy to allow
1138 some exceptions. --ANK
1139 */
1140 err = -EINVAL;
1141 if (!(gwa_type&IPV6_ADDR_UNICAST))
1142 goto out;
1143
86872cb5 1144 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1145
1146 err = -EHOSTUNREACH;
1147 if (grt == NULL)
1148 goto out;
1149 if (dev) {
1150 if (dev != grt->rt6i_dev) {
1151 dst_release(&grt->u.dst);
1152 goto out;
1153 }
1154 } else {
1155 dev = grt->rt6i_dev;
1156 idev = grt->rt6i_idev;
1157 dev_hold(dev);
1158 in6_dev_hold(grt->rt6i_idev);
1159 }
1160 if (!(grt->rt6i_flags&RTF_GATEWAY))
1161 err = 0;
1162 dst_release(&grt->u.dst);
1163
1164 if (err)
1165 goto out;
1166 }
1167 err = -EINVAL;
1168 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1169 goto out;
1170 }
1171
1172 err = -ENODEV;
1173 if (dev == NULL)
1174 goto out;
1175
86872cb5 1176 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1177 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1178 if (IS_ERR(rt->rt6i_nexthop)) {
1179 err = PTR_ERR(rt->rt6i_nexthop);
1180 rt->rt6i_nexthop = NULL;
1181 goto out;
1182 }
1183 }
1184
86872cb5 1185 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1186
1187install_route:
86872cb5
TG
1188 if (cfg->fc_mx) {
1189 struct nlattr *nla;
1190 int remaining;
1191
1192 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1193 int type = nla->nla_type;
1194
1195 if (type) {
1196 if (type > RTAX_MAX) {
1da177e4
LT
1197 err = -EINVAL;
1198 goto out;
1199 }
86872cb5
TG
1200
1201 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1202 }
1da177e4
LT
1203 }
1204 }
1205
1206 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1207 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1208 if (!rt->u.dst.metrics[RTAX_MTU-1])
1209 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1210 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1211 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1212 rt->u.dst.dev = dev;
1213 rt->rt6i_idev = idev;
c71099ac 1214 rt->rt6i_table = table;
86872cb5 1215 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1216
1217out:
1218 if (dev)
1219 dev_put(dev);
1220 if (idev)
1221 in6_dev_put(idev);
1222 if (rt)
1223 dst_free((struct dst_entry *) rt);
1224 return err;
1225}
1226
86872cb5 1227static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1228{
1229 int err;
c71099ac 1230 struct fib6_table *table;
1da177e4 1231
6c813a72
PM
1232 if (rt == &ip6_null_entry)
1233 return -ENOENT;
1234
c71099ac
TG
1235 table = rt->rt6i_table;
1236 write_lock_bh(&table->tb6_lock);
1da177e4 1237
86872cb5 1238 err = fib6_del(rt, info);
1da177e4
LT
1239 dst_release(&rt->u.dst);
1240
c71099ac 1241 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1242
1243 return err;
1244}
1245
e0a1ad73
TG
1246int ip6_del_rt(struct rt6_info *rt)
1247{
86872cb5 1248 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1249}
1250
86872cb5 1251static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1252{
c71099ac 1253 struct fib6_table *table;
1da177e4
LT
1254 struct fib6_node *fn;
1255 struct rt6_info *rt;
1256 int err = -ESRCH;
1257
86872cb5 1258 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1259 if (table == NULL)
1260 return err;
1261
1262 read_lock_bh(&table->tb6_lock);
1da177e4 1263
c71099ac 1264 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1265 &cfg->fc_dst, cfg->fc_dst_len,
1266 &cfg->fc_src, cfg->fc_src_len);
1da177e4
LT
1267
1268 if (fn) {
1269 for (rt = fn->leaf; rt; rt = rt->u.next) {
86872cb5 1270 if (cfg->fc_ifindex &&
1da177e4 1271 (rt->rt6i_dev == NULL ||
86872cb5 1272 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1273 continue;
86872cb5
TG
1274 if (cfg->fc_flags & RTF_GATEWAY &&
1275 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1276 continue;
86872cb5 1277 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1278 continue;
1279 dst_hold(&rt->u.dst);
c71099ac 1280 read_unlock_bh(&table->tb6_lock);
1da177e4 1281
86872cb5 1282 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1283 }
1284 }
c71099ac 1285 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1286
1287 return err;
1288}
1289
1290/*
1291 * Handle redirects
1292 */
a6279458
YH
1293struct ip6rd_flowi {
1294 struct flowi fl;
1295 struct in6_addr gateway;
1296};
1297
1298static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1299 struct flowi *fl,
1300 int flags)
1da177e4 1301{
a6279458
YH
1302 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1303 struct rt6_info *rt;
e843b9e1 1304 struct fib6_node *fn;
c71099ac 1305
1da177e4 1306 /*
e843b9e1
YH
1307 * Get the "current" route for this destination and
1308 * check if the redirect has come from approriate router.
1309 *
1310 * RFC 2461 specifies that redirects should only be
1311 * accepted if they come from the nexthop to the target.
1312 * Due to the way the routes are chosen, this notion
1313 * is a bit fuzzy and one might need to check all possible
1314 * routes.
1da177e4 1315 */
1da177e4 1316
c71099ac 1317 read_lock_bh(&table->tb6_lock);
a6279458 1318 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1
YH
1319restart:
1320 for (rt = fn->leaf; rt; rt = rt->u.next) {
1321 /*
1322 * Current route is on-link; redirect is always invalid.
1323 *
1324 * Seems, previous statement is not true. It could
1325 * be node, which looks for us as on-link (f.e. proxy ndisc)
1326 * But then router serving it might decide, that we should
1327 * know truth 8)8) --ANK (980726).
1328 */
1329 if (rt6_check_expired(rt))
1330 continue;
1331 if (!(rt->rt6i_flags & RTF_GATEWAY))
1332 continue;
a6279458 1333 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1334 continue;
a6279458 1335 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1336 continue;
1337 break;
1338 }
a6279458 1339
cb15d9c2 1340 if (!rt)
a6279458 1341 rt = &ip6_null_entry;
cb15d9c2
YH
1342 BACKTRACK(&fl->fl6_src);
1343out:
a6279458
YH
1344 dst_hold(&rt->u.dst);
1345
c71099ac 1346 read_unlock_bh(&table->tb6_lock);
e843b9e1 1347
a6279458
YH
1348 return rt;
1349};
1350
1351static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1352 struct in6_addr *src,
1353 struct in6_addr *gateway,
1354 struct net_device *dev)
1355{
adaa70bb 1356 int flags = RT6_LOOKUP_F_HAS_SADDR;
a6279458
YH
1357 struct ip6rd_flowi rdfl = {
1358 .fl = {
1359 .oif = dev->ifindex,
1360 .nl_u = {
1361 .ip6_u = {
1362 .daddr = *dest,
1363 .saddr = *src,
1364 },
1365 },
1366 },
1367 .gateway = *gateway,
1368 };
adaa70bb
TG
1369
1370 if (rt6_need_strict(dest))
1371 flags |= RT6_LOOKUP_F_IFACE;
a6279458
YH
1372
1373 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1374}
1375
1376void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1377 struct in6_addr *saddr,
1378 struct neighbour *neigh, u8 *lladdr, int on_link)
1379{
1380 struct rt6_info *rt, *nrt = NULL;
1381 struct netevent_redirect netevent;
1382
1383 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1384
1385 if (rt == &ip6_null_entry) {
1da177e4
LT
1386 if (net_ratelimit())
1387 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1388 "for redirect target\n");
a6279458 1389 goto out;
1da177e4
LT
1390 }
1391
1da177e4
LT
1392 /*
1393 * We have finally decided to accept it.
1394 */
1395
1396 neigh_update(neigh, lladdr, NUD_STALE,
1397 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1398 NEIGH_UPDATE_F_OVERRIDE|
1399 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1400 NEIGH_UPDATE_F_ISROUTER))
1401 );
1402
1403 /*
1404 * Redirect received -> path was valid.
1405 * Look, redirects are sent only in response to data packets,
1406 * so that this nexthop apparently is reachable. --ANK
1407 */
1408 dst_confirm(&rt->u.dst);
1409
1410 /* Duplicate redirect: silently ignore. */
1411 if (neigh == rt->u.dst.neighbour)
1412 goto out;
1413
1414 nrt = ip6_rt_copy(rt);
1415 if (nrt == NULL)
1416 goto out;
1417
1418 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1419 if (on_link)
1420 nrt->rt6i_flags &= ~RTF_GATEWAY;
1421
1422 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1423 nrt->rt6i_dst.plen = 128;
1424 nrt->u.dst.flags |= DST_HOST;
1425
1426 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1427 nrt->rt6i_nexthop = neigh_clone(neigh);
1428 /* Reset pmtu, it may be better */
1429 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1430 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1431
40e22e8f 1432 if (ip6_ins_rt(nrt))
1da177e4
LT
1433 goto out;
1434
8d71740c
TT
1435 netevent.old = &rt->u.dst;
1436 netevent.new = &nrt->u.dst;
1437 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1438
1da177e4 1439 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1440 ip6_del_rt(rt);
1da177e4
LT
1441 return;
1442 }
1443
1444out:
1445 dst_release(&rt->u.dst);
1446 return;
1447}
1448
1449/*
1450 * Handle ICMP "packet too big" messages
1451 * i.e. Path MTU discovery
1452 */
1453
1454void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1455 struct net_device *dev, u32 pmtu)
1456{
1457 struct rt6_info *rt, *nrt;
1458 int allfrag = 0;
1459
1460 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1461 if (rt == NULL)
1462 return;
1463
1464 if (pmtu >= dst_mtu(&rt->u.dst))
1465 goto out;
1466
1467 if (pmtu < IPV6_MIN_MTU) {
1468 /*
1469 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1470 * MTU (1280) and a fragment header should always be included
1471 * after a node receiving Too Big message reporting PMTU is
1472 * less than the IPv6 Minimum Link MTU.
1473 */
1474 pmtu = IPV6_MIN_MTU;
1475 allfrag = 1;
1476 }
1477
1478 /* New mtu received -> path was valid.
1479 They are sent only in response to data packets,
1480 so that this nexthop apparently is reachable. --ANK
1481 */
1482 dst_confirm(&rt->u.dst);
1483
1484 /* Host route. If it is static, it would be better
1485 not to override it, but add new one, so that
1486 when cache entry will expire old pmtu
1487 would return automatically.
1488 */
1489 if (rt->rt6i_flags & RTF_CACHE) {
1490 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1491 if (allfrag)
1492 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1493 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1494 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1495 goto out;
1496 }
1497
1498 /* Network route.
1499 Two cases are possible:
1500 1. It is connected route. Action: COW
1501 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1502 */
d5315b50 1503 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1504 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1505 else
1506 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1507
d5315b50 1508 if (nrt) {
a1e78363
YH
1509 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1510 if (allfrag)
1511 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1512
1513 /* According to RFC 1981, detecting PMTU increase shouldn't be
1514 * happened within 5 mins, the recommended timer is 10 mins.
1515 * Here this route expiration time is set to ip6_rt_mtu_expires
1516 * which is 10 mins. After 10 mins the decreased pmtu is expired
1517 * and detecting PMTU increase will be automatically happened.
1518 */
1519 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1520 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1521
40e22e8f 1522 ip6_ins_rt(nrt);
1da177e4 1523 }
1da177e4
LT
1524out:
1525 dst_release(&rt->u.dst);
1526}
1527
1528/*
1529 * Misc support functions
1530 */
1531
1532static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1533{
1534 struct rt6_info *rt = ip6_dst_alloc();
1535
1536 if (rt) {
1537 rt->u.dst.input = ort->u.dst.input;
1538 rt->u.dst.output = ort->u.dst.output;
1539
1540 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1541 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1542 rt->u.dst.dev = ort->u.dst.dev;
1543 if (rt->u.dst.dev)
1544 dev_hold(rt->u.dst.dev);
1545 rt->rt6i_idev = ort->rt6i_idev;
1546 if (rt->rt6i_idev)
1547 in6_dev_hold(rt->rt6i_idev);
1548 rt->u.dst.lastuse = jiffies;
1549 rt->rt6i_expires = 0;
1550
1551 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1552 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1553 rt->rt6i_metric = 0;
1554
1555 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1556#ifdef CONFIG_IPV6_SUBTREES
1557 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1558#endif
c71099ac 1559 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1560 }
1561 return rt;
1562}
1563
70ceb4f5
YH
1564#ifdef CONFIG_IPV6_ROUTE_INFO
1565static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1566 struct in6_addr *gwaddr, int ifindex)
1567{
1568 struct fib6_node *fn;
1569 struct rt6_info *rt = NULL;
c71099ac
TG
1570 struct fib6_table *table;
1571
1572 table = fib6_get_table(RT6_TABLE_INFO);
1573 if (table == NULL)
1574 return NULL;
70ceb4f5 1575
c71099ac
TG
1576 write_lock_bh(&table->tb6_lock);
1577 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1578 if (!fn)
1579 goto out;
1580
1581 for (rt = fn->leaf; rt; rt = rt->u.next) {
1582 if (rt->rt6i_dev->ifindex != ifindex)
1583 continue;
1584 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1585 continue;
1586 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1587 continue;
1588 dst_hold(&rt->u.dst);
1589 break;
1590 }
1591out:
c71099ac 1592 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1593 return rt;
1594}
1595
1596static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1597 struct in6_addr *gwaddr, int ifindex,
1598 unsigned pref)
1599{
86872cb5
TG
1600 struct fib6_config cfg = {
1601 .fc_table = RT6_TABLE_INFO,
1602 .fc_metric = 1024,
1603 .fc_ifindex = ifindex,
1604 .fc_dst_len = prefixlen,
1605 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1606 RTF_UP | RTF_PREF(pref),
1607 };
1608
1609 ipv6_addr_copy(&cfg.fc_dst, prefix);
1610 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1611
e317da96
YH
1612 /* We should treat it as a default route if prefix length is 0. */
1613 if (!prefixlen)
86872cb5 1614 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1615
86872cb5 1616 ip6_route_add(&cfg);
70ceb4f5
YH
1617
1618 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1619}
1620#endif
1621
1da177e4
LT
1622struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1623{
1624 struct rt6_info *rt;
c71099ac 1625 struct fib6_table *table;
1da177e4 1626
c71099ac
TG
1627 table = fib6_get_table(RT6_TABLE_DFLT);
1628 if (table == NULL)
1629 return NULL;
1da177e4 1630
c71099ac
TG
1631 write_lock_bh(&table->tb6_lock);
1632 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1da177e4 1633 if (dev == rt->rt6i_dev &&
045927ff 1634 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1635 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1636 break;
1637 }
1638 if (rt)
1639 dst_hold(&rt->u.dst);
c71099ac 1640 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1641 return rt;
1642}
1643
1644struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1645 struct net_device *dev,
1646 unsigned int pref)
1da177e4 1647{
86872cb5
TG
1648 struct fib6_config cfg = {
1649 .fc_table = RT6_TABLE_DFLT,
1650 .fc_metric = 1024,
1651 .fc_ifindex = dev->ifindex,
1652 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1653 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1654 };
1da177e4 1655
86872cb5 1656 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1657
86872cb5 1658 ip6_route_add(&cfg);
1da177e4 1659
1da177e4
LT
1660 return rt6_get_dflt_router(gwaddr, dev);
1661}
1662
1663void rt6_purge_dflt_routers(void)
1664{
1665 struct rt6_info *rt;
c71099ac
TG
1666 struct fib6_table *table;
1667
1668 /* NOTE: Keep consistent with rt6_get_dflt_router */
1669 table = fib6_get_table(RT6_TABLE_DFLT);
1670 if (table == NULL)
1671 return;
1da177e4
LT
1672
1673restart:
c71099ac
TG
1674 read_lock_bh(&table->tb6_lock);
1675 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1da177e4
LT
1676 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1677 dst_hold(&rt->u.dst);
c71099ac 1678 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1679 ip6_del_rt(rt);
1da177e4
LT
1680 goto restart;
1681 }
1682 }
c71099ac 1683 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1684}
1685
86872cb5
TG
1686static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1687 struct fib6_config *cfg)
1688{
1689 memset(cfg, 0, sizeof(*cfg));
1690
1691 cfg->fc_table = RT6_TABLE_MAIN;
1692 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1693 cfg->fc_metric = rtmsg->rtmsg_metric;
1694 cfg->fc_expires = rtmsg->rtmsg_info;
1695 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1696 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1697 cfg->fc_flags = rtmsg->rtmsg_flags;
1698
1699 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1700 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1701 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1702}
1703
1da177e4
LT
1704int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1705{
86872cb5 1706 struct fib6_config cfg;
1da177e4
LT
1707 struct in6_rtmsg rtmsg;
1708 int err;
1709
1710 switch(cmd) {
1711 case SIOCADDRT: /* Add a route */
1712 case SIOCDELRT: /* Delete a route */
1713 if (!capable(CAP_NET_ADMIN))
1714 return -EPERM;
1715 err = copy_from_user(&rtmsg, arg,
1716 sizeof(struct in6_rtmsg));
1717 if (err)
1718 return -EFAULT;
86872cb5
TG
1719
1720 rtmsg_to_fib6_config(&rtmsg, &cfg);
1721
1da177e4
LT
1722 rtnl_lock();
1723 switch (cmd) {
1724 case SIOCADDRT:
86872cb5 1725 err = ip6_route_add(&cfg);
1da177e4
LT
1726 break;
1727 case SIOCDELRT:
86872cb5 1728 err = ip6_route_del(&cfg);
1da177e4
LT
1729 break;
1730 default:
1731 err = -EINVAL;
1732 }
1733 rtnl_unlock();
1734
1735 return err;
1736 };
1737
1738 return -EINVAL;
1739}
1740
1741/*
1742 * Drop the packet on the floor
1743 */
1744
20380731 1745static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4 1746{
76d0cc1b
LL
1747 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1748 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1749 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1750
1da177e4
LT
1751 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1752 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1753 kfree_skb(skb);
1754 return 0;
1755}
1756
20380731 1757static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1758{
1759 skb->dev = skb->dst->dev;
1760 return ip6_pkt_discard(skb);
1761}
1762
1763/*
1764 * Allocate a dst for local (unicast / anycast) address.
1765 */
1766
1767struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1768 const struct in6_addr *addr,
1769 int anycast)
1770{
1771 struct rt6_info *rt = ip6_dst_alloc();
1772
1773 if (rt == NULL)
1774 return ERR_PTR(-ENOMEM);
1775
1776 dev_hold(&loopback_dev);
1777 in6_dev_hold(idev);
1778
1779 rt->u.dst.flags = DST_HOST;
1780 rt->u.dst.input = ip6_input;
1781 rt->u.dst.output = ip6_output;
1782 rt->rt6i_dev = &loopback_dev;
1783 rt->rt6i_idev = idev;
1784 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1785 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1786 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1787 rt->u.dst.obsolete = -1;
1788
1789 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1790 if (anycast)
1791 rt->rt6i_flags |= RTF_ANYCAST;
1792 else
1da177e4
LT
1793 rt->rt6i_flags |= RTF_LOCAL;
1794 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1795 if (rt->rt6i_nexthop == NULL) {
1796 dst_free((struct dst_entry *) rt);
1797 return ERR_PTR(-ENOMEM);
1798 }
1799
1800 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1801 rt->rt6i_dst.plen = 128;
c71099ac 1802 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1803
1804 atomic_set(&rt->u.dst.__refcnt, 1);
1805
1806 return rt;
1807}
1808
1809static int fib6_ifdown(struct rt6_info *rt, void *arg)
1810{
1811 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1812 rt != &ip6_null_entry) {
1813 RT6_TRACE("deleted by ifdown %p\n", rt);
1814 return -1;
1815 }
1816 return 0;
1817}
1818
1819void rt6_ifdown(struct net_device *dev)
1820{
c71099ac 1821 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1822}
1823
/* Argument handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* only routes through this device are touched */
	unsigned mtu;		/* MTU value to apply */
};
1829
1830static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1831{
1832 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1833 struct inet6_dev *idev;
1834
1835 /* In IPv6 pmtu discovery is not optional,
1836 so that RTAX_MTU lock cannot disable it.
1837 We still use this lock to block changes
1838 caused by addrconf/ndisc.
1839 */
1840
1841 idev = __in6_dev_get(arg->dev);
1842 if (idev == NULL)
1843 return 0;
1844
1845 /* For administrative MTU increase, there is no way to discover
1846 IPv6 PMTU increase, so PMTU increase should be updated here.
1847 Since RFC 1981 doesn't include administrative MTU increase
1848 update PMTU increase is a MUST. (i.e. jumbo frame)
1849 */
1850 /*
1851 If new MTU is less than route PMTU, this new MTU will be the
1852 lowest MTU in the path, update the route PMTU to reflect PMTU
1853 decreases; if new MTU is greater than route PMTU, and the
1854 old MTU is the lowest MTU in the path, update the route PMTU
1855 to reflect the increase. In this case if the other nodes' MTU
1856 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1857 PMTU discouvery.
1858 */
1859 if (rt->rt6i_dev == arg->dev &&
1860 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1861 (dst_mtu(&rt->u.dst) > arg->mtu ||
1862 (dst_mtu(&rt->u.dst) < arg->mtu &&
1863 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1864 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1865 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1866 return 0;
1867}
1868
1869void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1870{
c71099ac
TG
1871 struct rt6_mtu_change_arg arg = {
1872 .dev = dev,
1873 .mtu = mtu,
1874 };
1da177e4 1875
c71099ac 1876 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1877}
1878
86872cb5 1879static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
5176f91e 1880 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 1881 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1882 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1883 [RTA_PRIORITY] = { .type = NLA_U32 },
1884 [RTA_METRICS] = { .type = NLA_NESTED },
1885};
1886
1887static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1888 struct fib6_config *cfg)
1da177e4 1889{
86872cb5
TG
1890 struct rtmsg *rtm;
1891 struct nlattr *tb[RTA_MAX+1];
1892 int err;
1da177e4 1893
86872cb5
TG
1894 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1895 if (err < 0)
1896 goto errout;
1da177e4 1897
86872cb5
TG
1898 err = -EINVAL;
1899 rtm = nlmsg_data(nlh);
1900 memset(cfg, 0, sizeof(*cfg));
1901
1902 cfg->fc_table = rtm->rtm_table;
1903 cfg->fc_dst_len = rtm->rtm_dst_len;
1904 cfg->fc_src_len = rtm->rtm_src_len;
1905 cfg->fc_flags = RTF_UP;
1906 cfg->fc_protocol = rtm->rtm_protocol;
1907
1908 if (rtm->rtm_type == RTN_UNREACHABLE)
1909 cfg->fc_flags |= RTF_REJECT;
1910
1911 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1912 cfg->fc_nlinfo.nlh = nlh;
1913
1914 if (tb[RTA_GATEWAY]) {
1915 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1916 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1917 }
86872cb5
TG
1918
1919 if (tb[RTA_DST]) {
1920 int plen = (rtm->rtm_dst_len + 7) >> 3;
1921
1922 if (nla_len(tb[RTA_DST]) < plen)
1923 goto errout;
1924
1925 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1926 }
86872cb5
TG
1927
1928 if (tb[RTA_SRC]) {
1929 int plen = (rtm->rtm_src_len + 7) >> 3;
1930
1931 if (nla_len(tb[RTA_SRC]) < plen)
1932 goto errout;
1933
1934 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1935 }
86872cb5
TG
1936
1937 if (tb[RTA_OIF])
1938 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1939
1940 if (tb[RTA_PRIORITY])
1941 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1942
1943 if (tb[RTA_METRICS]) {
1944 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1945 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 1946 }
86872cb5
TG
1947
1948 if (tb[RTA_TABLE])
1949 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1950
1951 err = 0;
1952errout:
1953 return err;
1da177e4
LT
1954}
1955
1956int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1957{
86872cb5
TG
1958 struct fib6_config cfg;
1959 int err;
1da177e4 1960
86872cb5
TG
1961 err = rtm_to_fib6_config(skb, nlh, &cfg);
1962 if (err < 0)
1963 return err;
1964
1965 return ip6_route_del(&cfg);
1da177e4
LT
1966}
1967
1968int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1969{
86872cb5
TG
1970 struct fib6_config cfg;
1971 int err;
1da177e4 1972
86872cb5
TG
1973 err = rtm_to_fib6_config(skb, nlh, &cfg);
1974 if (err < 0)
1975 return err;
1976
1977 return ip6_route_add(&cfg);
1da177e4
LT
1978}
1979
1da177e4 1980static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1981 struct in6_addr *dst, struct in6_addr *src,
1982 int iif, int type, u32 pid, u32 seq,
1983 int prefix, unsigned int flags)
1da177e4
LT
1984{
1985 struct rtmsg *rtm;
2d7202bf 1986 struct nlmsghdr *nlh;
1da177e4 1987 struct rta_cacheinfo ci;
9e762a4a 1988 u32 table;
1da177e4
LT
1989
1990 if (prefix) { /* user wants prefix routes only */
1991 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1992 /* success since this is not a prefix route */
1993 return 1;
1994 }
1995 }
1996
2d7202bf
TG
1997 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1998 if (nlh == NULL)
1999 return -ENOBUFS;
2000
2001 rtm = nlmsg_data(nlh);
1da177e4
LT
2002 rtm->rtm_family = AF_INET6;
2003 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2004 rtm->rtm_src_len = rt->rt6i_src.plen;
2005 rtm->rtm_tos = 0;
c71099ac 2006 if (rt->rt6i_table)
9e762a4a 2007 table = rt->rt6i_table->tb6_id;
c71099ac 2008 else
9e762a4a
PM
2009 table = RT6_TABLE_UNSPEC;
2010 rtm->rtm_table = table;
2d7202bf 2011 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2012 if (rt->rt6i_flags&RTF_REJECT)
2013 rtm->rtm_type = RTN_UNREACHABLE;
2014 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2015 rtm->rtm_type = RTN_LOCAL;
2016 else
2017 rtm->rtm_type = RTN_UNICAST;
2018 rtm->rtm_flags = 0;
2019 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2020 rtm->rtm_protocol = rt->rt6i_protocol;
2021 if (rt->rt6i_flags&RTF_DYNAMIC)
2022 rtm->rtm_protocol = RTPROT_REDIRECT;
2023 else if (rt->rt6i_flags & RTF_ADDRCONF)
2024 rtm->rtm_protocol = RTPROT_KERNEL;
2025 else if (rt->rt6i_flags&RTF_DEFAULT)
2026 rtm->rtm_protocol = RTPROT_RA;
2027
2028 if (rt->rt6i_flags&RTF_CACHE)
2029 rtm->rtm_flags |= RTM_F_CLONED;
2030
2031 if (dst) {
2d7202bf 2032 NLA_PUT(skb, RTA_DST, 16, dst);
1da177e4
LT
2033 rtm->rtm_dst_len = 128;
2034 } else if (rtm->rtm_dst_len)
2d7202bf 2035 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2036#ifdef CONFIG_IPV6_SUBTREES
2037 if (src) {
2d7202bf 2038 NLA_PUT(skb, RTA_SRC, 16, src);
1da177e4
LT
2039 rtm->rtm_src_len = 128;
2040 } else if (rtm->rtm_src_len)
2d7202bf 2041 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2042#endif
2043 if (iif)
2d7202bf 2044 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2045 else if (dst) {
2046 struct in6_addr saddr_buf;
2047 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2048 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2049 }
2d7202bf 2050
1da177e4 2051 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2052 goto nla_put_failure;
2053
1da177e4 2054 if (rt->u.dst.neighbour)
2d7202bf
TG
2055 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2056
1da177e4 2057 if (rt->u.dst.dev)
2d7202bf
TG
2058 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2059
2060 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
1da177e4
LT
2061 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2062 if (rt->rt6i_expires)
2063 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2064 else
2065 ci.rta_expires = 0;
2066 ci.rta_used = rt->u.dst.__use;
2067 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2068 ci.rta_error = rt->u.dst.error;
2069 ci.rta_id = 0;
2070 ci.rta_ts = 0;
2071 ci.rta_tsage = 0;
2d7202bf
TG
2072 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2073
2074 return nlmsg_end(skb, nlh);
2075
2076nla_put_failure:
2077 return nlmsg_cancel(skb, nlh);
1da177e4
LT
2078}
2079
1b43af54 2080int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2081{
2082 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2083 int prefix;
2084
2d7202bf
TG
2085 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2086 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2087 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2088 } else
2089 prefix = 0;
2090
2091 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2092 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2093 prefix, NLM_F_MULTI);
1da177e4
LT
2094}
2095
1da177e4
LT
2096int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2097{
ab364a6f
TG
2098 struct nlattr *tb[RTA_MAX+1];
2099 struct rt6_info *rt;
1da177e4 2100 struct sk_buff *skb;
ab364a6f 2101 struct rtmsg *rtm;
1da177e4 2102 struct flowi fl;
ab364a6f 2103 int err, iif = 0;
1da177e4 2104
ab364a6f
TG
2105 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2106 if (err < 0)
2107 goto errout;
1da177e4 2108
ab364a6f 2109 err = -EINVAL;
1da177e4 2110 memset(&fl, 0, sizeof(fl));
1da177e4 2111
ab364a6f
TG
2112 if (tb[RTA_SRC]) {
2113 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2114 goto errout;
2115
2116 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2117 }
2118
2119 if (tb[RTA_DST]) {
2120 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2121 goto errout;
2122
2123 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2124 }
2125
2126 if (tb[RTA_IIF])
2127 iif = nla_get_u32(tb[RTA_IIF]);
2128
2129 if (tb[RTA_OIF])
2130 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2131
2132 if (iif) {
2133 struct net_device *dev;
2134 dev = __dev_get_by_index(iif);
2135 if (!dev) {
2136 err = -ENODEV;
ab364a6f 2137 goto errout;
1da177e4
LT
2138 }
2139 }
2140
ab364a6f
TG
2141 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2142 if (skb == NULL) {
2143 err = -ENOBUFS;
2144 goto errout;
2145 }
1da177e4 2146
ab364a6f
TG
2147 /* Reserve room for dummy headers, this skb can pass
2148 through good chunk of routing engine.
2149 */
2150 skb->mac.raw = skb->data;
2151 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2152
ab364a6f 2153 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2154 skb->dst = &rt->u.dst;
2155
ab364a6f 2156 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2157 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2158 nlh->nlmsg_seq, 0, 0);
1da177e4 2159 if (err < 0) {
ab364a6f
TG
2160 kfree_skb(skb);
2161 goto errout;
1da177e4
LT
2162 }
2163
2942e900 2164 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2165errout:
1da177e4 2166 return err;
1da177e4
LT
2167}
2168
86872cb5 2169void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2170{
2171 struct sk_buff *skb;
86872cb5
TG
2172 u32 pid = 0, seq = 0;
2173 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2174 int payload = sizeof(struct rtmsg) + 256;
2175 int err = -ENOBUFS;
2176
86872cb5
TG
2177 if (info) {
2178 pid = info->pid;
2179 nlh = info->nlh;
2180 if (nlh)
2181 seq = nlh->nlmsg_seq;
2182 }
2183
21713ebc
TG
2184 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2185 if (skb == NULL)
2186 goto errout;
2187
2188 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2189 if (err < 0) {
1da177e4 2190 kfree_skb(skb);
21713ebc 2191 goto errout;
1da177e4 2192 }
21713ebc
TG
2193
2194 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2195errout:
2196 if (err < 0)
2197 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2198}
2199
2200/*
2201 * /proc
2202 */
2203
2204#ifdef CONFIG_PROC_FS
2205
2206#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2207
/* State shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* requested read offset in bytes */
	int length;	/* requested read length in bytes */
	int skip;	/* routes skipped so far to reach 'offset' */
	int len;	/* bytes written to 'buffer' so far */
};
2216
2217static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2218{
2219 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2220 int i;
2221
2222 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2223 arg->skip++;
2224 return 0;
2225 }
2226
2227 if (arg->len >= arg->length)
2228 return 0;
2229
2230 for (i=0; i<16; i++) {
2231 sprintf(arg->buffer + arg->len, "%02x",
2232 rt->rt6i_dst.addr.s6_addr[i]);
2233 arg->len += 2;
2234 }
2235 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2236 rt->rt6i_dst.plen);
2237
2238#ifdef CONFIG_IPV6_SUBTREES
2239 for (i=0; i<16; i++) {
2240 sprintf(arg->buffer + arg->len, "%02x",
2241 rt->rt6i_src.addr.s6_addr[i]);
2242 arg->len += 2;
2243 }
2244 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2245 rt->rt6i_src.plen);
2246#else
2247 sprintf(arg->buffer + arg->len,
2248 "00000000000000000000000000000000 00 ");
2249 arg->len += 36;
2250#endif
2251
2252 if (rt->rt6i_nexthop) {
2253 for (i=0; i<16; i++) {
2254 sprintf(arg->buffer + arg->len, "%02x",
2255 rt->rt6i_nexthop->primary_key[i]);
2256 arg->len += 2;
2257 }
2258 } else {
2259 sprintf(arg->buffer + arg->len,
2260 "00000000000000000000000000000000");
2261 arg->len += 32;
2262 }
2263 arg->len += sprintf(arg->buffer + arg->len,
2264 " %08x %08x %08x %08x %8s\n",
2265 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2266 rt->u.dst.__use, rt->rt6i_flags,
2267 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2268 return 0;
2269}
2270
2271static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2272{
c71099ac
TG
2273 struct rt6_proc_arg arg = {
2274 .buffer = buffer,
2275 .offset = offset,
2276 .length = length,
2277 };
1da177e4 2278
c71099ac 2279 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2280
2281 *start = buffer;
2282 if (offset)
2283 *start += offset % RT6_INFO_LEN;
2284
2285 arg.len -= offset % RT6_INFO_LEN;
2286
2287 if (arg.len > length)
2288 arg.len = length;
2289 if (arg.len < 0)
2290 arg.len = 0;
2291
2292 return arg.len;
2293}
2294
1da177e4
LT
2295static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2296{
2297 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2298 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2299 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2300 rt6_stats.fib_rt_cache,
2301 atomic_read(&ip6_dst_ops.entries),
2302 rt6_stats.fib_discarded_routes);
2303
2304 return 0;
2305}
2306
2307static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2308{
2309 return single_open(file, rt6_stats_seq_show, NULL);
2310}
2311
2312static struct file_operations rt6_stats_seq_fops = {
2313 .owner = THIS_MODULE,
2314 .open = rt6_stats_seq_open,
2315 .read = seq_read,
2316 .llseek = seq_lseek,
2317 .release = single_release,
2318};
2319#endif /* CONFIG_PROC_FS */
2320
2321#ifdef CONFIG_SYSCTL
2322
2323static int flush_delay;
2324
2325static
2326int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2327 void __user *buffer, size_t *lenp, loff_t *ppos)
2328{
2329 if (write) {
2330 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2331 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2332 return 0;
2333 } else
2334 return -EINVAL;
2335}
2336
2337ctl_table ipv6_route_table[] = {
2338 {
2339 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2340 .procname = "flush",
2341 .data = &flush_delay,
2342 .maxlen = sizeof(int),
89c8b3a1 2343 .mode = 0200,
1da177e4
LT
2344 .proc_handler = &ipv6_sysctl_rtcache_flush
2345 },
2346 {
2347 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2348 .procname = "gc_thresh",
2349 .data = &ip6_dst_ops.gc_thresh,
2350 .maxlen = sizeof(int),
2351 .mode = 0644,
2352 .proc_handler = &proc_dointvec,
2353 },
2354 {
2355 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2356 .procname = "max_size",
2357 .data = &ip6_rt_max_size,
2358 .maxlen = sizeof(int),
2359 .mode = 0644,
2360 .proc_handler = &proc_dointvec,
2361 },
2362 {
2363 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2364 .procname = "gc_min_interval",
2365 .data = &ip6_rt_gc_min_interval,
2366 .maxlen = sizeof(int),
2367 .mode = 0644,
2368 .proc_handler = &proc_dointvec_jiffies,
2369 .strategy = &sysctl_jiffies,
2370 },
2371 {
2372 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2373 .procname = "gc_timeout",
2374 .data = &ip6_rt_gc_timeout,
2375 .maxlen = sizeof(int),
2376 .mode = 0644,
2377 .proc_handler = &proc_dointvec_jiffies,
2378 .strategy = &sysctl_jiffies,
2379 },
2380 {
2381 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2382 .procname = "gc_interval",
2383 .data = &ip6_rt_gc_interval,
2384 .maxlen = sizeof(int),
2385 .mode = 0644,
2386 .proc_handler = &proc_dointvec_jiffies,
2387 .strategy = &sysctl_jiffies,
2388 },
2389 {
2390 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2391 .procname = "gc_elasticity",
2392 .data = &ip6_rt_gc_elasticity,
2393 .maxlen = sizeof(int),
2394 .mode = 0644,
2395 .proc_handler = &proc_dointvec_jiffies,
2396 .strategy = &sysctl_jiffies,
2397 },
2398 {
2399 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2400 .procname = "mtu_expires",
2401 .data = &ip6_rt_mtu_expires,
2402 .maxlen = sizeof(int),
2403 .mode = 0644,
2404 .proc_handler = &proc_dointvec_jiffies,
2405 .strategy = &sysctl_jiffies,
2406 },
2407 {
2408 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2409 .procname = "min_adv_mss",
2410 .data = &ip6_rt_min_advmss,
2411 .maxlen = sizeof(int),
2412 .mode = 0644,
2413 .proc_handler = &proc_dointvec_jiffies,
2414 .strategy = &sysctl_jiffies,
2415 },
2416 {
2417 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2418 .procname = "gc_min_interval_ms",
2419 .data = &ip6_rt_gc_min_interval,
2420 .maxlen = sizeof(int),
2421 .mode = 0644,
2422 .proc_handler = &proc_dointvec_ms_jiffies,
2423 .strategy = &sysctl_ms_jiffies,
2424 },
2425 { .ctl_name = 0 }
2426};
2427
2428#endif
2429
2430void __init ip6_route_init(void)
2431{
2432 struct proc_dir_entry *p;
2433
e5d679f3
AD
2434 ip6_dst_ops.kmem_cachep =
2435 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2436 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1da177e4
LT
2437 fib6_init();
2438#ifdef CONFIG_PROC_FS
2439 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2440 if (p)
2441 p->owner = THIS_MODULE;
2442
2443 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2444#endif
2445#ifdef CONFIG_XFRM
2446 xfrm6_init();
2447#endif
101367c2
TG
2448#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2449 fib6_rules_init();
2450#endif
1da177e4
LT
2451}
2452
2453void ip6_route_cleanup(void)
2454{
101367c2
TG
2455#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2456 fib6_rules_cleanup();
2457#endif
1da177e4
LT
2458#ifdef CONFIG_PROC_FS
2459 proc_net_remove("ipv6_route");
2460 proc_net_remove("rt6_stats");
2461#endif
2462#ifdef CONFIG_XFRM
2463 xfrm6_fini();
2464#endif
2465 rt6_ifdown(NULL);
2466 fib6_gc_cleanup();
2467 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2468}