]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[IPV6] ROUTE: Unify RT6_F_xxx and RT6_SELECT_F_xxx flags
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4
LT
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
8d71740c 57#include <net/netevent.h>
21713ebc 58#include <net/netlink.h>
1da177e4
LT
59
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
66/* Set to 3 to get tracing. */
/*
 * When RT6_DEBUG >= 3, RDBG(x) expands to "printk x" and RT6_TRACE() to a
 * KERN_DEBUG printk.  At any lower level RDBG() expands to nothing and
 * RT6_TRACE() to an empty do/while statement, so trace arguments are not
 * evaluated.
 */
67#define RT6_DEBUG 2
68
69#if RT6_DEBUG >= 3
70#define RDBG(x) printk x
71#define RT6_TRACE(x...) printk(KERN_DEBUG x)
72#else
73#define RDBG(x)
74#define RT6_TRACE(x...) do { ; } while (0)
75#endif
76
519fbd87 77#define CLONE_OFFLINK_ROUTE 0
1da177e4
LT
78
/*
 * Tunables for the routing cache.  Times are expressed in jiffies (HZ units).
 */
/* Entry count above which ip6_dst_gc() always runs and reports pressure. */
79static int ip6_rt_max_size = 4096;
/* Minimum spacing between two garbage-collection runs in ip6_dst_gc(). */
80static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry to half of this once below gc_thresh. */
81static int ip6_rt_gc_timeout = 60*HZ;
/* Non-static; not referenced in the visible part of this file. */
82int ip6_rt_gc_interval = 30*HZ;
/* Shift used by ip6_dst_gc() to decay its expiry on every call. */
83static int ip6_rt_gc_elasticity = 9;
/* Not referenced in the visible part of this file. */
84static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). */
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
70ceb4f5
YH
100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
1da177e4
LT
/*
 * dst_ops table for IPv6 routes.  Every dst allocated through
 * ip6_dst_alloc() uses these callbacks; entry_size makes each entry large
 * enough to hold a full struct rt6_info.
 */
108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
/*
 * Statically allocated "no route" entry.  Lookup helpers in this file
 * return its address instead of NULL when nothing matches.  It is a reject
 * route bound to the loopback device: both input and output discard the
 * packet, and dst.error is -ENETUNREACH.
 */
122struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
101367c2
TG
142#ifdef CONFIG_IPV6_MULTIPLE_TABLES
143
/*
 * Static reject entry, only built with CONFIG_IPV6_MULTIPLE_TABLES.
 * Identical to ip6_null_entry except that dst.error is -EACCES.
 */
144struct rt6_info ip6_prohibit_entry = {
145 .u = {
146 .dst = {
147 .__refcnt = ATOMIC_INIT(1),
148 .__use = 1,
149 .dev = &loopback_dev,
150 .obsolete = -1,
151 .error = -EACCES,
152 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
153 .input = ip6_pkt_discard,
154 .output = ip6_pkt_discard_out,
155 .ops = &ip6_dst_ops,
156 .path = (struct dst_entry*)&ip6_prohibit_entry,
157 }
158 },
159 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
160 .rt6i_metric = ~(u32) 0,
161 .rt6i_ref = ATOMIC_INIT(1),
162};
163
/*
 * Static reject entry, only built with CONFIG_IPV6_MULTIPLE_TABLES.
 * Identical to ip6_null_entry except that dst.error is -EINVAL.
 */
164struct rt6_info ip6_blk_hole_entry = {
165 .u = {
166 .dst = {
167 .__refcnt = ATOMIC_INIT(1),
168 .__use = 1,
169 .dev = &loopback_dev,
170 .obsolete = -1,
171 .error = -EINVAL,
172 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
173 .input = ip6_pkt_discard,
174 .output = ip6_pkt_discard_out,
175 .ops = &ip6_dst_ops,
176 .path = (struct dst_entry*)&ip6_blk_hole_entry,
177 }
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
180 .rt6i_metric = ~(u32) 0,
181 .rt6i_ref = ATOMIC_INIT(1),
182};
183
184#endif
185
1da177e4
LT
186/* allocate dst with ip6_dst_ops */
187static __inline__ struct rt6_info *ip6_dst_alloc(void)
188{
189 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
190}
191
192static void ip6_dst_destroy(struct dst_entry *dst)
193{
194 struct rt6_info *rt = (struct rt6_info *)dst;
195 struct inet6_dev *idev = rt->rt6i_idev;
196
197 if (idev != NULL) {
198 rt->rt6i_idev = NULL;
199 in6_dev_put(idev);
200 }
201}
202
203static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
204 int how)
205{
206 struct rt6_info *rt = (struct rt6_info *)dst;
207 struct inet6_dev *idev = rt->rt6i_idev;
208
209 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
210 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
211 if (loopback_idev != NULL) {
212 rt->rt6i_idev = loopback_idev;
213 in6_dev_put(idev);
214 }
215 }
216}
217
218static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{
220 return (rt->rt6i_flags & RTF_EXPIRES &&
221 time_after(jiffies, rt->rt6i_expires));
222}
223
c71099ac
TG
224static inline int rt6_need_strict(struct in6_addr *daddr)
225{
226 return (ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
228}
229
1da177e4 230/*
c71099ac 231 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
232 */
233
234static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
235 int oif,
236 int strict)
237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
241 if (oif) {
242 for (sprt = rt; sprt; sprt = sprt->u.next) {
243 struct net_device *dev = sprt->rt6i_dev;
244 if (dev->ifindex == oif)
245 return sprt;
246 if (dev->flags & IFF_LOOPBACK) {
247 if (sprt->rt6i_idev == NULL ||
248 sprt->rt6i_idev->dev->ifindex != oif) {
249 if (strict && oif)
250 continue;
251 if (local && (!oif ||
252 local->rt6i_idev->dev->ifindex == oif))
253 continue;
254 }
255 local = sprt;
256 }
257 }
258
259 if (local)
260 return local;
261
262 if (strict)
263 return &ip6_null_entry;
264 }
265 return rt;
266}
267
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation towards the
 * next hop of rt when its neighbour entry is not in a NUD_VALID state and
 * the per-device rtr_probe_interval has elapsed since the entry was last
 * updated.  A NULL rt, or a route without a next hop, is ignored.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing; a read lock would let concurrent probers race on that field.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Record the probe time before dropping the lock. */
		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
303
1da177e4 304/*
554cfb7e 305 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 306 */
554cfb7e
YH
307static int inline rt6_check_dev(struct rt6_info *rt, int oif)
308{
309 struct net_device *dev = rt->rt6i_dev;
310 if (!oif || dev->ifindex == oif)
311 return 2;
312 if ((dev->flags & IFF_LOOPBACK) &&
313 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
314 return 1;
315 return 0;
316}
1da177e4 317
554cfb7e 318static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 319{
554cfb7e
YH
320 struct neighbour *neigh = rt->rt6i_nexthop;
321 int m = 0;
4d0c5911
YH
322 if (rt->rt6i_flags & RTF_NONEXTHOP ||
323 !(rt->rt6i_flags & RTF_GATEWAY))
324 m = 1;
325 else if (neigh) {
554cfb7e
YH
326 read_lock_bh(&neigh->lock);
327 if (neigh->nud_state & NUD_VALID)
4d0c5911 328 m = 2;
554cfb7e 329 read_unlock_bh(&neigh->lock);
1da177e4 330 }
554cfb7e 331 return m;
1da177e4
LT
332}
333
554cfb7e
YH
334static int rt6_score_route(struct rt6_info *rt, int oif,
335 int strict)
1da177e4 336{
4d0c5911
YH
337 int m, n;
338
339 m = rt6_check_dev(rt, oif);
77d16f45 340 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 341 return -1;
ebacaaa0
YH
342#ifdef CONFIG_IPV6_ROUTER_PREF
343 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
344#endif
4d0c5911
YH
345 n = rt6_check_neigh(rt);
346 if (n > 1)
ebacaaa0 347 m |= 16;
77d16f45 348 else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
554cfb7e
YH
349 return -1;
350 return m;
351}
352
353static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
354 int strict)
355{
356 struct rt6_info *match = NULL, *last = NULL;
357 struct rt6_info *rt, *rt0 = *head;
358 u32 metric;
359 int mpri = -1;
1da177e4 360
554cfb7e
YH
361 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
362 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 363
554cfb7e 364 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 365 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
554cfb7e
YH
366 rt = rt->u.next) {
367 int m;
1da177e4 368
554cfb7e 369 if (rt6_check_expired(rt))
1da177e4
LT
370 continue;
371
554cfb7e
YH
372 last = rt;
373
374 m = rt6_score_route(rt, oif, strict);
375 if (m < 0)
1da177e4 376 continue;
1da177e4 377
554cfb7e 378 if (m > mpri) {
27097255 379 rt6_probe(match);
554cfb7e 380 match = rt;
1da177e4 381 mpri = m;
27097255
YH
382 } else {
383 rt6_probe(rt);
1da177e4
LT
384 }
385 }
386
554cfb7e 387 if (!match &&
77d16f45 388 (strict & RT6_LOOKUP_F_REACHABLE) &&
554cfb7e
YH
389 last && last != rt0) {
390 /* no entries matched; do round-robin */
34af946a 391 static DEFINE_SPINLOCK(lock);
c302e6d5 392 spin_lock(&lock);
554cfb7e
YH
393 *head = rt0->u.next;
394 rt0->u.next = last->u.next;
395 last->u.next = rt0;
c302e6d5 396 spin_unlock(&lock);
1da177e4 397 }
1da177e4 398
554cfb7e
YH
399 RT6_TRACE("%s() => %p, score=%d\n",
400 __FUNCTION__, match, mpri);
1da177e4 401
554cfb7e 402 return (match ? match : &ip6_null_entry);
1da177e4
LT
403}
404
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * Validates the option, then adds, refreshes or (for a zero lifetime)
 * deletes the matching route-info route.  Returns 0 on success or -EINVAL
 * when the option is too short or its length/prefix_len are inconsistent.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime arrives in network byte order: convert to host order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
482
982f56f3
YH
/*
 * BACKTRACK(saddr): lookup helper used inside the ip6_pol_route_*()
 * functions.  It is not self-contained: it reads and writes the caller's
 * local variables "rt" and "fn" and jumps to the caller's "out" and
 * "restart" labels.
 *
 * When rt is &ip6_null_entry it walks from fn towards the tree root.  At
 * each step it moves to the parent, or performs a fib6_lookup() keyed on
 * saddr in the parent's subtree when that subtree exists and is not the
 * node just left.  It jumps to "restart" as soon as the new node has
 * RTN_RTINFO set, and to "out" when the node it is about to leave has
 * RTN_TL_ROOT set.
 *
 * NOTE(review): the subtree test uses FIB6_SUBTREE(pn) while the lookup
 * uses pn->subtree directly - confirm both are valid in every
 * configuration.
 */
483#define BACKTRACK(saddr) \
484do { \
485 if (rt == &ip6_null_entry) { \
486 struct fib6_node *pn; \
487 while (fn) { \
488 if (fn->fn_flags & RTN_TL_ROOT) \
489 goto out; \
490 pn = fn->parent; \
491 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
492 fn = fib6_lookup(pn->subtree, NULL, saddr); \
493 else \
494 fn = pn; \
495 if (fn->fn_flags & RTN_RTINFO) \
496 goto restart; \
c71099ac 497 } \
c71099ac 498 } \
982f56f3 499} while(0)
c71099ac
TG
500
501static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502 struct flowi *fl, int flags)
1da177e4
LT
503{
504 struct fib6_node *fn;
505 struct rt6_info *rt;
506
c71099ac
TG
507 read_lock_bh(&table->tb6_lock);
508 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
509restart:
510 rt = fn->leaf;
77d16f45 511 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 512 BACKTRACK(&fl->fl6_src);
1da177e4 513 dst_hold(&rt->u.dst);
c71099ac
TG
514out:
515 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
516
517 rt->u.dst.lastuse = jiffies;
c71099ac
TG
518 rt->u.dst.__use++;
519
520 return rt;
521
522}
523
524struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525 int oif, int strict)
526{
527 struct flowi fl = {
528 .oif = oif,
529 .nl_u = {
530 .ip6_u = {
531 .daddr = *daddr,
532 /* TODO: saddr */
533 },
534 },
535 };
536 struct dst_entry *dst;
77d16f45 537 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac
TG
538
539 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
540 if (dst->error == 0)
541 return (struct rt6_info *) dst;
542
543 dst_release(dst);
544
1da177e4
LT
545 return NULL;
546}
547
c71099ac 548/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
549 It takes a new route entry; if the addition fails for any reason the
550 route is freed. In any case, if the caller does not hold it, it may
551 be destroyed.
552 */
553
86872cb5 554static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
555{
556 int err;
c71099ac 557 struct fib6_table *table;
1da177e4 558
c71099ac
TG
559 table = rt->rt6i_table;
560 write_lock_bh(&table->tb6_lock);
86872cb5 561 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 562 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
563
564 return err;
565}
566
40e22e8f
TG
567int ip6_ins_rt(struct rt6_info *rt)
568{
86872cb5 569 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
570}
571
95a9a5ba
YH
572static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
573 struct in6_addr *saddr)
1da177e4 574{
1da177e4
LT
575 struct rt6_info *rt;
576
577 /*
578 * Clone the route.
579 */
580
581 rt = ip6_rt_copy(ort);
582
583 if (rt) {
58c4fb86
YH
584 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
585 if (rt->rt6i_dst.plen != 128 &&
586 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
587 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 588 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 589 }
1da177e4 590
58c4fb86 591 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
592 rt->rt6i_dst.plen = 128;
593 rt->rt6i_flags |= RTF_CACHE;
594 rt->u.dst.flags |= DST_HOST;
595
596#ifdef CONFIG_IPV6_SUBTREES
597 if (rt->rt6i_src.plen && saddr) {
598 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
599 rt->rt6i_src.plen = 128;
600 }
601#endif
602
603 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
604
95a9a5ba 605 }
1da177e4 606
95a9a5ba
YH
607 return rt;
608}
1da177e4 609
299d9939
YH
610static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
611{
612 struct rt6_info *rt = ip6_rt_copy(ort);
613 if (rt) {
614 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
615 rt->rt6i_dst.plen = 128;
616 rt->rt6i_flags |= RTF_CACHE;
617 if (rt->rt6i_flags & RTF_REJECT)
618 rt->u.dst.error = ort->u.dst.error;
619 rt->u.dst.flags |= DST_HOST;
620 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
621 }
622 return rt;
623}
624
8ce11e6a
AB
625static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
626 struct flowi *fl, int flags)
1da177e4
LT
627{
628 struct fib6_node *fn;
519fbd87 629 struct rt6_info *rt, *nrt;
c71099ac 630 int strict = 0;
1da177e4 631 int attempts = 3;
519fbd87 632 int err;
77d16f45 633 int reachable = RT6_LOOKUP_F_REACHABLE;
1da177e4 634
77d16f45 635 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
636
637relookup:
c71099ac 638 read_lock_bh(&table->tb6_lock);
1da177e4 639
8238dd06 640restart_2:
c71099ac 641 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
642
643restart:
c71099ac 644 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
982f56f3 645 BACKTRACK(&fl->fl6_src);
8238dd06
YH
646 if (rt == &ip6_null_entry ||
647 rt->rt6i_flags & RTF_CACHE)
1ddef044 648 goto out;
1da177e4 649
fb9de91e 650 dst_hold(&rt->u.dst);
c71099ac 651 read_unlock_bh(&table->tb6_lock);
fb9de91e 652
519fbd87 653 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 654 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
655 else {
656#if CLONE_OFFLINK_ROUTE
c71099ac 657 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
658#else
659 goto out2;
660#endif
661 }
e40cf353 662
519fbd87
YH
663 dst_release(&rt->u.dst);
664 rt = nrt ? : &ip6_null_entry;
1da177e4 665
519fbd87
YH
666 dst_hold(&rt->u.dst);
667 if (nrt) {
40e22e8f 668 err = ip6_ins_rt(nrt);
519fbd87 669 if (!err)
1da177e4 670 goto out2;
1da177e4 671 }
1da177e4 672
519fbd87
YH
673 if (--attempts <= 0)
674 goto out2;
675
676 /*
c71099ac 677 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
678 * released someone could insert this route. Relookup.
679 */
680 dst_release(&rt->u.dst);
681 goto relookup;
682
683out:
8238dd06
YH
684 if (reachable) {
685 reachable = 0;
686 goto restart_2;
687 }
519fbd87 688 dst_hold(&rt->u.dst);
c71099ac 689 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
690out2:
691 rt->u.dst.lastuse = jiffies;
692 rt->u.dst.__use++;
c71099ac
TG
693
694 return rt;
1da177e4
LT
695}
696
c71099ac
TG
697void ip6_route_input(struct sk_buff *skb)
698{
699 struct ipv6hdr *iph = skb->nh.ipv6h;
700 struct flowi fl = {
701 .iif = skb->dev->ifindex,
702 .nl_u = {
703 .ip6_u = {
704 .daddr = iph->daddr,
705 .saddr = iph->saddr,
706 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
707 },
708 },
709 .proto = iph->nexthdr,
710 };
77d16f45 711 int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
c71099ac
TG
712
713 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
714}
715
716static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
717 struct flowi *fl, int flags)
1da177e4
LT
718{
719 struct fib6_node *fn;
519fbd87 720 struct rt6_info *rt, *nrt;
c71099ac 721 int strict = 0;
1da177e4 722 int attempts = 3;
519fbd87 723 int err;
77d16f45 724 int reachable = RT6_LOOKUP_F_REACHABLE;
1da177e4 725
77d16f45 726 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
727
728relookup:
c71099ac 729 read_lock_bh(&table->tb6_lock);
1da177e4 730
8238dd06 731restart_2:
c71099ac 732 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
733
734restart:
8238dd06 735 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
982f56f3 736 BACKTRACK(&fl->fl6_src);
8238dd06
YH
737 if (rt == &ip6_null_entry ||
738 rt->rt6i_flags & RTF_CACHE)
1da177e4 739 goto out;
1da177e4 740
fb9de91e 741 dst_hold(&rt->u.dst);
c71099ac 742 read_unlock_bh(&table->tb6_lock);
fb9de91e 743
519fbd87 744 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 745 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
746 else {
747#if CLONE_OFFLINK_ROUTE
748 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
749#else
750 goto out2;
751#endif
752 }
1da177e4 753
519fbd87
YH
754 dst_release(&rt->u.dst);
755 rt = nrt ? : &ip6_null_entry;
1da177e4 756
519fbd87
YH
757 dst_hold(&rt->u.dst);
758 if (nrt) {
40e22e8f 759 err = ip6_ins_rt(nrt);
519fbd87 760 if (!err)
1da177e4 761 goto out2;
1da177e4 762 }
e40cf353 763
519fbd87
YH
764 if (--attempts <= 0)
765 goto out2;
766
767 /*
c71099ac 768 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
769 * released someone could insert this route. Relookup.
770 */
771 dst_release(&rt->u.dst);
772 goto relookup;
773
774out:
8238dd06
YH
775 if (reachable) {
776 reachable = 0;
777 goto restart_2;
778 }
519fbd87 779 dst_hold(&rt->u.dst);
c71099ac 780 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
781out2:
782 rt->u.dst.lastuse = jiffies;
783 rt->u.dst.__use++;
c71099ac
TG
784 return rt;
785}
786
787struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
788{
789 int flags = 0;
790
791 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 792 flags |= RT6_LOOKUP_F_IFACE;
c71099ac
TG
793
794 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
795}
796
797
798/*
799 * Destination cache support functions
800 */
801
802static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
803{
804 struct rt6_info *rt;
805
806 rt = (struct rt6_info *) dst;
807
808 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
809 return dst;
810
811 return NULL;
812}
813
814static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
815{
816 struct rt6_info *rt = (struct rt6_info *) dst;
817
818 if (rt) {
819 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 820 ip6_del_rt(rt);
1da177e4
LT
821 else
822 dst_release(dst);
823 }
824 return NULL;
825}
826
827static void ip6_link_failure(struct sk_buff *skb)
828{
829 struct rt6_info *rt;
830
831 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
832
833 rt = (struct rt6_info *) skb->dst;
834 if (rt) {
835 if (rt->rt6i_flags&RTF_CACHE) {
836 dst_set_expires(&rt->u.dst, 0);
837 rt->rt6i_flags |= RTF_EXPIRES;
838 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
839 rt->rt6i_node->fn_sernum = -1;
840 }
841}
842
843static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
844{
845 struct rt6_info *rt6 = (struct rt6_info*)dst;
846
847 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
848 rt6->rt6i_flags |= RTF_MODIFIED;
849 if (mtu < IPV6_MIN_MTU) {
850 mtu = IPV6_MIN_MTU;
851 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
852 }
853 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 854 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
855 }
856}
857
1da177e4
LT
858static int ipv6_get_mtu(struct net_device *dev);
859
860static inline unsigned int ipv6_advmss(unsigned int mtu)
861{
862 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
863
864 if (mtu < ip6_rt_min_advmss)
865 mtu = ip6_rt_min_advmss;
866
867 /*
868 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
869 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
870 * IPV6_MAXPLEN is also valid and means: "any MSS,
871 * rely only on pmtu discovery"
872 */
873 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
874 mtu = IPV6_MAXPLEN;
875 return mtu;
876}
877
/*
 * Singly linked list (through dst->next) of the dst entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the ones whose refcount is zero.
 * ndisc_lock protects every access to the list.
 */
5d0bbeeb 878static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 879static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 880
1da177e4
LT
881struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
882 struct neighbour *neigh,
883 struct in6_addr *addr,
884 int (*output)(struct sk_buff *))
885{
886 struct rt6_info *rt;
887 struct inet6_dev *idev = in6_dev_get(dev);
888
889 if (unlikely(idev == NULL))
890 return NULL;
891
892 rt = ip6_dst_alloc();
893 if (unlikely(rt == NULL)) {
894 in6_dev_put(idev);
895 goto out;
896 }
897
898 dev_hold(dev);
899 if (neigh)
900 neigh_hold(neigh);
901 else
902 neigh = ndisc_get_neigh(dev, addr);
903
904 rt->rt6i_dev = dev;
905 rt->rt6i_idev = idev;
906 rt->rt6i_nexthop = neigh;
907 atomic_set(&rt->u.dst.__refcnt, 1);
908 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
909 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
910 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
911 rt->u.dst.output = output;
912
913#if 0 /* there's no chance to use these for ndisc */
914 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
915 ? DST_HOST
916 : 0;
917 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
918 rt->rt6i_dst.plen = 128;
919#endif
920
5d0bbeeb 921 spin_lock_bh(&ndisc_lock);
1da177e4
LT
922 rt->u.dst.next = ndisc_dst_gc_list;
923 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 924 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
925
926 fib6_force_start_gc();
927
928out:
929 return (struct dst_entry *)rt;
930}
931
932int ndisc_dst_gc(int *more)
933{
934 struct dst_entry *dst, *next, **pprev;
935 int freed;
936
937 next = NULL;
5d0bbeeb
TG
938 freed = 0;
939
940 spin_lock_bh(&ndisc_lock);
1da177e4 941 pprev = &ndisc_dst_gc_list;
5d0bbeeb 942
1da177e4
LT
943 while ((dst = *pprev) != NULL) {
944 if (!atomic_read(&dst->__refcnt)) {
945 *pprev = dst->next;
946 dst_free(dst);
947 freed++;
948 } else {
949 pprev = &dst->next;
950 (*more)++;
951 }
952 }
953
5d0bbeeb
TG
954 spin_unlock_bh(&ndisc_lock);
955
1da177e4
LT
956 return freed;
957}
958
959static int ip6_dst_gc(void)
960{
961 static unsigned expire = 30*HZ;
962 static unsigned long last_gc;
963 unsigned long now = jiffies;
964
965 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
966 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
967 goto out;
968
969 expire++;
970 fib6_run_gc(expire);
971 last_gc = now;
972 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
973 expire = ip6_rt_gc_timeout>>1;
974
975out:
976 expire -= expire>>ip6_rt_gc_elasticity;
977 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
978}
979
980/* Clean host part of a prefix. Not necessary in radix tree,
981 but results in cleaner routing tables.
982
983 Remove it only when all the things will work!
984 */
985
986static int ipv6_get_mtu(struct net_device *dev)
987{
988 int mtu = IPV6_MIN_MTU;
989 struct inet6_dev *idev;
990
991 idev = in6_dev_get(dev);
992 if (idev) {
993 mtu = idev->cnf.mtu6;
994 in6_dev_put(idev);
995 }
996 return mtu;
997}
998
999int ipv6_get_hoplimit(struct net_device *dev)
1000{
1001 int hoplimit = ipv6_devconf.hop_limit;
1002 struct inet6_dev *idev;
1003
1004 idev = in6_dev_get(dev);
1005 if (idev) {
1006 hoplimit = idev->cnf.hop_limit;
1007 in6_dev_put(idev);
1008 }
1009 return hoplimit;
1010}
1011
1012/*
1013 *
1014 */
1015
86872cb5 1016int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1017{
1018 int err;
1da177e4
LT
1019 struct rt6_info *rt = NULL;
1020 struct net_device *dev = NULL;
1021 struct inet6_dev *idev = NULL;
c71099ac 1022 struct fib6_table *table;
1da177e4
LT
1023 int addr_type;
1024
86872cb5 1025 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1026 return -EINVAL;
1027#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1028 if (cfg->fc_src_len)
1da177e4
LT
1029 return -EINVAL;
1030#endif
86872cb5 1031 if (cfg->fc_ifindex) {
1da177e4 1032 err = -ENODEV;
86872cb5 1033 dev = dev_get_by_index(cfg->fc_ifindex);
1da177e4
LT
1034 if (!dev)
1035 goto out;
1036 idev = in6_dev_get(dev);
1037 if (!idev)
1038 goto out;
1039 }
1040
86872cb5
TG
1041 if (cfg->fc_metric == 0)
1042 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1043
86872cb5 1044 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1045 if (table == NULL) {
1046 err = -ENOBUFS;
1047 goto out;
1048 }
1049
1da177e4
LT
1050 rt = ip6_dst_alloc();
1051
1052 if (rt == NULL) {
1053 err = -ENOMEM;
1054 goto out;
1055 }
1056
1057 rt->u.dst.obsolete = -1;
86872cb5 1058 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1059
86872cb5
TG
1060 if (cfg->fc_protocol == RTPROT_UNSPEC)
1061 cfg->fc_protocol = RTPROT_BOOT;
1062 rt->rt6i_protocol = cfg->fc_protocol;
1063
1064 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1065
1066 if (addr_type & IPV6_ADDR_MULTICAST)
1067 rt->u.dst.input = ip6_mc_input;
1068 else
1069 rt->u.dst.input = ip6_forward;
1070
1071 rt->u.dst.output = ip6_output;
1072
86872cb5
TG
1073 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1074 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1075 if (rt->rt6i_dst.plen == 128)
1076 rt->u.dst.flags = DST_HOST;
1077
1078#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1079 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1080 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1081#endif
1082
86872cb5 1083 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1084
1085 /* We cannot add true routes via loopback here,
1086 they would result in kernel looping; promote them to reject routes
1087 */
86872cb5 1088 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1089 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1090 /* hold loopback dev/idev if we haven't done so. */
1091 if (dev != &loopback_dev) {
1092 if (dev) {
1093 dev_put(dev);
1094 in6_dev_put(idev);
1095 }
1096 dev = &loopback_dev;
1097 dev_hold(dev);
1098 idev = in6_dev_get(dev);
1099 if (!idev) {
1100 err = -ENODEV;
1101 goto out;
1102 }
1103 }
1104 rt->u.dst.output = ip6_pkt_discard_out;
1105 rt->u.dst.input = ip6_pkt_discard;
1106 rt->u.dst.error = -ENETUNREACH;
1107 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1108 goto install_route;
1109 }
1110
86872cb5 1111 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1112 struct in6_addr *gw_addr;
1113 int gwa_type;
1114
86872cb5
TG
1115 gw_addr = &cfg->fc_gateway;
1116 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1117 gwa_type = ipv6_addr_type(gw_addr);
1118
1119 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1120 struct rt6_info *grt;
1121
1122 /* IPv6 strictly inhibits using not link-local
1123 addresses as nexthop address.
1124 Otherwise, router will not able to send redirects.
1125 It is very good, but in some (rare!) circumstances
1126 (SIT, PtP, NBMA NOARP links) it is handy to allow
1127 some exceptions. --ANK
1128 */
1129 err = -EINVAL;
1130 if (!(gwa_type&IPV6_ADDR_UNICAST))
1131 goto out;
1132
86872cb5 1133 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1134
1135 err = -EHOSTUNREACH;
1136 if (grt == NULL)
1137 goto out;
1138 if (dev) {
1139 if (dev != grt->rt6i_dev) {
1140 dst_release(&grt->u.dst);
1141 goto out;
1142 }
1143 } else {
1144 dev = grt->rt6i_dev;
1145 idev = grt->rt6i_idev;
1146 dev_hold(dev);
1147 in6_dev_hold(grt->rt6i_idev);
1148 }
1149 if (!(grt->rt6i_flags&RTF_GATEWAY))
1150 err = 0;
1151 dst_release(&grt->u.dst);
1152
1153 if (err)
1154 goto out;
1155 }
1156 err = -EINVAL;
1157 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1158 goto out;
1159 }
1160
1161 err = -ENODEV;
1162 if (dev == NULL)
1163 goto out;
1164
86872cb5 1165 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1166 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1167 if (IS_ERR(rt->rt6i_nexthop)) {
1168 err = PTR_ERR(rt->rt6i_nexthop);
1169 rt->rt6i_nexthop = NULL;
1170 goto out;
1171 }
1172 }
1173
86872cb5 1174 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1175
1176install_route:
86872cb5
TG
1177 if (cfg->fc_mx) {
1178 struct nlattr *nla;
1179 int remaining;
1180
1181 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1182 int type = nla->nla_type;
1183
1184 if (type) {
1185 if (type > RTAX_MAX) {
1da177e4
LT
1186 err = -EINVAL;
1187 goto out;
1188 }
86872cb5
TG
1189
1190 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1191 }
1da177e4
LT
1192 }
1193 }
1194
1195 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1196 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1197 if (!rt->u.dst.metrics[RTAX_MTU-1])
1198 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1199 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1200 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1201 rt->u.dst.dev = dev;
1202 rt->rt6i_idev = idev;
c71099ac 1203 rt->rt6i_table = table;
86872cb5 1204 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1205
1206out:
1207 if (dev)
1208 dev_put(dev);
1209 if (idev)
1210 in6_dev_put(idev);
1211 if (rt)
1212 dst_free((struct dst_entry *) rt);
1213 return err;
1214}
1215
86872cb5 1216static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1217{
1218 int err;
c71099ac 1219 struct fib6_table *table;
1da177e4 1220
6c813a72
PM
1221 if (rt == &ip6_null_entry)
1222 return -ENOENT;
1223
c71099ac
TG
1224 table = rt->rt6i_table;
1225 write_lock_bh(&table->tb6_lock);
1da177e4 1226
86872cb5 1227 err = fib6_del(rt, info);
1da177e4
LT
1228 dst_release(&rt->u.dst);
1229
c71099ac 1230 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1231
1232 return err;
1233}
1234
e0a1ad73
TG
1235int ip6_del_rt(struct rt6_info *rt)
1236{
86872cb5 1237 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1238}
1239
86872cb5 1240static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1241{
c71099ac 1242 struct fib6_table *table;
1da177e4
LT
1243 struct fib6_node *fn;
1244 struct rt6_info *rt;
1245 int err = -ESRCH;
1246
86872cb5 1247 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1248 if (table == NULL)
1249 return err;
1250
1251 read_lock_bh(&table->tb6_lock);
1da177e4 1252
c71099ac 1253 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1254 &cfg->fc_dst, cfg->fc_dst_len,
1255 &cfg->fc_src, cfg->fc_src_len);
1da177e4
LT
1256
1257 if (fn) {
1258 for (rt = fn->leaf; rt; rt = rt->u.next) {
86872cb5 1259 if (cfg->fc_ifindex &&
1da177e4 1260 (rt->rt6i_dev == NULL ||
86872cb5 1261 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1262 continue;
86872cb5
TG
1263 if (cfg->fc_flags & RTF_GATEWAY &&
1264 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1265 continue;
86872cb5 1266 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1267 continue;
1268 dst_hold(&rt->u.dst);
c71099ac 1269 read_unlock_bh(&table->tb6_lock);
1da177e4 1270
86872cb5 1271 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1272 }
1273 }
c71099ac 1274 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1275
1276 return err;
1277}
1278
1279/*
1280 * Handle redirects
1281 */
a6279458
YH
/*
 * Lookup key used for redirect validation: an ordinary flow plus the
 * address of the router that sent the redirect.  __ip6_route_redirect()
 * receives a struct flowi pointer and casts it back to this type, so
 * "fl" has to remain the first member.
 */
1282struct ip6rd_flowi {
1283 struct flowi fl;
1284 struct in6_addr gateway;
1285};
1286
1287static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1288 struct flowi *fl,
1289 int flags)
1da177e4 1290{
a6279458
YH
1291 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1292 struct rt6_info *rt;
e843b9e1 1293 struct fib6_node *fn;
c71099ac 1294
1da177e4 1295 /*
e843b9e1
YH
1296 * Get the "current" route for this destination and
1297 * check if the redirect has come from approriate router.
1298 *
1299 * RFC 2461 specifies that redirects should only be
1300 * accepted if they come from the nexthop to the target.
1301 * Due to the way the routes are chosen, this notion
1302 * is a bit fuzzy and one might need to check all possible
1303 * routes.
1da177e4 1304 */
1da177e4 1305
c71099ac 1306 read_lock_bh(&table->tb6_lock);
a6279458 1307 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1
YH
1308restart:
1309 for (rt = fn->leaf; rt; rt = rt->u.next) {
1310 /*
1311 * Current route is on-link; redirect is always invalid.
1312 *
1313 * Seems, previous statement is not true. It could
1314 * be node, which looks for us as on-link (f.e. proxy ndisc)
1315 * But then router serving it might decide, that we should
1316 * know truth 8)8) --ANK (980726).
1317 */
1318 if (rt6_check_expired(rt))
1319 continue;
1320 if (!(rt->rt6i_flags & RTF_GATEWAY))
1321 continue;
a6279458 1322 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1323 continue;
a6279458 1324 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1325 continue;
1326 break;
1327 }
a6279458 1328
cb15d9c2 1329 if (!rt)
a6279458 1330 rt = &ip6_null_entry;
cb15d9c2
YH
1331 BACKTRACK(&fl->fl6_src);
1332out:
a6279458
YH
1333 dst_hold(&rt->u.dst);
1334
c71099ac 1335 read_unlock_bh(&table->tb6_lock);
e843b9e1 1336
a6279458
YH
1337 return rt;
1338};
1339
1340static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1341 struct in6_addr *src,
1342 struct in6_addr *gateway,
1343 struct net_device *dev)
1344{
1345 struct ip6rd_flowi rdfl = {
1346 .fl = {
1347 .oif = dev->ifindex,
1348 .nl_u = {
1349 .ip6_u = {
1350 .daddr = *dest,
1351 .saddr = *src,
1352 },
1353 },
1354 },
1355 .gateway = *gateway,
1356 };
77d16f45 1357 int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
a6279458
YH
1358
1359 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1360}
1361
1362void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1363 struct in6_addr *saddr,
1364 struct neighbour *neigh, u8 *lladdr, int on_link)
1365{
1366 struct rt6_info *rt, *nrt = NULL;
1367 struct netevent_redirect netevent;
1368
1369 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1370
1371 if (rt == &ip6_null_entry) {
1da177e4
LT
1372 if (net_ratelimit())
1373 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1374 "for redirect target\n");
a6279458 1375 goto out;
1da177e4
LT
1376 }
1377
1da177e4
LT
1378 /*
1379 * We have finally decided to accept it.
1380 */
1381
1382 neigh_update(neigh, lladdr, NUD_STALE,
1383 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1384 NEIGH_UPDATE_F_OVERRIDE|
1385 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1386 NEIGH_UPDATE_F_ISROUTER))
1387 );
1388
1389 /*
1390 * Redirect received -> path was valid.
1391 * Look, redirects are sent only in response to data packets,
1392 * so that this nexthop apparently is reachable. --ANK
1393 */
1394 dst_confirm(&rt->u.dst);
1395
1396 /* Duplicate redirect: silently ignore. */
1397 if (neigh == rt->u.dst.neighbour)
1398 goto out;
1399
1400 nrt = ip6_rt_copy(rt);
1401 if (nrt == NULL)
1402 goto out;
1403
1404 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1405 if (on_link)
1406 nrt->rt6i_flags &= ~RTF_GATEWAY;
1407
1408 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1409 nrt->rt6i_dst.plen = 128;
1410 nrt->u.dst.flags |= DST_HOST;
1411
1412 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1413 nrt->rt6i_nexthop = neigh_clone(neigh);
1414 /* Reset pmtu, it may be better */
1415 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1416 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1417
40e22e8f 1418 if (ip6_ins_rt(nrt))
1da177e4
LT
1419 goto out;
1420
8d71740c
TT
1421 netevent.old = &rt->u.dst;
1422 netevent.new = &nrt->u.dst;
1423 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1424
1da177e4 1425 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1426 ip6_del_rt(rt);
1da177e4
LT
1427 return;
1428 }
1429
1430out:
1431 dst_release(&rt->u.dst);
1432 return;
1433}
1434
1435/*
1436 * Handle ICMP "packet too big" messages
1437 * i.e. Path MTU discovery
1438 */
1439
1440void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1441 struct net_device *dev, u32 pmtu)
1442{
1443 struct rt6_info *rt, *nrt;
1444 int allfrag = 0;
1445
1446 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1447 if (rt == NULL)
1448 return;
1449
1450 if (pmtu >= dst_mtu(&rt->u.dst))
1451 goto out;
1452
1453 if (pmtu < IPV6_MIN_MTU) {
1454 /*
1455 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1456 * MTU (1280) and a fragment header should always be included
1457 * after a node receiving Too Big message reporting PMTU is
1458 * less than the IPv6 Minimum Link MTU.
1459 */
1460 pmtu = IPV6_MIN_MTU;
1461 allfrag = 1;
1462 }
1463
1464 /* New mtu received -> path was valid.
1465 They are sent only in response to data packets,
1466 so that this nexthop apparently is reachable. --ANK
1467 */
1468 dst_confirm(&rt->u.dst);
1469
1470 /* Host route. If it is static, it would be better
1471 not to override it, but add new one, so that
1472 when cache entry will expire old pmtu
1473 would return automatically.
1474 */
1475 if (rt->rt6i_flags & RTF_CACHE) {
1476 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1477 if (allfrag)
1478 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1479 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1480 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1481 goto out;
1482 }
1483
1484 /* Network route.
1485 Two cases are possible:
1486 1. It is connected route. Action: COW
1487 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1488 */
d5315b50 1489 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1490 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1491 else
1492 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1493
d5315b50 1494 if (nrt) {
a1e78363
YH
1495 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1496 if (allfrag)
1497 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1498
1499 /* According to RFC 1981, detecting PMTU increase shouldn't be
1500 * happened within 5 mins, the recommended timer is 10 mins.
1501 * Here this route expiration time is set to ip6_rt_mtu_expires
1502 * which is 10 mins. After 10 mins the decreased pmtu is expired
1503 * and detecting PMTU increase will be automatically happened.
1504 */
1505 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1506 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1507
40e22e8f 1508 ip6_ins_rt(nrt);
1da177e4 1509 }
1da177e4
LT
1510out:
1511 dst_release(&rt->u.dst);
1512}
1513
1514/*
1515 * Misc support functions
1516 */
1517
1518static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1519{
1520 struct rt6_info *rt = ip6_dst_alloc();
1521
1522 if (rt) {
1523 rt->u.dst.input = ort->u.dst.input;
1524 rt->u.dst.output = ort->u.dst.output;
1525
1526 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1527 rt->u.dst.dev = ort->u.dst.dev;
1528 if (rt->u.dst.dev)
1529 dev_hold(rt->u.dst.dev);
1530 rt->rt6i_idev = ort->rt6i_idev;
1531 if (rt->rt6i_idev)
1532 in6_dev_hold(rt->rt6i_idev);
1533 rt->u.dst.lastuse = jiffies;
1534 rt->rt6i_expires = 0;
1535
1536 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1537 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1538 rt->rt6i_metric = 0;
1539
1540 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1541#ifdef CONFIG_IPV6_SUBTREES
1542 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1543#endif
c71099ac 1544 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1545 }
1546 return rt;
1547}
1548
70ceb4f5
YH
1549#ifdef CONFIG_IPV6_ROUTE_INFO
1550static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1551 struct in6_addr *gwaddr, int ifindex)
1552{
1553 struct fib6_node *fn;
1554 struct rt6_info *rt = NULL;
c71099ac
TG
1555 struct fib6_table *table;
1556
1557 table = fib6_get_table(RT6_TABLE_INFO);
1558 if (table == NULL)
1559 return NULL;
70ceb4f5 1560
c71099ac
TG
1561 write_lock_bh(&table->tb6_lock);
1562 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1563 if (!fn)
1564 goto out;
1565
1566 for (rt = fn->leaf; rt; rt = rt->u.next) {
1567 if (rt->rt6i_dev->ifindex != ifindex)
1568 continue;
1569 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1570 continue;
1571 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1572 continue;
1573 dst_hold(&rt->u.dst);
1574 break;
1575 }
1576out:
c71099ac 1577 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1578 return rt;
1579}
1580
1581static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1582 struct in6_addr *gwaddr, int ifindex,
1583 unsigned pref)
1584{
86872cb5
TG
1585 struct fib6_config cfg = {
1586 .fc_table = RT6_TABLE_INFO,
1587 .fc_metric = 1024,
1588 .fc_ifindex = ifindex,
1589 .fc_dst_len = prefixlen,
1590 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1591 RTF_UP | RTF_PREF(pref),
1592 };
1593
1594 ipv6_addr_copy(&cfg.fc_dst, prefix);
1595 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1596
e317da96
YH
1597 /* We should treat it as a default route if prefix length is 0. */
1598 if (!prefixlen)
86872cb5 1599 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1600
86872cb5 1601 ip6_route_add(&cfg);
70ceb4f5
YH
1602
1603 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1604}
1605#endif
1606
1da177e4
LT
1607struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1608{
1609 struct rt6_info *rt;
c71099ac 1610 struct fib6_table *table;
1da177e4 1611
c71099ac
TG
1612 table = fib6_get_table(RT6_TABLE_DFLT);
1613 if (table == NULL)
1614 return NULL;
1da177e4 1615
c71099ac
TG
1616 write_lock_bh(&table->tb6_lock);
1617 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1da177e4 1618 if (dev == rt->rt6i_dev &&
045927ff 1619 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1620 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1621 break;
1622 }
1623 if (rt)
1624 dst_hold(&rt->u.dst);
c71099ac 1625 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1626 return rt;
1627}
1628
1629struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1630 struct net_device *dev,
1631 unsigned int pref)
1da177e4 1632{
86872cb5
TG
1633 struct fib6_config cfg = {
1634 .fc_table = RT6_TABLE_DFLT,
1635 .fc_metric = 1024,
1636 .fc_ifindex = dev->ifindex,
1637 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1638 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1639 };
1da177e4 1640
86872cb5 1641 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1642
86872cb5 1643 ip6_route_add(&cfg);
1da177e4 1644
1da177e4
LT
1645 return rt6_get_dflt_router(gwaddr, dev);
1646}
1647
1648void rt6_purge_dflt_routers(void)
1649{
1650 struct rt6_info *rt;
c71099ac
TG
1651 struct fib6_table *table;
1652
1653 /* NOTE: Keep consistent with rt6_get_dflt_router */
1654 table = fib6_get_table(RT6_TABLE_DFLT);
1655 if (table == NULL)
1656 return;
1da177e4
LT
1657
1658restart:
c71099ac
TG
1659 read_lock_bh(&table->tb6_lock);
1660 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1da177e4
LT
1661 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1662 dst_hold(&rt->u.dst);
c71099ac 1663 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1664 ip6_del_rt(rt);
1da177e4
LT
1665 goto restart;
1666 }
1667 }
c71099ac 1668 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1669}
1670
86872cb5
TG
1671static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1672 struct fib6_config *cfg)
1673{
1674 memset(cfg, 0, sizeof(*cfg));
1675
1676 cfg->fc_table = RT6_TABLE_MAIN;
1677 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1678 cfg->fc_metric = rtmsg->rtmsg_metric;
1679 cfg->fc_expires = rtmsg->rtmsg_info;
1680 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1681 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1682 cfg->fc_flags = rtmsg->rtmsg_flags;
1683
1684 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1685 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1686 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1687}
1688
1da177e4
LT
1689int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1690{
86872cb5 1691 struct fib6_config cfg;
1da177e4
LT
1692 struct in6_rtmsg rtmsg;
1693 int err;
1694
1695 switch(cmd) {
1696 case SIOCADDRT: /* Add a route */
1697 case SIOCDELRT: /* Delete a route */
1698 if (!capable(CAP_NET_ADMIN))
1699 return -EPERM;
1700 err = copy_from_user(&rtmsg, arg,
1701 sizeof(struct in6_rtmsg));
1702 if (err)
1703 return -EFAULT;
86872cb5
TG
1704
1705 rtmsg_to_fib6_config(&rtmsg, &cfg);
1706
1da177e4
LT
1707 rtnl_lock();
1708 switch (cmd) {
1709 case SIOCADDRT:
86872cb5 1710 err = ip6_route_add(&cfg);
1da177e4
LT
1711 break;
1712 case SIOCDELRT:
86872cb5 1713 err = ip6_route_del(&cfg);
1da177e4
LT
1714 break;
1715 default:
1716 err = -EINVAL;
1717 }
1718 rtnl_unlock();
1719
1720 return err;
1721 };
1722
1723 return -EINVAL;
1724}
1725
1726/*
1727 * Drop the packet on the floor
1728 */
1729
20380731 1730static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4 1731{
76d0cc1b
LL
1732 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1733 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1734 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1735
1da177e4
LT
1736 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1737 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1738 kfree_skb(skb);
1739 return 0;
1740}
1741
20380731 1742static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1743{
1744 skb->dev = skb->dst->dev;
1745 return ip6_pkt_discard(skb);
1746}
1747
1748/*
1749 * Allocate a dst for local (unicast / anycast) address.
1750 */
1751
1752struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1753 const struct in6_addr *addr,
1754 int anycast)
1755{
1756 struct rt6_info *rt = ip6_dst_alloc();
1757
1758 if (rt == NULL)
1759 return ERR_PTR(-ENOMEM);
1760
1761 dev_hold(&loopback_dev);
1762 in6_dev_hold(idev);
1763
1764 rt->u.dst.flags = DST_HOST;
1765 rt->u.dst.input = ip6_input;
1766 rt->u.dst.output = ip6_output;
1767 rt->rt6i_dev = &loopback_dev;
1768 rt->rt6i_idev = idev;
1769 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1770 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1771 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1772 rt->u.dst.obsolete = -1;
1773
1774 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1775 if (anycast)
1776 rt->rt6i_flags |= RTF_ANYCAST;
1777 else
1da177e4
LT
1778 rt->rt6i_flags |= RTF_LOCAL;
1779 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1780 if (rt->rt6i_nexthop == NULL) {
1781 dst_free((struct dst_entry *) rt);
1782 return ERR_PTR(-ENOMEM);
1783 }
1784
1785 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1786 rt->rt6i_dst.plen = 128;
c71099ac 1787 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1788
1789 atomic_set(&rt->u.dst.__refcnt, 1);
1790
1791 return rt;
1792}
1793
1794static int fib6_ifdown(struct rt6_info *rt, void *arg)
1795{
1796 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1797 rt != &ip6_null_entry) {
1798 RT6_TRACE("deleted by ifdown %p\n", rt);
1799 return -1;
1800 }
1801 return 0;
1802}
1803
1804void rt6_ifdown(struct net_device *dev)
1805{
c71099ac 1806 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1807}
1808
/*
 * Argument bundle that rt6_mtu_change() passes through fib6_clean_all()
 * to rt6_mtu_change_route().
 */
1809struct rt6_mtu_change_arg
1810{
1811 struct net_device *dev; /* device whose MTU changed */
1812 unsigned mtu; /* the new MTU value */
1813};
1814
1815static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1816{
1817 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1818 struct inet6_dev *idev;
1819
1820 /* In IPv6 pmtu discovery is not optional,
1821 so that RTAX_MTU lock cannot disable it.
1822 We still use this lock to block changes
1823 caused by addrconf/ndisc.
1824 */
1825
1826 idev = __in6_dev_get(arg->dev);
1827 if (idev == NULL)
1828 return 0;
1829
1830 /* For administrative MTU increase, there is no way to discover
1831 IPv6 PMTU increase, so PMTU increase should be updated here.
1832 Since RFC 1981 doesn't include administrative MTU increase
1833 update PMTU increase is a MUST. (i.e. jumbo frame)
1834 */
1835 /*
1836 If new MTU is less than route PMTU, this new MTU will be the
1837 lowest MTU in the path, update the route PMTU to reflect PMTU
1838 decreases; if new MTU is greater than route PMTU, and the
1839 old MTU is the lowest MTU in the path, update the route PMTU
1840 to reflect the increase. In this case if the other nodes' MTU
1841 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1842 PMTU discouvery.
1843 */
1844 if (rt->rt6i_dev == arg->dev &&
1845 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1846 (dst_mtu(&rt->u.dst) > arg->mtu ||
1847 (dst_mtu(&rt->u.dst) < arg->mtu &&
1848 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1849 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1850 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1851 return 0;
1852}
1853
1854void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1855{
c71099ac
TG
1856 struct rt6_mtu_change_arg arg = {
1857 .dev = dev,
1858 .mtu = mtu,
1859 };
1da177e4 1860
c71099ac 1861 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1862}
1863
86872cb5
TG
1864static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1865 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
1866 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1867 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1868 [RTA_PRIORITY] = { .type = NLA_U32 },
1869 [RTA_METRICS] = { .type = NLA_NESTED },
1870};
1871
1872static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1873 struct fib6_config *cfg)
1da177e4 1874{
86872cb5
TG
1875 struct rtmsg *rtm;
1876 struct nlattr *tb[RTA_MAX+1];
1877 int err;
1da177e4 1878
86872cb5
TG
1879 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1880 if (err < 0)
1881 goto errout;
1da177e4 1882
86872cb5
TG
1883 err = -EINVAL;
1884 rtm = nlmsg_data(nlh);
1885 memset(cfg, 0, sizeof(*cfg));
1886
1887 cfg->fc_table = rtm->rtm_table;
1888 cfg->fc_dst_len = rtm->rtm_dst_len;
1889 cfg->fc_src_len = rtm->rtm_src_len;
1890 cfg->fc_flags = RTF_UP;
1891 cfg->fc_protocol = rtm->rtm_protocol;
1892
1893 if (rtm->rtm_type == RTN_UNREACHABLE)
1894 cfg->fc_flags |= RTF_REJECT;
1895
1896 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1897 cfg->fc_nlinfo.nlh = nlh;
1898
1899 if (tb[RTA_GATEWAY]) {
1900 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1901 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1902 }
86872cb5
TG
1903
1904 if (tb[RTA_DST]) {
1905 int plen = (rtm->rtm_dst_len + 7) >> 3;
1906
1907 if (nla_len(tb[RTA_DST]) < plen)
1908 goto errout;
1909
1910 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1911 }
86872cb5
TG
1912
1913 if (tb[RTA_SRC]) {
1914 int plen = (rtm->rtm_src_len + 7) >> 3;
1915
1916 if (nla_len(tb[RTA_SRC]) < plen)
1917 goto errout;
1918
1919 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1920 }
86872cb5
TG
1921
1922 if (tb[RTA_OIF])
1923 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1924
1925 if (tb[RTA_PRIORITY])
1926 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1927
1928 if (tb[RTA_METRICS]) {
1929 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1930 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 1931 }
86872cb5
TG
1932
1933 if (tb[RTA_TABLE])
1934 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1935
1936 err = 0;
1937errout:
1938 return err;
1da177e4
LT
1939}
1940
1941int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1942{
86872cb5
TG
1943 struct fib6_config cfg;
1944 int err;
1da177e4 1945
86872cb5
TG
1946 err = rtm_to_fib6_config(skb, nlh, &cfg);
1947 if (err < 0)
1948 return err;
1949
1950 return ip6_route_del(&cfg);
1da177e4
LT
1951}
1952
1953int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1954{
86872cb5
TG
1955 struct fib6_config cfg;
1956 int err;
1da177e4 1957
86872cb5
TG
1958 err = rtm_to_fib6_config(skb, nlh, &cfg);
1959 if (err < 0)
1960 return err;
1961
1962 return ip6_route_add(&cfg);
1da177e4
LT
1963}
1964
1da177e4 1965static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1966 struct in6_addr *dst, struct in6_addr *src,
1967 int iif, int type, u32 pid, u32 seq,
1968 int prefix, unsigned int flags)
1da177e4
LT
1969{
1970 struct rtmsg *rtm;
2d7202bf 1971 struct nlmsghdr *nlh;
1da177e4 1972 struct rta_cacheinfo ci;
9e762a4a 1973 u32 table;
1da177e4
LT
1974
1975 if (prefix) { /* user wants prefix routes only */
1976 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1977 /* success since this is not a prefix route */
1978 return 1;
1979 }
1980 }
1981
2d7202bf
TG
1982 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1983 if (nlh == NULL)
1984 return -ENOBUFS;
1985
1986 rtm = nlmsg_data(nlh);
1da177e4
LT
1987 rtm->rtm_family = AF_INET6;
1988 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1989 rtm->rtm_src_len = rt->rt6i_src.plen;
1990 rtm->rtm_tos = 0;
c71099ac 1991 if (rt->rt6i_table)
9e762a4a 1992 table = rt->rt6i_table->tb6_id;
c71099ac 1993 else
9e762a4a
PM
1994 table = RT6_TABLE_UNSPEC;
1995 rtm->rtm_table = table;
2d7202bf 1996 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
1997 if (rt->rt6i_flags&RTF_REJECT)
1998 rtm->rtm_type = RTN_UNREACHABLE;
1999 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2000 rtm->rtm_type = RTN_LOCAL;
2001 else
2002 rtm->rtm_type = RTN_UNICAST;
2003 rtm->rtm_flags = 0;
2004 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2005 rtm->rtm_protocol = rt->rt6i_protocol;
2006 if (rt->rt6i_flags&RTF_DYNAMIC)
2007 rtm->rtm_protocol = RTPROT_REDIRECT;
2008 else if (rt->rt6i_flags & RTF_ADDRCONF)
2009 rtm->rtm_protocol = RTPROT_KERNEL;
2010 else if (rt->rt6i_flags&RTF_DEFAULT)
2011 rtm->rtm_protocol = RTPROT_RA;
2012
2013 if (rt->rt6i_flags&RTF_CACHE)
2014 rtm->rtm_flags |= RTM_F_CLONED;
2015
2016 if (dst) {
2d7202bf 2017 NLA_PUT(skb, RTA_DST, 16, dst);
1da177e4
LT
2018 rtm->rtm_dst_len = 128;
2019 } else if (rtm->rtm_dst_len)
2d7202bf 2020 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2021#ifdef CONFIG_IPV6_SUBTREES
2022 if (src) {
2d7202bf 2023 NLA_PUT(skb, RTA_SRC, 16, src);
1da177e4
LT
2024 rtm->rtm_src_len = 128;
2025 } else if (rtm->rtm_src_len)
2d7202bf 2026 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2027#endif
2028 if (iif)
2d7202bf 2029 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2030 else if (dst) {
2031 struct in6_addr saddr_buf;
2032 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2033 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2034 }
2d7202bf 2035
1da177e4 2036 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2037 goto nla_put_failure;
2038
1da177e4 2039 if (rt->u.dst.neighbour)
2d7202bf
TG
2040 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2041
1da177e4 2042 if (rt->u.dst.dev)
2d7202bf
TG
2043 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2044
2045 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
1da177e4
LT
2046 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2047 if (rt->rt6i_expires)
2048 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2049 else
2050 ci.rta_expires = 0;
2051 ci.rta_used = rt->u.dst.__use;
2052 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2053 ci.rta_error = rt->u.dst.error;
2054 ci.rta_id = 0;
2055 ci.rta_ts = 0;
2056 ci.rta_tsage = 0;
2d7202bf
TG
2057 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2058
2059 return nlmsg_end(skb, nlh);
2060
2061nla_put_failure:
2062 return nlmsg_cancel(skb, nlh);
1da177e4
LT
2063}
2064
1b43af54 2065int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2066{
2067 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2068 int prefix;
2069
2d7202bf
TG
2070 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2071 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2072 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2073 } else
2074 prefix = 0;
2075
2076 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2077 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2078 prefix, NLM_F_MULTI);
1da177e4
LT
2079}
2080
1da177e4
LT
2081int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2082{
ab364a6f
TG
2083 struct nlattr *tb[RTA_MAX+1];
2084 struct rt6_info *rt;
1da177e4 2085 struct sk_buff *skb;
ab364a6f 2086 struct rtmsg *rtm;
1da177e4 2087 struct flowi fl;
ab364a6f 2088 int err, iif = 0;
1da177e4 2089
ab364a6f
TG
2090 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2091 if (err < 0)
2092 goto errout;
1da177e4 2093
ab364a6f 2094 err = -EINVAL;
1da177e4 2095 memset(&fl, 0, sizeof(fl));
1da177e4 2096
ab364a6f
TG
2097 if (tb[RTA_SRC]) {
2098 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2099 goto errout;
2100
2101 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2102 }
2103
2104 if (tb[RTA_DST]) {
2105 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2106 goto errout;
2107
2108 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2109 }
2110
2111 if (tb[RTA_IIF])
2112 iif = nla_get_u32(tb[RTA_IIF]);
2113
2114 if (tb[RTA_OIF])
2115 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2116
2117 if (iif) {
2118 struct net_device *dev;
2119 dev = __dev_get_by_index(iif);
2120 if (!dev) {
2121 err = -ENODEV;
ab364a6f 2122 goto errout;
1da177e4
LT
2123 }
2124 }
2125
ab364a6f
TG
2126 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2127 if (skb == NULL) {
2128 err = -ENOBUFS;
2129 goto errout;
2130 }
1da177e4 2131
ab364a6f
TG
2132 /* Reserve room for dummy headers, this skb can pass
2133 through good chunk of routing engine.
2134 */
2135 skb->mac.raw = skb->data;
2136 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2137
ab364a6f 2138 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2139 skb->dst = &rt->u.dst;
2140
ab364a6f 2141 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2142 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2143 nlh->nlmsg_seq, 0, 0);
1da177e4 2144 if (err < 0) {
ab364a6f
TG
2145 kfree_skb(skb);
2146 goto errout;
1da177e4
LT
2147 }
2148
2942e900 2149 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2150errout:
1da177e4 2151 return err;
1da177e4
LT
2152}
2153
86872cb5 2154void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2155{
2156 struct sk_buff *skb;
86872cb5
TG
2157 u32 pid = 0, seq = 0;
2158 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2159 int payload = sizeof(struct rtmsg) + 256;
2160 int err = -ENOBUFS;
2161
86872cb5
TG
2162 if (info) {
2163 pid = info->pid;
2164 nlh = info->nlh;
2165 if (nlh)
2166 seq = nlh->nlmsg_seq;
2167 }
2168
21713ebc
TG
2169 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2170 if (skb == NULL)
2171 goto errout;
2172
2173 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2174 if (err < 0) {
1da177e4 2175 kfree_skb(skb);
21713ebc 2176 goto errout;
1da177e4 2177 }
21713ebc
TG
2178
2179 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2180errout:
2181 if (err < 0)
2182 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2183}
2184
2185/*
2186 * /proc
2187 */
2188
2189#ifdef CONFIG_PROC_FS
2190
2191#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2192
/*
 * Cursor state shared between rt6_proc_info() and its per-route
 * callback rt6_info_route().
 */
2193struct rt6_proc_arg
2194{
2195 char *buffer; /* output buffer the records are written to */
2196 int offset; /* read offset requested by the caller */
2197 int length; /* requested length; no record starts at or past it */
2198 int skip; /* routes skipped so far to reach offset */
2199 int len; /* bytes written to buffer so far */
2200};
2201
2202static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2203{
2204 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2205 int i;
2206
2207 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2208 arg->skip++;
2209 return 0;
2210 }
2211
2212 if (arg->len >= arg->length)
2213 return 0;
2214
2215 for (i=0; i<16; i++) {
2216 sprintf(arg->buffer + arg->len, "%02x",
2217 rt->rt6i_dst.addr.s6_addr[i]);
2218 arg->len += 2;
2219 }
2220 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2221 rt->rt6i_dst.plen);
2222
2223#ifdef CONFIG_IPV6_SUBTREES
2224 for (i=0; i<16; i++) {
2225 sprintf(arg->buffer + arg->len, "%02x",
2226 rt->rt6i_src.addr.s6_addr[i]);
2227 arg->len += 2;
2228 }
2229 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2230 rt->rt6i_src.plen);
2231#else
2232 sprintf(arg->buffer + arg->len,
2233 "00000000000000000000000000000000 00 ");
2234 arg->len += 36;
2235#endif
2236
2237 if (rt->rt6i_nexthop) {
2238 for (i=0; i<16; i++) {
2239 sprintf(arg->buffer + arg->len, "%02x",
2240 rt->rt6i_nexthop->primary_key[i]);
2241 arg->len += 2;
2242 }
2243 } else {
2244 sprintf(arg->buffer + arg->len,
2245 "00000000000000000000000000000000");
2246 arg->len += 32;
2247 }
2248 arg->len += sprintf(arg->buffer + arg->len,
2249 " %08x %08x %08x %08x %8s\n",
2250 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2251 rt->u.dst.__use, rt->rt6i_flags,
2252 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2253 return 0;
2254}
2255
2256static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2257{
c71099ac
TG
2258 struct rt6_proc_arg arg = {
2259 .buffer = buffer,
2260 .offset = offset,
2261 .length = length,
2262 };
1da177e4 2263
c71099ac 2264 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2265
2266 *start = buffer;
2267 if (offset)
2268 *start += offset % RT6_INFO_LEN;
2269
2270 arg.len -= offset % RT6_INFO_LEN;
2271
2272 if (arg.len > length)
2273 arg.len = length;
2274 if (arg.len < 0)
2275 arg.len = 0;
2276
2277 return arg.len;
2278}
2279
1da177e4
LT
2280static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2281{
2282 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2283 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2284 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2285 rt6_stats.fib_rt_cache,
2286 atomic_read(&ip6_dst_ops.entries),
2287 rt6_stats.fib_discarded_routes);
2288
2289 return 0;
2290}
2291
2292static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2293{
2294 return single_open(file, rt6_stats_seq_show, NULL);
2295}
2296
2297static struct file_operations rt6_stats_seq_fops = {
2298 .owner = THIS_MODULE,
2299 .open = rt6_stats_seq_open,
2300 .read = seq_read,
2301 .llseek = seq_lseek,
2302 .release = single_release,
2303};
2304#endif /* CONFIG_PROC_FS */
2305
2306#ifdef CONFIG_SYSCTL
2307
2308static int flush_delay;
2309
2310static
2311int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2312 void __user *buffer, size_t *lenp, loff_t *ppos)
2313{
2314 if (write) {
2315 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2316 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2317 return 0;
2318 } else
2319 return -EINVAL;
2320}
2321
2322ctl_table ipv6_route_table[] = {
2323 {
2324 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2325 .procname = "flush",
2326 .data = &flush_delay,
2327 .maxlen = sizeof(int),
89c8b3a1 2328 .mode = 0200,
1da177e4
LT
2329 .proc_handler = &ipv6_sysctl_rtcache_flush
2330 },
2331 {
2332 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2333 .procname = "gc_thresh",
2334 .data = &ip6_dst_ops.gc_thresh,
2335 .maxlen = sizeof(int),
2336 .mode = 0644,
2337 .proc_handler = &proc_dointvec,
2338 },
2339 {
2340 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2341 .procname = "max_size",
2342 .data = &ip6_rt_max_size,
2343 .maxlen = sizeof(int),
2344 .mode = 0644,
2345 .proc_handler = &proc_dointvec,
2346 },
2347 {
2348 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2349 .procname = "gc_min_interval",
2350 .data = &ip6_rt_gc_min_interval,
2351 .maxlen = sizeof(int),
2352 .mode = 0644,
2353 .proc_handler = &proc_dointvec_jiffies,
2354 .strategy = &sysctl_jiffies,
2355 },
2356 {
2357 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2358 .procname = "gc_timeout",
2359 .data = &ip6_rt_gc_timeout,
2360 .maxlen = sizeof(int),
2361 .mode = 0644,
2362 .proc_handler = &proc_dointvec_jiffies,
2363 .strategy = &sysctl_jiffies,
2364 },
2365 {
2366 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2367 .procname = "gc_interval",
2368 .data = &ip6_rt_gc_interval,
2369 .maxlen = sizeof(int),
2370 .mode = 0644,
2371 .proc_handler = &proc_dointvec_jiffies,
2372 .strategy = &sysctl_jiffies,
2373 },
2374 {
2375 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2376 .procname = "gc_elasticity",
2377 .data = &ip6_rt_gc_elasticity,
2378 .maxlen = sizeof(int),
2379 .mode = 0644,
2380 .proc_handler = &proc_dointvec_jiffies,
2381 .strategy = &sysctl_jiffies,
2382 },
2383 {
2384 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2385 .procname = "mtu_expires",
2386 .data = &ip6_rt_mtu_expires,
2387 .maxlen = sizeof(int),
2388 .mode = 0644,
2389 .proc_handler = &proc_dointvec_jiffies,
2390 .strategy = &sysctl_jiffies,
2391 },
2392 {
2393 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2394 .procname = "min_adv_mss",
2395 .data = &ip6_rt_min_advmss,
2396 .maxlen = sizeof(int),
2397 .mode = 0644,
2398 .proc_handler = &proc_dointvec_jiffies,
2399 .strategy = &sysctl_jiffies,
2400 },
2401 {
2402 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2403 .procname = "gc_min_interval_ms",
2404 .data = &ip6_rt_gc_min_interval,
2405 .maxlen = sizeof(int),
2406 .mode = 0644,
2407 .proc_handler = &proc_dointvec_ms_jiffies,
2408 .strategy = &sysctl_ms_jiffies,
2409 },
2410 { .ctl_name = 0 }
2411};
2412
2413#endif
2414
2415void __init ip6_route_init(void)
2416{
2417 struct proc_dir_entry *p;
2418
2419 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2420 sizeof(struct rt6_info),
2421 0, SLAB_HWCACHE_ALIGN,
2422 NULL, NULL);
2423 if (!ip6_dst_ops.kmem_cachep)
2424 panic("cannot create ip6_dst_cache");
2425
2426 fib6_init();
2427#ifdef CONFIG_PROC_FS
2428 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2429 if (p)
2430 p->owner = THIS_MODULE;
2431
2432 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2433#endif
2434#ifdef CONFIG_XFRM
2435 xfrm6_init();
2436#endif
101367c2
TG
2437#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2438 fib6_rules_init();
2439#endif
1da177e4
LT
2440}
2441
2442void ip6_route_cleanup(void)
2443{
101367c2
TG
2444#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2445 fib6_rules_cleanup();
2446#endif
1da177e4
LT
2447#ifdef CONFIG_PROC_FS
2448 proc_net_remove("ipv6_route");
2449 proc_net_remove("rt6_stats");
2450#endif
2451#ifdef CONFIG_XFRM
2452 xfrm6_fini();
2453#endif
2454 rt6_ifdown(NULL);
2455 fib6_gc_cleanup();
2456 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2457}