]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NETLINK]: remove third bogus argument from NLA_PUT_FLAG
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
c0bece9f
YH
25 * Ville Nuorvala
26 * Fixed routing subtrees.
1da177e4
LT
27 */
28
4fc268d2 29#include <linux/capability.h>
1da177e4
LT
30#include <linux/errno.h>
31#include <linux/types.h>
32#include <linux/times.h>
33#include <linux/socket.h>
34#include <linux/sockios.h>
35#include <linux/net.h>
36#include <linux/route.h>
37#include <linux/netdevice.h>
38#include <linux/in6.h>
39#include <linux/init.h>
1da177e4
LT
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
8d71740c 57#include <net/netevent.h>
21713ebc 58#include <net/netlink.h>
1da177e4
LT
59
60#include <asm/uaccess.h>
61
62#ifdef CONFIG_SYSCTL
63#include <linux/sysctl.h>
64#endif
65
66/* Set to 3 to get tracing. */
67#define RT6_DEBUG 2
68
69#if RT6_DEBUG >= 3
70#define RDBG(x) printk x
71#define RT6_TRACE(x...) printk(KERN_DEBUG x)
72#else
73#define RDBG(x)
74#define RT6_TRACE(x...) do { ; } while (0)
75#endif
76
519fbd87 77#define CLONE_OFFLINK_ROUTE 0
1da177e4
LT
78
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
70ceb4f5
YH
100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
1da177e4
LT
108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
122struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
101367c2
TG
142#ifdef CONFIG_IPV6_MULTIPLE_TABLES
143
144struct rt6_info ip6_prohibit_entry = {
145 .u = {
146 .dst = {
147 .__refcnt = ATOMIC_INIT(1),
148 .__use = 1,
149 .dev = &loopback_dev,
150 .obsolete = -1,
151 .error = -EACCES,
152 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
153 .input = ip6_pkt_discard,
154 .output = ip6_pkt_discard_out,
155 .ops = &ip6_dst_ops,
156 .path = (struct dst_entry*)&ip6_prohibit_entry,
157 }
158 },
159 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
160 .rt6i_metric = ~(u32) 0,
161 .rt6i_ref = ATOMIC_INIT(1),
162};
163
164struct rt6_info ip6_blk_hole_entry = {
165 .u = {
166 .dst = {
167 .__refcnt = ATOMIC_INIT(1),
168 .__use = 1,
169 .dev = &loopback_dev,
170 .obsolete = -1,
171 .error = -EINVAL,
172 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
173 .input = ip6_pkt_discard,
174 .output = ip6_pkt_discard_out,
175 .ops = &ip6_dst_ops,
176 .path = (struct dst_entry*)&ip6_blk_hole_entry,
177 }
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
180 .rt6i_metric = ~(u32) 0,
181 .rt6i_ref = ATOMIC_INIT(1),
182};
183
184#endif
185
1da177e4
LT
186/* allocate dst with ip6_dst_ops */
187static __inline__ struct rt6_info *ip6_dst_alloc(void)
188{
189 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
190}
191
192static void ip6_dst_destroy(struct dst_entry *dst)
193{
194 struct rt6_info *rt = (struct rt6_info *)dst;
195 struct inet6_dev *idev = rt->rt6i_idev;
196
197 if (idev != NULL) {
198 rt->rt6i_idev = NULL;
199 in6_dev_put(idev);
200 }
201}
202
203static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
204 int how)
205{
206 struct rt6_info *rt = (struct rt6_info *)dst;
207 struct inet6_dev *idev = rt->rt6i_idev;
208
209 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
210 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
211 if (loopback_idev != NULL) {
212 rt->rt6i_idev = loopback_idev;
213 in6_dev_put(idev);
214 }
215 }
216}
217
218static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{
220 return (rt->rt6i_flags & RTF_EXPIRES &&
221 time_after(jiffies, rt->rt6i_expires));
222}
223
c71099ac
TG
224static inline int rt6_need_strict(struct in6_addr *daddr)
225{
226 return (ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
228}
229
1da177e4 230/*
c71099ac 231 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
232 */
233
234static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
235 int oif,
236 int strict)
237{
238 struct rt6_info *local = NULL;
239 struct rt6_info *sprt;
240
241 if (oif) {
242 for (sprt = rt; sprt; sprt = sprt->u.next) {
243 struct net_device *dev = sprt->rt6i_dev;
244 if (dev->ifindex == oif)
245 return sprt;
246 if (dev->flags & IFF_LOOPBACK) {
247 if (sprt->rt6i_idev == NULL ||
248 sprt->rt6i_idev->dev->ifindex != oif) {
249 if (strict && oif)
250 continue;
251 if (local && (!oif ||
252 local->rt6i_idev->dev->ifindex == oif))
253 continue;
254 }
255 local = sprt;
256 }
257 }
258
259 if (local)
260 return local;
261
262 if (strict)
263 return &ip6_null_entry;
264 }
265 return rt;
266}
267
27097255
YH
268#ifdef CONFIG_IPV6_ROUTER_PREF
269static void rt6_probe(struct rt6_info *rt)
270{
271 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
272 /*
273 * Okay, this does not seem to be appropriate
274 * for now, however, we need to check if it
275 * is really so; aka Router Reachability Probing.
276 *
277 * Router Reachability Probe MUST be rate-limited
278 * to no more than one per minute.
279 */
280 if (!neigh || (neigh->nud_state & NUD_VALID))
281 return;
282 read_lock_bh(&neigh->lock);
283 if (!(neigh->nud_state & NUD_VALID) &&
52e16356 284 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
27097255
YH
285 struct in6_addr mcaddr;
286 struct in6_addr *target;
287
288 neigh->updated = jiffies;
289 read_unlock_bh(&neigh->lock);
290
291 target = (struct in6_addr *)&neigh->primary_key;
292 addrconf_addr_solict_mult(target, &mcaddr);
293 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
294 } else
295 read_unlock_bh(&neigh->lock);
296}
297#else
/* Router probing compiled out (no CONFIG_IPV6_ROUTER_PREF): do nothing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
302#endif
303
1da177e4 304/*
554cfb7e 305 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 306 */
554cfb7e
YH
307static int inline rt6_check_dev(struct rt6_info *rt, int oif)
308{
309 struct net_device *dev = rt->rt6i_dev;
310 if (!oif || dev->ifindex == oif)
311 return 2;
312 if ((dev->flags & IFF_LOOPBACK) &&
313 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
314 return 1;
315 return 0;
316}
1da177e4 317
554cfb7e 318static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 319{
554cfb7e
YH
320 struct neighbour *neigh = rt->rt6i_nexthop;
321 int m = 0;
4d0c5911
YH
322 if (rt->rt6i_flags & RTF_NONEXTHOP ||
323 !(rt->rt6i_flags & RTF_GATEWAY))
324 m = 1;
325 else if (neigh) {
554cfb7e
YH
326 read_lock_bh(&neigh->lock);
327 if (neigh->nud_state & NUD_VALID)
4d0c5911 328 m = 2;
554cfb7e 329 read_unlock_bh(&neigh->lock);
1da177e4 330 }
554cfb7e 331 return m;
1da177e4
LT
332}
333
554cfb7e
YH
334static int rt6_score_route(struct rt6_info *rt, int oif,
335 int strict)
1da177e4 336{
4d0c5911
YH
337 int m, n;
338
339 m = rt6_check_dev(rt, oif);
77d16f45 340 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 341 return -1;
ebacaaa0
YH
342#ifdef CONFIG_IPV6_ROUTER_PREF
343 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
344#endif
4d0c5911
YH
345 n = rt6_check_neigh(rt);
346 if (n > 1)
ebacaaa0 347 m |= 16;
77d16f45 348 else if (!n && strict & RT6_LOOKUP_F_REACHABLE)
554cfb7e
YH
349 return -1;
350 return m;
351}
352
353static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
354 int strict)
355{
356 struct rt6_info *match = NULL, *last = NULL;
357 struct rt6_info *rt, *rt0 = *head;
358 u32 metric;
359 int mpri = -1;
1da177e4 360
554cfb7e
YH
361 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
362 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 363
554cfb7e 364 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 365 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
554cfb7e
YH
366 rt = rt->u.next) {
367 int m;
1da177e4 368
554cfb7e 369 if (rt6_check_expired(rt))
1da177e4
LT
370 continue;
371
554cfb7e
YH
372 last = rt;
373
374 m = rt6_score_route(rt, oif, strict);
375 if (m < 0)
1da177e4 376 continue;
1da177e4 377
554cfb7e 378 if (m > mpri) {
27097255 379 rt6_probe(match);
554cfb7e 380 match = rt;
1da177e4 381 mpri = m;
27097255
YH
382 } else {
383 rt6_probe(rt);
1da177e4
LT
384 }
385 }
386
554cfb7e 387 if (!match &&
77d16f45 388 (strict & RT6_LOOKUP_F_REACHABLE) &&
554cfb7e
YH
389 last && last != rt0) {
390 /* no entries matched; do round-robin */
34af946a 391 static DEFINE_SPINLOCK(lock);
c302e6d5 392 spin_lock(&lock);
554cfb7e
YH
393 *head = rt0->u.next;
394 rt0->u.next = last->u.next;
395 last->u.next = rt0;
c302e6d5 396 spin_unlock(&lock);
1da177e4 397 }
1da177e4 398
554cfb7e
YH
399 RT6_TRACE("%s() => %p, score=%d\n",
400 __FUNCTION__, match, mpri);
1da177e4 401
554cfb7e 402 return (match ? match : &ip6_null_entry);
1da177e4
LT
403}
404
70ceb4f5
YH
405#ifdef CONFIG_IPV6_ROUTE_INFO
406int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
407 struct in6_addr *gwaddr)
408{
409 struct route_info *rinfo = (struct route_info *) opt;
410 struct in6_addr prefix_buf, *prefix;
411 unsigned int pref;
412 u32 lifetime;
413 struct rt6_info *rt;
414
415 if (len < sizeof(struct route_info)) {
416 return -EINVAL;
417 }
418
419 /* Sanity check for prefix_len and length */
420 if (rinfo->length > 3) {
421 return -EINVAL;
422 } else if (rinfo->prefix_len > 128) {
423 return -EINVAL;
424 } else if (rinfo->prefix_len > 64) {
425 if (rinfo->length < 2) {
426 return -EINVAL;
427 }
428 } else if (rinfo->prefix_len > 0) {
429 if (rinfo->length < 1) {
430 return -EINVAL;
431 }
432 }
433
434 pref = rinfo->route_pref;
435 if (pref == ICMPV6_ROUTER_PREF_INVALID)
436 pref = ICMPV6_ROUTER_PREF_MEDIUM;
437
438 lifetime = htonl(rinfo->lifetime);
439 if (lifetime == 0xffffffff) {
440 /* infinity */
441 } else if (lifetime > 0x7fffffff/HZ) {
442 /* Avoid arithmetic overflow */
443 lifetime = 0x7fffffff/HZ - 1;
444 }
445
446 if (rinfo->length == 3)
447 prefix = (struct in6_addr *)rinfo->prefix;
448 else {
449 /* this function is safe */
450 ipv6_addr_prefix(&prefix_buf,
451 (struct in6_addr *)rinfo->prefix,
452 rinfo->prefix_len);
453 prefix = &prefix_buf;
454 }
455
456 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
457
458 if (rt && !lifetime) {
e0a1ad73 459 ip6_del_rt(rt);
70ceb4f5
YH
460 rt = NULL;
461 }
462
463 if (!rt && lifetime)
464 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
465 pref);
466 else if (rt)
467 rt->rt6i_flags = RTF_ROUTEINFO |
468 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
469
470 if (rt) {
471 if (lifetime == 0xffffffff) {
472 rt->rt6i_flags &= ~RTF_EXPIRES;
473 } else {
474 rt->rt6i_expires = jiffies + HZ * lifetime;
475 rt->rt6i_flags |= RTF_EXPIRES;
476 }
477 dst_release(&rt->u.dst);
478 }
479 return 0;
480}
481#endif
482
982f56f3
YH
/*
 * BACKTRACK(saddr) - climb the FIB tree after a failed selection.
 *
 * Expects the expanding function to provide the locals `rt` and `fn` and
 * the labels `restart` and `out`.  It only acts when `rt` is
 * &ip6_null_entry: it then walks from `fn` towards the root (descending
 * into a parent's subtree with @saddr where one exists) and jumps to
 * `restart` at the first node carrying RTN_RTINFO, or to `out` once a
 * node flagged RTN_TL_ROOT is reached.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (fn) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(pn->subtree, NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
500
501static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
502 struct flowi *fl, int flags)
1da177e4
LT
503{
504 struct fib6_node *fn;
505 struct rt6_info *rt;
506
c71099ac
TG
507 read_lock_bh(&table->tb6_lock);
508 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
509restart:
510 rt = fn->leaf;
77d16f45 511 rt = rt6_device_match(rt, fl->oif, flags);
982f56f3 512 BACKTRACK(&fl->fl6_src);
1da177e4 513 dst_hold(&rt->u.dst);
c71099ac
TG
514out:
515 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
516
517 rt->u.dst.lastuse = jiffies;
c71099ac
TG
518 rt->u.dst.__use++;
519
520 return rt;
521
522}
523
524struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
525 int oif, int strict)
526{
527 struct flowi fl = {
528 .oif = oif,
529 .nl_u = {
530 .ip6_u = {
531 .daddr = *daddr,
532 /* TODO: saddr */
533 },
534 },
535 };
536 struct dst_entry *dst;
77d16f45 537 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac
TG
538
539 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
540 if (dst->error == 0)
541 return (struct rt6_info *) dst;
542
543 dst_release(dst);
544
1da177e4
LT
545 return NULL;
546}
547
c71099ac 548/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
549 It takes new route entry, the addition fails by any reason the
550 route is freed. In any case, if caller does not hold it, it may
551 be destroyed.
552 */
553
86872cb5 554static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
555{
556 int err;
c71099ac 557 struct fib6_table *table;
1da177e4 558
c71099ac
TG
559 table = rt->rt6i_table;
560 write_lock_bh(&table->tb6_lock);
86872cb5 561 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 562 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
563
564 return err;
565}
566
40e22e8f
TG
567int ip6_ins_rt(struct rt6_info *rt)
568{
86872cb5 569 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
570}
571
95a9a5ba
YH
572static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
573 struct in6_addr *saddr)
1da177e4 574{
1da177e4
LT
575 struct rt6_info *rt;
576
577 /*
578 * Clone the route.
579 */
580
581 rt = ip6_rt_copy(ort);
582
583 if (rt) {
58c4fb86
YH
584 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
585 if (rt->rt6i_dst.plen != 128 &&
586 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
587 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 588 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 589 }
1da177e4 590
58c4fb86 591 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
592 rt->rt6i_dst.plen = 128;
593 rt->rt6i_flags |= RTF_CACHE;
594 rt->u.dst.flags |= DST_HOST;
595
596#ifdef CONFIG_IPV6_SUBTREES
597 if (rt->rt6i_src.plen && saddr) {
598 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
599 rt->rt6i_src.plen = 128;
600 }
601#endif
602
603 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
604
95a9a5ba 605 }
1da177e4 606
95a9a5ba
YH
607 return rt;
608}
1da177e4 609
299d9939
YH
610static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
611{
612 struct rt6_info *rt = ip6_rt_copy(ort);
613 if (rt) {
614 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
615 rt->rt6i_dst.plen = 128;
616 rt->rt6i_flags |= RTF_CACHE;
617 if (rt->rt6i_flags & RTF_REJECT)
618 rt->u.dst.error = ort->u.dst.error;
619 rt->u.dst.flags |= DST_HOST;
620 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
621 }
622 return rt;
623}
624
8ce11e6a
AB
625static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
626 struct flowi *fl, int flags)
1da177e4
LT
627{
628 struct fib6_node *fn;
519fbd87 629 struct rt6_info *rt, *nrt;
c71099ac 630 int strict = 0;
1da177e4 631 int attempts = 3;
519fbd87 632 int err;
77d16f45 633 int reachable = RT6_LOOKUP_F_REACHABLE;
1da177e4 634
77d16f45 635 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
636
637relookup:
c71099ac 638 read_lock_bh(&table->tb6_lock);
1da177e4 639
8238dd06 640restart_2:
c71099ac 641 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
642
643restart:
c71099ac 644 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
982f56f3 645 BACKTRACK(&fl->fl6_src);
8238dd06
YH
646 if (rt == &ip6_null_entry ||
647 rt->rt6i_flags & RTF_CACHE)
1ddef044 648 goto out;
1da177e4 649
fb9de91e 650 dst_hold(&rt->u.dst);
c71099ac 651 read_unlock_bh(&table->tb6_lock);
fb9de91e 652
519fbd87 653 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 654 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
655 else {
656#if CLONE_OFFLINK_ROUTE
c71099ac 657 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
658#else
659 goto out2;
660#endif
661 }
e40cf353 662
519fbd87
YH
663 dst_release(&rt->u.dst);
664 rt = nrt ? : &ip6_null_entry;
1da177e4 665
519fbd87
YH
666 dst_hold(&rt->u.dst);
667 if (nrt) {
40e22e8f 668 err = ip6_ins_rt(nrt);
519fbd87 669 if (!err)
1da177e4 670 goto out2;
1da177e4 671 }
1da177e4 672
519fbd87
YH
673 if (--attempts <= 0)
674 goto out2;
675
676 /*
c71099ac 677 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
678 * released someone could insert this route. Relookup.
679 */
680 dst_release(&rt->u.dst);
681 goto relookup;
682
683out:
8238dd06
YH
684 if (reachable) {
685 reachable = 0;
686 goto restart_2;
687 }
519fbd87 688 dst_hold(&rt->u.dst);
c71099ac 689 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
690out2:
691 rt->u.dst.lastuse = jiffies;
692 rt->u.dst.__use++;
c71099ac
TG
693
694 return rt;
1da177e4
LT
695}
696
c71099ac
TG
697void ip6_route_input(struct sk_buff *skb)
698{
699 struct ipv6hdr *iph = skb->nh.ipv6h;
700 struct flowi fl = {
701 .iif = skb->dev->ifindex,
702 .nl_u = {
703 .ip6_u = {
704 .daddr = iph->daddr,
705 .saddr = iph->saddr,
267935b1 706#ifdef CONFIG_IPV6_ROUTE_FWMARK
75bff8f0 707 .fwmark = skb->nfmark,
267935b1 708#endif
c71099ac
TG
709 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
710 },
711 },
712 .proto = iph->nexthdr,
713 };
77d16f45 714 int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0;
c71099ac
TG
715
716 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
717}
718
719static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
720 struct flowi *fl, int flags)
1da177e4
LT
721{
722 struct fib6_node *fn;
519fbd87 723 struct rt6_info *rt, *nrt;
c71099ac 724 int strict = 0;
1da177e4 725 int attempts = 3;
519fbd87 726 int err;
77d16f45 727 int reachable = RT6_LOOKUP_F_REACHABLE;
1da177e4 728
77d16f45 729 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
730
731relookup:
c71099ac 732 read_lock_bh(&table->tb6_lock);
1da177e4 733
8238dd06 734restart_2:
c71099ac 735 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
736
737restart:
8238dd06 738 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
982f56f3 739 BACKTRACK(&fl->fl6_src);
8238dd06
YH
740 if (rt == &ip6_null_entry ||
741 rt->rt6i_flags & RTF_CACHE)
1da177e4 742 goto out;
1da177e4 743
fb9de91e 744 dst_hold(&rt->u.dst);
c71099ac 745 read_unlock_bh(&table->tb6_lock);
fb9de91e 746
519fbd87 747 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 748 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
749 else {
750#if CLONE_OFFLINK_ROUTE
751 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
752#else
753 goto out2;
754#endif
755 }
1da177e4 756
519fbd87
YH
757 dst_release(&rt->u.dst);
758 rt = nrt ? : &ip6_null_entry;
1da177e4 759
519fbd87
YH
760 dst_hold(&rt->u.dst);
761 if (nrt) {
40e22e8f 762 err = ip6_ins_rt(nrt);
519fbd87 763 if (!err)
1da177e4 764 goto out2;
1da177e4 765 }
e40cf353 766
519fbd87
YH
767 if (--attempts <= 0)
768 goto out2;
769
770 /*
c71099ac 771 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
772 * released someone could insert this route. Relookup.
773 */
774 dst_release(&rt->u.dst);
775 goto relookup;
776
777out:
8238dd06
YH
778 if (reachable) {
779 reachable = 0;
780 goto restart_2;
781 }
519fbd87 782 dst_hold(&rt->u.dst);
c71099ac 783 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
784out2:
785 rt->u.dst.lastuse = jiffies;
786 rt->u.dst.__use++;
c71099ac
TG
787 return rt;
788}
789
790struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
791{
792 int flags = 0;
793
794 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 795 flags |= RT6_LOOKUP_F_IFACE;
c71099ac
TG
796
797 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
798}
799
800
801/*
802 * Destination cache support functions
803 */
804
805static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
806{
807 struct rt6_info *rt;
808
809 rt = (struct rt6_info *) dst;
810
811 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
812 return dst;
813
814 return NULL;
815}
816
817static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
818{
819 struct rt6_info *rt = (struct rt6_info *) dst;
820
821 if (rt) {
822 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 823 ip6_del_rt(rt);
1da177e4
LT
824 else
825 dst_release(dst);
826 }
827 return NULL;
828}
829
830static void ip6_link_failure(struct sk_buff *skb)
831{
832 struct rt6_info *rt;
833
834 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
835
836 rt = (struct rt6_info *) skb->dst;
837 if (rt) {
838 if (rt->rt6i_flags&RTF_CACHE) {
839 dst_set_expires(&rt->u.dst, 0);
840 rt->rt6i_flags |= RTF_EXPIRES;
841 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
842 rt->rt6i_node->fn_sernum = -1;
843 }
844}
845
846static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
847{
848 struct rt6_info *rt6 = (struct rt6_info*)dst;
849
850 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
851 rt6->rt6i_flags |= RTF_MODIFIED;
852 if (mtu < IPV6_MIN_MTU) {
853 mtu = IPV6_MIN_MTU;
854 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
855 }
856 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 857 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
858 }
859}
860
1da177e4
LT
861static int ipv6_get_mtu(struct net_device *dev);
862
863static inline unsigned int ipv6_advmss(unsigned int mtu)
864{
865 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
866
867 if (mtu < ip6_rt_min_advmss)
868 mtu = ip6_rt_min_advmss;
869
870 /*
871 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
872 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
873 * IPV6_MAXPLEN is also valid and means: "any MSS,
874 * rely only on pmtu discovery"
875 */
876 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
877 mtu = IPV6_MAXPLEN;
878 return mtu;
879}
880
5d0bbeeb 881static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 882static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 883
1da177e4
LT
884struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
885 struct neighbour *neigh,
886 struct in6_addr *addr,
887 int (*output)(struct sk_buff *))
888{
889 struct rt6_info *rt;
890 struct inet6_dev *idev = in6_dev_get(dev);
891
892 if (unlikely(idev == NULL))
893 return NULL;
894
895 rt = ip6_dst_alloc();
896 if (unlikely(rt == NULL)) {
897 in6_dev_put(idev);
898 goto out;
899 }
900
901 dev_hold(dev);
902 if (neigh)
903 neigh_hold(neigh);
904 else
905 neigh = ndisc_get_neigh(dev, addr);
906
907 rt->rt6i_dev = dev;
908 rt->rt6i_idev = idev;
909 rt->rt6i_nexthop = neigh;
910 atomic_set(&rt->u.dst.__refcnt, 1);
911 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
912 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
913 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
914 rt->u.dst.output = output;
915
916#if 0 /* there's no chance to use these for ndisc */
917 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
918 ? DST_HOST
919 : 0;
920 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
921 rt->rt6i_dst.plen = 128;
922#endif
923
5d0bbeeb 924 spin_lock_bh(&ndisc_lock);
1da177e4
LT
925 rt->u.dst.next = ndisc_dst_gc_list;
926 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 927 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
928
929 fib6_force_start_gc();
930
931out:
932 return (struct dst_entry *)rt;
933}
934
935int ndisc_dst_gc(int *more)
936{
937 struct dst_entry *dst, *next, **pprev;
938 int freed;
939
940 next = NULL;
5d0bbeeb
TG
941 freed = 0;
942
943 spin_lock_bh(&ndisc_lock);
1da177e4 944 pprev = &ndisc_dst_gc_list;
5d0bbeeb 945
1da177e4
LT
946 while ((dst = *pprev) != NULL) {
947 if (!atomic_read(&dst->__refcnt)) {
948 *pprev = dst->next;
949 dst_free(dst);
950 freed++;
951 } else {
952 pprev = &dst->next;
953 (*more)++;
954 }
955 }
956
5d0bbeeb
TG
957 spin_unlock_bh(&ndisc_lock);
958
1da177e4
LT
959 return freed;
960}
961
962static int ip6_dst_gc(void)
963{
964 static unsigned expire = 30*HZ;
965 static unsigned long last_gc;
966 unsigned long now = jiffies;
967
968 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
969 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
970 goto out;
971
972 expire++;
973 fib6_run_gc(expire);
974 last_gc = now;
975 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
976 expire = ip6_rt_gc_timeout>>1;
977
978out:
979 expire -= expire>>ip6_rt_gc_elasticity;
980 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
981}
982
983/* Clean host part of a prefix. Not necessary in radix tree,
984 but results in cleaner routing tables.
985
986 Remove it only when all the things will work!
987 */
988
989static int ipv6_get_mtu(struct net_device *dev)
990{
991 int mtu = IPV6_MIN_MTU;
992 struct inet6_dev *idev;
993
994 idev = in6_dev_get(dev);
995 if (idev) {
996 mtu = idev->cnf.mtu6;
997 in6_dev_put(idev);
998 }
999 return mtu;
1000}
1001
1002int ipv6_get_hoplimit(struct net_device *dev)
1003{
1004 int hoplimit = ipv6_devconf.hop_limit;
1005 struct inet6_dev *idev;
1006
1007 idev = in6_dev_get(dev);
1008 if (idev) {
1009 hoplimit = idev->cnf.hop_limit;
1010 in6_dev_put(idev);
1011 }
1012 return hoplimit;
1013}
1014
1015/*
1016 *
1017 */
1018
86872cb5 1019int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1020{
1021 int err;
1da177e4
LT
1022 struct rt6_info *rt = NULL;
1023 struct net_device *dev = NULL;
1024 struct inet6_dev *idev = NULL;
c71099ac 1025 struct fib6_table *table;
1da177e4
LT
1026 int addr_type;
1027
86872cb5 1028 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1029 return -EINVAL;
1030#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1031 if (cfg->fc_src_len)
1da177e4
LT
1032 return -EINVAL;
1033#endif
86872cb5 1034 if (cfg->fc_ifindex) {
1da177e4 1035 err = -ENODEV;
86872cb5 1036 dev = dev_get_by_index(cfg->fc_ifindex);
1da177e4
LT
1037 if (!dev)
1038 goto out;
1039 idev = in6_dev_get(dev);
1040 if (!idev)
1041 goto out;
1042 }
1043
86872cb5
TG
1044 if (cfg->fc_metric == 0)
1045 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1046
86872cb5 1047 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1048 if (table == NULL) {
1049 err = -ENOBUFS;
1050 goto out;
1051 }
1052
1da177e4
LT
1053 rt = ip6_dst_alloc();
1054
1055 if (rt == NULL) {
1056 err = -ENOMEM;
1057 goto out;
1058 }
1059
1060 rt->u.dst.obsolete = -1;
86872cb5 1061 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1062
86872cb5
TG
1063 if (cfg->fc_protocol == RTPROT_UNSPEC)
1064 cfg->fc_protocol = RTPROT_BOOT;
1065 rt->rt6i_protocol = cfg->fc_protocol;
1066
1067 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1068
1069 if (addr_type & IPV6_ADDR_MULTICAST)
1070 rt->u.dst.input = ip6_mc_input;
1071 else
1072 rt->u.dst.input = ip6_forward;
1073
1074 rt->u.dst.output = ip6_output;
1075
86872cb5
TG
1076 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1077 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1078 if (rt->rt6i_dst.plen == 128)
1079 rt->u.dst.flags = DST_HOST;
1080
1081#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1082 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1083 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1084#endif
1085
86872cb5 1086 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1087
1088 /* We cannot add true routes via loopback here,
1089 they would result in kernel looping; promote them to reject routes
1090 */
86872cb5 1091 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1092 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1093 /* hold loopback dev/idev if we haven't done so. */
1094 if (dev != &loopback_dev) {
1095 if (dev) {
1096 dev_put(dev);
1097 in6_dev_put(idev);
1098 }
1099 dev = &loopback_dev;
1100 dev_hold(dev);
1101 idev = in6_dev_get(dev);
1102 if (!idev) {
1103 err = -ENODEV;
1104 goto out;
1105 }
1106 }
1107 rt->u.dst.output = ip6_pkt_discard_out;
1108 rt->u.dst.input = ip6_pkt_discard;
1109 rt->u.dst.error = -ENETUNREACH;
1110 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1111 goto install_route;
1112 }
1113
86872cb5 1114 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1115 struct in6_addr *gw_addr;
1116 int gwa_type;
1117
86872cb5
TG
1118 gw_addr = &cfg->fc_gateway;
1119 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1120 gwa_type = ipv6_addr_type(gw_addr);
1121
1122 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1123 struct rt6_info *grt;
1124
1125 /* IPv6 strictly inhibits using not link-local
1126 addresses as nexthop address.
1127 Otherwise, router will not able to send redirects.
1128 It is very good, but in some (rare!) circumstances
1129 (SIT, PtP, NBMA NOARP links) it is handy to allow
1130 some exceptions. --ANK
1131 */
1132 err = -EINVAL;
1133 if (!(gwa_type&IPV6_ADDR_UNICAST))
1134 goto out;
1135
86872cb5 1136 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1137
1138 err = -EHOSTUNREACH;
1139 if (grt == NULL)
1140 goto out;
1141 if (dev) {
1142 if (dev != grt->rt6i_dev) {
1143 dst_release(&grt->u.dst);
1144 goto out;
1145 }
1146 } else {
1147 dev = grt->rt6i_dev;
1148 idev = grt->rt6i_idev;
1149 dev_hold(dev);
1150 in6_dev_hold(grt->rt6i_idev);
1151 }
1152 if (!(grt->rt6i_flags&RTF_GATEWAY))
1153 err = 0;
1154 dst_release(&grt->u.dst);
1155
1156 if (err)
1157 goto out;
1158 }
1159 err = -EINVAL;
1160 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1161 goto out;
1162 }
1163
1164 err = -ENODEV;
1165 if (dev == NULL)
1166 goto out;
1167
86872cb5 1168 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1169 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1170 if (IS_ERR(rt->rt6i_nexthop)) {
1171 err = PTR_ERR(rt->rt6i_nexthop);
1172 rt->rt6i_nexthop = NULL;
1173 goto out;
1174 }
1175 }
1176
86872cb5 1177 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1178
1179install_route:
86872cb5
TG
1180 if (cfg->fc_mx) {
1181 struct nlattr *nla;
1182 int remaining;
1183
1184 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1185 int type = nla->nla_type;
1186
1187 if (type) {
1188 if (type > RTAX_MAX) {
1da177e4
LT
1189 err = -EINVAL;
1190 goto out;
1191 }
86872cb5
TG
1192
1193 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1194 }
1da177e4
LT
1195 }
1196 }
1197
1198 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1199 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1200 if (!rt->u.dst.metrics[RTAX_MTU-1])
1201 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1202 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1203 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1204 rt->u.dst.dev = dev;
1205 rt->rt6i_idev = idev;
c71099ac 1206 rt->rt6i_table = table;
86872cb5 1207 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1208
1209out:
1210 if (dev)
1211 dev_put(dev);
1212 if (idev)
1213 in6_dev_put(idev);
1214 if (rt)
1215 dst_free((struct dst_entry *) rt);
1216 return err;
1217}
1218
86872cb5 1219static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1220{
1221 int err;
c71099ac 1222 struct fib6_table *table;
1da177e4 1223
6c813a72
PM
1224 if (rt == &ip6_null_entry)
1225 return -ENOENT;
1226
c71099ac
TG
1227 table = rt->rt6i_table;
1228 write_lock_bh(&table->tb6_lock);
1da177e4 1229
86872cb5 1230 err = fib6_del(rt, info);
1da177e4
LT
1231 dst_release(&rt->u.dst);
1232
c71099ac 1233 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1234
1235 return err;
1236}
1237
e0a1ad73
TG
1238int ip6_del_rt(struct rt6_info *rt)
1239{
86872cb5 1240 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1241}
1242
86872cb5 1243static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1244{
c71099ac 1245 struct fib6_table *table;
1da177e4
LT
1246 struct fib6_node *fn;
1247 struct rt6_info *rt;
1248 int err = -ESRCH;
1249
86872cb5 1250 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1251 if (table == NULL)
1252 return err;
1253
1254 read_lock_bh(&table->tb6_lock);
1da177e4 1255
c71099ac 1256 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1257 &cfg->fc_dst, cfg->fc_dst_len,
1258 &cfg->fc_src, cfg->fc_src_len);
1da177e4
LT
1259
1260 if (fn) {
1261 for (rt = fn->leaf; rt; rt = rt->u.next) {
86872cb5 1262 if (cfg->fc_ifindex &&
1da177e4 1263 (rt->rt6i_dev == NULL ||
86872cb5 1264 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1265 continue;
86872cb5
TG
1266 if (cfg->fc_flags & RTF_GATEWAY &&
1267 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1268 continue;
86872cb5 1269 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1270 continue;
1271 dst_hold(&rt->u.dst);
c71099ac 1272 read_unlock_bh(&table->tb6_lock);
1da177e4 1273
86872cb5 1274 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1275 }
1276 }
c71099ac 1277 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1278
1279 return err;
1280}
1281
1282/*
1283 * Handle redirects
1284 */
a6279458
YH
/*
 * Flow key used for redirect lookups: a struct flowi extended with the
 * address of the router that sent the redirect.  'fl' has to stay the
 * first member because a pointer to this structure is passed around as
 * a 'struct flowi *' and cast back in __ip6_route_redirect().
 */
1285struct ip6rd_flowi {
1286 struct flowi fl;
1287 struct in6_addr gateway;
1288};
1289
1290static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1291 struct flowi *fl,
1292 int flags)
1da177e4 1293{
a6279458
YH
1294 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1295 struct rt6_info *rt;
e843b9e1 1296 struct fib6_node *fn;
c71099ac 1297
1da177e4 1298 /*
e843b9e1
YH
1299 * Get the "current" route for this destination and
1300 * check if the redirect has come from approriate router.
1301 *
1302 * RFC 2461 specifies that redirects should only be
1303 * accepted if they come from the nexthop to the target.
1304 * Due to the way the routes are chosen, this notion
1305 * is a bit fuzzy and one might need to check all possible
1306 * routes.
1da177e4 1307 */
1da177e4 1308
c71099ac 1309 read_lock_bh(&table->tb6_lock);
a6279458 1310 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1
YH
1311restart:
1312 for (rt = fn->leaf; rt; rt = rt->u.next) {
1313 /*
1314 * Current route is on-link; redirect is always invalid.
1315 *
1316 * Seems, previous statement is not true. It could
1317 * be node, which looks for us as on-link (f.e. proxy ndisc)
1318 * But then router serving it might decide, that we should
1319 * know truth 8)8) --ANK (980726).
1320 */
1321 if (rt6_check_expired(rt))
1322 continue;
1323 if (!(rt->rt6i_flags & RTF_GATEWAY))
1324 continue;
a6279458 1325 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1326 continue;
a6279458 1327 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1328 continue;
1329 break;
1330 }
a6279458 1331
cb15d9c2 1332 if (!rt)
a6279458 1333 rt = &ip6_null_entry;
cb15d9c2
YH
1334 BACKTRACK(&fl->fl6_src);
1335out:
a6279458
YH
1336 dst_hold(&rt->u.dst);
1337
c71099ac 1338 read_unlock_bh(&table->tb6_lock);
e843b9e1 1339
a6279458
YH
1340 return rt;
1341};
1342
1343static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1344 struct in6_addr *src,
1345 struct in6_addr *gateway,
1346 struct net_device *dev)
1347{
1348 struct ip6rd_flowi rdfl = {
1349 .fl = {
1350 .oif = dev->ifindex,
1351 .nl_u = {
1352 .ip6_u = {
1353 .daddr = *dest,
1354 .saddr = *src,
1355 },
1356 },
1357 },
1358 .gateway = *gateway,
1359 };
77d16f45 1360 int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0;
a6279458
YH
1361
1362 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1363}
1364
1365void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1366 struct in6_addr *saddr,
1367 struct neighbour *neigh, u8 *lladdr, int on_link)
1368{
1369 struct rt6_info *rt, *nrt = NULL;
1370 struct netevent_redirect netevent;
1371
1372 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1373
1374 if (rt == &ip6_null_entry) {
1da177e4
LT
1375 if (net_ratelimit())
1376 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1377 "for redirect target\n");
a6279458 1378 goto out;
1da177e4
LT
1379 }
1380
1da177e4
LT
1381 /*
1382 * We have finally decided to accept it.
1383 */
1384
1385 neigh_update(neigh, lladdr, NUD_STALE,
1386 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1387 NEIGH_UPDATE_F_OVERRIDE|
1388 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1389 NEIGH_UPDATE_F_ISROUTER))
1390 );
1391
1392 /*
1393 * Redirect received -> path was valid.
1394 * Look, redirects are sent only in response to data packets,
1395 * so that this nexthop apparently is reachable. --ANK
1396 */
1397 dst_confirm(&rt->u.dst);
1398
1399 /* Duplicate redirect: silently ignore. */
1400 if (neigh == rt->u.dst.neighbour)
1401 goto out;
1402
1403 nrt = ip6_rt_copy(rt);
1404 if (nrt == NULL)
1405 goto out;
1406
1407 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1408 if (on_link)
1409 nrt->rt6i_flags &= ~RTF_GATEWAY;
1410
1411 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1412 nrt->rt6i_dst.plen = 128;
1413 nrt->u.dst.flags |= DST_HOST;
1414
1415 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1416 nrt->rt6i_nexthop = neigh_clone(neigh);
1417 /* Reset pmtu, it may be better */
1418 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1419 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1420
40e22e8f 1421 if (ip6_ins_rt(nrt))
1da177e4
LT
1422 goto out;
1423
8d71740c
TT
1424 netevent.old = &rt->u.dst;
1425 netevent.new = &nrt->u.dst;
1426 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1427
1da177e4 1428 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1429 ip6_del_rt(rt);
1da177e4
LT
1430 return;
1431 }
1432
1433out:
1434 dst_release(&rt->u.dst);
1435 return;
1436}
1437
1438/*
1439 * Handle ICMP "packet too big" messages
1440 * i.e. Path MTU discovery
1441 */
1442
1443void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1444 struct net_device *dev, u32 pmtu)
1445{
1446 struct rt6_info *rt, *nrt;
1447 int allfrag = 0;
1448
1449 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1450 if (rt == NULL)
1451 return;
1452
1453 if (pmtu >= dst_mtu(&rt->u.dst))
1454 goto out;
1455
1456 if (pmtu < IPV6_MIN_MTU) {
1457 /*
1458 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1459 * MTU (1280) and a fragment header should always be included
1460 * after a node receiving Too Big message reporting PMTU is
1461 * less than the IPv6 Minimum Link MTU.
1462 */
1463 pmtu = IPV6_MIN_MTU;
1464 allfrag = 1;
1465 }
1466
1467 /* New mtu received -> path was valid.
1468 They are sent only in response to data packets,
1469 so that this nexthop apparently is reachable. --ANK
1470 */
1471 dst_confirm(&rt->u.dst);
1472
1473 /* Host route. If it is static, it would be better
1474 not to override it, but add new one, so that
1475 when cache entry will expire old pmtu
1476 would return automatically.
1477 */
1478 if (rt->rt6i_flags & RTF_CACHE) {
1479 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1480 if (allfrag)
1481 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1482 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1483 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1484 goto out;
1485 }
1486
1487 /* Network route.
1488 Two cases are possible:
1489 1. It is connected route. Action: COW
1490 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1491 */
d5315b50 1492 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1493 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1494 else
1495 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1496
d5315b50 1497 if (nrt) {
a1e78363
YH
1498 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1499 if (allfrag)
1500 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1501
1502 /* According to RFC 1981, detecting PMTU increase shouldn't be
1503 * happened within 5 mins, the recommended timer is 10 mins.
1504 * Here this route expiration time is set to ip6_rt_mtu_expires
1505 * which is 10 mins. After 10 mins the decreased pmtu is expired
1506 * and detecting PMTU increase will be automatically happened.
1507 */
1508 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1509 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1510
40e22e8f 1511 ip6_ins_rt(nrt);
1da177e4 1512 }
1da177e4
LT
1513out:
1514 dst_release(&rt->u.dst);
1515}
1516
1517/*
1518 * Misc support functions
1519 */
1520
1521static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1522{
1523 struct rt6_info *rt = ip6_dst_alloc();
1524
1525 if (rt) {
1526 rt->u.dst.input = ort->u.dst.input;
1527 rt->u.dst.output = ort->u.dst.output;
1528
1529 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1530 rt->u.dst.dev = ort->u.dst.dev;
1531 if (rt->u.dst.dev)
1532 dev_hold(rt->u.dst.dev);
1533 rt->rt6i_idev = ort->rt6i_idev;
1534 if (rt->rt6i_idev)
1535 in6_dev_hold(rt->rt6i_idev);
1536 rt->u.dst.lastuse = jiffies;
1537 rt->rt6i_expires = 0;
1538
1539 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1540 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1541 rt->rt6i_metric = 0;
1542
1543 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1544#ifdef CONFIG_IPV6_SUBTREES
1545 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1546#endif
c71099ac 1547 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1548 }
1549 return rt;
1550}
1551
70ceb4f5
YH
1552#ifdef CONFIG_IPV6_ROUTE_INFO
1553static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1554 struct in6_addr *gwaddr, int ifindex)
1555{
1556 struct fib6_node *fn;
1557 struct rt6_info *rt = NULL;
c71099ac
TG
1558 struct fib6_table *table;
1559
1560 table = fib6_get_table(RT6_TABLE_INFO);
1561 if (table == NULL)
1562 return NULL;
70ceb4f5 1563
c71099ac
TG
1564 write_lock_bh(&table->tb6_lock);
1565 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1566 if (!fn)
1567 goto out;
1568
1569 for (rt = fn->leaf; rt; rt = rt->u.next) {
1570 if (rt->rt6i_dev->ifindex != ifindex)
1571 continue;
1572 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1573 continue;
1574 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1575 continue;
1576 dst_hold(&rt->u.dst);
1577 break;
1578 }
1579out:
c71099ac 1580 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1581 return rt;
1582}
1583
1584static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1585 struct in6_addr *gwaddr, int ifindex,
1586 unsigned pref)
1587{
86872cb5
TG
1588 struct fib6_config cfg = {
1589 .fc_table = RT6_TABLE_INFO,
1590 .fc_metric = 1024,
1591 .fc_ifindex = ifindex,
1592 .fc_dst_len = prefixlen,
1593 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1594 RTF_UP | RTF_PREF(pref),
1595 };
1596
1597 ipv6_addr_copy(&cfg.fc_dst, prefix);
1598 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1599
e317da96
YH
1600 /* We should treat it as a default route if prefix length is 0. */
1601 if (!prefixlen)
86872cb5 1602 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1603
86872cb5 1604 ip6_route_add(&cfg);
70ceb4f5
YH
1605
1606 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1607}
1608#endif
1609
1da177e4
LT
1610struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1611{
1612 struct rt6_info *rt;
c71099ac 1613 struct fib6_table *table;
1da177e4 1614
c71099ac
TG
1615 table = fib6_get_table(RT6_TABLE_DFLT);
1616 if (table == NULL)
1617 return NULL;
1da177e4 1618
c71099ac
TG
1619 write_lock_bh(&table->tb6_lock);
1620 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1da177e4 1621 if (dev == rt->rt6i_dev &&
045927ff 1622 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1623 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1624 break;
1625 }
1626 if (rt)
1627 dst_hold(&rt->u.dst);
c71099ac 1628 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1629 return rt;
1630}
1631
1632struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1633 struct net_device *dev,
1634 unsigned int pref)
1da177e4 1635{
86872cb5
TG
1636 struct fib6_config cfg = {
1637 .fc_table = RT6_TABLE_DFLT,
1638 .fc_metric = 1024,
1639 .fc_ifindex = dev->ifindex,
1640 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1641 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1642 };
1da177e4 1643
86872cb5 1644 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1645
86872cb5 1646 ip6_route_add(&cfg);
1da177e4 1647
1da177e4
LT
1648 return rt6_get_dflt_router(gwaddr, dev);
1649}
1650
1651void rt6_purge_dflt_routers(void)
1652{
1653 struct rt6_info *rt;
c71099ac
TG
1654 struct fib6_table *table;
1655
1656 /* NOTE: Keep consistent with rt6_get_dflt_router */
1657 table = fib6_get_table(RT6_TABLE_DFLT);
1658 if (table == NULL)
1659 return;
1da177e4
LT
1660
1661restart:
c71099ac
TG
1662 read_lock_bh(&table->tb6_lock);
1663 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1da177e4
LT
1664 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1665 dst_hold(&rt->u.dst);
c71099ac 1666 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1667 ip6_del_rt(rt);
1da177e4
LT
1668 goto restart;
1669 }
1670 }
c71099ac 1671 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1672}
1673
86872cb5
TG
1674static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1675 struct fib6_config *cfg)
1676{
1677 memset(cfg, 0, sizeof(*cfg));
1678
1679 cfg->fc_table = RT6_TABLE_MAIN;
1680 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1681 cfg->fc_metric = rtmsg->rtmsg_metric;
1682 cfg->fc_expires = rtmsg->rtmsg_info;
1683 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1684 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1685 cfg->fc_flags = rtmsg->rtmsg_flags;
1686
1687 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1688 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1689 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1690}
1691
1da177e4
LT
1692int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1693{
86872cb5 1694 struct fib6_config cfg;
1da177e4
LT
1695 struct in6_rtmsg rtmsg;
1696 int err;
1697
1698 switch(cmd) {
1699 case SIOCADDRT: /* Add a route */
1700 case SIOCDELRT: /* Delete a route */
1701 if (!capable(CAP_NET_ADMIN))
1702 return -EPERM;
1703 err = copy_from_user(&rtmsg, arg,
1704 sizeof(struct in6_rtmsg));
1705 if (err)
1706 return -EFAULT;
86872cb5
TG
1707
1708 rtmsg_to_fib6_config(&rtmsg, &cfg);
1709
1da177e4
LT
1710 rtnl_lock();
1711 switch (cmd) {
1712 case SIOCADDRT:
86872cb5 1713 err = ip6_route_add(&cfg);
1da177e4
LT
1714 break;
1715 case SIOCDELRT:
86872cb5 1716 err = ip6_route_del(&cfg);
1da177e4
LT
1717 break;
1718 default:
1719 err = -EINVAL;
1720 }
1721 rtnl_unlock();
1722
1723 return err;
1724 };
1725
1726 return -EINVAL;
1727}
1728
1729/*
1730 * Drop the packet on the floor
1731 */
1732
20380731 1733static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4 1734{
76d0cc1b
LL
1735 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1736 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1737 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1738
1da177e4
LT
1739 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1740 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1741 kfree_skb(skb);
1742 return 0;
1743}
1744
20380731 1745static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1746{
1747 skb->dev = skb->dst->dev;
1748 return ip6_pkt_discard(skb);
1749}
1750
1751/*
1752 * Allocate a dst for local (unicast / anycast) address.
1753 */
1754
1755struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1756 const struct in6_addr *addr,
1757 int anycast)
1758{
1759 struct rt6_info *rt = ip6_dst_alloc();
1760
1761 if (rt == NULL)
1762 return ERR_PTR(-ENOMEM);
1763
1764 dev_hold(&loopback_dev);
1765 in6_dev_hold(idev);
1766
1767 rt->u.dst.flags = DST_HOST;
1768 rt->u.dst.input = ip6_input;
1769 rt->u.dst.output = ip6_output;
1770 rt->rt6i_dev = &loopback_dev;
1771 rt->rt6i_idev = idev;
1772 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1773 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1774 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1775 rt->u.dst.obsolete = -1;
1776
1777 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1778 if (anycast)
1779 rt->rt6i_flags |= RTF_ANYCAST;
1780 else
1da177e4
LT
1781 rt->rt6i_flags |= RTF_LOCAL;
1782 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1783 if (rt->rt6i_nexthop == NULL) {
1784 dst_free((struct dst_entry *) rt);
1785 return ERR_PTR(-ENOMEM);
1786 }
1787
1788 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1789 rt->rt6i_dst.plen = 128;
c71099ac 1790 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1791
1792 atomic_set(&rt->u.dst.__refcnt, 1);
1793
1794 return rt;
1795}
1796
1797static int fib6_ifdown(struct rt6_info *rt, void *arg)
1798{
1799 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1800 rt != &ip6_null_entry) {
1801 RT6_TRACE("deleted by ifdown %p\n", rt);
1802 return -1;
1803 }
1804 return 0;
1805}
1806
/*
 * Run fib6_ifdown() over all routes via fib6_clean_all(), passing @dev
 * as the callback argument.  fib6_ifdown() matches every route when
 * @dev is NULL.
 */
1807void rt6_ifdown(struct net_device *dev)
1808{
c71099ac 1809 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1810}
1811
/*
 * Argument bundle handed to rt6_mtu_change_route() through
 * fib6_clean_all(): the device whose MTU changed and its new MTU.
 */
1812struct rt6_mtu_change_arg
1813{
1814 struct net_device *dev;
1815 unsigned mtu;
1816};
1817
1818static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1819{
1820 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1821 struct inet6_dev *idev;
1822
1823 /* In IPv6 pmtu discovery is not optional,
1824 so that RTAX_MTU lock cannot disable it.
1825 We still use this lock to block changes
1826 caused by addrconf/ndisc.
1827 */
1828
1829 idev = __in6_dev_get(arg->dev);
1830 if (idev == NULL)
1831 return 0;
1832
1833 /* For administrative MTU increase, there is no way to discover
1834 IPv6 PMTU increase, so PMTU increase should be updated here.
1835 Since RFC 1981 doesn't include administrative MTU increase
1836 update PMTU increase is a MUST. (i.e. jumbo frame)
1837 */
1838 /*
1839 If new MTU is less than route PMTU, this new MTU will be the
1840 lowest MTU in the path, update the route PMTU to reflect PMTU
1841 decreases; if new MTU is greater than route PMTU, and the
1842 old MTU is the lowest MTU in the path, update the route PMTU
1843 to reflect the increase. In this case if the other nodes' MTU
1844 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1845 PMTU discouvery.
1846 */
1847 if (rt->rt6i_dev == arg->dev &&
1848 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1849 (dst_mtu(&rt->u.dst) > arg->mtu ||
1850 (dst_mtu(&rt->u.dst) < arg->mtu &&
1851 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1852 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1853 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1854 return 0;
1855}
1856
1857void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1858{
c71099ac
TG
1859 struct rt6_mtu_change_arg arg = {
1860 .dev = dev,
1861 .mtu = mtu,
1862 };
1da177e4 1863
c71099ac 1864 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1865}
1866
86872cb5
TG
1867static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1868 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
1869 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1870 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1871 [RTA_PRIORITY] = { .type = NLA_U32 },
1872 [RTA_METRICS] = { .type = NLA_NESTED },
1873};
1874
1875static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1876 struct fib6_config *cfg)
1da177e4 1877{
86872cb5
TG
1878 struct rtmsg *rtm;
1879 struct nlattr *tb[RTA_MAX+1];
1880 int err;
1da177e4 1881
86872cb5
TG
1882 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1883 if (err < 0)
1884 goto errout;
1da177e4 1885
86872cb5
TG
1886 err = -EINVAL;
1887 rtm = nlmsg_data(nlh);
1888 memset(cfg, 0, sizeof(*cfg));
1889
1890 cfg->fc_table = rtm->rtm_table;
1891 cfg->fc_dst_len = rtm->rtm_dst_len;
1892 cfg->fc_src_len = rtm->rtm_src_len;
1893 cfg->fc_flags = RTF_UP;
1894 cfg->fc_protocol = rtm->rtm_protocol;
1895
1896 if (rtm->rtm_type == RTN_UNREACHABLE)
1897 cfg->fc_flags |= RTF_REJECT;
1898
1899 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1900 cfg->fc_nlinfo.nlh = nlh;
1901
1902 if (tb[RTA_GATEWAY]) {
1903 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1904 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1905 }
86872cb5
TG
1906
1907 if (tb[RTA_DST]) {
1908 int plen = (rtm->rtm_dst_len + 7) >> 3;
1909
1910 if (nla_len(tb[RTA_DST]) < plen)
1911 goto errout;
1912
1913 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1914 }
86872cb5
TG
1915
1916 if (tb[RTA_SRC]) {
1917 int plen = (rtm->rtm_src_len + 7) >> 3;
1918
1919 if (nla_len(tb[RTA_SRC]) < plen)
1920 goto errout;
1921
1922 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1923 }
86872cb5
TG
1924
1925 if (tb[RTA_OIF])
1926 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1927
1928 if (tb[RTA_PRIORITY])
1929 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1930
1931 if (tb[RTA_METRICS]) {
1932 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1933 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 1934 }
86872cb5
TG
1935
1936 if (tb[RTA_TABLE])
1937 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1938
1939 err = 0;
1940errout:
1941 return err;
1da177e4
LT
1942}
1943
1944int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1945{
86872cb5
TG
1946 struct fib6_config cfg;
1947 int err;
1da177e4 1948
86872cb5
TG
1949 err = rtm_to_fib6_config(skb, nlh, &cfg);
1950 if (err < 0)
1951 return err;
1952
1953 return ip6_route_del(&cfg);
1da177e4
LT
1954}
1955
1956int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1957{
86872cb5
TG
1958 struct fib6_config cfg;
1959 int err;
1da177e4 1960
86872cb5
TG
1961 err = rtm_to_fib6_config(skb, nlh, &cfg);
1962 if (err < 0)
1963 return err;
1964
1965 return ip6_route_add(&cfg);
1da177e4
LT
1966}
1967
1da177e4 1968static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1969 struct in6_addr *dst, struct in6_addr *src,
1970 int iif, int type, u32 pid, u32 seq,
1971 int prefix, unsigned int flags)
1da177e4
LT
1972{
1973 struct rtmsg *rtm;
2d7202bf 1974 struct nlmsghdr *nlh;
1da177e4 1975 struct rta_cacheinfo ci;
9e762a4a 1976 u32 table;
1da177e4
LT
1977
1978 if (prefix) { /* user wants prefix routes only */
1979 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1980 /* success since this is not a prefix route */
1981 return 1;
1982 }
1983 }
1984
2d7202bf
TG
1985 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1986 if (nlh == NULL)
1987 return -ENOBUFS;
1988
1989 rtm = nlmsg_data(nlh);
1da177e4
LT
1990 rtm->rtm_family = AF_INET6;
1991 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1992 rtm->rtm_src_len = rt->rt6i_src.plen;
1993 rtm->rtm_tos = 0;
c71099ac 1994 if (rt->rt6i_table)
9e762a4a 1995 table = rt->rt6i_table->tb6_id;
c71099ac 1996 else
9e762a4a
PM
1997 table = RT6_TABLE_UNSPEC;
1998 rtm->rtm_table = table;
2d7202bf 1999 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2000 if (rt->rt6i_flags&RTF_REJECT)
2001 rtm->rtm_type = RTN_UNREACHABLE;
2002 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2003 rtm->rtm_type = RTN_LOCAL;
2004 else
2005 rtm->rtm_type = RTN_UNICAST;
2006 rtm->rtm_flags = 0;
2007 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2008 rtm->rtm_protocol = rt->rt6i_protocol;
2009 if (rt->rt6i_flags&RTF_DYNAMIC)
2010 rtm->rtm_protocol = RTPROT_REDIRECT;
2011 else if (rt->rt6i_flags & RTF_ADDRCONF)
2012 rtm->rtm_protocol = RTPROT_KERNEL;
2013 else if (rt->rt6i_flags&RTF_DEFAULT)
2014 rtm->rtm_protocol = RTPROT_RA;
2015
2016 if (rt->rt6i_flags&RTF_CACHE)
2017 rtm->rtm_flags |= RTM_F_CLONED;
2018
2019 if (dst) {
2d7202bf 2020 NLA_PUT(skb, RTA_DST, 16, dst);
1da177e4
LT
2021 rtm->rtm_dst_len = 128;
2022 } else if (rtm->rtm_dst_len)
2d7202bf 2023 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2024#ifdef CONFIG_IPV6_SUBTREES
2025 if (src) {
2d7202bf 2026 NLA_PUT(skb, RTA_SRC, 16, src);
1da177e4
LT
2027 rtm->rtm_src_len = 128;
2028 } else if (rtm->rtm_src_len)
2d7202bf 2029 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2030#endif
2031 if (iif)
2d7202bf 2032 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2033 else if (dst) {
2034 struct in6_addr saddr_buf;
2035 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2036 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2037 }
2d7202bf 2038
1da177e4 2039 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2040 goto nla_put_failure;
2041
1da177e4 2042 if (rt->u.dst.neighbour)
2d7202bf
TG
2043 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2044
1da177e4 2045 if (rt->u.dst.dev)
2d7202bf
TG
2046 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2047
2048 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
1da177e4
LT
2049 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2050 if (rt->rt6i_expires)
2051 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2052 else
2053 ci.rta_expires = 0;
2054 ci.rta_used = rt->u.dst.__use;
2055 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2056 ci.rta_error = rt->u.dst.error;
2057 ci.rta_id = 0;
2058 ci.rta_ts = 0;
2059 ci.rta_tsage = 0;
2d7202bf
TG
2060 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2061
2062 return nlmsg_end(skb, nlh);
2063
2064nla_put_failure:
2065 return nlmsg_cancel(skb, nlh);
1da177e4
LT
2066}
2067
1b43af54 2068int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2069{
2070 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2071 int prefix;
2072
2d7202bf
TG
2073 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2074 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2075 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2076 } else
2077 prefix = 0;
2078
2079 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2080 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2081 prefix, NLM_F_MULTI);
1da177e4
LT
2082}
2083
1da177e4
LT
2084int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2085{
ab364a6f
TG
2086 struct nlattr *tb[RTA_MAX+1];
2087 struct rt6_info *rt;
1da177e4 2088 struct sk_buff *skb;
ab364a6f 2089 struct rtmsg *rtm;
1da177e4 2090 struct flowi fl;
ab364a6f 2091 int err, iif = 0;
1da177e4 2092
ab364a6f
TG
2093 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2094 if (err < 0)
2095 goto errout;
1da177e4 2096
ab364a6f 2097 err = -EINVAL;
1da177e4 2098 memset(&fl, 0, sizeof(fl));
1da177e4 2099
ab364a6f
TG
2100 if (tb[RTA_SRC]) {
2101 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2102 goto errout;
2103
2104 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2105 }
2106
2107 if (tb[RTA_DST]) {
2108 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2109 goto errout;
2110
2111 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2112 }
2113
2114 if (tb[RTA_IIF])
2115 iif = nla_get_u32(tb[RTA_IIF]);
2116
2117 if (tb[RTA_OIF])
2118 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2119
2120 if (iif) {
2121 struct net_device *dev;
2122 dev = __dev_get_by_index(iif);
2123 if (!dev) {
2124 err = -ENODEV;
ab364a6f 2125 goto errout;
1da177e4
LT
2126 }
2127 }
2128
ab364a6f
TG
2129 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2130 if (skb == NULL) {
2131 err = -ENOBUFS;
2132 goto errout;
2133 }
1da177e4 2134
ab364a6f
TG
2135 /* Reserve room for dummy headers, this skb can pass
2136 through good chunk of routing engine.
2137 */
2138 skb->mac.raw = skb->data;
2139 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2140
ab364a6f 2141 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2142 skb->dst = &rt->u.dst;
2143
ab364a6f 2144 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2145 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2146 nlh->nlmsg_seq, 0, 0);
1da177e4 2147 if (err < 0) {
ab364a6f
TG
2148 kfree_skb(skb);
2149 goto errout;
1da177e4
LT
2150 }
2151
2942e900 2152 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2153errout:
1da177e4 2154 return err;
1da177e4
LT
2155}
2156
86872cb5 2157void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2158{
2159 struct sk_buff *skb;
86872cb5
TG
2160 u32 pid = 0, seq = 0;
2161 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2162 int payload = sizeof(struct rtmsg) + 256;
2163 int err = -ENOBUFS;
2164
86872cb5
TG
2165 if (info) {
2166 pid = info->pid;
2167 nlh = info->nlh;
2168 if (nlh)
2169 seq = nlh->nlmsg_seq;
2170 }
2171
21713ebc
TG
2172 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2173 if (skb == NULL)
2174 goto errout;
2175
2176 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2177 if (err < 0) {
1da177e4 2178 kfree_skb(skb);
21713ebc 2179 goto errout;
1da177e4 2180 }
21713ebc
TG
2181
2182 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2183errout:
2184 if (err < 0)
2185 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2186}
2187
2188/*
2189 * /proc
2190 */
2191
2192#ifdef CONFIG_PROC_FS
2193
2194#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2195
/*
 * State shared between rt6_proc_info() and its per-route callback
 * rt6_info_route():
 *   buffer - output buffer the records are formatted into
 *   offset - read offset requested by the caller
 *   length - number of bytes requested by the caller
 *   skip   - count of records skipped so far to reach 'offset'
 *   len    - number of bytes written to 'buffer' so far
 */
2196struct rt6_proc_arg
2197{
2198 char *buffer;
2199 int offset;
2200 int length;
2201 int skip;
2202 int len;
2203};
2204
2205static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2206{
2207 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2208 int i;
2209
2210 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2211 arg->skip++;
2212 return 0;
2213 }
2214
2215 if (arg->len >= arg->length)
2216 return 0;
2217
2218 for (i=0; i<16; i++) {
2219 sprintf(arg->buffer + arg->len, "%02x",
2220 rt->rt6i_dst.addr.s6_addr[i]);
2221 arg->len += 2;
2222 }
2223 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2224 rt->rt6i_dst.plen);
2225
2226#ifdef CONFIG_IPV6_SUBTREES
2227 for (i=0; i<16; i++) {
2228 sprintf(arg->buffer + arg->len, "%02x",
2229 rt->rt6i_src.addr.s6_addr[i]);
2230 arg->len += 2;
2231 }
2232 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2233 rt->rt6i_src.plen);
2234#else
2235 sprintf(arg->buffer + arg->len,
2236 "00000000000000000000000000000000 00 ");
2237 arg->len += 36;
2238#endif
2239
2240 if (rt->rt6i_nexthop) {
2241 for (i=0; i<16; i++) {
2242 sprintf(arg->buffer + arg->len, "%02x",
2243 rt->rt6i_nexthop->primary_key[i]);
2244 arg->len += 2;
2245 }
2246 } else {
2247 sprintf(arg->buffer + arg->len,
2248 "00000000000000000000000000000000");
2249 arg->len += 32;
2250 }
2251 arg->len += sprintf(arg->buffer + arg->len,
2252 " %08x %08x %08x %08x %8s\n",
2253 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2254 rt->u.dst.__use, rt->rt6i_flags,
2255 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2256 return 0;
2257}
2258
2259static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2260{
c71099ac
TG
2261 struct rt6_proc_arg arg = {
2262 .buffer = buffer,
2263 .offset = offset,
2264 .length = length,
2265 };
1da177e4 2266
c71099ac 2267 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2268
2269 *start = buffer;
2270 if (offset)
2271 *start += offset % RT6_INFO_LEN;
2272
2273 arg.len -= offset % RT6_INFO_LEN;
2274
2275 if (arg.len > length)
2276 arg.len = length;
2277 if (arg.len < 0)
2278 arg.len = 0;
2279
2280 return arg.len;
2281}
2282
1da177e4
LT
2283static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2284{
2285 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2286 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2287 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2288 rt6_stats.fib_rt_cache,
2289 atomic_read(&ip6_dst_ops.entries),
2290 rt6_stats.fib_discarded_routes);
2291
2292 return 0;
2293}
2294
2295static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2296{
2297 return single_open(file, rt6_stats_seq_show, NULL);
2298}
2299
2300static struct file_operations rt6_stats_seq_fops = {
2301 .owner = THIS_MODULE,
2302 .open = rt6_stats_seq_open,
2303 .read = seq_read,
2304 .llseek = seq_lseek,
2305 .release = single_release,
2306};
2307#endif /* CONFIG_PROC_FS */
2308
2309#ifdef CONFIG_SYSCTL
2310
2311static int flush_delay;
2312
2313static
2314int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2315 void __user *buffer, size_t *lenp, loff_t *ppos)
2316{
2317 if (write) {
2318 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2319 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2320 return 0;
2321 } else
2322 return -EINVAL;
2323}
2324
2325ctl_table ipv6_route_table[] = {
2326 {
2327 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2328 .procname = "flush",
2329 .data = &flush_delay,
2330 .maxlen = sizeof(int),
89c8b3a1 2331 .mode = 0200,
1da177e4
LT
2332 .proc_handler = &ipv6_sysctl_rtcache_flush
2333 },
2334 {
2335 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2336 .procname = "gc_thresh",
2337 .data = &ip6_dst_ops.gc_thresh,
2338 .maxlen = sizeof(int),
2339 .mode = 0644,
2340 .proc_handler = &proc_dointvec,
2341 },
2342 {
2343 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2344 .procname = "max_size",
2345 .data = &ip6_rt_max_size,
2346 .maxlen = sizeof(int),
2347 .mode = 0644,
2348 .proc_handler = &proc_dointvec,
2349 },
2350 {
2351 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2352 .procname = "gc_min_interval",
2353 .data = &ip6_rt_gc_min_interval,
2354 .maxlen = sizeof(int),
2355 .mode = 0644,
2356 .proc_handler = &proc_dointvec_jiffies,
2357 .strategy = &sysctl_jiffies,
2358 },
2359 {
2360 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2361 .procname = "gc_timeout",
2362 .data = &ip6_rt_gc_timeout,
2363 .maxlen = sizeof(int),
2364 .mode = 0644,
2365 .proc_handler = &proc_dointvec_jiffies,
2366 .strategy = &sysctl_jiffies,
2367 },
2368 {
2369 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2370 .procname = "gc_interval",
2371 .data = &ip6_rt_gc_interval,
2372 .maxlen = sizeof(int),
2373 .mode = 0644,
2374 .proc_handler = &proc_dointvec_jiffies,
2375 .strategy = &sysctl_jiffies,
2376 },
2377 {
2378 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2379 .procname = "gc_elasticity",
2380 .data = &ip6_rt_gc_elasticity,
2381 .maxlen = sizeof(int),
2382 .mode = 0644,
2383 .proc_handler = &proc_dointvec_jiffies,
2384 .strategy = &sysctl_jiffies,
2385 },
2386 {
2387 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2388 .procname = "mtu_expires",
2389 .data = &ip6_rt_mtu_expires,
2390 .maxlen = sizeof(int),
2391 .mode = 0644,
2392 .proc_handler = &proc_dointvec_jiffies,
2393 .strategy = &sysctl_jiffies,
2394 },
2395 {
2396 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2397 .procname = "min_adv_mss",
2398 .data = &ip6_rt_min_advmss,
2399 .maxlen = sizeof(int),
2400 .mode = 0644,
2401 .proc_handler = &proc_dointvec_jiffies,
2402 .strategy = &sysctl_jiffies,
2403 },
2404 {
2405 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2406 .procname = "gc_min_interval_ms",
2407 .data = &ip6_rt_gc_min_interval,
2408 .maxlen = sizeof(int),
2409 .mode = 0644,
2410 .proc_handler = &proc_dointvec_ms_jiffies,
2411 .strategy = &sysctl_ms_jiffies,
2412 },
2413 { .ctl_name = 0 }
2414};
2415
2416#endif
2417
2418void __init ip6_route_init(void)
2419{
2420 struct proc_dir_entry *p;
2421
2422 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2423 sizeof(struct rt6_info),
2424 0, SLAB_HWCACHE_ALIGN,
2425 NULL, NULL);
2426 if (!ip6_dst_ops.kmem_cachep)
2427 panic("cannot create ip6_dst_cache");
2428
2429 fib6_init();
2430#ifdef CONFIG_PROC_FS
2431 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2432 if (p)
2433 p->owner = THIS_MODULE;
2434
2435 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2436#endif
2437#ifdef CONFIG_XFRM
2438 xfrm6_init();
2439#endif
101367c2
TG
2440#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2441 fib6_rules_init();
2442#endif
1da177e4
LT
2443}
2444
2445void ip6_route_cleanup(void)
2446{
101367c2
TG
2447#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2448 fib6_rules_cleanup();
2449#endif
1da177e4
LT
2450#ifdef CONFIG_PROC_FS
2451 proc_net_remove("ipv6_route");
2452 proc_net_remove("rt6_stats");
2453#endif
2454#ifdef CONFIG_XFRM
2455 xfrm6_fini();
2456#endif
2457 rt6_ifdown(NULL);
2458 fib6_gc_cleanup();
2459 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2460}