]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[IPV6] NDISC: Search subtrees when backtracking on receipt of redirects.
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
1da177e4
LT
38#include <linux/if_arp.h>
39
40#ifdef CONFIG_PROC_FS
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#endif
44
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
554cfb7e
YH
77#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
1da177e4
LT
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
70ceb4f5
YH
101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
1da177e4
LT
109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
101367c2
TG
143#ifdef CONFIG_IPV6_MULTIPLE_TABLES
144
145struct rt6_info ip6_prohibit_entry = {
146 .u = {
147 .dst = {
148 .__refcnt = ATOMIC_INIT(1),
149 .__use = 1,
150 .dev = &loopback_dev,
151 .obsolete = -1,
152 .error = -EACCES,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_discard,
155 .output = ip6_pkt_discard_out,
156 .ops = &ip6_dst_ops,
157 .path = (struct dst_entry*)&ip6_prohibit_entry,
158 }
159 },
160 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
161 .rt6i_metric = ~(u32) 0,
162 .rt6i_ref = ATOMIC_INIT(1),
163};
164
165struct rt6_info ip6_blk_hole_entry = {
166 .u = {
167 .dst = {
168 .__refcnt = ATOMIC_INIT(1),
169 .__use = 1,
170 .dev = &loopback_dev,
171 .obsolete = -1,
172 .error = -EINVAL,
173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
174 .input = ip6_pkt_discard,
175 .output = ip6_pkt_discard_out,
176 .ops = &ip6_dst_ops,
177 .path = (struct dst_entry*)&ip6_blk_hole_entry,
178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
185#endif
186
1da177e4
LT
187/* allocate dst with ip6_dst_ops */
188static __inline__ struct rt6_info *ip6_dst_alloc(void)
189{
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
201 }
202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
209
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
215 }
216 }
217}
218
219static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220{
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
223}
224
c71099ac
TG
225static inline int rt6_need_strict(struct in6_addr *daddr)
226{
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229}
230
1da177e4 231/*
c71099ac 232 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
233 */
234
235static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
238{
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
241
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
255 }
256 local = sprt;
257 }
258 }
259
260 if (local)
261 return local;
262
263 if (strict)
264 return &ip6_null_entry;
265 }
266 return rt;
267}
268
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If @rt has a next-hop neighbour that is not in a NUD_VALID state and
 * the per-device rtr_probe_interval has elapsed since the neighbour was
 * last updated, stamp the neighbour and send a Neighbour Solicitation to
 * the solicited-node multicast address of the router.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  A NULL @rt is accepted and ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked pre-check; repeated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
304
1da177e4 305/*
554cfb7e 306 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 307 */
554cfb7e
YH
308static int inline rt6_check_dev(struct rt6_info *rt, int oif)
309{
310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
317}
1da177e4 318
554cfb7e 319static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 320{
554cfb7e
YH
321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
4d0c5911
YH
323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
554cfb7e
YH
327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
4d0c5911 329 m = 2;
554cfb7e 330 read_unlock_bh(&neigh->lock);
1da177e4 331 }
554cfb7e 332 return m;
1da177e4
LT
333}
334
554cfb7e
YH
335static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
1da177e4 337{
4d0c5911
YH
338 int m, n;
339
340 m = rt6_check_dev(rt, oif);
554cfb7e
YH
341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
ebacaaa0
YH
343#ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345#endif
4d0c5911
YH
346 n = rt6_check_neigh(rt);
347 if (n > 1)
ebacaaa0 348 m |= 16;
4d0c5911 349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
554cfb7e
YH
350 return -1;
351 return m;
352}
353
354static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
356{
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
1da177e4 361
554cfb7e
YH
362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 364
554cfb7e 365 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
554cfb7e
YH
367 rt = rt->u.next) {
368 int m;
1da177e4 369
554cfb7e 370 if (rt6_check_expired(rt))
1da177e4
LT
371 continue;
372
554cfb7e
YH
373 last = rt;
374
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
1da177e4 377 continue;
1da177e4 378
554cfb7e 379 if (m > mpri) {
27097255 380 rt6_probe(match);
554cfb7e 381 match = rt;
1da177e4 382 mpri = m;
27097255
YH
383 } else {
384 rt6_probe(rt);
1da177e4
LT
385 }
386 }
387
554cfb7e
YH
388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
34af946a 392 static DEFINE_SPINLOCK(lock);
c302e6d5 393 spin_lock(&lock);
554cfb7e
YH
394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
c302e6d5 397 spin_unlock(&lock);
1da177e4 398 }
1da177e4 399
554cfb7e
YH
400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
1da177e4 402
554cfb7e 403 return (match ? match : &ip6_null_entry);
1da177e4
LT
404}
405
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Depending on the advertised lifetime the matching route-info route is
 * deleted (lifetime 0), created (no route yet, lifetime non-zero) or has
 * its preference bits refreshed; its expiry is then updated.
 *
 * Returns 0 on success, -EINVAL if the option is too short or its
 * length / prefix_len fields are inconsistent.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime arrives in network byte order: convert to host order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/*
		 * The option carries fewer than 16 prefix bytes; build a
		 * full address from the first prefix_len bits.
		 */
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Lifetime 0 withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
483
982f56f3
YH
/*
 * BACKTRACK(saddr) - retry a failed lookup further up the fib tree.
 *
 * Must be expanded inside a function that has the locals "rt" and "fn"
 * and the labels "restart" and "out".  When rt is &ip6_null_entry the
 * macro climbs from fn towards the root: if the parent has a subtree that
 * is not the node we came from, that subtree is searched for saddr,
 * otherwise the parent itself becomes the current node.  It jumps to
 * "restart" at the first node flagged RTN_RTINFO and to "out" once a node
 * flagged RTN_TL_ROOT is reached.
 *
 * The subtree is always accessed through FIB6_SUBTREE() so that the test
 * and the lookup agree.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *pn; \
		while (fn) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
501
502static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
503 struct flowi *fl, int flags)
1da177e4
LT
504{
505 struct fib6_node *fn;
506 struct rt6_info *rt;
507
c71099ac
TG
508 read_lock_bh(&table->tb6_lock);
509 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
510restart:
511 rt = fn->leaf;
512 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
982f56f3 513 BACKTRACK(&fl->fl6_src);
1da177e4 514 dst_hold(&rt->u.dst);
c71099ac
TG
515out:
516 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
517
518 rt->u.dst.lastuse = jiffies;
c71099ac
TG
519 rt->u.dst.__use++;
520
521 return rt;
522
523}
524
525struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
526 int oif, int strict)
527{
528 struct flowi fl = {
529 .oif = oif,
530 .nl_u = {
531 .ip6_u = {
532 .daddr = *daddr,
533 /* TODO: saddr */
534 },
535 },
536 };
537 struct dst_entry *dst;
538 int flags = strict ? RT6_F_STRICT : 0;
539
540 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
541 if (dst->error == 0)
542 return (struct rt6_info *) dst;
543
544 dst_release(dst);
545
1da177e4
LT
546 return NULL;
547}
548
c71099ac 549/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
550 It takes new route entry, the addition fails by any reason the
551 route is freed. In any case, if caller does not hold it, it may
552 be destroyed.
553 */
554
86872cb5 555static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
556{
557 int err;
c71099ac 558 struct fib6_table *table;
1da177e4 559
c71099ac
TG
560 table = rt->rt6i_table;
561 write_lock_bh(&table->tb6_lock);
86872cb5 562 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 563 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
564
565 return err;
566}
567
40e22e8f
TG
568int ip6_ins_rt(struct rt6_info *rt)
569{
86872cb5 570 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
571}
572
95a9a5ba
YH
573static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
574 struct in6_addr *saddr)
1da177e4 575{
1da177e4
LT
576 struct rt6_info *rt;
577
578 /*
579 * Clone the route.
580 */
581
582 rt = ip6_rt_copy(ort);
583
584 if (rt) {
58c4fb86
YH
585 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
586 if (rt->rt6i_dst.plen != 128 &&
587 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
588 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 589 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 590 }
1da177e4 591
58c4fb86 592 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
593 rt->rt6i_dst.plen = 128;
594 rt->rt6i_flags |= RTF_CACHE;
595 rt->u.dst.flags |= DST_HOST;
596
597#ifdef CONFIG_IPV6_SUBTREES
598 if (rt->rt6i_src.plen && saddr) {
599 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
600 rt->rt6i_src.plen = 128;
601 }
602#endif
603
604 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
605
95a9a5ba 606 }
1da177e4 607
95a9a5ba
YH
608 return rt;
609}
1da177e4 610
299d9939
YH
611static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
612{
613 struct rt6_info *rt = ip6_rt_copy(ort);
614 if (rt) {
615 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
616 rt->rt6i_dst.plen = 128;
617 rt->rt6i_flags |= RTF_CACHE;
618 if (rt->rt6i_flags & RTF_REJECT)
619 rt->u.dst.error = ort->u.dst.error;
620 rt->u.dst.flags |= DST_HOST;
621 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
622 }
623 return rt;
624}
625
8ce11e6a
AB
626static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
627 struct flowi *fl, int flags)
1da177e4
LT
628{
629 struct fib6_node *fn;
519fbd87 630 struct rt6_info *rt, *nrt;
c71099ac 631 int strict = 0;
1da177e4 632 int attempts = 3;
519fbd87 633 int err;
8238dd06 634 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 635
c71099ac
TG
636 if (flags & RT6_F_STRICT)
637 strict = RT6_SELECT_F_IFACE;
1da177e4
LT
638
639relookup:
c71099ac 640 read_lock_bh(&table->tb6_lock);
1da177e4 641
8238dd06 642restart_2:
c71099ac 643 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
644
645restart:
c71099ac 646 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
982f56f3 647 BACKTRACK(&fl->fl6_src);
8238dd06
YH
648 if (rt == &ip6_null_entry ||
649 rt->rt6i_flags & RTF_CACHE)
1ddef044 650 goto out;
1da177e4 651
fb9de91e 652 dst_hold(&rt->u.dst);
c71099ac 653 read_unlock_bh(&table->tb6_lock);
fb9de91e 654
519fbd87 655 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 656 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
657 else {
658#if CLONE_OFFLINK_ROUTE
c71099ac 659 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
660#else
661 goto out2;
662#endif
663 }
e40cf353 664
519fbd87
YH
665 dst_release(&rt->u.dst);
666 rt = nrt ? : &ip6_null_entry;
1da177e4 667
519fbd87
YH
668 dst_hold(&rt->u.dst);
669 if (nrt) {
40e22e8f 670 err = ip6_ins_rt(nrt);
519fbd87 671 if (!err)
1da177e4 672 goto out2;
1da177e4 673 }
1da177e4 674
519fbd87
YH
675 if (--attempts <= 0)
676 goto out2;
677
678 /*
c71099ac 679 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
680 * released someone could insert this route. Relookup.
681 */
682 dst_release(&rt->u.dst);
683 goto relookup;
684
685out:
8238dd06
YH
686 if (reachable) {
687 reachable = 0;
688 goto restart_2;
689 }
519fbd87 690 dst_hold(&rt->u.dst);
c71099ac 691 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
692out2:
693 rt->u.dst.lastuse = jiffies;
694 rt->u.dst.__use++;
c71099ac
TG
695
696 return rt;
1da177e4
LT
697}
698
c71099ac
TG
699void ip6_route_input(struct sk_buff *skb)
700{
701 struct ipv6hdr *iph = skb->nh.ipv6h;
702 struct flowi fl = {
703 .iif = skb->dev->ifindex,
704 .nl_u = {
705 .ip6_u = {
706 .daddr = iph->daddr,
707 .saddr = iph->saddr,
708 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
709 },
710 },
711 .proto = iph->nexthdr,
712 };
713 int flags = 0;
714
715 if (rt6_need_strict(&iph->daddr))
716 flags |= RT6_F_STRICT;
717
718 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
719}
720
721static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
722 struct flowi *fl, int flags)
1da177e4
LT
723{
724 struct fib6_node *fn;
519fbd87 725 struct rt6_info *rt, *nrt;
c71099ac 726 int strict = 0;
1da177e4 727 int attempts = 3;
519fbd87 728 int err;
8238dd06 729 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 730
c71099ac
TG
731 if (flags & RT6_F_STRICT)
732 strict = RT6_SELECT_F_IFACE;
1da177e4
LT
733
734relookup:
c71099ac 735 read_lock_bh(&table->tb6_lock);
1da177e4 736
8238dd06 737restart_2:
c71099ac 738 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
739
740restart:
8238dd06 741 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
982f56f3 742 BACKTRACK(&fl->fl6_src);
8238dd06
YH
743 if (rt == &ip6_null_entry ||
744 rt->rt6i_flags & RTF_CACHE)
1da177e4 745 goto out;
1da177e4 746
fb9de91e 747 dst_hold(&rt->u.dst);
c71099ac 748 read_unlock_bh(&table->tb6_lock);
fb9de91e 749
519fbd87 750 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 751 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
752 else {
753#if CLONE_OFFLINK_ROUTE
754 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
755#else
756 goto out2;
757#endif
758 }
1da177e4 759
519fbd87
YH
760 dst_release(&rt->u.dst);
761 rt = nrt ? : &ip6_null_entry;
1da177e4 762
519fbd87
YH
763 dst_hold(&rt->u.dst);
764 if (nrt) {
40e22e8f 765 err = ip6_ins_rt(nrt);
519fbd87 766 if (!err)
1da177e4 767 goto out2;
1da177e4 768 }
e40cf353 769
519fbd87
YH
770 if (--attempts <= 0)
771 goto out2;
772
773 /*
c71099ac 774 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
775 * released someone could insert this route. Relookup.
776 */
777 dst_release(&rt->u.dst);
778 goto relookup;
779
780out:
8238dd06
YH
781 if (reachable) {
782 reachable = 0;
783 goto restart_2;
784 }
519fbd87 785 dst_hold(&rt->u.dst);
c71099ac 786 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
787out2:
788 rt->u.dst.lastuse = jiffies;
789 rt->u.dst.__use++;
c71099ac
TG
790 return rt;
791}
792
793struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
794{
795 int flags = 0;
796
797 if (rt6_need_strict(&fl->fl6_dst))
798 flags |= RT6_F_STRICT;
799
800 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
801}
802
803
804/*
805 * Destination cache support functions
806 */
807
808static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
809{
810 struct rt6_info *rt;
811
812 rt = (struct rt6_info *) dst;
813
814 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
815 return dst;
816
817 return NULL;
818}
819
820static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
821{
822 struct rt6_info *rt = (struct rt6_info *) dst;
823
824 if (rt) {
825 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 826 ip6_del_rt(rt);
1da177e4
LT
827 else
828 dst_release(dst);
829 }
830 return NULL;
831}
832
833static void ip6_link_failure(struct sk_buff *skb)
834{
835 struct rt6_info *rt;
836
837 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
838
839 rt = (struct rt6_info *) skb->dst;
840 if (rt) {
841 if (rt->rt6i_flags&RTF_CACHE) {
842 dst_set_expires(&rt->u.dst, 0);
843 rt->rt6i_flags |= RTF_EXPIRES;
844 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
845 rt->rt6i_node->fn_sernum = -1;
846 }
847}
848
849static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
850{
851 struct rt6_info *rt6 = (struct rt6_info*)dst;
852
853 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
854 rt6->rt6i_flags |= RTF_MODIFIED;
855 if (mtu < IPV6_MIN_MTU) {
856 mtu = IPV6_MIN_MTU;
857 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
858 }
859 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 860 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
861 }
862}
863
1da177e4
LT
864static int ipv6_get_mtu(struct net_device *dev);
865
866static inline unsigned int ipv6_advmss(unsigned int mtu)
867{
868 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
869
870 if (mtu < ip6_rt_min_advmss)
871 mtu = ip6_rt_min_advmss;
872
873 /*
874 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
875 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
876 * IPV6_MAXPLEN is also valid and means: "any MSS,
877 * rely only on pmtu discovery"
878 */
879 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
880 mtu = IPV6_MAXPLEN;
881 return mtu;
882}
883
5d0bbeeb 884static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 885static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 886
1da177e4
LT
887struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
888 struct neighbour *neigh,
889 struct in6_addr *addr,
890 int (*output)(struct sk_buff *))
891{
892 struct rt6_info *rt;
893 struct inet6_dev *idev = in6_dev_get(dev);
894
895 if (unlikely(idev == NULL))
896 return NULL;
897
898 rt = ip6_dst_alloc();
899 if (unlikely(rt == NULL)) {
900 in6_dev_put(idev);
901 goto out;
902 }
903
904 dev_hold(dev);
905 if (neigh)
906 neigh_hold(neigh);
907 else
908 neigh = ndisc_get_neigh(dev, addr);
909
910 rt->rt6i_dev = dev;
911 rt->rt6i_idev = idev;
912 rt->rt6i_nexthop = neigh;
913 atomic_set(&rt->u.dst.__refcnt, 1);
914 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
915 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
916 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
917 rt->u.dst.output = output;
918
919#if 0 /* there's no chance to use these for ndisc */
920 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
921 ? DST_HOST
922 : 0;
923 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
924 rt->rt6i_dst.plen = 128;
925#endif
926
5d0bbeeb 927 spin_lock_bh(&ndisc_lock);
1da177e4
LT
928 rt->u.dst.next = ndisc_dst_gc_list;
929 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 930 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
931
932 fib6_force_start_gc();
933
934out:
935 return (struct dst_entry *)rt;
936}
937
938int ndisc_dst_gc(int *more)
939{
940 struct dst_entry *dst, *next, **pprev;
941 int freed;
942
943 next = NULL;
5d0bbeeb
TG
944 freed = 0;
945
946 spin_lock_bh(&ndisc_lock);
1da177e4 947 pprev = &ndisc_dst_gc_list;
5d0bbeeb 948
1da177e4
LT
949 while ((dst = *pprev) != NULL) {
950 if (!atomic_read(&dst->__refcnt)) {
951 *pprev = dst->next;
952 dst_free(dst);
953 freed++;
954 } else {
955 pprev = &dst->next;
956 (*more)++;
957 }
958 }
959
5d0bbeeb
TG
960 spin_unlock_bh(&ndisc_lock);
961
1da177e4
LT
962 return freed;
963}
964
965static int ip6_dst_gc(void)
966{
967 static unsigned expire = 30*HZ;
968 static unsigned long last_gc;
969 unsigned long now = jiffies;
970
971 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
972 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
973 goto out;
974
975 expire++;
976 fib6_run_gc(expire);
977 last_gc = now;
978 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
979 expire = ip6_rt_gc_timeout>>1;
980
981out:
982 expire -= expire>>ip6_rt_gc_elasticity;
983 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
984}
985
986/* Clean host part of a prefix. Not necessary in radix tree,
987 but results in cleaner routing tables.
988
989 Remove it only when all the things will work!
990 */
991
992static int ipv6_get_mtu(struct net_device *dev)
993{
994 int mtu = IPV6_MIN_MTU;
995 struct inet6_dev *idev;
996
997 idev = in6_dev_get(dev);
998 if (idev) {
999 mtu = idev->cnf.mtu6;
1000 in6_dev_put(idev);
1001 }
1002 return mtu;
1003}
1004
1005int ipv6_get_hoplimit(struct net_device *dev)
1006{
1007 int hoplimit = ipv6_devconf.hop_limit;
1008 struct inet6_dev *idev;
1009
1010 idev = in6_dev_get(dev);
1011 if (idev) {
1012 hoplimit = idev->cnf.hop_limit;
1013 in6_dev_put(idev);
1014 }
1015 return hoplimit;
1016}
1017
1018/*
1019 *
1020 */
1021
86872cb5 1022int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1023{
1024 int err;
1da177e4
LT
1025 struct rt6_info *rt = NULL;
1026 struct net_device *dev = NULL;
1027 struct inet6_dev *idev = NULL;
c71099ac 1028 struct fib6_table *table;
1da177e4
LT
1029 int addr_type;
1030
86872cb5 1031 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1032 return -EINVAL;
1033#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1034 if (cfg->fc_src_len)
1da177e4
LT
1035 return -EINVAL;
1036#endif
86872cb5 1037 if (cfg->fc_ifindex) {
1da177e4 1038 err = -ENODEV;
86872cb5 1039 dev = dev_get_by_index(cfg->fc_ifindex);
1da177e4
LT
1040 if (!dev)
1041 goto out;
1042 idev = in6_dev_get(dev);
1043 if (!idev)
1044 goto out;
1045 }
1046
86872cb5
TG
1047 if (cfg->fc_metric == 0)
1048 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1049
86872cb5 1050 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1051 if (table == NULL) {
1052 err = -ENOBUFS;
1053 goto out;
1054 }
1055
1da177e4
LT
1056 rt = ip6_dst_alloc();
1057
1058 if (rt == NULL) {
1059 err = -ENOMEM;
1060 goto out;
1061 }
1062
1063 rt->u.dst.obsolete = -1;
86872cb5 1064 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1065
86872cb5
TG
1066 if (cfg->fc_protocol == RTPROT_UNSPEC)
1067 cfg->fc_protocol = RTPROT_BOOT;
1068 rt->rt6i_protocol = cfg->fc_protocol;
1069
1070 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1071
1072 if (addr_type & IPV6_ADDR_MULTICAST)
1073 rt->u.dst.input = ip6_mc_input;
1074 else
1075 rt->u.dst.input = ip6_forward;
1076
1077 rt->u.dst.output = ip6_output;
1078
86872cb5
TG
1079 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1080 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1081 if (rt->rt6i_dst.plen == 128)
1082 rt->u.dst.flags = DST_HOST;
1083
1084#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1085 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1086 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1087#endif
1088
86872cb5 1089 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1090
1091 /* We cannot add true routes via loopback here,
1092 they would result in kernel looping; promote them to reject routes
1093 */
86872cb5 1094 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1095 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1096 /* hold loopback dev/idev if we haven't done so. */
1097 if (dev != &loopback_dev) {
1098 if (dev) {
1099 dev_put(dev);
1100 in6_dev_put(idev);
1101 }
1102 dev = &loopback_dev;
1103 dev_hold(dev);
1104 idev = in6_dev_get(dev);
1105 if (!idev) {
1106 err = -ENODEV;
1107 goto out;
1108 }
1109 }
1110 rt->u.dst.output = ip6_pkt_discard_out;
1111 rt->u.dst.input = ip6_pkt_discard;
1112 rt->u.dst.error = -ENETUNREACH;
1113 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1114 goto install_route;
1115 }
1116
86872cb5 1117 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1118 struct in6_addr *gw_addr;
1119 int gwa_type;
1120
86872cb5
TG
1121 gw_addr = &cfg->fc_gateway;
1122 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1123 gwa_type = ipv6_addr_type(gw_addr);
1124
1125 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1126 struct rt6_info *grt;
1127
1128 /* IPv6 strictly inhibits using not link-local
1129 addresses as nexthop address.
1130 Otherwise, router will not able to send redirects.
1131 It is very good, but in some (rare!) circumstances
1132 (SIT, PtP, NBMA NOARP links) it is handy to allow
1133 some exceptions. --ANK
1134 */
1135 err = -EINVAL;
1136 if (!(gwa_type&IPV6_ADDR_UNICAST))
1137 goto out;
1138
86872cb5 1139 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1140
1141 err = -EHOSTUNREACH;
1142 if (grt == NULL)
1143 goto out;
1144 if (dev) {
1145 if (dev != grt->rt6i_dev) {
1146 dst_release(&grt->u.dst);
1147 goto out;
1148 }
1149 } else {
1150 dev = grt->rt6i_dev;
1151 idev = grt->rt6i_idev;
1152 dev_hold(dev);
1153 in6_dev_hold(grt->rt6i_idev);
1154 }
1155 if (!(grt->rt6i_flags&RTF_GATEWAY))
1156 err = 0;
1157 dst_release(&grt->u.dst);
1158
1159 if (err)
1160 goto out;
1161 }
1162 err = -EINVAL;
1163 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1164 goto out;
1165 }
1166
1167 err = -ENODEV;
1168 if (dev == NULL)
1169 goto out;
1170
86872cb5 1171 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1172 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1173 if (IS_ERR(rt->rt6i_nexthop)) {
1174 err = PTR_ERR(rt->rt6i_nexthop);
1175 rt->rt6i_nexthop = NULL;
1176 goto out;
1177 }
1178 }
1179
86872cb5 1180 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1181
1182install_route:
86872cb5
TG
1183 if (cfg->fc_mx) {
1184 struct nlattr *nla;
1185 int remaining;
1186
1187 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1188 int type = nla->nla_type;
1189
1190 if (type) {
1191 if (type > RTAX_MAX) {
1da177e4
LT
1192 err = -EINVAL;
1193 goto out;
1194 }
86872cb5
TG
1195
1196 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1197 }
1da177e4
LT
1198 }
1199 }
1200
1201 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1202 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1203 if (!rt->u.dst.metrics[RTAX_MTU-1])
1204 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1205 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1206 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1207 rt->u.dst.dev = dev;
1208 rt->rt6i_idev = idev;
c71099ac 1209 rt->rt6i_table = table;
86872cb5 1210 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1211
1212out:
1213 if (dev)
1214 dev_put(dev);
1215 if (idev)
1216 in6_dev_put(idev);
1217 if (rt)
1218 dst_free((struct dst_entry *) rt);
1219 return err;
1220}
1221
86872cb5 1222static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1223{
1224 int err;
c71099ac 1225 struct fib6_table *table;
1da177e4 1226
6c813a72
PM
1227 if (rt == &ip6_null_entry)
1228 return -ENOENT;
1229
c71099ac
TG
1230 table = rt->rt6i_table;
1231 write_lock_bh(&table->tb6_lock);
1da177e4 1232
86872cb5 1233 err = fib6_del(rt, info);
1da177e4
LT
1234 dst_release(&rt->u.dst);
1235
c71099ac 1236 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1237
1238 return err;
1239}
1240
e0a1ad73
TG
1241int ip6_del_rt(struct rt6_info *rt)
1242{
86872cb5 1243 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1244}
1245
86872cb5 1246static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1247{
c71099ac 1248 struct fib6_table *table;
1da177e4
LT
1249 struct fib6_node *fn;
1250 struct rt6_info *rt;
1251 int err = -ESRCH;
1252
86872cb5 1253 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1254 if (table == NULL)
1255 return err;
1256
1257 read_lock_bh(&table->tb6_lock);
1da177e4 1258
c71099ac 1259 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1260 &cfg->fc_dst, cfg->fc_dst_len,
1261 &cfg->fc_src, cfg->fc_src_len);
1da177e4
LT
1262
1263 if (fn) {
1264 for (rt = fn->leaf; rt; rt = rt->u.next) {
86872cb5 1265 if (cfg->fc_ifindex &&
1da177e4 1266 (rt->rt6i_dev == NULL ||
86872cb5 1267 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1268 continue;
86872cb5
TG
1269 if (cfg->fc_flags & RTF_GATEWAY &&
1270 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1271 continue;
86872cb5 1272 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1273 continue;
1274 dst_hold(&rt->u.dst);
c71099ac 1275 read_unlock_bh(&table->tb6_lock);
1da177e4 1276
86872cb5 1277 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1278 }
1279 }
c71099ac 1280 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1281
1282 return err;
1283}
1284
1285/*
1286 * Handle redirects
1287 */
a6279458
YH
1288struct ip6rd_flowi {
1289 struct flowi fl;
1290 struct in6_addr gateway;
1291};
1292
1293static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1294 struct flowi *fl,
1295 int flags)
1da177e4 1296{
a6279458
YH
1297 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1298 struct rt6_info *rt;
e843b9e1 1299 struct fib6_node *fn;
c71099ac 1300
1da177e4 1301 /*
e843b9e1
YH
1302 * Get the "current" route for this destination and
1303 * check if the redirect has come from approriate router.
1304 *
1305 * RFC 2461 specifies that redirects should only be
1306 * accepted if they come from the nexthop to the target.
1307 * Due to the way the routes are chosen, this notion
1308 * is a bit fuzzy and one might need to check all possible
1309 * routes.
1da177e4 1310 */
1da177e4 1311
c71099ac 1312 read_lock_bh(&table->tb6_lock);
a6279458 1313 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1
YH
1314restart:
1315 for (rt = fn->leaf; rt; rt = rt->u.next) {
1316 /*
1317 * Current route is on-link; redirect is always invalid.
1318 *
1319 * Seems, previous statement is not true. It could
1320 * be node, which looks for us as on-link (f.e. proxy ndisc)
1321 * But then router serving it might decide, that we should
1322 * know truth 8)8) --ANK (980726).
1323 */
1324 if (rt6_check_expired(rt))
1325 continue;
1326 if (!(rt->rt6i_flags & RTF_GATEWAY))
1327 continue;
a6279458 1328 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1329 continue;
a6279458 1330 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1331 continue;
1332 break;
1333 }
a6279458 1334
cb15d9c2 1335 if (!rt)
a6279458 1336 rt = &ip6_null_entry;
cb15d9c2
YH
1337 BACKTRACK(&fl->fl6_src);
1338out:
a6279458
YH
1339 dst_hold(&rt->u.dst);
1340
c71099ac 1341 read_unlock_bh(&table->tb6_lock);
e843b9e1 1342
a6279458
YH
1343 return rt;
1344};
1345
1346static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1347 struct in6_addr *src,
1348 struct in6_addr *gateway,
1349 struct net_device *dev)
1350{
1351 struct ip6rd_flowi rdfl = {
1352 .fl = {
1353 .oif = dev->ifindex,
1354 .nl_u = {
1355 .ip6_u = {
1356 .daddr = *dest,
1357 .saddr = *src,
1358 },
1359 },
1360 },
1361 .gateway = *gateway,
1362 };
1363 int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0;
1364
1365 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1366}
1367
1368void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1369 struct in6_addr *saddr,
1370 struct neighbour *neigh, u8 *lladdr, int on_link)
1371{
1372 struct rt6_info *rt, *nrt = NULL;
1373 struct netevent_redirect netevent;
1374
1375 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1376
1377 if (rt == &ip6_null_entry) {
1da177e4
LT
1378 if (net_ratelimit())
1379 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1380 "for redirect target\n");
a6279458 1381 goto out;
1da177e4
LT
1382 }
1383
1da177e4
LT
1384 /*
1385 * We have finally decided to accept it.
1386 */
1387
1388 neigh_update(neigh, lladdr, NUD_STALE,
1389 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1390 NEIGH_UPDATE_F_OVERRIDE|
1391 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1392 NEIGH_UPDATE_F_ISROUTER))
1393 );
1394
1395 /*
1396 * Redirect received -> path was valid.
1397 * Look, redirects are sent only in response to data packets,
1398 * so that this nexthop apparently is reachable. --ANK
1399 */
1400 dst_confirm(&rt->u.dst);
1401
1402 /* Duplicate redirect: silently ignore. */
1403 if (neigh == rt->u.dst.neighbour)
1404 goto out;
1405
1406 nrt = ip6_rt_copy(rt);
1407 if (nrt == NULL)
1408 goto out;
1409
1410 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1411 if (on_link)
1412 nrt->rt6i_flags &= ~RTF_GATEWAY;
1413
1414 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1415 nrt->rt6i_dst.plen = 128;
1416 nrt->u.dst.flags |= DST_HOST;
1417
1418 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1419 nrt->rt6i_nexthop = neigh_clone(neigh);
1420 /* Reset pmtu, it may be better */
1421 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1422 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1423
40e22e8f 1424 if (ip6_ins_rt(nrt))
1da177e4
LT
1425 goto out;
1426
8d71740c
TT
1427 netevent.old = &rt->u.dst;
1428 netevent.new = &nrt->u.dst;
1429 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1430
1da177e4 1431 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1432 ip6_del_rt(rt);
1da177e4
LT
1433 return;
1434 }
1435
1436out:
1437 dst_release(&rt->u.dst);
1438 return;
1439}
1440
1441/*
1442 * Handle ICMP "packet too big" messages
1443 * i.e. Path MTU discovery
1444 */
1445
1446void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1447 struct net_device *dev, u32 pmtu)
1448{
1449 struct rt6_info *rt, *nrt;
1450 int allfrag = 0;
1451
1452 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1453 if (rt == NULL)
1454 return;
1455
1456 if (pmtu >= dst_mtu(&rt->u.dst))
1457 goto out;
1458
1459 if (pmtu < IPV6_MIN_MTU) {
1460 /*
1461 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1462 * MTU (1280) and a fragment header should always be included
1463 * after a node receiving Too Big message reporting PMTU is
1464 * less than the IPv6 Minimum Link MTU.
1465 */
1466 pmtu = IPV6_MIN_MTU;
1467 allfrag = 1;
1468 }
1469
1470 /* New mtu received -> path was valid.
1471 They are sent only in response to data packets,
1472 so that this nexthop apparently is reachable. --ANK
1473 */
1474 dst_confirm(&rt->u.dst);
1475
1476 /* Host route. If it is static, it would be better
1477 not to override it, but add new one, so that
1478 when cache entry will expire old pmtu
1479 would return automatically.
1480 */
1481 if (rt->rt6i_flags & RTF_CACHE) {
1482 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1483 if (allfrag)
1484 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1485 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1486 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1487 goto out;
1488 }
1489
1490 /* Network route.
1491 Two cases are possible:
1492 1. It is connected route. Action: COW
1493 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1494 */
d5315b50 1495 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1496 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1497 else
1498 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1499
d5315b50 1500 if (nrt) {
a1e78363
YH
1501 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1502 if (allfrag)
1503 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1504
1505 /* According to RFC 1981, detecting PMTU increase shouldn't be
1506 * happened within 5 mins, the recommended timer is 10 mins.
1507 * Here this route expiration time is set to ip6_rt_mtu_expires
1508 * which is 10 mins. After 10 mins the decreased pmtu is expired
1509 * and detecting PMTU increase will be automatically happened.
1510 */
1511 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1512 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1513
40e22e8f 1514 ip6_ins_rt(nrt);
1da177e4 1515 }
1da177e4
LT
1516out:
1517 dst_release(&rt->u.dst);
1518}
1519
1520/*
1521 * Misc support functions
1522 */
1523
1524static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1525{
1526 struct rt6_info *rt = ip6_dst_alloc();
1527
1528 if (rt) {
1529 rt->u.dst.input = ort->u.dst.input;
1530 rt->u.dst.output = ort->u.dst.output;
1531
1532 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1533 rt->u.dst.dev = ort->u.dst.dev;
1534 if (rt->u.dst.dev)
1535 dev_hold(rt->u.dst.dev);
1536 rt->rt6i_idev = ort->rt6i_idev;
1537 if (rt->rt6i_idev)
1538 in6_dev_hold(rt->rt6i_idev);
1539 rt->u.dst.lastuse = jiffies;
1540 rt->rt6i_expires = 0;
1541
1542 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1543 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1544 rt->rt6i_metric = 0;
1545
1546 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1547#ifdef CONFIG_IPV6_SUBTREES
1548 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1549#endif
c71099ac 1550 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1551 }
1552 return rt;
1553}
1554
70ceb4f5
YH
1555#ifdef CONFIG_IPV6_ROUTE_INFO
1556static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1557 struct in6_addr *gwaddr, int ifindex)
1558{
1559 struct fib6_node *fn;
1560 struct rt6_info *rt = NULL;
c71099ac
TG
1561 struct fib6_table *table;
1562
1563 table = fib6_get_table(RT6_TABLE_INFO);
1564 if (table == NULL)
1565 return NULL;
70ceb4f5 1566
c71099ac
TG
1567 write_lock_bh(&table->tb6_lock);
1568 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1569 if (!fn)
1570 goto out;
1571
1572 for (rt = fn->leaf; rt; rt = rt->u.next) {
1573 if (rt->rt6i_dev->ifindex != ifindex)
1574 continue;
1575 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1576 continue;
1577 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1578 continue;
1579 dst_hold(&rt->u.dst);
1580 break;
1581 }
1582out:
c71099ac 1583 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1584 return rt;
1585}
1586
1587static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1588 struct in6_addr *gwaddr, int ifindex,
1589 unsigned pref)
1590{
86872cb5
TG
1591 struct fib6_config cfg = {
1592 .fc_table = RT6_TABLE_INFO,
1593 .fc_metric = 1024,
1594 .fc_ifindex = ifindex,
1595 .fc_dst_len = prefixlen,
1596 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1597 RTF_UP | RTF_PREF(pref),
1598 };
1599
1600 ipv6_addr_copy(&cfg.fc_dst, prefix);
1601 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1602
e317da96
YH
1603 /* We should treat it as a default route if prefix length is 0. */
1604 if (!prefixlen)
86872cb5 1605 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1606
86872cb5 1607 ip6_route_add(&cfg);
70ceb4f5
YH
1608
1609 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1610}
1611#endif
1612
1da177e4
LT
1613struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1614{
1615 struct rt6_info *rt;
c71099ac 1616 struct fib6_table *table;
1da177e4 1617
c71099ac
TG
1618 table = fib6_get_table(RT6_TABLE_DFLT);
1619 if (table == NULL)
1620 return NULL;
1da177e4 1621
c71099ac
TG
1622 write_lock_bh(&table->tb6_lock);
1623 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1da177e4 1624 if (dev == rt->rt6i_dev &&
045927ff 1625 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1626 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1627 break;
1628 }
1629 if (rt)
1630 dst_hold(&rt->u.dst);
c71099ac 1631 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1632 return rt;
1633}
1634
1635struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1636 struct net_device *dev,
1637 unsigned int pref)
1da177e4 1638{
86872cb5
TG
1639 struct fib6_config cfg = {
1640 .fc_table = RT6_TABLE_DFLT,
1641 .fc_metric = 1024,
1642 .fc_ifindex = dev->ifindex,
1643 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1644 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1645 };
1da177e4 1646
86872cb5 1647 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1648
86872cb5 1649 ip6_route_add(&cfg);
1da177e4 1650
1da177e4
LT
1651 return rt6_get_dflt_router(gwaddr, dev);
1652}
1653
1654void rt6_purge_dflt_routers(void)
1655{
1656 struct rt6_info *rt;
c71099ac
TG
1657 struct fib6_table *table;
1658
1659 /* NOTE: Keep consistent with rt6_get_dflt_router */
1660 table = fib6_get_table(RT6_TABLE_DFLT);
1661 if (table == NULL)
1662 return;
1da177e4
LT
1663
1664restart:
c71099ac
TG
1665 read_lock_bh(&table->tb6_lock);
1666 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1da177e4
LT
1667 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1668 dst_hold(&rt->u.dst);
c71099ac 1669 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1670 ip6_del_rt(rt);
1da177e4
LT
1671 goto restart;
1672 }
1673 }
c71099ac 1674 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1675}
1676
86872cb5
TG
1677static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1678 struct fib6_config *cfg)
1679{
1680 memset(cfg, 0, sizeof(*cfg));
1681
1682 cfg->fc_table = RT6_TABLE_MAIN;
1683 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1684 cfg->fc_metric = rtmsg->rtmsg_metric;
1685 cfg->fc_expires = rtmsg->rtmsg_info;
1686 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1687 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1688 cfg->fc_flags = rtmsg->rtmsg_flags;
1689
1690 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1691 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1692 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1693}
1694
1da177e4
LT
1695int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1696{
86872cb5 1697 struct fib6_config cfg;
1da177e4
LT
1698 struct in6_rtmsg rtmsg;
1699 int err;
1700
1701 switch(cmd) {
1702 case SIOCADDRT: /* Add a route */
1703 case SIOCDELRT: /* Delete a route */
1704 if (!capable(CAP_NET_ADMIN))
1705 return -EPERM;
1706 err = copy_from_user(&rtmsg, arg,
1707 sizeof(struct in6_rtmsg));
1708 if (err)
1709 return -EFAULT;
86872cb5
TG
1710
1711 rtmsg_to_fib6_config(&rtmsg, &cfg);
1712
1da177e4
LT
1713 rtnl_lock();
1714 switch (cmd) {
1715 case SIOCADDRT:
86872cb5 1716 err = ip6_route_add(&cfg);
1da177e4
LT
1717 break;
1718 case SIOCDELRT:
86872cb5 1719 err = ip6_route_del(&cfg);
1da177e4
LT
1720 break;
1721 default:
1722 err = -EINVAL;
1723 }
1724 rtnl_unlock();
1725
1726 return err;
1727 };
1728
1729 return -EINVAL;
1730}
1731
1732/*
1733 * Drop the packet on the floor
1734 */
1735
20380731 1736static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4 1737{
76d0cc1b
LL
1738 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1739 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1740 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1741
1da177e4
LT
1742 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1743 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1744 kfree_skb(skb);
1745 return 0;
1746}
1747
20380731 1748static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1749{
1750 skb->dev = skb->dst->dev;
1751 return ip6_pkt_discard(skb);
1752}
1753
1754/*
1755 * Allocate a dst for local (unicast / anycast) address.
1756 */
1757
1758struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1759 const struct in6_addr *addr,
1760 int anycast)
1761{
1762 struct rt6_info *rt = ip6_dst_alloc();
1763
1764 if (rt == NULL)
1765 return ERR_PTR(-ENOMEM);
1766
1767 dev_hold(&loopback_dev);
1768 in6_dev_hold(idev);
1769
1770 rt->u.dst.flags = DST_HOST;
1771 rt->u.dst.input = ip6_input;
1772 rt->u.dst.output = ip6_output;
1773 rt->rt6i_dev = &loopback_dev;
1774 rt->rt6i_idev = idev;
1775 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1776 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1777 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1778 rt->u.dst.obsolete = -1;
1779
1780 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1781 if (anycast)
1782 rt->rt6i_flags |= RTF_ANYCAST;
1783 else
1da177e4
LT
1784 rt->rt6i_flags |= RTF_LOCAL;
1785 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1786 if (rt->rt6i_nexthop == NULL) {
1787 dst_free((struct dst_entry *) rt);
1788 return ERR_PTR(-ENOMEM);
1789 }
1790
1791 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1792 rt->rt6i_dst.plen = 128;
c71099ac 1793 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1794
1795 atomic_set(&rt->u.dst.__refcnt, 1);
1796
1797 return rt;
1798}
1799
1800static int fib6_ifdown(struct rt6_info *rt, void *arg)
1801{
1802 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1803 rt != &ip6_null_entry) {
1804 RT6_TRACE("deleted by ifdown %p\n", rt);
1805 return -1;
1806 }
1807 return 0;
1808}
1809
1810void rt6_ifdown(struct net_device *dev)
1811{
c71099ac 1812 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1813}
1814
/* Argument bundle handed to rt6_mtu_change_route() for each route. */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
1820
1821static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1822{
1823 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1824 struct inet6_dev *idev;
1825
1826 /* In IPv6 pmtu discovery is not optional,
1827 so that RTAX_MTU lock cannot disable it.
1828 We still use this lock to block changes
1829 caused by addrconf/ndisc.
1830 */
1831
1832 idev = __in6_dev_get(arg->dev);
1833 if (idev == NULL)
1834 return 0;
1835
1836 /* For administrative MTU increase, there is no way to discover
1837 IPv6 PMTU increase, so PMTU increase should be updated here.
1838 Since RFC 1981 doesn't include administrative MTU increase
1839 update PMTU increase is a MUST. (i.e. jumbo frame)
1840 */
1841 /*
1842 If new MTU is less than route PMTU, this new MTU will be the
1843 lowest MTU in the path, update the route PMTU to reflect PMTU
1844 decreases; if new MTU is greater than route PMTU, and the
1845 old MTU is the lowest MTU in the path, update the route PMTU
1846 to reflect the increase. In this case if the other nodes' MTU
1847 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1848 PMTU discouvery.
1849 */
1850 if (rt->rt6i_dev == arg->dev &&
1851 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1852 (dst_mtu(&rt->u.dst) > arg->mtu ||
1853 (dst_mtu(&rt->u.dst) < arg->mtu &&
1854 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1855 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1856 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1857 return 0;
1858}
1859
1860void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1861{
c71099ac
TG
1862 struct rt6_mtu_change_arg arg = {
1863 .dev = dev,
1864 .mtu = mtu,
1865 };
1da177e4 1866
c71099ac 1867 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1868}
1869
86872cb5
TG
1870static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1871 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
1872 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1873 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1874 [RTA_PRIORITY] = { .type = NLA_U32 },
1875 [RTA_METRICS] = { .type = NLA_NESTED },
1876};
1877
1878static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1879 struct fib6_config *cfg)
1da177e4 1880{
86872cb5
TG
1881 struct rtmsg *rtm;
1882 struct nlattr *tb[RTA_MAX+1];
1883 int err;
1da177e4 1884
86872cb5
TG
1885 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1886 if (err < 0)
1887 goto errout;
1da177e4 1888
86872cb5
TG
1889 err = -EINVAL;
1890 rtm = nlmsg_data(nlh);
1891 memset(cfg, 0, sizeof(*cfg));
1892
1893 cfg->fc_table = rtm->rtm_table;
1894 cfg->fc_dst_len = rtm->rtm_dst_len;
1895 cfg->fc_src_len = rtm->rtm_src_len;
1896 cfg->fc_flags = RTF_UP;
1897 cfg->fc_protocol = rtm->rtm_protocol;
1898
1899 if (rtm->rtm_type == RTN_UNREACHABLE)
1900 cfg->fc_flags |= RTF_REJECT;
1901
1902 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1903 cfg->fc_nlinfo.nlh = nlh;
1904
1905 if (tb[RTA_GATEWAY]) {
1906 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1907 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1908 }
86872cb5
TG
1909
1910 if (tb[RTA_DST]) {
1911 int plen = (rtm->rtm_dst_len + 7) >> 3;
1912
1913 if (nla_len(tb[RTA_DST]) < plen)
1914 goto errout;
1915
1916 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1917 }
86872cb5
TG
1918
1919 if (tb[RTA_SRC]) {
1920 int plen = (rtm->rtm_src_len + 7) >> 3;
1921
1922 if (nla_len(tb[RTA_SRC]) < plen)
1923 goto errout;
1924
1925 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1926 }
86872cb5
TG
1927
1928 if (tb[RTA_OIF])
1929 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1930
1931 if (tb[RTA_PRIORITY])
1932 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1933
1934 if (tb[RTA_METRICS]) {
1935 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1936 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 1937 }
86872cb5
TG
1938
1939 if (tb[RTA_TABLE])
1940 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1941
1942 err = 0;
1943errout:
1944 return err;
1da177e4
LT
1945}
1946
1947int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1948{
86872cb5
TG
1949 struct fib6_config cfg;
1950 int err;
1da177e4 1951
86872cb5
TG
1952 err = rtm_to_fib6_config(skb, nlh, &cfg);
1953 if (err < 0)
1954 return err;
1955
1956 return ip6_route_del(&cfg);
1da177e4
LT
1957}
1958
1959int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1960{
86872cb5
TG
1961 struct fib6_config cfg;
1962 int err;
1da177e4 1963
86872cb5
TG
1964 err = rtm_to_fib6_config(skb, nlh, &cfg);
1965 if (err < 0)
1966 return err;
1967
1968 return ip6_route_add(&cfg);
1da177e4
LT
1969}
1970
1da177e4 1971static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1972 struct in6_addr *dst, struct in6_addr *src,
1973 int iif, int type, u32 pid, u32 seq,
1974 int prefix, unsigned int flags)
1da177e4
LT
1975{
1976 struct rtmsg *rtm;
2d7202bf 1977 struct nlmsghdr *nlh;
1da177e4 1978 struct rta_cacheinfo ci;
9e762a4a 1979 u32 table;
1da177e4
LT
1980
1981 if (prefix) { /* user wants prefix routes only */
1982 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1983 /* success since this is not a prefix route */
1984 return 1;
1985 }
1986 }
1987
2d7202bf
TG
1988 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1989 if (nlh == NULL)
1990 return -ENOBUFS;
1991
1992 rtm = nlmsg_data(nlh);
1da177e4
LT
1993 rtm->rtm_family = AF_INET6;
1994 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1995 rtm->rtm_src_len = rt->rt6i_src.plen;
1996 rtm->rtm_tos = 0;
c71099ac 1997 if (rt->rt6i_table)
9e762a4a 1998 table = rt->rt6i_table->tb6_id;
c71099ac 1999 else
9e762a4a
PM
2000 table = RT6_TABLE_UNSPEC;
2001 rtm->rtm_table = table;
2d7202bf 2002 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2003 if (rt->rt6i_flags&RTF_REJECT)
2004 rtm->rtm_type = RTN_UNREACHABLE;
2005 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2006 rtm->rtm_type = RTN_LOCAL;
2007 else
2008 rtm->rtm_type = RTN_UNICAST;
2009 rtm->rtm_flags = 0;
2010 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2011 rtm->rtm_protocol = rt->rt6i_protocol;
2012 if (rt->rt6i_flags&RTF_DYNAMIC)
2013 rtm->rtm_protocol = RTPROT_REDIRECT;
2014 else if (rt->rt6i_flags & RTF_ADDRCONF)
2015 rtm->rtm_protocol = RTPROT_KERNEL;
2016 else if (rt->rt6i_flags&RTF_DEFAULT)
2017 rtm->rtm_protocol = RTPROT_RA;
2018
2019 if (rt->rt6i_flags&RTF_CACHE)
2020 rtm->rtm_flags |= RTM_F_CLONED;
2021
2022 if (dst) {
2d7202bf 2023 NLA_PUT(skb, RTA_DST, 16, dst);
1da177e4
LT
2024 rtm->rtm_dst_len = 128;
2025 } else if (rtm->rtm_dst_len)
2d7202bf 2026 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2027#ifdef CONFIG_IPV6_SUBTREES
2028 if (src) {
2d7202bf 2029 NLA_PUT(skb, RTA_SRC, 16, src);
1da177e4
LT
2030 rtm->rtm_src_len = 128;
2031 } else if (rtm->rtm_src_len)
2d7202bf 2032 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2033#endif
2034 if (iif)
2d7202bf 2035 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2036 else if (dst) {
2037 struct in6_addr saddr_buf;
2038 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2039 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2040 }
2d7202bf 2041
1da177e4 2042 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2043 goto nla_put_failure;
2044
1da177e4 2045 if (rt->u.dst.neighbour)
2d7202bf
TG
2046 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2047
1da177e4 2048 if (rt->u.dst.dev)
2d7202bf
TG
2049 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2050
2051 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
1da177e4
LT
2052 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2053 if (rt->rt6i_expires)
2054 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2055 else
2056 ci.rta_expires = 0;
2057 ci.rta_used = rt->u.dst.__use;
2058 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2059 ci.rta_error = rt->u.dst.error;
2060 ci.rta_id = 0;
2061 ci.rta_ts = 0;
2062 ci.rta_tsage = 0;
2d7202bf
TG
2063 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2064
2065 return nlmsg_end(skb, nlh);
2066
2067nla_put_failure:
2068 return nlmsg_cancel(skb, nlh);
1da177e4
LT
2069}
2070
1b43af54 2071int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2072{
2073 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2074 int prefix;
2075
2d7202bf
TG
2076 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2077 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2078 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2079 } else
2080 prefix = 0;
2081
2082 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2083 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2084 prefix, NLM_F_MULTI);
1da177e4
LT
2085}
2086
1da177e4
LT
2087int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2088{
ab364a6f
TG
2089 struct nlattr *tb[RTA_MAX+1];
2090 struct rt6_info *rt;
1da177e4 2091 struct sk_buff *skb;
ab364a6f 2092 struct rtmsg *rtm;
1da177e4 2093 struct flowi fl;
ab364a6f 2094 int err, iif = 0;
1da177e4 2095
ab364a6f
TG
2096 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2097 if (err < 0)
2098 goto errout;
1da177e4 2099
ab364a6f 2100 err = -EINVAL;
1da177e4 2101 memset(&fl, 0, sizeof(fl));
1da177e4 2102
ab364a6f
TG
2103 if (tb[RTA_SRC]) {
2104 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2105 goto errout;
2106
2107 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2108 }
2109
2110 if (tb[RTA_DST]) {
2111 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2112 goto errout;
2113
2114 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2115 }
2116
2117 if (tb[RTA_IIF])
2118 iif = nla_get_u32(tb[RTA_IIF]);
2119
2120 if (tb[RTA_OIF])
2121 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2122
2123 if (iif) {
2124 struct net_device *dev;
2125 dev = __dev_get_by_index(iif);
2126 if (!dev) {
2127 err = -ENODEV;
ab364a6f 2128 goto errout;
1da177e4
LT
2129 }
2130 }
2131
ab364a6f
TG
2132 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2133 if (skb == NULL) {
2134 err = -ENOBUFS;
2135 goto errout;
2136 }
1da177e4 2137
ab364a6f
TG
2138 /* Reserve room for dummy headers, this skb can pass
2139 through good chunk of routing engine.
2140 */
2141 skb->mac.raw = skb->data;
2142 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2143
ab364a6f 2144 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2145 skb->dst = &rt->u.dst;
2146
ab364a6f 2147 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2148 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2149 nlh->nlmsg_seq, 0, 0);
1da177e4 2150 if (err < 0) {
ab364a6f
TG
2151 kfree_skb(skb);
2152 goto errout;
1da177e4
LT
2153 }
2154
2942e900 2155 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2156errout:
1da177e4 2157 return err;
1da177e4
LT
2158}
2159
86872cb5 2160void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2161{
2162 struct sk_buff *skb;
86872cb5
TG
2163 u32 pid = 0, seq = 0;
2164 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2165 int payload = sizeof(struct rtmsg) + 256;
2166 int err = -ENOBUFS;
2167
86872cb5
TG
2168 if (info) {
2169 pid = info->pid;
2170 nlh = info->nlh;
2171 if (nlh)
2172 seq = nlh->nlmsg_seq;
2173 }
2174
21713ebc
TG
2175 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2176 if (skb == NULL)
2177 goto errout;
2178
2179 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2180 if (err < 0) {
1da177e4 2181 kfree_skb(skb);
21713ebc 2182 goto errout;
1da177e4 2183 }
21713ebc
TG
2184
2185 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2186errout:
2187 if (err < 0)
2188 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2189}
2190
2191/*
2192 * /proc
2193 */
2194
2195#ifdef CONFIG_PROC_FS
2196
2197#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2198
/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
2207
2208static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2209{
2210 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2211 int i;
2212
2213 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2214 arg->skip++;
2215 return 0;
2216 }
2217
2218 if (arg->len >= arg->length)
2219 return 0;
2220
2221 for (i=0; i<16; i++) {
2222 sprintf(arg->buffer + arg->len, "%02x",
2223 rt->rt6i_dst.addr.s6_addr[i]);
2224 arg->len += 2;
2225 }
2226 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2227 rt->rt6i_dst.plen);
2228
2229#ifdef CONFIG_IPV6_SUBTREES
2230 for (i=0; i<16; i++) {
2231 sprintf(arg->buffer + arg->len, "%02x",
2232 rt->rt6i_src.addr.s6_addr[i]);
2233 arg->len += 2;
2234 }
2235 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2236 rt->rt6i_src.plen);
2237#else
2238 sprintf(arg->buffer + arg->len,
2239 "00000000000000000000000000000000 00 ");
2240 arg->len += 36;
2241#endif
2242
2243 if (rt->rt6i_nexthop) {
2244 for (i=0; i<16; i++) {
2245 sprintf(arg->buffer + arg->len, "%02x",
2246 rt->rt6i_nexthop->primary_key[i]);
2247 arg->len += 2;
2248 }
2249 } else {
2250 sprintf(arg->buffer + arg->len,
2251 "00000000000000000000000000000000");
2252 arg->len += 32;
2253 }
2254 arg->len += sprintf(arg->buffer + arg->len,
2255 " %08x %08x %08x %08x %8s\n",
2256 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2257 rt->u.dst.__use, rt->rt6i_flags,
2258 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2259 return 0;
2260}
2261
2262static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2263{
c71099ac
TG
2264 struct rt6_proc_arg arg = {
2265 .buffer = buffer,
2266 .offset = offset,
2267 .length = length,
2268 };
1da177e4 2269
c71099ac 2270 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2271
2272 *start = buffer;
2273 if (offset)
2274 *start += offset % RT6_INFO_LEN;
2275
2276 arg.len -= offset % RT6_INFO_LEN;
2277
2278 if (arg.len > length)
2279 arg.len = length;
2280 if (arg.len < 0)
2281 arg.len = 0;
2282
2283 return arg.len;
2284}
2285
1da177e4
LT
2286static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2287{
2288 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2289 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2290 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2291 rt6_stats.fib_rt_cache,
2292 atomic_read(&ip6_dst_ops.entries),
2293 rt6_stats.fib_discarded_routes);
2294
2295 return 0;
2296}
2297
2298static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2299{
2300 return single_open(file, rt6_stats_seq_show, NULL);
2301}
2302
2303static struct file_operations rt6_stats_seq_fops = {
2304 .owner = THIS_MODULE,
2305 .open = rt6_stats_seq_open,
2306 .read = seq_read,
2307 .llseek = seq_lseek,
2308 .release = single_release,
2309};
2310#endif /* CONFIG_PROC_FS */
2311
2312#ifdef CONFIG_SYSCTL
2313
2314static int flush_delay;
2315
2316static
2317int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2318 void __user *buffer, size_t *lenp, loff_t *ppos)
2319{
2320 if (write) {
2321 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2322 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2323 return 0;
2324 } else
2325 return -EINVAL;
2326}
2327
2328ctl_table ipv6_route_table[] = {
2329 {
2330 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2331 .procname = "flush",
2332 .data = &flush_delay,
2333 .maxlen = sizeof(int),
89c8b3a1 2334 .mode = 0200,
1da177e4
LT
2335 .proc_handler = &ipv6_sysctl_rtcache_flush
2336 },
2337 {
2338 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2339 .procname = "gc_thresh",
2340 .data = &ip6_dst_ops.gc_thresh,
2341 .maxlen = sizeof(int),
2342 .mode = 0644,
2343 .proc_handler = &proc_dointvec,
2344 },
2345 {
2346 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2347 .procname = "max_size",
2348 .data = &ip6_rt_max_size,
2349 .maxlen = sizeof(int),
2350 .mode = 0644,
2351 .proc_handler = &proc_dointvec,
2352 },
2353 {
2354 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2355 .procname = "gc_min_interval",
2356 .data = &ip6_rt_gc_min_interval,
2357 .maxlen = sizeof(int),
2358 .mode = 0644,
2359 .proc_handler = &proc_dointvec_jiffies,
2360 .strategy = &sysctl_jiffies,
2361 },
2362 {
2363 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2364 .procname = "gc_timeout",
2365 .data = &ip6_rt_gc_timeout,
2366 .maxlen = sizeof(int),
2367 .mode = 0644,
2368 .proc_handler = &proc_dointvec_jiffies,
2369 .strategy = &sysctl_jiffies,
2370 },
2371 {
2372 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2373 .procname = "gc_interval",
2374 .data = &ip6_rt_gc_interval,
2375 .maxlen = sizeof(int),
2376 .mode = 0644,
2377 .proc_handler = &proc_dointvec_jiffies,
2378 .strategy = &sysctl_jiffies,
2379 },
2380 {
2381 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2382 .procname = "gc_elasticity",
2383 .data = &ip6_rt_gc_elasticity,
2384 .maxlen = sizeof(int),
2385 .mode = 0644,
2386 .proc_handler = &proc_dointvec_jiffies,
2387 .strategy = &sysctl_jiffies,
2388 },
2389 {
2390 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2391 .procname = "mtu_expires",
2392 .data = &ip6_rt_mtu_expires,
2393 .maxlen = sizeof(int),
2394 .mode = 0644,
2395 .proc_handler = &proc_dointvec_jiffies,
2396 .strategy = &sysctl_jiffies,
2397 },
2398 {
2399 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2400 .procname = "min_adv_mss",
2401 .data = &ip6_rt_min_advmss,
2402 .maxlen = sizeof(int),
2403 .mode = 0644,
2404 .proc_handler = &proc_dointvec_jiffies,
2405 .strategy = &sysctl_jiffies,
2406 },
2407 {
2408 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2409 .procname = "gc_min_interval_ms",
2410 .data = &ip6_rt_gc_min_interval,
2411 .maxlen = sizeof(int),
2412 .mode = 0644,
2413 .proc_handler = &proc_dointvec_ms_jiffies,
2414 .strategy = &sysctl_ms_jiffies,
2415 },
2416 { .ctl_name = 0 }
2417};
2418
2419#endif
2420
2421void __init ip6_route_init(void)
2422{
2423 struct proc_dir_entry *p;
2424
2425 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2426 sizeof(struct rt6_info),
2427 0, SLAB_HWCACHE_ALIGN,
2428 NULL, NULL);
2429 if (!ip6_dst_ops.kmem_cachep)
2430 panic("cannot create ip6_dst_cache");
2431
2432 fib6_init();
2433#ifdef CONFIG_PROC_FS
2434 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2435 if (p)
2436 p->owner = THIS_MODULE;
2437
2438 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2439#endif
2440#ifdef CONFIG_XFRM
2441 xfrm6_init();
2442#endif
101367c2
TG
2443#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2444 fib6_rules_init();
2445#endif
1da177e4
LT
2446}
2447
2448void ip6_route_cleanup(void)
2449{
101367c2
TG
2450#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2451 fib6_rules_cleanup();
2452#endif
1da177e4
LT
2453#ifdef CONFIG_PROC_FS
2454 proc_net_remove("ipv6_route");
2455 proc_net_remove("rt6_stats");
2456#endif
2457#ifdef CONFIG_XFRM
2458 xfrm6_fini();
2459#endif
2460 rt6_ifdown(NULL);
2461 fib6_gc_cleanup();
2462 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2463}