]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[IPV6] ROUTE: Purge clones on other trees when deleting a route.
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
1da177e4
LT
38#include <linux/if_arp.h>
39
40#ifdef CONFIG_PROC_FS
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#endif
44
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
554cfb7e
YH
77#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
1da177e4
LT
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
70ceb4f5
YH
101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
1da177e4
LT
109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
101367c2
TG
143#ifdef CONFIG_IPV6_MULTIPLE_TABLES
144
145struct rt6_info ip6_prohibit_entry = {
146 .u = {
147 .dst = {
148 .__refcnt = ATOMIC_INIT(1),
149 .__use = 1,
150 .dev = &loopback_dev,
151 .obsolete = -1,
152 .error = -EACCES,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_discard,
155 .output = ip6_pkt_discard_out,
156 .ops = &ip6_dst_ops,
157 .path = (struct dst_entry*)&ip6_prohibit_entry,
158 }
159 },
160 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
161 .rt6i_metric = ~(u32) 0,
162 .rt6i_ref = ATOMIC_INIT(1),
163};
164
165struct rt6_info ip6_blk_hole_entry = {
166 .u = {
167 .dst = {
168 .__refcnt = ATOMIC_INIT(1),
169 .__use = 1,
170 .dev = &loopback_dev,
171 .obsolete = -1,
172 .error = -EINVAL,
173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
174 .input = ip6_pkt_discard,
175 .output = ip6_pkt_discard_out,
176 .ops = &ip6_dst_ops,
177 .path = (struct dst_entry*)&ip6_blk_hole_entry,
178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183};
184
185#endif
186
1da177e4
LT
187/* allocate dst with ip6_dst_ops */
188static __inline__ struct rt6_info *ip6_dst_alloc(void)
189{
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
201 }
202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
209
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
215 }
216 }
217}
218
219static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220{
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
223}
224
c71099ac
TG
225static inline int rt6_need_strict(struct in6_addr *daddr)
226{
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229}
230
1da177e4 231/*
c71099ac 232 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
233 */
234
235static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
238{
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
241
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
255 }
256 local = sprt;
257 }
258 }
259
260 if (local)
261 return local;
262
263 if (strict)
264 return &ip6_null_entry;
265 }
266 return rt;
267}
268
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not in
 * a NUD_VALID state and the per-device rtr_probe_interval has elapsed
 * since neigh->updated, send a Neighbour Solicitation to the nexthop's
 * solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* Cheap unlocked test first; re-checked under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
304
1da177e4 305/*
554cfb7e 306 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 307 */
554cfb7e
YH
308static int inline rt6_check_dev(struct rt6_info *rt, int oif)
309{
310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
317}
1da177e4 318
554cfb7e 319static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 320{
554cfb7e
YH
321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
4d0c5911
YH
323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
554cfb7e
YH
327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
4d0c5911 329 m = 2;
554cfb7e 330 read_unlock_bh(&neigh->lock);
1da177e4 331 }
554cfb7e 332 return m;
1da177e4
LT
333}
334
554cfb7e
YH
335static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
1da177e4 337{
4d0c5911
YH
338 int m, n;
339
340 m = rt6_check_dev(rt, oif);
554cfb7e
YH
341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
ebacaaa0
YH
343#ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345#endif
4d0c5911
YH
346 n = rt6_check_neigh(rt);
347 if (n > 1)
ebacaaa0 348 m |= 16;
4d0c5911 349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
554cfb7e
YH
350 return -1;
351 return m;
352}
353
354static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
356{
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
1da177e4 361
554cfb7e
YH
362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 364
554cfb7e 365 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
554cfb7e
YH
367 rt = rt->u.next) {
368 int m;
1da177e4 369
554cfb7e 370 if (rt6_check_expired(rt))
1da177e4
LT
371 continue;
372
554cfb7e
YH
373 last = rt;
374
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
1da177e4 377 continue;
1da177e4 378
554cfb7e 379 if (m > mpri) {
27097255 380 rt6_probe(match);
554cfb7e 381 match = rt;
1da177e4 382 mpri = m;
27097255
YH
383 } else {
384 rt6_probe(rt);
1da177e4
LT
385 }
386 }
387
554cfb7e
YH
388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
34af946a 392 static DEFINE_SPINLOCK(lock);
c302e6d5 393 spin_lock(&lock);
554cfb7e
YH
394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
c302e6d5 397 spin_unlock(&lock);
1da177e4 398 }
1da177e4 399
554cfb7e
YH
400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
1da177e4 402
554cfb7e 403 return (match ? match : &ip6_null_entry);
1da177e4
LT
404}
405
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information Option (RFC 4191, section 2.3).
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option (cast to struct route_info)
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router, used as the gateway
 *
 * Depending on the advertised lifetime the matching route-info route is
 * deleted, added, or has its preference and expiry refreshed.
 *
 * Returns 0 on success or when there is nothing to do, -EINVAL if the
 * option is malformed or must be ignored.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	/* Compare as int so that a negative len cannot wrap past the test. */
	if (len < (int)sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length.  The option length is in
	 * units of 8 octets and includes the fixed header, so (RFC 4191):
	 *   prefix_len  >  64  requires length == 3
	 *   prefix_len  >   0  requires length >= 2
	 *   prefix_len ==   0  allows   length 1, 2 or 3
	 * Anything looser would let the prefix be read past the option.
	 */
	if (rinfo->length < 1 || rinfo->length > 3)
		return -EINVAL;
	if ((rinfo->length << 3) > len)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	/* An option carrying the reserved preference value must be ignored. */
	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	/* The lifetime arrives in network byte order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
483
982f56f3
YH
/*
 * BACKTRACK - climb the fib6 tree when the current node yielded no route.
 *
 * Expects the caller to have locals "rt" and "fn" and labels "out" and
 * "restart".  If rt is &ip6_null_entry, walk from fn towards the root:
 * jump to "out" on reaching a node flagged RTN_TL_ROOT, descend into the
 * parent's source subtree when there is one that is not fn itself, and
 * jump to "restart" as soon as a node flagged RTN_RTINFO is found.
 */
#define BACKTRACK(saddr) \
do { \
	if (rt == &ip6_null_entry) { \
		struct fib6_node *up; \
		while (fn) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			up = fn->parent; \
			if (FIB6_SUBTREE(up) && FIB6_SUBTREE(up) != fn) \
				fn = fib6_lookup(up->subtree, NULL, saddr); \
			else \
				fn = up; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)
c71099ac
TG
501
502static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
503 struct flowi *fl, int flags)
1da177e4
LT
504{
505 struct fib6_node *fn;
506 struct rt6_info *rt;
507
c71099ac
TG
508 read_lock_bh(&table->tb6_lock);
509 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
510restart:
511 rt = fn->leaf;
512 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
982f56f3 513 BACKTRACK(&fl->fl6_src);
1da177e4 514 dst_hold(&rt->u.dst);
c71099ac
TG
515out:
516 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
517
518 rt->u.dst.lastuse = jiffies;
c71099ac
TG
519 rt->u.dst.__use++;
520
521 return rt;
522
523}
524
525struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
526 int oif, int strict)
527{
528 struct flowi fl = {
529 .oif = oif,
530 .nl_u = {
531 .ip6_u = {
532 .daddr = *daddr,
533 /* TODO: saddr */
534 },
535 },
536 };
537 struct dst_entry *dst;
538 int flags = strict ? RT6_F_STRICT : 0;
539
540 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
541 if (dst->error == 0)
542 return (struct rt6_info *) dst;
543
544 dst_release(dst);
545
1da177e4
LT
546 return NULL;
547}
548
c71099ac 549/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
550 It takes new route entry, the addition fails by any reason the
551 route is freed. In any case, if caller does not hold it, it may
552 be destroyed.
553 */
554
86872cb5 555static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
556{
557 int err;
c71099ac 558 struct fib6_table *table;
1da177e4 559
c71099ac
TG
560 table = rt->rt6i_table;
561 write_lock_bh(&table->tb6_lock);
86872cb5 562 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 563 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
564
565 return err;
566}
567
40e22e8f
TG
568int ip6_ins_rt(struct rt6_info *rt)
569{
86872cb5 570 return __ip6_ins_rt(rt, NULL);
40e22e8f
TG
571}
572
95a9a5ba
YH
573static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
574 struct in6_addr *saddr)
1da177e4 575{
1da177e4
LT
576 struct rt6_info *rt;
577
578 /*
579 * Clone the route.
580 */
581
582 rt = ip6_rt_copy(ort);
583
584 if (rt) {
58c4fb86
YH
585 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
586 if (rt->rt6i_dst.plen != 128 &&
587 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
588 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 589 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 590 }
1da177e4 591
58c4fb86 592 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
593 rt->rt6i_dst.plen = 128;
594 rt->rt6i_flags |= RTF_CACHE;
595 rt->u.dst.flags |= DST_HOST;
596
597#ifdef CONFIG_IPV6_SUBTREES
598 if (rt->rt6i_src.plen && saddr) {
599 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
600 rt->rt6i_src.plen = 128;
601 }
602#endif
603
604 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
605
95a9a5ba 606 }
1da177e4 607
95a9a5ba
YH
608 return rt;
609}
1da177e4 610
299d9939
YH
611static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
612{
613 struct rt6_info *rt = ip6_rt_copy(ort);
614 if (rt) {
615 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
616 rt->rt6i_dst.plen = 128;
617 rt->rt6i_flags |= RTF_CACHE;
618 if (rt->rt6i_flags & RTF_REJECT)
619 rt->u.dst.error = ort->u.dst.error;
620 rt->u.dst.flags |= DST_HOST;
621 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
622 }
623 return rt;
624}
625
8ce11e6a
AB
626static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
627 struct flowi *fl, int flags)
1da177e4
LT
628{
629 struct fib6_node *fn;
519fbd87 630 struct rt6_info *rt, *nrt;
c71099ac 631 int strict = 0;
1da177e4 632 int attempts = 3;
519fbd87 633 int err;
8238dd06 634 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 635
c71099ac
TG
636 if (flags & RT6_F_STRICT)
637 strict = RT6_SELECT_F_IFACE;
1da177e4
LT
638
639relookup:
c71099ac 640 read_lock_bh(&table->tb6_lock);
1da177e4 641
8238dd06 642restart_2:
c71099ac 643 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
644
645restart:
c71099ac 646 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
982f56f3 647 BACKTRACK(&fl->fl6_src);
8238dd06
YH
648 if (rt == &ip6_null_entry ||
649 rt->rt6i_flags & RTF_CACHE)
1ddef044 650 goto out;
1da177e4 651
fb9de91e 652 dst_hold(&rt->u.dst);
c71099ac 653 read_unlock_bh(&table->tb6_lock);
fb9de91e 654
519fbd87 655 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 656 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
657 else {
658#if CLONE_OFFLINK_ROUTE
c71099ac 659 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
660#else
661 goto out2;
662#endif
663 }
e40cf353 664
519fbd87
YH
665 dst_release(&rt->u.dst);
666 rt = nrt ? : &ip6_null_entry;
1da177e4 667
519fbd87
YH
668 dst_hold(&rt->u.dst);
669 if (nrt) {
40e22e8f 670 err = ip6_ins_rt(nrt);
519fbd87 671 if (!err)
1da177e4 672 goto out2;
1da177e4 673 }
1da177e4 674
519fbd87
YH
675 if (--attempts <= 0)
676 goto out2;
677
678 /*
c71099ac 679 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
680 * released someone could insert this route. Relookup.
681 */
682 dst_release(&rt->u.dst);
683 goto relookup;
684
685out:
8238dd06
YH
686 if (reachable) {
687 reachable = 0;
688 goto restart_2;
689 }
519fbd87 690 dst_hold(&rt->u.dst);
c71099ac 691 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
692out2:
693 rt->u.dst.lastuse = jiffies;
694 rt->u.dst.__use++;
c71099ac
TG
695
696 return rt;
1da177e4
LT
697}
698
c71099ac
TG
699void ip6_route_input(struct sk_buff *skb)
700{
701 struct ipv6hdr *iph = skb->nh.ipv6h;
702 struct flowi fl = {
703 .iif = skb->dev->ifindex,
704 .nl_u = {
705 .ip6_u = {
706 .daddr = iph->daddr,
707 .saddr = iph->saddr,
708 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
709 },
710 },
711 .proto = iph->nexthdr,
712 };
713 int flags = 0;
714
715 if (rt6_need_strict(&iph->daddr))
716 flags |= RT6_F_STRICT;
717
718 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
719}
720
721static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
722 struct flowi *fl, int flags)
1da177e4
LT
723{
724 struct fib6_node *fn;
519fbd87 725 struct rt6_info *rt, *nrt;
c71099ac 726 int strict = 0;
1da177e4 727 int attempts = 3;
519fbd87 728 int err;
8238dd06 729 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 730
c71099ac
TG
731 if (flags & RT6_F_STRICT)
732 strict = RT6_SELECT_F_IFACE;
1da177e4
LT
733
734relookup:
c71099ac 735 read_lock_bh(&table->tb6_lock);
1da177e4 736
8238dd06 737restart_2:
c71099ac 738 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
739
740restart:
8238dd06 741 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
982f56f3 742 BACKTRACK(&fl->fl6_src);
8238dd06
YH
743 if (rt == &ip6_null_entry ||
744 rt->rt6i_flags & RTF_CACHE)
1da177e4 745 goto out;
1da177e4 746
fb9de91e 747 dst_hold(&rt->u.dst);
c71099ac 748 read_unlock_bh(&table->tb6_lock);
fb9de91e 749
519fbd87 750 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 751 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
752 else {
753#if CLONE_OFFLINK_ROUTE
754 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
755#else
756 goto out2;
757#endif
758 }
1da177e4 759
519fbd87
YH
760 dst_release(&rt->u.dst);
761 rt = nrt ? : &ip6_null_entry;
1da177e4 762
519fbd87
YH
763 dst_hold(&rt->u.dst);
764 if (nrt) {
40e22e8f 765 err = ip6_ins_rt(nrt);
519fbd87 766 if (!err)
1da177e4 767 goto out2;
1da177e4 768 }
e40cf353 769
519fbd87
YH
770 if (--attempts <= 0)
771 goto out2;
772
773 /*
c71099ac 774 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
775 * released someone could insert this route. Relookup.
776 */
777 dst_release(&rt->u.dst);
778 goto relookup;
779
780out:
8238dd06
YH
781 if (reachable) {
782 reachable = 0;
783 goto restart_2;
784 }
519fbd87 785 dst_hold(&rt->u.dst);
c71099ac 786 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
787out2:
788 rt->u.dst.lastuse = jiffies;
789 rt->u.dst.__use++;
c71099ac
TG
790 return rt;
791}
792
793struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
794{
795 int flags = 0;
796
797 if (rt6_need_strict(&fl->fl6_dst))
798 flags |= RT6_F_STRICT;
799
800 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
1da177e4
LT
801}
802
803
804/*
805 * Destination cache support functions
806 */
807
808static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
809{
810 struct rt6_info *rt;
811
812 rt = (struct rt6_info *) dst;
813
814 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
815 return dst;
816
817 return NULL;
818}
819
820static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
821{
822 struct rt6_info *rt = (struct rt6_info *) dst;
823
824 if (rt) {
825 if (rt->rt6i_flags & RTF_CACHE)
e0a1ad73 826 ip6_del_rt(rt);
1da177e4
LT
827 else
828 dst_release(dst);
829 }
830 return NULL;
831}
832
833static void ip6_link_failure(struct sk_buff *skb)
834{
835 struct rt6_info *rt;
836
837 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
838
839 rt = (struct rt6_info *) skb->dst;
840 if (rt) {
841 if (rt->rt6i_flags&RTF_CACHE) {
842 dst_set_expires(&rt->u.dst, 0);
843 rt->rt6i_flags |= RTF_EXPIRES;
844 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
845 rt->rt6i_node->fn_sernum = -1;
846 }
847}
848
849static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
850{
851 struct rt6_info *rt6 = (struct rt6_info*)dst;
852
853 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
854 rt6->rt6i_flags |= RTF_MODIFIED;
855 if (mtu < IPV6_MIN_MTU) {
856 mtu = IPV6_MIN_MTU;
857 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
858 }
859 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 860 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
861 }
862}
863
1da177e4
LT
864static int ipv6_get_mtu(struct net_device *dev);
865
866static inline unsigned int ipv6_advmss(unsigned int mtu)
867{
868 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
869
870 if (mtu < ip6_rt_min_advmss)
871 mtu = ip6_rt_min_advmss;
872
873 /*
874 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
875 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
876 * IPV6_MAXPLEN is also valid and means: "any MSS,
877 * rely only on pmtu discovery"
878 */
879 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
880 mtu = IPV6_MAXPLEN;
881 return mtu;
882}
883
5d0bbeeb 884static struct dst_entry *ndisc_dst_gc_list;
8ce11e6a 885static DEFINE_SPINLOCK(ndisc_lock);
5d0bbeeb 886
1da177e4
LT
887struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
888 struct neighbour *neigh,
889 struct in6_addr *addr,
890 int (*output)(struct sk_buff *))
891{
892 struct rt6_info *rt;
893 struct inet6_dev *idev = in6_dev_get(dev);
894
895 if (unlikely(idev == NULL))
896 return NULL;
897
898 rt = ip6_dst_alloc();
899 if (unlikely(rt == NULL)) {
900 in6_dev_put(idev);
901 goto out;
902 }
903
904 dev_hold(dev);
905 if (neigh)
906 neigh_hold(neigh);
907 else
908 neigh = ndisc_get_neigh(dev, addr);
909
910 rt->rt6i_dev = dev;
911 rt->rt6i_idev = idev;
912 rt->rt6i_nexthop = neigh;
913 atomic_set(&rt->u.dst.__refcnt, 1);
914 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
915 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
916 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
917 rt->u.dst.output = output;
918
919#if 0 /* there's no chance to use these for ndisc */
920 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
921 ? DST_HOST
922 : 0;
923 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
924 rt->rt6i_dst.plen = 128;
925#endif
926
5d0bbeeb 927 spin_lock_bh(&ndisc_lock);
1da177e4
LT
928 rt->u.dst.next = ndisc_dst_gc_list;
929 ndisc_dst_gc_list = &rt->u.dst;
5d0bbeeb 930 spin_unlock_bh(&ndisc_lock);
1da177e4
LT
931
932 fib6_force_start_gc();
933
934out:
935 return (struct dst_entry *)rt;
936}
937
938int ndisc_dst_gc(int *more)
939{
940 struct dst_entry *dst, *next, **pprev;
941 int freed;
942
943 next = NULL;
5d0bbeeb
TG
944 freed = 0;
945
946 spin_lock_bh(&ndisc_lock);
1da177e4 947 pprev = &ndisc_dst_gc_list;
5d0bbeeb 948
1da177e4
LT
949 while ((dst = *pprev) != NULL) {
950 if (!atomic_read(&dst->__refcnt)) {
951 *pprev = dst->next;
952 dst_free(dst);
953 freed++;
954 } else {
955 pprev = &dst->next;
956 (*more)++;
957 }
958 }
959
5d0bbeeb
TG
960 spin_unlock_bh(&ndisc_lock);
961
1da177e4
LT
962 return freed;
963}
964
965static int ip6_dst_gc(void)
966{
967 static unsigned expire = 30*HZ;
968 static unsigned long last_gc;
969 unsigned long now = jiffies;
970
971 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
972 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
973 goto out;
974
975 expire++;
976 fib6_run_gc(expire);
977 last_gc = now;
978 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
979 expire = ip6_rt_gc_timeout>>1;
980
981out:
982 expire -= expire>>ip6_rt_gc_elasticity;
983 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
984}
985
986/* Clean host part of a prefix. Not necessary in radix tree,
987 but results in cleaner routing tables.
988
989 Remove it only when all the things will work!
990 */
991
992static int ipv6_get_mtu(struct net_device *dev)
993{
994 int mtu = IPV6_MIN_MTU;
995 struct inet6_dev *idev;
996
997 idev = in6_dev_get(dev);
998 if (idev) {
999 mtu = idev->cnf.mtu6;
1000 in6_dev_put(idev);
1001 }
1002 return mtu;
1003}
1004
1005int ipv6_get_hoplimit(struct net_device *dev)
1006{
1007 int hoplimit = ipv6_devconf.hop_limit;
1008 struct inet6_dev *idev;
1009
1010 idev = in6_dev_get(dev);
1011 if (idev) {
1012 hoplimit = idev->cnf.hop_limit;
1013 in6_dev_put(idev);
1014 }
1015 return hoplimit;
1016}
1017
1018/*
1019 *
1020 */
1021
86872cb5 1022int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1023{
1024 int err;
1da177e4
LT
1025 struct rt6_info *rt = NULL;
1026 struct net_device *dev = NULL;
1027 struct inet6_dev *idev = NULL;
c71099ac 1028 struct fib6_table *table;
1da177e4
LT
1029 int addr_type;
1030
86872cb5 1031 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1032 return -EINVAL;
1033#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1034 if (cfg->fc_src_len)
1da177e4
LT
1035 return -EINVAL;
1036#endif
86872cb5 1037 if (cfg->fc_ifindex) {
1da177e4 1038 err = -ENODEV;
86872cb5 1039 dev = dev_get_by_index(cfg->fc_ifindex);
1da177e4
LT
1040 if (!dev)
1041 goto out;
1042 idev = in6_dev_get(dev);
1043 if (!idev)
1044 goto out;
1045 }
1046
86872cb5
TG
1047 if (cfg->fc_metric == 0)
1048 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1049
86872cb5 1050 table = fib6_new_table(cfg->fc_table);
c71099ac
TG
1051 if (table == NULL) {
1052 err = -ENOBUFS;
1053 goto out;
1054 }
1055
1da177e4
LT
1056 rt = ip6_dst_alloc();
1057
1058 if (rt == NULL) {
1059 err = -ENOMEM;
1060 goto out;
1061 }
1062
1063 rt->u.dst.obsolete = -1;
86872cb5 1064 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1da177e4 1065
86872cb5
TG
1066 if (cfg->fc_protocol == RTPROT_UNSPEC)
1067 cfg->fc_protocol = RTPROT_BOOT;
1068 rt->rt6i_protocol = cfg->fc_protocol;
1069
1070 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1071
1072 if (addr_type & IPV6_ADDR_MULTICAST)
1073 rt->u.dst.input = ip6_mc_input;
1074 else
1075 rt->u.dst.input = ip6_forward;
1076
1077 rt->u.dst.output = ip6_output;
1078
86872cb5
TG
1079 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1080 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1081 if (rt->rt6i_dst.plen == 128)
1082 rt->u.dst.flags = DST_HOST;
1083
1084#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1085 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1086 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1087#endif
1088
86872cb5 1089 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1090
1091 /* We cannot add true routes via loopback here,
1092 they would result in kernel looping; promote them to reject routes
1093 */
86872cb5 1094 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1095 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1096 /* hold loopback dev/idev if we haven't done so. */
1097 if (dev != &loopback_dev) {
1098 if (dev) {
1099 dev_put(dev);
1100 in6_dev_put(idev);
1101 }
1102 dev = &loopback_dev;
1103 dev_hold(dev);
1104 idev = in6_dev_get(dev);
1105 if (!idev) {
1106 err = -ENODEV;
1107 goto out;
1108 }
1109 }
1110 rt->u.dst.output = ip6_pkt_discard_out;
1111 rt->u.dst.input = ip6_pkt_discard;
1112 rt->u.dst.error = -ENETUNREACH;
1113 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1114 goto install_route;
1115 }
1116
86872cb5 1117 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1118 struct in6_addr *gw_addr;
1119 int gwa_type;
1120
86872cb5
TG
1121 gw_addr = &cfg->fc_gateway;
1122 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1123 gwa_type = ipv6_addr_type(gw_addr);
1124
1125 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1126 struct rt6_info *grt;
1127
1128 /* IPv6 strictly inhibits using not link-local
1129 addresses as nexthop address.
1130 Otherwise, router will not able to send redirects.
1131 It is very good, but in some (rare!) circumstances
1132 (SIT, PtP, NBMA NOARP links) it is handy to allow
1133 some exceptions. --ANK
1134 */
1135 err = -EINVAL;
1136 if (!(gwa_type&IPV6_ADDR_UNICAST))
1137 goto out;
1138
86872cb5 1139 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1140
1141 err = -EHOSTUNREACH;
1142 if (grt == NULL)
1143 goto out;
1144 if (dev) {
1145 if (dev != grt->rt6i_dev) {
1146 dst_release(&grt->u.dst);
1147 goto out;
1148 }
1149 } else {
1150 dev = grt->rt6i_dev;
1151 idev = grt->rt6i_idev;
1152 dev_hold(dev);
1153 in6_dev_hold(grt->rt6i_idev);
1154 }
1155 if (!(grt->rt6i_flags&RTF_GATEWAY))
1156 err = 0;
1157 dst_release(&grt->u.dst);
1158
1159 if (err)
1160 goto out;
1161 }
1162 err = -EINVAL;
1163 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1164 goto out;
1165 }
1166
1167 err = -ENODEV;
1168 if (dev == NULL)
1169 goto out;
1170
86872cb5 1171 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1172 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1173 if (IS_ERR(rt->rt6i_nexthop)) {
1174 err = PTR_ERR(rt->rt6i_nexthop);
1175 rt->rt6i_nexthop = NULL;
1176 goto out;
1177 }
1178 }
1179
86872cb5 1180 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1181
1182install_route:
86872cb5
TG
1183 if (cfg->fc_mx) {
1184 struct nlattr *nla;
1185 int remaining;
1186
1187 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1188 int type = nla->nla_type;
1189
1190 if (type) {
1191 if (type > RTAX_MAX) {
1da177e4
LT
1192 err = -EINVAL;
1193 goto out;
1194 }
86872cb5
TG
1195
1196 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1197 }
1da177e4
LT
1198 }
1199 }
1200
1201 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1202 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1203 if (!rt->u.dst.metrics[RTAX_MTU-1])
1204 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1205 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1206 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1207 rt->u.dst.dev = dev;
1208 rt->rt6i_idev = idev;
c71099ac 1209 rt->rt6i_table = table;
86872cb5 1210 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1211
1212out:
1213 if (dev)
1214 dev_put(dev);
1215 if (idev)
1216 in6_dev_put(idev);
1217 if (rt)
1218 dst_free((struct dst_entry *) rt);
1219 return err;
1220}
1221
86872cb5 1222static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1223{
1224 int err;
c71099ac 1225 struct fib6_table *table;
1da177e4 1226
6c813a72
PM
1227 if (rt == &ip6_null_entry)
1228 return -ENOENT;
1229
c71099ac
TG
1230 table = rt->rt6i_table;
1231 write_lock_bh(&table->tb6_lock);
1da177e4 1232
86872cb5 1233 err = fib6_del(rt, info);
1da177e4
LT
1234 dst_release(&rt->u.dst);
1235
c71099ac 1236 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1237
1238 return err;
1239}
1240
e0a1ad73
TG
1241int ip6_del_rt(struct rt6_info *rt)
1242{
86872cb5 1243 return __ip6_del_rt(rt, NULL);
e0a1ad73
TG
1244}
1245
86872cb5 1246static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1247{
c71099ac 1248 struct fib6_table *table;
1da177e4
LT
1249 struct fib6_node *fn;
1250 struct rt6_info *rt;
1251 int err = -ESRCH;
1252
86872cb5 1253 table = fib6_get_table(cfg->fc_table);
c71099ac
TG
1254 if (table == NULL)
1255 return err;
1256
1257 read_lock_bh(&table->tb6_lock);
1da177e4 1258
c71099ac 1259 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1260 &cfg->fc_dst, cfg->fc_dst_len,
1261 &cfg->fc_src, cfg->fc_src_len);
1da177e4
LT
1262
1263 if (fn) {
1264 for (rt = fn->leaf; rt; rt = rt->u.next) {
86872cb5 1265 if (cfg->fc_ifindex &&
1da177e4 1266 (rt->rt6i_dev == NULL ||
86872cb5 1267 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1268 continue;
86872cb5
TG
1269 if (cfg->fc_flags & RTF_GATEWAY &&
1270 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1271 continue;
86872cb5 1272 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1273 continue;
1274 dst_hold(&rt->u.dst);
c71099ac 1275 read_unlock_bh(&table->tb6_lock);
1da177e4 1276
86872cb5 1277 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1278 }
1279 }
c71099ac 1280 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1281
1282 return err;
1283}
1284
1285/*
1286 * Handle redirects
1287 */
a6279458
YH
1288struct ip6rd_flowi {
1289 struct flowi fl;
1290 struct in6_addr gateway;
1291};
1292
1293static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1294 struct flowi *fl,
1295 int flags)
1da177e4 1296{
a6279458
YH
1297 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1298 struct rt6_info *rt;
e843b9e1 1299 struct fib6_node *fn;
c71099ac 1300
1da177e4 1301 /*
e843b9e1
YH
1302 * Get the "current" route for this destination and
1303 * check if the redirect has come from approriate router.
1304 *
1305 * RFC 2461 specifies that redirects should only be
1306 * accepted if they come from the nexthop to the target.
1307 * Due to the way the routes are chosen, this notion
1308 * is a bit fuzzy and one might need to check all possible
1309 * routes.
1da177e4 1310 */
1da177e4 1311
c71099ac 1312 read_lock_bh(&table->tb6_lock);
a6279458 1313 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1
YH
1314restart:
1315 for (rt = fn->leaf; rt; rt = rt->u.next) {
1316 /*
1317 * Current route is on-link; redirect is always invalid.
1318 *
1319 * Seems, previous statement is not true. It could
1320 * be node, which looks for us as on-link (f.e. proxy ndisc)
1321 * But then router serving it might decide, that we should
1322 * know truth 8)8) --ANK (980726).
1323 */
1324 if (rt6_check_expired(rt))
1325 continue;
1326 if (!(rt->rt6i_flags & RTF_GATEWAY))
1327 continue;
a6279458 1328 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1329 continue;
a6279458 1330 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1331 continue;
1332 break;
1333 }
a6279458
YH
1334
1335 if (!rt) {
1336 if (rt6_need_strict(&fl->fl6_dst)) {
1337 while ((fn = fn->parent) != NULL) {
1338 if (fn->fn_flags & RTN_ROOT)
1339 break;
1340 if (fn->fn_flags & RTN_RTINFO)
1341 goto restart;
1342 }
1da177e4 1343 }
a6279458 1344 rt = &ip6_null_entry;
e843b9e1 1345 }
a6279458
YH
1346 dst_hold(&rt->u.dst);
1347
c71099ac 1348 read_unlock_bh(&table->tb6_lock);
e843b9e1 1349
a6279458
YH
1350 return rt;
1351};
1352
1353static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1354 struct in6_addr *src,
1355 struct in6_addr *gateway,
1356 struct net_device *dev)
1357{
1358 struct ip6rd_flowi rdfl = {
1359 .fl = {
1360 .oif = dev->ifindex,
1361 .nl_u = {
1362 .ip6_u = {
1363 .daddr = *dest,
1364 .saddr = *src,
1365 },
1366 },
1367 },
1368 .gateway = *gateway,
1369 };
1370 int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0;
1371
1372 return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
1373}
1374
1375void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1376 struct in6_addr *saddr,
1377 struct neighbour *neigh, u8 *lladdr, int on_link)
1378{
1379 struct rt6_info *rt, *nrt = NULL;
1380 struct netevent_redirect netevent;
1381
1382 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1383
1384 if (rt == &ip6_null_entry) {
1da177e4
LT
1385 if (net_ratelimit())
1386 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1387 "for redirect target\n");
a6279458 1388 goto out;
1da177e4
LT
1389 }
1390
1da177e4
LT
1391 /*
1392 * We have finally decided to accept it.
1393 */
1394
1395 neigh_update(neigh, lladdr, NUD_STALE,
1396 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1397 NEIGH_UPDATE_F_OVERRIDE|
1398 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1399 NEIGH_UPDATE_F_ISROUTER))
1400 );
1401
1402 /*
1403 * Redirect received -> path was valid.
1404 * Look, redirects are sent only in response to data packets,
1405 * so that this nexthop apparently is reachable. --ANK
1406 */
1407 dst_confirm(&rt->u.dst);
1408
1409 /* Duplicate redirect: silently ignore. */
1410 if (neigh == rt->u.dst.neighbour)
1411 goto out;
1412
1413 nrt = ip6_rt_copy(rt);
1414 if (nrt == NULL)
1415 goto out;
1416
1417 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1418 if (on_link)
1419 nrt->rt6i_flags &= ~RTF_GATEWAY;
1420
1421 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1422 nrt->rt6i_dst.plen = 128;
1423 nrt->u.dst.flags |= DST_HOST;
1424
1425 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1426 nrt->rt6i_nexthop = neigh_clone(neigh);
1427 /* Reset pmtu, it may be better */
1428 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1429 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1430
40e22e8f 1431 if (ip6_ins_rt(nrt))
1da177e4
LT
1432 goto out;
1433
8d71740c
TT
1434 netevent.old = &rt->u.dst;
1435 netevent.new = &nrt->u.dst;
1436 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1437
1da177e4 1438 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1439 ip6_del_rt(rt);
1da177e4
LT
1440 return;
1441 }
1442
1443out:
1444 dst_release(&rt->u.dst);
1445 return;
1446}
1447
1448/*
1449 * Handle ICMP "packet too big" messages
1450 * i.e. Path MTU discovery
1451 */
1452
1453void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1454 struct net_device *dev, u32 pmtu)
1455{
1456 struct rt6_info *rt, *nrt;
1457 int allfrag = 0;
1458
1459 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1460 if (rt == NULL)
1461 return;
1462
1463 if (pmtu >= dst_mtu(&rt->u.dst))
1464 goto out;
1465
1466 if (pmtu < IPV6_MIN_MTU) {
1467 /*
1468 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1469 * MTU (1280) and a fragment header should always be included
1470 * after a node receiving Too Big message reporting PMTU is
1471 * less than the IPv6 Minimum Link MTU.
1472 */
1473 pmtu = IPV6_MIN_MTU;
1474 allfrag = 1;
1475 }
1476
1477 /* New mtu received -> path was valid.
1478 They are sent only in response to data packets,
1479 so that this nexthop apparently is reachable. --ANK
1480 */
1481 dst_confirm(&rt->u.dst);
1482
1483 /* Host route. If it is static, it would be better
1484 not to override it, but add new one, so that
1485 when cache entry will expire old pmtu
1486 would return automatically.
1487 */
1488 if (rt->rt6i_flags & RTF_CACHE) {
1489 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1490 if (allfrag)
1491 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1492 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1493 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1494 goto out;
1495 }
1496
1497 /* Network route.
1498 Two cases are possible:
1499 1. It is connected route. Action: COW
1500 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1501 */
d5315b50 1502 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1503 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1504 else
1505 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1506
d5315b50 1507 if (nrt) {
a1e78363
YH
1508 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1509 if (allfrag)
1510 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1511
1512 /* According to RFC 1981, detecting PMTU increase shouldn't be
1513 * happened within 5 mins, the recommended timer is 10 mins.
1514 * Here this route expiration time is set to ip6_rt_mtu_expires
1515 * which is 10 mins. After 10 mins the decreased pmtu is expired
1516 * and detecting PMTU increase will be automatically happened.
1517 */
1518 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1519 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1520
40e22e8f 1521 ip6_ins_rt(nrt);
1da177e4 1522 }
1da177e4
LT
1523out:
1524 dst_release(&rt->u.dst);
1525}
1526
1527/*
1528 * Misc support functions
1529 */
1530
1531static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1532{
1533 struct rt6_info *rt = ip6_dst_alloc();
1534
1535 if (rt) {
1536 rt->u.dst.input = ort->u.dst.input;
1537 rt->u.dst.output = ort->u.dst.output;
1538
1539 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1540 rt->u.dst.dev = ort->u.dst.dev;
1541 if (rt->u.dst.dev)
1542 dev_hold(rt->u.dst.dev);
1543 rt->rt6i_idev = ort->rt6i_idev;
1544 if (rt->rt6i_idev)
1545 in6_dev_hold(rt->rt6i_idev);
1546 rt->u.dst.lastuse = jiffies;
1547 rt->rt6i_expires = 0;
1548
1549 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1550 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1551 rt->rt6i_metric = 0;
1552
1553 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1554#ifdef CONFIG_IPV6_SUBTREES
1555 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1556#endif
c71099ac 1557 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1558 }
1559 return rt;
1560}
1561
70ceb4f5
YH
1562#ifdef CONFIG_IPV6_ROUTE_INFO
1563static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1564 struct in6_addr *gwaddr, int ifindex)
1565{
1566 struct fib6_node *fn;
1567 struct rt6_info *rt = NULL;
c71099ac
TG
1568 struct fib6_table *table;
1569
1570 table = fib6_get_table(RT6_TABLE_INFO);
1571 if (table == NULL)
1572 return NULL;
70ceb4f5 1573
c71099ac
TG
1574 write_lock_bh(&table->tb6_lock);
1575 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1576 if (!fn)
1577 goto out;
1578
1579 for (rt = fn->leaf; rt; rt = rt->u.next) {
1580 if (rt->rt6i_dev->ifindex != ifindex)
1581 continue;
1582 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1583 continue;
1584 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1585 continue;
1586 dst_hold(&rt->u.dst);
1587 break;
1588 }
1589out:
c71099ac 1590 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1591 return rt;
1592}
1593
1594static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1595 struct in6_addr *gwaddr, int ifindex,
1596 unsigned pref)
1597{
86872cb5
TG
1598 struct fib6_config cfg = {
1599 .fc_table = RT6_TABLE_INFO,
1600 .fc_metric = 1024,
1601 .fc_ifindex = ifindex,
1602 .fc_dst_len = prefixlen,
1603 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1604 RTF_UP | RTF_PREF(pref),
1605 };
1606
1607 ipv6_addr_copy(&cfg.fc_dst, prefix);
1608 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1609
e317da96
YH
1610 /* We should treat it as a default route if prefix length is 0. */
1611 if (!prefixlen)
86872cb5 1612 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1613
86872cb5 1614 ip6_route_add(&cfg);
70ceb4f5
YH
1615
1616 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1617}
1618#endif
1619
1da177e4
LT
1620struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1621{
1622 struct rt6_info *rt;
c71099ac 1623 struct fib6_table *table;
1da177e4 1624
c71099ac
TG
1625 table = fib6_get_table(RT6_TABLE_DFLT);
1626 if (table == NULL)
1627 return NULL;
1da177e4 1628
c71099ac
TG
1629 write_lock_bh(&table->tb6_lock);
1630 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1da177e4 1631 if (dev == rt->rt6i_dev &&
045927ff 1632 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1633 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1634 break;
1635 }
1636 if (rt)
1637 dst_hold(&rt->u.dst);
c71099ac 1638 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1639 return rt;
1640}
1641
1642struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1643 struct net_device *dev,
1644 unsigned int pref)
1da177e4 1645{
86872cb5
TG
1646 struct fib6_config cfg = {
1647 .fc_table = RT6_TABLE_DFLT,
1648 .fc_metric = 1024,
1649 .fc_ifindex = dev->ifindex,
1650 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1651 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1652 };
1da177e4 1653
86872cb5 1654 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1655
86872cb5 1656 ip6_route_add(&cfg);
1da177e4 1657
1da177e4
LT
1658 return rt6_get_dflt_router(gwaddr, dev);
1659}
1660
1661void rt6_purge_dflt_routers(void)
1662{
1663 struct rt6_info *rt;
c71099ac
TG
1664 struct fib6_table *table;
1665
1666 /* NOTE: Keep consistent with rt6_get_dflt_router */
1667 table = fib6_get_table(RT6_TABLE_DFLT);
1668 if (table == NULL)
1669 return;
1da177e4
LT
1670
1671restart:
c71099ac
TG
1672 read_lock_bh(&table->tb6_lock);
1673 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1da177e4
LT
1674 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1675 dst_hold(&rt->u.dst);
c71099ac 1676 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1677 ip6_del_rt(rt);
1da177e4
LT
1678 goto restart;
1679 }
1680 }
c71099ac 1681 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1682}
1683
86872cb5
TG
1684static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1685 struct fib6_config *cfg)
1686{
1687 memset(cfg, 0, sizeof(*cfg));
1688
1689 cfg->fc_table = RT6_TABLE_MAIN;
1690 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1691 cfg->fc_metric = rtmsg->rtmsg_metric;
1692 cfg->fc_expires = rtmsg->rtmsg_info;
1693 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1694 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1695 cfg->fc_flags = rtmsg->rtmsg_flags;
1696
1697 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1698 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1699 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1700}
1701
1da177e4
LT
1702int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1703{
86872cb5 1704 struct fib6_config cfg;
1da177e4
LT
1705 struct in6_rtmsg rtmsg;
1706 int err;
1707
1708 switch(cmd) {
1709 case SIOCADDRT: /* Add a route */
1710 case SIOCDELRT: /* Delete a route */
1711 if (!capable(CAP_NET_ADMIN))
1712 return -EPERM;
1713 err = copy_from_user(&rtmsg, arg,
1714 sizeof(struct in6_rtmsg));
1715 if (err)
1716 return -EFAULT;
86872cb5
TG
1717
1718 rtmsg_to_fib6_config(&rtmsg, &cfg);
1719
1da177e4
LT
1720 rtnl_lock();
1721 switch (cmd) {
1722 case SIOCADDRT:
86872cb5 1723 err = ip6_route_add(&cfg);
1da177e4
LT
1724 break;
1725 case SIOCDELRT:
86872cb5 1726 err = ip6_route_del(&cfg);
1da177e4
LT
1727 break;
1728 default:
1729 err = -EINVAL;
1730 }
1731 rtnl_unlock();
1732
1733 return err;
1734 };
1735
1736 return -EINVAL;
1737}
1738
1739/*
1740 * Drop the packet on the floor
1741 */
1742
20380731 1743static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4 1744{
76d0cc1b
LL
1745 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1746 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1747 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1748
1da177e4
LT
1749 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1750 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1751 kfree_skb(skb);
1752 return 0;
1753}
1754
20380731 1755static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1756{
1757 skb->dev = skb->dst->dev;
1758 return ip6_pkt_discard(skb);
1759}
1760
1761/*
1762 * Allocate a dst for local (unicast / anycast) address.
1763 */
1764
1765struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1766 const struct in6_addr *addr,
1767 int anycast)
1768{
1769 struct rt6_info *rt = ip6_dst_alloc();
1770
1771 if (rt == NULL)
1772 return ERR_PTR(-ENOMEM);
1773
1774 dev_hold(&loopback_dev);
1775 in6_dev_hold(idev);
1776
1777 rt->u.dst.flags = DST_HOST;
1778 rt->u.dst.input = ip6_input;
1779 rt->u.dst.output = ip6_output;
1780 rt->rt6i_dev = &loopback_dev;
1781 rt->rt6i_idev = idev;
1782 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1783 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1784 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1785 rt->u.dst.obsolete = -1;
1786
1787 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1788 if (anycast)
1789 rt->rt6i_flags |= RTF_ANYCAST;
1790 else
1da177e4
LT
1791 rt->rt6i_flags |= RTF_LOCAL;
1792 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1793 if (rt->rt6i_nexthop == NULL) {
1794 dst_free((struct dst_entry *) rt);
1795 return ERR_PTR(-ENOMEM);
1796 }
1797
1798 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1799 rt->rt6i_dst.plen = 128;
c71099ac 1800 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1da177e4
LT
1801
1802 atomic_set(&rt->u.dst.__refcnt, 1);
1803
1804 return rt;
1805}
1806
1807static int fib6_ifdown(struct rt6_info *rt, void *arg)
1808{
1809 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1810 rt != &ip6_null_entry) {
1811 RT6_TRACE("deleted by ifdown %p\n", rt);
1812 return -1;
1813 }
1814 return 0;
1815}
1816
1817void rt6_ifdown(struct net_device *dev)
1818{
c71099ac 1819 fib6_clean_all(fib6_ifdown, 0, dev);
1da177e4
LT
1820}
1821
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned int		mtu;	/* its new MTU */
};
1827
1828static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1829{
1830 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1831 struct inet6_dev *idev;
1832
1833 /* In IPv6 pmtu discovery is not optional,
1834 so that RTAX_MTU lock cannot disable it.
1835 We still use this lock to block changes
1836 caused by addrconf/ndisc.
1837 */
1838
1839 idev = __in6_dev_get(arg->dev);
1840 if (idev == NULL)
1841 return 0;
1842
1843 /* For administrative MTU increase, there is no way to discover
1844 IPv6 PMTU increase, so PMTU increase should be updated here.
1845 Since RFC 1981 doesn't include administrative MTU increase
1846 update PMTU increase is a MUST. (i.e. jumbo frame)
1847 */
1848 /*
1849 If new MTU is less than route PMTU, this new MTU will be the
1850 lowest MTU in the path, update the route PMTU to reflect PMTU
1851 decreases; if new MTU is greater than route PMTU, and the
1852 old MTU is the lowest MTU in the path, update the route PMTU
1853 to reflect the increase. In this case if the other nodes' MTU
1854 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1855 PMTU discouvery.
1856 */
1857 if (rt->rt6i_dev == arg->dev &&
1858 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1859 (dst_mtu(&rt->u.dst) > arg->mtu ||
1860 (dst_mtu(&rt->u.dst) < arg->mtu &&
1861 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1862 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1863 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1864 return 0;
1865}
1866
1867void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1868{
c71099ac
TG
1869 struct rt6_mtu_change_arg arg = {
1870 .dev = dev,
1871 .mtu = mtu,
1872 };
1da177e4 1873
c71099ac 1874 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1da177e4
LT
1875}
1876
86872cb5
TG
1877static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1878 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
1879 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 1880 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
1881 [RTA_PRIORITY] = { .type = NLA_U32 },
1882 [RTA_METRICS] = { .type = NLA_NESTED },
1883};
1884
1885static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1886 struct fib6_config *cfg)
1da177e4 1887{
86872cb5
TG
1888 struct rtmsg *rtm;
1889 struct nlattr *tb[RTA_MAX+1];
1890 int err;
1da177e4 1891
86872cb5
TG
1892 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1893 if (err < 0)
1894 goto errout;
1da177e4 1895
86872cb5
TG
1896 err = -EINVAL;
1897 rtm = nlmsg_data(nlh);
1898 memset(cfg, 0, sizeof(*cfg));
1899
1900 cfg->fc_table = rtm->rtm_table;
1901 cfg->fc_dst_len = rtm->rtm_dst_len;
1902 cfg->fc_src_len = rtm->rtm_src_len;
1903 cfg->fc_flags = RTF_UP;
1904 cfg->fc_protocol = rtm->rtm_protocol;
1905
1906 if (rtm->rtm_type == RTN_UNREACHABLE)
1907 cfg->fc_flags |= RTF_REJECT;
1908
1909 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1910 cfg->fc_nlinfo.nlh = nlh;
1911
1912 if (tb[RTA_GATEWAY]) {
1913 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1914 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 1915 }
86872cb5
TG
1916
1917 if (tb[RTA_DST]) {
1918 int plen = (rtm->rtm_dst_len + 7) >> 3;
1919
1920 if (nla_len(tb[RTA_DST]) < plen)
1921 goto errout;
1922
1923 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 1924 }
86872cb5
TG
1925
1926 if (tb[RTA_SRC]) {
1927 int plen = (rtm->rtm_src_len + 7) >> 3;
1928
1929 if (nla_len(tb[RTA_SRC]) < plen)
1930 goto errout;
1931
1932 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 1933 }
86872cb5
TG
1934
1935 if (tb[RTA_OIF])
1936 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1937
1938 if (tb[RTA_PRIORITY])
1939 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1940
1941 if (tb[RTA_METRICS]) {
1942 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1943 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 1944 }
86872cb5
TG
1945
1946 if (tb[RTA_TABLE])
1947 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1948
1949 err = 0;
1950errout:
1951 return err;
1da177e4
LT
1952}
1953
1954int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1955{
86872cb5
TG
1956 struct fib6_config cfg;
1957 int err;
1da177e4 1958
86872cb5
TG
1959 err = rtm_to_fib6_config(skb, nlh, &cfg);
1960 if (err < 0)
1961 return err;
1962
1963 return ip6_route_del(&cfg);
1da177e4
LT
1964}
1965
1966int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1967{
86872cb5
TG
1968 struct fib6_config cfg;
1969 int err;
1da177e4 1970
86872cb5
TG
1971 err = rtm_to_fib6_config(skb, nlh, &cfg);
1972 if (err < 0)
1973 return err;
1974
1975 return ip6_route_add(&cfg);
1da177e4
LT
1976}
1977
1da177e4 1978static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1979 struct in6_addr *dst, struct in6_addr *src,
1980 int iif, int type, u32 pid, u32 seq,
1981 int prefix, unsigned int flags)
1da177e4
LT
1982{
1983 struct rtmsg *rtm;
2d7202bf 1984 struct nlmsghdr *nlh;
1da177e4 1985 struct rta_cacheinfo ci;
9e762a4a 1986 u32 table;
1da177e4
LT
1987
1988 if (prefix) { /* user wants prefix routes only */
1989 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1990 /* success since this is not a prefix route */
1991 return 1;
1992 }
1993 }
1994
2d7202bf
TG
1995 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
1996 if (nlh == NULL)
1997 return -ENOBUFS;
1998
1999 rtm = nlmsg_data(nlh);
1da177e4
LT
2000 rtm->rtm_family = AF_INET6;
2001 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2002 rtm->rtm_src_len = rt->rt6i_src.plen;
2003 rtm->rtm_tos = 0;
c71099ac 2004 if (rt->rt6i_table)
9e762a4a 2005 table = rt->rt6i_table->tb6_id;
c71099ac 2006 else
9e762a4a
PM
2007 table = RT6_TABLE_UNSPEC;
2008 rtm->rtm_table = table;
2d7202bf 2009 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2010 if (rt->rt6i_flags&RTF_REJECT)
2011 rtm->rtm_type = RTN_UNREACHABLE;
2012 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2013 rtm->rtm_type = RTN_LOCAL;
2014 else
2015 rtm->rtm_type = RTN_UNICAST;
2016 rtm->rtm_flags = 0;
2017 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2018 rtm->rtm_protocol = rt->rt6i_protocol;
2019 if (rt->rt6i_flags&RTF_DYNAMIC)
2020 rtm->rtm_protocol = RTPROT_REDIRECT;
2021 else if (rt->rt6i_flags & RTF_ADDRCONF)
2022 rtm->rtm_protocol = RTPROT_KERNEL;
2023 else if (rt->rt6i_flags&RTF_DEFAULT)
2024 rtm->rtm_protocol = RTPROT_RA;
2025
2026 if (rt->rt6i_flags&RTF_CACHE)
2027 rtm->rtm_flags |= RTM_F_CLONED;
2028
2029 if (dst) {
2d7202bf 2030 NLA_PUT(skb, RTA_DST, 16, dst);
1da177e4
LT
2031 rtm->rtm_dst_len = 128;
2032 } else if (rtm->rtm_dst_len)
2d7202bf 2033 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2034#ifdef CONFIG_IPV6_SUBTREES
2035 if (src) {
2d7202bf 2036 NLA_PUT(skb, RTA_SRC, 16, src);
1da177e4
LT
2037 rtm->rtm_src_len = 128;
2038 } else if (rtm->rtm_src_len)
2d7202bf 2039 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4
LT
2040#endif
2041 if (iif)
2d7202bf 2042 NLA_PUT_U32(skb, RTA_IIF, iif);
1da177e4
LT
2043 else if (dst) {
2044 struct in6_addr saddr_buf;
2045 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
2d7202bf 2046 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2047 }
2d7202bf 2048
1da177e4 2049 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2050 goto nla_put_failure;
2051
1da177e4 2052 if (rt->u.dst.neighbour)
2d7202bf
TG
2053 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2054
1da177e4 2055 if (rt->u.dst.dev)
2d7202bf
TG
2056 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2057
2058 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
1da177e4
LT
2059 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2060 if (rt->rt6i_expires)
2061 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2062 else
2063 ci.rta_expires = 0;
2064 ci.rta_used = rt->u.dst.__use;
2065 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2066 ci.rta_error = rt->u.dst.error;
2067 ci.rta_id = 0;
2068 ci.rta_ts = 0;
2069 ci.rta_tsage = 0;
2d7202bf
TG
2070 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2071
2072 return nlmsg_end(skb, nlh);
2073
2074nla_put_failure:
2075 return nlmsg_cancel(skb, nlh);
1da177e4
LT
2076}
2077
1b43af54 2078int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2079{
2080 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2081 int prefix;
2082
2d7202bf
TG
2083 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2084 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2085 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2086 } else
2087 prefix = 0;
2088
2089 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2090 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 2091 prefix, NLM_F_MULTI);
1da177e4
LT
2092}
2093
1da177e4
LT
2094int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2095{
ab364a6f
TG
2096 struct nlattr *tb[RTA_MAX+1];
2097 struct rt6_info *rt;
1da177e4 2098 struct sk_buff *skb;
ab364a6f 2099 struct rtmsg *rtm;
1da177e4 2100 struct flowi fl;
ab364a6f 2101 int err, iif = 0;
1da177e4 2102
ab364a6f
TG
2103 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2104 if (err < 0)
2105 goto errout;
1da177e4 2106
ab364a6f 2107 err = -EINVAL;
1da177e4 2108 memset(&fl, 0, sizeof(fl));
1da177e4 2109
ab364a6f
TG
2110 if (tb[RTA_SRC]) {
2111 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2112 goto errout;
2113
2114 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2115 }
2116
2117 if (tb[RTA_DST]) {
2118 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2119 goto errout;
2120
2121 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2122 }
2123
2124 if (tb[RTA_IIF])
2125 iif = nla_get_u32(tb[RTA_IIF]);
2126
2127 if (tb[RTA_OIF])
2128 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2129
2130 if (iif) {
2131 struct net_device *dev;
2132 dev = __dev_get_by_index(iif);
2133 if (!dev) {
2134 err = -ENODEV;
ab364a6f 2135 goto errout;
1da177e4
LT
2136 }
2137 }
2138
ab364a6f
TG
2139 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2140 if (skb == NULL) {
2141 err = -ENOBUFS;
2142 goto errout;
2143 }
1da177e4 2144
ab364a6f
TG
2145 /* Reserve room for dummy headers, this skb can pass
2146 through good chunk of routing engine.
2147 */
2148 skb->mac.raw = skb->data;
2149 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2150
ab364a6f 2151 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
1da177e4
LT
2152 skb->dst = &rt->u.dst;
2153
ab364a6f 2154 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2155 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 2156 nlh->nlmsg_seq, 0, 0);
1da177e4 2157 if (err < 0) {
ab364a6f
TG
2158 kfree_skb(skb);
2159 goto errout;
1da177e4
LT
2160 }
2161
2942e900 2162 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
ab364a6f 2163errout:
1da177e4 2164 return err;
1da177e4
LT
2165}
2166
86872cb5 2167void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2168{
2169 struct sk_buff *skb;
86872cb5
TG
2170 u32 pid = 0, seq = 0;
2171 struct nlmsghdr *nlh = NULL;
21713ebc
TG
2172 int payload = sizeof(struct rtmsg) + 256;
2173 int err = -ENOBUFS;
2174
86872cb5
TG
2175 if (info) {
2176 pid = info->pid;
2177 nlh = info->nlh;
2178 if (nlh)
2179 seq = nlh->nlmsg_seq;
2180 }
2181
21713ebc
TG
2182 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2183 if (skb == NULL)
2184 goto errout;
2185
2186 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2187 if (err < 0) {
1da177e4 2188 kfree_skb(skb);
21713ebc 2189 goto errout;
1da177e4 2190 }
21713ebc
TG
2191
2192 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2193errout:
2194 if (err < 0)
2195 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2196}
2197
2198/*
2199 * /proc
2200 */
2201
2202#ifdef CONFIG_PROC_FS
2203
2204#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2205
/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char	*buffer;	/* output buffer */
	int	offset;		/* read offset requested by the caller */
	int	length;		/* number of bytes requested */
	int	skip;		/* records skipped so far */
	int	len;		/* bytes written to buffer so far */
};
2214
2215static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2216{
2217 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2218 int i;
2219
2220 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2221 arg->skip++;
2222 return 0;
2223 }
2224
2225 if (arg->len >= arg->length)
2226 return 0;
2227
2228 for (i=0; i<16; i++) {
2229 sprintf(arg->buffer + arg->len, "%02x",
2230 rt->rt6i_dst.addr.s6_addr[i]);
2231 arg->len += 2;
2232 }
2233 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2234 rt->rt6i_dst.plen);
2235
2236#ifdef CONFIG_IPV6_SUBTREES
2237 for (i=0; i<16; i++) {
2238 sprintf(arg->buffer + arg->len, "%02x",
2239 rt->rt6i_src.addr.s6_addr[i]);
2240 arg->len += 2;
2241 }
2242 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2243 rt->rt6i_src.plen);
2244#else
2245 sprintf(arg->buffer + arg->len,
2246 "00000000000000000000000000000000 00 ");
2247 arg->len += 36;
2248#endif
2249
2250 if (rt->rt6i_nexthop) {
2251 for (i=0; i<16; i++) {
2252 sprintf(arg->buffer + arg->len, "%02x",
2253 rt->rt6i_nexthop->primary_key[i]);
2254 arg->len += 2;
2255 }
2256 } else {
2257 sprintf(arg->buffer + arg->len,
2258 "00000000000000000000000000000000");
2259 arg->len += 32;
2260 }
2261 arg->len += sprintf(arg->buffer + arg->len,
2262 " %08x %08x %08x %08x %8s\n",
2263 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2264 rt->u.dst.__use, rt->rt6i_flags,
2265 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2266 return 0;
2267}
2268
2269static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2270{
c71099ac
TG
2271 struct rt6_proc_arg arg = {
2272 .buffer = buffer,
2273 .offset = offset,
2274 .length = length,
2275 };
1da177e4 2276
c71099ac 2277 fib6_clean_all(rt6_info_route, 0, &arg);
1da177e4
LT
2278
2279 *start = buffer;
2280 if (offset)
2281 *start += offset % RT6_INFO_LEN;
2282
2283 arg.len -= offset % RT6_INFO_LEN;
2284
2285 if (arg.len > length)
2286 arg.len = length;
2287 if (arg.len < 0)
2288 arg.len = 0;
2289
2290 return arg.len;
2291}
2292
1da177e4
LT
2293static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2294{
2295 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2296 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2297 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2298 rt6_stats.fib_rt_cache,
2299 atomic_read(&ip6_dst_ops.entries),
2300 rt6_stats.fib_discarded_routes);
2301
2302 return 0;
2303}
2304
2305static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2306{
2307 return single_open(file, rt6_stats_seq_show, NULL);
2308}
2309
2310static struct file_operations rt6_stats_seq_fops = {
2311 .owner = THIS_MODULE,
2312 .open = rt6_stats_seq_open,
2313 .read = seq_read,
2314 .llseek = seq_lseek,
2315 .release = single_release,
2316};
2317#endif /* CONFIG_PROC_FS */
2318
2319#ifdef CONFIG_SYSCTL
2320
2321static int flush_delay;
2322
2323static
2324int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2325 void __user *buffer, size_t *lenp, loff_t *ppos)
2326{
2327 if (write) {
2328 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2329 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2330 return 0;
2331 } else
2332 return -EINVAL;
2333}
2334
2335ctl_table ipv6_route_table[] = {
2336 {
2337 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2338 .procname = "flush",
2339 .data = &flush_delay,
2340 .maxlen = sizeof(int),
89c8b3a1 2341 .mode = 0200,
1da177e4
LT
2342 .proc_handler = &ipv6_sysctl_rtcache_flush
2343 },
2344 {
2345 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2346 .procname = "gc_thresh",
2347 .data = &ip6_dst_ops.gc_thresh,
2348 .maxlen = sizeof(int),
2349 .mode = 0644,
2350 .proc_handler = &proc_dointvec,
2351 },
2352 {
2353 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2354 .procname = "max_size",
2355 .data = &ip6_rt_max_size,
2356 .maxlen = sizeof(int),
2357 .mode = 0644,
2358 .proc_handler = &proc_dointvec,
2359 },
2360 {
2361 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2362 .procname = "gc_min_interval",
2363 .data = &ip6_rt_gc_min_interval,
2364 .maxlen = sizeof(int),
2365 .mode = 0644,
2366 .proc_handler = &proc_dointvec_jiffies,
2367 .strategy = &sysctl_jiffies,
2368 },
2369 {
2370 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2371 .procname = "gc_timeout",
2372 .data = &ip6_rt_gc_timeout,
2373 .maxlen = sizeof(int),
2374 .mode = 0644,
2375 .proc_handler = &proc_dointvec_jiffies,
2376 .strategy = &sysctl_jiffies,
2377 },
2378 {
2379 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2380 .procname = "gc_interval",
2381 .data = &ip6_rt_gc_interval,
2382 .maxlen = sizeof(int),
2383 .mode = 0644,
2384 .proc_handler = &proc_dointvec_jiffies,
2385 .strategy = &sysctl_jiffies,
2386 },
2387 {
2388 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2389 .procname = "gc_elasticity",
2390 .data = &ip6_rt_gc_elasticity,
2391 .maxlen = sizeof(int),
2392 .mode = 0644,
2393 .proc_handler = &proc_dointvec_jiffies,
2394 .strategy = &sysctl_jiffies,
2395 },
2396 {
2397 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2398 .procname = "mtu_expires",
2399 .data = &ip6_rt_mtu_expires,
2400 .maxlen = sizeof(int),
2401 .mode = 0644,
2402 .proc_handler = &proc_dointvec_jiffies,
2403 .strategy = &sysctl_jiffies,
2404 },
2405 {
2406 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2407 .procname = "min_adv_mss",
2408 .data = &ip6_rt_min_advmss,
2409 .maxlen = sizeof(int),
2410 .mode = 0644,
2411 .proc_handler = &proc_dointvec_jiffies,
2412 .strategy = &sysctl_jiffies,
2413 },
2414 {
2415 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2416 .procname = "gc_min_interval_ms",
2417 .data = &ip6_rt_gc_min_interval,
2418 .maxlen = sizeof(int),
2419 .mode = 0644,
2420 .proc_handler = &proc_dointvec_ms_jiffies,
2421 .strategy = &sysctl_ms_jiffies,
2422 },
2423 { .ctl_name = 0 }
2424};
2425
2426#endif
2427
2428void __init ip6_route_init(void)
2429{
2430 struct proc_dir_entry *p;
2431
2432 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2433 sizeof(struct rt6_info),
2434 0, SLAB_HWCACHE_ALIGN,
2435 NULL, NULL);
2436 if (!ip6_dst_ops.kmem_cachep)
2437 panic("cannot create ip6_dst_cache");
2438
2439 fib6_init();
2440#ifdef CONFIG_PROC_FS
2441 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2442 if (p)
2443 p->owner = THIS_MODULE;
2444
2445 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2446#endif
2447#ifdef CONFIG_XFRM
2448 xfrm6_init();
2449#endif
101367c2
TG
2450#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2451 fib6_rules_init();
2452#endif
1da177e4
LT
2453}
2454
2455void ip6_route_cleanup(void)
2456{
101367c2
TG
2457#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2458 fib6_rules_cleanup();
2459#endif
1da177e4
LT
2460#ifdef CONFIG_PROC_FS
2461 proc_net_remove("ipv6_route");
2462 proc_net_remove("rt6_stats");
2463#endif
2464#ifdef CONFIG_XFRM
2465 xfrm6_fini();
2466#endif
2467 rt6_ifdown(NULL);
2468 fib6_gc_cleanup();
2469 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2470}