]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[IPV4]: Add LC-Trie FIB lookup algorithm.
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
27#include <linux/config.h>
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug verbosity for this file.  RDBG() and RT6_TRACE() only call
 * printk() when RT6_DEBUG is 3 or more; at lower levels they are no-ops.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
74
75static int ip6_rt_max_size = 4096;
76static int ip6_rt_gc_min_interval = HZ / 2;
77static int ip6_rt_gc_timeout = 60*HZ;
78int ip6_rt_gc_interval = 30*HZ;
79static int ip6_rt_gc_elasticity = 9;
80static int ip6_rt_mtu_expires = 10*60*HZ;
81static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
89static int ip6_dst_gc(void);
90
91static int ip6_pkt_discard(struct sk_buff *skb);
92static int ip6_pkt_discard_out(struct sk_buff *skb);
93static void ip6_link_failure(struct sk_buff *skb);
94static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
96static struct dst_ops ip6_dst_ops = {
97 .family = AF_INET6,
98 .protocol = __constant_htons(ETH_P_IPV6),
99 .gc = ip6_dst_gc,
100 .gc_thresh = 1024,
101 .check = ip6_dst_check,
102 .destroy = ip6_dst_destroy,
103 .ifdown = ip6_dst_ifdown,
104 .negative_advice = ip6_negative_advice,
105 .link_failure = ip6_link_failure,
106 .update_pmtu = ip6_rt_update_pmtu,
107 .entry_size = sizeof(struct rt6_info),
108};
109
110struct rt6_info ip6_null_entry = {
111 .u = {
112 .dst = {
113 .__refcnt = ATOMIC_INIT(1),
114 .__use = 1,
115 .dev = &loopback_dev,
116 .obsolete = -1,
117 .error = -ENETUNREACH,
118 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
119 .input = ip6_pkt_discard,
120 .output = ip6_pkt_discard_out,
121 .ops = &ip6_dst_ops,
122 .path = (struct dst_entry*)&ip6_null_entry,
123 }
124 },
125 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
126 .rt6i_metric = ~(u32) 0,
127 .rt6i_ref = ATOMIC_INIT(1),
128};
129
130struct fib6_node ip6_routing_table = {
131 .leaf = &ip6_null_entry,
132 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133};
134
/*
 * Reader/writer lock protecting the whole IPv6 FIB.  Lookups in this
 * file take it for reading, insertions and deletions for writing.
 */
DEFINE_RWLOCK(rt6_lock);
138
139
140/* allocate dst with ip6_dst_ops */
141static __inline__ struct rt6_info *ip6_dst_alloc(void)
142{
143 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144}
145
146static void ip6_dst_destroy(struct dst_entry *dst)
147{
148 struct rt6_info *rt = (struct rt6_info *)dst;
149 struct inet6_dev *idev = rt->rt6i_idev;
150
151 if (idev != NULL) {
152 rt->rt6i_idev = NULL;
153 in6_dev_put(idev);
154 }
155}
156
157static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158 int how)
159{
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
162
163 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 if (loopback_idev != NULL) {
166 rt->rt6i_idev = loopback_idev;
167 in6_dev_put(idev);
168 }
169 }
170}
171
172static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173{
174 return (rt->rt6i_flags & RTF_EXPIRES &&
175 time_after(jiffies, rt->rt6i_expires));
176}
177
178/*
179 * Route lookup. Any rt6_lock is implied.
180 */
181
182static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183 int oif,
184 int strict)
185{
186 struct rt6_info *local = NULL;
187 struct rt6_info *sprt;
188
189 if (oif) {
190 for (sprt = rt; sprt; sprt = sprt->u.next) {
191 struct net_device *dev = sprt->rt6i_dev;
192 if (dev->ifindex == oif)
193 return sprt;
194 if (dev->flags & IFF_LOOPBACK) {
195 if (sprt->rt6i_idev == NULL ||
196 sprt->rt6i_idev->dev->ifindex != oif) {
197 if (strict && oif)
198 continue;
199 if (local && (!oif ||
200 local->rt6i_idev->dev->ifindex == oif))
201 continue;
202 }
203 local = sprt;
204 }
205 }
206
207 if (local)
208 return local;
209
210 if (strict)
211 return &ip6_null_entry;
212 }
213 return rt;
214}
215
/*
 * The default router chosen most recently by rt6_best_dflt(), or NULL.
 * Updates are serialised by rt6_dflt_lock.  BH is disabled locally.
 */
static struct rt6_info *rt6_dflt_pointer;
static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222void rt6_reset_dflt_pointer(struct rt6_info *rt)
223{
224 spin_lock_bh(&rt6_dflt_lock);
225 if (rt == NULL || rt == rt6_dflt_pointer) {
226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 rt6_dflt_pointer = NULL;
228 }
229 spin_unlock_bh(&rt6_dflt_lock);
230}
231
232/* Default Router Selection (RFC 2461 6.3.6) */
233static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234{
235 struct rt6_info *match = NULL;
236 struct rt6_info *sprt;
237 int mpri = 0;
238
239 for (sprt = rt; sprt; sprt = sprt->u.next) {
240 struct neighbour *neigh;
241 int m = 0;
242
243 if (!oif ||
244 (sprt->rt6i_dev &&
245 sprt->rt6i_dev->ifindex == oif))
246 m += 8;
247
248 if (rt6_check_expired(sprt))
249 continue;
250
251 if (sprt == rt6_dflt_pointer)
252 m += 4;
253
254 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255 read_lock_bh(&neigh->lock);
256 switch (neigh->nud_state) {
257 case NUD_REACHABLE:
258 m += 3;
259 break;
260
261 case NUD_STALE:
262 case NUD_DELAY:
263 case NUD_PROBE:
264 m += 2;
265 break;
266
267 case NUD_NOARP:
268 case NUD_PERMANENT:
269 m += 1;
270 break;
271
272 case NUD_INCOMPLETE:
273 default:
274 read_unlock_bh(&neigh->lock);
275 continue;
276 }
277 read_unlock_bh(&neigh->lock);
278 } else {
279 continue;
280 }
281
282 if (m > mpri || m >= 12) {
283 match = sprt;
284 mpri = m;
285 if (m >= 12) {
286 /* we choose the last default router if it
287 * is in (probably) reachable state.
288 * If route changed, we should do pmtu
289 * discovery. --yoshfuji
290 */
291 break;
292 }
293 }
294 }
295
296 spin_lock(&rt6_dflt_lock);
297 if (!match) {
298 /*
299 * No default routers are known to be reachable.
300 * SHOULD round robin
301 */
302 if (rt6_dflt_pointer) {
303 for (sprt = rt6_dflt_pointer->u.next;
304 sprt; sprt = sprt->u.next) {
305 if (sprt->u.dst.obsolete <= 0 &&
306 sprt->u.dst.error == 0 &&
307 !rt6_check_expired(sprt)) {
308 match = sprt;
309 break;
310 }
311 }
312 for (sprt = rt;
313 !match && sprt;
314 sprt = sprt->u.next) {
315 if (sprt->u.dst.obsolete <= 0 &&
316 sprt->u.dst.error == 0 &&
317 !rt6_check_expired(sprt)) {
318 match = sprt;
319 break;
320 }
321 if (sprt == rt6_dflt_pointer)
322 break;
323 }
324 }
325 }
326
327 if (match) {
328 if (rt6_dflt_pointer != match)
329 RT6_TRACE("changed default router: %p->%p\n",
330 rt6_dflt_pointer, match);
331 rt6_dflt_pointer = match;
332 }
333 spin_unlock(&rt6_dflt_lock);
334
335 if (!match) {
336 /*
337 * Last Resort: if no default routers found,
338 * use addrconf default route.
339 * We don't record this route.
340 */
341 for (sprt = ip6_routing_table.leaf;
342 sprt; sprt = sprt->u.next) {
343 if (!rt6_check_expired(sprt) &&
344 (sprt->rt6i_flags & RTF_DEFAULT) &&
345 (!oif ||
346 (sprt->rt6i_dev &&
347 sprt->rt6i_dev->ifindex == oif))) {
348 match = sprt;
349 break;
350 }
351 }
352 if (!match) {
353 /* no default route. give up. */
354 match = &ip6_null_entry;
355 }
356 }
357
358 return match;
359}
360
361struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362 int oif, int strict)
363{
364 struct fib6_node *fn;
365 struct rt6_info *rt;
366
367 read_lock_bh(&rt6_lock);
368 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 rt = rt6_device_match(fn->leaf, oif, strict);
370 dst_hold(&rt->u.dst);
371 rt->u.dst.__use++;
372 read_unlock_bh(&rt6_lock);
373
374 rt->u.dst.lastuse = jiffies;
375 if (rt->u.dst.error == 0)
376 return rt;
377 dst_release(&rt->u.dst);
378 return NULL;
379}
380
381/* ip6_ins_rt is called with FREE rt6_lock.
382 It takes new route entry, the addition fails by any reason the
383 route is freed. In any case, if caller does not hold it, it may
384 be destroyed.
385 */
386
387int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
388{
389 int err;
390
391 write_lock_bh(&rt6_lock);
392 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
393 write_unlock_bh(&rt6_lock);
394
395 return err;
396}
397
398/* No rt6_lock! If COW failed, the function returns dead route entry
399 with dst->error set to errno value.
400 */
401
402static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
403 struct in6_addr *saddr)
404{
405 int err;
406 struct rt6_info *rt;
407
408 /*
409 * Clone the route.
410 */
411
412 rt = ip6_rt_copy(ort);
413
414 if (rt) {
415 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
416
417 if (!(rt->rt6i_flags&RTF_GATEWAY))
418 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
419
420 rt->rt6i_dst.plen = 128;
421 rt->rt6i_flags |= RTF_CACHE;
422 rt->u.dst.flags |= DST_HOST;
423
424#ifdef CONFIG_IPV6_SUBTREES
425 if (rt->rt6i_src.plen && saddr) {
426 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
427 rt->rt6i_src.plen = 128;
428 }
429#endif
430
431 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
432
433 dst_hold(&rt->u.dst);
434
435 err = ip6_ins_rt(rt, NULL, NULL);
436 if (err == 0)
437 return rt;
438
439 rt->u.dst.error = err;
440
441 return rt;
442 }
443 dst_hold(&ip6_null_entry.u.dst);
444 return &ip6_null_entry;
445}
446
/*
 * Used by the lookup paths after rt6_device_match(): when a strict
 * lookup ended on ip6_null_entry, climb towards the root.  Reaching a
 * node flagged RTN_ROOT takes a reference on rt and jumps to "out";
 * the first ancestor flagged RTN_RTINFO makes the caller retry at
 * "restart".
 *
 * Expects the caller to provide the variables rt, fn and strict and the
 * labels out and restart.  Wrapped in do { } while (0) so that
 * "BACKTRACK();" is a single statement and cannot capture a following
 * else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
458
459
460void ip6_route_input(struct sk_buff *skb)
461{
462 struct fib6_node *fn;
463 struct rt6_info *rt;
464 int strict;
465 int attempts = 3;
466
467 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
468
469relookup:
470 read_lock_bh(&rt6_lock);
471
472 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
473 &skb->nh.ipv6h->saddr);
474
475restart:
476 rt = fn->leaf;
477
478 if ((rt->rt6i_flags & RTF_CACHE)) {
479 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
480 BACKTRACK();
481 dst_hold(&rt->u.dst);
482 goto out;
483 }
484
485 rt = rt6_device_match(rt, skb->dev->ifindex, 0);
486 BACKTRACK();
487
488 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
489 struct rt6_info *nrt;
490 dst_hold(&rt->u.dst);
491 read_unlock_bh(&rt6_lock);
492
493 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
494 &skb->nh.ipv6h->saddr);
495
496 dst_release(&rt->u.dst);
497 rt = nrt;
498
499 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
500 goto out2;
501
502 /* Race condition! In the gap, when rt6_lock was
503 released someone could insert this route. Relookup.
504 */
505 dst_release(&rt->u.dst);
506 goto relookup;
507 }
508 dst_hold(&rt->u.dst);
509
510out:
511 read_unlock_bh(&rt6_lock);
512out2:
513 rt->u.dst.lastuse = jiffies;
514 rt->u.dst.__use++;
515 skb->dst = (struct dst_entry *) rt;
516}
517
518struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
519{
520 struct fib6_node *fn;
521 struct rt6_info *rt;
522 int strict;
523 int attempts = 3;
524
525 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
526
527relookup:
528 read_lock_bh(&rt6_lock);
529
530 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
531
532restart:
533 rt = fn->leaf;
534
535 if ((rt->rt6i_flags & RTF_CACHE)) {
536 rt = rt6_device_match(rt, fl->oif, strict);
537 BACKTRACK();
538 dst_hold(&rt->u.dst);
539 goto out;
540 }
541 if (rt->rt6i_flags & RTF_DEFAULT) {
542 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
543 rt = rt6_best_dflt(rt, fl->oif);
544 } else {
545 rt = rt6_device_match(rt, fl->oif, strict);
546 BACKTRACK();
547 }
548
549 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
550 struct rt6_info *nrt;
551 dst_hold(&rt->u.dst);
552 read_unlock_bh(&rt6_lock);
553
554 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
555
556 dst_release(&rt->u.dst);
557 rt = nrt;
558
559 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
560 goto out2;
561
562 /* Race condition! In the gap, when rt6_lock was
563 released someone could insert this route. Relookup.
564 */
565 dst_release(&rt->u.dst);
566 goto relookup;
567 }
568 dst_hold(&rt->u.dst);
569
570out:
571 read_unlock_bh(&rt6_lock);
572out2:
573 rt->u.dst.lastuse = jiffies;
574 rt->u.dst.__use++;
575 return &rt->u.dst;
576}
577
578
579/*
580 * Destination cache support functions
581 */
582
583static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
584{
585 struct rt6_info *rt;
586
587 rt = (struct rt6_info *) dst;
588
589 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
590 return dst;
591
592 return NULL;
593}
594
595static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
596{
597 struct rt6_info *rt = (struct rt6_info *) dst;
598
599 if (rt) {
600 if (rt->rt6i_flags & RTF_CACHE)
601 ip6_del_rt(rt, NULL, NULL);
602 else
603 dst_release(dst);
604 }
605 return NULL;
606}
607
608static void ip6_link_failure(struct sk_buff *skb)
609{
610 struct rt6_info *rt;
611
612 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
613
614 rt = (struct rt6_info *) skb->dst;
615 if (rt) {
616 if (rt->rt6i_flags&RTF_CACHE) {
617 dst_set_expires(&rt->u.dst, 0);
618 rt->rt6i_flags |= RTF_EXPIRES;
619 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
620 rt->rt6i_node->fn_sernum = -1;
621 }
622}
623
624static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
625{
626 struct rt6_info *rt6 = (struct rt6_info*)dst;
627
628 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
629 rt6->rt6i_flags |= RTF_MODIFIED;
630 if (mtu < IPV6_MIN_MTU) {
631 mtu = IPV6_MIN_MTU;
632 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
633 }
634 dst->metrics[RTAX_MTU-1] = mtu;
635 }
636}
637
/*
 * List of dst entries created by ndisc_dst_alloc(), linked through
 * dst->next.  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;

static int ipv6_get_mtu(struct net_device *dev);
641
642static inline unsigned int ipv6_advmss(unsigned int mtu)
643{
644 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
645
646 if (mtu < ip6_rt_min_advmss)
647 mtu = ip6_rt_min_advmss;
648
649 /*
650 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
651 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
652 * IPV6_MAXPLEN is also valid and means: "any MSS,
653 * rely only on pmtu discovery"
654 */
655 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
656 mtu = IPV6_MAXPLEN;
657 return mtu;
658}
659
660struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
661 struct neighbour *neigh,
662 struct in6_addr *addr,
663 int (*output)(struct sk_buff *))
664{
665 struct rt6_info *rt;
666 struct inet6_dev *idev = in6_dev_get(dev);
667
668 if (unlikely(idev == NULL))
669 return NULL;
670
671 rt = ip6_dst_alloc();
672 if (unlikely(rt == NULL)) {
673 in6_dev_put(idev);
674 goto out;
675 }
676
677 dev_hold(dev);
678 if (neigh)
679 neigh_hold(neigh);
680 else
681 neigh = ndisc_get_neigh(dev, addr);
682
683 rt->rt6i_dev = dev;
684 rt->rt6i_idev = idev;
685 rt->rt6i_nexthop = neigh;
686 atomic_set(&rt->u.dst.__refcnt, 1);
687 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
688 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
689 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
690 rt->u.dst.output = output;
691
692#if 0 /* there's no chance to use these for ndisc */
693 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
694 ? DST_HOST
695 : 0;
696 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
697 rt->rt6i_dst.plen = 128;
698#endif
699
700 write_lock_bh(&rt6_lock);
701 rt->u.dst.next = ndisc_dst_gc_list;
702 ndisc_dst_gc_list = &rt->u.dst;
703 write_unlock_bh(&rt6_lock);
704
705 fib6_force_start_gc();
706
707out:
708 return (struct dst_entry *)rt;
709}
710
711int ndisc_dst_gc(int *more)
712{
713 struct dst_entry *dst, *next, **pprev;
714 int freed;
715
716 next = NULL;
717 pprev = &ndisc_dst_gc_list;
718 freed = 0;
719 while ((dst = *pprev) != NULL) {
720 if (!atomic_read(&dst->__refcnt)) {
721 *pprev = dst->next;
722 dst_free(dst);
723 freed++;
724 } else {
725 pprev = &dst->next;
726 (*more)++;
727 }
728 }
729
730 return freed;
731}
732
733static int ip6_dst_gc(void)
734{
735 static unsigned expire = 30*HZ;
736 static unsigned long last_gc;
737 unsigned long now = jiffies;
738
739 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
740 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
741 goto out;
742
743 expire++;
744 fib6_run_gc(expire);
745 last_gc = now;
746 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
747 expire = ip6_rt_gc_timeout>>1;
748
749out:
750 expire -= expire>>ip6_rt_gc_elasticity;
751 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
752}
753
754/* Clean host part of a prefix. Not necessary in radix tree,
755 but results in cleaner routing tables.
756
757 Remove it only when all the things will work!
758 */
759
760static int ipv6_get_mtu(struct net_device *dev)
761{
762 int mtu = IPV6_MIN_MTU;
763 struct inet6_dev *idev;
764
765 idev = in6_dev_get(dev);
766 if (idev) {
767 mtu = idev->cnf.mtu6;
768 in6_dev_put(idev);
769 }
770 return mtu;
771}
772
773int ipv6_get_hoplimit(struct net_device *dev)
774{
775 int hoplimit = ipv6_devconf.hop_limit;
776 struct inet6_dev *idev;
777
778 idev = in6_dev_get(dev);
779 if (idev) {
780 hoplimit = idev->cnf.hop_limit;
781 in6_dev_put(idev);
782 }
783 return hoplimit;
784}
785
786/*
787 *
788 */
789
790int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
791{
792 int err;
793 struct rtmsg *r;
794 struct rtattr **rta;
795 struct rt6_info *rt = NULL;
796 struct net_device *dev = NULL;
797 struct inet6_dev *idev = NULL;
798 int addr_type;
799
800 rta = (struct rtattr **) _rtattr;
801
802 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
803 return -EINVAL;
804#ifndef CONFIG_IPV6_SUBTREES
805 if (rtmsg->rtmsg_src_len)
806 return -EINVAL;
807#endif
808 if (rtmsg->rtmsg_ifindex) {
809 err = -ENODEV;
810 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
811 if (!dev)
812 goto out;
813 idev = in6_dev_get(dev);
814 if (!idev)
815 goto out;
816 }
817
818 if (rtmsg->rtmsg_metric == 0)
819 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
820
821 rt = ip6_dst_alloc();
822
823 if (rt == NULL) {
824 err = -ENOMEM;
825 goto out;
826 }
827
828 rt->u.dst.obsolete = -1;
829 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
830 if (nlh && (r = NLMSG_DATA(nlh))) {
831 rt->rt6i_protocol = r->rtm_protocol;
832 } else {
833 rt->rt6i_protocol = RTPROT_BOOT;
834 }
835
836 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
837
838 if (addr_type & IPV6_ADDR_MULTICAST)
839 rt->u.dst.input = ip6_mc_input;
840 else
841 rt->u.dst.input = ip6_forward;
842
843 rt->u.dst.output = ip6_output;
844
845 ipv6_addr_prefix(&rt->rt6i_dst.addr,
846 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
847 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
848 if (rt->rt6i_dst.plen == 128)
849 rt->u.dst.flags = DST_HOST;
850
851#ifdef CONFIG_IPV6_SUBTREES
852 ipv6_addr_prefix(&rt->rt6i_src.addr,
853 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
854 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
855#endif
856
857 rt->rt6i_metric = rtmsg->rtmsg_metric;
858
859 /* We cannot add true routes via loopback here,
860 they would result in kernel looping; promote them to reject routes
861 */
862 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
863 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
864 /* hold loopback dev/idev if we haven't done so. */
865 if (dev != &loopback_dev) {
866 if (dev) {
867 dev_put(dev);
868 in6_dev_put(idev);
869 }
870 dev = &loopback_dev;
871 dev_hold(dev);
872 idev = in6_dev_get(dev);
873 if (!idev) {
874 err = -ENODEV;
875 goto out;
876 }
877 }
878 rt->u.dst.output = ip6_pkt_discard_out;
879 rt->u.dst.input = ip6_pkt_discard;
880 rt->u.dst.error = -ENETUNREACH;
881 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
882 goto install_route;
883 }
884
885 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
886 struct in6_addr *gw_addr;
887 int gwa_type;
888
889 gw_addr = &rtmsg->rtmsg_gateway;
890 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
891 gwa_type = ipv6_addr_type(gw_addr);
892
893 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
894 struct rt6_info *grt;
895
896 /* IPv6 strictly inhibits using not link-local
897 addresses as nexthop address.
898 Otherwise, router will not able to send redirects.
899 It is very good, but in some (rare!) circumstances
900 (SIT, PtP, NBMA NOARP links) it is handy to allow
901 some exceptions. --ANK
902 */
903 err = -EINVAL;
904 if (!(gwa_type&IPV6_ADDR_UNICAST))
905 goto out;
906
907 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
908
909 err = -EHOSTUNREACH;
910 if (grt == NULL)
911 goto out;
912 if (dev) {
913 if (dev != grt->rt6i_dev) {
914 dst_release(&grt->u.dst);
915 goto out;
916 }
917 } else {
918 dev = grt->rt6i_dev;
919 idev = grt->rt6i_idev;
920 dev_hold(dev);
921 in6_dev_hold(grt->rt6i_idev);
922 }
923 if (!(grt->rt6i_flags&RTF_GATEWAY))
924 err = 0;
925 dst_release(&grt->u.dst);
926
927 if (err)
928 goto out;
929 }
930 err = -EINVAL;
931 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
932 goto out;
933 }
934
935 err = -ENODEV;
936 if (dev == NULL)
937 goto out;
938
939 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
940 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
941 if (IS_ERR(rt->rt6i_nexthop)) {
942 err = PTR_ERR(rt->rt6i_nexthop);
943 rt->rt6i_nexthop = NULL;
944 goto out;
945 }
946 }
947
948 rt->rt6i_flags = rtmsg->rtmsg_flags;
949
950install_route:
951 if (rta && rta[RTA_METRICS-1]) {
952 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
953 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
954
955 while (RTA_OK(attr, attrlen)) {
956 unsigned flavor = attr->rta_type;
957 if (flavor) {
958 if (flavor > RTAX_MAX) {
959 err = -EINVAL;
960 goto out;
961 }
962 rt->u.dst.metrics[flavor-1] =
963 *(u32 *)RTA_DATA(attr);
964 }
965 attr = RTA_NEXT(attr, attrlen);
966 }
967 }
968
969 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
970 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
971 if (!rt->u.dst.metrics[RTAX_MTU-1])
972 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
973 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
974 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
975 rt->u.dst.dev = dev;
976 rt->rt6i_idev = idev;
977 return ip6_ins_rt(rt, nlh, _rtattr);
978
979out:
980 if (dev)
981 dev_put(dev);
982 if (idev)
983 in6_dev_put(idev);
984 if (rt)
985 dst_free((struct dst_entry *) rt);
986 return err;
987}
988
989int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
990{
991 int err;
992
993 write_lock_bh(&rt6_lock);
994
995 rt6_reset_dflt_pointer(NULL);
996
997 err = fib6_del(rt, nlh, _rtattr);
998 dst_release(&rt->u.dst);
999
1000 write_unlock_bh(&rt6_lock);
1001
1002 return err;
1003}
1004
1005static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
1006{
1007 struct fib6_node *fn;
1008 struct rt6_info *rt;
1009 int err = -ESRCH;
1010
1011 read_lock_bh(&rt6_lock);
1012
1013 fn = fib6_locate(&ip6_routing_table,
1014 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1015 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1016
1017 if (fn) {
1018 for (rt = fn->leaf; rt; rt = rt->u.next) {
1019 if (rtmsg->rtmsg_ifindex &&
1020 (rt->rt6i_dev == NULL ||
1021 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1022 continue;
1023 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1024 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1025 continue;
1026 if (rtmsg->rtmsg_metric &&
1027 rtmsg->rtmsg_metric != rt->rt6i_metric)
1028 continue;
1029 dst_hold(&rt->u.dst);
1030 read_unlock_bh(&rt6_lock);
1031
1032 return ip6_del_rt(rt, nlh, _rtattr);
1033 }
1034 }
1035 read_unlock_bh(&rt6_lock);
1036
1037 return err;
1038}
1039
1040/*
1041 * Handle redirects
1042 */
1043void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1044 struct neighbour *neigh, u8 *lladdr, int on_link)
1045{
1046 struct rt6_info *rt, *nrt;
1047
1048 /* Locate old route to this destination. */
1049 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1050
1051 if (rt == NULL)
1052 return;
1053
1054 if (neigh->dev != rt->rt6i_dev)
1055 goto out;
1056
1057 /*
1058 * Current route is on-link; redirect is always invalid.
1059 *
1060 * Seems, previous statement is not true. It could
1061 * be node, which looks for us as on-link (f.e. proxy ndisc)
1062 * But then router serving it might decide, that we should
1063 * know truth 8)8) --ANK (980726).
1064 */
1065 if (!(rt->rt6i_flags&RTF_GATEWAY))
1066 goto out;
1067
1068 /*
1069 * RFC 2461 specifies that redirects should only be
1070 * accepted if they come from the nexthop to the target.
1071 * Due to the way default routers are chosen, this notion
1072 * is a bit fuzzy and one might need to check all default
1073 * routers.
1074 */
1075 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1076 if (rt->rt6i_flags & RTF_DEFAULT) {
1077 struct rt6_info *rt1;
1078
1079 read_lock(&rt6_lock);
1080 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1081 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1082 dst_hold(&rt1->u.dst);
1083 dst_release(&rt->u.dst);
1084 read_unlock(&rt6_lock);
1085 rt = rt1;
1086 goto source_ok;
1087 }
1088 }
1089 read_unlock(&rt6_lock);
1090 }
1091 if (net_ratelimit())
1092 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1093 "for redirect target\n");
1094 goto out;
1095 }
1096
1097source_ok:
1098
1099 /*
1100 * We have finally decided to accept it.
1101 */
1102
1103 neigh_update(neigh, lladdr, NUD_STALE,
1104 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1105 NEIGH_UPDATE_F_OVERRIDE|
1106 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1107 NEIGH_UPDATE_F_ISROUTER))
1108 );
1109
1110 /*
1111 * Redirect received -> path was valid.
1112 * Look, redirects are sent only in response to data packets,
1113 * so that this nexthop apparently is reachable. --ANK
1114 */
1115 dst_confirm(&rt->u.dst);
1116
1117 /* Duplicate redirect: silently ignore. */
1118 if (neigh == rt->u.dst.neighbour)
1119 goto out;
1120
1121 nrt = ip6_rt_copy(rt);
1122 if (nrt == NULL)
1123 goto out;
1124
1125 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1126 if (on_link)
1127 nrt->rt6i_flags &= ~RTF_GATEWAY;
1128
1129 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1130 nrt->rt6i_dst.plen = 128;
1131 nrt->u.dst.flags |= DST_HOST;
1132
1133 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1134 nrt->rt6i_nexthop = neigh_clone(neigh);
1135 /* Reset pmtu, it may be better */
1136 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1137 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1138
1139 if (ip6_ins_rt(nrt, NULL, NULL))
1140 goto out;
1141
1142 if (rt->rt6i_flags&RTF_CACHE) {
1143 ip6_del_rt(rt, NULL, NULL);
1144 return;
1145 }
1146
1147out:
1148 dst_release(&rt->u.dst);
1149 return;
1150}
1151
1152/*
1153 * Handle ICMP "packet too big" messages
1154 * i.e. Path MTU discovery
1155 */
1156
1157void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1158 struct net_device *dev, u32 pmtu)
1159{
1160 struct rt6_info *rt, *nrt;
1161 int allfrag = 0;
1162
1163 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1164 if (rt == NULL)
1165 return;
1166
1167 if (pmtu >= dst_mtu(&rt->u.dst))
1168 goto out;
1169
1170 if (pmtu < IPV6_MIN_MTU) {
1171 /*
1172 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1173 * MTU (1280) and a fragment header should always be included
1174 * after a node receiving Too Big message reporting PMTU is
1175 * less than the IPv6 Minimum Link MTU.
1176 */
1177 pmtu = IPV6_MIN_MTU;
1178 allfrag = 1;
1179 }
1180
1181 /* New mtu received -> path was valid.
1182 They are sent only in response to data packets,
1183 so that this nexthop apparently is reachable. --ANK
1184 */
1185 dst_confirm(&rt->u.dst);
1186
1187 /* Host route. If it is static, it would be better
1188 not to override it, but add new one, so that
1189 when cache entry will expire old pmtu
1190 would return automatically.
1191 */
1192 if (rt->rt6i_flags & RTF_CACHE) {
1193 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1194 if (allfrag)
1195 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1196 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1197 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1198 goto out;
1199 }
1200
1201 /* Network route.
1202 Two cases are possible:
1203 1. It is connected route. Action: COW
1204 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1205 */
1206 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1207 nrt = rt6_cow(rt, daddr, saddr);
1208 if (!nrt->u.dst.error) {
1209 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1210 if (allfrag)
1211 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1212 /* According to RFC 1981, detecting PMTU increase shouldn't be
1213 happened within 5 mins, the recommended timer is 10 mins.
1214 Here this route expiration time is set to ip6_rt_mtu_expires
1215 which is 10 mins. After 10 mins the decreased pmtu is expired
1216 and detecting PMTU increase will be automatically happened.
1217 */
1218 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1219 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1220 }
1221 dst_release(&nrt->u.dst);
1222 } else {
1223 nrt = ip6_rt_copy(rt);
1224 if (nrt == NULL)
1225 goto out;
1226 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1227 nrt->rt6i_dst.plen = 128;
1228 nrt->u.dst.flags |= DST_HOST;
1229 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1230 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1231 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1232 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1233 if (allfrag)
1234 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1235 ip6_ins_rt(nrt, NULL, NULL);
1236 }
1237
1238out:
1239 dst_release(&rt->u.dst);
1240}
1241
1242/*
1243 * Misc support functions
1244 */
1245
1246static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1247{
1248 struct rt6_info *rt = ip6_dst_alloc();
1249
1250 if (rt) {
1251 rt->u.dst.input = ort->u.dst.input;
1252 rt->u.dst.output = ort->u.dst.output;
1253
1254 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1255 rt->u.dst.dev = ort->u.dst.dev;
1256 if (rt->u.dst.dev)
1257 dev_hold(rt->u.dst.dev);
1258 rt->rt6i_idev = ort->rt6i_idev;
1259 if (rt->rt6i_idev)
1260 in6_dev_hold(rt->rt6i_idev);
1261 rt->u.dst.lastuse = jiffies;
1262 rt->rt6i_expires = 0;
1263
1264 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1265 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1266 rt->rt6i_metric = 0;
1267
1268 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1269#ifdef CONFIG_IPV6_SUBTREES
1270 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1271#endif
1272 }
1273 return rt;
1274}
1275
1276struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1277{
1278 struct rt6_info *rt;
1279 struct fib6_node *fn;
1280
1281 fn = &ip6_routing_table;
1282
1283 write_lock_bh(&rt6_lock);
1284 for (rt = fn->leaf; rt; rt=rt->u.next) {
1285 if (dev == rt->rt6i_dev &&
1286 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1287 break;
1288 }
1289 if (rt)
1290 dst_hold(&rt->u.dst);
1291 write_unlock_bh(&rt6_lock);
1292 return rt;
1293}
1294
1295struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1296 struct net_device *dev)
1297{
1298 struct in6_rtmsg rtmsg;
1299
1300 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1301 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1302 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1303 rtmsg.rtmsg_metric = 1024;
1304 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1305
1306 rtmsg.rtmsg_ifindex = dev->ifindex;
1307
1308 ip6_route_add(&rtmsg, NULL, NULL);
1309 return rt6_get_dflt_router(gwaddr, dev);
1310}
1311
1312void rt6_purge_dflt_routers(void)
1313{
1314 struct rt6_info *rt;
1315
1316restart:
1317 read_lock_bh(&rt6_lock);
1318 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1319 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1320 dst_hold(&rt->u.dst);
1321
1322 rt6_reset_dflt_pointer(NULL);
1323
1324 read_unlock_bh(&rt6_lock);
1325
1326 ip6_del_rt(rt, NULL, NULL);
1327
1328 goto restart;
1329 }
1330 }
1331 read_unlock_bh(&rt6_lock);
1332}
1333
1334int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1335{
1336 struct in6_rtmsg rtmsg;
1337 int err;
1338
1339 switch(cmd) {
1340 case SIOCADDRT: /* Add a route */
1341 case SIOCDELRT: /* Delete a route */
1342 if (!capable(CAP_NET_ADMIN))
1343 return -EPERM;
1344 err = copy_from_user(&rtmsg, arg,
1345 sizeof(struct in6_rtmsg));
1346 if (err)
1347 return -EFAULT;
1348
1349 rtnl_lock();
1350 switch (cmd) {
1351 case SIOCADDRT:
1352 err = ip6_route_add(&rtmsg, NULL, NULL);
1353 break;
1354 case SIOCDELRT:
1355 err = ip6_route_del(&rtmsg, NULL, NULL);
1356 break;
1357 default:
1358 err = -EINVAL;
1359 }
1360 rtnl_unlock();
1361
1362 return err;
1363 };
1364
1365 return -EINVAL;
1366}
1367
1368/*
1369 * Drop the packet on the floor
1370 */
1371
1372int ip6_pkt_discard(struct sk_buff *skb)
1373{
1374 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1375 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1376 kfree_skb(skb);
1377 return 0;
1378}
1379
1380int ip6_pkt_discard_out(struct sk_buff *skb)
1381{
1382 skb->dev = skb->dst->dev;
1383 return ip6_pkt_discard(skb);
1384}
1385
1386/*
1387 * Allocate a dst for local (unicast / anycast) address.
1388 */
1389
1390struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1391 const struct in6_addr *addr,
1392 int anycast)
1393{
1394 struct rt6_info *rt = ip6_dst_alloc();
1395
1396 if (rt == NULL)
1397 return ERR_PTR(-ENOMEM);
1398
1399 dev_hold(&loopback_dev);
1400 in6_dev_hold(idev);
1401
1402 rt->u.dst.flags = DST_HOST;
1403 rt->u.dst.input = ip6_input;
1404 rt->u.dst.output = ip6_output;
1405 rt->rt6i_dev = &loopback_dev;
1406 rt->rt6i_idev = idev;
1407 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1408 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1409 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1410 rt->u.dst.obsolete = -1;
1411
1412 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1413 if (!anycast)
1414 rt->rt6i_flags |= RTF_LOCAL;
1415 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1416 if (rt->rt6i_nexthop == NULL) {
1417 dst_free((struct dst_entry *) rt);
1418 return ERR_PTR(-ENOMEM);
1419 }
1420
1421 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1422 rt->rt6i_dst.plen = 128;
1423
1424 atomic_set(&rt->u.dst.__refcnt, 1);
1425
1426 return rt;
1427}
1428
1429static int fib6_ifdown(struct rt6_info *rt, void *arg)
1430{
1431 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1432 rt != &ip6_null_entry) {
1433 RT6_TRACE("deleted by ifdown %p\n", rt);
1434 return -1;
1435 }
1436 return 0;
1437}
1438
1439void rt6_ifdown(struct net_device *dev)
1440{
1441 write_lock_bh(&rt6_lock);
1442 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1443 write_unlock_bh(&rt6_lock);
1444}
1445
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1451
1452static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1453{
1454 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1455 struct inet6_dev *idev;
1456
1457 /* In IPv6 pmtu discovery is not optional,
1458 so that RTAX_MTU lock cannot disable it.
1459 We still use this lock to block changes
1460 caused by addrconf/ndisc.
1461 */
1462
1463 idev = __in6_dev_get(arg->dev);
1464 if (idev == NULL)
1465 return 0;
1466
1467 /* For administrative MTU increase, there is no way to discover
1468 IPv6 PMTU increase, so PMTU increase should be updated here.
1469 Since RFC 1981 doesn't include administrative MTU increase
1470 update PMTU increase is a MUST. (i.e. jumbo frame)
1471 */
1472 /*
1473 If new MTU is less than route PMTU, this new MTU will be the
1474 lowest MTU in the path, update the route PMTU to reflect PMTU
1475 decreases; if new MTU is greater than route PMTU, and the
1476 old MTU is the lowest MTU in the path, update the route PMTU
1477 to reflect the increase. In this case if the other nodes' MTU
1478 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1479 PMTU discouvery.
1480 */
1481 if (rt->rt6i_dev == arg->dev &&
1482 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1483 (dst_mtu(&rt->u.dst) > arg->mtu ||
1484 (dst_mtu(&rt->u.dst) < arg->mtu &&
1485 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1486 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1487 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1488 return 0;
1489}
1490
1491void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1492{
1493 struct rt6_mtu_change_arg arg;
1494
1495 arg.dev = dev;
1496 arg.mtu = mtu;
1497 read_lock_bh(&rt6_lock);
1498 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1499 read_unlock_bh(&rt6_lock);
1500}
1501
1502static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1503 struct in6_rtmsg *rtmsg)
1504{
1505 memset(rtmsg, 0, sizeof(*rtmsg));
1506
1507 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1508 rtmsg->rtmsg_src_len = r->rtm_src_len;
1509 rtmsg->rtmsg_flags = RTF_UP;
1510 if (r->rtm_type == RTN_UNREACHABLE)
1511 rtmsg->rtmsg_flags |= RTF_REJECT;
1512
1513 if (rta[RTA_GATEWAY-1]) {
1514 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1515 return -EINVAL;
1516 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1517 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1518 }
1519 if (rta[RTA_DST-1]) {
1520 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1521 return -EINVAL;
1522 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1523 }
1524 if (rta[RTA_SRC-1]) {
1525 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1526 return -EINVAL;
1527 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1528 }
1529 if (rta[RTA_OIF-1]) {
1530 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1531 return -EINVAL;
1532 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1533 }
1534 if (rta[RTA_PRIORITY-1]) {
1535 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1536 return -EINVAL;
1537 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1538 }
1539 return 0;
1540}
1541
1542int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1543{
1544 struct rtmsg *r = NLMSG_DATA(nlh);
1545 struct in6_rtmsg rtmsg;
1546
1547 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1548 return -EINVAL;
1549 return ip6_route_del(&rtmsg, nlh, arg);
1550}
1551
1552int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1553{
1554 struct rtmsg *r = NLMSG_DATA(nlh);
1555 struct in6_rtmsg rtmsg;
1556
1557 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1558 return -EINVAL;
1559 return ip6_route_add(&rtmsg, nlh, arg);
1560}
1561
/* Per-call state passed to rt6_dump_route() while dumping the table. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* dump callback of the request */
};
1567
1568static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1569 struct in6_addr *dst,
1570 struct in6_addr *src,
1571 int iif,
1572 int type, u32 pid, u32 seq,
b6544c0b
JHS
1573 struct nlmsghdr *in_nlh, int prefix,
1574 unsigned int flags)
1da177e4
LT
1575{
1576 struct rtmsg *rtm;
1577 struct nlmsghdr *nlh;
1578 unsigned char *b = skb->tail;
1579 struct rta_cacheinfo ci;
1580
1581 if (prefix) { /* user wants prefix routes only */
1582 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1583 /* success since this is not a prefix route */
1584 return 1;
1585 }
1586 }
1587
1588 if (!pid && in_nlh) {
1589 pid = in_nlh->nlmsg_pid;
1590 }
1591
b6544c0b 1592 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1da177e4
LT
1593 rtm = NLMSG_DATA(nlh);
1594 rtm->rtm_family = AF_INET6;
1595 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1596 rtm->rtm_src_len = rt->rt6i_src.plen;
1597 rtm->rtm_tos = 0;
1598 rtm->rtm_table = RT_TABLE_MAIN;
1599 if (rt->rt6i_flags&RTF_REJECT)
1600 rtm->rtm_type = RTN_UNREACHABLE;
1601 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1602 rtm->rtm_type = RTN_LOCAL;
1603 else
1604 rtm->rtm_type = RTN_UNICAST;
1605 rtm->rtm_flags = 0;
1606 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1607 rtm->rtm_protocol = rt->rt6i_protocol;
1608 if (rt->rt6i_flags&RTF_DYNAMIC)
1609 rtm->rtm_protocol = RTPROT_REDIRECT;
1610 else if (rt->rt6i_flags & RTF_ADDRCONF)
1611 rtm->rtm_protocol = RTPROT_KERNEL;
1612 else if (rt->rt6i_flags&RTF_DEFAULT)
1613 rtm->rtm_protocol = RTPROT_RA;
1614
1615 if (rt->rt6i_flags&RTF_CACHE)
1616 rtm->rtm_flags |= RTM_F_CLONED;
1617
1618 if (dst) {
1619 RTA_PUT(skb, RTA_DST, 16, dst);
1620 rtm->rtm_dst_len = 128;
1621 } else if (rtm->rtm_dst_len)
1622 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1623#ifdef CONFIG_IPV6_SUBTREES
1624 if (src) {
1625 RTA_PUT(skb, RTA_SRC, 16, src);
1626 rtm->rtm_src_len = 128;
1627 } else if (rtm->rtm_src_len)
1628 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1629#endif
1630 if (iif)
1631 RTA_PUT(skb, RTA_IIF, 4, &iif);
1632 else if (dst) {
1633 struct in6_addr saddr_buf;
1634 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1635 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1636 }
1637 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1638 goto rtattr_failure;
1639 if (rt->u.dst.neighbour)
1640 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1641 if (rt->u.dst.dev)
1642 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1643 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1644 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1645 if (rt->rt6i_expires)
1646 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1647 else
1648 ci.rta_expires = 0;
1649 ci.rta_used = rt->u.dst.__use;
1650 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1651 ci.rta_error = rt->u.dst.error;
1652 ci.rta_id = 0;
1653 ci.rta_ts = 0;
1654 ci.rta_tsage = 0;
1655 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1656 nlh->nlmsg_len = skb->tail - b;
1657 return skb->len;
1658
1659nlmsg_failure:
1660rtattr_failure:
1661 skb_trim(skb, b - skb->data);
1662 return -1;
1663}
1664
1665static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1666{
1667 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1668 int prefix;
1669
1670 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1671 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1672 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1673 } else
1674 prefix = 0;
1675
1676 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1677 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
b6544c0b 1678 NULL, prefix, NLM_F_MULTI);
1da177e4
LT
1679}
1680
1681static int fib6_dump_node(struct fib6_walker_t *w)
1682{
1683 int res;
1684 struct rt6_info *rt;
1685
1686 for (rt = w->leaf; rt; rt = rt->u.next) {
1687 res = rt6_dump_route(rt, w->args);
1688 if (res < 0) {
1689 /* Frame is full, suspend walking */
1690 w->leaf = rt;
1691 return 1;
1692 }
1693 BUG_TRAP(res!=0);
1694 }
1695 w->leaf = NULL;
1696 return 0;
1697}
1698
1699static void fib6_dump_end(struct netlink_callback *cb)
1700{
1701 struct fib6_walker_t *w = (void*)cb->args[0];
1702
1703 if (w) {
1704 cb->args[0] = 0;
1705 fib6_walker_unlink(w);
1706 kfree(w);
1707 }
1708 if (cb->args[1]) {
1709 cb->done = (void*)cb->args[1];
1710 cb->args[1] = 0;
1711 }
1712}
1713
1714static int fib6_dump_done(struct netlink_callback *cb)
1715{
1716 fib6_dump_end(cb);
1717 return cb->done(cb);
1718}
1719
1720int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1721{
1722 struct rt6_rtnl_dump_arg arg;
1723 struct fib6_walker_t *w;
1724 int res;
1725
1726 arg.skb = skb;
1727 arg.cb = cb;
1728
1729 w = (void*)cb->args[0];
1730 if (w == NULL) {
1731 /* New dump:
1732 *
1733 * 1. hook callback destructor.
1734 */
1735 cb->args[1] = (long)cb->done;
1736 cb->done = fib6_dump_done;
1737
1738 /*
1739 * 2. allocate and initialize walker.
1740 */
1741 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1742 if (w == NULL)
1743 return -ENOMEM;
1744 RT6_TRACE("dump<%p", w);
1745 memset(w, 0, sizeof(*w));
1746 w->root = &ip6_routing_table;
1747 w->func = fib6_dump_node;
1748 w->args = &arg;
1749 cb->args[0] = (long)w;
1750 read_lock_bh(&rt6_lock);
1751 res = fib6_walk(w);
1752 read_unlock_bh(&rt6_lock);
1753 } else {
1754 w->args = &arg;
1755 read_lock_bh(&rt6_lock);
1756 res = fib6_walk_continue(w);
1757 read_unlock_bh(&rt6_lock);
1758 }
1759#if RT6_DEBUG >= 3
1760 if (res <= 0 && skb->len == 0)
1761 RT6_TRACE("%p>dump end\n", w);
1762#endif
1763 res = res < 0 ? res : skb->len;
1764 /* res < 0 is an error. (really, impossible)
1765 res == 0 means that dump is complete, but skb still can contain data.
1766 res > 0 dump is not complete, but frame is full.
1767 */
1768 /* Destroy walker, if dump of this table is complete. */
1769 if (res <= 0)
1770 fib6_dump_end(cb);
1771 return res;
1772}
1773
1774int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1775{
1776 struct rtattr **rta = arg;
1777 int iif = 0;
1778 int err = -ENOBUFS;
1779 struct sk_buff *skb;
1780 struct flowi fl;
1781 struct rt6_info *rt;
1782
1783 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1784 if (skb == NULL)
1785 goto out;
1786
1787 /* Reserve room for dummy headers, this skb can pass
1788 through good chunk of routing engine.
1789 */
1790 skb->mac.raw = skb->data;
1791 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1792
1793 memset(&fl, 0, sizeof(fl));
1794 if (rta[RTA_SRC-1])
1795 ipv6_addr_copy(&fl.fl6_src,
1796 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1797 if (rta[RTA_DST-1])
1798 ipv6_addr_copy(&fl.fl6_dst,
1799 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1800
1801 if (rta[RTA_IIF-1])
1802 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1803
1804 if (iif) {
1805 struct net_device *dev;
1806 dev = __dev_get_by_index(iif);
1807 if (!dev) {
1808 err = -ENODEV;
1809 goto out_free;
1810 }
1811 }
1812
1813 fl.oif = 0;
1814 if (rta[RTA_OIF-1])
1815 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1816
1817 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1818
1819 skb->dst = &rt->u.dst;
1820
1821 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1822 err = rt6_fill_node(skb, rt,
1823 &fl.fl6_dst, &fl.fl6_src,
1824 iif,
1825 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
b6544c0b 1826 nlh->nlmsg_seq, nlh, 0, 0);
1da177e4
LT
1827 if (err < 0) {
1828 err = -EMSGSIZE;
1829 goto out_free;
1830 }
1831
1832 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1833 if (err > 0)
1834 err = 0;
1835out:
1836 return err;
1837out_free:
1838 kfree_skb(skb);
1839 goto out;
1840}
1841
1842void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
1843{
1844 struct sk_buff *skb;
1845 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1846
1847 skb = alloc_skb(size, gfp_any());
1848 if (!skb) {
1849 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
1850 return;
1851 }
b6544c0b 1852 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0, 0) < 0) {
1da177e4
LT
1853 kfree_skb(skb);
1854 netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
1855 return;
1856 }
1857 NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE;
1858 netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any());
1859}
1860
1861/*
1862 * /proc
1863 */
1864
1865#ifdef CONFIG_PROC_FS
1866
/* Record size used by the ipv6_route proc code to turn a byte offset into a route count. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested */
	int skip;	/* routes skipped so far */
	int len;	/* bytes written to buffer so far */
};
1877
1878static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1879{
1880 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1881 int i;
1882
1883 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1884 arg->skip++;
1885 return 0;
1886 }
1887
1888 if (arg->len >= arg->length)
1889 return 0;
1890
1891 for (i=0; i<16; i++) {
1892 sprintf(arg->buffer + arg->len, "%02x",
1893 rt->rt6i_dst.addr.s6_addr[i]);
1894 arg->len += 2;
1895 }
1896 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1897 rt->rt6i_dst.plen);
1898
1899#ifdef CONFIG_IPV6_SUBTREES
1900 for (i=0; i<16; i++) {
1901 sprintf(arg->buffer + arg->len, "%02x",
1902 rt->rt6i_src.addr.s6_addr[i]);
1903 arg->len += 2;
1904 }
1905 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1906 rt->rt6i_src.plen);
1907#else
1908 sprintf(arg->buffer + arg->len,
1909 "00000000000000000000000000000000 00 ");
1910 arg->len += 36;
1911#endif
1912
1913 if (rt->rt6i_nexthop) {
1914 for (i=0; i<16; i++) {
1915 sprintf(arg->buffer + arg->len, "%02x",
1916 rt->rt6i_nexthop->primary_key[i]);
1917 arg->len += 2;
1918 }
1919 } else {
1920 sprintf(arg->buffer + arg->len,
1921 "00000000000000000000000000000000");
1922 arg->len += 32;
1923 }
1924 arg->len += sprintf(arg->buffer + arg->len,
1925 " %08x %08x %08x %08x %8s\n",
1926 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1927 rt->u.dst.__use, rt->rt6i_flags,
1928 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1929 return 0;
1930}
1931
1932static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1933{
1934 struct rt6_proc_arg arg;
1935 arg.buffer = buffer;
1936 arg.offset = offset;
1937 arg.length = length;
1938 arg.skip = 0;
1939 arg.len = 0;
1940
1941 read_lock_bh(&rt6_lock);
1942 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1943 read_unlock_bh(&rt6_lock);
1944
1945 *start = buffer;
1946 if (offset)
1947 *start += offset % RT6_INFO_LEN;
1948
1949 arg.len -= offset % RT6_INFO_LEN;
1950
1951 if (arg.len > length)
1952 arg.len = length;
1953 if (arg.len < 0)
1954 arg.len = 0;
1955
1956 return arg.len;
1957}
1958
1959extern struct rt6_statistics rt6_stats;
1960
1961static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1962{
1963 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1964 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1965 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1966 rt6_stats.fib_rt_cache,
1967 atomic_read(&ip6_dst_ops.entries),
1968 rt6_stats.fib_discarded_routes);
1969
1970 return 0;
1971}
1972
1973static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1974{
1975 return single_open(file, rt6_stats_seq_show, NULL);
1976}
1977
1978static struct file_operations rt6_stats_seq_fops = {
1979 .owner = THIS_MODULE,
1980 .open = rt6_stats_seq_open,
1981 .read = seq_read,
1982 .llseek = seq_lseek,
1983 .release = single_release,
1984};
1985#endif /* CONFIG_PROC_FS */
1986
1987#ifdef CONFIG_SYSCTL
1988
1989static int flush_delay;
1990
1991static
1992int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1993 void __user *buffer, size_t *lenp, loff_t *ppos)
1994{
1995 if (write) {
1996 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1997 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1998 return 0;
1999 } else
2000 return -EINVAL;
2001}
2002
2003ctl_table ipv6_route_table[] = {
2004 {
2005 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2006 .procname = "flush",
2007 .data = &flush_delay,
2008 .maxlen = sizeof(int),
89c8b3a1 2009 .mode = 0200,
1da177e4
LT
2010 .proc_handler = &ipv6_sysctl_rtcache_flush
2011 },
2012 {
2013 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2014 .procname = "gc_thresh",
2015 .data = &ip6_dst_ops.gc_thresh,
2016 .maxlen = sizeof(int),
2017 .mode = 0644,
2018 .proc_handler = &proc_dointvec,
2019 },
2020 {
2021 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2022 .procname = "max_size",
2023 .data = &ip6_rt_max_size,
2024 .maxlen = sizeof(int),
2025 .mode = 0644,
2026 .proc_handler = &proc_dointvec,
2027 },
2028 {
2029 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2030 .procname = "gc_min_interval",
2031 .data = &ip6_rt_gc_min_interval,
2032 .maxlen = sizeof(int),
2033 .mode = 0644,
2034 .proc_handler = &proc_dointvec_jiffies,
2035 .strategy = &sysctl_jiffies,
2036 },
2037 {
2038 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2039 .procname = "gc_timeout",
2040 .data = &ip6_rt_gc_timeout,
2041 .maxlen = sizeof(int),
2042 .mode = 0644,
2043 .proc_handler = &proc_dointvec_jiffies,
2044 .strategy = &sysctl_jiffies,
2045 },
2046 {
2047 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2048 .procname = "gc_interval",
2049 .data = &ip6_rt_gc_interval,
2050 .maxlen = sizeof(int),
2051 .mode = 0644,
2052 .proc_handler = &proc_dointvec_jiffies,
2053 .strategy = &sysctl_jiffies,
2054 },
2055 {
2056 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2057 .procname = "gc_elasticity",
2058 .data = &ip6_rt_gc_elasticity,
2059 .maxlen = sizeof(int),
2060 .mode = 0644,
2061 .proc_handler = &proc_dointvec_jiffies,
2062 .strategy = &sysctl_jiffies,
2063 },
2064 {
2065 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2066 .procname = "mtu_expires",
2067 .data = &ip6_rt_mtu_expires,
2068 .maxlen = sizeof(int),
2069 .mode = 0644,
2070 .proc_handler = &proc_dointvec_jiffies,
2071 .strategy = &sysctl_jiffies,
2072 },
2073 {
2074 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2075 .procname = "min_adv_mss",
2076 .data = &ip6_rt_min_advmss,
2077 .maxlen = sizeof(int),
2078 .mode = 0644,
2079 .proc_handler = &proc_dointvec_jiffies,
2080 .strategy = &sysctl_jiffies,
2081 },
2082 {
2083 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2084 .procname = "gc_min_interval_ms",
2085 .data = &ip6_rt_gc_min_interval,
2086 .maxlen = sizeof(int),
2087 .mode = 0644,
2088 .proc_handler = &proc_dointvec_ms_jiffies,
2089 .strategy = &sysctl_ms_jiffies,
2090 },
2091 { .ctl_name = 0 }
2092};
2093
2094#endif
2095
2096void __init ip6_route_init(void)
2097{
2098 struct proc_dir_entry *p;
2099
2100 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2101 sizeof(struct rt6_info),
2102 0, SLAB_HWCACHE_ALIGN,
2103 NULL, NULL);
2104 if (!ip6_dst_ops.kmem_cachep)
2105 panic("cannot create ip6_dst_cache");
2106
2107 fib6_init();
2108#ifdef CONFIG_PROC_FS
2109 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2110 if (p)
2111 p->owner = THIS_MODULE;
2112
2113 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2114#endif
2115#ifdef CONFIG_XFRM
2116 xfrm6_init();
2117#endif
2118}
2119
2120void ip6_route_cleanup(void)
2121{
2122#ifdef CONFIG_PROC_FS
2123 proc_net_remove("ipv6_route");
2124 proc_net_remove("rt6_stats");
2125#endif
2126#ifdef CONFIG_XFRM
2127 xfrm6_fini();
2128#endif
2129 rt6_ifdown(NULL);
2130 fib6_gc_cleanup();
2131 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2132}