]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[PATCH] fix spinlock-debugging smp_processor_id() usage
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
27#include <linux/config.h>
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
63/* Set to 3 to get tracing. */
/* Compile-time trace level: at 3 or above RDBG() and RT6_TRACE() expand to printk, otherwise to nothing. */
64#define RT6_DEBUG 2
65
66#if RT6_DEBUG >= 3
/* RDBG takes one fully parenthesised printk argument list, e.g. RDBG(("fmt", arg)). */
67#define RDBG(x) printk x
/* RT6_TRACE prefixes its format string with KERN_DEBUG. */
68#define RT6_TRACE(x...) printk(KERN_DEBUG x)
69#else
70#define RDBG(x)
71#define RT6_TRACE(x...) do { ; } while (0)
72#endif
73
74
/* Entry-count ceiling that ip6_dst_gc() compares ip6_dst_ops.entries against. */
75static int ip6_rt_max_size = 4096;
/* ip6_dst_gc() skips a run if the previous one was less than this long ago and the table is not over ip6_rt_max_size. */
76static int ip6_rt_gc_min_interval = HZ / 2;
/* ip6_dst_gc() resets its expiry value to half of this once entries drop below gc_thresh. */
77static int ip6_rt_gc_timeout = 60*HZ;
/* Non-static; not referenced in the visible part of this file. */
78int ip6_rt_gc_interval = 30*HZ;
/* Shift used by ip6_dst_gc(): expire -= expire >> ip6_rt_gc_elasticity. */
79static int ip6_rt_gc_elasticity = 9;
/* Expiry passed to dst_set_expires() for routes touched by rt6_pmtu_discovery(). */
80static int ip6_rt_mtu_expires = 10*60*HZ;
/* Lower bound applied by ipv6_advmss(). */
81static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
/*
 * Forward declarations. These are needed because the ip6_dst_ops and
 * ip6_null_entry initialisers below, and rt6_cow(), reference the
 * functions before their definitions appear.
 */
83static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
89static int ip6_dst_gc(void);
90
91static int ip6_pkt_discard(struct sk_buff *skb);
92static int ip6_pkt_discard_out(struct sk_buff *skb);
93static void ip6_link_failure(struct sk_buff *skb);
94static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
/*
 * dst_ops table for IPv6 routes. ip6_dst_alloc() passes it to dst_alloc(),
 * and ip6_dst_gc() reads its .entries counter and .gc_thresh.
 */
96static struct dst_ops ip6_dst_ops = {
97 .family = AF_INET6,
98 .protocol = __constant_htons(ETH_P_IPV6),
99 .gc = ip6_dst_gc,
100 .gc_thresh = 1024,
101 .check = ip6_dst_check,
102 .destroy = ip6_dst_destroy,
103 .ifdown = ip6_dst_ifdown,
104 .negative_advice = ip6_negative_advice,
105 .link_failure = ip6_link_failure,
106 .update_pmtu = ip6_rt_update_pmtu,
	/* Each entry is a full struct rt6_info, not a bare dst_entry. */
107 .entry_size = sizeof(struct rt6_info),
108};
109
/*
 * Statically allocated "no route" entry. Lookups in this file fall back to
 * it when nothing matches (rt6_device_match() in strict mode,
 * rt6_best_dflt(), and rt6_cow() on allocation failure). It is bound to
 * loopback_dev, carries dst.error = -ENETUNREACH and routes packets to the
 * ip6_pkt_discard*() handlers.
 */
110struct rt6_info ip6_null_entry = {
111 .u = {
112 .dst = {
113 .__refcnt = ATOMIC_INIT(1),
114 .__use = 1,
115 .dev = &loopback_dev,
116 .obsolete = -1,
117 .error = -ENETUNREACH,
118 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
119 .input = ip6_pkt_discard,
120 .output = ip6_pkt_discard_out,
121 .ops = &ip6_dst_ops,
	/* .path points back at the entry itself. */
122 .path = (struct dst_entry*)&ip6_null_entry,
123 }
124 },
125 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
	/* All-ones metric. */
126 .rt6i_metric = ~(u32) 0,
127 .rt6i_ref = ATOMIC_INIT(1),
128};
129
/*
 * Root node of the IPv6 FIB tree. Its leaf initially points at
 * ip6_null_entry; every fib6_lookup()/fib6_add()/fib6_locate() call in this
 * file starts from this node.
 */
130struct fib6_node ip6_routing_table = {
131 .leaf = &ip6_null_entry,
132 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133};
134
135/* Protects all the ip6 fib */
136
/*
 * Lookups in this file take rt6_lock with read_lock_bh(); ip6_ins_rt() and
 * ip6_del_rt() take it with write_lock_bh() around fib6_add()/fib6_del().
 * ndisc_dst_alloc() also takes it for writing while linking into
 * ndisc_dst_gc_list.
 */
137DEFINE_RWLOCK(rt6_lock);
138
139
140/* allocate dst with ip6_dst_ops */
141static __inline__ struct rt6_info *ip6_dst_alloc(void)
142{
143 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144}
145
146static void ip6_dst_destroy(struct dst_entry *dst)
147{
148 struct rt6_info *rt = (struct rt6_info *)dst;
149 struct inet6_dev *idev = rt->rt6i_idev;
150
151 if (idev != NULL) {
152 rt->rt6i_idev = NULL;
153 in6_dev_put(idev);
154 }
155}
156
157static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158 int how)
159{
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
162
163 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 if (loopback_idev != NULL) {
166 rt->rt6i_idev = loopback_idev;
167 in6_dev_put(idev);
168 }
169 }
170}
171
172static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173{
174 return (rt->rt6i_flags & RTF_EXPIRES &&
175 time_after(jiffies, rt->rt6i_expires));
176}
177
178/*
179 * Route lookup. Any rt6_lock is implied.
180 */
181
182static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183 int oif,
184 int strict)
185{
186 struct rt6_info *local = NULL;
187 struct rt6_info *sprt;
188
189 if (oif) {
190 for (sprt = rt; sprt; sprt = sprt->u.next) {
191 struct net_device *dev = sprt->rt6i_dev;
192 if (dev->ifindex == oif)
193 return sprt;
194 if (dev->flags & IFF_LOOPBACK) {
195 if (sprt->rt6i_idev == NULL ||
196 sprt->rt6i_idev->dev->ifindex != oif) {
197 if (strict && oif)
198 continue;
199 if (local && (!oif ||
200 local->rt6i_idev->dev->ifindex == oif))
201 continue;
202 }
203 local = sprt;
204 }
205 }
206
207 if (local)
208 return local;
209
210 if (strict)
211 return &ip6_null_entry;
212 }
213 return rt;
214}
215
216/*
217 * pointer to the last default router chosen. BH is disabled locally.
218 */
/*
 * rt6_dflt_pointer is written only under rt6_dflt_lock, by
 * rt6_reset_dflt_pointer() and rt6_best_dflt(). No reference is taken on
 * the route it points to.
 */
219static struct rt6_info *rt6_dflt_pointer;
220static DEFINE_SPINLOCK(rt6_dflt_lock);
221
222void rt6_reset_dflt_pointer(struct rt6_info *rt)
223{
224 spin_lock_bh(&rt6_dflt_lock);
225 if (rt == NULL || rt == rt6_dflt_pointer) {
226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 rt6_dflt_pointer = NULL;
228 }
229 spin_unlock_bh(&rt6_dflt_lock);
230}
231
232/* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Choose a default router from the sibling list starting at @rt.
 *
 * Phase 1 scores every non-expired entry that has a nexthop neighbour in a
 * usable state: +8 if @oif is 0 or matches the entry's device, +4 if the
 * entry is the previously chosen router (rt6_dflt_pointer), +3/+2/+1
 * depending on the neighbour's NUD state. A score of 12 or more ends the
 * scan at once.
 * Phase 2 (under rt6_dflt_lock) runs only when nothing scored: if a
 * previous choice exists, take the next usable entry after it, wrapping
 * round to the head of the list. Any match is recorded in rt6_dflt_pointer.
 * Phase 3 is the last resort: first non-expired RTF_DEFAULT entry at the
 * table root that fits @oif, else &ip6_null_entry.
 *
 * No reference is taken on the returned route. The lock is taken with plain
 * spin_lock(); the only caller in view, ip6_route_output(), holds
 * rt6_lock via read_lock_bh().
 */
233static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234{
235 struct rt6_info *match = NULL;
236 struct rt6_info *sprt;
237 int mpri = 0;
238
239 for (sprt = rt; sprt; sprt = sprt->u.next) {
240 struct neighbour *neigh;
241 int m = 0;
242
243 if (!oif ||
244 (sprt->rt6i_dev &&
245 sprt->rt6i_dev->ifindex == oif))
246 m += 8;
247
248 if (rt6_check_expired(sprt))
249 continue;
250
	/* NOTE(review): rt6_dflt_pointer is read here without rt6_dflt_lock. */
251 if (sprt == rt6_dflt_pointer)
252 m += 4;
253
254 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255 read_lock_bh(&neigh->lock);
256 switch (neigh->nud_state) {
257 case NUD_REACHABLE:
258 m += 3;
259 break;
260
261 case NUD_STALE:
262 case NUD_DELAY:
263 case NUD_PROBE:
264 m += 2;
265 break;
266
267 case NUD_NOARP:
268 case NUD_PERMANENT:
269 m += 1;
270 break;
271
272 case NUD_INCOMPLETE:
273 default:
	/* Unusable neighbour state: drop the lock and skip this entry. */
274 read_unlock_bh(&neigh->lock);
275 continue;
276 }
277 read_unlock_bh(&neigh->lock);
278 } else {
	/* Entries without a nexthop neighbour never score. */
279 continue;
280 }
281
	/* m >= 12 needs both the +8 and the +4 bonus (8 + 3 alone is only 11). */
282 if (m > mpri || m >= 12) {
283 match = sprt;
284 mpri = m;
285 if (m >= 12) {
286 /* we choose the last default router if it
287 * is in (probably) reachable state.
288 * If route changed, we should do pmtu
289 * discovery. --yoshfuji
290 */
291 break;
292 }
293 }
294 }
295
296 spin_lock(&rt6_dflt_lock);
297 if (!match) {
298 /*
299 * No default routers are known to be reachable.
300 * SHOULD round robin
301 */
	/* With no previous choice recorded, no round robin is attempted. */
302 if (rt6_dflt_pointer) {
	/* First pass: entries after the previous choice. */
303 for (sprt = rt6_dflt_pointer->u.next;
304 sprt; sprt = sprt->u.next) {
305 if (sprt->u.dst.obsolete <= 0 &&
306 sprt->u.dst.error == 0 &&
307 !rt6_check_expired(sprt)) {
308 match = sprt;
309 break;
310 }
311 }
	/* Second pass: wrap round from the head, stopping at the previous choice. */
312 for (sprt = rt;
313 !match && sprt;
314 sprt = sprt->u.next) {
315 if (sprt->u.dst.obsolete <= 0 &&
316 sprt->u.dst.error == 0 &&
317 !rt6_check_expired(sprt)) {
318 match = sprt;
319 break;
320 }
321 if (sprt == rt6_dflt_pointer)
322 break;
323 }
324 }
325 }
326
327 if (match) {
328 if (rt6_dflt_pointer != match)
329 RT6_TRACE("changed default router: %p->%p\n",
330 rt6_dflt_pointer, match);
331 rt6_dflt_pointer = match;
332 }
333 spin_unlock(&rt6_dflt_lock);
334
335 if (!match) {
336 /*
337 * Last Resort: if no default routers found,
338 * use addrconf default route.
339 * We don't record this route.
340 */
341 for (sprt = ip6_routing_table.leaf;
342 sprt; sprt = sprt->u.next) {
343 if (!rt6_check_expired(sprt) &&
344 (sprt->rt6i_flags & RTF_DEFAULT) &&
345 (!oif ||
346 (sprt->rt6i_dev &&
347 sprt->rt6i_dev->ifindex == oif))) {
348 match = sprt;
349 break;
350 }
351 }
352 if (!match) {
353 /* no default route. give up. */
354 match = &ip6_null_entry;
355 }
356 }
357
358 return match;
359}
360
361struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362 int oif, int strict)
363{
364 struct fib6_node *fn;
365 struct rt6_info *rt;
366
367 read_lock_bh(&rt6_lock);
368 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 rt = rt6_device_match(fn->leaf, oif, strict);
370 dst_hold(&rt->u.dst);
371 rt->u.dst.__use++;
372 read_unlock_bh(&rt6_lock);
373
374 rt->u.dst.lastuse = jiffies;
375 if (rt->u.dst.error == 0)
376 return rt;
377 dst_release(&rt->u.dst);
378 return NULL;
379}
380
381/* ip6_ins_rt must be called with rt6_lock NOT held.
382 It takes a new route entry; if the addition fails for any reason the
383 route is freed. In either case, if the caller does not hold a
384 reference to it, the entry may be destroyed.
385 */
386
0d51aa80
JHS
387int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
389{
390 int err;
391
392 write_lock_bh(&rt6_lock);
0d51aa80 393 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
1da177e4
LT
394 write_unlock_bh(&rt6_lock);
395
396 return err;
397}
398
399/* No rt6_lock! If COW failed, the function returns dead route entry
400 with dst->error set to errno value.
401 */
402
403static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
0d51aa80 404 struct in6_addr *saddr, struct netlink_skb_parms *req)
1da177e4
LT
405{
406 int err;
407 struct rt6_info *rt;
408
409 /*
410 * Clone the route.
411 */
412
413 rt = ip6_rt_copy(ort);
414
415 if (rt) {
416 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
417
418 if (!(rt->rt6i_flags&RTF_GATEWAY))
419 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
420
421 rt->rt6i_dst.plen = 128;
422 rt->rt6i_flags |= RTF_CACHE;
423 rt->u.dst.flags |= DST_HOST;
424
425#ifdef CONFIG_IPV6_SUBTREES
426 if (rt->rt6i_src.plen && saddr) {
427 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
428 rt->rt6i_src.plen = 128;
429 }
430#endif
431
432 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
433
434 dst_hold(&rt->u.dst);
435
0d51aa80 436 err = ip6_ins_rt(rt, NULL, NULL, req);
1da177e4
LT
437 if (err == 0)
438 return rt;
439
440 rt->u.dst.error = err;
441
442 return rt;
443 }
444 dst_hold(&ip6_null_entry.u.dst);
445 return &ip6_null_entry;
446}
447
/*
 * BACKTRACK() is used inside ip6_route_input()/ip6_route_output() and
 * relies on their locals `rt`, `fn`, `strict` and labels `restart`, `out`.
 * When a strict lookup ended on ip6_null_entry it climbs fn->parent:
 * reaching a node flagged RTN_ROOT takes a reference on rt and jumps to
 * `out`; reaching a node flagged RTN_RTINFO jumps to `restart` to retry
 * from that node. If the parents run out without either, execution falls
 * through with fn == NULL.
 */
448#define BACKTRACK() \
449if (rt == &ip6_null_entry && strict) { \
450 while ((fn = fn->parent) != NULL) { \
451 if (fn->fn_flags & RTN_ROOT) { \
452 dst_hold(&rt->u.dst); \
453 goto out; \
454 } \
455 if (fn->fn_flags & RTN_RTINFO) \
456 goto restart; \
457 } \
458}
459
460
/*
 * Resolve the route for a received packet and store it, with a reference
 * held, in skb->dst.
 *
 * The lookup is strict when the destination is multicast or link-local,
 * and is restricted to the receiving device's ifindex. If the chosen route
 * has no nexthop and is not RTF_NONEXTHOP, a host-route clone is created
 * with rt6_cow() outside rt6_lock; when that insert reports -EEXIST the
 * lookup is retried, with at most three rt6_cow() calls in total.
 */
461void ip6_route_input(struct sk_buff *skb)
462{
463 struct fib6_node *fn;
464 struct rt6_info *rt;
465 int strict;
466 int attempts = 3;
467
468 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
469
470relookup:
471 read_lock_bh(&rt6_lock);
472
473 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
474 &skb->nh.ipv6h->saddr);
475
476restart:
477 rt = fn->leaf;
478
	/* Leaf is a cached entry: take the device match as is, no clone needed. */
479 if ((rt->rt6i_flags & RTF_CACHE)) {
480 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
481 BACKTRACK();
482 dst_hold(&rt->u.dst);
483 goto out;
484 }
485
9d17f218 486 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
1da177e4
LT
487 BACKTRACK();
488
489 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
490 struct rt6_info *nrt;
	/* Pin the original route across the unlocked rt6_cow() call. */
491 dst_hold(&rt->u.dst);
492 read_unlock_bh(&rt6_lock);
493
494 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
0d51aa80
JHS
495 &skb->nh.ipv6h->saddr,
496 &NETLINK_CB(skb));
1da177e4
LT
497
498 dst_release(&rt->u.dst);
499 rt = nrt;
500
	/* Anything but -EEXIST, or retries used up: keep whatever rt6_cow() returned. */
501 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
502 goto out2;
503
504 /* Race condition! In the gap, when rt6_lock was
505 released someone could insert this route. Relookup.
506 */
507 dst_release(&rt->u.dst);
508 goto relookup;
509 }
510 dst_hold(&rt->u.dst);
511
512out:
513 read_unlock_bh(&rt6_lock);
	/* out2 is entered with rt6_lock already released. */
514out2:
515 rt->u.dst.lastuse = jiffies;
516 rt->u.dst.__use++;
517 skb->dst = (struct dst_entry *) rt;
518}
519
/*
 * Resolve the output route for flow @fl and return its dst_entry with a
 * reference held. @sk is not used in this function.
 *
 * The lookup is strict when the destination is multicast or link-local.
 * A cached leaf is used after device matching against fl->oif. For an
 * RTF_DEFAULT leaf whose metric is at least IP6_RT_PRIO_ADDRCONF,
 * rt6_best_dflt() picks the router; other RTF_DEFAULT leaves are used as
 * they are. Everything else goes through rt6_device_match(). A chosen
 * route without a nexthop (and without RTF_NONEXTHOP) is cloned with
 * rt6_cow() outside rt6_lock; when that insert reports -EEXIST the lookup
 * is retried, with at most three rt6_cow() calls in total.
 */
520struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
521{
522 struct fib6_node *fn;
523 struct rt6_info *rt;
524 int strict;
525 int attempts = 3;
526
527 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
528
529relookup:
530 read_lock_bh(&rt6_lock);
531
532 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
533
534restart:
535 rt = fn->leaf;
536
537 if ((rt->rt6i_flags & RTF_CACHE)) {
538 rt = rt6_device_match(rt, fl->oif, strict);
539 BACKTRACK();
540 dst_hold(&rt->u.dst);
541 goto out;
542 }
543 if (rt->rt6i_flags & RTF_DEFAULT) {
544 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
545 rt = rt6_best_dflt(rt, fl->oif);
546 } else {
547 rt = rt6_device_match(rt, fl->oif, strict);
548 BACKTRACK();
549 }
550
551 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
552 struct rt6_info *nrt;
	/* Pin the original route across the unlocked rt6_cow() call. */
553 dst_hold(&rt->u.dst);
554 read_unlock_bh(&rt6_lock);
555
0d51aa80 556 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
1da177e4
LT
557
558 dst_release(&rt->u.dst);
559 rt = nrt;
560
	/* Anything but -EEXIST, or retries used up: keep whatever rt6_cow() returned. */
561 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
562 goto out2;
563
564 /* Race condition! In the gap, when rt6_lock was
565 released someone could insert this route. Relookup.
566 */
567 dst_release(&rt->u.dst);
568 goto relookup;
569 }
570 dst_hold(&rt->u.dst);
571
572out:
573 read_unlock_bh(&rt6_lock);
	/* out2 is entered with rt6_lock already released. */
574out2:
575 rt->u.dst.lastuse = jiffies;
576 rt->u.dst.__use++;
577 return &rt->u.dst;
578}
579
580
581/*
582 * Destination cache support functions
583 */
584
585static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
586{
587 struct rt6_info *rt;
588
589 rt = (struct rt6_info *) dst;
590
591 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
592 return dst;
593
594 return NULL;
595}
596
597static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
598{
599 struct rt6_info *rt = (struct rt6_info *) dst;
600
601 if (rt) {
602 if (rt->rt6i_flags & RTF_CACHE)
0d51aa80 603 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
604 else
605 dst_release(dst);
606 }
607 return NULL;
608}
609
610static void ip6_link_failure(struct sk_buff *skb)
611{
612 struct rt6_info *rt;
613
614 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
615
616 rt = (struct rt6_info *) skb->dst;
617 if (rt) {
618 if (rt->rt6i_flags&RTF_CACHE) {
619 dst_set_expires(&rt->u.dst, 0);
620 rt->rt6i_flags |= RTF_EXPIRES;
621 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
622 rt->rt6i_node->fn_sernum = -1;
623 }
624}
625
626static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
627{
628 struct rt6_info *rt6 = (struct rt6_info*)dst;
629
630 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
631 rt6->rt6i_flags |= RTF_MODIFIED;
632 if (mtu < IPV6_MIN_MTU) {
633 mtu = IPV6_MIN_MTU;
634 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
635 }
636 dst->metrics[RTAX_MTU-1] = mtu;
637 }
638}
639
640/* Protected by rt6_lock. */
/* Singly linked (via dst->next) list of entries created by ndisc_dst_alloc(); ndisc_dst_gc() reaps it. */
641static struct dst_entry *ndisc_dst_gc_list;
/* Defined further down; needed here by ndisc_dst_alloc(). */
642static int ipv6_get_mtu(struct net_device *dev);
643
644static inline unsigned int ipv6_advmss(unsigned int mtu)
645{
646 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
647
648 if (mtu < ip6_rt_min_advmss)
649 mtu = ip6_rt_min_advmss;
650
651 /*
652 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
653 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
654 * IPV6_MAXPLEN is also valid and means: "any MSS,
655 * rely only on pmtu discovery"
656 */
657 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
658 mtu = IPV6_MAXPLEN;
659 return mtu;
660}
661
662struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
663 struct neighbour *neigh,
664 struct in6_addr *addr,
665 int (*output)(struct sk_buff *))
666{
667 struct rt6_info *rt;
668 struct inet6_dev *idev = in6_dev_get(dev);
669
670 if (unlikely(idev == NULL))
671 return NULL;
672
673 rt = ip6_dst_alloc();
674 if (unlikely(rt == NULL)) {
675 in6_dev_put(idev);
676 goto out;
677 }
678
679 dev_hold(dev);
680 if (neigh)
681 neigh_hold(neigh);
682 else
683 neigh = ndisc_get_neigh(dev, addr);
684
685 rt->rt6i_dev = dev;
686 rt->rt6i_idev = idev;
687 rt->rt6i_nexthop = neigh;
688 atomic_set(&rt->u.dst.__refcnt, 1);
689 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
690 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
691 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
692 rt->u.dst.output = output;
693
694#if 0 /* there's no chance to use these for ndisc */
695 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
696 ? DST_HOST
697 : 0;
698 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
699 rt->rt6i_dst.plen = 128;
700#endif
701
702 write_lock_bh(&rt6_lock);
703 rt->u.dst.next = ndisc_dst_gc_list;
704 ndisc_dst_gc_list = &rt->u.dst;
705 write_unlock_bh(&rt6_lock);
706
707 fib6_force_start_gc();
708
709out:
710 return (struct dst_entry *)rt;
711}
712
713int ndisc_dst_gc(int *more)
714{
715 struct dst_entry *dst, *next, **pprev;
716 int freed;
717
718 next = NULL;
719 pprev = &ndisc_dst_gc_list;
720 freed = 0;
721 while ((dst = *pprev) != NULL) {
722 if (!atomic_read(&dst->__refcnt)) {
723 *pprev = dst->next;
724 dst_free(dst);
725 freed++;
726 } else {
727 pprev = &dst->next;
728 (*more)++;
729 }
730 }
731
732 return freed;
733}
734
735static int ip6_dst_gc(void)
736{
737 static unsigned expire = 30*HZ;
738 static unsigned long last_gc;
739 unsigned long now = jiffies;
740
741 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
742 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
743 goto out;
744
745 expire++;
746 fib6_run_gc(expire);
747 last_gc = now;
748 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
749 expire = ip6_rt_gc_timeout>>1;
750
751out:
752 expire -= expire>>ip6_rt_gc_elasticity;
753 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
754}
755
756/* Clean host part of a prefix. Not necessary in radix tree,
757 but results in cleaner routing tables.
758
759 Remove it only when all the things will work!
760 */
761
762static int ipv6_get_mtu(struct net_device *dev)
763{
764 int mtu = IPV6_MIN_MTU;
765 struct inet6_dev *idev;
766
767 idev = in6_dev_get(dev);
768 if (idev) {
769 mtu = idev->cnf.mtu6;
770 in6_dev_put(idev);
771 }
772 return mtu;
773}
774
775int ipv6_get_hoplimit(struct net_device *dev)
776{
777 int hoplimit = ipv6_devconf.hop_limit;
778 struct inet6_dev *idev;
779
780 idev = in6_dev_get(dev);
781 if (idev) {
782 hoplimit = idev->cnf.hop_limit;
783 in6_dev_put(idev);
784 }
785 return hoplimit;
786}
787
788/*
789 *
790 */
791
0d51aa80
JHS
/*
 * Build a route from @rtmsg and insert it with ip6_ins_rt().
 *
 * @rtmsg:   route description; rtmsg_metric is rewritten to
 *           IP6_RT_PRIO_USER when it is 0.
 * @nlh:     optional netlink header; when present its rtmsg payload
 *           supplies the route protocol, otherwise RTPROT_BOOT is used.
 * @_rtattr: optional struct rtattr * array; RTA_METRICS, if present, is
 *           copied into the dst metrics.
 * @req:     passed through to ip6_ins_rt().
 *
 * Returns the ip6_ins_rt() result, or a negative errno on the local error
 * paths, which release the dev/idev references and free the half-built
 * route. On the install path the dev/idev references are stored in the
 * route instead.
 */
792int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
793 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
794{
795 int err;
796 struct rtmsg *r;
797 struct rtattr **rta;
798 struct rt6_info *rt = NULL;
799 struct net_device *dev = NULL;
800 struct inet6_dev *idev = NULL;
801 int addr_type;
802
803 rta = (struct rtattr **) _rtattr;
804
805 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
806 return -EINVAL;
807#ifndef CONFIG_IPV6_SUBTREES
	/* Source-prefix routes are rejected when subtrees are not compiled in. */
808 if (rtmsg->rtmsg_src_len)
809 return -EINVAL;
810#endif
811 if (rtmsg->rtmsg_ifindex) {
812 err = -ENODEV;
813 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
814 if (!dev)
815 goto out;
816 idev = in6_dev_get(dev);
817 if (!idev)
818 goto out;
819 }
820
821 if (rtmsg->rtmsg_metric == 0)
822 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
823
824 rt = ip6_dst_alloc();
825
826 if (rt == NULL) {
827 err = -ENOMEM;
828 goto out;
829 }
830
831 rt->u.dst.obsolete = -1;
3dd4bc68 832 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1da177e4
LT
833 if (nlh && (r = NLMSG_DATA(nlh))) {
834 rt->rt6i_protocol = r->rtm_protocol;
835 } else {
836 rt->rt6i_protocol = RTPROT_BOOT;
837 }
838
839 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
840
841 if (addr_type & IPV6_ADDR_MULTICAST)
842 rt->u.dst.input = ip6_mc_input;
843 else
844 rt->u.dst.input = ip6_forward;
845
846 rt->u.dst.output = ip6_output;
847
848 ipv6_addr_prefix(&rt->rt6i_dst.addr,
849 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
850 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
851 if (rt->rt6i_dst.plen == 128)
852 rt->u.dst.flags = DST_HOST;
853
854#ifdef CONFIG_IPV6_SUBTREES
855 ipv6_addr_prefix(&rt->rt6i_src.addr,
856 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
857 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
858#endif
859
860 rt->rt6i_metric = rtmsg->rtmsg_metric;
861
862 /* We cannot add true routes via loopback here,
863 they would result in kernel looping; promote them to reject routes
864 */
865 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
866 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
867 /* hold loopback dev/idev if we haven't done so. */
868 if (dev != &loopback_dev) {
	/* Swap any references taken above for references on loopback. */
869 if (dev) {
870 dev_put(dev);
871 in6_dev_put(idev);
872 }
873 dev = &loopback_dev;
874 dev_hold(dev);
875 idev = in6_dev_get(dev);
876 if (!idev) {
877 err = -ENODEV;
878 goto out;
879 }
880 }
881 rt->u.dst.output = ip6_pkt_discard_out;
882 rt->u.dst.input = ip6_pkt_discard;
883 rt->u.dst.error = -ENETUNREACH;
884 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
885 goto install_route;
886 }
887
888 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
889 struct in6_addr *gw_addr;
890 int gwa_type;
891
892 gw_addr = &rtmsg->rtmsg_gateway;
893 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
894 gwa_type = ipv6_addr_type(gw_addr);
895
896 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
897 struct rt6_info *grt;
898
899 /* IPv6 strictly inhibits using not link-local
900 addresses as nexthop address.
901 Otherwise, router will not able to send redirects.
902 It is very good, but in some (rare!) circumstances
903 (SIT, PtP, NBMA NOARP links) it is handy to allow
904 some exceptions. --ANK
905 */
906 err = -EINVAL;
907 if (!(gwa_type&IPV6_ADDR_UNICAST))
908 goto out;
909
	/* Strict lookup of the gateway itself; it must be reachable without a further gateway. */
910 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
911
912 err = -EHOSTUNREACH;
913 if (grt == NULL)
914 goto out;
915 if (dev) {
916 if (dev != grt->rt6i_dev) {
917 dst_release(&grt->u.dst);
918 goto out;
919 }
920 } else {
	/* No interface given: inherit device and inet6_dev from the gateway's route. */
921 dev = grt->rt6i_dev;
922 idev = grt->rt6i_idev;
923 dev_hold(dev);
924 in6_dev_hold(grt->rt6i_idev);
925 }
	/* err stays -EHOSTUNREACH when the gateway's own route is gatewayed. */
926 if (!(grt->rt6i_flags&RTF_GATEWAY))
927 err = 0;
928 dst_release(&grt->u.dst);
929
930 if (err)
931 goto out;
932 }
933 err = -EINVAL;
934 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
935 goto out;
936 }
937
938 err = -ENODEV;
939 if (dev == NULL)
940 goto out;
941
942 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
943 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
944 if (IS_ERR(rt->rt6i_nexthop)) {
945 err = PTR_ERR(rt->rt6i_nexthop);
946 rt->rt6i_nexthop = NULL;
947 goto out;
948 }
949 }
950
951 rt->rt6i_flags = rtmsg->rtmsg_flags;
952
953install_route:
954 if (rta && rta[RTA_METRICS-1]) {
955 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
956 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
957
958 while (RTA_OK(attr, attrlen)) {
959 unsigned flavor = attr->rta_type;
	/* Type 0 is skipped; types above RTAX_MAX abort the whole add. */
960 if (flavor) {
961 if (flavor > RTAX_MAX) {
962 err = -EINVAL;
963 goto out;
964 }
965 rt->u.dst.metrics[flavor-1] =
966 *(u32 *)RTA_DATA(attr);
967 }
968 attr = RTA_NEXT(attr, attrlen);
969 }
970 }
971
	/* Fill in defaults for metrics the caller left at zero. */
972 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
973 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
974 if (!rt->u.dst.metrics[RTAX_MTU-1])
975 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
976 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
977 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
978 rt->u.dst.dev = dev;
979 rt->rt6i_idev = idev;
0d51aa80 980 return ip6_ins_rt(rt, nlh, _rtattr, req);
1da177e4
LT
981
	/* Error exit: rt->u.dst.dev / rt6i_idev were never set, so the references are dropped here. */
982out:
983 if (dev)
984 dev_put(dev);
985 if (idev)
986 in6_dev_put(idev);
987 if (rt)
988 dst_free((struct dst_entry *) rt);
989 return err;
990}
991
0d51aa80 992int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
993{
994 int err;
995
996 write_lock_bh(&rt6_lock);
997
998 rt6_reset_dflt_pointer(NULL);
999
0d51aa80 1000 err = fib6_del(rt, nlh, _rtattr, req);
1da177e4
LT
1001 dst_release(&rt->u.dst);
1002
1003 write_unlock_bh(&rt6_lock);
1004
1005 return err;
1006}
1007
0d51aa80 1008static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1009{
1010 struct fib6_node *fn;
1011 struct rt6_info *rt;
1012 int err = -ESRCH;
1013
1014 read_lock_bh(&rt6_lock);
1015
1016 fn = fib6_locate(&ip6_routing_table,
1017 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1018 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1019
1020 if (fn) {
1021 for (rt = fn->leaf; rt; rt = rt->u.next) {
1022 if (rtmsg->rtmsg_ifindex &&
1023 (rt->rt6i_dev == NULL ||
1024 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1025 continue;
1026 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1027 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1028 continue;
1029 if (rtmsg->rtmsg_metric &&
1030 rtmsg->rtmsg_metric != rt->rt6i_metric)
1031 continue;
1032 dst_hold(&rt->u.dst);
1033 read_unlock_bh(&rt6_lock);
1034
0d51aa80 1035 return ip6_del_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1036 }
1037 }
1038 read_unlock_bh(&rt6_lock);
1039
1040 return err;
1041}
1042
1043/*
1044 * Handle redirects
1045 */
/*
 * Process an accepted-on-the-wire redirect for @dest.
 *
 * @saddr:   source address of the redirect, i.e. the router that sent it.
 * @neigh:   neighbour entry for the new first hop.
 * @lladdr:  link-layer address to store in @neigh.
 * @on_link: non-zero when @dest itself is the new first hop.
 *
 * The redirect is honoured only if the current route to @dest uses
 * neigh->dev, is gatewayed, and @saddr is its gateway (or, for an
 * RTF_DEFAULT route, the gateway of some entry at the table root). On
 * acceptance the neighbour is updated, a /128 RTF_DYNAMIC|RTF_CACHE clone
 * pointing at @neigh is inserted, and an old cached route is deleted.
 */
1046void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1047 struct neighbour *neigh, u8 *lladdr, int on_link)
1048{
1049 struct rt6_info *rt, *nrt;
1050
1051 /* Locate old route to this destination. */
1052 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1053
1054 if (rt == NULL)
1055 return;
1056
1057 if (neigh->dev != rt->rt6i_dev)
1058 goto out;
1059
1060 /*
1061 * Current route is on-link; redirect is always invalid.
1062 *
1063 * Seems, previous statement is not true. It could
1064 * be node, which looks for us as on-link (f.e. proxy ndisc)
1065 * But then router serving it might decide, that we should
1066 * know truth 8)8) --ANK (980726).
1067 */
1068 if (!(rt->rt6i_flags&RTF_GATEWAY))
1069 goto out;
1070
1071 /*
1072 * RFC 2461 specifies that redirects should only be
1073 * accepted if they come from the nexthop to the target.
1074 * Due to the way default routers are chosen, this notion
1075 * is a bit fuzzy and one might need to check all default
1076 * routers.
1077 */
1078 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1079 if (rt->rt6i_flags & RTF_DEFAULT) {
1080 struct rt6_info *rt1;
1081
	/* NOTE(review): plain read_lock(), not the _bh variant used elsewhere -- presumably BHs are already off here; confirm against caller. */
1082 read_lock(&rt6_lock);
1083 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1084 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
	/* Swap our reference from rt over to rt1. */
1085 dst_hold(&rt1->u.dst);
1086 dst_release(&rt->u.dst);
1087 read_unlock(&rt6_lock);
1088 rt = rt1;
1089 goto source_ok;
1090 }
1091 }
1092 read_unlock(&rt6_lock);
1093 }
1094 if (net_ratelimit())
1095 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1096 "for redirect target\n");
1097 goto out;
1098 }
1099
1100source_ok:
1101
1102 /*
1103 * We have finally decided to accept it.
1104 */
1105
1106 neigh_update(neigh, lladdr, NUD_STALE,
1107 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1108 NEIGH_UPDATE_F_OVERRIDE|
1109 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1110 NEIGH_UPDATE_F_ISROUTER))
1111 );
1112
1113 /*
1114 * Redirect received -> path was valid.
1115 * Look, redirects are sent only in response to data packets,
1116 * so that this nexthop apparently is reachable. --ANK
1117 */
1118 dst_confirm(&rt->u.dst);
1119
1120 /* Duplicate redirect: silently ignore. */
1121 if (neigh == rt->u.dst.neighbour)
1122 goto out;
1123
1124 nrt = ip6_rt_copy(rt);
1125 if (nrt == NULL)
1126 goto out;
1127
	/* Flags are replaced outright, not inherited from rt. */
1128 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1129 if (on_link)
1130 nrt->rt6i_flags &= ~RTF_GATEWAY;
1131
1132 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1133 nrt->rt6i_dst.plen = 128;
1134 nrt->u.dst.flags |= DST_HOST;
1135
1136 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1137 nrt->rt6i_nexthop = neigh_clone(neigh);
1138 /* Reset pmtu, it may be better */
1139 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1140 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1141
0d51aa80 1142 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1da177e4
LT
1143 goto out;
1144
1145 if (rt->rt6i_flags&RTF_CACHE) {
	/* ip6_del_rt() drops our reference on rt, hence the direct return. */
0d51aa80 1146 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1147 return;
1148 }
1149
1150out:
1151 dst_release(&rt->u.dst);
1152 return;
1153}
1154
1155/*
1156 * Handle ICMP "packet too big" messages
1157 * i.e. Path MTU discovery
1158 */
1159
/*
 * Apply a path MTU of @pmtu learned for @daddr (source @saddr) on @dev.
 *
 * Nothing is done when no route is found or @pmtu is not below the route's
 * current MTU. A @pmtu below IPV6_MIN_MTU is raised to IPV6_MIN_MTU and
 * RTAX_FEATURE_ALLFRAG is set on the affected route. A cached route is
 * updated in place; otherwise a /128 host route expiring after
 * ip6_rt_mtu_expires is created, via rt6_cow() when the route has no
 * nexthop and is not RTF_NONEXTHOP, or via ip6_rt_copy() + ip6_ins_rt()
 * in the remaining cases.
 */
1160void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1161 struct net_device *dev, u32 pmtu)
1162{
1163 struct rt6_info *rt, *nrt;
1164 int allfrag = 0;
1165
1166 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1167 if (rt == NULL)
1168 return;
1169
1170 if (pmtu >= dst_mtu(&rt->u.dst))
1171 goto out;
1172
1173 if (pmtu < IPV6_MIN_MTU) {
1174 /*
1175 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1176 * MTU (1280) and a fragment header should always be included
1177 * after a node receiving Too Big message reporting PMTU is
1178 * less than the IPv6 Minimum Link MTU.
1179 */
1180 pmtu = IPV6_MIN_MTU;
1181 allfrag = 1;
1182 }
1183
1184 /* New mtu received -> path was valid.
1185 They are sent only in response to data packets,
1186 so that this nexthop apparently is reachable. --ANK
1187 */
1188 dst_confirm(&rt->u.dst);
1189
1190 /* Host route. If it is static, it would be better
1191 not to override it, but add new one, so that
1192 when cache entry will expire old pmtu
1193 would return automatically.
1194 */
1195 if (rt->rt6i_flags & RTF_CACHE) {
1196 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1197 if (allfrag)
1198 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1199 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1200 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1201 goto out;
1202 }
1203
1204 /* Network route.
1205 Two cases are possible:
1206 1. It is connected route. Action: COW
1207 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1208 */
1209 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
0d51aa80 1210 nrt = rt6_cow(rt, daddr, saddr, NULL);
1da177e4
LT
	/* rt6_cow() has already attempted the insert; the metrics are adjusted afterwards, only if it succeeded. */
1211 if (!nrt->u.dst.error) {
1212 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1213 if (allfrag)
1214 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1215 /* According to RFC 1981, detecting PMTU increase shouldn't be
1216 happened within 5 mins, the recommended timer is 10 mins.
1217 Here this route expiration time is set to ip6_rt_mtu_expires
1218 which is 10 mins. After 10 mins the decreased pmtu is expired
1219 and detecting PMTU increase will be automatically happened.
1220 */
1221 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1222 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1223 }
	/* Drop the reference rt6_cow() returned to us. */
1224 dst_release(&nrt->u.dst);
1225 } else {
1226 nrt = ip6_rt_copy(rt);
1227 if (nrt == NULL)
1228 goto out;
1229 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1230 nrt->rt6i_dst.plen = 128;
1231 nrt->u.dst.flags |= DST_HOST;
1232 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1233 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1234 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1235 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1236 if (allfrag)
1237 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
	/* The insert result is not checked here. */
0d51aa80 1238 ip6_ins_rt(nrt, NULL, NULL, NULL);
1da177e4
LT
1239 }
1240
1241out:
1242 dst_release(&rt->u.dst);
1243}
1244
1245/*
1246 * Misc support functions
1247 */
1248
1249static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1250{
1251 struct rt6_info *rt = ip6_dst_alloc();
1252
1253 if (rt) {
1254 rt->u.dst.input = ort->u.dst.input;
1255 rt->u.dst.output = ort->u.dst.output;
1256
1257 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1258 rt->u.dst.dev = ort->u.dst.dev;
1259 if (rt->u.dst.dev)
1260 dev_hold(rt->u.dst.dev);
1261 rt->rt6i_idev = ort->rt6i_idev;
1262 if (rt->rt6i_idev)
1263 in6_dev_hold(rt->rt6i_idev);
1264 rt->u.dst.lastuse = jiffies;
1265 rt->rt6i_expires = 0;
1266
1267 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1268 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1269 rt->rt6i_metric = 0;
1270
1271 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1272#ifdef CONFIG_IPV6_SUBTREES
1273 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1274#endif
1275 }
1276 return rt;
1277}
1278
1279struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1280{
1281 struct rt6_info *rt;
1282 struct fib6_node *fn;
1283
1284 fn = &ip6_routing_table;
1285
1286 write_lock_bh(&rt6_lock);
1287 for (rt = fn->leaf; rt; rt=rt->u.next) {
1288 if (dev == rt->rt6i_dev &&
1289 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1290 break;
1291 }
1292 if (rt)
1293 dst_hold(&rt->u.dst);
1294 write_unlock_bh(&rt6_lock);
1295 return rt;
1296}
1297
1298struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1299 struct net_device *dev)
1300{
1301 struct in6_rtmsg rtmsg;
1302
1303 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1304 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1305 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1306 rtmsg.rtmsg_metric = 1024;
1307 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1308
1309 rtmsg.rtmsg_ifindex = dev->ifindex;
1310
0d51aa80 1311 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1312 return rt6_get_dflt_router(gwaddr, dev);
1313}
1314
1315void rt6_purge_dflt_routers(void)
1316{
1317 struct rt6_info *rt;
1318
1319restart:
1320 read_lock_bh(&rt6_lock);
1321 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1322 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1323 dst_hold(&rt->u.dst);
1324
1325 rt6_reset_dflt_pointer(NULL);
1326
1327 read_unlock_bh(&rt6_lock);
1328
0d51aa80 1329 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1330
1331 goto restart;
1332 }
1333 }
1334 read_unlock_bh(&rt6_lock);
1335}
1336
1337int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1338{
1339 struct in6_rtmsg rtmsg;
1340 int err;
1341
1342 switch(cmd) {
1343 case SIOCADDRT: /* Add a route */
1344 case SIOCDELRT: /* Delete a route */
1345 if (!capable(CAP_NET_ADMIN))
1346 return -EPERM;
1347 err = copy_from_user(&rtmsg, arg,
1348 sizeof(struct in6_rtmsg));
1349 if (err)
1350 return -EFAULT;
1351
1352 rtnl_lock();
1353 switch (cmd) {
1354 case SIOCADDRT:
0d51aa80 1355 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1356 break;
1357 case SIOCDELRT:
0d51aa80 1358 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1359 break;
1360 default:
1361 err = -EINVAL;
1362 }
1363 rtnl_unlock();
1364
1365 return err;
1366 };
1367
1368 return -EINVAL;
1369}
1370
1371/*
1372 * Drop the packet on the floor
1373 */
1374
20380731 1375static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4
LT
1376{
1377 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1378 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1379 kfree_skb(skb);
1380 return 0;
1381}
1382
20380731 1383static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1384{
1385 skb->dev = skb->dst->dev;
1386 return ip6_pkt_discard(skb);
1387}
1388
1389/*
1390 * Allocate a dst for local (unicast / anycast) address.
1391 */
1392
1393struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1394 const struct in6_addr *addr,
1395 int anycast)
1396{
1397 struct rt6_info *rt = ip6_dst_alloc();
1398
1399 if (rt == NULL)
1400 return ERR_PTR(-ENOMEM);
1401
1402 dev_hold(&loopback_dev);
1403 in6_dev_hold(idev);
1404
1405 rt->u.dst.flags = DST_HOST;
1406 rt->u.dst.input = ip6_input;
1407 rt->u.dst.output = ip6_output;
1408 rt->rt6i_dev = &loopback_dev;
1409 rt->rt6i_idev = idev;
1410 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1411 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1412 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1413 rt->u.dst.obsolete = -1;
1414
1415 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1416 if (!anycast)
1417 rt->rt6i_flags |= RTF_LOCAL;
1418 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1419 if (rt->rt6i_nexthop == NULL) {
1420 dst_free((struct dst_entry *) rt);
1421 return ERR_PTR(-ENOMEM);
1422 }
1423
1424 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1425 rt->rt6i_dst.plen = 128;
1426
1427 atomic_set(&rt->u.dst.__refcnt, 1);
1428
1429 return rt;
1430}
1431
1432static int fib6_ifdown(struct rt6_info *rt, void *arg)
1433{
1434 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1435 rt != &ip6_null_entry) {
1436 RT6_TRACE("deleted by ifdown %p\n", rt);
1437 return -1;
1438 }
1439 return 0;
1440}
1441
1442void rt6_ifdown(struct net_device *dev)
1443{
1444 write_lock_bh(&rt6_lock);
1445 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1446 write_unlock_bh(&rt6_lock);
1447}
1448
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
1454
1455static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1456{
1457 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1458 struct inet6_dev *idev;
1459
1460 /* In IPv6 pmtu discovery is not optional,
1461 so that RTAX_MTU lock cannot disable it.
1462 We still use this lock to block changes
1463 caused by addrconf/ndisc.
1464 */
1465
1466 idev = __in6_dev_get(arg->dev);
1467 if (idev == NULL)
1468 return 0;
1469
1470 /* For administrative MTU increase, there is no way to discover
1471 IPv6 PMTU increase, so PMTU increase should be updated here.
1472 Since RFC 1981 doesn't include administrative MTU increase
1473 update PMTU increase is a MUST. (i.e. jumbo frame)
1474 */
1475 /*
1476 If new MTU is less than route PMTU, this new MTU will be the
1477 lowest MTU in the path, update the route PMTU to reflect PMTU
1478 decreases; if new MTU is greater than route PMTU, and the
1479 old MTU is the lowest MTU in the path, update the route PMTU
1480 to reflect the increase. In this case if the other nodes' MTU
1481 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1482 PMTU discouvery.
1483 */
1484 if (rt->rt6i_dev == arg->dev &&
1485 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1486 (dst_mtu(&rt->u.dst) > arg->mtu ||
1487 (dst_mtu(&rt->u.dst) < arg->mtu &&
1488 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1489 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1490 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1491 return 0;
1492}
1493
1494void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1495{
1496 struct rt6_mtu_change_arg arg;
1497
1498 arg.dev = dev;
1499 arg.mtu = mtu;
1500 read_lock_bh(&rt6_lock);
1501 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1502 read_unlock_bh(&rt6_lock);
1503}
1504
1505static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1506 struct in6_rtmsg *rtmsg)
1507{
1508 memset(rtmsg, 0, sizeof(*rtmsg));
1509
1510 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1511 rtmsg->rtmsg_src_len = r->rtm_src_len;
1512 rtmsg->rtmsg_flags = RTF_UP;
1513 if (r->rtm_type == RTN_UNREACHABLE)
1514 rtmsg->rtmsg_flags |= RTF_REJECT;
1515
1516 if (rta[RTA_GATEWAY-1]) {
1517 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1518 return -EINVAL;
1519 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1520 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1521 }
1522 if (rta[RTA_DST-1]) {
1523 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1524 return -EINVAL;
1525 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1526 }
1527 if (rta[RTA_SRC-1]) {
1528 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1529 return -EINVAL;
1530 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1531 }
1532 if (rta[RTA_OIF-1]) {
1533 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1534 return -EINVAL;
1535 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1536 }
1537 if (rta[RTA_PRIORITY-1]) {
1538 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1539 return -EINVAL;
1540 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1541 }
1542 return 0;
1543}
1544
1545int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1546{
1547 struct rtmsg *r = NLMSG_DATA(nlh);
1548 struct in6_rtmsg rtmsg;
1549
1550 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1551 return -EINVAL;
0d51aa80 1552 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1553}
1554
1555int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1556{
1557 struct rtmsg *r = NLMSG_DATA(nlh);
1558 struct in6_rtmsg rtmsg;
1559
1560 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1561 return -EINVAL;
0d51aa80 1562 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1563}
1564
/* Per-call state that inet6_dump_fib() passes to rt6_dump_route(). */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1570
1571static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1572 struct in6_addr *dst, struct in6_addr *src,
1573 int iif, int type, u32 pid, u32 seq,
1574 int prefix, unsigned int flags)
1da177e4
LT
1575{
1576 struct rtmsg *rtm;
1577 struct nlmsghdr *nlh;
1578 unsigned char *b = skb->tail;
1579 struct rta_cacheinfo ci;
1580
1581 if (prefix) { /* user wants prefix routes only */
1582 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1583 /* success since this is not a prefix route */
1584 return 1;
1585 }
1586 }
1587
b6544c0b 1588 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1da177e4
LT
1589 rtm = NLMSG_DATA(nlh);
1590 rtm->rtm_family = AF_INET6;
1591 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1592 rtm->rtm_src_len = rt->rt6i_src.plen;
1593 rtm->rtm_tos = 0;
1594 rtm->rtm_table = RT_TABLE_MAIN;
1595 if (rt->rt6i_flags&RTF_REJECT)
1596 rtm->rtm_type = RTN_UNREACHABLE;
1597 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1598 rtm->rtm_type = RTN_LOCAL;
1599 else
1600 rtm->rtm_type = RTN_UNICAST;
1601 rtm->rtm_flags = 0;
1602 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1603 rtm->rtm_protocol = rt->rt6i_protocol;
1604 if (rt->rt6i_flags&RTF_DYNAMIC)
1605 rtm->rtm_protocol = RTPROT_REDIRECT;
1606 else if (rt->rt6i_flags & RTF_ADDRCONF)
1607 rtm->rtm_protocol = RTPROT_KERNEL;
1608 else if (rt->rt6i_flags&RTF_DEFAULT)
1609 rtm->rtm_protocol = RTPROT_RA;
1610
1611 if (rt->rt6i_flags&RTF_CACHE)
1612 rtm->rtm_flags |= RTM_F_CLONED;
1613
1614 if (dst) {
1615 RTA_PUT(skb, RTA_DST, 16, dst);
1616 rtm->rtm_dst_len = 128;
1617 } else if (rtm->rtm_dst_len)
1618 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1619#ifdef CONFIG_IPV6_SUBTREES
1620 if (src) {
1621 RTA_PUT(skb, RTA_SRC, 16, src);
1622 rtm->rtm_src_len = 128;
1623 } else if (rtm->rtm_src_len)
1624 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1625#endif
1626 if (iif)
1627 RTA_PUT(skb, RTA_IIF, 4, &iif);
1628 else if (dst) {
1629 struct in6_addr saddr_buf;
1630 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1631 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1632 }
1633 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1634 goto rtattr_failure;
1635 if (rt->u.dst.neighbour)
1636 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1637 if (rt->u.dst.dev)
1638 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1639 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1640 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1641 if (rt->rt6i_expires)
1642 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1643 else
1644 ci.rta_expires = 0;
1645 ci.rta_used = rt->u.dst.__use;
1646 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1647 ci.rta_error = rt->u.dst.error;
1648 ci.rta_id = 0;
1649 ci.rta_ts = 0;
1650 ci.rta_tsage = 0;
1651 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1652 nlh->nlmsg_len = skb->tail - b;
1653 return skb->len;
1654
1655nlmsg_failure:
1656rtattr_failure:
1657 skb_trim(skb, b - skb->data);
1658 return -1;
1659}
1660
1661static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1662{
1663 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1664 int prefix;
1665
1666 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1667 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1668 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1669 } else
1670 prefix = 0;
1671
1672 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1673 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 1674 prefix, NLM_F_MULTI);
1da177e4
LT
1675}
1676
1677static int fib6_dump_node(struct fib6_walker_t *w)
1678{
1679 int res;
1680 struct rt6_info *rt;
1681
1682 for (rt = w->leaf; rt; rt = rt->u.next) {
1683 res = rt6_dump_route(rt, w->args);
1684 if (res < 0) {
1685 /* Frame is full, suspend walking */
1686 w->leaf = rt;
1687 return 1;
1688 }
1689 BUG_TRAP(res!=0);
1690 }
1691 w->leaf = NULL;
1692 return 0;
1693}
1694
1695static void fib6_dump_end(struct netlink_callback *cb)
1696{
1697 struct fib6_walker_t *w = (void*)cb->args[0];
1698
1699 if (w) {
1700 cb->args[0] = 0;
1701 fib6_walker_unlink(w);
1702 kfree(w);
1703 }
efacfbcb
HX
1704 cb->done = (void*)cb->args[1];
1705 cb->args[1] = 0;
1da177e4
LT
1706}
1707
1708static int fib6_dump_done(struct netlink_callback *cb)
1709{
1710 fib6_dump_end(cb);
a8f74b22 1711 return cb->done ? cb->done(cb) : 0;
1da177e4
LT
1712}
1713
1714int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1715{
1716 struct rt6_rtnl_dump_arg arg;
1717 struct fib6_walker_t *w;
1718 int res;
1719
1720 arg.skb = skb;
1721 arg.cb = cb;
1722
1723 w = (void*)cb->args[0];
1724 if (w == NULL) {
1725 /* New dump:
1726 *
1727 * 1. hook callback destructor.
1728 */
1729 cb->args[1] = (long)cb->done;
1730 cb->done = fib6_dump_done;
1731
1732 /*
1733 * 2. allocate and initialize walker.
1734 */
9e147a1c 1735 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1da177e4
LT
1736 if (w == NULL)
1737 return -ENOMEM;
1738 RT6_TRACE("dump<%p", w);
1739 memset(w, 0, sizeof(*w));
1740 w->root = &ip6_routing_table;
1741 w->func = fib6_dump_node;
1742 w->args = &arg;
1743 cb->args[0] = (long)w;
1744 read_lock_bh(&rt6_lock);
1745 res = fib6_walk(w);
1746 read_unlock_bh(&rt6_lock);
1747 } else {
1748 w->args = &arg;
1749 read_lock_bh(&rt6_lock);
1750 res = fib6_walk_continue(w);
1751 read_unlock_bh(&rt6_lock);
1752 }
1753#if RT6_DEBUG >= 3
1754 if (res <= 0 && skb->len == 0)
1755 RT6_TRACE("%p>dump end\n", w);
1756#endif
1757 res = res < 0 ? res : skb->len;
1758 /* res < 0 is an error. (really, impossible)
1759 res == 0 means that dump is complete, but skb still can contain data.
1760 res > 0 dump is not complete, but frame is full.
1761 */
1762 /* Destroy walker, if dump of this table is complete. */
1763 if (res <= 0)
1764 fib6_dump_end(cb);
1765 return res;
1766}
1767
1768int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1769{
1770 struct rtattr **rta = arg;
1771 int iif = 0;
1772 int err = -ENOBUFS;
1773 struct sk_buff *skb;
1774 struct flowi fl;
1775 struct rt6_info *rt;
1776
1777 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1778 if (skb == NULL)
1779 goto out;
1780
1781 /* Reserve room for dummy headers, this skb can pass
1782 through good chunk of routing engine.
1783 */
1784 skb->mac.raw = skb->data;
1785 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1786
1787 memset(&fl, 0, sizeof(fl));
1788 if (rta[RTA_SRC-1])
1789 ipv6_addr_copy(&fl.fl6_src,
1790 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1791 if (rta[RTA_DST-1])
1792 ipv6_addr_copy(&fl.fl6_dst,
1793 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1794
1795 if (rta[RTA_IIF-1])
1796 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1797
1798 if (iif) {
1799 struct net_device *dev;
1800 dev = __dev_get_by_index(iif);
1801 if (!dev) {
1802 err = -ENODEV;
1803 goto out_free;
1804 }
1805 }
1806
1807 fl.oif = 0;
1808 if (rta[RTA_OIF-1])
1809 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1810
1811 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1812
1813 skb->dst = &rt->u.dst;
1814
1815 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1816 err = rt6_fill_node(skb, rt,
1817 &fl.fl6_dst, &fl.fl6_src,
1818 iif,
1819 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 1820 nlh->nlmsg_seq, 0, 0);
1da177e4
LT
1821 if (err < 0) {
1822 err = -EMSGSIZE;
1823 goto out_free;
1824 }
1825
1826 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1827 if (err > 0)
1828 err = 0;
1829out:
1830 return err;
1831out_free:
1832 kfree_skb(skb);
1833 goto out;
1834}
1835
0d51aa80
JHS
1836void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1837 struct netlink_skb_parms *req)
1da177e4
LT
1838{
1839 struct sk_buff *skb;
1840 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
0d51aa80
JHS
1841 u32 pid = current->pid;
1842 u32 seq = 0;
1da177e4 1843
0d51aa80
JHS
1844 if (req)
1845 pid = req->pid;
1846 if (nlh)
1847 seq = nlh->nlmsg_seq;
1848
1da177e4
LT
1849 skb = alloc_skb(size, gfp_any());
1850 if (!skb) {
ac6d439d 1851 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1da177e4
LT
1852 return;
1853 }
0d51aa80 1854 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1da177e4 1855 kfree_skb(skb);
ac6d439d 1856 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1da177e4
LT
1857 return;
1858 }
ac6d439d
PM
1859 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1860 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1da177e4
LT
1861}
1862
1863/*
1864 * /proc
1865 */
1866
1867#ifdef CONFIG_PROC_FS
1868
/* Size used by the /proc code below as the length of one route line. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* offset requested by the reader */
	int length;	/* number of bytes requested */
	int skip;	/* routes skipped so far to reach offset */
	int len;	/* bytes written to buffer so far */
};
1879
1880static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1881{
1882 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1883 int i;
1884
1885 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1886 arg->skip++;
1887 return 0;
1888 }
1889
1890 if (arg->len >= arg->length)
1891 return 0;
1892
1893 for (i=0; i<16; i++) {
1894 sprintf(arg->buffer + arg->len, "%02x",
1895 rt->rt6i_dst.addr.s6_addr[i]);
1896 arg->len += 2;
1897 }
1898 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1899 rt->rt6i_dst.plen);
1900
1901#ifdef CONFIG_IPV6_SUBTREES
1902 for (i=0; i<16; i++) {
1903 sprintf(arg->buffer + arg->len, "%02x",
1904 rt->rt6i_src.addr.s6_addr[i]);
1905 arg->len += 2;
1906 }
1907 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1908 rt->rt6i_src.plen);
1909#else
1910 sprintf(arg->buffer + arg->len,
1911 "00000000000000000000000000000000 00 ");
1912 arg->len += 36;
1913#endif
1914
1915 if (rt->rt6i_nexthop) {
1916 for (i=0; i<16; i++) {
1917 sprintf(arg->buffer + arg->len, "%02x",
1918 rt->rt6i_nexthop->primary_key[i]);
1919 arg->len += 2;
1920 }
1921 } else {
1922 sprintf(arg->buffer + arg->len,
1923 "00000000000000000000000000000000");
1924 arg->len += 32;
1925 }
1926 arg->len += sprintf(arg->buffer + arg->len,
1927 " %08x %08x %08x %08x %8s\n",
1928 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1929 rt->u.dst.__use, rt->rt6i_flags,
1930 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1931 return 0;
1932}
1933
1934static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1935{
1936 struct rt6_proc_arg arg;
1937 arg.buffer = buffer;
1938 arg.offset = offset;
1939 arg.length = length;
1940 arg.skip = 0;
1941 arg.len = 0;
1942
1943 read_lock_bh(&rt6_lock);
1944 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1945 read_unlock_bh(&rt6_lock);
1946
1947 *start = buffer;
1948 if (offset)
1949 *start += offset % RT6_INFO_LEN;
1950
1951 arg.len -= offset % RT6_INFO_LEN;
1952
1953 if (arg.len > length)
1954 arg.len = length;
1955 if (arg.len < 0)
1956 arg.len = 0;
1957
1958 return arg.len;
1959}
1960
1da177e4
LT
1961static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1962{
1963 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1964 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1965 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1966 rt6_stats.fib_rt_cache,
1967 atomic_read(&ip6_dst_ops.entries),
1968 rt6_stats.fib_discarded_routes);
1969
1970 return 0;
1971}
1972
1973static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1974{
1975 return single_open(file, rt6_stats_seq_show, NULL);
1976}
1977
1978static struct file_operations rt6_stats_seq_fops = {
1979 .owner = THIS_MODULE,
1980 .open = rt6_stats_seq_open,
1981 .read = seq_read,
1982 .llseek = seq_lseek,
1983 .release = single_release,
1984};
1985#endif /* CONFIG_PROC_FS */
1986
1987#ifdef CONFIG_SYSCTL
1988
1989static int flush_delay;
1990
1991static
1992int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1993 void __user *buffer, size_t *lenp, loff_t *ppos)
1994{
1995 if (write) {
1996 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1997 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1998 return 0;
1999 } else
2000 return -EINVAL;
2001}
2002
2003ctl_table ipv6_route_table[] = {
2004 {
2005 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2006 .procname = "flush",
2007 .data = &flush_delay,
2008 .maxlen = sizeof(int),
89c8b3a1 2009 .mode = 0200,
1da177e4
LT
2010 .proc_handler = &ipv6_sysctl_rtcache_flush
2011 },
2012 {
2013 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2014 .procname = "gc_thresh",
2015 .data = &ip6_dst_ops.gc_thresh,
2016 .maxlen = sizeof(int),
2017 .mode = 0644,
2018 .proc_handler = &proc_dointvec,
2019 },
2020 {
2021 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2022 .procname = "max_size",
2023 .data = &ip6_rt_max_size,
2024 .maxlen = sizeof(int),
2025 .mode = 0644,
2026 .proc_handler = &proc_dointvec,
2027 },
2028 {
2029 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2030 .procname = "gc_min_interval",
2031 .data = &ip6_rt_gc_min_interval,
2032 .maxlen = sizeof(int),
2033 .mode = 0644,
2034 .proc_handler = &proc_dointvec_jiffies,
2035 .strategy = &sysctl_jiffies,
2036 },
2037 {
2038 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2039 .procname = "gc_timeout",
2040 .data = &ip6_rt_gc_timeout,
2041 .maxlen = sizeof(int),
2042 .mode = 0644,
2043 .proc_handler = &proc_dointvec_jiffies,
2044 .strategy = &sysctl_jiffies,
2045 },
2046 {
2047 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2048 .procname = "gc_interval",
2049 .data = &ip6_rt_gc_interval,
2050 .maxlen = sizeof(int),
2051 .mode = 0644,
2052 .proc_handler = &proc_dointvec_jiffies,
2053 .strategy = &sysctl_jiffies,
2054 },
2055 {
2056 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2057 .procname = "gc_elasticity",
2058 .data = &ip6_rt_gc_elasticity,
2059 .maxlen = sizeof(int),
2060 .mode = 0644,
2061 .proc_handler = &proc_dointvec_jiffies,
2062 .strategy = &sysctl_jiffies,
2063 },
2064 {
2065 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2066 .procname = "mtu_expires",
2067 .data = &ip6_rt_mtu_expires,
2068 .maxlen = sizeof(int),
2069 .mode = 0644,
2070 .proc_handler = &proc_dointvec_jiffies,
2071 .strategy = &sysctl_jiffies,
2072 },
2073 {
2074 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2075 .procname = "min_adv_mss",
2076 .data = &ip6_rt_min_advmss,
2077 .maxlen = sizeof(int),
2078 .mode = 0644,
2079 .proc_handler = &proc_dointvec_jiffies,
2080 .strategy = &sysctl_jiffies,
2081 },
2082 {
2083 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2084 .procname = "gc_min_interval_ms",
2085 .data = &ip6_rt_gc_min_interval,
2086 .maxlen = sizeof(int),
2087 .mode = 0644,
2088 .proc_handler = &proc_dointvec_ms_jiffies,
2089 .strategy = &sysctl_ms_jiffies,
2090 },
2091 { .ctl_name = 0 }
2092};
2093
2094#endif
2095
2096void __init ip6_route_init(void)
2097{
2098 struct proc_dir_entry *p;
2099
2100 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2101 sizeof(struct rt6_info),
2102 0, SLAB_HWCACHE_ALIGN,
2103 NULL, NULL);
2104 if (!ip6_dst_ops.kmem_cachep)
2105 panic("cannot create ip6_dst_cache");
2106
2107 fib6_init();
2108#ifdef CONFIG_PROC_FS
2109 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2110 if (p)
2111 p->owner = THIS_MODULE;
2112
2113 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2114#endif
2115#ifdef CONFIG_XFRM
2116 xfrm6_init();
2117#endif
2118}
2119
2120void ip6_route_cleanup(void)
2121{
2122#ifdef CONFIG_PROC_FS
2123 proc_net_remove("ipv6_route");
2124 proc_net_remove("rt6_stats");
2125#endif
2126#ifdef CONFIG_XFRM
2127 xfrm6_fini();
2128#endif
2129 rt6_ifdown(NULL);
2130 fib6_gc_cleanup();
2131 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2132}