1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
 27#include <linux/capability.h>
28#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
75
76static int ip6_rt_max_size = 4096;
77static int ip6_rt_gc_min_interval = HZ / 2;
78static int ip6_rt_gc_timeout = 60*HZ;
79int ip6_rt_gc_interval = 30*HZ;
80static int ip6_rt_gc_elasticity = 9;
81static int ip6_rt_mtu_expires = 10*60*HZ;
82static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
83
84static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87static void ip6_dst_destroy(struct dst_entry *);
88static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90static int ip6_dst_gc(void);
91
92static int ip6_pkt_discard(struct sk_buff *skb);
93static int ip6_pkt_discard_out(struct sk_buff *skb);
94static void ip6_link_failure(struct sk_buff *skb);
95static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
96
97static struct dst_ops ip6_dst_ops = {
98 .family = AF_INET6,
99 .protocol = __constant_htons(ETH_P_IPV6),
100 .gc = ip6_dst_gc,
101 .gc_thresh = 1024,
102 .check = ip6_dst_check,
103 .destroy = ip6_dst_destroy,
104 .ifdown = ip6_dst_ifdown,
105 .negative_advice = ip6_negative_advice,
106 .link_failure = ip6_link_failure,
107 .update_pmtu = ip6_rt_update_pmtu,
108 .entry_size = sizeof(struct rt6_info),
109};
110
111struct rt6_info ip6_null_entry = {
112 .u = {
113 .dst = {
114 .__refcnt = ATOMIC_INIT(1),
115 .__use = 1,
116 .dev = &loopback_dev,
117 .obsolete = -1,
118 .error = -ENETUNREACH,
119 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
120 .input = ip6_pkt_discard,
121 .output = ip6_pkt_discard_out,
122 .ops = &ip6_dst_ops,
123 .path = (struct dst_entry*)&ip6_null_entry,
124 }
125 },
126 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
127 .rt6i_metric = ~(u32) 0,
128 .rt6i_ref = ATOMIC_INIT(1),
129};
130
131struct fib6_node ip6_routing_table = {
132 .leaf = &ip6_null_entry,
133 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
134};
135
136/* Protects all the ip6 fib */
137
138DEFINE_RWLOCK(rt6_lock);
139
140
141/* allocate dst with ip6_dst_ops */
142static __inline__ struct rt6_info *ip6_dst_alloc(void)
143{
144 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
145}
146
147static void ip6_dst_destroy(struct dst_entry *dst)
148{
149 struct rt6_info *rt = (struct rt6_info *)dst;
150 struct inet6_dev *idev = rt->rt6i_idev;
151
152 if (idev != NULL) {
153 rt->rt6i_idev = NULL;
154 in6_dev_put(idev);
155 }
156}
157
158static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
159 int how)
160{
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
163
164 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 if (loopback_idev != NULL) {
167 rt->rt6i_idev = loopback_idev;
168 in6_dev_put(idev);
169 }
170 }
171}
172
173static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174{
175 return (rt->rt6i_flags & RTF_EXPIRES &&
176 time_after(jiffies, rt->rt6i_expires));
177}
178
179/*
180 * Route lookup. Any rt6_lock is implied.
181 */
182
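/*
 * rt6_device_match: walk the leaf chain and prefer a route whose device
 * matches the requested output interface; loopback routes are kept as a
 * fallback.  With 'strict' set, no match means ip6_null_entry is returned.
 */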
183static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
184 int oif,
185 int strict)
186{
187 struct rt6_info *local = NULL;
188 struct rt6_info *sprt;
189
190 if (oif) {
191 for (sprt = rt; sprt; sprt = sprt->u.next) {
192 struct net_device *dev = sprt->rt6i_dev;
193 if (dev->ifindex == oif)
194 return sprt;
195 if (dev->flags & IFF_LOOPBACK) {
196 if (sprt->rt6i_idev == NULL ||
197 sprt->rt6i_idev->dev->ifindex != oif) {
198 if (strict && oif)
199 continue;
200 if (local && (!oif ||
201 local->rt6i_idev->dev->ifindex == oif))
202 continue;
203 }
204 local = sprt;
205 }
206 }
207
208 if (local)
209 return local;
210
211 if (strict)
212 return &ip6_null_entry;
213 }
214 return rt;
215}
216
217/*
218 * pointer to the last default router chosen. BH is disabled locally.
219 */
220static struct rt6_info *rt6_dflt_pointer;
221static DEFINE_SPINLOCK(rt6_dflt_lock);
222
223void rt6_reset_dflt_pointer(struct rt6_info *rt)
224{
225 spin_lock_bh(&rt6_dflt_lock);
226 if (rt == NULL || rt == rt6_dflt_pointer) {
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 rt6_dflt_pointer = NULL;
229 }
230 spin_unlock_bh(&rt6_dflt_lock);
231}
232
233/* Default Router Selection (RFC 2461 6.3.6) */
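/*
 * Routers are scored below: +8 for matching the requested interface,
 * +4 for being the currently remembered default router, and +3/+2/+1
 * depending on neighbour reachability (REACHABLE beats STALE/DELAY/PROBE,
 * which beat NOARP/PERMANENT).  A score of 12 or more ends the scan early.
 */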
234static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235{
236 struct rt6_info *match = NULL;
237 struct rt6_info *sprt;
238 int mpri = 0;
239
240 for (sprt = rt; sprt; sprt = sprt->u.next) {
241 struct neighbour *neigh;
242 int m = 0;
243
244 if (!oif ||
245 (sprt->rt6i_dev &&
246 sprt->rt6i_dev->ifindex == oif))
247 m += 8;
248
249 if (rt6_check_expired(sprt))
250 continue;
251
252 if (sprt == rt6_dflt_pointer)
253 m += 4;
254
255 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256 read_lock_bh(&neigh->lock);
257 switch (neigh->nud_state) {
258 case NUD_REACHABLE:
259 m += 3;
260 break;
261
262 case NUD_STALE:
263 case NUD_DELAY:
264 case NUD_PROBE:
265 m += 2;
266 break;
267
268 case NUD_NOARP:
269 case NUD_PERMANENT:
270 m += 1;
271 break;
272
273 case NUD_INCOMPLETE:
274 default:
275 read_unlock_bh(&neigh->lock);
276 continue;
277 }
278 read_unlock_bh(&neigh->lock);
279 } else {
280 continue;
281 }
282
283 if (m > mpri || m >= 12) {
284 match = sprt;
285 mpri = m;
286 if (m >= 12) {
287 /* we choose the last default router if it
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
291 */
292 break;
293 }
294 }
295 }
296
297 spin_lock(&rt6_dflt_lock);
298 if (!match) {
299 /*
300 * No default routers are known to be reachable.
301 * SHOULD round robin
302 */
303 if (rt6_dflt_pointer) {
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
309 match = sprt;
310 break;
311 }
312 }
313 for (sprt = rt;
314 !match && sprt;
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
319 match = sprt;
320 break;
321 }
322 if (sprt == rt6_dflt_pointer)
323 break;
324 }
325 }
326 }
327
328 if (match) {
329 if (rt6_dflt_pointer != match)
330 RT6_TRACE("changed default router: %p->%p\n",
331 rt6_dflt_pointer, match);
332 rt6_dflt_pointer = match;
333 }
334 spin_unlock(&rt6_dflt_lock);
335
336 if (!match) {
337 /*
338 * Last Resort: if no default routers found,
339 * use addrconf default route.
340 * We don't record this route.
341 */
342 for (sprt = ip6_routing_table.leaf;
343 sprt; sprt = sprt->u.next) {
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
346 (!oif ||
347 (sprt->rt6i_dev &&
348 sprt->rt6i_dev->ifindex == oif))) {
349 match = sprt;
350 break;
351 }
352 }
353 if (!match) {
354 /* no default route. give up. */
355 match = &ip6_null_entry;
356 }
357 }
358
359 return match;
360}
361
362struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
363 int oif, int strict)
364{
365 struct fib6_node *fn;
366 struct rt6_info *rt;
367
368 read_lock_bh(&rt6_lock);
369 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370 rt = rt6_device_match(fn->leaf, oif, strict);
371 dst_hold(&rt->u.dst);
372 rt->u.dst.__use++;
373 read_unlock_bh(&rt6_lock);
374
375 rt->u.dst.lastuse = jiffies;
376 if (rt->u.dst.error == 0)
377 return rt;
378 dst_release(&rt->u.dst);
379 return NULL;
380}
381
382/* ip6_ins_rt is called with FREE rt6_lock.
 383 It takes a new route entry; if the addition fails for any reason, the
 384 route is freed. In any case, if the caller does not hold a reference,
 385 it may be destroyed.
386 */
387
388int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389 void *_rtattr, struct netlink_skb_parms *req)
390{
391 int err;
392
393 write_lock_bh(&rt6_lock);
 394 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395 write_unlock_bh(&rt6_lock);
396
397 return err;
398}
399
 400/* No rt6_lock! If the COW failed, the function returns a dead route entry
 401 with dst->error set to the errno value.
402 */
403
404static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
405 struct in6_addr *saddr)
 406{
407 struct rt6_info *rt;
408
409 /*
410 * Clone the route.
411 */
412
413 rt = ip6_rt_copy(ort);
414
415 if (rt) {
416 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
417 if (rt->rt6i_dst.plen != 128 &&
418 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419 rt->rt6i_flags |= RTF_ANYCAST;
 420 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
 421 }
 422
 423 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424 rt->rt6i_dst.plen = 128;
425 rt->rt6i_flags |= RTF_CACHE;
426 rt->u.dst.flags |= DST_HOST;
427
428#ifdef CONFIG_IPV6_SUBTREES
429 if (rt->rt6i_src.plen && saddr) {
430 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431 rt->rt6i_src.plen = 128;
432 }
433#endif
434
435 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
436
 437 }
 438
439 return rt;
440}
 441
442static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
443 struct in6_addr *saddr, struct netlink_skb_parms *req)
444{
445 struct rt6_info *rt = rt6_alloc_cow(ort, daddr, saddr);
446 int err;
 447
448 if (!rt) {
449 dst_hold(&ip6_null_entry.u.dst);
450 return &ip6_null_entry;
 451 }
452
453 dst_hold(&rt->u.dst);
454
455 err = ip6_ins_rt(rt, NULL, NULL, req);
456 if (err)
457 rt->u.dst.error = err;
458
459 return rt;
460}
461
462static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
463{
464 struct rt6_info *rt = ip6_rt_copy(ort);
465 if (rt) {
466 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
467 rt->rt6i_dst.plen = 128;
468 rt->rt6i_flags |= RTF_CACHE;
469 if (rt->rt6i_flags & RTF_REJECT)
470 rt->u.dst.error = ort->u.dst.error;
471 rt->u.dst.flags |= DST_HOST;
472 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
473 }
474 return rt;
475}
476
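/*
 * BACKTRACK: if a strict lookup ended up at ip6_null_entry, walk back up
 * the fib tree and retry at the first parent that carries route info;
 * hitting the root means nothing better exists, so the reject entry is
 * returned.
 */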
477#define BACKTRACK() \
478if (rt == &ip6_null_entry && strict) { \
479 while ((fn = fn->parent) != NULL) { \
480 if (fn->fn_flags & RTN_ROOT) { \
481 dst_hold(&rt->u.dst); \
482 goto out; \
483 } \
484 if (fn->fn_flags & RTN_RTINFO) \
485 goto restart; \
486 } \
487}
488
489
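/*
 * Resolve the route for an incoming packet and attach it to skb->dst.
 * Cached (RTF_CACHE) entries are used directly; otherwise a non-gateway
 * route without a nexthop neighbour is copied on write via rt6_cow(),
 * retrying a few times if another CPU inserts the clone first.
 */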
490void ip6_route_input(struct sk_buff *skb)
491{
492 struct fib6_node *fn;
493 struct rt6_info *rt;
494 int strict;
495 int attempts = 3;
496
497 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
498
499relookup:
500 read_lock_bh(&rt6_lock);
501
502 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
503 &skb->nh.ipv6h->saddr);
504
505restart:
506 rt = fn->leaf;
507
508 if ((rt->rt6i_flags & RTF_CACHE)) {
509 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
510 BACKTRACK();
511 dst_hold(&rt->u.dst);
512 goto out;
513 }
514
 515 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
516 BACKTRACK();
517
518 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
519 struct rt6_info *nrt;
520 dst_hold(&rt->u.dst);
521 read_unlock_bh(&rt6_lock);
522
523 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
524 &skb->nh.ipv6h->saddr,
525 &NETLINK_CB(skb));
526
527 dst_release(&rt->u.dst);
528 rt = nrt;
529
530 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
531 goto out2;
532
533 /* Race condition! In the gap, when rt6_lock was
534 released someone could insert this route. Relookup.
535 */
536 dst_release(&rt->u.dst);
537 goto relookup;
538 }
539 dst_hold(&rt->u.dst);
540
541out:
542 read_unlock_bh(&rt6_lock);
543out2:
544 rt->u.dst.lastuse = jiffies;
545 rt->u.dst.__use++;
546 skb->dst = (struct dst_entry *) rt;
547}
548
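/*
 * Output route lookup.  Mirrors ip6_route_input(), except that for
 * default routes with an addrconf-level metric the best default router
 * is selected via rt6_best_dflt().
 */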
549struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
550{
551 struct fib6_node *fn;
552 struct rt6_info *rt;
553 int strict;
554 int attempts = 3;
555
556 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
557
558relookup:
559 read_lock_bh(&rt6_lock);
560
561 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
562
563restart:
564 rt = fn->leaf;
565
566 if ((rt->rt6i_flags & RTF_CACHE)) {
567 rt = rt6_device_match(rt, fl->oif, strict);
568 BACKTRACK();
569 dst_hold(&rt->u.dst);
570 goto out;
571 }
572 if (rt->rt6i_flags & RTF_DEFAULT) {
573 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
574 rt = rt6_best_dflt(rt, fl->oif);
575 } else {
576 rt = rt6_device_match(rt, fl->oif, strict);
577 BACKTRACK();
578 }
579
580 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
581 struct rt6_info *nrt;
582 dst_hold(&rt->u.dst);
583 read_unlock_bh(&rt6_lock);
584
 585 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
586
587 dst_release(&rt->u.dst);
588 rt = nrt;
589
590 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
591 goto out2;
592
593 /* Race condition! In the gap, when rt6_lock was
594 released someone could insert this route. Relookup.
595 */
596 dst_release(&rt->u.dst);
597 goto relookup;
598 }
599 dst_hold(&rt->u.dst);
600
601out:
602 read_unlock_bh(&rt6_lock);
603out2:
604 rt->u.dst.lastuse = jiffies;
605 rt->u.dst.__use++;
606 return &rt->u.dst;
607}
608
609
610/*
611 * Destination cache support functions
612 */
613
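/*
 * A cached dst stays valid only while the serial number of its fib node
 * matches the cookie recorded when the dst was handed out; a change in
 * the tree bumps the serial number and invalidates the entry.
 */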
614static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
615{
616 struct rt6_info *rt;
617
618 rt = (struct rt6_info *) dst;
619
620 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
621 return dst;
622
623 return NULL;
624}
625
626static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
627{
628 struct rt6_info *rt = (struct rt6_info *) dst;
629
630 if (rt) {
631 if (rt->rt6i_flags & RTF_CACHE)
 632 ip6_del_rt(rt, NULL, NULL, NULL);
633 else
634 dst_release(dst);
635 }
636 return NULL;
637}
638
639static void ip6_link_failure(struct sk_buff *skb)
640{
641 struct rt6_info *rt;
642
643 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
644
645 rt = (struct rt6_info *) skb->dst;
646 if (rt) {
647 if (rt->rt6i_flags&RTF_CACHE) {
648 dst_set_expires(&rt->u.dst, 0);
649 rt->rt6i_flags |= RTF_EXPIRES;
650 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
651 rt->rt6i_node->fn_sernum = -1;
652 }
653}
654
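/*
 * Lower the path MTU of a host (/128) route.  Values below IPV6_MIN_MTU
 * are clamped to 1280 and RTAX_FEATURE_ALLFRAG is set so that a fragment
 * header is always emitted for this destination.
 */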
655static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
656{
657 struct rt6_info *rt6 = (struct rt6_info*)dst;
658
659 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
660 rt6->rt6i_flags |= RTF_MODIFIED;
661 if (mtu < IPV6_MIN_MTU) {
662 mtu = IPV6_MIN_MTU;
663 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
664 }
665 dst->metrics[RTAX_MTU-1] = mtu;
666 }
667}
668
669/* Protected by rt6_lock. */
670static struct dst_entry *ndisc_dst_gc_list;
671static int ipv6_get_mtu(struct net_device *dev);
672
673static inline unsigned int ipv6_advmss(unsigned int mtu)
674{
675 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
676
677 if (mtu < ip6_rt_min_advmss)
678 mtu = ip6_rt_min_advmss;
679
680 /*
681 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
682 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
683 * IPV6_MAXPLEN is also valid and means: "any MSS,
684 * rely only on pmtu discovery"
685 */
686 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
687 mtu = IPV6_MAXPLEN;
688 return mtu;
689}
690
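/*
 * Allocate a dst for neighbour-discovery generated packets.  These entries
 * are not inserted into the fib; they are chained on ndisc_dst_gc_list and
 * reclaimed by ndisc_dst_gc() once their refcount drops to zero.
 */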
691struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
692 struct neighbour *neigh,
693 struct in6_addr *addr,
694 int (*output)(struct sk_buff *))
695{
696 struct rt6_info *rt;
697 struct inet6_dev *idev = in6_dev_get(dev);
698
699 if (unlikely(idev == NULL))
700 return NULL;
701
702 rt = ip6_dst_alloc();
703 if (unlikely(rt == NULL)) {
704 in6_dev_put(idev);
705 goto out;
706 }
707
708 dev_hold(dev);
709 if (neigh)
710 neigh_hold(neigh);
711 else
712 neigh = ndisc_get_neigh(dev, addr);
713
714 rt->rt6i_dev = dev;
715 rt->rt6i_idev = idev;
716 rt->rt6i_nexthop = neigh;
717 atomic_set(&rt->u.dst.__refcnt, 1);
718 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
719 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
720 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
721 rt->u.dst.output = output;
722
723#if 0 /* there's no chance to use these for ndisc */
724 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
725 ? DST_HOST
726 : 0;
727 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
728 rt->rt6i_dst.plen = 128;
729#endif
730
731 write_lock_bh(&rt6_lock);
732 rt->u.dst.next = ndisc_dst_gc_list;
733 ndisc_dst_gc_list = &rt->u.dst;
734 write_unlock_bh(&rt6_lock);
735
736 fib6_force_start_gc();
737
738out:
739 return (struct dst_entry *)rt;
740}
741
742int ndisc_dst_gc(int *more)
743{
744 struct dst_entry *dst, *next, **pprev;
745 int freed;
746
747 next = NULL;
748 pprev = &ndisc_dst_gc_list;
749 freed = 0;
750 while ((dst = *pprev) != NULL) {
751 if (!atomic_read(&dst->__refcnt)) {
752 *pprev = dst->next;
753 dst_free(dst);
754 freed++;
755 } else {
756 pprev = &dst->next;
757 (*more)++;
758 }
759 }
760
761 return freed;
762}
763
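/*
 * dst_ops garbage collector.  The gc argument 'expire' creeps upward while
 * the table stays over ip6_rt_max_size, is pulled back to half of
 * ip6_rt_gc_timeout once the entry count falls below gc_thresh, and decays
 * by expire >> ip6_rt_gc_elasticity on every call.
 */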
764static int ip6_dst_gc(void)
765{
766 static unsigned expire = 30*HZ;
767 static unsigned long last_gc;
768 unsigned long now = jiffies;
769
770 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
771 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
772 goto out;
773
774 expire++;
775 fib6_run_gc(expire);
776 last_gc = now;
777 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
778 expire = ip6_rt_gc_timeout>>1;
779
780out:
781 expire -= expire>>ip6_rt_gc_elasticity;
782 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
783}
784
785/* Clean host part of a prefix. Not necessary in radix tree,
786 but results in cleaner routing tables.
787
788 Remove it only when all the things will work!
789 */
790
791static int ipv6_get_mtu(struct net_device *dev)
792{
793 int mtu = IPV6_MIN_MTU;
794 struct inet6_dev *idev;
795
796 idev = in6_dev_get(dev);
797 if (idev) {
798 mtu = idev->cnf.mtu6;
799 in6_dev_put(idev);
800 }
801 return mtu;
802}
803
804int ipv6_get_hoplimit(struct net_device *dev)
805{
806 int hoplimit = ipv6_devconf.hop_limit;
807 struct inet6_dev *idev;
808
809 idev = in6_dev_get(dev);
810 if (idev) {
811 hoplimit = idev->cnf.hop_limit;
812 in6_dev_put(idev);
813 }
814 return hoplimit;
815}
816
817/*
 818 * Add a route described by an in6_rtmsg.
819 */
820
821int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
822 void *_rtattr, struct netlink_skb_parms *req)
823{
824 int err;
825 struct rtmsg *r;
826 struct rtattr **rta;
827 struct rt6_info *rt = NULL;
828 struct net_device *dev = NULL;
829 struct inet6_dev *idev = NULL;
830 int addr_type;
831
832 rta = (struct rtattr **) _rtattr;
833
834 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
835 return -EINVAL;
836#ifndef CONFIG_IPV6_SUBTREES
837 if (rtmsg->rtmsg_src_len)
838 return -EINVAL;
839#endif
840 if (rtmsg->rtmsg_ifindex) {
841 err = -ENODEV;
842 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
843 if (!dev)
844 goto out;
845 idev = in6_dev_get(dev);
846 if (!idev)
847 goto out;
848 }
849
850 if (rtmsg->rtmsg_metric == 0)
851 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
852
853 rt = ip6_dst_alloc();
854
855 if (rt == NULL) {
856 err = -ENOMEM;
857 goto out;
858 }
859
860 rt->u.dst.obsolete = -1;
 861 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
862 if (nlh && (r = NLMSG_DATA(nlh))) {
863 rt->rt6i_protocol = r->rtm_protocol;
864 } else {
865 rt->rt6i_protocol = RTPROT_BOOT;
866 }
867
868 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
869
870 if (addr_type & IPV6_ADDR_MULTICAST)
871 rt->u.dst.input = ip6_mc_input;
872 else
873 rt->u.dst.input = ip6_forward;
874
875 rt->u.dst.output = ip6_output;
876
877 ipv6_addr_prefix(&rt->rt6i_dst.addr,
878 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
879 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
880 if (rt->rt6i_dst.plen == 128)
881 rt->u.dst.flags = DST_HOST;
882
883#ifdef CONFIG_IPV6_SUBTREES
884 ipv6_addr_prefix(&rt->rt6i_src.addr,
885 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
886 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
887#endif
888
889 rt->rt6i_metric = rtmsg->rtmsg_metric;
890
891 /* We cannot add true routes via loopback here,
892 they would result in kernel looping; promote them to reject routes
893 */
894 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
895 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
896 /* hold loopback dev/idev if we haven't done so. */
897 if (dev != &loopback_dev) {
898 if (dev) {
899 dev_put(dev);
900 in6_dev_put(idev);
901 }
902 dev = &loopback_dev;
903 dev_hold(dev);
904 idev = in6_dev_get(dev);
905 if (!idev) {
906 err = -ENODEV;
907 goto out;
908 }
909 }
910 rt->u.dst.output = ip6_pkt_discard_out;
911 rt->u.dst.input = ip6_pkt_discard;
912 rt->u.dst.error = -ENETUNREACH;
913 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
914 goto install_route;
915 }
916
917 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
918 struct in6_addr *gw_addr;
919 int gwa_type;
920
921 gw_addr = &rtmsg->rtmsg_gateway;
922 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
923 gwa_type = ipv6_addr_type(gw_addr);
924
925 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
926 struct rt6_info *grt;
927
 928 /* IPv6 strictly inhibits using non-link-local
 929 addresses as a nexthop address.
 930 Otherwise, the router will not be able to send redirects.
 931 That is very good, but in some (rare!) circumstances
 932 (SIT, PtP, NBMA NOARP links) it is handy to allow
 933 some exceptions. --ANK
934 */
935 err = -EINVAL;
936 if (!(gwa_type&IPV6_ADDR_UNICAST))
937 goto out;
938
939 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
940
941 err = -EHOSTUNREACH;
942 if (grt == NULL)
943 goto out;
944 if (dev) {
945 if (dev != grt->rt6i_dev) {
946 dst_release(&grt->u.dst);
947 goto out;
948 }
949 } else {
950 dev = grt->rt6i_dev;
951 idev = grt->rt6i_idev;
952 dev_hold(dev);
953 in6_dev_hold(grt->rt6i_idev);
954 }
955 if (!(grt->rt6i_flags&RTF_GATEWAY))
956 err = 0;
957 dst_release(&grt->u.dst);
958
959 if (err)
960 goto out;
961 }
962 err = -EINVAL;
963 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
964 goto out;
965 }
966
967 err = -ENODEV;
968 if (dev == NULL)
969 goto out;
970
971 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
972 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
973 if (IS_ERR(rt->rt6i_nexthop)) {
974 err = PTR_ERR(rt->rt6i_nexthop);
975 rt->rt6i_nexthop = NULL;
976 goto out;
977 }
978 }
979
980 rt->rt6i_flags = rtmsg->rtmsg_flags;
981
982install_route:
983 if (rta && rta[RTA_METRICS-1]) {
984 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
985 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
986
987 while (RTA_OK(attr, attrlen)) {
988 unsigned flavor = attr->rta_type;
989 if (flavor) {
990 if (flavor > RTAX_MAX) {
991 err = -EINVAL;
992 goto out;
993 }
994 rt->u.dst.metrics[flavor-1] =
995 *(u32 *)RTA_DATA(attr);
996 }
997 attr = RTA_NEXT(attr, attrlen);
998 }
999 }
1000
1001 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1002 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1003 if (!rt->u.dst.metrics[RTAX_MTU-1])
1004 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1005 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1006 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1007 rt->u.dst.dev = dev;
1008 rt->rt6i_idev = idev;
 1009 return ip6_ins_rt(rt, nlh, _rtattr, req);
1010
1011out:
1012 if (dev)
1013 dev_put(dev);
1014 if (idev)
1015 in6_dev_put(idev);
1016 if (rt)
1017 dst_free((struct dst_entry *) rt);
1018 return err;
1019}
1020
 1021int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1022{
1023 int err;
1024
1025 write_lock_bh(&rt6_lock);
1026
1027 rt6_reset_dflt_pointer(NULL);
1028
 1029 err = fib6_del(rt, nlh, _rtattr, req);
1030 dst_release(&rt->u.dst);
1031
1032 write_unlock_bh(&rt6_lock);
1033
1034 return err;
1035}
1036
 1037static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1038{
1039 struct fib6_node *fn;
1040 struct rt6_info *rt;
1041 int err = -ESRCH;
1042
1043 read_lock_bh(&rt6_lock);
1044
1045 fn = fib6_locate(&ip6_routing_table,
1046 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1047 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1048
1049 if (fn) {
1050 for (rt = fn->leaf; rt; rt = rt->u.next) {
1051 if (rtmsg->rtmsg_ifindex &&
1052 (rt->rt6i_dev == NULL ||
1053 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1054 continue;
1055 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1056 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1057 continue;
1058 if (rtmsg->rtmsg_metric &&
1059 rtmsg->rtmsg_metric != rt->rt6i_metric)
1060 continue;
1061 dst_hold(&rt->u.dst);
1062 read_unlock_bh(&rt6_lock);
1063
 1064 return ip6_del_rt(rt, nlh, _rtattr, req);
1065 }
1066 }
1067 read_unlock_bh(&rt6_lock);
1068
1069 return err;
1070}
1071
1072/*
1073 * Handle redirects
1074 */
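/*
 * A redirect is honoured only if the old route is a gateway route and the
 * ICMP source is (one of) our current nexthop routers; the accepted target
 * is then installed as an RTF_DYNAMIC|RTF_CACHE host route cloned from the
 * old one.
 */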
1075void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1076 struct neighbour *neigh, u8 *lladdr, int on_link)
1077{
1078 struct rt6_info *rt, *nrt;
1079
1080 /* Locate old route to this destination. */
1081 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1082
1083 if (rt == NULL)
1084 return;
1085
1086 if (neigh->dev != rt->rt6i_dev)
1087 goto out;
1088
1089 /*
 1090 * Current route is on-link; the redirect is always invalid.
 1091 *
 1092 * It seems the previous statement is not true. The sender could
 1093 * be a node which regards us as on-link (e.g. proxy ndisc).
 1094 * But then the router serving it might decide that we should
 1095 * know the truth 8)8) --ANK (980726).
1096 */
1097 if (!(rt->rt6i_flags&RTF_GATEWAY))
1098 goto out;
1099
1100 /*
1101 * RFC 2461 specifies that redirects should only be
1102 * accepted if they come from the nexthop to the target.
1103 * Due to the way default routers are chosen, this notion
1104 * is a bit fuzzy and one might need to check all default
1105 * routers.
1106 */
1107 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1108 if (rt->rt6i_flags & RTF_DEFAULT) {
1109 struct rt6_info *rt1;
1110
1111 read_lock(&rt6_lock);
1112 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1113 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1114 dst_hold(&rt1->u.dst);
1115 dst_release(&rt->u.dst);
1116 read_unlock(&rt6_lock);
1117 rt = rt1;
1118 goto source_ok;
1119 }
1120 }
1121 read_unlock(&rt6_lock);
1122 }
1123 if (net_ratelimit())
1124 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1125 "for redirect target\n");
1126 goto out;
1127 }
1128
1129source_ok:
1130
1131 /*
1132 * We have finally decided to accept it.
1133 */
1134
1135 neigh_update(neigh, lladdr, NUD_STALE,
1136 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1137 NEIGH_UPDATE_F_OVERRIDE|
1138 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1139 NEIGH_UPDATE_F_ISROUTER))
1140 );
1141
1142 /*
1143 * Redirect received -> path was valid.
1144 * Look, redirects are sent only in response to data packets,
1145 * so that this nexthop apparently is reachable. --ANK
1146 */
1147 dst_confirm(&rt->u.dst);
1148
1149 /* Duplicate redirect: silently ignore. */
1150 if (neigh == rt->u.dst.neighbour)
1151 goto out;
1152
1153 nrt = ip6_rt_copy(rt);
1154 if (nrt == NULL)
1155 goto out;
1156
1157 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1158 if (on_link)
1159 nrt->rt6i_flags &= ~RTF_GATEWAY;
1160
1161 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1162 nrt->rt6i_dst.plen = 128;
1163 nrt->u.dst.flags |= DST_HOST;
1164
1165 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1166 nrt->rt6i_nexthop = neigh_clone(neigh);
1167 /* Reset pmtu, it may be better */
1168 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1169 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1170
 1171 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1172 goto out;
1173
1174 if (rt->rt6i_flags&RTF_CACHE) {
 1175 ip6_del_rt(rt, NULL, NULL, NULL);
1176 return;
1177 }
1178
1179out:
1180 dst_release(&rt->u.dst);
1181 return;
1182}
1183
1184/*
1185 * Handle ICMP "packet too big" messages
1186 * i.e. Path MTU discovery
1187 */
1188
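/*
 * A cached host route is updated in place; for a network route a new
 * RTF_DYNAMIC host entry is created instead, either copy-on-write
 * (connected route) or by cloning (gateway/NONEXTHOP route), so the
 * reduced MTU expires after ip6_rt_mtu_expires.
 */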
1189void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1190 struct net_device *dev, u32 pmtu)
1191{
1192 struct rt6_info *rt, *nrt;
1193 int allfrag = 0;
1194
1195 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1196 if (rt == NULL)
1197 return;
1198
1199 if (pmtu >= dst_mtu(&rt->u.dst))
1200 goto out;
1201
1202 if (pmtu < IPV6_MIN_MTU) {
1203 /*
 1204 * According to RFC 2460, the PMTU is set to the IPv6 Minimum Link
 1205 * MTU (1280) and a fragment header should always be included
 1206 * once a node receives a Packet Too Big message reporting a PMTU
 1207 * less than the IPv6 Minimum Link MTU.
1208 */
1209 pmtu = IPV6_MIN_MTU;
1210 allfrag = 1;
1211 }
1212
1213 /* New mtu received -> path was valid.
1214 They are sent only in response to data packets,
1215 so that this nexthop apparently is reachable. --ANK
1216 */
1217 dst_confirm(&rt->u.dst);
1218
 1219 /* Host route. If it is static, it would be better
 1220 not to override it but to add a new one, so that
 1221 when the cache entry expires the old pmtu
 1222 is restored automatically.
1223 */
1224 if (rt->rt6i_flags & RTF_CACHE) {
1225 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1226 if (allfrag)
1227 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1228 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1229 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1230 goto out;
1231 }
1232
1233 /* Network route.
1234 Two cases are possible:
 1235 1. It is a connected route. Action: COW it.
 1236 2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1237 */
 1238 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 1239 nrt = rt6_alloc_cow(rt, daddr, saddr);
 1240 else
 1241 nrt = rt6_alloc_clone(rt, daddr);
 1242
 1243 if (nrt) {
1244 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1245 if (allfrag)
1246 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1247
 1248 /* According to RFC 1981, a PMTU increase should not be probed for
 1249 * within 5 minutes; the recommended timer is 10 minutes.
 1250 * Here the route expiration time is set to ip6_rt_mtu_expires,
 1251 * which is 10 minutes. After 10 minutes the decreased pmtu expires
 1252 * and detection of a PMTU increase happens automatically.
1253 */
1254 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1255 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1256
1257 ip6_ins_rt(nrt, NULL, NULL, NULL);
 1258 }
1259out:
1260 dst_release(&rt->u.dst);
1261}
1262
1263/*
1264 * Misc support functions
1265 */
1266
1267static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1268{
1269 struct rt6_info *rt = ip6_dst_alloc();
1270
1271 if (rt) {
1272 rt->u.dst.input = ort->u.dst.input;
1273 rt->u.dst.output = ort->u.dst.output;
1274
1275 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1276 rt->u.dst.dev = ort->u.dst.dev;
1277 if (rt->u.dst.dev)
1278 dev_hold(rt->u.dst.dev);
1279 rt->rt6i_idev = ort->rt6i_idev;
1280 if (rt->rt6i_idev)
1281 in6_dev_hold(rt->rt6i_idev);
1282 rt->u.dst.lastuse = jiffies;
1283 rt->rt6i_expires = 0;
1284
1285 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1286 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1287 rt->rt6i_metric = 0;
1288
1289 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1290#ifdef CONFIG_IPV6_SUBTREES
1291 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1292#endif
1293 }
1294 return rt;
1295}
1296
1297struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1298{
1299 struct rt6_info *rt;
1300 struct fib6_node *fn;
1301
1302 fn = &ip6_routing_table;
1303
1304 write_lock_bh(&rt6_lock);
1305 for (rt = fn->leaf; rt; rt=rt->u.next) {
1306 if (dev == rt->rt6i_dev &&
1307 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1308 break;
1309 }
1310 if (rt)
1311 dst_hold(&rt->u.dst);
1312 write_unlock_bh(&rt6_lock);
1313 return rt;
1314}
1315
1316struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1317 struct net_device *dev)
1318{
1319 struct in6_rtmsg rtmsg;
1320
1321 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1322 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1323 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1324 rtmsg.rtmsg_metric = 1024;
1325 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1326
1327 rtmsg.rtmsg_ifindex = dev->ifindex;
1328
 1329 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1330 return rt6_get_dflt_router(gwaddr, dev);
1331}
1332
1333void rt6_purge_dflt_routers(void)
1334{
1335 struct rt6_info *rt;
1336
1337restart:
1338 read_lock_bh(&rt6_lock);
1339 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1340 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1341 dst_hold(&rt->u.dst);
1342
1343 rt6_reset_dflt_pointer(NULL);
1344
1345 read_unlock_bh(&rt6_lock);
1346
 1347 ip6_del_rt(rt, NULL, NULL, NULL);
1348
1349 goto restart;
1350 }
1351 }
1352 read_unlock_bh(&rt6_lock);
1353}
1354
1355int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1356{
1357 struct in6_rtmsg rtmsg;
1358 int err;
1359
1360 switch(cmd) {
1361 case SIOCADDRT: /* Add a route */
1362 case SIOCDELRT: /* Delete a route */
1363 if (!capable(CAP_NET_ADMIN))
1364 return -EPERM;
1365 err = copy_from_user(&rtmsg, arg,
1366 sizeof(struct in6_rtmsg));
1367 if (err)
1368 return -EFAULT;
1369
1370 rtnl_lock();
1371 switch (cmd) {
1372 case SIOCADDRT:
 1373 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1374 break;
1375 case SIOCDELRT:
 1376 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1377 break;
1378 default:
1379 err = -EINVAL;
1380 }
1381 rtnl_unlock();
1382
1383 return err;
1384 };
1385
1386 return -EINVAL;
1387}
1388
1389/*
1390 * Drop the packet on the floor
1391 */
1392
 1393static int ip6_pkt_discard(struct sk_buff *skb)
1394{
1395 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1396 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1397 kfree_skb(skb);
1398 return 0;
1399}
1400
 1401static int ip6_pkt_discard_out(struct sk_buff *skb)
1402{
1403 skb->dev = skb->dst->dev;
1404 return ip6_pkt_discard(skb);
1405}
1406
1407/*
1408 * Allocate a dst for local (unicast / anycast) address.
1409 */
1410
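/*
 * The resulting route sends through loopback_dev while rt6i_idev pins the
 * interface that owns the address; RTF_ANYCAST or RTF_LOCAL records which
 * kind of local address this is.
 */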
1411struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1412 const struct in6_addr *addr,
1413 int anycast)
1414{
1415 struct rt6_info *rt = ip6_dst_alloc();
1416
1417 if (rt == NULL)
1418 return ERR_PTR(-ENOMEM);
1419
1420 dev_hold(&loopback_dev);
1421 in6_dev_hold(idev);
1422
1423 rt->u.dst.flags = DST_HOST;
1424 rt->u.dst.input = ip6_input;
1425 rt->u.dst.output = ip6_output;
1426 rt->rt6i_dev = &loopback_dev;
1427 rt->rt6i_idev = idev;
1428 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1429 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1430 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1431 rt->u.dst.obsolete = -1;
1432
1433 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1434 if (anycast)
1435 rt->rt6i_flags |= RTF_ANYCAST;
1436 else
1437 rt->rt6i_flags |= RTF_LOCAL;
1438 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1439 if (rt->rt6i_nexthop == NULL) {
1440 dst_free((struct dst_entry *) rt);
1441 return ERR_PTR(-ENOMEM);
1442 }
1443
1444 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1445 rt->rt6i_dst.plen = 128;
1446
1447 atomic_set(&rt->u.dst.__refcnt, 1);
1448
1449 return rt;
1450}
1451
1452static int fib6_ifdown(struct rt6_info *rt, void *arg)
1453{
1454 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1455 rt != &ip6_null_entry) {
1456 RT6_TRACE("deleted by ifdown %p\n", rt);
1457 return -1;
1458 }
1459 return 0;
1460}
1461
1462void rt6_ifdown(struct net_device *dev)
1463{
1464 write_lock_bh(&rt6_lock);
1465 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1466 write_unlock_bh(&rt6_lock);
1467}
1468
1469struct rt6_mtu_change_arg
1470{
1471 struct net_device *dev;
1472 unsigned mtu;
1473};
1474
1475static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1476{
1477 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1478 struct inet6_dev *idev;
1479
 1480 /* In IPv6, pmtu discovery is not optional,
 1481 so the RTAX_MTU lock cannot disable it.
1482 We still use this lock to block changes
1483 caused by addrconf/ndisc.
1484 */
1485
1486 idev = __in6_dev_get(arg->dev);
1487 if (idev == NULL)
1488 return 0;
1489
 1490 /* For an administrative MTU increase, there is no way to discover
 1491 an IPv6 PMTU increase, so the PMTU increase should be updated here.
 1492 Since RFC 1981 doesn't cover administrative MTU increases,
 1493 updating the PMTU on increase is a MUST. (i.e. jumbo frame)
 1494 */
 1495 /*
 1496 If the new MTU is less than the route PMTU, this new MTU will be the
 1497 lowest MTU in the path; update the route PMTU to reflect the PMTU
 1498 decrease. If the new MTU is greater than the route PMTU, and the
 1499 old MTU is the lowest MTU in the path, update the route PMTU
 1500 to reflect the increase. In this case, if another node's MTU
 1501 is now the lowest in the path, a Packet Too Big message will trigger
 1502 PMTU discovery.
1503 */
1504 if (rt->rt6i_dev == arg->dev &&
1505 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1506 (dst_mtu(&rt->u.dst) > arg->mtu ||
1507 (dst_mtu(&rt->u.dst) < arg->mtu &&
1508 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1509 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1510 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1511 return 0;
1512}
1513
1514void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1515{
1516 struct rt6_mtu_change_arg arg;
1517
1518 arg.dev = dev;
1519 arg.mtu = mtu;
1520 read_lock_bh(&rt6_lock);
1521 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1522 read_unlock_bh(&rt6_lock);
1523}
1524
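/*
 * Translate an rtnetlink route request (struct rtmsg plus attributes) into
 * the legacy in6_rtmsg format consumed by ip6_route_add()/ip6_route_del().
 */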
1525static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1526 struct in6_rtmsg *rtmsg)
1527{
1528 memset(rtmsg, 0, sizeof(*rtmsg));
1529
1530 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1531 rtmsg->rtmsg_src_len = r->rtm_src_len;
1532 rtmsg->rtmsg_flags = RTF_UP;
1533 if (r->rtm_type == RTN_UNREACHABLE)
1534 rtmsg->rtmsg_flags |= RTF_REJECT;
1535
1536 if (rta[RTA_GATEWAY-1]) {
1537 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1538 return -EINVAL;
1539 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1540 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1541 }
1542 if (rta[RTA_DST-1]) {
1543 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1544 return -EINVAL;
1545 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1546 }
1547 if (rta[RTA_SRC-1]) {
1548 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1549 return -EINVAL;
1550 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1551 }
1552 if (rta[RTA_OIF-1]) {
1553 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1554 return -EINVAL;
1555 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1556 }
1557 if (rta[RTA_PRIORITY-1]) {
1558 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1559 return -EINVAL;
1560 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1561 }
1562 return 0;
1563}
1564
1565int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1566{
1567 struct rtmsg *r = NLMSG_DATA(nlh);
1568 struct in6_rtmsg rtmsg;
1569
1570 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1571 return -EINVAL;
 1572 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1573}
1574
1575int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1576{
1577 struct rtmsg *r = NLMSG_DATA(nlh);
1578 struct in6_rtmsg rtmsg;
1579
1580 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1581 return -EINVAL;
 1582 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1583}
1584
1585struct rt6_rtnl_dump_arg
1586{
1587 struct sk_buff *skb;
1588 struct netlink_callback *cb;
1589};
1590
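/*
 * Fill one route message into skb.  Returns skb->len on success, 1 when the
 * route is skipped because the caller asked for prefix routes only, and -1
 * when the skb has no room left.
 */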
1591static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1592 struct in6_addr *dst, struct in6_addr *src,
1593 int iif, int type, u32 pid, u32 seq,
1594 int prefix, unsigned int flags)
1595{
1596 struct rtmsg *rtm;
1597 struct nlmsghdr *nlh;
1598 unsigned char *b = skb->tail;
1599 struct rta_cacheinfo ci;
1600
1601 if (prefix) { /* user wants prefix routes only */
1602 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1603 /* success since this is not a prefix route */
1604 return 1;
1605 }
1606 }
1607
 1608 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1609 rtm = NLMSG_DATA(nlh);
1610 rtm->rtm_family = AF_INET6;
1611 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1612 rtm->rtm_src_len = rt->rt6i_src.plen;
1613 rtm->rtm_tos = 0;
1614 rtm->rtm_table = RT_TABLE_MAIN;
1615 if (rt->rt6i_flags&RTF_REJECT)
1616 rtm->rtm_type = RTN_UNREACHABLE;
1617 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1618 rtm->rtm_type = RTN_LOCAL;
1619 else
1620 rtm->rtm_type = RTN_UNICAST;
1621 rtm->rtm_flags = 0;
1622 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1623 rtm->rtm_protocol = rt->rt6i_protocol;
1624 if (rt->rt6i_flags&RTF_DYNAMIC)
1625 rtm->rtm_protocol = RTPROT_REDIRECT;
1626 else if (rt->rt6i_flags & RTF_ADDRCONF)
1627 rtm->rtm_protocol = RTPROT_KERNEL;
1628 else if (rt->rt6i_flags&RTF_DEFAULT)
1629 rtm->rtm_protocol = RTPROT_RA;
1630
1631 if (rt->rt6i_flags&RTF_CACHE)
1632 rtm->rtm_flags |= RTM_F_CLONED;
1633
1634 if (dst) {
1635 RTA_PUT(skb, RTA_DST, 16, dst);
1636 rtm->rtm_dst_len = 128;
1637 } else if (rtm->rtm_dst_len)
1638 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1639#ifdef CONFIG_IPV6_SUBTREES
1640 if (src) {
1641 RTA_PUT(skb, RTA_SRC, 16, src);
1642 rtm->rtm_src_len = 128;
1643 } else if (rtm->rtm_src_len)
1644 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1645#endif
1646 if (iif)
1647 RTA_PUT(skb, RTA_IIF, 4, &iif);
1648 else if (dst) {
1649 struct in6_addr saddr_buf;
1650 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1651 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1652 }
1653 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1654 goto rtattr_failure;
1655 if (rt->u.dst.neighbour)
1656 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1657 if (rt->u.dst.dev)
1658 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1659 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1660 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1661 if (rt->rt6i_expires)
1662 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1663 else
1664 ci.rta_expires = 0;
1665 ci.rta_used = rt->u.dst.__use;
1666 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1667 ci.rta_error = rt->u.dst.error;
1668 ci.rta_id = 0;
1669 ci.rta_ts = 0;
1670 ci.rta_tsage = 0;
1671 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1672 nlh->nlmsg_len = skb->tail - b;
1673 return skb->len;
1674
1675nlmsg_failure:
1676rtattr_failure:
1677 skb_trim(skb, b - skb->data);
1678 return -1;
1679}
1680
1681static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1682{
1683 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1684 int prefix;
1685
1686 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1687 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1688 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1689 } else
1690 prefix = 0;
1691
1692 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1693 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
 1694 prefix, NLM_F_MULTI);
1695}
1696
1697static int fib6_dump_node(struct fib6_walker_t *w)
1698{
1699 int res;
1700 struct rt6_info *rt;
1701
1702 for (rt = w->leaf; rt; rt = rt->u.next) {
1703 res = rt6_dump_route(rt, w->args);
1704 if (res < 0) {
1705 /* Frame is full, suspend walking */
1706 w->leaf = rt;
1707 return 1;
1708 }
1709 BUG_TRAP(res!=0);
1710 }
1711 w->leaf = NULL;
1712 return 0;
1713}
1714
1715static void fib6_dump_end(struct netlink_callback *cb)
1716{
1717 struct fib6_walker_t *w = (void*)cb->args[0];
1718
1719 if (w) {
1720 cb->args[0] = 0;
1721 fib6_walker_unlink(w);
1722 kfree(w);
1723 }
1724 cb->done = (void*)cb->args[1];
1725 cb->args[1] = 0;
1726}
1727
1728static int fib6_dump_done(struct netlink_callback *cb)
1729{
1730 fib6_dump_end(cb);
 1731 return cb->done ? cb->done(cb) : 0;
1732}
1733
1734int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1735{
1736 struct rt6_rtnl_dump_arg arg;
1737 struct fib6_walker_t *w;
1738 int res;
1739
1740 arg.skb = skb;
1741 arg.cb = cb;
1742
1743 w = (void*)cb->args[0];
1744 if (w == NULL) {
1745 /* New dump:
1746 *
1747 * 1. hook callback destructor.
1748 */
1749 cb->args[1] = (long)cb->done;
1750 cb->done = fib6_dump_done;
1751
1752 /*
1753 * 2. allocate and initialize walker.
1754 */
 1755 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1756 if (w == NULL)
1757 return -ENOMEM;
1758 RT6_TRACE("dump<%p", w);
1759 memset(w, 0, sizeof(*w));
1760 w->root = &ip6_routing_table;
1761 w->func = fib6_dump_node;
1762 w->args = &arg;
1763 cb->args[0] = (long)w;
1764 read_lock_bh(&rt6_lock);
1765 res = fib6_walk(w);
1766 read_unlock_bh(&rt6_lock);
1767 } else {
1768 w->args = &arg;
1769 read_lock_bh(&rt6_lock);
1770 res = fib6_walk_continue(w);
1771 read_unlock_bh(&rt6_lock);
1772 }
1773#if RT6_DEBUG >= 3
1774 if (res <= 0 && skb->len == 0)
1775 RT6_TRACE("%p>dump end\n", w);
1776#endif
1777 res = res < 0 ? res : skb->len;
1778 /* res < 0 is an error. (really, impossible)
1779 res == 0 means that dump is complete, but skb still can contain data.
1780 res > 0 dump is not complete, but frame is full.
1781 */
1782 /* Destroy walker, if dump of this table is complete. */
1783 if (res <= 0)
1784 fib6_dump_end(cb);
1785 return res;
1786}
1787
1788int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1789{
1790 struct rtattr **rta = arg;
1791 int iif = 0;
1792 int err = -ENOBUFS;
1793 struct sk_buff *skb;
1794 struct flowi fl;
1795 struct rt6_info *rt;
1796
1797 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1798 if (skb == NULL)
1799 goto out;
1800
 1801 /* Reserve room for dummy headers; this skb can pass
 1802 through a good chunk of the routing engine.
1803 */
1804 skb->mac.raw = skb->data;
1805 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1806
1807 memset(&fl, 0, sizeof(fl));
1808 if (rta[RTA_SRC-1])
1809 ipv6_addr_copy(&fl.fl6_src,
1810 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1811 if (rta[RTA_DST-1])
1812 ipv6_addr_copy(&fl.fl6_dst,
1813 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1814
1815 if (rta[RTA_IIF-1])
1816 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1817
1818 if (iif) {
1819 struct net_device *dev;
1820 dev = __dev_get_by_index(iif);
1821 if (!dev) {
1822 err = -ENODEV;
1823 goto out_free;
1824 }
1825 }
1826
1827 fl.oif = 0;
1828 if (rta[RTA_OIF-1])
1829 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1830
1831 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1832
1833 skb->dst = &rt->u.dst;
1834
1835 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1836 err = rt6_fill_node(skb, rt,
1837 &fl.fl6_dst, &fl.fl6_src,
1838 iif,
1839 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
 1840 nlh->nlmsg_seq, 0, 0);
1841 if (err < 0) {
1842 err = -EMSGSIZE;
1843 goto out_free;
1844 }
1845
1846 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1847 if (err > 0)
1848 err = 0;
1849out:
1850 return err;
1851out_free:
1852 kfree_skb(skb);
1853 goto out;
1854}
1855
1856void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1857 struct netlink_skb_parms *req)
1858{
1859 struct sk_buff *skb;
1860 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
 1861 u32 pid = current->pid;
 1862 u32 seq = 0;
 1863
1864 if (req)
1865 pid = req->pid;
1866 if (nlh)
1867 seq = nlh->nlmsg_seq;
1868
1869 skb = alloc_skb(size, gfp_any());
1870 if (!skb) {
 1871 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1872 return;
1873 }
 1874 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
 1875 kfree_skb(skb);
 1876 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1877 return;
1878 }
1879 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1880 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1881}
1882
1883/*
1884 * /proc
1885 */
1886
1887#ifdef CONFIG_PROC_FS
1888
1889#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
1890
1891struct rt6_proc_arg
1892{
1893 char *buffer;
1894 int offset;
1895 int length;
1896 int skip;
1897 int len;
1898};
1899
1900static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1901{
1902 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1903 int i;
1904
1905 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1906 arg->skip++;
1907 return 0;
1908 }
1909
1910 if (arg->len >= arg->length)
1911 return 0;
1912
1913 for (i=0; i<16; i++) {
1914 sprintf(arg->buffer + arg->len, "%02x",
1915 rt->rt6i_dst.addr.s6_addr[i]);
1916 arg->len += 2;
1917 }
1918 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1919 rt->rt6i_dst.plen);
1920
1921#ifdef CONFIG_IPV6_SUBTREES
1922 for (i=0; i<16; i++) {
1923 sprintf(arg->buffer + arg->len, "%02x",
1924 rt->rt6i_src.addr.s6_addr[i]);
1925 arg->len += 2;
1926 }
1927 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1928 rt->rt6i_src.plen);
1929#else
1930 sprintf(arg->buffer + arg->len,
1931 "00000000000000000000000000000000 00 ");
1932 arg->len += 36;
1933#endif
1934
1935 if (rt->rt6i_nexthop) {
1936 for (i=0; i<16; i++) {
1937 sprintf(arg->buffer + arg->len, "%02x",
1938 rt->rt6i_nexthop->primary_key[i]);
1939 arg->len += 2;
1940 }
1941 } else {
1942 sprintf(arg->buffer + arg->len,
1943 "00000000000000000000000000000000");
1944 arg->len += 32;
1945 }
1946 arg->len += sprintf(arg->buffer + arg->len,
1947 " %08x %08x %08x %08x %8s\n",
1948 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1949 rt->u.dst.__use, rt->rt6i_flags,
1950 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1951 return 0;
1952}
1953
1954static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1955{
1956 struct rt6_proc_arg arg;
1957 arg.buffer = buffer;
1958 arg.offset = offset;
1959 arg.length = length;
1960 arg.skip = 0;
1961 arg.len = 0;
1962
1963 read_lock_bh(&rt6_lock);
1964 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1965 read_unlock_bh(&rt6_lock);
1966
1967 *start = buffer;
1968 if (offset)
1969 *start += offset % RT6_INFO_LEN;
1970
1971 arg.len -= offset % RT6_INFO_LEN;
1972
1973 if (arg.len > length)
1974 arg.len = length;
1975 if (arg.len < 0)
1976 arg.len = 0;
1977
1978 return arg.len;
1979}
1980
1981static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1982{
1983 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1984 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1985 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1986 rt6_stats.fib_rt_cache,
1987 atomic_read(&ip6_dst_ops.entries),
1988 rt6_stats.fib_discarded_routes);
1989
1990 return 0;
1991}
1992
1993static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1994{
1995 return single_open(file, rt6_stats_seq_show, NULL);
1996}
1997
1998static struct file_operations rt6_stats_seq_fops = {
1999 .owner = THIS_MODULE,
2000 .open = rt6_stats_seq_open,
2001 .read = seq_read,
2002 .llseek = seq_lseek,
2003 .release = single_release,
2004};
2005#endif /* CONFIG_PROC_FS */
2006
2007#ifdef CONFIG_SYSCTL
2008
2009static int flush_delay;
2010
2011static
2012int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2013 void __user *buffer, size_t *lenp, loff_t *ppos)
2014{
2015 if (write) {
2016 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2017 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2018 return 0;
2019 } else
2020 return -EINVAL;
2021}
2022
2023ctl_table ipv6_route_table[] = {
2024 {
2025 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2026 .procname = "flush",
2027 .data = &flush_delay,
2028 .maxlen = sizeof(int),
 2029 .mode = 0200,
2030 .proc_handler = &ipv6_sysctl_rtcache_flush
2031 },
2032 {
2033 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2034 .procname = "gc_thresh",
2035 .data = &ip6_dst_ops.gc_thresh,
2036 .maxlen = sizeof(int),
2037 .mode = 0644,
2038 .proc_handler = &proc_dointvec,
2039 },
2040 {
2041 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2042 .procname = "max_size",
2043 .data = &ip6_rt_max_size,
2044 .maxlen = sizeof(int),
2045 .mode = 0644,
2046 .proc_handler = &proc_dointvec,
2047 },
2048 {
2049 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2050 .procname = "gc_min_interval",
2051 .data = &ip6_rt_gc_min_interval,
2052 .maxlen = sizeof(int),
2053 .mode = 0644,
2054 .proc_handler = &proc_dointvec_jiffies,
2055 .strategy = &sysctl_jiffies,
2056 },
2057 {
2058 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2059 .procname = "gc_timeout",
2060 .data = &ip6_rt_gc_timeout,
2061 .maxlen = sizeof(int),
2062 .mode = 0644,
2063 .proc_handler = &proc_dointvec_jiffies,
2064 .strategy = &sysctl_jiffies,
2065 },
2066 {
2067 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2068 .procname = "gc_interval",
2069 .data = &ip6_rt_gc_interval,
2070 .maxlen = sizeof(int),
2071 .mode = 0644,
2072 .proc_handler = &proc_dointvec_jiffies,
2073 .strategy = &sysctl_jiffies,
2074 },
2075 {
2076 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2077 .procname = "gc_elasticity",
2078 .data = &ip6_rt_gc_elasticity,
2079 .maxlen = sizeof(int),
2080 .mode = 0644,
2081 .proc_handler = &proc_dointvec_jiffies,
2082 .strategy = &sysctl_jiffies,
2083 },
2084 {
2085 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2086 .procname = "mtu_expires",
2087 .data = &ip6_rt_mtu_expires,
2088 .maxlen = sizeof(int),
2089 .mode = 0644,
2090 .proc_handler = &proc_dointvec_jiffies,
2091 .strategy = &sysctl_jiffies,
2092 },
2093 {
2094 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2095 .procname = "min_adv_mss",
2096 .data = &ip6_rt_min_advmss,
2097 .maxlen = sizeof(int),
2098 .mode = 0644,
2099 .proc_handler = &proc_dointvec_jiffies,
2100 .strategy = &sysctl_jiffies,
2101 },
2102 {
2103 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2104 .procname = "gc_min_interval_ms",
2105 .data = &ip6_rt_gc_min_interval,
2106 .maxlen = sizeof(int),
2107 .mode = 0644,
2108 .proc_handler = &proc_dointvec_ms_jiffies,
2109 .strategy = &sysctl_ms_jiffies,
2110 },
2111 { .ctl_name = 0 }
2112};
2113
2114#endif
2115
2116void __init ip6_route_init(void)
2117{
2118 struct proc_dir_entry *p;
2119
2120 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2121 sizeof(struct rt6_info),
2122 0, SLAB_HWCACHE_ALIGN,
2123 NULL, NULL);
2124 if (!ip6_dst_ops.kmem_cachep)
2125 panic("cannot create ip6_dst_cache");
2126
2127 fib6_init();
2128#ifdef CONFIG_PROC_FS
2129 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2130 if (p)
2131 p->owner = THIS_MODULE;
2132
2133 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2134#endif
2135#ifdef CONFIG_XFRM
2136 xfrm6_init();
2137#endif
2138}
2139
2140void ip6_route_cleanup(void)
2141{
2142#ifdef CONFIG_PROC_FS
2143 proc_net_remove("ipv6_route");
2144 proc_net_remove("rt6_stats");
2145#endif
2146#ifdef CONFIG_XFRM
2147 xfrm6_fini();
2148#endif
2149 rt6_ifdown(NULL);
2150 fib6_gc_cleanup();
2151 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2152}