]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[IPV6]: ROUTE: Try selecting better route for non-default routes as well.
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/config.h>
29#include <linux/errno.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/init.h>
39#include <linux/netlink.h>
40#include <linux/if_arp.h>
41
42#ifdef CONFIG_PROC_FS
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
45#endif
46
47#include <net/snmp.h>
48#include <net/ipv6.h>
49#include <net/ip6_fib.h>
50#include <net/ip6_route.h>
51#include <net/ndisc.h>
52#include <net/addrconf.h>
53#include <net/tcp.h>
54#include <linux/rtnetlink.h>
55#include <net/dst.h>
56#include <net/xfrm.h>
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
554cfb7e
YH
77#define RT6_SELECT_F_IFACE 0x1
78#define RT6_SELECT_F_REACHABLE 0x2
79
1da177e4
LT
80static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101static struct dst_ops ip6_dst_ops = {
102 .family = AF_INET6,
103 .protocol = __constant_htons(ETH_P_IPV6),
104 .gc = ip6_dst_gc,
105 .gc_thresh = 1024,
106 .check = ip6_dst_check,
107 .destroy = ip6_dst_destroy,
108 .ifdown = ip6_dst_ifdown,
109 .negative_advice = ip6_negative_advice,
110 .link_failure = ip6_link_failure,
111 .update_pmtu = ip6_rt_update_pmtu,
112 .entry_size = sizeof(struct rt6_info),
113};
114
115struct rt6_info ip6_null_entry = {
116 .u = {
117 .dst = {
118 .__refcnt = ATOMIC_INIT(1),
119 .__use = 1,
120 .dev = &loopback_dev,
121 .obsolete = -1,
122 .error = -ENETUNREACH,
123 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
124 .input = ip6_pkt_discard,
125 .output = ip6_pkt_discard_out,
126 .ops = &ip6_dst_ops,
127 .path = (struct dst_entry*)&ip6_null_entry,
128 }
129 },
130 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
131 .rt6i_metric = ~(u32) 0,
132 .rt6i_ref = ATOMIC_INIT(1),
133};
134
135struct fib6_node ip6_routing_table = {
136 .leaf = &ip6_null_entry,
137 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138};
139
140/* Protects all the ip6 fib */
141
142DEFINE_RWLOCK(rt6_lock);
143
144
145/* allocate dst with ip6_dst_ops */
146static __inline__ struct rt6_info *ip6_dst_alloc(void)
147{
148 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149}
150
151static void ip6_dst_destroy(struct dst_entry *dst)
152{
153 struct rt6_info *rt = (struct rt6_info *)dst;
154 struct inet6_dev *idev = rt->rt6i_idev;
155
156 if (idev != NULL) {
157 rt->rt6i_idev = NULL;
158 in6_dev_put(idev);
159 }
160}
161
162static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163 int how)
164{
165 struct rt6_info *rt = (struct rt6_info *)dst;
166 struct inet6_dev *idev = rt->rt6i_idev;
167
168 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170 if (loopback_idev != NULL) {
171 rt->rt6i_idev = loopback_idev;
172 in6_dev_put(idev);
173 }
174 }
175}
176
177static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178{
179 return (rt->rt6i_flags & RTF_EXPIRES &&
180 time_after(jiffies, rt->rt6i_expires));
181}
182
183/*
184 * Route lookup. Any rt6_lock is implied.
185 */
186
187static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188 int oif,
189 int strict)
190{
191 struct rt6_info *local = NULL;
192 struct rt6_info *sprt;
193
194 if (oif) {
195 for (sprt = rt; sprt; sprt = sprt->u.next) {
196 struct net_device *dev = sprt->rt6i_dev;
197 if (dev->ifindex == oif)
198 return sprt;
199 if (dev->flags & IFF_LOOPBACK) {
200 if (sprt->rt6i_idev == NULL ||
201 sprt->rt6i_idev->dev->ifindex != oif) {
202 if (strict && oif)
203 continue;
204 if (local && (!oif ||
205 local->rt6i_idev->dev->ifindex == oif))
206 continue;
207 }
208 local = sprt;
209 }
210 }
211
212 if (local)
213 return local;
214
215 if (strict)
216 return &ip6_null_entry;
217 }
218 return rt;
219}
220
221/*
554cfb7e 222 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 223 */
554cfb7e
YH
224static int inline rt6_check_dev(struct rt6_info *rt, int oif)
225{
226 struct net_device *dev = rt->rt6i_dev;
227 if (!oif || dev->ifindex == oif)
228 return 2;
229 if ((dev->flags & IFF_LOOPBACK) &&
230 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
231 return 1;
232 return 0;
233}
1da177e4 234
554cfb7e 235static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 236{
554cfb7e
YH
237 struct neighbour *neigh = rt->rt6i_nexthop;
238 int m = 0;
239 if (neigh) {
240 read_lock_bh(&neigh->lock);
241 if (neigh->nud_state & NUD_VALID)
242 m = 1;
243 read_unlock_bh(&neigh->lock);
1da177e4 244 }
554cfb7e 245 return m;
1da177e4
LT
246}
247
554cfb7e
YH
248static int rt6_score_route(struct rt6_info *rt, int oif,
249 int strict)
1da177e4 250{
554cfb7e
YH
251 int m = rt6_check_dev(rt, oif);
252 if (!m && (strict & RT6_SELECT_F_IFACE))
253 return -1;
254 if (rt6_check_neigh(rt))
255 m |= 4;
256 else if (strict & RT6_SELECT_F_REACHABLE)
257 return -1;
258 return m;
259}
260
261static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
262 int strict)
263{
264 struct rt6_info *match = NULL, *last = NULL;
265 struct rt6_info *rt, *rt0 = *head;
266 u32 metric;
267 int mpri = -1;
1da177e4 268
554cfb7e
YH
269 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
270 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 271
554cfb7e
YH
272 for (rt = rt0, metric = rt0->rt6i_metric;
273 rt && rt->rt6i_metric == metric;
274 rt = rt->u.next) {
275 int m;
1da177e4 276
554cfb7e 277 if (rt6_check_expired(rt))
1da177e4
LT
278 continue;
279
554cfb7e
YH
280 last = rt;
281
282 m = rt6_score_route(rt, oif, strict);
283 if (m < 0)
1da177e4 284 continue;
1da177e4 285
554cfb7e
YH
286 if (m > mpri) {
287 match = rt;
1da177e4 288 mpri = m;
1da177e4
LT
289 }
290 }
291
554cfb7e
YH
292 if (!match &&
293 (strict & RT6_SELECT_F_REACHABLE) &&
294 last && last != rt0) {
295 /* no entries matched; do round-robin */
296 *head = rt0->u.next;
297 rt0->u.next = last->u.next;
298 last->u.next = rt0;
1da177e4 299 }
1da177e4 300
554cfb7e
YH
301 RT6_TRACE("%s() => %p, score=%d\n",
302 __FUNCTION__, match, mpri);
1da177e4 303
554cfb7e 304 return (match ? match : &ip6_null_entry);
1da177e4
LT
305}
306
307struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
308 int oif, int strict)
309{
310 struct fib6_node *fn;
311 struct rt6_info *rt;
312
313 read_lock_bh(&rt6_lock);
314 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
315 rt = rt6_device_match(fn->leaf, oif, strict);
316 dst_hold(&rt->u.dst);
317 rt->u.dst.__use++;
318 read_unlock_bh(&rt6_lock);
319
320 rt->u.dst.lastuse = jiffies;
321 if (rt->u.dst.error == 0)
322 return rt;
323 dst_release(&rt->u.dst);
324 return NULL;
325}
326
327/* ip6_ins_rt is called with FREE rt6_lock.
328 It takes new route entry, the addition fails by any reason the
329 route is freed. In any case, if caller does not hold it, it may
330 be destroyed.
331 */
332
0d51aa80
JHS
333int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
334 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
335{
336 int err;
337
338 write_lock_bh(&rt6_lock);
0d51aa80 339 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
1da177e4
LT
340 write_unlock_bh(&rt6_lock);
341
342 return err;
343}
344
95a9a5ba
YH
345static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
346 struct in6_addr *saddr)
1da177e4 347{
1da177e4
LT
348 struct rt6_info *rt;
349
350 /*
351 * Clone the route.
352 */
353
354 rt = ip6_rt_copy(ort);
355
356 if (rt) {
58c4fb86
YH
357 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
358 if (rt->rt6i_dst.plen != 128 &&
359 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
360 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 361 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 362 }
1da177e4 363
58c4fb86 364 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
365 rt->rt6i_dst.plen = 128;
366 rt->rt6i_flags |= RTF_CACHE;
367 rt->u.dst.flags |= DST_HOST;
368
369#ifdef CONFIG_IPV6_SUBTREES
370 if (rt->rt6i_src.plen && saddr) {
371 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
372 rt->rt6i_src.plen = 128;
373 }
374#endif
375
376 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
377
95a9a5ba 378 }
1da177e4 379
95a9a5ba
YH
380 return rt;
381}
1da177e4 382
299d9939
YH
383static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
384{
385 struct rt6_info *rt = ip6_rt_copy(ort);
386 if (rt) {
387 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
388 rt->rt6i_dst.plen = 128;
389 rt->rt6i_flags |= RTF_CACHE;
390 if (rt->rt6i_flags & RTF_REJECT)
391 rt->u.dst.error = ort->u.dst.error;
392 rt->u.dst.flags |= DST_HOST;
393 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
394 }
395 return rt;
396}
397
1da177e4
LT
/*
 * BACKTRACK() - retry a strict lookup at a less specific FIB node.
 *
 * Relies on the caller's locals `rt`, `fn` and `strict` and on the
 * caller's labels `restart` and `out`.  When the selection produced
 * &ip6_null_entry and `strict` is non-zero, walk up the parent chain:
 * jump to `out` on reaching a root node, or to `restart` at the first
 * ancestor that carries route information.  If the chain runs out,
 * execution simply continues after the macro.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and stays safe as an unbraced if/else body.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) \
				goto out; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
408
409
410void ip6_route_input(struct sk_buff *skb)
411{
412 struct fib6_node *fn;
519fbd87 413 struct rt6_info *rt, *nrt;
1da177e4
LT
414 int strict;
415 int attempts = 3;
519fbd87 416 int err;
1da177e4 417
118f8c16 418 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
419
420relookup:
421 read_lock_bh(&rt6_lock);
422
423 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
424 &skb->nh.ipv6h->saddr);
425
426restart:
427 rt = fn->leaf;
428
429 if ((rt->rt6i_flags & RTF_CACHE)) {
118f8c16
YH
430 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | RT6_SELECT_F_REACHABLE);
431 if (rt == &ip6_null_entry)
432 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict);
1da177e4 433 BACKTRACK();
1da177e4
LT
434 goto out;
435 }
436
118f8c16
YH
437 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | RT6_SELECT_F_REACHABLE);
438 if (rt == &ip6_null_entry)
439 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict);
1da177e4
LT
440 BACKTRACK();
441
fb9de91e
YH
442 dst_hold(&rt->u.dst);
443 read_unlock_bh(&rt6_lock);
444
519fbd87
YH
445 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
446 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
447 else {
448#if CLONE_OFFLINK_ROUTE
449 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
450#else
451 goto out2;
452#endif
453 }
e40cf353 454
519fbd87
YH
455 dst_release(&rt->u.dst);
456 rt = nrt ? : &ip6_null_entry;
1da177e4 457
519fbd87
YH
458 dst_hold(&rt->u.dst);
459 if (nrt) {
460 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
461 if (!err)
1da177e4 462 goto out2;
1da177e4 463 }
1da177e4 464
519fbd87
YH
465 if (--attempts <= 0)
466 goto out2;
467
468 /*
469 * Race condition! In the gap, when rt6_lock was
470 * released someone could insert this route. Relookup.
471 */
472 dst_release(&rt->u.dst);
473 goto relookup;
474
475out:
476 dst_hold(&rt->u.dst);
477 read_unlock_bh(&rt6_lock);
1da177e4
LT
478out2:
479 rt->u.dst.lastuse = jiffies;
480 rt->u.dst.__use++;
481 skb->dst = (struct dst_entry *) rt;
fb9de91e 482 return;
1da177e4
LT
483}
484
485struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
486{
487 struct fib6_node *fn;
519fbd87 488 struct rt6_info *rt, *nrt;
1da177e4
LT
489 int strict;
490 int attempts = 3;
519fbd87 491 int err;
1da177e4 492
554cfb7e 493 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
494
495relookup:
496 read_lock_bh(&rt6_lock);
497
498 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
499
500restart:
501 rt = fn->leaf;
502
503 if ((rt->rt6i_flags & RTF_CACHE)) {
118f8c16
YH
504 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
505 if (rt == &ip6_null_entry)
506 rt = rt6_select(&fn->leaf, fl->oif, strict);
1da177e4 507 BACKTRACK();
1da177e4
LT
508 goto out;
509 }
510 if (rt->rt6i_flags & RTF_DEFAULT) {
554cfb7e
YH
511 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
512 if (rt == &ip6_null_entry)
513 rt = rt6_select(&fn->leaf, fl->oif, strict);
1da177e4 514 } else {
118f8c16
YH
515 rt = rt6_select(&fn->leaf, fl->oif, strict | RT6_SELECT_F_REACHABLE);
516 if (rt == &ip6_null_entry)
517 rt = rt6_select(&fn->leaf, fl->oif, strict);
1da177e4
LT
518 BACKTRACK();
519 }
520
fb9de91e
YH
521 dst_hold(&rt->u.dst);
522 read_unlock_bh(&rt6_lock);
523
519fbd87 524 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 525 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
526 else {
527#if CLONE_OFFLINK_ROUTE
528 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
529#else
530 goto out2;
531#endif
532 }
1da177e4 533
519fbd87
YH
534 dst_release(&rt->u.dst);
535 rt = nrt ? : &ip6_null_entry;
1da177e4 536
519fbd87
YH
537 dst_hold(&rt->u.dst);
538 if (nrt) {
539 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
540 if (!err)
1da177e4 541 goto out2;
1da177e4 542 }
e40cf353 543
519fbd87
YH
544 if (--attempts <= 0)
545 goto out2;
546
547 /*
548 * Race condition! In the gap, when rt6_lock was
549 * released someone could insert this route. Relookup.
550 */
551 dst_release(&rt->u.dst);
552 goto relookup;
553
554out:
555 dst_hold(&rt->u.dst);
556 read_unlock_bh(&rt6_lock);
1da177e4
LT
557out2:
558 rt->u.dst.lastuse = jiffies;
559 rt->u.dst.__use++;
560 return &rt->u.dst;
561}
562
563
564/*
565 * Destination cache support functions
566 */
567
568static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
569{
570 struct rt6_info *rt;
571
572 rt = (struct rt6_info *) dst;
573
574 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
575 return dst;
576
577 return NULL;
578}
579
580static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
581{
582 struct rt6_info *rt = (struct rt6_info *) dst;
583
584 if (rt) {
585 if (rt->rt6i_flags & RTF_CACHE)
0d51aa80 586 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
587 else
588 dst_release(dst);
589 }
590 return NULL;
591}
592
593static void ip6_link_failure(struct sk_buff *skb)
594{
595 struct rt6_info *rt;
596
597 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
598
599 rt = (struct rt6_info *) skb->dst;
600 if (rt) {
601 if (rt->rt6i_flags&RTF_CACHE) {
602 dst_set_expires(&rt->u.dst, 0);
603 rt->rt6i_flags |= RTF_EXPIRES;
604 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
605 rt->rt6i_node->fn_sernum = -1;
606 }
607}
608
609static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
610{
611 struct rt6_info *rt6 = (struct rt6_info*)dst;
612
613 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
614 rt6->rt6i_flags |= RTF_MODIFIED;
615 if (mtu < IPV6_MIN_MTU) {
616 mtu = IPV6_MIN_MTU;
617 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
618 }
619 dst->metrics[RTAX_MTU-1] = mtu;
620 }
621}
622
623/* Protected by rt6_lock. */
624static struct dst_entry *ndisc_dst_gc_list;
625static int ipv6_get_mtu(struct net_device *dev);
626
627static inline unsigned int ipv6_advmss(unsigned int mtu)
628{
629 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
630
631 if (mtu < ip6_rt_min_advmss)
632 mtu = ip6_rt_min_advmss;
633
634 /*
635 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
636 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
637 * IPV6_MAXPLEN is also valid and means: "any MSS,
638 * rely only on pmtu discovery"
639 */
640 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
641 mtu = IPV6_MAXPLEN;
642 return mtu;
643}
644
645struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
646 struct neighbour *neigh,
647 struct in6_addr *addr,
648 int (*output)(struct sk_buff *))
649{
650 struct rt6_info *rt;
651 struct inet6_dev *idev = in6_dev_get(dev);
652
653 if (unlikely(idev == NULL))
654 return NULL;
655
656 rt = ip6_dst_alloc();
657 if (unlikely(rt == NULL)) {
658 in6_dev_put(idev);
659 goto out;
660 }
661
662 dev_hold(dev);
663 if (neigh)
664 neigh_hold(neigh);
665 else
666 neigh = ndisc_get_neigh(dev, addr);
667
668 rt->rt6i_dev = dev;
669 rt->rt6i_idev = idev;
670 rt->rt6i_nexthop = neigh;
671 atomic_set(&rt->u.dst.__refcnt, 1);
672 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
673 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
674 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
675 rt->u.dst.output = output;
676
677#if 0 /* there's no chance to use these for ndisc */
678 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
679 ? DST_HOST
680 : 0;
681 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
682 rt->rt6i_dst.plen = 128;
683#endif
684
685 write_lock_bh(&rt6_lock);
686 rt->u.dst.next = ndisc_dst_gc_list;
687 ndisc_dst_gc_list = &rt->u.dst;
688 write_unlock_bh(&rt6_lock);
689
690 fib6_force_start_gc();
691
692out:
693 return (struct dst_entry *)rt;
694}
695
696int ndisc_dst_gc(int *more)
697{
698 struct dst_entry *dst, *next, **pprev;
699 int freed;
700
701 next = NULL;
702 pprev = &ndisc_dst_gc_list;
703 freed = 0;
704 while ((dst = *pprev) != NULL) {
705 if (!atomic_read(&dst->__refcnt)) {
706 *pprev = dst->next;
707 dst_free(dst);
708 freed++;
709 } else {
710 pprev = &dst->next;
711 (*more)++;
712 }
713 }
714
715 return freed;
716}
717
718static int ip6_dst_gc(void)
719{
720 static unsigned expire = 30*HZ;
721 static unsigned long last_gc;
722 unsigned long now = jiffies;
723
724 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
725 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
726 goto out;
727
728 expire++;
729 fib6_run_gc(expire);
730 last_gc = now;
731 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
732 expire = ip6_rt_gc_timeout>>1;
733
734out:
735 expire -= expire>>ip6_rt_gc_elasticity;
736 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
737}
738
739/* Clean host part of a prefix. Not necessary in radix tree,
740 but results in cleaner routing tables.
741
742 Remove it only when all the things will work!
743 */
744
745static int ipv6_get_mtu(struct net_device *dev)
746{
747 int mtu = IPV6_MIN_MTU;
748 struct inet6_dev *idev;
749
750 idev = in6_dev_get(dev);
751 if (idev) {
752 mtu = idev->cnf.mtu6;
753 in6_dev_put(idev);
754 }
755 return mtu;
756}
757
758int ipv6_get_hoplimit(struct net_device *dev)
759{
760 int hoplimit = ipv6_devconf.hop_limit;
761 struct inet6_dev *idev;
762
763 idev = in6_dev_get(dev);
764 if (idev) {
765 hoplimit = idev->cnf.hop_limit;
766 in6_dev_put(idev);
767 }
768 return hoplimit;
769}
770
771/*
772 *
773 */
774
0d51aa80
JHS
775int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
776 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
777{
778 int err;
779 struct rtmsg *r;
780 struct rtattr **rta;
781 struct rt6_info *rt = NULL;
782 struct net_device *dev = NULL;
783 struct inet6_dev *idev = NULL;
784 int addr_type;
785
786 rta = (struct rtattr **) _rtattr;
787
788 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
789 return -EINVAL;
790#ifndef CONFIG_IPV6_SUBTREES
791 if (rtmsg->rtmsg_src_len)
792 return -EINVAL;
793#endif
794 if (rtmsg->rtmsg_ifindex) {
795 err = -ENODEV;
796 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
797 if (!dev)
798 goto out;
799 idev = in6_dev_get(dev);
800 if (!idev)
801 goto out;
802 }
803
804 if (rtmsg->rtmsg_metric == 0)
805 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
806
807 rt = ip6_dst_alloc();
808
809 if (rt == NULL) {
810 err = -ENOMEM;
811 goto out;
812 }
813
814 rt->u.dst.obsolete = -1;
3dd4bc68 815 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1da177e4
LT
816 if (nlh && (r = NLMSG_DATA(nlh))) {
817 rt->rt6i_protocol = r->rtm_protocol;
818 } else {
819 rt->rt6i_protocol = RTPROT_BOOT;
820 }
821
822 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
823
824 if (addr_type & IPV6_ADDR_MULTICAST)
825 rt->u.dst.input = ip6_mc_input;
826 else
827 rt->u.dst.input = ip6_forward;
828
829 rt->u.dst.output = ip6_output;
830
831 ipv6_addr_prefix(&rt->rt6i_dst.addr,
832 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
833 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
834 if (rt->rt6i_dst.plen == 128)
835 rt->u.dst.flags = DST_HOST;
836
837#ifdef CONFIG_IPV6_SUBTREES
838 ipv6_addr_prefix(&rt->rt6i_src.addr,
839 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
840 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
841#endif
842
843 rt->rt6i_metric = rtmsg->rtmsg_metric;
844
845 /* We cannot add true routes via loopback here,
846 they would result in kernel looping; promote them to reject routes
847 */
848 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
849 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
850 /* hold loopback dev/idev if we haven't done so. */
851 if (dev != &loopback_dev) {
852 if (dev) {
853 dev_put(dev);
854 in6_dev_put(idev);
855 }
856 dev = &loopback_dev;
857 dev_hold(dev);
858 idev = in6_dev_get(dev);
859 if (!idev) {
860 err = -ENODEV;
861 goto out;
862 }
863 }
864 rt->u.dst.output = ip6_pkt_discard_out;
865 rt->u.dst.input = ip6_pkt_discard;
866 rt->u.dst.error = -ENETUNREACH;
867 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
868 goto install_route;
869 }
870
871 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
872 struct in6_addr *gw_addr;
873 int gwa_type;
874
875 gw_addr = &rtmsg->rtmsg_gateway;
876 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
877 gwa_type = ipv6_addr_type(gw_addr);
878
879 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
880 struct rt6_info *grt;
881
882 /* IPv6 strictly inhibits using not link-local
883 addresses as nexthop address.
884 Otherwise, router will not able to send redirects.
885 It is very good, but in some (rare!) circumstances
886 (SIT, PtP, NBMA NOARP links) it is handy to allow
887 some exceptions. --ANK
888 */
889 err = -EINVAL;
890 if (!(gwa_type&IPV6_ADDR_UNICAST))
891 goto out;
892
893 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
894
895 err = -EHOSTUNREACH;
896 if (grt == NULL)
897 goto out;
898 if (dev) {
899 if (dev != grt->rt6i_dev) {
900 dst_release(&grt->u.dst);
901 goto out;
902 }
903 } else {
904 dev = grt->rt6i_dev;
905 idev = grt->rt6i_idev;
906 dev_hold(dev);
907 in6_dev_hold(grt->rt6i_idev);
908 }
909 if (!(grt->rt6i_flags&RTF_GATEWAY))
910 err = 0;
911 dst_release(&grt->u.dst);
912
913 if (err)
914 goto out;
915 }
916 err = -EINVAL;
917 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
918 goto out;
919 }
920
921 err = -ENODEV;
922 if (dev == NULL)
923 goto out;
924
925 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
926 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
927 if (IS_ERR(rt->rt6i_nexthop)) {
928 err = PTR_ERR(rt->rt6i_nexthop);
929 rt->rt6i_nexthop = NULL;
930 goto out;
931 }
932 }
933
934 rt->rt6i_flags = rtmsg->rtmsg_flags;
935
936install_route:
937 if (rta && rta[RTA_METRICS-1]) {
938 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
939 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
940
941 while (RTA_OK(attr, attrlen)) {
942 unsigned flavor = attr->rta_type;
943 if (flavor) {
944 if (flavor > RTAX_MAX) {
945 err = -EINVAL;
946 goto out;
947 }
948 rt->u.dst.metrics[flavor-1] =
949 *(u32 *)RTA_DATA(attr);
950 }
951 attr = RTA_NEXT(attr, attrlen);
952 }
953 }
954
955 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
956 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
957 if (!rt->u.dst.metrics[RTAX_MTU-1])
958 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
959 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
960 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
961 rt->u.dst.dev = dev;
962 rt->rt6i_idev = idev;
0d51aa80 963 return ip6_ins_rt(rt, nlh, _rtattr, req);
1da177e4
LT
964
965out:
966 if (dev)
967 dev_put(dev);
968 if (idev)
969 in6_dev_put(idev);
970 if (rt)
971 dst_free((struct dst_entry *) rt);
972 return err;
973}
974
0d51aa80 975int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
976{
977 int err;
978
979 write_lock_bh(&rt6_lock);
980
0d51aa80 981 err = fib6_del(rt, nlh, _rtattr, req);
1da177e4
LT
982 dst_release(&rt->u.dst);
983
984 write_unlock_bh(&rt6_lock);
985
986 return err;
987}
988
0d51aa80 989static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
990{
991 struct fib6_node *fn;
992 struct rt6_info *rt;
993 int err = -ESRCH;
994
995 read_lock_bh(&rt6_lock);
996
997 fn = fib6_locate(&ip6_routing_table,
998 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
999 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1000
1001 if (fn) {
1002 for (rt = fn->leaf; rt; rt = rt->u.next) {
1003 if (rtmsg->rtmsg_ifindex &&
1004 (rt->rt6i_dev == NULL ||
1005 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1006 continue;
1007 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1008 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1009 continue;
1010 if (rtmsg->rtmsg_metric &&
1011 rtmsg->rtmsg_metric != rt->rt6i_metric)
1012 continue;
1013 dst_hold(&rt->u.dst);
1014 read_unlock_bh(&rt6_lock);
1015
0d51aa80 1016 return ip6_del_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1017 }
1018 }
1019 read_unlock_bh(&rt6_lock);
1020
1021 return err;
1022}
1023
1024/*
1025 * Handle redirects
1026 */
1027void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1028 struct neighbour *neigh, u8 *lladdr, int on_link)
1029{
1030 struct rt6_info *rt, *nrt;
1031
1032 /* Locate old route to this destination. */
1033 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1034
1035 if (rt == NULL)
1036 return;
1037
1038 if (neigh->dev != rt->rt6i_dev)
1039 goto out;
1040
1041 /*
1042 * Current route is on-link; redirect is always invalid.
1043 *
1044 * Seems, previous statement is not true. It could
1045 * be node, which looks for us as on-link (f.e. proxy ndisc)
1046 * But then router serving it might decide, that we should
1047 * know truth 8)8) --ANK (980726).
1048 */
1049 if (!(rt->rt6i_flags&RTF_GATEWAY))
1050 goto out;
1051
1052 /*
1053 * RFC 2461 specifies that redirects should only be
1054 * accepted if they come from the nexthop to the target.
1055 * Due to the way default routers are chosen, this notion
1056 * is a bit fuzzy and one might need to check all default
1057 * routers.
1058 */
1059 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1060 if (rt->rt6i_flags & RTF_DEFAULT) {
1061 struct rt6_info *rt1;
1062
1063 read_lock(&rt6_lock);
1064 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1065 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1066 dst_hold(&rt1->u.dst);
1067 dst_release(&rt->u.dst);
1068 read_unlock(&rt6_lock);
1069 rt = rt1;
1070 goto source_ok;
1071 }
1072 }
1073 read_unlock(&rt6_lock);
1074 }
1075 if (net_ratelimit())
1076 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1077 "for redirect target\n");
1078 goto out;
1079 }
1080
1081source_ok:
1082
1083 /*
1084 * We have finally decided to accept it.
1085 */
1086
1087 neigh_update(neigh, lladdr, NUD_STALE,
1088 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1089 NEIGH_UPDATE_F_OVERRIDE|
1090 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1091 NEIGH_UPDATE_F_ISROUTER))
1092 );
1093
1094 /*
1095 * Redirect received -> path was valid.
1096 * Look, redirects are sent only in response to data packets,
1097 * so that this nexthop apparently is reachable. --ANK
1098 */
1099 dst_confirm(&rt->u.dst);
1100
1101 /* Duplicate redirect: silently ignore. */
1102 if (neigh == rt->u.dst.neighbour)
1103 goto out;
1104
1105 nrt = ip6_rt_copy(rt);
1106 if (nrt == NULL)
1107 goto out;
1108
1109 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1110 if (on_link)
1111 nrt->rt6i_flags &= ~RTF_GATEWAY;
1112
1113 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1114 nrt->rt6i_dst.plen = 128;
1115 nrt->u.dst.flags |= DST_HOST;
1116
1117 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1118 nrt->rt6i_nexthop = neigh_clone(neigh);
1119 /* Reset pmtu, it may be better */
1120 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1121 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1122
0d51aa80 1123 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1da177e4
LT
1124 goto out;
1125
1126 if (rt->rt6i_flags&RTF_CACHE) {
0d51aa80 1127 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1128 return;
1129 }
1130
1131out:
1132 dst_release(&rt->u.dst);
1133 return;
1134}
1135
1136/*
1137 * Handle ICMP "packet too big" messages
1138 * i.e. Path MTU discovery
1139 */
1140
1141void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1142 struct net_device *dev, u32 pmtu)
1143{
1144 struct rt6_info *rt, *nrt;
1145 int allfrag = 0;
1146
1147 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1148 if (rt == NULL)
1149 return;
1150
1151 if (pmtu >= dst_mtu(&rt->u.dst))
1152 goto out;
1153
1154 if (pmtu < IPV6_MIN_MTU) {
1155 /*
1156 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1157 * MTU (1280) and a fragment header should always be included
1158 * after a node receiving Too Big message reporting PMTU is
1159 * less than the IPv6 Minimum Link MTU.
1160 */
1161 pmtu = IPV6_MIN_MTU;
1162 allfrag = 1;
1163 }
1164
1165 /* New mtu received -> path was valid.
1166 They are sent only in response to data packets,
1167 so that this nexthop apparently is reachable. --ANK
1168 */
1169 dst_confirm(&rt->u.dst);
1170
1171 /* Host route. If it is static, it would be better
1172 not to override it, but add new one, so that
1173 when cache entry will expire old pmtu
1174 would return automatically.
1175 */
1176 if (rt->rt6i_flags & RTF_CACHE) {
1177 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1178 if (allfrag)
1179 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1180 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1181 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1182 goto out;
1183 }
1184
1185 /* Network route.
1186 Two cases are possible:
1187 1. It is connected route. Action: COW
1188 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1189 */
d5315b50 1190 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1191 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1192 else
1193 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1194
d5315b50 1195 if (nrt) {
a1e78363
YH
1196 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1197 if (allfrag)
1198 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1199
1200 /* According to RFC 1981, detecting PMTU increase shouldn't be
1201 * happened within 5 mins, the recommended timer is 10 mins.
1202 * Here this route expiration time is set to ip6_rt_mtu_expires
1203 * which is 10 mins. After 10 mins the decreased pmtu is expired
1204 * and detecting PMTU increase will be automatically happened.
1205 */
1206 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1207 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1208
1209 ip6_ins_rt(nrt, NULL, NULL, NULL);
1da177e4 1210 }
1da177e4
LT
1211out:
1212 dst_release(&rt->u.dst);
1213}
1214
1215/*
1216 * Misc support functions
1217 */
1218
1219static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1220{
1221 struct rt6_info *rt = ip6_dst_alloc();
1222
1223 if (rt) {
1224 rt->u.dst.input = ort->u.dst.input;
1225 rt->u.dst.output = ort->u.dst.output;
1226
1227 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1228 rt->u.dst.dev = ort->u.dst.dev;
1229 if (rt->u.dst.dev)
1230 dev_hold(rt->u.dst.dev);
1231 rt->rt6i_idev = ort->rt6i_idev;
1232 if (rt->rt6i_idev)
1233 in6_dev_hold(rt->rt6i_idev);
1234 rt->u.dst.lastuse = jiffies;
1235 rt->rt6i_expires = 0;
1236
1237 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1238 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1239 rt->rt6i_metric = 0;
1240
1241 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1242#ifdef CONFIG_IPV6_SUBTREES
1243 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1244#endif
1245 }
1246 return rt;
1247}
1248
1249struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1250{
1251 struct rt6_info *rt;
1252 struct fib6_node *fn;
1253
1254 fn = &ip6_routing_table;
1255
1256 write_lock_bh(&rt6_lock);
1257 for (rt = fn->leaf; rt; rt=rt->u.next) {
1258 if (dev == rt->rt6i_dev &&
045927ff 1259 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1260 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1261 break;
1262 }
1263 if (rt)
1264 dst_hold(&rt->u.dst);
1265 write_unlock_bh(&rt6_lock);
1266 return rt;
1267}
1268
1269struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1270 struct net_device *dev)
1271{
1272 struct in6_rtmsg rtmsg;
1273
1274 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1275 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1276 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1277 rtmsg.rtmsg_metric = 1024;
1278 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1279
1280 rtmsg.rtmsg_ifindex = dev->ifindex;
1281
0d51aa80 1282 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1283 return rt6_get_dflt_router(gwaddr, dev);
1284}
1285
1286void rt6_purge_dflt_routers(void)
1287{
1288 struct rt6_info *rt;
1289
1290restart:
1291 read_lock_bh(&rt6_lock);
1292 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1293 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1294 dst_hold(&rt->u.dst);
1295
1da177e4
LT
1296 read_unlock_bh(&rt6_lock);
1297
0d51aa80 1298 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1299
1300 goto restart;
1301 }
1302 }
1303 read_unlock_bh(&rt6_lock);
1304}
1305
1306int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1307{
1308 struct in6_rtmsg rtmsg;
1309 int err;
1310
1311 switch(cmd) {
1312 case SIOCADDRT: /* Add a route */
1313 case SIOCDELRT: /* Delete a route */
1314 if (!capable(CAP_NET_ADMIN))
1315 return -EPERM;
1316 err = copy_from_user(&rtmsg, arg,
1317 sizeof(struct in6_rtmsg));
1318 if (err)
1319 return -EFAULT;
1320
1321 rtnl_lock();
1322 switch (cmd) {
1323 case SIOCADDRT:
0d51aa80 1324 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1325 break;
1326 case SIOCDELRT:
0d51aa80 1327 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1328 break;
1329 default:
1330 err = -EINVAL;
1331 }
1332 rtnl_unlock();
1333
1334 return err;
1335 };
1336
1337 return -EINVAL;
1338}
1339
1340/*
1341 * Drop the packet on the floor
1342 */
1343
20380731 1344static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4
LT
1345{
1346 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1347 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1348 kfree_skb(skb);
1349 return 0;
1350}
1351
20380731 1352static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1353{
1354 skb->dev = skb->dst->dev;
1355 return ip6_pkt_discard(skb);
1356}
1357
1358/*
1359 * Allocate a dst for local (unicast / anycast) address.
1360 */
1361
1362struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1363 const struct in6_addr *addr,
1364 int anycast)
1365{
1366 struct rt6_info *rt = ip6_dst_alloc();
1367
1368 if (rt == NULL)
1369 return ERR_PTR(-ENOMEM);
1370
1371 dev_hold(&loopback_dev);
1372 in6_dev_hold(idev);
1373
1374 rt->u.dst.flags = DST_HOST;
1375 rt->u.dst.input = ip6_input;
1376 rt->u.dst.output = ip6_output;
1377 rt->rt6i_dev = &loopback_dev;
1378 rt->rt6i_idev = idev;
1379 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1380 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1381 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1382 rt->u.dst.obsolete = -1;
1383
1384 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1385 if (anycast)
1386 rt->rt6i_flags |= RTF_ANYCAST;
1387 else
1da177e4
LT
1388 rt->rt6i_flags |= RTF_LOCAL;
1389 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1390 if (rt->rt6i_nexthop == NULL) {
1391 dst_free((struct dst_entry *) rt);
1392 return ERR_PTR(-ENOMEM);
1393 }
1394
1395 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1396 rt->rt6i_dst.plen = 128;
1397
1398 atomic_set(&rt->u.dst.__refcnt, 1);
1399
1400 return rt;
1401}
1402
1403static int fib6_ifdown(struct rt6_info *rt, void *arg)
1404{
1405 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1406 rt != &ip6_null_entry) {
1407 RT6_TRACE("deleted by ifdown %p\n", rt);
1408 return -1;
1409 }
1410 return 0;
1411}
1412
1413void rt6_ifdown(struct net_device *dev)
1414{
1415 write_lock_bh(&rt6_lock);
1416 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1417 write_unlock_bh(&rt6_lock);
1418}
1419
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
1425
1426static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1427{
1428 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1429 struct inet6_dev *idev;
1430
1431 /* In IPv6 pmtu discovery is not optional,
1432 so that RTAX_MTU lock cannot disable it.
1433 We still use this lock to block changes
1434 caused by addrconf/ndisc.
1435 */
1436
1437 idev = __in6_dev_get(arg->dev);
1438 if (idev == NULL)
1439 return 0;
1440
1441 /* For administrative MTU increase, there is no way to discover
1442 IPv6 PMTU increase, so PMTU increase should be updated here.
1443 Since RFC 1981 doesn't include administrative MTU increase
1444 update PMTU increase is a MUST. (i.e. jumbo frame)
1445 */
1446 /*
1447 If new MTU is less than route PMTU, this new MTU will be the
1448 lowest MTU in the path, update the route PMTU to reflect PMTU
1449 decreases; if new MTU is greater than route PMTU, and the
1450 old MTU is the lowest MTU in the path, update the route PMTU
1451 to reflect the increase. In this case if the other nodes' MTU
1452 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1453 PMTU discouvery.
1454 */
1455 if (rt->rt6i_dev == arg->dev &&
1456 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1457 (dst_mtu(&rt->u.dst) > arg->mtu ||
1458 (dst_mtu(&rt->u.dst) < arg->mtu &&
1459 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1460 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1461 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1462 return 0;
1463}
1464
1465void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1466{
1467 struct rt6_mtu_change_arg arg;
1468
1469 arg.dev = dev;
1470 arg.mtu = mtu;
1471 read_lock_bh(&rt6_lock);
1472 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1473 read_unlock_bh(&rt6_lock);
1474}
1475
1476static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1477 struct in6_rtmsg *rtmsg)
1478{
1479 memset(rtmsg, 0, sizeof(*rtmsg));
1480
1481 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1482 rtmsg->rtmsg_src_len = r->rtm_src_len;
1483 rtmsg->rtmsg_flags = RTF_UP;
1484 if (r->rtm_type == RTN_UNREACHABLE)
1485 rtmsg->rtmsg_flags |= RTF_REJECT;
1486
1487 if (rta[RTA_GATEWAY-1]) {
1488 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1489 return -EINVAL;
1490 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1491 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1492 }
1493 if (rta[RTA_DST-1]) {
1494 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1495 return -EINVAL;
1496 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1497 }
1498 if (rta[RTA_SRC-1]) {
1499 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1500 return -EINVAL;
1501 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1502 }
1503 if (rta[RTA_OIF-1]) {
1504 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1505 return -EINVAL;
1506 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1507 }
1508 if (rta[RTA_PRIORITY-1]) {
1509 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1510 return -EINVAL;
1511 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1512 }
1513 return 0;
1514}
1515
1516int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1517{
1518 struct rtmsg *r = NLMSG_DATA(nlh);
1519 struct in6_rtmsg rtmsg;
1520
1521 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1522 return -EINVAL;
0d51aa80 1523 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1524}
1525
1526int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1527{
1528 struct rtmsg *r = NLMSG_DATA(nlh);
1529 struct in6_rtmsg rtmsg;
1530
1531 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1532 return -EINVAL;
0d51aa80 1533 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1534}
1535
/* Per-call state handed to rt6_dump_route() while dumping the table. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* skb the dump is written into */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1541
1542static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1543 struct in6_addr *dst, struct in6_addr *src,
1544 int iif, int type, u32 pid, u32 seq,
1545 int prefix, unsigned int flags)
1da177e4
LT
1546{
1547 struct rtmsg *rtm;
1548 struct nlmsghdr *nlh;
1549 unsigned char *b = skb->tail;
1550 struct rta_cacheinfo ci;
1551
1552 if (prefix) { /* user wants prefix routes only */
1553 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1554 /* success since this is not a prefix route */
1555 return 1;
1556 }
1557 }
1558
b6544c0b 1559 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1da177e4
LT
1560 rtm = NLMSG_DATA(nlh);
1561 rtm->rtm_family = AF_INET6;
1562 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1563 rtm->rtm_src_len = rt->rt6i_src.plen;
1564 rtm->rtm_tos = 0;
1565 rtm->rtm_table = RT_TABLE_MAIN;
1566 if (rt->rt6i_flags&RTF_REJECT)
1567 rtm->rtm_type = RTN_UNREACHABLE;
1568 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1569 rtm->rtm_type = RTN_LOCAL;
1570 else
1571 rtm->rtm_type = RTN_UNICAST;
1572 rtm->rtm_flags = 0;
1573 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1574 rtm->rtm_protocol = rt->rt6i_protocol;
1575 if (rt->rt6i_flags&RTF_DYNAMIC)
1576 rtm->rtm_protocol = RTPROT_REDIRECT;
1577 else if (rt->rt6i_flags & RTF_ADDRCONF)
1578 rtm->rtm_protocol = RTPROT_KERNEL;
1579 else if (rt->rt6i_flags&RTF_DEFAULT)
1580 rtm->rtm_protocol = RTPROT_RA;
1581
1582 if (rt->rt6i_flags&RTF_CACHE)
1583 rtm->rtm_flags |= RTM_F_CLONED;
1584
1585 if (dst) {
1586 RTA_PUT(skb, RTA_DST, 16, dst);
1587 rtm->rtm_dst_len = 128;
1588 } else if (rtm->rtm_dst_len)
1589 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1590#ifdef CONFIG_IPV6_SUBTREES
1591 if (src) {
1592 RTA_PUT(skb, RTA_SRC, 16, src);
1593 rtm->rtm_src_len = 128;
1594 } else if (rtm->rtm_src_len)
1595 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1596#endif
1597 if (iif)
1598 RTA_PUT(skb, RTA_IIF, 4, &iif);
1599 else if (dst) {
1600 struct in6_addr saddr_buf;
1601 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1602 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1603 }
1604 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1605 goto rtattr_failure;
1606 if (rt->u.dst.neighbour)
1607 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1608 if (rt->u.dst.dev)
1609 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1610 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1611 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1612 if (rt->rt6i_expires)
1613 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1614 else
1615 ci.rta_expires = 0;
1616 ci.rta_used = rt->u.dst.__use;
1617 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1618 ci.rta_error = rt->u.dst.error;
1619 ci.rta_id = 0;
1620 ci.rta_ts = 0;
1621 ci.rta_tsage = 0;
1622 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1623 nlh->nlmsg_len = skb->tail - b;
1624 return skb->len;
1625
1626nlmsg_failure:
1627rtattr_failure:
1628 skb_trim(skb, b - skb->data);
1629 return -1;
1630}
1631
1632static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1633{
1634 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1635 int prefix;
1636
1637 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1638 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1639 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1640 } else
1641 prefix = 0;
1642
1643 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1644 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 1645 prefix, NLM_F_MULTI);
1da177e4
LT
1646}
1647
1648static int fib6_dump_node(struct fib6_walker_t *w)
1649{
1650 int res;
1651 struct rt6_info *rt;
1652
1653 for (rt = w->leaf; rt; rt = rt->u.next) {
1654 res = rt6_dump_route(rt, w->args);
1655 if (res < 0) {
1656 /* Frame is full, suspend walking */
1657 w->leaf = rt;
1658 return 1;
1659 }
1660 BUG_TRAP(res!=0);
1661 }
1662 w->leaf = NULL;
1663 return 0;
1664}
1665
1666static void fib6_dump_end(struct netlink_callback *cb)
1667{
1668 struct fib6_walker_t *w = (void*)cb->args[0];
1669
1670 if (w) {
1671 cb->args[0] = 0;
1672 fib6_walker_unlink(w);
1673 kfree(w);
1674 }
efacfbcb
HX
1675 cb->done = (void*)cb->args[1];
1676 cb->args[1] = 0;
1da177e4
LT
1677}
1678
1679static int fib6_dump_done(struct netlink_callback *cb)
1680{
1681 fib6_dump_end(cb);
a8f74b22 1682 return cb->done ? cb->done(cb) : 0;
1da177e4
LT
1683}
1684
1685int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1686{
1687 struct rt6_rtnl_dump_arg arg;
1688 struct fib6_walker_t *w;
1689 int res;
1690
1691 arg.skb = skb;
1692 arg.cb = cb;
1693
1694 w = (void*)cb->args[0];
1695 if (w == NULL) {
1696 /* New dump:
1697 *
1698 * 1. hook callback destructor.
1699 */
1700 cb->args[1] = (long)cb->done;
1701 cb->done = fib6_dump_done;
1702
1703 /*
1704 * 2. allocate and initialize walker.
1705 */
9e147a1c 1706 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1da177e4
LT
1707 if (w == NULL)
1708 return -ENOMEM;
1709 RT6_TRACE("dump<%p", w);
1710 memset(w, 0, sizeof(*w));
1711 w->root = &ip6_routing_table;
1712 w->func = fib6_dump_node;
1713 w->args = &arg;
1714 cb->args[0] = (long)w;
1715 read_lock_bh(&rt6_lock);
1716 res = fib6_walk(w);
1717 read_unlock_bh(&rt6_lock);
1718 } else {
1719 w->args = &arg;
1720 read_lock_bh(&rt6_lock);
1721 res = fib6_walk_continue(w);
1722 read_unlock_bh(&rt6_lock);
1723 }
1724#if RT6_DEBUG >= 3
1725 if (res <= 0 && skb->len == 0)
1726 RT6_TRACE("%p>dump end\n", w);
1727#endif
1728 res = res < 0 ? res : skb->len;
1729 /* res < 0 is an error. (really, impossible)
1730 res == 0 means that dump is complete, but skb still can contain data.
1731 res > 0 dump is not complete, but frame is full.
1732 */
1733 /* Destroy walker, if dump of this table is complete. */
1734 if (res <= 0)
1735 fib6_dump_end(cb);
1736 return res;
1737}
1738
1739int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1740{
1741 struct rtattr **rta = arg;
1742 int iif = 0;
1743 int err = -ENOBUFS;
1744 struct sk_buff *skb;
1745 struct flowi fl;
1746 struct rt6_info *rt;
1747
1748 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1749 if (skb == NULL)
1750 goto out;
1751
1752 /* Reserve room for dummy headers, this skb can pass
1753 through good chunk of routing engine.
1754 */
1755 skb->mac.raw = skb->data;
1756 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1757
1758 memset(&fl, 0, sizeof(fl));
1759 if (rta[RTA_SRC-1])
1760 ipv6_addr_copy(&fl.fl6_src,
1761 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1762 if (rta[RTA_DST-1])
1763 ipv6_addr_copy(&fl.fl6_dst,
1764 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1765
1766 if (rta[RTA_IIF-1])
1767 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1768
1769 if (iif) {
1770 struct net_device *dev;
1771 dev = __dev_get_by_index(iif);
1772 if (!dev) {
1773 err = -ENODEV;
1774 goto out_free;
1775 }
1776 }
1777
1778 fl.oif = 0;
1779 if (rta[RTA_OIF-1])
1780 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1781
1782 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1783
1784 skb->dst = &rt->u.dst;
1785
1786 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1787 err = rt6_fill_node(skb, rt,
1788 &fl.fl6_dst, &fl.fl6_src,
1789 iif,
1790 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 1791 nlh->nlmsg_seq, 0, 0);
1da177e4
LT
1792 if (err < 0) {
1793 err = -EMSGSIZE;
1794 goto out_free;
1795 }
1796
1797 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1798 if (err > 0)
1799 err = 0;
1800out:
1801 return err;
1802out_free:
1803 kfree_skb(skb);
1804 goto out;
1805}
1806
0d51aa80
JHS
1807void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1808 struct netlink_skb_parms *req)
1da177e4
LT
1809{
1810 struct sk_buff *skb;
1811 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
0d51aa80
JHS
1812 u32 pid = current->pid;
1813 u32 seq = 0;
1da177e4 1814
0d51aa80
JHS
1815 if (req)
1816 pid = req->pid;
1817 if (nlh)
1818 seq = nlh->nlmsg_seq;
1819
1da177e4
LT
1820 skb = alloc_skb(size, gfp_any());
1821 if (!skb) {
ac6d439d 1822 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1da177e4
LT
1823 return;
1824 }
0d51aa80 1825 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1da177e4 1826 kfree_skb(skb);
ac6d439d 1827 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1da177e4
LT
1828 return;
1829 }
ac6d439d
PM
1830 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1831 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1da177e4
LT
1832}
1833
1834/*
1835 * /proc
1836 */
1837
1838#ifdef CONFIG_PROC_FS
1839
/* Record size rt6_info_route()/rt6_proc_info() use to map a file offset to a record. */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor state shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* requested file offset */
	int length;	/* requested number of bytes */
	int skip;	/* records skipped so far */
	int len;	/* bytes written into buffer so far */
};
1850
1851static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1852{
1853 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1854 int i;
1855
1856 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1857 arg->skip++;
1858 return 0;
1859 }
1860
1861 if (arg->len >= arg->length)
1862 return 0;
1863
1864 for (i=0; i<16; i++) {
1865 sprintf(arg->buffer + arg->len, "%02x",
1866 rt->rt6i_dst.addr.s6_addr[i]);
1867 arg->len += 2;
1868 }
1869 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1870 rt->rt6i_dst.plen);
1871
1872#ifdef CONFIG_IPV6_SUBTREES
1873 for (i=0; i<16; i++) {
1874 sprintf(arg->buffer + arg->len, "%02x",
1875 rt->rt6i_src.addr.s6_addr[i]);
1876 arg->len += 2;
1877 }
1878 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1879 rt->rt6i_src.plen);
1880#else
1881 sprintf(arg->buffer + arg->len,
1882 "00000000000000000000000000000000 00 ");
1883 arg->len += 36;
1884#endif
1885
1886 if (rt->rt6i_nexthop) {
1887 for (i=0; i<16; i++) {
1888 sprintf(arg->buffer + arg->len, "%02x",
1889 rt->rt6i_nexthop->primary_key[i]);
1890 arg->len += 2;
1891 }
1892 } else {
1893 sprintf(arg->buffer + arg->len,
1894 "00000000000000000000000000000000");
1895 arg->len += 32;
1896 }
1897 arg->len += sprintf(arg->buffer + arg->len,
1898 " %08x %08x %08x %08x %8s\n",
1899 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1900 rt->u.dst.__use, rt->rt6i_flags,
1901 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1902 return 0;
1903}
1904
1905static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1906{
1907 struct rt6_proc_arg arg;
1908 arg.buffer = buffer;
1909 arg.offset = offset;
1910 arg.length = length;
1911 arg.skip = 0;
1912 arg.len = 0;
1913
1914 read_lock_bh(&rt6_lock);
1915 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1916 read_unlock_bh(&rt6_lock);
1917
1918 *start = buffer;
1919 if (offset)
1920 *start += offset % RT6_INFO_LEN;
1921
1922 arg.len -= offset % RT6_INFO_LEN;
1923
1924 if (arg.len > length)
1925 arg.len = length;
1926 if (arg.len < 0)
1927 arg.len = 0;
1928
1929 return arg.len;
1930}
1931
1da177e4
LT
1932static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1933{
1934 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1935 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1936 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1937 rt6_stats.fib_rt_cache,
1938 atomic_read(&ip6_dst_ops.entries),
1939 rt6_stats.fib_discarded_routes);
1940
1941 return 0;
1942}
1943
1944static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1945{
1946 return single_open(file, rt6_stats_seq_show, NULL);
1947}
1948
1949static struct file_operations rt6_stats_seq_fops = {
1950 .owner = THIS_MODULE,
1951 .open = rt6_stats_seq_open,
1952 .read = seq_read,
1953 .llseek = seq_lseek,
1954 .release = single_release,
1955};
1956#endif /* CONFIG_PROC_FS */
1957
1958#ifdef CONFIG_SYSCTL
1959
1960static int flush_delay;
1961
1962static
1963int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1964 void __user *buffer, size_t *lenp, loff_t *ppos)
1965{
1966 if (write) {
1967 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1968 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1969 return 0;
1970 } else
1971 return -EINVAL;
1972}
1973
1974ctl_table ipv6_route_table[] = {
1975 {
1976 .ctl_name = NET_IPV6_ROUTE_FLUSH,
1977 .procname = "flush",
1978 .data = &flush_delay,
1979 .maxlen = sizeof(int),
89c8b3a1 1980 .mode = 0200,
1da177e4
LT
1981 .proc_handler = &ipv6_sysctl_rtcache_flush
1982 },
1983 {
1984 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
1985 .procname = "gc_thresh",
1986 .data = &ip6_dst_ops.gc_thresh,
1987 .maxlen = sizeof(int),
1988 .mode = 0644,
1989 .proc_handler = &proc_dointvec,
1990 },
1991 {
1992 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
1993 .procname = "max_size",
1994 .data = &ip6_rt_max_size,
1995 .maxlen = sizeof(int),
1996 .mode = 0644,
1997 .proc_handler = &proc_dointvec,
1998 },
1999 {
2000 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2001 .procname = "gc_min_interval",
2002 .data = &ip6_rt_gc_min_interval,
2003 .maxlen = sizeof(int),
2004 .mode = 0644,
2005 .proc_handler = &proc_dointvec_jiffies,
2006 .strategy = &sysctl_jiffies,
2007 },
2008 {
2009 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2010 .procname = "gc_timeout",
2011 .data = &ip6_rt_gc_timeout,
2012 .maxlen = sizeof(int),
2013 .mode = 0644,
2014 .proc_handler = &proc_dointvec_jiffies,
2015 .strategy = &sysctl_jiffies,
2016 },
2017 {
2018 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2019 .procname = "gc_interval",
2020 .data = &ip6_rt_gc_interval,
2021 .maxlen = sizeof(int),
2022 .mode = 0644,
2023 .proc_handler = &proc_dointvec_jiffies,
2024 .strategy = &sysctl_jiffies,
2025 },
2026 {
2027 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2028 .procname = "gc_elasticity",
2029 .data = &ip6_rt_gc_elasticity,
2030 .maxlen = sizeof(int),
2031 .mode = 0644,
2032 .proc_handler = &proc_dointvec_jiffies,
2033 .strategy = &sysctl_jiffies,
2034 },
2035 {
2036 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2037 .procname = "mtu_expires",
2038 .data = &ip6_rt_mtu_expires,
2039 .maxlen = sizeof(int),
2040 .mode = 0644,
2041 .proc_handler = &proc_dointvec_jiffies,
2042 .strategy = &sysctl_jiffies,
2043 },
2044 {
2045 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2046 .procname = "min_adv_mss",
2047 .data = &ip6_rt_min_advmss,
2048 .maxlen = sizeof(int),
2049 .mode = 0644,
2050 .proc_handler = &proc_dointvec_jiffies,
2051 .strategy = &sysctl_jiffies,
2052 },
2053 {
2054 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2055 .procname = "gc_min_interval_ms",
2056 .data = &ip6_rt_gc_min_interval,
2057 .maxlen = sizeof(int),
2058 .mode = 0644,
2059 .proc_handler = &proc_dointvec_ms_jiffies,
2060 .strategy = &sysctl_ms_jiffies,
2061 },
2062 { .ctl_name = 0 }
2063};
2064
2065#endif
2066
2067void __init ip6_route_init(void)
2068{
2069 struct proc_dir_entry *p;
2070
2071 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2072 sizeof(struct rt6_info),
2073 0, SLAB_HWCACHE_ALIGN,
2074 NULL, NULL);
2075 if (!ip6_dst_ops.kmem_cachep)
2076 panic("cannot create ip6_dst_cache");
2077
2078 fib6_init();
2079#ifdef CONFIG_PROC_FS
2080 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2081 if (p)
2082 p->owner = THIS_MODULE;
2083
2084 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2085#endif
2086#ifdef CONFIG_XFRM
2087 xfrm6_init();
2088#endif
2089}
2090
2091void ip6_route_cleanup(void)
2092{
2093#ifdef CONFIG_PROC_FS
2094 proc_net_remove("ipv6_route");
2095 proc_net_remove("rt6_stats");
2096#endif
2097#ifdef CONFIG_XFRM
2098 xfrm6_fini();
2099#endif
2100 rt6_ifdown(NULL);
2101 fib6_gc_cleanup();
2102 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2103}