]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv6/route.c
[NET]: Network Event Notifier Mechanism.
[net-next-2.6.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16/* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
38#include <linux/netlink.h>
39#include <linux/if_arp.h>
40
41#ifdef CONFIG_PROC_FS
42#include <linux/proc_fs.h>
43#include <linux/seq_file.h>
44#endif
45
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
56
57#include <asm/uaccess.h>
58
59#ifdef CONFIG_SYSCTL
60#include <linux/sysctl.h>
61#endif
62
/*
 * Debug tracing switch.  With RT6_DEBUG at 3 or above, RDBG() and
 * RT6_TRACE() forward to printk(); at any lower level they compile
 * away to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
73
/*
 * When non-zero, ip6_route_input()/ip6_route_output() clone routes that
 * already have a nexthop (or are RTF_NONEXTHOP) via rt6_alloc_clone();
 * when zero such routes are used as they are.
 */
#define CLONE_OFFLINK_ROUTE 0

/*
 * Bits for the "strict" argument of rt6_select()/rt6_score_route():
 *   RT6_SELECT_F_IFACE     - reject routes whose device does not match oif
 *   RT6_SELECT_F_REACHABLE - reject routes whose neighbour check scores 0
 */
#define RT6_SELECT_F_IFACE	0x1
#define RT6_SELECT_F_REACHABLE	0x2
78
1da177e4
LT
79static int ip6_rt_max_size = 4096;
80static int ip6_rt_gc_min_interval = HZ / 2;
81static int ip6_rt_gc_timeout = 60*HZ;
82int ip6_rt_gc_interval = 30*HZ;
83static int ip6_rt_gc_elasticity = 9;
84static int ip6_rt_mtu_expires = 10*60*HZ;
85static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
86
87static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90static void ip6_dst_destroy(struct dst_entry *);
91static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93static int ip6_dst_gc(void);
94
95static int ip6_pkt_discard(struct sk_buff *skb);
96static int ip6_pkt_discard_out(struct sk_buff *skb);
97static void ip6_link_failure(struct sk_buff *skb);
98static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
99
70ceb4f5
YH
100#ifdef CONFIG_IPV6_ROUTE_INFO
101static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
103 unsigned pref);
104static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
106#endif
107
1da177e4
LT
108static struct dst_ops ip6_dst_ops = {
109 .family = AF_INET6,
110 .protocol = __constant_htons(ETH_P_IPV6),
111 .gc = ip6_dst_gc,
112 .gc_thresh = 1024,
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
120};
121
122struct rt6_info ip6_null_entry = {
123 .u = {
124 .dst = {
125 .__refcnt = ATOMIC_INIT(1),
126 .__use = 1,
127 .dev = &loopback_dev,
128 .obsolete = -1,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
133 .ops = &ip6_dst_ops,
134 .path = (struct dst_entry*)&ip6_null_entry,
135 }
136 },
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
140};
141
142struct fib6_node ip6_routing_table = {
143 .leaf = &ip6_null_entry,
144 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
145};
146
/*
 * Reader/writer lock protecting the whole IPv6 FIB: lookups take it for
 * reading, insertions and deletions for writing.
 */
DEFINE_RWLOCK(rt6_lock);
150
151
152/* allocate dst with ip6_dst_ops */
153static __inline__ struct rt6_info *ip6_dst_alloc(void)
154{
155 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
156}
157
158static void ip6_dst_destroy(struct dst_entry *dst)
159{
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
162
163 if (idev != NULL) {
164 rt->rt6i_idev = NULL;
165 in6_dev_put(idev);
166 }
167}
168
169static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
170 int how)
171{
172 struct rt6_info *rt = (struct rt6_info *)dst;
173 struct inet6_dev *idev = rt->rt6i_idev;
174
175 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
176 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
177 if (loopback_idev != NULL) {
178 rt->rt6i_idev = loopback_idev;
179 in6_dev_put(idev);
180 }
181 }
182}
183
184static __inline__ int rt6_check_expired(const struct rt6_info *rt)
185{
186 return (rt->rt6i_flags & RTF_EXPIRES &&
187 time_after(jiffies, rt->rt6i_expires));
188}
189
/*
 *	Route lookup. The caller is expected to hold rt6_lock
 *	(either for reading or for writing).
 */
193
194static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
195 int oif,
196 int strict)
197{
198 struct rt6_info *local = NULL;
199 struct rt6_info *sprt;
200
201 if (oif) {
202 for (sprt = rt; sprt; sprt = sprt->u.next) {
203 struct net_device *dev = sprt->rt6i_dev;
204 if (dev->ifindex == oif)
205 return sprt;
206 if (dev->flags & IFF_LOOPBACK) {
207 if (sprt->rt6i_idev == NULL ||
208 sprt->rt6i_idev->dev->ifindex != oif) {
209 if (strict && oif)
210 continue;
211 if (local && (!oif ||
212 local->rt6i_idev->dev->ifindex == oif))
213 continue;
214 }
215 local = sprt;
216 }
217 }
218
219 if (local)
220 return local;
221
222 if (strict)
223 return &ip6_null_entry;
224 }
225 return rt;
226}
227
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the nexthop neighbour of @rt is not
 * in a NUD_VALID state and was last updated more than
 * rtr_probe_interval ago, send a neighbour solicitation to its
 * solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; the rate limit is implemented by stamping neigh->updated.
 * That stamp is a write to the neighbour entry, so it is done under the
 * write side of neigh->lock (the original code only held the read side,
 * letting concurrent probers race on the update).
 *
 * A NULL @rt or a route without a nexthop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int probe = 0;

	/* cheap unlocked pre-check; re-validated under the lock below */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		neigh->updated = jiffies;
		probe = 1;
	}
	write_unlock_bh(&neigh->lock);

	if (!probe)
		return;

	/* the solicitation itself is sent without holding neigh->lock */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
263
1da177e4 264/*
554cfb7e 265 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 266 */
554cfb7e
YH
267static int inline rt6_check_dev(struct rt6_info *rt, int oif)
268{
269 struct net_device *dev = rt->rt6i_dev;
270 if (!oif || dev->ifindex == oif)
271 return 2;
272 if ((dev->flags & IFF_LOOPBACK) &&
273 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
274 return 1;
275 return 0;
276}
1da177e4 277
554cfb7e 278static int inline rt6_check_neigh(struct rt6_info *rt)
1da177e4 279{
554cfb7e
YH
280 struct neighbour *neigh = rt->rt6i_nexthop;
281 int m = 0;
4d0c5911
YH
282 if (rt->rt6i_flags & RTF_NONEXTHOP ||
283 !(rt->rt6i_flags & RTF_GATEWAY))
284 m = 1;
285 else if (neigh) {
554cfb7e
YH
286 read_lock_bh(&neigh->lock);
287 if (neigh->nud_state & NUD_VALID)
4d0c5911 288 m = 2;
554cfb7e 289 read_unlock_bh(&neigh->lock);
1da177e4 290 }
554cfb7e 291 return m;
1da177e4
LT
292}
293
554cfb7e
YH
294static int rt6_score_route(struct rt6_info *rt, int oif,
295 int strict)
1da177e4 296{
4d0c5911
YH
297 int m, n;
298
299 m = rt6_check_dev(rt, oif);
554cfb7e
YH
300 if (!m && (strict & RT6_SELECT_F_IFACE))
301 return -1;
ebacaaa0
YH
302#ifdef CONFIG_IPV6_ROUTER_PREF
303 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
304#endif
4d0c5911
YH
305 n = rt6_check_neigh(rt);
306 if (n > 1)
ebacaaa0 307 m |= 16;
4d0c5911 308 else if (!n && strict & RT6_SELECT_F_REACHABLE)
554cfb7e
YH
309 return -1;
310 return m;
311}
312
313static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
314 int strict)
315{
316 struct rt6_info *match = NULL, *last = NULL;
317 struct rt6_info *rt, *rt0 = *head;
318 u32 metric;
319 int mpri = -1;
1da177e4 320
554cfb7e
YH
321 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
322 __FUNCTION__, head, head ? *head : NULL, oif);
1da177e4 323
554cfb7e 324 for (rt = rt0, metric = rt0->rt6i_metric;
c302e6d5 325 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
554cfb7e
YH
326 rt = rt->u.next) {
327 int m;
1da177e4 328
554cfb7e 329 if (rt6_check_expired(rt))
1da177e4
LT
330 continue;
331
554cfb7e
YH
332 last = rt;
333
334 m = rt6_score_route(rt, oif, strict);
335 if (m < 0)
1da177e4 336 continue;
1da177e4 337
554cfb7e 338 if (m > mpri) {
27097255 339 rt6_probe(match);
554cfb7e 340 match = rt;
1da177e4 341 mpri = m;
27097255
YH
342 } else {
343 rt6_probe(rt);
1da177e4
LT
344 }
345 }
346
554cfb7e
YH
347 if (!match &&
348 (strict & RT6_SELECT_F_REACHABLE) &&
349 last && last != rt0) {
350 /* no entries matched; do round-robin */
34af946a 351 static DEFINE_SPINLOCK(lock);
c302e6d5 352 spin_lock(&lock);
554cfb7e
YH
353 *head = rt0->u.next;
354 rt0->u.next = last->u.next;
355 last->u.next = rt0;
c302e6d5 356 spin_unlock(&lock);
1da177e4 357 }
1da177e4 358
554cfb7e
YH
359 RT6_TRACE("%s() => %p, score=%d\n",
360 __FUNCTION__, match, mpri);
1da177e4 361
554cfb7e 362 return (match ? match : &ip6_null_entry);
1da177e4
LT
363}
364
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received from router @gwaddr on
 * device @dev.
 *
 * @opt points at the option, @len is the number of bytes available.
 * The option is validated (length 0..3 units, prefix length <= 128 and
 * consistent with the option length), then the matching route is
 * looked up via rt6_get_route_info() and
 *   - deleted when the advertised lifetime is 0,
 *   - created when it does not exist and the lifetime is non-zero,
 *   - otherwise refreshed with the new preference.
 * A lifetime of 0xffffffff means "never expires"; other values are
 * clamped so that HZ * lifetime cannot overflow.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/*
	 * The lifetime arrives in network byte order, so the conversion
	 * is network-to-host (the original used htonl(), which yields
	 * the same bits but states the wrong direction).
	 */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3) {
		/* full 128-bit prefix present in the option */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	if (rt && !lifetime) {
		/* ip6_del_rt() drops the reference obtained by the lookup */
		ip6_del_rt(rt, NULL, NULL, NULL);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
442
1da177e4
LT
443struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
444 int oif, int strict)
445{
446 struct fib6_node *fn;
447 struct rt6_info *rt;
448
449 read_lock_bh(&rt6_lock);
450 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
451 rt = rt6_device_match(fn->leaf, oif, strict);
452 dst_hold(&rt->u.dst);
453 rt->u.dst.__use++;
454 read_unlock_bh(&rt6_lock);
455
456 rt->u.dst.lastuse = jiffies;
457 if (rt->u.dst.error == 0)
458 return rt;
459 dst_release(&rt->u.dst);
460 return NULL;
461}
462
/* ip6_ins_rt must be called with rt6_lock NOT held; it takes the
   write lock itself. It takes a new route entry; if the addition fails
   for any reason, the route is freed. In any case, if the caller does
   not hold a reference to the route, it may be destroyed.
 */
468
0d51aa80
JHS
469int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
470 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
471{
472 int err;
473
474 write_lock_bh(&rt6_lock);
0d51aa80 475 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
1da177e4
LT
476 write_unlock_bh(&rt6_lock);
477
478 return err;
479}
480
95a9a5ba
YH
481static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
482 struct in6_addr *saddr)
1da177e4 483{
1da177e4
LT
484 struct rt6_info *rt;
485
486 /*
487 * Clone the route.
488 */
489
490 rt = ip6_rt_copy(ort);
491
492 if (rt) {
58c4fb86
YH
493 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
494 if (rt->rt6i_dst.plen != 128 &&
495 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
496 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 497 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 498 }
1da177e4 499
58c4fb86 500 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
501 rt->rt6i_dst.plen = 128;
502 rt->rt6i_flags |= RTF_CACHE;
503 rt->u.dst.flags |= DST_HOST;
504
505#ifdef CONFIG_IPV6_SUBTREES
506 if (rt->rt6i_src.plen && saddr) {
507 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
508 rt->rt6i_src.plen = 128;
509 }
510#endif
511
512 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
513
95a9a5ba 514 }
1da177e4 515
95a9a5ba
YH
516 return rt;
517}
1da177e4 518
299d9939
YH
519static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
520{
521 struct rt6_info *rt = ip6_rt_copy(ort);
522 if (rt) {
523 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
524 rt->rt6i_dst.plen = 128;
525 rt->rt6i_flags |= RTF_CACHE;
526 if (rt->rt6i_flags & RTF_REJECT)
527 rt->u.dst.error = ort->u.dst.error;
528 rt->u.dst.flags |= DST_HOST;
529 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
530 }
531 return rt;
532}
533
/*
 * BACKTRACK() - climb towards the root after a failed selection.
 *
 * Expects the locals "rt" and "fn" and the labels "restart" and "out"
 * in the enclosing function.  When rt is the null entry, walk up the
 * parents of fn: reaching a root node jumps to "out", reaching a node
 * that carries route info jumps to "restart" (with fn pointing at that
 * node).  If rt is a real route, or the parent chain ends, execution
 * simply continues after the macro.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement and stays correct inside an unbraced if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
544
545
546void ip6_route_input(struct sk_buff *skb)
547{
548 struct fib6_node *fn;
519fbd87 549 struct rt6_info *rt, *nrt;
1da177e4
LT
550 int strict;
551 int attempts = 3;
519fbd87 552 int err;
8238dd06 553 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 554
118f8c16 555 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
556
557relookup:
558 read_lock_bh(&rt6_lock);
559
8238dd06 560restart_2:
1da177e4
LT
561 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
562 &skb->nh.ipv6h->saddr);
563
564restart:
8238dd06 565 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
1da177e4 566 BACKTRACK();
8238dd06
YH
567 if (rt == &ip6_null_entry ||
568 rt->rt6i_flags & RTF_CACHE)
1ddef044 569 goto out;
1da177e4 570
fb9de91e
YH
571 dst_hold(&rt->u.dst);
572 read_unlock_bh(&rt6_lock);
573
519fbd87
YH
574 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
575 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
576 else {
577#if CLONE_OFFLINK_ROUTE
578 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
579#else
580 goto out2;
581#endif
582 }
e40cf353 583
519fbd87
YH
584 dst_release(&rt->u.dst);
585 rt = nrt ? : &ip6_null_entry;
1da177e4 586
519fbd87
YH
587 dst_hold(&rt->u.dst);
588 if (nrt) {
589 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
590 if (!err)
1da177e4 591 goto out2;
1da177e4 592 }
1da177e4 593
519fbd87
YH
594 if (--attempts <= 0)
595 goto out2;
596
597 /*
598 * Race condition! In the gap, when rt6_lock was
599 * released someone could insert this route. Relookup.
600 */
601 dst_release(&rt->u.dst);
602 goto relookup;
603
604out:
8238dd06
YH
605 if (reachable) {
606 reachable = 0;
607 goto restart_2;
608 }
519fbd87
YH
609 dst_hold(&rt->u.dst);
610 read_unlock_bh(&rt6_lock);
1da177e4
LT
611out2:
612 rt->u.dst.lastuse = jiffies;
613 rt->u.dst.__use++;
614 skb->dst = (struct dst_entry *) rt;
fb9de91e 615 return;
1da177e4
LT
616}
617
618struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
619{
620 struct fib6_node *fn;
519fbd87 621 struct rt6_info *rt, *nrt;
1da177e4
LT
622 int strict;
623 int attempts = 3;
519fbd87 624 int err;
8238dd06 625 int reachable = RT6_SELECT_F_REACHABLE;
1da177e4 626
554cfb7e 627 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
1da177e4
LT
628
629relookup:
630 read_lock_bh(&rt6_lock);
631
8238dd06 632restart_2:
1da177e4
LT
633 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
634
635restart:
8238dd06 636 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
1ddef044 637 BACKTRACK();
8238dd06
YH
638 if (rt == &ip6_null_entry ||
639 rt->rt6i_flags & RTF_CACHE)
1da177e4 640 goto out;
1da177e4 641
fb9de91e
YH
642 dst_hold(&rt->u.dst);
643 read_unlock_bh(&rt6_lock);
644
519fbd87 645 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
e40cf353 646 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
647 else {
648#if CLONE_OFFLINK_ROUTE
649 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
650#else
651 goto out2;
652#endif
653 }
1da177e4 654
519fbd87
YH
655 dst_release(&rt->u.dst);
656 rt = nrt ? : &ip6_null_entry;
1da177e4 657
519fbd87
YH
658 dst_hold(&rt->u.dst);
659 if (nrt) {
660 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
661 if (!err)
1da177e4 662 goto out2;
1da177e4 663 }
e40cf353 664
519fbd87
YH
665 if (--attempts <= 0)
666 goto out2;
667
668 /*
669 * Race condition! In the gap, when rt6_lock was
670 * released someone could insert this route. Relookup.
671 */
672 dst_release(&rt->u.dst);
673 goto relookup;
674
675out:
8238dd06
YH
676 if (reachable) {
677 reachable = 0;
678 goto restart_2;
679 }
519fbd87
YH
680 dst_hold(&rt->u.dst);
681 read_unlock_bh(&rt6_lock);
1da177e4
LT
682out2:
683 rt->u.dst.lastuse = jiffies;
684 rt->u.dst.__use++;
685 return &rt->u.dst;
686}
687
688
689/*
690 * Destination cache support functions
691 */
692
693static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
694{
695 struct rt6_info *rt;
696
697 rt = (struct rt6_info *) dst;
698
699 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
700 return dst;
701
702 return NULL;
703}
704
705static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
706{
707 struct rt6_info *rt = (struct rt6_info *) dst;
708
709 if (rt) {
710 if (rt->rt6i_flags & RTF_CACHE)
0d51aa80 711 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
712 else
713 dst_release(dst);
714 }
715 return NULL;
716}
717
718static void ip6_link_failure(struct sk_buff *skb)
719{
720 struct rt6_info *rt;
721
722 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
723
724 rt = (struct rt6_info *) skb->dst;
725 if (rt) {
726 if (rt->rt6i_flags&RTF_CACHE) {
727 dst_set_expires(&rt->u.dst, 0);
728 rt->rt6i_flags |= RTF_EXPIRES;
729 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
730 rt->rt6i_node->fn_sernum = -1;
731 }
732}
733
734static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
735{
736 struct rt6_info *rt6 = (struct rt6_info*)dst;
737
738 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
739 rt6->rt6i_flags |= RTF_MODIFIED;
740 if (mtu < IPV6_MIN_MTU) {
741 mtu = IPV6_MIN_MTU;
742 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
743 }
744 dst->metrics[RTAX_MTU-1] = mtu;
745 }
746}
747
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() frees the unreferenced ones.
 * Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
750static int ipv6_get_mtu(struct net_device *dev);
751
752static inline unsigned int ipv6_advmss(unsigned int mtu)
753{
754 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
755
756 if (mtu < ip6_rt_min_advmss)
757 mtu = ip6_rt_min_advmss;
758
759 /*
760 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
761 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
762 * IPV6_MAXPLEN is also valid and means: "any MSS,
763 * rely only on pmtu discovery"
764 */
765 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
766 mtu = IPV6_MAXPLEN;
767 return mtu;
768}
769
770struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
771 struct neighbour *neigh,
772 struct in6_addr *addr,
773 int (*output)(struct sk_buff *))
774{
775 struct rt6_info *rt;
776 struct inet6_dev *idev = in6_dev_get(dev);
777
778 if (unlikely(idev == NULL))
779 return NULL;
780
781 rt = ip6_dst_alloc();
782 if (unlikely(rt == NULL)) {
783 in6_dev_put(idev);
784 goto out;
785 }
786
787 dev_hold(dev);
788 if (neigh)
789 neigh_hold(neigh);
790 else
791 neigh = ndisc_get_neigh(dev, addr);
792
793 rt->rt6i_dev = dev;
794 rt->rt6i_idev = idev;
795 rt->rt6i_nexthop = neigh;
796 atomic_set(&rt->u.dst.__refcnt, 1);
797 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
798 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
799 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
800 rt->u.dst.output = output;
801
802#if 0 /* there's no chance to use these for ndisc */
803 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
804 ? DST_HOST
805 : 0;
806 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
807 rt->rt6i_dst.plen = 128;
808#endif
809
810 write_lock_bh(&rt6_lock);
811 rt->u.dst.next = ndisc_dst_gc_list;
812 ndisc_dst_gc_list = &rt->u.dst;
813 write_unlock_bh(&rt6_lock);
814
815 fib6_force_start_gc();
816
817out:
818 return (struct dst_entry *)rt;
819}
820
821int ndisc_dst_gc(int *more)
822{
823 struct dst_entry *dst, *next, **pprev;
824 int freed;
825
826 next = NULL;
827 pprev = &ndisc_dst_gc_list;
828 freed = 0;
829 while ((dst = *pprev) != NULL) {
830 if (!atomic_read(&dst->__refcnt)) {
831 *pprev = dst->next;
832 dst_free(dst);
833 freed++;
834 } else {
835 pprev = &dst->next;
836 (*more)++;
837 }
838 }
839
840 return freed;
841}
842
843static int ip6_dst_gc(void)
844{
845 static unsigned expire = 30*HZ;
846 static unsigned long last_gc;
847 unsigned long now = jiffies;
848
849 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
850 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
851 goto out;
852
853 expire++;
854 fib6_run_gc(expire);
855 last_gc = now;
856 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
857 expire = ip6_rt_gc_timeout>>1;
858
859out:
860 expire -= expire>>ip6_rt_gc_elasticity;
861 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
862}
863
864/* Clean host part of a prefix. Not necessary in radix tree,
865 but results in cleaner routing tables.
866
867 Remove it only when all the things will work!
868 */
869
870static int ipv6_get_mtu(struct net_device *dev)
871{
872 int mtu = IPV6_MIN_MTU;
873 struct inet6_dev *idev;
874
875 idev = in6_dev_get(dev);
876 if (idev) {
877 mtu = idev->cnf.mtu6;
878 in6_dev_put(idev);
879 }
880 return mtu;
881}
882
883int ipv6_get_hoplimit(struct net_device *dev)
884{
885 int hoplimit = ipv6_devconf.hop_limit;
886 struct inet6_dev *idev;
887
888 idev = in6_dev_get(dev);
889 if (idev) {
890 hoplimit = idev->cnf.hop_limit;
891 in6_dev_put(idev);
892 }
893 return hoplimit;
894}
895
896/*
897 *
898 */
899
0d51aa80
JHS
900int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
901 void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
902{
903 int err;
904 struct rtmsg *r;
905 struct rtattr **rta;
906 struct rt6_info *rt = NULL;
907 struct net_device *dev = NULL;
908 struct inet6_dev *idev = NULL;
909 int addr_type;
910
911 rta = (struct rtattr **) _rtattr;
912
913 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
914 return -EINVAL;
915#ifndef CONFIG_IPV6_SUBTREES
916 if (rtmsg->rtmsg_src_len)
917 return -EINVAL;
918#endif
919 if (rtmsg->rtmsg_ifindex) {
920 err = -ENODEV;
921 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
922 if (!dev)
923 goto out;
924 idev = in6_dev_get(dev);
925 if (!idev)
926 goto out;
927 }
928
929 if (rtmsg->rtmsg_metric == 0)
930 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
931
932 rt = ip6_dst_alloc();
933
934 if (rt == NULL) {
935 err = -ENOMEM;
936 goto out;
937 }
938
939 rt->u.dst.obsolete = -1;
3dd4bc68 940 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1da177e4
LT
941 if (nlh && (r = NLMSG_DATA(nlh))) {
942 rt->rt6i_protocol = r->rtm_protocol;
943 } else {
944 rt->rt6i_protocol = RTPROT_BOOT;
945 }
946
947 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
948
949 if (addr_type & IPV6_ADDR_MULTICAST)
950 rt->u.dst.input = ip6_mc_input;
951 else
952 rt->u.dst.input = ip6_forward;
953
954 rt->u.dst.output = ip6_output;
955
956 ipv6_addr_prefix(&rt->rt6i_dst.addr,
957 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
958 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
959 if (rt->rt6i_dst.plen == 128)
960 rt->u.dst.flags = DST_HOST;
961
962#ifdef CONFIG_IPV6_SUBTREES
963 ipv6_addr_prefix(&rt->rt6i_src.addr,
964 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
965 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
966#endif
967
968 rt->rt6i_metric = rtmsg->rtmsg_metric;
969
970 /* We cannot add true routes via loopback here,
971 they would result in kernel looping; promote them to reject routes
972 */
973 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
974 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
975 /* hold loopback dev/idev if we haven't done so. */
976 if (dev != &loopback_dev) {
977 if (dev) {
978 dev_put(dev);
979 in6_dev_put(idev);
980 }
981 dev = &loopback_dev;
982 dev_hold(dev);
983 idev = in6_dev_get(dev);
984 if (!idev) {
985 err = -ENODEV;
986 goto out;
987 }
988 }
989 rt->u.dst.output = ip6_pkt_discard_out;
990 rt->u.dst.input = ip6_pkt_discard;
991 rt->u.dst.error = -ENETUNREACH;
992 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
993 goto install_route;
994 }
995
996 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
997 struct in6_addr *gw_addr;
998 int gwa_type;
999
1000 gw_addr = &rtmsg->rtmsg_gateway;
1001 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1002 gwa_type = ipv6_addr_type(gw_addr);
1003
1004 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1005 struct rt6_info *grt;
1006
1007 /* IPv6 strictly inhibits using not link-local
1008 addresses as nexthop address.
1009 Otherwise, router will not able to send redirects.
1010 It is very good, but in some (rare!) circumstances
1011 (SIT, PtP, NBMA NOARP links) it is handy to allow
1012 some exceptions. --ANK
1013 */
1014 err = -EINVAL;
1015 if (!(gwa_type&IPV6_ADDR_UNICAST))
1016 goto out;
1017
1018 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1019
1020 err = -EHOSTUNREACH;
1021 if (grt == NULL)
1022 goto out;
1023 if (dev) {
1024 if (dev != grt->rt6i_dev) {
1025 dst_release(&grt->u.dst);
1026 goto out;
1027 }
1028 } else {
1029 dev = grt->rt6i_dev;
1030 idev = grt->rt6i_idev;
1031 dev_hold(dev);
1032 in6_dev_hold(grt->rt6i_idev);
1033 }
1034 if (!(grt->rt6i_flags&RTF_GATEWAY))
1035 err = 0;
1036 dst_release(&grt->u.dst);
1037
1038 if (err)
1039 goto out;
1040 }
1041 err = -EINVAL;
1042 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1043 goto out;
1044 }
1045
1046 err = -ENODEV;
1047 if (dev == NULL)
1048 goto out;
1049
1050 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1051 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1052 if (IS_ERR(rt->rt6i_nexthop)) {
1053 err = PTR_ERR(rt->rt6i_nexthop);
1054 rt->rt6i_nexthop = NULL;
1055 goto out;
1056 }
1057 }
1058
1059 rt->rt6i_flags = rtmsg->rtmsg_flags;
1060
1061install_route:
1062 if (rta && rta[RTA_METRICS-1]) {
1063 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1064 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1065
1066 while (RTA_OK(attr, attrlen)) {
1067 unsigned flavor = attr->rta_type;
1068 if (flavor) {
1069 if (flavor > RTAX_MAX) {
1070 err = -EINVAL;
1071 goto out;
1072 }
1073 rt->u.dst.metrics[flavor-1] =
1074 *(u32 *)RTA_DATA(attr);
1075 }
1076 attr = RTA_NEXT(attr, attrlen);
1077 }
1078 }
1079
1080 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1081 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1082 if (!rt->u.dst.metrics[RTAX_MTU-1])
1083 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1084 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1085 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1086 rt->u.dst.dev = dev;
1087 rt->rt6i_idev = idev;
0d51aa80 1088 return ip6_ins_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1089
1090out:
1091 if (dev)
1092 dev_put(dev);
1093 if (idev)
1094 in6_dev_put(idev);
1095 if (rt)
1096 dst_free((struct dst_entry *) rt);
1097 return err;
1098}
1099
0d51aa80 1100int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1101{
1102 int err;
1103
1104 write_lock_bh(&rt6_lock);
1105
0d51aa80 1106 err = fib6_del(rt, nlh, _rtattr, req);
1da177e4
LT
1107 dst_release(&rt->u.dst);
1108
1109 write_unlock_bh(&rt6_lock);
1110
1111 return err;
1112}
1113
0d51aa80 1114static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1da177e4
LT
1115{
1116 struct fib6_node *fn;
1117 struct rt6_info *rt;
1118 int err = -ESRCH;
1119
1120 read_lock_bh(&rt6_lock);
1121
1122 fn = fib6_locate(&ip6_routing_table,
1123 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1124 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1125
1126 if (fn) {
1127 for (rt = fn->leaf; rt; rt = rt->u.next) {
1128 if (rtmsg->rtmsg_ifindex &&
1129 (rt->rt6i_dev == NULL ||
1130 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1131 continue;
1132 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1133 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1134 continue;
1135 if (rtmsg->rtmsg_metric &&
1136 rtmsg->rtmsg_metric != rt->rt6i_metric)
1137 continue;
1138 dst_hold(&rt->u.dst);
1139 read_unlock_bh(&rt6_lock);
1140
0d51aa80 1141 return ip6_del_rt(rt, nlh, _rtattr, req);
1da177e4
LT
1142 }
1143 }
1144 read_unlock_bh(&rt6_lock);
1145
1146 return err;
1147}
1148
1149/*
1150 * Handle redirects
1151 */
1152void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1153 struct neighbour *neigh, u8 *lladdr, int on_link)
1154{
e843b9e1
YH
1155 struct rt6_info *rt, *nrt = NULL;
1156 int strict;
1157 struct fib6_node *fn;
1da177e4
LT
1158
1159 /*
e843b9e1
YH
1160 * Get the "current" route for this destination and
1161 * check if the redirect has come from approriate router.
1162 *
1163 * RFC 2461 specifies that redirects should only be
1164 * accepted if they come from the nexthop to the target.
1165 * Due to the way the routes are chosen, this notion
1166 * is a bit fuzzy and one might need to check all possible
1167 * routes.
1da177e4 1168 */
e843b9e1 1169 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
1da177e4 1170
e843b9e1
YH
1171 read_lock_bh(&rt6_lock);
1172 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1173restart:
1174 for (rt = fn->leaf; rt; rt = rt->u.next) {
1175 /*
1176 * Current route is on-link; redirect is always invalid.
1177 *
1178 * Seems, previous statement is not true. It could
1179 * be node, which looks for us as on-link (f.e. proxy ndisc)
1180 * But then router serving it might decide, that we should
1181 * know truth 8)8) --ANK (980726).
1182 */
1183 if (rt6_check_expired(rt))
1184 continue;
1185 if (!(rt->rt6i_flags & RTF_GATEWAY))
1186 continue;
1187 if (neigh->dev != rt->rt6i_dev)
1188 continue;
1189 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1190 continue;
1191 break;
1192 }
1193 if (rt)
1194 dst_hold(&rt->u.dst);
1195 else if (strict) {
1196 while ((fn = fn->parent) != NULL) {
1197 if (fn->fn_flags & RTN_ROOT)
1198 break;
1199 if (fn->fn_flags & RTN_RTINFO)
1200 goto restart;
1da177e4 1201 }
e843b9e1
YH
1202 }
1203 read_unlock_bh(&rt6_lock);
1204
1205 if (!rt) {
1da177e4
LT
1206 if (net_ratelimit())
1207 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1208 "for redirect target\n");
e843b9e1 1209 return;
1da177e4
LT
1210 }
1211
1da177e4
LT
1212 /*
1213 * We have finally decided to accept it.
1214 */
1215
1216 neigh_update(neigh, lladdr, NUD_STALE,
1217 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1218 NEIGH_UPDATE_F_OVERRIDE|
1219 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1220 NEIGH_UPDATE_F_ISROUTER))
1221 );
1222
1223 /*
1224 * Redirect received -> path was valid.
1225 * Look, redirects are sent only in response to data packets,
1226 * so that this nexthop apparently is reachable. --ANK
1227 */
1228 dst_confirm(&rt->u.dst);
1229
1230 /* Duplicate redirect: silently ignore. */
1231 if (neigh == rt->u.dst.neighbour)
1232 goto out;
1233
1234 nrt = ip6_rt_copy(rt);
1235 if (nrt == NULL)
1236 goto out;
1237
1238 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1239 if (on_link)
1240 nrt->rt6i_flags &= ~RTF_GATEWAY;
1241
1242 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1243 nrt->rt6i_dst.plen = 128;
1244 nrt->u.dst.flags |= DST_HOST;
1245
1246 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1247 nrt->rt6i_nexthop = neigh_clone(neigh);
1248 /* Reset pmtu, it may be better */
1249 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1250 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1251
0d51aa80 1252 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1da177e4
LT
1253 goto out;
1254
1255 if (rt->rt6i_flags&RTF_CACHE) {
0d51aa80 1256 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1257 return;
1258 }
1259
1260out:
1261 dst_release(&rt->u.dst);
1262 return;
1263}
1264
1265/*
1266 * Handle ICMP "packet too big" messages
1267 * i.e. Path MTU discovery
1268 */
1269
1270void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1271 struct net_device *dev, u32 pmtu)
1272{
1273 struct rt6_info *rt, *nrt;
1274 int allfrag = 0;
1275
1276 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1277 if (rt == NULL)
1278 return;
1279
1280 if (pmtu >= dst_mtu(&rt->u.dst))
1281 goto out;
1282
1283 if (pmtu < IPV6_MIN_MTU) {
1284 /*
1285 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1286 * MTU (1280) and a fragment header should always be included
1287 * after a node receiving Too Big message reporting PMTU is
1288 * less than the IPv6 Minimum Link MTU.
1289 */
1290 pmtu = IPV6_MIN_MTU;
1291 allfrag = 1;
1292 }
1293
1294 /* New mtu received -> path was valid.
1295 They are sent only in response to data packets,
1296 so that this nexthop apparently is reachable. --ANK
1297 */
1298 dst_confirm(&rt->u.dst);
1299
1300 /* Host route. If it is static, it would be better
1301 not to override it, but add new one, so that
1302 when cache entry will expire old pmtu
1303 would return automatically.
1304 */
1305 if (rt->rt6i_flags & RTF_CACHE) {
1306 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1307 if (allfrag)
1308 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1309 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1310 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1311 goto out;
1312 }
1313
1314 /* Network route.
1315 Two cases are possible:
1316 1. It is connected route. Action: COW
1317 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1318 */
d5315b50 1319 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1320 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1321 else
1322 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1323
d5315b50 1324 if (nrt) {
a1e78363
YH
1325 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1326 if (allfrag)
1327 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1328
1329 /* According to RFC 1981, detecting PMTU increase shouldn't be
1330 * happened within 5 mins, the recommended timer is 10 mins.
1331 * Here this route expiration time is set to ip6_rt_mtu_expires
1332 * which is 10 mins. After 10 mins the decreased pmtu is expired
1333 * and detecting PMTU increase will be automatically happened.
1334 */
1335 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1336 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1337
1338 ip6_ins_rt(nrt, NULL, NULL, NULL);
1da177e4 1339 }
1da177e4
LT
1340out:
1341 dst_release(&rt->u.dst);
1342}
1343
1344/*
1345 * Misc support functions
1346 */
1347
1348static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1349{
1350 struct rt6_info *rt = ip6_dst_alloc();
1351
1352 if (rt) {
1353 rt->u.dst.input = ort->u.dst.input;
1354 rt->u.dst.output = ort->u.dst.output;
1355
1356 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1357 rt->u.dst.dev = ort->u.dst.dev;
1358 if (rt->u.dst.dev)
1359 dev_hold(rt->u.dst.dev);
1360 rt->rt6i_idev = ort->rt6i_idev;
1361 if (rt->rt6i_idev)
1362 in6_dev_hold(rt->rt6i_idev);
1363 rt->u.dst.lastuse = jiffies;
1364 rt->rt6i_expires = 0;
1365
1366 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1367 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1368 rt->rt6i_metric = 0;
1369
1370 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1371#ifdef CONFIG_IPV6_SUBTREES
1372 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1373#endif
1374 }
1375 return rt;
1376}
1377
70ceb4f5
YH
1378#ifdef CONFIG_IPV6_ROUTE_INFO
1379static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1380 struct in6_addr *gwaddr, int ifindex)
1381{
1382 struct fib6_node *fn;
1383 struct rt6_info *rt = NULL;
1384
1385 write_lock_bh(&rt6_lock);
1386 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1387 if (!fn)
1388 goto out;
1389
1390 for (rt = fn->leaf; rt; rt = rt->u.next) {
1391 if (rt->rt6i_dev->ifindex != ifindex)
1392 continue;
1393 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1394 continue;
1395 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1396 continue;
1397 dst_hold(&rt->u.dst);
1398 break;
1399 }
1400out:
1401 write_unlock_bh(&rt6_lock);
1402 return rt;
1403}
1404
1405static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1406 struct in6_addr *gwaddr, int ifindex,
1407 unsigned pref)
1408{
1409 struct in6_rtmsg rtmsg;
1410
1411 memset(&rtmsg, 0, sizeof(rtmsg));
1412 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1413 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1414 rtmsg.rtmsg_dst_len = prefixlen;
1415 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1416 rtmsg.rtmsg_metric = 1024;
1417 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
e317da96
YH
1418 /* We should treat it as a default route if prefix length is 0. */
1419 if (!prefixlen)
1420 rtmsg.rtmsg_flags |= RTF_DEFAULT;
70ceb4f5
YH
1421 rtmsg.rtmsg_ifindex = ifindex;
1422
1423 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1424
1425 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1426}
1427#endif
1428
1da177e4
LT
1429struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1430{
1431 struct rt6_info *rt;
1432 struct fib6_node *fn;
1433
1434 fn = &ip6_routing_table;
1435
1436 write_lock_bh(&rt6_lock);
1437 for (rt = fn->leaf; rt; rt=rt->u.next) {
1438 if (dev == rt->rt6i_dev &&
045927ff 1439 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1440 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1441 break;
1442 }
1443 if (rt)
1444 dst_hold(&rt->u.dst);
1445 write_unlock_bh(&rt6_lock);
1446 return rt;
1447}
1448
1449struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1450 struct net_device *dev,
1451 unsigned int pref)
1da177e4
LT
1452{
1453 struct in6_rtmsg rtmsg;
1454
1455 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1456 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1457 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1458 rtmsg.rtmsg_metric = 1024;
ebacaaa0
YH
1459 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1460 RTF_PREF(pref);
1da177e4
LT
1461
1462 rtmsg.rtmsg_ifindex = dev->ifindex;
1463
0d51aa80 1464 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1465 return rt6_get_dflt_router(gwaddr, dev);
1466}
1467
1468void rt6_purge_dflt_routers(void)
1469{
1470 struct rt6_info *rt;
1471
1472restart:
1473 read_lock_bh(&rt6_lock);
1474 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1475 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1476 dst_hold(&rt->u.dst);
1477
1da177e4
LT
1478 read_unlock_bh(&rt6_lock);
1479
0d51aa80 1480 ip6_del_rt(rt, NULL, NULL, NULL);
1da177e4
LT
1481
1482 goto restart;
1483 }
1484 }
1485 read_unlock_bh(&rt6_lock);
1486}
1487
1488int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1489{
1490 struct in6_rtmsg rtmsg;
1491 int err;
1492
1493 switch(cmd) {
1494 case SIOCADDRT: /* Add a route */
1495 case SIOCDELRT: /* Delete a route */
1496 if (!capable(CAP_NET_ADMIN))
1497 return -EPERM;
1498 err = copy_from_user(&rtmsg, arg,
1499 sizeof(struct in6_rtmsg));
1500 if (err)
1501 return -EFAULT;
1502
1503 rtnl_lock();
1504 switch (cmd) {
1505 case SIOCADDRT:
0d51aa80 1506 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1507 break;
1508 case SIOCDELRT:
0d51aa80 1509 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1da177e4
LT
1510 break;
1511 default:
1512 err = -EINVAL;
1513 }
1514 rtnl_unlock();
1515
1516 return err;
1517 };
1518
1519 return -EINVAL;
1520}
1521
1522/*
1523 * Drop the packet on the floor
1524 */
1525
20380731 1526static int ip6_pkt_discard(struct sk_buff *skb)
1da177e4
LT
1527{
1528 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1529 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1530 kfree_skb(skb);
1531 return 0;
1532}
1533
20380731 1534static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4
LT
1535{
1536 skb->dev = skb->dst->dev;
1537 return ip6_pkt_discard(skb);
1538}
1539
1540/*
1541 * Allocate a dst for local (unicast / anycast) address.
1542 */
1543
1544struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1545 const struct in6_addr *addr,
1546 int anycast)
1547{
1548 struct rt6_info *rt = ip6_dst_alloc();
1549
1550 if (rt == NULL)
1551 return ERR_PTR(-ENOMEM);
1552
1553 dev_hold(&loopback_dev);
1554 in6_dev_hold(idev);
1555
1556 rt->u.dst.flags = DST_HOST;
1557 rt->u.dst.input = ip6_input;
1558 rt->u.dst.output = ip6_output;
1559 rt->rt6i_dev = &loopback_dev;
1560 rt->rt6i_idev = idev;
1561 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1562 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1563 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1564 rt->u.dst.obsolete = -1;
1565
1566 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1567 if (anycast)
1568 rt->rt6i_flags |= RTF_ANYCAST;
1569 else
1da177e4
LT
1570 rt->rt6i_flags |= RTF_LOCAL;
1571 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1572 if (rt->rt6i_nexthop == NULL) {
1573 dst_free((struct dst_entry *) rt);
1574 return ERR_PTR(-ENOMEM);
1575 }
1576
1577 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1578 rt->rt6i_dst.plen = 128;
1579
1580 atomic_set(&rt->u.dst.__refcnt, 1);
1581
1582 return rt;
1583}
1584
1585static int fib6_ifdown(struct rt6_info *rt, void *arg)
1586{
1587 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1588 rt != &ip6_null_entry) {
1589 RT6_TRACE("deleted by ifdown %p\n", rt);
1590 return -1;
1591 }
1592 return 0;
1593}
1594
1595void rt6_ifdown(struct net_device *dev)
1596{
1597 write_lock_bh(&rt6_lock);
1598 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1599 write_unlock_bh(&rt6_lock);
1600}
1601
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* its new MTU */
};
1607
1608static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1609{
1610 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1611 struct inet6_dev *idev;
1612
1613 /* In IPv6 pmtu discovery is not optional,
1614 so that RTAX_MTU lock cannot disable it.
1615 We still use this lock to block changes
1616 caused by addrconf/ndisc.
1617 */
1618
1619 idev = __in6_dev_get(arg->dev);
1620 if (idev == NULL)
1621 return 0;
1622
1623 /* For administrative MTU increase, there is no way to discover
1624 IPv6 PMTU increase, so PMTU increase should be updated here.
1625 Since RFC 1981 doesn't include administrative MTU increase
1626 update PMTU increase is a MUST. (i.e. jumbo frame)
1627 */
1628 /*
1629 If new MTU is less than route PMTU, this new MTU will be the
1630 lowest MTU in the path, update the route PMTU to reflect PMTU
1631 decreases; if new MTU is greater than route PMTU, and the
1632 old MTU is the lowest MTU in the path, update the route PMTU
1633 to reflect the increase. In this case if the other nodes' MTU
1634 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1635 PMTU discouvery.
1636 */
1637 if (rt->rt6i_dev == arg->dev &&
1638 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1639 (dst_mtu(&rt->u.dst) > arg->mtu ||
1640 (dst_mtu(&rt->u.dst) < arg->mtu &&
1641 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1642 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1643 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1644 return 0;
1645}
1646
1647void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1648{
1649 struct rt6_mtu_change_arg arg;
1650
1651 arg.dev = dev;
1652 arg.mtu = mtu;
1653 read_lock_bh(&rt6_lock);
1654 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1655 read_unlock_bh(&rt6_lock);
1656}
1657
1658static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1659 struct in6_rtmsg *rtmsg)
1660{
1661 memset(rtmsg, 0, sizeof(*rtmsg));
1662
1663 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1664 rtmsg->rtmsg_src_len = r->rtm_src_len;
1665 rtmsg->rtmsg_flags = RTF_UP;
1666 if (r->rtm_type == RTN_UNREACHABLE)
1667 rtmsg->rtmsg_flags |= RTF_REJECT;
1668
1669 if (rta[RTA_GATEWAY-1]) {
1670 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1671 return -EINVAL;
1672 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1673 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1674 }
1675 if (rta[RTA_DST-1]) {
1676 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1677 return -EINVAL;
1678 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1679 }
1680 if (rta[RTA_SRC-1]) {
1681 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1682 return -EINVAL;
1683 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1684 }
1685 if (rta[RTA_OIF-1]) {
1686 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1687 return -EINVAL;
1688 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1689 }
1690 if (rta[RTA_PRIORITY-1]) {
1691 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1692 return -EINVAL;
1693 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1694 }
1695 return 0;
1696}
1697
1698int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1699{
1700 struct rtmsg *r = NLMSG_DATA(nlh);
1701 struct in6_rtmsg rtmsg;
1702
1703 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1704 return -EINVAL;
0d51aa80 1705 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1706}
1707
1708int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1709{
1710 struct rtmsg *r = NLMSG_DATA(nlh);
1711 struct in6_rtmsg rtmsg;
1712
1713 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1714 return -EINVAL;
0d51aa80 1715 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1da177e4
LT
1716}
1717
/* Per-call context handed to rt6_dump_route() through the tree walker. */
struct rt6_rtnl_dump_arg
{
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* dump callback state of the request */
};
1723
1724static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
1725 struct in6_addr *dst, struct in6_addr *src,
1726 int iif, int type, u32 pid, u32 seq,
1727 int prefix, unsigned int flags)
1da177e4
LT
1728{
1729 struct rtmsg *rtm;
1730 struct nlmsghdr *nlh;
1731 unsigned char *b = skb->tail;
1732 struct rta_cacheinfo ci;
1733
1734 if (prefix) { /* user wants prefix routes only */
1735 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1736 /* success since this is not a prefix route */
1737 return 1;
1738 }
1739 }
1740
b6544c0b 1741 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1da177e4
LT
1742 rtm = NLMSG_DATA(nlh);
1743 rtm->rtm_family = AF_INET6;
1744 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1745 rtm->rtm_src_len = rt->rt6i_src.plen;
1746 rtm->rtm_tos = 0;
1747 rtm->rtm_table = RT_TABLE_MAIN;
1748 if (rt->rt6i_flags&RTF_REJECT)
1749 rtm->rtm_type = RTN_UNREACHABLE;
1750 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1751 rtm->rtm_type = RTN_LOCAL;
1752 else
1753 rtm->rtm_type = RTN_UNICAST;
1754 rtm->rtm_flags = 0;
1755 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1756 rtm->rtm_protocol = rt->rt6i_protocol;
1757 if (rt->rt6i_flags&RTF_DYNAMIC)
1758 rtm->rtm_protocol = RTPROT_REDIRECT;
1759 else if (rt->rt6i_flags & RTF_ADDRCONF)
1760 rtm->rtm_protocol = RTPROT_KERNEL;
1761 else if (rt->rt6i_flags&RTF_DEFAULT)
1762 rtm->rtm_protocol = RTPROT_RA;
1763
1764 if (rt->rt6i_flags&RTF_CACHE)
1765 rtm->rtm_flags |= RTM_F_CLONED;
1766
1767 if (dst) {
1768 RTA_PUT(skb, RTA_DST, 16, dst);
1769 rtm->rtm_dst_len = 128;
1770 } else if (rtm->rtm_dst_len)
1771 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1772#ifdef CONFIG_IPV6_SUBTREES
1773 if (src) {
1774 RTA_PUT(skb, RTA_SRC, 16, src);
1775 rtm->rtm_src_len = 128;
1776 } else if (rtm->rtm_src_len)
1777 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1778#endif
1779 if (iif)
1780 RTA_PUT(skb, RTA_IIF, 4, &iif);
1781 else if (dst) {
1782 struct in6_addr saddr_buf;
1783 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1784 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1785 }
1786 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1787 goto rtattr_failure;
1788 if (rt->u.dst.neighbour)
1789 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1790 if (rt->u.dst.dev)
1791 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1792 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1793 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1794 if (rt->rt6i_expires)
1795 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1796 else
1797 ci.rta_expires = 0;
1798 ci.rta_used = rt->u.dst.__use;
1799 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1800 ci.rta_error = rt->u.dst.error;
1801 ci.rta_id = 0;
1802 ci.rta_ts = 0;
1803 ci.rta_tsage = 0;
1804 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1805 nlh->nlmsg_len = skb->tail - b;
1806 return skb->len;
1807
1808nlmsg_failure:
1809rtattr_failure:
1810 skb_trim(skb, b - skb->data);
1811 return -1;
1812}
1813
1814static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1815{
1816 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1817 int prefix;
1818
1819 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1820 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1821 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1822 } else
1823 prefix = 0;
1824
1825 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1826 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
0d51aa80 1827 prefix, NLM_F_MULTI);
1da177e4
LT
1828}
1829
1830static int fib6_dump_node(struct fib6_walker_t *w)
1831{
1832 int res;
1833 struct rt6_info *rt;
1834
1835 for (rt = w->leaf; rt; rt = rt->u.next) {
1836 res = rt6_dump_route(rt, w->args);
1837 if (res < 0) {
1838 /* Frame is full, suspend walking */
1839 w->leaf = rt;
1840 return 1;
1841 }
1842 BUG_TRAP(res!=0);
1843 }
1844 w->leaf = NULL;
1845 return 0;
1846}
1847
1848static void fib6_dump_end(struct netlink_callback *cb)
1849{
1850 struct fib6_walker_t *w = (void*)cb->args[0];
1851
1852 if (w) {
1853 cb->args[0] = 0;
1854 fib6_walker_unlink(w);
1855 kfree(w);
1856 }
efacfbcb
HX
1857 cb->done = (void*)cb->args[1];
1858 cb->args[1] = 0;
1da177e4
LT
1859}
1860
1861static int fib6_dump_done(struct netlink_callback *cb)
1862{
1863 fib6_dump_end(cb);
a8f74b22 1864 return cb->done ? cb->done(cb) : 0;
1da177e4
LT
1865}
1866
1867int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1868{
1869 struct rt6_rtnl_dump_arg arg;
1870 struct fib6_walker_t *w;
1871 int res;
1872
1873 arg.skb = skb;
1874 arg.cb = cb;
1875
1876 w = (void*)cb->args[0];
1877 if (w == NULL) {
1878 /* New dump:
1879 *
1880 * 1. hook callback destructor.
1881 */
1882 cb->args[1] = (long)cb->done;
1883 cb->done = fib6_dump_done;
1884
1885 /*
1886 * 2. allocate and initialize walker.
1887 */
0c600eda 1888 w = kzalloc(sizeof(*w), GFP_ATOMIC);
1da177e4
LT
1889 if (w == NULL)
1890 return -ENOMEM;
1891 RT6_TRACE("dump<%p", w);
1da177e4
LT
1892 w->root = &ip6_routing_table;
1893 w->func = fib6_dump_node;
1894 w->args = &arg;
1895 cb->args[0] = (long)w;
1896 read_lock_bh(&rt6_lock);
1897 res = fib6_walk(w);
1898 read_unlock_bh(&rt6_lock);
1899 } else {
1900 w->args = &arg;
1901 read_lock_bh(&rt6_lock);
1902 res = fib6_walk_continue(w);
1903 read_unlock_bh(&rt6_lock);
1904 }
1905#if RT6_DEBUG >= 3
1906 if (res <= 0 && skb->len == 0)
1907 RT6_TRACE("%p>dump end\n", w);
1908#endif
1909 res = res < 0 ? res : skb->len;
1910 /* res < 0 is an error. (really, impossible)
1911 res == 0 means that dump is complete, but skb still can contain data.
1912 res > 0 dump is not complete, but frame is full.
1913 */
1914 /* Destroy walker, if dump of this table is complete. */
1915 if (res <= 0)
1916 fib6_dump_end(cb);
1917 return res;
1918}
1919
1920int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1921{
1922 struct rtattr **rta = arg;
1923 int iif = 0;
1924 int err = -ENOBUFS;
1925 struct sk_buff *skb;
1926 struct flowi fl;
1927 struct rt6_info *rt;
1928
1929 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1930 if (skb == NULL)
1931 goto out;
1932
1933 /* Reserve room for dummy headers, this skb can pass
1934 through good chunk of routing engine.
1935 */
1936 skb->mac.raw = skb->data;
1937 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1938
1939 memset(&fl, 0, sizeof(fl));
1940 if (rta[RTA_SRC-1])
1941 ipv6_addr_copy(&fl.fl6_src,
1942 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1943 if (rta[RTA_DST-1])
1944 ipv6_addr_copy(&fl.fl6_dst,
1945 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1946
1947 if (rta[RTA_IIF-1])
1948 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1949
1950 if (iif) {
1951 struct net_device *dev;
1952 dev = __dev_get_by_index(iif);
1953 if (!dev) {
1954 err = -ENODEV;
1955 goto out_free;
1956 }
1957 }
1958
1959 fl.oif = 0;
1960 if (rta[RTA_OIF-1])
1961 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1962
1963 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1964
1965 skb->dst = &rt->u.dst;
1966
1967 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1968 err = rt6_fill_node(skb, rt,
1969 &fl.fl6_dst, &fl.fl6_src,
1970 iif,
1971 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
0d51aa80 1972 nlh->nlmsg_seq, 0, 0);
1da177e4
LT
1973 if (err < 0) {
1974 err = -EMSGSIZE;
1975 goto out_free;
1976 }
1977
1978 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1979 if (err > 0)
1980 err = 0;
1981out:
1982 return err;
1983out_free:
1984 kfree_skb(skb);
1985 goto out;
1986}
1987
0d51aa80
JHS
1988void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1989 struct netlink_skb_parms *req)
1da177e4
LT
1990{
1991 struct sk_buff *skb;
1992 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
0d51aa80
JHS
1993 u32 pid = current->pid;
1994 u32 seq = 0;
1da177e4 1995
0d51aa80
JHS
1996 if (req)
1997 pid = req->pid;
1998 if (nlh)
1999 seq = nlh->nlmsg_seq;
2000
1da177e4
LT
2001 skb = alloc_skb(size, gfp_any());
2002 if (!skb) {
ac6d439d 2003 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1da177e4
LT
2004 return;
2005 }
0d51aa80 2006 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1da177e4 2007 kfree_skb(skb);
ac6d439d 2008 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1da177e4
LT
2009 return;
2010 }
ac6d439d
PM
2011 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2012 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1da177e4
LT
2013}
2014
2015/*
2016 * /proc
2017 */
2018
2019#ifdef CONFIG_PROC_FS
2020
/*
 * Length of one /proc/net/ipv6_route record; rt6_info_route() and
 * rt6_proc_info() use it to turn a byte offset into a record count.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and the rt6_info_route() callback. */
struct rt6_proc_arg
{
	char *buffer;	/* output buffer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
2031
2032static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2033{
2034 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2035 int i;
2036
2037 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2038 arg->skip++;
2039 return 0;
2040 }
2041
2042 if (arg->len >= arg->length)
2043 return 0;
2044
2045 for (i=0; i<16; i++) {
2046 sprintf(arg->buffer + arg->len, "%02x",
2047 rt->rt6i_dst.addr.s6_addr[i]);
2048 arg->len += 2;
2049 }
2050 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2051 rt->rt6i_dst.plen);
2052
2053#ifdef CONFIG_IPV6_SUBTREES
2054 for (i=0; i<16; i++) {
2055 sprintf(arg->buffer + arg->len, "%02x",
2056 rt->rt6i_src.addr.s6_addr[i]);
2057 arg->len += 2;
2058 }
2059 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2060 rt->rt6i_src.plen);
2061#else
2062 sprintf(arg->buffer + arg->len,
2063 "00000000000000000000000000000000 00 ");
2064 arg->len += 36;
2065#endif
2066
2067 if (rt->rt6i_nexthop) {
2068 for (i=0; i<16; i++) {
2069 sprintf(arg->buffer + arg->len, "%02x",
2070 rt->rt6i_nexthop->primary_key[i]);
2071 arg->len += 2;
2072 }
2073 } else {
2074 sprintf(arg->buffer + arg->len,
2075 "00000000000000000000000000000000");
2076 arg->len += 32;
2077 }
2078 arg->len += sprintf(arg->buffer + arg->len,
2079 " %08x %08x %08x %08x %8s\n",
2080 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2081 rt->u.dst.__use, rt->rt6i_flags,
2082 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2083 return 0;
2084}
2085
2086static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2087{
2088 struct rt6_proc_arg arg;
2089 arg.buffer = buffer;
2090 arg.offset = offset;
2091 arg.length = length;
2092 arg.skip = 0;
2093 arg.len = 0;
2094
2095 read_lock_bh(&rt6_lock);
2096 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2097 read_unlock_bh(&rt6_lock);
2098
2099 *start = buffer;
2100 if (offset)
2101 *start += offset % RT6_INFO_LEN;
2102
2103 arg.len -= offset % RT6_INFO_LEN;
2104
2105 if (arg.len > length)
2106 arg.len = length;
2107 if (arg.len < 0)
2108 arg.len = 0;
2109
2110 return arg.len;
2111}
2112
1da177e4
LT
2113static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2114{
2115 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2116 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2117 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2118 rt6_stats.fib_rt_cache,
2119 atomic_read(&ip6_dst_ops.entries),
2120 rt6_stats.fib_discarded_routes);
2121
2122 return 0;
2123}
2124
2125static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2126{
2127 return single_open(file, rt6_stats_seq_show, NULL);
2128}
2129
2130static struct file_operations rt6_stats_seq_fops = {
2131 .owner = THIS_MODULE,
2132 .open = rt6_stats_seq_open,
2133 .read = seq_read,
2134 .llseek = seq_lseek,
2135 .release = single_release,
2136};
2137#endif /* CONFIG_PROC_FS */
2138
2139#ifdef CONFIG_SYSCTL
2140
2141static int flush_delay;
2142
2143static
2144int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2145 void __user *buffer, size_t *lenp, loff_t *ppos)
2146{
2147 if (write) {
2148 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2149 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2150 return 0;
2151 } else
2152 return -EINVAL;
2153}
2154
2155ctl_table ipv6_route_table[] = {
2156 {
2157 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2158 .procname = "flush",
2159 .data = &flush_delay,
2160 .maxlen = sizeof(int),
89c8b3a1 2161 .mode = 0200,
1da177e4
LT
2162 .proc_handler = &ipv6_sysctl_rtcache_flush
2163 },
2164 {
2165 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2166 .procname = "gc_thresh",
2167 .data = &ip6_dst_ops.gc_thresh,
2168 .maxlen = sizeof(int),
2169 .mode = 0644,
2170 .proc_handler = &proc_dointvec,
2171 },
2172 {
2173 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2174 .procname = "max_size",
2175 .data = &ip6_rt_max_size,
2176 .maxlen = sizeof(int),
2177 .mode = 0644,
2178 .proc_handler = &proc_dointvec,
2179 },
2180 {
2181 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2182 .procname = "gc_min_interval",
2183 .data = &ip6_rt_gc_min_interval,
2184 .maxlen = sizeof(int),
2185 .mode = 0644,
2186 .proc_handler = &proc_dointvec_jiffies,
2187 .strategy = &sysctl_jiffies,
2188 },
2189 {
2190 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2191 .procname = "gc_timeout",
2192 .data = &ip6_rt_gc_timeout,
2193 .maxlen = sizeof(int),
2194 .mode = 0644,
2195 .proc_handler = &proc_dointvec_jiffies,
2196 .strategy = &sysctl_jiffies,
2197 },
2198 {
2199 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2200 .procname = "gc_interval",
2201 .data = &ip6_rt_gc_interval,
2202 .maxlen = sizeof(int),
2203 .mode = 0644,
2204 .proc_handler = &proc_dointvec_jiffies,
2205 .strategy = &sysctl_jiffies,
2206 },
2207 {
2208 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2209 .procname = "gc_elasticity",
2210 .data = &ip6_rt_gc_elasticity,
2211 .maxlen = sizeof(int),
2212 .mode = 0644,
2213 .proc_handler = &proc_dointvec_jiffies,
2214 .strategy = &sysctl_jiffies,
2215 },
2216 {
2217 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2218 .procname = "mtu_expires",
2219 .data = &ip6_rt_mtu_expires,
2220 .maxlen = sizeof(int),
2221 .mode = 0644,
2222 .proc_handler = &proc_dointvec_jiffies,
2223 .strategy = &sysctl_jiffies,
2224 },
2225 {
2226 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2227 .procname = "min_adv_mss",
2228 .data = &ip6_rt_min_advmss,
2229 .maxlen = sizeof(int),
2230 .mode = 0644,
2231 .proc_handler = &proc_dointvec_jiffies,
2232 .strategy = &sysctl_jiffies,
2233 },
2234 {
2235 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2236 .procname = "gc_min_interval_ms",
2237 .data = &ip6_rt_gc_min_interval,
2238 .maxlen = sizeof(int),
2239 .mode = 0644,
2240 .proc_handler = &proc_dointvec_ms_jiffies,
2241 .strategy = &sysctl_ms_jiffies,
2242 },
2243 { .ctl_name = 0 }
2244};
2245
2246#endif
2247
2248void __init ip6_route_init(void)
2249{
2250 struct proc_dir_entry *p;
2251
2252 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2253 sizeof(struct rt6_info),
2254 0, SLAB_HWCACHE_ALIGN,
2255 NULL, NULL);
2256 if (!ip6_dst_ops.kmem_cachep)
2257 panic("cannot create ip6_dst_cache");
2258
2259 fib6_init();
2260#ifdef CONFIG_PROC_FS
2261 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2262 if (p)
2263 p->owner = THIS_MODULE;
2264
2265 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2266#endif
2267#ifdef CONFIG_XFRM
2268 xfrm6_init();
2269#endif
2270}
2271
2272void ip6_route_cleanup(void)
2273{
2274#ifdef CONFIG_PROC_FS
2275 proc_net_remove("ipv6_route");
2276 proc_net_remove("rt6_stats");
2277#endif
2278#ifdef CONFIG_XFRM
2279 xfrm6_fini();
2280#endif
2281 rt6_ifdown(NULL);
2282 fib6_gc_cleanup();
2283 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2284}